diff options
Diffstat (limited to 'arch')
91 files changed, 1495 insertions, 508 deletions
diff --git a/arch/s390/Kbuild b/arch/s390/Kbuild index e256592eb66e..eae2c64cf69d 100644 --- a/arch/s390/Kbuild +++ b/arch/s390/Kbuild @@ -1,7 +1,7 @@ obj-y += kernel/ obj-y += mm/ obj-$(CONFIG_KVM) += kvm/ -obj-$(CONFIG_CRYPTO_HW) += crypto/ +obj-y += crypto/ obj-$(CONFIG_S390_HYPFS_FS) += hypfs/ obj-$(CONFIG_APPLDATA_BASE) += appldata/ obj-y += net/ diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index b8b143432381..e161fafb495b 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -105,6 +105,7 @@ config S390 select ARCH_INLINE_WRITE_UNLOCK_IRQRESTORE select ARCH_SAVE_PAGE_KEYS if HIBERNATION select ARCH_SUPPORTS_ATOMIC_RMW + select ARCH_SUPPORTS_DEFERRED_STRUCT_PAGE_INIT select ARCH_SUPPORTS_NUMA_BALANCING select ARCH_USE_BUILTIN_BSWAP select ARCH_USE_CMPXCHG_LOCKREF @@ -123,7 +124,6 @@ config S390 select GENERIC_TIME_VSYSCALL select HAVE_ALIGNED_STRUCT_PAGE if SLUB select HAVE_ARCH_AUDITSYSCALL - select HAVE_ARCH_EARLY_PFN_TO_NID select HAVE_ARCH_JUMP_LABEL select CPU_NO_EFFICIENT_FFS if !HAVE_MARCH_Z9_109_FEATURES select HAVE_ARCH_SECCOMP_FILTER @@ -506,6 +506,21 @@ source kernel/Kconfig.preempt source kernel/Kconfig.hz +config ARCH_RANDOM + def_bool y + prompt "s390 architectural random number generation API" + help + Enable the s390 architectural random number generation API + to provide random data for all consumers within the Linux + kernel. + + When enabled the arch_random_* functions declared in linux/random.h + are implemented. The implementation is based on the s390 CPACF + instruction subfunction TRNG which provides a real true random + number generator. + + If unsure, say Y. + endmenu menu "Memory setup" @@ -536,6 +551,16 @@ config FORCE_MAX_ZONEORDER source "mm/Kconfig" +config MAX_PHYSMEM_BITS + int "Maximum size of supported physical memory in bits (42-53)" + range 42 53 + default "46" + help + This option specifies the maximum supported size of physical memory + in bits. Supported is any size between 2^42 (4TB) and 2^53 (8PB). 
+ Increasing the number of bits also increases the kernel image size. + By default 46 bits (64TB) are supported. + config PACK_STACK def_bool y prompt "Pack kernel stack" @@ -613,7 +638,7 @@ if PCI config PCI_NR_FUNCTIONS int "Maximum number of PCI functions (1-4096)" range 1 4096 - default "64" + default "128" help This allows you to specify the maximum number of PCI functions which this kernel will support. @@ -671,6 +696,16 @@ config EADM_SCH To compile this driver as a module, choose M here: the module will be called eadm_sch. +config VFIO_CCW + def_tristate n + prompt "Support for VFIO-CCW subchannels" + depends on S390_CCW_IOMMU && VFIO_MDEV + help + This driver allows usage of I/O subchannels via VFIO-CCW. + + To compile this driver as a module, choose M here: the + module will be called vfio_ccw. + endmenu menu "Dump support" diff --git a/arch/s390/configs/default_defconfig b/arch/s390/configs/default_defconfig index 4b176fe83da4..a5039fa89314 100644 --- a/arch/s390/configs/default_defconfig +++ b/arch/s390/configs/default_defconfig @@ -73,6 +73,7 @@ CONFIG_ZSWAP=y CONFIG_ZBUD=m CONFIG_ZSMALLOC=m CONFIG_ZSMALLOC_STAT=y +CONFIG_DEFERRED_STRUCT_PAGE_INIT=y CONFIG_IDLE_PAGE_TRACKING=y CONFIG_PCI=y CONFIG_PCI_DEBUG=y diff --git a/arch/s390/configs/gcov_defconfig b/arch/s390/configs/gcov_defconfig index 0de46cc397f6..83970b5afb2b 100644 --- a/arch/s390/configs/gcov_defconfig +++ b/arch/s390/configs/gcov_defconfig @@ -72,6 +72,7 @@ CONFIG_ZSWAP=y CONFIG_ZBUD=m CONFIG_ZSMALLOC=m CONFIG_ZSMALLOC_STAT=y +CONFIG_DEFERRED_STRUCT_PAGE_INIT=y CONFIG_IDLE_PAGE_TRACKING=y CONFIG_PCI=y CONFIG_HOTPLUG_PCI=y diff --git a/arch/s390/configs/performance_defconfig b/arch/s390/configs/performance_defconfig index e167557b434c..fbc6542aaf59 100644 --- a/arch/s390/configs/performance_defconfig +++ b/arch/s390/configs/performance_defconfig @@ -70,6 +70,7 @@ CONFIG_ZSWAP=y CONFIG_ZBUD=m CONFIG_ZSMALLOC=m CONFIG_ZSMALLOC_STAT=y +CONFIG_DEFERRED_STRUCT_PAGE_INIT=y 
CONFIG_IDLE_PAGE_TRACKING=y CONFIG_PCI=y CONFIG_HOTPLUG_PCI=y diff --git a/arch/s390/configs/zfcpdump_defconfig b/arch/s390/configs/zfcpdump_defconfig index 4366a3e3e754..e23d97c13735 100644 --- a/arch/s390/configs/zfcpdump_defconfig +++ b/arch/s390/configs/zfcpdump_defconfig @@ -35,7 +35,6 @@ CONFIG_SCSI_ENCLOSURE=y CONFIG_SCSI_CONSTANTS=y CONFIG_SCSI_LOGGING=y CONFIG_SCSI_FC_ATTRS=y -CONFIG_SCSI_SRP_ATTRS=y CONFIG_ZFCP=y # CONFIG_INPUT_MOUSEDEV_PSAUX is not set # CONFIG_INPUT_KEYBOARD is not set diff --git a/arch/s390/crypto/Makefile b/arch/s390/crypto/Makefile index 402c530c6da5..678d9863e3f0 100644 --- a/arch/s390/crypto/Makefile +++ b/arch/s390/crypto/Makefile @@ -10,5 +10,6 @@ obj-$(CONFIG_CRYPTO_AES_S390) += aes_s390.o paes_s390.o obj-$(CONFIG_S390_PRNG) += prng.o obj-$(CONFIG_CRYPTO_GHASH_S390) += ghash_s390.o obj-$(CONFIG_CRYPTO_CRC32_S390) += crc32-vx_s390.o +obj-$(CONFIG_ARCH_RANDOM) += arch_random.o crc32-vx_s390-y := crc32-vx.o crc32le-vx.o crc32be-vx.o diff --git a/arch/s390/crypto/arch_random.c b/arch/s390/crypto/arch_random.c new file mode 100644 index 000000000000..9317b3e645e2 --- /dev/null +++ b/arch/s390/crypto/arch_random.c @@ -0,0 +1,31 @@ +/* + * s390 arch random implementation. + * + * Copyright IBM Corp. 2017 + * Author(s): Harald Freudenberger <freude@de.ibm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License (version 2 only) + * as published by the Free Software Foundation. 
+ * + */ + +#include <linux/kernel.h> +#include <linux/atomic.h> +#include <linux/static_key.h> +#include <asm/cpacf.h> + +DEFINE_STATIC_KEY_FALSE(s390_arch_random_available); + +atomic64_t s390_arch_random_counter = ATOMIC64_INIT(0); +EXPORT_SYMBOL(s390_arch_random_counter); + +static int __init s390_arch_random_init(void) +{ + /* check if subfunction CPACF_PRNO_TRNG is available */ + if (cpacf_query_func(CPACF_PRNO, CPACF_PRNO_TRNG)) + static_branch_enable(&s390_arch_random_available); + + return 0; +} +arch_initcall(s390_arch_random_init); diff --git a/arch/s390/crypto/paes_s390.c b/arch/s390/crypto/paes_s390.c index 716b17238599..a4e903ed7e21 100644 --- a/arch/s390/crypto/paes_s390.c +++ b/arch/s390/crypto/paes_s390.c @@ -616,7 +616,7 @@ out_err: module_init(paes_s390_init); module_exit(paes_s390_fini); -MODULE_ALIAS_CRYPTO("aes-all"); +MODULE_ALIAS_CRYPTO("paes"); MODULE_DESCRIPTION("Rijndael (AES) Cipher Algorithm with protected keys"); MODULE_LICENSE("GPL"); diff --git a/arch/s390/crypto/prng.c b/arch/s390/crypto/prng.c index 5a3ec04a7082..3e47c4a0f18b 100644 --- a/arch/s390/crypto/prng.c +++ b/arch/s390/crypto/prng.c @@ -81,7 +81,7 @@ struct prng_ws_s { u64 byte_counter; }; -struct ppno_ws_s { +struct prno_ws_s { u32 res; u32 reseed_counter; u64 stream_bytes; @@ -93,7 +93,7 @@ struct prng_data_s { struct mutex mutex; union { struct prng_ws_s prngws; - struct ppno_ws_s ppnows; + struct prno_ws_s prnows; }; u8 *buf; u32 rest; @@ -306,12 +306,12 @@ static int __init prng_sha512_selftest(void) 0x36, 0x8c, 0x5a, 0x9f, 0x7a, 0x4b, 0x3e, 0xe2 }; u8 buf[sizeof(random)]; - struct ppno_ws_s ws; + struct prno_ws_s ws; memset(&ws, 0, sizeof(ws)); /* initial seed */ - cpacf_ppno(CPACF_PPNO_SHA512_DRNG_SEED, + cpacf_prno(CPACF_PRNO_SHA512_DRNG_SEED, &ws, NULL, 0, seed, sizeof(seed)); /* check working states V and C */ @@ -324,9 +324,9 @@ static int __init prng_sha512_selftest(void) } /* generate random bytes */ - cpacf_ppno(CPACF_PPNO_SHA512_DRNG_GEN, + 
cpacf_prno(CPACF_PRNO_SHA512_DRNG_GEN, &ws, buf, sizeof(buf), NULL, 0); - cpacf_ppno(CPACF_PPNO_SHA512_DRNG_GEN, + cpacf_prno(CPACF_PRNO_SHA512_DRNG_GEN, &ws, buf, sizeof(buf), NULL, 0); /* check against expected data */ @@ -374,16 +374,16 @@ static int __init prng_sha512_instantiate(void) /* followed by 16 bytes of unique nonce */ get_tod_clock_ext(seed + 64 + 32); - /* initial seed of the ppno drng */ - cpacf_ppno(CPACF_PPNO_SHA512_DRNG_SEED, - &prng_data->ppnows, NULL, 0, seed, sizeof(seed)); + /* initial seed of the prno drng */ + cpacf_prno(CPACF_PRNO_SHA512_DRNG_SEED, + &prng_data->prnows, NULL, 0, seed, sizeof(seed)); /* if fips mode is enabled, generate a first block of random bytes for the FIPS 140-2 Conditional Self Test */ if (fips_enabled) { prng_data->prev = prng_data->buf + prng_chunk_size; - cpacf_ppno(CPACF_PPNO_SHA512_DRNG_GEN, - &prng_data->ppnows, + cpacf_prno(CPACF_PRNO_SHA512_DRNG_GEN, + &prng_data->prnows, prng_data->prev, prng_chunk_size, NULL, 0); } @@ -412,9 +412,9 @@ static int prng_sha512_reseed(void) if (ret != sizeof(seed)) return ret; - /* do a reseed of the ppno drng with this bytestring */ - cpacf_ppno(CPACF_PPNO_SHA512_DRNG_SEED, - &prng_data->ppnows, NULL, 0, seed, sizeof(seed)); + /* do a reseed of the prno drng with this bytestring */ + cpacf_prno(CPACF_PRNO_SHA512_DRNG_SEED, + &prng_data->prnows, NULL, 0, seed, sizeof(seed)); return 0; } @@ -425,15 +425,15 @@ static int prng_sha512_generate(u8 *buf, size_t nbytes) int ret; /* reseed needed ? 
*/ - if (prng_data->ppnows.reseed_counter > prng_reseed_limit) { + if (prng_data->prnows.reseed_counter > prng_reseed_limit) { ret = prng_sha512_reseed(); if (ret) return ret; } - /* PPNO generate */ - cpacf_ppno(CPACF_PPNO_SHA512_DRNG_GEN, - &prng_data->ppnows, buf, nbytes, NULL, 0); + /* PRNO generate */ + cpacf_prno(CPACF_PRNO_SHA512_DRNG_GEN, + &prng_data->prnows, buf, nbytes, NULL, 0); /* FIPS 140-2 Conditional Self Test */ if (fips_enabled) { @@ -653,7 +653,7 @@ static ssize_t prng_counter_show(struct device *dev, if (mutex_lock_interruptible(&prng_data->mutex)) return -ERESTARTSYS; if (prng_mode == PRNG_MODE_SHA512) - counter = prng_data->ppnows.stream_bytes; + counter = prng_data->prnows.stream_bytes; else counter = prng_data->prngws.byte_counter; mutex_unlock(&prng_data->mutex); @@ -774,8 +774,8 @@ static int __init prng_init(void) /* choose prng mode */ if (prng_mode != PRNG_MODE_TDES) { - /* check for MSA5 support for PPNO operations */ - if (!cpacf_query_func(CPACF_PPNO, CPACF_PPNO_SHA512_DRNG_GEN)) { + /* check for MSA5 support for PRNO operations */ + if (!cpacf_query_func(CPACF_PRNO, CPACF_PRNO_SHA512_DRNG_GEN)) { if (prng_mode == PRNG_MODE_SHA512) { pr_err("The prng module cannot " "start in SHA-512 mode\n"); diff --git a/arch/s390/include/asm/Kbuild b/arch/s390/include/asm/Kbuild index 8aea32fe8bd2..7e3481eb2174 100644 --- a/arch/s390/include/asm/Kbuild +++ b/arch/s390/include/asm/Kbuild @@ -1,8 +1,14 @@ generic-y += asm-offsets.h generic-y += clkdev.h generic-y += dma-contiguous.h +generic-y += div64.h +generic-y += emergency-restart.h generic-y += export.h +generic-y += irq_regs.h generic-y += irq_work.h +generic-y += kmap_types.h +generic-y += local.h +generic-y += local64.h generic-y += mcs_spinlock.h generic-y += mm-arch-hooks.h generic-y += preempt.h diff --git a/arch/s390/include/asm/archrandom.h b/arch/s390/include/asm/archrandom.h new file mode 100644 index 000000000000..6033901a40b2 --- /dev/null +++ b/arch/s390/include/asm/archrandom.h 
@@ -0,0 +1,69 @@ +/* + * Kernel interface for the s390 arch_random_* functions + * + * Copyright IBM Corp. 2017 + * + * Author: Harald Freudenberger <freude@de.ibm.com> + * + */ + +#ifndef _ASM_S390_ARCHRANDOM_H +#define _ASM_S390_ARCHRANDOM_H + +#ifdef CONFIG_ARCH_RANDOM + +#include <linux/static_key.h> +#include <linux/atomic.h> +#include <asm/cpacf.h> + +DECLARE_STATIC_KEY_FALSE(s390_arch_random_available); +extern atomic64_t s390_arch_random_counter; + +static void s390_arch_random_generate(u8 *buf, unsigned int nbytes) +{ + cpacf_trng(NULL, 0, buf, nbytes); + atomic64_add(nbytes, &s390_arch_random_counter); +} + +static inline bool arch_has_random(void) +{ + if (static_branch_likely(&s390_arch_random_available)) + return true; + return false; +} + +static inline bool arch_has_random_seed(void) +{ + return arch_has_random(); +} + +static inline bool arch_get_random_long(unsigned long *v) +{ + if (static_branch_likely(&s390_arch_random_available)) { + s390_arch_random_generate((u8 *)v, sizeof(*v)); + return true; + } + return false; +} + +static inline bool arch_get_random_int(unsigned int *v) +{ + if (static_branch_likely(&s390_arch_random_available)) { + s390_arch_random_generate((u8 *)v, sizeof(*v)); + return true; + } + return false; +} + +static inline bool arch_get_random_seed_long(unsigned long *v) +{ + return arch_get_random_long(v); +} + +static inline bool arch_get_random_seed_int(unsigned int *v) +{ + return arch_get_random_int(v); +} + +#endif /* CONFIG_ARCH_RANDOM */ +#endif /* _ASM_S390_ARCHRANDOM_H */ diff --git a/arch/s390/include/asm/atomic_ops.h b/arch/s390/include/asm/atomic_ops.h index ac9e2b939d04..ba6d29412344 100644 --- a/arch/s390/include/asm/atomic_ops.h +++ b/arch/s390/include/asm/atomic_ops.h @@ -111,20 +111,22 @@ __ATOMIC64_OPS(__atomic64_xor, "xgr") static inline int __atomic_cmpxchg(int *ptr, int old, int new) { - asm volatile( - " cs %[old],%[new],%[ptr]" - : [old] "+d" (old), [ptr] "+Q" (*ptr) - : [new] "d" (new) : "cc", 
"memory"); - return old; + return __sync_val_compare_and_swap(ptr, old, new); +} + +static inline int __atomic_cmpxchg_bool(int *ptr, int old, int new) +{ + return __sync_bool_compare_and_swap(ptr, old, new); } static inline long __atomic64_cmpxchg(long *ptr, long old, long new) { - asm volatile( - " csg %[old],%[new],%[ptr]" - : [old] "+d" (old), [ptr] "+Q" (*ptr) - : [new] "d" (new) : "cc", "memory"); - return old; + return __sync_val_compare_and_swap(ptr, old, new); +} + +static inline long __atomic64_cmpxchg_bool(long *ptr, long old, long new) +{ + return __sync_bool_compare_and_swap(ptr, old, new); } #endif /* __ARCH_S390_ATOMIC_OPS__ */ diff --git a/arch/s390/include/asm/bitops.h b/arch/s390/include/asm/bitops.h index d92047da5ccb..99902b7b9f0c 100644 --- a/arch/s390/include/asm/bitops.h +++ b/arch/s390/include/asm/bitops.h @@ -15,14 +15,6 @@ * end up numbered: * |63..............0|127............64|191...........128|255...........192| * - * There are a few little-endian macros used mostly for filesystem - * bitmaps, these work on similar bit array layouts, but byte-oriented: - * |7...0|15...8|23...16|31...24|39...32|47...40|55...48|63...56| - * - * The main difference is that bit 3-5 in the bit number field needs to be - * reversed compared to the big-endian bit fields. This can be achieved by - * XOR with 0x38. - * * We also have special functions which work with an MSB0 encoding. 
* The bits are numbered: * |0..............63|64............127|128...........191|192...........255| @@ -253,6 +245,11 @@ unsigned long find_first_bit_inv(const unsigned long *addr, unsigned long size); unsigned long find_next_bit_inv(const unsigned long *addr, unsigned long size, unsigned long offset); +#define for_each_set_bit_inv(bit, addr, size) \ + for ((bit) = find_first_bit_inv((addr), (size)); \ + (bit) < (size); \ + (bit) = find_next_bit_inv((addr), (size), (bit) + 1)) + static inline void set_bit_inv(unsigned long nr, volatile unsigned long *ptr) { return set_bit(nr ^ (BITS_PER_LONG - 1), ptr); diff --git a/arch/s390/include/asm/cio.h b/arch/s390/include/asm/cio.h index f7ed88cc066e..7a38ca85190b 100644 --- a/arch/s390/include/asm/cio.h +++ b/arch/s390/include/asm/cio.h @@ -33,6 +33,24 @@ struct ccw1 { __u32 cda; } __attribute__ ((packed,aligned(8))); +/** + * struct ccw0 - channel command word + * @cmd_code: command code + * @cda: data address + * @flags: flags, like IDA addressing, etc. + * @reserved: will be ignored + * @count: byte count + * + * The format-0 ccw structure. 
+ */ +struct ccw0 { + __u8 cmd_code; + __u32 cda : 24; + __u8 flags; + __u8 reserved; + __u16 count; +} __packed __aligned(8); + #define CCW_FLAG_DC 0x80 #define CCW_FLAG_CC 0x40 #define CCW_FLAG_SLI 0x20 diff --git a/arch/s390/include/asm/cpacf.h b/arch/s390/include/asm/cpacf.h index e2dfbf280d12..e06f2556b316 100644 --- a/arch/s390/include/asm/cpacf.h +++ b/arch/s390/include/asm/cpacf.h @@ -25,7 +25,8 @@ #define CPACF_KMO 0xb92b /* MSA4 */ #define CPACF_PCC 0xb92c /* MSA4 */ #define CPACF_KMCTR 0xb92d /* MSA4 */ -#define CPACF_PPNO 0xb93c /* MSA5 */ +#define CPACF_PRNO 0xb93c /* MSA5 */ +#define CPACF_KMA 0xb929 /* MSA8 */ /* * En/decryption modifier bits @@ -123,12 +124,14 @@ #define CPACF_PCKMO_ENC_AES_256_KEY 0x14 /* - * Function codes for the PPNO (PERFORM PSEUDORANDOM NUMBER OPERATION) + * Function codes for the PRNO (PERFORM RANDOM NUMBER OPERATION) * instruction */ -#define CPACF_PPNO_QUERY 0x00 -#define CPACF_PPNO_SHA512_DRNG_GEN 0x03 -#define CPACF_PPNO_SHA512_DRNG_SEED 0x83 +#define CPACF_PRNO_QUERY 0x00 +#define CPACF_PRNO_SHA512_DRNG_GEN 0x03 +#define CPACF_PRNO_SHA512_DRNG_SEED 0x83 +#define CPACF_PRNO_TRNG_Q_R2C_RATIO 0x70 +#define CPACF_PRNO_TRNG 0x72 typedef struct { unsigned char bytes[16]; } cpacf_mask_t; @@ -149,8 +152,8 @@ static inline void __cpacf_query(unsigned int opcode, cpacf_mask_t *mask) asm volatile( " spm 0\n" /* pckmo doesn't change the cc */ - /* Parameter registers are ignored, but may not be 0 */ - "0: .insn rrf,%[opc] << 16,2,2,2,0\n" + /* Parameter regs are ignored, but must be nonzero and unique */ + "0: .insn rrf,%[opc] << 16,2,4,6,0\n" " brc 1,0b\n" /* handle partial completion */ : "=m" (*mask) : [fc] "d" (r0), [pba] "a" (r1), [opc] "i" (opcode) @@ -173,7 +176,7 @@ static inline int __cpacf_check_opcode(unsigned int opcode) case CPACF_PCC: case CPACF_KMCTR: return test_facility(77); /* check for MSA4 */ - case CPACF_PPNO: + case CPACF_PRNO: return test_facility(57); /* check for MSA5 */ default: BUG(); @@ -373,18 +376,18 @@ 
static inline int cpacf_kmctr(unsigned long func, void *param, u8 *dest, } /** - * cpacf_ppno() - executes the PPNO (PERFORM PSEUDORANDOM NUMBER OPERATION) + * cpacf_prno() - executes the PRNO (PERFORM RANDOM NUMBER OPERATION) * instruction - * @func: the function code passed to PPNO; see CPACF_PPNO_xxx defines + * @func: the function code passed to PRNO; see CPACF_PRNO_xxx defines * @param: address of parameter block; see POP for details on each func * @dest: address of destination memory area * @dest_len: size of destination memory area in bytes * @seed: address of seed data * @seed_len: size of seed data in bytes */ -static inline void cpacf_ppno(unsigned long func, void *param, - u8 *dest, long dest_len, - const u8 *seed, long seed_len) +static inline void cpacf_prno(unsigned long func, void *param, + u8 *dest, unsigned long dest_len, + const u8 *seed, unsigned long seed_len) { register unsigned long r0 asm("0") = (unsigned long) func; register unsigned long r1 asm("1") = (unsigned long) param; @@ -398,7 +401,32 @@ static inline void cpacf_ppno(unsigned long func, void *param, " brc 1,0b\n" /* handle partial completion */ : [dst] "+a" (r2), [dlen] "+d" (r3) : [fc] "d" (r0), [pba] "a" (r1), - [seed] "a" (r4), [slen] "d" (r5), [opc] "i" (CPACF_PPNO) + [seed] "a" (r4), [slen] "d" (r5), [opc] "i" (CPACF_PRNO) + : "cc", "memory"); +} + +/** + * cpacf_trng() - executes the TRNG subfunction of the PRNO instruction + * @ucbuf: buffer for unconditioned data + * @ucbuf_len: amount of unconditioned data to fetch in bytes + * @cbuf: buffer for conditioned data + * @cbuf_len: amount of conditioned data to fetch in bytes + */ +static inline void cpacf_trng(u8 *ucbuf, unsigned long ucbuf_len, + u8 *cbuf, unsigned long cbuf_len) +{ + register unsigned long r0 asm("0") = (unsigned long) CPACF_PRNO_TRNG; + register unsigned long r2 asm("2") = (unsigned long) ucbuf; + register unsigned long r3 asm("3") = (unsigned long) ucbuf_len; + register unsigned long r4 asm("4") = (unsigned 
long) cbuf; + register unsigned long r5 asm("5") = (unsigned long) cbuf_len; + + asm volatile ( + "0: .insn rre,%[opc] << 16,%[ucbuf],%[cbuf]\n" + " brc 1,0b\n" /* handle partial completion */ + : [ucbuf] "+a" (r2), [ucbuflen] "+d" (r3), + [cbuf] "+a" (r4), [cbuflen] "+d" (r5) + : [fc] "d" (r0), [opc] "i" (CPACF_PRNO) : "cc", "memory"); } diff --git a/arch/s390/include/asm/cpu_mf.h b/arch/s390/include/asm/cpu_mf.h index d1e0707310fd..05480e4cc5ca 100644 --- a/arch/s390/include/asm/cpu_mf.h +++ b/arch/s390/include/asm/cpu_mf.h @@ -20,9 +20,11 @@ #define CPU_MF_INT_SF_PRA (1 << 29) /* program request alert */ #define CPU_MF_INT_SF_SACA (1 << 23) /* sampler auth. change alert */ #define CPU_MF_INT_SF_LSDA (1 << 22) /* loss of sample data alert */ +#define CPU_MF_INT_CF_MTDA (1 << 15) /* loss of MT ctr. data alert */ #define CPU_MF_INT_CF_CACA (1 << 7) /* counter auth. change alert */ #define CPU_MF_INT_CF_LCDA (1 << 6) /* loss of counter data alert */ -#define CPU_MF_INT_CF_MASK (CPU_MF_INT_CF_CACA|CPU_MF_INT_CF_LCDA) +#define CPU_MF_INT_CF_MASK (CPU_MF_INT_CF_MTDA|CPU_MF_INT_CF_CACA| \ + CPU_MF_INT_CF_LCDA) #define CPU_MF_INT_SF_MASK (CPU_MF_INT_SF_IAE|CPU_MF_INT_SF_ISE| \ CPU_MF_INT_SF_PRA|CPU_MF_INT_SF_SACA| \ CPU_MF_INT_SF_LSDA) @@ -172,7 +174,7 @@ static inline int lcctl(u64 ctl) /* Extract CPU counter */ static inline int __ecctr(u64 ctr, u64 *content) { - register u64 _content asm("4") = 0; + u64 _content; int cc; asm volatile ( diff --git a/arch/s390/include/asm/div64.h b/arch/s390/include/asm/div64.h deleted file mode 100644 index 6cd978cefb28..000000000000 --- a/arch/s390/include/asm/div64.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/div64.h> diff --git a/arch/s390/include/asm/elf.h b/arch/s390/include/asm/elf.h index 1d48880b3cc1..e8f623041769 100644 --- a/arch/s390/include/asm/elf.h +++ b/arch/s390/include/asm/elf.h @@ -105,6 +105,7 @@ #define HWCAP_S390_VXRS 2048 #define HWCAP_S390_VXRS_BCD 4096 #define HWCAP_S390_VXRS_EXT 8192 +#define 
HWCAP_S390_GS 16384 /* Internal bits, not exposed via elf */ #define HWCAP_INT_SIE 1UL diff --git a/arch/s390/include/asm/emergency-restart.h b/arch/s390/include/asm/emergency-restart.h deleted file mode 100644 index 108d8c48e42e..000000000000 --- a/arch/s390/include/asm/emergency-restart.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef _ASM_EMERGENCY_RESTART_H -#define _ASM_EMERGENCY_RESTART_H - -#include <asm-generic/emergency-restart.h> - -#endif /* _ASM_EMERGENCY_RESTART_H */ diff --git a/arch/s390/include/asm/facility.h b/arch/s390/include/asm/facility.h index 09b406db7529..cb60d5c5755d 100644 --- a/arch/s390/include/asm/facility.h +++ b/arch/s390/include/asm/facility.h @@ -8,14 +8,11 @@ #define __ASM_FACILITY_H #include <generated/facilities.h> - -#ifndef __ASSEMBLY__ - #include <linux/string.h> #include <linux/preempt.h> #include <asm/lowcore.h> -#define MAX_FACILITY_BIT (256*8) /* stfle_fac_list has 256 bytes */ +#define MAX_FACILITY_BIT (sizeof(((struct lowcore *)0)->stfle_fac_list) * 8) static inline int __test_facility(unsigned long nr, void *facilities) { @@ -72,5 +69,4 @@ static inline void stfle(u64 *stfle_fac_list, int size) preempt_enable(); } -#endif /* __ASSEMBLY__ */ #endif /* __ASM_FACILITY_H */ diff --git a/arch/s390/include/asm/irq_regs.h b/arch/s390/include/asm/irq_regs.h deleted file mode 100644 index 3dd9c0b70270..000000000000 --- a/arch/s390/include/asm/irq_regs.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/irq_regs.h> diff --git a/arch/s390/include/asm/isc.h b/arch/s390/include/asm/isc.h index 68d7d68300f2..8a0b721a9b8d 100644 --- a/arch/s390/include/asm/isc.h +++ b/arch/s390/include/asm/isc.h @@ -16,6 +16,7 @@ #define CONSOLE_ISC 1 /* console I/O subchannel */ #define EADM_SCH_ISC 4 /* EADM subchannels */ #define CHSC_SCH_ISC 7 /* CHSC subchannels */ +#define VFIO_CCW_ISC IO_SCH_ISC /* VFIO-CCW I/O subchannels */ /* Adapter interrupts. 
*/ #define QDIO_AIRQ_ISC IO_SCH_ISC /* I/O subchannel in qdio mode */ #define PCI_ISC 2 /* PCI I/O subchannels */ diff --git a/arch/s390/include/asm/kmap_types.h b/arch/s390/include/asm/kmap_types.h deleted file mode 100644 index 0a88622339ee..000000000000 --- a/arch/s390/include/asm/kmap_types.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef _ASM_KMAP_TYPES_H -#define _ASM_KMAP_TYPES_H - -#include <asm-generic/kmap_types.h> - -#endif diff --git a/arch/s390/include/asm/local.h b/arch/s390/include/asm/local.h deleted file mode 100644 index c11c530f74d0..000000000000 --- a/arch/s390/include/asm/local.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/local.h> diff --git a/arch/s390/include/asm/local64.h b/arch/s390/include/asm/local64.h deleted file mode 100644 index 36c93b5cc239..000000000000 --- a/arch/s390/include/asm/local64.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/local64.h> diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h index 61261e0e95c0..8a5b082797f8 100644 --- a/arch/s390/include/asm/lowcore.h +++ b/arch/s390/include/asm/lowcore.h @@ -157,8 +157,8 @@ struct lowcore { __u64 stfle_fac_list[32]; /* 0x0f00 */ __u8 pad_0x1000[0x11b0-0x1000]; /* 0x1000 */ - /* Pointer to vector register save area */ - __u64 vector_save_area_addr; /* 0x11b0 */ + /* Pointer to the machine check extended save area */ + __u64 mcesad; /* 0x11b0 */ /* 64 bit extparam used for pfault/diag 250: defined by architecture */ __u64 ext_params2; /* 0x11B8 */ @@ -182,10 +182,7 @@ struct lowcore { /* Transaction abort diagnostic block */ __u8 pgm_tdb[256]; /* 0x1800 */ - __u8 pad_0x1900[0x1c00-0x1900]; /* 0x1900 */ - - /* Software defined save area for vector registers */ - __u8 vector_save_area[1024]; /* 0x1c00 */ + __u8 pad_0x1900[0x2000-0x1900]; /* 0x1900 */ } __packed; #define S390_lowcore (*((struct lowcore *) 0)) diff --git a/arch/s390/include/asm/mman.h b/arch/s390/include/asm/mman.h index b55a59e1d134..b79813d9cf68 100644 --- 
a/arch/s390/include/asm/mman.h +++ b/arch/s390/include/asm/mman.h @@ -8,8 +8,4 @@ #include <uapi/asm/mman.h> -#ifndef __ASSEMBLY__ -int s390_mmap_check(unsigned long addr, unsigned long len, unsigned long flags); -#define arch_mmap_check(addr, len, flags) s390_mmap_check(addr, len, flags) -#endif #endif /* __S390_MMAN_H__ */ diff --git a/arch/s390/include/asm/mmu.h b/arch/s390/include/asm/mmu.h index bea785d7f853..bd6f30304518 100644 --- a/arch/s390/include/asm/mmu.h +++ b/arch/s390/include/asm/mmu.h @@ -22,6 +22,8 @@ typedef struct { unsigned int has_pgste:1; /* The mmu context uses storage keys. */ unsigned int use_skey:1; + /* The mmu context uses CMMA. */ + unsigned int use_cmma:1; } mm_context_t; #define INIT_MM_CONTEXT(name) \ diff --git a/arch/s390/include/asm/mmu_context.h b/arch/s390/include/asm/mmu_context.h index fa2bf69be182..8712e11bead4 100644 --- a/arch/s390/include/asm/mmu_context.h +++ b/arch/s390/include/asm/mmu_context.h @@ -28,6 +28,7 @@ static inline int init_new_context(struct task_struct *tsk, mm->context.alloc_pgste = page_table_allocate_pgste; mm->context.has_pgste = 0; mm->context.use_skey = 0; + mm->context.use_cmma = 0; #endif switch (mm->context.asce_limit) { case 1UL << 42: diff --git a/arch/s390/include/asm/nmi.h b/arch/s390/include/asm/nmi.h index b75fd910386a..e3e8895f5d3e 100644 --- a/arch/s390/include/asm/nmi.h +++ b/arch/s390/include/asm/nmi.h @@ -58,7 +58,9 @@ union mci { u64 ie : 1; /* 32 indirect storage error */ u64 ar : 1; /* 33 access register validity */ u64 da : 1; /* 34 delayed access exception */ - u64 : 7; /* 35-41 */ + u64 : 1; /* 35 */ + u64 gs : 1; /* 36 guarded storage registers */ + u64 : 5; /* 37-41 */ u64 pr : 1; /* 42 tod programmable register validity */ u64 fc : 1; /* 43 fp control register validity */ u64 ap : 1; /* 44 ancillary report */ @@ -69,6 +71,14 @@ union mci { }; }; +#define MCESA_ORIGIN_MASK (~0x3ffUL) +#define MCESA_LC_MASK (0xfUL) + +struct mcesa { + u8 vector_save_area[1024]; + u8 
guarded_storage_save_area[32]; +}; + struct pt_regs; extern void s390_handle_mcck(void); diff --git a/arch/s390/include/asm/page-states.h b/arch/s390/include/asm/page-states.h new file mode 100644 index 000000000000..42267a2fe29e --- /dev/null +++ b/arch/s390/include/asm/page-states.h @@ -0,0 +1,19 @@ +/* + * Copyright IBM Corp. 2017 + * Author(s): Claudio Imbrenda <imbrenda@linux.vnet.ibm.com> + */ + +#ifndef PAGE_STATES_H +#define PAGE_STATES_H + +#define ESSA_GET_STATE 0 +#define ESSA_SET_STABLE 1 +#define ESSA_SET_UNUSED 2 +#define ESSA_SET_VOLATILE 3 +#define ESSA_SET_POT_VOLATILE 4 +#define ESSA_SET_STABLE_RESIDENT 5 +#define ESSA_SET_STABLE_IF_RESIDENT 6 + +#define ESSA_MAX ESSA_SET_STABLE_IF_RESIDENT + +#endif diff --git a/arch/s390/include/asm/perf_event.h b/arch/s390/include/asm/perf_event.h index c64c0befd3f3..dd32beb9d30c 100644 --- a/arch/s390/include/asm/perf_event.h +++ b/arch/s390/include/asm/perf_event.h @@ -1,7 +1,7 @@ /* * Performance event support - s390 specific definitions. * - * Copyright IBM Corp. 2009, 2013 + * Copyright IBM Corp. 
2009, 2017 * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com> * Hendrik Brueckner <brueckner@linux.vnet.ibm.com> */ @@ -47,7 +47,7 @@ struct perf_sf_sde_regs { }; /* Perf PMU definitions for the counter facility */ -#define PERF_CPUM_CF_MAX_CTR 256 +#define PERF_CPUM_CF_MAX_CTR 0xffffUL /* Max ctr for ECCTR */ /* Perf PMU definitions for the sampling facility */ #define PERF_CPUM_SF_MAX_CTR 2 diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index ecec682bb516..e6e3b887bee3 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -372,10 +372,12 @@ static inline int is_module_addr(void *addr) #define PGSTE_VSIE_BIT 0x0000200000000000UL /* ref'd in a shadow table */ /* Guest Page State used for virtualization */ -#define _PGSTE_GPS_ZERO 0x0000000080000000UL -#define _PGSTE_GPS_USAGE_MASK 0x0000000003000000UL -#define _PGSTE_GPS_USAGE_STABLE 0x0000000000000000UL -#define _PGSTE_GPS_USAGE_UNUSED 0x0000000001000000UL +#define _PGSTE_GPS_ZERO 0x0000000080000000UL +#define _PGSTE_GPS_USAGE_MASK 0x0000000003000000UL +#define _PGSTE_GPS_USAGE_STABLE 0x0000000000000000UL +#define _PGSTE_GPS_USAGE_UNUSED 0x0000000001000000UL +#define _PGSTE_GPS_USAGE_POT_VOLATILE 0x0000000002000000UL +#define _PGSTE_GPS_USAGE_VOLATILE _PGSTE_GPS_USAGE_MASK /* * A user page table pointer has the space-switch-event bit, the @@ -1041,6 +1043,12 @@ int reset_guest_reference_bit(struct mm_struct *mm, unsigned long addr); int get_guest_storage_key(struct mm_struct *mm, unsigned long addr, unsigned char *key); +int set_pgste_bits(struct mm_struct *mm, unsigned long addr, + unsigned long bits, unsigned long value); +int get_pgste(struct mm_struct *mm, unsigned long hva, unsigned long *pgstep); +int pgste_perform_essa(struct mm_struct *mm, unsigned long hva, int orc, + unsigned long *oldpte, unsigned long *oldpgste); + /* * Certain architectures need to do special things when PTEs * within a page table are directly modified. 
Thus, the following diff --git a/arch/s390/include/asm/pkey.h b/arch/s390/include/asm/pkey.h index b48aef4188f6..4c484590d858 100644 --- a/arch/s390/include/asm/pkey.h +++ b/arch/s390/include/asm/pkey.h @@ -87,4 +87,25 @@ int pkey_findcard(const struct pkey_seckey *seckey, int pkey_skey2pkey(const struct pkey_seckey *seckey, struct pkey_protkey *protkey); +/* + * Verify the given secure key for being able to be useable with + * the pkey module. Check for correct key type and check for having at + * least one crypto card being able to handle this key (master key + * or old master key verification pattern matches). + * Return some info about the key: keysize in bits, keytype (currently + * only AES), flag if key is wrapped with an old MKVP. + * @param seckey pointer to buffer with the input secure key + * @param pcardnr pointer to cardnr, receives the card number on success + * @param pdomain pointer to domain, receives the domain number on success + * @param pkeysize pointer to keysize, receives the bitsize of the key + * @param pattributes pointer to attributes, receives additional info + * PKEY_VERIFY_ATTR_AES if the key is an AES key + * PKEY_VERIFY_ATTR_OLD_MKVP if key has old mkvp stored in + * @return 0 on success, negative errno value on failure. If no card could + * be found which is able to handle this key, -ENODEV is returned. + */ +int pkey_verifykey(const struct pkey_seckey *seckey, + u16 *pcardnr, u16 *pdomain, + u16 *pkeysize, u32 *pattributes); + #endif /* _KAPI_PKEY_H */ diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h index e4988710aa86..60d395fdc864 100644 --- a/arch/s390/include/asm/processor.h +++ b/arch/s390/include/asm/processor.h @@ -91,14 +91,15 @@ extern void execve_tail(void); * User space process size: 2GB for 31 bit, 4TB or 8PT for 64 bit. */ -#define TASK_SIZE_OF(tsk) ((tsk)->mm ? \ - (tsk)->mm->context.asce_limit : TASK_MAX_SIZE) +#define TASK_SIZE_OF(tsk) (test_tsk_thread_flag(tsk, TIF_31BIT) ? 
\ + (1UL << 31) : (1UL << 53)) #define TASK_UNMAPPED_BASE (test_thread_flag(TIF_31BIT) ? \ (1UL << 30) : (1UL << 41)) #define TASK_SIZE TASK_SIZE_OF(current) -#define TASK_MAX_SIZE (1UL << 53) +#define TASK_SIZE_MAX (1UL << 53) -#define STACK_TOP (1UL << (test_thread_flag(TIF_31BIT) ? 31:42)) +#define STACK_TOP (test_thread_flag(TIF_31BIT) ? \ + (1UL << 31) : (1UL << 42)) #define STACK_TOP_MAX (1UL << 42) #define HAVE_ARCH_PICK_MMAP_LAYOUT @@ -135,6 +136,8 @@ struct thread_struct { struct list_head list; /* cpu runtime instrumentation */ struct runtime_instr_cb *ri_cb; + struct gs_cb *gs_cb; /* Current guarded storage cb */ + struct gs_cb *gs_bc_cb; /* Broadcast guarded storage cb */ unsigned char trap_tdb[256]; /* Transaction abort diagnose block */ /* * Warning: 'fpu' is dynamically-sized. It *MUST* be at @@ -215,6 +218,9 @@ void show_cacheinfo(struct seq_file *m); /* Free all resources held by a thread. */ extern void release_thread(struct task_struct *); +/* Free guarded storage control block for current */ +void exit_thread_gs(void); + /* * Return saved PC of a blocked thread. 
*/ diff --git a/arch/s390/include/asm/setup.h b/arch/s390/include/asm/setup.h index 30bdb5a027f3..cd78155b1829 100644 --- a/arch/s390/include/asm/setup.h +++ b/arch/s390/include/asm/setup.h @@ -29,8 +29,8 @@ #define MACHINE_FLAG_TE _BITUL(11) #define MACHINE_FLAG_TLB_LC _BITUL(12) #define MACHINE_FLAG_VX _BITUL(13) -#define MACHINE_FLAG_CAD _BITUL(14) -#define MACHINE_FLAG_NX _BITUL(15) +#define MACHINE_FLAG_NX _BITUL(14) +#define MACHINE_FLAG_GS _BITUL(15) #define LPP_MAGIC _BITUL(31) #define LPP_PFAULT_PID_MASK _AC(0xffffffff, UL) @@ -68,8 +68,8 @@ extern void detect_memory_memblock(void); #define MACHINE_HAS_TE (S390_lowcore.machine_flags & MACHINE_FLAG_TE) #define MACHINE_HAS_TLB_LC (S390_lowcore.machine_flags & MACHINE_FLAG_TLB_LC) #define MACHINE_HAS_VX (S390_lowcore.machine_flags & MACHINE_FLAG_VX) -#define MACHINE_HAS_CAD (S390_lowcore.machine_flags & MACHINE_FLAG_CAD) #define MACHINE_HAS_NX (S390_lowcore.machine_flags & MACHINE_FLAG_NX) +#define MACHINE_HAS_GS (S390_lowcore.machine_flags & MACHINE_FLAG_GS) /* * Console mode. 
Override with conmode= diff --git a/arch/s390/include/asm/sparsemem.h b/arch/s390/include/asm/sparsemem.h index 487428b6d099..334e279f1bce 100644 --- a/arch/s390/include/asm/sparsemem.h +++ b/arch/s390/include/asm/sparsemem.h @@ -2,6 +2,6 @@ #define _ASM_S390_SPARSEMEM_H #define SECTION_SIZE_BITS 28 -#define MAX_PHYSMEM_BITS 46 +#define MAX_PHYSMEM_BITS CONFIG_MAX_PHYSMEM_BITS #endif /* _ASM_S390_SPARSEMEM_H */ diff --git a/arch/s390/include/asm/spinlock.h b/arch/s390/include/asm/spinlock.h index ffc45048ea7d..f7838ecd83c6 100644 --- a/arch/s390/include/asm/spinlock.h +++ b/arch/s390/include/asm/spinlock.h @@ -10,6 +10,7 @@ #define __ASM_SPINLOCK_H #include <linux/smp.h> +#include <asm/atomic_ops.h> #include <asm/barrier.h> #include <asm/processor.h> @@ -17,12 +18,6 @@ extern int spin_retry; -static inline int -_raw_compare_and_swap(unsigned int *lock, unsigned int old, unsigned int new) -{ - return __sync_bool_compare_and_swap(lock, old, new); -} - #ifndef CONFIG_SMP static inline bool arch_vcpu_is_preempted(int cpu) { return false; } #else @@ -40,7 +35,7 @@ bool arch_vcpu_is_preempted(int cpu); * (the type definitions are in asm/spinlock_types.h) */ -void arch_lock_relax(unsigned int cpu); +void arch_lock_relax(int cpu); void arch_spin_lock_wait(arch_spinlock_t *); int arch_spin_trylock_retry(arch_spinlock_t *); @@ -70,7 +65,7 @@ static inline int arch_spin_trylock_once(arch_spinlock_t *lp) { barrier(); return likely(arch_spin_value_unlocked(*lp) && - _raw_compare_and_swap(&lp->lock, 0, SPINLOCK_LOCKVAL)); + __atomic_cmpxchg_bool(&lp->lock, 0, SPINLOCK_LOCKVAL)); } static inline void arch_spin_lock(arch_spinlock_t *lp) @@ -95,7 +90,7 @@ static inline int arch_spin_trylock(arch_spinlock_t *lp) static inline void arch_spin_unlock(arch_spinlock_t *lp) { - typecheck(unsigned int, lp->lock); + typecheck(int, lp->lock); asm volatile( "st %1,%0\n" : "+Q" (lp->lock) @@ -141,16 +136,16 @@ extern int _raw_write_trylock_retry(arch_rwlock_t *lp); static inline int 
arch_read_trylock_once(arch_rwlock_t *rw) { - unsigned int old = ACCESS_ONCE(rw->lock); - return likely((int) old >= 0 && - _raw_compare_and_swap(&rw->lock, old, old + 1)); + int old = ACCESS_ONCE(rw->lock); + return likely(old >= 0 && + __atomic_cmpxchg_bool(&rw->lock, old, old + 1)); } static inline int arch_write_trylock_once(arch_rwlock_t *rw) { - unsigned int old = ACCESS_ONCE(rw->lock); + int old = ACCESS_ONCE(rw->lock); return likely(old == 0 && - _raw_compare_and_swap(&rw->lock, 0, 0x80000000)); + __atomic_cmpxchg_bool(&rw->lock, 0, 0x80000000)); } #ifdef CONFIG_HAVE_MARCH_Z196_FEATURES @@ -161,9 +156,9 @@ static inline int arch_write_trylock_once(arch_rwlock_t *rw) #define __RAW_LOCK(ptr, op_val, op_string) \ ({ \ - unsigned int old_val; \ + int old_val; \ \ - typecheck(unsigned int *, ptr); \ + typecheck(int *, ptr); \ asm volatile( \ op_string " %0,%2,%1\n" \ "bcr 14,0\n" \ @@ -175,9 +170,9 @@ static inline int arch_write_trylock_once(arch_rwlock_t *rw) #define __RAW_UNLOCK(ptr, op_val, op_string) \ ({ \ - unsigned int old_val; \ + int old_val; \ \ - typecheck(unsigned int *, ptr); \ + typecheck(int *, ptr); \ asm volatile( \ op_string " %0,%2,%1\n" \ : "=d" (old_val), "+Q" (*ptr) \ @@ -187,14 +182,14 @@ static inline int arch_write_trylock_once(arch_rwlock_t *rw) }) extern void _raw_read_lock_wait(arch_rwlock_t *lp); -extern void _raw_write_lock_wait(arch_rwlock_t *lp, unsigned int prev); +extern void _raw_write_lock_wait(arch_rwlock_t *lp, int prev); static inline void arch_read_lock(arch_rwlock_t *rw) { - unsigned int old; + int old; old = __RAW_LOCK(&rw->lock, 1, __RAW_OP_ADD); - if ((int) old < 0) + if (old < 0) _raw_read_lock_wait(rw); } @@ -205,7 +200,7 @@ static inline void arch_read_unlock(arch_rwlock_t *rw) static inline void arch_write_lock(arch_rwlock_t *rw) { - unsigned int old; + int old; old = __RAW_LOCK(&rw->lock, 0x80000000, __RAW_OP_OR); if (old != 0) @@ -232,11 +227,11 @@ static inline void arch_read_lock(arch_rwlock_t *rw) static 
inline void arch_read_unlock(arch_rwlock_t *rw) { - unsigned int old; + int old; do { old = ACCESS_ONCE(rw->lock); - } while (!_raw_compare_and_swap(&rw->lock, old, old - 1)); + } while (!__atomic_cmpxchg_bool(&rw->lock, old, old - 1)); } static inline void arch_write_lock(arch_rwlock_t *rw) @@ -248,7 +243,7 @@ static inline void arch_write_lock(arch_rwlock_t *rw) static inline void arch_write_unlock(arch_rwlock_t *rw) { - typecheck(unsigned int, rw->lock); + typecheck(int, rw->lock); rw->owner = 0; asm volatile( diff --git a/arch/s390/include/asm/spinlock_types.h b/arch/s390/include/asm/spinlock_types.h index d84b6939237c..fe755eec275f 100644 --- a/arch/s390/include/asm/spinlock_types.h +++ b/arch/s390/include/asm/spinlock_types.h @@ -6,14 +6,14 @@ #endif typedef struct { - unsigned int lock; + int lock; } __attribute__ ((aligned (4))) arch_spinlock_t; #define __ARCH_SPIN_LOCK_UNLOCKED { .lock = 0, } typedef struct { - unsigned int lock; - unsigned int owner; + int lock; + int owner; } arch_rwlock_t; #define __ARCH_RW_LOCK_UNLOCKED { 0 } diff --git a/arch/s390/include/asm/switch_to.h b/arch/s390/include/asm/switch_to.h index 12d45f0cfdd9..f6c2b5814ab0 100644 --- a/arch/s390/include/asm/switch_to.h +++ b/arch/s390/include/asm/switch_to.h @@ -10,6 +10,7 @@ #include <linux/thread_info.h> #include <asm/fpu/api.h> #include <asm/ptrace.h> +#include <asm/guarded_storage.h> extern struct task_struct *__switch_to(void *, void *); extern void update_cr_regs(struct task_struct *task); @@ -33,12 +34,14 @@ static inline void restore_access_regs(unsigned int *acrs) save_fpu_regs(); \ save_access_regs(&prev->thread.acrs[0]); \ save_ri_cb(prev->thread.ri_cb); \ + save_gs_cb(prev->thread.gs_cb); \ } \ if (next->mm) { \ update_cr_regs(next); \ set_cpu_flag(CIF_FPU); \ restore_access_regs(&next->thread.acrs[0]); \ restore_ri_cb(next->thread.ri_cb, prev->thread.ri_cb); \ + restore_gs_cb(next->thread.gs_cb); \ } \ prev = __switch_to(prev,next); \ } while (0) diff --git 
a/arch/s390/include/asm/sysinfo.h b/arch/s390/include/asm/sysinfo.h index 229326c942c7..73bff45ced55 100644 --- a/arch/s390/include/asm/sysinfo.h +++ b/arch/s390/include/asm/sysinfo.h @@ -142,7 +142,15 @@ struct sysinfo_3_2_2 { extern int topology_max_mnest; -#define TOPOLOGY_CORE_BITS 64 +/* + * Returns the maximum nesting level supported by the cpu topology code. + * The current maximum level is 4 which is the drawer level. + */ +static inline int topology_mnest_limit(void) +{ + return min(topology_max_mnest, 4); +} + #define TOPOLOGY_NR_MAG 6 struct topology_core { @@ -152,7 +160,7 @@ struct topology_core { unsigned char pp:2; unsigned char reserved1; unsigned short origin; - unsigned long mask[TOPOLOGY_CORE_BITS / BITS_PER_LONG]; + unsigned long mask; }; struct topology_container { diff --git a/arch/s390/include/asm/thread_info.h b/arch/s390/include/asm/thread_info.h index a5b54a445eb8..f36e6e2b73f0 100644 --- a/arch/s390/include/asm/thread_info.h +++ b/arch/s390/include/asm/thread_info.h @@ -54,11 +54,12 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src); #define TIF_NOTIFY_RESUME 0 /* callback before returning to user */ #define TIF_SIGPENDING 1 /* signal pending */ #define TIF_NEED_RESCHED 2 /* rescheduling necessary */ -#define TIF_SYSCALL_TRACE 3 /* syscall trace active */ -#define TIF_SYSCALL_AUDIT 4 /* syscall auditing active */ -#define TIF_SECCOMP 5 /* secure computing */ -#define TIF_SYSCALL_TRACEPOINT 6 /* syscall tracepoint instrumentation */ -#define TIF_UPROBE 7 /* breakpointed or single-stepping */ +#define TIF_UPROBE 3 /* breakpointed or single-stepping */ +#define TIF_GUARDED_STORAGE 4 /* load guarded storage control block */ +#define TIF_SYSCALL_TRACE 8 /* syscall trace active */ +#define TIF_SYSCALL_AUDIT 9 /* syscall auditing active */ +#define TIF_SECCOMP 10 /* secure computing */ +#define TIF_SYSCALL_TRACEPOINT 11 /* syscall tracepoint instrumentation */ #define TIF_31BIT 16 /* 32bit process */ #define TIF_MEMDIE 
17 /* is terminating due to OOM killer */ #define TIF_RESTORE_SIGMASK 18 /* restore signal mask in do_signal() */ @@ -76,5 +77,6 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src); #define _TIF_UPROBE _BITUL(TIF_UPROBE) #define _TIF_31BIT _BITUL(TIF_31BIT) #define _TIF_SINGLE_STEP _BITUL(TIF_SINGLE_STEP) +#define _TIF_GUARDED_STORAGE _BITUL(TIF_GUARDED_STORAGE) #endif /* _ASM_THREAD_INFO_H */ diff --git a/arch/s390/include/uapi/asm/Kbuild b/arch/s390/include/uapi/asm/Kbuild index 6848ba5c1454..addb09cee0f5 100644 --- a/arch/s390/include/uapi/asm/Kbuild +++ b/arch/s390/include/uapi/asm/Kbuild @@ -1,6 +1,16 @@ # UAPI Header export list include include/uapi/asm-generic/Kbuild.asm +generic-y += errno.h +generic-y += fcntl.h +generic-y += ioctl.h +generic-y += mman.h +generic-y += param.h +generic-y += poll.h +generic-y += resource.h +generic-y += sockios.h +generic-y += termbits.h + header-y += auxvec.h header-y += bitsperlong.h header-y += byteorder.h @@ -11,25 +21,20 @@ header-y += cmb.h header-y += dasd.h header-y += debug.h header-y += errno.h -header-y += fcntl.h +header-y += guarded_storage.h header-y += hypfs.h -header-y += ioctl.h header-y += ioctls.h header-y += ipcbuf.h header-y += kvm.h header-y += kvm_para.h header-y += kvm_perf.h header-y += kvm_virtio.h -header-y += mman.h header-y += monwriter.h header-y += msgbuf.h -header-y += param.h header-y += pkey.h -header-y += poll.h header-y += posix_types.h header-y += ptrace.h header-y += qeth.h -header-y += resource.h header-y += schid.h header-y += sclp_ctl.h header-y += sembuf.h @@ -40,12 +45,10 @@ header-y += sigcontext.h header-y += siginfo.h header-y += signal.h header-y += socket.h -header-y += sockios.h header-y += stat.h header-y += statfs.h header-y += swab.h header-y += tape390.h -header-y += termbits.h header-y += termios.h header-y += types.h header-y += ucontext.h diff --git a/arch/s390/include/uapi/asm/errno.h b/arch/s390/include/uapi/asm/errno.h deleted file mode 
100644 index 395e97d8005e..000000000000 --- a/arch/s390/include/uapi/asm/errno.h +++ /dev/null @@ -1,11 +0,0 @@ -/* - * S390 version - * - */ - -#ifndef _S390_ERRNO_H -#define _S390_ERRNO_H - -#include <asm-generic/errno.h> - -#endif diff --git a/arch/s390/include/uapi/asm/fcntl.h b/arch/s390/include/uapi/asm/fcntl.h deleted file mode 100644 index 46ab12db5739..000000000000 --- a/arch/s390/include/uapi/asm/fcntl.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/fcntl.h> diff --git a/arch/s390/include/uapi/asm/guarded_storage.h b/arch/s390/include/uapi/asm/guarded_storage.h new file mode 100644 index 000000000000..852850e8e17e --- /dev/null +++ b/arch/s390/include/uapi/asm/guarded_storage.h @@ -0,0 +1,77 @@ +#ifndef _GUARDED_STORAGE_H +#define _GUARDED_STORAGE_H + +#include <linux/types.h> + +struct gs_cb { + __u64 reserved; + __u64 gsd; + __u64 gssm; + __u64 gs_epl_a; +}; + +struct gs_epl { + __u8 pad1; + union { + __u8 gs_eam; + struct { + __u8 : 6; + __u8 e : 1; + __u8 b : 1; + }; + }; + union { + __u8 gs_eci; + struct { + __u8 tx : 1; + __u8 cx : 1; + __u8 : 5; + __u8 in : 1; + }; + }; + union { + __u8 gs_eai; + struct { + __u8 : 1; + __u8 t : 1; + __u8 as : 2; + __u8 ar : 4; + }; + }; + __u32 pad2; + __u64 gs_eha; + __u64 gs_eia; + __u64 gs_eoa; + __u64 gs_eir; + __u64 gs_era; +}; + +#define GS_ENABLE 0 +#define GS_DISABLE 1 +#define GS_SET_BC_CB 2 +#define GS_CLEAR_BC_CB 3 +#define GS_BROADCAST 4 + +static inline void load_gs_cb(struct gs_cb *gs_cb) +{ + asm volatile(".insn rxy,0xe3000000004d,0,%0" : : "Q" (*gs_cb)); +} + +static inline void store_gs_cb(struct gs_cb *gs_cb) +{ + asm volatile(".insn rxy,0xe30000000049,0,%0" : : "Q" (*gs_cb)); +} + +static inline void save_gs_cb(struct gs_cb *gs_cb) +{ + if (gs_cb) + store_gs_cb(gs_cb); +} + +static inline void restore_gs_cb(struct gs_cb *gs_cb) +{ + if (gs_cb) + load_gs_cb(gs_cb); +} + +#endif /* _GUARDED_STORAGE_H */ diff --git a/arch/s390/include/uapi/asm/ioctl.h b/arch/s390/include/uapi/asm/ioctl.h 
deleted file mode 100644 index b279fe06dfe5..000000000000 --- a/arch/s390/include/uapi/asm/ioctl.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/ioctl.h> diff --git a/arch/s390/include/uapi/asm/mman.h b/arch/s390/include/uapi/asm/mman.h deleted file mode 100644 index de23da1f41b2..000000000000 --- a/arch/s390/include/uapi/asm/mman.h +++ /dev/null @@ -1,6 +0,0 @@ -/* - * S390 version - * - * Derived from "include/asm-i386/mman.h" - */ -#include <asm-generic/mman.h> diff --git a/arch/s390/include/uapi/asm/param.h b/arch/s390/include/uapi/asm/param.h deleted file mode 100644 index c616821bf2ac..000000000000 --- a/arch/s390/include/uapi/asm/param.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef _ASMS390_PARAM_H -#define _ASMS390_PARAM_H - -#include <asm-generic/param.h> - -#endif /* _ASMS390_PARAM_H */ diff --git a/arch/s390/include/uapi/asm/pkey.h b/arch/s390/include/uapi/asm/pkey.h index ed7f19c27ce5..e6c04faf8a6c 100644 --- a/arch/s390/include/uapi/asm/pkey.h +++ b/arch/s390/include/uapi/asm/pkey.h @@ -109,4 +109,23 @@ struct pkey_skey2pkey { }; #define PKEY_SKEY2PKEY _IOWR(PKEY_IOCTL_MAGIC, 0x06, struct pkey_skey2pkey) +/* + * Verify the given secure key for being able to be useable with + * the pkey module. Check for correct key type and check for having at + * least one crypto card being able to handle this key (master key + * or old master key verification pattern matches). + * Return some info about the key: keysize in bits, keytype (currently + * only AES), flag if key is wrapped with an old MKVP. 
+ */ +struct pkey_verifykey { + struct pkey_seckey seckey; /* in: the secure key blob */ + __u16 cardnr; /* out: card number */ + __u16 domain; /* out: domain number */ + __u16 keysize; /* out: key size in bits */ + __u32 attributes; /* out: attribute bits */ +}; +#define PKEY_VERIFYKEY _IOWR(PKEY_IOCTL_MAGIC, 0x07, struct pkey_verifykey) +#define PKEY_VERIFY_ATTR_AES 0x00000001 /* key is an AES key */ +#define PKEY_VERIFY_ATTR_OLD_MKVP 0x00000100 /* key has old MKVP value */ + #endif /* _UAPI_PKEY_H */ diff --git a/arch/s390/include/uapi/asm/poll.h b/arch/s390/include/uapi/asm/poll.h deleted file mode 100644 index c98509d3149e..000000000000 --- a/arch/s390/include/uapi/asm/poll.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/poll.h> diff --git a/arch/s390/include/uapi/asm/resource.h b/arch/s390/include/uapi/asm/resource.h deleted file mode 100644 index ec23d1c73c92..000000000000 --- a/arch/s390/include/uapi/asm/resource.h +++ /dev/null @@ -1,13 +0,0 @@ -/* - * S390 version - * - * Derived from "include/asm-i386/resources.h" - */ - -#ifndef _S390_RESOURCE_H -#define _S390_RESOURCE_H - -#include <asm-generic/resource.h> - -#endif - diff --git a/arch/s390/include/uapi/asm/sockios.h b/arch/s390/include/uapi/asm/sockios.h deleted file mode 100644 index 6f60eee73242..000000000000 --- a/arch/s390/include/uapi/asm/sockios.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef _ASM_S390_SOCKIOS_H -#define _ASM_S390_SOCKIOS_H - -#include <asm-generic/sockios.h> - -#endif diff --git a/arch/s390/include/uapi/asm/termbits.h b/arch/s390/include/uapi/asm/termbits.h deleted file mode 100644 index 71bf6ac6a2b9..000000000000 --- a/arch/s390/include/uapi/asm/termbits.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef _ASM_S390_TERMBITS_H -#define _ASM_S390_TERMBITS_H - -#include <asm-generic/termbits.h> - -#endif diff --git a/arch/s390/include/uapi/asm/unistd.h b/arch/s390/include/uapi/asm/unistd.h index 152de9b796e1..ea42290e7d51 100644 --- a/arch/s390/include/uapi/asm/unistd.h +++ 
b/arch/s390/include/uapi/asm/unistd.h @@ -313,7 +313,7 @@ #define __NR_copy_file_range 375 #define __NR_preadv2 376 #define __NR_pwritev2 377 -/* Number 378 is reserved for guarded storage */ +#define __NR_s390_guarded_storage 378 #define __NR_statx 379 #define NR_syscalls 380 diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile index 060ce548fe8b..adb3fe2e3d42 100644 --- a/arch/s390/kernel/Makefile +++ b/arch/s390/kernel/Makefile @@ -51,14 +51,12 @@ CFLAGS_dumpstack.o += -fno-optimize-sibling-calls # CFLAGS_ptrace.o += -DUTS_MACHINE='"$(UTS_MACHINE)"' -CFLAGS_sysinfo.o += -w - obj-y := traps.o time.o process.o base.o early.o setup.o idle.o vtime.o obj-y += processor.o sys_s390.o ptrace.o signal.o cpcmd.o ebcdic.o nmi.o obj-y += debug.o irq.o ipl.o dis.o diag.o vdso.o als.o obj-y += sysinfo.o jump_label.o lgr.o os_info.o machine_kexec.o pgm_check.o -obj-y += runtime_instr.o cache.o fpu.o dumpstack.o -obj-y += entry.o reipl.o relocate_kernel.o +obj-y += runtime_instr.o cache.o fpu.o dumpstack.o guarded_storage.o +obj-y += entry.o reipl.o relocate_kernel.o kdebugfs.o extra-y += head.o head64.o vmlinux.lds diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c index c4b3570ded5b..6bb29633e1f1 100644 --- a/arch/s390/kernel/asm-offsets.c +++ b/arch/s390/kernel/asm-offsets.c @@ -175,7 +175,7 @@ int main(void) /* software defined ABI-relevant lowcore locations 0xe00 - 0xe20 */ OFFSET(__LC_DUMP_REIPL, lowcore, ipib); /* hardware defined lowcore locations 0x1000 - 0x18ff */ - OFFSET(__LC_VX_SAVE_AREA_ADDR, lowcore, vector_save_area_addr); + OFFSET(__LC_MCESAD, lowcore, mcesad); OFFSET(__LC_EXT_PARAMS2, lowcore, ext_params2); OFFSET(__LC_FPREGS_SAVE_AREA, lowcore, floating_pt_save_area); OFFSET(__LC_GPREGS_SAVE_AREA, lowcore, gpregs_save_area); diff --git a/arch/s390/kernel/compat_wrapper.c b/arch/s390/kernel/compat_wrapper.c index e89cc2e71db1..986642a3543b 100644 --- a/arch/s390/kernel/compat_wrapper.c +++ 
b/arch/s390/kernel/compat_wrapper.c @@ -178,4 +178,5 @@ COMPAT_SYSCALL_WRAP3(getpeername, int, fd, struct sockaddr __user *, usockaddr, COMPAT_SYSCALL_WRAP6(sendto, int, fd, void __user *, buff, size_t, len, unsigned int, flags, struct sockaddr __user *, addr, int, addr_len); COMPAT_SYSCALL_WRAP3(mlock2, unsigned long, start, size_t, len, int, flags); COMPAT_SYSCALL_WRAP6(copy_file_range, int, fd_in, loff_t __user *, off_in, int, fd_out, loff_t __user *, off_out, size_t, len, unsigned int, flags); +COMPAT_SYSCALL_WRAP2(s390_guarded_storage, int, command, struct gs_cb *, gs_cb); COMPAT_SYSCALL_WRAP5(statx, int, dfd, const char __user *, path, unsigned, flags, unsigned, mask, struct statx __user *, buffer); diff --git a/arch/s390/kernel/crash_dump.c b/arch/s390/kernel/crash_dump.c index dd1d5c62c374..d628afc26708 100644 --- a/arch/s390/kernel/crash_dump.c +++ b/arch/s390/kernel/crash_dump.c @@ -429,6 +429,20 @@ static void *nt_vmcoreinfo(void *ptr) } /* + * Initialize final note (needed for /proc/vmcore code) + */ +static void *nt_final(void *ptr) +{ + Elf64_Nhdr *note; + + note = (Elf64_Nhdr *) ptr; + note->n_namesz = 0; + note->n_descsz = 0; + note->n_type = 0; + return PTR_ADD(ptr, sizeof(Elf64_Nhdr)); +} + +/* * Initialize ELF header (new kernel) */ static void *ehdr_init(Elf64_Ehdr *ehdr, int mem_chunk_cnt) @@ -515,6 +529,7 @@ static void *notes_init(Elf64_Phdr *phdr, void *ptr, u64 notes_offset) if (sa->prefix != 0) ptr = fill_cpu_elf_notes(ptr, cpu++, sa); ptr = nt_vmcoreinfo(ptr); + ptr = nt_final(ptr); memset(phdr, 0, sizeof(*phdr)); phdr->p_type = PT_NOTE; phdr->p_offset = notes_offset; diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c index 4e65c79cc5f2..5d20182ee8ae 100644 --- a/arch/s390/kernel/early.c +++ b/arch/s390/kernel/early.c @@ -231,9 +231,29 @@ static noinline __init void detect_machine_type(void) S390_lowcore.machine_flags |= MACHINE_FLAG_VM; } +/* Remove leading, trailing and double whitespace. 
*/ +static inline void strim_all(char *str) +{ + char *s; + + s = strim(str); + if (s != str) + memmove(str, s, strlen(s) + 1); /* also move the terminating NUL */ + while (*str) { + if (!isspace(*str++)) + continue; + if (isspace(*str)) { + s = skip_spaces(str); + memmove(str, s, strlen(s) + 1); + } + } +} + static noinline __init void setup_arch_string(void) { struct sysinfo_1_1_1 *mach = (struct sysinfo_1_1_1 *)&sysinfo_page; + struct sysinfo_3_2_2 *vm = (struct sysinfo_3_2_2 *)&sysinfo_page; + char mstr[80], hvstr[17]; if (stsi(mach, 1, 1, 1)) return; @@ -241,14 +261,21 @@ static noinline __init void setup_arch_string(void) EBCASC(mach->type, sizeof(mach->type)); EBCASC(mach->model, sizeof(mach->model)); EBCASC(mach->model_capacity, sizeof(mach->model_capacity)); - dump_stack_set_arch_desc("%-16.16s %-4.4s %-16.16s %-16.16s (%s)", - mach->manufacturer, - mach->type, - mach->model, - mach->model_capacity, - MACHINE_IS_LPAR ? "LPAR" : - MACHINE_IS_VM ? "z/VM" : - MACHINE_IS_KVM ? "KVM" : "unknown"); + sprintf(mstr, "%-16.16s %-4.4s %-16.16s %-16.16s", + mach->manufacturer, mach->type, + mach->model, mach->model_capacity); + strim_all(mstr); + if (stsi(vm, 3, 2, 2) == 0 && vm->count) { + EBCASC(vm->vm[0].cpi, sizeof(vm->vm[0].cpi)); + sprintf(hvstr, "%-16.16s", vm->vm[0].cpi); + strim_all(hvstr); + } else { + sprintf(hvstr, "%s", + MACHINE_IS_LPAR ? "LPAR" : + MACHINE_IS_VM ? "z/VM" : + MACHINE_IS_KVM ? 
"KVM" : "unknown"); + } + dump_stack_set_arch_desc("%s (%s)", mstr, hvstr); } static __init void setup_topology(void) @@ -358,6 +385,8 @@ static __init void detect_machine_facilities(void) S390_lowcore.machine_flags |= MACHINE_FLAG_NX; __ctl_set_bit(0, 20); } + if (test_facility(133)) + S390_lowcore.machine_flags |= MACHINE_FLAG_GS; } static inline void save_vector_registers(void) @@ -375,7 +404,7 @@ static int __init topology_setup(char *str) rc = kstrtobool(str, &enabled); if (!rc && !enabled) - S390_lowcore.machine_flags &= ~MACHINE_HAS_TOPOLOGY; + S390_lowcore.machine_flags &= ~MACHINE_FLAG_TOPOLOGY; return rc; } early_param("topology", topology_setup); @@ -405,23 +434,16 @@ early_param("noexec", noexec_setup); static int __init cad_setup(char *str) { - int val; - - get_option(&str, &val); - if (val && test_facility(128)) - S390_lowcore.machine_flags |= MACHINE_FLAG_CAD; - return 0; -} -early_param("cad", cad_setup); + bool enabled; + int rc; -static int __init cad_init(void) -{ - if (MACHINE_HAS_CAD) + rc = kstrtobool(str, &enabled); + if (!rc && enabled && test_facility(128)) /* Enable problem state CAD. 
*/ __ctl_set_bit(2, 3); - return 0; + return rc; } -early_initcall(cad_init); +early_param("cad", cad_setup); static __init void memmove_early(void *dst, const void *src, size_t n) { diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index 6a7d737d514c..c6cf338c9327 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -47,7 +47,7 @@ STACK_SIZE = 1 << STACK_SHIFT STACK_INIT = STACK_SIZE - STACK_FRAME_OVERHEAD - __PT_SIZE _TIF_WORK = (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED | \ - _TIF_UPROBE) + _TIF_UPROBE | _TIF_GUARDED_STORAGE) _TIF_TRACE = (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_SECCOMP | \ _TIF_SYSCALL_TRACEPOINT) _CIF_WORK = (_CIF_MCCK_PENDING | _CIF_ASCE_PRIMARY | \ @@ -189,8 +189,6 @@ ENTRY(__switch_to) stg %r3,__LC_CURRENT # store task struct of next stg %r15,__LC_KERNEL_STACK # store end of kernel stack lg %r15,__THREAD_ksp(%r1) # load kernel stack of next - /* c4 is used in guest detection: arch/s390/kernel/perf_cpum_sf.c */ - lctl %c4,%c4,__TASK_pid(%r3) # load pid to control reg. 
4 mvc __LC_CURRENT_PID(4,%r0),__TASK_pid(%r3) # store pid of next lmg %r6,%r15,__SF_GPRS(%r15) # load gprs of next task TSTMSK __LC_MACHINE_FLAGS,MACHINE_FLAG_LPP @@ -332,6 +330,8 @@ ENTRY(system_call) TSTMSK __TI_flags(%r12),_TIF_UPROBE jo .Lsysc_uprobe_notify #endif + TSTMSK __TI_flags(%r12),_TIF_GUARDED_STORAGE + jo .Lsysc_guarded_storage TSTMSK __PT_FLAGS(%r11),_PIF_PER_TRAP jo .Lsysc_singlestep TSTMSK __TI_flags(%r12),_TIF_SIGPENDING @@ -409,6 +409,14 @@ ENTRY(system_call) #endif # +# _TIF_GUARDED_STORAGE is set, call gs_load_bc_cb +# +.Lsysc_guarded_storage: + lgr %r2,%r11 # pass pointer to pt_regs + larl %r14,.Lsysc_return + jg gs_load_bc_cb + +# # _PIF_PER_TRAP is set, call do_per_trap # .Lsysc_singlestep: @@ -663,6 +671,8 @@ ENTRY(io_int_handler) jo .Lio_sigpending TSTMSK __TI_flags(%r12),_TIF_NOTIFY_RESUME jo .Lio_notify_resume + TSTMSK __TI_flags(%r12),_TIF_GUARDED_STORAGE + jo .Lio_guarded_storage TSTMSK __LC_CPU_FLAGS,_CIF_FPU jo .Lio_vxrs TSTMSK __LC_CPU_FLAGS,(_CIF_ASCE_PRIMARY|_CIF_ASCE_SECONDARY) @@ -697,6 +707,18 @@ ENTRY(io_int_handler) jg load_fpu_regs # +# _TIF_GUARDED_STORAGE is set, call gs_load_bc_cb +# +.Lio_guarded_storage: + # TRACE_IRQS_ON already done at .Lio_return + ssm __LC_SVC_NEW_PSW # reenable interrupts + lgr %r2,%r11 # pass pointer to pt_regs + brasl %r14,gs_load_bc_cb + ssm __LC_PGM_NEW_PSW # disable I/O and ext. 
interrupts + TRACE_IRQS_OFF + j .Lio_return + +# # _TIF_NEED_RESCHED is set, call schedule # .Lio_reschedule: diff --git a/arch/s390/kernel/entry.h b/arch/s390/kernel/entry.h index 33f901865326..dbf5f7e18246 100644 --- a/arch/s390/kernel/entry.h +++ b/arch/s390/kernel/entry.h @@ -74,12 +74,14 @@ long sys_sigreturn(void); long sys_s390_personality(unsigned int personality); long sys_s390_runtime_instr(int command, int signum); +long sys_s390_guarded_storage(int command, struct gs_cb __user *); long sys_s390_pci_mmio_write(unsigned long, const void __user *, size_t); long sys_s390_pci_mmio_read(unsigned long, void __user *, size_t); DECLARE_PER_CPU(u64, mt_cycles[8]); void verify_facilities(void); +void gs_load_bc_cb(struct pt_regs *regs); void set_fs_fixup(void); #endif /* _ENTRY_H */ diff --git a/arch/s390/kernel/guarded_storage.c b/arch/s390/kernel/guarded_storage.c new file mode 100644 index 000000000000..6f064745c3b1 --- /dev/null +++ b/arch/s390/kernel/guarded_storage.c @@ -0,0 +1,128 @@ +/* + * Copyright IBM Corp. 
2016 + * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com> + */ + +#include <linux/kernel.h> +#include <linux/syscalls.h> +#include <linux/signal.h> +#include <linux/mm.h> +#include <linux/slab.h> +#include <asm/guarded_storage.h> +#include "entry.h" + +void exit_thread_gs(void) +{ + kfree(current->thread.gs_cb); + kfree(current->thread.gs_bc_cb); + current->thread.gs_cb = current->thread.gs_bc_cb = NULL; +} + +static int gs_enable(void) +{ + struct gs_cb *gs_cb; + + if (!current->thread.gs_cb) { + gs_cb = kzalloc(sizeof(*gs_cb), GFP_KERNEL); + if (!gs_cb) + return -ENOMEM; + gs_cb->gsd = 25; + preempt_disable(); + __ctl_set_bit(2, 4); + load_gs_cb(gs_cb); + current->thread.gs_cb = gs_cb; + preempt_enable(); + } + return 0; +} + +static int gs_disable(void) +{ + if (current->thread.gs_cb) { + preempt_disable(); + kfree(current->thread.gs_cb); + current->thread.gs_cb = NULL; + __ctl_clear_bit(2, 4); + preempt_enable(); + } + return 0; +} + +static int gs_set_bc_cb(struct gs_cb __user *u_gs_cb) +{ + struct gs_cb *gs_cb; + + gs_cb = current->thread.gs_bc_cb; + if (!gs_cb) { + gs_cb = kzalloc(sizeof(*gs_cb), GFP_KERNEL); + if (!gs_cb) + return -ENOMEM; + current->thread.gs_bc_cb = gs_cb; + } + if (copy_from_user(gs_cb, u_gs_cb, sizeof(*gs_cb))) + return -EFAULT; + return 0; +} + +static int gs_clear_bc_cb(void) +{ + struct gs_cb *gs_cb; + + gs_cb = current->thread.gs_bc_cb; + current->thread.gs_bc_cb = NULL; + kfree(gs_cb); + return 0; +} + +void gs_load_bc_cb(struct pt_regs *regs) +{ + struct gs_cb *gs_cb; + + preempt_disable(); + clear_thread_flag(TIF_GUARDED_STORAGE); + gs_cb = current->thread.gs_bc_cb; + if (gs_cb) { + kfree(current->thread.gs_cb); + current->thread.gs_bc_cb = NULL; + __ctl_set_bit(2, 4); + load_gs_cb(gs_cb); + current->thread.gs_cb = gs_cb; + } + preempt_enable(); +} + +static int gs_broadcast(void) +{ + struct task_struct *sibling; + + read_lock(&tasklist_lock); + for_each_thread(current, sibling) { + if (!sibling->thread.gs_bc_cb) + 
continue; + if (test_and_set_tsk_thread_flag(sibling, TIF_GUARDED_STORAGE)) + kick_process(sibling); + } + read_unlock(&tasklist_lock); + return 0; +} + +SYSCALL_DEFINE2(s390_guarded_storage, int, command, + struct gs_cb __user *, gs_cb) +{ + if (!MACHINE_HAS_GS) + return -EOPNOTSUPP; + switch (command) { + case GS_ENABLE: + return gs_enable(); + case GS_DISABLE: + return gs_disable(); + case GS_SET_BC_CB: + return gs_set_bc_cb(gs_cb); + case GS_CLEAR_BC_CB: + return gs_clear_bc_cb(); + case GS_BROADCAST: + return gs_broadcast(); + default: + return -EINVAL; + } +} diff --git a/arch/s390/kernel/head.S b/arch/s390/kernel/head.S index 0b5ebf8a3d30..eff5b31671d4 100644 --- a/arch/s390/kernel/head.S +++ b/arch/s390/kernel/head.S @@ -25,7 +25,6 @@ #include <linux/linkage.h> #include <asm/asm-offsets.h> #include <asm/thread_info.h> -#include <asm/facility.h> #include <asm/page.h> #include <asm/ptrace.h> diff --git a/arch/s390/kernel/head64.S b/arch/s390/kernel/head64.S index 482d3526e32b..31c91f24e562 100644 --- a/arch/s390/kernel/head64.S +++ b/arch/s390/kernel/head64.S @@ -52,7 +52,7 @@ ENTRY(startup_continue) .quad 0 # cr1: primary space segment table .quad .Lduct # cr2: dispatchable unit control table .quad 0 # cr3: instruction authorization - .quad 0 # cr4: instruction authorization + .quad 0xffff # cr4: instruction authorization .quad .Lduct # cr5: primary-aste origin .quad 0 # cr6: I/O interrupts .quad 0 # cr7: secondary space segment table diff --git a/arch/s390/kernel/kdebugfs.c b/arch/s390/kernel/kdebugfs.c new file mode 100644 index 000000000000..ee85e17dd79d --- /dev/null +++ b/arch/s390/kernel/kdebugfs.c @@ -0,0 +1,15 @@ +#include <linux/debugfs.h> +#include <linux/export.h> +#include <linux/init.h> + +struct dentry *arch_debugfs_dir; +EXPORT_SYMBOL(arch_debugfs_dir); + +static int __init arch_kdebugfs_init(void) +{ + arch_debugfs_dir = debugfs_create_dir("s390", NULL); + if (IS_ERR(arch_debugfs_dir)) + arch_debugfs_dir = NULL; + return 0; +} 
+postcore_initcall(arch_kdebugfs_init); diff --git a/arch/s390/kernel/machine_kexec.c b/arch/s390/kernel/machine_kexec.c index 3074c1d83829..db5658daf994 100644 --- a/arch/s390/kernel/machine_kexec.c +++ b/arch/s390/kernel/machine_kexec.c @@ -27,6 +27,7 @@ #include <asm/cacheflush.h> #include <asm/os_info.h> #include <asm/switch_to.h> +#include <asm/nmi.h> typedef void (*relocate_kernel_t)(kimage_entry_t *, unsigned long); @@ -102,6 +103,8 @@ static void __do_machine_kdump(void *image) */ static noinline void __machine_kdump(void *image) { + struct mcesa *mcesa; + unsigned long cr2_old, cr2_new; int this_cpu, cpu; lgr_info_log(); @@ -114,8 +117,16 @@ static noinline void __machine_kdump(void *image) continue; } /* Store status of the boot CPU */ + mcesa = (struct mcesa *)(S390_lowcore.mcesad & MCESA_ORIGIN_MASK); if (MACHINE_HAS_VX) - save_vx_regs((void *) &S390_lowcore.vector_save_area); + save_vx_regs((__vector128 *) mcesa->vector_save_area); + if (MACHINE_HAS_GS) { + __ctl_store(cr2_old, 2, 2); + cr2_new = cr2_old | (1UL << 4); + __ctl_load(cr2_new, 2, 2); + save_gs_cb((struct gs_cb *) mcesa->guarded_storage_save_area); + __ctl_load(cr2_old, 2, 2); + } /* * To create a good backchain for this CPU in the dump store_status * is passed the address of a function. 
The address is saved into diff --git a/arch/s390/kernel/nmi.c b/arch/s390/kernel/nmi.c index 9bf8327154ee..985589523970 100644 --- a/arch/s390/kernel/nmi.c +++ b/arch/s390/kernel/nmi.c @@ -106,6 +106,7 @@ static int notrace s390_validate_registers(union mci mci, int umode) int kill_task; u64 zero; void *fpt_save_area; + struct mcesa *mcesa; kill_task = 0; zero = 0; @@ -165,6 +166,7 @@ static int notrace s390_validate_registers(union mci mci, int umode) : : "Q" (S390_lowcore.fpt_creg_save_area)); } + mcesa = (struct mcesa *)(S390_lowcore.mcesad & MCESA_ORIGIN_MASK); if (!MACHINE_HAS_VX) { /* Validate floating point registers */ asm volatile( @@ -209,8 +211,8 @@ static int notrace s390_validate_registers(union mci mci, int umode) " la 1,%0\n" " .word 0xe70f,0x1000,0x0036\n" /* vlm 0,15,0(1) */ " .word 0xe70f,0x1100,0x0c36\n" /* vlm 16,31,256(1) */ - : : "Q" (*(struct vx_array *) - &S390_lowcore.vector_save_area) : "1"); + : : "Q" (*(struct vx_array *) mcesa->vector_save_area) + : "1"); __ctl_load(S390_lowcore.cregs_save_area[0], 0, 0); } /* Validate access registers */ @@ -224,6 +226,19 @@ static int notrace s390_validate_registers(union mci mci, int umode) */ kill_task = 1; } + /* Validate guarded storage registers */ + if (MACHINE_HAS_GS && (S390_lowcore.cregs_save_area[2] & (1UL << 4))) { + if (!mci.gs) + /* + * Guarded storage register can't be restored and + * the current processes uses guarded storage. + * It has to be terminated. + */ + kill_task = 1; + else + load_gs_cb((struct gs_cb *) + mcesa->guarded_storage_save_area); + } /* * We don't even try to validate the TOD register, since we simply * can't write something sensible into that register. diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c index 1aba10e90906..746d03423333 100644 --- a/arch/s390/kernel/perf_cpum_cf.c +++ b/arch/s390/kernel/perf_cpum_cf.c @@ -1,7 +1,7 @@ /* * Performance event support for s390x - CPU-measurement Counter Facility * - * Copyright IBM Corp. 
2012 + * Copyright IBM Corp. 2012, 2017 * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com> * * This program is free software; you can redistribute it and/or modify @@ -22,19 +22,12 @@ #include <asm/irq.h> #include <asm/cpu_mf.h> -/* CPU-measurement counter facility supports these CPU counter sets: - * For CPU counter sets: - * Basic counter set: 0-31 - * Problem-state counter set: 32-63 - * Crypto-activity counter set: 64-127 - * Extented counter set: 128-159 - */ enum cpumf_ctr_set { - /* CPU counter sets */ - CPUMF_CTR_SET_BASIC = 0, - CPUMF_CTR_SET_USER = 1, - CPUMF_CTR_SET_CRYPTO = 2, - CPUMF_CTR_SET_EXT = 3, + CPUMF_CTR_SET_BASIC = 0, /* Basic Counter Set */ + CPUMF_CTR_SET_USER = 1, /* Problem-State Counter Set */ + CPUMF_CTR_SET_CRYPTO = 2, /* Crypto-Activity Counter Set */ + CPUMF_CTR_SET_EXT = 3, /* Extended Counter Set */ + CPUMF_CTR_SET_MT_DIAG = 4, /* MT-diagnostic Counter Set */ /* Maximum number of counter sets */ CPUMF_CTR_SET_MAX, @@ -47,6 +40,7 @@ static const u64 cpumf_state_ctl[CPUMF_CTR_SET_MAX] = { [CPUMF_CTR_SET_USER] = 0x04, [CPUMF_CTR_SET_CRYPTO] = 0x08, [CPUMF_CTR_SET_EXT] = 0x01, + [CPUMF_CTR_SET_MT_DIAG] = 0x20, }; static void ctr_set_enable(u64 *state, int ctr_set) @@ -76,19 +70,20 @@ struct cpu_hw_events { }; static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = { .ctr_set = { - [CPUMF_CTR_SET_BASIC] = ATOMIC_INIT(0), - [CPUMF_CTR_SET_USER] = ATOMIC_INIT(0), - [CPUMF_CTR_SET_CRYPTO] = ATOMIC_INIT(0), - [CPUMF_CTR_SET_EXT] = ATOMIC_INIT(0), + [CPUMF_CTR_SET_BASIC] = ATOMIC_INIT(0), + [CPUMF_CTR_SET_USER] = ATOMIC_INIT(0), + [CPUMF_CTR_SET_CRYPTO] = ATOMIC_INIT(0), + [CPUMF_CTR_SET_EXT] = ATOMIC_INIT(0), + [CPUMF_CTR_SET_MT_DIAG] = ATOMIC_INIT(0), }, .state = 0, .flags = 0, .txn_flags = 0, }; -static int get_counter_set(u64 event) +static enum cpumf_ctr_set get_counter_set(u64 event) { - int set = -1; + int set = CPUMF_CTR_SET_MAX; if (event < 32) set = CPUMF_CTR_SET_BASIC; @@ -98,34 +93,17 @@ static int 
get_counter_set(u64 event) set = CPUMF_CTR_SET_CRYPTO; else if (event < 256) set = CPUMF_CTR_SET_EXT; + else if (event >= 448 && event < 496) + set = CPUMF_CTR_SET_MT_DIAG; return set; } -static int validate_event(const struct hw_perf_event *hwc) -{ - switch (hwc->config_base) { - case CPUMF_CTR_SET_BASIC: - case CPUMF_CTR_SET_USER: - case CPUMF_CTR_SET_CRYPTO: - case CPUMF_CTR_SET_EXT: - /* check for reserved counters */ - if ((hwc->config >= 6 && hwc->config <= 31) || - (hwc->config >= 38 && hwc->config <= 63) || - (hwc->config >= 80 && hwc->config <= 127)) - return -EOPNOTSUPP; - break; - default: - return -EINVAL; - } - - return 0; -} - static int validate_ctr_version(const struct hw_perf_event *hwc) { struct cpu_hw_events *cpuhw; int err = 0; + u16 mtdiag_ctl; cpuhw = &get_cpu_var(cpu_hw_events); @@ -145,6 +123,27 @@ static int validate_ctr_version(const struct hw_perf_event *hwc) (cpuhw->info.csvn > 2 && hwc->config > 255)) err = -EOPNOTSUPP; break; + case CPUMF_CTR_SET_MT_DIAG: + if (cpuhw->info.csvn <= 3) + err = -EOPNOTSUPP; + /* + * MT-diagnostic counters are read-only. The counter set + * is automatically enabled and activated on all CPUs with + * multithreading (SMT). Deactivation of multithreading + * also disables the counter set. State changes are ignored + * by lcctl(). Because Linux controls SMT enablement through + * a kernel parameter only, the counter set is either disabled + * or enabled and active. + * + * Thus, the counters can only be used if SMT is on and the + * counter set is enabled and active. 
+ */ + mtdiag_ctl = cpumf_state_ctl[CPUMF_CTR_SET_MT_DIAG]; + if (!((cpuhw->info.auth_ctl & mtdiag_ctl) && + (cpuhw->info.enable_ctl & mtdiag_ctl) && + (cpuhw->info.act_ctl & mtdiag_ctl))) + err = -EOPNOTSUPP; + break; } put_cpu_var(cpu_hw_events); @@ -250,6 +249,11 @@ static void cpumf_measurement_alert(struct ext_code ext_code, /* loss of counter data alert */ if (alert & CPU_MF_INT_CF_LCDA) pr_err("CPU[%i] Counter data was lost\n", smp_processor_id()); + + /* loss of MT counter data alert */ + if (alert & CPU_MF_INT_CF_MTDA) + pr_warn("CPU[%i] MT counter data was lost\n", + smp_processor_id()); } #define PMC_INIT 0 @@ -330,6 +334,7 @@ static int __hw_perf_event_init(struct perf_event *event) { struct perf_event_attr *attr = &event->attr; struct hw_perf_event *hwc = &event->hw; + enum cpumf_ctr_set set; int err; u64 ev; @@ -370,25 +375,30 @@ static int __hw_perf_event_init(struct perf_event *event) if (ev == -1) return -ENOENT; - if (ev >= PERF_CPUM_CF_MAX_CTR) + if (ev > PERF_CPUM_CF_MAX_CTR) return -EINVAL; - /* Use the hardware perf event structure to store the counter number - * in 'config' member and the counter set to which the counter belongs - * in the 'config_base'. The counter set (config_base) is then used - * to enable/disable the counters. - */ - hwc->config = ev; - hwc->config_base = get_counter_set(ev); - - /* Validate the counter that is assigned to this event. - * Because the counter facility can use numerous counters at the - * same time without constraints, it is not necessary to explicitly - * validate event groups (event->group_leader != event). 
- */ - err = validate_event(hwc); - if (err) - return err; + /* Obtain the counter set to which the specified counter belongs */ + set = get_counter_set(ev); + switch (set) { + case CPUMF_CTR_SET_BASIC: + case CPUMF_CTR_SET_USER: + case CPUMF_CTR_SET_CRYPTO: + case CPUMF_CTR_SET_EXT: + case CPUMF_CTR_SET_MT_DIAG: + /* + * Use the hardware perf event structure to store the + * counter number in the 'config' member and the counter + * set number in the 'config_base'. The counter set number + * is then later used to enable/disable the counter(s). + */ + hwc->config = ev; + hwc->config_base = set; + break; + case CPUMF_CTR_SET_MAX: + /* The counter could not be associated to a counter set */ + return -EINVAL; + }; /* Initialize for using the CPU-measurement counter facility */ if (!atomic_inc_not_zero(&num_events)) { @@ -452,7 +462,7 @@ static int hw_perf_event_reset(struct perf_event *event) return err; } -static int hw_perf_event_update(struct perf_event *event) +static void hw_perf_event_update(struct perf_event *event) { u64 prev, new, delta; int err; @@ -461,14 +471,12 @@ static int hw_perf_event_update(struct perf_event *event) prev = local64_read(&event->hw.prev_count); err = ecctr(event->hw.config, &new); if (err) - goto out; + return; } while (local64_cmpxchg(&event->hw.prev_count, prev, new) != prev); delta = (prev <= new) ? 
new - prev : (-1ULL - prev) + new + 1; /* overflow */ local64_add(delta, &event->count); -out: - return err; } static void cpumf_pmu_read(struct perf_event *event) diff --git a/arch/s390/kernel/perf_cpum_cf_events.c b/arch/s390/kernel/perf_cpum_cf_events.c index c343ac2cf6c5..d3133285b7d1 100644 --- a/arch/s390/kernel/perf_cpum_cf_events.c +++ b/arch/s390/kernel/perf_cpum_cf_events.c @@ -114,8 +114,64 @@ CPUMF_EVENT_ATTR(cf_zec12, L1I_OFFBOOK_L3_SOURCED_WRITES_IV, 0x00a1); CPUMF_EVENT_ATTR(cf_zec12, TX_NC_TABORT, 0x00b1); CPUMF_EVENT_ATTR(cf_zec12, TX_C_TABORT_NO_SPECIAL, 0x00b2); CPUMF_EVENT_ATTR(cf_zec12, TX_C_TABORT_SPECIAL, 0x00b3); +CPUMF_EVENT_ATTR(cf_z13, L1D_WRITES_RO_EXCL, 0x0080); +CPUMF_EVENT_ATTR(cf_z13, DTLB1_WRITES, 0x0081); +CPUMF_EVENT_ATTR(cf_z13, DTLB1_MISSES, 0x0082); +CPUMF_EVENT_ATTR(cf_z13, DTLB1_HPAGE_WRITES, 0x0083); +CPUMF_EVENT_ATTR(cf_z13, DTLB1_GPAGE_WRITES, 0x0084); +CPUMF_EVENT_ATTR(cf_z13, L1D_L2D_SOURCED_WRITES, 0x0085); +CPUMF_EVENT_ATTR(cf_z13, ITLB1_WRITES, 0x0086); +CPUMF_EVENT_ATTR(cf_z13, ITLB1_MISSES, 0x0087); +CPUMF_EVENT_ATTR(cf_z13, L1I_L2I_SOURCED_WRITES, 0x0088); +CPUMF_EVENT_ATTR(cf_z13, TLB2_PTE_WRITES, 0x0089); +CPUMF_EVENT_ATTR(cf_z13, TLB2_CRSTE_HPAGE_WRITES, 0x008a); +CPUMF_EVENT_ATTR(cf_z13, TLB2_CRSTE_WRITES, 0x008b); +CPUMF_EVENT_ATTR(cf_z13, TX_C_TEND, 0x008c); +CPUMF_EVENT_ATTR(cf_z13, TX_NC_TEND, 0x008d); +CPUMF_EVENT_ATTR(cf_z13, L1C_TLB1_MISSES, 0x008f); +CPUMF_EVENT_ATTR(cf_z13, L1D_ONCHIP_L3_SOURCED_WRITES, 0x0090); +CPUMF_EVENT_ATTR(cf_z13, L1D_ONCHIP_L3_SOURCED_WRITES_IV, 0x0091); +CPUMF_EVENT_ATTR(cf_z13, L1D_ONNODE_L4_SOURCED_WRITES, 0x0092); +CPUMF_EVENT_ATTR(cf_z13, L1D_ONNODE_L3_SOURCED_WRITES_IV, 0x0093); +CPUMF_EVENT_ATTR(cf_z13, L1D_ONNODE_L3_SOURCED_WRITES, 0x0094); +CPUMF_EVENT_ATTR(cf_z13, L1D_ONDRAWER_L4_SOURCED_WRITES, 0x0095); +CPUMF_EVENT_ATTR(cf_z13, L1D_ONDRAWER_L3_SOURCED_WRITES_IV, 0x0096); +CPUMF_EVENT_ATTR(cf_z13, L1D_ONDRAWER_L3_SOURCED_WRITES, 0x0097); +CPUMF_EVENT_ATTR(cf_z13, 
L1D_OFFDRAWER_SCOL_L4_SOURCED_WRITES, 0x0098); +CPUMF_EVENT_ATTR(cf_z13, L1D_OFFDRAWER_SCOL_L3_SOURCED_WRITES_IV, 0x0099); +CPUMF_EVENT_ATTR(cf_z13, L1D_OFFDRAWER_SCOL_L3_SOURCED_WRITES, 0x009a); +CPUMF_EVENT_ATTR(cf_z13, L1D_OFFDRAWER_FCOL_L4_SOURCED_WRITES, 0x009b); +CPUMF_EVENT_ATTR(cf_z13, L1D_OFFDRAWER_FCOL_L3_SOURCED_WRITES_IV, 0x009c); +CPUMF_EVENT_ATTR(cf_z13, L1D_OFFDRAWER_FCOL_L3_SOURCED_WRITES, 0x009d); +CPUMF_EVENT_ATTR(cf_z13, L1D_ONNODE_MEM_SOURCED_WRITES, 0x009e); +CPUMF_EVENT_ATTR(cf_z13, L1D_ONDRAWER_MEM_SOURCED_WRITES, 0x009f); +CPUMF_EVENT_ATTR(cf_z13, L1D_OFFDRAWER_MEM_SOURCED_WRITES, 0x00a0); +CPUMF_EVENT_ATTR(cf_z13, L1D_ONCHIP_MEM_SOURCED_WRITES, 0x00a1); +CPUMF_EVENT_ATTR(cf_z13, L1I_ONCHIP_L3_SOURCED_WRITES, 0x00a2); +CPUMF_EVENT_ATTR(cf_z13, L1I_ONCHIP_L3_SOURCED_WRITES_IV, 0x00a3); +CPUMF_EVENT_ATTR(cf_z13, L1I_ONNODE_L4_SOURCED_WRITES, 0x00a4); +CPUMF_EVENT_ATTR(cf_z13, L1I_ONNODE_L3_SOURCED_WRITES_IV, 0x00a5); +CPUMF_EVENT_ATTR(cf_z13, L1I_ONNODE_L3_SOURCED_WRITES, 0x00a6); +CPUMF_EVENT_ATTR(cf_z13, L1I_ONDRAWER_L4_SOURCED_WRITES, 0x00a7); +CPUMF_EVENT_ATTR(cf_z13, L1I_ONDRAWER_L3_SOURCED_WRITES_IV, 0x00a8); +CPUMF_EVENT_ATTR(cf_z13, L1I_ONDRAWER_L3_SOURCED_WRITES, 0x00a9); +CPUMF_EVENT_ATTR(cf_z13, L1I_OFFDRAWER_SCOL_L4_SOURCED_WRITES, 0x00aa); +CPUMF_EVENT_ATTR(cf_z13, L1I_OFFDRAWER_SCOL_L3_SOURCED_WRITES_IV, 0x00ab); +CPUMF_EVENT_ATTR(cf_z13, L1I_OFFDRAWER_SCOL_L3_SOURCED_WRITES, 0x00ac); +CPUMF_EVENT_ATTR(cf_z13, L1I_OFFDRAWER_FCOL_L4_SOURCED_WRITES, 0x00ad); +CPUMF_EVENT_ATTR(cf_z13, L1I_OFFDRAWER_FCOL_L3_SOURCED_WRITES_IV, 0x00ae); +CPUMF_EVENT_ATTR(cf_z13, L1I_OFFDRAWER_FCOL_L3_SOURCED_WRITES, 0x00af); +CPUMF_EVENT_ATTR(cf_z13, L1I_ONNODE_MEM_SOURCED_WRITES, 0x00b0); +CPUMF_EVENT_ATTR(cf_z13, L1I_ONDRAWER_MEM_SOURCED_WRITES, 0x00b1); +CPUMF_EVENT_ATTR(cf_z13, L1I_OFFDRAWER_MEM_SOURCED_WRITES, 0x00b2); +CPUMF_EVENT_ATTR(cf_z13, L1I_ONCHIP_MEM_SOURCED_WRITES, 0x00b3); +CPUMF_EVENT_ATTR(cf_z13, TX_NC_TABORT, 0x00da); 
+CPUMF_EVENT_ATTR(cf_z13, TX_C_TABORT_NO_SPECIAL, 0x00db); +CPUMF_EVENT_ATTR(cf_z13, TX_C_TABORT_SPECIAL, 0x00dc); +CPUMF_EVENT_ATTR(cf_z13, MT_DIAG_CYCLES_ONE_THR_ACTIVE, 0x01c0); +CPUMF_EVENT_ATTR(cf_z13, MT_DIAG_CYCLES_TWO_THR_ACTIVE, 0x01c1); -static struct attribute *cpumcf_pmu_event_attr[] = { +static struct attribute *cpumcf_pmu_event_attr[] __initdata = { CPUMF_EVENT_PTR(cf, CPU_CYCLES), CPUMF_EVENT_PTR(cf, INSTRUCTIONS), CPUMF_EVENT_PTR(cf, L1I_DIR_WRITES), @@ -236,28 +292,87 @@ static struct attribute *cpumcf_zec12_pmu_event_attr[] __initdata = { NULL, }; +static struct attribute *cpumcf_z13_pmu_event_attr[] __initdata = { + CPUMF_EVENT_PTR(cf_z13, L1D_WRITES_RO_EXCL), + CPUMF_EVENT_PTR(cf_z13, DTLB1_WRITES), + CPUMF_EVENT_PTR(cf_z13, DTLB1_MISSES), + CPUMF_EVENT_PTR(cf_z13, DTLB1_HPAGE_WRITES), + CPUMF_EVENT_PTR(cf_z13, DTLB1_GPAGE_WRITES), + CPUMF_EVENT_PTR(cf_z13, L1D_L2D_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z13, ITLB1_WRITES), + CPUMF_EVENT_PTR(cf_z13, ITLB1_MISSES), + CPUMF_EVENT_PTR(cf_z13, L1I_L2I_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z13, TLB2_PTE_WRITES), + CPUMF_EVENT_PTR(cf_z13, TLB2_CRSTE_HPAGE_WRITES), + CPUMF_EVENT_PTR(cf_z13, TLB2_CRSTE_WRITES), + CPUMF_EVENT_PTR(cf_z13, TX_C_TEND), + CPUMF_EVENT_PTR(cf_z13, TX_NC_TEND), + CPUMF_EVENT_PTR(cf_z13, L1C_TLB1_MISSES), + CPUMF_EVENT_PTR(cf_z13, L1D_ONCHIP_L3_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z13, L1D_ONCHIP_L3_SOURCED_WRITES_IV), + CPUMF_EVENT_PTR(cf_z13, L1D_ONNODE_L4_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z13, L1D_ONNODE_L3_SOURCED_WRITES_IV), + CPUMF_EVENT_PTR(cf_z13, L1D_ONNODE_L3_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z13, L1D_ONDRAWER_L4_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z13, L1D_ONDRAWER_L3_SOURCED_WRITES_IV), + CPUMF_EVENT_PTR(cf_z13, L1D_ONDRAWER_L3_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z13, L1D_OFFDRAWER_SCOL_L4_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z13, L1D_OFFDRAWER_SCOL_L3_SOURCED_WRITES_IV), + CPUMF_EVENT_PTR(cf_z13, L1D_OFFDRAWER_SCOL_L3_SOURCED_WRITES), + 
CPUMF_EVENT_PTR(cf_z13, L1D_OFFDRAWER_FCOL_L4_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z13, L1D_OFFDRAWER_FCOL_L3_SOURCED_WRITES_IV), + CPUMF_EVENT_PTR(cf_z13, L1D_OFFDRAWER_FCOL_L3_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z13, L1D_ONNODE_MEM_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z13, L1D_ONDRAWER_MEM_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z13, L1D_OFFDRAWER_MEM_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z13, L1D_ONCHIP_MEM_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z13, L1I_ONCHIP_L3_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z13, L1I_ONCHIP_L3_SOURCED_WRITES_IV), + CPUMF_EVENT_PTR(cf_z13, L1I_ONNODE_L4_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z13, L1I_ONNODE_L3_SOURCED_WRITES_IV), + CPUMF_EVENT_PTR(cf_z13, L1I_ONNODE_L3_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z13, L1I_ONDRAWER_L4_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z13, L1I_ONDRAWER_L3_SOURCED_WRITES_IV), + CPUMF_EVENT_PTR(cf_z13, L1I_ONDRAWER_L3_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z13, L1I_OFFDRAWER_SCOL_L4_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z13, L1I_OFFDRAWER_SCOL_L3_SOURCED_WRITES_IV), + CPUMF_EVENT_PTR(cf_z13, L1I_OFFDRAWER_SCOL_L3_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z13, L1I_OFFDRAWER_FCOL_L4_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z13, L1I_OFFDRAWER_FCOL_L3_SOURCED_WRITES_IV), + CPUMF_EVENT_PTR(cf_z13, L1I_OFFDRAWER_FCOL_L3_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z13, L1I_ONNODE_MEM_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z13, L1I_ONDRAWER_MEM_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z13, L1I_OFFDRAWER_MEM_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z13, L1I_ONCHIP_MEM_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z13, TX_NC_TABORT), + CPUMF_EVENT_PTR(cf_z13, TX_C_TABORT_NO_SPECIAL), + CPUMF_EVENT_PTR(cf_z13, TX_C_TABORT_SPECIAL), + CPUMF_EVENT_PTR(cf_z13, MT_DIAG_CYCLES_ONE_THR_ACTIVE), + CPUMF_EVENT_PTR(cf_z13, MT_DIAG_CYCLES_TWO_THR_ACTIVE), + NULL, +}; + /* END: CPUM_CF COUNTER DEFINITIONS ===================================== */ -static struct attribute_group cpumsf_pmu_events_group = { +static struct attribute_group cpumcf_pmu_events_group = 
{ .name = "events", - .attrs = cpumcf_pmu_event_attr, }; PMU_FORMAT_ATTR(event, "config:0-63"); -static struct attribute *cpumsf_pmu_format_attr[] = { +static struct attribute *cpumcf_pmu_format_attr[] = { &format_attr_event.attr, NULL, }; -static struct attribute_group cpumsf_pmu_format_group = { +static struct attribute_group cpumcf_pmu_format_group = { .name = "format", - .attrs = cpumsf_pmu_format_attr, + .attrs = cpumcf_pmu_format_attr, }; -static const struct attribute_group *cpumsf_pmu_attr_groups[] = { - &cpumsf_pmu_events_group, - &cpumsf_pmu_format_group, +static const struct attribute_group *cpumcf_pmu_attr_groups[] = { + &cpumcf_pmu_events_group, + &cpumcf_pmu_format_group, NULL, }; @@ -290,6 +405,7 @@ static __init struct attribute **merge_attr(struct attribute **a, __init const struct attribute_group **cpumf_cf_event_group(void) { struct attribute **combined, **model; + struct attribute *none[] = { NULL }; struct cpuid cpu_id; get_cpu_id(&cpu_id); @@ -306,17 +422,17 @@ __init const struct attribute_group **cpumf_cf_event_group(void) case 0x2828: model = cpumcf_zec12_pmu_event_attr; break; + case 0x2964: + case 0x2965: + model = cpumcf_z13_pmu_event_attr; + break; default: - model = NULL; + model = none; break; } - if (!model) - goto out; - combined = merge_attr(cpumcf_pmu_event_attr, model); if (combined) - cpumsf_pmu_events_group.attrs = combined; -out: - return cpumsf_pmu_attr_groups; + cpumcf_pmu_events_group.attrs = combined; + return cpumcf_pmu_attr_groups; } diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c index 1c0b58545c04..9a4f279d25ca 100644 --- a/arch/s390/kernel/perf_cpum_sf.c +++ b/arch/s390/kernel/perf_cpum_sf.c @@ -1009,8 +1009,8 @@ static int perf_push_sample(struct perf_event *event, struct sf_raw_sample *sfr) * sample. Some early samples or samples from guests without * lpp usage would be misaccounted to the host. We use the asn * value as an addon heuristic to detect most of these guest samples. 
- * If the value differs from the host hpp value, we assume to be a - * KVM guest. + * If the value differs from 0xffff (the host value), we assume to + * be a KVM guest. */ switch (sfr->basic.CL) { case 1: /* logical partition */ @@ -1020,8 +1020,7 @@ static int perf_push_sample(struct perf_event *event, struct sf_raw_sample *sfr) sde_regs->in_guest = 1; break; default: /* old machine, use heuristics */ - if (sfr->basic.gpp || - sfr->basic.prim_asn != (u16)sfr->basic.hpp) + if (sfr->basic.gpp || sfr->basic.prim_asn != 0xffff) sde_regs->in_guest = 1; break; } diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c index f29e41c5e2ec..999d7154bbdc 100644 --- a/arch/s390/kernel/process.c +++ b/arch/s390/kernel/process.c @@ -73,8 +73,10 @@ extern void kernel_thread_starter(void); */ void exit_thread(struct task_struct *tsk) { - if (tsk == current) + if (tsk == current) { exit_thread_runtime_instr(); + exit_thread_gs(); + } } void flush_thread(void) @@ -159,6 +161,9 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long new_stackp, /* Don't copy runtime instrumentation info */ p->thread.ri_cb = NULL; frame->childregs.psw.mask &= ~PSW_MASK_RI; + /* Don't copy guarded storage control block */ + p->thread.gs_cb = NULL; + p->thread.gs_bc_cb = NULL; /* Set a new TLS ? 
*/ if (clone_flags & CLONE_SETTLS) { diff --git a/arch/s390/kernel/processor.c b/arch/s390/kernel/processor.c index 928b929a6261..778cd6536175 100644 --- a/arch/s390/kernel/processor.c +++ b/arch/s390/kernel/processor.c @@ -7,6 +7,7 @@ #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt #include <linux/cpufeature.h> +#include <linux/bitops.h> #include <linux/kernel.h> #include <linux/sched/mm.h> #include <linux/init.h> @@ -91,11 +92,23 @@ int cpu_have_feature(unsigned int num) } EXPORT_SYMBOL(cpu_have_feature); +static void show_facilities(struct seq_file *m) +{ + unsigned int bit; + long *facilities; + + facilities = (long *)&S390_lowcore.stfle_fac_list; + seq_puts(m, "facilities :"); + for_each_set_bit_inv(bit, facilities, MAX_FACILITY_BIT) + seq_printf(m, " %d", bit); + seq_putc(m, '\n'); +} + static void show_cpu_summary(struct seq_file *m, void *v) { static const char *hwcap_str[] = { "esan3", "zarch", "stfle", "msa", "ldisp", "eimm", "dfp", - "edat", "etf3eh", "highgprs", "te", "vx", "vxd", "vxe" + "edat", "etf3eh", "highgprs", "te", "vx", "vxd", "vxe", "gs" }; static const char * const int_hwcap_str[] = { "sie" @@ -116,6 +129,7 @@ static void show_cpu_summary(struct seq_file *m, void *v) if (int_hwcap_str[i] && (int_hwcap & (1UL << i))) seq_printf(m, "%s ", int_hwcap_str[i]); seq_puts(m, "\n"); + show_facilities(m); show_cacheinfo(m); for_each_online_cpu(cpu) { struct cpuid *id = &per_cpu(cpu_info.cpu_id, cpu); diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c index c14df0a1ec3c..488c5bb8dc77 100644 --- a/arch/s390/kernel/ptrace.c +++ b/arch/s390/kernel/ptrace.c @@ -44,30 +44,42 @@ void update_cr_regs(struct task_struct *task) struct pt_regs *regs = task_pt_regs(task); struct thread_struct *thread = &task->thread; struct per_regs old, new; - + unsigned long cr0_old, cr0_new; + unsigned long cr2_old, cr2_new; + int cr0_changed, cr2_changed; + + __ctl_store(cr0_old, 0, 0); + __ctl_store(cr2_old, 2, 2); + cr0_new = cr0_old; + cr2_new = cr2_old; /* Take 
care of the enable/disable of transactional execution. */ if (MACHINE_HAS_TE) { - unsigned long cr, cr_new; - - __ctl_store(cr, 0, 0); /* Set or clear transaction execution TXC bit 8. */ - cr_new = cr | (1UL << 55); + cr0_new |= (1UL << 55); if (task->thread.per_flags & PER_FLAG_NO_TE) - cr_new &= ~(1UL << 55); - if (cr_new != cr) - __ctl_load(cr_new, 0, 0); + cr0_new &= ~(1UL << 55); /* Set or clear transaction execution TDC bits 62 and 63. */ - __ctl_store(cr, 2, 2); - cr_new = cr & ~3UL; + cr2_new &= ~3UL; if (task->thread.per_flags & PER_FLAG_TE_ABORT_RAND) { if (task->thread.per_flags & PER_FLAG_TE_ABORT_RAND_TEND) - cr_new |= 1UL; + cr2_new |= 1UL; else - cr_new |= 2UL; + cr2_new |= 2UL; } - if (cr_new != cr) - __ctl_load(cr_new, 2, 2); } + /* Take care of enable/disable of guarded storage. */ + if (MACHINE_HAS_GS) { + cr2_new &= ~(1UL << 4); + if (task->thread.gs_cb) + cr2_new |= (1UL << 4); + } + /* Load control register 0/2 iff changed */ + cr0_changed = cr0_new != cr0_old; + cr2_changed = cr2_new != cr2_old; + if (cr0_changed) + __ctl_load(cr0_new, 0, 0); + if (cr2_changed) + __ctl_load(cr2_new, 2, 2); /* Copy user specified PER registers */ new.control = thread->per_user.control; new.start = thread->per_user.start; @@ -1137,6 +1149,74 @@ static int s390_system_call_set(struct task_struct *target, data, 0, sizeof(unsigned int)); } +static int s390_gs_cb_get(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) +{ + struct gs_cb *data = target->thread.gs_cb; + + if (!MACHINE_HAS_GS) + return -ENODEV; + if (!data) + return -ENODATA; + return user_regset_copyout(&pos, &count, &kbuf, &ubuf, + data, 0, sizeof(struct gs_cb)); +} + +static int s390_gs_cb_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + struct gs_cb *data = target->thread.gs_cb; + + if (!MACHINE_HAS_GS) + 
return -ENODEV; + if (!data) { + data = kzalloc(sizeof(*data), GFP_KERNEL); + if (!data) + return -ENOMEM; + target->thread.gs_cb = data; + } + return user_regset_copyin(&pos, &count, &kbuf, &ubuf, + data, 0, sizeof(struct gs_cb)); +} + +static int s390_gs_bc_get(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) +{ + struct gs_cb *data = target->thread.gs_bc_cb; + + if (!MACHINE_HAS_GS) + return -ENODEV; + if (!data) + return -ENODATA; + return user_regset_copyout(&pos, &count, &kbuf, &ubuf, + data, 0, sizeof(struct gs_cb)); +} + +static int s390_gs_bc_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + struct gs_cb *data = target->thread.gs_bc_cb; + + if (!MACHINE_HAS_GS) + return -ENODEV; + if (!data) { + data = kzalloc(sizeof(*data), GFP_KERNEL); + if (!data) + return -ENOMEM; + target->thread.gs_bc_cb = data; + } + return user_regset_copyin(&pos, &count, &kbuf, &ubuf, + data, 0, sizeof(struct gs_cb)); +} + static const struct user_regset s390_regsets[] = { { .core_note_type = NT_PRSTATUS, @@ -1194,6 +1274,22 @@ static const struct user_regset s390_regsets[] = { .get = s390_vxrs_high_get, .set = s390_vxrs_high_set, }, + { + .core_note_type = NT_S390_GS_CB, + .n = sizeof(struct gs_cb) / sizeof(__u64), + .size = sizeof(__u64), + .align = sizeof(__u64), + .get = s390_gs_cb_get, + .set = s390_gs_cb_set, + }, + { + .core_note_type = NT_S390_GS_BC, + .n = sizeof(struct gs_cb) / sizeof(__u64), + .size = sizeof(__u64), + .align = sizeof(__u64), + .get = s390_gs_bc_get, + .set = s390_gs_bc_set, + }, }; static const struct user_regset_view user_s390_view = { @@ -1422,6 +1518,14 @@ static const struct user_regset s390_compat_regsets[] = { .get = s390_compat_regs_high_get, .set = s390_compat_regs_high_set, }, + { + .core_note_type = NT_S390_GS_CB, + .n = sizeof(struct gs_cb) / 
sizeof(__u64), + .size = sizeof(__u64), + .align = sizeof(__u64), + .get = s390_gs_cb_get, + .set = s390_gs_cb_set, + }, }; static const struct user_regset_view user_s390_compat_view = { diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index 911dc0b49be0..3ae756c0db3d 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -339,9 +339,15 @@ static void __init setup_lowcore(void) lc->stfl_fac_list = S390_lowcore.stfl_fac_list; memcpy(lc->stfle_fac_list, S390_lowcore.stfle_fac_list, MAX_FACILITY_BIT/8); - if (MACHINE_HAS_VX) - lc->vector_save_area_addr = - (unsigned long) &lc->vector_save_area; + if (MACHINE_HAS_VX || MACHINE_HAS_GS) { + unsigned long bits, size; + + bits = MACHINE_HAS_GS ? 11 : 10; + size = 1UL << bits; + lc->mcesad = (__u64) memblock_virt_alloc(size, size); + if (MACHINE_HAS_GS) + lc->mcesad |= bits; + } lc->vdso_per_cpu_data = (unsigned long) &lc->paste[0]; lc->sync_enter_timer = S390_lowcore.sync_enter_timer; lc->async_enter_timer = S390_lowcore.async_enter_timer; @@ -779,6 +785,12 @@ static int __init setup_hwcaps(void) elf_hwcap |= HWCAP_S390_VXRS_BCD; } + /* + * Guarded storage support HWCAP_S390_GS is bit 12. 
+ */ + if (MACHINE_HAS_GS) + elf_hwcap |= HWCAP_S390_GS; + get_cpu_id(&cpu_id); add_device_randomness(&cpu_id, sizeof(cpu_id)); switch (cpu_id.machine) { diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 5dab859b0d54..363000a77ffc 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -51,6 +51,7 @@ #include <asm/os_info.h> #include <asm/sigp.h> #include <asm/idle.h> +#include <asm/nmi.h> #include "entry.h" enum { @@ -78,6 +79,8 @@ struct pcpu { static u8 boot_core_type; static struct pcpu pcpu_devices[NR_CPUS]; +static struct kmem_cache *pcpu_mcesa_cache; + unsigned int smp_cpu_mt_shift; EXPORT_SYMBOL(smp_cpu_mt_shift); @@ -188,8 +191,10 @@ static void pcpu_ec_call(struct pcpu *pcpu, int ec_bit) static int pcpu_alloc_lowcore(struct pcpu *pcpu, int cpu) { unsigned long async_stack, panic_stack; + unsigned long mcesa_origin, mcesa_bits; struct lowcore *lc; + mcesa_origin = mcesa_bits = 0; if (pcpu != &pcpu_devices[0]) { pcpu->lowcore = (struct lowcore *) __get_free_pages(GFP_KERNEL | GFP_DMA, LC_ORDER); @@ -197,20 +202,27 @@ static int pcpu_alloc_lowcore(struct pcpu *pcpu, int cpu) panic_stack = __get_free_page(GFP_KERNEL); if (!pcpu->lowcore || !panic_stack || !async_stack) goto out; + if (MACHINE_HAS_VX || MACHINE_HAS_GS) { + mcesa_origin = (unsigned long) + kmem_cache_alloc(pcpu_mcesa_cache, GFP_KERNEL); + if (!mcesa_origin) + goto out; + mcesa_bits = MACHINE_HAS_GS ? 
11 : 0; + } } else { async_stack = pcpu->lowcore->async_stack - ASYNC_FRAME_OFFSET; panic_stack = pcpu->lowcore->panic_stack - PANIC_FRAME_OFFSET; + mcesa_origin = pcpu->lowcore->mcesad & MCESA_ORIGIN_MASK; + mcesa_bits = pcpu->lowcore->mcesad & MCESA_LC_MASK; } lc = pcpu->lowcore; memcpy(lc, &S390_lowcore, 512); memset((char *) lc + 512, 0, sizeof(*lc) - 512); lc->async_stack = async_stack + ASYNC_FRAME_OFFSET; lc->panic_stack = panic_stack + PANIC_FRAME_OFFSET; + lc->mcesad = mcesa_origin | mcesa_bits; lc->cpu_nr = cpu; lc->spinlock_lockval = arch_spin_lockval(cpu); - if (MACHINE_HAS_VX) - lc->vector_save_area_addr = - (unsigned long) &lc->vector_save_area; if (vdso_alloc_per_cpu(lc)) goto out; lowcore_ptr[cpu] = lc; @@ -218,6 +230,9 @@ static int pcpu_alloc_lowcore(struct pcpu *pcpu, int cpu) return 0; out: if (pcpu != &pcpu_devices[0]) { + if (mcesa_origin) + kmem_cache_free(pcpu_mcesa_cache, + (void *) mcesa_origin); free_page(panic_stack); free_pages(async_stack, ASYNC_ORDER); free_pages((unsigned long) pcpu->lowcore, LC_ORDER); @@ -229,11 +244,17 @@ out: static void pcpu_free_lowcore(struct pcpu *pcpu) { + unsigned long mcesa_origin; + pcpu_sigp_retry(pcpu, SIGP_SET_PREFIX, 0); lowcore_ptr[pcpu - pcpu_devices] = NULL; vdso_free_per_cpu(pcpu->lowcore); if (pcpu == &pcpu_devices[0]) return; + if (MACHINE_HAS_VX || MACHINE_HAS_GS) { + mcesa_origin = pcpu->lowcore->mcesad & MCESA_ORIGIN_MASK; + kmem_cache_free(pcpu_mcesa_cache, (void *) mcesa_origin); + } free_page(pcpu->lowcore->panic_stack-PANIC_FRAME_OFFSET); free_pages(pcpu->lowcore->async_stack-ASYNC_FRAME_OFFSET, ASYNC_ORDER); free_pages((unsigned long) pcpu->lowcore, LC_ORDER); @@ -550,9 +571,11 @@ int smp_store_status(int cpu) if (__pcpu_sigp_relax(pcpu->address, SIGP_STORE_STATUS_AT_ADDRESS, pa) != SIGP_CC_ORDER_CODE_ACCEPTED) return -EIO; - if (!MACHINE_HAS_VX) + if (!MACHINE_HAS_VX && !MACHINE_HAS_GS) return 0; - pa = __pa(pcpu->lowcore->vector_save_area_addr); + pa = __pa(pcpu->lowcore->mcesad & 
MCESA_ORIGIN_MASK); + if (MACHINE_HAS_GS) + pa |= pcpu->lowcore->mcesad & MCESA_LC_MASK; if (__pcpu_sigp_relax(pcpu->address, SIGP_STORE_ADDITIONAL_STATUS, pa) != SIGP_CC_ORDER_CODE_ACCEPTED) return -EIO; @@ -897,12 +920,22 @@ void __init smp_fill_possible_mask(void) void __init smp_prepare_cpus(unsigned int max_cpus) { + unsigned long size; + /* request the 0x1201 emergency signal external interrupt */ if (register_external_irq(EXT_IRQ_EMERGENCY_SIG, do_ext_call_interrupt)) panic("Couldn't request external interrupt 0x1201"); /* request the 0x1202 external call external interrupt */ if (register_external_irq(EXT_IRQ_EXTERNAL_CALL, do_ext_call_interrupt)) panic("Couldn't request external interrupt 0x1202"); + /* create slab cache for the machine-check-extended-save-areas */ + if (MACHINE_HAS_VX || MACHINE_HAS_GS) { + size = 1UL << (MACHINE_HAS_GS ? 11 : 10); + pcpu_mcesa_cache = kmem_cache_create("nmi_save_areas", + size, size, 0, NULL); + if (!pcpu_mcesa_cache) + panic("Couldn't create nmi save area cache"); + } } void __init smp_prepare_boot_cpu(void) diff --git a/arch/s390/kernel/syscalls.S b/arch/s390/kernel/syscalls.S index 2659b5cfeddb..54fce7b065de 100644 --- a/arch/s390/kernel/syscalls.S +++ b/arch/s390/kernel/syscalls.S @@ -386,5 +386,5 @@ SYSCALL(sys_mlock2,compat_sys_mlock2) SYSCALL(sys_copy_file_range,compat_sys_copy_file_range) /* 375 */ SYSCALL(sys_preadv2,compat_sys_preadv2) SYSCALL(sys_pwritev2,compat_sys_pwritev2) -NI_SYSCALL +SYSCALL(sys_s390_guarded_storage,compat_sys_s390_guarded_storage) /* 378 */ SYSCALL(sys_statx,compat_sys_statx) diff --git a/arch/s390/kernel/sysinfo.c b/arch/s390/kernel/sysinfo.c index 12b6b138e354..eefcb54872a5 100644 --- a/arch/s390/kernel/sysinfo.c +++ b/arch/s390/kernel/sysinfo.c @@ -4,6 +4,7 @@ * Martin Schwidefsky <schwidefsky@de.ibm.com>, */ +#include <linux/debugfs.h> #include <linux/kernel.h> #include <linux/mm.h> #include <linux/proc_fs.h> @@ -13,6 +14,7 @@ #include <linux/export.h> #include <linux/slab.h> 
#include <asm/ebcdic.h> +#include <asm/debug.h> #include <asm/sysinfo.h> #include <asm/cpcmd.h> #include <asm/topology.h> @@ -485,3 +487,99 @@ void calibrate_delay(void) "%lu.%02lu BogoMIPS preset\n", loops_per_jiffy/(500000/HZ), (loops_per_jiffy/(5000/HZ)) % 100); } + +#ifdef CONFIG_DEBUG_FS + +#define STSI_FILE(fc, s1, s2) \ +static int stsi_open_##fc##_##s1##_##s2(struct inode *inode, struct file *file)\ +{ \ + file->private_data = (void *) get_zeroed_page(GFP_KERNEL); \ + if (!file->private_data) \ + return -ENOMEM; \ + if (stsi(file->private_data, fc, s1, s2)) { \ + free_page((unsigned long)file->private_data); \ + file->private_data = NULL; \ + return -EACCES; \ + } \ + return nonseekable_open(inode, file); \ +} \ + \ +static const struct file_operations stsi_##fc##_##s1##_##s2##_fs_ops = { \ + .open = stsi_open_##fc##_##s1##_##s2, \ + .release = stsi_release, \ + .read = stsi_read, \ + .llseek = no_llseek, \ +}; + +static int stsi_release(struct inode *inode, struct file *file) +{ + free_page((unsigned long)file->private_data); + return 0; +} + +static ssize_t stsi_read(struct file *file, char __user *buf, size_t size, loff_t *ppos) +{ + return simple_read_from_buffer(buf, size, ppos, file->private_data, PAGE_SIZE); +} + +STSI_FILE( 1, 1, 1); +STSI_FILE( 1, 2, 1); +STSI_FILE( 1, 2, 2); +STSI_FILE( 2, 2, 1); +STSI_FILE( 2, 2, 2); +STSI_FILE( 3, 2, 2); +STSI_FILE(15, 1, 2); +STSI_FILE(15, 1, 3); +STSI_FILE(15, 1, 4); +STSI_FILE(15, 1, 5); +STSI_FILE(15, 1, 6); + +struct stsi_file { + const struct file_operations *fops; + char *name; +}; + +static struct stsi_file stsi_file[] __initdata = { + {.fops = &stsi_1_1_1_fs_ops, .name = "1_1_1"}, + {.fops = &stsi_1_2_1_fs_ops, .name = "1_2_1"}, + {.fops = &stsi_1_2_2_fs_ops, .name = "1_2_2"}, + {.fops = &stsi_2_2_1_fs_ops, .name = "2_2_1"}, + {.fops = &stsi_2_2_2_fs_ops, .name = "2_2_2"}, + {.fops = &stsi_3_2_2_fs_ops, .name = "3_2_2"}, + {.fops = &stsi_15_1_2_fs_ops, .name = "15_1_2"}, + {.fops = &stsi_15_1_3_fs_ops, 
.name = "15_1_3"}, + {.fops = &stsi_15_1_4_fs_ops, .name = "15_1_4"}, + {.fops = &stsi_15_1_5_fs_ops, .name = "15_1_5"}, + {.fops = &stsi_15_1_6_fs_ops, .name = "15_1_6"}, +}; + +static u8 stsi_0_0_0; + +static __init int stsi_init_debugfs(void) +{ + struct dentry *stsi_root; + struct stsi_file *sf; + int lvl, i; + + stsi_root = debugfs_create_dir("stsi", arch_debugfs_dir); + if (IS_ERR_OR_NULL(stsi_root)) + return 0; + lvl = stsi(NULL, 0, 0, 0); + if (lvl > 0) + stsi_0_0_0 = lvl; + debugfs_create_u8("0_0_0", 0400, stsi_root, &stsi_0_0_0); + for (i = 0; i < ARRAY_SIZE(stsi_file); i++) { + sf = &stsi_file[i]; + debugfs_create_file(sf->name, 0400, stsi_root, NULL, sf->fops); + } + if (IS_ENABLED(CONFIG_SCHED_TOPOLOGY) && MACHINE_HAS_TOPOLOGY) { + char link_to[10]; + + sprintf(link_to, "15_1_%d", topology_mnest_limit()); + debugfs_create_symlink("topology", stsi_root, link_to); + } + return 0; +} +device_initcall(stsi_init_debugfs); + +#endif /* CONFIG_DEBUG_FS */ diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c index 17660e800e74..bb47c92476f0 100644 --- a/arch/s390/kernel/topology.c +++ b/arch/s390/kernel/topology.c @@ -83,6 +83,8 @@ static cpumask_t cpu_thread_map(unsigned int cpu) return mask; } +#define TOPOLOGY_CORE_BITS 64 + static void add_cpus_to_mask(struct topology_core *tl_core, struct mask_info *drawer, struct mask_info *book, @@ -91,7 +93,7 @@ static void add_cpus_to_mask(struct topology_core *tl_core, struct cpu_topology_s390 *topo; unsigned int core; - for_each_set_bit(core, &tl_core->mask[0], TOPOLOGY_CORE_BITS) { + for_each_set_bit(core, &tl_core->mask, TOPOLOGY_CORE_BITS) { unsigned int rcore; int lcpu, i; @@ -244,7 +246,7 @@ static void update_cpu_masks(void) void store_topology(struct sysinfo_15_1_x *info) { - stsi(info, 15, 1, min(topology_max_mnest, 4)); + stsi(info, 15, 1, topology_mnest_limit()); } static int __arch_update_cpu_topology(void) diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index 
0f8f14199734..169558dc7daf 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -420,8 +420,8 @@ static int __write_machine_check(struct kvm_vcpu *vcpu, save_access_regs(vcpu->run->s.regs.acrs); /* Extended save area */ - rc = read_guest_lc(vcpu, __LC_VX_SAVE_AREA_ADDR, &ext_sa_addr, - sizeof(unsigned long)); + rc = read_guest_lc(vcpu, __LC_MCESAD, &ext_sa_addr, + sizeof(unsigned long)); /* Only bits 0-53 are used for address formation */ ext_sa_addr &= ~0x3ffUL; if (!rc && mci.vr && ext_sa_addr && test_kvm_facility(vcpu->kvm, 129)) { diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index fd6cd05bb6a7..d5c5c911821a 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -273,7 +273,7 @@ static void kvm_s390_cpu_feat_init(void) kvm_s390_available_subfunc.pcc); } if (test_facility(57)) /* MSA5 */ - __cpacf_query(CPACF_PPNO, (cpacf_mask_t *) + __cpacf_query(CPACF_PRNO, (cpacf_mask_t *) kvm_s390_available_subfunc.ppno); if (MACHINE_HAS_ESOP) @@ -1512,9 +1512,9 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT; } else { if (sclp.hamax == U64_MAX) - kvm->arch.mem_limit = TASK_MAX_SIZE; + kvm->arch.mem_limit = TASK_SIZE_MAX; else - kvm->arch.mem_limit = min_t(unsigned long, TASK_MAX_SIZE, + kvm->arch.mem_limit = min_t(unsigned long, TASK_SIZE_MAX, sclp.hamax + 1); kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1); if (!kvm->arch.gmap) diff --git a/arch/s390/lib/spinlock.c b/arch/s390/lib/spinlock.c index ba427eb6f14c..ffb15bd4c593 100644 --- a/arch/s390/lib/spinlock.c +++ b/arch/s390/lib/spinlock.c @@ -17,7 +17,7 @@ int spin_retry = -1; static int __init spin_retry_init(void) { if (spin_retry < 0) - spin_retry = MACHINE_HAS_CAD ? 
10 : 1000; + spin_retry = 1000; return 0; } early_initcall(spin_retry_init); @@ -32,23 +32,17 @@ static int __init spin_retry_setup(char *str) } __setup("spin_retry=", spin_retry_setup); -static inline void _raw_compare_and_delay(unsigned int *lock, unsigned int old) -{ - asm(".insn rsy,0xeb0000000022,%0,0,%1" : : "d" (old), "Q" (*lock)); -} - void arch_spin_lock_wait(arch_spinlock_t *lp) { - unsigned int cpu = SPINLOCK_LOCKVAL; - unsigned int owner; - int count, first_diag; + int cpu = SPINLOCK_LOCKVAL; + int owner, count, first_diag; first_diag = 1; while (1) { owner = ACCESS_ONCE(lp->lock); /* Try to get the lock if it is free. */ if (!owner) { - if (_raw_compare_and_swap(&lp->lock, 0, cpu)) + if (__atomic_cmpxchg_bool(&lp->lock, 0, cpu)) return; continue; } @@ -61,8 +55,6 @@ void arch_spin_lock_wait(arch_spinlock_t *lp) /* Loop for a while on the lock value. */ count = spin_retry; do { - if (MACHINE_HAS_CAD) - _raw_compare_and_delay(&lp->lock, owner); owner = ACCESS_ONCE(lp->lock); } while (owner && count-- > 0); if (!owner) @@ -82,9 +74,8 @@ EXPORT_SYMBOL(arch_spin_lock_wait); void arch_spin_lock_wait_flags(arch_spinlock_t *lp, unsigned long flags) { - unsigned int cpu = SPINLOCK_LOCKVAL; - unsigned int owner; - int count, first_diag; + int cpu = SPINLOCK_LOCKVAL; + int owner, count, first_diag; local_irq_restore(flags); first_diag = 1; @@ -93,7 +84,7 @@ void arch_spin_lock_wait_flags(arch_spinlock_t *lp, unsigned long flags) /* Try to get the lock if it is free. */ if (!owner) { local_irq_disable(); - if (_raw_compare_and_swap(&lp->lock, 0, cpu)) + if (__atomic_cmpxchg_bool(&lp->lock, 0, cpu)) return; local_irq_restore(flags); continue; @@ -107,8 +98,6 @@ void arch_spin_lock_wait_flags(arch_spinlock_t *lp, unsigned long flags) /* Loop for a while on the lock value. 
*/ count = spin_retry; do { - if (MACHINE_HAS_CAD) - _raw_compare_and_delay(&lp->lock, owner); owner = ACCESS_ONCE(lp->lock); } while (owner && count-- > 0); if (!owner) @@ -128,18 +117,16 @@ EXPORT_SYMBOL(arch_spin_lock_wait_flags); int arch_spin_trylock_retry(arch_spinlock_t *lp) { - unsigned int cpu = SPINLOCK_LOCKVAL; - unsigned int owner; - int count; + int cpu = SPINLOCK_LOCKVAL; + int owner, count; for (count = spin_retry; count > 0; count--) { owner = READ_ONCE(lp->lock); /* Try to get the lock if it is free. */ if (!owner) { - if (_raw_compare_and_swap(&lp->lock, 0, cpu)) + if (__atomic_cmpxchg_bool(&lp->lock, 0, cpu)) return 1; - } else if (MACHINE_HAS_CAD) - _raw_compare_and_delay(&lp->lock, owner); + } } return 0; } @@ -147,8 +134,8 @@ EXPORT_SYMBOL(arch_spin_trylock_retry); void _raw_read_lock_wait(arch_rwlock_t *rw) { - unsigned int owner, old; int count = spin_retry; + int owner, old; #ifdef CONFIG_HAVE_MARCH_Z196_FEATURES __RAW_LOCK(&rw->lock, -1, __RAW_OP_ADD); @@ -162,12 +149,9 @@ void _raw_read_lock_wait(arch_rwlock_t *rw) } old = ACCESS_ONCE(rw->lock); owner = ACCESS_ONCE(rw->owner); - if ((int) old < 0) { - if (MACHINE_HAS_CAD) - _raw_compare_and_delay(&rw->lock, old); + if (old < 0) continue; - } - if (_raw_compare_and_swap(&rw->lock, old, old + 1)) + if (__atomic_cmpxchg_bool(&rw->lock, old, old + 1)) return; } } @@ -175,17 +159,14 @@ EXPORT_SYMBOL(_raw_read_lock_wait); int _raw_read_trylock_retry(arch_rwlock_t *rw) { - unsigned int old; int count = spin_retry; + int old; while (count-- > 0) { old = ACCESS_ONCE(rw->lock); - if ((int) old < 0) { - if (MACHINE_HAS_CAD) - _raw_compare_and_delay(&rw->lock, old); + if (old < 0) continue; - } - if (_raw_compare_and_swap(&rw->lock, old, old + 1)) + if (__atomic_cmpxchg_bool(&rw->lock, old, old + 1)) return 1; } return 0; @@ -194,10 +175,10 @@ EXPORT_SYMBOL(_raw_read_trylock_retry); #ifdef CONFIG_HAVE_MARCH_Z196_FEATURES -void _raw_write_lock_wait(arch_rwlock_t *rw, unsigned int prev) +void 
_raw_write_lock_wait(arch_rwlock_t *rw, int prev) { - unsigned int owner, old; int count = spin_retry; + int owner, old; owner = 0; while (1) { @@ -209,14 +190,12 @@ void _raw_write_lock_wait(arch_rwlock_t *rw, unsigned int prev) old = ACCESS_ONCE(rw->lock); owner = ACCESS_ONCE(rw->owner); smp_mb(); - if ((int) old >= 0) { + if (old >= 0) { prev = __RAW_LOCK(&rw->lock, 0x80000000, __RAW_OP_OR); old = prev; } - if ((old & 0x7fffffff) == 0 && (int) prev >= 0) + if ((old & 0x7fffffff) == 0 && prev >= 0) break; - if (MACHINE_HAS_CAD) - _raw_compare_and_delay(&rw->lock, old); } } EXPORT_SYMBOL(_raw_write_lock_wait); @@ -225,8 +204,8 @@ EXPORT_SYMBOL(_raw_write_lock_wait); void _raw_write_lock_wait(arch_rwlock_t *rw) { - unsigned int owner, old, prev; int count = spin_retry; + int owner, old, prev; prev = 0x80000000; owner = 0; @@ -238,15 +217,13 @@ void _raw_write_lock_wait(arch_rwlock_t *rw) } old = ACCESS_ONCE(rw->lock); owner = ACCESS_ONCE(rw->owner); - if ((int) old >= 0 && - _raw_compare_and_swap(&rw->lock, old, old | 0x80000000)) + if (old >= 0 && + __atomic_cmpxchg_bool(&rw->lock, old, old | 0x80000000)) prev = old; else smp_mb(); - if ((old & 0x7fffffff) == 0 && (int) prev >= 0) + if ((old & 0x7fffffff) == 0 && prev >= 0) break; - if (MACHINE_HAS_CAD) - _raw_compare_and_delay(&rw->lock, old); } } EXPORT_SYMBOL(_raw_write_lock_wait); @@ -255,24 +232,21 @@ EXPORT_SYMBOL(_raw_write_lock_wait); int _raw_write_trylock_retry(arch_rwlock_t *rw) { - unsigned int old; int count = spin_retry; + int old; while (count-- > 0) { old = ACCESS_ONCE(rw->lock); - if (old) { - if (MACHINE_HAS_CAD) - _raw_compare_and_delay(&rw->lock, old); + if (old) continue; - } - if (_raw_compare_and_swap(&rw->lock, 0, 0x80000000)) + if (__atomic_cmpxchg_bool(&rw->lock, 0, 0x80000000)) return 1; } return 0; } EXPORT_SYMBOL(_raw_write_trylock_retry); -void arch_lock_relax(unsigned int cpu) +void arch_lock_relax(int cpu) { if (!cpu) return; diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c 
index a07b1ec1391d..7f6db1e6c048 100644 --- a/arch/s390/mm/gmap.c +++ b/arch/s390/mm/gmap.c @@ -431,7 +431,7 @@ int gmap_map_segment(struct gmap *gmap, unsigned long from, if ((from | to | len) & (PMD_SIZE - 1)) return -EINVAL; if (len == 0 || from + len < from || to + len < to || - from + len - 1 > TASK_MAX_SIZE || to + len - 1 > gmap->asce_end) + from + len - 1 > TASK_SIZE_MAX || to + len - 1 > gmap->asce_end) return -EINVAL; flush = 0; @@ -2004,20 +2004,12 @@ EXPORT_SYMBOL_GPL(gmap_shadow_page); * Called with sg->parent->shadow_lock. */ static void gmap_shadow_notify(struct gmap *sg, unsigned long vmaddr, - unsigned long offset, pte_t *pte) + unsigned long gaddr, pte_t *pte) { struct gmap_rmap *rmap, *rnext, *head; - unsigned long gaddr, start, end, bits, raddr; - unsigned long *table; + unsigned long start, end, bits, raddr; BUG_ON(!gmap_is_shadow(sg)); - spin_lock(&sg->parent->guest_table_lock); - table = radix_tree_lookup(&sg->parent->host_to_guest, - vmaddr >> PMD_SHIFT); - gaddr = table ? 
__gmap_segment_gaddr(table) + offset : 0; - spin_unlock(&sg->parent->guest_table_lock); - if (!table) - return; spin_lock(&sg->guest_table_lock); if (sg->removed) { @@ -2076,7 +2068,7 @@ static void gmap_shadow_notify(struct gmap *sg, unsigned long vmaddr, void ptep_notify(struct mm_struct *mm, unsigned long vmaddr, pte_t *pte, unsigned long bits) { - unsigned long offset, gaddr; + unsigned long offset, gaddr = 0; unsigned long *table; struct gmap *gmap, *sg, *next; @@ -2084,22 +2076,23 @@ void ptep_notify(struct mm_struct *mm, unsigned long vmaddr, offset = offset * (4096 / sizeof(pte_t)); rcu_read_lock(); list_for_each_entry_rcu(gmap, &mm->context.gmap_list, list) { - if (!list_empty(&gmap->children) && (bits & PGSTE_VSIE_BIT)) { - spin_lock(&gmap->shadow_lock); - list_for_each_entry_safe(sg, next, - &gmap->children, list) - gmap_shadow_notify(sg, vmaddr, offset, pte); - spin_unlock(&gmap->shadow_lock); - } - if (!(bits & PGSTE_IN_BIT)) - continue; spin_lock(&gmap->guest_table_lock); table = radix_tree_lookup(&gmap->host_to_guest, vmaddr >> PMD_SHIFT); if (table) gaddr = __gmap_segment_gaddr(table) + offset; spin_unlock(&gmap->guest_table_lock); - if (table) + if (!table) + continue; + + if (!list_empty(&gmap->children) && (bits & PGSTE_VSIE_BIT)) { + spin_lock(&gmap->shadow_lock); + list_for_each_entry_safe(sg, next, + &gmap->children, list) + gmap_shadow_notify(sg, vmaddr, gaddr, pte); + spin_unlock(&gmap->shadow_lock); + } + if (bits & PGSTE_IN_BIT) gmap_call_notifier(gmap, gaddr, gaddr + PAGE_SIZE - 1); } rcu_read_unlock(); diff --git a/arch/s390/mm/gup.c b/arch/s390/mm/gup.c index 18d4107e10ee..b7b779c40a5b 100644 --- a/arch/s390/mm/gup.c +++ b/arch/s390/mm/gup.c @@ -211,7 +211,7 @@ int __get_user_pages_fast(unsigned long start, int nr_pages, int write, addr = start; len = (unsigned long) nr_pages << PAGE_SHIFT; end = start + len; - if ((end <= start) || (end > TASK_SIZE)) + if ((end <= start) || (end > mm->context.asce_limit)) return 0; /* * 
local_irq_save() doesn't prevent pagetable teardown, but does diff --git a/arch/s390/mm/mmap.c b/arch/s390/mm/mmap.c index 50618614881f..b017daed6887 100644 --- a/arch/s390/mm/mmap.c +++ b/arch/s390/mm/mmap.c @@ -89,19 +89,20 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr, struct mm_struct *mm = current->mm; struct vm_area_struct *vma; struct vm_unmapped_area_info info; + int rc; if (len > TASK_SIZE - mmap_min_addr) return -ENOMEM; if (flags & MAP_FIXED) - return addr; + goto check_asce_limit; if (addr) { addr = PAGE_ALIGN(addr); vma = find_vma(mm, addr); if (TASK_SIZE - len >= addr && addr >= mmap_min_addr && (!vma || addr + len <= vma->vm_start)) - return addr; + goto check_asce_limit; } info.flags = 0; @@ -113,7 +114,18 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr, else info.align_mask = 0; info.align_offset = pgoff << PAGE_SHIFT; - return vm_unmapped_area(&info); + addr = vm_unmapped_area(&info); + if (addr & ~PAGE_MASK) + return addr; + +check_asce_limit: + if (addr + len > current->mm->context.asce_limit) { + rc = crst_table_upgrade(mm); + if (rc) + return (unsigned long) rc; + } + + return addr; } unsigned long @@ -125,13 +137,14 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, struct mm_struct *mm = current->mm; unsigned long addr = addr0; struct vm_unmapped_area_info info; + int rc; /* requested length too big for entire address space */ if (len > TASK_SIZE - mmap_min_addr) return -ENOMEM; if (flags & MAP_FIXED) - return addr; + goto check_asce_limit; /* requesting a specific address */ if (addr) { @@ -139,7 +152,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, vma = find_vma(mm, addr); if (TASK_SIZE - len >= addr && addr >= mmap_min_addr && (!vma || addr + len <= vma->vm_start)) - return addr; + goto check_asce_limit; } info.flags = VM_UNMAPPED_AREA_TOPDOWN; @@ -165,65 +178,20 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, 
info.low_limit = TASK_UNMAPPED_BASE; info.high_limit = TASK_SIZE; addr = vm_unmapped_area(&info); + if (addr & ~PAGE_MASK) + return addr; } - return addr; -} - -int s390_mmap_check(unsigned long addr, unsigned long len, unsigned long flags) -{ - if (is_compat_task() || TASK_SIZE >= TASK_MAX_SIZE) - return 0; - if (!(flags & MAP_FIXED)) - addr = 0; - if ((addr + len) >= TASK_SIZE) - return crst_table_upgrade(current->mm); - return 0; -} - -static unsigned long -s390_get_unmapped_area(struct file *filp, unsigned long addr, - unsigned long len, unsigned long pgoff, unsigned long flags) -{ - struct mm_struct *mm = current->mm; - unsigned long area; - int rc; - - area = arch_get_unmapped_area(filp, addr, len, pgoff, flags); - if (!(area & ~PAGE_MASK)) - return area; - if (area == -ENOMEM && !is_compat_task() && TASK_SIZE < TASK_MAX_SIZE) { - /* Upgrade the page table to 4 levels and retry. */ +check_asce_limit: + if (addr + len > current->mm->context.asce_limit) { rc = crst_table_upgrade(mm); if (rc) return (unsigned long) rc; - area = arch_get_unmapped_area(filp, addr, len, pgoff, flags); } - return area; -} - -static unsigned long -s390_get_unmapped_area_topdown(struct file *filp, const unsigned long addr, - const unsigned long len, const unsigned long pgoff, - const unsigned long flags) -{ - struct mm_struct *mm = current->mm; - unsigned long area; - int rc; - area = arch_get_unmapped_area_topdown(filp, addr, len, pgoff, flags); - if (!(area & ~PAGE_MASK)) - return area; - if (area == -ENOMEM && !is_compat_task() && TASK_SIZE < TASK_MAX_SIZE) { - /* Upgrade the page table to 4 levels and retry. 
*/ - rc = crst_table_upgrade(mm); - if (rc) - return (unsigned long) rc; - area = arch_get_unmapped_area_topdown(filp, addr, len, - pgoff, flags); - } - return area; + return addr; } + /* * This function, called very early during the creation of a new * process VM image, sets up which VM layout function to use: @@ -241,9 +209,9 @@ void arch_pick_mmap_layout(struct mm_struct *mm) */ if (mmap_is_legacy()) { mm->mmap_base = mmap_base_legacy(random_factor); - mm->get_unmapped_area = s390_get_unmapped_area; + mm->get_unmapped_area = arch_get_unmapped_area; } else { mm->mmap_base = mmap_base(random_factor); - mm->get_unmapped_area = s390_get_unmapped_area_topdown; + mm->get_unmapped_area = arch_get_unmapped_area_topdown; } } diff --git a/arch/s390/mm/page-states.c b/arch/s390/mm/page-states.c index 3330ea124eec..69a7b01ae746 100644 --- a/arch/s390/mm/page-states.c +++ b/arch/s390/mm/page-states.c @@ -13,8 +13,7 @@ #include <linux/gfp.h> #include <linux/init.h> -#define ESSA_SET_STABLE 1 -#define ESSA_SET_UNUSED 2 +#include <asm/page-states.h> static int cmma_flag = 1; diff --git a/arch/s390/mm/pageattr.c b/arch/s390/mm/pageattr.c index fc5dc33bb141..fc321c5ec30e 100644 --- a/arch/s390/mm/pageattr.c +++ b/arch/s390/mm/pageattr.c @@ -94,7 +94,7 @@ static int walk_pte_level(pmd_t *pmdp, unsigned long addr, unsigned long end, new = pte_wrprotect(new); else if (flags & SET_MEMORY_RW) new = pte_mkwrite(pte_mkdirty(new)); - if ((flags & SET_MEMORY_NX) && MACHINE_HAS_NX) + if (flags & SET_MEMORY_NX) pte_val(new) |= _PAGE_NOEXEC; else if (flags & SET_MEMORY_X) pte_val(new) &= ~_PAGE_NOEXEC; @@ -144,7 +144,7 @@ static void modify_pmd_page(pmd_t *pmdp, unsigned long addr, new = pmd_wrprotect(new); else if (flags & SET_MEMORY_RW) new = pmd_mkwrite(pmd_mkdirty(new)); - if ((flags & SET_MEMORY_NX) && MACHINE_HAS_NX) + if (flags & SET_MEMORY_NX) pmd_val(new) |= _SEGMENT_ENTRY_NOEXEC; else if (flags & SET_MEMORY_X) pmd_val(new) &= ~_SEGMENT_ENTRY_NOEXEC; @@ -221,7 +221,7 @@ static void 
modify_pud_page(pud_t *pudp, unsigned long addr, new = pud_wrprotect(new); else if (flags & SET_MEMORY_RW) new = pud_mkwrite(pud_mkdirty(new)); - if ((flags & SET_MEMORY_NX) && MACHINE_HAS_NX) + if (flags & SET_MEMORY_NX) pud_val(new) |= _REGION_ENTRY_NOEXEC; else if (flags & SET_MEMORY_X) pud_val(new) &= ~_REGION_ENTRY_NOEXEC; @@ -288,6 +288,10 @@ static int change_page_attr(unsigned long addr, unsigned long end, int __set_memory(unsigned long addr, int numpages, unsigned long flags) { + if (!MACHINE_HAS_NX) + flags &= ~(SET_MEMORY_NX | SET_MEMORY_X); + if (!flags) + return 0; addr &= PAGE_MASK; return change_page_attr(addr, addr + numpages * PAGE_SIZE, flags); } diff --git a/arch/s390/mm/pgalloc.c b/arch/s390/mm/pgalloc.c index 995f78532cc2..f502cbe657af 100644 --- a/arch/s390/mm/pgalloc.c +++ b/arch/s390/mm/pgalloc.c @@ -95,7 +95,6 @@ int crst_table_upgrade(struct mm_struct *mm) mm->context.asce_limit = 1UL << 53; mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH | _ASCE_USER_BITS | _ASCE_TYPE_REGION2; - mm->task_size = mm->context.asce_limit; spin_unlock_bh(&mm->page_table_lock); on_each_cpu(__crst_table_upgrade, mm, 0); @@ -119,7 +118,6 @@ void crst_table_downgrade(struct mm_struct *mm) mm->context.asce_limit = 1UL << 31; mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH | _ASCE_USER_BITS | _ASCE_TYPE_SEGMENT; - mm->task_size = mm->context.asce_limit; crst_table_free(mm, (unsigned long *) pgd); if (current->active_mm == mm) @@ -144,7 +142,7 @@ struct page *page_table_alloc_pgste(struct mm_struct *mm) struct page *page; unsigned long *table; - page = alloc_page(GFP_KERNEL|__GFP_REPEAT); + page = alloc_page(GFP_KERNEL); if (page) { table = (unsigned long *) page_to_phys(page); clear_table(table, _PAGE_INVALID, PAGE_SIZE/2); diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index 463e5ef02304..947b66a5cdba 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -23,6 +23,7 @@ #include <asm/tlb.h> #include <asm/tlbflush.h> #include 
<asm/mmu_context.h> +#include <asm/page-states.h> static inline pte_t ptep_flush_direct(struct mm_struct *mm, unsigned long addr, pte_t *ptep) @@ -787,4 +788,156 @@ int get_guest_storage_key(struct mm_struct *mm, unsigned long addr, return 0; } EXPORT_SYMBOL(get_guest_storage_key); + +/** + * pgste_perform_essa - perform ESSA actions on the PGSTE. + * @mm: the memory context. It must have PGSTEs, no check is performed here! + * @hva: the host virtual address of the page whose PGSTE is to be processed + * @orc: the specific action to perform, see the ESSA_SET_* macros. + * @oldpte: the PTE will be saved there if the pointer is not NULL. + * @oldpgste: the old PGSTE will be saved there if the pointer is not NULL. + * + * Return: 1 if the page is to be added to the CBRL, otherwise 0, + * or < 0 in case of error. -EINVAL is returned for invalid values + * of orc, -EFAULT for invalid addresses. + */ +int pgste_perform_essa(struct mm_struct *mm, unsigned long hva, int orc, + unsigned long *oldpte, unsigned long *oldpgste) +{ + unsigned long pgstev; + spinlock_t *ptl; + pgste_t pgste; + pte_t *ptep; + int res = 0; + + WARN_ON_ONCE(orc > ESSA_MAX); + if (unlikely(orc > ESSA_MAX)) + return -EINVAL; + ptep = get_locked_pte(mm, hva, &ptl); + if (unlikely(!ptep)) + return -EFAULT; + pgste = pgste_get_lock(ptep); + pgstev = pgste_val(pgste); + if (oldpte) + *oldpte = pte_val(*ptep); + if (oldpgste) + *oldpgste = pgstev; + + switch (orc) { + case ESSA_GET_STATE: + break; + case ESSA_SET_STABLE: + pgstev &= ~_PGSTE_GPS_USAGE_MASK; + pgstev |= _PGSTE_GPS_USAGE_STABLE; + break; + case ESSA_SET_UNUSED: + pgstev &= ~_PGSTE_GPS_USAGE_MASK; + pgstev |= _PGSTE_GPS_USAGE_UNUSED; + if (pte_val(*ptep) & _PAGE_INVALID) + res = 1; + break; + case ESSA_SET_VOLATILE: + pgstev &= ~_PGSTE_GPS_USAGE_MASK; + pgstev |= _PGSTE_GPS_USAGE_VOLATILE; + if (pte_val(*ptep) & _PAGE_INVALID) + res = 1; + break; + case ESSA_SET_POT_VOLATILE: + pgstev &= ~_PGSTE_GPS_USAGE_MASK; + if (!(pte_val(*ptep) & 
_PAGE_INVALID)) { + pgstev |= _PGSTE_GPS_USAGE_POT_VOLATILE; + break; + } + if (pgstev & _PGSTE_GPS_ZERO) { + pgstev |= _PGSTE_GPS_USAGE_VOLATILE; + break; + } + if (!(pgstev & PGSTE_GC_BIT)) { + pgstev |= _PGSTE_GPS_USAGE_VOLATILE; + res = 1; + break; + } + break; + case ESSA_SET_STABLE_RESIDENT: + pgstev &= ~_PGSTE_GPS_USAGE_MASK; + pgstev |= _PGSTE_GPS_USAGE_STABLE; + /* + * Since the resident state can go away any time after this + * call, we will not make this page resident. We can revisit + * this decision if a guest will ever start using this. + */ + break; + case ESSA_SET_STABLE_IF_RESIDENT: + if (!(pte_val(*ptep) & _PAGE_INVALID)) { + pgstev &= ~_PGSTE_GPS_USAGE_MASK; + pgstev |= _PGSTE_GPS_USAGE_STABLE; + } + break; + default: + /* we should never get here! */ + break; + } + /* If we are discarding a page, set it to logical zero */ + if (res) + pgstev |= _PGSTE_GPS_ZERO; + + pgste_val(pgste) = pgstev; + pgste_set_unlock(ptep, pgste); + pte_unmap_unlock(ptep, ptl); + return res; +} +EXPORT_SYMBOL(pgste_perform_essa); + +/** + * set_pgste_bits - set specific PGSTE bits. + * @mm: the memory context. It must have PGSTEs, no check is performed here! + * @hva: the host virtual address of the page whose PGSTE is to be processed + * @bits: a bitmask representing the bits that will be touched + * @value: the values of the bits to be written. Only the bits in the mask + * will be written. + * + * Return: 0 on success, < 0 in case of error. + */ +int set_pgste_bits(struct mm_struct *mm, unsigned long hva, + unsigned long bits, unsigned long value) +{ + spinlock_t *ptl; + pgste_t new; + pte_t *ptep; + + ptep = get_locked_pte(mm, hva, &ptl); + if (unlikely(!ptep)) + return -EFAULT; + new = pgste_get_lock(ptep); + + pgste_val(new) &= ~bits; + pgste_val(new) |= value & bits; + + pgste_set_unlock(ptep, new); + pte_unmap_unlock(ptep, ptl); + return 0; +} +EXPORT_SYMBOL(set_pgste_bits); + +/** + * get_pgste - get the current PGSTE for the given address. 
+ * @mm: the memory context. It must have PGSTEs, no check is performed here! + * @hva: the host virtual address of the page whose PGSTE is to be processed + * @pgstep: will be written with the current PGSTE for the given address. + * + * Return: 0 on success, < 0 in case of error. + */ +int get_pgste(struct mm_struct *mm, unsigned long hva, unsigned long *pgstep) +{ + spinlock_t *ptl; + pte_t *ptep; + + ptep = get_locked_pte(mm, hva, &ptl); + if (unlikely(!ptep)) + return -EFAULT; + *pgstep = pgste_val(pgste_get(ptep)); + pte_unmap_unlock(ptep, ptl); + return 0; +} +EXPORT_SYMBOL(get_pgste); #endif diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c index 364b9d824be3..8051df109db3 100644 --- a/arch/s390/pci/pci.c +++ b/arch/s390/pci/pci.c @@ -60,16 +60,8 @@ static DEFINE_SPINLOCK(zpci_domain_lock); static struct airq_iv *zpci_aisb_iv; static struct airq_iv *zpci_aibv[ZPCI_NR_DEVICES]; -/* Adapter interrupt definitions */ -static void zpci_irq_handler(struct airq_struct *airq); - -static struct airq_struct zpci_airq = { - .handler = zpci_irq_handler, - .isc = PCI_ISC, -}; - #define ZPCI_IOMAP_ENTRIES \ - min(((unsigned long) CONFIG_PCI_NR_FUNCTIONS * PCI_BAR_COUNT), \ + min(((unsigned long) ZPCI_NR_DEVICES * PCI_BAR_COUNT / 2), \ ZPCI_IOMAP_MAX_ENTRIES) static DEFINE_SPINLOCK(zpci_iomap_lock); @@ -214,8 +206,6 @@ int zpci_fmb_disable_device(struct zpci_dev *zdev) return rc; } -#define ZPCI_PCIAS_CFGSPC 15 - static int zpci_cfg_load(struct zpci_dev *zdev, int offset, u32 *val, u8 len) { u64 req = ZPCI_CREATE_REQ(zdev->fh, ZPCI_PCIAS_CFGSPC, len); @@ -507,6 +497,11 @@ static void zpci_unmap_resources(struct pci_dev *pdev) } } +static struct airq_struct zpci_airq = { + .handler = zpci_irq_handler, + .isc = PCI_ISC, +}; + static int __init zpci_irq_init(void) { int rc; @@ -871,11 +866,6 @@ int zpci_report_error(struct pci_dev *pdev, } EXPORT_SYMBOL(zpci_report_error); -static inline int barsize(u8 size) -{ - return (size) ? 
(1 << size) >> 10 : 0; -} - static int zpci_mem_init(void) { BUILD_BUG_ON(!is_power_of_2(__alignof__(struct zpci_fmb)) || |