diff options
Diffstat (limited to 'arch/s390')
43 files changed, 1460 insertions, 603 deletions
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 8e58c614c37d..b06dc3839268 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -115,7 +115,7 @@ config S390 select HAVE_ARCH_SECCOMP_FILTER select HAVE_ARCH_TRACEHOOK select HAVE_ARCH_TRANSPARENT_HUGEPAGE - select HAVE_BPF_JIT if PACK_STACK && HAVE_MARCH_Z9_109_FEATURES + select HAVE_BPF_JIT if PACK_STACK && HAVE_MARCH_Z196_FEATURES select HAVE_CMPXCHG_DOUBLE select HAVE_CMPXCHG_LOCAL select HAVE_DEBUG_KMEMLEAK diff --git a/arch/s390/crypto/crypt_s390.h b/arch/s390/crypto/crypt_s390.h index ba3b2aefddf5..d9c4c313fbc6 100644 --- a/arch/s390/crypto/crypt_s390.h +++ b/arch/s390/crypto/crypt_s390.h @@ -3,9 +3,10 @@ * * Support for s390 cryptographic instructions. * - * Copyright IBM Corp. 2003, 2007 + * Copyright IBM Corp. 2003, 2015 * Author(s): Thomas Spatzier * Jan Glauber (jan.glauber@de.ibm.com) + * Harald Freudenberger (freude@de.ibm.com) * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the Free @@ -28,15 +29,17 @@ #define CRYPT_S390_MSA 0x1 #define CRYPT_S390_MSA3 0x2 #define CRYPT_S390_MSA4 0x4 +#define CRYPT_S390_MSA5 0x8 /* s390 cryptographic operations */ enum crypt_s390_operations { - CRYPT_S390_KM = 0x0100, - CRYPT_S390_KMC = 0x0200, - CRYPT_S390_KIMD = 0x0300, - CRYPT_S390_KLMD = 0x0400, - CRYPT_S390_KMAC = 0x0500, - CRYPT_S390_KMCTR = 0x0600 + CRYPT_S390_KM = 0x0100, + CRYPT_S390_KMC = 0x0200, + CRYPT_S390_KIMD = 0x0300, + CRYPT_S390_KLMD = 0x0400, + CRYPT_S390_KMAC = 0x0500, + CRYPT_S390_KMCTR = 0x0600, + CRYPT_S390_PPNO = 0x0700 }; /* @@ -138,6 +141,16 @@ enum crypt_s390_kmac_func { KMAC_TDEA_192 = CRYPT_S390_KMAC | 3 }; +/* + * function codes for PPNO (PERFORM PSEUDORANDOM NUMBER + * OPERATION) instruction + */ +enum crypt_s390_ppno_func { + PPNO_QUERY = CRYPT_S390_PPNO | 0, + PPNO_SHA512_DRNG_GEN = CRYPT_S390_PPNO | 3, + PPNO_SHA512_DRNG_SEED = CRYPT_S390_PPNO | 0x83 +}; + /** * crypt_s390_km: * @func: the function code passed to KM; see crypt_s390_km_func @@ -162,11 +175,11 @@ static inline int crypt_s390_km(long func, void *param, int ret; asm volatile( - "0: .insn rre,0xb92e0000,%3,%1 \n" /* KM opcode */ - "1: brc 1,0b \n" /* handle partial completion */ + "0: .insn rre,0xb92e0000,%3,%1\n" /* KM opcode */ + "1: brc 1,0b\n" /* handle partial completion */ " la %0,0\n" "2:\n" - EX_TABLE(0b,2b) EX_TABLE(1b,2b) + EX_TABLE(0b, 2b) EX_TABLE(1b, 2b) : "=d" (ret), "+a" (__src), "+d" (__src_len), "+a" (__dest) : "d" (__func), "a" (__param), "0" (-1) : "cc", "memory"); if (ret < 0) @@ -198,11 +211,11 @@ static inline int crypt_s390_kmc(long func, void *param, int ret; asm volatile( - "0: .insn rre,0xb92f0000,%3,%1 \n" /* KMC opcode */ - "1: brc 1,0b \n" /* handle partial completion */ + "0: .insn rre,0xb92f0000,%3,%1\n" /* KMC opcode */ + "1: brc 1,0b\n" /* handle partial completion */ " la %0,0\n" "2:\n" - EX_TABLE(0b,2b) EX_TABLE(1b,2b) + EX_TABLE(0b, 2b) EX_TABLE(1b, 2b) : "=d" (ret), "+a" (__src), "+d" (__src_len), "+a" (__dest) : "d" (__func), "a" (__param), "0" (-1) : "cc", "memory"); if (ret < 0) @@ -233,11 +246,11 @@ static inline int crypt_s390_kimd(long func, void *param, int ret; asm volatile( - "0: .insn rre,0xb93e0000,%1,%1 \n" /* KIMD opcode */ - "1: brc 1,0b \n" /* handle partial completion */ + "0: .insn rre,0xb93e0000,%1,%1\n" /* KIMD opcode */ + "1: brc 1,0b\n" /* handle partial completion */ " la %0,0\n" "2:\n" - EX_TABLE(0b,2b) EX_TABLE(1b,2b) + EX_TABLE(0b, 2b) EX_TABLE(1b, 2b) : "=d" (ret), "+a" (__src), "+d" (__src_len) : "d" (__func), "a" (__param), "0" (-1) : "cc", "memory"); if (ret < 0) @@ -267,11 +280,11 @@ static inline int crypt_s390_klmd(long func, void *param, int ret; asm volatile( - "0: .insn rre,0xb93f0000,%1,%1 \n" /* KLMD opcode */ - "1: brc 1,0b \n" /* handle partial completion */ + "0: .insn rre,0xb93f0000,%1,%1\n" /* KLMD opcode */ + "1: brc 1,0b\n" /* handle partial completion */ " la %0,0\n" "2:\n" - EX_TABLE(0b,2b) EX_TABLE(1b,2b) + EX_TABLE(0b, 2b) EX_TABLE(1b, 2b) : "=d" (ret), "+a" (__src), "+d" (__src_len) : "d" (__func), "a" (__param), "0" (-1) : "cc", "memory"); if (ret < 0) @@ -302,11 +315,11 @@ static inline int crypt_s390_kmac(long func, void *param, int ret; asm volatile( - "0: .insn rre,0xb91e0000,%1,%1 \n" /* KLAC opcode */ - "1: brc 1,0b \n" /* handle partial completion */ + "0: .insn rre,0xb91e0000,%1,%1\n" /* KLAC opcode */ + "1: brc 1,0b\n" /* handle partial completion */ " la %0,0\n" "2:\n" - EX_TABLE(0b,2b) EX_TABLE(1b,2b) + EX_TABLE(0b, 2b) EX_TABLE(1b, 2b) : "=d" (ret), "+a" (__src), "+d" (__src_len) : "d" (__func), "a" (__param), "0" (-1) : "cc", "memory"); if (ret < 0) @@ -340,11 +353,11 @@ static inline int crypt_s390_kmctr(long func, void *param, u8 *dest, int ret = -1; asm volatile( - "0: .insn rrf,0xb92d0000,%3,%1,%4,0 \n" /* KMCTR opcode */ - "1: brc 1,0b \n" /* handle partial completion */ + "0: .insn rrf,0xb92d0000,%3,%1,%4,0\n" /* KMCTR opcode */ + "1: brc 1,0b\n" /* handle partial completion */ " la %0,0\n" "2:\n" - EX_TABLE(0b,2b) EX_TABLE(1b,2b) + EX_TABLE(0b, 2b) EX_TABLE(1b, 2b) : "+d" (ret), "+a" (__src), "+d" (__src_len), "+a" (__dest), "+a" (__ctr) : "d" (__func), "a" (__param) : "cc", "memory"); @@ -354,6 +367,47 @@ static inline int crypt_s390_kmctr(long func, void *param, u8 *dest, } /** + * crypt_s390_ppno: + * @func: the function code passed to PPNO; see crypt_s390_ppno_func + * @param: address of parameter block; see POP for details on each func + * @dest: address of destination memory area + * @dest_len: size of destination memory area in bytes + * @seed: address of seed data + * @seed_len: size of seed data in bytes + * + * Executes the PPNO (PERFORM PSEUDORANDOM NUMBER OPERATION) + * operation of the CPU. + * + * Returns -1 for failure, 0 for the query func, number of random + * bytes stored in dest buffer for generate function + */ +static inline int crypt_s390_ppno(long func, void *param, + u8 *dest, long dest_len, + const u8 *seed, long seed_len) +{ + register long __func asm("0") = func & CRYPT_S390_FUNC_MASK; + register void *__param asm("1") = param; /* param block (240 bytes) */ + register u8 *__dest asm("2") = dest; /* buf for recv random bytes */ + register long __dest_len asm("3") = dest_len; /* requested random bytes */ + register const u8 *__seed asm("4") = seed; /* buf with seed data */ + register long __seed_len asm("5") = seed_len; /* bytes in seed buf */ + int ret = -1; + + asm volatile ( + "0: .insn rre,0xb93c0000,%1,%5\n" /* PPNO opcode */ + "1: brc 1,0b\n" /* handle partial completion */ + " la %0,0\n" + "2:\n" + EX_TABLE(0b, 2b) EX_TABLE(1b, 2b) + : "+d" (ret), "+a"(__dest), "+d"(__dest_len) + : "d"(__func), "a"(__param), "a"(__seed), "d"(__seed_len) + : "cc", "memory"); + if (ret < 0) + return ret; + return (func & CRYPT_S390_FUNC_MASK) ? dest_len - __dest_len : 0; +} + +/** * crypt_s390_func_available: * @func: the function code of the specific function; 0 if op in general * @@ -373,6 +427,9 @@ static inline int crypt_s390_func_available(int func, return 0; if (facility_mask & CRYPT_S390_MSA4 && !test_facility(77)) return 0; + if (facility_mask & CRYPT_S390_MSA5 && !test_facility(57)) + return 0; + switch (func & CRYPT_S390_OP_MASK) { case CRYPT_S390_KM: ret = crypt_s390_km(KM_QUERY, &status, NULL, NULL, 0); @@ -390,8 +447,12 @@ static inline int crypt_s390_func_available(int func, ret = crypt_s390_kmac(KMAC_QUERY, &status, NULL, 0); break; case CRYPT_S390_KMCTR: - ret = crypt_s390_kmctr(KMCTR_QUERY, &status, NULL, NULL, 0, - NULL); + ret = crypt_s390_kmctr(KMCTR_QUERY, &status, + NULL, NULL, 0, NULL); + break; + case CRYPT_S390_PPNO: + ret = crypt_s390_ppno(PPNO_QUERY, &status, + NULL, 0, NULL, 0); break; default: return 0; @@ -419,15 +480,14 @@ static inline int crypt_s390_pcc(long func, void *param) int ret = -1; asm volatile( - "0: .insn rre,0xb92c0000,0,0 \n" /* PCC opcode */ - "1: brc 1,0b \n" /* handle partial completion */ + "0: .insn rre,0xb92c0000,0,0\n" /* PCC opcode */ + "1: brc 1,0b\n" /* handle partial completion */ " la %0,0\n" "2:\n" - EX_TABLE(0b,2b) EX_TABLE(1b,2b) + EX_TABLE(0b, 2b) EX_TABLE(1b, 2b) : "+d" (ret) : "d" (__func), "a" (__param) : "cc", "memory"); return ret; } - #endif /* _CRYPTO_ARCH_S390_CRYPT_S390_H */ diff --git a/arch/s390/crypto/ghash_s390.c b/arch/s390/crypto/ghash_s390.c index 7940dc90e80b..b258110da952 100644 --- a/arch/s390/crypto/ghash_s390.c +++ b/arch/s390/crypto/ghash_s390.c @@ -16,11 +16,12 @@ #define GHASH_DIGEST_SIZE 16 struct ghash_ctx { - u8 icv[16]; - u8 key[16]; + u8 key[GHASH_BLOCK_SIZE]; }; struct ghash_desc_ctx { + u8 icv[GHASH_BLOCK_SIZE]; + u8 key[GHASH_BLOCK_SIZE]; u8 buffer[GHASH_BLOCK_SIZE]; u32 bytes; }; @@ -28,8 +29,10 @@ struct ghash_desc_ctx { static int ghash_init(struct shash_desc *desc) { struct ghash_desc_ctx *dctx = shash_desc_ctx(desc); + struct ghash_ctx *ctx = crypto_shash_ctx(desc->tfm); memset(dctx, 0, sizeof(*dctx)); + memcpy(dctx->key, ctx->key, GHASH_BLOCK_SIZE); return 0; } @@ -45,7 +48,6 @@ static int ghash_setkey(struct crypto_shash *tfm, } memcpy(ctx->key, key, GHASH_BLOCK_SIZE); - memset(ctx->icv, 0, GHASH_BLOCK_SIZE); return 0; } @@ -54,7 +56,6 @@ static int ghash_update(struct shash_desc *desc, const u8 *src, unsigned int srclen) { struct ghash_desc_ctx *dctx = shash_desc_ctx(desc); - struct ghash_ctx *ctx = crypto_shash_ctx(desc->tfm); unsigned int n; u8 *buf = dctx->buffer; int ret; @@ -70,7 +71,7 @@ static int ghash_update(struct shash_desc *desc, src += n; if (!dctx->bytes) { - ret = crypt_s390_kimd(KIMD_GHASH, ctx, buf, + ret = crypt_s390_kimd(KIMD_GHASH, dctx, buf, GHASH_BLOCK_SIZE); if (ret != GHASH_BLOCK_SIZE) return -EIO; @@ -79,7 +80,7 @@ static int ghash_update(struct shash_desc *desc, n = srclen & ~(GHASH_BLOCK_SIZE - 1); if (n) { - ret = crypt_s390_kimd(KIMD_GHASH, ctx, src, n); + ret = crypt_s390_kimd(KIMD_GHASH, dctx, src, n); if (ret != n) return -EIO; src += n; @@ -94,7 +95,7 @@ static int ghash_update(struct shash_desc *desc, return 0; } -static int ghash_flush(struct ghash_ctx *ctx, struct ghash_desc_ctx *dctx) +static int ghash_flush(struct ghash_desc_ctx *dctx) { u8 *buf = dctx->buffer; int ret; @@ -104,24 +105,24 @@ static int ghash_flush(struct ghash_ctx *ctx, struct ghash_desc_ctx *dctx) memset(pos, 0, dctx->bytes); - ret = crypt_s390_kimd(KIMD_GHASH, ctx, buf, GHASH_BLOCK_SIZE); + ret = crypt_s390_kimd(KIMD_GHASH, dctx, buf, GHASH_BLOCK_SIZE); if (ret != GHASH_BLOCK_SIZE) return -EIO; + + dctx->bytes = 0; } - dctx->bytes = 0; return 0; } static int ghash_final(struct shash_desc *desc, u8 *dst) { struct ghash_desc_ctx *dctx = shash_desc_ctx(desc); - struct ghash_ctx *ctx = crypto_shash_ctx(desc->tfm); int ret; - ret = ghash_flush(ctx, dctx); + ret = ghash_flush(dctx); if (!ret) - memcpy(dst, ctx->icv, GHASH_BLOCK_SIZE); + memcpy(dst, dctx->icv, GHASH_BLOCK_SIZE); return ret; } diff --git a/arch/s390/crypto/prng.c b/arch/s390/crypto/prng.c index 94a35a4c1b48..9d5192c94963 100644 --- a/arch/s390/crypto/prng.c +++ b/arch/s390/crypto/prng.c @@ -1,106 +1,529 @@ /* - * Copyright IBM Corp. 2006, 2007 + * Copyright IBM Corp. 2006, 2015 * Author(s): Jan Glauber <jan.glauber@de.ibm.com> + * Harald Freudenberger <freude@de.ibm.com> * Driver for the s390 pseudo random number generator */ + +#define KMSG_COMPONENT "prng" +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt + #include <linux/fs.h> +#include <linux/fips.h> #include <linux/init.h> #include <linux/kernel.h> +#include <linux/device.h> #include <linux/miscdevice.h> #include <linux/module.h> #include <linux/moduleparam.h> +#include <linux/mutex.h> #include <linux/random.h> #include <linux/slab.h> #include <asm/debug.h> #include <asm/uaccess.h> +#include <asm/timex.h> #include "crypt_s390.h" MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Jan Glauber <jan.glauber@de.ibm.com>"); +MODULE_AUTHOR("IBM Corporation"); MODULE_DESCRIPTION("s390 PRNG interface"); -static int prng_chunk_size = 256; -module_param(prng_chunk_size, int, S_IRUSR | S_IRGRP | S_IROTH); + +#define PRNG_MODE_AUTO 0 +#define PRNG_MODE_TDES 1 +#define PRNG_MODE_SHA512 2 + +static unsigned int prng_mode = PRNG_MODE_AUTO; +module_param_named(mode, prng_mode, int, 0); +MODULE_PARM_DESC(prng_mode, "PRNG mode: 0 - auto, 1 - TDES, 2 - SHA512"); + + +#define PRNG_CHUNKSIZE_TDES_MIN 8 +#define PRNG_CHUNKSIZE_TDES_MAX (64*1024) +#define PRNG_CHUNKSIZE_SHA512_MIN 64 +#define PRNG_CHUNKSIZE_SHA512_MAX (64*1024) + +static unsigned int prng_chunk_size = 256; +module_param_named(chunksize, prng_chunk_size, int, 0); MODULE_PARM_DESC(prng_chunk_size, "PRNG read chunk size in bytes"); -static int prng_entropy_limit = 4096; -module_param(prng_entropy_limit, int, S_IRUSR | S_IRGRP | S_IROTH | S_IWUSR); -MODULE_PARM_DESC(prng_entropy_limit, - "PRNG add entropy after that much bytes were produced"); + +#define PRNG_RESEED_LIMIT_TDES 4096 +#define PRNG_RESEED_LIMIT_TDES_LOWER 4096 +#define PRNG_RESEED_LIMIT_SHA512 100000 +#define PRNG_RESEED_LIMIT_SHA512_LOWER 10000 + +static unsigned int prng_reseed_limit; +module_param_named(reseed_limit, prng_reseed_limit, int, 0); +MODULE_PARM_DESC(prng_reseed_limit, "PRNG reseed limit"); + /* * Any one who considers arithmetical methods of producing random digits is, * of course, in a state of sin. -- John von Neumann */ -struct s390_prng_data { - unsigned long count; /* how many bytes were produced */ - char *buf; +static int prng_errorflag; + +#define PRNG_GEN_ENTROPY_FAILED 1 +#define PRNG_SELFTEST_FAILED 2 +#define PRNG_INSTANTIATE_FAILED 3 +#define PRNG_SEED_FAILED 4 +#define PRNG_RESEED_FAILED 5 +#define PRNG_GEN_FAILED 6 + +struct prng_ws_s { + u8 parm_block[32]; + u32 reseed_counter; + u64 byte_counter; }; -static struct s390_prng_data *p; +struct ppno_ws_s { + u32 res; + u32 reseed_counter; + u64 stream_bytes; + u8 V[112]; + u8 C[112]; +}; -/* copied from libica, use a non-zero initial parameter block */ -static unsigned char parm_block[32] = { -0x0F,0x2B,0x8E,0x63,0x8C,0x8E,0xD2,0x52,0x64,0xB7,0xA0,0x7B,0x75,0x28,0xB8,0xF4, -0x75,0x5F,0xD2,0xA6,0x8D,0x97,0x11,0xFF,0x49,0xD8,0x23,0xF3,0x7E,0x21,0xEC,0xA0, +struct prng_data_s { + struct mutex mutex; + union { + struct prng_ws_s prngws; + struct ppno_ws_s ppnows; + }; + u8 *buf; + u32 rest; + u8 *prev; }; -static int prng_open(struct inode *inode, struct file *file) +static struct prng_data_s *prng_data; + +/* initial parameter block for tdes mode, copied from libica */ +static const u8 initial_parm_block[32] __initconst = { + 0x0F, 0x2B, 0x8E, 0x63, 0x8C, 0x8E, 0xD2, 0x52, + 0x64, 0xB7, 0xA0, 0x7B, 0x75, 0x28, 0xB8, 0xF4, + 0x75, 0x5F, 0xD2, 0xA6, 0x8D, 0x97, 0x11, 0xFF, + 0x49, 0xD8, 0x23, 0xF3, 0x7E, 0x21, 0xEC, 0xA0 }; + + +/*** helper functions ***/ + +static int generate_entropy(u8 *ebuf, size_t nbytes) { - return nonseekable_open(inode, file); + int n, ret = 0; + u8 *pg, *h, hash[32]; + + pg = (u8 *) __get_free_page(GFP_KERNEL); + if (!pg) { + prng_errorflag = PRNG_GEN_ENTROPY_FAILED; + return -ENOMEM; + } + + while (nbytes) { + /* fill page with urandom bytes */ + get_random_bytes(pg, PAGE_SIZE); + /* exor page with stckf values */ + for (n = 0; n < PAGE_SIZE / sizeof(u64); n++) { + u64 *p = ((u64 *)pg) + n; + *p ^= get_tod_clock_fast(); + } + n = (nbytes < sizeof(hash)) ? nbytes : sizeof(hash); + if (n < sizeof(hash)) + h = hash; + else + h = ebuf; + /* generate sha256 from this page */ + if (crypt_s390_kimd(KIMD_SHA_256, h, + pg, PAGE_SIZE) != PAGE_SIZE) { + prng_errorflag = PRNG_GEN_ENTROPY_FAILED; + ret = -EIO; + goto out; + } + if (n < sizeof(hash)) + memcpy(ebuf, hash, n); + ret += n; + ebuf += n; + nbytes -= n; + } + +out: + free_page((unsigned long)pg); + return ret; } -static void prng_add_entropy(void) + +/*** tdes functions ***/ + +static void prng_tdes_add_entropy(void) { __u64 entropy[4]; unsigned int i; int ret; for (i = 0; i < 16; i++) { - ret = crypt_s390_kmc(KMC_PRNG, parm_block, (char *)entropy, - (char *)entropy, sizeof(entropy)); + ret = crypt_s390_kmc(KMC_PRNG, prng_data->prngws.parm_block, + (char *)entropy, (char *)entropy, + sizeof(entropy)); BUG_ON(ret < 0 || ret != sizeof(entropy)); - memcpy(parm_block, entropy, sizeof(entropy)); + memcpy(prng_data->prngws.parm_block, entropy, sizeof(entropy)); } } -static void prng_seed(int nbytes) + +static void prng_tdes_seed(int nbytes) { char buf[16]; int i = 0; - BUG_ON(nbytes > 16); + BUG_ON(nbytes > sizeof(buf)); + get_random_bytes(buf, nbytes); /* Add the entropy */ while (nbytes >= 8) { - *((__u64 *)parm_block) ^= *((__u64 *)(buf+i)); - prng_add_entropy(); + *((__u64 *)prng_data->prngws.parm_block) ^= *((__u64 *)(buf+i)); + prng_tdes_add_entropy(); i += 8; nbytes -= 8; } - prng_add_entropy(); + prng_tdes_add_entropy(); + prng_data->prngws.reseed_counter = 0; +} + + +static int __init prng_tdes_instantiate(void) +{ + int datalen; + + pr_debug("prng runs in TDES mode with " + "chunksize=%d and reseed_limit=%u\n", + prng_chunk_size, prng_reseed_limit); + + /* memory allocation, prng_data struct init, mutex init */ + datalen = sizeof(struct prng_data_s) + prng_chunk_size; + prng_data = kzalloc(datalen, GFP_KERNEL); + if (!prng_data) { + prng_errorflag = PRNG_INSTANTIATE_FAILED; + return -ENOMEM; + } + mutex_init(&prng_data->mutex); + prng_data->buf = ((u8 *)prng_data) + sizeof(struct prng_data_s); + memcpy(prng_data->prngws.parm_block, initial_parm_block, 32); + + /* initialize the PRNG, add 128 bits of entropy */ + prng_tdes_seed(16); + + return 0; } -static ssize_t prng_read(struct file *file, char __user *ubuf, size_t nbytes, - loff_t *ppos) + +static void prng_tdes_deinstantiate(void) +{ + pr_debug("The prng module stopped " + "after running in triple DES mode\n"); + kzfree(prng_data); +} + + +/*** sha512 functions ***/ + +static int __init prng_sha512_selftest(void) { - int chunk, n; + /* NIST DRBG testvector for Hash Drbg, Sha-512, Count #0 */ + static const u8 seed[] __initconst = { + 0x6b, 0x50, 0xa7, 0xd8, 0xf8, 0xa5, 0x5d, 0x7a, + 0x3d, 0xf8, 0xbb, 0x40, 0xbc, 0xc3, 0xb7, 0x22, + 0xd8, 0x70, 0x8d, 0xe6, 0x7f, 0xda, 0x01, 0x0b, + 0x03, 0xc4, 0xc8, 0x4d, 0x72, 0x09, 0x6f, 0x8c, + 0x3e, 0xc6, 0x49, 0xcc, 0x62, 0x56, 0xd9, 0xfa, + 0x31, 0xdb, 0x7a, 0x29, 0x04, 0xaa, 0xf0, 0x25 }; + static const u8 V0[] __initconst = { + 0x00, 0xad, 0xe3, 0x6f, 0x9a, 0x01, 0xc7, 0x76, + 0x61, 0x34, 0x35, 0xf5, 0x4e, 0x24, 0x74, 0x22, + 0x21, 0x9a, 0x29, 0x89, 0xc7, 0x93, 0x2e, 0x60, + 0x1e, 0xe8, 0x14, 0x24, 0x8d, 0xd5, 0x03, 0xf1, + 0x65, 0x5d, 0x08, 0x22, 0x72, 0xd5, 0xad, 0x95, + 0xe1, 0x23, 0x1e, 0x8a, 0xa7, 0x13, 0xd9, 0x2b, + 0x5e, 0xbc, 0xbb, 0x80, 0xab, 0x8d, 0xe5, 0x79, + 0xab, 0x5b, 0x47, 0x4e, 0xdd, 0xee, 0x6b, 0x03, + 0x8f, 0x0f, 0x5c, 0x5e, 0xa9, 0x1a, 0x83, 0xdd, + 0xd3, 0x88, 0xb2, 0x75, 0x4b, 0xce, 0x83, 0x36, + 0x57, 0x4b, 0xf1, 0x5c, 0xca, 0x7e, 0x09, 0xc0, + 0xd3, 0x89, 0xc6, 0xe0, 0xda, 0xc4, 0x81, 0x7e, + 0x5b, 0xf9, 0xe1, 0x01, 0xc1, 0x92, 0x05, 0xea, + 0xf5, 0x2f, 0xc6, 0xc6, 0xc7, 0x8f, 0xbc, 0xf4 }; + static const u8 C0[] __initconst = { + 0x00, 0xf4, 0xa3, 0xe5, 0xa0, 0x72, 0x63, 0x95, + 0xc6, 0x4f, 0x48, 0xd0, 0x8b, 0x5b, 0x5f, 0x8e, + 0x6b, 0x96, 0x1f, 0x16, 0xed, 0xbc, 0x66, 0x94, + 0x45, 0x31, 0xd7, 0x47, 0x73, 0x22, 0xa5, 0x86, + 0xce, 0xc0, 0x4c, 0xac, 0x63, 0xb8, 0x39, 0x50, + 0xbf, 0xe6, 0x59, 0x6c, 0x38, 0x58, 0x99, 0x1f, + 0x27, 0xa7, 0x9d, 0x71, 0x2a, 0xb3, 0x7b, 0xf9, + 0xfb, 0x17, 0x86, 0xaa, 0x99, 0x81, 0xaa, 0x43, + 0xe4, 0x37, 0xd3, 0x1e, 0x6e, 0xe5, 0xe6, 0xee, + 0xc2, 0xed, 0x95, 0x4f, 0x53, 0x0e, 0x46, 0x8a, + 0xcc, 0x45, 0xa5, 0xdb, 0x69, 0x0d, 0x81, 0xc9, + 0x32, 0x92, 0xbc, 0x8f, 0x33, 0xe6, 0xf6, 0x09, + 0x7c, 0x8e, 0x05, 0x19, 0x0d, 0xf1, 0xb6, 0xcc, + 0xf3, 0x02, 0x21, 0x90, 0x25, 0xec, 0xed, 0x0e }; + static const u8 random[] __initconst = { + 0x95, 0xb7, 0xf1, 0x7e, 0x98, 0x02, 0xd3, 0x57, + 0x73, 0x92, 0xc6, 0xa9, 0xc0, 0x80, 0x83, 0xb6, + 0x7d, 0xd1, 0x29, 0x22, 0x65, 0xb5, 0xf4, 0x2d, + 0x23, 0x7f, 0x1c, 0x55, 0xbb, 0x9b, 0x10, 0xbf, + 0xcf, 0xd8, 0x2c, 0x77, 0xa3, 0x78, 0xb8, 0x26, + 0x6a, 0x00, 0x99, 0x14, 0x3b, 0x3c, 0x2d, 0x64, + 0x61, 0x1e, 0xee, 0xb6, 0x9a, 0xcd, 0xc0, 0x55, + 0x95, 0x7c, 0x13, 0x9e, 0x8b, 0x19, 0x0c, 0x7a, + 0x06, 0x95, 0x5f, 0x2c, 0x79, 0x7c, 0x27, 0x78, + 0xde, 0x94, 0x03, 0x96, 0xa5, 0x01, 0xf4, 0x0e, + 0x91, 0x39, 0x6a, 0xcf, 0x8d, 0x7e, 0x45, 0xeb, + 0xdb, 0xb5, 0x3b, 0xbf, 0x8c, 0x97, 0x52, 0x30, + 0xd2, 0xf0, 0xff, 0x91, 0x06, 0xc7, 0x61, 0x19, + 0xae, 0x49, 0x8e, 0x7f, 0xbc, 0x03, 0xd9, 0x0f, + 0x8e, 0x4c, 0x51, 0x62, 0x7a, 0xed, 0x5c, 0x8d, + 0x42, 0x63, 0xd5, 0xd2, 0xb9, 0x78, 0x87, 0x3a, + 0x0d, 0xe5, 0x96, 0xee, 0x6d, 0xc7, 0xf7, 0xc2, + 0x9e, 0x37, 0xee, 0xe8, 0xb3, 0x4c, 0x90, 0xdd, + 0x1c, 0xf6, 0xa9, 0xdd, 0xb2, 0x2b, 0x4c, 0xbd, + 0x08, 0x6b, 0x14, 0xb3, 0x5d, 0xe9, 0x3d, 0xa2, + 0xd5, 0xcb, 0x18, 0x06, 0x69, 0x8c, 0xbd, 0x7b, + 0xbb, 0x67, 0xbf, 0xe3, 0xd3, 0x1f, 0xd2, 0xd1, + 0xdb, 0xd2, 0xa1, 0xe0, 0x58, 0xa3, 0xeb, 0x99, + 0xd7, 0xe5, 0x1f, 0x1a, 0x93, 0x8e, 0xed, 0x5e, + 0x1c, 0x1d, 0xe2, 0x3a, 0x6b, 0x43, 0x45, 0xd3, + 0x19, 0x14, 0x09, 0xf9, 0x2f, 0x39, 0xb3, 0x67, + 0x0d, 0x8d, 0xbf, 0xb6, 0x35, 0xd8, 0xe6, 0xa3, + 0x69, 0x32, 0xd8, 0x10, 0x33, 0xd1, 0x44, 0x8d, + 0x63, 0xb4, 0x03, 0xdd, 0xf8, 0x8e, 0x12, 0x1b, + 0x6e, 0x81, 0x9a, 0xc3, 0x81, 0x22, 0x6c, 0x13, + 0x21, 0xe4, 0xb0, 0x86, 0x44, 0xf6, 0x72, 0x7c, + 0x36, 0x8c, 0x5a, 0x9f, 0x7a, 0x4b, 0x3e, 0xe2 }; + int ret = 0; - int tmp; + u8 buf[sizeof(random)]; + struct ppno_ws_s ws; + + memset(&ws, 0, sizeof(ws)); + + /* initial seed */ + ret = crypt_s390_ppno(PPNO_SHA512_DRNG_SEED, + &ws, NULL, 0, + seed, sizeof(seed)); + if (ret < 0) { + pr_err("The prng self test seed operation for the " + "SHA-512 mode failed with rc=%d\n", ret); + prng_errorflag = PRNG_SELFTEST_FAILED; + return -EIO; + } + + /* check working states V and C */ + if (memcmp(ws.V, V0, sizeof(V0)) != 0 + || memcmp(ws.C, C0, sizeof(C0)) != 0) { + pr_err("The prng self test state test " + "for the SHA-512 mode failed\n"); + prng_errorflag = PRNG_SELFTEST_FAILED; + return -EIO; + } + + /* generate random bytes */ + ret = crypt_s390_ppno(PPNO_SHA512_DRNG_GEN, + &ws, buf, sizeof(buf), + NULL, 0); + if (ret < 0) { + pr_err("The prng self test generate operation for " + "the SHA-512 mode failed with rc=%d\n", ret); + prng_errorflag = PRNG_SELFTEST_FAILED; + return -EIO; + } + ret = crypt_s390_ppno(PPNO_SHA512_DRNG_GEN, + &ws, buf, sizeof(buf), + NULL, 0); + if (ret < 0) { + pr_err("The prng self test generate operation for " + "the SHA-512 mode failed with rc=%d\n", ret); + prng_errorflag = PRNG_SELFTEST_FAILED; + return -EIO; + } + + /* check against expected data */ + if (memcmp(buf, random, sizeof(random)) != 0) { + pr_err("The prng self test data test " + "for the SHA-512 mode failed\n"); + prng_errorflag = PRNG_SELFTEST_FAILED; + return -EIO; + } + + return 0; +} + + +static int __init prng_sha512_instantiate(void) +{ + int ret, datalen; + u8 seed[64]; + + pr_debug("prng runs in SHA-512 mode " + "with chunksize=%d and reseed_limit=%u\n", + prng_chunk_size, prng_reseed_limit); + + /* memory allocation, prng_data struct init, mutex init */ + datalen = sizeof(struct prng_data_s) + prng_chunk_size; + if (fips_enabled) + datalen += prng_chunk_size; + prng_data = kzalloc(datalen, GFP_KERNEL); + if (!prng_data) { + prng_errorflag = PRNG_INSTANTIATE_FAILED; + return -ENOMEM; + } + mutex_init(&prng_data->mutex); + prng_data->buf = ((u8 *)prng_data) + sizeof(struct prng_data_s); + + /* selftest */ + ret = prng_sha512_selftest(); + if (ret) + goto outfree; + + /* generate initial seed bytestring, first 48 bytes of entropy */ + ret = generate_entropy(seed, 48); + if (ret != 48) + goto outfree; + /* followed by 16 bytes of unique nonce */ + get_tod_clock_ext(seed + 48); + + /* initial seed of the ppno drng */ + ret = crypt_s390_ppno(PPNO_SHA512_DRNG_SEED, + &prng_data->ppnows, NULL, 0, + seed, sizeof(seed)); + if (ret < 0) { + prng_errorflag = PRNG_SEED_FAILED; + ret = -EIO; + goto outfree; + } + + /* if fips mode is enabled, generate a first block of random + bytes for the FIPS 140-2 Conditional Self Test */ + if (fips_enabled) { + prng_data->prev = prng_data->buf + prng_chunk_size; + ret = crypt_s390_ppno(PPNO_SHA512_DRNG_GEN, + &prng_data->ppnows, + prng_data->prev, + prng_chunk_size, + NULL, 0); + if (ret < 0 || ret != prng_chunk_size) { + prng_errorflag = PRNG_GEN_FAILED; + ret = -EIO; + goto outfree; + } + } + + return 0; + +outfree: + kfree(prng_data); + return ret; +} + + +static void prng_sha512_deinstantiate(void) +{ + pr_debug("The prng module stopped after running in SHA-512 mode\n"); + kzfree(prng_data); +} + + +static int prng_sha512_reseed(void) +{ + int ret; + u8 seed[32]; + + /* generate 32 bytes of fresh entropy */ + ret = generate_entropy(seed, sizeof(seed)); + if (ret != sizeof(seed)) + return ret; + + /* do a reseed of the ppno drng with this bytestring */ + ret = crypt_s390_ppno(PPNO_SHA512_DRNG_SEED, + &prng_data->ppnows, NULL, 0, + seed, sizeof(seed)); + if (ret) { + prng_errorflag = PRNG_RESEED_FAILED; + return -EIO; + } + + return 0; +} + + +static int prng_sha512_generate(u8 *buf, size_t nbytes) +{ + int ret; + + /* reseed needed ? */ + if (prng_data->ppnows.reseed_counter > prng_reseed_limit) { + ret = prng_sha512_reseed(); + if (ret) + return ret; + } + + /* PPNO generate */ + ret = crypt_s390_ppno(PPNO_SHA512_DRNG_GEN, + &prng_data->ppnows, buf, nbytes, + NULL, 0); + if (ret < 0 || ret != nbytes) { + prng_errorflag = PRNG_GEN_FAILED; + return -EIO; + } + + /* FIPS 140-2 Conditional Self Test */ + if (fips_enabled) { + if (!memcmp(prng_data->prev, buf, nbytes)) { + prng_errorflag = PRNG_GEN_FAILED; + return -EILSEQ; + } + memcpy(prng_data->prev, buf, nbytes); + } + + return ret; +} + + +/*** file io functions ***/ + +static int prng_open(struct inode *inode, struct file *file) +{ + return nonseekable_open(inode, file); +} + + +static ssize_t prng_tdes_read(struct file *file, char __user *ubuf, + size_t nbytes, loff_t *ppos) +{ + int chunk, n, tmp, ret = 0; + + /* lock prng_data struct */ + if (mutex_lock_interruptible(&prng_data->mutex)) + return -ERESTARTSYS; - /* nbytes can be arbitrary length, we split it into chunks */ while (nbytes) { - /* same as in extract_entropy_user in random.c */ if (need_resched()) { if (signal_pending(current)) { if (ret == 0) ret = -ERESTARTSYS; break; } + /* give mutex free before calling schedule() */ + mutex_unlock(&prng_data->mutex); schedule(); + /* occopy mutex again */ + if (mutex_lock_interruptible(&prng_data->mutex)) { + if (ret == 0) + ret = -ERESTARTSYS; + return ret; + } } /* @@ -112,12 +535,11 @@ static ssize_t prng_read(struct file *file, char __user *ubuf, size_t nbytes, /* PRNG only likes multiples of 8 bytes */ n = (chunk + 7) & -8; - if (p->count > prng_entropy_limit) - prng_seed(8); + if (prng_data->prngws.reseed_counter > prng_reseed_limit) + prng_tdes_seed(8); /* if the CPU supports PRNG stckf is present too */ - asm volatile(".insn s,0xb27c0000,%0" - : "=m" (*((unsigned long long *)p->buf)) : : "cc"); + *((unsigned long long *)prng_data->buf) = get_tod_clock_fast(); /* * Beside the STCKF the input for the TDES-EDE is the output @@ -132,35 +554,259 @@ static ssize_t prng_read(struct file *file, char __user *ubuf, size_t nbytes, * Note: you can still get strict X9.17 conformity by setting * prng_chunk_size to 8 bytes. */ - tmp = crypt_s390_kmc(KMC_PRNG, parm_block, p->buf, p->buf, n); - BUG_ON((tmp < 0) || (tmp != n)); + tmp = crypt_s390_kmc(KMC_PRNG, prng_data->prngws.parm_block, + prng_data->buf, prng_data->buf, n); + if (tmp < 0 || tmp != n) { + ret = -EIO; + break; + } - p->count += n; + prng_data->prngws.byte_counter += n; + prng_data->prngws.reseed_counter += n; - if (copy_to_user(ubuf, p->buf, chunk)) + if (copy_to_user(ubuf, prng_data->buf, chunk)) return -EFAULT; nbytes -= chunk; ret += chunk; ubuf += chunk; } + + /* unlock prng_data struct */ + mutex_unlock(&prng_data->mutex); + return ret; } -static const struct file_operations prng_fops = { + +static ssize_t prng_sha512_read(struct file *file, char __user *ubuf, + size_t nbytes, loff_t *ppos) +{ + int n, ret = 0; + u8 *p; + + /* if errorflag is set do nothing and return 'broken pipe' */ + if (prng_errorflag) + return -EPIPE; + + /* lock prng_data struct */ + if (mutex_lock_interruptible(&prng_data->mutex)) + return -ERESTARTSYS; + + while (nbytes) { + if (need_resched()) { + if (signal_pending(current)) { + if (ret == 0) + ret = -ERESTARTSYS; + break; + } + /* give mutex free before calling schedule() */ + mutex_unlock(&prng_data->mutex); + schedule(); + /* occopy mutex again */ + if (mutex_lock_interruptible(&prng_data->mutex)) { + if (ret == 0) + ret = -ERESTARTSYS; + return ret; + } + } + if (prng_data->rest) { + /* push left over random bytes from the previous read */ + p = prng_data->buf + prng_chunk_size - prng_data->rest; + n = (nbytes < prng_data->rest) ? + nbytes : prng_data->rest; + prng_data->rest -= n; + } else { + /* generate one chunk of random bytes into read buf */ + p = prng_data->buf; + n = prng_sha512_generate(p, prng_chunk_size); + if (n < 0) { + ret = n; + break; + } + if (nbytes < prng_chunk_size) { + n = nbytes; + prng_data->rest = prng_chunk_size - n; + } else { + n = prng_chunk_size; + prng_data->rest = 0; + } + } + if (copy_to_user(ubuf, p, n)) { + ret = -EFAULT; + break; + } + ubuf += n; + nbytes -= n; + ret += n; + } + + /* unlock prng_data struct */ + mutex_unlock(&prng_data->mutex); + + return ret; +} + + +/*** sysfs stuff ***/ + +static const struct file_operations prng_sha512_fops = { + .owner = THIS_MODULE, + .open = &prng_open, + .release = NULL, + .read = &prng_sha512_read, + .llseek = noop_llseek, +}; +static const struct file_operations prng_tdes_fops = { .owner = THIS_MODULE, .open = &prng_open, .release = NULL, - .read = &prng_read, + .read = &prng_tdes_read, .llseek = noop_llseek, }; -static struct miscdevice prng_dev = { +static struct miscdevice prng_sha512_dev = { + .name = "prandom", + .minor = MISC_DYNAMIC_MINOR, + .fops = &prng_sha512_fops, +}; +static struct miscdevice prng_tdes_dev = { .name = "prandom", .minor = MISC_DYNAMIC_MINOR, - .fops = &prng_fops, + .fops = &prng_tdes_fops, }; + +/* chunksize attribute (ro) */ +static ssize_t prng_chunksize_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + return snprintf(buf, PAGE_SIZE, "%u\n", prng_chunk_size); +} +static DEVICE_ATTR(chunksize, 0444, prng_chunksize_show, NULL); + +/* counter attribute (ro) */ +static ssize_t prng_counter_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + u64 counter; + + if (mutex_lock_interruptible(&prng_data->mutex)) + return -ERESTARTSYS; + if (prng_mode == PRNG_MODE_SHA512) + counter = prng_data->ppnows.stream_bytes; + else + counter = prng_data->prngws.byte_counter; + mutex_unlock(&prng_data->mutex); + + return snprintf(buf, PAGE_SIZE, "%llu\n", counter); +} +static DEVICE_ATTR(byte_counter, 0444, prng_counter_show, NULL); + +/* errorflag attribute (ro) */ +static ssize_t prng_errorflag_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + return snprintf(buf, PAGE_SIZE, "%d\n", prng_errorflag); +} +static DEVICE_ATTR(errorflag, 0444, prng_errorflag_show, NULL); + +/* mode attribute (ro) */ +static ssize_t prng_mode_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + if (prng_mode == PRNG_MODE_TDES) + return snprintf(buf, PAGE_SIZE, "TDES\n"); + else + return snprintf(buf, PAGE_SIZE, "SHA512\n"); +} +static DEVICE_ATTR(mode, 0444, prng_mode_show, NULL); + +/* reseed attribute (w) */ +static ssize_t prng_reseed_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + if (mutex_lock_interruptible(&prng_data->mutex)) + return -ERESTARTSYS; + prng_sha512_reseed(); + mutex_unlock(&prng_data->mutex); + + return count; +} +static DEVICE_ATTR(reseed, 0200, NULL, prng_reseed_store); + +/* reseed limit attribute (rw) */ +static ssize_t prng_reseed_limit_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + return snprintf(buf, PAGE_SIZE, "%u\n", prng_reseed_limit); +} +static ssize_t prng_reseed_limit_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + unsigned limit; + + if (sscanf(buf, "%u\n", &limit) != 1) + return -EINVAL; + + if (prng_mode == PRNG_MODE_SHA512) { + if (limit < PRNG_RESEED_LIMIT_SHA512_LOWER) + return -EINVAL; + } else { + if (limit < PRNG_RESEED_LIMIT_TDES_LOWER) + return -EINVAL; + } + + prng_reseed_limit = limit; + + return count; +} +static DEVICE_ATTR(reseed_limit, 0644, + prng_reseed_limit_show, prng_reseed_limit_store); + +/* strength attribute (ro) */ +static ssize_t prng_strength_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + return snprintf(buf, PAGE_SIZE, "256\n"); +} +static DEVICE_ATTR(strength, 0444, prng_strength_show, NULL); + +static struct attribute *prng_sha512_dev_attrs[] = { + &dev_attr_errorflag.attr, + &dev_attr_chunksize.attr, + &dev_attr_byte_counter.attr, + &dev_attr_mode.attr, + &dev_attr_reseed.attr, + &dev_attr_reseed_limit.attr, + &dev_attr_strength.attr, + NULL +}; +static struct attribute *prng_tdes_dev_attrs[] = { + &dev_attr_chunksize.attr, + &dev_attr_byte_counter.attr, + &dev_attr_mode.attr, + NULL +}; + +static struct attribute_group prng_sha512_dev_attr_group = { + .attrs = prng_sha512_dev_attrs +}; +static struct attribute_group prng_tdes_dev_attr_group = { + .attrs = prng_tdes_dev_attrs +}; + + +/*** module init and exit ***/ + static int __init prng_init(void) { int ret; @@ -169,43 +815,105 @@ static int __init prng_init(void) if (!crypt_s390_func_available(KMC_PRNG, CRYPT_S390_MSA)) return -EOPNOTSUPP; - if (prng_chunk_size < 8) - return -EINVAL; + /* choose prng mode */ + if (prng_mode != PRNG_MODE_TDES) { + /* check for MSA5 support for PPNO operations */ + if (!crypt_s390_func_available(PPNO_SHA512_DRNG_GEN, + CRYPT_S390_MSA5)) { + if (prng_mode == PRNG_MODE_SHA512) { + pr_err("The prng module cannot " + "start in SHA-512 mode\n"); + return -EOPNOTSUPP; + } + prng_mode = PRNG_MODE_TDES; + } else + prng_mode = PRNG_MODE_SHA512; + } - p = kmalloc(sizeof(struct s390_prng_data), GFP_KERNEL); - if (!p) - return -ENOMEM; - p->count = 0; + if (prng_mode == PRNG_MODE_SHA512) { - p->buf = kmalloc(prng_chunk_size, GFP_KERNEL); - if (!p->buf) { - ret = -ENOMEM; - goto out_free; - } + /* SHA512 mode */ - /* initialize the PRNG, add 128 bits of entropy */ - prng_seed(16); + if (prng_chunk_size < PRNG_CHUNKSIZE_SHA512_MIN + || prng_chunk_size > PRNG_CHUNKSIZE_SHA512_MAX) + return -EINVAL; + prng_chunk_size = (prng_chunk_size + 0x3f) & ~0x3f; - ret = misc_register(&prng_dev); - if (ret) - goto out_buf; - return 0; + if (prng_reseed_limit == 0) + prng_reseed_limit = PRNG_RESEED_LIMIT_SHA512; + else if (prng_reseed_limit < PRNG_RESEED_LIMIT_SHA512_LOWER) + return -EINVAL; + + ret = prng_sha512_instantiate(); + if (ret) + goto out; + + ret = misc_register(&prng_sha512_dev); + if (ret) { + prng_sha512_deinstantiate(); + goto out; + } + ret = sysfs_create_group(&prng_sha512_dev.this_device->kobj, + &prng_sha512_dev_attr_group); + if (ret) { + misc_deregister(&prng_sha512_dev); + prng_sha512_deinstantiate(); + goto out; + } -out_buf: - kfree(p->buf); -out_free: - kfree(p); + } else { + + /* TDES mode */ + + if (prng_chunk_size < PRNG_CHUNKSIZE_TDES_MIN + || prng_chunk_size > PRNG_CHUNKSIZE_TDES_MAX) + return -EINVAL; + prng_chunk_size = (prng_chunk_size + 0x07) & ~0x07; + + if (prng_reseed_limit == 0) + prng_reseed_limit = PRNG_RESEED_LIMIT_TDES; + else if (prng_reseed_limit < PRNG_RESEED_LIMIT_TDES_LOWER) + return -EINVAL; + + ret = prng_tdes_instantiate(); + if (ret) + goto out; + + ret = misc_register(&prng_tdes_dev); + if (ret) { + prng_tdes_deinstantiate(); + goto out; + } + ret = sysfs_create_group(&prng_tdes_dev.this_device->kobj, + &prng_tdes_dev_attr_group); + if (ret) { + misc_deregister(&prng_tdes_dev); + prng_tdes_deinstantiate(); + goto out; + } + + } + +out: return ret; } + static void __exit prng_exit(void) { - /* wipe me */ - kzfree(p->buf); - kfree(p); - - misc_deregister(&prng_dev); + if (prng_mode == PRNG_MODE_SHA512) { + sysfs_remove_group(&prng_sha512_dev.this_device->kobj, + &prng_sha512_dev_attr_group); + misc_deregister(&prng_sha512_dev); + prng_sha512_deinstantiate(); + } else { + sysfs_remove_group(&prng_tdes_dev.this_device->kobj, + &prng_tdes_dev_attr_group); + misc_deregister(&prng_tdes_dev); + prng_tdes_deinstantiate(); + } } + module_init(prng_init); module_exit(prng_exit); diff --git a/arch/s390/hypfs/hypfs_sprp.c b/arch/s390/hypfs/hypfs_sprp.c index f043c3c7e73c..dd42a26d049d 100644 --- a/arch/s390/hypfs/hypfs_sprp.c +++ b/arch/s390/hypfs/hypfs_sprp.c @@ -128,14 +128,14 @@ static struct hypfs_dbfs_file hypfs_sprp_file = { int hypfs_sprp_init(void) { - if (!sclp_has_sprp()) + if (!sclp.has_sprp) return 0; return hypfs_dbfs_create_file(&hypfs_sprp_file); } void hypfs_sprp_exit(void) { - if (!sclp_has_sprp()) + if (!sclp.has_sprp) return; hypfs_dbfs_remove_file(&hypfs_sprp_file); } diff --git a/arch/s390/include/asm/Kbuild b/arch/s390/include/asm/Kbuild index c631f98fd524..dc5385ebb071 100644 --- a/arch/s390/include/asm/Kbuild +++ b/arch/s390/include/asm/Kbuild @@ -4,5 +4,4 @@ generic-y += clkdev.h generic-y += irq_work.h generic-y += mcs_spinlock.h generic-y += preempt.h -generic-y += scatterlist.h generic-y += trace_clock.h diff --git a/arch/s390/include/asm/barrier.h b/arch/s390/include/asm/barrier.h index 8d724718ec21..e6f8615a11eb 100644 --- a/arch/s390/include/asm/barrier.h +++ b/arch/s390/include/asm/barrier.h @@ -36,7 +36,7 @@ #define smp_mb__before_atomic() smp_mb() #define smp_mb__after_atomic() smp_mb() -#define set_mb(var, value) do { var = value; mb(); } while (0) +#define smp_store_mb(var, value) do { WRITE_ONCE(var, value); mb(); } while (0) #define smp_store_release(p, v) \ do { \ diff --git a/arch/s390/include/asm/cmpxchg.h b/arch/s390/include/asm/cmpxchg.h index 4eadec466b8c..411464f4c97a 100644 --- a/arch/s390/include/asm/cmpxchg.h +++ b/arch/s390/include/asm/cmpxchg.h @@ -32,8 +32,6 @@ __old; \ }) -#define __HAVE_ARCH_CMPXCHG - #define __cmpxchg_double_op(p1, p2, o1, o2, n1, n2, insn) \ ({ \ register __typeof__(*(p1)) __old1 asm("2") = (o1); \ diff --git a/arch/s390/include/asm/hugetlb.h b/arch/s390/include/asm/hugetlb.h index 11eae5f55b70..0130d0379edd 100644 --- a/arch/s390/include/asm/hugetlb.h +++ b/arch/s390/include/asm/hugetlb.h @@ -35,12 +35,8 @@ static inline int prepare_hugepage_range(struct file *file, return 0; } -#define hugetlb_prefault_arch_hook(mm) do { } while (0) #define arch_clear_hugepage_flags(page) do { } while (0) -int arch_prepare_hugepage(struct page *page); -void arch_release_hugepage(struct page *page); - static inline void huge_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { diff --git a/arch/s390/include/asm/io.h b/arch/s390/include/asm/io.h index 30fd5c84680e..cb5fdf3a78fc 100644 --- a/arch/s390/include/asm/io.h +++ b/arch/s390/include/asm/io.h @@ -29,6 +29,7 @@ void unxlate_dev_mem_ptr(phys_addr_t phys, void *addr); #define ioremap_nocache(addr, size) ioremap(addr, size) #define ioremap_wc ioremap_nocache +#define ioremap_wt ioremap_nocache static inline void __iomem *ioremap(unsigned long offset, unsigned long size) { diff --git a/arch/s390/include/asm/kexec.h b/arch/s390/include/asm/kexec.h index 694bcd6bd927..2f924bc30e35 100644 --- a/arch/s390/include/asm/kexec.h +++ b/arch/s390/include/asm/kexec.h @@ -26,6 +26,9 @@ /* Not more than 2GB */ #define KEXEC_CONTROL_MEMORY_LIMIT (1UL<<31) +/* Allocate control page with GFP_DMA */ +#define KEXEC_CONTROL_MEMORY_GFP GFP_DMA + /* Maximum address we can use for the crash control pages */ #define KEXEC_CRASH_CONTROL_MEMORY_LIMIT (-1UL) diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index d01fc588b5c3..3024acbe1f9d 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -80,6 +80,7 @@ struct sca_block { #define CPUSTAT_MCDS 0x00000100 #define CPUSTAT_SM 0x00000080 #define CPUSTAT_IBS 0x00000040 +#define CPUSTAT_GED2 0x00000010 #define CPUSTAT_G 0x00000008 #define CPUSTAT_GED 0x00000004 #define CPUSTAT_J 0x00000002 @@ -95,7 +96,8 @@ struct kvm_s390_sie_block { #define PROG_IN_SIE (1<<0) __u32 prog0c; /* 0x000c */ __u8 reserved10[16]; /* 0x0010 */ -#define PROG_BLOCK_SIE 0x00000001 +#define PROG_BLOCK_SIE (1<<0) +#define PROG_REQUEST (1<<1) atomic_t prog20; /* 0x0020 */ __u8 reserved24[4]; /* 0x0024 */ __u64 cputm; /* 0x0028 */ @@ -634,7 +636,7 @@ static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {} static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {} static inline void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free, struct kvm_memory_slot *dont) {} -static inline void kvm_arch_memslots_updated(struct kvm *kvm) {} +static inline void kvm_arch_memslots_updated(struct kvm *kvm, struct kvm_memslots *slots) {} static inline void kvm_arch_flush_shadow_all(struct kvm *kvm) {} static inline void kvm_arch_flush_shadow_memslot(struct kvm *kvm, struct kvm_memory_slot *slot) {} diff --git a/arch/s390/include/asm/mm-arch-hooks.h b/arch/s390/include/asm/mm-arch-hooks.h new file mode 100644 index 000000000000..07680b2f3c59 --- /dev/null +++ b/arch/s390/include/asm/mm-arch-hooks.h @@ -0,0 +1,15 @@ +/* + * Architecture specific mm hooks + * + * Copyright (C) 2015, IBM Corporation + * Author: Laurent Dufour <ldufour@linux.vnet.ibm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef _ASM_S390_MM_ARCH_HOOKS_H +#define _ASM_S390_MM_ARCH_HOOKS_H + +#endif /* _ASM_S390_MM_ARCH_HOOKS_H */ diff --git a/arch/s390/include/asm/mmu.h b/arch/s390/include/asm/mmu.h index a5e656260a70..d29ad9545b41 100644 --- a/arch/s390/include/asm/mmu.h +++ b/arch/s390/include/asm/mmu.h @@ -14,7 +14,9 @@ typedef struct { unsigned long asce_bits; unsigned long asce_limit; unsigned long vdso_base; - /* The mmu context has extended page tables. */ + /* The mmu context allocates 4K page tables. */ + unsigned int alloc_pgste:1; + /* The mmu context uses extended page tables. */ unsigned int has_pgste:1; /* The mmu context uses storage keys. */ unsigned int use_skey:1; diff --git a/arch/s390/include/asm/mmu_context.h b/arch/s390/include/asm/mmu_context.h index d25d9ff10ba8..fb1b93ea3e3f 100644 --- a/arch/s390/include/asm/mmu_context.h +++ b/arch/s390/include/asm/mmu_context.h @@ -20,8 +20,11 @@ static inline int init_new_context(struct task_struct *tsk, mm->context.flush_mm = 0; mm->context.asce_bits = _ASCE_TABLE_LENGTH | _ASCE_USER_BITS; mm->context.asce_bits |= _ASCE_TYPE_REGION3; +#ifdef CONFIG_PGSTE + mm->context.alloc_pgste = page_table_allocate_pgste; mm->context.has_pgste = 0; mm->context.use_skey = 0; +#endif mm->context.asce_limit = STACK_TOP_MAX; crst_table_init((unsigned long *) mm->pgd, pgd_entry_type(mm)); return 0; diff --git a/arch/s390/include/asm/page.h b/arch/s390/include/asm/page.h index 53eacbd4f09b..dd345238d9a7 100644 --- a/arch/s390/include/asm/page.h +++ b/arch/s390/include/asm/page.h @@ -17,7 +17,10 @@ #define PAGE_DEFAULT_ACC 0 #define PAGE_DEFAULT_KEY (PAGE_DEFAULT_ACC << 4) -#define HPAGE_SHIFT 20 +#include <asm/setup.h> +#ifndef __ASSEMBLY__ + +extern int HPAGE_SHIFT; #define HPAGE_SIZE (1UL << HPAGE_SHIFT) #define HPAGE_MASK (~(HPAGE_SIZE - 1)) #define HUGETLB_PAGE_ORDER (HPAGE_SHIFT - PAGE_SHIFT) @@ -27,9 +30,6 @@ #define ARCH_HAS_PREPARE_HUGEPAGE #define ARCH_HAS_HUGEPAGE_CLEAR_FLUSH -#include <asm/setup.h> -#ifndef __ASSEMBLY__ - static inline void storage_key_init_range(unsigned long start, unsigned long end) { #if PAGE_DEFAULT_KEY diff --git a/arch/s390/include/asm/pgalloc.h b/arch/s390/include/asm/pgalloc.h index 51e7fb634ebc..7b7858f158b4 100644 --- a/arch/s390/include/asm/pgalloc.h +++ b/arch/s390/include/asm/pgalloc.h @@ -21,6 +21,7 @@ void crst_table_free(struct mm_struct *, unsigned long *); unsigned long *page_table_alloc(struct mm_struct *); void page_table_free(struct mm_struct *, unsigned long *); void page_table_free_rcu(struct mmu_gather *, unsigned long *, unsigned long); +extern int page_table_allocate_pgste; int set_guest_storage_key(struct mm_struct *mm, unsigned long addr, unsigned long key, bool nq); diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 989cfae9e202..f66d82798a6a 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -12,12 +12,9 @@ #define _ASM_S390_PGTABLE_H /* - * The Linux memory management assumes a three-level page table setup. For - * s390 31 bit we "fold" the mid level into the top-level page table, so - * that we physically have the same two-level page table as the s390 mmu - * expects in 31 bit mode. For s390 64 bit we use three of the five levels - * the hardware provides (region first and region second tables are not - * used). + * The Linux memory management assumes a three-level page table setup. + * For s390 64 bit we use up to four of the five levels the hardware + * provides (region first tables are not used). * * The "pgd_xxx()" functions are trivial for a folded two-level * setup: the pgd is never bad, and a pmd always exists (as it's folded @@ -101,8 +98,8 @@ extern unsigned long zero_page_mask; #ifndef __ASSEMBLY__ /* - * The vmalloc and module area will always be on the topmost area of the kernel - * mapping. We reserve 96MB (31bit) / 128GB (64bit) for vmalloc and modules. + * The vmalloc and module area will always be on the topmost area of the + * kernel mapping. We reserve 128GB (64bit) for vmalloc and modules. * On 64 bit kernels we have a 2GB area at the top of the vmalloc area where * modules will reside. That makes sure that inter module branches always * happen without trampolines and in addition the placement within a 2GB frame @@ -131,38 +128,6 @@ static inline int is_module_addr(void *addr) } /* - * A 31 bit pagetable entry of S390 has following format: - * | PFRA | | OS | - * 0 0IP0 - * 00000000001111111111222222222233 - * 01234567890123456789012345678901 - * - * I Page-Invalid Bit: Page is not available for address-translation - * P Page-Protection Bit: Store access not possible for page - * - * A 31 bit segmenttable entry of S390 has following format: - * | P-table origin | |PTL - * 0 IC - * 00000000001111111111222222222233 - * 01234567890123456789012345678901 - * - * I Segment-Invalid Bit: Segment is not available for address-translation - * C Common-Segment Bit: Segment is not private (PoP 3-30) - * PTL Page-Table-Length: Page-table length (PTL+1*16 entries -> up to 256) - * - * The 31 bit segmenttable origin of S390 has following format: - * - * |S-table origin | | STL | - * X **GPS - * 00000000001111111111222222222233 - * 01234567890123456789012345678901 - * - * X Space-Switch event: - * G Segment-Invalid Bit: * - * P Private-Space Bit: Segment is not private (PoP 3-30) - * S Storage-Alteration: - * STL Segment-Table-Length: Segment-table length (STL+1*16 entries -> up to 2048) - * * A 64 bit pagetable entry of S390 has following format: * | PFRA |0IPC| OS | * 0000000000111111111122222222223333333333444444444455555555556666 @@ -220,7 +185,6 @@ static inline int is_module_addr(void *addr) /* Software bits in the page table entry */ #define _PAGE_PRESENT 0x001 /* SW pte present bit */ -#define _PAGE_TYPE 0x002 /* SW pte type bit */ #define _PAGE_YOUNG 0x004 /* SW pte young bit */ #define _PAGE_DIRTY 0x008 /* SW pte dirty bit */ #define _PAGE_READ 0x010 /* SW pte read bit */ @@ -240,31 +204,34 @@ static inline int is_module_addr(void *addr) * table lock held. * * The following table gives the different possible bit combinations for - * the pte hardware and software bits in the last 12 bits of a pte: + * the pte hardware and software bits in the last 12 bits of a pte + * (. unassigned bit, x don't care, t swap type): * * 842100000000 * 000084210000 * 000000008421 - * .IR...wrdytp - * empty .10...000000 - * swap .10...xxxx10 - * file .11...xxxxx0 - * prot-none, clean, old .11...000001 - * prot-none, clean, young .11...000101 - * prot-none, dirty, old .10...001001 - * prot-none, dirty, young .10...001101 - * read-only, clean, old .11...010001 - * read-only, clean, young .01...010101 - * read-only, dirty, old .11...011001 - * read-only, dirty, young .01...011101 - * read-write, clean, old .11...110001 - * read-write, clean, young .01...110101 - * read-write, dirty, old .10...111001 - * read-write, dirty, young .00...111101 + * .IR.uswrdy.p + * empty .10.00000000 + * swap .11..ttttt.0 + * prot-none, clean, old .11.xx0000.1 + * prot-none, clean, young .11.xx0001.1 + * prot-none, dirty, old .10.xx0010.1 + * prot-none, dirty, young .10.xx0011.1 + * read-only, clean, old .11.xx0100.1 + * read-only, clean, young .01.xx0101.1 + * read-only, dirty, old .11.xx0110.1 + * read-only, dirty, young .01.xx0111.1 + * read-write, clean, old .11.xx1100.1 + * read-write, clean, young .01.xx1101.1 + * read-write, dirty, old .10.xx1110.1 + * read-write, dirty, young .00.xx1111.1 + * HW-bits: R read-only, I invalid + * SW-bits: p present, y young, d dirty, r read, w write, s special, + * u unused, l large * - * pte_present is true for the bit pattern .xx...xxxxx1, (pte & 0x001) == 0x001 - * pte_none is true for the bit pattern .10...xxxx00, (pte & 0x603) == 0x400 - * pte_swap is true for the bit pattern .10...xxxx10, (pte & 0x603) == 0x402 + * pte_none is true for the bit pattern .10.00000000, pte == 0x400 + * pte_swap is true for the bit pattern .11..ooooo.0, (pte & 0x201) == 0x200 + * pte_present is true for the bit pattern .xx.xxxxxx.1, (pte & 0x001) == 0x001 */ /* Bits in the segment/region table address-space-control-element */ @@ -335,6 +302,8 @@ static inline int is_module_addr(void *addr) * read-write, dirty, young 11..0...0...11 * The segment table origin is used to distinguish empty (origin==0) from * read-write, old segment table entries (origin!=0) + * HW-bits: R read-only, I invalid + * SW-bits: y young, d dirty, r read, w write */ #define _SEGMENT_ENTRY_SPLIT_BIT 11 /* THP splitting bit number */ @@ -423,6 +392,15 @@ static inline int mm_has_pgste(struct mm_struct *mm) return 0; } +static inline int mm_alloc_pgste(struct mm_struct *mm) +{ +#ifdef CONFIG_PGSTE + if (unlikely(mm->context.alloc_pgste)) + return 1; +#endif + return 0; +} + /* * In the case that a guest uses storage keys * faults should no longer be backed by zero pages @@ -516,7 +494,7 @@ static inline int pmd_large(pmd_t pmd) return (pmd_val(pmd) & _SEGMENT_ENTRY_LARGE) != 0; } -static inline int pmd_pfn(pmd_t pmd) +static inline unsigned long pmd_pfn(pmd_t pmd) { unsigned long origin_mask; @@ -582,10 +560,9 @@ static inline int pte_none(pte_t pte) static inline int pte_swap(pte_t pte) { - /* Bit pattern: (pte & 0x603) == 0x402 */ - return (pte_val(pte) & (_PAGE_INVALID | _PAGE_PROTECT | - _PAGE_TYPE | _PAGE_PRESENT)) - == (_PAGE_INVALID | _PAGE_TYPE); + /* Bit pattern: (pte & 0x201) == 0x200 */ + return (pte_val(pte) & (_PAGE_PROTECT | _PAGE_PRESENT)) + == _PAGE_PROTECT; } static inline int pte_special(pte_t pte) @@ -1521,9 +1498,9 @@ static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma, return pmd_young(pmd); } -#define __HAVE_ARCH_PMDP_GET_AND_CLEAR -static inline pmd_t pmdp_get_and_clear(struct mm_struct *mm, - unsigned long address, pmd_t *pmdp) +#define __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR +static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm, + unsigned long address, pmd_t *pmdp) { pmd_t pmd = *pmdp; @@ -1532,10 +1509,10 @@ static inline pmd_t pmdp_get_and_clear(struct mm_struct *mm, return pmd; } -#define __HAVE_ARCH_PMDP_GET_AND_CLEAR_FULL -static inline pmd_t pmdp_get_and_clear_full(struct mm_struct *mm, - unsigned long address, - pmd_t *pmdp, int full) +#define __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR_FULL +static inline pmd_t pmdp_huge_get_and_clear_full(struct mm_struct *mm, + unsigned long address, + pmd_t *pmdp, int full) { pmd_t pmd = *pmdp; @@ -1545,11 +1522,11 @@ static inline pmd_t pmdp_get_and_clear_full(struct mm_struct *mm, return pmd; } -#define __HAVE_ARCH_PMDP_CLEAR_FLUSH -static inline pmd_t pmdp_clear_flush(struct vm_area_struct *vma, - unsigned long address, pmd_t *pmdp) +#define __HAVE_ARCH_PMDP_HUGE_CLEAR_FLUSH +static inline pmd_t pmdp_huge_clear_flush(struct vm_area_struct *vma, + unsigned long address, pmd_t *pmdp) { - return pmdp_get_and_clear(vma->vm_mm, address, pmdp); + return pmdp_huge_get_and_clear(vma->vm_mm, address, pmdp); } #define __HAVE_ARCH_PMDP_INVALIDATE @@ -1571,6 +1548,14 @@ static inline void pmdp_set_wrprotect(struct mm_struct *mm, } } +static inline pmd_t pmdp_collapse_flush(struct vm_area_struct *vma, + unsigned long address, + pmd_t *pmdp) +{ + return pmdp_huge_get_and_clear(vma->vm_mm, address, pmdp); +} +#define pmdp_collapse_flush pmdp_collapse_flush + #define pfn_pmd(pfn, pgprot) mk_pmd_phys(__pa((pfn) << PAGE_SHIFT), (pgprot)) #define mk_pmd(page, pgprot) pfn_pmd(page_to_pfn(page), (pgprot)) @@ -1586,51 +1571,51 @@ static inline int has_transparent_hugepage(void) #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ /* - * 31 bit swap entry format: - * A page-table entry has some bits we have to treat in a special way. - * Bits 0, 20 and bit 23 have to be zero, otherwise an specification - * exception will occur instead of a page translation exception. The - * specifiation exception has the bad habit not to store necessary - * information in the lowcore. - * Bits 21, 22, 30 and 31 are used to indicate the page type. - * A swap pte is indicated by bit pattern (pte & 0x603) == 0x402 - * This leaves the bits 1-19 and bits 24-29 to store type and offset. - * We use the 5 bits from 25-29 for the type and the 20 bits from 1-19 - * plus 24 for the offset. - * 0| offset |0110|o|type |00| - * 0 0000000001111111111 2222 2 22222 33 - * 0 1234567890123456789 0123 4 56789 01 - * * 64 bit swap entry format: * A page-table entry has some bits we have to treat in a special way. - * Bits 52 and bit 55 have to be zero, otherwise an specification + * Bits 52 and bit 55 have to be zero, otherwise a specification * exception will occur instead of a page translation exception. The - * specifiation exception has the bad habit not to store necessary + * specification exception has the bad habit not to store necessary * information in the lowcore. - * Bits 53, 54, 62 and 63 are used to indicate the page type. - * A swap pte is indicated by bit pattern (pte & 0x603) == 0x402 - * This leaves the bits 0-51 and bits 56-61 to store type and offset. - * We use the 5 bits from 57-61 for the type and the 53 bits from 0-51 - * plus 56 for the offset. - * | offset |0110|o|type |00| - * 0000000000111111111122222222223333333333444444444455 5555 5 55566 66 - * 0123456789012345678901234567890123456789012345678901 2345 6 78901 23 + * Bits 54 and 63 are used to indicate the page type. + * A swap pte is indicated by bit pattern (pte & 0x201) == 0x200 + * This leaves the bits 0-51 and bits 56-62 to store type and offset. + * We use the 5 bits from 57-61 for the type and the 52 bits from 0-51 + * for the offset. + * | offset |01100|type |00| + * |0000000000111111111122222222223333333333444444444455|55555|55566|66| + * |0123456789012345678901234567890123456789012345678901|23456|78901|23| */ -#define __SWP_OFFSET_MASK (~0UL >> 11) +#define __SWP_OFFSET_MASK ((1UL << 52) - 1) +#define __SWP_OFFSET_SHIFT 12 +#define __SWP_TYPE_MASK ((1UL << 5) - 1) +#define __SWP_TYPE_SHIFT 2 static inline pte_t mk_swap_pte(unsigned long type, unsigned long offset) { pte_t pte; - offset &= __SWP_OFFSET_MASK; - pte_val(pte) = _PAGE_INVALID | _PAGE_TYPE | ((type & 0x1f) << 2) | - ((offset & 1UL) << 7) | ((offset & ~1UL) << 11); + + pte_val(pte) = _PAGE_INVALID | _PAGE_PROTECT; + pte_val(pte) |= (offset & __SWP_OFFSET_MASK) << __SWP_OFFSET_SHIFT; + pte_val(pte) |= (type & __SWP_TYPE_MASK) << __SWP_TYPE_SHIFT; return pte; } -#define __swp_type(entry) (((entry).val >> 2) & 0x1f) -#define __swp_offset(entry) (((entry).val >> 11) | (((entry).val >> 7) & 1)) -#define __swp_entry(type,offset) ((swp_entry_t) { pte_val(mk_swap_pte((type),(offset))) }) +static inline unsigned long __swp_type(swp_entry_t entry) +{ + return (entry.val >> __SWP_TYPE_SHIFT) & __SWP_TYPE_MASK; +} + +static inline unsigned long __swp_offset(swp_entry_t entry) +{ + return (entry.val >> __SWP_OFFSET_SHIFT) & __SWP_OFFSET_MASK; +} + +static inline swp_entry_t __swp_entry(unsigned long type, unsigned long offset) +{ + return (swp_entry_t) { pte_val(mk_swap_pte(type, offset)) }; +} #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) }) #define __swp_entry_to_pte(x) ((pte_t) { (x).val }) diff --git a/arch/s390/include/asm/sclp.h b/arch/s390/include/asm/sclp.h index f1096bab5199..c891f41b2753 100644 --- a/arch/s390/include/asm/sclp.h +++ b/arch/s390/include/asm/sclp.h @@ -46,33 +46,39 @@ struct sclp_cpu_info { struct sclp_cpu_entry cpu[MAX_CPU_ADDRESS + 1]; }; +struct sclp_info { + unsigned char has_linemode : 1; + unsigned char has_vt220 : 1; + unsigned char has_siif : 1; + unsigned char has_sigpif : 1; + unsigned char has_cpu_type : 1; + unsigned char has_sprp : 1; + unsigned int ibc; + unsigned int mtid; + unsigned int mtid_cp; + unsigned int mtid_prev; + unsigned long long rzm; + unsigned long long rnmax; + unsigned long long hamax; + unsigned int max_cpu; + unsigned long hsa_size; + unsigned long long facilities; +}; +extern struct sclp_info sclp; + int sclp_get_cpu_info(struct sclp_cpu_info *info); int sclp_cpu_configure(u8 cpu); int sclp_cpu_deconfigure(u8 cpu); -unsigned long long sclp_get_rnmax(void); -unsigned long long sclp_get_rzm(void); -unsigned int sclp_get_max_cpu(void); -unsigned int sclp_get_mtid(u8 cpu_type); -unsigned int sclp_get_mtid_max(void); -unsigned int sclp_get_mtid_prev(void); int sclp_sdias_blk_count(void); int sclp_sdias_copy(void *dest, int blk_num, int nr_blks); int sclp_chp_configure(struct chp_id chpid); int sclp_chp_deconfigure(struct chp_id chpid); int sclp_chp_read_info(struct sclp_chp_info *info); void sclp_get_ipl_info(struct sclp_ipl_info *info); -bool __init sclp_has_linemode(void); -bool __init sclp_has_vt220(void); -bool sclp_has_sprp(void); int sclp_pci_configure(u32 fid); int sclp_pci_deconfigure(u32 fid); int memcpy_hsa(void *dest, unsigned long src, size_t count, int mode); -unsigned long sclp_get_hsa_size(void); void sclp_early_detect(void); -int sclp_has_siif(void); -int sclp_has_sigpif(void); -unsigned int sclp_get_ibc(void); - long _sclp_print_early(const char *); #endif /* _ASM_S390_SCLP_H */ diff --git a/arch/s390/include/asm/timex.h b/arch/s390/include/asm/timex.h index 98eb2a579223..dcb6312a0b91 100644 --- a/arch/s390/include/asm/timex.h +++ b/arch/s390/include/asm/timex.h @@ -10,6 +10,7 @@ #define _ASM_S390_TIMEX_H #include <asm/lowcore.h> +#include <linux/time64.h> /* The value of the TOD clock for 1.1.1970. */ #define TOD_UNIX_EPOCH 0x7d91048bca000000ULL @@ -108,10 +109,10 @@ int get_sync_clock(unsigned long long *clock); void init_cpu_timer(void); unsigned long long monotonic_clock(void); -void tod_to_timeval(__u64, struct timespec *); +void tod_to_timeval(__u64 todval, struct timespec64 *xt); static inline -void stck_to_timespec(unsigned long long stck, struct timespec *ts) +void stck_to_timespec64(unsigned long long stck, struct timespec64 *ts) { tod_to_timeval(stck - TOD_UNIX_EPOCH, ts); } diff --git a/arch/s390/include/asm/topology.h b/arch/s390/include/asm/topology.h index b1453a2ae1ca..4990f6c66288 100644 --- a/arch/s390/include/asm/topology.h +++ b/arch/s390/include/asm/topology.h @@ -22,7 +22,8 @@ DECLARE_PER_CPU(struct cpu_topology_s390, cpu_topology); #define topology_physical_package_id(cpu) (per_cpu(cpu_topology, cpu).socket_id) #define topology_thread_id(cpu) (per_cpu(cpu_topology, cpu).thread_id) -#define topology_thread_cpumask(cpu) (&per_cpu(cpu_topology, cpu).thread_mask) +#define topology_sibling_cpumask(cpu) \ + (&per_cpu(cpu_topology, cpu).thread_mask) #define topology_core_id(cpu) (per_cpu(cpu_topology, cpu).core_id) #define topology_core_cpumask(cpu) (&per_cpu(cpu_topology, cpu).core_mask) #define topology_book_id(cpu) (per_cpu(cpu_topology, cpu).book_id) diff --git a/arch/s390/include/asm/uaccess.h b/arch/s390/include/asm/uaccess.h index d64a7a62164f..9dd4cc47ddc7 100644 --- a/arch/s390/include/asm/uaccess.h +++ b/arch/s390/include/asm/uaccess.h @@ -98,7 +98,8 @@ static inline unsigned long extable_fixup(const struct exception_table_entry *x) * @from: Source address, in user space. * @n: Number of bytes to copy. * - * Context: User context only. This function may sleep. + * Context: User context only. This function may sleep if pagefaults are + * enabled. * * Copy data from user space to kernel space. Caller must check * the specified block with access_ok() before calling this function. @@ -118,7 +119,8 @@ unsigned long __must_check __copy_from_user(void *to, const void __user *from, * @from: Source address, in kernel space. * @n: Number of bytes to copy. * - * Context: User context only. This function may sleep. + * Context: User context only. This function may sleep if pagefaults are + * enabled. * * Copy data from kernel space to user space. Caller must check * the specified block with access_ok() before calling this function. @@ -264,7 +266,8 @@ int __get_user_bad(void) __attribute__((noreturn)); * @from: Source address, in kernel space. * @n: Number of bytes to copy. * - * Context: User context only. This function may sleep. + * Context: User context only. This function may sleep if pagefaults are + * enabled. * * Copy data from kernel space to user space. * @@ -290,7 +293,8 @@ __compiletime_warning("copy_from_user() buffer size is not provably correct") * @from: Source address, in user space. * @n: Number of bytes to copy. * - * Context: User context only. This function may sleep. + * Context: User context only. This function may sleep if pagefaults are + * enabled. * * Copy data from user space to kernel space. * @@ -348,7 +352,8 @@ static inline unsigned long strnlen_user(const char __user *src, unsigned long n * strlen_user: - Get the size of a string in user space. * @str: The string to measure. * - * Context: User context only. This function may sleep. + * Context: User context only. This function may sleep if pagefaults are + * enabled. * * Get the size of a NUL-terminated string in user space. * diff --git a/arch/s390/kernel/compat_wrapper.c b/arch/s390/kernel/compat_wrapper.c index d7fa2f0f1425..f8498dde67b1 100644 --- a/arch/s390/kernel/compat_wrapper.c +++ b/arch/s390/kernel/compat_wrapper.c @@ -202,7 +202,7 @@ COMPAT_SYSCALL_WRAP1(epoll_create1, int, flags); COMPAT_SYSCALL_WRAP2(tkill, int, pid, int, sig); COMPAT_SYSCALL_WRAP3(tgkill, int, tgid, int, pid, int, sig); COMPAT_SYSCALL_WRAP5(perf_event_open, struct perf_event_attr __user *, attr_uptr, pid_t, pid, int, cpu, int, group_fd, unsigned long, flags); -COMPAT_SYSCALL_WRAP5(clone, unsigned long, newsp, unsigned long, clone_flags, int __user *, parent_tidptr, int __user *, child_tidptr, int, tls_val); +COMPAT_SYSCALL_WRAP5(clone, unsigned long, newsp, unsigned long, clone_flags, int __user *, parent_tidptr, int __user *, child_tidptr, unsigned long, tls); COMPAT_SYSCALL_WRAP2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags); COMPAT_SYSCALL_WRAP4(prlimit64, pid_t, pid, unsigned int, resource, const struct rlimit64 __user *, new_rlim, struct rlimit64 __user *, old_rlim); COMPAT_SYSCALL_WRAP5(name_to_handle_at, int, dfd, const char __user *, name, struct file_handle __user *, handle, int __user *, mnt_id, int, flag); diff --git a/arch/s390/kernel/crash_dump.c b/arch/s390/kernel/crash_dump.c index 9f73c8059022..7a75ad4594e3 100644 --- a/arch/s390/kernel/crash_dump.c +++ b/arch/s390/kernel/crash_dump.c @@ -33,11 +33,12 @@ static struct memblock_type oldmem_type = { }; #define for_each_dump_mem_range(i, nid, p_start, p_end, p_nid) \ - for (i = 0, __next_mem_range(&i, nid, &memblock.physmem, \ + for (i = 0, __next_mem_range(&i, nid, MEMBLOCK_NONE, \ + &memblock.physmem, \ &oldmem_type, p_start, \ p_end, p_nid); \ i != (u64)ULLONG_MAX; \ - __next_mem_range(&i, nid, &memblock.physmem, \ + __next_mem_range(&i, nid, MEMBLOCK_NONE, &memblock.physmem,\ &oldmem_type, \ p_start, p_end, p_nid)) @@ -122,7 +123,7 @@ static ssize_t copy_oldmem_page_zfcpdump(char *buf, size_t csize, { int rc; - if (src < sclp_get_hsa_size()) { + if (src < sclp.hsa_size) { rc = memcpy_hsa(buf, src, csize, userbuf); } else { if (userbuf) @@ -215,7 +216,7 @@ static int remap_oldmem_pfn_range_zfcpdump(struct vm_area_struct *vma, unsigned long pfn, unsigned long size, pgprot_t prot) { - unsigned long hsa_end = sclp_get_hsa_size(); + unsigned long hsa_end = sclp.hsa_size; unsigned long size_hsa; if (pfn < hsa_end >> PAGE_SHIFT) { @@ -258,7 +259,7 @@ int copy_from_oldmem(void *dest, void *src, size_t count) return rc; } } else { - unsigned long hsa_end = sclp_get_hsa_size(); + unsigned long hsa_end = sclp.hsa_size; if ((unsigned long) src < hsa_end) { copied = min(count, hsa_end - (unsigned long) src); rc = memcpy_hsa(dest, (unsigned long) src, copied, 0); @@ -609,7 +610,7 @@ int elfcorehdr_alloc(unsigned long long *addr, unsigned long long *size) if (elfcorehdr_addr != ELFCORE_ADDR_MAX) return 0; /* If we cannot get HSA size for zfcpdump return error */ - if (ipl_info.type == IPL_TYPE_FCP_DUMP && !sclp_get_hsa_size()) + if (ipl_info.type == IPL_TYPE_FCP_DUMP && !sclp.hsa_size) return -ENODEV; /* For kdump, exclude previous crashkernel memory */ diff --git a/arch/s390/kernel/debug.c b/arch/s390/kernel/debug.c index c1f21aca76e7..6fca0e46464e 100644 --- a/arch/s390/kernel/debug.c +++ b/arch/s390/kernel/debug.c @@ -1457,23 +1457,24 @@ int debug_dflt_header_fn(debug_info_t * id, struct debug_view *view, int area, debug_entry_t * entry, char *out_buf) { - struct timespec time_spec; + struct timespec64 time_spec; char *except_str; unsigned long caller; int rc = 0; unsigned int level; level = entry->id.fields.level; - stck_to_timespec(entry->id.stck, &time_spec); + stck_to_timespec64(entry->id.stck, &time_spec); if (entry->id.fields.exception) except_str = "*"; else except_str = "-"; caller = ((unsigned long) entry->caller) & PSW_ADDR_INSN; - rc += sprintf(out_buf, "%02i %011lu:%06lu %1u %1s %02i %p ", - area, time_spec.tv_sec, time_spec.tv_nsec / 1000, level, - except_str, entry->id.fields.cpuid, (void *) caller); + rc += sprintf(out_buf, "%02i %011lld:%06lu %1u %1s %02i %p ", + area, (long long)time_spec.tv_sec, + time_spec.tv_nsec / 1000, level, except_str, + entry->id.fields.cpuid, (void *)caller); return rc; } EXPORT_SYMBOL(debug_dflt_header_fn); diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index 99b44acbfcc7..3238893c9d4f 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -1005,7 +1005,7 @@ ENTRY(sie64a) .Lsie_gmap: lg %r14,__SF_EMPTY(%r15) # get control block pointer oi __SIE_PROG0C+3(%r14),1 # we are going into SIE now - tm __SIE_PROG20+3(%r14),1 # last exit... + tm __SIE_PROG20+3(%r14),3 # last exit... jnz .Lsie_done LPP __SF_EMPTY(%r15) # set guest id sie 0(%r14) diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index 7262fe438c99..73941bf42350 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -128,9 +128,9 @@ __setup("condev=", condev_setup); static void __init set_preferred_console(void) { if (MACHINE_IS_KVM) { - if (sclp_has_vt220()) + if (sclp.has_vt220) add_preferred_console("ttyS", 1, NULL); - else if (sclp_has_linemode()) + else if (sclp.has_linemode) add_preferred_console("ttyS", 0, NULL); else add_preferred_console("hvc", 0, NULL); @@ -510,8 +510,8 @@ static void reserve_memory_end(void) { #ifdef CONFIG_CRASH_DUMP if (ipl_info.type == IPL_TYPE_FCP_DUMP && - !OLDMEM_BASE && sclp_get_hsa_size()) { - memory_end = sclp_get_hsa_size(); + !OLDMEM_BASE && sclp.hsa_size) { + memory_end = sclp.hsa_size; memory_end &= PAGE_MASK; memory_end_set = 1; } @@ -576,7 +576,7 @@ static void __init reserve_crashkernel(void) crash_base = low; } else { /* Find suitable area in free memory */ - low = max_t(unsigned long, crash_size, sclp_get_hsa_size()); + low = max_t(unsigned long, crash_size, sclp.hsa_size); high = crash_base ? crash_base + crash_size : ULONG_MAX; if (crash_base && crash_base < low) { @@ -640,19 +640,24 @@ static void __init check_initrd(void) } /* - * Reserve all kernel text + * Reserve memory used for lowcore/command line/kernel image. */ static void __init reserve_kernel(void) { - unsigned long start_pfn; - start_pfn = PFN_UP(__pa(&_end)); + unsigned long start_pfn = PFN_UP(__pa(&_end)); +#ifdef CONFIG_DMA_API_DEBUG /* - * Reserve memory used for lowcore/command line/kernel image. + * DMA_API_DEBUG code stumbles over addresses from the + * range [_ehead, _stext]. Mark the memory as reserved + * so it is not used for CONFIG_DMA_API_DEBUG=y. */ + memblock_reserve(0, PFN_PHYS(start_pfn)); +#else memblock_reserve(0, (unsigned long)_ehead); memblock_reserve((unsigned long)_stext, PFN_PHYS(start_pfn) - (unsigned long)_stext); +#endif } static void __init reserve_elfcorehdr(void) @@ -875,6 +880,8 @@ void __init setup_arch(char **cmdline_p) */ setup_hwcaps(); + HPAGE_SHIFT = MACHINE_HAS_HPAGE ? 20 : 0; + /* * Create kernel page tables and switch to virtual addressing. */ diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index efd2c1968000..0d9d59d4710e 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -601,7 +601,7 @@ static void __init smp_store_cpu_states(struct sclp_cpu_info *info) /* No previous system present, normal boot. */ return; /* Set multi-threading state to the previous system. */ - pcpu_set_smt(sclp_get_mtid_prev()); + pcpu_set_smt(sclp.mtid_prev); /* Collect CPU states. */ cpu = 0; for (i = 0; i < info->configured; i++) { @@ -740,7 +740,7 @@ static void __init smp_detect_cpus(void) #endif /* Set multi-threading state for the current system */ - mtid = sclp_get_mtid(boot_cpu_type); + mtid = boot_cpu_type ? sclp.mtid : sclp.mtid_cp; mtid = (mtid < smp_max_threads) ? mtid : smp_max_threads - 1; pcpu_set_smt(mtid); @@ -880,12 +880,13 @@ void __noreturn cpu_die(void) void __init smp_fill_possible_mask(void) { - unsigned int possible, sclp, cpu; + unsigned int possible, sclp_max, cpu; - sclp = min(smp_max_threads, sclp_get_mtid_max() + 1); - sclp = sclp_get_max_cpu()*sclp ?: nr_cpu_ids; + sclp_max = max(sclp.mtid, sclp.mtid_cp) + 1; + sclp_max = min(smp_max_threads, sclp_max); + sclp_max = sclp.max_cpu * sclp_max ?: nr_cpu_ids; possible = setup_possible_cpus ?: nr_cpu_ids; - possible = min(possible, sclp); + possible = min(possible, sclp_max); for (cpu = 0; cpu < possible && cpu < nr_cpu_ids; cpu++) set_cpu_possible(cpu, true); } diff --git a/arch/s390/kernel/suspend.c b/arch/s390/kernel/suspend.c index d3236c9e226b..39e2f41b6cf0 100644 --- a/arch/s390/kernel/suspend.c +++ b/arch/s390/kernel/suspend.c @@ -9,10 +9,10 @@ #include <linux/pfn.h> #include <linux/suspend.h> #include <linux/mm.h> +#include <linux/pci.h> #include <asm/ctl_reg.h> #include <asm/ipl.h> #include <asm/cio.h> -#include <asm/pci.h> #include <asm/sections.h> #include "entry.h" diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c index 170ddd2018b3..9e733d965e08 100644 --- a/arch/s390/kernel/time.c +++ b/arch/s390/kernel/time.c @@ -76,7 +76,7 @@ unsigned long long monotonic_clock(void) } EXPORT_SYMBOL(monotonic_clock); -void tod_to_timeval(__u64 todval, struct timespec *xt) +void tod_to_timeval(__u64 todval, struct timespec64 *xt) { unsigned long long sec; @@ -181,12 +181,12 @@ static void timing_alert_interrupt(struct ext_code ext_code, static void etr_reset(void); static void stp_reset(void); -void read_persistent_clock(struct timespec *ts) +void read_persistent_clock64(struct timespec64 *ts) { tod_to_timeval(get_tod_clock() - TOD_UNIX_EPOCH, ts); } -void read_boot_clock(struct timespec *ts) +void read_boot_clock64(struct timespec64 *ts) { tod_to_timeval(sched_clock_base_cc - TOD_UNIX_EPOCH, ts); } diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c index 9e3779e3e496..7365e8a46032 100644 --- a/arch/s390/kvm/intercept.c +++ b/arch/s390/kvm/intercept.c @@ -241,21 +241,6 @@ static int handle_prog(struct kvm_vcpu *vcpu) return kvm_s390_inject_prog_irq(vcpu, &pgm_info); } -static int handle_instruction_and_prog(struct kvm_vcpu *vcpu) -{ - int rc, rc2; - - vcpu->stat.exit_instr_and_program++; - rc = handle_instruction(vcpu); - rc2 = handle_prog(vcpu); - - if (rc == -EOPNOTSUPP) - vcpu->arch.sie_block->icptcode = 0x04; - if (rc) - return rc; - return rc2; -} - /** * handle_external_interrupt - used for external interruption interceptions * @@ -355,7 +340,6 @@ static const intercept_handler_t intercept_funcs[] = { [0x00 >> 2] = handle_noop, [0x04 >> 2] = handle_instruction, [0x08 >> 2] = handle_prog, - [0x0C >> 2] = handle_instruction_and_prog, [0x10 >> 2] = handle_noop, [0x14 >> 2] = handle_external_interrupt, [0x18 >> 2] = handle_noop, diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index 9de47265ef73..c98d89708e99 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -134,6 +134,8 @@ static unsigned long deliverable_irqs(struct kvm_vcpu *vcpu) active_mask = pending_local_irqs(vcpu); active_mask |= pending_floating_irqs(vcpu); + if (!active_mask) + return 0; if (psw_extint_disabled(vcpu)) active_mask &= ~IRQ_PEND_EXT_MASK; @@ -799,7 +801,7 @@ int kvm_s390_ext_call_pending(struct kvm_vcpu *vcpu) struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; uint8_t sigp_ctrl = vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sigp_ctrl; - if (!sclp_has_sigpif()) + if (!sclp.has_sigpif) return test_bit(IRQ_PEND_EXT_EXTERNAL, &li->pending_irqs); return (sigp_ctrl & SIGP_CTRL_C) && @@ -941,12 +943,9 @@ int __must_check kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu) if (cpu_timer_irq_pending(vcpu)) set_bit(IRQ_PEND_EXT_CPU_TIMER, &li->pending_irqs); - do { - irqs = deliverable_irqs(vcpu); + while ((irqs = deliverable_irqs(vcpu)) && !rc) { /* bits are in the order of interrupt priority */ irq_type = find_first_bit(&irqs, IRQ_PEND_COUNT); - if (irq_type == IRQ_PEND_COUNT) - break; if (is_ioirq(irq_type)) { rc = __deliver_io(vcpu, irq_type); } else { @@ -958,9 +957,7 @@ int __must_check kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu) } rc = func(vcpu); } - if (rc) - break; - } while (!rc); + } set_intercept_indicators(vcpu); @@ -1058,10 +1055,10 @@ static int __inject_extcall(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq) kvm_get_vcpu(vcpu->kvm, src_id) == NULL) return -EINVAL; - if (sclp_has_sigpif()) + if (sclp.has_sigpif) return __inject_extcall_sigpif(vcpu, src_id); - if (!test_and_set_bit(IRQ_PEND_EXT_EXTERNAL, &li->pending_irqs)) + if (test_and_set_bit(IRQ_PEND_EXT_EXTERNAL, &li->pending_irqs)) return -EBUSY; *extcall = irq->u.extcall; atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags); @@ -1340,12 +1337,54 @@ static int __inject_io(struct kvm *kvm, struct kvm_s390_interrupt_info *inti) return 0; } -static int __inject_vm(struct kvm *kvm, struct kvm_s390_interrupt_info *inti) +/* + * Find a destination VCPU for a floating irq and kick it. + */ +static void __floating_irq_kick(struct kvm *kvm, u64 type) { + struct kvm_s390_float_interrupt *fi = &kvm->arch.float_int; struct kvm_s390_local_interrupt *li; + struct kvm_vcpu *dst_vcpu; + int sigcpu, online_vcpus, nr_tries = 0; + + online_vcpus = atomic_read(&kvm->online_vcpus); + if (!online_vcpus) + return; + + /* find idle VCPUs first, then round robin */ + sigcpu = find_first_bit(fi->idle_mask, online_vcpus); + if (sigcpu == online_vcpus) { + do { + sigcpu = fi->next_rr_cpu; + fi->next_rr_cpu = (fi->next_rr_cpu + 1) % online_vcpus; + /* avoid endless loops if all vcpus are stopped */ + if (nr_tries++ >= online_vcpus) + return; + } while (is_vcpu_stopped(kvm_get_vcpu(kvm, sigcpu))); + } + dst_vcpu = kvm_get_vcpu(kvm, sigcpu); + + /* make the VCPU drop out of the SIE, or wake it up if sleeping */ + li = &dst_vcpu->arch.local_int; + spin_lock(&li->lock); + switch (type) { + case KVM_S390_MCHK: + atomic_set_mask(CPUSTAT_STOP_INT, li->cpuflags); + break; + case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX: + atomic_set_mask(CPUSTAT_IO_INT, li->cpuflags); + break; + default: + atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags); + break; + } + spin_unlock(&li->lock); + kvm_s390_vcpu_wakeup(dst_vcpu); +} + +static int __inject_vm(struct kvm *kvm, struct kvm_s390_interrupt_info *inti) +{ struct kvm_s390_float_interrupt *fi; - struct kvm_vcpu *dst_vcpu = NULL; - int sigcpu; u64 type = READ_ONCE(inti->type); int rc; @@ -1373,32 +1412,8 @@ static int __inject_vm(struct kvm *kvm, struct kvm_s390_interrupt_info *inti) if (rc) return rc; - sigcpu = find_first_bit(fi->idle_mask, KVM_MAX_VCPUS); - if (sigcpu == KVM_MAX_VCPUS) { - do { - sigcpu = fi->next_rr_cpu++; - if (sigcpu == KVM_MAX_VCPUS) - sigcpu = fi->next_rr_cpu = 0; - } while (kvm_get_vcpu(kvm, sigcpu) == NULL); - } - dst_vcpu = kvm_get_vcpu(kvm, sigcpu); - li = &dst_vcpu->arch.local_int; - spin_lock(&li->lock); - switch (type) { - case KVM_S390_MCHK: - atomic_set_mask(CPUSTAT_STOP_INT, li->cpuflags); - break; - case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX: - atomic_set_mask(CPUSTAT_IO_INT, li->cpuflags); - break; - default: - atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags); - break; - } - spin_unlock(&li->lock); - kvm_s390_vcpu_wakeup(kvm_get_vcpu(kvm, sigcpu)); + __floating_irq_kick(kvm, type); return 0; - } int kvm_s390_inject_vm(struct kvm *kvm, @@ -1606,6 +1621,9 @@ void kvm_s390_clear_float_irqs(struct kvm *kvm) int i; spin_lock(&fi->lock); + fi->pending_irqs = 0; + memset(&fi->srv_signal, 0, sizeof(fi->srv_signal)); + memset(&fi->mchk, 0, sizeof(fi->mchk)); for (i = 0; i < FIRQ_LIST_COUNT; i++) clear_irq_list(&fi->lists[i]); for (i = 0; i < FIRQ_MAX_COUNT; i++) diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 8cd8e7b288c5..2078f92d15ac 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -36,6 +36,10 @@ #include "kvm-s390.h" #include "gaccess.h" +#define KMSG_COMPONENT "kvm-s390" +#undef pr_fmt +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt + #define CREATE_TRACE_POINTS #include "trace.h" #include "trace-s390.h" @@ -110,7 +114,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { /* upper facilities limit for kvm */ unsigned long kvm_s390_fac_list_mask[] = { 0xffe6fffbfcfdfc40UL, - 0x005c800000000000UL, + 0x005e800000000000UL, }; unsigned long kvm_s390_fac_list_mask_size(void) @@ -236,6 +240,7 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, { int r; unsigned long n; + struct kvm_memslots *slots; struct kvm_memory_slot *memslot; int is_dirty = 0; @@ -245,7 +250,8 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, if (log->slot >= KVM_USER_MEM_SLOTS) goto out; - memslot = id_to_memslot(kvm->memslots, log->slot); + slots = kvm_memslots(kvm); + memslot = id_to_memslot(slots, log->slot); r = -ENOENT; if (!memslot->dirty_bitmap) goto out; @@ -454,10 +460,10 @@ static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr) mutex_lock(&kvm->lock); kvm->arch.epoch = gtod - host_tod; - kvm_for_each_vcpu(vcpu_idx, cur_vcpu, kvm) { + kvm_s390_vcpu_block_all(kvm); + kvm_for_each_vcpu(vcpu_idx, cur_vcpu, kvm) cur_vcpu->arch.sie_block->epoch = kvm->arch.epoch; - exit_sie(cur_vcpu); - } + kvm_s390_vcpu_unblock_all(kvm); mutex_unlock(&kvm->lock); return 0; } @@ -604,7 +610,7 @@ static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr) goto out; } get_cpu_id((struct cpuid *) &mach->cpuid); - mach->ibc = sclp_get_ibc(); + mach->ibc = sclp.ibc; memcpy(&mach->fac_mask, kvm->arch.model.fac->mask, S390_ARCH_FAC_LIST_SIZE_BYTE); memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list, @@ -1068,7 +1074,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) S390_ARCH_FAC_LIST_SIZE_BYTE); kvm_s390_get_cpu_id(&kvm->arch.model.cpu_id); - kvm->arch.model.ibc = sclp_get_ibc() & 0x0fff; + kvm->arch.model.ibc = sclp.ibc & 0x0fff; if (kvm_s390_crypto_init(kvm) < 0) goto out_err; @@ -1311,8 +1317,13 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH | CPUSTAT_SM | - CPUSTAT_STOPPED | - CPUSTAT_GED); + CPUSTAT_STOPPED); + + if (test_kvm_facility(vcpu->kvm, 78)) + atomic_set_mask(CPUSTAT_GED2, &vcpu->arch.sie_block->cpuflags); + else if (test_kvm_facility(vcpu->kvm, 8)) + atomic_set_mask(CPUSTAT_GED, &vcpu->arch.sie_block->cpuflags); + kvm_s390_vcpu_setup_model(vcpu); vcpu->arch.sie_block->ecb = 6; @@ -1321,9 +1332,9 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) vcpu->arch.sie_block->ecb2 = 8; vcpu->arch.sie_block->eca = 0xC1002000U; - if (sclp_has_siif()) + if (sclp.has_siif) vcpu->arch.sie_block->eca |= 1; - if (sclp_has_sigpif()) + if (sclp.has_sigpif) vcpu->arch.sie_block->eca |= 0x10000000U; if (test_kvm_facility(vcpu->kvm, 129)) { vcpu->arch.sie_block->eca |= 0x00020000; @@ -1409,16 +1420,28 @@ int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) return kvm_s390_vcpu_has_irq(vcpu, 0); } -void s390_vcpu_block(struct kvm_vcpu *vcpu) +void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu) { atomic_set_mask(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20); + exit_sie(vcpu); } -void s390_vcpu_unblock(struct kvm_vcpu *vcpu) +void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu) { atomic_clear_mask(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20); } +static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu) +{ + atomic_set_mask(PROG_REQUEST, &vcpu->arch.sie_block->prog20); + exit_sie(vcpu); +} + +static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu) +{ + atomic_clear_mask(PROG_REQUEST, &vcpu->arch.sie_block->prog20); +} + /* * Kick a guest cpu out of SIE and wait until SIE is not running. * If the CPU is not running (e.g. waiting as idle) the function will @@ -1430,11 +1453,11 @@ void exit_sie(struct kvm_vcpu *vcpu) cpu_relax(); } -/* Kick a guest cpu out of SIE and prevent SIE-reentry */ -void exit_sie_sync(struct kvm_vcpu *vcpu) +/* Kick a guest cpu out of SIE to process a request synchronously */ +void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu) { - s390_vcpu_block(vcpu); - exit_sie(vcpu); + kvm_make_request(req, vcpu); + kvm_s390_vcpu_request(vcpu); } static void kvm_gmap_notifier(struct gmap *gmap, unsigned long address) @@ -1447,8 +1470,7 @@ static void kvm_gmap_notifier(struct gmap *gmap, unsigned long address) /* match against both prefix pages */ if (kvm_s390_get_prefix(vcpu) == (address & ~0x1000UL)) { VCPU_EVENT(vcpu, 2, "gmap notifier for %lx", address); - kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu); - exit_sie_sync(vcpu); + kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu); } } } @@ -1720,8 +1742,10 @@ static bool ibs_enabled(struct kvm_vcpu *vcpu) static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu) { + if (!vcpu->requests) + return 0; retry: - s390_vcpu_unblock(vcpu); + kvm_s390_vcpu_request_handled(vcpu); /* * We use MMU_RELOAD just to re-arm the ipte notifier for the * guest prefix page. gmap_ipte_notify will wait on the ptl lock. @@ -1993,12 +2017,14 @@ static int __vcpu_run(struct kvm_vcpu *vcpu) * As PF_VCPU will be used in fault handler, between * guest_enter and guest_exit should be no uaccess. */ - preempt_disable(); - kvm_guest_enter(); - preempt_enable(); + local_irq_disable(); + __kvm_guest_enter(); + local_irq_enable(); exit_reason = sie64a(vcpu->arch.sie_block, vcpu->run->s.regs.gprs); - kvm_guest_exit(); + local_irq_disable(); + __kvm_guest_exit(); + local_irq_enable(); vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); rc = vcpu_post_run(vcpu, exit_reason); @@ -2068,7 +2094,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) { kvm_s390_vcpu_start(vcpu); } else if (is_vcpu_stopped(vcpu)) { - pr_err_ratelimited("kvm-s390: can't run stopped vcpu %d\n", + pr_err_ratelimited("can't run stopped vcpu %d\n", vcpu->vcpu_id); return -EINVAL; } @@ -2206,8 +2232,7 @@ int kvm_s390_vcpu_store_adtl_status(struct kvm_vcpu *vcpu, unsigned long addr) static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu) { kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu); - kvm_make_request(KVM_REQ_DISABLE_IBS, vcpu); - exit_sie_sync(vcpu); + kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu); } static void __disable_ibs_on_all_vcpus(struct kvm *kvm) @@ -2223,8 +2248,7 @@ static void __disable_ibs_on_all_vcpus(struct kvm *kvm) static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu) { kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu); - kvm_make_request(KVM_REQ_ENABLE_IBS, vcpu); - exit_sie_sync(vcpu); + kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu); } void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu) @@ -2563,7 +2587,7 @@ int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, /* Section: memory related */ int kvm_arch_prepare_memory_region(struct kvm *kvm, struct kvm_memory_slot *memslot, - struct kvm_userspace_memory_region *mem, + const struct kvm_userspace_memory_region *mem, enum kvm_mr_change change) { /* A few sanity checks. We can have memory slots which have to be @@ -2581,8 +2605,9 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, } void kvm_arch_commit_memory_region(struct kvm *kvm, - struct kvm_userspace_memory_region *mem, + const struct kvm_userspace_memory_region *mem, const struct kvm_memory_slot *old, + const struct kvm_memory_slot *new, enum kvm_mr_change change) { int rc; @@ -2601,7 +2626,7 @@ void kvm_arch_commit_memory_region(struct kvm *kvm, rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr, mem->guest_phys_addr, mem->memory_size); if (rc) - printk(KERN_WARNING "kvm-s390: failed to commit memory region\n"); + pr_warn("failed to commit memory region\n"); return; } diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h index ca108b90ae56..c5704786e473 100644 --- a/arch/s390/kvm/kvm-s390.h +++ b/arch/s390/kvm/kvm-s390.h @@ -211,10 +211,10 @@ int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr); int kvm_s390_vcpu_store_adtl_status(struct kvm_vcpu *vcpu, unsigned long addr); void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu); void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu); -void s390_vcpu_block(struct kvm_vcpu *vcpu); -void s390_vcpu_unblock(struct kvm_vcpu *vcpu); +void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu); +void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu); void exit_sie(struct kvm_vcpu *vcpu); -void exit_sie_sync(struct kvm_vcpu *vcpu); +void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu); int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu); void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu); /* is cmma enabled */ @@ -228,6 +228,25 @@ int kvm_s390_handle_diag(struct kvm_vcpu *vcpu); int kvm_s390_inject_prog_irq(struct kvm_vcpu *vcpu, struct kvm_s390_pgm_info *pgm_info); +static inline void kvm_s390_vcpu_block_all(struct kvm *kvm) +{ + int i; + struct kvm_vcpu *vcpu; + + WARN_ON(!mutex_is_locked(&kvm->lock)); + kvm_for_each_vcpu(i, vcpu, kvm) + kvm_s390_vcpu_block(vcpu); +} + +static inline void kvm_s390_vcpu_unblock_all(struct kvm *kvm) +{ + int i; + struct kvm_vcpu *vcpu; + + kvm_for_each_vcpu(i, vcpu, kvm) + kvm_s390_vcpu_unblock(vcpu); +} + /** * kvm_s390_inject_prog_cond - conditionally inject a program check * @vcpu: virtual cpu diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c index d22d8ee1ff9d..ad4242245771 100644 --- a/arch/s390/kvm/priv.c +++ b/arch/s390/kvm/priv.c @@ -698,10 +698,14 @@ static int handle_pfmf(struct kvm_vcpu *vcpu) case 0x00001000: end = (start + (1UL << 20)) & ~((1UL << 20) - 1); break; - /* We dont support EDAT2 case 0x00002000: + /* only support 2G frame size if EDAT2 is available and we are + not in 24-bit addressing mode */ + if (!test_kvm_facility(vcpu->kvm, 78) || + psw_bits(vcpu->arch.sie_block->gpsw).eaba == PSW_AMODE_24BIT) + return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); end = (start + (1UL << 31)) & ~((1UL << 31) - 1); - break;*/ + break; default: return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); } diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index 76515bcea2f1..4c8f5d7f9c23 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -399,7 +399,7 @@ static inline int do_exception(struct pt_regs *regs, int access) * user context. */ fault = VM_FAULT_BADCONTEXT; - if (unlikely(!user_space_fault(regs) || in_atomic() || !mm)) + if (unlikely(!user_space_fault(regs) || faulthandler_disabled() || !mm)) goto out; address = trans_exc_code & __FAIL_ADDR_MASK; diff --git a/arch/s390/mm/hugetlbpage.c b/arch/s390/mm/hugetlbpage.c index 210ffede0153..fb4bf2c4379e 100644 --- a/arch/s390/mm/hugetlbpage.c +++ b/arch/s390/mm/hugetlbpage.c @@ -14,20 +14,23 @@ static inline pmd_t __pte_to_pmd(pte_t pte) /* * Convert encoding pte bits pmd bits - * .IR...wrdytp dy..R...I...wr - * empty .10...000000 -> 00..0...1...00 - * prot-none, clean, old .11...000001 -> 00..1...1...00 - * prot-none, clean, young .11...000101 -> 01..1...1...00 - * prot-none, dirty, old .10...001001 -> 10..1...1...00 - * prot-none, dirty, young .10...001101 -> 11..1...1...00 - * read-only, clean, old .11...010001 -> 00..1...1...01 - * read-only, clean, young .01...010101 -> 01..1...0...01 - * read-only, dirty, old .11...011001 -> 10..1...1...01 - * read-only, dirty, young .01...011101 -> 11..1...0...01 - * read-write, clean, old .11...110001 -> 00..0...1...11 - * read-write, clean, young .01...110101 -> 01..0...0...11 - * read-write, dirty, old .10...111001 -> 10..0...1...11 - * read-write, dirty, young .00...111101 -> 11..0...0...11 + * lIR.uswrdy.p dy..R...I...wr + * empty 010.000000.0 -> 00..0...1...00 + * prot-none, clean, old 111.000000.1 -> 00..1...1...00 + * prot-none, clean, young 111.000001.1 -> 01..1...1...00 + * prot-none, dirty, old 111.000010.1 -> 10..1...1...00 + * prot-none, dirty, young 111.000011.1 -> 11..1...1...00 + * read-only, clean, old 111.000100.1 -> 00..1...1...01 + * read-only, clean, young 101.000101.1 -> 01..1...0...01 + * read-only, dirty, old 111.000110.1 -> 10..1...1...01 + * read-only, dirty, young 101.000111.1 -> 11..1...0...01 + * read-write, clean, old 111.001100.1 -> 00..1...1...11 + * read-write, clean, young 101.001101.1 -> 01..1...0...11 + * read-write, dirty, old 110.001110.1 -> 10..0...1...11 + * read-write, dirty, young 100.001111.1 -> 11..0...0...11 + * HW-bits: R read-only, I invalid + * SW-bits: p present, y young, d dirty, r read, w write, s special, + * u unused, l large */ if (pte_present(pte)) { pmd_val(pmd) = pte_val(pte) & PAGE_MASK; @@ -48,20 +51,23 @@ static inline pte_t __pmd_to_pte(pmd_t pmd) /* * Convert encoding pmd bits pte bits - * dy..R...I...wr .IR...wrdytp - * empty 00..0...1...00 -> .10...001100 - * prot-none, clean, old 00..0...1...00 -> .10...000001 - * prot-none, clean, young 01..0...1...00 -> .10...000101 - * prot-none, dirty, old 10..0...1...00 -> .10...001001 - * prot-none, dirty, young 11..0...1...00 -> .10...001101 - * read-only, clean, old 00..1...1...01 -> .11...010001 - * read-only, clean, young 01..1...1...01 -> .11...010101 - * read-only, dirty, old 10..1...1...01 -> .11...011001 - * read-only, dirty, young 11..1...1...01 -> .11...011101 - * read-write, clean, old 00..0...1...11 -> .10...110001 - * read-write, clean, young 01..0...1...11 -> .10...110101 - * read-write, dirty, old 10..0...1...11 -> .10...111001 - * read-write, dirty, young 11..0...1...11 -> .10...111101 + * dy..R...I...wr lIR.uswrdy.p + * empty 00..0...1...00 -> 010.000000.0 + * prot-none, clean, old 00..1...1...00 -> 111.000000.1 + * prot-none, clean, young 01..1...1...00 -> 111.000001.1 + * prot-none, dirty, old 10..1...1...00 -> 111.000010.1 + * prot-none, dirty, young 11..1...1...00 -> 111.000011.1 + * read-only, clean, old 00..1...1...01 -> 111.000100.1 + * read-only, clean, young 01..1...0...01 -> 101.000101.1 + * read-only, dirty, old 10..1...1...01 -> 111.000110.1 + * read-only, dirty, young 11..1...0...01 -> 101.000111.1 + * read-write, clean, old 00..1...1...11 -> 111.001100.1 + * read-write, clean, young 01..1...0...11 -> 101.001101.1 + * read-write, dirty, old 10..0...1...11 -> 110.001110.1 + * read-write, dirty, young 11..0...0...11 -> 100.001111.1 + * HW-bits: R read-only, I invalid + * SW-bits: p present, y young, d dirty, r read, w write, s special, + * u unused, l large */ if (pmd_present(pmd)) { pte_val(pte) = pmd_val(pmd) & _SEGMENT_ENTRY_ORIGIN_LARGE; @@ -70,8 +76,8 @@ static inline pte_t __pmd_to_pte(pmd_t pmd) pte_val(pte) |= (pmd_val(pmd) & _SEGMENT_ENTRY_WRITE) << 4; pte_val(pte) |= (pmd_val(pmd) & _SEGMENT_ENTRY_INVALID) << 5; pte_val(pte) |= (pmd_val(pmd) & _SEGMENT_ENTRY_PROTECT); - pmd_val(pmd) |= (pte_val(pte) & _PAGE_DIRTY) << 10; - pmd_val(pmd) |= (pte_val(pte) & _PAGE_YOUNG) << 10; + pte_val(pte) |= (pmd_val(pmd) & _SEGMENT_ENTRY_DIRTY) >> 10; + pte_val(pte) |= (pmd_val(pmd) & _SEGMENT_ENTRY_YOUNG) >> 10; } else pte_val(pte) = _PAGE_INVALID; return pte; @@ -80,31 +86,16 @@ static inline pte_t __pmd_to_pte(pmd_t pmd) void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte) { - pmd_t pmd; + pmd_t pmd = __pte_to_pmd(pte); - pmd = __pte_to_pmd(pte); - if (!MACHINE_HAS_HPAGE) { - /* Emulated huge ptes loose the dirty and young bit */ - pmd_val(pmd) &= ~_SEGMENT_ENTRY_ORIGIN; - pmd_val(pmd) |= pte_page(pte)[1].index; - } else - pmd_val(pmd) |= _SEGMENT_ENTRY_LARGE; + pmd_val(pmd) |= _SEGMENT_ENTRY_LARGE; *(pmd_t *) ptep = pmd; } pte_t huge_ptep_get(pte_t *ptep) { - unsigned long origin; - pmd_t pmd; + pmd_t pmd = *(pmd_t *) ptep; - pmd = *(pmd_t *) ptep; - if (!MACHINE_HAS_HPAGE && pmd_present(pmd)) { - origin = pmd_val(pmd) & _SEGMENT_ENTRY_ORIGIN; - pmd_val(pmd) &= ~_SEGMENT_ENTRY_ORIGIN; - pmd_val(pmd) |= *(unsigned long *) origin; - /* Emulated huge ptes are young and dirty by definition */ - pmd_val(pmd) |= _SEGMENT_ENTRY_YOUNG | _SEGMENT_ENTRY_DIRTY; - } return __pmd_to_pte(pmd); } @@ -119,45 +110,6 @@ pte_t huge_ptep_get_and_clear(struct mm_struct *mm, return pte; } -int arch_prepare_hugepage(struct page *page) -{ - unsigned long addr = page_to_phys(page); - pte_t pte; - pte_t *ptep; - int i; - - if (MACHINE_HAS_HPAGE) - return 0; - - ptep = (pte_t *) pte_alloc_one(&init_mm, addr); - if (!ptep) - return -ENOMEM; - - pte_val(pte) = addr; - for (i = 0; i < PTRS_PER_PTE; i++) { - set_pte_at(&init_mm, addr + i * PAGE_SIZE, ptep + i, pte); - pte_val(pte) += PAGE_SIZE; - } - page[1].index = (unsigned long) ptep; - return 0; -} - -void arch_release_hugepage(struct page *page) -{ - pte_t *ptep; - - if (MACHINE_HAS_HPAGE) - return; - - ptep = (pte_t *) page[1].index; - if (!ptep) - return; - clear_table((unsigned long *) ptep, _PAGE_INVALID, - PTRS_PER_PTE * sizeof(pte_t)); - page_table_free(&init_mm, (unsigned long *) ptep); - page[1].index = 0; -} - pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz) { @@ -187,17 +139,9 @@ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) return (pte_t *) pmdp; } -int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep) -{ - return 0; -} - int pmd_huge(pmd_t pmd) { - if (!MACHINE_HAS_HPAGE) - return 0; - - return !!(pmd_val(pmd) & _SEGMENT_ENTRY_LARGE); + return pmd_large(pmd); } int pud_huge(pud_t pud) diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c index 80875c43a4a4..76e873748b56 100644 --- a/arch/s390/mm/init.c +++ b/arch/s390/mm/init.c @@ -213,7 +213,7 @@ unsigned long memory_block_size_bytes(void) * Make sure the memory block size is always greater * or equal than the memory increment size. */ - return max_t(unsigned long, MIN_MEMORY_BLOCK_SIZE, sclp_get_rzm()); + return max_t(unsigned long, MIN_MEMORY_BLOCK_SIZE, sclp.rzm); } #ifdef CONFIG_MEMORY_HOTREMOVE diff --git a/arch/s390/mm/mem_detect.c b/arch/s390/mm/mem_detect.c index 0f3604395805..e00f0d5d296d 100644 --- a/arch/s390/mm/mem_detect.c +++ b/arch/s390/mm/mem_detect.c @@ -31,8 +31,8 @@ void __init detect_memory_memblock(void) unsigned long addr, size; int type; - rzm = sclp_get_rzm(); - rnmax = sclp_get_rnmax(); + rzm = sclp.rzm; + rnmax = sclp.rnmax; memsize = rzm * rnmax; if (!rzm) rzm = 1ULL << 17; diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index 33f589459113..33082d0d101b 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -18,6 +18,7 @@ #include <linux/rcupdate.h> #include <linux/slab.h> #include <linux/swapops.h> +#include <linux/sysctl.h> #include <linux/ksm.h> #include <linux/mman.h> @@ -30,6 +31,8 @@ #define ALLOC_ORDER 2 #define FRAG_MASK 0x03 +int HPAGE_SHIFT; + unsigned long *crst_table_alloc(struct mm_struct *mm) { struct page *page = alloc_pages(GFP_KERNEL, ALLOC_ORDER); @@ -920,6 +923,40 @@ unsigned long get_guest_storage_key(struct mm_struct *mm, unsigned long addr) } EXPORT_SYMBOL(get_guest_storage_key); +static int page_table_allocate_pgste_min = 0; +static int page_table_allocate_pgste_max = 1; +int page_table_allocate_pgste = 0; +EXPORT_SYMBOL(page_table_allocate_pgste); + +static struct ctl_table page_table_sysctl[] = { + { + .procname = "allocate_pgste", + .data = &page_table_allocate_pgste, + .maxlen = sizeof(int), + .mode = S_IRUGO | S_IWUSR, + .proc_handler = proc_dointvec, + .extra1 = &page_table_allocate_pgste_min, + .extra2 = &page_table_allocate_pgste_max, + }, + { } +}; + +static struct ctl_table page_table_sysctl_dir[] = { + { + .procname = "vm", + .maxlen = 0, + .mode = 0555, + .child = page_table_sysctl, + }, + { } +}; + +static int __init page_table_register_sysctl(void) +{ + return register_sysctl_table(page_table_sysctl_dir) ? 0 : -ENOMEM; +} +__initcall(page_table_register_sysctl); + #else /* CONFIG_PGSTE */ static inline int page_table_with_pgste(struct page *page) @@ -963,7 +1000,7 @@ unsigned long *page_table_alloc(struct mm_struct *mm) struct page *uninitialized_var(page); unsigned int mask, bit; - if (mm_has_pgste(mm)) + if (mm_alloc_pgste(mm)) return page_table_alloc_pgste(mm); /* Allocate fragments of a 4K page as 1K/2K page table */ spin_lock_bh(&mm->context.list_lock); @@ -1165,116 +1202,25 @@ static inline void thp_split_mm(struct mm_struct *mm) } #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ -static unsigned long page_table_realloc_pmd(struct mmu_gather *tlb, - struct mm_struct *mm, pud_t *pud, - unsigned long addr, unsigned long end) -{ - unsigned long next, *table, *new; - struct page *page; - spinlock_t *ptl; - pmd_t *pmd; - - pmd = pmd_offset(pud, addr); - do { - next = pmd_addr_end(addr, end); -again: - if (pmd_none_or_clear_bad(pmd)) - continue; - table = (unsigned long *) pmd_deref(*pmd); - page = pfn_to_page(__pa(table) >> PAGE_SHIFT); - if (page_table_with_pgste(page)) - continue; - /* Allocate new page table with pgstes */ - new = page_table_alloc_pgste(mm); - if (!new) - return -ENOMEM; - - ptl = pmd_lock(mm, pmd); - if (likely((unsigned long *) pmd_deref(*pmd) == table)) { - /* Nuke pmd entry pointing to the "short" page table */ - pmdp_flush_lazy(mm, addr, pmd); - pmd_clear(pmd); - /* Copy ptes from old table to new table */ - memcpy(new, table, PAGE_SIZE/2); - clear_table(table, _PAGE_INVALID, PAGE_SIZE/2); - /* Establish new table */ - pmd_populate(mm, pmd, (pte_t *) new); - /* Free old table with rcu, there might be a walker! */ - page_table_free_rcu(tlb, table, addr); - new = NULL; - } - spin_unlock(ptl); - if (new) { - page_table_free_pgste(new); - goto again; - } - } while (pmd++, addr = next, addr != end); - - return addr; -} - -static unsigned long page_table_realloc_pud(struct mmu_gather *tlb, - struct mm_struct *mm, pgd_t *pgd, - unsigned long addr, unsigned long end) -{ - unsigned long next; - pud_t *pud; - - pud = pud_offset(pgd, addr); - do { - next = pud_addr_end(addr, end); - if (pud_none_or_clear_bad(pud)) - continue; - next = page_table_realloc_pmd(tlb, mm, pud, addr, next); - if (unlikely(IS_ERR_VALUE(next))) - return next; - } while (pud++, addr = next, addr != end); - - return addr; -} - -static unsigned long page_table_realloc(struct mmu_gather *tlb, struct mm_struct *mm, - unsigned long addr, unsigned long end) -{ - unsigned long next; - pgd_t *pgd; - - pgd = pgd_offset(mm, addr); - do { - next = pgd_addr_end(addr, end); - if (pgd_none_or_clear_bad(pgd)) - continue; - next = page_table_realloc_pud(tlb, mm, pgd, addr, next); - if (unlikely(IS_ERR_VALUE(next))) - return next; - } while (pgd++, addr = next, addr != end); - - return 0; -} - /* * switch on pgstes for its userspace process (for kvm) */ int s390_enable_sie(void) { - struct task_struct *tsk = current; - struct mm_struct *mm = tsk->mm; - struct mmu_gather tlb; + struct mm_struct *mm = current->mm; /* Do we have pgstes? if yes, we are done */ - if (mm_has_pgste(tsk->mm)) + if (mm_has_pgste(mm)) return 0; - + /* Fail if the page tables are 2K */ + if (!mm_alloc_pgste(mm)) + return -EINVAL; down_write(&mm->mmap_sem); + mm->context.has_pgste = 1; /* split thp mappings and disable thp for future mappings */ thp_split_mm(mm); - /* Reallocate the page tables with pgstes */ - tlb_gather_mmu(&tlb, mm, 0, TASK_SIZE); - if (!page_table_realloc(&tlb, mm, 0, TASK_SIZE)) - mm->context.has_pgste = 1; - tlb_finish_mmu(&tlb, 0, TASK_SIZE); up_write(&mm->mmap_sem); - return mm->context.has_pgste ? 0 : -ENOMEM; + return 0; } EXPORT_SYMBOL_GPL(s390_enable_sie); diff --git a/arch/s390/net/bpf_jit.h b/arch/s390/net/bpf_jit.h index ba8593a515ba..f6498eec9ee1 100644 --- a/arch/s390/net/bpf_jit.h +++ b/arch/s390/net/bpf_jit.h @@ -28,6 +28,9 @@ extern u8 sk_load_word[], sk_load_half[], sk_load_byte[]; * | old backchain | | * +---------------+ | * | r15 - r6 | | + * +---------------+ | + * | 4 byte align | | + * | tail_call_cnt | | * BFP -> +===============+ | * | | | * | BPF stack | | @@ -46,12 +49,17 @@ extern u8 sk_load_word[], sk_load_half[], sk_load_byte[]; * R15 -> +---------------+ + low * * We get 160 bytes stack space from calling function, but only use - * 11 * 8 byte (old backchain + r15 - r6) for storing registers. + * 12 * 8 byte for old backchain, r15..r6, and tail_call_cnt. */ -#define STK_OFF (MAX_BPF_STACK + 8 + 4 + 4 + (160 - 11 * 8)) +#define STK_SPACE (MAX_BPF_STACK + 8 + 4 + 4 + 160) +#define STK_160_UNUSED (160 - 12 * 8) +#define STK_OFF (STK_SPACE - STK_160_UNUSED) #define STK_OFF_TMP 160 /* Offset of tmp buffer on stack */ #define STK_OFF_HLEN 168 /* Offset of SKB header length on stack */ +#define STK_OFF_R6 (160 - 11 * 8) /* Offset of r6 on stack */ +#define STK_OFF_TCCNT (160 - 12 * 8) /* Offset of tail_call_cnt on stack */ + /* Offset to skip condition code check */ #define OFF_OK 4 diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index 7690dc8e1ab5..d3766dd67e23 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c @@ -21,6 +21,7 @@ #include <linux/netdevice.h> #include <linux/filter.h> #include <linux/init.h> +#include <linux/bpf.h> #include <asm/cacheflush.h> #include <asm/dis.h> #include "bpf_jit.h" @@ -40,6 +41,8 @@ struct bpf_jit { int base_ip; /* Base address for literal pool */ int ret0_ip; /* Address of return 0 */ int exit_ip; /* Address of exit */ + int tail_call_start; /* Tail call start offset */ + int labels[1]; /* Labels for local jumps */ }; #define BPF_SIZE_MAX 4096 /* Max size for program */ @@ -49,6 +52,7 @@ struct bpf_jit { #define SEEN_RET0 4 /* ret0_ip points to a valid return 0 */ #define SEEN_LITERAL 8 /* code uses literals */ #define SEEN_FUNC 16 /* calls C functions */ +#define SEEN_TAIL_CALL 32 /* code uses tail calls */ #define SEEN_STACK (SEEN_FUNC | SEEN_MEM | SEEN_SKB) /* @@ -60,6 +64,7 @@ struct bpf_jit { #define REG_L (__MAX_BPF_REG+3) /* Literal pool register */ #define REG_15 (__MAX_BPF_REG+4) /* Register 15 */ #define REG_0 REG_W0 /* Register 0 */ +#define REG_1 REG_W1 /* Register 1 */ #define REG_2 BPF_REG_1 /* Register 2 */ #define REG_14 BPF_REG_0 /* Register 14 */ @@ -223,6 +228,24 @@ static inline void reg_set_seen(struct bpf_jit *jit, u32 b1) REG_SET_SEEN(b3); \ }) +#define EMIT6_PCREL_LABEL(op1, op2, b1, b2, label, mask) \ +({ \ + int rel = (jit->labels[label] - jit->prg) >> 1; \ + _EMIT6(op1 | reg(b1, b2) << 16 | (rel & 0xffff), \ + op2 | mask << 12); \ + REG_SET_SEEN(b1); \ + REG_SET_SEEN(b2); \ +}) + +#define EMIT6_PCREL_IMM_LABEL(op1, op2, b1, imm, label, mask) \ +({ \ + int rel = (jit->labels[label] - jit->prg) >> 1; \ + _EMIT6(op1 | (reg_high(b1) | mask) << 16 | \ + (rel & 0xffff), op2 | (imm & 0xff) << 8); \ + REG_SET_SEEN(b1); \ + BUILD_BUG_ON(((unsigned long) imm) > 0xff); \ +}) + #define EMIT6_PCREL(op1, op2, b1, b2, i, off, mask) \ ({ \ /* Branch instruction needs 6 bytes */ \ @@ -286,7 +309,7 @@ static void jit_fill_hole(void *area, unsigned int size) */ static void save_regs(struct bpf_jit *jit, u32 rs, u32 re) { - u32 off = 72 + (rs - 6) * 8; + u32 off = STK_OFF_R6 + (rs - 6) * 8; if (rs == re) /* stg %rs,off(%r15) */ @@ -301,7 +324,7 @@ static void save_regs(struct bpf_jit *jit, u32 rs, u32 re) */ static void restore_regs(struct bpf_jit *jit, u32 rs, u32 re) { - u32 off = 72 + (rs - 6) * 8; + u32 off = STK_OFF_R6 + (rs - 6) * 8; if (jit->seen & SEEN_STACK) off += STK_OFF; @@ -374,6 +397,16 @@ static void save_restore_regs(struct bpf_jit *jit, int op) */ static void bpf_jit_prologue(struct bpf_jit *jit) { + if (jit->seen & SEEN_TAIL_CALL) { + /* xc STK_OFF_TCCNT(4,%r15),STK_OFF_TCCNT(%r15) */ + _EMIT6(0xd703f000 | STK_OFF_TCCNT, 0xf000 | STK_OFF_TCCNT); + } else { + /* j tail_call_start: NOP if no tail calls are used */ + EMIT4_PCREL(0xa7f40000, 6); + _EMIT2(0); + } + /* Tail calls have to skip above initialization */ + jit->tail_call_start = jit->prg; /* Save registers */ save_restore_regs(jit, REGS_SAVE); /* Setup literal pool */ @@ -384,13 +417,16 @@ static void bpf_jit_prologue(struct bpf_jit *jit) } /* Setup stack and backchain */ if (jit->seen & SEEN_STACK) { - /* lgr %bfp,%r15 (BPF frame pointer) */ - EMIT4(0xb9040000, BPF_REG_FP, REG_15); + if (jit->seen & SEEN_FUNC) + /* lgr %w1,%r15 (backchain) */ + EMIT4(0xb9040000, REG_W1, REG_15); + /* la %bfp,STK_160_UNUSED(%r15) (BPF frame pointer) */ + EMIT4_DISP(0x41000000, BPF_REG_FP, REG_15, STK_160_UNUSED); /* aghi %r15,-STK_OFF */ EMIT4_IMM(0xa70b0000, REG_15, -STK_OFF); if (jit->seen & SEEN_FUNC) - /* stg %bfp,152(%r15) (backchain) */ - EMIT6_DISP_LH(0xe3000000, 0x0024, BPF_REG_FP, REG_0, + /* stg %w1,152(%r15) (backchain) */ + EMIT6_DISP_LH(0xe3000000, 0x0024, REG_W1, REG_0, REG_15, 152); } /* @@ -443,8 +479,11 @@ static void bpf_jit_epilogue(struct bpf_jit *jit) /* * Compile one eBPF instruction into s390x code + * + * NOTE: Use noinline because for gcov (-fprofile-arcs) gcc allocates a lot of + * stack space for the large switch statement. */ -static int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i) +static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i) { struct bpf_insn *insn = &fp->insnsi[i]; int jmp_off, last, insn_count = 1; @@ -588,8 +627,8 @@ static int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i) EMIT4(0xb9160000, dst_reg, rc_reg); break; } - case BPF_ALU64 | BPF_DIV | BPF_X: /* dst = dst / (u32) src */ - case BPF_ALU64 | BPF_MOD | BPF_X: /* dst = dst % (u32) src */ + case BPF_ALU64 | BPF_DIV | BPF_X: /* dst = dst / src */ + case BPF_ALU64 | BPF_MOD | BPF_X: /* dst = dst % src */ { int rc_reg = BPF_OP(insn->code) == BPF_DIV ? REG_W1 : REG_W0; @@ -602,10 +641,8 @@ static int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i) EMIT4_IMM(0xa7090000, REG_W0, 0); /* lgr %w1,%dst */ EMIT4(0xb9040000, REG_W1, dst_reg); - /* llgfr %dst,%src (u32 cast) */ - EMIT4(0xb9160000, dst_reg, src_reg); /* dlgr %w0,%dst */ - EMIT4(0xb9870000, REG_W0, dst_reg); + EMIT4(0xb9870000, REG_W0, src_reg); /* lgr %dst,%rc */ EMIT4(0xb9040000, dst_reg, rc_reg); break; @@ -632,8 +669,8 @@ static int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i) EMIT4(0xb9160000, dst_reg, rc_reg); break; } - case BPF_ALU64 | BPF_DIV | BPF_K: /* dst = dst / (u32) imm */ - case BPF_ALU64 | BPF_MOD | BPF_K: /* dst = dst % (u32) imm */ + case BPF_ALU64 | BPF_DIV | BPF_K: /* dst = dst / imm */ + case BPF_ALU64 | BPF_MOD | BPF_K: /* dst = dst % imm */ { int rc_reg = BPF_OP(insn->code) == BPF_DIV ? REG_W1 : REG_W0; @@ -649,7 +686,7 @@ static int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i) EMIT4(0xb9040000, REG_W1, dst_reg); /* dlg %w0,<d(imm)>(%l) */ EMIT6_DISP_LH(0xe3000000, 0x0087, REG_W0, REG_0, REG_L, - EMIT_CONST_U64((u32) imm)); + EMIT_CONST_U64(imm)); /* lgr %dst,%rc */ EMIT4(0xb9040000, dst_reg, rc_reg); break; @@ -947,6 +984,75 @@ static int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i) EMIT4(0xb9040000, BPF_REG_0, REG_2); break; } + case BPF_JMP | BPF_CALL | BPF_X: + /* + * Implicit input: + * B1: pointer to ctx + * B2: pointer to bpf_array + * B3: index in bpf_array + */ + jit->seen |= SEEN_TAIL_CALL; + + /* + * if (index >= array->map.max_entries) + * goto out; + */ + + /* llgf %w1,map.max_entries(%b2) */ + EMIT6_DISP_LH(0xe3000000, 0x0016, REG_W1, REG_0, BPF_REG_2, + offsetof(struct bpf_array, map.max_entries)); + /* clgrj %b3,%w1,0xa,label0: if %b3 >= %w1 goto out */ + EMIT6_PCREL_LABEL(0xec000000, 0x0065, BPF_REG_3, + REG_W1, 0, 0xa); + + /* + * if (tail_call_cnt++ > MAX_TAIL_CALL_CNT) + * goto out; + */ + + if (jit->seen & SEEN_STACK) + off = STK_OFF_TCCNT + STK_OFF; + else + off = STK_OFF_TCCNT; + /* lhi %w0,1 */ + EMIT4_IMM(0xa7080000, REG_W0, 1); + /* laal %w1,%w0,off(%r15) */ + EMIT6_DISP_LH(0xeb000000, 0x00fa, REG_W1, REG_W0, REG_15, off); + /* clij %w1,MAX_TAIL_CALL_CNT,0x2,label0 */ + EMIT6_PCREL_IMM_LABEL(0xec000000, 0x007f, REG_W1, + MAX_TAIL_CALL_CNT, 0, 0x2); + + /* + * prog = array->prog[index]; + * if (prog == NULL) + * goto out; + */ + + /* sllg %r1,%b3,3: %r1 = index * 8 */ + EMIT6_DISP_LH(0xeb000000, 0x000d, REG_1, BPF_REG_3, REG_0, 3); + /* lg %r1,prog(%b2,%r1) */ + EMIT6_DISP_LH(0xe3000000, 0x0004, REG_1, BPF_REG_2, + REG_1, offsetof(struct bpf_array, prog)); + /* clgij %r1,0,0x8,label0 */ + EMIT6_PCREL_IMM_LABEL(0xec000000, 0x007d, REG_1, 0, 0, 0x8); + + /* + * Restore registers before calling function + */ + save_restore_regs(jit, REGS_RESTORE); + + /* + * goto *(prog->bpf_func + tail_call_start); + */ + + /* lg %r1,bpf_func(%r1) */ + EMIT6_DISP_LH(0xe3000000, 0x0004, REG_1, REG_1, REG_0, + offsetof(struct bpf_prog, bpf_func)); + /* bc 0xf,tail_call_start(%r1) */ + _EMIT4(0x47f01000 + jit->tail_call_start); + /* out: */ + jit->labels[0] = jit->prg; + break; case BPF_JMP | BPF_EXIT: /* return b0 */ last = (i == fp->len - 1) ? 1 : 0; if (last && !(jit->seen & SEEN_RET0)) diff --git a/arch/s390/pci/pci_event.c b/arch/s390/pci/pci_event.c index 460fdb21cf61..ed2394dd14e9 100644 --- a/arch/s390/pci/pci_event.c +++ b/arch/s390/pci/pci_event.c @@ -75,7 +75,13 @@ static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf) zpci_err_hex(ccdf, sizeof(*ccdf)); switch (ccdf->pec) { - case 0x0301: /* Standby -> Configured */ + case 0x0301: /* Reserved|Standby -> Configured */ + if (!zdev) { + ret = clp_add_pci_device(ccdf->fid, ccdf->fh, 0); + if (ret) + break; + zdev = get_zdev_by_fid(ccdf->fid); + } if (!zdev || zdev->state != ZPCI_FN_STATE_STANDBY) break; zdev->state = ZPCI_FN_STATE_CONFIGURED; |