summaryrefslogtreecommitdiff
path: root/arch/s390
diff options
context:
space:
mode:
Diffstat (limited to 'arch/s390')
-rw-r--r--arch/s390/Kconfig2
-rw-r--r--arch/s390/crypto/crypt_s390.h122
-rw-r--r--arch/s390/crypto/ghash_s390.c25
-rw-r--r--arch/s390/crypto/prng.c850
-rw-r--r--arch/s390/hypfs/hypfs_sprp.c4
-rw-r--r--arch/s390/include/asm/Kbuild1
-rw-r--r--arch/s390/include/asm/barrier.h2
-rw-r--r--arch/s390/include/asm/cmpxchg.h2
-rw-r--r--arch/s390/include/asm/hugetlb.h4
-rw-r--r--arch/s390/include/asm/io.h1
-rw-r--r--arch/s390/include/asm/kexec.h3
-rw-r--r--arch/s390/include/asm/kvm_host.h6
-rw-r--r--arch/s390/include/asm/mm-arch-hooks.h15
-rw-r--r--arch/s390/include/asm/mmu.h4
-rw-r--r--arch/s390/include/asm/mmu_context.h3
-rw-r--r--arch/s390/include/asm/page.h8
-rw-r--r--arch/s390/include/asm/pgalloc.h1
-rw-r--r--arch/s390/include/asm/pgtable.h203
-rw-r--r--arch/s390/include/asm/sclp.h34
-rw-r--r--arch/s390/include/asm/timex.h5
-rw-r--r--arch/s390/include/asm/topology.h3
-rw-r--r--arch/s390/include/asm/uaccess.h15
-rw-r--r--arch/s390/kernel/compat_wrapper.c2
-rw-r--r--arch/s390/kernel/crash_dump.c13
-rw-r--r--arch/s390/kernel/debug.c11
-rw-r--r--arch/s390/kernel/entry.S2
-rw-r--r--arch/s390/kernel/setup.c25
-rw-r--r--arch/s390/kernel/smp.c13
-rw-r--r--arch/s390/kernel/suspend.c2
-rw-r--r--arch/s390/kernel/time.c6
-rw-r--r--arch/s390/kvm/intercept.c16
-rw-r--r--arch/s390/kvm/interrupt.c94
-rw-r--r--arch/s390/kvm/kvm-s390.c89
-rw-r--r--arch/s390/kvm/kvm-s390.h25
-rw-r--r--arch/s390/kvm/priv.c8
-rw-r--r--arch/s390/mm/fault.c2
-rw-r--r--arch/s390/mm/hugetlbpage.c136
-rw-r--r--arch/s390/mm/init.c2
-rw-r--r--arch/s390/mm/mem_detect.c4
-rw-r--r--arch/s390/mm/pgtable.c144
-rw-r--r--arch/s390/net/bpf_jit.h12
-rw-r--r--arch/s390/net/bpf_jit_comp.c136
-rw-r--r--arch/s390/pci/pci_event.c8
43 files changed, 1460 insertions, 603 deletions
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index 8e58c614c37d..b06dc3839268 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -115,7 +115,7 @@ config S390
select HAVE_ARCH_SECCOMP_FILTER
select HAVE_ARCH_TRACEHOOK
select HAVE_ARCH_TRANSPARENT_HUGEPAGE
- select HAVE_BPF_JIT if PACK_STACK && HAVE_MARCH_Z9_109_FEATURES
+ select HAVE_BPF_JIT if PACK_STACK && HAVE_MARCH_Z196_FEATURES
select HAVE_CMPXCHG_DOUBLE
select HAVE_CMPXCHG_LOCAL
select HAVE_DEBUG_KMEMLEAK
diff --git a/arch/s390/crypto/crypt_s390.h b/arch/s390/crypto/crypt_s390.h
index ba3b2aefddf5..d9c4c313fbc6 100644
--- a/arch/s390/crypto/crypt_s390.h
+++ b/arch/s390/crypto/crypt_s390.h
@@ -3,9 +3,10 @@
*
* Support for s390 cryptographic instructions.
*
- * Copyright IBM Corp. 2003, 2007
+ * Copyright IBM Corp. 2003, 2015
* Author(s): Thomas Spatzier
* Jan Glauber (jan.glauber@de.ibm.com)
+ * Harald Freudenberger (freude@de.ibm.com)
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the Free
@@ -28,15 +29,17 @@
#define CRYPT_S390_MSA 0x1
#define CRYPT_S390_MSA3 0x2
#define CRYPT_S390_MSA4 0x4
+#define CRYPT_S390_MSA5 0x8
/* s390 cryptographic operations */
enum crypt_s390_operations {
- CRYPT_S390_KM = 0x0100,
- CRYPT_S390_KMC = 0x0200,
- CRYPT_S390_KIMD = 0x0300,
- CRYPT_S390_KLMD = 0x0400,
- CRYPT_S390_KMAC = 0x0500,
- CRYPT_S390_KMCTR = 0x0600
+ CRYPT_S390_KM = 0x0100,
+ CRYPT_S390_KMC = 0x0200,
+ CRYPT_S390_KIMD = 0x0300,
+ CRYPT_S390_KLMD = 0x0400,
+ CRYPT_S390_KMAC = 0x0500,
+ CRYPT_S390_KMCTR = 0x0600,
+ CRYPT_S390_PPNO = 0x0700
};
/*
@@ -138,6 +141,16 @@ enum crypt_s390_kmac_func {
KMAC_TDEA_192 = CRYPT_S390_KMAC | 3
};
+/*
+ * function codes for PPNO (PERFORM PSEUDORANDOM NUMBER
+ * OPERATION) instruction
+ */
+enum crypt_s390_ppno_func {
+ PPNO_QUERY = CRYPT_S390_PPNO | 0,
+ PPNO_SHA512_DRNG_GEN = CRYPT_S390_PPNO | 3,
+ PPNO_SHA512_DRNG_SEED = CRYPT_S390_PPNO | 0x83
+};
+
/**
* crypt_s390_km:
* @func: the function code passed to KM; see crypt_s390_km_func
@@ -162,11 +175,11 @@ static inline int crypt_s390_km(long func, void *param,
int ret;
asm volatile(
- "0: .insn rre,0xb92e0000,%3,%1 \n" /* KM opcode */
- "1: brc 1,0b \n" /* handle partial completion */
+ "0: .insn rre,0xb92e0000,%3,%1\n" /* KM opcode */
+ "1: brc 1,0b\n" /* handle partial completion */
" la %0,0\n"
"2:\n"
- EX_TABLE(0b,2b) EX_TABLE(1b,2b)
+ EX_TABLE(0b, 2b) EX_TABLE(1b, 2b)
: "=d" (ret), "+a" (__src), "+d" (__src_len), "+a" (__dest)
: "d" (__func), "a" (__param), "0" (-1) : "cc", "memory");
if (ret < 0)
@@ -198,11 +211,11 @@ static inline int crypt_s390_kmc(long func, void *param,
int ret;
asm volatile(
- "0: .insn rre,0xb92f0000,%3,%1 \n" /* KMC opcode */
- "1: brc 1,0b \n" /* handle partial completion */
+ "0: .insn rre,0xb92f0000,%3,%1\n" /* KMC opcode */
+ "1: brc 1,0b\n" /* handle partial completion */
" la %0,0\n"
"2:\n"
- EX_TABLE(0b,2b) EX_TABLE(1b,2b)
+ EX_TABLE(0b, 2b) EX_TABLE(1b, 2b)
: "=d" (ret), "+a" (__src), "+d" (__src_len), "+a" (__dest)
: "d" (__func), "a" (__param), "0" (-1) : "cc", "memory");
if (ret < 0)
@@ -233,11 +246,11 @@ static inline int crypt_s390_kimd(long func, void *param,
int ret;
asm volatile(
- "0: .insn rre,0xb93e0000,%1,%1 \n" /* KIMD opcode */
- "1: brc 1,0b \n" /* handle partial completion */
+ "0: .insn rre,0xb93e0000,%1,%1\n" /* KIMD opcode */
+ "1: brc 1,0b\n" /* handle partial completion */
" la %0,0\n"
"2:\n"
- EX_TABLE(0b,2b) EX_TABLE(1b,2b)
+ EX_TABLE(0b, 2b) EX_TABLE(1b, 2b)
: "=d" (ret), "+a" (__src), "+d" (__src_len)
: "d" (__func), "a" (__param), "0" (-1) : "cc", "memory");
if (ret < 0)
@@ -267,11 +280,11 @@ static inline int crypt_s390_klmd(long func, void *param,
int ret;
asm volatile(
- "0: .insn rre,0xb93f0000,%1,%1 \n" /* KLMD opcode */
- "1: brc 1,0b \n" /* handle partial completion */
+ "0: .insn rre,0xb93f0000,%1,%1\n" /* KLMD opcode */
+ "1: brc 1,0b\n" /* handle partial completion */
" la %0,0\n"
"2:\n"
- EX_TABLE(0b,2b) EX_TABLE(1b,2b)
+ EX_TABLE(0b, 2b) EX_TABLE(1b, 2b)
: "=d" (ret), "+a" (__src), "+d" (__src_len)
: "d" (__func), "a" (__param), "0" (-1) : "cc", "memory");
if (ret < 0)
@@ -302,11 +315,11 @@ static inline int crypt_s390_kmac(long func, void *param,
int ret;
asm volatile(
- "0: .insn rre,0xb91e0000,%1,%1 \n" /* KLAC opcode */
- "1: brc 1,0b \n" /* handle partial completion */
+ "0: .insn rre,0xb91e0000,%1,%1\n" /* KLAC opcode */
+ "1: brc 1,0b\n" /* handle partial completion */
" la %0,0\n"
"2:\n"
- EX_TABLE(0b,2b) EX_TABLE(1b,2b)
+ EX_TABLE(0b, 2b) EX_TABLE(1b, 2b)
: "=d" (ret), "+a" (__src), "+d" (__src_len)
: "d" (__func), "a" (__param), "0" (-1) : "cc", "memory");
if (ret < 0)
@@ -340,11 +353,11 @@ static inline int crypt_s390_kmctr(long func, void *param, u8 *dest,
int ret = -1;
asm volatile(
- "0: .insn rrf,0xb92d0000,%3,%1,%4,0 \n" /* KMCTR opcode */
- "1: brc 1,0b \n" /* handle partial completion */
+ "0: .insn rrf,0xb92d0000,%3,%1,%4,0\n" /* KMCTR opcode */
+ "1: brc 1,0b\n" /* handle partial completion */
" la %0,0\n"
"2:\n"
- EX_TABLE(0b,2b) EX_TABLE(1b,2b)
+ EX_TABLE(0b, 2b) EX_TABLE(1b, 2b)
: "+d" (ret), "+a" (__src), "+d" (__src_len), "+a" (__dest),
"+a" (__ctr)
: "d" (__func), "a" (__param) : "cc", "memory");
@@ -354,6 +367,47 @@ static inline int crypt_s390_kmctr(long func, void *param, u8 *dest,
}
/**
+ * crypt_s390_ppno:
+ * @func: the function code passed to PPNO; see crypt_s390_ppno_func
+ * @param: address of parameter block; see POP for details on each func
+ * @dest: address of destination memory area
+ * @dest_len: size of destination memory area in bytes
+ * @seed: address of seed data
+ * @seed_len: size of seed data in bytes
+ *
+ * Executes the PPNO (PERFORM PSEUDORANDOM NUMBER OPERATION)
+ * operation of the CPU.
+ *
+ * Returns -1 for failure, 0 for the query func, number of random
+ * bytes stored in dest buffer for generate function
+ */
+static inline int crypt_s390_ppno(long func, void *param,
+ u8 *dest, long dest_len,
+ const u8 *seed, long seed_len)
+{
+ register long __func asm("0") = func & CRYPT_S390_FUNC_MASK;
+ register void *__param asm("1") = param; /* param block (240 bytes) */
+ register u8 *__dest asm("2") = dest; /* buf for recv random bytes */
+ register long __dest_len asm("3") = dest_len; /* requested random bytes */
+ register const u8 *__seed asm("4") = seed; /* buf with seed data */
+ register long __seed_len asm("5") = seed_len; /* bytes in seed buf */
+ int ret = -1;
+
+ asm volatile (
+ "0: .insn rre,0xb93c0000,%1,%5\n" /* PPNO opcode */
+ "1: brc 1,0b\n" /* handle partial completion */
+ " la %0,0\n"
+ "2:\n"
+ EX_TABLE(0b, 2b) EX_TABLE(1b, 2b)
+ : "+d" (ret), "+a"(__dest), "+d"(__dest_len)
+ : "d"(__func), "a"(__param), "a"(__seed), "d"(__seed_len)
+ : "cc", "memory");
+ if (ret < 0)
+ return ret;
+ return (func & CRYPT_S390_FUNC_MASK) ? dest_len - __dest_len : 0;
+}
+
+/**
* crypt_s390_func_available:
* @func: the function code of the specific function; 0 if op in general
*
@@ -373,6 +427,9 @@ static inline int crypt_s390_func_available(int func,
return 0;
if (facility_mask & CRYPT_S390_MSA4 && !test_facility(77))
return 0;
+ if (facility_mask & CRYPT_S390_MSA5 && !test_facility(57))
+ return 0;
+
switch (func & CRYPT_S390_OP_MASK) {
case CRYPT_S390_KM:
ret = crypt_s390_km(KM_QUERY, &status, NULL, NULL, 0);
@@ -390,8 +447,12 @@ static inline int crypt_s390_func_available(int func,
ret = crypt_s390_kmac(KMAC_QUERY, &status, NULL, 0);
break;
case CRYPT_S390_KMCTR:
- ret = crypt_s390_kmctr(KMCTR_QUERY, &status, NULL, NULL, 0,
- NULL);
+ ret = crypt_s390_kmctr(KMCTR_QUERY, &status,
+ NULL, NULL, 0, NULL);
+ break;
+ case CRYPT_S390_PPNO:
+ ret = crypt_s390_ppno(PPNO_QUERY, &status,
+ NULL, 0, NULL, 0);
break;
default:
return 0;
@@ -419,15 +480,14 @@ static inline int crypt_s390_pcc(long func, void *param)
int ret = -1;
asm volatile(
- "0: .insn rre,0xb92c0000,0,0 \n" /* PCC opcode */
- "1: brc 1,0b \n" /* handle partial completion */
+ "0: .insn rre,0xb92c0000,0,0\n" /* PCC opcode */
+ "1: brc 1,0b\n" /* handle partial completion */
" la %0,0\n"
"2:\n"
- EX_TABLE(0b,2b) EX_TABLE(1b,2b)
+ EX_TABLE(0b, 2b) EX_TABLE(1b, 2b)
: "+d" (ret)
: "d" (__func), "a" (__param) : "cc", "memory");
return ret;
}
-
#endif /* _CRYPTO_ARCH_S390_CRYPT_S390_H */
diff --git a/arch/s390/crypto/ghash_s390.c b/arch/s390/crypto/ghash_s390.c
index 7940dc90e80b..b258110da952 100644
--- a/arch/s390/crypto/ghash_s390.c
+++ b/arch/s390/crypto/ghash_s390.c
@@ -16,11 +16,12 @@
#define GHASH_DIGEST_SIZE 16
struct ghash_ctx {
- u8 icv[16];
- u8 key[16];
+ u8 key[GHASH_BLOCK_SIZE];
};
struct ghash_desc_ctx {
+ u8 icv[GHASH_BLOCK_SIZE];
+ u8 key[GHASH_BLOCK_SIZE];
u8 buffer[GHASH_BLOCK_SIZE];
u32 bytes;
};
@@ -28,8 +29,10 @@ struct ghash_desc_ctx {
static int ghash_init(struct shash_desc *desc)
{
struct ghash_desc_ctx *dctx = shash_desc_ctx(desc);
+ struct ghash_ctx *ctx = crypto_shash_ctx(desc->tfm);
memset(dctx, 0, sizeof(*dctx));
+ memcpy(dctx->key, ctx->key, GHASH_BLOCK_SIZE);
return 0;
}
@@ -45,7 +48,6 @@ static int ghash_setkey(struct crypto_shash *tfm,
}
memcpy(ctx->key, key, GHASH_BLOCK_SIZE);
- memset(ctx->icv, 0, GHASH_BLOCK_SIZE);
return 0;
}
@@ -54,7 +56,6 @@ static int ghash_update(struct shash_desc *desc,
const u8 *src, unsigned int srclen)
{
struct ghash_desc_ctx *dctx = shash_desc_ctx(desc);
- struct ghash_ctx *ctx = crypto_shash_ctx(desc->tfm);
unsigned int n;
u8 *buf = dctx->buffer;
int ret;
@@ -70,7 +71,7 @@ static int ghash_update(struct shash_desc *desc,
src += n;
if (!dctx->bytes) {
- ret = crypt_s390_kimd(KIMD_GHASH, ctx, buf,
+ ret = crypt_s390_kimd(KIMD_GHASH, dctx, buf,
GHASH_BLOCK_SIZE);
if (ret != GHASH_BLOCK_SIZE)
return -EIO;
@@ -79,7 +80,7 @@ static int ghash_update(struct shash_desc *desc,
n = srclen & ~(GHASH_BLOCK_SIZE - 1);
if (n) {
- ret = crypt_s390_kimd(KIMD_GHASH, ctx, src, n);
+ ret = crypt_s390_kimd(KIMD_GHASH, dctx, src, n);
if (ret != n)
return -EIO;
src += n;
@@ -94,7 +95,7 @@ static int ghash_update(struct shash_desc *desc,
return 0;
}
-static int ghash_flush(struct ghash_ctx *ctx, struct ghash_desc_ctx *dctx)
+static int ghash_flush(struct ghash_desc_ctx *dctx)
{
u8 *buf = dctx->buffer;
int ret;
@@ -104,24 +105,24 @@ static int ghash_flush(struct ghash_ctx *ctx, struct ghash_desc_ctx *dctx)
memset(pos, 0, dctx->bytes);
- ret = crypt_s390_kimd(KIMD_GHASH, ctx, buf, GHASH_BLOCK_SIZE);
+ ret = crypt_s390_kimd(KIMD_GHASH, dctx, buf, GHASH_BLOCK_SIZE);
if (ret != GHASH_BLOCK_SIZE)
return -EIO;
+
+ dctx->bytes = 0;
}
- dctx->bytes = 0;
return 0;
}
static int ghash_final(struct shash_desc *desc, u8 *dst)
{
struct ghash_desc_ctx *dctx = shash_desc_ctx(desc);
- struct ghash_ctx *ctx = crypto_shash_ctx(desc->tfm);
int ret;
- ret = ghash_flush(ctx, dctx);
+ ret = ghash_flush(dctx);
if (!ret)
- memcpy(dst, ctx->icv, GHASH_BLOCK_SIZE);
+ memcpy(dst, dctx->icv, GHASH_BLOCK_SIZE);
return ret;
}
diff --git a/arch/s390/crypto/prng.c b/arch/s390/crypto/prng.c
index 94a35a4c1b48..9d5192c94963 100644
--- a/arch/s390/crypto/prng.c
+++ b/arch/s390/crypto/prng.c
@@ -1,106 +1,529 @@
/*
- * Copyright IBM Corp. 2006, 2007
+ * Copyright IBM Corp. 2006, 2015
* Author(s): Jan Glauber <jan.glauber@de.ibm.com>
+ * Harald Freudenberger <freude@de.ibm.com>
* Driver for the s390 pseudo random number generator
*/
+
+#define KMSG_COMPONENT "prng"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
#include <linux/fs.h>
+#include <linux/fips.h>
#include <linux/init.h>
#include <linux/kernel.h>
+#include <linux/device.h>
#include <linux/miscdevice.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
+#include <linux/mutex.h>
#include <linux/random.h>
#include <linux/slab.h>
#include <asm/debug.h>
#include <asm/uaccess.h>
+#include <asm/timex.h>
#include "crypt_s390.h"
MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Jan Glauber <jan.glauber@de.ibm.com>");
+MODULE_AUTHOR("IBM Corporation");
MODULE_DESCRIPTION("s390 PRNG interface");
-static int prng_chunk_size = 256;
-module_param(prng_chunk_size, int, S_IRUSR | S_IRGRP | S_IROTH);
+
+#define PRNG_MODE_AUTO 0
+#define PRNG_MODE_TDES 1
+#define PRNG_MODE_SHA512 2
+
+static unsigned int prng_mode = PRNG_MODE_AUTO;
+module_param_named(mode, prng_mode, int, 0);
+MODULE_PARM_DESC(prng_mode, "PRNG mode: 0 - auto, 1 - TDES, 2 - SHA512");
+
+
+#define PRNG_CHUNKSIZE_TDES_MIN 8
+#define PRNG_CHUNKSIZE_TDES_MAX (64*1024)
+#define PRNG_CHUNKSIZE_SHA512_MIN 64
+#define PRNG_CHUNKSIZE_SHA512_MAX (64*1024)
+
+static unsigned int prng_chunk_size = 256;
+module_param_named(chunksize, prng_chunk_size, int, 0);
MODULE_PARM_DESC(prng_chunk_size, "PRNG read chunk size in bytes");
-static int prng_entropy_limit = 4096;
-module_param(prng_entropy_limit, int, S_IRUSR | S_IRGRP | S_IROTH | S_IWUSR);
-MODULE_PARM_DESC(prng_entropy_limit,
- "PRNG add entropy after that much bytes were produced");
+
+#define PRNG_RESEED_LIMIT_TDES 4096
+#define PRNG_RESEED_LIMIT_TDES_LOWER 4096
+#define PRNG_RESEED_LIMIT_SHA512 100000
+#define PRNG_RESEED_LIMIT_SHA512_LOWER 10000
+
+static unsigned int prng_reseed_limit;
+module_param_named(reseed_limit, prng_reseed_limit, int, 0);
+MODULE_PARM_DESC(prng_reseed_limit, "PRNG reseed limit");
+
/*
* Any one who considers arithmetical methods of producing random digits is,
* of course, in a state of sin. -- John von Neumann
*/
-struct s390_prng_data {
- unsigned long count; /* how many bytes were produced */
- char *buf;
+static int prng_errorflag;
+
+#define PRNG_GEN_ENTROPY_FAILED 1
+#define PRNG_SELFTEST_FAILED 2
+#define PRNG_INSTANTIATE_FAILED 3
+#define PRNG_SEED_FAILED 4
+#define PRNG_RESEED_FAILED 5
+#define PRNG_GEN_FAILED 6
+
+struct prng_ws_s {
+ u8 parm_block[32];
+ u32 reseed_counter;
+ u64 byte_counter;
};
-static struct s390_prng_data *p;
+struct ppno_ws_s {
+ u32 res;
+ u32 reseed_counter;
+ u64 stream_bytes;
+ u8 V[112];
+ u8 C[112];
+};
-/* copied from libica, use a non-zero initial parameter block */
-static unsigned char parm_block[32] = {
-0x0F,0x2B,0x8E,0x63,0x8C,0x8E,0xD2,0x52,0x64,0xB7,0xA0,0x7B,0x75,0x28,0xB8,0xF4,
-0x75,0x5F,0xD2,0xA6,0x8D,0x97,0x11,0xFF,0x49,0xD8,0x23,0xF3,0x7E,0x21,0xEC,0xA0,
+struct prng_data_s {
+ struct mutex mutex;
+ union {
+ struct prng_ws_s prngws;
+ struct ppno_ws_s ppnows;
+ };
+ u8 *buf;
+ u32 rest;
+ u8 *prev;
};
-static int prng_open(struct inode *inode, struct file *file)
+static struct prng_data_s *prng_data;
+
+/* initial parameter block for tdes mode, copied from libica */
+static const u8 initial_parm_block[32] __initconst = {
+ 0x0F, 0x2B, 0x8E, 0x63, 0x8C, 0x8E, 0xD2, 0x52,
+ 0x64, 0xB7, 0xA0, 0x7B, 0x75, 0x28, 0xB8, 0xF4,
+ 0x75, 0x5F, 0xD2, 0xA6, 0x8D, 0x97, 0x11, 0xFF,
+ 0x49, 0xD8, 0x23, 0xF3, 0x7E, 0x21, 0xEC, 0xA0 };
+
+
+/*** helper functions ***/
+
+static int generate_entropy(u8 *ebuf, size_t nbytes)
{
- return nonseekable_open(inode, file);
+ int n, ret = 0;
+ u8 *pg, *h, hash[32];
+
+ pg = (u8 *) __get_free_page(GFP_KERNEL);
+ if (!pg) {
+ prng_errorflag = PRNG_GEN_ENTROPY_FAILED;
+ return -ENOMEM;
+ }
+
+ while (nbytes) {
+ /* fill page with urandom bytes */
+ get_random_bytes(pg, PAGE_SIZE);
+ /* exor page with stckf values */
+ for (n = 0; n < PAGE_SIZE / sizeof(u64); n++) {
+ u64 *p = ((u64 *)pg) + n;
+ *p ^= get_tod_clock_fast();
+ }
+ n = (nbytes < sizeof(hash)) ? nbytes : sizeof(hash);
+ if (n < sizeof(hash))
+ h = hash;
+ else
+ h = ebuf;
+ /* generate sha256 from this page */
+ if (crypt_s390_kimd(KIMD_SHA_256, h,
+ pg, PAGE_SIZE) != PAGE_SIZE) {
+ prng_errorflag = PRNG_GEN_ENTROPY_FAILED;
+ ret = -EIO;
+ goto out;
+ }
+ if (n < sizeof(hash))
+ memcpy(ebuf, hash, n);
+ ret += n;
+ ebuf += n;
+ nbytes -= n;
+ }
+
+out:
+ free_page((unsigned long)pg);
+ return ret;
}
-static void prng_add_entropy(void)
+
+/*** tdes functions ***/
+
+static void prng_tdes_add_entropy(void)
{
__u64 entropy[4];
unsigned int i;
int ret;
for (i = 0; i < 16; i++) {
- ret = crypt_s390_kmc(KMC_PRNG, parm_block, (char *)entropy,
- (char *)entropy, sizeof(entropy));
+ ret = crypt_s390_kmc(KMC_PRNG, prng_data->prngws.parm_block,
+ (char *)entropy, (char *)entropy,
+ sizeof(entropy));
BUG_ON(ret < 0 || ret != sizeof(entropy));
- memcpy(parm_block, entropy, sizeof(entropy));
+ memcpy(prng_data->prngws.parm_block, entropy, sizeof(entropy));
}
}
-static void prng_seed(int nbytes)
+
+static void prng_tdes_seed(int nbytes)
{
char buf[16];
int i = 0;
- BUG_ON(nbytes > 16);
+ BUG_ON(nbytes > sizeof(buf));
+
get_random_bytes(buf, nbytes);
/* Add the entropy */
while (nbytes >= 8) {
- *((__u64 *)parm_block) ^= *((__u64 *)(buf+i));
- prng_add_entropy();
+ *((__u64 *)prng_data->prngws.parm_block) ^= *((__u64 *)(buf+i));
+ prng_tdes_add_entropy();
i += 8;
nbytes -= 8;
}
- prng_add_entropy();
+ prng_tdes_add_entropy();
+ prng_data->prngws.reseed_counter = 0;
+}
+
+
+static int __init prng_tdes_instantiate(void)
+{
+ int datalen;
+
+ pr_debug("prng runs in TDES mode with "
+ "chunksize=%d and reseed_limit=%u\n",
+ prng_chunk_size, prng_reseed_limit);
+
+ /* memory allocation, prng_data struct init, mutex init */
+ datalen = sizeof(struct prng_data_s) + prng_chunk_size;
+ prng_data = kzalloc(datalen, GFP_KERNEL);
+ if (!prng_data) {
+ prng_errorflag = PRNG_INSTANTIATE_FAILED;
+ return -ENOMEM;
+ }
+ mutex_init(&prng_data->mutex);
+ prng_data->buf = ((u8 *)prng_data) + sizeof(struct prng_data_s);
+ memcpy(prng_data->prngws.parm_block, initial_parm_block, 32);
+
+ /* initialize the PRNG, add 128 bits of entropy */
+ prng_tdes_seed(16);
+
+ return 0;
}
-static ssize_t prng_read(struct file *file, char __user *ubuf, size_t nbytes,
- loff_t *ppos)
+
+static void prng_tdes_deinstantiate(void)
+{
+ pr_debug("The prng module stopped "
+ "after running in triple DES mode\n");
+ kzfree(prng_data);
+}
+
+
+/*** sha512 functions ***/
+
+static int __init prng_sha512_selftest(void)
{
- int chunk, n;
+ /* NIST DRBG testvector for Hash Drbg, Sha-512, Count #0 */
+ static const u8 seed[] __initconst = {
+ 0x6b, 0x50, 0xa7, 0xd8, 0xf8, 0xa5, 0x5d, 0x7a,
+ 0x3d, 0xf8, 0xbb, 0x40, 0xbc, 0xc3, 0xb7, 0x22,
+ 0xd8, 0x70, 0x8d, 0xe6, 0x7f, 0xda, 0x01, 0x0b,
+ 0x03, 0xc4, 0xc8, 0x4d, 0x72, 0x09, 0x6f, 0x8c,
+ 0x3e, 0xc6, 0x49, 0xcc, 0x62, 0x56, 0xd9, 0xfa,
+ 0x31, 0xdb, 0x7a, 0x29, 0x04, 0xaa, 0xf0, 0x25 };
+ static const u8 V0[] __initconst = {
+ 0x00, 0xad, 0xe3, 0x6f, 0x9a, 0x01, 0xc7, 0x76,
+ 0x61, 0x34, 0x35, 0xf5, 0x4e, 0x24, 0x74, 0x22,
+ 0x21, 0x9a, 0x29, 0x89, 0xc7, 0x93, 0x2e, 0x60,
+ 0x1e, 0xe8, 0x14, 0x24, 0x8d, 0xd5, 0x03, 0xf1,
+ 0x65, 0x5d, 0x08, 0x22, 0x72, 0xd5, 0xad, 0x95,
+ 0xe1, 0x23, 0x1e, 0x8a, 0xa7, 0x13, 0xd9, 0x2b,
+ 0x5e, 0xbc, 0xbb, 0x80, 0xab, 0x8d, 0xe5, 0x79,
+ 0xab, 0x5b, 0x47, 0x4e, 0xdd, 0xee, 0x6b, 0x03,
+ 0x8f, 0x0f, 0x5c, 0x5e, 0xa9, 0x1a, 0x83, 0xdd,
+ 0xd3, 0x88, 0xb2, 0x75, 0x4b, 0xce, 0x83, 0x36,
+ 0x57, 0x4b, 0xf1, 0x5c, 0xca, 0x7e, 0x09, 0xc0,
+ 0xd3, 0x89, 0xc6, 0xe0, 0xda, 0xc4, 0x81, 0x7e,
+ 0x5b, 0xf9, 0xe1, 0x01, 0xc1, 0x92, 0x05, 0xea,
+ 0xf5, 0x2f, 0xc6, 0xc6, 0xc7, 0x8f, 0xbc, 0xf4 };
+ static const u8 C0[] __initconst = {
+ 0x00, 0xf4, 0xa3, 0xe5, 0xa0, 0x72, 0x63, 0x95,
+ 0xc6, 0x4f, 0x48, 0xd0, 0x8b, 0x5b, 0x5f, 0x8e,
+ 0x6b, 0x96, 0x1f, 0x16, 0xed, 0xbc, 0x66, 0x94,
+ 0x45, 0x31, 0xd7, 0x47, 0x73, 0x22, 0xa5, 0x86,
+ 0xce, 0xc0, 0x4c, 0xac, 0x63, 0xb8, 0x39, 0x50,
+ 0xbf, 0xe6, 0x59, 0x6c, 0x38, 0x58, 0x99, 0x1f,
+ 0x27, 0xa7, 0x9d, 0x71, 0x2a, 0xb3, 0x7b, 0xf9,
+ 0xfb, 0x17, 0x86, 0xaa, 0x99, 0x81, 0xaa, 0x43,
+ 0xe4, 0x37, 0xd3, 0x1e, 0x6e, 0xe5, 0xe6, 0xee,
+ 0xc2, 0xed, 0x95, 0x4f, 0x53, 0x0e, 0x46, 0x8a,
+ 0xcc, 0x45, 0xa5, 0xdb, 0x69, 0x0d, 0x81, 0xc9,
+ 0x32, 0x92, 0xbc, 0x8f, 0x33, 0xe6, 0xf6, 0x09,
+ 0x7c, 0x8e, 0x05, 0x19, 0x0d, 0xf1, 0xb6, 0xcc,
+ 0xf3, 0x02, 0x21, 0x90, 0x25, 0xec, 0xed, 0x0e };
+ static const u8 random[] __initconst = {
+ 0x95, 0xb7, 0xf1, 0x7e, 0x98, 0x02, 0xd3, 0x57,
+ 0x73, 0x92, 0xc6, 0xa9, 0xc0, 0x80, 0x83, 0xb6,
+ 0x7d, 0xd1, 0x29, 0x22, 0x65, 0xb5, 0xf4, 0x2d,
+ 0x23, 0x7f, 0x1c, 0x55, 0xbb, 0x9b, 0x10, 0xbf,
+ 0xcf, 0xd8, 0x2c, 0x77, 0xa3, 0x78, 0xb8, 0x26,
+ 0x6a, 0x00, 0x99, 0x14, 0x3b, 0x3c, 0x2d, 0x64,
+ 0x61, 0x1e, 0xee, 0xb6, 0x9a, 0xcd, 0xc0, 0x55,
+ 0x95, 0x7c, 0x13, 0x9e, 0x8b, 0x19, 0x0c, 0x7a,
+ 0x06, 0x95, 0x5f, 0x2c, 0x79, 0x7c, 0x27, 0x78,
+ 0xde, 0x94, 0x03, 0x96, 0xa5, 0x01, 0xf4, 0x0e,
+ 0x91, 0x39, 0x6a, 0xcf, 0x8d, 0x7e, 0x45, 0xeb,
+ 0xdb, 0xb5, 0x3b, 0xbf, 0x8c, 0x97, 0x52, 0x30,
+ 0xd2, 0xf0, 0xff, 0x91, 0x06, 0xc7, 0x61, 0x19,
+ 0xae, 0x49, 0x8e, 0x7f, 0xbc, 0x03, 0xd9, 0x0f,
+ 0x8e, 0x4c, 0x51, 0x62, 0x7a, 0xed, 0x5c, 0x8d,
+ 0x42, 0x63, 0xd5, 0xd2, 0xb9, 0x78, 0x87, 0x3a,
+ 0x0d, 0xe5, 0x96, 0xee, 0x6d, 0xc7, 0xf7, 0xc2,
+ 0x9e, 0x37, 0xee, 0xe8, 0xb3, 0x4c, 0x90, 0xdd,
+ 0x1c, 0xf6, 0xa9, 0xdd, 0xb2, 0x2b, 0x4c, 0xbd,
+ 0x08, 0x6b, 0x14, 0xb3, 0x5d, 0xe9, 0x3d, 0xa2,
+ 0xd5, 0xcb, 0x18, 0x06, 0x69, 0x8c, 0xbd, 0x7b,
+ 0xbb, 0x67, 0xbf, 0xe3, 0xd3, 0x1f, 0xd2, 0xd1,
+ 0xdb, 0xd2, 0xa1, 0xe0, 0x58, 0xa3, 0xeb, 0x99,
+ 0xd7, 0xe5, 0x1f, 0x1a, 0x93, 0x8e, 0xed, 0x5e,
+ 0x1c, 0x1d, 0xe2, 0x3a, 0x6b, 0x43, 0x45, 0xd3,
+ 0x19, 0x14, 0x09, 0xf9, 0x2f, 0x39, 0xb3, 0x67,
+ 0x0d, 0x8d, 0xbf, 0xb6, 0x35, 0xd8, 0xe6, 0xa3,
+ 0x69, 0x32, 0xd8, 0x10, 0x33, 0xd1, 0x44, 0x8d,
+ 0x63, 0xb4, 0x03, 0xdd, 0xf8, 0x8e, 0x12, 0x1b,
+ 0x6e, 0x81, 0x9a, 0xc3, 0x81, 0x22, 0x6c, 0x13,
+ 0x21, 0xe4, 0xb0, 0x86, 0x44, 0xf6, 0x72, 0x7c,
+ 0x36, 0x8c, 0x5a, 0x9f, 0x7a, 0x4b, 0x3e, 0xe2 };
+
int ret = 0;
- int tmp;
+ u8 buf[sizeof(random)];
+ struct ppno_ws_s ws;
+
+ memset(&ws, 0, sizeof(ws));
+
+ /* initial seed */
+ ret = crypt_s390_ppno(PPNO_SHA512_DRNG_SEED,
+ &ws, NULL, 0,
+ seed, sizeof(seed));
+ if (ret < 0) {
+ pr_err("The prng self test seed operation for the "
+ "SHA-512 mode failed with rc=%d\n", ret);
+ prng_errorflag = PRNG_SELFTEST_FAILED;
+ return -EIO;
+ }
+
+ /* check working states V and C */
+ if (memcmp(ws.V, V0, sizeof(V0)) != 0
+ || memcmp(ws.C, C0, sizeof(C0)) != 0) {
+ pr_err("The prng self test state test "
+ "for the SHA-512 mode failed\n");
+ prng_errorflag = PRNG_SELFTEST_FAILED;
+ return -EIO;
+ }
+
+ /* generate random bytes */
+ ret = crypt_s390_ppno(PPNO_SHA512_DRNG_GEN,
+ &ws, buf, sizeof(buf),
+ NULL, 0);
+ if (ret < 0) {
+ pr_err("The prng self test generate operation for "
+ "the SHA-512 mode failed with rc=%d\n", ret);
+ prng_errorflag = PRNG_SELFTEST_FAILED;
+ return -EIO;
+ }
+ ret = crypt_s390_ppno(PPNO_SHA512_DRNG_GEN,
+ &ws, buf, sizeof(buf),
+ NULL, 0);
+ if (ret < 0) {
+ pr_err("The prng self test generate operation for "
+ "the SHA-512 mode failed with rc=%d\n", ret);
+ prng_errorflag = PRNG_SELFTEST_FAILED;
+ return -EIO;
+ }
+
+ /* check against expected data */
+ if (memcmp(buf, random, sizeof(random)) != 0) {
+ pr_err("The prng self test data test "
+ "for the SHA-512 mode failed\n");
+ prng_errorflag = PRNG_SELFTEST_FAILED;
+ return -EIO;
+ }
+
+ return 0;
+}
+
+
+static int __init prng_sha512_instantiate(void)
+{
+ int ret, datalen;
+ u8 seed[64];
+
+ pr_debug("prng runs in SHA-512 mode "
+ "with chunksize=%d and reseed_limit=%u\n",
+ prng_chunk_size, prng_reseed_limit);
+
+ /* memory allocation, prng_data struct init, mutex init */
+ datalen = sizeof(struct prng_data_s) + prng_chunk_size;
+ if (fips_enabled)
+ datalen += prng_chunk_size;
+ prng_data = kzalloc(datalen, GFP_KERNEL);
+ if (!prng_data) {
+ prng_errorflag = PRNG_INSTANTIATE_FAILED;
+ return -ENOMEM;
+ }
+ mutex_init(&prng_data->mutex);
+ prng_data->buf = ((u8 *)prng_data) + sizeof(struct prng_data_s);
+
+ /* selftest */
+ ret = prng_sha512_selftest();
+ if (ret)
+ goto outfree;
+
+ /* generate initial seed bytestring, first 48 bytes of entropy */
+ ret = generate_entropy(seed, 48);
+ if (ret != 48)
+ goto outfree;
+ /* followed by 16 bytes of unique nonce */
+ get_tod_clock_ext(seed + 48);
+
+ /* initial seed of the ppno drng */
+ ret = crypt_s390_ppno(PPNO_SHA512_DRNG_SEED,
+ &prng_data->ppnows, NULL, 0,
+ seed, sizeof(seed));
+ if (ret < 0) {
+ prng_errorflag = PRNG_SEED_FAILED;
+ ret = -EIO;
+ goto outfree;
+ }
+
+ /* if fips mode is enabled, generate a first block of random
+ bytes for the FIPS 140-2 Conditional Self Test */
+ if (fips_enabled) {
+ prng_data->prev = prng_data->buf + prng_chunk_size;
+ ret = crypt_s390_ppno(PPNO_SHA512_DRNG_GEN,
+ &prng_data->ppnows,
+ prng_data->prev,
+ prng_chunk_size,
+ NULL, 0);
+ if (ret < 0 || ret != prng_chunk_size) {
+ prng_errorflag = PRNG_GEN_FAILED;
+ ret = -EIO;
+ goto outfree;
+ }
+ }
+
+ return 0;
+
+outfree:
+ kfree(prng_data);
+ return ret;
+}
+
+
+static void prng_sha512_deinstantiate(void)
+{
+ pr_debug("The prng module stopped after running in SHA-512 mode\n");
+ kzfree(prng_data);
+}
+
+
+static int prng_sha512_reseed(void)
+{
+ int ret;
+ u8 seed[32];
+
+ /* generate 32 bytes of fresh entropy */
+ ret = generate_entropy(seed, sizeof(seed));
+ if (ret != sizeof(seed))
+ return ret;
+
+ /* do a reseed of the ppno drng with this bytestring */
+ ret = crypt_s390_ppno(PPNO_SHA512_DRNG_SEED,
+ &prng_data->ppnows, NULL, 0,
+ seed, sizeof(seed));
+ if (ret) {
+ prng_errorflag = PRNG_RESEED_FAILED;
+ return -EIO;
+ }
+
+ return 0;
+}
+
+
+static int prng_sha512_generate(u8 *buf, size_t nbytes)
+{
+ int ret;
+
+ /* reseed needed ? */
+ if (prng_data->ppnows.reseed_counter > prng_reseed_limit) {
+ ret = prng_sha512_reseed();
+ if (ret)
+ return ret;
+ }
+
+ /* PPNO generate */
+ ret = crypt_s390_ppno(PPNO_SHA512_DRNG_GEN,
+ &prng_data->ppnows, buf, nbytes,
+ NULL, 0);
+ if (ret < 0 || ret != nbytes) {
+ prng_errorflag = PRNG_GEN_FAILED;
+ return -EIO;
+ }
+
+ /* FIPS 140-2 Conditional Self Test */
+ if (fips_enabled) {
+ if (!memcmp(prng_data->prev, buf, nbytes)) {
+ prng_errorflag = PRNG_GEN_FAILED;
+ return -EILSEQ;
+ }
+ memcpy(prng_data->prev, buf, nbytes);
+ }
+
+ return ret;
+}
+
+
+/*** file io functions ***/
+
+static int prng_open(struct inode *inode, struct file *file)
+{
+ return nonseekable_open(inode, file);
+}
+
+
+static ssize_t prng_tdes_read(struct file *file, char __user *ubuf,
+ size_t nbytes, loff_t *ppos)
+{
+ int chunk, n, tmp, ret = 0;
+
+ /* lock prng_data struct */
+ if (mutex_lock_interruptible(&prng_data->mutex))
+ return -ERESTARTSYS;
- /* nbytes can be arbitrary length, we split it into chunks */
while (nbytes) {
- /* same as in extract_entropy_user in random.c */
if (need_resched()) {
if (signal_pending(current)) {
if (ret == 0)
ret = -ERESTARTSYS;
break;
}
+ /* give mutex free before calling schedule() */
+ mutex_unlock(&prng_data->mutex);
schedule();
+ /* occopy mutex again */
+ if (mutex_lock_interruptible(&prng_data->mutex)) {
+ if (ret == 0)
+ ret = -ERESTARTSYS;
+ return ret;
+ }
}
/*
@@ -112,12 +535,11 @@ static ssize_t prng_read(struct file *file, char __user *ubuf, size_t nbytes,
/* PRNG only likes multiples of 8 bytes */
n = (chunk + 7) & -8;
- if (p->count > prng_entropy_limit)
- prng_seed(8);
+ if (prng_data->prngws.reseed_counter > prng_reseed_limit)
+ prng_tdes_seed(8);
/* if the CPU supports PRNG stckf is present too */
- asm volatile(".insn s,0xb27c0000,%0"
- : "=m" (*((unsigned long long *)p->buf)) : : "cc");
+ *((unsigned long long *)prng_data->buf) = get_tod_clock_fast();
/*
* Beside the STCKF the input for the TDES-EDE is the output
@@ -132,35 +554,259 @@ static ssize_t prng_read(struct file *file, char __user *ubuf, size_t nbytes,
* Note: you can still get strict X9.17 conformity by setting
* prng_chunk_size to 8 bytes.
*/
- tmp = crypt_s390_kmc(KMC_PRNG, parm_block, p->buf, p->buf, n);
- BUG_ON((tmp < 0) || (tmp != n));
+ tmp = crypt_s390_kmc(KMC_PRNG, prng_data->prngws.parm_block,
+ prng_data->buf, prng_data->buf, n);
+ if (tmp < 0 || tmp != n) {
+ ret = -EIO;
+ break;
+ }
- p->count += n;
+ prng_data->prngws.byte_counter += n;
+ prng_data->prngws.reseed_counter += n;
- if (copy_to_user(ubuf, p->buf, chunk))
+ if (copy_to_user(ubuf, prng_data->buf, chunk))
return -EFAULT;
nbytes -= chunk;
ret += chunk;
ubuf += chunk;
}
+
+ /* unlock prng_data struct */
+ mutex_unlock(&prng_data->mutex);
+
return ret;
}
-static const struct file_operations prng_fops = {
+
+static ssize_t prng_sha512_read(struct file *file, char __user *ubuf,
+ size_t nbytes, loff_t *ppos)
+{
+ int n, ret = 0;
+ u8 *p;
+
+ /* if errorflag is set do nothing and return 'broken pipe' */
+ if (prng_errorflag)
+ return -EPIPE;
+
+ /* lock prng_data struct */
+ if (mutex_lock_interruptible(&prng_data->mutex))
+ return -ERESTARTSYS;
+
+ while (nbytes) {
+ if (need_resched()) {
+ if (signal_pending(current)) {
+ if (ret == 0)
+ ret = -ERESTARTSYS;
+ break;
+ }
+ /* give mutex free before calling schedule() */
+ mutex_unlock(&prng_data->mutex);
+ schedule();
+ /* occopy mutex again */
+ if (mutex_lock_interruptible(&prng_data->mutex)) {
+ if (ret == 0)
+ ret = -ERESTARTSYS;
+ return ret;
+ }
+ }
+ if (prng_data->rest) {
+ /* push left over random bytes from the previous read */
+ p = prng_data->buf + prng_chunk_size - prng_data->rest;
+ n = (nbytes < prng_data->rest) ?
+ nbytes : prng_data->rest;
+ prng_data->rest -= n;
+ } else {
+ /* generate one chunk of random bytes into read buf */
+ p = prng_data->buf;
+ n = prng_sha512_generate(p, prng_chunk_size);
+ if (n < 0) {
+ ret = n;
+ break;
+ }
+ if (nbytes < prng_chunk_size) {
+ n = nbytes;
+ prng_data->rest = prng_chunk_size - n;
+ } else {
+ n = prng_chunk_size;
+ prng_data->rest = 0;
+ }
+ }
+ if (copy_to_user(ubuf, p, n)) {
+ ret = -EFAULT;
+ break;
+ }
+ ubuf += n;
+ nbytes -= n;
+ ret += n;
+ }
+
+ /* unlock prng_data struct */
+ mutex_unlock(&prng_data->mutex);
+
+ return ret;
+}
+
+
+/*** sysfs stuff ***/
+
+static const struct file_operations prng_sha512_fops = {
+ .owner = THIS_MODULE,
+ .open = &prng_open,
+ .release = NULL,
+ .read = &prng_sha512_read,
+ .llseek = noop_llseek,
+};
+static const struct file_operations prng_tdes_fops = {
.owner = THIS_MODULE,
.open = &prng_open,
.release = NULL,
- .read = &prng_read,
+ .read = &prng_tdes_read,
.llseek = noop_llseek,
};
-static struct miscdevice prng_dev = {
+static struct miscdevice prng_sha512_dev = {
+ .name = "prandom",
+ .minor = MISC_DYNAMIC_MINOR,
+ .fops = &prng_sha512_fops,
+};
+static struct miscdevice prng_tdes_dev = {
.name = "prandom",
.minor = MISC_DYNAMIC_MINOR,
- .fops = &prng_fops,
+ .fops = &prng_tdes_fops,
};
+
+/* chunksize attribute (ro) */
+static ssize_t prng_chunksize_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ return snprintf(buf, PAGE_SIZE, "%u\n", prng_chunk_size);
+}
+static DEVICE_ATTR(chunksize, 0444, prng_chunksize_show, NULL);
+
+/* counter attribute (ro) */
+static ssize_t prng_counter_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ u64 counter;
+
+ if (mutex_lock_interruptible(&prng_data->mutex))
+ return -ERESTARTSYS;
+ if (prng_mode == PRNG_MODE_SHA512)
+ counter = prng_data->ppnows.stream_bytes;
+ else
+ counter = prng_data->prngws.byte_counter;
+ mutex_unlock(&prng_data->mutex);
+
+ return snprintf(buf, PAGE_SIZE, "%llu\n", counter);
+}
+static DEVICE_ATTR(byte_counter, 0444, prng_counter_show, NULL);
+
+/* errorflag attribute (ro) */
+static ssize_t prng_errorflag_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ return snprintf(buf, PAGE_SIZE, "%d\n", prng_errorflag);
+}
+static DEVICE_ATTR(errorflag, 0444, prng_errorflag_show, NULL);
+
+/* mode attribute (ro) */
+static ssize_t prng_mode_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ if (prng_mode == PRNG_MODE_TDES)
+ return snprintf(buf, PAGE_SIZE, "TDES\n");
+ else
+ return snprintf(buf, PAGE_SIZE, "SHA512\n");
+}
+static DEVICE_ATTR(mode, 0444, prng_mode_show, NULL);
+
+/* reseed attribute (w) */
+static ssize_t prng_reseed_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ if (mutex_lock_interruptible(&prng_data->mutex))
+ return -ERESTARTSYS;
+ prng_sha512_reseed();
+ mutex_unlock(&prng_data->mutex);
+
+ return count;
+}
+static DEVICE_ATTR(reseed, 0200, NULL, prng_reseed_store);
+
+/* reseed limit attribute (rw) */
+static ssize_t prng_reseed_limit_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ return snprintf(buf, PAGE_SIZE, "%u\n", prng_reseed_limit);
+}
+static ssize_t prng_reseed_limit_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ unsigned limit;
+
+ if (sscanf(buf, "%u\n", &limit) != 1)
+ return -EINVAL;
+
+ if (prng_mode == PRNG_MODE_SHA512) {
+ if (limit < PRNG_RESEED_LIMIT_SHA512_LOWER)
+ return -EINVAL;
+ } else {
+ if (limit < PRNG_RESEED_LIMIT_TDES_LOWER)
+ return -EINVAL;
+ }
+
+ prng_reseed_limit = limit;
+
+ return count;
+}
+static DEVICE_ATTR(reseed_limit, 0644,
+ prng_reseed_limit_show, prng_reseed_limit_store);
+
+/* strength attribute (ro) */
+static ssize_t prng_strength_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ return snprintf(buf, PAGE_SIZE, "256\n");
+}
+static DEVICE_ATTR(strength, 0444, prng_strength_show, NULL);
+
+static struct attribute *prng_sha512_dev_attrs[] = {
+ &dev_attr_errorflag.attr,
+ &dev_attr_chunksize.attr,
+ &dev_attr_byte_counter.attr,
+ &dev_attr_mode.attr,
+ &dev_attr_reseed.attr,
+ &dev_attr_reseed_limit.attr,
+ &dev_attr_strength.attr,
+ NULL
+};
+static struct attribute *prng_tdes_dev_attrs[] = {
+ &dev_attr_chunksize.attr,
+ &dev_attr_byte_counter.attr,
+ &dev_attr_mode.attr,
+ NULL
+};
+
+static struct attribute_group prng_sha512_dev_attr_group = {
+ .attrs = prng_sha512_dev_attrs
+};
+static struct attribute_group prng_tdes_dev_attr_group = {
+ .attrs = prng_tdes_dev_attrs
+};
+
+
+/*** module init and exit ***/
+
static int __init prng_init(void)
{
int ret;
@@ -169,43 +815,105 @@ static int __init prng_init(void)
if (!crypt_s390_func_available(KMC_PRNG, CRYPT_S390_MSA))
return -EOPNOTSUPP;
- if (prng_chunk_size < 8)
- return -EINVAL;
+ /* choose prng mode */
+ if (prng_mode != PRNG_MODE_TDES) {
+ /* check for MSA5 support for PPNO operations */
+ if (!crypt_s390_func_available(PPNO_SHA512_DRNG_GEN,
+ CRYPT_S390_MSA5)) {
+ if (prng_mode == PRNG_MODE_SHA512) {
+ pr_err("The prng module cannot "
+ "start in SHA-512 mode\n");
+ return -EOPNOTSUPP;
+ }
+ prng_mode = PRNG_MODE_TDES;
+ } else
+ prng_mode = PRNG_MODE_SHA512;
+ }
- p = kmalloc(sizeof(struct s390_prng_data), GFP_KERNEL);
- if (!p)
- return -ENOMEM;
- p->count = 0;
+ if (prng_mode == PRNG_MODE_SHA512) {
- p->buf = kmalloc(prng_chunk_size, GFP_KERNEL);
- if (!p->buf) {
- ret = -ENOMEM;
- goto out_free;
- }
+ /* SHA512 mode */
- /* initialize the PRNG, add 128 bits of entropy */
- prng_seed(16);
+ if (prng_chunk_size < PRNG_CHUNKSIZE_SHA512_MIN
+ || prng_chunk_size > PRNG_CHUNKSIZE_SHA512_MAX)
+ return -EINVAL;
+ prng_chunk_size = (prng_chunk_size + 0x3f) & ~0x3f;
- ret = misc_register(&prng_dev);
- if (ret)
- goto out_buf;
- return 0;
+ if (prng_reseed_limit == 0)
+ prng_reseed_limit = PRNG_RESEED_LIMIT_SHA512;
+ else if (prng_reseed_limit < PRNG_RESEED_LIMIT_SHA512_LOWER)
+ return -EINVAL;
+
+ ret = prng_sha512_instantiate();
+ if (ret)
+ goto out;
+
+ ret = misc_register(&prng_sha512_dev);
+ if (ret) {
+ prng_sha512_deinstantiate();
+ goto out;
+ }
+ ret = sysfs_create_group(&prng_sha512_dev.this_device->kobj,
+ &prng_sha512_dev_attr_group);
+ if (ret) {
+ misc_deregister(&prng_sha512_dev);
+ prng_sha512_deinstantiate();
+ goto out;
+ }
-out_buf:
- kfree(p->buf);
-out_free:
- kfree(p);
+ } else {
+
+ /* TDES mode */
+
+ if (prng_chunk_size < PRNG_CHUNKSIZE_TDES_MIN
+ || prng_chunk_size > PRNG_CHUNKSIZE_TDES_MAX)
+ return -EINVAL;
+ prng_chunk_size = (prng_chunk_size + 0x07) & ~0x07;
+
+ if (prng_reseed_limit == 0)
+ prng_reseed_limit = PRNG_RESEED_LIMIT_TDES;
+ else if (prng_reseed_limit < PRNG_RESEED_LIMIT_TDES_LOWER)
+ return -EINVAL;
+
+ ret = prng_tdes_instantiate();
+ if (ret)
+ goto out;
+
+ ret = misc_register(&prng_tdes_dev);
+ if (ret) {
+ prng_tdes_deinstantiate();
+ goto out;
+ }
+ ret = sysfs_create_group(&prng_tdes_dev.this_device->kobj,
+ &prng_tdes_dev_attr_group);
+ if (ret) {
+ misc_deregister(&prng_tdes_dev);
+ prng_tdes_deinstantiate();
+ goto out;
+ }
+
+ }
+
+out:
return ret;
}
+
static void __exit prng_exit(void)
{
- /* wipe me */
- kzfree(p->buf);
- kfree(p);
-
- misc_deregister(&prng_dev);
+ if (prng_mode == PRNG_MODE_SHA512) {
+ sysfs_remove_group(&prng_sha512_dev.this_device->kobj,
+ &prng_sha512_dev_attr_group);
+ misc_deregister(&prng_sha512_dev);
+ prng_sha512_deinstantiate();
+ } else {
+ sysfs_remove_group(&prng_tdes_dev.this_device->kobj,
+ &prng_tdes_dev_attr_group);
+ misc_deregister(&prng_tdes_dev);
+ prng_tdes_deinstantiate();
+ }
}
+
module_init(prng_init);
module_exit(prng_exit);
diff --git a/arch/s390/hypfs/hypfs_sprp.c b/arch/s390/hypfs/hypfs_sprp.c
index f043c3c7e73c..dd42a26d049d 100644
--- a/arch/s390/hypfs/hypfs_sprp.c
+++ b/arch/s390/hypfs/hypfs_sprp.c
@@ -128,14 +128,14 @@ static struct hypfs_dbfs_file hypfs_sprp_file = {
int hypfs_sprp_init(void)
{
- if (!sclp_has_sprp())
+ if (!sclp.has_sprp)
return 0;
return hypfs_dbfs_create_file(&hypfs_sprp_file);
}
void hypfs_sprp_exit(void)
{
- if (!sclp_has_sprp())
+ if (!sclp.has_sprp)
return;
hypfs_dbfs_remove_file(&hypfs_sprp_file);
}
diff --git a/arch/s390/include/asm/Kbuild b/arch/s390/include/asm/Kbuild
index c631f98fd524..dc5385ebb071 100644
--- a/arch/s390/include/asm/Kbuild
+++ b/arch/s390/include/asm/Kbuild
@@ -4,5 +4,4 @@ generic-y += clkdev.h
generic-y += irq_work.h
generic-y += mcs_spinlock.h
generic-y += preempt.h
-generic-y += scatterlist.h
generic-y += trace_clock.h
diff --git a/arch/s390/include/asm/barrier.h b/arch/s390/include/asm/barrier.h
index 8d724718ec21..e6f8615a11eb 100644
--- a/arch/s390/include/asm/barrier.h
+++ b/arch/s390/include/asm/barrier.h
@@ -36,7 +36,7 @@
#define smp_mb__before_atomic() smp_mb()
#define smp_mb__after_atomic() smp_mb()
-#define set_mb(var, value) do { var = value; mb(); } while (0)
+#define smp_store_mb(var, value) do { WRITE_ONCE(var, value); mb(); } while (0)
#define smp_store_release(p, v) \
do { \
diff --git a/arch/s390/include/asm/cmpxchg.h b/arch/s390/include/asm/cmpxchg.h
index 4eadec466b8c..411464f4c97a 100644
--- a/arch/s390/include/asm/cmpxchg.h
+++ b/arch/s390/include/asm/cmpxchg.h
@@ -32,8 +32,6 @@
__old; \
})
-#define __HAVE_ARCH_CMPXCHG
-
#define __cmpxchg_double_op(p1, p2, o1, o2, n1, n2, insn) \
({ \
register __typeof__(*(p1)) __old1 asm("2") = (o1); \
diff --git a/arch/s390/include/asm/hugetlb.h b/arch/s390/include/asm/hugetlb.h
index 11eae5f55b70..0130d0379edd 100644
--- a/arch/s390/include/asm/hugetlb.h
+++ b/arch/s390/include/asm/hugetlb.h
@@ -35,12 +35,8 @@ static inline int prepare_hugepage_range(struct file *file,
return 0;
}
-#define hugetlb_prefault_arch_hook(mm) do { } while (0)
#define arch_clear_hugepage_flags(page) do { } while (0)
-int arch_prepare_hugepage(struct page *page);
-void arch_release_hugepage(struct page *page);
-
static inline void huge_pte_clear(struct mm_struct *mm, unsigned long addr,
pte_t *ptep)
{
diff --git a/arch/s390/include/asm/io.h b/arch/s390/include/asm/io.h
index 30fd5c84680e..cb5fdf3a78fc 100644
--- a/arch/s390/include/asm/io.h
+++ b/arch/s390/include/asm/io.h
@@ -29,6 +29,7 @@ void unxlate_dev_mem_ptr(phys_addr_t phys, void *addr);
#define ioremap_nocache(addr, size) ioremap(addr, size)
#define ioremap_wc ioremap_nocache
+#define ioremap_wt ioremap_nocache
static inline void __iomem *ioremap(unsigned long offset, unsigned long size)
{
diff --git a/arch/s390/include/asm/kexec.h b/arch/s390/include/asm/kexec.h
index 694bcd6bd927..2f924bc30e35 100644
--- a/arch/s390/include/asm/kexec.h
+++ b/arch/s390/include/asm/kexec.h
@@ -26,6 +26,9 @@
/* Not more than 2GB */
#define KEXEC_CONTROL_MEMORY_LIMIT (1UL<<31)
+/* Allocate control page with GFP_DMA */
+#define KEXEC_CONTROL_MEMORY_GFP GFP_DMA
+
/* Maximum address we can use for the crash control pages */
#define KEXEC_CRASH_CONTROL_MEMORY_LIMIT (-1UL)
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index d01fc588b5c3..3024acbe1f9d 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -80,6 +80,7 @@ struct sca_block {
#define CPUSTAT_MCDS 0x00000100
#define CPUSTAT_SM 0x00000080
#define CPUSTAT_IBS 0x00000040
+#define CPUSTAT_GED2 0x00000010
#define CPUSTAT_G 0x00000008
#define CPUSTAT_GED 0x00000004
#define CPUSTAT_J 0x00000002
@@ -95,7 +96,8 @@ struct kvm_s390_sie_block {
#define PROG_IN_SIE (1<<0)
__u32 prog0c; /* 0x000c */
__u8 reserved10[16]; /* 0x0010 */
-#define PROG_BLOCK_SIE 0x00000001
+#define PROG_BLOCK_SIE (1<<0)
+#define PROG_REQUEST (1<<1)
atomic_t prog20; /* 0x0020 */
__u8 reserved24[4]; /* 0x0024 */
__u64 cputm; /* 0x0028 */
@@ -634,7 +636,7 @@ static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {}
static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}
static inline void kvm_arch_free_memslot(struct kvm *kvm,
struct kvm_memory_slot *free, struct kvm_memory_slot *dont) {}
-static inline void kvm_arch_memslots_updated(struct kvm *kvm) {}
+static inline void kvm_arch_memslots_updated(struct kvm *kvm, struct kvm_memslots *slots) {}
static inline void kvm_arch_flush_shadow_all(struct kvm *kvm) {}
static inline void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
struct kvm_memory_slot *slot) {}
diff --git a/arch/s390/include/asm/mm-arch-hooks.h b/arch/s390/include/asm/mm-arch-hooks.h
new file mode 100644
index 000000000000..07680b2f3c59
--- /dev/null
+++ b/arch/s390/include/asm/mm-arch-hooks.h
@@ -0,0 +1,15 @@
+/*
+ * Architecture specific mm hooks
+ *
+ * Copyright (C) 2015, IBM Corporation
+ * Author: Laurent Dufour <ldufour@linux.vnet.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef _ASM_S390_MM_ARCH_HOOKS_H
+#define _ASM_S390_MM_ARCH_HOOKS_H
+
+#endif /* _ASM_S390_MM_ARCH_HOOKS_H */
diff --git a/arch/s390/include/asm/mmu.h b/arch/s390/include/asm/mmu.h
index a5e656260a70..d29ad9545b41 100644
--- a/arch/s390/include/asm/mmu.h
+++ b/arch/s390/include/asm/mmu.h
@@ -14,7 +14,9 @@ typedef struct {
unsigned long asce_bits;
unsigned long asce_limit;
unsigned long vdso_base;
- /* The mmu context has extended page tables. */
+ /* The mmu context allocates 4K page tables. */
+ unsigned int alloc_pgste:1;
+ /* The mmu context uses extended page tables. */
unsigned int has_pgste:1;
/* The mmu context uses storage keys. */
unsigned int use_skey:1;
diff --git a/arch/s390/include/asm/mmu_context.h b/arch/s390/include/asm/mmu_context.h
index d25d9ff10ba8..fb1b93ea3e3f 100644
--- a/arch/s390/include/asm/mmu_context.h
+++ b/arch/s390/include/asm/mmu_context.h
@@ -20,8 +20,11 @@ static inline int init_new_context(struct task_struct *tsk,
mm->context.flush_mm = 0;
mm->context.asce_bits = _ASCE_TABLE_LENGTH | _ASCE_USER_BITS;
mm->context.asce_bits |= _ASCE_TYPE_REGION3;
+#ifdef CONFIG_PGSTE
+ mm->context.alloc_pgste = page_table_allocate_pgste;
mm->context.has_pgste = 0;
mm->context.use_skey = 0;
+#endif
mm->context.asce_limit = STACK_TOP_MAX;
crst_table_init((unsigned long *) mm->pgd, pgd_entry_type(mm));
return 0;
diff --git a/arch/s390/include/asm/page.h b/arch/s390/include/asm/page.h
index 53eacbd4f09b..dd345238d9a7 100644
--- a/arch/s390/include/asm/page.h
+++ b/arch/s390/include/asm/page.h
@@ -17,7 +17,10 @@
#define PAGE_DEFAULT_ACC 0
#define PAGE_DEFAULT_KEY (PAGE_DEFAULT_ACC << 4)
-#define HPAGE_SHIFT 20
+#include <asm/setup.h>
+#ifndef __ASSEMBLY__
+
+extern int HPAGE_SHIFT;
#define HPAGE_SIZE (1UL << HPAGE_SHIFT)
#define HPAGE_MASK (~(HPAGE_SIZE - 1))
#define HUGETLB_PAGE_ORDER (HPAGE_SHIFT - PAGE_SHIFT)
@@ -27,9 +30,6 @@
#define ARCH_HAS_PREPARE_HUGEPAGE
#define ARCH_HAS_HUGEPAGE_CLEAR_FLUSH
-#include <asm/setup.h>
-#ifndef __ASSEMBLY__
-
static inline void storage_key_init_range(unsigned long start, unsigned long end)
{
#if PAGE_DEFAULT_KEY
diff --git a/arch/s390/include/asm/pgalloc.h b/arch/s390/include/asm/pgalloc.h
index 51e7fb634ebc..7b7858f158b4 100644
--- a/arch/s390/include/asm/pgalloc.h
+++ b/arch/s390/include/asm/pgalloc.h
@@ -21,6 +21,7 @@ void crst_table_free(struct mm_struct *, unsigned long *);
unsigned long *page_table_alloc(struct mm_struct *);
void page_table_free(struct mm_struct *, unsigned long *);
void page_table_free_rcu(struct mmu_gather *, unsigned long *, unsigned long);
+extern int page_table_allocate_pgste;
int set_guest_storage_key(struct mm_struct *mm, unsigned long addr,
unsigned long key, bool nq);
diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index 989cfae9e202..f66d82798a6a 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -12,12 +12,9 @@
#define _ASM_S390_PGTABLE_H
/*
- * The Linux memory management assumes a three-level page table setup. For
- * s390 31 bit we "fold" the mid level into the top-level page table, so
- * that we physically have the same two-level page table as the s390 mmu
- * expects in 31 bit mode. For s390 64 bit we use three of the five levels
- * the hardware provides (region first and region second tables are not
- * used).
+ * The Linux memory management assumes a three-level page table setup.
+ * For s390 64 bit we use up to four of the five levels the hardware
+ * provides (region first tables are not used).
*
* The "pgd_xxx()" functions are trivial for a folded two-level
* setup: the pgd is never bad, and a pmd always exists (as it's folded
@@ -101,8 +98,8 @@ extern unsigned long zero_page_mask;
#ifndef __ASSEMBLY__
/*
- * The vmalloc and module area will always be on the topmost area of the kernel
- * mapping. We reserve 96MB (31bit) / 128GB (64bit) for vmalloc and modules.
+ * The vmalloc and module area will always be on the topmost area of the
+ * kernel mapping. We reserve 128GB (64bit) for vmalloc and modules.
* On 64 bit kernels we have a 2GB area at the top of the vmalloc area where
* modules will reside. That makes sure that inter module branches always
* happen without trampolines and in addition the placement within a 2GB frame
@@ -131,38 +128,6 @@ static inline int is_module_addr(void *addr)
}
/*
- * A 31 bit pagetable entry of S390 has following format:
- * | PFRA | | OS |
- * 0 0IP0
- * 00000000001111111111222222222233
- * 01234567890123456789012345678901
- *
- * I Page-Invalid Bit: Page is not available for address-translation
- * P Page-Protection Bit: Store access not possible for page
- *
- * A 31 bit segmenttable entry of S390 has following format:
- * | P-table origin | |PTL
- * 0 IC
- * 00000000001111111111222222222233
- * 01234567890123456789012345678901
- *
- * I Segment-Invalid Bit: Segment is not available for address-translation
- * C Common-Segment Bit: Segment is not private (PoP 3-30)
- * PTL Page-Table-Length: Page-table length (PTL+1*16 entries -> up to 256)
- *
- * The 31 bit segmenttable origin of S390 has following format:
- *
- * |S-table origin | | STL |
- * X **GPS
- * 00000000001111111111222222222233
- * 01234567890123456789012345678901
- *
- * X Space-Switch event:
- * G Segment-Invalid Bit: *
- * P Private-Space Bit: Segment is not private (PoP 3-30)
- * S Storage-Alteration:
- * STL Segment-Table-Length: Segment-table length (STL+1*16 entries -> up to 2048)
- *
* A 64 bit pagetable entry of S390 has following format:
* | PFRA |0IPC| OS |
* 0000000000111111111122222222223333333333444444444455555555556666
@@ -220,7 +185,6 @@ static inline int is_module_addr(void *addr)
/* Software bits in the page table entry */
#define _PAGE_PRESENT 0x001 /* SW pte present bit */
-#define _PAGE_TYPE 0x002 /* SW pte type bit */
#define _PAGE_YOUNG 0x004 /* SW pte young bit */
#define _PAGE_DIRTY 0x008 /* SW pte dirty bit */
#define _PAGE_READ 0x010 /* SW pte read bit */
@@ -240,31 +204,34 @@ static inline int is_module_addr(void *addr)
* table lock held.
*
* The following table gives the different possible bit combinations for
- * the pte hardware and software bits in the last 12 bits of a pte:
+ * the pte hardware and software bits in the last 12 bits of a pte
+ * (. unassigned bit, x don't care, t swap type):
*
* 842100000000
* 000084210000
* 000000008421
- * .IR...wrdytp
- * empty .10...000000
- * swap .10...xxxx10
- * file .11...xxxxx0
- * prot-none, clean, old .11...000001
- * prot-none, clean, young .11...000101
- * prot-none, dirty, old .10...001001
- * prot-none, dirty, young .10...001101
- * read-only, clean, old .11...010001
- * read-only, clean, young .01...010101
- * read-only, dirty, old .11...011001
- * read-only, dirty, young .01...011101
- * read-write, clean, old .11...110001
- * read-write, clean, young .01...110101
- * read-write, dirty, old .10...111001
- * read-write, dirty, young .00...111101
+ * .IR.uswrdy.p
+ * empty .10.00000000
+ * swap .11..ttttt.0
+ * prot-none, clean, old .11.xx0000.1
+ * prot-none, clean, young .11.xx0001.1
+ * prot-none, dirty, old .10.xx0010.1
+ * prot-none, dirty, young .10.xx0011.1
+ * read-only, clean, old .11.xx0100.1
+ * read-only, clean, young .01.xx0101.1
+ * read-only, dirty, old .11.xx0110.1
+ * read-only, dirty, young .01.xx0111.1
+ * read-write, clean, old .11.xx1100.1
+ * read-write, clean, young .01.xx1101.1
+ * read-write, dirty, old .10.xx1110.1
+ * read-write, dirty, young .00.xx1111.1
+ * HW-bits: R read-only, I invalid
+ * SW-bits: p present, y young, d dirty, r read, w write, s special,
+ * u unused, l large
*
- * pte_present is true for the bit pattern .xx...xxxxx1, (pte & 0x001) == 0x001
- * pte_none is true for the bit pattern .10...xxxx00, (pte & 0x603) == 0x400
- * pte_swap is true for the bit pattern .10...xxxx10, (pte & 0x603) == 0x402
+ * pte_none is true for the bit pattern .10.00000000, pte == 0x400
+ * pte_swap is true for the bit pattern .11..ooooo.0, (pte & 0x201) == 0x200
+ * pte_present is true for the bit pattern .xx.xxxxxx.1, (pte & 0x001) == 0x001
*/
/* Bits in the segment/region table address-space-control-element */
@@ -335,6 +302,8 @@ static inline int is_module_addr(void *addr)
* read-write, dirty, young 11..0...0...11
* The segment table origin is used to distinguish empty (origin==0) from
* read-write, old segment table entries (origin!=0)
+ * HW-bits: R read-only, I invalid
+ * SW-bits: y young, d dirty, r read, w write
*/
#define _SEGMENT_ENTRY_SPLIT_BIT 11 /* THP splitting bit number */
@@ -423,6 +392,15 @@ static inline int mm_has_pgste(struct mm_struct *mm)
return 0;
}
+static inline int mm_alloc_pgste(struct mm_struct *mm)
+{
+#ifdef CONFIG_PGSTE
+ if (unlikely(mm->context.alloc_pgste))
+ return 1;
+#endif
+ return 0;
+}
+
/*
* In the case that a guest uses storage keys
* faults should no longer be backed by zero pages
@@ -516,7 +494,7 @@ static inline int pmd_large(pmd_t pmd)
return (pmd_val(pmd) & _SEGMENT_ENTRY_LARGE) != 0;
}
-static inline int pmd_pfn(pmd_t pmd)
+static inline unsigned long pmd_pfn(pmd_t pmd)
{
unsigned long origin_mask;
@@ -582,10 +560,9 @@ static inline int pte_none(pte_t pte)
static inline int pte_swap(pte_t pte)
{
- /* Bit pattern: (pte & 0x603) == 0x402 */
- return (pte_val(pte) & (_PAGE_INVALID | _PAGE_PROTECT |
- _PAGE_TYPE | _PAGE_PRESENT))
- == (_PAGE_INVALID | _PAGE_TYPE);
+ /* Bit pattern: (pte & 0x201) == 0x200 */
+ return (pte_val(pte) & (_PAGE_PROTECT | _PAGE_PRESENT))
+ == _PAGE_PROTECT;
}
static inline int pte_special(pte_t pte)
@@ -1521,9 +1498,9 @@ static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma,
return pmd_young(pmd);
}
-#define __HAVE_ARCH_PMDP_GET_AND_CLEAR
-static inline pmd_t pmdp_get_and_clear(struct mm_struct *mm,
- unsigned long address, pmd_t *pmdp)
+#define __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR
+static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm,
+ unsigned long address, pmd_t *pmdp)
{
pmd_t pmd = *pmdp;
@@ -1532,10 +1509,10 @@ static inline pmd_t pmdp_get_and_clear(struct mm_struct *mm,
return pmd;
}
-#define __HAVE_ARCH_PMDP_GET_AND_CLEAR_FULL
-static inline pmd_t pmdp_get_and_clear_full(struct mm_struct *mm,
- unsigned long address,
- pmd_t *pmdp, int full)
+#define __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR_FULL
+static inline pmd_t pmdp_huge_get_and_clear_full(struct mm_struct *mm,
+ unsigned long address,
+ pmd_t *pmdp, int full)
{
pmd_t pmd = *pmdp;
@@ -1545,11 +1522,11 @@ static inline pmd_t pmdp_get_and_clear_full(struct mm_struct *mm,
return pmd;
}
-#define __HAVE_ARCH_PMDP_CLEAR_FLUSH
-static inline pmd_t pmdp_clear_flush(struct vm_area_struct *vma,
- unsigned long address, pmd_t *pmdp)
+#define __HAVE_ARCH_PMDP_HUGE_CLEAR_FLUSH
+static inline pmd_t pmdp_huge_clear_flush(struct vm_area_struct *vma,
+ unsigned long address, pmd_t *pmdp)
{
- return pmdp_get_and_clear(vma->vm_mm, address, pmdp);
+ return pmdp_huge_get_and_clear(vma->vm_mm, address, pmdp);
}
#define __HAVE_ARCH_PMDP_INVALIDATE
@@ -1571,6 +1548,14 @@ static inline void pmdp_set_wrprotect(struct mm_struct *mm,
}
}
+static inline pmd_t pmdp_collapse_flush(struct vm_area_struct *vma,
+ unsigned long address,
+ pmd_t *pmdp)
+{
+ return pmdp_huge_get_and_clear(vma->vm_mm, address, pmdp);
+}
+#define pmdp_collapse_flush pmdp_collapse_flush
+
#define pfn_pmd(pfn, pgprot) mk_pmd_phys(__pa((pfn) << PAGE_SHIFT), (pgprot))
#define mk_pmd(page, pgprot) pfn_pmd(page_to_pfn(page), (pgprot))
@@ -1586,51 +1571,51 @@ static inline int has_transparent_hugepage(void)
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
/*
- * 31 bit swap entry format:
- * A page-table entry has some bits we have to treat in a special way.
- * Bits 0, 20 and bit 23 have to be zero, otherwise an specification
- * exception will occur instead of a page translation exception. The
- * specifiation exception has the bad habit not to store necessary
- * information in the lowcore.
- * Bits 21, 22, 30 and 31 are used to indicate the page type.
- * A swap pte is indicated by bit pattern (pte & 0x603) == 0x402
- * This leaves the bits 1-19 and bits 24-29 to store type and offset.
- * We use the 5 bits from 25-29 for the type and the 20 bits from 1-19
- * plus 24 for the offset.
- * 0| offset |0110|o|type |00|
- * 0 0000000001111111111 2222 2 22222 33
- * 0 1234567890123456789 0123 4 56789 01
- *
* 64 bit swap entry format:
* A page-table entry has some bits we have to treat in a special way.
- * Bits 52 and bit 55 have to be zero, otherwise an specification
+ * Bits 52 and bit 55 have to be zero, otherwise a specification
* exception will occur instead of a page translation exception. The
- * specifiation exception has the bad habit not to store necessary
+ * specification exception has the bad habit not to store necessary
* information in the lowcore.
- * Bits 53, 54, 62 and 63 are used to indicate the page type.
- * A swap pte is indicated by bit pattern (pte & 0x603) == 0x402
- * This leaves the bits 0-51 and bits 56-61 to store type and offset.
- * We use the 5 bits from 57-61 for the type and the 53 bits from 0-51
- * plus 56 for the offset.
- * | offset |0110|o|type |00|
- * 0000000000111111111122222222223333333333444444444455 5555 5 55566 66
- * 0123456789012345678901234567890123456789012345678901 2345 6 78901 23
+ * Bits 54 and 63 are used to indicate the page type.
+ * A swap pte is indicated by bit pattern (pte & 0x201) == 0x200
+ * This leaves the bits 0-51 and bits 56-62 to store type and offset.
+ * We use the 5 bits from 57-61 for the type and the 52 bits from 0-51
+ * for the offset.
+ * | offset |01100|type |00|
+ * |0000000000111111111122222222223333333333444444444455|55555|55566|66|
+ * |0123456789012345678901234567890123456789012345678901|23456|78901|23|
*/
-#define __SWP_OFFSET_MASK (~0UL >> 11)
+#define __SWP_OFFSET_MASK ((1UL << 52) - 1)
+#define __SWP_OFFSET_SHIFT 12
+#define __SWP_TYPE_MASK ((1UL << 5) - 1)
+#define __SWP_TYPE_SHIFT 2
static inline pte_t mk_swap_pte(unsigned long type, unsigned long offset)
{
pte_t pte;
- offset &= __SWP_OFFSET_MASK;
- pte_val(pte) = _PAGE_INVALID | _PAGE_TYPE | ((type & 0x1f) << 2) |
- ((offset & 1UL) << 7) | ((offset & ~1UL) << 11);
+
+ pte_val(pte) = _PAGE_INVALID | _PAGE_PROTECT;
+ pte_val(pte) |= (offset & __SWP_OFFSET_MASK) << __SWP_OFFSET_SHIFT;
+ pte_val(pte) |= (type & __SWP_TYPE_MASK) << __SWP_TYPE_SHIFT;
return pte;
}
-#define __swp_type(entry) (((entry).val >> 2) & 0x1f)
-#define __swp_offset(entry) (((entry).val >> 11) | (((entry).val >> 7) & 1))
-#define __swp_entry(type,offset) ((swp_entry_t) { pte_val(mk_swap_pte((type),(offset))) })
+static inline unsigned long __swp_type(swp_entry_t entry)
+{
+ return (entry.val >> __SWP_TYPE_SHIFT) & __SWP_TYPE_MASK;
+}
+
+static inline unsigned long __swp_offset(swp_entry_t entry)
+{
+ return (entry.val >> __SWP_OFFSET_SHIFT) & __SWP_OFFSET_MASK;
+}
+
+static inline swp_entry_t __swp_entry(unsigned long type, unsigned long offset)
+{
+ return (swp_entry_t) { pte_val(mk_swap_pte(type, offset)) };
+}
#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) })
#define __swp_entry_to_pte(x) ((pte_t) { (x).val })
diff --git a/arch/s390/include/asm/sclp.h b/arch/s390/include/asm/sclp.h
index f1096bab5199..c891f41b2753 100644
--- a/arch/s390/include/asm/sclp.h
+++ b/arch/s390/include/asm/sclp.h
@@ -46,33 +46,39 @@ struct sclp_cpu_info {
struct sclp_cpu_entry cpu[MAX_CPU_ADDRESS + 1];
};
+struct sclp_info {
+ unsigned char has_linemode : 1;
+ unsigned char has_vt220 : 1;
+ unsigned char has_siif : 1;
+ unsigned char has_sigpif : 1;
+ unsigned char has_cpu_type : 1;
+ unsigned char has_sprp : 1;
+ unsigned int ibc;
+ unsigned int mtid;
+ unsigned int mtid_cp;
+ unsigned int mtid_prev;
+ unsigned long long rzm;
+ unsigned long long rnmax;
+ unsigned long long hamax;
+ unsigned int max_cpu;
+ unsigned long hsa_size;
+ unsigned long long facilities;
+};
+extern struct sclp_info sclp;
+
int sclp_get_cpu_info(struct sclp_cpu_info *info);
int sclp_cpu_configure(u8 cpu);
int sclp_cpu_deconfigure(u8 cpu);
-unsigned long long sclp_get_rnmax(void);
-unsigned long long sclp_get_rzm(void);
-unsigned int sclp_get_max_cpu(void);
-unsigned int sclp_get_mtid(u8 cpu_type);
-unsigned int sclp_get_mtid_max(void);
-unsigned int sclp_get_mtid_prev(void);
int sclp_sdias_blk_count(void);
int sclp_sdias_copy(void *dest, int blk_num, int nr_blks);
int sclp_chp_configure(struct chp_id chpid);
int sclp_chp_deconfigure(struct chp_id chpid);
int sclp_chp_read_info(struct sclp_chp_info *info);
void sclp_get_ipl_info(struct sclp_ipl_info *info);
-bool __init sclp_has_linemode(void);
-bool __init sclp_has_vt220(void);
-bool sclp_has_sprp(void);
int sclp_pci_configure(u32 fid);
int sclp_pci_deconfigure(u32 fid);
int memcpy_hsa(void *dest, unsigned long src, size_t count, int mode);
-unsigned long sclp_get_hsa_size(void);
void sclp_early_detect(void);
-int sclp_has_siif(void);
-int sclp_has_sigpif(void);
-unsigned int sclp_get_ibc(void);
-
long _sclp_print_early(const char *);
#endif /* _ASM_S390_SCLP_H */
diff --git a/arch/s390/include/asm/timex.h b/arch/s390/include/asm/timex.h
index 98eb2a579223..dcb6312a0b91 100644
--- a/arch/s390/include/asm/timex.h
+++ b/arch/s390/include/asm/timex.h
@@ -10,6 +10,7 @@
#define _ASM_S390_TIMEX_H
#include <asm/lowcore.h>
+#include <linux/time64.h>
/* The value of the TOD clock for 1.1.1970. */
#define TOD_UNIX_EPOCH 0x7d91048bca000000ULL
@@ -108,10 +109,10 @@ int get_sync_clock(unsigned long long *clock);
void init_cpu_timer(void);
unsigned long long monotonic_clock(void);
-void tod_to_timeval(__u64, struct timespec *);
+void tod_to_timeval(__u64 todval, struct timespec64 *xt);
static inline
-void stck_to_timespec(unsigned long long stck, struct timespec *ts)
+void stck_to_timespec64(unsigned long long stck, struct timespec64 *ts)
{
tod_to_timeval(stck - TOD_UNIX_EPOCH, ts);
}
diff --git a/arch/s390/include/asm/topology.h b/arch/s390/include/asm/topology.h
index b1453a2ae1ca..4990f6c66288 100644
--- a/arch/s390/include/asm/topology.h
+++ b/arch/s390/include/asm/topology.h
@@ -22,7 +22,8 @@ DECLARE_PER_CPU(struct cpu_topology_s390, cpu_topology);
#define topology_physical_package_id(cpu) (per_cpu(cpu_topology, cpu).socket_id)
#define topology_thread_id(cpu) (per_cpu(cpu_topology, cpu).thread_id)
-#define topology_thread_cpumask(cpu) (&per_cpu(cpu_topology, cpu).thread_mask)
+#define topology_sibling_cpumask(cpu) \
+ (&per_cpu(cpu_topology, cpu).thread_mask)
#define topology_core_id(cpu) (per_cpu(cpu_topology, cpu).core_id)
#define topology_core_cpumask(cpu) (&per_cpu(cpu_topology, cpu).core_mask)
#define topology_book_id(cpu) (per_cpu(cpu_topology, cpu).book_id)
diff --git a/arch/s390/include/asm/uaccess.h b/arch/s390/include/asm/uaccess.h
index d64a7a62164f..9dd4cc47ddc7 100644
--- a/arch/s390/include/asm/uaccess.h
+++ b/arch/s390/include/asm/uaccess.h
@@ -98,7 +98,8 @@ static inline unsigned long extable_fixup(const struct exception_table_entry *x)
* @from: Source address, in user space.
* @n: Number of bytes to copy.
*
- * Context: User context only. This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ * enabled.
*
* Copy data from user space to kernel space. Caller must check
* the specified block with access_ok() before calling this function.
@@ -118,7 +119,8 @@ unsigned long __must_check __copy_from_user(void *to, const void __user *from,
* @from: Source address, in kernel space.
* @n: Number of bytes to copy.
*
- * Context: User context only. This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ * enabled.
*
* Copy data from kernel space to user space. Caller must check
* the specified block with access_ok() before calling this function.
@@ -264,7 +266,8 @@ int __get_user_bad(void) __attribute__((noreturn));
* @from: Source address, in kernel space.
* @n: Number of bytes to copy.
*
- * Context: User context only. This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ * enabled.
*
* Copy data from kernel space to user space.
*
@@ -290,7 +293,8 @@ __compiletime_warning("copy_from_user() buffer size is not provably correct")
* @from: Source address, in user space.
* @n: Number of bytes to copy.
*
- * Context: User context only. This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ * enabled.
*
* Copy data from user space to kernel space.
*
@@ -348,7 +352,8 @@ static inline unsigned long strnlen_user(const char __user *src, unsigned long n
* strlen_user: - Get the size of a string in user space.
* @str: The string to measure.
*
- * Context: User context only. This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ * enabled.
*
* Get the size of a NUL-terminated string in user space.
*
diff --git a/arch/s390/kernel/compat_wrapper.c b/arch/s390/kernel/compat_wrapper.c
index d7fa2f0f1425..f8498dde67b1 100644
--- a/arch/s390/kernel/compat_wrapper.c
+++ b/arch/s390/kernel/compat_wrapper.c
@@ -202,7 +202,7 @@ COMPAT_SYSCALL_WRAP1(epoll_create1, int, flags);
COMPAT_SYSCALL_WRAP2(tkill, int, pid, int, sig);
COMPAT_SYSCALL_WRAP3(tgkill, int, tgid, int, pid, int, sig);
COMPAT_SYSCALL_WRAP5(perf_event_open, struct perf_event_attr __user *, attr_uptr, pid_t, pid, int, cpu, int, group_fd, unsigned long, flags);
-COMPAT_SYSCALL_WRAP5(clone, unsigned long, newsp, unsigned long, clone_flags, int __user *, parent_tidptr, int __user *, child_tidptr, int, tls_val);
+COMPAT_SYSCALL_WRAP5(clone, unsigned long, newsp, unsigned long, clone_flags, int __user *, parent_tidptr, int __user *, child_tidptr, unsigned long, tls);
COMPAT_SYSCALL_WRAP2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags);
COMPAT_SYSCALL_WRAP4(prlimit64, pid_t, pid, unsigned int, resource, const struct rlimit64 __user *, new_rlim, struct rlimit64 __user *, old_rlim);
COMPAT_SYSCALL_WRAP5(name_to_handle_at, int, dfd, const char __user *, name, struct file_handle __user *, handle, int __user *, mnt_id, int, flag);
diff --git a/arch/s390/kernel/crash_dump.c b/arch/s390/kernel/crash_dump.c
index 9f73c8059022..7a75ad4594e3 100644
--- a/arch/s390/kernel/crash_dump.c
+++ b/arch/s390/kernel/crash_dump.c
@@ -33,11 +33,12 @@ static struct memblock_type oldmem_type = {
};
#define for_each_dump_mem_range(i, nid, p_start, p_end, p_nid) \
- for (i = 0, __next_mem_range(&i, nid, &memblock.physmem, \
+ for (i = 0, __next_mem_range(&i, nid, MEMBLOCK_NONE, \
+ &memblock.physmem, \
&oldmem_type, p_start, \
p_end, p_nid); \
i != (u64)ULLONG_MAX; \
- __next_mem_range(&i, nid, &memblock.physmem, \
+ __next_mem_range(&i, nid, MEMBLOCK_NONE, &memblock.physmem,\
&oldmem_type, \
p_start, p_end, p_nid))
@@ -122,7 +123,7 @@ static ssize_t copy_oldmem_page_zfcpdump(char *buf, size_t csize,
{
int rc;
- if (src < sclp_get_hsa_size()) {
+ if (src < sclp.hsa_size) {
rc = memcpy_hsa(buf, src, csize, userbuf);
} else {
if (userbuf)
@@ -215,7 +216,7 @@ static int remap_oldmem_pfn_range_zfcpdump(struct vm_area_struct *vma,
unsigned long pfn,
unsigned long size, pgprot_t prot)
{
- unsigned long hsa_end = sclp_get_hsa_size();
+ unsigned long hsa_end = sclp.hsa_size;
unsigned long size_hsa;
if (pfn < hsa_end >> PAGE_SHIFT) {
@@ -258,7 +259,7 @@ int copy_from_oldmem(void *dest, void *src, size_t count)
return rc;
}
} else {
- unsigned long hsa_end = sclp_get_hsa_size();
+ unsigned long hsa_end = sclp.hsa_size;
if ((unsigned long) src < hsa_end) {
copied = min(count, hsa_end - (unsigned long) src);
rc = memcpy_hsa(dest, (unsigned long) src, copied, 0);
@@ -609,7 +610,7 @@ int elfcorehdr_alloc(unsigned long long *addr, unsigned long long *size)
if (elfcorehdr_addr != ELFCORE_ADDR_MAX)
return 0;
/* If we cannot get HSA size for zfcpdump return error */
- if (ipl_info.type == IPL_TYPE_FCP_DUMP && !sclp_get_hsa_size())
+ if (ipl_info.type == IPL_TYPE_FCP_DUMP && !sclp.hsa_size)
return -ENODEV;
/* For kdump, exclude previous crashkernel memory */
diff --git a/arch/s390/kernel/debug.c b/arch/s390/kernel/debug.c
index c1f21aca76e7..6fca0e46464e 100644
--- a/arch/s390/kernel/debug.c
+++ b/arch/s390/kernel/debug.c
@@ -1457,23 +1457,24 @@ int
debug_dflt_header_fn(debug_info_t * id, struct debug_view *view,
int area, debug_entry_t * entry, char *out_buf)
{
- struct timespec time_spec;
+ struct timespec64 time_spec;
char *except_str;
unsigned long caller;
int rc = 0;
unsigned int level;
level = entry->id.fields.level;
- stck_to_timespec(entry->id.stck, &time_spec);
+ stck_to_timespec64(entry->id.stck, &time_spec);
if (entry->id.fields.exception)
except_str = "*";
else
except_str = "-";
caller = ((unsigned long) entry->caller) & PSW_ADDR_INSN;
- rc += sprintf(out_buf, "%02i %011lu:%06lu %1u %1s %02i %p ",
- area, time_spec.tv_sec, time_spec.tv_nsec / 1000, level,
- except_str, entry->id.fields.cpuid, (void *) caller);
+ rc += sprintf(out_buf, "%02i %011lld:%06lu %1u %1s %02i %p ",
+ area, (long long)time_spec.tv_sec,
+ time_spec.tv_nsec / 1000, level, except_str,
+ entry->id.fields.cpuid, (void *)caller);
return rc;
}
EXPORT_SYMBOL(debug_dflt_header_fn);
diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S
index 99b44acbfcc7..3238893c9d4f 100644
--- a/arch/s390/kernel/entry.S
+++ b/arch/s390/kernel/entry.S
@@ -1005,7 +1005,7 @@ ENTRY(sie64a)
.Lsie_gmap:
lg %r14,__SF_EMPTY(%r15) # get control block pointer
oi __SIE_PROG0C+3(%r14),1 # we are going into SIE now
- tm __SIE_PROG20+3(%r14),1 # last exit...
+ tm __SIE_PROG20+3(%r14),3 # last exit...
jnz .Lsie_done
LPP __SF_EMPTY(%r15) # set guest id
sie 0(%r14)
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index 7262fe438c99..73941bf42350 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -128,9 +128,9 @@ __setup("condev=", condev_setup);
static void __init set_preferred_console(void)
{
if (MACHINE_IS_KVM) {
- if (sclp_has_vt220())
+ if (sclp.has_vt220)
add_preferred_console("ttyS", 1, NULL);
- else if (sclp_has_linemode())
+ else if (sclp.has_linemode)
add_preferred_console("ttyS", 0, NULL);
else
add_preferred_console("hvc", 0, NULL);
@@ -510,8 +510,8 @@ static void reserve_memory_end(void)
{
#ifdef CONFIG_CRASH_DUMP
if (ipl_info.type == IPL_TYPE_FCP_DUMP &&
- !OLDMEM_BASE && sclp_get_hsa_size()) {
- memory_end = sclp_get_hsa_size();
+ !OLDMEM_BASE && sclp.hsa_size) {
+ memory_end = sclp.hsa_size;
memory_end &= PAGE_MASK;
memory_end_set = 1;
}
@@ -576,7 +576,7 @@ static void __init reserve_crashkernel(void)
crash_base = low;
} else {
/* Find suitable area in free memory */
- low = max_t(unsigned long, crash_size, sclp_get_hsa_size());
+ low = max_t(unsigned long, crash_size, sclp.hsa_size);
high = crash_base ? crash_base + crash_size : ULONG_MAX;
if (crash_base && crash_base < low) {
@@ -640,19 +640,24 @@ static void __init check_initrd(void)
}
/*
- * Reserve all kernel text
+ * Reserve memory used for lowcore/command line/kernel image.
*/
static void __init reserve_kernel(void)
{
- unsigned long start_pfn;
- start_pfn = PFN_UP(__pa(&_end));
+ unsigned long start_pfn = PFN_UP(__pa(&_end));
+#ifdef CONFIG_DMA_API_DEBUG
/*
- * Reserve memory used for lowcore/command line/kernel image.
+ * DMA_API_DEBUG code stumbles over addresses from the
+ * range [_ehead, _stext]. Mark the memory as reserved
+ * so it is not used for CONFIG_DMA_API_DEBUG=y.
*/
+ memblock_reserve(0, PFN_PHYS(start_pfn));
+#else
memblock_reserve(0, (unsigned long)_ehead);
memblock_reserve((unsigned long)_stext, PFN_PHYS(start_pfn)
- (unsigned long)_stext);
+#endif
}
static void __init reserve_elfcorehdr(void)
@@ -875,6 +880,8 @@ void __init setup_arch(char **cmdline_p)
*/
setup_hwcaps();
+ HPAGE_SHIFT = MACHINE_HAS_HPAGE ? 20 : 0;
+
/*
* Create kernel page tables and switch to virtual addressing.
*/
diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
index efd2c1968000..0d9d59d4710e 100644
--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c
@@ -601,7 +601,7 @@ static void __init smp_store_cpu_states(struct sclp_cpu_info *info)
/* No previous system present, normal boot. */
return;
/* Set multi-threading state to the previous system. */
- pcpu_set_smt(sclp_get_mtid_prev());
+ pcpu_set_smt(sclp.mtid_prev);
/* Collect CPU states. */
cpu = 0;
for (i = 0; i < info->configured; i++) {
@@ -740,7 +740,7 @@ static void __init smp_detect_cpus(void)
#endif
/* Set multi-threading state for the current system */
- mtid = sclp_get_mtid(boot_cpu_type);
+ mtid = boot_cpu_type ? sclp.mtid : sclp.mtid_cp;
mtid = (mtid < smp_max_threads) ? mtid : smp_max_threads - 1;
pcpu_set_smt(mtid);
@@ -880,12 +880,13 @@ void __noreturn cpu_die(void)
void __init smp_fill_possible_mask(void)
{
- unsigned int possible, sclp, cpu;
+ unsigned int possible, sclp_max, cpu;
- sclp = min(smp_max_threads, sclp_get_mtid_max() + 1);
- sclp = sclp_get_max_cpu()*sclp ?: nr_cpu_ids;
+ sclp_max = max(sclp.mtid, sclp.mtid_cp) + 1;
+ sclp_max = min(smp_max_threads, sclp_max);
+ sclp_max = sclp.max_cpu * sclp_max ?: nr_cpu_ids;
possible = setup_possible_cpus ?: nr_cpu_ids;
- possible = min(possible, sclp);
+ possible = min(possible, sclp_max);
for (cpu = 0; cpu < possible && cpu < nr_cpu_ids; cpu++)
set_cpu_possible(cpu, true);
}
diff --git a/arch/s390/kernel/suspend.c b/arch/s390/kernel/suspend.c
index d3236c9e226b..39e2f41b6cf0 100644
--- a/arch/s390/kernel/suspend.c
+++ b/arch/s390/kernel/suspend.c
@@ -9,10 +9,10 @@
#include <linux/pfn.h>
#include <linux/suspend.h>
#include <linux/mm.h>
+#include <linux/pci.h>
#include <asm/ctl_reg.h>
#include <asm/ipl.h>
#include <asm/cio.h>
-#include <asm/pci.h>
#include <asm/sections.h>
#include "entry.h"
diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c
index 170ddd2018b3..9e733d965e08 100644
--- a/arch/s390/kernel/time.c
+++ b/arch/s390/kernel/time.c
@@ -76,7 +76,7 @@ unsigned long long monotonic_clock(void)
}
EXPORT_SYMBOL(monotonic_clock);
-void tod_to_timeval(__u64 todval, struct timespec *xt)
+void tod_to_timeval(__u64 todval, struct timespec64 *xt)
{
unsigned long long sec;
@@ -181,12 +181,12 @@ static void timing_alert_interrupt(struct ext_code ext_code,
static void etr_reset(void);
static void stp_reset(void);
-void read_persistent_clock(struct timespec *ts)
+void read_persistent_clock64(struct timespec64 *ts)
{
tod_to_timeval(get_tod_clock() - TOD_UNIX_EPOCH, ts);
}
-void read_boot_clock(struct timespec *ts)
+void read_boot_clock64(struct timespec64 *ts)
{
tod_to_timeval(sched_clock_base_cc - TOD_UNIX_EPOCH, ts);
}
diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c
index 9e3779e3e496..7365e8a46032 100644
--- a/arch/s390/kvm/intercept.c
+++ b/arch/s390/kvm/intercept.c
@@ -241,21 +241,6 @@ static int handle_prog(struct kvm_vcpu *vcpu)
return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
}
-static int handle_instruction_and_prog(struct kvm_vcpu *vcpu)
-{
- int rc, rc2;
-
- vcpu->stat.exit_instr_and_program++;
- rc = handle_instruction(vcpu);
- rc2 = handle_prog(vcpu);
-
- if (rc == -EOPNOTSUPP)
- vcpu->arch.sie_block->icptcode = 0x04;
- if (rc)
- return rc;
- return rc2;
-}
-
/**
* handle_external_interrupt - used for external interruption interceptions
*
@@ -355,7 +340,6 @@ static const intercept_handler_t intercept_funcs[] = {
[0x00 >> 2] = handle_noop,
[0x04 >> 2] = handle_instruction,
[0x08 >> 2] = handle_prog,
- [0x0C >> 2] = handle_instruction_and_prog,
[0x10 >> 2] = handle_noop,
[0x14 >> 2] = handle_external_interrupt,
[0x18 >> 2] = handle_noop,
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index 9de47265ef73..c98d89708e99 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -134,6 +134,8 @@ static unsigned long deliverable_irqs(struct kvm_vcpu *vcpu)
active_mask = pending_local_irqs(vcpu);
active_mask |= pending_floating_irqs(vcpu);
+ if (!active_mask)
+ return 0;
if (psw_extint_disabled(vcpu))
active_mask &= ~IRQ_PEND_EXT_MASK;
@@ -799,7 +801,7 @@ int kvm_s390_ext_call_pending(struct kvm_vcpu *vcpu)
struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
uint8_t sigp_ctrl = vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sigp_ctrl;
- if (!sclp_has_sigpif())
+ if (!sclp.has_sigpif)
return test_bit(IRQ_PEND_EXT_EXTERNAL, &li->pending_irqs);
return (sigp_ctrl & SIGP_CTRL_C) &&
@@ -941,12 +943,9 @@ int __must_check kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu)
if (cpu_timer_irq_pending(vcpu))
set_bit(IRQ_PEND_EXT_CPU_TIMER, &li->pending_irqs);
- do {
- irqs = deliverable_irqs(vcpu);
+ while ((irqs = deliverable_irqs(vcpu)) && !rc) {
/* bits are in the order of interrupt priority */
irq_type = find_first_bit(&irqs, IRQ_PEND_COUNT);
- if (irq_type == IRQ_PEND_COUNT)
- break;
if (is_ioirq(irq_type)) {
rc = __deliver_io(vcpu, irq_type);
} else {
@@ -958,9 +957,7 @@ int __must_check kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu)
}
rc = func(vcpu);
}
- if (rc)
- break;
- } while (!rc);
+ }
set_intercept_indicators(vcpu);
@@ -1058,10 +1055,10 @@ static int __inject_extcall(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq)
kvm_get_vcpu(vcpu->kvm, src_id) == NULL)
return -EINVAL;
- if (sclp_has_sigpif())
+ if (sclp.has_sigpif)
return __inject_extcall_sigpif(vcpu, src_id);
- if (!test_and_set_bit(IRQ_PEND_EXT_EXTERNAL, &li->pending_irqs))
+ if (test_and_set_bit(IRQ_PEND_EXT_EXTERNAL, &li->pending_irqs))
return -EBUSY;
*extcall = irq->u.extcall;
atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags);
@@ -1340,12 +1337,54 @@ static int __inject_io(struct kvm *kvm, struct kvm_s390_interrupt_info *inti)
return 0;
}
-static int __inject_vm(struct kvm *kvm, struct kvm_s390_interrupt_info *inti)
+/*
+ * Find a destination VCPU for a floating irq and kick it.
+ */
+static void __floating_irq_kick(struct kvm *kvm, u64 type)
{
+ struct kvm_s390_float_interrupt *fi = &kvm->arch.float_int;
struct kvm_s390_local_interrupt *li;
+ struct kvm_vcpu *dst_vcpu;
+ int sigcpu, online_vcpus, nr_tries = 0;
+
+ online_vcpus = atomic_read(&kvm->online_vcpus);
+ if (!online_vcpus)
+ return;
+
+ /* find idle VCPUs first, then round robin */
+ sigcpu = find_first_bit(fi->idle_mask, online_vcpus);
+ if (sigcpu == online_vcpus) {
+ do {
+ sigcpu = fi->next_rr_cpu;
+ fi->next_rr_cpu = (fi->next_rr_cpu + 1) % online_vcpus;
+ /* avoid endless loops if all vcpus are stopped */
+ if (nr_tries++ >= online_vcpus)
+ return;
+ } while (is_vcpu_stopped(kvm_get_vcpu(kvm, sigcpu)));
+ }
+ dst_vcpu = kvm_get_vcpu(kvm, sigcpu);
+
+ /* make the VCPU drop out of the SIE, or wake it up if sleeping */
+ li = &dst_vcpu->arch.local_int;
+ spin_lock(&li->lock);
+ switch (type) {
+ case KVM_S390_MCHK:
+ atomic_set_mask(CPUSTAT_STOP_INT, li->cpuflags);
+ break;
+ case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX:
+ atomic_set_mask(CPUSTAT_IO_INT, li->cpuflags);
+ break;
+ default:
+ atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags);
+ break;
+ }
+ spin_unlock(&li->lock);
+ kvm_s390_vcpu_wakeup(dst_vcpu);
+}
+
+static int __inject_vm(struct kvm *kvm, struct kvm_s390_interrupt_info *inti)
+{
struct kvm_s390_float_interrupt *fi;
- struct kvm_vcpu *dst_vcpu = NULL;
- int sigcpu;
u64 type = READ_ONCE(inti->type);
int rc;
@@ -1373,32 +1412,8 @@ static int __inject_vm(struct kvm *kvm, struct kvm_s390_interrupt_info *inti)
if (rc)
return rc;
- sigcpu = find_first_bit(fi->idle_mask, KVM_MAX_VCPUS);
- if (sigcpu == KVM_MAX_VCPUS) {
- do {
- sigcpu = fi->next_rr_cpu++;
- if (sigcpu == KVM_MAX_VCPUS)
- sigcpu = fi->next_rr_cpu = 0;
- } while (kvm_get_vcpu(kvm, sigcpu) == NULL);
- }
- dst_vcpu = kvm_get_vcpu(kvm, sigcpu);
- li = &dst_vcpu->arch.local_int;
- spin_lock(&li->lock);
- switch (type) {
- case KVM_S390_MCHK:
- atomic_set_mask(CPUSTAT_STOP_INT, li->cpuflags);
- break;
- case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX:
- atomic_set_mask(CPUSTAT_IO_INT, li->cpuflags);
- break;
- default:
- atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags);
- break;
- }
- spin_unlock(&li->lock);
- kvm_s390_vcpu_wakeup(kvm_get_vcpu(kvm, sigcpu));
+ __floating_irq_kick(kvm, type);
return 0;
-
}
int kvm_s390_inject_vm(struct kvm *kvm,
@@ -1606,6 +1621,9 @@ void kvm_s390_clear_float_irqs(struct kvm *kvm)
int i;
spin_lock(&fi->lock);
+ fi->pending_irqs = 0;
+ memset(&fi->srv_signal, 0, sizeof(fi->srv_signal));
+ memset(&fi->mchk, 0, sizeof(fi->mchk));
for (i = 0; i < FIRQ_LIST_COUNT; i++)
clear_irq_list(&fi->lists[i]);
for (i = 0; i < FIRQ_MAX_COUNT; i++)
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 8cd8e7b288c5..2078f92d15ac 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -36,6 +36,10 @@
#include "kvm-s390.h"
#include "gaccess.h"
+#define KMSG_COMPONENT "kvm-s390"
+#undef pr_fmt
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
#define CREATE_TRACE_POINTS
#include "trace.h"
#include "trace-s390.h"
@@ -110,7 +114,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
/* upper facilities limit for kvm */
unsigned long kvm_s390_fac_list_mask[] = {
0xffe6fffbfcfdfc40UL,
- 0x005c800000000000UL,
+ 0x005e800000000000UL,
};
unsigned long kvm_s390_fac_list_mask_size(void)
@@ -236,6 +240,7 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
{
int r;
unsigned long n;
+ struct kvm_memslots *slots;
struct kvm_memory_slot *memslot;
int is_dirty = 0;
@@ -245,7 +250,8 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
if (log->slot >= KVM_USER_MEM_SLOTS)
goto out;
- memslot = id_to_memslot(kvm->memslots, log->slot);
+ slots = kvm_memslots(kvm);
+ memslot = id_to_memslot(slots, log->slot);
r = -ENOENT;
if (!memslot->dirty_bitmap)
goto out;
@@ -454,10 +460,10 @@ static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
mutex_lock(&kvm->lock);
kvm->arch.epoch = gtod - host_tod;
- kvm_for_each_vcpu(vcpu_idx, cur_vcpu, kvm) {
+ kvm_s390_vcpu_block_all(kvm);
+ kvm_for_each_vcpu(vcpu_idx, cur_vcpu, kvm)
cur_vcpu->arch.sie_block->epoch = kvm->arch.epoch;
- exit_sie(cur_vcpu);
- }
+ kvm_s390_vcpu_unblock_all(kvm);
mutex_unlock(&kvm->lock);
return 0;
}
@@ -604,7 +610,7 @@ static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
goto out;
}
get_cpu_id((struct cpuid *) &mach->cpuid);
- mach->ibc = sclp_get_ibc();
+ mach->ibc = sclp.ibc;
memcpy(&mach->fac_mask, kvm->arch.model.fac->mask,
S390_ARCH_FAC_LIST_SIZE_BYTE);
memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
@@ -1068,7 +1074,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
S390_ARCH_FAC_LIST_SIZE_BYTE);
kvm_s390_get_cpu_id(&kvm->arch.model.cpu_id);
- kvm->arch.model.ibc = sclp_get_ibc() & 0x0fff;
+ kvm->arch.model.ibc = sclp.ibc & 0x0fff;
if (kvm_s390_crypto_init(kvm) < 0)
goto out_err;
@@ -1311,8 +1317,13 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
CPUSTAT_SM |
- CPUSTAT_STOPPED |
- CPUSTAT_GED);
+ CPUSTAT_STOPPED);
+
+ if (test_kvm_facility(vcpu->kvm, 78))
+ atomic_set_mask(CPUSTAT_GED2, &vcpu->arch.sie_block->cpuflags);
+ else if (test_kvm_facility(vcpu->kvm, 8))
+ atomic_set_mask(CPUSTAT_GED, &vcpu->arch.sie_block->cpuflags);
+
kvm_s390_vcpu_setup_model(vcpu);
vcpu->arch.sie_block->ecb = 6;
@@ -1321,9 +1332,9 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
vcpu->arch.sie_block->ecb2 = 8;
vcpu->arch.sie_block->eca = 0xC1002000U;
- if (sclp_has_siif())
+ if (sclp.has_siif)
vcpu->arch.sie_block->eca |= 1;
- if (sclp_has_sigpif())
+ if (sclp.has_sigpif)
vcpu->arch.sie_block->eca |= 0x10000000U;
if (test_kvm_facility(vcpu->kvm, 129)) {
vcpu->arch.sie_block->eca |= 0x00020000;
@@ -1409,16 +1420,28 @@ int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
return kvm_s390_vcpu_has_irq(vcpu, 0);
}
-void s390_vcpu_block(struct kvm_vcpu *vcpu)
+void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
{
atomic_set_mask(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
+ exit_sie(vcpu);
}
-void s390_vcpu_unblock(struct kvm_vcpu *vcpu)
+void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
{
atomic_clear_mask(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
}
+static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
+{
+ atomic_set_mask(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
+ exit_sie(vcpu);
+}
+
+static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
+{
+ atomic_clear_mask(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
+}
+
/*
* Kick a guest cpu out of SIE and wait until SIE is not running.
* If the CPU is not running (e.g. waiting as idle) the function will
@@ -1430,11 +1453,11 @@ void exit_sie(struct kvm_vcpu *vcpu)
cpu_relax();
}
-/* Kick a guest cpu out of SIE and prevent SIE-reentry */
-void exit_sie_sync(struct kvm_vcpu *vcpu)
+/* Kick a guest cpu out of SIE to process a request synchronously */
+void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
{
- s390_vcpu_block(vcpu);
- exit_sie(vcpu);
+ kvm_make_request(req, vcpu);
+ kvm_s390_vcpu_request(vcpu);
}
static void kvm_gmap_notifier(struct gmap *gmap, unsigned long address)
@@ -1447,8 +1470,7 @@ static void kvm_gmap_notifier(struct gmap *gmap, unsigned long address)
/* match against both prefix pages */
if (kvm_s390_get_prefix(vcpu) == (address & ~0x1000UL)) {
VCPU_EVENT(vcpu, 2, "gmap notifier for %lx", address);
- kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
- exit_sie_sync(vcpu);
+ kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
}
}
}
@@ -1720,8 +1742,10 @@ static bool ibs_enabled(struct kvm_vcpu *vcpu)
static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
{
+ if (!vcpu->requests)
+ return 0;
retry:
- s390_vcpu_unblock(vcpu);
+ kvm_s390_vcpu_request_handled(vcpu);
/*
* We use MMU_RELOAD just to re-arm the ipte notifier for the
* guest prefix page. gmap_ipte_notify will wait on the ptl lock.
@@ -1993,12 +2017,14 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
* As PF_VCPU will be used in fault handler, between
* guest_enter and guest_exit should be no uaccess.
*/
- preempt_disable();
- kvm_guest_enter();
- preempt_enable();
+ local_irq_disable();
+ __kvm_guest_enter();
+ local_irq_enable();
exit_reason = sie64a(vcpu->arch.sie_block,
vcpu->run->s.regs.gprs);
- kvm_guest_exit();
+ local_irq_disable();
+ __kvm_guest_exit();
+ local_irq_enable();
vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
rc = vcpu_post_run(vcpu, exit_reason);
@@ -2068,7 +2094,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
kvm_s390_vcpu_start(vcpu);
} else if (is_vcpu_stopped(vcpu)) {
- pr_err_ratelimited("kvm-s390: can't run stopped vcpu %d\n",
+ pr_err_ratelimited("can't run stopped vcpu %d\n",
vcpu->vcpu_id);
return -EINVAL;
}
@@ -2206,8 +2232,7 @@ int kvm_s390_vcpu_store_adtl_status(struct kvm_vcpu *vcpu, unsigned long addr)
static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
{
kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
- kvm_make_request(KVM_REQ_DISABLE_IBS, vcpu);
- exit_sie_sync(vcpu);
+ kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
}
static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
@@ -2223,8 +2248,7 @@ static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
{
kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
- kvm_make_request(KVM_REQ_ENABLE_IBS, vcpu);
- exit_sie_sync(vcpu);
+ kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
}
void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
@@ -2563,7 +2587,7 @@ int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
/* Section: memory related */
int kvm_arch_prepare_memory_region(struct kvm *kvm,
struct kvm_memory_slot *memslot,
- struct kvm_userspace_memory_region *mem,
+ const struct kvm_userspace_memory_region *mem,
enum kvm_mr_change change)
{
/* A few sanity checks. We can have memory slots which have to be
@@ -2581,8 +2605,9 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
}
void kvm_arch_commit_memory_region(struct kvm *kvm,
- struct kvm_userspace_memory_region *mem,
+ const struct kvm_userspace_memory_region *mem,
const struct kvm_memory_slot *old,
+ const struct kvm_memory_slot *new,
enum kvm_mr_change change)
{
int rc;
@@ -2601,7 +2626,7 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
mem->guest_phys_addr, mem->memory_size);
if (rc)
- printk(KERN_WARNING "kvm-s390: failed to commit memory region\n");
+ pr_warn("failed to commit memory region\n");
return;
}
diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h
index ca108b90ae56..c5704786e473 100644
--- a/arch/s390/kvm/kvm-s390.h
+++ b/arch/s390/kvm/kvm-s390.h
@@ -211,10 +211,10 @@ int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr);
int kvm_s390_vcpu_store_adtl_status(struct kvm_vcpu *vcpu, unsigned long addr);
void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu);
void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu);
-void s390_vcpu_block(struct kvm_vcpu *vcpu);
-void s390_vcpu_unblock(struct kvm_vcpu *vcpu);
+void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu);
+void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu);
void exit_sie(struct kvm_vcpu *vcpu);
-void exit_sie_sync(struct kvm_vcpu *vcpu);
+void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu);
int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu);
void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu);
/* is cmma enabled */
@@ -228,6 +228,25 @@ int kvm_s390_handle_diag(struct kvm_vcpu *vcpu);
int kvm_s390_inject_prog_irq(struct kvm_vcpu *vcpu,
struct kvm_s390_pgm_info *pgm_info);
+static inline void kvm_s390_vcpu_block_all(struct kvm *kvm)
+{
+ int i;
+ struct kvm_vcpu *vcpu;
+
+ WARN_ON(!mutex_is_locked(&kvm->lock));
+ kvm_for_each_vcpu(i, vcpu, kvm)
+ kvm_s390_vcpu_block(vcpu);
+}
+
+static inline void kvm_s390_vcpu_unblock_all(struct kvm *kvm)
+{
+ int i;
+ struct kvm_vcpu *vcpu;
+
+ kvm_for_each_vcpu(i, vcpu, kvm)
+ kvm_s390_vcpu_unblock(vcpu);
+}
+
/**
* kvm_s390_inject_prog_cond - conditionally inject a program check
* @vcpu: virtual cpu
diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c
index d22d8ee1ff9d..ad4242245771 100644
--- a/arch/s390/kvm/priv.c
+++ b/arch/s390/kvm/priv.c
@@ -698,10 +698,14 @@ static int handle_pfmf(struct kvm_vcpu *vcpu)
case 0x00001000:
end = (start + (1UL << 20)) & ~((1UL << 20) - 1);
break;
- /* We dont support EDAT2
case 0x00002000:
+ /* only support 2G frame size if EDAT2 is available and we are
+ not in 24-bit addressing mode */
+ if (!test_kvm_facility(vcpu->kvm, 78) ||
+ psw_bits(vcpu->arch.sie_block->gpsw).eaba == PSW_AMODE_24BIT)
+ return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
end = (start + (1UL << 31)) & ~((1UL << 31) - 1);
- break;*/
+ break;
default:
return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
}
diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
index 76515bcea2f1..4c8f5d7f9c23 100644
--- a/arch/s390/mm/fault.c
+++ b/arch/s390/mm/fault.c
@@ -399,7 +399,7 @@ static inline int do_exception(struct pt_regs *regs, int access)
* user context.
*/
fault = VM_FAULT_BADCONTEXT;
- if (unlikely(!user_space_fault(regs) || in_atomic() || !mm))
+ if (unlikely(!user_space_fault(regs) || faulthandler_disabled() || !mm))
goto out;
address = trans_exc_code & __FAIL_ADDR_MASK;
diff --git a/arch/s390/mm/hugetlbpage.c b/arch/s390/mm/hugetlbpage.c
index 210ffede0153..fb4bf2c4379e 100644
--- a/arch/s390/mm/hugetlbpage.c
+++ b/arch/s390/mm/hugetlbpage.c
@@ -14,20 +14,23 @@ static inline pmd_t __pte_to_pmd(pte_t pte)
/*
* Convert encoding pte bits pmd bits
- * .IR...wrdytp dy..R...I...wr
- * empty .10...000000 -> 00..0...1...00
- * prot-none, clean, old .11...000001 -> 00..1...1...00
- * prot-none, clean, young .11...000101 -> 01..1...1...00
- * prot-none, dirty, old .10...001001 -> 10..1...1...00
- * prot-none, dirty, young .10...001101 -> 11..1...1...00
- * read-only, clean, old .11...010001 -> 00..1...1...01
- * read-only, clean, young .01...010101 -> 01..1...0...01
- * read-only, dirty, old .11...011001 -> 10..1...1...01
- * read-only, dirty, young .01...011101 -> 11..1...0...01
- * read-write, clean, old .11...110001 -> 00..0...1...11
- * read-write, clean, young .01...110101 -> 01..0...0...11
- * read-write, dirty, old .10...111001 -> 10..0...1...11
- * read-write, dirty, young .00...111101 -> 11..0...0...11
+ * lIR.uswrdy.p dy..R...I...wr
+ * empty 010.000000.0 -> 00..0...1...00
+ * prot-none, clean, old 111.000000.1 -> 00..1...1...00
+ * prot-none, clean, young 111.000001.1 -> 01..1...1...00
+ * prot-none, dirty, old 111.000010.1 -> 10..1...1...00
+ * prot-none, dirty, young 111.000011.1 -> 11..1...1...00
+ * read-only, clean, old 111.000100.1 -> 00..1...1...01
+ * read-only, clean, young 101.000101.1 -> 01..1...0...01
+ * read-only, dirty, old 111.000110.1 -> 10..1...1...01
+ * read-only, dirty, young 101.000111.1 -> 11..1...0...01
+ * read-write, clean, old 111.001100.1 -> 00..1...1...11
+ * read-write, clean, young 101.001101.1 -> 01..1...0...11
+ * read-write, dirty, old 110.001110.1 -> 10..0...1...11
+ * read-write, dirty, young 100.001111.1 -> 11..0...0...11
+ * HW-bits: R read-only, I invalid
+ * SW-bits: p present, y young, d dirty, r read, w write, s special,
+ * u unused, l large
*/
if (pte_present(pte)) {
pmd_val(pmd) = pte_val(pte) & PAGE_MASK;
@@ -48,20 +51,23 @@ static inline pte_t __pmd_to_pte(pmd_t pmd)
/*
* Convert encoding pmd bits pte bits
- * dy..R...I...wr .IR...wrdytp
- * empty 00..0...1...00 -> .10...001100
- * prot-none, clean, old 00..0...1...00 -> .10...000001
- * prot-none, clean, young 01..0...1...00 -> .10...000101
- * prot-none, dirty, old 10..0...1...00 -> .10...001001
- * prot-none, dirty, young 11..0...1...00 -> .10...001101
- * read-only, clean, old 00..1...1...01 -> .11...010001
- * read-only, clean, young 01..1...1...01 -> .11...010101
- * read-only, dirty, old 10..1...1...01 -> .11...011001
- * read-only, dirty, young 11..1...1...01 -> .11...011101
- * read-write, clean, old 00..0...1...11 -> .10...110001
- * read-write, clean, young 01..0...1...11 -> .10...110101
- * read-write, dirty, old 10..0...1...11 -> .10...111001
- * read-write, dirty, young 11..0...1...11 -> .10...111101
+ * dy..R...I...wr lIR.uswrdy.p
+ * empty 00..0...1...00 -> 010.000000.0
+ * prot-none, clean, old 00..1...1...00 -> 111.000000.1
+ * prot-none, clean, young 01..1...1...00 -> 111.000001.1
+ * prot-none, dirty, old 10..1...1...00 -> 111.000010.1
+ * prot-none, dirty, young 11..1...1...00 -> 111.000011.1
+ * read-only, clean, old 00..1...1...01 -> 111.000100.1
+ * read-only, clean, young 01..1...0...01 -> 101.000101.1
+ * read-only, dirty, old 10..1...1...01 -> 111.000110.1
+ * read-only, dirty, young 11..1...0...01 -> 101.000111.1
+ * read-write, clean, old 00..1...1...11 -> 111.001100.1
+ * read-write, clean, young 01..1...0...11 -> 101.001101.1
+ * read-write, dirty, old 10..0...1...11 -> 110.001110.1
+ * read-write, dirty, young 11..0...0...11 -> 100.001111.1
+ * HW-bits: R read-only, I invalid
+ * SW-bits: p present, y young, d dirty, r read, w write, s special,
+ * u unused, l large
*/
if (pmd_present(pmd)) {
pte_val(pte) = pmd_val(pmd) & _SEGMENT_ENTRY_ORIGIN_LARGE;
@@ -70,8 +76,8 @@ static inline pte_t __pmd_to_pte(pmd_t pmd)
pte_val(pte) |= (pmd_val(pmd) & _SEGMENT_ENTRY_WRITE) << 4;
pte_val(pte) |= (pmd_val(pmd) & _SEGMENT_ENTRY_INVALID) << 5;
pte_val(pte) |= (pmd_val(pmd) & _SEGMENT_ENTRY_PROTECT);
- pmd_val(pmd) |= (pte_val(pte) & _PAGE_DIRTY) << 10;
- pmd_val(pmd) |= (pte_val(pte) & _PAGE_YOUNG) << 10;
+ pte_val(pte) |= (pmd_val(pmd) & _SEGMENT_ENTRY_DIRTY) >> 10;
+ pte_val(pte) |= (pmd_val(pmd) & _SEGMENT_ENTRY_YOUNG) >> 10;
} else
pte_val(pte) = _PAGE_INVALID;
return pte;
@@ -80,31 +86,16 @@ static inline pte_t __pmd_to_pte(pmd_t pmd)
void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
pte_t *ptep, pte_t pte)
{
- pmd_t pmd;
+ pmd_t pmd = __pte_to_pmd(pte);
- pmd = __pte_to_pmd(pte);
- if (!MACHINE_HAS_HPAGE) {
- /* Emulated huge ptes loose the dirty and young bit */
- pmd_val(pmd) &= ~_SEGMENT_ENTRY_ORIGIN;
- pmd_val(pmd) |= pte_page(pte)[1].index;
- } else
- pmd_val(pmd) |= _SEGMENT_ENTRY_LARGE;
+ pmd_val(pmd) |= _SEGMENT_ENTRY_LARGE;
*(pmd_t *) ptep = pmd;
}
pte_t huge_ptep_get(pte_t *ptep)
{
- unsigned long origin;
- pmd_t pmd;
+ pmd_t pmd = *(pmd_t *) ptep;
- pmd = *(pmd_t *) ptep;
- if (!MACHINE_HAS_HPAGE && pmd_present(pmd)) {
- origin = pmd_val(pmd) & _SEGMENT_ENTRY_ORIGIN;
- pmd_val(pmd) &= ~_SEGMENT_ENTRY_ORIGIN;
- pmd_val(pmd) |= *(unsigned long *) origin;
- /* Emulated huge ptes are young and dirty by definition */
- pmd_val(pmd) |= _SEGMENT_ENTRY_YOUNG | _SEGMENT_ENTRY_DIRTY;
- }
return __pmd_to_pte(pmd);
}
@@ -119,45 +110,6 @@ pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
return pte;
}
-int arch_prepare_hugepage(struct page *page)
-{
- unsigned long addr = page_to_phys(page);
- pte_t pte;
- pte_t *ptep;
- int i;
-
- if (MACHINE_HAS_HPAGE)
- return 0;
-
- ptep = (pte_t *) pte_alloc_one(&init_mm, addr);
- if (!ptep)
- return -ENOMEM;
-
- pte_val(pte) = addr;
- for (i = 0; i < PTRS_PER_PTE; i++) {
- set_pte_at(&init_mm, addr + i * PAGE_SIZE, ptep + i, pte);
- pte_val(pte) += PAGE_SIZE;
- }
- page[1].index = (unsigned long) ptep;
- return 0;
-}
-
-void arch_release_hugepage(struct page *page)
-{
- pte_t *ptep;
-
- if (MACHINE_HAS_HPAGE)
- return;
-
- ptep = (pte_t *) page[1].index;
- if (!ptep)
- return;
- clear_table((unsigned long *) ptep, _PAGE_INVALID,
- PTRS_PER_PTE * sizeof(pte_t));
- page_table_free(&init_mm, (unsigned long *) ptep);
- page[1].index = 0;
-}
-
pte_t *huge_pte_alloc(struct mm_struct *mm,
unsigned long addr, unsigned long sz)
{
@@ -187,17 +139,9 @@ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
return (pte_t *) pmdp;
}
-int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep)
-{
- return 0;
-}
-
int pmd_huge(pmd_t pmd)
{
- if (!MACHINE_HAS_HPAGE)
- return 0;
-
- return !!(pmd_val(pmd) & _SEGMENT_ENTRY_LARGE);
+ return pmd_large(pmd);
}
int pud_huge(pud_t pud)
diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c
index 80875c43a4a4..76e873748b56 100644
--- a/arch/s390/mm/init.c
+++ b/arch/s390/mm/init.c
@@ -213,7 +213,7 @@ unsigned long memory_block_size_bytes(void)
* Make sure the memory block size is always greater
* or equal than the memory increment size.
*/
- return max_t(unsigned long, MIN_MEMORY_BLOCK_SIZE, sclp_get_rzm());
+ return max_t(unsigned long, MIN_MEMORY_BLOCK_SIZE, sclp.rzm);
}
#ifdef CONFIG_MEMORY_HOTREMOVE
diff --git a/arch/s390/mm/mem_detect.c b/arch/s390/mm/mem_detect.c
index 0f3604395805..e00f0d5d296d 100644
--- a/arch/s390/mm/mem_detect.c
+++ b/arch/s390/mm/mem_detect.c
@@ -31,8 +31,8 @@ void __init detect_memory_memblock(void)
unsigned long addr, size;
int type;
- rzm = sclp_get_rzm();
- rnmax = sclp_get_rnmax();
+ rzm = sclp.rzm;
+ rnmax = sclp.rnmax;
memsize = rzm * rnmax;
if (!rzm)
rzm = 1ULL << 17;
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index 33f589459113..33082d0d101b 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -18,6 +18,7 @@
#include <linux/rcupdate.h>
#include <linux/slab.h>
#include <linux/swapops.h>
+#include <linux/sysctl.h>
#include <linux/ksm.h>
#include <linux/mman.h>
@@ -30,6 +31,8 @@
#define ALLOC_ORDER 2
#define FRAG_MASK 0x03
+int HPAGE_SHIFT;
+
unsigned long *crst_table_alloc(struct mm_struct *mm)
{
struct page *page = alloc_pages(GFP_KERNEL, ALLOC_ORDER);
@@ -920,6 +923,40 @@ unsigned long get_guest_storage_key(struct mm_struct *mm, unsigned long addr)
}
EXPORT_SYMBOL(get_guest_storage_key);
+static int page_table_allocate_pgste_min = 0;
+static int page_table_allocate_pgste_max = 1;
+int page_table_allocate_pgste = 0;
+EXPORT_SYMBOL(page_table_allocate_pgste);
+
+static struct ctl_table page_table_sysctl[] = {
+ {
+ .procname = "allocate_pgste",
+ .data = &page_table_allocate_pgste,
+ .maxlen = sizeof(int),
+ .mode = S_IRUGO | S_IWUSR,
+ .proc_handler = proc_dointvec,
+ .extra1 = &page_table_allocate_pgste_min,
+ .extra2 = &page_table_allocate_pgste_max,
+ },
+ { }
+};
+
+static struct ctl_table page_table_sysctl_dir[] = {
+ {
+ .procname = "vm",
+ .maxlen = 0,
+ .mode = 0555,
+ .child = page_table_sysctl,
+ },
+ { }
+};
+
+static int __init page_table_register_sysctl(void)
+{
+ return register_sysctl_table(page_table_sysctl_dir) ? 0 : -ENOMEM;
+}
+__initcall(page_table_register_sysctl);
+
#else /* CONFIG_PGSTE */
static inline int page_table_with_pgste(struct page *page)
@@ -963,7 +1000,7 @@ unsigned long *page_table_alloc(struct mm_struct *mm)
struct page *uninitialized_var(page);
unsigned int mask, bit;
- if (mm_has_pgste(mm))
+ if (mm_alloc_pgste(mm))
return page_table_alloc_pgste(mm);
/* Allocate fragments of a 4K page as 1K/2K page table */
spin_lock_bh(&mm->context.list_lock);
@@ -1165,116 +1202,25 @@ static inline void thp_split_mm(struct mm_struct *mm)
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
-static unsigned long page_table_realloc_pmd(struct mmu_gather *tlb,
- struct mm_struct *mm, pud_t *pud,
- unsigned long addr, unsigned long end)
-{
- unsigned long next, *table, *new;
- struct page *page;
- spinlock_t *ptl;
- pmd_t *pmd;
-
- pmd = pmd_offset(pud, addr);
- do {
- next = pmd_addr_end(addr, end);
-again:
- if (pmd_none_or_clear_bad(pmd))
- continue;
- table = (unsigned long *) pmd_deref(*pmd);
- page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
- if (page_table_with_pgste(page))
- continue;
- /* Allocate new page table with pgstes */
- new = page_table_alloc_pgste(mm);
- if (!new)
- return -ENOMEM;
-
- ptl = pmd_lock(mm, pmd);
- if (likely((unsigned long *) pmd_deref(*pmd) == table)) {
- /* Nuke pmd entry pointing to the "short" page table */
- pmdp_flush_lazy(mm, addr, pmd);
- pmd_clear(pmd);
- /* Copy ptes from old table to new table */
- memcpy(new, table, PAGE_SIZE/2);
- clear_table(table, _PAGE_INVALID, PAGE_SIZE/2);
- /* Establish new table */
- pmd_populate(mm, pmd, (pte_t *) new);
- /* Free old table with rcu, there might be a walker! */
- page_table_free_rcu(tlb, table, addr);
- new = NULL;
- }
- spin_unlock(ptl);
- if (new) {
- page_table_free_pgste(new);
- goto again;
- }
- } while (pmd++, addr = next, addr != end);
-
- return addr;
-}
-
-static unsigned long page_table_realloc_pud(struct mmu_gather *tlb,
- struct mm_struct *mm, pgd_t *pgd,
- unsigned long addr, unsigned long end)
-{
- unsigned long next;
- pud_t *pud;
-
- pud = pud_offset(pgd, addr);
- do {
- next = pud_addr_end(addr, end);
- if (pud_none_or_clear_bad(pud))
- continue;
- next = page_table_realloc_pmd(tlb, mm, pud, addr, next);
- if (unlikely(IS_ERR_VALUE(next)))
- return next;
- } while (pud++, addr = next, addr != end);
-
- return addr;
-}
-
-static unsigned long page_table_realloc(struct mmu_gather *tlb, struct mm_struct *mm,
- unsigned long addr, unsigned long end)
-{
- unsigned long next;
- pgd_t *pgd;
-
- pgd = pgd_offset(mm, addr);
- do {
- next = pgd_addr_end(addr, end);
- if (pgd_none_or_clear_bad(pgd))
- continue;
- next = page_table_realloc_pud(tlb, mm, pgd, addr, next);
- if (unlikely(IS_ERR_VALUE(next)))
- return next;
- } while (pgd++, addr = next, addr != end);
-
- return 0;
-}
-
/*
* switch on pgstes for its userspace process (for kvm)
*/
int s390_enable_sie(void)
{
- struct task_struct *tsk = current;
- struct mm_struct *mm = tsk->mm;
- struct mmu_gather tlb;
+ struct mm_struct *mm = current->mm;
/* Do we have pgstes? if yes, we are done */
- if (mm_has_pgste(tsk->mm))
+ if (mm_has_pgste(mm))
return 0;
-
+ /* Fail if the page tables are 2K */
+ if (!mm_alloc_pgste(mm))
+ return -EINVAL;
down_write(&mm->mmap_sem);
+ mm->context.has_pgste = 1;
/* split thp mappings and disable thp for future mappings */
thp_split_mm(mm);
- /* Reallocate the page tables with pgstes */
- tlb_gather_mmu(&tlb, mm, 0, TASK_SIZE);
- if (!page_table_realloc(&tlb, mm, 0, TASK_SIZE))
- mm->context.has_pgste = 1;
- tlb_finish_mmu(&tlb, 0, TASK_SIZE);
up_write(&mm->mmap_sem);
- return mm->context.has_pgste ? 0 : -ENOMEM;
+ return 0;
}
EXPORT_SYMBOL_GPL(s390_enable_sie);
diff --git a/arch/s390/net/bpf_jit.h b/arch/s390/net/bpf_jit.h
index ba8593a515ba..f6498eec9ee1 100644
--- a/arch/s390/net/bpf_jit.h
+++ b/arch/s390/net/bpf_jit.h
@@ -28,6 +28,9 @@ extern u8 sk_load_word[], sk_load_half[], sk_load_byte[];
* | old backchain | |
* +---------------+ |
* | r15 - r6 | |
+ * +---------------+ |
+ * | 4 byte align | |
+ * | tail_call_cnt | |
* BFP -> +===============+ |
* | | |
* | BPF stack | |
@@ -46,12 +49,17 @@ extern u8 sk_load_word[], sk_load_half[], sk_load_byte[];
* R15 -> +---------------+ + low
*
* We get 160 bytes stack space from calling function, but only use
- * 11 * 8 byte (old backchain + r15 - r6) for storing registers.
+ * 12 * 8 byte for old backchain, r15..r6, and tail_call_cnt.
*/
-#define STK_OFF (MAX_BPF_STACK + 8 + 4 + 4 + (160 - 11 * 8))
+#define STK_SPACE (MAX_BPF_STACK + 8 + 4 + 4 + 160)
+#define STK_160_UNUSED (160 - 12 * 8)
+#define STK_OFF (STK_SPACE - STK_160_UNUSED)
#define STK_OFF_TMP 160 /* Offset of tmp buffer on stack */
#define STK_OFF_HLEN 168 /* Offset of SKB header length on stack */
+#define STK_OFF_R6 (160 - 11 * 8) /* Offset of r6 on stack */
+#define STK_OFF_TCCNT (160 - 12 * 8) /* Offset of tail_call_cnt on stack */
+
/* Offset to skip condition code check */
#define OFF_OK 4
diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c
index 7690dc8e1ab5..d3766dd67e23 100644
--- a/arch/s390/net/bpf_jit_comp.c
+++ b/arch/s390/net/bpf_jit_comp.c
@@ -21,6 +21,7 @@
#include <linux/netdevice.h>
#include <linux/filter.h>
#include <linux/init.h>
+#include <linux/bpf.h>
#include <asm/cacheflush.h>
#include <asm/dis.h>
#include "bpf_jit.h"
@@ -40,6 +41,8 @@ struct bpf_jit {
int base_ip; /* Base address for literal pool */
int ret0_ip; /* Address of return 0 */
int exit_ip; /* Address of exit */
+ int tail_call_start; /* Tail call start offset */
+ int labels[1]; /* Labels for local jumps */
};
#define BPF_SIZE_MAX 4096 /* Max size for program */
@@ -49,6 +52,7 @@ struct bpf_jit {
#define SEEN_RET0 4 /* ret0_ip points to a valid return 0 */
#define SEEN_LITERAL 8 /* code uses literals */
#define SEEN_FUNC 16 /* calls C functions */
+#define SEEN_TAIL_CALL 32 /* code uses tail calls */
#define SEEN_STACK (SEEN_FUNC | SEEN_MEM | SEEN_SKB)
/*
@@ -60,6 +64,7 @@ struct bpf_jit {
#define REG_L (__MAX_BPF_REG+3) /* Literal pool register */
#define REG_15 (__MAX_BPF_REG+4) /* Register 15 */
#define REG_0 REG_W0 /* Register 0 */
+#define REG_1 REG_W1 /* Register 1 */
#define REG_2 BPF_REG_1 /* Register 2 */
#define REG_14 BPF_REG_0 /* Register 14 */
@@ -223,6 +228,24 @@ static inline void reg_set_seen(struct bpf_jit *jit, u32 b1)
REG_SET_SEEN(b3); \
})
+#define EMIT6_PCREL_LABEL(op1, op2, b1, b2, label, mask) \
+({ \
+ int rel = (jit->labels[label] - jit->prg) >> 1; \
+ _EMIT6(op1 | reg(b1, b2) << 16 | (rel & 0xffff), \
+ op2 | mask << 12); \
+ REG_SET_SEEN(b1); \
+ REG_SET_SEEN(b2); \
+})
+
+#define EMIT6_PCREL_IMM_LABEL(op1, op2, b1, imm, label, mask) \
+({ \
+ int rel = (jit->labels[label] - jit->prg) >> 1; \
+ _EMIT6(op1 | (reg_high(b1) | mask) << 16 | \
+ (rel & 0xffff), op2 | (imm & 0xff) << 8); \
+ REG_SET_SEEN(b1); \
+ BUILD_BUG_ON(((unsigned long) imm) > 0xff); \
+})
+
#define EMIT6_PCREL(op1, op2, b1, b2, i, off, mask) \
({ \
/* Branch instruction needs 6 bytes */ \
@@ -286,7 +309,7 @@ static void jit_fill_hole(void *area, unsigned int size)
*/
static void save_regs(struct bpf_jit *jit, u32 rs, u32 re)
{
- u32 off = 72 + (rs - 6) * 8;
+ u32 off = STK_OFF_R6 + (rs - 6) * 8;
if (rs == re)
/* stg %rs,off(%r15) */
@@ -301,7 +324,7 @@ static void save_regs(struct bpf_jit *jit, u32 rs, u32 re)
*/
static void restore_regs(struct bpf_jit *jit, u32 rs, u32 re)
{
- u32 off = 72 + (rs - 6) * 8;
+ u32 off = STK_OFF_R6 + (rs - 6) * 8;
if (jit->seen & SEEN_STACK)
off += STK_OFF;
@@ -374,6 +397,16 @@ static void save_restore_regs(struct bpf_jit *jit, int op)
*/
static void bpf_jit_prologue(struct bpf_jit *jit)
{
+ if (jit->seen & SEEN_TAIL_CALL) {
+ /* xc STK_OFF_TCCNT(4,%r15),STK_OFF_TCCNT(%r15) */
+ _EMIT6(0xd703f000 | STK_OFF_TCCNT, 0xf000 | STK_OFF_TCCNT);
+ } else {
+ /* j tail_call_start: NOP if no tail calls are used */
+ EMIT4_PCREL(0xa7f40000, 6);
+ _EMIT2(0);
+ }
+ /* Tail calls have to skip above initialization */
+ jit->tail_call_start = jit->prg;
/* Save registers */
save_restore_regs(jit, REGS_SAVE);
/* Setup literal pool */
@@ -384,13 +417,16 @@ static void bpf_jit_prologue(struct bpf_jit *jit)
}
/* Setup stack and backchain */
if (jit->seen & SEEN_STACK) {
- /* lgr %bfp,%r15 (BPF frame pointer) */
- EMIT4(0xb9040000, BPF_REG_FP, REG_15);
+ if (jit->seen & SEEN_FUNC)
+ /* lgr %w1,%r15 (backchain) */
+ EMIT4(0xb9040000, REG_W1, REG_15);
+ /* la %bfp,STK_160_UNUSED(%r15) (BPF frame pointer) */
+ EMIT4_DISP(0x41000000, BPF_REG_FP, REG_15, STK_160_UNUSED);
/* aghi %r15,-STK_OFF */
EMIT4_IMM(0xa70b0000, REG_15, -STK_OFF);
if (jit->seen & SEEN_FUNC)
- /* stg %bfp,152(%r15) (backchain) */
- EMIT6_DISP_LH(0xe3000000, 0x0024, BPF_REG_FP, REG_0,
+ /* stg %w1,152(%r15) (backchain) */
+ EMIT6_DISP_LH(0xe3000000, 0x0024, REG_W1, REG_0,
REG_15, 152);
}
/*
@@ -443,8 +479,11 @@ static void bpf_jit_epilogue(struct bpf_jit *jit)
/*
* Compile one eBPF instruction into s390x code
+ *
+ * NOTE: Use noinline because for gcov (-fprofile-arcs) gcc allocates a lot of
+ * stack space for the large switch statement.
*/
-static int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i)
+static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i)
{
struct bpf_insn *insn = &fp->insnsi[i];
int jmp_off, last, insn_count = 1;
@@ -588,8 +627,8 @@ static int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i)
EMIT4(0xb9160000, dst_reg, rc_reg);
break;
}
- case BPF_ALU64 | BPF_DIV | BPF_X: /* dst = dst / (u32) src */
- case BPF_ALU64 | BPF_MOD | BPF_X: /* dst = dst % (u32) src */
+ case BPF_ALU64 | BPF_DIV | BPF_X: /* dst = dst / src */
+ case BPF_ALU64 | BPF_MOD | BPF_X: /* dst = dst % src */
{
int rc_reg = BPF_OP(insn->code) == BPF_DIV ? REG_W1 : REG_W0;
@@ -602,10 +641,8 @@ static int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i)
EMIT4_IMM(0xa7090000, REG_W0, 0);
/* lgr %w1,%dst */
EMIT4(0xb9040000, REG_W1, dst_reg);
- /* llgfr %dst,%src (u32 cast) */
- EMIT4(0xb9160000, dst_reg, src_reg);
/* dlgr %w0,%dst */
- EMIT4(0xb9870000, REG_W0, dst_reg);
+ EMIT4(0xb9870000, REG_W0, src_reg);
/* lgr %dst,%rc */
EMIT4(0xb9040000, dst_reg, rc_reg);
break;
@@ -632,8 +669,8 @@ static int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i)
EMIT4(0xb9160000, dst_reg, rc_reg);
break;
}
- case BPF_ALU64 | BPF_DIV | BPF_K: /* dst = dst / (u32) imm */
- case BPF_ALU64 | BPF_MOD | BPF_K: /* dst = dst % (u32) imm */
+ case BPF_ALU64 | BPF_DIV | BPF_K: /* dst = dst / imm */
+ case BPF_ALU64 | BPF_MOD | BPF_K: /* dst = dst % imm */
{
int rc_reg = BPF_OP(insn->code) == BPF_DIV ? REG_W1 : REG_W0;
@@ -649,7 +686,7 @@ static int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i)
EMIT4(0xb9040000, REG_W1, dst_reg);
/* dlg %w0,<d(imm)>(%l) */
EMIT6_DISP_LH(0xe3000000, 0x0087, REG_W0, REG_0, REG_L,
- EMIT_CONST_U64((u32) imm));
+ EMIT_CONST_U64(imm));
/* lgr %dst,%rc */
EMIT4(0xb9040000, dst_reg, rc_reg);
break;
@@ -947,6 +984,75 @@ static int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i)
EMIT4(0xb9040000, BPF_REG_0, REG_2);
break;
}
+ case BPF_JMP | BPF_CALL | BPF_X:
+ /*
+ * Implicit input:
+ * B1: pointer to ctx
+ * B2: pointer to bpf_array
+ * B3: index in bpf_array
+ */
+ jit->seen |= SEEN_TAIL_CALL;
+
+ /*
+ * if (index >= array->map.max_entries)
+ * goto out;
+ */
+
+ /* llgf %w1,map.max_entries(%b2) */
+ EMIT6_DISP_LH(0xe3000000, 0x0016, REG_W1, REG_0, BPF_REG_2,
+ offsetof(struct bpf_array, map.max_entries));
+ /* clgrj %b3,%w1,0xa,label0: if %b3 >= %w1 goto out */
+ EMIT6_PCREL_LABEL(0xec000000, 0x0065, BPF_REG_3,
+ REG_W1, 0, 0xa);
+
+ /*
+ * if (tail_call_cnt++ > MAX_TAIL_CALL_CNT)
+ * goto out;
+ */
+
+ if (jit->seen & SEEN_STACK)
+ off = STK_OFF_TCCNT + STK_OFF;
+ else
+ off = STK_OFF_TCCNT;
+ /* lhi %w0,1 */
+ EMIT4_IMM(0xa7080000, REG_W0, 1);
+ /* laal %w1,%w0,off(%r15) */
+ EMIT6_DISP_LH(0xeb000000, 0x00fa, REG_W1, REG_W0, REG_15, off);
+ /* clij %w1,MAX_TAIL_CALL_CNT,0x2,label0 */
+ EMIT6_PCREL_IMM_LABEL(0xec000000, 0x007f, REG_W1,
+ MAX_TAIL_CALL_CNT, 0, 0x2);
+
+ /*
+ * prog = array->prog[index];
+ * if (prog == NULL)
+ * goto out;
+ */
+
+ /* sllg %r1,%b3,3: %r1 = index * 8 */
+ EMIT6_DISP_LH(0xeb000000, 0x000d, REG_1, BPF_REG_3, REG_0, 3);
+ /* lg %r1,prog(%b2,%r1) */
+ EMIT6_DISP_LH(0xe3000000, 0x0004, REG_1, BPF_REG_2,
+ REG_1, offsetof(struct bpf_array, prog));
+ /* clgij %r1,0,0x8,label0 */
+ EMIT6_PCREL_IMM_LABEL(0xec000000, 0x007d, REG_1, 0, 0, 0x8);
+
+ /*
+ * Restore registers before calling function
+ */
+ save_restore_regs(jit, REGS_RESTORE);
+
+ /*
+ * goto *(prog->bpf_func + tail_call_start);
+ */
+
+ /* lg %r1,bpf_func(%r1) */
+ EMIT6_DISP_LH(0xe3000000, 0x0004, REG_1, REG_1, REG_0,
+ offsetof(struct bpf_prog, bpf_func));
+ /* bc 0xf,tail_call_start(%r1) */
+ _EMIT4(0x47f01000 + jit->tail_call_start);
+ /* out: */
+ jit->labels[0] = jit->prg;
+ break;
case BPF_JMP | BPF_EXIT: /* return b0 */
last = (i == fp->len - 1) ? 1 : 0;
if (last && !(jit->seen & SEEN_RET0))
diff --git a/arch/s390/pci/pci_event.c b/arch/s390/pci/pci_event.c
index 460fdb21cf61..ed2394dd14e9 100644
--- a/arch/s390/pci/pci_event.c
+++ b/arch/s390/pci/pci_event.c
@@ -75,7 +75,13 @@ static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf)
zpci_err_hex(ccdf, sizeof(*ccdf));
switch (ccdf->pec) {
- case 0x0301: /* Standby -> Configured */
+ case 0x0301: /* Reserved|Standby -> Configured */
+ if (!zdev) {
+ ret = clp_add_pci_device(ccdf->fid, ccdf->fh, 0);
+ if (ret)
+ break;
+ zdev = get_zdev_by_fid(ccdf->fid);
+ }
if (!zdev || zdev->state != ZPCI_FN_STATE_STANDBY)
break;
zdev->state = ZPCI_FN_STATE_CONFIGURED;