63 files changed, 4378 insertions, 1091 deletions
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index 1831833c430e..29a7940f284f 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -3,6 +3,10 @@
 # see Documentation/kbuild/kconfig-language.txt.
 #
 
+config SCHED_MC
+	def_bool y
+	depends on SMP
+
 config MMU
 	def_bool y
 
@@ -39,6 +43,9 @@ config GENERIC_HWEIGHT
 config GENERIC_TIME
 	def_bool y
 
+config GENERIC_CLOCKEVENTS
+	def_bool y
+
 config GENERIC_BUG
 	bool
 	depends on BUG
@@ -55,6 +62,10 @@ config GENERIC_LOCKBREAK
 	default y
 	depends on SMP && PREEMPT
 
+config PGSTE
+	bool
+	default y if KVM
+
 mainmenu "Linux Kernel Configuration"
 
 config S390
@@ -62,6 +73,7 @@ config S390
 	select HAVE_OPROFILE
 	select HAVE_KPROBES
 	select HAVE_KRETPROBES
+	select HAVE_KVM if 64BIT
 
 source "init/Kconfig"
 
@@ -69,6 +81,8 @@ menu "Base setup"
 
 comment "Processor type and features"
 
+source "kernel/time/Kconfig"
+
 config 64BIT
 	bool "64 bit kernel"
 	help
@@ -286,6 +300,14 @@ comment "Kernel preemption"
 
 source "kernel/Kconfig.preempt"
 
+config ARCH_SPARSEMEM_ENABLE
+	def_bool y
+	select SPARSEMEM_VMEMMAP_ENABLE
+	select SPARSEMEM_VMEMMAP
+
+config ARCH_SPARSEMEM_DEFAULT
+	def_bool y
+
 source "mm/Kconfig"
 
 comment "I/O subsystem configuration"
@@ -301,10 +323,7 @@ config QDIO
 	tristate "QDIO support"
 	---help---
 	  This driver provides the Queued Direct I/O base support for
-	  IBM mainframes.
-
-	  For details please refer to the documentation provided by IBM at
-	  <http://www10.software.ibm.com/developerworks/opensource/linux390>
+	  IBM System z.
 
 	  To compile this driver as a module, choose M here: the
 	  module will be called qdio.
@@ -486,25 +505,6 @@ config APPLDATA_NET_SUM
 
 source kernel/Kconfig.hz
 
-config NO_IDLE_HZ
-	bool "No HZ timer ticks in idle"
-	help
-	  Switches the regular HZ timer off when the system is going idle.
-	  This helps z/VM to detect that the Linux system is idle. VM can
-	  then "swap-out" this guest which reduces memory usage. It also
-	  reduces the overhead of idle systems.
-
-	  The HZ timer can be switched on/off via /proc/sys/kernel/hz_timer.
-	  hz_timer=0 means HZ timer is disabled. hz_timer=1 means HZ
-	  timer is active.
-
-config NO_IDLE_HZ_INIT
-	bool "HZ timer in idle off by default"
-	depends on NO_IDLE_HZ
-	help
-	  The HZ timer is switched off in idle by default. That means the
-	  HZ timer is already disabled at boot time.
-
 config S390_HYPFS_FS
 	bool "s390 hypervisor file system support"
 	select SYS_HYPERVISOR
@@ -528,6 +528,13 @@ config ZFCPDUMP
 	  Select this option if you want to build an zfcpdump enabled kernel.
 	  Refer to <file:Documentation/s390/zfcpdump.txt> for more details on this.
 
+config S390_GUEST
+	bool "s390 guest support (EXPERIMENTAL)"
+	depends on 64BIT && EXPERIMENTAL
+	select VIRTIO
+	select VIRTIO_RING
+	help
+	  Select this option to run this kernel as a guest under s390 Linux.
 endmenu
 
 source "net/Kconfig"
@@ -549,3 +556,5 @@ source "security/Kconfig"
 source "crypto/Kconfig"
 
 source "lib/Kconfig"
+
+source "arch/s390/kvm/Kconfig"
diff --git a/arch/s390/Makefile b/arch/s390/Makefile
index f708be367b03..792a4e7743ce 100644
--- a/arch/s390/Makefile
+++ b/arch/s390/Makefile
@@ -87,7 +87,7 @@ LDFLAGS_vmlinux := -e start
 head-y		:= arch/s390/kernel/head.o arch/s390/kernel/init_task.o
 
 core-y		+= arch/s390/mm/ arch/s390/kernel/ arch/s390/crypto/ \
-		   arch/s390/appldata/ arch/s390/hypfs/
+		   arch/s390/appldata/ arch/s390/hypfs/ arch/s390/kvm/
 libs-y		+= arch/s390/lib/
 drivers-y	+= drivers/s390/
 drivers-$(CONFIG_MATHEMU) += arch/s390/math-emu/
diff --git a/arch/s390/crypto/Makefile b/arch/s390/crypto/Makefile
index 14e552c5cc43..6a1157fa4f98 100644
--- a/arch/s390/crypto/Makefile
+++ b/arch/s390/crypto/Makefile
@@ -2,8 +2,9 @@
 # Cryptographic API
 #
 
-obj-$(CONFIG_CRYPTO_SHA1_S390) += sha1_s390.o
-obj-$(CONFIG_CRYPTO_SHA256_S390) += sha256_s390.o
+obj-$(CONFIG_CRYPTO_SHA1_S390) += sha1_s390.o sha_common.o
+obj-$(CONFIG_CRYPTO_SHA256_S390) += sha256_s390.o sha_common.o
+obj-$(CONFIG_CRYPTO_SHA512_S390) += sha512_s390.o sha_common.o
 obj-$(CONFIG_CRYPTO_DES_S390) += des_s390.o des_check_key.o
 obj-$(CONFIG_CRYPTO_AES_S390) += aes_s390.o
 obj-$(CONFIG_S390_PRNG) += prng.o
diff --git a/arch/s390/crypto/aes_s390.c b/arch/s390/crypto/aes_s390.c
index a3f67f8b5427..e33f32b54c08 100644
--- a/arch/s390/crypto/aes_s390.c
+++ b/arch/s390/crypto/aes_s390.c
@@ -499,7 +499,7 @@ static struct crypto_alg cbc_aes_alg = {
 	}
 };
 
-static int __init aes_init(void)
+static int __init aes_s390_init(void)
 {
 	int ret;
 
@@ -542,15 +542,15 @@ aes_err:
 	goto out;
 }
 
-static void __exit aes_fini(void)
+static void __exit aes_s390_fini(void)
 {
 	crypto_unregister_alg(&cbc_aes_alg);
 	crypto_unregister_alg(&ecb_aes_alg);
 	crypto_unregister_alg(&aes_alg);
 }
 
-module_init(aes_init);
-module_exit(aes_fini);
+module_init(aes_s390_init);
+module_exit(aes_s390_fini);
 
 MODULE_ALIAS("aes");
 
diff --git a/arch/s390/crypto/crypt_s390.h b/arch/s390/crypto/crypt_s390.h
index 95f5160df27f..9992f95ef992 100644
--- a/arch/s390/crypto/crypt_s390.h
+++ b/arch/s390/crypto/crypt_s390.h
@@ -82,6 +82,7 @@ enum crypt_s390_kimd_func {
 	KIMD_QUERY   = CRYPT_S390_KIMD | 0,
 	KIMD_SHA_1   = CRYPT_S390_KIMD | 1,
 	KIMD_SHA_256 = CRYPT_S390_KIMD | 2,
+	KIMD_SHA_512 = CRYPT_S390_KIMD | 3,
 };
 
 /*
@@ -92,6 +93,7 @@ enum crypt_s390_klmd_func {
 	KLMD_QUERY   = CRYPT_S390_KLMD | 0,
 	KLMD_SHA_1   = CRYPT_S390_KLMD | 1,
 	KLMD_SHA_256 = CRYPT_S390_KLMD | 2,
+	KLMD_SHA_512 = CRYPT_S390_KLMD | 3,
 };
 
 /*
diff --git a/arch/s390/crypto/des_s390.c b/arch/s390/crypto/des_s390.c
index ea22707f435f..4aba83b31596 100644
--- a/arch/s390/crypto/des_s390.c
+++ b/arch/s390/crypto/des_s390.c
@@ -550,7 +550,7 @@ static struct crypto_alg cbc_des3_192_alg = {
 	}
 };
 
-static int init(void)
+static int des_s390_init(void)
 {
 	int ret = 0;
 
@@ -612,7 +612,7 @@ des_err:
 	goto out;
 }
 
-static void __exit fini(void)
+static void __exit des_s390_fini(void)
 {
 	crypto_unregister_alg(&cbc_des3_192_alg);
 	crypto_unregister_alg(&ecb_des3_192_alg);
@@ -625,8 +625,8 @@ static void __exit fini(void)
 	crypto_unregister_alg(&des_alg);
 }
 
-module_init(init);
-module_exit(fini);
+module_init(des_s390_init);
+module_exit(des_s390_fini);
 
 MODULE_ALIAS("des");
 MODULE_ALIAS("des3_ede");
diff --git a/arch/s390/crypto/sha.h b/arch/s390/crypto/sha.h
new file mode 100644
index 000000000000..1ceafa571eab
--- /dev/null
+++ b/arch/s390/crypto/sha.h
@@ -0,0 +1,35 @@
+/*
+ * Cryptographic API.
+ *
+ * s390 generic implementation of the SHA Secure Hash Algorithms.
+ *
+ * Copyright IBM Corp. 2007
+ * Author(s): Jan Glauber (jang@de.ibm.com)
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ */
+#ifndef _CRYPTO_ARCH_S390_SHA_H
+#define _CRYPTO_ARCH_S390_SHA_H
+
+#include <linux/crypto.h>
+#include <crypto/sha.h>
+
+/* must be big enough for the largest SHA variant */
+#define SHA_MAX_STATE_SIZE	16
+#define SHA_MAX_BLOCK_SIZE      SHA512_BLOCK_SIZE
+
+struct s390_sha_ctx {
+	u64 count;              /* message length in bytes */
+	u32 state[SHA_MAX_STATE_SIZE];	/* hash state; digest after final */
+	u8 buf[2 * SHA_MAX_BLOCK_SIZE];	/* partial block plus padding */
+	int func;		/* KIMD function to use */
+};
+
+void s390_sha_update(struct crypto_tfm *tfm, const u8 *data, unsigned int len);
+void s390_sha_final(struct crypto_tfm *tfm, u8 *out);
+
+#endif
diff --git a/arch/s390/crypto/sha1_s390.c b/arch/s390/crypto/sha1_s390.c
index 5a834f6578ab..b3cb5a89b00d 100644
--- a/arch/s390/crypto/sha1_s390.c
+++ b/arch/s390/crypto/sha1_s390.c
@@ -29,16 +29,11 @@
 #include <crypto/sha.h>
 
 #include "crypt_s390.h"
-
-struct s390_sha1_ctx {
-	u64 count;		/* message length */
-	u32 state[5];
-	u8 buf[2 * SHA1_BLOCK_SIZE];
-};
+#include "sha.h"
 
 static void sha1_init(struct crypto_tfm *tfm)
 {
-	struct s390_sha1_ctx *sctx = crypto_tfm_ctx(tfm);
+	struct s390_sha_ctx *sctx = crypto_tfm_ctx(tfm);
 
 	sctx->state[0] = SHA1_H0;
 	sctx->state[1] = SHA1_H1;
@@ -46,79 +41,7 @@ static void sha1_init(struct crypto_tfm *tfm)
 	sctx->state[3] = SHA1_H3;
 	sctx->state[4] = SHA1_H4;
 	sctx->count = 0;
-}
-
-static void sha1_update(struct crypto_tfm *tfm, const u8 *data,
-			unsigned int len)
-{
-	struct s390_sha1_ctx *sctx = crypto_tfm_ctx(tfm);
-	unsigned int index;
-	int ret;
-
-	/* how much is already in the buffer? */
-	index = sctx->count & 0x3f;
-
-	sctx->count += len;
-
-	if (index + len < SHA1_BLOCK_SIZE)
-		goto store;
-
-	/* process one stored block */
-	if (index) {
-		memcpy(sctx->buf + index, data, SHA1_BLOCK_SIZE - index);
-		ret = crypt_s390_kimd(KIMD_SHA_1, sctx->state, sctx->buf,
-				      SHA1_BLOCK_SIZE);
-		BUG_ON(ret != SHA1_BLOCK_SIZE);
-		data += SHA1_BLOCK_SIZE - index;
-		len -= SHA1_BLOCK_SIZE - index;
-	}
-
-	/* process as many blocks as possible */
-	if (len >= SHA1_BLOCK_SIZE) {
-		ret = crypt_s390_kimd(KIMD_SHA_1, sctx->state, data,
-				      len & ~(SHA1_BLOCK_SIZE - 1));
-		BUG_ON(ret != (len & ~(SHA1_BLOCK_SIZE - 1)));
-		data += ret;
-		len -= ret;
-	}
-
-store:
-	/* anything left? */
-	if (len)
-		memcpy(sctx->buf + index , data, len);
-}
-
-/* Add padding and return the message digest. */
-static void sha1_final(struct crypto_tfm *tfm, u8 *out)
-{
-	struct s390_sha1_ctx *sctx = crypto_tfm_ctx(tfm);
-	u64 bits;
-	unsigned int index, end;
-	int ret;
-
-	/* must perform manual padding */
-	index = sctx->count & 0x3f;
-	end =  (index < 56) ? SHA1_BLOCK_SIZE : (2 * SHA1_BLOCK_SIZE);
-
-	/* start pad with 1 */
-	sctx->buf[index] = 0x80;
-
-	/* pad with zeros */
-	index++;
-	memset(sctx->buf + index, 0x00, end - index - 8);
-
-	/* append message length */
-	bits = sctx->count * 8;
-	memcpy(sctx->buf + end - 8, &bits, sizeof(bits));
-
-	ret = crypt_s390_kimd(KIMD_SHA_1, sctx->state, sctx->buf, end);
-	BUG_ON(ret != end);
-
-	/* copy digest to out */
-	memcpy(out, sctx->state, SHA1_DIGEST_SIZE);
-
-	/* wipe context */
-	memset(sctx, 0, sizeof *sctx);
+	sctx->func = KIMD_SHA_1;
 }
 
 static struct crypto_alg alg = {
@@ -127,33 +50,31 @@ static struct crypto_alg alg = {
 	.cra_priority	=	CRYPT_S390_PRIORITY,
 	.cra_flags	=	CRYPTO_ALG_TYPE_DIGEST,
 	.cra_blocksize	=	SHA1_BLOCK_SIZE,
-	.cra_ctxsize	=	sizeof(struct s390_sha1_ctx),
+	.cra_ctxsize	=	sizeof(struct s390_sha_ctx),
 	.cra_module	=	THIS_MODULE,
 	.cra_list	=	LIST_HEAD_INIT(alg.cra_list),
 	.cra_u		=	{ .digest = {
 	.dia_digestsize	=	SHA1_DIGEST_SIZE,
 	.dia_init	=	sha1_init,
-	.dia_update	=	sha1_update,
-	.dia_final	=	sha1_final } }
+	.dia_update	=	s390_sha_update,
+	.dia_final	=	s390_sha_final } }
 };
 
-static int __init init(void)
+static int __init sha1_s390_init(void)
 {
 	if (!crypt_s390_func_available(KIMD_SHA_1))
 		return -EOPNOTSUPP;
-
 	return crypto_register_alg(&alg);
 }
 
-static void __exit fini(void)
+static void __exit sha1_s390_fini(void)
 {
 	crypto_unregister_alg(&alg);
 }
 
-module_init(init);
-module_exit(fini);
+module_init(sha1_s390_init);
+module_exit(sha1_s390_fini);
 
 MODULE_ALIAS("sha1");
-
 MODULE_LICENSE("GPL");
 MODULE_DESCRIPTION("SHA1 Secure Hash Algorithm");
diff --git a/arch/s390/crypto/sha256_s390.c b/arch/s390/crypto/sha256_s390.c
index ccf8633c4f65..19c03fb6ba7e 100644
--- a/arch/s390/crypto/sha256_s390.c
+++ b/arch/s390/crypto/sha256_s390.c
@@ -22,16 +22,11 @@
 #include <crypto/sha.h>
 
 #include "crypt_s390.h"
-
-struct s390_sha256_ctx {
-	u64 count;		/* message length */
-	u32 state[8];
-	u8 buf[2 * SHA256_BLOCK_SIZE];
-};
+#include "sha.h"
 
 static void sha256_init(struct crypto_tfm *tfm)
 {
-	struct s390_sha256_ctx *sctx = crypto_tfm_ctx(tfm);
+	struct s390_sha_ctx *sctx = crypto_tfm_ctx(tfm);
 
 	sctx->state[0] = SHA256_H0;
 	sctx->state[1] = SHA256_H1;
@@ -42,79 +37,7 @@ static void sha256_init(struct crypto_tfm *tfm)
 	sctx->state[6] = SHA256_H6;
 	sctx->state[7] = SHA256_H7;
 	sctx->count = 0;
-}
-
-static void sha256_update(struct crypto_tfm *tfm, const u8 *data,
-			  unsigned int len)
-{
-	struct s390_sha256_ctx *sctx = crypto_tfm_ctx(tfm);
-	unsigned int index;
-	int ret;
-
-	/* how much is already in the buffer? */
-	index = sctx->count & 0x3f;
-
-	sctx->count += len;
-
-	if ((index + len) < SHA256_BLOCK_SIZE)
-		goto store;
-
-	/* process one stored block */
-	if (index) {
-		memcpy(sctx->buf + index, data, SHA256_BLOCK_SIZE - index);
-		ret = crypt_s390_kimd(KIMD_SHA_256, sctx->state, sctx->buf,
-				      SHA256_BLOCK_SIZE);
-		BUG_ON(ret != SHA256_BLOCK_SIZE);
-		data += SHA256_BLOCK_SIZE - index;
-		len -= SHA256_BLOCK_SIZE - index;
-	}
-
-	/* process as many blocks as possible */
-	if (len >= SHA256_BLOCK_SIZE) {
-		ret = crypt_s390_kimd(KIMD_SHA_256, sctx->state, data,
-				      len & ~(SHA256_BLOCK_SIZE - 1));
-		BUG_ON(ret != (len & ~(SHA256_BLOCK_SIZE - 1)));
-		data += ret;
-		len -= ret;
-	}
-
-store:
-	/* anything left? */
-	if (len)
-		memcpy(sctx->buf + index , data, len);
-}
-
-/* Add padding and return the message digest */
-static void sha256_final(struct crypto_tfm *tfm, u8 *out)
-{
-	struct s390_sha256_ctx *sctx = crypto_tfm_ctx(tfm);
-	u64 bits;
-	unsigned int index, end;
-	int ret;
-
-	/* must perform manual padding */
-	index = sctx->count & 0x3f;
-	end = (index < 56) ? SHA256_BLOCK_SIZE : (2 * SHA256_BLOCK_SIZE);
-
-	/* start pad with 1 */
-	sctx->buf[index] = 0x80;
-
-	/* pad with zeros */
-	index++;
-	memset(sctx->buf + index, 0x00, end - index - 8);
-
-	/* append message length */
-	bits = sctx->count * 8;
-	memcpy(sctx->buf + end - 8, &bits, sizeof(bits));
-
-	ret = crypt_s390_kimd(KIMD_SHA_256, sctx->state, sctx->buf, end);
-	BUG_ON(ret != end);
-
-	/* copy digest to out */
-	memcpy(out, sctx->state, SHA256_DIGEST_SIZE);
-
-	/* wipe context */
-	memset(sctx, 0, sizeof *sctx);
+	sctx->func = KIMD_SHA_256;
 }
 
 static struct crypto_alg alg = {
@@ -123,17 +46,17 @@ static struct crypto_alg alg = {
 	.cra_priority	=	CRYPT_S390_PRIORITY,
 	.cra_flags	=	CRYPTO_ALG_TYPE_DIGEST,
 	.cra_blocksize	=	SHA256_BLOCK_SIZE,
-	.cra_ctxsize	=	sizeof(struct s390_sha256_ctx),
+	.cra_ctxsize	=	sizeof(struct s390_sha_ctx),
 	.cra_module	=	THIS_MODULE,
 	.cra_list	=	LIST_HEAD_INIT(alg.cra_list),
 	.cra_u		=	{ .digest = {
 	.dia_digestsize	=	SHA256_DIGEST_SIZE,
 	.dia_init	=	sha256_init,
-	.dia_update	=	sha256_update,
-	.dia_final	=	sha256_final } }
+	.dia_update	=	s390_sha_update,
+	.dia_final	=	s390_sha_final } }
 };
 
-static int init(void)
+static int sha256_s390_init(void)
 {
 	if (!crypt_s390_func_available(KIMD_SHA_256))
 		return -EOPNOTSUPP;
@@ -141,15 +64,14 @@ static int init(void)
 	return crypto_register_alg(&alg);
 }
 
-static void __exit fini(void)
+static void __exit sha256_s390_fini(void)
 {
 	crypto_unregister_alg(&alg);
 }
 
-module_init(init);
-module_exit(fini);
+module_init(sha256_s390_init);
+module_exit(sha256_s390_fini);
 
 MODULE_ALIAS("sha256");
-
 MODULE_LICENSE("GPL");
 MODULE_DESCRIPTION("SHA256 Secure Hash Algorithm");
diff --git a/arch/s390/crypto/sha512_s390.c b/arch/s390/crypto/sha512_s390.c
new file mode 100644
index 000000000000..23c7861f6aeb
--- /dev/null
+++ b/arch/s390/crypto/sha512_s390.c
@@ -0,0 +1,126 @@
+/*
+ * Cryptographic API.
+ *
+ * s390 implementation of the SHA512 and SHA384 Secure Hash Algorithm.
+ *
+ * Copyright IBM Corp. 2007
+ * Author(s): Jan Glauber (jang@de.ibm.com)
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ */
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/crypto.h>
+
+#include "sha.h"
+#include "crypt_s390.h"
+
+/*
+ * Load the eight 64 bit SHA-512 initial hash values into the context. Each
+ * value occupies two consecutive u32 slots of ctx->state.
+ */
+static void sha512_init(struct crypto_tfm *tfm)
+{
+	struct s390_sha_ctx *ctx = crypto_tfm_ctx(tfm);
+
+	*(__u64 *)&ctx->state[0] = 0x6a09e667f3bcc908ULL;
+	*(__u64 *)&ctx->state[2] = 0xbb67ae8584caa73bULL;
+	*(__u64 *)&ctx->state[4] = 0x3c6ef372fe94f82bULL;
+	*(__u64 *)&ctx->state[6] = 0xa54ff53a5f1d36f1ULL;
+	*(__u64 *)&ctx->state[8] = 0x510e527fade682d1ULL;
+	*(__u64 *)&ctx->state[10] = 0x9b05688c2b3e6c1fULL;
+	*(__u64 *)&ctx->state[12] = 0x1f83d9abfb41bd6bULL;
+	*(__u64 *)&ctx->state[14] = 0x5be0cd19137e2179ULL;
+	ctx->count = 0;
+	ctx->func = KIMD_SHA_512;
+}
+
+static struct crypto_alg sha512_alg = {
+	.cra_name	=	"sha512",
+	.cra_driver_name =	"sha512-s390",
+	.cra_priority	=	CRYPT_S390_PRIORITY,
+	.cra_flags	=	CRYPTO_ALG_TYPE_DIGEST,
+	.cra_blocksize	=	SHA512_BLOCK_SIZE,
+	.cra_ctxsize	=	sizeof(struct s390_sha_ctx),
+	.cra_module	=	THIS_MODULE,
+	.cra_list	=	LIST_HEAD_INIT(sha512_alg.cra_list),
+	.cra_u		=	{ .digest = {
+	.dia_digestsize	=	SHA512_DIGEST_SIZE,
+	.dia_init	=	sha512_init,
+	.dia_update	=	s390_sha_update,
+	.dia_final	=	s390_sha_final } }
+};
+
+MODULE_ALIAS("sha512");
+
+/*
+ * SHA-384 uses the SHA-512 KIMD function code with its own initial hash
+ * values; the shorter digest size is set in sha384_alg.
+ */
+static void sha384_init(struct crypto_tfm *tfm)
+{
+	struct s390_sha_ctx *ctx = crypto_tfm_ctx(tfm);
+
+	*(__u64 *)&ctx->state[0] = 0xcbbb9d5dc1059ed8ULL;
+	*(__u64 *)&ctx->state[2] = 0x629a292a367cd507ULL;
+	*(__u64 *)&ctx->state[4] = 0x9159015a3070dd17ULL;
+	*(__u64 *)&ctx->state[6] = 0x152fecd8f70e5939ULL;
+	*(__u64 *)&ctx->state[8] = 0x67332667ffc00b31ULL;
+	*(__u64 *)&ctx->state[10] = 0x8eb44a8768581511ULL;
+	*(__u64 *)&ctx->state[12] = 0xdb0c2e0d64f98fa7ULL;
+	*(__u64 *)&ctx->state[14] = 0x47b5481dbefa4fa4ULL;
+	ctx->count = 0;
+	ctx->func = KIMD_SHA_512;
+}
+
+static struct crypto_alg sha384_alg = {
+	.cra_name	=	"sha384",
+	.cra_driver_name =	"sha384-s390",
+	.cra_priority	=	CRYPT_S390_PRIORITY,
+	.cra_flags	=	CRYPTO_ALG_TYPE_DIGEST,
+	.cra_blocksize	=	SHA384_BLOCK_SIZE,
+	.cra_ctxsize	=	sizeof(struct s390_sha_ctx),
+	.cra_module	=	THIS_MODULE,
+	.cra_list	=	LIST_HEAD_INIT(sha384_alg.cra_list),
+	.cra_u		=	{ .digest = {
+	.dia_digestsize	=	SHA384_DIGEST_SIZE,
+	.dia_init	=	sha384_init,
+	.dia_update	=	s390_sha_update,
+	.dia_final	=	s390_sha_final } }
+};
+
+MODULE_ALIAS("sha384");
+
+/*
+ * Register sha512 and sha384, provided the SHA-512 KIMD function is
+ * available. If sha384 cannot be registered, sha512 is unregistered again.
+ */
+static int __init sha512_s390_init(void)
+{
+	int ret;
+
+	if (!crypt_s390_func_available(KIMD_SHA_512))
+		return -EOPNOTSUPP;
+	if ((ret = crypto_register_alg(&sha512_alg)) < 0)
+		goto out;
+	if ((ret = crypto_register_alg(&sha384_alg)) < 0)
+		crypto_unregister_alg(&sha512_alg);
+out:
+	return ret;
+}
+
+static void __exit sha512_s390_fini(void)
+{
+	crypto_unregister_alg(&sha512_alg);
+	crypto_unregister_alg(&sha384_alg);
+}
+
+module_init(sha512_s390_init);
+module_exit(sha512_s390_fini);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("SHA512 and SHA-384 Secure Hash Algorithm");
diff --git a/arch/s390/crypto/sha_common.c b/arch/s390/crypto/sha_common.c
new file mode 100644
index 000000000000..9d6eb8c3d37e
--- /dev/null
+++ b/arch/s390/crypto/sha_common.c
@@ -0,0 +1,118 @@
+/*
+ * Cryptographic API.
+ *
+ * s390 generic implementation of the SHA Secure Hash Algorithms.
+ *
+ * Copyright IBM Corp. 2007
+ * Author(s): Jan Glauber (jang@de.ibm.com)
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ */
+
+#include <linux/crypto.h>
+#include "sha.h"
+#include "crypt_s390.h"
+
+/*
+ * Feed @len bytes of @data into the hash state kept in the tfm context.
+ *
+ * Complete blocks are passed to the KIMD function selected by ctx->func; a
+ * trailing partial block stays in ctx->buf for the next update or final call.
+ * The block size is used as a mask (bsize - 1), which assumes a power of two.
+ */
+void s390_sha_update(struct crypto_tfm *tfm, const u8 *data, unsigned int len)
+{
+	struct s390_sha_ctx *ctx = crypto_tfm_ctx(tfm);
+	unsigned int bsize = crypto_tfm_alg_blocksize(tfm);
+	unsigned int index;
+	int ret;
+
+	/* how much is already in the buffer? */
+	index = ctx->count & (bsize - 1);
+	ctx->count += len;
+
+	if ((index + len) < bsize)
+		goto store;
+
+	/* process one stored block */
+	if (index) {
+		memcpy(ctx->buf + index, data, bsize - index);
+		ret = crypt_s390_kimd(ctx->func, ctx->state, ctx->buf, bsize);
+		BUG_ON(ret != bsize);
+		data += bsize - index;
+		len -= bsize - index;
+		/*
+		 * The buffered block is consumed; leftover input must go to
+		 * the start of the buffer, where the next call expects it.
+		 */
+		index = 0;
+	}
+
+	/* process as many blocks as possible */
+	if (len >= bsize) {
+		ret = crypt_s390_kimd(ctx->func, ctx->state, data,
+				      len & ~(bsize - 1));
+		BUG_ON(ret != (len & ~(bsize - 1)));
+		data += ret;
+		len -= ret;
+	}
+store:
+	/* keep the remaining partial block for the next call */
+	if (len)
+		memcpy(ctx->buf + index, data, len);
+}
+EXPORT_SYMBOL_GPL(s390_sha_update);
+
+/*
+ * Finish the hash: pad the buffered partial block, process it with the KIMD
+ * function selected by ctx->func and copy the digest to @out. The context
+ * is wiped afterwards.
+ */
+void s390_sha_final(struct crypto_tfm *tfm, u8 *out)
+{
+	struct s390_sha_ctx *ctx = crypto_tfm_ctx(tfm);
+	unsigned int bsize = crypto_tfm_alg_blocksize(tfm);
+	u64 bits;
+	unsigned int index, end, plen;
+	int ret;
+
+	/* SHA-512 uses 128 bit padding length */
+	plen = (bsize > SHA256_BLOCK_SIZE) ? 16 : 8;
+
+	/* must perform manual padding */
+	index = ctx->count & (bsize - 1);
+	end = (index < bsize - plen) ? bsize : (2 * bsize);
+
+	/* start pad with 1 */
+	ctx->buf[index] = 0x80;
+	index++;
+
+	/*
+	 * Pad with zeros. Only the last 8 bytes are left out, so with a 16
+	 * byte length field the upper 8 bytes are zeroed here as well.
+	 */
+	memset(ctx->buf + index, 0x00, end - index - 8);
+
+	/*
+	 * Append message length. Well, SHA-512 wants a 128 bit length value,
+	 * nevertheless we use u64, should be enough for now...
+	 */
+	bits = ctx->count * 8;
+	memcpy(ctx->buf + end - 8, &bits, sizeof(bits));
+
+	ret = crypt_s390_kimd(ctx->func, ctx->state, ctx->buf, end);
+	BUG_ON(ret != end);
+
+	/* copy digest to out */
+	memcpy(out, ctx->state, crypto_hash_digestsize(crypto_hash_cast(tfm)));
+	/* wipe context */
+	memset(ctx, 0, sizeof *ctx);
+}
+EXPORT_SYMBOL_GPL(s390_sha_final);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("s390 SHA cipher common functions");
diff --git a/arch/s390/defconfig b/arch/s390/defconfig
index 62f6b5a606dd..aa341d0ea1e6 100644
--- a/arch/s390/defconfig
+++ b/arch/s390/defconfig
@@ -1,8 +1,9 @@
 #
 # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.25-rc4
-# Wed Mar  5 11:22:59 2008
+# Linux kernel version: 2.6.25
+# Wed Apr 30 11:07:45 2008
 #
+CONFIG_SCHED_MC=y
 CONFIG_MMU=y
 CONFIG_ZONE_DMA=y
 CONFIG_LOCKDEP_SUPPORT=y
@@ -13,10 +14,12 @@ CONFIG_RWSEM_XCHGADD_ALGORITHM=y
 # CONFIG_ARCH_HAS_ILOG2_U64 is not set
 CONFIG_GENERIC_HWEIGHT=y
 CONFIG_GENERIC_TIME=y
+CONFIG_GENERIC_CLOCKEVENTS=y
 CONFIG_GENERIC_BUG=y
 CONFIG_NO_IOMEM=y
 CONFIG_NO_DMA=y
 CONFIG_GENERIC_LOCKBREAK=y
+CONFIG_PGSTE=y
 CONFIG_S390=y
 CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config"
 
@@ -42,6 +45,7 @@ CONFIG_LOG_BUF_SHIFT=17
 CONFIG_CGROUPS=y
 # CONFIG_CGROUP_DEBUG is not set
 CONFIG_CGROUP_NS=y
+# CONFIG_CGROUP_DEVICE is not set
 # CONFIG_CPUSETS is not set
 CONFIG_GROUP_SCHED=y
 CONFIG_FAIR_GROUP_SCHED=y
@@ -64,6 +68,7 @@ CONFIG_INITRAMFS_SOURCE=""
 CONFIG_SYSCTL=y
 # CONFIG_EMBEDDED is not set
 CONFIG_SYSCTL_SYSCALL=y
+CONFIG_SYSCTL_SYSCALL_CHECK=y
 CONFIG_KALLSYMS=y
 # CONFIG_KALLSYMS_ALL is not set
 # CONFIG_KALLSYMS_EXTRA_PASS is not set
@@ -91,6 +96,7 @@ CONFIG_KPROBES=y
 CONFIG_KRETPROBES=y
 CONFIG_HAVE_KPROBES=y
 CONFIG_HAVE_KRETPROBES=y
+# CONFIG_HAVE_DMA_ATTRS is not set
 CONFIG_PROC_PAGE_MONITOR=y
 CONFIG_SLABINFO=y
 CONFIG_RT_MUTEXES=y
@@ -120,8 +126,8 @@ CONFIG_DEFAULT_DEADLINE=y
 # CONFIG_DEFAULT_CFQ is not set
 # CONFIG_DEFAULT_NOOP is not set
 CONFIG_DEFAULT_IOSCHED="deadline"
+CONFIG_PREEMPT_NOTIFIERS=y
 CONFIG_CLASSIC_RCU=y
-# CONFIG_PREEMPT_RCU is not set
 
 #
 # Base setup
@@ -130,6 +136,10 @@ CONFIG_CLASSIC_RCU=y
 #
 # Processor type and features
 #
+CONFIG_TICK_ONESHOT=y
+CONFIG_NO_HZ=y
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_GENERIC_CLOCKEVENTS_BUILD=y
 CONFIG_64BIT=y
 CONFIG_SMP=y
 CONFIG_NR_CPUS=32
@@ -160,15 +170,20 @@ CONFIG_ARCH_POPULATES_NODE_MAP=y
 # CONFIG_PREEMPT_NONE is not set
 # CONFIG_PREEMPT_VOLUNTARY is not set
 CONFIG_PREEMPT=y
-# CONFIG_RCU_TRACE is not set
+# CONFIG_PREEMPT_RCU is not set
+CONFIG_ARCH_SPARSEMEM_ENABLE=y
+CONFIG_ARCH_SPARSEMEM_DEFAULT=y
 CONFIG_SELECT_MEMORY_MODEL=y
-CONFIG_FLATMEM_MANUAL=y
+# CONFIG_FLATMEM_MANUAL is not set
 # CONFIG_DISCONTIGMEM_MANUAL is not set
-# CONFIG_SPARSEMEM_MANUAL is not set
-CONFIG_FLATMEM=y
-CONFIG_FLAT_NODE_MEM_MAP=y
+CONFIG_SPARSEMEM_MANUAL=y
+CONFIG_SPARSEMEM=y
+CONFIG_HAVE_MEMORY_PRESENT=y
 # CONFIG_SPARSEMEM_STATIC is not set
-# CONFIG_SPARSEMEM_VMEMMAP_ENABLE is not set
+CONFIG_SPARSEMEM_EXTREME=y
+CONFIG_SPARSEMEM_VMEMMAP_ENABLE=y
+CONFIG_SPARSEMEM_VMEMMAP=y
+CONFIG_PAGEFLAGS_EXTENDED=y
 CONFIG_SPLIT_PTLOCK_CPUS=4
 CONFIG_RESOURCES_64BIT=y
 CONFIG_ZONE_DMA_FLAG=1
@@ -204,11 +219,10 @@ CONFIG_HZ_100=y
 # CONFIG_HZ_1000 is not set
 CONFIG_HZ=100
 # CONFIG_SCHED_HRTICK is not set
-CONFIG_NO_IDLE_HZ=y
-CONFIG_NO_IDLE_HZ_INIT=y
 CONFIG_S390_HYPFS_FS=y
 CONFIG_KEXEC=y
 # CONFIG_ZFCPDUMP is not set
+CONFIG_S390_GUEST=y
 
 #
 # Networking
@@ -271,8 +285,10 @@ CONFIG_INET6_XFRM_MODE_TUNNEL=y
 CONFIG_INET6_XFRM_MODE_BEET=y
 # CONFIG_INET6_XFRM_MODE_ROUTEOPTIMIZATION is not set
 CONFIG_IPV6_SIT=y
+CONFIG_IPV6_NDISC_NODETYPE=y
 # CONFIG_IPV6_TUNNEL is not set
 # CONFIG_IPV6_MULTIPLE_TABLES is not set
+# CONFIG_IPV6_MROUTE is not set
 # CONFIG_NETWORK_SECMARK is not set
 CONFIG_NETFILTER=y
 # CONFIG_NETFILTER_DEBUG is not set
@@ -288,6 +304,7 @@ CONFIG_NF_CONNTRACK=m
 # CONFIG_NF_CT_ACCT is not set
 # CONFIG_NF_CONNTRACK_MARK is not set
 # CONFIG_NF_CONNTRACK_EVENTS is not set
+# CONFIG_NF_CT_PROTO_DCCP is not set
 # CONFIG_NF_CT_PROTO_SCTP is not set
 # CONFIG_NF_CT_PROTO_UDPLITE is not set
 # CONFIG_NF_CONNTRACK_AMANDA is not set
@@ -438,6 +455,7 @@ CONFIG_DASD_ECKD=y
 CONFIG_DASD_FBA=y
 CONFIG_DASD_DIAG=y
 CONFIG_DASD_EER=y
+CONFIG_VIRTIO_BLK=m
 CONFIG_MISC_DEVICES=y
 # CONFIG_EEPROM_93CX6 is not set
 # CONFIG_ENCLOSURE_SERVICES is not set
@@ -532,26 +550,26 @@ CONFIG_NETDEV_10000=y
 # S/390 network device drivers
 #
 CONFIG_LCS=m
-CONFIG_CTC=m
+CONFIG_CTCM=m
 # CONFIG_NETIUCV is not set
 # CONFIG_SMSGIUCV is not set
 # CONFIG_CLAW is not set
 CONFIG_QETH=y
-
-#
-# Gigabit Ethernet default settings
-#
-# CONFIG_QETH_IPV6 is not set
+CONFIG_QETH_L2=y
+CONFIG_QETH_L3=y
+CONFIG_QETH_IPV6=y
 CONFIG_CCWGROUP=y
 # CONFIG_PPP is not set
 # CONFIG_SLIP is not set
 # CONFIG_NETCONSOLE is not set
 # CONFIG_NETPOLL is not set
 # CONFIG_NET_POLL_CONTROLLER is not set
+CONFIG_VIRTIO_NET=m
 
 #
 # Character devices
 #
+CONFIG_DEVKMEM=y
 CONFIG_UNIX98_PTYS=y
 CONFIG_LEGACY_PTYS=y
 CONFIG_LEGACY_PTY_COUNT=256
@@ -601,6 +619,7 @@ CONFIG_S390_VMUR=m
 # Sonics Silicon Backplane
 #
 # CONFIG_MEMSTICK is not set
+# CONFIG_NEW_LEDS is not set
 
 #
 # File systems
@@ -653,6 +672,7 @@ CONFIG_PROC_SYSCTL=y
 CONFIG_SYSFS=y
 CONFIG_TMPFS=y
 CONFIG_TMPFS_POSIX_ACL=y
+# CONFIG_HUGETLBFS is not set
 # CONFIG_HUGETLB_PAGE is not set
 CONFIG_CONFIGFS_FS=m
 
@@ -679,12 +699,10 @@ CONFIG_NFS_FS=y
 CONFIG_NFS_V3=y
 # CONFIG_NFS_V3_ACL is not set
 # CONFIG_NFS_V4 is not set
-# CONFIG_NFS_DIRECTIO is not set
 CONFIG_NFSD=y
 CONFIG_NFSD_V3=y
 # CONFIG_NFSD_V3_ACL is not set
 # CONFIG_NFSD_V4 is not set
-CONFIG_NFSD_TCP=y
 CONFIG_LOCKD=y
 CONFIG_LOCKD_V4=y
 CONFIG_EXPORTFS=y
@@ -732,6 +750,7 @@ CONFIG_TRACE_IRQFLAGS_SUPPORT=y
 # CONFIG_PRINTK_TIME is not set
 CONFIG_ENABLE_WARN_DEPRECATED=y
 CONFIG_ENABLE_MUST_CHECK=y
+CONFIG_FRAME_WARN=2048
 CONFIG_MAGIC_SYSRQ=y
 # CONFIG_UNUSED_SYMBOLS is not set
 CONFIG_DEBUG_FS=y
@@ -755,6 +774,7 @@ CONFIG_DEBUG_SPINLOCK_SLEEP=y
 CONFIG_DEBUG_BUGVERBOSE=y
 # CONFIG_DEBUG_INFO is not set
 # CONFIG_DEBUG_VM is not set
+# CONFIG_DEBUG_WRITECOUNT is not set
 # CONFIG_DEBUG_LIST is not set
 # CONFIG_DEBUG_SG is not set
 # CONFIG_FRAME_POINTER is not set
@@ -776,58 +796,88 @@ CONFIG_SAMPLES=y
 # CONFIG_SECURITY is not set
 # CONFIG_SECURITY_FILE_CAPABILITIES is not set
 CONFIG_CRYPTO=y
+
+#
+# Crypto core or helper
+#
 CONFIG_CRYPTO_ALGAPI=y
 CONFIG_CRYPTO_AEAD=m
 CONFIG_CRYPTO_BLKCIPHER=y
-CONFIG_CRYPTO_SEQIV=m
 CONFIG_CRYPTO_HASH=m
 CONFIG_CRYPTO_MANAGER=y
+CONFIG_CRYPTO_GF128MUL=m
+# CONFIG_CRYPTO_NULL is not set
+# CONFIG_CRYPTO_CRYPTD is not set
+CONFIG_CRYPTO_AUTHENC=m
+# CONFIG_CRYPTO_TEST is not set
+
+#
+# Authenticated Encryption with Associated Data
+#
+CONFIG_CRYPTO_CCM=m
+CONFIG_CRYPTO_GCM=m
+CONFIG_CRYPTO_SEQIV=m
+
+#
+# Block modes
+#
+CONFIG_CRYPTO_CBC=y
+CONFIG_CRYPTO_CTR=m
+CONFIG_CRYPTO_CTS=m
+CONFIG_CRYPTO_ECB=m
+# CONFIG_CRYPTO_LRW is not set
+CONFIG_CRYPTO_PCBC=m
+# CONFIG_CRYPTO_XTS is not set
+
+#
+# Hash modes
+#
 CONFIG_CRYPTO_HMAC=m
 # CONFIG_CRYPTO_XCBC is not set
-# CONFIG_CRYPTO_NULL is not set
+
+#
+# Digest
+#
+# CONFIG_CRYPTO_CRC32C is not set
 # CONFIG_CRYPTO_MD4 is not set
 CONFIG_CRYPTO_MD5=m
+# CONFIG_CRYPTO_MICHAEL_MIC is not set
 CONFIG_CRYPTO_SHA1=m
 # CONFIG_CRYPTO_SHA256 is not set
 # CONFIG_CRYPTO_SHA512 is not set
-# CONFIG_CRYPTO_WP512 is not set
 # CONFIG_CRYPTO_TGR192 is not set
-CONFIG_CRYPTO_GF128MUL=m
-CONFIG_CRYPTO_ECB=m
-CONFIG_CRYPTO_CBC=y
-CONFIG_CRYPTO_PCBC=m
-# CONFIG_CRYPTO_LRW is not set
-# CONFIG_CRYPTO_XTS is not set
-CONFIG_CRYPTO_CTR=m
-CONFIG_CRYPTO_GCM=m
-CONFIG_CRYPTO_CCM=m
-# CONFIG_CRYPTO_CRYPTD is not set
-# CONFIG_CRYPTO_DES is not set
-CONFIG_CRYPTO_FCRYPT=m
-# CONFIG_CRYPTO_BLOWFISH is not set
-# CONFIG_CRYPTO_TWOFISH is not set
-# CONFIG_CRYPTO_SERPENT is not set
+# CONFIG_CRYPTO_WP512 is not set
+
+#
+# Ciphers
+#
 # CONFIG_CRYPTO_AES is not set
+# CONFIG_CRYPTO_ANUBIS is not set
+# CONFIG_CRYPTO_ARC4 is not set
+# CONFIG_CRYPTO_BLOWFISH is not set
+CONFIG_CRYPTO_CAMELLIA=m
 # CONFIG_CRYPTO_CAST5 is not set
 # CONFIG_CRYPTO_CAST6 is not set
-# CONFIG_CRYPTO_TEA is not set
-# CONFIG_CRYPTO_ARC4 is not set
+# CONFIG_CRYPTO_DES is not set
+CONFIG_CRYPTO_FCRYPT=m
 # CONFIG_CRYPTO_KHAZAD is not set
-# CONFIG_CRYPTO_ANUBIS is not set
-CONFIG_CRYPTO_SEED=m
 CONFIG_CRYPTO_SALSA20=m
+CONFIG_CRYPTO_SEED=m
+# CONFIG_CRYPTO_SERPENT is not set
+# CONFIG_CRYPTO_TEA is not set
+# CONFIG_CRYPTO_TWOFISH is not set
+
+#
+# Compression
+#
 # CONFIG_CRYPTO_DEFLATE is not set
-# CONFIG_CRYPTO_MICHAEL_MIC is not set
-# CONFIG_CRYPTO_CRC32C is not set
-CONFIG_CRYPTO_CAMELLIA=m
-# CONFIG_CRYPTO_TEST is not set
-CONFIG_CRYPTO_AUTHENC=m
 CONFIG_CRYPTO_LZO=m
 CONFIG_CRYPTO_HW=y
 CONFIG_ZCRYPT=m
 # CONFIG_ZCRYPT_MONOLITHIC is not set
 # CONFIG_CRYPTO_SHA1_S390 is not set
 # CONFIG_CRYPTO_SHA256_S390 is not set
+CONFIG_CRYPTO_SHA512_S390=m
 # CONFIG_CRYPTO_DES_S390 is not set
 # CONFIG_CRYPTO_AES_S390 is not set
 CONFIG_S390_PRNG=m
@@ -836,6 +886,8 @@ CONFIG_S390_PRNG=m
 # Library routines
 #
 CONFIG_BITREVERSE=m
+# CONFIG_GENERIC_FIND_FIRST_BIT is not set
+# CONFIG_GENERIC_FIND_NEXT_BIT is not set
 # CONFIG_CRC_CCITT is not set
 # CONFIG_CRC16 is not set
 # CONFIG_CRC_ITU_T is not set
@@ -845,3 +897,9 @@ CONFIG_LIBCRC32C=m
 CONFIG_LZO_COMPRESS=m
 CONFIG_LZO_DECOMPRESS=m
 CONFIG_PLIST=y
+CONFIG_HAVE_KVM=y
+CONFIG_VIRTUALIZATION=y
+CONFIG_KVM=m
+CONFIG_VIRTIO=y
+CONFIG_VIRTIO_RING=y
+CONFIG_VIRTIO_BALLOON=m
diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile
index 4d3e38392cb1..6302f5082588 100644
--- a/arch/s390/kernel/Makefile
+++ b/arch/s390/kernel/Makefile
@@ -2,8 +2,6 @@
 # Makefile for the linux kernel.
 #
 
-EXTRA_AFLAGS	:= -traditional
-
 #
 # Passing null pointers is ok for smp code, since we access the lowcore here.
 #
@@ -11,7 +9,7 @@ CFLAGS_smp.o	:= -Wno-nonnull
 
 obj-y	:=  bitmap.o traps.o time.o process.o base.o early.o \
             setup.o sys_s390.o ptrace.o signal.o cpcmd.o ebcdic.o \
-	    semaphore.o s390_ext.o debug.o irq.o ipl.o dis.o diag.o
+	    s390_ext.o debug.o irq.o ipl.o dis.o diag.o
 
 obj-y	+= $(if $(CONFIG_64BIT),entry64.o,entry.o)
 obj-y	+= $(if $(CONFIG_64BIT),reipl64.o,reipl.o)
@@ -19,7 +17,7 @@ obj-y	+= $(if $(CONFIG_64BIT),reipl64.o,reipl.o)
 extra-y				+= head.o init_task.o vmlinux.lds
 
 obj-$(CONFIG_MODULES)		+= s390_ksyms.o module.o
-obj-$(CONFIG_SMP)		+= smp.o
+obj-$(CONFIG_SMP)		+= smp.o topology.o
 
 obj-$(CONFIG_AUDIT)		+= audit.o
 compat-obj-$(CONFIG_AUDIT)	+= compat_audit.o
diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c
index 1375f8a4469e..fa28ecae636b 100644
--- a/arch/s390/kernel/asm-offsets.c
+++ b/arch/s390/kernel/asm-offsets.c
@@ -5,44 +5,38 @@
  */
 
 #include <linux/sched.h>
-
-/* Use marker if you need to separate the values later */
-
-#define DEFINE(sym, val, marker) \
-	asm volatile("\n->" #sym " %0 " #val " " #marker : : "i" (val))
-
-#define BLANK() asm volatile("\n->" : : )
+#include <linux/kbuild.h>
 
 int main(void)
 {
-	DEFINE(__THREAD_info, offsetof(struct task_struct, stack),);
-	DEFINE(__THREAD_ksp, offsetof(struct task_struct, thread.ksp),);
-	DEFINE(__THREAD_per, offsetof(struct task_struct, thread.per_info),);
+	DEFINE(__THREAD_info, offsetof(struct task_struct, stack));
+	DEFINE(__THREAD_ksp, offsetof(struct task_struct, thread.ksp));
+	DEFINE(__THREAD_per, offsetof(struct task_struct, thread.per_info));
 	DEFINE(__THREAD_mm_segment,
-	       offsetof(struct task_struct, thread.mm_segment),);
+	       offsetof(struct task_struct, thread.mm_segment));
 	BLANK();
-	DEFINE(__TASK_pid, offsetof(struct task_struct, pid),);
+	DEFINE(__TASK_pid, offsetof(struct task_struct, pid));
 	BLANK();
-	DEFINE(__PER_atmid, offsetof(per_struct, lowcore.words.perc_atmid),);
-	DEFINE(__PER_address, offsetof(per_struct, lowcore.words.address),);
-	DEFINE(__PER_access_id, offsetof(per_struct, lowcore.words.access_id),);
+	DEFINE(__PER_atmid, offsetof(per_struct, lowcore.words.perc_atmid));
+	DEFINE(__PER_address, offsetof(per_struct, lowcore.words.address));
+	DEFINE(__PER_access_id, offsetof(per_struct, lowcore.words.access_id));
 	BLANK();
-	DEFINE(__TI_task, offsetof(struct thread_info, task),);
-	DEFINE(__TI_domain, offsetof(struct thread_info, exec_domain),);
-	DEFINE(__TI_flags, offsetof(struct thread_info, flags),);
-	DEFINE(__TI_cpu, offsetof(struct thread_info, cpu),);
-	DEFINE(__TI_precount, offsetof(struct thread_info, preempt_count),);
+	DEFINE(__TI_task, offsetof(struct thread_info, task));
+	DEFINE(__TI_domain, offsetof(struct thread_info, exec_domain));
+	DEFINE(__TI_flags, offsetof(struct thread_info, flags));
+	DEFINE(__TI_cpu, offsetof(struct thread_info, cpu));
+	DEFINE(__TI_precount, offsetof(struct thread_info, preempt_count));
 	BLANK();
-	DEFINE(__PT_ARGS, offsetof(struct pt_regs, args),);
-	DEFINE(__PT_PSW, offsetof(struct pt_regs, psw),);
-	DEFINE(__PT_GPRS, offsetof(struct pt_regs, gprs),);
-	DEFINE(__PT_ORIG_GPR2, offsetof(struct pt_regs, orig_gpr2),);
-	DEFINE(__PT_ILC, offsetof(struct pt_regs, ilc),);
-	DEFINE(__PT_TRAP, offsetof(struct pt_regs, trap),);
-	DEFINE(__PT_SIZE, sizeof(struct pt_regs),);
+	DEFINE(__PT_ARGS, offsetof(struct pt_regs, args));
+	DEFINE(__PT_PSW, offsetof(struct pt_regs, psw));
+	DEFINE(__PT_GPRS, offsetof(struct pt_regs, gprs));
+	DEFINE(__PT_ORIG_GPR2, offsetof(struct pt_regs, orig_gpr2));
+	DEFINE(__PT_ILC, offsetof(struct pt_regs, ilc));
+	DEFINE(__PT_TRAP, offsetof(struct pt_regs, trap));
+	DEFINE(__PT_SIZE, sizeof(struct pt_regs));
 	BLANK();
-	DEFINE(__SF_BACKCHAIN, offsetof(struct stack_frame, back_chain),);
-	DEFINE(__SF_GPRS, offsetof(struct stack_frame, gprs),);
-	DEFINE(__SF_EMPTY, offsetof(struct stack_frame, empty1),);
+	DEFINE(__SF_BACKCHAIN, offsetof(struct stack_frame, back_chain));
+	DEFINE(__SF_GPRS, offsetof(struct stack_frame, gprs));
+	DEFINE(__SF_EMPTY, offsetof(struct stack_frame, empty1));
 	return 0;
 }
diff --git a/arch/s390/kernel/compat_linux.c b/arch/s390/kernel/compat_linux.c
index 50b85d07ddd2..d7f22226fc4e 100644
--- a/arch/s390/kernel/compat_linux.c
+++ b/arch/s390/kernel/compat_linux.c
@@ -62,7 +62,6 @@
 
 #include <asm/types.h>
 #include <asm/uaccess.h>
-#include <asm/semaphore.h>
 
 #include <net/scm.h>
 #include <net/sock.h>
diff --git a/arch/s390/kernel/compat_linux.h b/arch/s390/kernel/compat_linux.h
index e89f8c0c42a0..20723a062017 100644
--- a/arch/s390/kernel/compat_linux.h
+++ b/arch/s390/kernel/compat_linux.h
@@ -162,4 +162,77 @@ struct ucontext32 {
 	compat_sigset_t		uc_sigmask;	/* mask last for extensibility */
 };
 
+struct __sysctl_args32;
+struct stat64_emu31;
+struct mmap_arg_struct_emu31;
+struct fadvise64_64_args;
+struct old_sigaction32;
+struct old_sigaction32;
+
+long sys32_chown16(const char __user * filename, u16 user, u16 group);
+long sys32_lchown16(const char __user * filename, u16 user, u16 group);
+long sys32_fchown16(unsigned int fd, u16 user, u16 group);
+long sys32_setregid16(u16 rgid, u16 egid);
+long sys32_setgid16(u16 gid);
+long sys32_setreuid16(u16 ruid, u16 euid);
+long sys32_setuid16(u16 uid);
+long sys32_setresuid16(u16 ruid, u16 euid, u16 suid);
+long sys32_getresuid16(u16 __user *ruid, u16 __user *euid, u16 __user *suid);
+long sys32_setresgid16(u16 rgid, u16 egid, u16 sgid);
+long sys32_getresgid16(u16 __user *rgid, u16 __user *egid, u16 __user *sgid);
+long sys32_setfsuid16(u16 uid);
+long sys32_setfsgid16(u16 gid);
+long sys32_getgroups16(int gidsetsize, u16 __user *grouplist);
+long sys32_setgroups16(int gidsetsize, u16 __user *grouplist);
+long sys32_getuid16(void);
+long sys32_geteuid16(void);
+long sys32_getgid16(void);
+long sys32_getegid16(void);
+long sys32_ipc(u32 call, int first, int second, int third, u32 ptr);
+long sys32_truncate64(const char __user * path, unsigned long high,
+		      unsigned long low);
+long sys32_ftruncate64(unsigned int fd, unsigned long high, unsigned long low);
+long sys32_sched_rr_get_interval(compat_pid_t pid,
+				 struct compat_timespec __user *interval);
+long sys32_rt_sigprocmask(int how, compat_sigset_t __user *set,
+			  compat_sigset_t __user *oset, size_t sigsetsize);
+long sys32_rt_sigpending(compat_sigset_t __user *set, size_t sigsetsize);
+long sys32_rt_sigqueueinfo(int pid, int sig, compat_siginfo_t __user *uinfo);
+long sys32_execve(void);
+long sys32_init_module(void __user *umod, unsigned long len,
+		       const char __user *uargs);
+long sys32_delete_module(const char __user *name_user, unsigned int flags);
+long sys32_gettimeofday(struct compat_timeval __user *tv,
+			struct timezone __user *tz);
+long sys32_settimeofday(struct compat_timeval __user *tv,
+			struct timezone __user *tz);
+long sys32_pause(void);
+long sys32_pread64(unsigned int fd, char __user *ubuf, size_t count,
+		   u32 poshi, u32 poslo);
+long sys32_pwrite64(unsigned int fd, const char __user *ubuf,
+		    size_t count, u32 poshi, u32 poslo);
+compat_ssize_t sys32_readahead(int fd, u32 offhi, u32 offlo, s32 count);
+long sys32_sendfile(int out_fd, int in_fd, compat_off_t __user *offset,
+		    size_t count);
+long sys32_sendfile64(int out_fd, int in_fd, compat_loff_t __user *offset,
+		      s32 count);
+long sys32_sysctl(struct __sysctl_args32 __user *args);
+long sys32_stat64(char __user * filename, struct stat64_emu31 __user * statbuf);
+long sys32_lstat64(char __user * filename,
+		   struct stat64_emu31 __user * statbuf);
+long sys32_fstat64(unsigned long fd, struct stat64_emu31 __user * statbuf);
+long sys32_fstatat64(unsigned int dfd, char __user *filename,
+		     struct stat64_emu31 __user* statbuf, int flag);
+unsigned long old32_mmap(struct mmap_arg_struct_emu31 __user *arg);
+long sys32_mmap2(struct mmap_arg_struct_emu31 __user *arg);
+long sys32_read(unsigned int fd, char __user * buf, size_t count);
+long sys32_write(unsigned int fd, char __user * buf, size_t count);
+long sys32_clone(void);
+long sys32_fadvise64(int fd, loff_t offset, size_t len, int advise);
+long sys32_fadvise64_64(struct fadvise64_64_args __user *args);
+long sys32_sigaction(int sig, const struct old_sigaction32 __user *act,
+		     struct old_sigaction32 __user *oact);
+long sys32_rt_sigaction(int sig, const struct sigaction32 __user *act,
+			struct sigaction32 __user *oact, size_t sigsetsize);
+long sys32_sigaltstack(const stack_t32 __user *uss, stack_t32 __user *uoss);
 #endif /* _ASM_S390X_S390_H */
diff --git a/arch/s390/kernel/compat_signal.c b/arch/s390/kernel/compat_signal.c
index a5692c460bad..c7f02e777af2 100644
--- a/arch/s390/kernel/compat_signal.c
+++ b/arch/s390/kernel/compat_signal.c
@@ -29,6 +29,7 @@
 #include <asm/lowcore.h>
 #include "compat_linux.h"
 #include "compat_ptrace.h"
+#include "entry.h"
 
 #define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP)))
 
@@ -428,6 +429,10 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs * regs, size_t frame_size)
 	/* Default to using normal stack */
 	sp = (unsigned long) A(regs->gprs[15]);
 
+	/* Overflow on alternate signal stack gives SIGSEGV. */
+	if (on_sig_stack(sp) && !on_sig_stack((sp - frame_size) & -8UL))
+		return (void __user *) -1UL;
+
 	/* This is the X/Open sanctioned signal stack switching.  */
 	if (ka->sa.sa_flags & SA_ONSTACK) {
 		if (! sas_ss_flags(sp))
@@ -461,6 +466,9 @@ static int setup_frame32(int sig, struct k_sigaction *ka,
 	if (!access_ok(VERIFY_WRITE, frame, sizeof(sigframe32)))
 		goto give_sigsegv;
 
+	if (frame == (void __user *) -1UL)
+		goto give_sigsegv;
+
 	if (__copy_to_user(&frame->sc.oldmask, &set->sig, _SIGMASK_COPY_SIZE32))
 		goto give_sigsegv;
 
@@ -514,6 +522,9 @@ static int setup_rt_frame32(int sig, struct k_sigaction *ka, siginfo_t *info,
 	if (!access_ok(VERIFY_WRITE, frame, sizeof(rt_sigframe32)))
 		goto give_sigsegv;
 
+	if (frame == (void __user *) -1UL)
+		goto give_sigsegv;
+
 	if (copy_siginfo_to_user32(&frame->info, info))
 		goto give_sigsegv;
 
diff --git a/arch/s390/kernel/debug.c b/arch/s390/kernel/debug.c
index 1b2f5ce45320..dff0568e67ec 100644
--- a/arch/s390/kernel/debug.c
+++ b/arch/s390/kernel/debug.c
@@ -17,7 +17,6 @@
 #include <linux/ctype.h>
 #include <linux/sysctl.h>
 #include <asm/uaccess.h>
-#include <asm/semaphore.h>
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/fs.h>
@@ -73,7 +72,7 @@ static ssize_t debug_input(struct file *file, const char __user *user_buf,
 static int debug_open(struct inode *inode, struct file *file);
 static int debug_close(struct inode *inode, struct file *file);
 static debug_info_t*  debug_info_create(char *name, int pages_per_area,
-			int nr_areas, int buf_size);
+			int nr_areas, int buf_size, mode_t mode);
 static void debug_info_get(debug_info_t *);
 static void debug_info_put(debug_info_t *);
 static int debug_prolog_level_fn(debug_info_t * id,
@@ -157,7 +156,7 @@ struct debug_view debug_sprintf_view = {
 };
 
 /* used by dump analysis tools to determine version of debug feature */
-unsigned int debug_feature_version = __DEBUG_FEATURE_VERSION;
+static unsigned int __used debug_feature_version = __DEBUG_FEATURE_VERSION;
 
 /* static globals */
 
@@ -327,7 +326,8 @@ debug_info_free(debug_info_t* db_info){
  */
 
 static debug_info_t*
-debug_info_create(char *name, int pages_per_area, int nr_areas, int buf_size)
+debug_info_create(char *name, int pages_per_area, int nr_areas, int buf_size,
+		  mode_t mode)
 {
 	debug_info_t* rc;
 
@@ -336,6 +336,8 @@ debug_info_create(char *name, int pages_per_area, int nr_areas, int buf_size)
         if(!rc) 
 		goto out;
 
+	rc->mode = mode & ~S_IFMT;
+
 	/* create root directory */
         rc->debugfs_root_entry = debugfs_create_dir(rc->name,
 					debug_debugfs_root_entry);
@@ -676,23 +678,30 @@ debug_close(struct inode *inode, struct file *file)
 }
 
 /*
- * debug_register:
- * - creates and initializes debug area for the caller
- * - returns handle for debug area
+ * debug_register_mode:
+ * - Creates and initializes debug area for the caller
+ *   The mode parameter specifies the access rights for the s390dbf files
+ * - Returns handle for debug area
  */
 
-debug_info_t*
-debug_register (char *name, int pages_per_area, int nr_areas, int buf_size)
+debug_info_t *debug_register_mode(char *name, int pages_per_area, int nr_areas,
+				  int buf_size, mode_t mode, uid_t uid,
+				  gid_t gid)
 {
 	debug_info_t *rc = NULL;
 
+	/* Since debugfs currently does not support uid/gid other than root, */
+	/* we do not allow gid/uid != 0 until we get support for that. */
+	if ((uid != 0) || (gid != 0))
+		printk(KERN_WARNING "debug: Warning - Currently only uid/gid "
+		       "= 0 are supported. Using root as owner now!");
 	if (!initialized)
 		BUG();
 	mutex_lock(&debug_mutex);
 
         /* create new debug_info */
 
-	rc = debug_info_create(name, pages_per_area, nr_areas, buf_size);
+	rc = debug_info_create(name, pages_per_area, nr_areas, buf_size, mode);
 	if(!rc) 
 		goto out;
 	debug_register_view(rc, &debug_level_view);
@@ -705,6 +714,20 @@ out:
 	mutex_unlock(&debug_mutex);
 	return rc;
 }
+EXPORT_SYMBOL(debug_register_mode);
+
+/*
+ * debug_register:
+ * - creates and initializes debug area for the caller
+ * - returns handle for debug area
+ */
+
+debug_info_t *debug_register(char *name, int pages_per_area, int nr_areas,
+			     int buf_size)
+{
+	return debug_register_mode(name, pages_per_area, nr_areas, buf_size,
+				   S_IRUSR | S_IWUSR, 0, 0);
+}
 
 /*
  * debug_unregister:
@@ -1073,15 +1096,16 @@ debug_register_view(debug_info_t * id, struct debug_view *view)
 	int rc = 0;
 	int i;
 	unsigned long flags;
-	mode_t mode = S_IFREG;
+	mode_t mode;
 	struct dentry *pde;
 
 	if (!id)
 		goto out;
-	if (view->prolog_proc || view->format_proc || view->header_proc)
-		mode |= S_IRUSR;
-	if (view->input_proc)
-		mode |= S_IWUSR;
+	mode = (id->mode | S_IFREG) & ~S_IXUGO;
+	if (!(view->prolog_proc || view->format_proc || view->header_proc))
+		mode &= ~(S_IRUSR | S_IRGRP | S_IROTH);
+	if (!view->input_proc)
+		mode &= ~(S_IWUSR | S_IWGRP | S_IWOTH);
 	pde = debugfs_create_file(view->name, mode, id->debugfs_root_entry,
 				id , &debug_file_ops);
 	if (!pde){
diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c
index 01832c440636..d0e09684b9ce 100644
--- a/arch/s390/kernel/early.c
+++ b/arch/s390/kernel/early.c
@@ -21,6 +21,7 @@
 #include <asm/setup.h>
 #include <asm/cpcmd.h>
 #include <asm/sclp.h>
+#include "entry.h"
 
 /*
  * Create a Kernel NSS if the SAVESYS= parameter is defined
@@ -138,11 +139,15 @@ static noinline __init void detect_machine_type(void)
 
 	/* Running under z/VM ? */
 	if (cpuinfo->cpu_id.version == 0xff)
-		machine_flags |= 1;
+		machine_flags |= MACHINE_FLAG_VM;
 
 	/* Running on a P/390 ? */
 	if (cpuinfo->cpu_id.machine == 0x7490)
-		machine_flags |= 4;
+		machine_flags |= MACHINE_FLAG_P390;
+
+	/* Running under KVM ? */
+	if (cpuinfo->cpu_id.version == 0xfe)
+		machine_flags |= MACHINE_FLAG_KVM;
 }
 
 #ifdef CONFIG_64BIT
@@ -263,6 +268,118 @@ static noinline __init void setup_lowcore_early(void)
 	s390_base_pgm_handler_fn = early_pgm_check_handler;
 }
 
+static noinline __init void setup_hpage(void)
+{
+#ifndef CONFIG_DEBUG_PAGEALLOC
+	unsigned int facilities;
+
+	facilities = stfl();
+	if (!(facilities & (1UL << 23)) || !(facilities & (1UL << 29)))
+		return;
+	machine_flags |= MACHINE_FLAG_HPAGE;
+	__ctl_set_bit(0, 23);
+#endif
+}
+
+static __init void detect_mvpg(void)
+{
+#ifndef CONFIG_64BIT
+	int rc;
+
+	asm volatile(
+		"	la	0,0\n"
+		"	mvpg	%2,%2\n"
+		"0:	la	%0,0\n"
+		"1:\n"
+		EX_TABLE(0b,1b)
+		: "=d" (rc) : "0" (-EOPNOTSUPP), "a" (0) : "memory", "cc", "0");
+	if (!rc)
+		machine_flags |= MACHINE_FLAG_MVPG;
+#endif
+}
+
+static __init void detect_ieee(void)
+{
+#ifndef CONFIG_64BIT
+	int rc, tmp;
+
+	asm volatile(
+		"	efpc	%1,0\n"
+		"0:	la	%0,0\n"
+		"1:\n"
+		EX_TABLE(0b,1b)
+		: "=d" (rc), "=d" (tmp): "0" (-EOPNOTSUPP) : "cc");
+	if (!rc)
+		machine_flags |= MACHINE_FLAG_IEEE;
+#endif
+}
+
+static __init void detect_csp(void)
+{
+#ifndef CONFIG_64BIT
+	int rc;
+
+	asm volatile(
+		"	la	0,0\n"
+		"	la	1,0\n"
+		"	la	2,4\n"
+		"	csp	0,2\n"
+		"0:	la	%0,0\n"
+		"1:\n"
+		EX_TABLE(0b,1b)
+		: "=d" (rc) : "0" (-EOPNOTSUPP) : "cc", "0", "1", "2");
+	if (!rc)
+		machine_flags |= MACHINE_FLAG_CSP;
+#endif
+}
+
+static __init void detect_diag9c(void)
+{
+	unsigned int cpu_address;
+	int rc;
+
+	cpu_address = stap();
+	asm volatile(
+		"	diag	%2,0,0x9c\n"
+		"0:	la	%0,0\n"
+		"1:\n"
+		EX_TABLE(0b,1b)
+		: "=d" (rc) : "0" (-EOPNOTSUPP), "d" (cpu_address) : "cc");
+	if (!rc)
+		machine_flags |= MACHINE_FLAG_DIAG9C;
+}
+
+static __init void detect_diag44(void)
+{
+#ifdef CONFIG_64BIT
+	int rc;
+
+	asm volatile(
+		"	diag	0,0,0x44\n"
+		"0:	la	%0,0\n"
+		"1:\n"
+		EX_TABLE(0b,1b)
+		: "=d" (rc) : "0" (-EOPNOTSUPP) : "cc");
+	if (!rc)
+		machine_flags |= MACHINE_FLAG_DIAG44;
+#endif
+}
+
+static __init void detect_machine_facilities(void)
+{
+#ifdef CONFIG_64BIT
+	unsigned int facilities;
+
+	facilities = stfl();
+	if (facilities & (1 << 28))
+		machine_flags |= MACHINE_FLAG_IDTE;
+	if (facilities & (1 << 23))
+		machine_flags |= MACHINE_FLAG_PFMF;
+	if (facilities & (1 << 4))
+		machine_flags |= MACHINE_FLAG_MVCOS;
+#endif
+}
+
 /*
  * Save ipl parameters, clear bss memory, initialize storage keys
  * and create a kernel NSS at startup if the SAVESYS= parm is defined
@@ -280,6 +397,13 @@ void __init startup_init(void)
 	create_kernel_nss();
 	sort_main_extable();
 	setup_lowcore_early();
+	detect_mvpg();
+	detect_ieee();
+	detect_csp();
+	detect_diag9c();
+	detect_diag44();
+	detect_machine_facilities();
+	setup_hpage();
 	sclp_read_info_early();
 	sclp_facilities_detect();
 	memsize = sclp_memory_detect();
diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S
index 6766e37fe8ea..bdbb3bcd78a5 100644
--- a/arch/s390/kernel/entry.S
+++ b/arch/s390/kernel/entry.S
@@ -49,9 +49,9 @@ SP_ILC	     =	STACK_FRAME_OVERHEAD + __PT_ILC
 SP_TRAP      =	STACK_FRAME_OVERHEAD + __PT_TRAP
 SP_SIZE      =	STACK_FRAME_OVERHEAD + __PT_SIZE
 
-_TIF_WORK_SVC = (_TIF_SIGPENDING | _TIF_RESTORE_SIGMASK | _TIF_NEED_RESCHED | \
+_TIF_WORK_SVC = (_TIF_SIGPENDING | _TIF_NEED_RESCHED | \
 		 _TIF_MCCK_PENDING | _TIF_RESTART_SVC | _TIF_SINGLE_STEP )
-_TIF_WORK_INT = (_TIF_SIGPENDING | _TIF_RESTORE_SIGMASK | _TIF_NEED_RESCHED | \
+_TIF_WORK_INT = (_TIF_SIGPENDING | _TIF_NEED_RESCHED | \
 		 _TIF_MCCK_PENDING)
 
 STACK_SHIFT = PAGE_SHIFT + THREAD_ORDER
@@ -316,7 +316,7 @@ sysc_work:
 	bo	BASED(sysc_mcck_pending)
 	tm	__TI_flags+3(%r9),_TIF_NEED_RESCHED
 	bo	BASED(sysc_reschedule)
-	tm	__TI_flags+3(%r9),(_TIF_SIGPENDING | _TIF_RESTORE_SIGMASK)
+	tm	__TI_flags+3(%r9),_TIF_SIGPENDING
 	bnz	BASED(sysc_sigpending)
 	tm	__TI_flags+3(%r9),_TIF_RESTART_SVC
 	bo	BASED(sysc_restart)
@@ -342,7 +342,7 @@ sysc_mcck_pending:
 	br	%r1			# TIF bit will be cleared by handler
 
 #
-# _TIF_SIGPENDING or _TIF_RESTORE_SIGMASK is set, call do_signal
+# _TIF_SIGPENDING is set, call do_signal
 #
 sysc_sigpending:
 	ni	__TI_flags+3(%r9),255-_TIF_SINGLE_STEP # clear TIF_SINGLE_STEP
@@ -657,7 +657,7 @@ io_work:
 	lr	%r15,%r1
 #
 # One of the work bits is on. Find out which one.
-# Checked are: _TIF_SIGPENDING, _TIF_RESTORE_SIGMASK, _TIF_NEED_RESCHED
+# Checked are: _TIF_SIGPENDING, _TIF_NEED_RESCHED
 #		and _TIF_MCCK_PENDING
 #
 io_work_loop:
@@ -665,7 +665,7 @@ io_work_loop:
 	bo	BASED(io_mcck_pending)
 	tm	__TI_flags+3(%r9),_TIF_NEED_RESCHED
 	bo	BASED(io_reschedule)
-	tm	__TI_flags+3(%r9),(_TIF_SIGPENDING | _TIF_RESTORE_SIGMASK)
+	tm	__TI_flags+3(%r9),_TIF_SIGPENDING
 	bnz	BASED(io_sigpending)
 	b	BASED(io_restore)
 io_work_done:
@@ -693,7 +693,7 @@ io_reschedule:
 	b	BASED(io_work_loop)
 
 #
-# _TIF_SIGPENDING or _TIF_RESTORE_SIGMASK is set, call do_signal
+# _TIF_SIGPENDING is set, call do_signal
 #
 io_sigpending:
 	TRACE_IRQS_ON
diff --git a/arch/s390/kernel/entry.h b/arch/s390/kernel/entry.h
new file mode 100644
index 000000000000..6b1896345eda
--- /dev/null
+++ b/arch/s390/kernel/entry.h
@@ -0,0 +1,60 @@
+#ifndef _ENTRY_H
+#define _ENTRY_H
+
+#include <linux/types.h>
+#include <linux/signal.h>
+#include <asm/ptrace.h>
+
+typedef void pgm_check_handler_t(struct pt_regs *, long);
+extern pgm_check_handler_t *pgm_check_table[128];
+pgm_check_handler_t do_protection_exception;
+pgm_check_handler_t do_dat_exception;
+
+extern int sysctl_userprocess_debug;
+
+void do_single_step(struct pt_regs *regs);
+void syscall_trace(struct pt_regs *regs, int entryexit);
+void kernel_stack_overflow(struct pt_regs * regs);
+void do_signal(struct pt_regs *regs);
+int handle_signal32(unsigned long sig, struct k_sigaction *ka,
+		    siginfo_t *info, sigset_t *oldset, struct pt_regs *regs);
+
+void do_extint(struct pt_regs *regs, unsigned short code);
+int __cpuinit start_secondary(void *cpuvoid);
+void __init startup_init(void);
+void die(const char * str, struct pt_regs * regs, long err);
+
+struct new_utsname;
+struct mmap_arg_struct;
+struct fadvise64_64_args;
+struct old_sigaction;
+struct sel_arg_struct;
+
+long sys_pipe(unsigned long __user *fildes);
+long sys_mmap2(struct mmap_arg_struct __user  *arg);
+long old_mmap(struct mmap_arg_struct __user *arg);
+long sys_ipc(uint call, int first, unsigned long second,
+	     unsigned long third, void __user *ptr);
+long s390x_newuname(struct new_utsname __user *name);
+long s390x_personality(unsigned long personality);
+long s390_fadvise64(int fd, u32 offset_high, u32 offset_low,
+		    size_t len, int advice);
+long s390_fadvise64_64(struct fadvise64_64_args __user *args);
+long s390_fallocate(int fd, int mode, loff_t offset, u32 len_high, u32 len_low);
+long sys_fork(void);
+long sys_clone(void);
+long sys_vfork(void);
+void execve_tail(void);
+long sys_execve(void);
+int sys_sigsuspend(int history0, int history1, old_sigset_t mask);
+long sys_sigaction(int sig, const struct old_sigaction __user *act,
+		   struct old_sigaction __user *oact);
+long sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss);
+long sys_sigreturn(void);
+long sys_rt_sigreturn(void);
+long sys32_sigreturn(void);
+long sys32_rt_sigreturn(void);
+long old_select(struct sel_arg_struct __user *arg);
+long sys_ptrace(long request, long pid, long addr, long data);
+
+#endif /* _ENTRY_H */
diff --git a/arch/s390/kernel/entry64.S b/arch/s390/kernel/entry64.S
index efde6e178f6c..5a4a7bcd2bba 100644
--- a/arch/s390/kernel/entry64.S
+++ b/arch/s390/kernel/entry64.S
@@ -52,9 +52,9 @@ SP_SIZE      =	STACK_FRAME_OVERHEAD + __PT_SIZE
 STACK_SHIFT = PAGE_SHIFT + THREAD_ORDER
 STACK_SIZE  = 1 << STACK_SHIFT
 
-_TIF_WORK_SVC = (_TIF_SIGPENDING | _TIF_RESTORE_SIGMASK | _TIF_NEED_RESCHED | \
+_TIF_WORK_SVC = (_TIF_SIGPENDING | _TIF_NEED_RESCHED | \
 		 _TIF_MCCK_PENDING | _TIF_RESTART_SVC | _TIF_SINGLE_STEP )
-_TIF_WORK_INT = (_TIF_SIGPENDING | _TIF_RESTORE_SIGMASK | _TIF_NEED_RESCHED | \
+_TIF_WORK_INT = (_TIF_SIGPENDING | _TIF_NEED_RESCHED | \
 		 _TIF_MCCK_PENDING)
 
 #define BASED(name) name-system_call(%r13)
@@ -308,7 +308,7 @@ sysc_work:
 	jo	sysc_mcck_pending
 	tm	__TI_flags+7(%r9),_TIF_NEED_RESCHED
 	jo	sysc_reschedule
-	tm	__TI_flags+7(%r9),(_TIF_SIGPENDING | _TIF_RESTORE_SIGMASK)
+	tm	__TI_flags+7(%r9),_TIF_SIGPENDING
 	jnz	sysc_sigpending
 	tm	__TI_flags+7(%r9),_TIF_RESTART_SVC
 	jo	sysc_restart
@@ -332,7 +332,7 @@ sysc_mcck_pending:
 	jg	s390_handle_mcck	# TIF bit will be cleared by handler
 
 #
-# _TIF_SIGPENDING or _TIF_RESTORE_SIGMASK is set, call do_signal
+# _TIF_SIGPENDING is set, call do_signal
 #
 sysc_sigpending:
 	ni	__TI_flags+7(%r9),255-_TIF_SINGLE_STEP # clear TIF_SINGLE_STEP
@@ -475,6 +475,7 @@ pgm_check_handler:
 pgm_no_vtime:
 #endif
 	lg	%r9,__LC_THREAD_INFO	# load pointer to thread_info struct
+	mvc	SP_ARGS(8,%r15),__LC_LAST_BREAK
 	TRACE_IRQS_OFF
 	lgf	%r3,__LC_PGM_ILC	# load program interruption code
 	lghi	%r8,0x7f
@@ -647,7 +648,7 @@ io_work_loop:
 	jo	io_mcck_pending
 	tm	__TI_flags+7(%r9),_TIF_NEED_RESCHED
 	jo	io_reschedule
-	tm	__TI_flags+7(%r9),(_TIF_SIGPENDING | _TIF_RESTORE_SIGMASK)
+	tm	__TI_flags+7(%r9),_TIF_SIGPENDING
 	jnz	io_sigpending
 	j	io_restore
 io_work_done:
@@ -673,7 +674,7 @@ io_reschedule:
 	j	io_work_loop
 
 #
-# _TIF_SIGPENDING or _TIF_RESTORE_SIGMASK is set, call do_signal
+# _TIF_SIGPENDING is set, call do_signal
 #
 io_sigpending:
 	TRACE_IRQS_ON
@@ -847,6 +848,7 @@ stack_overflow:
 	je	0f
 	la	%r1,__LC_SAVE_AREA+32
 0:	mvc	SP_R12(32,%r15),0(%r1)	# move %r12-%r15 to stack
+	mvc	SP_ARGS(8,%r15),__LC_LAST_BREAK
 	xc	__SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) # clear back chain
 	la	%r2,SP_PTREGS(%r15)	# load pt_regs
 	jg	kernel_stack_overflow
diff --git a/arch/s390/kernel/head31.S b/arch/s390/kernel/head31.S
index dc364c1419af..a816e2de32b9 100644
--- a/arch/s390/kernel/head31.S
+++ b/arch/s390/kernel/head31.S
@@ -57,61 +57,6 @@ startup_continue:
 #
 	l	%r14,.Lstartup_init-.LPG1(%r13)
 	basr	%r14,%r14
-
-	l	%r12,.Lmflags-.LPG1(%r13) # get address of machine_flags
-#
-# find out if we have an IEEE fpu
-#
-	mvc	__LC_PGM_NEW_PSW(8),.Lpcfpu-.LPG1(%r13)
-	efpc	%r0,0			# test IEEE extract fpc instruction
-	oi	3(%r12),2		# set IEEE fpu flag
-.Lchkfpu:
-
-#
-# find out if we have the CSP instruction
-#
-       mvc	 __LC_PGM_NEW_PSW(8),.Lpccsp-.LPG1(%r13)
-       la	 %r0,0
-       lr	%r1,%r0
-       la	%r2,4
-       csp	%r0,%r2			# Test CSP instruction
-       oi	3(%r12),8		# set CSP flag
-.Lchkcsp:
-
-#
-# find out if we have the MVPG instruction
-#
-       mvc	__LC_PGM_NEW_PSW(8),.Lpcmvpg-.LPG1(%r13)
-       sr	%r0,%r0
-       la	%r1,0
-       la	%r2,0
-       mvpg	%r1,%r2			# Test CSP instruction
-       oi	3(%r12),16		# set MVPG flag
-.Lchkmvpg:
-
-#
-# find out if we have the IDTE instruction
-#
-	mvc	__LC_PGM_NEW_PSW(8),.Lpcidte-.LPG1(%r13)
-	.long	0xb2b10000		# store facility list
-	tm	0xc8,0x08		# check bit for clearing-by-ASCE
-	bno	.Lchkidte-.LPG1(%r13)
-	lhi	%r1,2094
-	lhi	%r2,0
-	.long	0xb98e2001
-	oi	3(%r12),0x80		# set IDTE flag
-.Lchkidte:
-
-#
-# find out if the diag 0x9c is available
-#
-	mvc	__LC_PGM_NEW_PSW(8),.Lpcdiag9c-.LPG1(%r13)
-	stap	__LC_CPUID+4		# store cpu address
-	lh	%r1,__LC_CPUID+4
-	diag	%r1,0,0x9c		# test diag 0x9c
-	oi	2(%r12),1		# set diag9c flag
-.Lchkdiag9c:
-
 	lpsw  .Lentry-.LPG1(13)		# jump to _stext in primary-space,
 					# virtual and never return ...
 	.align	8
@@ -132,13 +77,7 @@ startup_continue:
 	.long	0			# cr13: home space segment table
 	.long	0xc0000000		# cr14: machine check handling off
 	.long	0			# cr15: linkage stack operations
-.Lpcfpu:.long	0x00080000,0x80000000 + .Lchkfpu
-.Lpccsp:.long	0x00080000,0x80000000 + .Lchkcsp
-.Lpcmvpg:.long	0x00080000,0x80000000 + .Lchkmvpg
-.Lpcidte:.long	0x00080000,0x80000000 + .Lchkidte
-.Lpcdiag9c:.long 0x00080000,0x80000000 + .Lchkdiag9c
 .Lmchunk:.long	memory_chunk
-.Lmflags:.long	machine_flags
 .Lbss_bgn:  .long __bss_start
 .Lbss_end:  .long _end
 .Lparmaddr: .long PARMAREA
diff --git a/arch/s390/kernel/head64.S b/arch/s390/kernel/head64.S
index 79dccd206a6e..1d06961e87b3 100644
--- a/arch/s390/kernel/head64.S
+++ b/arch/s390/kernel/head64.S
@@ -125,73 +125,11 @@ startup_continue:
 # and create a kernel NSS if the SAVESYS= parm is defined
 #
 	brasl	%r14,startup_init
-					# set program check new psw mask
-	mvc	__LC_PGM_NEW_PSW(8),.Lpcmsk-.LPG1(%r13)
-	larl	%r12,machine_flags
-#
-# find out if we have the MVPG instruction
-#
-	la	%r1,0f-.LPG1(%r13)	# set program check address
-	stg	%r1,__LC_PGM_NEW_PSW+8
-	sgr	%r0,%r0
-	lghi	%r1,0
-	lghi	%r2,0
-	mvpg	%r1,%r2 		# test MVPG instruction
-	oi	7(%r12),16		# set MVPG flag
-0:
-
-#
-# find out if the diag 0x44 works in 64 bit mode
-#
-	la	%r1,0f-.LPG1(%r13)	# set program check address
-	stg	%r1,__LC_PGM_NEW_PSW+8
-	diag	0,0,0x44		# test diag 0x44
-	oi	7(%r12),32		# set diag44 flag
-0:
-
-#
-# find out if we have the IDTE instruction
-#
-	la	%r1,0f-.LPG1(%r13)	# set program check address
-	stg	%r1,__LC_PGM_NEW_PSW+8
-	.long	0xb2b10000		# store facility list
-	tm	0xc8,0x08		# check bit for clearing-by-ASCE
-	bno	0f-.LPG1(%r13)
-	lhi	%r1,2048
-	lhi	%r2,0
-	.long	0xb98e2001
-	oi	7(%r12),0x80		# set IDTE flag
-0:
-
-#
-# find out if the diag 0x9c is available
-#
-	la	%r1,0f-.LPG1(%r13)	# set program check address
-	stg	%r1,__LC_PGM_NEW_PSW+8
-	stap	__LC_CPUID+4		# store cpu address
-	lh	%r1,__LC_CPUID+4
-	diag	%r1,0,0x9c		# test diag 0x9c
-	oi	6(%r12),1		# set diag9c flag
-0:
-
-#
-# find out if we have the MVCOS instruction
-#
-	la	%r1,0f-.LPG1(%r13)	# set program check address
-	stg	%r1,__LC_PGM_NEW_PSW+8
-	.short	0xc800			# mvcos 0(%r0),0(%r0),%r0
-	.short	0x0000
-	.short	0x0000
-0:	tm	0x8f,0x13		# special-operation exception?
-	bno	1f-.LPG1(%r13)		# if yes, MVCOS is present
-	oi	6(%r12),2		# set MVCOS flag
-1:
-
 	lpswe	.Lentry-.LPG1(13)	# jump to _stext in primary-space,
 					# virtual and never return ...
 	.align	16
 .Lentry:.quad	0x0000000180000000,_stext
-.Lctl:	.quad	0x04b50002		# cr0: various things
+.Lctl:	.quad	0x04350002		# cr0: various things
 	.quad	0			# cr1: primary space segment table
 	.quad	.Lduct			# cr2: dispatchable unit control table
 	.quad	0			# cr3: instruction authorization
diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c
index 375232c46c7a..532542447d66 100644
--- a/arch/s390/kernel/ipl.c
+++ b/arch/s390/kernel/ipl.c
@@ -655,7 +655,7 @@ static struct kobj_attribute reipl_type_attr =
 
 static struct kset *reipl_kset;
 
-void reipl_run(struct shutdown_trigger *trigger)
+static void reipl_run(struct shutdown_trigger *trigger)
 {
 	struct ccw_dev_id devid;
 	static char buf[100];
diff --git a/arch/s390/kernel/irq.c b/arch/s390/kernel/irq.c
index c36d8123ca14..c59a86dca584 100644
--- a/arch/s390/kernel/irq.c
+++ b/arch/s390/kernel/irq.c
@@ -60,8 +60,6 @@ init_IRQ(void)
 /*
  * Switch to the asynchronous interrupt stack for softirq execution.
  */
-extern void __do_softirq(void);
-
 asmlinkage void do_softirq(void)
 {
 	unsigned long flags, old, new;
diff --git a/arch/s390/kernel/kprobes.c b/arch/s390/kernel/kprobes.c
index c5549a206284..ed04d1372d5d 100644
--- a/arch/s390/kernel/kprobes.c
+++ b/arch/s390/kernel/kprobes.c
@@ -360,7 +360,7 @@ no_kprobe:
  *	- When the probed function returns, this probe
  *		causes the handlers to fire
  */
-void kretprobe_trampoline_holder(void)
+static void __used kretprobe_trampoline_holder(void)
 {
 	asm volatile(".global kretprobe_trampoline\n"
 		     "kretprobe_trampoline: bcr 0,0\n");
diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c
index ce203154d8ce..7920861109d2 100644
--- a/arch/s390/kernel/process.c
+++ b/arch/s390/kernel/process.c
@@ -36,6 +36,8 @@
 #include <linux/module.h>
 #include <linux/notifier.h>
 #include <linux/utsname.h>
+#include <linux/tick.h>
+#include <linux/elfcore.h>
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
 #include <asm/system.h>
@@ -44,6 +46,7 @@
 #include <asm/irq.h>
 #include <asm/timer.h>
 #include <asm/cpu.h>
+#include "entry.h"
 
 asmlinkage void ret_from_fork(void) asm ("ret_from_fork");
 
@@ -76,6 +79,7 @@ unsigned long thread_saved_pc(struct task_struct *tsk)
  * Need to know about CPUs going idle?
  */
 static ATOMIC_NOTIFIER_HEAD(idle_chain);
+DEFINE_PER_CPU(struct s390_idle_data, s390_idle);
 
 int register_idle_notifier(struct notifier_block *nb)
 {
@@ -89,9 +93,33 @@ int unregister_idle_notifier(struct notifier_block *nb)
 }
 EXPORT_SYMBOL(unregister_idle_notifier);
 
-void do_monitor_call(struct pt_regs *regs, long interruption_code)
+static int s390_idle_enter(void)
+{
+	struct s390_idle_data *idle;
+	int nr_calls = 0;
+	void *hcpu;
+	int rc;
+
+	hcpu = (void *)(long)smp_processor_id();
+	rc = __atomic_notifier_call_chain(&idle_chain, S390_CPU_IDLE, hcpu, -1,
+					  &nr_calls);
+	if (rc == NOTIFY_BAD) {
+		nr_calls--;
+		__atomic_notifier_call_chain(&idle_chain, S390_CPU_NOT_IDLE,
+					     hcpu, nr_calls, NULL);
+		return rc;
+	}
+	idle = &__get_cpu_var(s390_idle);
+	spin_lock(&idle->lock);
+	idle->idle_count++;
+	idle->in_idle = 1;
+	idle->idle_enter = get_clock();
+	spin_unlock(&idle->lock);
+	return NOTIFY_OK;
+}
+
+void s390_idle_leave(void)
 {
-#ifdef CONFIG_SMP
 	struct s390_idle_data *idle;
 
 	idle = &__get_cpu_var(s390_idle);
@@ -99,10 +127,6 @@ void do_monitor_call(struct pt_regs *regs, long interruption_code)
 	idle->idle_time += get_clock() - idle->idle_enter;
 	idle->in_idle = 0;
 	spin_unlock(&idle->lock);
-#endif
-	/* disable monitor call class 0 */
-	__ctl_clear_bit(8, 15);
-
 	atomic_notifier_call_chain(&idle_chain, S390_CPU_NOT_IDLE,
 				   (void *)(long) smp_processor_id());
 }
@@ -113,61 +137,30 @@ extern void s390_handle_mcck(void);
  */
 static void default_idle(void)
 {
-	int cpu, rc;
-	int nr_calls = 0;
-	void *hcpu;
-#ifdef CONFIG_SMP
-	struct s390_idle_data *idle;
-#endif
-
 	/* CPU is going idle. */
-	cpu = smp_processor_id();
-	hcpu = (void *)(long)cpu;
 	local_irq_disable();
 	if (need_resched()) {
 		local_irq_enable();
 		return;
 	}
-
-	rc = __atomic_notifier_call_chain(&idle_chain, S390_CPU_IDLE, hcpu, -1,
-					  &nr_calls);
-	if (rc == NOTIFY_BAD) {
-		nr_calls--;
-		__atomic_notifier_call_chain(&idle_chain, S390_CPU_NOT_IDLE,
-					     hcpu, nr_calls, NULL);
+	if (s390_idle_enter() == NOTIFY_BAD) {
 		local_irq_enable();
 		return;
 	}
-
-	/* enable monitor call class 0 */
-	__ctl_set_bit(8, 15);
-
 #ifdef CONFIG_HOTPLUG_CPU
-	if (cpu_is_offline(cpu)) {
+	if (cpu_is_offline(smp_processor_id())) {
 		preempt_enable_no_resched();
 		cpu_die();
 	}
 #endif
-
 	local_mcck_disable();
 	if (test_thread_flag(TIF_MCCK_PENDING)) {
 		local_mcck_enable();
-		/* disable monitor call class 0 */
-		__ctl_clear_bit(8, 15);
-		atomic_notifier_call_chain(&idle_chain, S390_CPU_NOT_IDLE,
-					   hcpu);
+		s390_idle_leave();
 		local_irq_enable();
 		s390_handle_mcck();
 		return;
 	}
-#ifdef CONFIG_SMP
-	idle = &__get_cpu_var(s390_idle);
-	spin_lock(&idle->lock);
-	idle->idle_count++;
-	idle->in_idle = 1;
-	idle->idle_enter = get_clock();
-	spin_unlock(&idle->lock);
-#endif
 	trace_hardirqs_on();
 	/* Wait for external, I/O or machine check interrupt. */
 	__load_psw_mask(psw_kernel_bits | PSW_MASK_WAIT |
@@ -177,32 +170,16 @@ static void default_idle(void)
 void cpu_idle(void)
 {
 	for (;;) {
+		tick_nohz_stop_sched_tick();
 		while (!need_resched())
 			default_idle();
-
+		tick_nohz_restart_sched_tick();
 		preempt_enable_no_resched();
 		schedule();
 		preempt_disable();
 	}
 }
 
-void show_regs(struct pt_regs *regs)
-{
-	print_modules();
-	printk("CPU: %d %s %s %.*s\n",
-	       task_thread_info(current)->cpu, print_tainted(),
-	       init_utsname()->release,
-	       (int)strcspn(init_utsname()->version, " "),
-	       init_utsname()->version);
-	printk("Process %s (pid: %d, task: %p, ksp: %p)\n",
-	       current->comm, current->pid, current,
-	       (void *) current->thread.ksp);
-	show_registers(regs);
-	/* Show stack backtrace if pt_regs is from kernel mode */
-	if (!(regs->psw.mask & PSW_MASK_PSTATE))
-		show_trace(NULL, (unsigned long *) regs->gprs[15]);
-}
-
 extern void kernel_thread_starter(void);
 
 asm(
diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c
index 6e036bae9875..7f4270163744 100644
--- a/arch/s390/kernel/ptrace.c
+++ b/arch/s390/kernel/ptrace.c
@@ -41,6 +41,7 @@
 #include <asm/system.h>
 #include <asm/uaccess.h>
 #include <asm/unistd.h>
+#include "entry.h"
 
 #ifdef CONFIG_COMPAT
 #include "compat_ptrace.h"
@@ -606,38 +607,8 @@ do_ptrace_emu31(struct task_struct *child, long request, long addr, long data)
 }
 #endif
 
-#define PT32_IEEE_IP 0x13c
-
-static int
-do_ptrace(struct task_struct *child, long request, long addr, long data)
+long arch_ptrace(struct task_struct *child, long request, long addr, long data)
 {
-	int ret;
-
-	if (request == PTRACE_ATTACH)
-		return ptrace_attach(child);
-
-	/*
-	 * Special cases to get/store the ieee instructions pointer.
-	 */
-	if (child == current) {
-		if (request == PTRACE_PEEKUSR && addr == PT_IEEE_IP)
-			return peek_user(child, addr, data);
-		if (request == PTRACE_POKEUSR && addr == PT_IEEE_IP)
-			return poke_user(child, addr, data);
-#ifdef CONFIG_COMPAT
-		if (request == PTRACE_PEEKUSR &&
-		    addr == PT32_IEEE_IP && test_thread_flag(TIF_31BIT))
-			return peek_user_emu31(child, addr, data);
-		if (request == PTRACE_POKEUSR &&
-		    addr == PT32_IEEE_IP && test_thread_flag(TIF_31BIT))
-			return poke_user_emu31(child, addr, data);
-#endif
-	}
-
-	ret = ptrace_check_attach(child, request == PTRACE_KILL);
-	if (ret < 0)
-		return ret;
-
 	switch (request) {
 	case PTRACE_SYSCALL:
 		/* continue and stop at next (return from) syscall */
@@ -692,31 +663,6 @@ do_ptrace(struct task_struct *child, long request, long addr, long data)
 	return -EIO;
 }
 
-asmlinkage long
-sys_ptrace(long request, long pid, long addr, long data)
-{
-	struct task_struct *child;
-	int ret;
-
-	lock_kernel();
-	if (request == PTRACE_TRACEME) {
-		 ret = ptrace_traceme();
-		 goto out;
-	}
-
-	child = ptrace_get_task_struct(pid);
-	if (IS_ERR(child)) {
-		ret = PTR_ERR(child);
-		goto out;
-	}
-
-	ret = do_ptrace(child, request, addr, data);
-	put_task_struct(child);
-out:
-	unlock_kernel();
-	return ret;
-}
-
 asmlinkage void
 syscall_trace(struct pt_regs *regs, int entryexit)
 {
diff --git a/arch/s390/kernel/s390_ext.c b/arch/s390/kernel/s390_ext.c
index acf93dba7727..e019b419efc6 100644
--- a/arch/s390/kernel/s390_ext.c
+++ b/arch/s390/kernel/s390_ext.c
@@ -13,11 +13,12 @@
 #include <linux/errno.h>
 #include <linux/kernel_stat.h>
 #include <linux/interrupt.h>
-
+#include <asm/cpu.h>
 #include <asm/lowcore.h>
 #include <asm/s390_ext.h>
 #include <asm/irq_regs.h>
 #include <asm/irq.h>
+#include "entry.h"
 
 /*
  * ext_int_hash[index] is the start of the list for all external interrupts
@@ -119,13 +120,10 @@ void do_extint(struct pt_regs *regs, unsigned short code)
 
 	old_regs = set_irq_regs(regs);
 	irq_enter();
-	asm volatile ("mc 0,0");
-	if (S390_lowcore.int_clock >= S390_lowcore.jiffy_timer)
-		/**
-		 * Make sure that the i/o interrupt did not "overtake"
-		 * the last HZ timer interrupt.
-		 */
-		account_ticks(S390_lowcore.int_clock);
+	s390_idle_check();
+	if (S390_lowcore.int_clock >= S390_lowcore.clock_comparator)
+		/* Serve timer interrupts first. */
+		clock_comparator_work();
 	kstat_cpu(smp_processor_id()).irqs[EXTERNAL_INTERRUPT]++;
         index = ext_hash(code);
 	for (p = ext_int_hash[index]; p; p = p->next) {
diff --git a/arch/s390/kernel/s390_ksyms.c b/arch/s390/kernel/s390_ksyms.c
index 7234c737f825..48238a114ce9 100644
--- a/arch/s390/kernel/s390_ksyms.c
+++ b/arch/s390/kernel/s390_ksyms.c
@@ -27,13 +27,6 @@ EXPORT_SYMBOL(_zb_findmap);
 EXPORT_SYMBOL(_sb_findmap);
 
 /*
- * semaphore ops
- */
-EXPORT_SYMBOL(__up);
-EXPORT_SYMBOL(__down);
-EXPORT_SYMBOL(__down_interruptible);
-
-/*
  * binfmt_elf loader 
  */
 extern int dump_fpu (struct pt_regs * regs, s390_fp_regs *fpregs);
diff --git a/arch/s390/kernel/semaphore.c b/arch/s390/kernel/semaphore.c
deleted file mode 100644
index 191303f6c1d8..000000000000
--- a/arch/s390/kernel/semaphore.c
+++ /dev/null
@@ -1,108 +0,0 @@
-/*
- *  linux/arch/s390/kernel/semaphore.c
- *
- *  S390 version
- *    Copyright (C) 1998-2000 IBM Corporation
- *    Author(s): Martin Schwidefsky
- *
- *  Derived from "linux/arch/i386/kernel/semaphore.c
- *    Copyright (C) 1999, Linus Torvalds
- *
- */
-#include <linux/sched.h>
-#include <linux/errno.h>
-#include <linux/init.h>
-
-#include <asm/semaphore.h>
-
-/*
- * Atomically update sem->count. Equivalent to:
- *   old_val = sem->count.counter;
- *   new_val = ((old_val >= 0) ? old_val : 0) + incr;
- *   sem->count.counter = new_val;
- *   return old_val;
- */
-static inline int __sem_update_count(struct semaphore *sem, int incr)
-{
-	int old_val, new_val;
-
-	asm volatile(
-		"	l	%0,0(%3)\n"
-		"0:	ltr	%1,%0\n"
-		"	jhe	1f\n"
-		"	lhi	%1,0\n"
-		"1:	ar	%1,%4\n"
-		"	cs	%0,%1,0(%3)\n"
-		"	jl	0b\n"
-		: "=&d" (old_val), "=&d" (new_val), "=m" (sem->count)
-		: "a" (&sem->count), "d" (incr), "m" (sem->count)
-		: "cc");
-	return old_val;
-}
-
-/*
- * The inline function up() incremented count but the result
- * was <= 0. This indicates that some process is waiting on
- * the semaphore. The semaphore is free and we'll wake the
- * first sleeping process, so we set count to 1 unless some
- * other cpu has called up in the meantime in which case
- * we just increment count by 1.
- */
-void __up(struct semaphore *sem)
-{
-	__sem_update_count(sem, 1);
-	wake_up(&sem->wait);
-}
-
-/*
- * The inline function down() decremented count and the result
- * was < 0. The wait loop will atomically test and update the
- * semaphore counter following the rules:
- *   count > 0: decrement count, wake up queue and exit.
- *   count <= 0: set count to -1, go to sleep.
- */
-void __sched __down(struct semaphore * sem)
-{
-	struct task_struct *tsk = current;
-	DECLARE_WAITQUEUE(wait, tsk);
-
-	__set_task_state(tsk, TASK_UNINTERRUPTIBLE);
-	add_wait_queue_exclusive(&sem->wait, &wait);
-	while (__sem_update_count(sem, -1) <= 0) {
-		schedule();
-		set_task_state(tsk, TASK_UNINTERRUPTIBLE);
-	}
-	remove_wait_queue(&sem->wait, &wait);
-	__set_task_state(tsk, TASK_RUNNING);
-	wake_up(&sem->wait);
-}
-
-/*
- * Same as __down() with an additional test for signals.
- * If a signal is pending the count is updated as follows:
- *   count > 0: wake up queue and exit.
- *   count <= 0: set count to 0, wake up queue and exit.
- */
-int __sched __down_interruptible(struct semaphore * sem)
-{
-	int retval = 0;
-	struct task_struct *tsk = current;
-	DECLARE_WAITQUEUE(wait, tsk);
-
-	__set_task_state(tsk, TASK_INTERRUPTIBLE);
-	add_wait_queue_exclusive(&sem->wait, &wait);
-	while (__sem_update_count(sem, -1) <= 0) {
-		if (signal_pending(current)) {
-			__sem_update_count(sem, 0);
-			retval = -EINTR;
-			break;
-		}
-		schedule();
-		set_task_state(tsk, TASK_INTERRUPTIBLE);
-	}
-	remove_wait_queue(&sem->wait, &wait);
-	__set_task_state(tsk, TASK_RUNNING);
-	wake_up(&sem->wait);
-	return retval;
-}
-
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index 290e504061a3..2bc70b6e876a 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -39,6 +39,7 @@
 #include <linux/pfn.h>
 #include <linux/ctype.h>
 #include <linux/reboot.h>
+#include <linux/topology.h>
 
 #include <asm/ipl.h>
 #include <asm/uaccess.h>
@@ -72,7 +73,7 @@ EXPORT_SYMBOL(uaccess);
 unsigned int console_mode = 0;
 unsigned int console_devno = -1;
 unsigned int console_irq = -1;
-unsigned long machine_flags = 0;
+unsigned long machine_flags;
 unsigned long elf_hwcap = 0;
 char elf_platform[ELF_PLATFORM_SIZE];
 
@@ -315,7 +316,11 @@ static int __init early_parse_ipldelay(char *p)
 early_param("ipldelay", early_parse_ipldelay);
 
 #ifdef CONFIG_S390_SWITCH_AMODE
+#ifdef CONFIG_PGSTE
+unsigned int switch_amode = 1;
+#else
 unsigned int switch_amode = 0;
+#endif
 EXPORT_SYMBOL_GPL(switch_amode);
 
 static void set_amode_and_uaccess(unsigned long user_amode,
@@ -427,7 +432,7 @@ setup_lowcore(void)
 	lc->io_new_psw.mask = psw_kernel_bits;
 	lc->io_new_psw.addr = PSW_ADDR_AMODE | (unsigned long) io_int_handler;
 	lc->ipl_device = S390_lowcore.ipl_device;
-	lc->jiffy_timer = -1LL;
+	lc->clock_comparator = -1ULL;
 	lc->kernel_stack = ((unsigned long) &init_thread_union) + THREAD_SIZE;
 	lc->async_stack = (unsigned long)
 		__alloc_bootmem(ASYNC_SIZE, ASYNC_SIZE, 0) + ASYNC_SIZE;
@@ -678,16 +683,7 @@ setup_memory(void)
 #endif
 }
 
-static __init unsigned int stfl(void)
-{
-	asm volatile(
-		"	.insn	s,0xb2b10000,0(0)\n" /* stfl */
-		"0:\n"
-		EX_TABLE(0b,0b));
-	return S390_lowcore.stfl_fac_list;
-}
-
-static __init int stfle(unsigned long long *list, int doublewords)
+static int __init __stfle(unsigned long long *list, int doublewords)
 {
 	typedef struct { unsigned long long _[doublewords]; } addrtype;
 	register unsigned long __nr asm("0") = doublewords - 1;
@@ -697,6 +693,13 @@ static __init int stfle(unsigned long long *list, int doublewords)
 	return __nr + 1;
 }
 
+int __init stfle(unsigned long long *list, int doublewords)
+{
+	if (!(stfl() & (1UL << 24)))
+		return -EOPNOTSUPP;
+	return __stfle(list, doublewords);
+}
+
 /*
  * Setup hardware capabilities.
  */
@@ -741,11 +744,14 @@ static void __init setup_hwcaps(void)
 	 *   HWCAP_S390_DFP bit 6.
 	 */
 	if ((elf_hwcap & (1UL << 2)) &&
-	    stfle(&facility_list_extended, 1) > 0) {
+	    __stfle(&facility_list_extended, 1) > 0) {
 		if (facility_list_extended & (1ULL << (64 - 43)))
 			elf_hwcap |= 1UL << 6;
 	}
 
+	if (MACHINE_HAS_HPAGE)
+		elf_hwcap |= 1UL << 7;
+
 	switch (cpuinfo->cpu_id.machine) {
 	case 0x9672:
 #if !defined(CONFIG_64BIT)
@@ -789,9 +795,13 @@ setup_arch(char **cmdline_p)
 	       "This machine has an IEEE fpu\n" :
 	       "This machine has no IEEE fpu\n");
 #else /* CONFIG_64BIT */
-	printk((MACHINE_IS_VM) ?
-	       "We are running under VM (64 bit mode)\n" :
-	       "We are running native (64 bit mode)\n");
+	if (MACHINE_IS_VM)
+		printk("We are running under VM (64 bit mode)\n");
+	else if (MACHINE_IS_KVM) {
+		printk("We are running under KVM (64 bit mode)\n");
+		add_preferred_console("ttyS", 1, NULL);
+	} else
+		printk("We are running native (64 bit mode)\n");
 #endif /* CONFIG_64BIT */
 
 	/* Save unparsed command line copy for /proc/cmdline */
@@ -823,6 +833,7 @@ setup_arch(char **cmdline_p)
 
         cpu_init();
         __cpu_logical_map[0] = S390_lowcore.cpu_data.cpu_addr;
+	s390_init_cpu_topology();
 
 	/*
 	 * Setup capabilities (ELF_HWCAP & ELF_PLATFORM).
@@ -864,8 +875,9 @@ void __cpuinit print_cpu_info(struct cpuinfo_S390 *cpuinfo)
 
 static int show_cpuinfo(struct seq_file *m, void *v)
 {
-	static const char *hwcap_str[7] = {
-		"esan3", "zarch", "stfle", "msa", "ldisp", "eimm", "dfp"
+	static const char *hwcap_str[8] = {
+		"esan3", "zarch", "stfle", "msa", "ldisp", "eimm", "dfp",
+		"edat"
 	};
         struct cpuinfo_S390 *cpuinfo;
 	unsigned long n = (unsigned long) v - 1;
@@ -880,7 +892,7 @@ static int show_cpuinfo(struct seq_file *m, void *v)
 			       num_online_cpus(), loops_per_jiffy/(500000/HZ),
 			       (loops_per_jiffy/(5000/HZ))%100);
 		seq_puts(m, "features\t: ");
-		for (i = 0; i < 7; i++)
+		for (i = 0; i < 8; i++)
 			if (hwcap_str[i] && (elf_hwcap & (1UL << i)))
 				seq_printf(m, "%s ", hwcap_str[i]);
 		seq_puts(m, "\n");
diff --git a/arch/s390/kernel/signal.c b/arch/s390/kernel/signal.c
index 4449bf32cbf1..b97682040215 100644
--- a/arch/s390/kernel/signal.c
+++ b/arch/s390/kernel/signal.c
@@ -27,6 +27,7 @@
 #include <asm/ucontext.h>
 #include <asm/uaccess.h>
 #include <asm/lowcore.h>
+#include "entry.h"
 
 #define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP)))
 
@@ -235,6 +236,10 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs * regs, size_t frame_size)
 	/* Default to using normal stack */
 	sp = regs->gprs[15];
 
+	/* Overflow on alternate signal stack gives SIGSEGV. */
+	if (on_sig_stack(sp) && !on_sig_stack((sp - frame_size) & -8UL))
+		return (void __user *) -1UL;
+
 	/* This is the X/Open sanctioned signal stack switching.  */
 	if (ka->sa.sa_flags & SA_ONSTACK) {
 		if (! sas_ss_flags(sp))
@@ -270,6 +275,9 @@ static int setup_frame(int sig, struct k_sigaction *ka,
 	if (!access_ok(VERIFY_WRITE, frame, sizeof(sigframe)))
 		goto give_sigsegv;
 
+	if (frame == (void __user *) -1UL)
+		goto give_sigsegv;
+
 	if (__copy_to_user(&frame->sc.oldmask, &set->sig, _SIGMASK_COPY_SIZE))
 		goto give_sigsegv;
 
@@ -327,6 +335,9 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
 	if (!access_ok(VERIFY_WRITE, frame, sizeof(rt_sigframe)))
 		goto give_sigsegv;
 
+	if (frame == (void __user *) -1UL)
+		goto give_sigsegv;
+
 	if (copy_siginfo_to_user(&frame->info, info))
 		goto give_sigsegv;
 
@@ -474,11 +485,6 @@ void do_signal(struct pt_regs *regs)
 		int ret;
 #ifdef CONFIG_COMPAT
 		if (test_thread_flag(TIF_31BIT)) {
-			extern int handle_signal32(unsigned long sig,
-						   struct k_sigaction *ka,
-						   siginfo_t *info,
-						   sigset_t *oldset,
-						   struct pt_regs *regs);
 			ret = handle_signal32(signr, &ka, &info, oldset, regs);
 	        }
 		else
diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
index 8f894d380a62..0aeb290060d9 100644
--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c
@@ -44,6 +44,7 @@
 #include <asm/lowcore.h>
 #include <asm/sclp.h>
 #include <asm/cpu.h>
+#include "entry.h"
 
 /*
  * An array with a pointer the lowcore of every CPU.
@@ -67,13 +68,12 @@ enum s390_cpu_state {
 	CPU_STATE_CONFIGURED,
 };
 
-#ifdef CONFIG_HOTPLUG_CPU
-static DEFINE_MUTEX(smp_cpu_state_mutex);
-#endif
+DEFINE_MUTEX(smp_cpu_state_mutex);
+int smp_cpu_polarization[NR_CPUS];
 static int smp_cpu_state[NR_CPUS];
+static int cpu_management;
 
 static DEFINE_PER_CPU(struct cpu, cpu_devices);
-DEFINE_PER_CPU(struct s390_idle_data, s390_idle);
 
 static void smp_ext_bitcall(int, ec_bit_sig);
 
@@ -298,7 +298,7 @@ static void smp_ext_bitcall(int cpu, ec_bit_sig sig)
 /*
  * this function sends a 'purge tlb' signal to another CPU.
  */
-void smp_ptlb_callback(void *info)
+static void smp_ptlb_callback(void *info)
 {
 	__tlb_flush_local();
 }
@@ -456,6 +456,7 @@ static int smp_rescan_cpus_sigp(cpumask_t avail)
 		if (cpu_known(cpu_id))
 			continue;
 		__cpu_logical_map[logical_cpu] = cpu_id;
+		smp_cpu_polarization[logical_cpu] = POLARIZATION_UNKNWN;
 		if (!cpu_stopped(logical_cpu))
 			continue;
 		cpu_set(logical_cpu, cpu_present_map);
@@ -489,6 +490,7 @@ static int smp_rescan_cpus_sclp(cpumask_t avail)
 		if (cpu_known(cpu_id))
 			continue;
 		__cpu_logical_map[logical_cpu] = cpu_id;
+		smp_cpu_polarization[logical_cpu] = POLARIZATION_UNKNWN;
 		cpu_set(logical_cpu, cpu_present_map);
 		if (cpu >= info->configured)
 			smp_cpu_state[logical_cpu] = CPU_STATE_STANDBY;
@@ -503,7 +505,7 @@ out:
 	return rc;
 }
 
-static int smp_rescan_cpus(void)
+static int __smp_rescan_cpus(void)
 {
 	cpumask_t avail;
 
@@ -568,7 +570,7 @@ out:
 	kfree(info);
 	printk(KERN_INFO "CPUs: %d configured, %d standby\n", c_cpus, s_cpus);
 	get_online_cpus();
-	smp_rescan_cpus();
+	__smp_rescan_cpus();
 	put_online_cpus();
 }
 
@@ -846,6 +848,7 @@ void __init smp_prepare_boot_cpu(void)
 	S390_lowcore.percpu_offset = __per_cpu_offset[0];
 	current_set[0] = current;
 	smp_cpu_state[0] = CPU_STATE_CONFIGURED;
+	smp_cpu_polarization[0] = POLARIZATION_UNKNWN;
 	spin_lock_init(&(&__get_cpu_var(s390_idle))->lock);
 }
 
@@ -887,8 +890,8 @@ static ssize_t cpu_configure_store(struct sys_device *dev, const char *buf,
 	if (val != 0 && val != 1)
 		return -EINVAL;
 
-	mutex_lock(&smp_cpu_state_mutex);
 	get_online_cpus();
+	mutex_lock(&smp_cpu_state_mutex);
 	rc = -EBUSY;
 	if (cpu_online(cpu))
 		goto out;
@@ -897,28 +900,60 @@ static ssize_t cpu_configure_store(struct sys_device *dev, const char *buf,
 	case 0:
 		if (smp_cpu_state[cpu] == CPU_STATE_CONFIGURED) {
 			rc = sclp_cpu_deconfigure(__cpu_logical_map[cpu]);
-			if (!rc)
+			if (!rc) {
 				smp_cpu_state[cpu] = CPU_STATE_STANDBY;
+				smp_cpu_polarization[cpu] = POLARIZATION_UNKNWN;
+			}
 		}
 		break;
 	case 1:
 		if (smp_cpu_state[cpu] == CPU_STATE_STANDBY) {
 			rc = sclp_cpu_configure(__cpu_logical_map[cpu]);
-			if (!rc)
+			if (!rc) {
 				smp_cpu_state[cpu] = CPU_STATE_CONFIGURED;
+				smp_cpu_polarization[cpu] = POLARIZATION_UNKNWN;
+			}
 		}
 		break;
 	default:
 		break;
 	}
 out:
-	put_online_cpus();
 	mutex_unlock(&smp_cpu_state_mutex);
+	put_online_cpus();
 	return rc ? rc : count;
 }
 static SYSDEV_ATTR(configure, 0644, cpu_configure_show, cpu_configure_store);
 #endif /* CONFIG_HOTPLUG_CPU */
 
+static ssize_t cpu_polarization_show(struct sys_device *dev, char *buf)
+{
+	int cpu = dev->id;
+	ssize_t count;
+
+	mutex_lock(&smp_cpu_state_mutex);
+	switch (smp_cpu_polarization[cpu]) {
+	case POLARIZATION_HRZ:
+		count = sprintf(buf, "horizontal\n");
+		break;
+	case POLARIZATION_VL:
+		count = sprintf(buf, "vertical:low\n");
+		break;
+	case POLARIZATION_VM:
+		count = sprintf(buf, "vertical:medium\n");
+		break;
+	case POLARIZATION_VH:
+		count = sprintf(buf, "vertical:high\n");
+		break;
+	default:
+		count = sprintf(buf, "unknown\n");
+		break;
+	}
+	mutex_unlock(&smp_cpu_state_mutex);
+	return count;
+}
+static SYSDEV_ATTR(polarization, 0444, cpu_polarization_show, NULL);
+
 static ssize_t show_cpu_address(struct sys_device *dev, char *buf)
 {
 	return sprintf(buf, "%d\n", __cpu_logical_map[dev->id]);
@@ -931,6 +966,7 @@ static struct attribute *cpu_common_attrs[] = {
 	&attr_configure.attr,
 #endif
 	&attr_address.attr,
+	&attr_polarization.attr,
 	NULL,
 };
 
@@ -1052,17 +1088,17 @@ out:
 }
 
 #ifdef CONFIG_HOTPLUG_CPU
-static ssize_t __ref rescan_store(struct sys_device *dev,
-				  const char *buf, size_t count)
+
+int smp_rescan_cpus(void)
 {
 	cpumask_t newcpus;
 	int cpu;
 	int rc;
 
-	mutex_lock(&smp_cpu_state_mutex);
 	get_online_cpus();
+	mutex_lock(&smp_cpu_state_mutex);
 	newcpus = cpu_present_map;
-	rc = smp_rescan_cpus();
+	rc = __smp_rescan_cpus();
 	if (rc)
 		goto out;
 	cpus_andnot(newcpus, cpu_present_map, newcpus);
@@ -1073,13 +1109,59 @@ static ssize_t __ref rescan_store(struct sys_device *dev,
 	}
 	rc = 0;
 out:
-	put_online_cpus();
 	mutex_unlock(&smp_cpu_state_mutex);
+	put_online_cpus();
+	if (!cpus_empty(newcpus))
+		topology_schedule_update();
+	return rc;
+}
+
+static ssize_t __ref rescan_store(struct sys_device *dev, const char *buf,
+				  size_t count)
+{
+	int rc;
+
+	rc = smp_rescan_cpus();
 	return rc ? rc : count;
 }
 static SYSDEV_ATTR(rescan, 0200, NULL, rescan_store);
 #endif /* CONFIG_HOTPLUG_CPU */
 
+static ssize_t dispatching_show(struct sys_device *dev, char *buf)
+{
+	ssize_t count;
+
+	mutex_lock(&smp_cpu_state_mutex);
+	count = sprintf(buf, "%d\n", cpu_management);
+	mutex_unlock(&smp_cpu_state_mutex);
+	return count;
+}
+
+static ssize_t dispatching_store(struct sys_device *dev, const char *buf,
+				 size_t count)
+{
+	int val, rc;
+	char delim;
+
+	if (sscanf(buf, "%d %c", &val, &delim) != 1)
+		return -EINVAL;
+	if (val != 0 && val != 1)
+		return -EINVAL;
+	rc = 0;
+	get_online_cpus();
+	mutex_lock(&smp_cpu_state_mutex);
+	if (cpu_management == val)
+		goto out;
+	rc = topology_set_cpu_management(val);
+	if (!rc)
+		cpu_management = val;
+out:
+	mutex_unlock(&smp_cpu_state_mutex);
+	put_online_cpus();
+	return rc ? rc : count;
+}
+static SYSDEV_ATTR(dispatching, 0644, dispatching_show, dispatching_store);
+
 static int __init topology_init(void)
 {
 	int cpu;
@@ -1093,6 +1175,10 @@ static int __init topology_init(void)
 	if (rc)
 		return rc;
 #endif
+	rc = sysfs_create_file(&cpu_sysdev_class.kset.kobj,
+			       &attr_dispatching.attr);
+	if (rc)
+		return rc;
 	for_each_present_cpu(cpu) {
 		rc = smp_add_present_cpu(cpu);
 		if (rc)
diff --git a/arch/s390/kernel/sys_s390.c b/arch/s390/kernel/sys_s390.c
index fefee99f28aa..988d0d64c2c8 100644
--- a/arch/s390/kernel/sys_s390.c
+++ b/arch/s390/kernel/sys_s390.c
@@ -29,8 +29,8 @@
 #include <linux/personality.h>
 #include <linux/unistd.h>
 #include <linux/ipc.h>
-
 #include <asm/uaccess.h>
+#include "entry.h"
 
 /*
  * sys_pipe() is the normal C calling standard for creating
diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c
index cb232c155360..7aec676fefd5 100644
--- a/arch/s390/kernel/time.c
+++ b/arch/s390/kernel/time.c
@@ -30,7 +30,7 @@
 #include <linux/timex.h>
 #include <linux/notifier.h>
 #include <linux/clocksource.h>
-
+#include <linux/clockchips.h>
 #include <asm/uaccess.h>
 #include <asm/delay.h>
 #include <asm/s390_ext.h>
@@ -39,6 +39,7 @@
 #include <asm/irq_regs.h>
 #include <asm/timer.h>
 #include <asm/etr.h>
+#include <asm/cio.h>
 
 /* change this if you have some constant time drift */
 #define USECS_PER_JIFFY     ((unsigned long) 1000000/HZ)
@@ -57,16 +58,16 @@
 
 static ext_int_info_t ext_int_info_cc;
 static ext_int_info_t ext_int_etr_cc;
-static u64 init_timer_cc;
 static u64 jiffies_timer_cc;
-static u64 xtime_cc;
+
+static DEFINE_PER_CPU(struct clock_event_device, comparators);
 
 /*
  * Scheduler clock - returns current time in nanosec units.
  */
 unsigned long long sched_clock(void)
 {
-	return ((get_clock() - jiffies_timer_cc) * 125) >> 9;
+	return ((get_clock_xt() - jiffies_timer_cc) * 125) >> 9;
 }
 
 /*
@@ -95,162 +96,40 @@ void tod_to_timeval(__u64 todval, struct timespec *xtime)
 #define s390_do_profile()	do { ; } while(0)
 #endif /* CONFIG_PROFILING */
 
-/*
- * Advance the per cpu tick counter up to the time given with the
- * "time" argument. The per cpu update consists of accounting
- * the virtual cpu time, calling update_process_times and calling
- * the profiling hook. If xtime is before time it is advanced as well.
- */
-void account_ticks(u64 time)
+void clock_comparator_work(void)
 {
-	__u32 ticks;
-	__u64 tmp;
-
-	/* Calculate how many ticks have passed. */
-	if (time < S390_lowcore.jiffy_timer)
-		return;
-	tmp = time - S390_lowcore.jiffy_timer;
-	if (tmp >= 2*CLK_TICKS_PER_JIFFY) {  /* more than two ticks ? */
-		ticks = __div(tmp, CLK_TICKS_PER_JIFFY) + 1;
-		S390_lowcore.jiffy_timer +=
-			CLK_TICKS_PER_JIFFY * (__u64) ticks;
-	} else if (tmp >= CLK_TICKS_PER_JIFFY) {
-		ticks = 2;
-		S390_lowcore.jiffy_timer += 2*CLK_TICKS_PER_JIFFY;
-	} else {
-		ticks = 1;
-		S390_lowcore.jiffy_timer += CLK_TICKS_PER_JIFFY;
-	}
-
-#ifdef CONFIG_SMP
-	/*
-	 * Do not rely on the boot cpu to do the calls to do_timer.
-	 * Spread it over all cpus instead.
-	 */
-	write_seqlock(&xtime_lock);
-	if (S390_lowcore.jiffy_timer > xtime_cc) {
-		__u32 xticks;
-		tmp = S390_lowcore.jiffy_timer - xtime_cc;
-		if (tmp >= 2*CLK_TICKS_PER_JIFFY) {
-			xticks = __div(tmp, CLK_TICKS_PER_JIFFY);
-			xtime_cc += (__u64) xticks * CLK_TICKS_PER_JIFFY;
-		} else {
-			xticks = 1;
-			xtime_cc += CLK_TICKS_PER_JIFFY;
-		}
-		do_timer(xticks);
-	}
-	write_sequnlock(&xtime_lock);
-#else
-	do_timer(ticks);
-#endif
-
-	while (ticks--)
-		update_process_times(user_mode(get_irq_regs()));
+	struct clock_event_device *cd;
 
+	S390_lowcore.clock_comparator = -1ULL;
+	set_clock_comparator(S390_lowcore.clock_comparator);
+	cd = &__get_cpu_var(comparators);
+	cd->event_handler(cd);
 	s390_do_profile();
 }
 
-#ifdef CONFIG_NO_IDLE_HZ
-
-#ifdef CONFIG_NO_IDLE_HZ_INIT
-int sysctl_hz_timer = 0;
-#else
-int sysctl_hz_timer = 1;
-#endif
-
-/*
- * Stop the HZ tick on the current CPU.
- * Only cpu_idle may call this function.
- */
-static void stop_hz_timer(void)
-{
-	unsigned long flags;
-	unsigned long seq, next;
-	__u64 timer, todval;
-	int cpu = smp_processor_id();
-
-	if (sysctl_hz_timer != 0)
-		return;
-
-	cpu_set(cpu, nohz_cpu_mask);
-
-	/*
-	 * Leave the clock comparator set up for the next timer
-	 * tick if either rcu or a softirq is pending.
-	 */
-	if (rcu_needs_cpu(cpu) || local_softirq_pending()) {
-		cpu_clear(cpu, nohz_cpu_mask);
-		return;
-	}
-
-	/*
-	 * This cpu is going really idle. Set up the clock comparator
-	 * for the next event.
-	 */
-	next = next_timer_interrupt();
-	do {
-		seq = read_seqbegin_irqsave(&xtime_lock, flags);
-		timer = ((__u64) next) - ((__u64) jiffies) + jiffies_64;
-	} while (read_seqretry_irqrestore(&xtime_lock, seq, flags));
-	todval = -1ULL;
-	/* Be careful about overflows. */
-	if (timer < (-1ULL / CLK_TICKS_PER_JIFFY)) {
-		timer = jiffies_timer_cc + timer * CLK_TICKS_PER_JIFFY;
-		if (timer >= jiffies_timer_cc)
-			todval = timer;
-	}
-	set_clock_comparator(todval);
-}
-
 /*
- * Start the HZ tick on the current CPU.
- * Only cpu_idle may call this function.
+ * Fixup the clock comparator.
  */
-static void start_hz_timer(void)
+static void fixup_clock_comparator(unsigned long long delta)
 {
-	if (!cpu_isset(smp_processor_id(), nohz_cpu_mask))
+	/* If nobody is waiting there's nothing to fix. */
+	if (S390_lowcore.clock_comparator == -1ULL)
 		return;
-	account_ticks(get_clock());
-	set_clock_comparator(S390_lowcore.jiffy_timer + CPU_DEVIATION);
-	cpu_clear(smp_processor_id(), nohz_cpu_mask);
-}
-
-static int nohz_idle_notify(struct notifier_block *self,
-			    unsigned long action, void *hcpu)
-{
-	switch (action) {
-	case S390_CPU_IDLE:
-		stop_hz_timer();
-		break;
-	case S390_CPU_NOT_IDLE:
-		start_hz_timer();
-		break;
-	}
-	return NOTIFY_OK;
+	S390_lowcore.clock_comparator += delta;
+	set_clock_comparator(S390_lowcore.clock_comparator);
 }
 
-static struct notifier_block nohz_idle_nb = {
-	.notifier_call = nohz_idle_notify,
-};
-
-static void __init nohz_init(void)
+static int s390_next_event(unsigned long delta,
+			   struct clock_event_device *evt)
 {
-	if (register_idle_notifier(&nohz_idle_nb))
-		panic("Couldn't register idle notifier");
+	S390_lowcore.clock_comparator = get_clock() + delta;
+	set_clock_comparator(S390_lowcore.clock_comparator);
+	return 0;
 }
 
-#endif
-
-/*
- * Set up per cpu jiffy timer and set the clock comparator.
- */
-static void setup_jiffy_timer(void)
+static void s390_set_mode(enum clock_event_mode mode,
+			  struct clock_event_device *evt)
 {
-	/* Set up clock comparator to next jiffy. */
-	S390_lowcore.jiffy_timer =
-		jiffies_timer_cc + (jiffies_64 + 1) * CLK_TICKS_PER_JIFFY;
-	set_clock_comparator(S390_lowcore.jiffy_timer + CPU_DEVIATION);
 }
 
 /*
@@ -259,7 +138,26 @@ static void setup_jiffy_timer(void)
  */
 void init_cpu_timer(void)
 {
-	setup_jiffy_timer();
+	struct clock_event_device *cd;
+	int cpu;
+
+	S390_lowcore.clock_comparator = -1ULL;
+	set_clock_comparator(S390_lowcore.clock_comparator);
+
+	cpu = smp_processor_id();
+	cd = &per_cpu(comparators, cpu);
+	cd->name		= "comparator";
+	cd->features		= CLOCK_EVT_FEAT_ONESHOT;
+	cd->mult		= 16777;
+	cd->shift		= 12;
+	cd->min_delta_ns	= 1;
+	cd->max_delta_ns	= LONG_MAX;
+	cd->rating		= 400;
+	cd->cpumask		= cpumask_of_cpu(cpu);
+	cd->set_next_event	= s390_next_event;
+	cd->set_mode		= s390_set_mode;
+
+	clockevents_register_device(cd);
 
 	/* Enable clock comparator timer interrupt. */
 	__ctl_set_bit(0,11);
@@ -270,8 +168,6 @@ void init_cpu_timer(void)
 
 static void clock_comparator_interrupt(__u16 code)
 {
-	/* set clock comparator for next tick */
-	set_clock_comparator(S390_lowcore.jiffy_timer + CPU_DEVIATION);
 }
 
 static void etr_reset(void);
@@ -316,8 +212,9 @@ static struct clocksource clocksource_tod = {
  */
 void __init time_init(void)
 {
+	u64 init_timer_cc;
+
 	init_timer_cc = reset_tod_clock();
-	xtime_cc = init_timer_cc + CLK_TICKS_PER_JIFFY;
 	jiffies_timer_cc = init_timer_cc - jiffies_64 * CLK_TICKS_PER_JIFFY;
 
 	/* set xtime */
@@ -342,10 +239,6 @@ void __init time_init(void)
 	/* Enable TOD clock interrupts on the boot cpu. */
 	init_cpu_timer();
 
-#ifdef CONFIG_NO_IDLE_HZ
-	nohz_init();
-#endif
-
 #ifdef CONFIG_VIRT_TIMER
 	vtime_init();
 #endif
@@ -699,53 +592,49 @@ static int etr_aib_follows(struct etr_aib *a1, struct etr_aib *a2, int p)
 }
 
 /*
- * The time is "clock". xtime is what we think the time is.
+ * The time is "clock". old is what we think the time is.
  * Adjust the value by a multiple of jiffies and add the delta to ntp.
  * "delay" is an approximation how long the synchronization took. If
  * the time correction is positive, then "delay" is subtracted from
  * the time difference and only the remaining part is passed to ntp.
  */
-static void etr_adjust_time(unsigned long long clock, unsigned long long delay)
+static unsigned long long etr_adjust_time(unsigned long long old,
+					  unsigned long long clock,
+					  unsigned long long delay)
 {
 	unsigned long long delta, ticks;
 	struct timex adjust;
 
-	/*
-	 * We don't have to take the xtime lock because the cpu
-	 * executing etr_adjust_time is running disabled in
-	 * tasklet context and all other cpus are looping in
-	 * etr_sync_cpu_start.
-	 */
-	if (clock > xtime_cc) {
+	if (clock > old) {
 		/* It is later than we thought. */
-		delta = ticks = clock - xtime_cc;
+		delta = ticks = clock - old;
 		delta = ticks = (delta < delay) ? 0 : delta - delay;
 		delta -= do_div(ticks, CLK_TICKS_PER_JIFFY);
-		init_timer_cc = init_timer_cc + delta;
-		jiffies_timer_cc = jiffies_timer_cc + delta;
-		xtime_cc = xtime_cc + delta;
 		adjust.offset = ticks * (1000000 / HZ);
 	} else {
 		/* It is earlier than we thought. */
-		delta = ticks = xtime_cc - clock;
+		delta = ticks = old - clock;
 		delta -= do_div(ticks, CLK_TICKS_PER_JIFFY);
-		init_timer_cc = init_timer_cc - delta;
-		jiffies_timer_cc = jiffies_timer_cc - delta;
-		xtime_cc = xtime_cc - delta;
+		delta = -delta;
 		adjust.offset = -ticks * (1000000 / HZ);
 	}
+	jiffies_timer_cc += delta;
 	if (adjust.offset != 0) {
 		printk(KERN_NOTICE "etr: time adjusted by %li micro-seconds\n",
 		       adjust.offset);
 		adjust.modes = ADJ_OFFSET_SINGLESHOT;
 		do_adjtimex(&adjust);
 	}
+	return delta;
 }
 
+static struct {
+	int in_sync;
+	unsigned long long fixup_cc;
+} etr_sync;
+
 static void etr_sync_cpu_start(void *dummy)
 {
-	int *in_sync = dummy;
-
 	etr_enable_sync_clock();
 	/*
 	 * This looks like a busy wait loop but it isn't. etr_sync_cpus
@@ -753,7 +642,7 @@ static void etr_sync_cpu_start(void *dummy)
 	 * __udelay will stop the cpu on an enabled wait psw until the
 	 * TOD is running again.
 	 */
-	while (*in_sync == 0) {
+	while (etr_sync.in_sync == 0) {
 		__udelay(1);
 		/*
 		 * A different cpu changes *in_sync. Therefore use
@@ -761,14 +650,14 @@ static void etr_sync_cpu_start(void *dummy)
 		 */
 		barrier();
 	}
-	if (*in_sync != 1)
+	if (etr_sync.in_sync != 1)
 		/* Didn't work. Clear per-cpu in sync bit again. */
 		etr_disable_sync_clock(NULL);
 	/*
 	 * This round of TOD syncing is done. Set the clock comparator
 	 * to the next tick and let the processor continue.
 	 */
-	setup_jiffy_timer();
+	fixup_clock_comparator(etr_sync.fixup_cc);
 }
 
 static void etr_sync_cpu_end(void *dummy)
@@ -783,8 +672,8 @@ static void etr_sync_cpu_end(void *dummy)
 static int etr_sync_clock(struct etr_aib *aib, int port)
 {
 	struct etr_aib *sync_port;
-	unsigned long long clock, delay;
-	int in_sync, follows;
+	unsigned long long clock, old_clock, delay, delta;
+	int follows;
 	int rc;
 
 	/* Check if the current aib is adjacent to the sync port aib. */
@@ -799,9 +688,9 @@ static int etr_sync_clock(struct etr_aib *aib, int port)
 	 * successfully synced the clock. smp_call_function will
 	 * return after all other cpus are in etr_sync_cpu_start.
 	 */
-	in_sync = 0;
+	memset(&etr_sync, 0, sizeof(etr_sync));
 	preempt_disable();
-	smp_call_function(etr_sync_cpu_start,&in_sync,0,0);
+	smp_call_function(etr_sync_cpu_start, NULL, 0, 0);
 	local_irq_disable();
 	etr_enable_sync_clock();
 
@@ -809,6 +698,7 @@ static int etr_sync_clock(struct etr_aib *aib, int port)
 	__ctl_set_bit(14, 21);
 	__ctl_set_bit(0, 29);
 	clock = ((unsigned long long) (aib->edf2.etv + 1)) << 32;
+	old_clock = get_clock();
 	if (set_clock(clock) == 0) {
 		__udelay(1);	/* Wait for the clock to start. */
 		__ctl_clear_bit(0, 29);
@@ -817,16 +707,17 @@ static int etr_sync_clock(struct etr_aib *aib, int port)
 		/* Adjust Linux timing variables. */
 		delay = (unsigned long long)
 			(aib->edf2.etv - sync_port->edf2.etv) << 32;
-		etr_adjust_time(clock, delay);
-		setup_jiffy_timer();
+		delta = etr_adjust_time(old_clock, clock, delay);
+		etr_sync.fixup_cc = delta;
+		fixup_clock_comparator(delta);
 		/* Verify that the clock is properly set. */
 		if (!etr_aib_follows(sync_port, aib, port)) {
 			/* Didn't work. */
 			etr_disable_sync_clock(NULL);
-			in_sync = -EAGAIN;
+			etr_sync.in_sync = -EAGAIN;
 			rc = -EAGAIN;
 		} else {
-			in_sync = 1;
+			etr_sync.in_sync = 1;
 			rc = 0;
 		}
 	} else {
@@ -834,7 +725,7 @@ static int etr_sync_clock(struct etr_aib *aib, int port)
 		__ctl_clear_bit(0, 29);
 		__ctl_clear_bit(14, 21);
 		etr_disable_sync_clock(NULL);
-		in_sync = -EAGAIN;
+		etr_sync.in_sync = -EAGAIN;
 		rc = -EAGAIN;
 	}
 	local_irq_enable();
diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c
new file mode 100644
index 000000000000..661a07217057
--- /dev/null
+++ b/arch/s390/kernel/topology.c
@@ -0,0 +1,341 @@
+/*
+ *    Copyright IBM Corp. 2007
+ *    Author(s): Heiko Carstens <heiko.carstens@de.ibm.com>
+ */
+
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/init.h>
+#include <linux/device.h>
+#include <linux/bootmem.h>
+#include <linux/sched.h>
+#include <linux/kthread.h>
+#include <linux/workqueue.h>
+#include <linux/cpu.h>
+#include <linux/smp.h>
+#include <asm/delay.h>
+#include <asm/s390_ext.h>
+#include <asm/sysinfo.h>
+
+#define CPU_BITS 64
+#define NR_MAG 6
+
+#define PTF_HORIZONTAL	(0UL)
+#define PTF_VERTICAL	(1UL)
+#define PTF_CHECK	(2UL)
+
+struct tl_cpu {
+	unsigned char reserved0[4];
+	unsigned char :6;
+	unsigned char pp:2;
+	unsigned char reserved1;
+	unsigned short origin;
+	unsigned long mask[CPU_BITS / BITS_PER_LONG];
+};
+
+struct tl_container {
+	unsigned char reserved[8];
+};
+
+union tl_entry {
+	unsigned char nl;
+	struct tl_cpu cpu;
+	struct tl_container container;
+};
+
+struct tl_info {
+	unsigned char reserved0[2];
+	unsigned short length;
+	unsigned char mag[NR_MAG];
+	unsigned char reserved1;
+	unsigned char mnest;
+	unsigned char reserved2[4];
+	union tl_entry tle[0];
+};
+
+struct core_info {
+	struct core_info *next;
+	cpumask_t mask;
+};
+
+static void topology_work_fn(struct work_struct *work);
+static struct tl_info *tl_info;
+static struct core_info core_info;
+static int machine_has_topology;
+static int machine_has_topology_irq;
+static struct timer_list topology_timer;
+static void set_topology_timer(void);
+static DECLARE_WORK(topology_work, topology_work_fn);
+
+cpumask_t cpu_core_map[NR_CPUS];
+
+cpumask_t cpu_coregroup_map(unsigned int cpu)
+{
+	struct core_info *core = &core_info;
+	cpumask_t mask;
+
+	cpus_clear(mask);
+	if (!machine_has_topology)
+		return cpu_present_map;
+	mutex_lock(&smp_cpu_state_mutex);
+	while (core) {
+		if (cpu_isset(cpu, core->mask)) {
+			mask = core->mask;
+			break;
+		}
+		core = core->next;
+	}
+	mutex_unlock(&smp_cpu_state_mutex);
+	if (cpus_empty(mask))
+		mask = cpumask_of_cpu(cpu);
+	return mask;
+}
+
+static void add_cpus_to_core(struct tl_cpu *tl_cpu, struct core_info *core)
+{
+	unsigned int cpu;
+
+	for (cpu = find_first_bit(&tl_cpu->mask[0], CPU_BITS);
+	     cpu < CPU_BITS;
+	     cpu = find_next_bit(&tl_cpu->mask[0], CPU_BITS, cpu + 1))
+	{
+		unsigned int rcpu, lcpu;
+
+		rcpu = CPU_BITS - 1 - cpu + tl_cpu->origin;
+		for_each_present_cpu(lcpu) {
+			if (__cpu_logical_map[lcpu] == rcpu) {
+				cpu_set(lcpu, core->mask);
+				smp_cpu_polarization[lcpu] = tl_cpu->pp;
+			}
+		}
+	}
+}
+
+/* Empty the cpu mask of every element on the core_info list. */
+static void clear_cores(void)
+{
+	struct core_info *entry;
+
+	/* The list starts at the static head and ends at a NULL next. */
+	for (entry = &core_info; entry; entry = entry->next)
+		cpus_clear(entry->mask);
+}
+
+static union tl_entry *next_tle(union tl_entry *tle)
+{
+	/* nl == 0 steps over a cpu entry, anything else over a container. */
+	if (!tle->nl)
+		return (union tl_entry *)((struct tl_cpu *)tle + 1);
+	return (union tl_entry *)((struct tl_container *)tle + 1);
+}
+
+/* Rebuild the core masks from a topology info block. */
+static void tl_to_cores(struct tl_info *info)
+{
+	union tl_entry *tle, *end;
+	struct core_info *core = &core_info;
+
+	mutex_lock(&smp_cpu_state_mutex);
+	clear_cores();
+	end = (union tl_entry *)((unsigned long)info + info->length);
+	for (tle = info->tle; tle < end; tle = next_tle(tle)) {
+		switch (tle->nl) {
+		case 5:
+		case 4:
+		case 3:
+		case 2:
+			break;
+		case 1:
+			core = core->next;
+			break;
+		case 0:
+			add_cpus_to_core(&tle->cpu, core);
+			break;
+		default:
+			clear_cores();
+			machine_has_topology = 0;
+			goto out;	/* never return with the mutex held */
+		}
+	}
+out:
+	mutex_unlock(&smp_cpu_state_mutex);
+}
+
+static void topology_update_polarization_simple(void)
+{
+	int cpu;
+
+	mutex_lock(&smp_cpu_state_mutex);
+	for_each_present_cpu(cpu)
+		smp_cpu_polarization[cpu] = POLARIZATION_HRZ;
+	mutex_unlock(&smp_cpu_state_mutex);
+}
+
+static int ptf(unsigned long fc)
+{
+	int rc;
+
+	asm volatile(
+		"	.insn	rre,0xb9a20000,%1,%1\n"
+		"	ipm	%0\n"
+		"	srl	%0,28\n"
+		: "=d" (rc)
+		: "d" (fc)  : "cc");
+	return rc;
+}
+
+int topology_set_cpu_management(int fc)
+{
+	int cpu;
+	int rc;
+
+	if (!machine_has_topology)
+		return -EOPNOTSUPP;
+	if (fc)
+		rc = ptf(PTF_VERTICAL);
+	else
+		rc = ptf(PTF_HORIZONTAL);
+	if (rc)
+		return -EBUSY;
+	for_each_present_cpu(cpu)
+		smp_cpu_polarization[cpu] = POLARIZATION_UNKNWN;
+	return rc;
+}
+
+static void update_cpu_core_map(void)
+{
+	int cpu;
+
+	for_each_present_cpu(cpu)
+		cpu_core_map[cpu] = cpu_coregroup_map(cpu);
+}
+
+void arch_update_cpu_topology(void)
+{
+	struct tl_info *info = tl_info;
+	struct sys_device *sysdev;
+	int cpu;
+
+	if (!machine_has_topology) {
+		update_cpu_core_map();
+		topology_update_polarization_simple();
+		return;
+	}
+	stsi(info, 15, 1, 2);
+	tl_to_cores(info);
+	update_cpu_core_map();
+	for_each_online_cpu(cpu) {
+		sysdev = get_cpu_sysdev(cpu);
+		kobject_uevent(&sysdev->kobj, KOBJ_CHANGE);
+	}
+}
+
+static int topology_kthread(void *data)
+{
+	arch_reinit_sched_domains();
+	return 0;
+}
+
+static void topology_work_fn(struct work_struct *work)
+{
+	/* We can't call arch_reinit_sched_domains() from a multi-threaded
+	 * workqueue context since it may deadlock in case of cpu hotplug.
+	 * So we have to create a kernel thread in order to call
+	 * arch_reinit_sched_domains().
+	 */
+	kthread_run(topology_kthread, NULL, "topology_update");
+}
+
+void topology_schedule_update(void)
+{
+	schedule_work(&topology_work);
+}
+
+static void topology_timer_fn(unsigned long ignored)
+{
+	if (ptf(PTF_CHECK))
+		topology_schedule_update();
+	set_topology_timer();
+}
+
+static void set_topology_timer(void)
+{
+	topology_timer.function = topology_timer_fn;
+	topology_timer.data = 0;
+	topology_timer.expires = jiffies + 60 * HZ;
+	add_timer(&topology_timer);
+}
+
+static void topology_interrupt(__u16 code)
+{
+	schedule_work(&topology_work);
+}
+
+static int __init init_topology_update(void)
+{
+	int rc;
+
+	rc = 0;
+	if (!machine_has_topology) {
+		topology_update_polarization_simple();
+		goto out;
+	}
+	init_timer_deferrable(&topology_timer);
+	if (machine_has_topology_irq) {
+		rc = register_external_interrupt(0x2005, topology_interrupt);
+		if (rc)
+			goto out;
+		ctl_set_bit(0, 8);
+	}
+	else
+		set_topology_timer();
+out:
+	update_cpu_core_map();
+	return rc;
+}
+__initcall(init_topology_update);
+
+void __init s390_init_cpu_topology(void)
+{
+	unsigned long long facility_bits;
+	struct tl_info *info;
+	struct core_info *core;
+	int nr_cores;
+	int i;
+
+	if (stfle(&facility_bits, 1) <= 0)
+		return;
+	if (!(facility_bits & (1ULL << 52)) || !(facility_bits & (1ULL << 61)))
+		return;
+	machine_has_topology = 1;
+
+	if (facility_bits & (1ULL << 51))
+		machine_has_topology_irq = 1;
+
+	tl_info = alloc_bootmem_pages(PAGE_SIZE);
+	if (!tl_info)
+		goto error;
+	info = tl_info;
+	stsi(info, 15, 1, 2);
+
+	nr_cores = info->mag[NR_MAG - 2];
+	for (i = 0; i < info->mnest - 2; i++)
+		nr_cores *= info->mag[NR_MAG - 3 - i];
+
+	printk(KERN_INFO "CPU topology:");
+	for (i = 0; i < NR_MAG; i++)
+		printk(" %d", info->mag[i]);
+	printk(" / %d\n", info->mnest);
+
+	core = &core_info;
+	for (i = 0; i < nr_cores; i++) {
+		core->next = alloc_bootmem(sizeof(struct core_info));
+		core = core->next;
+		if (!core)
+			goto error;
+	}
+	return;
+error:
+	machine_has_topology = 0;
+	machine_has_topology_irq = 0;
+}
diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c
index 60f728aeaf12..4584d81984c0 100644
--- a/arch/s390/kernel/traps.c
+++ b/arch/s390/kernel/traps.c
@@ -42,11 +42,8 @@
 #include <asm/s390_ext.h>
 #include <asm/lowcore.h>
 #include <asm/debug.h>
+#include "entry.h"
 
-/* Called from entry.S only */
-extern void handle_per_exception(struct pt_regs *regs);
-
-typedef void pgm_check_handler_t(struct pt_regs *, long);
 pgm_check_handler_t *pgm_check_table[128];
 
 #ifdef CONFIG_SYSCTL
@@ -59,7 +56,6 @@ int sysctl_userprocess_debug = 0;
 
 extern pgm_check_handler_t do_protection_exception;
 extern pgm_check_handler_t do_dat_exception;
-extern pgm_check_handler_t do_monitor_call;
 extern pgm_check_handler_t do_asce_exception;
 
 #define stack_pointer ({ void **sp; asm("la %0,0(15)" : "=&d" (sp)); sp; })
@@ -117,7 +113,7 @@ __show_trace(unsigned long sp, unsigned long low, unsigned long high)
 	}
 }
 
-void show_trace(struct task_struct *task, unsigned long *stack)
+static void show_trace(struct task_struct *task, unsigned long *stack)
 {
 	register unsigned long __r15 asm ("15");
 	unsigned long sp;
@@ -138,7 +134,6 @@ void show_trace(struct task_struct *task, unsigned long *stack)
 	else
 		__show_trace(sp, S390_lowcore.thread_info,
 			     S390_lowcore.thread_info + THREAD_SIZE);
-	printk("\n");
 	if (!task)
 		task = current;
 	debug_show_held_locks(task);
@@ -166,6 +161,15 @@ void show_stack(struct task_struct *task, unsigned long *sp)
 	show_trace(task, sp);
 }
 
+static void show_last_breaking_event(struct pt_regs *regs)
+{
+#ifdef CONFIG_64BIT
+	printk("Last Breaking-Event-Address:\n");
+	printk(" [<%016lx>] ", regs->args[0] & PSW_ADDR_INSN);
+	print_symbol("%s\n", regs->args[0] & PSW_ADDR_INSN);
+#endif
+}
+
 /*
  * The architecture-independent dump_stack generator
  */
@@ -219,6 +223,24 @@ void show_registers(struct pt_regs *regs)
 	show_code(regs);
 }	
 
+void show_regs(struct pt_regs *regs)
+{
+	print_modules();
+	printk("CPU: %d %s %s %.*s\n",
+	       task_thread_info(current)->cpu, print_tainted(),
+	       init_utsname()->release,
+	       (int)strcspn(init_utsname()->version, " "),
+	       init_utsname()->version);
+	printk("Process %s (pid: %d, task: %p, ksp: %p)\n",
+	       current->comm, current->pid, current,
+	       (void *) current->thread.ksp);
+	show_registers(regs);
+	/* Show stack backtrace if pt_regs is from kernel mode */
+	if (!(regs->psw.mask & PSW_MASK_PSTATE))
+		show_trace(NULL, (unsigned long *) regs->gprs[15]);
+	show_last_breaking_event(regs);
+}
+
 /* This is called from fs/proc/array.c */
 void task_show_regs(struct seq_file *m, struct task_struct *task)
 {
@@ -739,6 +761,5 @@ void __init trap_init(void)
         pgm_check_table[0x15] = &operand_exception;
         pgm_check_table[0x1C] = &space_switch_exception;
         pgm_check_table[0x1D] = &hfp_sqrt_exception;
-	pgm_check_table[0x40] = &do_monitor_call;
 	pfault_irq_init();
 }
diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c
index c5f05b3fb2c3..ca90ee3f930e 100644
--- a/arch/s390/kernel/vtime.c
+++ b/arch/s390/kernel/vtime.c
@@ -110,6 +110,7 @@ void account_system_vtime(struct task_struct *tsk)
 	S390_lowcore.steal_clock -= cputime << 12;
 	account_system_time(tsk, 0, cputime);
 }
+EXPORT_SYMBOL_GPL(account_system_vtime);
 
 static inline void set_vtimer(__u64 expires)
 {
diff --git a/arch/s390/kvm/Kconfig b/arch/s390/kvm/Kconfig
new file mode 100644
index 000000000000..1761b74d639b
--- /dev/null
+++ b/arch/s390/kvm/Kconfig
@@ -0,0 +1,46 @@
+#
+# KVM configuration
+#
+config HAVE_KVM
+       bool
+
+menuconfig VIRTUALIZATION
+	bool "Virtualization"
+	default y
+	---help---
+	  Say Y here to get to see options for using your Linux host to run other
+	  operating systems inside virtual machines (guests).
+	  This option alone does not add any kernel code.
+
+	  If you say N, all options in this submenu will be skipped and disabled.
+
+if VIRTUALIZATION
+
+config KVM
+	tristate "Kernel-based Virtual Machine (KVM) support"
+	depends on HAVE_KVM && EXPERIMENTAL
+	select PREEMPT_NOTIFIERS
+	select ANON_INODES
+	select S390_SWITCH_AMODE
+	select PREEMPT
+	---help---
+	  Support hosting paravirtualized guest machines using the SIE
+	  virtualization capability on the mainframe. This should work
+	  on any 64bit machine.
+
+	  This module provides access to the hardware capabilities through
+	  a character device node named /dev/kvm.
+
+	  To compile this as a module, choose M here: the module
+	  will be called kvm.
+
+	  If unsure, say N.
+
+config KVM_TRACE
+       bool
+
+# OK, it's a little counter-intuitive to do this, but it puts it neatly under
+# the virtualization menu.
+source drivers/virtio/Kconfig
+
+endif # VIRTUALIZATION
diff --git a/arch/s390/kvm/Makefile b/arch/s390/kvm/Makefile
new file mode 100644
index 000000000000..e5221ec0b8e3
--- /dev/null
+++ b/arch/s390/kvm/Makefile
@@ -0,0 +1,14 @@
+# Makefile for kernel virtual machines on s390
+#
+# Copyright IBM Corp. 2008
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License (version 2 only)
+# as published by the Free Software Foundation.
+
+common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o)
+
+EXTRA_CFLAGS += -Ivirt/kvm -Iarch/s390/kvm
+
+kvm-objs := $(common-objs) kvm-s390.o sie64a.o intercept.o interrupt.o priv.o sigp.o diag.o
+obj-$(CONFIG_KVM) += kvm.o
diff --git a/arch/s390/kvm/diag.c b/arch/s390/kvm/diag.c
new file mode 100644
index 000000000000..f639a152869f
--- /dev/null
+++ b/arch/s390/kvm/diag.c
@@ -0,0 +1,67 @@
+/*
+ * diag.c - handling diagnose instructions
+ *
+ * Copyright IBM Corp. 2008
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License (version 2 only)
+ * as published by the Free Software Foundation.
+ *
+ *    Author(s): Carsten Otte <cotte@de.ibm.com>
+ *               Christian Borntraeger <borntraeger@de.ibm.com>
+ */
+
+#include <linux/kvm.h>
+#include <linux/kvm_host.h>
+#include "kvm-s390.h"
+
+static int __diag_time_slice_end(struct kvm_vcpu *vcpu)
+{
+	VCPU_EVENT(vcpu, 5, "%s", "diag time slice end");
+	vcpu->stat.diagnose_44++;
+	vcpu_put(vcpu);
+	schedule();
+	vcpu_load(vcpu);
+	return 0;
+}
+
+static int __diag_ipl_functions(struct kvm_vcpu *vcpu)
+{
+	unsigned int reg = vcpu->arch.sie_block->ipa & 0xf;
+	unsigned long subcode = vcpu->arch.guest_gprs[reg] & 0xffff;
+
+	VCPU_EVENT(vcpu, 5, "diag ipl functions, subcode %lx", subcode);
+	switch (subcode) {
+	case 3:
+		vcpu->run->s390_reset_flags = KVM_S390_RESET_CLEAR;
+		break;
+	case 4:
+		vcpu->run->s390_reset_flags = 0;
+		break;
+	default:
+		return -ENOTSUPP;
+	}
+
+	atomic_clear_mask(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
+	vcpu->run->s390_reset_flags |= KVM_S390_RESET_SUBSYSTEM;
+	vcpu->run->s390_reset_flags |= KVM_S390_RESET_IPL;
+	vcpu->run->s390_reset_flags |= KVM_S390_RESET_CPU_INIT;
+	vcpu->run->exit_reason = KVM_EXIT_S390_RESET;
+	VCPU_EVENT(vcpu, 3, "requesting userspace resets %lx",
+	  vcpu->run->s390_reset_flags);
+	return -EREMOTE;
+}
+
+int kvm_s390_handle_diag(struct kvm_vcpu *vcpu)
+{
+	/* Function code: ipb masked with 0xfff0000, shifted down by 16. */
+	int code = (vcpu->arch.sie_block->ipb & 0xfff0000) >> 16;
+
+	if (code == 0x44)
+		return __diag_time_slice_end(vcpu);
+	if (code == 0x308)
+		return __diag_ipl_functions(vcpu);
+
+	/* Every other diagnose code is not handled here. */
+	return -ENOTSUPP;
+}
diff --git a/arch/s390/kvm/gaccess.h b/arch/s390/kvm/gaccess.h
new file mode 100644
index 000000000000..4e0633c413f3
--- /dev/null
+++ b/arch/s390/kvm/gaccess.h
@@ -0,0 +1,274 @@
+/*
+ * gaccess.h -  access guest memory
+ *
+ * Copyright IBM Corp. 2008
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License (version 2 only)
+ * as published by the Free Software Foundation.
+ *
+ *    Author(s): Carsten Otte <cotte@de.ibm.com>
+ */
+
+#ifndef __KVM_S390_GACCESS_H
+#define __KVM_S390_GACCESS_H
+
+#include <linux/compiler.h>
+#include <linux/kvm_host.h>
+#include <asm/uaccess.h>
+
+static inline void __user *__guestaddr_to_user(struct kvm_vcpu *vcpu,
+					       u64 guestaddr)
+{
+	u64 prefix  = vcpu->arch.sie_block->prefix;
+	u64 origin  = vcpu->kvm->arch.guest_origin;
+	u64 memsize = vcpu->kvm->arch.guest_memsize;
+
+	/* Swap the first two pages with the two pages at the prefix. */
+	if (guestaddr < 2 * PAGE_SIZE)
+		guestaddr += prefix;
+	else if ((guestaddr >= prefix) && (guestaddr < prefix + 2 * PAGE_SIZE))
+		guestaddr -= prefix;
+	/* Valid offsets are 0..memsize-1; memsize itself is out of range. */
+	if (guestaddr >= memsize)
+		return (void __user __force *) ERR_PTR(-EFAULT);
+
+	guestaddr += origin;
+	return (void __user *) guestaddr;
+}
+
+static inline int get_guest_u64(struct kvm_vcpu *vcpu, u64 guestaddr,
+				u64 *result)
+{
+	void __user *uptr = __guestaddr_to_user(vcpu, guestaddr);
+
+	BUG_ON(guestaddr & 7);
+
+	if (IS_ERR((void __force *) uptr))
+		return PTR_ERR((void __force *) uptr);
+
+	return get_user(*result, (u64 __user *) uptr);
+}
+
+static inline int get_guest_u32(struct kvm_vcpu *vcpu, u64 guestaddr,
+				u32 *result)
+{
+	void __user *uptr = __guestaddr_to_user(vcpu, guestaddr);
+
+	BUG_ON(guestaddr & 3);
+
+	if (IS_ERR((void __force *) uptr))
+		return PTR_ERR((void __force *) uptr);
+
+	return get_user(*result, (u32 __user *) uptr);
+}
+
+static inline int get_guest_u16(struct kvm_vcpu *vcpu, u64 guestaddr,
+				u16 *result)
+{
+	void __user *uptr = __guestaddr_to_user(vcpu, guestaddr);
+
+	BUG_ON(guestaddr & 1);
+	/* __force casts match the other get_guest_ and put_guest_ helpers. */
+	if (IS_ERR((void __force *) uptr))
+		return PTR_ERR((void __force *) uptr);
+
+	return get_user(*result, (u16 __user *) uptr);
+}
+
+static inline int get_guest_u8(struct kvm_vcpu *vcpu, u64 guestaddr,
+			       u8 *result)
+{
+	void __user *uptr = __guestaddr_to_user(vcpu, guestaddr);
+
+	if (IS_ERR((void __force *) uptr))
+		return PTR_ERR((void __force *) uptr);
+
+	return get_user(*result, (u8 __user *) uptr);
+}
+
+static inline int put_guest_u64(struct kvm_vcpu *vcpu, u64 guestaddr,
+				u64 value)
+{
+	void __user *uptr = __guestaddr_to_user(vcpu, guestaddr);
+
+	BUG_ON(guestaddr & 7);
+
+	if (IS_ERR((void __force *) uptr))
+		return PTR_ERR((void __force *) uptr);
+
+	return put_user(value, (u64 __user *) uptr);
+}
+
+static inline int put_guest_u32(struct kvm_vcpu *vcpu, u64 guestaddr,
+				u32 value)
+{
+	void __user *uptr = __guestaddr_to_user(vcpu, guestaddr);
+
+	BUG_ON(guestaddr & 3);
+
+	if (IS_ERR((void __force *) uptr))
+		return PTR_ERR((void __force *) uptr);
+
+	return put_user(value, (u32 __user *) uptr);
+}
+
+static inline int put_guest_u16(struct kvm_vcpu *vcpu, u64 guestaddr,
+				u16 value)
+{
+	void __user *uptr = __guestaddr_to_user(vcpu, guestaddr);
+
+	BUG_ON(guestaddr & 1);
+
+	if (IS_ERR((void __force *) uptr))
+		return PTR_ERR((void __force *) uptr);
+
+	return put_user(value, (u16 __user *) uptr);
+}
+
+static inline int put_guest_u8(struct kvm_vcpu *vcpu, u64 guestaddr,
+			       u8 value)
+{
+	void __user *uptr = __guestaddr_to_user(vcpu, guestaddr);
+
+	if (IS_ERR((void __force *) uptr))
+		return PTR_ERR((void __force *) uptr);
+
+	return put_user(value, (u8 __user *) uptr);
+}
+
+
+static inline int __copy_to_guest_slow(struct kvm_vcpu *vcpu, u64 guestdest,
+				       const void *from, unsigned long n)
+{
+	int rc;
+	unsigned long i;
+	const u8 *data = from;
+
+	for (i = 0; i < n; i++) {
+		rc = put_guest_u8(vcpu, guestdest++, *(data++));
+		if (rc < 0)
+			return rc;
+	}
+	return 0;
+}
+
+static inline int copy_to_guest(struct kvm_vcpu *vcpu, u64 guestdest,
+				const void *from, unsigned long n)
+{
+	u64 prefix  = vcpu->arch.sie_block->prefix;
+	u64 origin  = vcpu->kvm->arch.guest_origin;
+	u64 memsize = vcpu->kvm->arch.guest_memsize;
+
+	if ((guestdest < 2 * PAGE_SIZE) && (guestdest + n > 2 * PAGE_SIZE))
+		goto slowpath;
+
+	if ((guestdest < prefix) && (guestdest + n > prefix))
+		goto slowpath;
+
+	if ((guestdest < prefix + 2 * PAGE_SIZE)
+	    && (guestdest + n > prefix + 2 * PAGE_SIZE))
+		goto slowpath;
+
+	if (guestdest < 2 * PAGE_SIZE)
+		guestdest += prefix;
+	else if ((guestdest >= prefix) && (guestdest < prefix + 2 * PAGE_SIZE))
+		guestdest -= prefix;
+
+	if (guestdest + n > memsize)
+		return -EFAULT;
+
+	if (guestdest + n < guestdest)
+		return -EFAULT;
+
+	guestdest += origin;
+
+	return copy_to_user((void __user *) guestdest, from, n);
+slowpath:
+	return __copy_to_guest_slow(vcpu, guestdest, from, n);
+}
+
+static inline int __copy_from_guest_slow(struct kvm_vcpu *vcpu, void *to,
+					 u64 guestsrc, unsigned long n)
+{
+	int rc;
+	unsigned long i;
+	u8 *data = to;
+
+	for (i = 0; i < n; i++) {
+		rc = get_guest_u8(vcpu, guestsrc++, data++);
+		if (rc < 0)
+			return rc;
+	}
+	return 0;
+}
+
+static inline int copy_from_guest(struct kvm_vcpu *vcpu, void *to,
+				  u64 guestsrc, unsigned long n)
+{
+	u64 prefix  = vcpu->arch.sie_block->prefix;
+	u64 origin  = vcpu->kvm->arch.guest_origin;
+	u64 memsize = vcpu->kvm->arch.guest_memsize;
+
+	if ((guestsrc < 2 * PAGE_SIZE) && (guestsrc + n > 2 * PAGE_SIZE))
+		goto slowpath;
+
+	if ((guestsrc < prefix) && (guestsrc + n > prefix))
+		goto slowpath;
+
+	if ((guestsrc < prefix + 2 * PAGE_SIZE)
+	    && (guestsrc + n > prefix + 2 * PAGE_SIZE))
+		goto slowpath;
+
+	if (guestsrc < 2 * PAGE_SIZE)
+		guestsrc += prefix;
+	else if ((guestsrc >= prefix) && (guestsrc < prefix + 2 * PAGE_SIZE))
+		guestsrc -= prefix;
+
+	if (guestsrc + n > memsize)
+		return -EFAULT;
+
+	if (guestsrc + n < guestsrc)
+		return -EFAULT;
+
+	guestsrc += origin;
+
+	return copy_from_user(to, (void __user *) guestsrc, n);
+slowpath:
+	return __copy_from_guest_slow(vcpu, to, guestsrc, n);
+}
+
+static inline int copy_to_guest_absolute(struct kvm_vcpu *vcpu, u64 guestdest,
+					 const void *from, unsigned long n)
+{
+	u64 origin  = vcpu->kvm->arch.guest_origin;
+	u64 memsize = vcpu->kvm->arch.guest_memsize;
+
+	if (guestdest + n > memsize)
+		return -EFAULT;
+
+	if (guestdest + n < guestdest)
+		return -EFAULT;
+
+	guestdest += origin;
+
+	return copy_to_user((void __user *) guestdest, from, n);
+}
+
+static inline int copy_from_guest_absolute(struct kvm_vcpu *vcpu, void *to,
+					   u64 guestsrc, unsigned long n)
+{
+	u64 origin  = vcpu->kvm->arch.guest_origin;
+	u64 memsize = vcpu->kvm->arch.guest_memsize;
+
+	if (guestsrc + n > memsize)
+		return -EFAULT;
+
+	if (guestsrc + n < guestsrc)
+		return -EFAULT;
+
+	guestsrc += origin;
+
+	return copy_from_user(to, (void __user *) guestsrc, n);
+}
+#endif
diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c
new file mode 100644
index 000000000000..349581a26103
--- /dev/null
+++ b/arch/s390/kvm/intercept.c
@@ -0,0 +1,216 @@
+/*
+ * intercept.c - in-kernel handling for sie intercepts
+ *
+ * Copyright IBM Corp. 2008
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License (version 2 only)
+ * as published by the Free Software Foundation.
+ *
+ *    Author(s): Carsten Otte <cotte@de.ibm.com>
+ *               Christian Borntraeger <borntraeger@de.ibm.com>
+ */
+
+#include <linux/kvm_host.h>
+#include <linux/errno.h>
+#include <linux/pagemap.h>
+
+#include <asm/kvm_host.h>
+
+#include "kvm-s390.h"
+#include "gaccess.h"
+
+static int handle_lctg(struct kvm_vcpu *vcpu)
+{
+	int reg1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4;
+	int reg3 = vcpu->arch.sie_block->ipa & 0x000f;
+	int base2 = vcpu->arch.sie_block->ipb >> 28;
+	int disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16) +
+			((vcpu->arch.sie_block->ipb & 0xff00) << 4);
+	u64 useraddr;
+	int reg, rc;
+
+	vcpu->stat.instruction_lctg++;
+	if ((vcpu->arch.sie_block->ipb & 0xff) != 0x2f)
+		return -ENOTSUPP;
+	/* Operand address: displacement plus the optional base register. */
+	useraddr = disp2;
+	if (base2)
+		useraddr += vcpu->arch.guest_gprs[base2];
+	reg = reg1;
+
+	VCPU_EVENT(vcpu, 5, "lctg r1:%x, r3:%x,b2:%x,d2:%x", reg1, reg3, base2,
+		   disp2);
+	if (useraddr & 7)	/* get_guest_u64() would BUG_ON() this address */
+		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+	do {
+		rc = get_guest_u64(vcpu, useraddr,
+				   &vcpu->arch.sie_block->gcr[reg]);
+		if (rc == -EFAULT) {
+			kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+			break;
+		}
+		useraddr += 8;
+		if (reg == reg3)
+			break;
+		reg = (reg + 1) % 16;
+	} while (1);
+	return 0;
+}
+
+static int handle_lctl(struct kvm_vcpu *vcpu)
+{
+	int reg1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4;
+	int reg3 = vcpu->arch.sie_block->ipa & 0x000f;
+	int base2 = vcpu->arch.sie_block->ipb >> 28;
+	int disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16);
+	u64 useraddr;
+	u32 val = 0;
+	int reg, rc;
+
+	vcpu->stat.instruction_lctl++;
+	/* Operand address: displacement plus the optional base register. */
+	useraddr = disp2;
+	if (base2)
+		useraddr += vcpu->arch.guest_gprs[base2];
+	VCPU_EVENT(vcpu, 5, "lctl r1:%x, r3:%x,b2:%x,d2:%x", reg1, reg3, base2,
+		   disp2);
+	if (useraddr & 3)	/* get_guest_u32() would BUG_ON() this address */
+		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+	reg = reg1;
+	do {
+		rc = get_guest_u32(vcpu, useraddr, &val);
+		if (rc == -EFAULT) {
+			kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+			break;
+		}
+		vcpu->arch.sie_block->gcr[reg] &= 0xffffffff00000000ul;
+		vcpu->arch.sie_block->gcr[reg] |= val;
+		useraddr += 4;
+		if (reg == reg3)
+			break;
+		reg = (reg + 1) % 16;
+	} while (1);
+	return 0;
+}
+
+static intercept_handler_t instruction_handlers[256] = {
+	[0x83] = kvm_s390_handle_diag,
+	[0xae] = kvm_s390_handle_sigp,
+	[0xb2] = kvm_s390_handle_priv,
+	[0xb7] = handle_lctl,
+	[0xeb] = handle_lctg,
+};
+
+static int handle_noop(struct kvm_vcpu *vcpu)
+{
+	switch (vcpu->arch.sie_block->icptcode) {
+	case 0x10:
+		vcpu->stat.exit_external_request++;
+		break;
+	case 0x14:
+		vcpu->stat.exit_external_interrupt++;
+		break;
+	default:
+		break; /* nothing */
+	}
+	return 0;
+}
+
+static int handle_stop(struct kvm_vcpu *vcpu)
+{
+	int rc = 0;
+
+	vcpu->stat.exit_stop_request++;
+	atomic_clear_mask(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
+	spin_lock_bh(&vcpu->arch.local_int.lock);
+	if (vcpu->arch.local_int.action_bits & ACTION_STORE_ON_STOP) {
+		vcpu->arch.local_int.action_bits &= ~ACTION_STORE_ON_STOP;
+		rc = __kvm_s390_vcpu_store_status(vcpu,
+						  KVM_S390_STORE_STATUS_NOADDR);
+		if (rc >= 0)
+			rc = -ENOTSUPP;
+	}
+
+	if (vcpu->arch.local_int.action_bits & ACTION_STOP_ON_STOP) {
+		vcpu->arch.local_int.action_bits &= ~ACTION_STOP_ON_STOP;
+		VCPU_EVENT(vcpu, 3, "%s", "cpu stopped");
+		rc = -ENOTSUPP;
+	}
+	/* Without a stop request rc keeps the store-status outcome, if any. */
+	spin_unlock_bh(&vcpu->arch.local_int.lock);
+	return rc;
+}
+
+static int handle_validity(struct kvm_vcpu *vcpu)
+{
+	int viwhy = vcpu->arch.sie_block->ipb >> 16;
+	vcpu->stat.exit_validity++;
+	if (viwhy == 0x37) {	/* the one reason code handled in-kernel */
+		fault_in_pages_writeable((char __user *)
+					 vcpu->kvm->arch.guest_origin +
+					 vcpu->arch.sie_block->prefix,
+					 PAGE_SIZE);
+		return 0;	/* NOTE(review): PAGE_SIZE only; prefix is 2 pages? */
+	}
+	VCPU_EVENT(vcpu, 2, "unhandled validity intercept code %d",
+		   viwhy);
+	return -ENOTSUPP;	/* every other reason code is unsupported */
+}
+
+static int handle_instruction(struct kvm_vcpu *vcpu)
+{
+	intercept_handler_t fn;
+
+	vcpu->stat.exit_instruction++;
+	fn = instruction_handlers[vcpu->arch.sie_block->ipa >> 8];
+	if (!fn)	/* no handler registered in this table slot */
+		return -ENOTSUPP;
+	return fn(vcpu);
+}
+
+static int handle_prog(struct kvm_vcpu *vcpu)
+{
+	vcpu->stat.exit_program_interruption++;
+	return kvm_s390_inject_program_int(vcpu, vcpu->arch.sie_block->iprcc);
+}
+
+static int handle_instruction_and_prog(struct kvm_vcpu *vcpu)
+{
+	int rc, rc2;
+
+	vcpu->stat.exit_instr_and_program++;
+	rc = handle_instruction(vcpu);
+	rc2 = handle_prog(vcpu);
+
+	if (rc == -ENOTSUPP)
+		vcpu->arch.sie_block->icptcode = 0x04;
+	if (rc)
+		return rc;
+	return rc2;
+}
+
+static const intercept_handler_t intercept_funcs[0x48 >> 2] = {
+	[0x00 >> 2] = handle_noop,
+	[0x04 >> 2] = handle_instruction,
+	[0x08 >> 2] = handle_prog,
+	[0x0C >> 2] = handle_instruction_and_prog,
+	[0x10 >> 2] = handle_noop,
+	[0x14 >> 2] = handle_noop,
+	[0x1C >> 2] = kvm_s390_handle_wait,
+	[0x20 >> 2] = handle_validity,
+	[0x28 >> 2] = handle_stop,
+};
+
+int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu)
+{
+	intercept_handler_t func;
+	u8 code = vcpu->arch.sie_block->icptcode;
+	/* intercept_funcs has 0x48 >> 2 slots, so code 0x48 is out of range */
+	if (code & 3 || code >= 0x48)
+		return -ENOTSUPP;
+	func = intercept_funcs[code >> 2];
+	if (func)
+		return func(vcpu);
+	return -ENOTSUPP;
+}
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
new file mode 100644
index 000000000000..fcd1ed8015c1
--- /dev/null
+++ b/arch/s390/kvm/interrupt.c
@@ -0,0 +1,592 @@
+/*
+ * interrupt.c - handling kvm guest interrupts
+ *
+ * Copyright IBM Corp. 2008
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License (version 2 only)
+ * as published by the Free Software Foundation.
+ *
+ *    Author(s): Carsten Otte <cotte@de.ibm.com>
+ */
+
+#include <asm/lowcore.h>
+#include <asm/uaccess.h>
+#include <linux/kvm_host.h>
+#include "kvm-s390.h"
+#include "gaccess.h"
+
+static int psw_extint_disabled(struct kvm_vcpu *vcpu)
+{
+	return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_EXT);
+}
+
+/* Tell whether the guest PSW has PER, I/O and external interrupts all masked */
+static int psw_interrupts_disabled(struct kvm_vcpu *vcpu)
+{
+	psw_t *psw = &vcpu->arch.sie_block->gpsw;
+
+	/* disabled: none of the PER, I/O and external mask bits is set */
+	return !(psw->mask & (PSW_MASK_PER | PSW_MASK_IO | PSW_MASK_EXT));
+}
+
+static int __interrupt_is_deliverable(struct kvm_vcpu *vcpu,
+				      struct interrupt_info *inti)
+{
+	unsigned long cr0_bit;
+
+	/*
+	 * External interrupt types need the PSW external mask plus a
+	 * type-specific bit in guest control register 0; the remaining
+	 * known types are always deliverable.
+	 */
+	switch (inti->type) {
+	case KVM_S390_INT_EMERGENCY:
+		cr0_bit = 0x4000ul;
+		break;
+	case KVM_S390_INT_SERVICE:
+	case KVM_S390_INT_VIRTIO:
+		cr0_bit = 0x200ul;
+		break;
+	case KVM_S390_PROGRAM_INT:
+	case KVM_S390_SIGP_STOP:
+	case KVM_S390_SIGP_SET_PREFIX:
+	case KVM_S390_RESTART:
+		return 1;
+	default:
+		BUG();
+		return 0;
+	}
+
+	if (psw_extint_disabled(vcpu))
+		return 0;
+	return (vcpu->arch.sie_block->gcr[0] & cr0_bit) ? 1 : 0;
+}
+
+static void __set_cpu_idle(struct kvm_vcpu *vcpu)
+{
+	BUG_ON(vcpu->vcpu_id > KVM_MAX_VCPUS - 1);
+	atomic_set_mask(CPUSTAT_WAIT, &vcpu->arch.sie_block->cpuflags);
+	set_bit(vcpu->vcpu_id, vcpu->arch.local_int.float_int->idle_mask);
+}
+
+static void __unset_cpu_idle(struct kvm_vcpu *vcpu)
+{
+	BUG_ON(vcpu->vcpu_id > KVM_MAX_VCPUS - 1);
+	atomic_clear_mask(CPUSTAT_WAIT, &vcpu->arch.sie_block->cpuflags);
+	clear_bit(vcpu->vcpu_id, vcpu->arch.local_int.float_int->idle_mask);
+}
+
+static void __reset_intercept_indicators(struct kvm_vcpu *vcpu)
+{
+	atomic_clear_mask(CPUSTAT_ECALL_PEND |
+		CPUSTAT_IO_INT | CPUSTAT_EXT_INT | CPUSTAT_STOP_INT,
+		&vcpu->arch.sie_block->cpuflags);
+	vcpu->arch.sie_block->lctl = 0x0000;
+}
+
+static void __set_cpuflag(struct kvm_vcpu *vcpu, u32 flag)
+{
+	atomic_set_mask(flag, &vcpu->arch.sie_block->cpuflags);
+}
+
+static void __set_intercept_indicator(struct kvm_vcpu *vcpu,
+				      struct interrupt_info *inti)
+{
+	switch (inti->type) {
+	case KVM_S390_INT_EMERGENCY:
+	case KVM_S390_INT_SERVICE:
+	case KVM_S390_INT_VIRTIO:
+		if (psw_extint_disabled(vcpu))
+			__set_cpuflag(vcpu, CPUSTAT_EXT_INT);
+		else
+			vcpu->arch.sie_block->lctl |= LCTL_CR0;
+		break;
+	case KVM_S390_SIGP_STOP:
+		__set_cpuflag(vcpu, CPUSTAT_STOP_INT);
+		break;
+	default:
+		BUG();
+	}
+}
+
+static void __do_deliver_interrupt(struct kvm_vcpu *vcpu,
+				   struct interrupt_info *inti)
+{
+	const unsigned short table[] = { 2, 4, 4, 6 };
+	int rc, exception = 0;
+
+	switch (inti->type) {
+	case KVM_S390_INT_EMERGENCY:
+		VCPU_EVENT(vcpu, 4, "%s", "interrupt: sigp emerg");
+		vcpu->stat.deliver_emergency_signal++;
+		rc = put_guest_u16(vcpu, __LC_EXT_INT_CODE, 0x1201);
+		if (rc == -EFAULT)
+			exception = 1;
+
+		rc = copy_to_guest(vcpu, __LC_EXT_OLD_PSW,
+			 &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+		if (rc == -EFAULT)
+			exception = 1;
+
+		rc = copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw,
+			__LC_EXT_NEW_PSW, sizeof(psw_t));
+		if (rc == -EFAULT)
+			exception = 1;
+		break;
+
+	case KVM_S390_INT_SERVICE:
+		VCPU_EVENT(vcpu, 4, "interrupt: sclp parm:%x",
+			   inti->ext.ext_params);
+		vcpu->stat.deliver_service_signal++;
+		rc = put_guest_u16(vcpu, __LC_EXT_INT_CODE, 0x2401);
+		if (rc == -EFAULT)
+			exception = 1;
+
+		rc = copy_to_guest(vcpu, __LC_EXT_OLD_PSW,
+			 &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+		if (rc == -EFAULT)
+			exception = 1;
+
+		rc = copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw,
+			__LC_EXT_NEW_PSW, sizeof(psw_t));
+		if (rc == -EFAULT)
+			exception = 1;
+
+		rc = put_guest_u32(vcpu, __LC_EXT_PARAMS, inti->ext.ext_params);
+		if (rc == -EFAULT)
+			exception = 1;
+		break;
+
+	case KVM_S390_INT_VIRTIO:
+		VCPU_EVENT(vcpu, 4, "interrupt: virtio parm:%x,parm64:%lx",
+			   inti->ext.ext_params, inti->ext.ext_params2);
+		vcpu->stat.deliver_virtio_interrupt++;
+		rc = put_guest_u16(vcpu, __LC_EXT_INT_CODE, 0x2603);
+		if (rc == -EFAULT)
+			exception = 1;
+
+		rc = put_guest_u16(vcpu, __LC_CPU_ADDRESS, 0x0d00);
+		if (rc == -EFAULT)
+			exception = 1;
+
+		rc = copy_to_guest(vcpu, __LC_EXT_OLD_PSW,
+			 &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+		if (rc == -EFAULT)
+			exception = 1;
+
+		rc = copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw,
+			__LC_EXT_NEW_PSW, sizeof(psw_t));
+		if (rc == -EFAULT)
+			exception = 1;
+
+		rc = put_guest_u32(vcpu, __LC_EXT_PARAMS, inti->ext.ext_params);
+		if (rc == -EFAULT)
+			exception = 1;
+
+		rc = put_guest_u64(vcpu, __LC_PFAULT_INTPARM,
+			inti->ext.ext_params2);
+		if (rc == -EFAULT)
+			exception = 1;
+		break;
+
+	case KVM_S390_SIGP_STOP:
+		VCPU_EVENT(vcpu, 4, "%s", "interrupt: cpu stop");
+		vcpu->stat.deliver_stop_signal++;
+		__set_intercept_indicator(vcpu, inti);
+		break;
+
+	case KVM_S390_SIGP_SET_PREFIX:
+		VCPU_EVENT(vcpu, 4, "interrupt: set prefix to %x",
+			   inti->prefix.address);
+		vcpu->stat.deliver_prefix_signal++;
+		vcpu->arch.sie_block->prefix = inti->prefix.address;
+		vcpu->arch.sie_block->ihcpu = 0xffff;
+		break;
+
+	case KVM_S390_RESTART:
+		VCPU_EVENT(vcpu, 4, "%s", "interrupt: cpu restart");
+		vcpu->stat.deliver_restart_signal++;
+		rc = copy_to_guest(vcpu, offsetof(struct _lowcore,
+		  restart_old_psw), &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+		if (rc == -EFAULT)
+			exception = 1;
+
+		rc = copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw,
+			offsetof(struct _lowcore, restart_psw), sizeof(psw_t));
+		if (rc == -EFAULT)
+			exception = 1;
+		break;
+
+	case KVM_S390_PROGRAM_INT:
+		VCPU_EVENT(vcpu, 4, "interrupt: pgm check code:%x, ilc:%x",
+			   inti->pgm.code,
+			   table[vcpu->arch.sie_block->ipa >> 14]);
+		vcpu->stat.deliver_program_int++;
+		rc = put_guest_u16(vcpu, __LC_PGM_INT_CODE, inti->pgm.code);
+		if (rc == -EFAULT)
+			exception = 1;
+
+		rc = put_guest_u16(vcpu, __LC_PGM_ILC,
+			table[vcpu->arch.sie_block->ipa >> 14]);
+		if (rc == -EFAULT)
+			exception = 1;
+
+		rc = copy_to_guest(vcpu, __LC_PGM_OLD_PSW,
+			 &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+		if (rc == -EFAULT)
+			exception = 1;
+
+		rc = copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw,
+			__LC_PGM_NEW_PSW, sizeof(psw_t));
+		if (rc == -EFAULT)
+			exception = 1;
+		break;
+
+	default:
+		BUG();
+	}
+
+	if (exception) {
+		VCPU_EVENT(vcpu, 1, "%s", "program exception while delivering"
+			   " interrupt");
+		kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+		if (inti->type == KVM_S390_PROGRAM_INT) {
+			printk(KERN_WARNING "kvm: recursive program check\n");
+			BUG();
+		}
+	}
+}
+
+static int __try_deliver_ckc_interrupt(struct kvm_vcpu *vcpu)
+{
+	int rc, exception = 0;
+
+	if (psw_extint_disabled(vcpu))
+		return 0;
+	if (!(vcpu->arch.sie_block->gcr[0] & 0x800ul))
+		return 0;
+	rc = put_guest_u16(vcpu, __LC_EXT_INT_CODE, 0x1004);
+	if (rc == -EFAULT)
+		exception = 1;
+	rc = copy_to_guest(vcpu, __LC_EXT_OLD_PSW,
+		 &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+	if (rc == -EFAULT)
+		exception = 1;
+	rc = copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw,
+		__LC_EXT_NEW_PSW, sizeof(psw_t));
+	if (rc == -EFAULT)
+		exception = 1;
+
+	if (exception) {
+		VCPU_EVENT(vcpu, 1, "%s", "program exception while delivering" \
+			   " ckc interrupt");
+		kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+		return 0;
+	}
+
+	return 1;
+}
+
+int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu)
+{
+	struct local_interrupt *li = &vcpu->arch.local_int;
+	struct float_interrupt *fi = vcpu->arch.local_int.float_int;
+	struct interrupt_info  *inti;
+	int rc = 0;
+
+	if (atomic_read(&li->active)) {
+		spin_lock_bh(&li->lock);
+		list_for_each_entry(inti, &li->list, list)
+			if (__interrupt_is_deliverable(vcpu, inti)) {
+				rc = 1;
+				break;
+			}
+		spin_unlock_bh(&li->lock);
+	}
+
+	if ((!rc) && atomic_read(&fi->active)) {
+		spin_lock_bh(&fi->lock);
+		list_for_each_entry(inti, &fi->list, list)
+			if (__interrupt_is_deliverable(vcpu, inti)) {
+				rc = 1;
+				break;
+			}
+		spin_unlock_bh(&fi->lock);
+	}
+
+	if ((!rc) && (vcpu->arch.sie_block->ckc <
+		get_clock() + vcpu->arch.sie_block->epoch)) {
+		if ((!psw_extint_disabled(vcpu)) &&
+			(vcpu->arch.sie_block->gcr[0] & 0x800ul))
+			rc = 1;
+	}
+
+	return rc;
+}
+
+int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
+{
+	return 0;
+}
+
+/* Intercept 0x1C handler: sleep until an interrupt, timer or signal is due */
+int kvm_s390_handle_wait(struct kvm_vcpu *vcpu)
+{
+	u64 now, sltime;
+	DECLARE_WAITQUEUE(wait, current);
+
+	vcpu->stat.exit_wait_state++;
+	if (kvm_cpu_has_interrupt(vcpu))
+		return 0;
+
+	if (psw_interrupts_disabled(vcpu)) {
+		VCPU_EVENT(vcpu, 3, "%s", "disabled wait");
+		__unset_cpu_idle(vcpu);
+		return -ENOTSUPP; /* disabled wait */
+	}
+
+	if (psw_extint_disabled(vcpu) ||
+	    (!(vcpu->arch.sie_block->gcr[0] & 0x800ul))) {
+		VCPU_EVENT(vcpu, 3, "%s", "enabled wait w/o timer");
+		goto no_timer;
+	}
+
+	now = get_clock() + vcpu->arch.sie_block->epoch;
+	if (vcpu->arch.sie_block->ckc < now) {
+		__unset_cpu_idle(vcpu);
+		return 0;
+	}
+
+	sltime = (vcpu->arch.sie_block->ckc - now) / (0xf4240000ul / HZ) + 1;
+	vcpu->arch.ckc_timer.expires = jiffies + sltime;
+	add_timer(&vcpu->arch.ckc_timer);
+	VCPU_EVENT(vcpu, 5, "enabled wait timer:%lx jiffies", sltime);
+no_timer:
+	spin_lock_bh(&vcpu->arch.local_int.float_int->lock);
+	spin_lock_bh(&vcpu->arch.local_int.lock);
+	__set_cpu_idle(vcpu);
+	vcpu->arch.local_int.timer_due = 0;
+	add_wait_queue(&vcpu->arch.local_int.wq, &wait);
+	while (list_empty(&vcpu->arch.local_int.list) &&
+		list_empty(&vcpu->arch.local_int.float_int->list) &&
+		(!vcpu->arch.local_int.timer_due) &&
+		!signal_pending(current)) {
+		set_current_state(TASK_INTERRUPTIBLE);
+		spin_unlock_bh(&vcpu->arch.local_int.lock);
+		spin_unlock_bh(&vcpu->arch.local_int.float_int->lock);
+		vcpu_put(vcpu);
+		schedule();
+		vcpu_load(vcpu);
+		spin_lock_bh(&vcpu->arch.local_int.float_int->lock);
+		spin_lock_bh(&vcpu->arch.local_int.lock);
+	}
+	__unset_cpu_idle(vcpu);
+	__set_current_state(TASK_RUNNING);
+	/* must be the queue used for add_wait_queue() above, not vcpu->wq */
+	remove_wait_queue(&vcpu->arch.local_int.wq, &wait);
+	spin_unlock_bh(&vcpu->arch.local_int.lock);
+	spin_unlock_bh(&vcpu->arch.local_int.float_int->lock);
+	del_timer(&vcpu->arch.ckc_timer);
+	return 0;
+}
+
+void kvm_s390_idle_wakeup(unsigned long data)
+{
+	struct kvm_vcpu *vcpu = (struct kvm_vcpu *)data;
+
+	spin_lock_bh(&vcpu->arch.local_int.lock);
+	vcpu->arch.local_int.timer_due = 1;
+	if (waitqueue_active(&vcpu->arch.local_int.wq))
+		wake_up_interruptible(&vcpu->arch.local_int.wq);
+	spin_unlock_bh(&vcpu->arch.local_int.lock);
+}
+
+
+void kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu)
+{
+	struct local_interrupt *li = &vcpu->arch.local_int;
+	struct float_interrupt *fi = vcpu->arch.local_int.float_int;
+	struct interrupt_info  *n, *inti = NULL;
+	int deliver;
+
+	__reset_intercept_indicators(vcpu);
+	if (atomic_read(&li->active)) {
+		do {
+			deliver = 0;
+			spin_lock_bh(&li->lock);
+			list_for_each_entry_safe(inti, n, &li->list, list) {
+				if (__interrupt_is_deliverable(vcpu, inti)) {
+					list_del(&inti->list);
+					deliver = 1;
+					break;
+				}
+				__set_intercept_indicator(vcpu, inti);
+			}
+			if (list_empty(&li->list))
+				atomic_set(&li->active, 0);
+			spin_unlock_bh(&li->lock);
+			if (deliver) {
+				__do_deliver_interrupt(vcpu, inti);
+				kfree(inti);
+			}
+		} while (deliver);
+	}
+
+	if ((vcpu->arch.sie_block->ckc <
+		get_clock() + vcpu->arch.sie_block->epoch))
+		__try_deliver_ckc_interrupt(vcpu);
+
+	if (atomic_read(&fi->active)) {
+		do {
+			deliver = 0;
+			spin_lock_bh(&fi->lock);
+			list_for_each_entry_safe(inti, n, &fi->list, list) {
+				if (__interrupt_is_deliverable(vcpu, inti)) {
+					list_del(&inti->list);
+					deliver = 1;
+					break;
+				}
+				__set_intercept_indicator(vcpu, inti);
+			}
+			if (list_empty(&fi->list))
+				atomic_set(&fi->active, 0);
+			spin_unlock_bh(&fi->lock);
+			if (deliver) {
+				__do_deliver_interrupt(vcpu, inti);
+				kfree(inti);
+			}
+		} while (deliver);
+	}
+}
+
+/* Queue a program interrupt with the given code on the vcpu's local list */
+int kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code)
+{
+	struct local_interrupt *li = &vcpu->arch.local_int;
+	struct interrupt_info *inti;
+
+	inti = kzalloc(sizeof(*inti), GFP_KERNEL);
+	if (!inti)
+		return -ENOMEM;
+
+	inti->type = KVM_S390_PROGRAM_INT;
+	inti->pgm.code = code;
+	VCPU_EVENT(vcpu, 3, "inject: program check %d (from kernel)", code);
+	spin_lock_bh(&li->lock);
+	list_add(&inti->list, &li->list);
+	atomic_set(&li->active, 1);
+	BUG_ON(waitqueue_active(&li->wq));
+	spin_unlock_bh(&li->lock);
+	return 0;
+}
+
+int kvm_s390_inject_vm(struct kvm *kvm,
+		       struct kvm_s390_interrupt *s390int)
+{
+	struct local_interrupt *li;
+	struct float_interrupt *fi;
+	struct interrupt_info *inti;
+	int sigcpu, tries = KVM_MAX_VCPUS + 1;
+
+	inti = kzalloc(sizeof(*inti), GFP_KERNEL);
+	if (!inti)
+		return -ENOMEM;
+
+	switch (s390int->type) {
+	case KVM_S390_INT_VIRTIO:
+		VM_EVENT(kvm, 5, "inject: virtio parm:%x,parm64:%lx",
+			 s390int->parm, s390int->parm64);
+		inti->type = s390int->type;
+		inti->ext.ext_params = s390int->parm;
+		inti->ext.ext_params2 = s390int->parm64;
+		break;
+	case KVM_S390_INT_SERVICE:
+		VM_EVENT(kvm, 5, "inject: sclp parm:%x", s390int->parm);
+		inti->type = s390int->type;
+		inti->ext.ext_params = s390int->parm;
+		break;
+	default:	/* all other types cannot be injected per VM */
+		kfree(inti);
+		return -EINVAL;
+	}
+
+	mutex_lock(&kvm->lock);
+	fi = &kvm->arch.float_int;
+	spin_lock_bh(&fi->lock);
+	list_add_tail(&inti->list, &fi->list);
+	atomic_set(&fi->active, 1);
+	sigcpu = find_first_bit(fi->idle_mask, KVM_MAX_VCPUS);
+	if (sigcpu == KVM_MAX_VCPUS) {
+		/* no idle cpu: round robin, bounded in case no vcpu exists */
+		do {
+			sigcpu = fi->next_rr_cpu++;
+			if (sigcpu == KVM_MAX_VCPUS)
+				sigcpu = fi->next_rr_cpu = 0;
+		} while (fi->local_int[sigcpu] == NULL && --tries);
+	}
+	li = fi->local_int[sigcpu];
+	if (li) {	/* NULL: no vcpu yet, the interrupt just stays queued */
+		spin_lock_bh(&li->lock);
+		atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags);
+		if (waitqueue_active(&li->wq))
+			wake_up_interruptible(&li->wq);
+		spin_unlock_bh(&li->lock);
+	}
+	spin_unlock_bh(&fi->lock);
+	mutex_unlock(&kvm->lock);
+	return 0;
+}
+
+int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu,
+			 struct kvm_s390_interrupt *s390int)
+{
+	struct local_interrupt *li;
+	struct interrupt_info *inti;
+
+	inti = kzalloc(sizeof(*inti), GFP_KERNEL);
+	if (!inti)
+		return -ENOMEM;
+
+	switch (s390int->type) {
+	case KVM_S390_PROGRAM_INT:
+		if (s390int->parm & 0xffff0000) {
+			kfree(inti);
+			return -EINVAL;
+		}
+		inti->type = s390int->type;
+		inti->pgm.code = s390int->parm;
+		VCPU_EVENT(vcpu, 3, "inject: program check %d (from user)",
+			   s390int->parm);
+		break;
+	case KVM_S390_SIGP_STOP:
+	case KVM_S390_RESTART:
+	case KVM_S390_SIGP_SET_PREFIX:
+	case KVM_S390_INT_EMERGENCY:
+		VCPU_EVENT(vcpu, 3, "inject: type %x", s390int->type);
+		inti->type = s390int->type;
+		break;
+	case KVM_S390_INT_VIRTIO:
+	case KVM_S390_INT_SERVICE:
+	default:
+		kfree(inti);
+		return -EINVAL;
+	}
+
+	mutex_lock(&vcpu->kvm->lock);
+	li = &vcpu->arch.local_int;
+	spin_lock_bh(&li->lock);
+	if (inti->type == KVM_S390_PROGRAM_INT)
+		list_add(&inti->list, &li->list);
+	else
+		list_add_tail(&inti->list, &li->list);
+	atomic_set(&li->active, 1);
+	if (inti->type == KVM_S390_SIGP_STOP)
+		li->action_bits |= ACTION_STOP_ON_STOP;
+	atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags);
+	if (waitqueue_active(&li->wq))
+		wake_up_interruptible(&vcpu->arch.local_int.wq);
+	spin_unlock_bh(&li->lock);
+	mutex_unlock(&vcpu->kvm->lock);
+	return 0;
+}
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
new file mode 100644
index 000000000000..98d1e73e01f1
--- /dev/null
+++ b/arch/s390/kvm/kvm-s390.c
@@ -0,0 +1,685 @@
+/*
+ * kvm-s390.c --  hosting zSeries kernel virtual machines
+ *
+ * Copyright IBM Corp. 2008
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License (version 2 only)
+ * as published by the Free Software Foundation.
+ *
+ *    Author(s): Carsten Otte <cotte@de.ibm.com>
+ *               Christian Borntraeger <borntraeger@de.ibm.com>
+ *               Heiko Carstens <heiko.carstens@de.ibm.com>
+ */
+
+#include <linux/compiler.h>
+#include <linux/err.h>
+#include <linux/fs.h>
+#include <linux/init.h>
+#include <linux/kvm.h>
+#include <linux/kvm_host.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/timer.h>
+#include <asm/lowcore.h>
+#include <asm/pgtable.h>
+
+#include "kvm-s390.h"
+#include "gaccess.h"
+
+#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
+
+struct kvm_stats_debugfs_item debugfs_entries[] = {
+	{ "userspace_handled", VCPU_STAT(exit_userspace) },
+	{ "exit_validity", VCPU_STAT(exit_validity) },
+	{ "exit_stop_request", VCPU_STAT(exit_stop_request) },
+	{ "exit_external_request", VCPU_STAT(exit_external_request) },
+	{ "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
+	{ "exit_instruction", VCPU_STAT(exit_instruction) },
+	{ "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
+	{ "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
+	{ "instruction_lctg", VCPU_STAT(instruction_lctg) },
+	{ "instruction_lctl", VCPU_STAT(instruction_lctl) },
+	{ "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
+	{ "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
+	{ "deliver_virtio_interrupt", VCPU_STAT(deliver_virtio_interrupt) },
+	{ "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
+	{ "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
+	{ "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
+	{ "deliver_program_interruption", VCPU_STAT(deliver_program_int) },
+	{ "exit_wait_state", VCPU_STAT(exit_wait_state) },
+	{ "instruction_stidp", VCPU_STAT(instruction_stidp) },
+	{ "instruction_spx", VCPU_STAT(instruction_spx) },
+	{ "instruction_stpx", VCPU_STAT(instruction_stpx) },
+	{ "instruction_stap", VCPU_STAT(instruction_stap) },
+	{ "instruction_storage_key", VCPU_STAT(instruction_storage_key) },
+	{ "instruction_stsch", VCPU_STAT(instruction_stsch) },
+	{ "instruction_chsc", VCPU_STAT(instruction_chsc) },
+	{ "instruction_stsi", VCPU_STAT(instruction_stsi) },
+	{ "instruction_stfl", VCPU_STAT(instruction_stfl) },
+	{ "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
+	{ "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
+	{ "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
+	{ "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
+	{ "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
+	{ "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
+	{ "diagnose_44", VCPU_STAT(diagnose_44) },
+	{ NULL }
+};
+
+
+/* Section: not file related */
+void kvm_arch_hardware_enable(void *garbage)
+{
+	/* every s390 is virtualization enabled ;-) */
+}
+
+void kvm_arch_hardware_disable(void *garbage)
+{
+}
+
+void decache_vcpus_on_cpu(int cpu)
+{
+}
+
+int kvm_arch_hardware_setup(void)
+{
+	return 0;
+}
+
+void kvm_arch_hardware_unsetup(void)
+{
+}
+
+void kvm_arch_check_processor_compat(void *rtn)
+{
+}
+
+int kvm_arch_init(void *opaque)
+{
+	return 0;
+}
+
+void kvm_arch_exit(void)
+{
+}
+
+/* Section: device related */
+long kvm_arch_dev_ioctl(struct file *filp,
+			unsigned int ioctl, unsigned long arg)
+{
+	if (ioctl == KVM_S390_ENABLE_SIE)
+		return s390_enable_sie();
+	return -EINVAL;
+}
+
+int kvm_dev_ioctl_check_extension(long ext)
+{
+	return 0;
+}
+
+/* Section: vm related */
+/*
+ * Get (and clear) the dirty memory log for a memory slot.
+ */
+int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
+			       struct kvm_dirty_log *log)
+{
+	return 0;
+}
+
+long kvm_arch_vm_ioctl(struct file *filp,
+		       unsigned int ioctl, unsigned long arg)
+{
+	struct kvm *kvm = filp->private_data;
+	void __user *argp = (void __user *)arg;
+	int r;
+
+	switch (ioctl) {
+	case KVM_S390_INTERRUPT: {
+		struct kvm_s390_interrupt s390int;
+
+		r = -EFAULT;
+		if (copy_from_user(&s390int, argp, sizeof(s390int)))
+			break;
+		r = kvm_s390_inject_vm(kvm, &s390int);
+		break;
+	}
+	default:
+		r = -EINVAL;
+	}
+
+	return r;
+}
+
+/* Allocate a VM together with its sca block page and its debug log */
+struct kvm *kvm_arch_create_vm(void)
+{
+	struct kvm *kvm;
+	int rc;
+	char debug_name[16];
+
+	rc = s390_enable_sie();
+	if (rc)
+		goto out_nokvm;
+
+	rc = -ENOMEM;
+	kvm = kzalloc(sizeof(struct kvm), GFP_KERNEL);
+	if (!kvm)
+		goto out_nokvm;
+
+	kvm->arch.sca = (struct sca_block *) get_zeroed_page(GFP_KERNEL);
+	if (!kvm->arch.sca)
+		goto out_nosca;
+
+	snprintf(debug_name, sizeof(debug_name), "kvm-%u", current->pid);
+
+	kvm->arch.dbf = debug_register(debug_name, 8, 2, 8 * sizeof(long));
+	if (!kvm->arch.dbf)
+		goto out_nodbf;
+
+	spin_lock_init(&kvm->arch.float_int.lock);
+	INIT_LIST_HEAD(&kvm->arch.float_int.list);
+
+	debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
+	VM_EVENT(kvm, 3, "%s", "vm created");
+
+	try_module_get(THIS_MODULE);
+	return kvm;
+out_nodbf:
+	free_page((unsigned long)(kvm->arch.sca));
+out_nosca:
+	kfree(kvm);
+out_nokvm:
+	return ERR_PTR(rc);
+}
+
+void kvm_arch_destroy_vm(struct kvm *kvm)
+{
+	debug_unregister(kvm->arch.dbf);
+	free_page((unsigned long)(kvm->arch.sca));
+	kfree(kvm);
+	module_put(THIS_MODULE);
+}
+
+/* Section: vcpu related */
+int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
+{
+	return 0;
+}
+
+void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
+{
+	/* kvm common code refers to this, but doesn't call it */
+	BUG();
+}
+
+void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
+{
+	save_fp_regs(&vcpu->arch.host_fpregs);
+	save_access_regs(vcpu->arch.host_acrs);
+	vcpu->arch.guest_fpregs.fpc &= FPC_VALID_MASK;
+	restore_fp_regs(&vcpu->arch.guest_fpregs);
+	restore_access_regs(vcpu->arch.guest_acrs);
+
+	if (signal_pending(current))
+		atomic_set_mask(CPUSTAT_STOP_INT,
+			&vcpu->arch.sie_block->cpuflags);
+}
+
+void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
+{
+	save_fp_regs(&vcpu->arch.guest_fpregs);
+	save_access_regs(vcpu->arch.guest_acrs);
+	restore_fp_regs(&vcpu->arch.host_fpregs);
+	restore_access_regs(vcpu->arch.host_acrs);
+}
+
+static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
+{
+	/* this equals initial cpu reset in pop, but we don't switch to ESA */
+	vcpu->arch.sie_block->gpsw.mask = 0UL;
+	vcpu->arch.sie_block->gpsw.addr = 0UL;
+	vcpu->arch.sie_block->prefix    = 0UL;
+	vcpu->arch.sie_block->ihcpu     = 0xffff;
+	vcpu->arch.sie_block->cputm     = 0UL;
+	vcpu->arch.sie_block->ckc       = 0UL;
+	vcpu->arch.sie_block->todpr     = 0;
+	memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
+	vcpu->arch.sie_block->gcr[0]  = 0xE0UL;
+	vcpu->arch.sie_block->gcr[14] = 0xC2000000UL;
+	vcpu->arch.guest_fpregs.fpc = 0;
+	asm volatile("lfpc %0" : : "Q" (vcpu->arch.guest_fpregs.fpc));
+	vcpu->arch.sie_block->gbea = 1;
+}
+
+int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
+{
+	atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH);
+	vcpu->arch.sie_block->gmslm = 0xffffffffffUL;
+	vcpu->arch.sie_block->gmsor = 0x000000000000;
+	vcpu->arch.sie_block->ecb   = 2;
+	vcpu->arch.sie_block->eca   = 0xC1002001U;
+	setup_timer(&vcpu->arch.ckc_timer, kvm_s390_idle_wakeup,
+		 (unsigned long) vcpu);
+	get_cpu_id(&vcpu->arch.cpu_id);
+	vcpu->arch.cpu_id.version = 0xfe;
+	return 0;
+}
+
+struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id)
+{
+	struct kvm_vcpu *vcpu = kzalloc(sizeof(struct kvm_vcpu), GFP_KERNEL);
+	int rc = -ENOMEM;
+
+	if (!vcpu)
+		goto out_nomem;
+	vcpu->arch.sie_block = (struct sie_block *) get_zeroed_page(GFP_KERNEL);
+	if (!vcpu->arch.sie_block)
+		goto out_free_cpu;
+	vcpu->arch.sie_block->icpua = id;
+	BUG_ON(!kvm->arch.sca);
+	BUG_ON(kvm->arch.sca->cpu[id].sda);
+	kvm->arch.sca->cpu[id].sda = (__u64) vcpu->arch.sie_block;
+	vcpu->arch.sie_block->scaoh = (__u32)(((__u64)kvm->arch.sca) >> 32);
+	vcpu->arch.sie_block->scaol = (__u32)(__u64)kvm->arch.sca;
+
+	spin_lock_init(&vcpu->arch.local_int.lock);
+	INIT_LIST_HEAD(&vcpu->arch.local_int.list);
+	vcpu->arch.local_int.float_int = &kvm->arch.float_int;
+	spin_lock_bh(&kvm->arch.float_int.lock);
+	kvm->arch.float_int.local_int[id] = &vcpu->arch.local_int;
+	init_waitqueue_head(&vcpu->arch.local_int.wq);
+	vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;
+	spin_unlock_bh(&kvm->arch.float_int.lock);
+
+	rc = kvm_vcpu_init(vcpu, kvm, id);
+	if (rc)
+		goto out_unpublish;
+	VM_EVENT(kvm, 3, "create cpu %d at %p, sie block at %p", id, vcpu,
+		 vcpu->arch.sie_block);
+	try_module_get(THIS_MODULE);
+	return vcpu;
+out_unpublish:	/* kvm_vcpu_init failed: undo the registrations above */
+	spin_lock_bh(&kvm->arch.float_int.lock);
+	kvm->arch.float_int.local_int[id] = NULL;
+	spin_unlock_bh(&kvm->arch.float_int.lock);
+	kvm->arch.sca->cpu[id].sda = 0;
+	free_page((unsigned long)(vcpu->arch.sie_block));
+out_free_cpu:
+	kfree(vcpu);
+out_nomem:
+	return ERR_PTR(rc);
+}
+
+void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
+{
+	VCPU_EVENT(vcpu, 3, "%s", "destroy cpu");
+	free_page((unsigned long)(vcpu->arch.sie_block));
+	kfree(vcpu);
+	module_put(THIS_MODULE);
+}
+
+int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
+{
+	/* kvm common code refers to this, but never calls it */
+	BUG();
+	return 0;
+}
+
+static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
+{
+	vcpu_load(vcpu);
+	kvm_s390_vcpu_initial_reset(vcpu);
+	vcpu_put(vcpu);
+	return 0;
+}
+
+int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
+{
+	vcpu_load(vcpu);
+	memcpy(&vcpu->arch.guest_gprs, &regs->gprs, sizeof(regs->gprs));
+	vcpu_put(vcpu);
+	return 0;
+}
+
+int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
+{
+	vcpu_load(vcpu);
+	memcpy(&regs->gprs, &vcpu->arch.guest_gprs, sizeof(regs->gprs));
+	vcpu_put(vcpu);
+	return 0;
+}
+
+int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
+				  struct kvm_sregs *sregs)
+{
+	vcpu_load(vcpu);
+	memcpy(&vcpu->arch.guest_acrs, &sregs->acrs, sizeof(sregs->acrs));
+	memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
+	vcpu_put(vcpu);
+	return 0;
+}
+
+int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
+				  struct kvm_sregs *sregs)
+{
+	vcpu_load(vcpu);
+	memcpy(&sregs->acrs, &vcpu->arch.guest_acrs, sizeof(sregs->acrs));
+	memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
+	vcpu_put(vcpu);
+	return 0;
+}
+
+int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
+{
+	vcpu_load(vcpu);
+	memcpy(&vcpu->arch.guest_fpregs.fprs, &fpu->fprs, sizeof(fpu->fprs));
+	vcpu->arch.guest_fpregs.fpc = fpu->fpc;
+	vcpu_put(vcpu);
+	return 0;
+}
+
+int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
+{
+	vcpu_load(vcpu);
+	memcpy(&fpu->fprs, &vcpu->arch.guest_fpregs.fprs, sizeof(fpu->fprs));
+	fpu->fpc = vcpu->arch.guest_fpregs.fpc;
+	vcpu_put(vcpu);
+	return 0;
+}
+
+static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
+{
+	int rc = 0;
+
+	vcpu_load(vcpu);
+	if (atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_RUNNING)
+		rc = -EBUSY;
+	else
+		vcpu->arch.sie_block->gpsw = psw;
+	vcpu_put(vcpu);
+	return rc;
+}
+
+int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
+				  struct kvm_translation *tr)
+{
+	return -EINVAL; /* not implemented yet */
+}
+
+int kvm_arch_vcpu_ioctl_debug_guest(struct kvm_vcpu *vcpu,
+				    struct kvm_debug_guest *dbg)
+{
+	return -EINVAL; /* not implemented yet */
+}
+
+int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
+				    struct kvm_mp_state *mp_state)
+{
+	return -EINVAL; /* not implemented yet */
+}
+
+int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
+				    struct kvm_mp_state *mp_state)
+{
+	return -EINVAL; /* not implemented yet */
+}
+
+static void __vcpu_run(struct kvm_vcpu *vcpu)
+{
+	memcpy(&vcpu->arch.sie_block->gg14, &vcpu->arch.guest_gprs[14], 16);
+
+	if (need_resched())
+		schedule();
+
+	vcpu->arch.sie_block->icptcode = 0;
+	local_irq_disable();
+	kvm_guest_enter();
+	local_irq_enable();
+	VCPU_EVENT(vcpu, 6, "entering sie flags %x",
+		   atomic_read(&vcpu->arch.sie_block->cpuflags));
+	sie64a(vcpu->arch.sie_block, vcpu->arch.guest_gprs);
+	VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
+		   vcpu->arch.sie_block->icptcode);
+	local_irq_disable();
+	kvm_guest_exit();
+	local_irq_enable();
+
+	memcpy(&vcpu->arch.guest_gprs[14], &vcpu->arch.sie_block->gg14, 16);
+}
+
+int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+{
+	int rc;
+	sigset_t sigsaved;
+
+	vcpu_load(vcpu);
+
+	if (vcpu->sigset_active)
+		sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
+
+	atomic_set_mask(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
+
+	BUG_ON(vcpu->kvm->arch.float_int.local_int[vcpu->vcpu_id] == NULL);
+
+	switch (kvm_run->exit_reason) {
+	case KVM_EXIT_S390_SIEIC:
+		vcpu->arch.sie_block->gpsw.mask = kvm_run->s390_sieic.mask;
+		vcpu->arch.sie_block->gpsw.addr = kvm_run->s390_sieic.addr;
+		break;
+	case KVM_EXIT_UNKNOWN:
+	case KVM_EXIT_S390_RESET:
+		break;
+	default:
+		BUG();
+	}
+
+	might_sleep();
+
+	do {
+		kvm_s390_deliver_pending_interrupts(vcpu);
+		__vcpu_run(vcpu);
+		rc = kvm_handle_sie_intercept(vcpu);
+	} while (!signal_pending(current) && !rc);
+
+	if (signal_pending(current) && !rc)
+		rc = -EINTR;
+
+	if (rc == -ENOTSUPP) {
+		/* intercept cannot be handled in-kernel, prepare kvm-run */
+		kvm_run->exit_reason         = KVM_EXIT_S390_SIEIC;
+		kvm_run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
+		kvm_run->s390_sieic.mask     = vcpu->arch.sie_block->gpsw.mask;
+		kvm_run->s390_sieic.addr     = vcpu->arch.sie_block->gpsw.addr;
+		kvm_run->s390_sieic.ipa      = vcpu->arch.sie_block->ipa;
+		kvm_run->s390_sieic.ipb      = vcpu->arch.sie_block->ipb;
+		rc = 0;
+	}
+
+	if (rc == -EREMOTE) {
+		/* intercept was handled, but userspace support is needed
+		 * kvm_run has been prepared by the handler */
+		rc = 0;
+	}
+
+	if (vcpu->sigset_active)
+		sigprocmask(SIG_SETMASK, &sigsaved, NULL);
+
+	vcpu_put(vcpu);
+
+	vcpu->stat.exit_userspace++;
+	return rc;
+}
+
+static int __guestcopy(struct kvm_vcpu *vcpu, u64 guestdest, const void *from,
+		       unsigned long n, int prefix)
+{
+	if (prefix)
+		return copy_to_guest(vcpu, guestdest, from, n);
+	else
+		return copy_to_guest_absolute(vcpu, guestdest, from, n);
+}
+
+/*
+ * store status at address
+ * we have two special cases:
+ * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
+ * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
+ */
+int __kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
+{
+	const unsigned char archmode = 1;
+	int prefix;
+
+	if (addr == KVM_S390_STORE_STATUS_NOADDR) {
+		if (copy_to_guest_absolute(vcpu, 163ul, &archmode, 1))
+			return -EFAULT;
+		addr = SAVE_AREA_BASE;
+		prefix = 0;
+	} else if (addr == KVM_S390_STORE_STATUS_PREFIXED) {
+		if (copy_to_guest(vcpu, 163ul, &archmode, 1))
+			return -EFAULT;
+		addr = SAVE_AREA_BASE;
+		prefix = 1;
+	} else
+		prefix = 0;
+
+	if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, fp_regs),
+			vcpu->arch.guest_fpregs.fprs, 128, prefix))
+		return -EFAULT;
+
+	if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, gp_regs),
+			vcpu->arch.guest_gprs, 128, prefix))
+		return -EFAULT;
+
+	if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, psw),
+			&vcpu->arch.sie_block->gpsw, 16, prefix))
+		return -EFAULT;
+
+	if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, pref_reg),
+			&vcpu->arch.sie_block->prefix, 4, prefix))
+		return -EFAULT;
+
+	if (__guestcopy(vcpu,
+			addr + offsetof(struct save_area_s390x, fp_ctrl_reg),
+			&vcpu->arch.guest_fpregs.fpc, 4, prefix))
+		return -EFAULT;
+
+	if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, tod_reg),
+			&vcpu->arch.sie_block->todpr, 4, prefix))
+		return -EFAULT;
+
+	if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, timer),
+			&vcpu->arch.sie_block->cputm, 8, prefix))
+		return -EFAULT;
+
+	if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, clk_cmp),
+			&vcpu->arch.sie_block->ckc, 8, prefix))
+		return -EFAULT;
+
+	if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, acc_regs),
+			&vcpu->arch.guest_acrs, 64, prefix))
+		return -EFAULT;
+
+	if (__guestcopy(vcpu,
+			addr + offsetof(struct save_area_s390x, ctrl_regs),
+			&vcpu->arch.sie_block->gcr, 128, prefix))
+		return -EFAULT;
+	return 0;
+}
+
+static int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
+{
+	int rc;
+
+	vcpu_load(vcpu);
+	rc = __kvm_s390_vcpu_store_status(vcpu, addr);
+	vcpu_put(vcpu);
+	return rc;
+}
+
+long kvm_arch_vcpu_ioctl(struct file *filp,
+			 unsigned int ioctl, unsigned long arg)
+{
+	struct kvm_vcpu *vcpu = filp->private_data;
+	void __user *argp = (void __user *)arg;
+
+	switch (ioctl) {
+	case KVM_S390_INTERRUPT: {
+		struct kvm_s390_interrupt s390int;
+
+		if (copy_from_user(&s390int, argp, sizeof(s390int)))
+			return -EFAULT;
+		return kvm_s390_inject_vcpu(vcpu, &s390int);
+	}
+	case KVM_S390_STORE_STATUS:
+		return kvm_s390_vcpu_store_status(vcpu, arg);
+	case KVM_S390_SET_INITIAL_PSW: {
+		psw_t psw;
+
+		if (copy_from_user(&psw, argp, sizeof(psw)))
+			return -EFAULT;
+		return kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
+	}
+	case KVM_S390_INITIAL_RESET:
+		return kvm_arch_vcpu_ioctl_initial_reset(vcpu);
+	default:
+		;
+	}
+	return -EINVAL;
+}
+
+/* Section: memory related */
+int kvm_arch_set_memory_region(struct kvm *kvm,
+				struct kvm_userspace_memory_region *mem,
+				struct kvm_memory_slot old,
+				int user_alloc)
+{
+	/* A few sanity checks. We can have exactly one memory slot (slot 0),
+	   which has to start at guest physical address zero, has to be
+	   located at a page boundary in userland and has to have a size
+	   that is a multiple of the page size. The memory in userland is ok
+	   to be fragmented into various different vmas. It is okay to mmap()
+	   and munmap() stuff in this slot at any time after this call. */
+
+	if (mem->slot)
+		return -EINVAL;
+
+	if (mem->guest_phys_addr)
+		return -EINVAL;
+
+	if (mem->userspace_addr & (PAGE_SIZE - 1))
+		return -EINVAL;
+
+	if (mem->memory_size & (PAGE_SIZE - 1))
+		return -EINVAL;
+
+	kvm->arch.guest_origin = mem->userspace_addr;
+	kvm->arch.guest_memsize = mem->memory_size;
+
+	/* FIXME: we do want to interrupt running CPUs and update their memory
+	   configuration now to avoid race conditions. But hey, changing the
+	   memory layout while virtual CPUs are running is usually bad
+	   programming practice. */
+
+	return 0;
+}
+
+gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn)
+{
+	return gfn;
+}
+
+static int __init kvm_s390_init(void)
+{
+	return kvm_init(NULL, sizeof(struct kvm_vcpu), THIS_MODULE);
+}
+
+static void __exit kvm_s390_exit(void)
+{
+	kvm_exit();
+}
+
+module_init(kvm_s390_init);
+module_exit(kvm_s390_exit);
diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h
new file mode 100644
index 000000000000..3893cf12eacf
--- /dev/null
+++ b/arch/s390/kvm/kvm-s390.h
@@ -0,0 +1,64 @@
+/*
+ * kvm-s390.h - definitions for kvm on s390
+ *
+ * Copyright IBM Corp. 2008
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License (version 2 only)
+ * as published by the Free Software Foundation.
+ *
+ *    Author(s): Carsten Otte <cotte@de.ibm.com>
+ *               Christian Borntraeger <borntraeger@de.ibm.com>
+ */
+
+#ifndef ARCH_S390_KVM_S390_H
+#define ARCH_S390_KVM_S390_H
+
+#include <linux/kvm.h>
+#include <linux/kvm_host.h>
+
+typedef int (*intercept_handler_t)(struct kvm_vcpu *vcpu);
+
+int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu);
+
+#define VM_EVENT(d_kvm, d_loglevel, d_string, d_args...)\
+do { \
+	debug_sprintf_event(d_kvm->arch.dbf, d_loglevel, d_string "\n", \
+	  d_args); \
+} while (0)
+
+#define VCPU_EVENT(d_vcpu, d_loglevel, d_string, d_args...)\
+do { \
+	debug_sprintf_event(d_vcpu->kvm->arch.dbf, d_loglevel, \
+	  "%02d[%016lx-%016lx]: " d_string "\n", d_vcpu->vcpu_id, \
+	  d_vcpu->arch.sie_block->gpsw.mask, d_vcpu->arch.sie_block->gpsw.addr,\
+	  d_args); \
+} while (0)
+
+static inline int __cpu_is_stopped(struct kvm_vcpu *vcpu)
+{
+	return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_STOP_INT;
+}
+
+int kvm_s390_handle_wait(struct kvm_vcpu *vcpu);
+void kvm_s390_idle_wakeup(unsigned long data);
+void kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu);
+int kvm_s390_inject_vm(struct kvm *kvm,
+		struct kvm_s390_interrupt *s390int);
+int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu,
+		struct kvm_s390_interrupt *s390int);
+int kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code);
+
+/* implemented in priv.c */
+int kvm_s390_handle_priv(struct kvm_vcpu *vcpu);
+
+/* implemented in sigp.c */
+int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu);
+
+/* implemented in kvm-s390.c */
+int __kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu,
+				 unsigned long addr);
+/* implemented in diag.c */
+int kvm_s390_handle_diag(struct kvm_vcpu *vcpu);
+
+#endif
diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c
new file mode 100644
index 000000000000..c02286c6a931
--- /dev/null
+++ b/arch/s390/kvm/priv.c
@@ -0,0 +1,314 @@
+/*
+ * priv.c - handling privileged instructions
+ *
+ * Copyright IBM Corp. 2008
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License (version 2 only)
+ * as published by the Free Software Foundation.
+ *
+ *    Author(s): Carsten Otte <cotte@de.ibm.com>
+ *               Christian Borntraeger <borntraeger@de.ibm.com>
+ */
+
+#include <linux/kvm.h>
+#include <linux/errno.h>
+#include <asm/current.h>
+#include <asm/debug.h>
+#include <asm/ebcdic.h>
+#include <asm/sysinfo.h>
+#include "gaccess.h"
+#include "kvm-s390.h"
+
+static int handle_set_prefix(struct kvm_vcpu *vcpu)
+{
+	int base2 = vcpu->arch.sie_block->ipb >> 28;
+	int disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16);
+	u64 operand2;
+	u32 address = 0;
+	u8 tmp;
+
+	vcpu->stat.instruction_spx++;
+
+	operand2 = disp2;
+	if (base2)
+		operand2 += vcpu->arch.guest_gprs[base2];
+
+	/* must be word boundary */
+	if (operand2 & 3) {
+		kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+		goto out;
+	}
+
+	/* get the value */
+	if (get_guest_u32(vcpu, operand2, &address)) {
+		kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+		goto out;
+	}
+
+	address = address & 0x7fffe000u;
+
+	/* make sure that the new value is valid memory */
+	if (copy_from_guest_absolute(vcpu, &tmp, address, 1) ||
+	   (copy_from_guest_absolute(vcpu, &tmp, address + PAGE_SIZE, 1))) {
+		kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+		goto out;
+	}
+
+	vcpu->arch.sie_block->prefix = address;
+	vcpu->arch.sie_block->ihcpu = 0xffff;
+
+	VCPU_EVENT(vcpu, 5, "setting prefix to %x", address);
+out:
+	return 0;
+}
+
+static int handle_store_prefix(struct kvm_vcpu *vcpu)
+{
+	int base2 = vcpu->arch.sie_block->ipb >> 28;
+	int disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16);
+	u64 operand2;
+	u32 address;
+
+	vcpu->stat.instruction_stpx++;
+	operand2 = disp2; /* operand address = displacement (+ base register) */
+	if (base2)
+		operand2 += vcpu->arch.guest_gprs[base2];
+
+	/* must be word boundary */
+	if (operand2 & 3) {
+		kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+		goto out;
+	}
+
+	address = vcpu->arch.sie_block->prefix;
+	address = address & 0x7fffe000u;
+
+	/* store the current prefix value at the guest operand address */
+	if (put_guest_u32(vcpu, operand2, address)) {
+		kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+		goto out;
+	}
+
+	VCPU_EVENT(vcpu, 5, "storing prefix to %x", address);
+out:
+	return 0;
+}
+
+static int handle_store_cpu_address(struct kvm_vcpu *vcpu)
+{
+	int base2 = vcpu->arch.sie_block->ipb >> 28;
+	int disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16);
+	u64 useraddr;
+	int rc;
+
+	vcpu->stat.instruction_stap++;
+	useraddr = disp2;
+	if (base2)
+		useraddr += vcpu->arch.guest_gprs[base2];
+
+	if (useraddr & 1) {
+		kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+		goto out;
+	}
+
+	rc = put_guest_u16(vcpu, useraddr, vcpu->vcpu_id);
+	if (rc == -EFAULT) {
+		kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+		goto out;
+	}
+
+	VCPU_EVENT(vcpu, 5, "storing cpu address to %lx", useraddr);
+out:
+	return 0;
+}
+
+static int handle_skey(struct kvm_vcpu *vcpu)
+{
+	vcpu->stat.instruction_storage_key++;
+	vcpu->arch.sie_block->gpsw.addr -= 4;
+	VCPU_EVENT(vcpu, 4, "%s", "retrying storage key operation");
+	return 0;
+}
+
+static int handle_stsch(struct kvm_vcpu *vcpu)
+{
+	vcpu->stat.instruction_stsch++;
+	VCPU_EVENT(vcpu, 4, "%s", "store subchannel - CC3");
+	/* condition code 3 */
+	vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44);
+	vcpu->arch.sie_block->gpsw.mask |= (3 & 3ul) << 44;
+	return 0;
+}
+
+static int handle_chsc(struct kvm_vcpu *vcpu)
+{
+	vcpu->stat.instruction_chsc++;
+	VCPU_EVENT(vcpu, 4, "%s", "channel subsystem call - CC3");
+	/* condition code 3 */
+	vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44);
+	vcpu->arch.sie_block->gpsw.mask |= (3 & 3ul) << 44;
+	return 0;
+}
+
+static int handle_stfl(struct kvm_vcpu *vcpu)
+{
+	unsigned int facility_list = stfl();
+	int rc;
+
+	vcpu->stat.instruction_stfl++;
+	facility_list &= ~(1UL<<24); /* no stfle */
+
+	rc = copy_to_guest(vcpu, offsetof(struct _lowcore, stfl_fac_list),
+			   &facility_list, sizeof(facility_list));
+	if (rc == -EFAULT)
+		kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+	else
+		VCPU_EVENT(vcpu, 5, "store facility list value %x",
+			   facility_list);
+	return 0;
+}
+
+static int handle_stidp(struct kvm_vcpu *vcpu)
+{
+	int base2 = vcpu->arch.sie_block->ipb >> 28;
+	int disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16);
+	u64 operand2;
+	int rc;
+
+	vcpu->stat.instruction_stidp++;
+	operand2 = disp2;
+	if (base2)
+		operand2 += vcpu->arch.guest_gprs[base2];
+
+	if (operand2 & 7) {
+		kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+		goto out;
+	}
+
+	rc = put_guest_u64(vcpu, operand2, vcpu->arch.stidp_data);
+	if (rc == -EFAULT) {
+		kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+		goto out;
+	}
+
+	VCPU_EVENT(vcpu, 5, "%s", "store cpu id");
+out:
+	return 0;
+}
+
+static void handle_stsi_3_2_2(struct kvm_vcpu *vcpu, struct sysinfo_3_2_2 *mem)
+{
+	struct float_interrupt *fi = &vcpu->kvm->arch.float_int;
+	int cpus = 0;
+	int n;
+
+	spin_lock_bh(&fi->lock);
+	for (n = 0; n < KVM_MAX_VCPUS; n++)
+		if (fi->local_int[n])
+			cpus++;
+	spin_unlock_bh(&fi->lock);
+
+	/* deal with other level 3 hypervisors */
+	if (stsi(mem, 3, 2, 2) == -ENOSYS)
+		mem->count = 0;
+	if (mem->count < 8)
+		mem->count++;
+	for (n = mem->count - 1; n > 0 ; n--)
+		memcpy(&mem->vm[n], &mem->vm[n - 1], sizeof(mem->vm[0]));
+
+	mem->vm[0].cpus_total = cpus;
+	mem->vm[0].cpus_configured = cpus;
+	mem->vm[0].cpus_standby = 0;
+	mem->vm[0].cpus_reserved = 0;
+	mem->vm[0].caf = 1000;
+	memcpy(mem->vm[0].name, "KVMguest", 8);
+	ASCEBC(mem->vm[0].name, 8);
+	memcpy(mem->vm[0].cpi, "KVM/Linux       ", 16);
+	ASCEBC(mem->vm[0].cpi, 16);
+}
+
+static int handle_stsi(struct kvm_vcpu *vcpu)
+{
+	int fc = (vcpu->arch.guest_gprs[0] & 0xf0000000) >> 28;
+	int sel1 = vcpu->arch.guest_gprs[0] & 0xff;
+	int sel2 = vcpu->arch.guest_gprs[1] & 0xffff;
+	int base2 = vcpu->arch.sie_block->ipb >> 28;
+	int disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16);
+	u64 operand2;
+	unsigned long mem;
+
+	vcpu->stat.instruction_stsi++;
+	VCPU_EVENT(vcpu, 4, "stsi: fc: %x sel1: %x sel2: %x", fc, sel1, sel2);
+
+	operand2 = disp2;
+	if (base2)
+		operand2 += vcpu->arch.guest_gprs[base2];
+
+	if (operand2 & 0xfff && fc > 0)
+		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+
+	switch (fc) {
+	case 0:
+		vcpu->arch.guest_gprs[0] = 3 << 28;
+		vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44);
+		return 0;
+	case 1: /* same handling for 1 and 2 */
+	case 2:
+		mem = get_zeroed_page(GFP_KERNEL);
+		if (!mem)
+			goto out_fail;
+		if (stsi((void *) mem, fc, sel1, sel2) == -ENOSYS)
+			goto out_mem;
+		break;
+	case 3:
+		if (sel1 != 2 || sel2 != 2)
+			goto out_fail;
+		mem = get_zeroed_page(GFP_KERNEL);
+		if (!mem)
+			goto out_fail;
+		handle_stsi_3_2_2(vcpu, (void *) mem);
+		break;
+	default:
+		goto out_fail;
+	}
+
+	if (copy_to_guest_absolute(vcpu, operand2, (void *) mem, PAGE_SIZE)) {
+		kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+		goto out_mem;
+	}
+	free_page(mem);
+	vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44);
+	vcpu->arch.guest_gprs[0] = 0;
+	return 0;
+out_mem:
+	free_page(mem);
+out_fail:
+	/* condition code 3 */
+	vcpu->arch.sie_block->gpsw.mask |= 3ul << 44;
+	return 0;
+}
+
+static intercept_handler_t priv_handlers[256] = {
+	[0x02] = handle_stidp,
+	[0x10] = handle_set_prefix,
+	[0x11] = handle_store_prefix,
+	[0x12] = handle_store_cpu_address,
+	[0x29] = handle_skey,
+	[0x2a] = handle_skey,
+	[0x2b] = handle_skey,
+	[0x34] = handle_stsch,
+	[0x5f] = handle_chsc,
+	[0x7d] = handle_stsi,
+	[0xb1] = handle_stfl,
+};
+
+int kvm_s390_handle_priv(struct kvm_vcpu *vcpu)
+{
+	intercept_handler_t handler;
+
+	handler = priv_handlers[vcpu->arch.sie_block->ipa & 0x00ff];
+	if (handler)
+		return handler(vcpu);
+	return -ENOTSUPP;
+}
diff --git a/arch/s390/kvm/sie64a.S b/arch/s390/kvm/sie64a.S
new file mode 100644
index 000000000000..934fd6a885f6
--- /dev/null
+++ b/arch/s390/kvm/sie64a.S
@@ -0,0 +1,47 @@
+/*
+ * sie64a.S - low level sie call
+ *
+ * Copyright IBM Corp. 2008
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License (version 2 only)
+ * as published by the Free Software Foundation.
+ *
+ *    Author(s): Heiko Carstens <heiko.carstens@de.ibm.com>
+ */
+
+#include <linux/errno.h>
+#include <asm/asm-offsets.h>
+
+SP_R5 =	5 * 8	# offset into stackframe
+SP_R6 =	6 * 8
+
+/*
+ * sie64a calling convention:
+ * %r2 pointer to sie control block
+ * %r3 guest register save area
+ */
+	.globl	sie64a
+sie64a:
+	lgr	%r5,%r3
+	stmg	%r5,%r14,SP_R5(%r15)	# save register on entry
+	lgr	%r14,%r2		# pointer to sie control block
+	lmg	%r0,%r13,0(%r3)		# load guest gprs 0-13
+sie_inst:
+	sie	0(%r14)
+	lg	%r14,SP_R5(%r15)
+	stmg	%r0,%r13,0(%r14)	# save guest gprs 0-13
+	lghi	%r2,0
+	lmg	%r6,%r14,SP_R6(%r15)
+	br	%r14
+
+sie_err:
+	lg	%r14,SP_R5(%r15)
+	stmg	%r0,%r13,0(%r14)	# save guest gprs 0-13
+	lghi	%r2,-EFAULT
+	lmg	%r6,%r14,SP_R6(%r15)
+	br	%r14
+
+	.section __ex_table,"a"
+	.quad	sie_inst,sie_err
+	.previous
diff --git a/arch/s390/kvm/sigp.c b/arch/s390/kvm/sigp.c
new file mode 100644
index 000000000000..0a236acfb5f6
--- /dev/null
+++ b/arch/s390/kvm/sigp.c
@@ -0,0 +1,288 @@
+/*
+ * sigp.c - handling interprocessor communication
+ *
+ * Copyright IBM Corp. 2008
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License (version 2 only)
+ * as published by the Free Software Foundation.
+ *
+ *    Author(s): Carsten Otte <cotte@de.ibm.com>
+ *               Christian Borntraeger <borntraeger@de.ibm.com>
+ */
+
+#include <linux/kvm.h>
+#include <linux/kvm_host.h>
+#include "gaccess.h"
+#include "kvm-s390.h"
+
+/* sigp order codes */
+#define SIGP_SENSE             0x01
+#define SIGP_EXTERNAL_CALL     0x02
+#define SIGP_EMERGENCY         0x03
+#define SIGP_START             0x04
+#define SIGP_STOP              0x05
+#define SIGP_RESTART           0x06
+#define SIGP_STOP_STORE_STATUS 0x09
+#define SIGP_INITIAL_CPU_RESET 0x0b
+#define SIGP_CPU_RESET         0x0c
+#define SIGP_SET_PREFIX        0x0d
+#define SIGP_STORE_STATUS_ADDR 0x0e
+#define SIGP_SET_ARCH          0x12
+
+/* cpu status bits */
+#define SIGP_STAT_EQUIPMENT_CHECK   0x80000000UL
+#define SIGP_STAT_INCORRECT_STATE   0x00000200UL
+#define SIGP_STAT_INVALID_PARAMETER 0x00000100UL
+#define SIGP_STAT_EXT_CALL_PENDING  0x00000080UL
+#define SIGP_STAT_STOPPED           0x00000040UL
+#define SIGP_STAT_OPERATOR_INTERV   0x00000020UL
+#define SIGP_STAT_CHECK_STOP        0x00000010UL
+#define SIGP_STAT_INOPERATIVE       0x00000004UL
+#define SIGP_STAT_INVALID_ORDER     0x00000002UL
+#define SIGP_STAT_RECEIVER_CHECK    0x00000001UL
+
+
+static int __sigp_sense(struct kvm_vcpu *vcpu, u16 cpu_addr, u64 *reg)
+{
+	struct float_interrupt *fi = &vcpu->kvm->arch.float_int;
+	int rc;
+
+	if (cpu_addr >= KVM_MAX_VCPUS)
+		return 3; /* not operational */
+
+	spin_lock_bh(&fi->lock);
+	if (fi->local_int[cpu_addr] == NULL)
+		rc = 3; /* not operational */
+	else if (atomic_read(fi->local_int[cpu_addr]->cpuflags)
+		 & CPUSTAT_RUNNING) {
+		*reg &= 0xffffffff00000000UL;
+		rc = 1; /* status stored */
+	} else {
+		*reg &= 0xffffffff00000000UL;
+		*reg |= SIGP_STAT_STOPPED;
+		rc = 1; /* status stored */
+	}
+	spin_unlock_bh(&fi->lock);
+
+	VCPU_EVENT(vcpu, 4, "sensed status of cpu %x rc %x", cpu_addr, rc);
+	return rc;
+}
+
+static int __sigp_emergency(struct kvm_vcpu *vcpu, u16 cpu_addr)
+{
+	struct float_interrupt *fi = &vcpu->kvm->arch.float_int;
+	struct local_interrupt *li;
+	struct interrupt_info *inti;
+	int rc;
+
+	if (cpu_addr >= KVM_MAX_VCPUS)
+		return 3; /* not operational */
+
+	inti = kzalloc(sizeof(*inti), GFP_KERNEL);
+	if (!inti)
+		return -ENOMEM;
+
+	inti->type = KVM_S390_INT_EMERGENCY;
+
+	spin_lock_bh(&fi->lock);
+	li = fi->local_int[cpu_addr];
+	if (li == NULL) {
+		rc = 3; /* not operational */
+		kfree(inti);
+		goto unlock;
+	}
+	spin_lock_bh(&li->lock);
+	list_add_tail(&inti->list, &li->list);
+	atomic_set(&li->active, 1);
+	atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags);
+	if (waitqueue_active(&li->wq))
+		wake_up_interruptible(&li->wq);
+	spin_unlock_bh(&li->lock);
+	rc = 0; /* order accepted */
+unlock:
+	spin_unlock_bh(&fi->lock);
+	VCPU_EVENT(vcpu, 4, "sent sigp emerg to cpu %x", cpu_addr);
+	return rc;
+}
+
+static int __sigp_stop(struct kvm_vcpu *vcpu, u16 cpu_addr, int store)
+{
+	struct float_interrupt *fi = &vcpu->kvm->arch.float_int;
+	struct local_interrupt *li;
+	struct interrupt_info *inti;
+	int rc;
+
+	if (cpu_addr >= KVM_MAX_VCPUS)
+		return 3; /* not operational */
+
+	inti = kzalloc(sizeof(*inti), GFP_KERNEL);
+	if (!inti)
+		return -ENOMEM;
+
+	inti->type = KVM_S390_SIGP_STOP;
+
+	spin_lock_bh(&fi->lock);
+	li = fi->local_int[cpu_addr];
+	if (li == NULL) {
+		rc = 3; /* not operational */
+		kfree(inti);
+		goto unlock;
+	}
+	spin_lock_bh(&li->lock);
+	list_add_tail(&inti->list, &li->list);
+	atomic_set(&li->active, 1);
+	atomic_set_mask(CPUSTAT_STOP_INT, li->cpuflags);
+	if (store)
+		li->action_bits |= ACTION_STORE_ON_STOP;
+	li->action_bits |= ACTION_STOP_ON_STOP;
+	if (waitqueue_active(&li->wq))
+		wake_up_interruptible(&li->wq);
+	spin_unlock_bh(&li->lock);
+	rc = 0; /* order accepted */
+unlock:
+	spin_unlock_bh(&fi->lock);
+	VCPU_EVENT(vcpu, 4, "sent sigp stop to cpu %x", cpu_addr);
+	return rc;
+}
+
+static int __sigp_set_arch(struct kvm_vcpu *vcpu, u32 parameter)
+{
+	int rc;
+
+	switch (parameter & 0xff) { /* only the low byte selects the mode */
+	case 0:
+		printk(KERN_WARNING "kvm: request to switch to ESA/390 mode"
+							" not supported\n");
+		rc = 3; /* not operational */
+		break;
+	case 1:
+	case 2:
+		rc = 0; /* order accepted */
+		break;
+	default:
+		rc = -ENOTSUPP; /* negative: caller returns it unchanged */
+	}
+	return rc;
+}
+
+static int __sigp_set_prefix(struct kvm_vcpu *vcpu, u16 cpu_addr, u32 address,
+			     u64 *reg)
+{
+	struct float_interrupt *fi = &vcpu->kvm->arch.float_int;
+	struct local_interrupt *li;
+	struct interrupt_info *inti;
+	int rc;
+	u8 tmp;
+
+	/* make sure that the new value is valid memory */
+	address = address & 0x7fffe000u;
+	if ((copy_from_guest(vcpu, &tmp,
+		(u64) (address + vcpu->kvm->arch.guest_origin) , 1)) ||
+	   (copy_from_guest(vcpu, &tmp, (u64) (address +
+			vcpu->kvm->arch.guest_origin + PAGE_SIZE), 1))) {
+		*reg |= SIGP_STAT_INVALID_PARAMETER;
+		return 1; /* invalid parameter */
+	}
+
+	inti = kzalloc(sizeof(*inti), GFP_KERNEL);
+	if (!inti)
+		return 2; /* busy */
+
+	spin_lock_bh(&fi->lock);
+	/* never index local_int[] with an out-of-range cpu address */
+	li = (cpu_addr < KVM_MAX_VCPUS) ? fi->local_int[cpu_addr] : NULL;
+	if (li == NULL) {
+		rc = 1; /* incorrect state */
+		*reg |= SIGP_STAT_INCORRECT_STATE; /* set bit, keep the rest */
+		kfree(inti);
+		goto out_fi;
+	}
+
+	spin_lock_bh(&li->lock);
+	/* cpu must be in stopped state */
+	if (atomic_read(li->cpuflags) & CPUSTAT_RUNNING) {
+		rc = 1; /* incorrect state */
+		*reg |= SIGP_STAT_INCORRECT_STATE;
+		kfree(inti);
+		goto out_li;
+	}
+
+	inti->type = KVM_S390_SIGP_SET_PREFIX;
+	inti->prefix.address = address;
+
+	list_add_tail(&inti->list, &li->list);
+	atomic_set(&li->active, 1);
+	if (waitqueue_active(&li->wq))
+		wake_up_interruptible(&li->wq);
+	rc = 0; /* order accepted */
+
+	VCPU_EVENT(vcpu, 4, "set prefix of cpu %02x to %x", cpu_addr, address);
+out_li:
+	spin_unlock_bh(&li->lock);
+out_fi:
+	spin_unlock_bh(&fi->lock);
+	return rc;
+}
+
+int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu)
+{
+	int r1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4;
+	int r3 = vcpu->arch.sie_block->ipa & 0x000f;
+	int base2 = vcpu->arch.sie_block->ipb >> 28;
+	int disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16);
+	u32 parameter;
+	u16 cpu_addr = vcpu->arch.guest_gprs[r3];
+	u8 order_code;
+	int rc;
+
+	order_code = disp2; /* order code = low byte of the operand address */
+	if (base2)
+		order_code += vcpu->arch.guest_gprs[base2];
+
+	if (r1 % 2) /* parameter is taken from the odd register: r1 or r1 + 1 */
+		parameter = vcpu->arch.guest_gprs[r1];
+	else
+		parameter = vcpu->arch.guest_gprs[r1 + 1];
+
+	switch (order_code) {
+	case SIGP_SENSE:
+		vcpu->stat.instruction_sigp_sense++;
+		rc = __sigp_sense(vcpu, cpu_addr,
+				  &vcpu->arch.guest_gprs[r1]);
+		break;
+	case SIGP_EMERGENCY:
+		vcpu->stat.instruction_sigp_emergency++;
+		rc = __sigp_emergency(vcpu, cpu_addr);
+		break;
+	case SIGP_STOP:
+		vcpu->stat.instruction_sigp_stop++;
+		rc = __sigp_stop(vcpu, cpu_addr, 0);
+		break;
+	case SIGP_STOP_STORE_STATUS:
+		vcpu->stat.instruction_sigp_stop++;
+		rc = __sigp_stop(vcpu, cpu_addr, 1);
+		break;
+	case SIGP_SET_ARCH:
+		vcpu->stat.instruction_sigp_arch++;
+		rc = __sigp_set_arch(vcpu, parameter);
+		break;
+	case SIGP_SET_PREFIX:
+		vcpu->stat.instruction_sigp_prefix++;
+		rc = __sigp_set_prefix(vcpu, cpu_addr, parameter,
+				       &vcpu->arch.guest_gprs[r1]);
+		break;
+	case SIGP_RESTART:
+		vcpu->stat.instruction_sigp_restart++;
+		/* user space must know about restart - fall through */
+	default:
+		return -ENOTSUPP;
+	}
+
+	if (rc < 0) /* negative rc is an error code, not a condition code */
+		return rc;
+
+	vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44);
+	vcpu->arch.sie_block->gpsw.mask |= (rc & 3ul) << 44; /* cc = rc */
+	return 0;
+}
diff --git a/arch/s390/lib/Makefile b/arch/s390/lib/Makefile
index 52084436ab69..ab6735df2d21 100644
--- a/arch/s390/lib/Makefile
+++ b/arch/s390/lib/Makefile
@@ -2,8 +2,6 @@
 # Makefile for s390-specific library files..
 #
 
-EXTRA_AFLAGS := -traditional
-
 lib-y += delay.o string.o uaccess_std.o uaccess_pt.o
 obj-$(CONFIG_32BIT) += div64.o qrnnd.o
 lib-$(CONFIG_64BIT) += uaccess_mvcos.o
diff --git a/arch/s390/lib/delay.c b/arch/s390/lib/delay.c
index 70f2a862b670..eae21a8ac72d 100644
--- a/arch/s390/lib/delay.c
+++ b/arch/s390/lib/delay.c
@@ -34,7 +34,7 @@ void __delay(unsigned long loops)
  */
 void __udelay(unsigned long usecs)
 {
-	u64 end, time, jiffy_timer = 0;
+	u64 end, time, old_cc = 0;
 	unsigned long flags, cr0, mask, dummy;
 	int irq_context;
 
@@ -43,8 +43,8 @@ void __udelay(unsigned long usecs)
 		local_bh_disable();
 	local_irq_save(flags);
 	if (raw_irqs_disabled_flags(flags)) {
-		jiffy_timer = S390_lowcore.jiffy_timer;
-		S390_lowcore.jiffy_timer = -1ULL - (4096 << 12);
+		old_cc = S390_lowcore.clock_comparator;
+		S390_lowcore.clock_comparator = -1ULL;
 		__ctl_store(cr0, 0, 0);
 		dummy = (cr0 & 0xffff00e0) | 0x00000800;
 		__ctl_load(dummy , 0, 0);
@@ -55,8 +55,8 @@ void __udelay(unsigned long usecs)
 
 	end = get_clock() + ((u64) usecs << 12);
 	do {
-		time = end < S390_lowcore.jiffy_timer ?
-			end : S390_lowcore.jiffy_timer;
+		time = end < S390_lowcore.clock_comparator ?
+			end : S390_lowcore.clock_comparator;
 		set_clock_comparator(time);
 		trace_hardirqs_on();
 		__load_psw_mask(mask);
@@ -65,10 +65,10 @@ void __udelay(unsigned long usecs)
 
 	if (raw_irqs_disabled_flags(flags)) {
 		__ctl_load(cr0, 0, 0);
-		S390_lowcore.jiffy_timer = jiffy_timer;
+		S390_lowcore.clock_comparator = old_cc;
 	}
 	if (!irq_context)
 		_local_bh_enable();
-	set_clock_comparator(S390_lowcore.jiffy_timer);
+	set_clock_comparator(S390_lowcore.clock_comparator);
 	local_irq_restore(flags);
 }
diff --git a/arch/s390/lib/uaccess_mvcos.c b/arch/s390/lib/uaccess_mvcos.c
index 6d8772339d76..3f15aaf54855 100644
--- a/arch/s390/lib/uaccess_mvcos.c
+++ b/arch/s390/lib/uaccess_mvcos.c
@@ -162,6 +162,7 @@ static size_t clear_user_mvcos(size_t size, void __user *to)
 	return size;
 }
 
+#ifdef CONFIG_S390_SWITCH_AMODE
 static size_t strnlen_user_mvcos(size_t count, const char __user *src)
 {
 	char buf[256];
@@ -199,6 +200,7 @@ static size_t strncpy_from_user_mvcos(size_t count, const char __user *src,
 	} while ((len_str == len) && (done < count));
 	return done;
 }
+#endif /* CONFIG_S390_SWITCH_AMODE */
 
 struct uaccess_ops uaccess_mvcos = {
 	.copy_from_user = copy_from_user_mvcos_check,
diff --git a/arch/s390/lib/uaccess_pt.c b/arch/s390/lib/uaccess_pt.c
index 5efdfe9f5e76..d66215b0fde9 100644
--- a/arch/s390/lib/uaccess_pt.c
+++ b/arch/s390/lib/uaccess_pt.c
@@ -302,6 +302,10 @@ static size_t copy_in_user_pt(size_t n, void __user *to,
 	pte_t *pte_from, *pte_to;
 	int write_user;
 
+	if (segment_eq(get_fs(), KERNEL_DS)) {
+		memcpy((void __force *) to, (void __force *) from, n);
+		return 0;
+	}
 	done = 0;
 retry:
 	spin_lock(&mm->page_table_lock);
@@ -361,18 +365,10 @@ fault:
 		     : "0" (-EFAULT), "d" (oparg), "a" (uaddr),		\
 		       "m" (*uaddr) : "cc" );
 
-int futex_atomic_op_pt(int op, int __user *uaddr, int oparg, int *old)
+static int __futex_atomic_op_pt(int op, int __user *uaddr, int oparg, int *old)
 {
 	int oldval = 0, newval, ret;
 
-	spin_lock(&current->mm->page_table_lock);
-	uaddr = (int __user *) __dat_user_addr((unsigned long) uaddr);
-	if (!uaddr) {
-		spin_unlock(&current->mm->page_table_lock);
-		return -EFAULT;
-	}
-	get_page(virt_to_page(uaddr));
-	spin_unlock(&current->mm->page_table_lock);
 	switch (op) {
 	case FUTEX_OP_SET:
 		__futex_atomic_op("lr %2,%5\n",
@@ -397,17 +393,17 @@ int futex_atomic_op_pt(int op, int __user *uaddr, int oparg, int *old)
 	default:
 		ret = -ENOSYS;
 	}
-	put_page(virt_to_page(uaddr));
-	*old = oldval;
+	if (ret == 0)
+		*old = oldval;
 	return ret;
 }
 
-int futex_atomic_cmpxchg_pt(int __user *uaddr, int oldval, int newval)
+int futex_atomic_op_pt(int op, int __user *uaddr, int oparg, int *old)
 {
 	int ret;
 
-	if (!current->mm)
-		return -EFAULT;
+	if (segment_eq(get_fs(), KERNEL_DS))
+		return __futex_atomic_op_pt(op, uaddr, oparg, old);
 	spin_lock(&current->mm->page_table_lock);
 	uaddr = (int __user *) __dat_user_addr((unsigned long) uaddr);
 	if (!uaddr) {
@@ -416,13 +412,40 @@ int futex_atomic_cmpxchg_pt(int __user *uaddr, int oldval, int newval)
 	}
 	get_page(virt_to_page(uaddr));
 	spin_unlock(&current->mm->page_table_lock);
-	asm volatile("   cs   %1,%4,0(%5)\n"
-		     "0: lr   %0,%1\n"
-		     "1:\n"
-		     EX_TABLE(0b,1b)
+	ret = __futex_atomic_op_pt(op, uaddr, oparg, old);
+	put_page(virt_to_page(uaddr));
+	return ret;
+}
+
+static int __futex_atomic_cmpxchg_pt(int __user *uaddr, int oldval, int newval)
+{
+	int ret;
+
+	asm volatile("0: cs   %1,%4,0(%5)\n"
+		     "1: lr   %0,%1\n"
+		     "2:\n"
+		     EX_TABLE(0b,2b) EX_TABLE(1b,2b)
 		     : "=d" (ret), "+d" (oldval), "=m" (*uaddr)
 		     : "0" (-EFAULT), "d" (newval), "a" (uaddr), "m" (*uaddr)
 		     : "cc", "memory" );
+	return ret;
+}
+
+int futex_atomic_cmpxchg_pt(int __user *uaddr, int oldval, int newval)
+{
+	int ret;
+
+	if (segment_eq(get_fs(), KERNEL_DS))
+		return __futex_atomic_cmpxchg_pt(uaddr, oldval, newval);
+	spin_lock(&current->mm->page_table_lock);
+	uaddr = (int __user *) __dat_user_addr((unsigned long) uaddr);
+	if (!uaddr) {
+		spin_unlock(&current->mm->page_table_lock);
+		return -EFAULT;
+	}
+	get_page(virt_to_page(uaddr));
+	spin_unlock(&current->mm->page_table_lock);
+	ret = __futex_atomic_cmpxchg_pt(uaddr, oldval, newval);
 	put_page(virt_to_page(uaddr));
 	return ret;
 }
diff --git a/arch/s390/math-emu/Makefile b/arch/s390/math-emu/Makefile
index 73b3e72efc46..c84890341052 100644
--- a/arch/s390/math-emu/Makefile
+++ b/arch/s390/math-emu/Makefile
@@ -5,4 +5,3 @@
 obj-$(CONFIG_MATHEMU) := math.o
 
 EXTRA_CFLAGS := -I$(src) -Iinclude/math-emu -w
-EXTRA_AFLAGS := -traditional
diff --git a/arch/s390/mm/Makefile b/arch/s390/mm/Makefile
index 66401930f83e..fb988a48a754 100644
--- a/arch/s390/mm/Makefile
+++ b/arch/s390/mm/Makefile
@@ -4,4 +4,4 @@
 
 obj-y	 := init.o fault.o extmem.o mmap.o vmem.o pgtable.o
 obj-$(CONFIG_CMM) += cmm.o
-
+obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
diff --git a/arch/s390/mm/extmem.c b/arch/s390/mm/extmem.c
index 880b0ebf894b..f231f5ec74b6 100644
--- a/arch/s390/mm/extmem.c
+++ b/arch/s390/mm/extmem.c
@@ -287,24 +287,10 @@ __segment_load (char *name, int do_nonshared, unsigned long *addr, unsigned long
 	if (rc < 0)
 		goto out_free;
 
-	rc = add_shared_memory(seg->start_addr, seg->end - seg->start_addr + 1);
+	rc = vmem_add_mapping(seg->start_addr, seg->end - seg->start_addr + 1);
 
-	switch (rc) {
-	case 0:
-		break;
-	case -ENOSPC:
-		PRINT_WARN("segment_load: not loading segment %s - overlaps "
-			   "storage/segment\n", name);
-		goto out_free;
-	case -ERANGE:
-		PRINT_WARN("segment_load: not loading segment %s - exceeds "
-			   "kernel mapping range\n", name);
-		goto out_free;
-	default:
-		PRINT_WARN("segment_load: not loading segment %s (rc: %d)\n",
-			   name, rc);
+	if (rc)
 		goto out_free;
-	}
 
 	seg->res = kzalloc(sizeof(struct resource), GFP_KERNEL);
 	if (seg->res == NULL) {
@@ -365,7 +351,7 @@ __segment_load (char *name, int do_nonshared, unsigned long *addr, unsigned long
 	release_resource(seg->res);
 	kfree(seg->res);
  out_shared:
-	remove_shared_memory(seg->start_addr, seg->end - seg->start_addr + 1);
+	vmem_remove_mapping(seg->start_addr, seg->end - seg->start_addr + 1);
  out_free:
 	kfree(seg);
  out:
@@ -488,7 +474,7 @@ segment_modify_shared (char *name, int do_nonshared)
 	rc = 0;
 	goto out_unlock;
  out_del:
-	remove_shared_memory(seg->start_addr, seg->end - seg->start_addr + 1);
+	vmem_remove_mapping(seg->start_addr, seg->end - seg->start_addr + 1);
 	list_del(&seg->list);
 	dcss_diag(DCSS_PURGESEG, seg->dcss_name, &dummy, &dummy);
 	kfree(seg);
@@ -522,7 +508,7 @@ segment_unload(char *name)
 		goto out_unlock;
 	release_resource(seg->res);
 	kfree(seg->res);
-	remove_shared_memory(seg->start_addr, seg->end - seg->start_addr + 1);
+	vmem_remove_mapping(seg->start_addr, seg->end - seg->start_addr + 1);
 	list_del(&seg->list);
 	dcss_diag(DCSS_PURGESEG, seg->dcss_name, &dummy, &dummy);
 	kfree(seg);
@@ -582,8 +568,59 @@ out:
 	mutex_unlock(&dcss_lock);
 }
 
+/*
+ * print appropriate error message for segment_load()/segment_type()
+ * return code
+ */
+void segment_warning(int rc, char *seg_name)
+{
+	switch (rc) {
+	case -ENOENT:
+		PRINT_WARN("cannot load/query segment %s, "
+			   "does not exist\n", seg_name);
+		break;
+	case -ENOSYS:
+		PRINT_WARN("cannot load/query segment %s, "
+			   "not running on VM\n", seg_name);
+		break;
+	case -EIO:
+		PRINT_WARN("cannot load/query segment %s, "
+			   "hardware error\n", seg_name);
+		break;
+	case -ENOTSUPP:
+		PRINT_WARN("cannot load/query segment %s, "
+			   "is a multi-part segment\n", seg_name);
+		break;
+	case -ENOSPC:
+		PRINT_WARN("cannot load/query segment %s, "
+			   "overlaps with storage\n", seg_name);
+		break;
+	case -EBUSY:
+		PRINT_WARN("cannot load/query segment %s, "
+			   "overlaps with already loaded dcss\n", seg_name);
+		break;
+	case -EPERM:
+		PRINT_WARN("cannot load/query segment %s, "
+			   "already loaded in incompatible mode\n", seg_name);
+		break;
+	case -ENOMEM:
+		PRINT_WARN("cannot load/query segment %s, "
+			   "out of memory\n", seg_name);
+		break;
+	case -ERANGE:
+		PRINT_WARN("cannot load/query segment %s, "
+			   "exceeds kernel mapping range\n", seg_name);
+		break;
+	default:
+		PRINT_WARN("cannot load/query segment %s, "
+			   "return value %i\n", seg_name, rc);
+		break;
+	}
+}
+
 EXPORT_SYMBOL(segment_load);
 EXPORT_SYMBOL(segment_unload);
 EXPORT_SYMBOL(segment_save);
 EXPORT_SYMBOL(segment_type);
 EXPORT_SYMBOL(segment_modify_shared);
+EXPORT_SYMBOL(segment_warning);
diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
index ed13d429a487..4d537205e83c 100644
--- a/arch/s390/mm/fault.c
+++ b/arch/s390/mm/fault.c
@@ -28,11 +28,12 @@
 #include <linux/hardirq.h>
 #include <linux/kprobes.h>
 #include <linux/uaccess.h>
-
+#include <linux/hugetlb.h>
 #include <asm/system.h>
 #include <asm/pgtable.h>
 #include <asm/s390_ext.h>
 #include <asm/mmu_context.h>
+#include "../kernel/entry.h"
 
 #ifndef CONFIG_64BIT
 #define __FAIL_ADDR_MASK 0x7ffff000
@@ -50,8 +51,6 @@
 extern int sysctl_userprocess_debug;
 #endif
 
-extern void die(const char *,struct pt_regs *,long);
-
 #ifdef CONFIG_KPROBES
 static inline int notify_page_fault(struct pt_regs *regs, long err)
 {
@@ -245,11 +244,6 @@ static void do_sigbus(struct pt_regs *regs, unsigned long error_code,
 }
 
 #ifdef CONFIG_S390_EXEC_PROTECT
-extern long sys_sigreturn(struct pt_regs *regs);
-extern long sys_rt_sigreturn(struct pt_regs *regs);
-extern long sys32_sigreturn(struct pt_regs *regs);
-extern long sys32_rt_sigreturn(struct pt_regs *regs);
-
 static int signal_return(struct mm_struct *mm, struct pt_regs *regs,
 			 unsigned long address, unsigned long error_code)
 {
@@ -270,15 +264,15 @@ static int signal_return(struct mm_struct *mm, struct pt_regs *regs,
 #ifdef CONFIG_COMPAT
 	compat = test_tsk_thread_flag(current, TIF_31BIT);
 	if (compat && instruction == 0x0a77)
-		sys32_sigreturn(regs);
+		sys32_sigreturn();
 	else if (compat && instruction == 0x0aad)
-		sys32_rt_sigreturn(regs);
+		sys32_rt_sigreturn();
 	else
 #endif
 	if (instruction == 0x0a77)
-		sys_sigreturn(regs);
+		sys_sigreturn();
 	else if (instruction == 0x0aad)
-		sys_rt_sigreturn(regs);
+		sys_rt_sigreturn();
 	else {
 		current->thread.prot_addr = address;
 		current->thread.trap_no = error_code;
@@ -374,6 +368,8 @@ good_area:
 	}
 
 survive:
+	if (is_vm_hugetlb_page(vma))
+		address &= HPAGE_MASK;
 	/*
 	 * If for any reason at all we couldn't handle the fault,
 	 * make sure we exit gracefully rather than endlessly redo
@@ -424,7 +420,7 @@ no_context:
 }
 
 void __kprobes do_protection_exception(struct pt_regs *regs,
-				       unsigned long error_code)
+				       long error_code)
 {
 	/* Protection exception is supressing, decrement psw address. */
 	regs->psw.addr -= (error_code >> 16);
@@ -440,7 +436,7 @@ void __kprobes do_protection_exception(struct pt_regs *regs,
 	do_exception(regs, 4, 1);
 }
 
-void __kprobes do_dat_exception(struct pt_regs *regs, unsigned long error_code)
+void __kprobes do_dat_exception(struct pt_regs *regs, long error_code)
 {
 	do_exception(regs, error_code & 0xff, 0);
 }
diff --git a/arch/s390/mm/hugetlbpage.c b/arch/s390/mm/hugetlbpage.c
new file mode 100644
index 000000000000..f4b6124fdb75
--- /dev/null
+++ b/arch/s390/mm/hugetlbpage.c
@@ -0,0 +1,134 @@
+/*
+ *  IBM System z Huge TLB Page Support for Kernel.
+ *
+ *    Copyright 2007 IBM Corp.
+ *    Author(s): Gerald Schaefer <gerald.schaefer@de.ibm.com>
+ */
+
+#include <linux/mm.h>
+#include <linux/hugetlb.h>
+
+/* Install pteval as the huge-page entry at pteptr, which is really a pmd. */
+void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
+				   pte_t *pteptr, pte_t pteval)
+{
+	pmd_t *pmdp = (pmd_t *) pteptr;
+	pte_t shadow_pteval = pteval;
+	unsigned long mask;
+
+	if (!MACHINE_HAS_HPAGE) {	/* use the pte table from page[1].index */
+		pteptr = (pte_t *) pte_page(pteval)[1].index;
+		mask = pte_val(pteval) &
+				(_SEGMENT_ENTRY_INV | _SEGMENT_ENTRY_RO);
+		pte_val(pteval) = (_SEGMENT_ENTRY + __pa(pteptr)) | mask;
+		if (mm->context.noexec) {
+			pteptr += PTRS_PER_PTE;	/* PTRS_PER_PTE entries on */
+			pte_val(shadow_pteval) =
+					(_SEGMENT_ENTRY + __pa(pteptr)) | mask;
+		}
+	}
+	/* Write the entry; with noexec also write the shadow table's entry. */
+	pmd_val(*pmdp) = pte_val(pteval);
+	if (mm->context.noexec) {
+		pmdp = get_shadow_table(pmdp);
+		pmd_val(*pmdp) = pte_val(shadow_pteval);
+	}
+}
+
+/* No hw large pages: map page through a pte table; returns 0 or -ENOMEM. */
+int arch_prepare_hugepage(struct page *page)
+{
+	unsigned long addr = page_to_phys(page);
+	pte_t pte;
+	pte_t *ptep;
+	int i;
+
+	if (MACHINE_HAS_HPAGE)
+		return 0;
+
+	ptep = (pte_t *) pte_alloc_one(&init_mm, addr);
+	if (!ptep)
+		return -ENOMEM;
+	pte = mk_pte(page, PAGE_RW);
+	for (i = 0; i < PTRS_PER_PTE; i++) {
+		set_pte_at(&init_mm, addr + i * PAGE_SIZE, ptep + i, pte);
+		pte_val(pte) += PAGE_SIZE;	/* next PAGE_SIZE piece */
+	}
+	page[1].index = (unsigned long) ptep;	/* read by set_huge_pte_at() */
+	return 0;
+}
+
+/* Free the pte table that arch_prepare_hugepage() left in page[1].index. */
+void arch_release_hugepage(struct page *page)
+{
+	pte_t *table = NULL;
+
+	if (!MACHINE_HAS_HPAGE)
+		table = (pte_t *) page[1].index;
+
+	if (table) {
+		pte_free(&init_mm, table);
+		page[1].index = 0;
+	}
+}
+
+/*
+ * Get (allocating as needed) the pmd slot for addr, used as the huge pte.
+ */
+pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr)
+{
+	pgd_t *pgd = pgd_offset(mm, addr);
+	pud_t *pud = pud_alloc(mm, pgd, addr);
+
+	if (!pud)
+		return NULL;
+	return (pte_t *) pmd_alloc(mm, pud, addr);
+}
+
+/* Find the pmd slot for addr without allocating; NULL if a level is absent. */
+pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
+{
+	pgd_t *pgd = pgd_offset(mm, addr);
+	pud_t *pud;
+
+	if (!pgd_present(*pgd))
+		return NULL;
+
+	pud = pud_offset(pgd, addr);
+	if (!pud_present(*pud))
+		return NULL;
+	return (pte_t *) pmd_offset(pud, addr);
+}
+
+int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep)
+{
+	return 0;	/* stub: touches nothing and always reports 0 */
+}
+
+struct page *follow_huge_addr(struct mm_struct *mm, unsigned long address,
+			      int write)
+{
+	return ERR_PTR(-EINVAL);	/* stub: no lookup is attempted */
+}
+
+/* 1 if the machine has large pages and pmd has its LARGE bit set, else 0. */
+int pmd_huge(pmd_t pmd)
+{
+	unsigned long large = pmd_val(pmd) & _SEGMENT_ENTRY_LARGE;
+
+	return MACHINE_HAS_HPAGE && large != 0;
+}
+
+/* Return the PAGE_SIZE page that address falls on inside the huge page. */
+struct page *follow_huge_pmd(struct mm_struct *mm, unsigned long address,
+			     pmd_t *pmdp, int write)
+{
+	unsigned long subpage = (address & ~HPAGE_MASK) >> PAGE_SHIFT;
+	struct page *head;
+
+	if (!MACHINE_HAS_HPAGE)
+		return NULL;
+
+	head = pmd_page(*pmdp);
+	return head ? head + subpage : NULL;
+}
diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c
index 8053245fe259..fa31de6ae97a 100644
--- a/arch/s390/mm/init.c
+++ b/arch/s390/mm/init.c
@@ -50,7 +50,6 @@ void show_mem(void)
 
 	printk("Mem-info:\n");
 	show_free_areas();
-	printk("Free swap:       %6ldkB\n", nr_swap_pages << (PAGE_SHIFT - 10));
 	i = max_mapnr;
 	while (i-- > 0) {
 		if (!pfn_valid(i))
@@ -78,28 +77,6 @@ void show_mem(void)
 	printk("%lu pages pagetables\n", global_page_state(NR_PAGETABLE));
 }
 
-static void __init setup_ro_region(void)
-{
-	pgd_t *pgd;
-	pud_t *pud;
-	pmd_t *pmd;
-	pte_t *pte;
-	pte_t new_pte;
-	unsigned long address, end;
-
-	address = ((unsigned long)&_stext) & PAGE_MASK;
-	end = PFN_ALIGN((unsigned long)&_eshared);
-
-	for (; address < end; address += PAGE_SIZE) {
-		pgd = pgd_offset_k(address);
-		pud = pud_offset(pgd, address);
-		pmd = pmd_offset(pud, address);
-		pte = pte_offset_kernel(pmd, address);
-		new_pte = mk_pte_phys(address, __pgprot(_PAGE_RO));
-		*pte = new_pte;
-	}
-}
-
 /*
  * paging_init() sets up the page tables
  */
@@ -122,7 +99,6 @@ void __init paging_init(void)
 	clear_table((unsigned long *) init_mm.pgd, pgd_type,
 		    sizeof(unsigned long)*2048);
 	vmem_map_init();
-	setup_ro_region();
 
         /* enable virtual mapping in kernel mode */
 	__ctl_load(S390_lowcore.kernel_asce, 1, 1);
@@ -130,6 +106,8 @@ void __init paging_init(void)
 	__ctl_load(S390_lowcore.kernel_asce, 13, 13);
 	__raw_local_irq_ssm(ssm_mask);
 
+	sparse_memory_present_with_active_regions(MAX_NUMNODES);
+	sparse_init();
 	memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
 #ifdef CONFIG_ZONE_DMA
 	max_zone_pfns[ZONE_DMA] = PFN_DOWN(MAX_DMA_ADDRESS);
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index fd072013f88c..5c1aea97cd12 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -30,11 +30,27 @@
 #define TABLES_PER_PAGE	4
 #define FRAG_MASK	15UL
 #define SECOND_HALVES	10UL
+/* Areas at +0/+512 get _PAGE_TYPE_EMPTY, those at +256/+768 are zeroed. */
+void clear_table_pgstes(unsigned long *table)
+{
+	clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE/4);
+	memset(table + 256, 0, PAGE_SIZE/4);
+	clear_table(table + 512, _PAGE_TYPE_EMPTY, PAGE_SIZE/4);
+	memset(table + 768, 0, PAGE_SIZE/4);
+}
+
 #else
 #define ALLOC_ORDER	2
 #define TABLES_PER_PAGE	2
 #define FRAG_MASK	3UL
 #define SECOND_HALVES	2UL
+/* Area at +0 gets _PAGE_TYPE_EMPTY, the one at +256 is zero-filled. */
+void clear_table_pgstes(unsigned long *table)
+{
+	clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE/2);
+	memset(table + 256, 0, PAGE_SIZE/2);
+}
+
 #endif
 
 unsigned long *crst_table_alloc(struct mm_struct *mm, int noexec)
@@ -153,7 +169,7 @@ unsigned long *page_table_alloc(struct mm_struct *mm)
 	unsigned long *table;
 	unsigned long bits;
 
-	bits = mm->context.noexec ? 3UL : 1UL;
+	bits = (mm->context.noexec || mm->context.pgstes) ? 3UL : 1UL;
 	spin_lock(&mm->page_table_lock);
 	page = NULL;
 	if (!list_empty(&mm->context.pgtable_list)) {
@@ -170,7 +186,10 @@ unsigned long *page_table_alloc(struct mm_struct *mm)
 		pgtable_page_ctor(page);
 		page->flags &= ~FRAG_MASK;
 		table = (unsigned long *) page_to_phys(page);
-		clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE);
+		if (mm->context.pgstes)
+			clear_table_pgstes(table);
+		else
+			clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE);
 		spin_lock(&mm->page_table_lock);
 		list_add(&page->lru, &mm->context.pgtable_list);
 	}
@@ -191,7 +210,7 @@ void page_table_free(struct mm_struct *mm, unsigned long *table)
 	struct page *page;
 	unsigned long bits;
 
-	bits = mm->context.noexec ? 3UL : 1UL;
+	bits = (mm->context.noexec || mm->context.pgstes) ? 3UL : 1UL;
 	bits <<= (__pa(table) & (PAGE_SIZE - 1)) / 256 / sizeof(unsigned long);
 	page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
 	spin_lock(&mm->page_table_lock);
@@ -228,3 +247,43 @@ void disable_noexec(struct mm_struct *mm, struct task_struct *tsk)
 	mm->context.noexec = 0;
 	update_mm(mm, tsk);
 }
+
+/*
+ * Give current a copy of its mm with pgstes enabled (for kvm); 0 or -errno.
+ */
+int s390_enable_sie(void)
+{
+	struct task_struct *tsk = current;
+	struct mm_struct *mm;
+	int rc;
+
+	task_lock(tsk);
+	/* Test for a missing mm before looking into it; pgstes set means done. */
+	rc = tsk->mm ? 0 : -EINVAL;
+	if (!tsk->mm || tsk->mm->context.pgstes)
+		goto unlock;
+
+	rc = -EINVAL;
+	if (atomic_read(&tsk->mm->mm_users) > 1 ||
+	    tsk->mm != tsk->active_mm || tsk->mm->ioctx_list)
+		goto unlock;
+	/* NOTE(review): dup_mm() runs under task_lock(); confirm that is safe. */
+	tsk->mm->context.pgstes = 1;	/* dirty little tricks .. */
+	mm = dup_mm(tsk);		/* copy is made while pgstes reads 1 */
+	tsk->mm->context.pgstes = 0;
+
+	rc = -ENOMEM;
+	if (!mm)
+		goto unlock;
+	mmput(tsk->mm);			/* drop the reference to the old mm */
+	tsk->mm = tsk->active_mm = mm;
+	preempt_disable();
+	update_mm(mm, tsk);
+	cpu_set(smp_processor_id(), mm->cpu_vm_mask);
+	preempt_enable();
+	rc = 0;
+unlock:
+	task_unlock(tsk);
+	return rc;
+}
+EXPORT_SYMBOL_GPL(s390_enable_sie);
diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c
index 35d90a4720fd..beccacf907f3 100644
--- a/arch/s390/mm/vmem.c
+++ b/arch/s390/mm/vmem.c
@@ -10,10 +10,12 @@
 #include <linux/mm.h>
 #include <linux/module.h>
 #include <linux/list.h>
+#include <linux/hugetlb.h>
 #include <asm/pgalloc.h>
 #include <asm/pgtable.h>
 #include <asm/setup.h>
 #include <asm/tlbflush.h>
+#include <asm/sections.h>
 
 static DEFINE_MUTEX(vmem_mutex);
 
@@ -25,43 +27,6 @@ struct memory_segment {
 
 static LIST_HEAD(mem_segs);
 
-void __meminit memmap_init(unsigned long size, int nid, unsigned long zone,
-			   unsigned long start_pfn)
-{
-	struct page *start, *end;
-	struct page *map_start, *map_end;
-	int i;
-
-	start = pfn_to_page(start_pfn);
-	end = start + size;
-
-	for (i = 0; i < MEMORY_CHUNKS && memory_chunk[i].size > 0; i++) {
-		unsigned long cstart, cend;
-
-		cstart = PFN_DOWN(memory_chunk[i].addr);
-		cend = cstart + PFN_DOWN(memory_chunk[i].size);
-
-		map_start = mem_map + cstart;
-		map_end = mem_map + cend;
-
-		if (map_start < start)
-			map_start = start;
-		if (map_end > end)
-			map_end = end;
-
-		map_start -= ((unsigned long) map_start & (PAGE_SIZE - 1))
-			/ sizeof(struct page);
-		map_end += ((PFN_ALIGN((unsigned long) map_end)
-			     - (unsigned long) map_end)
-			    / sizeof(struct page));
-
-		if (map_start < map_end)
-			memmap_init_zone((unsigned long)(map_end - map_start),
-					 nid, zone, page_to_pfn(map_start),
-					 MEMMAP_EARLY);
-	}
-}
-
 static void __ref *vmem_alloc_pages(unsigned int order)
 {
 	if (slab_is_available())
@@ -77,8 +42,7 @@ static inline pud_t *vmem_pud_alloc(void)
 	pud = vmem_alloc_pages(2);
 	if (!pud)
 		return NULL;
-	pud_val(*pud) = _REGION3_ENTRY_EMPTY;
-	memcpy(pud + 1, pud, (PTRS_PER_PUD - 1)*sizeof(pud_t));
+	clear_table((unsigned long *) pud, _REGION3_ENTRY_EMPTY, PAGE_SIZE * 4);
 #endif
 	return pud;
 }
@@ -91,7 +55,7 @@ static inline pmd_t *vmem_pmd_alloc(void)
 	pmd = vmem_alloc_pages(2);
 	if (!pmd)
 		return NULL;
-	clear_table((unsigned long *) pmd, _SEGMENT_ENTRY_EMPTY, PAGE_SIZE*4);
+	clear_table((unsigned long *) pmd, _SEGMENT_ENTRY_EMPTY, PAGE_SIZE * 4);
 #endif
 	return pmd;
 }
@@ -114,7 +78,7 @@ static pte_t __init_refok *vmem_pte_alloc(void)
 /*
  * Add a physical memory range to the 1:1 mapping.
  */
-static int vmem_add_range(unsigned long start, unsigned long size)
+static int vmem_add_mem(unsigned long start, unsigned long size, int ro)
 {
 	unsigned long address;
 	pgd_t *pg_dir;
@@ -141,7 +105,19 @@ static int vmem_add_range(unsigned long start, unsigned long size)
 			pud_populate_kernel(&init_mm, pu_dir, pm_dir);
 		}
 
+		pte = mk_pte_phys(address, __pgprot(ro ? _PAGE_RO : 0));
 		pm_dir = pmd_offset(pu_dir, address);
+
+#ifdef __s390x__
+		if (MACHINE_HAS_HPAGE && !(address & ~HPAGE_MASK) &&
+		    (address + HPAGE_SIZE <= start + size) &&
+		    (address >= HPAGE_SIZE)) {
+			pte_val(pte) |= _SEGMENT_ENTRY_LARGE;
+			pmd_val(*pm_dir) = pte_val(pte);
+			address += HPAGE_SIZE - PAGE_SIZE;
+			continue;
+		}
+#endif
 		if (pmd_none(*pm_dir)) {
 			pt_dir = vmem_pte_alloc();
 			if (!pt_dir)
@@ -150,7 +126,6 @@ static int vmem_add_range(unsigned long start, unsigned long size)
 		}
 
 		pt_dir = pte_offset_kernel(pm_dir, address);
-		pte = pfn_pte(address >> PAGE_SHIFT, PAGE_KERNEL);
 		*pt_dir = pte;
 	}
 	ret = 0;
@@ -181,6 +156,13 @@ static void vmem_remove_range(unsigned long start, unsigned long size)
 		pm_dir = pmd_offset(pu_dir, address);
 		if (pmd_none(*pm_dir))
 			continue;
+
+		if (pmd_huge(*pm_dir)) {
+			pmd_clear_kernel(pm_dir);
+			address += HPAGE_SIZE - PAGE_SIZE;
+			continue;
+		}
+
 		pt_dir = pte_offset_kernel(pm_dir, address);
 		*pt_dir = pte;
 	}
@@ -190,10 +172,9 @@ static void vmem_remove_range(unsigned long start, unsigned long size)
 /*
  * Add a backed mem_map array to the virtual mem_map array.
  */
-static int vmem_add_mem_map(unsigned long start, unsigned long size)
+int __meminit vmemmap_populate(struct page *start, unsigned long nr, int node)
 {
 	unsigned long address, start_addr, end_addr;
-	struct page *map_start, *map_end;
 	pgd_t *pg_dir;
 	pud_t *pu_dir;
 	pmd_t *pm_dir;
@@ -201,11 +182,8 @@ static int vmem_add_mem_map(unsigned long start, unsigned long size)
 	pte_t  pte;
 	int ret = -ENOMEM;
 
-	map_start = VMEM_MAP + PFN_DOWN(start);
-	map_end	= VMEM_MAP + PFN_DOWN(start + size);
-
-	start_addr = (unsigned long) map_start & PAGE_MASK;
-	end_addr = PFN_ALIGN((unsigned long) map_end);
+	start_addr = (unsigned long) start;
+	end_addr = (unsigned long) (start + nr);
 
 	for (address = start_addr; address < end_addr; address += PAGE_SIZE) {
 		pg_dir = pgd_offset_k(address);
@@ -249,16 +227,6 @@ out:
 	return ret;
 }
 
-static int vmem_add_mem(unsigned long start, unsigned long size)
-{
-	int ret;
-
-	ret = vmem_add_mem_map(start, size);
-	if (ret)
-		return ret;
-	return vmem_add_range(start, size);
-}
-
 /*
  * Add memory segment to the segment list if it doesn't overlap with
  * an already present segment.
@@ -296,7 +264,7 @@ static void __remove_shared_memory(struct memory_segment *seg)
 	vmem_remove_range(seg->start, seg->size);
 }
 
-int remove_shared_memory(unsigned long start, unsigned long size)
+int vmem_remove_mapping(unsigned long start, unsigned long size)
 {
 	struct memory_segment *seg;
 	int ret;
@@ -320,11 +288,9 @@ out:
 	return ret;
 }
 
-int add_shared_memory(unsigned long start, unsigned long size)
+int vmem_add_mapping(unsigned long start, unsigned long size)
 {
 	struct memory_segment *seg;
-	struct page *page;
-	unsigned long pfn, num_pfn, end_pfn;
 	int ret;
 
 	mutex_lock(&vmem_mutex);
@@ -339,24 +305,9 @@ int add_shared_memory(unsigned long start, unsigned long size)
 	if (ret)
 		goto out_free;
 
-	ret = vmem_add_mem(start, size);
+	ret = vmem_add_mem(start, size, 0);
 	if (ret)
 		goto out_remove;
-
-	pfn = PFN_DOWN(start);
-	num_pfn = PFN_DOWN(size);
-	end_pfn = pfn + num_pfn;
-
-	page = pfn_to_page(pfn);
-	memset(page, 0, num_pfn * sizeof(struct page));
-
-	for (; pfn < end_pfn; pfn++) {
-		page = pfn_to_page(pfn);
-		init_page_count(page);
-		reset_page_mapcount(page);
-		SetPageReserved(page);
-		INIT_LIST_HEAD(&page->lru);
-	}
 	goto out;
 
 out_remove:
@@ -375,14 +326,34 @@ out:
  */
 void __init vmem_map_init(void)
 {
+	unsigned long ro_start, ro_end;
+	unsigned long start, end;
 	int i;
 
 	INIT_LIST_HEAD(&init_mm.context.crst_list);
 	INIT_LIST_HEAD(&init_mm.context.pgtable_list);
 	init_mm.context.noexec = 0;
-	NODE_DATA(0)->node_mem_map = VMEM_MAP;
-	for (i = 0; i < MEMORY_CHUNKS && memory_chunk[i].size > 0; i++)
-		vmem_add_mem(memory_chunk[i].addr, memory_chunk[i].size);
+	ro_start = ((unsigned long)&_stext) & PAGE_MASK;
+	ro_end = PFN_ALIGN((unsigned long)&_eshared);
+	for (i = 0; i < MEMORY_CHUNKS && memory_chunk[i].size > 0; i++) {
+		start = memory_chunk[i].addr;
+		end = memory_chunk[i].addr + memory_chunk[i].size;
+		if (start >= ro_end || end <= ro_start)	/* no overlap */
+			vmem_add_mem(start, end - start, 0);
+		else if (start >= ro_start && end <= ro_end)	/* inside */
+			vmem_add_mem(start, end - start, 1);
+		else if (start >= ro_start) {	/* begins inside, ends past */
+			vmem_add_mem(start, ro_end - start, 1);
+			vmem_add_mem(ro_end, end - ro_end, 0);
+		} else if (end < ro_end) {	/* begins before, ends inside */
+			vmem_add_mem(start, ro_start - start, 0);
+			vmem_add_mem(ro_start, end - ro_start, 1);
+		} else {			/* covers the whole ro range */
+			vmem_add_mem(start, ro_start - start, 0);
+			vmem_add_mem(ro_start, ro_end - ro_start, 1);
+			vmem_add_mem(ro_end, end - ro_end, 0);
+		}
+	}
 }
 
 /*