Diffstat (limited to 'arch/x86')
103 files changed, 9259 insertions, 50 deletions
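The bulk of the new code below lives under arch/x86/crypto: a Blowfish x86-64 assembler implementation with ECB/CBC/CTR glue, a 3-way parallel Twofish, and an SSSE3/AVX-accelerated SHA-1; a new arch/x86/Makefile.um also carries the UML build flags. The two block-cipher glue files share one batching idiom — process n-block groups with the parallel assembler, then finish the tail one block at a time. A minimal sketch of that loop, with hypothetical names (ecb_walk, blk_fn) and the kernel's blkcipher walk omitted:

#include <stddef.h>
#include <stdint.h>

typedef void (*blk_fn)(void *ctx, uint8_t *dst, const uint8_t *src);

/* The batching loop shared by the ECB paths below (ecb_crypt() in
 * blowfish_glue.c and twofish_glue_3way.c): drain n-block batches with
 * the parallel assembler, then handle leftovers one block at a time.
 * nbytes is assumed to be a multiple of bsize here. */
static void ecb_walk(void *ctx, uint8_t *dst, const uint8_t *src,
		     size_t nbytes, size_t bsize, unsigned int n,
		     blk_fn fn, blk_fn fn_nway)
{
	while (nbytes >= bsize * n) {	/* parallel n-block batches */
		fn_nway(ctx, dst, src);
		src += bsize * n;
		dst += bsize * n;
		nbytes -= bsize * n;
	}
	while (nbytes >= bsize) {	/* leftovers, one block at a time */
		fn(ctx, dst, src);
		src += bsize;
		dst += bsize;
		nbytes -= bsize;
	}
}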
diff --git a/arch/x86/Makefile.um b/arch/x86/Makefile.um new file mode 100644 index 000000000000..36ddec6a41c9 --- /dev/null +++ b/arch/x86/Makefile.um @@ -0,0 +1,61 @@ +core-y += arch/x86/crypto/ + +ifeq ($(CONFIG_X86_32),y) +START := 0x8048000 + +LDFLAGS += -m elf_i386 +ELF_ARCH := i386 +ELF_FORMAT := elf32-i386 +CHECKFLAGS += -D__i386__ + +ifeq ("$(origin SUBARCH)", "command line") +ifneq ("$(shell uname -m | sed -e s/i.86/i386/)", "$(SUBARCH)") +KBUILD_CFLAGS += $(call cc-option,-m32) +KBUILD_AFLAGS += $(call cc-option,-m32) +LINK-y += $(call cc-option,-m32) + +export LDFLAGS +endif +endif + +# First of all, tune CFLAGS for the specific CPU. This actually sets cflags-y. +include $(srctree)/arch/x86/Makefile_32.cpu + +# prevent gcc from keeping the stack 16 byte aligned. Taken from i386. +cflags-y += $(call cc-option,-mpreferred-stack-boundary=2) + +# Prevent sprintf in nfsd from being converted to strcpy and resulting in +# an unresolved reference. +cflags-y += -ffreestanding + +# Disable unit-at-a-time mode on pre-gcc-4.0 compilers, it makes gcc use +# a lot more stack due to the lack of sharing of stacklots. Also, gcc +# 4.3.0 needs -funit-at-a-time for extern inline functions. +KBUILD_CFLAGS += $(shell if [ $(call cc-version) -lt 0400 ] ; then \ + echo $(call cc-option,-fno-unit-at-a-time); \ + else echo $(call cc-option,-funit-at-a-time); fi ;) + +KBUILD_CFLAGS += $(cflags-y) + +else + +START := 0x60000000 + +KBUILD_CFLAGS += -fno-builtin -m64 + +CHECKFLAGS += -m64 -D__x86_64__ +KBUILD_AFLAGS += -m64 +LDFLAGS += -m elf_x86_64 +KBUILD_CPPFLAGS += -m64 + +ELF_ARCH := i386:x86-64 +ELF_FORMAT := elf64-x86-64 + +# Not on all 64-bit distros /lib is a symlink to /lib64. PLD is an example. + +LINK-$(CONFIG_LD_SCRIPT_DYN) += -Wl,-rpath,/lib64 +LINK-y += -m64 + +# Do unit-at-a-time unconditionally on x86_64, following the host +KBUILD_CFLAGS += $(call cc-option,-funit-at-a-time) +endif diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile index c04f1b7a9139..3537d4b91f74 100644 --- a/arch/x86/crypto/Makefile +++ b/arch/x86/crypto/Makefile @@ -7,21 +7,33 @@ obj-$(CONFIG_CRYPTO_TWOFISH_586) += twofish-i586.o obj-$(CONFIG_CRYPTO_SALSA20_586) += salsa20-i586.o obj-$(CONFIG_CRYPTO_AES_X86_64) += aes-x86_64.o +obj-$(CONFIG_CRYPTO_BLOWFISH_X86_64) += blowfish-x86_64.o obj-$(CONFIG_CRYPTO_TWOFISH_X86_64) += twofish-x86_64.o +obj-$(CONFIG_CRYPTO_TWOFISH_X86_64_3WAY) += twofish-x86_64-3way.o obj-$(CONFIG_CRYPTO_SALSA20_X86_64) += salsa20-x86_64.o obj-$(CONFIG_CRYPTO_AES_NI_INTEL) += aesni-intel.o obj-$(CONFIG_CRYPTO_GHASH_CLMUL_NI_INTEL) += ghash-clmulni-intel.o obj-$(CONFIG_CRYPTO_CRC32C_INTEL) += crc32c-intel.o +obj-$(CONFIG_CRYPTO_SHA1_SSSE3) += sha1-ssse3.o aes-i586-y := aes-i586-asm_32.o aes_glue.o twofish-i586-y := twofish-i586-asm_32.o twofish_glue.o salsa20-i586-y := salsa20-i586-asm_32.o salsa20_glue.o aes-x86_64-y := aes-x86_64-asm_64.o aes_glue.o +blowfish-x86_64-y := blowfish-x86_64-asm_64.o blowfish_glue.o twofish-x86_64-y := twofish-x86_64-asm_64.o twofish_glue.o +twofish-x86_64-3way-y := twofish-x86_64-asm_64-3way.o twofish_glue_3way.o salsa20-x86_64-y := salsa20-x86_64-asm_64.o salsa20_glue.o aesni-intel-y := aesni-intel_asm.o aesni-intel_glue.o fpu.o ghash-clmulni-intel-y := ghash-clmulni-intel_asm.o ghash-clmulni-intel_glue.o + +# enable AVX support only when $(AS) can actually assemble the instructions +ifeq ($(call as-instr,vpxor %xmm0$(comma)%xmm1$(comma)%xmm2,yes,no),yes) +AFLAGS_sha1_ssse3_asm.o += -DSHA1_ENABLE_AVX_SUPPORT +CFLAGS_sha1_ssse3_glue.o += 
-DSHA1_ENABLE_AVX_SUPPORT +endif +sha1-ssse3-y := sha1_ssse3_asm.o sha1_ssse3_glue.o diff --git a/arch/x86/crypto/aes_glue.c b/arch/x86/crypto/aes_glue.c index 49ae9fe32b22..b0b6950cc8c8 100644 --- a/arch/x86/crypto/aes_glue.c +++ b/arch/x86/crypto/aes_glue.c @@ -4,6 +4,7 @@ */ #include <crypto/aes.h> +#include <asm/aes.h> asmlinkage void aes_enc_blk(struct crypto_aes_ctx *ctx, u8 *out, const u8 *in); asmlinkage void aes_dec_blk(struct crypto_aes_ctx *ctx, u8 *out, const u8 *in); diff --git a/arch/x86/crypto/blowfish-x86_64-asm_64.S b/arch/x86/crypto/blowfish-x86_64-asm_64.S new file mode 100644 index 000000000000..391d245dc086 --- /dev/null +++ b/arch/x86/crypto/blowfish-x86_64-asm_64.S @@ -0,0 +1,390 @@ +/* + * Blowfish Cipher Algorithm (x86_64) + * + * Copyright (C) 2011 Jussi Kivilinna <jussi.kivilinna@mbnet.fi> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 + * USA + * + */ + +.file "blowfish-x86_64-asm.S" +.text + +/* structure of crypto context */ +#define p 0 +#define s0 ((16 + 2) * 4) +#define s1 ((16 + 2 + (1 * 256)) * 4) +#define s2 ((16 + 2 + (2 * 256)) * 4) +#define s3 ((16 + 2 + (3 * 256)) * 4) + +/* register macros */ +#define CTX %rdi +#define RIO %rsi + +#define RX0 %rax +#define RX1 %rbx +#define RX2 %rcx +#define RX3 %rdx + +#define RX0d %eax +#define RX1d %ebx +#define RX2d %ecx +#define RX3d %edx + +#define RX0bl %al +#define RX1bl %bl +#define RX2bl %cl +#define RX3bl %dl + +#define RX0bh %ah +#define RX1bh %bh +#define RX2bh %ch +#define RX3bh %dh + +#define RT0 %rbp +#define RT1 %rsi +#define RT2 %r8 +#define RT3 %r9 + +#define RT0d %ebp +#define RT1d %esi +#define RT2d %r8d +#define RT3d %r9d + +#define RKEY %r10 + +/*********************************************************************** + * 1-way blowfish + ***********************************************************************/ +#define F() \ + rorq $16, RX0; \ + movzbl RX0bh, RT0d; \ + movzbl RX0bl, RT1d; \ + rolq $16, RX0; \ + movl s0(CTX,RT0,4), RT0d; \ + addl s1(CTX,RT1,4), RT0d; \ + movzbl RX0bh, RT1d; \ + movzbl RX0bl, RT2d; \ + rolq $32, RX0; \ + xorl s2(CTX,RT1,4), RT0d; \ + addl s3(CTX,RT2,4), RT0d; \ + xorq RT0, RX0; + +#define add_roundkey_enc(n) \ + xorq p+4*(n)(CTX), RX0; + +#define round_enc(n) \ + add_roundkey_enc(n); \ + \ + F(); \ + F(); + +#define add_roundkey_dec(n) \ + movq p+4*(n-1)(CTX), RT0; \ + rorq $32, RT0; \ + xorq RT0, RX0; + +#define round_dec(n) \ + add_roundkey_dec(n); \ + \ + F(); \ + F(); \ + +#define read_block() \ + movq (RIO), RX0; \ + rorq $32, RX0; \ + bswapq RX0; + +#define write_block() \ + bswapq RX0; \ + movq RX0, (RIO); + +#define xor_block() \ + bswapq RX0; \ + xorq RX0, (RIO); + +.align 8 +.global __blowfish_enc_blk +.type __blowfish_enc_blk,@function; + +__blowfish_enc_blk: + /* input: + * %rdi: ctx, CTX + * %rsi: dst + * %rdx: src + * %rcx: bool, if true: xor output + */ + movq %rbp, %r11; + + movq %rsi, 
%r10; + movq %rdx, RIO; + + read_block(); + + round_enc(0); + round_enc(2); + round_enc(4); + round_enc(6); + round_enc(8); + round_enc(10); + round_enc(12); + round_enc(14); + add_roundkey_enc(16); + + movq %r11, %rbp; + + movq %r10, RIO; + test %cl, %cl; + jnz __enc_xor; + + write_block(); + ret; +__enc_xor: + xor_block(); + ret; + +.align 8 +.global blowfish_dec_blk +.type blowfish_dec_blk,@function; + +blowfish_dec_blk: + /* input: + * %rdi: ctx, CTX + * %rsi: dst + * %rdx: src + */ + movq %rbp, %r11; + + movq %rsi, %r10; + movq %rdx, RIO; + + read_block(); + + round_dec(17); + round_dec(15); + round_dec(13); + round_dec(11); + round_dec(9); + round_dec(7); + round_dec(5); + round_dec(3); + add_roundkey_dec(1); + + movq %r10, RIO; + write_block(); + + movq %r11, %rbp; + + ret; + +/********************************************************************** + 4-way blowfish, four blocks parallel + **********************************************************************/ + +/* F() for 4-way. Slower when used alone/1-way, but faster when used + * parallel/4-way (tested on AMD Phenom II & Intel Xeon E7330). + */ +#define F4(x) \ + movzbl x ## bh, RT1d; \ + movzbl x ## bl, RT3d; \ + rorq $16, x; \ + movzbl x ## bh, RT0d; \ + movzbl x ## bl, RT2d; \ + rorq $16, x; \ + movl s0(CTX,RT0,4), RT0d; \ + addl s1(CTX,RT2,4), RT0d; \ + xorl s2(CTX,RT1,4), RT0d; \ + addl s3(CTX,RT3,4), RT0d; \ + xorq RT0, x; + +#define add_preloaded_roundkey4() \ + xorq RKEY, RX0; \ + xorq RKEY, RX1; \ + xorq RKEY, RX2; \ + xorq RKEY, RX3; + +#define preload_roundkey_enc(n) \ + movq p+4*(n)(CTX), RKEY; + +#define add_roundkey_enc4(n) \ + add_preloaded_roundkey4(); \ + preload_roundkey_enc(n + 2); + +#define round_enc4(n) \ + add_roundkey_enc4(n); \ + \ + F4(RX0); \ + F4(RX1); \ + F4(RX2); \ + F4(RX3); \ + \ + F4(RX0); \ + F4(RX1); \ + F4(RX2); \ + F4(RX3); + +#define preload_roundkey_dec(n) \ + movq p+4*((n)-1)(CTX), RKEY; \ + rorq $32, RKEY; + +#define add_roundkey_dec4(n) \ + add_preloaded_roundkey4(); \ + preload_roundkey_dec(n - 2); + +#define round_dec4(n) \ + add_roundkey_dec4(n); \ + \ + F4(RX0); \ + F4(RX1); \ + F4(RX2); \ + F4(RX3); \ + \ + F4(RX0); \ + F4(RX1); \ + F4(RX2); \ + F4(RX3); + +#define read_block4() \ + movq (RIO), RX0; \ + rorq $32, RX0; \ + bswapq RX0; \ + \ + movq 8(RIO), RX1; \ + rorq $32, RX1; \ + bswapq RX1; \ + \ + movq 16(RIO), RX2; \ + rorq $32, RX2; \ + bswapq RX2; \ + \ + movq 24(RIO), RX3; \ + rorq $32, RX3; \ + bswapq RX3; + +#define write_block4() \ + bswapq RX0; \ + movq RX0, (RIO); \ + \ + bswapq RX1; \ + movq RX1, 8(RIO); \ + \ + bswapq RX2; \ + movq RX2, 16(RIO); \ + \ + bswapq RX3; \ + movq RX3, 24(RIO); + +#define xor_block4() \ + bswapq RX0; \ + xorq RX0, (RIO); \ + \ + bswapq RX1; \ + xorq RX1, 8(RIO); \ + \ + bswapq RX2; \ + xorq RX2, 16(RIO); \ + \ + bswapq RX3; \ + xorq RX3, 24(RIO); + +.align 8 +.global __blowfish_enc_blk_4way +.type __blowfish_enc_blk_4way,@function; + +__blowfish_enc_blk_4way: + /* input: + * %rdi: ctx, CTX + * %rsi: dst + * %rdx: src + * %rcx: bool, if true: xor output + */ + pushq %rbp; + pushq %rbx; + pushq %rcx; + + preload_roundkey_enc(0); + + movq %rsi, %r11; + movq %rdx, RIO; + + read_block4(); + + round_enc4(0); + round_enc4(2); + round_enc4(4); + round_enc4(6); + round_enc4(8); + round_enc4(10); + round_enc4(12); + round_enc4(14); + add_preloaded_roundkey4(); + + popq %rbp; + movq %r11, RIO; + + test %bpl, %bpl; + jnz __enc_xor4; + + write_block4(); + + popq %rbx; + popq %rbp; + ret; + +__enc_xor4: + xor_block4(); + + popq %rbx; + popq %rbp; + ret; + 
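The read_block()/write_block() macros used above juggle byte order so that both big-endian 32-bit halves of a Blowfish block land in one 64-bit register. A minimal C rendering of that load, assuming a little-endian host (read_block64 is a hypothetical helper, not part of the patch):

#include <stdint.h>
#include <string.h>

/* What the read_block() macro computes (movq; rorq $32; bswapq): the
 * block's first big-endian word (xL) ends up in the low 32 bits and
 * the second (xR) in the high 32 bits of the result. */
static uint64_t read_block64(const uint8_t p[8])
{
	uint64_t v;

	memcpy(&v, p, 8);		/* movq (RIO), RX0 */
	v = (v >> 32) | (v << 32);	/* rorq $32, RX0   */
	return __builtin_bswap64(v);	/* bswapq RX0      */
}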
+.align 8 +.global blowfish_dec_blk_4way +.type blowfish_dec_blk_4way,@function; + +blowfish_dec_blk_4way: + /* input: + * %rdi: ctx, CTX + * %rsi: dst + * %rdx: src + */ + pushq %rbp; + pushq %rbx; + preload_roundkey_dec(17); + + movq %rsi, %r11; + movq %rdx, RIO; + + read_block4(); + + round_dec4(17); + round_dec4(15); + round_dec4(13); + round_dec4(11); + round_dec4(9); + round_dec4(7); + round_dec4(5); + round_dec4(3); + add_preloaded_roundkey4(); + + movq %r11, RIO; + write_block4(); + + popq %rbx; + popq %rbp; + + ret; + diff --git a/arch/x86/crypto/blowfish_glue.c b/arch/x86/crypto/blowfish_glue.c new file mode 100644 index 000000000000..b05aa163d55a --- /dev/null +++ b/arch/x86/crypto/blowfish_glue.c @@ -0,0 +1,492 @@ +/* + * Glue Code for assembler optimized version of Blowfish + * + * Copyright (c) 2011 Jussi Kivilinna <jussi.kivilinna@mbnet.fi> + * + * CBC & ECB parts based on code (crypto/cbc.c,ecb.c) by: + * Copyright (c) 2006 Herbert Xu <herbert@gondor.apana.org.au> + * CTR part based on code (crypto/ctr.c) by: + * (C) Copyright IBM Corp. 2007 - Joy Latten <latten@us.ibm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 + * USA + * + */ + +#include <crypto/blowfish.h> +#include <linux/crypto.h> +#include <linux/init.h> +#include <linux/module.h> +#include <linux/types.h> +#include <crypto/algapi.h> + +/* regular block cipher functions */ +asmlinkage void __blowfish_enc_blk(struct bf_ctx *ctx, u8 *dst, const u8 *src, + bool xor); +asmlinkage void blowfish_dec_blk(struct bf_ctx *ctx, u8 *dst, const u8 *src); + +/* 4-way parallel cipher functions */ +asmlinkage void __blowfish_enc_blk_4way(struct bf_ctx *ctx, u8 *dst, + const u8 *src, bool xor); +asmlinkage void blowfish_dec_blk_4way(struct bf_ctx *ctx, u8 *dst, + const u8 *src); + +static inline void blowfish_enc_blk(struct bf_ctx *ctx, u8 *dst, const u8 *src) +{ + __blowfish_enc_blk(ctx, dst, src, false); +} + +static inline void blowfish_enc_blk_xor(struct bf_ctx *ctx, u8 *dst, + const u8 *src) +{ + __blowfish_enc_blk(ctx, dst, src, true); +} + +static inline void blowfish_enc_blk_4way(struct bf_ctx *ctx, u8 *dst, + const u8 *src) +{ + __blowfish_enc_blk_4way(ctx, dst, src, false); +} + +static inline void blowfish_enc_blk_xor_4way(struct bf_ctx *ctx, u8 *dst, + const u8 *src) +{ + __blowfish_enc_blk_4way(ctx, dst, src, true); +} + +static void blowfish_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) +{ + blowfish_enc_blk(crypto_tfm_ctx(tfm), dst, src); +} + +static void blowfish_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) +{ + blowfish_dec_blk(crypto_tfm_ctx(tfm), dst, src); +} + +static struct crypto_alg bf_alg = { + .cra_name = "blowfish", + .cra_driver_name = "blowfish-asm", + .cra_priority = 200, + .cra_flags = CRYPTO_ALG_TYPE_CIPHER, + .cra_blocksize = BF_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct bf_ctx), + .cra_alignmask = 3, + 
.cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(bf_alg.cra_list), + .cra_u = { + .cipher = { + .cia_min_keysize = BF_MIN_KEY_SIZE, + .cia_max_keysize = BF_MAX_KEY_SIZE, + .cia_setkey = blowfish_setkey, + .cia_encrypt = blowfish_encrypt, + .cia_decrypt = blowfish_decrypt, + } + } +}; + +static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk, + void (*fn)(struct bf_ctx *, u8 *, const u8 *), + void (*fn_4way)(struct bf_ctx *, u8 *, const u8 *)) +{ + struct bf_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + unsigned int bsize = BF_BLOCK_SIZE; + unsigned int nbytes; + int err; + + err = blkcipher_walk_virt(desc, walk); + + while ((nbytes = walk->nbytes)) { + u8 *wsrc = walk->src.virt.addr; + u8 *wdst = walk->dst.virt.addr; + + /* Process four block batch */ + if (nbytes >= bsize * 4) { + do { + fn_4way(ctx, wdst, wsrc); + + wsrc += bsize * 4; + wdst += bsize * 4; + nbytes -= bsize * 4; + } while (nbytes >= bsize * 4); + + if (nbytes < bsize) + goto done; + } + + /* Handle leftovers */ + do { + fn(ctx, wdst, wsrc); + + wsrc += bsize; + wdst += bsize; + nbytes -= bsize; + } while (nbytes >= bsize); + +done: + err = blkcipher_walk_done(desc, walk, nbytes); + } + + return err; +} + +static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + struct blkcipher_walk walk; + + blkcipher_walk_init(&walk, dst, src, nbytes); + return ecb_crypt(desc, &walk, blowfish_enc_blk, blowfish_enc_blk_4way); +} + +static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + struct blkcipher_walk walk; + + blkcipher_walk_init(&walk, dst, src, nbytes); + return ecb_crypt(desc, &walk, blowfish_dec_blk, blowfish_dec_blk_4way); +} + +static struct crypto_alg blk_ecb_alg = { + .cra_name = "ecb(blowfish)", + .cra_driver_name = "ecb-blowfish-asm", + .cra_priority = 300, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_blocksize = BF_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct bf_ctx), + .cra_alignmask = 0, + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(blk_ecb_alg.cra_list), + .cra_u = { + .blkcipher = { + .min_keysize = BF_MIN_KEY_SIZE, + .max_keysize = BF_MAX_KEY_SIZE, + .setkey = blowfish_setkey, + .encrypt = ecb_encrypt, + .decrypt = ecb_decrypt, + }, + }, +}; + +static unsigned int __cbc_encrypt(struct blkcipher_desc *desc, + struct blkcipher_walk *walk) +{ + struct bf_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + unsigned int bsize = BF_BLOCK_SIZE; + unsigned int nbytes = walk->nbytes; + u64 *src = (u64 *)walk->src.virt.addr; + u64 *dst = (u64 *)walk->dst.virt.addr; + u64 *iv = (u64 *)walk->iv; + + do { + *dst = *src ^ *iv; + blowfish_enc_blk(ctx, (u8 *)dst, (u8 *)dst); + iv = dst; + + src += 1; + dst += 1; + nbytes -= bsize; + } while (nbytes >= bsize); + + *(u64 *)walk->iv = *iv; + return nbytes; +} + +static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + struct blkcipher_walk walk; + int err; + + blkcipher_walk_init(&walk, dst, src, nbytes); + err = blkcipher_walk_virt(desc, &walk); + + while ((nbytes = walk.nbytes)) { + nbytes = __cbc_encrypt(desc, &walk); + err = blkcipher_walk_done(desc, &walk, nbytes); + } + + return err; +} + +static unsigned int __cbc_decrypt(struct blkcipher_desc *desc, + struct blkcipher_walk *walk) +{ + struct bf_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + unsigned int bsize = BF_BLOCK_SIZE; + 
unsigned int nbytes = walk->nbytes; + u64 *src = (u64 *)walk->src.virt.addr; + u64 *dst = (u64 *)walk->dst.virt.addr; + u64 ivs[4 - 1]; + u64 last_iv; + + /* Start of the last block. */ + src += nbytes / bsize - 1; + dst += nbytes / bsize - 1; + + last_iv = *src; + + /* Process four block batch */ + if (nbytes >= bsize * 4) { + do { + nbytes -= bsize * 4 - bsize; + src -= 4 - 1; + dst -= 4 - 1; + + ivs[0] = src[0]; + ivs[1] = src[1]; + ivs[2] = src[2]; + + blowfish_dec_blk_4way(ctx, (u8 *)dst, (u8 *)src); + + dst[1] ^= ivs[0]; + dst[2] ^= ivs[1]; + dst[3] ^= ivs[2]; + + nbytes -= bsize; + if (nbytes < bsize) + goto done; + + *dst ^= *(src - 1); + src -= 1; + dst -= 1; + } while (nbytes >= bsize * 4); + + if (nbytes < bsize) + goto done; + } + + /* Handle leftovers */ + for (;;) { + blowfish_dec_blk(ctx, (u8 *)dst, (u8 *)src); + + nbytes -= bsize; + if (nbytes < bsize) + break; + + *dst ^= *(src - 1); + src -= 1; + dst -= 1; + } + +done: + *dst ^= *(u64 *)walk->iv; + *(u64 *)walk->iv = last_iv; + + return nbytes; +} + +static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + struct blkcipher_walk walk; + int err; + + blkcipher_walk_init(&walk, dst, src, nbytes); + err = blkcipher_walk_virt(desc, &walk); + + while ((nbytes = walk.nbytes)) { + nbytes = __cbc_decrypt(desc, &walk); + err = blkcipher_walk_done(desc, &walk, nbytes); + } + + return err; +} + +static struct crypto_alg blk_cbc_alg = { + .cra_name = "cbc(blowfish)", + .cra_driver_name = "cbc-blowfish-asm", + .cra_priority = 300, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_blocksize = BF_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct bf_ctx), + .cra_alignmask = 0, + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(blk_cbc_alg.cra_list), + .cra_u = { + .blkcipher = { + .min_keysize = BF_MIN_KEY_SIZE, + .max_keysize = BF_MAX_KEY_SIZE, + .ivsize = BF_BLOCK_SIZE, + .setkey = blowfish_setkey, + .encrypt = cbc_encrypt, + .decrypt = cbc_decrypt, + }, + }, +}; + +static void ctr_crypt_final(struct bf_ctx *ctx, struct blkcipher_walk *walk) +{ + u8 *ctrblk = walk->iv; + u8 keystream[BF_BLOCK_SIZE]; + u8 *src = walk->src.virt.addr; + u8 *dst = walk->dst.virt.addr; + unsigned int nbytes = walk->nbytes; + + blowfish_enc_blk(ctx, keystream, ctrblk); + crypto_xor(keystream, src, nbytes); + memcpy(dst, keystream, nbytes); + + crypto_inc(ctrblk, BF_BLOCK_SIZE); +} + +static unsigned int __ctr_crypt(struct blkcipher_desc *desc, + struct blkcipher_walk *walk) +{ + struct bf_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + unsigned int bsize = BF_BLOCK_SIZE; + unsigned int nbytes = walk->nbytes; + u64 *src = (u64 *)walk->src.virt.addr; + u64 *dst = (u64 *)walk->dst.virt.addr; + u64 ctrblk = be64_to_cpu(*(__be64 *)walk->iv); + __be64 ctrblocks[4]; + + /* Process four block batch */ + if (nbytes >= bsize * 4) { + do { + if (dst != src) { + dst[0] = src[0]; + dst[1] = src[1]; + dst[2] = src[2]; + dst[3] = src[3]; + } + + /* create ctrblks for parallel encrypt */ + ctrblocks[0] = cpu_to_be64(ctrblk++); + ctrblocks[1] = cpu_to_be64(ctrblk++); + ctrblocks[2] = cpu_to_be64(ctrblk++); + ctrblocks[3] = cpu_to_be64(ctrblk++); + + blowfish_enc_blk_xor_4way(ctx, (u8 *)dst, + (u8 *)ctrblocks); + + src += 4; + dst += 4; + } while ((nbytes -= bsize * 4) >= bsize * 4); + + if (nbytes < bsize) + goto done; + } + + /* Handle leftovers */ + do { + if (dst != src) + *dst = *src; + + ctrblocks[0] = cpu_to_be64(ctrblk++); + + blowfish_enc_blk_xor(ctx, (u8 *)dst, 
(u8 *)ctrblocks); + + src += 1; + dst += 1; + } while ((nbytes -= bsize) >= bsize); + +done: + *(__be64 *)walk->iv = cpu_to_be64(ctrblk); + return nbytes; +} + +static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + struct blkcipher_walk walk; + int err; + + blkcipher_walk_init(&walk, dst, src, nbytes); + err = blkcipher_walk_virt_block(desc, &walk, BF_BLOCK_SIZE); + + while ((nbytes = walk.nbytes) >= BF_BLOCK_SIZE) { + nbytes = __ctr_crypt(desc, &walk); + err = blkcipher_walk_done(desc, &walk, nbytes); + } + + if (walk.nbytes) { + ctr_crypt_final(crypto_blkcipher_ctx(desc->tfm), &walk); + err = blkcipher_walk_done(desc, &walk, 0); + } + + return err; +} + +static struct crypto_alg blk_ctr_alg = { + .cra_name = "ctr(blowfish)", + .cra_driver_name = "ctr-blowfish-asm", + .cra_priority = 300, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_blocksize = 1, + .cra_ctxsize = sizeof(struct bf_ctx), + .cra_alignmask = 0, + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(blk_ctr_alg.cra_list), + .cra_u = { + .blkcipher = { + .min_keysize = BF_MIN_KEY_SIZE, + .max_keysize = BF_MAX_KEY_SIZE, + .ivsize = BF_BLOCK_SIZE, + .setkey = blowfish_setkey, + .encrypt = ctr_crypt, + .decrypt = ctr_crypt, + }, + }, +}; + +static int __init init(void) +{ + int err; + + err = crypto_register_alg(&bf_alg); + if (err) + goto bf_err; + err = crypto_register_alg(&blk_ecb_alg); + if (err) + goto ecb_err; + err = crypto_register_alg(&blk_cbc_alg); + if (err) + goto cbc_err; + err = crypto_register_alg(&blk_ctr_alg); + if (err) + goto ctr_err; + + return 0; + +ctr_err: + crypto_unregister_alg(&blk_cbc_alg); +cbc_err: + crypto_unregister_alg(&blk_ecb_alg); +ecb_err: + crypto_unregister_alg(&bf_alg); +bf_err: + return err; +} + +static void __exit fini(void) +{ + crypto_unregister_alg(&blk_ctr_alg); + crypto_unregister_alg(&blk_cbc_alg); + crypto_unregister_alg(&blk_ecb_alg); + crypto_unregister_alg(&bf_alg); +} + +module_init(init); +module_exit(fini); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Blowfish Cipher Algorithm, asm optimized"); +MODULE_ALIAS("blowfish"); +MODULE_ALIAS("blowfish-asm"); diff --git a/arch/x86/crypto/sha1_ssse3_asm.S b/arch/x86/crypto/sha1_ssse3_asm.S new file mode 100644 index 000000000000..b2c2f57d70e8 --- /dev/null +++ b/arch/x86/crypto/sha1_ssse3_asm.S @@ -0,0 +1,558 @@ +/* + * This is a SIMD SHA-1 implementation. It requires the Intel(R) Supplemental + * SSE3 instruction set extensions introduced in Intel Core Microarchitecture + * processors. CPUs supporting Intel(R) AVX extensions will get an additional + * boost. + * + * This work was inspired by the vectorized implementation of Dean Gaudet. + * Additional information on it can be found at: + * http://www.arctic.org/~dean/crypto/sha1.html + * + * It was improved upon with more efficient vectorization of the message + * scheduling. This implementation has also been optimized for all current and + * several future generations of Intel CPUs. + * + * See this article for more information about the implementation details: + * http://software.intel.com/en-us/articles/improving-the-performance-of-the-secure-hash-algorithm-1/ + * + * Copyright (C) 2010, Intel Corp. 
+ * Authors: Maxim Locktyukhin <maxim.locktyukhin@intel.com> + * Ronen Zohar <ronen.zohar@intel.com> + * + * Converted to AT&T syntax and adapted for inclusion in the Linux kernel: + * Author: Mathias Krause <minipli@googlemail.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#define CTX %rdi // arg1 +#define BUF %rsi // arg2 +#define CNT %rdx // arg3 + +#define REG_A %ecx +#define REG_B %esi +#define REG_C %edi +#define REG_D %ebp +#define REG_E %edx + +#define REG_T1 %eax +#define REG_T2 %ebx + +#define K_BASE %r8 +#define HASH_PTR %r9 +#define BUFFER_PTR %r10 +#define BUFFER_END %r11 + +#define W_TMP1 %xmm0 +#define W_TMP2 %xmm9 + +#define W0 %xmm1 +#define W4 %xmm2 +#define W8 %xmm3 +#define W12 %xmm4 +#define W16 %xmm5 +#define W20 %xmm6 +#define W24 %xmm7 +#define W28 %xmm8 + +#define XMM_SHUFB_BSWAP %xmm10 + +/* we keep window of 64 w[i]+K pre-calculated values in a circular buffer */ +#define WK(t) (((t) & 15) * 4)(%rsp) +#define W_PRECALC_AHEAD 16 + +/* + * This macro implements the SHA-1 function's body for single 64-byte block + * param: function's name + */ +.macro SHA1_VECTOR_ASM name + .global \name + .type \name, @function + .align 32 +\name: + push %rbx + push %rbp + push %r12 + + mov %rsp, %r12 + sub $64, %rsp # allocate workspace + and $~15, %rsp # align stack + + mov CTX, HASH_PTR + mov BUF, BUFFER_PTR + + shl $6, CNT # multiply by 64 + add BUF, CNT + mov CNT, BUFFER_END + + lea K_XMM_AR(%rip), K_BASE + xmm_mov BSWAP_SHUFB_CTL(%rip), XMM_SHUFB_BSWAP + + SHA1_PIPELINED_MAIN_BODY + + # cleanup workspace + mov $8, %ecx + mov %rsp, %rdi + xor %rax, %rax + rep stosq + + mov %r12, %rsp # deallocate workspace + + pop %r12 + pop %rbp + pop %rbx + ret + + .size \name, .-\name +.endm + +/* + * This macro implements 80 rounds of SHA-1 for one 64-byte block + */ +.macro SHA1_PIPELINED_MAIN_BODY + INIT_REGALLOC + + mov (HASH_PTR), A + mov 4(HASH_PTR), B + mov 8(HASH_PTR), C + mov 12(HASH_PTR), D + mov 16(HASH_PTR), E + + .set i, 0 + .rept W_PRECALC_AHEAD + W_PRECALC i + .set i, (i+1) + .endr + +.align 4 +1: + RR F1,A,B,C,D,E,0 + RR F1,D,E,A,B,C,2 + RR F1,B,C,D,E,A,4 + RR F1,E,A,B,C,D,6 + RR F1,C,D,E,A,B,8 + + RR F1,A,B,C,D,E,10 + RR F1,D,E,A,B,C,12 + RR F1,B,C,D,E,A,14 + RR F1,E,A,B,C,D,16 + RR F1,C,D,E,A,B,18 + + RR F2,A,B,C,D,E,20 + RR F2,D,E,A,B,C,22 + RR F2,B,C,D,E,A,24 + RR F2,E,A,B,C,D,26 + RR F2,C,D,E,A,B,28 + + RR F2,A,B,C,D,E,30 + RR F2,D,E,A,B,C,32 + RR F2,B,C,D,E,A,34 + RR F2,E,A,B,C,D,36 + RR F2,C,D,E,A,B,38 + + RR F3,A,B,C,D,E,40 + RR F3,D,E,A,B,C,42 + RR F3,B,C,D,E,A,44 + RR F3,E,A,B,C,D,46 + RR F3,C,D,E,A,B,48 + + RR F3,A,B,C,D,E,50 + RR F3,D,E,A,B,C,52 + RR F3,B,C,D,E,A,54 + RR F3,E,A,B,C,D,56 + RR F3,C,D,E,A,B,58 + + add $64, BUFFER_PTR # move to the next 64-byte block + cmp BUFFER_END, BUFFER_PTR # if the current is the last one use + cmovae K_BASE, BUFFER_PTR # dummy source to avoid buffer overrun + + RR F4,A,B,C,D,E,60 + RR F4,D,E,A,B,C,62 + RR F4,B,C,D,E,A,64 + RR F4,E,A,B,C,D,66 + RR F4,C,D,E,A,B,68 + + RR F4,A,B,C,D,E,70 + RR F4,D,E,A,B,C,72 + RR F4,B,C,D,E,A,74 + RR F4,E,A,B,C,D,76 + RR F4,C,D,E,A,B,78 + + UPDATE_HASH (HASH_PTR), A + UPDATE_HASH 4(HASH_PTR), B + UPDATE_HASH 8(HASH_PTR), C + UPDATE_HASH 12(HASH_PTR), D + UPDATE_HASH 16(HASH_PTR), E + + RESTORE_RENAMED_REGS + cmp K_BASE, BUFFER_PTR # K_BASE means, we reached the end + jne 1b 
+.endm + +.macro INIT_REGALLOC + .set A, REG_A + .set B, REG_B + .set C, REG_C + .set D, REG_D + .set E, REG_E + .set T1, REG_T1 + .set T2, REG_T2 +.endm + +.macro RESTORE_RENAMED_REGS + # order is important (REG_C is where it should be) + mov B, REG_B + mov D, REG_D + mov A, REG_A + mov E, REG_E +.endm + +.macro SWAP_REG_NAMES a, b + .set _T, \a + .set \a, \b + .set \b, _T +.endm + +.macro F1 b, c, d + mov \c, T1 + SWAP_REG_NAMES \c, T1 + xor \d, T1 + and \b, T1 + xor \d, T1 +.endm + +.macro F2 b, c, d + mov \d, T1 + SWAP_REG_NAMES \d, T1 + xor \c, T1 + xor \b, T1 +.endm + +.macro F3 b, c ,d + mov \c, T1 + SWAP_REG_NAMES \c, T1 + mov \b, T2 + or \b, T1 + and \c, T2 + and \d, T1 + or T2, T1 +.endm + +.macro F4 b, c, d + F2 \b, \c, \d +.endm + +.macro UPDATE_HASH hash, val + add \hash, \val + mov \val, \hash +.endm + +/* + * RR does two rounds of SHA-1 back to back with W[] pre-calc + * t1 = F(b, c, d); e += w(i) + * e += t1; b <<= 30; d += w(i+1); + * t1 = F(a, b, c); + * d += t1; a <<= 5; + * e += a; + * t1 = e; a >>= 7; + * t1 <<= 5; + * d += t1; + */ +.macro RR F, a, b, c, d, e, round + add WK(\round), \e + \F \b, \c, \d # t1 = F(b, c, d); + W_PRECALC (\round + W_PRECALC_AHEAD) + rol $30, \b + add T1, \e + add WK(\round + 1), \d + + \F \a, \b, \c + W_PRECALC (\round + W_PRECALC_AHEAD + 1) + rol $5, \a + add \a, \e + add T1, \d + ror $7, \a # (a <<r 5) >>r 7) => a <<r 30) + + mov \e, T1 + SWAP_REG_NAMES \e, T1 + + rol $5, T1 + add T1, \d + + # write: \a, \b + # rotate: \a<=\d, \b<=\e, \c<=\a, \d<=\b, \e<=\c +.endm + +.macro W_PRECALC r + .set i, \r + + .if (i < 20) + .set K_XMM, 0 + .elseif (i < 40) + .set K_XMM, 16 + .elseif (i < 60) + .set K_XMM, 32 + .elseif (i < 80) + .set K_XMM, 48 + .endif + + .if ((i < 16) || ((i >= 80) && (i < (80 + W_PRECALC_AHEAD)))) + .set i, ((\r) % 80) # pre-compute for the next iteration + .if (i == 0) + W_PRECALC_RESET + .endif + W_PRECALC_00_15 + .elseif (i<32) + W_PRECALC_16_31 + .elseif (i < 80) // rounds 32-79 + W_PRECALC_32_79 + .endif +.endm + +.macro W_PRECALC_RESET + .set W, W0 + .set W_minus_04, W4 + .set W_minus_08, W8 + .set W_minus_12, W12 + .set W_minus_16, W16 + .set W_minus_20, W20 + .set W_minus_24, W24 + .set W_minus_28, W28 + .set W_minus_32, W +.endm + +.macro W_PRECALC_ROTATE + .set W_minus_32, W_minus_28 + .set W_minus_28, W_minus_24 + .set W_minus_24, W_minus_20 + .set W_minus_20, W_minus_16 + .set W_minus_16, W_minus_12 + .set W_minus_12, W_minus_08 + .set W_minus_08, W_minus_04 + .set W_minus_04, W + .set W, W_minus_32 +.endm + +.macro W_PRECALC_SSSE3 + +.macro W_PRECALC_00_15 + W_PRECALC_00_15_SSSE3 +.endm +.macro W_PRECALC_16_31 + W_PRECALC_16_31_SSSE3 +.endm +.macro W_PRECALC_32_79 + W_PRECALC_32_79_SSSE3 +.endm + +/* message scheduling pre-compute for rounds 0-15 */ +.macro W_PRECALC_00_15_SSSE3 + .if ((i & 3) == 0) + movdqu (i*4)(BUFFER_PTR), W_TMP1 + .elseif ((i & 3) == 1) + pshufb XMM_SHUFB_BSWAP, W_TMP1 + movdqa W_TMP1, W + .elseif ((i & 3) == 2) + paddd (K_BASE), W_TMP1 + .elseif ((i & 3) == 3) + movdqa W_TMP1, WK(i&~3) + W_PRECALC_ROTATE + .endif +.endm + +/* message scheduling pre-compute for rounds 16-31 + * + * - calculating last 32 w[i] values in 8 XMM registers + * - pre-calculate K+w[i] values and store to mem, for later load by ALU add + * instruction + * + * some "heavy-lifting" vectorization for rounds 16-31 due to w[i]->w[i-3] + * dependency, but improves for 32-79 + */ +.macro W_PRECALC_16_31_SSSE3 + # blended scheduling of vector and scalar instruction streams, one 4-wide + # vector iteration / 4 scalar rounds 
+ .if ((i & 3) == 0) + movdqa W_minus_12, W + palignr $8, W_minus_16, W # w[i-14] + movdqa W_minus_04, W_TMP1 + psrldq $4, W_TMP1 # w[i-3] + pxor W_minus_08, W + .elseif ((i & 3) == 1) + pxor W_minus_16, W_TMP1 + pxor W_TMP1, W + movdqa W, W_TMP2 + movdqa W, W_TMP1 + pslldq $12, W_TMP2 + .elseif ((i & 3) == 2) + psrld $31, W + pslld $1, W_TMP1 + por W, W_TMP1 + movdqa W_TMP2, W + psrld $30, W_TMP2 + pslld $2, W + .elseif ((i & 3) == 3) + pxor W, W_TMP1 + pxor W_TMP2, W_TMP1 + movdqa W_TMP1, W + paddd K_XMM(K_BASE), W_TMP1 + movdqa W_TMP1, WK(i&~3) + W_PRECALC_ROTATE + .endif +.endm + +/* message scheduling pre-compute for rounds 32-79 + * + * in SHA-1 specification: w[i] = (w[i-3] ^ w[i-8] ^ w[i-14] ^ w[i-16]) rol 1 + * instead we do equal: w[i] = (w[i-6] ^ w[i-16] ^ w[i-28] ^ w[i-32]) rol 2 + * allows more efficient vectorization since w[i]=>w[i-3] dependency is broken + */ +.macro W_PRECALC_32_79_SSSE3 + .if ((i & 3) == 0) + movdqa W_minus_04, W_TMP1 + pxor W_minus_28, W # W is W_minus_32 before xor + palignr $8, W_minus_08, W_TMP1 + .elseif ((i & 3) == 1) + pxor W_minus_16, W + pxor W_TMP1, W + movdqa W, W_TMP1 + .elseif ((i & 3) == 2) + psrld $30, W + pslld $2, W_TMP1 + por W, W_TMP1 + .elseif ((i & 3) == 3) + movdqa W_TMP1, W + paddd K_XMM(K_BASE), W_TMP1 + movdqa W_TMP1, WK(i&~3) + W_PRECALC_ROTATE + .endif +.endm + +.endm // W_PRECALC_SSSE3 + + +#define K1 0x5a827999 +#define K2 0x6ed9eba1 +#define K3 0x8f1bbcdc +#define K4 0xca62c1d6 + +.section .rodata +.align 16 + +K_XMM_AR: + .long K1, K1, K1, K1 + .long K2, K2, K2, K2 + .long K3, K3, K3, K3 + .long K4, K4, K4, K4 + +BSWAP_SHUFB_CTL: + .long 0x00010203 + .long 0x04050607 + .long 0x08090a0b + .long 0x0c0d0e0f + + +.section .text + +W_PRECALC_SSSE3 +.macro xmm_mov a, b + movdqu \a,\b +.endm + +/* SSSE3 optimized implementation: + * extern "C" void sha1_transform_ssse3(u32 *digest, const char *data, u32 *ws, + * unsigned int rounds); + */ +SHA1_VECTOR_ASM sha1_transform_ssse3 + +#ifdef SHA1_ENABLE_AVX_SUPPORT + +.macro W_PRECALC_AVX + +.purgem W_PRECALC_00_15 +.macro W_PRECALC_00_15 + W_PRECALC_00_15_AVX +.endm +.purgem W_PRECALC_16_31 +.macro W_PRECALC_16_31 + W_PRECALC_16_31_AVX +.endm +.purgem W_PRECALC_32_79 +.macro W_PRECALC_32_79 + W_PRECALC_32_79_AVX +.endm + +.macro W_PRECALC_00_15_AVX + .if ((i & 3) == 0) + vmovdqu (i*4)(BUFFER_PTR), W_TMP1 + .elseif ((i & 3) == 1) + vpshufb XMM_SHUFB_BSWAP, W_TMP1, W + .elseif ((i & 3) == 2) + vpaddd (K_BASE), W, W_TMP1 + .elseif ((i & 3) == 3) + vmovdqa W_TMP1, WK(i&~3) + W_PRECALC_ROTATE + .endif +.endm + +.macro W_PRECALC_16_31_AVX + .if ((i & 3) == 0) + vpalignr $8, W_minus_16, W_minus_12, W # w[i-14] + vpsrldq $4, W_minus_04, W_TMP1 # w[i-3] + vpxor W_minus_08, W, W + vpxor W_minus_16, W_TMP1, W_TMP1 + .elseif ((i & 3) == 1) + vpxor W_TMP1, W, W + vpslldq $12, W, W_TMP2 + vpslld $1, W, W_TMP1 + .elseif ((i & 3) == 2) + vpsrld $31, W, W + vpor W, W_TMP1, W_TMP1 + vpslld $2, W_TMP2, W + vpsrld $30, W_TMP2, W_TMP2 + .elseif ((i & 3) == 3) + vpxor W, W_TMP1, W_TMP1 + vpxor W_TMP2, W_TMP1, W + vpaddd K_XMM(K_BASE), W, W_TMP1 + vmovdqu W_TMP1, WK(i&~3) + W_PRECALC_ROTATE + .endif +.endm + +.macro W_PRECALC_32_79_AVX + .if ((i & 3) == 0) + vpalignr $8, W_minus_08, W_minus_04, W_TMP1 + vpxor W_minus_28, W, W # W is W_minus_32 before xor + .elseif ((i & 3) == 1) + vpxor W_minus_16, W_TMP1, W_TMP1 + vpxor W_TMP1, W, W + .elseif ((i & 3) == 2) + vpslld $2, W, W_TMP1 + vpsrld $30, W, W + vpor W, W_TMP1, W + .elseif ((i & 3) == 3) + vpaddd K_XMM(K_BASE), W, W_TMP1 + vmovdqu W_TMP1, WK(i&~3) + 
W_PRECALC_ROTATE + .endif +.endm + +.endm // W_PRECALC_AVX + +W_PRECALC_AVX +.purgem xmm_mov +.macro xmm_mov a, b + vmovdqu \a,\b +.endm + + +/* AVX optimized implementation: + * extern "C" void sha1_transform_avx(u32 *digest, const char *data, u32 *ws, + * unsigned int rounds); + */ +SHA1_VECTOR_ASM sha1_transform_avx + +#endif diff --git a/arch/x86/crypto/sha1_ssse3_glue.c b/arch/x86/crypto/sha1_ssse3_glue.c new file mode 100644 index 000000000000..f916499d0abe --- /dev/null +++ b/arch/x86/crypto/sha1_ssse3_glue.c @@ -0,0 +1,240 @@ +/* + * Cryptographic API. + * + * Glue code for the SHA1 Secure Hash Algorithm assembler implementation using + * Supplemental SSE3 instructions. + * + * This file is based on sha1_generic.c + * + * Copyright (c) Alan Smithee. + * Copyright (c) Andrew McDonald <andrew@mcdonald.org.uk> + * Copyright (c) Jean-Francois Dive <jef@linuxbe.org> + * Copyright (c) Mathias Krause <minipli@googlemail.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <crypto/internal/hash.h> +#include <linux/init.h> +#include <linux/module.h> +#include <linux/mm.h> +#include <linux/cryptohash.h> +#include <linux/types.h> +#include <crypto/sha.h> +#include <asm/byteorder.h> +#include <asm/i387.h> +#include <asm/xcr.h> +#include <asm/xsave.h> + + +asmlinkage void sha1_transform_ssse3(u32 *digest, const char *data, + unsigned int rounds); +#ifdef SHA1_ENABLE_AVX_SUPPORT +asmlinkage void sha1_transform_avx(u32 *digest, const char *data, + unsigned int rounds); +#endif + +static asmlinkage void (*sha1_transform_asm)(u32 *, const char *, unsigned int); + + +static int sha1_ssse3_init(struct shash_desc *desc) +{ + struct sha1_state *sctx = shash_desc_ctx(desc); + + *sctx = (struct sha1_state){ + .state = { SHA1_H0, SHA1_H1, SHA1_H2, SHA1_H3, SHA1_H4 }, + }; + + return 0; +} + +static int __sha1_ssse3_update(struct shash_desc *desc, const u8 *data, + unsigned int len, unsigned int partial) +{ + struct sha1_state *sctx = shash_desc_ctx(desc); + unsigned int done = 0; + + sctx->count += len; + + if (partial) { + done = SHA1_BLOCK_SIZE - partial; + memcpy(sctx->buffer + partial, data, done); + sha1_transform_asm(sctx->state, sctx->buffer, 1); + } + + if (len - done >= SHA1_BLOCK_SIZE) { + const unsigned int rounds = (len - done) / SHA1_BLOCK_SIZE; + + sha1_transform_asm(sctx->state, data + done, rounds); + done += rounds * SHA1_BLOCK_SIZE; + } + + memcpy(sctx->buffer, data + done, len - done); + + return 0; +} + +static int sha1_ssse3_update(struct shash_desc *desc, const u8 *data, + unsigned int len) +{ + struct sha1_state *sctx = shash_desc_ctx(desc); + unsigned int partial = sctx->count % SHA1_BLOCK_SIZE; + int res; + + /* Handle the fast case right here */ + if (partial + len < SHA1_BLOCK_SIZE) { + sctx->count += len; + memcpy(sctx->buffer + partial, data, len); + + return 0; + } + + if (!irq_fpu_usable()) { + res = crypto_sha1_update(desc, data, len); + } else { + kernel_fpu_begin(); + res = __sha1_ssse3_update(desc, data, len, partial); + kernel_fpu_end(); + } + + return res; +} + + +/* Add padding and return the message digest. 
*/ +static int sha1_ssse3_final(struct shash_desc *desc, u8 *out) +{ + struct sha1_state *sctx = shash_desc_ctx(desc); + unsigned int i, index, padlen; + __be32 *dst = (__be32 *)out; + __be64 bits; + static const u8 padding[SHA1_BLOCK_SIZE] = { 0x80, }; + + bits = cpu_to_be64(sctx->count << 3); + + /* Pad out to 56 mod 64 and append length */ + index = sctx->count % SHA1_BLOCK_SIZE; + padlen = (index < 56) ? (56 - index) : ((SHA1_BLOCK_SIZE+56) - index); + if (!irq_fpu_usable()) { + crypto_sha1_update(desc, padding, padlen); + crypto_sha1_update(desc, (const u8 *)&bits, sizeof(bits)); + } else { + kernel_fpu_begin(); + /* We need to fill a whole block for __sha1_ssse3_update() */ + if (padlen <= 56) { + sctx->count += padlen; + memcpy(sctx->buffer + index, padding, padlen); + } else { + __sha1_ssse3_update(desc, padding, padlen, index); + } + __sha1_ssse3_update(desc, (const u8 *)&bits, sizeof(bits), 56); + kernel_fpu_end(); + } + + /* Store state in digest */ + for (i = 0; i < 5; i++) + dst[i] = cpu_to_be32(sctx->state[i]); + + /* Wipe context */ + memset(sctx, 0, sizeof(*sctx)); + + return 0; +} + +static int sha1_ssse3_export(struct shash_desc *desc, void *out) +{ + struct sha1_state *sctx = shash_desc_ctx(desc); + + memcpy(out, sctx, sizeof(*sctx)); + + return 0; +} + +static int sha1_ssse3_import(struct shash_desc *desc, const void *in) +{ + struct sha1_state *sctx = shash_desc_ctx(desc); + + memcpy(sctx, in, sizeof(*sctx)); + + return 0; +} + +static struct shash_alg alg = { + .digestsize = SHA1_DIGEST_SIZE, + .init = sha1_ssse3_init, + .update = sha1_ssse3_update, + .final = sha1_ssse3_final, + .export = sha1_ssse3_export, + .import = sha1_ssse3_import, + .descsize = sizeof(struct sha1_state), + .statesize = sizeof(struct sha1_state), + .base = { + .cra_name = "sha1", + .cra_driver_name= "sha1-ssse3", + .cra_priority = 150, + .cra_flags = CRYPTO_ALG_TYPE_SHASH, + .cra_blocksize = SHA1_BLOCK_SIZE, + .cra_module = THIS_MODULE, + } +}; + +#ifdef SHA1_ENABLE_AVX_SUPPORT +static bool __init avx_usable(void) +{ + u64 xcr0; + + if (!cpu_has_avx || !cpu_has_osxsave) + return false; + + xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK); + if ((xcr0 & (XSTATE_SSE | XSTATE_YMM)) != (XSTATE_SSE | XSTATE_YMM)) { + pr_info("AVX detected but unusable.\n"); + + return false; + } + + return true; +} +#endif + +static int __init sha1_ssse3_mod_init(void) +{ + /* test for SSSE3 first */ + if (cpu_has_ssse3) + sha1_transform_asm = sha1_transform_ssse3; + +#ifdef SHA1_ENABLE_AVX_SUPPORT + /* allow AVX to override SSSE3, it's a little faster */ + if (avx_usable()) + sha1_transform_asm = sha1_transform_avx; +#endif + + if (sha1_transform_asm) { + pr_info("Using %s optimized SHA-1 implementation\n", + sha1_transform_asm == sha1_transform_ssse3 ? 
"SSSE3" + : "AVX"); + return crypto_register_shash(&alg); + } + pr_info("Neither AVX nor SSSE3 is available/usable.\n"); + + return -ENODEV; +} + +static void __exit sha1_ssse3_mod_fini(void) +{ + crypto_unregister_shash(&alg); +} + +module_init(sha1_ssse3_mod_init); +module_exit(sha1_ssse3_mod_fini); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("SHA1 Secure Hash Algorithm, Supplemental SSE3 accelerated"); + +MODULE_ALIAS("sha1"); diff --git a/arch/x86/crypto/twofish-i586-asm_32.S b/arch/x86/crypto/twofish-i586-asm_32.S index 575331cb2a8a..658af4bb35c9 100644 --- a/arch/x86/crypto/twofish-i586-asm_32.S +++ b/arch/x86/crypto/twofish-i586-asm_32.S @@ -26,7 +26,7 @@ #define in_blk 12 /* input byte array address parameter*/ #define out_blk 8 /* output byte array address parameter*/ -#define tfm 4 /* Twofish context structure */ +#define ctx 4 /* Twofish context structure */ #define a_offset 0 #define b_offset 4 @@ -229,8 +229,8 @@ twofish_enc_blk: push %esi push %edi - mov tfm + 16(%esp), %ebp /* abuse the base pointer: set new base bointer to the crypto tfm */ - add $crypto_tfm_ctx_offset, %ebp /* ctx address */ + mov ctx + 16(%esp), %ebp /* abuse the base pointer: set new base + * pointer to the ctx address */ mov in_blk+16(%esp),%edi /* input address in edi */ mov (%edi), %eax @@ -285,8 +285,8 @@ twofish_dec_blk: push %edi - mov tfm + 16(%esp), %ebp /* abuse the base pointer: set new base bointer to the crypto tfm */ - add $crypto_tfm_ctx_offset, %ebp /* ctx address */ + mov ctx + 16(%esp), %ebp /* abuse the base pointer: set new base + * pointer to the ctx address */ mov in_blk+16(%esp),%edi /* input address in edi */ mov (%edi), %eax diff --git a/arch/x86/crypto/twofish-x86_64-asm_64-3way.S b/arch/x86/crypto/twofish-x86_64-asm_64-3way.S new file mode 100644 index 000000000000..5b012a2c5119 --- /dev/null +++ b/arch/x86/crypto/twofish-x86_64-asm_64-3way.S @@ -0,0 +1,316 @@ +/* + * Twofish Cipher 3-way parallel algorithm (x86_64) + * + * Copyright (C) 2011 Jussi Kivilinna <jussi.kivilinna@mbnet.fi> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 + * USA + * + */ + +.file "twofish-x86_64-asm-3way.S" +.text + +/* structure of crypto context */ +#define s0 0 +#define s1 1024 +#define s2 2048 +#define s3 3072 +#define w 4096 +#define k 4128 + +/********************************************************************** + 3-way twofish + **********************************************************************/ +#define CTX %rdi +#define RIO %rdx + +#define RAB0 %rax +#define RAB1 %rbx +#define RAB2 %rcx + +#define RAB0d %eax +#define RAB1d %ebx +#define RAB2d %ecx + +#define RAB0bh %ah +#define RAB1bh %bh +#define RAB2bh %ch + +#define RAB0bl %al +#define RAB1bl %bl +#define RAB2bl %cl + +#define RCD0 %r8 +#define RCD1 %r9 +#define RCD2 %r10 + +#define RCD0d %r8d +#define RCD1d %r9d +#define RCD2d %r10d + +#define RX0 %rbp +#define RX1 %r11 +#define RX2 %r12 + +#define RX0d %ebp +#define RX1d %r11d +#define RX2d %r12d + +#define RY0 %r13 +#define RY1 %r14 +#define RY2 %r15 + +#define RY0d %r13d +#define RY1d %r14d +#define RY2d %r15d + +#define RT0 %rdx +#define RT1 %rsi + +#define RT0d %edx +#define RT1d %esi + +#define do16bit_ror(rot, op1, op2, T0, T1, tmp1, tmp2, ab, dst) \ + movzbl ab ## bl, tmp2 ## d; \ + movzbl ab ## bh, tmp1 ## d; \ + rorq $(rot), ab; \ + op1##l T0(CTX, tmp2, 4), dst ## d; \ + op2##l T1(CTX, tmp1, 4), dst ## d; + +/* + * Combined G1 & G2 function. Reordered with help of rotates to have moves + * at begining. + */ +#define g1g2_3(ab, cd, Tx0, Tx1, Tx2, Tx3, Ty0, Ty1, Ty2, Ty3, x, y) \ + /* G1,1 && G2,1 */ \ + do16bit_ror(32, mov, xor, Tx0, Tx1, RT0, x ## 0, ab ## 0, x ## 0); \ + do16bit_ror(48, mov, xor, Ty1, Ty2, RT0, y ## 0, ab ## 0, y ## 0); \ + \ + do16bit_ror(32, mov, xor, Tx0, Tx1, RT0, x ## 1, ab ## 1, x ## 1); \ + do16bit_ror(48, mov, xor, Ty1, Ty2, RT0, y ## 1, ab ## 1, y ## 1); \ + \ + do16bit_ror(32, mov, xor, Tx0, Tx1, RT0, x ## 2, ab ## 2, x ## 2); \ + do16bit_ror(48, mov, xor, Ty1, Ty2, RT0, y ## 2, ab ## 2, y ## 2); \ + \ + /* G1,2 && G2,2 */ \ + do16bit_ror(32, xor, xor, Tx2, Tx3, RT0, RT1, ab ## 0, x ## 0); \ + do16bit_ror(16, xor, xor, Ty3, Ty0, RT0, RT1, ab ## 0, y ## 0); \ + xchgq cd ## 0, ab ## 0; \ + \ + do16bit_ror(32, xor, xor, Tx2, Tx3, RT0, RT1, ab ## 1, x ## 1); \ + do16bit_ror(16, xor, xor, Ty3, Ty0, RT0, RT1, ab ## 1, y ## 1); \ + xchgq cd ## 1, ab ## 1; \ + \ + do16bit_ror(32, xor, xor, Tx2, Tx3, RT0, RT1, ab ## 2, x ## 2); \ + do16bit_ror(16, xor, xor, Ty3, Ty0, RT0, RT1, ab ## 2, y ## 2); \ + xchgq cd ## 2, ab ## 2; + +#define enc_round_end(ab, x, y, n) \ + addl y ## d, x ## d; \ + addl x ## d, y ## d; \ + addl k+4*(2*(n))(CTX), x ## d; \ + xorl ab ## d, x ## d; \ + addl k+4*(2*(n)+1)(CTX), y ## d; \ + shrq $32, ab; \ + roll $1, ab ## d; \ + xorl y ## d, ab ## d; \ + shlq $32, ab; \ + rorl $1, x ## d; \ + orq x, ab; + +#define dec_round_end(ba, x, y, n) \ + addl y ## d, x ## d; \ + addl x ## d, y ## d; \ + addl k+4*(2*(n))(CTX), x ## d; \ + addl k+4*(2*(n)+1)(CTX), y ## d; \ + xorl ba ## d, y ## d; \ + shrq $32, ba; \ + roll $1, ba ## d; \ + xorl x ## d, ba ## d; \ + shlq $32, ba; \ + rorl $1, y ## d; \ + orq y, ba; + +#define encrypt_round3(ab, cd, n) \ + g1g2_3(ab, cd, s0, s1, s2, s3, s0, s1, s2, s3, RX, RY); \ + \ + enc_round_end(ab ## 0, RX0, RY0, n); \ + enc_round_end(ab ## 1, RX1, RY1, n); \ + enc_round_end(ab ## 2, RX2, RY2, n); + +#define decrypt_round3(ba, dc, n) \ + 
g1g2_3(ba, dc, s1, s2, s3, s0, s3, s0, s1, s2, RY, RX); \ + \ + dec_round_end(ba ## 0, RX0, RY0, n); \ + dec_round_end(ba ## 1, RX1, RY1, n); \ + dec_round_end(ba ## 2, RX2, RY2, n); + +#define encrypt_cycle3(ab, cd, n) \ + encrypt_round3(ab, cd, n*2); \ + encrypt_round3(ab, cd, (n*2)+1); + +#define decrypt_cycle3(ba, dc, n) \ + decrypt_round3(ba, dc, (n*2)+1); \ + decrypt_round3(ba, dc, (n*2)); + +#define inpack3(in, n, xy, m) \ + movq 4*(n)(in), xy ## 0; \ + xorq w+4*m(CTX), xy ## 0; \ + \ + movq 4*(4+(n))(in), xy ## 1; \ + xorq w+4*m(CTX), xy ## 1; \ + \ + movq 4*(8+(n))(in), xy ## 2; \ + xorq w+4*m(CTX), xy ## 2; + +#define outunpack3(op, out, n, xy, m) \ + xorq w+4*m(CTX), xy ## 0; \ + op ## q xy ## 0, 4*(n)(out); \ + \ + xorq w+4*m(CTX), xy ## 1; \ + op ## q xy ## 1, 4*(4+(n))(out); \ + \ + xorq w+4*m(CTX), xy ## 2; \ + op ## q xy ## 2, 4*(8+(n))(out); + +#define inpack_enc3() \ + inpack3(RIO, 0, RAB, 0); \ + inpack3(RIO, 2, RCD, 2); + +#define outunpack_enc3(op) \ + outunpack3(op, RIO, 2, RAB, 6); \ + outunpack3(op, RIO, 0, RCD, 4); + +#define inpack_dec3() \ + inpack3(RIO, 0, RAB, 4); \ + rorq $32, RAB0; \ + rorq $32, RAB1; \ + rorq $32, RAB2; \ + inpack3(RIO, 2, RCD, 6); \ + rorq $32, RCD0; \ + rorq $32, RCD1; \ + rorq $32, RCD2; + +#define outunpack_dec3() \ + rorq $32, RCD0; \ + rorq $32, RCD1; \ + rorq $32, RCD2; \ + outunpack3(mov, RIO, 0, RCD, 0); \ + rorq $32, RAB0; \ + rorq $32, RAB1; \ + rorq $32, RAB2; \ + outunpack3(mov, RIO, 2, RAB, 2); + +.align 8 +.global __twofish_enc_blk_3way +.type __twofish_enc_blk_3way,@function; + +__twofish_enc_blk_3way: + /* input: + * %rdi: ctx, CTX + * %rsi: dst + * %rdx: src, RIO + * %rcx: bool, if true: xor output + */ + pushq %r15; + pushq %r14; + pushq %r13; + pushq %r12; + pushq %rbp; + pushq %rbx; + + pushq %rcx; /* bool xor */ + pushq %rsi; /* dst */ + + inpack_enc3(); + + encrypt_cycle3(RAB, RCD, 0); + encrypt_cycle3(RAB, RCD, 1); + encrypt_cycle3(RAB, RCD, 2); + encrypt_cycle3(RAB, RCD, 3); + encrypt_cycle3(RAB, RCD, 4); + encrypt_cycle3(RAB, RCD, 5); + encrypt_cycle3(RAB, RCD, 6); + encrypt_cycle3(RAB, RCD, 7); + + popq RIO; /* dst */ + popq %rbp; /* bool xor */ + + testb %bpl, %bpl; + jnz __enc_xor3; + + outunpack_enc3(mov); + + popq %rbx; + popq %rbp; + popq %r12; + popq %r13; + popq %r14; + popq %r15; + ret; + +__enc_xor3: + outunpack_enc3(xor); + + popq %rbx; + popq %rbp; + popq %r12; + popq %r13; + popq %r14; + popq %r15; + ret; + +.global twofish_dec_blk_3way +.type twofish_dec_blk_3way,@function; + +twofish_dec_blk_3way: + /* input: + * %rdi: ctx, CTX + * %rsi: dst + * %rdx: src, RIO + */ + pushq %r15; + pushq %r14; + pushq %r13; + pushq %r12; + pushq %rbp; + pushq %rbx; + + pushq %rsi; /* dst */ + + inpack_dec3(); + + decrypt_cycle3(RAB, RCD, 7); + decrypt_cycle3(RAB, RCD, 6); + decrypt_cycle3(RAB, RCD, 5); + decrypt_cycle3(RAB, RCD, 4); + decrypt_cycle3(RAB, RCD, 3); + decrypt_cycle3(RAB, RCD, 2); + decrypt_cycle3(RAB, RCD, 1); + decrypt_cycle3(RAB, RCD, 0); + + popq RIO; /* dst */ + + outunpack_dec3(); + + popq %rbx; + popq %rbp; + popq %r12; + popq %r13; + popq %r14; + popq %r15; + ret; + diff --git a/arch/x86/crypto/twofish-x86_64-asm_64.S b/arch/x86/crypto/twofish-x86_64-asm_64.S index 573aa102542e..7bcf3fcc3668 100644 --- a/arch/x86/crypto/twofish-x86_64-asm_64.S +++ b/arch/x86/crypto/twofish-x86_64-asm_64.S @@ -221,10 +221,9 @@ twofish_enc_blk: pushq R1 - /* %rdi contains the crypto tfm address */ + /* %rdi contains the ctx address */ /* %rsi contains the output address */ /* %rdx contains the input address */ - add 
$crypto_tfm_ctx_offset, %rdi /* set ctx address */ /* ctx address is moved to free one non-rex register as target for the 8bit high operations */ mov %rdi, %r11 @@ -274,10 +273,9 @@ twofish_enc_blk: twofish_dec_blk: pushq R1 - /* %rdi contains the crypto tfm address */ + /* %rdi contains the ctx address */ /* %rsi contains the output address */ /* %rdx contains the input address */ - add $crypto_tfm_ctx_offset, %rdi /* set ctx address */ /* ctx address is moved to free one non-rex register as target for the 8bit high operations */ mov %rdi, %r11 diff --git a/arch/x86/crypto/twofish_glue.c b/arch/x86/crypto/twofish_glue.c index cefaf8b9aa18..dc6b3fb817fc 100644 --- a/arch/x86/crypto/twofish_glue.c +++ b/arch/x86/crypto/twofish_glue.c @@ -44,17 +44,21 @@ #include <linux/module.h> #include <linux/types.h> -asmlinkage void twofish_enc_blk(struct crypto_tfm *tfm, u8 *dst, const u8 *src); -asmlinkage void twofish_dec_blk(struct crypto_tfm *tfm, u8 *dst, const u8 *src); +asmlinkage void twofish_enc_blk(struct twofish_ctx *ctx, u8 *dst, + const u8 *src); +EXPORT_SYMBOL_GPL(twofish_enc_blk); +asmlinkage void twofish_dec_blk(struct twofish_ctx *ctx, u8 *dst, + const u8 *src); +EXPORT_SYMBOL_GPL(twofish_dec_blk); static void twofish_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) { - twofish_enc_blk(tfm, dst, src); + twofish_enc_blk(crypto_tfm_ctx(tfm), dst, src); } static void twofish_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) { - twofish_dec_blk(tfm, dst, src); + twofish_dec_blk(crypto_tfm_ctx(tfm), dst, src); } static struct crypto_alg alg = { diff --git a/arch/x86/crypto/twofish_glue_3way.c b/arch/x86/crypto/twofish_glue_3way.c new file mode 100644 index 000000000000..5ede9c444c3e --- /dev/null +++ b/arch/x86/crypto/twofish_glue_3way.c @@ -0,0 +1,472 @@ +/* + * Glue Code for 3-way parallel assembler optimized version of Twofish + * + * Copyright (c) 2011 Jussi Kivilinna <jussi.kivilinna@mbnet.fi> + * + * CBC & ECB parts based on code (crypto/cbc.c,ecb.c) by: + * Copyright (c) 2006 Herbert Xu <herbert@gondor.apana.org.au> + * CTR part based on code (crypto/ctr.c) by: + * (C) Copyright IBM Corp. 2007 - Joy Latten <latten@us.ibm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 + * USA + * + */ + +#include <linux/crypto.h> +#include <linux/init.h> +#include <linux/module.h> +#include <linux/types.h> +#include <crypto/algapi.h> +#include <crypto/twofish.h> +#include <crypto/b128ops.h> + +/* regular block cipher functions from twofish_x86_64 module */ +asmlinkage void twofish_enc_blk(struct twofish_ctx *ctx, u8 *dst, + const u8 *src); +asmlinkage void twofish_dec_blk(struct twofish_ctx *ctx, u8 *dst, + const u8 *src); + +/* 3-way parallel cipher functions */ +asmlinkage void __twofish_enc_blk_3way(struct twofish_ctx *ctx, u8 *dst, + const u8 *src, bool xor); +asmlinkage void twofish_dec_blk_3way(struct twofish_ctx *ctx, u8 *dst, + const u8 *src); + +static inline void twofish_enc_blk_3way(struct twofish_ctx *ctx, u8 *dst, + const u8 *src) +{ + __twofish_enc_blk_3way(ctx, dst, src, false); +} + +static inline void twofish_enc_blk_xor_3way(struct twofish_ctx *ctx, u8 *dst, + const u8 *src) +{ + __twofish_enc_blk_3way(ctx, dst, src, true); +} + +static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk, + void (*fn)(struct twofish_ctx *, u8 *, const u8 *), + void (*fn_3way)(struct twofish_ctx *, u8 *, const u8 *)) +{ + struct twofish_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + unsigned int bsize = TF_BLOCK_SIZE; + unsigned int nbytes; + int err; + + err = blkcipher_walk_virt(desc, walk); + + while ((nbytes = walk->nbytes)) { + u8 *wsrc = walk->src.virt.addr; + u8 *wdst = walk->dst.virt.addr; + + /* Process three block batch */ + if (nbytes >= bsize * 3) { + do { + fn_3way(ctx, wdst, wsrc); + + wsrc += bsize * 3; + wdst += bsize * 3; + nbytes -= bsize * 3; + } while (nbytes >= bsize * 3); + + if (nbytes < bsize) + goto done; + } + + /* Handle leftovers */ + do { + fn(ctx, wdst, wsrc); + + wsrc += bsize; + wdst += bsize; + nbytes -= bsize; + } while (nbytes >= bsize); + +done: + err = blkcipher_walk_done(desc, walk, nbytes); + } + + return err; +} + +static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + struct blkcipher_walk walk; + + blkcipher_walk_init(&walk, dst, src, nbytes); + return ecb_crypt(desc, &walk, twofish_enc_blk, twofish_enc_blk_3way); +} + +static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + struct blkcipher_walk walk; + + blkcipher_walk_init(&walk, dst, src, nbytes); + return ecb_crypt(desc, &walk, twofish_dec_blk, twofish_dec_blk_3way); +} + +static struct crypto_alg blk_ecb_alg = { + .cra_name = "ecb(twofish)", + .cra_driver_name = "ecb-twofish-3way", + .cra_priority = 300, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_blocksize = TF_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct twofish_ctx), + .cra_alignmask = 0, + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(blk_ecb_alg.cra_list), + .cra_u = { + .blkcipher = { + .min_keysize = TF_MIN_KEY_SIZE, + .max_keysize = TF_MAX_KEY_SIZE, + .setkey = twofish_setkey, + .encrypt = ecb_encrypt, + .decrypt = ecb_decrypt, + }, + }, +}; + +static unsigned int __cbc_encrypt(struct blkcipher_desc *desc, + struct blkcipher_walk *walk) +{ + struct twofish_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + unsigned int bsize = TF_BLOCK_SIZE; + unsigned int nbytes = walk->nbytes; + u128 *src = (u128 
*)walk->src.virt.addr; + u128 *dst = (u128 *)walk->dst.virt.addr; + u128 *iv = (u128 *)walk->iv; + + do { + u128_xor(dst, src, iv); + twofish_enc_blk(ctx, (u8 *)dst, (u8 *)dst); + iv = dst; + + src += 1; + dst += 1; + nbytes -= bsize; + } while (nbytes >= bsize); + + u128_xor((u128 *)walk->iv, (u128 *)walk->iv, iv); + return nbytes; +} + +static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + struct blkcipher_walk walk; + int err; + + blkcipher_walk_init(&walk, dst, src, nbytes); + err = blkcipher_walk_virt(desc, &walk); + + while ((nbytes = walk.nbytes)) { + nbytes = __cbc_encrypt(desc, &walk); + err = blkcipher_walk_done(desc, &walk, nbytes); + } + + return err; +} + +static unsigned int __cbc_decrypt(struct blkcipher_desc *desc, + struct blkcipher_walk *walk) +{ + struct twofish_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + unsigned int bsize = TF_BLOCK_SIZE; + unsigned int nbytes = walk->nbytes; + u128 *src = (u128 *)walk->src.virt.addr; + u128 *dst = (u128 *)walk->dst.virt.addr; + u128 ivs[3 - 1]; + u128 last_iv; + + /* Start of the last block. */ + src += nbytes / bsize - 1; + dst += nbytes / bsize - 1; + + last_iv = *src; + + /* Process three block batch */ + if (nbytes >= bsize * 3) { + do { + nbytes -= bsize * (3 - 1); + src -= 3 - 1; + dst -= 3 - 1; + + ivs[0] = src[0]; + ivs[1] = src[1]; + + twofish_dec_blk_3way(ctx, (u8 *)dst, (u8 *)src); + + u128_xor(dst + 1, dst + 1, ivs + 0); + u128_xor(dst + 2, dst + 2, ivs + 1); + + nbytes -= bsize; + if (nbytes < bsize) + goto done; + + u128_xor(dst, dst, src - 1); + src -= 1; + dst -= 1; + } while (nbytes >= bsize * 3); + + if (nbytes < bsize) + goto done; + } + + /* Handle leftovers */ + for (;;) { + twofish_dec_blk(ctx, (u8 *)dst, (u8 *)src); + + nbytes -= bsize; + if (nbytes < bsize) + break; + + u128_xor(dst, dst, src - 1); + src -= 1; + dst -= 1; + } + +done: + u128_xor(dst, dst, (u128 *)walk->iv); + *(u128 *)walk->iv = last_iv; + + return nbytes; +} + +static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + struct blkcipher_walk walk; + int err; + + blkcipher_walk_init(&walk, dst, src, nbytes); + err = blkcipher_walk_virt(desc, &walk); + + while ((nbytes = walk.nbytes)) { + nbytes = __cbc_decrypt(desc, &walk); + err = blkcipher_walk_done(desc, &walk, nbytes); + } + + return err; +} + +static struct crypto_alg blk_cbc_alg = { + .cra_name = "cbc(twofish)", + .cra_driver_name = "cbc-twofish-3way", + .cra_priority = 300, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_blocksize = TF_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct twofish_ctx), + .cra_alignmask = 0, + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(blk_cbc_alg.cra_list), + .cra_u = { + .blkcipher = { + .min_keysize = TF_MIN_KEY_SIZE, + .max_keysize = TF_MAX_KEY_SIZE, + .ivsize = TF_BLOCK_SIZE, + .setkey = twofish_setkey, + .encrypt = cbc_encrypt, + .decrypt = cbc_decrypt, + }, + }, +}; + +static inline void u128_to_be128(be128 *dst, const u128 *src) +{ + dst->a = cpu_to_be64(src->a); + dst->b = cpu_to_be64(src->b); +} + +static inline void be128_to_u128(u128 *dst, const be128 *src) +{ + dst->a = be64_to_cpu(src->a); + dst->b = be64_to_cpu(src->b); +} + +static inline void u128_inc(u128 *i) +{ + i->b++; + if (!i->b) + i->a++; +} + +static void ctr_crypt_final(struct blkcipher_desc *desc, + struct blkcipher_walk *walk) +{ + struct twofish_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + 
u8 *ctrblk = walk->iv; + u8 keystream[TF_BLOCK_SIZE]; + u8 *src = walk->src.virt.addr; + u8 *dst = walk->dst.virt.addr; + unsigned int nbytes = walk->nbytes; + + twofish_enc_blk(ctx, keystream, ctrblk); + crypto_xor(keystream, src, nbytes); + memcpy(dst, keystream, nbytes); + + crypto_inc(ctrblk, TF_BLOCK_SIZE); +} + +static unsigned int __ctr_crypt(struct blkcipher_desc *desc, + struct blkcipher_walk *walk) +{ + struct twofish_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + unsigned int bsize = TF_BLOCK_SIZE; + unsigned int nbytes = walk->nbytes; + u128 *src = (u128 *)walk->src.virt.addr; + u128 *dst = (u128 *)walk->dst.virt.addr; + u128 ctrblk; + be128 ctrblocks[3]; + + be128_to_u128(&ctrblk, (be128 *)walk->iv); + + /* Process three block batch */ + if (nbytes >= bsize * 3) { + do { + if (dst != src) { + dst[0] = src[0]; + dst[1] = src[1]; + dst[2] = src[2]; + } + + /* create ctrblks for parallel encrypt */ + u128_to_be128(&ctrblocks[0], &ctrblk); + u128_inc(&ctrblk); + u128_to_be128(&ctrblocks[1], &ctrblk); + u128_inc(&ctrblk); + u128_to_be128(&ctrblocks[2], &ctrblk); + u128_inc(&ctrblk); + + twofish_enc_blk_xor_3way(ctx, (u8 *)dst, + (u8 *)ctrblocks); + + src += 3; + dst += 3; + nbytes -= bsize * 3; + } while (nbytes >= bsize * 3); + + if (nbytes < bsize) + goto done; + } + + /* Handle leftovers */ + do { + if (dst != src) + *dst = *src; + + u128_to_be128(&ctrblocks[0], &ctrblk); + u128_inc(&ctrblk); + + twofish_enc_blk(ctx, (u8 *)ctrblocks, (u8 *)ctrblocks); + u128_xor(dst, dst, (u128 *)ctrblocks); + + src += 1; + dst += 1; + nbytes -= bsize; + } while (nbytes >= bsize); + +done: + u128_to_be128((be128 *)walk->iv, &ctrblk); + return nbytes; +} + +static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + struct blkcipher_walk walk; + int err; + + blkcipher_walk_init(&walk, dst, src, nbytes); + err = blkcipher_walk_virt_block(desc, &walk, TF_BLOCK_SIZE); + + while ((nbytes = walk.nbytes) >= TF_BLOCK_SIZE) { + nbytes = __ctr_crypt(desc, &walk); + err = blkcipher_walk_done(desc, &walk, nbytes); + } + + if (walk.nbytes) { + ctr_crypt_final(desc, &walk); + err = blkcipher_walk_done(desc, &walk, 0); + } + + return err; +} + +static struct crypto_alg blk_ctr_alg = { + .cra_name = "ctr(twofish)", + .cra_driver_name = "ctr-twofish-3way", + .cra_priority = 300, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_blocksize = 1, + .cra_ctxsize = sizeof(struct twofish_ctx), + .cra_alignmask = 0, + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(blk_ctr_alg.cra_list), + .cra_u = { + .blkcipher = { + .min_keysize = TF_MIN_KEY_SIZE, + .max_keysize = TF_MAX_KEY_SIZE, + .ivsize = TF_BLOCK_SIZE, + .setkey = twofish_setkey, + .encrypt = ctr_crypt, + .decrypt = ctr_crypt, + }, + }, +}; + +int __init init(void) +{ + int err; + + err = crypto_register_alg(&blk_ecb_alg); + if (err) + goto ecb_err; + err = crypto_register_alg(&blk_cbc_alg); + if (err) + goto cbc_err; + err = crypto_register_alg(&blk_ctr_alg); + if (err) + goto ctr_err; + + return 0; + +ctr_err: + crypto_unregister_alg(&blk_cbc_alg); +cbc_err: + crypto_unregister_alg(&blk_ecb_alg); +ecb_err: + return err; +} + +void __exit fini(void) +{ + crypto_unregister_alg(&blk_ctr_alg); + crypto_unregister_alg(&blk_cbc_alg); + crypto_unregister_alg(&blk_ecb_alg); +} + +module_init(init); +module_exit(fini); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Twofish Cipher Algorithm, 3-way parallel asm optimized"); +MODULE_ALIAS("twofish"); 
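The three algorithms registered above slot into the normal kernel crypto API by name, and cra_priority 300 makes them outrank the generic C twofish implementation once the module is loaded. A minimal sketch of a kernel-side caller reaching the new "ctr(twofish)" driver follows (hypothetical example code, not part of the patch; the 256-bit key length and the omitted setkey error checking are illustrative). Note that because CTR is a stream mode, the same ctr_crypt() routine serves both directions:

#include <linux/crypto.h>
#include <linux/err.h>
#include <linux/scatterlist.h>
#include <crypto/twofish.h>

static int twofish_ctr_example(u8 *buf, unsigned int len,
			       const u8 *key, const u8 *iv)
{
	struct crypto_blkcipher *tfm;
	struct blkcipher_desc desc;
	struct scatterlist sg;
	int err;

	/* resolves to ctr-twofish-3way once this module is loaded */
	tfm = crypto_alloc_blkcipher("ctr(twofish)", 0, 0);
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);

	crypto_blkcipher_setkey(tfm, key, TF_MAX_KEY_SIZE);
	crypto_blkcipher_set_iv(tfm, iv, TF_BLOCK_SIZE);

	desc.tfm = tfm;
	desc.flags = 0;
	sg_init_one(&sg, buf, len);

	/* CTR encrypt and decrypt are the same operation */
	err = crypto_blkcipher_encrypt(&desc, &sg, &sg, len);

	crypto_free_blkcipher(tfm);
	return err;
}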
+MODULE_ALIAS("twofish-asm"); diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index 2f84a433b6a0..f3444f700f36 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -259,7 +259,9 @@ extern const char * const x86_power_flags[32]; #define cpu_has_xmm boot_cpu_has(X86_FEATURE_XMM) #define cpu_has_xmm2 boot_cpu_has(X86_FEATURE_XMM2) #define cpu_has_xmm3 boot_cpu_has(X86_FEATURE_XMM3) +#define cpu_has_ssse3 boot_cpu_has(X86_FEATURE_SSSE3) #define cpu_has_aes boot_cpu_has(X86_FEATURE_AES) +#define cpu_has_avx boot_cpu_has(X86_FEATURE_AVX) #define cpu_has_ht boot_cpu_has(X86_FEATURE_HT) #define cpu_has_mp boot_cpu_has(X86_FEATURE_MP) #define cpu_has_nx boot_cpu_has(X86_FEATURE_NX) @@ -287,6 +289,7 @@ extern const char * const x86_power_flags[32]; #define cpu_has_xmm4_2 boot_cpu_has(X86_FEATURE_XMM4_2) #define cpu_has_x2apic boot_cpu_has(X86_FEATURE_X2APIC) #define cpu_has_xsave boot_cpu_has(X86_FEATURE_XSAVE) +#define cpu_has_osxsave boot_cpu_has(X86_FEATURE_OSXSAVE) #define cpu_has_hypervisor boot_cpu_has(X86_FEATURE_HYPERVISOR) #define cpu_has_pclmulqdq boot_cpu_has(X86_FEATURE_PCLMULQDQ) #define cpu_has_perfctr_core boot_cpu_has(X86_FEATURE_PERFCTR_CORE) diff --git a/arch/x86/include/asm/intel_scu_ipc.h b/arch/x86/include/asm/intel_scu_ipc.h index 29f66793cc55..4420993acc47 100644 --- a/arch/x86/include/asm/intel_scu_ipc.h +++ b/arch/x86/include/asm/intel_scu_ipc.h @@ -1,6 +1,8 @@ #ifndef _ASM_X86_INTEL_SCU_IPC_H_ #define _ASM_X86_INTEL_SCU_IPC_H_ +#include <linux/notifier.h> + #define IPCMSG_VRTC 0xFA /* Set vRTC device */ /* Command id associated with message IPCMSG_VRTC */ @@ -44,4 +46,24 @@ int intel_scu_ipc_i2c_cntrl(u32 addr, u32 *data); /* Update FW version */ int intel_scu_ipc_fw_update(u8 *buffer, u32 length); +extern struct blocking_notifier_head intel_scu_notifier; + +static inline void intel_scu_notifier_add(struct notifier_block *nb) +{ + blocking_notifier_chain_register(&intel_scu_notifier, nb); +} + +static inline void intel_scu_notifier_remove(struct notifier_block *nb) +{ + blocking_notifier_chain_unregister(&intel_scu_notifier, nb); +} + +static inline int intel_scu_notifier_post(unsigned long v, void *p) +{ + return blocking_notifier_call_chain(&intel_scu_notifier, v, p); +} + +#define SCU_AVAILABLE 1 +#define SCU_DOWN 2 + #endif diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 7b5063a6ad42..864830e1dd65 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -36,7 +36,6 @@ #include <linux/fs.h> #include <linux/mm.h> #include <linux/debugfs.h> -#include <linux/edac_mce.h> #include <linux/irq_work.h> #include <asm/processor.h> @@ -144,23 +143,20 @@ static struct mce_log mcelog = { void mce_log(struct mce *mce) { unsigned next, entry; + int ret = 0; /* Emit the trace record: */ trace_mce_record(mce); + ret = atomic_notifier_call_chain(&x86_mce_decoder_chain, 0, mce); + if (ret == NOTIFY_STOP) + return; + mce->finished = 0; wmb(); for (;;) { entry = rcu_dereference_check_mce(mcelog.next); for (;;) { - /* - * If edac_mce is enabled, it will check the error type - * and will process it, if it is a known error. - * Otherwise, the error will be sent through mcelog - * interface - */ - if (edac_mce_parse(mce)) - return; /* * When the buffer fills up discard new entries. 
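The mce_log() hunk above replaces the hard-wired edac_mce_parse() hook with the x86_mce_decoder_chain notifier: every logged MCE is now offered to registered decoders first, and a decoder that returns NOTIFY_STOP consumes the record before it reaches the mcelog buffer. A decoder module would attach roughly as below; this is a sketch assuming x86_mce_decoder_chain remains usable from modules as in this version of mce.c, and the MCI_STATUS_VAL filter is only illustrative:

#include <linux/module.h>
#include <linux/notifier.h>
#include <asm/mce.h>

static int demo_mce_decode(struct notifier_block *nb,
			   unsigned long action, void *data)
{
	struct mce *m = data;

	/* illustrative filter: only claim records with a valid status */
	if (!(m->status & MCI_STATUS_VAL))
		return NOTIFY_DONE;	/* fall through to mcelog */

	pr_info("MCE bank %d: status %llx addr %llx\n",
		m->bank, m->status, m->addr);
	return NOTIFY_STOP;		/* record is consumed here */
}

static struct notifier_block demo_mce_nb = {
	.notifier_call = demo_mce_decode,
};

static int __init demo_mce_init(void)
{
	atomic_notifier_chain_register(&x86_mce_decoder_chain,
				       &demo_mce_nb);
	return 0;
}
module_init(demo_mce_init);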
@@ -556,10 +552,8 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b) * Don't get the IP here because it's unlikely to * have anything to do with the actual error location. */ - if (!(flags & MCP_DONTLOG) && !mce_dont_log_ce) { + if (!(flags & MCP_DONTLOG) && !mce_dont_log_ce) mce_log(&m); - atomic_notifier_call_chain(&x86_mce_decoder_chain, 0, &m); - } /* * Clear state for this bank. diff --git a/arch/x86/mm/gup.c b/arch/x86/mm/gup.c index dbe34b931374..ea305856151c 100644 --- a/arch/x86/mm/gup.c +++ b/arch/x86/mm/gup.c @@ -108,16 +108,6 @@ static inline void get_head_page_multiple(struct page *page, int nr) SetPageReferenced(page); } -static inline void get_huge_page_tail(struct page *page) -{ - /* - * __split_huge_page_refcount() cannot run - * from under us. - */ - VM_BUG_ON(atomic_read(&page->_count) < 0); - atomic_inc(&page->_count); -} - static noinline int gup_huge_pmd(pmd_t pmd, unsigned long addr, unsigned long end, int write, struct page **pages, int *nr) { diff --git a/arch/x86/platform/mrst/mrst.c b/arch/x86/platform/mrst/mrst.c index e6379526675b..6ed7afdaf4af 100644 --- a/arch/x86/platform/mrst/mrst.c +++ b/arch/x86/platform/mrst/mrst.c @@ -26,6 +26,8 @@ #include <linux/platform_device.h> #include <linux/irq.h> #include <linux/module.h> +#include <linux/notifier.h> +#include <linux/mfd/intel_msic.h> #include <asm/setup.h> #include <asm/mpspec_def.h> @@ -483,6 +485,128 @@ static void __init *no_platform_data(void *info) return NULL; } +static struct resource msic_resources[] = { + { + .start = INTEL_MSIC_IRQ_PHYS_BASE, + .end = INTEL_MSIC_IRQ_PHYS_BASE + 64 - 1, + .flags = IORESOURCE_MEM, + }, +}; + +static struct intel_msic_platform_data msic_pdata; + +static struct platform_device msic_device = { + .name = "intel_msic", + .id = -1, + .dev = { + .platform_data = &msic_pdata, + }, + .num_resources = ARRAY_SIZE(msic_resources), + .resource = msic_resources, +}; + +static inline bool mrst_has_msic(void) +{ + return mrst_identify_cpu() == MRST_CPU_CHIP_PENWELL; +} + +static int msic_scu_status_change(struct notifier_block *nb, + unsigned long code, void *data) +{ + if (code == SCU_DOWN) { + platform_device_unregister(&msic_device); + return 0; + } + + return platform_device_register(&msic_device); +} + +static int __init msic_init(void) +{ + static struct notifier_block msic_scu_notifier = { + .notifier_call = msic_scu_status_change, + }; + + /* + * We need to be sure that the SCU IPC is ready before MSIC device + * can be registered. + */ + if (mrst_has_msic()) + intel_scu_notifier_add(&msic_scu_notifier); + + return 0; +} +arch_initcall(msic_init); + +/* + * msic_generic_platform_data - sets generic platform data for the block + * @info: pointer to the SFI device table entry for this block + * @block: MSIC block + * + * Function sets IRQ number from the SFI table entry for given device to + * the MSIC platform data. 
+ */ +static void *msic_generic_platform_data(void *info, enum intel_msic_block block) +{ + struct sfi_device_table_entry *entry = info; + + BUG_ON(block < 0 || block >= INTEL_MSIC_BLOCK_LAST); + msic_pdata.irq[block] = entry->irq; + + return no_platform_data(info); +} + +static void *msic_battery_platform_data(void *info) +{ + return msic_generic_platform_data(info, INTEL_MSIC_BLOCK_BATTERY); +} + +static void *msic_gpio_platform_data(void *info) +{ + static struct intel_msic_gpio_pdata pdata; + int gpio = get_gpio_by_name("msic_gpio_base"); + + if (gpio < 0) + return NULL; + + pdata.gpio_base = gpio; + msic_pdata.gpio = &pdata; + + return msic_generic_platform_data(info, INTEL_MSIC_BLOCK_GPIO); +} + +static void *msic_audio_platform_data(void *info) +{ + struct platform_device *pdev; + + pdev = platform_device_register_simple("sst-platform", -1, NULL, 0); + if (IS_ERR(pdev)) { + pr_err("failed to create audio platform device\n"); + return NULL; + } + + return msic_generic_platform_data(info, INTEL_MSIC_BLOCK_AUDIO); +} + +static void *msic_power_btn_platform_data(void *info) +{ + return msic_generic_platform_data(info, INTEL_MSIC_BLOCK_POWER_BTN); +} + +static void *msic_ocd_platform_data(void *info) +{ + static struct intel_msic_ocd_pdata pdata; + int gpio = get_gpio_by_name("ocd_gpio"); + + if (gpio < 0) + return NULL; + + pdata.gpio = gpio; + msic_pdata.ocd = &pdata; + + return msic_generic_platform_data(info, INTEL_MSIC_BLOCK_OCD); +} + static const struct devs_id __initconst device_ids[] = { {"pmic_gpio", SFI_DEV_TYPE_SPI, 1, &pmic_gpio_platform_data}, {"spi_max3111", SFI_DEV_TYPE_SPI, 0, &max3111_platform_data}, @@ -491,7 +615,14 @@ static const struct devs_id __initconst device_ids[] = { {"emc1403", SFI_DEV_TYPE_I2C, 1, &emc1403_platform_data}, {"i2c_accel", SFI_DEV_TYPE_I2C, 0, &lis331dl_platform_data}, {"pmic_audio", SFI_DEV_TYPE_IPC, 1, &no_platform_data}, - {"msic_audio", SFI_DEV_TYPE_IPC, 1, &no_platform_data}, + + /* MSIC subdevices */ + {"msic_battery", SFI_DEV_TYPE_IPC, 1, &msic_battery_platform_data}, + {"msic_gpio", SFI_DEV_TYPE_IPC, 1, &msic_gpio_platform_data}, + {"msic_audio", SFI_DEV_TYPE_IPC, 1, &msic_audio_platform_data}, + {"msic_power_btn", SFI_DEV_TYPE_IPC, 1, &msic_power_btn_platform_data}, + {"msic_ocd", SFI_DEV_TYPE_IPC, 1, &msic_ocd_platform_data}, + {}, }; @@ -558,6 +689,9 @@ static void __init intel_scu_i2c_device_register(int bus, i2c_devs[i2c_next_dev++] = new_dev; } +BLOCKING_NOTIFIER_HEAD(intel_scu_notifier); +EXPORT_SYMBOL_GPL(intel_scu_notifier); + /* Called by IPC driver */ void intel_scu_devices_create(void) { @@ -582,6 +716,7 @@ void intel_scu_devices_create(void) } else i2c_register_board_info(i2c_bus[i], i2c_devs[i], 1); } + intel_scu_notifier_post(SCU_AVAILABLE, 0L); } EXPORT_SYMBOL_GPL(intel_scu_devices_create); @@ -590,6 +725,8 @@ void intel_scu_devices_destroy(void) { int i; + intel_scu_notifier_post(SCU_DOWN, 0L); + for (i = 0; i < ipc_next_dev; i++) platform_device_del(ipc_devs[i]); } @@ -606,19 +743,37 @@ static void __init install_irq_resource(struct platform_device *pdev, int irq) platform_device_add_resources(pdev, &res, 1); } -static void __init sfi_handle_ipc_dev(struct platform_device *pdev) +static void __init sfi_handle_ipc_dev(struct sfi_device_table_entry *entry) { const struct devs_id *dev = device_ids; + struct platform_device *pdev; void *pdata = NULL; while (dev->name[0]) { if (dev->type == SFI_DEV_TYPE_IPC && - !strncmp(dev->name, pdev->name, SFI_NAME_LEN)) { - pdata = dev->get_platform_data(pdev); + !strncmp(dev->name, 
entry->name, SFI_NAME_LEN)) {
+			pdata = dev->get_platform_data(entry);
 			break;
 		}
 		dev++;
 	}
+
+	/*
+	 * On Medfield the platform device creation is handled by the MSIC
+	 * MFD driver so we don't need to do it here.
+	 */
+	if (mrst_has_msic())
+		return;
+
+	/* ID as IRQ is a hack that will go away */
+	pdev = platform_device_alloc(entry->name, entry->irq);
+	if (pdev == NULL) {
+		pr_err("out of memory for SFI platform device '%s'.\n",
+			entry->name);
+		return;
+	}
+	install_irq_resource(pdev, entry->irq);
+	pdev->dev.platform_data = pdata;
 	intel_scu_device_register(pdev);
 }
@@ -671,7 +826,6 @@ static int __init sfi_parse_devs(struct sfi_table_header *table)
 	struct sfi_device_table_entry *pentry;
 	struct spi_board_info spi_info;
 	struct i2c_board_info i2c_info;
-	struct platform_device *pdev;
 	int num, i, bus;
 	int ioapic;
 	struct io_apic_irq_attr irq_attr;
@@ -699,17 +853,9 @@ static int __init sfi_parse_devs(struct sfi_table_header *table)
 		switch (pentry->type) {
 		case SFI_DEV_TYPE_IPC:
-			/* ID as IRQ is a hack that will go away */
-			pdev = platform_device_alloc(pentry->name, irq);
-			if (pdev == NULL) {
-				pr_err("out of memory for SFI platform device '%s'.\n",
-							pentry->name);
-				continue;
-			}
-			install_irq_resource(pdev, irq);
 			pr_debug("info[%2d]: IPC bus, name = %16.16s, "
-				"irq = 0x%2x\n", i, pentry->name, irq);
-			sfi_handle_ipc_dev(pdev);
+				"irq = 0x%2x\n", i, pentry->name, pentry->irq);
+			sfi_handle_ipc_dev(pentry);
 			break;
 		case SFI_DEV_TYPE_SPI:
 			memset(&spi_info, 0, sizeof(spi_info));
diff --git a/arch/x86/um/Kconfig b/arch/x86/um/Kconfig
new file mode 100644
index 000000000000..21bebe63df66
--- /dev/null
+++ b/arch/x86/um/Kconfig
@@ -0,0 +1,70 @@
+mainmenu "User Mode Linux/$SUBARCH $KERNELVERSION Kernel Configuration"
+
+source "arch/um/Kconfig.common"
+
+menu "UML-specific options"
+
+menu "Host processor type and features"
+
+config CMPXCHG_LOCAL
+	bool
+	default n
+
+config CMPXCHG_DOUBLE
+	bool
+	default n
+
+source "arch/x86/Kconfig.cpu"
+
+endmenu
+
+config UML_X86
+	def_bool y
+	select GENERIC_FIND_FIRST_BIT
+
+config 64BIT
+	bool
+	default SUBARCH = "x86_64"
+
+config X86_32
+	def_bool !64BIT
+	select HAVE_AOUT
+
+config X86_64
+	def_bool 64BIT
+
+config RWSEM_XCHGADD_ALGORITHM
+	def_bool X86_XADD && 64BIT
+
+config RWSEM_GENERIC_SPINLOCK
+	def_bool !RWSEM_XCHGADD_ALGORITHM
+
+config 3_LEVEL_PGTABLES
+	bool "Three-level pagetables (EXPERIMENTAL)" if !64BIT
+	default 64BIT
+	depends on EXPERIMENTAL
+	help
+	Three-level pagetables will let UML have more than 4G of physical
+	memory. All the memory that can't be mapped directly will be treated
+	as high memory.
+
+	However, this is experimental on 32-bit architectures, so if unsure say
+	N (on x86-64 it's automatically enabled, instead, as it's safe there).
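One note on the Moorestown changes above before the UML Kconfig continues: the SCU_AVAILABLE/SCU_DOWN notifications posted by intel_scu_devices_create()/intel_scu_devices_destroy() give drivers a race-free way to defer setup until SCU firmware services exist, and msic_scu_status_change() is the in-tree consumer. A minimal sketch of the same pattern follows (the demo_* helpers are hypothetical stand-ins for real driver work):

#include <linux/module.h>
#include <linux/notifier.h>
#include <asm/intel_scu_ipc.h>

/* hypothetical driver hooks; real code would (un)register devices */
static void demo_scu_bringup(void) { }
static void demo_scu_teardown(void) { }

static int demo_scu_status_change(struct notifier_block *nb,
				  unsigned long code, void *data)
{
	if (code == SCU_AVAILABLE)
		demo_scu_bringup();	/* IPC commands are safe from now on */
	else if (code == SCU_DOWN)
		demo_scu_teardown();	/* stop issuing IPC commands */

	return NOTIFY_OK;
}

static struct notifier_block demo_scu_nb = {
	.notifier_call = demo_scu_status_change,
};

static int __init demo_scu_init(void)
{
	intel_scu_notifier_add(&demo_scu_nb);
	return 0;
}
module_init(demo_scu_init);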
+ +config ARCH_HAS_SC_SIGNALS + def_bool !64BIT + +config ARCH_REUSE_HOST_VSYSCALL_AREA + def_bool !64BIT + +config SMP_BROKEN + def_bool 64BIT + +config GENERIC_HWEIGHT + def_bool y + +source "arch/um/Kconfig.um" + +endmenu + +source "arch/um/Kconfig.rest" diff --git a/arch/x86/um/Makefile b/arch/x86/um/Makefile new file mode 100644 index 000000000000..8fb58400e415 --- /dev/null +++ b/arch/x86/um/Makefile @@ -0,0 +1,45 @@ +# +# Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) +# + +ifeq ($(CONFIG_X86_32),y) + BITS := 32 +else + BITS := 64 +endif + +obj-y = bug.o bugs_$(BITS).o delay.o fault.o ksyms.o ldt.o \ + ptrace_$(BITS).o ptrace_user.o setjmp_$(BITS).o signal.o \ + stub_$(BITS).o stub_segv.o syscalls_$(BITS).o \ + sys_call_table_$(BITS).o sysrq_$(BITS).o tls_$(BITS).o \ + mem_$(BITS).o subarch.o os-$(OS)/ + +ifeq ($(CONFIG_X86_32),y) + +obj-y += checksum_32.o +obj-$(CONFIG_BINFMT_ELF) += elfcore.o + +subarch-y = ../lib/string_32.o ../lib/atomic64_32.o ../lib/atomic64_cx8_32.o +subarch-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += ../lib/rwsem.o +subarch-$(CONFIG_HIGHMEM) += ../mm/highmem_32.o + +else + +obj-y += vdso/ + +subarch-y = ../lib/csum-partial_64.o ../lib/memcpy_64.o ../lib/thunk_64.o \ + ../lib/rwsem.o + +endif + +subarch-$(CONFIG_MODULES) += ../kernel/module.o + +USER_OBJS := bugs_$(BITS).o ptrace_user.o fault.o + +extra-y += user-offsets.s +$(obj)/user-offsets.s: c_flags = -Wp,-MD,$(depfile) $(USER_CFLAGS) + +UNPROFILE_OBJS := stub_segv.o +CFLAGS_stub_segv.o := $(CFLAGS_NO_HARDENING) + +include arch/um/scripts/Makefile.rules diff --git a/arch/x86/um/asm/apic.h b/arch/x86/um/asm/apic.h new file mode 100644 index 000000000000..876dee84ab11 --- /dev/null +++ b/arch/x86/um/asm/apic.h @@ -0,0 +1,4 @@ +#ifndef __UM_APIC_H +#define __UM_APIC_H + +#endif diff --git a/arch/x86/um/asm/arch_hweight.h b/arch/x86/um/asm/arch_hweight.h new file mode 100644 index 000000000000..c656cf443f4a --- /dev/null +++ b/arch/x86/um/asm/arch_hweight.h @@ -0,0 +1,6 @@ +#ifndef _ASM_UM_HWEIGHT_H +#define _ASM_UM_HWEIGHT_H + +#include <asm-generic/bitops/arch_hweight.h> + +#endif diff --git a/arch/x86/um/asm/archparam.h b/arch/x86/um/asm/archparam.h new file mode 100644 index 000000000000..c17cf68dda0f --- /dev/null +++ b/arch/x86/um/asm/archparam.h @@ -0,0 +1,20 @@ +/* + * Copyright (C) 2000 - 2003 Jeff Dike (jdike@addtoit.com) + * Copyright 2003 PathScale, Inc. 
+ * Licensed under the GPL + */ + +#ifndef __UM_ARCHPARAM_H +#define __UM_ARCHPARAM_H + +#ifdef CONFIG_X86_32 + +#ifdef CONFIG_X86_PAE +#define LAST_PKMAP 512 +#else +#define LAST_PKMAP 1024 +#endif + +#endif + +#endif diff --git a/arch/x86/um/asm/checksum.h b/arch/x86/um/asm/checksum.h new file mode 100644 index 000000000000..b6efe2381b5d --- /dev/null +++ b/arch/x86/um/asm/checksum.h @@ -0,0 +1,10 @@ +#ifndef __UM_CHECKSUM_H +#define __UM_CHECKSUM_H + +#ifdef CONFIG_X86_32 +# include "checksum_32.h" +#else +# include "checksum_64.h" +#endif + +#endif diff --git a/arch/x86/um/asm/checksum_32.h b/arch/x86/um/asm/checksum_32.h new file mode 100644 index 000000000000..caab74252e27 --- /dev/null +++ b/arch/x86/um/asm/checksum_32.h @@ -0,0 +1,201 @@ +/* + * Licensed under the GPL + */ + +#ifndef __UM_SYSDEP_CHECKSUM_H +#define __UM_SYSDEP_CHECKSUM_H + +#include "linux/in6.h" +#include "linux/string.h" + +/* + * computes the checksum of a memory block at buff, length len, + * and adds in "sum" (32-bit) + * + * returns a 32-bit number suitable for feeding into itself + * or csum_tcpudp_magic + * + * this function must be called with even lengths, except + * for the last fragment, which may be odd + * + * it's best to have buff aligned on a 32-bit boundary + */ +__wsum csum_partial(const void *buff, int len, __wsum sum); + +/* + * Note: when you get a NULL pointer exception here this means someone + * passed in an incorrect kernel address to one of these functions. + * + * If you use these functions directly please don't forget the + * access_ok(). + */ + +static __inline__ +__wsum csum_partial_copy_nocheck(const void *src, void *dst, + int len, __wsum sum) +{ + memcpy(dst, src, len); + return csum_partial(dst, len, sum); +} + +/* + * the same as csum_partial, but copies from src while it + * checksums, and handles user-space pointer exceptions correctly, when needed. + * + * here even more important to align src and dst on a 32-bit (or even + * better 64-bit) boundary + */ + +static __inline__ +__wsum csum_partial_copy_from_user(const void __user *src, void *dst, + int len, __wsum sum, int *err_ptr) +{ + if (copy_from_user(dst, src, len)) { + *err_ptr = -EFAULT; + return (__force __wsum)-1; + } + + return csum_partial(dst, len, sum); +} + +/* + * This is a version of ip_compute_csum() optimized for IP headers, + * which always checksum on 4 octet boundaries. + * + * By Jorge Cwik <jorge@laser.satlink.net>, adapted for linux by + * Arnt Gulbrandsen. + */ +static inline __sum16 ip_fast_csum(const void *iph, unsigned int ihl) +{ + unsigned int sum; + + __asm__ __volatile__( + "movl (%1), %0 ;\n" + "subl $4, %2 ;\n" + "jbe 2f ;\n" + "addl 4(%1), %0 ;\n" + "adcl 8(%1), %0 ;\n" + "adcl 12(%1), %0 ;\n" +"1: adcl 16(%1), %0 ;\n" + "lea 4(%1), %1 ;\n" + "decl %2 ;\n" + "jne 1b ;\n" + "adcl $0, %0 ;\n" + "movl %0, %2 ;\n" + "shrl $16, %0 ;\n" + "addw %w2, %w0 ;\n" + "adcl $0, %0 ;\n" + "notl %0 ;\n" +"2: ;\n" + /* Since the input registers which are loaded with iph and ipl + are modified, we must also specify them as outputs, or gcc + will assume they contain their original values. 
*/ + : "=r" (sum), "=r" (iph), "=r" (ihl) + : "1" (iph), "2" (ihl) + : "memory"); + return (__force __sum16)sum; +} + +/* + * Fold a partial checksum + */ + +static inline __sum16 csum_fold(__wsum sum) +{ + __asm__( + "addl %1, %0 ;\n" + "adcl $0xffff, %0 ;\n" + : "=r" (sum) + : "r" ((__force u32)sum << 16), + "0" ((__force u32)sum & 0xffff0000) + ); + return (__force __sum16)(~(__force u32)sum >> 16); +} + +static inline __wsum csum_tcpudp_nofold(__be32 saddr, __be32 daddr, + unsigned short len, + unsigned short proto, + __wsum sum) +{ + __asm__( + "addl %1, %0 ;\n" + "adcl %2, %0 ;\n" + "adcl %3, %0 ;\n" + "adcl $0, %0 ;\n" + : "=r" (sum) + : "g" (daddr), "g"(saddr), "g"((len + proto) << 8), "0"(sum)); + return sum; +} + +/* + * computes the checksum of the TCP/UDP pseudo-header + * returns a 16-bit checksum, already complemented + */ +static inline __sum16 csum_tcpudp_magic(__be32 saddr, __be32 daddr, + unsigned short len, + unsigned short proto, + __wsum sum) +{ + return csum_fold(csum_tcpudp_nofold(saddr,daddr,len,proto,sum)); +} + +/* + * this routine is used for miscellaneous IP-like checksums, mainly + * in icmp.c + */ + +static inline __sum16 ip_compute_csum(const void *buff, int len) +{ + return csum_fold (csum_partial(buff, len, 0)); +} + +#define _HAVE_ARCH_IPV6_CSUM +static __inline__ __sum16 csum_ipv6_magic(const struct in6_addr *saddr, + const struct in6_addr *daddr, + __u32 len, unsigned short proto, + __wsum sum) +{ + __asm__( + "addl 0(%1), %0 ;\n" + "adcl 4(%1), %0 ;\n" + "adcl 8(%1), %0 ;\n" + "adcl 12(%1), %0 ;\n" + "adcl 0(%2), %0 ;\n" + "adcl 4(%2), %0 ;\n" + "adcl 8(%2), %0 ;\n" + "adcl 12(%2), %0 ;\n" + "adcl %3, %0 ;\n" + "adcl %4, %0 ;\n" + "adcl $0, %0 ;\n" + : "=&r" (sum) + : "r" (saddr), "r" (daddr), + "r"(htonl(len)), "r"(htonl(proto)), "0"(sum)); + + return csum_fold(sum); +} + +/* + * Copy and checksum to user + */ +#define HAVE_CSUM_COPY_USER +static __inline__ __wsum csum_and_copy_to_user(const void *src, + void __user *dst, + int len, __wsum sum, int *err_ptr) +{ + if (access_ok(VERIFY_WRITE, dst, len)) { + if (copy_to_user(dst, src, len)) { + *err_ptr = -EFAULT; + return (__force __wsum)-1; + } + + return csum_partial(src, len, sum); + } + + if (len) + *err_ptr = -EFAULT; + + return (__force __wsum)-1; /* invalid checksum */ +} + +#endif + diff --git a/arch/x86/um/asm/checksum_64.h b/arch/x86/um/asm/checksum_64.h new file mode 100644 index 000000000000..a5be9031ea85 --- /dev/null +++ b/arch/x86/um/asm/checksum_64.h @@ -0,0 +1,144 @@ +/* + * Licensed under the GPL + */ + +#ifndef __UM_SYSDEP_CHECKSUM_H +#define __UM_SYSDEP_CHECKSUM_H + +#include "linux/string.h" +#include "linux/in6.h" +#include "asm/uaccess.h" + +extern __wsum csum_partial(const void *buff, int len, __wsum sum); + +/* + * Note: when you get a NULL pointer exception here this means someone + * passed in an incorrect kernel address to one of these functions. + * + * If you use these functions directly please don't forget the + * access_ok(). + */ + +static __inline__ +__wsum csum_partial_copy_nocheck(const void *src, void *dst, + int len, __wsum sum) +{ + memcpy(dst, src, len); + return(csum_partial(dst, len, sum)); +} + +static __inline__ +__wsum csum_partial_copy_from_user(const void __user *src, + void *dst, int len, __wsum sum, + int *err_ptr) +{ + if (copy_from_user(dst, src, len)) { + *err_ptr = -EFAULT; + return (__force __wsum)-1; + } + return csum_partial(dst, len, sum); +} + +/** + * csum_fold - Fold and invert a 32bit checksum. 
+ * sum: 32bit unfolded sum
+ *
+ * Fold a 32bit running checksum to 16bit and invert it. This is usually
+ * the last step before putting a checksum into a packet.
+ * Make sure not to mix with 64bit checksums.
+ */
+static inline __sum16 csum_fold(__wsum sum)
+{
+	__asm__(
+		"  addl %1,%0\n"
+		"  adcl $0xffff,%0"
+		: "=r" (sum)
+		: "r" ((__force u32)sum << 16),
+		  "0" ((__force u32)sum & 0xffff0000)
+	);
+	return (__force __sum16)(~(__force u32)sum >> 16);
+}
+
+/**
+ * csum_tcpudp_nofold - Compute an IPv4 pseudo header checksum.
+ * @saddr: source address
+ * @daddr: destination address
+ * @len: length of packet
+ * @proto: ip protocol of packet
+ * @sum: initial sum to be added in (32bit unfolded)
+ *
+ * Returns the pseudo header checksum of the input data. Result is
+ * 32bit unfolded.
+ */
+static inline __wsum
+csum_tcpudp_nofold(__be32 saddr, __be32 daddr, unsigned short len,
+		   unsigned short proto, __wsum sum)
+{
+	asm("  addl %1, %0\n"
+	    "  adcl %2, %0\n"
+	    "  adcl %3, %0\n"
+	    "  adcl $0, %0\n"
+		: "=r" (sum)
+	    : "g" (daddr), "g" (saddr), "g" ((len + proto) << 8), "0" (sum));
+	return sum;
+}
+
+/*
+ * computes the checksum of the TCP/UDP pseudo-header
+ * returns a 16-bit checksum, already complemented
+ */
+static inline __sum16 csum_tcpudp_magic(__be32 saddr, __be32 daddr,
+					unsigned short len,
+					unsigned short proto,
+					__wsum sum)
+{
+	return csum_fold(csum_tcpudp_nofold(saddr,daddr,len,proto,sum));
+}
+
+/**
+ * ip_fast_csum - Compute the IPv4 header checksum efficiently.
+ * iph: ipv4 header
+ * ihl: length of header / 4
+ */
+static inline __sum16 ip_fast_csum(const void *iph, unsigned int ihl)
+{
+	unsigned int sum;
+
+	asm(	"  movl (%1), %0\n"
+		"  subl $4, %2\n"
+		"  jbe 2f\n"
+		"  addl 4(%1), %0\n"
+		"  adcl 8(%1), %0\n"
+		"  adcl 12(%1), %0\n"
+		"1: adcl 16(%1), %0\n"
+		"  lea 4(%1), %1\n"
+		"  decl %2\n"
+		"  jne	1b\n"
+		"  adcl $0, %0\n"
+		"  movl %0, %2\n"
+		"  shrl $16, %0\n"
+		"  addw %w2, %w0\n"
+		"  adcl $0, %0\n"
+		"  notl %0\n"
+		"2:"
+	/* Since the input registers which are loaded with iph and ipl
+	   are modified, we must also specify them as outputs, or gcc
+	   will assume they contain their original values. */
+	: "=r" (sum), "=r" (iph), "=r" (ihl)
+	: "1" (iph), "2" (ihl)
+	: "memory");
+	return (__force __sum16)sum;
+}
+
+static inline unsigned add32_with_carry(unsigned a, unsigned b)
+{
+	asm("addl %2,%0\n\t"
+	    "adcl $0,%0"
+	    : "=r" (a)
+	    : "0" (a), "r" (b));
+	return a;
+}
+
+extern __sum16 ip_compute_csum(const void *buff, int len);
+
+#endif
diff --git a/arch/x86/um/asm/desc.h b/arch/x86/um/asm/desc.h
new file mode 100644
index 000000000000..4ec34a51b62c
--- /dev/null
+++ b/arch/x86/um/asm/desc.h
@@ -0,0 +1,16 @@
+#ifndef __UM_DESC_H
+#define __UM_DESC_H
+
+/* Taken from asm-i386/desc.h, it's the only thing we need. The rest wouldn't
+ * compile, and has never been used.
*/ +#define LDT_empty(info) (\ + (info)->base_addr == 0 && \ + (info)->limit == 0 && \ + (info)->contents == 0 && \ + (info)->read_exec_only == 1 && \ + (info)->seg_32bit == 0 && \ + (info)->limit_in_pages == 0 && \ + (info)->seg_not_present == 1 && \ + (info)->useable == 0 ) + +#endif diff --git a/arch/x86/um/asm/elf.h b/arch/x86/um/asm/elf.h new file mode 100644 index 000000000000..f3b0633b69a1 --- /dev/null +++ b/arch/x86/um/asm/elf.h @@ -0,0 +1,221 @@ +/* + * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * Licensed under the GPL + */ +#ifndef __UM_ELF_X86_H +#define __UM_ELF_X86_H + +#include <asm/user.h> +#include "skas.h" + +#ifdef CONFIG_X86_32 + +#define R_386_NONE 0 +#define R_386_32 1 +#define R_386_PC32 2 +#define R_386_GOT32 3 +#define R_386_PLT32 4 +#define R_386_COPY 5 +#define R_386_GLOB_DAT 6 +#define R_386_JMP_SLOT 7 +#define R_386_RELATIVE 8 +#define R_386_GOTOFF 9 +#define R_386_GOTPC 10 +#define R_386_NUM 11 + +/* + * This is used to ensure we don't load something for the wrong architecture. + */ +#define elf_check_arch(x) \ + (((x)->e_machine == EM_386) || ((x)->e_machine == EM_486)) + +#define ELF_CLASS ELFCLASS32 +#define ELF_DATA ELFDATA2LSB +#define ELF_ARCH EM_386 + +#define ELF_PLAT_INIT(regs, load_addr) do { \ + PT_REGS_EBX(regs) = 0; \ + PT_REGS_ECX(regs) = 0; \ + PT_REGS_EDX(regs) = 0; \ + PT_REGS_ESI(regs) = 0; \ + PT_REGS_EDI(regs) = 0; \ + PT_REGS_EBP(regs) = 0; \ + PT_REGS_EAX(regs) = 0; \ +} while (0) + +/* Shamelessly stolen from include/asm-i386/elf.h */ + +#define ELF_CORE_COPY_REGS(pr_reg, regs) do { \ + pr_reg[0] = PT_REGS_EBX(regs); \ + pr_reg[1] = PT_REGS_ECX(regs); \ + pr_reg[2] = PT_REGS_EDX(regs); \ + pr_reg[3] = PT_REGS_ESI(regs); \ + pr_reg[4] = PT_REGS_EDI(regs); \ + pr_reg[5] = PT_REGS_EBP(regs); \ + pr_reg[6] = PT_REGS_EAX(regs); \ + pr_reg[7] = PT_REGS_DS(regs); \ + pr_reg[8] = PT_REGS_ES(regs); \ + /* fake once used fs and gs selectors? */ \ + pr_reg[9] = PT_REGS_DS(regs); \ + pr_reg[10] = PT_REGS_DS(regs); \ + pr_reg[11] = PT_REGS_SYSCALL_NR(regs); \ + pr_reg[12] = PT_REGS_IP(regs); \ + pr_reg[13] = PT_REGS_CS(regs); \ + pr_reg[14] = PT_REGS_EFLAGS(regs); \ + pr_reg[15] = PT_REGS_SP(regs); \ + pr_reg[16] = PT_REGS_SS(regs); \ +} while (0); + +extern char * elf_aux_platform; +#define ELF_PLATFORM (elf_aux_platform) + +extern unsigned long vsyscall_ehdr; +extern unsigned long vsyscall_end; +extern unsigned long __kernel_vsyscall; + +/* + * This is the range that is readable by user mode, and things + * acting like user mode such as get_user_pages. + */ +#define FIXADDR_USER_START vsyscall_ehdr +#define FIXADDR_USER_END vsyscall_end + + +/* + * Architecture-neutral AT_ values in 0-17, leave some room + * for more of them, start the x86-specific ones at 32. 
+ */ +#define AT_SYSINFO 32 +#define AT_SYSINFO_EHDR 33 + +#define ARCH_DLINFO \ +do { \ + if ( vsyscall_ehdr ) { \ + NEW_AUX_ENT(AT_SYSINFO, __kernel_vsyscall); \ + NEW_AUX_ENT(AT_SYSINFO_EHDR, vsyscall_ehdr); \ + } \ +} while (0) + +#else + +/* x86-64 relocation types, taken from asm-x86_64/elf.h */ +#define R_X86_64_NONE 0 /* No reloc */ +#define R_X86_64_64 1 /* Direct 64 bit */ +#define R_X86_64_PC32 2 /* PC relative 32 bit signed */ +#define R_X86_64_GOT32 3 /* 32 bit GOT entry */ +#define R_X86_64_PLT32 4 /* 32 bit PLT address */ +#define R_X86_64_COPY 5 /* Copy symbol at runtime */ +#define R_X86_64_GLOB_DAT 6 /* Create GOT entry */ +#define R_X86_64_JUMP_SLOT 7 /* Create PLT entry */ +#define R_X86_64_RELATIVE 8 /* Adjust by program base */ +#define R_X86_64_GOTPCREL 9 /* 32 bit signed pc relative + offset to GOT */ +#define R_X86_64_32 10 /* Direct 32 bit zero extended */ +#define R_X86_64_32S 11 /* Direct 32 bit sign extended */ +#define R_X86_64_16 12 /* Direct 16 bit zero extended */ +#define R_X86_64_PC16 13 /* 16 bit sign extended pc relative */ +#define R_X86_64_8 14 /* Direct 8 bit sign extended */ +#define R_X86_64_PC8 15 /* 8 bit sign extended pc relative */ + +#define R_X86_64_NUM 16 + +/* + * This is used to ensure we don't load something for the wrong architecture. + */ +#define elf_check_arch(x) \ + ((x)->e_machine == EM_X86_64) + +#define ELF_CLASS ELFCLASS64 +#define ELF_DATA ELFDATA2LSB +#define ELF_ARCH EM_X86_64 + +#define ELF_PLAT_INIT(regs, load_addr) do { \ + PT_REGS_RBX(regs) = 0; \ + PT_REGS_RCX(regs) = 0; \ + PT_REGS_RDX(regs) = 0; \ + PT_REGS_RSI(regs) = 0; \ + PT_REGS_RDI(regs) = 0; \ + PT_REGS_RBP(regs) = 0; \ + PT_REGS_RAX(regs) = 0; \ + PT_REGS_R8(regs) = 0; \ + PT_REGS_R9(regs) = 0; \ + PT_REGS_R10(regs) = 0; \ + PT_REGS_R11(regs) = 0; \ + PT_REGS_R12(regs) = 0; \ + PT_REGS_R13(regs) = 0; \ + PT_REGS_R14(regs) = 0; \ + PT_REGS_R15(regs) = 0; \ +} while (0) + +#define ELF_CORE_COPY_REGS(pr_reg, _regs) \ + (pr_reg)[0] = (_regs)->regs.gp[0]; \ + (pr_reg)[1] = (_regs)->regs.gp[1]; \ + (pr_reg)[2] = (_regs)->regs.gp[2]; \ + (pr_reg)[3] = (_regs)->regs.gp[3]; \ + (pr_reg)[4] = (_regs)->regs.gp[4]; \ + (pr_reg)[5] = (_regs)->regs.gp[5]; \ + (pr_reg)[6] = (_regs)->regs.gp[6]; \ + (pr_reg)[7] = (_regs)->regs.gp[7]; \ + (pr_reg)[8] = (_regs)->regs.gp[8]; \ + (pr_reg)[9] = (_regs)->regs.gp[9]; \ + (pr_reg)[10] = (_regs)->regs.gp[10]; \ + (pr_reg)[11] = (_regs)->regs.gp[11]; \ + (pr_reg)[12] = (_regs)->regs.gp[12]; \ + (pr_reg)[13] = (_regs)->regs.gp[13]; \ + (pr_reg)[14] = (_regs)->regs.gp[14]; \ + (pr_reg)[15] = (_regs)->regs.gp[15]; \ + (pr_reg)[16] = (_regs)->regs.gp[16]; \ + (pr_reg)[17] = (_regs)->regs.gp[17]; \ + (pr_reg)[18] = (_regs)->regs.gp[18]; \ + (pr_reg)[19] = (_regs)->regs.gp[19]; \ + (pr_reg)[20] = (_regs)->regs.gp[20]; \ + (pr_reg)[21] = current->thread.arch.fs; \ + (pr_reg)[22] = 0; \ + (pr_reg)[23] = 0; \ + (pr_reg)[24] = 0; \ + (pr_reg)[25] = 0; \ + (pr_reg)[26] = 0; + +#define ELF_PLATFORM "x86_64" + +/* No user-accessible fixmap addresses, i.e. 
vsyscall */ +#define FIXADDR_USER_START 0 +#define FIXADDR_USER_END 0 + +#define ARCH_HAS_SETUP_ADDITIONAL_PAGES 1 +struct linux_binprm; +extern int arch_setup_additional_pages(struct linux_binprm *bprm, + int uses_interp); + +extern unsigned long um_vdso_addr; +#define AT_SYSINFO_EHDR 33 +#define ARCH_DLINFO NEW_AUX_ENT(AT_SYSINFO_EHDR, um_vdso_addr) + +#endif + +typedef unsigned long elf_greg_t; + +#define ELF_NGREG (sizeof (struct user_regs_struct) / sizeof(elf_greg_t)) +typedef elf_greg_t elf_gregset_t[ELF_NGREG]; + +typedef struct user_i387_struct elf_fpregset_t; + +#define task_pt_regs(t) (&(t)->thread.regs) + +struct task_struct; + +extern int elf_core_copy_fpregs(struct task_struct *t, elf_fpregset_t *fpu); + +#define ELF_CORE_COPY_FPREGS(t, fpu) elf_core_copy_fpregs(t, fpu) + +#define ELF_EXEC_PAGESIZE 4096 + +#define ELF_ET_DYN_BASE (2 * TASK_SIZE / 3) + +extern long elf_aux_hwcap; +#define ELF_HWCAP (elf_aux_hwcap) + +#define SET_PERSONALITY(ex) do ; while(0) +#define __HAVE_ARCH_GATE_AREA 1 + +#endif diff --git a/arch/x86/um/asm/irq_vectors.h b/arch/x86/um/asm/irq_vectors.h new file mode 100644 index 000000000000..272a81e0ce14 --- /dev/null +++ b/arch/x86/um/asm/irq_vectors.h @@ -0,0 +1,10 @@ +/* + * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) + * Licensed under the GPL + */ + +#ifndef __UM_IRQ_VECTORS_H +#define __UM_IRQ_VECTORS_H + +#endif + diff --git a/arch/x86/um/asm/mm_context.h b/arch/x86/um/asm/mm_context.h new file mode 100644 index 000000000000..4a73d63e4760 --- /dev/null +++ b/arch/x86/um/asm/mm_context.h @@ -0,0 +1,72 @@ +/* + * Copyright (C) 2004 Fujitsu Siemens Computers GmbH + * Licensed under the GPL + * + * Author: Bodo Stroesser <bstroesser@fujitsu-siemens.com> + */ + +#ifndef __ASM_LDT_H +#define __ASM_LDT_H + +#include <linux/mutex.h> +#include <asm/ldt.h> + +extern void ldt_host_info(void); + +#define LDT_PAGES_MAX \ + ((LDT_ENTRIES * LDT_ENTRY_SIZE)/PAGE_SIZE) +#define LDT_ENTRIES_PER_PAGE \ + (PAGE_SIZE/LDT_ENTRY_SIZE) +#define LDT_DIRECT_ENTRIES \ + ((LDT_PAGES_MAX*sizeof(void *))/LDT_ENTRY_SIZE) + +struct ldt_entry { + __u32 a; + __u32 b; +}; + +typedef struct uml_ldt { + int entry_count; + struct mutex lock; + union { + struct ldt_entry * pages[LDT_PAGES_MAX]; + struct ldt_entry entries[LDT_DIRECT_ENTRIES]; + } u; +} uml_ldt_t; + +#define LDT_entry_a(info) \ + ((((info)->base_addr & 0x0000ffff) << 16) | ((info)->limit & 0x0ffff)) + +#define LDT_entry_b(info) \ + (((info)->base_addr & 0xff000000) | \ + (((info)->base_addr & 0x00ff0000) >> 16) | \ + ((info)->limit & 0xf0000) | \ + (((info)->read_exec_only ^ 1) << 9) | \ + ((info)->contents << 10) | \ + (((info)->seg_not_present ^ 1) << 15) | \ + ((info)->seg_32bit << 22) | \ + ((info)->limit_in_pages << 23) | \ + ((info)->useable << 20) | \ + 0x7000) + +#define _LDT_empty(info) (\ + (info)->base_addr == 0 && \ + (info)->limit == 0 && \ + (info)->contents == 0 && \ + (info)->read_exec_only == 1 && \ + (info)->seg_32bit == 0 && \ + (info)->limit_in_pages == 0 && \ + (info)->seg_not_present == 1 && \ + (info)->useable == 0 ) + +#ifdef CONFIG_X86_64 +#define LDT_empty(info) (_LDT_empty(info) && ((info)->lm == 0)) +#else +#define LDT_empty(info) (_LDT_empty(info)) +#endif + +struct uml_arch_mm_context { + uml_ldt_t ldt; +}; + +#endif diff --git a/arch/x86/um/asm/module.h b/arch/x86/um/asm/module.h new file mode 100644 index 000000000000..61af80e932eb --- /dev/null +++ b/arch/x86/um/asm/module.h @@ -0,0 +1,23 @@ +#ifndef __UM_MODULE_H +#define __UM_MODULE_H + +/* UML is simple */ +struct mod_arch_specific 
+{
+};
+
+#ifdef CONFIG_X86_32
+
+#define Elf_Shdr Elf32_Shdr
+#define Elf_Sym Elf32_Sym
+#define Elf_Ehdr Elf32_Ehdr
+
+#else
+
+#define Elf_Shdr Elf64_Shdr
+#define Elf_Sym Elf64_Sym
+#define Elf_Ehdr Elf64_Ehdr
+
+#endif
+
+#endif
diff --git a/arch/x86/um/asm/processor.h b/arch/x86/um/asm/processor.h
new file mode 100644
index 000000000000..118c143a9cb4
--- /dev/null
+++ b/arch/x86/um/asm/processor.h
@@ -0,0 +1,22 @@
+#ifndef __UM_PROCESSOR_H
+#define __UM_PROCESSOR_H
+
+/* include faultinfo structure */
+#include <sysdep/faultinfo.h>
+
+#ifdef CONFIG_X86_32
+# include "processor_32.h"
+#else
+# include "processor_64.h"
+#endif
+
+#define KSTK_EIP(tsk) KSTK_REG(tsk, HOST_IP)
+#define KSTK_ESP(tsk) KSTK_REG(tsk, HOST_SP)
+#define KSTK_EBP(tsk) KSTK_REG(tsk, HOST_BP)
+
+#define ARCH_IS_STACKGROW(address) \
+	(address + 65536 + 32 * sizeof(unsigned long) >= UPT_SP(&current->thread.regs.regs))
+
+#include <asm/processor-generic.h>
+
+#endif
diff --git a/arch/x86/um/asm/processor_32.h b/arch/x86/um/asm/processor_32.h
new file mode 100644
index 000000000000..018f732704dd
--- /dev/null
+++ b/arch/x86/um/asm/processor_32.h
@@ -0,0 +1,66 @@
+/*
+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com)
+ * Licensed under the GPL
+ */
+
+#ifndef __UM_PROCESSOR_I386_H
+#define __UM_PROCESSOR_I386_H
+
+#include <linux/string.h>
+#include <asm/segment.h>
+#include <asm/ldt.h>
+
+extern int host_has_cmov;
+
+struct uml_tls_struct {
+	struct user_desc tls;
+	unsigned flushed:1;
+	unsigned present:1;
+};
+
+struct arch_thread {
+	struct uml_tls_struct tls_array[GDT_ENTRY_TLS_ENTRIES];
+	unsigned long debugregs[8];
+	int debugregs_seq;
+	struct faultinfo faultinfo;
+};
+
+#define INIT_ARCH_THREAD { \
+	.tls_array  		= { [ 0 ... GDT_ENTRY_TLS_ENTRIES - 1 ] = \
+				    { .present = 0, .flushed = 0 } }, \
+	.debugregs  		= { [ 0 ... 7 ] = 0 }, \
+	.debugregs_seq		= 0, \
+	.faultinfo		= { 0, 0, 0 } \
+}
+
+static inline void arch_flush_thread(struct arch_thread *thread)
+{
+	/* Clear any TLS still hanging */
+	memset(&thread->tls_array, 0, sizeof(thread->tls_array));
+}
+
+static inline void arch_copy_thread(struct arch_thread *from,
+                                    struct arch_thread *to)
+{
+	memcpy(&to->tls_array, &from->tls_array, sizeof(from->tls_array));
+}
+
+#include <asm/user.h>
+
+/* REP NOP (PAUSE) is a good thing to insert into busy-wait loops. */
+static inline void rep_nop(void)
+{
+	__asm__ __volatile__("rep;nop": : :"memory");
+}
+
+#define cpu_relax()	rep_nop()
+
+/*
+ * Default implementation of macro that returns current
+ * instruction pointer ("program counter"). Stolen
+ * from asm-i386/processor.h
+ */
+#define current_text_addr() \
+	({ void *pc; __asm__("movl $1f,%0\n1:":"=g" (pc)); pc; })
+
+#endif
diff --git a/arch/x86/um/asm/processor_64.h b/arch/x86/um/asm/processor_64.h
new file mode 100644
index 000000000000..61de92d916c3
--- /dev/null
+++ b/arch/x86/um/asm/processor_64.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright 2003 PathScale, Inc.
+ *
+ * Licensed under the GPL
+ */
+
+#ifndef __UM_PROCESSOR_X86_64_H
+#define __UM_PROCESSOR_X86_64_H
+
+struct arch_thread {
+	unsigned long debugregs[8];
+	int debugregs_seq;
+	unsigned long fs;
+	struct faultinfo faultinfo;
+};
+
+/* REP NOP (PAUSE) is a good thing to insert into busy-wait loops. */
+static inline void rep_nop(void)
+{
+	__asm__ __volatile__("rep;nop": : :"memory");
+}
+
+#define cpu_relax()   rep_nop()
+
+#define INIT_ARCH_THREAD { .debugregs  		= { [ 0 ...
7 ] = 0 }, \ + .debugregs_seq = 0, \ + .fs = 0, \ + .faultinfo = { 0, 0, 0 } } + +static inline void arch_flush_thread(struct arch_thread *thread) +{ +} + +static inline void arch_copy_thread(struct arch_thread *from, + struct arch_thread *to) +{ + to->fs = from->fs; +} + +#include <asm/user.h> + +#define current_text_addr() \ + ({ void *pc; __asm__("movq $1f,%0\n1:":"=g" (pc)); pc; }) + +#endif diff --git a/arch/x86/um/asm/ptrace.h b/arch/x86/um/asm/ptrace.h new file mode 100644 index 000000000000..c8aca8c501b0 --- /dev/null +++ b/arch/x86/um/asm/ptrace.h @@ -0,0 +1,5 @@ +#ifdef CONFIG_X86_32 +# include "ptrace_32.h" +#else +# include "ptrace_64.h" +#endif diff --git a/arch/x86/um/asm/ptrace_32.h b/arch/x86/um/asm/ptrace_32.h new file mode 100644 index 000000000000..5d2a59112537 --- /dev/null +++ b/arch/x86/um/asm/ptrace_32.h @@ -0,0 +1,51 @@ +/* + * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * Licensed under the GPL + */ + +#ifndef __UM_PTRACE_I386_H +#define __UM_PTRACE_I386_H + +#define HOST_AUDIT_ARCH AUDIT_ARCH_I386 + +#include "linux/compiler.h" +#include "asm/ptrace-generic.h" + +#define PT_REGS_EAX(r) UPT_EAX(&(r)->regs) +#define PT_REGS_EBX(r) UPT_EBX(&(r)->regs) +#define PT_REGS_ECX(r) UPT_ECX(&(r)->regs) +#define PT_REGS_EDX(r) UPT_EDX(&(r)->regs) +#define PT_REGS_ESI(r) UPT_ESI(&(r)->regs) +#define PT_REGS_EDI(r) UPT_EDI(&(r)->regs) +#define PT_REGS_EBP(r) UPT_EBP(&(r)->regs) + +#define PT_REGS_CS(r) UPT_CS(&(r)->regs) +#define PT_REGS_SS(r) UPT_SS(&(r)->regs) +#define PT_REGS_DS(r) UPT_DS(&(r)->regs) +#define PT_REGS_ES(r) UPT_ES(&(r)->regs) +#define PT_REGS_FS(r) UPT_FS(&(r)->regs) +#define PT_REGS_GS(r) UPT_GS(&(r)->regs) + +#define PT_REGS_EFLAGS(r) UPT_EFLAGS(&(r)->regs) + +#define PT_REGS_ORIG_SYSCALL(r) PT_REGS_EAX(r) +#define PT_REGS_SYSCALL_RET(r) PT_REGS_EAX(r) +#define PT_FIX_EXEC_STACK(sp) do ; while(0) + +#define profile_pc(regs) PT_REGS_IP(regs) + +#define user_mode(r) UPT_IS_USER(&(r)->regs) + +/* + * Forward declaration to avoid including sysdep/tls.h, which causes a + * circular include, and compilation failures. + */ +struct user_desc; + +extern int ptrace_get_thread_area(struct task_struct *child, int idx, + struct user_desc __user *user_desc); + +extern int ptrace_set_thread_area(struct task_struct *child, int idx, + struct user_desc __user *user_desc); + +#endif diff --git a/arch/x86/um/asm/ptrace_64.h b/arch/x86/um/asm/ptrace_64.h new file mode 100644 index 000000000000..706a0d80545c --- /dev/null +++ b/arch/x86/um/asm/ptrace_64.h @@ -0,0 +1,72 @@ +/* + * Copyright 2003 PathScale, Inc. 
+ * + * Licensed under the GPL + */ + +#ifndef __UM_PTRACE_X86_64_H +#define __UM_PTRACE_X86_64_H + +#include "linux/compiler.h" +#include "asm/errno.h" + +#define __FRAME_OFFSETS /* Needed to get the R* macros */ +#include "asm/ptrace-generic.h" + +#define HOST_AUDIT_ARCH AUDIT_ARCH_X86_64 + +#define PT_REGS_RBX(r) UPT_RBX(&(r)->regs) +#define PT_REGS_RCX(r) UPT_RCX(&(r)->regs) +#define PT_REGS_RDX(r) UPT_RDX(&(r)->regs) +#define PT_REGS_RSI(r) UPT_RSI(&(r)->regs) +#define PT_REGS_RDI(r) UPT_RDI(&(r)->regs) +#define PT_REGS_RBP(r) UPT_RBP(&(r)->regs) +#define PT_REGS_RAX(r) UPT_RAX(&(r)->regs) +#define PT_REGS_R8(r) UPT_R8(&(r)->regs) +#define PT_REGS_R9(r) UPT_R9(&(r)->regs) +#define PT_REGS_R10(r) UPT_R10(&(r)->regs) +#define PT_REGS_R11(r) UPT_R11(&(r)->regs) +#define PT_REGS_R12(r) UPT_R12(&(r)->regs) +#define PT_REGS_R13(r) UPT_R13(&(r)->regs) +#define PT_REGS_R14(r) UPT_R14(&(r)->regs) +#define PT_REGS_R15(r) UPT_R15(&(r)->regs) + +#define PT_REGS_FS(r) UPT_FS(&(r)->regs) +#define PT_REGS_GS(r) UPT_GS(&(r)->regs) +#define PT_REGS_DS(r) UPT_DS(&(r)->regs) +#define PT_REGS_ES(r) UPT_ES(&(r)->regs) +#define PT_REGS_SS(r) UPT_SS(&(r)->regs) +#define PT_REGS_CS(r) UPT_CS(&(r)->regs) + +#define PT_REGS_ORIG_RAX(r) UPT_ORIG_RAX(&(r)->regs) +#define PT_REGS_RIP(r) UPT_IP(&(r)->regs) +#define PT_REGS_SP(r) UPT_SP(&(r)->regs) + +#define PT_REGS_EFLAGS(r) UPT_EFLAGS(&(r)->regs) + +/* XXX */ +#define user_mode(r) UPT_IS_USER(&(r)->regs) +#define PT_REGS_ORIG_SYSCALL(r) PT_REGS_RAX(r) +#define PT_REGS_SYSCALL_RET(r) PT_REGS_RAX(r) + +#define PT_FIX_EXEC_STACK(sp) do ; while(0) + +#define profile_pc(regs) PT_REGS_IP(regs) + +struct user_desc; + +static inline int ptrace_get_thread_area(struct task_struct *child, int idx, + struct user_desc __user *user_desc) +{ + return -ENOSYS; +} + +static inline int ptrace_set_thread_area(struct task_struct *child, int idx, + struct user_desc __user *user_desc) +{ + return -ENOSYS; +} + +extern long arch_prctl(struct task_struct *task, int code, + unsigned long __user *addr); +#endif diff --git a/arch/x86/um/asm/required-features.h b/arch/x86/um/asm/required-features.h new file mode 100644 index 000000000000..dfb967b2d2f3 --- /dev/null +++ b/arch/x86/um/asm/required-features.h @@ -0,0 +1,9 @@ +#ifndef __UM_REQUIRED_FEATURES_H +#define __UM_REQUIRED_FEATURES_H + +/* + * Nothing to see, just need something for the i386 and x86_64 asm + * headers to include. 
+ */
+
+#endif
diff --git a/arch/x86/um/asm/segment.h b/arch/x86/um/asm/segment.h
new file mode 100644
index 000000000000..45183fcd10b6
--- /dev/null
+++ b/arch/x86/um/asm/segment.h
@@ -0,0 +1,10 @@
+#ifndef __UM_SEGMENT_H
+#define __UM_SEGMENT_H
+
+extern int host_gdt_entry_tls_min;
+
+#define GDT_ENTRY_TLS_ENTRIES 3
+#define GDT_ENTRY_TLS_MIN host_gdt_entry_tls_min
+#define GDT_ENTRY_TLS_MAX (GDT_ENTRY_TLS_MIN + GDT_ENTRY_TLS_ENTRIES - 1)
+
+#endif
diff --git a/arch/x86/um/asm/system.h b/arch/x86/um/asm/system.h
new file mode 100644
index 000000000000..a459fd9b7598
--- /dev/null
+++ b/arch/x86/um/asm/system.h
@@ -0,0 +1,135 @@
+#ifndef _ASM_X86_SYSTEM_H_
+#define _ASM_X86_SYSTEM_H_
+
+#include <asm/asm.h>
+#include <asm/segment.h>
+#include <asm/cpufeature.h>
+#include <asm/cmpxchg.h>
+#include <asm/nops.h>
+
+#include <linux/kernel.h>
+#include <linux/irqflags.h>
+
+/* entries in ARCH_DLINFO: */
+#ifdef CONFIG_IA32_EMULATION
+# define AT_VECTOR_SIZE_ARCH 2
+#else
+# define AT_VECTOR_SIZE_ARCH 1
+#endif
+
+extern unsigned long arch_align_stack(unsigned long sp);
+
+void default_idle(void);
+
+/*
+ * Force strict CPU ordering.
+ * And yes, this is required on UP too when we're talking
+ * to devices.
+ */
+#ifdef CONFIG_X86_32
+/*
+ * Some non-Intel clones support out of order store. wmb() ceases to be a
+ * nop for these.
+ */
+#define mb() alternative("lock; addl $0,0(%%esp)", "mfence", X86_FEATURE_XMM2)
+#define rmb() alternative("lock; addl $0,0(%%esp)", "lfence", X86_FEATURE_XMM2)
+#define wmb() alternative("lock; addl $0,0(%%esp)", "sfence", X86_FEATURE_XMM)
+#else
+#define mb() 	asm volatile("mfence":::"memory")
+#define rmb()	asm volatile("lfence":::"memory")
+#define wmb()	asm volatile("sfence" ::: "memory")
+#endif
+
+/**
+ * read_barrier_depends - Flush all pending reads that subsequent reads
+ * depend on.
+ *
+ * No data-dependent reads from memory-like regions are ever reordered
+ * over this barrier. All reads preceding this primitive are guaranteed
+ * to access memory (but not necessarily other CPUs' caches) before any
+ * reads following this primitive that depend on the data returned by
+ * any of the preceding reads. This primitive is much lighter weight than
+ * rmb() on most CPUs, and is never heavier weight than is
+ * rmb().
+ *
+ * These ordering constraints are respected by both the local CPU
+ * and the compiler.
+ *
+ * Ordering is not guaranteed by anything other than these primitives,
+ * not even by data dependencies. See the documentation for
+ * memory_barrier() for examples and URLs to more information.
+ *
+ * For example, the following code would force ordering (the initial
+ * value of "a" is zero, "b" is one, and "p" is "&a"):
+ *
+ * <programlisting>
+ *	CPU 0				CPU 1
+ *
+ *	b = 2;
+ *	memory_barrier();
+ *	p = &b;				q = p;
+ *					read_barrier_depends();
+ *					d = *q;
+ * </programlisting>
+ *
+ * because the read of "*q" depends on the read of "p" and these
+ * two reads are separated by a read_barrier_depends(). However,
+ * the following code, with the same initial values for "a" and "b":
+ *
+ * <programlisting>
+ *	CPU 0				CPU 1
+ *
+ *	a = 2;
+ *	memory_barrier();
+ *	b = 3;				y = b;
+ *					read_barrier_depends();
+ *					x = a;
+ * </programlisting>
+ *
+ * does not enforce ordering, since there is no data dependency between
+ * the read of "a" and the read of "b". Therefore, on some CPUs, such
+ * as Alpha, "y" could be set to 3 and "x" to 0. Use rmb()
+ * in cases like this where there are no data dependencies.
+ **/
+
+#define read_barrier_depends()	do { } while (0)
+
+#ifdef CONFIG_SMP
+#define smp_mb()	mb()
+#ifdef CONFIG_X86_PPRO_FENCE
+# define smp_rmb()	rmb()
+#else
+# define smp_rmb()	barrier()
+#endif
+#ifdef CONFIG_X86_OOSTORE
+# define smp_wmb() 	wmb()
+#else
+# define smp_wmb()	barrier()
+#endif
+#define smp_read_barrier_depends()	read_barrier_depends()
+#define set_mb(var, value) do { (void)xchg(&var, value); } while (0)
+#else
+#define smp_mb()	barrier()
+#define smp_rmb()	barrier()
+#define smp_wmb()	barrier()
+#define smp_read_barrier_depends()	do { } while (0)
+#define set_mb(var, value) do { var = value; barrier(); } while (0)
+#endif
+
+/*
+ * Stop RDTSC speculation. This is needed when you need to use RDTSC
+ * (or get_cycles or vread that possibly accesses the TSC) in a defined
+ * code region.
+ *
+ * (Could use an alternative three way for this if there was one.)
+ */
+static inline void rdtsc_barrier(void)
+{
+	alternative(ASM_NOP3, "mfence", X86_FEATURE_MFENCE_RDTSC);
+	alternative(ASM_NOP3, "lfence", X86_FEATURE_LFENCE_RDTSC);
+}
+
+extern void *_switch_to(void *prev, void *next, void *last);
+#define switch_to(prev, next, last) prev = _switch_to(prev, next, last)
+
+#endif
diff --git a/arch/x86/um/asm/vm-flags.h b/arch/x86/um/asm/vm-flags.h
new file mode 100644
index 000000000000..7c297e9e2413
--- /dev/null
+++ b/arch/x86/um/asm/vm-flags.h
@@ -0,0 +1,25 @@
+/*
+ * Copyright (C) 2004 Jeff Dike (jdike@addtoit.com)
+ * Copyright 2003 PathScale, Inc.
+ * Licensed under the GPL
+ */
+
+#ifndef __VM_FLAGS_X86_H
+#define __VM_FLAGS_X86_H
+
+#ifdef CONFIG_X86_32
+
+#define VM_DATA_DEFAULT_FLAGS \
+	(VM_READ | VM_WRITE | \
+	((current->personality & READ_IMPLIES_EXEC) ? VM_EXEC : 0 ) | \
+		 VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
+
+#else
+
+#define VM_DATA_DEFAULT_FLAGS (VM_READ | VM_WRITE | VM_EXEC | \
+	VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
+#define VM_STACK_DEFAULT_FLAGS (VM_GROWSDOWN | VM_READ | VM_WRITE | \
+	VM_EXEC | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
+
+#endif
#endif
diff --git a/arch/x86/um/bug.c b/arch/x86/um/bug.c
new file mode 100644
index 000000000000..e8034e363d83
--- /dev/null
+++ b/arch/x86/um/bug.c
@@ -0,0 +1,21 @@
+/*
+ * Copyright (C) 2006 Jeff Dike (jdike@addtoit.com)
+ * Licensed under the GPL V2
+ */
+
+#include <linux/uaccess.h>
+
+/*
+ * Mostly copied from i386/x86_64 - eliminated the eip < PAGE_OFFSET because
+ * that's not relevant in skas mode.
+ */ + +int is_valid_bugaddr(unsigned long eip) +{ + unsigned short ud2; + + if (probe_kernel_address((unsigned short __user *)eip, ud2)) + return 0; + + return ud2 == 0x0b0f; +} diff --git a/arch/x86/um/bugs_32.c b/arch/x86/um/bugs_32.c new file mode 100644 index 000000000000..a1fba5fb9dbe --- /dev/null +++ b/arch/x86/um/bugs_32.c @@ -0,0 +1,76 @@ +/* + * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * Licensed under the GPL + */ + +#include <signal.h> +#include "kern_util.h" +#include "longjmp.h" +#include "sysdep/ptrace.h" +#include <generated/asm-offsets.h> + +/* Set during early boot */ +static int host_has_cmov = 1; +static jmp_buf cmov_test_return; + +#define TASK_PID(task) *((int *) &(((char *) (task))[HOST_TASK_PID])) + +static void cmov_sigill_test_handler(int sig) +{ + host_has_cmov = 0; + longjmp(cmov_test_return, 1); +} + +void arch_check_bugs(void) +{ + struct sigaction old, new; + + printk(UM_KERN_INFO "Checking for host processor cmov support..."); + new.sa_handler = cmov_sigill_test_handler; + + /* Make sure that SIGILL is enabled after the handler longjmps back */ + new.sa_flags = SA_NODEFER; + sigemptyset(&new.sa_mask); + sigaction(SIGILL, &new, &old); + + if (setjmp(cmov_test_return) == 0) { + unsigned long foo = 0; + __asm__ __volatile__("cmovz %0, %1" : "=r" (foo) : "0" (foo)); + printk(UM_KERN_CONT "Yes\n"); + } else + printk(UM_KERN_CONT "No\n"); + + sigaction(SIGILL, &old, &new); +} + +void arch_examine_signal(int sig, struct uml_pt_regs *regs) +{ + unsigned char tmp[2]; + + /* + * This is testing for a cmov (0x0f 0x4x) instruction causing a + * SIGILL in init. + */ + if ((sig != SIGILL) || (TASK_PID(get_current()) != 1)) + return; + + if (copy_from_user_proc(tmp, (void *) UPT_IP(regs), 2)) { + printk(UM_KERN_ERR "SIGILL in init, could not read " + "instructions!\n"); + return; + } + + if ((tmp[0] != 0x0f) || ((tmp[1] & 0xf0) != 0x40)) + return; + + if (host_has_cmov == 0) + printk(UM_KERN_ERR "SIGILL caused by cmov, which this " + "processor doesn't implement. Boot a filesystem " + "compiled for older processors"); + else if (host_has_cmov == 1) + printk(UM_KERN_ERR "SIGILL caused by cmov, which this " + "processor claims to implement"); + else + printk(UM_KERN_ERR "Bad value for host_has_cmov (%d)", + host_has_cmov); +} diff --git a/arch/x86/um/bugs_64.c b/arch/x86/um/bugs_64.c new file mode 100644 index 000000000000..44e02ba2a265 --- /dev/null +++ b/arch/x86/um/bugs_64.c @@ -0,0 +1,15 @@ +/* + * Copyright 2003 PathScale, Inc. + * + * Licensed under the GPL + */ + +#include "sysdep/ptrace.h" + +void arch_check_bugs(void) +{ +} + +void arch_examine_signal(int sig, struct uml_pt_regs *regs) +{ +} diff --git a/arch/x86/um/checksum_32.S b/arch/x86/um/checksum_32.S new file mode 100644 index 000000000000..f058d2f82e18 --- /dev/null +++ b/arch/x86/um/checksum_32.S @@ -0,0 +1,458 @@ +/* + * INET An implementation of the TCP/IP protocol suite for the LINUX + * operating system. INET is implemented using the BSD Socket + * interface as the means of communication with the user level. + * + * IP/TCP/UDP checksumming routines + * + * Authors: Jorge Cwik, <jorge@laser.satlink.net> + * Arnt Gulbrandsen, <agulbra@nvg.unit.no> + * Tom May, <ftom@netcom.com> + * Pentium Pro/II routines: + * Alexander Kjeldaas <astor@guardian.no> + * Finn Arne Gangstad <finnag@guardian.no> + * Lots of code moved from tcp.c and ip.c; see those files + * for more names. + * + * Changes: Ingo Molnar, converted csum_partial_copy() to 2.1 exception + * handling. 
+ * Andi Kleen, add zeroing on error + * converted to pure assembler + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <asm/errno.h> + +/* + * computes a partial checksum, e.g. for TCP/UDP fragments + */ + +/* +unsigned int csum_partial(const unsigned char * buff, int len, unsigned int sum) + */ + +.text +.align 4 +.globl csum_partial + +#ifndef CONFIG_X86_USE_PPRO_CHECKSUM + + /* + * Experiments with Ethernet and SLIP connections show that buff + * is aligned on either a 2-byte or 4-byte boundary. We get at + * least a twofold speedup on 486 and Pentium if it is 4-byte aligned. + * Fortunately, it is easy to convert 2-byte alignment to 4-byte + * alignment for the unrolled loop. + */ +csum_partial: + pushl %esi + pushl %ebx + movl 20(%esp),%eax # Function arg: unsigned int sum + movl 16(%esp),%ecx # Function arg: int len + movl 12(%esp),%esi # Function arg: unsigned char *buff + testl $2, %esi # Check alignment. + jz 2f # Jump if alignment is ok. + subl $2, %ecx # Alignment uses up two bytes. + jae 1f # Jump if we had at least two bytes. + addl $2, %ecx # ecx was < 2. Deal with it. + jmp 4f +1: movw (%esi), %bx + addl $2, %esi + addw %bx, %ax + adcl $0, %eax +2: + movl %ecx, %edx + shrl $5, %ecx + jz 2f + testl %esi, %esi +1: movl (%esi), %ebx + adcl %ebx, %eax + movl 4(%esi), %ebx + adcl %ebx, %eax + movl 8(%esi), %ebx + adcl %ebx, %eax + movl 12(%esi), %ebx + adcl %ebx, %eax + movl 16(%esi), %ebx + adcl %ebx, %eax + movl 20(%esi), %ebx + adcl %ebx, %eax + movl 24(%esi), %ebx + adcl %ebx, %eax + movl 28(%esi), %ebx + adcl %ebx, %eax + lea 32(%esi), %esi + dec %ecx + jne 1b + adcl $0, %eax +2: movl %edx, %ecx + andl $0x1c, %edx + je 4f + shrl $2, %edx # This clears CF +3: adcl (%esi), %eax + lea 4(%esi), %esi + dec %edx + jne 3b + adcl $0, %eax +4: andl $3, %ecx + jz 7f + cmpl $2, %ecx + jb 5f + movw (%esi),%cx + leal 2(%esi),%esi + je 6f + shll $16,%ecx +5: movb (%esi),%cl +6: addl %ecx,%eax + adcl $0, %eax +7: + popl %ebx + popl %esi + ret + +#else + +/* Version for PentiumII/PPro */ + +csum_partial: + pushl %esi + pushl %ebx + movl 20(%esp),%eax # Function arg: unsigned int sum + movl 16(%esp),%ecx # Function arg: int len + movl 12(%esp),%esi # Function arg: const unsigned char *buf + + testl $2, %esi + jnz 30f +10: + movl %ecx, %edx + movl %ecx, %ebx + andl $0x7c, %ebx + shrl $7, %ecx + addl %ebx,%esi + shrl $2, %ebx + negl %ebx + lea 45f(%ebx,%ebx,2), %ebx + testl %esi, %esi + jmp *%ebx + + # Handle 2-byte-aligned regions +20: addw (%esi), %ax + lea 2(%esi), %esi + adcl $0, %eax + jmp 10b + +30: subl $2, %ecx + ja 20b + je 32f + movzbl (%esi),%ebx # csumming 1 byte, 2-aligned + addl %ebx, %eax + adcl $0, %eax + jmp 80f +32: + addw (%esi), %ax # csumming 2 bytes, 2-aligned + adcl $0, %eax + jmp 80f + +40: + addl -128(%esi), %eax + adcl -124(%esi), %eax + adcl -120(%esi), %eax + adcl -116(%esi), %eax + adcl -112(%esi), %eax + adcl -108(%esi), %eax + adcl -104(%esi), %eax + adcl -100(%esi), %eax + adcl -96(%esi), %eax + adcl -92(%esi), %eax + adcl -88(%esi), %eax + adcl -84(%esi), %eax + adcl -80(%esi), %eax + adcl -76(%esi), %eax + adcl -72(%esi), %eax + adcl -68(%esi), %eax + adcl -64(%esi), %eax + adcl -60(%esi), %eax + adcl -56(%esi), %eax + adcl -52(%esi), %eax + adcl -48(%esi), %eax + adcl -44(%esi), %eax + adcl -40(%esi), %eax + adcl -36(%esi), %eax + adcl 
-32(%esi), %eax + adcl -28(%esi), %eax + adcl -24(%esi), %eax + adcl -20(%esi), %eax + adcl -16(%esi), %eax + adcl -12(%esi), %eax + adcl -8(%esi), %eax + adcl -4(%esi), %eax +45: + lea 128(%esi), %esi + adcl $0, %eax + dec %ecx + jge 40b + movl %edx, %ecx +50: andl $3, %ecx + jz 80f + + # Handle the last 1-3 bytes without jumping + notl %ecx # 1->2, 2->1, 3->0, higher bits are masked + movl $0xffffff,%ebx # by the shll and shrl instructions + shll $3,%ecx + shrl %cl,%ebx + andl -128(%esi),%ebx # esi is 4-aligned so should be ok + addl %ebx,%eax + adcl $0,%eax +80: + popl %ebx + popl %esi + ret + +#endif + +/* +unsigned int csum_partial_copy_generic (const char *src, char *dst, + int len, int sum, int *src_err_ptr, int *dst_err_ptr) + */ + +/* + * Copy from ds while checksumming, otherwise like csum_partial + * + * The macros SRC and DST specify the type of access for the instruction. + * thus we can call a custom exception handler for all access types. + * + * FIXME: could someone double-check whether I haven't mixed up some SRC and + * DST definitions? It's damn hard to trigger all cases. I hope I got + * them all but there's no guarantee. + */ + +#define SRC(y...) \ + 9999: y; \ + .section __ex_table, "a"; \ + .long 9999b, 6001f ; \ + .previous + +#define DST(y...) \ + 9999: y; \ + .section __ex_table, "a"; \ + .long 9999b, 6002f ; \ + .previous + +.align 4 + +#ifndef CONFIG_X86_USE_PPRO_CHECKSUM + +#define ARGBASE 16 +#define FP 12 + +csum_partial_copy_generic_i386: + subl $4,%esp + pushl %edi + pushl %esi + pushl %ebx + movl ARGBASE+16(%esp),%eax # sum + movl ARGBASE+12(%esp),%ecx # len + movl ARGBASE+4(%esp),%esi # src + movl ARGBASE+8(%esp),%edi # dst + + testl $2, %edi # Check alignment. + jz 2f # Jump if alignment is ok. + subl $2, %ecx # Alignment uses up two bytes. + jae 1f # Jump if we had at least two bytes. + addl $2, %ecx # ecx was < 2. Deal with it. 
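+	# Fewer than two bytes remain; the tail code at 4: copies and
+	# checksums them.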
+ jmp 4f +SRC(1: movw (%esi), %bx ) + addl $2, %esi +DST( movw %bx, (%edi) ) + addl $2, %edi + addw %bx, %ax + adcl $0, %eax +2: + movl %ecx, FP(%esp) + shrl $5, %ecx + jz 2f + testl %esi, %esi +SRC(1: movl (%esi), %ebx ) +SRC( movl 4(%esi), %edx ) + adcl %ebx, %eax +DST( movl %ebx, (%edi) ) + adcl %edx, %eax +DST( movl %edx, 4(%edi) ) + +SRC( movl 8(%esi), %ebx ) +SRC( movl 12(%esi), %edx ) + adcl %ebx, %eax +DST( movl %ebx, 8(%edi) ) + adcl %edx, %eax +DST( movl %edx, 12(%edi) ) + +SRC( movl 16(%esi), %ebx ) +SRC( movl 20(%esi), %edx ) + adcl %ebx, %eax +DST( movl %ebx, 16(%edi) ) + adcl %edx, %eax +DST( movl %edx, 20(%edi) ) + +SRC( movl 24(%esi), %ebx ) +SRC( movl 28(%esi), %edx ) + adcl %ebx, %eax +DST( movl %ebx, 24(%edi) ) + adcl %edx, %eax +DST( movl %edx, 28(%edi) ) + + lea 32(%esi), %esi + lea 32(%edi), %edi + dec %ecx + jne 1b + adcl $0, %eax +2: movl FP(%esp), %edx + movl %edx, %ecx + andl $0x1c, %edx + je 4f + shrl $2, %edx # This clears CF +SRC(3: movl (%esi), %ebx ) + adcl %ebx, %eax +DST( movl %ebx, (%edi) ) + lea 4(%esi), %esi + lea 4(%edi), %edi + dec %edx + jne 3b + adcl $0, %eax +4: andl $3, %ecx + jz 7f + cmpl $2, %ecx + jb 5f +SRC( movw (%esi), %cx ) + leal 2(%esi), %esi +DST( movw %cx, (%edi) ) + leal 2(%edi), %edi + je 6f + shll $16,%ecx +SRC(5: movb (%esi), %cl ) +DST( movb %cl, (%edi) ) +6: addl %ecx, %eax + adcl $0, %eax +7: +5000: + +# Exception handler: +.section .fixup, "ax" + +6001: + movl ARGBASE+20(%esp), %ebx # src_err_ptr + movl $-EFAULT, (%ebx) + + # zero the complete destination - computing the rest + # is too much work + movl ARGBASE+8(%esp), %edi # dst + movl ARGBASE+12(%esp), %ecx # len + xorl %eax,%eax + rep ; stosb + + jmp 5000b + +6002: + movl ARGBASE+24(%esp), %ebx # dst_err_ptr + movl $-EFAULT,(%ebx) + jmp 5000b + +.previous + + popl %ebx + popl %esi + popl %edi + popl %ecx # equivalent to addl $4,%esp + ret + +#else + +/* Version for PentiumII/PPro */ + +#define ROUND1(x) \ + SRC(movl x(%esi), %ebx ) ; \ + addl %ebx, %eax ; \ + DST(movl %ebx, x(%edi) ) ; + +#define ROUND(x) \ + SRC(movl x(%esi), %ebx ) ; \ + adcl %ebx, %eax ; \ + DST(movl %ebx, x(%edi) ) ; + +#define ARGBASE 12 + +csum_partial_copy_generic_i386: + pushl %ebx + pushl %edi + pushl %esi + movl ARGBASE+4(%esp),%esi #src + movl ARGBASE+8(%esp),%edi #dst + movl ARGBASE+12(%esp),%ecx #len + movl ARGBASE+16(%esp),%eax #sum +# movl %ecx, %edx + movl %ecx, %ebx + movl %esi, %edx + shrl $6, %ecx + andl $0x3c, %ebx + negl %ebx + subl %ebx, %esi + subl %ebx, %edi + lea -1(%esi),%edx + andl $-32,%edx + lea 3f(%ebx,%ebx), %ebx + testl %esi, %esi + jmp *%ebx +1: addl $64,%esi + addl $64,%edi + SRC(movb -32(%edx),%bl) ; SRC(movb (%edx),%bl) + ROUND1(-64) ROUND(-60) ROUND(-56) ROUND(-52) + ROUND (-48) ROUND(-44) ROUND(-40) ROUND(-36) + ROUND (-32) ROUND(-28) ROUND(-24) ROUND(-20) + ROUND (-16) ROUND(-12) ROUND(-8) ROUND(-4) +3: adcl $0,%eax + addl $64, %edx + dec %ecx + jge 1b +4: movl ARGBASE+12(%esp),%edx #len + andl $3, %edx + jz 7f + cmpl $2, %edx + jb 5f +SRC( movw (%esi), %dx ) + leal 2(%esi), %esi +DST( movw %dx, (%edi) ) + leal 2(%edi), %edi + je 6f + shll $16,%edx +5: +SRC( movb (%esi), %dl ) +DST( movb %dl, (%edi) ) +6: addl %edx, %eax + adcl $0, %eax +7: +.section .fixup, "ax" +6001: movl ARGBASE+20(%esp), %ebx # src_err_ptr + movl $-EFAULT, (%ebx) + # zero the complete destination (computing the rest is too much work) + movl ARGBASE+8(%esp),%edi # dst + movl ARGBASE+12(%esp),%ecx # len + xorl %eax,%eax + rep; stosb + jmp 7b +6002: movl ARGBASE+24(%esp), %ebx # dst_err_ptr + movl 
$-EFAULT, (%ebx) + jmp 7b +.previous + + popl %esi + popl %edi + popl %ebx + ret + +#undef ROUND +#undef ROUND1 + +#endif diff --git a/arch/x86/um/delay.c b/arch/x86/um/delay.c new file mode 100644 index 000000000000..f3fe1a688f7e --- /dev/null +++ b/arch/x86/um/delay.c @@ -0,0 +1,60 @@ +/* + * Copyright (C) 2011 Richard Weinberger <richrd@nod.at> + * Mostly copied from arch/x86/lib/delay.c + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/delay.h> +#include <asm/param.h> + +void __delay(unsigned long loops) +{ + asm volatile( + "test %0,%0\n" + "jz 3f\n" + "jmp 1f\n" + + ".align 16\n" + "1: jmp 2f\n" + + ".align 16\n" + "2: dec %0\n" + " jnz 2b\n" + "3: dec %0\n" + + : /* we don't need output */ + : "a" (loops) + ); +} +EXPORT_SYMBOL(__delay); + +inline void __const_udelay(unsigned long xloops) +{ + int d0; + + xloops *= 4; + asm("mull %%edx" + : "=d" (xloops), "=&a" (d0) + : "1" (xloops), "0" + (loops_per_jiffy * (HZ/4))); + + __delay(++xloops); +} +EXPORT_SYMBOL(__const_udelay); + +void __udelay(unsigned long usecs) +{ + __const_udelay(usecs * 0x000010c7); /* 2**32 / 1000000 (rounded up) */ +} +EXPORT_SYMBOL(__udelay); + +void __ndelay(unsigned long nsecs) +{ + __const_udelay(nsecs * 0x00005); /* 2**32 / 1000000000 (rounded up) */ +} +EXPORT_SYMBOL(__ndelay); diff --git a/arch/x86/um/elfcore.c b/arch/x86/um/elfcore.c new file mode 100644 index 000000000000..6bb49b687c97 --- /dev/null +++ b/arch/x86/um/elfcore.c @@ -0,0 +1,83 @@ +#include <linux/elf.h> +#include <linux/coredump.h> +#include <linux/fs.h> +#include <linux/mm.h> + +#include <asm/elf.h> + + +Elf32_Half elf_core_extra_phdrs(void) +{ + return vsyscall_ehdr ? 
(((struct elfhdr *)vsyscall_ehdr)->e_phnum) : 0; +} + +int elf_core_write_extra_phdrs(struct file *file, loff_t offset, size_t *size, + unsigned long limit) +{ + if ( vsyscall_ehdr ) { + const struct elfhdr *const ehdrp = + (struct elfhdr *) vsyscall_ehdr; + const struct elf_phdr *const phdrp = + (const struct elf_phdr *) (vsyscall_ehdr + ehdrp->e_phoff); + int i; + Elf32_Off ofs = 0; + + for (i = 0; i < ehdrp->e_phnum; ++i) { + struct elf_phdr phdr = phdrp[i]; + + if (phdr.p_type == PT_LOAD) { + ofs = phdr.p_offset = offset; + offset += phdr.p_filesz; + } else { + phdr.p_offset += ofs; + } + phdr.p_paddr = 0; /* match other core phdrs */ + *size += sizeof(phdr); + if (*size > limit + || !dump_write(file, &phdr, sizeof(phdr))) + return 0; + } + } + return 1; +} + +int elf_core_write_extra_data(struct file *file, size_t *size, + unsigned long limit) +{ + if ( vsyscall_ehdr ) { + const struct elfhdr *const ehdrp = + (struct elfhdr *) vsyscall_ehdr; + const struct elf_phdr *const phdrp = + (const struct elf_phdr *) (vsyscall_ehdr + ehdrp->e_phoff); + int i; + + for (i = 0; i < ehdrp->e_phnum; ++i) { + if (phdrp[i].p_type == PT_LOAD) { + void *addr = (void *) phdrp[i].p_vaddr; + size_t filesz = phdrp[i].p_filesz; + + *size += filesz; + if (*size > limit + || !dump_write(file, addr, filesz)) + return 0; + } + } + } + return 1; +} + +size_t elf_core_extra_data_size(void) +{ + if ( vsyscall_ehdr ) { + const struct elfhdr *const ehdrp = + (struct elfhdr *)vsyscall_ehdr; + const struct elf_phdr *const phdrp = + (const struct elf_phdr *) (vsyscall_ehdr + ehdrp->e_phoff); + int i; + + for (i = 0; i < ehdrp->e_phnum; ++i) + if (phdrp[i].p_type == PT_LOAD) + return (size_t) phdrp[i].p_filesz; + } + return 0; +} diff --git a/arch/x86/um/fault.c b/arch/x86/um/fault.c new file mode 100644 index 000000000000..d670f68532f4 --- /dev/null +++ b/arch/x86/um/fault.c @@ -0,0 +1,28 @@ +/* + * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * Licensed under the GPL + */ + +#include "sysdep/ptrace.h" + +/* These two are from asm-um/uaccess.h and linux/module.h, check them. 
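+ * Each entry pairs the address of an instruction that may fault (insn)
+ * with the address of its fixup code; arch_fixup() below redirects the
+ * faulting IP to that fixup.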
+ */
+struct exception_table_entry
+{
+	unsigned long insn;
+	unsigned long fixup;
+};
+
+const struct exception_table_entry *search_exception_tables(unsigned long add);
+
+/* Compare this to arch/i386/mm/extable.c:fixup_exception() */
+int arch_fixup(unsigned long address, struct uml_pt_regs *regs)
+{
+	const struct exception_table_entry *fixup;
+
+	fixup = search_exception_tables(address);
+	if (fixup != 0) {
+		UPT_IP(regs) = fixup->fixup;
+		return 1;
+	}
+	return 0;
+}
diff --git a/arch/x86/um/ksyms.c b/arch/x86/um/ksyms.c
new file mode 100644
index 000000000000..2e8f43ec6214
--- /dev/null
+++ b/arch/x86/um/ksyms.c
@@ -0,0 +1,13 @@
+#include <linux/module.h>
+#include <asm/string.h>
+#include <asm/checksum.h>
+
+#ifndef CONFIG_X86_32
+/*XXX: we need them because they would be exported by x86_64 */
+#if (__GNUC__ == 4 && __GNUC_MINOR__ >= 3) || __GNUC__ > 4
+EXPORT_SYMBOL(memcpy);
+#else
+EXPORT_SYMBOL(__memcpy);
+#endif
+#endif
+EXPORT_SYMBOL(csum_partial);
diff --git a/arch/x86/um/ldt.c b/arch/x86/um/ldt.c
new file mode 100644
index 000000000000..26b0e39d2ce9
--- /dev/null
+++ b/arch/x86/um/ldt.c
@@ -0,0 +1,502 @@
+/*
+ * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
+ * Licensed under the GPL
+ */
+
+#include <linux/mm.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <asm/unistd.h>
+#include "os.h"
+#include "proc_mm.h"
+#include "skas.h"
+#include "skas_ptrace.h"
+#include "sysdep/tls.h"
+
+extern int modify_ldt(int func, void *ptr, unsigned long bytecount);
+
+static long write_ldt_entry(struct mm_id *mm_idp, int func,
+		struct user_desc *desc, void **addr, int done)
+{
+	long res;
+
+	if (proc_mm) {
+		/*
+		 * This is special handling for the case where the mm to
+		 * modify isn't current->active_mm.
+		 * If this is called directly by modify_ldt,
+		 * (current->active_mm->context.skas.u == mm_idp)
+		 * will be true. So no call to __switch_mm(mm_idp) is done.
+		 * If this is called in case of init_new_ldt or PTRACE_LDT,
+		 * mm_idp won't belong to current->active_mm, but child->mm.
+		 * So we need to switch child's mm into our userspace, then
+		 * later switch back.
+		 *
+		 * Note: I'm unsure: should interrupts be disabled here?
+		 */
+		if (!current->active_mm || current->active_mm == &init_mm ||
+		    mm_idp != &current->active_mm->context.id)
+			__switch_mm(mm_idp);
+	}
+
+	if (ptrace_ldt) {
+		struct ptrace_ldt ldt_op = (struct ptrace_ldt) {
+			.func = func,
+			.ptr = desc,
+			.bytecount = sizeof(*desc)};
+		u32 cpu;
+		int pid;
+
+		if (!proc_mm)
+			pid = mm_idp->u.pid;
+		else {
+			cpu = get_cpu();
+			pid = userspace_pid[cpu];
+		}
+
+		res = os_ptrace_ldt(pid, 0, (unsigned long) &ldt_op);
+
+		if (proc_mm)
+			put_cpu();
+	}
+	else {
+		void *stub_addr;
+		res = syscall_stub_data(mm_idp, (unsigned long *)desc,
+					(sizeof(*desc) + sizeof(long) - 1) &
+					    ~(sizeof(long) - 1),
+					addr, &stub_addr);
+		if (!res) {
+			unsigned long args[] = { func,
+						 (unsigned long)stub_addr,
+						 sizeof(*desc),
+						 0, 0, 0 };
+			res = run_syscall_stub(mm_idp, __NR_modify_ldt, args,
+					       0, addr, done);
+		}
+	}
+
+	if (proc_mm) {
+		/*
+		 * This is the second part of special handling, that makes
+		 * PTRACE_LDT possible to implement.
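+		 * If we switched to the child's mm above, switch back
+		 * to our own.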
+		 */
+		if (current->active_mm && current->active_mm != &init_mm &&
+		    mm_idp != &current->active_mm->context.id)
+			__switch_mm(&current->active_mm->context.id);
+	}
+
+	return res;
+}
+
+static long read_ldt_from_host(void __user * ptr, unsigned long bytecount)
+{
+	int res, n;
+	struct ptrace_ldt ptrace_ldt = (struct ptrace_ldt) {
+			.func = 0,
+			.bytecount = bytecount,
+			.ptr = kmalloc(bytecount, GFP_KERNEL)};
+	u32 cpu;
+
+	if (ptrace_ldt.ptr == NULL)
+		return -ENOMEM;
+
+	/*
+	 * This is called from sys_modify_ldt only, so userspace_pid gives
+	 * us the right number
+	 */
+
+	cpu = get_cpu();
+	res = os_ptrace_ldt(userspace_pid[cpu], 0, (unsigned long) &ptrace_ldt);
+	put_cpu();
+	if (res < 0)
+		goto out;
+
+	n = copy_to_user(ptr, ptrace_ldt.ptr, res);
+	if (n != 0)
+		res = -EFAULT;
+
+  out:
+	kfree(ptrace_ldt.ptr);
+
+	return res;
+}
+
+/*
+ * In skas mode, we hold our own ldt data in UML.
+ * Thus, the code implementing sys_modify_ldt_skas
+ * is very similar to (and mostly stolen from) sys_modify_ldt
+ * in arch/i386/kernel/ldt.c
+ * The routines copied and modified in part are:
+ *	- read_ldt
+ *	- read_default_ldt
+ *	- write_ldt
+ *	- sys_modify_ldt_skas
+ */
+
+static int read_ldt(void __user * ptr, unsigned long bytecount)
+{
+	int i, err = 0;
+	unsigned long size;
+	uml_ldt_t *ldt = &current->mm->context.arch.ldt;
+
+	if (!ldt->entry_count)
+		goto out;
+	if (bytecount > LDT_ENTRY_SIZE*LDT_ENTRIES)
+		bytecount = LDT_ENTRY_SIZE*LDT_ENTRIES;
+	err = bytecount;
+
+	if (ptrace_ldt)
+		return read_ldt_from_host(ptr, bytecount);
+
+	mutex_lock(&ldt->lock);
+	if (ldt->entry_count <= LDT_DIRECT_ENTRIES) {
+		size = LDT_ENTRY_SIZE*LDT_DIRECT_ENTRIES;
+		if (size > bytecount)
+			size = bytecount;
+		if (copy_to_user(ptr, ldt->u.entries, size))
+			err = -EFAULT;
+		bytecount -= size;
+		ptr += size;
+	}
+	else {
+		for (i=0; i<ldt->entry_count/LDT_ENTRIES_PER_PAGE && bytecount;
+		     i++) {
+			size = PAGE_SIZE;
+			if (size > bytecount)
+				size = bytecount;
+			if (copy_to_user(ptr, ldt->u.pages[i], size)) {
+				err = -EFAULT;
+				break;
+			}
+			bytecount -= size;
+			ptr += size;
+		}
+	}
+	mutex_unlock(&ldt->lock);
+
+	if (bytecount == 0 || err == -EFAULT)
+		goto out;
+
+	if (clear_user(ptr, bytecount))
+		err = -EFAULT;
+
+out:
+	return err;
+}
+
+static int read_default_ldt(void __user * ptr, unsigned long bytecount)
+{
+	int err;
+
+	if (bytecount > 5*LDT_ENTRY_SIZE)
+		bytecount = 5*LDT_ENTRY_SIZE;
+
+	err = bytecount;
+	/*
+	 * UML doesn't support lcall7 and lcall27.
+	 * So, we don't really have a default ldt, but emulate
+	 * an empty ldt of common host default ldt size.
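+	 * (five zeroed entries of LDT_ENTRY_SIZE bytes each).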
+	 */
+	if (clear_user(ptr, bytecount))
+		err = -EFAULT;
+
+	return err;
+}
+
+static int write_ldt(void __user * ptr, unsigned long bytecount, int func)
+{
+	uml_ldt_t *ldt = &current->mm->context.arch.ldt;
+	struct mm_id * mm_idp = &current->mm->context.id;
+	int i, err;
+	struct user_desc ldt_info;
+	struct ldt_entry entry0, *ldt_p;
+	void *addr = NULL;
+
+	err = -EINVAL;
+	if (bytecount != sizeof(ldt_info))
+		goto out;
+	err = -EFAULT;
+	if (copy_from_user(&ldt_info, ptr, sizeof(ldt_info)))
+		goto out;
+
+	err = -EINVAL;
+	if (ldt_info.entry_number >= LDT_ENTRIES)
+		goto out;
+	if (ldt_info.contents == 3) {
+		if (func == 1)
+			goto out;
+		if (ldt_info.seg_not_present == 0)
+			goto out;
+	}
+
+	if (!ptrace_ldt)
+		mutex_lock(&ldt->lock);
+
+	err = write_ldt_entry(mm_idp, func, &ldt_info, &addr, 1);
+	if (err)
+		goto out_unlock;
+	else if (ptrace_ldt) {
+		/* With PTRACE_LDT available, this is used as a flag only */
+		ldt->entry_count = 1;
+		goto out;
+	}
+
+	if (ldt_info.entry_number >= ldt->entry_count &&
+	    ldt_info.entry_number >= LDT_DIRECT_ENTRIES) {
+		for (i=ldt->entry_count/LDT_ENTRIES_PER_PAGE;
+		     i*LDT_ENTRIES_PER_PAGE <= ldt_info.entry_number;
+		     i++) {
+			if (i == 0)
+				memcpy(&entry0, ldt->u.entries,
+				       sizeof(entry0));
+			ldt->u.pages[i] = (struct ldt_entry *)
+				__get_free_page(GFP_KERNEL|__GFP_ZERO);
+			if (!ldt->u.pages[i]) {
+				err = -ENOMEM;
+				/* Undo the change in host */
+				memset(&ldt_info, 0, sizeof(ldt_info));
+				write_ldt_entry(mm_idp, 1, &ldt_info, &addr, 1);
+				goto out_unlock;
+			}
+			if (i == 0) {
+				memcpy(ldt->u.pages[0], &entry0,
+				       sizeof(entry0));
+				memcpy(ldt->u.pages[0]+1, ldt->u.entries+1,
+				       sizeof(entry0)*(LDT_DIRECT_ENTRIES-1));
+			}
+			ldt->entry_count = (i + 1) * LDT_ENTRIES_PER_PAGE;
+		}
+	}
+	if (ldt->entry_count <= ldt_info.entry_number)
+		ldt->entry_count = ldt_info.entry_number + 1;
+
+	if (ldt->entry_count <= LDT_DIRECT_ENTRIES)
+		ldt_p = ldt->u.entries + ldt_info.entry_number;
+	else
+		ldt_p = ldt->u.pages[ldt_info.entry_number/LDT_ENTRIES_PER_PAGE] +
+			ldt_info.entry_number%LDT_ENTRIES_PER_PAGE;
+
+	if (ldt_info.base_addr == 0 && ldt_info.limit == 0 &&
+	   (func == 1 || LDT_empty(&ldt_info))) {
+		ldt_p->a = 0;
+		ldt_p->b = 0;
+	}
+	else {
+		if (func == 1)
+			ldt_info.useable = 0;
+		ldt_p->a = LDT_entry_a(&ldt_info);
+		ldt_p->b = LDT_entry_b(&ldt_info);
+	}
+	err = 0;
+
+out_unlock:
+	mutex_unlock(&ldt->lock);
+out:
+	return err;
+}
+
+static long do_modify_ldt_skas(int func, void __user *ptr,
+			       unsigned long bytecount)
+{
+	int ret = -ENOSYS;
+
+	switch (func) {
+	case 0:
+		ret = read_ldt(ptr, bytecount);
+		break;
+	case 1:
+	case 0x11:
+		ret = write_ldt(ptr, bytecount, func);
+		break;
+	case 2:
+		ret = read_default_ldt(ptr, bytecount);
+		break;
+	}
+	return ret;
+}
+
+static DEFINE_SPINLOCK(host_ldt_lock);
+static short dummy_list[9] = {0, -1};
+static short * host_ldt_entries = NULL;
+
+static void ldt_get_host_info(void)
+{
+	long ret;
+	struct ldt_entry * ldt;
+	short *tmp;
+	int i, size, k, order;
+
+	spin_lock(&host_ldt_lock);
+
+	if (host_ldt_entries != NULL) {
+		spin_unlock(&host_ldt_lock);
+		return;
+	}
+	host_ldt_entries = dummy_list+1;
+
+	spin_unlock(&host_ldt_lock);
+
+	for (i = LDT_PAGES_MAX-1, order=0; i; i>>=1, order++)
+		;
+
+	ldt = (struct ldt_entry *)
+	      __get_free_pages(GFP_KERNEL|__GFP_ZERO, order);
+	if (ldt == NULL) {
+		printk(KERN_ERR "ldt_get_host_info: couldn't allocate buffer "
+		       "for host ldt\n");
+		return;
+	}
+
+	ret = modify_ldt(0, ldt, (1<<order)*PAGE_SIZE);
+	if (ret < 0) {
+		printk(KERN_ERR "ldt_get_host_info: couldn't read host ldt\n");
+		goto out_free;
+	}
+	if (ret == 0) {
+		/* default_ldt is active, simply write an empty entry 0 */
+		host_ldt_entries = dummy_list;
+		goto out_free;
+	}
+
+	for (i=0, size=0; i<ret/LDT_ENTRY_SIZE; i++) {
+		if (ldt[i].a != 0 || ldt[i].b != 0)
+			size++;
+	}
+
+	if (size < ARRAY_SIZE(dummy_list))
+		host_ldt_entries = dummy_list;
+	else {
+		size = (size + 1) * sizeof(dummy_list[0]);
+		tmp = kmalloc(size, GFP_KERNEL);
+		if (tmp == NULL) {
+			printk(KERN_ERR "ldt_get_host_info: couldn't allocate "
+			       "host ldt list\n");
+			goto out_free;
+		}
+		host_ldt_entries = tmp;
+	}
+
+	for (i=0, k=0; i<ret/LDT_ENTRY_SIZE; i++) {
+		if (ldt[i].a != 0 || ldt[i].b != 0)
+			host_ldt_entries[k++] = i;
+	}
+	host_ldt_entries[k] = -1;
+
+out_free:
+	free_pages((unsigned long)ldt, order);
+}
+
+long init_new_ldt(struct mm_context *new_mm, struct mm_context *from_mm)
+{
+	struct user_desc desc;
+	short * num_p;
+	int i;
+	long page, err=0;
+	void *addr = NULL;
+	struct proc_mm_op copy;
+
+
+	if (!ptrace_ldt)
+		mutex_init(&new_mm->arch.ldt.lock);
+
+	if (!from_mm) {
+		memset(&desc, 0, sizeof(desc));
+		/*
+		 * We have to initialize a clean ldt.
+		 */
+		if (proc_mm) {
+			/*
+			 * If the new mm was created using proc_mm, host's
+			 * default-ldt currently is assigned, which normally
+			 * contains the call-gates for lcall7 and lcall27.
+			 * To remove these gates, we simply write an empty
+			 * entry as number 0 to the host.
+			 */
+			err = write_ldt_entry(&new_mm->id, 1, &desc, &addr, 1);
+		}
+		else {
+			/*
+			 * Now we try to retrieve info about the ldt we
+			 * inherited from the host. All ldt-entries found
+			 * will be reset in the following loop
+			 */
+			ldt_get_host_info();
+			for (num_p=host_ldt_entries; *num_p != -1; num_p++) {
+				desc.entry_number = *num_p;
+				err = write_ldt_entry(&new_mm->id, 1, &desc,
+						      &addr, *(num_p + 1) == -1);
+				if (err)
+					break;
+			}
+		}
+		new_mm->arch.ldt.entry_count = 0;
+
+		goto out;
+	}
+
+	if (proc_mm) {
+		/*
+		 * We have a valid from_mm, so we now have to copy the LDT of
+		 * from_mm to new_mm, because using proc_mm a new mm with
+		 * an empty/default LDT was created in new_mm()
+		 */
+		copy = ((struct proc_mm_op) { .op = MM_COPY_SEGMENTS,
+					      .u =
+					      { .copy_segments =
+							from_mm->id.u.mm_fd } } );
+		i = os_write_file(new_mm->id.u.mm_fd, &copy, sizeof(copy));
+		if (i != sizeof(copy))
+			printk(KERN_ERR "new_mm : /proc/mm copy_segments "
+			       "failed, err = %d\n", -i);
+	}
+
+	if (!ptrace_ldt) {
+		/*
+		 * Our local LDT is used to supply the data for
+		 * modify_ldt(READLDT), if PTRACE_LDT isn't available,
+		 * i.e., we have to use the stub for modify_ldt, which
+		 * can't handle the big read buffer of up to 64kB.
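+		 * Copy the parent's LDT entries into the new context,
+		 * under the parent's lock.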
+ */ + mutex_lock(&from_mm->arch.ldt.lock); + if (from_mm->arch.ldt.entry_count <= LDT_DIRECT_ENTRIES) + memcpy(new_mm->arch.ldt.u.entries, from_mm->arch.ldt.u.entries, + sizeof(new_mm->arch.ldt.u.entries)); + else { + i = from_mm->arch.ldt.entry_count / LDT_ENTRIES_PER_PAGE; + while (i-->0) { + page = __get_free_page(GFP_KERNEL|__GFP_ZERO); + if (!page) { + err = -ENOMEM; + break; + } + new_mm->arch.ldt.u.pages[i] = + (struct ldt_entry *) page; + memcpy(new_mm->arch.ldt.u.pages[i], + from_mm->arch.ldt.u.pages[i], PAGE_SIZE); + } + } + new_mm->arch.ldt.entry_count = from_mm->arch.ldt.entry_count; + mutex_unlock(&from_mm->arch.ldt.lock); + } + + out: + return err; +} + + +void free_ldt(struct mm_context *mm) +{ + int i; + + if (!ptrace_ldt && mm->arch.ldt.entry_count > LDT_DIRECT_ENTRIES) { + i = mm->arch.ldt.entry_count / LDT_ENTRIES_PER_PAGE; + while (i-- > 0) + free_page((long) mm->arch.ldt.u.pages[i]); + } + mm->arch.ldt.entry_count = 0; +} + +int sys_modify_ldt(int func, void __user *ptr, unsigned long bytecount) +{ + return do_modify_ldt_skas(func, ptr, bytecount); +} diff --git a/arch/x86/um/mem_32.c b/arch/x86/um/mem_32.c new file mode 100644 index 000000000000..639900a6fde9 --- /dev/null +++ b/arch/x86/um/mem_32.c @@ -0,0 +1,62 @@ +/* + * Copyright (C) 2011 Richard Weinberger <richrd@nod.at> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/mm.h> +#include <asm/page.h> +#include <asm/mman.h> + +static struct vm_area_struct gate_vma; + +static int __init gate_vma_init(void) +{ + if (!FIXADDR_USER_START) + return 0; + + gate_vma.vm_mm = NULL; + gate_vma.vm_start = FIXADDR_USER_START; + gate_vma.vm_end = FIXADDR_USER_END; + gate_vma.vm_flags = VM_READ | VM_MAYREAD | VM_EXEC | VM_MAYEXEC; + gate_vma.vm_page_prot = __P101; + + /* + * Make sure the vDSO gets into every core dump. + * Dumping its contents makes post-mortem fully interpretable later + * without matching up the same kernel and hardware config to see + * what PC values meant. + */ + gate_vma.vm_flags |= VM_ALWAYSDUMP; + + return 0; +} +__initcall(gate_vma_init); + +struct vm_area_struct *get_gate_vma(struct mm_struct *mm) +{ + return FIXADDR_USER_START ? 
&gate_vma : NULL; +} + +int in_gate_area_no_mm(unsigned long addr) +{ + if (!FIXADDR_USER_START) + return 0; + + if ((addr >= FIXADDR_USER_START) && (addr < FIXADDR_USER_END)) + return 1; + + return 0; +} + +int in_gate_area(struct mm_struct *mm, unsigned long addr) +{ + struct vm_area_struct *vma = get_gate_vma(mm); + + if (!vma) + return 0; + + return (addr >= vma->vm_start) && (addr < vma->vm_end); +} diff --git a/arch/x86/um/mem_64.c b/arch/x86/um/mem_64.c new file mode 100644 index 000000000000..546518727a73 --- /dev/null +++ b/arch/x86/um/mem_64.c @@ -0,0 +1,26 @@ +#include "linux/mm.h" +#include "asm/page.h" +#include "asm/mman.h" + +const char *arch_vma_name(struct vm_area_struct *vma) +{ + if (vma->vm_mm && vma->vm_start == um_vdso_addr) + return "[vdso]"; + + return NULL; +} + +struct vm_area_struct *get_gate_vma(struct mm_struct *mm) +{ + return NULL; +} + +int in_gate_area(struct mm_struct *mm, unsigned long addr) +{ + return 0; +} + +int in_gate_area_no_mm(unsigned long addr) +{ + return 0; +} diff --git a/arch/x86/um/os-Linux/Makefile b/arch/x86/um/os-Linux/Makefile new file mode 100644 index 000000000000..253bfb8cb702 --- /dev/null +++ b/arch/x86/um/os-Linux/Makefile @@ -0,0 +1,13 @@ +# +# Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) +# Licensed under the GPL +# + +obj-y = registers.o task_size.o mcontext.o + +obj-$(CONFIG_X86_32) += tls.o +obj-$(CONFIG_64BIT) += prctl.o + +USER_OBJS := $(obj-y) + +include arch/um/scripts/Makefile.rules diff --git a/arch/x86/um/os-Linux/mcontext.c b/arch/x86/um/os-Linux/mcontext.c new file mode 100644 index 000000000000..1d33d72c6284 --- /dev/null +++ b/arch/x86/um/os-Linux/mcontext.c @@ -0,0 +1,31 @@ +#include <sys/ucontext.h> +#define __FRAME_OFFSETS +#include <asm/ptrace.h> +#include <sysdep/ptrace.h> + +void get_regs_from_mc(struct uml_pt_regs *regs, mcontext_t *mc) +{ +#ifdef __i386__ +#define COPY2(X,Y) regs->gp[X] = mc->gregs[REG_##Y] +#define COPY(X) regs->gp[X] = mc->gregs[REG_##X] +#define COPY_SEG(X) regs->gp[X] = mc->gregs[REG_##X] & 0xffff; +#define COPY_SEG_CPL3(X) regs->gp[X] = (mc->gregs[REG_##X] & 0xffff) | 3; + COPY_SEG(GS); COPY_SEG(FS); COPY_SEG(ES); COPY_SEG(DS); + COPY(EDI); COPY(ESI); COPY(EBP); + COPY2(UESP, ESP); /* sic */ + COPY(EBX); COPY(EDX); COPY(ECX); COPY(EAX); + COPY(EIP); COPY_SEG_CPL3(CS); COPY(EFL); COPY_SEG_CPL3(SS); +#else +#define COPY2(X,Y) regs->gp[X/sizeof(unsigned long)] = mc->gregs[REG_##Y] +#define COPY(X) regs->gp[X/sizeof(unsigned long)] = mc->gregs[REG_##X] + COPY(R8); COPY(R9); COPY(R10); COPY(R11); + COPY(R12); COPY(R13); COPY(R14); COPY(R15); + COPY(RDI); COPY(RSI); COPY(RBP); COPY(RBX); + COPY(RDX); COPY(RAX); COPY(RCX); COPY(RSP); + COPY(RIP); + COPY2(EFLAGS, EFL); + COPY2(CS, CSGSFS); + regs->gp[CS / sizeof(unsigned long)] &= 0xffff; + regs->gp[CS / sizeof(unsigned long)] |= 3; +#endif +} diff --git a/arch/x86/um/os-Linux/prctl.c b/arch/x86/um/os-Linux/prctl.c new file mode 100644 index 000000000000..9d34eddb517f --- /dev/null +++ b/arch/x86/um/os-Linux/prctl.c @@ -0,0 +1,12 @@ +/* + * Copyright (C) 2007 Jeff Dike (jdike@{addtoit.com,linux.intel.com}) + * Licensed under the GPL + */ + +#include <sys/ptrace.h> +#include <linux/ptrace.h> + +int os_arch_prctl(int pid, int code, unsigned long *addr) +{ + return ptrace(PTRACE_ARCH_PRCTL, pid, (unsigned long) addr, code); +} diff --git a/arch/x86/um/os-Linux/registers.c b/arch/x86/um/os-Linux/registers.c new file mode 100644 index 000000000000..0cdbb86b012b --- /dev/null +++ b/arch/x86/um/os-Linux/registers.c @@ -0,0 
+1,113 @@ +/* + * Copyright (C) 2004 PathScale, Inc + * Copyright (C) 2004 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * Licensed under the GPL + */ + +#include <errno.h> +#include <sys/ptrace.h> +#ifdef __i386__ +#include <sys/user.h> +#endif +#include "longjmp.h" +#include "sysdep/ptrace_user.h" + +int save_fp_registers(int pid, unsigned long *fp_regs) +{ + if (ptrace(PTRACE_GETFPREGS, pid, 0, fp_regs) < 0) + return -errno; + return 0; +} + +int restore_fp_registers(int pid, unsigned long *fp_regs) +{ + if (ptrace(PTRACE_SETFPREGS, pid, 0, fp_regs) < 0) + return -errno; + return 0; +} + +#ifdef __i386__ +int have_fpx_regs = 1; +int save_fpx_registers(int pid, unsigned long *fp_regs) +{ + if (ptrace(PTRACE_GETFPXREGS, pid, 0, fp_regs) < 0) + return -errno; + return 0; +} + +int restore_fpx_registers(int pid, unsigned long *fp_regs) +{ + if (ptrace(PTRACE_SETFPXREGS, pid, 0, fp_regs) < 0) + return -errno; + return 0; +} + +int get_fp_registers(int pid, unsigned long *regs) +{ + if (have_fpx_regs) + return save_fpx_registers(pid, regs); + else + return save_fp_registers(pid, regs); +} + +int put_fp_registers(int pid, unsigned long *regs) +{ + if (have_fpx_regs) + return restore_fpx_registers(pid, regs); + else + return restore_fp_registers(pid, regs); +} + +void arch_init_registers(int pid) +{ + struct user_fpxregs_struct fpx_regs; + int err; + + err = ptrace(PTRACE_GETFPXREGS, pid, 0, &fpx_regs); + if (!err) + return; + + if (errno != EIO) + panic("check_ptrace : PTRACE_GETFPXREGS failed, errno = %d", + errno); + + have_fpx_regs = 0; +} +#else + +int get_fp_registers(int pid, unsigned long *regs) +{ + return save_fp_registers(pid, regs); +} + +int put_fp_registers(int pid, unsigned long *regs) +{ + return restore_fp_registers(pid, regs); +} + +#endif + +unsigned long get_thread_reg(int reg, jmp_buf *buf) +{ + switch (reg) { +#ifdef __i386__ + case HOST_IP: + return buf[0]->__eip; + case HOST_SP: + return buf[0]->__esp; + case HOST_BP: + return buf[0]->__ebp; +#else + case HOST_IP: + return buf[0]->__rip; + case HOST_SP: + return buf[0]->__rsp; + case HOST_BP: + return buf[0]->__rbp; +#endif + default: + printk(UM_KERN_ERR "get_thread_regs - unknown register %d\n", + reg); + return 0; + } +} diff --git a/arch/x86/um/os-Linux/task_size.c b/arch/x86/um/os-Linux/task_size.c new file mode 100644 index 000000000000..efb16c5c9bcf --- /dev/null +++ b/arch/x86/um/os-Linux/task_size.c @@ -0,0 +1,150 @@ +#include <stdio.h> +#include <stdlib.h> +#include <signal.h> +#include <sys/mman.h> +#include "longjmp.h" + +#ifdef __i386__ + +static jmp_buf buf; + +static void segfault(int sig) +{ + longjmp(buf, 1); +} + +static int page_ok(unsigned long page) +{ + unsigned long *address = (unsigned long *) (page << UM_KERN_PAGE_SHIFT); + unsigned long n = ~0UL; + void *mapped = NULL; + int ok = 0; + + /* + * First see if the page is readable. If it is, it may still + * be a VDSO, so we go on to see if it's writable. If not + * then try mapping memory there. If that fails, then we're + * still in the kernel area. As a sanity check, we'll fail if + * the mmap succeeds, but gives us an address different from + * what we wanted. + */ + if (setjmp(buf) == 0) + n = *address; + else { + mapped = mmap(address, UM_KERN_PAGE_SIZE, + PROT_READ | PROT_WRITE, + MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + if (mapped == MAP_FAILED) + return 0; + if (mapped != address) + goto out; + } + + /* + * Now, is it writeable? If so, then we're in user address + * space. 
If not, then try mprotecting it and try the write
+	 * again.
+	 */
+	if (setjmp(buf) == 0) {
+		*address = n;
+		ok = 1;
+		goto out;
+	} else if (mprotect(address, UM_KERN_PAGE_SIZE,
+			    PROT_READ | PROT_WRITE) != 0)
+		goto out;
+
+	if (setjmp(buf) == 0) {
+		*address = n;
+		ok = 1;
+	}
+
+ out:
+	if (mapped != NULL)
+		munmap(mapped, UM_KERN_PAGE_SIZE);
+	return ok;
+}
+
+unsigned long os_get_top_address(void)
+{
+	struct sigaction sa, old;
+	unsigned long bottom = 0;
+	/*
+	 * A 32-bit UML on a 64-bit host gets confused about the VDSO at
+	 * 0xffffe000. It is mapped, is readable, can be reprotected writeable
+	 * and written. However, exec discovers later that it can't be
+	 * unmapped. So, just set the highest address to be checked to just
+	 * below it. This might waste some address space on 4G/4G 32-bit
+	 * hosts, but shouldn't hurt otherwise.
+	 */
+	unsigned long top = 0xffffd000 >> UM_KERN_PAGE_SHIFT;
+	unsigned long test, original;
+
+	printf("Locating the bottom of the address space ... ");
+	fflush(stdout);
+
+	/*
+	 * We're going to be longjmping out of the signal handler, so
+	 * SA_NODEFER needs to be set.
+	 */
+	sa.sa_handler = segfault;
+	sigemptyset(&sa.sa_mask);
+	sa.sa_flags = SA_NODEFER;
+	if (sigaction(SIGSEGV, &sa, &old)) {
+		perror("os_get_top_address");
+		exit(1);
+	}
+
+	/* Manually scan the address space, bottom-up, until we find
+	 * the first valid page (or run out of them).
+	 */
+	for (bottom = 0; bottom < top; bottom++) {
+		if (page_ok(bottom))
+			break;
+	}
+
+	/* If bottom reached top, we ran out of pages without finding one. */
+	if (bottom == top) {
+		fprintf(stderr, "Unable to determine bottom of address "
+			"space.\n");
+		exit(1);
+	}
+
+	printf("0x%x\n", bottom << UM_KERN_PAGE_SHIFT);
+	printf("Locating the top of the address space ... ");
+	fflush(stdout);
+
+	original = bottom;
+
+	/* This could happen with a 4G/4G split */
+	if (page_ok(top))
+		goto out;
+
+	do {
+		test = bottom + (top - bottom) / 2;
+		if (page_ok(test))
+			bottom = test;
+		else
+			top = test;
+	} while (top - bottom > 1);
+
+out:
+	/* Restore the old SIGSEGV handling */
+	if (sigaction(SIGSEGV, &old, NULL)) {
+		perror("os_get_top_address");
+		exit(1);
+	}
+	top <<= UM_KERN_PAGE_SHIFT;
+	printf("0x%x\n", top);
+
+	return top;
+}
+
+#else
+
+unsigned long os_get_top_address(void)
+{
+	/* The old value of CONFIG_TOP_ADDR */
+	return 0x7fc0000000;
+}
+
+#endif
diff --git a/arch/x86/um/os-Linux/tls.c b/arch/x86/um/os-Linux/tls.c
new file mode 100644
index 000000000000..82276b6071af
--- /dev/null
+++ b/arch/x86/um/os-Linux/tls.c
@@ -0,0 +1,67 @@
+#include <errno.h>
+#include <linux/unistd.h>
+
+#include <sys/ptrace.h>
+#include <sys/syscall.h>
+#include <unistd.h>
+
+#include "sysdep/tls.h"
+
+#ifndef PTRACE_GET_THREAD_AREA
+#define PTRACE_GET_THREAD_AREA 25
+#endif
+
+#ifndef PTRACE_SET_THREAD_AREA
+#define PTRACE_SET_THREAD_AREA 26
+#endif
+
+/* Checks whether the host supports TLS, and sets *tls_min according to the
+ * value valid on the host.
+ * An i386 host has it == 6; an x86_64 host has it == 12, for i386 emulation.
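+ * check_host_supports_tls() probes each candidate value with
+ * get_thread_area() and picks the first one the host accepts.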
*/ +void check_host_supports_tls(int *supports_tls, int *tls_min) +{ + /* Values for x86 and x86_64.*/ + int val[] = {GDT_ENTRY_TLS_MIN_I386, GDT_ENTRY_TLS_MIN_X86_64}; + int i; + + for (i = 0; i < ARRAY_SIZE(val); i++) { + user_desc_t info; + info.entry_number = val[i]; + + if (syscall(__NR_get_thread_area, &info) == 0) { + *tls_min = val[i]; + *supports_tls = 1; + return; + } else { + if (errno == EINVAL) + continue; + else if (errno == ENOSYS) + *supports_tls = 0; + return; + } + } + + *supports_tls = 0; +} + +int os_set_thread_area(user_desc_t *info, int pid) +{ + int ret; + + ret = ptrace(PTRACE_SET_THREAD_AREA, pid, info->entry_number, + (unsigned long) info); + if (ret < 0) + ret = -errno; + return ret; +} + +int os_get_thread_area(user_desc_t *info, int pid) +{ + int ret; + + ret = ptrace(PTRACE_GET_THREAD_AREA, pid, info->entry_number, + (unsigned long) info); + if (ret < 0) + ret = -errno; + return ret; +} diff --git a/arch/x86/um/ptrace_32.c b/arch/x86/um/ptrace_32.c new file mode 100644 index 000000000000..3b949daa095c --- /dev/null +++ b/arch/x86/um/ptrace_32.c @@ -0,0 +1,273 @@ +/* + * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * Licensed under the GPL + */ + +#include "linux/mm.h" +#include "linux/sched.h" +#include "asm/uaccess.h" +#include "skas.h" + +extern int arch_switch_tls(struct task_struct *to); + +void arch_switch_to(struct task_struct *to) +{ + int err = arch_switch_tls(to); + if (!err) + return; + + if (err != -EINVAL) + printk(KERN_WARNING "arch_switch_tls failed, errno %d, " + "not EINVAL\n", -err); + else + printk(KERN_WARNING "arch_switch_tls failed, errno = EINVAL\n"); +} + +int is_syscall(unsigned long addr) +{ + unsigned short instr; + int n; + + n = copy_from_user(&instr, (void __user *) addr, sizeof(instr)); + if (n) { + /* access_process_vm() grants access to vsyscall and stub, + * while copy_from_user doesn't. Maybe access_process_vm is + * slow, but that doesn't matter, since it will be called only + * in case of singlestepping, if copy_from_user failed. + */ + n = access_process_vm(current, addr, &instr, sizeof(instr), 0); + if (n != sizeof(instr)) { + printk(KERN_ERR "is_syscall : failed to read " + "instruction from 0x%lx\n", addr); + return 1; + } + } + /* int 0x80 or sysenter */ + return (instr == 0x80cd) || (instr == 0x340f); +} + +/* determines which flags the user has access to. 
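+ * 0x00044dd5 covers CF, PF, AF, ZF, SF, TF, DF, OF, NT and AC.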
*/ +/* 1 = access 0 = no access */ +#define FLAG_MASK 0x00044dd5 + +static const int reg_offsets[] = { + [EBX] = HOST_BX, + [ECX] = HOST_CX, + [EDX] = HOST_DX, + [ESI] = HOST_SI, + [EDI] = HOST_DI, + [EBP] = HOST_BP, + [EAX] = HOST_AX, + [DS] = HOST_DS, + [ES] = HOST_ES, + [FS] = HOST_FS, + [GS] = HOST_GS, + [EIP] = HOST_IP, + [CS] = HOST_CS, + [EFL] = HOST_EFLAGS, + [UESP] = HOST_SP, + [SS] = HOST_SS, +}; + +int putreg(struct task_struct *child, int regno, unsigned long value) +{ + regno >>= 2; + switch (regno) { + case EBX: + case ECX: + case EDX: + case ESI: + case EDI: + case EBP: + case EAX: + case EIP: + case UESP: + break; + case FS: + if (value && (value & 3) != 3) + return -EIO; + break; + case GS: + if (value && (value & 3) != 3) + return -EIO; + break; + case DS: + case ES: + if (value && (value & 3) != 3) + return -EIO; + value &= 0xffff; + break; + case SS: + case CS: + if ((value & 3) != 3) + return -EIO; + value &= 0xffff; + break; + case EFL: + value &= FLAG_MASK; + child->thread.regs.regs.gp[HOST_EFLAGS] |= value; + return 0; + case ORIG_EAX: + child->thread.regs.regs.syscall = value; + return 0; + default : + panic("Bad register in putreg() : %d\n", regno); + } + child->thread.regs.regs.gp[reg_offsets[regno]] = value; + return 0; +} + +int poke_user(struct task_struct *child, long addr, long data) +{ + if ((addr & 3) || addr < 0) + return -EIO; + + if (addr < MAX_REG_OFFSET) + return putreg(child, addr, data); + else if ((addr >= offsetof(struct user, u_debugreg[0])) && + (addr <= offsetof(struct user, u_debugreg[7]))) { + addr -= offsetof(struct user, u_debugreg[0]); + addr = addr >> 2; + if ((addr == 4) || (addr == 5)) + return -EIO; + child->thread.arch.debugregs[addr] = data; + return 0; + } + return -EIO; +} + +unsigned long getreg(struct task_struct *child, int regno) +{ + unsigned long mask = ~0UL; + + regno >>= 2; + switch (regno) { + case ORIG_EAX: + return child->thread.regs.regs.syscall; + case FS: + case GS: + case DS: + case ES: + case SS: + case CS: + mask = 0xffff; + break; + case EIP: + case UESP: + case EAX: + case EBX: + case ECX: + case EDX: + case ESI: + case EDI: + case EBP: + case EFL: + break; + default: + panic("Bad register in getreg() : %d\n", regno); + } + return mask & child->thread.regs.regs.gp[reg_offsets[regno]]; +} + +/* read the word at location addr in the USER area. 
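+ * Offsets below MAX_REG_OFFSET name CPU registers; offsets within
+ * u_debugreg[] are served from the shadow copy in thread.arch.debugregs.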
*/ +int peek_user(struct task_struct *child, long addr, long data) +{ + unsigned long tmp; + + if ((addr & 3) || addr < 0) + return -EIO; + + tmp = 0; /* Default return condition */ + if (addr < MAX_REG_OFFSET) { + tmp = getreg(child, addr); + } + else if ((addr >= offsetof(struct user, u_debugreg[0])) && + (addr <= offsetof(struct user, u_debugreg[7]))) { + addr -= offsetof(struct user, u_debugreg[0]); + addr = addr >> 2; + tmp = child->thread.arch.debugregs[addr]; + } + return put_user(tmp, (unsigned long __user *) data); +} + +static int get_fpregs(struct user_i387_struct __user *buf, struct task_struct *child) +{ + int err, n, cpu = ((struct thread_info *) child->stack)->cpu; + struct user_i387_struct fpregs; + + err = save_fp_registers(userspace_pid[cpu], (unsigned long *) &fpregs); + if (err) + return err; + + n = copy_to_user(buf, &fpregs, sizeof(fpregs)); + if(n > 0) + return -EFAULT; + + return n; +} + +static int set_fpregs(struct user_i387_struct __user *buf, struct task_struct *child) +{ + int n, cpu = ((struct thread_info *) child->stack)->cpu; + struct user_i387_struct fpregs; + + n = copy_from_user(&fpregs, buf, sizeof(fpregs)); + if (n > 0) + return -EFAULT; + + return restore_fp_registers(userspace_pid[cpu], + (unsigned long *) &fpregs); +} + +static int get_fpxregs(struct user_fxsr_struct __user *buf, struct task_struct *child) +{ + int err, n, cpu = ((struct thread_info *) child->stack)->cpu; + struct user_fxsr_struct fpregs; + + err = save_fpx_registers(userspace_pid[cpu], (unsigned long *) &fpregs); + if (err) + return err; + + n = copy_to_user(buf, &fpregs, sizeof(fpregs)); + if(n > 0) + return -EFAULT; + + return n; +} + +static int set_fpxregs(struct user_fxsr_struct __user *buf, struct task_struct *child) +{ + int n, cpu = ((struct thread_info *) child->stack)->cpu; + struct user_fxsr_struct fpregs; + + n = copy_from_user(&fpregs, buf, sizeof(fpregs)); + if (n > 0) + return -EFAULT; + + return restore_fpx_registers(userspace_pid[cpu], + (unsigned long *) &fpregs); +} + +long subarch_ptrace(struct task_struct *child, long request, + unsigned long addr, unsigned long data) +{ + int ret = -EIO; + void __user *datap = (void __user *) data; + switch (request) { + case PTRACE_GETFPREGS: /* Get the child FPU state. */ + ret = get_fpregs(datap, child); + break; + case PTRACE_SETFPREGS: /* Set the child FPU state. */ + ret = set_fpregs(datap, child); + break; + case PTRACE_GETFPXREGS: /* Get the child FPU state. */ + ret = get_fpxregs(datap, child); + break; + case PTRACE_SETFPXREGS: /* Set the child FPU state. */ + ret = set_fpxregs(datap, child); + break; + default: + ret = -EIO; + } + return ret; +} diff --git a/arch/x86/um/ptrace_64.c b/arch/x86/um/ptrace_64.c new file mode 100644 index 000000000000..3b52bf0b418a --- /dev/null +++ b/arch/x86/um/ptrace_64.c @@ -0,0 +1,271 @@ +/* + * Copyright 2003 PathScale, Inc. + * Copyright (C) 2003 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * + * Licensed under the GPL + */ + +#include <linux/mm.h> +#include <linux/sched.h> +#include <linux/errno.h> +#define __FRAME_OFFSETS +#include <asm/ptrace.h> +#include <asm/uaccess.h> + +/* + * determines which flags the user has access to. 
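+ * (0x44dd5UL: the same user-writable flag set as on 32-bit)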
+ * 1 = access 0 = no access + */ +#define FLAG_MASK 0x44dd5UL + +static const int reg_offsets[] = +{ + [R8 >> 3] = HOST_R8, + [R9 >> 3] = HOST_R9, + [R10 >> 3] = HOST_R10, + [R11 >> 3] = HOST_R11, + [R12 >> 3] = HOST_R12, + [R13 >> 3] = HOST_R13, + [R14 >> 3] = HOST_R14, + [R15 >> 3] = HOST_R15, + [RIP >> 3] = HOST_IP, + [RSP >> 3] = HOST_SP, + [RAX >> 3] = HOST_AX, + [RBX >> 3] = HOST_BX, + [RCX >> 3] = HOST_CX, + [RDX >> 3] = HOST_DX, + [RSI >> 3] = HOST_SI, + [RDI >> 3] = HOST_DI, + [RBP >> 3] = HOST_BP, + [CS >> 3] = HOST_CS, + [SS >> 3] = HOST_SS, + [FS_BASE >> 3] = HOST_FS_BASE, + [GS_BASE >> 3] = HOST_GS_BASE, + [DS >> 3] = HOST_DS, + [ES >> 3] = HOST_ES, + [FS >> 3] = HOST_FS, + [GS >> 3] = HOST_GS, + [EFLAGS >> 3] = HOST_EFLAGS, + [ORIG_RAX >> 3] = HOST_ORIG_AX, +}; + +int putreg(struct task_struct *child, int regno, unsigned long value) +{ +#ifdef TIF_IA32 + /* + * Some code in the 64bit emulation may not be 64bit clean. + * Don't take any chances. + */ + if (test_tsk_thread_flag(child, TIF_IA32)) + value &= 0xffffffff; +#endif + switch (regno) { + case R8: + case R9: + case R10: + case R11: + case R12: + case R13: + case R14: + case R15: + case RIP: + case RSP: + case RAX: + case RBX: + case RCX: + case RDX: + case RSI: + case RDI: + case RBP: + case ORIG_RAX: + break; + + case FS: + case GS: + case DS: + case ES: + case SS: + case CS: + if (value && (value & 3) != 3) + return -EIO; + value &= 0xffff; + break; + + case FS_BASE: + case GS_BASE: + if (!((value >> 48) == 0 || (value >> 48) == 0xffff)) + return -EIO; + break; + + case EFLAGS: + value &= FLAG_MASK; + child->thread.regs.regs.gp[HOST_EFLAGS] |= value; + return 0; + + default: + panic("Bad register in putreg(): %d\n", regno); + } + + child->thread.regs.regs.gp[reg_offsets[regno >> 3]] = value; + return 0; +} + +int poke_user(struct task_struct *child, long addr, long data) +{ + if ((addr & 3) || addr < 0) + return -EIO; + + if (addr < MAX_REG_OFFSET) + return putreg(child, addr, data); + else if ((addr >= offsetof(struct user, u_debugreg[0])) && + (addr <= offsetof(struct user, u_debugreg[7]))) { + addr -= offsetof(struct user, u_debugreg[0]); + addr = addr >> 2; + if ((addr == 4) || (addr == 5)) + return -EIO; + child->thread.arch.debugregs[addr] = data; + return 0; + } + return -EIO; +} + +unsigned long getreg(struct task_struct *child, int regno) +{ + unsigned long mask = ~0UL; +#ifdef TIF_IA32 + if (test_tsk_thread_flag(child, TIF_IA32)) + mask = 0xffffffff; +#endif + switch (regno) { + case R8: + case R9: + case R10: + case R11: + case R12: + case R13: + case R14: + case R15: + case RIP: + case RSP: + case RAX: + case RBX: + case RCX: + case RDX: + case RSI: + case RDI: + case RBP: + case ORIG_RAX: + case EFLAGS: + case FS_BASE: + case GS_BASE: + break; + case FS: + case GS: + case DS: + case ES: + case SS: + case CS: + mask = 0xffff; + break; + default: + panic("Bad register in getreg: %d\n", regno); + } + return mask & child->thread.regs.regs.gp[reg_offsets[regno >> 3]]; +} + +int peek_user(struct task_struct *child, long addr, long data) +{ + /* read the word at location addr in the USER area. 
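+	 * Same scheme as the 32-bit peek_user() earlier in this patch.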
+	 */
+	unsigned long tmp;
+
+	if ((addr & 3) || addr < 0)
+		return -EIO;
+
+	tmp = 0;  /* Default return condition */
+	if (addr < MAX_REG_OFFSET)
+		tmp = getreg(child, addr);
+	else if ((addr >= offsetof(struct user, u_debugreg[0])) &&
+		 (addr <= offsetof(struct user, u_debugreg[7]))) {
+		addr -= offsetof(struct user, u_debugreg[0]);
+		addr = addr >> 2;
+		tmp = child->thread.arch.debugregs[addr];
+	}
+	return put_user(tmp, (unsigned long *) data);
+}
+
+/* XXX Mostly copied from sys-i386 */
+int is_syscall(unsigned long addr)
+{
+	unsigned short instr;
+	int n;
+
+	n = copy_from_user(&instr, (void __user *) addr, sizeof(instr));
+	if (n) {
+		/*
+		 * access_process_vm() grants access to vsyscall and stub,
+		 * while copy_from_user doesn't. Maybe access_process_vm is
+		 * slow, but that doesn't matter, since it will be called only
+		 * in case of singlestepping, if copy_from_user failed.
+		 */
+		n = access_process_vm(current, addr, &instr, sizeof(instr), 0);
+		if (n != sizeof(instr)) {
+			printk("is_syscall : failed to read instruction from "
+			       "0x%lx\n", addr);
+			return 1;
+		}
+	}
+	/* syscall (bytes 0x0f 0x05) */
+	return instr == 0x050f;
+}
+
+static int get_fpregs(struct user_i387_struct __user *buf, struct task_struct *child)
+{
+	int err, n, cpu = ((struct thread_info *) child->stack)->cpu;
+	long fpregs[HOST_FP_SIZE];
+
+	BUG_ON(sizeof(*buf) != sizeof(fpregs));
+	err = save_fp_registers(userspace_pid[cpu], fpregs);
+	if (err)
+		return err;
+
+	n = copy_to_user(buf, fpregs, sizeof(fpregs));
+	if (n > 0)
+		return -EFAULT;
+
+	return n;
+}
+
+static int set_fpregs(struct user_i387_struct __user *buf, struct task_struct *child)
+{
+	int n, cpu = ((struct thread_info *) child->stack)->cpu;
+	long fpregs[HOST_FP_SIZE];
+
+	BUG_ON(sizeof(*buf) != sizeof(fpregs));
+	n = copy_from_user(fpregs, buf, sizeof(fpregs));
+	if (n > 0)
+		return -EFAULT;
+
+	return restore_fp_registers(userspace_pid[cpu], fpregs);
+}
+
+long subarch_ptrace(struct task_struct *child, long request,
+		    unsigned long addr, unsigned long data)
+{
+	int ret = -EIO;
+	void __user *datap = (void __user *) data;
+
+	switch (request) {
+	case PTRACE_GETFPREGS: /* Get the child FPU state. */
+		ret = get_fpregs(datap, child);
+		break;
+	case PTRACE_SETFPREGS: /* Set the child FPU state.
*/ + ret = set_fpregs(datap, child); + break; + case PTRACE_ARCH_PRCTL: + /* XXX Calls ptrace on the host - needs some SMP thinking */ + ret = arch_prctl(child, data, (void __user *) addr); + break; + } + + return ret; +} diff --git a/arch/x86/um/ptrace_user.c b/arch/x86/um/ptrace_user.c new file mode 100644 index 000000000000..3960ca1dd35a --- /dev/null +++ b/arch/x86/um/ptrace_user.c @@ -0,0 +1,21 @@ +/* + * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * Licensed under the GPL + */ + +#include <errno.h> +#include "ptrace_user.h" + +int ptrace_getregs(long pid, unsigned long *regs_out) +{ + if (ptrace(PTRACE_GETREGS, pid, 0, regs_out) < 0) + return -errno; + return 0; +} + +int ptrace_setregs(long pid, unsigned long *regs) +{ + if (ptrace(PTRACE_SETREGS, pid, 0, regs) < 0) + return -errno; + return 0; +} diff --git a/arch/x86/um/setjmp_32.S b/arch/x86/um/setjmp_32.S new file mode 100644 index 000000000000..b766792c9933 --- /dev/null +++ b/arch/x86/um/setjmp_32.S @@ -0,0 +1,58 @@ +# +# arch/i386/setjmp.S +# +# setjmp/longjmp for the i386 architecture +# + +# +# The jmp_buf is assumed to contain the following, in order: +# %ebx +# %esp +# %ebp +# %esi +# %edi +# <return address> +# + + .text + .align 4 + .globl setjmp + .type setjmp, @function +setjmp: +#ifdef _REGPARM + movl %eax,%edx +#else + movl 4(%esp),%edx +#endif + popl %ecx # Return address, and adjust the stack + xorl %eax,%eax # Return value + movl %ebx,(%edx) + movl %esp,4(%edx) # Post-return %esp! + pushl %ecx # Make the call/return stack happy + movl %ebp,8(%edx) + movl %esi,12(%edx) + movl %edi,16(%edx) + movl %ecx,20(%edx) # Return address + ret + + .size setjmp,.-setjmp + + .text + .align 4 + .globl longjmp + .type longjmp, @function +longjmp: +#ifdef _REGPARM + xchgl %eax,%edx +#else + movl 4(%esp),%edx # jmp_ptr address + movl 8(%esp),%eax # Return value +#endif + movl (%edx),%ebx + movl 4(%edx),%esp + movl 8(%edx),%ebp + movl 12(%edx),%esi + movl 16(%edx),%edi + jmp *20(%edx) + + .size longjmp,.-longjmp diff --git a/arch/x86/um/setjmp_64.S b/arch/x86/um/setjmp_64.S new file mode 100644 index 000000000000..45f547b4043e --- /dev/null +++ b/arch/x86/um/setjmp_64.S @@ -0,0 +1,54 @@ +# +# arch/x86_64/setjmp.S +# +# setjmp/longjmp for the x86-64 architecture +# + +# +# The jmp_buf is assumed to contain the following, in order: +# %rbx +# %rsp (post-return) +# %rbp +# %r12 +# %r13 +# %r14 +# %r15 +# <return address> +# + + .text + .align 4 + .globl setjmp + .type setjmp, @function +setjmp: + pop %rsi # Return address, and adjust the stack + xorl %eax,%eax # Return value + movq %rbx,(%rdi) + movq %rsp,8(%rdi) # Post-return %rsp! 
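+	# (the return address has just been popped, so this is the %rsp
+	# the caller will see after setjmp returns)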
+ push %rsi # Make the call/return stack happy + movq %rbp,16(%rdi) + movq %r12,24(%rdi) + movq %r13,32(%rdi) + movq %r14,40(%rdi) + movq %r15,48(%rdi) + movq %rsi,56(%rdi) # Return address + ret + + .size setjmp,.-setjmp + + .text + .align 4 + .globl longjmp + .type longjmp, @function +longjmp: + movl %esi,%eax # Return value (int) + movq (%rdi),%rbx + movq 8(%rdi),%rsp + movq 16(%rdi),%rbp + movq 24(%rdi),%r12 + movq 32(%rdi),%r13 + movq 40(%rdi),%r14 + movq 48(%rdi),%r15 + jmp *56(%rdi) + + .size longjmp,.-longjmp diff --git a/arch/x86/um/shared/sysdep/archsetjmp.h b/arch/x86/um/shared/sysdep/archsetjmp.h new file mode 100644 index 000000000000..ff7766d28226 --- /dev/null +++ b/arch/x86/um/shared/sysdep/archsetjmp.h @@ -0,0 +1,5 @@ +#ifdef __i386__ +#include "archsetjmp_32.h" +#else +#include "archsetjmp_64.h" +#endif diff --git a/arch/x86/um/shared/sysdep/archsetjmp_32.h b/arch/x86/um/shared/sysdep/archsetjmp_32.h new file mode 100644 index 000000000000..0f312085ce1d --- /dev/null +++ b/arch/x86/um/shared/sysdep/archsetjmp_32.h @@ -0,0 +1,22 @@ +/* + * arch/um/include/sysdep-i386/archsetjmp.h + */ + +#ifndef _KLIBC_ARCHSETJMP_H +#define _KLIBC_ARCHSETJMP_H + +struct __jmp_buf { + unsigned int __ebx; + unsigned int __esp; + unsigned int __ebp; + unsigned int __esi; + unsigned int __edi; + unsigned int __eip; +}; + +typedef struct __jmp_buf jmp_buf[1]; + +#define JB_IP __eip +#define JB_SP __esp + +#endif /* _SETJMP_H */ diff --git a/arch/x86/um/shared/sysdep/archsetjmp_64.h b/arch/x86/um/shared/sysdep/archsetjmp_64.h new file mode 100644 index 000000000000..2af8f12ca161 --- /dev/null +++ b/arch/x86/um/shared/sysdep/archsetjmp_64.h @@ -0,0 +1,24 @@ +/* + * arch/um/include/sysdep-x86_64/archsetjmp.h + */ + +#ifndef _KLIBC_ARCHSETJMP_H +#define _KLIBC_ARCHSETJMP_H + +struct __jmp_buf { + unsigned long __rbx; + unsigned long __rsp; + unsigned long __rbp; + unsigned long __r12; + unsigned long __r13; + unsigned long __r14; + unsigned long __r15; + unsigned long __rip; +}; + +typedef struct __jmp_buf jmp_buf[1]; + +#define JB_IP __rip +#define JB_SP __rsp + +#endif /* _SETJMP_H */ diff --git a/arch/x86/um/shared/sysdep/faultinfo.h b/arch/x86/um/shared/sysdep/faultinfo.h new file mode 100644 index 000000000000..862ecb1c7781 --- /dev/null +++ b/arch/x86/um/shared/sysdep/faultinfo.h @@ -0,0 +1,5 @@ +#ifdef __i386__ +#include "faultinfo_32.h" +#else +#include "faultinfo_64.h" +#endif diff --git a/arch/x86/um/shared/sysdep/faultinfo_32.h b/arch/x86/um/shared/sysdep/faultinfo_32.h new file mode 100644 index 000000000000..a26086b8a800 --- /dev/null +++ b/arch/x86/um/shared/sysdep/faultinfo_32.h @@ -0,0 +1,35 @@ +/* + * Copyright (C) 2004 Fujitsu Siemens Computers GmbH + * Author: Bodo Stroesser <bstroesser@fujitsu-siemens.com> + * Licensed under the GPL + */ + +#ifndef __FAULTINFO_I386_H +#define __FAULTINFO_I386_H + +/* this structure contains the full arch-specific faultinfo + * from the traps. + * On i386, ptrace_faultinfo unfortunately doesn't provide + * all the info, since trap_no is missing. + * All common elements are defined at the same position in + * both structures, thus making it easy to copy the + * contents without knowledge about the structure elements. 
+ */ +struct faultinfo { + int error_code; /* in ptrace_faultinfo misleadingly called is_write */ + unsigned long cr2; /* in ptrace_faultinfo called addr */ + int trap_no; /* missing in ptrace_faultinfo */ +}; + +#define FAULT_WRITE(fi) ((fi).error_code & 2) +#define FAULT_ADDRESS(fi) ((fi).cr2) + +/* This is Page Fault */ +#define SEGV_IS_FIXABLE(fi) ((fi)->trap_no == 14) + +/* SKAS3 has no trap_no on i386, but get_skas_faultinfo() sets it to 0. */ +#define SEGV_MAYBE_FIXABLE(fi) ((fi)->trap_no == 0 && ptrace_faultinfo) + +#define PTRACE_FULL_FAULTINFO 0 + +#endif diff --git a/arch/x86/um/shared/sysdep/faultinfo_64.h b/arch/x86/um/shared/sysdep/faultinfo_64.h new file mode 100644 index 000000000000..f811cbe15d62 --- /dev/null +++ b/arch/x86/um/shared/sysdep/faultinfo_64.h @@ -0,0 +1,35 @@ +/* + * Copyright (C) 2004 Fujitsu Siemens Computers GmbH + * Author: Bodo Stroesser <bstroesser@fujitsu-siemens.com> + * Licensed under the GPL + */ + +#ifndef __FAULTINFO_X86_64_H +#define __FAULTINFO_X86_64_H + +/* this structure contains the full arch-specific faultinfo + * from the traps. + * On i386, ptrace_faultinfo unfortunately doesn't provide + * all the info, since trap_no is missing. + * All common elements are defined at the same position in + * both structures, thus making it easy to copy the + * contents without knowledge about the structure elements. + */ +struct faultinfo { + int error_code; /* in ptrace_faultinfo misleadingly called is_write */ + unsigned long cr2; /* in ptrace_faultinfo called addr */ + int trap_no; /* missing in ptrace_faultinfo */ +}; + +#define FAULT_WRITE(fi) ((fi).error_code & 2) +#define FAULT_ADDRESS(fi) ((fi).cr2) + +/* This is Page Fault */ +#define SEGV_IS_FIXABLE(fi) ((fi)->trap_no == 14) + +/* No broken SKAS API, which doesn't pass trap_no, here. 
*/ +#define SEGV_MAYBE_FIXABLE(fi) 0 + +#define PTRACE_FULL_FAULTINFO 1 + +#endif diff --git a/arch/x86/um/shared/sysdep/kernel-offsets.h b/arch/x86/um/shared/sysdep/kernel-offsets.h new file mode 100644 index 000000000000..5868526b5eef --- /dev/null +++ b/arch/x86/um/shared/sysdep/kernel-offsets.h @@ -0,0 +1,21 @@ +#include <linux/stddef.h> +#include <linux/sched.h> +#include <linux/elf.h> +#include <linux/crypto.h> +#include <asm/mman.h> + +#define DEFINE(sym, val) \ + asm volatile("\n->" #sym " %0 " #val : : "i" (val)) + +#define STR(x) #x +#define DEFINE_STR(sym, val) asm volatile("\n->" #sym " " STR(val) " " #val: : ) + +#define BLANK() asm volatile("\n->" : : ) + +#define OFFSET(sym, str, mem) \ + DEFINE(sym, offsetof(struct str, mem)); + +void foo(void) +{ +#include <common-offsets.h> +} diff --git a/arch/x86/um/shared/sysdep/mcontext.h b/arch/x86/um/shared/sysdep/mcontext.h new file mode 100644 index 000000000000..b724c54da316 --- /dev/null +++ b/arch/x86/um/shared/sysdep/mcontext.h @@ -0,0 +1,31 @@ +/* + * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * Licensed under the GPL + */ + +#ifndef __SYS_SIGCONTEXT_X86_H +#define __SYS_SIGCONTEXT_X86_H + +extern void get_regs_from_mc(struct uml_pt_regs *, mcontext_t *); + +#ifdef __i386__ + +#define GET_FAULTINFO_FROM_MC(fi, mc) \ + { \ + (fi).cr2 = (mc)->cr2; \ + (fi).error_code = (mc)->gregs[REG_ERR]; \ + (fi).trap_no = (mc)->gregs[REG_TRAPNO]; \ + } + +#else + +#define GET_FAULTINFO_FROM_MC(fi, mc) \ + { \ + (fi).cr2 = (mc)->gregs[REG_CR2]; \ + (fi).error_code = (mc)->gregs[REG_ERR]; \ + (fi).trap_no = (mc)->gregs[REG_TRAPNO]; \ + } + +#endif + +#endif diff --git a/arch/x86/um/shared/sysdep/ptrace.h b/arch/x86/um/shared/sysdep/ptrace.h new file mode 100644 index 000000000000..711b1621747f --- /dev/null +++ b/arch/x86/um/shared/sysdep/ptrace.h @@ -0,0 +1,5 @@ +#ifdef __i386__ +#include "ptrace_32.h" +#else +#include "ptrace_64.h" +#endif diff --git a/arch/x86/um/shared/sysdep/ptrace_32.h b/arch/x86/um/shared/sysdep/ptrace_32.h new file mode 100644 index 000000000000..befd1df32ed0 --- /dev/null +++ b/arch/x86/um/shared/sysdep/ptrace_32.h @@ -0,0 +1,114 @@ +/* + * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * Licensed under the GPL + */ + +#ifndef __SYSDEP_I386_PTRACE_H +#define __SYSDEP_I386_PTRACE_H + +#include <generated/user_constants.h> +#include "sysdep/faultinfo.h" + +#define MAX_REG_NR (UM_FRAME_SIZE / sizeof(unsigned long)) +#define MAX_REG_OFFSET (UM_FRAME_SIZE) + +static inline void update_debugregs(int seq) {} + +/* syscall emulation path in ptrace */ + +#ifndef PTRACE_SYSEMU +#define PTRACE_SYSEMU 31 +#endif + +void set_using_sysemu(int value); +int get_using_sysemu(void); +extern int sysemu_supported; + +#define REGS_IP(r) ((r)[HOST_IP]) +#define REGS_SP(r) ((r)[HOST_SP]) +#define REGS_EFLAGS(r) ((r)[HOST_EFLAGS]) +#define REGS_EAX(r) ((r)[HOST_AX]) +#define REGS_EBX(r) ((r)[HOST_BX]) +#define REGS_ECX(r) ((r)[HOST_CX]) +#define REGS_EDX(r) ((r)[HOST_DX]) +#define REGS_ESI(r) ((r)[HOST_SI]) +#define REGS_EDI(r) ((r)[HOST_DI]) +#define REGS_EBP(r) ((r)[HOST_BP]) +#define REGS_CS(r) ((r)[HOST_CS]) +#define REGS_SS(r) ((r)[HOST_SS]) +#define REGS_DS(r) ((r)[HOST_DS]) +#define REGS_ES(r) ((r)[HOST_ES]) +#define REGS_FS(r) ((r)[HOST_FS]) +#define REGS_GS(r) ((r)[HOST_GS]) + +#define REGS_SET_SYSCALL_RETURN(r, res) REGS_EAX(r) = (res) + +#define IP_RESTART_SYSCALL(ip) ((ip) -= 2) +#define REGS_RESTART_SYSCALL(r) IP_RESTART_SYSCALL(REGS_IP(r)) + +#ifndef 
PTRACE_SYSEMU_SINGLESTEP +#define PTRACE_SYSEMU_SINGLESTEP 32 +#endif + +struct uml_pt_regs { + unsigned long gp[MAX_REG_NR]; + unsigned long fp[HOST_FPX_SIZE]; + struct faultinfo faultinfo; + long syscall; + int is_user; +}; + +#define EMPTY_UML_PT_REGS { } + +#define UPT_IP(r) REGS_IP((r)->gp) +#define UPT_SP(r) REGS_SP((r)->gp) +#define UPT_EFLAGS(r) REGS_EFLAGS((r)->gp) +#define UPT_EAX(r) REGS_EAX((r)->gp) +#define UPT_EBX(r) REGS_EBX((r)->gp) +#define UPT_ECX(r) REGS_ECX((r)->gp) +#define UPT_EDX(r) REGS_EDX((r)->gp) +#define UPT_ESI(r) REGS_ESI((r)->gp) +#define UPT_EDI(r) REGS_EDI((r)->gp) +#define UPT_EBP(r) REGS_EBP((r)->gp) +#define UPT_ORIG_EAX(r) ((r)->syscall) +#define UPT_CS(r) REGS_CS((r)->gp) +#define UPT_SS(r) REGS_SS((r)->gp) +#define UPT_DS(r) REGS_DS((r)->gp) +#define UPT_ES(r) REGS_ES((r)->gp) +#define UPT_FS(r) REGS_FS((r)->gp) +#define UPT_GS(r) REGS_GS((r)->gp) + +#define UPT_SYSCALL_ARG1(r) UPT_EBX(r) +#define UPT_SYSCALL_ARG2(r) UPT_ECX(r) +#define UPT_SYSCALL_ARG3(r) UPT_EDX(r) +#define UPT_SYSCALL_ARG4(r) UPT_ESI(r) +#define UPT_SYSCALL_ARG5(r) UPT_EDI(r) +#define UPT_SYSCALL_ARG6(r) UPT_EBP(r) + +extern int user_context(unsigned long sp); + +#define UPT_IS_USER(r) ((r)->is_user) + +struct syscall_args { + unsigned long args[6]; +}; + +#define SYSCALL_ARGS(r) ((struct syscall_args) \ + { .args = { UPT_SYSCALL_ARG1(r), \ + UPT_SYSCALL_ARG2(r), \ + UPT_SYSCALL_ARG3(r), \ + UPT_SYSCALL_ARG4(r), \ + UPT_SYSCALL_ARG5(r), \ + UPT_SYSCALL_ARG6(r) } } ) + +#define UPT_RESTART_SYSCALL(r) REGS_RESTART_SYSCALL((r)->gp) + +#define UPT_ORIG_SYSCALL(r) UPT_EAX(r) +#define UPT_SYSCALL_NR(r) UPT_ORIG_EAX(r) +#define UPT_SYSCALL_RET(r) UPT_EAX(r) + +#define UPT_FAULTINFO(r) (&(r)->faultinfo) + +extern void arch_init_registers(int pid); + +#endif diff --git a/arch/x86/um/shared/sysdep/ptrace_64.h b/arch/x86/um/shared/sysdep/ptrace_64.h new file mode 100644 index 000000000000..031edc53ac57 --- /dev/null +++ b/arch/x86/um/shared/sysdep/ptrace_64.h @@ -0,0 +1,157 @@ +/* + * Copyright 2003 PathScale, Inc. + * Copyright (C) 2003 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * + * Licensed under the GPL + */ + +#ifndef __SYSDEP_X86_64_PTRACE_H +#define __SYSDEP_X86_64_PTRACE_H + +#include <generated/user_constants.h> +#include "sysdep/faultinfo.h" + +#define MAX_REG_OFFSET (UM_FRAME_SIZE) +#define MAX_REG_NR ((MAX_REG_OFFSET) / sizeof(unsigned long)) + +#define REGS_IP(r) ((r)[HOST_IP]) +#define REGS_SP(r) ((r)[HOST_SP]) + +#define REGS_RBX(r) ((r)[HOST_BX]) +#define REGS_RCX(r) ((r)[HOST_CX]) +#define REGS_RDX(r) ((r)[HOST_DX]) +#define REGS_RSI(r) ((r)[HOST_SI]) +#define REGS_RDI(r) ((r)[HOST_DI]) +#define REGS_RBP(r) ((r)[HOST_BP]) +#define REGS_RAX(r) ((r)[HOST_AX]) +#define REGS_R8(r) ((r)[HOST_R8]) +#define REGS_R9(r) ((r)[HOST_R9]) +#define REGS_R10(r) ((r)[HOST_R10]) +#define REGS_R11(r) ((r)[HOST_R11]) +#define REGS_R12(r) ((r)[HOST_R12]) +#define REGS_R13(r) ((r)[HOST_R13]) +#define REGS_R14(r) ((r)[HOST_R14]) +#define REGS_R15(r) ((r)[HOST_R15]) +#define REGS_CS(r) ((r)[HOST_CS]) +#define REGS_EFLAGS(r) ((r)[HOST_EFLAGS]) +#define REGS_SS(r) ((r)[HOST_SS]) + +#define HOST_FS_BASE 21 +#define HOST_GS_BASE 22 +#define HOST_DS 23 +#define HOST_ES 24 +#define HOST_FS 25 +#define HOST_GS 26 + +/* Also defined in asm/ptrace-x86_64.h, but not in libc headers. So, these + * are already defined for kernel code, but not for userspace code. 
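The UPT_SYSCALL_ARG* macros above encode the i386 kernel syscall convention: ebx through ebp carry arguments 1..6 and eax carries the syscall number and return value (the 64-bit header below uses rdi, rsi, rdx, r10, r8, r9 instead). A throwaway table, with purely illustrative register indices, spells the mapping out:

#include <stdio.h>

enum { AX, BX, CX, DX, SI, DI, BP, NREGS };	/* illustrative indices only */

static const int arg_reg[6] = { BX, CX, DX, SI, DI, BP };
static const char *name[NREGS] = {
	"eax", "ebx", "ecx", "edx", "esi", "edi", "ebp"
};

int main(void)
{
	for (int i = 0; i < 6; i++)
		printf("arg%d -> %%%s\n", i + 1, name[arg_reg[i]]);
	printf("number/return -> %%%s\n", name[AX]);
	return 0;
}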
+ */ +#ifndef FS_BASE +/* These aren't defined in ptrace.h, but exist in struct user_regs_struct, + * which is what x86_64 ptrace actually uses. + */ +#define FS_BASE (HOST_FS_BASE * sizeof(long)) +#define GS_BASE (HOST_GS_BASE * sizeof(long)) +#define DS (HOST_DS * sizeof(long)) +#define ES (HOST_ES * sizeof(long)) +#define FS (HOST_FS * sizeof(long)) +#define GS (HOST_GS * sizeof(long)) +#endif + +#define REGS_FS_BASE(r) ((r)[HOST_FS_BASE]) +#define REGS_GS_BASE(r) ((r)[HOST_GS_BASE]) +#define REGS_DS(r) ((r)[HOST_DS]) +#define REGS_ES(r) ((r)[HOST_ES]) +#define REGS_FS(r) ((r)[HOST_FS]) +#define REGS_GS(r) ((r)[HOST_GS]) + +#define REGS_ORIG_RAX(r) ((r)[HOST_ORIG_AX]) + +#define REGS_SET_SYSCALL_RETURN(r, res) REGS_RAX(r) = (res) + +#define IP_RESTART_SYSCALL(ip) ((ip) -= 2) +#define REGS_RESTART_SYSCALL(r) IP_RESTART_SYSCALL(REGS_IP(r)) + +#define REGS_FAULT_ADDR(r) ((r)->fault_addr) + +#define REGS_FAULT_WRITE(r) FAULT_WRITE((r)->fault_type) + +#define REGS_TRAP(r) ((r)->trap_type) + +#define REGS_ERR(r) ((r)->fault_type) + +struct uml_pt_regs { + unsigned long gp[MAX_REG_NR]; + unsigned long fp[HOST_FP_SIZE]; + struct faultinfo faultinfo; + long syscall; + int is_user; +}; + +#define EMPTY_UML_PT_REGS { } + +#define UPT_RBX(r) REGS_RBX((r)->gp) +#define UPT_RCX(r) REGS_RCX((r)->gp) +#define UPT_RDX(r) REGS_RDX((r)->gp) +#define UPT_RSI(r) REGS_RSI((r)->gp) +#define UPT_RDI(r) REGS_RDI((r)->gp) +#define UPT_RBP(r) REGS_RBP((r)->gp) +#define UPT_RAX(r) REGS_RAX((r)->gp) +#define UPT_R8(r) REGS_R8((r)->gp) +#define UPT_R9(r) REGS_R9((r)->gp) +#define UPT_R10(r) REGS_R10((r)->gp) +#define UPT_R11(r) REGS_R11((r)->gp) +#define UPT_R12(r) REGS_R12((r)->gp) +#define UPT_R13(r) REGS_R13((r)->gp) +#define UPT_R14(r) REGS_R14((r)->gp) +#define UPT_R15(r) REGS_R15((r)->gp) +#define UPT_CS(r) REGS_CS((r)->gp) +#define UPT_FS_BASE(r) REGS_FS_BASE((r)->gp) +#define UPT_FS(r) REGS_FS((r)->gp) +#define UPT_GS_BASE(r) REGS_GS_BASE((r)->gp) +#define UPT_GS(r) REGS_GS((r)->gp) +#define UPT_DS(r) REGS_DS((r)->gp) +#define UPT_ES(r) REGS_ES((r)->gp) +#define UPT_CS(r) REGS_CS((r)->gp) +#define UPT_SS(r) REGS_SS((r)->gp) +#define UPT_ORIG_RAX(r) REGS_ORIG_RAX((r)->gp) + +#define UPT_IP(r) REGS_IP((r)->gp) +#define UPT_SP(r) REGS_SP((r)->gp) + +#define UPT_EFLAGS(r) REGS_EFLAGS((r)->gp) +#define UPT_SYSCALL_NR(r) ((r)->syscall) +#define UPT_SYSCALL_RET(r) UPT_RAX(r) + +extern int user_context(unsigned long sp); + +#define UPT_IS_USER(r) ((r)->is_user) + +#define UPT_SYSCALL_ARG1(r) UPT_RDI(r) +#define UPT_SYSCALL_ARG2(r) UPT_RSI(r) +#define UPT_SYSCALL_ARG3(r) UPT_RDX(r) +#define UPT_SYSCALL_ARG4(r) UPT_R10(r) +#define UPT_SYSCALL_ARG5(r) UPT_R8(r) +#define UPT_SYSCALL_ARG6(r) UPT_R9(r) + +struct syscall_args { + unsigned long args[6]; +}; + +#define SYSCALL_ARGS(r) ((struct syscall_args) \ + { .args = { UPT_SYSCALL_ARG1(r), \ + UPT_SYSCALL_ARG2(r), \ + UPT_SYSCALL_ARG3(r), \ + UPT_SYSCALL_ARG4(r), \ + UPT_SYSCALL_ARG5(r), \ + UPT_SYSCALL_ARG6(r) } } ) + +#define UPT_RESTART_SYSCALL(r) REGS_RESTART_SYSCALL((r)->gp) + +#define UPT_FAULTINFO(r) (&(r)->faultinfo) + +static inline void arch_init_registers(int pid) +{ +} + +#endif diff --git a/arch/x86/um/shared/sysdep/ptrace_user.h b/arch/x86/um/shared/sysdep/ptrace_user.h new file mode 100644 index 000000000000..16cd6b5e71f7 --- /dev/null +++ b/arch/x86/um/shared/sysdep/ptrace_user.h @@ -0,0 +1,27 @@ +#include <generated/user_constants.h> + +#define PT_OFFSET(r) ((r) * sizeof(long)) + +#define PT_SYSCALL_NR(regs) ((regs)[HOST_ORIG_AX]) +#define 
PT_SYSCALL_NR_OFFSET PT_OFFSET(HOST_ORIG_AX) + +#define PT_SYSCALL_RET_OFFSET PT_OFFSET(HOST_AX) + +#define REGS_IP_INDEX HOST_IP +#define REGS_SP_INDEX HOST_SP + +#ifdef __i386__ +#define FP_SIZE ((HOST_FPX_SIZE > HOST_FP_SIZE) ? HOST_FPX_SIZE : HOST_FP_SIZE) +#else +#define FP_SIZE HOST_FP_SIZE + +/* + * x86_64 FC3 doesn't define this in /usr/include/linux/ptrace.h even though + * it's defined in the kernel's include/linux/ptrace.h. Additionally, use the + * 2.4 name and value for 2.4 host compatibility. + */ +#ifndef PTRACE_OLDSETOPTIONS +#define PTRACE_OLDSETOPTIONS 21 +#endif + +#endif diff --git a/arch/x86/um/shared/sysdep/skas_ptrace.h b/arch/x86/um/shared/sysdep/skas_ptrace.h new file mode 100644 index 000000000000..453febe98993 --- /dev/null +++ b/arch/x86/um/shared/sysdep/skas_ptrace.h @@ -0,0 +1,22 @@ +/* + * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com) + * Licensed under the GPL + */ + +#ifndef __SYSDEP_X86_SKAS_PTRACE_H +#define __SYSDEP_X86_SKAS_PTRACE_H + +struct ptrace_faultinfo { + int is_write; + unsigned long addr; +}; + +struct ptrace_ldt { + int func; + void *ptr; + unsigned long bytecount; +}; + +#define PTRACE_LDT 54 + +#endif diff --git a/arch/x86/um/shared/sysdep/stub.h b/arch/x86/um/shared/sysdep/stub.h new file mode 100644 index 000000000000..bd161e300102 --- /dev/null +++ b/arch/x86/um/shared/sysdep/stub.h @@ -0,0 +1,14 @@ +#include <asm/unistd.h> +#include <sys/mman.h> +#include <signal.h> +#include "as-layout.h" +#include "stub-data.h" + +#ifdef __i386__ +#include "stub_32.h" +#else +#include "stub_64.h" +#endif + +extern void stub_segv_handler(int, siginfo_t *, void *); +extern void stub_clone_handler(void); diff --git a/arch/x86/um/shared/sysdep/stub_32.h b/arch/x86/um/shared/sysdep/stub_32.h new file mode 100644 index 000000000000..51fd256c75f0 --- /dev/null +++ b/arch/x86/um/shared/sysdep/stub_32.h @@ -0,0 +1,93 @@ +/* + * Copyright (C) 2004 Jeff Dike (jdike@addtoit.com) + * Licensed under the GPL + */ + +#ifndef __SYSDEP_STUB_H +#define __SYSDEP_STUB_H + +#include <asm/ptrace.h> + +#define STUB_SYSCALL_RET EAX +#define STUB_MMAP_NR __NR_mmap2 +#define MMAP_OFFSET(o) ((o) >> UM_KERN_PAGE_SHIFT) + +static inline long stub_syscall0(long syscall) +{ + long ret; + + __asm__ volatile ("int $0x80" : "=a" (ret) : "0" (syscall)); + + return ret; +} + +static inline long stub_syscall1(long syscall, long arg1) +{ + long ret; + + __asm__ volatile ("int $0x80" : "=a" (ret) : "0" (syscall), "b" (arg1)); + + return ret; +} + +static inline long stub_syscall2(long syscall, long arg1, long arg2) +{ + long ret; + + __asm__ volatile ("int $0x80" : "=a" (ret) : "0" (syscall), "b" (arg1), + "c" (arg2)); + + return ret; +} + +static inline long stub_syscall3(long syscall, long arg1, long arg2, long arg3) +{ + long ret; + + __asm__ volatile ("int $0x80" : "=a" (ret) : "0" (syscall), "b" (arg1), + "c" (arg2), "d" (arg3)); + + return ret; +} + +static inline long stub_syscall4(long syscall, long arg1, long arg2, long arg3, + long arg4) +{ + long ret; + + __asm__ volatile ("int $0x80" : "=a" (ret) : "0" (syscall), "b" (arg1), + "c" (arg2), "d" (arg3), "S" (arg4)); + + return ret; +} + +static inline long stub_syscall5(long syscall, long arg1, long arg2, long arg3, + long arg4, long arg5) +{ + long ret; + + __asm__ volatile ("int $0x80" : "=a" (ret) : "0" (syscall), "b" (arg1), + "c" (arg2), "d" (arg3), "S" (arg4), "D" (arg5)); + + return ret; +} + +static inline void trap_myself(void) +{ + __asm("int3"); +} + +static inline void remap_stack(int fd, unsigned long 
offset) +{ + __asm__ volatile ("movl %%eax,%%ebp ; movl %0,%%eax ; int $0x80 ;" + "movl %7, %%ebx ; movl %%eax, (%%ebx)" + : : "g" (STUB_MMAP_NR), "b" (STUB_DATA), + "c" (UM_KERN_PAGE_SIZE), + "d" (PROT_READ | PROT_WRITE), + "S" (MAP_FIXED | MAP_SHARED), "D" (fd), + "a" (offset), + "i" (&((struct stub_data *) STUB_DATA)->err) + : "memory"); +} + +#endif diff --git a/arch/x86/um/shared/sysdep/stub_64.h b/arch/x86/um/shared/sysdep/stub_64.h new file mode 100644 index 000000000000..994df93c5ed3 --- /dev/null +++ b/arch/x86/um/shared/sysdep/stub_64.h @@ -0,0 +1,99 @@ +/* + * Copyright (C) 2004 Jeff Dike (jdike@addtoit.com) + * Licensed under the GPL + */ + +#ifndef __SYSDEP_STUB_H +#define __SYSDEP_STUB_H + +#include <sysdep/ptrace_user.h> + +#define STUB_SYSCALL_RET PT_INDEX(RAX) +#define STUB_MMAP_NR __NR_mmap +#define MMAP_OFFSET(o) (o) + +#define __syscall_clobber "r11","rcx","memory" +#define __syscall "syscall" + +static inline long stub_syscall0(long syscall) +{ + long ret; + + __asm__ volatile (__syscall + : "=a" (ret) + : "0" (syscall) : __syscall_clobber ); + + return ret; +} + +static inline long stub_syscall2(long syscall, long arg1, long arg2) +{ + long ret; + + __asm__ volatile (__syscall + : "=a" (ret) + : "0" (syscall), "D" (arg1), "S" (arg2) : __syscall_clobber ); + + return ret; +} + +static inline long stub_syscall3(long syscall, long arg1, long arg2, long arg3) +{ + long ret; + + __asm__ volatile (__syscall + : "=a" (ret) + : "0" (syscall), "D" (arg1), "S" (arg2), "d" (arg3) + : __syscall_clobber ); + + return ret; +} + +static inline long stub_syscall4(long syscall, long arg1, long arg2, long arg3, + long arg4) +{ + long ret; + + __asm__ volatile ("movq %5,%%r10 ; " __syscall + : "=a" (ret) + : "0" (syscall), "D" (arg1), "S" (arg2), "d" (arg3), + "g" (arg4) + : __syscall_clobber, "r10" ); + + return ret; +} + +static inline long stub_syscall5(long syscall, long arg1, long arg2, long arg3, + long arg4, long arg5) +{ + long ret; + + __asm__ volatile ("movq %5,%%r10 ; movq %6,%%r8 ; " __syscall + : "=a" (ret) + : "0" (syscall), "D" (arg1), "S" (arg2), "d" (arg3), + "g" (arg4), "g" (arg5) + : __syscall_clobber, "r10", "r8" ); + + return ret; +} + +static inline void trap_myself(void) +{ + __asm("int3"); +} + +static inline void remap_stack(long fd, unsigned long offset) +{ + __asm__ volatile ("movq %4,%%r10 ; movq %5,%%r8 ; " + "movq %6, %%r9; " __syscall "; movq %7, %%rbx ; " + "movq %%rax, (%%rbx)": + : "a" (STUB_MMAP_NR), "D" (STUB_DATA), + "S" (UM_KERN_PAGE_SIZE), + "d" (PROT_READ | PROT_WRITE), + "g" (MAP_FIXED | MAP_SHARED), "g" (fd), + "g" (offset), + "i" (&((struct stub_data *) STUB_DATA)->err) + : __syscall_clobber, "r10", "r8", "r9" ); +} + +#endif diff --git a/arch/x86/um/shared/sysdep/syscalls.h b/arch/x86/um/shared/sysdep/syscalls.h new file mode 100644 index 000000000000..bd9a89b67e41 --- /dev/null +++ b/arch/x86/um/shared/sysdep/syscalls.h @@ -0,0 +1,5 @@ +#ifdef __i386__ +#include "syscalls_32.h" +#else +#include "syscalls_64.h" +#endif diff --git a/arch/x86/um/shared/sysdep/syscalls_32.h b/arch/x86/um/shared/sysdep/syscalls_32.h new file mode 100644 index 000000000000..05cb796aecb5 --- /dev/null +++ b/arch/x86/um/shared/sysdep/syscalls_32.h @@ -0,0 +1,20 @@ +/* + * Copyright (C) 2000 - 2008 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * Licensed under the GPL + */ + +#include "asm/unistd.h" +#include "sysdep/ptrace.h" + +typedef long syscall_handler_t(struct pt_regs); + +/* Not declared on x86, incompatible declarations on x86_64, so these have + * to go here 
rather than in sys_call_table.c + */ +extern syscall_handler_t sys_rt_sigaction; + +extern syscall_handler_t *sys_call_table[]; + +#define EXECUTE_SYSCALL(syscall, regs) \ + ((long (*)(struct syscall_args)) \ + (*sys_call_table[syscall]))(SYSCALL_ARGS(®s->regs)) diff --git a/arch/x86/um/shared/sysdep/syscalls_64.h b/arch/x86/um/shared/sysdep/syscalls_64.h new file mode 100644 index 000000000000..8a7d5e1da98e --- /dev/null +++ b/arch/x86/um/shared/sysdep/syscalls_64.h @@ -0,0 +1,32 @@ +/* + * Copyright 2003 PathScale, Inc. + * + * Licensed under the GPL + */ + +#ifndef __SYSDEP_X86_64_SYSCALLS_H__ +#define __SYSDEP_X86_64_SYSCALLS_H__ + +#include <linux/msg.h> +#include <linux/shm.h> + +typedef long syscall_handler_t(void); + +extern syscall_handler_t *sys_call_table[]; + +#define EXECUTE_SYSCALL(syscall, regs) \ + (((long (*)(long, long, long, long, long, long)) \ + (*sys_call_table[syscall]))(UPT_SYSCALL_ARG1(®s->regs), \ + UPT_SYSCALL_ARG2(®s->regs), \ + UPT_SYSCALL_ARG3(®s->regs), \ + UPT_SYSCALL_ARG4(®s->regs), \ + UPT_SYSCALL_ARG5(®s->regs), \ + UPT_SYSCALL_ARG6(®s->regs))) + +extern long old_mmap(unsigned long addr, unsigned long len, + unsigned long prot, unsigned long flags, + unsigned long fd, unsigned long pgoff); +extern syscall_handler_t sys_modify_ldt; +extern syscall_handler_t sys_arch_prctl; + +#endif diff --git a/arch/x86/um/shared/sysdep/tls.h b/arch/x86/um/shared/sysdep/tls.h new file mode 100644 index 000000000000..27cce00c6b30 --- /dev/null +++ b/arch/x86/um/shared/sysdep/tls.h @@ -0,0 +1,39 @@ +#ifndef _SYSDEP_TLS_H +#define _SYSDEP_TLS_H + +# ifndef __KERNEL__ + +/* Change name to avoid conflicts with the original one from <asm/ldt.h>, which + * may be named user_desc (but in 2.4 and in header matching its API was named + * modify_ldt_ldt_s). */ + +typedef struct um_dup_user_desc { + unsigned int entry_number; + unsigned int base_addr; + unsigned int limit; + unsigned int seg_32bit:1; + unsigned int contents:2; + unsigned int read_exec_only:1; + unsigned int limit_in_pages:1; + unsigned int seg_not_present:1; + unsigned int useable:1; +#ifdef __x86_64__ + unsigned int lm:1; +#endif +} user_desc_t; + +# else /* __KERNEL__ */ + +typedef struct user_desc user_desc_t; + +# endif /* __KERNEL__ */ + +extern int os_set_thread_area(user_desc_t *info, int pid); +extern int os_get_thread_area(user_desc_t *info, int pid); + +#ifdef __i386__ +#define GDT_ENTRY_TLS_MIN_I386 6 +#define GDT_ENTRY_TLS_MIN_X86_64 12 +#endif + +#endif /* _SYSDEP_TLS_H */ diff --git a/arch/x86/um/signal.c b/arch/x86/um/signal.c new file mode 100644 index 000000000000..4883b9546016 --- /dev/null +++ b/arch/x86/um/signal.c @@ -0,0 +1,624 @@ +/* + * Copyright (C) 2003 PathScale, Inc. + * Copyright (C) 2003 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * Licensed under the GPL + */ + + +#include <linux/personality.h> +#include <linux/ptrace.h> +#include <linux/kernel.h> +#include <asm/unistd.h> +#include <asm/uaccess.h> +#include <asm/ucontext.h> +#include "frame_kern.h" +#include "skas.h" + +#ifdef CONFIG_X86_32 + +/* + * FPU tag word conversions. + */ + +static inline unsigned short twd_i387_to_fxsr(unsigned short twd) +{ + unsigned int tmp; /* to avoid 16 bit prefixes in the code */ + + /* Transform each pair of bits into 01 (valid) or 00 (empty) */ + tmp = ~twd; + tmp = (tmp | (tmp>>1)) & 0x5555; /* 0V0V0V0V0V0V0V0V */ + /* and move the valid bits to the lower byte. 
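The routine above compacts the i387 tag word (two bits per register, 3 == empty) into the one-bit-per-register FXSR tag byte (1 == valid). A standalone copy of it, run on one arbitrary sample value, shows the folding; 0xff00 means st4..st7 empty and st0..st3 valid, which compacts to 0x0f:

#include <stdio.h>

static unsigned short twd_i387_to_fxsr(unsigned short twd)
{
	unsigned int tmp;

	tmp = ~twd;				/* invert so valid tags have bits set */
	tmp = (tmp | (tmp >> 1)) & 0x5555;	/* 0V0V0V0V0V0V0V0V */
	tmp = (tmp | (tmp >> 1)) & 0x3333;	/* 00VV00VV00VV00VV */
	tmp = (tmp | (tmp >> 2)) & 0x0f0f;	/* 0000VVVV0000VVVV */
	tmp = (tmp | (tmp >> 4)) & 0x00ff;	/* 00000000VVVVVVVV */
	return tmp;
}

int main(void)
{
	printf("%#x -> %#x\n", 0xff00, twd_i387_to_fxsr(0xff00));	/* 0xff00 -> 0xf */
	return 0;
}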
*/ + tmp = (tmp | (tmp >> 1)) & 0x3333; /* 00VV00VV00VV00VV */ + tmp = (tmp | (tmp >> 2)) & 0x0f0f; /* 0000VVVV0000VVVV */ + tmp = (tmp | (tmp >> 4)) & 0x00ff; /* 00000000VVVVVVVV */ + return tmp; +} + +static inline unsigned long twd_fxsr_to_i387(struct user_fxsr_struct *fxsave) +{ + struct _fpxreg *st = NULL; + unsigned long twd = (unsigned long) fxsave->twd; + unsigned long tag; + unsigned long ret = 0xffff0000; + int i; + +#define FPREG_ADDR(f, n) ((char *)&(f)->st_space + (n) * 16) + + for (i = 0; i < 8; i++) { + if (twd & 0x1) { + st = (struct _fpxreg *) FPREG_ADDR(fxsave, i); + + switch (st->exponent & 0x7fff) { + case 0x7fff: + tag = 2; /* Special */ + break; + case 0x0000: + if ( !st->significand[0] && + !st->significand[1] && + !st->significand[2] && + !st->significand[3] ) { + tag = 1; /* Zero */ + } else { + tag = 2; /* Special */ + } + break; + default: + if (st->significand[3] & 0x8000) { + tag = 0; /* Valid */ + } else { + tag = 2; /* Special */ + } + break; + } + } else { + tag = 3; /* Empty */ + } + ret |= (tag << (2 * i)); + twd = twd >> 1; + } + return ret; +} + +static int convert_fxsr_to_user(struct _fpstate __user *buf, + struct user_fxsr_struct *fxsave) +{ + unsigned long env[7]; + struct _fpreg __user *to; + struct _fpxreg *from; + int i; + + env[0] = (unsigned long)fxsave->cwd | 0xffff0000ul; + env[1] = (unsigned long)fxsave->swd | 0xffff0000ul; + env[2] = twd_fxsr_to_i387(fxsave); + env[3] = fxsave->fip; + env[4] = fxsave->fcs | ((unsigned long)fxsave->fop << 16); + env[5] = fxsave->foo; + env[6] = fxsave->fos; + + if (__copy_to_user(buf, env, 7 * sizeof(unsigned long))) + return 1; + + to = &buf->_st[0]; + from = (struct _fpxreg *) &fxsave->st_space[0]; + for (i = 0; i < 8; i++, to++, from++) { + unsigned long __user *t = (unsigned long __user *)to; + unsigned long *f = (unsigned long *)from; + + if (__put_user(*f, t) || + __put_user(*(f + 1), t + 1) || + __put_user(from->exponent, &to->exponent)) + return 1; + } + return 0; +} + +static int convert_fxsr_from_user(struct user_fxsr_struct *fxsave, + struct _fpstate __user *buf) +{ + unsigned long env[7]; + struct _fpxreg *to; + struct _fpreg __user *from; + int i; + + if (copy_from_user( env, buf, 7 * sizeof(long))) + return 1; + + fxsave->cwd = (unsigned short)(env[0] & 0xffff); + fxsave->swd = (unsigned short)(env[1] & 0xffff); + fxsave->twd = twd_i387_to_fxsr((unsigned short)(env[2] & 0xffff)); + fxsave->fip = env[3]; + fxsave->fop = (unsigned short)((env[4] & 0xffff0000ul) >> 16); + fxsave->fcs = (env[4] & 0xffff); + fxsave->foo = env[5]; + fxsave->fos = env[6]; + + to = (struct _fpxreg *) &fxsave->st_space[0]; + from = &buf->_st[0]; + for (i = 0; i < 8; i++, to++, from++) { + unsigned long *t = (unsigned long *)to; + unsigned long __user *f = (unsigned long __user *)from; + + if (__get_user(*t, f) || + __get_user(*(t + 1), f + 1) || + __get_user(to->exponent, &from->exponent)) + return 1; + } + return 0; +} + +extern int have_fpx_regs; + +#endif + +static int copy_sc_from_user(struct pt_regs *regs, + struct sigcontext __user *from) +{ + struct sigcontext sc; + int err, pid; + + err = copy_from_user(&sc, from, sizeof(sc)); + if (err) + return err; + +#define GETREG(regno, regname) regs->regs.gp[HOST_##regno] = sc.regname + +#ifdef CONFIG_X86_32 + GETREG(GS, gs); + GETREG(FS, fs); + GETREG(ES, es); + GETREG(DS, ds); +#endif + GETREG(DI, di); + GETREG(SI, si); + GETREG(BP, bp); + GETREG(SP, sp); + GETREG(BX, bx); + GETREG(DX, dx); + GETREG(CX, cx); + GETREG(AX, ax); + GETREG(IP, ip); + +#ifdef CONFIG_X86_64 + 
GETREG(R8, r8); + GETREG(R9, r9); + GETREG(R10, r10); + GETREG(R11, r11); + GETREG(R12, r12); + GETREG(R13, r13); + GETREG(R14, r14); + GETREG(R15, r15); +#endif + + GETREG(CS, cs); + GETREG(EFLAGS, flags); +#ifdef CONFIG_X86_32 + GETREG(SS, ss); +#endif + +#undef GETREG + + pid = userspace_pid[current_thread_info()->cpu]; +#ifdef CONFIG_X86_32 + if (have_fpx_regs) { + struct user_fxsr_struct fpx; + + err = copy_from_user(&fpx, + &((struct _fpstate __user *)sc.fpstate)->_fxsr_env[0], + sizeof(struct user_fxsr_struct)); + if (err) + return 1; + + err = convert_fxsr_from_user(&fpx, sc.fpstate); + if (err) + return 1; + + err = restore_fpx_registers(pid, (unsigned long *) &fpx); + if (err < 0) { + printk(KERN_ERR "copy_sc_from_user - " + "restore_fpx_registers failed, errno = %d\n", + -err); + return 1; + } + } else +#endif + { + struct user_i387_struct fp; + + err = copy_from_user(&fp, sc.fpstate, + sizeof(struct user_i387_struct)); + if (err) + return 1; + + err = restore_fp_registers(pid, (unsigned long *) &fp); + if (err < 0) { + printk(KERN_ERR "copy_sc_from_user - " + "restore_fp_registers failed, errno = %d\n", + -err); + return 1; + } + } + return 0; +} + +static int copy_sc_to_user(struct sigcontext __user *to, + struct _fpstate __user *to_fp, struct pt_regs *regs, + unsigned long mask) +{ + struct sigcontext sc; + struct faultinfo * fi = ¤t->thread.arch.faultinfo; + int err, pid; + memset(&sc, 0, sizeof(struct sigcontext)); + +#define PUTREG(regno, regname) sc.regname = regs->regs.gp[HOST_##regno] + +#ifdef CONFIG_X86_32 + PUTREG(GS, gs); + PUTREG(FS, fs); + PUTREG(ES, es); + PUTREG(DS, ds); +#endif + PUTREG(DI, di); + PUTREG(SI, si); + PUTREG(BP, bp); + PUTREG(SP, sp); + PUTREG(BX, bx); + PUTREG(DX, dx); + PUTREG(CX, cx); + PUTREG(AX, ax); +#ifdef CONFIG_X86_64 + PUTREG(R8, r8); + PUTREG(R9, r9); + PUTREG(R10, r10); + PUTREG(R11, r11); + PUTREG(R12, r12); + PUTREG(R13, r13); + PUTREG(R14, r14); + PUTREG(R15, r15); +#endif + + sc.cr2 = fi->cr2; + sc.err = fi->error_code; + sc.trapno = fi->trap_no; + PUTREG(IP, ip); + PUTREG(CS, cs); + PUTREG(EFLAGS, flags); +#ifdef CONFIG_X86_32 + PUTREG(SP, sp_at_signal); + PUTREG(SS, ss); +#endif +#undef PUTREG + sc.oldmask = mask; + sc.fpstate = to_fp; + + err = copy_to_user(to, &sc, sizeof(struct sigcontext)); + if (err) + return 1; + + pid = userspace_pid[current_thread_info()->cpu]; + +#ifdef CONFIG_X86_32 + if (have_fpx_regs) { + struct user_fxsr_struct fpx; + + err = save_fpx_registers(pid, (unsigned long *) &fpx); + if (err < 0){ + printk(KERN_ERR "copy_sc_to_user - save_fpx_registers " + "failed, errno = %d\n", err); + return 1; + } + + err = convert_fxsr_to_user(to_fp, &fpx); + if (err) + return 1; + + err |= __put_user(fpx.swd, &to_fp->status); + err |= __put_user(X86_FXSR_MAGIC, &to_fp->magic); + if (err) + return 1; + + if (copy_to_user(&to_fp->_fxsr_env[0], &fpx, + sizeof(struct user_fxsr_struct))) + return 1; + } else +#endif + { + struct user_i387_struct fp; + + err = save_fp_registers(pid, (unsigned long *) &fp); + if (copy_to_user(to_fp, &fp, sizeof(struct user_i387_struct))) + return 1; + } + + return 0; +} + +#ifdef CONFIG_X86_32 +static int copy_ucontext_to_user(struct ucontext __user *uc, + struct _fpstate __user *fp, sigset_t *set, + unsigned long sp) +{ + int err = 0; + + err |= put_user(current->sas_ss_sp, &uc->uc_stack.ss_sp); + err |= put_user(sas_ss_flags(sp), &uc->uc_stack.ss_flags); + err |= put_user(current->sas_ss_size, &uc->uc_stack.ss_size); + err |= copy_sc_to_user(&uc->uc_mcontext, fp, ¤t->thread.regs, 0); + err 
|= copy_to_user(&uc->uc_sigmask, set, sizeof(*set)); + return err; +} + +struct sigframe +{ + char __user *pretcode; + int sig; + struct sigcontext sc; + struct _fpstate fpstate; + unsigned long extramask[_NSIG_WORDS-1]; + char retcode[8]; +}; + +struct rt_sigframe +{ + char __user *pretcode; + int sig; + struct siginfo __user *pinfo; + void __user *puc; + struct siginfo info; + struct ucontext uc; + struct _fpstate fpstate; + char retcode[8]; +}; + +int setup_signal_stack_sc(unsigned long stack_top, int sig, + struct k_sigaction *ka, struct pt_regs *regs, + sigset_t *mask) +{ + struct sigframe __user *frame; + void __user *restorer; + int err = 0; + + /* This is the same calculation as i386 - ((sp + 4) & 15) == 0 */ + stack_top = ((stack_top + 4) & -16UL) - 4; + frame = (struct sigframe __user *) stack_top - 1; + if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) + return 1; + + restorer = frame->retcode; + if (ka->sa.sa_flags & SA_RESTORER) + restorer = ka->sa.sa_restorer; + + err |= __put_user(restorer, &frame->pretcode); + err |= __put_user(sig, &frame->sig); + err |= copy_sc_to_user(&frame->sc, &frame->fpstate, regs, mask->sig[0]); + if (_NSIG_WORDS > 1) + err |= __copy_to_user(&frame->extramask, &mask->sig[1], + sizeof(frame->extramask)); + + /* + * This is popl %eax ; movl $,%eax ; int $0x80 + * + * WE DO NOT USE IT ANY MORE! It's only left here for historical + * reasons and because gdb uses it as a signature to notice + * signal handler stack frames. + */ + err |= __put_user(0xb858, (short __user *)(frame->retcode+0)); + err |= __put_user(__NR_sigreturn, (int __user *)(frame->retcode+2)); + err |= __put_user(0x80cd, (short __user *)(frame->retcode+6)); + + if (err) + return err; + + PT_REGS_SP(regs) = (unsigned long) frame; + PT_REGS_IP(regs) = (unsigned long) ka->sa.sa_handler; + PT_REGS_EAX(regs) = (unsigned long) sig; + PT_REGS_EDX(regs) = (unsigned long) 0; + PT_REGS_ECX(regs) = (unsigned long) 0; + + if ((current->ptrace & PT_DTRACE) && (current->ptrace & PT_PTRACED)) + ptrace_notify(SIGTRAP); + return 0; +} + +int setup_signal_stack_si(unsigned long stack_top, int sig, + struct k_sigaction *ka, struct pt_regs *regs, + siginfo_t *info, sigset_t *mask) +{ + struct rt_sigframe __user *frame; + void __user *restorer; + int err = 0; + + stack_top &= -8UL; + frame = (struct rt_sigframe __user *) stack_top - 1; + if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) + return 1; + + restorer = frame->retcode; + if (ka->sa.sa_flags & SA_RESTORER) + restorer = ka->sa.sa_restorer; + + err |= __put_user(restorer, &frame->pretcode); + err |= __put_user(sig, &frame->sig); + err |= __put_user(&frame->info, &frame->pinfo); + err |= __put_user(&frame->uc, &frame->puc); + err |= copy_siginfo_to_user(&frame->info, info); + err |= copy_ucontext_to_user(&frame->uc, &frame->fpstate, mask, + PT_REGS_SP(regs)); + + /* + * This is movl $,%eax ; int $0x80 + * + * WE DO NOT USE IT ANY MORE! It's only left here for historical + * reasons and because gdb uses it as a signature to notice + * signal handler stack frames. 
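The __put_user() triple above writes the trampoline one halfword, one word, one halfword at a time; with little-endian stores the bytes come out as 58 (popl %eax), b8 imm32 (movl $__NR_sigreturn,%eax), cd 80 (int $0x80). A quick sketch that assembles and dumps the same eight bytes, assuming the i386 value __NR_sigreturn == 119:

#include <stdio.h>
#include <string.h>

int main(void)
{
	unsigned char retcode[8];
	unsigned short popl_movl = 0xb858;	/* 58 = popl %eax, b8 = movl $imm32,%eax */
	unsigned int nr = 119;			/* __NR_sigreturn on i386 */
	unsigned short int80 = 0x80cd;		/* cd 80 = int $0x80 */

	memcpy(retcode + 0, &popl_movl, 2);
	memcpy(retcode + 2, &nr, 4);
	memcpy(retcode + 6, &int80, 2);

	for (int i = 0; i < 8; i++)
		printf("%02x ", retcode[i]);	/* 58 b8 77 00 00 00 cd 80 */
	printf("\n");
	return 0;
}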
+ */ + err |= __put_user(0xb8, (char __user *)(frame->retcode+0)); + err |= __put_user(__NR_rt_sigreturn, (int __user *)(frame->retcode+1)); + err |= __put_user(0x80cd, (short __user *)(frame->retcode+5)); + + if (err) + return err; + + PT_REGS_SP(regs) = (unsigned long) frame; + PT_REGS_IP(regs) = (unsigned long) ka->sa.sa_handler; + PT_REGS_EAX(regs) = (unsigned long) sig; + PT_REGS_EDX(regs) = (unsigned long) &frame->info; + PT_REGS_ECX(regs) = (unsigned long) &frame->uc; + + if ((current->ptrace & PT_DTRACE) && (current->ptrace & PT_PTRACED)) + ptrace_notify(SIGTRAP); + return 0; +} + +long sys_sigreturn(struct pt_regs *regs) +{ + unsigned long sp = PT_REGS_SP(¤t->thread.regs); + struct sigframe __user *frame = (struct sigframe __user *)(sp - 8); + sigset_t set; + struct sigcontext __user *sc = &frame->sc; + unsigned long __user *oldmask = &sc->oldmask; + unsigned long __user *extramask = frame->extramask; + int sig_size = (_NSIG_WORDS - 1) * sizeof(unsigned long); + + if (copy_from_user(&set.sig[0], oldmask, sizeof(set.sig[0])) || + copy_from_user(&set.sig[1], extramask, sig_size)) + goto segfault; + + sigdelsetmask(&set, ~_BLOCKABLE); + set_current_blocked(&set); + + if (copy_sc_from_user(¤t->thread.regs, sc)) + goto segfault; + + /* Avoid ERESTART handling */ + PT_REGS_SYSCALL_NR(¤t->thread.regs) = -1; + return PT_REGS_SYSCALL_RET(¤t->thread.regs); + + segfault: + force_sig(SIGSEGV, current); + return 0; +} + +#else + +struct rt_sigframe +{ + char __user *pretcode; + struct ucontext uc; + struct siginfo info; + struct _fpstate fpstate; +}; + +int setup_signal_stack_si(unsigned long stack_top, int sig, + struct k_sigaction *ka, struct pt_regs * regs, + siginfo_t *info, sigset_t *set) +{ + struct rt_sigframe __user *frame; + int err = 0; + struct task_struct *me = current; + + frame = (struct rt_sigframe __user *) + round_down(stack_top - sizeof(struct rt_sigframe), 16); + /* Subtract 128 for a red zone and 8 for proper alignment */ + frame = (struct rt_sigframe __user *) ((unsigned long) frame - 128 - 8); + + if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) + goto out; + + if (ka->sa.sa_flags & SA_SIGINFO) { + err |= copy_siginfo_to_user(&frame->info, info); + if (err) + goto out; + } + + /* Create the ucontext. */ + err |= __put_user(0, &frame->uc.uc_flags); + err |= __put_user(0, &frame->uc.uc_link); + err |= __put_user(me->sas_ss_sp, &frame->uc.uc_stack.ss_sp); + err |= __put_user(sas_ss_flags(PT_REGS_SP(regs)), + &frame->uc.uc_stack.ss_flags); + err |= __put_user(me->sas_ss_size, &frame->uc.uc_stack.ss_size); + err |= copy_sc_to_user(&frame->uc.uc_mcontext, &frame->fpstate, regs, + set->sig[0]); + err |= __put_user(&frame->fpstate, &frame->uc.uc_mcontext.fpstate); + if (sizeof(*set) == 16) { + __put_user(set->sig[0], &frame->uc.uc_sigmask.sig[0]); + __put_user(set->sig[1], &frame->uc.uc_sigmask.sig[1]); + } + else + err |= __copy_to_user(&frame->uc.uc_sigmask, set, + sizeof(*set)); + + /* + * Set up to return from userspace. If provided, use a stub + * already in userspace. + */ + /* x86-64 should always use SA_RESTORER. 
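The frame placement in the 64-bit setup_signal_stack_si() above rounds down to 16 bytes and then subtracts 128 + 8: the 128 bytes step over the ABI red zone below the interrupted RSP, and the extra 8 leave the handler entered with rsp % 16 == 8, the same alignment a call instruction would have produced. A quick arithmetic check with made-up addresses:

#include <stdio.h>

#define round_down(x, y) ((x) & ~((unsigned long)(y) - 1))

int main(void)
{
	unsigned long stack_top = 0x7ffff000;	/* arbitrary */
	unsigned long frame_size = 0x440;	/* stand-in for sizeof(struct rt_sigframe) */
	unsigned long frame;

	frame = round_down(stack_top - frame_size, 16);
	frame -= 128 + 8;			/* red zone + return-address slot */

	/* prints frame % 16 = 8: handler entry looks like a normal call */
	printf("frame = %#lx, frame mod 16 = %lu\n", frame, frame % 16);
	return 0;
}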
*/ + if (ka->sa.sa_flags & SA_RESTORER) + err |= __put_user(ka->sa.sa_restorer, &frame->pretcode); + else + /* could use a vstub here */ + return err; + + if (err) + return err; + + /* Set up registers for signal handler */ + { + struct exec_domain *ed = current_thread_info()->exec_domain; + if (unlikely(ed && ed->signal_invmap && sig < 32)) + sig = ed->signal_invmap[sig]; + } + + PT_REGS_SP(regs) = (unsigned long) frame; + PT_REGS_RDI(regs) = sig; + /* In case the signal handler was declared without prototypes */ + PT_REGS_RAX(regs) = 0; + + /* + * This also works for non SA_SIGINFO handlers because they expect the + * next argument after the signal number on the stack. + */ + PT_REGS_RSI(regs) = (unsigned long) &frame->info; + PT_REGS_RDX(regs) = (unsigned long) &frame->uc; + PT_REGS_RIP(regs) = (unsigned long) ka->sa.sa_handler; + out: + return err; +} +#endif + +long sys_rt_sigreturn(struct pt_regs *regs) +{ + unsigned long sp = PT_REGS_SP(¤t->thread.regs); + struct rt_sigframe __user *frame = + (struct rt_sigframe __user *)(sp - sizeof(long)); + struct ucontext __user *uc = &frame->uc; + sigset_t set; + + if (copy_from_user(&set, &uc->uc_sigmask, sizeof(set))) + goto segfault; + + sigdelsetmask(&set, ~_BLOCKABLE); + set_current_blocked(&set); + + if (copy_sc_from_user(¤t->thread.regs, &uc->uc_mcontext)) + goto segfault; + + /* Avoid ERESTART handling */ + PT_REGS_SYSCALL_NR(¤t->thread.regs) = -1; + return PT_REGS_SYSCALL_RET(¤t->thread.regs); + + segfault: + force_sig(SIGSEGV, current); + return 0; +} + +#ifdef CONFIG_X86_32 +long ptregs_sigreturn(void) +{ + return sys_sigreturn(NULL); +} +long ptregs_rt_sigreturn(void) +{ + return sys_rt_sigreturn(NULL); +} +#endif diff --git a/arch/x86/um/stub_32.S b/arch/x86/um/stub_32.S new file mode 100644 index 000000000000..54a36ec20cb7 --- /dev/null +++ b/arch/x86/um/stub_32.S @@ -0,0 +1,51 @@ +#include "as-layout.h" + + .globl syscall_stub +.section .__syscall_stub, "ax" + + .globl batch_syscall_stub +batch_syscall_stub: + /* load pointer to first operation */ + mov $(STUB_DATA+8), %esp + +again: + /* load length of additional data */ + mov 0x0(%esp), %eax + + /* if(length == 0) : end of list */ + /* write possible 0 to header */ + mov %eax, STUB_DATA+4 + cmpl $0, %eax + jz done + + /* save current pointer */ + mov %esp, STUB_DATA+4 + + /* skip additional data */ + add %eax, %esp + + /* load syscall-# */ + pop %eax + + /* load syscall params */ + pop %ebx + pop %ecx + pop %edx + pop %esi + pop %edi + pop %ebp + + /* execute syscall */ + int $0x80 + + /* check return value */ + pop %ebx + cmp %ebx, %eax + je again + +done: + /* save return value */ + mov %eax, STUB_DATA + + /* stop */ + int3 diff --git a/arch/x86/um/stub_64.S b/arch/x86/um/stub_64.S new file mode 100644 index 000000000000..20e4a96a6dcb --- /dev/null +++ b/arch/x86/um/stub_64.S @@ -0,0 +1,66 @@ +#include "as-layout.h" + + .globl syscall_stub +.section .__syscall_stub, "ax" +syscall_stub: + syscall + /* We don't have 64-bit constants, so this constructs the address + * we need. 
+ */ + movq $(STUB_DATA >> 32), %rbx + salq $32, %rbx + movq $(STUB_DATA & 0xffffffff), %rcx + or %rcx, %rbx + movq %rax, (%rbx) + int3 + + .globl batch_syscall_stub +batch_syscall_stub: + mov $(STUB_DATA >> 32), %rbx + sal $32, %rbx + mov $(STUB_DATA & 0xffffffff), %rax + or %rax, %rbx + /* load pointer to first operation */ + mov %rbx, %rsp + add $0x10, %rsp +again: + /* load length of additional data */ + mov 0x0(%rsp), %rax + + /* if(length == 0) : end of list */ + /* write possible 0 to header */ + mov %rax, 8(%rbx) + cmp $0, %rax + jz done + + /* save current pointer */ + mov %rsp, 8(%rbx) + + /* skip additional data */ + add %rax, %rsp + + /* load syscall-# */ + pop %rax + + /* load syscall params */ + pop %rdi + pop %rsi + pop %rdx + pop %r10 + pop %r8 + pop %r9 + + /* execute syscall */ + syscall + + /* check return value */ + pop %rcx + cmp %rcx, %rax + je again + +done: + /* save return value */ + mov %rax, (%rbx) + + /* stop */ + int3 diff --git a/arch/x86/um/stub_segv.c b/arch/x86/um/stub_segv.c new file mode 100644 index 000000000000..b7450bd22e7d --- /dev/null +++ b/arch/x86/um/stub_segv.c @@ -0,0 +1,19 @@ +/* + * Copyright (C) 2004 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * Licensed under the GPL + */ + +#include "sysdep/stub.h" +#include "sysdep/faultinfo.h" +#include "sysdep/mcontext.h" + +void __attribute__ ((__section__ (".__syscall_stub"))) +stub_segv_handler(int sig, siginfo_t *info, void *p) +{ + struct ucontext *uc = p; + + GET_FAULTINFO_FROM_MC(*((struct faultinfo *) STUB_DATA), + &uc->uc_mcontext); + trap_myself(); +} + diff --git a/arch/x86/um/sys_call_table_32.S b/arch/x86/um/sys_call_table_32.S new file mode 100644 index 000000000000..a7ca80d2dceb --- /dev/null +++ b/arch/x86/um/sys_call_table_32.S @@ -0,0 +1,26 @@ +#include <linux/linkage.h> +/* Steal i386 syscall table for our purposes, but with some slight changes.*/ + +#define sys_iopl sys_ni_syscall +#define sys_ioperm sys_ni_syscall + +#define sys_vm86old sys_ni_syscall +#define sys_vm86 sys_ni_syscall + +#define old_mmap sys_old_mmap + +#define ptregs_fork sys_fork +#define ptregs_execve sys_execve +#define ptregs_iopl sys_iopl +#define ptregs_vm86old sys_vm86old +#define ptregs_clone sys_clone +#define ptregs_vm86 sys_vm86 +#define ptregs_sigaltstack sys_sigaltstack +#define ptregs_vfork sys_vfork + +.section .rodata,"a" + +#include "../kernel/syscall_table_32.S" + +ENTRY(syscall_table_size) +.long .-sys_call_table diff --git a/arch/x86/um/sys_call_table_64.c b/arch/x86/um/sys_call_table_64.c new file mode 100644 index 000000000000..99522f78b162 --- /dev/null +++ b/arch/x86/um/sys_call_table_64.c @@ -0,0 +1,64 @@ +/* + * System call table for UML/x86-64, copied from arch/x86_64/kernel/syscall.c + * with some changes for UML. + */ + +#include <linux/linkage.h> +#include <linux/sys.h> +#include <linux/cache.h> + +#define __NO_STUBS + +/* + * Below you can see, in terms of #define's, the differences between the x86-64 + * and the UML syscall table. + */ + +/* Not going to be implemented by UML, since we have no hardware. */ +#define stub_iopl sys_ni_syscall +#define sys_ioperm sys_ni_syscall + +/* + * The UML TLS problem. Note that x86_64 does not implement this, so the below + * is needed only for the ia32 compatibility. 
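batch_syscall_stub (here and in stub_32.S above) walks a list of variable-length records placed after the stub data header: a length word, optional payload, the syscall number, six arguments, and the expected return value, terminated by a zero length. The following host-side model of that walk is inferred from the assembly (the "add %rax, %rsp" only lands on the syscall number if the length word counts itself plus the payload) and is simplified to payload-free records, ignoring the status word the stub keeps at the list head:

#include <stdio.h>

int main(void)
{
	unsigned long buf[16];
	unsigned long *p = buf;

	/* one record with no payload: length, nr, six args, expected result */
	*p++ = sizeof(unsigned long);	/* length word, counting itself */
	*p++ = 9;			/* e.g. __NR_mmap on x86_64 */
	for (int i = 0; i < 6; i++)
		*p++ = i;		/* the six syscall arguments */
	*p++ = 0;			/* expected return value */
	*p = 0;				/* zero length terminates the list */

	/* walk it the way batch_syscall_stub does */
	for (unsigned long *sp = buf; *sp; ) {
		sp = (unsigned long *)((char *)sp + *sp);	/* skip length + payload */
		unsigned long nr = *sp++;
		sp += 6;					/* the arguments */
		unsigned long expected = *sp++;
		printf("would run syscall %lu, expect %lu\n", nr, expected);
	}
	return 0;
}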
+ */ + +/* On UML we call it this way ("old" means it's not mmap2) */ +#define sys_mmap old_mmap + +#define stub_clone sys_clone +#define stub_fork sys_fork +#define stub_vfork sys_vfork +#define stub_execve sys_execve +#define stub_rt_sigsuspend sys_rt_sigsuspend +#define stub_sigaltstack sys_sigaltstack +#define stub_rt_sigreturn sys_rt_sigreturn + +#define __SYSCALL(nr, sym) extern asmlinkage void sym(void) ; +#undef _ASM_X86_UNISTD_64_H +#include "../../x86/include/asm/unistd_64.h" + +#undef __SYSCALL +#define __SYSCALL(nr, sym) [ nr ] = sym, +#undef _ASM_X86_UNISTD_64_H + +typedef void (*sys_call_ptr_t)(void); + +extern void sys_ni_syscall(void); + +/* + * We used to have a trick here which made sure that holes in the + * x86_64 table were filled in with sys_ni_syscall, but a comment in + * unistd_64.h says that holes aren't allowed, so the trick was + * removed. + * The trick looked like this + * [0 ... UM_NR_syscall_max] = &sys_ni_syscall + * before including unistd_64.h - the later initializations overwrote + * the sys_ni_syscall filler. + */ + +sys_call_ptr_t sys_call_table[] __cacheline_aligned = { +#include <asm/unistd_64.h> +}; + +int syscall_table_size = sizeof(sys_call_table); diff --git a/arch/x86/um/syscalls_32.c b/arch/x86/um/syscalls_32.c new file mode 100644 index 000000000000..70ca357393b8 --- /dev/null +++ b/arch/x86/um/syscalls_32.c @@ -0,0 +1,66 @@ +/* + * Copyright (C) 2000 - 2003 Jeff Dike (jdike@addtoit.com) + * Licensed under the GPL + */ + +#include "linux/sched.h" +#include "linux/shm.h" +#include "linux/ipc.h" +#include "linux/syscalls.h" +#include "asm/mman.h" +#include "asm/uaccess.h" +#include "asm/unistd.h" + +/* + * The prototype on i386 is: + * + * int clone(int flags, void * child_stack, int * parent_tidptr, struct user_desc * newtls, int * child_tidptr) + * + * and the "newtls" arg. on i386 is read by copy_thread directly from the + * register saved on the stack. + */ +long sys_clone(unsigned long clone_flags, unsigned long newsp, + int __user *parent_tid, void *newtls, int __user *child_tid) +{ + long ret; + + if (!newsp) + newsp = UPT_SP(¤t->thread.regs.regs); + + current->thread.forking = 1; + ret = do_fork(clone_flags, newsp, ¤t->thread.regs, 0, parent_tid, + child_tid); + current->thread.forking = 0; + return ret; +} + +long sys_sigaction(int sig, const struct old_sigaction __user *act, + struct old_sigaction __user *oact) +{ + struct k_sigaction new_ka, old_ka; + int ret; + + if (act) { + old_sigset_t mask; + if (!access_ok(VERIFY_READ, act, sizeof(*act)) || + __get_user(new_ka.sa.sa_handler, &act->sa_handler) || + __get_user(new_ka.sa.sa_restorer, &act->sa_restorer)) + return -EFAULT; + __get_user(new_ka.sa.sa_flags, &act->sa_flags); + __get_user(mask, &act->sa_mask); + siginitset(&new_ka.sa.sa_mask, mask); + } + + ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL); + + if (!ret && oact) { + if (!access_ok(VERIFY_WRITE, oact, sizeof(*oact)) || + __put_user(old_ka.sa.sa_handler, &oact->sa_handler) || + __put_user(old_ka.sa.sa_restorer, &oact->sa_restorer)) + return -EFAULT; + __put_user(old_ka.sa.sa_flags, &oact->sa_flags); + __put_user(old_ka.sa.sa_mask.sig[0], &oact->sa_mask); + } + + return ret; +} diff --git a/arch/x86/um/syscalls_64.c b/arch/x86/um/syscalls_64.c new file mode 100644 index 000000000000..f3d82bb6e15a --- /dev/null +++ b/arch/x86/um/syscalls_64.c @@ -0,0 +1,102 @@ +/* + * Copyright (C) 2003 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * Copyright 2003 PathScale, Inc. 
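sys_call_table_64.c above relies on expanding the same syscall list twice: first with __SYSCALL defined to emit extern declarations, then redefined to emit "[nr] = sym," designated initializers inside the array. The real file gets the list by including unistd_64.h twice; in the miniature below a hypothetical two-entry X-macro stands in for that header:

#include <stdio.h>

#define SYSCALL_LIST(X) \
	X(0, sys_read)	\
	X(1, sys_write)

#define __SYSCALL(nr, sym) static void sym(void) { puts(#sym); }
SYSCALL_LIST(__SYSCALL)
#undef __SYSCALL

typedef void (*sys_call_ptr_t)(void);

#define __SYSCALL(nr, sym) [nr] = sym,
static sys_call_ptr_t sys_call_table[] = {
	SYSCALL_LIST(__SYSCALL)
};
#undef __SYSCALL

int main(void)
{
	sys_call_table[1]();	/* prints "sys_write" */
	return 0;
}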
+ * + * Licensed under the GPL + */ + +#include "linux/linkage.h" +#include "linux/personality.h" +#include "linux/utsname.h" +#include "asm/prctl.h" /* XXX This should get the constants from libc */ +#include "asm/uaccess.h" +#include "os.h" + +long arch_prctl(struct task_struct *task, int code, unsigned long __user *addr) +{ + unsigned long *ptr = addr, tmp; + long ret; + int pid = task->mm->context.id.u.pid; + + /* + * With ARCH_SET_FS (and ARCH_SET_GS is treated similarly to + * be safe), we need to call arch_prctl on the host because + * setting %fs may result in something else happening (like a + * GDT or thread.fs being set instead). So, we let the host + * fiddle the registers and thread struct and restore the + * registers afterwards. + * + * So, the saved registers are stored to the process (this + * needed because a stub may have been the last thing to run), + * arch_prctl is run on the host, then the registers are read + * back. + */ + switch (code) { + case ARCH_SET_FS: + case ARCH_SET_GS: + ret = restore_registers(pid, ¤t->thread.regs.regs); + if (ret) + return ret; + break; + case ARCH_GET_FS: + case ARCH_GET_GS: + /* + * With these two, we read to a local pointer and + * put_user it to the userspace pointer that we were + * given. If addr isn't valid (because it hasn't been + * faulted in or is just bogus), we want put_user to + * fault it in (or return -EFAULT) instead of having + * the host return -EFAULT. + */ + ptr = &tmp; + } + + ret = os_arch_prctl(pid, code, ptr); + if (ret) + return ret; + + switch (code) { + case ARCH_SET_FS: + current->thread.arch.fs = (unsigned long) ptr; + ret = save_registers(pid, ¤t->thread.regs.regs); + break; + case ARCH_SET_GS: + ret = save_registers(pid, ¤t->thread.regs.regs); + break; + case ARCH_GET_FS: + ret = put_user(tmp, addr); + break; + case ARCH_GET_GS: + ret = put_user(tmp, addr); + break; + } + + return ret; +} + +long sys_arch_prctl(int code, unsigned long addr) +{ + return arch_prctl(current, code, (unsigned long __user *) addr); +} + +long sys_clone(unsigned long clone_flags, unsigned long newsp, + void __user *parent_tid, void __user *child_tid) +{ + long ret; + + if (!newsp) + newsp = UPT_SP(¤t->thread.regs.regs); + current->thread.forking = 1; + ret = do_fork(clone_flags, newsp, ¤t->thread.regs, 0, parent_tid, + child_tid); + current->thread.forking = 0; + return ret; +} + +void arch_switch_to(struct task_struct *to) +{ + if ((to->thread.arch.fs == 0) || (to->mm == NULL)) + return; + + arch_prctl(to, ARCH_SET_FS, (void __user *) to->thread.arch.fs); +} diff --git a/arch/x86/um/sysrq_32.c b/arch/x86/um/sysrq_32.c new file mode 100644 index 000000000000..171b3e9dc867 --- /dev/null +++ b/arch/x86/um/sysrq_32.c @@ -0,0 +1,101 @@ +/* + * Copyright (C) 2001 - 2003 Jeff Dike (jdike@addtoit.com) + * Licensed under the GPL + */ + +#include "linux/kernel.h" +#include "linux/smp.h" +#include "linux/sched.h" +#include "linux/kallsyms.h" +#include "asm/ptrace.h" +#include "sysrq.h" + +/* This is declared by <linux/sched.h> */ +void show_regs(struct pt_regs *regs) +{ + printk("\n"); + printk("EIP: %04lx:[<%08lx>] CPU: %d %s", + 0xffff & PT_REGS_CS(regs), PT_REGS_IP(regs), + smp_processor_id(), print_tainted()); + if (PT_REGS_CS(regs) & 3) + printk(" ESP: %04lx:%08lx", 0xffff & PT_REGS_SS(regs), + PT_REGS_SP(regs)); + printk(" EFLAGS: %08lx\n %s\n", PT_REGS_EFLAGS(regs), + print_tainted()); + printk("EAX: %08lx EBX: %08lx ECX: %08lx EDX: %08lx\n", + PT_REGS_EAX(regs), PT_REGS_EBX(regs), + PT_REGS_ECX(regs), + PT_REGS_EDX(regs)); + 
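On the host side, the register shuffle described above ends in a plain arch_prctl() call against the child. The host facility itself is an ordinary x86_64 system call and can be exercised directly; this sketch is independent of the patch and assumes an x86_64 host with glibc:

#define _GNU_SOURCE
#include <stdio.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <asm/prctl.h>

int main(void)
{
	unsigned long fs = 0;

	/* ARCH_GET_FS writes the current FS base through the pointer */
	if (syscall(SYS_arch_prctl, ARCH_GET_FS, &fs))
		perror("arch_prctl");
	else
		printf("FS base: %#lx\n", fs);
	return 0;
}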
printk("ESI: %08lx EDI: %08lx EBP: %08lx", + PT_REGS_ESI(regs), PT_REGS_EDI(regs), + PT_REGS_EBP(regs)); + printk(" DS: %04lx ES: %04lx\n", + 0xffff & PT_REGS_DS(regs), + 0xffff & PT_REGS_ES(regs)); + + show_trace(NULL, (unsigned long *) ®s); +} + +/* Copied from i386. */ +static inline int valid_stack_ptr(struct thread_info *tinfo, void *p) +{ + return p > (void *)tinfo && + p < (void *)tinfo + THREAD_SIZE - 3; +} + +/* Adapted from i386 (we also print the address we read from). */ +static inline unsigned long print_context_stack(struct thread_info *tinfo, + unsigned long *stack, unsigned long ebp) +{ + unsigned long addr; + +#ifdef CONFIG_FRAME_POINTER + while (valid_stack_ptr(tinfo, (void *)ebp)) { + addr = *(unsigned long *)(ebp + 4); + printk("%08lx: [<%08lx>]", ebp + 4, addr); + print_symbol(" %s", addr); + printk("\n"); + ebp = *(unsigned long *)ebp; + } +#else + while (valid_stack_ptr(tinfo, stack)) { + addr = *stack; + if (__kernel_text_address(addr)) { + printk("%08lx: [<%08lx>]", (unsigned long) stack, addr); + print_symbol(" %s", addr); + printk("\n"); + } + stack++; + } +#endif + return ebp; +} + +void show_trace(struct task_struct* task, unsigned long * stack) +{ + unsigned long ebp; + struct thread_info *context; + + /* Turn this into BUG_ON if possible. */ + if (!stack) { + stack = (unsigned long*) &stack; + printk("show_trace: got NULL stack, implicit assumption task == current"); + WARN_ON(1); + } + + if (!task) + task = current; + + if (task != current) { + ebp = (unsigned long) KSTK_EBP(task); + } else { + asm ("movl %%ebp, %0" : "=r" (ebp) : ); + } + + context = (struct thread_info *) + ((unsigned long)stack & (~(THREAD_SIZE - 1))); + print_context_stack(context, stack, ebp); + + printk("\n"); +} + diff --git a/arch/x86/um/sysrq_64.c b/arch/x86/um/sysrq_64.c new file mode 100644 index 000000000000..e8913436d7dc --- /dev/null +++ b/arch/x86/um/sysrq_64.c @@ -0,0 +1,41 @@ +/* + * Copyright 2003 PathScale, Inc. 
+ * + * Licensed under the GPL + */ + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/sched.h> +#include <linux/utsname.h> +#include <asm/current.h> +#include <asm/ptrace.h> +#include "sysrq.h" + +void __show_regs(struct pt_regs *regs) +{ + printk("\n"); + print_modules(); + printk(KERN_INFO "Pid: %d, comm: %.20s %s %s\n", task_pid_nr(current), + current->comm, print_tainted(), init_utsname()->release); + printk(KERN_INFO "RIP: %04lx:[<%016lx>]\n", PT_REGS_CS(regs) & 0xffff, + PT_REGS_RIP(regs)); + printk(KERN_INFO "RSP: %016lx EFLAGS: %08lx\n", PT_REGS_SP(regs), + PT_REGS_EFLAGS(regs)); + printk(KERN_INFO "RAX: %016lx RBX: %016lx RCX: %016lx\n", + PT_REGS_RAX(regs), PT_REGS_RBX(regs), PT_REGS_RCX(regs)); + printk(KERN_INFO "RDX: %016lx RSI: %016lx RDI: %016lx\n", + PT_REGS_RDX(regs), PT_REGS_RSI(regs), PT_REGS_RDI(regs)); + printk(KERN_INFO "RBP: %016lx R08: %016lx R09: %016lx\n", + PT_REGS_RBP(regs), PT_REGS_R8(regs), PT_REGS_R9(regs)); + printk(KERN_INFO "R10: %016lx R11: %016lx R12: %016lx\n", + PT_REGS_R10(regs), PT_REGS_R11(regs), PT_REGS_R12(regs)); + printk(KERN_INFO "R13: %016lx R14: %016lx R15: %016lx\n", + PT_REGS_R13(regs), PT_REGS_R14(regs), PT_REGS_R15(regs)); +} + +void show_regs(struct pt_regs *regs) +{ + __show_regs(regs); + show_trace(current, (unsigned long *) ®s); +} diff --git a/arch/x86/um/tls_32.c b/arch/x86/um/tls_32.c new file mode 100644 index 000000000000..c6c7131e563b --- /dev/null +++ b/arch/x86/um/tls_32.c @@ -0,0 +1,396 @@ +/* + * Copyright (C) 2005 Paolo 'Blaisorblade' Giarrusso <blaisorblade@yahoo.it> + * Licensed under the GPL + */ + +#include "linux/percpu.h" +#include "linux/sched.h" +#include "asm/uaccess.h" +#include "os.h" +#include "skas.h" +#include "sysdep/tls.h" + +/* + * If needed we can detect when it's uninitialized. + * + * These are initialized in an initcall and unchanged thereafter. + */ +static int host_supports_tls = -1; +int host_gdt_entry_tls_min; + +int do_set_thread_area(struct user_desc *info) +{ + int ret; + u32 cpu; + + cpu = get_cpu(); + ret = os_set_thread_area(info, userspace_pid[cpu]); + put_cpu(); + + if (ret) + printk(KERN_ERR "PTRACE_SET_THREAD_AREA failed, err = %d, " + "index = %d\n", ret, info->entry_number); + + return ret; +} + +int do_get_thread_area(struct user_desc *info) +{ + int ret; + u32 cpu; + + cpu = get_cpu(); + ret = os_get_thread_area(info, userspace_pid[cpu]); + put_cpu(); + + if (ret) + printk(KERN_ERR "PTRACE_GET_THREAD_AREA failed, err = %d, " + "index = %d\n", ret, info->entry_number); + + return ret; +} + +/* + * sys_get_thread_area: get a yet unused TLS descriptor index. + * XXX: Consider leaving one free slot for glibc usage at first place. This must + * be done here (and by changing GDT_ENTRY_TLS_* macros) and nowhere else. + * + * Also, this must be tested when compiling in SKAS mode with dynamic linking + * and running against NPTL. + */ +static int get_free_idx(struct task_struct* task) +{ + struct thread_struct *t = &task->thread; + int idx; + + if (!t->arch.tls_array) + return GDT_ENTRY_TLS_MIN; + + for (idx = 0; idx < GDT_ENTRY_TLS_ENTRIES; idx++) + if (!t->arch.tls_array[idx].present) + return idx + GDT_ENTRY_TLS_MIN; + return -ESRCH; +} + +static inline void clear_user_desc(struct user_desc* info) +{ + /* Postcondition: LDT_empty(info) returns true. */ + memset(info, 0, sizeof(*info)); + + /* + * Check the LDT_empty or the i386 sys_get_thread_area code - we obtain + * indeed an empty user_desc. 
+ */ + info->read_exec_only = 1; + info->seg_not_present = 1; +} + +#define O_FORCE 1 + +static int load_TLS(int flags, struct task_struct *to) +{ + int ret = 0; + int idx; + + for (idx = GDT_ENTRY_TLS_MIN; idx < GDT_ENTRY_TLS_MAX; idx++) { + struct uml_tls_struct* curr = + &to->thread.arch.tls_array[idx - GDT_ENTRY_TLS_MIN]; + + /* + * Actually, now if it wasn't flushed it gets cleared and + * flushed to the host, which will clear it. + */ + if (!curr->present) { + if (!curr->flushed) { + clear_user_desc(&curr->tls); + curr->tls.entry_number = idx; + } else { + WARN_ON(!LDT_empty(&curr->tls)); + continue; + } + } + + if (!(flags & O_FORCE) && curr->flushed) + continue; + + ret = do_set_thread_area(&curr->tls); + if (ret) + goto out; + + curr->flushed = 1; + } +out: + return ret; +} + +/* + * Verify if we need to do a flush for the new process, i.e. if there are any + * present desc's, only if they haven't been flushed. + */ +static inline int needs_TLS_update(struct task_struct *task) +{ + int i; + int ret = 0; + + for (i = GDT_ENTRY_TLS_MIN; i < GDT_ENTRY_TLS_MAX; i++) { + struct uml_tls_struct* curr = + &task->thread.arch.tls_array[i - GDT_ENTRY_TLS_MIN]; + + /* + * Can't test curr->present, we may need to clear a descriptor + * which had a value. + */ + if (curr->flushed) + continue; + ret = 1; + break; + } + return ret; +} + +/* + * On a newly forked process, the TLS descriptors haven't yet been flushed. So + * we mark them as such and the first switch_to will do the job. + */ +void clear_flushed_tls(struct task_struct *task) +{ + int i; + + for (i = GDT_ENTRY_TLS_MIN; i < GDT_ENTRY_TLS_MAX; i++) { + struct uml_tls_struct* curr = + &task->thread.arch.tls_array[i - GDT_ENTRY_TLS_MIN]; + + /* + * Still correct to do this, if it wasn't present on the host it + * will remain as flushed as it was. + */ + if (!curr->present) + continue; + + curr->flushed = 0; + } +} + +/* + * In SKAS0 mode, currently, multiple guest threads sharing the same ->mm have a + * common host process. So this is needed in SKAS0 too. + * + * However, if each thread had a different host process (and this was discussed + * for SMP support) this won't be needed. + * + * And this will not need be used when (and if) we'll add support to the host + * SKAS patch. + */ + +int arch_switch_tls(struct task_struct *to) +{ + if (!host_supports_tls) + return 0; + + /* + * We have no need whatsoever to switch TLS for kernel threads; beyond + * that, that would also result in us calling os_set_thread_area with + * userspace_pid[cpu] == 0, which gives an error. + */ + if (likely(to->mm)) + return load_TLS(O_FORCE, to); + + return 0; +} + +static int set_tls_entry(struct task_struct* task, struct user_desc *info, + int idx, int flushed) +{ + struct thread_struct *t = &task->thread; + + if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX) + return -EINVAL; + + t->arch.tls_array[idx - GDT_ENTRY_TLS_MIN].tls = *info; + t->arch.tls_array[idx - GDT_ENTRY_TLS_MIN].present = 1; + t->arch.tls_array[idx - GDT_ENTRY_TLS_MIN].flushed = flushed; + + return 0; +} + +int arch_copy_tls(struct task_struct *new) +{ + struct user_desc info; + int idx, ret = -EFAULT; + + if (copy_from_user(&info, + (void __user *) UPT_ESI(&new->thread.regs.regs), + sizeof(info))) + goto out; + + ret = -EINVAL; + if (LDT_empty(&info)) + goto out; + + idx = info.entry_number; + + ret = set_tls_entry(new, &info, idx, 0); +out: + return ret; +} + +/* XXX: use do_get_thread_area to read the host value? I'm not at all sure! 
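The flushing above turns on two bits of state per TLS slot: present (the guest installed a descriptor) and flushed (the host GDT currently agrees with it). needs_TLS_update() only has to look at flushed, since a cleared-but-unflushed slot must be pushed to the host just like a newly set one. A compact model of that check, with an invented three-slot array:

#include <stdio.h>

struct tls_slot {
	unsigned present:1;	/* guest installed a descriptor here */
	unsigned flushed:1;	/* host GDT agrees with it */
};

static int needs_tls_update(const struct tls_slot *s, int n)
{
	for (int i = 0; i < n; i++)
		if (!s[i].flushed)	/* stale, whether set or being cleared */
			return 1;
	return 0;
}

int main(void)
{
	struct tls_slot slots[3] = {
		{ .present = 1, .flushed = 1 },	/* already on the host */
		{ .present = 1, .flushed = 0 },	/* set by the guest, not yet pushed */
		{ .present = 0, .flushed = 1 },	/* empty and known empty */
	};

	printf("switch must reload TLS: %s\n",
	       needs_tls_update(slots, 3) ? "yes" : "no");
	return 0;
}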
*/ +static int get_tls_entry(struct task_struct *task, struct user_desc *info, + int idx) +{ + struct thread_struct *t = &task->thread; + + if (!t->arch.tls_array) + goto clear; + + if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX) + return -EINVAL; + + if (!t->arch.tls_array[idx - GDT_ENTRY_TLS_MIN].present) + goto clear; + + *info = t->arch.tls_array[idx - GDT_ENTRY_TLS_MIN].tls; + +out: + /* + * Temporary debugging check, to make sure that things have been + * flushed. This could be triggered if load_TLS() failed. + */ + if (unlikely(task == current && + !t->arch.tls_array[idx - GDT_ENTRY_TLS_MIN].flushed)) { + printk(KERN_ERR "get_tls_entry: task with pid %d got here " + "without flushed TLS.", current->pid); + } + + return 0; +clear: + /* + * When the TLS entry has not been set, the values read to user in the + * tls_array are 0 (because it's cleared at boot, see + * arch/i386/kernel/head.S:cpu_gdt_table). Emulate that. + */ + clear_user_desc(info); + info->entry_number = idx; + goto out; +} + +int sys_set_thread_area(struct user_desc __user *user_desc) +{ + struct user_desc info; + int idx, ret; + + if (!host_supports_tls) + return -ENOSYS; + + if (copy_from_user(&info, user_desc, sizeof(info))) + return -EFAULT; + + idx = info.entry_number; + + if (idx == -1) { + idx = get_free_idx(current); + if (idx < 0) + return idx; + info.entry_number = idx; + /* Tell the user which slot we chose for him.*/ + if (put_user(idx, &user_desc->entry_number)) + return -EFAULT; + } + + ret = do_set_thread_area(&info); + if (ret) + return ret; + return set_tls_entry(current, &info, idx, 1); +} + +/* + * Perform set_thread_area on behalf of the traced child. + * Note: error handling is not done on the deferred load, and this differ from + * i386. However the only possible error are caused by bugs. + */ +int ptrace_set_thread_area(struct task_struct *child, int idx, + struct user_desc __user *user_desc) +{ + struct user_desc info; + + if (!host_supports_tls) + return -EIO; + + if (copy_from_user(&info, user_desc, sizeof(info))) + return -EFAULT; + + return set_tls_entry(child, &info, idx, 0); +} + +int sys_get_thread_area(struct user_desc __user *user_desc) +{ + struct user_desc info; + int idx, ret; + + if (!host_supports_tls) + return -ENOSYS; + + if (get_user(idx, &user_desc->entry_number)) + return -EFAULT; + + ret = get_tls_entry(current, &info, idx); + if (ret < 0) + goto out; + + if (copy_to_user(user_desc, &info, sizeof(info))) + ret = -EFAULT; + +out: + return ret; +} + +/* + * Perform get_thread_area on behalf of the traced child. + */ +int ptrace_get_thread_area(struct task_struct *child, int idx, + struct user_desc __user *user_desc) +{ + struct user_desc info; + int ret; + + if (!host_supports_tls) + return -EIO; + + ret = get_tls_entry(child, &info, idx); + if (ret < 0) + goto out; + + if (copy_to_user(user_desc, &info, sizeof(info))) + ret = -EFAULT; +out: + return ret; +} + +/* + * This code is really i386-only, but it detects and logs x86_64 GDT indexes + * if a 32-bit UML is running on a 64-bit host. 
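The idx == -1 branch above mirrors the native i386 set_thread_area() contract: userspace passes entry_number == -1 to ask the kernel for a free GDT slot and reads the chosen index back. A plain userspace illustration, which must be built as a 32-bit (i386) program; the base and limit values are arbitrary:

#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <asm/ldt.h>

int main(void)
{
	struct user_desc d;

	memset(&d, 0, sizeof(d));
	d.entry_number = -1;		/* let the kernel pick the slot */
	d.base_addr = 0x1000;		/* arbitrary */
	d.limit = 0xfffff;
	d.seg_32bit = 1;
	d.limit_in_pages = 1;

	if (syscall(SYS_set_thread_area, &d))
		perror("set_thread_area");
	else
		printf("got GDT entry %u\n", d.entry_number);
	return 0;
}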
+/*
+ * This code is really i386-only, but it detects and logs x86_64 GDT indexes
+ * if a 32-bit UML is running on a 64-bit host.
+ */
+static int __init __setup_host_supports_tls(void)
+{
+	check_host_supports_tls(&host_supports_tls, &host_gdt_entry_tls_min);
+	if (host_supports_tls) {
+		printk(KERN_INFO "Host TLS support detected\n");
+		printk(KERN_INFO "Detected host type: ");
+		switch (host_gdt_entry_tls_min) {
+		case GDT_ENTRY_TLS_MIN_I386:
+			printk(KERN_CONT "i386");
+			break;
+		case GDT_ENTRY_TLS_MIN_X86_64:
+			printk(KERN_CONT "x86_64");
+			break;
+		}
+		printk(KERN_CONT " (GDT indexes %d to %d)\n",
+		       host_gdt_entry_tls_min,
+		       host_gdt_entry_tls_min + GDT_ENTRY_TLS_ENTRIES - 1);
+	} else
+		printk(KERN_ERR "Host TLS support NOT detected! "
+		       "TLS support inside UML will not work\n");
+	return 0;
+}
+
+__initcall(__setup_host_supports_tls);
diff --git a/arch/x86/um/tls_64.c b/arch/x86/um/tls_64.c
new file mode 100644
index 000000000000..f7ba46200ecd
--- /dev/null
+++ b/arch/x86/um/tls_64.c
@@ -0,0 +1,17 @@
+#include "linux/sched.h"
+
+void clear_flushed_tls(struct task_struct *task)
+{
+}
+
+int arch_copy_tls(struct task_struct *t)
+{
+	/*
+	 * If CLONE_SETTLS is set, we need to save the new TLS base (clone's
+	 * fifth argument, tls, which arrives in %r8) so it can be loaded on
+	 * context switches.
+	 */
+	t->thread.arch.fs = t->thread.regs.regs.gp[R8 / sizeof(long)];
+
+	return 0;
+}
diff --git a/arch/x86/um/user-offsets.c b/arch/x86/um/user-offsets.c
new file mode 100644
index 000000000000..ca49be8ddd0c
--- /dev/null
+++ b/arch/x86/um/user-offsets.c
@@ -0,0 +1,80 @@
+#include <stdio.h>
+#include <stddef.h>
+#include <signal.h>
+#include <sys/poll.h>
+#include <sys/mman.h>
+#include <sys/user.h>
+#define __FRAME_OFFSETS
+#include <asm/ptrace.h>
+#include <asm/types.h>
+
+#define DEFINE(sym, val) \
+	asm volatile("\n->" #sym " %0 " #val : : "i" (val))
+
+#define DEFINE_LONGS(sym, val) \
+	asm volatile("\n->" #sym " %0 " #val : : "i" (val/sizeof(unsigned long)))
+
+void foo(void)
+{
+#ifdef __i386__
+	DEFINE_LONGS(HOST_FP_SIZE, sizeof(struct user_fpregs_struct));
+	DEFINE_LONGS(HOST_FPX_SIZE, sizeof(struct user_fpxregs_struct));
+
+	DEFINE(HOST_IP, EIP);
+	DEFINE(HOST_SP, UESP);
+	DEFINE(HOST_EFLAGS, EFL);
+	DEFINE(HOST_AX, EAX);
+	DEFINE(HOST_BX, EBX);
+	DEFINE(HOST_CX, ECX);
+	DEFINE(HOST_DX, EDX);
+	DEFINE(HOST_SI, ESI);
+	DEFINE(HOST_DI, EDI);
+	DEFINE(HOST_BP, EBP);
+	DEFINE(HOST_CS, CS);
+	DEFINE(HOST_SS, SS);
+	DEFINE(HOST_DS, DS);
+	DEFINE(HOST_FS, FS);
+	DEFINE(HOST_ES, ES);
+	DEFINE(HOST_GS, GS);
+	DEFINE(HOST_ORIG_AX, ORIG_EAX);
+#else
+	DEFINE(HOST_FP_SIZE, sizeof(struct _fpstate) / sizeof(unsigned long));
+	DEFINE_LONGS(HOST_BX, RBX);
+	DEFINE_LONGS(HOST_CX, RCX);
+	DEFINE_LONGS(HOST_DI, RDI);
+	DEFINE_LONGS(HOST_SI, RSI);
+	DEFINE_LONGS(HOST_DX, RDX);
+	DEFINE_LONGS(HOST_BP, RBP);
+	DEFINE_LONGS(HOST_AX, RAX);
+	DEFINE_LONGS(HOST_R8, R8);
+	DEFINE_LONGS(HOST_R9, R9);
+	DEFINE_LONGS(HOST_R10, R10);
+	DEFINE_LONGS(HOST_R11, R11);
+	DEFINE_LONGS(HOST_R12, R12);
+	DEFINE_LONGS(HOST_R13, R13);
+	DEFINE_LONGS(HOST_R14, R14);
+	DEFINE_LONGS(HOST_R15, R15);
+	DEFINE_LONGS(HOST_ORIG_AX, ORIG_RAX);
+	DEFINE_LONGS(HOST_CS, CS);
+	DEFINE_LONGS(HOST_SS, SS);
+	DEFINE_LONGS(HOST_EFLAGS, EFLAGS);
+#if 0
+	DEFINE_LONGS(HOST_FS, FS);
+	DEFINE_LONGS(HOST_GS, GS);
+	DEFINE_LONGS(HOST_DS, DS);
+	DEFINE_LONGS(HOST_ES, ES);
+#endif
+
+	DEFINE_LONGS(HOST_IP, RIP);
+	DEFINE_LONGS(HOST_SP, RSP);
+#endif
+
+	DEFINE(UM_FRAME_SIZE, sizeof(struct user_regs_struct));
+	DEFINE(UM_POLLIN, POLLIN);
+	DEFINE(UM_POLLPRI, POLLPRI);
+	DEFINE(UM_POLLOUT, POLLOUT);
+
+	DEFINE(UM_PROT_READ, PROT_READ);
+	DEFINE(UM_PROT_WRITE, PROT_WRITE);
+	DEFINE(UM_PROT_EXEC, PROT_EXEC);
+}
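[Editor's sketch, not part of the patch: user-offsets.c above uses the standard kbuild asm-offsets trick. The "i" constraint makes the compiler print each constant into the generated assembly, where the kbuild sed machinery rewrites every "->NAME value" marker into a #define. struct example and EXAMPLE_B_OFF below are made-up names.]

#include <stddef.h>

struct example {
	long a;
	long b;
};

#define DEFINE(sym, val) \
	asm volatile("\n->" #sym " %0 " #val : : "i" (val))

void foo(void)
{
	/* "gcc -S" emits: ->EXAMPLE_B_OFF $8 offsetof(struct example, b)
	 * (the $8 assumes 8-byte longs), which becomes:
	 * #define EXAMPLE_B_OFF 8 */
	DEFINE(EXAMPLE_B_OFF, offsetof(struct example, b));
}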
diff --git a/arch/x86/um/vdso/Makefile b/arch/x86/um/vdso/Makefile
new file mode 100644
index 000000000000..6c803ca49b5d
--- /dev/null
+++ b/arch/x86/um/vdso/Makefile
@@ -0,0 +1,90 @@
+#
+# Building vDSO images for x86.
+#
+
+VDSO64-y := y
+
+vdso-install-$(VDSO64-y) += vdso.so
+
+
+# files to link into the vdso
+vobjs-y := vdso-note.o um_vdso.o
+
+# files to link into kernel
+obj-$(VDSO64-y) += vdso.o vma.o
+
+vobjs := $(foreach F,$(vobjs-y),$(obj)/$F)
+
+$(obj)/vdso.o: $(obj)/vdso.so
+
+targets += vdso.so vdso.so.dbg vdso.lds $(vobjs-y)
+
+export CPPFLAGS_vdso.lds += -P -C
+
+VDSO_LDFLAGS_vdso.lds = -m64 -Wl,-soname=linux-vdso.so.1 \
+	-Wl,-z,max-page-size=4096 -Wl,-z,common-page-size=4096
+
+$(obj)/vdso.o: $(src)/vdso.S $(obj)/vdso.so
+
+$(obj)/vdso.so.dbg: $(src)/vdso.lds $(vobjs) FORCE
+	$(call if_changed,vdso)
+
+$(obj)/%.so: OBJCOPYFLAGS := -S
+$(obj)/%.so: $(obj)/%.so.dbg FORCE
+	$(call if_changed,objcopy)
+
+#
+# Don't omit frame pointers for ease of userspace debugging, but do
+# optimize sibling calls.
+#
+CFL := $(PROFILING) -mcmodel=small -fPIC -O2 -fasynchronous-unwind-tables -m64 \
+	$(filter -g%,$(KBUILD_CFLAGS)) $(call cc-option, -fno-stack-protector) \
+	-fno-omit-frame-pointer -foptimize-sibling-calls
+
+$(vobjs): KBUILD_CFLAGS += $(CFL)
+
+#
+# vDSO code runs in userspace and -pg doesn't help with profiling anyway.
+#
+CFLAGS_REMOVE_vdso-note.o = -pg -fprofile-arcs -ftest-coverage
+CFLAGS_REMOVE_um_vdso.o = -pg -fprofile-arcs -ftest-coverage
+
+targets += vdso-syms.lds
+obj-$(VDSO64-y) += vdso-syms.lds
+
+#
+# Match symbols in the DSO that look like VDSO*; produce a file of constants.
+#
+sed-vdsosym := -e 's/^00*/0/' \
+	-e 's/^\([0-9a-fA-F]*\) . \(VDSO[a-zA-Z0-9_]*\)$$/\2 = 0x\1;/p'
+quiet_cmd_vdsosym = VDSOSYM $@
+define cmd_vdsosym
+	$(NM) $< | LC_ALL=C sed -n $(sed-vdsosym) | LC_ALL=C sort > $@
+endef
+
+$(obj)/%-syms.lds: $(obj)/%.so.dbg FORCE
+	$(call if_changed,vdsosym)
+
+#
+# The DSO images are built using a special linker script.
+#
+quiet_cmd_vdso = VDSO    $@
+      cmd_vdso = $(CC) -nostdlib -o $@ \
+		       $(VDSO_LDFLAGS) $(VDSO_LDFLAGS_$(filter %.lds,$(^F))) \
+		       -Wl,-T,$(filter %.lds,$^) $(filter %.o,$^) && \
+		 sh $(srctree)/$(src)/checkundef.sh '$(NM)' '$@'
+
+VDSO_LDFLAGS = -fPIC -shared $(call cc-ldoption, -Wl$(comma)--hash-style=sysv)
+GCOV_PROFILE := n
+
+#
+# Install the unstripped copy of vdso*.so listed in $(vdso-install-y).
+#
+quiet_cmd_vdso_install = INSTALL $@
+      cmd_vdso_install = cp $(obj)/$@.dbg $(MODLIB)/vdso/$@
+$(vdso-install-y): %.so: $(obj)/%.so.dbg FORCE
+	@mkdir -p $(MODLIB)/vdso
+	$(call cmd,vdso_install)
+
+PHONY += vdso_install $(vdso-install-y)
+vdso_install: $(vdso-install-y)
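[Editor's note, not part of the patch: the sed-vdsosym rule above filters nm output for absolute symbols named VDSO*, so the generated vdso-syms.lds is a list of linker assignments. With the prelink address used by this series it would contain a line along the lines of (address illustrative):

	VDSO64_PRELINK = 0xffffffffff700000;

which is linked into the kernel proper so code outside the DSO image can refer to addresses inside it.]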
diff --git a/arch/x86/um/vdso/checkundef.sh b/arch/x86/um/vdso/checkundef.sh
new file mode 100644
index 000000000000..7ee90a9b549d
--- /dev/null
+++ b/arch/x86/um/vdso/checkundef.sh
@@ -0,0 +1,10 @@
+#!/bin/sh
+nm="$1"
+file="$2"
+$nm "$file" | grep '^ *U' > /dev/null 2>&1
+if [ $? -eq 1 ]; then
+	exit 0
+else
+	echo "$file: undefined symbols found" >&2
+	exit 1
+fi
diff --git a/arch/x86/um/vdso/um_vdso.c b/arch/x86/um/vdso/um_vdso.c
new file mode 100644
index 000000000000..7c441b59d375
--- /dev/null
+++ b/arch/x86/um/vdso/um_vdso.c
@@ -0,0 +1,71 @@
+/*
+ * Copyright (C) 2011 Richard Weinberger <richrd@nod.at>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This vDSO turns all calls into a syscall so that UML can trap them.
+ */
+
+
+/* Disable profiling for userspace code */
+#define DISABLE_BRANCH_PROFILING
+
+#include <linux/time.h>
+#include <linux/getcpu.h>
+#include <asm/unistd.h>
+
+int __vdso_clock_gettime(clockid_t clock, struct timespec *ts)
+{
+	long ret;
+
+	asm("syscall" : "=a" (ret) :
+	    "0" (__NR_clock_gettime), "D" (clock), "S" (ts)
+	    : "cc", "r11", "cx", "memory");
+
+	return ret;
+}
+int clock_gettime(clockid_t, struct timespec *)
+	__attribute__((weak, alias("__vdso_clock_gettime")));
+
+int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz)
+{
+	long ret;
+
+	asm("syscall" : "=a" (ret) :
+	    "0" (__NR_gettimeofday), "D" (tv), "S" (tz)
+	    : "cc", "r11", "cx", "memory");
+
+	return ret;
+}
+int gettimeofday(struct timeval *, struct timezone *)
+	__attribute__((weak, alias("__vdso_gettimeofday")));
+
+time_t __vdso_time(time_t *t)
+{
+	long secs;
+
+	asm volatile("syscall"
+		: "=a" (secs)
+		: "0" (__NR_time), "D" (t) : "cc", "r11", "cx", "memory");
+
+	return secs;
+}
+time_t time(time_t *t) __attribute__((weak, alias("__vdso_time")));
+
+long
+__vdso_getcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *unused)
+{
+	/*
+	 * UML does not support SMP, we can cheat here. :)
+	 */
+
+	if (cpu)
+		*cpu = 0;
+	if (node)
+		*node = 0;
+
+	return 0;
+}
+
+long getcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *tcache)
+	__attribute__((weak, alias("__vdso_getcpu")));
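[Editor's sketch, not part of the patch: the weak-alias export pattern used by um_vdso.c above, reduced to a self-contained example. One body is linked under two names; the plain name is weak, so a strong definition elsewhere (e.g. in libc) takes precedence. __impl_answer and answer are made-up names.]

#include <stdio.h>

long __impl_answer(long x)
{
	return x + 1;
}

/* Second, weak name for the same symbol, as with clock_gettime above. */
long answer(long x) __attribute__((weak, alias("__impl_answer")));

int main(void)
{
	printf("%ld\n", answer(41));	/* prints 42 via the alias */
	return 0;
}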
diff --git a/arch/x86/um/vdso/vdso-layout.lds.S b/arch/x86/um/vdso/vdso-layout.lds.S
new file mode 100644
index 000000000000..634a2cf62046
--- /dev/null
+++ b/arch/x86/um/vdso/vdso-layout.lds.S
@@ -0,0 +1,64 @@
+/*
+ * Linker script for vDSO. This is an ELF shared object prelinked to
+ * its virtual address, and with only one read-only segment.
+ * This script controls its layout.
+ */
+
+SECTIONS
+{
+	. = VDSO_PRELINK + SIZEOF_HEADERS;
+
+	.hash : { *(.hash) } :text
+	.gnu.hash : { *(.gnu.hash) }
+	.dynsym : { *(.dynsym) }
+	.dynstr : { *(.dynstr) }
+	.gnu.version : { *(.gnu.version) }
+	.gnu.version_d : { *(.gnu.version_d) }
+	.gnu.version_r : { *(.gnu.version_r) }
+
+	.note : { *(.note.*) } :text :note
+
+	.eh_frame_hdr : { *(.eh_frame_hdr) } :text :eh_frame_hdr
+	.eh_frame : { KEEP (*(.eh_frame)) } :text
+
+	.dynamic : { *(.dynamic) } :text :dynamic
+
+	.rodata : { *(.rodata*) } :text
+	.data : {
+		*(.data*)
+		*(.sdata*)
+		*(.got.plt) *(.got)
+		*(.gnu.linkonce.d.*)
+		*(.bss*)
+		*(.dynbss*)
+		*(.gnu.linkonce.b.*)
+	}
+
+	.altinstructions : { *(.altinstructions) }
+	.altinstr_replacement : { *(.altinstr_replacement) }
+
+	/*
+	 * Align the actual code well away from the non-instruction data.
+	 * This is the best thing for the I-cache.
+	 */
+	. = ALIGN(0x100);
+
+	.text : { *(.text*) } :text =0x90909090
+}
+
+/*
+ * Very old versions of ld do not recognize this name token; use the constant.
+ */
+#define PT_GNU_EH_FRAME 0x6474e550
+
+/*
+ * We must supply the ELF program headers explicitly to get just one
+ * PT_LOAD segment, and set the flags explicitly to make segments read-only.
+ */
+PHDRS
+{
+	text PT_LOAD FLAGS(5) FILEHDR PHDRS; /* PF_R|PF_X */
+	dynamic PT_DYNAMIC FLAGS(4); /* PF_R */
+	note PT_NOTE FLAGS(4); /* PF_R */
+	eh_frame_hdr PT_GNU_EH_FRAME;
+}
diff --git a/arch/x86/um/vdso/vdso-note.S b/arch/x86/um/vdso/vdso-note.S
new file mode 100644
index 000000000000..79a071e4357e
--- /dev/null
+++ b/arch/x86/um/vdso/vdso-note.S
@@ -0,0 +1,12 @@
+/*
+ * This supplies .note.* sections to go into the PT_NOTE inside the vDSO text.
+ * Here we can supply some information useful to userland.
+ */
+
+#include <linux/uts.h>
+#include <linux/version.h>
+#include <linux/elfnote.h>
+
+ELFNOTE_START(Linux, 0, "a")
+	.long LINUX_VERSION_CODE
+ELFNOTE_END
diff --git a/arch/x86/um/vdso/vdso.S b/arch/x86/um/vdso/vdso.S
new file mode 100644
index 000000000000..1cb468adacbb
--- /dev/null
+++ b/arch/x86/um/vdso/vdso.S
@@ -0,0 +1,10 @@
+#include <linux/init.h>
+
+__INITDATA
+
+	.globl vdso_start, vdso_end
+vdso_start:
+	.incbin "arch/x86/um/vdso/vdso.so"
+vdso_end:
+
+__FINIT
diff --git a/arch/x86/um/vdso/vdso.lds.S b/arch/x86/um/vdso/vdso.lds.S
new file mode 100644
index 000000000000..b96b2677cad8
--- /dev/null
+++ b/arch/x86/um/vdso/vdso.lds.S
@@ -0,0 +1,32 @@
+/*
+ * Linker script for 64-bit vDSO.
+ * We #include the file to define the layout details.
+ * Here we only choose the prelinked virtual address.
+ *
+ * This file defines the version script giving the user-exported symbols in
+ * the DSO. We can define local symbols here called VDSO* to make their
+ * values visible using the asm-x86/vdso.h macros from the kernel proper.
+ */
+
+#define VDSO_PRELINK 0xffffffffff700000
+#include "vdso-layout.lds.S"
+
+/*
+ * This controls what userland symbols we export from the vDSO.
+ */
+VERSION {
+	LINUX_2.6 {
+	global:
+		clock_gettime;
+		__vdso_clock_gettime;
+		gettimeofday;
+		__vdso_gettimeofday;
+		getcpu;
+		__vdso_getcpu;
+		time;
+		__vdso_time;
+	local: *;
+	};
+}
+
+VDSO64_PRELINK = VDSO_PRELINK;
diff --git a/arch/x86/um/vdso/vma.c b/arch/x86/um/vdso/vma.c
new file mode 100644
index 000000000000..91f4ec9a0a56
--- /dev/null
+++ b/arch/x86/um/vdso/vma.c
@@ -0,0 +1,74 @@
+/*
+ * Copyright (C) 2011 Richard Weinberger <richrd@nod.at>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/slab.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/init.h>
+#include <asm/page.h>
+
+unsigned int __read_mostly vdso_enabled = 1;
+unsigned long um_vdso_addr;
+
+extern unsigned long task_size;
+extern char vdso_start[], vdso_end[];
+
+static struct page **vdsop;
+
+static int __init init_vdso(void)
+{
+	struct page *um_vdso;
+
+	BUG_ON(vdso_end - vdso_start > PAGE_SIZE);
+
+	um_vdso_addr = task_size - PAGE_SIZE;
+
+	vdsop = kmalloc(sizeof(struct page *), GFP_KERNEL);
+	if (!vdsop)
+		goto oom;
+
+	um_vdso = alloc_page(GFP_KERNEL);
+	if (!um_vdso) {
+		kfree(vdsop);
+
+		goto oom;
+	}
+
+	copy_page(page_address(um_vdso), vdso_start);
+	*vdsop = um_vdso;
+
+	return 0;
+
+oom:
+	printk(KERN_ERR "Cannot allocate vdso\n");
+	vdso_enabled = 0;
+
+	return -ENOMEM;
+}
+subsys_initcall(init_vdso);
+
+int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
+{
+	int err;
+	struct mm_struct *mm = current->mm;
+
+	if (!vdso_enabled)
+		return 0;
+
+	down_write(&mm->mmap_sem);
+
+	err = install_special_mapping(mm, um_vdso_addr, PAGE_SIZE,
+		VM_READ|VM_EXEC|
+		VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC|
+		VM_ALWAYSDUMP,
+		vdsop);
+
+	up_write(&mm->mmap_sem);
+
+	return err;
+}
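[Editor's sketch, not part of the patch: userspace can locate the page that arch_setup_additional_pages() above installs at task_size - PAGE_SIZE through the auxiliary vector, assuming the accompanying ELF loader changes export it as AT_SYSINFO_EHDR, as native x86_64 does.]

#include <stdio.h>
#include <sys/auxv.h>

int main(void)
{
	/* Address of the vDSO ELF header the kernel mapped for us, or 0. */
	unsigned long vdso = getauxval(AT_SYSINFO_EHDR);

	if (vdso)
		printf("vDSO ELF header mapped at %#lx\n", vdso);
	else
		printf("kernel provided no vDSO\n");
	return 0;
}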