245 files changed, 3645 insertions, 2895 deletions
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index fac58916adec..a3fb23be87f3 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -2,6 +2,7 @@ config ARM
 	bool
 	default y
 	select HAVE_AOUT
+	select HAVE_DMA_API_DEBUG
 	select HAVE_IDE
 	select HAVE_MEMBLOCK
 	select RTC_LIB
@@ -24,6 +25,7 @@ config ARM
 	select PERF_USE_VMALLOC
 	select HAVE_REGS_AND_STACK_ACCESS_API
 	select HAVE_HW_BREAKPOINT if (PERF_EVENTS && (CPU_V6 || CPU_V7))
+	select HAVE_C_RECORDMCOUNT
 	help
 	  The ARM series is a line of low-power-consumption RISC chip designs
 	  licensed by ARM Ltd and targeted at embedded applications and
@@ -35,9 +37,15 @@ config ARM
 config HAVE_PWM
 	bool
 
+config MIGHT_HAVE_PCI
+	bool
+
 config SYS_SUPPORTS_APM_EMULATION
 	bool
 
+config HAVE_SCHED_CLOCK
+	bool
+
 config GENERIC_GPIO
 	bool
 
@@ -222,7 +230,7 @@ config ARCH_INTEGRATOR
 	bool "ARM Ltd. Integrator family"
 	select ARM_AMBA
 	select ARCH_HAS_CPUFREQ
-	select COMMON_CLKDEV
+	select CLKDEV_LOOKUP
 	select ICST
 	select GENERIC_CLOCKEVENTS
 	select PLAT_VERSATILE
@@ -232,7 +240,8 @@ config ARCH_INTEGRATOR
 config ARCH_REALVIEW
 	bool "ARM Ltd. RealView family"
 	select ARM_AMBA
-	select COMMON_CLKDEV
+	select CLKDEV_LOOKUP
+	select HAVE_SCHED_CLOCK
 	select ICST
 	select GENERIC_CLOCKEVENTS
 	select ARCH_WANT_OPTIONAL_GPIOLIB
@@ -246,7 +255,8 @@ config ARCH_VERSATILE
 	bool "ARM Ltd. Versatile family"
 	select ARM_AMBA
 	select ARM_VIC
-	select COMMON_CLKDEV
+	select CLKDEV_LOOKUP
+	select HAVE_SCHED_CLOCK
 	select ICST
 	select GENERIC_CLOCKEVENTS
 	select ARCH_WANT_OPTIONAL_GPIOLIB
@@ -260,9 +270,10 @@ config ARCH_VEXPRESS
 	select ARCH_WANT_OPTIONAL_GPIOLIB
 	select ARM_AMBA
 	select ARM_TIMER_SP804
-	select COMMON_CLKDEV
+	select CLKDEV_LOOKUP
 	select GENERIC_CLOCKEVENTS
 	select HAVE_CLK
+	select HAVE_SCHED_CLOCK
 	select ICST
 	select PLAT_VERSATILE
 	help
@@ -281,7 +292,7 @@ config ARCH_BCMRING
 	depends on MMU
 	select CPU_V6
 	select ARM_AMBA
-	select COMMON_CLKDEV
+	select CLKDEV_LOOKUP
 	select GENERIC_CLOCKEVENTS
 	select ARCH_WANT_OPTIONAL_GPIOLIB
 	help
@@ -299,6 +310,7 @@ config ARCH_CNS3XXX
 	select CPU_V6
 	select GENERIC_CLOCKEVENTS
 	select ARM_GIC
+	select MIGHT_HAVE_PCI
 	select PCI_DOMAINS if PCI
 	help
 	  Support for Cavium Networks CNS3XXX platform.
@@ -328,7 +340,7 @@ config ARCH_EP93XX
 	select CPU_ARM920T
 	select ARM_AMBA
 	select ARM_VIC
-	select COMMON_CLKDEV
+	select CLKDEV_LOOKUP
 	select ARCH_REQUIRE_GPIOLIB
 	select ARCH_HAS_HOLES_MEMORYMODEL
 	select ARCH_USES_GETTIMEOFFSET
@@ -348,7 +360,7 @@ config ARCH_MXC
 	bool "Freescale MXC/iMX-based"
 	select GENERIC_CLOCKEVENTS
 	select ARCH_REQUIRE_GPIOLIB
-	select COMMON_CLKDEV
+	select CLKDEV_LOOKUP
 	help
 	  Support for Freescale MXC/iMX-based family of processors
 
@@ -363,7 +375,7 @@ config ARCH_MXS
 config ARCH_STMP3XXX
 	bool "Freescale STMP3xxx"
 	select CPU_ARM926T
-	select COMMON_CLKDEV
+	select CLKDEV_LOOKUP
 	select ARCH_REQUIRE_GPIOLIB
 	select GENERIC_CLOCKEVENTS
 	select USB_ARCH_HAS_EHCI
@@ -442,6 +454,8 @@ config ARCH_IXP4XX
 	select CPU_XSCALE
 	select GENERIC_GPIO
 	select GENERIC_CLOCKEVENTS
+	select HAVE_SCHED_CLOCK
+	select MIGHT_HAVE_PCI
 	select DMABOUNCE if PCI
 	help
 	  Support for Intel's IXP4XX (XScale) family of processors.
@@ -481,7 +495,7 @@ config ARCH_LPC32XX
 	select HAVE_IDE
 	select ARM_AMBA
 	select USB_ARCH_HAS_OHCI
-	select COMMON_CLKDEV
+	select CLKDEV_LOOKUP
 	select GENERIC_TIME
 	select GENERIC_CLOCKEVENTS
 	help
@@ -515,8 +529,9 @@ config ARCH_MMP
 	bool "Marvell PXA168/910/MMP2"
 	depends on MMU
 	select ARCH_REQUIRE_GPIOLIB
-	select COMMON_CLKDEV
+	select CLKDEV_LOOKUP
 	select GENERIC_CLOCKEVENTS
+	select HAVE_SCHED_CLOCK
 	select TICK_ONESHOT
 	select PLAT_PXA
 	select SPARSE_IRQ
@@ -548,7 +563,7 @@ config ARCH_W90X900
 	bool "Nuvoton W90X900 CPU"
 	select CPU_ARM926T
 	select ARCH_REQUIRE_GPIOLIB
-	select COMMON_CLKDEV
+	select CLKDEV_LOOKUP
 	select GENERIC_CLOCKEVENTS
 	help
 	  Support for Nuvoton (Winbond logic dept.) ARM9 processor,
@@ -562,18 +577,19 @@ config ARCH_W90X900
 config ARCH_NUC93X
 	bool "Nuvoton NUC93X CPU"
 	select CPU_ARM926T
-	select COMMON_CLKDEV
+	select CLKDEV_LOOKUP
 	help
 	  Support for Nuvoton (Winbond logic dept.) NUC93X MCU,The NUC93X is a
 	  low-power and high performance MPEG-4/JPEG multimedia controller chip.
 
 config ARCH_TEGRA
 	bool "NVIDIA Tegra"
+	select CLKDEV_LOOKUP
 	select GENERIC_TIME
 	select GENERIC_CLOCKEVENTS
 	select GENERIC_GPIO
 	select HAVE_CLK
-	select COMMON_CLKDEV
+	select HAVE_SCHED_CLOCK
 	select ARCH_HAS_BARRIERS if CACHE_L2X0
 	select ARCH_HAS_CPUFREQ
 	help
@@ -583,7 +599,7 @@ config ARCH_TEGRA
 config ARCH_PNX4008
 	bool "Philips Nexperia PNX4008 Mobile"
 	select CPU_ARM926T
-	select COMMON_CLKDEV
+	select CLKDEV_LOOKUP
 	select ARCH_USES_GETTIMEOFFSET
 	help
 	  This enables support for Philips PNX4008 mobile platform.
@@ -593,9 +609,10 @@ config ARCH_PXA
 	depends on MMU
 	select ARCH_MTD_XIP
 	select ARCH_HAS_CPUFREQ
-	select COMMON_CLKDEV
+	select CLKDEV_LOOKUP
 	select ARCH_REQUIRE_GPIOLIB
 	select GENERIC_CLOCKEVENTS
+	select HAVE_SCHED_CLOCK
 	select TICK_ONESHOT
 	select PLAT_PXA
 	select SPARSE_IRQ
@@ -644,6 +661,7 @@ config ARCH_SA1100
 	select CPU_FREQ
 	select GENERIC_CLOCKEVENTS
 	select HAVE_CLK
+	select HAVE_SCHED_CLOCK
 	select TICK_ONESHOT
 	select ARCH_REQUIRE_GPIOLIB
 	help
@@ -770,7 +788,7 @@ config ARCH_TCC_926
 	bool "Telechips TCC ARM926-based systems"
 	select CPU_ARM926T
 	select HAVE_CLK
-	select COMMON_CLKDEV
+	select CLKDEV_LOOKUP
 	select GENERIC_CLOCKEVENTS
 	help
 	  Support for Telechips TCC ARM926-based systems.
@@ -790,11 +808,12 @@ config ARCH_U300
 	bool "ST-Ericsson U300 Series"
 	depends on MMU
 	select CPU_ARM926T
+	select HAVE_SCHED_CLOCK
 	select HAVE_TCM
 	select ARM_AMBA
 	select ARM_VIC
 	select GENERIC_CLOCKEVENTS
-	select COMMON_CLKDEV
+	select CLKDEV_LOOKUP
 	select GENERIC_GPIO
 	help
 	  Support for ST-Ericsson U300 series mobile platforms.
@@ -804,7 +823,7 @@ config ARCH_U8500
 	select CPU_V7
 	select ARM_AMBA
 	select GENERIC_CLOCKEVENTS
-	select COMMON_CLKDEV
+	select CLKDEV_LOOKUP
 	select ARCH_REQUIRE_GPIOLIB
 	select ARCH_HAS_CPUFREQ
 	help
@@ -815,7 +834,7 @@ config ARCH_NOMADIK
 	select ARM_AMBA
 	select ARM_VIC
 	select CPU_ARM926T
-	select COMMON_CLKDEV
+	select CLKDEV_LOOKUP
 	select GENERIC_CLOCKEVENTS
 	select ARCH_REQUIRE_GPIOLIB
 	help
@@ -827,7 +846,7 @@ config ARCH_DAVINCI
 	select ARCH_REQUIRE_GPIOLIB
 	select ZONE_DMA
 	select HAVE_IDE
-	select COMMON_CLKDEV
+	select CLKDEV_LOOKUP
 	select GENERIC_ALLOCATOR
 	select ARCH_HAS_HOLES_MEMORYMODEL
 	help
@@ -839,6 +858,7 @@ config ARCH_OMAP
 	select ARCH_REQUIRE_GPIOLIB
 	select ARCH_HAS_CPUFREQ
 	select GENERIC_CLOCKEVENTS
+	select HAVE_SCHED_CLOCK
 	select ARCH_HAS_HOLES_MEMORYMODEL
 	help
 	  Support for TI's OMAP platform (OMAP1/2/3/4).
@@ -847,7 +867,7 @@ config PLAT_SPEAR
 	bool "ST SPEAr"
 	select ARM_AMBA
 	select ARCH_REQUIRE_GPIOLIB
-	select COMMON_CLKDEV
+	select CLKDEV_LOOKUP
 	select GENERIC_CLOCKEVENTS
 	select HAVE_CLK
 	help
@@ -994,9 +1014,11 @@ config ARCH_ACORN
 config PLAT_IOP
 	bool
 	select GENERIC_CLOCKEVENTS
+	select HAVE_SCHED_CLOCK
 
 config PLAT_ORION
 	bool
+	select HAVE_SCHED_CLOCK
 
 config PLAT_PXA
 	bool
@@ -1029,6 +1051,11 @@ config CPU_HAS_PMU
 	default y
 	bool
 
+config MULTI_IRQ_HANDLER
+	bool
+	help
+	  Allow each machine to specify it's own IRQ handler at run time.
+
 if !MMU
 source "arch/arm/Kconfig-nommu"
 endif
@@ -1176,7 +1203,7 @@ config ISA_DMA_API
 	bool
 
 config PCI
-	bool "PCI support" if ARCH_INTEGRATOR_AP || ARCH_VERSATILE_PB || ARCH_IXP4XX || ARCH_KS8695 || MACH_ARMCORE || ARCH_CNS3XXX
+	bool "PCI support" if MIGHT_HAVE_PCI
 	help
 	  Find out whether you have a PCI motherboard. PCI is the name of a
 	  bus system, i.e. the way the CPU talks to the other stuff inside
@@ -1187,6 +1214,12 @@ config PCI_DOMAINS
 	bool
 	depends on PCI
 
+config PCI_NANOENGINE
+	bool "BSE nanoEngine PCI support"
+	depends on SA1100_NANOENGINE
+	help
+	  Enable PCI on the BSE nanoEngine board.
+
 config PCI_SYSCALL
 	def_bool PCI
 
@@ -1242,7 +1275,7 @@ config SMP
 config SMP_ON_UP
 	bool "Allow booting SMP kernel on uniprocessor systems (EXPERIMENTAL)"
 	depends on EXPERIMENTAL
-	depends on SMP && !XIP && !THUMB2_KERNEL
+	depends on SMP && !XIP
 	default y
 	help
 	  SMP kernels contain instructions which fail on non-SMP processors.
@@ -1261,6 +1294,7 @@ config HAVE_ARM_SCU
 config HAVE_ARM_TWD
 	bool
 	depends on SMP
+	select TICK_ONESHOT
 	help
 	  This options enables support for the ARM timer and watchdog unit
 
@@ -1324,7 +1358,7 @@ config HZ
 	default 100
 
 config THUMB2_KERNEL
-	bool "Compile the kernel in Thumb-2 mode"
+	bool "Compile the kernel in Thumb-2 mode (EXPERIMENTAL)"
 	depends on CPU_V7 && !CPU_V6 && EXPERIMENTAL
 	select AEABI
 	select ARM_ASM_UNIFIED
@@ -1538,6 +1572,7 @@ config SECCOMP
 
 config CC_STACKPROTECTOR
 	bool "Enable -fstack-protector buffer overflow detection (EXPERIMENTAL)"
+	depends on EXPERIMENTAL
 	help
 	  This option turns on the -fstack-protector GCC feature. This
 	  feature puts, at the beginning of functions, a canary value on
@@ -1664,6 +1699,19 @@ config ATAGS_PROC
 	  Should the atags used to boot the kernel be exported in an "atags"
 	  file in procfs. Useful with kexec.
 
+config CRASH_DUMP
+	bool "Build kdump crash kernel (EXPERIMENTAL)"
+	depends on EXPERIMENTAL
+	help
+	  Generate crash dump after being started by kexec. This should
+	  be normally only set in special crash dump kernels which are
+	  loaded in the main kernel with kexec-tools into a specially
+	  reserved region and then later executed after a crash by
+	  kdump/kexec. The crash dump kernel must be compiled to a
+	  memory address not used by the main kernel
+
+	  For more details see Documentation/kdump/kdump.txt
+
 config AUTO_ZRELADDR
 	bool "Auto calculation of the decompressed kernel image address"
 	depends on !ZBOOT_ROM && !ARCH_U300
@@ -1721,7 +1769,7 @@ config CPU_FREQ_S3C
 	  Internal configuration node for common cpufreq on Samsung SoC
 
 config CPU_FREQ_S3C24XX
-	bool "CPUfreq driver for Samsung S3C24XX series CPUs"
+	bool "CPUfreq driver for Samsung S3C24XX series CPUs (EXPERIMENTAL)"
 	depends on ARCH_S3C2410 && CPU_FREQ && EXPERIMENTAL
 	select CPU_FREQ_S3C
 	help
@@ -1733,7 +1781,7 @@ config CPU_FREQ_S3C24XX
 	  If in doubt, say N.
 
 config CPU_FREQ_S3C24XX_PLL
-	bool "Support CPUfreq changing of PLL frequency"
+	bool "Support CPUfreq changing of PLL frequency (EXPERIMENTAL)"
 	depends on CPU_FREQ_S3C24XX && EXPERIMENTAL
 	help
 	  Compile in support for changing the PLL frequency from the
diff --git a/arch/arm/Kconfig.debug b/arch/arm/Kconfig.debug
index eac62085f5b2..494224a9b459 100644
--- a/arch/arm/Kconfig.debug
+++ b/arch/arm/Kconfig.debug
@@ -31,7 +31,7 @@ config FRAME_POINTER
 	  reported is severely limited.
 
 config ARM_UNWIND
-	bool "Enable stack unwinding support"
+	bool "Enable stack unwinding support (EXPERIMENTAL)"
 	depends on AEABI && EXPERIMENTAL
 	default y
 	help
diff --git a/arch/arm/boot/compressed/Makefile b/arch/arm/boot/compressed/Makefile
index 65a7c1c588a9..0a8f748e506a 100644
--- a/arch/arm/boot/compressed/Makefile
+++ b/arch/arm/boot/compressed/Makefile
@@ -45,6 +45,10 @@ else
 endif
 endif
 
+ifeq ($(CONFIG_ARCH_SHMOBILE),y)
+OBJS		+= head-shmobile.o
+endif
+
 #
 # We now have a PIC decompressor implementation.  Decompressors running
 # from RAM should not define ZTEXTADDR.  Decompressors running directly
diff --git a/arch/arm/boot/compressed/head-shmobile.S b/arch/arm/boot/compressed/head-shmobile.S
new file mode 100644
index 000000000000..30973b76e6ae
--- /dev/null
+++ b/arch/arm/boot/compressed/head-shmobile.S
@@ -0,0 +1,53 @@
+/*
+ * The head-file for SH-Mobile ARM platforms
+ *
+ * Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
+ * Simon Horman <horms@verge.net.au>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+
+#ifdef CONFIG_ZBOOT_ROM
+
+	.section	".start", "ax"
+
+	/* load board-specific initialization code */
+#include <mach/zboot.h>
+
+	b	1f
+__atags:@ tag #1
+	.long	12			@ tag->hdr.size = tag_size(tag_core);
+	.long	0x54410001		@ tag->hdr.tag = ATAG_CORE;
+	.long   0			@ tag->u.core.flags = 0;
+	.long	0			@ tag->u.core.pagesize = 0;
+	.long	0			@ tag->u.core.rootdev = 0;
+	@ tag #2
+	.long	8			@ tag->hdr.size = tag_size(tag_mem32);
+	.long	0x54410002		@ tag->hdr.tag = ATAG_MEM;
+	.long	CONFIG_MEMORY_SIZE	@ tag->u.mem.size = CONFIG_MEMORY_SIZE;
+	.long	CONFIG_MEMORY_START	@ @ tag->u.mem.start = CONFIG_MEMORY_START;
+	@ tag #3
+	.long	0			@ tag->hdr.size = 0
+	.long	0			@ tag->hdr.tag = ATAG_NONE;
+1:
+
+	/* Set board ID necessary for boot */
+	ldr	r7, 1f				@ Set machine type register
+	adr	r8, __atags			@ Set atag register
+	b	2f
+
+1 :	.long MACH_TYPE
+2 :
+
+#endif /* CONFIG_ZBOOT_ROM */
diff --git a/arch/arm/common/Kconfig b/arch/arm/common/Kconfig
index 0a34c8186924..778655f0257a 100644
--- a/arch/arm/common/Kconfig
+++ b/arch/arm/common/Kconfig
@@ -37,7 +37,3 @@ config SHARP_PARAM
 
 config SHARP_SCOOP
 	bool
-
-config COMMON_CLKDEV
-	bool
-	select HAVE_CLK
diff --git a/arch/arm/common/Makefile b/arch/arm/common/Makefile
index e6e8664a9413..e7521bca2c35 100644
--- a/arch/arm/common/Makefile
+++ b/arch/arm/common/Makefile
@@ -17,3 +17,4 @@ obj-$(CONFIG_ARCH_IXP2000)	+= uengine.o
 obj-$(CONFIG_ARCH_IXP23XX)	+= uengine.o
 obj-$(CONFIG_PCI_HOST_ITE8152)  += it8152.o
 obj-$(CONFIG_COMMON_CLKDEV)	+= clkdev.o
+obj-$(CONFIG_ARM_TIMER_SP804)	+= timer-sp.o
diff --git a/arch/arm/common/clkdev.c b/arch/arm/common/clkdev.c
deleted file mode 100644
index e2b2bb66e094..000000000000
--- a/arch/arm/common/clkdev.c
+++ /dev/null
@@ -1,179 +0,0 @@
-/*
- *  arch/arm/common/clkdev.c
- *
- *  Copyright (C) 2008 Russell King.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * Helper for the clk API to assist looking up a struct clk.
- */
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/device.h>
-#include <linux/list.h>
-#include <linux/errno.h>
-#include <linux/err.h>
-#include <linux/string.h>
-#include <linux/mutex.h>
-#include <linux/clk.h>
-#include <linux/slab.h>
-
-#include <asm/clkdev.h>
-#include <mach/clkdev.h>
-
-static LIST_HEAD(clocks);
-static DEFINE_MUTEX(clocks_mutex);
-
-/*
- * Find the correct struct clk for the device and connection ID.
- * We do slightly fuzzy matching here:
- *  An entry with a NULL ID is assumed to be a wildcard.
- *  If an entry has a device ID, it must match
- *  If an entry has a connection ID, it must match
- * Then we take the most specific entry - with the following
- * order of precedence: dev+con > dev only > con only.
- */
-static struct clk *clk_find(const char *dev_id, const char *con_id)
-{
-	struct clk_lookup *p;
-	struct clk *clk = NULL;
-	int match, best = 0;
-
-	list_for_each_entry(p, &clocks, node) {
-		match = 0;
-		if (p->dev_id) {
-			if (!dev_id || strcmp(p->dev_id, dev_id))
-				continue;
-			match += 2;
-		}
-		if (p->con_id) {
-			if (!con_id || strcmp(p->con_id, con_id))
-				continue;
-			match += 1;
-		}
-
-		if (match > best) {
-			clk = p->clk;
-			if (match != 3)
-				best = match;
-			else
-				break;
-		}
-	}
-	return clk;
-}
-
-struct clk *clk_get_sys(const char *dev_id, const char *con_id)
-{
-	struct clk *clk;
-
-	mutex_lock(&clocks_mutex);
-	clk = clk_find(dev_id, con_id);
-	if (clk && !__clk_get(clk))
-		clk = NULL;
-	mutex_unlock(&clocks_mutex);
-
-	return clk ? clk : ERR_PTR(-ENOENT);
-}
-EXPORT_SYMBOL(clk_get_sys);
-
-struct clk *clk_get(struct device *dev, const char *con_id)
-{
-	const char *dev_id = dev ? dev_name(dev) : NULL;
-
-	return clk_get_sys(dev_id, con_id);
-}
-EXPORT_SYMBOL(clk_get);
-
-void clk_put(struct clk *clk)
-{
-	__clk_put(clk);
-}
-EXPORT_SYMBOL(clk_put);
-
-void clkdev_add(struct clk_lookup *cl)
-{
-	mutex_lock(&clocks_mutex);
-	list_add_tail(&cl->node, &clocks);
-	mutex_unlock(&clocks_mutex);
-}
-EXPORT_SYMBOL(clkdev_add);
-
-void __init clkdev_add_table(struct clk_lookup *cl, size_t num)
-{
-	mutex_lock(&clocks_mutex);
-	while (num--) {
-		list_add_tail(&cl->node, &clocks);
-		cl++;
-	}
-	mutex_unlock(&clocks_mutex);
-}
-
-#define MAX_DEV_ID	20
-#define MAX_CON_ID	16
-
-struct clk_lookup_alloc {
-	struct clk_lookup cl;
-	char	dev_id[MAX_DEV_ID];
-	char	con_id[MAX_CON_ID];
-};
-
-struct clk_lookup *clkdev_alloc(struct clk *clk, const char *con_id,
-	const char *dev_fmt, ...)
-{
-	struct clk_lookup_alloc *cla;
-
-	cla = kzalloc(sizeof(*cla), GFP_KERNEL);
-	if (!cla)
-		return NULL;
-
-	cla->cl.clk = clk;
-	if (con_id) {
-		strlcpy(cla->con_id, con_id, sizeof(cla->con_id));
-		cla->cl.con_id = cla->con_id;
-	}
-
-	if (dev_fmt) {
-		va_list ap;
-
-		va_start(ap, dev_fmt);
-		vscnprintf(cla->dev_id, sizeof(cla->dev_id), dev_fmt, ap);
-		cla->cl.dev_id = cla->dev_id;
-		va_end(ap);
-	}
-
-	return &cla->cl;
-}
-EXPORT_SYMBOL(clkdev_alloc);
-
-int clk_add_alias(const char *alias, const char *alias_dev_name, char *id,
-	struct device *dev)
-{
-	struct clk *r = clk_get(dev, id);
-	struct clk_lookup *l;
-
-	if (IS_ERR(r))
-		return PTR_ERR(r);
-
-	l = clkdev_alloc(r, alias, alias_dev_name);
-	clk_put(r);
-	if (!l)
-		return -ENODEV;
-	clkdev_add(l);
-	return 0;
-}
-EXPORT_SYMBOL(clk_add_alias);
-
-/*
- * clkdev_drop - remove a clock dynamically allocated
- */
-void clkdev_drop(struct clk_lookup *cl)
-{
-	mutex_lock(&clocks_mutex);
-	list_del(&cl->node);
-	mutex_unlock(&clocks_mutex);
-	kfree(cl);
-}
-EXPORT_SYMBOL(clkdev_drop);
diff --git a/arch/arm/common/dmabounce.c b/arch/arm/common/dmabounce.c
index cc0a932bbea9..e5681636626f 100644
--- a/arch/arm/common/dmabounce.c
+++ b/arch/arm/common/dmabounce.c
@@ -328,7 +328,7 @@ static inline void unmap_single(struct device *dev, dma_addr_t dma_addr,
  * substitute the safe buffer for the unsafe one.
  * (basically move the buffer from an unsafe area to a safe one)
  */
-dma_addr_t dma_map_single(struct device *dev, void *ptr, size_t size,
+dma_addr_t __dma_map_single(struct device *dev, void *ptr, size_t size,
 		enum dma_data_direction dir)
 {
 	dev_dbg(dev, "%s(ptr=%p,size=%d,dir=%x)\n",
@@ -338,7 +338,7 @@ dma_addr_t dma_map_single(struct device *dev, void *ptr, size_t size,
 
 	return map_single(dev, ptr, size, dir);
 }
-EXPORT_SYMBOL(dma_map_single);
+EXPORT_SYMBOL(__dma_map_single);
 
 /*
  * see if a mapped address was really a "safe" buffer and if so, copy
@@ -346,7 +346,7 @@ EXPORT_SYMBOL(dma_map_single);
  * the safe buffer.  (basically return things back to the way they
  * should be)
  */
-void dma_unmap_single(struct device *dev, dma_addr_t dma_addr, size_t size,
+void __dma_unmap_single(struct device *dev, dma_addr_t dma_addr, size_t size,
 		enum dma_data_direction dir)
 {
 	dev_dbg(dev, "%s(ptr=%p,size=%d,dir=%x)\n",
@@ -354,9 +354,9 @@ void dma_unmap_single(struct device *dev, dma_addr_t dma_addr, size_t size,
 
 	unmap_single(dev, dma_addr, size, dir);
 }
-EXPORT_SYMBOL(dma_unmap_single);
+EXPORT_SYMBOL(__dma_unmap_single);
 
-dma_addr_t dma_map_page(struct device *dev, struct page *page,
+dma_addr_t __dma_map_page(struct device *dev, struct page *page,
 		unsigned long offset, size_t size, enum dma_data_direction dir)
 {
 	dev_dbg(dev, "%s(page=%p,off=%#lx,size=%zx,dir=%x)\n",
@@ -372,7 +372,7 @@ dma_addr_t dma_map_page(struct device *dev, struct page *page,
 
 	return map_single(dev, page_address(page) + offset, size, dir);
 }
-EXPORT_SYMBOL(dma_map_page);
+EXPORT_SYMBOL(__dma_map_page);
 
 /*
  * see if a mapped address was really a "safe" buffer and if so, copy
@@ -380,7 +380,7 @@ EXPORT_SYMBOL(dma_map_page);
  * the safe buffer.  (basically return things back to the way they
  * should be)
  */
-void dma_unmap_page(struct device *dev, dma_addr_t dma_addr, size_t size,
+void __dma_unmap_page(struct device *dev, dma_addr_t dma_addr, size_t size,
 		enum dma_data_direction dir)
 {
 	dev_dbg(dev, "%s(ptr=%p,size=%d,dir=%x)\n",
@@ -388,7 +388,7 @@ void dma_unmap_page(struct device *dev, dma_addr_t dma_addr, size_t size,
 
 	unmap_single(dev, dma_addr, size, dir);
 }
-EXPORT_SYMBOL(dma_unmap_page);
+EXPORT_SYMBOL(__dma_unmap_page);
 
 int dmabounce_sync_for_cpu(struct device *dev, dma_addr_t addr,
 		unsigned long off, size_t sz, enum dma_data_direction dir)
diff --git a/arch/arm/common/gic.c b/arch/arm/common/gic.c
index e6388dcd8cfa..0b89ef001330 100644
--- a/arch/arm/common/gic.c
+++ b/arch/arm/common/gic.c
@@ -35,6 +35,9 @@
 
 static DEFINE_SPINLOCK(irq_controller_lock);
 
+/* Address of GIC 0 CPU interface */
+void __iomem *gic_cpu_base_addr __read_mostly;
+
 struct gic_chip_data {
 	unsigned int irq_offset;
 	void __iomem *dist_base;
@@ -45,7 +48,7 @@ struct gic_chip_data {
 #define MAX_GIC_NR	1
 #endif
 
-static struct gic_chip_data gic_data[MAX_GIC_NR];
+static struct gic_chip_data gic_data[MAX_GIC_NR] __read_mostly;
 
 static inline void __iomem *gic_dist_base(unsigned int irq)
 {
@@ -213,21 +216,16 @@ void __init gic_cascade_irq(unsigned int gic_nr, unsigned int irq)
 	set_irq_chained_handler(irq, gic_handle_cascade_irq);
 }
 
-void __init gic_dist_init(unsigned int gic_nr, void __iomem *base,
-			  unsigned int irq_start)
+static void __init gic_dist_init(struct gic_chip_data *gic,
+	unsigned int irq_start)
 {
 	unsigned int gic_irqs, irq_limit, i;
+	void __iomem *base = gic->dist_base;
 	u32 cpumask = 1 << smp_processor_id();
 
-	if (gic_nr >= MAX_GIC_NR)
-		BUG();
-
 	cpumask |= cpumask << 8;
 	cpumask |= cpumask << 16;
 
-	gic_data[gic_nr].dist_base = base;
-	gic_data[gic_nr].irq_offset = (irq_start - 1) & ~31;
-
 	writel(0, base + GIC_DIST_CTRL);
 
 	/*
@@ -267,7 +265,7 @@ void __init gic_dist_init(unsigned int gic_nr, void __iomem *base,
 	/*
 	 * Limit number of interrupts registered to the platform maximum
 	 */
-	irq_limit = gic_data[gic_nr].irq_offset + gic_irqs;
+	irq_limit = gic->irq_offset + gic_irqs;
 	if (WARN_ON(irq_limit > NR_IRQS))
 		irq_limit = NR_IRQS;
 
@@ -276,7 +274,7 @@ void __init gic_dist_init(unsigned int gic_nr, void __iomem *base,
 	 */
 	for (i = irq_start; i < irq_limit; i++) {
 		set_irq_chip(i, &gic_chip);
-		set_irq_chip_data(i, &gic_data[gic_nr]);
+		set_irq_chip_data(i, gic);
 		set_irq_handler(i, handle_level_irq);
 		set_irq_flags(i, IRQF_VALID | IRQF_PROBE);
 	}
@@ -284,19 +282,12 @@ void __init gic_dist_init(unsigned int gic_nr, void __iomem *base,
 	writel(1, base + GIC_DIST_CTRL);
 }
 
-void __cpuinit gic_cpu_init(unsigned int gic_nr, void __iomem *base)
+static void __cpuinit gic_cpu_init(struct gic_chip_data *gic)
 {
-	void __iomem *dist_base;
+	void __iomem *dist_base = gic->dist_base;
+	void __iomem *base = gic->cpu_base;
 	int i;
 
-	if (gic_nr >= MAX_GIC_NR)
-		BUG();
-
-	dist_base = gic_data[gic_nr].dist_base;
-	BUG_ON(!dist_base);
-
-	gic_data[gic_nr].cpu_base = base;
-
 	/*
 	 * Deal with the banked PPI and SGI interrupts - disable all
 	 * PPI interrupts, ensure all SGI interrupts are enabled.
@@ -314,6 +305,42 @@ void __cpuinit gic_cpu_init(unsigned int gic_nr, void __iomem *base)
 	writel(1, base + GIC_CPU_CTRL);
 }
 
+void __init gic_init(unsigned int gic_nr, unsigned int irq_start,
+	void __iomem *dist_base, void __iomem *cpu_base)
+{
+	struct gic_chip_data *gic;
+
+	BUG_ON(gic_nr >= MAX_GIC_NR);
+
+	gic = &gic_data[gic_nr];
+	gic->dist_base = dist_base;
+	gic->cpu_base = cpu_base;
+	gic->irq_offset = (irq_start - 1) & ~31;
+
+	if (gic_nr == 0)
+		gic_cpu_base_addr = cpu_base;
+
+	gic_dist_init(gic, irq_start);
+	gic_cpu_init(gic);
+}
+
+void __cpuinit gic_secondary_init(unsigned int gic_nr)
+{
+	BUG_ON(gic_nr >= MAX_GIC_NR);
+
+	gic_cpu_init(&gic_data[gic_nr]);
+}
+
+void __cpuinit gic_enable_ppi(unsigned int irq)
+{
+	unsigned long flags;
+
+	local_irq_save(flags);
+	irq_to_desc(irq)->status |= IRQ_NOPROBE;
+	gic_unmask_irq(irq);
+	local_irq_restore(flags);
+}
+
 #ifdef CONFIG_SMP
 void gic_raise_softirq(const struct cpumask *mask, unsigned int irq)
 {
diff --git a/arch/arm/common/it8152.c b/arch/arm/common/it8152.c
index 1bec96e85196..42ff90b46dfb 100644
--- a/arch/arm/common/it8152.c
+++ b/arch/arm/common/it8152.c
@@ -352,3 +352,4 @@ struct pci_bus * __init it8152_pci_scan_bus(int nr, struct pci_sys_data *sys)
 	return pci_scan_bus(nr, &it8152_ops, sys);
 }
 
+EXPORT_SYMBOL(dma_set_coherent_mask);
diff --git a/arch/arm/plat-versatile/timer-sp.c b/arch/arm/common/timer-sp.c
index fb0d1c299718..6ef3342153b9 100644
--- a/arch/arm/plat-versatile/timer-sp.c
+++ b/arch/arm/common/timer-sp.c
@@ -1,5 +1,5 @@
 /*
- *  linux/arch/arm/plat-versatile/timer-sp.c
+ *  linux/arch/arm/common/timer-sp.c
  *
  *  Copyright (C) 1999 - 2003 ARM Limited
  *  Copyright (C) 2000 Deep Blue Solutions Ltd
@@ -26,8 +26,6 @@
 
 #include <asm/hardware/arm_timer.h>
 
-#include <plat/timer-sp.h>
-
 /*
  * These timers are currently always setup to be clocked at 1MHz.
  */
@@ -46,7 +44,6 @@ static struct clocksource clocksource_sp804 = {
 	.rating		= 200,
 	.read		= sp804_read,
 	.mask		= CLOCKSOURCE_MASK(32),
-	.shift		= 20,
 	.flags		= CLOCK_SOURCE_IS_CONTINUOUS,
 };
 
@@ -63,8 +60,7 @@ void __init sp804_clocksource_init(void __iomem *base)
 	writel(TIMER_CTRL_32BIT | TIMER_CTRL_ENABLE | TIMER_CTRL_PERIODIC,
 		clksrc_base + TIMER_CTRL);
 
-	cs->mult = clocksource_khz2mult(TIMER_FREQ_KHZ, cs->shift);
-	clocksource_register(cs);
+	clocksource_register_khz(cs, TIMER_FREQ_KHZ);
 }
 
 
diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h
index 749bb6622404..bc2d2d75f706 100644
--- a/arch/arm/include/asm/assembler.h
+++ b/arch/arm/include/asm/assembler.h
@@ -18,6 +18,7 @@
 #endif
 
 #include <asm/ptrace.h>
+#include <asm/domain.h>
 
 /*
  * Endian independent macros for shifting bytes within registers.
@@ -157,16 +158,24 @@
 #ifdef CONFIG_SMP
 #define ALT_SMP(instr...)					\
 9998:	instr
+/*
+ * Note: if you get assembler errors from ALT_UP() when building with
+ * CONFIG_THUMB2_KERNEL, you almost certainly need to use
+ * ALT_SMP( W(instr) ... )
+ */
 #define ALT_UP(instr...)					\
 	.pushsection ".alt.smp.init", "a"			;\
 	.long	9998b						;\
-	instr							;\
+9997:	instr							;\
+	.if . - 9997b != 4					;\
+		.error "ALT_UP() content must assemble to exactly 4 bytes";\
+	.endif							;\
 	.popsection
 #define ALT_UP_B(label)					\
 	.equ	up_b_offset, label - 9998b			;\
 	.pushsection ".alt.smp.init", "a"			;\
 	.long	9998b						;\
-	b	. + up_b_offset					;\
+	W(b)	. + up_b_offset					;\
 	.popsection
 #else
 #define ALT_SMP(instr...)
@@ -177,16 +186,24 @@
 /*
  * SMP data memory barrier
  */
-	.macro	smp_dmb
+	.macro	smp_dmb mode
 #ifdef CONFIG_SMP
 #if __LINUX_ARM_ARCH__ >= 7
+	.ifeqs "\mode","arm"
 	ALT_SMP(dmb)
+	.else
+	ALT_SMP(W(dmb))
+	.endif
 #elif __LINUX_ARM_ARCH__ == 6
 	ALT_SMP(mcr	p15, 0, r0, c7, c10, 5)	@ dmb
 #else
 #error Incompatible SMP platform
 #endif
+	.ifeqs "\mode","arm"
 	ALT_UP(nop)
+	.else
+	ALT_UP(W(nop))
+	.endif
 #endif
 	.endm
 
@@ -206,12 +223,12 @@
  */
 #ifdef CONFIG_THUMB2_KERNEL
 
-	.macro	usraccoff, instr, reg, ptr, inc, off, cond, abort
+	.macro	usraccoff, instr, reg, ptr, inc, off, cond, abort, t=T()
 9999:
 	.if	\inc == 1
-	\instr\cond\()bt \reg, [\ptr, #\off]
+	\instr\cond\()b\()\t\().w \reg, [\ptr, #\off]
 	.elseif	\inc == 4
-	\instr\cond\()t \reg, [\ptr, #\off]
+	\instr\cond\()\t\().w \reg, [\ptr, #\off]
 	.else
 	.error	"Unsupported inc macro argument"
 	.endif
@@ -246,13 +263,13 @@
 
 #else	/* !CONFIG_THUMB2_KERNEL */
 
-	.macro	usracc, instr, reg, ptr, inc, cond, rept, abort
+	.macro	usracc, instr, reg, ptr, inc, cond, rept, abort, t=T()
 	.rept	\rept
 9999:
 	.if	\inc == 1
-	\instr\cond\()bt \reg, [\ptr], #\inc
+	\instr\cond\()b\()\t \reg, [\ptr], #\inc
 	.elseif	\inc == 4
-	\instr\cond\()t \reg, [\ptr], #\inc
+	\instr\cond\()\t \reg, [\ptr], #\inc
 	.else
 	.error	"Unsupported inc macro argument"
 	.endif
diff --git a/arch/arm/include/asm/cache.h b/arch/arm/include/asm/cache.h
index 9d6122096fbe..75fe66bc02b4 100644
--- a/arch/arm/include/asm/cache.h
+++ b/arch/arm/include/asm/cache.h
@@ -23,4 +23,6 @@
 #define ARCH_SLAB_MINALIGN 8
 #endif
 
+#define __read_mostly __attribute__((__section__(".data..read_mostly")))
+
 #endif
diff --git a/arch/arm/include/asm/clkdev.h b/arch/arm/include/asm/clkdev.h
index b56c1389b6fa..765d33222369 100644
--- a/arch/arm/include/asm/clkdev.h
+++ b/arch/arm/include/asm/clkdev.h
@@ -12,23 +12,13 @@
 #ifndef __ASM_CLKDEV_H
 #define __ASM_CLKDEV_H
 
-struct clk;
-struct device;
+#include <linux/slab.h>
 
-struct clk_lookup {
-	struct list_head	node;
-	const char		*dev_id;
-	const char		*con_id;
-	struct clk		*clk;
-};
+#include <mach/clkdev.h>
 
-struct clk_lookup *clkdev_alloc(struct clk *clk, const char *con_id,
-	const char *dev_fmt, ...);
-
-void clkdev_add(struct clk_lookup *cl);
-void clkdev_drop(struct clk_lookup *cl);
-
-void clkdev_add_table(struct clk_lookup *, size_t);
-int clk_add_alias(const char *, const char *, char *, struct device *);
+static inline struct clk_lookup_alloc *__clkdev_alloc(size_t size)
+{
+	return kzalloc(size, GFP_KERNEL);
+}
 
 #endif
diff --git a/arch/arm/include/asm/dma-mapping.h b/arch/arm/include/asm/dma-mapping.h
index c568da7dcae4..4fff837363ed 100644
--- a/arch/arm/include/asm/dma-mapping.h
+++ b/arch/arm/include/asm/dma-mapping.h
@@ -5,24 +5,29 @@
 
 #include <linux/mm_types.h>
 #include <linux/scatterlist.h>
+#include <linux/dma-debug.h>
 
 #include <asm-generic/dma-coherent.h>
 #include <asm/memory.h>
 
+#ifdef __arch_page_to_dma
+#error Please update to __arch_pfn_to_dma
+#endif
+
 /*
- * page_to_dma/dma_to_virt/virt_to_dma are architecture private functions
- * used internally by the DMA-mapping API to provide DMA addresses. They
- * must not be used by drivers.
+ * dma_to_pfn/pfn_to_dma/dma_to_virt/virt_to_dma are architecture private
+ * functions used internally by the DMA-mapping API to provide DMA
+ * addresses. They must not be used by drivers.
  */
-#ifndef __arch_page_to_dma
-static inline dma_addr_t page_to_dma(struct device *dev, struct page *page)
+#ifndef __arch_pfn_to_dma
+static inline dma_addr_t pfn_to_dma(struct device *dev, unsigned long pfn)
 {
-	return (dma_addr_t)__pfn_to_bus(page_to_pfn(page));
+	return (dma_addr_t)__pfn_to_bus(pfn);
 }
 
-static inline struct page *dma_to_page(struct device *dev, dma_addr_t addr)
+static inline unsigned long dma_to_pfn(struct device *dev, dma_addr_t addr)
 {
-	return pfn_to_page(__bus_to_pfn(addr));
+	return __bus_to_pfn(addr);
 }
 
 static inline void *dma_to_virt(struct device *dev, dma_addr_t addr)
@@ -35,14 +40,14 @@ static inline dma_addr_t virt_to_dma(struct device *dev, void *addr)
 	return (dma_addr_t)__virt_to_bus((unsigned long)(addr));
 }
 #else
-static inline dma_addr_t page_to_dma(struct device *dev, struct page *page)
+static inline dma_addr_t pfn_to_dma(struct device *dev, unsigned long pfn)
 {
-	return __arch_page_to_dma(dev, page);
+	return __arch_pfn_to_dma(dev, pfn);
 }
 
-static inline struct page *dma_to_page(struct device *dev, dma_addr_t addr)
+static inline unsigned long dma_to_pfn(struct device *dev, dma_addr_t addr)
 {
-	return __arch_dma_to_page(dev, addr);
+	return __arch_dma_to_pfn(dev, addr);
 }
 
 static inline void *dma_to_virt(struct device *dev, dma_addr_t addr)
@@ -293,13 +298,13 @@ extern int dma_needs_bounce(struct device*, dma_addr_t, size_t);
 /*
  * The DMA API, implemented by dmabounce.c.  See below for descriptions.
  */
-extern dma_addr_t dma_map_single(struct device *, void *, size_t,
+extern dma_addr_t __dma_map_single(struct device *, void *, size_t,
 		enum dma_data_direction);
-extern void dma_unmap_single(struct device *, dma_addr_t, size_t,
+extern void __dma_unmap_single(struct device *, dma_addr_t, size_t,
 		enum dma_data_direction);
-extern dma_addr_t dma_map_page(struct device *, struct page *,
+extern dma_addr_t __dma_map_page(struct device *, struct page *,
 		unsigned long, size_t, enum dma_data_direction);
-extern void dma_unmap_page(struct device *, dma_addr_t, size_t,
+extern void __dma_unmap_page(struct device *, dma_addr_t, size_t,
 		enum dma_data_direction);
 
 /*
@@ -323,6 +328,34 @@ static inline int dmabounce_sync_for_device(struct device *d, dma_addr_t addr,
 }
 
 
+static inline dma_addr_t __dma_map_single(struct device *dev, void *cpu_addr,
+		size_t size, enum dma_data_direction dir)
+{
+	__dma_single_cpu_to_dev(cpu_addr, size, dir);
+	return virt_to_dma(dev, cpu_addr);
+}
+
+static inline dma_addr_t __dma_map_page(struct device *dev, struct page *page,
+	     unsigned long offset, size_t size, enum dma_data_direction dir)
+{
+	__dma_page_cpu_to_dev(page, offset, size, dir);
+	return pfn_to_dma(dev, page_to_pfn(page)) + offset;
+}
+
+static inline void __dma_unmap_single(struct device *dev, dma_addr_t handle,
+		size_t size, enum dma_data_direction dir)
+{
+	__dma_single_dev_to_cpu(dma_to_virt(dev, handle), size, dir);
+}
+
+static inline void __dma_unmap_page(struct device *dev, dma_addr_t handle,
+		size_t size, enum dma_data_direction dir)
+{
+	__dma_page_dev_to_cpu(pfn_to_page(dma_to_pfn(dev, handle)),
+		handle & ~PAGE_MASK, size, dir);
+}
+#endif /* CONFIG_DMABOUNCE */
+
 /**
  * dma_map_single - map a single buffer for streaming DMA
  * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices
@@ -340,11 +373,16 @@ static inline int dmabounce_sync_for_device(struct device *d, dma_addr_t addr,
 static inline dma_addr_t dma_map_single(struct device *dev, void *cpu_addr,
 		size_t size, enum dma_data_direction dir)
 {
+	dma_addr_t addr;
+
 	BUG_ON(!valid_dma_direction(dir));
 
-	__dma_single_cpu_to_dev(cpu_addr, size, dir);
+	addr = __dma_map_single(dev, cpu_addr, size, dir);
+	debug_dma_map_page(dev, virt_to_page(cpu_addr),
+			(unsigned long)cpu_addr & ~PAGE_MASK, size,
+			dir, addr, true);
 
-	return virt_to_dma(dev, cpu_addr);
+	return addr;
 }
 
 /**
@@ -364,11 +402,14 @@ static inline dma_addr_t dma_map_single(struct device *dev, void *cpu_addr,
 static inline dma_addr_t dma_map_page(struct device *dev, struct page *page,
 	     unsigned long offset, size_t size, enum dma_data_direction dir)
 {
+	dma_addr_t addr;
+
 	BUG_ON(!valid_dma_direction(dir));
 
-	__dma_page_cpu_to_dev(page, offset, size, dir);
+	addr = __dma_map_page(dev, page, offset, size, dir);
+	debug_dma_map_page(dev, page, offset, size, dir, addr, false);
 
-	return page_to_dma(dev, page) + offset;
+	return addr;
 }
 
 /**
@@ -388,7 +429,8 @@ static inline dma_addr_t dma_map_page(struct device *dev, struct page *page,
 static inline void dma_unmap_single(struct device *dev, dma_addr_t handle,
 		size_t size, enum dma_data_direction dir)
 {
-	__dma_single_dev_to_cpu(dma_to_virt(dev, handle), size, dir);
+	debug_dma_unmap_page(dev, handle, size, dir, true);
+	__dma_unmap_single(dev, handle, size, dir);
 }
 
 /**
@@ -408,10 +450,9 @@ static inline void dma_unmap_single(struct device *dev, dma_addr_t handle,
 static inline void dma_unmap_page(struct device *dev, dma_addr_t handle,
 		size_t size, enum dma_data_direction dir)
 {
-	__dma_page_dev_to_cpu(dma_to_page(dev, handle), handle & ~PAGE_MASK,
-		size, dir);
+	debug_dma_unmap_page(dev, handle, size, dir, false);
+	__dma_unmap_page(dev, handle, size, dir);
 }
-#endif /* CONFIG_DMABOUNCE */
 
 /**
  * dma_sync_single_range_for_cpu
@@ -437,6 +478,8 @@ static inline void dma_sync_single_range_for_cpu(struct device *dev,
 {
 	BUG_ON(!valid_dma_direction(dir));
 
+	debug_dma_sync_single_for_cpu(dev, handle + offset, size, dir);
+
 	if (!dmabounce_sync_for_cpu(dev, handle, offset, size, dir))
 		return;
 
@@ -449,6 +492,8 @@ static inline void dma_sync_single_range_for_device(struct device *dev,
 {
 	BUG_ON(!valid_dma_direction(dir));
 
+	debug_dma_sync_single_for_device(dev, handle + offset, size, dir);
+
 	if (!dmabounce_sync_for_device(dev, handle, offset, size, dir))
 		return;
 
diff --git a/arch/arm/include/asm/domain.h b/arch/arm/include/asm/domain.h
index cc7ef4080711..af18ceaacf5d 100644
--- a/arch/arm/include/asm/domain.h
+++ b/arch/arm/include/asm/domain.h
@@ -45,13 +45,17 @@
  */
 #define DOMAIN_NOACCESS	0
 #define DOMAIN_CLIENT	1
+#ifdef CONFIG_CPU_USE_DOMAINS
 #define DOMAIN_MANAGER	3
+#else
+#define DOMAIN_MANAGER	1
+#endif
 
 #define domain_val(dom,type)	((type) << (2*(dom)))
 
 #ifndef __ASSEMBLY__
 
-#ifdef CONFIG_MMU
+#ifdef CONFIG_CPU_USE_DOMAINS
 #define set_domain(x)					\
 	do {						\
 	__asm__ __volatile__(				\
@@ -74,5 +78,28 @@
 #define modify_domain(dom,type)	do { } while (0)
 #endif
 
+/*
+ * Generate the T (user) versions of the LDR/STR and related
+ * instructions (inline assembly)
+ */
+#ifdef CONFIG_CPU_USE_DOMAINS
+#define T(instr)	#instr "t"
+#else
+#define T(instr)	#instr
 #endif
-#endif /* !__ASSEMBLY__ */
+
+#else /* __ASSEMBLY__ */
+
+/*
+ * Generate the T (user) versions of the LDR/STR and related
+ * instructions
+ */
+#ifdef CONFIG_CPU_USE_DOMAINS
+#define T(instr)	instr ## t
+#else
+#define T(instr)	instr
+#endif
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* !__ASM_PROC_DOMAIN_H */
diff --git a/arch/arm/include/asm/elf.h b/arch/arm/include/asm/elf.h
index 8bb66bca2e3e..c3cd8755e648 100644
--- a/arch/arm/include/asm/elf.h
+++ b/arch/arm/include/asm/elf.h
@@ -99,6 +99,8 @@ struct elf32_hdr;
 extern int elf_check_arch(const struct elf32_hdr *);
 #define elf_check_arch elf_check_arch
 
+#define vmcore_elf64_check_arch(x) (0)
+
 extern int arm_elf_read_implies_exec(const struct elf32_hdr *, int);
 #define elf_read_implies_exec(ex,stk) arm_elf_read_implies_exec(&(ex), stk)
 
diff --git a/arch/arm/include/asm/entry-macro-multi.S b/arch/arm/include/asm/entry-macro-multi.S
new file mode 100644
index 000000000000..ec0bbf79c71f
--- /dev/null
+++ b/arch/arm/include/asm/entry-macro-multi.S
@@ -0,0 +1,44 @@
+/*
+ * Interrupt handling.  Preserves r7, r8, r9
+ */
+	.macro	arch_irq_handler_default
+	get_irqnr_preamble r5, lr
+1:	get_irqnr_and_base r0, r6, r5, lr
+	movne	r1, sp
+	@
+	@ routine called with r0 = irq number, r1 = struct pt_regs *
+	@
+	adrne	lr, BSYM(1b)
+	bne	asm_do_IRQ
+
+#ifdef CONFIG_SMP
+	/*
+	 * XXX
+	 *
+	 * this macro assumes that irqstat (r6) and base (r5) are
+	 * preserved from get_irqnr_and_base above
+	 */
+	ALT_SMP(test_for_ipi r0, r6, r5, lr)
+	ALT_UP_B(9997f)
+	movne	r1, sp
+	adrne	lr, BSYM(1b)
+	bne	do_IPI
+
+#ifdef CONFIG_LOCAL_TIMERS
+	test_for_ltirq r0, r6, r5, lr
+	movne	r0, sp
+	adrne	lr, BSYM(1b)
+	bne	do_local_timer
+#endif
+#endif
+9997:
+	.endm
+
+	.macro	arch_irq_handler, symbol_name
+	.align	5
+	.global \symbol_name
+\symbol_name:
+	mov	r4, lr
+	arch_irq_handler_default
+	mov     pc, r4
+	.endm
diff --git a/arch/arm/include/asm/futex.h b/arch/arm/include/asm/futex.h
index 540a044153a5..b33fe7065b38 100644
--- a/arch/arm/include/asm/futex.h
+++ b/arch/arm/include/asm/futex.h
@@ -13,12 +13,13 @@
 #include <linux/preempt.h>
 #include <linux/uaccess.h>
 #include <asm/errno.h>
+#include <asm/domain.h>
 
 #define __futex_atomic_op(insn, ret, oldval, uaddr, oparg)	\
 	__asm__ __volatile__(					\
-	"1:	ldrt	%1, [%2]\n"				\
+	"1:	" T(ldr) "	%1, [%2]\n"			\
 	"	" insn "\n"					\
-	"2:	strt	%0, [%2]\n"				\
+	"2:	" T(str) "	%0, [%2]\n"			\
 	"	mov	%0, #0\n"				\
 	"3:\n"							\
 	"	.pushsection __ex_table,\"a\"\n"		\
@@ -97,10 +98,10 @@ futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval, int newval)
 	pagefault_disable();	/* implies preempt_disable() */
 
 	__asm__ __volatile__("@futex_atomic_cmpxchg_inatomic\n"
-	"1:	ldrt	%0, [%3]\n"
+	"1:	" T(ldr) "	%0, [%3]\n"
 	"	teq	%0, %1\n"
 	"	it	eq	@ explicit IT needed for the 2b label\n"
-	"2:	streqt	%2, [%3]\n"
+	"2:	" T(streq) "	%2, [%3]\n"
 	"3:\n"
 	"	.pushsection __ex_table,\"a\"\n"
 	"	.align	3\n"
diff --git a/arch/arm/include/asm/hardirq.h b/arch/arm/include/asm/hardirq.h
index 6d7485aff955..89ad1805e579 100644
--- a/arch/arm/include/asm/hardirq.h
+++ b/arch/arm/include/asm/hardirq.h
@@ -5,13 +5,31 @@
 #include <linux/threads.h>
 #include <asm/irq.h>
 
+#define NR_IPI	5
+
 typedef struct {
 	unsigned int __softirq_pending;
+#ifdef CONFIG_LOCAL_TIMERS
 	unsigned int local_timer_irqs;
+#endif
+#ifdef CONFIG_SMP
+	unsigned int ipi_irqs[NR_IPI];
+#endif
 } ____cacheline_aligned irq_cpustat_t;
 
 #include <linux/irq_cpustat.h>	/* Standard mappings for irq_cpustat_t above */
 
+#define __inc_irq_stat(cpu, member)	__IRQ_STAT(cpu, member)++
+#define __get_irq_stat(cpu, member)	__IRQ_STAT(cpu, member)
+
+#ifdef CONFIG_SMP
+u64 smp_irq_stat_cpu(unsigned int cpu);
+#else
+#define smp_irq_stat_cpu(cpu)	0
+#endif
+
+#define arch_irq_stat_cpu	smp_irq_stat_cpu
+
 #if NR_IRQS > 512
 #define HARDIRQ_BITS	10
 #elif NR_IRQS > 256
diff --git a/arch/arm/include/asm/hardware/entry-macro-gic.S b/arch/arm/include/asm/hardware/entry-macro-gic.S
new file mode 100644
index 000000000000..c115b82fe80a
--- /dev/null
+++ b/arch/arm/include/asm/hardware/entry-macro-gic.S
@@ -0,0 +1,75 @@
+/*
+ * arch/arm/include/asm/hardware/entry-macro-gic.S
+ *
+ * Low-level IRQ helper macros for GIC
+ *
+ * This file is licensed under  the terms of the GNU General Public
+ * License version 2. This program is licensed "as is" without any
+ * warranty of any kind, whether express or implied.
+ */
+
+#include <asm/hardware/gic.h>
+
+#ifndef HAVE_GET_IRQNR_PREAMBLE
+	.macro	get_irqnr_preamble, base, tmp
+	ldr	\base, =gic_cpu_base_addr
+	ldr	\base, [\base]
+	.endm
+#endif
+
+/*
+ * The interrupt numbering scheme is defined in the
+ * interrupt controller spec.  To wit:
+ *
+ * Interrupts 0-15 are IPI
+ * 16-28 are reserved
+ * 29-31 are local.  We allow 30 to be used for the watchdog.
+ * 32-1020 are global
+ * 1021-1022 are reserved
+ * 1023 is "spurious" (no interrupt)
+ *
+ * For now, we ignore all local interrupts so only return an interrupt if it's
+ * between 30 and 1020.  The test_for_ipi routine below will pick up on IPIs.
+ *
+ * A simple read from the controller will tell us the number of the highest
+ * priority enabled interrupt.  We then just need to check whether it is in the
+ * valid range for an IRQ (30-1020 inclusive).
+ */
+
+	.macro  get_irqnr_and_base, irqnr, irqstat, base, tmp
+
+	ldr     \irqstat, [\base, #GIC_CPU_INTACK]
+	/* bits 12-10 = src CPU, 9-0 = int # */
+
+	ldr	\tmp, =1021
+	bic     \irqnr, \irqstat, #0x1c00
+	cmp     \irqnr, #29
+	cmpcc	\irqnr, \irqnr
+	cmpne	\irqnr, \tmp
+	cmpcs	\irqnr, \irqnr
+	.endm
+
+/* We assume that irqstat (the raw value of the IRQ acknowledge
+ * register) is preserved from the macro above.
+ * If there is an IPI, we immediately signal end of interrupt on the
+ * controller, since this requires the original irqstat value which
+ * we won't easily be able to recreate later.
+ */
+
+	.macro test_for_ipi, irqnr, irqstat, base, tmp
+	bic	\irqnr, \irqstat, #0x1c00
+	cmp	\irqnr, #16
+	strcc	\irqstat, [\base, #GIC_CPU_EOI]
+	cmpcs	\irqnr, \irqnr
+	.endm
+
+/* As above, this assumes that irqstat and base are preserved.. */
+
+	.macro test_for_ltirq, irqnr, irqstat, base, tmp
+	bic	\irqnr, \irqstat, #0x1c00
+	mov 	\tmp, #0
+	cmp	\irqnr, #29
+	moveq	\tmp, #1
+	streq	\irqstat, [\base, #GIC_CPU_EOI]
+	cmp	\tmp, #0
+	.endm
diff --git a/arch/arm/include/asm/hardware/gic.h b/arch/arm/include/asm/hardware/gic.h
index 7f34333bb545..84557d321001 100644
--- a/arch/arm/include/asm/hardware/gic.h
+++ b/arch/arm/include/asm/hardware/gic.h
@@ -33,10 +33,13 @@
 #define GIC_DIST_SOFTINT		0xf00
 
 #ifndef __ASSEMBLY__
-void gic_dist_init(unsigned int gic_nr, void __iomem *base, unsigned int irq_start);
-void gic_cpu_init(unsigned int gic_nr, void __iomem *base);
+extern void __iomem *gic_cpu_base_addr;
+
+void gic_init(unsigned int, unsigned int, void __iomem *, void __iomem *);
+void gic_secondary_init(unsigned int);
 void gic_cascade_irq(unsigned int gic_nr, unsigned int irq);
 void gic_raise_softirq(const struct cpumask *mask, unsigned int irq);
+void gic_enable_ppi(unsigned int);
 #endif
 
 #endif
diff --git a/arch/arm/include/asm/hardware/it8152.h b/arch/arm/include/asm/hardware/it8152.h
index 21fa272301f8..b2f95c72287c 100644
--- a/arch/arm/include/asm/hardware/it8152.h
+++ b/arch/arm/include/asm/hardware/it8152.h
@@ -76,6 +76,7 @@ extern unsigned long it8152_base_address;
   IT8152_PD_IRQ(0)  Audio controller (ACR)
  */
 #define IT8152_IRQ(x)   (IRQ_BOARD_START + (x))
+#define IT8152_LAST_IRQ	(IRQ_BOARD_START + 40)
 
 /* IRQ-sources in 3 groups - local devices, LPC (serial), and external PCI */
 #define IT8152_LD_IRQ_COUNT     9
diff --git a/arch/arm/plat-versatile/include/plat/timer-sp.h b/arch/arm/include/asm/hardware/timer-sp.h
index 21e75e30d497..21e75e30d497 100644
--- a/arch/arm/plat-versatile/include/plat/timer-sp.h
+++ b/arch/arm/include/asm/hardware/timer-sp.h
diff --git a/arch/arm/include/asm/highmem.h b/arch/arm/include/asm/highmem.h
index 1fc684e70ab6..7080e2c8fa62 100644
--- a/arch/arm/include/asm/highmem.h
+++ b/arch/arm/include/asm/highmem.h
@@ -25,9 +25,6 @@ extern void *kmap_high(struct page *page);
 extern void *kmap_high_get(struct page *page);
 extern void kunmap_high(struct page *page);
 
-extern void *kmap_high_l1_vipt(struct page *page, pte_t *saved_pte);
-extern void kunmap_high_l1_vipt(struct page *page, pte_t saved_pte);
-
 /*
  * The following functions are already defined by <linux/highmem.h>
  * when CONFIG_HIGHMEM is not set.
diff --git a/arch/arm/include/asm/io.h b/arch/arm/include/asm/io.h
index 815efa2d4e07..20e0f7c9e03e 100644
--- a/arch/arm/include/asm/io.h
+++ b/arch/arm/include/asm/io.h
@@ -241,18 +241,15 @@ extern void _memset_io(volatile void __iomem *, int, size_t);
  *
  */
 #ifndef __arch_ioremap
-#define ioremap(cookie,size)		__arm_ioremap(cookie, size, MT_DEVICE)
-#define ioremap_nocache(cookie,size)	__arm_ioremap(cookie, size, MT_DEVICE)
-#define ioremap_cached(cookie,size)	__arm_ioremap(cookie, size, MT_DEVICE_CACHED)
-#define ioremap_wc(cookie,size)		__arm_ioremap(cookie, size, MT_DEVICE_WC)
-#define iounmap(cookie)			__iounmap(cookie)
-#else
+#define __arch_ioremap			__arm_ioremap
+#define __arch_iounmap			__iounmap
+#endif
+
 #define ioremap(cookie,size)		__arch_ioremap((cookie), (size), MT_DEVICE)
 #define ioremap_nocache(cookie,size)	__arch_ioremap((cookie), (size), MT_DEVICE)
 #define ioremap_cached(cookie,size)	__arch_ioremap((cookie), (size), MT_DEVICE_CACHED)
 #define ioremap_wc(cookie,size)		__arch_ioremap((cookie), (size), MT_DEVICE_WC)
-#define iounmap(cookie)			__arch_iounmap(cookie)
-#endif
+#define iounmap				__arch_iounmap
 
 /*
  * io{read,write}{8,16,32} macros
diff --git a/arch/arm/include/asm/kexec.h b/arch/arm/include/asm/kexec.h
index 8ec9ef5c3c7b..c0094d8edae4 100644
--- a/arch/arm/include/asm/kexec.h
+++ b/arch/arm/include/asm/kexec.h
@@ -33,10 +33,20 @@ static inline void crash_setup_regs(struct pt_regs *newregs,
 	if (oldregs) {
 		memcpy(newregs, oldregs, sizeof(*newregs));
 	} else {
-		__asm__ __volatile__ ("stmia %0, {r0 - r15}"
-				      : : "r" (&newregs->ARM_r0));
-		__asm__ __volatile__ ("mrs %0, cpsr"
-				      : "=r" (newregs->ARM_cpsr));
+		__asm__ __volatile__ (
+			"stmia	%[regs_base], {r0-r12}\n\t"
+			"mov	%[_ARM_sp], sp\n\t"
+			"str	lr, %[_ARM_lr]\n\t"
+			"adr	%[_ARM_pc], 1f\n\t"
+			"mrs	%[_ARM_cpsr], cpsr\n\t"
+		"1:"
+			: [_ARM_pc] "=r" (newregs->ARM_pc),
+			  [_ARM_cpsr] "=r" (newregs->ARM_cpsr),
+			  [_ARM_sp] "=r" (newregs->ARM_sp),
+			  [_ARM_lr] "=o" (newregs->ARM_lr)
+			: [regs_base] "r" (&newregs->ARM_r0)
+			: "memory"
+		);
 	}
 }
 
diff --git a/arch/arm/include/asm/localtimer.h b/arch/arm/include/asm/localtimer.h
index 50c7e7cfd670..6bc63ab498ce 100644
--- a/arch/arm/include/asm/localtimer.h
+++ b/arch/arm/include/asm/localtimer.h
@@ -30,7 +30,6 @@ asmlinkage void do_local_timer(struct pt_regs *);
 #include "smp_twd.h"
 
 #define local_timer_ack()	twd_timer_ack()
-#define local_timer_stop()	twd_timer_stop()
 
 #else
 
@@ -40,11 +39,6 @@ asmlinkage void do_local_timer(struct pt_regs *);
  */
 int local_timer_ack(void);
 
-/*
- * Stop a local timer interrupt.
- */
-void local_timer_stop(void);
-
 #endif
 
 /*
@@ -52,12 +46,6 @@ void local_timer_stop(void);
  */
 void local_timer_setup(struct clock_event_device *);
 
-#else
-
-static inline void local_timer_stop(void)
-{
-}
-
 #endif
 
 #endif
diff --git a/arch/arm/include/asm/mach/arch.h b/arch/arm/include/asm/mach/arch.h
index d97a964207fa..3a0893a76a3b 100644
--- a/arch/arm/include/asm/mach/arch.h
+++ b/arch/arm/include/asm/mach/arch.h
@@ -37,12 +37,21 @@ struct machine_desc {
 					 struct meminfo *);
 	void			(*reserve)(void);/* reserve mem blocks	*/
 	void			(*map_io)(void);/* IO mapping function	*/
+	void			(*init_early)(void);
 	void			(*init_irq)(void);
 	struct sys_timer	*timer;		/* system tick timer	*/
 	void			(*init_machine)(void);
+#ifdef CONFIG_MULTI_IRQ_HANDLER
+	void			(*handle_irq)(struct pt_regs *);
+#endif
 };
 
 /*
+ * Current machine - only accessible during boot.
+ */
+extern struct machine_desc *machine_desc;
+
+/*
  * Set of macros to define architecture features.  This is built into
  * a table by the linker.
  */
diff --git a/arch/arm/include/asm/mach/irq.h b/arch/arm/include/asm/mach/irq.h
index ce3eee9fe26c..22ac140edd9e 100644
--- a/arch/arm/include/asm/mach/irq.h
+++ b/arch/arm/include/asm/mach/irq.h
@@ -17,10 +17,12 @@ struct seq_file;
 /*
  * This is internal.  Do not use it.
  */
-extern unsigned int arch_nr_irqs;
-extern void (*init_arch_irq)(void);
 extern void init_FIQ(void);
-extern int show_fiq_list(struct seq_file *, void *);
+extern int show_fiq_list(struct seq_file *, int);
+
+#ifdef CONFIG_MULTI_IRQ_HANDLER
+extern void (*handle_arch_irq)(struct pt_regs *);
+#endif
 
 /*
  * This is for easy migration, but should be changed in the source
diff --git a/arch/arm/include/asm/mach/time.h b/arch/arm/include/asm/mach/time.h
index 35d408f6dccf..883f6be5117a 100644
--- a/arch/arm/include/asm/mach/time.h
+++ b/arch/arm/include/asm/mach/time.h
@@ -43,7 +43,6 @@ struct sys_timer {
 #endif
 };
 
-extern struct sys_timer *system_timer;
 extern void timer_tick(void);
 
 #endif
diff --git a/arch/arm/include/asm/module.h b/arch/arm/include/asm/module.h
index cbb0bc295d2b..12c8e680cbff 100644
--- a/arch/arm/include/asm/module.h
+++ b/arch/arm/include/asm/module.h
@@ -8,11 +8,6 @@
 struct unwind_table;
 
 #ifdef CONFIG_ARM_UNWIND
-struct arm_unwind_mapping {
-	Elf_Shdr *unw_sec;
-	Elf_Shdr *sec_text;
-	struct unwind_table *unwind;
-};
 enum {
 	ARM_SEC_INIT,
 	ARM_SEC_DEVINIT,
@@ -21,13 +16,13 @@ enum {
 	ARM_SEC_DEVEXIT,
 	ARM_SEC_MAX,
 };
+#endif
+
 struct mod_arch_specific {
-	struct arm_unwind_mapping map[ARM_SEC_MAX];
-};
-#else
-struct mod_arch_specific {
-};
+#ifdef CONFIG_ARM_UNWIND
+	struct unwind_table *unwind[ARM_SEC_MAX];
 #endif
+};
 
 /*
  * Include the ARM architecture version.
diff --git a/arch/arm/include/asm/page.h b/arch/arm/include/asm/page.h
index a485ac3c8696..f51a69595f6e 100644
--- a/arch/arm/include/asm/page.h
+++ b/arch/arm/include/asm/page.h
@@ -151,13 +151,15 @@ extern void __cpu_copy_user_highpage(struct page *to, struct page *from,
 #define clear_page(page)	memset((void *)(page), 0, PAGE_SIZE)
 extern void copy_page(void *to, const void *from);
 
+typedef unsigned long pteval_t;
+
 #undef STRICT_MM_TYPECHECKS
 
 #ifdef STRICT_MM_TYPECHECKS
 /*
  * These are used to make use of C type-checking..
  */
-typedef struct { unsigned long pte; } pte_t;
+typedef struct { pteval_t pte; } pte_t;
 typedef struct { unsigned long pmd; } pmd_t;
 typedef struct { unsigned long pgd[2]; } pgd_t;
 typedef struct { unsigned long pgprot; } pgprot_t;
@@ -175,7 +177,7 @@ typedef struct { unsigned long pgprot; } pgprot_t;
 /*
  * .. while these make it easier on the compiler
  */
-typedef unsigned long pte_t;
+typedef pteval_t pte_t;
 typedef unsigned long pmd_t;
 typedef unsigned long pgd_t[2];
 typedef unsigned long pgprot_t;
diff --git a/arch/arm/include/asm/pgalloc.h b/arch/arm/include/asm/pgalloc.h
index b12cc98bbe04..9763be04f77e 100644
--- a/arch/arm/include/asm/pgalloc.h
+++ b/arch/arm/include/asm/pgalloc.h
@@ -30,14 +30,16 @@
 #define pmd_free(mm, pmd)		do { } while (0)
 #define pgd_populate(mm,pmd,pte)	BUG()
 
-extern pgd_t *get_pgd_slow(struct mm_struct *mm);
-extern void free_pgd_slow(struct mm_struct *mm, pgd_t *pgd);
-
-#define pgd_alloc(mm)			get_pgd_slow(mm)
-#define pgd_free(mm, pgd)		free_pgd_slow(mm, pgd)
+extern pgd_t *pgd_alloc(struct mm_struct *mm);
+extern void pgd_free(struct mm_struct *mm, pgd_t *pgd);
 
 #define PGALLOC_GFP	(GFP_KERNEL | __GFP_NOTRACK | __GFP_REPEAT | __GFP_ZERO)
 
+static inline void clean_pte_table(pte_t *pte)
+{
+	clean_dcache_area(pte + PTE_HWTABLE_PTRS, PTE_HWTABLE_SIZE);
+}
+
 /*
  * Allocate one PTE table.
  *
@@ -45,14 +47,14 @@ extern void free_pgd_slow(struct mm_struct *mm, pgd_t *pgd);
  * into one table thus:
  *
  *  +------------+
- *  |  h/w pt 0  |
- *  +------------+
- *  |  h/w pt 1  |
- *  +------------+
  *  | Linux pt 0 |
  *  +------------+
  *  | Linux pt 1 |
  *  +------------+
+ *  |  h/w pt 0  |
+ *  +------------+
+ *  |  h/w pt 1  |
+ *  +------------+
  */
 static inline pte_t *
 pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr)
@@ -60,10 +62,8 @@ pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr)
 	pte_t *pte;
 
 	pte = (pte_t *)__get_free_page(PGALLOC_GFP);
-	if (pte) {
-		clean_dcache_area(pte, sizeof(pte_t) * PTRS_PER_PTE);
-		pte += PTRS_PER_PTE;
-	}
+	if (pte)
+		clean_pte_table(pte);
 
 	return pte;
 }
@@ -79,10 +79,8 @@ pte_alloc_one(struct mm_struct *mm, unsigned long addr)
 	pte = alloc_pages(PGALLOC_GFP, 0);
 #endif
 	if (pte) {
-		if (!PageHighMem(pte)) {
-			void *page = page_address(pte);
-			clean_dcache_area(page, sizeof(pte_t) * PTRS_PER_PTE);
-		}
+		if (!PageHighMem(pte))
+			clean_pte_table(page_address(pte));
 		pgtable_page_ctor(pte);
 	}
 
@@ -94,10 +92,8 @@ pte_alloc_one(struct mm_struct *mm, unsigned long addr)
  */
 static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
 {
-	if (pte) {
-		pte -= PTRS_PER_PTE;
+	if (pte)
 		free_page((unsigned long)pte);
-	}
 }
 
 static inline void pte_free(struct mm_struct *mm, pgtable_t pte)
@@ -106,8 +102,10 @@ static inline void pte_free(struct mm_struct *mm, pgtable_t pte)
 	__free_page(pte);
 }
 
-static inline void __pmd_populate(pmd_t *pmdp, unsigned long pmdval)
+static inline void __pmd_populate(pmd_t *pmdp, phys_addr_t pte,
+	unsigned long prot)
 {
+	unsigned long pmdval = (pte + PTE_HWTABLE_OFF) | prot;
 	pmdp[0] = __pmd(pmdval);
 	pmdp[1] = __pmd(pmdval + 256 * sizeof(pte_t));
 	flush_pmd_entry(pmdp);
@@ -122,20 +120,16 @@ static inline void __pmd_populate(pmd_t *pmdp, unsigned long pmdval)
 static inline void
 pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmdp, pte_t *ptep)
 {
-	unsigned long pte_ptr = (unsigned long)ptep;
-
 	/*
-	 * The pmd must be loaded with the physical
-	 * address of the PTE table
+	 * The pmd must be loaded with the physical address of the PTE table
 	 */
-	pte_ptr -= PTRS_PER_PTE * sizeof(void *);
-	__pmd_populate(pmdp, __pa(pte_ptr) | _PAGE_KERNEL_TABLE);
+	__pmd_populate(pmdp, __pa(ptep), _PAGE_KERNEL_TABLE);
 }
 
 static inline void
 pmd_populate(struct mm_struct *mm, pmd_t *pmdp, pgtable_t ptep)
 {
-	__pmd_populate(pmdp, page_to_pfn(ptep) << PAGE_SHIFT | _PAGE_USER_TABLE);
+	__pmd_populate(pmdp, page_to_phys(ptep), _PAGE_USER_TABLE);
 }
 #define pmd_pgtable(pmd) pmd_page(pmd)
 
diff --git a/arch/arm/include/asm/pgtable.h b/arch/arm/include/asm/pgtable.h
index 53d1d5deb111..ebcb6432f45f 100644
--- a/arch/arm/include/asm/pgtable.h
+++ b/arch/arm/include/asm/pgtable.h
@@ -10,6 +10,7 @@
 #ifndef _ASMARM_PGTABLE_H
 #define _ASMARM_PGTABLE_H
 
+#include <linux/const.h>
 #include <asm-generic/4level-fixup.h>
 #include <asm/proc-fns.h>
 
@@ -54,7 +55,7 @@
  * Therefore, we tweak the implementation slightly - we tell Linux that we
  * have 2048 entries in the first level, each of which is 8 bytes (iow, two
  * hardware pointers to the second level.)  The second level contains two
- * hardware PTE tables arranged contiguously, followed by Linux versions
+ * hardware PTE tables arranged contiguously, preceded by Linux versions
  * which contain the state information Linux needs.  We, therefore, end up
  * with 512 entries in the "PTE" level.
  *
@@ -62,15 +63,15 @@
  *
  *    pgd             pte
  * |        |
- * +--------+ +0
- * |        |-----> +------------+ +0
+ * +--------+
+ * |        |       +------------+ +0
+ * +- - - - +       | Linux pt 0 |
+ * |        |       +------------+ +1024
+ * +--------+ +0    | Linux pt 1 |
+ * |        |-----> +------------+ +2048
  * +- - - - + +4    |  h/w pt 0  |
- * |        |-----> +------------+ +1024
+ * |        |-----> +------------+ +3072
  * +--------+ +8    |  h/w pt 1  |
- * |        |       +------------+ +2048
- * +- - - - +       | Linux pt 0 |
- * |        |       +------------+ +3072
- * +--------+       | Linux pt 1 |
  * |        |       +------------+ +4096
  *
  * See L_PTE_xxx below for definitions of bits in the "Linux pt", and
@@ -102,6 +103,10 @@
 #define PTRS_PER_PMD		1
 #define PTRS_PER_PGD		2048
 
+#define PTE_HWTABLE_PTRS	(PTRS_PER_PTE)
+#define PTE_HWTABLE_OFF		(PTE_HWTABLE_PTRS * sizeof(pte_t))
+#define PTE_HWTABLE_SIZE	(PTRS_PER_PTE * sizeof(u32))
+
 /*
  * PMD_SHIFT determines the size of the area a second-level page table can map
  * PGDIR_SHIFT determines what a third-level page table entry can map
@@ -112,13 +117,13 @@
 #define LIBRARY_TEXT_START	0x0c000000
 
 #ifndef __ASSEMBLY__
-extern void __pte_error(const char *file, int line, unsigned long val);
-extern void __pmd_error(const char *file, int line, unsigned long val);
-extern void __pgd_error(const char *file, int line, unsigned long val);
+extern void __pte_error(const char *file, int line, pte_t);
+extern void __pmd_error(const char *file, int line, pmd_t);
+extern void __pgd_error(const char *file, int line, pgd_t);
 
-#define pte_ERROR(pte)		__pte_error(__FILE__, __LINE__, pte_val(pte))
-#define pmd_ERROR(pmd)		__pmd_error(__FILE__, __LINE__, pmd_val(pmd))
-#define pgd_ERROR(pgd)		__pgd_error(__FILE__, __LINE__, pgd_val(pgd))
+#define pte_ERROR(pte)		__pte_error(__FILE__, __LINE__, pte)
+#define pmd_ERROR(pmd)		__pmd_error(__FILE__, __LINE__, pmd)
+#define pgd_ERROR(pgd)		__pgd_error(__FILE__, __LINE__, pgd)
 #endif /* !__ASSEMBLY__ */
 
 #define PMD_SIZE		(1UL << PMD_SHIFT)
@@ -133,8 +138,7 @@ extern void __pgd_error(const char *file, int line, unsigned long val);
  */
 #define FIRST_USER_ADDRESS	PAGE_SIZE
 
-#define FIRST_USER_PGD_NR	1
-#define USER_PTRS_PER_PGD	((TASK_SIZE/PGDIR_SIZE) - FIRST_USER_PGD_NR)
+#define USER_PTRS_PER_PGD	(TASK_SIZE / PGDIR_SIZE)
 
 /*
  * section address mask and size definitions.
@@ -161,30 +165,30 @@ extern void __pgd_error(const char *file, int line, unsigned long val);
  * The PTE table pointer refers to the hardware entries; the "Linux"
  * entries are stored 1024 bytes below.
  */
-#define L_PTE_PRESENT		(1 << 0)
-#define L_PTE_YOUNG		(1 << 1)
-#define L_PTE_FILE		(1 << 2)	/* only when !PRESENT */
-#define L_PTE_DIRTY		(1 << 6)
-#define L_PTE_WRITE		(1 << 7)
-#define L_PTE_USER		(1 << 8)
-#define L_PTE_EXEC		(1 << 9)
-#define L_PTE_SHARED		(1 << 10)	/* shared(v6), coherent(xsc3) */
+#define L_PTE_PRESENT		(_AT(pteval_t, 1) << 0)
+#define L_PTE_YOUNG		(_AT(pteval_t, 1) << 1)
+#define L_PTE_FILE		(_AT(pteval_t, 1) << 2)	/* only when !PRESENT */
+#define L_PTE_DIRTY		(_AT(pteval_t, 1) << 6)
+#define L_PTE_RDONLY		(_AT(pteval_t, 1) << 7)
+#define L_PTE_USER		(_AT(pteval_t, 1) << 8)
+#define L_PTE_XN		(_AT(pteval_t, 1) << 9)
+#define L_PTE_SHARED		(_AT(pteval_t, 1) << 10)	/* shared(v6), coherent(xsc3) */
 
 /*
  * These are the memory types, defined to be compatible with
  * pre-ARMv6 CPUs cacheable and bufferable bits:   XXCB
  */
-#define L_PTE_MT_UNCACHED	(0x00 << 2)	/* 0000 */
-#define L_PTE_MT_BUFFERABLE	(0x01 << 2)	/* 0001 */
-#define L_PTE_MT_WRITETHROUGH	(0x02 << 2)	/* 0010 */
-#define L_PTE_MT_WRITEBACK	(0x03 << 2)	/* 0011 */
-#define L_PTE_MT_MINICACHE	(0x06 << 2)	/* 0110 (sa1100, xscale) */
-#define L_PTE_MT_WRITEALLOC	(0x07 << 2)	/* 0111 */
-#define L_PTE_MT_DEV_SHARED	(0x04 << 2)	/* 0100 */
-#define L_PTE_MT_DEV_NONSHARED	(0x0c << 2)	/* 1100 */
-#define L_PTE_MT_DEV_WC		(0x09 << 2)	/* 1001 */
-#define L_PTE_MT_DEV_CACHED	(0x0b << 2)	/* 1011 */
-#define L_PTE_MT_MASK		(0x0f << 2)
+#define L_PTE_MT_UNCACHED	(_AT(pteval_t, 0x00) << 2)	/* 0000 */
+#define L_PTE_MT_BUFFERABLE	(_AT(pteval_t, 0x01) << 2)	/* 0001 */
+#define L_PTE_MT_WRITETHROUGH	(_AT(pteval_t, 0x02) << 2)	/* 0010 */
+#define L_PTE_MT_WRITEBACK	(_AT(pteval_t, 0x03) << 2)	/* 0011 */
+#define L_PTE_MT_MINICACHE	(_AT(pteval_t, 0x06) << 2)	/* 0110 (sa1100, xscale) */
+#define L_PTE_MT_WRITEALLOC	(_AT(pteval_t, 0x07) << 2)	/* 0111 */
+#define L_PTE_MT_DEV_SHARED	(_AT(pteval_t, 0x04) << 2)	/* 0100 */
+#define L_PTE_MT_DEV_NONSHARED	(_AT(pteval_t, 0x0c) << 2)	/* 1100 */
+#define L_PTE_MT_DEV_WC		(_AT(pteval_t, 0x09) << 2)	/* 1001 */
+#define L_PTE_MT_DEV_CACHED	(_AT(pteval_t, 0x0b) << 2)	/* 1011 */
+#define L_PTE_MT_MASK		(_AT(pteval_t, 0x0f) << 2)
 
 #ifndef __ASSEMBLY__
 
@@ -201,23 +205,44 @@ extern pgprot_t		pgprot_kernel;
 
 #define _MOD_PROT(p, b)	__pgprot(pgprot_val(p) | (b))
 
-#define PAGE_NONE		pgprot_user
-#define PAGE_SHARED		_MOD_PROT(pgprot_user, L_PTE_USER | L_PTE_WRITE)
-#define PAGE_SHARED_EXEC	_MOD_PROT(pgprot_user, L_PTE_USER | L_PTE_WRITE | L_PTE_EXEC)
-#define PAGE_COPY		_MOD_PROT(pgprot_user, L_PTE_USER)
-#define PAGE_COPY_EXEC		_MOD_PROT(pgprot_user, L_PTE_USER | L_PTE_EXEC)
-#define PAGE_READONLY		_MOD_PROT(pgprot_user, L_PTE_USER)
-#define PAGE_READONLY_EXEC	_MOD_PROT(pgprot_user, L_PTE_USER | L_PTE_EXEC)
-#define PAGE_KERNEL		pgprot_kernel
-#define PAGE_KERNEL_EXEC	_MOD_PROT(pgprot_kernel, L_PTE_EXEC)
-
-#define __PAGE_NONE		__pgprot(_L_PTE_DEFAULT)
-#define __PAGE_SHARED		__pgprot(_L_PTE_DEFAULT | L_PTE_USER | L_PTE_WRITE)
-#define __PAGE_SHARED_EXEC	__pgprot(_L_PTE_DEFAULT | L_PTE_USER | L_PTE_WRITE | L_PTE_EXEC)
-#define __PAGE_COPY		__pgprot(_L_PTE_DEFAULT | L_PTE_USER)
-#define __PAGE_COPY_EXEC	__pgprot(_L_PTE_DEFAULT | L_PTE_USER | L_PTE_EXEC)
-#define __PAGE_READONLY		__pgprot(_L_PTE_DEFAULT | L_PTE_USER)
-#define __PAGE_READONLY_EXEC	__pgprot(_L_PTE_DEFAULT | L_PTE_USER | L_PTE_EXEC)
+#define PAGE_NONE		_MOD_PROT(pgprot_user, L_PTE_XN | L_PTE_RDONLY)
+#define PAGE_SHARED		_MOD_PROT(pgprot_user, L_PTE_USER | L_PTE_XN)
+#define PAGE_SHARED_EXEC	_MOD_PROT(pgprot_user, L_PTE_USER)
+#define PAGE_COPY		_MOD_PROT(pgprot_user, L_PTE_USER | L_PTE_RDONLY | L_PTE_XN)
+#define PAGE_COPY_EXEC		_MOD_PROT(pgprot_user, L_PTE_USER | L_PTE_RDONLY)
+#define PAGE_READONLY		_MOD_PROT(pgprot_user, L_PTE_USER | L_PTE_RDONLY | L_PTE_XN)
+#define PAGE_READONLY_EXEC	_MOD_PROT(pgprot_user, L_PTE_USER | L_PTE_RDONLY)
+#define PAGE_KERNEL		_MOD_PROT(pgprot_kernel, L_PTE_XN)
+#define PAGE_KERNEL_EXEC	pgprot_kernel
+
+#define __PAGE_NONE		__pgprot(_L_PTE_DEFAULT | L_PTE_RDONLY | L_PTE_XN)
+#define __PAGE_SHARED		__pgprot(_L_PTE_DEFAULT | L_PTE_USER | L_PTE_XN)
+#define __PAGE_SHARED_EXEC	__pgprot(_L_PTE_DEFAULT | L_PTE_USER)
+#define __PAGE_COPY		__pgprot(_L_PTE_DEFAULT | L_PTE_USER | L_PTE_RDONLY | L_PTE_XN)
+#define __PAGE_COPY_EXEC	__pgprot(_L_PTE_DEFAULT | L_PTE_USER | L_PTE_RDONLY)
+#define __PAGE_READONLY		__pgprot(_L_PTE_DEFAULT | L_PTE_USER | L_PTE_RDONLY | L_PTE_XN)
+#define __PAGE_READONLY_EXEC	__pgprot(_L_PTE_DEFAULT | L_PTE_USER | L_PTE_RDONLY)
+
+#define __pgprot_modify(prot,mask,bits)		\
+	__pgprot((pgprot_val(prot) & ~(mask)) | (bits))
+
+#define pgprot_noncached(prot) \
+	__pgprot_modify(prot, L_PTE_MT_MASK, L_PTE_MT_UNCACHED)
+
+#define pgprot_writecombine(prot) \
+	__pgprot_modify(prot, L_PTE_MT_MASK, L_PTE_MT_BUFFERABLE)
+
+#ifdef CONFIG_ARM_DMA_MEM_BUFFERABLE
+#define pgprot_dmacoherent(prot) \
+	__pgprot_modify(prot, L_PTE_MT_MASK, L_PTE_MT_BUFFERABLE | L_PTE_XN)
+#define __HAVE_PHYS_MEM_ACCESS_PROT
+struct file;
+extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
+				     unsigned long size, pgprot_t vma_prot);
+#else
+#define pgprot_dmacoherent(prot) \
+	__pgprot_modify(prot, L_PTE_MT_MASK, L_PTE_MT_UNCACHED | L_PTE_XN)
+#endif
 
 #endif /* __ASSEMBLY__ */
 
@@ -255,26 +280,84 @@ extern pgprot_t		pgprot_kernel;
 extern struct page *empty_zero_page;
 #define ZERO_PAGE(vaddr)	(empty_zero_page)
 
-#define pte_pfn(pte)		(pte_val(pte) >> PAGE_SHIFT)
-#define pfn_pte(pfn,prot)	(__pte(((pfn) << PAGE_SHIFT) | pgprot_val(prot)))
 
-#define pte_none(pte)		(!pte_val(pte))
-#define pte_clear(mm,addr,ptep)	set_pte_ext(ptep, __pte(0), 0)
-#define pte_page(pte)		(pfn_to_page(pte_pfn(pte)))
-#define pte_offset_kernel(dir,addr)	(pmd_page_vaddr(*(dir)) + __pte_index(addr))
+extern pgd_t swapper_pg_dir[PTRS_PER_PGD];
+
+/* to find an entry in a page-table-directory */
+#define pgd_index(addr)		((addr) >> PGDIR_SHIFT)
+
+#define pgd_offset(mm, addr)	((mm)->pgd + pgd_index(addr))
+
+/* to find an entry in a kernel page-table-directory */
+#define pgd_offset_k(addr)	pgd_offset(&init_mm, addr)
+
+/*
+ * The "pgd_xxx()" functions here are trivial for a folded two-level
+ * setup: the pgd is never bad, and a pmd always exists (as it's folded
+ * into the pgd entry)
+ */
+#define pgd_none(pgd)		(0)
+#define pgd_bad(pgd)		(0)
+#define pgd_present(pgd)	(1)
+#define pgd_clear(pgdp)		do { } while (0)
+#define set_pgd(pgd,pgdp)	do { } while (0)
+
+
+/* Find an entry in the second-level page table.. */
+#define pmd_offset(dir, addr)	((pmd_t *)(dir))
+
+#define pmd_none(pmd)		(!pmd_val(pmd))
+#define pmd_present(pmd)	(pmd_val(pmd))
+#define pmd_bad(pmd)		(pmd_val(pmd) & 2)
+
+#define copy_pmd(pmdpd,pmdps)		\
+	do {				\
+		pmdpd[0] = pmdps[0];	\
+		pmdpd[1] = pmdps[1];	\
+		flush_pmd_entry(pmdpd);	\
+	} while (0)
+
+#define pmd_clear(pmdp)			\
+	do {				\
+		pmdp[0] = __pmd(0);	\
+		pmdp[1] = __pmd(0);	\
+		clean_pmd_entry(pmdp);	\
+	} while (0)
+
+static inline pte_t *pmd_page_vaddr(pmd_t pmd)
+{
+	return __va(pmd_val(pmd) & PAGE_MASK);
+}
+
+#define pmd_page(pmd)		pfn_to_page(__phys_to_pfn(pmd_val(pmd)))
+
+/* we don't need complex calculations here as the pmd is folded into the pgd */
+#define pmd_addr_end(addr,end)	(end)
 
-#define pte_offset_map(dir,addr)	(__pte_map(dir) + __pte_index(addr))
-#define pte_unmap(pte)			__pte_unmap(pte)
 
 #ifndef CONFIG_HIGHPTE
-#define __pte_map(dir)		pmd_page_vaddr(*(dir))
+#define __pte_map(pmd)		pmd_page_vaddr(*(pmd))
 #define __pte_unmap(pte)	do { } while (0)
 #else
-#define __pte_map(dir)		((pte_t *)kmap_atomic(pmd_page(*(dir))) + PTRS_PER_PTE)
-#define __pte_unmap(pte)	kunmap_atomic((pte - PTRS_PER_PTE))
+#define __pte_map(pmd)		(pte_t *)kmap_atomic(pmd_page(*(pmd)))
+#define __pte_unmap(pte)	kunmap_atomic(pte)
 #endif
 
+#define pte_index(addr)		(((addr) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))
+
+#define pte_offset_kernel(pmd,addr)	(pmd_page_vaddr(*(pmd)) + pte_index(addr))
+
+#define pte_offset_map(pmd,addr)	(__pte_map(pmd) + pte_index(addr))
+#define pte_unmap(pte)			__pte_unmap(pte)
+
+#define pte_pfn(pte)		(pte_val(pte) >> PAGE_SHIFT)
+#define pfn_pte(pfn,prot)	__pte(((pfn) << PAGE_SHIFT) | pgprot_val(prot))
+
+#define pte_page(pte)		pfn_to_page(pte_pfn(pte))
+#define mk_pte(page,prot)	pfn_pte(page_to_pfn(page), prot)
+
 #define set_pte_ext(ptep,pte,ext) cpu_set_pte_ext(ptep,pte,ext)
+#define pte_clear(mm,addr,ptep)	set_pte_ext(ptep, __pte(0), 0)
 
 #if __LINUX_ARM_ARCH__ < 6
 static inline void __sync_icache_dcache(pte_t pteval)
@@ -295,15 +378,12 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
 	}
 }
 
-/*
- * The following only work if pte_present() is true.
- * Undefined behaviour if not..
- */
+#define pte_none(pte)		(!pte_val(pte))
 #define pte_present(pte)	(pte_val(pte) & L_PTE_PRESENT)
-#define pte_write(pte)		(pte_val(pte) & L_PTE_WRITE)
+#define pte_write(pte)		(!(pte_val(pte) & L_PTE_RDONLY))
 #define pte_dirty(pte)		(pte_val(pte) & L_PTE_DIRTY)
 #define pte_young(pte)		(pte_val(pte) & L_PTE_YOUNG)
-#define pte_exec(pte)		(pte_val(pte) & L_PTE_EXEC)
+#define pte_exec(pte)		(!(pte_val(pte) & L_PTE_XN))
 #define pte_special(pte)	(0)
 
 #define pte_present_user(pte) \
@@ -313,8 +393,8 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
 #define PTE_BIT_FUNC(fn,op) \
 static inline pte_t pte_##fn(pte_t pte) { pte_val(pte) op; return pte; }
 
-PTE_BIT_FUNC(wrprotect, &= ~L_PTE_WRITE);
-PTE_BIT_FUNC(mkwrite,   |= L_PTE_WRITE);
+PTE_BIT_FUNC(wrprotect, |= L_PTE_RDONLY);
+PTE_BIT_FUNC(mkwrite,   &= ~L_PTE_RDONLY);
 PTE_BIT_FUNC(mkclean,   &= ~L_PTE_DIRTY);
 PTE_BIT_FUNC(mkdirty,   |= L_PTE_DIRTY);
 PTE_BIT_FUNC(mkold,     &= ~L_PTE_YOUNG);
@@ -322,101 +402,13 @@ PTE_BIT_FUNC(mkyoung,   |= L_PTE_YOUNG);
 
 static inline pte_t pte_mkspecial(pte_t pte) { return pte; }
 
-#define __pgprot_modify(prot,mask,bits)		\
-	__pgprot((pgprot_val(prot) & ~(mask)) | (bits))
-
-/*
- * Mark the prot value as uncacheable and unbufferable.
- */
-#define pgprot_noncached(prot) \
-	__pgprot_modify(prot, L_PTE_MT_MASK, L_PTE_MT_UNCACHED)
-#define pgprot_writecombine(prot) \
-	__pgprot_modify(prot, L_PTE_MT_MASK, L_PTE_MT_BUFFERABLE)
-#ifdef CONFIG_ARM_DMA_MEM_BUFFERABLE
-#define pgprot_dmacoherent(prot) \
-	__pgprot_modify(prot, L_PTE_MT_MASK|L_PTE_EXEC, L_PTE_MT_BUFFERABLE)
-#define __HAVE_PHYS_MEM_ACCESS_PROT
-struct file;
-extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
-				     unsigned long size, pgprot_t vma_prot);
-#else
-#define pgprot_dmacoherent(prot) \
-	__pgprot_modify(prot, L_PTE_MT_MASK|L_PTE_EXEC, L_PTE_MT_UNCACHED)
-#endif
-
-#define pmd_none(pmd)		(!pmd_val(pmd))
-#define pmd_present(pmd)	(pmd_val(pmd))
-#define pmd_bad(pmd)		(pmd_val(pmd) & 2)
-
-#define copy_pmd(pmdpd,pmdps)		\
-	do {				\
-		pmdpd[0] = pmdps[0];	\
-		pmdpd[1] = pmdps[1];	\
-		flush_pmd_entry(pmdpd);	\
-	} while (0)
-
-#define pmd_clear(pmdp)			\
-	do {				\
-		pmdp[0] = __pmd(0);	\
-		pmdp[1] = __pmd(0);	\
-		clean_pmd_entry(pmdp);	\
-	} while (0)
-
-static inline pte_t *pmd_page_vaddr(pmd_t pmd)
-{
-	unsigned long ptr;
-
-	ptr = pmd_val(pmd) & ~(PTRS_PER_PTE * sizeof(void *) - 1);
-	ptr += PTRS_PER_PTE * sizeof(void *);
-
-	return __va(ptr);
-}
-
-#define pmd_page(pmd)		pfn_to_page(__phys_to_pfn(pmd_val(pmd)))
-
-/* we don't need complex calculations here as the pmd is folded into the pgd */
-#define pmd_addr_end(addr,end)	(end)
-
-/*
- * Conversion functions: convert a page and protection to a page entry,
- * and a page entry and page directory to the page they refer to.
- */
-#define mk_pte(page,prot)	pfn_pte(page_to_pfn(page),prot)
-
-/*
- * The "pgd_xxx()" functions here are trivial for a folded two-level
- * setup: the pgd is never bad, and a pmd always exists (as it's folded
- * into the pgd entry)
- */
-#define pgd_none(pgd)		(0)
-#define pgd_bad(pgd)		(0)
-#define pgd_present(pgd)	(1)
-#define pgd_clear(pgdp)		do { } while (0)
-#define set_pgd(pgd,pgdp)	do { } while (0)
-
-/* to find an entry in a page-table-directory */
-#define pgd_index(addr)		((addr) >> PGDIR_SHIFT)
-
-#define pgd_offset(mm, addr)	((mm)->pgd+pgd_index(addr))
-
-/* to find an entry in a kernel page-table-directory */
-#define pgd_offset_k(addr)	pgd_offset(&init_mm, addr)
-
-/* Find an entry in the second-level page table.. */
-#define pmd_offset(dir, addr)	((pmd_t *)(dir))
-
-/* Find an entry in the third-level page table.. */
-#define __pte_index(addr)	(((addr) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))
-
 static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
 {
-	const unsigned long mask = L_PTE_EXEC | L_PTE_WRITE | L_PTE_USER;
+	const pteval_t mask = L_PTE_XN | L_PTE_RDONLY | L_PTE_USER;
 	pte_val(pte) = (pte_val(pte) & ~mask) | (pgprot_val(newprot) & mask);
 	return pte;
 }
 
-extern pgd_t swapper_pg_dir[PTRS_PER_PGD];
-
 /*
  * Encode and decode a swap entry.  Swap entries are stored in the Linux
  * page tables as follows:
@@ -481,6 +473,9 @@ extern pgd_t swapper_pg_dir[PTRS_PER_PGD];
 
 #define pgtable_cache_init() do { } while (0)
 
+void identity_mapping_add(pgd_t *, unsigned long, unsigned long);
+void identity_mapping_del(pgd_t *, unsigned long, unsigned long);
+
 #endif /* !__ASSEMBLY__ */
 
 #endif /* CONFIG_MMU */
diff --git a/arch/arm/include/asm/sched_clock.h b/arch/arm/include/asm/sched_clock.h
new file mode 100644
index 000000000000..a84628be1a7b
--- /dev/null
+++ b/arch/arm/include/asm/sched_clock.h
@@ -0,0 +1,118 @@
+/*
+ * sched_clock.h: support for extending counters to full 64-bit ns counter
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#ifndef ASM_SCHED_CLOCK
+#define ASM_SCHED_CLOCK
+
+#include <linux/kernel.h>
+#include <linux/types.h>
+
+struct clock_data {
+	u64 epoch_ns;
+	u32 epoch_cyc;
+	u32 epoch_cyc_copy;
+	u32 mult;
+	u32 shift;
+};
+
+#define DEFINE_CLOCK_DATA(name)	struct clock_data name
+
+static inline u64 cyc_to_ns(u64 cyc, u32 mult, u32 shift)
+{
+	return (cyc * mult) >> shift;
+}
+
+/*
+ * Atomically update the sched_clock epoch.  Your update callback will
+ * be called from a timer before the counter wraps - read the current
+ * counter value, and call this function to safely move the epochs
+ * forward.  Only use this from the update callback.
+ */
+static inline void update_sched_clock(struct clock_data *cd, u32 cyc, u32 mask)
+{
+	unsigned long flags;
+	u64 ns = cd->epoch_ns +
+		cyc_to_ns((cyc - cd->epoch_cyc) & mask, cd->mult, cd->shift);
+
+	/*
+	 * Write epoch_cyc and epoch_ns in a way that the update is
+	 * detectable in cyc_to_fixed_sched_clock().
+	 */
+	raw_local_irq_save(flags);
+	cd->epoch_cyc = cyc;
+	smp_wmb();
+	cd->epoch_ns = ns;
+	smp_wmb();
+	cd->epoch_cyc_copy = cyc;
+	raw_local_irq_restore(flags);
+}
+
+/*
+ * If your clock rate is known at compile time, using this will allow
+ * you to optimize the mult/shift loads away.  This is paired with
+ * init_fixed_sched_clock() to ensure that your mult/shift are correct.
+ */
+static inline unsigned long long cyc_to_fixed_sched_clock(struct clock_data *cd,
+	u32 cyc, u32 mask, u32 mult, u32 shift)
+{
+	u64 epoch_ns;
+	u32 epoch_cyc;
+
+	/*
+	 * Load the epoch_cyc and epoch_ns atomically.  We do this by
+	 * ensuring that we always write epoch_cyc, epoch_ns and
+	 * epoch_cyc_copy in strict order, and read them in strict order.
+	 * If epoch_cyc and epoch_cyc_copy are not equal, then we're in
+	 * the middle of an update, and we should repeat the load.
+	 */
+	do {
+		epoch_cyc = cd->epoch_cyc;
+		smp_rmb();
+		epoch_ns = cd->epoch_ns;
+		smp_rmb();
+	} while (epoch_cyc != cd->epoch_cyc_copy);
+
+	return epoch_ns + cyc_to_ns((cyc - epoch_cyc) & mask, mult, shift);
+}
+
+/*
+ * Otherwise, you need to use this, which will obtain the mult/shift
+ * from the clock_data structure.  Use init_sched_clock() with this.
+ */
+static inline unsigned long long cyc_to_sched_clock(struct clock_data *cd,
+	u32 cyc, u32 mask)
+{
+	return cyc_to_fixed_sched_clock(cd, cyc, mask, cd->mult, cd->shift);
+}
+
+/*
+ * Initialize the clock data - calculate the appropriate multiplier
+ * and shift.  Also setup a timer to ensure that the epoch is refreshed
+ * at the appropriate time interval, which will call your update
+ * handler.
+ */
+void init_sched_clock(struct clock_data *, void (*)(void),
+	unsigned int, unsigned long);
+
+/*
+ * Use this initialization function rather than init_sched_clock() if
+ * you're using cyc_to_fixed_sched_clock, which will warn if your
+ * constants are incorrect.
+ */
+static inline void init_fixed_sched_clock(struct clock_data *cd,
+	void (*update)(void), unsigned int bits, unsigned long rate,
+	u32 mult, u32 shift)
+{
+	init_sched_clock(cd, update, bits, rate);
+	if (cd->mult != mult || cd->shift != shift) {
+		pr_crit("sched_clock: wrong multiply/shift: %u>>%u vs calculated %u>>%u\n"
+			"sched_clock: fix multiply/shift to avoid scheduler hiccups\n",
+			mult, shift, cd->mult, cd->shift);
+	}
+}
+
+#endif
diff --git a/arch/arm/include/asm/sizes.h b/arch/arm/include/asm/sizes.h
index 4fc1565e4f93..316bb2b2be3d 100644
--- a/arch/arm/include/asm/sizes.h
+++ b/arch/arm/include/asm/sizes.h
@@ -13,9 +13,6 @@
  * along with this program; if not, write to the Free Software
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  */
-/* DO NOT EDIT!! - this file automatically generated
- *                 from .s file by awk -f s2h.awk
- */
 /*  Size definitions
  *  Copyright (C) ARM Limited 1998. All rights reserved.
  */
@@ -25,6 +22,9 @@
 
 /* handy sizes */
 #define SZ_16				0x00000010
+#define SZ_32				0x00000020
+#define SZ_64				0x00000040
+#define SZ_128				0x00000080
 #define SZ_256				0x00000100
 #define SZ_512				0x00000200
 
diff --git a/arch/arm/include/asm/smp.h b/arch/arm/include/asm/smp.h
index 3d05190797cb..96ed521f2408 100644
--- a/arch/arm/include/asm/smp.h
+++ b/arch/arm/include/asm/smp.h
@@ -33,27 +33,23 @@ struct seq_file;
 /*
  * generate IPI list text
  */
-extern void show_ipi_list(struct seq_file *p);
+extern void show_ipi_list(struct seq_file *, int);
 
 /*
  * Called from assembly code, this handles an IPI.
  */
-asmlinkage void do_IPI(struct pt_regs *regs);
+asmlinkage void do_IPI(int ipinr, struct pt_regs *regs);
 
 /*
  * Setup the set of possible CPUs (via set_cpu_possible)
  */
 extern void smp_init_cpus(void);
 
-/*
- * Move global data into per-processor storage.
- */
-extern void smp_store_cpu_info(unsigned int cpuid);
 
 /*
  * Raise an IPI cross call on CPUs in callmap.
  */
-extern void smp_cross_call(const struct cpumask *mask);
+extern void smp_cross_call(const struct cpumask *mask, int ipi);
 
 /*
  * Boot a secondary CPU, and assign it the specified idle task.
@@ -73,6 +69,11 @@ asmlinkage void secondary_start_kernel(void);
 extern void platform_secondary_init(unsigned int cpu);
 
 /*
+ * Initialize cpu_possible map, and enable coherency
+ */
+extern void platform_smp_prepare_cpus(unsigned int);
+
+/*
  * Initial data for bringing up a secondary CPU.
  */
 struct secondary_data {
@@ -97,6 +98,6 @@ extern void arch_send_call_function_ipi_mask(const struct cpumask *mask);
 /*
  * show local interrupt info
  */
-extern void show_local_irqs(struct seq_file *);
+extern void show_local_irqs(struct seq_file *, int);
 
 #endif /* ifndef __ASM_ARM_SMP_H */
diff --git a/arch/arm/include/asm/smp_mpidr.h b/arch/arm/include/asm/smp_mpidr.h
deleted file mode 100644
index 6a9307d64900..000000000000
--- a/arch/arm/include/asm/smp_mpidr.h
+++ /dev/null
@@ -1,17 +0,0 @@
-#ifndef ASMARM_SMP_MIDR_H
-#define ASMARM_SMP_MIDR_H
-
-#define hard_smp_processor_id()						\
-	({								\
-		unsigned int cpunum;					\
-		__asm__("\n"						\
-			"1:	mrc p15, 0, %0, c0, c0, 5\n"		\
-			"	.pushsection \".alt.smp.init\", \"a\"\n"\
-			"	.long	1b\n"				\
-			"	mov	%0, #0\n"			\
-			"	.popsection"				\
-			: "=r" (cpunum));				\
-		cpunum &= 0x0F;						\
-	})
-
-#endif
diff --git a/arch/arm/include/asm/smp_twd.h b/arch/arm/include/asm/smp_twd.h
index 634f357be6bb..fed9981fba08 100644
--- a/arch/arm/include/asm/smp_twd.h
+++ b/arch/arm/include/asm/smp_twd.h
@@ -22,7 +22,6 @@ struct clock_event_device;
 
 extern void __iomem *twd_base;
 
-void twd_timer_stop(void);
 int twd_timer_ack(void);
 void twd_timer_setup(struct clock_event_device *);
 
diff --git a/arch/arm/include/asm/system.h b/arch/arm/include/asm/system.h
index ec4327a4653d..97f6d60297d5 100644
--- a/arch/arm/include/asm/system.h
+++ b/arch/arm/include/asm/system.h
@@ -124,6 +124,13 @@ extern unsigned int user_debug;
 #define vectors_high()	(0)
 #endif
 
+#if __LINUX_ARM_ARCH__ >= 7 ||		\
+	(__LINUX_ARM_ARCH__ == 6 && defined(CONFIG_CPU_32v6K))
+#define sev()	__asm__ __volatile__ ("sev" : : : "memory")
+#define wfe()	__asm__ __volatile__ ("wfe" : : : "memory")
+#define wfi()	__asm__ __volatile__ ("wfi" : : : "memory")
+#endif
+
 #if __LINUX_ARM_ARCH__ >= 7
 #define isb() __asm__ __volatile__ ("isb" : : : "memory")
 #define dsb() __asm__ __volatile__ ("dsb" : : : "memory")
@@ -155,6 +162,7 @@ extern unsigned int user_debug;
 #define rmb()		dmb()
 #define wmb()		mb()
 #else
+#include <asm/memory.h>
 #define mb()	do { if (arch_is_coherent()) dmb(); else barrier(); } while (0)
 #define rmb()	do { if (arch_is_coherent()) dmb(); else barrier(); } while (0)
 #define wmb()	do { if (arch_is_coherent()) dmb(); else barrier(); } while (0)
diff --git a/arch/arm/include/asm/traps.h b/arch/arm/include/asm/traps.h
index 124475afb007..1b960d5ef6a5 100644
--- a/arch/arm/include/asm/traps.h
+++ b/arch/arm/include/asm/traps.h
@@ -46,4 +46,6 @@ static inline int in_exception_text(unsigned long ptr)
 extern void __init early_trap_init(void);
 extern void dump_backtrace_entry(unsigned long where, unsigned long from, unsigned long frame);
 
+extern void *vectors_page;
+
 #endif
diff --git a/arch/arm/include/asm/uaccess.h b/arch/arm/include/asm/uaccess.h
index 33e4a48fe103..b293616a1a1a 100644
--- a/arch/arm/include/asm/uaccess.h
+++ b/arch/arm/include/asm/uaccess.h
@@ -227,7 +227,7 @@ do {									\
 
 #define __get_user_asm_byte(x,addr,err)				\
 	__asm__ __volatile__(					\
-	"1:	ldrbt	%1,[%2]\n"				\
+	"1:	" T(ldrb) "	%1,[%2],#0\n"			\
 	"2:\n"							\
 	"	.pushsection .fixup,\"ax\"\n"			\
 	"	.align	2\n"					\
@@ -263,7 +263,7 @@ do {									\
 
 #define __get_user_asm_word(x,addr,err)				\
 	__asm__ __volatile__(					\
-	"1:	ldrt	%1,[%2]\n"				\
+	"1:	" T(ldr) "	%1,[%2],#0\n"			\
 	"2:\n"							\
 	"	.pushsection .fixup,\"ax\"\n"			\
 	"	.align	2\n"					\
@@ -308,7 +308,7 @@ do {									\
 
 #define __put_user_asm_byte(x,__pu_addr,err)			\
 	__asm__ __volatile__(					\
-	"1:	strbt	%1,[%2]\n"				\
+	"1:	" T(strb) "	%1,[%2],#0\n"			\
 	"2:\n"							\
 	"	.pushsection .fixup,\"ax\"\n"			\
 	"	.align	2\n"					\
@@ -341,7 +341,7 @@ do {									\
 
 #define __put_user_asm_word(x,__pu_addr,err)			\
 	__asm__ __volatile__(					\
-	"1:	strt	%1,[%2]\n"				\
+	"1:	" T(str) "	%1,[%2],#0\n"			\
 	"2:\n"							\
 	"	.pushsection .fixup,\"ax\"\n"			\
 	"	.align	2\n"					\
@@ -366,10 +366,10 @@ do {									\
 
 #define __put_user_asm_dword(x,__pu_addr,err)			\
 	__asm__ __volatile__(					\
- ARM(	"1:	strt	" __reg_oper1 ", [%1], #4\n"	)	\
- ARM(	"2:	strt	" __reg_oper0 ", [%1]\n"	)	\
- THUMB(	"1:	strt	" __reg_oper1 ", [%1]\n"	)	\
- THUMB(	"2:	strt	" __reg_oper0 ", [%1, #4]\n"	)	\
+ ARM(	"1:	" T(str) "	" __reg_oper1 ", [%1], #4\n"	)	\
+ ARM(	"2:	" T(str) "	" __reg_oper0 ", [%1]\n"	)	\
+ THUMB(	"1:	" T(str) "	" __reg_oper1 ", [%1]\n"	)	\
+ THUMB(	"2:	" T(str) "	" __reg_oper0 ", [%1, #4]\n"	)	\
 	"3:\n"							\
 	"	.pushsection .fixup,\"ax\"\n"			\
 	"	.align	2\n"					\
diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile
index c73abe4b7e72..185ee822c935 100644
--- a/arch/arm/kernel/Makefile
+++ b/arch/arm/kernel/Makefile
@@ -29,7 +29,8 @@ obj-$(CONFIG_MODULES)		+= armksyms.o module.o
 obj-$(CONFIG_ARTHUR)		+= arthur.o
 obj-$(CONFIG_ISA_DMA)		+= dma-isa.o
 obj-$(CONFIG_PCI)		+= bios32.o isa.o
-obj-$(CONFIG_SMP)		+= smp.o
+obj-$(CONFIG_HAVE_SCHED_CLOCK)	+= sched_clock.o
+obj-$(CONFIG_SMP)		+= smp.o smp_tlb.o
 obj-$(CONFIG_HAVE_ARM_SCU)	+= smp_scu.o
 obj-$(CONFIG_HAVE_ARM_TWD)	+= smp_twd.o
 obj-$(CONFIG_DYNAMIC_FTRACE)	+= ftrace.o
@@ -43,6 +44,8 @@ obj-$(CONFIG_KGDB)		+= kgdb.o
 obj-$(CONFIG_ARM_UNWIND)	+= unwind.o
 obj-$(CONFIG_HAVE_TCM)		+= tcm.o
 obj-$(CONFIG_CRASH_DUMP)	+= crash_dump.o
+obj-$(CONFIG_SWP_EMULATE)	+= swp_emulate.o
+CFLAGS_swp_emulate.o		:= -Wa,-march=armv7-a
 obj-$(CONFIG_HAVE_HW_BREAKPOINT)	+= hw_breakpoint.o
 
 obj-$(CONFIG_CRUNCH)		+= crunch.o crunch-bits.o
diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S
index 36199ffc4cc2..2b46fea36c9f 100644
--- a/arch/arm/kernel/entry-armv.S
+++ b/arch/arm/kernel/entry-armv.S
@@ -25,42 +25,22 @@
 #include <asm/tls.h>
 
 #include "entry-header.S"
+#include <asm/entry-macro-multi.S>
 
 /*
  * Interrupt handling.  Preserves r7, r8, r9
  */
 	.macro	irq_handler
-	get_irqnr_preamble r5, lr
-1:	get_irqnr_and_base r0, r6, r5, lr
-	movne	r1, sp
-	@
-	@ routine called with r0 = irq number, r1 = struct pt_regs *
-	@
-	adrne	lr, BSYM(1b)
-	bne	asm_do_IRQ
-
-#ifdef CONFIG_SMP
-	/*
-	 * XXX
-	 *
-	 * this macro assumes that irqstat (r6) and base (r5) are
-	 * preserved from get_irqnr_and_base above
-	 */
-	ALT_SMP(test_for_ipi r0, r6, r5, lr)
-	ALT_UP_B(9997f)
-	movne	r0, sp
-	adrne	lr, BSYM(1b)
-	bne	do_IPI
-
-#ifdef CONFIG_LOCAL_TIMERS
-	test_for_ltirq r0, r6, r5, lr
-	movne	r0, sp
-	adrne	lr, BSYM(1b)
-	bne	do_local_timer
+#ifdef CONFIG_MULTI_IRQ_HANDLER
+	ldr	r5, =handle_arch_irq
+	mov	r0, sp
+	ldr	r5, [r5]
+	adr	lr, BSYM(9997f)
+	teq	r5, #0
+	movne	pc, r5
 #endif
+	arch_irq_handler_default
 9997:
-#endif
-
 	.endm
 
 #ifdef CONFIG_KPROBES
@@ -739,7 +719,7 @@ ENTRY(__switch_to)
  THUMB(	stmia	ip!, {r4 - sl, fp}	   )	@ Store most regs on stack
  THUMB(	str	sp, [ip], #4		   )
  THUMB(	str	lr, [ip], #4		   )
-#ifdef CONFIG_MMU
+#ifdef CONFIG_CPU_USE_DOMAINS
 	ldr	r6, [r2, #TI_CPU_DOMAIN]
 #endif
 	set_tls	r3, r4, r5
@@ -748,7 +728,7 @@ ENTRY(__switch_to)
 	ldr	r8, =__stack_chk_guard
 	ldr	r7, [r7, #TSK_STACK_CANARY]
 #endif
-#ifdef CONFIG_MMU
+#ifdef CONFIG_CPU_USE_DOMAINS
 	mcr	p15, 0, r6, c3, c0, 0		@ Set domain register
 #endif
 	mov	r5, r0
@@ -846,7 +826,7 @@ __kuser_helper_start:
  */
 
 __kuser_memory_barrier:				@ 0xffff0fa0
-	smp_dmb
+	smp_dmb	arm
 	usr_ret	lr
 
 	.align	5
@@ -963,7 +943,7 @@ kuser_cmpxchg_fixup:
 
 #else
 
-	smp_dmb
+	smp_dmb	arm
 1:	ldrex	r3, [r2]
 	subs	r3, r3, r0
 	strexeq	r3, r1, [r2]
@@ -1249,3 +1229,9 @@ cr_alignment:
 	.space	4
 cr_no_alignment:
 	.space	4
+
+#ifdef CONFIG_MULTI_IRQ_HANDLER
+	.globl	handle_arch_irq
+handle_arch_irq:
+	.space	4
+#endif
diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S
index aae802ee12f8..1e7b04a40a31 100644
--- a/arch/arm/kernel/entry-common.S
+++ b/arch/arm/kernel/entry-common.S
@@ -29,6 +29,9 @@ ret_fast_syscall:
 	ldr	r1, [tsk, #TI_FLAGS]
 	tst	r1, #_TIF_WORK_MASK
 	bne	fast_work_pending
+#if defined(CONFIG_IRQSOFF_TRACER)
+	asm_trace_hardirqs_on
+#endif
 
 	/* perform architecture specific actions before user return */
 	arch_ret_to_user r1, lr
@@ -65,6 +68,9 @@ ret_slow_syscall:
 	tst	r1, #_TIF_WORK_MASK
 	bne	work_pending
 no_work_pending:
+#if defined(CONFIG_IRQSOFF_TRACER)
+	asm_trace_hardirqs_on
+#endif
 	/* perform architecture specific actions before user return */
 	arch_ret_to_user r1, lr
 
diff --git a/arch/arm/kernel/fiq.c b/arch/arm/kernel/fiq.c
index 6ff7919613d7..e72dc34eea1c 100644
--- a/arch/arm/kernel/fiq.c
+++ b/arch/arm/kernel/fiq.c
@@ -45,6 +45,7 @@
 #include <asm/fiq.h>
 #include <asm/irq.h>
 #include <asm/system.h>
+#include <asm/traps.h>
 
 static unsigned long no_fiq_insn;
 
@@ -67,17 +68,22 @@ static struct fiq_handler default_owner = {
 
 static struct fiq_handler *current_fiq = &default_owner;
 
-int show_fiq_list(struct seq_file *p, void *v)
+int show_fiq_list(struct seq_file *p, int prec)
 {
 	if (current_fiq != &default_owner)
-		seq_printf(p, "FIQ:              %s\n", current_fiq->name);
+		seq_printf(p, "%*s:              %s\n", prec, "FIQ",
+			current_fiq->name);
 
 	return 0;
 }
 
 void set_fiq_handler(void *start, unsigned int length)
 {
+#if defined(CONFIG_CPU_USE_DOMAINS)
 	memcpy((void *)0xffff001c, start, length);
+#else
+	memcpy(vectors_page + 0x1c, start, length);
+#endif
 	flush_icache_range(0xffff001c, 0xffff001c + length);
 	if (!vectors_high())
 		flush_icache_range(0x1c, 0x1c + length);
diff --git a/arch/arm/kernel/head.S b/arch/arm/kernel/head.S
index 6bd82d25683c..f17d9a09e8fb 100644
--- a/arch/arm/kernel/head.S
+++ b/arch/arm/kernel/head.S
@@ -91,6 +91,11 @@ ENTRY(stext)
 	movs	r8, r5				@ invalid machine (r5=0)?
  THUMB( it	eq )		@ force fixup-able long branch encoding
 	beq	__error_a			@ yes, error 'a'
+
+	/*
+	 * r1 = machine no, r2 = atags,
+	 * r8 = machinfo, r9 = cpuid, r10 = procinfo
+	 */
 	bl	__vet_atags
 #ifdef CONFIG_SMP_ON_UP
 	bl	__fixup_smp
@@ -387,19 +392,19 @@ ENDPROC(__turn_mmu_on)
 
 #ifdef CONFIG_SMP_ON_UP
 __fixup_smp:
-	mov	r7, #0x00070000
-	orr	r6, r7, #0xff000000	@ mask 0xff070000
-	orr	r7, r7, #0x41000000	@ val 0x41070000
-	and	r0, r9, r6
-	teq	r0, r7			@ ARM CPU and ARMv6/v7?
+	mov	r4, #0x00070000
+	orr	r3, r4, #0xff000000	@ mask 0xff070000
+	orr	r4, r4, #0x41000000	@ val 0x41070000
+	and	r0, r9, r3
+	teq	r0, r4			@ ARM CPU and ARMv6/v7?
 	bne	__fixup_smp_on_up	@ no, assume UP
 
-	orr	r6, r6, #0x0000ff00
-	orr	r6, r6, #0x000000f0	@ mask 0xff07fff0
-	orr	r7, r7, #0x0000b000
-	orr	r7, r7, #0x00000020	@ val 0x4107b020
-	and	r0, r9, r6
-	teq	r0, r7			@ ARM 11MPCore?
+	orr	r3, r3, #0x0000ff00
+	orr	r3, r3, #0x000000f0	@ mask 0xff07fff0
+	orr	r4, r4, #0x0000b000
+	orr	r4, r4, #0x00000020	@ val 0x4107b020
+	and	r0, r9, r3
+	teq	r0, r4			@ ARM 11MPCore?
 	moveq	pc, lr			@ yes, assume SMP
 
 	mrc	p15, 0, r0, c0, c0, 5	@ read MPIDR
@@ -408,15 +413,22 @@ __fixup_smp:
 
 __fixup_smp_on_up:
 	adr	r0, 1f
-	ldmia	r0, {r3, r6, r7}
+	ldmia	r0, {r3 - r5}
 	sub	r3, r0, r3
-	add	r6, r6, r3
-	add	r7, r7, r3
-2:	cmp	r6, r7
-	ldmia	r6!, {r0, r4}
-	strlo	r4, [r0, r3]
-	blo	2b
-	mov	pc, lr
+	add	r4, r4, r3
+	add	r5, r5, r3
+2:	cmp	r4, r5
+	movhs	pc, lr
+	ldmia	r4!, {r0, r6}
+ ARM(	str	r6, [r0, r3]	)
+ THUMB(	add	r0, r0, r3	)
+#ifdef __ARMEB__
+ THUMB(	mov	r6, r6, ror #16	)	@ Convert word order for big-endian.
+#endif
+ THUMB(	strh	r6, [r0], #2	)	@ For Thumb-2, store as two halfwords
+ THUMB(	mov	r6, r6, lsr #16	)	@ to be robust against misaligned r3.
+ THUMB(	strh	r6, [r0]	)
+	b	2b
 ENDPROC(__fixup_smp)
 
 	.align
diff --git a/arch/arm/kernel/irq.c b/arch/arm/kernel/irq.c
index 6d616333340f..8135438b8818 100644
--- a/arch/arm/kernel/irq.c
+++ b/arch/arm/kernel/irq.c
@@ -38,6 +38,7 @@
 #include <linux/ftrace.h>
 
 #include <asm/system.h>
+#include <asm/mach/arch.h>
 #include <asm/mach/irq.h>
 #include <asm/mach/time.h>
 
@@ -48,8 +49,6 @@
 #define irq_finish(irq) do { } while (0)
 #endif
 
-unsigned int arch_nr_irqs;
-void (*init_arch_irq)(void) __initdata = NULL;
 unsigned long irq_err_count;
 
 int show_interrupts(struct seq_file *p, void *v)
@@ -58,11 +57,20 @@ int show_interrupts(struct seq_file *p, void *v)
 	struct irq_desc *desc;
 	struct irqaction * action;
 	unsigned long flags;
+	int prec, n;
+
+	for (prec = 3, n = 1000; prec < 10 && n <= nr_irqs; prec++)
+		n *= 10;
+
+#ifdef CONFIG_SMP
+	if (prec < 4)
+		prec = 4;
+#endif
 
 	if (i == 0) {
 		char cpuname[12];
 
-		seq_printf(p, "    ");
+		seq_printf(p, "%*s ", prec, "");
 		for_each_present_cpu(cpu) {
 			sprintf(cpuname, "CPU%d", cpu);
 			seq_printf(p, " %10s", cpuname);
@@ -77,7 +85,7 @@ int show_interrupts(struct seq_file *p, void *v)
 		if (!action)
 			goto unlock;
 
-		seq_printf(p, "%3d: ", i);
+		seq_printf(p, "%*d: ", prec, i);
 		for_each_present_cpu(cpu)
 			seq_printf(p, "%10u ", kstat_irqs_cpu(i, cpu));
 		seq_printf(p, " %10s", desc->chip->name ? : "-");
@@ -90,13 +98,15 @@ unlock:
 		raw_spin_unlock_irqrestore(&desc->lock, flags);
 	} else if (i == nr_irqs) {
 #ifdef CONFIG_FIQ
-		show_fiq_list(p, v);
+		show_fiq_list(p, prec);
 #endif
 #ifdef CONFIG_SMP
-		show_ipi_list(p);
-		show_local_irqs(p);
+		show_ipi_list(p, prec);
+#endif
+#ifdef CONFIG_LOCAL_TIMERS
+		show_local_irqs(p, prec);
 #endif
-		seq_printf(p, "Err: %10lu\n", irq_err_count);
+		seq_printf(p, "%*s: %10lu\n", prec, "Err", irq_err_count);
 	}
 	return 0;
 }
@@ -156,13 +166,13 @@ void set_irq_flags(unsigned int irq, unsigned int iflags)
 
 void __init init_IRQ(void)
 {
-	init_arch_irq();
+	machine_desc->init_irq();
 }
 
 #ifdef CONFIG_SPARSE_IRQ
 int __init arch_probe_nr_irqs(void)
 {
-	nr_irqs = arch_nr_irqs ? arch_nr_irqs : NR_IRQS;
+	nr_irqs = machine_desc->nr_irqs ? machine_desc->nr_irqs : NR_IRQS;
 	return nr_irqs;
 }
 #endif
diff --git a/arch/arm/kernel/machine_kexec.c b/arch/arm/kernel/machine_kexec.c
index 3a8fd5140d7a..30ead135ff5f 100644
--- a/arch/arm/kernel/machine_kexec.c
+++ b/arch/arm/kernel/machine_kexec.c
@@ -23,6 +23,8 @@ extern unsigned long kexec_indirection_page;
 extern unsigned long kexec_mach_type;
 extern unsigned long kexec_boot_atags;
 
+static atomic_t waiting_for_crash_ipi;
+
 /*
  * Provide a dummy crash_notes definition while crash dump arrives to arm.
  * This prevents breakage of crash_notes attribute in kernel/ksysfs.c.
@@ -37,9 +39,37 @@ void machine_kexec_cleanup(struct kimage *image)
 {
 }
 
+void machine_crash_nonpanic_core(void *unused)
+{
+	struct pt_regs regs;
+
+	crash_setup_regs(&regs, NULL);
+	printk(KERN_DEBUG "CPU %u will stop doing anything useful since another CPU has crashed\n",
+	       smp_processor_id());
+	crash_save_cpu(&regs, smp_processor_id());
+	flush_cache_all();
+
+	atomic_dec(&waiting_for_crash_ipi);
+	while (1)
+		cpu_relax();
+}
+
 void machine_crash_shutdown(struct pt_regs *regs)
 {
+	unsigned long msecs;
+
 	local_irq_disable();
+
+	atomic_set(&waiting_for_crash_ipi, num_online_cpus() - 1);
+	smp_call_function(machine_crash_nonpanic_core, NULL, false);
+	msecs = 1000; /* Wait at most a second for the other cpus to stop */
+	while ((atomic_read(&waiting_for_crash_ipi) > 0) && msecs) {
+		mdelay(1);
+		msecs--;
+	}
+	if (atomic_read(&waiting_for_crash_ipi) > 0)
+		printk(KERN_WARNING "Non-crashing CPUs did not react to IPI\n");
+
 	crash_save_cpu(regs, smp_processor_id());
 
 	printk(KERN_INFO "Loading crashdump kernel...\n");
diff --git a/arch/arm/kernel/module.c b/arch/arm/kernel/module.c
index d9bd786ce23d..0c1bb68ff4a8 100644
--- a/arch/arm/kernel/module.c
+++ b/arch/arm/kernel/module.c
@@ -67,35 +67,6 @@ int module_frob_arch_sections(Elf_Ehdr *hdr,
 			      char *secstrings,
 			      struct module *mod)
 {
-#ifdef CONFIG_ARM_UNWIND
-	Elf_Shdr *s, *sechdrs_end = sechdrs + hdr->e_shnum;
-	struct arm_unwind_mapping *maps = mod->arch.map;
-
-	for (s = sechdrs; s < sechdrs_end; s++) {
-		char const *secname = secstrings + s->sh_name;
-
-		if (strcmp(".ARM.exidx.init.text", secname) == 0)
-			maps[ARM_SEC_INIT].unw_sec = s;
-		else if (strcmp(".ARM.exidx.devinit.text", secname) == 0)
-			maps[ARM_SEC_DEVINIT].unw_sec = s;
-		else if (strcmp(".ARM.exidx", secname) == 0)
-			maps[ARM_SEC_CORE].unw_sec = s;
-		else if (strcmp(".ARM.exidx.exit.text", secname) == 0)
-			maps[ARM_SEC_EXIT].unw_sec = s;
-		else if (strcmp(".ARM.exidx.devexit.text", secname) == 0)
-			maps[ARM_SEC_DEVEXIT].unw_sec = s;
-		else if (strcmp(".init.text", secname) == 0)
-			maps[ARM_SEC_INIT].sec_text = s;
-		else if (strcmp(".devinit.text", secname) == 0)
-			maps[ARM_SEC_DEVINIT].sec_text = s;
-		else if (strcmp(".text", secname) == 0)
-			maps[ARM_SEC_CORE].sec_text = s;
-		else if (strcmp(".exit.text", secname) == 0)
-			maps[ARM_SEC_EXIT].sec_text = s;
-		else if (strcmp(".devexit.text", secname) == 0)
-			maps[ARM_SEC_DEVEXIT].sec_text = s;
-	}
-#endif
 	return 0;
 }
 
@@ -300,41 +271,69 @@ apply_relocate_add(Elf32_Shdr *sechdrs, const char *strtab,
 	return -ENOEXEC;
 }
 
-#ifdef CONFIG_ARM_UNWIND
-static void register_unwind_tables(struct module *mod)
+struct mod_unwind_map {
+	const Elf_Shdr *unw_sec;
+	const Elf_Shdr *txt_sec;
+};
+
+int module_finalize(const Elf32_Ehdr *hdr, const Elf_Shdr *sechdrs,
+		    struct module *mod)
 {
+#ifdef CONFIG_ARM_UNWIND
+	const char *secstrs = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset;
+	const Elf_Shdr *s, *sechdrs_end = sechdrs + hdr->e_shnum;
+	struct mod_unwind_map maps[ARM_SEC_MAX];
 	int i;
-	for (i = 0; i < ARM_SEC_MAX; ++i) {
-		struct arm_unwind_mapping *map = &mod->arch.map[i];
-		if (map->unw_sec && map->sec_text)
-			map->unwind = unwind_table_add(map->unw_sec->sh_addr,
-						       map->unw_sec->sh_size,
-						       map->sec_text->sh_addr,
-						       map->sec_text->sh_size);
+
+	memset(maps, 0, sizeof(maps));
+
+	for (s = sechdrs; s < sechdrs_end; s++) {
+		const char *secname = secstrs + s->sh_name;
+
+		if (!(s->sh_flags & SHF_ALLOC))
+			continue;
+
+		if (strcmp(".ARM.exidx.init.text", secname) == 0)
+			maps[ARM_SEC_INIT].unw_sec = s;
+		else if (strcmp(".ARM.exidx.devinit.text", secname) == 0)
+			maps[ARM_SEC_DEVINIT].unw_sec = s;
+		else if (strcmp(".ARM.exidx", secname) == 0)
+			maps[ARM_SEC_CORE].unw_sec = s;
+		else if (strcmp(".ARM.exidx.exit.text", secname) == 0)
+			maps[ARM_SEC_EXIT].unw_sec = s;
+		else if (strcmp(".ARM.exidx.devexit.text", secname) == 0)
+			maps[ARM_SEC_DEVEXIT].unw_sec = s;
+		else if (strcmp(".init.text", secname) == 0)
+			maps[ARM_SEC_INIT].txt_sec = s;
+		else if (strcmp(".devinit.text", secname) == 0)
+			maps[ARM_SEC_DEVINIT].txt_sec = s;
+		else if (strcmp(".text", secname) == 0)
+			maps[ARM_SEC_CORE].txt_sec = s;
+		else if (strcmp(".exit.text", secname) == 0)
+			maps[ARM_SEC_EXIT].txt_sec = s;
+		else if (strcmp(".devexit.text", secname) == 0)
+			maps[ARM_SEC_DEVEXIT].txt_sec = s;
 	}
-}
 
-static void unregister_unwind_tables(struct module *mod)
-{
-	int i = ARM_SEC_MAX;
-	while (--i >= 0)
-		unwind_table_del(mod->arch.map[i].unwind);
-}
-#else
-static inline void register_unwind_tables(struct module *mod) { }
-static inline void unregister_unwind_tables(struct module *mod) { }
+	for (i = 0; i < ARM_SEC_MAX; i++)
+		if (maps[i].unw_sec && maps[i].txt_sec)
+			mod->arch.unwind[i] =
+				unwind_table_add(maps[i].unw_sec->sh_addr,
+					         maps[i].unw_sec->sh_size,
+					         maps[i].txt_sec->sh_addr,
+					         maps[i].txt_sec->sh_size);
 #endif
-
-int
-module_finalize(const Elf32_Ehdr *hdr, const Elf_Shdr *sechdrs,
-		struct module *module)
-{
-	register_unwind_tables(module);
 	return 0;
 }
 
 void
 module_arch_cleanup(struct module *mod)
 {
-	unregister_unwind_tables(mod);
+#ifdef CONFIG_ARM_UNWIND
+	int i;
+
+	for (i = 0; i < ARM_SEC_MAX; i++)
+		if (mod->arch.unwind[i])
+			unwind_table_del(mod->arch.unwind[i]);
+#endif
 }
diff --git a/arch/arm/kernel/sched_clock.c b/arch/arm/kernel/sched_clock.c
new file mode 100644
index 000000000000..2cdcc9287c74
--- /dev/null
+++ b/arch/arm/kernel/sched_clock.c
@@ -0,0 +1,69 @@
+/*
+ * sched_clock.c: support for extending counters to full 64-bit ns counter
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/clocksource.h>
+#include <linux/init.h>
+#include <linux/jiffies.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/timer.h>
+
+#include <asm/sched_clock.h>
+
+static void sched_clock_poll(unsigned long wrap_ticks);
+static DEFINE_TIMER(sched_clock_timer, sched_clock_poll, 0, 0);
+static void (*sched_clock_update_fn)(void);
+
+static void sched_clock_poll(unsigned long wrap_ticks)
+{
+	mod_timer(&sched_clock_timer, round_jiffies(jiffies + wrap_ticks));
+	sched_clock_update_fn();
+}
+
+void __init init_sched_clock(struct clock_data *cd, void (*update)(void),
+	unsigned int clock_bits, unsigned long rate)
+{
+	unsigned long r, w;
+	u64 res, wrap;
+	char r_unit;
+
+	sched_clock_update_fn = update;
+
+	/* calculate the mult/shift to convert counter ticks to ns. */
+	clocks_calc_mult_shift(&cd->mult, &cd->shift, rate, NSEC_PER_SEC, 60);
+
+	r = rate;
+	if (r >= 4000000) {
+		r /= 1000000;
+		r_unit = 'M';
+	} else {
+		r /= 1000;
+		r_unit = 'k';
+	}
+
+	/* calculate how many ns until we wrap */
+	wrap = cyc_to_ns((1ULL << clock_bits) - 1, cd->mult, cd->shift);
+	do_div(wrap, NSEC_PER_MSEC);
+	w = wrap;
+
+	/* calculate the ns resolution of this counter */
+	res = cyc_to_ns(1ULL, cd->mult, cd->shift);
+	pr_info("sched_clock: %u bits at %lu%cHz, resolution %lluns, wraps every %lums\n",
+		clock_bits, r, r_unit, res, w);
+
+	/*
+	 * Start the timer to keep sched_clock() properly updated and
+	 * sets the initial epoch.
+	 */
+	sched_clock_timer.data = msecs_to_jiffies(w - (w / 10));
+	sched_clock_poll(sched_clock_timer.data);
+
+	/*
+	 * Ensure that sched_clock() starts off at 0ns
+	 */
+	cd->epoch_ns = 0;
+}
diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c
index 336f14e0e5c2..3455ad33de4c 100644
--- a/arch/arm/kernel/setup.c
+++ b/arch/arm/kernel/setup.c
@@ -75,9 +75,9 @@ extern void reboot_setup(char *str);
 
 unsigned int processor_id;
 EXPORT_SYMBOL(processor_id);
-unsigned int __machine_arch_type;
+unsigned int __machine_arch_type __read_mostly;
 EXPORT_SYMBOL(__machine_arch_type);
-unsigned int cacheid;
+unsigned int cacheid __read_mostly;
 EXPORT_SYMBOL(cacheid);
 
 unsigned int __atags_pointer __initdata;
@@ -91,24 +91,24 @@ EXPORT_SYMBOL(system_serial_low);
 unsigned int system_serial_high;
 EXPORT_SYMBOL(system_serial_high);
 
-unsigned int elf_hwcap;
+unsigned int elf_hwcap __read_mostly;
 EXPORT_SYMBOL(elf_hwcap);
 
 
 #ifdef MULTI_CPU
-struct processor processor;
+struct processor processor __read_mostly;
 #endif
 #ifdef MULTI_TLB
-struct cpu_tlb_fns cpu_tlb;
+struct cpu_tlb_fns cpu_tlb __read_mostly;
 #endif
 #ifdef MULTI_USER
-struct cpu_user_fns cpu_user;
+struct cpu_user_fns cpu_user __read_mostly;
 #endif
 #ifdef MULTI_CACHE
-struct cpu_cache_fns cpu_cache;
+struct cpu_cache_fns cpu_cache __read_mostly;
 #endif
 #ifdef CONFIG_OUTER_CACHE
-struct outer_cache_fns outer_cache;
+struct outer_cache_fns outer_cache __read_mostly;
 EXPORT_SYMBOL(outer_cache);
 #endif
 
@@ -126,6 +126,7 @@ EXPORT_SYMBOL(elf_platform);
 static const char *cpu_name;
 static const char *machine_name;
 static char __initdata cmd_line[COMMAND_LINE_SIZE];
+struct machine_desc *machine_desc __initdata;
 
 static char default_command_line[COMMAND_LINE_SIZE] __initdata = CONFIG_CMDLINE;
 static union { char c[4]; unsigned long l; } endian_test __initdata = { { 'l', '?', '?', 'b' } };
@@ -708,13 +709,11 @@ static struct init_tags {
 	{ 0, ATAG_NONE }
 };
 
-static void (*init_machine)(void) __initdata;
-
 static int __init customize_machine(void)
 {
 	/* customizes platform devices, or adds new ones */
-	if (init_machine)
-		init_machine();
+	if (machine_desc->init_machine)
+		machine_desc->init_machine();
 	return 0;
 }
 arch_initcall(customize_machine);
@@ -809,6 +808,7 @@ void __init setup_arch(char **cmdline_p)
 
 	setup_processor();
 	mdesc = setup_machine(machine_arch_type);
+	machine_desc = mdesc;
 	machine_name = mdesc->name;
 
 	if (mdesc->soft_reboot)
@@ -868,13 +868,9 @@ void __init setup_arch(char **cmdline_p)
 	cpu_init();
 	tcm_init();
 
-	/*
-	 * Set up various architecture-specific pointers
-	 */
-	arch_nr_irqs = mdesc->nr_irqs;
-	init_arch_irq = mdesc->init_irq;
-	system_timer = mdesc->timer;
-	init_machine = mdesc->init_machine;
+#ifdef CONFIG_MULTI_IRQ_HANDLER
+	handle_arch_irq = mdesc->handle_irq;
+#endif
 
 #ifdef CONFIG_VT
 #if defined(CONFIG_VGA_CONSOLE)
@@ -884,6 +880,9 @@ void __init setup_arch(char **cmdline_p)
 #endif
 #endif
 	early_trap_init();
+
+	if (mdesc->init_early)
+		mdesc->init_early();
 }
 
 
diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c
index bbca89872c18..4539ebcb089f 100644
--- a/arch/arm/kernel/smp.c
+++ b/arch/arm/kernel/smp.c
@@ -25,6 +25,7 @@
 #include <linux/irq.h>
 #include <linux/percpu.h>
 #include <linux/clockchips.h>
+#include <linux/completion.h>
 
 #include <asm/atomic.h>
 #include <asm/cacheflush.h>
@@ -38,7 +39,6 @@
 #include <asm/tlbflush.h>
 #include <asm/ptrace.h>
 #include <asm/localtimer.h>
-#include <asm/smp_plat.h>
 
 /*
  * as from 2.5, kernels no longer have an init_tasks structure
@@ -47,64 +47,14 @@
  */
 struct secondary_data secondary_data;
 
-/*
- * structures for inter-processor calls
- * - A collection of single bit ipi messages.
- */
-struct ipi_data {
-	spinlock_t lock;
-	unsigned long ipi_count;
-	unsigned long bits;
-};
-
-static DEFINE_PER_CPU(struct ipi_data, ipi_data) = {
-	.lock	= SPIN_LOCK_UNLOCKED,
-};
-
 enum ipi_msg_type {
-	IPI_TIMER,
+	IPI_TIMER = 2,
 	IPI_RESCHEDULE,
 	IPI_CALL_FUNC,
 	IPI_CALL_FUNC_SINGLE,
 	IPI_CPU_STOP,
 };
 
-static inline void identity_mapping_add(pgd_t *pgd, unsigned long start,
-	unsigned long end)
-{
-	unsigned long addr, prot;
-	pmd_t *pmd;
-
-	prot = PMD_TYPE_SECT | PMD_SECT_AP_WRITE;
-	if (cpu_architecture() <= CPU_ARCH_ARMv5TEJ && !cpu_is_xscale())
-		prot |= PMD_BIT4;
-
-	for (addr = start & PGDIR_MASK; addr < end;) {
-		pmd = pmd_offset(pgd + pgd_index(addr), addr);
-		pmd[0] = __pmd(addr | prot);
-		addr += SECTION_SIZE;
-		pmd[1] = __pmd(addr | prot);
-		addr += SECTION_SIZE;
-		flush_pmd_entry(pmd);
-		outer_clean_range(__pa(pmd), __pa(pmd + 1));
-	}
-}
-
-static inline void identity_mapping_del(pgd_t *pgd, unsigned long start,
-	unsigned long end)
-{
-	unsigned long addr;
-	pmd_t *pmd;
-
-	for (addr = start & PGDIR_MASK; addr < end; addr += PGDIR_SIZE) {
-		pmd = pmd_offset(pgd + pgd_index(addr), addr);
-		pmd[0] = __pmd(0);
-		pmd[1] = __pmd(0);
-		clean_pmd_entry(pmd);
-		outer_clean_range(__pa(pmd), __pa(pmd + 1));
-	}
-}
-
 int __cpuinit __cpu_up(unsigned int cpu)
 {
 	struct cpuinfo_arm *ci = &per_cpu(cpu_data, cpu);
@@ -178,8 +128,12 @@ int __cpuinit __cpu_up(unsigned int cpu)
 			barrier();
 		}
 
-		if (!cpu_online(cpu))
+		if (!cpu_online(cpu)) {
+			pr_crit("CPU%u: failed to come online\n", cpu);
 			ret = -EIO;
+		}
+	} else {
+		pr_err("CPU%u: failed to boot: %d\n", cpu, ret);
 	}
 
 	secondary_data.stack = NULL;
@@ -195,18 +149,12 @@ int __cpuinit __cpu_up(unsigned int cpu)
 
 	pgd_free(&init_mm, pgd);
 
-	if (ret) {
-		printk(KERN_CRIT "CPU%u: processor failed to boot\n", cpu);
-
-		/*
-		 * FIXME: We need to clean up the new idle thread. --rmk
-		 */
-	}
-
 	return ret;
 }
 
 #ifdef CONFIG_HOTPLUG_CPU
+static void percpu_timer_stop(void);
+
 /*
  * __cpu_disable runs on the processor to be shutdown.
  */
@@ -234,7 +182,7 @@ int __cpu_disable(void)
 	/*
 	 * Stop the local timer for this CPU.
 	 */
-	local_timer_stop();
+	percpu_timer_stop();
 
 	/*
 	 * Flush user cache and TLB mappings, and then remove this CPU
@@ -253,12 +201,20 @@ int __cpu_disable(void)
 	return 0;
 }
 
+static DECLARE_COMPLETION(cpu_died);
+
 /*
  * called on the thread which is asking for a CPU to be shutdown -
  * waits until shutdown has completed, or it is timed out.
  */
 void __cpu_die(unsigned int cpu)
 {
+	if (!wait_for_completion_timeout(&cpu_died, msecs_to_jiffies(5000))) {
+		pr_err("CPU%u: cpu didn't die\n", cpu);
+		return;
+	}
+	printk(KERN_NOTICE "CPU%u: shutdown\n", cpu);
+
 	if (!platform_cpu_kill(cpu))
 		printk("CPU%u: unable to kill\n", cpu);
 }
@@ -275,12 +231,17 @@ void __ref cpu_die(void)
 {
 	unsigned int cpu = smp_processor_id();
 
-	local_irq_disable();
 	idle_task_exit();
 
+	local_irq_disable();
+	mb();
+
+	/* Tell __cpu_die() that this CPU is now safe to dispose of */
+	complete(&cpu_died);
+
 	/*
 	 * actual CPU shutdown procedure is at least platform (if not
-	 * CPU) specific
+	 * CPU) specific.
 	 */
 	platform_cpu_die(cpu);
 
@@ -290,6 +251,7 @@ void __ref cpu_die(void)
 	 * to be repeated to undo the effects of taking the CPU offline.
 	 */
 	__asm__("mov	sp, %0\n"
+	"	mov	fp, #0\n"
 	"	b	secondary_start_kernel"
 		:
 		: "r" (task_stack_page(current) + THREAD_SIZE - 8));
@@ -297,6 +259,17 @@ void __ref cpu_die(void)
 #endif /* CONFIG_HOTPLUG_CPU */
 
 /*
+ * Called by both boot and secondaries to move global data into
+ * per-processor storage.
+ */
+static void __cpuinit smp_store_cpu_info(unsigned int cpuid)
+{
+	struct cpuinfo_arm *cpu_info = &per_cpu(cpu_data, cpuid);
+
+	cpu_info->loops_per_jiffy = loops_per_jiffy;
+}
+
+/*
  * This is the secondary CPU boot entry.  We're using this CPUs
  * idle thread stack, but a set of temporary page tables.
  */
@@ -311,7 +284,6 @@ asmlinkage void __cpuinit secondary_start_kernel(void)
 	 * All kernel threads share the same mm context; grab a
 	 * reference and switch to it.
 	 */
-	atomic_inc(&mm->mm_users);
 	atomic_inc(&mm->mm_count);
 	current->active_mm = mm;
 	cpumask_set_cpu(cpu, mm_cpumask(mm));
@@ -321,6 +293,7 @@ asmlinkage void __cpuinit secondary_start_kernel(void)
 
 	cpu_init();
 	preempt_disable();
+	trace_hardirqs_off();
 
 	/*
 	 * Give the platform a chance to do its own initialisation.
@@ -354,17 +327,6 @@ asmlinkage void __cpuinit secondary_start_kernel(void)
 	cpu_idle();
 }
 
-/*
- * Called by both boot and secondaries to move global data into
- * per-processor storage.
- */
-void __cpuinit smp_store_cpu_info(unsigned int cpuid)
-{
-	struct cpuinfo_arm *cpu_info = &per_cpu(cpu_data, cpuid);
-
-	cpu_info->loops_per_jiffy = loops_per_jiffy;
-}
-
 void __init smp_cpus_done(unsigned int max_cpus)
 {
 	int cpu;
@@ -387,61 +349,80 @@ void __init smp_prepare_boot_cpu(void)
 	per_cpu(cpu_data, cpu).idle = current;
 }
 
-static void send_ipi_message(const struct cpumask *mask, enum ipi_msg_type msg)
+void __init smp_prepare_cpus(unsigned int max_cpus)
 {
-	unsigned long flags;
-	unsigned int cpu;
+	unsigned int ncores = num_possible_cpus();
 
-	local_irq_save(flags);
-
-	for_each_cpu(cpu, mask) {
-		struct ipi_data *ipi = &per_cpu(ipi_data, cpu);
-
-		spin_lock(&ipi->lock);
-		ipi->bits |= 1 << msg;
-		spin_unlock(&ipi->lock);
-	}
+	smp_store_cpu_info(smp_processor_id());
 
 	/*
-	 * Call the platform specific cross-CPU call function.
+	 * are we trying to boot more cores than exist?
 	 */
-	smp_cross_call(mask);
+	if (max_cpus > ncores)
+		max_cpus = ncores;
+
+	if (max_cpus > 1) {
+		/*
+		 * Enable the local timer or broadcast device for the
+		 * boot CPU, but only if we have more than one CPU.
+		 */
+		percpu_timer_setup();
 
-	local_irq_restore(flags);
+		/*
+		 * Initialise the SCU if there are more than one CPU
+		 * and let them know where to start.
+		 */
+		platform_smp_prepare_cpus(max_cpus);
+	}
 }
 
 void arch_send_call_function_ipi_mask(const struct cpumask *mask)
 {
-	send_ipi_message(mask, IPI_CALL_FUNC);
+	smp_cross_call(mask, IPI_CALL_FUNC);
 }
 
 void arch_send_call_function_single_ipi(int cpu)
 {
-	send_ipi_message(cpumask_of(cpu), IPI_CALL_FUNC_SINGLE);
+	smp_cross_call(cpumask_of(cpu), IPI_CALL_FUNC_SINGLE);
 }
 
-void show_ipi_list(struct seq_file *p)
+static const char *ipi_types[NR_IPI] = {
+#define S(x,s)	[x - IPI_TIMER] = s
+	S(IPI_TIMER, "Timer broadcast interrupts"),
+	S(IPI_RESCHEDULE, "Rescheduling interrupts"),
+	S(IPI_CALL_FUNC, "Function call interrupts"),
+	S(IPI_CALL_FUNC_SINGLE, "Single function call interrupts"),
+	S(IPI_CPU_STOP, "CPU stop interrupts"),
+};
+
+void show_ipi_list(struct seq_file *p, int prec)
 {
-	unsigned int cpu;
+	unsigned int cpu, i;
 
-	seq_puts(p, "IPI:");
+	for (i = 0; i < NR_IPI; i++) {
+		seq_printf(p, "%*s%u: ", prec - 1, "IPI", i);
 
-	for_each_present_cpu(cpu)
-		seq_printf(p, " %10lu", per_cpu(ipi_data, cpu).ipi_count);
+		for_each_present_cpu(cpu)
+			seq_printf(p, "%10u ",
+				   __get_irq_stat(cpu, ipi_irqs[i]));
 
-	seq_putc(p, '\n');
+		seq_printf(p, " %s\n", ipi_types[i]);
+	}
 }
 
-void show_local_irqs(struct seq_file *p)
+u64 smp_irq_stat_cpu(unsigned int cpu)
 {
-	unsigned int cpu;
+	u64 sum = 0;
+	int i;
 
-	seq_printf(p, "LOC: ");
+	for (i = 0; i < NR_IPI; i++)
+		sum += __get_irq_stat(cpu, ipi_irqs[i]);
 
-	for_each_present_cpu(cpu)
-		seq_printf(p, "%10u ", irq_stat[cpu].local_timer_irqs);
+#ifdef CONFIG_LOCAL_TIMERS
+	sum += __get_irq_stat(cpu, local_timer_irqs);
+#endif
 
-	seq_putc(p, '\n');
+	return sum;
 }
 
 /*
@@ -464,18 +445,30 @@ asmlinkage void __exception_irq_entry do_local_timer(struct pt_regs *regs)
 	int cpu = smp_processor_id();
 
 	if (local_timer_ack()) {
-		irq_stat[cpu].local_timer_irqs++;
+		__inc_irq_stat(cpu, local_timer_irqs);
 		ipi_timer();
 	}
 
 	set_irq_regs(old_regs);
 }
+
+void show_local_irqs(struct seq_file *p, int prec)
+{
+	unsigned int cpu;
+
+	seq_printf(p, "%*s: ", prec, "LOC");
+
+	for_each_present_cpu(cpu)
+		seq_printf(p, "%10u ", __get_irq_stat(cpu, local_timer_irqs));
+
+	seq_printf(p, " Local timer interrupts\n");
+}
 #endif
 
 #ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
 static void smp_timer_broadcast(const struct cpumask *mask)
 {
-	send_ipi_message(mask, IPI_TIMER);
+	smp_cross_call(mask, IPI_TIMER);
 }
 #else
 #define smp_timer_broadcast	NULL
@@ -512,6 +505,21 @@ void __cpuinit percpu_timer_setup(void)
 	local_timer_setup(evt);
 }
 
+#ifdef CONFIG_HOTPLUG_CPU
+/*
+ * The generic clock events code purposely does not stop the local timer
+ * on CPU_DEAD/CPU_DEAD_FROZEN hotplug events, so we have to do it
+ * manually here.
+ */
+static void percpu_timer_stop(void)
+{
+	unsigned int cpu = smp_processor_id();
+	struct clock_event_device *evt = &per_cpu(percpu_clockevent, cpu);
+
+	evt->set_mode(CLOCK_EVT_MODE_UNUSED, evt);
+}
+#endif
+
 static DEFINE_SPINLOCK(stop_lock);
 
 /*
@@ -538,216 +546,76 @@ static void ipi_cpu_stop(unsigned int cpu)
 
 /*
  * Main handler for inter-processor interrupts
- *
- * For ARM, the ipimask now only identifies a single
- * category of IPI (Bit 1 IPIs have been replaced by a
- * different mechanism):
- *
- *  Bit 0 - Inter-processor function call
  */
-asmlinkage void __exception_irq_entry do_IPI(struct pt_regs *regs)
+asmlinkage void __exception_irq_entry do_IPI(int ipinr, struct pt_regs *regs)
 {
 	unsigned int cpu = smp_processor_id();
-	struct ipi_data *ipi = &per_cpu(ipi_data, cpu);
 	struct pt_regs *old_regs = set_irq_regs(regs);
 
-	ipi->ipi_count++;
-
-	for (;;) {
-		unsigned long msgs;
-
-		spin_lock(&ipi->lock);
-		msgs = ipi->bits;
-		ipi->bits = 0;
-		spin_unlock(&ipi->lock);
+	if (ipinr >= IPI_TIMER && ipinr < IPI_TIMER + NR_IPI)
+		__inc_irq_stat(cpu, ipi_irqs[ipinr - IPI_TIMER]);
 
-		if (!msgs)
-			break;
-
-		do {
-			unsigned nextmsg;
-
-			nextmsg = msgs & -msgs;
-			msgs &= ~nextmsg;
-			nextmsg = ffz(~nextmsg);
-
-			switch (nextmsg) {
-			case IPI_TIMER:
-				ipi_timer();
-				break;
+	switch (ipinr) {
+	case IPI_TIMER:
+		ipi_timer();
+		break;
 
-			case IPI_RESCHEDULE:
-				/*
-				 * nothing more to do - eveything is
-				 * done on the interrupt return path
-				 */
-				break;
+	case IPI_RESCHEDULE:
+		/*
+		 * nothing more to do - eveything is
+		 * done on the interrupt return path
+		 */
+		break;
 
-			case IPI_CALL_FUNC:
-				generic_smp_call_function_interrupt();
-				break;
+	case IPI_CALL_FUNC:
+		generic_smp_call_function_interrupt();
+		break;
 
-			case IPI_CALL_FUNC_SINGLE:
-				generic_smp_call_function_single_interrupt();
-				break;
+	case IPI_CALL_FUNC_SINGLE:
+		generic_smp_call_function_single_interrupt();
+		break;
 
-			case IPI_CPU_STOP:
-				ipi_cpu_stop(cpu);
-				break;
+	case IPI_CPU_STOP:
+		ipi_cpu_stop(cpu);
+		break;
 
-			default:
-				printk(KERN_CRIT "CPU%u: Unknown IPI message 0x%x\n",
-				       cpu, nextmsg);
-				break;
-			}
-		} while (msgs);
+	default:
+		printk(KERN_CRIT "CPU%u: Unknown IPI message 0x%x\n",
+		       cpu, ipinr);
+		break;
 	}
-
 	set_irq_regs(old_regs);
 }
 
 void smp_send_reschedule(int cpu)
 {
-	send_ipi_message(cpumask_of(cpu), IPI_RESCHEDULE);
+	smp_cross_call(cpumask_of(cpu), IPI_RESCHEDULE);
 }
 
 void smp_send_stop(void)
 {
-	cpumask_t mask = cpu_online_map;
-	cpu_clear(smp_processor_id(), mask);
-	if (!cpus_empty(mask))
-		send_ipi_message(&mask, IPI_CPU_STOP);
-}
+	unsigned long timeout;
 
-/*
- * not supported here
- */
-int setup_profiling_timer(unsigned int multiplier)
-{
-	return -EINVAL;
-}
+	if (num_online_cpus() > 1) {
+		cpumask_t mask = cpu_online_map;
+		cpu_clear(smp_processor_id(), mask);
 
-static void
-on_each_cpu_mask(void (*func)(void *), void *info, int wait,
-		const struct cpumask *mask)
-{
-	preempt_disable();
+		smp_cross_call(&mask, IPI_CPU_STOP);
+	}
 
-	smp_call_function_many(mask, func, info, wait);
-	if (cpumask_test_cpu(smp_processor_id(), mask))
-		func(info);
+	/* Wait up to one second for other CPUs to stop */
+	timeout = USEC_PER_SEC;
+	while (num_online_cpus() > 1 && timeout--)
+		udelay(1);
 
-	preempt_enable();
+	if (num_online_cpus() > 1)
+		pr_warning("SMP: failed to stop secondary CPUs\n");
 }
 
-/**********************************************************************/
-
 /*
- * TLB operations
+ * not supported here
  */
-struct tlb_args {
-	struct vm_area_struct *ta_vma;
-	unsigned long ta_start;
-	unsigned long ta_end;
-};
-
-static inline void ipi_flush_tlb_all(void *ignored)
-{
-	local_flush_tlb_all();
-}
-
-static inline void ipi_flush_tlb_mm(void *arg)
-{
-	struct mm_struct *mm = (struct mm_struct *)arg;
-
-	local_flush_tlb_mm(mm);
-}
-
-static inline void ipi_flush_tlb_page(void *arg)
-{
-	struct tlb_args *ta = (struct tlb_args *)arg;
-
-	local_flush_tlb_page(ta->ta_vma, ta->ta_start);
-}
-
-static inline void ipi_flush_tlb_kernel_page(void *arg)
-{
-	struct tlb_args *ta = (struct tlb_args *)arg;
-
-	local_flush_tlb_kernel_page(ta->ta_start);
-}
-
-static inline void ipi_flush_tlb_range(void *arg)
-{
-	struct tlb_args *ta = (struct tlb_args *)arg;
-
-	local_flush_tlb_range(ta->ta_vma, ta->ta_start, ta->ta_end);
-}
-
-static inline void ipi_flush_tlb_kernel_range(void *arg)
-{
-	struct tlb_args *ta = (struct tlb_args *)arg;
-
-	local_flush_tlb_kernel_range(ta->ta_start, ta->ta_end);
-}
-
-void flush_tlb_all(void)
-{
-	if (tlb_ops_need_broadcast())
-		on_each_cpu(ipi_flush_tlb_all, NULL, 1);
-	else
-		local_flush_tlb_all();
-}
-
-void flush_tlb_mm(struct mm_struct *mm)
-{
-	if (tlb_ops_need_broadcast())
-		on_each_cpu_mask(ipi_flush_tlb_mm, mm, 1, mm_cpumask(mm));
-	else
-		local_flush_tlb_mm(mm);
-}
-
-void flush_tlb_page(struct vm_area_struct *vma, unsigned long uaddr)
-{
-	if (tlb_ops_need_broadcast()) {
-		struct tlb_args ta;
-		ta.ta_vma = vma;
-		ta.ta_start = uaddr;
-		on_each_cpu_mask(ipi_flush_tlb_page, &ta, 1, mm_cpumask(vma->vm_mm));
-	} else
-		local_flush_tlb_page(vma, uaddr);
-}
-
-void flush_tlb_kernel_page(unsigned long kaddr)
-{
-	if (tlb_ops_need_broadcast()) {
-		struct tlb_args ta;
-		ta.ta_start = kaddr;
-		on_each_cpu(ipi_flush_tlb_kernel_page, &ta, 1);
-	} else
-		local_flush_tlb_kernel_page(kaddr);
-}
-
-void flush_tlb_range(struct vm_area_struct *vma,
-                     unsigned long start, unsigned long end)
-{
-	if (tlb_ops_need_broadcast()) {
-		struct tlb_args ta;
-		ta.ta_vma = vma;
-		ta.ta_start = start;
-		ta.ta_end = end;
-		on_each_cpu_mask(ipi_flush_tlb_range, &ta, 1, mm_cpumask(vma->vm_mm));
-	} else
-		local_flush_tlb_range(vma, start, end);
-}
-
-void flush_tlb_kernel_range(unsigned long start, unsigned long end)
+int setup_profiling_timer(unsigned int multiplier)
 {
-	if (tlb_ops_need_broadcast()) {
-		struct tlb_args ta;
-		ta.ta_start = start;
-		ta.ta_end = end;
-		on_each_cpu(ipi_flush_tlb_kernel_range, &ta, 1);
-	} else
-		local_flush_tlb_kernel_range(start, end);
+	return -EINVAL;
 }
diff --git a/arch/arm/kernel/smp_tlb.c b/arch/arm/kernel/smp_tlb.c
new file mode 100644
index 000000000000..7dcb35285be7
--- /dev/null
+++ b/arch/arm/kernel/smp_tlb.c
@@ -0,0 +1,139 @@
+/*
+ *  linux/arch/arm/kernel/smp_tlb.c
+ *
+ *  Copyright (C) 2002 ARM Limited, All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/preempt.h>
+#include <linux/smp.h>
+
+#include <asm/smp_plat.h>
+#include <asm/tlbflush.h>
+
+static void on_each_cpu_mask(void (*func)(void *), void *info, int wait,
+	const struct cpumask *mask)
+{
+	preempt_disable();
+
+	smp_call_function_many(mask, func, info, wait);
+	if (cpumask_test_cpu(smp_processor_id(), mask))
+		func(info);
+
+	preempt_enable();
+}
+
+/**********************************************************************/
+
+/*
+ * TLB operations
+ */
+struct tlb_args {
+	struct vm_area_struct *ta_vma;
+	unsigned long ta_start;
+	unsigned long ta_end;
+};
+
+static inline void ipi_flush_tlb_all(void *ignored)
+{
+	local_flush_tlb_all();
+}
+
+static inline void ipi_flush_tlb_mm(void *arg)
+{
+	struct mm_struct *mm = (struct mm_struct *)arg;
+
+	local_flush_tlb_mm(mm);
+}
+
+static inline void ipi_flush_tlb_page(void *arg)
+{
+	struct tlb_args *ta = (struct tlb_args *)arg;
+
+	local_flush_tlb_page(ta->ta_vma, ta->ta_start);
+}
+
+static inline void ipi_flush_tlb_kernel_page(void *arg)
+{
+	struct tlb_args *ta = (struct tlb_args *)arg;
+
+	local_flush_tlb_kernel_page(ta->ta_start);
+}
+
+static inline void ipi_flush_tlb_range(void *arg)
+{
+	struct tlb_args *ta = (struct tlb_args *)arg;
+
+	local_flush_tlb_range(ta->ta_vma, ta->ta_start, ta->ta_end);
+}
+
+static inline void ipi_flush_tlb_kernel_range(void *arg)
+{
+	struct tlb_args *ta = (struct tlb_args *)arg;
+
+	local_flush_tlb_kernel_range(ta->ta_start, ta->ta_end);
+}
+
+void flush_tlb_all(void)
+{
+	if (tlb_ops_need_broadcast())
+		on_each_cpu(ipi_flush_tlb_all, NULL, 1);
+	else
+		local_flush_tlb_all();
+}
+
+void flush_tlb_mm(struct mm_struct *mm)
+{
+	if (tlb_ops_need_broadcast())
+		on_each_cpu_mask(ipi_flush_tlb_mm, mm, 1, mm_cpumask(mm));
+	else
+		local_flush_tlb_mm(mm);
+}
+
+void flush_tlb_page(struct vm_area_struct *vma, unsigned long uaddr)
+{
+	if (tlb_ops_need_broadcast()) {
+		struct tlb_args ta;
+		ta.ta_vma = vma;
+		ta.ta_start = uaddr;
+		on_each_cpu_mask(ipi_flush_tlb_page, &ta, 1, mm_cpumask(vma->vm_mm));
+	} else
+		local_flush_tlb_page(vma, uaddr);
+}
+
+void flush_tlb_kernel_page(unsigned long kaddr)
+{
+	if (tlb_ops_need_broadcast()) {
+		struct tlb_args ta;
+		ta.ta_start = kaddr;
+		on_each_cpu(ipi_flush_tlb_kernel_page, &ta, 1);
+	} else
+		local_flush_tlb_kernel_page(kaddr);
+}
+
+void flush_tlb_range(struct vm_area_struct *vma,
+                     unsigned long start, unsigned long end)
+{
+	if (tlb_ops_need_broadcast()) {
+		struct tlb_args ta;
+		ta.ta_vma = vma;
+		ta.ta_start = start;
+		ta.ta_end = end;
+		on_each_cpu_mask(ipi_flush_tlb_range, &ta, 1, mm_cpumask(vma->vm_mm));
+	} else
+		local_flush_tlb_range(vma, start, end);
+}
+
+void flush_tlb_kernel_range(unsigned long start, unsigned long end)
+{
+	if (tlb_ops_need_broadcast()) {
+		struct tlb_args ta;
+		ta.ta_start = start;
+		ta.ta_end = end;
+		on_each_cpu(ipi_flush_tlb_kernel_range, &ta, 1);
+	} else
+		local_flush_tlb_kernel_range(start, end);
+}
+
diff --git a/arch/arm/kernel/smp_twd.c b/arch/arm/kernel/smp_twd.c
index 35882fbf37f9..dd790745b3ef 100644
--- a/arch/arm/kernel/smp_twd.c
+++ b/arch/arm/kernel/smp_twd.c
@@ -127,8 +127,6 @@ static void __cpuinit twd_calibrate_rate(void)
  */
 void __cpuinit twd_timer_setup(struct clock_event_device *clk)
 {
-	unsigned long flags;
-
 	twd_calibrate_rate();
 
 	clk->name = "local_timer";
@@ -143,20 +141,7 @@ void __cpuinit twd_timer_setup(struct clock_event_device *clk)
 	clk->min_delta_ns = clockevent_delta2ns(0xf, clk);
 
 	/* Make sure our local interrupt controller has this enabled */
-	local_irq_save(flags);
-	irq_to_desc(clk->irq)->status |= IRQ_NOPROBE;
-	get_irq_chip(clk->irq)->unmask(clk->irq);
-	local_irq_restore(flags);
+	gic_enable_ppi(clk->irq);
 
 	clockevents_register_device(clk);
 }
-
-#ifdef CONFIG_HOTPLUG_CPU
-/*
- * take a local timer down
- */
-void twd_timer_stop(void)
-{
-	__raw_writel(0, twd_base + TWD_TIMER_CONTROL);
-}
-#endif
diff --git a/arch/arm/kernel/swp_emulate.c b/arch/arm/kernel/swp_emulate.c
new file mode 100644
index 000000000000..7a5760922914
--- /dev/null
+++ b/arch/arm/kernel/swp_emulate.c
@@ -0,0 +1,267 @@
+/*
+ *  linux/arch/arm/kernel/swp_emulate.c
+ *
+ *  Copyright (C) 2009 ARM Limited
+ *  __user_* functions adapted from include/asm/uaccess.h
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ *  Implements emulation of the SWP/SWPB instructions using load-exclusive and
+ *  store-exclusive for processors that have them disabled (or future ones that
+ *  might not implement them).
+ *
+ *  Syntax of SWP{B} instruction: SWP{B}<c> <Rt>, <Rt2>, [<Rn>]
+ *  Where: Rt  = destination
+ *	   Rt2 = source
+ *	   Rn  = address
+ */
+
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/proc_fs.h>
+#include <linux/sched.h>
+#include <linux/syscalls.h>
+#include <linux/perf_event.h>
+
+#include <asm/traps.h>
+#include <asm/uaccess.h>
+
+/*
+ * Error-checking SWP macros implemented using ldrex{b}/strex{b}
+ */
+#define __user_swpX_asm(data, addr, res, temp, B)		\
+	__asm__ __volatile__(					\
+	"	mov		%2, %1\n"			\
+	"0:	ldrex"B"	%1, [%3]\n"			\
+	"1:	strex"B"	%0, %2, [%3]\n"			\
+	"	cmp		%0, #0\n"			\
+	"	movne		%0, %4\n"			\
+	"2:\n"							\
+	"	.section	 .fixup,\"ax\"\n"		\
+	"	.align		2\n"				\
+	"3:	mov		%0, %5\n"			\
+	"	b		2b\n"				\
+	"	.previous\n"					\
+	"	.section	 __ex_table,\"a\"\n"		\
+	"	.align		3\n"				\
+	"	.long		0b, 3b\n"			\
+	"	.long		1b, 3b\n"			\
+	"	.previous"					\
+	: "=&r" (res), "+r" (data), "=&r" (temp)		\
+	: "r" (addr), "i" (-EAGAIN), "i" (-EFAULT)		\
+	: "cc", "memory")
+
+#define __user_swp_asm(data, addr, res, temp) \
+	__user_swpX_asm(data, addr, res, temp, "")
+#define __user_swpb_asm(data, addr, res, temp) \
+	__user_swpX_asm(data, addr, res, temp, "b")
+
+/*
+ * Macros/defines for extracting register numbers from instruction.
+ */
+#define EXTRACT_REG_NUM(instruction, offset) \
+	(((instruction) & (0xf << (offset))) >> (offset))
+#define RN_OFFSET  16
+#define RT_OFFSET  12
+#define RT2_OFFSET  0
+/*
+ * Bit 22 of the instruction encoding distinguishes between
+ * the SWP and SWPB variants (bit set means SWPB).
+ */
+#define TYPE_SWPB (1 << 22)
+
+static unsigned long swpcounter;
+static unsigned long swpbcounter;
+static unsigned long abtcounter;
+static pid_t         previous_pid;
+
+#ifdef CONFIG_PROC_FS
+static int proc_read_status(char *page, char **start, off_t off, int count,
+			    int *eof, void *data)
+{
+	char *p = page;
+	int len;
+
+	p += sprintf(p, "Emulated SWP:\t\t%lu\n", swpcounter);
+	p += sprintf(p, "Emulated SWPB:\t\t%lu\n", swpbcounter);
+	p += sprintf(p, "Aborted SWP{B}:\t\t%lu\n", abtcounter);
+	if (previous_pid != 0)
+		p += sprintf(p, "Last process:\t\t%d\n", previous_pid);
+
+	len = (p - page) - off;
+	if (len < 0)
+		len = 0;
+
+	*eof = (len <= count) ? 1 : 0;
+	*start = page + off;
+
+	return len;
+}
+#endif
+
+/*
+ * Set up process info to signal segmentation fault - called on access error.
+ */
+static void set_segfault(struct pt_regs *regs, unsigned long addr)
+{
+	siginfo_t info;
+
+	if (find_vma(current->mm, addr) == NULL)
+		info.si_code = SEGV_MAPERR;
+	else
+		info.si_code = SEGV_ACCERR;
+
+	info.si_signo = SIGSEGV;
+	info.si_errno = 0;
+	info.si_addr  = (void *) instruction_pointer(regs);
+
+	pr_debug("SWP{B} emulation: access caused memory abort!\n");
+	arm_notify_die("Illegal memory access", regs, &info, 0, 0);
+
+	abtcounter++;
+}
+
+static int emulate_swpX(unsigned int address, unsigned int *data,
+			unsigned int type)
+{
+	unsigned int res = 0;
+
+	if ((type != TYPE_SWPB) && (address & 0x3)) {
+		/* SWP to unaligned address not permitted */
+		pr_debug("SWP instruction on unaligned pointer!\n");
+		return -EFAULT;
+	}
+
+	while (1) {
+		unsigned long temp;
+
+		/*
+		 * Barrier required between accessing protected resource and
+		 * releasing a lock for it. Legacy code might not have done
+		 * this, and we cannot determine that this is not the case
+		 * being emulated, so insert always.
+		 */
+		smp_mb();
+
+		if (type == TYPE_SWPB)
+			__user_swpb_asm(*data, address, res, temp);
+		else
+			__user_swp_asm(*data, address, res, temp);
+
+		if (likely(res != -EAGAIN) || signal_pending(current))
+			break;
+
+		cond_resched();
+	}
+
+	if (res == 0) {
+		/*
+		 * Barrier also required between aquiring a lock for a
+		 * protected resource and accessing the resource. Inserted for
+		 * same reason as above.
+		 */
+		smp_mb();
+
+		if (type == TYPE_SWPB)
+			swpbcounter++;
+		else
+			swpcounter++;
+	}
+
+	return res;
+}
+
+/*
+ * swp_handler logs the id of calling process, dissects the instruction, sanity
+ * checks the memory location, calls emulate_swpX for the actual operation and
+ * deals with fixup/error handling before returning
+ */
+static int swp_handler(struct pt_regs *regs, unsigned int instr)
+{
+	unsigned int address, destreg, data, type;
+	unsigned int res = 0;
+
+	perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, 0, regs, regs->ARM_pc);
+
+	if (current->pid != previous_pid) {
+		pr_debug("\"%s\" (%ld) uses deprecated SWP{B} instruction\n",
+			 current->comm, (unsigned long)current->pid);
+		previous_pid = current->pid;
+	}
+
+	address = regs->uregs[EXTRACT_REG_NUM(instr, RN_OFFSET)];
+	data	= regs->uregs[EXTRACT_REG_NUM(instr, RT2_OFFSET)];
+	destreg = EXTRACT_REG_NUM(instr, RT_OFFSET);
+
+	type = instr & TYPE_SWPB;
+
+	pr_debug("addr in r%d->0x%08x, dest is r%d, source in r%d->0x%08x)\n",
+		 EXTRACT_REG_NUM(instr, RN_OFFSET), address,
+		 destreg, EXTRACT_REG_NUM(instr, RT2_OFFSET), data);
+
+	/* Check access in reasonable access range for both SWP and SWPB */
+	if (!access_ok(VERIFY_WRITE, (address & ~3), 4)) {
+		pr_debug("SWP{B} emulation: access to %p not allowed!\n",
+			 (void *)address);
+		res = -EFAULT;
+	} else {
+		res = emulate_swpX(address, &data, type);
+	}
+
+	if (res == 0) {
+		/*
+		 * On successful emulation, revert the adjustment to the PC
+		 * made in kernel/traps.c in order to resume execution at the
+		 * instruction following the SWP{B}.
+		 */
+		regs->ARM_pc += 4;
+		regs->uregs[destreg] = data;
+	} else if (res == -EFAULT) {
+		/*
+		 * Memory errors do not mean emulation failed.
+		 * Set up signal info to return SEGV, then return OK
+		 */
+		set_segfault(regs, address);
+	}
+
+	return 0;
+}
+
+/*
+ * Only emulate SWP/SWPB executed in ARM state/User mode.
+ * The kernel must be SWP free and SWP{B} does not exist in Thumb/ThumbEE.
+ */
+static struct undef_hook swp_hook = {
+	.instr_mask = 0x0fb00ff0,
+	.instr_val  = 0x01000090,
+	.cpsr_mask  = MODE_MASK | PSR_T_BIT | PSR_J_BIT,
+	.cpsr_val   = USR_MODE,
+	.fn	    = swp_handler
+};
+
+/*
+ * Register handler and create status file in /proc/cpu
+ * Invoked as late_initcall, since not needed before init spawned.
+ */
+static int __init swp_emulation_init(void)
+{
+#ifdef CONFIG_PROC_FS
+	struct proc_dir_entry *res;
+
+	res = create_proc_entry("cpu/swp_emulation", S_IRUGO, NULL);
+
+	if (!res)
+		return -ENOMEM;
+
+	res->read_proc = proc_read_status;
+#endif /* CONFIG_PROC_FS */
+
+	printk(KERN_NOTICE "Registering SWP/SWPB emulation handler\n");
+	register_undef_hook(&swp_hook);
+
+	return 0;
+}
+
+late_initcall(swp_emulation_init);
diff --git a/arch/arm/kernel/time.c b/arch/arm/kernel/time.c
index 38c261f9951c..f1e2eb19a67d 100644
--- a/arch/arm/kernel/time.c
+++ b/arch/arm/kernel/time.c
@@ -30,12 +30,13 @@
 #include <asm/leds.h>
 #include <asm/thread_info.h>
 #include <asm/stacktrace.h>
+#include <asm/mach/arch.h>
 #include <asm/mach/time.h>
 
 /*
  * Our system timer.
  */
-struct sys_timer *system_timer;
+static struct sys_timer *system_timer;
 
 #if defined(CONFIG_RTC_DRV_CMOS) || defined(CONFIG_RTC_DRV_CMOS_MODULE)
 /* this needs a better home */
@@ -160,6 +161,7 @@ device_initcall(timer_init_sysfs);
 
 void __init time_init(void)
 {
+	system_timer = machine_desc->timer;
 	system_timer->init();
 }
 
diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c
index 446aee97436f..ee57640ba2bb 100644
--- a/arch/arm/kernel/traps.c
+++ b/arch/arm/kernel/traps.c
@@ -37,6 +37,8 @@
 
 static const char *handler[]= { "prefetch abort", "data abort", "address exception", "interrupt" };
 
+void *vectors_page;
+
 #ifdef CONFIG_DEBUG_USER
 unsigned int user_debug;
 
@@ -708,19 +710,19 @@ void __readwrite_bug(const char *fn)
 }
 EXPORT_SYMBOL(__readwrite_bug);
 
-void __pte_error(const char *file, int line, unsigned long val)
+void __pte_error(const char *file, int line, pte_t pte)
 {
-	printk("%s:%d: bad pte %08lx.\n", file, line, val);
+	printk("%s:%d: bad pte %08lx.\n", file, line, pte_val(pte));
 }
 
-void __pmd_error(const char *file, int line, unsigned long val)
+void __pmd_error(const char *file, int line, pmd_t pmd)
 {
-	printk("%s:%d: bad pmd %08lx.\n", file, line, val);
+	printk("%s:%d: bad pmd %08lx.\n", file, line, pmd_val(pmd));
 }
 
-void __pgd_error(const char *file, int line, unsigned long val)
+void __pgd_error(const char *file, int line, pgd_t pgd)
 {
-	printk("%s:%d: bad pgd %08lx.\n", file, line, val);
+	printk("%s:%d: bad pgd %08lx.\n", file, line, pgd_val(pgd));
 }
 
 asmlinkage void __div0(void)
@@ -756,7 +758,11 @@ static void __init kuser_get_tls_init(unsigned long vectors)
 
 void __init early_trap_init(void)
 {
+#if defined(CONFIG_CPU_USE_DOMAINS)
 	unsigned long vectors = CONFIG_VECTORS_BASE;
+#else
+	unsigned long vectors = (unsigned long)vectors_page;
+#endif
 	extern char __stubs_start[], __stubs_end[];
 	extern char __vectors_start[], __vectors_end[];
 	extern char __kuser_helper_start[], __kuser_helper_end[];
@@ -780,10 +786,10 @@ void __init early_trap_init(void)
 	 * Copy signal return handlers into the vector page, and
 	 * set sigreturn to be a pointer to these.
 	 */
-	memcpy((void *)KERN_SIGRETURN_CODE, sigreturn_codes,
-	       sizeof(sigreturn_codes));
-	memcpy((void *)KERN_RESTART_CODE, syscall_restart_code,
-	       sizeof(syscall_restart_code));
+	memcpy((void *)(vectors + KERN_SIGRETURN_CODE - CONFIG_VECTORS_BASE),
+	       sigreturn_codes, sizeof(sigreturn_codes));
+	memcpy((void *)(vectors + KERN_RESTART_CODE - CONFIG_VECTORS_BASE),
+	       syscall_restart_code, sizeof(syscall_restart_code));
 
 	flush_icache_range(vectors, vectors + PAGE_SIZE);
 	modify_domain(DOMAIN_USER, DOMAIN_CLIENT);
diff --git a/arch/arm/kernel/vmlinux.lds.S b/arch/arm/kernel/vmlinux.lds.S
index 897c1a8f1694..86b66f3f2031 100644
--- a/arch/arm/kernel/vmlinux.lds.S
+++ b/arch/arm/kernel/vmlinux.lds.S
@@ -168,6 +168,7 @@ SECTIONS
 
 		NOSAVE_DATA
 		CACHELINE_ALIGNED_DATA(32)
+		READ_MOSTLY_DATA(32)
 
 		/*
 		 * The exception fixup table (might need resorting at runtime)
diff --git a/arch/arm/lib/getuser.S b/arch/arm/lib/getuser.S
index b1631a7dbe75..1b049cd7a49a 100644
--- a/arch/arm/lib/getuser.S
+++ b/arch/arm/lib/getuser.S
@@ -28,20 +28,21 @@
  */
 #include <linux/linkage.h>
 #include <asm/errno.h>
+#include <asm/domain.h>
 
 ENTRY(__get_user_1)
-1:	ldrbt	r2, [r0]
+1:	T(ldrb)	r2, [r0]
 	mov	r0, #0
 	mov	pc, lr
 ENDPROC(__get_user_1)
 
 ENTRY(__get_user_2)
 #ifdef CONFIG_THUMB2_KERNEL
-2:	ldrbt	r2, [r0]
-3:	ldrbt	r3, [r0, #1]
+2:	T(ldrb)	r2, [r0]
+3:	T(ldrb)	r3, [r0, #1]
 #else
-2:	ldrbt	r2, [r0], #1
-3:	ldrbt	r3, [r0]
+2:	T(ldrb)	r2, [r0], #1
+3:	T(ldrb)	r3, [r0]
 #endif
 #ifndef __ARMEB__
 	orr	r2, r2, r3, lsl #8
@@ -53,7 +54,7 @@ ENTRY(__get_user_2)
 ENDPROC(__get_user_2)
 
 ENTRY(__get_user_4)
-4:	ldrt	r2, [r0]
+4:	T(ldr)	r2, [r0]
 	mov	r0, #0
 	mov	pc, lr
 ENDPROC(__get_user_4)
diff --git a/arch/arm/lib/putuser.S b/arch/arm/lib/putuser.S
index 5a01a23c6c06..c023fc11e86c 100644
--- a/arch/arm/lib/putuser.S
+++ b/arch/arm/lib/putuser.S
@@ -28,9 +28,10 @@
  */
 #include <linux/linkage.h>
 #include <asm/errno.h>
+#include <asm/domain.h>
 
 ENTRY(__put_user_1)
-1:	strbt	r2, [r0]
+1:	T(strb)	r2, [r0]
 	mov	r0, #0
 	mov	pc, lr
 ENDPROC(__put_user_1)
@@ -39,19 +40,19 @@ ENTRY(__put_user_2)
 	mov	ip, r2, lsr #8
 #ifdef CONFIG_THUMB2_KERNEL
 #ifndef __ARMEB__
-2:	strbt	r2, [r0]
-3:	strbt	ip, [r0, #1]
+2:	T(strb)	r2, [r0]
+3:	T(strb)	ip, [r0, #1]
 #else
-2:	strbt	ip, [r0]
-3:	strbt	r2, [r0, #1]
+2:	T(strb)	ip, [r0]
+3:	T(strb)	r2, [r0, #1]
 #endif
 #else	/* !CONFIG_THUMB2_KERNEL */
 #ifndef __ARMEB__
-2:	strbt	r2, [r0], #1
-3:	strbt	ip, [r0]
+2:	T(strb)	r2, [r0], #1
+3:	T(strb)	ip, [r0]
 #else
-2:	strbt	ip, [r0], #1
-3:	strbt	r2, [r0]
+2:	T(strb)	ip, [r0], #1
+3:	T(strb)	r2, [r0]
 #endif
 #endif	/* CONFIG_THUMB2_KERNEL */
 	mov	r0, #0
@@ -59,18 +60,18 @@ ENTRY(__put_user_2)
 ENDPROC(__put_user_2)
 
 ENTRY(__put_user_4)
-4:	strt	r2, [r0]
+4:	T(str)	r2, [r0]
 	mov	r0, #0
 	mov	pc, lr
 ENDPROC(__put_user_4)
 
 ENTRY(__put_user_8)
 #ifdef CONFIG_THUMB2_KERNEL
-5:	strt	r2, [r0]
-6:	strt	r3, [r0, #4]
+5:	T(str)	r2, [r0]
+6:	T(str)	r3, [r0, #4]
 #else
-5:	strt	r2, [r0], #4
-6:	strt	r3, [r0]
+5:	T(str)	r2, [r0], #4
+6:	T(str)	r3, [r0]
 #endif
 	mov	r0, #0
 	mov	pc, lr
diff --git a/arch/arm/lib/uaccess.S b/arch/arm/lib/uaccess.S
index fee9f6f88adb..d0ece2aeb70d 100644
--- a/arch/arm/lib/uaccess.S
+++ b/arch/arm/lib/uaccess.S
@@ -14,6 +14,7 @@
 #include <linux/linkage.h>
 #include <asm/assembler.h>
 #include <asm/errno.h>
+#include <asm/domain.h>
 
 		.text
 
@@ -31,11 +32,11 @@
 		rsb	ip, ip, #4
 		cmp	ip, #2
 		ldrb	r3, [r1], #1
-USER(		strbt	r3, [r0], #1)			@ May fault
+USER(		T(strb)	r3, [r0], #1)			@ May fault
 		ldrgeb	r3, [r1], #1
-USER(		strgebt	r3, [r0], #1)			@ May fault
+USER(		T(strgeb) r3, [r0], #1)			@ May fault
 		ldrgtb	r3, [r1], #1
-USER(		strgtbt	r3, [r0], #1)			@ May fault
+USER(		T(strgtb) r3, [r0], #1)			@ May fault
 		sub	r2, r2, ip
 		b	.Lc2u_dest_aligned
 
@@ -58,7 +59,7 @@ ENTRY(__copy_to_user)
 		addmi	ip, r2, #4
 		bmi	.Lc2u_0nowords
 		ldr	r3, [r1], #4
-USER(		strt	r3, [r0], #4)			@ May fault
+USER(		T(str)	r3, [r0], #4)			@ May fault
 		mov	ip, r0, lsl #32 - PAGE_SHIFT	@ On each page, use a ld/st??t instruction
 		rsb	ip, ip, #0
 		movs	ip, ip, lsr #32 - PAGE_SHIFT
@@ -87,18 +88,18 @@ USER(		strt	r3, [r0], #4)			@ May fault
 		stmneia	r0!, {r3 - r4}			@ Shouldnt fault
 		tst	ip, #4
 		ldrne	r3, [r1], #4
-		strnet	r3, [r0], #4			@ Shouldnt fault
+		T(strne) r3, [r0], #4			@ Shouldnt fault
 		ands	ip, ip, #3
 		beq	.Lc2u_0fupi
 .Lc2u_0nowords:	teq	ip, #0
 		beq	.Lc2u_finished
 .Lc2u_nowords:	cmp	ip, #2
 		ldrb	r3, [r1], #1
-USER(		strbt	r3, [r0], #1)			@ May fault
+USER(		T(strb)	r3, [r0], #1)			@ May fault
 		ldrgeb	r3, [r1], #1
-USER(		strgebt	r3, [r0], #1)			@ May fault
+USER(		T(strgeb) r3, [r0], #1)			@ May fault
 		ldrgtb	r3, [r1], #1
-USER(		strgtbt	r3, [r0], #1)			@ May fault
+USER(		T(strgtb) r3, [r0], #1)			@ May fault
 		b	.Lc2u_finished
 
 .Lc2u_not_enough:
@@ -119,7 +120,7 @@ USER(		strgtbt	r3, [r0], #1)			@ May fault
 		mov	r3, r7, pull #8
 		ldr	r7, [r1], #4
 		orr	r3, r3, r7, push #24
-USER(		strt	r3, [r0], #4)			@ May fault
+USER(		T(str)	r3, [r0], #4)			@ May fault
 		mov	ip, r0, lsl #32 - PAGE_SHIFT
 		rsb	ip, ip, #0
 		movs	ip, ip, lsr #32 - PAGE_SHIFT
@@ -154,18 +155,18 @@ USER(		strt	r3, [r0], #4)			@ May fault
 		movne	r3, r7, pull #8
 		ldrne	r7, [r1], #4
 		orrne	r3, r3, r7, push #24
-		strnet	r3, [r0], #4			@ Shouldnt fault
+		T(strne) r3, [r0], #4			@ Shouldnt fault
 		ands	ip, ip, #3
 		beq	.Lc2u_1fupi
 .Lc2u_1nowords:	mov	r3, r7, get_byte_1
 		teq	ip, #0
 		beq	.Lc2u_finished
 		cmp	ip, #2
-USER(		strbt	r3, [r0], #1)			@ May fault
+USER(		T(strb)	r3, [r0], #1)			@ May fault
 		movge	r3, r7, get_byte_2
-USER(		strgebt	r3, [r0], #1)			@ May fault
+USER(		T(strgeb) r3, [r0], #1)			@ May fault
 		movgt	r3, r7, get_byte_3
-USER(		strgtbt	r3, [r0], #1)			@ May fault
+USER(		T(strgtb) r3, [r0], #1)			@ May fault
 		b	.Lc2u_finished
 
 .Lc2u_2fupi:	subs	r2, r2, #4
@@ -174,7 +175,7 @@ USER(		strgtbt	r3, [r0], #1)			@ May fault
 		mov	r3, r7, pull #16
 		ldr	r7, [r1], #4
 		orr	r3, r3, r7, push #16
-USER(		strt	r3, [r0], #4)			@ May fault
+USER(		T(str)	r3, [r0], #4)			@ May fault
 		mov	ip, r0, lsl #32 - PAGE_SHIFT
 		rsb	ip, ip, #0
 		movs	ip, ip, lsr #32 - PAGE_SHIFT
@@ -209,18 +210,18 @@ USER(		strt	r3, [r0], #4)			@ May fault
 		movne	r3, r7, pull #16
 		ldrne	r7, [r1], #4
 		orrne	r3, r3, r7, push #16
-		strnet	r3, [r0], #4			@ Shouldnt fault
+		T(strne) r3, [r0], #4			@ Shouldnt fault
 		ands	ip, ip, #3
 		beq	.Lc2u_2fupi
 .Lc2u_2nowords:	mov	r3, r7, get_byte_2
 		teq	ip, #0
 		beq	.Lc2u_finished
 		cmp	ip, #2
-USER(		strbt	r3, [r0], #1)			@ May fault
+USER(		T(strb)	r3, [r0], #1)			@ May fault
 		movge	r3, r7, get_byte_3
-USER(		strgebt	r3, [r0], #1)			@ May fault
+USER(		T(strgeb) r3, [r0], #1)			@ May fault
 		ldrgtb	r3, [r1], #0
-USER(		strgtbt	r3, [r0], #1)			@ May fault
+USER(		T(strgtb) r3, [r0], #1)			@ May fault
 		b	.Lc2u_finished
 
 .Lc2u_3fupi:	subs	r2, r2, #4
@@ -229,7 +230,7 @@ USER(		strgtbt	r3, [r0], #1)			@ May fault
 		mov	r3, r7, pull #24
 		ldr	r7, [r1], #4
 		orr	r3, r3, r7, push #8
-USER(		strt	r3, [r0], #4)			@ May fault
+USER(		T(str)	r3, [r0], #4)			@ May fault
 		mov	ip, r0, lsl #32 - PAGE_SHIFT
 		rsb	ip, ip, #0
 		movs	ip, ip, lsr #32 - PAGE_SHIFT
@@ -264,18 +265,18 @@ USER(		strt	r3, [r0], #4)			@ May fault
 		movne	r3, r7, pull #24
 		ldrne	r7, [r1], #4
 		orrne	r3, r3, r7, push #8
-		strnet	r3, [r0], #4			@ Shouldnt fault
+		T(strne) r3, [r0], #4			@ Shouldnt fault
 		ands	ip, ip, #3
 		beq	.Lc2u_3fupi
 .Lc2u_3nowords:	mov	r3, r7, get_byte_3
 		teq	ip, #0
 		beq	.Lc2u_finished
 		cmp	ip, #2
-USER(		strbt	r3, [r0], #1)			@ May fault
+USER(		T(strb)	r3, [r0], #1)			@ May fault
 		ldrgeb	r3, [r1], #1
-USER(		strgebt	r3, [r0], #1)			@ May fault
+USER(		T(strgeb) r3, [r0], #1)			@ May fault
 		ldrgtb	r3, [r1], #0
-USER(		strgtbt	r3, [r0], #1)			@ May fault
+USER(		T(strgtb) r3, [r0], #1)			@ May fault
 		b	.Lc2u_finished
 ENDPROC(__copy_to_user)
 
@@ -294,11 +295,11 @@ ENDPROC(__copy_to_user)
 .Lcfu_dest_not_aligned:
 		rsb	ip, ip, #4
 		cmp	ip, #2
-USER(		ldrbt	r3, [r1], #1)			@ May fault
+USER(		T(ldrb)	r3, [r1], #1)			@ May fault
 		strb	r3, [r0], #1
-USER(		ldrgebt	r3, [r1], #1)			@ May fault
+USER(		T(ldrgeb) r3, [r1], #1)			@ May fault
 		strgeb	r3, [r0], #1
-USER(		ldrgtbt	r3, [r1], #1)			@ May fault
+USER(		T(ldrgtb) r3, [r1], #1)			@ May fault
 		strgtb	r3, [r0], #1
 		sub	r2, r2, ip
 		b	.Lcfu_dest_aligned
@@ -321,7 +322,7 @@ ENTRY(__copy_from_user)
 .Lcfu_0fupi:	subs	r2, r2, #4
 		addmi	ip, r2, #4
 		bmi	.Lcfu_0nowords
-USER(		ldrt	r3, [r1], #4)
+USER(		T(ldr)	r3, [r1], #4)
 		str	r3, [r0], #4
 		mov	ip, r1, lsl #32 - PAGE_SHIFT	@ On each page, use a ld/st??t instruction
 		rsb	ip, ip, #0
@@ -350,18 +351,18 @@ USER(		ldrt	r3, [r1], #4)
 		ldmneia	r1!, {r3 - r4}			@ Shouldnt fault
 		stmneia	r0!, {r3 - r4}
 		tst	ip, #4
-		ldrnet	r3, [r1], #4			@ Shouldnt fault
+		T(ldrne) r3, [r1], #4			@ Shouldnt fault
 		strne	r3, [r0], #4
 		ands	ip, ip, #3
 		beq	.Lcfu_0fupi
 .Lcfu_0nowords:	teq	ip, #0
 		beq	.Lcfu_finished
 .Lcfu_nowords:	cmp	ip, #2
-USER(		ldrbt	r3, [r1], #1)			@ May fault
+USER(		T(ldrb)	r3, [r1], #1)			@ May fault
 		strb	r3, [r0], #1
-USER(		ldrgebt	r3, [r1], #1)			@ May fault
+USER(		T(ldrgeb) r3, [r1], #1)			@ May fault
 		strgeb	r3, [r0], #1
-USER(		ldrgtbt	r3, [r1], #1)			@ May fault
+USER(		T(ldrgtb) r3, [r1], #1)			@ May fault
 		strgtb	r3, [r0], #1
 		b	.Lcfu_finished
 
@@ -374,7 +375,7 @@ USER(		ldrgtbt	r3, [r1], #1)			@ May fault
 
 .Lcfu_src_not_aligned:
 		bic	r1, r1, #3
-USER(		ldrt	r7, [r1], #4)			@ May fault
+USER(		T(ldr)	r7, [r1], #4)			@ May fault
 		cmp	ip, #2
 		bgt	.Lcfu_3fupi
 		beq	.Lcfu_2fupi
@@ -382,7 +383,7 @@ USER(		ldrt	r7, [r1], #4)			@ May fault
 		addmi	ip, r2, #4
 		bmi	.Lcfu_1nowords
 		mov	r3, r7, pull #8
-USER(		ldrt	r7, [r1], #4)			@ May fault
+USER(		T(ldr)	r7, [r1], #4)			@ May fault
 		orr	r3, r3, r7, push #24
 		str	r3, [r0], #4
 		mov	ip, r1, lsl #32 - PAGE_SHIFT
@@ -417,7 +418,7 @@ USER(		ldrt	r7, [r1], #4)			@ May fault
 		stmneia	r0!, {r3 - r4}
 		tst	ip, #4
 		movne	r3, r7, pull #8
-USER(		ldrnet	r7, [r1], #4)			@ May fault
+USER(		T(ldrne) r7, [r1], #4)			@ May fault
 		orrne	r3, r3, r7, push #24
 		strne	r3, [r0], #4
 		ands	ip, ip, #3
@@ -437,7 +438,7 @@ USER(		ldrnet	r7, [r1], #4)			@ May fault
 		addmi	ip, r2, #4
 		bmi	.Lcfu_2nowords
 		mov	r3, r7, pull #16
-USER(		ldrt	r7, [r1], #4)			@ May fault
+USER(		T(ldr)	r7, [r1], #4)			@ May fault
 		orr	r3, r3, r7, push #16
 		str	r3, [r0], #4
 		mov	ip, r1, lsl #32 - PAGE_SHIFT
@@ -473,7 +474,7 @@ USER(		ldrt	r7, [r1], #4)			@ May fault
 		stmneia	r0!, {r3 - r4}
 		tst	ip, #4
 		movne	r3, r7, pull #16
-USER(		ldrnet	r7, [r1], #4)			@ May fault
+USER(		T(ldrne) r7, [r1], #4)			@ May fault
 		orrne	r3, r3, r7, push #16
 		strne	r3, [r0], #4
 		ands	ip, ip, #3
@@ -485,7 +486,7 @@ USER(		ldrnet	r7, [r1], #4)			@ May fault
 		strb	r3, [r0], #1
 		movge	r3, r7, get_byte_3
 		strgeb	r3, [r0], #1
-USER(		ldrgtbt	r3, [r1], #0)			@ May fault
+USER(		T(ldrgtb) r3, [r1], #0)			@ May fault
 		strgtb	r3, [r0], #1
 		b	.Lcfu_finished
 
@@ -493,7 +494,7 @@ USER(		ldrgtbt	r3, [r1], #0)			@ May fault
 		addmi	ip, r2, #4
 		bmi	.Lcfu_3nowords
 		mov	r3, r7, pull #24
-USER(		ldrt	r7, [r1], #4)			@ May fault
+USER(		T(ldr)	r7, [r1], #4)			@ May fault
 		orr	r3, r3, r7, push #8
 		str	r3, [r0], #4
 		mov	ip, r1, lsl #32 - PAGE_SHIFT
@@ -528,7 +529,7 @@ USER(		ldrt	r7, [r1], #4)			@ May fault
 		stmneia	r0!, {r3 - r4}
 		tst	ip, #4
 		movne	r3, r7, pull #24
-USER(		ldrnet	r7, [r1], #4)			@ May fault
+USER(		T(ldrne) r7, [r1], #4)			@ May fault
 		orrne	r3, r3, r7, push #8
 		strne	r3, [r0], #4
 		ands	ip, ip, #3
@@ -538,9 +539,9 @@ USER(		ldrnet	r7, [r1], #4)			@ May fault
 		beq	.Lcfu_finished
 		cmp	ip, #2
 		strb	r3, [r0], #1
-USER(		ldrgebt	r3, [r1], #1)			@ May fault
+USER(		T(ldrgeb) r3, [r1], #1)			@ May fault
 		strgeb	r3, [r0], #1
-USER(		ldrgtbt	r3, [r1], #1)			@ May fault
+USER(		T(ldrgtb) r3, [r1], #1)			@ May fault
 		strgtb	r3, [r0], #1
 		b	.Lcfu_finished
 ENDPROC(__copy_from_user)
diff --git a/arch/arm/mach-at91/Makefile b/arch/arm/mach-at91/Makefile
index 62d686f0b426..d13add71f72a 100644
--- a/arch/arm/mach-at91/Makefile
+++ b/arch/arm/mach-at91/Makefile
@@ -65,7 +65,7 @@ obj-$(CONFIG_MACH_AT91SAM9G20EK) += board-sam9g20ek.o
 obj-$(CONFIG_MACH_CPU9G20)	+= board-cpu9krea.o
 obj-$(CONFIG_MACH_STAMP9G20)	+= board-stamp9g20.o
 obj-$(CONFIG_MACH_PORTUXG20)	+= board-stamp9g20.o
-obj-$(CONFIG_MACH_PCONTROL_G20)	+= board-pcontrol-g20.o
+obj-$(CONFIG_MACH_PCONTROL_G20)	+= board-pcontrol-g20.o board-stamp9g20.o
 
 # AT91SAM9260/AT91SAM9G20 board-specific support
 obj-$(CONFIG_MACH_SNAPPER_9260)	+= board-snapper9260.o
diff --git a/arch/arm/mach-at91/at91rm9200_time.c b/arch/arm/mach-at91/at91rm9200_time.c
index 2500f41d8d2d..1dd69c85dfec 100644
--- a/arch/arm/mach-at91/at91rm9200_time.c
+++ b/arch/arm/mach-at91/at91rm9200_time.c
@@ -101,7 +101,6 @@ static struct clocksource clk32k = {
 	.rating		= 150,
 	.read		= read_clk32k,
 	.mask		= CLOCKSOURCE_MASK(20),
-	.shift		= 10,
 	.flags		= CLOCK_SOURCE_IS_CONTINUOUS,
 };
 
@@ -201,8 +200,7 @@ void __init at91rm9200_timer_init(void)
 	clockevents_register_device(&clkevt);
 
 	/* register clocksource */
-	clk32k.mult = clocksource_hz2mult(AT91_SLOW_CLOCK, clk32k.shift);
-	clocksource_register(&clk32k);
+	clocksource_register_hz(&clk32k, AT91_SLOW_CLOCK);
 }
 
 struct sys_timer at91rm9200_timer = {
diff --git a/arch/arm/mach-at91/at91sam926x_time.c b/arch/arm/mach-at91/at91sam926x_time.c
index 608a63240b64..4ba85499fa97 100644
--- a/arch/arm/mach-at91/at91sam926x_time.c
+++ b/arch/arm/mach-at91/at91sam926x_time.c
@@ -51,7 +51,6 @@ static struct clocksource pit_clk = {
 	.name		= "pit",
 	.rating		= 175,
 	.read		= read_pit_clk,
-	.shift		= 20,
 	.flags		= CLOCK_SOURCE_IS_CONTINUOUS,
 };
 
@@ -163,10 +162,9 @@ static void __init at91sam926x_pit_init(void)
 	 * Register clocksource.  The high order bits of PIV are unused,
 	 * so this isn't a 32-bit counter unless we get clockevent irqs.
 	 */
-	pit_clk.mult = clocksource_hz2mult(pit_rate, pit_clk.shift);
 	bits = 12 /* PICNT */ + ilog2(pit_cycle) /* PIV */;
 	pit_clk.mask = CLOCKSOURCE_MASK(bits);
-	clocksource_register(&pit_clk);
+	clocksource_register_hz(&pit_clk, pit_rate);
 
 	/* Set up irq handler */
 	setup_irq(AT91_ID_SYS, &at91sam926x_pit_irq);
diff --git a/arch/arm/mach-at91/board-pcontrol-g20.c b/arch/arm/mach-at91/board-pcontrol-g20.c
index bba5a560e02b..feb65787c30b 100644
--- a/arch/arm/mach-at91/board-pcontrol-g20.c
+++ b/arch/arm/mach-at91/board-pcontrol-g20.c
@@ -31,6 +31,7 @@
 
 #include <mach/board.h>
 #include <mach/at91sam9_smc.h>
+#include <mach/stamp9g20.h>
 
 #include "sam9_smc.h"
 #include "generic.h"
@@ -38,11 +39,7 @@
 
 static void __init pcontrol_g20_map_io(void)
 {
-	/* Initialize processor: 18.432 MHz crystal */
-	at91sam9260_initialize(18432000);
-
-	/* DGBU on ttyS0. (Rx, Tx) only TTL -> JTAG connector X7 17,19 ) */
-	at91_register_uart(0, 0, 0);
+	stamp9g20_map_io();
 
 	/* USART0 on ttyS1. (Rx, Tx, CTS, RTS) piggyback  A2 */
 	at91_register_uart(AT91SAM9260_ID_US0, 1, ATMEL_UART_CTS
@@ -54,9 +51,6 @@ static void __init pcontrol_g20_map_io(void)
 
 	/* USART2 on ttyS3. (Rx, Tx)  9bit-Bus  Multidrop-mode  X4 */
 	at91_register_uart(AT91SAM9260_ID_US4, 3, 0);
-
-	/* set serial console to ttyS0 (ie, DBGU) */
-	at91_set_serial_console(0);
 }
 
 
@@ -66,38 +60,6 @@ static void __init init_irq(void)
 }
 
 
-/*
- * NAND flash 512MiB 1,8V 8-bit, sector size 128 KiB
- */
-static struct atmel_nand_data __initdata nand_data = {
-	.ale		= 21,
-	.cle		= 22,
-	.rdy_pin	= AT91_PIN_PC13,
-	.enable_pin	= AT91_PIN_PC14,
-};
-
-/*
- * Bus timings; unit = 7.57ns
- */
-static struct sam9_smc_config __initdata nand_smc_config = {
-	.ncs_read_setup		= 0,
-	.nrd_setup		= 2,
-	.ncs_write_setup	= 0,
-	.nwe_setup		= 2,
-
-	.ncs_read_pulse		= 4,
-	.nrd_pulse		= 4,
-	.ncs_write_pulse	= 4,
-	.nwe_pulse		= 4,
-
-	.read_cycle		= 7,
-	.write_cycle		= 7,
-
-	.mode			= AT91_SMC_READMODE | AT91_SMC_WRITEMODE
-			| AT91_SMC_EXNWMODE_DISABLE | AT91_SMC_DBW_8,
-	.tdf_cycles		= 3,
-};
-
 static struct sam9_smc_config __initdata pcontrol_smc_config[2] = { {
 	.ncs_read_setup		= 16,
 	.nrd_setup		= 18,
@@ -138,14 +100,6 @@ static struct sam9_smc_config __initdata pcontrol_smc_config[2] = { {
 	.tdf_cycles		= 1,
 } };
 
-static void __init add_device_nand(void)
-{
-	/* configure chip-select 3 (NAND) */
-	sam9_smc_configure(3, &nand_smc_config);
-	at91_add_device_nand(&nand_data);
-}
-
-
 static void __init add_device_pcontrol(void)
 {
 	/* configure chip-select 4 (IO compatible to 8051  X4 ) */
@@ -156,23 +110,6 @@ static void __init add_device_pcontrol(void)
 
 
 /*
- * MCI (SD/MMC)
- * det_pin, wp_pin and vcc_pin are not connected
- */
-#if defined(CONFIG_MMC_ATMELMCI) || defined(CONFIG_MMC_ATMELMCI_MODULE)
-static struct mci_platform_data __initdata mmc_data = {
-	.slot[0] = {
-		.bus_width	= 4,
-	},
-};
-#else
-static struct at91_mmc_data __initdata mmc_data = {
-	.wire4		= 1,
-};
-#endif
-
-
-/*
  * USB Host port
  */
 static struct at91_usbh_data __initdata usbh_data = {
@@ -265,42 +202,13 @@ static struct spi_board_info pcontrol_g20_spi_devices[] = {
 };
 
 
-/*
- * Dallas 1-Wire  DS2431
- */
-static struct w1_gpio_platform_data w1_gpio_pdata = {
-	.pin		= AT91_PIN_PA29,
-	.is_open_drain	= 1,
-};
-
-static struct platform_device w1_device = {
-	.name			= "w1-gpio",
-	.id			= -1,
-	.dev.platform_data	= &w1_gpio_pdata,
-};
-
-static void add_wire1(void)
-{
-	at91_set_GPIO_periph(w1_gpio_pdata.pin, 1);
-	at91_set_multi_drive(w1_gpio_pdata.pin, 1);
-	platform_device_register(&w1_device);
-}
-
-
 static void __init pcontrol_g20_board_init(void)
 {
-	at91_add_device_serial();
-	add_device_nand();
-#if defined(CONFIG_MMC_ATMELMCI) || defined(CONFIG_MMC_ATMELMCI_MODULE)
-	at91_add_device_mci(0, &mmc_data);
-#else
-	at91_add_device_mmc(0, &mmc_data);
-#endif
+	stamp9g20_board_init();
 	at91_add_device_usbh(&usbh_data);
 	at91_add_device_eth(&macb_data);
 	at91_add_device_i2c(pcontrol_g20_i2c_devices,
 		ARRAY_SIZE(pcontrol_g20_i2c_devices));
-	add_wire1();
 	add_device_pcontrol();
 	at91_add_device_spi(pcontrol_g20_spi_devices,
 		ARRAY_SIZE(pcontrol_g20_spi_devices));
diff --git a/arch/arm/mach-at91/board-stamp9g20.c b/arch/arm/mach-at91/board-stamp9g20.c
index 5206eef4a67e..f8902b118960 100644
--- a/arch/arm/mach-at91/board-stamp9g20.c
+++ b/arch/arm/mach-at91/board-stamp9g20.c
@@ -32,7 +32,7 @@
 #include "generic.h"
 
 
-static void __init portuxg20_map_io(void)
+void __init stamp9g20_map_io(void)
 {
 	/* Initialize processor: 18.432 MHz crystal */
 	at91sam9260_initialize(18432000);
@@ -40,6 +40,24 @@ static void __init portuxg20_map_io(void)
 	/* DGBU on ttyS0. (Rx & Tx only) */
 	at91_register_uart(0, 0, 0);
 
+	/* set serial console to ttyS0 (ie, DBGU) */
+	at91_set_serial_console(0);
+}
+
+static void __init stamp9g20evb_map_io(void)
+{
+	stamp9g20_map_io();
+
+	/* USART0 on ttyS1. (Rx, Tx, CTS, RTS, DTR, DSR, DCD, RI) */
+	at91_register_uart(AT91SAM9260_ID_US0, 1, ATMEL_UART_CTS | ATMEL_UART_RTS
+						| ATMEL_UART_DTR | ATMEL_UART_DSR
+						| ATMEL_UART_DCD | ATMEL_UART_RI);
+}
+
+static void __init portuxg20_map_io(void)
+{
+	stamp9g20_map_io();
+
 	/* USART0 on ttyS1. (Rx, Tx, CTS, RTS, DTR, DSR, DCD, RI) */
 	at91_register_uart(AT91SAM9260_ID_US0, 1, ATMEL_UART_CTS | ATMEL_UART_RTS
 						| ATMEL_UART_DTR | ATMEL_UART_DSR
@@ -56,26 +74,6 @@ static void __init portuxg20_map_io(void)
 
 	/* USART5 on ttyS6. (Rx, Tx only) */
 	at91_register_uart(AT91SAM9260_ID_US5, 6, 0);
-
-	/* set serial console to ttyS0 (ie, DBGU) */
-	at91_set_serial_console(0);
-}
-
-static void __init stamp9g20_map_io(void)
-{
-	/* Initialize processor: 18.432 MHz crystal */
-	at91sam9260_initialize(18432000);
-
-	/* DGBU on ttyS0. (Rx & Tx only) */
-	at91_register_uart(0, 0, 0);
-
-	/* USART0 on ttyS1. (Rx, Tx, CTS, RTS, DTR, DSR, DCD, RI) */
-	at91_register_uart(AT91SAM9260_ID_US0, 1, ATMEL_UART_CTS | ATMEL_UART_RTS
-						| ATMEL_UART_DTR | ATMEL_UART_DSR
-						| ATMEL_UART_DCD | ATMEL_UART_RI);
-
-	/* set serial console to ttyS0 (ie, DBGU) */
-	at91_set_serial_console(0);
 }
 
 static void __init init_irq(void)
@@ -156,7 +154,7 @@ static struct at91_udc_data __initdata portuxg20_udc_data = {
 	.pullup_pin	= 0,		/* pull-up driven by UDC */
 };
 
-static struct at91_udc_data __initdata stamp9g20_udc_data = {
+static struct at91_udc_data __initdata stamp9g20evb_udc_data = {
 	.vbus_pin	= AT91_PIN_PA22,
 	.pullup_pin	= 0,		/* pull-up driven by UDC */
 };
@@ -190,7 +188,7 @@ static struct gpio_led portuxg20_leds[] = {
 	}
 };
 
-static struct gpio_led stamp9g20_leds[] = {
+static struct gpio_led stamp9g20evb_leds[] = {
 	{
 		.name			= "D8",
 		.gpio			= AT91_PIN_PB18,
@@ -250,7 +248,7 @@ void add_w1(void)
 }
 
 
-static void __init generic_board_init(void)
+void __init stamp9g20_board_init(void)
 {
 	/* Serial */
 	at91_add_device_serial();
@@ -262,34 +260,40 @@ static void __init generic_board_init(void)
 #else
 	at91_add_device_mmc(0, &mmc_data);
 #endif
-	/* USB Host */
-	at91_add_device_usbh(&usbh_data);
-	/* Ethernet */
-	at91_add_device_eth(&macb_data);
-	/* I2C */
-	at91_add_device_i2c(NULL, 0);
 	/* W1 */
 	add_w1();
 }
 
 static void __init portuxg20_board_init(void)
 {
-	generic_board_init();
-	/* SPI */
-	at91_add_device_spi(portuxg20_spi_devices, ARRAY_SIZE(portuxg20_spi_devices));
+	stamp9g20_board_init();
+	/* USB Host */
+	at91_add_device_usbh(&usbh_data);
 	/* USB Device */
 	at91_add_device_udc(&portuxg20_udc_data);
+	/* Ethernet */
+	at91_add_device_eth(&macb_data);
+	/* I2C */
+	at91_add_device_i2c(NULL, 0);
+	/* SPI */
+	at91_add_device_spi(portuxg20_spi_devices, ARRAY_SIZE(portuxg20_spi_devices));
 	/* LEDs */
 	at91_gpio_leds(portuxg20_leds, ARRAY_SIZE(portuxg20_leds));
 }
 
-static void __init stamp9g20_board_init(void)
+static void __init stamp9g20evb_board_init(void)
 {
-	generic_board_init();
+	stamp9g20_board_init();
+	/* USB Host */
+	at91_add_device_usbh(&usbh_data);
 	/* USB Device */
-	at91_add_device_udc(&stamp9g20_udc_data);
+	at91_add_device_udc(&stamp9g20evb_udc_data);
+	/* Ethernet */
+	at91_add_device_eth(&macb_data);
+	/* I2C */
+	at91_add_device_i2c(NULL, 0);
 	/* LEDs */
-	at91_gpio_leds(stamp9g20_leds, ARRAY_SIZE(stamp9g20_leds));
+	at91_gpio_leds(stamp9g20evb_leds, ARRAY_SIZE(stamp9g20evb_leds));
 }
 
 MACHINE_START(PORTUXG20, "taskit PortuxG20")
@@ -305,7 +309,7 @@ MACHINE_START(STAMP9G20, "taskit Stamp9G20")
 	/* Maintainer: taskit GmbH */
 	.boot_params	= AT91_SDRAM_BASE + 0x100,
 	.timer		= &at91sam926x_timer,
-	.map_io		= stamp9g20_map_io,
+	.map_io		= stamp9g20evb_map_io,
 	.init_irq	= init_irq,
-	.init_machine	= stamp9g20_board_init,
+	.init_machine	= stamp9g20evb_board_init,
 MACHINE_END
diff --git a/arch/arm/mach-at91/clock.c b/arch/arm/mach-at91/clock.c
index 7525cee3983f..9113da6845f1 100644
--- a/arch/arm/mach-at91/clock.c
+++ b/arch/arm/mach-at91/clock.c
@@ -658,7 +658,7 @@ static void __init at91_upll_usbfs_clock_init(unsigned long main_clock)
 	/* Now set uhpck values */
 	uhpck.parent = &utmi_clk;
 	uhpck.pmc_mask = AT91SAM926x_PMC_UHP;
-	uhpck.rate_hz = utmi_clk.parent->rate_hz;
+	uhpck.rate_hz = utmi_clk.rate_hz;
 	uhpck.rate_hz /= 1 + ((at91_sys_read(AT91_PMC_USB) & AT91_PMC_OHCIUSBDIV) >> 8);
 }
 
diff --git a/arch/arm/mach-at91/include/mach/at91_mci.h b/arch/arm/mach-at91/include/mach/at91_mci.h
index 57f8ee154943..27ac6f550fe3 100644
--- a/arch/arm/mach-at91/include/mach/at91_mci.h
+++ b/arch/arm/mach-at91/include/mach/at91_mci.h
@@ -74,6 +74,8 @@
 #define			AT91_MCI_TRTYP_BLOCK	(0 << 19)
 #define			AT91_MCI_TRTYP_MULTIPLE	(1 << 19)
 #define			AT91_MCI_TRTYP_STREAM	(2 << 19)
+#define			AT91_MCI_TRTYP_SDIO_BYTE	(4 << 19)
+#define			AT91_MCI_TRTYP_SDIO_BLOCK	(5 << 19)
 
 #define AT91_MCI_BLKR		0x18		/* Block Register */
 #define		AT91_MCI_BLKR_BCNT(n)	((0xffff & (n)) << 0)	/* Block count */
diff --git a/arch/arm/mach-at91/include/mach/stamp9g20.h b/arch/arm/mach-at91/include/mach/stamp9g20.h
new file mode 100644
index 000000000000..6120f9c46d59
--- /dev/null
+++ b/arch/arm/mach-at91/include/mach/stamp9g20.h
@@ -0,0 +1,7 @@
+#ifndef __MACH_STAMP9G20_H
+#define __MACH_STAMP9G20_H
+
+void stamp9g20_map_io(void);
+void stamp9g20_board_init(void);
+
+#endif
diff --git a/arch/arm/mach-bcmring/clock.c b/arch/arm/mach-bcmring/clock.c
index 14bafc38f2dc..ad237a42d265 100644
--- a/arch/arm/mach-bcmring/clock.c
+++ b/arch/arm/mach-bcmring/clock.c
@@ -21,13 +21,12 @@
 #include <linux/string.h>
 #include <linux/clk.h>
 #include <linux/spinlock.h>
+#include <linux/clkdev.h>
 #include <mach/csp/hw_cfg.h>
 #include <mach/csp/chipcHw_def.h>
 #include <mach/csp/chipcHw_reg.h>
 #include <mach/csp/chipcHw_inline.h>
 
-#include <asm/clkdev.h>
-
 #include "clock.h"
 
 #define clk_is_primary(x)       ((x)->type & CLK_TYPE_PRIMARY)
diff --git a/arch/arm/mach-bcmring/core.c b/arch/arm/mach-bcmring/core.c
index d3f959e92b2d..8fc2035759fb 100644
--- a/arch/arm/mach-bcmring/core.c
+++ b/arch/arm/mach-bcmring/core.c
@@ -30,10 +30,10 @@
 #include <linux/amba/bus.h>
 #include <linux/clocksource.h>
 #include <linux/clockchips.h>
+#include <linux/clkdev.h>
 
 #include <mach/csp/mm_addr.h>
 #include <mach/hardware.h>
-#include <asm/clkdev.h>
 #include <linux/io.h>
 #include <asm/irq.h>
 #include <asm/hardware/arm_timer.h>
@@ -294,7 +294,6 @@ static struct clocksource clocksource_bcmring_timer1 = {
 	.rating = 200,
 	.read = bcmring_get_cycles_timer1,
 	.mask = CLOCKSOURCE_MASK(32),
-	.shift = 20,
 	.flags = CLOCK_SOURCE_IS_CONTINUOUS,
 };
 
@@ -303,7 +302,6 @@ static struct clocksource clocksource_bcmring_timer3 = {
 	.rating = 100,
 	.read = bcmring_get_cycles_timer3,
 	.mask = CLOCKSOURCE_MASK(32),
-	.shift = 20,
 	.flags = CLOCK_SOURCE_IS_CONTINUOUS,
 };
 
@@ -316,10 +314,8 @@ static int __init bcmring_clocksource_init(void)
 	writel(TIMER_CTRL_32BIT | TIMER_CTRL_ENABLE | TIMER_CTRL_PERIODIC,
 	       TIMER1_VA_BASE + TIMER_CTRL);
 
-	clocksource_bcmring_timer1.mult =
-	    clocksource_khz2mult(TIMER1_FREQUENCY_MHZ * 1000,
-				 clocksource_bcmring_timer1.shift);
-	clocksource_register(&clocksource_bcmring_timer1);
+	clocksource_register_khz(&clocksource_bcmring_timer1,
+				 TIMER1_FREQUENCY_MHZ * 1000);
 
 	/* setup timer3 as free-running clocksource */
 	writel(0, TIMER3_VA_BASE + TIMER_CTRL);
@@ -328,10 +324,8 @@ static int __init bcmring_clocksource_init(void)
 	writel(TIMER_CTRL_32BIT | TIMER_CTRL_ENABLE | TIMER_CTRL_PERIODIC,
 	       TIMER3_VA_BASE + TIMER_CTRL);
 
-	clocksource_bcmring_timer3.mult =
-	    clocksource_khz2mult(TIMER3_FREQUENCY_KHZ,
-				 clocksource_bcmring_timer3.shift);
-	clocksource_register(&clocksource_bcmring_timer3);
+	clocksource_register_khz(&clocksource_bcmring_timer3,
+				 TIMER3_FREQUENCY_KHZ);
 
 	return 0;
 }
diff --git a/arch/arm/mach-cns3xxx/Kconfig b/arch/arm/mach-cns3xxx/Kconfig
index 9ebfcc46feb1..29b13f249aa9 100644
--- a/arch/arm/mach-cns3xxx/Kconfig
+++ b/arch/arm/mach-cns3xxx/Kconfig
@@ -3,6 +3,7 @@ menu "CNS3XXX platform type"
 
 config MACH_CNS3420VB
 	bool "Support for CNS3420 Validation Board"
+	select MIGHT_HAVE_PCI
 	help
 	  Include support for the Cavium Networks CNS3420 MPCore Platform
 	  Baseboard.
diff --git a/arch/arm/mach-cns3xxx/core.c b/arch/arm/mach-cns3xxx/core.c
index 9ca4d581016f..da30078a80c1 100644
--- a/arch/arm/mach-cns3xxx/core.c
+++ b/arch/arm/mach-cns3xxx/core.c
@@ -69,13 +69,10 @@ void __init cns3xxx_map_io(void)
 }
 
 /* used by entry-macro.S */
-void __iomem *gic_cpu_base_addr;
-
 void __init cns3xxx_init_irq(void)
 {
-	gic_cpu_base_addr = __io(CNS3XXX_TC11MP_GIC_CPU_BASE_VIRT);
-	gic_dist_init(0, __io(CNS3XXX_TC11MP_GIC_DIST_BASE_VIRT), 29);
-	gic_cpu_init(0, gic_cpu_base_addr);
+	gic_init(0, 29, __io(CNS3XXX_TC11MP_GIC_DIST_BASE_VIRT),
+		 __io(CNS3XXX_TC11MP_GIC_CPU_BASE_VIRT));
 }
 
 void cns3xxx_power_off(void)
diff --git a/arch/arm/mach-cns3xxx/core.h b/arch/arm/mach-cns3xxx/core.h
index 73898a7835d3..ffeb3a8b73ba 100644
--- a/arch/arm/mach-cns3xxx/core.h
+++ b/arch/arm/mach-cns3xxx/core.h
@@ -11,7 +11,6 @@
 #ifndef __CNS3XXX_CORE_H
 #define __CNS3XXX_CORE_H
 
-extern void __iomem *gic_cpu_base_addr;
 extern struct sys_timer cns3xxx_timer;
 
 void __init cns3xxx_map_io(void);
diff --git a/arch/arm/mach-cns3xxx/include/mach/entry-macro.S b/arch/arm/mach-cns3xxx/include/mach/entry-macro.S
index 5e1c5545680f..6bd83ed90afe 100644
--- a/arch/arm/mach-cns3xxx/include/mach/entry-macro.S
+++ b/arch/arm/mach-cns3xxx/include/mach/entry-macro.S
@@ -9,74 +9,10 @@
  */
 
 #include <mach/hardware.h>
-#include <asm/hardware/gic.h>
+#include <asm/hardware/entry-macro-gic.S>
 
 		.macro	disable_fiq
 		.endm
 
-		.macro  get_irqnr_preamble, base, tmp
-		ldr	\base, =gic_cpu_base_addr
-		ldr	\base, [\base]
-		.endm
-
 		.macro  arch_ret_to_user, tmp1, tmp2
 		.endm
-
-		/*
-		 * The interrupt numbering scheme is defined in the
-		 * interrupt controller spec.  To wit:
-		 *
-		 * Interrupts 0-15 are IPI
-		 * 16-28 are reserved
-		 * 29-31 are local.  We allow 30 to be used for the watchdog.
-		 * 32-1020 are global
-		 * 1021-1022 are reserved
-		 * 1023 is "spurious" (no interrupt)
-		 *
-		 * For now, we ignore all local interrupts so only return an interrupt if it's
-		 * between 30 and 1020.  The test_for_ipi routine below will pick up on IPIs.
-		 *
-		 * A simple read from the controller will tell us the number of the highest
-                 * priority enabled interrupt.  We then just need to check whether it is in the
-		 * valid range for an IRQ (30-1020 inclusive).
-		 */
-
-		.macro  get_irqnr_and_base, irqnr, irqstat, base, tmp
-
-		ldr     \irqstat, [\base, #GIC_CPU_INTACK] /* bits 12-10 = src CPU, 9-0 = int # */
-
-		ldr	\tmp, =1021
-
-		bic     \irqnr, \irqstat, #0x1c00
-
-		cmp     \irqnr, #29
-		cmpcc	\irqnr, \irqnr
-		cmpne	\irqnr, \tmp
-		cmpcs	\irqnr, \irqnr
-
-		.endm
-
-		/* We assume that irqstat (the raw value of the IRQ acknowledge
-		 * register) is preserved from the macro above.
-		 * If there is an IPI, we immediately signal end of interrupt on the
-		 * controller, since this requires the original irqstat value which
-		 * we won't easily be able to recreate later.
-		 */
-
-		.macro test_for_ipi, irqnr, irqstat, base, tmp
-		bic	\irqnr, \irqstat, #0x1c00
-		cmp	\irqnr, #16
-		strcc	\irqstat, [\base, #GIC_CPU_EOI]
-		cmpcs	\irqnr, \irqnr
-		.endm
-
-		/* As above, this assumes that irqstat and base are preserved.. */
-
-		.macro test_for_ltirq, irqnr, irqstat, base, tmp
-		bic	\irqnr, \irqstat, #0x1c00
-		mov 	\tmp, #0
-		cmp	\irqnr, #29
-		moveq	\tmp, #1
-		streq	\irqstat, [\base, #GIC_CPU_EOI]
-		cmp	\tmp, #0
-		.endm
diff --git a/arch/arm/mach-davinci/clock.h b/arch/arm/mach-davinci/clock.h
index 11099980b58b..0dd22031ec62 100644
--- a/arch/arm/mach-davinci/clock.h
+++ b/arch/arm/mach-davinci/clock.h
@@ -68,7 +68,7 @@
 #ifndef __ASSEMBLER__
 
 #include <linux/list.h>
-#include <asm/clkdev.h>
+#include <linux/clkdev.h>
 
 #define PLLSTAT_GOSTAT	BIT(0)
 #define PLLCMD_GOSET	BIT(0)
diff --git a/arch/arm/mach-davinci/include/mach/io.h b/arch/arm/mach-davinci/include/mach/io.h
index 62b0a90309ad..d1b954955c12 100644
--- a/arch/arm/mach-davinci/include/mach/io.h
+++ b/arch/arm/mach-davinci/include/mach/io.h
@@ -22,8 +22,8 @@
 #define __mem_isa(a)		(a)
 
 #ifndef __ASSEMBLER__
-#define __arch_ioremap(p, s, t)	davinci_ioremap(p, s, t)
-#define __arch_iounmap(v)	davinci_iounmap(v)
+#define __arch_ioremap		davinci_ioremap
+#define __arch_iounmap		davinci_iounmap
 
 void __iomem *davinci_ioremap(unsigned long phys, size_t size,
 			      unsigned int type);
diff --git a/arch/arm/mach-davinci/time.c b/arch/arm/mach-davinci/time.c
index 5d1eea026635..e1969ce904dc 100644
--- a/arch/arm/mach-davinci/time.c
+++ b/arch/arm/mach-davinci/time.c
@@ -286,7 +286,6 @@ static struct clocksource clocksource_davinci = {
 	.rating		= 300,
 	.read		= read_dummy,
 	.mask		= CLOCKSOURCE_MASK(32),
-	.shift		= 24,
 	.flags		= CLOCK_SOURCE_IS_CONTINUOUS,
 };
 
@@ -400,10 +399,8 @@ static void __init davinci_timer_init(void)
 	/* setup clocksource */
 	clocksource_davinci.read = read_cycles;
 	clocksource_davinci.name = id_to_name[clocksource_id];
-	clocksource_davinci.mult =
-		clocksource_khz2mult(davinci_clock_tick_rate/1000,
-				     clocksource_davinci.shift);
-	if (clocksource_register(&clocksource_davinci))
+	if (clocksource_register_hz(&clocksource_davinci,
+				    davinci_clock_tick_rate))
 		printk(err, clocksource_davinci.name);
 
 	/* setup clockevent */
diff --git a/arch/arm/mach-ep93xx/clock.c b/arch/arm/mach-ep93xx/clock.c
index ef06c66a6f16..ca4de7105097 100644
--- a/arch/arm/mach-ep93xx/clock.c
+++ b/arch/arm/mach-ep93xx/clock.c
@@ -19,10 +19,10 @@
 #include <linux/string.h>
 #include <linux/io.h>
 #include <linux/spinlock.h>
+#include <linux/clkdev.h>
 
 #include <mach/hardware.h>
 
-#include <asm/clkdev.h>
 #include <asm/div64.h>
 
 
diff --git a/arch/arm/mach-imx/clock-imx1.c b/arch/arm/mach-imx/clock-imx1.c
index daca30b2d5b1..3938a563b280 100644
--- a/arch/arm/mach-imx/clock-imx1.c
+++ b/arch/arm/mach-imx/clock-imx1.c
@@ -22,8 +22,7 @@
 #include <linux/err.h>
 #include <linux/clk.h>
 #include <linux/io.h>
-
-#include <asm/clkdev.h>
+#include <linux/clkdev.h>
 
 #include <mach/clock.h>
 #include <mach/hardware.h>
diff --git a/arch/arm/mach-imx/clock-imx21.c b/arch/arm/mach-imx/clock-imx21.c
index c63a4f5ffcb6..bf30a8c7ce6f 100644
--- a/arch/arm/mach-imx/clock-imx21.c
+++ b/arch/arm/mach-imx/clock-imx21.c
@@ -21,11 +21,11 @@
 #include <linux/clk.h>
 #include <linux/io.h>
 #include <linux/module.h>
+#include <linux/clkdev.h>
 
 #include <mach/clock.h>
 #include <mach/hardware.h>
 #include <mach/common.h>
-#include <asm/clkdev.h>
 #include <asm/div64.h>
 
 #define IO_ADDR_CCM(off)	(MX21_IO_ADDRESS(MX21_CCM_BASE_ADDR + (off)))
diff --git a/arch/arm/mach-imx/clock-imx25.c b/arch/arm/mach-imx/clock-imx25.c
index 21ef34c501e5..daa0165b6772 100644
--- a/arch/arm/mach-imx/clock-imx25.c
+++ b/arch/arm/mach-imx/clock-imx25.c
@@ -21,8 +21,7 @@
 #include <linux/list.h>
 #include <linux/clk.h>
 #include <linux/io.h>
-
-#include <asm/clkdev.h>
+#include <linux/clkdev.h>
 
 #include <mach/clock.h>
 #include <mach/hardware.h>
diff --git a/arch/arm/mach-imx/clock-imx27.c b/arch/arm/mach-imx/clock-imx27.c
index f32f3b8e8ba4..583f2515c1d5 100644
--- a/arch/arm/mach-imx/clock-imx27.c
+++ b/arch/arm/mach-imx/clock-imx27.c
@@ -21,8 +21,8 @@
 #include <linux/clk.h>
 #include <linux/io.h>
 #include <linux/module.h>
+#include <linux/clkdev.h>
 
-#include <asm/clkdev.h>
 #include <asm/div64.h>
 
 #include <mach/clock.h>
diff --git a/arch/arm/mach-integrator/Kconfig b/arch/arm/mach-integrator/Kconfig
index 27db275b367c..769b0f10c834 100644
--- a/arch/arm/mach-integrator/Kconfig
+++ b/arch/arm/mach-integrator/Kconfig
@@ -4,6 +4,7 @@ menu "Integrator Options"
 
 config ARCH_INTEGRATOR_AP
 	bool "Support Integrator/AP and Integrator/PP2 platforms"
+	select MIGHT_HAVE_PCI
 	help
 	  Include support for the ARM(R) Integrator/AP and
 	  Integrator/PP2 platforms.
diff --git a/arch/arm/mach-integrator/core.c b/arch/arm/mach-integrator/core.c
index 8f4fb6d638f7..b8e884b450da 100644
--- a/arch/arm/mach-integrator/core.c
+++ b/arch/arm/mach-integrator/core.c
@@ -21,9 +21,8 @@
 #include <linux/amba/bus.h>
 #include <linux/amba/serial.h>
 #include <linux/io.h>
+#include <linux/clkdev.h>
 
-#include <asm/clkdev.h>
-#include <mach/clkdev.h>
 #include <mach/hardware.h>
 #include <mach/platform.h>
 #include <asm/irq.h>
diff --git a/arch/arm/mach-integrator/impd1.c b/arch/arm/mach-integrator/impd1.c
index fd684bf205e5..5db574f8ae3f 100644
--- a/arch/arm/mach-integrator/impd1.c
+++ b/arch/arm/mach-integrator/impd1.c
@@ -22,9 +22,8 @@
 #include <linux/amba/clcd.h>
 #include <linux/io.h>
 #include <linux/slab.h>
+#include <linux/clkdev.h>
 
-#include <asm/clkdev.h>
-#include <mach/clkdev.h>
 #include <asm/hardware/icst.h>
 #include <mach/lm.h>
 #include <mach/impd1.h>
diff --git a/arch/arm/mach-integrator/integrator_ap.c b/arch/arm/mach-integrator/integrator_ap.c
index 548208f11179..2774df8021dc 100644
--- a/arch/arm/mach-integrator/integrator_ap.c
+++ b/arch/arm/mach-integrator/integrator_ap.c
@@ -372,7 +372,6 @@ static struct clocksource clocksource_timersp = {
 	.rating		= 200,
 	.read		= timersp_read,
 	.mask		= CLOCKSOURCE_MASK(16),
-	.shift		= 16,
 	.flags		= CLOCK_SOURCE_IS_CONTINUOUS,
 };
 
@@ -390,8 +389,7 @@ static void integrator_clocksource_init(u32 khz)
 	writel(ctrl, base + TIMER_CTRL);
 	writel(0xffff, base + TIMER_LOAD);
 
-	cs->mult = clocksource_khz2mult(khz, cs->shift);
-	clocksource_register(cs);
+	clocksource_register_khz(cs, khz);
 }
 
 static void __iomem * const clkevt_base = (void __iomem *)TIMER1_VA_BASE;
diff --git a/arch/arm/mach-integrator/integrator_cp.c b/arch/arm/mach-integrator/integrator_cp.c
index 6258c90d020c..85e48a5f77b9 100644
--- a/arch/arm/mach-integrator/integrator_cp.c
+++ b/arch/arm/mach-integrator/integrator_cp.c
@@ -21,9 +21,8 @@
 #include <linux/amba/mmci.h>
 #include <linux/io.h>
 #include <linux/gfp.h>
+#include <linux/clkdev.h>
 
-#include <asm/clkdev.h>
-#include <mach/clkdev.h>
 #include <mach/hardware.h>
 #include <mach/platform.h>
 #include <asm/irq.h>
@@ -41,7 +40,7 @@
 #include <asm/mach/map.h>
 #include <asm/mach/time.h>
 
-#include <plat/timer-sp.h>
+#include <asm/hardware/timer-sp.h>
 
 #include "common.h"
 
diff --git a/arch/arm/mach-iop13xx/include/mach/io.h b/arch/arm/mach-iop13xx/include/mach/io.h
index a6e0f9e6ddcf..dffb234bb967 100644
--- a/arch/arm/mach-iop13xx/include/mach/io.h
+++ b/arch/arm/mach-iop13xx/include/mach/io.h
@@ -35,7 +35,7 @@ extern u32 iop13xx_atux_mem_base;
 extern size_t iop13xx_atue_mem_size;
 extern size_t iop13xx_atux_mem_size;
 
-#define __arch_ioremap(a, s, f) __iop13xx_ioremap(a, s, f)
-#define __arch_iounmap(a)	 __iop13xx_iounmap(a)
+#define __arch_ioremap	__iop13xx_ioremap
+#define __arch_iounmap	__iop13xx_iounmap
 
 #endif
diff --git a/arch/arm/mach-iop13xx/include/mach/memory.h b/arch/arm/mach-iop13xx/include/mach/memory.h
index 7415e4338651..3ad455318868 100644
--- a/arch/arm/mach-iop13xx/include/mach/memory.h
+++ b/arch/arm/mach-iop13xx/include/mach/memory.h
@@ -58,13 +58,13 @@ static inline unsigned long __lbus_to_virt(dma_addr_t x)
 		__dma;							\
 	})
 
-#define __arch_page_to_dma(dev, page)					\
+#define __arch_pfn_to_dma(dev, pfn)					\
 	({								\
 		/* __is_lbus_virt() can never be true for RAM pages */	\
-		(dma_addr_t)page_to_phys(page);				\
+		(dma_addr_t)__pfn_to_phys(pfn);				\
 	})
 
-#define __arch_dma_to_page(dev, addr)	phys_to_page(addr)
+#define __arch_dma_to_pfn(dev, addr)	__phys_to_pfn(addr)
 
 #endif /* CONFIG_ARCH_IOP13XX */
 #endif /* !ASSEMBLY */
diff --git a/arch/arm/mach-iop32x/include/mach/io.h b/arch/arm/mach-iop32x/include/mach/io.h
index 339e5854728b..059c783ce0b2 100644
--- a/arch/arm/mach-iop32x/include/mach/io.h
+++ b/arch/arm/mach-iop32x/include/mach/io.h
@@ -21,7 +21,7 @@ extern void __iop3xx_iounmap(void __iomem *addr);
 #define __io(p)		((void __iomem *)IOP3XX_PCI_IO_PHYS_TO_VIRT(p))
 #define __mem_pci(a)		(a)
 
-#define __arch_ioremap(a, s, f) __iop3xx_ioremap(a, s, f)
-#define __arch_iounmap(a)	 __iop3xx_iounmap(a)
+#define __arch_ioremap	__iop3xx_ioremap
+#define __arch_iounmap	__iop3xx_iounmap
 
 #endif
diff --git a/arch/arm/mach-iop33x/include/mach/io.h b/arch/arm/mach-iop33x/include/mach/io.h
index e99a7ed6d050..39e893e97c21 100644
--- a/arch/arm/mach-iop33x/include/mach/io.h
+++ b/arch/arm/mach-iop33x/include/mach/io.h
@@ -21,7 +21,7 @@ extern void __iop3xx_iounmap(void __iomem *addr);
 #define __io(p)		((void __iomem *)IOP3XX_PCI_IO_PHYS_TO_VIRT(p))
 #define __mem_pci(a)		(a)
 
-#define __arch_ioremap(a, s, f) __iop3xx_ioremap(a, s, f)
-#define __arch_iounmap(a)	 __iop3xx_iounmap(a)
+#define __arch_ioremap	__iop3xx_ioremap
+#define __arch_iounmap	__iop3xx_iounmap
 
 #endif
diff --git a/arch/arm/mach-ixp23xx/include/mach/io.h b/arch/arm/mach-ixp23xx/include/mach/io.h
index fd9ef8e519f7..a1749d0fd896 100644
--- a/arch/arm/mach-ixp23xx/include/mach/io.h
+++ b/arch/arm/mach-ixp23xx/include/mach/io.h
@@ -45,8 +45,8 @@ ixp23xx_iounmap(void __iomem *addr)
 	__iounmap(addr);
 }
 
-#define __arch_ioremap(a,s,f)	ixp23xx_ioremap(a,s,f)
-#define __arch_iounmap(a)	ixp23xx_iounmap(a)
+#define __arch_ioremap	ixp23xx_ioremap
+#define __arch_iounmap	ixp23xx_iounmap
 
 
 #endif
diff --git a/arch/arm/mach-ixp4xx/common-pci.c b/arch/arm/mach-ixp4xx/common-pci.c
index 24498a932ba6..a54b3db80366 100644
--- a/arch/arm/mach-ixp4xx/common-pci.c
+++ b/arch/arm/mach-ixp4xx/common-pci.c
@@ -513,4 +513,4 @@ int dma_set_coherent_mask(struct device *dev, u64 mask)
 
 EXPORT_SYMBOL(ixp4xx_pci_read);
 EXPORT_SYMBOL(ixp4xx_pci_write);
-
+EXPORT_SYMBOL(dma_set_coherent_mask);
diff --git a/arch/arm/mach-ixp4xx/common.c b/arch/arm/mach-ixp4xx/common.c
index 0bce09799d18..4dbfcbb9163c 100644
--- a/arch/arm/mach-ixp4xx/common.c
+++ b/arch/arm/mach-ixp4xx/common.c
@@ -35,6 +35,7 @@
 #include <asm/pgtable.h>
 #include <asm/page.h>
 #include <asm/irq.h>
+#include <asm/sched_clock.h>
 
 #include <asm/mach/map.h>
 #include <asm/mach/irq.h>
@@ -399,6 +400,23 @@ void __init ixp4xx_sys_init(void)
 }
 
 /*
+ * sched_clock()
+ */
+static DEFINE_CLOCK_DATA(cd);
+
+unsigned long long notrace sched_clock(void)
+{
+	u32 cyc = *IXP4XX_OSTS;
+	return cyc_to_sched_clock(&cd, cyc, (u32)~0);
+}
+
+static void notrace ixp4xx_update_sched_clock(void)
+{
+	u32 cyc = *IXP4XX_OSTS;
+	update_sched_clock(&cd, cyc, (u32)~0);
+}
+
+/*
  * clocksource
  */
 static cycle_t ixp4xx_get_cycles(struct clocksource *cs)
@@ -411,7 +429,6 @@ static struct clocksource clocksource_ixp4xx = {
 	.rating		= 200,
 	.read		= ixp4xx_get_cycles,
 	.mask		= CLOCKSOURCE_MASK(32),
-	.shift 		= 20,
 	.flags		= CLOCK_SOURCE_IS_CONTINUOUS,
 };
 
@@ -419,21 +436,9 @@ unsigned long ixp4xx_timer_freq = FREQ;
 EXPORT_SYMBOL(ixp4xx_timer_freq);
 static void __init ixp4xx_clocksource_init(void)
 {
-	clocksource_ixp4xx.mult =
-		clocksource_hz2mult(ixp4xx_timer_freq,
-				    clocksource_ixp4xx.shift);
-	clocksource_register(&clocksource_ixp4xx);
-}
-
-/*
- * sched_clock()
- */
-unsigned long long sched_clock(void)
-{
-	cycle_t cyc = ixp4xx_get_cycles(NULL);
-	struct clocksource *cs = &clocksource_ixp4xx;
+	init_sched_clock(&cd, ixp4xx_update_sched_clock, 32, ixp4xx_timer_freq);
 
-	return clocksource_cyc2ns(cyc, cs->mult, cs->shift);
+	clocksource_register_hz(&clocksource_ixp4xx, ixp4xx_timer_freq);
 }
 
 /*
diff --git a/arch/arm/mach-ixp4xx/include/mach/io.h b/arch/arm/mach-ixp4xx/include/mach/io.h
index de274a1f19d7..57b5410c31f4 100644
--- a/arch/arm/mach-ixp4xx/include/mach/io.h
+++ b/arch/arm/mach-ixp4xx/include/mach/io.h
@@ -74,8 +74,8 @@ static inline void __indirect_iounmap(void __iomem *addr)
 		__iounmap(addr);
 }
 
-#define __arch_ioremap(a, s, f)		__indirect_ioremap(a, s, f)
-#define __arch_iounmap(a)		__indirect_iounmap(a)
+#define __arch_ioremap			__indirect_ioremap
+#define __arch_iounmap			__indirect_iounmap
 
 #define writeb(v, p)			__indirect_writeb(v, p)
 #define writew(v, p)			__indirect_writew(v, p)
diff --git a/arch/arm/mach-kirkwood/include/mach/io.h b/arch/arm/mach-kirkwood/include/mach/io.h
index 44e8be04f259..1aaddc364f2e 100644
--- a/arch/arm/mach-kirkwood/include/mach/io.h
+++ b/arch/arm/mach-kirkwood/include/mach/io.h
@@ -42,8 +42,8 @@ __arch_iounmap(void __iomem *addr)
 		__iounmap(addr);
 }
 
-#define __arch_ioremap(p, s, m)	__arch_ioremap(p, s, m)
-#define __arch_iounmap(a)	__arch_iounmap(a)
+#define __arch_ioremap		__arch_ioremap
+#define __arch_iounmap		__arch_iounmap
 #define __io(a)			__io(a)
 #define __mem_pci(a)		(a)
 
diff --git a/arch/arm/mach-ks8695/Kconfig b/arch/arm/mach-ks8695/Kconfig
index fe0c82e30b2d..f5c39a8c2b00 100644
--- a/arch/arm/mach-ks8695/Kconfig
+++ b/arch/arm/mach-ks8695/Kconfig
@@ -4,6 +4,7 @@ menu "Kendin/Micrel KS8695 Implementations"
 
 config MACH_KS8695
 	bool "KS8695 development board"
+	select MIGHT_HAVE_PCI
 	help
 	  Say 'Y' here if you want your kernel to run on the original
 	  Kendin-Micrel KS8695 development board.
diff --git a/arch/arm/mach-ks8695/include/mach/memory.h b/arch/arm/mach-ks8695/include/mach/memory.h
index ffa19aae6e05..bace9a681adc 100644
--- a/arch/arm/mach-ks8695/include/mach/memory.h
+++ b/arch/arm/mach-ks8695/include/mach/memory.h
@@ -35,17 +35,17 @@ extern struct bus_type platform_bus_type;
 					__phys_to_virt(x) : __bus_to_virt(x)); })
 #define __arch_virt_to_dma(dev, x)	({ is_lbus_device(dev) ? \
 					(dma_addr_t)__virt_to_phys(x) : (dma_addr_t)__virt_to_bus(x); })
-#define __arch_page_to_dma(dev, x)	\
-	({ dma_addr_t __dma = page_to_phys(page); \
+#define __arch_pfn_to_dma(dev, pfn)	\
+	({ dma_addr_t __dma = __pfn_to_phys(pfn); \
 	   if (!is_lbus_device(dev)) \
 		__dma = __dma - PHYS_OFFSET + KS8695_PCIMEM_PA; \
 	   __dma; })
 
-#define __arch_dma_to_page(dev, x)	\
+#define __arch_dma_to_pfn(dev, x)	\
 	({ dma_addr_t __dma = x;				\
 	   if (!is_lbus_device(dev))				\
 		__dma += PHYS_OFFSET - KS8695_PCIMEM_PA;	\
-	   phys_to_page(__dma);					\
+	   __phys_to_pfn(__dma);				\
 	})
 
 #endif
diff --git a/arch/arm/mach-lpc32xx/clock.c b/arch/arm/mach-lpc32xx/clock.c
index 32d63796430a..da0e6498110a 100644
--- a/arch/arm/mach-lpc32xx/clock.c
+++ b/arch/arm/mach-lpc32xx/clock.c
@@ -90,10 +90,9 @@
 #include <linux/clk.h>
 #include <linux/amba/bus.h>
 #include <linux/amba/clcd.h>
+#include <linux/clkdev.h>
 
 #include <mach/hardware.h>
-#include <asm/clkdev.h>
-#include <mach/clkdev.h>
 #include <mach/platform.h>
 #include "clock.h"
 #include "common.h"
diff --git a/arch/arm/mach-lpc32xx/timer.c b/arch/arm/mach-lpc32xx/timer.c
index 630dd4a74b26..6162ac308c20 100644
--- a/arch/arm/mach-lpc32xx/timer.c
+++ b/arch/arm/mach-lpc32xx/timer.c
@@ -38,7 +38,6 @@ static cycle_t lpc32xx_clksrc_read(struct clocksource *cs)
 
 static struct clocksource lpc32xx_clksrc = {
 	.name	= "lpc32xx_clksrc",
-	.shift	= 24,
 	.rating	= 300,
 	.read	= lpc32xx_clksrc_read,
 	.mask	= CLOCKSOURCE_MASK(32),
@@ -171,9 +170,7 @@ static void __init lpc32xx_timer_init(void)
 	__raw_writel(0, LCP32XX_TIMER_MCR(LPC32XX_TIMER1_BASE));
 	__raw_writel(LCP32XX_TIMER_CNTR_TCR_EN,
 		LCP32XX_TIMER_TCR(LPC32XX_TIMER1_BASE));
-	lpc32xx_clksrc.mult = clocksource_hz2mult(clkrate,
-		lpc32xx_clksrc.shift);
-	clocksource_register(&lpc32xx_clksrc);
+	clocksource_register_hz(&lpc32xx_clksrc, clkrate);
 }
 
 struct sys_timer lpc32xx_timer = {
diff --git a/arch/arm/mach-mmp/clock.h b/arch/arm/mach-mmp/clock.h
index 016ae94691c0..9b027d7491f5 100644
--- a/arch/arm/mach-mmp/clock.h
+++ b/arch/arm/mach-mmp/clock.h
@@ -6,7 +6,7 @@
  *  published by the Free Software Foundation.
  */
 
-#include <asm/clkdev.h>
+#include <linux/clkdev.h>
 
 struct clkops {
 	void			(*enable)(struct clk *);
diff --git a/arch/arm/mach-mmp/time.c b/arch/arm/mach-mmp/time.c
index 66528193f939..aeb9ae23e6ce 100644
--- a/arch/arm/mach-mmp/time.c
+++ b/arch/arm/mach-mmp/time.c
@@ -26,8 +26,8 @@
 #include <linux/io.h>
 #include <linux/irq.h>
 #include <linux/sched.h>
-#include <linux/cnt32_to_63.h>
 
+#include <asm/sched_clock.h>
 #include <mach/addr-map.h>
 #include <mach/regs-timers.h>
 #include <mach/regs-apbc.h>
@@ -42,23 +42,7 @@
 #define MAX_DELTA		(0xfffffffe)
 #define MIN_DELTA		(16)
 
-#define TCR2NS_SCALE_FACTOR	10
-
-static unsigned long tcr2ns_scale;
-
-static void __init set_tcr2ns_scale(unsigned long tcr_rate)
-{
-	unsigned long long v = 1000000000ULL << TCR2NS_SCALE_FACTOR;
-	do_div(v, tcr_rate);
-	tcr2ns_scale = v;
-	/*
-	 * We want an even value to automatically clear the top bit
-	 * returned by cnt32_to_63() without an additional run time
-	 * instruction. So if the LSB is 1 then round it up.
-	 */
-	if (tcr2ns_scale & 1)
-		tcr2ns_scale++;
-}
+static DEFINE_CLOCK_DATA(cd);
 
 /*
  * FIXME: the timer needs some delay to stablize the counter capture
@@ -75,10 +59,16 @@ static inline uint32_t timer_read(void)
 	return __raw_readl(TIMERS_VIRT_BASE + TMR_CVWR(0));
 }
 
-unsigned long long sched_clock(void)
+unsigned long long notrace sched_clock(void)
 {
-	unsigned long long v = cnt32_to_63(timer_read());
-	return (v * tcr2ns_scale) >> TCR2NS_SCALE_FACTOR;
+	u32 cyc = timer_read();
+	return cyc_to_sched_clock(&cd, cyc, (u32)~0);
+}
+
+static void notrace mmp_update_sched_clock(void)
+{
+	u32 cyc = timer_read();
+	update_sched_clock(&cd, cyc, (u32)~0);
 }
 
 static irqreturn_t timer_interrupt(int irq, void *dev_id)
@@ -146,7 +136,6 @@ static cycle_t clksrc_read(struct clocksource *cs)
 
 static struct clocksource cksrc = {
 	.name		= "clocksource",
-	.shift		= 20,
 	.rating		= 200,
 	.read		= clksrc_read,
 	.mask		= CLOCKSOURCE_MASK(32),
@@ -186,17 +175,15 @@ void __init timer_init(int irq)
 {
 	timer_config();
 
-	set_tcr2ns_scale(CLOCK_TICK_RATE);
+	init_sched_clock(&cd, mmp_update_sched_clock, 32, CLOCK_TICK_RATE);
 
 	ckevt.mult = div_sc(CLOCK_TICK_RATE, NSEC_PER_SEC, ckevt.shift);
 	ckevt.max_delta_ns = clockevent_delta2ns(MAX_DELTA, &ckevt);
 	ckevt.min_delta_ns = clockevent_delta2ns(MIN_DELTA, &ckevt);
 	ckevt.cpumask = cpumask_of(0);
 
-	cksrc.mult = clocksource_hz2mult(CLOCK_TICK_RATE, cksrc.shift);
-
 	setup_irq(irq, &timer_irq);
 
-	clocksource_register(&cksrc);
+	clocksource_register_hz(&cksrc, CLOCK_TICK_RATE);
 	clockevents_register_device(&ckevt);
 }
diff --git a/arch/arm/mach-msm/board-msm8x60.c b/arch/arm/mach-msm/board-msm8x60.c
index 7486a681cc71..9b5eb2b4ae1b 100644
--- a/arch/arm/mach-msm/board-msm8x60.c
+++ b/arch/arm/mach-msm/board-msm8x60.c
@@ -28,8 +28,6 @@
 #include <mach/board.h>
 #include <mach/msm_iomap.h>
 
-void __iomem *gic_cpu_base_addr;
-
 unsigned long clk_get_max_axi_khz(void)
 {
 	return 0;
@@ -44,9 +42,8 @@ static void __init msm8x60_init_irq(void)
 {
 	unsigned int i;
 
-	gic_dist_init(0, MSM_QGIC_DIST_BASE, GIC_PPI_START);
-	gic_cpu_base_addr = (void *)MSM_QGIC_CPU_BASE;
-	gic_cpu_init(0, MSM_QGIC_CPU_BASE);
+	gic_init(0, GIC_PPI_START, MSM_QGIC_DIST_BASE,
+		 (void *)MSM_QGIC_CPU_BASE);
 
 	/* Edge trigger PPIs except AVS_SVICINT and AVS_SVICINTSWDONE */
 	writel(0xFFFFD7FF, MSM_QGIC_DIST_BASE + GIC_DIST_CONFIG + 4);
diff --git a/arch/arm/mach-msm/include/mach/smp.h b/arch/arm/mach-msm/include/mach/smp.h
index 3ff7bf5e679e..a95f7b9efe31 100644
--- a/arch/arm/mach-msm/include/mach/smp.h
+++ b/arch/arm/mach-msm/include/mach/smp.h
@@ -31,9 +31,9 @@
 
 #include <asm/hardware/gic.h>
 
-static inline void smp_cross_call(const struct cpumask *mask)
+static inline void smp_cross_call(const struct cpumask *mask, int ipi)
 {
-	gic_raise_softirq(mask, 1);
+	gic_raise_softirq(mask, ipi);
 }
 
 #endif
diff --git a/arch/arm/mach-msm/timer.c b/arch/arm/mach-msm/timer.c
index 950100f19d07..595be7fea31a 100644
--- a/arch/arm/mach-msm/timer.c
+++ b/arch/arm/mach-msm/timer.c
@@ -137,7 +137,6 @@ static struct msm_clock msm_clocks[] = {
 			.rating         = 200,
 			.read           = msm_gpt_read,
 			.mask           = CLOCKSOURCE_MASK(32),
-			.shift          = 17,
 			.flags          = CLOCK_SOURCE_IS_CONTINUOUS,
 		},
 		.irq = {
@@ -164,7 +163,6 @@ static struct msm_clock msm_clocks[] = {
 			.rating         = 300,
 			.read           = msm_dgt_read,
 			.mask           = CLOCKSOURCE_MASK((32 - MSM_DGT_SHIFT)),
-			.shift          = 24 - MSM_DGT_SHIFT,
 			.flags          = CLOCK_SOURCE_IS_CONTINUOUS,
 		},
 		.irq = {
@@ -205,8 +203,7 @@ static void __init msm_timer_init(void)
 		ce->min_delta_ns = clockevent_delta2ns(4, ce);
 		ce->cpumask = cpumask_of(0);
 
-		cs->mult = clocksource_hz2mult(clock->freq, cs->shift);
-		res = clocksource_register(cs);
+		res = clocksource_register_hz(cs, clock->freq);
 		if (res)
 			printk(KERN_ERR "msm_timer_init: clocksource_register "
 			       "failed for %s\n", cs->name);
diff --git a/arch/arm/mach-mx3/clock-imx31.c b/arch/arm/mach-mx3/clock-imx31.c
index 4193cf5a2637..d423cac8cab7 100644
--- a/arch/arm/mach-mx3/clock-imx31.c
+++ b/arch/arm/mach-mx3/clock-imx31.c
@@ -23,8 +23,8 @@
 #include <linux/clk.h>
 #include <linux/err.h>
 #include <linux/io.h>
+#include <linux/clkdev.h>
 
-#include <asm/clkdev.h>
 #include <asm/div64.h>
 
 #include <mach/clock.h>
diff --git a/arch/arm/mach-mx3/clock-imx35.c b/arch/arm/mach-mx3/clock-imx35.c
index 22259d955837..448a038cd1ec 100644
--- a/arch/arm/mach-mx3/clock-imx35.c
+++ b/arch/arm/mach-mx3/clock-imx35.c
@@ -21,8 +21,7 @@
 #include <linux/list.h>
 #include <linux/clk.h>
 #include <linux/io.h>
-
-#include <asm/clkdev.h>
+#include <linux/clkdev.h>
 
 #include <mach/clock.h>
 #include <mach/hardware.h>
diff --git a/arch/arm/mach-mx5/clock-mx51-mx53.c b/arch/arm/mach-mx5/clock-mx51-mx53.c
index b21bc47d4827..785e1a336183 100644
--- a/arch/arm/mach-mx5/clock-mx51-mx53.c
+++ b/arch/arm/mach-mx5/clock-mx51-mx53.c
@@ -14,8 +14,8 @@
 #include <linux/delay.h>
 #include <linux/clk.h>
 #include <linux/io.h>
+#include <linux/clkdev.h>
 
-#include <asm/clkdev.h>
 #include <asm/div64.h>
 
 #include <mach/hardware.h>
diff --git a/arch/arm/mach-mxc91231/clock.c b/arch/arm/mach-mxc91231/clock.c
index 5c85075d8a56..9fab505f1eb1 100644
--- a/arch/arm/mach-mxc91231/clock.c
+++ b/arch/arm/mach-mxc91231/clock.c
@@ -2,12 +2,12 @@
 #include <linux/kernel.h>
 #include <linux/init.h>
 #include <linux/io.h>
+#include <linux/clkdev.h>
 
 #include <mach/clock.h>
 #include <mach/hardware.h>
 #include <mach/common.h>
 
-#include <asm/clkdev.h>
 #include <asm/bug.h>
 #include <asm/div64.h>
 
diff --git a/arch/arm/mach-netx/time.c b/arch/arm/mach-netx/time.c
index 82801dbf0579..f12f22d09b6c 100644
--- a/arch/arm/mach-netx/time.c
+++ b/arch/arm/mach-netx/time.c
@@ -114,7 +114,6 @@ static struct clocksource clocksource_netx = {
 	.rating		= 200,
 	.read		= netx_get_cycles,
 	.mask		= CLOCKSOURCE_MASK(32),
-	.shift		= 20,
 	.flags		= CLOCK_SOURCE_IS_CONTINUOUS,
 };
 
@@ -151,9 +150,7 @@ static void __init netx_timer_init(void)
 	writel(NETX_GPIO_COUNTER_CTRL_RUN,
 			NETX_GPIO_COUNTER_CTRL(TIMER_CLOCKSOURCE));
 
-	clocksource_netx.mult =
-		clocksource_hz2mult(CLOCK_TICK_RATE, clocksource_netx.shift);
-	clocksource_register(&clocksource_netx);
+	clocksource_register_hz(&clocksource_netx, CLOCK_TICK_RATE);
 
 	netx_clockevent.mult = div_sc(CLOCK_TICK_RATE, NSEC_PER_SEC,
 			netx_clockevent.shift);
diff --git a/arch/arm/mach-nomadik/clock.c b/arch/arm/mach-nomadik/clock.c
index 89f793adf776..48a59f24e10c 100644
--- a/arch/arm/mach-nomadik/clock.c
+++ b/arch/arm/mach-nomadik/clock.c
@@ -7,7 +7,7 @@
 #include <linux/module.h>
 #include <linux/errno.h>
 #include <linux/clk.h>
-#include <asm/clkdev.h>
+#include <linux/clkdev.h>
 #include "clock.h"
 
 /*
diff --git a/arch/arm/mach-ns9xxx/time-ns9360.c b/arch/arm/mach-ns9xxx/time-ns9360.c
index 77281260358a..9ca32f55728b 100644
--- a/arch/arm/mach-ns9xxx/time-ns9360.c
+++ b/arch/arm/mach-ns9xxx/time-ns9360.c
@@ -35,7 +35,6 @@ static struct clocksource ns9360_clocksource = {
 	.rating	= 300,
 	.read	= ns9360_clocksource_read,
 	.mask	= CLOCKSOURCE_MASK(32),
-	.shift	= 20,
 	.flags	= CLOCK_SOURCE_IS_CONTINUOUS,
 };
 
@@ -148,10 +147,7 @@ static void __init ns9360_timer_init(void)
 
 	__raw_writel(tc, SYS_TC(TIMER_CLOCKSOURCE));
 
-	ns9360_clocksource.mult = clocksource_hz2mult(ns9360_cpuclock(),
-			ns9360_clocksource.shift);
-
-	clocksource_register(&ns9360_clocksource);
+	clocksource_register_hz(&ns9360_clocksource, ns9360_cpuclock());
 
 	latch = SH_DIV(ns9360_cpuclock(), HZ, 0);
 
diff --git a/arch/arm/mach-nuc93x/clock.h b/arch/arm/mach-nuc93x/clock.h
index 18e51be4816f..4de1f1da9dc5 100644
--- a/arch/arm/mach-nuc93x/clock.h
+++ b/arch/arm/mach-nuc93x/clock.h
@@ -10,7 +10,7 @@
  * the Free Software Foundation; either version 2 of the License.
  */
 
-#include <asm/clkdev.h>
+#include <linux/clkdev.h>
 
 void nuc93x_clk_enable(struct clk *clk, int enable);
 void clks_register(struct clk_lookup *clks, size_t num);
diff --git a/arch/arm/mach-omap1/clock.c b/arch/arm/mach-omap1/clock.c
index b8c7fb9d7921..84ef70476b51 100644
--- a/arch/arm/mach-omap1/clock.c
+++ b/arch/arm/mach-omap1/clock.c
@@ -17,9 +17,9 @@
 #include <linux/err.h>
 #include <linux/clk.h>
 #include <linux/io.h>
+#include <linux/clkdev.h>
 
 #include <asm/mach-types.h>
-#include <asm/clkdev.h>
 
 #include <plat/cpu.h>
 #include <plat/usb.h>
diff --git a/arch/arm/mach-omap1/time.c b/arch/arm/mach-omap1/time.c
index 1be6a214d88d..abb34ff2041b 100644
--- a/arch/arm/mach-omap1/time.c
+++ b/arch/arm/mach-omap1/time.c
@@ -208,7 +208,6 @@ static struct clocksource clocksource_mpu = {
 	.rating		= 300,
 	.read		= mpu_read,
 	.mask		= CLOCKSOURCE_MASK(32),
-	.shift		= 24,
 	.flags		= CLOCK_SOURCE_IS_CONTINUOUS,
 };
 
@@ -217,13 +216,10 @@ static void __init omap_init_clocksource(unsigned long rate)
 	static char err[] __initdata = KERN_ERR
 			"%s: can't register clocksource!\n";
 
-	clocksource_mpu.mult
-		= clocksource_khz2mult(rate/1000, clocksource_mpu.shift);
-
 	setup_irq(INT_TIMER2, &omap_mpu_timer2_irq);
 	omap_mpu_timer_start(1, ~0, 1);
 
-	if (clocksource_register(&clocksource_mpu))
+	if (clocksource_register_hz(&clocksource_mpu, rate))
 		printk(err, clocksource_mpu.name);
 }
 
diff --git a/arch/arm/mach-omap2/dpll3xxx.c b/arch/arm/mach-omap2/dpll3xxx.c
index ed8d330522f1..ebb888f59365 100644
--- a/arch/arm/mach-omap2/dpll3xxx.c
+++ b/arch/arm/mach-omap2/dpll3xxx.c
@@ -26,10 +26,10 @@
 #include <linux/clk.h>
 #include <linux/io.h>
 #include <linux/bitops.h>
+#include <linux/clkdev.h>
 
 #include <plat/cpu.h>
 #include <plat/clock.h>
-#include <asm/clkdev.h>
 
 #include "clock.h"
 #include "prm.h"
diff --git a/arch/arm/mach-omap2/include/mach/entry-macro.S b/arch/arm/mach-omap2/include/mach/entry-macro.S
index 06e64e1fc28a..d54c4f89a8bd 100644
--- a/arch/arm/mach-omap2/include/mach/entry-macro.S
+++ b/arch/arm/mach-omap2/include/mach/entry-macro.S
@@ -105,6 +105,35 @@ omap_irq_base:	.word	0
 9999:
 		.endm
 
+#ifdef CONFIG_SMP
+		/* We assume that irqstat (the raw value of the IRQ acknowledge
+		 * register) is preserved from the macro above.
+		 * If there is an IPI, we immediately signal end of interrupt
+		 * on the controller, since this requires the original irqstat
+		 * value which we won't easily be able to recreate later.
+		 */
+
+		.macro test_for_ipi, irqnr, irqstat, base, tmp
+		bic	\irqnr, \irqstat, #0x1c00
+		cmp	\irqnr, #16
+		it	cc
+		strcc	\irqstat, [\base, #GIC_CPU_EOI]
+		it	cs
+		cmpcs	\irqnr, \irqnr
+		.endm
+
+		/* As above, this assumes that irqstat and base are preserved */
+
+		.macro test_for_ltirq, irqnr, irqstat, base, tmp
+		bic	\irqnr, \irqstat, #0x1c00
+		mov 	\tmp, #0
+		cmp	\irqnr, #29
+		itt	eq
+		moveq	\tmp, #1
+		streq	\irqstat, [\base, #GIC_CPU_EOI]
+		cmp	\tmp, #0
+		.endm
+#endif	/* CONFIG_SMP */
 
 #else	/* MULTI_OMAP2 */
 
@@ -141,74 +170,16 @@ omap_irq_base:	.word	0
 
 
 #ifdef CONFIG_ARCH_OMAP4
+#define HAVE_GET_IRQNR_PREAMBLE
+#include <asm/hardware/entry-macro-gic.S>
 
 		.macro  get_irqnr_preamble, base, tmp
 		ldr     \base, =OMAP4_IRQ_BASE
 		.endm
 
-		/*
-		 * The interrupt numbering scheme is defined in the
-		 * interrupt controller spec.  To wit:
-		 *
-		 * Interrupts 0-15 are IPI
-		 * 16-28 are reserved
-		 * 29-31 are local.  We allow 30 to be used for the watchdog.
-		 * 32-1020 are global
-		 * 1021-1022 are reserved
-		 * 1023 is "spurious" (no interrupt)
-		 *
-		 * For now, we ignore all local interrupts so only return an
-		 * interrupt if it's between 30 and 1020.  The test_for_ipi
-		 * routine below will pick up on IPIs.
-		 * A simple read from the controller will tell us the number
-		 * of the highest priority enabled interrupt.
-		 * We then just need to check whether it is in the
-		 * valid range for an IRQ (30-1020 inclusive).
-		 */
-		.macro  get_irqnr_and_base, irqnr, irqstat, base, tmp
-		ldr     \irqstat, [\base, #GIC_CPU_INTACK]
-
-		ldr     \tmp, =1021
-
-		bic     \irqnr, \irqstat, #0x1c00
-
-		cmp     \irqnr, #29
-		cmpcc   \irqnr, \irqnr
-		cmpne   \irqnr, \tmp
-		cmpcs   \irqnr, \irqnr
-		.endm
 #endif
-#endif	/* MULTI_OMAP2 */
-
-#ifdef CONFIG_SMP
-		/* We assume that irqstat (the raw value of the IRQ acknowledge
-		 * register) is preserved from the macro above.
-		 * If there is an IPI, we immediately signal end of interrupt
-		 * on the controller, since this requires the original irqstat
-		 * value which we won't easily be able to recreate later.
-		 */
 
-		.macro test_for_ipi, irqnr, irqstat, base, tmp
-		bic	\irqnr, \irqstat, #0x1c00
-		cmp	\irqnr, #16
-		it	cc
-		strcc	\irqstat, [\base, #GIC_CPU_EOI]
-		it	cs
-		cmpcs	\irqnr, \irqnr
-		.endm
-
-		/* As above, this assumes that irqstat and base are preserved */
-
-		.macro test_for_ltirq, irqnr, irqstat, base, tmp
-		bic	\irqnr, \irqstat, #0x1c00
-		mov 	\tmp, #0
-		cmp	\irqnr, #29
-		itt	eq
-		moveq	\tmp, #1
-		streq	\irqstat, [\base, #GIC_CPU_EOI]
-		cmp	\tmp, #0
-		.endm
-#endif	/* CONFIG_SMP */
+#endif	/* MULTI_OMAP2 */
 
 		.macro	irq_prio_table
 		.endm
diff --git a/arch/arm/mach-omap2/include/mach/omap4-common.h b/arch/arm/mach-omap2/include/mach/omap4-common.h
index 2744dfee1ff4..5b0270b28934 100644
--- a/arch/arm/mach-omap2/include/mach/omap4-common.h
+++ b/arch/arm/mach-omap2/include/mach/omap4-common.h
@@ -24,7 +24,6 @@
 extern void __iomem *l2cache_base;
 #endif
 
-extern void __iomem *gic_cpu_base_addr;
 extern void __iomem *gic_dist_base_addr;
 
 extern void __init gic_init_irq(void);
diff --git a/arch/arm/mach-omap2/omap-hotplug.c b/arch/arm/mach-omap2/omap-hotplug.c
index 6cee456ca542..4976b9393e49 100644
--- a/arch/arm/mach-omap2/omap-hotplug.c
+++ b/arch/arm/mach-omap2/omap-hotplug.c
@@ -17,16 +17,13 @@
 #include <linux/kernel.h>
 #include <linux/errno.h>
 #include <linux/smp.h>
-#include <linux/completion.h>
 
 #include <asm/cacheflush.h>
 #include <mach/omap4-common.h>
 
-static DECLARE_COMPLETION(cpu_killed);
-
 int platform_cpu_kill(unsigned int cpu)
 {
-	return wait_for_completion_timeout(&cpu_killed, 5000);
+	return 1;
 }
 
 /*
@@ -35,15 +32,6 @@ int platform_cpu_kill(unsigned int cpu)
  */
 void platform_cpu_die(unsigned int cpu)
 {
-	unsigned int this_cpu = hard_smp_processor_id();
-
-	if (cpu != this_cpu) {
-		pr_crit("platform_cpu_die running on %u, should be %u\n",
-			   this_cpu, cpu);
-		BUG();
-	}
-	pr_notice("CPU%u: shutdown\n", cpu);
-	complete(&cpu_killed);
 	flush_cache_all();
 	dsb();
 
diff --git a/arch/arm/mach-omap2/omap-smp.c b/arch/arm/mach-omap2/omap-smp.c
index 9e9f70e18e3c..b66cfe8bc464 100644
--- a/arch/arm/mach-omap2/omap-smp.c
+++ b/arch/arm/mach-omap2/omap-smp.c
@@ -21,7 +21,6 @@
 #include <linux/io.h>
 
 #include <asm/cacheflush.h>
-#include <asm/localtimer.h>
 #include <asm/smp_scu.h>
 #include <mach/hardware.h>
 #include <mach/omap4-common.h>
@@ -29,28 +28,16 @@
 /* SCU base address */
 static void __iomem *scu_base;
 
-/*
- * Use SCU config register to count number of cores
- */
-static inline unsigned int get_core_count(void)
-{
-	if (scu_base)
-		return scu_get_core_count(scu_base);
-	return 1;
-}
-
 static DEFINE_SPINLOCK(boot_lock);
 
 void __cpuinit platform_secondary_init(unsigned int cpu)
 {
-	trace_hardirqs_off();
-
 	/*
 	 * If any interrupts are already enabled for the primary
 	 * core (e.g. timer irq), then they will not have been enabled
 	 * for us: do so
 	 */
-	gic_cpu_init(0, gic_cpu_base_addr);
+	gic_secondary_init(0);
 
 	/*
 	 * Synchronise with the boot thread.
@@ -76,7 +63,7 @@ int __cpuinit boot_secondary(unsigned int cpu, struct task_struct *idle)
 	omap_modify_auxcoreboot0(0x200, 0xfffffdff);
 	flush_cache_all();
 	smp_wmb();
-	smp_cross_call(cpumask_of(cpu));
+	smp_cross_call(cpumask_of(cpu), 1);
 
 	/*
 	 * Now the secondary core is starting up let it run its
@@ -118,25 +105,9 @@ void __init smp_init_cpus(void)
 	scu_base = ioremap(OMAP44XX_SCU_BASE, SZ_256);
 	BUG_ON(!scu_base);
 
-	ncores = get_core_count();
-
-	for (i = 0; i < ncores; i++)
-		set_cpu_possible(i, true);
-}
-
-void __init smp_prepare_cpus(unsigned int max_cpus)
-{
-	unsigned int ncores = get_core_count();
-	unsigned int cpu = smp_processor_id();
-	int i;
+	ncores = scu_get_core_count(scu_base);
 
 	/* sanity check */
-	if (ncores == 0) {
-		printk(KERN_ERR
-		       "OMAP4: strange core count of 0? Default to 1\n");
-		ncores = 1;
-	}
-
 	if (ncores > NR_CPUS) {
 		printk(KERN_WARNING
 		       "OMAP4: no. of cores (%d) greater than configured "
@@ -144,13 +115,14 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
 		       ncores, NR_CPUS);
 		ncores = NR_CPUS;
 	}
-	smp_store_cpu_info(cpu);
 
-	/*
-	 * are we trying to boot more cores than exist?
-	 */
-	if (max_cpus > ncores)
-		max_cpus = ncores;
+	for (i = 0; i < ncores; i++)
+		set_cpu_possible(i, true);
+}
+
+void __init platform_smp_prepare_cpus(unsigned int max_cpus)
+{
+	int i;
 
 	/*
 	 * Initialise the present map, which describes the set of CPUs
@@ -159,18 +131,10 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
 	for (i = 0; i < max_cpus; i++)
 		set_cpu_present(i, true);
 
-	if (max_cpus > 1) {
-		/*
-		 * Enable the local timer or broadcast device for the
-		 * boot CPU, but only if we have more than one CPU.
-		 */
-		percpu_timer_setup();
-
-		/*
-		 * Initialise the SCU and wake up the secondary core using
-		 * wakeup_secondary().
-		 */
-		scu_enable(scu_base);
-		wakeup_secondary();
-	}
+	/*
+	 * Initialise the SCU and wake up the secondary core using
+	 * wakeup_secondary().
+	 */
+	scu_enable(scu_base);
+	wakeup_secondary();
 }
diff --git a/arch/arm/mach-omap2/omap4-common.c b/arch/arm/mach-omap2/omap4-common.c
index 2f895553e6a8..666e852988d5 100644
--- a/arch/arm/mach-omap2/omap4-common.c
+++ b/arch/arm/mach-omap2/omap4-common.c
@@ -26,21 +26,22 @@
 void __iomem *l2cache_base;
 #endif
 
-void __iomem *gic_cpu_base_addr;
 void __iomem *gic_dist_base_addr;
 
 
 void __init gic_init_irq(void)
 {
+	void __iomem *gic_cpu_base;
+
 	/* Static mapping, never released */
 	gic_dist_base_addr = ioremap(OMAP44XX_GIC_DIST_BASE, SZ_4K);
 	BUG_ON(!gic_dist_base_addr);
-	gic_dist_init(0, gic_dist_base_addr, 29);
 
 	/* Static mapping, never released */
-	gic_cpu_base_addr = ioremap(OMAP44XX_GIC_CPU_BASE, SZ_512);
-	BUG_ON(!gic_cpu_base_addr);
-	gic_cpu_init(0, gic_cpu_base_addr);
+	gic_cpu_base = ioremap(OMAP44XX_GIC_CPU_BASE, SZ_512);
+	BUG_ON(!gic_cpu_base);
+
+	gic_init(0, 29, gic_dist_base_addr, gic_cpu_base);
 }
 
 #ifdef CONFIG_CACHE_L2X0
diff --git a/arch/arm/mach-omap2/timer-gp.c b/arch/arm/mach-omap2/timer-gp.c
index e13c29eecf2b..a7816dbdc6b1 100644
--- a/arch/arm/mach-omap2/timer-gp.c
+++ b/arch/arm/mach-omap2/timer-gp.c
@@ -195,7 +195,6 @@ static struct clocksource clocksource_gpt = {
 	.rating		= 300,
 	.read		= clocksource_read_cycles,
 	.mask		= CLOCKSOURCE_MASK(32),
-	.shift		= 24,
 	.flags		= CLOCK_SOURCE_IS_CONTINUOUS,
 };
 
@@ -220,9 +219,7 @@ static void __init omap2_gp_clocksource_init(void)
 
 	omap_dm_timer_set_load_start(gpt, 1, 0);
 
-	clocksource_gpt.mult =
-		clocksource_khz2mult(tick_rate/1000, clocksource_gpt.shift);
-	if (clocksource_register(&clocksource_gpt))
+	if (clocksource_register_hz(&clocksource_gpt, tick_rate))
 		printk(err2, clocksource_gpt.name);
 }
 #endif
diff --git a/arch/arm/mach-orion5x/include/mach/io.h b/arch/arm/mach-orion5x/include/mach/io.h
index c47b033bd999..c5196101a237 100644
--- a/arch/arm/mach-orion5x/include/mach/io.h
+++ b/arch/arm/mach-orion5x/include/mach/io.h
@@ -38,8 +38,8 @@ __arch_iounmap(void __iomem *addr)
 		__iounmap(addr);
 }
 
-#define __arch_ioremap(p, s, m)	__arch_ioremap(p, s, m)
-#define __arch_iounmap(a)	__arch_iounmap(a)
+#define __arch_ioremap		__arch_ioremap
+#define __arch_iounmap		__arch_iounmap
 #define __io(a)			__typesafe_io(a)
 #define __mem_pci(a)		(a)
 
diff --git a/arch/arm/mach-pnx4008/clock.c b/arch/arm/mach-pnx4008/clock.c
index 9d1975fa4d9f..a4a3819c96cb 100644
--- a/arch/arm/mach-pnx4008/clock.c
+++ b/arch/arm/mach-pnx4008/clock.c
@@ -21,8 +21,7 @@
 #include <linux/err.h>
 #include <linux/delay.h>
 #include <linux/io.h>
-
-#include <asm/clkdev.h>
+#include <linux/clkdev.h>
 
 #include <mach/hardware.h>
 #include <mach/clock.h>
diff --git a/arch/arm/mach-pxa/Kconfig b/arch/arm/mach-pxa/Kconfig
index 1df6db6a136e..2fc9f94cdd29 100644
--- a/arch/arm/mach-pxa/Kconfig
+++ b/arch/arm/mach-pxa/Kconfig
@@ -98,6 +98,7 @@ config MACH_ARMCORE
 	select PXA27x
 	select IWMMXT
 	select PXA25x
+	select MIGHT_HAVE_PCI
 
 config MACH_EM_X270
 	bool "CompuLab EM-x270 platform"
@@ -544,6 +545,7 @@ config MACH_ICONTROL
 config ARCH_PXA_ESERIES
 	bool "PXA based Toshiba e-series PDAs"
 	select PXA25x
+	select FB_W100
 
 config MACH_E330
 	bool "Toshiba e330"
diff --git a/arch/arm/mach-pxa/clock.c b/arch/arm/mach-pxa/clock.c
index 8184fe2d71c3..d5152220ce94 100644
--- a/arch/arm/mach-pxa/clock.c
+++ b/arch/arm/mach-pxa/clock.c
@@ -6,8 +6,7 @@
 #include <linux/clk.h>
 #include <linux/spinlock.h>
 #include <linux/delay.h>
-
-#include <asm/clkdev.h>
+#include <linux/clkdev.h>
 
 #include "clock.h"
 
diff --git a/arch/arm/mach-pxa/clock.h b/arch/arm/mach-pxa/clock.h
index 6e949944f2ec..f9f349a21b54 100644
--- a/arch/arm/mach-pxa/clock.h
+++ b/arch/arm/mach-pxa/clock.h
@@ -1,5 +1,5 @@
+#include <linux/clkdev.h>
 #include <linux/sysdev.h>
-#include <asm/clkdev.h>
 
 struct clkops {
 	void			(*enable)(struct clk *);
diff --git a/arch/arm/mach-pxa/sleep.S b/arch/arm/mach-pxa/sleep.S
index 2f5b08aeb52e..c551da86baf6 100644
--- a/arch/arm/mach-pxa/sleep.S
+++ b/arch/arm/mach-pxa/sleep.S
@@ -353,8 +353,8 @@ resume_turn_on_mmu:
 
 	@ Let us ensure we jump to resume_after_mmu only when the mcr above
 	@ actually took effect.  They call it the "cpwait" operation.
-	mrc	p15, 0, r1, c2, c0, 0		@ queue a dependency on CP15
-	sub	pc, r2, r1, lsr #32		@ jump to virtual addr
+	mrc	p15, 0, r0, c2, c0, 0		@ queue a dependency on CP15
+	sub	pc, r2, r0, lsr #32		@ jump to virtual addr
 	nop
 	nop
 	nop
diff --git a/arch/arm/mach-pxa/time.c b/arch/arm/mach-pxa/time.c
index 293e40aeaf29..e7f64d9b4f2d 100644
--- a/arch/arm/mach-pxa/time.c
+++ b/arch/arm/mach-pxa/time.c
@@ -17,11 +17,11 @@
 #include <linux/interrupt.h>
 #include <linux/clockchips.h>
 #include <linux/sched.h>
-#include <linux/cnt32_to_63.h>
 
 #include <asm/div64.h>
 #include <asm/mach/irq.h>
 #include <asm/mach/time.h>
+#include <asm/sched_clock.h>
 #include <mach/regs-ost.h>
 
 /*
@@ -32,29 +32,18 @@
  * long as there is always less than 582 seconds between successive
  * calls to sched_clock() which should always be the case in practice.
  */
+static DEFINE_CLOCK_DATA(cd);
 
-#define OSCR2NS_SCALE_FACTOR 10
-
-static unsigned long oscr2ns_scale;
-
-static void __init set_oscr2ns_scale(unsigned long oscr_rate)
+unsigned long long notrace sched_clock(void)
 {
-	unsigned long long v = 1000000000ULL << OSCR2NS_SCALE_FACTOR;
-	do_div(v, oscr_rate);
-	oscr2ns_scale = v;
-	/*
-	 * We want an even value to automatically clear the top bit
-	 * returned by cnt32_to_63() without an additional run time
-	 * instruction. So if the LSB is 1 then round it up.
-	 */
-	if (oscr2ns_scale & 1)
-		oscr2ns_scale++;
+	u32 cyc = OSCR;
+	return cyc_to_sched_clock(&cd, cyc, (u32)~0);
 }
 
-unsigned long long sched_clock(void)
+static void notrace pxa_update_sched_clock(void)
 {
-	unsigned long long v = cnt32_to_63(OSCR);
-	return (v * oscr2ns_scale) >> OSCR2NS_SCALE_FACTOR;
+	u32 cyc = OSCR;
+	update_sched_clock(&cd, cyc, (u32)~0);
 }
 
 
@@ -127,7 +116,6 @@ static struct clocksource cksrc_pxa_oscr0 = {
 	.rating         = 200,
 	.read           = pxa_read_oscr,
 	.mask           = CLOCKSOURCE_MASK(32),
-	.shift          = 20,
 	.flags		= CLOCK_SOURCE_IS_CONTINUOUS,
 };
 
@@ -145,7 +133,7 @@ static void __init pxa_timer_init(void)
 	OIER = 0;
 	OSSR = OSSR_M0 | OSSR_M1 | OSSR_M2 | OSSR_M3;
 
-	set_oscr2ns_scale(clock_tick_rate);
+	init_sched_clock(&cd, pxa_update_sched_clock, 32, clock_tick_rate);
 
 	ckevt_pxa_osmr0.mult =
 		div_sc(clock_tick_rate, NSEC_PER_SEC, ckevt_pxa_osmr0.shift);
@@ -155,12 +143,9 @@ static void __init pxa_timer_init(void)
 		clockevent_delta2ns(MIN_OSCR_DELTA * 2, &ckevt_pxa_osmr0) + 1;
 	ckevt_pxa_osmr0.cpumask = cpumask_of(0);
 
-	cksrc_pxa_oscr0.mult =
-		clocksource_hz2mult(clock_tick_rate, cksrc_pxa_oscr0.shift);
-
 	setup_irq(IRQ_OST0, &pxa_ost0_irq);
 
-	clocksource_register(&cksrc_pxa_oscr0);
+	clocksource_register_hz(&cksrc_pxa_oscr0, clock_tick_rate);
 	clockevents_register_device(&ckevt_pxa_osmr0);
 }
 
diff --git a/arch/arm/mach-realview/core.c b/arch/arm/mach-realview/core.c
index 07c08151dfe6..1c6602cf50e4 100644
--- a/arch/arm/mach-realview/core.c
+++ b/arch/arm/mach-realview/core.c
@@ -30,8 +30,8 @@
 #include <linux/ata_platform.h>
 #include <linux/amba/mmci.h>
 #include <linux/gfp.h>
+#include <linux/clkdev.h>
 
-#include <asm/clkdev.h>
 #include <asm/system.h>
 #include <mach/hardware.h>
 #include <asm/irq.h>
@@ -47,15 +47,13 @@
 
 #include <asm/hardware/gic.h>
 
-#include <mach/clkdev.h>
 #include <mach/platform.h>
 #include <mach/irqs.h>
-#include <plat/timer-sp.h>
+#include <asm/hardware/timer-sp.h>
 
-#include "core.h"
+#include <plat/sched_clock.h>
 
-/* used by entry-macro.S and platsmp.c */
-void __iomem *gic_cpu_base_addr;
+#include "core.h"
 
 #ifdef CONFIG_ZONE_DMA
 /*
@@ -658,6 +656,12 @@ void realview_leds_event(led_event_t ledevt)
 #endif	/* CONFIG_LEDS */
 
 /*
+ * The sched_clock counter
+ */
+#define REFCOUNTER		(__io_address(REALVIEW_SYS_BASE) + \
+				 REALVIEW_SYS_24MHz_OFFSET)
+
+/*
  * Where is the timer (VA)?
  */
 void __iomem *timer0_va_base;
@@ -672,6 +676,8 @@ void __init realview_timer_init(unsigned int timer_irq)
 {
 	u32 val;
 
+	versatile_sched_clock_init(REFCOUNTER, 24000000);
+
 	/* 
 	 * set clock frequency: 
 	 *	REALVIEW_REFCLK is 32KHz
diff --git a/arch/arm/mach-realview/core.h b/arch/arm/mach-realview/core.h
index 781bca68a9fa..693239ddc39e 100644
--- a/arch/arm/mach-realview/core.h
+++ b/arch/arm/mach-realview/core.h
@@ -53,7 +53,6 @@ extern struct platform_device realview_i2c_device;
 extern struct mmci_platform_data realview_mmc0_plat_data;
 extern struct mmci_platform_data realview_mmc1_plat_data;
 extern struct clcd_board clcd_plat_data;
-extern void __iomem *gic_cpu_base_addr;
 extern void __iomem *timer0_va_base;
 extern void __iomem *timer1_va_base;
 extern void __iomem *timer2_va_base;
diff --git a/arch/arm/mach-realview/hotplug.c b/arch/arm/mach-realview/hotplug.c
index f95521a5e5ce..a87523d095e6 100644
--- a/arch/arm/mach-realview/hotplug.c
+++ b/arch/arm/mach-realview/hotplug.c
@@ -11,14 +11,11 @@
 #include <linux/kernel.h>
 #include <linux/errno.h>
 #include <linux/smp.h>
-#include <linux/completion.h>
 
 #include <asm/cacheflush.h>
 
 extern volatile int pen_release;
 
-static DECLARE_COMPLETION(cpu_killed);
-
 static inline void cpu_enter_lowpower(void)
 {
 	unsigned int v;
@@ -34,10 +31,10 @@ static inline void cpu_enter_lowpower(void)
 	"	bic	%0, %0, #0x20\n"
 	"	mcr	p15, 0, %0, c1, c0, 1\n"
 	"	mrc	p15, 0, %0, c1, c0, 0\n"
-	"	bic	%0, %0, #0x04\n"
+	"	bic	%0, %0, %2\n"
 	"	mcr	p15, 0, %0, c1, c0, 0\n"
 	  : "=&r" (v)
-	  : "r" (0)
+	  : "r" (0), "Ir" (CR_C)
 	  : "cc");
 }
 
@@ -46,17 +43,17 @@ static inline void cpu_leave_lowpower(void)
 	unsigned int v;
 
 	asm volatile(	"mrc	p15, 0, %0, c1, c0, 0\n"
-	"	orr	%0, %0, #0x04\n"
+	"	orr	%0, %0, %1\n"
 	"	mcr	p15, 0, %0, c1, c0, 0\n"
 	"	mrc	p15, 0, %0, c1, c0, 1\n"
 	"	orr	%0, %0, #0x20\n"
 	"	mcr	p15, 0, %0, c1, c0, 1\n"
 	  : "=&r" (v)
-	  :
+	  : "Ir" (CR_C)
 	  : "cc");
 }
 
-static inline void platform_do_lowpower(unsigned int cpu)
+static inline void platform_do_lowpower(unsigned int cpu, int *spurious)
 {
 	/*
 	 * there is no power-control hardware on this platform, so all
@@ -80,22 +77,19 @@ static inline void platform_do_lowpower(unsigned int cpu)
 		}
 
 		/*
-		 * getting here, means that we have come out of WFI without
+		 * Getting here, means that we have come out of WFI without
 		 * having been woken up - this shouldn't happen
 		 *
-		 * The trouble is, letting people know about this is not really
-		 * possible, since we are currently running incoherently, and
-		 * therefore cannot safely call printk() or anything else
+		 * Just note it happening - when we're woken, we can report
+		 * its occurrence.
 		 */
-#ifdef DEBUG
-		printk("CPU%u: spurious wakeup call\n", cpu);
-#endif
+		(*spurious)++;
 	}
 }
 
 int platform_cpu_kill(unsigned int cpu)
 {
-	return wait_for_completion_timeout(&cpu_killed, 5000);
+	return 1;
 }
 
 /*
@@ -105,30 +99,22 @@ int platform_cpu_kill(unsigned int cpu)
  */
 void platform_cpu_die(unsigned int cpu)
 {
-#ifdef DEBUG
-	unsigned int this_cpu = hard_smp_processor_id();
-
-	if (cpu != this_cpu) {
-		printk(KERN_CRIT "Eek! platform_cpu_die running on %u, should be %u\n",
-			   this_cpu, cpu);
-		BUG();
-	}
-#endif
-
-	printk(KERN_NOTICE "CPU%u: shutdown\n", cpu);
-	complete(&cpu_killed);
+	int spurious = 0;
 
 	/*
 	 * we're ready for shutdown now, so do it
 	 */
 	cpu_enter_lowpower();
-	platform_do_lowpower(cpu);
+	platform_do_lowpower(cpu, &spurious);
 
 	/*
 	 * bring this CPU back into the world of cache
 	 * coherency, and then restore interrupts
 	 */
 	cpu_leave_lowpower();
+
+	if (spurious)
+		pr_warn("CPU%u: %u spurious wakeup calls\n", cpu, spurious);
 }
 
 int platform_cpu_disable(unsigned int cpu)
diff --git a/arch/arm/mach-realview/include/mach/entry-macro.S b/arch/arm/mach-realview/include/mach/entry-macro.S
index 340a5c276946..4071164aebaa 100644
--- a/arch/arm/mach-realview/include/mach/entry-macro.S
+++ b/arch/arm/mach-realview/include/mach/entry-macro.S
@@ -8,74 +8,11 @@
  * warranty of any kind, whether express or implied.
  */
 #include <mach/hardware.h>
-#include <asm/hardware/gic.h>
+#include <asm/hardware/entry-macro-gic.S>
 
 		.macro	disable_fiq
 		.endm
 
-		.macro  get_irqnr_preamble, base, tmp
-		ldr	\base, =gic_cpu_base_addr
-		ldr	\base, [\base]
-		.endm
-
 		.macro  arch_ret_to_user, tmp1, tmp2
 		.endm
 
-		/*
-		 * The interrupt numbering scheme is defined in the
-		 * interrupt controller spec.  To wit:
-		 *
-		 * Interrupts 0-15 are IPI
-		 * 16-28 are reserved
-		 * 29-31 are local.  We allow 30 to be used for the watchdog.
-		 * 32-1020 are global
-		 * 1021-1022 are reserved
-		 * 1023 is "spurious" (no interrupt)
-		 *
-		 * For now, we ignore all local interrupts so only return an interrupt if it's
-		 * between 30 and 1020.  The test_for_ipi routine below will pick up on IPIs.
-		 *
-		 * A simple read from the controller will tell us the number of the highest
-                 * priority enabled interrupt.  We then just need to check whether it is in the
-		 * valid range for an IRQ (30-1020 inclusive).
-		 */
-
-		.macro  get_irqnr_and_base, irqnr, irqstat, base, tmp
-
-		ldr     \irqstat, [\base, #GIC_CPU_INTACK] /* bits 12-10 = src CPU, 9-0 = int # */
-
-		ldr	\tmp, =1021
-
-		bic     \irqnr, \irqstat, #0x1c00
-
-		cmp     \irqnr, #29
-		cmpcc	\irqnr, \irqnr
-		cmpne	\irqnr, \tmp
-		cmpcs	\irqnr, \irqnr
-
-		.endm
-
-		/* We assume that irqstat (the raw value of the IRQ acknowledge
-		 * register) is preserved from the macro above.
-		 * If there is an IPI, we immediately signal end of interrupt on the
-		 * controller, since this requires the original irqstat value which
-		 * we won't easily be able to recreate later.
-		 */
-
-		.macro test_for_ipi, irqnr, irqstat, base, tmp
-		bic	\irqnr, \irqstat, #0x1c00
-		cmp	\irqnr, #16
-		strcc	\irqstat, [\base, #GIC_CPU_EOI]
-		cmpcs	\irqnr, \irqnr
-		.endm
-
-		/* As above, this assumes that irqstat and base are preserved.. */
-
-		.macro test_for_ltirq, irqnr, irqstat, base, tmp
-		bic	\irqnr, \irqstat, #0x1c00
-		mov 	\tmp, #0
-		cmp	\irqnr, #29
-		moveq	\tmp, #1
-		streq	\irqstat, [\base, #GIC_CPU_EOI]
-		cmp	\tmp, #0
-		.endm
diff --git a/arch/arm/mach-realview/include/mach/smp.h b/arch/arm/mach-realview/include/mach/smp.h
index d3cd265cb058..c8221b38ee7c 100644
--- a/arch/arm/mach-realview/include/mach/smp.h
+++ b/arch/arm/mach-realview/include/mach/smp.h
@@ -2,14 +2,13 @@
 #define ASMARM_ARCH_SMP_H
 
 #include <asm/hardware/gic.h>
-#include <asm/smp_mpidr.h>
 
 /*
  * We use IRQ1 as the IPI
  */
-static inline void smp_cross_call(const struct cpumask *mask)
+static inline void smp_cross_call(const struct cpumask *mask, int ipi)
 {
-	gic_raise_softirq(mask, 1);
+	gic_raise_softirq(mask, ipi);
 }
 
 #endif
diff --git a/arch/arm/mach-realview/platsmp.c b/arch/arm/mach-realview/platsmp.c
index 009265818d55..a22bf67f2f78 100644
--- a/arch/arm/mach-realview/platsmp.c
+++ b/arch/arm/mach-realview/platsmp.c
@@ -19,7 +19,6 @@
 #include <asm/cacheflush.h>
 #include <mach/hardware.h>
 #include <asm/mach-types.h>
-#include <asm/localtimer.h>
 #include <asm/unified.h>
 
 #include <mach/board-eb.h>
@@ -37,6 +36,19 @@ extern void realview_secondary_startup(void);
  */
 volatile int __cpuinitdata pen_release = -1;
 
+/*
+ * Write pen_release in a way that is guaranteed to be visible to all
+ * observers, irrespective of whether they're taking part in coherency
+ * or not.  This is necessary for the hotplug code to work reliably.
+ */
+static void write_pen_release(int val)
+{
+	pen_release = val;
+	smp_wmb();
+	__cpuc_flush_dcache_area((void *)&pen_release, sizeof(pen_release));
+	outer_clean_range(__pa(&pen_release), __pa(&pen_release + 1));
+}
+
 static void __iomem *scu_base_addr(void)
 {
 	if (machine_is_realview_eb_mp())
@@ -50,33 +62,22 @@ static void __iomem *scu_base_addr(void)
 		return (void __iomem *)0;
 }
 
-static inline unsigned int get_core_count(void)
-{
-	void __iomem *scu_base = scu_base_addr();
-	if (scu_base)
-		return scu_get_core_count(scu_base);
-	return 1;
-}
-
 static DEFINE_SPINLOCK(boot_lock);
 
 void __cpuinit platform_secondary_init(unsigned int cpu)
 {
-	trace_hardirqs_off();
-
 	/*
 	 * if any interrupts are already enabled for the primary
 	 * core (e.g. timer irq), then they will not have been enabled
 	 * for us: do so
 	 */
-	gic_cpu_init(0, gic_cpu_base_addr);
+	gic_secondary_init(0);
 
 	/*
 	 * let the primary processor know we're out of the
 	 * pen, then head off into the C entry point
 	 */
-	pen_release = -1;
-	smp_wmb();
+	write_pen_release(-1);
 
 	/*
 	 * Synchronise with the boot thread.
@@ -103,20 +104,14 @@ int __cpuinit boot_secondary(unsigned int cpu, struct task_struct *idle)
 	 * Note that "pen_release" is the hardware CPU ID, whereas
 	 * "cpu" is Linux's internal ID.
 	 */
-	pen_release = cpu;
-	flush_cache_all();
+	write_pen_release(cpu);
 
 	/*
-	 * XXX
-	 *
-	 * This is a later addition to the booting protocol: the
-	 * bootMonitor now puts secondary cores into WFI, so
-	 * poke_milo() no longer gets the cores moving; we need
-	 * to send a soft interrupt to wake the secondary core.
-	 * Use smp_cross_call() for this, since there's little
-	 * point duplicating the code here
+	 * Send the secondary CPU a soft interrupt, thereby causing
+	 * the boot monitor to read the system wide flags register,
+	 * and branch to the address found there.
 	 */
-	smp_cross_call(cpumask_of(cpu));
+	smp_cross_call(cpumask_of(cpu), 1);
 
 	timeout = jiffies + (1 * HZ);
 	while (time_before(jiffies, timeout)) {
@@ -136,48 +131,18 @@ int __cpuinit boot_secondary(unsigned int cpu, struct task_struct *idle)
 	return pen_release != -1 ? -ENOSYS : 0;
 }
 
-static void __init poke_milo(void)
-{
-	/* nobody is to be released from the pen yet */
-	pen_release = -1;
-
-	/*
-	 * Write the address of secondary startup into the system-wide flags
-	 * register. The BootMonitor waits for this register to become
-	 * non-zero.
-	 */
-	__raw_writel(BSYM(virt_to_phys(realview_secondary_startup)),
-		     __io_address(REALVIEW_SYS_FLAGSSET));
-
-	mb();
-}
-
 /*
  * Initialise the CPU possible map early - this describes the CPUs
  * which may be present or become present in the system.
  */
 void __init smp_init_cpus(void)
 {
-	unsigned int i, ncores = get_core_count();
+	void __iomem *scu_base = scu_base_addr();
+	unsigned int i, ncores;
 
-	for (i = 0; i < ncores; i++)
-		set_cpu_possible(i, true);
-}
-
-void __init smp_prepare_cpus(unsigned int max_cpus)
-{
-	unsigned int ncores = get_core_count();
-	unsigned int cpu = smp_processor_id();
-	int i;
+	ncores = scu_base ? scu_get_core_count(scu_base) : 1;
 
 	/* sanity check */
-	if (ncores == 0) {
-		printk(KERN_ERR
-		       "Realview: strange CM count of 0? Default to 1\n");
-
-		ncores = 1;
-	}
-
 	if (ncores > NR_CPUS) {
 		printk(KERN_WARNING
 		       "Realview: no. of cores (%d) greater than configured "
@@ -186,13 +151,13 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
 		ncores = NR_CPUS;
 	}
 
-	smp_store_cpu_info(cpu);
+	for (i = 0; i < ncores; i++)
+		set_cpu_possible(i, true);
+}
 
-	/*
-	 * are we trying to boot more cores than exist?
-	 */
-	if (max_cpus > ncores)
-		max_cpus = ncores;
+void __init platform_smp_prepare_cpus(unsigned int max_cpus)
+{
+	int i;
 
 	/*
 	 * Initialise the present map, which describes the set of CPUs
@@ -201,21 +166,14 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
 	for (i = 0; i < max_cpus; i++)
 		set_cpu_present(i, true);
 
+	scu_enable(scu_base_addr());
+
 	/*
-	 * Initialise the SCU if there are more than one CPU and let
-	 * them know where to start. Note that, on modern versions of
-	 * MILO, the "poke" doesn't actually do anything until each
-	 * individual core is sent a soft interrupt to get it out of
-	 * WFI
+	 * Write the address of secondary startup into the
+	 * system-wide flags register. The BootMonitor waits
+	 * until it receives a soft interrupt, and then the
+	 * secondary CPU branches to this address.
 	 */
-	if (max_cpus > 1) {
-		/*
-		 * Enable the local timer or broadcast device for the
-		 * boot CPU, but only if we have more than one CPU.
-		 */
-		percpu_timer_setup();
-
-		scu_enable(scu_base_addr());
-		poke_milo();
-	}
+	__raw_writel(BSYM(virt_to_phys(realview_secondary_startup)),
+		     __io_address(REALVIEW_SYS_FLAGSSET));
 }
diff --git a/arch/arm/mach-realview/realview_eb.c b/arch/arm/mach-realview/realview_eb.c
index f2697106f809..6ef5c5e528b2 100644
--- a/arch/arm/mach-realview/realview_eb.c
+++ b/arch/arm/mach-realview/realview_eb.c
@@ -364,21 +364,19 @@ static void __init gic_init_irq(void)
 		writel(0x00000000, __io_address(REALVIEW_SYS_LOCK));
 
 		/* core tile GIC, primary */
-		gic_cpu_base_addr = __io_address(REALVIEW_EB11MP_GIC_CPU_BASE);
-		gic_dist_init(0, __io_address(REALVIEW_EB11MP_GIC_DIST_BASE), 29);
-		gic_cpu_init(0, gic_cpu_base_addr);
+		gic_init(0, 29, __io_address(REALVIEW_EB11MP_GIC_DIST_BASE),
+			 __io_address(REALVIEW_EB11MP_GIC_CPU_BASE));
 
 #ifndef CONFIG_REALVIEW_EB_ARM11MP_REVB
 		/* board GIC, secondary */
-		gic_dist_init(1, __io_address(REALVIEW_EB_GIC_DIST_BASE), 64);
-		gic_cpu_init(1, __io_address(REALVIEW_EB_GIC_CPU_BASE));
+		gic_init(1, 64, __io_address(REALVIEW_EB_GIC_DIST_BASE),
+			 __io_address(REALVIEW_EB_GIC_CPU_BASE));
 		gic_cascade_irq(1, IRQ_EB11MP_EB_IRQ1);
 #endif
 	} else {
 		/* board GIC, primary */
-		gic_cpu_base_addr = __io_address(REALVIEW_EB_GIC_CPU_BASE);
-		gic_dist_init(0, __io_address(REALVIEW_EB_GIC_DIST_BASE), 29);
-		gic_cpu_init(0, gic_cpu_base_addr);
+		gic_init(0, 29, __io_address(REALVIEW_EB_GIC_DIST_BASE),
+			 __io_address(REALVIEW_EB_GIC_CPU_BASE));
 	}
 }
 
diff --git a/arch/arm/mach-realview/realview_pb1176.c b/arch/arm/mach-realview/realview_pb1176.c
index a4125619d71b..cbdc97a5685f 100644
--- a/arch/arm/mach-realview/realview_pb1176.c
+++ b/arch/arm/mach-realview/realview_pb1176.c
@@ -304,13 +304,14 @@ static struct platform_device char_lcd_device = {
 static void __init gic_init_irq(void)
 {
 	/* ARM1176 DevChip GIC, primary */
-	gic_cpu_base_addr = __io_address(REALVIEW_DC1176_GIC_CPU_BASE);
-	gic_dist_init(0, __io_address(REALVIEW_DC1176_GIC_DIST_BASE), IRQ_DC1176_GIC_START);
-	gic_cpu_init(0, gic_cpu_base_addr);
+	gic_init(0, IRQ_DC1176_GIC_START,
+		 __io_address(REALVIEW_DC1176_GIC_DIST_BASE),
+		 __io_address(REALVIEW_DC1176_GIC_CPU_BASE));
 
 	/* board GIC, secondary */
-	gic_dist_init(1, __io_address(REALVIEW_PB1176_GIC_DIST_BASE), IRQ_PB1176_GIC_START);
-	gic_cpu_init(1, __io_address(REALVIEW_PB1176_GIC_CPU_BASE));
+	gic_init(1, IRQ_PB1176_GIC_START,
+		 __io_address(REALVIEW_PB1176_GIC_DIST_BASE),
+		 __io_address(REALVIEW_PB1176_GIC_CPU_BASE));
 	gic_cascade_irq(1, IRQ_DC1176_PB_IRQ1);
 }
 
diff --git a/arch/arm/mach-realview/realview_pb11mp.c b/arch/arm/mach-realview/realview_pb11mp.c
index 117b95b2ca15..8e8ab7d29a6a 100644
--- a/arch/arm/mach-realview/realview_pb11mp.c
+++ b/arch/arm/mach-realview/realview_pb11mp.c
@@ -309,13 +309,13 @@ static void __init gic_init_irq(void)
 	writel(0x00000000, __io_address(REALVIEW_SYS_LOCK));
 
 	/* ARM11MPCore test chip GIC, primary */
-	gic_cpu_base_addr = __io_address(REALVIEW_TC11MP_GIC_CPU_BASE);
-	gic_dist_init(0, __io_address(REALVIEW_TC11MP_GIC_DIST_BASE), 29);
-	gic_cpu_init(0, gic_cpu_base_addr);
+	gic_init(0, 29, __io_address(REALVIEW_TC11MP_GIC_DIST_BASE),
+		 __io_address(REALVIEW_TC11MP_GIC_CPU_BASE));
 
 	/* board GIC, secondary */
-	gic_dist_init(1, __io_address(REALVIEW_PB11MP_GIC_DIST_BASE), IRQ_PB11MP_GIC_START);
-	gic_cpu_init(1, __io_address(REALVIEW_PB11MP_GIC_CPU_BASE));
+	gic_init(1, IRQ_PB11MP_GIC_START,
+		 __io_address(REALVIEW_PB11MP_GIC_DIST_BASE),
+		 __io_address(REALVIEW_PB11MP_GIC_CPU_BASE));
 	gic_cascade_irq(1, IRQ_TC11MP_PB_IRQ1);
 }
 
diff --git a/arch/arm/mach-realview/realview_pba8.c b/arch/arm/mach-realview/realview_pba8.c
index 929b8dc12e81..841118e3e118 100644
--- a/arch/arm/mach-realview/realview_pba8.c
+++ b/arch/arm/mach-realview/realview_pba8.c
@@ -273,9 +273,9 @@ static struct platform_device pmu_device = {
 static void __init gic_init_irq(void)
 {
 	/* ARM PB-A8 on-board GIC */
-	gic_cpu_base_addr = __io_address(REALVIEW_PBA8_GIC_CPU_BASE);
-	gic_dist_init(0, __io_address(REALVIEW_PBA8_GIC_DIST_BASE), IRQ_PBA8_GIC_START);
-	gic_cpu_init(0, __io_address(REALVIEW_PBA8_GIC_CPU_BASE));
+	gic_init(0, IRQ_PBA8_GIC_START,
+		 __io_address(REALVIEW_PBA8_GIC_DIST_BASE),
+		 __io_address(REALVIEW_PBA8_GIC_CPU_BASE));
 }
 
 static void __init realview_pba8_timer_init(void)
diff --git a/arch/arm/mach-realview/realview_pbx.c b/arch/arm/mach-realview/realview_pbx.c
index b9f9e20031a7..02b755b009db 100644
--- a/arch/arm/mach-realview/realview_pbx.c
+++ b/arch/arm/mach-realview/realview_pbx.c
@@ -313,15 +313,12 @@ static void __init gic_init_irq(void)
 {
 	/* ARM PBX on-board GIC */
 	if (core_tile_pbx11mp() || core_tile_pbxa9mp()) {
-		gic_cpu_base_addr = __io_address(REALVIEW_PBX_TILE_GIC_CPU_BASE);
-		gic_dist_init(0, __io_address(REALVIEW_PBX_TILE_GIC_DIST_BASE),
-			      29);
-		gic_cpu_init(0, __io_address(REALVIEW_PBX_TILE_GIC_CPU_BASE));
+		gic_init(0, 29, __io_address(REALVIEW_PBX_TILE_GIC_DIST_BASE),
+			 __io_address(REALVIEW_PBX_TILE_GIC_CPU_BASE));
 	} else {
-		gic_cpu_base_addr = __io_address(REALVIEW_PBX_GIC_CPU_BASE);
-		gic_dist_init(0, __io_address(REALVIEW_PBX_GIC_DIST_BASE),
-			      IRQ_PBX_GIC_START);
-		gic_cpu_init(0, __io_address(REALVIEW_PBX_GIC_CPU_BASE));
+		gic_init(0, IRQ_PBX_GIC_START,
+			 __io_address(REALVIEW_PBX_GIC_DIST_BASE),
+			 __io_address(REALVIEW_PBX_GIC_CPU_BASE));
 	}
 }
 
diff --git a/arch/arm/mach-s3c2412/Kconfig b/arch/arm/mach-s3c2412/Kconfig
index fa2e5bffbb8e..e82ab4aa7ab9 100644
--- a/arch/arm/mach-s3c2412/Kconfig
+++ b/arch/arm/mach-s3c2412/Kconfig
@@ -28,9 +28,16 @@ config S3C2412_DMA
 
 config S3C2412_PM
 	bool
+	select S3C2412_PM_SLEEP
 	help
 	  Internal config node to apply S3C2412 power management
 
+config S3C2412_PM_SLEEP
+	bool
+	help
+	  Internal config node to apply sleep for S3C2412 power management.
+	  Can be selected by another SoCs with similar sleep procedure.
+
 # Note, the S3C2412 IOtiming support is in plat-s3c24xx
 
 config S3C2412_CPUFREQ
@@ -52,7 +59,7 @@ config MACH_JIVE
 	  Say Y here if you are using the Logitech Jive.
 
 config MACH_JIVE_SHOW_BOOTLOADER
-	bool "Allow access to bootloader partitions in MTD"
+	bool "Allow access to bootloader partitions in MTD (EXPERIMENTAL)"
 	depends on MACH_JIVE && EXPERIMENTAL
 
 config MACH_SMDK2413
diff --git a/arch/arm/mach-s3c2412/Makefile b/arch/arm/mach-s3c2412/Makefile
index 530ec46cbaea..6c48a91ea39e 100644
--- a/arch/arm/mach-s3c2412/Makefile
+++ b/arch/arm/mach-s3c2412/Makefile
@@ -14,7 +14,8 @@ obj-$(CONFIG_CPU_S3C2412)	+= irq.o
 obj-$(CONFIG_CPU_S3C2412)	+= clock.o
 obj-$(CONFIG_CPU_S3C2412)	+= gpio.o
 obj-$(CONFIG_S3C2412_DMA)	+= dma.o
-obj-$(CONFIG_S3C2412_PM)	+= pm.o sleep.o
+obj-$(CONFIG_S3C2412_PM)	+= pm.o
+obj-$(CONFIG_S3C2412_PM_SLEEP)	+= sleep.o
 obj-$(CONFIG_S3C2412_CPUFREQ)	+= cpu-freq.o
 
 # Machine support
diff --git a/arch/arm/mach-s3c2416/Kconfig b/arch/arm/mach-s3c2416/Kconfig
index 27b3e7c9d613..df8d14974c90 100644
--- a/arch/arm/mach-s3c2416/Kconfig
+++ b/arch/arm/mach-s3c2416/Kconfig
@@ -27,6 +27,7 @@ config S3C2416_DMA
 
 config S3C2416_PM
 	bool
+	select S3C2412_PM_SLEEP
 	help
 	  Internal config node to apply S3C2416 power management
 
diff --git a/arch/arm/mach-s5pv210/mach-aquila.c b/arch/arm/mach-s5pv210/mach-aquila.c
index 28677caf3613..461aa035afc0 100644
--- a/arch/arm/mach-s5pv210/mach-aquila.c
+++ b/arch/arm/mach-s5pv210/mach-aquila.c
@@ -378,6 +378,12 @@ static struct max8998_regulator_data aquila_regulators[] = {
 static struct max8998_platform_data aquila_max8998_pdata = {
 	.num_regulators	= ARRAY_SIZE(aquila_regulators),
 	.regulators	= aquila_regulators,
+	.buck1_set1	= S5PV210_GPH0(3),
+	.buck1_set2	= S5PV210_GPH0(4),
+	.buck2_set3	= S5PV210_GPH0(5),
+	.buck1_max_voltage1 = 1200000,
+	.buck1_max_voltage2 = 1200000,
+	.buck2_max_voltage = 1200000,
 };
 #endif
 
diff --git a/arch/arm/mach-s5pv210/mach-goni.c b/arch/arm/mach-s5pv210/mach-goni.c
index b1dcf964a768..e22d5112fd44 100644
--- a/arch/arm/mach-s5pv210/mach-goni.c
+++ b/arch/arm/mach-s5pv210/mach-goni.c
@@ -518,6 +518,12 @@ static struct max8998_regulator_data goni_regulators[] = {
 static struct max8998_platform_data goni_max8998_pdata = {
 	.num_regulators	= ARRAY_SIZE(goni_regulators),
 	.regulators	= goni_regulators,
+	.buck1_set1	= S5PV210_GPH0(3),
+	.buck1_set2	= S5PV210_GPH0(4),
+	.buck2_set3	= S5PV210_GPH0(5),
+	.buck1_max_voltage1 = 1200000,
+	.buck1_max_voltage2 = 1200000,
+	.buck2_max_voltage = 1200000,
 };
 #endif
 
diff --git a/arch/arm/mach-s5pv310/cpu.c b/arch/arm/mach-s5pv310/cpu.c
index 82ce4aa6d61a..72ab289e7816 100644
--- a/arch/arm/mach-s5pv310/cpu.c
+++ b/arch/arm/mach-s5pv310/cpu.c
@@ -24,8 +24,6 @@
 
 #include <mach/regs-irq.h>
 
-void __iomem *gic_cpu_base_addr;
-
 extern int combiner_init(unsigned int combiner_nr, void __iomem *base,
 			 unsigned int irq_start);
 extern void combiner_cascade_irq(unsigned int combiner_nr, unsigned int irq);
@@ -122,9 +120,7 @@ void __init s5pv310_init_irq(void)
 {
 	int irq;
 
-	gic_cpu_base_addr = S5P_VA_GIC_CPU;
-	gic_dist_init(0, S5P_VA_GIC_DIST, IRQ_LOCALTIMER);
-	gic_cpu_init(0, S5P_VA_GIC_CPU);
+	gic_init(0, IRQ_LOCALTIMER, S5P_VA_GIC_DIST, S5P_VA_GIC_CPU);
 
 	for (irq = 0; irq < MAX_COMBINER_NR; irq++) {
 		combiner_init(irq, (void __iomem *)S5P_VA_COMBINER(irq),
diff --git a/arch/arm/mach-s5pv310/hotplug.c b/arch/arm/mach-s5pv310/hotplug.c
index 03652c3605f6..afa5392d9fc0 100644
--- a/arch/arm/mach-s5pv310/hotplug.c
+++ b/arch/arm/mach-s5pv310/hotplug.c
@@ -13,14 +13,11 @@
 #include <linux/kernel.h>
 #include <linux/errno.h>
 #include <linux/smp.h>
-#include <linux/completion.h>
 
 #include <asm/cacheflush.h>
 
 extern volatile int pen_release;
 
-static DECLARE_COMPLETION(cpu_killed);
-
 static inline void cpu_enter_lowpower(void)
 {
 	unsigned int v;
@@ -33,13 +30,13 @@ static inline void cpu_enter_lowpower(void)
 	 * Turn off coherency
 	 */
 	"	mrc	p15, 0, %0, c1, c0, 1\n"
-	"	bic	%0, %0, #0x20\n"
+	"	bic	%0, %0, %2\n"
 	"	mcr	p15, 0, %0, c1, c0, 1\n"
 	"	mrc	p15, 0, %0, c1, c0, 0\n"
 	"	bic	%0, %0, #0x04\n"
 	"	mcr	p15, 0, %0, c1, c0, 0\n"
 	  : "=&r" (v)
-	  : "r" (0)
+	  : "r" (0), "Ir" (CR_C)
 	  : "cc");
 }
 
@@ -49,17 +46,17 @@ static inline void cpu_leave_lowpower(void)
 
 	asm volatile(
 	"mrc	p15, 0, %0, c1, c0, 0\n"
-	"	orr	%0, %0, #0x04\n"
+	"	orr	%0, %0, %1\n"
 	"	mcr	p15, 0, %0, c1, c0, 0\n"
 	"	mrc	p15, 0, %0, c1, c0, 1\n"
 	"	orr	%0, %0, #0x20\n"
 	"	mcr	p15, 0, %0, c1, c0, 1\n"
 	  : "=&r" (v)
-	  :
+	  : "Ir" (CR_C)
 	  : "cc");
 }
 
-static inline void platform_do_lowpower(unsigned int cpu)
+static inline void platform_do_lowpower(unsigned int cpu, int *spurious)
 {
 	/*
 	 * there is no power-control hardware on this platform, so all
@@ -83,22 +80,19 @@ static inline void platform_do_lowpower(unsigned int cpu)
 		}
 
 		/*
-		 * getting here, means that we have come out of WFI without
+		 * Getting here, means that we have come out of WFI without
 		 * having been woken up - this shouldn't happen
 		 *
-		 * The trouble is, letting people know about this is not really
-		 * possible, since we are currently running incoherently, and
-		 * therefore cannot safely call printk() or anything else
+		 * Just note it happening - when we're woken, we can report
+		 * its occurrence.
 		 */
-#ifdef DEBUG
-		printk(KERN_WARN "CPU%u: spurious wakeup call\n", cpu);
-#endif
+		(*spurious)++;
 	}
 }
 
 int platform_cpu_kill(unsigned int cpu)
 {
-	return wait_for_completion_timeout(&cpu_killed, 5000);
+	return 1;
 }
 
 /*
@@ -108,30 +102,22 @@ int platform_cpu_kill(unsigned int cpu)
  */
 void platform_cpu_die(unsigned int cpu)
 {
-#ifdef DEBUG
-	unsigned int this_cpu = hard_smp_processor_id();
-
-	if (cpu != this_cpu) {
-		printk(KERN_CRIT "Eek! platform_cpu_die running on %u, should be %u\n",
-			   this_cpu, cpu);
-		BUG();
-	}
-#endif
-
-	printk(KERN_NOTICE "CPU%u: shutdown\n", cpu);
-	complete(&cpu_killed);
+	int spurious = 0;
 
 	/*
 	 * we're ready for shutdown now, so do it
 	 */
 	cpu_enter_lowpower();
-	platform_do_lowpower(cpu);
+	platform_do_lowpower(cpu, &spurious);
 
 	/*
 	 * bring this CPU back into the world of cache
 	 * coherency, and then restore interrupts
 	 */
 	cpu_leave_lowpower();
+
+	if (spurious)
+		pr_warn("CPU%u: %u spurious wakeup calls\n", cpu, spurious);
 }
 
 int platform_cpu_disable(unsigned int cpu)
diff --git a/arch/arm/mach-s5pv310/include/mach/smp.h b/arch/arm/mach-s5pv310/include/mach/smp.h
index b7ec252384f4..393ccbd52c4a 100644
--- a/arch/arm/mach-s5pv310/include/mach/smp.h
+++ b/arch/arm/mach-s5pv310/include/mach/smp.h
@@ -7,16 +7,13 @@
 #define ASM_ARCH_SMP_H __FILE__
 
 #include <asm/hardware/gic.h>
-#include <asm/smp_mpidr.h>
-
-extern void __iomem *gic_cpu_base_addr;
 
 /*
  * We use IRQ1 as the IPI
  */
-static inline void smp_cross_call(const struct cpumask *mask)
+static inline void smp_cross_call(const struct cpumask *mask, int ipi)
 {
-	gic_raise_softirq(mask, 1);
+	gic_raise_softirq(mask, ipi);
 }
 
 #endif
diff --git a/arch/arm/mach-s5pv310/platsmp.c b/arch/arm/mach-s5pv310/platsmp.c
index d357c198edee..34093b069f67 100644
--- a/arch/arm/mach-s5pv310/platsmp.c
+++ b/arch/arm/mach-s5pv310/platsmp.c
@@ -22,7 +22,6 @@
 #include <linux/io.h>
 
 #include <asm/cacheflush.h>
-#include <asm/localtimer.h>
 #include <asm/smp_scu.h>
 #include <asm/unified.h>
 
@@ -38,6 +37,19 @@ extern void s5pv310_secondary_startup(void);
 
 volatile int __cpuinitdata pen_release = -1;
 
+/*
+ * Write pen_release in a way that is guaranteed to be visible to all
+ * observers, irrespective of whether they're taking part in coherency
+ * or not.  This is necessary for the hotplug code to work reliably.
+ */
+static void write_pen_release(int val)
+{
+	pen_release = val;
+	smp_wmb();
+	__cpuc_flush_dcache_area((void *)&pen_release, sizeof(pen_release));
+	outer_clean_range(__pa(&pen_release), __pa(&pen_release + 1));
+}
+
 static void __iomem *scu_base_addr(void)
 {
 	return (void __iomem *)(S5P_VA_SCU);
@@ -47,21 +59,18 @@ static DEFINE_SPINLOCK(boot_lock);
 
 void __cpuinit platform_secondary_init(unsigned int cpu)
 {
-	trace_hardirqs_off();
-
 	/*
 	 * if any interrupts are already enabled for the primary
 	 * core (e.g. timer irq), then they will not have been enabled
 	 * for us: do so
 	 */
-	gic_cpu_init(0, gic_cpu_base_addr);
+	gic_secondary_init(0);
 
 	/*
 	 * let the primary processor know we're out of the
 	 * pen, then head off into the C entry point
 	 */
-	pen_release = -1;
-	smp_wmb();
+	write_pen_release(-1);
 
 	/*
 	 * Synchronise with the boot thread.
@@ -88,16 +97,14 @@ int __cpuinit boot_secondary(unsigned int cpu, struct task_struct *idle)
 	 * Note that "pen_release" is the hardware CPU ID, whereas
 	 * "cpu" is Linux's internal ID.
 	 */
-	pen_release = cpu;
-	__cpuc_flush_dcache_area((void *)&pen_release, sizeof(pen_release));
-	outer_clean_range(__pa(&pen_release), __pa(&pen_release + 1));
+	write_pen_release(cpu);
 
 	/*
 	 * Send the secondary CPU a soft interrupt, thereby causing
 	 * the boot monitor to read the system wide flags register,
 	 * and branch to the address found there.
 	 */
-	smp_cross_call(cpumask_of(cpu));
+	smp_cross_call(cpumask_of(cpu), 1);
 
 	timeout = jiffies + (1 * HZ);
 	while (time_before(jiffies, timeout)) {
@@ -130,13 +137,6 @@ void __init smp_init_cpus(void)
 	ncores = scu_base ? scu_get_core_count(scu_base) : 1;
 
 	/* sanity check */
-	if (ncores == 0) {
-		printk(KERN_ERR
-		       "S5PV310: strange CM count of 0? Default to 1\n");
-
-		ncores = 1;
-	}
-
 	if (ncores > NR_CPUS) {
 		printk(KERN_WARNING
 		       "S5PV310: no. of cores (%d) greater than configured "
@@ -149,18 +149,10 @@ void __init smp_init_cpus(void)
 		set_cpu_possible(i, true);
 }
 
-void __init smp_prepare_cpus(unsigned int max_cpus)
+void __init platform_smp_prepare_cpus(unsigned int max_cpus)
 {
-	unsigned int ncores = num_possible_cpus();
-	unsigned int cpu = smp_processor_id();
 	int i;
 
-	smp_store_cpu_info(cpu);
-
-	/* are we trying to boot more cores than exist? */
-	if (max_cpus > ncores)
-		max_cpus = ncores;
-
 	/*
 	 * Initialise the present map, which describes the set of CPUs
 	 * actually populated at the present time.
@@ -168,25 +160,13 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
 	for (i = 0; i < max_cpus; i++)
 		set_cpu_present(i, true);
 
+	scu_enable(scu_base_addr());
+
 	/*
-	 * Initialise the SCU if there are more than one CPU and let
-	 * them know where to start.
+	 * Write the address of secondary startup into the
+	 * system-wide flags register. The boot monitor waits
+	 * until it receives a soft interrupt, and then the
+	 * secondary CPU branches to this address.
 	 */
-	if (max_cpus > 1) {
-		/*
-		 * Enable the local timer or broadcast device for the
-		 * boot CPU, but only if we have more than one CPU.
-		 */
-		percpu_timer_setup();
-
-		scu_enable(scu_base_addr());
-
-		/*
-		 * Write the address of secondary startup into the
-		 * system-wide flags register. The boot monitor waits
-		 * until it receives a soft interrupt, and then the
-		 * secondary CPU branches to this address.
-		 */
 	__raw_writel(BSYM(virt_to_phys(s5pv310_secondary_startup)), S5P_VA_SYSRAM);
-	}
 }
diff --git a/arch/arm/mach-s5pv310/time.c b/arch/arm/mach-s5pv310/time.c
index 01b012ad1bfd..b262d4615331 100644
--- a/arch/arm/mach-s5pv310/time.c
+++ b/arch/arm/mach-s5pv310/time.c
@@ -211,7 +211,6 @@ struct clocksource pwm_clocksource = {
 	.rating		= 250,
 	.read		= s5pv310_pwm4_read,
 	.mask		= CLOCKSOURCE_MASK(32),
-	.shift		= 20,
 	.flags		= CLOCK_SOURCE_IS_CONTINUOUS ,
 };
 
@@ -230,10 +229,7 @@ static void __init s5pv310_clocksource_init(void)
 	s5pv310_pwm_init(4, ~0);
 	s5pv310_pwm_start(4, 1);
 
-	pwm_clocksource.mult =
-		clocksource_khz2mult(clock_rate/1000, pwm_clocksource.shift);
-
-	if (clocksource_register(&pwm_clocksource))
+	if (clocksource_register_hz(&pwm_clocksource, clock_rate))
 		panic("%s: can't register clocksource\n", pwm_clocksource.name);
 }
 
diff --git a/arch/arm/mach-sa1100/Kconfig b/arch/arm/mach-sa1100/Kconfig
index 5da8c35aa0de..42625e4d949a 100644
--- a/arch/arm/mach-sa1100/Kconfig
+++ b/arch/arm/mach-sa1100/Kconfig
@@ -118,6 +118,16 @@ config SA1100_LART
 	  (also known as the LART).  See <http://www.lartmaker.nl/> for
 	  information on the LART.
 
+config SA1100_NANOENGINE
+	bool "nanoEngine"
+	select CPU_FREQ_SA1110
+	select PCI
+	select PCI_NANOENGINE
+	help
+	  Say Y here if you are using the Bright Star Engineering nanoEngine.
+	  See <http://www.brightstareng.com/arm/nanoeng.htm> for information
+	  on the BSE nanoEngine.
+
 config SA1100_PLEB
 	bool "PLEB"
 	select CPU_FREQ_SA1100
diff --git a/arch/arm/mach-sa1100/Makefile b/arch/arm/mach-sa1100/Makefile
index 89349c1dd7a6..e697691eed28 100644
--- a/arch/arm/mach-sa1100/Makefile
+++ b/arch/arm/mach-sa1100/Makefile
@@ -37,6 +37,9 @@ obj-$(CONFIG_SA1100_JORNADA720_SSP)	+= jornada720_ssp.o
 obj-$(CONFIG_SA1100_LART)		+= lart.o
 led-$(CONFIG_SA1100_LART)		+= leds-lart.o
 
+obj-$(CONFIG_SA1100_NANOENGINE)		+= nanoengine.o
+obj-$(CONFIG_PCI_NANOENGINE)		+= pci-nanoengine.o
+
 obj-$(CONFIG_SA1100_PLEB)		+= pleb.o
 
 obj-$(CONFIG_SA1100_SHANNON)		+= shannon.o
diff --git a/arch/arm/mach-sa1100/cpu-sa1100.c b/arch/arm/mach-sa1100/cpu-sa1100.c
index 96f7dc103b59..07d4e8ba3719 100644
--- a/arch/arm/mach-sa1100/cpu-sa1100.c
+++ b/arch/arm/mach-sa1100/cpu-sa1100.c
@@ -94,48 +94,47 @@
 
 #include "generic.h"
 
-typedef struct {
+struct sa1100_dram_regs {
 	int speed;
 	u32 mdcnfg;
 	u32 mdcas0;
 	u32 mdcas1;
 	u32 mdcas2;
-} sa1100_dram_regs_t;
+};
 
 
 static struct cpufreq_driver sa1100_driver;
 
-static sa1100_dram_regs_t sa1100_dram_settings[] =
-{
-	/* speed,     mdcnfg,     mdcas0,     mdcas1,     mdcas2  clock frequency */
-	{  59000, 0x00dc88a3, 0xcccccccf, 0xfffffffc, 0xffffffff }, /*  59.0 MHz */
-	{  73700, 0x011490a3, 0xcccccccf, 0xfffffffc, 0xffffffff }, /*  73.7 MHz */
-	{  88500, 0x014e90a3, 0xcccccccf, 0xfffffffc, 0xffffffff }, /*  88.5 MHz */
-	{ 103200, 0x01889923, 0xcccccccf, 0xfffffffc, 0xffffffff }, /* 103.2 MHz */
-	{ 118000, 0x01c29923, 0x9999998f, 0xfffffff9, 0xffffffff }, /* 118.0 MHz */
-	{ 132700, 0x01fb2123, 0x9999998f, 0xfffffff9, 0xffffffff }, /* 132.7 MHz */
-	{ 147500, 0x02352123, 0x3333330f, 0xfffffff3, 0xffffffff }, /* 147.5 MHz */
-	{ 162200, 0x026b29a3, 0x38e38e1f, 0xfff8e38e, 0xffffffff }, /* 162.2 MHz */
-	{ 176900, 0x02a329a3, 0x71c71c1f, 0xfff1c71c, 0xffffffff }, /* 176.9 MHz */
-	{ 191700, 0x02dd31a3, 0xe38e383f, 0xffe38e38, 0xffffffff }, /* 191.7 MHz */
-	{ 206400, 0x03153223, 0xc71c703f, 0xffc71c71, 0xffffffff }, /* 206.4 MHz */
-	{ 221200, 0x034fba23, 0xc71c703f, 0xffc71c71, 0xffffffff }, /* 221.2 MHz */
-	{ 235900, 0x03853a23, 0xe1e1e07f, 0xe1e1e1e1, 0xffffffe1 }, /* 235.9 MHz */
-	{ 250700, 0x03bf3aa3, 0xc3c3c07f, 0xc3c3c3c3, 0xffffffc3 }, /* 250.7 MHz */
-	{ 265400, 0x03f7c2a3, 0xc3c3c07f, 0xc3c3c3c3, 0xffffffc3 }, /* 265.4 MHz */
-	{ 280200, 0x0431c2a3, 0x878780ff, 0x87878787, 0xffffff87 }, /* 280.2 MHz */
+static struct sa1100_dram_regs sa1100_dram_settings[] = {
+	/*speed,     mdcnfg,     mdcas0,     mdcas1,     mdcas2,   clock freq */
+	{ 59000, 0x00dc88a3, 0xcccccccf, 0xfffffffc, 0xffffffff},/*  59.0 MHz */
+	{ 73700, 0x011490a3, 0xcccccccf, 0xfffffffc, 0xffffffff},/*  73.7 MHz */
+	{ 88500, 0x014e90a3, 0xcccccccf, 0xfffffffc, 0xffffffff},/*  88.5 MHz */
+	{103200, 0x01889923, 0xcccccccf, 0xfffffffc, 0xffffffff},/* 103.2 MHz */
+	{118000, 0x01c29923, 0x9999998f, 0xfffffff9, 0xffffffff},/* 118.0 MHz */
+	{132700, 0x01fb2123, 0x9999998f, 0xfffffff9, 0xffffffff},/* 132.7 MHz */
+	{147500, 0x02352123, 0x3333330f, 0xfffffff3, 0xffffffff},/* 147.5 MHz */
+	{162200, 0x026b29a3, 0x38e38e1f, 0xfff8e38e, 0xffffffff},/* 162.2 MHz */
+	{176900, 0x02a329a3, 0x71c71c1f, 0xfff1c71c, 0xffffffff},/* 176.9 MHz */
+	{191700, 0x02dd31a3, 0xe38e383f, 0xffe38e38, 0xffffffff},/* 191.7 MHz */
+	{206400, 0x03153223, 0xc71c703f, 0xffc71c71, 0xffffffff},/* 206.4 MHz */
+	{221200, 0x034fba23, 0xc71c703f, 0xffc71c71, 0xffffffff},/* 221.2 MHz */
+	{235900, 0x03853a23, 0xe1e1e07f, 0xe1e1e1e1, 0xffffffe1},/* 235.9 MHz */
+	{250700, 0x03bf3aa3, 0xc3c3c07f, 0xc3c3c3c3, 0xffffffc3},/* 250.7 MHz */
+	{265400, 0x03f7c2a3, 0xc3c3c07f, 0xc3c3c3c3, 0xffffffc3},/* 265.4 MHz */
+	{280200, 0x0431c2a3, 0x878780ff, 0x87878787, 0xffffff87},/* 280.2 MHz */
 	{ 0, 0, 0, 0, 0 } /* last entry */
 };
 
 static void sa1100_update_dram_timings(int current_speed, int new_speed)
 {
-	sa1100_dram_regs_t *settings = sa1100_dram_settings;
+	struct sa1100_dram_regs *settings = sa1100_dram_settings;
 
 	/* find speed */
 	while (settings->speed != 0) {
-		if(new_speed == settings->speed)
+		if (new_speed == settings->speed)
 			break;
-		
+
 		settings++;
 	}
 
@@ -149,7 +148,7 @@ static void sa1100_update_dram_timings(int current_speed, int new_speed)
 		/* We're going FASTER, so first relax the memory
 		 * timings before changing the core frequency
 		 */
-		
+
 		/* Half the memory access clock */
 		MDCNFG |= MDCNFG_CDB2;
 
@@ -187,7 +186,7 @@ static int sa1100_target(struct cpufreq_policy *policy,
 	struct cpufreq_freqs freqs;
 
 	new_ppcr = sa11x0_freq_to_ppcr(target_freq);
-	switch(relation){
+	switch (relation) {
 	case CPUFREQ_RELATION_L:
 		if (sa11x0_ppcr_to_freq(new_ppcr) > policy->max)
 			new_ppcr--;
diff --git a/arch/arm/mach-sa1100/cpu-sa1110.c b/arch/arm/mach-sa1100/cpu-sa1110.c
index 7252874d328b..675bf8ef97e8 100644
--- a/arch/arm/mach-sa1100/cpu-sa1110.c
+++ b/arch/arm/mach-sa1100/cpu-sa1110.c
@@ -16,28 +16,24 @@
  *
  * The SDRAM type can be passed on the command line as cpu_sa1110.sdram=type
  */
-#include <linux/moduleparam.h>
-#include <linux/types.h>
-#include <linux/kernel.h>
-#include <linux/sched.h>
 #include <linux/cpufreq.h>
 #include <linux/delay.h>
 #include <linux/init.h>
-#include <linux/io.h>
+#include <linux/kernel.h>
+#include <linux/moduleparam.h>
+#include <linux/types.h>
 
-#include <mach/hardware.h>
 #include <asm/cputype.h>
 #include <asm/mach-types.h>
-#include <asm/system.h>
+
+#include <mach/hardware.h>
 
 #include "generic.h"
 
 #undef DEBUG
 
-static struct cpufreq_driver sa1110_driver;
-
 struct sdram_params {
-	const char name[16];
+	const char name[20];
 	u_char  rows;		/* bits				 */
 	u_char  cas_latency;	/* cycles			 */
 	u_char  tck;		/* clock cycle time (ns)	 */
@@ -107,6 +103,15 @@ static struct sdram_params sdram_tbl[] __initdata = {
 		.twr		= 8,
 		.refresh	= 64000,
 		.cas_latency	= 3,
+	}, {	/* Micron MT48LC8M16A2TG-75 */
+		.name		= "MT48LC8M16A2TG-75",
+		.rows		= 12,
+		.tck		= 8,
+		.trcd		= 20,
+		.trp		= 20,
+		.twr		= 8,
+		.refresh	= 64000,
+		.cas_latency	= 3,
 	},
 };
 
@@ -180,11 +185,13 @@ sdram_calculate_timing(struct sdram_info *sd, u_int cpu_khz,
 		sd->mdrefr |= MDREFR_K1DB2;
 
 	/* initial number of '1's in MDCAS + 1 */
-	set_mdcas(sd->mdcas, sd_khz >= 62000, ns_to_cycles(sdram->trcd, mem_khz));
+	set_mdcas(sd->mdcas, sd_khz >= 62000,
+		ns_to_cycles(sdram->trcd, mem_khz));
 
 #ifdef DEBUG
-	printk("MDCNFG: %08x MDREFR: %08x MDCAS0: %08x MDCAS1: %08x MDCAS2: %08x\n",
-		sd->mdcnfg, sd->mdrefr, sd->mdcas[0], sd->mdcas[1], sd->mdcas[2]);
+	printk(KERN_DEBUG "MDCNFG: %08x MDREFR: %08x MDCAS0: %08x MDCAS1: %08x MDCAS2: %08x\n",
+		sd->mdcnfg, sd->mdrefr, sd->mdcas[0], sd->mdcas[1],
+		sd->mdcas[2]);
 #endif
 }
 
@@ -213,7 +220,7 @@ sdram_update_refresh(u_int cpu_khz, struct sdram_params *sdram)
 
 #ifdef DEBUG
 	mdelay(250);
-	printk("new dri value = %d\n", dri);
+	printk(KERN_DEBUG "new dri value = %d\n", dri);
 #endif
 
 	sdram_set_refresh(dri);
@@ -232,7 +239,7 @@ static int sa1110_target(struct cpufreq_policy *policy,
 	unsigned long flags;
 	unsigned int ppcr, unused;
 
-	switch(relation){
+	switch (relation) {
 	case CPUFREQ_RELATION_L:
 		ppcr = sa11x0_freq_to_ppcr(target_freq);
 		if (sa11x0_ppcr_to_freq(ppcr) > policy->max)
@@ -280,11 +287,10 @@ static int sa1110_target(struct cpufreq_policy *policy,
 	 * We wait 20ms to be safe.
 	 */
 	sdram_set_refresh(2);
-	if (!irqs_disabled()) {
+	if (!irqs_disabled())
 		msleep(20);
-	} else {
+	else
 		mdelay(20);
-	}
 
 	/*
 	 * Reprogram the DRAM timings with interrupts disabled, and
@@ -295,7 +301,7 @@ static int sa1110_target(struct cpufreq_policy *policy,
 	local_irq_save(flags);
 	asm("mcr p15, 0, %0, c7, c10, 4" : : "r" (0));
 	udelay(10);
-	__asm__ __volatile__("					\n\
+	__asm__ __volatile__("\n\
 		b	2f					\n\
 		.align	5					\n\
 1:		str	%3, [%1, #0]		@ MDCNFG	\n\
@@ -336,7 +342,9 @@ static int __init sa1110_cpu_init(struct cpufreq_policy *policy)
 	return 0;
 }
 
-static struct cpufreq_driver sa1110_driver = {
+/* sa1110_driver needs __refdata because it must remain after init registers
+ * it with cpufreq_register_driver() */
+static struct cpufreq_driver sa1110_driver __refdata = {
 	.flags		= CPUFREQ_STICKY,
 	.verify		= sa11x0_verify_speed,
 	.target		= sa1110_target,
@@ -349,7 +357,8 @@ static struct sdram_params *sa1110_find_sdram(const char *name)
 {
 	struct sdram_params *sdram;
 
-	for (sdram = sdram_tbl; sdram < sdram_tbl + ARRAY_SIZE(sdram_tbl); sdram++)
+	for (sdram = sdram_tbl; sdram < sdram_tbl + ARRAY_SIZE(sdram_tbl);
+	     sdram++)
 		if (strcmp(name, sdram->name) == 0)
 			return sdram;
 
@@ -369,14 +378,14 @@ static int __init sa1110_clk_init(void)
 	if (!name[0]) {
 		if (machine_is_assabet())
 			name = "TC59SM716-CL3";
-
 		if (machine_is_pt_system3())
 			name = "K4S641632D";
-
 		if (machine_is_h3100())
 			name = "KM416S4030CT";
 		if (machine_is_jornada720())
-		        name = "K4S281632B-1H";
+			name = "K4S281632B-1H";
+		if (machine_is_nanoengine())
+			name = "MT48LC8M16A2TG-75";
 	}
 
 	sdram = sa1110_find_sdram(name);
diff --git a/arch/arm/mach-sa1100/generic.c b/arch/arm/mach-sa1100/generic.c
index 3c1fcd696714..59d14f0fdcf8 100644
--- a/arch/arm/mach-sa1100/generic.c
+++ b/arch/arm/mach-sa1100/generic.c
@@ -16,9 +16,7 @@
 #include <linux/pm.h>
 #include <linux/cpufreq.h>
 #include <linux/ioport.h>
-#include <linux/sched.h>	/* just for sched_clock() - funny that */
 #include <linux/platform_device.h>
-#include <linux/cnt32_to_63.h>
 
 #include <asm/div64.h>
 #include <mach/hardware.h>
@@ -110,27 +108,6 @@ unsigned int sa11x0_getspeed(unsigned int cpu)
 }
 
 /*
- * This is the SA11x0 sched_clock implementation.  This has
- * a resolution of 271ns, and a maximum value of 32025597s (370 days).
- *
- * The return value is guaranteed to be monotonic in that range as
- * long as there is always less than 582 seconds between successive
- * calls to this function.
- *
- *  ( * 1E9 / 3686400 => * 78125 / 288)
- */
-unsigned long long sched_clock(void)
-{
-	unsigned long long v = cnt32_to_63(OSCR);
-
-	/* the <<1 gets rid of the cnt_32_to_63 top bit saving on a bic insn */
-	v *= 78125<<1;
-	do_div(v, 288<<1);
-
-	return v;
-}
-
-/*
  * Default power-off for SA1100
  */
 static void sa1100_power_off(void)
@@ -163,10 +140,15 @@ static void sa11x0_register_device(struct platform_device *dev, void *data)
 
 static struct resource sa11x0udc_resources[] = {
 	[0] = {
-		.start	= 0x80000000,
-		.end	= 0x8000ffff,
+		.start	= __PREG(Ser0UDCCR),
+		.end	= __PREG(Ser0UDCCR) + 0xffff,
 		.flags	= IORESOURCE_MEM,
 	},
+	[1] = {
+		.start	= IRQ_Ser0UDC,
+		.end	= IRQ_Ser0UDC,
+		.flags	= IORESOURCE_IRQ,
+	},
 };
 
 static u64 sa11x0udc_dma_mask = 0xffffffffUL;
@@ -184,10 +166,15 @@ static struct platform_device sa11x0udc_device = {
 
 static struct resource sa11x0uart1_resources[] = {
 	[0] = {
-		.start	= 0x80010000,
-		.end	= 0x8001ffff,
+		.start	= __PREG(Ser1UTCR0),
+		.end	= __PREG(Ser1UTCR0) + 0xffff,
 		.flags	= IORESOURCE_MEM,
 	},
+	[1] = {
+		.start	= IRQ_Ser1UART,
+		.end	= IRQ_Ser1UART,
+		.flags	= IORESOURCE_IRQ,
+	},
 };
 
 static struct platform_device sa11x0uart1_device = {
@@ -199,10 +186,15 @@ static struct platform_device sa11x0uart1_device = {
 
 static struct resource sa11x0uart3_resources[] = {
 	[0] = {
-		.start	= 0x80050000,
-		.end	= 0x8005ffff,
+		.start	= __PREG(Ser3UTCR0),
+		.end	= __PREG(Ser3UTCR0) + 0xffff,
 		.flags	= IORESOURCE_MEM,
 	},
+	[1] = {
+		.start	= IRQ_Ser3UART,
+		.end	= IRQ_Ser3UART,
+		.flags	= IORESOURCE_IRQ,
+	},
 };
 
 static struct platform_device sa11x0uart3_device = {
@@ -214,10 +206,15 @@ static struct platform_device sa11x0uart3_device = {
 
 static struct resource sa11x0mcp_resources[] = {
 	[0] = {
-		.start	= 0x80060000,
-		.end	= 0x8006ffff,
+		.start	= __PREG(Ser4MCCR0),
+		.end	= __PREG(Ser4MCCR0) + 0xffff,
 		.flags	= IORESOURCE_MEM,
 	},
+	[1] = {
+		.start	= IRQ_Ser4MCP,
+		.end	= IRQ_Ser4MCP,
+		.flags	= IORESOURCE_IRQ,
+	},
 };
 
 static u64 sa11x0mcp_dma_mask = 0xffffffffUL;
@@ -244,6 +241,11 @@ static struct resource sa11x0ssp_resources[] = {
 		.end	= 0x8007ffff,
 		.flags	= IORESOURCE_MEM,
 	},
+	[1] = {
+		.start	= IRQ_Ser4SSP,
+		.end	= IRQ_Ser4SSP,
+		.flags	= IORESOURCE_IRQ,
+	},
 };
 
 static u64 sa11x0ssp_dma_mask = 0xffffffffUL;
diff --git a/arch/arm/mach-sa1100/include/mach/hardware.h b/arch/arm/mach-sa1100/include/mach/hardware.h
index 99f5856d8de4..967ae7684390 100644
--- a/arch/arm/mach-sa1100/include/mach/hardware.h
+++ b/arch/arm/mach-sa1100/include/mach/hardware.h
@@ -76,4 +76,12 @@ static inline unsigned long get_clock_tick_rate(void)
 #include "SA-1101.h"
 #endif
 
+#if defined(CONFIG_ARCH_SA1100) && defined(CONFIG_PCI)
+#define PCIBIOS_MIN_IO		0
+#define PCIBIOS_MIN_MEM		0
+#define pcibios_assign_all_busses()	1
+#define HAVE_ARCH_PCI_SET_DMA_MASK	1
+#endif
+
+
 #endif  /* _ASM_ARCH_HARDWARE_H */
diff --git a/arch/arm/mach-sa1100/include/mach/nanoengine.h b/arch/arm/mach-sa1100/include/mach/nanoengine.h
new file mode 100644
index 000000000000..14f8382d0665
--- /dev/null
+++ b/arch/arm/mach-sa1100/include/mach/nanoengine.h
@@ -0,0 +1,52 @@
+/*
+ * arch/arm/mach-sa1100/include/mach/nanoengine.h
+ *
+ * This file contains the hardware specific definitions for nanoEngine.
+ * Only include this file from SA1100-specific files.
+ *
+ * Copyright (C) 2010 Marcelo Roberto Jimenez <mroberto@cpti.cetuc.puc-rio.br>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+#ifndef __ASM_ARCH_NANOENGINE_H
+#define __ASM_ARCH_NANOENGINE_H
+
+#include <mach/irqs.h>
+
+#define GPIO_PC_READY0	GPIO_GPIO(11) /* ready for socket 0 (active high)*/
+#define GPIO_PC_READY1	GPIO_GPIO(12) /* ready for socket 1 (active high) */
+#define GPIO_PC_CD0	GPIO_GPIO(13) /* detect for socket 0 (active low) */
+#define GPIO_PC_CD1	GPIO_GPIO(14) /* detect for socket 1 (active low) */
+#define GPIO_PC_RESET0	GPIO_GPIO(15) /* reset socket 0 */
+#define GPIO_PC_RESET1	GPIO_GPIO(16) /* reset socket 1 */
+
+#define NANOENGINE_IRQ_GPIO_PCI		IRQ_GPIO0
+#define NANOENGINE_IRQ_GPIO_PC_READY0	IRQ_GPIO11
+#define NANOENGINE_IRQ_GPIO_PC_READY1	IRQ_GPIO12
+#define NANOENGINE_IRQ_GPIO_PC_CD0	IRQ_GPIO13
+#define NANOENGINE_IRQ_GPIO_PC_CD1	IRQ_GPIO14
+
+/*
+ * nanoEngine Memory Map:
+ *
+ * 0000.0000 - 003F.0000 -   4 MB Flash
+ * C000.0000 - C1FF.FFFF -  32 MB SDRAM
+ * 1860.0000 - 186F.FFFF -   1 MB Internal PCI Memory Read/Write
+ * 18A1.0000 - 18A1.FFFF -  64 KB Internal PCI Config Space
+ * 4000.0000 - 47FF.FFFF - 128 MB External Bus I/O - Multiplexed Mode
+ * 4800.0000 - 4FFF.FFFF - 128 MB External Bus I/O - Non-Multiplexed Mode
+ *
+ */
+
+#define NANO_PCI_MEM_RW_PHYS		0x18600000
+#define NANO_PCI_MEM_RW_VIRT		0xf1000000
+#define NANO_PCI_MEM_RW_SIZE		SZ_1M
+#define NANO_PCI_CONFIG_SPACE_PHYS	0x18A10000
+#define NANO_PCI_CONFIG_SPACE_VIRT	0xf2000000
+#define NANO_PCI_CONFIG_SPACE_SIZE	SZ_64K
+
+#endif
+
diff --git a/arch/arm/mach-sa1100/nanoengine.c b/arch/arm/mach-sa1100/nanoengine.c
new file mode 100644
index 000000000000..72087f0658b7
--- /dev/null
+++ b/arch/arm/mach-sa1100/nanoengine.c
@@ -0,0 +1,119 @@
+/*
+ * linux/arch/arm/mach-sa1100/nanoengine.c
+ *
+ * Bright Star Engineering's nanoEngine board init code.
+ *
+ * Copyright (C) 2010 Marcelo Roberto Jimenez <mroberto@cpti.cetuc.puc-rio.br>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/mtd/mtd.h>
+#include <linux/mtd/partitions.h>
+#include <linux/root_dev.h>
+
+#include <asm/mach-types.h>
+#include <asm/setup.h>
+
+#include <asm/mach/arch.h>
+#include <asm/mach/flash.h>
+#include <asm/mach/map.h>
+#include <asm/mach/serial_sa1100.h>
+
+#include <mach/hardware.h>
+#include <mach/nanoengine.h>
+
+#include "generic.h"
+
+/* Flash bank 0 */
+static struct mtd_partition nanoengine_partitions[] = {
+	{
+		.name	= "nanoEngine boot firmware and parameter table",
+		.size		= 0x00010000,  /* 32K */
+		.offset		= 0,
+		.mask_flags	= MTD_WRITEABLE,
+	}, {
+		.name		= "kernel/initrd reserved",
+		.size		= 0x002f0000,
+		.offset		= 0x00010000,
+		.mask_flags	= MTD_WRITEABLE,
+	}, {
+		.name		= "experimental filesystem allocation",
+		.size		= 0x00100000,
+		.offset		= 0x00300000,
+		.mask_flags	= MTD_WRITEABLE,
+	}
+};
+
+static struct flash_platform_data nanoengine_flash_data = {
+	.map_name	= "jedec_probe",
+	.parts		= nanoengine_partitions,
+	.nr_parts	= ARRAY_SIZE(nanoengine_partitions),
+};
+
+static struct resource nanoengine_flash_resources[] = {
+	{
+		.start	= SA1100_CS0_PHYS,
+		.end	= SA1100_CS0_PHYS + SZ_32M - 1,
+		.flags	= IORESOURCE_MEM,
+	}, {
+		.start	= SA1100_CS1_PHYS,
+		.end	= SA1100_CS1_PHYS + SZ_32M - 1,
+		.flags	= IORESOURCE_MEM,
+	}
+};
+
+static struct map_desc nanoengine_io_desc[] __initdata = {
+	{
+		/* System Registers */
+		.virtual	= 0xf0000000,
+		.pfn		= __phys_to_pfn(0x10000000),
+		.length		= 0x00100000,
+		.type		= MT_DEVICE
+	}, {
+		/* Internal PCI Memory Read/Write */
+		.virtual	= NANO_PCI_MEM_RW_VIRT,
+		.pfn		= __phys_to_pfn(NANO_PCI_MEM_RW_PHYS),
+		.length		= NANO_PCI_MEM_RW_SIZE,
+		.type		= MT_DEVICE
+	}, {
+		/* Internal PCI Config Space */
+		.virtual	= NANO_PCI_CONFIG_SPACE_VIRT,
+		.pfn		= __phys_to_pfn(NANO_PCI_CONFIG_SPACE_PHYS),
+		.length		= NANO_PCI_CONFIG_SPACE_SIZE,
+		.type		= MT_DEVICE
+	}
+};
+
+static void __init nanoengine_map_io(void)
+{
+	sa1100_map_io();
+	iotable_init(nanoengine_io_desc, ARRAY_SIZE(nanoengine_io_desc));
+
+	sa1100_register_uart(0, 1);
+	sa1100_register_uart(1, 2);
+	sa1100_register_uart(2, 3);
+	Ser1SDCR0 |= SDCR0_UART;
+	/* disable IRDA -- UART2 is used as a normal serial port */
+	Ser2UTCR4 = 0;
+	Ser2HSCR0 = 0;
+}
+
+static void __init nanoengine_init(void)
+{
+	sa11x0_register_mtd(&nanoengine_flash_data, nanoengine_flash_resources,
+		ARRAY_SIZE(nanoengine_flash_resources));
+}
+
+MACHINE_START(NANOENGINE, "BSE nanoEngine")
+	.boot_params	= 0xc0000000,
+	.map_io		= nanoengine_map_io,
+	.init_irq	= sa1100_init_irq,
+	.timer		= &sa1100_timer,
+	.init_machine	= nanoengine_init,
+MACHINE_END
diff --git a/arch/arm/mach-sa1100/pci-nanoengine.c b/arch/arm/mach-sa1100/pci-nanoengine.c
new file mode 100644
index 000000000000..fba7a913f12b
--- /dev/null
+++ b/arch/arm/mach-sa1100/pci-nanoengine.c
@@ -0,0 +1,284 @@
+/*
+ * linux/arch/arm/mach-sa1100/pci-nanoengine.c
+ *
+ * PCI functions for BSE nanoEngine PCI
+ *
+ * Copyright (C) 2010 Marcelo Roberto Jimenez <mroberto@cpti.cetuc.puc-rio.br>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+#include <linux/kernel.h>
+#include <linux/irq.h>
+#include <linux/pci.h>
+#include <linux/spinlock.h>
+
+#include <asm/mach/pci.h>
+#include <asm/mach-types.h>
+
+#include <mach/nanoengine.h>
+
+static DEFINE_SPINLOCK(nano_lock);
+
+static int nanoengine_get_pci_address(struct pci_bus *bus,
+	unsigned int devfn, int where, unsigned long *address)
+{
+	int ret = PCIBIOS_DEVICE_NOT_FOUND;
+	unsigned int busnr = bus->number;
+
+	*address = NANO_PCI_CONFIG_SPACE_VIRT +
+		((bus->number << 16) | (devfn << 8) | (where & ~3));
+
+	ret = (busnr > 255 || devfn > 255 || where > 255) ?
+		PCIBIOS_DEVICE_NOT_FOUND : PCIBIOS_SUCCESSFUL;
+
+	return ret;
+}
+
+static int nanoengine_read_config(struct pci_bus *bus, unsigned int devfn, int where,
+	int size, u32 *val)
+{
+	int ret;
+	unsigned long address;
+	unsigned long flags;
+	u32 v;
+
+	/* nanoEngine PCI bridge does not return -1 for a non-existing
+	 * device. We must fake the answer. We know that the only valid
+	 * device is device zero at bus 0, which is the network chip. */
+	if (bus->number != 0 || (devfn >> 3) != 0) {
+		v = -1;
+		nanoengine_get_pci_address(bus, devfn, where, &address);
+		goto exit_function;
+	}
+
+	spin_lock_irqsave(&nano_lock, flags);
+
+	ret = nanoengine_get_pci_address(bus, devfn, where, &address);
+	if (ret != PCIBIOS_SUCCESSFUL)
+		return ret;
+	v = __raw_readl(address);
+
+	spin_unlock_irqrestore(&nano_lock, flags);
+
+	v >>= ((where & 3) * 8);
+	v &= (unsigned long)(-1) >> ((4 - size) * 8);
+
+exit_function:
+	*val = v;
+	return PCIBIOS_SUCCESSFUL;
+}
+
+static int nanoengine_write_config(struct pci_bus *bus, unsigned int devfn, int where,
+	int size, u32 val)
+{
+	int ret;
+	unsigned long address;
+	unsigned long flags;
+	unsigned shift;
+	u32 v;
+
+	shift = (where & 3) * 8;
+
+	spin_lock_irqsave(&nano_lock, flags);
+
+	ret = nanoengine_get_pci_address(bus, devfn, where, &address);
+	if (ret != PCIBIOS_SUCCESSFUL)
+		return ret;
+	v = __raw_readl(address);
+	switch (size) {
+	case 1:
+		v &= ~(0xFF << shift);
+		v |= val << shift;
+		break;
+	case 2:
+		v &= ~(0xFFFF << shift);
+		v |= val << shift;
+		break;
+	case 4:
+		v = val;
+		break;
+	}
+	__raw_writel(v, address);
+
+	spin_unlock_irqrestore(&nano_lock, flags);
+
+	return PCIBIOS_SUCCESSFUL;
+}
+
+static struct pci_ops pci_nano_ops = {
+	.read	= nanoengine_read_config,
+	.write	= nanoengine_write_config,
+};
+
+static int __init pci_nanoengine_map_irq(struct pci_dev *dev, u8 slot, u8 pin)
+{
+	return NANOENGINE_IRQ_GPIO_PCI;
+}
+
+struct pci_bus * __init pci_nanoengine_scan_bus(int nr, struct pci_sys_data *sys)
+{
+	return pci_scan_bus(sys->busnr, &pci_nano_ops, sys);
+}
+
+static struct resource pci_io_ports = {
+	.name	= "PCI IO",
+	.start	= 0x400,
+	.end	= 0x7FF,
+	.flags	= IORESOURCE_IO,
+};
+
+static struct resource pci_non_prefetchable_memory = {
+	.name	= "PCI non-prefetchable",
+	.start	= NANO_PCI_MEM_RW_PHYS,
+	/* nanoEngine documentation says there is a 1 Megabyte window here,
+	 * but PCI reports just 128 + 8 kbytes. */
+	.end	= NANO_PCI_MEM_RW_PHYS + NANO_PCI_MEM_RW_SIZE - 1,
+/*	.end	= NANO_PCI_MEM_RW_PHYS + SZ_128K + SZ_8K - 1,*/
+	.flags	= IORESOURCE_MEM,
+};
+
+/*
+ * nanoEngine PCI reports 1 Megabyte of prefetchable memory, but it
+ * overlaps with previously defined memory.
+ *
+ * Here is what happens:
+ *
+# dmesg
+...
+pci 0000:00:00.0: [8086:1209] type 0 class 0x000200
+pci 0000:00:00.0: reg 10: [mem 0x00021000-0x00021fff]
+pci 0000:00:00.0: reg 14: [io  0x0000-0x003f]
+pci 0000:00:00.0: reg 18: [mem 0x00000000-0x0001ffff]
+pci 0000:00:00.0: reg 30: [mem 0x00000000-0x000fffff pref]
+pci 0000:00:00.0: supports D1 D2
+pci 0000:00:00.0: PME# supported from D0 D1 D2 D3hot
+pci 0000:00:00.0: PME# disabled
+PCI: bus0: Fast back to back transfers enabled
+pci 0000:00:00.0: BAR 6: can't assign mem pref (size 0x100000)
+pci 0000:00:00.0: BAR 2: assigned [mem 0x18600000-0x1861ffff]
+pci 0000:00:00.0: BAR 2: set to [mem 0x18600000-0x1861ffff] (PCI address [0x0-0x1ffff])
+pci 0000:00:00.0: BAR 0: assigned [mem 0x18620000-0x18620fff]
+pci 0000:00:00.0: BAR 0: set to [mem 0x18620000-0x18620fff] (PCI address [0x20000-0x20fff])
+pci 0000:00:00.0: BAR 1: assigned [io  0x0400-0x043f]
+pci 0000:00:00.0: BAR 1: set to [io  0x0400-0x043f] (PCI address [0x0-0x3f])
+ *
+ * On the other hand, if we do not request the prefetchable memory resource,
+ * linux will alloc it first and the two non-prefetchable memory areas that
+ * are our real interest will not be mapped. So we choose to map it to an
+ * unused area. It gets recognized as expansion ROM, but becomes disabled.
+ *
+ * Here is what happens then:
+ *
+# dmesg
+...
+pci 0000:00:00.0: [8086:1209] type 0 class 0x000200
+pci 0000:00:00.0: reg 10: [mem 0x00021000-0x00021fff]
+pci 0000:00:00.0: reg 14: [io  0x0000-0x003f]
+pci 0000:00:00.0: reg 18: [mem 0x00000000-0x0001ffff]
+pci 0000:00:00.0: reg 30: [mem 0x00000000-0x000fffff pref]
+pci 0000:00:00.0: supports D1 D2
+pci 0000:00:00.0: PME# supported from D0 D1 D2 D3hot
+pci 0000:00:00.0: PME# disabled
+PCI: bus0: Fast back to back transfers enabled
+pci 0000:00:00.0: BAR 6: assigned [mem 0x78000000-0x780fffff pref]
+pci 0000:00:00.0: BAR 2: assigned [mem 0x18600000-0x1861ffff]
+pci 0000:00:00.0: BAR 2: set to [mem 0x18600000-0x1861ffff] (PCI address [0x0-0x1ffff])
+pci 0000:00:00.0: BAR 0: assigned [mem 0x18620000-0x18620fff]
+pci 0000:00:00.0: BAR 0: set to [mem 0x18620000-0x18620fff] (PCI address [0x20000-0x20fff])
+pci 0000:00:00.0: BAR 1: assigned [io  0x0400-0x043f]
+pci 0000:00:00.0: BAR 1: set to [io  0x0400-0x043f] (PCI address [0x0-0x3f])
+
+# lspci -vv -s 0000:00:00.0
+00:00.0 Class 0200: Device 8086:1209 (rev 09)
+        Control: I/O+ Mem+ BusMaster+ SpecCycle- MemWINV- VGASnoop- ParErr+ Stepping- SERR+ FastB2B- DisINTx-
+        Status: Cap+ 66MHz- UDF- FastB2B+ ParErr- DEVSEL=medium >TAbort- <TAbort- <MAbort- >SERR+ <PERR+ INTx-
+        Latency: 0 (2000ns min, 14000ns max), Cache Line Size: 32 bytes
+        Interrupt: pin A routed to IRQ 0
+        Region 0: Memory at 18620000 (32-bit, non-prefetchable) [size=4K]
+        Region 1: I/O ports at 0400 [size=64]
+        Region 2: [virtual] Memory at 18600000 (32-bit, non-prefetchable) [size=128K]
+        [virtual] Expansion ROM at 78000000 [disabled] [size=1M]
+        Capabilities: [dc] Power Management version 2
+                Flags: PMEClk- DSI+ D1+ D2+ AuxCurrent=0mA PME(D0+,D1+,D2+,D3hot+,D3cold-)
+                Status: D0 NoSoftRst- PME-Enable- DSel=0 DScale=2 PME-
+        Kernel driver in use: e100
+        Kernel modules: e100
+ *
+ */
+static struct resource pci_prefetchable_memory = {
+	.name	= "PCI prefetchable",
+	.start	= 0x78000000,
+	.end	= 0x78000000 + NANO_PCI_MEM_RW_SIZE - 1,
+	.flags	= IORESOURCE_MEM  | IORESOURCE_PREFETCH,
+};
+
+static int __init pci_nanoengine_setup_resources(struct resource **resource)
+{
+	if (request_resource(&ioport_resource, &pci_io_ports)) {
+		printk(KERN_ERR "PCI: unable to allocate io port region\n");
+		return -EBUSY;
+	}
+	if (request_resource(&iomem_resource, &pci_non_prefetchable_memory)) {
+		release_resource(&pci_io_ports);
+		printk(KERN_ERR "PCI: unable to allocate non prefetchable\n");
+		return -EBUSY;
+	}
+	if (request_resource(&iomem_resource, &pci_prefetchable_memory)) {
+		release_resource(&pci_io_ports);
+		release_resource(&pci_non_prefetchable_memory);
+		printk(KERN_ERR "PCI: unable to allocate prefetchable\n");
+		return -EBUSY;
+	}
+	resource[0] = &pci_io_ports;
+	resource[1] = &pci_non_prefetchable_memory;
+	resource[2] = &pci_prefetchable_memory;
+
+	return 1;
+}
+
+int __init pci_nanoengine_setup(int nr, struct pci_sys_data *sys)
+{
+	int ret = 0;
+
+	if (nr == 0) {
+		sys->mem_offset = NANO_PCI_MEM_RW_PHYS;
+		sys->io_offset = 0x400;
+		ret = pci_nanoengine_setup_resources(sys->resource);
+		/* Enable alternate memory bus master mode, see
+		 * "Intel StrongARM SA1110 Developer's Manual",
+		 * section 10.8, "Alternate Memory Bus Master Mode". */
+		GPDR = (GPDR & ~GPIO_MBREQ) | GPIO_MBGNT;
+		GAFR |= GPIO_MBGNT | GPIO_MBREQ;
+		TUCR |= TUCR_MBGPIO;
+	}
+
+	return ret;
+}
+
+static struct hw_pci nanoengine_pci __initdata = {
+	.map_irq		= pci_nanoengine_map_irq,
+	.nr_controllers		= 1,
+	.scan			= pci_nanoengine_scan_bus,
+	.setup			= pci_nanoengine_setup,
+};
+
+static int __init nanoengine_pci_init(void)
+{
+	if (machine_is_nanoengine())
+		pci_common_init(&nanoengine_pci);
+	return 0;
+}
+
+subsys_initcall(nanoengine_pci_init);
diff --git a/arch/arm/mach-sa1100/simpad.c b/arch/arm/mach-sa1100/simpad.c
index 27692d0ffbe8..cfb76077bd25 100644
--- a/arch/arm/mach-sa1100/simpad.c
+++ b/arch/arm/mach-sa1100/simpad.c
@@ -166,9 +166,6 @@ static void __init simpad_map_io(void)
 	PCFR = 0;
 	PSDR = 0;
 
-	sa11x0_register_mtd(&simpad_flash_data, simpad_flash_resources,
-			      ARRAY_SIZE(simpad_flash_resources));
-	sa11x0_register_mcp(&simpad_mcp_data);
 }
 
 static void simpad_power_off(void)
@@ -216,6 +213,10 @@ static int __init simpad_init(void)
 
 	pm_power_off = simpad_power_off;
 
+	sa11x0_register_mtd(&simpad_flash_data, simpad_flash_resources,
+			      ARRAY_SIZE(simpad_flash_resources));
+	sa11x0_register_mcp(&simpad_mcp_data);
+
 	ret = platform_add_devices(devices, ARRAY_SIZE(devices));
 	if(ret)
 		printk(KERN_WARNING "simpad: Unable to register mq200 framebuffer device");
diff --git a/arch/arm/mach-sa1100/time.c b/arch/arm/mach-sa1100/time.c
index 74b6e0e570b6..ae4f3d80416f 100644
--- a/arch/arm/mach-sa1100/time.c
+++ b/arch/arm/mach-sa1100/time.c
@@ -12,12 +12,39 @@
 #include <linux/errno.h>
 #include <linux/interrupt.h>
 #include <linux/irq.h>
+#include <linux/sched.h>	/* just for sched_clock() - funny that */
 #include <linux/timex.h>
 #include <linux/clockchips.h>
 
 #include <asm/mach/time.h>
+#include <asm/sched_clock.h>
 #include <mach/hardware.h>
 
+/*
+ * This is the SA11x0 sched_clock implementation.
+ */
+static DEFINE_CLOCK_DATA(cd);
+
+/*
+ * Constants generated by clocks_calc_mult_shift(m, s, 3.6864MHz,
+ * NSEC_PER_SEC, 60).
+ * This gives a resolution of about 271ns and a wrap period of about 19min.
+ */
+#define SC_MULT		2275555556u
+#define SC_SHIFT	23
+
+unsigned long long notrace sched_clock(void)
+{
+	u32 cyc = OSCR;
+	return cyc_to_fixed_sched_clock(&cd, cyc, (u32)~0, SC_MULT, SC_SHIFT);
+}
+
+static void notrace sa1100_update_sched_clock(void)
+{
+	u32 cyc = OSCR;
+	update_sched_clock(&cd, cyc, (u32)~0);
+}
+
 #define MIN_OSCR_DELTA 2
 
 static irqreturn_t sa1100_ost0_interrupt(int irq, void *dev_id)
@@ -81,7 +108,6 @@ static struct clocksource cksrc_sa1100_oscr = {
 	.rating		= 200,
 	.read		= sa1100_read_oscr,
 	.mask		= CLOCKSOURCE_MASK(32),
-	.shift		= 20,
 	.flags		= CLOCK_SOURCE_IS_CONTINUOUS,
 };
 
@@ -97,6 +123,9 @@ static void __init sa1100_timer_init(void)
 	OIER = 0;		/* disable any timer interrupts */
 	OSSR = 0xf;		/* clear status on all timers */
 
+	init_fixed_sched_clock(&cd, sa1100_update_sched_clock, 32,
+			       3686400, SC_MULT, SC_SHIFT);
+
 	ckevt_sa1100_osmr0.mult =
 		div_sc(3686400, NSEC_PER_SEC, ckevt_sa1100_osmr0.shift);
 	ckevt_sa1100_osmr0.max_delta_ns =
@@ -105,12 +134,9 @@ static void __init sa1100_timer_init(void)
 		clockevent_delta2ns(MIN_OSCR_DELTA * 2, &ckevt_sa1100_osmr0) + 1;
 	ckevt_sa1100_osmr0.cpumask = cpumask_of(0);
 
-	cksrc_sa1100_oscr.mult =
-		clocksource_hz2mult(CLOCK_TICK_RATE, cksrc_sa1100_oscr.shift);
-
 	setup_irq(IRQ_OST0, &sa1100_timer_irq);
 
-	clocksource_register(&cksrc_sa1100_oscr);
+	clocksource_register_hz(&cksrc_sa1100_oscr, CLOCK_TICK_RATE);
 	clockevents_register_device(&ckevt_sa1100_osmr0);
 }
 
diff --git a/arch/arm/mach-shmobile/Kconfig b/arch/arm/mach-shmobile/Kconfig
index 51dcd59eda6a..632933357242 100644
--- a/arch/arm/mach-shmobile/Kconfig
+++ b/arch/arm/mach-shmobile/Kconfig
@@ -6,7 +6,7 @@ config ARCH_SH7367
 	bool "SH-Mobile G3 (SH7367)"
 	select CPU_V6
 	select HAVE_CLK
-	select COMMON_CLKDEV
+	select CLKDEV_LOOKUP
 	select SH_CLK_CPG
 	select GENERIC_CLOCKEVENTS
 
@@ -14,7 +14,7 @@ config ARCH_SH7377
 	bool "SH-Mobile G4 (SH7377)"
 	select CPU_V7
 	select HAVE_CLK
-	select COMMON_CLKDEV
+	select CLKDEV_LOOKUP
 	select SH_CLK_CPG
 	select GENERIC_CLOCKEVENTS
 
@@ -22,7 +22,7 @@ config ARCH_SH7372
 	bool "SH-Mobile AP4 (SH7372)"
 	select CPU_V7
 	select HAVE_CLK
-	select COMMON_CLKDEV
+	select CLKDEV_LOOKUP
 	select SH_CLK_CPG
 	select GENERIC_CLOCKEVENTS
 
diff --git a/arch/arm/mach-shmobile/board-ap4evb.c b/arch/arm/mach-shmobile/board-ap4evb.c
index d440e5f456ad..ac429ff2c20d 100644
--- a/arch/arm/mach-shmobile/board-ap4evb.c
+++ b/arch/arm/mach-shmobile/board-ap4evb.c
@@ -61,6 +61,7 @@
 #include <asm/mach/arch.h>
 #include <asm/mach/map.h>
 #include <asm/mach/time.h>
+#include <asm/setup.h>
 
 /*
  * Address	Interface		BusWidth	note
diff --git a/arch/arm/mach-shmobile/clock-sh7367.c b/arch/arm/mach-shmobile/clock-sh7367.c
index 9f78729098f2..6b186aefcbd6 100644
--- a/arch/arm/mach-shmobile/clock-sh7367.c
+++ b/arch/arm/mach-shmobile/clock-sh7367.c
@@ -20,8 +20,8 @@
 #include <linux/kernel.h>
 #include <linux/io.h>
 #include <linux/sh_clk.h>
+#include <linux/clkdev.h>
 #include <mach/common.h>
-#include <asm/clkdev.h>
 
 /* SH7367 registers */
 #define RTFRQCR    0xe6150000
diff --git a/arch/arm/mach-shmobile/clock-sh7372.c b/arch/arm/mach-shmobile/clock-sh7372.c
index 3aa026069435..d98deb497c2f 100644
--- a/arch/arm/mach-shmobile/clock-sh7372.c
+++ b/arch/arm/mach-shmobile/clock-sh7372.c
@@ -20,8 +20,8 @@
 #include <linux/kernel.h>
 #include <linux/io.h>
 #include <linux/sh_clk.h>
+#include <linux/clkdev.h>
 #include <mach/common.h>
-#include <asm/clkdev.h>
 
 /* SH7372 registers */
 #define FRQCRA		0xe6150000
diff --git a/arch/arm/mach-shmobile/clock-sh7377.c b/arch/arm/mach-shmobile/clock-sh7377.c
index f91395aeb9ab..95942466e63f 100644
--- a/arch/arm/mach-shmobile/clock-sh7377.c
+++ b/arch/arm/mach-shmobile/clock-sh7377.c
@@ -20,8 +20,8 @@
 #include <linux/kernel.h>
 #include <linux/io.h>
 #include <linux/sh_clk.h>
+#include <linux/clkdev.h>
 #include <mach/common.h>
-#include <asm/clkdev.h>
 
 /* SH7377 registers */
 #define RTFRQCR    0xe6150000
diff --git a/arch/arm/mach-shmobile/include/mach/entry-macro.S b/arch/arm/mach-shmobile/include/mach/entry-macro.S
index a285d13c7416..f428c4db2b60 100644
--- a/arch/arm/mach-shmobile/include/mach/entry-macro.S
+++ b/arch/arm/mach-shmobile/include/mach/entry-macro.S
@@ -1,4 +1,5 @@
 /*
+ * Copyright (C) 2010 Magnus Damm
  * Copyright (C) 2008 Renesas Solutions Corp.
  *
  * This program is free software; you can redistribute it and/or modify
@@ -14,24 +15,45 @@
  * along with this program; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
-#include <mach/hardware.h>
 #include <mach/irqs.h>
 
+#define INTCA_BASE	0xe6980000
+#define INTFLGA_OFFS	0x00000018 /* accept pending interrupt */
+#define INTEVTA_OFFS	0x00000020 /* vector number of accepted interrupt */
+#define INTLVLA_OFFS	0x00000030 /* priority level of accepted interrupt */
+#define INTLVLB_OFFS	0x00000034 /* previous priority level */
+
 	.macro  disable_fiq
 	.endm
 
 	.macro  get_irqnr_preamble, base, tmp
-	ldr     \base, =INTFLGA
+	ldr     \base, =INTCA_BASE
 	.endm
 
 	.macro  arch_ret_to_user, tmp1, tmp2
 	.endm
 
 	.macro  get_irqnr_and_base, irqnr, irqstat, base, tmp
-	ldr     \irqnr, [\base]
+	/* The single INTFLGA read access below results in the following:
+	 *
+	 * 1. INTLVLB is updated with old priority value from INTLVLA
+	 * 2. Highest priority interrupt is accepted
+	 * 3. INTLVLA is updated to contain priority of accepted interrupt
+	 * 4. Accepted interrupt vector is stored in INTFLGA and INTEVTA
+	 */
+	ldr     \irqnr, [\base, #INTFLGA_OFFS]
+
+	/* Restore INTLVLA with the value saved in INTLVLB.
+	 * This is required to support interrupt priorities properly.
+	 */
+	ldrb	\tmp, [\base, #INTLVLB_OFFS]
+	strb    \tmp, [\base, #INTLVLA_OFFS]
+
+	/* Handle invalid vector number case */
 	cmp	\irqnr, #0
 	beq	1000f
-	/* intevt to irq number */
+
+	/* Convert vector to irq number, same as the evt2irq() macro */
 	lsr	\irqnr, \irqnr, #0x5
 	subs	\irqnr, \irqnr, #16
 
diff --git a/arch/arm/mach-shmobile/include/mach/head-ap4evb.txt b/arch/arm/mach-shmobile/include/mach/head-ap4evb.txt
new file mode 100644
index 000000000000..e3ebfa73956e
--- /dev/null
+++ b/arch/arm/mach-shmobile/include/mach/head-ap4evb.txt
@@ -0,0 +1,87 @@
+LIST "partner-jet-setup.txt"
+LIST "(C) Copyright 2010 Renesas Solutions Corp"
+LIST "Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>"
+
+LIST "RWT Setting"
+EW 0xE6020004, 0xA500
+EW 0xE6030004, 0xA500
+
+DD 0x01001000, 0x01001000
+
+LIST "GPIO Setting"
+EB 0xE6051013, 0xA2
+
+LIST "CPG"
+ED 0xE6150080, 0x00000180
+ED 0xE61500C0, 0x00000002
+
+WAIT 1, 0xFE40009C
+
+LIST "FRQCR"
+ED 0xE6150000, 0x2D1305C3
+ED 0xE61500E0, 0x9E40358E
+ED 0xE6150004, 0x80331050
+
+WAIT 1, 0xFE40009C
+
+ED 0xE61500E4, 0x00002000
+
+WAIT 1, 0xFE40009C
+
+LIST "PLL"
+ED 0xE6150028, 0x00004000
+
+WAIT 1, 0xFE40009C
+
+ED 0xE615002C, 0x93000040
+
+WAIT 1, 0xFE40009C
+
+LIST "BSC"
+ED 0xFEC10000, 0x00E0001B
+
+LIST "SBSC1"
+ED 0xFE400354, 0x01AD8000
+ED 0xFE400354, 0x01AD8001
+
+WAIT 5, 0xFE40009C
+
+ED 0xFE400008, 0xBCC90151
+ED 0xFE400040, 0x41774113
+ED 0xFE400044, 0x2712E229
+ED 0xFE400048, 0x20C18505
+ED 0xFE40004C, 0x00110209
+ED 0xFE400010, 0x00000087
+
+WAIT 10, 0xFE40009C
+
+ED 0xFE400084, 0x0000003F
+EB 0xFE500000, 0x00
+
+WAIT 5, 0xFE40009C
+
+ED 0xFE400084, 0x0000FF0A
+EB 0xFE500000, 0x00
+
+WAIT 1, 0xFE40009C
+
+ED 0xFE400084, 0x00002201
+EB 0xFE500000, 0x00
+ED 0xFE400084, 0x00000302
+EB 0xFE500000, 0x00
+EB 0xFE5C0000, 0x00
+ED 0xFE400008, 0xBCC90159
+ED 0xFE40008C, 0x88800004
+ED 0xFE400094, 0x00000004
+ED 0xFE400028, 0xA55A0032
+ED 0xFE40002C, 0xA55A000C
+ED 0xFE400020, 0xA55A2048
+ED 0xFE400008, 0xBCC90959
+
+LIST "Change CPGA setting"
+ED 0xE61500E0, 0x9E40352E
+ED 0xE6150004, 0x80331050
+
+WAIT 1, 0xFE40009C
+
+ED 0xE6150354, 0x00000002
diff --git a/arch/arm/mach-shmobile/include/mach/vmalloc.h b/arch/arm/mach-shmobile/include/mach/vmalloc.h
index 4aecf6e3a859..2b8fd8b942fe 100644
--- a/arch/arm/mach-shmobile/include/mach/vmalloc.h
+++ b/arch/arm/mach-shmobile/include/mach/vmalloc.h
@@ -2,6 +2,6 @@
 #define __ASM_MACH_VMALLOC_H
 
 /* Vmalloc at ... - 0xe5ffffff */
-#define VMALLOC_END 0xe6000000
+#define VMALLOC_END 0xe6000000UL
 
 #endif /* __ASM_MACH_VMALLOC_H */
diff --git a/arch/arm/mach-shmobile/include/mach/zboot.h b/arch/arm/mach-shmobile/include/mach/zboot.h
new file mode 100644
index 000000000000..3ad86b7708e9
--- /dev/null
+++ b/arch/arm/mach-shmobile/include/mach/zboot.h
@@ -0,0 +1,20 @@
+#ifndef ZBOOT_H
+#define ZBOOT_H
+
+#include <asm/mach-types.h>
+#include <mach/zboot_macros.h>
+
+/**************************************************
+ *
+ *		board specific settings
+ *
+ **************************************************/
+
+#ifdef CONFIG_MACH_AP4EVB
+#define MACH_TYPE	MACH_TYPE_AP4EVB
+#include "mach/head-ap4evb.txt"
+#else
+#error "unsupported board."
+#endif
+
+#endif /* ZBOOT_H */
diff --git a/arch/arm/mach-shmobile/include/mach/zboot_macros.h b/arch/arm/mach-shmobile/include/mach/zboot_macros.h
new file mode 100644
index 000000000000..aa6111fbc989
--- /dev/null
+++ b/arch/arm/mach-shmobile/include/mach/zboot_macros.h
@@ -0,0 +1,65 @@
+#ifndef __ZBOOT_MACRO_H
+#define __ZBOOT_MACRO_H
+
+/* The LIST command is used to include comments in the script */
+.macro	LIST comment
+.endm
+
+/* The ED command is used to write a 32-bit word */
+.macro ED, addr, data
+	LDR	r0, 1f
+	LDR	r1, 2f
+	STR	r1, [r0]
+	B	3f
+1 :	.long	\addr
+2 :	.long	\data
+3 :
+.endm
+
+/* The EW command is used to write a 16-bit word */
+.macro EW, addr, data
+	LDR	r0, 1f
+	LDR	r1, 2f
+	STRH	r1, [r0]
+	B	3f
+1 :	.long	\addr
+2 :	.long	\data
+3 :
+.endm
+
+/* The EB command is used to write an 8-bit word */
+.macro EB, addr, data
+	LDR	r0, 1f
+	LDR	r1, 2f
+	STRB	r1, [r0]
+	B	3f
+1 :	.long	\addr
+2 :	.long	\data
+3 :
+.endm
+
+/* The WAIT command is used to delay the execution */
+.macro  WAIT, time, reg
+	LDR	r1, 1f
+	LDR	r0, 2f
+	STR	r0, [r1]
+10 :
+	LDR	r0, [r1]
+	CMP	r0, #0x00000000
+	BNE	10b
+	NOP
+	B	3f
+1 :	.long	\reg
+2 :	.long	\time * 100
+3 :
+.endm
+
+/* The DD command is used to read a 32-bit word */
+.macro  DD, start, end
+	LDR	r1, 1f
+	B	2f
+1 :	.long	\start
+2 :
+.endm
+
+#endif /* __ZBOOT_MACRO_H */
diff --git a/arch/arm/mach-tcc8k/clock.c b/arch/arm/mach-tcc8k/clock.c
index ba32a15127ab..3970a9cdce26 100644
--- a/arch/arm/mach-tcc8k/clock.c
+++ b/arch/arm/mach-tcc8k/clock.c
@@ -12,8 +12,7 @@
 #include <linux/io.h>
 #include <linux/module.h>
 #include <linux/spinlock.h>
-
-#include <asm/clkdev.h>
+#include <linux/clkdev.h>
 
 #include <mach/clock.h>
 #include <mach/irqs.h>
diff --git a/arch/arm/mach-tcc8k/time.c b/arch/arm/mach-tcc8k/time.c
index 78d06008841d..e0a8d609afe1 100644
--- a/arch/arm/mach-tcc8k/time.c
+++ b/arch/arm/mach-tcc8k/time.c
@@ -35,7 +35,6 @@ static struct clocksource clocksource_tcc = {
 	.rating		= 200,
 	.read		= tcc_get_cycles,
 	.mask		= CLOCKSOURCE_MASK(32),
-	.shift		= 28,
 	.flags		= CLOCK_SOURCE_IS_CONTINUOUS,
 };
 
@@ -103,9 +102,7 @@ static int __init tcc_clockevent_init(struct clk *clock)
 {
 	unsigned int c = clk_get_rate(clock);
 
-	clocksource_tcc.mult = clocksource_hz2mult(c,
-					clocksource_tcc.shift);
-	clocksource_register(&clocksource_tcc);
+	clocksource_register_hz(&clocksource_tcc, c);
 
 	clockevent_tcc.mult = div_sc(c, NSEC_PER_SEC,
 					clockevent_tcc.shift);
diff --git a/arch/arm/mach-tegra/clock.c b/arch/arm/mach-tegra/clock.c
index ae19f95585be..77948e0f4909 100644
--- a/arch/arm/mach-tegra/clock.c
+++ b/arch/arm/mach-tegra/clock.c
@@ -25,7 +25,7 @@
 #include <linux/slab.h>
 #include <linux/seq_file.h>
 #include <linux/regulator/consumer.h>
-#include <asm/clkdev.h>
+#include <linux/clkdev.h>
 
 #include "clock.h"
 #include "board.h"
diff --git a/arch/arm/mach-tegra/clock.h b/arch/arm/mach-tegra/clock.h
index 94fd859770f1..083a4cfc6cf0 100644
--- a/arch/arm/mach-tegra/clock.h
+++ b/arch/arm/mach-tegra/clock.h
@@ -21,7 +21,7 @@
 #define __MACH_TEGRA_CLOCK_H
 
 #include <linux/list.h>
-#include <asm/clkdev.h>
+#include <linux/clkdev.h>
 
 #define DIV_BUS			(1 << 0)
 #define DIV_U71			(1 << 1)
diff --git a/arch/arm/mach-tegra/hotplug.c b/arch/arm/mach-tegra/hotplug.c
index 8e7f115aa21e..a5cb1ce76ff2 100644
--- a/arch/arm/mach-tegra/hotplug.c
+++ b/arch/arm/mach-tegra/hotplug.c
@@ -11,12 +11,9 @@
 #include <linux/kernel.h>
 #include <linux/errno.h>
 #include <linux/smp.h>
-#include <linux/completion.h>
 
 #include <asm/cacheflush.h>
 
-static DECLARE_COMPLETION(cpu_killed);
-
 static inline void cpu_enter_lowpower(void)
 {
 	unsigned int v;
@@ -29,13 +26,13 @@ static inline void cpu_enter_lowpower(void)
 	 * Turn off coherency
 	 */
 	"	mrc	p15, 0, %0, c1, c0, 1\n"
-	"	bic	%0, %0, #0x20\n"
+	"	bic	%0, %0, %2\n"
 	"	mcr	p15, 0, %0, c1, c0, 1\n"
 	"	mrc	p15, 0, %0, c1, c0, 0\n"
 	"	bic	%0, %0, #0x04\n"
 	"	mcr	p15, 0, %0, c1, c0, 0\n"
 	  : "=&r" (v)
-	  : "r" (0)
+	  : "r" (0), "Ir" (CR_C)
 	  : "cc");
 }
 
@@ -45,17 +42,17 @@ static inline void cpu_leave_lowpower(void)
 
 	asm volatile(
 	"mrc	p15, 0, %0, c1, c0, 0\n"
-	"	orr	%0, %0, #0x04\n"
+	"	orr	%0, %0, %1\n"
 	"	mcr	p15, 0, %0, c1, c0, 0\n"
 	"	mrc	p15, 0, %0, c1, c0, 1\n"
 	"	orr	%0, %0, #0x20\n"
 	"	mcr	p15, 0, %0, c1, c0, 1\n"
 	  : "=&r" (v)
-	  :
+	  : "Ir" (CR_C)
 	  : "cc");
 }
 
-static inline void platform_do_lowpower(unsigned int cpu)
+static inline void platform_do_lowpower(unsigned int cpu, int *spurious)
 {
 	/*
 	 * there is no power-control hardware on this platform, so all
@@ -79,22 +76,19 @@ static inline void platform_do_lowpower(unsigned int cpu)
 		/*}*/
 
 		/*
-		 * getting here, means that we have come out of WFI without
+		 * Getting here, means that we have come out of WFI without
 		 * having been woken up - this shouldn't happen
 		 *
-		 * The trouble is, letting people know about this is not really
-		 * possible, since we are currently running incoherently, and
-		 * therefore cannot safely call printk() or anything else
+		 * Just note it happening - when we're woken, we can report
+		 * its occurrence.
 		 */
-#ifdef DEBUG
-		printk(KERN_WARN "CPU%u: spurious wakeup call\n", cpu);
-#endif
+		(*spurious)++;
 	}
 }
 
 int platform_cpu_kill(unsigned int cpu)
 {
-	return wait_for_completion_timeout(&cpu_killed, 5000);
+	return 1;
 }
 
 /*
@@ -104,30 +98,22 @@ int platform_cpu_kill(unsigned int cpu)
  */
 void platform_cpu_die(unsigned int cpu)
 {
-#ifdef DEBUG
-	unsigned int this_cpu = hard_smp_processor_id();
-
-	if (cpu != this_cpu) {
-		printk(KERN_CRIT "Eek! platform_cpu_die running on %u, should be %u\n",
-			   this_cpu, cpu);
-		BUG();
-	}
-#endif
-
-	printk(KERN_NOTICE "CPU%u: shutdown\n", cpu);
-	complete(&cpu_killed);
+	int spurious = 0;
 
 	/*
 	 * we're ready for shutdown now, so do it
 	 */
 	cpu_enter_lowpower();
-	platform_do_lowpower(cpu);
+	platform_do_lowpower(cpu, &spurious);
 
 	/*
 	 * bring this CPU back into the world of cache
 	 * coherency, and then restore interrupts
 	 */
 	cpu_leave_lowpower();
+
+	if (spurious)
+		pr_warn("CPU%u: %u spurious wakeup calls\n", cpu, spurious);
 }
 
 int platform_cpu_disable(unsigned int cpu)
diff --git a/arch/arm/mach-tegra/include/mach/entry-macro.S b/arch/arm/mach-tegra/include/mach/entry-macro.S
index 2ba9e5c9d2f6..dd165c53889d 100644
--- a/arch/arm/mach-tegra/include/mach/entry-macro.S
+++ b/arch/arm/mach-tegra/include/mach/entry-macro.S
@@ -16,8 +16,8 @@
 #include <mach/io.h>
 
 #if defined(CONFIG_ARM_GIC)
-
-#include <asm/hardware/gic.h>
+#define HAVE_GET_IRQNR_PREAMBLE
+#include <asm/hardware/entry-macro-gic.S>
 
 	/* Uses the GIC interrupt controller built into the cpu */
 #define ICTRL_BASE (IO_CPU_VIRT + 0x100)
@@ -32,68 +32,6 @@
 
 	.macro  arch_ret_to_user, tmp1, tmp2
 	.endm
-
-	/*
-	 * The interrupt numbering scheme is defined in the
-	 * interrupt controller spec.  To wit:
-	 *
-	 * Interrupts 0-15 are IPI
-	 * 16-28 are reserved
-	 * 29-31 are local.  We allow 30 to be used for the watchdog.
-	 * 32-1020 are global
-	 * 1021-1022 are reserved
-	 * 1023 is "spurious" (no interrupt)
-	 *
-	 * For now, we ignore all local interrupts so only return an interrupt
-	 * if it's between 30 and 1020.  The test_for_ipi routine below will
-	 * pick up on IPIs.
-	 *
-	 * A simple read from the controller will tell us the number of the
-	 * highest priority enabled interrupt.  We then just need to check
-	 * whether it is in the valid range for an IRQ (30-1020 inclusive).
-	 */
-
-	.macro  get_irqnr_and_base, irqnr, irqstat, base, tmp
-
-	/* bits 12-10 = src CPU, 9-0 = int # */
-	ldr     \irqstat, [\base, #GIC_CPU_INTACK]
-
-	ldr		\tmp, =1021
-
-	bic     \irqnr, \irqstat, #0x1c00
-
-	cmp     \irqnr, #29
-	cmpcc	\irqnr, \irqnr
-	cmpne	\irqnr, \tmp
-	cmpcs	\irqnr, \irqnr
-
-	.endm
-
-	/* We assume that irqstat (the raw value of the IRQ acknowledge
-	 * register) is preserved from the macro above.
-	 * If there is an IPI, we immediately signal end of interrupt on the
-	 * controller, since this requires the original irqstat value which
-	 * we won't easily be able to recreate later.
-	 */
-
-	.macro test_for_ipi, irqnr, irqstat, base, tmp
-	bic	\irqnr, \irqstat, #0x1c00
-	cmp	\irqnr, #16
-	strcc	\irqstat, [\base, #GIC_CPU_EOI]
-	cmpcs	\irqnr, \irqnr
-	.endm
-
-	/* As above, this assumes that irqstat and base are preserved.. */
-
-	.macro test_for_ltirq, irqnr, irqstat, base, tmp
-	bic	\irqnr, \irqstat, #0x1c00
-	mov 	\tmp, #0
-	cmp	\irqnr, #29
-	moveq	\tmp, #1
-	streq	\irqstat, [\base, #GIC_CPU_EOI]
-	cmp	\tmp, #0
-	.endm
-
 #else
 	/* legacy interrupt controller for AP16 */
 	.macro	disable_fiq
diff --git a/arch/arm/mach-tegra/include/mach/io.h b/arch/arm/mach-tegra/include/mach/io.h
index f0981b1ac59e..4cea2230c8dc 100644
--- a/arch/arm/mach-tegra/include/mach/io.h
+++ b/arch/arm/mach-tegra/include/mach/io.h
@@ -65,8 +65,8 @@
 
 #ifndef __ASSEMBLER__
 
-#define __arch_ioremap(p, s, t)	tegra_ioremap(p, s, t)
-#define __arch_iounmap(v)	tegra_iounmap(v)
+#define __arch_ioremap		tegra_ioremap
+#define __arch_iounmap		tegra_iounmap
 
 void __iomem *tegra_ioremap(unsigned long phys, size_t size, unsigned int type);
 void tegra_iounmap(volatile void __iomem *addr);
diff --git a/arch/arm/mach-tegra/include/mach/smp.h b/arch/arm/mach-tegra/include/mach/smp.h
index e4a34a35a544..c8221b38ee7c 100644
--- a/arch/arm/mach-tegra/include/mach/smp.h
+++ b/arch/arm/mach-tegra/include/mach/smp.h
@@ -2,21 +2,13 @@
 #define ASMARM_ARCH_SMP_H
 
 #include <asm/hardware/gic.h>
-#include <asm/smp_mpidr.h>
 
 /*
  * We use IRQ1 as the IPI
  */
-static inline void smp_cross_call(const struct cpumask *mask)
-{
-	gic_raise_softirq(mask, 1);
-}
-
-/*
- * Do nothing on MPcore.
- */
-static inline void smp_cross_call_done(cpumask_t callmap)
+static inline void smp_cross_call(const struct cpumask *mask, int ipi)
 {
+	gic_raise_softirq(mask, ipi);
 }
 
 #endif
diff --git a/arch/arm/mach-tegra/irq.c b/arch/arm/mach-tegra/irq.c
index 50a8dfb9a0cf..5407de01abf0 100644
--- a/arch/arm/mach-tegra/irq.c
+++ b/arch/arm/mach-tegra/irq.c
@@ -94,8 +94,8 @@ void __init tegra_init_irq(void)
 		writel(0, ictlr_to_virt(i) + ICTLR_CPU_IEP_CLASS);
 	}
 
-	gic_dist_init(0, IO_ADDRESS(TEGRA_ARM_INT_DIST_BASE), 29);
-	gic_cpu_init(0, IO_ADDRESS(TEGRA_ARM_PERIF_BASE + 0x100));
+	gic_init(0, 29, IO_ADDRESS(TEGRA_ARM_INT_DIST_BASE),
+		 IO_ADDRESS(TEGRA_ARM_PERIF_BASE + 0x100));
 
 	gic = get_irq_chip(29);
 	gic_unmask_irq = gic->unmask;
diff --git a/arch/arm/mach-tegra/platsmp.c b/arch/arm/mach-tegra/platsmp.c
index 1c0fd92cab39..ec1f68924edf 100644
--- a/arch/arm/mach-tegra/platsmp.c
+++ b/arch/arm/mach-tegra/platsmp.c
@@ -22,7 +22,6 @@
 #include <asm/cacheflush.h>
 #include <mach/hardware.h>
 #include <asm/mach-types.h>
-#include <asm/localtimer.h>
 #include <asm/smp_scu.h>
 
 #include <mach/iomap.h>
@@ -41,14 +40,12 @@ static void __iomem *scu_base = IO_ADDRESS(TEGRA_ARM_PERIF_BASE);
 
 void __cpuinit platform_secondary_init(unsigned int cpu)
 {
-	trace_hardirqs_off();
-
 	/*
 	 * if any interrupts are already enabled for the primary
 	 * core (e.g. timer irq), then they will not have been enabled
 	 * for us: do so
 	 */
-	gic_cpu_init(0, IO_ADDRESS(TEGRA_ARM_PERIF_BASE) + 0x100);
+	gic_secondary_init(0);
 
 	/*
 	 * Synchronise with the boot thread.
@@ -117,24 +114,20 @@ void __init smp_init_cpus(void)
 {
 	unsigned int i, ncores = scu_get_core_count(scu_base);
 
+	if (ncores > NR_CPUS) {
+		printk(KERN_ERR "Tegra: no. of cores (%u) greater than configured (%u), clipping\n",
+			ncores, NR_CPUS);
+		ncores = NR_CPUS;
+	}
+
 	for (i = 0; i < ncores; i++)
 		cpu_set(i, cpu_possible_map);
 }
 
-void __init smp_prepare_cpus(unsigned int max_cpus)
+void __init platform_smp_prepare_cpus(unsigned int max_cpus)
 {
-	unsigned int ncores = scu_get_core_count(scu_base);
-	unsigned int cpu = smp_processor_id();
 	int i;
 
-	smp_store_cpu_info(cpu);
-
-	/*
-	 * are we trying to boot more cores than exist?
-	 */
-	if (max_cpus > ncores)
-		max_cpus = ncores;
-
 	/*
 	 * Initialise the present map, which describes the set of CPUs
 	 * actually populated at the present time.
@@ -142,15 +135,5 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
 	for (i = 0; i < max_cpus; i++)
 		set_cpu_present(i, true);
 
-	/*
-	 * Initialise the SCU if there are more than one CPU and let
-	 * them know where to start. Note that, on modern versions of
-	 * MILO, the "poke" doesn't actually do anything until each
-	 * individual core is sent a soft interrupt to get it out of
-	 * WFI
-	 */
-	if (max_cpus > 1) {
-		percpu_timer_setup();
-		scu_enable(scu_base);
-	}
+	scu_enable(scu_base);
 }
diff --git a/arch/arm/mach-tegra/tegra2_clocks.c b/arch/arm/mach-tegra/tegra2_clocks.c
index ae3b308e22a4..f0dae6d8ba52 100644
--- a/arch/arm/mach-tegra/tegra2_clocks.c
+++ b/arch/arm/mach-tegra/tegra2_clocks.c
@@ -24,8 +24,7 @@
 #include <linux/delay.h>
 #include <linux/io.h>
 #include <linux/hrtimer.h>
-
-#include <asm/clkdev.h>
+#include <linux/clkdev.h>
 
 #include <mach/iomap.h>
 
diff --git a/arch/arm/mach-tegra/timer.c b/arch/arm/mach-tegra/timer.c
index 9057d6fd1d31..7b8ad1f98f44 100644
--- a/arch/arm/mach-tegra/timer.c
+++ b/arch/arm/mach-tegra/timer.c
@@ -18,6 +18,7 @@
  */
 
 #include <linux/init.h>
+#include <linux/sched.h>
 #include <linux/time.h>
 #include <linux/interrupt.h>
 #include <linux/irq.h>
@@ -25,10 +26,10 @@
 #include <linux/clocksource.h>
 #include <linux/clk.h>
 #include <linux/io.h>
-#include <linux/cnt32_to_63.h>
 
 #include <asm/mach/time.h>
 #include <asm/localtimer.h>
+#include <asm/sched_clock.h>
 
 #include <mach/iomap.h>
 #include <mach/irqs.h>
@@ -91,7 +92,7 @@ static void tegra_timer_set_mode(enum clock_event_mode mode,
 
 static cycle_t tegra_clocksource_read(struct clocksource *cs)
 {
-	return cnt32_to_63(timer_readl(TIMERUS_CNTR_1US));
+	return timer_readl(TIMERUS_CNTR_1US);
 }
 
 static struct clock_event_device tegra_clockevent = {
@@ -106,14 +107,29 @@ static struct clocksource tegra_clocksource = {
 	.name	= "timer_us",
 	.rating	= 300,
 	.read	= tegra_clocksource_read,
-	.mask	= 0x7FFFFFFFFFFFFFFFULL,
+	.mask	= CLOCKSOURCE_MASK(32),
 	.flags	= CLOCK_SOURCE_IS_CONTINUOUS,
 };
 
-unsigned long long sched_clock(void)
+static DEFINE_CLOCK_DATA(cd);
+
+/*
+ * Constants generated by clocks_calc_mult_shift(m, s, 1MHz, NSEC_PER_SEC, 60).
+ * This gives a resolution of about 1us and a wrap period of about 1h11min.
+ */
+#define SC_MULT		4194304000u
+#define SC_SHIFT	22
+
+unsigned long long notrace sched_clock(void)
 {
-	return clocksource_cyc2ns(tegra_clocksource.read(&tegra_clocksource),
-		tegra_clocksource.mult, tegra_clocksource.shift);
+	u32 cyc = timer_readl(TIMERUS_CNTR_1US);
+	return cyc_to_fixed_sched_clock(&cd, cyc, (u32)~0, SC_MULT, SC_SHIFT);
+}
+
+static void notrace tegra_update_sched_clock(void)
+{
+	u32 cyc = timer_readl(TIMERUS_CNTR_1US);
+	update_sched_clock(&cd, cyc, (u32)~0);
 }
 
 static irqreturn_t tegra_timer_interrupt(int irq, void *dev_id)
@@ -158,6 +174,9 @@ static void __init tegra_init_timer(void)
 		WARN(1, "Unknown clock rate");
 	}
 
+	init_fixed_sched_clock(&cd, tegra_update_sched_clock, 32,
+			       1000000, SC_MULT, SC_SHIFT);
+
 	if (clocksource_register_hz(&tegra_clocksource, 1000000)) {
 		printk(KERN_ERR "Failed to register clocksource\n");
 		BUG();
diff --git a/arch/arm/mach-u300/clock.c b/arch/arm/mach-u300/clock.c
index 7458fc6df5c6..fabcc49abe80 100644
--- a/arch/arm/mach-u300/clock.c
+++ b/arch/arm/mach-u300/clock.c
@@ -25,8 +25,8 @@
 #include <linux/timer.h>
 #include <linux/io.h>
 #include <linux/seq_file.h>
+#include <linux/clkdev.h>
 
-#include <asm/clkdev.h>
 #include <mach/hardware.h>
 #include <mach/syscon.h>
 
diff --git a/arch/arm/mach-u300/timer.c b/arch/arm/mach-u300/timer.c
index 3fc4472719be..3ec58bd2d6e4 100644
--- a/arch/arm/mach-u300/timer.c
+++ b/arch/arm/mach-u300/timer.c
@@ -9,6 +9,7 @@
  * Author: Linus Walleij <linus.walleij@stericsson.com>
  */
 #include <linux/interrupt.h>
+#include <linux/sched.h>
 #include <linux/time.h>
 #include <linux/timex.h>
 #include <linux/clockchips.h>
@@ -21,6 +22,7 @@
 #include <mach/hardware.h>
 
 /* Generic stuff */
+#include <asm/sched_clock.h>
 #include <asm/mach/map.h>
 #include <asm/mach/time.h>
 #include <asm/mach/irq.h>
@@ -352,12 +354,18 @@ static struct clocksource clocksource_u300_1mhz = {
  * this wraps around for now, since it is just a relative time
  * stamp. (Inspired by OMAP implementation.)
  */
+static DEFINE_CLOCK_DATA(cd);
+
 unsigned long long notrace sched_clock(void)
 {
-	return clocksource_cyc2ns(clocksource_u300_1mhz.read(
-				  &clocksource_u300_1mhz),
-				  clocksource_u300_1mhz.mult,
-				  clocksource_u300_1mhz.shift);
+	u32 cyc = readl(U300_TIMER_APP_VBASE + U300_TIMER_APP_GPT2CC);
+	return cyc_to_sched_clock(&cd, cyc, (u32)~0);
+}
+
+static void notrace u300_update_sched_clock(void)
+{
+	u32 cyc = readl(U300_TIMER_APP_VBASE + U300_TIMER_APP_GPT2CC);
+	update_sched_clock(&cd, cyc, (u32)~0);
 }
 
 
@@ -375,6 +383,8 @@ static void __init u300_timer_init(void)
 	clk_enable(clk);
 	rate = clk_get_rate(clk);
 
+	init_sched_clock(&cd, u300_update_sched_clock, 32, rate);
+
 	/*
 	 * Disable the "OS" and "DD" timers - these are designed for Symbian!
 	 * Example usage in cnh1601578 cpu subsystem pd_timer_app.c
@@ -412,9 +422,7 @@ static void __init u300_timer_init(void)
 	writel(U300_TIMER_APP_EGPT2_TIMER_ENABLE,
 		U300_TIMER_APP_VBASE + U300_TIMER_APP_EGPT2);
 
-	clocksource_calc_mult_shift(&clocksource_u300_1mhz,
-				    rate, APPTIMER_MIN_RANGE);
-	if (clocksource_register(&clocksource_u300_1mhz))
+	if (clocksource_register_hz(&clocksource_u300_1mhz, rate))
 		printk(KERN_ERR "timer: failed to initialize clock "
 		       "source %s\n", clocksource_u300_1mhz.name);
 
diff --git a/arch/arm/mach-ux500/clock.c b/arch/arm/mach-ux500/clock.c
index 912d1cc18c57..ccff2dae167f 100644
--- a/arch/arm/mach-ux500/clock.c
+++ b/arch/arm/mach-ux500/clock.c
@@ -13,8 +13,7 @@
 #include <linux/err.h>
 #include <linux/clk.h>
 #include <linux/io.h>
-
-#include <asm/clkdev.h>
+#include <linux/clkdev.h>
 
 #include <plat/mtu.h>
 #include <mach/hardware.h>
diff --git a/arch/arm/mach-ux500/cpu.c b/arch/arm/mach-ux500/cpu.c
index a3700bc374d3..5730409c0f7d 100644
--- a/arch/arm/mach-ux500/cpu.c
+++ b/arch/arm/mach-ux500/cpu.c
@@ -52,8 +52,8 @@ void __init ux500_map_io(void)
 
 void __init ux500_init_irq(void)
 {
-	gic_dist_init(0, __io_address(UX500_GIC_DIST_BASE), 29);
-	gic_cpu_init(0, __io_address(UX500_GIC_CPU_BASE));
+	gic_init(0, 29, __io_address(UX500_GIC_DIST_BASE),
+		 __io_address(UX500_GIC_CPU_BASE));
 
 	/*
 	 * Init clocks here so that they are available for system timer
diff --git a/arch/arm/mach-ux500/headsmp.S b/arch/arm/mach-ux500/headsmp.S
index a6be2cdf2b2f..64fa451edcfd 100644
--- a/arch/arm/mach-ux500/headsmp.S
+++ b/arch/arm/mach-ux500/headsmp.S
@@ -23,7 +23,6 @@ ENTRY(u8500_secondary_startup)
 	ldmia	r4, {r5, r6}
 	sub	r4, r4, r5
 	add	r6, r6, r4
-	dsb
 pen:	ldr	r7, [r6]
 	cmp	r7, r0
 	bne	pen
diff --git a/arch/arm/mach-ux500/hotplug.c b/arch/arm/mach-ux500/hotplug.c
index b782a03024be..dd8037ebccf8 100644
--- a/arch/arm/mach-ux500/hotplug.c
+++ b/arch/arm/mach-ux500/hotplug.c
@@ -11,14 +11,11 @@
 #include <linux/kernel.h>
 #include <linux/errno.h>
 #include <linux/smp.h>
-#include <linux/completion.h>
 
 #include <asm/cacheflush.h>
 
 extern volatile int pen_release;
 
-static DECLARE_COMPLETION(cpu_killed);
-
 static inline void platform_do_lowpower(unsigned int cpu)
 {
 	flush_cache_all();
@@ -38,7 +35,7 @@ static inline void platform_do_lowpower(unsigned int cpu)
 
 int platform_cpu_kill(unsigned int cpu)
 {
-	return wait_for_completion_timeout(&cpu_killed, 5000);
+	return 1;
 }
 
 /*
@@ -48,19 +45,6 @@ int platform_cpu_kill(unsigned int cpu)
  */
 void platform_cpu_die(unsigned int cpu)
 {
-#ifdef DEBUG
-	unsigned int this_cpu = hard_smp_processor_id();
-
-	if (cpu != this_cpu) {
-		printk(KERN_CRIT "Eek! platform_cpu_die running on %u, should be %u\n",
-			   this_cpu, cpu);
-		BUG();
-	}
-#endif
-
-	printk(KERN_NOTICE "CPU%u: shutdown\n", cpu);
-	complete(&cpu_killed);
-
 	/* directly enter low power state, skipping secure registers */
 	platform_do_lowpower(cpu);
 }
diff --git a/arch/arm/mach-ux500/include/mach/entry-macro.S b/arch/arm/mach-ux500/include/mach/entry-macro.S
index 60ea88db8283..a37f585a3ecb 100644
--- a/arch/arm/mach-ux500/include/mach/entry-macro.S
+++ b/arch/arm/mach-ux500/include/mach/entry-macro.S
@@ -11,7 +11,8 @@
  * warranty of any kind, whether express or implied.
  */
 #include <mach/hardware.h>
-#include <asm/hardware/gic.h>
+#define HAVE_GET_IRQNR_PREAMBLE
+#include <asm/hardware/entry-macro-gic.S>
 
 		.macro	disable_fiq
 		.endm
@@ -22,68 +23,3 @@
 
 		.macro  arch_ret_to_user, tmp1, tmp2
 		.endm
-
-		/*
-		 * The interrupt numbering scheme is defined in the
-		 * interrupt controller spec.  To wit:
-		 *
-		 * Interrupts 0-15 are IPI
-		 * 16-28 are reserved
-		 * 29-31 are local.  We allow 30 to be used for the watchdog.
-		 * 32-1020 are global
-		 * 1021-1022 are reserved
-		 * 1023 is "spurious" (no interrupt)
-		 *
-		 * For now, we ignore all local interrupts so only return an
-		 * interrupt if it's between 30 and 1020. The test_for_ipi
-		 * routine below will pick up on IPIs.
-		 *
-		 * A simple read from the controller will tell us the number
-		 * of the highest priority enabled interrupt. We then just
-		 * need to check whether it is in the valid range for an
-		 * IRQ (30-1020 inclusive).
-		 */
-
-		.macro  get_irqnr_and_base, irqnr, irqstat, base, tmp
-
-		/* bits 12-10 = src CPU, 9-0 = int # */
-		ldr     \irqstat, [\base, #GIC_CPU_INTACK]
-
-		ldr	\tmp, =1021
-
-		bic     \irqnr, \irqstat, #0x1c00
-
-		cmp     \irqnr, #29
-		cmpcc	\irqnr, \irqnr
-		cmpne	\irqnr, \tmp
-		cmpcs	\irqnr, \irqnr
-
-		.endm
-
-		/* We assume that irqstat (the raw value of the IRQ
-		 * acknowledge register) is preserved from the macro above.
-		 * If there is an IPI, we immediately signal end of
-		 * interrupt on the controller, since this requires the
-		 * original irqstat value which we won't easily be able
-		 * to recreate later.
-		 */
-
-		.macro test_for_ipi, irqnr, irqstat, base, tmp
-		bic	\irqnr, \irqstat, #0x1c00
-		cmp	\irqnr, #16
-		strcc	\irqstat, [\base, #GIC_CPU_EOI]
-		cmpcs	\irqnr, \irqnr
-		.endm
-
-		/* As above, this assumes that irqstat and base
-		 * are preserved..
-		 */
-
-		.macro test_for_ltirq, irqnr, irqstat, base, tmp
-		bic	\irqnr, \irqstat, #0x1c00
-		mov 	\tmp, #0
-		cmp	\irqnr, #29
-		moveq	\tmp, #1
-		streq	\irqstat, [\base, #GIC_CPU_EOI]
-		cmp	\tmp, #0
-		.endm
diff --git a/arch/arm/mach-ux500/include/mach/smp.h b/arch/arm/mach-ux500/include/mach/smp.h
index 197e8417375e..ca2b15b1b3b1 100644
--- a/arch/arm/mach-ux500/include/mach/smp.h
+++ b/arch/arm/mach-ux500/include/mach/smp.h
@@ -10,7 +10,6 @@
 #define ASMARM_ARCH_SMP_H
 
 #include <asm/hardware/gic.h>
-#include <asm/smp_mpidr.h>
 
 /* This is required to wakeup the secondary core */
 extern void u8500_secondary_startup(void);
@@ -18,8 +17,8 @@ extern void u8500_secondary_startup(void);
 /*
  * We use IRQ1 as the IPI
  */
-static inline void smp_cross_call(const struct cpumask *mask)
+static inline void smp_cross_call(const struct cpumask *mask, int ipi)
 {
-	gic_raise_softirq(mask, 1);
+	gic_raise_softirq(mask, ipi);
 }
 #endif
diff --git a/arch/arm/mach-ux500/platsmp.c b/arch/arm/mach-ux500/platsmp.c
index ade2e17f253c..d77e76cb7edd 100644
--- a/arch/arm/mach-ux500/platsmp.c
+++ b/arch/arm/mach-ux500/platsmp.c
@@ -18,7 +18,6 @@
 #include <linux/io.h>
 
 #include <asm/cacheflush.h>
-#include <asm/localtimer.h>
 #include <asm/smp_scu.h>
 #include <mach/hardware.h>
 
@@ -28,29 +27,35 @@
  */
 volatile int pen_release = -1;
 
-static unsigned int __init get_core_count(void)
+/*
+ * Write pen_release in a way that is guaranteed to be visible to all
+ * observers, irrespective of whether they're taking part in coherency
+ * or not.  This is necessary for the hotplug code to work reliably.
+ */
+static void write_pen_release(int val)
 {
-	return scu_get_core_count(__io_address(UX500_SCU_BASE));
+	pen_release = val;
+	smp_wmb();
+	__cpuc_flush_dcache_area((void *)&pen_release, sizeof(pen_release));
+	outer_clean_range(__pa(&pen_release), __pa(&pen_release + 1));
 }
 
 static DEFINE_SPINLOCK(boot_lock);
 
 void __cpuinit platform_secondary_init(unsigned int cpu)
 {
-	trace_hardirqs_off();
-
 	/*
 	 * if any interrupts are already enabled for the primary
 	 * core (e.g. timer irq), then they will not have been enabled
 	 * for us: do so
 	 */
-	gic_cpu_init(0, __io_address(UX500_GIC_CPU_BASE));
+	gic_secondary_init(0);
 
 	/*
 	 * let the primary processor know we're out of the
 	 * pen, then head off into the C entry point
 	 */
-	pen_release = -1;
+	write_pen_release(-1);
 
 	/*
 	 * Synchronise with the boot thread.
@@ -74,11 +79,9 @@ int __cpuinit boot_secondary(unsigned int cpu, struct task_struct *idle)
 	 * the holding pen - release it, then wait for it to flag
 	 * that it has been released by resetting pen_release.
 	 */
-	pen_release = cpu;
-	__cpuc_flush_dcache_area((void *)&pen_release, sizeof(pen_release));
-	outer_clean_range(__pa(&pen_release), __pa(&pen_release) + 1);
+	write_pen_release(cpu);
 
-	smp_cross_call(cpumask_of(cpu));
+	smp_cross_call(cpumask_of(cpu), 1);
 
 	timeout = jiffies + (1 * HZ);
 	while (time_before(jiffies, timeout)) {
@@ -97,9 +100,6 @@ int __cpuinit boot_secondary(unsigned int cpu, struct task_struct *idle)
 
 static void __init wakeup_secondary(void)
 {
-	/* nobody is to be released from the pen yet */
-	pen_release = -1;
-
 	/*
 	 * write the address of secondary startup into the backup ram register
 	 * at offset 0x1FF4, then write the magic number 0xA1FEED01 to the
@@ -126,40 +126,26 @@ static void __init wakeup_secondary(void)
  */
 void __init smp_init_cpus(void)
 {
-	unsigned int i, ncores = get_core_count();
+	unsigned int i, ncores;
 
-	for (i = 0; i < ncores; i++)
-		set_cpu_possible(i, true);
-}
-
-void __init smp_prepare_cpus(unsigned int max_cpus)
-{
-	unsigned int ncores = get_core_count();
-	unsigned int cpu = smp_processor_id();
-	int i;
+	ncores = scu_get_core_count(__io_address(UX500_SCU_BASE));
 
 	/* sanity check */
-	if (ncores == 0) {
-		printk(KERN_ERR
-		       "U8500: strange CM count of 0? Default to 1\n");
-		ncores = 1;
-	}
-
-	if (ncores > num_possible_cpus())	{
+	if (ncores > NR_CPUS) {
 		printk(KERN_WARNING
 		       "U8500: no. of cores (%d) greater than configured "
 		       "maximum of %d - clipping\n",
-		       ncores, num_possible_cpus());
-		ncores = num_possible_cpus();
+		       ncores, NR_CPUS);
+		ncores = NR_CPUS;
 	}
 
-	smp_store_cpu_info(cpu);
+	for (i = 0; i < ncores; i++)
+		set_cpu_possible(i, true);
+}
 
-	/*
-	 * are we trying to boot more cores than exist?
-	 */
-	if (max_cpus > ncores)
-		max_cpus = ncores;
+void __init platform_smp_prepare_cpus(unsigned int max_cpus)
+{
+	int i;
 
 	/*
 	 * Initialise the present map, which describes the set of CPUs
@@ -168,13 +154,6 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
 	for (i = 0; i < max_cpus; i++)
 		set_cpu_present(i, true);
 
-	if (max_cpus > 1) {
-		/*
-		 * Enable the local timer or broadcast device for the
-		 * boot CPU, but only if we have more than one CPU.
-		 */
-		percpu_timer_setup();
-		scu_enable(__io_address(UX500_SCU_BASE));
-		wakeup_secondary();
-	}
+	scu_enable(__io_address(UX500_SCU_BASE));
+	wakeup_secondary();
 }
diff --git a/arch/arm/mach-versatile/Kconfig b/arch/arm/mach-versatile/Kconfig
index c781f30c8368..3f7b5e9d83c5 100644
--- a/arch/arm/mach-versatile/Kconfig
+++ b/arch/arm/mach-versatile/Kconfig
@@ -4,6 +4,7 @@ menu "Versatile platform type"
 config ARCH_VERSATILE_PB
 	bool "Support Versatile/PB platform"
 	select CPU_ARM926T
+	select MIGHT_HAVE_PCI
 	default y
 	help
 	  Include support for the ARM(R) Versatile/PB platform.
diff --git a/arch/arm/mach-versatile/core.c b/arch/arm/mach-versatile/core.c
index e38acb0f89c8..13a83e45a33b 100644
--- a/arch/arm/mach-versatile/core.c
+++ b/arch/arm/mach-versatile/core.c
@@ -31,8 +31,8 @@
 #include <linux/amba/pl022.h>
 #include <linux/io.h>
 #include <linux/gfp.h>
+#include <linux/clkdev.h>
 
-#include <asm/clkdev.h>
 #include <asm/system.h>
 #include <asm/irq.h>
 #include <asm/leds.h>
@@ -46,10 +46,11 @@
 #include <asm/mach/irq.h>
 #include <asm/mach/time.h>
 #include <asm/mach/map.h>
-#include <mach/clkdev.h>
 #include <mach/hardware.h>
 #include <mach/platform.h>
-#include <plat/timer-sp.h>
+#include <asm/hardware/timer-sp.h>
+
+#include <plat/sched_clock.h>
 
 #include "core.h"
 
@@ -886,6 +887,12 @@ void __init versatile_init(void)
 }
 
 /*
+ * The sched_clock counter
+ */
+#define REFCOUNTER		(__io_address(VERSATILE_SYS_BASE) + \
+				 VERSATILE_SYS_24MHz_OFFSET)
+
+/*
  * Where is the timer (VA)?
  */
 #define TIMER0_VA_BASE		 __io_address(VERSATILE_TIMER0_1_BASE)
@@ -900,6 +907,8 @@ static void __init versatile_timer_init(void)
 {
 	u32 val;
 
+	versatile_sched_clock_init(REFCOUNTER, 24000000);
+
 	/* 
 	 * set clock frequency: 
 	 *	VERSATILE_REFCLK is 32KHz
diff --git a/arch/arm/mach-vexpress/Makefile b/arch/arm/mach-vexpress/Makefile
index 1b71b77ade22..2c0ac7de2814 100644
--- a/arch/arm/mach-vexpress/Makefile
+++ b/arch/arm/mach-vexpress/Makefile
@@ -5,4 +5,5 @@
 obj-y					:= v2m.o
 obj-$(CONFIG_ARCH_VEXPRESS_CA9X4)	+= ct-ca9x4.o
 obj-$(CONFIG_SMP)			+= platsmp.o headsmp.o
+obj-$(CONFIG_HOTPLUG_CPU)		+= hotplug.o
 obj-$(CONFIG_LOCAL_TIMERS)		+= localtimer.o
diff --git a/arch/arm/mach-vexpress/core.h b/arch/arm/mach-vexpress/core.h
index 57dd95ce41f9..362780d868de 100644
--- a/arch/arm/mach-vexpress/core.h
+++ b/arch/arm/mach-vexpress/core.h
@@ -22,5 +22,3 @@ struct map_desc;
 
 void v2m_map_io(struct map_desc *tile, size_t num);
 extern struct sys_timer v2m_timer;
-
-extern void __iomem *gic_cpu_base_addr;
diff --git a/arch/arm/mach-vexpress/ct-ca9x4.c b/arch/arm/mach-vexpress/ct-ca9x4.c
index fd25ccd7272f..e628402b754c 100644
--- a/arch/arm/mach-vexpress/ct-ca9x4.c
+++ b/arch/arm/mach-vexpress/ct-ca9x4.c
@@ -8,8 +8,8 @@
 #include <linux/platform_device.h>
 #include <linux/amba/bus.h>
 #include <linux/amba/clcd.h>
+#include <linux/clkdev.h>
 
-#include <asm/clkdev.h>
 #include <asm/pgtable.h>
 #include <asm/hardware/arm_timer.h>
 #include <asm/hardware/cache-l2x0.h>
@@ -18,10 +18,9 @@
 #include <asm/pmu.h>
 #include <asm/smp_twd.h>
 
-#include <mach/clkdev.h>
 #include <mach/ct-ca9x4.h>
 
-#include <plat/timer-sp.h>
+#include <asm/hardware/timer-sp.h>
 
 #include <asm/mach/arch.h>
 #include <asm/mach/map.h>
@@ -60,13 +59,10 @@ static void __init ct_ca9x4_map_io(void)
 	v2m_map_io(ct_ca9x4_io_desc, ARRAY_SIZE(ct_ca9x4_io_desc));
 }
 
-void __iomem *gic_cpu_base_addr;
-
 static void __init ct_ca9x4_init_irq(void)
 {
-	gic_cpu_base_addr = MMIO_P2V(A9_MPCORE_GIC_CPU);
-	gic_dist_init(0, MMIO_P2V(A9_MPCORE_GIC_DIST), 29);
-	gic_cpu_init(0, gic_cpu_base_addr);
+	gic_init(0, 29, MMIO_P2V(A9_MPCORE_GIC_DIST),
+		 MMIO_P2V(A9_MPCORE_GIC_CPU));
 }
 
 #if 0
diff --git a/arch/arm/mach-vexpress/hotplug.c b/arch/arm/mach-vexpress/hotplug.c
new file mode 100644
index 000000000000..ea4cbfb90a66
--- /dev/null
+++ b/arch/arm/mach-vexpress/hotplug.c
@@ -0,0 +1,128 @@
+/*
+ *  linux/arch/arm/mach-realview/hotplug.c
+ *
+ *  Copyright (C) 2002 ARM Ltd.
+ *  All Rights Reserved
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/smp.h>
+
+#include <asm/cacheflush.h>
+
+extern volatile int pen_release;
+
+static inline void cpu_enter_lowpower(void)
+{
+	unsigned int v;
+
+	flush_cache_all();
+	asm volatile(
+		"mcr	p15, 0, %1, c7, c5, 0\n"
+	"	mcr	p15, 0, %1, c7, c10, 4\n"
+	/*
+	 * Turn off coherency
+	 */
+	"	mrc	p15, 0, %0, c1, c0, 1\n"
+	"	bic	%0, %0, %3\n"
+	"	mcr	p15, 0, %0, c1, c0, 1\n"
+	"	mrc	p15, 0, %0, c1, c0, 0\n"
+	"	bic	%0, %0, %2\n"
+	"	mcr	p15, 0, %0, c1, c0, 0\n"
+	  : "=&r" (v)
+	  : "r" (0), "Ir" (CR_C), "Ir" (0x40)
+	  : "cc");
+}
+
+static inline void cpu_leave_lowpower(void)
+{
+	unsigned int v;
+
+	asm volatile(
+		"mrc	p15, 0, %0, c1, c0, 0\n"
+	"	orr	%0, %0, %1\n"
+	"	mcr	p15, 0, %0, c1, c0, 0\n"
+	"	mrc	p15, 0, %0, c1, c0, 1\n"
+	"	orr	%0, %0, %2\n"
+	"	mcr	p15, 0, %0, c1, c0, 1\n"
+	  : "=&r" (v)
+	  : "Ir" (CR_C), "Ir" (0x40)
+	  : "cc");
+}
+
+static inline void platform_do_lowpower(unsigned int cpu, int *spurious)
+{
+	/*
+	 * there is no power-control hardware on this platform, so all
+	 * we can do is put the core into WFI; this is safe as the calling
+	 * code will have already disabled interrupts
+	 */
+	for (;;) {
+		/*
+		 * here's the WFI
+		 */
+		asm(".word	0xe320f003\n"
+		    :
+		    :
+		    : "memory", "cc");
+
+		if (pen_release == cpu) {
+			/*
+			 * OK, proper wakeup, we're done
+			 */
+			break;
+		}
+
+		/*
+		 * Getting here, means that we have come out of WFI without
+		 * having been woken up - this shouldn't happen
+		 *
+		 * Just note it happening - when we're woken, we can report
+		 * its occurrence.
+		 */
+		(*spurious)++;
+	}
+}
+
+int platform_cpu_kill(unsigned int cpu)
+{
+	return 1;
+}
+
+/*
+ * platform-specific code to shutdown a CPU
+ *
+ * Called with IRQs disabled
+ */
+void platform_cpu_die(unsigned int cpu)
+{
+	int spurious = 0;
+
+	/*
+	 * we're ready for shutdown now, so do it
+	 */
+	cpu_enter_lowpower();
+	platform_do_lowpower(cpu, &spurious);
+
+	/*
+	 * bring this CPU back into the world of cache
+	 * coherency, and then restore interrupts
+	 */
+	cpu_leave_lowpower();
+
+	if (spurious)
+		pr_warn("CPU%u: %u spurious wakeup calls\n", cpu, spurious);
+}
+
+int platform_cpu_disable(unsigned int cpu)
+{
+	/*
+	 * we don't allow CPU 0 to be shutdown (it is still too special
+	 * e.g. clock tick interrupts)
+	 */
+	return cpu == 0 ? -EPERM : 0;
+}
diff --git a/arch/arm/mach-vexpress/include/mach/entry-macro.S b/arch/arm/mach-vexpress/include/mach/entry-macro.S
index 20e9fb514f0a..73c11297509e 100644
--- a/arch/arm/mach-vexpress/include/mach/entry-macro.S
+++ b/arch/arm/mach-vexpress/include/mach/entry-macro.S
@@ -1,67 +1,7 @@
-#include <asm/hardware/gic.h>
+#include <asm/hardware/entry-macro-gic.S>
 
 	.macro	disable_fiq
 	.endm
 
-	.macro	get_irqnr_preamble, base, tmp
-	ldr	\base, =gic_cpu_base_addr
-	ldr	\base, [\base]
-	.endm
-
 	.macro	arch_ret_to_user, tmp1, tmp2
 	.endm
-
-	/*
-	 * The interrupt numbering scheme is defined in the
-	 * interrupt controller spec.  To wit:
-	 *
-	 * Interrupts 0-15 are IPI
-	 * 16-28 are reserved
-	 * 29-31 are local.  We allow 30 to be used for the watchdog.
-	 * 32-1020 are global
-	 * 1021-1022 are reserved
-	 * 1023 is "spurious" (no interrupt)
-	 *
-	 * For now, we ignore all local interrupts so only return an interrupt if it's
-	 * between 30 and 1020.  The test_for_ipi routine below will pick up on IPIs.
-	 *
-	 * A simple read from the controller will tell us the number of the highest
-	 * priority enabled interrupt.  We then just need to check whether it is in the
-	 * valid range for an IRQ (30-1020 inclusive).
-	 */
-
-	.macro	get_irqnr_and_base, irqnr, irqstat, base, tmp
-	ldr     \irqstat, [\base, #GIC_CPU_INTACK] /* bits 12-10 = src CPU, 9-0 = int # */
-	ldr	\tmp, =1021
-	bic     \irqnr, \irqstat, #0x1c00
-	cmp     \irqnr, #29
-	cmpcc	\irqnr, \irqnr
-	cmpne	\irqnr, \tmp
-	cmpcs	\irqnr, \irqnr
-	.endm
-
-	/* We assume that irqstat (the raw value of the IRQ acknowledge
-	 * register) is preserved from the macro above.
-	 * If there is an IPI, we immediately signal end of interrupt on the
-	 * controller, since this requires the original irqstat value which
-	 * we won't easily be able to recreate later.
-	 */
-
-	.macro test_for_ipi, irqnr, irqstat, base, tmp
-	bic	\irqnr, \irqstat, #0x1c00
-	cmp	\irqnr, #16
-	strcc	\irqstat, [\base, #GIC_CPU_EOI]
-	cmpcs	\irqnr, \irqnr
-	.endm
-
-	/* As above, this assumes that irqstat and base are preserved.. */
-
-	.macro test_for_ltirq, irqnr, irqstat, base, tmp
-	bic	\irqnr, \irqstat, #0x1c00
-	mov 	\tmp, #0
-	cmp	\irqnr, #29
-	moveq	\tmp, #1
-	streq	\irqstat, [\base, #GIC_CPU_EOI]
-	cmp	\tmp, #0
-	.endm
-
diff --git a/arch/arm/mach-vexpress/include/mach/smp.h b/arch/arm/mach-vexpress/include/mach/smp.h
index 5a6da4fd247e..4c05e4a9713a 100644
--- a/arch/arm/mach-vexpress/include/mach/smp.h
+++ b/arch/arm/mach-vexpress/include/mach/smp.h
@@ -2,13 +2,12 @@
 #define __MACH_SMP_H
 
 #include <asm/hardware/gic.h>
-#include <asm/smp_mpidr.h>
 
 /*
  * We use IRQ1 as the IPI
  */
-static inline void smp_cross_call(const struct cpumask *mask)
+static inline void smp_cross_call(const struct cpumask *mask, int ipi)
 {
-	gic_raise_softirq(mask, 1);
+	gic_raise_softirq(mask, ipi);
 }
 #endif
diff --git a/arch/arm/mach-vexpress/platsmp.c b/arch/arm/mach-vexpress/platsmp.c
index 670970699ba9..b1687b6abe63 100644
--- a/arch/arm/mach-vexpress/platsmp.c
+++ b/arch/arm/mach-vexpress/platsmp.c
@@ -17,7 +17,6 @@
 #include <linux/io.h>
 
 #include <asm/cacheflush.h>
-#include <asm/localtimer.h>
 #include <asm/smp_scu.h>
 #include <asm/unified.h>
 
@@ -35,6 +34,19 @@ extern void vexpress_secondary_startup(void);
  */
 volatile int __cpuinitdata pen_release = -1;
 
+/*
+ * Write pen_release in a way that is guaranteed to be visible to all
+ * observers, irrespective of whether they're taking part in coherency
+ * or not.  This is necessary for the hotplug code to work reliably.
+ */
+static void write_pen_release(int val)
+{
+	pen_release = val;
+	smp_wmb();
+	__cpuc_flush_dcache_area((void *)&pen_release, sizeof(pen_release));
+	outer_clean_range(__pa(&pen_release), __pa(&pen_release + 1));
+}
+
 static void __iomem *scu_base_addr(void)
 {
 	return MMIO_P2V(A9_MPCORE_SCU);
@@ -44,21 +56,18 @@ static DEFINE_SPINLOCK(boot_lock);
 
 void __cpuinit platform_secondary_init(unsigned int cpu)
 {
-	trace_hardirqs_off();
-
 	/*
 	 * if any interrupts are already enabled for the primary
 	 * core (e.g. timer irq), then they will not have been enabled
 	 * for us: do so
 	 */
-	gic_cpu_init(0, gic_cpu_base_addr);
+	gic_secondary_init(0);
 
 	/*
 	 * let the primary processor know we're out of the
 	 * pen, then head off into the C entry point
 	 */
-	pen_release = -1;
-	smp_wmb();
+	write_pen_release(-1);
 
 	/*
 	 * Synchronise with the boot thread.
@@ -83,16 +92,14 @@ int __cpuinit boot_secondary(unsigned int cpu, struct task_struct *idle)
 	 * since we haven't sent them a soft interrupt, they shouldn't
 	 * be there.
 	 */
-	pen_release = cpu;
-	__cpuc_flush_dcache_area((void *)&pen_release, sizeof(pen_release));
-	outer_clean_range(__pa(&pen_release), __pa(&pen_release + 1));
+	write_pen_release(cpu);
 
 	/*
 	 * Send the secondary CPU a soft interrupt, thereby causing
 	 * the boot monitor to read the system wide flags register,
 	 * and branch to the address found there.
 	 */
-	smp_cross_call(cpumask_of(cpu));
+	smp_cross_call(cpumask_of(cpu), 1);
 
 	timeout = jiffies + (1 * HZ);
 	while (time_before(jiffies, timeout)) {
@@ -124,13 +131,6 @@ void __init smp_init_cpus(void)
 	ncores = scu_base ? scu_get_core_count(scu_base) : 1;
 
 	/* sanity check */
-	if (ncores == 0) {
-		printk(KERN_ERR
-		       "vexpress: strange CM count of 0? Default to 1\n");
-
-		ncores = 1;
-	}
-
 	if (ncores > NR_CPUS) {
 		printk(KERN_WARNING
 		       "vexpress: no. of cores (%d) greater than configured "
@@ -143,20 +143,10 @@ void __init smp_init_cpus(void)
 		set_cpu_possible(i, true);
 }
 
-void __init smp_prepare_cpus(unsigned int max_cpus)
+void __init platform_smp_prepare_cpus(unsigned int max_cpus)
 {
-	unsigned int ncores = num_possible_cpus();
-	unsigned int cpu = smp_processor_id();
 	int i;
 
-	smp_store_cpu_info(cpu);
-
-	/*
-	 * are we trying to boot more cores than exist?
-	 */
-	if (max_cpus > ncores)
-		max_cpus = ncores;
-
 	/*
 	 * Initialise the present map, which describes the set of CPUs
 	 * actually populated at the present time.
@@ -164,27 +154,15 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
 	for (i = 0; i < max_cpus; i++)
 		set_cpu_present(i, true);
 
+	scu_enable(scu_base_addr());
+
 	/*
-	 * Initialise the SCU if there are more than one CPU and let
-	 * them know where to start.
+	 * Write the address of secondary startup into the
+	 * system-wide flags register. The boot monitor waits
+	 * until it receives a soft interrupt, and then the
+	 * secondary CPU branches to this address.
 	 */
-	if (max_cpus > 1) {
-		/*
-		 * Enable the local timer or broadcast device for the
-		 * boot CPU, but only if we have more than one CPU.
-		 */
-		percpu_timer_setup();
-
-		scu_enable(scu_base_addr());
-
-		/*
-		 * Write the address of secondary startup into the
-		 * system-wide flags register. The boot monitor waits
-		 * until it receives a soft interrupt, and then the
-		 * secondary CPU branches to this address.
-		 */
-		writel(~0, MMIO_P2V(V2M_SYS_FLAGSCLR));
-		writel(BSYM(virt_to_phys(vexpress_secondary_startup)),
-			MMIO_P2V(V2M_SYS_FLAGSSET));
-	}
+	writel(~0, MMIO_P2V(V2M_SYS_FLAGSCLR));
+	writel(BSYM(virt_to_phys(vexpress_secondary_startup)),
+		MMIO_P2V(V2M_SYS_FLAGSSET));
 }
diff --git a/arch/arm/mach-vexpress/v2m.c b/arch/arm/mach-vexpress/v2m.c
index 7eaa232180a5..a9ed3428a2fa 100644
--- a/arch/arm/mach-vexpress/v2m.c
+++ b/arch/arm/mach-vexpress/v2m.c
@@ -11,18 +11,18 @@
 #include <linux/spinlock.h>
 #include <linux/sysdev.h>
 #include <linux/usb/isp1760.h>
+#include <linux/clkdev.h>
 
-#include <asm/clkdev.h>
 #include <asm/sizes.h>
 #include <asm/mach/flash.h>
 #include <asm/mach/map.h>
 #include <asm/mach/time.h>
 #include <asm/hardware/arm_timer.h>
+#include <asm/hardware/timer-sp.h>
 
-#include <mach/clkdev.h>
 #include <mach/motherboard.h>
 
-#include <plat/timer-sp.h>
+#include <plat/sched_clock.h>
 
 #include "core.h"
 
@@ -50,6 +50,8 @@ void __init v2m_map_io(struct map_desc *tile, size_t num)
 
 static void __init v2m_timer_init(void)
 {
+	versatile_sched_clock_init(MMIO_P2V(V2M_SYS_24MHZ), 24000000);
+
 	writel(0, MMIO_P2V(V2M_TIMER0) + TIMER_CTRL);
 	writel(0, MMIO_P2V(V2M_TIMER1) + TIMER_CTRL);
 
diff --git a/arch/arm/mach-w90x900/clock.h b/arch/arm/mach-w90x900/clock.h
index c56ddab3d912..b88a1b16b2e9 100644
--- a/arch/arm/mach-w90x900/clock.h
+++ b/arch/arm/mach-w90x900/clock.h
@@ -10,7 +10,7 @@
  * the Free Software Foundation; either version 2 of the License.
  */
 
-#include <asm/clkdev.h>
+#include <linux/clkdev.h>
 
 void nuc900_clk_enable(struct clk *clk, int enable);
 void nuc900_subclk_enable(struct clk *clk, int enable);
diff --git a/arch/arm/mach-w90x900/time.c b/arch/arm/mach-w90x900/time.c
index b80f769bc135..4b089cb930dc 100644
--- a/arch/arm/mach-w90x900/time.c
+++ b/arch/arm/mach-w90x900/time.c
@@ -153,7 +153,6 @@ static struct clocksource clocksource_nuc900 = {
 	.rating	= 200,
 	.read	= nuc900_get_cycles,
 	.mask	= CLOCKSOURCE_MASK(TDR_SHIFT),
-	.shift	= 10,
 	.flags	= CLOCK_SOURCE_IS_CONTINUOUS,
 };
 
@@ -176,9 +175,7 @@ static void __init nuc900_clocksource_init(void)
 	val |= (COUNTEN | PERIOD | PRESCALE);
 	__raw_writel(val, REG_TCSR1);
 
-	clocksource_nuc900.mult =
-		clocksource_khz2mult((rate / 1000), clocksource_nuc900.shift);
-	clocksource_register(&clocksource_nuc900);
+	clocksource_register_hz(&clocksource_nuc900, rate);
 }
 
 static void __init nuc900_timer_init(void)
diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig
index a099efed0e63..49db8b3e4a49 100644
--- a/arch/arm/mm/Kconfig
+++ b/arch/arm/mm/Kconfig
@@ -605,6 +605,14 @@ config CPU_CP15_MPU
 	help
 	  Processor has the CP15 register, which has MPU related registers.
 
+config CPU_USE_DOMAINS
+	bool
+	depends on MMU
+	default y if !CPU_32v6K
+	help
+	  This option enables or disables the use of domain switching
+	  via the set_fs() function.
+
 #
 # CPU supports 36-bit I/O
 #
@@ -634,6 +642,33 @@ config ARM_THUMBEE
 	  Say Y here if you have a CPU with the ThumbEE extension and code to
 	  make use of it. Say N for code that can run on CPUs without ThumbEE.
 
+config SWP_EMULATE
+	bool "Emulate SWP/SWPB instructions"
+	depends on CPU_V7
+	select HAVE_PROC_CPU if PROC_FS
+	default y if SMP
+	help
+	  ARMv6 architecture deprecates use of the SWP/SWPB instructions.
+	  ARMv7 multiprocessing extensions introduce the ability to disable
+	  these instructions, triggering an undefined instruction exception
+	  when executed. Say Y here to enable software emulation of these
+	  instructions for userspace (not kernel) using LDREX/STREX.
+	  Also creates /proc/cpu/swp_emulation for statistics.
+
+	  In some older versions of glibc [<=2.8] SWP is used during futex
+	  trylock() operations with the assumption that the code will not
+	  be preempted. This invalid assumption may be more likely to fail
+	  with SWP emulation enabled, leading to deadlock of the user
+	  application.
+
+	  NOTE: when accessing uncached shared regions, LDREX/STREX rely
+	  on an external transaction monitoring block called a global
+	  monitor to maintain update atomicity. If your system does not
+	  implement a global monitor, this option can cause programs that
+	  perform SWP operations to uncached memory to deadlock.
+
+	  If unsure, say Y.
+
 config CPU_BIG_ENDIAN
 	bool "Build big-endian kernel"
 	depends on ARCH_SUPPORTS_BIG_ENDIAN
diff --git a/arch/arm/mm/Makefile b/arch/arm/mm/Makefile
index d63b6c413758..00d74a04af3a 100644
--- a/arch/arm/mm/Makefile
+++ b/arch/arm/mm/Makefile
@@ -5,8 +5,8 @@
 obj-y				:= dma-mapping.o extable.o fault.o init.o \
 				   iomap.o
 
-obj-$(CONFIG_MMU)		+= fault-armv.o flush.o ioremap.o mmap.o \
-				   pgd.o mmu.o vmregion.o
+obj-$(CONFIG_MMU)		+= fault-armv.o flush.o idmap.o ioremap.o \
+				   mmap.o pgd.o mmu.o vmregion.o
 
 ifneq ($(CONFIG_MMU),y)
 obj-y				+= nommu.o
diff --git a/arch/arm/mm/cache-feroceon-l2.c b/arch/arm/mm/cache-feroceon-l2.c
index 6e77c042d8e9..e0b0e7a4ec68 100644
--- a/arch/arm/mm/cache-feroceon-l2.c
+++ b/arch/arm/mm/cache-feroceon-l2.c
@@ -13,13 +13,9 @@
  */
 
 #include <linux/init.h>
+#include <linux/highmem.h>
 #include <asm/cacheflush.h>
-#include <asm/kmap_types.h>
-#include <asm/fixmap.h>
-#include <asm/pgtable.h>
-#include <asm/tlbflush.h>
 #include <plat/cache-feroceon-l2.h>
-#include "mm.h"
 
 /*
  * Low-level cache maintenance operations.
@@ -39,27 +35,30 @@
  * between which we don't want to be preempted.
  */
 
-static inline unsigned long l2_start_va(unsigned long paddr)
+static inline unsigned long l2_get_va(unsigned long paddr)
 {
 #ifdef CONFIG_HIGHMEM
 	/*
-	 * Let's do our own fixmap stuff in a minimal way here.
 	 * Because range ops can't be done on physical addresses,
 	 * we simply install a virtual mapping for it only for the
 	 * TLB lookup to occur, hence no need to flush the untouched
-	 * memory mapping.  This is protected with the disabling of
-	 * interrupts by the caller.
+	 * memory mapping afterwards (note: a cache flush may happen
+	 * in some circumstances depending on the path taken in kunmap_atomic).
 	 */
-	unsigned long idx = KM_L2_CACHE + KM_TYPE_NR * smp_processor_id();
-	unsigned long vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
-	set_pte_ext(TOP_PTE(vaddr), pfn_pte(paddr >> PAGE_SHIFT, PAGE_KERNEL), 0);
-	local_flush_tlb_kernel_page(vaddr);
-	return vaddr + (paddr & ~PAGE_MASK);
+	void *vaddr = kmap_atomic_pfn(paddr >> PAGE_SHIFT);
+	return (unsigned long)vaddr + (paddr & ~PAGE_MASK);
 #else
 	return __phys_to_virt(paddr);
 #endif
 }
 
+static inline void l2_put_va(unsigned long vaddr)
+{
+#ifdef CONFIG_HIGHMEM
+	kunmap_atomic((void *)vaddr);
+#endif
+}
+
 static inline void l2_clean_pa(unsigned long addr)
 {
 	__asm__("mcr p15, 1, %0, c15, c9, 3" : : "r" (addr));
@@ -76,13 +75,14 @@ static inline void l2_clean_pa_range(unsigned long start, unsigned long end)
 	 */
 	BUG_ON((start ^ end) >> PAGE_SHIFT);
 
-	raw_local_irq_save(flags);
-	va_start = l2_start_va(start);
+	va_start = l2_get_va(start);
 	va_end = va_start + (end - start);
+	raw_local_irq_save(flags);
 	__asm__("mcr p15, 1, %0, c15, c9, 4\n\t"
 		"mcr p15, 1, %1, c15, c9, 5"
 		: : "r" (va_start), "r" (va_end));
 	raw_local_irq_restore(flags);
+	l2_put_va(va_start);
 }
 
 static inline void l2_clean_inv_pa(unsigned long addr)
@@ -106,13 +106,14 @@ static inline void l2_inv_pa_range(unsigned long start, unsigned long end)
 	 */
 	BUG_ON((start ^ end) >> PAGE_SHIFT);
 
-	raw_local_irq_save(flags);
-	va_start = l2_start_va(start);
+	va_start = l2_get_va(start);
 	va_end = va_start + (end - start);
+	raw_local_irq_save(flags);
 	__asm__("mcr p15, 1, %0, c15, c11, 4\n\t"
 		"mcr p15, 1, %1, c15, c11, 5"
 		: : "r" (va_start), "r" (va_end));
 	raw_local_irq_restore(flags);
+	l2_put_va(va_start);
 }
 
 static inline void l2_inv_all(void)
diff --git a/arch/arm/mm/cache-xsc3l2.c b/arch/arm/mm/cache-xsc3l2.c
index c3154928bccd..5a32020471e3 100644
--- a/arch/arm/mm/cache-xsc3l2.c
+++ b/arch/arm/mm/cache-xsc3l2.c
@@ -17,14 +17,10 @@
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  */
 #include <linux/init.h>
+#include <linux/highmem.h>
 #include <asm/system.h>
 #include <asm/cputype.h>
 #include <asm/cacheflush.h>
-#include <asm/kmap_types.h>
-#include <asm/fixmap.h>
-#include <asm/pgtable.h>
-#include <asm/tlbflush.h>
-#include "mm.h"
 
 #define CR_L2	(1 << 26)
 
@@ -71,16 +67,15 @@ static inline void xsc3_l2_inv_all(void)
 	dsb();
 }
 
+static inline void l2_unmap_va(unsigned long va)
+{
 #ifdef CONFIG_HIGHMEM
-#define l2_map_save_flags(x)		raw_local_save_flags(x)
-#define l2_map_restore_flags(x)		raw_local_irq_restore(x)
-#else
-#define l2_map_save_flags(x)		((x) = 0)
-#define l2_map_restore_flags(x)		((void)(x))
+	if (va != -1)
+		kunmap_atomic((void *)va);
 #endif
+}
 
-static inline unsigned long l2_map_va(unsigned long pa, unsigned long prev_va,
-				      unsigned long flags)
+static inline unsigned long l2_map_va(unsigned long pa, unsigned long prev_va)
 {
 #ifdef CONFIG_HIGHMEM
 	unsigned long va = prev_va & PAGE_MASK;
@@ -89,17 +84,10 @@ static inline unsigned long l2_map_va(unsigned long pa, unsigned long prev_va,
 		/*
 		 * Switching to a new page.  Because cache ops are
 		 * using virtual addresses only, we must put a mapping
-		 * in place for it.  We also enable interrupts for a
-		 * short while and disable them again to protect this
-		 * mapping.
+		 * in place for it.
 		 */
-		unsigned long idx;
-		raw_local_irq_restore(flags);
-		idx = KM_L2_CACHE + KM_TYPE_NR * smp_processor_id();
-		va = __fix_to_virt(FIX_KMAP_BEGIN + idx);
-		raw_local_irq_restore(flags | PSR_I_BIT);
-		set_pte_ext(TOP_PTE(va), pfn_pte(pa >> PAGE_SHIFT, PAGE_KERNEL), 0);
-		local_flush_tlb_kernel_page(va);
+		l2_unmap_va(prev_va);
+		va = (unsigned long)kmap_atomic_pfn(pa >> PAGE_SHIFT);
 	}
 	return va + (pa_offset >> (32 - PAGE_SHIFT));
 #else
@@ -109,7 +97,7 @@ static inline unsigned long l2_map_va(unsigned long pa, unsigned long prev_va,
 
 static void xsc3_l2_inv_range(unsigned long start, unsigned long end)
 {
-	unsigned long vaddr, flags;
+	unsigned long vaddr;
 
 	if (start == 0 && end == -1ul) {
 		xsc3_l2_inv_all();
@@ -117,13 +105,12 @@ static void xsc3_l2_inv_range(unsigned long start, unsigned long end)
 	}
 
 	vaddr = -1;  /* to force the first mapping */
-	l2_map_save_flags(flags);
 
 	/*
 	 * Clean and invalidate partial first cache line.
 	 */
 	if (start & (CACHE_LINE_SIZE - 1)) {
-		vaddr = l2_map_va(start & ~(CACHE_LINE_SIZE - 1), vaddr, flags);
+		vaddr = l2_map_va(start & ~(CACHE_LINE_SIZE - 1), vaddr);
 		xsc3_l2_clean_mva(vaddr);
 		xsc3_l2_inv_mva(vaddr);
 		start = (start | (CACHE_LINE_SIZE - 1)) + 1;
@@ -133,7 +120,7 @@ static void xsc3_l2_inv_range(unsigned long start, unsigned long end)
 	 * Invalidate all full cache lines between 'start' and 'end'.
 	 */
 	while (start < (end & ~(CACHE_LINE_SIZE - 1))) {
-		vaddr = l2_map_va(start, vaddr, flags);
+		vaddr = l2_map_va(start, vaddr);
 		xsc3_l2_inv_mva(vaddr);
 		start += CACHE_LINE_SIZE;
 	}
@@ -142,31 +129,30 @@ static void xsc3_l2_inv_range(unsigned long start, unsigned long end)
 	 * Clean and invalidate partial last cache line.
 	 */
 	if (start < end) {
-		vaddr = l2_map_va(start, vaddr, flags);
+		vaddr = l2_map_va(start, vaddr);
 		xsc3_l2_clean_mva(vaddr);
 		xsc3_l2_inv_mva(vaddr);
 	}
 
-	l2_map_restore_flags(flags);
+	l2_unmap_va(vaddr);
 
 	dsb();
 }
 
 static void xsc3_l2_clean_range(unsigned long start, unsigned long end)
 {
-	unsigned long vaddr, flags;
+	unsigned long vaddr;
 
 	vaddr = -1;  /* to force the first mapping */
-	l2_map_save_flags(flags);
 
 	start &= ~(CACHE_LINE_SIZE - 1);
 	while (start < end) {
-		vaddr = l2_map_va(start, vaddr, flags);
+		vaddr = l2_map_va(start, vaddr);
 		xsc3_l2_clean_mva(vaddr);
 		start += CACHE_LINE_SIZE;
 	}
 
-	l2_map_restore_flags(flags);
+	l2_unmap_va(vaddr);
 
 	dsb();
 }
@@ -193,7 +179,7 @@ static inline void xsc3_l2_flush_all(void)
 
 static void xsc3_l2_flush_range(unsigned long start, unsigned long end)
 {
-	unsigned long vaddr, flags;
+	unsigned long vaddr;
 
 	if (start == 0 && end == -1ul) {
 		xsc3_l2_flush_all();
@@ -201,17 +187,16 @@ static void xsc3_l2_flush_range(unsigned long start, unsigned long end)
 	}
 
 	vaddr = -1;  /* to force the first mapping */
-	l2_map_save_flags(flags);
 
 	start &= ~(CACHE_LINE_SIZE - 1);
 	while (start < end) {
-		vaddr = l2_map_va(start, vaddr, flags);
+		vaddr = l2_map_va(start, vaddr);
 		xsc3_l2_clean_mva(vaddr);
 		xsc3_l2_inv_mva(vaddr);
 		start += CACHE_LINE_SIZE;
 	}
 
-	l2_map_restore_flags(flags);
+	l2_unmap_va(vaddr);
 
 	dsb();
 }
diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c
index ac6a36142fcd..6b48e0a3d7aa 100644
--- a/arch/arm/mm/dma-mapping.c
+++ b/arch/arm/mm/dma-mapping.c
@@ -17,6 +17,7 @@
 #include <linux/init.h>
 #include <linux/device.h>
 #include <linux/dma-mapping.h>
+#include <linux/highmem.h>
 
 #include <asm/memory.h>
 #include <asm/highmem.h>
@@ -311,7 +312,7 @@ __dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, gfp_t gfp,
 		addr = page_address(page);
 
 	if (addr)
-		*handle = page_to_dma(dev, page);
+		*handle = pfn_to_dma(dev, page_to_pfn(page));
 
 	return addr;
 }
@@ -406,7 +407,7 @@ void dma_free_coherent(struct device *dev, size_t size, void *cpu_addr, dma_addr
 	if (!arch_is_coherent())
 		__dma_free_remap(cpu_addr, size);
 
-	__dma_free_buffer(dma_to_page(dev, handle), size);
+	__dma_free_buffer(pfn_to_page(dma_to_pfn(dev, handle)), size);
 }
 EXPORT_SYMBOL(dma_free_coherent);
 
@@ -480,10 +481,10 @@ static void dma_cache_maint_page(struct page *page, unsigned long offset,
 				op(vaddr, len, dir);
 				kunmap_high(page);
 			} else if (cache_is_vipt()) {
-				pte_t saved_pte;
-				vaddr = kmap_high_l1_vipt(page, &saved_pte);
+				/* unmapped pages might still be cached */
+				vaddr = kmap_atomic(page);
 				op(vaddr + offset, len, dir);
-				kunmap_high_l1_vipt(page, saved_pte);
+				kunmap_atomic(vaddr);
 			}
 		} else {
 			vaddr = page_address(page) + offset;
@@ -554,17 +555,20 @@ int dma_map_sg(struct device *dev, struct scatterlist *sg, int nents,
 	struct scatterlist *s;
 	int i, j;
 
+	BUG_ON(!valid_dma_direction(dir));
+
 	for_each_sg(sg, s, nents, i) {
-		s->dma_address = dma_map_page(dev, sg_page(s), s->offset,
+		s->dma_address = __dma_map_page(dev, sg_page(s), s->offset,
 						s->length, dir);
 		if (dma_mapping_error(dev, s->dma_address))
 			goto bad_mapping;
 	}
+	debug_dma_map_sg(dev, sg, nents, nents, dir);
 	return nents;
 
  bad_mapping:
 	for_each_sg(sg, s, i, j)
-		dma_unmap_page(dev, sg_dma_address(s), sg_dma_len(s), dir);
+		__dma_unmap_page(dev, sg_dma_address(s), sg_dma_len(s), dir);
 	return 0;
 }
 EXPORT_SYMBOL(dma_map_sg);
@@ -585,8 +589,10 @@ void dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nents,
 	struct scatterlist *s;
 	int i;
 
+	debug_dma_unmap_sg(dev, sg, nents, dir);
+
 	for_each_sg(sg, s, nents, i)
-		dma_unmap_page(dev, sg_dma_address(s), sg_dma_len(s), dir);
+		__dma_unmap_page(dev, sg_dma_address(s), sg_dma_len(s), dir);
 }
 EXPORT_SYMBOL(dma_unmap_sg);
 
@@ -611,6 +617,8 @@ void dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg,
 		__dma_page_dev_to_cpu(sg_page(s), s->offset,
 				      s->length, dir);
 	}
+
+	debug_dma_sync_sg_for_cpu(dev, sg, nents, dir);
 }
 EXPORT_SYMBOL(dma_sync_sg_for_cpu);
 
@@ -635,5 +643,16 @@ void dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg,
 		__dma_page_cpu_to_dev(sg_page(s), s->offset,
 				      s->length, dir);
 	}
+
+	debug_dma_sync_sg_for_device(dev, sg, nents, dir);
 }
 EXPORT_SYMBOL(dma_sync_sg_for_device);
+
+#define PREALLOC_DMA_DEBUG_ENTRIES	4096
+
+static int __init dma_debug_do_init(void)
+{
+	dma_debug_init(PREALLOC_DMA_DEBUG_ENTRIES);
+	return 0;
+}
+fs_initcall(dma_debug_do_init);
diff --git a/arch/arm/mm/fault-armv.c b/arch/arm/mm/fault-armv.c
index 83e59f870426..01210dba0221 100644
--- a/arch/arm/mm/fault-armv.c
+++ b/arch/arm/mm/fault-armv.c
@@ -26,7 +26,7 @@
 
 #include "mm.h"
 
-static unsigned long shared_pte_mask = L_PTE_MT_BUFFERABLE;
+static pteval_t shared_pte_mask = L_PTE_MT_BUFFERABLE;
 
 #if __LINUX_ARM_ARCH__ < 6
 /*
diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c
index 1e21e125fe3a..f10f9bac2206 100644
--- a/arch/arm/mm/fault.c
+++ b/arch/arm/mm/fault.c
@@ -108,7 +108,7 @@ void show_pte(struct mm_struct *mm, unsigned long addr)
 
 		pte = pte_offset_map(pmd, addr);
 		printk(", *pte=%08lx", pte_val(*pte));
-		printk(", *ppte=%08lx", pte_val(pte[-PTRS_PER_PTE]));
+		printk(", *ppte=%08lx", pte_val(pte[PTE_HWTABLE_PTRS]));
 		pte_unmap(pte);
 	} while(0);
 
diff --git a/arch/arm/mm/flush.c b/arch/arm/mm/flush.c
index 391ffae75098..c29f2839f1d2 100644
--- a/arch/arm/mm/flush.c
+++ b/arch/arm/mm/flush.c
@@ -10,6 +10,7 @@
 #include <linux/module.h>
 #include <linux/mm.h>
 #include <linux/pagemap.h>
+#include <linux/highmem.h>
 
 #include <asm/cacheflush.h>
 #include <asm/cachetype.h>
@@ -180,10 +181,10 @@ void __flush_dcache_page(struct address_space *mapping, struct page *page)
 			__cpuc_flush_dcache_area(addr, PAGE_SIZE);
 			kunmap_high(page);
 		} else if (cache_is_vipt()) {
-			pte_t saved_pte;
-			addr = kmap_high_l1_vipt(page, &saved_pte);
+			/* unmapped pages might still be cached */
+			addr = kmap_atomic(page);
 			__cpuc_flush_dcache_area(addr, PAGE_SIZE);
-			kunmap_high_l1_vipt(page, saved_pte);
+			kunmap_atomic(addr);
 		}
 	}
 
diff --git a/arch/arm/mm/highmem.c b/arch/arm/mm/highmem.c
index c435fd9e1da9..807c0573abbe 100644
--- a/arch/arm/mm/highmem.c
+++ b/arch/arm/mm/highmem.c
@@ -140,90 +140,3 @@ struct page *kmap_atomic_to_page(const void *ptr)
 	pte = TOP_PTE(vaddr);
 	return pte_page(*pte);
 }
-
-#ifdef CONFIG_CPU_CACHE_VIPT
-
-#include <linux/percpu.h>
-
-/*
- * The VIVT cache of a highmem page is always flushed before the page
- * is unmapped. Hence unmapped highmem pages need no cache maintenance
- * in that case.
- *
- * However unmapped pages may still be cached with a VIPT cache, and
- * it is not possible to perform cache maintenance on them using physical
- * addresses unfortunately.  So we have no choice but to set up a temporary
- * virtual mapping for that purpose.
- *
- * Yet this VIPT cache maintenance may be triggered from DMA support
- * functions which are possibly called from interrupt context. As we don't
- * want to keep interrupt disabled all the time when such maintenance is
- * taking place, we therefore allow for some reentrancy by preserving and
- * restoring the previous fixmap entry before the interrupted context is
- * resumed.  If the reentrancy depth is 0 then there is no need to restore
- * the previous fixmap, and leaving the current one in place allow it to
- * be reused the next time without a TLB flush (common with DMA).
- */
-
-static DEFINE_PER_CPU(int, kmap_high_l1_vipt_depth);
-
-void *kmap_high_l1_vipt(struct page *page, pte_t *saved_pte)
-{
-	unsigned int idx, cpu;
-	int *depth;
-	unsigned long vaddr, flags;
-	pte_t pte, *ptep;
-
-	if (!in_interrupt())
-		preempt_disable();
-
-	cpu = smp_processor_id();
-	depth = &per_cpu(kmap_high_l1_vipt_depth, cpu);
-
-	idx = KM_L1_CACHE + KM_TYPE_NR * cpu;
-	vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
-	ptep = TOP_PTE(vaddr);
-	pte = mk_pte(page, kmap_prot);
-
-	raw_local_irq_save(flags);
-	(*depth)++;
-	if (pte_val(*ptep) == pte_val(pte)) {
-		*saved_pte = pte;
-	} else {
-		*saved_pte = *ptep;
-		set_pte_ext(ptep, pte, 0);
-		local_flush_tlb_kernel_page(vaddr);
-	}
-	raw_local_irq_restore(flags);
-
-	return (void *)vaddr;
-}
-
-void kunmap_high_l1_vipt(struct page *page, pte_t saved_pte)
-{
-	unsigned int idx, cpu = smp_processor_id();
-	int *depth = &per_cpu(kmap_high_l1_vipt_depth, cpu);
-	unsigned long vaddr, flags;
-	pte_t pte, *ptep;
-
-	idx = KM_L1_CACHE + KM_TYPE_NR * cpu;
-	vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
-	ptep = TOP_PTE(vaddr);
-	pte = mk_pte(page, kmap_prot);
-
-	BUG_ON(pte_val(*ptep) != pte_val(pte));
-	BUG_ON(*depth <= 0);
-
-	raw_local_irq_save(flags);
-	(*depth)--;
-	if (*depth != 0 && pte_val(pte) != pte_val(saved_pte)) {
-		set_pte_ext(ptep, saved_pte, 0);
-		local_flush_tlb_kernel_page(vaddr);
-	}
-	raw_local_irq_restore(flags);
-
-	if (!in_interrupt())
-		preempt_enable();
-}
-
-#endif  /* CONFIG_CPU_CACHE_VIPT */
diff --git a/arch/arm/mm/idmap.c b/arch/arm/mm/idmap.c
new file mode 100644
index 000000000000..57299446f787
--- /dev/null
+++ b/arch/arm/mm/idmap.c
@@ -0,0 +1,67 @@
+#include <linux/kernel.h>
+
+#include <asm/cputype.h>
+#include <asm/pgalloc.h>
+#include <asm/pgtable.h>
+
+static void idmap_add_pmd(pgd_t *pgd, unsigned long addr, unsigned long end,
+	unsigned long prot)
+{
+	pmd_t *pmd = pmd_offset(pgd, addr);
+
+	addr = (addr & PMD_MASK) | prot;
+	pmd[0] = __pmd(addr);
+	addr += SECTION_SIZE;
+	pmd[1] = __pmd(addr);
+	flush_pmd_entry(pmd);
+}
+
+void identity_mapping_add(pgd_t *pgd, unsigned long addr, unsigned long end)
+{
+	unsigned long prot, next;
+
+	prot = PMD_TYPE_SECT | PMD_SECT_AP_WRITE;
+	if (cpu_architecture() <= CPU_ARCH_ARMv5TEJ && !cpu_is_xscale())
+		prot |= PMD_BIT4;
+
+	pgd += pgd_index(addr);
+	do {
+		next = pgd_addr_end(addr, end);
+		idmap_add_pmd(pgd, addr, next, prot);
+	} while (pgd++, addr = next, addr != end);
+}
+
+#ifdef CONFIG_SMP
+static void idmap_del_pmd(pgd_t *pgd, unsigned long addr, unsigned long end)
+{
+	pmd_t *pmd = pmd_offset(pgd, addr);
+	pmd_clear(pmd);
+}
+
+void identity_mapping_del(pgd_t *pgd, unsigned long addr, unsigned long end)
+{
+	unsigned long next;
+
+	pgd += pgd_index(addr);
+	do {
+		next = pgd_addr_end(addr, end);
+		idmap_del_pmd(pgd, addr, next);
+	} while (pgd++, addr = next, addr != end);
+}
+#endif
+
+/*
+ * In order to soft-boot, we need to insert a 1:1 mapping in place of
+ * the user-mode pages.  This will then ensure that we have predictable
+ * results when turning the mmu off
+ */
+void setup_mm_for_reboot(char mode)
+{
+	/*
+	 * We need to access to user-mode page tables here. For kernel threads
+	 * we don't have any user-mode mappings so we use the context that we
+	 * "borrowed".
+	 */
+	identity_mapping_add(current->active_mm->pgd, 0, TASK_SIZE);
+	local_flush_tlb_all();
+}
diff --git a/arch/arm/mm/ioremap.c b/arch/arm/mm/ioremap.c
index 55c17a6fb22f..ab506272b2d3 100644
--- a/arch/arm/mm/ioremap.c
+++ b/arch/arm/mm/ioremap.c
@@ -204,12 +204,8 @@ void __iomem * __arm_ioremap_pfn_caller(unsigned long pfn,
 	/*
 	 * Don't allow RAM to be mapped - this causes problems with ARMv6+
 	 */
-	if (pfn_valid(pfn)) {
-		printk(KERN_WARNING "BUG: Your driver calls ioremap() on system memory.  This leads\n"
-		       "to architecturally unpredictable behaviour on ARMv6+, and ioremap()\n"
-		       "will fail in the next kernel release.  Please fix your driver.\n");
-		WARN_ON(1);
-	}
+	if (WARN_ON(pfn_valid(pfn)))
+		return NULL;
 
 	type = get_mem_type(mtype);
 	if (!type)
diff --git a/arch/arm/mm/mm.h b/arch/arm/mm/mm.h
index 6630620380a4..36960df5fb76 100644
--- a/arch/arm/mm/mm.h
+++ b/arch/arm/mm/mm.h
@@ -16,7 +16,7 @@ static inline pmd_t *pmd_off_k(unsigned long virt)
 }
 
 struct mem_type {
-	unsigned int prot_pte;
+	pteval_t prot_pte;
 	unsigned int prot_l1;
 	unsigned int prot_sect;
 	unsigned int domain;
diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c
index 72ad3e1f56cf..3c67e92f7d59 100644
--- a/arch/arm/mm/mmu.c
+++ b/arch/arm/mm/mmu.c
@@ -24,6 +24,7 @@
 #include <asm/smp_plat.h>
 #include <asm/tlb.h>
 #include <asm/highmem.h>
+#include <asm/traps.h>
 
 #include <asm/mach/arch.h>
 #include <asm/mach/map.h>
@@ -62,7 +63,7 @@ struct cachepolicy {
 	const char	policy[16];
 	unsigned int	cr_mask;
 	unsigned int	pmd;
-	unsigned int	pte;
+	pteval_t	pte;
 };
 
 static struct cachepolicy cache_policies[] __initdata = {
@@ -190,7 +191,7 @@ void adjust_cr(unsigned long mask, unsigned long set)
 }
 #endif
 
-#define PROT_PTE_DEVICE		L_PTE_PRESENT|L_PTE_YOUNG|L_PTE_DIRTY|L_PTE_WRITE
+#define PROT_PTE_DEVICE		L_PTE_PRESENT|L_PTE_YOUNG|L_PTE_DIRTY|L_PTE_XN
 #define PROT_SECT_DEVICE	PMD_TYPE_SECT|PMD_SECT_AP_WRITE
 
 static struct mem_type mem_types[] = {
@@ -235,19 +236,18 @@ static struct mem_type mem_types[] = {
 	},
 	[MT_LOW_VECTORS] = {
 		.prot_pte  = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY |
-				L_PTE_EXEC,
+				L_PTE_RDONLY,
 		.prot_l1   = PMD_TYPE_TABLE,
 		.domain    = DOMAIN_USER,
 	},
 	[MT_HIGH_VECTORS] = {
 		.prot_pte  = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY |
-				L_PTE_USER | L_PTE_EXEC,
+				L_PTE_USER | L_PTE_RDONLY,
 		.prot_l1   = PMD_TYPE_TABLE,
 		.domain    = DOMAIN_USER,
 	},
 	[MT_MEMORY] = {
-		.prot_pte  = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY |
-				L_PTE_WRITE | L_PTE_EXEC,
+		.prot_pte  = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY,
 		.prot_l1   = PMD_TYPE_TABLE,
 		.prot_sect = PMD_TYPE_SECT | PMD_SECT_AP_WRITE,
 		.domain    = DOMAIN_KERNEL,
@@ -258,21 +258,20 @@ static struct mem_type mem_types[] = {
 	},
 	[MT_MEMORY_NONCACHED] = {
 		.prot_pte  = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY |
-				L_PTE_WRITE | L_PTE_EXEC | L_PTE_MT_BUFFERABLE,
+				L_PTE_MT_BUFFERABLE,
 		.prot_l1   = PMD_TYPE_TABLE,
 		.prot_sect = PMD_TYPE_SECT | PMD_SECT_AP_WRITE,
 		.domain    = DOMAIN_KERNEL,
 	},
 	[MT_MEMORY_DTCM] = {
 		.prot_pte  = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY |
-				L_PTE_WRITE,
+				L_PTE_XN,
 		.prot_l1   = PMD_TYPE_TABLE,
 		.prot_sect = PMD_TYPE_SECT | PMD_SECT_XN,
 		.domain    = DOMAIN_KERNEL,
 	},
 	[MT_MEMORY_ITCM] = {
-		.prot_pte  = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY |
-				L_PTE_WRITE | L_PTE_EXEC,
+		.prot_pte  = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY,
 		.prot_l1   = PMD_TYPE_TABLE,
 		.domain    = DOMAIN_KERNEL,
 	},
@@ -479,7 +478,7 @@ static void __init build_mem_type_table(void)
 
 	pgprot_user   = __pgprot(L_PTE_PRESENT | L_PTE_YOUNG | user_pgprot);
 	pgprot_kernel = __pgprot(L_PTE_PRESENT | L_PTE_YOUNG |
-				 L_PTE_DIRTY | L_PTE_WRITE | kern_pgprot);
+				 L_PTE_DIRTY | kern_pgprot);
 
 	mem_types[MT_LOW_VECTORS].prot_l1 |= ecc_mask;
 	mem_types[MT_HIGH_VECTORS].prot_l1 |= ecc_mask;
@@ -535,7 +534,7 @@ static pte_t * __init early_pte_alloc(pmd_t *pmd, unsigned long addr, unsigned l
 {
 	if (pmd_none(*pmd)) {
 		pte_t *pte = early_alloc(2 * PTRS_PER_PTE * sizeof(pte_t));
-		__pmd_populate(pmd, __pa(pte) | prot);
+		__pmd_populate(pmd, __pa(pte), prot);
 	}
 	BUG_ON(pmd_bad(*pmd));
 	return pte_offset_kernel(pmd, addr);
@@ -553,7 +552,7 @@ static void __init alloc_init_pte(pmd_t *pmd, unsigned long addr,
 }
 
 static void __init alloc_init_section(pgd_t *pgd, unsigned long addr,
-				      unsigned long end, unsigned long phys,
+				      unsigned long end, phys_addr_t phys,
 				      const struct mem_type *type)
 {
 	pmd_t *pmd = pmd_offset(pgd, addr);
@@ -588,7 +587,8 @@ static void __init alloc_init_section(pgd_t *pgd, unsigned long addr,
 static void __init create_36bit_mapping(struct map_desc *md,
 					const struct mem_type *type)
 {
-	unsigned long phys, addr, length, end;
+	unsigned long addr, length, end;
+	phys_addr_t phys;
 	pgd_t *pgd;
 
 	addr = md->virtual;
@@ -914,12 +914,11 @@ static void __init devicemaps_init(struct machine_desc *mdesc)
 {
 	struct map_desc map;
 	unsigned long addr;
-	void *vectors;
 
 	/*
 	 * Allocate the vector page early.
 	 */
-	vectors = early_alloc(PAGE_SIZE);
+	vectors_page = early_alloc(PAGE_SIZE);
 
 	for (addr = VMALLOC_END; addr; addr += PGDIR_SIZE)
 		pmd_clear(pmd_off_k(addr));
@@ -959,7 +958,7 @@ static void __init devicemaps_init(struct machine_desc *mdesc)
 	 * location (0xffff0000).  If we aren't using high-vectors, also
 	 * create a mapping at the low-vectors virtual address.
 	 */
-	map.pfn = __phys_to_pfn(virt_to_phys(vectors));
+	map.pfn = __phys_to_pfn(virt_to_phys(vectors_page));
 	map.virtual = 0xffff0000;
 	map.length = PAGE_SIZE;
 	map.type = MT_HIGH_VECTORS;
@@ -1044,38 +1043,3 @@ void __init paging_init(struct machine_desc *mdesc)
 	empty_zero_page = virt_to_page(zero_page);
 	__flush_dcache_page(NULL, empty_zero_page);
 }
-
-/*
- * In order to soft-boot, we need to insert a 1:1 mapping in place of
- * the user-mode pages.  This will then ensure that we have predictable
- * results when turning the mmu off
- */
-void setup_mm_for_reboot(char mode)
-{
-	unsigned long base_pmdval;
-	pgd_t *pgd;
-	int i;
-
-	/*
-	 * We need to access to user-mode page tables here. For kernel threads
-	 * we don't have any user-mode mappings so we use the context that we
-	 * "borrowed".
-	 */
-	pgd = current->active_mm->pgd;
-
-	base_pmdval = PMD_SECT_AP_WRITE | PMD_SECT_AP_READ | PMD_TYPE_SECT;
-	if (cpu_architecture() <= CPU_ARCH_ARMv5TEJ && !cpu_is_xscale())
-		base_pmdval |= PMD_BIT4;
-
-	for (i = 0; i < FIRST_USER_PGD_NR + USER_PTRS_PER_PGD; i++, pgd++) {
-		unsigned long pmdval = (i << PGDIR_SHIFT) | base_pmdval;
-		pmd_t *pmd;
-
-		pmd = pmd_off(pgd, i << PGDIR_SHIFT);
-		pmd[0] = __pmd(pmdval);
-		pmd[1] = __pmd(pmdval + (1 << (PGDIR_SHIFT - 1)));
-		flush_pmd_entry(pmd);
-	}
-
-	local_flush_tlb_all();
-}
diff --git a/arch/arm/mm/pgd.c b/arch/arm/mm/pgd.c
index 69bbfc6645a6..93292a18cf77 100644
--- a/arch/arm/mm/pgd.c
+++ b/arch/arm/mm/pgd.c
@@ -17,12 +17,10 @@
 
 #include "mm.h"
 
-#define FIRST_KERNEL_PGD_NR	(FIRST_USER_PGD_NR + USER_PTRS_PER_PGD)
-
 /*
  * need to get a 16k page for level 1
  */
-pgd_t *get_pgd_slow(struct mm_struct *mm)
+pgd_t *pgd_alloc(struct mm_struct *mm)
 {
 	pgd_t *new_pgd, *init_pgd;
 	pmd_t *new_pmd, *init_pmd;
@@ -32,14 +30,14 @@ pgd_t *get_pgd_slow(struct mm_struct *mm)
 	if (!new_pgd)
 		goto no_pgd;
 
-	memset(new_pgd, 0, FIRST_KERNEL_PGD_NR * sizeof(pgd_t));
+	memset(new_pgd, 0, USER_PTRS_PER_PGD * sizeof(pgd_t));
 
 	/*
 	 * Copy over the kernel and IO PGD entries
 	 */
 	init_pgd = pgd_offset_k(0);
-	memcpy(new_pgd + FIRST_KERNEL_PGD_NR, init_pgd + FIRST_KERNEL_PGD_NR,
-		       (PTRS_PER_PGD - FIRST_KERNEL_PGD_NR) * sizeof(pgd_t));
+	memcpy(new_pgd + USER_PTRS_PER_PGD, init_pgd + USER_PTRS_PER_PGD,
+		       (PTRS_PER_PGD - USER_PTRS_PER_PGD) * sizeof(pgd_t));
 
 	clean_dcache_area(new_pgd, PTRS_PER_PGD * sizeof(pgd_t));
 
@@ -73,28 +71,29 @@ no_pgd:
 	return NULL;
 }
 
-void free_pgd_slow(struct mm_struct *mm, pgd_t *pgd)
+void pgd_free(struct mm_struct *mm, pgd_t *pgd_base)
 {
+	pgd_t *pgd;
 	pmd_t *pmd;
 	pgtable_t pte;
 
-	if (!pgd)
+	if (!pgd_base)
 		return;
 
-	/* pgd is always present and good */
-	pmd = pmd_off(pgd, 0);
-	if (pmd_none(*pmd))
-		goto free;
-	if (pmd_bad(*pmd)) {
-		pmd_ERROR(*pmd);
-		pmd_clear(pmd);
-		goto free;
-	}
+	pgd = pgd_base + pgd_index(0);
+	if (pgd_none_or_clear_bad(pgd))
+		goto no_pgd;
+
+	pmd = pmd_offset(pgd, 0);
+	if (pmd_none_or_clear_bad(pmd))
+		goto no_pmd;
 
 	pte = pmd_pgtable(*pmd);
 	pmd_clear(pmd);
 	pte_free(mm, pte);
+no_pmd:
+	pgd_clear(pgd);
 	pmd_free(mm, pmd);
-free:
-	free_pages((unsigned long) pgd, 2);
+no_pgd:
+	free_pages((unsigned long) pgd_base, 2);
 }
diff --git a/arch/arm/mm/proc-macros.S b/arch/arm/mm/proc-macros.S
index b795afd0a2c6..e32fa499194c 100644
--- a/arch/arm/mm/proc-macros.S
+++ b/arch/arm/mm/proc-macros.S
@@ -91,7 +91,7 @@
 #if L_PTE_SHARED != PTE_EXT_SHARED
 #error PTE shared bit mismatch
 #endif
-#if (L_PTE_EXEC+L_PTE_USER+L_PTE_WRITE+L_PTE_DIRTY+L_PTE_YOUNG+\
+#if (L_PTE_XN+L_PTE_USER+L_PTE_RDONLY+L_PTE_DIRTY+L_PTE_YOUNG+\
      L_PTE_FILE+L_PTE_PRESENT) > L_PTE_SHARED
 #error Invalid Linux PTE bit settings
 #endif
@@ -109,6 +109,10 @@
  *  110x   0   1   0	r/w	r/o
  *  11x0   0   1   0	r/w	r/o
  *  1111   0   1   1	r/w	r/w
+ *
+ * If !CONFIG_CPU_USE_DOMAINS, the following permissions are changed:
+ *  110x   1   1   1	r/o	r/o
+ *  11x0   1   1   1	r/o	r/o
  */
 	.macro	armv6_mt_table pfx
 \pfx\()_mt_table:
@@ -131,7 +135,7 @@
 	.endm
 
 	.macro	armv6_set_pte_ext pfx
-	str	r1, [r0], #-2048		@ linux version
+	str	r1, [r0], #2048			@ linux version
 
 	bic	r3, r1, #0x000003fc
 	bic	r3, r3, #PTE_TYPE_MASK
@@ -142,17 +146,20 @@
 	and	r2, r1, #L_PTE_MT_MASK
 	ldr	r2, [ip, r2]
 
-	tst	r1, #L_PTE_WRITE
-	tstne	r1, #L_PTE_DIRTY
-	orreq	r3, r3, #PTE_EXT_APX
+	eor	r1, r1, #L_PTE_DIRTY
+	tst	r1, #L_PTE_DIRTY|L_PTE_RDONLY
+	orrne	r3, r3, #PTE_EXT_APX
 
 	tst	r1, #L_PTE_USER
 	orrne	r3, r3, #PTE_EXT_AP1
+#ifdef CONFIG_CPU_USE_DOMAINS
+	@ allow kernel read/write access to read-only user pages
 	tstne	r3, #PTE_EXT_APX
 	bicne	r3, r3, #PTE_EXT_APX | PTE_EXT_AP0
+#endif
 
-	tst	r1, #L_PTE_EXEC
-	orreq	r3, r3, #PTE_EXT_XN
+	tst	r1, #L_PTE_XN
+	orrne	r3, r3, #PTE_EXT_XN
 
 	orr	r3, r3, r2
 
@@ -180,9 +187,9 @@
  *  1111  0xff	r/w	r/w
  */
 	.macro	armv3_set_pte_ext wc_disable=1
-	str	r1, [r0], #-2048		@ linux version
+	str	r1, [r0], #2048			@ linux version
 
-	eor	r3, r1, #L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_WRITE | L_PTE_DIRTY
+	eor	r3, r1, #L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY
 
 	bic	r2, r1, #PTE_SMALL_AP_MASK	@ keep C, B bits
 	bic	r2, r2, #PTE_TYPE_MASK
@@ -191,7 +198,7 @@
 	tst	r3, #L_PTE_USER			@ user?
 	orrne	r2, r2, #PTE_SMALL_AP_URO_SRW
 
-	tst	r3, #L_PTE_WRITE | L_PTE_DIRTY	@ write and dirty?
+	tst	r3, #L_PTE_RDONLY | L_PTE_DIRTY	@ write and dirty?
 	orreq	r2, r2, #PTE_SMALL_AP_UNO_SRW
 
 	tst	r3, #L_PTE_PRESENT | L_PTE_YOUNG	@ present and young?
@@ -203,7 +210,7 @@
 	bicne	r2, r2, #PTE_BUFFERABLE
 #endif
 	.endif
-	str	r2, [r0]			@ hardware version
+	str	r2, [r0]		@ hardware version
 	.endm
 
 
@@ -223,9 +230,9 @@
  *  1111  11	r/w	r/w
  */
 	.macro	xscale_set_pte_ext_prologue
-	str	r1, [r0], #-2048		@ linux version
+	str	r1, [r0]			@ linux version
 
-	eor	r3, r1, #L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_WRITE | L_PTE_DIRTY
+	eor	r3, r1, #L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY
 
 	bic	r2, r1, #PTE_SMALL_AP_MASK	@ keep C, B bits
 	orr	r2, r2, #PTE_TYPE_EXT		@ extended page
@@ -233,7 +240,7 @@
 	tst	r3, #L_PTE_USER			@ user?
 	orrne	r2, r2, #PTE_EXT_AP_URO_SRW	@ yes -> user r/o, system r/w
 
-	tst	r3, #L_PTE_WRITE | L_PTE_DIRTY	@ write and dirty?
+	tst	r3, #L_PTE_RDONLY | L_PTE_DIRTY	@ write and dirty?
 	orreq	r2, r2, #PTE_EXT_AP_UNO_SRW	@ yes -> user n/a, system r/w
 						@ combined with user -> user r/w
 	.endm
@@ -242,7 +249,7 @@
 	tst	r3, #L_PTE_PRESENT | L_PTE_YOUNG	@ present and young?
 	movne	r2, #0				@ no -> fault
 
-	str	r2, [r0]			@ hardware version
+	str	r2, [r0, #2048]!		@ hardware version
 	mov	ip, #0
 	mcr	p15, 0, r0, c7, c10, 1		@ clean L1 D line
 	mcr	p15, 0, ip, c7, c10, 4		@ data write barrier
diff --git a/arch/arm/mm/proc-v7.S b/arch/arm/mm/proc-v7.S
index 9b9ff5d949fd..b49fab21517c 100644
--- a/arch/arm/mm/proc-v7.S
+++ b/arch/arm/mm/proc-v7.S
@@ -124,15 +124,13 @@ ENDPROC(cpu_v7_switch_mm)
  *	Set a level 2 translation table entry.
  *
  *	- ptep  - pointer to level 2 translation table entry
- *		  (hardware version is stored at -1024 bytes)
+ *		  (hardware version is stored at +2048 bytes)
  *	- pte   - PTE value to store
  *	- ext	- value for extended PTE bits
  */
 ENTRY(cpu_v7_set_pte_ext)
 #ifdef CONFIG_MMU
- ARM(	str	r1, [r0], #-2048	)	@ linux version
- THUMB(	str	r1, [r0]		)	@ linux version
- THUMB(	sub	r0, r0, #2048		)
+	str	r1, [r0]			@ linux version
 
 	bic	r3, r1, #0x000003f0
 	bic	r3, r3, #PTE_TYPE_MASK
@@ -142,23 +140,26 @@ ENTRY(cpu_v7_set_pte_ext)
 	tst	r1, #1 << 4
 	orrne	r3, r3, #PTE_EXT_TEX(1)
 
-	tst	r1, #L_PTE_WRITE
-	tstne	r1, #L_PTE_DIRTY
-	orreq	r3, r3, #PTE_EXT_APX
+	eor	r1, r1, #L_PTE_DIRTY
+	tst	r1, #L_PTE_RDONLY | L_PTE_DIRTY
+	orrne	r3, r3, #PTE_EXT_APX
 
 	tst	r1, #L_PTE_USER
 	orrne	r3, r3, #PTE_EXT_AP1
+#ifdef CONFIG_CPU_USE_DOMAINS
+	@ allow kernel read/write access to read-only user pages
 	tstne	r3, #PTE_EXT_APX
 	bicne	r3, r3, #PTE_EXT_APX | PTE_EXT_AP0
+#endif
 
-	tst	r1, #L_PTE_EXEC
-	orreq	r3, r3, #PTE_EXT_XN
+	tst	r1, #L_PTE_XN
+	orrne	r3, r3, #PTE_EXT_XN
 
 	tst	r1, #L_PTE_YOUNG
 	tstne	r1, #L_PTE_PRESENT
 	moveq	r3, #0
 
-	str	r3, [r0]
+	str	r3, [r0, #2048]!
 	mcr	p15, 0, r0, c7, c10, 1		@ flush_pte
 #endif
 	mov	pc, lr
@@ -273,8 +274,6 @@ __v7_setup:
 	ALT_SMP(orr	r4, r4, #TTB_FLAGS_SMP)
 	ALT_UP(orr	r4, r4, #TTB_FLAGS_UP)
 	mcr	p15, 0, r4, c2, c0, 1		@ load TTB1
-	mov	r10, #0x1f			@ domains 0, 1 = manager
-	mcr	p15, 0, r10, c3, c0, 0		@ load domain access register
 	/*
 	 * Memory region attributes with SCTLR.TRE=1
 	 *
@@ -313,6 +312,10 @@ __v7_setup:
 #ifdef CONFIG_CPU_ENDIAN_BE8
 	orr	r6, r6, #1 << 25		@ big-endian page tables
 #endif
+#ifdef CONFIG_SWP_EMULATE
+	orr     r5, r5, #(1 << 10)              @ set SW bit in "clear"
+	bic     r6, r6, #(1 << 10)              @ clear it in "mmuset"
+#endif
    	mrc	p15, 0, r0, c1, c0, 0		@ read control register
 	bic	r0, r0, r5			@ clear bits them
 	orr	r0, r0, r6			@ set them
diff --git a/arch/arm/mm/proc-xscale.S b/arch/arm/mm/proc-xscale.S
index 523408c0bb38..5a37c5e45c41 100644
--- a/arch/arm/mm/proc-xscale.S
+++ b/arch/arm/mm/proc-xscale.S
@@ -500,8 +500,8 @@ ENTRY(cpu_xscale_set_pte_ext)
 	@
 	@ Erratum 40: must set memory to write-through for user read-only pages
 	@
-	and	ip, r1, #(L_PTE_MT_MASK | L_PTE_USER | L_PTE_WRITE) & ~(4 << 2)
-	teq	ip, #L_PTE_MT_WRITEBACK | L_PTE_USER
+	and	ip, r1, #(L_PTE_MT_MASK | L_PTE_USER | L_PTE_RDONLY) & ~(4 << 2)
+	teq	ip, #L_PTE_MT_WRITEBACK | L_PTE_USER | L_PTE_RDONLY
 
 	moveq	r1, #L_PTE_MT_WRITETHROUGH
 	and	r1, r1, #L_PTE_MT_MASK
diff --git a/arch/arm/plat-iop/time.c b/arch/arm/plat-iop/time.c
index 558cdfaf76b6..07f23bb42bed 100644
--- a/arch/arm/plat-iop/time.c
+++ b/arch/arm/plat-iop/time.c
@@ -17,6 +17,7 @@
 #include <linux/interrupt.h>
 #include <linux/time.h>
 #include <linux/init.h>
+#include <linux/sched.h>
 #include <linux/timex.h>
 #include <linux/sched.h>
 #include <linux/io.h>
@@ -24,6 +25,7 @@
 #include <linux/clockchips.h>
 #include <mach/hardware.h>
 #include <asm/irq.h>
+#include <asm/sched_clock.h>
 #include <asm/uaccess.h>
 #include <asm/mach/irq.h>
 #include <asm/mach/time.h>
@@ -50,15 +52,21 @@ static struct clocksource iop_clocksource = {
 	.flags		= CLOCK_SOURCE_IS_CONTINUOUS,
 };
 
+static DEFINE_CLOCK_DATA(cd);
+
 /*
  * IOP sched_clock() implementation via its clocksource.
  */
-unsigned long long sched_clock(void)
+unsigned long long notrace sched_clock(void)
 {
-	cycle_t cyc = iop_clocksource_read(NULL);
-	struct clocksource *cs = &iop_clocksource;
+	u32 cyc = 0xffffffffu - read_tcr1();
+	return cyc_to_sched_clock(&cd, cyc, (u32)~0);
+}
 
-	return clocksource_cyc2ns(cyc, cs->mult, cs->shift);
+static void notrace iop_update_sched_clock(void)
+{
+	u32 cyc = 0xffffffffu - read_tcr1();
+	update_sched_clock(&cd, cyc, (u32)~0);
 }
 
 /*
@@ -88,6 +96,7 @@ static void iop_set_mode(enum clock_event_mode mode,
 	case CLOCK_EVT_MODE_PERIODIC:
 		write_tmr0(tmr & ~IOP_TMR_EN);
 		write_tcr0(ticks_per_jiffy - 1);
+		write_trr0(ticks_per_jiffy - 1);
 		tmr |= (IOP_TMR_RELOAD | IOP_TMR_EN);
 		break;
 	case CLOCK_EVT_MODE_ONESHOT:
@@ -143,6 +152,8 @@ void __init iop_init_time(unsigned long tick_rate)
 {
 	u32 timer_ctl;
 
+	init_sched_clock(&cd, iop_update_sched_clock, 32, tick_rate);
+
 	ticks_per_jiffy = DIV_ROUND_CLOSEST(tick_rate, HZ);
 	iop_tick_rate = tick_rate;
 
@@ -153,6 +164,7 @@ void __init iop_init_time(unsigned long tick_rate)
 	 * Set up interrupting clockevent timer 0.
 	 */
 	write_tmr0(timer_ctl & ~IOP_TMR_EN);
+	write_tisr(1);
 	setup_irq(IRQ_IOP_TIMER0, &iop_timer_irq);
 	clockevents_calc_mult_shift(&iop_clockevent,
 				    tick_rate, IOP_MIN_RANGE);
@@ -162,9 +174,6 @@ void __init iop_init_time(unsigned long tick_rate)
 		clockevent_delta2ns(0xf, &iop_clockevent);
 	iop_clockevent.cpumask = cpumask_of(0);
 	clockevents_register_device(&iop_clockevent);
-	write_trr0(ticks_per_jiffy - 1);
-	write_tcr0(ticks_per_jiffy - 1);
-	write_tmr0(timer_ctl);
 
 	/*
 	 * Set up free-running clocksource timer 1.
@@ -172,7 +181,5 @@ void __init iop_init_time(unsigned long tick_rate)
 	write_trr1(0xffffffff);
 	write_tcr1(0xffffffff);
 	write_tmr1(timer_ctl);
-	clocksource_calc_mult_shift(&iop_clocksource, tick_rate,
-				    IOP_MIN_RANGE);
-	clocksource_register(&iop_clocksource);
+	clocksource_register_hz(&iop_clocksource, tick_rate);
 }
diff --git a/arch/arm/plat-mxc/epit.c b/arch/arm/plat-mxc/epit.c
index ee9582f4972e..d69d343ff61f 100644
--- a/arch/arm/plat-mxc/epit.c
+++ b/arch/arm/plat-mxc/epit.c
@@ -93,7 +93,6 @@ static struct clocksource clocksource_epit = {
 	.rating		= 200,
 	.read		= epit_read,
 	.mask		= CLOCKSOURCE_MASK(32),
-	.shift		= 20,
 	.flags		= CLOCK_SOURCE_IS_CONTINUOUS,
 };
 
@@ -101,9 +100,7 @@ static int __init epit_clocksource_init(struct clk *timer_clk)
 {
 	unsigned int c = clk_get_rate(timer_clk);
 
-	clocksource_epit.mult = clocksource_hz2mult(c,
-					clocksource_epit.shift);
-	clocksource_register(&clocksource_epit);
+	clocksource_register_hz(&clocksource_epit, c);
 
 	return 0;
 }
diff --git a/arch/arm/plat-mxc/time.c b/arch/arm/plat-mxc/time.c
index f9a1b059a76c..9f0c2610595e 100644
--- a/arch/arm/plat-mxc/time.c
+++ b/arch/arm/plat-mxc/time.c
@@ -120,7 +120,6 @@ static struct clocksource clocksource_mxc = {
 	.rating		= 200,
 	.read		= mx1_2_get_cycles,
 	.mask		= CLOCKSOURCE_MASK(32),
-	.shift 		= 20,
 	.flags		= CLOCK_SOURCE_IS_CONTINUOUS,
 };
 
@@ -131,9 +130,7 @@ static int __init mxc_clocksource_init(struct clk *timer_clk)
 	if (timer_is_v2())
 		clocksource_mxc.read = v2_get_cycles;
 
-	clocksource_mxc.mult = clocksource_hz2mult(c,
-					clocksource_mxc.shift);
-	clocksource_register(&clocksource_mxc);
+	clocksource_register_hz(&clocksource_mxc, c);
 
 	return 0;
 }
diff --git a/arch/arm/plat-nomadik/Kconfig b/arch/arm/plat-nomadik/Kconfig
index 5da3f97c537b..187f4e84bb22 100644
--- a/arch/arm/plat-nomadik/Kconfig
+++ b/arch/arm/plat-nomadik/Kconfig
@@ -14,6 +14,7 @@ if PLAT_NOMADIK
 
 config HAS_MTU
 	bool
+	select HAVE_SCHED_CLOCK
 	help
 	  Support for Multi Timer Unit. MTU provides access
 	  to multiple interrupt generating programmable
diff --git a/arch/arm/plat-nomadik/timer.c b/arch/arm/plat-nomadik/timer.c
index 63cdc6025bd7..41723402006b 100644
--- a/arch/arm/plat-nomadik/timer.c
+++ b/arch/arm/plat-nomadik/timer.c
@@ -17,9 +17,9 @@
 #include <linux/clk.h>
 #include <linux/jiffies.h>
 #include <linux/err.h>
-#include <linux/cnt32_to_63.h>
-#include <linux/timer.h>
+#include <linux/sched.h>
 #include <asm/mach/time.h>
+#include <asm/sched_clock.h>
 
 #include <plat/mtu.h>
 
@@ -52,81 +52,24 @@ static struct clocksource nmdk_clksrc = {
  * Override the global weak sched_clock symbol with this
  * local implementation which uses the clocksource to get some
  * better resolution when scheduling the kernel.
- *
- * Because the hardware timer period may be quite short
- * (32.3 secs on the 133 MHz MTU timer selection on ux500)
- * and because cnt32_to_63() needs to be called at least once per
- * half period to work properly, a kernel keepwarm() timer is set up
- * to ensure this requirement is always met.
- *
- * Also the sched_clock timer will wrap around at some point,
- * here we set it to run continously for a year.
  */
-#define SCHED_CLOCK_MIN_WRAP 3600*24*365
-static struct timer_list cnt32_to_63_keepwarm_timer;
-static u32 sched_mult;
-static u32 sched_shift;
+static DEFINE_CLOCK_DATA(cd);
 
 unsigned long long notrace sched_clock(void)
 {
-	u64 cycles;
+	u32 cyc;
 
 	if (unlikely(!mtu_base))
 		return 0;
 
-	cycles = cnt32_to_63(-readl(mtu_base + MTU_VAL(0)));
-	/*
-	 * sched_mult is guaranteed to be even so will
-	 * shift out bit 63
-	 */
-	return (cycles * sched_mult) >> sched_shift;
+	cyc = -readl(mtu_base + MTU_VAL(0));
+	return cyc_to_sched_clock(&cd, cyc, (u32)~0);
 }
 
-/* Just kick sched_clock every so often */
-static void cnt32_to_63_keepwarm(unsigned long data)
+static void notrace nomadik_update_sched_clock(void)
 {
-	mod_timer(&cnt32_to_63_keepwarm_timer, round_jiffies(jiffies + data));
-	(void) sched_clock();
-}
-
-/*
- * Set up a timer to keep sched_clock():s 32_to_63 algorithm warm
- * once in half a 32bit timer wrap interval.
- */
-static void __init nmdk_sched_clock_init(unsigned long rate)
-{
-	u32 v;
-	unsigned long delta;
-	u64 days;
-
-	/* Find the apropriate mult and shift factors */
-	clocks_calc_mult_shift(&sched_mult, &sched_shift,
-			       rate, NSEC_PER_SEC, SCHED_CLOCK_MIN_WRAP);
-	/* We need to multiply by an even number to get rid of bit 63 */
-	if (sched_mult & 1)
-		sched_mult++;
-
-	/* Let's see what we get, take max counter and scale it */
-	days = (0xFFFFFFFFFFFFFFFFLLU * sched_mult) >> sched_shift;
-	do_div(days, NSEC_PER_SEC);
-	do_div(days, (3600*24));
-
-	pr_info("sched_clock: using %d bits @ %lu Hz wrap in %lu days\n",
-		(64 - sched_shift), rate, (unsigned long) days);
-
-	/*
-	 * Program a timer to kick us at half 32bit wraparound
-	 * Formula: seconds per wrap = (2^32) / f
-	 */
-	v = 0xFFFFFFFFUL / rate;
-	/* We want half of the wrap time to keep cnt32_to_63 warm */
-	v /= 2;
-	pr_debug("sched_clock: prescaled timer rate: %lu Hz, "
-		 "initialize keepwarm timer every %d seconds\n", rate, v);
-	/* Convert seconds to jiffies */
-	delta = msecs_to_jiffies(v*1000);
-	setup_timer(&cnt32_to_63_keepwarm_timer, cnt32_to_63_keepwarm, delta);
-	mod_timer(&cnt32_to_63_keepwarm_timer, round_jiffies(jiffies + delta));
+	u32 cyc = -readl(mtu_base + MTU_VAL(0));
+	update_sched_clock(&cd, cyc, (u32)~0);
 }
 
 /* Clockevent device: use one-shot mode */
@@ -222,7 +165,6 @@ void __init nmdk_timer_init(void)
 	} else {
 		cr |= MTU_CRn_PRESCALE_1;
 	}
-	clocksource_calc_mult_shift(&nmdk_clksrc, rate, MTU_MIN_RANGE);
 
 	/* Timer 0 is the free running clocksource */
 	writel(cr, mtu_base + MTU_CR(0));
@@ -233,11 +175,11 @@ void __init nmdk_timer_init(void)
 	/* Now the clock source is ready */
 	nmdk_clksrc.read = nmdk_read_timer;
 
-	if (clocksource_register(&nmdk_clksrc))
+	if (clocksource_register_hz(&nmdk_clksrc, rate))
 		pr_err("timer: failed to initialize clock source %s\n",
 		       nmdk_clksrc.name);
 
-	nmdk_sched_clock_init(rate);
+	init_sched_clock(&cd, nomadik_update_sched_clock, 32, rate);
 
 	/* Timer 1 is used for events */
 
diff --git a/arch/arm/plat-omap/Kconfig b/arch/arm/plat-omap/Kconfig
index 92c5bb7909f5..c9408434a855 100644
--- a/arch/arm/plat-omap/Kconfig
+++ b/arch/arm/plat-omap/Kconfig
@@ -11,13 +11,13 @@ choice
 
 config ARCH_OMAP1
 	bool "TI OMAP1"
-	select COMMON_CLKDEV
+	select CLKDEV_LOOKUP
 	help
 	  "Systems based on omap7xx, omap15xx or omap16xx"
 
 config ARCH_OMAP2PLUS
 	bool "TI OMAP2/3/4"
-	select COMMON_CLKDEV
+	select CLKDEV_LOOKUP
 	help
 	  "Systems based on OMAP2, OMAP3 or OMAP4"
 
diff --git a/arch/arm/plat-omap/counter_32k.c b/arch/arm/plat-omap/counter_32k.c
index 8722a136f3a5..ea4644021fb9 100644
--- a/arch/arm/plat-omap/counter_32k.c
+++ b/arch/arm/plat-omap/counter_32k.c
@@ -15,8 +15,11 @@
 #include <linux/kernel.h>
 #include <linux/init.h>
 #include <linux/clk.h>
-#include <linux/io.h>
 #include <linux/err.h>
+#include <linux/io.h>
+#include <linux/sched.h>
+
+#include <asm/sched_clock.h>
 
 #include <plat/common.h>
 #include <plat/board.h>
@@ -45,7 +48,7 @@
 static u32 offset_32k __read_mostly;
 
 #ifdef CONFIG_ARCH_OMAP16XX
-static cycle_t omap16xx_32k_read(struct clocksource *cs)
+static cycle_t notrace omap16xx_32k_read(struct clocksource *cs)
 {
 	return omap_readl(OMAP16XX_TIMER_32K_SYNCHRONIZED) - offset_32k;
 }
@@ -54,7 +57,7 @@ static cycle_t omap16xx_32k_read(struct clocksource *cs)
 #endif
 
 #ifdef CONFIG_ARCH_OMAP2420
-static cycle_t omap2420_32k_read(struct clocksource *cs)
+static cycle_t notrace omap2420_32k_read(struct clocksource *cs)
 {
 	return omap_readl(OMAP2420_32KSYNCT_BASE + 0x10) - offset_32k;
 }
@@ -63,7 +66,7 @@ static cycle_t omap2420_32k_read(struct clocksource *cs)
 #endif
 
 #ifdef CONFIG_ARCH_OMAP2430
-static cycle_t omap2430_32k_read(struct clocksource *cs)
+static cycle_t notrace omap2430_32k_read(struct clocksource *cs)
 {
 	return omap_readl(OMAP2430_32KSYNCT_BASE + 0x10) - offset_32k;
 }
@@ -72,7 +75,7 @@ static cycle_t omap2430_32k_read(struct clocksource *cs)
 #endif
 
 #ifdef CONFIG_ARCH_OMAP3
-static cycle_t omap34xx_32k_read(struct clocksource *cs)
+static cycle_t notrace omap34xx_32k_read(struct clocksource *cs)
 {
 	return omap_readl(OMAP3430_32KSYNCT_BASE + 0x10) - offset_32k;
 }
@@ -81,7 +84,7 @@ static cycle_t omap34xx_32k_read(struct clocksource *cs)
 #endif
 
 #ifdef CONFIG_ARCH_OMAP4
-static cycle_t omap44xx_32k_read(struct clocksource *cs)
+static cycle_t notrace omap44xx_32k_read(struct clocksource *cs)
 {
 	return omap_readl(OMAP4430_32KSYNCT_BASE + 0x10) - offset_32k;
 }
@@ -93,7 +96,7 @@ static cycle_t omap44xx_32k_read(struct clocksource *cs)
  * Kernel assumes that sched_clock can be called early but may not have
  * things ready yet.
  */
-static cycle_t omap_32k_read_dummy(struct clocksource *cs)
+static cycle_t notrace omap_32k_read_dummy(struct clocksource *cs)
 {
 	return 0;
 }
@@ -103,7 +106,6 @@ static struct clocksource clocksource_32k = {
 	.rating		= 250,
 	.read		= omap_32k_read_dummy,
 	.mask		= CLOCKSOURCE_MASK(32),
-	.shift		= 10,
 	.flags		= CLOCK_SOURCE_IS_CONTINUOUS,
 };
 
@@ -111,10 +113,25 @@ static struct clocksource clocksource_32k = {
  * Returns current time from boot in nsecs. It's OK for this to wrap
  * around for now, as it's just a relative time stamp.
  */
-unsigned long long sched_clock(void)
+static DEFINE_CLOCK_DATA(cd);
+
+/*
+ * Constants generated by clocks_calc_mult_shift(m, s, 32768, NSEC_PER_SEC, 60).
+ * This gives a resolution of about 30us and a wrap period of about 36hrs.
+ */
+#define SC_MULT		4000000000u
+#define SC_SHIFT	17
+
+unsigned long long notrace sched_clock(void)
+{
+	u32 cyc = clocksource_32k.read(&clocksource_32k);
+	return cyc_to_fixed_sched_clock(&cd, cyc, (u32)~0, SC_MULT, SC_SHIFT);
+}
+
+static void notrace omap_update_sched_clock(void)
 {
-	return clocksource_cyc2ns(clocksource_32k.read(&clocksource_32k),
-				  clocksource_32k.mult, clocksource_32k.shift);
+	u32 cyc = clocksource_32k.read(&clocksource_32k);
+	update_sched_clock(&cd, cyc, (u32)~0);
 }
 
 /**
@@ -168,13 +185,13 @@ static int __init omap_init_clocksource_32k(void)
 		if (!IS_ERR(sync_32k_ick))
 			clk_enable(sync_32k_ick);
 
-		clocksource_32k.mult = clocksource_hz2mult(32768,
-					    clocksource_32k.shift);
-
 		offset_32k = clocksource_32k.read(&clocksource_32k);
 
-		if (clocksource_register(&clocksource_32k))
+		if (clocksource_register_hz(&clocksource_32k, 32768))
 			printk(err, clocksource_32k.name);
+
+		init_fixed_sched_clock(&cd, omap_update_sched_clock, 32,
+				       32768, SC_MULT, SC_SHIFT);
 	}
 	return 0;
 }
diff --git a/arch/arm/plat-omap/include/plat/clkdev_omap.h b/arch/arm/plat-omap/include/plat/clkdev_omap.h
index bb937f3fabed..4b2028ab4d2b 100644
--- a/arch/arm/plat-omap/include/plat/clkdev_omap.h
+++ b/arch/arm/plat-omap/include/plat/clkdev_omap.h
@@ -8,7 +8,7 @@
 #ifndef __ARCH_ARM_PLAT_OMAP_INCLUDE_PLAT_CLKDEV_OMAP_H
 #define __ARCH_ARM_PLAT_OMAP_INCLUDE_PLAT_CLKDEV_OMAP_H
 
-#include <asm/clkdev.h>
+#include <linux/clkdev.h>
 
 struct omap_clk {
 	u16				cpu;
diff --git a/arch/arm/plat-omap/include/plat/io.h b/arch/arm/plat-omap/include/plat/io.h
index 128b549c2796..204865f91d93 100644
--- a/arch/arm/plat-omap/include/plat/io.h
+++ b/arch/arm/plat-omap/include/plat/io.h
@@ -294,8 +294,8 @@ static inline void omap44xx_map_common_io(void)
 extern void omap2_init_common_hw(struct omap_sdrc_params *sdrc_cs0,
 				 struct omap_sdrc_params *sdrc_cs1);
 
-#define __arch_ioremap(p,s,t)	omap_ioremap(p,s,t)
-#define __arch_iounmap(v)	omap_iounmap(v)
+#define __arch_ioremap	omap_ioremap
+#define __arch_iounmap	omap_iounmap
 
 void __iomem *omap_ioremap(unsigned long phys, size_t size, unsigned int type);
 void omap_iounmap(volatile void __iomem *addr);
diff --git a/arch/arm/plat-omap/include/plat/memory.h b/arch/arm/plat-omap/include/plat/memory.h
index d5306bee44b2..f8d922fb5584 100644
--- a/arch/arm/plat-omap/include/plat/memory.h
+++ b/arch/arm/plat-omap/include/plat/memory.h
@@ -61,17 +61,17 @@
 #define lbus_to_virt(x)		((x) - OMAP1510_LB_OFFSET + PAGE_OFFSET)
 #define is_lbus_device(dev)	(cpu_is_omap15xx() && dev && (strncmp(dev_name(dev), "ohci", 4) == 0))
 
-#define __arch_page_to_dma(dev, page)	\
-	({ dma_addr_t __dma = page_to_phys(page); \
+#define __arch_pfn_to_dma(dev, pfn)	\
+	({ dma_addr_t __dma = __pfn_to_phys(pfn); \
 	   if (is_lbus_device(dev)) \
 		__dma = __dma - PHYS_OFFSET + OMAP1510_LB_OFFSET; \
 	   __dma; })
 
-#define __arch_dma_to_page(dev, addr)	\
+#define __arch_dma_to_pfn(dev, addr)	\
 	({ dma_addr_t __dma = addr;				\
 	   if (is_lbus_device(dev))				\
 		__dma += PHYS_OFFSET - OMAP1510_LB_OFFSET;	\
-	   phys_to_page(__dma);					\
+	   __phys_to_pfn(__dma);				\
 	})
 
 #define __arch_dma_to_virt(dev, addr)	({ (void *) (is_lbus_device(dev) ? \
diff --git a/arch/arm/plat-omap/include/plat/smp.h b/arch/arm/plat-omap/include/plat/smp.h
index ecd6a488c497..7a10257909ef 100644
--- a/arch/arm/plat-omap/include/plat/smp.h
+++ b/arch/arm/plat-omap/include/plat/smp.h
@@ -18,7 +18,6 @@
 #define OMAP_ARCH_SMP_H
 
 #include <asm/hardware/gic.h>
-#include <asm/smp_mpidr.h>
 
 /* Needed for secondary core boot */
 extern void omap_secondary_startup(void);
@@ -29,9 +28,9 @@ extern u32 omap_read_auxcoreboot0(void);
 /*
  * We use Soft IRQ1 as the IPI
  */
-static inline void smp_cross_call(const struct cpumask *mask)
+static inline void smp_cross_call(const struct cpumask *mask, int ipi)
 {
-	gic_raise_softirq(mask, 1);
+	gic_raise_softirq(mask, ipi);
 }
 
 #endif
diff --git a/arch/arm/plat-orion/time.c b/arch/arm/plat-orion/time.c
index 715a30177f28..c3da2478b2aa 100644
--- a/arch/arm/plat-orion/time.c
+++ b/arch/arm/plat-orion/time.c
@@ -13,11 +13,11 @@
 
 #include <linux/kernel.h>
 #include <linux/sched.h>
-#include <linux/cnt32_to_63.h>
 #include <linux/timer.h>
 #include <linux/clockchips.h>
 #include <linux/interrupt.h>
 #include <linux/irq.h>
+#include <asm/sched_clock.h>
 #include <asm/mach/time.h>
 #include <mach/bridge-regs.h>
 #include <mach/hardware.h>
@@ -44,52 +44,26 @@ static u32 ticks_per_jiffy;
 
 /*
  * Orion's sched_clock implementation. It has a resolution of
- * at least 7.5ns (133MHz TCLK) and a maximum value of 834 days.
- *
- * Because the hardware timer period is quite short (21 secs if
- * 200MHz TCLK) and because cnt32_to_63() needs to be called at
- * least once per half period to work properly, a kernel timer is
- * set up to ensure this requirement is always met.
+ * at least 7.5ns (133MHz TCLK).
  */
-#define TCLK2NS_SCALE_FACTOR 8
-
-static unsigned long tclk2ns_scale;
+static DEFINE_CLOCK_DATA(cd);
 
-unsigned long long sched_clock(void)
+unsigned long long notrace sched_clock(void)
 {
-	unsigned long long v = cnt32_to_63(0xffffffff - readl(TIMER0_VAL));
-	return (v * tclk2ns_scale) >> TCLK2NS_SCALE_FACTOR;
+	u32 cyc = 0xffffffff - readl(TIMER0_VAL);
+	return cyc_to_sched_clock(&cd, cyc, (u32)~0);
 }
 
-static struct timer_list cnt32_to_63_keepwarm_timer;
 
-static void cnt32_to_63_keepwarm(unsigned long data)
+static void notrace orion_update_sched_clock(void)
 {
-	mod_timer(&cnt32_to_63_keepwarm_timer, round_jiffies(jiffies + data));
-	(void) sched_clock();
+	u32 cyc = 0xffffffff - readl(TIMER0_VAL);
+	update_sched_clock(&cd, cyc, (u32)~0);
 }
 
 static void __init setup_sched_clock(unsigned long tclk)
 {
-	unsigned long long v;
-	unsigned long data;
-
-	v = NSEC_PER_SEC;
-	v <<= TCLK2NS_SCALE_FACTOR;
-	v += tclk/2;
-	do_div(v, tclk);
-	/*
-	 * We want an even value to automatically clear the top bit
-	 * returned by cnt32_to_63() without an additional run time
-	 * instruction. So if the LSB is 1 then round it up.
-	 */
-	if (v & 1)
-		v++;
-	tclk2ns_scale = v;
-
-	data = (0xffffffffUL / tclk / 2 - 2) * HZ;
-	setup_timer(&cnt32_to_63_keepwarm_timer, cnt32_to_63_keepwarm, data);
-	mod_timer(&cnt32_to_63_keepwarm_timer, round_jiffies(jiffies + data));
+	init_sched_clock(&cd, orion_update_sched_clock, 32, tclk);
 }
 
 /*
@@ -102,7 +76,6 @@ static cycle_t orion_clksrc_read(struct clocksource *cs)
 
 static struct clocksource orion_clksrc = {
 	.name		= "orion_clocksource",
-	.shift		= 20,
 	.rating		= 300,
 	.read		= orion_clksrc_read,
 	.mask		= CLOCKSOURCE_MASK(32),
@@ -245,8 +218,7 @@ void __init orion_time_init(unsigned int irq, unsigned int tclk)
 	writel(u & ~BRIDGE_INT_TIMER0, BRIDGE_MASK);
 	u = readl(TIMER_CTRL);
 	writel(u | TIMER0_EN | TIMER0_RELOAD_EN, TIMER_CTRL);
-	orion_clksrc.mult = clocksource_hz2mult(tclk, orion_clksrc.shift);
-	clocksource_register(&orion_clksrc);
+	clocksource_register_hz(&orion_clksrc, tclk);
 
 	/*
 	 * Setup clockevent timer (interrupt-driven.)
diff --git a/arch/arm/plat-s3c24xx/Kconfig b/arch/arm/plat-s3c24xx/Kconfig
index 5a27b1b538f2..eb105e61c746 100644
--- a/arch/arm/plat-s3c24xx/Kconfig
+++ b/arch/arm/plat-s3c24xx/Kconfig
@@ -8,7 +8,7 @@ config PLAT_S3C24XX
 	default y
 	select NO_IOPORT
 	select ARCH_REQUIRE_GPIOLIB
-	select S3C_DEVICE_NAND
+	select S3C_DEV_NAND
 	select S3C_GPIO_CFG_S3C24XX
 	help
 	  Base platform code for any Samsung S3C24XX device
diff --git a/arch/arm/plat-spear/include/plat/clock.h b/arch/arm/plat-spear/include/plat/clock.h
index 298bafc0a52f..2572260f990f 100644
--- a/arch/arm/plat-spear/include/plat/clock.h
+++ b/arch/arm/plat-spear/include/plat/clock.h
@@ -15,7 +15,7 @@
 #define __PLAT_CLOCK_H
 
 #include <linux/list.h>
-#include <asm/clkdev.h>
+#include <linux/clkdev.h>
 #include <linux/types.h>
 
 /* clk structure flags */
diff --git a/arch/arm/plat-spear/time.c b/arch/arm/plat-spear/time.c
index ab211652e4ca..839c88df9994 100644
--- a/arch/arm/plat-spear/time.c
+++ b/arch/arm/plat-spear/time.c
@@ -81,8 +81,6 @@ static struct clocksource clksrc = {
 	.rating = 200,		/* its a pretty decent clock */
 	.read = clocksource_read_cycles,
 	.mask = 0xFFFF,		/* 16 bits */
-	.mult = 0,		/* to be computed */
-	.shift = 0,		/* to be computed */
 	.flags = CLOCK_SOURCE_IS_CONTINUOUS,
 };
 
@@ -105,10 +103,8 @@ static void spear_clocksource_init(void)
 	val |= CTRL_ENABLE ;
 	writew(val, gpt_base + CR(CLKSRC));
 
-	clocksource_calc_mult_shift(&clksrc, tick_rate, SPEAR_MIN_RANGE);
-
 	/* register the clocksource */
-	clocksource_register(&clksrc);
+	clocksource_register_hz(&clksrc, tick_rate);
 }
 
 static struct clock_event_device clkevt = {
diff --git a/arch/arm/plat-stmp3xxx/clock.c b/arch/arm/plat-stmp3xxx/clock.c
index e593a2a801c6..2e712e17ce72 100644
--- a/arch/arm/plat-stmp3xxx/clock.c
+++ b/arch/arm/plat-stmp3xxx/clock.c
@@ -25,9 +25,9 @@
 #include <linux/err.h>
 #include <linux/delay.h>
 #include <linux/io.h>
+#include <linux/clkdev.h>
 
 #include <asm/mach-types.h>
-#include <asm/clkdev.h>
 #include <mach/platform.h>
 #include <mach/regs-clkctrl.h>
 
diff --git a/arch/arm/plat-stmp3xxx/timer.c b/arch/arm/plat-stmp3xxx/timer.c
index 063c7bc0e740..c395630a6edc 100644
--- a/arch/arm/plat-stmp3xxx/timer.c
+++ b/arch/arm/plat-stmp3xxx/timer.c
@@ -89,7 +89,6 @@ static struct clocksource cksrc_stmp3xxx = {
 	.rating         = 250,
 	.read           = stmp3xxx_clock_read,
 	.mask           = CLOCKSOURCE_MASK(16),
-	.shift          = 10,
 	.flags		= CLOCK_SOURCE_IS_CONTINUOUS,
 };
 
@@ -106,8 +105,6 @@ static struct irqaction stmp3xxx_timer_irq = {
  */
 static void __init stmp3xxx_init_timer(void)
 {
-	cksrc_stmp3xxx.mult = clocksource_hz2mult(CLOCK_TICK_RATE,
-				cksrc_stmp3xxx.shift);
 	ckevt_timrot.mult = div_sc(CLOCK_TICK_RATE, NSEC_PER_SEC,
 				ckevt_timrot.shift);
 	ckevt_timrot.min_delta_ns = clockevent_delta2ns(2, &ckevt_timrot);
@@ -140,7 +137,7 @@ static void __init stmp3xxx_init_timer(void)
 
 	setup_irq(IRQ_TIMER0, &stmp3xxx_timer_irq);
 
-	clocksource_register(&cksrc_stmp3xxx);
+	clocksource_register_hz(&cksrc_stmp3xxx, CLOCK_TICK_RATE);
 	clockevents_register_device(&ckevt_timrot);
 }
 
diff --git a/arch/arm/plat-versatile/Makefile b/arch/arm/plat-versatile/Makefile
index 5cf88e8427b1..16dde0819934 100644
--- a/arch/arm/plat-versatile/Makefile
+++ b/arch/arm/plat-versatile/Makefile
@@ -1,7 +1,7 @@
 obj-y	:= clock.o
-obj-$(CONFIG_ARM_TIMER_SP804) += timer-sp.o
-obj-$(CONFIG_ARCH_REALVIEW) += sched-clock.o
-obj-$(CONFIG_ARCH_VERSATILE) += sched-clock.o
+ifneq ($(CONFIG_ARCH_INTEGRATOR),y)
+obj-y	+= sched-clock.o
+endif
 ifeq ($(CONFIG_LEDS_CLASS),y)
 obj-$(CONFIG_ARCH_REALVIEW) += leds.o
 obj-$(CONFIG_ARCH_VERSATILE) += leds.o
diff --git a/arch/arm/plat-versatile/include/plat/sched_clock.h b/arch/arm/plat-versatile/include/plat/sched_clock.h
new file mode 100644
index 000000000000..5c3e4fc9fa0c
--- /dev/null
+++ b/arch/arm/plat-versatile/include/plat/sched_clock.h
@@ -0,0 +1,6 @@
+#ifndef ARM_PLAT_SCHED_CLOCK_H
+#define ARM_PLAT_SCHED_CLOCK_H
+
+void versatile_sched_clock_init(void __iomem *, unsigned long);
+
+#endif
diff --git a/arch/arm/plat-versatile/sched-clock.c b/arch/arm/plat-versatile/sched-clock.c
index 9696ddc238c9..3d6a4c292cab 100644
--- a/arch/arm/plat-versatile/sched-clock.c
+++ b/arch/arm/plat-versatile/sched-clock.c
@@ -18,37 +18,41 @@
  * along with this program; if not, write to the Free Software
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  */
-#include <linux/cnt32_to_63.h>
 #include <linux/io.h>
 #include <linux/sched.h>
-#include <asm/div64.h>
 
-#include <mach/hardware.h>
-#include <mach/platform.h>
+#include <asm/sched_clock.h>
+#include <plat/sched_clock.h>
 
-#ifdef VERSATILE_SYS_BASE
-#define REFCOUNTER	(__io_address(VERSATILE_SYS_BASE) + VERSATILE_SYS_24MHz_OFFSET)
-#endif
-
-#ifdef REALVIEW_SYS_BASE
-#define REFCOUNTER	(__io_address(REALVIEW_SYS_BASE) + REALVIEW_SYS_24MHz_OFFSET)
-#endif
+static DEFINE_CLOCK_DATA(cd);
+static void __iomem *ctr;
 
 /*
- * This is the Realview and Versatile sched_clock implementation.  This
- * has a resolution of 41.7ns, and a maximum value of about 35583 days.
- *
- * The return value is guaranteed to be monotonic in that range as
- * long as there is always less than 89 seconds between successive
- * calls to this function.
+ * Constants generated by clocks_calc_mult_shift(m, s, 24MHz, NSEC_PER_SEC, 60).
+ * This gives a resolution of about 41ns and a wrap period of about 178s.
  */
-unsigned long long sched_clock(void)
+#define SC_MULT		2796202667u
+#define SC_SHIFT	26
+
+unsigned long long notrace sched_clock(void)
 {
-	unsigned long long v = cnt32_to_63(readl(REFCOUNTER));
+	if (ctr) {
+		u32 cyc = readl(ctr);
+		return cyc_to_fixed_sched_clock(&cd, cyc, (u32)~0,
+						SC_MULT, SC_SHIFT);
+	} else
+		return 0;
+}
 
-	/* the <<1 gets rid of the cnt_32_to_63 top bit saving on a bic insn */
-	v *= 125<<1;
-	do_div(v, 3<<1);
+static void notrace versatile_update_sched_clock(void)
+{
+	u32 cyc = readl(ctr);
+	update_sched_clock(&cd, cyc, (u32)~0);
+}
 
-	return v;
+void __init versatile_sched_clock_init(void __iomem *reg, unsigned long rate)
+{
+	ctr = reg;
+	init_fixed_sched_clock(&cd, versatile_update_sched_clock,
+			       32, rate, SC_MULT, SC_SHIFT);
 }
diff --git a/arch/arm/vfp/vfpmodule.c b/arch/arm/vfp/vfpmodule.c
index 8063a322c790..0797cb528b46 100644
--- a/arch/arm/vfp/vfpmodule.c
+++ b/arch/arm/vfp/vfpmodule.c
@@ -10,9 +10,12 @@
  */
 #include <linux/module.h>
 #include <linux/types.h>
+#include <linux/cpu.h>
 #include <linux/kernel.h>
+#include <linux/notifier.h>
 #include <linux/signal.h>
 #include <linux/sched.h>
+#include <linux/smp.h>
 #include <linux/init.h>
 
 #include <asm/cputype.h>
@@ -484,7 +487,24 @@ void vfp_flush_hwstate(struct thread_info *thread)
 	put_cpu();
 }
 
-#include <linux/smp.h>
+/*
+ * VFP hardware can lose all context when a CPU goes offline.
+ * Safely clear our held state when a CPU has been killed, and
+ * re-enable access to VFP when the CPU comes back online.
+ *
+ * Both CPU_DYING and CPU_STARTING are called on the CPU which
+ * is being offlined/onlined.
+ */
+static int vfp_hotplug(struct notifier_block *b, unsigned long action,
+	void *hcpu)
+{
+	if (action == CPU_DYING || action == CPU_DYING_FROZEN) {
+		unsigned int cpu = (long)hcpu;
+		last_VFP_context[cpu] = NULL;
+	} else if (action == CPU_STARTING || action == CPU_STARTING_FROZEN)
+		vfp_enable(NULL);
+	return NOTIFY_OK;
+}
 
 /*
  * VFP support code initialisation.
@@ -514,6 +534,8 @@ static int __init vfp_init(void)
 	else if (vfpsid & FPSID_NODOUBLE) {
 		printk("no double precision support\n");
 	} else {
+		hotcpu_notifier(vfp_hotplug, 0);
+
 		smp_call_function(vfp_enable, NULL, 1);
 
 		VFP_arch = (vfpsid & FPSID_ARCH_MASK) >> FPSID_ARCH_BIT;  /* Extract the architecture version */