From 230a15f171ba24226486664fc44542b175107ab7 Mon Sep 17 00:00:00 2001
From: Paul Bolle <pebolle@tiscali.nl>
Date: Wed, 11 Jun 2014 10:57:50 +0200
Subject: ARC: remove checks for CONFIG_ARC_MMU_V4

There's no Kconfig symbol ARC_MMU_V4 so the checks for CONFIG_ARC_MMU_V4
will always evaluate to false. Remove them.

Signed-off-by: Paul Bolle <pebolle@tiscali.nl>
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
---
 arch/arc/include/asm/cache.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/arc/include/asm/cache.h b/arch/arc/include/asm/cache.h
index c1d3d2da1191..b3c750979aa1 100644
--- a/arch/arc/include/asm/cache.h
+++ b/arch/arc/include/asm/cache.h
@@ -60,7 +60,7 @@ extern void read_decode_cache_bcr(void);
 #define ARC_REG_IC_IVIC		0x10
 #define ARC_REG_IC_CTRL		0x11
 #define ARC_REG_IC_IVIL		0x19
-#if defined(CONFIG_ARC_MMU_V3) || defined (CONFIG_ARC_MMU_V4)
+#if defined(CONFIG_ARC_MMU_V3)
 #define ARC_REG_IC_PTAG		0x1E
 #endif
 
@@ -74,7 +74,7 @@ extern void read_decode_cache_bcr(void);
 #define ARC_REG_DC_IVDL		0x4A
 #define ARC_REG_DC_FLSH		0x4B
 #define ARC_REG_DC_FLDL		0x4C
-#if defined(CONFIG_ARC_MMU_V3) || defined (CONFIG_ARC_MMU_V4)
+#if defined(CONFIG_ARC_MMU_V3)
 #define ARC_REG_DC_PTAG		0x5C
 #endif
 
-- 
cgit v1.2.3


From e114ba20fe4f2f0a075941a06271e3f0a3539551 Mon Sep 17 00:00:00 2001
From: Paul Burton <paul.burton@imgtec.com>
Date: Wed, 11 Jun 2014 11:00:56 +0100
Subject: MIPS: smp-cps: Convert smp_mb__after_atomic_dec()

Commit 91bbefe6b0fc "arch,mips: Convert smp_mb__*()" replaced the
smp_mb__after_atomic_dec function with smp_mb__after_atomic, whilst
commit 1d8f1f5a780a "MIPS: smp-cps: hotplug support" introduced a new
use of it. Replace that use with smp_mb__after_atomic in order to avoid
a build failure:

  arch/mips/kernel/smp-cps.c: In function 'cps_cpu_disable':
  arch/mips/kernel/smp-cps.c:304:2: error: 'smp_mb__after_atomic_dec' is
    deprecated (declared at include/linux/atomic.h:35)
    [-Werror=deprecated-declarations]

Signed-off-by: Paul Burton <paul.burton@imgtec.com>
Reviewed-by: Markos Chandras <markos.chandras@imgtec.com>
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/7085/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/kernel/smp-cps.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/mips/kernel/smp-cps.c b/arch/mips/kernel/smp-cps.c
index df0598d9bfdd..949f2c6827a0 100644
--- a/arch/mips/kernel/smp-cps.c
+++ b/arch/mips/kernel/smp-cps.c
@@ -301,7 +301,7 @@ static int cps_cpu_disable(void)
 
 	core_cfg = &mips_cps_core_bootcfg[current_cpu_data.core];
 	atomic_sub(1 << cpu_vpe_id(&current_cpu_data), &core_cfg->vpe_mask);
-	smp_mb__after_atomic_dec();
+	smp_mb__after_atomic();
 	set_cpu_online(cpu, false);
 	cpu_clear(cpu, cpu_callin_map);
 
-- 
cgit v1.2.3


From 7c5491b808d8ea0781c4402792b21a103151135f Mon Sep 17 00:00:00 2001
From: Paul Burton <paul.burton@imgtec.com>
Date: Wed, 11 Jun 2014 11:00:57 +0100
Subject: MIPS: pm-cps: convert smp_mb__*()

Commit 91bbefe6b0fc "arch,mips: Convert smp_mb__*()" replaced the
smp_mb__* functions with a simpler API, whilst commit 3179d37ee1ed
"MIPS: pm-cps: add PM state entry code for CPS systems" introduced
new uses of smp_mb__before_atomic_inc & smp_mb__after_clear_bit.
Replace those calls with the corresponding before & after atomic
functions of the new, simpler API in order to avoid a build failure:

  arch/mips/kernel/pm-cps.c: In function 'coupled_barrier':
  arch/mips/kernel/pm-cps.c:104:2: error: 'smp_mb__before_atomic_inc' is
    deprecated (declared at include/linux/atomic.h:11)
    [-Werror=deprecated-declarations]

  arch/mips/kernel/pm-cps.c: In function 'cps_pm_enter_state':
  arch/mips/kernel/pm-cps.c:161:2: error: 'smp_mb__after_clear_bit' is
    deprecated (declared at include/linux/bitops.h:48)
    [-Werror=deprecated-declarations]

Signed-off-by: Paul Burton <paul.burton@imgtec.com>
Reviewed-by: Markos Chandras <markos.chandras@imgtec.com>
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/7086/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/kernel/pm-cps.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/mips/kernel/pm-cps.c b/arch/mips/kernel/pm-cps.c
index 5aa4c6f8cf83..c4c2069d3a20 100644
--- a/arch/mips/kernel/pm-cps.c
+++ b/arch/mips/kernel/pm-cps.c
@@ -101,7 +101,7 @@ static void coupled_barrier(atomic_t *a, unsigned online)
 	if (!coupled_coherence)
 		return;
 
-	smp_mb__before_atomic_inc();
+	smp_mb__before_atomic();
 	atomic_inc(a);
 
 	while (atomic_read(a) < online)
@@ -158,7 +158,7 @@ int cps_pm_enter_state(enum cps_pm_state state)
 
 	/* Indicate that this CPU might not be coherent */
 	cpumask_clear_cpu(cpu, &cpu_coherent_mask);
-	smp_mb__after_clear_bit();
+	smp_mb__after_atomic();
 
 	/* Create a non-coherent mapping of the core ready_count */
 	core_ready_count = per_cpu(ready_count, core);
-- 
cgit v1.2.3


From 91496ea9f86f55e87be78ecc88c8a6b7e3802601 Mon Sep 17 00:00:00 2001
From: Ralf Baechle <ralf@linux-mips.org>
Date: Fri, 13 Jun 2014 15:26:14 +0100
Subject: MIPS: math-emu: Work around limitations of older GCC.

Older GCC doesn't get named initializations of anonymous structs right,
that is members are not initializable in the containing structure through
name however old style initializations are working fine.

The issue exists with gcc up to 4.5.x.

Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/math-emu/ieee754.c | 18 +++++++++++++++---
 1 file changed, 15 insertions(+), 3 deletions(-)

(limited to 'arch')

diff --git a/arch/mips/math-emu/ieee754.c b/arch/mips/math-emu/ieee754.c
index 53f1d2287084..cb9214da372f 100644
--- a/arch/mips/math-emu/ieee754.c
+++ b/arch/mips/math-emu/ieee754.c
@@ -34,11 +34,17 @@
  * Special constants
  */
 
+/*
+ * Older GCC requires the inner braces for initialization of union ieee754dp's
+ * anonymous struct member.  Without an error will result.
+ */
 #define DPCNST(s, b, m)							\
 {									\
-	.sign	= (s),							\
-	.bexp	= (b) + DP_EBIAS,					\
-	.mant	= (m)							\
+	{								\
+		.sign	= (s),						\
+		.bexp	= (b) + DP_EBIAS,				\
+		.mant	= (m)						\
+	}								\
 }
 
 const union ieee754dp __ieee754dp_spcvals[] = {
@@ -61,11 +67,17 @@ const union ieee754dp __ieee754dp_spcvals[] = {
 	DPCNST(0, 63,          0x0000000000000ULL),	/* + 1.0e63 */
 };
 
+/*
+ * Older GCC requires the inner braces for initialization of union ieee754sp's
+ * anonymous struct member.  Without an error will result.
+ */
 #define SPCNST(s, b, m)							\
 {									\
+	{								\
 	.sign	= (s),							\
 	.bexp	= (b) + SP_EBIAS,					\
 	.mant	= (m)							\
+	}								\
 }
 
 const union ieee754sp __ieee754sp_spcvals[] = {
-- 
cgit v1.2.3


From b92ad209c26a1891c4e04cd75fc771dcb002603f Mon Sep 17 00:00:00 2001
From: Leela Krishna Amudala <leela.krishna@linaro.org>
Date: Wed, 28 May 2014 00:43:21 +0900
Subject: ARM: EXYNOS: Use wfi macro in platform_do_lowpower

This patch is originally based on commit b3377d186572 ("ARM: 7064/1:
vexpress: Use wfi macro in platform_do_lowpower.")

Current Exynos CPU hotplug code includes a hardcoded WFI instruction,
in ARM encoding. When the kernel is compiled in Thumb-2 mode, this
is invalid and causes the machine to hang hard when a CPU is offlined.

Use wfi macro instead of the hardcoded WFI instruction.

Signed-off-by: Leela Krishna Amudala <leela.krishna@linaro.org>
Acked-by: Daniel Lezcano <daniel.lezcano@linaro.org>
Signed-off-by: Kukjin Kim <kgene.kim@samsung.com>
---
 arch/arm/mach-exynos/hotplug.c | 8 +-------
 1 file changed, 1 insertion(+), 7 deletions(-)

(limited to 'arch')

diff --git a/arch/arm/mach-exynos/hotplug.c b/arch/arm/mach-exynos/hotplug.c
index 69fa48397394..8a134d019cb3 100644
--- a/arch/arm/mach-exynos/hotplug.c
+++ b/arch/arm/mach-exynos/hotplug.c
@@ -46,13 +46,7 @@ static inline void platform_do_lowpower(unsigned int cpu, int *spurious)
 		if (cpu == 1)
 			exynos_cpu_power_down(cpu);
 
-		/*
-		 * here's the WFI
-		 */
-		asm(".word	0xe320f003\n"
-		    :
-		    :
-		    : "memory", "cc");
+		wfi();
 
 		if (pen_release == cpu_logical_map(cpu)) {
 			/*
-- 
cgit v1.2.3


From cf286b405c446cc2c61e4ab210ef42e5852a6da3 Mon Sep 17 00:00:00 2001
From: Tomasz Figa <t.figa@samsung.com>
Date: Sat, 31 May 2014 02:21:42 +0900
Subject: ARM: dts: fix reg sizes of GIC for exynos4

This patch fixes reg entry sizes in GIC node that were not
large enough to cover whole regions.

Signed-off-by: Tomasz Figa <t.figa@samsung.com>
Signed-off-by: Kukjin Kim <kgene.kim@samsung.com>
---
 arch/arm/boot/dts/exynos4.dtsi | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/arm/boot/dts/exynos4.dtsi b/arch/arm/boot/dts/exynos4.dtsi
index b8ece4be41ca..fbaf426d2daa 100644
--- a/arch/arm/boot/dts/exynos4.dtsi
+++ b/arch/arm/boot/dts/exynos4.dtsi
@@ -113,7 +113,7 @@
 		compatible = "arm,cortex-a9-gic";
 		#interrupt-cells = <3>;
 		interrupt-controller;
-		reg = <0x10490000 0x1000>, <0x10480000 0x100>;
+		reg = <0x10490000 0x10000>, <0x10480000 0x10000>;
 	};
 
 	combiner: interrupt-controller@10440000 {
-- 
cgit v1.2.3


From 4ca2ad55553ef528c055761a9fa4d2c140f7318b Mon Sep 17 00:00:00 2001
From: Fugang Duan <b38611@freescale.com>
Date: Mon, 19 May 2014 15:46:41 +0800
Subject: ARM: imx6sl: add missing enet clock for imx6sl

There's a enet clock gate missing in clock tree, thus add it.

Signed-off-by: Fugang Duan <B38611@freescale.com>
Signed-off-by: Shawn Guo <shawn.guo@freescale.com>
---
 arch/arm/mach-imx/clk-imx6sl.c           | 1 +
 include/dt-bindings/clock/imx6sl-clock.h | 3 ++-
 2 files changed, 3 insertions(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/arm/mach-imx/clk-imx6sl.c b/arch/arm/mach-imx/clk-imx6sl.c
index 21cf06cebade..5408ca70c8d6 100644
--- a/arch/arm/mach-imx/clk-imx6sl.c
+++ b/arch/arm/mach-imx/clk-imx6sl.c
@@ -312,6 +312,7 @@ static void __init imx6sl_clocks_init(struct device_node *ccm_node)
 	clks[IMX6SL_CLK_ECSPI2]       = imx_clk_gate2("ecspi2",       "ecspi_root",        base + 0x6c, 2);
 	clks[IMX6SL_CLK_ECSPI3]       = imx_clk_gate2("ecspi3",       "ecspi_root",        base + 0x6c, 4);
 	clks[IMX6SL_CLK_ECSPI4]       = imx_clk_gate2("ecspi4",       "ecspi_root",        base + 0x6c, 6);
+	clks[IMX6SL_CLK_ENET]         = imx_clk_gate2("enet",         "ipg",               base + 0x6c, 10);
 	clks[IMX6SL_CLK_EPIT1]        = imx_clk_gate2("epit1",        "perclk",            base + 0x6c, 12);
 	clks[IMX6SL_CLK_EPIT2]        = imx_clk_gate2("epit2",        "perclk",            base + 0x6c, 14);
 	clks[IMX6SL_CLK_EXTERN_AUDIO] = imx_clk_gate2("extern_audio", "extern_audio_podf", base + 0x6c, 16);
diff --git a/include/dt-bindings/clock/imx6sl-clock.h b/include/dt-bindings/clock/imx6sl-clock.h
index 7cf5c9969336..b91dd462ba85 100644
--- a/include/dt-bindings/clock/imx6sl-clock.h
+++ b/include/dt-bindings/clock/imx6sl-clock.h
@@ -145,6 +145,7 @@
 #define IMX6SL_CLK_USDHC4		132
 #define IMX6SL_CLK_PLL4_AUDIO_DIV	133
 #define IMX6SL_CLK_SPBA			134
-#define IMX6SL_CLK_END			135
+#define IMX6SL_CLK_ENET			135
+#define IMX6SL_CLK_END			136
 
 #endif /* __DT_BINDINGS_CLOCK_IMX6SL_H */
-- 
cgit v1.2.3


From 8c562a1ef8a1f7c74323d5d664a40c6a4317ae4e Mon Sep 17 00:00:00 2001
From: Fugang Duan <b38611@freescale.com>
Date: Mon, 19 May 2014 15:46:56 +0800
Subject: ARM: dts: imx6sl: correct the fec ipg clock source

imx6sl fec MDIO clock source is from ipg 66Mhz, but the currect imx6sl
device tree define it as "enet_ref" clock (50Mhz), so the patch just
corrects imx6sl dtsi fec "ipg" clock.

Signed-off-by: Fugang Duan <B38611@freescale.com>
Signed-off-by: Shawn Guo <shawn.guo@freescale.com>
---
 arch/arm/boot/dts/imx6sl.dtsi | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/arm/boot/dts/imx6sl.dtsi b/arch/arm/boot/dts/imx6sl.dtsi
index 2d4e5285f3f3..57d4abe03a94 100644
--- a/arch/arm/boot/dts/imx6sl.dtsi
+++ b/arch/arm/boot/dts/imx6sl.dtsi
@@ -686,7 +686,7 @@
 				compatible = "fsl,imx6sl-fec", "fsl,imx25-fec";
 				reg = <0x02188000 0x4000>;
 				interrupts = <0 114 IRQ_TYPE_LEVEL_HIGH>;
-				clocks = <&clks IMX6SL_CLK_ENET_REF>,
+				clocks = <&clks IMX6SL_CLK_ENET>,
 					 <&clks IMX6SL_CLK_ENET_REF>;
 				clock-names = "ipg", "ahb";
 				status = "disabled";
-- 
cgit v1.2.3


From 27fe8945e42e5fa64a8bca65e8c99a4c4989b6f2 Mon Sep 17 00:00:00 2001
From: Tim Harvey <tharvey@gateworks.com>
Date: Wed, 21 May 2014 23:04:53 -0700
Subject: ARM: dts: imx6: ventana: fix include typo

Fix typo and include the right dtsi file for the gw51xx board.

Signed-off-by: Tim Harvey <tharvey@gateworks.com>
Signed-off-by: Shawn Guo <shawn.guo@freescale.com>
---
 arch/arm/boot/dts/imx6q-gw51xx.dts | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/arm/boot/dts/imx6q-gw51xx.dts b/arch/arm/boot/dts/imx6q-gw51xx.dts
index af4929aee075..0e1406e58eff 100644
--- a/arch/arm/boot/dts/imx6q-gw51xx.dts
+++ b/arch/arm/boot/dts/imx6q-gw51xx.dts
@@ -11,7 +11,7 @@
 
 /dts-v1/;
 #include "imx6q.dtsi"
-#include "imx6qdl-gw54xx.dtsi"
+#include "imx6qdl-gw51xx.dtsi"
 
 / {
 	model = "Gateworks Ventana i.MX6 Quad GW51XX";
-- 
cgit v1.2.3


From 3c3868c52e8c002bc9c3af4529cbc79372f11b11 Mon Sep 17 00:00:00 2001
From: Philipp Zabel <p.zabel@pengutronix.de>
Date: Mon, 26 May 2014 10:40:31 +0200
Subject: ARM: dts: imx6qdl-gw5xxx: Fix Linear Technology vendor prefix

The vendor prefix for Linear Technology should be lltc,
same as the NASDAQ symbol.

Signed-off-by: Philipp Zabel <p.zabel@pengutronix.de>
Signed-off-by: Shawn Guo <shawn.guo@freescale.com>
---
 arch/arm/boot/dts/imx6qdl-gw51xx.dtsi | 2 +-
 arch/arm/boot/dts/imx6qdl-gw52xx.dtsi | 2 +-
 arch/arm/boot/dts/imx6qdl-gw53xx.dtsi | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'arch')

diff --git a/arch/arm/boot/dts/imx6qdl-gw51xx.dtsi b/arch/arm/boot/dts/imx6qdl-gw51xx.dtsi
index 31665adcbf39..0db15af41cb1 100644
--- a/arch/arm/boot/dts/imx6qdl-gw51xx.dtsi
+++ b/arch/arm/boot/dts/imx6qdl-gw51xx.dtsi
@@ -161,7 +161,7 @@
 	status = "okay";
 
 	pmic: ltc3676@3c {
-		compatible = "ltc,ltc3676";
+		compatible = "lltc,ltc3676";
 		reg = <0x3c>;
 
 		regulators {
diff --git a/arch/arm/boot/dts/imx6qdl-gw52xx.dtsi b/arch/arm/boot/dts/imx6qdl-gw52xx.dtsi
index 367af3ec9435..c0ac27fc59e9 100644
--- a/arch/arm/boot/dts/imx6qdl-gw52xx.dtsi
+++ b/arch/arm/boot/dts/imx6qdl-gw52xx.dtsi
@@ -220,7 +220,7 @@
 	};
 
 	pmic: ltc3676@3c {
-		compatible = "ltc,ltc3676";
+		compatible = "lltc,ltc3676";
 		reg = <0x3c>;
 
 		regulators {
diff --git a/arch/arm/boot/dts/imx6qdl-gw53xx.dtsi b/arch/arm/boot/dts/imx6qdl-gw53xx.dtsi
index c91b5a6c769b..adf150c1be90 100644
--- a/arch/arm/boot/dts/imx6qdl-gw53xx.dtsi
+++ b/arch/arm/boot/dts/imx6qdl-gw53xx.dtsi
@@ -234,7 +234,7 @@
 	};
 
 	pmic: ltc3676@3c {
-		compatible = "ltc,ltc3676";
+		compatible = "lltc,ltc3676";
 		reg = <0x3c>;
 
 		regulators {
-- 
cgit v1.2.3


From 5b4c180abc9d91a63f76f0388aefcc7908199eef Mon Sep 17 00:00:00 2001
From: Tim Harvey <tharvey@gateworks.com>
Date: Mon, 2 Jun 2014 11:44:01 -0700
Subject: ARM: dts: imx6: ventana: correct gw52xx sgtl5000 clock source

Correct the invalid clock for the sgtl5000 audio codec on the GW52xx Ventana
baseboard.

Signed-off-by: Tim Harvey <tharvey@gateworks.com>
Signed-off-by: Shawn Guo <shawn.guo@freescale.com>
---
 arch/arm/boot/dts/imx6qdl-gw52xx.dtsi | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/arm/boot/dts/imx6qdl-gw52xx.dtsi b/arch/arm/boot/dts/imx6qdl-gw52xx.dtsi
index c0ac27fc59e9..744c8a2d81f6 100644
--- a/arch/arm/boot/dts/imx6qdl-gw52xx.dtsi
+++ b/arch/arm/boot/dts/imx6qdl-gw52xx.dtsi
@@ -288,7 +288,7 @@
 	codec: sgtl5000@0a {
 		compatible = "fsl,sgtl5000";
 		reg = <0x0a>;
-		clocks = <&clks 169>;
+		clocks = <&clks 201>;
 		VDDA-supply = <&reg_1p8v>;
 		VDDIO-supply = <&reg_3p3v>;
 	};
-- 
cgit v1.2.3


From eea53bb16d0ff19c1fe2d29c359100df739a95d7 Mon Sep 17 00:00:00 2001
From: Russell King <rmk+kernel@arm.linux.org.uk>
Date: Tue, 17 Jun 2014 14:40:56 +0100
Subject: ARM: dts: cubox-i: add support for PWM-driven front panel LED

The front panel LED on the Cubox-i is driven by one of the iMX6 PWM
channels, and is wired between the PWM output and supply.

Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
Signed-off-by: Shawn Guo <shawn.guo@freescale.com>
---
 arch/arm/boot/dts/imx6qdl-cubox-i.dtsi | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

(limited to 'arch')

diff --git a/arch/arm/boot/dts/imx6qdl-cubox-i.dtsi b/arch/arm/boot/dts/imx6qdl-cubox-i.dtsi
index 25da82a03110..9202c8d18a30 100644
--- a/arch/arm/boot/dts/imx6qdl-cubox-i.dtsi
+++ b/arch/arm/boot/dts/imx6qdl-cubox-i.dtsi
@@ -12,6 +12,19 @@
 		pinctrl-0 = <&pinctrl_cubox_i_ir>;
 	};
 
+	pwmleds {
+		compatible = "pwm-leds";
+		pinctrl-names = "default";
+		pinctrl-0 = <&pinctrl_cubox_i_pwm1>;
+
+		front {
+			active-low;
+			label = "imx6:red:front";
+			max-brightness = <248>;
+			pwms = <&pwm1 0 50000>;
+		};
+	};
+
 	regulators {
 		compatible = "simple-bus";
 
@@ -109,6 +122,10 @@
 			>;
 		};
 
+		pinctrl_cubox_i_pwm1: cubox-i-pwm1-front-led {
+			fsl,pins = <MX6QDL_PAD_DISP0_DAT8__PWM1_OUT 0x1b0b0>;
+		};
+
 		pinctrl_cubox_i_spdif: cubox-i-spdif {
 			fsl,pins = <MX6QDL_PAD_GPIO_17__SPDIF_OUT 0x13091>;
 		};
-- 
cgit v1.2.3


From 589681b20688267a06a068d77ef10fc7c34168ba Mon Sep 17 00:00:00 2001
From: Russell King <rmk+kernel@arm.linux.org.uk>
Date: Tue, 17 Jun 2014 14:41:01 +0100
Subject: ARM: dts: hummingboard/cubox-i: move usb otg configuration to
 platform level

The configuration of the USB OTG is a platform configuration decision,
not a microsom decision.  Move this configuration out to the platform
level files.

Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
Signed-off-by: Shawn Guo <shawn.guo@freescale.com>
---
 arch/arm/boot/dts/imx6dl-hummingboard.dts | 10 ++++++++++
 arch/arm/boot/dts/imx6qdl-cubox-i.dtsi    | 10 ++++++++++
 arch/arm/boot/dts/imx6qdl-microsom.dtsi   | 13 -------------
 3 files changed, 20 insertions(+), 13 deletions(-)

(limited to 'arch')

diff --git a/arch/arm/boot/dts/imx6dl-hummingboard.dts b/arch/arm/boot/dts/imx6dl-hummingboard.dts
index 5373a5f2782b..c8e51dd41b8f 100644
--- a/arch/arm/boot/dts/imx6dl-hummingboard.dts
+++ b/arch/arm/boot/dts/imx6dl-hummingboard.dts
@@ -143,6 +143,14 @@
 			fsl,pins = <MX6QDL_PAD_GPIO_0__GPIO1_IO00 0x1b0b0>;
 		};
 
+		pinctrl_hummingboard_usbotg_id: hummingboard-usbotg-id {
+			/*
+			 * Similar to pinctrl_usbotg_2, but we want it
+			 * pulled down for a fixed host connection.
+			 */
+			fsl,pins = <MX6QDL_PAD_GPIO_1__USB_OTG_ID 0x13059>;
+		};
+
 		pinctrl_hummingboard_usbotg_vbus: hummingboard-usbotg-vbus {
 			fsl,pins = <MX6QDL_PAD_EIM_D22__GPIO3_IO22 0x1b0b0>;
 		};
@@ -178,6 +186,8 @@
 };
 
 &usbotg {
+	pinctrl-names = "default";
+	pinctrl-0 = <&pinctrl_hummingboard_usbotg_id>;
 	vbus-supply = <&reg_usbotg_vbus>;
 	status = "okay";
 };
diff --git a/arch/arm/boot/dts/imx6qdl-cubox-i.dtsi b/arch/arm/boot/dts/imx6qdl-cubox-i.dtsi
index 9202c8d18a30..e8e781656b3f 100644
--- a/arch/arm/boot/dts/imx6qdl-cubox-i.dtsi
+++ b/arch/arm/boot/dts/imx6qdl-cubox-i.dtsi
@@ -134,6 +134,14 @@
 			fsl,pins = <MX6QDL_PAD_GPIO_0__GPIO1_IO00 0x4001b0b0>;
 		};
 
+		pinctrl_cubox_i_usbotg_id: cubox-i-usbotg-id {
+			/*
+			 * The Cubox-i pulls this low, but as it's pointless
+			 * leaving it as a pull-up, even if it is just 10uA.
+			 */
+			fsl,pins = <MX6QDL_PAD_GPIO_1__USB_OTG_ID 0x13059>;
+		};
+
 		pinctrl_cubox_i_usbotg_vbus: cubox-i-usbotg-vbus {
 			fsl,pins = <MX6QDL_PAD_EIM_D22__GPIO3_IO22 0x4001b0b0>;
 		};
@@ -170,6 +178,8 @@
 };
 
 &usbotg {
+	pinctrl-names = "default";
+	pinctrl-0 = <&pinctrl_cubox_i_usbotg_id>;
 	vbus-supply = <&reg_usbotg_vbus>;
 	status = "okay";
 };
diff --git a/arch/arm/boot/dts/imx6qdl-microsom.dtsi b/arch/arm/boot/dts/imx6qdl-microsom.dtsi
index d729d0b15f25..79eac6849d4c 100644
--- a/arch/arm/boot/dts/imx6qdl-microsom.dtsi
+++ b/arch/arm/boot/dts/imx6qdl-microsom.dtsi
@@ -10,14 +10,6 @@
 				MX6QDL_PAD_CSI0_DAT11__UART1_RX_DATA	0x1b0b1
 			>;
 		};
-
-		pinctrl_microsom_usbotg: microsom-usbotg {
-			/*
-			 * Similar to pinctrl_usbotg_2, but we want it
-			 * pulled down for a fixed host connection.
-			 */
-			fsl,pins = <MX6QDL_PAD_GPIO_1__USB_OTG_ID 0x13059>;
-		};
 	};
 };
 
@@ -26,8 +18,3 @@
 	pinctrl-0 = <&pinctrl_microsom_uart1>;
 	status = "okay";
 };
-
-&usbotg {
-	pinctrl-names = "default";
-	pinctrl-0 = <&pinctrl_microsom_usbotg>;
-};
-- 
cgit v1.2.3


From 9796853e902447e53a17dae5df9eb609f0e31e6a Mon Sep 17 00:00:00 2001
From: Peter Griffin <peter.griffin@linaro.org>
Date: Mon, 16 Jun 2014 11:23:00 +0200
Subject: ARM: STi: DT: Properly define sti-ethclk & stmmaceth for stih415/6

This patch fixes two problems: -

1) The device tree isn't currently providing sti-ethclk which is
required by the dwmac glue code to correctly configure the ethernet
PHY clock speed.

This means depending on what the bootloader/jtag has
configured this clock to, and what switch/hub the board is plugged
into you most likely will NOT successfully negotiate a ethernet link.

2) The stmmaceth clock was associated with the wrong clock. It was
referencing the PHY clock rather than the interconnect clock which
clocks the IP.

This patch also brings us closer to not having to boot the upstream
kernel with the clk_ignore_unused parameter.

Acked-by: Lee Jones <lee.jones@linaro.org>
Signed-off-by: Peter Griffin <peter.griffin@linaro.org>
Signed-off-by: Maxime Coquelin <maxime.coquelin@st.com>
---
 arch/arm/boot/dts/stih415.dtsi           | 8 ++++----
 arch/arm/boot/dts/stih416.dtsi           | 8 ++++----
 include/dt-bindings/clock/stih415-clks.h | 1 +
 include/dt-bindings/clock/stih416-clks.h | 1 +
 4 files changed, 10 insertions(+), 8 deletions(-)

(limited to 'arch')

diff --git a/arch/arm/boot/dts/stih415.dtsi b/arch/arm/boot/dts/stih415.dtsi
index d6f254f302fe..a0f6f75fe3b5 100644
--- a/arch/arm/boot/dts/stih415.dtsi
+++ b/arch/arm/boot/dts/stih415.dtsi
@@ -169,8 +169,8 @@
 
 			pinctrl-names 	= "default";
 			pinctrl-0	= <&pinctrl_mii0>;
-			clock-names	= "stmmaceth";
-			clocks		= <&clk_s_a1_ls CLK_GMAC0_PHY>;
+			clock-names	= "stmmaceth", "sti-ethclk";
+			clocks		= <&clk_s_a1_ls CLK_ICN_IF_2>, <&clk_s_a1_ls CLK_GMAC0_PHY>;
 		};
 
 		ethernet1: dwmac@fef08000 {
@@ -192,8 +192,8 @@
 			reset-names		= "stmmaceth";
 			pinctrl-names 	= "default";
 			pinctrl-0	= <&pinctrl_mii1>;
-			clock-names	= "stmmaceth";
-			clocks		= <&clk_s_a0_ls CLK_ETH1_PHY>;
+			clock-names	= "stmmaceth", "sti-ethclk";
+			clocks		= <&clk_s_a0_ls CLK_ICN_REG>, <&clk_s_a0_ls CLK_ETH1_PHY>;
 		};
 
 		rc: rc@fe518000 {
diff --git a/arch/arm/boot/dts/stih416.dtsi b/arch/arm/boot/dts/stih416.dtsi
index 06473c5d9ea9..84758d76d064 100644
--- a/arch/arm/boot/dts/stih416.dtsi
+++ b/arch/arm/boot/dts/stih416.dtsi
@@ -175,8 +175,8 @@
 			reset-names		= "stmmaceth";
 			pinctrl-names 	= "default";
 			pinctrl-0	= <&pinctrl_mii0>;
-			clock-names	= "stmmaceth";
-			clocks		= <&clk_s_a1_ls CLK_GMAC0_PHY>;
+			clock-names	= "stmmaceth", "sti-ethclk";
+			clocks		= <&clk_s_a1_ls CLK_ICN_IF_2>, <&clk_s_a1_ls CLK_GMAC0_PHY>;
 		};
 
 		ethernet1: dwmac@fef08000 {
@@ -197,8 +197,8 @@
 			reset-names	= "stmmaceth";
 			pinctrl-names 	= "default";
 			pinctrl-0	= <&pinctrl_mii1>;
-			clock-names	= "stmmaceth";
-			clocks		= <&clk_s_a0_ls CLK_ETH1_PHY>;
+			clock-names	= "stmmaceth", "sti-ethclk";
+			clocks		= <&clk_s_a0_ls CLK_ICN_REG>, <&clk_s_a0_ls CLK_ETH1_PHY>;
 		};
 
 		rc: rc@fe518000 {
diff --git a/include/dt-bindings/clock/stih415-clks.h b/include/dt-bindings/clock/stih415-clks.h
index 0d2c7397e028..d80caa68aebd 100644
--- a/include/dt-bindings/clock/stih415-clks.h
+++ b/include/dt-bindings/clock/stih415-clks.h
@@ -10,6 +10,7 @@
 #define CLK_ETH1_PHY		4
 
 /* CLOCKGEN A1 */
+#define CLK_ICN_IF_2		0
 #define CLK_GMAC0_PHY		3
 
 #endif
diff --git a/include/dt-bindings/clock/stih416-clks.h b/include/dt-bindings/clock/stih416-clks.h
index 552c779eb6af..f9bdbd13568d 100644
--- a/include/dt-bindings/clock/stih416-clks.h
+++ b/include/dt-bindings/clock/stih416-clks.h
@@ -10,6 +10,7 @@
 #define CLK_ETH1_PHY		4
 
 /* CLOCKGEN A1 */
+#define CLK_ICN_IF_2		0
 #define CLK_GMAC0_PHY		3
 
 #endif
-- 
cgit v1.2.3


From c0c3c3590d0d178cd461f0c29aca0e83294c4bc4 Mon Sep 17 00:00:00 2001
From: Abhilash Kesavan <a.kesavan@samsung.com>
Date: Wed, 18 Jun 2014 08:08:49 +0900
Subject: ARM: EXYNOS: fix pm code to check for cortex A9 rather than the SoC

We have an soc check to ensure that the scu and certain A9 specific
registers are not accessed on Exynos5250 (which is A15 based).
Rather than adding another soc specific check for 5420 let us test
for the Cortex A9 primary part number.

This resolves the below crash seen on exynos5420 during core switching
after the CPUIdle consolidation series was merged.

[  155.975589] [<c0013174>] (scu_enable) from [<c001b0dc>] (exynos_cpu_pm_notifier+0x80/0xc4)
[  155.983833] [<c001b0dc>] (exynos_cpu_pm_notifier) from [<c003c1b0>] (notifier_call_chain+0x44/0x84)
[  155.992851] [<c003c1b0>] (notifier_call_chain) from [<c007a49c>] (cpu_pm_notify+0x20/0x3c)
[  156.001089] [<c007a49c>] (cpu_pm_notify) from [<c007a564>] (cpu_pm_exit+0x20/0x38)
[  156.008635] [<c007a564>] (cpu_pm_exit) from [<c0019e98>] (bL_switcher_thread+0x298/0x40c)
[  156.016788] [<c0019e98>] (bL_switcher_thread) from [<c003842c>] (kthread+0xcc/0xe8)
[  156.024426] [<c003842c>] (kthread) from [<c000e438>] (ret_from_fork+0x14/0x3c)
[  156.031621] Code: ea017fec c0530a00 c052e3f8 c0012dcc (e5903000

Signed-off-by: Abhilash Kesavan <a.kesavan@samsung.com>
Signed-off-by: Kukjin Kim <kgene.kim@samsung.com>
---
 arch/arm/mach-exynos/pm.c | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

(limited to 'arch')

diff --git a/arch/arm/mach-exynos/pm.c b/arch/arm/mach-exynos/pm.c
index 87c0d34c7fba..202ca73e49c4 100644
--- a/arch/arm/mach-exynos/pm.c
+++ b/arch/arm/mach-exynos/pm.c
@@ -300,7 +300,7 @@ static int exynos_pm_suspend(void)
 	tmp = (S5P_USE_STANDBY_WFI0 | S5P_USE_STANDBY_WFE0);
 	__raw_writel(tmp, S5P_CENTRAL_SEQ_OPTION);
 
-	if (!soc_is_exynos5250())
+	if (read_cpuid_part_number() == ARM_CPU_PART_CORTEX_A9)
 		exynos_cpu_save_register();
 
 	return 0;
@@ -334,7 +334,7 @@ static void exynos_pm_resume(void)
 	if (exynos_pm_central_resume())
 		goto early_wakeup;
 
-	if (!soc_is_exynos5250())
+	if (read_cpuid_part_number() == ARM_CPU_PART_CORTEX_A9)
 		exynos_cpu_restore_register();
 
 	/* For release retention */
@@ -353,7 +353,7 @@ static void exynos_pm_resume(void)
 
 	s3c_pm_do_restore_core(exynos_core_save, ARRAY_SIZE(exynos_core_save));
 
-	if (!soc_is_exynos5250())
+	if (read_cpuid_part_number() == ARM_CPU_PART_CORTEX_A9)
 		scu_enable(S5P_VA_SCU);
 
 early_wakeup:
@@ -440,15 +440,18 @@ static int exynos_cpu_pm_notifier(struct notifier_block *self,
 	case CPU_PM_ENTER:
 		if (cpu == 0) {
 			exynos_pm_central_suspend();
-			exynos_cpu_save_register();
+			if (read_cpuid_part_number() == ARM_CPU_PART_CORTEX_A9)
+				exynos_cpu_save_register();
 		}
 		break;
 
 	case CPU_PM_EXIT:
 		if (cpu == 0) {
-			if (!soc_is_exynos5250())
+			if (read_cpuid_part_number() ==
+					ARM_CPU_PART_CORTEX_A9) {
 				scu_enable(S5P_VA_SCU);
-			exynos_cpu_restore_register();
+				exynos_cpu_restore_register();
+			}
 			exynos_pm_central_resume();
 		}
 		break;
-- 
cgit v1.2.3


From 3f620ddc0dde30fe88be4cc80ae9ebda8bd37448 Mon Sep 17 00:00:00 2001
From: Ezequiel Garcia <ezequiel.garcia@free-electrons.com>
Date: Tue, 17 Jun 2014 20:08:56 -0300
Subject: ARM: mvebu: enable Dove SoC in mvebu_v7_defconfig

Since commit:

commit d93003e8e4e1fbbc8a06ec561a63f5aa105a4c45
Author: Sebastian Hesselbarth <sebastian.hesselbarth@gmail.com>
Date:   Thu Apr 24 22:58:30 2014 +0100

    ARM: 8042/1: iwmmxt: allow to build iWMMXt on Marvell PJ4B

fixed the error that prevented Dove SoC from being built with
the rest of the mvebu SoCs, we can now add it to the defconfig.

In addition, this commit enables SERIAL_OF_PLATFORM, which is required
to have UART on some of the boards.

Signed-off-by: Ezequiel Garcia <ezequiel.garcia@free-electrons.com>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
---
 arch/arm/configs/mvebu_v7_defconfig | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'arch')

diff --git a/arch/arm/configs/mvebu_v7_defconfig b/arch/arm/configs/mvebu_v7_defconfig
index e11170e37442..b0bfefa23902 100644
--- a/arch/arm/configs/mvebu_v7_defconfig
+++ b/arch/arm/configs/mvebu_v7_defconfig
@@ -14,6 +14,7 @@ CONFIG_MACH_ARMADA_370=y
 CONFIG_MACH_ARMADA_375=y
 CONFIG_MACH_ARMADA_38X=y
 CONFIG_MACH_ARMADA_XP=y
+CONFIG_MACH_DOVE=y
 CONFIG_NEON=y
 # CONFIG_CACHE_L2X0 is not set
 # CONFIG_SWP_EMULATE is not set
@@ -52,6 +53,7 @@ CONFIG_INPUT_EVDEV=y
 CONFIG_KEYBOARD_GPIO=y
 CONFIG_SERIAL_8250=y
 CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_SERIAL_OF_PLATFORM=y
 CONFIG_I2C=y
 CONFIG_SPI=y
 CONFIG_SPI_ORION=y
-- 
cgit v1.2.3


From 88a1c67ff6e6fe5d8391cd87ea89744a5f2728a4 Mon Sep 17 00:00:00 2001
From: Lee Jones <lee.jones@linaro.org>
Date: Wed, 18 Jun 2014 11:30:17 +0100
Subject: ARM: stih41x: Rename stih416-b2020-revE.dts to stih416-b2020e.dts

Two reasons for this rename.  Firstly, it removes the camel case
convention which isn't used by any other platform and secondly it
matches the naming convention for the internal kernel, which can
become annoying when flipping between the two.

Signed-off-by: Lee Jones <lee.jones@linaro.org>
Signed-off-by: Maxime Coquelin <maxime.coquelin@st.com>
---
 arch/arm/boot/dts/Makefile               |  2 +-
 arch/arm/boot/dts/stih416-b2020-revE.dts | 35 --------------------------------
 arch/arm/boot/dts/stih416-b2020e.dts     | 35 ++++++++++++++++++++++++++++++++
 3 files changed, 36 insertions(+), 36 deletions(-)
 delete mode 100644 arch/arm/boot/dts/stih416-b2020-revE.dts
 create mode 100644 arch/arm/boot/dts/stih416-b2020e.dts

(limited to 'arch')

diff --git a/arch/arm/boot/dts/Makefile b/arch/arm/boot/dts/Makefile
index 5986ff63b901..adb5ed9e269e 100644
--- a/arch/arm/boot/dts/Makefile
+++ b/arch/arm/boot/dts/Makefile
@@ -357,7 +357,7 @@ dtb-$(CONFIG_ARCH_STI)+= stih407-b2120.dtb \
 	stih415-b2020.dtb \
 	stih416-b2000.dtb \
 	stih416-b2020.dtb \
-	stih416-b2020-revE.dtb
+	stih416-b2020e.dtb
 dtb-$(CONFIG_MACH_SUN4I) += \
 	sun4i-a10-a1000.dtb \
 	sun4i-a10-cubieboard.dtb \
diff --git a/arch/arm/boot/dts/stih416-b2020-revE.dts b/arch/arm/boot/dts/stih416-b2020-revE.dts
deleted file mode 100644
index ba0fa2caaf18..000000000000
--- a/arch/arm/boot/dts/stih416-b2020-revE.dts
+++ /dev/null
@@ -1,35 +0,0 @@
-/*
- * Copyright (C) 2014 STMicroelectronics (R&D) Limited.
- * Author: Lee Jones <lee.jones@linaro.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * publishhed by the Free Software Foundation.
- */
-/dts-v1/;
-#include "stih416.dtsi"
-#include "stih41x-b2020.dtsi"
-/ {
-	model = "STiH416 B2020 REV-E";
-	compatible = "st,stih416-b2020", "st,stih416";
-
-	soc {
-		leds {
-			compatible = "gpio-leds";
-			red {
-				#gpio-cells		= <1>;
-				label			= "Front Panel LED";
-				gpios			= <&PIO4 1>;
-				linux,default-trigger	= "heartbeat";
-			};
-			green {
-				gpios			= <&PIO1 3>;
-				default-state 		= "off";
-			};
-		};
-
-		ethernet1: dwmac@fef08000 {
-			snps,reset-gpio = <&PIO0 7>;
-		};
-	};
-};
diff --git a/arch/arm/boot/dts/stih416-b2020e.dts b/arch/arm/boot/dts/stih416-b2020e.dts
new file mode 100644
index 000000000000..ba0fa2caaf18
--- /dev/null
+++ b/arch/arm/boot/dts/stih416-b2020e.dts
@@ -0,0 +1,35 @@
+/*
+ * Copyright (C) 2014 STMicroelectronics (R&D) Limited.
+ * Author: Lee Jones <lee.jones@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * publishhed by the Free Software Foundation.
+ */
+/dts-v1/;
+#include "stih416.dtsi"
+#include "stih41x-b2020.dtsi"
+/ {
+	model = "STiH416 B2020 REV-E";
+	compatible = "st,stih416-b2020", "st,stih416";
+
+	soc {
+		leds {
+			compatible = "gpio-leds";
+			red {
+				#gpio-cells		= <1>;
+				label			= "Front Panel LED";
+				gpios			= <&PIO4 1>;
+				linux,default-trigger	= "heartbeat";
+			};
+			green {
+				gpios			= <&PIO1 3>;
+				default-state 		= "off";
+			};
+		};
+
+		ethernet1: dwmac@fef08000 {
+			snps,reset-gpio = <&PIO0 7>;
+		};
+	};
+};
-- 
cgit v1.2.3


From 5f56e7167e6d438324fcba87018255d81e201383 Mon Sep 17 00:00:00 2001
From: Andy Lutomirski <luto@amacapital.net>
Date: Wed, 18 Jun 2014 15:59:46 -0700
Subject: x86/vdso: Discard the __bug_table section

It serves no purpose in user code.

Signed-off-by: Andy Lutomirski <luto@amacapital.net>
Link: http://lkml.kernel.org/r/2a5bebff42defd8a5e81d96f7dc00f21143c80e8.1403129369.git.luto@amacapital.net
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
---
 arch/x86/vdso/vdso-layout.lds.S | 1 +
 1 file changed, 1 insertion(+)

(limited to 'arch')

diff --git a/arch/x86/vdso/vdso-layout.lds.S b/arch/x86/vdso/vdso-layout.lds.S
index 2ec72f651ebf..c84166cbcd28 100644
--- a/arch/x86/vdso/vdso-layout.lds.S
+++ b/arch/x86/vdso/vdso-layout.lds.S
@@ -75,6 +75,7 @@ SECTIONS
 	/DISCARD/ : {
 		*(.discard)
 		*(.discard.*)
+		*(__bug_table)
 	}
 }
 
-- 
cgit v1.2.3


From c1979c370273fd9f7326ffa27a63b9ddb0f495f4 Mon Sep 17 00:00:00 2001
From: Andy Lutomirski <luto@amacapital.net>
Date: Wed, 18 Jun 2014 15:59:47 -0700
Subject: x86/vdso2c: Use better macros for ELF bitness

Rather than using a separate macro for each replacement, use generic
macros.

Signed-off-by: Andy Lutomirski <luto@amacapital.net>
Link: http://lkml.kernel.org/r/d953cd2e70ceee1400985d091188cdd65fba2f05.1403129369.git.luto@amacapital.net
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
---
 arch/x86/vdso/vdso2c.c | 42 +++++++++++++-----------------------------
 arch/x86/vdso/vdso2c.h | 23 ++++++++++++-----------
 2 files changed, 25 insertions(+), 40 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/vdso/vdso2c.c b/arch/x86/vdso/vdso2c.c
index 7a6bf50f9165..734389976cc0 100644
--- a/arch/x86/vdso/vdso2c.c
+++ b/arch/x86/vdso/vdso2c.c
@@ -83,37 +83,21 @@ extern void bad_put_le(void);
 
 #define NSYMS (sizeof(required_syms) / sizeof(required_syms[0]))
 
-#define BITS 64
-#define GOFUNC go64
-#define Elf_Ehdr Elf64_Ehdr
-#define Elf_Shdr Elf64_Shdr
-#define Elf_Phdr Elf64_Phdr
-#define Elf_Sym Elf64_Sym
-#define Elf_Dyn Elf64_Dyn
+#define BITSFUNC3(name, bits) name##bits
+#define BITSFUNC2(name, bits) BITSFUNC3(name, bits)
+#define BITSFUNC(name) BITSFUNC2(name, ELF_BITS)
+
+#define ELF_BITS_XFORM2(bits, x) Elf##bits##_##x
+#define ELF_BITS_XFORM(bits, x) ELF_BITS_XFORM2(bits, x)
+#define ELF(x) ELF_BITS_XFORM(ELF_BITS, x)
+
+#define ELF_BITS 64
 #include "vdso2c.h"
-#undef BITS
-#undef GOFUNC
-#undef Elf_Ehdr
-#undef Elf_Shdr
-#undef Elf_Phdr
-#undef Elf_Sym
-#undef Elf_Dyn
-
-#define BITS 32
-#define GOFUNC go32
-#define Elf_Ehdr Elf32_Ehdr
-#define Elf_Shdr Elf32_Shdr
-#define Elf_Phdr Elf32_Phdr
-#define Elf_Sym Elf32_Sym
-#define Elf_Dyn Elf32_Dyn
+#undef ELF_BITS
+
+#define ELF_BITS 32
 #include "vdso2c.h"
-#undef BITS
-#undef GOFUNC
-#undef Elf_Ehdr
-#undef Elf_Shdr
-#undef Elf_Phdr
-#undef Elf_Sym
-#undef Elf_Dyn
+#undef ELF_BITS
 
 static void go(void *addr, size_t len, FILE *outfile, const char *name)
 {
diff --git a/arch/x86/vdso/vdso2c.h b/arch/x86/vdso/vdso2c.h
index c6eefaf389b9..8e185ce39e69 100644
--- a/arch/x86/vdso/vdso2c.h
+++ b/arch/x86/vdso/vdso2c.h
@@ -4,23 +4,24 @@
  * are built for 32-bit userspace.
  */
 
-static void GOFUNC(void *addr, size_t len, FILE *outfile, const char *name)
+static void BITSFUNC(go)(void *addr, size_t len,
+			 FILE *outfile, const char *name)
 {
 	int found_load = 0;
 	unsigned long load_size = -1;  /* Work around bogus warning */
 	unsigned long data_size;
-	Elf_Ehdr *hdr = (Elf_Ehdr *)addr;
+	ELF(Ehdr) *hdr = (ELF(Ehdr) *)addr;
 	int i;
 	unsigned long j;
-	Elf_Shdr *symtab_hdr = NULL, *strtab_hdr, *secstrings_hdr,
+	ELF(Shdr) *symtab_hdr = NULL, *strtab_hdr, *secstrings_hdr,
 		*alt_sec = NULL;
-	Elf_Dyn *dyn = 0, *dyn_end = 0;
+	ELF(Dyn) *dyn = 0, *dyn_end = 0;
 	const char *secstrings;
 	uint64_t syms[NSYMS] = {};
 
 	uint64_t fake_sections_value = 0, fake_sections_size = 0;
 
-	Elf_Phdr *pt = (Elf_Phdr *)(addr + GET_LE(&hdr->e_phoff));
+	ELF(Phdr) *pt = (ELF(Phdr) *)(addr + GET_LE(&hdr->e_phoff));
 
 	/* Walk the segment table. */
 	for (i = 0; i < GET_LE(&hdr->e_phnum); i++) {
@@ -61,7 +62,7 @@ static void GOFUNC(void *addr, size_t len, FILE *outfile, const char *name)
 		GET_LE(&hdr->e_shentsize)*GET_LE(&hdr->e_shstrndx);
 	secstrings = addr + GET_LE(&secstrings_hdr->sh_offset);
 	for (i = 0; i < GET_LE(&hdr->e_shnum); i++) {
-		Elf_Shdr *sh = addr + GET_LE(&hdr->e_shoff) +
+		ELF(Shdr) *sh = addr + GET_LE(&hdr->e_shoff) +
 			GET_LE(&hdr->e_shentsize) * i;
 		if (GET_LE(&sh->sh_type) == SHT_SYMTAB)
 			symtab_hdr = sh;
@@ -82,7 +83,7 @@ static void GOFUNC(void *addr, size_t len, FILE *outfile, const char *name)
 	     i < GET_LE(&symtab_hdr->sh_size) / GET_LE(&symtab_hdr->sh_entsize);
 	     i++) {
 		int k;
-		Elf_Sym *sym = addr + GET_LE(&symtab_hdr->sh_offset) +
+		ELF(Sym) *sym = addr + GET_LE(&symtab_hdr->sh_offset) +
 			GET_LE(&symtab_hdr->sh_entsize) * i;
 		const char *name = addr + GET_LE(&strtab_hdr->sh_offset) +
 			GET_LE(&sym->st_name);
@@ -123,12 +124,12 @@ static void GOFUNC(void *addr, size_t len, FILE *outfile, const char *name)
 		fail("end_mapping must be a multiple of 4096\n");
 
 	/* Remove sections or use fakes */
-	if (fake_sections_size % sizeof(Elf_Shdr))
+	if (fake_sections_size % sizeof(ELF(Shdr)))
 		fail("vdso_fake_sections size is not a multiple of %ld\n",
-		     (long)sizeof(Elf_Shdr));
+		     (long)sizeof(ELF(Shdr)));
 	PUT_LE(&hdr->e_shoff, fake_sections_value);
-	PUT_LE(&hdr->e_shentsize, fake_sections_value ? sizeof(Elf_Shdr) : 0);
-	PUT_LE(&hdr->e_shnum, fake_sections_size / sizeof(Elf_Shdr));
+	PUT_LE(&hdr->e_shentsize, fake_sections_value ? sizeof(ELF(Shdr)) : 0);
+	PUT_LE(&hdr->e_shnum, fake_sections_size / sizeof(ELF(Shdr)));
 	PUT_LE(&hdr->e_shstrndx, SHN_UNDEF);
 
 	if (!name) {
-- 
cgit v1.2.3


From bfad381c0d1e19cae8461e105d8d4387dd2a14fe Mon Sep 17 00:00:00 2001
From: Andy Lutomirski <luto@amacapital.net>
Date: Wed, 18 Jun 2014 15:59:48 -0700
Subject: x86/vdso: Improve the fake section headers

Fully stripping the vDSO has other unfortunate side effects:

 - binutils is unable to find ELF notes without a SHT_NOTE section.

 - Even elfutils has trouble: it can find ELF notes without a section
   table at all, but if a section table is present, it won't look for
   PT_NOTE.

 - gdb wants section names to match between stripped DSOs and their
   symbols; otherwise it will corrupt symbol addresses.

We're also breaking the rules: section 0 is supposed to be SHT_NULL.

Fix these problems by building a better fake section table.  While
we're at it, we might as well let buggy Go versions keep working well
by giving the SHT_DYNSYM entry the correct size.

This is a bit unfortunate: it adds quite a bit of size to the vdso
image.

If/when binutils improves and the improved versions become widespread,
it would be worth considering dropping most of this.

Signed-off-by: Andy Lutomirski <luto@amacapital.net>
Link: http://lkml.kernel.org/r/0e546a5eeaafdf1840e6ee654a55c1e727c26663.1403129369.git.luto@amacapital.net
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
---
 arch/x86/vdso/Makefile                   |   4 +-
 arch/x86/vdso/vdso-fakesections.c        |  44 ++++----
 arch/x86/vdso/vdso-layout.lds.S          |  40 +++++--
 arch/x86/vdso/vdso.lds.S                 |   2 +
 arch/x86/vdso/vdso2c.c                   |  31 ++++--
 arch/x86/vdso/vdso2c.h                   | 180 +++++++++++++++++++++++++++----
 arch/x86/vdso/vdso32/vdso-fakesections.c |   1 +
 arch/x86/vdso/vdsox32.lds.S              |   2 +
 8 files changed, 237 insertions(+), 67 deletions(-)
 create mode 100644 arch/x86/vdso/vdso32/vdso-fakesections.c

(limited to 'arch')

diff --git a/arch/x86/vdso/Makefile b/arch/x86/vdso/Makefile
index 3c0809a0631f..2c1ca98eb612 100644
--- a/arch/x86/vdso/Makefile
+++ b/arch/x86/vdso/Makefile
@@ -11,7 +11,6 @@ VDSO32-$(CONFIG_COMPAT)		:= y
 
 # files to link into the vdso
 vobjs-y := vdso-note.o vclock_gettime.o vgetcpu.o vdso-fakesections.o
-vobjs-nox32 := vdso-fakesections.o
 
 # files to link into kernel
 obj-y				+= vma.o
@@ -134,7 +133,7 @@ override obj-dirs = $(dir $(obj)) $(obj)/vdso32/
 
 targets += vdso32/vdso32.lds
 targets += vdso32/note.o vdso32/vclock_gettime.o $(vdso32.so-y:%=vdso32/%.o)
-targets += vdso32/vclock_gettime.o
+targets += vdso32/vclock_gettime.o vdso32/vdso-fakesections.o
 
 $(obj)/vdso32.o: $(vdso32-images:%=$(obj)/%)
 
@@ -155,6 +154,7 @@ $(vdso32-images:%=$(obj)/%.dbg): KBUILD_CFLAGS = $(KBUILD_CFLAGS_32)
 $(vdso32-images:%=$(obj)/%.dbg): $(obj)/vdso32-%.so.dbg: FORCE \
 				 $(obj)/vdso32/vdso32.lds \
 				 $(obj)/vdso32/vclock_gettime.o \
+				 $(obj)/vdso32/vdso-fakesections.o \
 				 $(obj)/vdso32/note.o \
 				 $(obj)/vdso32/%.o
 	$(call if_changed,vdso)
diff --git a/arch/x86/vdso/vdso-fakesections.c b/arch/x86/vdso/vdso-fakesections.c
index cb8a8d72c24b..56927a7e4977 100644
--- a/arch/x86/vdso/vdso-fakesections.c
+++ b/arch/x86/vdso/vdso-fakesections.c
@@ -2,31 +2,23 @@
  * Copyright 2014 Andy Lutomirski
  * Subject to the GNU Public License, v.2
  *
- * Hack to keep broken Go programs working.
- *
- * The Go runtime had a couple of bugs: it would read the section table to try
- * to figure out how many dynamic symbols there were (it shouldn't have looked
- * at the section table at all) and, if there were no SHT_SYNDYM section table
- * entry, it would use an uninitialized value for the number of symbols.  As a
- * workaround, we supply a minimal section table.  vdso2c will adjust the
- * in-memory image so that "vdso_fake_sections" becomes the section table.
- *
- * The bug was introduced by:
- * https://code.google.com/p/go/source/detail?r=56ea40aac72b (2012-08-31)
- * and is being addressed in the Go runtime in this issue:
- * https://code.google.com/p/go/issues/detail?id=8197
+ * String table for loadable section headers.  See vdso2c.h for why
+ * this exists.
  */
 
-#ifndef __x86_64__
-#error This hack is specific to the 64-bit vDSO
-#endif
-
-#include <linux/elf.h>
-
-extern const __visible struct elf64_shdr vdso_fake_sections[];
-const __visible struct elf64_shdr vdso_fake_sections[] = {
-	{
-		.sh_type = SHT_DYNSYM,
-		.sh_entsize = sizeof(Elf64_Sym),
-	}
-};
+const char fake_shstrtab[] __attribute__((section(".fake_shstrtab"))) =
+	".hash\0"
+	".dynsym\0"
+	".dynstr\0"
+	".gnu.version\0"
+	".gnu.version_d\0"
+	".dynamic\0"
+	".rodata\0"
+	".fake_shstrtab\0"  /* Yay, self-referential code. */
+	".note\0"
+	".data\0"
+	".altinstructions\0"
+	".altinstr_replacement\0"
+	".eh_frame_hdr\0"
+	".eh_frame\0"
+	".text";
diff --git a/arch/x86/vdso/vdso-layout.lds.S b/arch/x86/vdso/vdso-layout.lds.S
index c84166cbcd28..e4cbc2145bab 100644
--- a/arch/x86/vdso/vdso-layout.lds.S
+++ b/arch/x86/vdso/vdso-layout.lds.S
@@ -6,6 +6,16 @@
  * This script controls its layout.
  */
 
+#if defined(BUILD_VDSO64)
+# define SHDR_SIZE 64
+#elif defined(BUILD_VDSO32) || defined(BUILD_VDSOX32)
+# define SHDR_SIZE 40
+#else
+# error unknown VDSO target
+#endif
+
+#define NUM_FAKE_SHDRS 16
+
 SECTIONS
 {
 	. = SIZEOF_HEADERS;
@@ -25,15 +35,29 @@ SECTIONS
 
 	.dynamic	: { *(.dynamic) }		:text	:dynamic
 
-	.rodata		: { *(.rodata*) }		:text
+	.rodata		: {
+		*(.rodata*)
+
+		/*
+		 * Ideally this would live in a C file, but that won't
+		 * work cleanly for x32 until we start building the x32
+		 * C code using an x32 toolchain.
+		 */
+		VDSO_FAKE_SECTION_TABLE_START = .;
+		. = . + NUM_FAKE_SHDRS * SHDR_SIZE;
+		VDSO_FAKE_SECTION_TABLE_END = .;
+	}						:text
+
+	.fake_shstrtab	: { *(.fake_shstrtab) }		:text
+
 	.data		: {
-	      *(.data*)
-	      *(.sdata*)
-	      *(.got.plt) *(.got)
-	      *(.gnu.linkonce.d.*)
-	      *(.bss*)
-	      *(.dynbss*)
-	      *(.gnu.linkonce.b.*)
+		*(.data*)
+		*(.sdata*)
+		*(.got.plt) *(.got)
+		*(.gnu.linkonce.d.*)
+		*(.bss*)
+		*(.dynbss*)
+		*(.gnu.linkonce.b.*)
 	}
 
 	.altinstructions	: { *(.altinstructions) }
diff --git a/arch/x86/vdso/vdso.lds.S b/arch/x86/vdso/vdso.lds.S
index 75e3404c83b1..6807932643c2 100644
--- a/arch/x86/vdso/vdso.lds.S
+++ b/arch/x86/vdso/vdso.lds.S
@@ -6,6 +6,8 @@
  * the DSO.
  */
 
+#define BUILD_VDSO64
+
 #include "vdso-layout.lds.S"
 
 /*
diff --git a/arch/x86/vdso/vdso2c.c b/arch/x86/vdso/vdso2c.c
index 734389976cc0..238dbe82776e 100644
--- a/arch/x86/vdso/vdso2c.c
+++ b/arch/x86/vdso/vdso2c.c
@@ -23,6 +23,8 @@ enum {
 	sym_vvar_page,
 	sym_hpet_page,
 	sym_end_mapping,
+	sym_VDSO_FAKE_SECTION_TABLE_START,
+	sym_VDSO_FAKE_SECTION_TABLE_END,
 };
 
 const int special_pages[] = {
@@ -30,15 +32,26 @@ const int special_pages[] = {
 	sym_hpet_page,
 };
 
-char const * const required_syms[] = {
-	[sym_vvar_page] = "vvar_page",
-	[sym_hpet_page] = "hpet_page",
-	[sym_end_mapping] = "end_mapping",
-	"VDSO32_NOTE_MASK",
-	"VDSO32_SYSENTER_RETURN",
-	"__kernel_vsyscall",
-	"__kernel_sigreturn",
-	"__kernel_rt_sigreturn",
+struct vdso_sym {
+	const char *name;
+	bool export;
+};
+
+struct vdso_sym required_syms[] = {
+	[sym_vvar_page] = {"vvar_page", true},
+	[sym_hpet_page] = {"hpet_page", true},
+	[sym_end_mapping] = {"end_mapping", true},
+	[sym_VDSO_FAKE_SECTION_TABLE_START] = {
+		"VDSO_FAKE_SECTION_TABLE_START", false
+	},
+	[sym_VDSO_FAKE_SECTION_TABLE_END] = {
+		"VDSO_FAKE_SECTION_TABLE_END", false
+	},
+	{"VDSO32_NOTE_MASK", true},
+	{"VDSO32_SYSENTER_RETURN", true},
+	{"__kernel_vsyscall", true},
+	{"__kernel_sigreturn", true},
+	{"__kernel_rt_sigreturn", true},
 };
 
 __attribute__((format(printf, 1, 2))) __attribute__((noreturn))
diff --git a/arch/x86/vdso/vdso2c.h b/arch/x86/vdso/vdso2c.h
index 8e185ce39e69..f01ed4bde880 100644
--- a/arch/x86/vdso/vdso2c.h
+++ b/arch/x86/vdso/vdso2c.h
@@ -4,6 +4,116 @@
  * are built for 32-bit userspace.
  */
 
+/*
+ * We're writing a section table for a few reasons:
+ *
+ * The Go runtime had a couple of bugs: it would read the section
+ * table to try to figure out how many dynamic symbols there were (it
+ * shouldn't have looked at the section table at all) and, if there
+ * were no SHT_SYNDYM section table entry, it would use an
+ * uninitialized value for the number of symbols.  An empty DYNSYM
+ * table would work, but I see no reason not to write a valid one (and
+ * keep full performance for old Go programs).  This hack is only
+ * needed on x86_64.
+ *
+ * The bug was introduced on 2012-08-31 by:
+ * https://code.google.com/p/go/source/detail?r=56ea40aac72b
+ * and was fixed on 2014-06-13 by:
+ * https://code.google.com/p/go/source/detail?r=fc1cd5e12595
+ *
+ * Binutils has issues debugging the vDSO: it reads the section table to
+ * find SHT_NOTE; it won't look at PT_NOTE for the in-memory vDSO, which
+ * would break build-id if we removed the section table.  Binutils
+ * also requires that shstrndx != 0.  See:
+ * https://sourceware.org/bugzilla/show_bug.cgi?id=17064
+ *
+ * elfutils might not look for PT_NOTE if there is a section table at
+ * all.  I don't know whether this matters for any practical purpose.
+ *
+ * For simplicity, rather than hacking up a partial section table, we
+ * just write a mostly complete one.  We omit non-dynamic symbols,
+ * though, since they're rather large.
+ *
+ * Once binutils gets fixed, we might be able to drop this for all but
+ * the 64-bit vdso, since build-id only works in kernel RPMs, and
+ * systems that update to new enough kernel RPMs will likely update
+ * binutils in sync.  build-id has never worked for home-built kernel
+ * RPMs without manual symlinking, and I suspect that no one ever does
+ * that.
+ */
+struct BITSFUNC(fake_sections)
+{
+	ELF(Shdr) *table;
+	unsigned long table_offset;
+	int count, max_count;
+
+	int in_shstrndx;
+	unsigned long shstr_offset;
+	const char *shstrtab;
+	size_t shstrtab_len;
+
+	int out_shstrndx;
+};
+
+static unsigned int BITSFUNC(find_shname)(struct BITSFUNC(fake_sections) *out,
+					  const char *name)
+{
+	const char *outname = out->shstrtab;
+	while (outname - out->shstrtab < out->shstrtab_len) {
+		if (!strcmp(name, outname))
+			return (outname - out->shstrtab) + out->shstr_offset;
+		outname += strlen(outname) + 1;
+	}
+
+	if (*name)
+		printf("Warning: could not find output name \"%s\"\n", name);
+	return out->shstr_offset + out->shstrtab_len - 1;  /* Use a null. */
+}
+
+static void BITSFUNC(init_sections)(struct BITSFUNC(fake_sections) *out)
+{
+	if (!out->in_shstrndx)
+		fail("didn't find the fake shstrndx\n");
+
+	memset(out->table, 0, out->max_count * sizeof(ELF(Shdr)));
+
+	if (out->max_count < 1)
+		fail("we need at least two fake output sections\n");
+
+	PUT_LE(&out->table[0].sh_type, SHT_NULL);
+	PUT_LE(&out->table[0].sh_name, BITSFUNC(find_shname)(out, ""));
+
+	out->count = 1;
+}
+
+static void BITSFUNC(copy_section)(struct BITSFUNC(fake_sections) *out,
+				   int in_idx, const ELF(Shdr) *in,
+				   const char *name)
+{
+	uint64_t flags = GET_LE(&in->sh_flags);
+
+	bool copy = flags & SHF_ALLOC;
+
+	if (!copy)
+		return;
+
+	if (out->count >= out->max_count)
+		fail("too many copied sections (max = %d)\n", out->max_count);
+
+	if (in_idx == out->in_shstrndx)
+		out->out_shstrndx = out->count;
+
+	out->table[out->count] = *in;
+	PUT_LE(&out->table[out->count].sh_name,
+	       BITSFUNC(find_shname)(out, name));
+
+	/* elfutils requires that a strtab have the correct type. */
+	if (!strcmp(name, ".fake_shstrtab"))
+		PUT_LE(&out->table[out->count].sh_type, SHT_STRTAB);
+
+	out->count++;
+}
+
 static void BITSFUNC(go)(void *addr, size_t len,
 			 FILE *outfile, const char *name)
 {
@@ -19,7 +129,7 @@ static void BITSFUNC(go)(void *addr, size_t len,
 	const char *secstrings;
 	uint64_t syms[NSYMS] = {};
 
-	uint64_t fake_sections_value = 0, fake_sections_size = 0;
+	struct BITSFUNC(fake_sections) fake_sections = {};
 
 	ELF(Phdr) *pt = (ELF(Phdr) *)(addr + GET_LE(&hdr->e_phoff));
 
@@ -89,23 +199,57 @@ static void BITSFUNC(go)(void *addr, size_t len,
 			GET_LE(&sym->st_name);
 
 		for (k = 0; k < NSYMS; k++) {
-			if (!strcmp(name, required_syms[k])) {
+			if (!strcmp(name, required_syms[k].name)) {
 				if (syms[k]) {
 					fail("duplicate symbol %s\n",
-					     required_syms[k]);
+					     required_syms[k].name);
 				}
 				syms[k] = GET_LE(&sym->st_value);
 			}
 		}
 
-		if (!strcmp(name, "vdso_fake_sections")) {
-			if (fake_sections_value)
-				fail("duplicate vdso_fake_sections\n");
-			fake_sections_value = GET_LE(&sym->st_value);
-			fake_sections_size = GET_LE(&sym->st_size);
+		if (!strcmp(name, "fake_shstrtab")) {
+			ELF(Shdr) *sh;
+
+			fake_sections.in_shstrndx = GET_LE(&sym->st_shndx);
+			fake_sections.shstrtab = addr + GET_LE(&sym->st_value);
+			fake_sections.shstrtab_len = GET_LE(&sym->st_size);
+			sh = addr + GET_LE(&hdr->e_shoff) +
+				GET_LE(&hdr->e_shentsize) *
+				fake_sections.in_shstrndx;
+			fake_sections.shstr_offset = GET_LE(&sym->st_value) -
+				GET_LE(&sh->sh_addr);
 		}
 	}
 
+	/* Build the output section table. */
+	if (!syms[sym_VDSO_FAKE_SECTION_TABLE_START] ||
+	    !syms[sym_VDSO_FAKE_SECTION_TABLE_END])
+		fail("couldn't find fake section table\n");
+	if ((syms[sym_VDSO_FAKE_SECTION_TABLE_END] -
+	     syms[sym_VDSO_FAKE_SECTION_TABLE_START]) % sizeof(ELF(Shdr)))
+		fail("fake section table size isn't a multiple of sizeof(Shdr)\n");
+	fake_sections.table = addr + syms[sym_VDSO_FAKE_SECTION_TABLE_START];
+	fake_sections.table_offset = syms[sym_VDSO_FAKE_SECTION_TABLE_START];
+	fake_sections.max_count = (syms[sym_VDSO_FAKE_SECTION_TABLE_END] -
+				   syms[sym_VDSO_FAKE_SECTION_TABLE_START]) /
+		sizeof(ELF(Shdr));
+
+	BITSFUNC(init_sections)(&fake_sections);
+	for (i = 0; i < GET_LE(&hdr->e_shnum); i++) {
+		ELF(Shdr) *sh = addr + GET_LE(&hdr->e_shoff) +
+			GET_LE(&hdr->e_shentsize) * i;
+		BITSFUNC(copy_section)(&fake_sections, i, sh,
+				       secstrings + GET_LE(&sh->sh_name));
+	}
+	if (!fake_sections.out_shstrndx)
+		fail("didn't generate shstrndx?!?\n");
+
+	PUT_LE(&hdr->e_shoff, fake_sections.table_offset);
+	PUT_LE(&hdr->e_shentsize, sizeof(ELF(Shdr)));
+	PUT_LE(&hdr->e_shnum, fake_sections.count);
+	PUT_LE(&hdr->e_shstrndx, fake_sections.out_shstrndx);
+
 	/* Validate mapping addresses. */
 	for (i = 0; i < sizeof(special_pages) / sizeof(special_pages[0]); i++) {
 		if (!syms[i])
@@ -113,25 +257,17 @@ static void BITSFUNC(go)(void *addr, size_t len,
 
 		if (syms[i] % 4096)
 			fail("%s must be a multiple of 4096\n",
-			     required_syms[i]);
+			     required_syms[i].name);
 		if (syms[i] < data_size)
 			fail("%s must be after the text mapping\n",
-			     required_syms[i]);
+			     required_syms[i].name);
 		if (syms[sym_end_mapping] < syms[i] + 4096)
-			fail("%s overruns end_mapping\n", required_syms[i]);
+			fail("%s overruns end_mapping\n",
+			     required_syms[i].name);
 	}
 	if (syms[sym_end_mapping] % 4096)
 		fail("end_mapping must be a multiple of 4096\n");
 
-	/* Remove sections or use fakes */
-	if (fake_sections_size % sizeof(ELF(Shdr)))
-		fail("vdso_fake_sections size is not a multiple of %ld\n",
-		     (long)sizeof(ELF(Shdr)));
-	PUT_LE(&hdr->e_shoff, fake_sections_value);
-	PUT_LE(&hdr->e_shentsize, fake_sections_value ? sizeof(ELF(Shdr)) : 0);
-	PUT_LE(&hdr->e_shnum, fake_sections_size / sizeof(ELF(Shdr)));
-	PUT_LE(&hdr->e_shstrndx, SHN_UNDEF);
-
 	if (!name) {
 		fwrite(addr, load_size, 1, outfile);
 		return;
@@ -169,9 +305,9 @@ static void BITSFUNC(go)(void *addr, size_t len,
 			(unsigned long)GET_LE(&alt_sec->sh_size));
 	}
 	for (i = 0; i < NSYMS; i++) {
-		if (syms[i])
+		if (required_syms[i].export && syms[i])
 			fprintf(outfile, "\t.sym_%s = 0x%" PRIx64 ",\n",
-				required_syms[i], syms[i]);
+				required_syms[i].name, syms[i]);
 	}
 	fprintf(outfile, "};\n");
 }
diff --git a/arch/x86/vdso/vdso32/vdso-fakesections.c b/arch/x86/vdso/vdso32/vdso-fakesections.c
new file mode 100644
index 000000000000..541468e25265
--- /dev/null
+++ b/arch/x86/vdso/vdso32/vdso-fakesections.c
@@ -0,0 +1 @@
+#include "../vdso-fakesections.c"
diff --git a/arch/x86/vdso/vdsox32.lds.S b/arch/x86/vdso/vdsox32.lds.S
index 46b991b578a8..697c11ece90c 100644
--- a/arch/x86/vdso/vdsox32.lds.S
+++ b/arch/x86/vdso/vdsox32.lds.S
@@ -6,6 +6,8 @@
  * the DSO.
  */
 
+#define BUILD_VDSOX32
+
 #include "vdso-layout.lds.S"
 
 /*
-- 
cgit v1.2.3


From 0e3727a8839c988a3c56170bc8da76d55a16acad Mon Sep 17 00:00:00 2001
From: Andy Lutomirski <luto@amacapital.net>
Date: Wed, 18 Jun 2014 15:59:49 -0700
Subject: x86/vdso: Remove some redundant in-memory section headers

.data doesn't need to be separate from .rodata: they're both readonly.

.altinstructions and .altinstr_replacement aren't needed by anything
except vdso2c; strip them from the final image.

While we're at it, rather than aligning the actual executable text,
just shove some unused-at-runtime data in between real data and
text.

My vdso image is still above 4k, but I'm disinclined to try to
trim it harder for 3.16.  For future trimming, I suspect that these
sections could be moved to later in the file and dropped from
the in-memory image:

.gnu.version and .gnu.version_d   (this may lose versions in gdb)
.eh_frame                         (should be harmless)
.eh_frame_hdr                     (I'm not really sure)
.hash                             (AFAIK nothing needs this section header)

Signed-off-by: Andy Lutomirski <luto@amacapital.net>
Link: http://lkml.kernel.org/r/2e96d0c49016ea6d026a614ae645e93edd325961.1403129369.git.luto@amacapital.net
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
---
 arch/x86/vdso/vdso-fakesections.c |  3 ---
 arch/x86/vdso/vdso-layout.lds.S   | 43 +++++++++++++++++++++------------------
 arch/x86/vdso/vdso2c.h            |  4 +++-
 3 files changed, 26 insertions(+), 24 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/vdso/vdso-fakesections.c b/arch/x86/vdso/vdso-fakesections.c
index 56927a7e4977..aa5fbfab20a5 100644
--- a/arch/x86/vdso/vdso-fakesections.c
+++ b/arch/x86/vdso/vdso-fakesections.c
@@ -16,9 +16,6 @@ const char fake_shstrtab[] __attribute__((section(".fake_shstrtab"))) =
 	".rodata\0"
 	".fake_shstrtab\0"  /* Yay, self-referential code. */
 	".note\0"
-	".data\0"
-	".altinstructions\0"
-	".altinstr_replacement\0"
 	".eh_frame_hdr\0"
 	".eh_frame\0"
 	".text";
diff --git a/arch/x86/vdso/vdso-layout.lds.S b/arch/x86/vdso/vdso-layout.lds.S
index e4cbc2145bab..9197544eea9a 100644
--- a/arch/x86/vdso/vdso-layout.lds.S
+++ b/arch/x86/vdso/vdso-layout.lds.S
@@ -14,7 +14,7 @@
 # error unknown VDSO target
 #endif
 
-#define NUM_FAKE_SHDRS 16
+#define NUM_FAKE_SHDRS 13
 
 SECTIONS
 {
@@ -28,15 +28,17 @@ SECTIONS
 	.gnu.version_d	: { *(.gnu.version_d) }
 	.gnu.version_r	: { *(.gnu.version_r) }
 
-	.note		: { *(.note.*) }		:text	:note
-
-	.eh_frame_hdr	: { *(.eh_frame_hdr) }		:text	:eh_frame_hdr
-	.eh_frame	: { KEEP (*(.eh_frame)) }	:text
-
 	.dynamic	: { *(.dynamic) }		:text	:dynamic
 
 	.rodata		: {
 		*(.rodata*)
+		*(.data*)
+		*(.sdata*)
+		*(.got.plt) *(.got)
+		*(.gnu.linkonce.d.*)
+		*(.bss*)
+		*(.dynbss*)
+		*(.gnu.linkonce.b.*)
 
 		/*
 		 * Ideally this would live in a C file, but that won't
@@ -50,27 +52,28 @@ SECTIONS
 
 	.fake_shstrtab	: { *(.fake_shstrtab) }		:text
 
-	.data		: {
-		*(.data*)
-		*(.sdata*)
-		*(.got.plt) *(.got)
-		*(.gnu.linkonce.d.*)
-		*(.bss*)
-		*(.dynbss*)
-		*(.gnu.linkonce.b.*)
-	}
 
-	.altinstructions	: { *(.altinstructions) }
-	.altinstr_replacement	: { *(.altinstr_replacement) }
+	.note		: { *(.note.*) }		:text	:note
+
+	.eh_frame_hdr	: { *(.eh_frame_hdr) }		:text	:eh_frame_hdr
+	.eh_frame	: { KEEP (*(.eh_frame)) }	:text
+
 
 	/*
-	 * Align the actual code well away from the non-instruction data.
-	 * This is the best thing for the I-cache.
+	 * Text is well-separated from actual data: there's plenty of
+	 * stuff that isn't used at runtime in between.
 	 */
-	. = ALIGN(0x100);
 
 	.text		: { *(.text*) }			:text	=0x90909090,
 
+	/*
+	 * At the end so that eu-elflint stays happy when vdso2c strips
+	 * these.  A better implementation would avoid allocating space
+	 * for these.
+	 */
+	.altinstructions	: { *(.altinstructions) }	:text
+	.altinstr_replacement	: { *(.altinstr_replacement) }	:text
+
 	/*
 	 * The remainder of the vDSO consists of special pages that are
 	 * shared between the kernel and userspace.  It needs to be at the
diff --git a/arch/x86/vdso/vdso2c.h b/arch/x86/vdso/vdso2c.h
index f01ed4bde880..f42e2ddc663d 100644
--- a/arch/x86/vdso/vdso2c.h
+++ b/arch/x86/vdso/vdso2c.h
@@ -92,7 +92,9 @@ static void BITSFUNC(copy_section)(struct BITSFUNC(fake_sections) *out,
 {
 	uint64_t flags = GET_LE(&in->sh_flags);
 
-	bool copy = flags & SHF_ALLOC;
+	bool copy = flags & SHF_ALLOC &&
+		strcmp(name, ".altinstructions") &&
+		strcmp(name, ".altinstr_replacement");
 
 	if (!copy)
 		return;
-- 
cgit v1.2.3


From 6d43925f5a21e0cfdb59714f7b78a8d278cf2357 Mon Sep 17 00:00:00 2001
From: Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
Date: Wed, 18 Jun 2014 09:52:22 +0100
Subject: ARM: multi_v7_defconfig: Add QCOM GSBI driver

This patch adds QCOM GSBI config option to multi_v7_defconfig. Serial
driver on QCOM APQ8064 depends on GSBI driver, so without this patch
there is no serial console on IF6410 board using multi_v7_defconfig.

Signed-off-by: Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
---
 arch/arm/configs/multi_v7_defconfig | 1 +
 1 file changed, 1 insertion(+)

(limited to 'arch')

diff --git a/arch/arm/configs/multi_v7_defconfig b/arch/arm/configs/multi_v7_defconfig
index 17d9462b9fb9..be1a3455a9fe 100644
--- a/arch/arm/configs/multi_v7_defconfig
+++ b/arch/arm/configs/multi_v7_defconfig
@@ -353,6 +353,7 @@ CONFIG_MFD_NVEC=y
 CONFIG_KEYBOARD_NVEC=y
 CONFIG_SERIO_NVEC_PS2=y
 CONFIG_NVEC_POWER=y
+CONFIG_QCOM_GSBI=y
 CONFIG_COMMON_CLK_QCOM=y
 CONFIG_MSM_GCC_8660=y
 CONFIG_MSM_MMCC_8960=y
-- 
cgit v1.2.3


From 8f95da90e9ac2abf38a40cd33c980d7d09b3278b Mon Sep 17 00:00:00 2001
From: Fabio Estevam <fabio.estevam@freescale.com>
Date: Fri, 20 Jun 2014 00:02:24 -0300
Subject: ARM: imx_v6_v7_defconfig: Enable CONFIG_IMX_IPUV3_CORE

Since commit 39b9004d1f (gpu: ipu-v3: Move i.MX IPUv3 core driver out of
staging) the ipuv3 core driver is no longer built bey default.

Select CONFIG_IMX_IPUV3_CORE so that the core ipuv3 code can be built again.

Signed-off-by: Fabio Estevam <fabio.estevam@freescale.com>
Signed-off-by: Shawn Guo <shawn.guo@freescale.com>
---
 arch/arm/configs/imx_v6_v7_defconfig | 1 +
 1 file changed, 1 insertion(+)

(limited to 'arch')

diff --git a/arch/arm/configs/imx_v6_v7_defconfig b/arch/arm/configs/imx_v6_v7_defconfig
index ef8815327e5b..59b7e45142d8 100644
--- a/arch/arm/configs/imx_v6_v7_defconfig
+++ b/arch/arm/configs/imx_v6_v7_defconfig
@@ -186,6 +186,7 @@ CONFIG_VIDEO_MX3=y
 CONFIG_V4L_MEM2MEM_DRIVERS=y
 CONFIG_VIDEO_CODA=y
 CONFIG_SOC_CAMERA_OV2640=y
+CONFIG_IMX_IPUV3_CORE=y
 CONFIG_DRM=y
 CONFIG_DRM_PANEL_SIMPLE=y
 CONFIG_BACKLIGHT_LCD_SUPPORT=y
-- 
cgit v1.2.3


From dda1e95cee38b416b23f751cac65421d781e3c10 Mon Sep 17 00:00:00 2001
From: Andy Lutomirski <luto@amacapital.net>
Date: Fri, 20 Jun 2014 12:20:44 -0700
Subject: x86/vdso: Create .build-id links for unstripped vdso files

With this change, doing 'make vdso_install' and telling gdb:

set debug-file-directory /lib/modules/KVER/vdso

will enable vdso debugging with symbols.  This is useful for
testing, but kernel RPM builds will probably want to manually delete
these symlinks or otherwise do something sensible when they strip
the vdso/*.so files.

If ld does not support --build-id, then the symlinks will not be
created.

Note that kernel packagers that use vdso_install may need to adjust
their packaging scripts to accomdate this change.  For example,
Fedora's scripts create build-id symlinks themselves in a different
location, so the spec should probably be updated to remove the
symlinks created by make vdso_install.

Signed-off-by: Andy Lutomirski <luto@amacapital.net>
Link: http://lkml.kernel.org/r/a424b189ce3ced85fe1e82d032a20e765e0fe0d3.1403291930.git.luto@amacapital.net
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
---
 arch/x86/vdso/Makefile | 16 +++++++++++++---
 1 file changed, 13 insertions(+), 3 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/vdso/Makefile b/arch/x86/vdso/Makefile
index 2c1ca98eb612..68a15c4dd6e4 100644
--- a/arch/x86/vdso/Makefile
+++ b/arch/x86/vdso/Makefile
@@ -169,14 +169,24 @@ quiet_cmd_vdso = VDSO    $@
 		 sh $(srctree)/$(src)/checkundef.sh '$(NM)' '$@'
 
 VDSO_LDFLAGS = -fPIC -shared $(call cc-ldoption, -Wl$(comma)--hash-style=sysv) \
-	-Wl,-Bsymbolic $(LTO_CFLAGS)
+	$(call cc-ldoption, -Wl$(comma)--build-id) -Wl,-Bsymbolic $(LTO_CFLAGS)
 GCOV_PROFILE := n
 
 #
-# Install the unstripped copies of vdso*.so.
+# Install the unstripped copies of vdso*.so.  If our toolchain supports
+# build-id, install .build-id links as well.
 #
 quiet_cmd_vdso_install = INSTALL $(@:install_%=%)
-      cmd_vdso_install = cp $< $(MODLIB)/vdso/$(@:install_%=%)
+define cmd_vdso_install
+	cp $< "$(MODLIB)/vdso/$(@:install_%=%)"; \
+	if readelf -n $< |grep -q 'Build ID'; then \
+	  buildid=`readelf -n $< |grep 'Build ID' |sed -e 's/^.*Build ID: \(.*\)$$/\1/'`; \
+	  first=`echo $$buildid | cut -b-2`; \
+	  last=`echo $$buildid | cut -b3-`; \
+	  mkdir -p "$(MODLIB)/vdso/.build-id/$$first"; \
+	  ln -sf "../../$(@:install_%=%)" "$(MODLIB)/vdso/.build-id/$$first/$$last.debug"; \
+	fi
+endef
 
 vdso_img_insttargets := $(vdso_img_sodbg:%.dbg=install_%)
 
-- 
cgit v1.2.3


From ed2d859119f9cb04410d94b5fbd2bb12907c8932 Mon Sep 17 00:00:00 2001
From: Ezequiel Garcia <ezequiel.garcia@free-electrons.com>
Date: Mon, 9 Jun 2014 17:52:45 -0300
Subject: ARM: mvebu: Fix broken SoC ID detection

Currently the mvebu boards need to detect the SoC revision in order to apply
some quirks needed to workaround issues found on I2C and thermal controllers
present only in very early SoC.

This detection requires PCI address translation to work, so we need to
explicitly select OF_ADDRESS_PCI.

This can be considered a partial revert of the following commit, that
wrongly removed the option selection:

commit 55400f3a1f89e39761f45c19f6e4235a329c400b
Author: Rob Herring <robh@kernel.org>
Date:   Tue Apr 22 14:15:52 2014 -0500

    ARM: mvebu: clean-up unneeded kconfig selects

Signed-off-by: Ezequiel Garcia <ezequiel.garcia@free-electrons.com>
Link: https://lkml.kernel.org/r/1402347165-19988-1-git-send-email-ezequiel.garcia@free-electrons.com
Acked-by: Rob Herring <robh@kernel.org>
Signed-off-by: Jason Cooper <jason@lakedaemon.net>
---
 arch/arm/mach-mvebu/Kconfig | 1 +
 1 file changed, 1 insertion(+)

(limited to 'arch')

diff --git a/arch/arm/mach-mvebu/Kconfig b/arch/arm/mach-mvebu/Kconfig
index 6090b9eb00c8..61ac80a8acac 100644
--- a/arch/arm/mach-mvebu/Kconfig
+++ b/arch/arm/mach-mvebu/Kconfig
@@ -10,6 +10,7 @@ config ARCH_MVEBU
 	select ZONE_DMA if ARM_LPAE
 	select ARCH_REQUIRE_GPIOLIB
 	select PCI_QUIRKS if PCI
+	select OF_ADDRESS_PCI
 
 if ARCH_MVEBU
 
-- 
cgit v1.2.3


From 52fcc56753de91ae337aeaa0a664f72d93f19827 Mon Sep 17 00:00:00 2001
From: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
Date: Wed, 11 Jun 2014 14:06:37 +0200
Subject: ARM: mvebu: select ARM_CPU_SUSPEND for Marvell EBU v7 platforms

On Marvell Armada platforms, the PMSU (Power Management Service Unit)
controls a number of power management related activities, needed for
things like suspend/resume, CPU hotplug, cpuidle or even simply SMP.

Since cpuidle support was added for Armada XP, the pmsu.c file in
arch/arm/mach-mvebu/ calls the cpu_suspend() and cpu_resume() ARM
functions, which are only available when
CONFIG_ARM_CPU_SUSPEND=y. Therefore, configurations that have
CONFIG_ARM_CPU_SUSPEND disabled due to PM_SLEEP being disabled no
longer build properly, due to undefined references to cpu_suspend()
and cpu_resume().

To fix this, this patch simply ensures CONFIG_ARM_CPU_SUSPEND is
always enabled for Marvell EBU v7 platforms. Doing things in a more
fine-grained way would require a lot of #ifdef-ery in pmsu.c to
isolate the parts that use cpu_suspend()/cpu_resume(), and those parts
would anyway have been needed as soon as either one of suspend/resume,
CPU hotplug or cpuidle was enabled.

Reported-by: Ezequiel Garcia <ezequiel.garcia@free-electrons.com>
Signed-off-by: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
Link: https://lkml.kernel.org/r/1402488397-31381-1-git-send-email-thomas.petazzoni@free-electrons.com
Acked-by: Ezequiel Garcia <ezequiel.garcia@free-electrons.com>
Signed-off-by: Jason Cooper <jason@lakedaemon.net>
---
 arch/arm/mach-mvebu/Kconfig | 1 +
 1 file changed, 1 insertion(+)

(limited to 'arch')

diff --git a/arch/arm/mach-mvebu/Kconfig b/arch/arm/mach-mvebu/Kconfig
index 61ac80a8acac..ea039ed7a3ae 100644
--- a/arch/arm/mach-mvebu/Kconfig
+++ b/arch/arm/mach-mvebu/Kconfig
@@ -20,6 +20,7 @@ config MACH_MVEBU_V7
 	bool
 	select ARMADA_370_XP_TIMER
 	select CACHE_L2X0
+	select ARM_CPU_SUSPEND
 
 config MACH_ARMADA_370
 	bool "Marvell Armada 370 boards" if ARCH_MULTI_V7
-- 
cgit v1.2.3


From cbb6c3fe33f94d59c1fb276af00056020bdfd874 Mon Sep 17 00:00:00 2001
From: Marek Vasut <marex@denx.de>
Date: Fri, 13 Jun 2014 02:22:39 +0200
Subject: ARM: dts: mx5: Fix IPU port node placement

The "port" node was misplaced in the original patch, therefore making
the LCD dysfunctional on this board. Fix this by moving the "port" DT
node into the "display {}" node.

Signed-off-by: Marek Vasut <marex@denx.de>
Acked-by: Philipp Zabel <p.zabel@pengutronix.de>
Signed-off-by: Shawn Guo <shawn.guo@freescale.com>
---
 arch/arm/boot/dts/imx53-m53evk.dts | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'arch')

diff --git a/arch/arm/boot/dts/imx53-m53evk.dts b/arch/arm/boot/dts/imx53-m53evk.dts
index d5d146a8b149..3a04b9eba231 100644
--- a/arch/arm/boot/dts/imx53-m53evk.dts
+++ b/arch/arm/boot/dts/imx53-m53evk.dts
@@ -43,11 +43,11 @@
 					vsync-active = <1>;
 				};
 			};
-		};
 
-		port {
-			display1_in: endpoint {
-				remote-endpoint = <&ipu_di1_disp1>;
+			port {
+				display1_in: endpoint {
+					remote-endpoint = <&ipu_di1_disp1>;
+				};
 			};
 		};
 	};
-- 
cgit v1.2.3


From be149c75fc50961675a6969c72cb0dc38116a0b9 Mon Sep 17 00:00:00 2001
From: Marek Vasut <marex@denx.de>
Date: Fri, 13 Jun 2014 02:22:40 +0200
Subject: ARM: dts: mx5: Move the display out of soc {} node

Move the display {} node out of the soc {} node . This just aligns
the DT with other boards, there is no functional change.

Signed-off-by: Marek Vasut <marex@denx.de>
Signed-off-by: Shawn Guo <shawn.guo@freescale.com>
---
 arch/arm/boot/dts/imx53-m53evk.dts | 48 ++++++++++++++++++--------------------
 1 file changed, 23 insertions(+), 25 deletions(-)

(limited to 'arch')

diff --git a/arch/arm/boot/dts/imx53-m53evk.dts b/arch/arm/boot/dts/imx53-m53evk.dts
index 3a04b9eba231..c4956b0ffb35 100644
--- a/arch/arm/boot/dts/imx53-m53evk.dts
+++ b/arch/arm/boot/dts/imx53-m53evk.dts
@@ -21,33 +21,31 @@
 		      <0xb0000000 0x20000000>;
 	};
 
-	soc {
-		display1: display@di1 {
-			compatible = "fsl,imx-parallel-display";
-			interface-pix-fmt = "bgr666";
-			pinctrl-names = "default";
-			pinctrl-0 = <&pinctrl_ipu_disp1>;
-
-			display-timings {
-				800x480p60 {
-					native-mode;
-					clock-frequency = <31500000>;
-					hactive = <800>;
-					vactive = <480>;
-					hfront-porch = <40>;
-					hback-porch = <88>;
-					hsync-len = <128>;
-					vback-porch = <33>;
-					vfront-porch = <9>;
-					vsync-len = <3>;
-					vsync-active = <1>;
-				};
+	display1: display@di1 {
+		compatible = "fsl,imx-parallel-display";
+		interface-pix-fmt = "bgr666";
+		pinctrl-names = "default";
+		pinctrl-0 = <&pinctrl_ipu_disp1>;
+
+		display-timings {
+			800x480p60 {
+				native-mode;
+				clock-frequency = <31500000>;
+				hactive = <800>;
+				vactive = <480>;
+				hfront-porch = <40>;
+				hback-porch = <88>;
+				hsync-len = <128>;
+				vback-porch = <33>;
+				vfront-porch = <9>;
+				vsync-len = <3>;
+				vsync-active = <1>;
 			};
+		};
 
-			port {
-				display1_in: endpoint {
-					remote-endpoint = <&ipu_di1_disp1>;
-				};
+		port {
+			display1_in: endpoint {
+				remote-endpoint = <&ipu_di1_disp1>;
 			};
 		};
 	};
-- 
cgit v1.2.3


From dacf49223fc680e6d5b5ca4ea43dcd197c1814c5 Mon Sep 17 00:00:00 2001
From: Sascha Hauer <s.hauer@pengutronix.de>
Date: Fri, 23 May 2014 14:33:04 +0200
Subject: ARM: dts: imx51-babbage: Fix esdhc setup

Since commit 89d7e5c13122 (mmc: sdhci-esdhc-imx: add runtime pm
support), controller based card detection / write protection is not
supported anymore by esdhc driver.  Let's use GPIO for CD/WP on esdhc1
instead.

While at it, fix cd gpio polarity for esdhc2. This is wrong and
currently only works because the imx esdhc driver ignores the polarity.

Signed-off-by: Sascha Hauer <s.hauer@pengutronix.de>
Signed-off-by: Shawn Guo <shawn.guo@freescale.com>
---
 arch/arm/boot/dts/imx51-babbage.dts | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

(limited to 'arch')

diff --git a/arch/arm/boot/dts/imx51-babbage.dts b/arch/arm/boot/dts/imx51-babbage.dts
index 6bc3243a80d3..181d77fa2fa6 100644
--- a/arch/arm/boot/dts/imx51-babbage.dts
+++ b/arch/arm/boot/dts/imx51-babbage.dts
@@ -315,15 +315,15 @@
 &esdhc1 {
 	pinctrl-names = "default";
 	pinctrl-0 = <&pinctrl_esdhc1>;
-	fsl,cd-controller;
-	fsl,wp-controller;
+	cd-gpios = <&gpio1 0 GPIO_ACTIVE_LOW>;
+	wp-gpios = <&gpio1 1 GPIO_ACTIVE_HIGH>;
 	status = "okay";
 };
 
 &esdhc2 {
 	pinctrl-names = "default";
 	pinctrl-0 = <&pinctrl_esdhc2>;
-	cd-gpios = <&gpio1 6 GPIO_ACTIVE_HIGH>;
+	cd-gpios = <&gpio1 6 GPIO_ACTIVE_LOW>;
 	wp-gpios = <&gpio1 5 GPIO_ACTIVE_HIGH>;
 	status = "okay";
 };
@@ -468,8 +468,8 @@
 				MX51_PAD_SD1_DATA1__SD1_DATA1		0x20d5
 				MX51_PAD_SD1_DATA2__SD1_DATA2		0x20d5
 				MX51_PAD_SD1_DATA3__SD1_DATA3		0x20d5
-				MX51_PAD_GPIO1_0__SD1_CD		0x20d5
-				MX51_PAD_GPIO1_1__SD1_WP		0x20d5
+				MX51_PAD_GPIO1_0__GPIO1_0		0x100
+				MX51_PAD_GPIO1_1__GPIO1_1		0x100
 			>;
 		};
 
-- 
cgit v1.2.3


From 7d278f271c082530e6841e40bb47942d526d9b4e Mon Sep 17 00:00:00 2001
From: Denis Carikli <denis@eukrea.com>
Date: Wed, 18 Jun 2014 14:56:56 +0200
Subject: ARM: dts: imx51-eukrea-mbimxsd51-baseboard: unbreak esdhc.

The following commit:
89d7e5c mmc: sdhci-esdhc-imx: add runtime pm support
has the effect of also disabling the hardware card detect
in runtime pm.

We switch to GPIO based detection to avoid this issue.

This patch is based on:
ARM: dts: imx51-babbage: Fix esdhc setup

Signed-off-by: Denis Carikli <denis@eukrea.com>
Signed-off-by: Shawn Guo <shawn.guo@freescale.com>
---
 arch/arm/boot/dts/imx51-eukrea-mbimxsd51-baseboard.dts | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/arm/boot/dts/imx51-eukrea-mbimxsd51-baseboard.dts b/arch/arm/boot/dts/imx51-eukrea-mbimxsd51-baseboard.dts
index 75e66c9c6144..31cfb7f2b02e 100644
--- a/arch/arm/boot/dts/imx51-eukrea-mbimxsd51-baseboard.dts
+++ b/arch/arm/boot/dts/imx51-eukrea-mbimxsd51-baseboard.dts
@@ -107,7 +107,7 @@
 &esdhc1 {
 	pinctrl-names = "default";
 	pinctrl-0 = <&pinctrl_esdhc1 &pinctrl_esdhc1_cd>;
-	fsl,cd-controller;
+	cd-gpios = <&gpio1 0 GPIO_ACTIVE_LOW>;
 	status = "okay";
 };
 
@@ -206,7 +206,7 @@
 
 		pinctrl_esdhc1_cd: esdhc1_cd {
 			fsl,pins = <
-				MX51_PAD_GPIO1_0__SD1_CD 0x20d5
+				MX51_PAD_GPIO1_0__GPIO1_0 0xd5
 			>;
 		};
 
-- 
cgit v1.2.3


From 7cbcb9d46f9194eb1f88c253a08c0292b2883acc Mon Sep 17 00:00:00 2001
From: Doug Anderson <dianders@chromium.org>
Date: Sat, 21 Jun 2014 19:30:53 +0900
Subject: ARM: EXYNOS: Don't rely on firmware's secondary_cpu_start for mcpm

On exynos mcpm systems the firmware is hardcoded to jump to an address
in SRAM (0x02073000) when secondary CPUs come up.  By default the
firmware puts a bunch of code at that location.  That code expects the
kernel to fill in a few slots with addresses that it uses to jump back
to the kernel's entry point for secondary CPUs.

Originally (on prerelease hardware) this firmware code contained a
bunch of workarounds to deal with boot ROM bugs.  However on all
shipped hardware we simply use this code to redirect to a kernel
function for bringing up the CPUs.

Let's stop relying on the code provided by the bootloader and just
plumb in our own (simple) code jump to the kernel.  This has the nice
benefit of fixing problems due to the fact that older bootloaders
(like the one shipped on the Samsung Chromebook 2) might have put
slightly different code into this location.

Once suspend/resume is implemented for systems using exynos-mcpm we'll
need to make sure we reinstall our fixed up code after resume.  ...but
that's not anything new since IRAM (and thus the address of the
mcpm_entry_point) is lost across suspend/resume anyway.

Signed-off-by: Doug Anderson <dianders@chromium.org>
Acked-by: Kevin Hilman <khilman@linaro.org>
Tested-by: Kevin Hilman <khilman@linaro.org>
Acked-by: Nicolas Pitre <nico@linaro.org>
Signed-off-by: Kukjin Kim <kgene.kim@samsung.com>
---
 arch/arm/mach-exynos/mcpm-exynos.c | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

(limited to 'arch')

diff --git a/arch/arm/mach-exynos/mcpm-exynos.c b/arch/arm/mach-exynos/mcpm-exynos.c
index 0498d0b887ef..ace0ed617476 100644
--- a/arch/arm/mach-exynos/mcpm-exynos.c
+++ b/arch/arm/mach-exynos/mcpm-exynos.c
@@ -25,7 +25,6 @@
 
 #define EXYNOS5420_CPUS_PER_CLUSTER	4
 #define EXYNOS5420_NR_CLUSTERS		2
-#define MCPM_BOOT_ADDR_OFFSET		0x1c
 
 /*
  * The common v7_exit_coherency_flush API could not be used because of the
@@ -343,11 +342,13 @@ static int __init exynos_mcpm_init(void)
 	pr_info("Exynos MCPM support installed\n");
 
 	/*
-	 * Future entries into the kernel can now go
-	 * through the cluster entry vectors.
+	 * U-Boot SPL is hardcoded to jump to the start of ns_sram_base_addr
+	 * as part of secondary_cpu_start().  Let's redirect it to the
+	 * mcpm_entry_point().
 	 */
-	__raw_writel(virt_to_phys(mcpm_entry_point),
-			ns_sram_base_addr + MCPM_BOOT_ADDR_OFFSET);
+	__raw_writel(0xe59f0000, ns_sram_base_addr);     /* ldr r0, [pc, #0] */
+	__raw_writel(0xe12fff10, ns_sram_base_addr + 4); /* bx  r0 */
+	__raw_writel(virt_to_phys(mcpm_entry_point), ns_sram_base_addr + 8);
 
 	iounmap(ns_sram_base_addr);
 
-- 
cgit v1.2.3


From 760ecbc7d0e822d0e2ad2daeb9144bdd295637b5 Mon Sep 17 00:00:00 2001
From: Vineet Gupta <vgupta@synopsys.com>
Date: Mon, 23 Jun 2014 11:42:44 +0530
Subject: ARC: fix build warning in devtree

Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
---
 arch/arc/kernel/devtree.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/arc/kernel/devtree.c b/arch/arc/kernel/devtree.c
index 0b3ef4025d89..fffdb5e41b20 100644
--- a/arch/arc/kernel/devtree.c
+++ b/arch/arc/kernel/devtree.c
@@ -41,7 +41,7 @@ const struct machine_desc * __init setup_machine_fdt(void *dt)
 {
 	const struct machine_desc *mdesc;
 	unsigned long dt_root;
-	void *clk;
+	const void *clk;
 	int len;
 
 	if (!early_init_dt_scan(dt))
-- 
cgit v1.2.3


From b514fb28ead93332a1660d3ce81def9eb74ed640 Mon Sep 17 00:00:00 2001
From: Sebastian Hesselbarth <sebastian.hesselbarth@gmail.com>
Date: Mon, 23 Jun 2014 22:25:15 +0200
Subject: ARM: dts: kirkwood: fix phy-connection-type for Guruplug

Commit eeb845459a72e792a959278b858f9c417e9995bd
 ("ARM: dts: kirkwood: set Guruplug phy-connection-type to rgmii-id")
added phy-connection-type properties to ethernet PHY nodes.

Actually, the property has to be set for the ethernet port node instead.
Fix it by moving the corresponding properties to the correct nodes.

Signed-off-by: Sebastian Hesselbarth <sebastian.hesselbarth@gmail.com>
Link: https://lkml.kernel.org/r/1403555115-13111-1-git-send-email-sebastian.hesselbarth@gmail.com
Fixes: eeb845459a72: ('ARM: dts: kirkwood: set Guruplug phy-connection-type to rgmii-id')
Cc: <stable@vger.kernel.org> # v3.16+
Signed-off-by: Jason Cooper <jason@lakedaemon.net>
---
 arch/arm/boot/dts/kirkwood-guruplug-server-plus.dts | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/arm/boot/dts/kirkwood-guruplug-server-plus.dts b/arch/arm/boot/dts/kirkwood-guruplug-server-plus.dts
index c5a1fc75c7a3..b2d9834bf458 100644
--- a/arch/arm/boot/dts/kirkwood-guruplug-server-plus.dts
+++ b/arch/arm/boot/dts/kirkwood-guruplug-server-plus.dts
@@ -105,7 +105,6 @@
 		compatible = "ethernet-phy-id0141.0cb0",
 		             "ethernet-phy-ieee802.3-c22";
 		reg = <0>;
-		phy-connection-type = "rgmii-id";
 	};
 
 	ethphy1: ethernet-phy@1 {
@@ -113,7 +112,6 @@
 		compatible = "ethernet-phy-id0141.0cb0",
 		             "ethernet-phy-ieee802.3-c22";
 		reg = <1>;
-		phy-connection-type = "rgmii-id";
 	};
 };
 
@@ -121,6 +119,7 @@
 	status = "okay";
 	ethernet0-port@0 {
 		phy-handle = <&ethphy0>;
+		phy-connection-type = "rgmii-id";
 	};
 };
 
@@ -128,5 +127,6 @@
 	status = "okay";
 	ethernet1-port@0 {
 		phy-handle = <&ethphy1>;
+		phy-connection-type = "rgmii-id";
 	};
 };
-- 
cgit v1.2.3


From 554086d85e71f30abe46fc014fea31929a7c6a8a Mon Sep 17 00:00:00 2001
From: Andy Lutomirski <luto@amacapital.net>
Date: Mon, 23 Jun 2014 14:22:15 -0700
Subject: x86_32, entry: Do syscall exit work on badsys (CVE-2014-4508)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The bad syscall nr paths are their own incomprehensible route
through the entry control flow.  Rearrange them to work just like
syscalls that return -ENOSYS.

This fixes an OOPS in the audit code when fast-path auditing is
enabled and sysenter gets a bad syscall nr (CVE-2014-4508).

This has probably been broken since Linux 2.6.27:
af0575bba0 i386 syscall audit fast-path

Cc: stable@vger.kernel.org
Cc: Roland McGrath <roland@redhat.com>
Reported-by: Toralf Förster <toralf.foerster@gmx.de>
Signed-off-by: Andy Lutomirski <luto@amacapital.net>
Link: http://lkml.kernel.org/r/e09c499eade6fc321266dd6b54da7beb28d6991c.1403558229.git.luto@amacapital.net
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
---
 arch/x86/kernel/entry_32.S | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index f0da82b8e634..dbaa23e78b36 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -423,9 +423,10 @@ sysenter_past_esp:
 	jnz sysenter_audit
 sysenter_do_call:
 	cmpl $(NR_syscalls), %eax
-	jae syscall_badsys
+	jae sysenter_badsys
 	call *sys_call_table(,%eax,4)
 	movl %eax,PT_EAX(%esp)
+sysenter_after_call:
 	LOCKDEP_SYS_EXIT
 	DISABLE_INTERRUPTS(CLBR_ANY)
 	TRACE_IRQS_OFF
@@ -675,7 +676,12 @@ END(syscall_fault)
 
 syscall_badsys:
 	movl $-ENOSYS,PT_EAX(%esp)
-	jmp resume_userspace
+	jmp syscall_exit
+END(syscall_badsys)
+
+sysenter_badsys:
+	movl $-ENOSYS,PT_EAX(%esp)
+	jmp sysenter_after_call
 END(syscall_badsys)
 	CFI_ENDPROC
 
-- 
cgit v1.2.3


From 6ba19a670c8b604e9802d30b511e6a4778118592 Mon Sep 17 00:00:00 2001
From: Andy Lutomirski <luto@amacapital.net>
Date: Sat, 21 Jun 2014 07:31:55 -0700
Subject: x86_32, signal: Fix vdso rt_sigreturn

This commit:

    commit 6f121e548f83674ab4920a4e60afb58d4f61b829
    Author: Andy Lutomirski <luto@amacapital.net>
    Date:   Mon May 5 12:19:34 2014 -0700

        x86, vdso: Reimplement vdso.so preparation in build-time C

Contained this obvious typo:

-               restorer = VDSO32_SYMBOL(current->mm->context.vdso, rt_sigreturn);
+               restorer = current->mm->context.vdso +
+                       selected_vdso32->sym___kernel_sigreturn;

Note the missing 'rt_' in the new code.  Fix it.

Signed-off-by: Andy Lutomirski <luto@amacapital.net>
Link: http://lkml.kernel.org/r/1eb40ad923acde2e18357ef2832867432e70ac42.1403361010.git.luto@amacapital.net
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
---
 arch/x86/kernel/signal.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index a0da58db43a8..2851d63c1202 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -363,7 +363,7 @@ static int __setup_rt_frame(int sig, struct ksignal *ksig,
 
 		/* Set up to return from userspace.  */
 		restorer = current->mm->context.vdso +
-			selected_vdso32->sym___kernel_sigreturn;
+			selected_vdso32->sym___kernel_rt_sigreturn;
 		if (ksig->ka.sa.sa_flags & SA_RESTORER)
 			restorer = ksig->ka.sa.sa_restorer;
 		put_user_ex(restorer, &frame->pretcode);
-- 
cgit v1.2.3


From f3aca3d09525f87731ba6b892c9b010570bc54b4 Mon Sep 17 00:00:00 2001
From: Aaron Tomlin <atomlin@redhat.com>
Date: Mon, 23 Jun 2014 13:22:05 -0700
Subject: nmi: provide the option to issue an NMI back trace to every cpu but
 current

Sometimes it is preferred not to use the trigger_all_cpu_backtrace()
routine when one wants to avoid capturing a back trace for current.  For
instance if one was previously captured recently.

This patch provides a new routine namely
trigger_allbutself_cpu_backtrace() which offers the flexibility to issue
an NMI to every cpu but current and capture a back trace accordingly.

Patch x86 and sparc to support new routine.

[dzickus@redhat.com: add stub in #else clause]
[dzickus@redhat.com: don't print message in single processor case, wrap with get/put_cpu based on Oleg's suggestion]
[sfr@canb.auug.org.au: undo C99ism]
Signed-off-by: Aaron Tomlin <atomlin@redhat.com>
Signed-off-by: Don Zickus <dzickus@redhat.com>
Acked-by: David S. Miller <davem@davemloft.net>
Cc: Mateusz Guzik <mguzik@redhat.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/sparc/include/asm/irq_64.h |  2 +-
 arch/sparc/kernel/process_64.c  | 18 ++++++++++++------
 arch/x86/include/asm/irq.h      |  2 +-
 arch/x86/kernel/apic/hw_nmi.c   | 18 ++++++++++++++----
 include/linux/nmi.h             | 11 ++++++++++-
 5 files changed, 38 insertions(+), 13 deletions(-)

(limited to 'arch')

diff --git a/arch/sparc/include/asm/irq_64.h b/arch/sparc/include/asm/irq_64.h
index 375cffcf7dbd..91d219381306 100644
--- a/arch/sparc/include/asm/irq_64.h
+++ b/arch/sparc/include/asm/irq_64.h
@@ -89,7 +89,7 @@ static inline unsigned long get_softint(void)
 	return retval;
 }
 
-void arch_trigger_all_cpu_backtrace(void);
+void arch_trigger_all_cpu_backtrace(bool);
 #define arch_trigger_all_cpu_backtrace arch_trigger_all_cpu_backtrace
 
 extern void *hardirq_stack[NR_CPUS];
diff --git a/arch/sparc/kernel/process_64.c b/arch/sparc/kernel/process_64.c
index b2988f25e230..027e09986194 100644
--- a/arch/sparc/kernel/process_64.c
+++ b/arch/sparc/kernel/process_64.c
@@ -239,7 +239,7 @@ static void __global_reg_poll(struct global_reg_snapshot *gp)
 	}
 }
 
-void arch_trigger_all_cpu_backtrace(void)
+void arch_trigger_all_cpu_backtrace(bool include_self)
 {
 	struct thread_info *tp = current_thread_info();
 	struct pt_regs *regs = get_irq_regs();
@@ -251,16 +251,22 @@ void arch_trigger_all_cpu_backtrace(void)
 
 	spin_lock_irqsave(&global_cpu_snapshot_lock, flags);
 
-	memset(global_cpu_snapshot, 0, sizeof(global_cpu_snapshot));
-
 	this_cpu = raw_smp_processor_id();
 
-	__global_reg_self(tp, regs, this_cpu);
+	memset(global_cpu_snapshot, 0, sizeof(global_cpu_snapshot));
+
+	if (include_self)
+		__global_reg_self(tp, regs, this_cpu);
 
 	smp_fetch_global_regs();
 
 	for_each_online_cpu(cpu) {
-		struct global_reg_snapshot *gp = &global_cpu_snapshot[cpu].reg;
+		struct global_reg_snapshot *gp;
+
+		if (!include_self && cpu == this_cpu)
+			continue;
+
+		gp = &global_cpu_snapshot[cpu].reg;
 
 		__global_reg_poll(gp);
 
@@ -292,7 +298,7 @@ void arch_trigger_all_cpu_backtrace(void)
 
 static void sysrq_handle_globreg(int key)
 {
-	arch_trigger_all_cpu_backtrace();
+	arch_trigger_all_cpu_backtrace(true);
 }
 
 static struct sysrq_key_op sparc_globalreg_op = {
diff --git a/arch/x86/include/asm/irq.h b/arch/x86/include/asm/irq.h
index cb6cfcd034cf..a80cbb88ea91 100644
--- a/arch/x86/include/asm/irq.h
+++ b/arch/x86/include/asm/irq.h
@@ -43,7 +43,7 @@ extern int vector_used_by_percpu_irq(unsigned int vector);
 extern void init_ISA_irqs(void);
 
 #ifdef CONFIG_X86_LOCAL_APIC
-void arch_trigger_all_cpu_backtrace(void);
+void arch_trigger_all_cpu_backtrace(bool);
 #define arch_trigger_all_cpu_backtrace arch_trigger_all_cpu_backtrace
 #endif
 
diff --git a/arch/x86/kernel/apic/hw_nmi.c b/arch/x86/kernel/apic/hw_nmi.c
index c3fcb5de5083..6a1e71bde323 100644
--- a/arch/x86/kernel/apic/hw_nmi.c
+++ b/arch/x86/kernel/apic/hw_nmi.c
@@ -33,31 +33,41 @@ static DECLARE_BITMAP(backtrace_mask, NR_CPUS) __read_mostly;
 /* "in progress" flag of arch_trigger_all_cpu_backtrace */
 static unsigned long backtrace_flag;
 
-void arch_trigger_all_cpu_backtrace(void)
+void arch_trigger_all_cpu_backtrace(bool include_self)
 {
 	int i;
+	int cpu = get_cpu();
 
-	if (test_and_set_bit(0, &backtrace_flag))
+	if (test_and_set_bit(0, &backtrace_flag)) {
 		/*
 		 * If there is already a trigger_all_cpu_backtrace() in progress
 		 * (backtrace_flag == 1), don't output double cpu dump infos.
 		 */
+		put_cpu();
 		return;
+	}
 
 	cpumask_copy(to_cpumask(backtrace_mask), cpu_online_mask);
+	if (!include_self)
+		cpumask_clear_cpu(cpu, to_cpumask(backtrace_mask));
 
-	printk(KERN_INFO "sending NMI to all CPUs:\n");
-	apic->send_IPI_all(NMI_VECTOR);
+	if (!cpumask_empty(to_cpumask(backtrace_mask))) {
+		pr_info("sending NMI to %s CPUs:\n",
+			(include_self ? "all" : "other"));
+		apic->send_IPI_mask(to_cpumask(backtrace_mask), NMI_VECTOR);
+	}
 
 	/* Wait for up to 10 seconds for all CPUs to do the backtrace */
 	for (i = 0; i < 10 * 1000; i++) {
 		if (cpumask_empty(to_cpumask(backtrace_mask)))
 			break;
 		mdelay(1);
+		touch_softlockup_watchdog();
 	}
 
 	clear_bit(0, &backtrace_flag);
 	smp_mb__after_atomic();
+	put_cpu();
 }
 
 static int
diff --git a/include/linux/nmi.h b/include/linux/nmi.h
index 6a45fb583ff1..a17ab6398d7c 100644
--- a/include/linux/nmi.h
+++ b/include/linux/nmi.h
@@ -32,15 +32,24 @@ static inline void touch_nmi_watchdog(void)
 #ifdef arch_trigger_all_cpu_backtrace
 static inline bool trigger_all_cpu_backtrace(void)
 {
-	arch_trigger_all_cpu_backtrace();
+	arch_trigger_all_cpu_backtrace(true);
 
 	return true;
 }
+static inline bool trigger_allbutself_cpu_backtrace(void)
+{
+	arch_trigger_all_cpu_backtrace(false);
+	return true;
+}
 #else
 static inline bool trigger_all_cpu_backtrace(void)
 {
 	return false;
 }
+static inline bool trigger_allbutself_cpu_backtrace(void)
+{
+	return false;
+}
 #endif
 
 #ifdef CONFIG_LOCKUP_DETECTOR
-- 
cgit v1.2.3


From f9af420fc8208d3add2fe3198dc5d8215f5a81ba Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@linux-foundation.org>
Date: Mon, 23 Jun 2014 13:22:07 -0700
Subject: ia64: arch/ia64/include/uapi/asm/fcntl.h needs personality.h

fs/notify/fanotify/fanotify_user.c: In function 'SYSC_fanotify_init':
fs/notify/fanotify/fanotify_user.c:726: error: implicit declaration of function 'personality'
fs/notify/fanotify/fanotify_user.c:726: error: 'PER_LINUX32' undeclared (first use in this function)
fs/notify/fanotify/fanotify_user.c:726: error: (Each undeclared identifier is reported only once
fs/notify/fanotify/fanotify_user.c:726: error: for each function it appears in.)

Reported-by: Wu Fengguang <fengguang.wu@intel.com>
Cc: Will Woods <wwoods@redhat.com>
Cc: "Luck, Tony" <tony.luck@intel.com>
Cc: <stable@vger.kernel.org>	[3.15.x]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/ia64/include/uapi/asm/fcntl.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'arch')

diff --git a/arch/ia64/include/uapi/asm/fcntl.h b/arch/ia64/include/uapi/asm/fcntl.h
index 1dd275dc8f65..7b485876cad4 100644
--- a/arch/ia64/include/uapi/asm/fcntl.h
+++ b/arch/ia64/include/uapi/asm/fcntl.h
@@ -8,6 +8,7 @@
 #define force_o_largefile()	\
 		(personality(current->personality) != PER_LINUX32)
 
+#include <linux/personality.h>
 #include <asm-generic/fcntl.h>
 
 #endif /* _ASM_IA64_FCNTL_H */
-- 
cgit v1.2.3


From 0eb5736828722bd274519fab87b915da287ffd59 Mon Sep 17 00:00:00 2001
From: Gavin Shan <gwshan@linux.vnet.ibm.com>
Date: Mon, 23 Jun 2014 10:56:22 +1000
Subject: powerpc/kerenl: Enable EEH for IO accessors

In arch/powerpc/kernel/iomap.c, lots of IO reading accessors missed
to check EEH error as Ben pointed. The patch fixes it.

For the writing accessors, we change the called functions only for
making them look similar to the reading counterparts.

Suggested-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Gavin Shan <gwshan@linux.vnet.ibm.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/kernel/iomap.c | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/kernel/iomap.c b/arch/powerpc/kernel/iomap.c
index b82227e7e21b..12e48d56f771 100644
--- a/arch/powerpc/kernel/iomap.c
+++ b/arch/powerpc/kernel/iomap.c
@@ -23,7 +23,7 @@ unsigned int ioread16(void __iomem *addr)
 }
 unsigned int ioread16be(void __iomem *addr)
 {
-	return in_be16(addr);
+	return readw_be(addr);
 }
 unsigned int ioread32(void __iomem *addr)
 {
@@ -31,7 +31,7 @@ unsigned int ioread32(void __iomem *addr)
 }
 unsigned int ioread32be(void __iomem *addr)
 {
-	return in_be32(addr);
+	return readl_be(addr);
 }
 EXPORT_SYMBOL(ioread8);
 EXPORT_SYMBOL(ioread16);
@@ -49,7 +49,7 @@ void iowrite16(u16 val, void __iomem *addr)
 }
 void iowrite16be(u16 val, void __iomem *addr)
 {
-	out_be16(addr, val);
+	writew_be(val, addr);
 }
 void iowrite32(u32 val, void __iomem *addr)
 {
@@ -57,7 +57,7 @@ void iowrite32(u32 val, void __iomem *addr)
 }
 void iowrite32be(u32 val, void __iomem *addr)
 {
-	out_be32(addr, val);
+	writel_be(val, addr);
 }
 EXPORT_SYMBOL(iowrite8);
 EXPORT_SYMBOL(iowrite16);
@@ -75,15 +75,15 @@ EXPORT_SYMBOL(iowrite32be);
  */
 void ioread8_rep(void __iomem *addr, void *dst, unsigned long count)
 {
-	_insb((u8 __iomem *) addr, dst, count);
+	readsb(addr, dst, count);
 }
 void ioread16_rep(void __iomem *addr, void *dst, unsigned long count)
 {
-	_insw_ns((u16 __iomem *) addr, dst, count);
+	readsw(addr, dst, count);
 }
 void ioread32_rep(void __iomem *addr, void *dst, unsigned long count)
 {
-	_insl_ns((u32 __iomem *) addr, dst, count);
+	readsl(addr, dst, count);
 }
 EXPORT_SYMBOL(ioread8_rep);
 EXPORT_SYMBOL(ioread16_rep);
@@ -91,15 +91,15 @@ EXPORT_SYMBOL(ioread32_rep);
 
 void iowrite8_rep(void __iomem *addr, const void *src, unsigned long count)
 {
-	_outsb((u8 __iomem *) addr, src, count);
+	writesb(addr, src, count);
 }
 void iowrite16_rep(void __iomem *addr, const void *src, unsigned long count)
 {
-	_outsw_ns((u16 __iomem *) addr, src, count);
+	writesw(addr, src, count);
 }
 void iowrite32_rep(void __iomem *addr, const void *src, unsigned long count)
 {
-	_outsl_ns((u32 __iomem *) addr, src, count);
+	writesl(addr, src, count);
 }
 EXPORT_SYMBOL(iowrite8_rep);
 EXPORT_SYMBOL(iowrite16_rep);
-- 
cgit v1.2.3


From bf77ee2a7a544f09683f73dcd7cad7ef81f1005c Mon Sep 17 00:00:00 2001
From: Michael Ellerman <mpe@ellerman.id.au>
Date: Mon, 23 Jun 2014 14:17:47 +1000
Subject: powerpc: Remove ancient DEBUG_SIG code

We have some compile-time disabled debug code in signal_xx.c. It's from
some ancient time BG, almost certainly part of the original port, given
the very similar code on other arches.

The show_unhandled_signal logic, added in d0c3d534a438 (2.6.24) is
cleaner and prints more useful information, so drop the debug code.

Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/kernel/signal_32.c | 9 ---------
 arch/powerpc/kernel/signal_64.c | 9 ---------
 2 files changed, 18 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c
index 4e47db686b5d..1bc5a1755ed4 100644
--- a/arch/powerpc/kernel/signal_32.c
+++ b/arch/powerpc/kernel/signal_32.c
@@ -54,7 +54,6 @@
 
 #include "signal.h"
 
-#undef DEBUG_SIG
 
 #ifdef CONFIG_PPC64
 #define sys_rt_sigreturn	compat_sys_rt_sigreturn
@@ -1063,10 +1062,6 @@ int handle_rt_signal32(unsigned long sig, struct k_sigaction *ka,
 	return 1;
 
 badframe:
-#ifdef DEBUG_SIG
-	printk("badframe in handle_rt_signal, regs=%p frame=%p newsp=%lx\n",
-	       regs, frame, newsp);
-#endif
 	if (show_unhandled_signals)
 		printk_ratelimited(KERN_INFO
 				   "%s[%d]: bad frame in handle_rt_signal32: "
@@ -1484,10 +1479,6 @@ int handle_signal32(unsigned long sig, struct k_sigaction *ka,
 	return 1;
 
 badframe:
-#ifdef DEBUG_SIG
-	printk("badframe in handle_signal, regs=%p frame=%p newsp=%lx\n",
-	       regs, frame, newsp);
-#endif
 	if (show_unhandled_signals)
 		printk_ratelimited(KERN_INFO
 				   "%s[%d]: bad frame in handle_signal32: "
diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c
index d501dc4dc3e6..97c1e4b683fc 100644
--- a/arch/powerpc/kernel/signal_64.c
+++ b/arch/powerpc/kernel/signal_64.c
@@ -38,7 +38,6 @@
 
 #include "signal.h"
 
-#define DEBUG_SIG 0
 
 #define GP_REGS_SIZE	min(sizeof(elf_gregset_t), sizeof(struct pt_regs))
 #define FP_REGS_SIZE	sizeof(elf_fpregset_t)
@@ -700,10 +699,6 @@ int sys_rt_sigreturn(unsigned long r3, unsigned long r4, unsigned long r5,
 	return 0;
 
 badframe:
-#if DEBUG_SIG
-	printk("badframe in sys_rt_sigreturn, regs=%p uc=%p &uc->uc_mcontext=%p\n",
-	       regs, uc, &uc->uc_mcontext);
-#endif
 	if (show_unhandled_signals)
 		printk_ratelimited(regs->msr & MSR_64BIT ? fmt64 : fmt32,
 				   current->comm, current->pid, "rt_sigreturn",
@@ -809,10 +804,6 @@ int handle_rt_signal64(int signr, struct k_sigaction *ka, siginfo_t *info,
 	return 1;
 
 badframe:
-#if DEBUG_SIG
-	printk("badframe in setup_rt_frame, regs=%p frame=%p newsp=%lx\n",
-	       regs, frame, newsp);
-#endif
 	if (show_unhandled_signals)
 		printk_ratelimited(regs->msr & MSR_64BIT ? fmt64 : fmt32,
 				   current->comm, current->pid, "setup_rt_frame",
-- 
cgit v1.2.3


From 716821c9438bf7a61edd4c097ed1415a9cf14a7d Mon Sep 17 00:00:00 2001
From: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Date: Tue, 24 Jun 2014 12:28:56 +1000
Subject: powerpc: Remove __arch_swab*

The generic code uses gcc built-ins which work fine so there's no benefit
in implementing our own anymore.

We can't completely remove the ld/st_le* functions as some historical
cruft still uses them, but that's next on the radar

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/include/asm/swab.h | 43 -----------------------------------------
 1 file changed, 43 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/include/asm/swab.h b/arch/powerpc/include/asm/swab.h
index b9bd1ca944d0..96f59de61855 100644
--- a/arch/powerpc/include/asm/swab.h
+++ b/arch/powerpc/include/asm/swab.h
@@ -9,10 +9,6 @@
 
 #include <uapi/asm/swab.h>
 
-#ifdef __GNUC__
-#ifndef __powerpc64__
-#endif /* __powerpc64__ */
-
 static __inline__ __u16 ld_le16(const volatile __u16 *addr)
 {
 	__u16 val;
@@ -20,19 +16,12 @@ static __inline__ __u16 ld_le16(const volatile __u16 *addr)
 	__asm__ __volatile__ ("lhbrx %0,0,%1" : "=r" (val) : "r" (addr), "m" (*addr));
 	return val;
 }
-#define __arch_swab16p ld_le16
 
 static __inline__ void st_le16(volatile __u16 *addr, const __u16 val)
 {
 	__asm__ __volatile__ ("sthbrx %1,0,%2" : "=m" (*addr) : "r" (val), "r" (addr));
 }
 
-static inline void __arch_swab16s(__u16 *addr)
-{
-	st_le16(addr, *addr);
-}
-#define __arch_swab16s __arch_swab16s
-
 static __inline__ __u32 ld_le32(const volatile __u32 *addr)
 {
 	__u32 val;
@@ -40,42 +29,10 @@ static __inline__ __u32 ld_le32(const volatile __u32 *addr)
 	__asm__ __volatile__ ("lwbrx %0,0,%1" : "=r" (val) : "r" (addr), "m" (*addr));
 	return val;
 }
-#define __arch_swab32p ld_le32
 
 static __inline__ void st_le32(volatile __u32 *addr, const __u32 val)
 {
 	__asm__ __volatile__ ("stwbrx %1,0,%2" : "=m" (*addr) : "r" (val), "r" (addr));
 }
 
-static inline void __arch_swab32s(__u32 *addr)
-{
-	st_le32(addr, *addr);
-}
-#define __arch_swab32s __arch_swab32s
-
-static inline __attribute_const__ __u16 __arch_swab16(__u16 value)
-{
-	__u16 result;
-
-	__asm__("rlwimi %0,%1,8,16,23"
-	    : "=r" (result)
-	    : "r" (value), "0" (value >> 8));
-	return result;
-}
-#define __arch_swab16 __arch_swab16
-
-static inline __attribute_const__ __u32 __arch_swab32(__u32 value)
-{
-	__u32 result;
-
-	__asm__("rlwimi %0,%1,24,16,23\n\t"
-	    "rlwimi %0,%1,8,8,15\n\t"
-	    "rlwimi %0,%1,24,0,7"
-	    : "=r" (result)
-	    : "r" (value), "0" (value >> 24));
-	return result;
-}
-#define __arch_swab32 __arch_swab32
-
-#endif /* __GNUC__ */
 #endif /* _ASM_POWERPC_SWAB_H */
-- 
cgit v1.2.3


From d997c00c5a9b61474f7ff5f27ed64173bb007987 Mon Sep 17 00:00:00 2001
From: Michael Ellerman <mpe@ellerman.id.au>
Date: Tue, 17 Jun 2014 16:15:32 +1000
Subject: powerpc: Add ppc_global_function_entry()

ABIv2 has the concept of a global and local entry point to a function.
In most cases we are interested in the local entry point, and so that is
what ppc_function_entry() returns.

However we have a case in the ftrace code where we want the global entry
point, and there may be other places we need it too. Rather than special
casing each, add an accessor.

For ABIv1 and 32-bit there is only a single entry point, so we return
that. That means it's safe for the caller to use this without also
checking the ABI version.

Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/include/asm/code-patching.h | 11 +++++++++++
 1 file changed, 11 insertions(+)

(limited to 'arch')

diff --git a/arch/powerpc/include/asm/code-patching.h b/arch/powerpc/include/asm/code-patching.h
index 37991e154ef8..840a5509b3f1 100644
--- a/arch/powerpc/include/asm/code-patching.h
+++ b/arch/powerpc/include/asm/code-patching.h
@@ -88,4 +88,15 @@ static inline unsigned long ppc_function_entry(void *func)
 #endif
 }
 
+static inline unsigned long ppc_global_function_entry(void *func)
+{
+#if defined(CONFIG_PPC64) && defined(_CALL_ELF) && _CALL_ELF == 2
+	/* PPC64 ABIv2 the global entry point is at the address */
+	return (unsigned long)func;
+#else
+	/* All other cases there is no change vs ppc_function_entry() */
+	return ppc_function_entry(func);
+#endif
+}
+
 #endif /* _ASM_POWERPC_CODE_PATCHING_H */
-- 
cgit v1.2.3


From dfc382a19a2478768ef09d1c745b116e4fca9b4e Mon Sep 17 00:00:00 2001
From: Michael Ellerman <mpe@ellerman.id.au>
Date: Tue, 17 Jun 2014 16:15:33 +1000
Subject: powerpc/ftrace: Fix typo in mask of opcode

In commit 24a1bdc35, "Fix ABIv2 issues with __ftrace_make_call", Anton
changed the logic that checks for the expected code sequence when
patching a module.

We missed the typo in the mask, 0xffff00000 should be 0xffff0000, which
has the effect of making the test always true.

That makes it impossible to ftrace against modules, eg:

  Unexpected call sequence: 48000008 e8410018
  WARNING: at ../kernel/trace/ftrace.c:1638
  ftrace failed to modify [<d000000007cf001c>] rng_dev_open+0x1c/0x70 [rng_core]

Reported-by: David Binderman <dcb314@hotmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/kernel/ftrace.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/powerpc/kernel/ftrace.c b/arch/powerpc/kernel/ftrace.c
index f202d0731b06..f5d1a3420192 100644
--- a/arch/powerpc/kernel/ftrace.c
+++ b/arch/powerpc/kernel/ftrace.c
@@ -307,7 +307,7 @@ __ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
 	 * The load offset is different depending on the ABI. For simplicity
 	 * just mask it out when doing the compare.
 	 */
-	if ((op[0] != 0x48000008) || ((op[1] & 0xffff00000) != 0xe8410000)) {
+	if ((op[0] != 0x48000008) || ((op[1] & 0xffff0000) != 0xe8410000)) {
 		printk(KERN_ERR "Unexpected call sequence: %x %x\n",
 			op[0], op[1]);
 		return -EINVAL;
-- 
cgit v1.2.3


From b7b348c682fac04cade7b860c49d4a17f158dad4 Mon Sep 17 00:00:00 2001
From: Michael Ellerman <mpe@ellerman.id.au>
Date: Tue, 17 Jun 2014 16:15:34 +1000
Subject: powerpc/ftrace: Fix inverted check of create_branch()

In commit 24a1bdc35, "Fix ABIv2 issues with __ftrace_make_call", Anton
changed the logic that creates and patches the branch, and added a
thinko in the check of create_branch(). create_branch() returns the
instruction that was generated, so if we get zero then it succeeded.

The result is we can't ftrace modules:

  Branch out of range
  WARNING: at ../kernel/trace/ftrace.c:1638
  ftrace failed to modify [<d000000004ba001c>] fuse_req_init_context+0x1c/0x90 [fuse]

We should probably fix patch_instruction() to do that check and make the
API saner, but that's a separate patch. For now just invert the test.

Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/kernel/ftrace.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/powerpc/kernel/ftrace.c b/arch/powerpc/kernel/ftrace.c
index f5d1a3420192..8fc0c1742498 100644
--- a/arch/powerpc/kernel/ftrace.c
+++ b/arch/powerpc/kernel/ftrace.c
@@ -320,7 +320,7 @@ __ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
 	}
 
 	/* Ensure branch is within 24 bits */
-	if (create_branch(ip, rec->arch.mod->arch.tramp, BRANCH_SET_LINK)) {
+	if (!create_branch(ip, rec->arch.mod->arch.tramp, BRANCH_SET_LINK)) {
 		printk(KERN_ERR "Branch out of range");
 		return -EINVAL;
 	}
-- 
cgit v1.2.3


From d84e0d69c26b4d739214974d6ad6baf23f510580 Mon Sep 17 00:00:00 2001
From: Michael Ellerman <mpe@ellerman.id.au>
Date: Tue, 17 Jun 2014 16:15:35 +1000
Subject: powerpc/ftrace: Fix nop of modules on 64bit LE (ABIv2)

There is a bug in the handling of the function entry when we are nopping
out a branch from a module in ftrace.

We compare the result of module_trampoline_target() with the value of
ppc_function_entry(), and expect them to be true. But they never will
be.

module_trampoline_target() will always return the global entry point of
the function, whereas ppc_function_entry() will always return the local.

Fix it by using the newly added ppc_global_function_entry().

Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/kernel/ftrace.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/kernel/ftrace.c b/arch/powerpc/kernel/ftrace.c
index 8fc0c1742498..96efc664b49d 100644
--- a/arch/powerpc/kernel/ftrace.c
+++ b/arch/powerpc/kernel/ftrace.c
@@ -105,7 +105,7 @@ __ftrace_make_nop(struct module *mod,
 		  struct dyn_ftrace *rec, unsigned long addr)
 {
 	unsigned int op;
-	unsigned long ptr;
+	unsigned long entry, ptr;
 	unsigned long ip = rec->ip;
 	void *tramp;
 
@@ -136,10 +136,11 @@ __ftrace_make_nop(struct module *mod,
 
 	pr_devel("trampoline target %lx", ptr);
 
+	entry = ppc_global_function_entry((void *)addr);
 	/* This should match what was called */
-	if (ptr != ppc_function_entry((void *)addr)) {
+	if (ptr != entry) {
 		printk(KERN_ERR "addr %lx does not match expected %lx\n",
-			ptr, ppc_function_entry((void *)addr));
+			ptr, entry);
 		return -EINVAL;
 	}
 
-- 
cgit v1.2.3


From 072c4c018e547b4bd6cfa4086aae881140b62759 Mon Sep 17 00:00:00 2001
From: Michael Ellerman <mpe@ellerman.id.au>
Date: Tue, 17 Jun 2014 16:15:36 +1000
Subject: powerpc/ftrace: Use pr_fmt() to namespace error messages

The printks() in our ftrace code have no prefix, so they appear on the
console with very little context, eg:

  Branch out of range

Use pr_fmt() & pr_err() to add a prefix. While we're at it, collapse a
few split lines that don't need to be, and add a missing newline to one
message.

Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/kernel/ftrace.c | 43 ++++++++++++++++++++-----------------------
 1 file changed, 20 insertions(+), 23 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/kernel/ftrace.c b/arch/powerpc/kernel/ftrace.c
index 96efc664b49d..d178834fe508 100644
--- a/arch/powerpc/kernel/ftrace.c
+++ b/arch/powerpc/kernel/ftrace.c
@@ -10,6 +10,8 @@
  *
  */
 
+#define pr_fmt(fmt) "ftrace-powerpc: " fmt
+
 #include <linux/spinlock.h>
 #include <linux/hardirq.h>
 #include <linux/uaccess.h>
@@ -115,7 +117,7 @@ __ftrace_make_nop(struct module *mod,
 
 	/* Make sure that that this is still a 24bit jump */
 	if (!is_bl_op(op)) {
-		printk(KERN_ERR "Not expected bl: opcode is %x\n", op);
+		pr_err("Not expected bl: opcode is %x\n", op);
 		return -EINVAL;
 	}
 
@@ -125,12 +127,12 @@ __ftrace_make_nop(struct module *mod,
 	pr_devel("ip:%lx jumps to %p", ip, tramp);
 
 	if (!is_module_trampoline(tramp)) {
-		printk(KERN_ERR "Not a trampoline\n");
+		pr_err("Not a trampoline\n");
 		return -EINVAL;
 	}
 
 	if (module_trampoline_target(mod, tramp, &ptr)) {
-		printk(KERN_ERR "Failed to get trampoline target\n");
+		pr_err("Failed to get trampoline target\n");
 		return -EFAULT;
 	}
 
@@ -139,8 +141,7 @@ __ftrace_make_nop(struct module *mod,
 	entry = ppc_global_function_entry((void *)addr);
 	/* This should match what was called */
 	if (ptr != entry) {
-		printk(KERN_ERR "addr %lx does not match expected %lx\n",
-			ptr, entry);
+		pr_err("addr %lx does not match expected %lx\n", ptr, entry);
 		return -EINVAL;
 	}
 
@@ -180,7 +181,7 @@ __ftrace_make_nop(struct module *mod,
 
 	/* Make sure that that this is still a 24bit jump */
 	if (!is_bl_op(op)) {
-		printk(KERN_ERR "Not expected bl: opcode is %x\n", op);
+		pr_err("Not expected bl: opcode is %x\n", op);
 		return -EINVAL;
 	}
 
@@ -199,7 +200,7 @@ __ftrace_make_nop(struct module *mod,
 
 	/* Find where the trampoline jumps to */
 	if (probe_kernel_read(jmp, (void *)tramp, sizeof(jmp))) {
-		printk(KERN_ERR "Failed to read %lx\n", tramp);
+		pr_err("Failed to read %lx\n", tramp);
 		return -EFAULT;
 	}
 
@@ -210,7 +211,7 @@ __ftrace_make_nop(struct module *mod,
 	    ((jmp[1] & 0xffff0000) != 0x398c0000) ||
 	    (jmp[2] != 0x7d8903a6) ||
 	    (jmp[3] != 0x4e800420)) {
-		printk(KERN_ERR "Not a trampoline\n");
+		pr_err("Not a trampoline\n");
 		return -EINVAL;
 	}
 
@@ -222,8 +223,7 @@ __ftrace_make_nop(struct module *mod,
 	pr_devel(" %lx ", tramp);
 
 	if (tramp != addr) {
-		printk(KERN_ERR
-		       "Trampoline location %08lx does not match addr\n",
+		pr_err("Trampoline location %08lx does not match addr\n",
 		       tramp);
 		return -EINVAL;
 	}
@@ -264,15 +264,13 @@ int ftrace_make_nop(struct module *mod,
 	 */
 	if (!rec->arch.mod) {
 		if (!mod) {
-			printk(KERN_ERR "No module loaded addr=%lx\n",
-			       addr);
+			pr_err("No module loaded addr=%lx\n", addr);
 			return -EFAULT;
 		}
 		rec->arch.mod = mod;
 	} else if (mod) {
 		if (mod != rec->arch.mod) {
-			printk(KERN_ERR
-			       "Record mod %p not equal to passed in mod %p\n",
+			pr_err("Record mod %p not equal to passed in mod %p\n",
 			       rec->arch.mod, mod);
 			return -EINVAL;
 		}
@@ -309,25 +307,24 @@ __ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
 	 * just mask it out when doing the compare.
 	 */
 	if ((op[0] != 0x48000008) || ((op[1] & 0xffff0000) != 0xe8410000)) {
-		printk(KERN_ERR "Unexpected call sequence: %x %x\n",
-			op[0], op[1]);
+		pr_err("Unexpected call sequence: %x %x\n", op[0], op[1]);
 		return -EINVAL;
 	}
 
 	/* If we never set up a trampoline to ftrace_caller, then bail */
 	if (!rec->arch.mod->arch.tramp) {
-		printk(KERN_ERR "No ftrace trampoline\n");
+		pr_err("No ftrace trampoline\n");
 		return -EINVAL;
 	}
 
 	/* Ensure branch is within 24 bits */
 	if (!create_branch(ip, rec->arch.mod->arch.tramp, BRANCH_SET_LINK)) {
-		printk(KERN_ERR "Branch out of range");
+		pr_err("Branch out of range\n");
 		return -EINVAL;
 	}
 
 	if (patch_branch(ip, rec->arch.mod->arch.tramp, BRANCH_SET_LINK)) {
-		printk(KERN_ERR "REL24 out of range!\n");
+		pr_err("REL24 out of range!\n");
 		return -EINVAL;
 	}
 
@@ -346,13 +343,13 @@ __ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
 
 	/* It should be pointing to a nop */
 	if (op != PPC_INST_NOP) {
-		printk(KERN_ERR "Expected NOP but have %x\n", op);
+		pr_err("Expected NOP but have %x\n", op);
 		return -EINVAL;
 	}
 
 	/* If we never set up a trampoline to ftrace_caller, then bail */
 	if (!rec->arch.mod->arch.tramp) {
-		printk(KERN_ERR "No ftrace trampoline\n");
+		pr_err("No ftrace trampoline\n");
 		return -EINVAL;
 	}
 
@@ -360,7 +357,7 @@ __ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
 	op = create_branch((unsigned int *)ip,
 			   rec->arch.mod->arch.tramp, BRANCH_SET_LINK);
 	if (!op) {
-		printk(KERN_ERR "REL24 out of range!\n");
+		pr_err("REL24 out of range!\n");
 		return -EINVAL;
 	}
 
@@ -398,7 +395,7 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
 	 * already have a module defined.
 	 */
 	if (!rec->arch.mod) {
-		printk(KERN_ERR "No module loaded\n");
+		pr_err("No module loaded\n");
 		return -EINVAL;
 	}
 
-- 
cgit v1.2.3


From 2f0143c91d30823f6f6e7d94d7fa818f7ab18a18 Mon Sep 17 00:00:00 2001
From: Michael Ellerman <mpe@ellerman.id.au>
Date: Mon, 23 Jun 2014 13:23:31 +1000
Subject: powerpc/kprobes: Fix jprobes on ABI v2 (LE)

In commit 721aeaa9 "Build little endian ppc64 kernel with ABIv2", we
missed some updates required in the kprobes code to make jprobes work
when the kernel is built with ABI v2.

Firstly update arch_deref_entry_point() to do the right thing. Now that
we have added ppc_global_function_entry() we can just always use that, it
will do the right thing for 32 & 64 bit and ABI v1 & v2.

Secondly we need to update the code that sets up the register state before
calling the jprobe handler. On ABI v1 we setup r2 to hold the TOC, on ABI
v2 we need to populate r12 with the function entry point address.

Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/kernel/kprobes.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c
index 90fab64d911d..2f72af82513c 100644
--- a/arch/powerpc/kernel/kprobes.c
+++ b/arch/powerpc/kernel/kprobes.c
@@ -32,6 +32,7 @@
 #include <linux/module.h>
 #include <linux/kdebug.h>
 #include <linux/slab.h>
+#include <asm/code-patching.h>
 #include <asm/cacheflush.h>
 #include <asm/sstep.h>
 #include <asm/uaccess.h>
@@ -491,12 +492,10 @@ int __kprobes kprobe_exceptions_notify(struct notifier_block *self,
 	return ret;
 }
 
-#ifdef CONFIG_PPC64
 unsigned long arch_deref_entry_point(void *entry)
 {
-	return ((func_descr_t *)entry)->entry;
+	return ppc_global_function_entry(entry);
 }
-#endif
 
 int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs)
 {
@@ -508,7 +507,11 @@ int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs)
 	/* setup return addr to the jprobe handler routine */
 	regs->nip = arch_deref_entry_point(jp->entry);
 #ifdef CONFIG_PPC64
+#if defined(_CALL_ELF) && _CALL_ELF == 2
+	regs->gpr[12] = (unsigned long)jp->entry;
+#else
 	regs->gpr[2] = (unsigned long)(((func_descr_t *)jp->entry)->toc);
+#endif
 #endif
 
 	return 1;
-- 
cgit v1.2.3


From f5fc82290c14b65511fea3db35f0bcbe6de746d3 Mon Sep 17 00:00:00 2001
From: Rickard Strandqvist <rickard_strandqvist@spectrumdigital.se>
Date: Sat, 14 Jun 2014 18:25:11 +0200
Subject: powerpc/cell: cbe_thermal.c: Cleaning up a variable is of the wrong
 type

This variable is of the wrong type, everywhere it is used it
should be an unsigned int rather than a int.

Signed-off-by: Rickard Strandqvist <rickard_strandqvist@spectrumdigital.se>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/platforms/cell/cbe_thermal.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/powerpc/platforms/cell/cbe_thermal.c b/arch/powerpc/platforms/cell/cbe_thermal.c
index 94560db788bf..2c15ff094483 100644
--- a/arch/powerpc/platforms/cell/cbe_thermal.c
+++ b/arch/powerpc/platforms/cell/cbe_thermal.c
@@ -125,7 +125,7 @@ static ssize_t show_throttle(struct cbe_pmd_regs __iomem *pmd_regs, char *buf, i
 static ssize_t store_throttle(struct cbe_pmd_regs __iomem *pmd_regs, const char *buf, size_t size, int pos)
 {
 	u64 reg_value;
-	int temp;
+	unsigned int temp;
 	u64 new_value;
 	int ret;
 
-- 
cgit v1.2.3


From a1d23d5c94256ffa0de510a3d59d6eff551d97ae Mon Sep 17 00:00:00 2001
From: Catalin Marinas <catalin.marinas@arm.com>
Date: Fri, 13 Jun 2014 09:44:21 +0100
Subject: powerpc/kmemleak: Do not scan the DART table

The DART table allocation is registered to kmemleak via the
memblock_alloc_base() call. However, the DART table is later unmapped
and dart_tablebase VA no longer accessible. This patch tells kmemleak
not to scan this block and avoid an unhandled paging request.

Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mackerras <paulus@samba.org>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/sysdev/dart_iommu.c | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'arch')

diff --git a/arch/powerpc/sysdev/dart_iommu.c b/arch/powerpc/sysdev/dart_iommu.c
index 62c47bb76517..9e5353ff6d1b 100644
--- a/arch/powerpc/sysdev/dart_iommu.c
+++ b/arch/powerpc/sysdev/dart_iommu.c
@@ -476,6 +476,11 @@ void __init alloc_dart_table(void)
 	 */
 	dart_tablebase = (unsigned long)
 		__va(memblock_alloc_base(1UL<<24, 1UL<<24, 0x80000000L));
+	/*
+	 * The DART space is later unmapped from the kernel linear mapping and
+	 * accessing dart_tablebase during kmemleak scanning will fault.
+	 */
+	kmemleak_no_scan((void *)dart_tablebase);
 
 	printk(KERN_INFO "DART table allocated at: %lx\n", dart_tablebase);
 }
-- 
cgit v1.2.3


From 8dbdb8e704db34085f5978c335c10256b0fb9629 Mon Sep 17 00:00:00 2001
From: Gregory CLEMENT <gregory.clement@free-electrons.com>
Date: Mon, 23 Jun 2014 16:16:51 +0200
Subject: ARM: mvebu: Fix the improper use of the compatible string armada38x
 using a wildcard

Wildcards in compatible strings should be avoid. "marvell,armada38x"
was recently introduced but was not yet used.

The armada 385 SoC is a superset of the armada 380 SoC (with more CPUs
and more PCIe slots). So this patch replaces the use of
"marvell,armada38x" by the "marvell,armada380" string.

Signed-off-by: Gregory CLEMENT <gregory.clement@free-electrons.com>
Link: https://lkml.kernel.org/r/1403533011-21339-1-git-send-email-gregory.clement@free-electrons.com
Acked-by: Andrew Lunn <andrew@lunn.ch>
Cc: <stable@vger.kernel.org> # v3.15+
Signed-off-by: Jason Cooper <jason@lakedaemon.net>
---
 Documentation/devicetree/bindings/arm/armada-38x.txt | 14 ++++++++++++--
 arch/arm/boot/dts/armada-380.dtsi                    |  2 +-
 arch/arm/boot/dts/armada-385-db.dts                  |  2 +-
 arch/arm/boot/dts/armada-385-rd.dts                  |  2 +-
 arch/arm/boot/dts/armada-385.dtsi                    |  2 +-
 arch/arm/boot/dts/armada-38x.dtsi                    |  2 +-
 6 files changed, 17 insertions(+), 7 deletions(-)

(limited to 'arch')

diff --git a/Documentation/devicetree/bindings/arm/armada-38x.txt b/Documentation/devicetree/bindings/arm/armada-38x.txt
index 11f2330a6554..ad9f8ed4d9bd 100644
--- a/Documentation/devicetree/bindings/arm/armada-38x.txt
+++ b/Documentation/devicetree/bindings/arm/armada-38x.txt
@@ -6,5 +6,15 @@ following property:
 
 Required root node property:
 
- - compatible: must contain either "marvell,armada380" or
-   "marvell,armada385" depending on the variant of the SoC being used.
+ - compatible: must contain "marvell,armada380"
+
+In addition, boards using the Marvell Armada 385 SoC shall have the
+following property before the previous one:
+
+Required root node property:
+
+compatible: must contain "marvell,armada385"
+
+Example:
+
+compatible = "marvell,a385-rd", "marvell,armada385", "marvell,armada380";
diff --git a/arch/arm/boot/dts/armada-380.dtsi b/arch/arm/boot/dts/armada-380.dtsi
index e69bc6759c39..4173a8ab34e7 100644
--- a/arch/arm/boot/dts/armada-380.dtsi
+++ b/arch/arm/boot/dts/armada-380.dtsi
@@ -16,7 +16,7 @@
 
 / {
 	model = "Marvell Armada 380 family SoC";
-	compatible = "marvell,armada380", "marvell,armada38x";
+	compatible = "marvell,armada380";
 
 	cpus {
 		#address-cells = <1>;
diff --git a/arch/arm/boot/dts/armada-385-db.dts b/arch/arm/boot/dts/armada-385-db.dts
index ff9637dd8d0f..4be6a2838a41 100644
--- a/arch/arm/boot/dts/armada-385-db.dts
+++ b/arch/arm/boot/dts/armada-385-db.dts
@@ -16,7 +16,7 @@
 
 / {
 	model = "Marvell Armada 385 Development Board";
-	compatible = "marvell,a385-db", "marvell,armada385", "marvell,armada38x";
+	compatible = "marvell,a385-db", "marvell,armada385", "marvell,armada380";
 
 	chosen {
 		bootargs = "console=ttyS0,115200 earlyprintk";
diff --git a/arch/arm/boot/dts/armada-385-rd.dts b/arch/arm/boot/dts/armada-385-rd.dts
index 40893255a3f0..aaca2861dc87 100644
--- a/arch/arm/boot/dts/armada-385-rd.dts
+++ b/arch/arm/boot/dts/armada-385-rd.dts
@@ -17,7 +17,7 @@
 
 / {
 	model = "Marvell Armada 385 Reference Design";
-	compatible = "marvell,a385-rd", "marvell,armada385", "marvell,armada38x";
+	compatible = "marvell,a385-rd", "marvell,armada385", "marvell,armada380";
 
 	chosen {
 		bootargs = "console=ttyS0,115200 earlyprintk";
diff --git a/arch/arm/boot/dts/armada-385.dtsi b/arch/arm/boot/dts/armada-385.dtsi
index f011009bf4cf..6283d7912f71 100644
--- a/arch/arm/boot/dts/armada-385.dtsi
+++ b/arch/arm/boot/dts/armada-385.dtsi
@@ -16,7 +16,7 @@
 
 / {
 	model = "Marvell Armada 385 family SoC";
-	compatible = "marvell,armada385", "marvell,armada38x";
+	compatible = "marvell,armada385", "marvell,armada380";
 
 	cpus {
 		#address-cells = <1>;
diff --git a/arch/arm/boot/dts/armada-38x.dtsi b/arch/arm/boot/dts/armada-38x.dtsi
index 3de364e81b52..689fa1a46728 100644
--- a/arch/arm/boot/dts/armada-38x.dtsi
+++ b/arch/arm/boot/dts/armada-38x.dtsi
@@ -20,7 +20,7 @@
 
 / {
 	model = "Marvell Armada 38x family SoC";
-	compatible = "marvell,armada38x";
+	compatible = "marvell,armada380";
 
 	aliases {
 		gpio0 = &gpio0;
-- 
cgit v1.2.3


From 11f9323a48875101bc4de6077155c17aed82cfc8 Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@linaro.org>
Date: Tue, 24 Jun 2014 14:08:07 +0200
Subject: ARM: integrator: fix OF-related regression

Commit 07e461cd7e73a84f0e3757932b93cc80976fd749
"of: Ensure unique names without sacrificing determinism"
caused a boot failure regression on the Integrator machines.

The problem is probably caused by fiddling too much with
the device tree population in the OF init function, such
as passing the SoC bus device as parent when populating
the device tree.

This patch fixes the problem by:

- Avoiding to explicitly look up the tree root
- Look up devices needed before device population from
  the match only, passing NULL as root
- Passing NULL as root and parent when calling
  of_platform_populate()

After this the Integrators boot again. Tested on
Integrator/AP and Integrator/CP.

Cc: Grant Likely <grant.likely@linaro.org>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
---
 arch/arm/mach-integrator/integrator_ap.c | 26 +++++++-------------------
 arch/arm/mach-integrator/integrator_cp.c | 23 ++++++-----------------
 2 files changed, 13 insertions(+), 36 deletions(-)

(limited to 'arch')

diff --git a/arch/arm/mach-integrator/integrator_ap.c b/arch/arm/mach-integrator/integrator_ap.c
index dd0cc677d596..660ca6feff40 100644
--- a/arch/arm/mach-integrator/integrator_ap.c
+++ b/arch/arm/mach-integrator/integrator_ap.c
@@ -480,25 +480,18 @@ static const struct of_device_id ebi_match[] = {
 static void __init ap_init_of(void)
 {
 	unsigned long sc_dec;
-	struct device_node *root;
 	struct device_node *syscon;
 	struct device_node *ebi;
 	struct device *parent;
 	struct soc_device *soc_dev;
 	struct soc_device_attribute *soc_dev_attr;
 	u32 ap_sc_id;
-	int err;
 	int i;
 
-	/* Here we create an SoC device for the root node */
-	root = of_find_node_by_path("/");
-	if (!root)
-		return;
-
-	syscon = of_find_matching_node(root, ap_syscon_match);
+	syscon = of_find_matching_node(NULL, ap_syscon_match);
 	if (!syscon)
 		return;
-	ebi = of_find_matching_node(root, ebi_match);
+	ebi = of_find_matching_node(NULL, ebi_match);
 	if (!ebi)
 		return;
 
@@ -509,19 +502,17 @@ static void __init ap_init_of(void)
 	if (!ebi_base)
 		return;
 
+	of_platform_populate(NULL, of_default_bus_match_table,
+			ap_auxdata_lookup, NULL);
+
 	ap_sc_id = readl(ap_syscon_base);
 
 	soc_dev_attr = kzalloc(sizeof(*soc_dev_attr), GFP_KERNEL);
 	if (!soc_dev_attr)
 		return;
 
-	err = of_property_read_string(root, "compatible",
-				      &soc_dev_attr->soc_id);
-	if (err)
-		return;
-	err = of_property_read_string(root, "model", &soc_dev_attr->machine);
-	if (err)
-		return;
+	soc_dev_attr->soc_id = "XVC";
+	soc_dev_attr->machine = "Integrator/AP";
 	soc_dev_attr->family = "Integrator";
 	soc_dev_attr->revision = kasprintf(GFP_KERNEL, "%c",
 					   'A' + (ap_sc_id & 0x0f));
@@ -536,9 +527,6 @@ static void __init ap_init_of(void)
 	parent = soc_device_to_device(soc_dev);
 	integrator_init_sysfs(parent, ap_sc_id);
 
-	of_platform_populate(root, of_default_bus_match_table,
-			ap_auxdata_lookup, parent);
-
 	sc_dec = readl(ap_syscon_base + INTEGRATOR_SC_DEC_OFFSET);
 	for (i = 0; i < 4; i++) {
 		struct lm_device *lmdev;
diff --git a/arch/arm/mach-integrator/integrator_cp.c b/arch/arm/mach-integrator/integrator_cp.c
index a938242b0c95..0e57f8f820a5 100644
--- a/arch/arm/mach-integrator/integrator_cp.c
+++ b/arch/arm/mach-integrator/integrator_cp.c
@@ -279,20 +279,13 @@ static const struct of_device_id intcp_syscon_match[] = {
 
 static void __init intcp_init_of(void)
 {
-	struct device_node *root;
 	struct device_node *cpcon;
 	struct device *parent;
 	struct soc_device *soc_dev;
 	struct soc_device_attribute *soc_dev_attr;
 	u32 intcp_sc_id;
-	int err;
 
-	/* Here we create an SoC device for the root node */
-	root = of_find_node_by_path("/");
-	if (!root)
-		return;
-
-	cpcon = of_find_matching_node(root, intcp_syscon_match);
+	cpcon = of_find_matching_node(NULL, intcp_syscon_match);
 	if (!cpcon)
 		return;
 
@@ -300,19 +293,17 @@ static void __init intcp_init_of(void)
 	if (!intcp_con_base)
 		return;
 
+	of_platform_populate(NULL, of_default_bus_match_table,
+			     intcp_auxdata_lookup, NULL);
+
 	intcp_sc_id = readl(intcp_con_base);
 
 	soc_dev_attr = kzalloc(sizeof(*soc_dev_attr), GFP_KERNEL);
 	if (!soc_dev_attr)
 		return;
 
-	err = of_property_read_string(root, "compatible",
-				      &soc_dev_attr->soc_id);
-	if (err)
-		return;
-	err = of_property_read_string(root, "model", &soc_dev_attr->machine);
-	if (err)
-		return;
+	soc_dev_attr->soc_id = "XCV";
+	soc_dev_attr->machine = "Integrator/CP";
 	soc_dev_attr->family = "Integrator";
 	soc_dev_attr->revision = kasprintf(GFP_KERNEL, "%c",
 					   'A' + (intcp_sc_id & 0x0f));
@@ -326,8 +317,6 @@ static void __init intcp_init_of(void)
 
 	parent = soc_device_to_device(soc_dev);
 	integrator_init_sysfs(parent, intcp_sc_id);
-	of_platform_populate(root, of_default_bus_match_table,
-			intcp_auxdata_lookup, parent);
 }
 
 static const char * intcp_dt_board_compat[] = {
-- 
cgit v1.2.3


From 46b57a76930f01ebf31230ed35af5beeccb5ad95 Mon Sep 17 00:00:00 2001
From: Andy Lutomirski <luto@amacapital.net>
Date: Tue, 24 Jun 2014 13:46:52 -0700
Subject: x86/vdso: Move DISABLE_BRANCH_PROFILING into the vdso makefile

DISABLE_BRANCH_PROFILING turns off branch profiling (i.e. a
redefinition of 'if').  Branch profiling depends on a bunch of
kernel-internal symbols and generates extra output sections, none of
which are useful or functional in the vDSO.

It's currently turned off for vclock_gettime.c, but vgetcpu.c also
triggers branch profiling, so just turn it off in the makefile.

This fixes the build on some configurations: the vdso could contain
undefined symbols, and the fake section table overflowed due to
ftrace's added sections.

Signed-off-by: Andy Lutomirski <luto@amacapital.net>
Link: http://lkml.kernel.org/r/bf1ec29e03b2bbc081f6dcaefa64db1c3a83fb21.1403642755.git.luto@amacapital.net
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
---
 arch/x86/vdso/Makefile         | 4 +++-
 arch/x86/vdso/vclock_gettime.c | 3 ---
 2 files changed, 3 insertions(+), 4 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/vdso/Makefile b/arch/x86/vdso/Makefile
index 68a15c4dd6e4..61b04fe36e66 100644
--- a/arch/x86/vdso/Makefile
+++ b/arch/x86/vdso/Makefile
@@ -66,7 +66,8 @@ $(obj)/vdso-image-%.c: $(obj)/vdso%.so.dbg $(obj)/vdso2c FORCE
 #
 CFL := $(PROFILING) -mcmodel=small -fPIC -O2 -fasynchronous-unwind-tables -m64 \
        $(filter -g%,$(KBUILD_CFLAGS)) $(call cc-option, -fno-stack-protector) \
-       -fno-omit-frame-pointer -foptimize-sibling-calls
+       -fno-omit-frame-pointer -foptimize-sibling-calls \
+       -DDISABLE_BRANCH_PROFILING
 
 $(vobjs): KBUILD_CFLAGS += $(CFL)
 
@@ -149,6 +150,7 @@ KBUILD_CFLAGS_32 += -m32 -msoft-float -mregparm=0 -fpic
 KBUILD_CFLAGS_32 += $(call cc-option, -fno-stack-protector)
 KBUILD_CFLAGS_32 += $(call cc-option, -foptimize-sibling-calls)
 KBUILD_CFLAGS_32 += -fno-omit-frame-pointer
+KBUILD_CFLAGS_32 += -DDISABLE_BRANCH_PROFILING
 $(vdso32-images:%=$(obj)/%.dbg): KBUILD_CFLAGS = $(KBUILD_CFLAGS_32)
 
 $(vdso32-images:%=$(obj)/%.dbg): $(obj)/vdso32-%.so.dbg: FORCE \
diff --git a/arch/x86/vdso/vclock_gettime.c b/arch/x86/vdso/vclock_gettime.c
index b2e4f493e5b0..9793322751e0 100644
--- a/arch/x86/vdso/vclock_gettime.c
+++ b/arch/x86/vdso/vclock_gettime.c
@@ -11,9 +11,6 @@
  * Check with readelf after changing.
  */
 
-/* Disable profiling for userspace code: */
-#define DISABLE_BRANCH_PROFILING
-
 #include <uapi/linux/time.h>
 #include <asm/vgtod.h>
 #include <asm/hpet.h>
-- 
cgit v1.2.3


From 6a89d71078dad9b1c49ccdf1ffa656fbe36ccd1e Mon Sep 17 00:00:00 2001
From: Andy Lutomirski <luto@amacapital.net>
Date: Tue, 24 Jun 2014 13:46:53 -0700
Subject: x86/vdso: Error out in vdso2c if DT_RELA is present

vdso2c was checking for various types of relocations to detect when
the vdso had undefined symbols or was otherwise dependent on
relocation at load time.  Undefined symbols in the vdso would fail if
accessed at runtime, and certain implementation errors (e.g. branch
profiling or incorrect symbol visibilities) could result in data
access through the GOT that requires relocations.  This could be
as simple as:

    extern char foo;
    return foo;

Without some kind of visibility control, the compiler would assume
that foo could be interposed at load time and would generate a
relocation.

x86-64 and x32 (as opposed to i386) use explicit-addent (RELA) instead
of implicit-addent (REL) relocations for data access, and vdso2c
forgot to detect those.

Whether these bad relocations would actually fail at runtime depends
on what the linker sticks in the unrelocated references.  Nonetheless,
these relocations have no business existing in the vDSO and should be
fixed rather than silently ignored.

This error could trigger on some configurations due to branch
profiling.  The previous patch fixed that.

Signed-off-by: Andy Lutomirski <luto@amacapital.net>
Link: http://lkml.kernel.org/r/74ef0c00b4d2a3b573e00a4113874e62f772e348.1403642755.git.luto@amacapital.net
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
---
 arch/x86/vdso/vdso2c.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/x86/vdso/vdso2c.h b/arch/x86/vdso/vdso2c.h
index f42e2ddc663d..df95a2fdff73 100644
--- a/arch/x86/vdso/vdso2c.h
+++ b/arch/x86/vdso/vdso2c.h
@@ -164,7 +164,7 @@ static void BITSFUNC(go)(void *addr, size_t len,
 	for (i = 0; dyn + i < dyn_end &&
 		     GET_LE(&dyn[i].d_tag) != DT_NULL; i++) {
 		typeof(dyn[i].d_tag) tag = GET_LE(&dyn[i].d_tag);
-		if (tag == DT_REL || tag == DT_RELSZ ||
+		if (tag == DT_REL || tag == DT_RELSZ || tag == DT_RELA ||
 		    tag == DT_RELENT || tag == DT_TEXTREL)
 			fail("vdso image contains dynamic relocations\n");
 	}
-- 
cgit v1.2.3


From e2500be2b801f4e95d6a1efbc50af3bf14eeb940 Mon Sep 17 00:00:00 2001
From: Michael Ellerman <mpe@ellerman.id.au>
Date: Tue, 24 Jun 2014 17:17:47 +1000
Subject: powerpc/powernv: Remove OPAL v1 takeover

In commit 27f4488872d9 "Add OPAL takeover from PowerVM" we added support
for "takeover" on OPAL v1 machines.

This was a mode of operation where we would boot under pHyp, and query
for the presence of OPAL. If detected we would then do a special
sequence to take over the machine, and the kernel would end up running
in hypervisor mode.

OPAL v1 was never a supported product, and was never shipped outside
IBM. As far as we know no one is still using it.

Newer versions of OPAL do not use the takeover mechanism. Although the
query for OPAL should be harmless on machines with newer OPAL, we have
seen a machine where it causes a crash in Open Firmware.

The code in early_init_devtree() to copy boot_command_line into cmd_line
was added in commit 817c21ad9a1f "Get kernel command line accross OPAL
takeover", and AFAIK is only used by takeover, so should also be
removed.

Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/Kconfig.debug                     |   1 -
 arch/powerpc/include/asm/opal.h                |  29 ----
 arch/powerpc/kernel/prom.c                     |   7 -
 arch/powerpc/kernel/prom_init.c                | 211 -------------------------
 arch/powerpc/kernel/prom_init_check.sh         |   4 +-
 arch/powerpc/platforms/powernv/Makefile        |   2 +-
 arch/powerpc/platforms/powernv/opal-takeover.S | 140 ----------------
 7 files changed, 2 insertions(+), 392 deletions(-)
 delete mode 100644 arch/powerpc/platforms/powernv/opal-takeover.S

(limited to 'arch')

diff --git a/arch/powerpc/Kconfig.debug b/arch/powerpc/Kconfig.debug
index 790352f93700..35d16bd2760b 100644
--- a/arch/powerpc/Kconfig.debug
+++ b/arch/powerpc/Kconfig.debug
@@ -303,7 +303,6 @@ config PPC_EARLY_DEBUG_OPAL_VTERMNO
 	  This correspond to which /dev/hvcN you want to use for early
 	  debug.
 
-	  On OPAL v1 (takeover) this should always be 0
 	  On OPAL v2, this will be 0 for network console and 1 or 2 for
 	  the machine built-in serial ports.
 
diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h
index 460018889ba9..0da1dbd42e02 100644
--- a/arch/powerpc/include/asm/opal.h
+++ b/arch/powerpc/include/asm/opal.h
@@ -12,27 +12,7 @@
 #ifndef __OPAL_H
 #define __OPAL_H
 
-/****** Takeover interface ********/
-
-/* PAPR H-Call used to querty the HAL existence and/or instanciate
- * it from within pHyp (tech preview only).
- *
- * This is exclusively used in prom_init.c
- */
-
 #ifndef __ASSEMBLY__
-
-struct opal_takeover_args {
-	u64	k_image;		/* r4 */
-	u64	k_size;			/* r5 */
-	u64	k_entry;		/* r6 */
-	u64	k_entry2;		/* r7 */
-	u64	hal_addr;		/* r8 */
-	u64	rd_image;		/* r9 */
-	u64	rd_size;		/* r10 */
-	u64	rd_loc;			/* r11 */
-};
-
 /*
  * SG entry
  *
@@ -55,15 +35,6 @@ struct opal_sg_list {
 /* We calculate number of sg entries based on PAGE_SIZE */
 #define SG_ENTRIES_PER_NODE ((PAGE_SIZE - 16) / sizeof(struct opal_sg_entry))
 
-extern long opal_query_takeover(u64 *hal_size, u64 *hal_align);
-
-extern long opal_do_takeover(struct opal_takeover_args *args);
-
-struct rtas_args;
-extern int opal_enter_rtas(struct rtas_args *args,
-			   unsigned long data,
-			   unsigned long entry);
-
 #endif /* __ASSEMBLY__ */
 
 /****** OPAL APIs ******/
diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
index 613a860a203c..b694b0730971 100644
--- a/arch/powerpc/kernel/prom.c
+++ b/arch/powerpc/kernel/prom.c
@@ -662,13 +662,6 @@ void __init early_init_devtree(void *params)
 	of_scan_flat_dt(early_init_dt_scan_fw_dump, NULL);
 #endif
 
-	/* Pre-initialize the cmd_line with the content of boot_commmand_line,
-	 * which will be empty except when the content of the variable has
-	 * been overriden by a bootloading mechanism. This happens typically
-	 * with HAL takeover
-	 */
-	strlcpy(cmd_line, boot_command_line, COMMAND_LINE_SIZE);
-
 	/* Retrieve various informations from the /chosen node of the
 	 * device-tree, including the platform type, initrd location and
 	 * size, TCE reserve, and more ...
diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c
index 078145acf7fb..1a85d8f96739 100644
--- a/arch/powerpc/kernel/prom_init.c
+++ b/arch/powerpc/kernel/prom_init.c
@@ -1268,201 +1268,6 @@ static u64 __initdata prom_opal_base;
 static u64 __initdata prom_opal_entry;
 #endif
 
-#ifdef __BIG_ENDIAN__
-/* XXX Don't change this structure without updating opal-takeover.S */
-static struct opal_secondary_data {
-	s64				ack;	/*  0 */
-	u64				go;	/*  8 */
-	struct opal_takeover_args	args;	/* 16 */
-} opal_secondary_data;
-
-static u64 __initdata prom_opal_align;
-static u64 __initdata prom_opal_size;
-static int __initdata prom_rtas_start_cpu;
-static u64 __initdata prom_rtas_data;
-static u64 __initdata prom_rtas_entry;
-
-extern char opal_secondary_entry;
-
-static void __init prom_query_opal(void)
-{
-	long rc;
-
-	/* We must not query for OPAL presence on a machine that
-	 * supports TNK takeover (970 blades), as this uses the same
-	 * h-call with different arguments and will crash
-	 */
-	if (PHANDLE_VALID(call_prom("finddevice", 1, 1,
-				    ADDR("/tnk-memory-map")))) {
-		prom_printf("TNK takeover detected, skipping OPAL check\n");
-		return;
-	}
-
-	prom_printf("Querying for OPAL presence... ");
-
-	rc = opal_query_takeover(&prom_opal_size,
-				 &prom_opal_align);
-	prom_debug("(rc = %ld) ", rc);
-	if (rc != 0) {
-		prom_printf("not there.\n");
-		return;
-	}
-	of_platform = PLATFORM_OPAL;
-	prom_printf(" there !\n");
-	prom_debug("  opal_size  = 0x%lx\n", prom_opal_size);
-	prom_debug("  opal_align = 0x%lx\n", prom_opal_align);
-	if (prom_opal_align < 0x10000)
-		prom_opal_align = 0x10000;
-}
-
-static int __init prom_rtas_call(int token, int nargs, int nret,
-				 int *outputs, ...)
-{
-	struct rtas_args rtas_args;
-	va_list list;
-	int i;
-
-	rtas_args.token = token;
-	rtas_args.nargs = nargs;
-	rtas_args.nret  = nret;
-	rtas_args.rets  = (rtas_arg_t *)&(rtas_args.args[nargs]);
-	va_start(list, outputs);
-	for (i = 0; i < nargs; ++i)
-		rtas_args.args[i] = va_arg(list, rtas_arg_t);
-	va_end(list);
-
-	for (i = 0; i < nret; ++i)
-		rtas_args.rets[i] = 0;
-
-	opal_enter_rtas(&rtas_args, prom_rtas_data,
-			prom_rtas_entry);
-
-	if (nret > 1 && outputs != NULL)
-		for (i = 0; i < nret-1; ++i)
-			outputs[i] = rtas_args.rets[i+1];
-	return (nret > 0)? rtas_args.rets[0]: 0;
-}
-
-static void __init prom_opal_hold_cpus(void)
-{
-	int i, cnt, cpu, rc;
-	long j;
-	phandle node;
-	char type[64];
-	u32 servers[8];
-	void *entry = (unsigned long *)&opal_secondary_entry;
-	struct opal_secondary_data *data = &opal_secondary_data;
-
-	prom_debug("prom_opal_hold_cpus: start...\n");
-	prom_debug("    - entry       = 0x%x\n", entry);
-	prom_debug("    - data        = 0x%x\n", data);
-
-	data->ack = -1;
-	data->go = 0;
-
-	/* look for cpus */
-	for (node = 0; prom_next_node(&node); ) {
-		type[0] = 0;
-		prom_getprop(node, "device_type", type, sizeof(type));
-		if (strcmp(type, "cpu") != 0)
-			continue;
-
-		/* Skip non-configured cpus. */
-		if (prom_getprop(node, "status", type, sizeof(type)) > 0)
-			if (strcmp(type, "okay") != 0)
-				continue;
-
-		cnt = prom_getprop(node, "ibm,ppc-interrupt-server#s", servers,
-			     sizeof(servers));
-		if (cnt == PROM_ERROR)
-			break;
-		cnt >>= 2;
-		for (i = 0; i < cnt; i++) {
-			cpu = servers[i];
-			prom_debug("CPU %d ... ", cpu);
-			if (cpu == prom.cpu) {
-				prom_debug("booted !\n");
-				continue;
-			}
-			prom_debug("starting ... ");
-
-			/* Init the acknowledge var which will be reset by
-			 * the secondary cpu when it awakens from its OF
-			 * spinloop.
-			 */
-			data->ack = -1;
-			rc = prom_rtas_call(prom_rtas_start_cpu, 3, 1,
-					    NULL, cpu, entry, data);
-			prom_debug("rtas rc=%d ...", rc);
-
-			for (j = 0; j < 100000000 && data->ack == -1; j++) {
-				HMT_low();
-				mb();
-			}
-			HMT_medium();
-			if (data->ack != -1)
-				prom_debug("done, PIR=0x%x\n", data->ack);
-			else
-				prom_debug("timeout !\n");
-		}
-	}
-	prom_debug("prom_opal_hold_cpus: end...\n");
-}
-
-static void __init prom_opal_takeover(void)
-{
-	struct opal_secondary_data *data = &opal_secondary_data;
-	struct opal_takeover_args *args = &data->args;
-	u64 align = prom_opal_align;
-	u64 top_addr, opal_addr;
-
-	args->k_image	= (u64)_stext;
-	args->k_size	= _end - _stext;
-	args->k_entry	= 0;
-	args->k_entry2	= 0x60;
-
-	top_addr = _ALIGN_UP(args->k_size, align);
-
-	if (prom_initrd_start != 0) {
-		args->rd_image = prom_initrd_start;
-		args->rd_size = prom_initrd_end - args->rd_image;
-		args->rd_loc = top_addr;
-		top_addr = _ALIGN_UP(args->rd_loc + args->rd_size, align);
-	}
-
-	/* Pickup an address for the HAL. We want to go really high
-	 * up to avoid problem with future kexecs. On the other hand
-	 * we don't want to be all over the TCEs on P5IOC2 machines
-	 * which are going to be up there too. We assume the machine
-	 * has plenty of memory, and we ask for the HAL for now to
-	 * be just below the 1G point, or above the initrd
-	 */
-	opal_addr = _ALIGN_DOWN(0x40000000 - prom_opal_size, align);
-	if (opal_addr < top_addr)
-		opal_addr = top_addr;
-	args->hal_addr = opal_addr;
-
-	/* Copy the command line to the kernel image */
-	strlcpy(boot_command_line, prom_cmd_line,
-		COMMAND_LINE_SIZE);
-
-	prom_debug("  k_image    = 0x%lx\n", args->k_image);
-	prom_debug("  k_size     = 0x%lx\n", args->k_size);
-	prom_debug("  k_entry    = 0x%lx\n", args->k_entry);
-	prom_debug("  k_entry2   = 0x%lx\n", args->k_entry2);
-	prom_debug("  hal_addr   = 0x%lx\n", args->hal_addr);
-	prom_debug("  rd_image   = 0x%lx\n", args->rd_image);
-	prom_debug("  rd_size    = 0x%lx\n", args->rd_size);
-	prom_debug("  rd_loc     = 0x%lx\n", args->rd_loc);
-	prom_printf("Performing OPAL takeover,this can take a few minutes..\n");
-	prom_close_stdin();
-	mb();
-	data->go = 1;
-	for (;;)
-		opal_do_takeover(args);
-}
-#endif /* __BIG_ENDIAN__ */
-
 /*
  * Allocate room for and instantiate OPAL
  */
@@ -1597,12 +1402,6 @@ static void __init prom_instantiate_rtas(void)
 			 &val, sizeof(val)) != PROM_ERROR)
 		rtas_has_query_cpu_stopped = true;
 
-#if defined(CONFIG_PPC_POWERNV) && defined(__BIG_ENDIAN__)
-	/* PowerVN takeover hack */
-	prom_rtas_data = base;
-	prom_rtas_entry = entry;
-	prom_getprop(rtas_node, "start-cpu", &prom_rtas_start_cpu, 4);
-#endif
 	prom_debug("rtas base     = 0x%x\n", base);
 	prom_debug("rtas entry    = 0x%x\n", entry);
 	prom_debug("rtas size     = 0x%x\n", (long)size);
@@ -3027,16 +2826,6 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4,
 		prom_instantiate_rtas();
 
 #ifdef CONFIG_PPC_POWERNV
-#ifdef __BIG_ENDIAN__
-	/* Detect HAL and try instanciating it & doing takeover */
-	if (of_platform == PLATFORM_PSERIES_LPAR) {
-		prom_query_opal();
-		if (of_platform == PLATFORM_OPAL) {
-			prom_opal_hold_cpus();
-			prom_opal_takeover();
-		}
-	} else
-#endif /* __BIG_ENDIAN__ */
 	if (of_platform == PLATFORM_OPAL)
 		prom_instantiate_opal();
 #endif /* CONFIG_PPC_POWERNV */
diff --git a/arch/powerpc/kernel/prom_init_check.sh b/arch/powerpc/kernel/prom_init_check.sh
index 77aa1e95e904..fe8e54b9ef7d 100644
--- a/arch/powerpc/kernel/prom_init_check.sh
+++ b/arch/powerpc/kernel/prom_init_check.sh
@@ -21,9 +21,7 @@ _end enter_prom memcpy memset reloc_offset __secondary_hold
 __secondary_hold_acknowledge __secondary_hold_spinloop __start
 strcmp strcpy strlcpy strlen strncmp strstr logo_linux_clut224
 reloc_got2 kernstart_addr memstart_addr linux_banner _stext
-opal_query_takeover opal_do_takeover opal_enter_rtas opal_secondary_entry
-boot_command_line __prom_init_toc_start __prom_init_toc_end
-btext_setup_display TOC."
+__prom_init_toc_start __prom_init_toc_end btext_setup_display TOC."
 
 NM="$1"
 OBJ="$2"
diff --git a/arch/powerpc/platforms/powernv/Makefile b/arch/powerpc/platforms/powernv/Makefile
index d55891f89a2c..4ad227d04c1a 100644
--- a/arch/powerpc/platforms/powernv/Makefile
+++ b/arch/powerpc/platforms/powernv/Makefile
@@ -1,4 +1,4 @@
-obj-y			+= setup.o opal-takeover.o opal-wrappers.o opal.o opal-async.o
+obj-y			+= setup.o opal-wrappers.o opal.o opal-async.o
 obj-y			+= opal-rtc.o opal-nvram.o opal-lpc.o opal-flash.o
 obj-y			+= rng.o opal-elog.o opal-dump.o opal-sysparam.o opal-sensor.o
 obj-y			+= opal-msglog.o
diff --git a/arch/powerpc/platforms/powernv/opal-takeover.S b/arch/powerpc/platforms/powernv/opal-takeover.S
deleted file mode 100644
index 11a3169ee583..000000000000
--- a/arch/powerpc/platforms/powernv/opal-takeover.S
+++ /dev/null
@@ -1,140 +0,0 @@
-/*
- * PowerNV OPAL takeover assembly code, for use by prom_init.c
- *
- * Copyright 2011 IBM Corp.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#include <asm/ppc_asm.h>
-#include <asm/hvcall.h>
-#include <asm/asm-offsets.h>
-#include <asm/opal.h>
-
-#define H_HAL_TAKEOVER			0x5124
-#define H_HAL_TAKEOVER_QUERY_MAGIC	-1
-
-	.text
-_GLOBAL(opal_query_takeover)
-	mfcr	r0
-	stw	r0,8(r1)
-	stdu	r1,-STACKFRAMESIZE(r1)
-	std	r3,STK_PARAM(R3)(r1)
-	std	r4,STK_PARAM(R4)(r1)
-	li	r3,H_HAL_TAKEOVER
-	li	r4,H_HAL_TAKEOVER_QUERY_MAGIC
-	HVSC
-	addi	r1,r1,STACKFRAMESIZE
-	ld	r10,STK_PARAM(R3)(r1)
-	std	r4,0(r10)
-	ld	r10,STK_PARAM(R4)(r1)
-	std	r5,0(r10)
-	lwz	r0,8(r1)
-	mtcrf	0xff,r0
-	blr
-
-_GLOBAL(opal_do_takeover)
-	mfcr	r0
-	stw	r0,8(r1)
-	mflr	r0
-	std	r0,16(r1)
-	bl	__opal_do_takeover
-	ld	r0,16(r1)
-	mtlr	r0
-	lwz	r0,8(r1)
-	mtcrf	0xff,r0
-	blr
-
-__opal_do_takeover:
-	ld	r4,0(r3)
-	ld	r5,0x8(r3)
-	ld	r6,0x10(r3)
-	ld	r7,0x18(r3)
-	ld	r8,0x20(r3)
-	ld	r9,0x28(r3)
-	ld	r10,0x30(r3)
-	ld	r11,0x38(r3)
-	li	r3,H_HAL_TAKEOVER
-	HVSC
-	blr
-
-	.globl opal_secondary_entry
-opal_secondary_entry:
-	mr	r31,r3
-	mfmsr	r11
-	li	r12,(MSR_SF | MSR_ISF)@highest
-	sldi	r12,r12,48
-	or	r11,r11,r12
-	mtmsrd	r11
-	isync
-	mfspr	r4,SPRN_PIR
-	std	r4,0(r3)
-1:	HMT_LOW
-	ld	r4,8(r3)
-	cmpli	cr0,r4,0
-	beq	1b
-	HMT_MEDIUM
-1:	addi	r3,r31,16
-	bl	__opal_do_takeover
-	b	1b
-
-_GLOBAL(opal_enter_rtas)
-	mflr	r0
-	std	r0,16(r1)
-        stdu	r1,-PROM_FRAME_SIZE(r1)	/* Save SP and create stack space */
-
-	/* Because PROM is running in 32b mode, it clobbers the high order half
-	 * of all registers that it saves.  We therefore save those registers
-	 * PROM might touch to the stack.  (r0, r3-r13 are caller saved)
-	*/
-	SAVE_GPR(2, r1)
-	SAVE_GPR(13, r1)
-	SAVE_8GPRS(14, r1)
-	SAVE_10GPRS(22, r1)
-	mfcr	r10
-	mfmsr	r11
-	std	r10,_CCR(r1)
-	std	r11,_MSR(r1)
-
-	/* Get the PROM entrypoint */
-	mtlr	r5
-
-	/* Switch MSR to 32 bits mode
-	 */
-        li      r12,1
-        rldicr  r12,r12,MSR_SF_LG,(63-MSR_SF_LG)
-        andc    r11,r11,r12
-        li      r12,1
-        rldicr  r12,r12,MSR_ISF_LG,(63-MSR_ISF_LG)
-        andc    r11,r11,r12
-        mtmsrd  r11
-        isync
-
-	/* Enter RTAS here... */
-	blrl
-
-	/* Just make sure that r1 top 32 bits didn't get
-	 * corrupt by OF
-	 */
-	rldicl	r1,r1,0,32
-
-	/* Restore the MSR (back to 64 bits) */
-	ld	r0,_MSR(r1)
-	MTMSRD(r0)
-        isync
-
-	/* Restore other registers */
-	REST_GPR(2, r1)
-	REST_GPR(13, r1)
-	REST_8GPRS(14, r1)
-	REST_10GPRS(22, r1)
-	ld	r4,_CCR(r1)
-	mtcr	r4
-
-        addi	r1,r1,PROM_FRAME_SIZE
-	ld	r0,16(r1)
-	mtlr    r0
-	blr
-- 
cgit v1.2.3


From c2cbcf533a1d7443cf1d6aae1127491792601587 Mon Sep 17 00:00:00 2001
From: Laurent Dufour <ldufour@linux.vnet.ibm.com>
Date: Tue, 24 Jun 2014 10:53:59 +0200
Subject: powerpc/module: Fix TOC symbol CRC

The commit 71ec7c55ed91 introduced the magic symbol ".TOC." for ELFv2 ABI.
This symbol is built manually and has no CRC value computed. A zero value
is put in the CRC section to avoid modpost complaining about a missing CRC.
Unfortunately, this breaks the kernel module loading when the kernel is
relocated (kdump case for instance) because of the relocation applied to
the kcrctab values.

This patch compute a CRC value for the TOC symbol which will match the one
compute by the kernel when it is relocated - aka '0 - relocate_start' done in
maybe_relocated called by check_version (module.c).

Signed-off-by: Laurent Dufour <ldufour@linux.vnet.ibm.com>
Cc: Anton Blanchard <anton@samba.org>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/kernel/module_64.c | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/powerpc/kernel/module_64.c b/arch/powerpc/kernel/module_64.c
index 077d2ce6c5a7..d807ee626af9 100644
--- a/arch/powerpc/kernel/module_64.c
+++ b/arch/powerpc/kernel/module_64.c
@@ -315,8 +315,17 @@ static void dedotify_versions(struct modversion_info *vers,
 	struct modversion_info *end;
 
 	for (end = (void *)vers + size; vers < end; vers++)
-		if (vers->name[0] == '.')
+		if (vers->name[0] == '.') {
 			memmove(vers->name, vers->name+1, strlen(vers->name));
+#ifdef ARCH_RELOCATES_KCRCTAB
+			/* The TOC symbol has no CRC computed. To avoid CRC
+			 * check failing, we must force it to the expected
+			 * value (see CRC check in module.c).
+			 */
+			if (!strcmp(vers->name, "TOC."))
+				vers->crc = -(unsigned long)reloc_start;
+#endif
+		}
 }
 
 /* Undefined symbols which refer to .funcname, hack to funcname (or .TOC.) */
-- 
cgit v1.2.3


From 6663a4fa6711050036562ddfd2086edf735fae21 Mon Sep 17 00:00:00 2001
From: Scott Wood <scottwood@freescale.com>
Date: Tue, 24 Jun 2014 20:15:51 -0500
Subject: powerpc: Don't skip ePAPR spin-table CPUs

Commit 59a53afe70fd530040bdc69581f03d880157f15a "powerpc: Don't setup
CPUs with bad status" broke ePAPR SMP booting.  ePAPR says that CPUs
that aren't presently running shall have status of disabled, with
enable-method being used to determine whether the CPU can be enabled.

Fix by checking for spin-table, which is currently the only supported
enable-method.

Signed-off-by: Scott Wood <scottwood@freescale.com>
Cc: Michael Neuling <mikey@neuling.org>
Cc: Emil Medve <Emilian.Medve@Freescale.com>
Cc: stable@vger.kernel.org
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/kernel/setup-common.c | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c
index e239df3768ac..e5b022c55ccd 100644
--- a/arch/powerpc/kernel/setup-common.c
+++ b/arch/powerpc/kernel/setup-common.c
@@ -469,9 +469,17 @@ void __init smp_setup_cpu_maps(void)
 		}
 
 		for (j = 0; j < nthreads && cpu < nr_cpu_ids; j++) {
+			bool avail;
+
 			DBG("    thread %d -> cpu %d (hard id %d)\n",
 			    j, cpu, be32_to_cpu(intserv[j]));
-			set_cpu_present(cpu, of_device_is_available(dn));
+
+			avail = of_device_is_available(dn);
+			if (!avail)
+				avail = !of_property_match_string(dn,
+						"enable-method", "spin-table");
+
+			set_cpu_present(cpu, avail);
 			set_hard_smp_processor_id(cpu, be32_to_cpu(intserv[j]));
 			set_cpu_possible(cpu, true);
 			cpu++;
-- 
cgit v1.2.3


From b6616f11a85af3868e5080ef38846a0cbc496ed9 Mon Sep 17 00:00:00 2001
From: Alexandre Belloni <alexandre.belloni@free-electrons.com>
Date: Fri, 13 Jun 2014 13:25:34 +0200
Subject: ARM: at91/dt: sam9x5: correct PLLA ICPLL and OUT values

ICPLL can only take 0 or 1, it got mixed with OUT which can be in the [0-3]
range.

Signed-off-by: Alexandre Belloni <alexandre.belloni@free-electrons.com>
Acked-by: Boris BREZILLON <boris.brezillon@free-electrons.com>
Signed-off-by: Nicolas Ferre <nicolas.ferre@atmel.com>
---
 arch/arm/boot/dts/at91sam9x5.dtsi | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/arm/boot/dts/at91sam9x5.dtsi b/arch/arm/boot/dts/at91sam9x5.dtsi
index 1a57298636a5..d6133f497207 100644
--- a/arch/arm/boot/dts/at91sam9x5.dtsi
+++ b/arch/arm/boot/dts/at91sam9x5.dtsi
@@ -140,8 +140,8 @@
 								       595000000 650000000 3 0
 								       545000000 600000000 0 1
 								       495000000 555000000 1 1
-								       445000000 500000000 1 2
-								       400000000 450000000 1 3>;
+								       445000000 500000000 2 1
+								       400000000 450000000 3 1>;
 				};
 
 				plladiv: plladivck {
-- 
cgit v1.2.3


From 8cbff69ca9d57ee53656d187f77774501794bf27 Mon Sep 17 00:00:00 2001
From: Alexandre Belloni <alexandre.belloni@free-electrons.com>
Date: Fri, 13 Jun 2014 13:28:12 +0200
Subject: ARM: at91/dt: sam9n12: correct PLLA ICPLL and OUT values

ICPLL can only take 0 or 1, it got mixed with OUT which can be in the [0-3]
range.

Signed-off-by: Alexandre Belloni <alexandre.belloni@free-electrons.com>
Acked-by: Boris BREZILLON <boris.brezillon@free-electrons.com>
Signed-off-by: Nicolas Ferre <nicolas.ferre@atmel.com>
---
 arch/arm/boot/dts/at91sam9n12.dtsi | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/arm/boot/dts/at91sam9n12.dtsi b/arch/arm/boot/dts/at91sam9n12.dtsi
index d1b82e6635d5..287795985e32 100644
--- a/arch/arm/boot/dts/at91sam9n12.dtsi
+++ b/arch/arm/boot/dts/at91sam9n12.dtsi
@@ -132,8 +132,8 @@
 								      <595000000 650000000 3 0>,
 								      <545000000 600000000 0 1>,
 								      <495000000 555000000 1 1>,
-								      <445000000 500000000 1 2>,
-								      <400000000 450000000 1 3>;
+								      <445000000 500000000 2 1>,
+								      <400000000 450000000 3 1>;
 				};
 
 				plladiv: plladivck {
-- 
cgit v1.2.3


From 5de4728450417380d2d49fcf88905218f1787510 Mon Sep 17 00:00:00 2001
From: Alexandre Belloni <alexandre.belloni@free-electrons.com>
Date: Fri, 13 Jun 2014 14:02:29 +0200
Subject: ARM: at91/dt: sam9261: correctly define mainck

mainck (CKGR_MCFR register) is actually using main_osc (CKGR_MOR register).

Signed-off-by: Alexandre Belloni <alexandre.belloni@free-electrons.com>
Acked-by: Boris BREZILLON <boris.brezillon@free-electrons.com>
Signed-off-by: Nicolas Ferre <nicolas.ferre@atmel.com>
---
 arch/arm/boot/dts/at91sam9261.dtsi | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/arm/boot/dts/at91sam9261.dtsi b/arch/arm/boot/dts/at91sam9261.dtsi
index b309c1c6e848..9de312e9bb3e 100644
--- a/arch/arm/boot/dts/at91sam9261.dtsi
+++ b/arch/arm/boot/dts/at91sam9261.dtsi
@@ -581,13 +581,19 @@
 					clocks = <&slow_rc_osc &slow_xtal>;
 				};
 
-				main: mainck {
-					compatible = "atmel,at91rm9200-clk-main";
+				main_osc: main_osc {
+					compatible = "atmel,at91rm9200-clk-main-osc";
 					#clock-cells = <0>;
 					interrupts-extended = <&pmc AT91_PMC_MOSCS>;
 					clocks = <&main_xtal>;
 				};
 
+				main: mainck {
+					compatible = "atmel,at91rm9200-clk-main";
+					#clock-cells = <0>;
+					clocks = <&main_osc>;
+				};
+
 				plla: pllack {
 					compatible = "atmel,at91rm9200-clk-pll";
 					#clock-cells = <0>;
-- 
cgit v1.2.3


From 78ca2ec920214089cb2c085252536cd04af687ca Mon Sep 17 00:00:00 2001
From: Alexandre Belloni <alexandre.belloni@free-electrons.com>
Date: Sat, 14 Jun 2014 02:10:43 +0200
Subject: ARM: at91/dt: define sam9261ek slow crystal frequency

Define at91sam9261ek's slow crystal frequencies.

Signed-off-by: Alexandre Belloni <alexandre.belloni@free-electrons.com>
Acked-by: Boris BREZILLON <boris.brezillon@free-electrons.com>
Signed-off-by: Nicolas Ferre <nicolas.ferre@atmel.com>
---
 arch/arm/boot/dts/at91sam9261ek.dts | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'arch')

diff --git a/arch/arm/boot/dts/at91sam9261ek.dts b/arch/arm/boot/dts/at91sam9261ek.dts
index c6683ea8b743..aa35a7aec9a8 100644
--- a/arch/arm/boot/dts/at91sam9261ek.dts
+++ b/arch/arm/boot/dts/at91sam9261ek.dts
@@ -20,6 +20,10 @@
 		reg = <0x20000000 0x4000000>;
 	};
 
+	slow_xtal {
+		clock-frequency = <32768>;
+	};
+
 	main_xtal {
 		clock-frequency = <18432000>;
 	};
-- 
cgit v1.2.3


From 971dc9ce106110745f246337f229013589354536 Mon Sep 17 00:00:00 2001
From: Alexandre Belloni <alexandre.belloni@free-electrons.com>
Date: Mon, 23 Jun 2014 08:51:41 +0200
Subject: ARM: at91/dt: sam9261: remove slow RC osc

The at91sam9261 doesn't actually have a slow RC oscillator, remove it from the
dtsi.

Signed-off-by: Alexandre Belloni <alexandre.belloni@free-electrons.com>
Acked-by: Boris BREZILLON <boris.brezillon@free-electrons.com>
Signed-off-by: Nicolas Ferre <nicolas.ferre@atmel.com>
---
 arch/arm/boot/dts/at91sam9261.dtsi | 17 ++---------------
 1 file changed, 2 insertions(+), 15 deletions(-)

(limited to 'arch')

diff --git a/arch/arm/boot/dts/at91sam9261.dtsi b/arch/arm/boot/dts/at91sam9261.dtsi
index 9de312e9bb3e..04927db1d6bf 100644
--- a/arch/arm/boot/dts/at91sam9261.dtsi
+++ b/arch/arm/boot/dts/at91sam9261.dtsi
@@ -568,19 +568,6 @@
 				#size-cells = <0>;
 				#interrupt-cells = <1>;
 
-				slow_rc_osc: slow_rc_osc {
-					compatible = "fixed-clock";
-					#clock-cells = <0>;
-					clock-frequency = <32768>;
-					clock-accuracy = <50000000>;
-				};
-
-				clk32k: slck {
-					compatible = "atmel,at91sam9260-clk-slow";
-					#clock-cells = <0>;
-					clocks = <&slow_rc_osc &slow_xtal>;
-				};
-
 				main_osc: main_osc {
 					compatible = "atmel,at91rm9200-clk-main-osc";
 					#clock-cells = <0>;
@@ -621,7 +608,7 @@
 					compatible = "atmel,at91rm9200-clk-master";
 					#clock-cells = <0>;
 					interrupts-extended = <&pmc AT91_PMC_MCKRDY>;
-					clocks = <&clk32k>, <&main>, <&plla>, <&pllb>;
+					clocks = <&slow_xtal>, <&main>, <&plla>, <&pllb>;
 					atmel,clk-output-range = <0 94000000>;
 					atmel,clk-divisors = <1 2 4 0>;
 				};
@@ -638,7 +625,7 @@
 					#address-cells = <1>;
 					#size-cells = <0>;
 					interrupt-parent = <&pmc>;
-					clocks = <&clk32k>, <&main>, <&plla>, <&pllb>;
+					clocks = <&slow_xtal>, <&main>, <&plla>, <&pllb>;
 
 					prog0: prog0 {
 						#clock-cells = <0>;
-- 
cgit v1.2.3


From ba25915fb2cd18152cb14b144dbe8bf2f2bd8e45 Mon Sep 17 00:00:00 2001
From: Vineet Gupta <vgupta@synopsys.com>
Date: Tue, 24 Jun 2014 19:33:39 +0530
Subject: ARC: Fix build breakage for !CONFIG_ARC_DW2_UNWIND

Fixes: ec7ac6afd07b (ARC: switch to generic ENTRY/END assembler annotations)
Reported-by: Anton Kolesov <akolesov@synopsys.com>
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
---
 arch/arc/kernel/ctx_sw_asm.S | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/arc/kernel/ctx_sw_asm.S b/arch/arc/kernel/ctx_sw_asm.S
index 2ff0347a2fd7..e248594097e7 100644
--- a/arch/arc/kernel/ctx_sw_asm.S
+++ b/arch/arc/kernel/ctx_sw_asm.S
@@ -10,9 +10,9 @@
  *  -This is the more "natural" hand written assembler
  */
 
+#include <linux/linkage.h>
 #include <asm/entry.h>       /* For the SAVE_* macros */
 #include <asm/asm-offsets.h>
-#include <asm/linkage.h>
 
 #define KSP_WORD_OFF 	((TASK_THREAD + THREAD_KSP) / 4)
 
-- 
cgit v1.2.3


From bef444a33004f4062930df1fc703a25dd6d7b460 Mon Sep 17 00:00:00 2001
From: Vineet Gupta <vgupta@synopsys.com>
Date: Thu, 17 Apr 2014 17:13:26 +0530
Subject: ARC: optimize kernel bss clearing in early boot code

using ARC ZOL which reduces tot num of instructions by half

Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
---
 arch/arc/kernel/head.S        | 7 ++++---
 arch/arc/kernel/vmlinux.lds.S | 2 +-
 2 files changed, 5 insertions(+), 4 deletions(-)

(limited to 'arch')

diff --git a/arch/arc/kernel/head.S b/arch/arc/kernel/head.S
index 07a58f2d3077..4d2481bd8b98 100644
--- a/arch/arc/kernel/head.S
+++ b/arch/arc/kernel/head.S
@@ -77,10 +77,11 @@ stext:
 	; Clear BSS before updating any globals
 	; XXX: use ZOL here
 	mov	r5, __bss_start
-	mov	r6, __bss_stop
+	sub	r6, __bss_stop, r5
+	lsr.f	lp_count, r6, 2
+	lpnz	1f
+	st.ab   0, [r5, 4]
 1:
-	st.ab   0, [r5,4]
-	brlt    r5, r6, 1b
 
 	; Uboot - kernel ABI
 	;    r0 = [0] No uboot interaction, [1] cmdline in r2, [2] DTB in r2
diff --git a/arch/arc/kernel/vmlinux.lds.S b/arch/arc/kernel/vmlinux.lds.S
index 2555f5886af6..dd35bde39f69 100644
--- a/arch/arc/kernel/vmlinux.lds.S
+++ b/arch/arc/kernel/vmlinux.lds.S
@@ -116,7 +116,7 @@ SECTIONS
 
 	_edata = .;
 
-	BSS_SECTION(0, 0, 0)
+	BSS_SECTION(4, 4, 4)
 
 #ifdef CONFIG_ARC_DW2_UNWIND
 	. = ALIGN(PAGE_SIZE);
-- 
cgit v1.2.3


From a4b6cb735b25aa84a462a1985e3e43bebaf5beb4 Mon Sep 17 00:00:00 2001
From: Anton Kolesov <Anton.Kolesov@synopsys.com>
Date: Fri, 20 Jun 2014 20:28:39 +0400
Subject: ARC: Implement ptrace(PTRACE_GET_THREAD_AREA)

This patch adds implementation of GET_THREAD_AREA ptrace request type. This
is required by GDB to debug NPTL applications.

Signed-off-by: Anton Kolesov <Anton.Kolesov@synopsys.com>
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
---
 arch/arc/include/uapi/asm/ptrace.h | 1 +
 arch/arc/kernel/ptrace.c           | 4 ++++
 2 files changed, 5 insertions(+)

(limited to 'arch')

diff --git a/arch/arc/include/uapi/asm/ptrace.h b/arch/arc/include/uapi/asm/ptrace.h
index 2618cc13ba75..76a7739aab1c 100644
--- a/arch/arc/include/uapi/asm/ptrace.h
+++ b/arch/arc/include/uapi/asm/ptrace.h
@@ -11,6 +11,7 @@
 #ifndef _UAPI__ASM_ARC_PTRACE_H
 #define _UAPI__ASM_ARC_PTRACE_H
 
+#define PTRACE_GET_THREAD_AREA	25
 
 #ifndef __ASSEMBLY__
 /*
diff --git a/arch/arc/kernel/ptrace.c b/arch/arc/kernel/ptrace.c
index 5d76706139dd..13b3ffb27a38 100644
--- a/arch/arc/kernel/ptrace.c
+++ b/arch/arc/kernel/ptrace.c
@@ -146,6 +146,10 @@ long arch_ptrace(struct task_struct *child, long request,
 	pr_debug("REQ=%ld: ADDR =0x%lx, DATA=0x%lx)\n", request, addr, data);
 
 	switch (request) {
+	case PTRACE_GET_THREAD_AREA:
+		ret = put_user(task_thread_info(child)->thr_ptr,
+			       (unsigned long __user *)data);
+		break;
 	default:
 		ret = ptrace_request(child, request, addr, data);
 		break;
-- 
cgit v1.2.3


From 7e5122190b47c2574eda26aa0bbda693b2821449 Mon Sep 17 00:00:00 2001
From: Noam Camus <noamc@ezchip.com>
Date: Thu, 28 Feb 2013 11:07:06 +0200
Subject: ARC: [SMP] Fix IPI IRQ registration

Handle it just like timer. Current request_percpu_irq() would fail on
non-boot cpus and thus IRQ will remian unmasked on those cpus.

[vgupta: fix changelong]
Signed-off-by: Noam Camus <noamc@ezchip.com>
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
---
 arch/arc/kernel/smp.c | 15 +++++++++++++--
 1 file changed, 13 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/arc/kernel/smp.c b/arch/arc/kernel/smp.c
index cf90b6f4d3e0..c802bb500602 100644
--- a/arch/arc/kernel/smp.c
+++ b/arch/arc/kernel/smp.c
@@ -337,8 +337,19 @@ irqreturn_t do_IPI(int irq, void *dev_id)
  * API called by platform code to hookup arch-common ISR to their IPI IRQ
  */
 static DEFINE_PER_CPU(int, ipi_dev);
+
+static struct irqaction arc_ipi_irq = {
+        .name    = "IPI Interrupt",
+        .flags   = IRQF_PERCPU,
+        .handler = do_IPI,
+};
+
 int smp_ipi_irq_setup(int cpu, int irq)
 {
-	int *dev_id = &per_cpu(ipi_dev, smp_processor_id());
-	return request_percpu_irq(irq, do_IPI, "IPI Interrupt", dev_id);
+	if (!cpu)
+		return setup_irq(irq, &arc_ipi_irq);
+	else
+		arch_unmask_irq(irq);
+
+	return 0;
 }
-- 
cgit v1.2.3


From 2328af0c9ce89d3ec121ba832c4e6faeff9e70ac Mon Sep 17 00:00:00 2001
From: Vineet Gupta <vgupta@synopsys.com>
Date: Sun, 17 Feb 2013 12:51:42 +0200
Subject: ARC: [SMP] Enable icache coherency

icaches are not snooped hence not cohrent in SMP setups which means
kernel has to do cross core calls to ensure the same.

The leaf routine __ic_line_inv_vaddr() now does cross core calls.

__sync_icache_dcache() is affected due to this:

* local dcache line flushed ahead of remote icache inv requests
* can't disable interrupts anymore, since
      __ic_line_inv_vaddr()->on_each_cpu() can deadlock.

| WARNING: CPU: 0 PID: 1 at kernel/smp.c:374
| smp_call_function_many+0x25a/0x2c4()
|
|  init_kprobes+0x90/0xc8
|     register_kprobe+0x1d6/0x510
|	__sync_icache_dcache+0x28/0x80
|
|	    DISABLE IRQ
|
|	    __ic_line_inv_vaddr
|		on_each_cpu
|		     smp_call_function_many+0x25a/0x2c4   --> WARN
|			__ic_line_inv_vaddr_local
|	    __dc_line_op

* TODO: Needs to use mask of relevant CPUs to avoid broadcasting

Signed-off-by: Noam Camus <noamc@ezchip.com>
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
---
 arch/arc/mm/cache_arc700.c | 25 +++++++++++++++++++------
 1 file changed, 19 insertions(+), 6 deletions(-)

(limited to 'arch')

diff --git a/arch/arc/mm/cache_arc700.c b/arch/arc/mm/cache_arc700.c
index 1f676c4794e0..353b202c37c9 100644
--- a/arch/arc/mm/cache_arc700.c
+++ b/arch/arc/mm/cache_arc700.c
@@ -389,7 +389,7 @@ static inline void __dc_line_op(unsigned long paddr, unsigned long vaddr,
 /***********************************************************
  * Machine specific helper for per line I-Cache invalidate.
  */
-static void __ic_line_inv_vaddr(unsigned long paddr, unsigned long vaddr,
+static void __ic_line_inv_vaddr_local(unsigned long paddr, unsigned long vaddr,
 				unsigned long sz)
 {
 	unsigned long flags;
@@ -405,6 +405,23 @@ static inline void __ic_entire_inv(void)
 	read_aux_reg(ARC_REG_IC_CTRL);	/* blocks */
 }
 
+struct ic_line_inv_vaddr_ipi {
+	unsigned long paddr, vaddr;
+	int sz;
+};
+
+static void __ic_line_inv_vaddr_helper(void *info)
+{
+        struct ic_line_inv_vaddr_ipi *ic_inv = (struct ic_line_inv_vaddr_ipi*) info;
+        __ic_line_inv_vaddr_local(ic_inv->paddr, ic_inv->vaddr, ic_inv->sz);
+}
+
+static void __ic_line_inv_vaddr(unsigned long paddr, unsigned long vaddr,
+				unsigned long sz)
+{
+	struct ic_line_inv_vaddr_ipi ic_inv = { paddr, vaddr , sz};
+	on_each_cpu(__ic_line_inv_vaddr_helper, &ic_inv, 1);
+}
 #else
 
 #define __ic_entire_inv()
@@ -553,12 +570,8 @@ void flush_icache_range(unsigned long kstart, unsigned long kend)
  */
 void __sync_icache_dcache(unsigned long paddr, unsigned long vaddr, int len)
 {
-	unsigned long flags;
-
-	local_irq_save(flags);
-	__ic_line_inv_vaddr(paddr, vaddr, len);
 	__dc_line_op(paddr, vaddr, len, OP_FLUSH_N_INV);
-	local_irq_restore(flags);
+	__ic_line_inv_vaddr(paddr, vaddr, len);
 }
 
 /* wrapper to compile time eliminate alignment checks in flush loop */
-- 
cgit v1.2.3


From fb738f8544d8ceb3599598f3500f33bf6ff2fca4 Mon Sep 17 00:00:00 2001
From: Ralf Baechle <ralf@linux-mips.org>
Date: Fri, 13 Jun 2014 15:36:45 +0100
Subject: MIPS: math-emu: Reduce code duplication.

The fix in the preceeding commit did do exactly the same thing in two
places showing some code cleanup was due.

Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/math-emu/ieee754.c | 19 ++++++-------------
 1 file changed, 6 insertions(+), 13 deletions(-)

(limited to 'arch')

diff --git a/arch/mips/math-emu/ieee754.c b/arch/mips/math-emu/ieee754.c
index cb9214da372f..8e97acbbe22c 100644
--- a/arch/mips/math-emu/ieee754.c
+++ b/arch/mips/math-emu/ieee754.c
@@ -38,15 +38,18 @@
  * Older GCC requires the inner braces for initialization of union ieee754dp's
  * anonymous struct member.  Without an error will result.
  */
-#define DPCNST(s, b, m)							\
+#define xPCNST(s, b, m, ebias)						\
 {									\
 	{								\
 		.sign	= (s),						\
-		.bexp	= (b) + DP_EBIAS,				\
+		.bexp	= (b) + ebias,					\
 		.mant	= (m)						\
 	}								\
 }
 
+#define DPCNST(s, b, m)							\
+	xPCNST(s, b, m, DP_EBIAS)
+
 const union ieee754dp __ieee754dp_spcvals[] = {
 	DPCNST(0, DP_EMIN - 1, 0x0000000000000ULL),	/* + zero   */
 	DPCNST(1, DP_EMIN - 1, 0x0000000000000ULL),	/* - zero   */
@@ -67,18 +70,8 @@ const union ieee754dp __ieee754dp_spcvals[] = {
 	DPCNST(0, 63,          0x0000000000000ULL),	/* + 1.0e63 */
 };
 
-/*
- * Older GCC requires the inner braces for initialization of union ieee754sp's
- * anonymous struct member.  Without an error will result.
- */
 #define SPCNST(s, b, m)							\
-{									\
-	{								\
-	.sign	= (s),							\
-	.bexp	= (b) + SP_EBIAS,					\
-	.mant	= (m)							\
-	}								\
-}
+	xPCNST(s, b, m, SP_EBIAS)
 
 const union ieee754sp __ieee754sp_spcvals[] = {
 	SPCNST(0, SP_EMIN - 1, 0x000000),	/* + zero   */
-- 
cgit v1.2.3


From 16f77de82f2d2f628306dab9bc4799df0d28a199 Mon Sep 17 00:00:00 2001
From: Paul Burton <paul.burton@imgtec.com>
Date: Wed, 18 Jun 2014 15:00:46 +0100
Subject: Revert "MIPS: Save/restore MSA context around signals"

This reverts commit eec43a224cf1 "MIPS: Save/restore MSA context around
signals" and the MSA parts of ca750649e08c "MIPS: kernel: signal:
Prevent save/restore FPU context in user memory" (the restore path of
which appears incorrect anyway...).

The reverted patch took care not to break compatibility with userland
users of struct sigcontext, but inadvertantly changed the offset of the
uc_sigmask field of struct ucontext. Thus Linux v3.15 breaks the
userland ABI. The MSA context will need to be saved via some other
opt-in mechanism, but for now revert the change to reduce the fallout.

This will have minimal impact upon use of MSA since the only supported
CPU which includes it (the P5600) is 32-bit and therefore requires that
the experimental CONFIG_MIPS_O32_FP64_SUPPORT Kconfig option be selected
before the kernel will set FR=1 for a task, a requirement for MSA use.
Thus the users of MSA are limited to known small groups of people & this
patch won't be breaking any previously working MSA-using userland
outside of experimental settings.

[ralf@linux-mips.org: Fixed rejects.]

Cc: stable@vger.kernel.org
Reported-by: Joseph S. Myers <joseph@codesourcery.com>
Signed-off-by: Paul Burton <paul.burton@imgtec.com>
Cc: linux-mips@linux-mips.org
Cc: stable@vger.kernel.org
Patchwork: https://patchwork.linux-mips.org/patch/7107/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/include/asm/sigcontext.h      |   2 -
 arch/mips/include/uapi/asm/sigcontext.h |   8 --
 arch/mips/kernel/asm-offsets.c          |   3 -
 arch/mips/kernel/r4k_fpu.S              | 213 --------------------------------
 arch/mips/kernel/signal.c               |  79 ++----------
 arch/mips/kernel/signal32.c             |  74 ++---------
 6 files changed, 16 insertions(+), 363 deletions(-)

(limited to 'arch')

diff --git a/arch/mips/include/asm/sigcontext.h b/arch/mips/include/asm/sigcontext.h
index f54bdbe85c0d..eeeb0f48c767 100644
--- a/arch/mips/include/asm/sigcontext.h
+++ b/arch/mips/include/asm/sigcontext.h
@@ -32,8 +32,6 @@ struct sigcontext32 {
 	__u32		sc_lo2;
 	__u32		sc_hi3;
 	__u32		sc_lo3;
-	__u64		sc_msaregs[32];	/* Most significant 64 bits */
-	__u32		sc_msa_csr;
 };
 #endif /* _MIPS_SIM == _MIPS_SIM_ABI64 || _MIPS_SIM == _MIPS_SIM_NABI32 */
 #endif /* _ASM_SIGCONTEXT_H */
diff --git a/arch/mips/include/uapi/asm/sigcontext.h b/arch/mips/include/uapi/asm/sigcontext.h
index 681c17603a48..6c9906f59c6e 100644
--- a/arch/mips/include/uapi/asm/sigcontext.h
+++ b/arch/mips/include/uapi/asm/sigcontext.h
@@ -12,10 +12,6 @@
 #include <linux/types.h>
 #include <asm/sgidefs.h>
 
-/* Bits which may be set in sc_used_math */
-#define USEDMATH_FP	(1 << 0)
-#define USEDMATH_MSA	(1 << 1)
-
 #if _MIPS_SIM == _MIPS_SIM_ABI32
 
 /*
@@ -41,8 +37,6 @@ struct sigcontext {
 	unsigned long		sc_lo2;
 	unsigned long		sc_hi3;
 	unsigned long		sc_lo3;
-	unsigned long long	sc_msaregs[32];	/* Most significant 64 bits */
-	unsigned long		sc_msa_csr;
 };
 
 #endif /* _MIPS_SIM == _MIPS_SIM_ABI32 */
@@ -76,8 +70,6 @@ struct sigcontext {
 	__u32	sc_used_math;
 	__u32	sc_dsp;
 	__u32	sc_reserved;
-	__u64	sc_msaregs[32];
-	__u32	sc_msa_csr;
 };
 
 
diff --git a/arch/mips/kernel/asm-offsets.c b/arch/mips/kernel/asm-offsets.c
index 02f075df8f2e..4bb5107511e2 100644
--- a/arch/mips/kernel/asm-offsets.c
+++ b/arch/mips/kernel/asm-offsets.c
@@ -293,7 +293,6 @@ void output_sc_defines(void)
 	OFFSET(SC_LO2, sigcontext, sc_lo2);
 	OFFSET(SC_HI3, sigcontext, sc_hi3);
 	OFFSET(SC_LO3, sigcontext, sc_lo3);
-	OFFSET(SC_MSAREGS, sigcontext, sc_msaregs);
 	BLANK();
 }
 #endif
@@ -308,7 +307,6 @@ void output_sc_defines(void)
 	OFFSET(SC_MDLO, sigcontext, sc_mdlo);
 	OFFSET(SC_PC, sigcontext, sc_pc);
 	OFFSET(SC_FPC_CSR, sigcontext, sc_fpc_csr);
-	OFFSET(SC_MSAREGS, sigcontext, sc_msaregs);
 	BLANK();
 }
 #endif
@@ -320,7 +318,6 @@ void output_sc32_defines(void)
 	OFFSET(SC32_FPREGS, sigcontext32, sc_fpregs);
 	OFFSET(SC32_FPC_CSR, sigcontext32, sc_fpc_csr);
 	OFFSET(SC32_FPC_EIR, sigcontext32, sc_fpc_eir);
-	OFFSET(SC32_MSAREGS, sigcontext32, sc_msaregs);
 	BLANK();
 }
 #endif
diff --git a/arch/mips/kernel/r4k_fpu.S b/arch/mips/kernel/r4k_fpu.S
index 71814272d148..8352523568e6 100644
--- a/arch/mips/kernel/r4k_fpu.S
+++ b/arch/mips/kernel/r4k_fpu.S
@@ -13,7 +13,6 @@
  * Copyright (C) 1999, 2001 Silicon Graphics, Inc.
  */
 #include <asm/asm.h>
-#include <asm/asmmacro.h>
 #include <asm/errno.h>
 #include <asm/fpregdef.h>
 #include <asm/mipsregs.h>
@@ -246,218 +245,6 @@ LEAF(_restore_fp_context32)
 	END(_restore_fp_context32)
 #endif
 
-#ifdef CONFIG_CPU_HAS_MSA
-
-	.macro	save_sc_msareg	wr, off, sc, tmp
-#ifdef CONFIG_64BIT
-	copy_u_d \tmp, \wr, 1
-	EX sd	\tmp, (\off+(\wr*8))(\sc)
-#elif defined(CONFIG_CPU_LITTLE_ENDIAN)
-	copy_u_w \tmp, \wr, 2
-	EX sw	\tmp, (\off+(\wr*8)+0)(\sc)
-	copy_u_w \tmp, \wr, 3
-	EX sw	\tmp, (\off+(\wr*8)+4)(\sc)
-#else /* CONFIG_CPU_BIG_ENDIAN */
-	copy_u_w \tmp, \wr, 2
-	EX sw	\tmp, (\off+(\wr*8)+4)(\sc)
-	copy_u_w \tmp, \wr, 3
-	EX sw	\tmp, (\off+(\wr*8)+0)(\sc)
-#endif
-	.endm
-
-/*
- * int _save_msa_context(struct sigcontext *sc)
- *
- * Save the upper 64 bits of each vector register along with the MSA_CSR
- * register into sc. Returns zero on success, else non-zero.
- */
-LEAF(_save_msa_context)
-	save_sc_msareg	0, SC_MSAREGS, a0, t0
-	save_sc_msareg	1, SC_MSAREGS, a0, t0
-	save_sc_msareg	2, SC_MSAREGS, a0, t0
-	save_sc_msareg	3, SC_MSAREGS, a0, t0
-	save_sc_msareg	4, SC_MSAREGS, a0, t0
-	save_sc_msareg	5, SC_MSAREGS, a0, t0
-	save_sc_msareg	6, SC_MSAREGS, a0, t0
-	save_sc_msareg	7, SC_MSAREGS, a0, t0
-	save_sc_msareg	8, SC_MSAREGS, a0, t0
-	save_sc_msareg	9, SC_MSAREGS, a0, t0
-	save_sc_msareg	10, SC_MSAREGS, a0, t0
-	save_sc_msareg	11, SC_MSAREGS, a0, t0
-	save_sc_msareg	12, SC_MSAREGS, a0, t0
-	save_sc_msareg	13, SC_MSAREGS, a0, t0
-	save_sc_msareg	14, SC_MSAREGS, a0, t0
-	save_sc_msareg	15, SC_MSAREGS, a0, t0
-	save_sc_msareg	16, SC_MSAREGS, a0, t0
-	save_sc_msareg	17, SC_MSAREGS, a0, t0
-	save_sc_msareg	18, SC_MSAREGS, a0, t0
-	save_sc_msareg	19, SC_MSAREGS, a0, t0
-	save_sc_msareg	20, SC_MSAREGS, a0, t0
-	save_sc_msareg	21, SC_MSAREGS, a0, t0
-	save_sc_msareg	22, SC_MSAREGS, a0, t0
-	save_sc_msareg	23, SC_MSAREGS, a0, t0
-	save_sc_msareg	24, SC_MSAREGS, a0, t0
-	save_sc_msareg	25, SC_MSAREGS, a0, t0
-	save_sc_msareg	26, SC_MSAREGS, a0, t0
-	save_sc_msareg	27, SC_MSAREGS, a0, t0
-	save_sc_msareg	28, SC_MSAREGS, a0, t0
-	save_sc_msareg	29, SC_MSAREGS, a0, t0
-	save_sc_msareg	30, SC_MSAREGS, a0, t0
-	save_sc_msareg	31, SC_MSAREGS, a0, t0
-	jr	ra
-	 li	v0, 0
-	END(_save_msa_context)
-
-#ifdef CONFIG_MIPS32_COMPAT
-
-/*
- * int _save_msa_context32(struct sigcontext32 *sc)
- *
- * Save the upper 64 bits of each vector register along with the MSA_CSR
- * register into sc. Returns zero on success, else non-zero.
- */
-LEAF(_save_msa_context32)
-	save_sc_msareg	0, SC32_MSAREGS, a0, t0
-	save_sc_msareg	1, SC32_MSAREGS, a0, t0
-	save_sc_msareg	2, SC32_MSAREGS, a0, t0
-	save_sc_msareg	3, SC32_MSAREGS, a0, t0
-	save_sc_msareg	4, SC32_MSAREGS, a0, t0
-	save_sc_msareg	5, SC32_MSAREGS, a0, t0
-	save_sc_msareg	6, SC32_MSAREGS, a0, t0
-	save_sc_msareg	7, SC32_MSAREGS, a0, t0
-	save_sc_msareg	8, SC32_MSAREGS, a0, t0
-	save_sc_msareg	9, SC32_MSAREGS, a0, t0
-	save_sc_msareg	10, SC32_MSAREGS, a0, t0
-	save_sc_msareg	11, SC32_MSAREGS, a0, t0
-	save_sc_msareg	12, SC32_MSAREGS, a0, t0
-	save_sc_msareg	13, SC32_MSAREGS, a0, t0
-	save_sc_msareg	14, SC32_MSAREGS, a0, t0
-	save_sc_msareg	15, SC32_MSAREGS, a0, t0
-	save_sc_msareg	16, SC32_MSAREGS, a0, t0
-	save_sc_msareg	17, SC32_MSAREGS, a0, t0
-	save_sc_msareg	18, SC32_MSAREGS, a0, t0
-	save_sc_msareg	19, SC32_MSAREGS, a0, t0
-	save_sc_msareg	20, SC32_MSAREGS, a0, t0
-	save_sc_msareg	21, SC32_MSAREGS, a0, t0
-	save_sc_msareg	22, SC32_MSAREGS, a0, t0
-	save_sc_msareg	23, SC32_MSAREGS, a0, t0
-	save_sc_msareg	24, SC32_MSAREGS, a0, t0
-	save_sc_msareg	25, SC32_MSAREGS, a0, t0
-	save_sc_msareg	26, SC32_MSAREGS, a0, t0
-	save_sc_msareg	27, SC32_MSAREGS, a0, t0
-	save_sc_msareg	28, SC32_MSAREGS, a0, t0
-	save_sc_msareg	29, SC32_MSAREGS, a0, t0
-	save_sc_msareg	30, SC32_MSAREGS, a0, t0
-	save_sc_msareg	31, SC32_MSAREGS, a0, t0
-	jr	ra
-	 li	v0, 0
-	END(_save_msa_context32)
-
-#endif /* CONFIG_MIPS32_COMPAT */
-
-	.macro restore_sc_msareg	wr, off, sc, tmp
-#ifdef CONFIG_64BIT
-	EX ld	\tmp, (\off+(\wr*8))(\sc)
-	insert_d \wr, 1, \tmp
-#elif defined(CONFIG_CPU_LITTLE_ENDIAN)
-	EX lw	\tmp, (\off+(\wr*8)+0)(\sc)
-	insert_w \wr, 2, \tmp
-	EX lw	\tmp, (\off+(\wr*8)+4)(\sc)
-	insert_w \wr, 3, \tmp
-#else /* CONFIG_CPU_BIG_ENDIAN */
-	EX lw	\tmp, (\off+(\wr*8)+4)(\sc)
-	insert_w \wr, 2, \tmp
-	EX lw	\tmp, (\off+(\wr*8)+0)(\sc)
-	insert_w \wr, 3, \tmp
-#endif
-	.endm
-
-/*
- * int _restore_msa_context(struct sigcontext *sc)
- */
-LEAF(_restore_msa_context)
-	restore_sc_msareg	0, SC_MSAREGS, a0, t0
-	restore_sc_msareg	1, SC_MSAREGS, a0, t0
-	restore_sc_msareg	2, SC_MSAREGS, a0, t0
-	restore_sc_msareg	3, SC_MSAREGS, a0, t0
-	restore_sc_msareg	4, SC_MSAREGS, a0, t0
-	restore_sc_msareg	5, SC_MSAREGS, a0, t0
-	restore_sc_msareg	6, SC_MSAREGS, a0, t0
-	restore_sc_msareg	7, SC_MSAREGS, a0, t0
-	restore_sc_msareg	8, SC_MSAREGS, a0, t0
-	restore_sc_msareg	9, SC_MSAREGS, a0, t0
-	restore_sc_msareg	10, SC_MSAREGS, a0, t0
-	restore_sc_msareg	11, SC_MSAREGS, a0, t0
-	restore_sc_msareg	12, SC_MSAREGS, a0, t0
-	restore_sc_msareg	13, SC_MSAREGS, a0, t0
-	restore_sc_msareg	14, SC_MSAREGS, a0, t0
-	restore_sc_msareg	15, SC_MSAREGS, a0, t0
-	restore_sc_msareg	16, SC_MSAREGS, a0, t0
-	restore_sc_msareg	17, SC_MSAREGS, a0, t0
-	restore_sc_msareg	18, SC_MSAREGS, a0, t0
-	restore_sc_msareg	19, SC_MSAREGS, a0, t0
-	restore_sc_msareg	20, SC_MSAREGS, a0, t0
-	restore_sc_msareg	21, SC_MSAREGS, a0, t0
-	restore_sc_msareg	22, SC_MSAREGS, a0, t0
-	restore_sc_msareg	23, SC_MSAREGS, a0, t0
-	restore_sc_msareg	24, SC_MSAREGS, a0, t0
-	restore_sc_msareg	25, SC_MSAREGS, a0, t0
-	restore_sc_msareg	26, SC_MSAREGS, a0, t0
-	restore_sc_msareg	27, SC_MSAREGS, a0, t0
-	restore_sc_msareg	28, SC_MSAREGS, a0, t0
-	restore_sc_msareg	29, SC_MSAREGS, a0, t0
-	restore_sc_msareg	30, SC_MSAREGS, a0, t0
-	restore_sc_msareg	31, SC_MSAREGS, a0, t0
-	jr	ra
-	 li	v0, 0
-	END(_restore_msa_context)
-
-#ifdef CONFIG_MIPS32_COMPAT
-
-/*
- * int _restore_msa_context32(struct sigcontext32 *sc)
- */
-LEAF(_restore_msa_context32)
-	restore_sc_msareg	0, SC32_MSAREGS, a0, t0
-	restore_sc_msareg	1, SC32_MSAREGS, a0, t0
-	restore_sc_msareg	2, SC32_MSAREGS, a0, t0
-	restore_sc_msareg	3, SC32_MSAREGS, a0, t0
-	restore_sc_msareg	4, SC32_MSAREGS, a0, t0
-	restore_sc_msareg	5, SC32_MSAREGS, a0, t0
-	restore_sc_msareg	6, SC32_MSAREGS, a0, t0
-	restore_sc_msareg	7, SC32_MSAREGS, a0, t0
-	restore_sc_msareg	8, SC32_MSAREGS, a0, t0
-	restore_sc_msareg	9, SC32_MSAREGS, a0, t0
-	restore_sc_msareg	10, SC32_MSAREGS, a0, t0
-	restore_sc_msareg	11, SC32_MSAREGS, a0, t0
-	restore_sc_msareg	12, SC32_MSAREGS, a0, t0
-	restore_sc_msareg	13, SC32_MSAREGS, a0, t0
-	restore_sc_msareg	14, SC32_MSAREGS, a0, t0
-	restore_sc_msareg	15, SC32_MSAREGS, a0, t0
-	restore_sc_msareg	16, SC32_MSAREGS, a0, t0
-	restore_sc_msareg	17, SC32_MSAREGS, a0, t0
-	restore_sc_msareg	18, SC32_MSAREGS, a0, t0
-	restore_sc_msareg	19, SC32_MSAREGS, a0, t0
-	restore_sc_msareg	20, SC32_MSAREGS, a0, t0
-	restore_sc_msareg	21, SC32_MSAREGS, a0, t0
-	restore_sc_msareg	22, SC32_MSAREGS, a0, t0
-	restore_sc_msareg	23, SC32_MSAREGS, a0, t0
-	restore_sc_msareg	24, SC32_MSAREGS, a0, t0
-	restore_sc_msareg	25, SC32_MSAREGS, a0, t0
-	restore_sc_msareg	26, SC32_MSAREGS, a0, t0
-	restore_sc_msareg	27, SC32_MSAREGS, a0, t0
-	restore_sc_msareg	28, SC32_MSAREGS, a0, t0
-	restore_sc_msareg	29, SC32_MSAREGS, a0, t0
-	restore_sc_msareg	30, SC32_MSAREGS, a0, t0
-	restore_sc_msareg	31, SC32_MSAREGS, a0, t0
-	jr	ra
-	 li	v0, 0
-	END(_restore_msa_context32)
-
-#endif /* CONFIG_MIPS32_COMPAT */
-
-#endif /* CONFIG_CPU_HAS_MSA */
-
 	.set	reorder
 
 	.type	fault@function
diff --git a/arch/mips/kernel/signal.c b/arch/mips/kernel/signal.c
index 33133d3df3e5..9e60d117e41e 100644
--- a/arch/mips/kernel/signal.c
+++ b/arch/mips/kernel/signal.c
@@ -31,7 +31,6 @@
 #include <linux/bitops.h>
 #include <asm/cacheflush.h>
 #include <asm/fpu.h>
-#include <asm/msa.h>
 #include <asm/sim.h>
 #include <asm/ucontext.h>
 #include <asm/cpu-features.h>
@@ -48,9 +47,6 @@ static int (*restore_fp_context)(struct sigcontext __user *sc);
 extern asmlinkage int _save_fp_context(struct sigcontext __user *sc);
 extern asmlinkage int _restore_fp_context(struct sigcontext __user *sc);
 
-extern asmlinkage int _save_msa_context(struct sigcontext __user *sc);
-extern asmlinkage int _restore_msa_context(struct sigcontext __user *sc);
-
 struct sigframe {
 	u32 sf_ass[4];		/* argument save space for o32 */
 	u32 sf_pad[2];		/* Was: signal trampoline */
@@ -99,61 +95,21 @@ static int copy_fp_from_sigcontext(struct sigcontext __user *sc)
 	return err;
 }
 
-/*
- * These functions will save only the upper 64 bits of the vector registers,
- * since the lower 64 bits have already been saved as the scalar FP context.
- */
-static int copy_msa_to_sigcontext(struct sigcontext __user *sc)
-{
-	int i;
-	int err = 0;
-
-	for (i = 0; i < NUM_FPU_REGS; i++) {
-		err |=
-		    __put_user(get_fpr64(&current->thread.fpu.fpr[i], 1),
-			       &sc->sc_msaregs[i]);
-	}
-	err |= __put_user(current->thread.fpu.msacsr, &sc->sc_msa_csr);
-
-	return err;
-}
-
-static int copy_msa_from_sigcontext(struct sigcontext __user *sc)
-{
-	int i;
-	int err = 0;
-	u64 val;
-
-	for (i = 0; i < NUM_FPU_REGS; i++) {
-		err |= __get_user(val, &sc->sc_msaregs[i]);
-		set_fpr64(&current->thread.fpu.fpr[i], 1, val);
-	}
-	err |= __get_user(current->thread.fpu.msacsr, &sc->sc_msa_csr);
-
-	return err;
-}
-
 /*
  * Helper routines
  */
-static int protected_save_fp_context(struct sigcontext __user *sc,
-				     unsigned used_math)
+static int protected_save_fp_context(struct sigcontext __user *sc)
 {
 	int err;
-	bool save_msa = cpu_has_msa && (used_math & USEDMATH_MSA);
 #ifndef CONFIG_EVA
 	while (1) {
 		lock_fpu_owner();
 		if (is_fpu_owner()) {
 			err = save_fp_context(sc);
-			if (save_msa && !err)
-				err = _save_msa_context(sc);
 			unlock_fpu_owner();
 		} else {
 			unlock_fpu_owner();
 			err = copy_fp_to_sigcontext(sc);
-			if (save_msa && !err)
-				err = copy_msa_to_sigcontext(sc);
 		}
 		if (likely(!err))
 			break;
@@ -169,38 +125,24 @@ static int protected_save_fp_context(struct sigcontext __user *sc,
 	 * EVA does not have FPU EVA instructions so saving fpu context directly
 	 * does not work.
 	 */
-	disable_msa();
 	lose_fpu(1);
 	err = save_fp_context(sc); /* this might fail */
-	if (save_msa && !err)
-		err = copy_msa_to_sigcontext(sc);
 #endif
 	return err;
 }
 
-static int protected_restore_fp_context(struct sigcontext __user *sc,
-					unsigned used_math)
+static int protected_restore_fp_context(struct sigcontext __user *sc)
 {
 	int err, tmp __maybe_unused;
-	bool restore_msa = cpu_has_msa && (used_math & USEDMATH_MSA);
 #ifndef CONFIG_EVA
 	while (1) {
 		lock_fpu_owner();
 		if (is_fpu_owner()) {
 			err = restore_fp_context(sc);
-			if (restore_msa && !err) {
-				enable_msa();
-				err = _restore_msa_context(sc);
-			} else {
-				/* signal handler may have used MSA */
-				disable_msa();
-			}
 			unlock_fpu_owner();
 		} else {
 			unlock_fpu_owner();
 			err = copy_fp_from_sigcontext(sc);
-			if (!err && (used_math & USEDMATH_MSA))
-				err = copy_msa_from_sigcontext(sc);
 		}
 		if (likely(!err))
 			break;
@@ -216,11 +158,8 @@ static int protected_restore_fp_context(struct sigcontext __user *sc,
 	 * EVA does not have FPU EVA instructions so restoring fpu context
 	 * directly does not work.
 	 */
-	enable_msa();
 	lose_fpu(0);
 	err = restore_fp_context(sc); /* this might fail */
-	if (restore_msa && !err)
-		err = copy_msa_from_sigcontext(sc);
 #endif
 	return err;
 }
@@ -252,8 +191,7 @@ int setup_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc)
 		err |= __put_user(rddsp(DSP_MASK), &sc->sc_dsp);
 	}
 
-	used_math = used_math() ? USEDMATH_FP : 0;
-	used_math |= thread_msa_context_live() ? USEDMATH_MSA : 0;
+	used_math = !!used_math();
 	err |= __put_user(used_math, &sc->sc_used_math);
 
 	if (used_math) {
@@ -261,7 +199,7 @@ int setup_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc)
 		 * Save FPU state to signal context. Signal handler
 		 * will "inherit" current FPU state.
 		 */
-		err |= protected_save_fp_context(sc, used_math);
+		err |= protected_save_fp_context(sc);
 	}
 	return err;
 }
@@ -286,14 +224,14 @@ int fpcsr_pending(unsigned int __user *fpcsr)
 }
 
 static int
-check_and_restore_fp_context(struct sigcontext __user *sc, unsigned used_math)
+check_and_restore_fp_context(struct sigcontext __user *sc)
 {
 	int err, sig;
 
 	err = sig = fpcsr_pending(&sc->sc_fpc_csr);
 	if (err > 0)
 		err = 0;
-	err |= protected_restore_fp_context(sc, used_math);
+	err |= protected_restore_fp_context(sc);
 	return err ?: sig;
 }
 
@@ -333,10 +271,9 @@ int restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc)
 	if (used_math) {
 		/* restore fpu context if we have used it before */
 		if (!err)
-			err = check_and_restore_fp_context(sc, used_math);
+			err = check_and_restore_fp_context(sc);
 	} else {
-		/* signal handler may have used FPU or MSA. Disable them. */
-		disable_msa();
+		/* signal handler may have used FPU.  Give it up. */
 		lose_fpu(0);
 	}
 
diff --git a/arch/mips/kernel/signal32.c b/arch/mips/kernel/signal32.c
index 299f956e4db3..bae2e6ee2109 100644
--- a/arch/mips/kernel/signal32.c
+++ b/arch/mips/kernel/signal32.c
@@ -30,7 +30,6 @@
 #include <asm/sim.h>
 #include <asm/ucontext.h>
 #include <asm/fpu.h>
-#include <asm/msa.h>
 #include <asm/war.h>
 #include <asm/vdso.h>
 #include <asm/dsp.h>
@@ -43,9 +42,6 @@ static int (*restore_fp_context32)(struct sigcontext32 __user *sc);
 extern asmlinkage int _save_fp_context32(struct sigcontext32 __user *sc);
 extern asmlinkage int _restore_fp_context32(struct sigcontext32 __user *sc);
 
-extern asmlinkage int _save_msa_context32(struct sigcontext32 __user *sc);
-extern asmlinkage int _restore_msa_context32(struct sigcontext32 __user *sc);
-
 /*
  * Including <asm/unistd.h> would give use the 64-bit syscall numbers ...
  */
@@ -114,60 +110,20 @@ static int copy_fp_from_sigcontext32(struct sigcontext32 __user *sc)
 	return err;
 }
 
-/*
- * These functions will save only the upper 64 bits of the vector registers,
- * since the lower 64 bits have already been saved as the scalar FP context.
- */
-static int copy_msa_to_sigcontext32(struct sigcontext32 __user *sc)
-{
-	int i;
-	int err = 0;
-
-	for (i = 0; i < NUM_FPU_REGS; i++) {
-		err |=
-		    __put_user(get_fpr64(&current->thread.fpu.fpr[i], 1),
-			       &sc->sc_msaregs[i]);
-	}
-	err |= __put_user(current->thread.fpu.msacsr, &sc->sc_msa_csr);
-
-	return err;
-}
-
-static int copy_msa_from_sigcontext32(struct sigcontext32 __user *sc)
-{
-	int i;
-	int err = 0;
-	u64 val;
-
-	for (i = 0; i < NUM_FPU_REGS; i++) {
-		err |= __get_user(val, &sc->sc_msaregs[i]);
-		set_fpr64(&current->thread.fpu.fpr[i], 1, val);
-	}
-	err |= __get_user(current->thread.fpu.msacsr, &sc->sc_msa_csr);
-
-	return err;
-}
-
 /*
  * sigcontext handlers
  */
-static int protected_save_fp_context32(struct sigcontext32 __user *sc,
-				       unsigned used_math)
+static int protected_save_fp_context32(struct sigcontext32 __user *sc)
 {
 	int err;
-	bool save_msa = cpu_has_msa && (used_math & USEDMATH_MSA);
 	while (1) {
 		lock_fpu_owner();
 		if (is_fpu_owner()) {
 			err = save_fp_context32(sc);
-			if (save_msa && !err)
-				err = _save_msa_context32(sc);
 			unlock_fpu_owner();
 		} else {
 			unlock_fpu_owner();
 			err = copy_fp_to_sigcontext32(sc);
-			if (save_msa && !err)
-				err = copy_msa_to_sigcontext32(sc);
 		}
 		if (likely(!err))
 			break;
@@ -181,28 +137,17 @@ static int protected_save_fp_context32(struct sigcontext32 __user *sc,
 	return err;
 }
 
-static int protected_restore_fp_context32(struct sigcontext32 __user *sc,
-					  unsigned used_math)
+static int protected_restore_fp_context32(struct sigcontext32 __user *sc)
 {
 	int err, tmp __maybe_unused;
-	bool restore_msa = cpu_has_msa && (used_math & USEDMATH_MSA);
 	while (1) {
 		lock_fpu_owner();
 		if (is_fpu_owner()) {
 			err = restore_fp_context32(sc);
-			if (restore_msa && !err) {
-				enable_msa();
-				err = _restore_msa_context32(sc);
-			} else {
-				/* signal handler may have used MSA */
-				disable_msa();
-			}
 			unlock_fpu_owner();
 		} else {
 			unlock_fpu_owner();
 			err = copy_fp_from_sigcontext32(sc);
-			if (restore_msa && !err)
-				err = copy_msa_from_sigcontext32(sc);
 		}
 		if (likely(!err))
 			break;
@@ -241,8 +186,7 @@ static int setup_sigcontext32(struct pt_regs *regs,
 		err |= __put_user(mflo3(), &sc->sc_lo3);
 	}
 
-	used_math = used_math() ? USEDMATH_FP : 0;
-	used_math |= thread_msa_context_live() ? USEDMATH_MSA : 0;
+	used_math = !!used_math();
 	err |= __put_user(used_math, &sc->sc_used_math);
 
 	if (used_math) {
@@ -250,21 +194,20 @@ static int setup_sigcontext32(struct pt_regs *regs,
 		 * Save FPU state to signal context.  Signal handler
 		 * will "inherit" current FPU state.
 		 */
-		err |= protected_save_fp_context32(sc, used_math);
+		err |= protected_save_fp_context32(sc);
 	}
 	return err;
 }
 
 static int
-check_and_restore_fp_context32(struct sigcontext32 __user *sc,
-			       unsigned used_math)
+check_and_restore_fp_context32(struct sigcontext32 __user *sc)
 {
 	int err, sig;
 
 	err = sig = fpcsr_pending(&sc->sc_fpc_csr);
 	if (err > 0)
 		err = 0;
-	err |= protected_restore_fp_context32(sc, used_math);
+	err |= protected_restore_fp_context32(sc);
 	return err ?: sig;
 }
 
@@ -301,10 +244,9 @@ static int restore_sigcontext32(struct pt_regs *regs,
 	if (used_math) {
 		/* restore fpu context if we have used it before */
 		if (!err)
-			err = check_and_restore_fp_context32(sc, used_math);
+			err = check_and_restore_fp_context32(sc);
 	} else {
-		/* signal handler may have used FPU or MSA. Disable them. */
-		disable_msa();
+		/* signal handler may have used FPU.  Give it up. */
 		lose_fpu(0);
 	}
 
-- 
cgit v1.2.3


From a83d081ed14e4281d2620d182c6044c0c21c551e Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <dborkman@redhat.com>
Date: Tue, 17 Jun 2014 12:16:18 +0200
Subject: MIPS: BPF JIT: Fix build error.

  mips: allmodconfig fails in 3.16-rc1 with lots of undefined symbols.

  arch/mips/net/bpf_jit.c: In function 'is_load_to_a':
  arch/mips/net/bpf_jit.c:559:7: error: 'BPF_S_LD_W_LEN' undeclared (first use in this function)
  arch/mips/net/bpf_jit.c:559:7: note: each undeclared identifier is reported only once for each function it appears in
  arch/mips/net/bpf_jit.c:560:7: error: 'BPF_S_LD_W_ABS' undeclared (first use in this function)
  [...]

The reason behind this is that 3480593131e0 ("net: filter: get rid of
BPF_S_* enum") was routed via net-next tree, that takes all BPF-related
changes, at a time where MIPS BPF JIT was not part of net-next, while
c6610de353da ("MIPS: net: Add BPF JIT") was routed via mips arch tree
and went into mainline within the same merge window. Thus, fix it up by
converting BPF_S_* in a similar fashion as in 3480593131e0 for MIPS.

Reported-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Daniel Borkmann <dborkman@redhat.com>
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: Alexei Starovoitov <ast@plumgrid.com>
Cc: Markos Chandras <markos.chandras@imgtec.com>
Cc: linux-kernel@vger.kernel.org <linux-kernel@vger.kernel.org>
Cc: Linux MIPS Mailing List <linux-mips@linux-mips.org>
Patchwork: https://patchwork.linux-mips.org/patch/7099/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/net/bpf_jit.c | 143 +++++++++++++++++++++++-------------------------
 1 file changed, 69 insertions(+), 74 deletions(-)

(limited to 'arch')

diff --git a/arch/mips/net/bpf_jit.c b/arch/mips/net/bpf_jit.c
index a67b9753330b..f7c206404989 100644
--- a/arch/mips/net/bpf_jit.c
+++ b/arch/mips/net/bpf_jit.c
@@ -556,18 +556,10 @@ static inline void update_on_xread(struct jit_ctx *ctx)
 static bool is_load_to_a(u16 inst)
 {
 	switch (inst) {
-	case BPF_S_LD_W_LEN:
-	case BPF_S_LD_W_ABS:
-	case BPF_S_LD_H_ABS:
-	case BPF_S_LD_B_ABS:
-	case BPF_S_ANC_CPU:
-	case BPF_S_ANC_IFINDEX:
-	case BPF_S_ANC_MARK:
-	case BPF_S_ANC_PROTOCOL:
-	case BPF_S_ANC_RXHASH:
-	case BPF_S_ANC_VLAN_TAG:
-	case BPF_S_ANC_VLAN_TAG_PRESENT:
-	case BPF_S_ANC_QUEUE:
+	case BPF_LD | BPF_W | BPF_LEN:
+	case BPF_LD | BPF_W | BPF_ABS:
+	case BPF_LD | BPF_H | BPF_ABS:
+	case BPF_LD | BPF_B | BPF_ABS:
 		return true;
 	default:
 		return false;
@@ -709,7 +701,7 @@ static void build_prologue(struct jit_ctx *ctx)
 		emit_jit_reg_move(r_X, r_zero, ctx);
 
 	/* Do not leak kernel data to userspace */
-	if ((first_inst != BPF_S_RET_K) && !(is_load_to_a(first_inst)))
+	if ((first_inst != (BPF_RET | BPF_K)) && !(is_load_to_a(first_inst)))
 		emit_jit_reg_move(r_A, r_zero, ctx);
 }
 
@@ -783,41 +775,44 @@ static int build_body(struct jit_ctx *ctx)
 	u32 k, b_off __maybe_unused;
 
 	for (i = 0; i < prog->len; i++) {
+		u16 code;
+
 		inst = &(prog->insns[i]);
 		pr_debug("%s: code->0x%02x, jt->0x%x, jf->0x%x, k->0x%x\n",
 			 __func__, inst->code, inst->jt, inst->jf, inst->k);
 		k = inst->k;
+		code = bpf_anc_helper(inst);
 
 		if (ctx->target == NULL)
 			ctx->offsets[i] = ctx->idx * 4;
 
-		switch (inst->code) {
-		case BPF_S_LD_IMM:
+		switch (code) {
+		case BPF_LD | BPF_IMM:
 			/* A <- k ==> li r_A, k */
 			ctx->flags |= SEEN_A;
 			emit_load_imm(r_A, k, ctx);
 			break;
-		case BPF_S_LD_W_LEN:
+		case BPF_LD | BPF_W | BPF_LEN:
 			BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, len) != 4);
 			/* A <- len ==> lw r_A, offset(skb) */
 			ctx->flags |= SEEN_SKB | SEEN_A;
 			off = offsetof(struct sk_buff, len);
 			emit_load(r_A, r_skb, off, ctx);
 			break;
-		case BPF_S_LD_MEM:
+		case BPF_LD | BPF_MEM:
 			/* A <- M[k] ==> lw r_A, offset(M) */
 			ctx->flags |= SEEN_MEM | SEEN_A;
 			emit_load(r_A, r_M, SCRATCH_OFF(k), ctx);
 			break;
-		case BPF_S_LD_W_ABS:
+		case BPF_LD | BPF_W | BPF_ABS:
 			/* A <- P[k:4] */
 			load_order = 2;
 			goto load;
-		case BPF_S_LD_H_ABS:
+		case BPF_LD | BPF_H | BPF_ABS:
 			/* A <- P[k:2] */
 			load_order = 1;
 			goto load;
-		case BPF_S_LD_B_ABS:
+		case BPF_LD | BPF_B | BPF_ABS:
 			/* A <- P[k:1] */
 			load_order = 0;
 load:
@@ -852,15 +847,15 @@ load_common:
 			emit_b(b_imm(prog->len, ctx), ctx);
 			emit_reg_move(r_ret, r_zero, ctx);
 			break;
-		case BPF_S_LD_W_IND:
+		case BPF_LD | BPF_W | BPF_IND:
 			/* A <- P[X + k:4] */
 			load_order = 2;
 			goto load_ind;
-		case BPF_S_LD_H_IND:
+		case BPF_LD | BPF_H | BPF_IND:
 			/* A <- P[X + k:2] */
 			load_order = 1;
 			goto load_ind;
-		case BPF_S_LD_B_IND:
+		case BPF_LD | BPF_B | BPF_IND:
 			/* A <- P[X + k:1] */
 			load_order = 0;
 load_ind:
@@ -868,23 +863,23 @@ load_ind:
 			ctx->flags |= SEEN_OFF | SEEN_X;
 			emit_addiu(r_off, r_X, k, ctx);
 			goto load_common;
-		case BPF_S_LDX_IMM:
+		case BPF_LDX | BPF_IMM:
 			/* X <- k */
 			ctx->flags |= SEEN_X;
 			emit_load_imm(r_X, k, ctx);
 			break;
-		case BPF_S_LDX_MEM:
+		case BPF_LDX | BPF_MEM:
 			/* X <- M[k] */
 			ctx->flags |= SEEN_X | SEEN_MEM;
 			emit_load(r_X, r_M, SCRATCH_OFF(k), ctx);
 			break;
-		case BPF_S_LDX_W_LEN:
+		case BPF_LDX | BPF_W | BPF_LEN:
 			/* X <- len */
 			ctx->flags |= SEEN_X | SEEN_SKB;
 			off = offsetof(struct sk_buff, len);
 			emit_load(r_X, r_skb, off, ctx);
 			break;
-		case BPF_S_LDX_B_MSH:
+		case BPF_LDX | BPF_B | BPF_MSH:
 			/* X <- 4 * (P[k:1] & 0xf) */
 			ctx->flags |= SEEN_X | SEEN_CALL | SEEN_S0 | SEEN_SKB;
 			/* Load offset to a1 */
@@ -917,50 +912,50 @@ load_ind:
 			emit_b(b_imm(prog->len, ctx), ctx);
 			emit_load_imm(r_ret, 0, ctx); /* delay slot */
 			break;
-		case BPF_S_ST:
+		case BPF_ST:
 			/* M[k] <- A */
 			ctx->flags |= SEEN_MEM | SEEN_A;
 			emit_store(r_A, r_M, SCRATCH_OFF(k), ctx);
 			break;
-		case BPF_S_STX:
+		case BPF_STX:
 			/* M[k] <- X */
 			ctx->flags |= SEEN_MEM | SEEN_X;
 			emit_store(r_X, r_M, SCRATCH_OFF(k), ctx);
 			break;
-		case BPF_S_ALU_ADD_K:
+		case BPF_ALU | BPF_ADD | BPF_K:
 			/* A += K */
 			ctx->flags |= SEEN_A;
 			emit_addiu(r_A, r_A, k, ctx);
 			break;
-		case BPF_S_ALU_ADD_X:
+		case BPF_ALU | BPF_ADD | BPF_X:
 			/* A += X */
 			ctx->flags |= SEEN_A | SEEN_X;
 			emit_addu(r_A, r_A, r_X, ctx);
 			break;
-		case BPF_S_ALU_SUB_K:
+		case BPF_ALU | BPF_SUB | BPF_K:
 			/* A -= K */
 			ctx->flags |= SEEN_A;
 			emit_addiu(r_A, r_A, -k, ctx);
 			break;
-		case BPF_S_ALU_SUB_X:
+		case BPF_ALU | BPF_SUB | BPF_X:
 			/* A -= X */
 			ctx->flags |= SEEN_A | SEEN_X;
 			emit_subu(r_A, r_A, r_X, ctx);
 			break;
-		case BPF_S_ALU_MUL_K:
+		case BPF_ALU | BPF_MUL | BPF_K:
 			/* A *= K */
 			/* Load K to scratch register before MUL */
 			ctx->flags |= SEEN_A | SEEN_S0;
 			emit_load_imm(r_s0, k, ctx);
 			emit_mul(r_A, r_A, r_s0, ctx);
 			break;
-		case BPF_S_ALU_MUL_X:
+		case BPF_ALU | BPF_MUL | BPF_X:
 			/* A *= X */
 			update_on_xread(ctx);
 			ctx->flags |= SEEN_A | SEEN_X;
 			emit_mul(r_A, r_A, r_X, ctx);
 			break;
-		case BPF_S_ALU_DIV_K:
+		case BPF_ALU | BPF_DIV | BPF_K:
 			/* A /= k */
 			if (k == 1)
 				break;
@@ -973,7 +968,7 @@ load_ind:
 			emit_load_imm(r_s0, k, ctx);
 			emit_div(r_A, r_s0, ctx);
 			break;
-		case BPF_S_ALU_MOD_K:
+		case BPF_ALU | BPF_MOD | BPF_K:
 			/* A %= k */
 			if (k == 1 || optimize_div(&k)) {
 				ctx->flags |= SEEN_A;
@@ -984,7 +979,7 @@ load_ind:
 				emit_mod(r_A, r_s0, ctx);
 			}
 			break;
-		case BPF_S_ALU_DIV_X:
+		case BPF_ALU | BPF_DIV | BPF_X:
 			/* A /= X */
 			update_on_xread(ctx);
 			ctx->flags |= SEEN_X | SEEN_A;
@@ -994,7 +989,7 @@ load_ind:
 			emit_load_imm(r_val, 0, ctx); /* delay slot */
 			emit_div(r_A, r_X, ctx);
 			break;
-		case BPF_S_ALU_MOD_X:
+		case BPF_ALU | BPF_MOD | BPF_X:
 			/* A %= X */
 			update_on_xread(ctx);
 			ctx->flags |= SEEN_X | SEEN_A;
@@ -1004,94 +999,94 @@ load_ind:
 			emit_load_imm(r_val, 0, ctx); /* delay slot */
 			emit_mod(r_A, r_X, ctx);
 			break;
-		case BPF_S_ALU_OR_K:
+		case BPF_ALU | BPF_OR | BPF_K:
 			/* A |= K */
 			ctx->flags |= SEEN_A;
 			emit_ori(r_A, r_A, k, ctx);
 			break;
-		case BPF_S_ALU_OR_X:
+		case BPF_ALU | BPF_OR | BPF_X:
 			/* A |= X */
 			update_on_xread(ctx);
 			ctx->flags |= SEEN_A;
 			emit_ori(r_A, r_A, r_X, ctx);
 			break;
-		case BPF_S_ALU_XOR_K:
+		case BPF_ALU | BPF_XOR | BPF_K:
 			/* A ^= k */
 			ctx->flags |= SEEN_A;
 			emit_xori(r_A, r_A, k, ctx);
 			break;
-		case BPF_S_ANC_ALU_XOR_X:
-		case BPF_S_ALU_XOR_X:
+		case BPF_ANC | SKF_AD_ALU_XOR_X:
+		case BPF_ALU | BPF_XOR | BPF_X:
 			/* A ^= X */
 			update_on_xread(ctx);
 			ctx->flags |= SEEN_A;
 			emit_xor(r_A, r_A, r_X, ctx);
 			break;
-		case BPF_S_ALU_AND_K:
+		case BPF_ALU | BPF_AND | BPF_K:
 			/* A &= K */
 			ctx->flags |= SEEN_A;
 			emit_andi(r_A, r_A, k, ctx);
 			break;
-		case BPF_S_ALU_AND_X:
+		case BPF_ALU | BPF_AND | BPF_X:
 			/* A &= X */
 			update_on_xread(ctx);
 			ctx->flags |= SEEN_A | SEEN_X;
 			emit_and(r_A, r_A, r_X, ctx);
 			break;
-		case BPF_S_ALU_LSH_K:
+		case BPF_ALU | BPF_LSH | BPF_K:
 			/* A <<= K */
 			ctx->flags |= SEEN_A;
 			emit_sll(r_A, r_A, k, ctx);
 			break;
-		case BPF_S_ALU_LSH_X:
+		case BPF_ALU | BPF_LSH | BPF_X:
 			/* A <<= X */
 			ctx->flags |= SEEN_A | SEEN_X;
 			update_on_xread(ctx);
 			emit_sllv(r_A, r_A, r_X, ctx);
 			break;
-		case BPF_S_ALU_RSH_K:
+		case BPF_ALU | BPF_RSH | BPF_K:
 			/* A >>= K */
 			ctx->flags |= SEEN_A;
 			emit_srl(r_A, r_A, k, ctx);
 			break;
-		case BPF_S_ALU_RSH_X:
+		case BPF_ALU | BPF_RSH | BPF_X:
 			ctx->flags |= SEEN_A | SEEN_X;
 			update_on_xread(ctx);
 			emit_srlv(r_A, r_A, r_X, ctx);
 			break;
-		case BPF_S_ALU_NEG:
+		case BPF_ALU | BPF_NEG:
 			/* A = -A */
 			ctx->flags |= SEEN_A;
 			emit_neg(r_A, ctx);
 			break;
-		case BPF_S_JMP_JA:
+		case BPF_JMP | BPF_JA:
 			/* pc += K */
 			emit_b(b_imm(i + k + 1, ctx), ctx);
 			emit_nop(ctx);
 			break;
-		case BPF_S_JMP_JEQ_K:
+		case BPF_JMP | BPF_JEQ | BPF_K:
 			/* pc += ( A == K ) ? pc->jt : pc->jf */
 			condt = MIPS_COND_EQ | MIPS_COND_K;
 			goto jmp_cmp;
-		case BPF_S_JMP_JEQ_X:
+		case BPF_JMP | BPF_JEQ | BPF_X:
 			ctx->flags |= SEEN_X;
 			/* pc += ( A == X ) ? pc->jt : pc->jf */
 			condt = MIPS_COND_EQ | MIPS_COND_X;
 			goto jmp_cmp;
-		case BPF_S_JMP_JGE_K:
+		case BPF_JMP | BPF_JGE | BPF_K:
 			/* pc += ( A >= K ) ? pc->jt : pc->jf */
 			condt = MIPS_COND_GE | MIPS_COND_K;
 			goto jmp_cmp;
-		case BPF_S_JMP_JGE_X:
+		case BPF_JMP | BPF_JGE | BPF_X:
 			ctx->flags |= SEEN_X;
 			/* pc += ( A >= X ) ? pc->jt : pc->jf */
 			condt = MIPS_COND_GE | MIPS_COND_X;
 			goto jmp_cmp;
-		case BPF_S_JMP_JGT_K:
+		case BPF_JMP | BPF_JGT | BPF_K:
 			/* pc += ( A > K ) ? pc->jt : pc->jf */
 			condt = MIPS_COND_GT | MIPS_COND_K;
 			goto jmp_cmp;
-		case BPF_S_JMP_JGT_X:
+		case BPF_JMP | BPF_JGT | BPF_X:
 			ctx->flags |= SEEN_X;
 			/* pc += ( A > X ) ? pc->jt : pc->jf */
 			condt = MIPS_COND_GT | MIPS_COND_X;
@@ -1167,7 +1162,7 @@ jmp_cmp:
 				}
 			}
 			break;
-		case BPF_S_JMP_JSET_K:
+		case BPF_JMP | BPF_JSET | BPF_K:
 			ctx->flags |= SEEN_S0 | SEEN_S1 | SEEN_A;
 			/* pc += (A & K) ? pc -> jt : pc -> jf */
 			emit_load_imm(r_s1, k, ctx);
@@ -1181,7 +1176,7 @@ jmp_cmp:
 			emit_b(b_off, ctx);
 			emit_nop(ctx);
 			break;
-		case BPF_S_JMP_JSET_X:
+		case BPF_JMP | BPF_JSET | BPF_X:
 			ctx->flags |= SEEN_S0 | SEEN_X | SEEN_A;
 			/* pc += (A & X) ? pc -> jt : pc -> jf */
 			emit_and(r_s0, r_A, r_X, ctx);
@@ -1194,7 +1189,7 @@ jmp_cmp:
 			emit_b(b_off, ctx);
 			emit_nop(ctx);
 			break;
-		case BPF_S_RET_A:
+		case BPF_RET | BPF_A:
 			ctx->flags |= SEEN_A;
 			if (i != prog->len - 1)
 				/*
@@ -1204,7 +1199,7 @@ jmp_cmp:
 				emit_b(b_imm(prog->len, ctx), ctx);
 			emit_reg_move(r_ret, r_A, ctx); /* delay slot */
 			break;
-		case BPF_S_RET_K:
+		case BPF_RET | BPF_K:
 			/*
 			 * It can emit two instructions so it does not fit on
 			 * the delay slot.
@@ -1219,19 +1214,19 @@ jmp_cmp:
 				emit_nop(ctx);
 			}
 			break;
-		case BPF_S_MISC_TAX:
+		case BPF_MISC | BPF_TAX:
 			/* X = A */
 			ctx->flags |= SEEN_X | SEEN_A;
 			emit_jit_reg_move(r_X, r_A, ctx);
 			break;
-		case BPF_S_MISC_TXA:
+		case BPF_MISC | BPF_TXA:
 			/* A = X */
 			ctx->flags |= SEEN_A | SEEN_X;
 			update_on_xread(ctx);
 			emit_jit_reg_move(r_A, r_X, ctx);
 			break;
 		/* AUX */
-		case BPF_S_ANC_PROTOCOL:
+		case BPF_ANC | SKF_AD_PROTOCOL:
 			/* A = ntohs(skb->protocol */
 			ctx->flags |= SEEN_SKB | SEEN_OFF | SEEN_A;
 			BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff,
@@ -1256,7 +1251,7 @@ jmp_cmp:
 			}
 #endif
 			break;
-		case BPF_S_ANC_CPU:
+		case BPF_ANC | SKF_AD_CPU:
 			ctx->flags |= SEEN_A | SEEN_OFF;
 			/* A = current_thread_info()->cpu */
 			BUILD_BUG_ON(FIELD_SIZEOF(struct thread_info,
@@ -1265,7 +1260,7 @@ jmp_cmp:
 			/* $28/gp points to the thread_info struct */
 			emit_load(r_A, 28, off, ctx);
 			break;
-		case BPF_S_ANC_IFINDEX:
+		case BPF_ANC | SKF_AD_IFINDEX:
 			/* A = skb->dev->ifindex */
 			ctx->flags |= SEEN_SKB | SEEN_A | SEEN_S0;
 			off = offsetof(struct sk_buff, dev);
@@ -1279,31 +1274,31 @@ jmp_cmp:
 			off = offsetof(struct net_device, ifindex);
 			emit_load(r_A, r_s0, off, ctx);
 			break;
-		case BPF_S_ANC_MARK:
+		case BPF_ANC | SKF_AD_MARK:
 			ctx->flags |= SEEN_SKB | SEEN_A;
 			BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, mark) != 4);
 			off = offsetof(struct sk_buff, mark);
 			emit_load(r_A, r_skb, off, ctx);
 			break;
-		case BPF_S_ANC_RXHASH:
+		case BPF_ANC | SKF_AD_RXHASH:
 			ctx->flags |= SEEN_SKB | SEEN_A;
 			BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, hash) != 4);
 			off = offsetof(struct sk_buff, hash);
 			emit_load(r_A, r_skb, off, ctx);
 			break;
-		case BPF_S_ANC_VLAN_TAG:
-		case BPF_S_ANC_VLAN_TAG_PRESENT:
+		case BPF_ANC | SKF_AD_VLAN_TAG:
+		case BPF_ANC | SKF_AD_VLAN_TAG_PRESENT:
 			ctx->flags |= SEEN_SKB | SEEN_S0 | SEEN_A;
 			BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff,
 						  vlan_tci) != 2);
 			off = offsetof(struct sk_buff, vlan_tci);
 			emit_half_load(r_s0, r_skb, off, ctx);
-			if (inst->code == BPF_S_ANC_VLAN_TAG)
+			if (code == (BPF_ANC | SKF_AD_VLAN_TAG))
 				emit_and(r_A, r_s0, VLAN_VID_MASK, ctx);
 			else
 				emit_and(r_A, r_s0, VLAN_TAG_PRESENT, ctx);
 			break;
-		case BPF_S_ANC_PKTTYPE:
+		case BPF_ANC | SKF_AD_PKTTYPE:
 			off = pkt_type_offset();
 
 			if (off < 0)
@@ -1312,7 +1307,7 @@ jmp_cmp:
 			/* Keep only the last 3 bits */
 			emit_andi(r_A, r_tmp, PKT_TYPE_MAX, ctx);
 			break;
-		case BPF_S_ANC_QUEUE:
+		case BPF_ANC | SKF_AD_QUEUE:
 			ctx->flags |= SEEN_SKB | SEEN_A;
 			BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff,
 						  queue_mapping) != 2);
-- 
cgit v1.2.3


From 9d9873697e8b5aaf39ef1f96e57ab4fffb8f45ae Mon Sep 17 00:00:00 2001
From: Markos Chandras <markos.chandras@imgtec.com>
Date: Mon, 23 Jun 2014 10:38:44 +0100
Subject: MIPS: uasm: Add s3s1s2 instruction builder

It will be used later on by the SLT instruction.

Signed-off-by: Markos Chandras <markos.chandras@imgtec.com>
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/7119/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/include/asm/uasm.h | 3 +++
 arch/mips/mm/uasm.c          | 7 +++++++
 2 files changed, 10 insertions(+)

(limited to 'arch')

diff --git a/arch/mips/include/asm/uasm.h b/arch/mips/include/asm/uasm.h
index f8d63b3b40b4..43259b3fca6d 100644
--- a/arch/mips/include/asm/uasm.h
+++ b/arch/mips/include/asm/uasm.h
@@ -67,6 +67,9 @@ void ISAOPC(op)(u32 **buf, unsigned int a, unsigned int b, signed int c)
 #define Ip_u2s3u1(op)							\
 void ISAOPC(op)(u32 **buf, unsigned int a, signed int b, unsigned int c)
 
+#define Ip_s3s1s2(op)							\
+void ISAOPC(op)(u32 **buf, int a, int b, int c)
+
 #define Ip_u2u1s3(op)							\
 void ISAOPC(op)(u32 **buf, unsigned int a, unsigned int b, signed int c)
 
diff --git a/arch/mips/mm/uasm.c b/arch/mips/mm/uasm.c
index 00515805fe41..1e3e10919423 100644
--- a/arch/mips/mm/uasm.c
+++ b/arch/mips/mm/uasm.c
@@ -139,6 +139,13 @@ Ip_u1u2u3(op)						\
 }							\
 UASM_EXPORT_SYMBOL(uasm_i##op);
 
+#define I_s3s1s2(op)					\
+Ip_s3s1s2(op)						\
+{							\
+	build_insn(buf, insn##op, b, c, a);		\
+}							\
+UASM_EXPORT_SYMBOL(uasm_i##op);
+
 #define I_u2u1u3(op)					\
 Ip_u2u1u3(op)						\
 {							\
-- 
cgit v1.2.3


From 7682f9e81899f1f874bd565daa7fb0b20024ed80 Mon Sep 17 00:00:00 2001
From: Markos Chandras <markos.chandras@imgtec.com>
Date: Mon, 23 Jun 2014 10:38:45 +0100
Subject: MIPS: uasm: Add SLT uasm instruction

It will be used later on by bpf-jit

Signed-off-by: Markos Chandras <markos.chandras@imgtec.com>
Cc: linux-mips@linux-mips.org
Cc: Markos Chandras <markos.chandras@imgtec.com>
Patchwork: https://patchwork.linux-mips.org/patch/7120/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/include/asm/uasm.h      | 1 +
 arch/mips/include/uapi/asm/inst.h | 1 +
 arch/mips/mm/uasm-micromips.c     | 1 +
 arch/mips/mm/uasm-mips.c          | 1 +
 arch/mips/mm/uasm.c               | 3 ++-
 5 files changed, 6 insertions(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/mips/include/asm/uasm.h b/arch/mips/include/asm/uasm.h
index 43259b3fca6d..708c5d414905 100644
--- a/arch/mips/include/asm/uasm.h
+++ b/arch/mips/include/asm/uasm.h
@@ -150,6 +150,7 @@ Ip_u2s3u1(_scd);
 Ip_u2s3u1(_sd);
 Ip_u2u1u3(_sll);
 Ip_u3u2u1(_sllv);
+Ip_s3s1s2(_slt);
 Ip_u2u1s3(_sltiu);
 Ip_u3u1u2(_sltu);
 Ip_u2u1u3(_sra);
diff --git a/arch/mips/include/uapi/asm/inst.h b/arch/mips/include/uapi/asm/inst.h
index 4b7160259292..4bfdb9d4c186 100644
--- a/arch/mips/include/uapi/asm/inst.h
+++ b/arch/mips/include/uapi/asm/inst.h
@@ -273,6 +273,7 @@ enum mm_32a_minor_op {
 	mm_and_op = 0x250,
 	mm_or32_op = 0x290,
 	mm_xor32_op = 0x310,
+	mm_slt_op = 0x350,
 	mm_sltu_op = 0x390,
 };
 
diff --git a/arch/mips/mm/uasm-micromips.c b/arch/mips/mm/uasm-micromips.c
index 775c2800cba2..8399ddf03a02 100644
--- a/arch/mips/mm/uasm-micromips.c
+++ b/arch/mips/mm/uasm-micromips.c
@@ -102,6 +102,7 @@ static struct insn insn_table_MM[] = {
 	{ insn_sd, 0, 0 },
 	{ insn_sll, M(mm_pool32a_op, 0, 0, 0, 0, mm_sll32_op), RT | RS | RD },
 	{ insn_sllv, M(mm_pool32a_op, 0, 0, 0, 0, mm_sllv32_op), RT | RS | RD },
+	{ insn_slt, M(mm_pool32a_op, 0, 0, 0, 0, mm_slt_op), RT | RS | RD },
 	{ insn_sltiu, M(mm_sltiu32_op, 0, 0, 0, 0, 0), RT | RS | SIMM },
 	{ insn_sltu, M(mm_pool32a_op, 0, 0, 0, 0, mm_sltu_op), RT | RS | RD },
 	{ insn_sra, M(mm_pool32a_op, 0, 0, 0, 0, mm_sra_op), RT | RS | RD },
diff --git a/arch/mips/mm/uasm-mips.c b/arch/mips/mm/uasm-mips.c
index 38792c2364f5..4535a9d19ea5 100644
--- a/arch/mips/mm/uasm-mips.c
+++ b/arch/mips/mm/uasm-mips.c
@@ -110,6 +110,7 @@ static struct insn insn_table[] = {
 	{ insn_sd,  M(sd_op, 0, 0, 0, 0, 0),  RS | RT | SIMM },
 	{ insn_sll,  M(spec_op, 0, 0, 0, 0, sll_op),  RT | RD | RE },
 	{ insn_sllv,  M(spec_op, 0, 0, 0, 0, sllv_op),  RS | RT | RD },
+	{ insn_slt,  M(spec_op, 0, 0, 0, 0, slt_op),  RS | RT | RD },
 	{ insn_sltiu, M(sltiu_op, 0, 0, 0, 0, 0), RS | RT | SIMM },
 	{ insn_sltu, M(spec_op, 0, 0, 0, 0, sltu_op), RS | RT | RD },
 	{ insn_sra,  M(spec_op, 0, 0, 0, 0, sra_op),  RT | RD | RE },
diff --git a/arch/mips/mm/uasm.c b/arch/mips/mm/uasm.c
index 1e3e10919423..a01b0d6cedd2 100644
--- a/arch/mips/mm/uasm.c
+++ b/arch/mips/mm/uasm.c
@@ -53,7 +53,7 @@ enum opcode {
 	insn_ld, insn_ldx, insn_lh, insn_ll, insn_lld, insn_lui, insn_lw,
 	insn_lwx, insn_mfc0, insn_mfhi, insn_mflo, insn_mtc0, insn_mul,
 	insn_or, insn_ori, insn_pref, insn_rfe, insn_rotr, insn_sc, insn_scd,
-	insn_sd, insn_sll, insn_sllv, insn_sltiu, insn_sltu, insn_sra,
+	insn_sd, insn_sll, insn_sllv, insn_slt, insn_sltiu, insn_sltu, insn_sra,
 	insn_srl, insn_srlv, insn_subu, insn_sw, insn_sync, insn_syscall,
 	insn_tlbp, insn_tlbr, insn_tlbwi, insn_tlbwr, insn_wait, insn_wsbh,
 	insn_xor, insn_xori, insn_yield,
@@ -296,6 +296,7 @@ I_u2s3u1(_scd)
 I_u2s3u1(_sd)
 I_u2u1u3(_sll)
 I_u3u2u1(_sllv)
+I_s3s1s2(_slt)
 I_u2u1s3(_sltiu)
 I_u3u1u2(_sltu)
 I_u2u1u3(_sra)
-- 
cgit v1.2.3


From 84c68cbc667287dfd5eb793b6715c6d76a318e3f Mon Sep 17 00:00:00 2001
From: Markos Chandras <markos.chandras@imgtec.com>
Date: Mon, 23 Jun 2014 10:38:46 +0100
Subject: MIPS: mm: uasm: Fix lh micro-assembler instruction

Commit d6b3314b49e12e8c349deb4ca28e7028db00728f "MIPS: uasm: Add lh uam
instruction" added the 'lh' micro-assembler instruction but it used the
'lw' opcode for it. Fix it by using the correct 'lh' opcode.

Signed-off-by: Markos Chandras <markos.chandras@imgtec.com>
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/7121/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/mm/uasm-mips.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/mips/mm/uasm-mips.c b/arch/mips/mm/uasm-mips.c
index 4535a9d19ea5..6708a2dbf934 100644
--- a/arch/mips/mm/uasm-mips.c
+++ b/arch/mips/mm/uasm-mips.c
@@ -89,7 +89,7 @@ static struct insn insn_table[] = {
 	{ insn_lb, M(lb_op, 0, 0, 0, 0, 0), RS | RT | SIMM },
 	{ insn_ld,  M(ld_op, 0, 0, 0, 0, 0),  RS | RT | SIMM },
 	{ insn_ldx, M(spec3_op, 0, 0, 0, ldx_op, lx_op), RS | RT | RD },
-	{ insn_lh,  M(lw_op, 0, 0, 0, 0, 0),  RS | RT | SIMM },
+	{ insn_lh,  M(lh_op, 0, 0, 0, 0, 0),  RS | RT | SIMM },
 	{ insn_lld,  M(lld_op, 0, 0, 0, 0, 0),	RS | RT | SIMM },
 	{ insn_ll,  M(ll_op, 0, 0, 0, 0, 0),  RS | RT | SIMM },
 	{ insn_lui,  M(lui_op, 0, 0, 0, 0, 0),	RT | SIMM },
-- 
cgit v1.2.3


From 35a8e16abe36d385d602997e1500a668d2b9c5cf Mon Sep 17 00:00:00 2001
From: Markos Chandras <markos.chandras@imgtec.com>
Date: Mon, 23 Jun 2014 10:38:47 +0100
Subject: MIPS: bpf: Use the LO register to get division's quotient

Reading from the HI register to get the division result is wrong.
The quotient is placed in the LO register.

Signed-off-by: Markos Chandras <markos.chandras@imgtec.com>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Daniel Borkmann <dborkman@redhat.com>
Cc: Alexei Starovoitov <ast@plumgrid.com>
Cc: netdev@vger.kernel.org
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/7122/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/net/bpf_jit.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/mips/net/bpf_jit.c b/arch/mips/net/bpf_jit.c
index f7c206404989..5cc92c4590cb 100644
--- a/arch/mips/net/bpf_jit.c
+++ b/arch/mips/net/bpf_jit.c
@@ -408,7 +408,7 @@ static inline void emit_div(unsigned int dst, unsigned int src,
 		u32 *p = &ctx->target[ctx->idx];
 		uasm_i_divu(&p, dst, src);
 		p = &ctx->target[ctx->idx + 1];
-		uasm_i_mfhi(&p, dst);
+		uasm_i_mflo(&p, dst);
 	}
 	ctx->idx += 2; /* 2 insts */
 }
-- 
cgit v1.2.3


From 55393ee535496f7db15f3b2e9d3cf418f772f71a Mon Sep 17 00:00:00 2001
From: Markos Chandras <markos.chandras@imgtec.com>
Date: Mon, 23 Jun 2014 10:38:48 +0100
Subject: MIPS: bpf: Return error code if the offset is a negative number

Previously, the negative offset was not checked leading to failures
due to trying to load data beyond the skb struct boundaries. Until we
have proper asm helpers in place, it's best if we return ENOSUPP if K
is negative when trying to JIT the filter or 0 during runtime if we
do an indirect load where the value of X is unknown during build time.

Signed-off-by: Markos Chandras <markos.chandras@imgtec.com>
Cc: David S. Miller <davem@davemloft.net>
Cc: Daniel Borkmann <dborkman@redhat.com>
Cc: Alexei Starovoitov <ast@plumgrid.com>
Cc: netdev@vger.kernel.org
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/7123/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/net/bpf_jit.c | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)

(limited to 'arch')

diff --git a/arch/mips/net/bpf_jit.c b/arch/mips/net/bpf_jit.c
index 5cc92c4590cb..95728ea6cb74 100644
--- a/arch/mips/net/bpf_jit.c
+++ b/arch/mips/net/bpf_jit.c
@@ -331,6 +331,12 @@ static inline void emit_srl(unsigned int dst, unsigned int src,
 	emit_instr(ctx, srl, dst, src, sa);
 }
 
+static inline void emit_slt(unsigned int dst, unsigned int src1,
+			    unsigned int src2, struct jit_ctx *ctx)
+{
+	emit_instr(ctx, slt, dst, src1, src2);
+}
+
 static inline void emit_sltu(unsigned int dst, unsigned int src1,
 			     unsigned int src2, struct jit_ctx *ctx)
 {
@@ -816,8 +822,21 @@ static int build_body(struct jit_ctx *ctx)
 			/* A <- P[k:1] */
 			load_order = 0;
 load:
+			/* the interpreter will deal with the negative K */
+			if ((int)k < 0)
+				return -ENOTSUPP;
+
 			emit_load_imm(r_off, k, ctx);
 load_common:
+			/*
+			 * We may got here from the indirect loads so
+			 * return if offset is negative.
+			 */
+			emit_slt(r_s0, r_off, r_zero, ctx);
+			emit_bcond(MIPS_COND_NE, r_s0, r_zero,
+				   b_imm(prog->len, ctx), ctx);
+			emit_reg_move(r_ret, r_zero, ctx);
+
 			ctx->flags |= SEEN_CALL | SEEN_OFF | SEEN_S0 |
 				SEEN_SKB | SEEN_A;
 
@@ -880,6 +899,10 @@ load_ind:
 			emit_load(r_X, r_skb, off, ctx);
 			break;
 		case BPF_LDX | BPF_B | BPF_MSH:
+			/* the interpreter will deal with the negative K */
+			if ((int)k < 0)
+				return -ENOTSUPP;
+
 			/* X <- 4 * (P[k:1] & 0xf) */
 			ctx->flags |= SEEN_X | SEEN_CALL | SEEN_S0 | SEEN_SKB;
 			/* Load offset to a1 */
-- 
cgit v1.2.3


From 9ee1606e8a6316287029c86ef48ddcfe4dec595d Mon Sep 17 00:00:00 2001
From: Markos Chandras <markos.chandras@imgtec.com>
Date: Mon, 23 Jun 2014 10:38:49 +0100
Subject: MIPS: bpf: Use 'andi' instead of 'and' for the VLAN cases

The VLAN_VID_MASK and VLAN_TAG_PRESENT are immediates, so using
'and' which expects 3 registers will produce wrong results. Fix
this by using the 'andi' instruction.

Signed-off-by: Markos Chandras <markos.chandras@imgtec.com>
Cc: David S. Miller <davem@davemloft.net>
Cc: Daniel Borkmann <dborkman@redhat.com>
Cc: Alexei Starovoitov <ast@plumgrid.com>
Cc: netdev@vger.kernel.org
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/7124/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/net/bpf_jit.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/mips/net/bpf_jit.c b/arch/mips/net/bpf_jit.c
index 95728ea6cb74..fe5041bdc6fb 100644
--- a/arch/mips/net/bpf_jit.c
+++ b/arch/mips/net/bpf_jit.c
@@ -1317,9 +1317,9 @@ jmp_cmp:
 			off = offsetof(struct sk_buff, vlan_tci);
 			emit_half_load(r_s0, r_skb, off, ctx);
 			if (code == (BPF_ANC | SKF_AD_VLAN_TAG))
-				emit_and(r_A, r_s0, VLAN_VID_MASK, ctx);
+				emit_andi(r_A, r_s0, VLAN_VID_MASK, ctx);
 			else
-				emit_and(r_A, r_s0, VLAN_TAG_PRESENT, ctx);
+				emit_andi(r_A, r_s0, VLAN_TAG_PRESENT, ctx);
 			break;
 		case BPF_ANC | SKF_AD_PKTTYPE:
 			off = pkt_type_offset();
-- 
cgit v1.2.3


From 9eebfe478d1b83389c3ac1bd73a45b6665e356db Mon Sep 17 00:00:00 2001
From: Markos Chandras <markos.chandras@imgtec.com>
Date: Mon, 23 Jun 2014 10:38:50 +0100
Subject: MIPS: bpf: Add SEEN_SKB to flags when looking for the PKT_TYPE

The SKF_AD_PKTTYPE uses the skb pointer so make sure it's in the
flags so it will be initialized in time.

Signed-off-by: Markos Chandras <markos.chandras@imgtec.com>
Cc: David S. Miller <davem@davemloft.net>
Cc: Daniel Borkmann <dborkman@redhat.com>
Cc: Alexei Starovoitov <ast@plumgrid.com>
Cc: netdev@vger.kernel.org
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/7125/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/net/bpf_jit.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'arch')

diff --git a/arch/mips/net/bpf_jit.c b/arch/mips/net/bpf_jit.c
index fe5041bdc6fb..8cae27af03da 100644
--- a/arch/mips/net/bpf_jit.c
+++ b/arch/mips/net/bpf_jit.c
@@ -1322,6 +1322,8 @@ jmp_cmp:
 				emit_andi(r_A, r_s0, VLAN_TAG_PRESENT, ctx);
 			break;
 		case BPF_ANC | SKF_AD_PKTTYPE:
+			ctx->flags |= SEEN_SKB;
+
 			off = pkt_type_offset();
 
 			if (off < 0)
-- 
cgit v1.2.3


From 1ab24a4e3de1ec37d8ed255841d2d94d77e8a4f4 Mon Sep 17 00:00:00 2001
From: Markos Chandras <markos.chandras@imgtec.com>
Date: Mon, 23 Jun 2014 10:38:51 +0100
Subject: MIPS: bpf: Fix branch conditional for BPF_J{GT/GE} cases

The sltiu and sltu instructions will set the scratch register
to 1 if A <= X|K so fix the emitted branch conditional to check
for scratch != zero rather than scratch >= zero which would complicate
the resuling branch logic given that MIPS does not have a BGT or BGET
instructions to compare general purpose registers directly.

Signed-off-by: Markos Chandras <markos.chandras@imgtec.com>
Cc: David S. Miller <davem@davemloft.net>
Cc: Daniel Borkmann <dborkman@redhat.com>
Cc: Alexei Starovoitov <ast@plumgrid.com>
Cc: netdev@vger.kernel.org
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/7126/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/net/bpf_jit.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/mips/net/bpf_jit.c b/arch/mips/net/bpf_jit.c
index 8cae27af03da..500f97fdc0e1 100644
--- a/arch/mips/net/bpf_jit.c
+++ b/arch/mips/net/bpf_jit.c
@@ -1127,7 +1127,7 @@ jmp_cmp:
 				}
 				/* A < (K|X) ? r_scrach = 1 */
 				b_off = b_imm(i + inst->jf + 1, ctx);
-				emit_bcond(MIPS_COND_GT, r_s0, r_zero, b_off,
+				emit_bcond(MIPS_COND_NE, r_s0, r_zero, b_off,
 					   ctx);
 				emit_nop(ctx);
 				/* A > (K|X) ? scratch = 0 */
-- 
cgit v1.2.3


From 6e86c59d4d0d04e7ebefd05b8af245c968892f81 Mon Sep 17 00:00:00 2001
From: Markos Chandras <markos.chandras@imgtec.com>
Date: Mon, 23 Jun 2014 10:38:52 +0100
Subject: MIPS: bpf: Use correct mask for VLAN_TAG case

Using VLAN_VID_MASK is not correct to get the vlan tag. Use
~VLAN_PRESENT_MASK instead and make sure it's u16 so the top 16-bits
will be removed. This will ensure that the emit_andi() code will not
treat this as a big 32-bit unsigned value.

Signed-off-by: Markos Chandras <markos.chandras@imgtec.com>
Cc: David S. Miller <davem@davemloft.net>
Cc: Daniel Borkmann <dborkman@redhat.com>
Cc: Alexei Starovoitov <ast@plumgrid.com>
Cc: netdev@vger.kernel.org
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/7127/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/net/bpf_jit.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/mips/net/bpf_jit.c b/arch/mips/net/bpf_jit.c
index 500f97fdc0e1..a4d1b76e7373 100644
--- a/arch/mips/net/bpf_jit.c
+++ b/arch/mips/net/bpf_jit.c
@@ -1317,7 +1317,7 @@ jmp_cmp:
 			off = offsetof(struct sk_buff, vlan_tci);
 			emit_half_load(r_s0, r_skb, off, ctx);
 			if (code == (BPF_ANC | SKF_AD_VLAN_TAG))
-				emit_andi(r_A, r_s0, VLAN_VID_MASK, ctx);
+				emit_andi(r_A, r_s0, (u16)~VLAN_TAG_PRESENT, ctx);
 			else
 				emit_andi(r_A, r_s0, VLAN_TAG_PRESENT, ctx);
 			break;
-- 
cgit v1.2.3


From 91a41d7f972b1d78b4bcbb61ada4a33c9d7ba8a3 Mon Sep 17 00:00:00 2001
From: Markos Chandras <markos.chandras@imgtec.com>
Date: Mon, 23 Jun 2014 10:38:53 +0100
Subject: MIPS: bpf: Fix return values for VLAN_TAG_PRESENT case

If VLAN_TAG_PRESENT is not zero, then return 1 as expected by
classic BPF. Otherwise return 0.

Signed-off-by: Markos Chandras <markos.chandras@imgtec.com>
Cc: David S. Miller <davem@davemloft.net>
Cc: Daniel Borkmann <dborkman@redhat.com>
Cc: Alexei Starovoitov <ast@plumgrid.com>
Cc: netdev@vger.kernel.org
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/7128/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/net/bpf_jit.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/mips/net/bpf_jit.c b/arch/mips/net/bpf_jit.c
index a4d1b76e7373..d852bb6d3fe3 100644
--- a/arch/mips/net/bpf_jit.c
+++ b/arch/mips/net/bpf_jit.c
@@ -1316,10 +1316,13 @@ jmp_cmp:
 						  vlan_tci) != 2);
 			off = offsetof(struct sk_buff, vlan_tci);
 			emit_half_load(r_s0, r_skb, off, ctx);
-			if (code == (BPF_ANC | SKF_AD_VLAN_TAG))
+			if (code == (BPF_ANC | SKF_AD_VLAN_TAG)) {
 				emit_andi(r_A, r_s0, (u16)~VLAN_TAG_PRESENT, ctx);
-			else
+			} else {
 				emit_andi(r_A, r_s0, VLAN_TAG_PRESENT, ctx);
+				/* return 1 if present */
+				emit_sltu(r_A, r_zero, r_A, ctx);
+			}
 			break;
 		case BPF_ANC | SKF_AD_PKTTYPE:
 			ctx->flags |= SEEN_SKB;
-- 
cgit v1.2.3


From 78b95b662c4c633206c997fe2bd25a9c680e047a Mon Sep 17 00:00:00 2001
From: Markos Chandras <markos.chandras@imgtec.com>
Date: Mon, 23 Jun 2014 10:38:54 +0100
Subject: MIPS: bpf: Use pr_debug instead of pr_warn for unhandled opcodes

We should prevent spamming the logs during normal execution of bpf-jit.

Signed-off-by: Markos Chandras <markos.chandras@imgtec.com>
Suggested-by: Alexei Starovoitov <ast@plumgrid.com>
Cc: David S. Miller <davem@davemloft.net>
Cc: Daniel Borkmann <dborkman@redhat.com>
Cc: Alexei Starovoitov <ast@plumgrid.com>
Cc: netdev@vger.kernel.org
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/7129/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/net/bpf_jit.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/mips/net/bpf_jit.c b/arch/mips/net/bpf_jit.c
index d852bb6d3fe3..1d228d27d759 100644
--- a/arch/mips/net/bpf_jit.c
+++ b/arch/mips/net/bpf_jit.c
@@ -1345,8 +1345,8 @@ jmp_cmp:
 			emit_half_load(r_A, r_skb, off, ctx);
 			break;
 		default:
-			pr_warn("%s: Unhandled opcode: 0x%02x\n", __FILE__,
-				inst->code);
+			pr_debug("%s: Unhandled opcode: 0x%02x\n", __FILE__,
+				 inst->code);
 			return -1;
 		}
 	}
-- 
cgit v1.2.3


From 10c4d614d2ffcfc17add01f9648c3e530fb308d1 Mon Sep 17 00:00:00 2001
From: Markos Chandras <markos.chandras@imgtec.com>
Date: Mon, 23 Jun 2014 10:38:55 +0100
Subject: MIPS: bpf: Fix is_range() semantics

is_range() was meant to check whether the number is within
the s16 range or not. However the return values and consumers expected
the exact opposite. We fix that by inverting the logic in the function
to return 'true' for < s16 and 'false' for > s16.

Signed-off-by: Markos Chandras <markos.chandras@imgtec.com>
Reported-by: Alexei Starovoitov <ast@plumgrid.com>
Cc: David S. Miller <davem@davemloft.net>
Cc: Daniel Borkmann <dborkman@redhat.com>
Cc: Alexei Starovoitov <ast@plumgrid.com>
Cc: netdev@vger.kernel.org
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/7131/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/net/bpf_jit.c | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)

(limited to 'arch')

diff --git a/arch/mips/net/bpf_jit.c b/arch/mips/net/bpf_jit.c
index 1d228d27d759..00c4c83972bb 100644
--- a/arch/mips/net/bpf_jit.c
+++ b/arch/mips/net/bpf_jit.c
@@ -166,9 +166,7 @@ do {							\
 /* Determine if immediate is within the 16-bit signed range */
 static inline bool is_range16(s32 imm)
 {
-	if (imm >= SBIT(15) || imm < -SBIT(15))
-		return true;
-	return false;
+	return !(imm >= SBIT(15) || imm < -SBIT(15));
 }
 
 static inline void emit_addu(unsigned int dst, unsigned int src1,
@@ -187,7 +185,7 @@ static inline void emit_load_imm(unsigned int dst, u32 imm, struct jit_ctx *ctx)
 {
 	if (ctx->target != NULL) {
 		/* addiu can only handle s16 */
-		if (is_range16(imm)) {
+		if (!is_range16(imm)) {
 			u32 *p = &ctx->target[ctx->idx];
 			uasm_i_lui(&p, r_tmp_imm, (s32)imm >> 16);
 			p = &ctx->target[ctx->idx + 1];
@@ -199,7 +197,7 @@ static inline void emit_load_imm(unsigned int dst, u32 imm, struct jit_ctx *ctx)
 	}
 	ctx->idx++;
 
-	if (is_range16(imm))
+	if (!is_range16(imm))
 		ctx->idx++;
 }
 
@@ -240,7 +238,7 @@ static inline void emit_daddiu(unsigned int dst, unsigned int src,
 static inline void emit_addiu(unsigned int dst, unsigned int src,
 			      u32 imm, struct jit_ctx *ctx)
 {
-	if (is_range16(imm)) {
+	if (!is_range16(imm)) {
 		emit_load_imm(r_tmp, imm, ctx);
 		emit_addu(dst, r_tmp, src, ctx);
 	} else {
@@ -347,7 +345,7 @@ static inline void emit_sltiu(unsigned dst, unsigned int src,
 			      unsigned int imm, struct jit_ctx *ctx)
 {
 	/* 16 bit immediate */
-	if (is_range16((s32)imm)) {
+	if (!is_range16((s32)imm)) {
 		emit_load_imm(r_tmp, imm, ctx);
 		emit_sltu(dst, src, r_tmp, ctx);
 	} else {
-- 
cgit v1.2.3


From e5bb48b0553d75918094c5a6f7b60a4359887218 Mon Sep 17 00:00:00 2001
From: Markos Chandras <markos.chandras@imgtec.com>
Date: Mon, 23 Jun 2014 10:38:56 +0100
Subject: MIPS: bpf: Drop update_on_xread and always initialize the X register

Previously, update_on_xread() only set the reset flag if SEEN_X hasn't
been set already. However, SEEN_X is used to indicate that X is used
as destination or source register so there are some cases where X
is only used as source register and we really need to make sure that it
has been initialized in time. As a result of which, drop this function and
always set X to zero if it's used in any of the opcodes.

Signed-off-by: Markos Chandras <markos.chandras@imgtec.com>
Cc: David S. Miller <davem@davemloft.net>
Cc: Daniel Borkmann <dborkman@redhat.com>
Cc: Alexei Starovoitov <ast@plumgrid.com>
Cc: netdev@vger.kernel.org
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/7133/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/net/bpf_jit.c | 22 +---------------------
 1 file changed, 1 insertion(+), 21 deletions(-)

(limited to 'arch')

diff --git a/arch/mips/net/bpf_jit.c b/arch/mips/net/bpf_jit.c
index 00c4c83972bb..1bcd599d9971 100644
--- a/arch/mips/net/bpf_jit.c
+++ b/arch/mips/net/bpf_jit.c
@@ -119,8 +119,6 @@
 /* Arguments used by JIT */
 #define ARGS_USED_BY_JIT	2 /* only applicable to 64-bit */
 
-#define FLAG_NEED_X_RESET	(1 << 0)
-
 #define SBIT(x)			(1 << (x)) /* Signed version of BIT() */
 
 /**
@@ -549,14 +547,6 @@ static inline u16 align_sp(unsigned int num)
 	return num;
 }
 
-static inline void update_on_xread(struct jit_ctx *ctx)
-{
-	if (!(ctx->flags & SEEN_X))
-		ctx->flags |= FLAG_NEED_X_RESET;
-
-	ctx->flags |= SEEN_X;
-}
-
 static bool is_load_to_a(u16 inst)
 {
 	switch (inst) {
@@ -701,7 +691,7 @@ static void build_prologue(struct jit_ctx *ctx)
 	if (ctx->flags & SEEN_SKB)
 		emit_reg_move(r_skb, MIPS_R_A0, ctx);
 
-	if (ctx->flags & FLAG_NEED_X_RESET)
+	if (ctx->flags & SEEN_X)
 		emit_jit_reg_move(r_X, r_zero, ctx);
 
 	/* Do not leak kernel data to userspace */
@@ -876,7 +866,6 @@ load_common:
 			/* A <- P[X + k:1] */
 			load_order = 0;
 load_ind:
-			update_on_xread(ctx);
 			ctx->flags |= SEEN_OFF | SEEN_X;
 			emit_addiu(r_off, r_X, k, ctx);
 			goto load_common;
@@ -972,7 +961,6 @@ load_ind:
 			break;
 		case BPF_ALU | BPF_MUL | BPF_X:
 			/* A *= X */
-			update_on_xread(ctx);
 			ctx->flags |= SEEN_A | SEEN_X;
 			emit_mul(r_A, r_A, r_X, ctx);
 			break;
@@ -1002,7 +990,6 @@ load_ind:
 			break;
 		case BPF_ALU | BPF_DIV | BPF_X:
 			/* A /= X */
-			update_on_xread(ctx);
 			ctx->flags |= SEEN_X | SEEN_A;
 			/* Check if r_X is zero */
 			emit_bcond(MIPS_COND_EQ, r_X, r_zero,
@@ -1012,7 +999,6 @@ load_ind:
 			break;
 		case BPF_ALU | BPF_MOD | BPF_X:
 			/* A %= X */
-			update_on_xread(ctx);
 			ctx->flags |= SEEN_X | SEEN_A;
 			/* Check if r_X is zero */
 			emit_bcond(MIPS_COND_EQ, r_X, r_zero,
@@ -1027,7 +1013,6 @@ load_ind:
 			break;
 		case BPF_ALU | BPF_OR | BPF_X:
 			/* A |= X */
-			update_on_xread(ctx);
 			ctx->flags |= SEEN_A;
 			emit_ori(r_A, r_A, r_X, ctx);
 			break;
@@ -1039,7 +1024,6 @@ load_ind:
 		case BPF_ANC | SKF_AD_ALU_XOR_X:
 		case BPF_ALU | BPF_XOR | BPF_X:
 			/* A ^= X */
-			update_on_xread(ctx);
 			ctx->flags |= SEEN_A;
 			emit_xor(r_A, r_A, r_X, ctx);
 			break;
@@ -1050,7 +1034,6 @@ load_ind:
 			break;
 		case BPF_ALU | BPF_AND | BPF_X:
 			/* A &= X */
-			update_on_xread(ctx);
 			ctx->flags |= SEEN_A | SEEN_X;
 			emit_and(r_A, r_A, r_X, ctx);
 			break;
@@ -1062,7 +1045,6 @@ load_ind:
 		case BPF_ALU | BPF_LSH | BPF_X:
 			/* A <<= X */
 			ctx->flags |= SEEN_A | SEEN_X;
-			update_on_xread(ctx);
 			emit_sllv(r_A, r_A, r_X, ctx);
 			break;
 		case BPF_ALU | BPF_RSH | BPF_K:
@@ -1072,7 +1054,6 @@ load_ind:
 			break;
 		case BPF_ALU | BPF_RSH | BPF_X:
 			ctx->flags |= SEEN_A | SEEN_X;
-			update_on_xread(ctx);
 			emit_srlv(r_A, r_A, r_X, ctx);
 			break;
 		case BPF_ALU | BPF_NEG:
@@ -1243,7 +1224,6 @@ jmp_cmp:
 		case BPF_MISC | BPF_TXA:
 			/* A = X */
 			ctx->flags |= SEEN_A | SEEN_X;
-			update_on_xread(ctx);
 			emit_jit_reg_move(r_A, r_X, ctx);
 			break;
 		/* AUX */
-- 
cgit v1.2.3


From 95782bf434437b3292f5cb9ce21b53bdbc1beda1 Mon Sep 17 00:00:00 2001
From: Markos Chandras <markos.chandras@imgtec.com>
Date: Wed, 25 Jun 2014 09:37:21 +0100
Subject: MIPS: BPF: Prevent kernel fall over for >=32bit shifts

Remove BUG_ON() if the shift immediate is >=32 to avoid kernel crashes
due to malicious user input. If the shift immediate is >= 32,
we simply load the destination register with 0 since only
32-bit instructions are used by JIT so this will do the
correct thing even on MIPS64.

Signed-off-by: Markos Chandras <markos.chandras@imgtec.com>
Cc: David S. Miller <davem@davemloft.net>
Cc: Daniel Borkmann <dborkman@redhat.com>
Cc: Alexei Starovoitov <ast@plumgrid.com>
Cc: netdev@vger.kernel.org
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/7179/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/net/bpf_jit.c | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)

(limited to 'arch')

diff --git a/arch/mips/net/bpf_jit.c b/arch/mips/net/bpf_jit.c
index 1bcd599d9971..9476e7f061a1 100644
--- a/arch/mips/net/bpf_jit.c
+++ b/arch/mips/net/bpf_jit.c
@@ -151,6 +151,8 @@ static inline int optimize_div(u32 *k)
 	return 0;
 }
 
+static inline void emit_jit_reg_move(ptr dst, ptr src, struct jit_ctx *ctx);
+
 /* Simply emit the instruction if the JIT memory space has been allocated */
 #define emit_instr(ctx, func, ...)			\
 do {							\
@@ -309,8 +311,11 @@ static inline void emit_sll(unsigned int dst, unsigned int src,
 			    unsigned int sa, struct jit_ctx *ctx)
 {
 	/* sa is 5-bits long */
-	BUG_ON(sa >= BIT(5));
-	emit_instr(ctx, sll, dst, src, sa);
+	if (sa >= BIT(5))
+		/* Shifting >= 32 results in zero */
+		emit_jit_reg_move(dst, r_zero, ctx);
+	else
+		emit_instr(ctx, sll, dst, src, sa);
 }
 
 static inline void emit_srlv(unsigned int dst, unsigned int src,
@@ -323,8 +328,11 @@ static inline void emit_srl(unsigned int dst, unsigned int src,
 			    unsigned int sa, struct jit_ctx *ctx)
 {
 	/* sa is 5-bits long */
-	BUG_ON(sa >= BIT(5));
-	emit_instr(ctx, srl, dst, src, sa);
+	if (sa >= BIT(5))
+		/* Shifting >= 32 results in zero */
+		emit_jit_reg_move(dst, r_zero, ctx);
+	else
+		emit_instr(ctx, srl, dst, src, sa);
 }
 
 static inline void emit_slt(unsigned int dst, unsigned int src1,
-- 
cgit v1.2.3


From b4fe0ec86dae91abfa9f932cd0e2e9d50e336c8b Mon Sep 17 00:00:00 2001
From: Markos Chandras <markos.chandras@imgtec.com>
Date: Mon, 23 Jun 2014 10:38:58 +0100
Subject: MIPS: bpf: Fix PKT_TYPE case for big-endian cores

The skb->pkt_type field is defined as follows:

u8 pkt_type:3,
   fclone:2,
   ipvs_property:1,
   peeked:1,
   nf_trace:1

resulting to the following layout in big-endian systems

[pkt_type][fclone][ipvs_propery][peeked][nf_trace]
^                                                ^
|                                                |
LSB                                             MSB

As a result, the existing code did not work because it was trying to
match pkt_type == 7 whereas in reality it is 7<<5 on big-endian
systems.

This has been fixed in the interpreter in
0dcceabb0c1bf2d4c12a748df9933fad303072a7
"net: filter: fix SKF_AD_PKTTYPE extension on big-endian"

The fix is to look for 7<<5 on big-endian systems for the pkt_type
field, and shift by 5 so the packet type will be at the lower 3 bits
of the A register.

Signed-off-by: Markos Chandras <markos.chandras@imgtec.com>
Cc: David S. Miller <davem@davemloft.net>
Cc: Daniel Borkmann <dborkman@redhat.com>
Cc: Alexei Starovoitov <ast@plumgrid.com>
Cc: netdev@vger.kernel.org
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/7132/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/net/bpf_jit.c | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/mips/net/bpf_jit.c b/arch/mips/net/bpf_jit.c
index 9476e7f061a1..4505e2e6ab53 100644
--- a/arch/mips/net/bpf_jit.c
+++ b/arch/mips/net/bpf_jit.c
@@ -751,13 +751,17 @@ static u64 jit_get_skb_w(struct sk_buff *skb, unsigned offset)
 	return (u64)err << 32 | ntohl(ret);
 }
 
-#define PKT_TYPE_MAX 7
+#ifdef __BIG_ENDIAN_BITFIELD
+#define PKT_TYPE_MAX	(7 << 5)
+#else
+#define PKT_TYPE_MAX	7
+#endif
 static int pkt_type_offset(void)
 {
 	struct sk_buff skb_probe = {
 		.pkt_type = ~0,
 	};
-	char *ct = (char *)&skb_probe;
+	u8 *ct = (u8 *)&skb_probe;
 	unsigned int off;
 
 	for (off = 0; off < sizeof(struct sk_buff); off++) {
@@ -1320,6 +1324,10 @@ jmp_cmp:
 			emit_load_byte(r_tmp, r_skb, off, ctx);
 			/* Keep only the last 3 bits */
 			emit_andi(r_A, r_tmp, PKT_TYPE_MAX, ctx);
+#ifdef __BIG_ENDIAN_BITFIELD
+			/* Get the actual packet type to the lower 3 bits */
+			emit_srl(r_A, r_A, 5, ctx);
+#endif
 			break;
 		case BPF_ANC | SKF_AD_QUEUE:
 			ctx->flags |= SEEN_SKB | SEEN_A;
-- 
cgit v1.2.3


From b6a14a9845259eb21c9d8121330c4c3b22de182e Mon Sep 17 00:00:00 2001
From: Markos Chandras <markos.chandras@imgtec.com>
Date: Wed, 25 Jun 2014 09:39:38 +0100
Subject: MIPS: BPF: Use 32 or 64-bit load instruction to load an address to
 register

When loading a pointer to register we need to use the appropriate
32 or 64bit instruction to preserve the pointers' top 32bits.

Signed-off-by: Markos Chandras <markos.chandras@imgtec.com>
Cc: David S. Miller <davem@davemloft.net>
Cc: Daniel Borkmann <dborkman@redhat.com>
Cc: Alexei Starovoitov <ast@plumgrid.com>
Cc: netdev@vger.kernel.org
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/7180/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/net/bpf_jit.c | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/mips/net/bpf_jit.c b/arch/mips/net/bpf_jit.c
index 4505e2e6ab53..6e3963425b64 100644
--- a/arch/mips/net/bpf_jit.c
+++ b/arch/mips/net/bpf_jit.c
@@ -453,6 +453,17 @@ static inline void emit_wsbh(unsigned int dst, unsigned int src,
 	emit_instr(ctx, wsbh, dst, src);
 }
 
+/* load pointer to register */
+static inline void emit_load_ptr(unsigned int dst, unsigned int src,
+				     int imm, struct jit_ctx *ctx)
+{
+	/* src contains the base addr of the 32/64-pointer */
+	if (config_enabled(CONFIG_64BIT))
+		emit_instr(ctx, ld, dst, imm, src);
+	else
+		emit_instr(ctx, lw, dst, imm, src);
+}
+
 /* load a function pointer to register */
 static inline void emit_load_func(unsigned int reg, ptr imm,
 				  struct jit_ctx *ctx)
@@ -1277,7 +1288,8 @@ jmp_cmp:
 			/* A = skb->dev->ifindex */
 			ctx->flags |= SEEN_SKB | SEEN_A | SEEN_S0;
 			off = offsetof(struct sk_buff, dev);
-			emit_load(r_s0, r_skb, off, ctx);
+			/* Load *dev pointer */
+			emit_load_ptr(r_s0, r_skb, off, ctx);
 			/* error (0) in the delay slot */
 			emit_bcond(MIPS_COND_EQ, r_s0, r_zero,
 				   b_imm(prog->len, ctx), ctx);
-- 
cgit v1.2.3


From d8214ef14a1db4172c93e5694906bda9b00fac93 Mon Sep 17 00:00:00 2001
From: Markos Chandras <markos.chandras@imgtec.com>
Date: Mon, 23 Jun 2014 10:39:00 +0100
Subject: MIPS: bpf: Fix stack space allocation for BPF memwords on MIPS64

When allocating stack space for BPF memwords we need to use the
appropriate 32 or 64-bit instruction to avoid losing the top 32 bits
of the stack pointer.

Signed-off-by: Markos Chandras <markos.chandras@imgtec.com>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Daniel Borkmann <dborkman@redhat.com>
Cc: Alexei Starovoitov <ast@plumgrid.com>
Cc: netdev@vger.kernel.org
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/7135/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/net/bpf_jit.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/mips/net/bpf_jit.c b/arch/mips/net/bpf_jit.c
index 6e3963425b64..b87390a56a2f 100644
--- a/arch/mips/net/bpf_jit.c
+++ b/arch/mips/net/bpf_jit.c
@@ -623,7 +623,10 @@ static void save_bpf_jit_regs(struct jit_ctx *ctx, unsigned offset)
 	if (ctx->flags & SEEN_MEM) {
 		if (real_off % (RSIZE * 2))
 			real_off += RSIZE;
-		emit_addiu(r_M, r_sp, real_off, ctx);
+		if (config_enabled(CONFIG_64BIT))
+			emit_daddiu(r_M, r_sp, real_off, ctx);
+		else
+			emit_addiu(r_M, r_sp, real_off, ctx);
 	}
 }
 
-- 
cgit v1.2.3


From ab6c15bc6620ebe220970cc040b29bcb2757f373 Mon Sep 17 00:00:00 2001
From: Markos Chandras <markos.chandras@imgtec.com>
Date: Mon, 23 Jun 2014 09:48:51 +0100
Subject: MIPS: MSC: Prevent out-of-bounds writes to MIPS SC ioremap'd region

Previously, the lower limit for the MIPS SC initialization loop was
set incorrectly allowing one extra loop leading to writes
beyond the MSC ioremap'd space. More precisely, the value of the 'imp'
in the last loop increased beyond the msc_irqmap_t boundaries and
as a result of which, the 'n' variable was loaded with an incorrect
value. This value was used later on to calculate the offset in the
MSC01_IC_SUP which led to random crashes like the following one:

CPU 0 Unable to handle kernel paging request at virtual address e75c0200,
epc == 8058dba4, ra == 8058db90
[...]
Call Trace:
[<8058dba4>] init_msc_irqs+0x104/0x154
[<8058b5bc>] arch_init_irq+0xd8/0x154
[<805897b0>] start_kernel+0x220/0x36c

Kernel panic - not syncing: Attempted to kill the idle task!

This patch fixes the problem

Signed-off-by: Markos Chandras <markos.chandras@imgtec.com>
Reviewed-by: James Hogan <james.hogan@imgtec.com>
Cc: stable@vger.kernel.org
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/7118/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/kernel/irq-msc01.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/mips/kernel/irq-msc01.c b/arch/mips/kernel/irq-msc01.c
index 4858642d543d..a734b2c2f9ea 100644
--- a/arch/mips/kernel/irq-msc01.c
+++ b/arch/mips/kernel/irq-msc01.c
@@ -126,7 +126,7 @@ void __init init_msc_irqs(unsigned long icubase, unsigned int irqbase, msc_irqma
 
 	board_bind_eic_interrupt = &msc_bind_eic_interrupt;
 
-	for (; nirq >= 0; nirq--, imp++) {
+	for (; nirq > 0; nirq--, imp++) {
 		int n = imp->im_irq;
 
 		switch (imp->im_type) {
-- 
cgit v1.2.3


From 16f0bbbc1fbe0d09cda5b5b2dbbd6716026dfa7b Mon Sep 17 00:00:00 2001
From: Ralf Baechle <ralf@linux-mips.org>
Date: Thu, 26 Jun 2014 14:43:01 +0100
Subject: MIPS: Lasat: Fix build error if CRC32 is not enabled.

Kconfig doesn't select CRC32 so it's possible to build a Lasat kernel
without CONFIG_CRC32 resulting in a build error:

  LD      vmlinux
arch/mips/built-in.o: In function `lasat_init_board_info':
(.text+0x22c): undefined reference to `crc32_le'
arch/mips/built-in.o: In function `lasat_write_eeprom_info':
(.text+0x7fc): undefined reference to `crc32_le'
make: *** [vmlinux] Error 1

Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/Kconfig | 1 +
 1 file changed, 1 insertion(+)

(limited to 'arch')

diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index 7a469acee33c..4e238e6e661c 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -269,6 +269,7 @@ config LANTIQ
 config LASAT
 	bool "LASAT Networks platforms"
 	select CEVT_R4K
+	select CRC32
 	select CSRC_R4K
 	select DMA_NONCOHERENT
 	select SYS_HAS_EARLY_PRINTK
-- 
cgit v1.2.3


From 5c65c360da06635f985684fa0ab41027660d0804 Mon Sep 17 00:00:00 2001
From: Russell King <rmk+kernel@arm.linux.org.uk>
Date: Sat, 7 Jun 2014 10:47:36 +0100
Subject: ARM: OMAP: add deprecation message for legacy OMAP DMA API

The legacy OMAP DMA API is now deprecated; all remaining users should
now convert over ASAP to using the DMA engine API instead of the OMAP
private API.

Acked-by: Tony Lindgren <tony@atomide.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/plat-omap/dma.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'arch')

diff --git a/arch/arm/plat-omap/dma.c b/arch/arm/plat-omap/dma.c
index b5608b1f9fbd..1c98659bbf89 100644
--- a/arch/arm/plat-omap/dma.c
+++ b/arch/arm/plat-omap/dma.c
@@ -698,6 +698,8 @@ int omap_request_dma(int dev_id, const char *dev_name,
 	unsigned long flags;
 	struct omap_dma_lch *chan;
 
+	WARN(strcmp(dev_name, "DMA engine"), "Using deprecated platform DMA API - please update to DMA engine");
+
 	spin_lock_irqsave(&dma_chan_lock, flags);
 	for (ch = 0; ch < dma_chan_count; ch++) {
 		if (free_ch == -1 && dma_chan[ch].dev_id == -1) {
-- 
cgit v1.2.3


From 33656d565da51bd01779600352ec28385ab806c8 Mon Sep 17 00:00:00 2001
From: Nicolas Pitre <nico@fluxnic.net>
Date: Mon, 2 Jun 2014 17:32:25 +0100
Subject: ARM: 8067/1: zImage: ensure header in LE format for BE8 kernels

All known BE8-capable systems have LE bootloaders, so we need to ensure
that the magic number and image start/end values are in little endian
format.

[ben.dooks@codethink.co.uk: from nico's original email on this subject]
[taras.kondratiuk@linaro.org: removed lds.S->lds rule, added target to extra-y]

Signed-off-by: Nicolas Pitre <nico@fluxnic.net>
Signed-off-by: Ben Dooks <ben.dooks@codethink.co.uk>
Signed-off-by: Taras Kondratiuk <taras.kondratiuk@linaro.org>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/boot/compressed/.gitignore     |  1 +
 arch/arm/boot/compressed/Makefile       |  4 ++--
 arch/arm/boot/compressed/head.S         |  7 ++++---
 arch/arm/boot/compressed/vmlinux.lds.in | 14 ++++++++++++++
 4 files changed, 21 insertions(+), 5 deletions(-)

(limited to 'arch')

diff --git a/arch/arm/boot/compressed/.gitignore b/arch/arm/boot/compressed/.gitignore
index 0714e0334e33..6a26e7bcb714 100644
--- a/arch/arm/boot/compressed/.gitignore
+++ b/arch/arm/boot/compressed/.gitignore
@@ -10,6 +10,7 @@ piggy.xzkern
 piggy.lz4
 vmlinux
 vmlinux.lds
+vmlinux.lds.S
 
 # borrowed libfdt files
 fdt.c
diff --git a/arch/arm/boot/compressed/Makefile b/arch/arm/boot/compressed/Makefile
index 68c918362b79..8a8090694d89 100644
--- a/arch/arm/boot/compressed/Makefile
+++ b/arch/arm/boot/compressed/Makefile
@@ -114,7 +114,7 @@ targets       := vmlinux vmlinux.lds \
 # Make sure files are removed during clean
 extra-y       += piggy.gzip piggy.lzo piggy.lzma piggy.xzkern piggy.lz4 \
 		 lib1funcs.S ashldi3.S bswapsdi2.S $(libfdt) $(libfdt_hdrs) \
-		 hyp-stub.S
+		 hyp-stub.S vmlinux.lds.S
 
 ifeq ($(CONFIG_FUNCTION_TRACER),y)
 ORIG_CFLAGS := $(KBUILD_CFLAGS)
@@ -199,7 +199,7 @@ CFLAGS_font.o := -Dstatic=
 $(obj)/font.c: $(FONTC)
 	$(call cmd,shipped)
 
-$(obj)/vmlinux.lds: $(obj)/vmlinux.lds.in arch/arm/boot/Makefile $(KCONFIG_CONFIG)
+$(obj)/vmlinux.lds.S: $(obj)/vmlinux.lds.in arch/arm/boot/Makefile $(KCONFIG_CONFIG)
 	@sed "$(SEDFLAGS)" < $< > $@
 
 $(obj)/hyp-stub.S: $(srctree)/arch/$(SRCARCH)/kernel/hyp-stub.S
diff --git a/arch/arm/boot/compressed/head.S b/arch/arm/boot/compressed/head.S
index 3a8b32df6b31..c95feab6ce7f 100644
--- a/arch/arm/boot/compressed/head.S
+++ b/arch/arm/boot/compressed/head.S
@@ -125,9 +125,10 @@ start:
  THUMB(		adr	r12, BSYM(1f)	)
  THUMB(		bx	r12		)
 
-		.word	0x016f2818		@ Magic numbers to help the loader
-		.word	start			@ absolute load/run zImage address
-		.word	_edata			@ zImage end address
+		.word	_magic_sig	@ Magic numbers to help the loader
+		.word	_magic_start	@ absolute load/run zImage address
+		.word	_magic_end	@ zImage end address
+
  THUMB(		.thumb			)
 1:
  ARM_BE8(	setend	be )			@ go BE8 if compiled for BE8
diff --git a/arch/arm/boot/compressed/vmlinux.lds.in b/arch/arm/boot/compressed/vmlinux.lds.in
index 4919f2ac8b89..60162231c7ea 100644
--- a/arch/arm/boot/compressed/vmlinux.lds.in
+++ b/arch/arm/boot/compressed/vmlinux.lds.in
@@ -7,6 +7,16 @@
  * it under the terms of the GNU General Public License version 2 as
  * published by the Free Software Foundation.
  */
+
+#ifdef CONFIG_CPU_ENDIAN_BE8
+#define ZIMAGE_MAGIC(x) ( (((x) >> 24) & 0x000000ff) | \
+			  (((x) >>  8) & 0x0000ff00) | \
+			  (((x) <<  8) & 0x00ff0000) | \
+			  (((x) << 24) & 0xff000000) )
+#else
+#define ZIMAGE_MAGIC(x) (x)
+#endif
+
 OUTPUT_ARCH(arm)
 ENTRY(_start)
 SECTIONS
@@ -57,6 +67,10 @@ SECTIONS
   .pad			: { BYTE(0); . = ALIGN(8); }
   _edata = .;
 
+  _magic_sig = ZIMAGE_MAGIC(0x016f2818);
+  _magic_start = ZIMAGE_MAGIC(_start);
+  _magic_end = ZIMAGE_MAGIC(_edata);
+
   . = BSS_START;
   __bss_start = .;
   .bss			: { *(.bss) }
-- 
cgit v1.2.3


From c79bf928d99a3b8cf1e8df75726f78e197b17eb6 Mon Sep 17 00:00:00 2001
From: Russell King <rmk+kernel@arm.linux.org.uk>
Date: Tue, 17 Jun 2014 15:00:54 +0100
Subject: ARM: simplify generation of compressed vmlinux.lds file

As we are now using the C preprocessor, we do not need to use sed to
edit constants in this file, and then pass the resulting file through
the C preprocessor.  Instead, rely solely on the C preprocessor to
rewrite TEXT_START and BSS_ADDR.

Acked-by: Nicolas Pitre <nico@linaro.org>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/boot/compressed/.gitignore     |  1 -
 arch/arm/boot/compressed/Makefile       |  7 +--
 arch/arm/boot/compressed/vmlinux.lds.S  | 87 +++++++++++++++++++++++++++++++
 arch/arm/boot/compressed/vmlinux.lds.in | 90 ---------------------------------
 4 files changed, 89 insertions(+), 96 deletions(-)
 create mode 100644 arch/arm/boot/compressed/vmlinux.lds.S
 delete mode 100644 arch/arm/boot/compressed/vmlinux.lds.in

(limited to 'arch')

diff --git a/arch/arm/boot/compressed/.gitignore b/arch/arm/boot/compressed/.gitignore
index 6a26e7bcb714..0714e0334e33 100644
--- a/arch/arm/boot/compressed/.gitignore
+++ b/arch/arm/boot/compressed/.gitignore
@@ -10,7 +10,6 @@ piggy.xzkern
 piggy.lz4
 vmlinux
 vmlinux.lds
-vmlinux.lds.S
 
 # borrowed libfdt files
 fdt.c
diff --git a/arch/arm/boot/compressed/Makefile b/arch/arm/boot/compressed/Makefile
index 8a8090694d89..76a50ecae1c3 100644
--- a/arch/arm/boot/compressed/Makefile
+++ b/arch/arm/boot/compressed/Makefile
@@ -81,7 +81,7 @@ ZTEXTADDR	:= 0
 ZBSSADDR	:= ALIGN(8)
 endif
 
-SEDFLAGS	= s/TEXT_START/$(ZTEXTADDR)/;s/BSS_START/$(ZBSSADDR)/
+CPPFLAGS_vmlinux.lds := -DTEXT_START="$(ZTEXTADDR)" -DBSS_START="$(ZBSSADDR)"
 
 suffix_$(CONFIG_KERNEL_GZIP) = gzip
 suffix_$(CONFIG_KERNEL_LZO)  = lzo
@@ -114,7 +114,7 @@ targets       := vmlinux vmlinux.lds \
 # Make sure files are removed during clean
 extra-y       += piggy.gzip piggy.lzo piggy.lzma piggy.xzkern piggy.lz4 \
 		 lib1funcs.S ashldi3.S bswapsdi2.S $(libfdt) $(libfdt_hdrs) \
-		 hyp-stub.S vmlinux.lds.S
+		 hyp-stub.S
 
 ifeq ($(CONFIG_FUNCTION_TRACER),y)
 ORIG_CFLAGS := $(KBUILD_CFLAGS)
@@ -199,8 +199,5 @@ CFLAGS_font.o := -Dstatic=
 $(obj)/font.c: $(FONTC)
 	$(call cmd,shipped)
 
-$(obj)/vmlinux.lds.S: $(obj)/vmlinux.lds.in arch/arm/boot/Makefile $(KCONFIG_CONFIG)
-	@sed "$(SEDFLAGS)" < $< > $@
-
 $(obj)/hyp-stub.S: $(srctree)/arch/$(SRCARCH)/kernel/hyp-stub.S
 	$(call cmd,shipped)
diff --git a/arch/arm/boot/compressed/vmlinux.lds.S b/arch/arm/boot/compressed/vmlinux.lds.S
new file mode 100644
index 000000000000..2b60b843ac5e
--- /dev/null
+++ b/arch/arm/boot/compressed/vmlinux.lds.S
@@ -0,0 +1,87 @@
+/*
+ *  Copyright (C) 2000 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifdef CONFIG_CPU_ENDIAN_BE8
+#define ZIMAGE_MAGIC(x) ( (((x) >> 24) & 0x000000ff) | \
+			  (((x) >>  8) & 0x0000ff00) | \
+			  (((x) <<  8) & 0x00ff0000) | \
+			  (((x) << 24) & 0xff000000) )
+#else
+#define ZIMAGE_MAGIC(x) (x)
+#endif
+
+OUTPUT_ARCH(arm)
+ENTRY(_start)
+SECTIONS
+{
+  /DISCARD/ : {
+    *(.ARM.exidx*)
+    *(.ARM.extab*)
+    /*
+     * Discard any r/w data - this produces a link error if we have any,
+     * which is required for PIC decompression.  Local data generates
+     * GOTOFF relocations, which prevents it being relocated independently
+     * of the text/got segments.
+     */
+    *(.data)
+  }
+
+  . = TEXT_START;
+  _text = .;
+
+  .text : {
+    _start = .;
+    *(.start)
+    *(.text)
+    *(.text.*)
+    *(.fixup)
+    *(.gnu.warning)
+    *(.glue_7t)
+    *(.glue_7)
+  }
+  .rodata : {
+    *(.rodata)
+    *(.rodata.*)
+  }
+  .piggydata : {
+    *(.piggydata)
+  }
+
+  . = ALIGN(4);
+  _etext = .;
+
+  .got.plt		: { *(.got.plt) }
+  _got_start = .;
+  .got			: { *(.got) }
+  _got_end = .;
+
+  /* ensure the zImage file size is always a multiple of 64 bits */
+  /* (without a dummy byte, ld just ignores the empty section) */
+  .pad			: { BYTE(0); . = ALIGN(8); }
+  _edata = .;
+
+  _magic_sig = ZIMAGE_MAGIC(0x016f2818);
+  _magic_start = ZIMAGE_MAGIC(_start);
+  _magic_end = ZIMAGE_MAGIC(_edata);
+
+  . = BSS_START;
+  __bss_start = .;
+  .bss			: { *(.bss) }
+  _end = .;
+
+  . = ALIGN(8);		/* the stack must be 64-bit aligned */
+  .stack		: { *(.stack) }
+
+  .stab 0		: { *(.stab) }
+  .stabstr 0		: { *(.stabstr) }
+  .stab.excl 0		: { *(.stab.excl) }
+  .stab.exclstr 0	: { *(.stab.exclstr) }
+  .stab.index 0		: { *(.stab.index) }
+  .stab.indexstr 0	: { *(.stab.indexstr) }
+  .comment 0		: { *(.comment) }
+}
diff --git a/arch/arm/boot/compressed/vmlinux.lds.in b/arch/arm/boot/compressed/vmlinux.lds.in
deleted file mode 100644
index 60162231c7ea..000000000000
--- a/arch/arm/boot/compressed/vmlinux.lds.in
+++ /dev/null
@@ -1,90 +0,0 @@
-/*
- *  linux/arch/arm/boot/compressed/vmlinux.lds.in
- *
- *  Copyright (C) 2000 Russell King
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#ifdef CONFIG_CPU_ENDIAN_BE8
-#define ZIMAGE_MAGIC(x) ( (((x) >> 24) & 0x000000ff) | \
-			  (((x) >>  8) & 0x0000ff00) | \
-			  (((x) <<  8) & 0x00ff0000) | \
-			  (((x) << 24) & 0xff000000) )
-#else
-#define ZIMAGE_MAGIC(x) (x)
-#endif
-
-OUTPUT_ARCH(arm)
-ENTRY(_start)
-SECTIONS
-{
-  /DISCARD/ : {
-    *(.ARM.exidx*)
-    *(.ARM.extab*)
-    /*
-     * Discard any r/w data - this produces a link error if we have any,
-     * which is required for PIC decompression.  Local data generates
-     * GOTOFF relocations, which prevents it being relocated independently
-     * of the text/got segments.
-     */
-    *(.data)
-  }
-
-  . = TEXT_START;
-  _text = .;
-
-  .text : {
-    _start = .;
-    *(.start)
-    *(.text)
-    *(.text.*)
-    *(.fixup)
-    *(.gnu.warning)
-    *(.glue_7t)
-    *(.glue_7)
-  }
-  .rodata : {
-    *(.rodata)
-    *(.rodata.*)
-  }
-  .piggydata : {
-    *(.piggydata)
-  }
-
-  . = ALIGN(4);
-  _etext = .;
-
-  .got.plt		: { *(.got.plt) }
-  _got_start = .;
-  .got			: { *(.got) }
-  _got_end = .;
-
-  /* ensure the zImage file size is always a multiple of 64 bits */
-  /* (without a dummy byte, ld just ignores the empty section) */
-  .pad			: { BYTE(0); . = ALIGN(8); }
-  _edata = .;
-
-  _magic_sig = ZIMAGE_MAGIC(0x016f2818);
-  _magic_start = ZIMAGE_MAGIC(_start);
-  _magic_end = ZIMAGE_MAGIC(_edata);
-
-  . = BSS_START;
-  __bss_start = .;
-  .bss			: { *(.bss) }
-  _end = .;
-
-  . = ALIGN(8);		/* the stack must be 64-bit aligned */
-  .stack		: { *(.stack) }
-
-  .stab 0		: { *(.stab) }
-  .stabstr 0		: { *(.stabstr) }
-  .stab.excl 0		: { *(.stab.excl) }
-  .stab.exclstr 0	: { *(.stab.exclstr) }
-  .stab.index 0		: { *(.stab.index) }
-  .stab.indexstr 0	: { *(.stab.indexstr) }
-  .comment 0		: { *(.comment) }
-}
-
-- 
cgit v1.2.3


From 9a2b51b6ca93fc42e5676aa444c956f7506185db Mon Sep 17 00:00:00 2001
From: Andrey Ryabinin <a.ryabinin@samsung.com>
Date: Wed, 18 Jun 2014 16:12:40 +0100
Subject: ARM: 8078/1: get rid of hardcoded assumptions about kernel stack size

Changing kernel stack size on arm is not as simple as it should be:
1) THREAD_SIZE macro doesn't respect PAGE_SIZE and THREAD_SIZE_ORDER
2) stack size is hardcoded in get_thread_info macro

This patch fixes it by calculating THREAD_SIZE and thread_info address
taking into account PAGE_SIZE and THREAD_SIZE_ORDER.

Now changing stack size becomes simply changing THREAD_SIZE_ORDER.

Signed-off-by: Andrey Ryabinin <a.ryabinin@samsung.com>
Acked-by: Nicolas Pitre <nico@linaro.org>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/include/asm/assembler.h   | 8 +++++---
 arch/arm/include/asm/thread_info.h | 3 ++-
 2 files changed, 7 insertions(+), 4 deletions(-)

(limited to 'arch')

diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h
index 57f0584e8d97..906703a5b564 100644
--- a/arch/arm/include/asm/assembler.h
+++ b/arch/arm/include/asm/assembler.h
@@ -24,6 +24,8 @@
 #include <asm/domain.h>
 #include <asm/opcodes-virt.h>
 #include <asm/asm-offsets.h>
+#include <asm/page.h>
+#include <asm/thread_info.h>
 
 #define IOMEM(x)	(x)
 
@@ -179,10 +181,10 @@
  * Get current thread_info.
  */
 	.macro	get_thread_info, rd
- ARM(	mov	\rd, sp, lsr #13	)
+ ARM(	mov	\rd, sp, lsr #THREAD_SIZE_ORDER + PAGE_SHIFT	)
  THUMB(	mov	\rd, sp			)
- THUMB(	lsr	\rd, \rd, #13		)
-	mov	\rd, \rd, lsl #13
+ THUMB(	lsr	\rd, \rd, #THREAD_SIZE_ORDER + PAGE_SHIFT	)
+	mov	\rd, \rd, lsl #THREAD_SIZE_ORDER + PAGE_SHIFT
 	.endm
 
 /*
diff --git a/arch/arm/include/asm/thread_info.h b/arch/arm/include/asm/thread_info.h
index e4e4208a9130..fc44d3761f9e 100644
--- a/arch/arm/include/asm/thread_info.h
+++ b/arch/arm/include/asm/thread_info.h
@@ -14,9 +14,10 @@
 
 #include <linux/compiler.h>
 #include <asm/fpstate.h>
+#include <asm/page.h>
 
 #define THREAD_SIZE_ORDER	1
-#define THREAD_SIZE		8192
+#define THREAD_SIZE		(PAGE_SIZE << THREAD_SIZE_ORDER)
 #define THREAD_START_SP		(THREAD_SIZE - 8)
 
 #ifndef __ASSEMBLY__
-- 
cgit v1.2.3


From 731542ef44a3dea7726a03f906111700847cf777 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Mon, 14 Jul 2014 18:07:36 +0100
Subject: ARM: 8101/1: mach-iop13xx: fix possible build failure

After applying patch:
	"ARM: 8078/1: get rid of hardcoded assumptions about kernel stack size"

following build failure happens on iop13xx platform:

   In file included from include/linux/srcu.h:33:0,
                    from include/linux/notifier.h:15,
                    from include/linux/reboot.h:5,
                    from arch/arm/mach-iop13xx/include/mach/iop13xx.h:6,
                    from arch/arm/mach-iop13xx/include/mach/hardware.h:14,
                    from arch/arm/mach-iop13xx/include/mach/memory.h:4,
                    from arch/arm/include/asm/memory.h:24,
                    from arch/arm/include/asm/page.h:163,
                    from arch/arm/include/asm/thread_info.h:17,
                    from include/linux/thread_info.h:54,
                    from include/asm-generic/preempt.h:4,
                    from arch/arm/include/generated/asm/preempt.h:1,
                    from include/linux/preempt.h:18,
                    from include/linux/spinlock.h:50,
                    from include/linux/seqlock.h:35,
                    from include/linux/time.h:5,
                    from include/uapi/linux/timex.h:56,
                    from include/linux/timex.h:56,
                    from include/linux/sched.h:19,
                    from arch/arm/kernel/asm-offsets.c:13:
   include/linux/rcupdate.h: In function '__rcu_read_lock':
>> include/linux/rcupdate.h:220:2: error: implicit declaration of function 'preempt_disable' [-Werror=implicit-function-declaration]
     preempt_disable();

The problem here is recursive header inclusion which could be avoided by
removing linux/reboot.h from mach/iop13xxx.h.
linux/reboot.h in include/mach/iop13xx.h is needed only for enum reboot_mode,
so header it could be replaced with a enum declaration.

Whatever patch "ARM: 8078/1: get rid of hardcoded assumptions about kernel stack size"
does, I think it's good to avoid unnecessary header inclusion here in any case.

Reported-by: kbuild test robot <fengguang.wu@intel.com>
Reported-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Andrey Ryabinin <a.ryabinin@samsung.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/mach-iop13xx/include/mach/iop13xx.h | 2 +-
 arch/arm/mach-iop13xx/setup.c                | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/arm/mach-iop13xx/include/mach/iop13xx.h b/arch/arm/mach-iop13xx/include/mach/iop13xx.h
index 17b40279e0a4..9311ee2126d6 100644
--- a/arch/arm/mach-iop13xx/include/mach/iop13xx.h
+++ b/arch/arm/mach-iop13xx/include/mach/iop13xx.h
@@ -3,7 +3,7 @@
 
 #ifndef __ASSEMBLY__
 
-#include <linux/reboot.h>
+enum reboot_mode;
 
 /* The ATU offsets can change based on the strapping */
 extern u32 iop13xx_atux_pmmr_offset;
diff --git a/arch/arm/mach-iop13xx/setup.c b/arch/arm/mach-iop13xx/setup.c
index bca96f433495..53c316f7301e 100644
--- a/arch/arm/mach-iop13xx/setup.c
+++ b/arch/arm/mach-iop13xx/setup.c
@@ -20,6 +20,7 @@
 #include <linux/dma-mapping.h>
 #include <linux/serial_8250.h>
 #include <linux/io.h>
+#include <linux/reboot.h>
 #ifdef CONFIG_MTD_PHYSMAP
 #include <linux/mtd/physmap.h>
 #endif
-- 
cgit v1.2.3


From 3721924c81541d828d73d0e36dcbae8fd93f0885 Mon Sep 17 00:00:00 2001
From: Nicolas Pitre <nicolas.pitre@linaro.org>
Date: Tue, 24 Jun 2014 18:32:51 +0100
Subject: ARM: 8081/1: MCPM: provide infrastructure to allow for MCPM loopback

The kernel already has the responsibility to handle resources such as the

CCI when hotplugging CPUs, during the booting of secondary CPUs, and when
resuming from suspend/idle.  It would be more coherent and less confusing
if the CCI for the boot CPU (or cluster)  was also initialized by the
kernel rather than expecting the firmware/bootloader to do it and only in
that case. After all, the kernel has all the necessary code already and
the bootloader shouldn't have to care at all.

The CCI may be turned on only when the cache is off. Leveraging the CPU
suspend code to loop back through the low-level MCPM entry point is all
that is needed to properly turn on the CCI from the kernel by using the
same code as during secondary boot.

Let's provide a generic MCPM loopback function that can be invoked by
backend initialization code to set things (CCI or similar) on the boot
CPU just as it is done for the other CPUs.

Signed-off-by: Nicolas Pitre <nico@linaro.org>
Reviewed-by: Kevin Hilman <khilman@linaro.org>
Tested-by: Kevin Hilman <khilman@linaro.org>
Tested-by: Doug Anderson <dianders@chromium.org>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/common/mcpm_entry.c | 52 ++++++++++++++++++++++++++++++++++++++++++++
 arch/arm/include/asm/mcpm.h  | 16 ++++++++++++++
 2 files changed, 68 insertions(+)

(limited to 'arch')

diff --git a/arch/arm/common/mcpm_entry.c b/arch/arm/common/mcpm_entry.c
index f91136ab447e..3c165fc2dce2 100644
--- a/arch/arm/common/mcpm_entry.c
+++ b/arch/arm/common/mcpm_entry.c
@@ -12,11 +12,13 @@
 #include <linux/kernel.h>
 #include <linux/init.h>
 #include <linux/irqflags.h>
+#include <linux/cpu_pm.h>
 
 #include <asm/mcpm.h>
 #include <asm/cacheflush.h>
 #include <asm/idmap.h>
 #include <asm/cputype.h>
+#include <asm/suspend.h>
 
 extern unsigned long mcpm_entry_vectors[MAX_NR_CLUSTERS][MAX_CPUS_PER_CLUSTER];
 
@@ -146,6 +148,56 @@ int mcpm_cpu_powered_up(void)
 	return 0;
 }
 
+#ifdef CONFIG_ARM_CPU_SUSPEND
+
+static int __init nocache_trampoline(unsigned long _arg)
+{
+	void (*cache_disable)(void) = (void *)_arg;
+	unsigned int mpidr = read_cpuid_mpidr();
+	unsigned int cpu = MPIDR_AFFINITY_LEVEL(mpidr, 0);
+	unsigned int cluster = MPIDR_AFFINITY_LEVEL(mpidr, 1);
+	phys_reset_t phys_reset;
+
+	mcpm_set_entry_vector(cpu, cluster, cpu_resume);
+	setup_mm_for_reboot();
+
+	__mcpm_cpu_going_down(cpu, cluster);
+	BUG_ON(!__mcpm_outbound_enter_critical(cpu, cluster));
+	cache_disable();
+	__mcpm_outbound_leave_critical(cluster, CLUSTER_DOWN);
+	__mcpm_cpu_down(cpu, cluster);
+
+	phys_reset = (phys_reset_t)(unsigned long)virt_to_phys(cpu_reset);
+	phys_reset(virt_to_phys(mcpm_entry_point));
+	BUG();
+}
+
+int __init mcpm_loopback(void (*cache_disable)(void))
+{
+	int ret;
+
+	/*
+	 * We're going to soft-restart the current CPU through the
+	 * low-level MCPM code by leveraging the suspend/resume
+	 * infrastructure. Let's play it safe by using cpu_pm_enter()
+	 * in case the CPU init code path resets the VFP or similar.
+	 */
+	local_irq_disable();
+	local_fiq_disable();
+	ret = cpu_pm_enter();
+	if (!ret) {
+		ret = cpu_suspend((unsigned long)cache_disable, nocache_trampoline);
+		cpu_pm_exit();
+	}
+	local_fiq_enable();
+	local_irq_enable();
+	if (ret)
+		pr_err("%s returned %d\n", __func__, ret);
+	return ret;
+}
+
+#endif
+
 struct sync_struct mcpm_sync;
 
 /*
diff --git a/arch/arm/include/asm/mcpm.h b/arch/arm/include/asm/mcpm.h
index 94060adba174..57ff7f2a3084 100644
--- a/arch/arm/include/asm/mcpm.h
+++ b/arch/arm/include/asm/mcpm.h
@@ -217,6 +217,22 @@ int __mcpm_cluster_state(unsigned int cluster);
 int __init mcpm_sync_init(
 	void (*power_up_setup)(unsigned int affinity_level));
 
+/**
+ * mcpm_loopback - make a run through the MCPM low-level code
+ *
+ * @cache_disable: pointer to function performing cache disabling
+ *
+ * This exercises the MCPM machinery by soft resetting the CPU and branching
+ * to the MCPM low-level entry code before returning to the caller.
+ * The @cache_disable function must do the necessary cache disabling to
+ * let the regular kernel init code turn it back on as if the CPU was
+ * hotplugged in. The MCPM state machine is set as if the cluster was
+ * initialized meaning the power_up_setup callback passed to mcpm_sync_init()
+ * will be invoked for all affinity levels. This may be useful to initialize
+ * some resources such as enabling the CCI that requires the cache to be off, or simply for testing purposes.
+ */
+int __init mcpm_loopback(void (*cache_disable)(void));
+
 void __init mcpm_smp_set_ops(void);
 
 #else
-- 
cgit v1.2.3


From 3592d7e002438980f9ce4a399f21ec94cbf071ea Mon Sep 17 00:00:00 2001
From: Nicolas Pitre <nicolas.pitre@linaro.org>
Date: Tue, 24 Jun 2014 18:34:38 +0100
Subject: ARM: 8082/1: TC2: test the MCPM loopback during boot

This is not strictly needed on TC2 but still a good idea to exercise
that code.

Signed-off-by: nicolas Pitre <nico@linaro.org>
Reviewed-by: Kevin Hilman <khilman@linaro.org>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/mach-vexpress/tc2_pm.c | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

(limited to 'arch')

diff --git a/arch/arm/mach-vexpress/tc2_pm.c b/arch/arm/mach-vexpress/tc2_pm.c
index b743a0ae02ce..54a9fff77c7d 100644
--- a/arch/arm/mach-vexpress/tc2_pm.c
+++ b/arch/arm/mach-vexpress/tc2_pm.c
@@ -323,6 +323,21 @@ static void __naked tc2_pm_power_up_setup(unsigned int affinity_level)
 "	b	cci_enable_port_for_self ");
 }
 
+static void __init tc2_cache_off(void)
+{
+	pr_info("TC2: disabling cache during MCPM loopback test\n");
+	if (read_cpuid_part_number() == ARM_CPU_PART_CORTEX_A15) {
+		/* disable L2 prefetching on the Cortex-A15 */
+		asm volatile(
+		"mcr	p15, 1, %0, c15, c0, 3 \n\t"
+		"isb	\n\t"
+		"dsb	"
+		: : "r" (0x400) );
+	}
+	v7_exit_coherency_flush(all);
+	cci_disable_port_by_cpu(read_cpuid_mpidr());
+}
+
 static int __init tc2_pm_init(void)
 {
 	int ret, irq;
@@ -370,6 +385,8 @@ static int __init tc2_pm_init(void)
 	ret = mcpm_platform_register(&tc2_pm_power_ops);
 	if (!ret) {
 		mcpm_sync_init(tc2_pm_power_up_setup);
+		/* test if we can (re)enable the CCI on our own */
+		BUG_ON(mcpm_loopback(tc2_cache_off) != 0);
 		pr_info("TC2 power management initialized\n");
 	}
 	return ret;
-- 
cgit v1.2.3


From fbb0499091dc1132995214a47188214602fd75c9 Mon Sep 17 00:00:00 2001
From: Nicolas Pitre <nicolas.pitre@linaro.org>
Date: Tue, 24 Jun 2014 18:36:32 +0100
Subject: ARM: 8083/1: exynos: activate the CCI on boot CPU/cluster using the
 MCPM loopback

The Chromebook firmware doesn't enable the CCI for the boot cpu, and
arguably it shouldn't have to either. Let's have the kernel handle the

CCI on its own for the boot CPU the same way it does it for secondary CPUs
by using the MCPM loopback.

This allows to boot all 8 cores on exynos5420-peach-pit,
exynos5800-peach-pi and ARM Chromebook 2.

Signed-off-by: Nicolas Pitre <nico@linaro.org>
Tested-by: Tushar Behera <tushar.b@samsung.com>
Reviewed-by: Kevin Hilman <khilman@linaro.org>
Tested-by: Kevin Hilman <khilman@linaro.org>
Tested-by: Doug Anderson <dianders@chromium.org>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/mach-exynos/mcpm-exynos.c | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

(limited to 'arch')

diff --git a/arch/arm/mach-exynos/mcpm-exynos.c b/arch/arm/mach-exynos/mcpm-exynos.c
index ace0ed617476..0d95bc8e49d8 100644
--- a/arch/arm/mach-exynos/mcpm-exynos.c
+++ b/arch/arm/mach-exynos/mcpm-exynos.c
@@ -289,6 +289,19 @@ static void __naked exynos_pm_power_up_setup(unsigned int affinity_level)
 	"b	cci_enable_port_for_self");
 }
 
+static void __init exynos_cache_off(void)
+{
+	if (read_cpuid_part_number() == ARM_CPU_PART_CORTEX_A15) {
+		/* disable L2 prefetching on the Cortex-A15 */
+		asm volatile(
+		"mcr	p15, 1, %0, c15, c0, 3\n\t"
+		"isb\n\t"
+		"dsb"
+		: : "r" (0x400));
+	}
+	exynos_v7_exit_coherency_flush(all);
+}
+
 static const struct of_device_id exynos_dt_mcpm_match[] = {
 	{ .compatible = "samsung,exynos5420" },
 	{ .compatible = "samsung,exynos5800" },
@@ -332,6 +345,8 @@ static int __init exynos_mcpm_init(void)
 	ret = mcpm_platform_register(&exynos_power_ops);
 	if (!ret)
 		ret = mcpm_sync_init(exynos_pm_power_up_setup);
+	if (!ret)
+		ret = mcpm_loopback(exynos_cache_off); /* turn on the CCI */
 	if (ret) {
 		iounmap(ns_sram_base_addr);
 		return ret;
-- 
cgit v1.2.3


From ee2593ef5680a8646c6465ce998fea30b5af9b2b Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <k.kozlowski@samsung.com>
Date: Mon, 14 Jul 2014 13:59:03 +0100
Subject: ARM: 8099/1: EXYNOS: Fix MCPM build with SUSPEND=n

Building of EXYNOS5420_MCPM with disabled SUSPEND fails:
arch/arm/mach-exynos/built-in.o: In function `exynos_mcpm_init':
arch/arm/mach-exynos/mcpm-exynos.c:361: undefined reference to `mcpm_loopback'

The exynos_mcpm_init() in mcp-exynos.c calls mcpm_loopback() which
depends on cpu_suspend function (ARM_CPU_SUSPEND).

Signed-off-by: Krzysztof Kozlowski <k.kozlowski@samsung.com>
Acked-by: Nicolas Pitre <nico@linaro.org>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/mach-exynos/Kconfig | 1 +
 1 file changed, 1 insertion(+)

(limited to 'arch')

diff --git a/arch/arm/mach-exynos/Kconfig b/arch/arm/mach-exynos/Kconfig
index 8f9b66c4ac78..5d4ff6571dcd 100644
--- a/arch/arm/mach-exynos/Kconfig
+++ b/arch/arm/mach-exynos/Kconfig
@@ -119,6 +119,7 @@ config EXYNOS5420_MCPM
 	bool "Exynos5420 Multi-Cluster PM support"
 	depends on MCPM && SOC_EXYNOS5420
 	select ARM_CCI
+	select ARM_CPU_SUSPEND
 	help
 	  This is needed to provide CPU and cluster power management
 	  on Exynos5420 implementing big.LITTLE.
-- 
cgit v1.2.3


From af040ffc9ba1e079ee4c0748aff64fa3d4716fa5 Mon Sep 17 00:00:00 2001
From: Russell King <rmk+kernel@arm.linux.org.uk>
Date: Tue, 24 Jun 2014 19:43:15 +0100
Subject: ARM: make it easier to check the CPU part number correctly

Ensure that platform maintainers check the CPU part number in the right
manner: the CPU part number is meaningless without also checking the
CPU implement(e|o)r (choose your preferred spelling!)  Provide an
interface which returns both the implementer and part number together,
and update the definitions to include the implementer.

Mark the old function as being deprecated... indeed, using the old
function with the definitions will now always evaluate as false, so
people must update their un-merged code to the new function.  While
this could be avoided by adding new definitions, we'd also have to
create new names for them which would be awkward.

Acked-by: Nicolas Pitre <nico@linaro.org>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/include/asm/cputype.h         | 37 +++++++++++++++--------
 arch/arm/include/asm/smp_scu.h         |  2 +-
 arch/arm/kernel/perf_event_cpu.c       | 55 +++++++++++++++++-----------------
 arch/arm/kvm/guest.c                   |  8 +----
 arch/arm/mach-exynos/mcpm-exynos.c     |  4 +--
 arch/arm/mach-exynos/platsmp.c         |  4 +--
 arch/arm/mach-exynos/pm.c              | 11 ++++---
 arch/arm/mach-vexpress/tc2_pm.c        |  4 +--
 arch/arm/mm/cache-l2x0.c               |  2 +-
 drivers/clocksource/arm_global_timer.c |  2 +-
 10 files changed, 66 insertions(+), 63 deletions(-)

(limited to 'arch')

diff --git a/arch/arm/include/asm/cputype.h b/arch/arm/include/asm/cputype.h
index 8c2b7321a478..963a2515906d 100644
--- a/arch/arm/include/asm/cputype.h
+++ b/arch/arm/include/asm/cputype.h
@@ -62,17 +62,18 @@
 #define ARM_CPU_IMP_ARM			0x41
 #define ARM_CPU_IMP_INTEL		0x69
 
-#define ARM_CPU_PART_ARM1136		0xB360
-#define ARM_CPU_PART_ARM1156		0xB560
-#define ARM_CPU_PART_ARM1176		0xB760
-#define ARM_CPU_PART_ARM11MPCORE	0xB020
-#define ARM_CPU_PART_CORTEX_A8		0xC080
-#define ARM_CPU_PART_CORTEX_A9		0xC090
-#define ARM_CPU_PART_CORTEX_A5		0xC050
-#define ARM_CPU_PART_CORTEX_A15		0xC0F0
-#define ARM_CPU_PART_CORTEX_A7		0xC070
-#define ARM_CPU_PART_CORTEX_A12		0xC0D0
-#define ARM_CPU_PART_CORTEX_A17		0xC0E0
+/* ARM implemented processors */
+#define ARM_CPU_PART_ARM1136		0x4100b360
+#define ARM_CPU_PART_ARM1156		0x4100b560
+#define ARM_CPU_PART_ARM1176		0x4100b760
+#define ARM_CPU_PART_ARM11MPCORE	0x4100b020
+#define ARM_CPU_PART_CORTEX_A8		0x4100c080
+#define ARM_CPU_PART_CORTEX_A9		0x4100c090
+#define ARM_CPU_PART_CORTEX_A5		0x4100c050
+#define ARM_CPU_PART_CORTEX_A7		0x4100c070
+#define ARM_CPU_PART_CORTEX_A12		0x4100c0d0
+#define ARM_CPU_PART_CORTEX_A17		0x4100c0e0
+#define ARM_CPU_PART_CORTEX_A15		0x4100c0f0
 
 #define ARM_CPU_XSCALE_ARCH_MASK	0xe000
 #define ARM_CPU_XSCALE_ARCH_V1		0x2000
@@ -171,14 +172,24 @@ static inline unsigned int __attribute_const__ read_cpuid_implementor(void)
 	return (read_cpuid_id() & 0xFF000000) >> 24;
 }
 
-static inline unsigned int __attribute_const__ read_cpuid_part_number(void)
+/*
+ * The CPU part number is meaningless without referring to the CPU
+ * implementer: implementers are free to define their own part numbers
+ * which are permitted to clash with other implementer part numbers.
+ */
+static inline unsigned int __attribute_const__ read_cpuid_part(void)
+{
+	return read_cpuid_id() & 0xff00fff0;
+}
+
+static inline unsigned int __attribute_const__ __deprecated read_cpuid_part_number(void)
 {
 	return read_cpuid_id() & 0xFFF0;
 }
 
 static inline unsigned int __attribute_const__ xscale_cpu_arch_version(void)
 {
-	return read_cpuid_part_number() & ARM_CPU_XSCALE_ARCH_MASK;
+	return read_cpuid_id() & ARM_CPU_XSCALE_ARCH_MASK;
 }
 
 static inline unsigned int __attribute_const__ read_cpuid_cachetype(void)
diff --git a/arch/arm/include/asm/smp_scu.h b/arch/arm/include/asm/smp_scu.h
index 0393fbab8dd5..bfe163c40024 100644
--- a/arch/arm/include/asm/smp_scu.h
+++ b/arch/arm/include/asm/smp_scu.h
@@ -11,7 +11,7 @@
 
 static inline bool scu_a9_has_base(void)
 {
-	return read_cpuid_part_number() == ARM_CPU_PART_CORTEX_A9;
+	return read_cpuid_part() == ARM_CPU_PART_CORTEX_A9;
 }
 
 static inline unsigned long scu_a9_get_base(void)
diff --git a/arch/arm/kernel/perf_event_cpu.c b/arch/arm/kernel/perf_event_cpu.c
index af9e35e8836f..c02c2e8c877d 100644
--- a/arch/arm/kernel/perf_event_cpu.c
+++ b/arch/arm/kernel/perf_event_cpu.c
@@ -250,40 +250,39 @@ static struct platform_device_id cpu_pmu_plat_device_ids[] = {
 static int probe_current_pmu(struct arm_pmu *pmu)
 {
 	int cpu = get_cpu();
-	unsigned long implementor = read_cpuid_implementor();
-	unsigned long part_number = read_cpuid_part_number();
 	int ret = -ENODEV;
 
 	pr_info("probing PMU on CPU %d\n", cpu);
 
+	switch (read_cpuid_part()) {
 	/* ARM Ltd CPUs. */
-	if (implementor == ARM_CPU_IMP_ARM) {
-		switch (part_number) {
-		case ARM_CPU_PART_ARM1136:
-		case ARM_CPU_PART_ARM1156:
-		case ARM_CPU_PART_ARM1176:
-			ret = armv6pmu_init(pmu);
-			break;
-		case ARM_CPU_PART_ARM11MPCORE:
-			ret = armv6mpcore_pmu_init(pmu);
-			break;
-		case ARM_CPU_PART_CORTEX_A8:
-			ret = armv7_a8_pmu_init(pmu);
-			break;
-		case ARM_CPU_PART_CORTEX_A9:
-			ret = armv7_a9_pmu_init(pmu);
-			break;
-		}
-	/* Intel CPUs [xscale]. */
-	} else if (implementor == ARM_CPU_IMP_INTEL) {
-		switch (xscale_cpu_arch_version()) {
-		case ARM_CPU_XSCALE_ARCH_V1:
-			ret = xscale1pmu_init(pmu);
-			break;
-		case ARM_CPU_XSCALE_ARCH_V2:
-			ret = xscale2pmu_init(pmu);
-			break;
+	case ARM_CPU_PART_ARM1136:
+	case ARM_CPU_PART_ARM1156:
+	case ARM_CPU_PART_ARM1176:
+		ret = armv6pmu_init(pmu);
+		break;
+	case ARM_CPU_PART_ARM11MPCORE:
+		ret = armv6mpcore_pmu_init(pmu);
+		break;
+	case ARM_CPU_PART_CORTEX_A8:
+		ret = armv7_a8_pmu_init(pmu);
+		break;
+	case ARM_CPU_PART_CORTEX_A9:
+		ret = armv7_a9_pmu_init(pmu);
+		break;
+
+	default:
+		if (read_cpuid_implementor() == ARM_CPU_IMP_INTEL) {
+			switch (xscale_cpu_arch_version()) {
+			case ARM_CPU_XSCALE_ARCH_V1:
+				ret = xscale1pmu_init(pmu);
+				break;
+			case ARM_CPU_XSCALE_ARCH_V2:
+				ret = xscale2pmu_init(pmu);
+				break;
+			}
 		}
+		break;
 	}
 
 	put_cpu();
diff --git a/arch/arm/kvm/guest.c b/arch/arm/kvm/guest.c
index b23a59c1c522..70bf49b8b244 100644
--- a/arch/arm/kvm/guest.c
+++ b/arch/arm/kvm/guest.c
@@ -274,13 +274,7 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
 
 int __attribute_const__ kvm_target_cpu(void)
 {
-	unsigned long implementor = read_cpuid_implementor();
-	unsigned long part_number = read_cpuid_part_number();
-
-	if (implementor != ARM_CPU_IMP_ARM)
-		return -EINVAL;
-
-	switch (part_number) {
+	switch (read_cpuid_part()) {
 	case ARM_CPU_PART_CORTEX_A7:
 		return KVM_ARM_TARGET_CORTEX_A7;
 	case ARM_CPU_PART_CORTEX_A15:
diff --git a/arch/arm/mach-exynos/mcpm-exynos.c b/arch/arm/mach-exynos/mcpm-exynos.c
index 0d95bc8e49d8..a96b78f93f2b 100644
--- a/arch/arm/mach-exynos/mcpm-exynos.c
+++ b/arch/arm/mach-exynos/mcpm-exynos.c
@@ -196,7 +196,7 @@ static void exynos_power_down(void)
 	if (last_man && __mcpm_outbound_enter_critical(cpu, cluster)) {
 		arch_spin_unlock(&exynos_mcpm_lock);
 
-		if (read_cpuid_part_number() == ARM_CPU_PART_CORTEX_A15) {
+		if (read_cpuid_part() == ARM_CPU_PART_CORTEX_A15) {
 			/*
 			 * On the Cortex-A15 we need to disable
 			 * L2 prefetching before flushing the cache.
@@ -291,7 +291,7 @@ static void __naked exynos_pm_power_up_setup(unsigned int affinity_level)
 
 static void __init exynos_cache_off(void)
 {
-	if (read_cpuid_part_number() == ARM_CPU_PART_CORTEX_A15) {
+	if (read_cpuid_part() == ARM_CPU_PART_CORTEX_A15) {
 		/* disable L2 prefetching on the Cortex-A15 */
 		asm volatile(
 		"mcr	p15, 1, %0, c15, c0, 3\n\t"
diff --git a/arch/arm/mach-exynos/platsmp.c b/arch/arm/mach-exynos/platsmp.c
index 1c8d31e39520..8dc1d3a3a8bf 100644
--- a/arch/arm/mach-exynos/platsmp.c
+++ b/arch/arm/mach-exynos/platsmp.c
@@ -188,7 +188,7 @@ static void __init exynos_smp_init_cpus(void)
 	void __iomem *scu_base = scu_base_addr();
 	unsigned int i, ncores;
 
-	if (read_cpuid_part_number() == ARM_CPU_PART_CORTEX_A9)
+	if (read_cpuid_part() == ARM_CPU_PART_CORTEX_A9)
 		ncores = scu_base ? scu_get_core_count(scu_base) : 1;
 	else
 		/*
@@ -214,7 +214,7 @@ static void __init exynos_smp_prepare_cpus(unsigned int max_cpus)
 
 	exynos_sysram_init();
 
-	if (read_cpuid_part_number() == ARM_CPU_PART_CORTEX_A9)
+	if (read_cpuid_part() == ARM_CPU_PART_CORTEX_A9)
 		scu_enable(scu_base_addr());
 
 	/*
diff --git a/arch/arm/mach-exynos/pm.c b/arch/arm/mach-exynos/pm.c
index 202ca73e49c4..67d383de614f 100644
--- a/arch/arm/mach-exynos/pm.c
+++ b/arch/arm/mach-exynos/pm.c
@@ -300,7 +300,7 @@ static int exynos_pm_suspend(void)
 	tmp = (S5P_USE_STANDBY_WFI0 | S5P_USE_STANDBY_WFE0);
 	__raw_writel(tmp, S5P_CENTRAL_SEQ_OPTION);
 
-	if (read_cpuid_part_number() == ARM_CPU_PART_CORTEX_A9)
+	if (read_cpuid_part() == ARM_CPU_PART_CORTEX_A9)
 		exynos_cpu_save_register();
 
 	return 0;
@@ -334,7 +334,7 @@ static void exynos_pm_resume(void)
 	if (exynos_pm_central_resume())
 		goto early_wakeup;
 
-	if (read_cpuid_part_number() == ARM_CPU_PART_CORTEX_A9)
+	if (read_cpuid_part() == ARM_CPU_PART_CORTEX_A9)
 		exynos_cpu_restore_register();
 
 	/* For release retention */
@@ -353,7 +353,7 @@ static void exynos_pm_resume(void)
 
 	s3c_pm_do_restore_core(exynos_core_save, ARRAY_SIZE(exynos_core_save));
 
-	if (read_cpuid_part_number() == ARM_CPU_PART_CORTEX_A9)
+	if (read_cpuid_part() == ARM_CPU_PART_CORTEX_A9)
 		scu_enable(S5P_VA_SCU);
 
 early_wakeup:
@@ -440,15 +440,14 @@ static int exynos_cpu_pm_notifier(struct notifier_block *self,
 	case CPU_PM_ENTER:
 		if (cpu == 0) {
 			exynos_pm_central_suspend();
-			if (read_cpuid_part_number() == ARM_CPU_PART_CORTEX_A9)
+			if (read_cpuid_part() == ARM_CPU_PART_CORTEX_A9)
 				exynos_cpu_save_register();
 		}
 		break;
 
 	case CPU_PM_EXIT:
 		if (cpu == 0) {
-			if (read_cpuid_part_number() ==
-					ARM_CPU_PART_CORTEX_A9) {
+			if (read_cpuid_part() == ARM_CPU_PART_CORTEX_A9) {
 				scu_enable(S5P_VA_SCU);
 				exynos_cpu_restore_register();
 			}
diff --git a/arch/arm/mach-vexpress/tc2_pm.c b/arch/arm/mach-vexpress/tc2_pm.c
index 54a9fff77c7d..2fb78b4648cb 100644
--- a/arch/arm/mach-vexpress/tc2_pm.c
+++ b/arch/arm/mach-vexpress/tc2_pm.c
@@ -152,7 +152,7 @@ static void tc2_pm_down(u64 residency)
 	if (last_man && __mcpm_outbound_enter_critical(cpu, cluster)) {
 		arch_spin_unlock(&tc2_pm_lock);
 
-		if (read_cpuid_part_number() == ARM_CPU_PART_CORTEX_A15) {
+		if (read_cpuid_part() == ARM_CPU_PART_CORTEX_A15) {
 			/*
 			 * On the Cortex-A15 we need to disable
 			 * L2 prefetching before flushing the cache.
@@ -326,7 +326,7 @@ static void __naked tc2_pm_power_up_setup(unsigned int affinity_level)
 static void __init tc2_cache_off(void)
 {
 	pr_info("TC2: disabling cache during MCPM loopback test\n");
-	if (read_cpuid_part_number() == ARM_CPU_PART_CORTEX_A15) {
+	if (read_cpuid_part() == ARM_CPU_PART_CORTEX_A15) {
 		/* disable L2 prefetching on the Cortex-A15 */
 		asm volatile(
 		"mcr	p15, 1, %0, c15, c0, 3 \n\t"
diff --git a/arch/arm/mm/cache-l2x0.c b/arch/arm/mm/cache-l2x0.c
index 076172b69422..556df22e89bd 100644
--- a/arch/arm/mm/cache-l2x0.c
+++ b/arch/arm/mm/cache-l2x0.c
@@ -665,7 +665,7 @@ static int l2c310_cpu_enable_flz(struct notifier_block *nb, unsigned long act, v
 static void __init l2c310_enable(void __iomem *base, u32 aux, unsigned num_lock)
 {
 	unsigned rev = readl_relaxed(base + L2X0_CACHE_ID) & L2X0_CACHE_ID_PART_MASK;
-	bool cortex_a9 = read_cpuid_part_number() == ARM_CPU_PART_CORTEX_A9;
+	bool cortex_a9 = read_cpuid_part() == ARM_CPU_PART_CORTEX_A9;
 
 	if (rev >= L310_CACHE_ID_RTL_R2P0) {
 		if (cortex_a9) {
diff --git a/drivers/clocksource/arm_global_timer.c b/drivers/clocksource/arm_global_timer.c
index 60e5a170c4d2..e6833771a716 100644
--- a/drivers/clocksource/arm_global_timer.c
+++ b/drivers/clocksource/arm_global_timer.c
@@ -250,7 +250,7 @@ static void __init global_timer_of_register(struct device_node *np)
 	 * fire when the timer value is greater than or equal to. In previous
 	 * revisions the comparators fired when the timer value was equal to.
 	 */
-	if (read_cpuid_part_number() == ARM_CPU_PART_CORTEX_A9
+	if (read_cpuid_part() == ARM_CPU_PART_CORTEX_A9
 	    && (read_cpuid_id() & 0xf0000f) < 0x200000) {
 		pr_warn("global-timer: non support for this cpu version.\n");
 		return;
-- 
cgit v1.2.3


From 6ebbf2ce437b33022d30badd49dc94d33ecfa498 Mon Sep 17 00:00:00 2001
From: Russell King <rmk+kernel@arm.linux.org.uk>
Date: Mon, 30 Jun 2014 16:29:12 +0100
Subject: ARM: convert all "mov.* pc, reg" to "bx reg" for ARMv6+
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

ARMv6 and greater introduced a new instruction ("bx") which can be used
to return from function calls.  Recent CPUs perform better when the
"bx lr" instruction is used rather than the "mov pc, lr" instruction,
and this sequence is strongly recommended to be used by the ARM
architecture manual (section A.4.1.1).

We provide a new macro "ret" with all its variants for the condition
code which will resolve to the appropriate instruction.

Rather than doing this piecemeal, and miss some instances, change all
the "mov pc" instances to use the new macro, with the exception of
the "movs" instruction and the kprobes code.  This allows us to detect
the "mov pc, lr" case and fix it up - and also gives us the possibility
of deploying this for other registers depending on the CPU selection.

Reported-by: Will Deacon <will.deacon@arm.com>
Tested-by: Stephen Warren <swarren@nvidia.com> # Tegra Jetson TK1
Tested-by: Robert Jarzmik <robert.jarzmik@free.fr> # mioa701_bootresume.S
Tested-by: Andrew Lunn <andrew@lunn.ch> # Kirkwood
Tested-by: Shawn Guo <shawn.guo@freescale.com>
Tested-by: Tony Lindgren <tony@atomide.com> # OMAPs
Tested-by: Gregory CLEMENT <gregory.clement@free-electrons.com> # Armada XP, 375, 385
Acked-by: Sekhar Nori <nsekhar@ti.com> # DaVinci
Acked-by: Christoffer Dall <christoffer.dall@linaro.org> # kvm/hyp
Acked-by: Haojian Zhuang <haojian.zhuang@gmail.com> # PXA3xx
Acked-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com> # Xen
Tested-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de> # ARMv7M
Tested-by: Simon Horman <horms+renesas@verge.net.au> # Shmobile
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/crypto/aes-armv4.S              |  3 ++-
 arch/arm/include/asm/assembler.h         | 21 +++++++++++++++
 arch/arm/include/asm/entry-macro-multi.S |  2 +-
 arch/arm/kernel/debug.S                  | 10 ++++----
 arch/arm/kernel/entry-armv.S             | 42 +++++++++++++++---------------
 arch/arm/kernel/entry-common.S           | 13 +++++-----
 arch/arm/kernel/entry-header.S           | 14 ----------
 arch/arm/kernel/fiqasm.S                 |  4 +--
 arch/arm/kernel/head-common.S            |  7 ++---
 arch/arm/kernel/head-nommu.S             |  8 +++---
 arch/arm/kernel/head.S                   | 18 ++++++-------
 arch/arm/kernel/hyp-stub.S               |  6 ++---
 arch/arm/kernel/iwmmxt.S                 | 10 ++++----
 arch/arm/kernel/relocate_kernel.S        |  3 ++-
 arch/arm/kernel/sleep.S                  |  2 +-
 arch/arm/kvm/init.S                      |  3 ++-
 arch/arm/lib/ashldi3.S                   |  3 ++-
 arch/arm/lib/ashrdi3.S                   |  3 ++-
 arch/arm/lib/backtrace.S                 |  2 +-
 arch/arm/lib/bitops.h                    |  5 ++--
 arch/arm/lib/bswapsdi2.S                 |  5 ++--
 arch/arm/lib/call_with_stack.S           |  4 +--
 arch/arm/lib/csumpartial.S               |  2 +-
 arch/arm/lib/csumpartialcopygeneric.S    |  5 ++--
 arch/arm/lib/delay-loop.S                | 18 ++++++-------
 arch/arm/lib/div64.S                     | 13 +++++-----
 arch/arm/lib/findbit.S                   | 10 ++++----
 arch/arm/lib/getuser.S                   |  8 +++---
 arch/arm/lib/io-readsb.S                 |  2 +-
 arch/arm/lib/io-readsl.S                 |  6 ++---
 arch/arm/lib/io-readsw-armv3.S           |  4 +--
 arch/arm/lib/io-readsw-armv4.S           |  2 +-
 arch/arm/lib/io-writesb.S                |  2 +-
 arch/arm/lib/io-writesl.S                | 10 ++++----
 arch/arm/lib/io-writesw-armv3.S          |  4 +--
 arch/arm/lib/io-writesw-armv4.S          |  4 +--
 arch/arm/lib/lib1funcs.S                 | 26 +++++++++----------
 arch/arm/lib/lshrdi3.S                   |  3 ++-
 arch/arm/lib/memchr.S                    |  2 +-
 arch/arm/lib/memset.S                    |  2 +-
 arch/arm/lib/memzero.S                   |  2 +-
 arch/arm/lib/muldi3.S                    |  3 ++-
 arch/arm/lib/putuser.S                   | 10 ++++----
 arch/arm/lib/strchr.S                    |  2 +-
 arch/arm/lib/strrchr.S                   |  2 +-
 arch/arm/lib/ucmpdi2.S                   |  5 ++--
 arch/arm/mach-davinci/sleep.S            |  2 +-
 arch/arm/mach-ep93xx/crunch-bits.S       |  6 ++---
 arch/arm/mach-imx/suspend-imx6.S         |  5 ++--
 arch/arm/mach-mvebu/coherency_ll.S       | 10 ++++----
 arch/arm/mach-mvebu/headsmp-a9.S         |  3 ++-
 arch/arm/mach-omap2/sleep44xx.S          |  3 ++-
 arch/arm/mach-omap2/sram242x.S           |  6 ++---
 arch/arm/mach-omap2/sram243x.S           |  6 ++---
 arch/arm/mach-pxa/mioa701_bootresume.S   |  2 +-
 arch/arm/mach-pxa/standby.S              |  4 +--
 arch/arm/mach-s3c24xx/sleep-s3c2410.S    |  2 +-
 arch/arm/mach-s3c24xx/sleep-s3c2412.S    |  2 +-
 arch/arm/mach-shmobile/headsmp.S         |  3 ++-
 arch/arm/mach-tegra/sleep-tegra20.S      | 24 ++++++++---------
 arch/arm/mach-tegra/sleep-tegra30.S      | 14 +++++-----
 arch/arm/mach-tegra/sleep.S              |  8 +++---
 arch/arm/mm/cache-fa.S                   | 19 +++++++-------
 arch/arm/mm/cache-nop.S                  |  5 ++--
 arch/arm/mm/cache-v4.S                   | 13 +++++-----
 arch/arm/mm/cache-v4wb.S                 | 15 ++++++-----
 arch/arm/mm/cache-v4wt.S                 | 13 +++++-----
 arch/arm/mm/cache-v6.S                   | 20 +++++++--------
 arch/arm/mm/cache-v7.S                   | 30 +++++++++++-----------
 arch/arm/mm/l2c-l2x0-resume.S            |  7 ++---
 arch/arm/mm/proc-arm1020.S               | 34 ++++++++++++------------
 arch/arm/mm/proc-arm1020e.S              | 34 ++++++++++++------------
 arch/arm/mm/proc-arm1022.S               | 34 ++++++++++++------------
 arch/arm/mm/proc-arm1026.S               | 34 ++++++++++++------------
 arch/arm/mm/proc-arm720.S                | 16 ++++++------
 arch/arm/mm/proc-arm740.S                |  8 +++---
 arch/arm/mm/proc-arm7tdmi.S              |  8 +++---
 arch/arm/mm/proc-arm920.S                | 34 ++++++++++++------------
 arch/arm/mm/proc-arm922.S                | 34 ++++++++++++------------
 arch/arm/mm/proc-arm925.S                | 34 ++++++++++++------------
 arch/arm/mm/proc-arm926.S                | 34 ++++++++++++------------
 arch/arm/mm/proc-arm940.S                | 24 ++++++++---------
 arch/arm/mm/proc-arm946.S                | 30 +++++++++++-----------
 arch/arm/mm/proc-arm9tdmi.S              |  8 +++---
 arch/arm/mm/proc-fa526.S                 | 16 ++++++------
 arch/arm/mm/proc-feroceon.S              | 44 ++++++++++++++++----------------
 arch/arm/mm/proc-mohawk.S                | 34 ++++++++++++------------
 arch/arm/mm/proc-sa110.S                 | 16 ++++++------
 arch/arm/mm/proc-sa1100.S                | 16 ++++++------
 arch/arm/mm/proc-v6.S                    | 16 ++++++------
 arch/arm/mm/proc-v7-2level.S             |  4 +--
 arch/arm/mm/proc-v7-3level.S             |  5 ++--
 arch/arm/mm/proc-v7.S                    | 14 +++++-----
 arch/arm/mm/proc-v7m.S                   | 18 ++++++-------
 arch/arm/mm/proc-xsc3.S                  | 32 +++++++++++------------
 arch/arm/mm/proc-xscale.S                | 34 ++++++++++++------------
 arch/arm/mm/tlb-fa.S                     |  7 ++---
 arch/arm/mm/tlb-v4.S                     |  5 ++--
 arch/arm/mm/tlb-v4wb.S                   |  7 ++---
 arch/arm/mm/tlb-v4wbi.S                  |  7 ++---
 arch/arm/mm/tlb-v6.S                     |  5 ++--
 arch/arm/mm/tlb-v7.S                     |  4 +--
 arch/arm/nwfpe/entry.S                   |  8 +++---
 arch/arm/vfp/entry.S                     |  4 +--
 arch/arm/vfp/vfphw.S                     | 26 +++++++++----------
 arch/arm/xen/hypercall.S                 |  6 ++---
 106 files changed, 644 insertions(+), 607 deletions(-)

(limited to 'arch')

diff --git a/arch/arm/crypto/aes-armv4.S b/arch/arm/crypto/aes-armv4.S
index 3a14ea8fe97e..ebb9761fb572 100644
--- a/arch/arm/crypto/aes-armv4.S
+++ b/arch/arm/crypto/aes-armv4.S
@@ -35,6 +35,7 @@
 @ that is being targetted.
 
 #include <linux/linkage.h>
+#include <asm/assembler.h>
 
 .text
 
@@ -648,7 +649,7 @@ _armv4_AES_set_encrypt_key:
 
 .Ldone:	mov	r0,#0
 	ldmia   sp!,{r4-r12,lr}
-.Labrt:	mov	pc,lr
+.Labrt:	ret	lr
 ENDPROC(private_AES_set_encrypt_key)
 
 .align	5
diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h
index 906703a5b564..f67fd3afebdf 100644
--- a/arch/arm/include/asm/assembler.h
+++ b/arch/arm/include/asm/assembler.h
@@ -427,4 +427,25 @@ THUMB(	orr	\reg , \reg , #PSR_T_BIT	)
 #endif
 	.endm
 
+	.irp	c,,eq,ne,cs,cc,mi,pl,vs,vc,hi,ls,ge,lt,gt,le,hs,lo
+	.macro	ret\c, reg
+#if __LINUX_ARM_ARCH__ < 6
+	mov\c	pc, \reg
+#else
+	.ifeqs	"\reg", "lr"
+	bx\c	\reg
+	.else
+	mov\c	pc, \reg
+	.endif
+#endif
+	.endm
+	.endr
+
+	.macro	ret.w, reg
+	ret	\reg
+#ifdef CONFIG_THUMB2_KERNEL
+	nop
+#endif
+	.endm
+
 #endif /* __ASM_ASSEMBLER_H__ */
diff --git a/arch/arm/include/asm/entry-macro-multi.S b/arch/arm/include/asm/entry-macro-multi.S
index 88d61815f0c0..469a2b30fa27 100644
--- a/arch/arm/include/asm/entry-macro-multi.S
+++ b/arch/arm/include/asm/entry-macro-multi.S
@@ -35,5 +35,5 @@
 \symbol_name:
 	mov	r8, lr
 	arch_irq_handler_default
-	mov     pc, r8
+	ret	r8
 	.endm
diff --git a/arch/arm/kernel/debug.S b/arch/arm/kernel/debug.S
index 14f7c3b14632..78c91b5f97d4 100644
--- a/arch/arm/kernel/debug.S
+++ b/arch/arm/kernel/debug.S
@@ -90,7 +90,7 @@ ENTRY(printascii)
 		ldrneb	r1, [r0], #1
 		teqne	r1, #0
 		bne	1b
-		mov	pc, lr
+		ret	lr
 ENDPROC(printascii)
 
 ENTRY(printch)
@@ -105,7 +105,7 @@ ENTRY(debug_ll_addr)
 		addruart r2, r3, ip
 		str	r2, [r0]
 		str	r3, [r1]
-		mov	pc, lr
+		ret	lr
 ENDPROC(debug_ll_addr)
 #endif
 
@@ -116,7 +116,7 @@ ENTRY(printascii)
 		mov	r0, #0x04		@ SYS_WRITE0
 	ARM(	svc	#0x123456	)
 	THUMB(	svc	#0xab		)
-		mov	pc, lr
+		ret	lr
 ENDPROC(printascii)
 
 ENTRY(printch)
@@ -125,14 +125,14 @@ ENTRY(printch)
 		mov	r0, #0x03		@ SYS_WRITEC
 	ARM(	svc	#0x123456	)
 	THUMB(	svc	#0xab		)
-		mov	pc, lr
+		ret	lr
 ENDPROC(printch)
 
 ENTRY(debug_ll_addr)
 		mov	r2, #0
 		str	r2, [r0]
 		str	r2, [r1]
-		mov	pc, lr
+		ret	lr
 ENDPROC(debug_ll_addr)
 
 #endif
diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S
index 52a949a8077d..36276cdccfbc 100644
--- a/arch/arm/kernel/entry-armv.S
+++ b/arch/arm/kernel/entry-armv.S
@@ -224,7 +224,7 @@ svc_preempt:
 1:	bl	preempt_schedule_irq		@ irq en/disable is done inside
 	ldr	r0, [tsk, #TI_FLAGS]		@ get new tasks TI_FLAGS
 	tst	r0, #_TIF_NEED_RESCHED
-	moveq	pc, r8				@ go again
+	reteq	r8				@ go again
 	b	1b
 #endif
 
@@ -490,7 +490,7 @@ ENDPROC(__und_usr)
 	.pushsection .fixup, "ax"
 	.align	2
 4:	str     r4, [sp, #S_PC]			@ retry current instruction
-	mov	pc, r9
+	ret	r9
 	.popsection
 	.pushsection __ex_table,"a"
 	.long	1b, 4b
@@ -552,7 +552,7 @@ call_fpe:
 #endif
 	tst	r0, #0x08000000			@ only CDP/CPRT/LDC/STC have bit 27
 	tstne	r0, #0x04000000			@ bit 26 set on both ARM and Thumb-2
-	moveq	pc, lr
+	reteq	lr
 	and	r8, r0, #0x00000f00		@ mask out CP number
  THUMB(	lsr	r8, r8, #8		)
 	mov	r7, #1
@@ -571,33 +571,33 @@ call_fpe:
  THUMB(	add	pc, r8			)
 	nop
 
-	movw_pc	lr				@ CP#0
+	ret.w	lr				@ CP#0
 	W(b)	do_fpe				@ CP#1 (FPE)
 	W(b)	do_fpe				@ CP#2 (FPE)
-	movw_pc	lr				@ CP#3
+	ret.w	lr				@ CP#3
 #ifdef CONFIG_CRUNCH
 	b	crunch_task_enable		@ CP#4 (MaverickCrunch)
 	b	crunch_task_enable		@ CP#5 (MaverickCrunch)
 	b	crunch_task_enable		@ CP#6 (MaverickCrunch)
 #else
-	movw_pc	lr				@ CP#4
-	movw_pc	lr				@ CP#5
-	movw_pc	lr				@ CP#6
+	ret.w	lr				@ CP#4
+	ret.w	lr				@ CP#5
+	ret.w	lr				@ CP#6
 #endif
-	movw_pc	lr				@ CP#7
-	movw_pc	lr				@ CP#8
-	movw_pc	lr				@ CP#9
+	ret.w	lr				@ CP#7
+	ret.w	lr				@ CP#8
+	ret.w	lr				@ CP#9
 #ifdef CONFIG_VFP
 	W(b)	do_vfp				@ CP#10 (VFP)
 	W(b)	do_vfp				@ CP#11 (VFP)
 #else
-	movw_pc	lr				@ CP#10 (VFP)
-	movw_pc	lr				@ CP#11 (VFP)
+	ret.w	lr				@ CP#10 (VFP)
+	ret.w	lr				@ CP#11 (VFP)
 #endif
-	movw_pc	lr				@ CP#12
-	movw_pc	lr				@ CP#13
-	movw_pc	lr				@ CP#14 (Debug)
-	movw_pc	lr				@ CP#15 (Control)
+	ret.w	lr				@ CP#12
+	ret.w	lr				@ CP#13
+	ret.w	lr				@ CP#14 (Debug)
+	ret.w	lr				@ CP#15 (Control)
 
 #ifdef NEED_CPU_ARCHITECTURE
 	.align	2
@@ -649,7 +649,7 @@ ENTRY(fp_enter)
 	.popsection
 
 ENTRY(no_fp)
-	mov	pc, lr
+	ret	lr
 ENDPROC(no_fp)
 
 __und_usr_fault_32:
@@ -745,7 +745,7 @@ ENDPROC(__switch_to)
 #ifdef CONFIG_ARM_THUMB
 	bx	\reg
 #else
-	mov	pc, \reg
+	ret	\reg
 #endif
 	.endm
 
@@ -837,7 +837,7 @@ kuser_cmpxchg64_fixup:
 #if __LINUX_ARM_ARCH__ < 6
 	bcc	kuser_cmpxchg32_fixup
 #endif
-	mov	pc, lr
+	ret	lr
 	.previous
 
 #else
@@ -905,7 +905,7 @@ kuser_cmpxchg32_fixup:
 	subs	r8, r4, r7
 	rsbcss	r8, r8, #(2b - 1b)
 	strcs	r7, [sp, #S_PC]
-	mov	pc, lr
+	ret	lr
 	.previous
 
 #else
diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S
index 7139d4a7dea7..e52fe5a2d843 100644
--- a/arch/arm/kernel/entry-common.S
+++ b/arch/arm/kernel/entry-common.S
@@ -8,6 +8,7 @@
  * published by the Free Software Foundation.
  */
 
+#include <asm/assembler.h>
 #include <asm/unistd.h>
 #include <asm/ftrace.h>
 #include <asm/unwind.h>
@@ -88,7 +89,7 @@ ENTRY(ret_from_fork)
 	cmp	r5, #0
 	movne	r0, r4
 	adrne	lr, BSYM(1f)
-	movne	pc, r5
+	retne	r5
 1:	get_thread_info tsk
 	b	ret_slow_syscall
 ENDPROC(ret_from_fork)
@@ -290,7 +291,7 @@ ENDPROC(ftrace_graph_caller_old)
 
 .macro mcount_exit
 	ldmia	sp!, {r0-r3, ip, lr}
-	mov	pc, ip
+	ret	ip
 .endm
 
 ENTRY(__gnu_mcount_nc)
@@ -298,7 +299,7 @@ UNWIND(.fnstart)
 #ifdef CONFIG_DYNAMIC_FTRACE
 	mov	ip, lr
 	ldmia	sp!, {lr}
-	mov	pc, ip
+	ret	ip
 #else
 	__mcount
 #endif
@@ -333,12 +334,12 @@ return_to_handler:
 	bl	ftrace_return_to_handler
 	mov	lr, r0			@ r0 has real ret addr
 	ldmia	sp!, {r0-r3}
-	mov	pc, lr
+	ret	lr
 #endif
 
 ENTRY(ftrace_stub)
 .Lftrace_stub:
-	mov	pc, lr
+	ret	lr
 ENDPROC(ftrace_stub)
 
 #endif /* CONFIG_FUNCTION_TRACER */
@@ -561,7 +562,7 @@ sys_mmap2:
 		streq	r5, [sp, #4]
 		beq	sys_mmap_pgoff
 		mov	r0, #-EINVAL
-		mov	pc, lr
+		ret	lr
 #else
 		str	r5, [sp, #4]
 		b	sys_mmap_pgoff
diff --git a/arch/arm/kernel/entry-header.S b/arch/arm/kernel/entry-header.S
index 5d702f8900b1..8db307d0954b 100644
--- a/arch/arm/kernel/entry-header.S
+++ b/arch/arm/kernel/entry-header.S
@@ -240,12 +240,6 @@
 	movs	pc, lr				@ return & move spsr_svc into cpsr
 	.endm
 
-	@
-	@ 32-bit wide "mov pc, reg"
-	@
-	.macro	movw_pc, reg
-	mov	pc, \reg
-	.endm
 #else	/* CONFIG_THUMB2_KERNEL */
 	.macro	svc_exit, rpsr, irq = 0
 	.if	\irq != 0
@@ -304,14 +298,6 @@
 	movs	pc, lr				@ return & move spsr_svc into cpsr
 	.endm
 #endif	/* ifdef CONFIG_CPU_V7M / else */
-
-	@
-	@ 32-bit wide "mov pc, reg"
-	@
-	.macro	movw_pc, reg
-	mov	pc, \reg
-	nop
-	.endm
 #endif	/* !CONFIG_THUMB2_KERNEL */
 
 /*
diff --git a/arch/arm/kernel/fiqasm.S b/arch/arm/kernel/fiqasm.S
index 207f9d652010..8dd26e1a9bd6 100644
--- a/arch/arm/kernel/fiqasm.S
+++ b/arch/arm/kernel/fiqasm.S
@@ -32,7 +32,7 @@ ENTRY(__set_fiq_regs)
 	ldr	lr, [r0]
 	msr	cpsr_c, r1	@ return to SVC mode
 	mov	r0, r0		@ avoid hazard prior to ARMv4
-	mov	pc, lr
+	ret	lr
 ENDPROC(__set_fiq_regs)
 
 ENTRY(__get_fiq_regs)
@@ -45,5 +45,5 @@ ENTRY(__get_fiq_regs)
 	str	lr, [r0]
 	msr	cpsr_c, r1	@ return to SVC mode
 	mov	r0, r0		@ avoid hazard prior to ARMv4
-	mov	pc, lr
+	ret	lr
 ENDPROC(__get_fiq_regs)
diff --git a/arch/arm/kernel/head-common.S b/arch/arm/kernel/head-common.S
index 572a38335c96..8733012d231f 100644
--- a/arch/arm/kernel/head-common.S
+++ b/arch/arm/kernel/head-common.S
@@ -10,6 +10,7 @@
  * published by the Free Software Foundation.
  *
  */
+#include <asm/assembler.h>
 
 #define ATAG_CORE 0x54410001
 #define ATAG_CORE_SIZE ((2*4 + 3*4) >> 2)
@@ -61,10 +62,10 @@ __vet_atags:
 	cmp	r5, r6
 	bne	1f
 
-2:	mov	pc, lr				@ atag/dtb pointer is ok
+2:	ret	lr				@ atag/dtb pointer is ok
 
 1:	mov	r2, #0
-	mov	pc, lr
+	ret	lr
 ENDPROC(__vet_atags)
 
 /*
@@ -162,7 +163,7 @@ __lookup_processor_type:
 	cmp	r5, r6
 	blo	1b
 	mov	r5, #0				@ unknown processor
-2:	mov	pc, lr
+2:	ret	lr
 ENDPROC(__lookup_processor_type)
 
 /*
diff --git a/arch/arm/kernel/head-nommu.S b/arch/arm/kernel/head-nommu.S
index 716249cc2ee1..cc176b67c134 100644
--- a/arch/arm/kernel/head-nommu.S
+++ b/arch/arm/kernel/head-nommu.S
@@ -82,7 +82,7 @@ ENTRY(stext)
 	adr	lr, BSYM(1f)			@ return (PIC) address
  ARM(	add	pc, r10, #PROCINFO_INITFUNC	)
  THUMB(	add	r12, r10, #PROCINFO_INITFUNC	)
- THUMB(	mov	pc, r12				)
+ THUMB(	ret	r12				)
  1:	b	__after_proc_init
 ENDPROC(stext)
 
@@ -119,7 +119,7 @@ ENTRY(secondary_startup)
 	mov	r13, r12			@ __secondary_switched address
  ARM(	add	pc, r10, #PROCINFO_INITFUNC	)
  THUMB(	add	r12, r10, #PROCINFO_INITFUNC	)
- THUMB(	mov	pc, r12				)
+ THUMB(	ret	r12				)
 ENDPROC(secondary_startup)
 
 ENTRY(__secondary_switched)
@@ -164,7 +164,7 @@ __after_proc_init:
 #endif
 	mcr	p15, 0, r0, c1, c0, 0		@ write control reg
 #endif /* CONFIG_CPU_CP15 */
-	mov	pc, r13
+	ret	r13
 ENDPROC(__after_proc_init)
 	.ltorg
 
@@ -254,7 +254,7 @@ ENTRY(__setup_mpu)
 	orr	r0, r0, #CR_M			@ Set SCTRL.M (MPU on)
 	mcr	p15, 0, r0, c1, c0, 0		@ Enable MPU
 	isb
-	mov pc,lr
+	ret	lr
 ENDPROC(__setup_mpu)
 #endif
 #include "head-common.S"
diff --git a/arch/arm/kernel/head.S b/arch/arm/kernel/head.S
index 2c35f0ff2fdc..664eee8c4a26 100644
--- a/arch/arm/kernel/head.S
+++ b/arch/arm/kernel/head.S
@@ -140,7 +140,7 @@ ENTRY(stext)
 	mov	r8, r4				@ set TTBR1 to swapper_pg_dir
  ARM(	add	pc, r10, #PROCINFO_INITFUNC	)
  THUMB(	add	r12, r10, #PROCINFO_INITFUNC	)
- THUMB(	mov	pc, r12				)
+ THUMB(	ret	r12				)
 1:	b	__enable_mmu
 ENDPROC(stext)
 	.ltorg
@@ -335,7 +335,7 @@ __create_page_tables:
 	sub	r4, r4, #0x1000		@ point to the PGD table
 	mov	r4, r4, lsr #ARCH_PGD_SHIFT
 #endif
-	mov	pc, lr
+	ret	lr
 ENDPROC(__create_page_tables)
 	.ltorg
 	.align
@@ -383,7 +383,7 @@ ENTRY(secondary_startup)
  ARM(	add	pc, r10, #PROCINFO_INITFUNC	) @ initialise processor
 						  @ (return control reg)
  THUMB(	add	r12, r10, #PROCINFO_INITFUNC	)
- THUMB(	mov	pc, r12				)
+ THUMB(	ret	r12				)
 ENDPROC(secondary_startup)
 
 	/*
@@ -468,7 +468,7 @@ ENTRY(__turn_mmu_on)
 	instr_sync
 	mov	r3, r3
 	mov	r3, r13
-	mov	pc, r3
+	ret	r3
 __turn_mmu_on_end:
 ENDPROC(__turn_mmu_on)
 	.popsection
@@ -487,7 +487,7 @@ __fixup_smp:
 	orr	r4, r4, #0x0000b000
 	orr	r4, r4, #0x00000020	@ val 0x4100b020
 	teq	r3, r4			@ ARM 11MPCore?
-	moveq	pc, lr			@ yes, assume SMP
+	reteq	lr			@ yes, assume SMP
 
 	mrc	p15, 0, r0, c0, c0, 5	@ read MPIDR
 	and	r0, r0, #0xc0000000	@ multiprocessing extensions and
@@ -500,7 +500,7 @@ __fixup_smp:
 	orr	r4, r4, #0x0000c000
 	orr	r4, r4, #0x00000090
 	teq	r3, r4			@ Check for ARM Cortex-A9
-	movne	pc, lr			@ Not ARM Cortex-A9,
+	retne	lr			@ Not ARM Cortex-A9,
 
 	@ If a future SoC *does* use 0x0 as the PERIPH_BASE, then the
 	@ below address check will need to be #ifdef'd or equivalent
@@ -512,7 +512,7 @@ __fixup_smp:
 ARM_BE8(rev	r0, r0)			@ byteswap if big endian
 	and	r0, r0, #0x3		@ number of CPUs
 	teq	r0, #0x0		@ is 1?
-	movne	pc, lr
+	retne	lr
 
 __fixup_smp_on_up:
 	adr	r0, 1f
@@ -539,7 +539,7 @@ smp_on_up:
 	.text
 __do_fixup_smp_on_up:
 	cmp	r4, r5
-	movhs	pc, lr
+	reths	lr
 	ldmia	r4!, {r0, r6}
  ARM(	str	r6, [r0, r3]	)
  THUMB(	add	r0, r0, r3	)
@@ -672,7 +672,7 @@ ARM_BE8(rev16	ip, ip)
 2:	cmp	r4, r5
 	ldrcc	r7, [r4], #4	@ use branch for delay slot
 	bcc	1b
-	mov	pc, lr
+	ret	lr
 #endif
 ENDPROC(__fixup_a_pv_table)
 
diff --git a/arch/arm/kernel/hyp-stub.S b/arch/arm/kernel/hyp-stub.S
index 797b1a6a4906..56ce6290c831 100644
--- a/arch/arm/kernel/hyp-stub.S
+++ b/arch/arm/kernel/hyp-stub.S
@@ -99,7 +99,7 @@ ENTRY(__hyp_stub_install_secondary)
 	 * immediately.
 	 */
 	compare_cpu_mode_with_primary	r4, r5, r6, r7
-	movne	pc, lr
+	retne	lr
 
 	/*
 	 * Once we have given up on one CPU, we do not try to install the
@@ -111,7 +111,7 @@ ENTRY(__hyp_stub_install_secondary)
 	 */
 
 	cmp	r4, #HYP_MODE
-	movne	pc, lr			@ give up if the CPU is not in HYP mode
+	retne	lr			@ give up if the CPU is not in HYP mode
 
 /*
  * Configure HSCTLR to set correct exception endianness/instruction set
@@ -201,7 +201,7 @@ ENDPROC(__hyp_get_vectors)
 	@ fall through
 ENTRY(__hyp_set_vectors)
 	__HVC(0)
-	mov	pc, lr
+	ret	lr
 ENDPROC(__hyp_set_vectors)
 
 #ifndef ZIMAGE
diff --git a/arch/arm/kernel/iwmmxt.S b/arch/arm/kernel/iwmmxt.S
index a5599cfc43cb..0960be7953f0 100644
--- a/arch/arm/kernel/iwmmxt.S
+++ b/arch/arm/kernel/iwmmxt.S
@@ -179,7 +179,7 @@ concan_load:
 	get_thread_info r10
 #endif
 4:	dec_preempt_count r10, r3
-	mov	pc, lr
+	ret	lr
 
 /*
  * Back up Concan regs to save area and disable access to them
@@ -265,7 +265,7 @@ ENTRY(iwmmxt_task_copy)
 	mov	r3, lr				@ preserve return address
 	bl	concan_dump
 	msr	cpsr_c, ip			@ restore interrupt mode
-	mov	pc, r3
+	ret	r3
 
 /*
  * Restore Concan state from given memory address
@@ -301,7 +301,7 @@ ENTRY(iwmmxt_task_restore)
 	mov	r3, lr				@ preserve return address
 	bl	concan_load
 	msr	cpsr_c, ip			@ restore interrupt mode
-	mov	pc, r3
+	ret	r3
 
 /*
  * Concan handling on task switch
@@ -323,7 +323,7 @@ ENTRY(iwmmxt_task_switch)
 	add	r3, r0, #TI_IWMMXT_STATE	@ get next task Concan save area
 	ldr	r2, [r2]			@ get current Concan owner
 	teq	r2, r3				@ next task owns it?
-	movne	pc, lr				@ no: leave Concan disabled
+	retne	lr				@ no: leave Concan disabled
 
 1:	@ flip Concan access
 	XSC(eor	r1, r1, #0x3)
@@ -350,7 +350,7 @@ ENTRY(iwmmxt_task_release)
 	eors	r0, r0, r1			@ if equal...
 	streq	r0, [r3]			@ then clear ownership
 	msr	cpsr_c, r2			@ restore interrupts
-	mov	pc, lr
+	ret	lr
 
 	.data
 concan_owner:
diff --git a/arch/arm/kernel/relocate_kernel.S b/arch/arm/kernel/relocate_kernel.S
index 95858966d84e..35e72585ec1d 100644
--- a/arch/arm/kernel/relocate_kernel.S
+++ b/arch/arm/kernel/relocate_kernel.S
@@ -3,6 +3,7 @@
  */
 
 #include <linux/linkage.h>
+#include <asm/assembler.h>
 #include <asm/kexec.h>
 
 	.align	3	/* not needed for this code, but keeps fncpy() happy */
@@ -59,7 +60,7 @@ ENTRY(relocate_new_kernel)
 	mov r0,#0
 	ldr r1,kexec_mach_type
 	ldr r2,kexec_boot_atags
- ARM(	mov pc, lr	)
+ ARM(	ret lr	)
  THUMB(	bx lr		)
 
 	.align
diff --git a/arch/arm/kernel/sleep.S b/arch/arm/kernel/sleep.S
index 1b880db2a033..e1e60e5a7a27 100644
--- a/arch/arm/kernel/sleep.S
+++ b/arch/arm/kernel/sleep.S
@@ -107,7 +107,7 @@ ENTRY(cpu_resume_mmu)
 	instr_sync
 	mov	r0, r0
 	mov	r0, r0
-	mov	pc, r3			@ jump to virtual address
+	ret	r3			@ jump to virtual address
 ENDPROC(cpu_resume_mmu)
 	.popsection
 cpu_resume_after_mmu:
diff --git a/arch/arm/kvm/init.S b/arch/arm/kvm/init.S
index 1b9844d369cc..b2d229f09c07 100644
--- a/arch/arm/kvm/init.S
+++ b/arch/arm/kvm/init.S
@@ -17,6 +17,7 @@
  */
 
 #include <linux/linkage.h>
+#include <asm/assembler.h>
 #include <asm/unified.h>
 #include <asm/asm-offsets.h>
 #include <asm/kvm_asm.h>
@@ -134,7 +135,7 @@ phase2:
 	ldr	r0, =TRAMPOLINE_VA
 	adr	r1, target
 	bfi	r0, r1, #0, #PAGE_SHIFT
-	mov	pc, r0
+	ret	r0
 
 target:	@ We're now in the trampoline code, switch page tables
 	mcrr	p15, 4, r2, r3, c2
diff --git a/arch/arm/lib/ashldi3.S b/arch/arm/lib/ashldi3.S
index 638deb13da1c..b05e95840651 100644
--- a/arch/arm/lib/ashldi3.S
+++ b/arch/arm/lib/ashldi3.S
@@ -27,6 +27,7 @@ Boston, MA 02110-1301, USA.  */
 
 
 #include <linux/linkage.h>
+#include <asm/assembler.h>
 
 #ifdef __ARMEB__
 #define al r1
@@ -47,7 +48,7 @@ ENTRY(__aeabi_llsl)
  THUMB(	lsrmi	r3, al, ip		)
  THUMB(	orrmi	ah, ah, r3		)
 	mov	al, al, lsl r2
-	mov	pc, lr
+	ret	lr
 
 ENDPROC(__ashldi3)
 ENDPROC(__aeabi_llsl)
diff --git a/arch/arm/lib/ashrdi3.S b/arch/arm/lib/ashrdi3.S
index 015e8aa5a1d1..275d7d2341a4 100644
--- a/arch/arm/lib/ashrdi3.S
+++ b/arch/arm/lib/ashrdi3.S
@@ -27,6 +27,7 @@ Boston, MA 02110-1301, USA.  */
 
 
 #include <linux/linkage.h>
+#include <asm/assembler.h>
 
 #ifdef __ARMEB__
 #define al r1
@@ -47,7 +48,7 @@ ENTRY(__aeabi_lasr)
  THUMB(	lslmi	r3, ah, ip		)
  THUMB(	orrmi	al, al, r3		)
 	mov	ah, ah, asr r2
-	mov	pc, lr
+	ret	lr
 
 ENDPROC(__ashrdi3)
 ENDPROC(__aeabi_lasr)
diff --git a/arch/arm/lib/backtrace.S b/arch/arm/lib/backtrace.S
index 4102be617fce..fab5a50503ae 100644
--- a/arch/arm/lib/backtrace.S
+++ b/arch/arm/lib/backtrace.S
@@ -25,7 +25,7 @@
 ENTRY(c_backtrace)
 
 #if !defined(CONFIG_FRAME_POINTER) || !defined(CONFIG_PRINTK)
-		mov	pc, lr
+		ret	lr
 ENDPROC(c_backtrace)
 #else
 		stmfd	sp!, {r4 - r8, lr}	@ Save an extra register so we have a location...
diff --git a/arch/arm/lib/bitops.h b/arch/arm/lib/bitops.h
index 9f12ed1eea86..7d807cfd8ef5 100644
--- a/arch/arm/lib/bitops.h
+++ b/arch/arm/lib/bitops.h
@@ -1,3 +1,4 @@
+#include <asm/assembler.h>
 #include <asm/unwind.h>
 
 #if __LINUX_ARM_ARCH__ >= 6
@@ -70,7 +71,7 @@ UNWIND(	.fnstart	)
 	\instr	r2, r2, r3
 	str	r2, [r1, r0, lsl #2]
 	restore_irqs ip
-	mov	pc, lr
+	ret	lr
 UNWIND(	.fnend		)
 ENDPROC(\name		)
 	.endm
@@ -98,7 +99,7 @@ UNWIND(	.fnstart	)
 	\store	r2, [r1]
 	moveq	r0, #0
 	restore_irqs ip
-	mov	pc, lr
+	ret	lr
 UNWIND(	.fnend		)
 ENDPROC(\name		)
 	.endm
diff --git a/arch/arm/lib/bswapsdi2.S b/arch/arm/lib/bswapsdi2.S
index 9fcdd154eff9..07cda737bb11 100644
--- a/arch/arm/lib/bswapsdi2.S
+++ b/arch/arm/lib/bswapsdi2.S
@@ -1,4 +1,5 @@
 #include <linux/linkage.h>
+#include <asm/assembler.h>
 
 #if __LINUX_ARM_ARCH__ >= 6
 ENTRY(__bswapsi2)
@@ -18,7 +19,7 @@ ENTRY(__bswapsi2)
 	mov r3, r3, lsr #8
 	bic r3, r3, #0xff00
 	eor r0, r3, r0, ror #8
-	mov pc, lr
+	ret lr
 ENDPROC(__bswapsi2)
 
 ENTRY(__bswapdi2)
@@ -31,6 +32,6 @@ ENTRY(__bswapdi2)
 	bic r1, r1, #0xff00
 	eor r1, r1, r0, ror #8
 	eor r0, r3, ip, ror #8
-	mov pc, lr
+	ret lr
 ENDPROC(__bswapdi2)
 #endif
diff --git a/arch/arm/lib/call_with_stack.S b/arch/arm/lib/call_with_stack.S
index 916c80f13ae7..ed1a421813cb 100644
--- a/arch/arm/lib/call_with_stack.S
+++ b/arch/arm/lib/call_with_stack.S
@@ -36,9 +36,9 @@ ENTRY(call_with_stack)
 	mov	r0, r1
 
 	adr	lr, BSYM(1f)
-	mov	pc, r2
+	ret	r2
 
 1:	ldr	lr, [sp]
 	ldr	sp, [sp, #4]
-	mov	pc, lr
+	ret	lr
 ENDPROC(call_with_stack)
diff --git a/arch/arm/lib/csumpartial.S b/arch/arm/lib/csumpartial.S
index 31d3cb34740d..984e0f29d548 100644
--- a/arch/arm/lib/csumpartial.S
+++ b/arch/arm/lib/csumpartial.S
@@ -97,7 +97,7 @@ td3	.req	lr
 #endif
 #endif
 		adcnes	sum, sum, td0		@ update checksum
-		mov	pc, lr
+		ret	lr
 
 ENTRY(csum_partial)
 		stmfd	sp!, {buf, lr}
diff --git a/arch/arm/lib/csumpartialcopygeneric.S b/arch/arm/lib/csumpartialcopygeneric.S
index d6e742d24007..10b45909610c 100644
--- a/arch/arm/lib/csumpartialcopygeneric.S
+++ b/arch/arm/lib/csumpartialcopygeneric.S
@@ -7,6 +7,7 @@
  * it under the terms of the GNU General Public License version 2 as
  * published by the Free Software Foundation.
  */
+#include <asm/assembler.h>
 
 /*
  * unsigned int
@@ -40,7 +41,7 @@ sum	.req	r3
 		adcs	sum, sum, ip, put_byte_1	@ update checksum
 		strb	ip, [dst], #1
 		tst	dst, #2
-		moveq	pc, lr			@ dst is now 32bit aligned
+		reteq	lr			@ dst is now 32bit aligned
 
 .Ldst_16bit:	load2b	r8, ip
 		sub	len, len, #2
@@ -48,7 +49,7 @@ sum	.req	r3
 		strb	r8, [dst], #1
 		adcs	sum, sum, ip, put_byte_1
 		strb	ip, [dst], #1
-		mov	pc, lr			@ dst is now 32bit aligned
+		ret	lr			@ dst is now 32bit aligned
 
 		/*
 		 * Handle 0 to 7 bytes, with any alignment of source and
diff --git a/arch/arm/lib/delay-loop.S b/arch/arm/lib/delay-loop.S
index bc1033b897b4..518bf6e93f78 100644
--- a/arch/arm/lib/delay-loop.S
+++ b/arch/arm/lib/delay-loop.S
@@ -35,7 +35,7 @@ ENTRY(__loop_const_udelay)			@ 0 <= r0 <= 0x7fffff06
 		mul	r0, r2, r0		@ max = 2^32-1
 		add	r0, r0, r1, lsr #32-6
 		movs	r0, r0, lsr #6
-		moveq	pc, lr
+		reteq	lr
 
 /*
  * loops = r0 * HZ * loops_per_jiffy / 1000000
@@ -46,23 +46,23 @@ ENTRY(__loop_const_udelay)			@ 0 <= r0 <= 0x7fffff06
 ENTRY(__loop_delay)
 		subs	r0, r0, #1
 #if 0
-		movls	pc, lr
+		retls	lr
 		subs	r0, r0, #1
-		movls	pc, lr
+		retls	lr
 		subs	r0, r0, #1
-		movls	pc, lr
+		retls	lr
 		subs	r0, r0, #1
-		movls	pc, lr
+		retls	lr
 		subs	r0, r0, #1
-		movls	pc, lr
+		retls	lr
 		subs	r0, r0, #1
-		movls	pc, lr
+		retls	lr
 		subs	r0, r0, #1
-		movls	pc, lr
+		retls	lr
 		subs	r0, r0, #1
 #endif
 		bhi	__loop_delay
-		mov	pc, lr
+		ret	lr
 ENDPROC(__loop_udelay)
 ENDPROC(__loop_const_udelay)
 ENDPROC(__loop_delay)
diff --git a/arch/arm/lib/div64.S b/arch/arm/lib/div64.S
index e55c4842c290..a9eafe4981eb 100644
--- a/arch/arm/lib/div64.S
+++ b/arch/arm/lib/div64.S
@@ -13,6 +13,7 @@
  */
 
 #include <linux/linkage.h>
+#include <asm/assembler.h>
 #include <asm/unwind.h>
 
 #ifdef __ARMEB__
@@ -97,7 +98,7 @@ UNWIND(.fnstart)
 	mov	yl, #0
 	cmpeq	xl, r4
 	movlo	xh, xl
-	movlo	pc, lr
+	retlo	lr
 
 	@ The division loop for lower bit positions.
 	@ Here we shift remainer bits leftwards rather than moving the
@@ -111,14 +112,14 @@ UNWIND(.fnstart)
 	subcs	xh, xh, r4
 	movs	ip, ip, lsr #1
 	bne	4b
-	mov	pc, lr
+	ret	lr
 
 	@ The top part of remainder became zero.  If carry is set
 	@ (the 33th bit) this is a false positive so resume the loop.
 	@ Otherwise, if lower part is also null then we are done.
 6:	bcs	5b
 	cmp	xl, #0
-	moveq	pc, lr
+	reteq	lr
 
 	@ We still have remainer bits in the low part.  Bring them up.
 
@@ -144,7 +145,7 @@ UNWIND(.fnstart)
 	movs	ip, ip, lsr #1
 	mov	xh, #1
 	bne	4b
-	mov	pc, lr
+	ret	lr
 
 8:	@ Division by a power of 2: determine what that divisor order is
 	@ then simply shift values around
@@ -184,13 +185,13 @@ UNWIND(.fnstart)
  THUMB(	orr	yl, yl, xh		)
 	mov	xh, xl, lsl ip
 	mov	xh, xh, lsr ip
-	mov	pc, lr
+	ret	lr
 
 	@ eq -> division by 1: obvious enough...
 9:	moveq	yl, xl
 	moveq	yh, xh
 	moveq	xh, #0
-	moveq	pc, lr
+	reteq	lr
 UNWIND(.fnend)
 
 UNWIND(.fnstart)
diff --git a/arch/arm/lib/findbit.S b/arch/arm/lib/findbit.S
index 64f6bc1a9132..7848780e8834 100644
--- a/arch/arm/lib/findbit.S
+++ b/arch/arm/lib/findbit.S
@@ -35,7 +35,7 @@ ENTRY(_find_first_zero_bit_le)
 2:		cmp	r2, r1			@ any more?
 		blo	1b
 3:		mov	r0, r1			@ no free bits
-		mov	pc, lr
+		ret	lr
 ENDPROC(_find_first_zero_bit_le)
 
 /*
@@ -76,7 +76,7 @@ ENTRY(_find_first_bit_le)
 2:		cmp	r2, r1			@ any more?
 		blo	1b
 3:		mov	r0, r1			@ no free bits
-		mov	pc, lr
+		ret	lr
 ENDPROC(_find_first_bit_le)
 
 /*
@@ -114,7 +114,7 @@ ENTRY(_find_first_zero_bit_be)
 2:		cmp	r2, r1			@ any more?
 		blo	1b
 3:		mov	r0, r1			@ no free bits
-		mov	pc, lr
+		ret	lr
 ENDPROC(_find_first_zero_bit_be)
 
 ENTRY(_find_next_zero_bit_be)
@@ -148,7 +148,7 @@ ENTRY(_find_first_bit_be)
 2:		cmp	r2, r1			@ any more?
 		blo	1b
 3:		mov	r0, r1			@ no free bits
-		mov	pc, lr
+		ret	lr
 ENDPROC(_find_first_bit_be)
 
 ENTRY(_find_next_bit_be)
@@ -192,5 +192,5 @@ ENDPROC(_find_next_bit_be)
 #endif
 		cmp	r1, r0			@ Clamp to maxbit
 		movlo	r0, r1
-		mov	pc, lr
+		ret	lr
 
diff --git a/arch/arm/lib/getuser.S b/arch/arm/lib/getuser.S
index 9b06bb41fca6..0f958e3d8180 100644
--- a/arch/arm/lib/getuser.S
+++ b/arch/arm/lib/getuser.S
@@ -36,7 +36,7 @@ ENTRY(__get_user_1)
 	check_uaccess r0, 1, r1, r2, __get_user_bad
 1: TUSER(ldrb)	r2, [r0]
 	mov	r0, #0
-	mov	pc, lr
+	ret	lr
 ENDPROC(__get_user_1)
 
 ENTRY(__get_user_2)
@@ -56,20 +56,20 @@ rb	.req	r0
 	orr	r2, rb, r2, lsl #8
 #endif
 	mov	r0, #0
-	mov	pc, lr
+	ret	lr
 ENDPROC(__get_user_2)
 
 ENTRY(__get_user_4)
 	check_uaccess r0, 4, r1, r2, __get_user_bad
 4: TUSER(ldr)	r2, [r0]
 	mov	r0, #0
-	mov	pc, lr
+	ret	lr
 ENDPROC(__get_user_4)
 
 __get_user_bad:
 	mov	r2, #0
 	mov	r0, #-EFAULT
-	mov	pc, lr
+	ret	lr
 ENDPROC(__get_user_bad)
 
 .pushsection __ex_table, "a"
diff --git a/arch/arm/lib/io-readsb.S b/arch/arm/lib/io-readsb.S
index 9f4238987fe9..c31b2f3153f1 100644
--- a/arch/arm/lib/io-readsb.S
+++ b/arch/arm/lib/io-readsb.S
@@ -25,7 +25,7 @@
 
 ENTRY(__raw_readsb)
 		teq	r2, #0		@ do we have to check for the zero len?
-		moveq	pc, lr
+		reteq	lr
 		ands	ip, r1, #3
 		bne	.Linsb_align
 
diff --git a/arch/arm/lib/io-readsl.S b/arch/arm/lib/io-readsl.S
index 7a7430950c79..2ed86fa5465f 100644
--- a/arch/arm/lib/io-readsl.S
+++ b/arch/arm/lib/io-readsl.S
@@ -12,7 +12,7 @@
 
 ENTRY(__raw_readsl)
 		teq	r2, #0		@ do we have to check for the zero len?
-		moveq	pc, lr
+		reteq	lr
 		ands	ip, r1, #3
 		bne	3f
 
@@ -33,7 +33,7 @@ ENTRY(__raw_readsl)
 		stmcsia	r1!, {r3, ip}
 		ldrne	r3, [r0, #0]
 		strne	r3, [r1, #0]
-		mov	pc, lr
+		ret	lr
 
 3:		ldr	r3, [r0]
 		cmp	ip, #2
@@ -75,5 +75,5 @@ ENTRY(__raw_readsl)
 		strb	r3, [r1, #1]
 8:		mov	r3, ip, get_byte_0
 		strb	r3, [r1, #0]
-		mov	pc, lr
+		ret	lr
 ENDPROC(__raw_readsl)
diff --git a/arch/arm/lib/io-readsw-armv3.S b/arch/arm/lib/io-readsw-armv3.S
index 88487c8c4f23..413da9914529 100644
--- a/arch/arm/lib/io-readsw-armv3.S
+++ b/arch/arm/lib/io-readsw-armv3.S
@@ -27,11 +27,11 @@
 		strb	r3, [r1], #1
 
 		subs	r2, r2, #1
-		moveq	pc, lr
+		reteq	lr
 
 ENTRY(__raw_readsw)
 		teq	r2, #0		@ do we have to check for the zero len?
-		moveq	pc, lr
+		reteq	lr
 		tst	r1, #3
 		bne	.Linsw_align
 
diff --git a/arch/arm/lib/io-readsw-armv4.S b/arch/arm/lib/io-readsw-armv4.S
index 1f393d42593d..d9a45e9692ae 100644
--- a/arch/arm/lib/io-readsw-armv4.S
+++ b/arch/arm/lib/io-readsw-armv4.S
@@ -26,7 +26,7 @@
 
 ENTRY(__raw_readsw)
 		teq	r2, #0
-		moveq	pc, lr
+		reteq	lr
 		tst	r1, #3
 		bne	.Linsw_align
 
diff --git a/arch/arm/lib/io-writesb.S b/arch/arm/lib/io-writesb.S
index 68b92f4acaeb..a46bbc9b168b 100644
--- a/arch/arm/lib/io-writesb.S
+++ b/arch/arm/lib/io-writesb.S
@@ -45,7 +45,7 @@
 
 ENTRY(__raw_writesb)
 		teq	r2, #0		@ do we have to check for the zero len?
-		moveq	pc, lr
+		reteq	lr
 		ands	ip, r1, #3
 		bne	.Loutsb_align
 
diff --git a/arch/arm/lib/io-writesl.S b/arch/arm/lib/io-writesl.S
index d0d104a0dd11..4ea2435988c1 100644
--- a/arch/arm/lib/io-writesl.S
+++ b/arch/arm/lib/io-writesl.S
@@ -12,7 +12,7 @@
 
 ENTRY(__raw_writesl)
 		teq	r2, #0		@ do we have to check for the zero len?
-		moveq	pc, lr
+		reteq	lr
 		ands	ip, r1, #3
 		bne	3f
 
@@ -33,7 +33,7 @@ ENTRY(__raw_writesl)
 		ldrne	r3, [r1, #0]
 		strcs	ip, [r0, #0]
 		strne	r3, [r0, #0]
-		mov	pc, lr
+		ret	lr
 
 3:		bic	r1, r1, #3
 		ldr	r3, [r1], #4
@@ -47,7 +47,7 @@ ENTRY(__raw_writesl)
 		orr	ip, ip, r3, lspush #16
 		str	ip, [r0]
 		bne	4b
-		mov	pc, lr
+		ret	lr
 
 5:		mov	ip, r3, lspull #8
 		ldr	r3, [r1], #4
@@ -55,7 +55,7 @@ ENTRY(__raw_writesl)
 		orr	ip, ip, r3, lspush #24
 		str	ip, [r0]
 		bne	5b
-		mov	pc, lr
+		ret	lr
 
 6:		mov	ip, r3, lspull #24
 		ldr	r3, [r1], #4
@@ -63,5 +63,5 @@ ENTRY(__raw_writesl)
 		orr	ip, ip, r3, lspush #8
 		str	ip, [r0]
 		bne	6b
-		mov	pc, lr
+		ret	lr
 ENDPROC(__raw_writesl)
diff --git a/arch/arm/lib/io-writesw-armv3.S b/arch/arm/lib/io-writesw-armv3.S
index 49b800419e32..121789eb6802 100644
--- a/arch/arm/lib/io-writesw-armv3.S
+++ b/arch/arm/lib/io-writesw-armv3.S
@@ -28,11 +28,11 @@
 		orr	r3, r3, r3, lsl #16
 		str	r3, [r0]
 		subs	r2, r2, #1
-		moveq	pc, lr
+		reteq	lr
 
 ENTRY(__raw_writesw)
 		teq	r2, #0		@ do we have to check for the zero len?
-		moveq	pc, lr
+		reteq	lr
 		tst	r1, #3
 		bne	.Loutsw_align
 
diff --git a/arch/arm/lib/io-writesw-armv4.S b/arch/arm/lib/io-writesw-armv4.S
index ff4f71b579ee..269f90c51ad2 100644
--- a/arch/arm/lib/io-writesw-armv4.S
+++ b/arch/arm/lib/io-writesw-armv4.S
@@ -31,7 +31,7 @@
 
 ENTRY(__raw_writesw)
 		teq	r2, #0
-		moveq	pc, lr
+		reteq	lr
 		ands	r3, r1, #3
 		bne	.Loutsw_align
 
@@ -96,5 +96,5 @@ ENTRY(__raw_writesw)
 		tst	r2, #1
 3:		movne	ip, r3, lsr #8
 		strneh	ip, [r0]
-		mov	pc, lr
+		ret	lr
 ENDPROC(__raw_writesw)
diff --git a/arch/arm/lib/lib1funcs.S b/arch/arm/lib/lib1funcs.S
index c562f649734c..947567ff67f9 100644
--- a/arch/arm/lib/lib1funcs.S
+++ b/arch/arm/lib/lib1funcs.S
@@ -210,7 +210,7 @@ ENTRY(__aeabi_uidiv)
 UNWIND(.fnstart)
 
 	subs	r2, r1, #1
-	moveq	pc, lr
+	reteq	lr
 	bcc	Ldiv0
 	cmp	r0, r1
 	bls	11f
@@ -220,16 +220,16 @@ UNWIND(.fnstart)
 	ARM_DIV_BODY r0, r1, r2, r3
 
 	mov	r0, r2
-	mov	pc, lr
+	ret	lr
 
 11:	moveq	r0, #1
 	movne	r0, #0
-	mov	pc, lr
+	ret	lr
 
 12:	ARM_DIV2_ORDER r1, r2
 
 	mov	r0, r0, lsr r2
-	mov	pc, lr
+	ret	lr
 
 UNWIND(.fnend)
 ENDPROC(__udivsi3)
@@ -244,11 +244,11 @@ UNWIND(.fnstart)
 	moveq   r0, #0
 	tsthi	r1, r2				@ see if divisor is power of 2
 	andeq	r0, r0, r2
-	movls	pc, lr
+	retls	lr
 
 	ARM_MOD_BODY r0, r1, r2, r3
 
-	mov	pc, lr
+	ret	lr
 
 UNWIND(.fnend)
 ENDPROC(__umodsi3)
@@ -274,23 +274,23 @@ UNWIND(.fnstart)
 
 	cmp	ip, #0
 	rsbmi	r0, r0, #0
-	mov	pc, lr
+	ret	lr
 
 10:	teq	ip, r0				@ same sign ?
 	rsbmi	r0, r0, #0
-	mov	pc, lr
+	ret	lr
 
 11:	movlo	r0, #0
 	moveq	r0, ip, asr #31
 	orreq	r0, r0, #1
-	mov	pc, lr
+	ret	lr
 
 12:	ARM_DIV2_ORDER r1, r2
 
 	cmp	ip, #0
 	mov	r0, r3, lsr r2
 	rsbmi	r0, r0, #0
-	mov	pc, lr
+	ret	lr
 
 UNWIND(.fnend)
 ENDPROC(__divsi3)
@@ -315,7 +315,7 @@ UNWIND(.fnstart)
 
 10:	cmp	ip, #0
 	rsbmi	r0, r0, #0
-	mov	pc, lr
+	ret	lr
 
 UNWIND(.fnend)
 ENDPROC(__modsi3)
@@ -331,7 +331,7 @@ UNWIND(.save {r0, r1, ip, lr}	)
 	ldmfd	sp!, {r1, r2, ip, lr}
 	mul	r3, r0, r2
 	sub	r1, r1, r3
-	mov	pc, lr
+	ret	lr
 
 UNWIND(.fnend)
 ENDPROC(__aeabi_uidivmod)
@@ -344,7 +344,7 @@ UNWIND(.save {r0, r1, ip, lr}	)
 	ldmfd	sp!, {r1, r2, ip, lr}
 	mul	r3, r0, r2
 	sub	r1, r1, r3
-	mov	pc, lr
+	ret	lr
 
 UNWIND(.fnend)
 ENDPROC(__aeabi_idivmod)
diff --git a/arch/arm/lib/lshrdi3.S b/arch/arm/lib/lshrdi3.S
index f83d449141f7..922dcd88b02b 100644
--- a/arch/arm/lib/lshrdi3.S
+++ b/arch/arm/lib/lshrdi3.S
@@ -27,6 +27,7 @@ Boston, MA 02110-1301, USA.  */
 
 
 #include <linux/linkage.h>
+#include <asm/assembler.h>
 
 #ifdef __ARMEB__
 #define al r1
@@ -47,7 +48,7 @@ ENTRY(__aeabi_llsr)
  THUMB(	lslmi	r3, ah, ip		)
  THUMB(	orrmi	al, al, r3		)
 	mov	ah, ah, lsr r2
-	mov	pc, lr
+	ret	lr
 
 ENDPROC(__lshrdi3)
 ENDPROC(__aeabi_llsr)
diff --git a/arch/arm/lib/memchr.S b/arch/arm/lib/memchr.S
index 1da86991d700..74a5bed6d999 100644
--- a/arch/arm/lib/memchr.S
+++ b/arch/arm/lib/memchr.S
@@ -22,5 +22,5 @@ ENTRY(memchr)
 	bne	1b
 	sub	r0, r0, #1
 2:	movne	r0, #0
-	mov	pc, lr
+	ret	lr
 ENDPROC(memchr)
diff --git a/arch/arm/lib/memset.S b/arch/arm/lib/memset.S
index 94b0650ea98f..671455c854fa 100644
--- a/arch/arm/lib/memset.S
+++ b/arch/arm/lib/memset.S
@@ -110,7 +110,7 @@ ENTRY(memset)
 	strneb	r1, [ip], #1
 	tst	r2, #1
 	strneb	r1, [ip], #1
-	mov	pc, lr
+	ret	lr
 
 6:	subs	r2, r2, #4		@ 1 do we have enough
 	blt	5b			@ 1 bytes to align with?
diff --git a/arch/arm/lib/memzero.S b/arch/arm/lib/memzero.S
index 3fbdef5f802a..385ccb306fa2 100644
--- a/arch/arm/lib/memzero.S
+++ b/arch/arm/lib/memzero.S
@@ -121,5 +121,5 @@ ENTRY(__memzero)
 	strneb	r2, [r0], #1		@ 1
 	tst	r1, #1			@ 1 a byte left over
 	strneb	r2, [r0], #1		@ 1
-	mov	pc, lr			@ 1
+	ret	lr			@ 1
 ENDPROC(__memzero)
diff --git a/arch/arm/lib/muldi3.S b/arch/arm/lib/muldi3.S
index 36c91b4957e2..204305956925 100644
--- a/arch/arm/lib/muldi3.S
+++ b/arch/arm/lib/muldi3.S
@@ -11,6 +11,7 @@
  */
 
 #include <linux/linkage.h>
+#include <asm/assembler.h>
 
 #ifdef __ARMEB__
 #define xh r0
@@ -41,7 +42,7 @@ ENTRY(__aeabi_lmul)
 	adc	xh, xh, yh, lsr #16
 	adds	xl, xl, ip, lsl #16
 	adc	xh, xh, ip, lsr #16
-	mov	pc, lr
+	ret	lr
 
 ENDPROC(__muldi3)
 ENDPROC(__aeabi_lmul)
diff --git a/arch/arm/lib/putuser.S b/arch/arm/lib/putuser.S
index 3d73dcb959b0..38d660d3705f 100644
--- a/arch/arm/lib/putuser.S
+++ b/arch/arm/lib/putuser.S
@@ -36,7 +36,7 @@ ENTRY(__put_user_1)
 	check_uaccess r0, 1, r1, ip, __put_user_bad
 1: TUSER(strb)	r2, [r0]
 	mov	r0, #0
-	mov	pc, lr
+	ret	lr
 ENDPROC(__put_user_1)
 
 ENTRY(__put_user_2)
@@ -60,14 +60,14 @@ ENTRY(__put_user_2)
 #endif
 #endif	/* CONFIG_THUMB2_KERNEL */
 	mov	r0, #0
-	mov	pc, lr
+	ret	lr
 ENDPROC(__put_user_2)
 
 ENTRY(__put_user_4)
 	check_uaccess r0, 4, r1, ip, __put_user_bad
 4: TUSER(str)	r2, [r0]
 	mov	r0, #0
-	mov	pc, lr
+	ret	lr
 ENDPROC(__put_user_4)
 
 ENTRY(__put_user_8)
@@ -80,12 +80,12 @@ ENTRY(__put_user_8)
 6: TUSER(str)	r3, [r0]
 #endif
 	mov	r0, #0
-	mov	pc, lr
+	ret	lr
 ENDPROC(__put_user_8)
 
 __put_user_bad:
 	mov	r0, #-EFAULT
-	mov	pc, lr
+	ret	lr
 ENDPROC(__put_user_bad)
 
 .pushsection __ex_table, "a"
diff --git a/arch/arm/lib/strchr.S b/arch/arm/lib/strchr.S
index d8f2a1c1aea4..013d64c71e8d 100644
--- a/arch/arm/lib/strchr.S
+++ b/arch/arm/lib/strchr.S
@@ -23,5 +23,5 @@ ENTRY(strchr)
 		teq	r2, r1
 		movne	r0, #0
 		subeq	r0, r0, #1
-		mov	pc, lr
+		ret	lr
 ENDPROC(strchr)
diff --git a/arch/arm/lib/strrchr.S b/arch/arm/lib/strrchr.S
index 302f20cd2423..3cec1c7482c4 100644
--- a/arch/arm/lib/strrchr.S
+++ b/arch/arm/lib/strrchr.S
@@ -22,5 +22,5 @@ ENTRY(strrchr)
 		teq	r2, #0
 		bne	1b
 		mov	r0, r3
-		mov	pc, lr
+		ret	lr
 ENDPROC(strrchr)
diff --git a/arch/arm/lib/ucmpdi2.S b/arch/arm/lib/ucmpdi2.S
index f0df6a91db04..ad4a6309141a 100644
--- a/arch/arm/lib/ucmpdi2.S
+++ b/arch/arm/lib/ucmpdi2.S
@@ -11,6 +11,7 @@
  */
 
 #include <linux/linkage.h>
+#include <asm/assembler.h>
 
 #ifdef __ARMEB__
 #define xh r0
@@ -31,7 +32,7 @@ ENTRY(__ucmpdi2)
 	movlo	r0, #0
 	moveq	r0, #1
 	movhi	r0, #2
-	mov	pc, lr
+	ret	lr
 
 ENDPROC(__ucmpdi2)
 
@@ -44,7 +45,7 @@ ENTRY(__aeabi_ulcmp)
 	movlo	r0, #-1
 	moveq	r0, #0
 	movhi	r0, #1
-	mov	pc, lr
+	ret	lr
 
 ENDPROC(__aeabi_ulcmp)
 
diff --git a/arch/arm/mach-davinci/sleep.S b/arch/arm/mach-davinci/sleep.S
index d4e9316ecacb..a5336a5e2739 100644
--- a/arch/arm/mach-davinci/sleep.S
+++ b/arch/arm/mach-davinci/sleep.S
@@ -213,7 +213,7 @@ ddr2clk_stop_done:
 	cmp	ip, r0
 	bne	ddr2clk_stop_done
 
-	mov	pc, lr
+	ret	lr
 ENDPROC(davinci_ddr_psc_config)
 
 CACHE_FLUSH:
diff --git a/arch/arm/mach-ep93xx/crunch-bits.S b/arch/arm/mach-ep93xx/crunch-bits.S
index e96923a3017b..ee0be2af5c61 100644
--- a/arch/arm/mach-ep93xx/crunch-bits.S
+++ b/arch/arm/mach-ep93xx/crunch-bits.S
@@ -198,7 +198,7 @@ crunch_load:
 	get_thread_info r10
 #endif
 2:	dec_preempt_count r10, r3
-	mov	pc, lr
+	ret	lr
 
 /*
  * Back up crunch regs to save area and disable access to them
@@ -277,7 +277,7 @@ ENTRY(crunch_task_copy)
 	mov	r3, lr				@ preserve return address
 	bl	crunch_save
 	msr	cpsr_c, ip			@ restore interrupt mode
-	mov	pc, r3
+	ret	r3
 
 /*
  * Restore crunch state from given memory address
@@ -310,4 +310,4 @@ ENTRY(crunch_task_restore)
 	mov	r3, lr				@ preserve return address
 	bl	crunch_load
 	msr	cpsr_c, ip			@ restore interrupt mode
-	mov	pc, r3
+	ret	r3
diff --git a/arch/arm/mach-imx/suspend-imx6.S b/arch/arm/mach-imx/suspend-imx6.S
index fe123b079c05..74b50f1982db 100644
--- a/arch/arm/mach-imx/suspend-imx6.S
+++ b/arch/arm/mach-imx/suspend-imx6.S
@@ -10,6 +10,7 @@
  */
 
 #include <linux/linkage.h>
+#include <asm/assembler.h>
 #include <asm/asm-offsets.h>
 #include <asm/hardware/cache-l2x0.h>
 #include "hardware.h"
@@ -301,7 +302,7 @@ rbc_loop:
 	resume_mmdc
 
 	/* return to suspend finish */
-	mov	pc, lr
+	ret	lr
 
 resume:
 	/* invalidate L1 I-cache first */
@@ -325,7 +326,7 @@ resume:
 	mov	r5, #0x1
 	resume_mmdc
 
-	mov	pc, lr
+	ret	lr
 ENDPROC(imx6_suspend)
 
 /*
diff --git a/arch/arm/mach-mvebu/coherency_ll.S b/arch/arm/mach-mvebu/coherency_ll.S
index 510c29e079ca..f5d881b5d0f7 100644
--- a/arch/arm/mach-mvebu/coherency_ll.S
+++ b/arch/arm/mach-mvebu/coherency_ll.S
@@ -46,7 +46,7 @@ ENTRY(ll_get_coherency_base)
 	ldr	r1, =coherency_base
 	ldr	r1, [r1]
 2:
-	mov	pc, lr
+	ret	lr
 ENDPROC(ll_get_coherency_base)
 
 /*
@@ -63,7 +63,7 @@ ENTRY(ll_get_coherency_cpumask)
 	mov	r2, #(1 << 24)
 	lsl	r3, r2, r3
 ARM_BE8(rev	r3, r3)
-	mov	pc, lr
+	ret	lr
 ENDPROC(ll_get_coherency_cpumask)
 
 /*
@@ -94,7 +94,7 @@ ENTRY(ll_add_cpu_to_smp_group)
 	strex	r1, r2, [r0]
 	cmp	r1, #0
 	bne	1b
-	mov	pc, lr
+	ret	lr
 ENDPROC(ll_add_cpu_to_smp_group)
 
 ENTRY(ll_enable_coherency)
@@ -118,7 +118,7 @@ ENTRY(ll_enable_coherency)
 	bne	1b
 	dsb
 	mov	r0, #0
-	mov	pc, lr
+	ret	lr
 ENDPROC(ll_enable_coherency)
 
 ENTRY(ll_disable_coherency)
@@ -141,7 +141,7 @@ ENTRY(ll_disable_coherency)
 	cmp	r1, #0
 	bne	1b
 	dsb
-	mov	pc, lr
+	ret	lr
 ENDPROC(ll_disable_coherency)
 
 	.align 2
diff --git a/arch/arm/mach-mvebu/headsmp-a9.S b/arch/arm/mach-mvebu/headsmp-a9.S
index 5925366bc03c..7c91ddb6f1f7 100644
--- a/arch/arm/mach-mvebu/headsmp-a9.S
+++ b/arch/arm/mach-mvebu/headsmp-a9.S
@@ -14,6 +14,7 @@
 
 #include <linux/linkage.h>
 #include <linux/init.h>
+#include <asm/assembler.h>
 
 	__CPUINIT
 #define CPU_RESUME_ADDR_REG 0xf10182d4
@@ -24,7 +25,7 @@
 armada_375_smp_cpu1_enable_code_start:
 	ldr     r0, [pc, #4]
 	ldr     r1, [r0]
-	mov     pc, r1
+	ret     r1
 	.word   CPU_RESUME_ADDR_REG
 armada_375_smp_cpu1_enable_code_end:
 
diff --git a/arch/arm/mach-omap2/sleep44xx.S b/arch/arm/mach-omap2/sleep44xx.S
index 9086ce03ae12..b84a0122d823 100644
--- a/arch/arm/mach-omap2/sleep44xx.S
+++ b/arch/arm/mach-omap2/sleep44xx.S
@@ -10,6 +10,7 @@
  */
 
 #include <linux/linkage.h>
+#include <asm/assembler.h>
 #include <asm/smp_scu.h>
 #include <asm/memory.h>
 #include <asm/hardware/cache-l2x0.h>
@@ -334,7 +335,7 @@ ENDPROC(omap4_cpu_resume)
 
 #ifndef CONFIG_OMAP4_ERRATA_I688
 ENTRY(omap_bus_sync)
-	mov	pc, lr
+	ret	lr
 ENDPROC(omap_bus_sync)
 #endif
 
diff --git a/arch/arm/mach-omap2/sram242x.S b/arch/arm/mach-omap2/sram242x.S
index 680a7c56cc3e..2c88ff2d0236 100644
--- a/arch/arm/mach-omap2/sram242x.S
+++ b/arch/arm/mach-omap2/sram242x.S
@@ -101,7 +101,7 @@ i_dll_wait:
 i_dll_delay:
 	subs	r4, r4, #0x1
 	bne	i_dll_delay
-	mov	pc, lr
+	ret	lr
 
 	/*
 	 * shift up or down voltage, use R9 as input to tell level.
@@ -125,7 +125,7 @@ volt_delay:
 	ldr	r7, [r3]		@ get timer value
 	cmp	r5, r7			@ time up?
 	bhi	volt_delay		@ not yet->branch
-	mov	pc, lr			@ back to caller.
+	ret	lr			@ back to caller.
 
 omap242x_sdi_cm_clksel2_pll:
 	.word OMAP2420_CM_REGADDR(PLL_MOD, CM_CLKSEL2)
@@ -220,7 +220,7 @@ volt_delay_c:
 	ldr	r7, [r10]		@ get timer value
 	cmp	r8, r7			@ time up?
 	bhi	volt_delay_c		@ not yet->branch
-	mov	pc, lr			@ back to caller
+	ret	lr			@ back to caller
 
 omap242x_srs_cm_clksel2_pll:
 	.word OMAP2420_CM_REGADDR(PLL_MOD, CM_CLKSEL2)
diff --git a/arch/arm/mach-omap2/sram243x.S b/arch/arm/mach-omap2/sram243x.S
index a1e9edd673f4..d5deb9761fc7 100644
--- a/arch/arm/mach-omap2/sram243x.S
+++ b/arch/arm/mach-omap2/sram243x.S
@@ -101,7 +101,7 @@ i_dll_wait:
 i_dll_delay:
 	subs	r4, r4, #0x1
 	bne	i_dll_delay
-	mov	pc, lr
+	ret	lr
 
 	/*
 	 * shift up or down voltage, use R9 as input to tell level.
@@ -125,7 +125,7 @@ volt_delay:
 	ldr	r7, [r3]		@ get timer value
 	cmp	r5, r7			@ time up?
 	bhi	volt_delay		@ not yet->branch
-	mov	pc, lr			@ back to caller.
+	ret	lr			@ back to caller.
 
 omap243x_sdi_cm_clksel2_pll:
 	.word OMAP2430_CM_REGADDR(PLL_MOD, CM_CLKSEL2)
@@ -220,7 +220,7 @@ volt_delay_c:
 	ldr	r7, [r10]		@ get timer value
 	cmp	r8, r7			@ time up?
 	bhi	volt_delay_c		@ not yet->branch
-	mov	pc, lr			@ back to caller
+	ret	lr			@ back to caller
 
 omap243x_srs_cm_clksel2_pll:
 	.word OMAP2430_CM_REGADDR(PLL_MOD, CM_CLKSEL2)
diff --git a/arch/arm/mach-pxa/mioa701_bootresume.S b/arch/arm/mach-pxa/mioa701_bootresume.S
index 324d25a48c85..81591491ab94 100644
--- a/arch/arm/mach-pxa/mioa701_bootresume.S
+++ b/arch/arm/mach-pxa/mioa701_bootresume.S
@@ -29,7 +29,7 @@ ENTRY(mioa701_jumpaddr)
 	str	r1, [r0]		@ Early disable resume for next boot
 	ldr	r0, mioa701_jumpaddr	@ (Murphy's Law)
 	ldr	r0, [r0]
-	mov	pc, r0
+	ret	r0
 2:
 
 ENTRY(mioa701_bootstrap_lg)
diff --git a/arch/arm/mach-pxa/standby.S b/arch/arm/mach-pxa/standby.S
index 29f5f5c180b7..eab1645bb4ad 100644
--- a/arch/arm/mach-pxa/standby.S
+++ b/arch/arm/mach-pxa/standby.S
@@ -29,7 +29,7 @@ ENTRY(pxa_cpu_standby)
 	.align	5
 1:	mcr	p14, 0, r2, c7, c0, 0	@ put the system into Standby
 	str	r1, [r0]		@ make sure PSSR_PH/STS are clear
-	mov	pc, lr
+	ret	lr
 
 #endif
 
@@ -108,7 +108,7 @@ ENTRY(pm_enter_standby_start)
 	bic	r0, r0, #0x20000000
 	str	r0, [r1, #PXA3_DMCIER]
 
-	mov	pc, lr
+	ret	lr
 ENTRY(pm_enter_standby_end)
 
 #endif
diff --git a/arch/arm/mach-s3c24xx/sleep-s3c2410.S b/arch/arm/mach-s3c24xx/sleep-s3c2410.S
index c9b91223697c..875ba8911127 100644
--- a/arch/arm/mach-s3c24xx/sleep-s3c2410.S
+++ b/arch/arm/mach-s3c24xx/sleep-s3c2410.S
@@ -66,4 +66,4 @@ s3c2410_do_sleep:
 	streq	r8, [r5]			@ SDRAM power-down config
 	streq	r9, [r6]			@ CPU sleep
 1:	beq	1b
-	mov	pc, r14
+	ret	lr
diff --git a/arch/arm/mach-s3c24xx/sleep-s3c2412.S b/arch/arm/mach-s3c24xx/sleep-s3c2412.S
index 5adaceb7da13..6bf5b4d8743c 100644
--- a/arch/arm/mach-s3c24xx/sleep-s3c2412.S
+++ b/arch/arm/mach-s3c24xx/sleep-s3c2412.S
@@ -65,4 +65,4 @@ s3c2412_sleep_enter1:
 	strne	r9, [r3]
 	bne	s3c2412_sleep_enter1
 
-	mov	pc, r14
+	ret	lr
diff --git a/arch/arm/mach-shmobile/headsmp.S b/arch/arm/mach-shmobile/headsmp.S
index e5be5c88644b..293007579b8e 100644
--- a/arch/arm/mach-shmobile/headsmp.S
+++ b/arch/arm/mach-shmobile/headsmp.S
@@ -12,6 +12,7 @@
  */
 #include <linux/linkage.h>
 #include <linux/init.h>
+#include <asm/assembler.h>
 #include <asm/memory.h>
 
 ENTRY(shmobile_invalidate_start)
@@ -75,7 +76,7 @@ shmobile_smp_boot_next:
 
 shmobile_smp_boot_found:
 	ldr	r0, [r7, r1, lsl #2]
-	mov	pc, r9
+	ret	r9
 ENDPROC(shmobile_smp_boot)
 
 ENTRY(shmobile_smp_sleep)
diff --git a/arch/arm/mach-tegra/sleep-tegra20.S b/arch/arm/mach-tegra/sleep-tegra20.S
index aaaf3abd2688..be4bc5f853f5 100644
--- a/arch/arm/mach-tegra/sleep-tegra20.S
+++ b/arch/arm/mach-tegra/sleep-tegra20.S
@@ -78,7 +78,7 @@ ENTRY(tegra20_hotplug_shutdown)
 	/* Put this CPU down */
 	cpu_id	r0
 	bl	tegra20_cpu_shutdown
-	mov	pc, lr			@ should never get here
+	ret	lr			@ should never get here
 ENDPROC(tegra20_hotplug_shutdown)
 
 /*
@@ -96,7 +96,7 @@ ENDPROC(tegra20_hotplug_shutdown)
  */
 ENTRY(tegra20_cpu_shutdown)
 	cmp	r0, #0
-	moveq	pc, lr			@ must not be called for CPU 0
+	reteq	lr			@ must not be called for CPU 0
 	mov32	r1, TEGRA_PMC_VIRT + PMC_SCRATCH41
 	mov	r12, #CPU_RESETTABLE
 	str	r12, [r1]
@@ -117,7 +117,7 @@ ENTRY(tegra20_cpu_shutdown)
 	cpu_id	r3
 	cmp	r3, r0
 	beq	.
-	mov	pc, lr
+	ret	lr
 ENDPROC(tegra20_cpu_shutdown)
 #endif
 
@@ -164,7 +164,7 @@ ENTRY(tegra_pen_lock)
 	cmpeq	r12, r0			@ !turn == cpu?
 	beq	1b			@ while !turn == cpu && flag[!cpu] == 1
 
-	mov	pc, lr			@ locked
+	ret	lr			@ locked
 ENDPROC(tegra_pen_lock)
 
 ENTRY(tegra_pen_unlock)
@@ -176,7 +176,7 @@ ENTRY(tegra_pen_unlock)
 	addne	r2, r3, #PMC_SCRATCH39
 	mov	r12, #0
 	str	r12, [r2]
-	mov     pc, lr
+	ret     lr
 ENDPROC(tegra_pen_unlock)
 
 /*
@@ -189,7 +189,7 @@ ENTRY(tegra20_cpu_clear_resettable)
 	mov32	r1, TEGRA_PMC_VIRT + PMC_SCRATCH41
 	mov	r12, #CPU_NOT_RESETTABLE
 	str	r12, [r1]
-	mov	pc, lr
+	ret	lr
 ENDPROC(tegra20_cpu_clear_resettable)
 
 /*
@@ -202,7 +202,7 @@ ENTRY(tegra20_cpu_set_resettable_soon)
 	mov32	r1, TEGRA_PMC_VIRT + PMC_SCRATCH41
 	mov	r12, #CPU_RESETTABLE_SOON
 	str	r12, [r1]
-	mov	pc, lr
+	ret	lr
 ENDPROC(tegra20_cpu_set_resettable_soon)
 
 /*
@@ -217,7 +217,7 @@ ENTRY(tegra20_cpu_is_resettable_soon)
 	cmp	r12, #CPU_RESETTABLE_SOON
 	moveq	r0, #1
 	movne	r0, #0
-	mov	pc, lr
+	ret	lr
 ENDPROC(tegra20_cpu_is_resettable_soon)
 
 /*
@@ -239,7 +239,7 @@ ENTRY(tegra20_sleep_core_finish)
 	mov32	r1, TEGRA_IRAM_LPx_RESUME_AREA
 	add	r0, r0, r1
 
-	mov	pc, r3
+	ret	r3
 ENDPROC(tegra20_sleep_core_finish)
 
 /*
@@ -402,7 +402,7 @@ exit_selfrefresh_loop:
 
 	mov32	r0, TEGRA_PMC_BASE
 	ldr	r0, [r0, #PMC_SCRATCH41]
-	mov	pc, r0			@ jump to tegra_resume
+	ret	r0			@ jump to tegra_resume
 ENDPROC(tegra20_lp1_reset)
 
 /*
@@ -455,7 +455,7 @@ tegra20_switch_cpu_to_clk32k:
 	mov	r0, #0	/* brust policy = 32KHz */
 	str	r0, [r5, #CLK_RESET_SCLK_BURST]
 
-	mov	pc, lr
+	ret	lr
 
 /*
  * tegra20_enter_sleep
@@ -535,7 +535,7 @@ padsave_done:
 	adr	r2, tegra20_sclk_save
 	str	r0, [r2]
 	dsb
-	mov	pc, lr
+	ret	lr
 
 tegra20_sdram_pad_address:
 	.word	TEGRA_APB_MISC_BASE + APB_MISC_XM2CFGCPADCTRL
diff --git a/arch/arm/mach-tegra/sleep-tegra30.S b/arch/arm/mach-tegra/sleep-tegra30.S
index b16d4a57fa59..09cad9b071de 100644
--- a/arch/arm/mach-tegra/sleep-tegra30.S
+++ b/arch/arm/mach-tegra/sleep-tegra30.S
@@ -142,7 +142,7 @@ ENTRY(tegra30_hotplug_shutdown)
 	/* Powergate this CPU */
 	mov	r0, #TEGRA30_POWER_HOTPLUG_SHUTDOWN
 	bl	tegra30_cpu_shutdown
-	mov	pc, lr			@ should never get here
+	ret	lr			@ should never get here
 ENDPROC(tegra30_hotplug_shutdown)
 
 /*
@@ -161,7 +161,7 @@ ENTRY(tegra30_cpu_shutdown)
 	bne	_no_cpu0_chk	@ It's not Tegra30
 
 	cmp	r3, #0
-	moveq	pc, lr		@ Must never be called for CPU 0
+	reteq	lr		@ Must never be called for CPU 0
 _no_cpu0_chk:
 
 	ldr	r12, =TEGRA_FLOW_CTRL_VIRT
@@ -266,7 +266,7 @@ ENTRY(tegra30_sleep_core_finish)
 	mov32	r1, TEGRA_IRAM_LPx_RESUME_AREA
 	add	r0, r0, r1
 
-	mov	pc, r3
+	ret	r3
 ENDPROC(tegra30_sleep_core_finish)
 
 /*
@@ -285,7 +285,7 @@ ENTRY(tegra30_sleep_cpu_secondary_finish)
 	mov	r0, #0                          @ power mode flags (!hotplug)
 	bl	tegra30_cpu_shutdown
 	mov	r0, #1                          @ never return here
-	mov	pc, r7
+	ret	r7
 ENDPROC(tegra30_sleep_cpu_secondary_finish)
 
 /*
@@ -529,7 +529,7 @@ __no_dual_emc_chanl:
 
 	mov32	r0, TEGRA_PMC_BASE
 	ldr	r0, [r0, #PMC_SCRATCH41]
-	mov	pc, r0			@ jump to tegra_resume
+	ret	r0			@ jump to tegra_resume
 ENDPROC(tegra30_lp1_reset)
 
 	.align	L1_CACHE_SHIFT
@@ -659,7 +659,7 @@ _no_pll_in_iddq:
 	mov	r0, #0	/* brust policy = 32KHz */
 	str	r0, [r5, #CLK_RESET_SCLK_BURST]
 
-	mov	pc, lr
+	ret	lr
 
 /*
  * tegra30_enter_sleep
@@ -819,7 +819,7 @@ pmc_io_dpd_skip:
 
 	dsb
 
-	mov	pc, lr
+	ret	lr
 
 	.ltorg
 /* dummy symbol for end of IRAM */
diff --git a/arch/arm/mach-tegra/sleep.S b/arch/arm/mach-tegra/sleep.S
index 8d06213fbc47..f024a5109e8e 100644
--- a/arch/arm/mach-tegra/sleep.S
+++ b/arch/arm/mach-tegra/sleep.S
@@ -87,7 +87,7 @@ ENTRY(tegra_init_l2_for_a15)
 	mcrne	p15, 0x1, r0, c9, c0, 2
 _exit_init_l2_a15:
 
-	mov	pc, lr
+	ret	lr
 ENDPROC(tegra_init_l2_for_a15)
 
 /*
@@ -111,7 +111,7 @@ ENTRY(tegra_sleep_cpu_finish)
 	add	r3, r3, r0
 	mov	r0, r1
 
-	mov	pc, r3
+	ret	r3
 ENDPROC(tegra_sleep_cpu_finish)
 
 /*
@@ -139,7 +139,7 @@ ENTRY(tegra_shut_off_mmu)
 	moveq	r3, #0
 	streq	r3, [r2, #L2X0_CTRL]
 #endif
-	mov	pc, r0
+	ret	r0
 ENDPROC(tegra_shut_off_mmu)
 	.popsection
 
@@ -156,6 +156,6 @@ ENTRY(tegra_switch_cpu_to_pllp)
 	str	r0, [r5, #CLK_RESET_CCLK_BURST]
 	mov	r0, #0
 	str	r0, [r5, #CLK_RESET_CCLK_DIVIDER]
-	mov	pc, lr
+	ret	lr
 ENDPROC(tegra_switch_cpu_to_pllp)
 #endif
diff --git a/arch/arm/mm/cache-fa.S b/arch/arm/mm/cache-fa.S
index e505befe51b5..2f0c58836ae7 100644
--- a/arch/arm/mm/cache-fa.S
+++ b/arch/arm/mm/cache-fa.S
@@ -15,6 +15,7 @@
  */
 #include <linux/linkage.h>
 #include <linux/init.h>
+#include <asm/assembler.h>
 #include <asm/memory.h>
 #include <asm/page.h>
 
@@ -45,7 +46,7 @@
 ENTRY(fa_flush_icache_all)
 	mov	r0, #0
 	mcr	p15, 0, r0, c7, c5, 0		@ invalidate I cache
-	mov	pc, lr
+	ret	lr
 ENDPROC(fa_flush_icache_all)
 
 /*
@@ -71,7 +72,7 @@ __flush_whole_cache:
 	mcrne	p15, 0, ip, c7, c5, 6		@ invalidate BTB
 	mcrne	p15, 0, ip, c7, c10, 4		@ drain write buffer
 	mcrne	p15, 0, ip, c7, c5, 4		@ prefetch flush
-	mov	pc, lr
+	ret	lr
 
 /*
  *	flush_user_cache_range(start, end, flags)
@@ -99,7 +100,7 @@ ENTRY(fa_flush_user_cache_range)
 	mcrne	p15, 0, ip, c7, c5, 6		@ invalidate BTB
 	mcrne	p15, 0, ip, c7, c10, 4		@ data write barrier
 	mcrne	p15, 0, ip, c7, c5, 4		@ prefetch flush
-	mov	pc, lr
+	ret	lr
 
 /*
  *	coherent_kern_range(start, end)
@@ -135,7 +136,7 @@ ENTRY(fa_coherent_user_range)
 	mcr	p15, 0, r0, c7, c5, 6		@ invalidate BTB
 	mcr	p15, 0, r0, c7, c10, 4		@ drain write buffer
 	mcr	p15, 0, r0, c7, c5, 4		@ prefetch flush
-	mov	pc, lr
+	ret	lr
 
 /*
  *	flush_kern_dcache_area(void *addr, size_t size)
@@ -155,7 +156,7 @@ ENTRY(fa_flush_kern_dcache_area)
 	mov	r0, #0
 	mcr	p15, 0, r0, c7, c5, 0		@ invalidate I cache
 	mcr	p15, 0, r0, c7, c10, 4		@ drain write buffer
-	mov	pc, lr
+	ret	lr
 
 /*
  *	dma_inv_range(start, end)
@@ -181,7 +182,7 @@ fa_dma_inv_range:
 	blo	1b
 	mov	r0, #0
 	mcr	p15, 0, r0, c7, c10, 4		@ drain write buffer
-	mov	pc, lr
+	ret	lr
 
 /*
  *	dma_clean_range(start, end)
@@ -199,7 +200,7 @@ fa_dma_clean_range:
 	blo	1b
 	mov	r0, #0	
 	mcr	p15, 0, r0, c7, c10, 4		@ drain write buffer
-	mov	pc, lr
+	ret	lr
 
 /*
  *	dma_flush_range(start,end)
@@ -214,7 +215,7 @@ ENTRY(fa_dma_flush_range)
 	blo	1b
 	mov	r0, #0	
 	mcr	p15, 0, r0, c7, c10, 4		@ drain write buffer
-	mov	pc, lr
+	ret	lr
 
 /*
  *	dma_map_area(start, size, dir)
@@ -237,7 +238,7 @@ ENDPROC(fa_dma_map_area)
  *	- dir	- DMA direction
  */
 ENTRY(fa_dma_unmap_area)
-	mov	pc, lr
+	ret	lr
 ENDPROC(fa_dma_unmap_area)
 
 	.globl	fa_flush_kern_cache_louis
diff --git a/arch/arm/mm/cache-nop.S b/arch/arm/mm/cache-nop.S
index 8e12ddca0031..f1cc9861031f 100644
--- a/arch/arm/mm/cache-nop.S
+++ b/arch/arm/mm/cache-nop.S
@@ -5,11 +5,12 @@
  */
 #include <linux/linkage.h>
 #include <linux/init.h>
+#include <asm/assembler.h>
 
 #include "proc-macros.S"
 
 ENTRY(nop_flush_icache_all)
-	mov	pc, lr
+	ret	lr
 ENDPROC(nop_flush_icache_all)
 
 	.globl nop_flush_kern_cache_all
@@ -29,7 +30,7 @@ ENDPROC(nop_flush_icache_all)
 
 ENTRY(nop_coherent_user_range)
 	mov	r0, 0
-	mov	pc, lr
+	ret	lr
 ENDPROC(nop_coherent_user_range)
 
 	.globl nop_flush_kern_dcache_area
diff --git a/arch/arm/mm/cache-v4.S b/arch/arm/mm/cache-v4.S
index a7ba68f59f0c..91e3adf155cb 100644
--- a/arch/arm/mm/cache-v4.S
+++ b/arch/arm/mm/cache-v4.S
@@ -9,6 +9,7 @@
  */
 #include <linux/linkage.h>
 #include <linux/init.h>
+#include <asm/assembler.h>
 #include <asm/page.h>
 #include "proc-macros.S"
 
@@ -18,7 +19,7 @@
  *	Unconditionally clean and invalidate the entire icache.
  */
 ENTRY(v4_flush_icache_all)
-	mov	pc, lr
+	ret	lr
 ENDPROC(v4_flush_icache_all)
 
 /*
@@ -40,7 +41,7 @@ ENTRY(v4_flush_kern_cache_all)
 #ifdef CONFIG_CPU_CP15
 	mov	r0, #0
 	mcr	p15, 0, r0, c7, c7, 0		@ flush ID cache
-	mov	pc, lr
+	ret	lr
 #else
 	/* FALLTHROUGH */
 #endif
@@ -59,7 +60,7 @@ ENTRY(v4_flush_user_cache_range)
 #ifdef CONFIG_CPU_CP15
 	mov	ip, #0
 	mcr	p15, 0, ip, c7, c7, 0		@ flush ID cache
-	mov	pc, lr
+	ret	lr
 #else
 	/* FALLTHROUGH */
 #endif
@@ -89,7 +90,7 @@ ENTRY(v4_coherent_kern_range)
  */
 ENTRY(v4_coherent_user_range)
 	mov	r0, #0
-	mov	pc, lr
+	ret	lr
 
 /*
  *	flush_kern_dcache_area(void *addr, size_t size)
@@ -116,7 +117,7 @@ ENTRY(v4_dma_flush_range)
 	mov	r0, #0
 	mcr	p15, 0, r0, c7, c7, 0		@ flush ID cache
 #endif
-	mov	pc, lr
+	ret	lr
 
 /*
  *	dma_unmap_area(start, size, dir)
@@ -136,7 +137,7 @@ ENTRY(v4_dma_unmap_area)
  *	- dir	- DMA direction
  */
 ENTRY(v4_dma_map_area)
-	mov	pc, lr
+	ret	lr
 ENDPROC(v4_dma_unmap_area)
 ENDPROC(v4_dma_map_area)
 
diff --git a/arch/arm/mm/cache-v4wb.S b/arch/arm/mm/cache-v4wb.S
index cd4945321407..2522f8c8fbb1 100644
--- a/arch/arm/mm/cache-v4wb.S
+++ b/arch/arm/mm/cache-v4wb.S
@@ -9,6 +9,7 @@
  */
 #include <linux/linkage.h>
 #include <linux/init.h>
+#include <asm/assembler.h>
 #include <asm/memory.h>
 #include <asm/page.h>
 #include "proc-macros.S"
@@ -58,7 +59,7 @@ flush_base:
 ENTRY(v4wb_flush_icache_all)
 	mov	r0, #0
 	mcr	p15, 0, r0, c7, c5, 0		@ invalidate I cache
-	mov	pc, lr
+	ret	lr
 ENDPROC(v4wb_flush_icache_all)
 
 /*
@@ -94,7 +95,7 @@ __flush_whole_cache:
 	blo	1b
 #endif
 	mcr	p15, 0, ip, c7, c10, 4		@ drain write buffer
-	mov	pc, lr
+	ret	lr
 
 /*
  *	flush_user_cache_range(start, end, flags)
@@ -122,7 +123,7 @@ ENTRY(v4wb_flush_user_cache_range)
 	blo	1b
 	tst	r2, #VM_EXEC
 	mcrne	p15, 0, ip, c7, c10, 4		@ drain write buffer
-	mov	pc, lr
+	ret	lr
 
 /*
  *	flush_kern_dcache_area(void *addr, size_t size)
@@ -170,7 +171,7 @@ ENTRY(v4wb_coherent_user_range)
 	mov	r0, #0
 	mcr	p15, 0, r0, c7, c5, 0		@ invalidate I cache
 	mcr	p15, 0, r0, c7, c10, 4		@ drain WB
-	mov	pc, lr
+	ret	lr
 
 
 /*
@@ -195,7 +196,7 @@ v4wb_dma_inv_range:
 	cmp	r0, r1
 	blo	1b
 	mcr	p15, 0, r0, c7, c10, 4		@ drain write buffer
-	mov	pc, lr
+	ret	lr
 
 /*
  *	dma_clean_range(start, end)
@@ -212,7 +213,7 @@ v4wb_dma_clean_range:
 	cmp	r0, r1
 	blo	1b
 	mcr	p15, 0, r0, c7, c10, 4		@ drain write buffer
-	mov	pc, lr
+	ret	lr
 
 /*
  *	dma_flush_range(start, end)
@@ -248,7 +249,7 @@ ENDPROC(v4wb_dma_map_area)
  *	- dir	- DMA direction
  */
 ENTRY(v4wb_dma_unmap_area)
-	mov	pc, lr
+	ret	lr
 ENDPROC(v4wb_dma_unmap_area)
 
 	.globl	v4wb_flush_kern_cache_louis
diff --git a/arch/arm/mm/cache-v4wt.S b/arch/arm/mm/cache-v4wt.S
index 11e5e5838bc5..a0982ce49007 100644
--- a/arch/arm/mm/cache-v4wt.S
+++ b/arch/arm/mm/cache-v4wt.S
@@ -13,6 +13,7 @@
  */
 #include <linux/linkage.h>
 #include <linux/init.h>
+#include <asm/assembler.h>
 #include <asm/page.h>
 #include "proc-macros.S"
 
@@ -48,7 +49,7 @@
 ENTRY(v4wt_flush_icache_all)
 	mov	r0, #0
 	mcr	p15, 0, r0, c7, c5, 0		@ invalidate I cache
-	mov	pc, lr
+	ret	lr
 ENDPROC(v4wt_flush_icache_all)
 
 /*
@@ -71,7 +72,7 @@ __flush_whole_cache:
 	tst	r2, #VM_EXEC
 	mcrne	p15, 0, ip, c7, c5, 0		@ invalidate I cache
 	mcr	p15, 0, ip, c7, c6, 0		@ invalidate D cache
-	mov	pc, lr
+	ret	lr
 
 /*
  *	flush_user_cache_range(start, end, flags)
@@ -94,7 +95,7 @@ ENTRY(v4wt_flush_user_cache_range)
 	add	r0, r0, #CACHE_DLINESIZE
 	cmp	r0, r1
 	blo	1b
-	mov	pc, lr
+	ret	lr
 
 /*
  *	coherent_kern_range(start, end)
@@ -126,7 +127,7 @@ ENTRY(v4wt_coherent_user_range)
 	cmp	r0, r1
 	blo	1b
 	mov	r0, #0
-	mov	pc, lr
+	ret	lr
 
 /*
  *	flush_kern_dcache_area(void *addr, size_t size)
@@ -160,7 +161,7 @@ v4wt_dma_inv_range:
 	add	r0, r0, #CACHE_DLINESIZE
 	cmp	r0, r1
 	blo	1b
-	mov	pc, lr
+	ret	lr
 
 /*
  *	dma_flush_range(start, end)
@@ -192,7 +193,7 @@ ENTRY(v4wt_dma_unmap_area)
  *	- dir	- DMA direction
  */
 ENTRY(v4wt_dma_map_area)
-	mov	pc, lr
+	ret	lr
 ENDPROC(v4wt_dma_unmap_area)
 ENDPROC(v4wt_dma_map_area)
 
diff --git a/arch/arm/mm/cache-v6.S b/arch/arm/mm/cache-v6.S
index d8fd4d4bd3d4..24659952c278 100644
--- a/arch/arm/mm/cache-v6.S
+++ b/arch/arm/mm/cache-v6.S
@@ -51,7 +51,7 @@ ENTRY(v6_flush_icache_all)
 #else
 	mcr	p15, 0, r0, c7, c5, 0		@ invalidate I-cache
 #endif
-	mov	pc, lr
+	ret	lr
 ENDPROC(v6_flush_icache_all)
 
 /*
@@ -73,7 +73,7 @@ ENTRY(v6_flush_kern_cache_all)
 #else
 	mcr	p15, 0, r0, c7, c15, 0		@ Cache clean+invalidate
 #endif
-	mov	pc, lr
+	ret	lr
 
 /*
  *	v6_flush_cache_all()
@@ -98,7 +98,7 @@ ENTRY(v6_flush_user_cache_all)
  *	- we have a VIPT cache.
  */
 ENTRY(v6_flush_user_cache_range)
-	mov	pc, lr
+	ret	lr
 
 /*
  *	v6_coherent_kern_range(start,end)
@@ -150,7 +150,7 @@ ENTRY(v6_coherent_user_range)
 #else
 	mcr	p15, 0, r0, c7, c5, 6		@ invalidate BTB
 #endif
-	mov	pc, lr
+	ret	lr
 
 /*
  * Fault handling for the cache operation above. If the virtual address in r0
@@ -158,7 +158,7 @@ ENTRY(v6_coherent_user_range)
  */
 9001:
 	mov	r0, #-EFAULT
-	mov	pc, lr
+	ret	lr
  UNWIND(.fnend		)
 ENDPROC(v6_coherent_user_range)
 ENDPROC(v6_coherent_kern_range)
@@ -188,7 +188,7 @@ ENTRY(v6_flush_kern_dcache_area)
 	mov	r0, #0
 	mcr	p15, 0, r0, c7, c10, 4
 #endif
-	mov	pc, lr
+	ret	lr
 
 
 /*
@@ -239,7 +239,7 @@ v6_dma_inv_range:
 	blo	1b
 	mov	r0, #0
 	mcr	p15, 0, r0, c7, c10, 4		@ drain write buffer
-	mov	pc, lr
+	ret	lr
 
 /*
  *	v6_dma_clean_range(start,end)
@@ -262,7 +262,7 @@ v6_dma_clean_range:
 	blo	1b
 	mov	r0, #0
 	mcr	p15, 0, r0, c7, c10, 4		@ drain write buffer
-	mov	pc, lr
+	ret	lr
 
 /*
  *	v6_dma_flush_range(start,end)
@@ -290,7 +290,7 @@ ENTRY(v6_dma_flush_range)
 	blo	1b
 	mov	r0, #0
 	mcr	p15, 0, r0, c7, c10, 4		@ drain write buffer
-	mov	pc, lr
+	ret	lr
 
 /*
  *	dma_map_area(start, size, dir)
@@ -323,7 +323,7 @@ ENTRY(v6_dma_unmap_area)
 	teq	r2, #DMA_TO_DEVICE
 	bne	v6_dma_inv_range
 #endif
-	mov	pc, lr
+	ret	lr
 ENDPROC(v6_dma_unmap_area)
 
 	.globl	v6_flush_kern_cache_louis
diff --git a/arch/arm/mm/cache-v7.S b/arch/arm/mm/cache-v7.S
index 615c99e38ba1..b966656d2c2d 100644
--- a/arch/arm/mm/cache-v7.S
+++ b/arch/arm/mm/cache-v7.S
@@ -61,7 +61,7 @@ ENTRY(v7_invalidate_l1)
        bgt     1b
        dsb     st
        isb
-       mov     pc, lr
+       ret     lr
 ENDPROC(v7_invalidate_l1)
 
 /*
@@ -76,7 +76,7 @@ ENTRY(v7_flush_icache_all)
 	mov	r0, #0
 	ALT_SMP(mcr	p15, 0, r0, c7, c1, 0)		@ invalidate I-cache inner shareable
 	ALT_UP(mcr	p15, 0, r0, c7, c5, 0)		@ I+BTB cache invalidate
-	mov	pc, lr
+	ret	lr
 ENDPROC(v7_flush_icache_all)
 
  /*
@@ -94,7 +94,7 @@ ENTRY(v7_flush_dcache_louis)
 	ALT_UP(ands	r3, r0, #(7 << 27))	@ extract LoUU from clidr
 #ifdef CONFIG_ARM_ERRATA_643719
 	ALT_SMP(mrceq	p15, 0, r2, c0, c0, 0)	@ read main ID register
-	ALT_UP(moveq	pc, lr)			@ LoUU is zero, so nothing to do
+	ALT_UP(reteq	lr)			@ LoUU is zero, so nothing to do
 	ldreq	r1, =0x410fc090                 @ ID of ARM Cortex A9 r0p?
 	biceq	r2, r2, #0x0000000f             @ clear minor revision number
 	teqeq	r2, r1                          @ test for errata affected core and if so...
@@ -102,7 +102,7 @@ ENTRY(v7_flush_dcache_louis)
 #endif
 	ALT_SMP(mov	r3, r3, lsr #20)	@ r3 = LoUIS * 2
 	ALT_UP(mov	r3, r3, lsr #26)	@ r3 = LoUU * 2
-	moveq	pc, lr				@ return if level == 0
+	reteq	lr				@ return if level == 0
 	mov	r10, #0				@ r10 (starting level) = 0
 	b	flush_levels			@ start flushing cache levels
 ENDPROC(v7_flush_dcache_louis)
@@ -168,7 +168,7 @@ finished:
 	mcr	p15, 2, r10, c0, c0, 0		@ select current cache level in cssr
 	dsb	st
 	isb
-	mov	pc, lr
+	ret	lr
 ENDPROC(v7_flush_dcache_all)
 
 /*
@@ -191,7 +191,7 @@ ENTRY(v7_flush_kern_cache_all)
 	ALT_UP(mcr	p15, 0, r0, c7, c5, 0)	@ I+BTB cache invalidate
  ARM(	ldmfd	sp!, {r4-r5, r7, r9-r11, lr}	)
  THUMB(	ldmfd	sp!, {r4-r7, r9-r11, lr}	)
-	mov	pc, lr
+	ret	lr
 ENDPROC(v7_flush_kern_cache_all)
 
  /*
@@ -209,7 +209,7 @@ ENTRY(v7_flush_kern_cache_louis)
 	ALT_UP(mcr	p15, 0, r0, c7, c5, 0)	@ I+BTB cache invalidate
  ARM(	ldmfd	sp!, {r4-r5, r7, r9-r11, lr}	)
  THUMB(	ldmfd	sp!, {r4-r7, r9-r11, lr}	)
-	mov	pc, lr
+	ret	lr
 ENDPROC(v7_flush_kern_cache_louis)
 
 /*
@@ -235,7 +235,7 @@ ENTRY(v7_flush_user_cache_all)
  *	- we have a VIPT cache.
  */
 ENTRY(v7_flush_user_cache_range)
-	mov	pc, lr
+	ret	lr
 ENDPROC(v7_flush_user_cache_all)
 ENDPROC(v7_flush_user_cache_range)
 
@@ -296,7 +296,7 @@ ENTRY(v7_coherent_user_range)
 	ALT_UP(mcr	p15, 0, r0, c7, c5, 6)	@ invalidate BTB
 	dsb	ishst
 	isb
-	mov	pc, lr
+	ret	lr
 
 /*
  * Fault handling for the cache operation above. If the virtual address in r0
@@ -307,7 +307,7 @@ ENTRY(v7_coherent_user_range)
 	dsb
 #endif
 	mov	r0, #-EFAULT
-	mov	pc, lr
+	ret	lr
  UNWIND(.fnend		)
 ENDPROC(v7_coherent_kern_range)
 ENDPROC(v7_coherent_user_range)
@@ -336,7 +336,7 @@ ENTRY(v7_flush_kern_dcache_area)
 	cmp	r0, r1
 	blo	1b
 	dsb	st
-	mov	pc, lr
+	ret	lr
 ENDPROC(v7_flush_kern_dcache_area)
 
 /*
@@ -369,7 +369,7 @@ v7_dma_inv_range:
 	cmp	r0, r1
 	blo	1b
 	dsb	st
-	mov	pc, lr
+	ret	lr
 ENDPROC(v7_dma_inv_range)
 
 /*
@@ -391,7 +391,7 @@ v7_dma_clean_range:
 	cmp	r0, r1
 	blo	1b
 	dsb	st
-	mov	pc, lr
+	ret	lr
 ENDPROC(v7_dma_clean_range)
 
 /*
@@ -413,7 +413,7 @@ ENTRY(v7_dma_flush_range)
 	cmp	r0, r1
 	blo	1b
 	dsb	st
-	mov	pc, lr
+	ret	lr
 ENDPROC(v7_dma_flush_range)
 
 /*
@@ -439,7 +439,7 @@ ENTRY(v7_dma_unmap_area)
 	add	r1, r1, r0
 	teq	r2, #DMA_TO_DEVICE
 	bne	v7_dma_inv_range
-	mov	pc, lr
+	ret	lr
 ENDPROC(v7_dma_unmap_area)
 
 	__INITDATA
diff --git a/arch/arm/mm/l2c-l2x0-resume.S b/arch/arm/mm/l2c-l2x0-resume.S
index 99b05f21a59a..fda415e4ca8f 100644
--- a/arch/arm/mm/l2c-l2x0-resume.S
+++ b/arch/arm/mm/l2c-l2x0-resume.S
@@ -6,6 +6,7 @@
  * This code can only be used to if you are running in the secure world.
  */
 #include <linux/linkage.h>
+#include <asm/assembler.h>
 #include <asm/hardware/cache-l2x0.h>
 
 	.text
@@ -27,7 +28,7 @@ ENTRY(l2c310_early_resume)
 
 	@ Check that the address has been initialised
 	teq	r1, #0
-	moveq	pc, lr
+	reteq	lr
 
 	@ The prefetch and power control registers are revision dependent
 	@ and can be written whether or not the L2 cache is enabled
@@ -41,7 +42,7 @@ ENTRY(l2c310_early_resume)
 	@ Don't setup the L2 cache if it is already enabled
 	ldr	r0, [r1, #L2X0_CTRL]
 	tst	r0, #L2X0_CTRL_EN
-	movne	pc, lr
+	retne	lr
 
 	str	r3, [r1, #L310_TAG_LATENCY_CTRL]
 	str	r4, [r1, #L310_DATA_LATENCY_CTRL]
@@ -51,7 +52,7 @@ ENTRY(l2c310_early_resume)
 	str	r2, [r1, #L2X0_AUX_CTRL]
 	mov	r9, #L2X0_CTRL_EN
 	str	r9, [r1, #L2X0_CTRL]
-	mov	pc, lr
+	ret	lr
 ENDPROC(l2c310_early_resume)
 
 	.align
diff --git a/arch/arm/mm/proc-arm1020.S b/arch/arm/mm/proc-arm1020.S
index d1a2d05971e0..86ee5d47ce3c 100644
--- a/arch/arm/mm/proc-arm1020.S
+++ b/arch/arm/mm/proc-arm1020.S
@@ -73,7 +73,7 @@
  * cpu_arm1020_proc_init()
  */
 ENTRY(cpu_arm1020_proc_init)
-	mov	pc, lr
+	ret	lr
 
 /*
  * cpu_arm1020_proc_fin()
@@ -83,7 +83,7 @@ ENTRY(cpu_arm1020_proc_fin)
 	bic	r0, r0, #0x1000 		@ ...i............
 	bic	r0, r0, #0x000e 		@ ............wca.
 	mcr	p15, 0, r0, c1, c0, 0		@ disable caches
-	mov	pc, lr
+	ret	lr
 
 /*
  * cpu_arm1020_reset(loc)
@@ -107,7 +107,7 @@ ENTRY(cpu_arm1020_reset)
 	bic	ip, ip, #0x000f 		@ ............wcam
 	bic	ip, ip, #0x1100 		@ ...i...s........
 	mcr	p15, 0, ip, c1, c0, 0		@ ctrl register
-	mov	pc, r0
+	ret	r0
 ENDPROC(cpu_arm1020_reset)
 	.popsection
 
@@ -117,7 +117,7 @@ ENDPROC(cpu_arm1020_reset)
 	.align	5
 ENTRY(cpu_arm1020_do_idle)
 	mcr	p15, 0, r0, c7, c0, 4		@ Wait for interrupt
-	mov	pc, lr
+	ret	lr
 
 /* ================================= CACHE ================================ */
 
@@ -133,7 +133,7 @@ ENTRY(arm1020_flush_icache_all)
 	mov	r0, #0
 	mcr	p15, 0, r0, c7, c5, 0		@ invalidate I cache
 #endif
-	mov	pc, lr
+	ret	lr
 ENDPROC(arm1020_flush_icache_all)
 
 /*
@@ -169,7 +169,7 @@ __flush_whole_cache:
 	mcrne	p15, 0, ip, c7, c5, 0		@ invalidate I cache
 #endif
 	mcrne	p15, 0, ip, c7, c10, 4		@ drain WB
-	mov	pc, lr
+	ret	lr
 
 /*
  *	flush_user_cache_range(start, end, flags)
@@ -200,7 +200,7 @@ ENTRY(arm1020_flush_user_cache_range)
 	mcrne	p15, 0, ip, c7, c5, 0		@ invalidate I cache
 #endif
 	mcrne	p15, 0, ip, c7, c10, 4		@ drain WB
-	mov	pc, lr
+	ret	lr
 
 /*
  *	coherent_kern_range(start, end)
@@ -242,7 +242,7 @@ ENTRY(arm1020_coherent_user_range)
 	blo	1b
 	mcr	p15, 0, ip, c7, c10, 4		@ drain WB
 	mov	r0, #0
-	mov	pc, lr
+	ret	lr
 
 /*
  *	flush_kern_dcache_area(void *addr, size_t size)
@@ -264,7 +264,7 @@ ENTRY(arm1020_flush_kern_dcache_area)
 	blo	1b
 #endif
 	mcr	p15, 0, ip, c7, c10, 4		@ drain WB
-	mov	pc, lr
+	ret	lr
 
 /*
  *	dma_inv_range(start, end)
@@ -297,7 +297,7 @@ arm1020_dma_inv_range:
 	blo	1b
 #endif
 	mcr	p15, 0, ip, c7, c10, 4		@ drain WB
-	mov	pc, lr
+	ret	lr
 
 /*
  *	dma_clean_range(start, end)
@@ -320,7 +320,7 @@ arm1020_dma_clean_range:
 	blo	1b
 #endif
 	mcr	p15, 0, ip, c7, c10, 4		@ drain WB
-	mov	pc, lr
+	ret	lr
 
 /*
  *	dma_flush_range(start, end)
@@ -342,7 +342,7 @@ ENTRY(arm1020_dma_flush_range)
 	blo	1b
 #endif
 	mcr	p15, 0, ip, c7, c10, 4		@ drain WB
-	mov	pc, lr
+	ret	lr
 
 /*
  *	dma_map_area(start, size, dir)
@@ -365,7 +365,7 @@ ENDPROC(arm1020_dma_map_area)
  *	- dir	- DMA direction
  */
 ENTRY(arm1020_dma_unmap_area)
-	mov	pc, lr
+	ret	lr
 ENDPROC(arm1020_dma_unmap_area)
 
 	.globl	arm1020_flush_kern_cache_louis
@@ -384,7 +384,7 @@ ENTRY(cpu_arm1020_dcache_clean_area)
 	subs	r1, r1, #CACHE_DLINESIZE
 	bhi	1b
 #endif
-	mov	pc, lr
+	ret	lr
 
 /* =============================== PageTable ============================== */
 
@@ -423,7 +423,7 @@ ENTRY(cpu_arm1020_switch_mm)
 	mcr	p15, 0, r0, c2, c0, 0		@ load page table pointer
 	mcr	p15, 0, r1, c8, c7, 0		@ invalidate I & D TLBs
 #endif /* CONFIG_MMU */
-	mov	pc, lr
+	ret	lr
         
 /*
  * cpu_arm1020_set_pte(ptep, pte)
@@ -441,7 +441,7 @@ ENTRY(cpu_arm1020_set_pte_ext)
 #endif
 	mcr	p15, 0, r0, c7, c10, 4		@ drain WB
 #endif /* CONFIG_MMU */
-	mov	pc, lr
+	ret	lr
 
 	.type	__arm1020_setup, #function
 __arm1020_setup:
@@ -460,7 +460,7 @@ __arm1020_setup:
 #ifdef CONFIG_CPU_CACHE_ROUND_ROBIN
 	orr	r0, r0, #0x4000 		@ .R.. .... .... ....
 #endif
-	mov	pc, lr
+	ret	lr
 	.size	__arm1020_setup, . - __arm1020_setup
 
 	/*
diff --git a/arch/arm/mm/proc-arm1020e.S b/arch/arm/mm/proc-arm1020e.S
index 9d89405c3d03..a6331d78601f 100644
--- a/arch/arm/mm/proc-arm1020e.S
+++ b/arch/arm/mm/proc-arm1020e.S
@@ -73,7 +73,7 @@
  * cpu_arm1020e_proc_init()
  */
 ENTRY(cpu_arm1020e_proc_init)
-	mov	pc, lr
+	ret	lr
 
 /*
  * cpu_arm1020e_proc_fin()
@@ -83,7 +83,7 @@ ENTRY(cpu_arm1020e_proc_fin)
 	bic	r0, r0, #0x1000 		@ ...i............
 	bic	r0, r0, #0x000e 		@ ............wca.
 	mcr	p15, 0, r0, c1, c0, 0		@ disable caches
-	mov	pc, lr
+	ret	lr
 
 /*
  * cpu_arm1020e_reset(loc)
@@ -107,7 +107,7 @@ ENTRY(cpu_arm1020e_reset)
 	bic	ip, ip, #0x000f 		@ ............wcam
 	bic	ip, ip, #0x1100 		@ ...i...s........
 	mcr	p15, 0, ip, c1, c0, 0		@ ctrl register
-	mov	pc, r0
+	ret	r0
 ENDPROC(cpu_arm1020e_reset)
 	.popsection
 
@@ -117,7 +117,7 @@ ENDPROC(cpu_arm1020e_reset)
 	.align	5
 ENTRY(cpu_arm1020e_do_idle)
 	mcr	p15, 0, r0, c7, c0, 4		@ Wait for interrupt
-	mov	pc, lr
+	ret	lr
 
 /* ================================= CACHE ================================ */
 
@@ -133,7 +133,7 @@ ENTRY(arm1020e_flush_icache_all)
 	mov	r0, #0
 	mcr	p15, 0, r0, c7, c5, 0		@ invalidate I cache
 #endif
-	mov	pc, lr
+	ret	lr
 ENDPROC(arm1020e_flush_icache_all)
 
 /*
@@ -168,7 +168,7 @@ __flush_whole_cache:
 	mcrne	p15, 0, ip, c7, c5, 0		@ invalidate I cache
 #endif
 	mcrne	p15, 0, ip, c7, c10, 4		@ drain WB
-	mov	pc, lr
+	ret	lr
 
 /*
  *	flush_user_cache_range(start, end, flags)
@@ -197,7 +197,7 @@ ENTRY(arm1020e_flush_user_cache_range)
 	mcrne	p15, 0, ip, c7, c5, 0		@ invalidate I cache
 #endif
 	mcrne	p15, 0, ip, c7, c10, 4		@ drain WB
-	mov	pc, lr
+	ret	lr
 
 /*
  *	coherent_kern_range(start, end)
@@ -236,7 +236,7 @@ ENTRY(arm1020e_coherent_user_range)
 	blo	1b
 	mcr	p15, 0, ip, c7, c10, 4		@ drain WB
 	mov	r0, #0
-	mov	pc, lr
+	ret	lr
 
 /*
  *	flush_kern_dcache_area(void *addr, size_t size)
@@ -257,7 +257,7 @@ ENTRY(arm1020e_flush_kern_dcache_area)
 	blo	1b
 #endif
 	mcr	p15, 0, ip, c7, c10, 4		@ drain WB
-	mov	pc, lr
+	ret	lr
 
 /*
  *	dma_inv_range(start, end)
@@ -286,7 +286,7 @@ arm1020e_dma_inv_range:
 	blo	1b
 #endif
 	mcr	p15, 0, ip, c7, c10, 4		@ drain WB
-	mov	pc, lr
+	ret	lr
 
 /*
  *	dma_clean_range(start, end)
@@ -308,7 +308,7 @@ arm1020e_dma_clean_range:
 	blo	1b
 #endif
 	mcr	p15, 0, ip, c7, c10, 4		@ drain WB
-	mov	pc, lr
+	ret	lr
 
 /*
  *	dma_flush_range(start, end)
@@ -328,7 +328,7 @@ ENTRY(arm1020e_dma_flush_range)
 	blo	1b
 #endif
 	mcr	p15, 0, ip, c7, c10, 4		@ drain WB
-	mov	pc, lr
+	ret	lr
 
 /*
  *	dma_map_area(start, size, dir)
@@ -351,7 +351,7 @@ ENDPROC(arm1020e_dma_map_area)
  *	- dir	- DMA direction
  */
 ENTRY(arm1020e_dma_unmap_area)
-	mov	pc, lr
+	ret	lr
 ENDPROC(arm1020e_dma_unmap_area)
 
 	.globl	arm1020e_flush_kern_cache_louis
@@ -369,7 +369,7 @@ ENTRY(cpu_arm1020e_dcache_clean_area)
 	subs	r1, r1, #CACHE_DLINESIZE
 	bhi	1b
 #endif
-	mov	pc, lr
+	ret	lr
 
 /* =============================== PageTable ============================== */
 
@@ -407,7 +407,7 @@ ENTRY(cpu_arm1020e_switch_mm)
 	mcr	p15, 0, r0, c2, c0, 0		@ load page table pointer
 	mcr	p15, 0, r1, c8, c7, 0		@ invalidate I & D TLBs
 #endif
-	mov	pc, lr
+	ret	lr
         
 /*
  * cpu_arm1020e_set_pte(ptep, pte)
@@ -423,7 +423,7 @@ ENTRY(cpu_arm1020e_set_pte_ext)
 	mcr	p15, 0, r0, c7, c10, 1		@ clean D entry
 #endif
 #endif /* CONFIG_MMU */
-	mov	pc, lr
+	ret	lr
 
 	.type	__arm1020e_setup, #function
 __arm1020e_setup:
@@ -441,7 +441,7 @@ __arm1020e_setup:
 #ifdef CONFIG_CPU_CACHE_ROUND_ROBIN
 	orr	r0, r0, #0x4000 		@ .R.. .... .... ....
 #endif
-	mov	pc, lr
+	ret	lr
 	.size	__arm1020e_setup, . - __arm1020e_setup
 
 	/*
diff --git a/arch/arm/mm/proc-arm1022.S b/arch/arm/mm/proc-arm1022.S
index 6f01a0ae3b30..a126b7a59928 100644
--- a/arch/arm/mm/proc-arm1022.S
+++ b/arch/arm/mm/proc-arm1022.S
@@ -62,7 +62,7 @@
  * cpu_arm1022_proc_init()
  */
 ENTRY(cpu_arm1022_proc_init)
-	mov	pc, lr
+	ret	lr
 
 /*
  * cpu_arm1022_proc_fin()
@@ -72,7 +72,7 @@ ENTRY(cpu_arm1022_proc_fin)
 	bic	r0, r0, #0x1000 		@ ...i............
 	bic	r0, r0, #0x000e 		@ ............wca.
 	mcr	p15, 0, r0, c1, c0, 0		@ disable caches
-	mov	pc, lr
+	ret	lr
 
 /*
  * cpu_arm1022_reset(loc)
@@ -96,7 +96,7 @@ ENTRY(cpu_arm1022_reset)
 	bic	ip, ip, #0x000f 		@ ............wcam
 	bic	ip, ip, #0x1100 		@ ...i...s........
 	mcr	p15, 0, ip, c1, c0, 0		@ ctrl register
-	mov	pc, r0
+	ret	r0
 ENDPROC(cpu_arm1022_reset)
 	.popsection
 
@@ -106,7 +106,7 @@ ENDPROC(cpu_arm1022_reset)
 	.align	5
 ENTRY(cpu_arm1022_do_idle)
 	mcr	p15, 0, r0, c7, c0, 4		@ Wait for interrupt
-	mov	pc, lr
+	ret	lr
 
 /* ================================= CACHE ================================ */
 
@@ -122,7 +122,7 @@ ENTRY(arm1022_flush_icache_all)
 	mov	r0, #0
 	mcr	p15, 0, r0, c7, c5, 0		@ invalidate I cache
 #endif
-	mov	pc, lr
+	ret	lr
 ENDPROC(arm1022_flush_icache_all)
 
 /*
@@ -156,7 +156,7 @@ __flush_whole_cache:
 	mcrne	p15, 0, ip, c7, c5, 0		@ invalidate I cache
 #endif
 	mcrne	p15, 0, ip, c7, c10, 4		@ drain WB
-	mov	pc, lr
+	ret	lr
 
 /*
  *	flush_user_cache_range(start, end, flags)
@@ -185,7 +185,7 @@ ENTRY(arm1022_flush_user_cache_range)
 	mcrne	p15, 0, ip, c7, c5, 0		@ invalidate I cache
 #endif
 	mcrne	p15, 0, ip, c7, c10, 4		@ drain WB
-	mov	pc, lr
+	ret	lr
 
 /*
  *	coherent_kern_range(start, end)
@@ -225,7 +225,7 @@ ENTRY(arm1022_coherent_user_range)
 	blo	1b
 	mcr	p15, 0, ip, c7, c10, 4		@ drain WB
 	mov	r0, #0
-	mov	pc, lr
+	ret	lr
 
 /*
  *	flush_kern_dcache_area(void *addr, size_t size)
@@ -246,7 +246,7 @@ ENTRY(arm1022_flush_kern_dcache_area)
 	blo	1b
 #endif
 	mcr	p15, 0, ip, c7, c10, 4		@ drain WB
-	mov	pc, lr
+	ret	lr
 
 /*
  *	dma_inv_range(start, end)
@@ -275,7 +275,7 @@ arm1022_dma_inv_range:
 	blo	1b
 #endif
 	mcr	p15, 0, ip, c7, c10, 4		@ drain WB
-	mov	pc, lr
+	ret	lr
 
 /*
  *	dma_clean_range(start, end)
@@ -297,7 +297,7 @@ arm1022_dma_clean_range:
 	blo	1b
 #endif
 	mcr	p15, 0, ip, c7, c10, 4		@ drain WB
-	mov	pc, lr
+	ret	lr
 
 /*
  *	dma_flush_range(start, end)
@@ -317,7 +317,7 @@ ENTRY(arm1022_dma_flush_range)
 	blo	1b
 #endif
 	mcr	p15, 0, ip, c7, c10, 4		@ drain WB
-	mov	pc, lr
+	ret	lr
 
 /*
  *	dma_map_area(start, size, dir)
@@ -340,7 +340,7 @@ ENDPROC(arm1022_dma_map_area)
  *	- dir	- DMA direction
  */
 ENTRY(arm1022_dma_unmap_area)
-	mov	pc, lr
+	ret	lr
 ENDPROC(arm1022_dma_unmap_area)
 
 	.globl	arm1022_flush_kern_cache_louis
@@ -358,7 +358,7 @@ ENTRY(cpu_arm1022_dcache_clean_area)
 	subs	r1, r1, #CACHE_DLINESIZE
 	bhi	1b
 #endif
-	mov	pc, lr
+	ret	lr
 
 /* =============================== PageTable ============================== */
 
@@ -389,7 +389,7 @@ ENTRY(cpu_arm1022_switch_mm)
 	mcr	p15, 0, r0, c2, c0, 0		@ load page table pointer
 	mcr	p15, 0, r1, c8, c7, 0		@ invalidate I & D TLBs
 #endif
-	mov	pc, lr
+	ret	lr
         
 /*
  * cpu_arm1022_set_pte_ext(ptep, pte, ext)
@@ -405,7 +405,7 @@ ENTRY(cpu_arm1022_set_pte_ext)
 	mcr	p15, 0, r0, c7, c10, 1		@ clean D entry
 #endif
 #endif /* CONFIG_MMU */
-	mov	pc, lr
+	ret	lr
 
 	.type	__arm1022_setup, #function
 __arm1022_setup:
@@ -423,7 +423,7 @@ __arm1022_setup:
 #ifdef CONFIG_CPU_CACHE_ROUND_ROBIN
 	orr	r0, r0, #0x4000 		@ .R..............
 #endif
-	mov	pc, lr
+	ret	lr
 	.size	__arm1022_setup, . - __arm1022_setup
 
 	/*
diff --git a/arch/arm/mm/proc-arm1026.S b/arch/arm/mm/proc-arm1026.S
index 4799a24b43e6..fc294067e977 100644
--- a/arch/arm/mm/proc-arm1026.S
+++ b/arch/arm/mm/proc-arm1026.S
@@ -62,7 +62,7 @@
  * cpu_arm1026_proc_init()
  */
 ENTRY(cpu_arm1026_proc_init)
-	mov	pc, lr
+	ret	lr
 
 /*
  * cpu_arm1026_proc_fin()
@@ -72,7 +72,7 @@ ENTRY(cpu_arm1026_proc_fin)
 	bic	r0, r0, #0x1000 		@ ...i............
 	bic	r0, r0, #0x000e 		@ ............wca.
 	mcr	p15, 0, r0, c1, c0, 0		@ disable caches
-	mov	pc, lr
+	ret	lr
 
 /*
  * cpu_arm1026_reset(loc)
@@ -96,7 +96,7 @@ ENTRY(cpu_arm1026_reset)
 	bic	ip, ip, #0x000f 		@ ............wcam
 	bic	ip, ip, #0x1100 		@ ...i...s........
 	mcr	p15, 0, ip, c1, c0, 0		@ ctrl register
-	mov	pc, r0
+	ret	r0
 ENDPROC(cpu_arm1026_reset)
 	.popsection
 
@@ -106,7 +106,7 @@ ENDPROC(cpu_arm1026_reset)
 	.align	5
 ENTRY(cpu_arm1026_do_idle)
 	mcr	p15, 0, r0, c7, c0, 4		@ Wait for interrupt
-	mov	pc, lr
+	ret	lr
 
 /* ================================= CACHE ================================ */
 
@@ -122,7 +122,7 @@ ENTRY(arm1026_flush_icache_all)
 	mov	r0, #0
 	mcr	p15, 0, r0, c7, c5, 0		@ invalidate I cache
 #endif
-	mov	pc, lr
+	ret	lr
 ENDPROC(arm1026_flush_icache_all)
 
 /*
@@ -151,7 +151,7 @@ __flush_whole_cache:
 	mcrne	p15, 0, ip, c7, c5, 0		@ invalidate I cache
 #endif
 	mcrne	p15, 0, ip, c7, c10, 4		@ drain WB
-	mov	pc, lr
+	ret	lr
 
 /*
  *	flush_user_cache_range(start, end, flags)
@@ -180,7 +180,7 @@ ENTRY(arm1026_flush_user_cache_range)
 	mcrne	p15, 0, ip, c7, c5, 0		@ invalidate I cache
 #endif
 	mcrne	p15, 0, ip, c7, c10, 4		@ drain WB
-	mov	pc, lr
+	ret	lr
 
 /*
  *	coherent_kern_range(start, end)
@@ -219,7 +219,7 @@ ENTRY(arm1026_coherent_user_range)
 	blo	1b
 	mcr	p15, 0, ip, c7, c10, 4		@ drain WB
 	mov	r0, #0
-	mov	pc, lr
+	ret	lr
 
 /*
  *	flush_kern_dcache_area(void *addr, size_t size)
@@ -240,7 +240,7 @@ ENTRY(arm1026_flush_kern_dcache_area)
 	blo	1b
 #endif
 	mcr	p15, 0, ip, c7, c10, 4		@ drain WB
-	mov	pc, lr
+	ret	lr
 
 /*
  *	dma_inv_range(start, end)
@@ -269,7 +269,7 @@ arm1026_dma_inv_range:
 	blo	1b
 #endif
 	mcr	p15, 0, ip, c7, c10, 4		@ drain WB
-	mov	pc, lr
+	ret	lr
 
 /*
  *	dma_clean_range(start, end)
@@ -291,7 +291,7 @@ arm1026_dma_clean_range:
 	blo	1b
 #endif
 	mcr	p15, 0, ip, c7, c10, 4		@ drain WB
-	mov	pc, lr
+	ret	lr
 
 /*
  *	dma_flush_range(start, end)
@@ -311,7 +311,7 @@ ENTRY(arm1026_dma_flush_range)
 	blo	1b
 #endif
 	mcr	p15, 0, ip, c7, c10, 4		@ drain WB
-	mov	pc, lr
+	ret	lr
 
 /*
  *	dma_map_area(start, size, dir)
@@ -334,7 +334,7 @@ ENDPROC(arm1026_dma_map_area)
  *	- dir	- DMA direction
  */
 ENTRY(arm1026_dma_unmap_area)
-	mov	pc, lr
+	ret	lr
 ENDPROC(arm1026_dma_unmap_area)
 
 	.globl	arm1026_flush_kern_cache_louis
@@ -352,7 +352,7 @@ ENTRY(cpu_arm1026_dcache_clean_area)
 	subs	r1, r1, #CACHE_DLINESIZE
 	bhi	1b
 #endif
-	mov	pc, lr
+	ret	lr
 
 /* =============================== PageTable ============================== */
 
@@ -378,7 +378,7 @@ ENTRY(cpu_arm1026_switch_mm)
 	mcr	p15, 0, r0, c2, c0, 0		@ load page table pointer
 	mcr	p15, 0, r1, c8, c7, 0		@ invalidate I & D TLBs
 #endif
-	mov	pc, lr
+	ret	lr
         
 /*
  * cpu_arm1026_set_pte_ext(ptep, pte, ext)
@@ -394,7 +394,7 @@ ENTRY(cpu_arm1026_set_pte_ext)
 	mcr	p15, 0, r0, c7, c10, 1		@ clean D entry
 #endif
 #endif /* CONFIG_MMU */
-	mov	pc, lr
+	ret	lr
 
 	.type	__arm1026_setup, #function
 __arm1026_setup:
@@ -417,7 +417,7 @@ __arm1026_setup:
 #ifdef CONFIG_CPU_CACHE_ROUND_ROBIN
 	orr	r0, r0, #0x4000 		@ .R.. .... .... ....
 #endif
-	mov	pc, lr
+	ret	lr
 	.size	__arm1026_setup, . - __arm1026_setup
 
 	/*
diff --git a/arch/arm/mm/proc-arm720.S b/arch/arm/mm/proc-arm720.S
index d42c37f9f5bc..2baa66b3ac9b 100644
--- a/arch/arm/mm/proc-arm720.S
+++ b/arch/arm/mm/proc-arm720.S
@@ -51,14 +51,14 @@
  */
 ENTRY(cpu_arm720_dcache_clean_area)
 ENTRY(cpu_arm720_proc_init)
-		mov	pc, lr
+		ret	lr
 
 ENTRY(cpu_arm720_proc_fin)
 		mrc	p15, 0, r0, c1, c0, 0
 		bic	r0, r0, #0x1000			@ ...i............
 		bic	r0, r0, #0x000e			@ ............wca.
 		mcr	p15, 0, r0, c1, c0, 0		@ disable caches
-		mov	pc, lr
+		ret	lr
 
 /*
  * Function: arm720_proc_do_idle(void)
@@ -66,7 +66,7 @@ ENTRY(cpu_arm720_proc_fin)
  * Purpose : put the processor in proper idle mode
  */
 ENTRY(cpu_arm720_do_idle)
-		mov	pc, lr
+		ret	lr
 
 /*
  * Function: arm720_switch_mm(unsigned long pgd_phys)
@@ -81,7 +81,7 @@ ENTRY(cpu_arm720_switch_mm)
 		mcr	p15, 0, r0, c2, c0, 0		@ update page table ptr
 		mcr	p15, 0, r1, c8, c7, 0		@ flush TLB (v4)
 #endif
-		mov	pc, lr
+		ret	lr
 
 /*
  * Function: arm720_set_pte_ext(pte_t *ptep, pte_t pte, unsigned int ext)
@@ -94,7 +94,7 @@ ENTRY(cpu_arm720_set_pte_ext)
 #ifdef CONFIG_MMU
 	armv3_set_pte_ext wc_disable=0
 #endif
-	mov	pc, lr
+	ret	lr
 
 /*
  * Function: arm720_reset
@@ -112,7 +112,7 @@ ENTRY(cpu_arm720_reset)
 		bic	ip, ip, #0x000f			@ ............wcam
 		bic	ip, ip, #0x2100			@ ..v....s........
 		mcr	p15, 0, ip, c1, c0, 0		@ ctrl register
-		mov	pc, r0
+		ret	r0
 ENDPROC(cpu_arm720_reset)
 		.popsection
 
@@ -128,7 +128,7 @@ __arm710_setup:
 	bic	r0, r0, r5
 	ldr	r5, arm710_cr1_set
 	orr	r0, r0, r5
-	mov	pc, lr				@ __ret (head.S)
+	ret	lr				@ __ret (head.S)
 	.size	__arm710_setup, . - __arm710_setup
 
 	/*
@@ -156,7 +156,7 @@ __arm720_setup:
 	mrc	p15, 0, r0, c1, c0		@ get control register
 	bic	r0, r0, r5
 	orr	r0, r0, r6
-	mov	pc, lr				@ __ret (head.S)
+	ret	lr				@ __ret (head.S)
 	.size	__arm720_setup, . - __arm720_setup
 
 	/*
diff --git a/arch/arm/mm/proc-arm740.S b/arch/arm/mm/proc-arm740.S
index 9b0ae90cbf17..ac1ea6b3bce4 100644
--- a/arch/arm/mm/proc-arm740.S
+++ b/arch/arm/mm/proc-arm740.S
@@ -32,7 +32,7 @@ ENTRY(cpu_arm740_proc_init)
 ENTRY(cpu_arm740_do_idle)
 ENTRY(cpu_arm740_dcache_clean_area)
 ENTRY(cpu_arm740_switch_mm)
-	mov	pc, lr
+	ret	lr
 
 /*
  * cpu_arm740_proc_fin()
@@ -42,7 +42,7 @@ ENTRY(cpu_arm740_proc_fin)
 	bic	r0, r0, #0x3f000000		@ bank/f/lock/s
 	bic	r0, r0, #0x0000000c		@ w-buffer/cache
 	mcr	p15, 0, r0, c1, c0, 0		@ disable caches
-	mov	pc, lr
+	ret	lr
 
 /*
  * cpu_arm740_reset(loc)
@@ -56,7 +56,7 @@ ENTRY(cpu_arm740_reset)
 	mrc	p15, 0, ip, c1, c0, 0		@ get ctrl register
 	bic	ip, ip, #0x0000000c		@ ............wc..
 	mcr	p15, 0, ip, c1, c0, 0		@ ctrl register
-	mov	pc, r0
+	ret	r0
 ENDPROC(cpu_arm740_reset)
 	.popsection
 
@@ -115,7 +115,7 @@ __arm740_setup:
 						@ need some benchmark
 	orr	r0, r0, #0x0000000d		@ MPU/Cache/WB
 
-	mov	pc, lr
+	ret	lr
 
 	.size	__arm740_setup, . - __arm740_setup
 
diff --git a/arch/arm/mm/proc-arm7tdmi.S b/arch/arm/mm/proc-arm7tdmi.S
index f6cc3f63ce39..bf6ba4bc30ff 100644
--- a/arch/arm/mm/proc-arm7tdmi.S
+++ b/arch/arm/mm/proc-arm7tdmi.S
@@ -32,13 +32,13 @@ ENTRY(cpu_arm7tdmi_proc_init)
 ENTRY(cpu_arm7tdmi_do_idle)
 ENTRY(cpu_arm7tdmi_dcache_clean_area)
 ENTRY(cpu_arm7tdmi_switch_mm)
-		mov	pc, lr
+		ret	lr
 
 /*
  * cpu_arm7tdmi_proc_fin()
  */
 ENTRY(cpu_arm7tdmi_proc_fin)
-		mov	pc, lr
+		ret	lr
 
 /*
  * Function: cpu_arm7tdmi_reset(loc)
@@ -47,13 +47,13 @@ ENTRY(cpu_arm7tdmi_proc_fin)
  */
 		.pushsection	.idmap.text, "ax"
 ENTRY(cpu_arm7tdmi_reset)
-		mov	pc, r0
+		ret	r0
 ENDPROC(cpu_arm7tdmi_reset)
 		.popsection
 
 		.type	__arm7tdmi_setup, #function
 __arm7tdmi_setup:
-		mov	pc, lr
+		ret	lr
 		.size	__arm7tdmi_setup, . - __arm7tdmi_setup
 
 		__INITDATA
diff --git a/arch/arm/mm/proc-arm920.S b/arch/arm/mm/proc-arm920.S
index 549557df6d57..22bf8dde4f84 100644
--- a/arch/arm/mm/proc-arm920.S
+++ b/arch/arm/mm/proc-arm920.S
@@ -63,7 +63,7 @@
  * cpu_arm920_proc_init()
  */
 ENTRY(cpu_arm920_proc_init)
-	mov	pc, lr
+	ret	lr
 
 /*
  * cpu_arm920_proc_fin()
@@ -73,7 +73,7 @@ ENTRY(cpu_arm920_proc_fin)
 	bic	r0, r0, #0x1000			@ ...i............
 	bic	r0, r0, #0x000e			@ ............wca.
 	mcr	p15, 0, r0, c1, c0, 0		@ disable caches
-	mov	pc, lr
+	ret	lr
 
 /*
  * cpu_arm920_reset(loc)
@@ -97,7 +97,7 @@ ENTRY(cpu_arm920_reset)
 	bic	ip, ip, #0x000f			@ ............wcam
 	bic	ip, ip, #0x1100			@ ...i...s........
 	mcr	p15, 0, ip, c1, c0, 0		@ ctrl register
-	mov	pc, r0
+	ret	r0
 ENDPROC(cpu_arm920_reset)
 	.popsection
 
@@ -107,7 +107,7 @@ ENDPROC(cpu_arm920_reset)
 	.align	5
 ENTRY(cpu_arm920_do_idle)
 	mcr	p15, 0, r0, c7, c0, 4		@ Wait for interrupt
-	mov	pc, lr
+	ret	lr
 
 
 #ifndef CONFIG_CPU_DCACHE_WRITETHROUGH
@@ -120,7 +120,7 @@ ENTRY(cpu_arm920_do_idle)
 ENTRY(arm920_flush_icache_all)
 	mov	r0, #0
 	mcr	p15, 0, r0, c7, c5, 0		@ invalidate I cache
-	mov	pc, lr
+	ret	lr
 ENDPROC(arm920_flush_icache_all)
 
 /*
@@ -151,7 +151,7 @@ __flush_whole_cache:
 	tst	r2, #VM_EXEC
 	mcrne	p15, 0, ip, c7, c5, 0		@ invalidate I cache
 	mcrne	p15, 0, ip, c7, c10, 4		@ drain WB
-	mov	pc, lr
+	ret	lr
 
 /*
  *	flush_user_cache_range(start, end, flags)
@@ -177,7 +177,7 @@ ENTRY(arm920_flush_user_cache_range)
 	blo	1b
 	tst	r2, #VM_EXEC
 	mcrne	p15, 0, ip, c7, c10, 4		@ drain WB
-	mov	pc, lr
+	ret	lr
 
 /*
  *	coherent_kern_range(start, end)
@@ -211,7 +211,7 @@ ENTRY(arm920_coherent_user_range)
 	blo	1b
 	mcr	p15, 0, r0, c7, c10, 4		@ drain WB
 	mov	r0, #0
-	mov	pc, lr
+	ret	lr
 
 /*
  *	flush_kern_dcache_area(void *addr, size_t size)
@@ -231,7 +231,7 @@ ENTRY(arm920_flush_kern_dcache_area)
 	mov	r0, #0
 	mcr	p15, 0, r0, c7, c5, 0		@ invalidate I cache
 	mcr	p15, 0, r0, c7, c10, 4		@ drain WB
-	mov	pc, lr
+	ret	lr
 
 /*
  *	dma_inv_range(start, end)
@@ -257,7 +257,7 @@ arm920_dma_inv_range:
 	cmp	r0, r1
 	blo	1b
 	mcr	p15, 0, r0, c7, c10, 4		@ drain WB
-	mov	pc, lr
+	ret	lr
 
 /*
  *	dma_clean_range(start, end)
@@ -276,7 +276,7 @@ arm920_dma_clean_range:
 	cmp	r0, r1
 	blo	1b
 	mcr	p15, 0, r0, c7, c10, 4		@ drain WB
-	mov	pc, lr
+	ret	lr
 
 /*
  *	dma_flush_range(start, end)
@@ -293,7 +293,7 @@ ENTRY(arm920_dma_flush_range)
 	cmp	r0, r1
 	blo	1b
 	mcr	p15, 0, r0, c7, c10, 4		@ drain WB
-	mov	pc, lr
+	ret	lr
 
 /*
  *	dma_map_area(start, size, dir)
@@ -316,7 +316,7 @@ ENDPROC(arm920_dma_map_area)
  *	- dir	- DMA direction
  */
 ENTRY(arm920_dma_unmap_area)
-	mov	pc, lr
+	ret	lr
 ENDPROC(arm920_dma_unmap_area)
 
 	.globl	arm920_flush_kern_cache_louis
@@ -332,7 +332,7 @@ ENTRY(cpu_arm920_dcache_clean_area)
 	add	r0, r0, #CACHE_DLINESIZE
 	subs	r1, r1, #CACHE_DLINESIZE
 	bhi	1b
-	mov	pc, lr
+	ret	lr
 
 /* =============================== PageTable ============================== */
 
@@ -367,7 +367,7 @@ ENTRY(cpu_arm920_switch_mm)
 	mcr	p15, 0, r0, c2, c0, 0		@ load page table pointer
 	mcr	p15, 0, ip, c8, c7, 0		@ invalidate I & D TLBs
 #endif
-	mov	pc, lr
+	ret	lr
 
 /*
  * cpu_arm920_set_pte(ptep, pte, ext)
@@ -382,7 +382,7 @@ ENTRY(cpu_arm920_set_pte_ext)
 	mcr	p15, 0, r0, c7, c10, 1		@ clean D entry
 	mcr	p15, 0, r0, c7, c10, 4		@ drain WB
 #endif
-	mov	pc, lr
+	ret	lr
 
 /* Suspend/resume support: taken from arch/arm/plat-s3c24xx/sleep.S */
 .globl	cpu_arm920_suspend_size
@@ -423,7 +423,7 @@ __arm920_setup:
 	mrc	p15, 0, r0, c1, c0		@ get control register v4
 	bic	r0, r0, r5
 	orr	r0, r0, r6
-	mov	pc, lr
+	ret	lr
 	.size	__arm920_setup, . - __arm920_setup
 
 	/*
diff --git a/arch/arm/mm/proc-arm922.S b/arch/arm/mm/proc-arm922.S
index 2a758b06c6f6..0c6d5ac5a6d4 100644
--- a/arch/arm/mm/proc-arm922.S
+++ b/arch/arm/mm/proc-arm922.S
@@ -65,7 +65,7 @@
  * cpu_arm922_proc_init()
  */
 ENTRY(cpu_arm922_proc_init)
-	mov	pc, lr
+	ret	lr
 
 /*
  * cpu_arm922_proc_fin()
@@ -75,7 +75,7 @@ ENTRY(cpu_arm922_proc_fin)
 	bic	r0, r0, #0x1000			@ ...i............
 	bic	r0, r0, #0x000e			@ ............wca.
 	mcr	p15, 0, r0, c1, c0, 0		@ disable caches
-	mov	pc, lr
+	ret	lr
 
 /*
  * cpu_arm922_reset(loc)
@@ -99,7 +99,7 @@ ENTRY(cpu_arm922_reset)
 	bic	ip, ip, #0x000f			@ ............wcam
 	bic	ip, ip, #0x1100			@ ...i...s........
 	mcr	p15, 0, ip, c1, c0, 0		@ ctrl register
-	mov	pc, r0
+	ret	r0
 ENDPROC(cpu_arm922_reset)
 	.popsection
 
@@ -109,7 +109,7 @@ ENDPROC(cpu_arm922_reset)
 	.align	5
 ENTRY(cpu_arm922_do_idle)
 	mcr	p15, 0, r0, c7, c0, 4		@ Wait for interrupt
-	mov	pc, lr
+	ret	lr
 
 
 #ifndef CONFIG_CPU_DCACHE_WRITETHROUGH
@@ -122,7 +122,7 @@ ENTRY(cpu_arm922_do_idle)
 ENTRY(arm922_flush_icache_all)
 	mov	r0, #0
 	mcr	p15, 0, r0, c7, c5, 0		@ invalidate I cache
-	mov	pc, lr
+	ret	lr
 ENDPROC(arm922_flush_icache_all)
 
 /*
@@ -153,7 +153,7 @@ __flush_whole_cache:
 	tst	r2, #VM_EXEC
 	mcrne	p15, 0, ip, c7, c5, 0		@ invalidate I cache
 	mcrne	p15, 0, ip, c7, c10, 4		@ drain WB
-	mov	pc, lr
+	ret	lr
 
 /*
  *	flush_user_cache_range(start, end, flags)
@@ -179,7 +179,7 @@ ENTRY(arm922_flush_user_cache_range)
 	blo	1b
 	tst	r2, #VM_EXEC
 	mcrne	p15, 0, ip, c7, c10, 4		@ drain WB
-	mov	pc, lr
+	ret	lr
 
 /*
  *	coherent_kern_range(start, end)
@@ -213,7 +213,7 @@ ENTRY(arm922_coherent_user_range)
 	blo	1b
 	mcr	p15, 0, r0, c7, c10, 4		@ drain WB
 	mov	r0, #0
-	mov	pc, lr
+	ret	lr
 
 /*
  *	flush_kern_dcache_area(void *addr, size_t size)
@@ -233,7 +233,7 @@ ENTRY(arm922_flush_kern_dcache_area)
 	mov	r0, #0
 	mcr	p15, 0, r0, c7, c5, 0		@ invalidate I cache
 	mcr	p15, 0, r0, c7, c10, 4		@ drain WB
-	mov	pc, lr
+	ret	lr
 
 /*
  *	dma_inv_range(start, end)
@@ -259,7 +259,7 @@ arm922_dma_inv_range:
 	cmp	r0, r1
 	blo	1b
 	mcr	p15, 0, r0, c7, c10, 4		@ drain WB
-	mov	pc, lr
+	ret	lr
 
 /*
  *	dma_clean_range(start, end)
@@ -278,7 +278,7 @@ arm922_dma_clean_range:
 	cmp	r0, r1
 	blo	1b
 	mcr	p15, 0, r0, c7, c10, 4		@ drain WB
-	mov	pc, lr
+	ret	lr
 
 /*
  *	dma_flush_range(start, end)
@@ -295,7 +295,7 @@ ENTRY(arm922_dma_flush_range)
 	cmp	r0, r1
 	blo	1b
 	mcr	p15, 0, r0, c7, c10, 4		@ drain WB
-	mov	pc, lr
+	ret	lr
 
 /*
  *	dma_map_area(start, size, dir)
@@ -318,7 +318,7 @@ ENDPROC(arm922_dma_map_area)
  *	- dir	- DMA direction
  */
 ENTRY(arm922_dma_unmap_area)
-	mov	pc, lr
+	ret	lr
 ENDPROC(arm922_dma_unmap_area)
 
 	.globl	arm922_flush_kern_cache_louis
@@ -336,7 +336,7 @@ ENTRY(cpu_arm922_dcache_clean_area)
 	subs	r1, r1, #CACHE_DLINESIZE
 	bhi	1b
 #endif
-	mov	pc, lr
+	ret	lr
 
 /* =============================== PageTable ============================== */
 
@@ -371,7 +371,7 @@ ENTRY(cpu_arm922_switch_mm)
 	mcr	p15, 0, r0, c2, c0, 0		@ load page table pointer
 	mcr	p15, 0, ip, c8, c7, 0		@ invalidate I & D TLBs
 #endif
-	mov	pc, lr
+	ret	lr
 
 /*
  * cpu_arm922_set_pte_ext(ptep, pte, ext)
@@ -386,7 +386,7 @@ ENTRY(cpu_arm922_set_pte_ext)
 	mcr	p15, 0, r0, c7, c10, 1		@ clean D entry
 	mcr	p15, 0, r0, c7, c10, 4		@ drain WB
 #endif /* CONFIG_MMU */
-	mov	pc, lr
+	ret	lr
 
 	.type	__arm922_setup, #function
 __arm922_setup:
@@ -401,7 +401,7 @@ __arm922_setup:
 	mrc	p15, 0, r0, c1, c0		@ get control register v4
 	bic	r0, r0, r5
 	orr	r0, r0, r6
-	mov	pc, lr
+	ret	lr
 	.size	__arm922_setup, . - __arm922_setup
 
 	/*
diff --git a/arch/arm/mm/proc-arm925.S b/arch/arm/mm/proc-arm925.S
index ba0d58e1a2a2..c32d073282ea 100644
--- a/arch/arm/mm/proc-arm925.S
+++ b/arch/arm/mm/proc-arm925.S
@@ -86,7 +86,7 @@
  * cpu_arm925_proc_init()
  */
 ENTRY(cpu_arm925_proc_init)
-	mov	pc, lr
+	ret	lr
 
 /*
  * cpu_arm925_proc_fin()
@@ -96,7 +96,7 @@ ENTRY(cpu_arm925_proc_fin)
 	bic	r0, r0, #0x1000			@ ...i............
 	bic	r0, r0, #0x000e			@ ............wca.
 	mcr	p15, 0, r0, c1, c0, 0		@ disable caches
-	mov	pc, lr
+	ret	lr
 
 /*
  * cpu_arm925_reset(loc)
@@ -129,7 +129,7 @@ ENDPROC(cpu_arm925_reset)
 	bic	ip, ip, #0x000f			@ ............wcam
 	bic	ip, ip, #0x1100			@ ...i...s........
 	mcr	p15, 0, ip, c1, c0, 0		@ ctrl register
-	mov	pc, r0
+	ret	r0
 
 /*
  * cpu_arm925_do_idle()
@@ -145,7 +145,7 @@ ENTRY(cpu_arm925_do_idle)
 	mcr	p15, 0, r2, c1, c0, 0		@ Disable I cache
 	mcr	p15, 0, r0, c7, c0, 4		@ Wait for interrupt
 	mcr	p15, 0, r1, c1, c0, 0		@ Restore ICache enable
-	mov	pc, lr
+	ret	lr
 
 /*
  *	flush_icache_all()
@@ -155,7 +155,7 @@ ENTRY(cpu_arm925_do_idle)
 ENTRY(arm925_flush_icache_all)
 	mov	r0, #0
 	mcr	p15, 0, r0, c7, c5, 0		@ invalidate I cache
-	mov	pc, lr
+	ret	lr
 ENDPROC(arm925_flush_icache_all)
 
 /*
@@ -188,7 +188,7 @@ __flush_whole_cache:
 	tst	r2, #VM_EXEC
 	mcrne	p15, 0, ip, c7, c5, 0		@ invalidate I cache
 	mcrne	p15, 0, ip, c7, c10, 4		@ drain WB
-	mov	pc, lr
+	ret	lr
 
 /*
  *	flush_user_cache_range(start, end, flags)
@@ -225,7 +225,7 @@ ENTRY(arm925_flush_user_cache_range)
 	blo	1b
 	tst	r2, #VM_EXEC
 	mcrne	p15, 0, ip, c7, c10, 4		@ drain WB
-	mov	pc, lr
+	ret	lr
 
 /*
  *	coherent_kern_range(start, end)
@@ -259,7 +259,7 @@ ENTRY(arm925_coherent_user_range)
 	blo	1b
 	mcr	p15, 0, r0, c7, c10, 4		@ drain WB
 	mov	r0, #0
-	mov	pc, lr
+	ret	lr
 
 /*
  *	flush_kern_dcache_area(void *addr, size_t size)
@@ -279,7 +279,7 @@ ENTRY(arm925_flush_kern_dcache_area)
 	mov	r0, #0
 	mcr	p15, 0, r0, c7, c5, 0		@ invalidate I cache
 	mcr	p15, 0, r0, c7, c10, 4		@ drain WB
-	mov	pc, lr
+	ret	lr
 
 /*
  *	dma_inv_range(start, end)
@@ -307,7 +307,7 @@ arm925_dma_inv_range:
 	cmp	r0, r1
 	blo	1b
 	mcr	p15, 0, r0, c7, c10, 4		@ drain WB
-	mov	pc, lr
+	ret	lr
 
 /*
  *	dma_clean_range(start, end)
@@ -328,7 +328,7 @@ arm925_dma_clean_range:
 	blo	1b
 #endif
 	mcr	p15, 0, r0, c7, c10, 4		@ drain WB
-	mov	pc, lr
+	ret	lr
 
 /*
  *	dma_flush_range(start, end)
@@ -350,7 +350,7 @@ ENTRY(arm925_dma_flush_range)
 	cmp	r0, r1
 	blo	1b
 	mcr	p15, 0, r0, c7, c10, 4		@ drain WB
-	mov	pc, lr
+	ret	lr
 
 /*
  *	dma_map_area(start, size, dir)
@@ -373,7 +373,7 @@ ENDPROC(arm925_dma_map_area)
  *	- dir	- DMA direction
  */
 ENTRY(arm925_dma_unmap_area)
-	mov	pc, lr
+	ret	lr
 ENDPROC(arm925_dma_unmap_area)
 
 	.globl	arm925_flush_kern_cache_louis
@@ -390,7 +390,7 @@ ENTRY(cpu_arm925_dcache_clean_area)
 	bhi	1b
 #endif
 	mcr	p15, 0, r0, c7, c10, 4		@ drain WB
-	mov	pc, lr
+	ret	lr
 
 /* =============================== PageTable ============================== */
 
@@ -419,7 +419,7 @@ ENTRY(cpu_arm925_switch_mm)
 	mcr	p15, 0, r0, c2, c0, 0		@ load page table pointer
 	mcr	p15, 0, ip, c8, c7, 0		@ invalidate I & D TLBs
 #endif
-	mov	pc, lr
+	ret	lr
 
 /*
  * cpu_arm925_set_pte_ext(ptep, pte, ext)
@@ -436,7 +436,7 @@ ENTRY(cpu_arm925_set_pte_ext)
 #endif
 	mcr	p15, 0, r0, c7, c10, 4		@ drain WB
 #endif /* CONFIG_MMU */
-	mov	pc, lr
+	ret	lr
 
 	.type	__arm925_setup, #function
 __arm925_setup:
@@ -469,7 +469,7 @@ __arm925_setup:
 #ifdef CONFIG_CPU_CACHE_ROUND_ROBIN
 	orr	r0, r0, #0x4000			@ .1.. .... .... ....
 #endif
-	mov	pc, lr
+	ret	lr
 	.size	__arm925_setup, . - __arm925_setup
 
 	/*
diff --git a/arch/arm/mm/proc-arm926.S b/arch/arm/mm/proc-arm926.S
index 0f098f407c9f..252b2503038d 100644
--- a/arch/arm/mm/proc-arm926.S
+++ b/arch/arm/mm/proc-arm926.S
@@ -55,7 +55,7 @@
  * cpu_arm926_proc_init()
  */
 ENTRY(cpu_arm926_proc_init)
-	mov	pc, lr
+	ret	lr
 
 /*
  * cpu_arm926_proc_fin()
@@ -65,7 +65,7 @@ ENTRY(cpu_arm926_proc_fin)
 	bic	r0, r0, #0x1000			@ ...i............
 	bic	r0, r0, #0x000e			@ ............wca.
 	mcr	p15, 0, r0, c1, c0, 0		@ disable caches
-	mov	pc, lr
+	ret	lr
 
 /*
  * cpu_arm926_reset(loc)
@@ -89,7 +89,7 @@ ENTRY(cpu_arm926_reset)
 	bic	ip, ip, #0x000f			@ ............wcam
 	bic	ip, ip, #0x1100			@ ...i...s........
 	mcr	p15, 0, ip, c1, c0, 0		@ ctrl register
-	mov	pc, r0
+	ret	r0
 ENDPROC(cpu_arm926_reset)
 	.popsection
 
@@ -111,7 +111,7 @@ ENTRY(cpu_arm926_do_idle)
 	mcr	p15, 0, r0, c7, c0, 4		@ Wait for interrupt
 	mcr	p15, 0, r1, c1, c0, 0		@ Restore ICache enable
 	msr	cpsr_c, r3			@ Restore FIQ state
-	mov	pc, lr
+	ret	lr
 
 /*
  *	flush_icache_all()
@@ -121,7 +121,7 @@ ENTRY(cpu_arm926_do_idle)
 ENTRY(arm926_flush_icache_all)
 	mov	r0, #0
 	mcr	p15, 0, r0, c7, c5, 0		@ invalidate I cache
-	mov	pc, lr
+	ret	lr
 ENDPROC(arm926_flush_icache_all)
 
 /*
@@ -151,7 +151,7 @@ __flush_whole_cache:
 	tst	r2, #VM_EXEC
 	mcrne	p15, 0, ip, c7, c5, 0		@ invalidate I cache
 	mcrne	p15, 0, ip, c7, c10, 4		@ drain WB
-	mov	pc, lr
+	ret	lr
 
 /*
  *	flush_user_cache_range(start, end, flags)
@@ -188,7 +188,7 @@ ENTRY(arm926_flush_user_cache_range)
 	blo	1b
 	tst	r2, #VM_EXEC
 	mcrne	p15, 0, ip, c7, c10, 4		@ drain WB
-	mov	pc, lr
+	ret	lr
 
 /*
  *	coherent_kern_range(start, end)
@@ -222,7 +222,7 @@ ENTRY(arm926_coherent_user_range)
 	blo	1b
 	mcr	p15, 0, r0, c7, c10, 4		@ drain WB
 	mov	r0, #0
-	mov	pc, lr
+	ret	lr
 
 /*
  *	flush_kern_dcache_area(void *addr, size_t size)
@@ -242,7 +242,7 @@ ENTRY(arm926_flush_kern_dcache_area)
 	mov	r0, #0
 	mcr	p15, 0, r0, c7, c5, 0		@ invalidate I cache
 	mcr	p15, 0, r0, c7, c10, 4		@ drain WB
-	mov	pc, lr
+	ret	lr
 
 /*
  *	dma_inv_range(start, end)
@@ -270,7 +270,7 @@ arm926_dma_inv_range:
 	cmp	r0, r1
 	blo	1b
 	mcr	p15, 0, r0, c7, c10, 4		@ drain WB
-	mov	pc, lr
+	ret	lr
 
 /*
  *	dma_clean_range(start, end)
@@ -291,7 +291,7 @@ arm926_dma_clean_range:
 	blo	1b
 #endif
 	mcr	p15, 0, r0, c7, c10, 4		@ drain WB
-	mov	pc, lr
+	ret	lr
 
 /*
  *	dma_flush_range(start, end)
@@ -313,7 +313,7 @@ ENTRY(arm926_dma_flush_range)
 	cmp	r0, r1
 	blo	1b
 	mcr	p15, 0, r0, c7, c10, 4		@ drain WB
-	mov	pc, lr
+	ret	lr
 
 /*
  *	dma_map_area(start, size, dir)
@@ -336,7 +336,7 @@ ENDPROC(arm926_dma_map_area)
  *	- dir	- DMA direction
  */
 ENTRY(arm926_dma_unmap_area)
-	mov	pc, lr
+	ret	lr
 ENDPROC(arm926_dma_unmap_area)
 
 	.globl	arm926_flush_kern_cache_louis
@@ -353,7 +353,7 @@ ENTRY(cpu_arm926_dcache_clean_area)
 	bhi	1b
 #endif
 	mcr	p15, 0, r0, c7, c10, 4		@ drain WB
-	mov	pc, lr
+	ret	lr
 
 /* =============================== PageTable ============================== */
 
@@ -380,7 +380,7 @@ ENTRY(cpu_arm926_switch_mm)
 	mcr	p15, 0, r0, c2, c0, 0		@ load page table pointer
 	mcr	p15, 0, ip, c8, c7, 0		@ invalidate I & D TLBs
 #endif
-	mov	pc, lr
+	ret	lr
 
 /*
  * cpu_arm926_set_pte_ext(ptep, pte, ext)
@@ -397,7 +397,7 @@ ENTRY(cpu_arm926_set_pte_ext)
 #endif
 	mcr	p15, 0, r0, c7, c10, 4		@ drain WB
 #endif
-	mov	pc, lr
+	ret	lr
 
 /* Suspend/resume support: taken from arch/arm/plat-s3c24xx/sleep.S */
 .globl	cpu_arm926_suspend_size
@@ -448,7 +448,7 @@ __arm926_setup:
 #ifdef CONFIG_CPU_CACHE_ROUND_ROBIN
 	orr	r0, r0, #0x4000			@ .1.. .... .... ....
 #endif
-	mov	pc, lr
+	ret	lr
 	.size	__arm926_setup, . - __arm926_setup
 
 	/*
diff --git a/arch/arm/mm/proc-arm940.S b/arch/arm/mm/proc-arm940.S
index 1c39a704ff6e..e5212d489377 100644
--- a/arch/arm/mm/proc-arm940.S
+++ b/arch/arm/mm/proc-arm940.S
@@ -31,7 +31,7 @@
  */
 ENTRY(cpu_arm940_proc_init)
 ENTRY(cpu_arm940_switch_mm)
-	mov	pc, lr
+	ret	lr
 
 /*
  * cpu_arm940_proc_fin()
@@ -41,7 +41,7 @@ ENTRY(cpu_arm940_proc_fin)
 	bic	r0, r0, #0x00001000		@ i-cache
 	bic	r0, r0, #0x00000004		@ d-cache
 	mcr	p15, 0, r0, c1, c0, 0		@ disable caches
-	mov	pc, lr
+	ret	lr
 
 /*
  * cpu_arm940_reset(loc)
@@ -58,7 +58,7 @@ ENTRY(cpu_arm940_reset)
 	bic	ip, ip, #0x00000005		@ .............c.p
 	bic	ip, ip, #0x00001000		@ i-cache
 	mcr	p15, 0, ip, c1, c0, 0		@ ctrl register
-	mov	pc, r0
+	ret	r0
 ENDPROC(cpu_arm940_reset)
 	.popsection
 
@@ -68,7 +68,7 @@ ENDPROC(cpu_arm940_reset)
 	.align	5
 ENTRY(cpu_arm940_do_idle)
 	mcr	p15, 0, r0, c7, c0, 4		@ Wait for interrupt
-	mov	pc, lr
+	ret	lr
 
 /*
  *	flush_icache_all()
@@ -78,7 +78,7 @@ ENTRY(cpu_arm940_do_idle)
 ENTRY(arm940_flush_icache_all)
 	mov	r0, #0
 	mcr	p15, 0, r0, c7, c5, 0		@ invalidate I cache
-	mov	pc, lr
+	ret	lr
 ENDPROC(arm940_flush_icache_all)
 
 /*
@@ -122,7 +122,7 @@ ENTRY(arm940_flush_user_cache_range)
 	tst	r2, #VM_EXEC
 	mcrne	p15, 0, ip, c7, c5, 0		@ invalidate I cache
 	mcrne	p15, 0, ip, c7, c10, 4		@ drain WB
-	mov	pc, lr
+	ret	lr
 
 /*
  *	coherent_kern_range(start, end)
@@ -170,7 +170,7 @@ ENTRY(arm940_flush_kern_dcache_area)
 	bcs	1b				@ segments 7 to 0
 	mcr	p15, 0, r0, c7, c5, 0		@ invalidate I cache
 	mcr	p15, 0, r0, c7, c10, 4		@ drain WB
-	mov	pc, lr
+	ret	lr
 
 /*
  *	dma_inv_range(start, end)
@@ -191,7 +191,7 @@ arm940_dma_inv_range:
 	subs	r1, r1, #1 << 4
 	bcs	1b				@ segments 7 to 0
 	mcr	p15, 0, ip, c7, c10, 4		@ drain WB
-	mov	pc, lr
+	ret	lr
 
 /*
  *	dma_clean_range(start, end)
@@ -215,7 +215,7 @@ ENTRY(cpu_arm940_dcache_clean_area)
 	bcs	1b				@ segments 7 to 0
 #endif
 	mcr	p15, 0, ip, c7, c10, 4		@ drain WB
-	mov	pc, lr
+	ret	lr
 
 /*
  *	dma_flush_range(start, end)
@@ -241,7 +241,7 @@ ENTRY(arm940_dma_flush_range)
 	subs	r1, r1, #1 << 4
 	bcs	1b				@ segments 7 to 0
 	mcr	p15, 0, ip, c7, c10, 4		@ drain WB
-	mov	pc, lr
+	ret	lr
 
 /*
  *	dma_map_area(start, size, dir)
@@ -264,7 +264,7 @@ ENDPROC(arm940_dma_map_area)
  *	- dir	- DMA direction
  */
 ENTRY(arm940_dma_unmap_area)
-	mov	pc, lr
+	ret	lr
 ENDPROC(arm940_dma_unmap_area)
 
 	.globl	arm940_flush_kern_cache_louis
@@ -337,7 +337,7 @@ __arm940_setup:
 	orr	r0, r0, #0x00001000		@ I-cache
 	orr	r0, r0, #0x00000005		@ MPU/D-cache
 
-	mov	pc, lr
+	ret	lr
 
 	.size	__arm940_setup, . - __arm940_setup
 
diff --git a/arch/arm/mm/proc-arm946.S b/arch/arm/mm/proc-arm946.S
index 0289cd905e73..b3dd9b2d0b8e 100644
--- a/arch/arm/mm/proc-arm946.S
+++ b/arch/arm/mm/proc-arm946.S
@@ -38,7 +38,7 @@
  */
 ENTRY(cpu_arm946_proc_init)
 ENTRY(cpu_arm946_switch_mm)
-	mov	pc, lr
+	ret	lr
 
 /*
  * cpu_arm946_proc_fin()
@@ -48,7 +48,7 @@ ENTRY(cpu_arm946_proc_fin)
 	bic	r0, r0, #0x00001000		@ i-cache
 	bic	r0, r0, #0x00000004		@ d-cache
 	mcr	p15, 0, r0, c1, c0, 0		@ disable caches
-	mov	pc, lr
+	ret	lr
 
 /*
  * cpu_arm946_reset(loc)
@@ -65,7 +65,7 @@ ENTRY(cpu_arm946_reset)
 	bic	ip, ip, #0x00000005		@ .............c.p
 	bic	ip, ip, #0x00001000		@ i-cache
 	mcr	p15, 0, ip, c1, c0, 0		@ ctrl register
-	mov	pc, r0
+	ret	r0
 ENDPROC(cpu_arm946_reset)
 	.popsection
 
@@ -75,7 +75,7 @@ ENDPROC(cpu_arm946_reset)
 	.align	5
 ENTRY(cpu_arm946_do_idle)
 	mcr	p15, 0, r0, c7, c0, 4		@ Wait for interrupt
-	mov	pc, lr
+	ret	lr
 
 /*
  *	flush_icache_all()
@@ -85,7 +85,7 @@ ENTRY(cpu_arm946_do_idle)
 ENTRY(arm946_flush_icache_all)
 	mov	r0, #0
 	mcr	p15, 0, r0, c7, c5, 0		@ invalidate I cache
-	mov	pc, lr
+	ret	lr
 ENDPROC(arm946_flush_icache_all)
 
 /*
@@ -117,7 +117,7 @@ __flush_whole_cache:
 	tst	r2, #VM_EXEC
 	mcrne	p15, 0, ip, c7, c5, 0		@ flush I cache
 	mcrne	p15, 0, ip, c7, c10, 4		@ drain WB
-	mov	pc, lr
+	ret	lr
 
 /*
  *	flush_user_cache_range(start, end, flags)
@@ -156,7 +156,7 @@ ENTRY(arm946_flush_user_cache_range)
 	blo	1b
 	tst	r2, #VM_EXEC
 	mcrne	p15, 0, ip, c7, c10, 4		@ drain WB
-	mov	pc, lr
+	ret	lr
 
 /*
  *	coherent_kern_range(start, end)
@@ -191,7 +191,7 @@ ENTRY(arm946_coherent_user_range)
 	blo	1b
 	mcr	p15, 0, r0, c7, c10, 4		@ drain WB
 	mov	r0, #0
-	mov	pc, lr
+	ret	lr
 
 /*
  *	flush_kern_dcache_area(void *addr, size_t size)
@@ -212,7 +212,7 @@ ENTRY(arm946_flush_kern_dcache_area)
 	mov	r0, #0
 	mcr	p15, 0, r0, c7, c5, 0		@ invalidate I cache
 	mcr	p15, 0, r0, c7, c10, 4		@ drain WB
-	mov	pc, lr
+	ret	lr
 
 /*
  *	dma_inv_range(start, end)
@@ -239,7 +239,7 @@ arm946_dma_inv_range:
 	cmp	r0, r1
 	blo	1b
 	mcr	p15, 0, r0, c7, c10, 4		@ drain WB
-	mov	pc, lr
+	ret	lr
 
 /*
  *	dma_clean_range(start, end)
@@ -260,7 +260,7 @@ arm946_dma_clean_range:
 	blo	1b
 #endif
 	mcr	p15, 0, r0, c7, c10, 4		@ drain WB
-	mov	pc, lr
+	ret	lr
 
 /*
  *	dma_flush_range(start, end)
@@ -284,7 +284,7 @@ ENTRY(arm946_dma_flush_range)
 	cmp	r0, r1
 	blo	1b
 	mcr	p15, 0, r0, c7, c10, 4		@ drain WB
-	mov	pc, lr
+	ret	lr
 
 /*
  *	dma_map_area(start, size, dir)
@@ -307,7 +307,7 @@ ENDPROC(arm946_dma_map_area)
  *	- dir	- DMA direction
  */
 ENTRY(arm946_dma_unmap_area)
-	mov	pc, lr
+	ret	lr
 ENDPROC(arm946_dma_unmap_area)
 
 	.globl	arm946_flush_kern_cache_louis
@@ -324,7 +324,7 @@ ENTRY(cpu_arm946_dcache_clean_area)
 	bhi	1b
 #endif
 	mcr	p15, 0, r0, c7, c10, 4		@ drain WB
-	mov	pc, lr
+	ret	lr
 
 	.type	__arm946_setup, #function
 __arm946_setup:
@@ -392,7 +392,7 @@ __arm946_setup:
 #ifdef CONFIG_CPU_CACHE_ROUND_ROBIN
 	orr	r0, r0, #0x00004000		@ .1.. .... .... ....
 #endif
-	mov	pc, lr
+	ret	lr
 
 	.size	__arm946_setup, . - __arm946_setup
 
diff --git a/arch/arm/mm/proc-arm9tdmi.S b/arch/arm/mm/proc-arm9tdmi.S
index f51197ba754a..8227322bbb8f 100644
--- a/arch/arm/mm/proc-arm9tdmi.S
+++ b/arch/arm/mm/proc-arm9tdmi.S
@@ -32,13 +32,13 @@ ENTRY(cpu_arm9tdmi_proc_init)
 ENTRY(cpu_arm9tdmi_do_idle)
 ENTRY(cpu_arm9tdmi_dcache_clean_area)
 ENTRY(cpu_arm9tdmi_switch_mm)
-		mov	pc, lr
+		ret	lr
 
 /*
  * cpu_arm9tdmi_proc_fin()
  */
 ENTRY(cpu_arm9tdmi_proc_fin)
-		mov	pc, lr
+		ret	lr
 
 /*
  * Function: cpu_arm9tdmi_reset(loc)
@@ -47,13 +47,13 @@ ENTRY(cpu_arm9tdmi_proc_fin)
  */
 		.pushsection	.idmap.text, "ax"
 ENTRY(cpu_arm9tdmi_reset)
-		mov	pc, r0
+		ret	r0
 ENDPROC(cpu_arm9tdmi_reset)
 		.popsection
 
 		.type	__arm9tdmi_setup, #function
 __arm9tdmi_setup:
-		mov	pc, lr
+		ret	lr
 		.size	__arm9tdmi_setup, . - __arm9tdmi_setup
 
 		__INITDATA
diff --git a/arch/arm/mm/proc-fa526.S b/arch/arm/mm/proc-fa526.S
index 2dfc0f1d3bfd..c494886892ba 100644
--- a/arch/arm/mm/proc-fa526.S
+++ b/arch/arm/mm/proc-fa526.S
@@ -32,7 +32,7 @@
  * cpu_fa526_proc_init()
  */
 ENTRY(cpu_fa526_proc_init)
-	mov	pc, lr
+	ret	lr
 
 /*
  * cpu_fa526_proc_fin()
@@ -44,7 +44,7 @@ ENTRY(cpu_fa526_proc_fin)
 	mcr	p15, 0, r0, c1, c0, 0		@ disable caches
 	nop
 	nop
-	mov	pc, lr
+	ret	lr
 
 /*
  * cpu_fa526_reset(loc)
@@ -72,7 +72,7 @@ ENTRY(cpu_fa526_reset)
 	mcr	p15, 0, ip, c1, c0, 0		@ ctrl register
 	nop
 	nop
-	mov	pc, r0
+	ret	r0
 ENDPROC(cpu_fa526_reset)
 	.popsection
 
@@ -81,7 +81,7 @@ ENDPROC(cpu_fa526_reset)
  */
 	.align	4
 ENTRY(cpu_fa526_do_idle)
-	mov	pc, lr
+	ret	lr
 
 
 ENTRY(cpu_fa526_dcache_clean_area)
@@ -90,7 +90,7 @@ ENTRY(cpu_fa526_dcache_clean_area)
 	subs	r1, r1, #CACHE_DLINESIZE
 	bhi	1b
 	mcr	p15, 0, r0, c7, c10, 4		@ drain WB
-	mov	pc, lr
+	ret	lr
 
 /* =============================== PageTable ============================== */
 
@@ -117,7 +117,7 @@ ENTRY(cpu_fa526_switch_mm)
 	mcr	p15, 0, r0, c2, c0, 0		@ load page table pointer
 	mcr	p15, 0, ip, c8, c7, 0		@ invalidate UTLB
 #endif
-	mov	pc, lr
+	ret	lr
 
 /*
  * cpu_fa526_set_pte_ext(ptep, pte, ext)
@@ -133,7 +133,7 @@ ENTRY(cpu_fa526_set_pte_ext)
 	mov	r0, #0
 	mcr	p15, 0, r0, c7, c10, 4		@ drain WB
 #endif
-	mov	pc, lr
+	ret	lr
 
 	.type	__fa526_setup, #function
 __fa526_setup:
@@ -162,7 +162,7 @@ __fa526_setup:
 	bic	r0, r0, r5
 	ldr	r5, fa526_cr1_set
 	orr	r0, r0, r5
-	mov	pc, lr
+	ret	lr
 	.size	__fa526_setup, . - __fa526_setup
 
 	/*
diff --git a/arch/arm/mm/proc-feroceon.S b/arch/arm/mm/proc-feroceon.S
index db79b62c92fb..03a1b75f2e16 100644
--- a/arch/arm/mm/proc-feroceon.S
+++ b/arch/arm/mm/proc-feroceon.S
@@ -69,7 +69,7 @@ ENTRY(cpu_feroceon_proc_init)
 	movne	r2, r2, lsr #2			@ turned into # of sets
 	sub	r2, r2, #(1 << 5)
 	stmia	r1, {r2, r3}
-	mov	pc, lr
+	ret	lr
 
 /*
  * cpu_feroceon_proc_fin()
@@ -86,7 +86,7 @@ ENTRY(cpu_feroceon_proc_fin)
 	bic	r0, r0, #0x1000			@ ...i............
 	bic	r0, r0, #0x000e			@ ............wca.
 	mcr	p15, 0, r0, c1, c0, 0		@ disable caches
-	mov	pc, lr
+	ret	lr
 
 /*
  * cpu_feroceon_reset(loc)
@@ -110,7 +110,7 @@ ENTRY(cpu_feroceon_reset)
 	bic	ip, ip, #0x000f			@ ............wcam
 	bic	ip, ip, #0x1100			@ ...i...s........
 	mcr	p15, 0, ip, c1, c0, 0		@ ctrl register
-	mov	pc, r0
+	ret	r0
 ENDPROC(cpu_feroceon_reset)
 	.popsection
 
@@ -124,7 +124,7 @@ ENTRY(cpu_feroceon_do_idle)
 	mov	r0, #0
 	mcr	p15, 0, r0, c7, c10, 4		@ Drain write buffer
 	mcr	p15, 0, r0, c7, c0, 4		@ Wait for interrupt
-	mov	pc, lr
+	ret	lr
 
 /*
  *	flush_icache_all()
@@ -134,7 +134,7 @@ ENTRY(cpu_feroceon_do_idle)
 ENTRY(feroceon_flush_icache_all)
 	mov	r0, #0
 	mcr	p15, 0, r0, c7, c5, 0		@ invalidate I cache
-	mov	pc, lr
+	ret	lr
 ENDPROC(feroceon_flush_icache_all)
 
 /*
@@ -169,7 +169,7 @@ __flush_whole_cache:
 	mov	ip, #0
 	mcrne	p15, 0, ip, c7, c5, 0		@ invalidate I cache
 	mcrne	p15, 0, ip, c7, c10, 4		@ drain WB
-	mov	pc, lr
+	ret	lr
 
 /*
  *	flush_user_cache_range(start, end, flags)
@@ -198,7 +198,7 @@ ENTRY(feroceon_flush_user_cache_range)
 	tst	r2, #VM_EXEC
 	mov	ip, #0
 	mcrne	p15, 0, ip, c7, c10, 4		@ drain WB
-	mov	pc, lr
+	ret	lr
 
 /*
  *	coherent_kern_range(start, end)
@@ -233,7 +233,7 @@ ENTRY(feroceon_coherent_user_range)
 	blo	1b
 	mcr	p15, 0, r0, c7, c10, 4		@ drain WB
 	mov	r0, #0
-	mov	pc, lr
+	ret	lr
 
 /*
  *	flush_kern_dcache_area(void *addr, size_t size)
@@ -254,7 +254,7 @@ ENTRY(feroceon_flush_kern_dcache_area)
 	mov	r0, #0
 	mcr	p15, 0, r0, c7, c5, 0		@ invalidate I cache
 	mcr	p15, 0, r0, c7, c10, 4		@ drain WB
-	mov	pc, lr
+	ret	lr
 
 	.align	5
 ENTRY(feroceon_range_flush_kern_dcache_area)
@@ -268,7 +268,7 @@ ENTRY(feroceon_range_flush_kern_dcache_area)
 	mov	r0, #0
 	mcr	p15, 0, r0, c7, c5, 0		@ invalidate I cache
 	mcr	p15, 0, r0, c7, c10, 4		@ drain WB
-	mov	pc, lr
+	ret	lr
 
 /*
  *	dma_inv_range(start, end)
@@ -295,7 +295,7 @@ feroceon_dma_inv_range:
 	cmp	r0, r1
 	blo	1b
 	mcr	p15, 0, r0, c7, c10, 4		@ drain WB
-	mov	pc, lr
+	ret	lr
 
 	.align	5
 feroceon_range_dma_inv_range:
@@ -311,7 +311,7 @@ feroceon_range_dma_inv_range:
 	mcr	p15, 5, r0, c15, c14, 0		@ D inv range start
 	mcr	p15, 5, r1, c15, c14, 1		@ D inv range top
 	msr	cpsr_c, r2			@ restore interrupts
-	mov	pc, lr
+	ret	lr
 
 /*
  *	dma_clean_range(start, end)
@@ -331,7 +331,7 @@ feroceon_dma_clean_range:
 	cmp	r0, r1
 	blo	1b
 	mcr	p15, 0, r0, c7, c10, 4		@ drain WB
-	mov	pc, lr
+	ret	lr
 
 	.align	5
 feroceon_range_dma_clean_range:
@@ -344,7 +344,7 @@ feroceon_range_dma_clean_range:
 	mcr	p15, 5, r1, c15, c13, 1		@ D clean range top
 	msr	cpsr_c, r2			@ restore interrupts
 	mcr	p15, 0, r0, c7, c10, 4		@ drain WB
-	mov	pc, lr
+	ret	lr
 
 /*
  *	dma_flush_range(start, end)
@@ -362,7 +362,7 @@ ENTRY(feroceon_dma_flush_range)
 	cmp	r0, r1
 	blo	1b
 	mcr	p15, 0, r0, c7, c10, 4		@ drain WB
-	mov	pc, lr
+	ret	lr
 
 	.align	5
 ENTRY(feroceon_range_dma_flush_range)
@@ -375,7 +375,7 @@ ENTRY(feroceon_range_dma_flush_range)
 	mcr	p15, 5, r1, c15, c15, 1		@ D clean/inv range top
 	msr	cpsr_c, r2			@ restore interrupts
 	mcr	p15, 0, r0, c7, c10, 4		@ drain WB
-	mov	pc, lr
+	ret	lr
 
 /*
  *	dma_map_area(start, size, dir)
@@ -412,7 +412,7 @@ ENDPROC(feroceon_range_dma_map_area)
  *	- dir	- DMA direction
  */
 ENTRY(feroceon_dma_unmap_area)
-	mov	pc, lr
+	ret	lr
 ENDPROC(feroceon_dma_unmap_area)
 
 	.globl	feroceon_flush_kern_cache_louis
@@ -461,7 +461,7 @@ ENTRY(cpu_feroceon_dcache_clean_area)
 	bhi	1b
 #endif
 	mcr	p15, 0, r0, c7, c10, 4		@ drain WB
-	mov	pc, lr
+	ret	lr
 
 /* =============================== PageTable ============================== */
 
@@ -490,9 +490,9 @@ ENTRY(cpu_feroceon_switch_mm)
 
 	mcr	p15, 0, r0, c2, c0, 0		@ load page table pointer
 	mcr	p15, 0, ip, c8, c7, 0		@ invalidate I & D TLBs
-	mov	pc, r2
+	ret	r2
 #else
-	mov	pc, lr
+	ret	lr
 #endif
 
 /*
@@ -512,7 +512,7 @@ ENTRY(cpu_feroceon_set_pte_ext)
 #endif
 	mcr	p15, 0, r0, c7, c10, 4		@ drain WB
 #endif
-	mov	pc, lr
+	ret	lr
 
 /* Suspend/resume support: taken from arch/arm/mm/proc-arm926.S */
 .globl	cpu_feroceon_suspend_size
@@ -554,7 +554,7 @@ __feroceon_setup:
 	mrc	p15, 0, r0, c1, c0		@ get control register v4
 	bic	r0, r0, r5
 	orr	r0, r0, r6
-	mov	pc, lr
+	ret	lr
 	.size	__feroceon_setup, . - __feroceon_setup
 
 	/*
diff --git a/arch/arm/mm/proc-mohawk.S b/arch/arm/mm/proc-mohawk.S
index 40acba595731..53d393455f13 100644
--- a/arch/arm/mm/proc-mohawk.S
+++ b/arch/arm/mm/proc-mohawk.S
@@ -45,7 +45,7 @@
  * cpu_mohawk_proc_init()
  */
 ENTRY(cpu_mohawk_proc_init)
-	mov	pc, lr
+	ret	lr
 
 /*
  * cpu_mohawk_proc_fin()
@@ -55,7 +55,7 @@ ENTRY(cpu_mohawk_proc_fin)
 	bic	r0, r0, #0x1800			@ ...iz...........
 	bic	r0, r0, #0x0006			@ .............ca.
 	mcr	p15, 0, r0, c1, c0, 0		@ disable caches
-	mov	pc, lr
+	ret	lr
 
 /*
  * cpu_mohawk_reset(loc)
@@ -79,7 +79,7 @@ ENTRY(cpu_mohawk_reset)
 	bic	ip, ip, #0x0007			@ .............cam
 	bic	ip, ip, #0x1100			@ ...i...s........
 	mcr	p15, 0, ip, c1, c0, 0		@ ctrl register
-	mov	pc, r0
+	ret	r0
 ENDPROC(cpu_mohawk_reset)
 	.popsection
 
@@ -93,7 +93,7 @@ ENTRY(cpu_mohawk_do_idle)
 	mov	r0, #0
 	mcr	p15, 0, r0, c7, c10, 4		@ drain write buffer
 	mcr	p15, 0, r0, c7, c0, 4		@ wait for interrupt
-	mov	pc, lr
+	ret	lr
 
 /*
  *	flush_icache_all()
@@ -103,7 +103,7 @@ ENTRY(cpu_mohawk_do_idle)
 ENTRY(mohawk_flush_icache_all)
 	mov	r0, #0
 	mcr	p15, 0, r0, c7, c5, 0		@ invalidate I cache
-	mov	pc, lr
+	ret	lr
 ENDPROC(mohawk_flush_icache_all)
 
 /*
@@ -128,7 +128,7 @@ __flush_whole_cache:
 	tst	r2, #VM_EXEC
 	mcrne	p15, 0, ip, c7, c5, 0		@ invalidate I cache
 	mcrne	p15, 0, ip, c7, c10, 0		@ drain write buffer
-	mov	pc, lr
+	ret	lr
 
 /*
  *	flush_user_cache_range(start, end, flags)
@@ -158,7 +158,7 @@ ENTRY(mohawk_flush_user_cache_range)
 	blo	1b
 	tst	r2, #VM_EXEC
 	mcrne	p15, 0, ip, c7, c10, 4		@ drain WB
-	mov	pc, lr
+	ret	lr
 
 /*
  *	coherent_kern_range(start, end)
@@ -194,7 +194,7 @@ ENTRY(mohawk_coherent_user_range)
 	blo	1b
 	mcr	p15, 0, r0, c7, c10, 4		@ drain WB
 	mov	r0, #0
-	mov	pc, lr
+	ret	lr
 
 /*
  *	flush_kern_dcache_area(void *addr, size_t size)
@@ -214,7 +214,7 @@ ENTRY(mohawk_flush_kern_dcache_area)
 	mov	r0, #0
 	mcr	p15, 0, r0, c7, c5, 0		@ invalidate I cache
 	mcr	p15, 0, r0, c7, c10, 4		@ drain WB
-	mov	pc, lr
+	ret	lr
 
 /*
  *	dma_inv_range(start, end)
@@ -240,7 +240,7 @@ mohawk_dma_inv_range:
 	cmp	r0, r1
 	blo	1b
 	mcr	p15, 0, r0, c7, c10, 4		@ drain WB
-	mov	pc, lr
+	ret	lr
 
 /*
  *	dma_clean_range(start, end)
@@ -259,7 +259,7 @@ mohawk_dma_clean_range:
 	cmp	r0, r1
 	blo	1b
 	mcr	p15, 0, r0, c7, c10, 4		@ drain WB
-	mov	pc, lr
+	ret	lr
 
 /*
  *	dma_flush_range(start, end)
@@ -277,7 +277,7 @@ ENTRY(mohawk_dma_flush_range)
 	cmp	r0, r1
 	blo	1b
 	mcr	p15, 0, r0, c7, c10, 4		@ drain WB
-	mov	pc, lr
+	ret	lr
 
 /*
  *	dma_map_area(start, size, dir)
@@ -300,7 +300,7 @@ ENDPROC(mohawk_dma_map_area)
  *	- dir	- DMA direction
  */
 ENTRY(mohawk_dma_unmap_area)
-	mov	pc, lr
+	ret	lr
 ENDPROC(mohawk_dma_unmap_area)
 
 	.globl	mohawk_flush_kern_cache_louis
@@ -315,7 +315,7 @@ ENTRY(cpu_mohawk_dcache_clean_area)
 	subs	r1, r1, #CACHE_DLINESIZE
 	bhi	1b
 	mcr	p15, 0, r0, c7, c10, 4		@ drain WB
-	mov	pc, lr
+	ret	lr
 
 /*
  * cpu_mohawk_switch_mm(pgd)
@@ -333,7 +333,7 @@ ENTRY(cpu_mohawk_switch_mm)
 	orr	r0, r0, #0x18			@ cache the page table in L2
 	mcr	p15, 0, r0, c2, c0, 0		@ load page table pointer
 	mcr	p15, 0, ip, c8, c7, 0		@ invalidate I & D TLBs
-	mov	pc, lr
+	ret	lr
 
 /*
  * cpu_mohawk_set_pte_ext(ptep, pte, ext)
@@ -346,7 +346,7 @@ ENTRY(cpu_mohawk_set_pte_ext)
 	mov	r0, r0
 	mcr	p15, 0, r0, c7, c10, 1		@ clean D entry
 	mcr	p15, 0, r0, c7, c10, 4		@ drain WB
-	mov	pc, lr
+	ret	lr
 
 .globl	cpu_mohawk_suspend_size
 .equ	cpu_mohawk_suspend_size, 4 * 6
@@ -400,7 +400,7 @@ __mohawk_setup:
 	mrc	p15, 0, r0, c1, c0		@ get control register
 	bic	r0, r0, r5
 	orr	r0, r0, r6
-	mov	pc, lr
+	ret	lr
 
 	.size	__mohawk_setup, . - __mohawk_setup
 
diff --git a/arch/arm/mm/proc-sa110.S b/arch/arm/mm/proc-sa110.S
index c45319c8f1d9..8008a0461cf5 100644
--- a/arch/arm/mm/proc-sa110.S
+++ b/arch/arm/mm/proc-sa110.S
@@ -38,7 +38,7 @@
 ENTRY(cpu_sa110_proc_init)
 	mov	r0, #0
 	mcr	p15, 0, r0, c15, c1, 2		@ Enable clock switching
-	mov	pc, lr
+	ret	lr
 
 /*
  * cpu_sa110_proc_fin()
@@ -50,7 +50,7 @@ ENTRY(cpu_sa110_proc_fin)
 	bic	r0, r0, #0x1000			@ ...i............
 	bic	r0, r0, #0x000e			@ ............wca.
 	mcr	p15, 0, r0, c1, c0, 0		@ disable caches
-	mov	pc, lr
+	ret	lr
 
 /*
  * cpu_sa110_reset(loc)
@@ -74,7 +74,7 @@ ENTRY(cpu_sa110_reset)
 	bic	ip, ip, #0x000f			@ ............wcam
 	bic	ip, ip, #0x1100			@ ...i...s........
 	mcr	p15, 0, ip, c1, c0, 0		@ ctrl register
-	mov	pc, r0
+	ret	r0
 ENDPROC(cpu_sa110_reset)
 	.popsection
 
@@ -103,7 +103,7 @@ ENTRY(cpu_sa110_do_idle)
 	mov	r0, r0				@ safety
 	mov	r0, r0				@ safety
 	mcr	p15, 0, r0, c15, c1, 2		@ enable clock switching
-	mov	pc, lr
+	ret	lr
 
 /* ================================= CACHE ================================ */
 
@@ -121,7 +121,7 @@ ENTRY(cpu_sa110_dcache_clean_area)
 	add	r0, r0, #DCACHELINESIZE
 	subs	r1, r1, #DCACHELINESIZE
 	bhi	1b
-	mov	pc, lr
+	ret	lr
 
 /* =============================== PageTable ============================== */
 
@@ -141,7 +141,7 @@ ENTRY(cpu_sa110_switch_mm)
 	mcr	p15, 0, ip, c8, c7, 0		@ invalidate I & D TLBs
 	ldr	pc, [sp], #4
 #else
-	mov	pc, lr
+	ret	lr
 #endif
 
 /*
@@ -157,7 +157,7 @@ ENTRY(cpu_sa110_set_pte_ext)
 	mcr	p15, 0, r0, c7, c10, 1		@ clean D entry
 	mcr	p15, 0, r0, c7, c10, 4		@ drain WB
 #endif
-	mov	pc, lr
+	ret	lr
 
 	.type	__sa110_setup, #function
 __sa110_setup:
@@ -173,7 +173,7 @@ __sa110_setup:
 	mrc	p15, 0, r0, c1, c0		@ get control register v4
 	bic	r0, r0, r5
 	orr	r0, r0, r6
-	mov	pc, lr
+	ret	lr
 	.size	__sa110_setup, . - __sa110_setup
 
 	/*
diff --git a/arch/arm/mm/proc-sa1100.S b/arch/arm/mm/proc-sa1100.S
index 09d241ae2dbe..89f97ac648a9 100644
--- a/arch/arm/mm/proc-sa1100.S
+++ b/arch/arm/mm/proc-sa1100.S
@@ -43,7 +43,7 @@ ENTRY(cpu_sa1100_proc_init)
 	mov	r0, #0
 	mcr	p15, 0, r0, c15, c1, 2		@ Enable clock switching
 	mcr	p15, 0, r0, c9, c0, 5		@ Allow read-buffer operations from userland
-	mov	pc, lr
+	ret	lr
 
 /*
  * cpu_sa1100_proc_fin()
@@ -58,7 +58,7 @@ ENTRY(cpu_sa1100_proc_fin)
 	bic	r0, r0, #0x1000			@ ...i............
 	bic	r0, r0, #0x000e			@ ............wca.
 	mcr	p15, 0, r0, c1, c0, 0		@ disable caches
-	mov	pc, lr
+	ret	lr
 
 /*
  * cpu_sa1100_reset(loc)
@@ -82,7 +82,7 @@ ENTRY(cpu_sa1100_reset)
 	bic	ip, ip, #0x000f			@ ............wcam
 	bic	ip, ip, #0x1100			@ ...i...s........
 	mcr	p15, 0, ip, c1, c0, 0		@ ctrl register
-	mov	pc, r0
+	ret	r0
 ENDPROC(cpu_sa1100_reset)
 	.popsection
 
@@ -113,7 +113,7 @@ ENTRY(cpu_sa1100_do_idle)
 	mcr	p15, 0, r0, c15, c8, 2		@ wait for interrupt
 	mov	r0, r0				@ safety
 	mcr	p15, 0, r0, c15, c1, 2		@ enable clock switching
-	mov	pc, lr
+	ret	lr
 
 /* ================================= CACHE ================================ */
 
@@ -131,7 +131,7 @@ ENTRY(cpu_sa1100_dcache_clean_area)
 	add	r0, r0, #DCACHELINESIZE
 	subs	r1, r1, #DCACHELINESIZE
 	bhi	1b
-	mov	pc, lr
+	ret	lr
 
 /* =============================== PageTable ============================== */
 
@@ -152,7 +152,7 @@ ENTRY(cpu_sa1100_switch_mm)
 	mcr	p15, 0, ip, c8, c7, 0		@ invalidate I & D TLBs
 	ldr	pc, [sp], #4
 #else
-	mov	pc, lr
+	ret	lr
 #endif
 
 /*
@@ -168,7 +168,7 @@ ENTRY(cpu_sa1100_set_pte_ext)
 	mcr	p15, 0, r0, c7, c10, 1		@ clean D entry
 	mcr	p15, 0, r0, c7, c10, 4		@ drain WB
 #endif
-	mov	pc, lr
+	ret	lr
 
 .globl	cpu_sa1100_suspend_size
 .equ	cpu_sa1100_suspend_size, 4 * 3
@@ -211,7 +211,7 @@ __sa1100_setup:
 	mrc	p15, 0, r0, c1, c0		@ get control register v4
 	bic	r0, r0, r5
 	orr	r0, r0, r6
-	mov	pc, lr
+	ret	lr
 	.size	__sa1100_setup, . - __sa1100_setup
 
 	/*
diff --git a/arch/arm/mm/proc-v6.S b/arch/arm/mm/proc-v6.S
index 32b3558321c4..d0390f4b3f18 100644
--- a/arch/arm/mm/proc-v6.S
+++ b/arch/arm/mm/proc-v6.S
@@ -36,14 +36,14 @@
 #define PMD_FLAGS_SMP	PMD_SECT_WBWA|PMD_SECT_S
 
 ENTRY(cpu_v6_proc_init)
-	mov	pc, lr
+	ret	lr
 
 ENTRY(cpu_v6_proc_fin)
 	mrc	p15, 0, r0, c1, c0, 0		@ ctrl register
 	bic	r0, r0, #0x1000			@ ...i............
 	bic	r0, r0, #0x0006			@ .............ca.
 	mcr	p15, 0, r0, c1, c0, 0		@ disable caches
-	mov	pc, lr
+	ret	lr
 
 /*
  *	cpu_v6_reset(loc)
@@ -62,7 +62,7 @@ ENTRY(cpu_v6_reset)
 	mcr	p15, 0, r1, c1, c0, 0		@ disable MMU
 	mov	r1, #0
 	mcr	p15, 0, r1, c7, c5, 4		@ ISB
-	mov	pc, r0
+	ret	r0
 ENDPROC(cpu_v6_reset)
 	.popsection
 
@@ -77,14 +77,14 @@ ENTRY(cpu_v6_do_idle)
 	mov	r1, #0
 	mcr	p15, 0, r1, c7, c10, 4		@ DWB - WFI may enter a low-power mode
 	mcr	p15, 0, r1, c7, c0, 4		@ wait for interrupt
-	mov	pc, lr
+	ret	lr
 
 ENTRY(cpu_v6_dcache_clean_area)
 1:	mcr	p15, 0, r0, c7, c10, 1		@ clean D entry
 	add	r0, r0, #D_CACHE_LINE_SIZE
 	subs	r1, r1, #D_CACHE_LINE_SIZE
 	bhi	1b
-	mov	pc, lr
+	ret	lr
 
 /*
  *	cpu_v6_switch_mm(pgd_phys, tsk)
@@ -113,7 +113,7 @@ ENTRY(cpu_v6_switch_mm)
 #endif
 	mcr	p15, 0, r1, c13, c0, 1		@ set context ID
 #endif
-	mov	pc, lr
+	ret	lr
 
 /*
  *	cpu_v6_set_pte_ext(ptep, pte, ext)
@@ -131,7 +131,7 @@ ENTRY(cpu_v6_set_pte_ext)
 #ifdef CONFIG_MMU
 	armv6_set_pte_ext cpu_v6
 #endif
-	mov	pc, lr
+	ret	lr
 
 /* Suspend/resume support: taken from arch/arm/mach-s3c64xx/sleep.S */
 .globl	cpu_v6_suspend_size
@@ -241,7 +241,7 @@ __v6_setup:
 	mcreq	p15, 0, r5, c1, c0, 1		@ write aux control reg
 	orreq	r0, r0, #(1 << 21)		@ low interrupt latency configuration
 #endif
-	mov	pc, lr				@ return to head.S:__ret
+	ret	lr				@ return to head.S:__ret
 
 	/*
 	 *         V X F   I D LR
diff --git a/arch/arm/mm/proc-v7-2level.S b/arch/arm/mm/proc-v7-2level.S
index 1f52915f2b28..ed448d8a596b 100644
--- a/arch/arm/mm/proc-v7-2level.S
+++ b/arch/arm/mm/proc-v7-2level.S
@@ -59,7 +59,7 @@ ENTRY(cpu_v7_switch_mm)
 	mcr	p15, 0, r0, c2, c0, 0		@ set TTB 0
 	isb
 #endif
-	mov	pc, lr
+	bx	lr
 ENDPROC(cpu_v7_switch_mm)
 
 /*
@@ -106,7 +106,7 @@ ENTRY(cpu_v7_set_pte_ext)
 	ALT_SMP(W(nop))
 	ALT_UP (mcr	p15, 0, r0, c7, c10, 1)		@ flush_pte
 #endif
-	mov	pc, lr
+	bx	lr
 ENDPROC(cpu_v7_set_pte_ext)
 
 	/*
diff --git a/arch/arm/mm/proc-v7-3level.S b/arch/arm/mm/proc-v7-3level.S
index 22e3ad63500c..564f4b934ceb 100644
--- a/arch/arm/mm/proc-v7-3level.S
+++ b/arch/arm/mm/proc-v7-3level.S
@@ -19,6 +19,7 @@
  * along with this program; if not, write to the Free Software
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  */
+#include <asm/assembler.h>
 
 #define TTB_IRGN_NC	(0 << 8)
 #define TTB_IRGN_WBWA	(1 << 8)
@@ -61,7 +62,7 @@ ENTRY(cpu_v7_switch_mm)
 	mcrr	p15, 0, rpgdl, rpgdh, c2		@ set TTB 0
 	isb
 #endif
-	mov	pc, lr
+	ret	lr
 ENDPROC(cpu_v7_switch_mm)
 
 #ifdef __ARMEB__
@@ -92,7 +93,7 @@ ENTRY(cpu_v7_set_pte_ext)
 	ALT_SMP(W(nop))
 	ALT_UP (mcr	p15, 0, r0, c7, c10, 1)		@ flush_pte
 #endif
-	mov	pc, lr
+	ret	lr
 ENDPROC(cpu_v7_set_pte_ext)
 
 	/*
diff --git a/arch/arm/mm/proc-v7.S b/arch/arm/mm/proc-v7.S
index 3db2c2f04a30..71abb60c4222 100644
--- a/arch/arm/mm/proc-v7.S
+++ b/arch/arm/mm/proc-v7.S
@@ -26,7 +26,7 @@
 #endif
 
 ENTRY(cpu_v7_proc_init)
-	mov	pc, lr
+	ret	lr
 ENDPROC(cpu_v7_proc_init)
 
 ENTRY(cpu_v7_proc_fin)
@@ -34,7 +34,7 @@ ENTRY(cpu_v7_proc_fin)
 	bic	r0, r0, #0x1000			@ ...i............
 	bic	r0, r0, #0x0006			@ .............ca.
 	mcr	p15, 0, r0, c1, c0, 0		@ disable caches
-	mov	pc, lr
+	ret	lr
 ENDPROC(cpu_v7_proc_fin)
 
 /*
@@ -71,20 +71,20 @@ ENDPROC(cpu_v7_reset)
 ENTRY(cpu_v7_do_idle)
 	dsb					@ WFI may enter a low-power mode
 	wfi
-	mov	pc, lr
+	ret	lr
 ENDPROC(cpu_v7_do_idle)
 
 ENTRY(cpu_v7_dcache_clean_area)
 	ALT_SMP(W(nop))			@ MP extensions imply L1 PTW
 	ALT_UP_B(1f)
-	mov	pc, lr
+	ret	lr
 1:	dcache_line_size r2, r3
 2:	mcr	p15, 0, r0, c7, c10, 1		@ clean D entry
 	add	r0, r0, r2
 	subs	r1, r1, r2
 	bhi	2b
 	dsb	ishst
-	mov	pc, lr
+	ret	lr
 ENDPROC(cpu_v7_dcache_clean_area)
 
 	string	cpu_v7_name, "ARMv7 Processor"
@@ -163,7 +163,7 @@ ENTRY(cpu_pj4b_do_idle)
 	dsb					@ WFI may enter a low-power mode
 	wfi
 	dsb					@barrier
-	mov	pc, lr
+	ret	lr
 ENDPROC(cpu_pj4b_do_idle)
 #else
 	globl_equ	cpu_pj4b_do_idle,  	cpu_v7_do_idle
@@ -407,7 +407,7 @@ __v7_setup:
 	bic	r0, r0, r5			@ clear bits them
 	orr	r0, r0, r6			@ set them
  THUMB(	orr	r0, r0, #1 << 30	)	@ Thumb exceptions
-	mov	pc, lr				@ return to head.S:__ret
+	ret	lr				@ return to head.S:__ret
 ENDPROC(__v7_setup)
 
 	.align	2
diff --git a/arch/arm/mm/proc-v7m.S b/arch/arm/mm/proc-v7m.S
index 1ca37c72f12f..d1e68b553d3b 100644
--- a/arch/arm/mm/proc-v7m.S
+++ b/arch/arm/mm/proc-v7m.S
@@ -16,11 +16,11 @@
 #include "proc-macros.S"
 
 ENTRY(cpu_v7m_proc_init)
-	mov	pc, lr
+	ret	lr
 ENDPROC(cpu_v7m_proc_init)
 
 ENTRY(cpu_v7m_proc_fin)
-	mov	pc, lr
+	ret	lr
 ENDPROC(cpu_v7m_proc_fin)
 
 /*
@@ -34,7 +34,7 @@ ENDPROC(cpu_v7m_proc_fin)
  */
 	.align	5
 ENTRY(cpu_v7m_reset)
-	mov	pc, r0
+	ret	r0
 ENDPROC(cpu_v7m_reset)
 
 /*
@@ -46,18 +46,18 @@ ENDPROC(cpu_v7m_reset)
  */
 ENTRY(cpu_v7m_do_idle)
 	wfi
-	mov	pc, lr
+	ret	lr
 ENDPROC(cpu_v7m_do_idle)
 
 ENTRY(cpu_v7m_dcache_clean_area)
-	mov	pc, lr
+	ret	lr
 ENDPROC(cpu_v7m_dcache_clean_area)
 
 /*
  * There is no MMU, so here is nothing to do.
  */
 ENTRY(cpu_v7m_switch_mm)
-	mov	pc, lr
+	ret	lr
 ENDPROC(cpu_v7m_switch_mm)
 
 .globl	cpu_v7m_suspend_size
@@ -65,11 +65,11 @@ ENDPROC(cpu_v7m_switch_mm)
 
 #ifdef CONFIG_ARM_CPU_SUSPEND
 ENTRY(cpu_v7m_do_suspend)
-	mov	pc, lr
+	ret	lr
 ENDPROC(cpu_v7m_do_suspend)
 
 ENTRY(cpu_v7m_do_resume)
-	mov	pc, lr
+	ret	lr
 ENDPROC(cpu_v7m_do_resume)
 #endif
 
@@ -120,7 +120,7 @@ __v7m_setup:
 	ldr	r12, [r0, V7M_SCB_CCR]	@ system control register
 	orr	r12, #V7M_SCB_CCR_STKALIGN
 	str	r12, [r0, V7M_SCB_CCR]
-	mov	pc, lr
+	ret	lr
 ENDPROC(__v7m_setup)
 
 	.align 2
diff --git a/arch/arm/mm/proc-xsc3.S b/arch/arm/mm/proc-xsc3.S
index dc1645890042..f8acdfece036 100644
--- a/arch/arm/mm/proc-xsc3.S
+++ b/arch/arm/mm/proc-xsc3.S
@@ -83,7 +83,7 @@
  * Nothing too exciting at the moment
  */
 ENTRY(cpu_xsc3_proc_init)
-	mov	pc, lr
+	ret	lr
 
 /*
  * cpu_xsc3_proc_fin()
@@ -93,7 +93,7 @@ ENTRY(cpu_xsc3_proc_fin)
 	bic	r0, r0, #0x1800			@ ...IZ...........
 	bic	r0, r0, #0x0006			@ .............CA.
 	mcr	p15, 0, r0, c1, c0, 0		@ disable caches
-	mov	pc, lr
+	ret	lr
 
 /*
  * cpu_xsc3_reset(loc)
@@ -119,7 +119,7 @@ ENTRY(cpu_xsc3_reset)
 	@ CAUTION: MMU turned off from this point.  We count on the pipeline
 	@ already containing those two last instructions to survive.
 	mcr	p15, 0, ip, c8, c7, 0		@ invalidate I and D TLBs
-	mov	pc, r0
+	ret	r0
 ENDPROC(cpu_xsc3_reset)
 	.popsection
 
@@ -138,7 +138,7 @@ ENDPROC(cpu_xsc3_reset)
 ENTRY(cpu_xsc3_do_idle)
 	mov	r0, #1
 	mcr	p14, 0, r0, c7, c0, 0		@ go to idle
-	mov	pc, lr
+	ret	lr
 
 /* ================================= CACHE ================================ */
 
@@ -150,7 +150,7 @@ ENTRY(cpu_xsc3_do_idle)
 ENTRY(xsc3_flush_icache_all)
 	mov	r0, #0
 	mcr	p15, 0, r0, c7, c5, 0		@ invalidate I cache
-	mov	pc, lr
+	ret	lr
 ENDPROC(xsc3_flush_icache_all)
 
 /*
@@ -176,7 +176,7 @@ __flush_whole_cache:
 	mcrne	p15, 0, ip, c7, c5, 0		@ invalidate L1 I cache and BTB
 	mcrne	p15, 0, ip, c7, c10, 4		@ data write barrier
 	mcrne	p15, 0, ip, c7, c5, 4		@ prefetch flush
-	mov	pc, lr
+	ret	lr
 
 /*
  *	flush_user_cache_range(start, end, vm_flags)
@@ -205,7 +205,7 @@ ENTRY(xsc3_flush_user_cache_range)
 	mcrne	p15, 0, ip, c7, c5, 6		@ invalidate BTB
 	mcrne	p15, 0, ip, c7, c10, 4		@ data write barrier
 	mcrne	p15, 0, ip, c7, c5, 4		@ prefetch flush
-	mov	pc, lr
+	ret	lr
 
 /*
  *	coherent_kern_range(start, end)
@@ -232,7 +232,7 @@ ENTRY(xsc3_coherent_user_range)
 	mcr	p15, 0, r0, c7, c5, 0		@ invalidate L1 I cache and BTB
 	mcr	p15, 0, r0, c7, c10, 4		@ data write barrier
 	mcr	p15, 0, r0, c7, c5, 4		@ prefetch flush
-	mov	pc, lr
+	ret	lr
 
 /*
  *	flush_kern_dcache_area(void *addr, size_t size)
@@ -253,7 +253,7 @@ ENTRY(xsc3_flush_kern_dcache_area)
 	mcr	p15, 0, r0, c7, c5, 0		@ invalidate L1 I cache and BTB
 	mcr	p15, 0, r0, c7, c10, 4		@ data write barrier
 	mcr	p15, 0, r0, c7, c5, 4		@ prefetch flush
-	mov	pc, lr
+	ret	lr
 
 /*
  *	dma_inv_range(start, end)
@@ -277,7 +277,7 @@ xsc3_dma_inv_range:
 	cmp	r0, r1
 	blo	1b
 	mcr	p15, 0, r0, c7, c10, 4		@ data write barrier
-	mov	pc, lr
+	ret	lr
 
 /*
  *	dma_clean_range(start, end)
@@ -294,7 +294,7 @@ xsc3_dma_clean_range:
 	cmp	r0, r1
 	blo	1b
 	mcr	p15, 0, r0, c7, c10, 4		@ data write barrier
-	mov	pc, lr
+	ret	lr
 
 /*
  *	dma_flush_range(start, end)
@@ -311,7 +311,7 @@ ENTRY(xsc3_dma_flush_range)
 	cmp	r0, r1
 	blo	1b
 	mcr	p15, 0, r0, c7, c10, 4		@ data write barrier
-	mov	pc, lr
+	ret	lr
 
 /*
  *	dma_map_area(start, size, dir)
@@ -334,7 +334,7 @@ ENDPROC(xsc3_dma_map_area)
  *	- dir	- DMA direction
  */
 ENTRY(xsc3_dma_unmap_area)
-	mov	pc, lr
+	ret	lr
 ENDPROC(xsc3_dma_unmap_area)
 
 	.globl	xsc3_flush_kern_cache_louis
@@ -348,7 +348,7 @@ ENTRY(cpu_xsc3_dcache_clean_area)
 	add	r0, r0, #CACHELINESIZE
 	subs	r1, r1, #CACHELINESIZE
 	bhi	1b
-	mov	pc, lr
+	ret	lr
 
 /* =============================== PageTable ============================== */
 
@@ -406,7 +406,7 @@ ENTRY(cpu_xsc3_set_pte_ext)
 	orr	r2, r2, ip
 
 	xscale_set_pte_ext_epilogue
-	mov	pc, lr
+	ret	lr
 
 	.ltorg
 	.align
@@ -478,7 +478,7 @@ __xsc3_setup:
 	bic	r0, r0, r5			@ ..V. ..R. .... ..A.
 	orr	r0, r0, r6			@ ..VI Z..S .... .C.M (mmu)
 						@ ...I Z..S .... .... (uc)
-	mov	pc, lr
+	ret	lr
 
 	.size	__xsc3_setup, . - __xsc3_setup
 
diff --git a/arch/arm/mm/proc-xscale.S b/arch/arm/mm/proc-xscale.S
index d19b1cfcad91..23259f104c66 100644
--- a/arch/arm/mm/proc-xscale.S
+++ b/arch/arm/mm/proc-xscale.S
@@ -118,7 +118,7 @@ ENTRY(cpu_xscale_proc_init)
 	mrc	p15, 0, r1, c1, c0, 1
 	bic	r1, r1, #1
 	mcr	p15, 0, r1, c1, c0, 1
-	mov	pc, lr
+	ret	lr
 
 /*
  * cpu_xscale_proc_fin()
@@ -128,7 +128,7 @@ ENTRY(cpu_xscale_proc_fin)
 	bic	r0, r0, #0x1800			@ ...IZ...........
 	bic	r0, r0, #0x0006			@ .............CA.
 	mcr	p15, 0, r0, c1, c0, 0		@ disable caches
-	mov	pc, lr
+	ret	lr
 
 /*
  * cpu_xscale_reset(loc)
@@ -160,7 +160,7 @@ ENTRY(cpu_xscale_reset)
 	@ CAUTION: MMU turned off from this point. We count on the pipeline
 	@ already containing those two last instructions to survive.
 	mcr	p15, 0, ip, c8, c7, 0		@ invalidate I & D TLBs
-	mov	pc, r0
+	ret	r0
 ENDPROC(cpu_xscale_reset)
 	.popsection
 
@@ -179,7 +179,7 @@ ENDPROC(cpu_xscale_reset)
 ENTRY(cpu_xscale_do_idle)
 	mov	r0, #1
 	mcr	p14, 0, r0, c7, c0, 0		@ Go to IDLE
-	mov	pc, lr
+	ret	lr
 
 /* ================================= CACHE ================================ */
 
@@ -191,7 +191,7 @@ ENTRY(cpu_xscale_do_idle)
 ENTRY(xscale_flush_icache_all)
 	mov	r0, #0
 	mcr	p15, 0, r0, c7, c5, 0		@ invalidate I cache
-	mov	pc, lr
+	ret	lr
 ENDPROC(xscale_flush_icache_all)
 
 /*
@@ -216,7 +216,7 @@ __flush_whole_cache:
 	tst	r2, #VM_EXEC
 	mcrne	p15, 0, ip, c7, c5, 0		@ Invalidate I cache & BTB
 	mcrne	p15, 0, ip, c7, c10, 4		@ Drain Write (& Fill) Buffer
-	mov	pc, lr
+	ret	lr
 
 /*
  *	flush_user_cache_range(start, end, vm_flags)
@@ -245,7 +245,7 @@ ENTRY(xscale_flush_user_cache_range)
 	tst	r2, #VM_EXEC
 	mcrne	p15, 0, ip, c7, c5, 6		@ Invalidate BTB
 	mcrne	p15, 0, ip, c7, c10, 4		@ Drain Write (& Fill) Buffer
-	mov	pc, lr
+	ret	lr
 
 /*
  *	coherent_kern_range(start, end)
@@ -269,7 +269,7 @@ ENTRY(xscale_coherent_kern_range)
 	mov	r0, #0
 	mcr	p15, 0, r0, c7, c5, 0		@ Invalidate I cache & BTB
 	mcr	p15, 0, r0, c7, c10, 4		@ Drain Write (& Fill) Buffer
-	mov	pc, lr
+	ret	lr
 
 /*
  *	coherent_user_range(start, end)
@@ -291,7 +291,7 @@ ENTRY(xscale_coherent_user_range)
 	mov	r0, #0
 	mcr	p15, 0, r0, c7, c5, 6		@ Invalidate BTB
 	mcr	p15, 0, r0, c7, c10, 4		@ Drain Write (& Fill) Buffer
-	mov	pc, lr
+	ret	lr
 
 /*
  *	flush_kern_dcache_area(void *addr, size_t size)
@@ -312,7 +312,7 @@ ENTRY(xscale_flush_kern_dcache_area)
 	mov	r0, #0
 	mcr	p15, 0, r0, c7, c5, 0		@ Invalidate I cache & BTB
 	mcr	p15, 0, r0, c7, c10, 4		@ Drain Write (& Fill) Buffer
-	mov	pc, lr
+	ret	lr
 
 /*
  *	dma_inv_range(start, end)
@@ -336,7 +336,7 @@ xscale_dma_inv_range:
 	cmp	r0, r1
 	blo	1b
 	mcr	p15, 0, r0, c7, c10, 4		@ Drain Write (& Fill) Buffer
-	mov	pc, lr
+	ret	lr
 
 /*
  *	dma_clean_range(start, end)
@@ -353,7 +353,7 @@ xscale_dma_clean_range:
 	cmp	r0, r1
 	blo	1b
 	mcr	p15, 0, r0, c7, c10, 4		@ Drain Write (& Fill) Buffer
-	mov	pc, lr
+	ret	lr
 
 /*
  *	dma_flush_range(start, end)
@@ -371,7 +371,7 @@ ENTRY(xscale_dma_flush_range)
 	cmp	r0, r1
 	blo	1b
 	mcr	p15, 0, r0, c7, c10, 4		@ Drain Write (& Fill) Buffer
-	mov	pc, lr
+	ret	lr
 
 /*
  *	dma_map_area(start, size, dir)
@@ -407,7 +407,7 @@ ENDPROC(xscale_80200_A0_A1_dma_map_area)
  *	- dir	- DMA direction
  */
 ENTRY(xscale_dma_unmap_area)
-	mov	pc, lr
+	ret	lr
 ENDPROC(xscale_dma_unmap_area)
 
 	.globl	xscale_flush_kern_cache_louis
@@ -458,7 +458,7 @@ ENTRY(cpu_xscale_dcache_clean_area)
 	add	r0, r0, #CACHELINESIZE
 	subs	r1, r1, #CACHELINESIZE
 	bhi	1b
-	mov	pc, lr
+	ret	lr
 
 /* =============================== PageTable ============================== */
 
@@ -521,7 +521,7 @@ ENTRY(cpu_xscale_set_pte_ext)
 	orr	r2, r2, ip
 
 	xscale_set_pte_ext_epilogue
-	mov	pc, lr
+	ret	lr
 
 	.ltorg
 	.align
@@ -572,7 +572,7 @@ __xscale_setup:
 	mrc	p15, 0, r0, c1, c0, 0		@ get control register
 	bic	r0, r0, r5
 	orr	r0, r0, r6
-	mov	pc, lr
+	ret	lr
 	.size	__xscale_setup, . - __xscale_setup
 
 	/*
diff --git a/arch/arm/mm/tlb-fa.S b/arch/arm/mm/tlb-fa.S
index d3ddcf9a76ca..d2d9ecbe0aac 100644
--- a/arch/arm/mm/tlb-fa.S
+++ b/arch/arm/mm/tlb-fa.S
@@ -18,6 +18,7 @@
  */
 #include <linux/linkage.h>
 #include <linux/init.h>
+#include <asm/assembler.h>
 #include <asm/asm-offsets.h>
 #include <asm/tlbflush.h>
 #include "proc-macros.S"
@@ -37,7 +38,7 @@ ENTRY(fa_flush_user_tlb_range)
 	vma_vm_mm ip, r2
 	act_mm	r3				@ get current->active_mm
 	eors	r3, ip, r3			@ == mm ?
-	movne	pc, lr				@ no, we dont do anything
+	retne	lr				@ no, we dont do anything
 	mov	r3, #0
 	mcr	p15, 0, r3, c7, c10, 4		@ drain WB
 	bic	r0, r0, #0x0ff
@@ -47,7 +48,7 @@ ENTRY(fa_flush_user_tlb_range)
 	cmp	r0, r1
 	blo	1b
 	mcr	p15, 0, r3, c7, c10, 4		@ data write barrier
-	mov	pc, lr
+	ret	lr
 
 
 ENTRY(fa_flush_kern_tlb_range)
@@ -61,7 +62,7 @@ ENTRY(fa_flush_kern_tlb_range)
 	blo	1b
 	mcr	p15, 0, r3, c7, c10, 4		@ data write barrier
 	mcr	p15, 0, r3, c7, c5, 4		@ prefetch flush (isb)
-	mov	pc, lr
+	ret	lr
 
 	__INITDATA
 
diff --git a/arch/arm/mm/tlb-v4.S b/arch/arm/mm/tlb-v4.S
index 17a025ade573..a2b5dca42048 100644
--- a/arch/arm/mm/tlb-v4.S
+++ b/arch/arm/mm/tlb-v4.S
@@ -14,6 +14,7 @@
  */
 #include <linux/linkage.h>
 #include <linux/init.h>
+#include <asm/assembler.h>
 #include <asm/asm-offsets.h>
 #include <asm/tlbflush.h>
 #include "proc-macros.S"
@@ -33,7 +34,7 @@ ENTRY(v4_flush_user_tlb_range)
 	vma_vm_mm ip, r2
 	act_mm	r3				@ get current->active_mm
 	eors	r3, ip, r3				@ == mm ?
-	movne	pc, lr				@ no, we dont do anything
+	retne	lr				@ no, we dont do anything
 .v4_flush_kern_tlb_range:
 	bic	r0, r0, #0x0ff
 	bic	r0, r0, #0xf00
@@ -41,7 +42,7 @@ ENTRY(v4_flush_user_tlb_range)
 	add	r0, r0, #PAGE_SZ
 	cmp	r0, r1
 	blo	1b
-	mov	pc, lr
+	ret	lr
 
 /*
  *	v4_flush_kern_tlb_range(start, end)
diff --git a/arch/arm/mm/tlb-v4wb.S b/arch/arm/mm/tlb-v4wb.S
index c04598fa4d4a..5a093b458dbc 100644
--- a/arch/arm/mm/tlb-v4wb.S
+++ b/arch/arm/mm/tlb-v4wb.S
@@ -14,6 +14,7 @@
  */
 #include <linux/linkage.h>
 #include <linux/init.h>
+#include <asm/assembler.h>
 #include <asm/asm-offsets.h>
 #include <asm/tlbflush.h>
 #include "proc-macros.S"
@@ -33,7 +34,7 @@ ENTRY(v4wb_flush_user_tlb_range)
 	vma_vm_mm ip, r2
 	act_mm	r3				@ get current->active_mm
 	eors	r3, ip, r3				@ == mm ?
-	movne	pc, lr				@ no, we dont do anything
+	retne	lr				@ no, we dont do anything
 	vma_vm_flags r2, r2
 	mcr	p15, 0, r3, c7, c10, 4		@ drain WB
 	tst	r2, #VM_EXEC
@@ -44,7 +45,7 @@ ENTRY(v4wb_flush_user_tlb_range)
 	add	r0, r0, #PAGE_SZ
 	cmp	r0, r1
 	blo	1b
-	mov	pc, lr
+	ret	lr
 
 /*
  *	v4_flush_kern_tlb_range(start, end)
@@ -65,7 +66,7 @@ ENTRY(v4wb_flush_kern_tlb_range)
 	add	r0, r0, #PAGE_SZ
 	cmp	r0, r1
 	blo	1b
-	mov	pc, lr
+	ret	lr
 
 	__INITDATA
 
diff --git a/arch/arm/mm/tlb-v4wbi.S b/arch/arm/mm/tlb-v4wbi.S
index 1f6062b6c1c1..058861548f68 100644
--- a/arch/arm/mm/tlb-v4wbi.S
+++ b/arch/arm/mm/tlb-v4wbi.S
@@ -14,6 +14,7 @@
  */
 #include <linux/linkage.h>
 #include <linux/init.h>
+#include <asm/assembler.h>
 #include <asm/asm-offsets.h>
 #include <asm/tlbflush.h>
 #include "proc-macros.S"
@@ -32,7 +33,7 @@ ENTRY(v4wbi_flush_user_tlb_range)
 	vma_vm_mm ip, r2
 	act_mm	r3				@ get current->active_mm
 	eors	r3, ip, r3			@ == mm ?
-	movne	pc, lr				@ no, we dont do anything
+	retne	lr				@ no, we dont do anything
 	mov	r3, #0
 	mcr	p15, 0, r3, c7, c10, 4		@ drain WB
 	vma_vm_flags r2, r2
@@ -44,7 +45,7 @@ ENTRY(v4wbi_flush_user_tlb_range)
 	add	r0, r0, #PAGE_SZ
 	cmp	r0, r1
 	blo	1b
-	mov	pc, lr
+	ret	lr
 
 ENTRY(v4wbi_flush_kern_tlb_range)
 	mov	r3, #0
@@ -56,7 +57,7 @@ ENTRY(v4wbi_flush_kern_tlb_range)
 	add	r0, r0, #PAGE_SZ
 	cmp	r0, r1
 	blo	1b
-	mov	pc, lr
+	ret	lr
 
 	__INITDATA
 
diff --git a/arch/arm/mm/tlb-v6.S b/arch/arm/mm/tlb-v6.S
index eca07f550a0b..6f689be638bd 100644
--- a/arch/arm/mm/tlb-v6.S
+++ b/arch/arm/mm/tlb-v6.S
@@ -13,6 +13,7 @@
 #include <linux/init.h>
 #include <linux/linkage.h>
 #include <asm/asm-offsets.h>
+#include <asm/assembler.h>
 #include <asm/page.h>
 #include <asm/tlbflush.h>
 #include "proc-macros.S"
@@ -55,7 +56,7 @@ ENTRY(v6wbi_flush_user_tlb_range)
 	cmp	r0, r1
 	blo	1b
 	mcr	p15, 0, ip, c7, c10, 4		@ data synchronization barrier
-	mov	pc, lr
+	ret	lr
 
 /*
  *	v6wbi_flush_kern_tlb_range(start,end)
@@ -84,7 +85,7 @@ ENTRY(v6wbi_flush_kern_tlb_range)
 	blo	1b
 	mcr	p15, 0, r2, c7, c10, 4		@ data synchronization barrier
 	mcr	p15, 0, r2, c7, c5, 4		@ prefetch flush (isb)
-	mov	pc, lr
+	ret	lr
 
 	__INIT
 
diff --git a/arch/arm/mm/tlb-v7.S b/arch/arm/mm/tlb-v7.S
index 355308767bae..e5101a3bc57c 100644
--- a/arch/arm/mm/tlb-v7.S
+++ b/arch/arm/mm/tlb-v7.S
@@ -57,7 +57,7 @@ ENTRY(v7wbi_flush_user_tlb_range)
 	cmp	r0, r1
 	blo	1b
 	dsb	ish
-	mov	pc, lr
+	ret	lr
 ENDPROC(v7wbi_flush_user_tlb_range)
 
 /*
@@ -86,7 +86,7 @@ ENTRY(v7wbi_flush_kern_tlb_range)
 	blo	1b
 	dsb	ish
 	isb
-	mov	pc, lr
+	ret	lr
 ENDPROC(v7wbi_flush_kern_tlb_range)
 
 	__INIT
diff --git a/arch/arm/nwfpe/entry.S b/arch/arm/nwfpe/entry.S
index d18dde95b8aa..5d65be1f1e8a 100644
--- a/arch/arm/nwfpe/entry.S
+++ b/arch/arm/nwfpe/entry.S
@@ -19,7 +19,7 @@
     along with this program; if not, write to the Free Software
     Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */
-
+#include <asm/assembler.h>
 #include <asm/opcodes.h>
 
 /* This is the kernel's entry point into the floating point emulator.
@@ -92,7 +92,7 @@ emulate:
 	mov	r0, r6			@ prepare for EmulateAll()
 	bl	EmulateAll		@ emulate the instruction
 	cmp	r0, #0			@ was emulation successful
-	moveq	pc, r4			@ no, return failure
+	reteq	r4			@ no, return failure
 
 next:
 .Lx1:	ldrt	r6, [r5], #4		@ get the next instruction and
@@ -102,7 +102,7 @@ next:
 	teq	r2, #0x0C000000
 	teqne	r2, #0x0D000000
 	teqne	r2, #0x0E000000
-	movne	pc, r9			@ return ok if not a fp insn
+	retne	r9			@ return ok if not a fp insn
 
 	str	r5, [sp, #S_PC]		@ update PC copy in regs
 
@@ -115,7 +115,7 @@ next:
 	@ plain LDR instruction.  Weird, but it seems harmless.
 	.pushsection .fixup,"ax"
 	.align	2
-.Lfix:	mov	pc, r9			@ let the user eat segfaults
+.Lfix:	ret	r9			@ let the user eat segfaults
 	.popsection
 
 	.pushsection __ex_table,"a"
diff --git a/arch/arm/vfp/entry.S b/arch/arm/vfp/entry.S
index fe6ca574d093..2e78760f3495 100644
--- a/arch/arm/vfp/entry.S
+++ b/arch/arm/vfp/entry.S
@@ -34,7 +34,7 @@ ENDPROC(do_vfp)
 
 ENTRY(vfp_null_entry)
 	dec_preempt_count_ti r10, r4
-	mov	pc, lr
+	ret	lr
 ENDPROC(vfp_null_entry)
 
 	.align	2
@@ -49,7 +49,7 @@ ENTRY(vfp_testing_entry)
 	dec_preempt_count_ti r10, r4
 	ldr	r0, VFP_arch_address
 	str	r0, [r0]		@ set to non-zero value
-	mov	pc, r9			@ we have handled the fault
+	ret	r9			@ we have handled the fault
 ENDPROC(vfp_testing_entry)
 
 	.align	2
diff --git a/arch/arm/vfp/vfphw.S b/arch/arm/vfp/vfphw.S
index be807625ed8c..cda654cbf2c2 100644
--- a/arch/arm/vfp/vfphw.S
+++ b/arch/arm/vfp/vfphw.S
@@ -183,7 +183,7 @@ vfp_hw_state_valid:
 					@ always subtract 4 from the following
 					@ instruction address.
 	dec_preempt_count_ti r10, r4
-	mov	pc, r9			@ we think we have handled things
+	ret	r9			@ we think we have handled things
 
 
 look_for_VFP_exceptions:
@@ -202,7 +202,7 @@ look_for_VFP_exceptions:
 
 	DBGSTR	"not VFP"
 	dec_preempt_count_ti r10, r4
-	mov	pc, lr
+	ret	lr
 
 process_exception:
 	DBGSTR	"bounce"
@@ -234,7 +234,7 @@ ENTRY(vfp_save_state)
 	VFPFMRX	r12, FPINST2		@ FPINST2 if needed (and present)
 1:
 	stmia	r0, {r1, r2, r3, r12}	@ save FPEXC, FPSCR, FPINST, FPINST2
-	mov	pc, lr
+	ret	lr
 ENDPROC(vfp_save_state)
 
 	.align
@@ -245,7 +245,7 @@ vfp_current_hw_state_address:
 #ifdef CONFIG_THUMB2_KERNEL
 	adr	\tmp, 1f
 	add	\tmp, \tmp, \base, lsl \shift
-	mov	pc, \tmp
+	ret	\tmp
 #else
 	add	pc, pc, \base, lsl \shift
 	mov	r0, r0
@@ -257,10 +257,10 @@ ENTRY(vfp_get_float)
 	tbl_branch r0, r3, #3
 	.irp	dr,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
 1:	mrc	p10, 0, r0, c\dr, c0, 0	@ fmrs	r0, s0
-	mov	pc, lr
+	ret	lr
 	.org	1b + 8
 1:	mrc	p10, 0, r0, c\dr, c0, 4	@ fmrs	r0, s1
-	mov	pc, lr
+	ret	lr
 	.org	1b + 8
 	.endr
 ENDPROC(vfp_get_float)
@@ -269,10 +269,10 @@ ENTRY(vfp_put_float)
 	tbl_branch r1, r3, #3
 	.irp	dr,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
 1:	mcr	p10, 0, r0, c\dr, c0, 0	@ fmsr	r0, s0
-	mov	pc, lr
+	ret	lr
 	.org	1b + 8
 1:	mcr	p10, 0, r0, c\dr, c0, 4	@ fmsr	r0, s1
-	mov	pc, lr
+	ret	lr
 	.org	1b + 8
 	.endr
 ENDPROC(vfp_put_float)
@@ -281,14 +281,14 @@ ENTRY(vfp_get_double)
 	tbl_branch r0, r3, #3
 	.irp	dr,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
 1:	fmrrd	r0, r1, d\dr
-	mov	pc, lr
+	ret	lr
 	.org	1b + 8
 	.endr
 #ifdef CONFIG_VFPv3
 	@ d16 - d31 registers
 	.irp	dr,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
 1:	mrrc	p11, 3, r0, r1, c\dr	@ fmrrd	r0, r1, d\dr
-	mov	pc, lr
+	ret	lr
 	.org	1b + 8
 	.endr
 #endif
@@ -296,21 +296,21 @@ ENTRY(vfp_get_double)
 	@ virtual register 16 (or 32 if VFPv3) for compare with zero
 	mov	r0, #0
 	mov	r1, #0
-	mov	pc, lr
+	ret	lr
 ENDPROC(vfp_get_double)
 
 ENTRY(vfp_put_double)
 	tbl_branch r2, r3, #3
 	.irp	dr,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
 1:	fmdrr	d\dr, r0, r1
-	mov	pc, lr
+	ret	lr
 	.org	1b + 8
 	.endr
 #ifdef CONFIG_VFPv3
 	@ d16 - d31 registers
 	.irp	dr,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
 1:	mcrr	p11, 3, r0, r1, c\dr	@ fmdrr	r0, r1, d\dr
-	mov	pc, lr
+	ret	lr
 	.org	1b + 8
 	.endr
 #endif
diff --git a/arch/arm/xen/hypercall.S b/arch/arm/xen/hypercall.S
index 44e3a5f10c4c..f00e08075938 100644
--- a/arch/arm/xen/hypercall.S
+++ b/arch/arm/xen/hypercall.S
@@ -58,7 +58,7 @@
 ENTRY(HYPERVISOR_##hypercall)			\
 	mov r12, #__HYPERVISOR_##hypercall;	\
 	__HVC(XEN_IMM);						\
-	mov pc, lr;							\
+	ret lr;					\
 ENDPROC(HYPERVISOR_##hypercall)
 
 #define HYPERCALL0 HYPERCALL_SIMPLE
@@ -74,7 +74,7 @@ ENTRY(HYPERVISOR_##hypercall)			\
 	mov r12, #__HYPERVISOR_##hypercall;	\
 	__HVC(XEN_IMM);						\
 	ldm sp!, {r4}						\
-	mov pc, lr							\
+	ret lr					\
 ENDPROC(HYPERVISOR_##hypercall)
 
                 .text
@@ -101,5 +101,5 @@ ENTRY(privcmd_call)
 	ldr r4, [sp, #4]
 	__HVC(XEN_IMM)
 	ldm sp!, {r4}
-	mov pc, lr
+	ret lr
 ENDPROC(privcmd_call);
-- 
cgit v1.2.3


From 1e7e3211854507d6c2695126d6d30a7762cf4006 Mon Sep 17 00:00:00 2001
From: Russell King <rmk+kernel@arm.linux.org.uk>
Date: Fri, 4 Jul 2014 14:32:43 +0100
Subject: ARM: alignment: save last kernel aligned fault location

Save and report (via the procfs file) the last kernel unaligned fault
location.  This allows us to trivially inspect where the last fault
happened for cases which we don't expect to occur.

Since we expect the kernel to generate misalignment faults (due to
the networking layer), even when warnings are enabled, we don't log
them for the kernel.

Tested-by: Tony Lindgren <tony@atomide.com>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/mm/alignment.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/arm/mm/alignment.c b/arch/arm/mm/alignment.c
index b8cb1a2688a0..0c1ab49e5f7b 100644
--- a/arch/arm/mm/alignment.c
+++ b/arch/arm/mm/alignment.c
@@ -76,6 +76,7 @@
 
 static unsigned long ai_user;
 static unsigned long ai_sys;
+static void *ai_sys_last_pc;
 static unsigned long ai_skipped;
 static unsigned long ai_half;
 static unsigned long ai_word;
@@ -130,7 +131,7 @@ static const char *usermode_action[] = {
 static int alignment_proc_show(struct seq_file *m, void *v)
 {
 	seq_printf(m, "User:\t\t%lu\n", ai_user);
-	seq_printf(m, "System:\t\t%lu\n", ai_sys);
+	seq_printf(m, "System:\t\t%lu (%pF)\n", ai_sys, ai_sys_last_pc);
 	seq_printf(m, "Skipped:\t%lu\n", ai_skipped);
 	seq_printf(m, "Half:\t\t%lu\n", ai_half);
 	seq_printf(m, "Word:\t\t%lu\n", ai_word);
@@ -794,6 +795,7 @@ do_alignment(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
 		goto user;
 
 	ai_sys += 1;
+	ai_sys_last_pc = (void *)instruction_pointer(regs);
 
  fixup:
 
-- 
cgit v1.2.3


From 9696fcae9251610f5935a3823be40d1365649720 Mon Sep 17 00:00:00 2001
From: Nicolas Pitre <nicolas.pitre@linaro.org>
Date: Thu, 19 Jun 2014 22:44:32 +0100
Subject: ARM: 8079/1: zImage: identify kernel endianness

With patch #8067/1 ("zImage: ensure header in LE format for BE8 kernels")
applied, it is no longer possible to determine the endianness of a compiled
kernel image.  This normally shouldn't matter to the boot environment,
except for those cases where the selection of a ramdisk or root filesystem
with a matching endianness has to be automated.

Let's add a flag to the zImage header indicating the actual endianness.
Four bytes from offset 0x30 can be interpreted as follows:

	04 03 02 01	big endian kernel

	01 02 03 04	little endian kernel

Anything else should be interpreted as "unknown", in which case it is
most likely that patch #8067/1 was not applied either and the zImage
magic number at offset 0x24 could be used instead to determine
endianness. No zImage before this patch ever produced 0x01020304 nor
0x04030201 at offset 0x30 so there is no confusion possible.

Signed-off-by: Nicolas Pitre <nico@linaro.org>
Acked-by: Kevin Hilman <khilman@linaro.org>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/boot/compressed/head.S | 1 +
 1 file changed, 1 insertion(+)

(limited to 'arch')

diff --git a/arch/arm/boot/compressed/head.S b/arch/arm/boot/compressed/head.S
index c95feab6ce7f..413fd94b5301 100644
--- a/arch/arm/boot/compressed/head.S
+++ b/arch/arm/boot/compressed/head.S
@@ -128,6 +128,7 @@ start:
 		.word	_magic_sig	@ Magic numbers to help the loader
 		.word	_magic_start	@ absolute load/run zImage address
 		.word	_magic_end	@ zImage end address
+		.word	0x04030201	@ endianness flag
 
  THUMB(		.thumb			)
 1:
-- 
cgit v1.2.3


From 9865f1d46a68a5f58cb623054c8308484007d8a4 Mon Sep 17 00:00:00 2001
From: Nikolay Borisov <Nikolay.Borisov@arm.com>
Date: Tue, 3 Jun 2014 19:48:10 +0100
Subject: ARM: 8070/1: Introduce arm_get_current_stack_frame()

Currently there are numerous places where "struct pt_regs" are used to
populate "struct stackframe", however all of those location do not
consider the situation where the kernel might be compiled in THUMB2
mode, in which case the framepointer member of pt_regs become ARM_r7
instead of ARM_fp (r11). Document this idiosyncracy in the
definition of "struct stackframe"

The easiest solution is to introduce a new function (in the spirit of
https://groups.google.com/forum/#!topic/linux.kernel/dA2YuUcSpZ4)
which would hide the complexity of initializing the stackframe struct
from pt_regs.

Also implement a macro frame_pointer(regs) that would return the correct
register so that we can use it in cases where we just require the frame
pointer and not a whole struct stackframe

Signed-off-by: Nikolay Borisov <Nikolay.Borisov@arm.com>
Acked-by: Will Deacon <will.deacon@arm.com>
Acked-by: Robert Richter <rric@kernel.org>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/include/asm/ptrace.h     |  6 ++++++
 arch/arm/include/asm/stacktrace.h | 15 +++++++++++++++
 2 files changed, 21 insertions(+)

(limited to 'arch')

diff --git a/arch/arm/include/asm/ptrace.h b/arch/arm/include/asm/ptrace.h
index c877654fe3bf..601264d983fa 100644
--- a/arch/arm/include/asm/ptrace.h
+++ b/arch/arm/include/asm/ptrace.h
@@ -84,6 +84,12 @@ static inline long regs_return_value(struct pt_regs *regs)
 
 #define instruction_pointer(regs)	(regs)->ARM_pc
 
+#ifdef CONFIG_THUMB2_KERNEL
+#define frame_pointer(regs) (regs)->ARM_r7
+#else
+#define frame_pointer(regs) (regs)->ARM_fp
+#endif
+
 static inline void instruction_pointer_set(struct pt_regs *regs,
 					   unsigned long val)
 {
diff --git a/arch/arm/include/asm/stacktrace.h b/arch/arm/include/asm/stacktrace.h
index 4d0a16441b29..7722201ead19 100644
--- a/arch/arm/include/asm/stacktrace.h
+++ b/arch/arm/include/asm/stacktrace.h
@@ -1,13 +1,28 @@
 #ifndef __ASM_STACKTRACE_H
 #define __ASM_STACKTRACE_H
 
+#include <asm/ptrace.h>
+
 struct stackframe {
+	/*
+	 * FP member should hold R7 when CONFIG_THUMB2_KERNEL is enabled
+	 * and R11 otherwise.
+	 */
 	unsigned long fp;
 	unsigned long sp;
 	unsigned long lr;
 	unsigned long pc;
 };
 
+static __always_inline
+void arm_get_current_stackframe(struct pt_regs *regs, struct stackframe *frame)
+{
+		frame->fp = frame_pointer(regs);
+		frame->sp = regs->ARM_sp;
+		frame->lr = regs->ARM_lr;
+		frame->pc = regs->ARM_pc;
+}
+
 extern int unwind_frame(struct stackframe *frame);
 extern void walk_stackframe(struct stackframe *frame,
 			    int (*fn)(struct stackframe *, void *), void *data);
-- 
cgit v1.2.3


From 6888e32a9e0b284c4dcdefcc3158949110699bc2 Mon Sep 17 00:00:00 2001
From: Nikolay Borisov <Nikolay.Borisov@arm.com>
Date: Tue, 3 Jun 2014 19:48:43 +0100
Subject: ARM: 8071/1: perf: Make perf use arm_get_current_stackframe

Make the perf backend use the API so that it correctly references the FP
when in THUMB2 mode

Signed-off-by: Nikolay Borisov <Nikolay.Borisov@arm.com>
Acked-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/kernel/perf_event.c | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

(limited to 'arch')

diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c
index 4238bcba9d60..ae3e216a52c2 100644
--- a/arch/arm/kernel/perf_event.c
+++ b/arch/arm/kernel/perf_event.c
@@ -621,10 +621,7 @@ perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs)
 		return;
 	}
 
-	fr.fp = regs->ARM_fp;
-	fr.sp = regs->ARM_sp;
-	fr.lr = regs->ARM_lr;
-	fr.pc = regs->ARM_pc;
+	arm_get_current_stackframe(regs, &fr);
 	walk_stackframe(&fr, callchain_trace, entry);
 }
 
-- 
cgit v1.2.3


From a3250c92aa83e199624133e692a92ecc1d3e7466 Mon Sep 17 00:00:00 2001
From: Nikolay Borisov <Nikolay.Borisov@arm.com>
Date: Tue, 3 Jun 2014 19:48:58 +0100
Subject: ARM: 8072/1: time: Make use of arm_get_current_stackframe

Make use of the arm_get_current_stackframe api so that
the frame pointer is correctly referenced in THUMB2 mode

Signed-off-by: Nikolay Borisov <Nikolay.Borisov@arm.com>
Acked-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/kernel/time.c | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

(limited to 'arch')

diff --git a/arch/arm/kernel/time.c b/arch/arm/kernel/time.c
index 829a96d4a179..0cc7e58c47cc 100644
--- a/arch/arm/kernel/time.c
+++ b/arch/arm/kernel/time.c
@@ -50,10 +50,7 @@ unsigned long profile_pc(struct pt_regs *regs)
 	if (!in_lock_functions(regs->ARM_pc))
 		return regs->ARM_pc;
 
-	frame.fp = regs->ARM_fp;
-	frame.sp = regs->ARM_sp;
-	frame.lr = regs->ARM_lr;
-	frame.pc = regs->ARM_pc;
+	arm_get_current_stackframe(regs, &frame);
 	do {
 		int ret = unwind_frame(&frame);
 		if (ret < 0)
-- 
cgit v1.2.3


From c8bee0ad78b2107ffb56fa82f0ab2b762311d172 Mon Sep 17 00:00:00 2001
From: Nikolay Borisov <Nikolay.Borisov@arm.com>
Date: Tue, 3 Jun 2014 19:49:14 +0100
Subject: ARM: 8073/1: unwind: Use arm_get_current_stackframe

Make the unwind code use the correct API so that the frame pointer
is extracted from the correct register.

Signed-off-by: Nikolay Borisov <Nikolay.Borisov@arm.com>
Acked-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/kernel/unwind.c | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

(limited to 'arch')

diff --git a/arch/arm/kernel/unwind.c b/arch/arm/kernel/unwind.c
index e67682f02cb2..a61a1dfbb0db 100644
--- a/arch/arm/kernel/unwind.c
+++ b/arch/arm/kernel/unwind.c
@@ -479,12 +479,10 @@ void unwind_backtrace(struct pt_regs *regs, struct task_struct *tsk)
 		tsk = current;
 
 	if (regs) {
-		frame.fp = regs->ARM_fp;
-		frame.sp = regs->ARM_sp;
-		frame.lr = regs->ARM_lr;
+		arm_get_current_stackframe(regs, &frame);
 		/* PC might be corrupted, use LR in that case. */
-		frame.pc = kernel_text_address(regs->ARM_pc)
-			 ? regs->ARM_pc : regs->ARM_lr;
+		if (!kernel_text_address(regs->ARM_pc))
+			frame.pc = regs->ARM_lr;
 	} else if (tsk == current) {
 		frame.fp = (unsigned long)__builtin_frame_address(0);
 		frame.sp = current_sp;
-- 
cgit v1.2.3


From 49432d4acf96a5dcc02d14978dcc75f522492757 Mon Sep 17 00:00:00 2001
From: Nikolay Borisov <Nikolay.Borisov@arm.com>
Date: Tue, 3 Jun 2014 19:50:09 +0100
Subject: ARM: 8074/1: traps: Make use of the frame_pointer macro

Use the newly-introduced frame_pointer macro to extract
the correct FP based on whether we are in THUMB2 mode or not.

Signed-off-by: Nikolay Borisov <Nikolay.Borisov@arm.com>
Acked-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/kernel/traps.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c
index abd2fc067736..c8e4bb714944 100644
--- a/arch/arm/kernel/traps.c
+++ b/arch/arm/kernel/traps.c
@@ -31,11 +31,13 @@
 #include <asm/exception.h>
 #include <asm/unistd.h>
 #include <asm/traps.h>
+#include <asm/ptrace.h>
 #include <asm/unwind.h>
 #include <asm/tls.h>
 #include <asm/system_misc.h>
 #include <asm/opcodes.h>
 
+
 static const char *handler[]= {
 	"prefetch abort",
 	"data abort",
@@ -184,7 +186,7 @@ static void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk)
 		tsk = current;
 
 	if (regs) {
-		fp = regs->ARM_fp;
+		fp = frame_pointer(regs);
 		mode = processor_mode(regs);
 	} else if (tsk != current) {
 		fp = thread_saved_fp(tsk);
@@ -719,7 +721,7 @@ asmlinkage int arm_syscall(int no, struct pt_regs *regs)
 		dump_instr("", regs);
 		if (user_mode(regs)) {
 			__show_regs(regs);
-			c_backtrace(regs->ARM_fp, processor_mode(regs));
+			c_backtrace(frame_pointer(regs), processor_mode(regs));
 		}
 	}
 #endif
-- 
cgit v1.2.3


From e8a5dbc59e5ef7b120635fe8a57643f3bbbd0919 Mon Sep 17 00:00:00 2001
From: Nikolay Borisov <Nikolay.Borisov@arm.com>
Date: Tue, 3 Jun 2014 19:51:32 +0100
Subject: ARM: 8075/1: oprofile: Use of arm_get_current_stackframe

Use the newly introduced API so that FP is correctly referenced from
either R7/R11 based on whether we are running in THUMB2 mode or not.

Signed-off-by: Nikolay Borisov <Nikolay.Borisov@arm.com>
Acked-by: Will Deacon <will.deacon@arm.com>
Acked-by: Robert Richter <rric@kernel.org>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/oprofile/common.c | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

(limited to 'arch')

diff --git a/arch/arm/oprofile/common.c b/arch/arm/oprofile/common.c
index 99c63d4b6af8..e6a3c4c92163 100644
--- a/arch/arm/oprofile/common.c
+++ b/arch/arm/oprofile/common.c
@@ -107,10 +107,7 @@ static void arm_backtrace(struct pt_regs * const regs, unsigned int depth)
 
 	if (!user_mode(regs)) {
 		struct stackframe frame;
-		frame.fp = regs->ARM_fp;
-		frame.sp = regs->ARM_sp;
-		frame.lr = regs->ARM_lr;
-		frame.pc = regs->ARM_pc;
+		arm_get_current_stackframe(regs, &frame);
 		walk_stackframe(&frame, report_trace, &depth);
 		return;
 	}
-- 
cgit v1.2.3


From 76e7d5c4fd211061e56d2013a0ec9f875824c546 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Mon, 30 Jun 2014 15:40:40 +0100
Subject: ARM: 8088/1: vmlinux.lds.S: drop redundant .comment

Commit 78d7530ac3 ("ARM: Clean up linker script using new linker script
macros.") modified the arm kernel linker script to use the STABS_DEBUG
macro, but left a .comment section definition. As STABS_DEBUG defines
the .comment section in an identical way, the second section definition
is redundant and can be removed.

This patch removes the redundant .comment section definition.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/kernel/vmlinux.lds.S | 1 -
 1 file changed, 1 deletion(-)

(limited to 'arch')

diff --git a/arch/arm/kernel/vmlinux.lds.S b/arch/arm/kernel/vmlinux.lds.S
index 7bcee5c9b604..6f57cb94367f 100644
--- a/arch/arm/kernel/vmlinux.lds.S
+++ b/arch/arm/kernel/vmlinux.lds.S
@@ -318,7 +318,6 @@ SECTIONS
 	_end = .;
 
 	STABS_DEBUG
-	.comment 0 : { *(.comment) }
 }
 
 /*
-- 
cgit v1.2.3


From 7ca791c59dda75715088d91788bf6989b5a1945a Mon Sep 17 00:00:00 2001
From: Shawn Guo <shawn.guo@linaro.org>
Date: Thu, 3 Jul 2014 09:56:59 +0100
Subject: ARM: 8089/1: cpu_pj4b_suspend_size should base on cpu_v7_suspend_size

Since pj4b suspend/resume routines are implemented based on generic
ARMv7 ones, instead of hard-coding cpu_pj4b_suspend_size, we should have
it be cpu_v7_suspend_size plus pj4b specific bytes.  Otherwise, if
cpu_v7_suspend_size gets updated alone, the pj4b suspend/resume will
likely be broken.

While at it, fix the comments in cpu_pj4b_do_resume, as we're restoring
CP15 registers rather than saving in there.

Signed-off-by: Shawn Guo <shawn.guo@freescale.com>
Acked-by: Gregory CLEMENT <gregory.clement@free-electrons.com>
Tested-by: Gregory CLEMENT <gregory.clement@free-electrons.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/mm/proc-v7.S | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

(limited to 'arch')

diff --git a/arch/arm/mm/proc-v7.S b/arch/arm/mm/proc-v7.S
index 71abb60c4222..c8726abe9bab 100644
--- a/arch/arm/mm/proc-v7.S
+++ b/arch/arm/mm/proc-v7.S
@@ -184,16 +184,16 @@ ENDPROC(cpu_pj4b_do_suspend)
 
 ENTRY(cpu_pj4b_do_resume)
 	ldmia	r0!, {r6 - r10}
-	mcr	p15, 1, r6, c15, c1, 0  @ save CP15 - extra features
-	mcr	p15, 1, r7, c15, c2, 0	@ save CP15 - Aux Func Modes Ctrl 0
-	mcr	p15, 1, r8, c15, c1, 2	@ save CP15 - Aux Debug Modes Ctrl 2
-	mcr	p15, 1, r9, c15, c1, 1  @ save CP15 - Aux Debug Modes Ctrl 1
-	mcr	p15, 0, r10, c9, c14, 0  @ save CP15 - PMC
+	mcr	p15, 1, r6, c15, c1, 0  @ restore CP15 - extra features
+	mcr	p15, 1, r7, c15, c2, 0	@ restore CP15 - Aux Func Modes Ctrl 0
+	mcr	p15, 1, r8, c15, c1, 2	@ restore CP15 - Aux Debug Modes Ctrl 2
+	mcr	p15, 1, r9, c15, c1, 1  @ restore CP15 - Aux Debug Modes Ctrl 1
+	mcr	p15, 0, r10, c9, c14, 0  @ restore CP15 - PMC
 	b cpu_v7_do_resume
 ENDPROC(cpu_pj4b_do_resume)
 #endif
 .globl	cpu_pj4b_suspend_size
-.equ	cpu_pj4b_suspend_size, 4 * 14
+.equ	cpu_pj4b_suspend_size, cpu_v7_suspend_size + 4 * 5
 
 #endif
 
-- 
cgit v1.2.3


From 80d3cb9132b2e4f9a1858ae4a84546a3ea2e7f46 Mon Sep 17 00:00:00 2001
From: Shawn Guo <shawn.guo@linaro.org>
Date: Tue, 8 Jul 2014 02:59:42 +0100
Subject: ARM: 8090/1: add revision info for PL310 errata 588369 and 727915

Add revision info for PL310_ERRATA_588369 and PL310_ERRATA_727915 to
help people understand if they need to enable the errata for their
hardware.

Signed-off-by: Shawn Guo <shawn.guo@freescale.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/mm/Kconfig | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

(limited to 'arch')

diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig
index c348eaee7ee2..df46620206af 100644
--- a/arch/arm/mm/Kconfig
+++ b/arch/arm/mm/Kconfig
@@ -907,8 +907,8 @@ config PL310_ERRATA_588369
 	   They are architecturally defined to behave as the execution of a
 	   clean operation followed immediately by an invalidate operation,
 	   both performing to the same memory location. This functionality
-	   is not correctly implemented in PL310 as clean lines are not
-	   invalidated as a result of these operations.
+	   is not correctly implemented in PL310 prior to r2p0 (fixed in r2p0)
+	   as clean lines are not invalidated as a result of these operations.
 
 config PL310_ERRATA_727915
 	bool "PL310 errata: Background Clean & Invalidate by Way operation can cause data corruption"
@@ -918,7 +918,8 @@ config PL310_ERRATA_727915
 	  PL310 can handle normal accesses while it is in progress. Under very
 	  rare circumstances, due to this erratum, write data can be lost when
 	  PL310 treats a cacheable write transaction during a Clean &
-	  Invalidate by Way operation.
+	  Invalidate by Way operation.  Revisions prior to r3p1 are affected by
+	  this errata (fixed in r3p1).
 
 config PL310_ERRATA_753970
 	bool "PL310 errata: cache sync operation may be faulty"
-- 
cgit v1.2.3


From 10508b204c190471fe849a3e68f221277488c155 Mon Sep 17 00:00:00 2001
From: Daniel Thompson <daniel.thompson@linaro.org>
Date: Tue, 8 Jul 2014 13:50:50 +0100
Subject: ARM: 8096/1: Describe required sort order for textofs-y (TEXT_OFFSET)

The section of the makefile that determines the TEXT_OFFSET is sorted
by address so that, in multi-arch kernel builds, the architecture with the
most stringent requirements for the kernel base address gets to define
TEXT_OFFSET. The comment should reflect that.

Signed-off-by: Daniel Thompson <daniel.thompson@linaro.org>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/Makefile | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'arch')

diff --git a/arch/arm/Makefile b/arch/arm/Makefile
index 6721fab13734..718913dfe815 100644
--- a/arch/arm/Makefile
+++ b/arch/arm/Makefile
@@ -127,6 +127,9 @@ CHECKFLAGS	+= -D__arm__
 
 #Default value
 head-y		:= arch/arm/kernel/head$(MMUEXT).o
+
+# Text offset. This list is sorted numerically by address in order to
+# provide a means to avoid/resolve conflicts in multi-arch kernels.
 textofs-y	:= 0x00008000
 textofs-$(CONFIG_ARCH_CLPS711X) := 0x00028000
 # We don't want the htc bootloader to corrupt kernel during resume
-- 
cgit v1.2.3


From bc994c77ce82576209dcf08f71de9ae51b0b100f Mon Sep 17 00:00:00 2001
From: Baruch Siach <baruch@tkos.co.il>
Date: Wed, 9 Jul 2014 13:33:13 +0100
Subject: ARM: 8097/1: unistd.h: relocate comments back to place

Commit cb8db5d45 (UAPI: (Scripted) Disintegrate arch/arm/include/asm) moved
these syscall comments out of their context into the UAPI headers. Fix this.

Fixes: cb8db5d4578a ("UAPI: (Scripted) Disintegrate arch/arm/include/asm")

Signed-off-by: Baruch Siach <baruch@tkos.co.il>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/include/asm/unistd.h      | 10 ++++++++++
 arch/arm/include/uapi/asm/unistd.h | 11 -----------
 2 files changed, 10 insertions(+), 11 deletions(-)

(limited to 'arch')

diff --git a/arch/arm/include/asm/unistd.h b/arch/arm/include/asm/unistd.h
index 43876245fc57..21ca0cebcab0 100644
--- a/arch/arm/include/asm/unistd.h
+++ b/arch/arm/include/asm/unistd.h
@@ -15,7 +15,17 @@
 
 #include <uapi/asm/unistd.h>
 
+/*
+ * This may need to be greater than __NR_last_syscall+1 in order to
+ * account for the padding in the syscall table
+ */
 #define __NR_syscalls  (384)
+
+/*
+ * *NOTE*: This is a ghost syscall private to the kernel.  Only the
+ * __kuser_cmpxchg code in entry-armv.S should be aware of its
+ * existence.  Don't ever use this from user code.
+ */
 #define __ARM_NR_cmpxchg		(__ARM_NR_BASE+0x00fff0)
 
 #define __ARCH_WANT_STAT64
diff --git a/arch/arm/include/uapi/asm/unistd.h b/arch/arm/include/uapi/asm/unistd.h
index ba94446c72d9..acd5b66ea3aa 100644
--- a/arch/arm/include/uapi/asm/unistd.h
+++ b/arch/arm/include/uapi/asm/unistd.h
@@ -410,11 +410,6 @@
 #define __NR_sched_getattr		(__NR_SYSCALL_BASE+381)
 #define __NR_renameat2			(__NR_SYSCALL_BASE+382)
 
-/*
- * This may need to be greater than __NR_last_syscall+1 in order to
- * account for the padding in the syscall table
- */
-
 /*
  * The following SWIs are ARM private.
  */
@@ -425,12 +420,6 @@
 #define __ARM_NR_usr32			(__ARM_NR_BASE+4)
 #define __ARM_NR_set_tls		(__ARM_NR_BASE+5)
 
-/*
- * *NOTE*: This is a ghost syscall private to the kernel.  Only the
- * __kuser_cmpxchg code in entry-armv.S should be aware of its
- * existence.  Don't ever use this from user code.
- */
-
 /*
  * The following syscalls are obsolete and no longer available for EABI.
  */
-- 
cgit v1.2.3


From e38361d032f12f42ddc6d8e2028f6668da696d14 Mon Sep 17 00:00:00 2001
From: Daniel Thompson <daniel.thompson@linaro.org>
Date: Thu, 10 Jul 2014 20:58:08 +0100
Subject: ARM: 8091/2: add get_user() support for 8 byte types

Recent contributions, including to DRM and binder, introduce 64-bit
values in their interfaces. A common motivation for this is to allow
the same ABI for 32- and 64-bit userspaces (and therefore also a shared
ABI for 32/64 hybrid userspaces). Anyhow, the developers would like to
avoid gotchas like having to use copy_from_user().

This feature is already implemented on x86-32 and the majority of other
32-bit architectures. The current list of get_user_8 hold out
architectures are: arm, avr32, blackfin, m32r, metag, microblaze,
mn10300, sh.

Credit:

    My name sits rather uneasily at the top of this patch. The v1 and
    v2 versions of the patch were written by Rob Clark and to produce v4
    I mostly copied code from Russell King and H. Peter Anvin. However I
    have mangled the patch sufficiently that *blame* is rightfully mine
    even if credit should more widely shared.

Changelog:

v5: updated to use the ret macro (requested by Russell King)
v4: remove an inlined add on big endian systems (spotted by Russell King),
    used __ARMEB__ rather than BIG_ENDIAN (to match rest of file),
    cleared r3 on EFAULT during __get_user_8.
v3: fix a couple of checkpatch issues
v2: pass correct size to check_uaccess, and better handling of narrowing
    double word read with __get_user_xb() (Russell King's suggestion)
v1: original

Signed-off-by: Rob Clark <robdclark@gmail.com>
Signed-off-by: Daniel Thompson <daniel.thompson@linaro.org>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/include/asm/uaccess.h | 20 +++++++++++++++++++-
 arch/arm/lib/getuser.S         | 37 ++++++++++++++++++++++++++++++++++++-
 2 files changed, 55 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/arm/include/asm/uaccess.h b/arch/arm/include/asm/uaccess.h
index 75d95799b6e6..7057cf8b87d0 100644
--- a/arch/arm/include/asm/uaccess.h
+++ b/arch/arm/include/asm/uaccess.h
@@ -107,6 +107,8 @@ static inline void set_fs(mm_segment_t fs)
 extern int __get_user_1(void *);
 extern int __get_user_2(void *);
 extern int __get_user_4(void *);
+extern int __get_user_lo8(void *);
+extern int __get_user_8(void *);
 
 #define __GUP_CLOBBER_1	"lr", "cc"
 #ifdef CONFIG_CPU_USE_DOMAINS
@@ -115,6 +117,8 @@ extern int __get_user_4(void *);
 #define __GUP_CLOBBER_2 "lr", "cc"
 #endif
 #define __GUP_CLOBBER_4	"lr", "cc"
+#define __GUP_CLOBBER_lo8 "lr", "cc"
+#define __GUP_CLOBBER_8	"lr", "cc"
 
 #define __get_user_x(__r2,__p,__e,__l,__s)				\
 	   __asm__ __volatile__ (					\
@@ -125,11 +129,19 @@ extern int __get_user_4(void *);
 		: "0" (__p), "r" (__l)					\
 		: __GUP_CLOBBER_##__s)
 
+/* narrowing a double-word get into a single 32bit word register: */
+#ifdef __ARMEB__
+#define __get_user_xb(__r2, __p, __e, __l, __s)				\
+	__get_user_x(__r2, __p, __e, __l, lo8)
+#else
+#define __get_user_xb __get_user_x
+#endif
+
 #define __get_user_check(x,p)							\
 	({								\
 		unsigned long __limit = current_thread_info()->addr_limit - 1; \
 		register const typeof(*(p)) __user *__p asm("r0") = (p);\
-		register unsigned long __r2 asm("r2");			\
+		register typeof(x) __r2 asm("r2");			\
 		register unsigned long __l asm("r1") = __limit;		\
 		register int __e asm("r0");				\
 		switch (sizeof(*(__p))) {				\
@@ -142,6 +154,12 @@ extern int __get_user_4(void *);
 		case 4:							\
 			__get_user_x(__r2, __p, __e, __l, 4);		\
 			break;						\
+		case 8:							\
+			if (sizeof((x)) < 8)				\
+				__get_user_xb(__r2, __p, __e, __l, 4);	\
+			else						\
+				__get_user_x(__r2, __p, __e, __l, 8);	\
+			break;						\
 		default: __e = __get_user_bad(); break;			\
 		}							\
 		x = (typeof(*(p))) __r2;				\
diff --git a/arch/arm/lib/getuser.S b/arch/arm/lib/getuser.S
index 0f958e3d8180..938600098b88 100644
--- a/arch/arm/lib/getuser.S
+++ b/arch/arm/lib/getuser.S
@@ -18,7 +18,7 @@
  * Inputs:	r0 contains the address
  *		r1 contains the address limit, which must be preserved
  * Outputs:	r0 is the error code
- *		r2 contains the zero-extended value
+ *		r2, r3 contains the zero-extended value
  *		lr corrupted
  *
  * No other registers must be altered.  (see <asm/uaccess.h>
@@ -66,15 +66,50 @@ ENTRY(__get_user_4)
 	ret	lr
 ENDPROC(__get_user_4)
 
+ENTRY(__get_user_8)
+	check_uaccess r0, 8, r1, r2, __get_user_bad
+#ifdef CONFIG_THUMB2_KERNEL
+5: TUSER(ldr)	r2, [r0]
+6: TUSER(ldr)	r3, [r0, #4]
+#else
+5: TUSER(ldr)	r2, [r0], #4
+6: TUSER(ldr)	r3, [r0]
+#endif
+	mov	r0, #0
+	ret	lr
+ENDPROC(__get_user_8)
+
+#ifdef __ARMEB__
+ENTRY(__get_user_lo8)
+	check_uaccess r0, 8, r1, r2, __get_user_bad
+#ifdef CONFIG_CPU_USE_DOMAINS
+	add	r0, r0, #4
+7:	ldrt	r2, [r0]
+#else
+7:	ldr	r2, [r0, #4]
+#endif
+	mov	r0, #0
+	ret	lr
+ENDPROC(__get_user_lo8)
+#endif
+
+__get_user_bad8:
+	mov	r3, #0
 __get_user_bad:
 	mov	r2, #0
 	mov	r0, #-EFAULT
 	ret	lr
 ENDPROC(__get_user_bad)
+ENDPROC(__get_user_bad8)
 
 .pushsection __ex_table, "a"
 	.long	1b, __get_user_bad
 	.long	2b, __get_user_bad
 	.long	3b, __get_user_bad
 	.long	4b, __get_user_bad
+	.long	5b, __get_user_bad8
+	.long	6b, __get_user_bad8
+#ifdef __ARMEB__
+	.long   7b, __get_user_bad
+#endif
 .popsection
-- 
cgit v1.2.3


From bf67fd3142dc605ded9b5e063e35b47c2bb21cb1 Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Wed, 9 Jul 2014 19:31:28 +0100
Subject: ARM: 8098/1: mcs lock: implement wfe-based polling for MCS locking

This patch introduces a wfe-based polling loop for spinning on contended
MCS locks and waking up corresponding waiters when the lock is released.

Signed-off-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/include/asm/mcs_spinlock.h | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)
 create mode 100644 arch/arm/include/asm/mcs_spinlock.h

(limited to 'arch')

diff --git a/arch/arm/include/asm/mcs_spinlock.h b/arch/arm/include/asm/mcs_spinlock.h
new file mode 100644
index 000000000000..f652ad65840a
--- /dev/null
+++ b/arch/arm/include/asm/mcs_spinlock.h
@@ -0,0 +1,23 @@
+#ifndef __ASM_MCS_LOCK_H
+#define __ASM_MCS_LOCK_H
+
+#ifdef CONFIG_SMP
+#include <asm/spinlock.h>
+
+/* MCS spin-locking. */
+#define arch_mcs_spin_lock_contended(lock)				\
+do {									\
+	/* Ensure prior stores are observed before we enter wfe. */	\
+	smp_mb();							\
+	while (!(smp_load_acquire(lock)))				\
+		wfe();							\
+} while (0)								\
+
+#define arch_mcs_spin_unlock_contended(lock)				\
+do {									\
+	smp_store_release(lock, 1);					\
+	dsb_sev();							\
+} while (0)
+
+#endif	/* CONFIG_SMP */
+#endif	/* __ASM_MCS_LOCK_H */
-- 
cgit v1.2.3


From ddd0c53018222df6bd9b2f61c881887b56b75d88 Mon Sep 17 00:00:00 2001
From: Shawn Guo <shawn.guo@linaro.org>
Date: Wed, 16 Jul 2014 07:40:53 +0100
Subject: ARM: 8103/1: save/restore Cortex-A9 CP15 registers on suspend/resume

The CP15 diagnostic register holds ARM errata bits on Cortex-A9, so it
needs to be saved/restored on suspend/resume.  Otherwise, the
effectiveness of errata workaround gets lost together with diagnostic
register bit across suspend/resume cycle.  And the CP15 power control
register of Cortex-A9 shares the same problem.

The patch adds a couple of Cortex-A9 specific suspend/resume functions
to save/restore these two Cortex-A9 CP15 registers across the
suspend/resume cycle.

Signed-off-by: Shawn Guo <shawn.guo@freescale.com>
Acked-by: Nicolas Pitre <nico@linaro.org>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/include/asm/glue-proc.h | 18 +++++++++---------
 arch/arm/mm/proc-v7.S            | 37 ++++++++++++++++++++++++++++++++++++-
 2 files changed, 45 insertions(+), 10 deletions(-)

(limited to 'arch')

diff --git a/arch/arm/include/asm/glue-proc.h b/arch/arm/include/asm/glue-proc.h
index 74a8b84f3cb1..74be7c22035a 100644
--- a/arch/arm/include/asm/glue-proc.h
+++ b/arch/arm/include/asm/glue-proc.h
@@ -221,15 +221,6 @@
 # endif
 #endif
 
-#ifdef CONFIG_CPU_V7
-# ifdef CPU_NAME
-#  undef  MULTI_CPU
-#  define MULTI_CPU
-# else
-#  define CPU_NAME cpu_v7
-# endif
-#endif
-
 #ifdef CONFIG_CPU_V7M
 # ifdef CPU_NAME
 #  undef  MULTI_CPU
@@ -248,6 +239,15 @@
 # endif
 #endif
 
+#ifdef CONFIG_CPU_V7
+/*
+ * Cortex-A9 needs a different suspend/resume function, so we need
+ * multiple CPU support for ARMv7 anyway.
+ */
+#  undef  MULTI_CPU
+#  define MULTI_CPU
+#endif
+
 #ifndef MULTI_CPU
 #define cpu_proc_init			__glue(CPU_NAME,_proc_init)
 #define cpu_proc_fin			__glue(CPU_NAME,_proc_fin)
diff --git a/arch/arm/mm/proc-v7.S b/arch/arm/mm/proc-v7.S
index c8726abe9bab..43951adffd87 100644
--- a/arch/arm/mm/proc-v7.S
+++ b/arch/arm/mm/proc-v7.S
@@ -152,6 +152,40 @@ ENTRY(cpu_v7_do_resume)
 ENDPROC(cpu_v7_do_resume)
 #endif
 
+/*
+ * Cortex-A9 processor functions
+ */
+	globl_equ	cpu_ca9mp_proc_init,	cpu_v7_proc_init
+	globl_equ	cpu_ca9mp_proc_fin,	cpu_v7_proc_fin
+	globl_equ	cpu_ca9mp_reset,	cpu_v7_reset
+	globl_equ	cpu_ca9mp_do_idle,	cpu_v7_do_idle
+	globl_equ	cpu_ca9mp_dcache_clean_area, cpu_v7_dcache_clean_area
+	globl_equ	cpu_ca9mp_switch_mm,	cpu_v7_switch_mm
+	globl_equ	cpu_ca9mp_set_pte_ext,	cpu_v7_set_pte_ext
+.globl	cpu_ca9mp_suspend_size
+.equ	cpu_ca9mp_suspend_size, cpu_v7_suspend_size + 4 * 2
+#ifdef CONFIG_ARM_CPU_SUSPEND
+ENTRY(cpu_ca9mp_do_suspend)
+	stmfd	sp!, {r4 - r5}
+	mrc	p15, 0, r4, c15, c0, 1		@ Diagnostic register
+	mrc	p15, 0, r5, c15, c0, 0		@ Power register
+	stmia	r0!, {r4 - r5}
+	ldmfd	sp!, {r4 - r5}
+	b	cpu_v7_do_suspend
+ENDPROC(cpu_ca9mp_do_suspend)
+
+ENTRY(cpu_ca9mp_do_resume)
+	ldmia	r0!, {r4 - r5}
+	mrc	p15, 0, r10, c15, c0, 1		@ Read Diagnostic register
+	teq	r4, r10				@ Already restored?
+	mcrne	p15, 0, r4, c15, c0, 1		@ No, so restore it
+	mrc	p15, 0, r10, c15, c0, 0		@ Read Power register
+	teq	r5, r10				@ Already restored?
+	mcrne	p15, 0, r5, c15, c0, 0		@ No, so restore it
+	b	cpu_v7_do_resume
+ENDPROC(cpu_ca9mp_do_resume)
+#endif
+
 #ifdef CONFIG_CPU_PJ4B
 	globl_equ	cpu_pj4b_switch_mm,     cpu_v7_switch_mm
 	globl_equ	cpu_pj4b_set_pte_ext,	cpu_v7_set_pte_ext
@@ -418,6 +452,7 @@ __v7_setup_stack:
 
 	@ define struct processor (see <asm/proc-fns.h> and proc-macros.S)
 	define_processor_functions v7, dabort=v7_early_abort, pabort=v7_pabort, suspend=1
+	define_processor_functions ca9mp, dabort=v7_early_abort, pabort=v7_pabort, suspend=1
 #ifdef CONFIG_CPU_PJ4B
 	define_processor_functions pj4b, dabort=v7_early_abort, pabort=v7_pabort, suspend=1
 #endif
@@ -470,7 +505,7 @@ __v7_ca5mp_proc_info:
 __v7_ca9mp_proc_info:
 	.long	0x410fc090
 	.long	0xff0ffff0
-	__v7_proc __v7_ca9mp_setup
+	__v7_proc __v7_ca9mp_setup, proc_fns = ca9mp_processor_functions
 	.size	__v7_ca9mp_proc_info, . - __v7_ca9mp_proc_info
 
 #endif	/* CONFIG_ARM_LPAE */
-- 
cgit v1.2.3


From f2950706871c4b6e8c0f0d7c3f62d35930b8de63 Mon Sep 17 00:00:00 2001
From: Steven Capper <steve.capper@linaro.org>
Date: Fri, 18 Jul 2014 16:15:27 +0100
Subject: ARM: 8108/1: mm: Introduce {pte,pmd}_isset and {pte,pmd}_isclear

Long descriptors on ARM are 64 bits, and some pte functions such as
pte_dirty return a bitwise-and of a flag with the pte value. If the
flag to be tested resides in the upper 32 bits of the pte, then we run
into the danger of the result being dropped if downcast.

For example:
	gather_stats(page, md, pte_dirty(*pte), 1);
where pte_dirty(*pte) is downcast to an int.

This patch introduces a new macro pte_isset which performs the bitwise
and, then performs a double logical invert (where needed) to ensure
predictable downcasting. The logical inverse pte_isclear is also
introduced.

Equivalent pmd functions for Transparent HugePages have also been
added.

Signed-off-by: Steve Capper <steve.capper@linaro.org>
Reviewed-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/include/asm/pgtable-3level.h | 12 ++++++++----
 arch/arm/include/asm/pgtable.h        | 18 +++++++++++-------
 2 files changed, 19 insertions(+), 11 deletions(-)

(limited to 'arch')

diff --git a/arch/arm/include/asm/pgtable-3level.h b/arch/arm/include/asm/pgtable-3level.h
index 85c60adc8b60..34f371c4bbb2 100644
--- a/arch/arm/include/asm/pgtable-3level.h
+++ b/arch/arm/include/asm/pgtable-3level.h
@@ -207,17 +207,21 @@ static inline pmd_t *pmd_offset(pud_t *pud, unsigned long addr)
 #define pte_huge(pte)		(pte_val(pte) && !(pte_val(pte) & PTE_TABLE_BIT))
 #define pte_mkhuge(pte)		(__pte(pte_val(pte) & ~PTE_TABLE_BIT))
 
-#define pmd_young(pmd)		(pmd_val(pmd) & PMD_SECT_AF)
+#define pmd_isset(pmd, val)	((u32)(val) == (val) ? pmd_val(pmd) & (val) \
+						: !!(pmd_val(pmd) & (val)))
+#define pmd_isclear(pmd, val)	(!(pmd_val(pmd) & (val)))
+
+#define pmd_young(pmd)		(pmd_isset((pmd), PMD_SECT_AF))
 
 #define __HAVE_ARCH_PMD_WRITE
-#define pmd_write(pmd)		(!(pmd_val(pmd) & PMD_SECT_RDONLY))
+#define pmd_write(pmd)		(pmd_isclear((pmd), PMD_SECT_RDONLY))
 
 #define pmd_hugewillfault(pmd)	(!pmd_young(pmd) || !pmd_write(pmd))
 #define pmd_thp_or_huge(pmd)	(pmd_huge(pmd) || pmd_trans_huge(pmd))
 
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
-#define pmd_trans_huge(pmd)	(pmd_val(pmd) && !(pmd_val(pmd) & PMD_TABLE_BIT))
-#define pmd_trans_splitting(pmd) (pmd_val(pmd) & PMD_SECT_SPLITTING)
+#define pmd_trans_huge(pmd)	(pmd_val(pmd) && !pmd_table(pmd))
+#define pmd_trans_splitting(pmd) (pmd_isset((pmd), PMD_SECT_SPLITTING))
 #endif
 
 #define PMD_BIT_FUNC(fn,op) \
diff --git a/arch/arm/include/asm/pgtable.h b/arch/arm/include/asm/pgtable.h
index 5478e5d6ad89..01baef07cd0c 100644
--- a/arch/arm/include/asm/pgtable.h
+++ b/arch/arm/include/asm/pgtable.h
@@ -214,18 +214,22 @@ static inline pte_t *pmd_page_vaddr(pmd_t pmd)
 
 #define pte_clear(mm,addr,ptep)	set_pte_ext(ptep, __pte(0), 0)
 
+#define pte_isset(pte, val)	((u32)(val) == (val) ? pte_val(pte) & (val) \
+						: !!(pte_val(pte) & (val)))
+#define pte_isclear(pte, val)	(!(pte_val(pte) & (val)))
+
 #define pte_none(pte)		(!pte_val(pte))
-#define pte_present(pte)	(pte_val(pte) & L_PTE_PRESENT)
-#define pte_valid(pte)		(pte_val(pte) & L_PTE_VALID)
+#define pte_present(pte)	(pte_isset((pte), L_PTE_PRESENT))
+#define pte_valid(pte)		(pte_isset((pte), L_PTE_VALID))
 #define pte_accessible(mm, pte)	(mm_tlb_flush_pending(mm) ? pte_present(pte) : pte_valid(pte))
-#define pte_write(pte)		(!(pte_val(pte) & L_PTE_RDONLY))
-#define pte_dirty(pte)		(pte_val(pte) & L_PTE_DIRTY)
-#define pte_young(pte)		(pte_val(pte) & L_PTE_YOUNG)
-#define pte_exec(pte)		(!(pte_val(pte) & L_PTE_XN))
+#define pte_write(pte)		(pte_isclear((pte), L_PTE_RDONLY))
+#define pte_dirty(pte)		(pte_isset((pte), L_PTE_DIRTY))
+#define pte_young(pte)		(pte_isset((pte), L_PTE_YOUNG))
+#define pte_exec(pte)		(pte_isclear((pte), L_PTE_XN))
 #define pte_special(pte)	(0)
 
 #define pte_valid_user(pte)	\
-	(pte_valid(pte) && (pte_val(pte) & L_PTE_USER) && pte_young(pte))
+	(pte_valid(pte) && pte_isset((pte), L_PTE_USER) && pte_young(pte))
 
 #if __LINUX_ARM_ARCH__ < 6
 static inline void __sync_icache_dcache(pte_t pteval)
-- 
cgit v1.2.3


From ded9477984690d026e46dd75e8157392cea3f13f Mon Sep 17 00:00:00 2001
From: Steven Capper <steve.capper@linaro.org>
Date: Fri, 18 Jul 2014 16:16:15 +0100
Subject: ARM: 8109/1: mm: Modify pte_write and pmd_write logic for LPAE

For LPAE, we have the following means for encoding writable or dirty
ptes:
                              L_PTE_DIRTY       L_PTE_RDONLY
    !pte_dirty && !pte_write        0               1
    !pte_dirty && pte_write         0               1
    pte_dirty && !pte_write         1               1
    pte_dirty && pte_write          1               0

So we can't distinguish between writeable clean ptes and read only
ptes. This can cause problems with ptes being incorrectly flagged as
read only when they are writeable but not dirty.

This patch renumbers L_PTE_RDONLY from AP[2] to a software bit #58,
and adds additional logic to set AP[2] whenever the pte is read only
or not dirty. That way we can distinguish between clean writeable ptes
and read only ptes.

HugeTLB pages will use this new logic automatically.

We need to add some logic to Transparent HugePages to ensure that they
correctly interpret the revised pgprot permissions (L_PTE_RDONLY has
moved and no longer matches PMD_SECT_AP2). In the process of revising
THP, the names of the PMD software bits have been prefixed with L_ to
make them easier to distinguish from their hardware bit counterparts.

Signed-off-by: Steve Capper <steve.capper@linaro.org>
Reviewed-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/include/asm/pgtable-3level-hwdef.h |  3 ++-
 arch/arm/include/asm/pgtable-3level.h       | 41 +++++++++++++++++------------
 arch/arm/mm/dump.c                          |  4 +--
 arch/arm/mm/proc-v7-3level.S                |  9 +++++--
 4 files changed, 35 insertions(+), 22 deletions(-)

(limited to 'arch')

diff --git a/arch/arm/include/asm/pgtable-3level-hwdef.h b/arch/arm/include/asm/pgtable-3level-hwdef.h
index 626989fec4d3..9fd61c72a33a 100644
--- a/arch/arm/include/asm/pgtable-3level-hwdef.h
+++ b/arch/arm/include/asm/pgtable-3level-hwdef.h
@@ -43,7 +43,7 @@
 #define PMD_SECT_BUFFERABLE	(_AT(pmdval_t, 1) << 2)
 #define PMD_SECT_CACHEABLE	(_AT(pmdval_t, 1) << 3)
 #define PMD_SECT_USER		(_AT(pmdval_t, 1) << 6)		/* AP[1] */
-#define PMD_SECT_RDONLY		(_AT(pmdval_t, 1) << 7)		/* AP[2] */
+#define PMD_SECT_AP2		(_AT(pmdval_t, 1) << 7)		/* read only */
 #define PMD_SECT_S		(_AT(pmdval_t, 3) << 8)
 #define PMD_SECT_AF		(_AT(pmdval_t, 1) << 10)
 #define PMD_SECT_nG		(_AT(pmdval_t, 1) << 11)
@@ -72,6 +72,7 @@
 #define PTE_TABLE_BIT		(_AT(pteval_t, 1) << 1)
 #define PTE_BUFFERABLE		(_AT(pteval_t, 1) << 2)		/* AttrIndx[0] */
 #define PTE_CACHEABLE		(_AT(pteval_t, 1) << 3)		/* AttrIndx[1] */
+#define PTE_AP2			(_AT(pteval_t, 1) << 7)		/* AP[2] */
 #define PTE_EXT_SHARED		(_AT(pteval_t, 3) << 8)		/* SH[1:0], inner shareable */
 #define PTE_EXT_AF		(_AT(pteval_t, 1) << 10)	/* Access Flag */
 #define PTE_EXT_NG		(_AT(pteval_t, 1) << 11)	/* nG */
diff --git a/arch/arm/include/asm/pgtable-3level.h b/arch/arm/include/asm/pgtable-3level.h
index 34f371c4bbb2..06e0bc0f8b00 100644
--- a/arch/arm/include/asm/pgtable-3level.h
+++ b/arch/arm/include/asm/pgtable-3level.h
@@ -79,18 +79,19 @@
 #define L_PTE_PRESENT		(_AT(pteval_t, 3) << 0)		/* Present */
 #define L_PTE_FILE		(_AT(pteval_t, 1) << 2)		/* only when !PRESENT */
 #define L_PTE_USER		(_AT(pteval_t, 1) << 6)		/* AP[1] */
-#define L_PTE_RDONLY		(_AT(pteval_t, 1) << 7)		/* AP[2] */
 #define L_PTE_SHARED		(_AT(pteval_t, 3) << 8)		/* SH[1:0], inner shareable */
 #define L_PTE_YOUNG		(_AT(pteval_t, 1) << 10)	/* AF */
 #define L_PTE_XN		(_AT(pteval_t, 1) << 54)	/* XN */
-#define L_PTE_DIRTY		(_AT(pteval_t, 1) << 55)	/* unused */
-#define L_PTE_SPECIAL		(_AT(pteval_t, 1) << 56)	/* unused */
+#define L_PTE_DIRTY		(_AT(pteval_t, 1) << 55)
+#define L_PTE_SPECIAL		(_AT(pteval_t, 1) << 56)
 #define L_PTE_NONE		(_AT(pteval_t, 1) << 57)	/* PROT_NONE */
+#define L_PTE_RDONLY		(_AT(pteval_t, 1) << 58)	/* READ ONLY */
 
-#define PMD_SECT_VALID		(_AT(pmdval_t, 1) << 0)
-#define PMD_SECT_DIRTY		(_AT(pmdval_t, 1) << 55)
-#define PMD_SECT_SPLITTING	(_AT(pmdval_t, 1) << 56)
-#define PMD_SECT_NONE		(_AT(pmdval_t, 1) << 57)
+#define L_PMD_SECT_VALID	(_AT(pmdval_t, 1) << 0)
+#define L_PMD_SECT_DIRTY	(_AT(pmdval_t, 1) << 55)
+#define L_PMD_SECT_SPLITTING	(_AT(pmdval_t, 1) << 56)
+#define L_PMD_SECT_NONE		(_AT(pmdval_t, 1) << 57)
+#define L_PMD_SECT_RDONLY	(_AT(pteval_t, 1) << 58)
 
 /*
  * To be used in assembly code with the upper page attributes.
@@ -214,24 +215,25 @@ static inline pmd_t *pmd_offset(pud_t *pud, unsigned long addr)
 #define pmd_young(pmd)		(pmd_isset((pmd), PMD_SECT_AF))
 
 #define __HAVE_ARCH_PMD_WRITE
-#define pmd_write(pmd)		(pmd_isclear((pmd), PMD_SECT_RDONLY))
+#define pmd_write(pmd)		(pmd_isclear((pmd), L_PMD_SECT_RDONLY))
+#define pmd_dirty(pmd)		(pmd_isset((pmd), L_PMD_SECT_DIRTY))
 
 #define pmd_hugewillfault(pmd)	(!pmd_young(pmd) || !pmd_write(pmd))
 #define pmd_thp_or_huge(pmd)	(pmd_huge(pmd) || pmd_trans_huge(pmd))
 
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 #define pmd_trans_huge(pmd)	(pmd_val(pmd) && !pmd_table(pmd))
-#define pmd_trans_splitting(pmd) (pmd_isset((pmd), PMD_SECT_SPLITTING))
+#define pmd_trans_splitting(pmd) (pmd_isset((pmd), L_PMD_SECT_SPLITTING))
 #endif
 
 #define PMD_BIT_FUNC(fn,op) \
 static inline pmd_t pmd_##fn(pmd_t pmd) { pmd_val(pmd) op; return pmd; }
 
-PMD_BIT_FUNC(wrprotect,	|= PMD_SECT_RDONLY);
+PMD_BIT_FUNC(wrprotect,	|= L_PMD_SECT_RDONLY);
 PMD_BIT_FUNC(mkold,	&= ~PMD_SECT_AF);
-PMD_BIT_FUNC(mksplitting, |= PMD_SECT_SPLITTING);
-PMD_BIT_FUNC(mkwrite,   &= ~PMD_SECT_RDONLY);
-PMD_BIT_FUNC(mkdirty,   |= PMD_SECT_DIRTY);
+PMD_BIT_FUNC(mksplitting, |= L_PMD_SECT_SPLITTING);
+PMD_BIT_FUNC(mkwrite,   &= ~L_PMD_SECT_RDONLY);
+PMD_BIT_FUNC(mkdirty,   |= L_PMD_SECT_DIRTY);
 PMD_BIT_FUNC(mkyoung,   |= PMD_SECT_AF);
 
 #define pmd_mkhuge(pmd)		(__pmd(pmd_val(pmd) & ~PMD_TABLE_BIT))
@@ -245,8 +247,8 @@ PMD_BIT_FUNC(mkyoung,   |= PMD_SECT_AF);
 
 static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot)
 {
-	const pmdval_t mask = PMD_SECT_USER | PMD_SECT_XN | PMD_SECT_RDONLY |
-				PMD_SECT_VALID | PMD_SECT_NONE;
+	const pmdval_t mask = PMD_SECT_USER | PMD_SECT_XN | L_PMD_SECT_RDONLY |
+				L_PMD_SECT_VALID | L_PMD_SECT_NONE;
 	pmd_val(pmd) = (pmd_val(pmd) & ~mask) | (pgprot_val(newprot) & mask);
 	return pmd;
 }
@@ -257,8 +259,13 @@ static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr,
 	BUG_ON(addr >= TASK_SIZE);
 
 	/* create a faulting entry if PROT_NONE protected */
-	if (pmd_val(pmd) & PMD_SECT_NONE)
-		pmd_val(pmd) &= ~PMD_SECT_VALID;
+	if (pmd_val(pmd) & L_PMD_SECT_NONE)
+		pmd_val(pmd) &= ~L_PMD_SECT_VALID;
+
+	if (pmd_write(pmd) && pmd_dirty(pmd))
+		pmd_val(pmd) &= ~PMD_SECT_AP2;
+	else
+		pmd_val(pmd) |= PMD_SECT_AP2;
 
 	*pmdp = __pmd(pmd_val(pmd) | PMD_SECT_nG);
 	flush_pmd_entry(pmdp);
diff --git a/arch/arm/mm/dump.c b/arch/arm/mm/dump.c
index c508f41a43bc..59424937e52b 100644
--- a/arch/arm/mm/dump.c
+++ b/arch/arm/mm/dump.c
@@ -126,8 +126,8 @@ static const struct prot_bits section_bits[] = {
 		.val	= PMD_SECT_USER,
 		.set	= "USR",
 	}, {
-		.mask	= PMD_SECT_RDONLY,
-		.val	= PMD_SECT_RDONLY,
+		.mask	= L_PMD_SECT_RDONLY,
+		.val	= L_PMD_SECT_RDONLY,
 		.set	= "ro",
 		.clear	= "RW",
 #elif __LINUX_ARM_ARCH__ >= 6
diff --git a/arch/arm/mm/proc-v7-3level.S b/arch/arm/mm/proc-v7-3level.S
index 564f4b934ceb..e4c8acfc1323 100644
--- a/arch/arm/mm/proc-v7-3level.S
+++ b/arch/arm/mm/proc-v7-3level.S
@@ -87,8 +87,13 @@ ENTRY(cpu_v7_set_pte_ext)
 	tst	rh, #1 << (57 - 32)		@ L_PTE_NONE
 	bicne	rl, #L_PTE_VALID
 	bne	1f
-	tst	rh, #1 << (55 - 32)		@ L_PTE_DIRTY
-	orreq	rl, #L_PTE_RDONLY
+
+	eor	ip, rh, #1 << (55 - 32)	@ toggle L_PTE_DIRTY in temp reg to
+					@ test for !L_PTE_DIRTY || L_PTE_RDONLY
+	tst	ip, #1 << (55 - 32) | 1 << (58 - 32)
+	orrne	rl, #PTE_AP2
+	biceq	rl, #PTE_AP2
+
 1:	strd	r2, r3, [r0]
 	ALT_SMP(W(nop))
 	ALT_UP (mcr	p15, 0, r0, c7, c10, 1)		@ flush_pte
-- 
cgit v1.2.3


From c51e78ed58e4e4e772bdd7897470ab2e7142f9c2 Mon Sep 17 00:00:00 2001
From: Marc Carino <marc.ceeeee@gmail.com>
Date: Wed, 23 Jul 2014 00:31:43 +0100
Subject: ARM: 8110/1: do CPU-specific init for Broadcom Brahma15 cores

Perform any CPU-specific initialization required on the
Broadcom Brahma-15 core.

Signed-off-by: Marc Carino <marc.ceeeee@gmail.com>
Acked-by: Florian Fainelli <f.fainelli@gmail.com>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Brian Norris <computersforpeace@gmail.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/mm/proc-v7.S | 11 +++++++++++
 1 file changed, 11 insertions(+)

(limited to 'arch')

diff --git a/arch/arm/mm/proc-v7.S b/arch/arm/mm/proc-v7.S
index 43951adffd87..b5d67db20897 100644
--- a/arch/arm/mm/proc-v7.S
+++ b/arch/arm/mm/proc-v7.S
@@ -250,6 +250,7 @@ __v7_cr7mp_setup:
 __v7_ca7mp_setup:
 __v7_ca12mp_setup:
 __v7_ca15mp_setup:
+__v7_b15mp_setup:
 __v7_ca17mp_setup:
 	mov	r10, #0
 1:
@@ -562,6 +563,16 @@ __v7_ca15mp_proc_info:
 	__v7_proc __v7_ca15mp_setup
 	.size	__v7_ca15mp_proc_info, . - __v7_ca15mp_proc_info
 
+	/*
+	 * Broadcom Corporation Brahma-B15 processor.
+	 */
+	.type	__v7_b15mp_proc_info, #object
+__v7_b15mp_proc_info:
+	.long	0x420f00f0
+	.long	0xff0ffff0
+	__v7_proc __v7_b15mp_setup, hwcaps = HWCAP_IDIV
+	.size	__v7_b15mp_proc_info, . - __v7_b15mp_proc_info
+
 	/*
 	 * ARM Ltd. Cortex A17 processor.
 	 */
-- 
cgit v1.2.3


From 04fcab32d3fa1d3f6afe97e0ab431c5572e07a2c Mon Sep 17 00:00:00 2001
From: Gregory Fong <gregory.0xf0@gmail.com>
Date: Wed, 23 Jul 2014 00:34:23 +0100
Subject: ARM: 8111/1: Enable erratum 798181 for Broadcom Brahma-B15

Broadcom Brahma-B15 (r0p0..r0p2) is also affected by Cortex-A15
erratum 798181, so enable the workaround for Brahma-B15.

Signed-off-by: Gregory Fong <gregory.0xf0@gmail.com>
Acked-by: Marc Carino <marc.ceeeee@gmail.com>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Brian Norris <computersforpeace@gmail.com>
Cc: Rob Herring <rob.herring@calxeda.com>
Cc: Will Deacon <will.deacon@arm.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/kernel/smp_tlb.c | 20 ++++++++++++--------
 1 file changed, 12 insertions(+), 8 deletions(-)

(limited to 'arch')

diff --git a/arch/arm/kernel/smp_tlb.c b/arch/arm/kernel/smp_tlb.c
index 95d063620b76..2e72be4f623e 100644
--- a/arch/arm/kernel/smp_tlb.c
+++ b/arch/arm/kernel/smp_tlb.c
@@ -92,15 +92,19 @@ void erratum_a15_798181_init(void)
 	unsigned int midr = read_cpuid_id();
 	unsigned int revidr = read_cpuid(CPUID_REVIDR);
 
-	/* Cortex-A15 r0p0..r3p2 w/o ECO fix affected */
-	if ((midr & 0xff0ffff0) != 0x410fc0f0 || midr > 0x413fc0f2 ||
-	    (revidr & 0x210) == 0x210) {
-		return;
-	}
-	if (revidr & 0x10)
-		erratum_a15_798181_handler = erratum_a15_798181_partial;
-	else
+	/* Brahma-B15 r0p0..r0p2 affected
+	 * Cortex-A15 r0p0..r3p2 w/o ECO fix affected */
+	if ((midr & 0xff0ffff0) == 0x420f00f0 && midr <= 0x420f00f2)
 		erratum_a15_798181_handler = erratum_a15_798181_broadcast;
+	else if ((midr & 0xff0ffff0) == 0x410fc0f0 && midr <= 0x413fc0f2 &&
+		 (revidr & 0x210) != 0x210) {
+		if (revidr & 0x10)
+			erratum_a15_798181_handler =
+				erratum_a15_798181_partial;
+		else
+			erratum_a15_798181_handler =
+				erratum_a15_798181_broadcast;
+	}
 }
 #endif
 
-- 
cgit v1.2.3


From c6f54a9b39626090c934646f7d732e31b70ffce7 Mon Sep 17 00:00:00 2001
From: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Date: Wed, 23 Jul 2014 20:37:43 +0100
Subject: ARM: 8113/1: remove remaining definitions of PLAT_PHYS_OFFSET from
 <mach/memory.h>
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The platforms selecting NEED_MACH_MEMORY_H defined the start address of
their physical memory in the respective <mach/memory.h>. With
ARM_PATCH_PHYS_VIRT=y (which is quite common today) this is useless
though because the definition isn't used but determined dynamically.

So remove the definitions from all <mach/memory.h> and provide the
Kconfig symbol PHYS_OFFSET with the respective defaults in case
ARM_PATCH_PHYS_VIRT isn't enabled.

This allows to drop the dependency of PHYS_OFFSET on !NEED_MACH_MEMORY_H
which prevents compiling an integrator nommu-kernel.
(CONFIG_PAGE_OFFSET which has "default PHYS_OFFSET if !MMU" expanded to
"0x" because CONFIG_PHYS_OFFSET doesn't exist as INTEGRATOR selects
NEED_MACH_MEMORY_H.)

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/Kconfig                               | 17 +++++++++++++++--
 arch/arm/include/asm/memory.h                  |  6 ++----
 arch/arm/mach-ebsa110/include/mach/memory.h    |  5 -----
 arch/arm/mach-ep93xx/include/mach/memory.h     | 22 ----------------------
 arch/arm/mach-footbridge/include/mach/memory.h |  5 -----
 arch/arm/mach-integrator/include/mach/memory.h |  5 -----
 arch/arm/mach-iop13xx/include/mach/memory.h    |  5 -----
 arch/arm/mach-ks8695/include/mach/memory.h     |  5 -----
 arch/arm/mach-omap1/include/mach/memory.h      |  5 -----
 arch/arm/mach-realview/include/mach/memory.h   |  9 ---------
 arch/arm/mach-rpc/include/mach/memory.h        |  5 -----
 arch/arm/mach-s5pv210/include/mach/memory.h    |  2 --
 arch/arm/mach-sa1100/include/mach/memory.h     |  5 -----
 13 files changed, 17 insertions(+), 79 deletions(-)
 delete mode 100644 arch/arm/mach-ep93xx/include/mach/memory.h

(limited to 'arch')

diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 245058b3b0ef..44ed4a803d81 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -262,8 +262,22 @@ config NEED_MACH_MEMORY_H
 
 config PHYS_OFFSET
 	hex "Physical address of main memory" if MMU
-	depends on !ARM_PATCH_PHYS_VIRT && !NEED_MACH_MEMORY_H
+	depends on !ARM_PATCH_PHYS_VIRT
 	default DRAM_BASE if !MMU
+	default 0x00000000 if ARCH_EBSA110 || \
+			EP93XX_SDCE3_SYNC_PHYS_OFFSET || \
+			ARCH_FOOTBRIDGE || \
+			ARCH_INTEGRATOR || \
+			ARCH_IOP13XX || \
+			ARCH_KS8695 || \
+			(ARCH_REALVIEW && !REALVIEW_HIGH_PHYS_OFFSET)
+	default 0x10000000 if ARCH_OMAP1 || ARCH_RPC
+	default 0x20000000 if ARCH_S5PV210
+	default 0x70000000 if REALVIEW_HIGH_PHYS_OFFSET
+	default 0xc0000000 if EP93XX_SDCE0_PHYS_OFFSET || ARCH_SA1100
+	default 0xd0000000 if EP93XX_SDCE1_PHYS_OFFSET
+	default 0xe0000000 if EP93XX_SDCE2_PHYS_OFFSET
+	default 0xf0000000 if EP93XX_SDCE3_ASYNC_PHYS_OFFSET
 	help
 	  Please provide the physical address corresponding to the
 	  location of main memory in your system.
@@ -435,7 +449,6 @@ config ARCH_EP93XX
 	select ARM_VIC
 	select CLKDEV_LOOKUP
 	select CPU_ARM920T
-	select NEED_MACH_MEMORY_H
 	help
 	  This enables support for the Cirrus EP93xx series of CPUs.
 
diff --git a/arch/arm/include/asm/memory.h b/arch/arm/include/asm/memory.h
index 2b751464d6ff..04ccf1c0a1af 100644
--- a/arch/arm/include/asm/memory.h
+++ b/arch/arm/include/asm/memory.h
@@ -150,13 +150,11 @@
 
 /*
  * PLAT_PHYS_OFFSET is the offset (from zero) of the start of physical
- * memory.  This is used for XIP and NoMMU kernels, or by kernels which
- * have their own mach/memory.h.  Assembly code must always use
+ * memory.  This is used for XIP and NoMMU kernels, and on platforms that don't
+ * have CONFIG_ARM_PATCH_PHYS_VIRT. Assembly code must always use
  * PLAT_PHYS_OFFSET and not PHYS_OFFSET.
  */
-#ifndef PLAT_PHYS_OFFSET
 #define PLAT_PHYS_OFFSET	UL(CONFIG_PHYS_OFFSET)
-#endif
 
 #ifndef __ASSEMBLY__
 
diff --git a/arch/arm/mach-ebsa110/include/mach/memory.h b/arch/arm/mach-ebsa110/include/mach/memory.h
index 8e49066ad850..866f8a1c6ff7 100644
--- a/arch/arm/mach-ebsa110/include/mach/memory.h
+++ b/arch/arm/mach-ebsa110/include/mach/memory.h
@@ -16,11 +16,6 @@
 #ifndef __ASM_ARCH_MEMORY_H
 #define __ASM_ARCH_MEMORY_H
 
-/*
- * Physical DRAM offset.
- */
-#define PLAT_PHYS_OFFSET	UL(0x00000000)
-
 /*
  * Cache flushing area - SRAM
  */
diff --git a/arch/arm/mach-ep93xx/include/mach/memory.h b/arch/arm/mach-ep93xx/include/mach/memory.h
deleted file mode 100644
index c9400cf0051c..000000000000
--- a/arch/arm/mach-ep93xx/include/mach/memory.h
+++ /dev/null
@@ -1,22 +0,0 @@
-/*
- * arch/arm/mach-ep93xx/include/mach/memory.h
- */
-
-#ifndef __ASM_ARCH_MEMORY_H
-#define __ASM_ARCH_MEMORY_H
-
-#if defined(CONFIG_EP93XX_SDCE3_SYNC_PHYS_OFFSET)
-#define PLAT_PHYS_OFFSET		UL(0x00000000)
-#elif defined(CONFIG_EP93XX_SDCE0_PHYS_OFFSET)
-#define PLAT_PHYS_OFFSET		UL(0xc0000000)
-#elif defined(CONFIG_EP93XX_SDCE1_PHYS_OFFSET)
-#define PLAT_PHYS_OFFSET		UL(0xd0000000)
-#elif defined(CONFIG_EP93XX_SDCE2_PHYS_OFFSET)
-#define PLAT_PHYS_OFFSET		UL(0xe0000000)
-#elif defined(CONFIG_EP93XX_SDCE3_ASYNC_PHYS_OFFSET)
-#define PLAT_PHYS_OFFSET		UL(0xf0000000)
-#else
-#error "Kconfig bug: No EP93xx PHYS_OFFSET set"
-#endif
-
-#endif
diff --git a/arch/arm/mach-footbridge/include/mach/memory.h b/arch/arm/mach-footbridge/include/mach/memory.h
index 5c6df377f969..6f2ecccdf323 100644
--- a/arch/arm/mach-footbridge/include/mach/memory.h
+++ b/arch/arm/mach-footbridge/include/mach/memory.h
@@ -59,11 +59,6 @@ extern unsigned long __bus_to_pfn(unsigned long);
  */
 #define FLUSH_BASE		0xf9000000
 
-/*
- * Physical DRAM offset.
- */
-#define PLAT_PHYS_OFFSET		UL(0x00000000)
-
 #define FLUSH_BASE_PHYS		0x50000000
 
 #endif
diff --git a/arch/arm/mach-integrator/include/mach/memory.h b/arch/arm/mach-integrator/include/mach/memory.h
index 334d5e271889..7268cb50ded0 100644
--- a/arch/arm/mach-integrator/include/mach/memory.h
+++ b/arch/arm/mach-integrator/include/mach/memory.h
@@ -20,11 +20,6 @@
 #ifndef __ASM_ARCH_MEMORY_H
 #define __ASM_ARCH_MEMORY_H
 
-/*
- * Physical DRAM offset.
- */
-#define PLAT_PHYS_OFFSET	UL(0x00000000)
-
 #define BUS_OFFSET	UL(0x80000000)
 #define __virt_to_bus(x)	((x) - PAGE_OFFSET + BUS_OFFSET)
 #define __bus_to_virt(x)	((x) - BUS_OFFSET + PAGE_OFFSET)
diff --git a/arch/arm/mach-iop13xx/include/mach/memory.h b/arch/arm/mach-iop13xx/include/mach/memory.h
index 7c032d0ab24a..59307e787588 100644
--- a/arch/arm/mach-iop13xx/include/mach/memory.h
+++ b/arch/arm/mach-iop13xx/include/mach/memory.h
@@ -3,11 +3,6 @@
 
 #include <mach/hardware.h>
 
-/*
- * Physical DRAM offset.
- */
-#define PLAT_PHYS_OFFSET	UL(0x00000000)
-
 #ifndef __ASSEMBLY__
 
 #if defined(CONFIG_ARCH_IOP13XX)
diff --git a/arch/arm/mach-ks8695/include/mach/memory.h b/arch/arm/mach-ks8695/include/mach/memory.h
index 95e731a7ed6a..ab0d27fa8969 100644
--- a/arch/arm/mach-ks8695/include/mach/memory.h
+++ b/arch/arm/mach-ks8695/include/mach/memory.h
@@ -15,11 +15,6 @@
 
 #include <mach/hardware.h>
 
-/*
- * Physical SRAM offset.
- */
-#define PLAT_PHYS_OFFSET		KS8695_SDRAM_PA
-
 #ifndef __ASSEMBLY__
 
 #ifdef CONFIG_PCI
diff --git a/arch/arm/mach-omap1/include/mach/memory.h b/arch/arm/mach-omap1/include/mach/memory.h
index 3c2530523111..058a4f7d44c5 100644
--- a/arch/arm/mach-omap1/include/mach/memory.h
+++ b/arch/arm/mach-omap1/include/mach/memory.h
@@ -5,11 +5,6 @@
 #ifndef __ASM_ARCH_MEMORY_H
 #define __ASM_ARCH_MEMORY_H
 
-/*
- * Physical DRAM offset.
- */
-#define PLAT_PHYS_OFFSET		UL(0x10000000)
-
 /*
  * Bus address is physical address, except for OMAP-1510 Local Bus.
  * OMAP-1510 bus address is translated into a Local Bus address if the
diff --git a/arch/arm/mach-realview/include/mach/memory.h b/arch/arm/mach-realview/include/mach/memory.h
index db09170e3832..23e7a313f75d 100644
--- a/arch/arm/mach-realview/include/mach/memory.h
+++ b/arch/arm/mach-realview/include/mach/memory.h
@@ -20,15 +20,6 @@
 #ifndef __ASM_ARCH_MEMORY_H
 #define __ASM_ARCH_MEMORY_H
 
-/*
- * Physical DRAM offset.
- */
-#ifdef CONFIG_REALVIEW_HIGH_PHYS_OFFSET
-#define PLAT_PHYS_OFFSET		UL(0x70000000)
-#else
-#define PLAT_PHYS_OFFSET		UL(0x00000000)
-#endif
-
 #ifdef CONFIG_SPARSEMEM
 
 /*
diff --git a/arch/arm/mach-rpc/include/mach/memory.h b/arch/arm/mach-rpc/include/mach/memory.h
index 18a221093bf5..b7e49571417d 100644
--- a/arch/arm/mach-rpc/include/mach/memory.h
+++ b/arch/arm/mach-rpc/include/mach/memory.h
@@ -18,11 +18,6 @@
 #ifndef __ASM_ARCH_MEMORY_H
 #define __ASM_ARCH_MEMORY_H
 
-/*
- * Physical DRAM offset.
- */
-#define PLAT_PHYS_OFFSET	UL(0x10000000)
-
 /*
  * Cache flushing area - ROM
  */
diff --git a/arch/arm/mach-s5pv210/include/mach/memory.h b/arch/arm/mach-s5pv210/include/mach/memory.h
index 2d3cfa221d5f..d584fac9156b 100644
--- a/arch/arm/mach-s5pv210/include/mach/memory.h
+++ b/arch/arm/mach-s5pv210/include/mach/memory.h
@@ -13,8 +13,6 @@
 #ifndef __ASM_ARCH_MEMORY_H
 #define __ASM_ARCH_MEMORY_H
 
-#define PLAT_PHYS_OFFSET		UL(0x20000000)
-
 /*
  * Sparsemem support
  * Physical memory can be located from 0x20000000 to 0x7fffffff,
diff --git a/arch/arm/mach-sa1100/include/mach/memory.h b/arch/arm/mach-sa1100/include/mach/memory.h
index 12d376795abc..2054051eb797 100644
--- a/arch/arm/mach-sa1100/include/mach/memory.h
+++ b/arch/arm/mach-sa1100/include/mach/memory.h
@@ -9,11 +9,6 @@
 
 #include <asm/sizes.h>
 
-/*
- * Physical DRAM offset is 0xc0000000 on the SA1100
- */
-#define PLAT_PHYS_OFFSET	UL(0xc0000000)
-
 /*
  * Because of the wide memory address space between physical RAM banks on the
  * SA1100, it's much convenient to use Linux's SparseMEM support to implement
-- 
cgit v1.2.3


From 1f8673d31a999ed7e20d9f66fcdad39e39f6b276 Mon Sep 17 00:00:00 2001
From: Jussi Kivilinna <jussi.kivilinna@iki.fi>
Date: Tue, 29 Jul 2014 17:14:09 +0100
Subject: ARM: 8118/1: crypto: sha1/make use of common SHA-1 structures

Common SHA-1 structures are defined in <crypto/sha.h> for code sharing.

This patch changes SHA-1/ARM glue code to use these structures.

Acked-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Jussi Kivilinna <jussi.kivilinna@iki.fi>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/crypto/sha1_glue.c | 50 ++++++++++++++++++++-------------------------
 1 file changed, 22 insertions(+), 28 deletions(-)

(limited to 'arch')

diff --git a/arch/arm/crypto/sha1_glue.c b/arch/arm/crypto/sha1_glue.c
index 76cd976230bc..c494e579ffc3 100644
--- a/arch/arm/crypto/sha1_glue.c
+++ b/arch/arm/crypto/sha1_glue.c
@@ -24,31 +24,25 @@
 #include <crypto/sha.h>
 #include <asm/byteorder.h>
 
-struct SHA1_CTX {
-	uint32_t h0,h1,h2,h3,h4;
-	u64 count;
-	u8 data[SHA1_BLOCK_SIZE];
-};
 
-asmlinkage void sha1_block_data_order(struct SHA1_CTX *digest,
+asmlinkage void sha1_block_data_order(u32 *digest,
 		const unsigned char *data, unsigned int rounds);
 
 
 static int sha1_init(struct shash_desc *desc)
 {
-	struct SHA1_CTX *sctx = shash_desc_ctx(desc);
-	memset(sctx, 0, sizeof(*sctx));
-	sctx->h0 = SHA1_H0;
-	sctx->h1 = SHA1_H1;
-	sctx->h2 = SHA1_H2;
-	sctx->h3 = SHA1_H3;
-	sctx->h4 = SHA1_H4;
+	struct sha1_state *sctx = shash_desc_ctx(desc);
+
+	*sctx = (struct sha1_state){
+		.state = { SHA1_H0, SHA1_H1, SHA1_H2, SHA1_H3, SHA1_H4 },
+	};
+
 	return 0;
 }
 
 
-static int __sha1_update(struct SHA1_CTX *sctx, const u8 *data,
-			       unsigned int len, unsigned int partial)
+static int __sha1_update(struct sha1_state *sctx, const u8 *data,
+			 unsigned int len, unsigned int partial)
 {
 	unsigned int done = 0;
 
@@ -56,17 +50,17 @@ static int __sha1_update(struct SHA1_CTX *sctx, const u8 *data,
 
 	if (partial) {
 		done = SHA1_BLOCK_SIZE - partial;
-		memcpy(sctx->data + partial, data, done);
-		sha1_block_data_order(sctx, sctx->data, 1);
+		memcpy(sctx->buffer + partial, data, done);
+		sha1_block_data_order(sctx->state, sctx->buffer, 1);
 	}
 
 	if (len - done >= SHA1_BLOCK_SIZE) {
 		const unsigned int rounds = (len - done) / SHA1_BLOCK_SIZE;
-		sha1_block_data_order(sctx, data + done, rounds);
+		sha1_block_data_order(sctx->state, data + done, rounds);
 		done += rounds * SHA1_BLOCK_SIZE;
 	}
 
-	memcpy(sctx->data, data + done, len - done);
+	memcpy(sctx->buffer, data + done, len - done);
 	return 0;
 }
 
@@ -74,14 +68,14 @@ static int __sha1_update(struct SHA1_CTX *sctx, const u8 *data,
 static int sha1_update(struct shash_desc *desc, const u8 *data,
 			     unsigned int len)
 {
-	struct SHA1_CTX *sctx = shash_desc_ctx(desc);
+	struct sha1_state *sctx = shash_desc_ctx(desc);
 	unsigned int partial = sctx->count % SHA1_BLOCK_SIZE;
 	int res;
 
 	/* Handle the fast case right here */
 	if (partial + len < SHA1_BLOCK_SIZE) {
 		sctx->count += len;
-		memcpy(sctx->data + partial, data, len);
+		memcpy(sctx->buffer + partial, data, len);
 		return 0;
 	}
 	res = __sha1_update(sctx, data, len, partial);
@@ -92,7 +86,7 @@ static int sha1_update(struct shash_desc *desc, const u8 *data,
 /* Add padding and return the message digest. */
 static int sha1_final(struct shash_desc *desc, u8 *out)
 {
-	struct SHA1_CTX *sctx = shash_desc_ctx(desc);
+	struct sha1_state *sctx = shash_desc_ctx(desc);
 	unsigned int i, index, padlen;
 	__be32 *dst = (__be32 *)out;
 	__be64 bits;
@@ -106,7 +100,7 @@ static int sha1_final(struct shash_desc *desc, u8 *out)
 	/* We need to fill a whole block for __sha1_update() */
 	if (padlen <= 56) {
 		sctx->count += padlen;
-		memcpy(sctx->data + index, padding, padlen);
+		memcpy(sctx->buffer + index, padding, padlen);
 	} else {
 		__sha1_update(sctx, padding, padlen, index);
 	}
@@ -114,7 +108,7 @@ static int sha1_final(struct shash_desc *desc, u8 *out)
 
 	/* Store state in digest */
 	for (i = 0; i < 5; i++)
-		dst[i] = cpu_to_be32(((u32 *)sctx)[i]);
+		dst[i] = cpu_to_be32(sctx->state[i]);
 
 	/* Wipe context */
 	memset(sctx, 0, sizeof(*sctx));
@@ -124,7 +118,7 @@ static int sha1_final(struct shash_desc *desc, u8 *out)
 
 static int sha1_export(struct shash_desc *desc, void *out)
 {
-	struct SHA1_CTX *sctx = shash_desc_ctx(desc);
+	struct sha1_state *sctx = shash_desc_ctx(desc);
 	memcpy(out, sctx, sizeof(*sctx));
 	return 0;
 }
@@ -132,7 +126,7 @@ static int sha1_export(struct shash_desc *desc, void *out)
 
 static int sha1_import(struct shash_desc *desc, const void *in)
 {
-	struct SHA1_CTX *sctx = shash_desc_ctx(desc);
+	struct sha1_state *sctx = shash_desc_ctx(desc);
 	memcpy(sctx, in, sizeof(*sctx));
 	return 0;
 }
@@ -145,8 +139,8 @@ static struct shash_alg alg = {
 	.final		=	sha1_final,
 	.export		=	sha1_export,
 	.import		=	sha1_import,
-	.descsize	=	sizeof(struct SHA1_CTX),
-	.statesize	=	sizeof(struct SHA1_CTX),
+	.descsize	=	sizeof(struct sha1_state),
+	.statesize	=	sizeof(struct sha1_state),
 	.base		=	{
 		.cra_name	=	"sha1",
 		.cra_driver_name=	"sha1-asm",
-- 
cgit v1.2.3


From 604682551aa511e00e57706ad5d9fcf955ee0323 Mon Sep 17 00:00:00 2001
From: Jussi Kivilinna <jussi.kivilinna@iki.fi>
Date: Tue, 29 Jul 2014 17:14:14 +0100
Subject: ARM: 8119/1: crypto: sha1: add ARM NEON implementation

This patch adds ARM NEON assembly implementation of SHA-1 algorithm.

tcrypt benchmark results on Cortex-A8, sha1-arm-asm vs sha1-neon-asm:

block-size      bytes/update    old-vs-new
16              16              1.04x
64              16              1.02x
64              64              1.05x
256             16              1.03x
256             64              1.04x
256             256             1.30x
1024            16              1.03x
1024            256             1.36x
1024            1024            1.52x
2048            16              1.03x
2048            256             1.39x
2048            1024            1.55x
2048            2048            1.59x
4096            16              1.03x
4096            256             1.40x
4096            1024            1.57x
4096            4096            1.62x
8192            16              1.03x
8192            256             1.40x
8192            1024            1.58x
8192            4096            1.63x
8192            8192            1.63x

Acked-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Tested-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Jussi Kivilinna <jussi.kivilinna@iki.fi>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/crypto/Makefile           |   2 +
 arch/arm/crypto/sha1-armv7-neon.S  | 634 +++++++++++++++++++++++++++++++++++++
 arch/arm/crypto/sha1_glue.c        |   8 +-
 arch/arm/crypto/sha1_neon_glue.c   | 197 ++++++++++++
 arch/arm/include/asm/crypto/sha1.h |  10 +
 crypto/Kconfig                     |  11 +
 6 files changed, 859 insertions(+), 3 deletions(-)
 create mode 100644 arch/arm/crypto/sha1-armv7-neon.S
 create mode 100644 arch/arm/crypto/sha1_neon_glue.c
 create mode 100644 arch/arm/include/asm/crypto/sha1.h

(limited to 'arch')

diff --git a/arch/arm/crypto/Makefile b/arch/arm/crypto/Makefile
index 81cda39860c5..374956d2f896 100644
--- a/arch/arm/crypto/Makefile
+++ b/arch/arm/crypto/Makefile
@@ -5,10 +5,12 @@
 obj-$(CONFIG_CRYPTO_AES_ARM) += aes-arm.o
 obj-$(CONFIG_CRYPTO_AES_ARM_BS) += aes-arm-bs.o
 obj-$(CONFIG_CRYPTO_SHA1_ARM) += sha1-arm.o
+obj-$(CONFIG_CRYPTO_SHA1_ARM_NEON) += sha1-arm-neon.o
 
 aes-arm-y	:= aes-armv4.o aes_glue.o
 aes-arm-bs-y	:= aesbs-core.o aesbs-glue.o
 sha1-arm-y	:= sha1-armv4-large.o sha1_glue.o
+sha1-arm-neon-y	:= sha1-armv7-neon.o sha1_neon_glue.o
 
 quiet_cmd_perl = PERL    $@
       cmd_perl = $(PERL) $(<) > $(@)
diff --git a/arch/arm/crypto/sha1-armv7-neon.S b/arch/arm/crypto/sha1-armv7-neon.S
new file mode 100644
index 000000000000..50013c0e2864
--- /dev/null
+++ b/arch/arm/crypto/sha1-armv7-neon.S
@@ -0,0 +1,634 @@
+/* sha1-armv7-neon.S - ARM/NEON accelerated SHA-1 transform function
+ *
+ * Copyright © 2013-2014 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ */
+
+#include <linux/linkage.h>
+
+
+.syntax unified
+.code   32
+.fpu neon
+
+.text
+
+
+/* Context structure */
+
+#define state_h0 0
+#define state_h1 4
+#define state_h2 8
+#define state_h3 12
+#define state_h4 16
+
+
+/* Constants */
+
+#define K1  0x5A827999
+#define K2  0x6ED9EBA1
+#define K3  0x8F1BBCDC
+#define K4  0xCA62C1D6
+.align 4
+.LK_VEC:
+.LK1:	.long K1, K1, K1, K1
+.LK2:	.long K2, K2, K2, K2
+.LK3:	.long K3, K3, K3, K3
+.LK4:	.long K4, K4, K4, K4
+
+
+/* Register macros */
+
+#define RSTATE r0
+#define RDATA r1
+#define RNBLKS r2
+#define ROLDSTACK r3
+#define RWK lr
+
+#define _a r4
+#define _b r5
+#define _c r6
+#define _d r7
+#define _e r8
+
+#define RT0 r9
+#define RT1 r10
+#define RT2 r11
+#define RT3 r12
+
+#define W0 q0
+#define W1 q1
+#define W2 q2
+#define W3 q3
+#define W4 q4
+#define W5 q5
+#define W6 q6
+#define W7 q7
+
+#define tmp0 q8
+#define tmp1 q9
+#define tmp2 q10
+#define tmp3 q11
+
+#define qK1 q12
+#define qK2 q13
+#define qK3 q14
+#define qK4 q15
+
+
+/* Round function macros. */
+
+#define WK_offs(i) (((i) & 15) * 4)
+
+#define _R_F1(a,b,c,d,e,i,pre1,pre2,pre3,i16,\
+	      W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
+	ldr RT3, [sp, WK_offs(i)]; \
+		pre1(i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28); \
+	bic RT0, d, b; \
+	add e, e, a, ror #(32 - 5); \
+	and RT1, c, b; \
+		pre2(i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28); \
+	add RT0, RT0, RT3; \
+	add e, e, RT1; \
+	ror b, #(32 - 30); \
+		pre3(i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28); \
+	add e, e, RT0;
+
+#define _R_F2(a,b,c,d,e,i,pre1,pre2,pre3,i16,\
+	      W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
+	ldr RT3, [sp, WK_offs(i)]; \
+		pre1(i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28); \
+	eor RT0, d, b; \
+	add e, e, a, ror #(32 - 5); \
+	eor RT0, RT0, c; \
+		pre2(i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28); \
+	add e, e, RT3; \
+	ror b, #(32 - 30); \
+		pre3(i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28); \
+	add e, e, RT0; \
+
+#define _R_F3(a,b,c,d,e,i,pre1,pre2,pre3,i16,\
+	      W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
+	ldr RT3, [sp, WK_offs(i)]; \
+		pre1(i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28); \
+	eor RT0, b, c; \
+	and RT1, b, c; \
+	add e, e, a, ror #(32 - 5); \
+		pre2(i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28); \
+	and RT0, RT0, d; \
+	add RT1, RT1, RT3; \
+	add e, e, RT0; \
+	ror b, #(32 - 30); \
+		pre3(i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28); \
+	add e, e, RT1;
+
+#define _R_F4(a,b,c,d,e,i,pre1,pre2,pre3,i16,\
+	      W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
+	_R_F2(a,b,c,d,e,i,pre1,pre2,pre3,i16,\
+	      W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28)
+
+#define _R(a,b,c,d,e,f,i,pre1,pre2,pre3,i16,\
+           W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
+	_R_##f(a,b,c,d,e,i,pre1,pre2,pre3,i16,\
+	       W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28)
+
+#define R(a,b,c,d,e,f,i) \
+	_R_##f(a,b,c,d,e,i,dummy,dummy,dummy,i16,\
+	       W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28)
+
+#define dummy(...)
+
+
+/* Input expansion macros. */
+
+/********* Precalc macros for rounds 0-15 *************************************/
+
+#define W_PRECALC_00_15() \
+	add       RWK, sp, #(WK_offs(0));			\
+	\
+	vld1.32   {tmp0, tmp1}, [RDATA]!;			\
+	vrev32.8  W0, tmp0;		/* big => little */	\
+	vld1.32   {tmp2, tmp3}, [RDATA]!;			\
+	vadd.u32  tmp0, W0, curK;				\
+	vrev32.8  W7, tmp1;		/* big => little */	\
+	vrev32.8  W6, tmp2;		/* big => little */	\
+	vadd.u32  tmp1, W7, curK;				\
+	vrev32.8  W5, tmp3;		/* big => little */	\
+	vadd.u32  tmp2, W6, curK;				\
+	vst1.32   {tmp0, tmp1}, [RWK]!;				\
+	vadd.u32  tmp3, W5, curK;				\
+	vst1.32   {tmp2, tmp3}, [RWK];				\
+
+#define WPRECALC_00_15_0(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
+	vld1.32   {tmp0, tmp1}, [RDATA]!;			\
+
+#define WPRECALC_00_15_1(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
+	add       RWK, sp, #(WK_offs(0));			\
+
+#define WPRECALC_00_15_2(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
+	vrev32.8  W0, tmp0;		/* big => little */	\
+
+#define WPRECALC_00_15_3(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
+	vld1.32   {tmp2, tmp3}, [RDATA]!;			\
+
+#define WPRECALC_00_15_4(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
+	vadd.u32  tmp0, W0, curK;				\
+
+#define WPRECALC_00_15_5(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
+	vrev32.8  W7, tmp1;		/* big => little */	\
+
+#define WPRECALC_00_15_6(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
+	vrev32.8  W6, tmp2;		/* big => little */	\
+
+#define WPRECALC_00_15_7(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
+	vadd.u32  tmp1, W7, curK;				\
+
+#define WPRECALC_00_15_8(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
+	vrev32.8  W5, tmp3;		/* big => little */	\
+
+#define WPRECALC_00_15_9(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
+	vadd.u32  tmp2, W6, curK;				\
+
+#define WPRECALC_00_15_10(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
+	vst1.32   {tmp0, tmp1}, [RWK]!;				\
+
+#define WPRECALC_00_15_11(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
+	vadd.u32  tmp3, W5, curK;				\
+
+#define WPRECALC_00_15_12(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
+	vst1.32   {tmp2, tmp3}, [RWK];				\
+
+
+/********* Precalc macros for rounds 16-31 ************************************/
+
+#define WPRECALC_16_31_0(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
+	veor      tmp0, tmp0;			\
+	vext.8    W, W_m16, W_m12, #8;		\
+
+#define WPRECALC_16_31_1(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
+	add       RWK, sp, #(WK_offs(i));	\
+	vext.8    tmp0, W_m04, tmp0, #4;	\
+
+#define WPRECALC_16_31_2(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
+	veor      tmp0, tmp0, W_m16;		\
+	veor.32   W, W, W_m08;			\
+
+#define WPRECALC_16_31_3(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
+	veor      tmp1, tmp1;			\
+	veor      W, W, tmp0;			\
+
+#define WPRECALC_16_31_4(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
+	vshl.u32  tmp0, W, #1;			\
+
+#define WPRECALC_16_31_5(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
+	vext.8    tmp1, tmp1, W, #(16-12);	\
+	vshr.u32  W, W, #31;			\
+
+#define WPRECALC_16_31_6(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
+	vorr      tmp0, tmp0, W;		\
+	vshr.u32  W, tmp1, #30;			\
+
+#define WPRECALC_16_31_7(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
+	vshl.u32  tmp1, tmp1, #2;		\
+
+#define WPRECALC_16_31_8(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
+	veor      tmp0, tmp0, W;		\
+
+#define WPRECALC_16_31_9(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
+	veor      W, tmp0, tmp1;		\
+
+#define WPRECALC_16_31_10(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
+	vadd.u32  tmp0, W, curK;		\
+
+#define WPRECALC_16_31_11(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
+	vst1.32   {tmp0}, [RWK];
+
+
+/********* Precalc macros for rounds 32-79 ************************************/
+
+#define WPRECALC_32_79_0(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
+	veor W, W_m28; \
+
+#define WPRECALC_32_79_1(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
+	vext.8 tmp0, W_m08, W_m04, #8; \
+
+#define WPRECALC_32_79_2(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
+	veor W, W_m16; \
+
+#define WPRECALC_32_79_3(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
+	veor W, tmp0; \
+
+#define WPRECALC_32_79_4(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
+	add RWK, sp, #(WK_offs(i&~3)); \
+
+#define WPRECALC_32_79_5(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
+	vshl.u32 tmp1, W, #2; \
+
+#define WPRECALC_32_79_6(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
+	vshr.u32 tmp0, W, #30; \
+
+#define WPRECALC_32_79_7(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
+	vorr W, tmp0, tmp1; \
+
+#define WPRECALC_32_79_8(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
+	vadd.u32 tmp0, W, curK; \
+
+#define WPRECALC_32_79_9(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
+	vst1.32 {tmp0}, [RWK];
+
+
+/*
+ * Transform nblks*64 bytes (nblks*16 32-bit words) at DATA.
+ *
+ * unsigned int
+ * sha1_transform_neon (void *ctx, const unsigned char *data,
+ *                      unsigned int nblks)
+ */
+.align 3
+ENTRY(sha1_transform_neon)
+  /* input:
+   *	r0: ctx, CTX
+   *	r1: data (64*nblks bytes)
+   *	r2: nblks
+   */
+
+  cmp RNBLKS, #0;
+  beq .Ldo_nothing;
+
+  push {r4-r12, lr};
+  /*vpush {q4-q7};*/
+
+  adr RT3, .LK_VEC;
+
+  mov ROLDSTACK, sp;
+
+  /* Align stack. */
+  sub RT0, sp, #(16*4);
+  and RT0, #(~(16-1));
+  mov sp, RT0;
+
+  vld1.32 {qK1-qK2}, [RT3]!; /* Load K1,K2 */
+
+  /* Get the values of the chaining variables. */
+  ldm RSTATE, {_a-_e};
+
+  vld1.32 {qK3-qK4}, [RT3]; /* Load K3,K4 */
+
+#undef curK
+#define curK qK1
+  /* Precalc 0-15. */
+  W_PRECALC_00_15();
+
+.Loop:
+  /* Transform 0-15 + Precalc 16-31. */
+  _R( _a, _b, _c, _d, _e, F1,  0,
+      WPRECALC_16_31_0, WPRECALC_16_31_1, WPRECALC_16_31_2, 16,
+      W4, W5, W6, W7, W0, _, _, _ );
+  _R( _e, _a, _b, _c, _d, F1,  1,
+      WPRECALC_16_31_3, WPRECALC_16_31_4, WPRECALC_16_31_5, 16,
+      W4, W5, W6, W7, W0, _, _, _ );
+  _R( _d, _e, _a, _b, _c, F1,  2,
+      WPRECALC_16_31_6, WPRECALC_16_31_7, WPRECALC_16_31_8, 16,
+      W4, W5, W6, W7, W0, _, _, _ );
+  _R( _c, _d, _e, _a, _b, F1,  3,
+      WPRECALC_16_31_9, WPRECALC_16_31_10,WPRECALC_16_31_11,16,
+      W4, W5, W6, W7, W0, _, _, _ );
+
+#undef curK
+#define curK qK2
+  _R( _b, _c, _d, _e, _a, F1,  4,
+      WPRECALC_16_31_0, WPRECALC_16_31_1, WPRECALC_16_31_2, 20,
+      W3, W4, W5, W6, W7, _, _, _ );
+  _R( _a, _b, _c, _d, _e, F1,  5,
+      WPRECALC_16_31_3, WPRECALC_16_31_4, WPRECALC_16_31_5, 20,
+      W3, W4, W5, W6, W7, _, _, _ );
+  _R( _e, _a, _b, _c, _d, F1,  6,
+      WPRECALC_16_31_6, WPRECALC_16_31_7, WPRECALC_16_31_8, 20,
+      W3, W4, W5, W6, W7, _, _, _ );
+  _R( _d, _e, _a, _b, _c, F1,  7,
+      WPRECALC_16_31_9, WPRECALC_16_31_10,WPRECALC_16_31_11,20,
+      W3, W4, W5, W6, W7, _, _, _ );
+
+  _R( _c, _d, _e, _a, _b, F1,  8,
+      WPRECALC_16_31_0, WPRECALC_16_31_1, WPRECALC_16_31_2, 24,
+      W2, W3, W4, W5, W6, _, _, _ );
+  _R( _b, _c, _d, _e, _a, F1,  9,
+      WPRECALC_16_31_3, WPRECALC_16_31_4, WPRECALC_16_31_5, 24,
+      W2, W3, W4, W5, W6, _, _, _ );
+  _R( _a, _b, _c, _d, _e, F1, 10,
+      WPRECALC_16_31_6, WPRECALC_16_31_7, WPRECALC_16_31_8, 24,
+      W2, W3, W4, W5, W6, _, _, _ );
+  _R( _e, _a, _b, _c, _d, F1, 11,
+      WPRECALC_16_31_9, WPRECALC_16_31_10,WPRECALC_16_31_11,24,
+      W2, W3, W4, W5, W6, _, _, _ );
+
+  _R( _d, _e, _a, _b, _c, F1, 12,
+      WPRECALC_16_31_0, WPRECALC_16_31_1, WPRECALC_16_31_2, 28,
+      W1, W2, W3, W4, W5, _, _, _ );
+  _R( _c, _d, _e, _a, _b, F1, 13,
+      WPRECALC_16_31_3, WPRECALC_16_31_4, WPRECALC_16_31_5, 28,
+      W1, W2, W3, W4, W5, _, _, _ );
+  _R( _b, _c, _d, _e, _a, F1, 14,
+      WPRECALC_16_31_6, WPRECALC_16_31_7, WPRECALC_16_31_8, 28,
+      W1, W2, W3, W4, W5, _, _, _ );
+  _R( _a, _b, _c, _d, _e, F1, 15,
+      WPRECALC_16_31_9, WPRECALC_16_31_10,WPRECALC_16_31_11,28,
+      W1, W2, W3, W4, W5, _, _, _ );
+
+  /* Transform 16-63 + Precalc 32-79. */
+  _R( _e, _a, _b, _c, _d, F1, 16,
+      WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 32,
+      W0, W1, W2, W3, W4, W5, W6, W7);
+  _R( _d, _e, _a, _b, _c, F1, 17,
+      WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 32,
+      W0, W1, W2, W3, W4, W5, W6, W7);
+  _R( _c, _d, _e, _a, _b, F1, 18,
+      WPRECALC_32_79_6, dummy,            WPRECALC_32_79_7, 32,
+      W0, W1, W2, W3, W4, W5, W6, W7);
+  _R( _b, _c, _d, _e, _a, F1, 19,
+      WPRECALC_32_79_8, dummy,            WPRECALC_32_79_9, 32,
+      W0, W1, W2, W3, W4, W5, W6, W7);
+
+  _R( _a, _b, _c, _d, _e, F2, 20,
+      WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 36,
+      W7, W0, W1, W2, W3, W4, W5, W6);
+  _R( _e, _a, _b, _c, _d, F2, 21,
+      WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 36,
+      W7, W0, W1, W2, W3, W4, W5, W6);
+  _R( _d, _e, _a, _b, _c, F2, 22,
+      WPRECALC_32_79_6, dummy,            WPRECALC_32_79_7, 36,
+      W7, W0, W1, W2, W3, W4, W5, W6);
+  _R( _c, _d, _e, _a, _b, F2, 23,
+      WPRECALC_32_79_8, dummy,            WPRECALC_32_79_9, 36,
+      W7, W0, W1, W2, W3, W4, W5, W6);
+
+#undef curK
+#define curK qK3
+  _R( _b, _c, _d, _e, _a, F2, 24,
+      WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 40,
+      W6, W7, W0, W1, W2, W3, W4, W5);
+  _R( _a, _b, _c, _d, _e, F2, 25,
+      WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 40,
+      W6, W7, W0, W1, W2, W3, W4, W5);
+  _R( _e, _a, _b, _c, _d, F2, 26,
+      WPRECALC_32_79_6, dummy,            WPRECALC_32_79_7, 40,
+      W6, W7, W0, W1, W2, W3, W4, W5);
+  _R( _d, _e, _a, _b, _c, F2, 27,
+      WPRECALC_32_79_8, dummy,            WPRECALC_32_79_9, 40,
+      W6, W7, W0, W1, W2, W3, W4, W5);
+
+  _R( _c, _d, _e, _a, _b, F2, 28,
+      WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 44,
+      W5, W6, W7, W0, W1, W2, W3, W4);
+  _R( _b, _c, _d, _e, _a, F2, 29,
+      WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 44,
+      W5, W6, W7, W0, W1, W2, W3, W4);
+  _R( _a, _b, _c, _d, _e, F2, 30,
+      WPRECALC_32_79_6, dummy,            WPRECALC_32_79_7, 44,
+      W5, W6, W7, W0, W1, W2, W3, W4);
+  _R( _e, _a, _b, _c, _d, F2, 31,
+      WPRECALC_32_79_8, dummy,            WPRECALC_32_79_9, 44,
+      W5, W6, W7, W0, W1, W2, W3, W4);
+
+  _R( _d, _e, _a, _b, _c, F2, 32,
+      WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 48,
+      W4, W5, W6, W7, W0, W1, W2, W3);
+  _R( _c, _d, _e, _a, _b, F2, 33,
+      WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 48,
+      W4, W5, W6, W7, W0, W1, W2, W3);
+  _R( _b, _c, _d, _e, _a, F2, 34,
+      WPRECALC_32_79_6, dummy,            WPRECALC_32_79_7, 48,
+      W4, W5, W6, W7, W0, W1, W2, W3);
+  _R( _a, _b, _c, _d, _e, F2, 35,
+      WPRECALC_32_79_8, dummy,            WPRECALC_32_79_9, 48,
+      W4, W5, W6, W7, W0, W1, W2, W3);
+
+  _R( _e, _a, _b, _c, _d, F2, 36,
+      WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 52,
+      W3, W4, W5, W6, W7, W0, W1, W2);
+  _R( _d, _e, _a, _b, _c, F2, 37,
+      WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 52,
+      W3, W4, W5, W6, W7, W0, W1, W2);
+  _R( _c, _d, _e, _a, _b, F2, 38,
+      WPRECALC_32_79_6, dummy,            WPRECALC_32_79_7, 52,
+      W3, W4, W5, W6, W7, W0, W1, W2);
+  _R( _b, _c, _d, _e, _a, F2, 39,
+      WPRECALC_32_79_8, dummy,            WPRECALC_32_79_9, 52,
+      W3, W4, W5, W6, W7, W0, W1, W2);
+
+  _R( _a, _b, _c, _d, _e, F3, 40,
+      WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 56,
+      W2, W3, W4, W5, W6, W7, W0, W1);
+  _R( _e, _a, _b, _c, _d, F3, 41,
+      WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 56,
+      W2, W3, W4, W5, W6, W7, W0, W1);
+  _R( _d, _e, _a, _b, _c, F3, 42,
+      WPRECALC_32_79_6, dummy,            WPRECALC_32_79_7, 56,
+      W2, W3, W4, W5, W6, W7, W0, W1);
+  _R( _c, _d, _e, _a, _b, F3, 43,
+      WPRECALC_32_79_8, dummy,            WPRECALC_32_79_9, 56,
+      W2, W3, W4, W5, W6, W7, W0, W1);
+
+#undef curK
+#define curK qK4
+  _R( _b, _c, _d, _e, _a, F3, 44,
+      WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 60,
+      W1, W2, W3, W4, W5, W6, W7, W0);
+  _R( _a, _b, _c, _d, _e, F3, 45,
+      WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 60,
+      W1, W2, W3, W4, W5, W6, W7, W0);
+  _R( _e, _a, _b, _c, _d, F3, 46,
+      WPRECALC_32_79_6, dummy,            WPRECALC_32_79_7, 60,
+      W1, W2, W3, W4, W5, W6, W7, W0);
+  _R( _d, _e, _a, _b, _c, F3, 47,
+      WPRECALC_32_79_8, dummy,            WPRECALC_32_79_9, 60,
+      W1, W2, W3, W4, W5, W6, W7, W0);
+
+  _R( _c, _d, _e, _a, _b, F3, 48,
+      WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 64,
+      W0, W1, W2, W3, W4, W5, W6, W7);
+  _R( _b, _c, _d, _e, _a, F3, 49,
+      WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 64,
+      W0, W1, W2, W3, W4, W5, W6, W7);
+  _R( _a, _b, _c, _d, _e, F3, 50,
+      WPRECALC_32_79_6, dummy,            WPRECALC_32_79_7, 64,
+      W0, W1, W2, W3, W4, W5, W6, W7);
+  _R( _e, _a, _b, _c, _d, F3, 51,
+      WPRECALC_32_79_8, dummy,            WPRECALC_32_79_9, 64,
+      W0, W1, W2, W3, W4, W5, W6, W7);
+
+  _R( _d, _e, _a, _b, _c, F3, 52,
+      WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 68,
+      W7, W0, W1, W2, W3, W4, W5, W6);
+  _R( _c, _d, _e, _a, _b, F3, 53,
+      WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 68,
+      W7, W0, W1, W2, W3, W4, W5, W6);
+  _R( _b, _c, _d, _e, _a, F3, 54,
+      WPRECALC_32_79_6, dummy,            WPRECALC_32_79_7, 68,
+      W7, W0, W1, W2, W3, W4, W5, W6);
+  _R( _a, _b, _c, _d, _e, F3, 55,
+      WPRECALC_32_79_8, dummy,            WPRECALC_32_79_9, 68,
+      W7, W0, W1, W2, W3, W4, W5, W6);
+
+  _R( _e, _a, _b, _c, _d, F3, 56,
+      WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 72,
+      W6, W7, W0, W1, W2, W3, W4, W5);
+  _R( _d, _e, _a, _b, _c, F3, 57,
+      WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 72,
+      W6, W7, W0, W1, W2, W3, W4, W5);
+  _R( _c, _d, _e, _a, _b, F3, 58,
+      WPRECALC_32_79_6, dummy,            WPRECALC_32_79_7, 72,
+      W6, W7, W0, W1, W2, W3, W4, W5);
+  _R( _b, _c, _d, _e, _a, F3, 59,
+      WPRECALC_32_79_8, dummy,            WPRECALC_32_79_9, 72,
+      W6, W7, W0, W1, W2, W3, W4, W5);
+
+  subs RNBLKS, #1;
+
+  _R( _a, _b, _c, _d, _e, F4, 60,
+      WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 76,
+      W5, W6, W7, W0, W1, W2, W3, W4);
+  _R( _e, _a, _b, _c, _d, F4, 61,
+      WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 76,
+      W5, W6, W7, W0, W1, W2, W3, W4);
+  _R( _d, _e, _a, _b, _c, F4, 62,
+      WPRECALC_32_79_6, dummy,            WPRECALC_32_79_7, 76,
+      W5, W6, W7, W0, W1, W2, W3, W4);
+  _R( _c, _d, _e, _a, _b, F4, 63,
+      WPRECALC_32_79_8, dummy,            WPRECALC_32_79_9, 76,
+      W5, W6, W7, W0, W1, W2, W3, W4);
+
+  beq .Lend;
+
+  /* Transform 64-79 + Precalc 0-15 of next block. */
+#undef curK
+#define curK qK1
+  _R( _b, _c, _d, _e, _a, F4, 64,
+      WPRECALC_00_15_0, dummy, dummy, _, _, _, _, _, _, _, _, _ );
+  _R( _a, _b, _c, _d, _e, F4, 65,
+      WPRECALC_00_15_1, dummy, dummy, _, _, _, _, _, _, _, _, _ );
+  _R( _e, _a, _b, _c, _d, F4, 66,
+      WPRECALC_00_15_2, dummy, dummy, _, _, _, _, _, _, _, _, _ );
+  _R( _d, _e, _a, _b, _c, F4, 67,
+      WPRECALC_00_15_3, dummy, dummy, _, _, _, _, _, _, _, _, _ );
+
+  _R( _c, _d, _e, _a, _b, F4, 68,
+      dummy,            dummy, dummy, _, _, _, _, _, _, _, _, _ );
+  _R( _b, _c, _d, _e, _a, F4, 69,
+      dummy,            dummy, dummy, _, _, _, _, _, _, _, _, _ );
+  _R( _a, _b, _c, _d, _e, F4, 70,
+      WPRECALC_00_15_4, dummy, dummy, _, _, _, _, _, _, _, _, _ );
+  _R( _e, _a, _b, _c, _d, F4, 71,
+      WPRECALC_00_15_5, dummy, dummy, _, _, _, _, _, _, _, _, _ );
+
+  _R( _d, _e, _a, _b, _c, F4, 72,
+      dummy,            dummy, dummy, _, _, _, _, _, _, _, _, _ );
+  _R( _c, _d, _e, _a, _b, F4, 73,
+      dummy,            dummy, dummy, _, _, _, _, _, _, _, _, _ );
+  _R( _b, _c, _d, _e, _a, F4, 74,
+      WPRECALC_00_15_6, dummy, dummy, _, _, _, _, _, _, _, _, _ );
+  _R( _a, _b, _c, _d, _e, F4, 75,
+      WPRECALC_00_15_7, dummy, dummy, _, _, _, _, _, _, _, _, _ );
+
+  _R( _e, _a, _b, _c, _d, F4, 76,
+      WPRECALC_00_15_8, dummy, dummy, _, _, _, _, _, _, _, _, _ );
+  _R( _d, _e, _a, _b, _c, F4, 77,
+      WPRECALC_00_15_9, dummy, dummy, _, _, _, _, _, _, _, _, _ );
+  _R( _c, _d, _e, _a, _b, F4, 78,
+      WPRECALC_00_15_10, dummy, dummy, _, _, _, _, _, _, _, _, _ );
+  _R( _b, _c, _d, _e, _a, F4, 79,
+      WPRECALC_00_15_11, dummy, WPRECALC_00_15_12, _, _, _, _, _, _, _, _, _ );
+
+  /* Update the chaining variables. */
+  ldm RSTATE, {RT0-RT3};
+  add _a, RT0;
+  ldr RT0, [RSTATE, #state_h4];
+  add _b, RT1;
+  add _c, RT2;
+  add _d, RT3;
+  add _e, RT0;
+  stm RSTATE, {_a-_e};
+
+  b .Loop;
+
+.Lend:
+  /* Transform 64-79 */
+  R( _b, _c, _d, _e, _a, F4, 64 );
+  R( _a, _b, _c, _d, _e, F4, 65 );
+  R( _e, _a, _b, _c, _d, F4, 66 );
+  R( _d, _e, _a, _b, _c, F4, 67 );
+  R( _c, _d, _e, _a, _b, F4, 68 );
+  R( _b, _c, _d, _e, _a, F4, 69 );
+  R( _a, _b, _c, _d, _e, F4, 70 );
+  R( _e, _a, _b, _c, _d, F4, 71 );
+  R( _d, _e, _a, _b, _c, F4, 72 );
+  R( _c, _d, _e, _a, _b, F4, 73 );
+  R( _b, _c, _d, _e, _a, F4, 74 );
+  R( _a, _b, _c, _d, _e, F4, 75 );
+  R( _e, _a, _b, _c, _d, F4, 76 );
+  R( _d, _e, _a, _b, _c, F4, 77 );
+  R( _c, _d, _e, _a, _b, F4, 78 );
+  R( _b, _c, _d, _e, _a, F4, 79 );
+
+  mov sp, ROLDSTACK;
+
+  /* Update the chaining variables. */
+  ldm RSTATE, {RT0-RT3};
+  add _a, RT0;
+  ldr RT0, [RSTATE, #state_h4];
+  add _b, RT1;
+  add _c, RT2;
+  add _d, RT3;
+  /*vpop {q4-q7};*/
+  add _e, RT0;
+  stm RSTATE, {_a-_e};
+
+  pop {r4-r12, pc};
+
+.Ldo_nothing:
+  bx lr
+ENDPROC(sha1_transform_neon)
diff --git a/arch/arm/crypto/sha1_glue.c b/arch/arm/crypto/sha1_glue.c
index c494e579ffc3..84f2a756588b 100644
--- a/arch/arm/crypto/sha1_glue.c
+++ b/arch/arm/crypto/sha1_glue.c
@@ -23,6 +23,7 @@
 #include <linux/types.h>
 #include <crypto/sha.h>
 #include <asm/byteorder.h>
+#include <asm/crypto/sha1.h>
 
 
 asmlinkage void sha1_block_data_order(u32 *digest,
@@ -65,8 +66,8 @@ static int __sha1_update(struct sha1_state *sctx, const u8 *data,
 }
 
 
-static int sha1_update(struct shash_desc *desc, const u8 *data,
-			     unsigned int len)
+int sha1_update_arm(struct shash_desc *desc, const u8 *data,
+		    unsigned int len)
 {
 	struct sha1_state *sctx = shash_desc_ctx(desc);
 	unsigned int partial = sctx->count % SHA1_BLOCK_SIZE;
@@ -81,6 +82,7 @@ static int sha1_update(struct shash_desc *desc, const u8 *data,
 	res = __sha1_update(sctx, data, len, partial);
 	return res;
 }
+EXPORT_SYMBOL_GPL(sha1_update_arm);
 
 
 /* Add padding and return the message digest. */
@@ -135,7 +137,7 @@ static int sha1_import(struct shash_desc *desc, const void *in)
 static struct shash_alg alg = {
 	.digestsize	=	SHA1_DIGEST_SIZE,
 	.init		=	sha1_init,
-	.update		=	sha1_update,
+	.update		=	sha1_update_arm,
 	.final		=	sha1_final,
 	.export		=	sha1_export,
 	.import		=	sha1_import,
diff --git a/arch/arm/crypto/sha1_neon_glue.c b/arch/arm/crypto/sha1_neon_glue.c
new file mode 100644
index 000000000000..6f1b411b1d55
--- /dev/null
+++ b/arch/arm/crypto/sha1_neon_glue.c
@@ -0,0 +1,197 @@
+/*
+ * Glue code for the SHA1 Secure Hash Algorithm assembler implementation using
+ * ARM NEON instructions.
+ *
+ * Copyright © 2014 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is based on sha1_generic.c and sha1_ssse3_glue.c:
+ *  Copyright (c) Alan Smithee.
+ *  Copyright (c) Andrew McDonald <andrew@mcdonald.org.uk>
+ *  Copyright (c) Jean-Francois Dive <jef@linuxbe.org>
+ *  Copyright (c) Mathias Krause <minipli@googlemail.com>
+ *  Copyright (c) Chandramouli Narayanan <mouli@linux.intel.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ */
+
+#include <crypto/internal/hash.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/mm.h>
+#include <linux/cryptohash.h>
+#include <linux/types.h>
+#include <crypto/sha.h>
+#include <asm/byteorder.h>
+#include <asm/neon.h>
+#include <asm/simd.h>
+#include <asm/crypto/sha1.h>
+
+
+asmlinkage void sha1_transform_neon(void *state_h, const char *data,
+				    unsigned int rounds);
+
+
+static int sha1_neon_init(struct shash_desc *desc)
+{
+	struct sha1_state *sctx = shash_desc_ctx(desc);
+
+	*sctx = (struct sha1_state){
+		.state = { SHA1_H0, SHA1_H1, SHA1_H2, SHA1_H3, SHA1_H4 },
+	};
+
+	return 0;
+}
+
+static int __sha1_neon_update(struct shash_desc *desc, const u8 *data,
+			       unsigned int len, unsigned int partial)
+{
+	struct sha1_state *sctx = shash_desc_ctx(desc);
+	unsigned int done = 0;
+
+	sctx->count += len;
+
+	if (partial) {
+		done = SHA1_BLOCK_SIZE - partial;
+		memcpy(sctx->buffer + partial, data, done);
+		sha1_transform_neon(sctx->state, sctx->buffer, 1);
+	}
+
+	if (len - done >= SHA1_BLOCK_SIZE) {
+		const unsigned int rounds = (len - done) / SHA1_BLOCK_SIZE;
+
+		sha1_transform_neon(sctx->state, data + done, rounds);
+		done += rounds * SHA1_BLOCK_SIZE;
+	}
+
+	memcpy(sctx->buffer, data + done, len - done);
+
+	return 0;
+}
+
+static int sha1_neon_update(struct shash_desc *desc, const u8 *data,
+			     unsigned int len)
+{
+	struct sha1_state *sctx = shash_desc_ctx(desc);
+	unsigned int partial = sctx->count % SHA1_BLOCK_SIZE;
+	int res;
+
+	/* Handle the fast case right here */
+	if (partial + len < SHA1_BLOCK_SIZE) {
+		sctx->count += len;
+		memcpy(sctx->buffer + partial, data, len);
+
+		return 0;
+	}
+
+	if (!may_use_simd()) {
+		res = sha1_update_arm(desc, data, len);
+	} else {
+		kernel_neon_begin();
+		res = __sha1_neon_update(desc, data, len, partial);
+		kernel_neon_end();
+	}
+
+	return res;
+}
+
+
+/* Add padding and return the message digest. */
+static int sha1_neon_final(struct shash_desc *desc, u8 *out)
+{
+	struct sha1_state *sctx = shash_desc_ctx(desc);
+	unsigned int i, index, padlen;
+	__be32 *dst = (__be32 *)out;
+	__be64 bits;
+	static const u8 padding[SHA1_BLOCK_SIZE] = { 0x80, };
+
+	bits = cpu_to_be64(sctx->count << 3);
+
+	/* Pad out to 56 mod 64 and append length */
+	index = sctx->count % SHA1_BLOCK_SIZE;
+	padlen = (index < 56) ? (56 - index) : ((SHA1_BLOCK_SIZE+56) - index);
+	if (!may_use_simd()) {
+		sha1_update_arm(desc, padding, padlen);
+		sha1_update_arm(desc, (const u8 *)&bits, sizeof(bits));
+	} else {
+		kernel_neon_begin();
+		/* We need to fill a whole block for __sha1_neon_update() */
+		if (padlen <= 56) {
+			sctx->count += padlen;
+			memcpy(sctx->buffer + index, padding, padlen);
+		} else {
+			__sha1_neon_update(desc, padding, padlen, index);
+		}
+		__sha1_neon_update(desc, (const u8 *)&bits, sizeof(bits), 56);
+		kernel_neon_end();
+	}
+
+	/* Store state in digest */
+	for (i = 0; i < 5; i++)
+		dst[i] = cpu_to_be32(sctx->state[i]);
+
+	/* Wipe context */
+	memset(sctx, 0, sizeof(*sctx));
+
+	return 0;
+}
+
+static int sha1_neon_export(struct shash_desc *desc, void *out)
+{
+	struct sha1_state *sctx = shash_desc_ctx(desc);
+
+	memcpy(out, sctx, sizeof(*sctx));
+
+	return 0;
+}
+
+static int sha1_neon_import(struct shash_desc *desc, const void *in)
+{
+	struct sha1_state *sctx = shash_desc_ctx(desc);
+
+	memcpy(sctx, in, sizeof(*sctx));
+
+	return 0;
+}
+
+static struct shash_alg alg = {
+	.digestsize	=	SHA1_DIGEST_SIZE,
+	.init		=	sha1_neon_init,
+	.update		=	sha1_neon_update,
+	.final		=	sha1_neon_final,
+	.export		=	sha1_neon_export,
+	.import		=	sha1_neon_import,
+	.descsize	=	sizeof(struct sha1_state),
+	.statesize	=	sizeof(struct sha1_state),
+	.base		=	{
+		.cra_name		= "sha1",
+		.cra_driver_name	= "sha1-neon",
+		.cra_priority		= 250,
+		.cra_flags		= CRYPTO_ALG_TYPE_SHASH,
+		.cra_blocksize		= SHA1_BLOCK_SIZE,
+		.cra_module		= THIS_MODULE,
+	}
+};
+
+static int __init sha1_neon_mod_init(void)
+{
+	if (!cpu_has_neon())
+		return -ENODEV;
+
+	return crypto_register_shash(&alg);
+}
+
+static void __exit sha1_neon_mod_fini(void)
+{
+	crypto_unregister_shash(&alg);
+}
+
+module_init(sha1_neon_mod_init);
+module_exit(sha1_neon_mod_fini);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("SHA1 Secure Hash Algorithm, NEON accelerated");
+MODULE_ALIAS("sha1");
diff --git a/arch/arm/include/asm/crypto/sha1.h b/arch/arm/include/asm/crypto/sha1.h
new file mode 100644
index 000000000000..75e6a417416b
--- /dev/null
+++ b/arch/arm/include/asm/crypto/sha1.h
@@ -0,0 +1,10 @@
+#ifndef ASM_ARM_CRYPTO_SHA1_H
+#define ASM_ARM_CRYPTO_SHA1_H
+
+#include <linux/crypto.h>
+#include <crypto/sha.h>
+
+extern int sha1_update_arm(struct shash_desc *desc, const u8 *data,
+			   unsigned int len);
+
+#endif
diff --git a/crypto/Kconfig b/crypto/Kconfig
index ce4012a58781..a379dada495c 100644
--- a/crypto/Kconfig
+++ b/crypto/Kconfig
@@ -540,6 +540,17 @@ config CRYPTO_SHA1_ARM
 	  SHA-1 secure hash standard (FIPS 180-1/DFIPS 180-2) implemented
 	  using optimized ARM assembler.
 
+config CRYPTO_SHA1_ARM_NEON
+	tristate "SHA1 digest algorithm (ARM NEON)"
+	depends on ARM && KERNEL_MODE_NEON && !CPU_BIG_ENDIAN
+	select CRYPTO_SHA1_ARM
+	select CRYPTO_SHA1
+	select CRYPTO_HASH
+	help
+	  SHA-1 secure hash standard (FIPS 180-1/DFIPS 180-2) implemented
+	  using optimized ARM NEON assembly, when NEON instructions are
+	  available.
+
 config CRYPTO_SHA1_PPC
 	tristate "SHA1 digest algorithm (powerpc)"
 	depends on PPC
-- 
cgit v1.2.3


From c8611d712ad01289a0b6a83cc93bba3a1ef4e990 Mon Sep 17 00:00:00 2001
From: Jussi Kivilinna <jussi.kivilinna@iki.fi>
Date: Tue, 29 Jul 2014 17:15:24 +0100
Subject: ARM: 8120/1: crypto: sha512: add ARM NEON implementation

This patch adds ARM NEON assembly implementation of SHA-512 and SHA-384
algorithms.

tcrypt benchmark results on Cortex-A8, sha512-generic vs sha512-neon-asm:

block-size      bytes/update    old-vs-new
16              16              2.99x
64              16              2.67x
64              64              3.00x
256             16              2.64x
256             64              3.06x
256             256             3.33x
1024            16              2.53x
1024            256             3.39x
1024            1024            3.52x
2048            16              2.50x
2048            256             3.41x
2048            1024            3.54x
2048            2048            3.57x
4096            16              2.49x
4096            256             3.42x
4096            1024            3.56x
4096            4096            3.59x
8192            16              2.48x
8192            256             3.42x
8192            1024            3.56x
8192            4096            3.60x
8192            8192            3.60x

Acked-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Tested-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Jussi Kivilinna <jussi.kivilinna@iki.fi>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/crypto/Makefile            |   2 +
 arch/arm/crypto/sha512-armv7-neon.S | 455 ++++++++++++++++++++++++++++++++++++
 arch/arm/crypto/sha512_neon_glue.c  | 305 ++++++++++++++++++++++++
 crypto/Kconfig                      |  15 ++
 4 files changed, 777 insertions(+)
 create mode 100644 arch/arm/crypto/sha512-armv7-neon.S
 create mode 100644 arch/arm/crypto/sha512_neon_glue.c

(limited to 'arch')

diff --git a/arch/arm/crypto/Makefile b/arch/arm/crypto/Makefile
index 374956d2f896..b48fa341648d 100644
--- a/arch/arm/crypto/Makefile
+++ b/arch/arm/crypto/Makefile
@@ -6,11 +6,13 @@ obj-$(CONFIG_CRYPTO_AES_ARM) += aes-arm.o
 obj-$(CONFIG_CRYPTO_AES_ARM_BS) += aes-arm-bs.o
 obj-$(CONFIG_CRYPTO_SHA1_ARM) += sha1-arm.o
 obj-$(CONFIG_CRYPTO_SHA1_ARM_NEON) += sha1-arm-neon.o
+obj-$(CONFIG_CRYPTO_SHA512_ARM_NEON) += sha512-arm-neon.o
 
 aes-arm-y	:= aes-armv4.o aes_glue.o
 aes-arm-bs-y	:= aesbs-core.o aesbs-glue.o
 sha1-arm-y	:= sha1-armv4-large.o sha1_glue.o
 sha1-arm-neon-y	:= sha1-armv7-neon.o sha1_neon_glue.o
+sha512-arm-neon-y := sha512-armv7-neon.o sha512_neon_glue.o
 
 quiet_cmd_perl = PERL    $@
       cmd_perl = $(PERL) $(<) > $(@)
diff --git a/arch/arm/crypto/sha512-armv7-neon.S b/arch/arm/crypto/sha512-armv7-neon.S
new file mode 100644
index 000000000000..fe99472e507c
--- /dev/null
+++ b/arch/arm/crypto/sha512-armv7-neon.S
@@ -0,0 +1,455 @@
+/* sha512-armv7-neon.S  -  ARM/NEON assembly implementation of SHA-512 transform
+ *
+ * Copyright © 2013-2014 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ */
+
+#include <linux/linkage.h>
+
+
+.syntax unified
+.code   32
+.fpu neon
+
+.text
+
+/* structure of SHA512_CONTEXT */
+#define hd_a 0
+#define hd_b ((hd_a) + 8)
+#define hd_c ((hd_b) + 8)
+#define hd_d ((hd_c) + 8)
+#define hd_e ((hd_d) + 8)
+#define hd_f ((hd_e) + 8)
+#define hd_g ((hd_f) + 8)
+
+/* register macros */
+#define RK %r2
+
+#define RA d0
+#define RB d1
+#define RC d2
+#define RD d3
+#define RE d4
+#define RF d5
+#define RG d6
+#define RH d7
+
+#define RT0 d8
+#define RT1 d9
+#define RT2 d10
+#define RT3 d11
+#define RT4 d12
+#define RT5 d13
+#define RT6 d14
+#define RT7 d15
+
+#define RT01q q4
+#define RT23q q5
+#define RT45q q6
+#define RT67q q7
+
+#define RW0 d16
+#define RW1 d17
+#define RW2 d18
+#define RW3 d19
+#define RW4 d20
+#define RW5 d21
+#define RW6 d22
+#define RW7 d23
+#define RW8 d24
+#define RW9 d25
+#define RW10 d26
+#define RW11 d27
+#define RW12 d28
+#define RW13 d29
+#define RW14 d30
+#define RW15 d31
+
+#define RW01q q8
+#define RW23q q9
+#define RW45q q10
+#define RW67q q11
+#define RW89q q12
+#define RW1011q q13
+#define RW1213q q14
+#define RW1415q q15
+
+/***********************************************************************
+ * ARM assembly implementation of sha512 transform
+ ***********************************************************************/
+#define rounds2_0_63(ra, rb, rc, rd, re, rf, rg, rh, rw0, rw1, rw01q, rw2, \
+                     rw23q, rw1415q, rw9, rw10, interleave_op, arg1) \
+	/* t1 = h + Sum1 (e) + Ch (e, f, g) + k[t] + w[t]; */ \
+	vshr.u64 RT2, re, #14; \
+	vshl.u64 RT3, re, #64 - 14; \
+	interleave_op(arg1); \
+	vshr.u64 RT4, re, #18; \
+	vshl.u64 RT5, re, #64 - 18; \
+	vld1.64 {RT0}, [RK]!; \
+	veor.64 RT23q, RT23q, RT45q; \
+	vshr.u64 RT4, re, #41; \
+	vshl.u64 RT5, re, #64 - 41; \
+	vadd.u64 RT0, RT0, rw0; \
+	veor.64 RT23q, RT23q, RT45q; \
+	vmov.64 RT7, re; \
+	veor.64 RT1, RT2, RT3; \
+	vbsl.64 RT7, rf, rg; \
+	\
+	vadd.u64 RT1, RT1, rh; \
+	vshr.u64 RT2, ra, #28; \
+	vshl.u64 RT3, ra, #64 - 28; \
+	vadd.u64 RT1, RT1, RT0; \
+	vshr.u64 RT4, ra, #34; \
+	vshl.u64 RT5, ra, #64 - 34; \
+	vadd.u64 RT1, RT1, RT7; \
+	\
+	/* h = Sum0 (a) + Maj (a, b, c); */ \
+	veor.64 RT23q, RT23q, RT45q; \
+	vshr.u64 RT4, ra, #39; \
+	vshl.u64 RT5, ra, #64 - 39; \
+	veor.64 RT0, ra, rb; \
+	veor.64 RT23q, RT23q, RT45q; \
+	vbsl.64 RT0, rc, rb; \
+	vadd.u64 rd, rd, RT1; /* d+=t1; */ \
+	veor.64 rh, RT2, RT3; \
+	\
+	/* t1 = g + Sum1 (d) + Ch (d, e, f) + k[t] + w[t]; */ \
+	vshr.u64 RT2, rd, #14; \
+	vshl.u64 RT3, rd, #64 - 14; \
+	vadd.u64 rh, rh, RT0; \
+	vshr.u64 RT4, rd, #18; \
+	vshl.u64 RT5, rd, #64 - 18; \
+	vadd.u64 rh, rh, RT1; /* h+=t1; */ \
+	vld1.64 {RT0}, [RK]!; \
+	veor.64 RT23q, RT23q, RT45q; \
+	vshr.u64 RT4, rd, #41; \
+	vshl.u64 RT5, rd, #64 - 41; \
+	vadd.u64 RT0, RT0, rw1; \
+	veor.64 RT23q, RT23q, RT45q; \
+	vmov.64 RT7, rd; \
+	veor.64 RT1, RT2, RT3; \
+	vbsl.64 RT7, re, rf; \
+	\
+	vadd.u64 RT1, RT1, rg; \
+	vshr.u64 RT2, rh, #28; \
+	vshl.u64 RT3, rh, #64 - 28; \
+	vadd.u64 RT1, RT1, RT0; \
+	vshr.u64 RT4, rh, #34; \
+	vshl.u64 RT5, rh, #64 - 34; \
+	vadd.u64 RT1, RT1, RT7; \
+	\
+	/* g = Sum0 (h) + Maj (h, a, b); */ \
+	veor.64 RT23q, RT23q, RT45q; \
+	vshr.u64 RT4, rh, #39; \
+	vshl.u64 RT5, rh, #64 - 39; \
+	veor.64 RT0, rh, ra; \
+	veor.64 RT23q, RT23q, RT45q; \
+	vbsl.64 RT0, rb, ra; \
+	vadd.u64 rc, rc, RT1; /* c+=t1; */ \
+	veor.64 rg, RT2, RT3; \
+	\
+	/* w[0] += S1 (w[14]) + w[9] + S0 (w[1]); */ \
+	/* w[1] += S1 (w[15]) + w[10] + S0 (w[2]); */ \
+	\
+	/**** S0(w[1:2]) */ \
+	\
+	/* w[0:1] += w[9:10] */ \
+	/* RT23q = rw1:rw2 */ \
+	vext.u64 RT23q, rw01q, rw23q, #1; \
+	vadd.u64 rw0, rw9; \
+	vadd.u64 rg, rg, RT0; \
+	vadd.u64 rw1, rw10;\
+	vadd.u64 rg, rg, RT1; /* g+=t1; */ \
+	\
+	vshr.u64 RT45q, RT23q, #1; \
+	vshl.u64 RT67q, RT23q, #64 - 1; \
+	vshr.u64 RT01q, RT23q, #8; \
+	veor.u64 RT45q, RT45q, RT67q; \
+	vshl.u64 RT67q, RT23q, #64 - 8; \
+	veor.u64 RT45q, RT45q, RT01q; \
+	vshr.u64 RT01q, RT23q, #7; \
+	veor.u64 RT45q, RT45q, RT67q; \
+	\
+	/**** S1(w[14:15]) */ \
+	vshr.u64 RT23q, rw1415q, #6; \
+	veor.u64 RT01q, RT01q, RT45q; \
+	vshr.u64 RT45q, rw1415q, #19; \
+	vshl.u64 RT67q, rw1415q, #64 - 19; \
+	veor.u64 RT23q, RT23q, RT45q; \
+	vshr.u64 RT45q, rw1415q, #61; \
+	veor.u64 RT23q, RT23q, RT67q; \
+	vshl.u64 RT67q, rw1415q, #64 - 61; \
+	veor.u64 RT23q, RT23q, RT45q; \
+	vadd.u64 rw01q, RT01q; /* w[0:1] += S(w[1:2]) */ \
+	veor.u64 RT01q, RT23q, RT67q;
+#define vadd_RT01q(rw01q) \
+	/* w[0:1] += S(w[14:15]) */ \
+	vadd.u64 rw01q, RT01q;
+
+#define dummy(_) /*_*/
+
+#define rounds2_64_79(ra, rb, rc, rd, re, rf, rg, rh, rw0, rw1, \
+	              interleave_op1, arg1, interleave_op2, arg2) \
+	/* t1 = h + Sum1 (e) + Ch (e, f, g) + k[t] + w[t]; */ \
+	vshr.u64 RT2, re, #14; \
+	vshl.u64 RT3, re, #64 - 14; \
+	interleave_op1(arg1); \
+	vshr.u64 RT4, re, #18; \
+	vshl.u64 RT5, re, #64 - 18; \
+	interleave_op2(arg2); \
+	vld1.64 {RT0}, [RK]!; \
+	veor.64 RT23q, RT23q, RT45q; \
+	vshr.u64 RT4, re, #41; \
+	vshl.u64 RT5, re, #64 - 41; \
+	vadd.u64 RT0, RT0, rw0; \
+	veor.64 RT23q, RT23q, RT45q; \
+	vmov.64 RT7, re; \
+	veor.64 RT1, RT2, RT3; \
+	vbsl.64 RT7, rf, rg; \
+	\
+	vadd.u64 RT1, RT1, rh; \
+	vshr.u64 RT2, ra, #28; \
+	vshl.u64 RT3, ra, #64 - 28; \
+	vadd.u64 RT1, RT1, RT0; \
+	vshr.u64 RT4, ra, #34; \
+	vshl.u64 RT5, ra, #64 - 34; \
+	vadd.u64 RT1, RT1, RT7; \
+	\
+	/* h = Sum0 (a) + Maj (a, b, c); */ \
+	veor.64 RT23q, RT23q, RT45q; \
+	vshr.u64 RT4, ra, #39; \
+	vshl.u64 RT5, ra, #64 - 39; \
+	veor.64 RT0, ra, rb; \
+	veor.64 RT23q, RT23q, RT45q; \
+	vbsl.64 RT0, rc, rb; \
+	vadd.u64 rd, rd, RT1; /* d+=t1; */ \
+	veor.64 rh, RT2, RT3; \
+	\
+	/* t1 = g + Sum1 (d) + Ch (d, e, f) + k[t] + w[t]; */ \
+	vshr.u64 RT2, rd, #14; \
+	vshl.u64 RT3, rd, #64 - 14; \
+	vadd.u64 rh, rh, RT0; \
+	vshr.u64 RT4, rd, #18; \
+	vshl.u64 RT5, rd, #64 - 18; \
+	vadd.u64 rh, rh, RT1; /* h+=t1; */ \
+	vld1.64 {RT0}, [RK]!; \
+	veor.64 RT23q, RT23q, RT45q; \
+	vshr.u64 RT4, rd, #41; \
+	vshl.u64 RT5, rd, #64 - 41; \
+	vadd.u64 RT0, RT0, rw1; \
+	veor.64 RT23q, RT23q, RT45q; \
+	vmov.64 RT7, rd; \
+	veor.64 RT1, RT2, RT3; \
+	vbsl.64 RT7, re, rf; \
+	\
+	vadd.u64 RT1, RT1, rg; \
+	vshr.u64 RT2, rh, #28; \
+	vshl.u64 RT3, rh, #64 - 28; \
+	vadd.u64 RT1, RT1, RT0; \
+	vshr.u64 RT4, rh, #34; \
+	vshl.u64 RT5, rh, #64 - 34; \
+	vadd.u64 RT1, RT1, RT7; \
+	\
+	/* g = Sum0 (h) + Maj (h, a, b); */ \
+	veor.64 RT23q, RT23q, RT45q; \
+	vshr.u64 RT4, rh, #39; \
+	vshl.u64 RT5, rh, #64 - 39; \
+	veor.64 RT0, rh, ra; \
+	veor.64 RT23q, RT23q, RT45q; \
+	vbsl.64 RT0, rb, ra; \
+	vadd.u64 rc, rc, RT1; /* c+=t1; */ \
+	veor.64 rg, RT2, RT3;
+#define vadd_rg_RT0(rg) \
+	vadd.u64 rg, rg, RT0;
+#define vadd_rg_RT1(rg) \
+	vadd.u64 rg, rg, RT1; /* g+=t1; */
+
+.align 3
+ENTRY(sha512_transform_neon)
+	/* Input:
+	 *	%r0: SHA512_CONTEXT
+	 *	%r1: data
+	 *	%r2: u64 k[] constants
+	 *	%r3: nblks
+	 */
+	push {%lr};
+
+	mov %lr, #0;
+
+	/* Load context to d0-d7 */
+	vld1.64 {RA-RD}, [%r0]!;
+	vld1.64 {RE-RH}, [%r0];
+	sub %r0, #(4*8);
+
+	/* Load input to w[16], d16-d31 */
+	/* NOTE: Assumes that on ARMv7 unaligned accesses are always allowed. */
+	vld1.64 {RW0-RW3}, [%r1]!;
+	vld1.64 {RW4-RW7}, [%r1]!;
+	vld1.64 {RW8-RW11}, [%r1]!;
+	vld1.64 {RW12-RW15}, [%r1]!;
+#ifdef __ARMEL__
+	/* byteswap */
+	vrev64.8 RW01q, RW01q;
+	vrev64.8 RW23q, RW23q;
+	vrev64.8 RW45q, RW45q;
+	vrev64.8 RW67q, RW67q;
+	vrev64.8 RW89q, RW89q;
+	vrev64.8 RW1011q, RW1011q;
+	vrev64.8 RW1213q, RW1213q;
+	vrev64.8 RW1415q, RW1415q;
+#endif
+
+	/* EABI says that d8-d15 must be preserved by callee. */
+	/*vpush {RT0-RT7};*/
+
+.Loop:
+	rounds2_0_63(RA, RB, RC, RD, RE, RF, RG, RH, RW0, RW1, RW01q, RW2,
+		     RW23q, RW1415q, RW9, RW10, dummy, _);
+	b .Lenter_rounds;
+
+.Loop_rounds:
+	rounds2_0_63(RA, RB, RC, RD, RE, RF, RG, RH, RW0, RW1, RW01q, RW2,
+		     RW23q, RW1415q, RW9, RW10, vadd_RT01q, RW1415q);
+.Lenter_rounds:
+	rounds2_0_63(RG, RH, RA, RB, RC, RD, RE, RF, RW2, RW3, RW23q, RW4,
+		     RW45q, RW01q, RW11, RW12, vadd_RT01q, RW01q);
+	rounds2_0_63(RE, RF, RG, RH, RA, RB, RC, RD, RW4, RW5, RW45q, RW6,
+		     RW67q, RW23q, RW13, RW14, vadd_RT01q, RW23q);
+	rounds2_0_63(RC, RD, RE, RF, RG, RH, RA, RB, RW6, RW7, RW67q, RW8,
+		     RW89q, RW45q, RW15, RW0, vadd_RT01q, RW45q);
+	rounds2_0_63(RA, RB, RC, RD, RE, RF, RG, RH, RW8, RW9, RW89q, RW10,
+		     RW1011q, RW67q, RW1, RW2, vadd_RT01q, RW67q);
+	rounds2_0_63(RG, RH, RA, RB, RC, RD, RE, RF, RW10, RW11, RW1011q, RW12,
+		     RW1213q, RW89q, RW3, RW4, vadd_RT01q, RW89q);
+	add %lr, #16;
+	rounds2_0_63(RE, RF, RG, RH, RA, RB, RC, RD, RW12, RW13, RW1213q, RW14,
+		     RW1415q, RW1011q, RW5, RW6, vadd_RT01q, RW1011q);
+	cmp %lr, #64;
+	rounds2_0_63(RC, RD, RE, RF, RG, RH, RA, RB, RW14, RW15, RW1415q, RW0,
+		     RW01q, RW1213q, RW7, RW8, vadd_RT01q, RW1213q);
+	bne .Loop_rounds;
+
+	subs %r3, #1;
+
+	rounds2_64_79(RA, RB, RC, RD, RE, RF, RG, RH, RW0, RW1,
+		      vadd_RT01q, RW1415q, dummy, _);
+	rounds2_64_79(RG, RH, RA, RB, RC, RD, RE, RF, RW2, RW3,
+		      vadd_rg_RT0, RG, vadd_rg_RT1, RG);
+	beq .Lhandle_tail;
+	vld1.64 {RW0-RW3}, [%r1]!;
+	rounds2_64_79(RE, RF, RG, RH, RA, RB, RC, RD, RW4, RW5,
+		      vadd_rg_RT0, RE, vadd_rg_RT1, RE);
+	rounds2_64_79(RC, RD, RE, RF, RG, RH, RA, RB, RW6, RW7,
+		      vadd_rg_RT0, RC, vadd_rg_RT1, RC);
+#ifdef __ARMEL__
+	vrev64.8 RW01q, RW01q;
+	vrev64.8 RW23q, RW23q;
+#endif
+	vld1.64 {RW4-RW7}, [%r1]!;
+	rounds2_64_79(RA, RB, RC, RD, RE, RF, RG, RH, RW8, RW9,
+		      vadd_rg_RT0, RA, vadd_rg_RT1, RA);
+	rounds2_64_79(RG, RH, RA, RB, RC, RD, RE, RF, RW10, RW11,
+		      vadd_rg_RT0, RG, vadd_rg_RT1, RG);
+#ifdef __ARMEL__
+	vrev64.8 RW45q, RW45q;
+	vrev64.8 RW67q, RW67q;
+#endif
+	vld1.64 {RW8-RW11}, [%r1]!;
+	rounds2_64_79(RE, RF, RG, RH, RA, RB, RC, RD, RW12, RW13,
+		      vadd_rg_RT0, RE, vadd_rg_RT1, RE);
+	rounds2_64_79(RC, RD, RE, RF, RG, RH, RA, RB, RW14, RW15,
+		      vadd_rg_RT0, RC, vadd_rg_RT1, RC);
+#ifdef __ARMEL__
+	vrev64.8 RW89q, RW89q;
+	vrev64.8 RW1011q, RW1011q;
+#endif
+	vld1.64 {RW12-RW15}, [%r1]!;
+	vadd_rg_RT0(RA);
+	vadd_rg_RT1(RA);
+
+	/* Load context */
+	vld1.64 {RT0-RT3}, [%r0]!;
+	vld1.64 {RT4-RT7}, [%r0];
+	sub %r0, #(4*8);
+
+#ifdef __ARMEL__
+	vrev64.8 RW1213q, RW1213q;
+	vrev64.8 RW1415q, RW1415q;
+#endif
+
+	vadd.u64 RA, RT0;
+	vadd.u64 RB, RT1;
+	vadd.u64 RC, RT2;
+	vadd.u64 RD, RT3;
+	vadd.u64 RE, RT4;
+	vadd.u64 RF, RT5;
+	vadd.u64 RG, RT6;
+	vadd.u64 RH, RT7;
+
+	/* Store the first half of context */
+	vst1.64 {RA-RD}, [%r0]!;
+	sub RK, $(8*80);
+	vst1.64 {RE-RH}, [%r0]; /* Store the last half of context */
+	mov %lr, #0;
+	sub %r0, #(4*8);
+
+	b .Loop;
+
+.Lhandle_tail:
+	rounds2_64_79(RE, RF, RG, RH, RA, RB, RC, RD, RW4, RW5,
+		      vadd_rg_RT0, RE, vadd_rg_RT1, RE);
+	rounds2_64_79(RC, RD, RE, RF, RG, RH, RA, RB, RW6, RW7,
+		      vadd_rg_RT0, RC, vadd_rg_RT1, RC);
+	rounds2_64_79(RA, RB, RC, RD, RE, RF, RG, RH, RW8, RW9,
+		      vadd_rg_RT0, RA, vadd_rg_RT1, RA);
+	rounds2_64_79(RG, RH, RA, RB, RC, RD, RE, RF, RW10, RW11,
+		      vadd_rg_RT0, RG, vadd_rg_RT1, RG);
+	rounds2_64_79(RE, RF, RG, RH, RA, RB, RC, RD, RW12, RW13,
+		      vadd_rg_RT0, RE, vadd_rg_RT1, RE);
+	rounds2_64_79(RC, RD, RE, RF, RG, RH, RA, RB, RW14, RW15,
+		      vadd_rg_RT0, RC, vadd_rg_RT1, RC);
+
+	/* Load context to d16-d23 */
+	vld1.64 {RW0-RW3}, [%r0]!;
+	vadd_rg_RT0(RA);
+	vld1.64 {RW4-RW7}, [%r0];
+	vadd_rg_RT1(RA);
+	sub %r0, #(4*8);
+
+	vadd.u64 RA, RW0;
+	vadd.u64 RB, RW1;
+	vadd.u64 RC, RW2;
+	vadd.u64 RD, RW3;
+	vadd.u64 RE, RW4;
+	vadd.u64 RF, RW5;
+	vadd.u64 RG, RW6;
+	vadd.u64 RH, RW7;
+
+	/* Store the first half of context */
+	vst1.64 {RA-RD}, [%r0]!;
+
+	/* Clear used registers */
+	/* d16-d31 */
+	veor.u64 RW01q, RW01q;
+	veor.u64 RW23q, RW23q;
+	veor.u64 RW45q, RW45q;
+	veor.u64 RW67q, RW67q;
+	vst1.64 {RE-RH}, [%r0]; /* Store the last half of context */
+	veor.u64 RW89q, RW89q;
+	veor.u64 RW1011q, RW1011q;
+	veor.u64 RW1213q, RW1213q;
+	veor.u64 RW1415q, RW1415q;
+	/* d8-d15 */
+	/*vpop {RT0-RT7};*/
+	/* d0-d7 (q0-q3) */
+	veor.u64 %q0, %q0;
+	veor.u64 %q1, %q1;
+	veor.u64 %q2, %q2;
+	veor.u64 %q3, %q3;
+
+	pop {%pc};
+ENDPROC(sha512_transform_neon)
diff --git a/arch/arm/crypto/sha512_neon_glue.c b/arch/arm/crypto/sha512_neon_glue.c
new file mode 100644
index 000000000000..0d2758ff5e12
--- /dev/null
+++ b/arch/arm/crypto/sha512_neon_glue.c
@@ -0,0 +1,305 @@
+/*
+ * Glue code for the SHA512 Secure Hash Algorithm assembly implementation
+ * using NEON instructions.
+ *
+ * Copyright © 2014 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is based on sha512_ssse3_glue.c:
+ *   Copyright (C) 2013 Intel Corporation
+ *   Author: Tim Chen <tim.c.chen@linux.intel.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ */
+
+#include <crypto/internal/hash.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/mm.h>
+#include <linux/cryptohash.h>
+#include <linux/types.h>
+#include <linux/string.h>
+#include <crypto/sha.h>
+#include <asm/byteorder.h>
+#include <asm/simd.h>
+#include <asm/neon.h>
+
+
+static const u64 sha512_k[] = {
+	0x428a2f98d728ae22ULL, 0x7137449123ef65cdULL,
+	0xb5c0fbcfec4d3b2fULL, 0xe9b5dba58189dbbcULL,
+	0x3956c25bf348b538ULL, 0x59f111f1b605d019ULL,
+	0x923f82a4af194f9bULL, 0xab1c5ed5da6d8118ULL,
+	0xd807aa98a3030242ULL, 0x12835b0145706fbeULL,
+	0x243185be4ee4b28cULL, 0x550c7dc3d5ffb4e2ULL,
+	0x72be5d74f27b896fULL, 0x80deb1fe3b1696b1ULL,
+	0x9bdc06a725c71235ULL, 0xc19bf174cf692694ULL,
+	0xe49b69c19ef14ad2ULL, 0xefbe4786384f25e3ULL,
+	0x0fc19dc68b8cd5b5ULL, 0x240ca1cc77ac9c65ULL,
+	0x2de92c6f592b0275ULL, 0x4a7484aa6ea6e483ULL,
+	0x5cb0a9dcbd41fbd4ULL, 0x76f988da831153b5ULL,
+	0x983e5152ee66dfabULL, 0xa831c66d2db43210ULL,
+	0xb00327c898fb213fULL, 0xbf597fc7beef0ee4ULL,
+	0xc6e00bf33da88fc2ULL, 0xd5a79147930aa725ULL,
+	0x06ca6351e003826fULL, 0x142929670a0e6e70ULL,
+	0x27b70a8546d22ffcULL, 0x2e1b21385c26c926ULL,
+	0x4d2c6dfc5ac42aedULL, 0x53380d139d95b3dfULL,
+	0x650a73548baf63deULL, 0x766a0abb3c77b2a8ULL,
+	0x81c2c92e47edaee6ULL, 0x92722c851482353bULL,
+	0xa2bfe8a14cf10364ULL, 0xa81a664bbc423001ULL,
+	0xc24b8b70d0f89791ULL, 0xc76c51a30654be30ULL,
+	0xd192e819d6ef5218ULL, 0xd69906245565a910ULL,
+	0xf40e35855771202aULL, 0x106aa07032bbd1b8ULL,
+	0x19a4c116b8d2d0c8ULL, 0x1e376c085141ab53ULL,
+	0x2748774cdf8eeb99ULL, 0x34b0bcb5e19b48a8ULL,
+	0x391c0cb3c5c95a63ULL, 0x4ed8aa4ae3418acbULL,
+	0x5b9cca4f7763e373ULL, 0x682e6ff3d6b2b8a3ULL,
+	0x748f82ee5defb2fcULL, 0x78a5636f43172f60ULL,
+	0x84c87814a1f0ab72ULL, 0x8cc702081a6439ecULL,
+	0x90befffa23631e28ULL, 0xa4506cebde82bde9ULL,
+	0xbef9a3f7b2c67915ULL, 0xc67178f2e372532bULL,
+	0xca273eceea26619cULL, 0xd186b8c721c0c207ULL,
+	0xeada7dd6cde0eb1eULL, 0xf57d4f7fee6ed178ULL,
+	0x06f067aa72176fbaULL, 0x0a637dc5a2c898a6ULL,
+	0x113f9804bef90daeULL, 0x1b710b35131c471bULL,
+	0x28db77f523047d84ULL, 0x32caab7b40c72493ULL,
+	0x3c9ebe0a15c9bebcULL, 0x431d67c49c100d4cULL,
+	0x4cc5d4becb3e42b6ULL, 0x597f299cfc657e2aULL,
+	0x5fcb6fab3ad6faecULL, 0x6c44198c4a475817ULL
+};
+
+
+asmlinkage void sha512_transform_neon(u64 *digest, const void *data,
+				      const u64 k[], unsigned int num_blks);
+
+
+static int sha512_neon_init(struct shash_desc *desc)
+{
+	struct sha512_state *sctx = shash_desc_ctx(desc);
+
+	sctx->state[0] = SHA512_H0;
+	sctx->state[1] = SHA512_H1;
+	sctx->state[2] = SHA512_H2;
+	sctx->state[3] = SHA512_H3;
+	sctx->state[4] = SHA512_H4;
+	sctx->state[5] = SHA512_H5;
+	sctx->state[6] = SHA512_H6;
+	sctx->state[7] = SHA512_H7;
+	sctx->count[0] = sctx->count[1] = 0;
+
+	return 0;
+}
+
+static int __sha512_neon_update(struct shash_desc *desc, const u8 *data,
+				unsigned int len, unsigned int partial)
+{
+	struct sha512_state *sctx = shash_desc_ctx(desc);
+	unsigned int done = 0;
+
+	sctx->count[0] += len;
+	if (sctx->count[0] < len)
+		sctx->count[1]++;
+
+	if (partial) {
+		done = SHA512_BLOCK_SIZE - partial;
+		memcpy(sctx->buf + partial, data, done);
+		sha512_transform_neon(sctx->state, sctx->buf, sha512_k, 1);
+	}
+
+	if (len - done >= SHA512_BLOCK_SIZE) {
+		const unsigned int rounds = (len - done) / SHA512_BLOCK_SIZE;
+
+		sha512_transform_neon(sctx->state, data + done, sha512_k,
+				      rounds);
+
+		done += rounds * SHA512_BLOCK_SIZE;
+	}
+
+	memcpy(sctx->buf, data + done, len - done);
+
+	return 0;
+}
+
+static int sha512_neon_update(struct shash_desc *desc, const u8 *data,
+			     unsigned int len)
+{
+	struct sha512_state *sctx = shash_desc_ctx(desc);
+	unsigned int partial = sctx->count[0] % SHA512_BLOCK_SIZE;
+	int res;
+
+	/* Handle the fast case right here */
+	if (partial + len < SHA512_BLOCK_SIZE) {
+		sctx->count[0] += len;
+		if (sctx->count[0] < len)
+			sctx->count[1]++;
+		memcpy(sctx->buf + partial, data, len);
+
+		return 0;
+	}
+
+	if (!may_use_simd()) {
+		res = crypto_sha512_update(desc, data, len);
+	} else {
+		kernel_neon_begin();
+		res = __sha512_neon_update(desc, data, len, partial);
+		kernel_neon_end();
+	}
+
+	return res;
+}
+
+
+/* Add padding and return the message digest. */
+static int sha512_neon_final(struct shash_desc *desc, u8 *out)
+{
+	struct sha512_state *sctx = shash_desc_ctx(desc);
+	unsigned int i, index, padlen;
+	__be64 *dst = (__be64 *)out;
+	__be64 bits[2];
+	static const u8 padding[SHA512_BLOCK_SIZE] = { 0x80, };
+
+	/* save number of bits */
+	bits[1] = cpu_to_be64(sctx->count[0] << 3);
+	bits[0] = cpu_to_be64(sctx->count[1] << 3 | sctx->count[0] >> 61);
+
+	/* Pad out to 112 mod 128 and append length */
+	index = sctx->count[0] & 0x7f;
+	padlen = (index < 112) ? (112 - index) : ((128+112) - index);
+
+	if (!may_use_simd()) {
+		crypto_sha512_update(desc, padding, padlen);
+		crypto_sha512_update(desc, (const u8 *)&bits, sizeof(bits));
+	} else {
+		kernel_neon_begin();
+		/* We need to fill a whole block for __sha512_neon_update() */
+		if (padlen <= 112) {
+			sctx->count[0] += padlen;
+			if (sctx->count[0] < padlen)
+				sctx->count[1]++;
+			memcpy(sctx->buf + index, padding, padlen);
+		} else {
+			__sha512_neon_update(desc, padding, padlen, index);
+		}
+		__sha512_neon_update(desc, (const u8 *)&bits,
+					sizeof(bits), 112);
+		kernel_neon_end();
+	}
+
+	/* Store state in digest */
+	for (i = 0; i < 8; i++)
+		dst[i] = cpu_to_be64(sctx->state[i]);
+
+	/* Wipe context */
+	memset(sctx, 0, sizeof(*sctx));
+
+	return 0;
+}
+
+static int sha512_neon_export(struct shash_desc *desc, void *out)
+{
+	struct sha512_state *sctx = shash_desc_ctx(desc);
+
+	memcpy(out, sctx, sizeof(*sctx));
+
+	return 0;
+}
+
+static int sha512_neon_import(struct shash_desc *desc, const void *in)
+{
+	struct sha512_state *sctx = shash_desc_ctx(desc);
+
+	memcpy(sctx, in, sizeof(*sctx));
+
+	return 0;
+}
+
+static int sha384_neon_init(struct shash_desc *desc)
+{
+	struct sha512_state *sctx = shash_desc_ctx(desc);
+
+	sctx->state[0] = SHA384_H0;
+	sctx->state[1] = SHA384_H1;
+	sctx->state[2] = SHA384_H2;
+	sctx->state[3] = SHA384_H3;
+	sctx->state[4] = SHA384_H4;
+	sctx->state[5] = SHA384_H5;
+	sctx->state[6] = SHA384_H6;
+	sctx->state[7] = SHA384_H7;
+
+	sctx->count[0] = sctx->count[1] = 0;
+
+	return 0;
+}
+
+static int sha384_neon_final(struct shash_desc *desc, u8 *hash)
+{
+	u8 D[SHA512_DIGEST_SIZE];
+
+	sha512_neon_final(desc, D);
+
+	memcpy(hash, D, SHA384_DIGEST_SIZE);
+	memset(D, 0, SHA512_DIGEST_SIZE);
+
+	return 0;
+}
+
+static struct shash_alg algs[] = { {
+	.digestsize	=	SHA512_DIGEST_SIZE,
+	.init		=	sha512_neon_init,
+	.update		=	sha512_neon_update,
+	.final		=	sha512_neon_final,
+	.export		=	sha512_neon_export,
+	.import		=	sha512_neon_import,
+	.descsize	=	sizeof(struct sha512_state),
+	.statesize	=	sizeof(struct sha512_state),
+	.base		=	{
+		.cra_name	=	"sha512",
+		.cra_driver_name =	"sha512-neon",
+		.cra_priority	=	250,
+		.cra_flags	=	CRYPTO_ALG_TYPE_SHASH,
+		.cra_blocksize	=	SHA512_BLOCK_SIZE,
+		.cra_module	=	THIS_MODULE,
+	}
+},  {
+	.digestsize	=	SHA384_DIGEST_SIZE,
+	.init		=	sha384_neon_init,
+	.update		=	sha512_neon_update,
+	.final		=	sha384_neon_final,
+	.export		=	sha512_neon_export,
+	.import		=	sha512_neon_import,
+	.descsize	=	sizeof(struct sha512_state),
+	.statesize	=	sizeof(struct sha512_state),
+	.base		=	{
+		.cra_name	=	"sha384",
+		.cra_driver_name =	"sha384-neon",
+		.cra_priority	=	250,
+		.cra_flags	=	CRYPTO_ALG_TYPE_SHASH,
+		.cra_blocksize	=	SHA384_BLOCK_SIZE,
+		.cra_module	=	THIS_MODULE,
+	}
+} };
+
+static int __init sha512_neon_mod_init(void)
+{
+	if (!cpu_has_neon())
+		return -ENODEV;
+
+	return crypto_register_shashes(algs, ARRAY_SIZE(algs));
+}
+
+static void __exit sha512_neon_mod_fini(void)
+{
+	crypto_unregister_shashes(algs, ARRAY_SIZE(algs));
+}
+
+module_init(sha512_neon_mod_init);
+module_exit(sha512_neon_mod_fini);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("SHA512 Secure Hash Algorithm, NEON accelerated");
+
+MODULE_ALIAS("sha512");
+MODULE_ALIAS("sha384");
diff --git a/crypto/Kconfig b/crypto/Kconfig
index a379dada495c..749b1e05c490 100644
--- a/crypto/Kconfig
+++ b/crypto/Kconfig
@@ -600,6 +600,21 @@ config CRYPTO_SHA512_SPARC64
 	  SHA-512 secure hash standard (DFIPS 180-2) implemented
 	  using sparc64 crypto instructions, when available.
 
+config CRYPTO_SHA512_ARM_NEON
+	tristate "SHA384 and SHA512 digest algorithm (ARM NEON)"
+	depends on ARM && KERNEL_MODE_NEON && !CPU_BIG_ENDIAN
+	select CRYPTO_SHA512
+	select CRYPTO_HASH
+	help
+	  SHA-512 secure hash standard (DFIPS 180-2) implemented
+	  using ARM NEON instructions, when available.
+
+	  This version of SHA implements a 512 bit hash with 256 bits of
+	  security against collision attacks.
+
+	  This code also includes SHA-384, a 384 bit hash with 192 bits
+	  of security against collision attacks.
+
 config CRYPTO_TGR192
 	tristate "Tiger digest algorithms"
 	select CRYPTO_HASH
-- 
cgit v1.2.3


From f8f3d4ed0d64c59296d79e9d219d8f388562de35 Mon Sep 17 00:00:00 2001
From: Shawn Guo <shawn.guo@linaro.org>
Date: Thu, 31 Jul 2014 02:07:00 +0100
Subject: ARM: 8121/1: smp_scu: use macro for SCU enable bit

Use macro instead of magic number for SCU enable bit.

Signed-off-by: Shawn Guo <shawn.guo@freescale.com>
Acked-by: Soren Brinkmann <soren.brinkmann@xilinx.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/kernel/smp_scu.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/arm/kernel/smp_scu.c b/arch/arm/kernel/smp_scu.c
index 1aafa0d785eb..c947508f84e6 100644
--- a/arch/arm/kernel/smp_scu.c
+++ b/arch/arm/kernel/smp_scu.c
@@ -17,6 +17,7 @@
 #include <asm/cputype.h>
 
 #define SCU_CTRL		0x00
+#define SCU_ENABLE		(1 << 0)
 #define SCU_CONFIG		0x04
 #define SCU_CPU_STATUS		0x08
 #define SCU_INVALIDATE		0x0c
@@ -50,10 +51,10 @@ void scu_enable(void __iomem *scu_base)
 
 	scu_ctrl = readl_relaxed(scu_base + SCU_CTRL);
 	/* already enabled? */
-	if (scu_ctrl & 1)
+	if (scu_ctrl & SCU_ENABLE)
 		return;
 
-	scu_ctrl |= 1;
+	scu_ctrl |= SCU_ENABLE;
 	writel_relaxed(scu_ctrl, scu_base + SCU_CTRL);
 
 	/*
-- 
cgit v1.2.3


From c716483c3db10b31bf9bf43c5f45f2c3117ca13a Mon Sep 17 00:00:00 2001
From: Shawn Guo <shawn.guo@linaro.org>
Date: Thu, 31 Jul 2014 02:07:37 +0100
Subject: ARM: 8122/1: smp_scu: enable SCU standby support

With SCU standby enabled, SCU CLK will be turned off when all processors
are in WFI mode.  And the clock will be turned on when any processor
leaves WFI mode.

This behavior should be preferable in terms of power efficiency of
system idle.  So let's set the SCU standby bit to enable the support in
function scu_enable().

Cortex-A9 earlier than r2p0 has no standby bit in SCU, so we need to
skip setting the bit for those.

Signed-off-by: Shawn Guo <shawn.guo@freescale.com>
Acked-by: Soren Brinkmann <soren.brinkmann@xilinx.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/kernel/smp_scu.c | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'arch')

diff --git a/arch/arm/kernel/smp_scu.c b/arch/arm/kernel/smp_scu.c
index c947508f84e6..72f9241ad5db 100644
--- a/arch/arm/kernel/smp_scu.c
+++ b/arch/arm/kernel/smp_scu.c
@@ -18,6 +18,7 @@
 
 #define SCU_CTRL		0x00
 #define SCU_ENABLE		(1 << 0)
+#define SCU_STANDBY_ENABLE	(1 << 5)
 #define SCU_CONFIG		0x04
 #define SCU_CPU_STATUS		0x08
 #define SCU_INVALIDATE		0x0c
@@ -55,6 +56,12 @@ void scu_enable(void __iomem *scu_base)
 		return;
 
 	scu_ctrl |= SCU_ENABLE;
+
+	/* Cortex-A9 earlier than r2p0 has no standby bit in SCU */
+	if ((read_cpuid_id() & 0xff0ffff0) == 0x410fc090 &&
+	    (read_cpuid_id() & 0x00f0000f) >= 0x00200000)
+		scu_ctrl |= SCU_STANDBY_ENABLE;
+
 	writel_relaxed(scu_ctrl, scu_base + SCU_CTRL);
 
 	/*
-- 
cgit v1.2.3


From 3bb70de692f70861f5c5729cd2b870d0104a7cc9 Mon Sep 17 00:00:00 2001
From: Russell King <rmk+kernel@arm.linux.org.uk>
Date: Tue, 29 Jul 2014 09:27:13 +0100
Subject: ARM: add comments to the early page table remap code

Add further comments to the early page table remap code to explain what
the code is doing, why it is doing it, but more importantly to explain
that the code is not architecturally compliant and is squarely in
"UNPREDICTABLE" behaviour territory.

Add a warning and tainting of the kernel too.

Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/mm/mmu.c | 51 ++++++++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 46 insertions(+), 5 deletions(-)

(limited to 'arch')

diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c
index ab14b79b03f0..8348ed6b2efe 100644
--- a/arch/arm/mm/mmu.c
+++ b/arch/arm/mm/mmu.c
@@ -1406,8 +1406,8 @@ void __init early_paging_init(const struct machine_desc *mdesc,
 		return;
 
 	/* remap kernel code and data */
-	map_start = init_mm.start_code;
-	map_end   = init_mm.brk;
+	map_start = init_mm.start_code & PMD_MASK;
+	map_end   = ALIGN(init_mm.brk, PMD_SIZE);
 
 	/* get a handle on things... */
 	pgd0 = pgd_offset_k(0);
@@ -1434,23 +1434,64 @@ void __init early_paging_init(const struct machine_desc *mdesc,
 	dsb(ishst);
 	isb();
 
-	/* remap level 1 table */
+	/*
+	 * FIXME: This code is not architecturally compliant: we modify
+	 * the mappings in-place, indeed while they are in use by this
+	 * very same code.  This may lead to unpredictable behaviour of
+	 * the CPU.
+	 *
+	 * Even modifying the mappings in a separate page table does
+	 * not resolve this.
+	 *
+	 * The architecture strongly recommends that when a mapping is
+	 * changed, that it is changed by first going via an invalid
+	 * mapping and back to the new mapping.  This is to ensure that
+	 * no TLB conflicts (caused by the TLB having more than one TLB
+	 * entry match a translation) can occur.  However, doing that
+	 * here will result in unmapping the code we are running.
+	 */
+	pr_warn("WARNING: unsafe modification of in-place page tables - tainting kernel\n");
+	add_taint(TAINT_CPU_OUT_OF_SPEC, LOCKDEP_STILL_OK);
+
+	/*
+	 * Remap level 1 table.  This changes the physical addresses
+	 * used to refer to the level 2 page tables to the high
+	 * physical address alias, leaving everything else the same.
+	 */
 	for (i = 0; i < PTRS_PER_PGD; pud0++, i++) {
 		set_pud(pud0,
 			__pud(__pa(pmd0) | PMD_TYPE_TABLE | L_PGD_SWAPPER));
 		pmd0 += PTRS_PER_PMD;
 	}
 
-	/* remap pmds for kernel mapping */
-	phys = __pa(map_start) & PMD_MASK;
+	/*
+	 * Remap the level 2 table, pointing the mappings at the high
+	 * physical address alias of these pages.
+	 */
+	phys = __pa(map_start);
 	do {
 		*pmdk++ = __pmd(phys | pmdprot);
 		phys += PMD_SIZE;
 	} while (phys < map_end);
 
+	/*
+	 * Ensure that the above updates are flushed out of the cache.
+	 * This is not strictly correct; on a system where the caches
+	 * are coherent with each other, but the MMU page table walks
+	 * may not be coherent, flush_cache_all() may be a no-op, and
+	 * this will fail.
+	 */
 	flush_cache_all();
+
+	/*
+	 * Re-write the TTBR values to point them at the high physical
+	 * alias of the page tables.  We expect __va() will work on
+	 * cpu_get_pgd(), which returns the value of TTBR0.
+	 */
 	cpu_switch_mm(pgd0, &init_mm);
 	cpu_set_ttbr(1, __pa(pgd0) + TTBR1_OFFSET);
+
+	/* Finally flush any stale TLB values. */
 	local_flush_bp_all();
 	local_flush_tlb_all();
 }
-- 
cgit v1.2.3