summary | refs | log | tree | commit | diff
path: root/arch/arm/crypto/sha1-armv7-neon.S
diff options
context:
space:
mode:
author: Ard Biesheuvel <ard.biesheuvel@linaro.org> 2014-08-06 00:15:19 +0400
committer: Russell King <rmk+kernel@arm.linux.org.uk> 2014-08-27 18:44:11 +0400
commit: 0777e3e1723f69276136140209c11deeecb7c6dc (patch)
tree: 59e04134940a024d8ad96c0274f856824a153f5b /arch/arm/crypto/sha1-armv7-neon.S
parent: 52addcf9d6669fa439387610bc65c92fa0980cef (diff)
download: linux-0777e3e1723f69276136140209c11deeecb7c6dc.tar.xz
ARM: 8125/1: crypto: enable NEON SHA-1 for big endian
This tweaks the SHA-1 NEON code slightly so it works correctly under big endian, and removes the Kconfig condition preventing it from being selected if CONFIG_CPU_BIG_ENDIAN is set.

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
Diffstat (limited to 'arch/arm/crypto/sha1-armv7-neon.S')
-rw-r--r-- arch/arm/crypto/sha1-armv7-neon.S | 39
1 file changed, 22 insertions, 17 deletions
diff --git a/arch/arm/crypto/sha1-armv7-neon.S b/arch/arm/crypto/sha1-armv7-neon.S
index 50013c0e2864..dcd01f3f0bb0 100644
--- a/arch/arm/crypto/sha1-armv7-neon.S
+++ b/arch/arm/crypto/sha1-armv7-neon.S
@@ -9,7 +9,7 @@
*/
#include <linux/linkage.h>
-
+#include <asm/assembler.h>
.syntax unified
.code 32
@@ -61,13 +61,13 @@
#define RT3 r12
#define W0 q0
-#define W1 q1
+#define W1 q7
#define W2 q2
#define W3 q3
#define W4 q4
-#define W5 q5
-#define W6 q6
-#define W7 q7
+#define W5 q6
+#define W6 q5
+#define W7 q1
#define tmp0 q8
#define tmp1 q9
@@ -79,6 +79,11 @@
#define qK3 q14
#define qK4 q15
+#ifdef CONFIG_CPU_BIG_ENDIAN
+#define ARM_LE(code...)
+#else
+#define ARM_LE(code...) code
+#endif
/* Round function macros. */
@@ -150,45 +155,45 @@
#define W_PRECALC_00_15() \
add RWK, sp, #(WK_offs(0)); \
\
- vld1.32 {tmp0, tmp1}, [RDATA]!; \
- vrev32.8 W0, tmp0; /* big => little */ \
- vld1.32 {tmp2, tmp3}, [RDATA]!; \
+ vld1.32 {W0, W7}, [RDATA]!; \
+ ARM_LE(vrev32.8 W0, W0; ) /* big => little */ \
+ vld1.32 {W6, W5}, [RDATA]!; \
vadd.u32 tmp0, W0, curK; \
- vrev32.8 W7, tmp1; /* big => little */ \
- vrev32.8 W6, tmp2; /* big => little */ \
+ ARM_LE(vrev32.8 W7, W7; ) /* big => little */ \
+ ARM_LE(vrev32.8 W6, W6; ) /* big => little */ \
vadd.u32 tmp1, W7, curK; \
- vrev32.8 W5, tmp3; /* big => little */ \
+ ARM_LE(vrev32.8 W5, W5; ) /* big => little */ \
vadd.u32 tmp2, W6, curK; \
vst1.32 {tmp0, tmp1}, [RWK]!; \
vadd.u32 tmp3, W5, curK; \
vst1.32 {tmp2, tmp3}, [RWK]; \
#define WPRECALC_00_15_0(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
- vld1.32 {tmp0, tmp1}, [RDATA]!; \
+ vld1.32 {W0, W7}, [RDATA]!; \
#define WPRECALC_00_15_1(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
add RWK, sp, #(WK_offs(0)); \
#define WPRECALC_00_15_2(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
- vrev32.8 W0, tmp0; /* big => little */ \
+ ARM_LE(vrev32.8 W0, W0; ) /* big => little */ \
#define WPRECALC_00_15_3(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
- vld1.32 {tmp2, tmp3}, [RDATA]!; \
+ vld1.32 {W6, W5}, [RDATA]!; \
#define WPRECALC_00_15_4(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
vadd.u32 tmp0, W0, curK; \
#define WPRECALC_00_15_5(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
- vrev32.8 W7, tmp1; /* big => little */ \
+ ARM_LE(vrev32.8 W7, W7; ) /* big => little */ \
#define WPRECALC_00_15_6(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
- vrev32.8 W6, tmp2; /* big => little */ \
+ ARM_LE(vrev32.8 W6, W6; ) /* big => little */ \
#define WPRECALC_00_15_7(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
vadd.u32 tmp1, W7, curK; \
#define WPRECALC_00_15_8(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
- vrev32.8 W5, tmp3; /* big => little */ \
+ ARM_LE(vrev32.8 W5, W5; ) /* big => little */ \
#define WPRECALC_00_15_9(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
vadd.u32 tmp2, W6, curK; \