summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--arch/arm64/crypto/aes-neon.S25
1 files changed, 15 insertions, 10 deletions
diff --git a/arch/arm64/crypto/aes-neon.S b/arch/arm64/crypto/aes-neon.S
index b93170e1cc93..85f07ead7c5c 100644
--- a/arch/arm64/crypto/aes-neon.S
+++ b/arch/arm64/crypto/aes-neon.S
@@ -9,6 +9,7 @@
*/
#include <linux/linkage.h>
+#include <asm/assembler.h>
#define AES_ENTRY(func) ENTRY(neon_ ## func)
#define AES_ENDPROC(func) ENDPROC(neon_ ## func)
@@ -83,13 +84,13 @@
.endm
.macro do_block, enc, in, rounds, rk, rkp, i
- ld1 {v15.16b}, [\rk]
+ ld1 {v15.4s}, [\rk]
add \rkp, \rk, #16
mov \i, \rounds
1111: eor \in\().16b, \in\().16b, v15.16b /* ^round key */
tbl \in\().16b, {\in\().16b}, v13.16b /* ShiftRows */
sub_bytes \in
- ld1 {v15.16b}, [\rkp], #16
+ ld1 {v15.4s}, [\rkp], #16
subs \i, \i, #1
beq 2222f
.if \enc == 1
@@ -229,7 +230,7 @@
.endm
.macro do_block_2x, enc, in0, in1 rounds, rk, rkp, i
- ld1 {v15.16b}, [\rk]
+ ld1 {v15.4s}, [\rk]
add \rkp, \rk, #16
mov \i, \rounds
1111: eor \in0\().16b, \in0\().16b, v15.16b /* ^round key */
@@ -237,7 +238,7 @@
sub_bytes_2x \in0, \in1
tbl \in0\().16b, {\in0\().16b}, v13.16b /* ShiftRows */
tbl \in1\().16b, {\in1\().16b}, v13.16b /* ShiftRows */
- ld1 {v15.16b}, [\rkp], #16
+ ld1 {v15.4s}, [\rkp], #16
subs \i, \i, #1
beq 2222f
.if \enc == 1
@@ -254,7 +255,7 @@
.endm
.macro do_block_4x, enc, in0, in1, in2, in3, rounds, rk, rkp, i
- ld1 {v15.16b}, [\rk]
+ ld1 {v15.4s}, [\rk]
add \rkp, \rk, #16
mov \i, \rounds
1111: eor \in0\().16b, \in0\().16b, v15.16b /* ^round key */
@@ -266,7 +267,7 @@
tbl \in1\().16b, {\in1\().16b}, v13.16b /* ShiftRows */
tbl \in2\().16b, {\in2\().16b}, v13.16b /* ShiftRows */
tbl \in3\().16b, {\in3\().16b}, v13.16b /* ShiftRows */
- ld1 {v15.16b}, [\rkp], #16
+ ld1 {v15.4s}, [\rkp], #16
subs \i, \i, #1
beq 2222f
.if \enc == 1
@@ -306,12 +307,16 @@
.text
.align 4
.LForward_ShiftRows:
- .byte 0x0, 0x5, 0xa, 0xf, 0x4, 0x9, 0xe, 0x3
- .byte 0x8, 0xd, 0x2, 0x7, 0xc, 0x1, 0x6, 0xb
+CPU_LE( .byte 0x0, 0x5, 0xa, 0xf, 0x4, 0x9, 0xe, 0x3 )
+CPU_LE( .byte 0x8, 0xd, 0x2, 0x7, 0xc, 0x1, 0x6, 0xb )
+CPU_BE( .byte 0xb, 0x6, 0x1, 0xc, 0x7, 0x2, 0xd, 0x8 )
+CPU_BE( .byte 0x3, 0xe, 0x9, 0x4, 0xf, 0xa, 0x5, 0x0 )
.LReverse_ShiftRows:
- .byte 0x0, 0xd, 0xa, 0x7, 0x4, 0x1, 0xe, 0xb
- .byte 0x8, 0x5, 0x2, 0xf, 0xc, 0x9, 0x6, 0x3
+CPU_LE( .byte 0x0, 0xd, 0xa, 0x7, 0x4, 0x1, 0xe, 0xb )
+CPU_LE( .byte 0x8, 0x5, 0x2, 0xf, 0xc, 0x9, 0x6, 0x3 )
+CPU_BE( .byte 0x3, 0x6, 0x9, 0xc, 0xf, 0x2, 0x5, 0x8 )
+CPU_BE( .byte 0xb, 0xe, 0x1, 0x4, 0x7, 0xa, 0xd, 0x0 )
.LForward_Sbox:
.byte 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5