summaryrefslogtreecommitdiff
path: root/arch/arm64
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2019-03-05 00:26:15 +0300
committerDavid S. Miller <davem@davemloft.net>2019-03-05 00:26:15 +0300
commit18a4d8bf250a33c015955f0dec27259780ef6448 (patch)
tree3e380812ac047fef70c0da52ba74705ee62b7130 /arch/arm64
parent096461de96a94c856190ba892ebf62dfba5a38f1 (diff)
parent822e44b45eb991c63487c5e2ce7d636411870a8d (diff)
downloadlinux-18a4d8bf250a33c015955f0dec27259780ef6448.tar.xz
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net
Diffstat (limited to 'arch/arm64')
-rw-r--r--arch/arm64/crypto/chacha-neon-core.S20
1 files changed, 18 insertions, 2 deletions
diff --git a/arch/arm64/crypto/chacha-neon-core.S b/arch/arm64/crypto/chacha-neon-core.S
index 021bb9e9784b..706c4e10e9e2 100644
--- a/arch/arm64/crypto/chacha-neon-core.S
+++ b/arch/arm64/crypto/chacha-neon-core.S
@@ -158,8 +158,8 @@ ENTRY(hchacha_block_neon)
mov w3, w2
bl chacha_permute
- st1 {v0.16b}, [x1], #16
- st1 {v3.16b}, [x1]
+ st1 {v0.4s}, [x1], #16
+ st1 {v3.4s}, [x1]
ldp x29, x30, [sp], #16
ret
@@ -532,6 +532,10 @@ ENTRY(chacha_4block_xor_neon)
add v3.4s, v3.4s, v19.4s
add a2, a2, w8
add a3, a3, w9
+CPU_BE( rev a0, a0 )
+CPU_BE( rev a1, a1 )
+CPU_BE( rev a2, a2 )
+CPU_BE( rev a3, a3 )
ld4r {v24.4s-v27.4s}, [x0], #16
ld4r {v28.4s-v31.4s}, [x0]
@@ -552,6 +556,10 @@ ENTRY(chacha_4block_xor_neon)
add v7.4s, v7.4s, v23.4s
add a6, a6, w8
add a7, a7, w9
+CPU_BE( rev a4, a4 )
+CPU_BE( rev a5, a5 )
+CPU_BE( rev a6, a6 )
+CPU_BE( rev a7, a7 )
// x8[0-3] += s2[0]
// x9[0-3] += s2[1]
@@ -569,6 +577,10 @@ ENTRY(chacha_4block_xor_neon)
add v11.4s, v11.4s, v27.4s
add a10, a10, w8
add a11, a11, w9
+CPU_BE( rev a8, a8 )
+CPU_BE( rev a9, a9 )
+CPU_BE( rev a10, a10 )
+CPU_BE( rev a11, a11 )
// x12[0-3] += s3[0]
// x13[0-3] += s3[1]
@@ -586,6 +598,10 @@ ENTRY(chacha_4block_xor_neon)
add v15.4s, v15.4s, v31.4s
add a14, a14, w8
add a15, a15, w9
+CPU_BE( rev a12, a12 )
+CPU_BE( rev a13, a13 )
+CPU_BE( rev a14, a14 )
+CPU_BE( rev a15, a15 )
// interleave 32-bit words in state n, n+1
ldp w6, w7, [x2], #64