summaryrefslogtreecommitdiff
path: root/arch/x86/crypto
diff options
context:
space:
mode:
authorEric Biggers <ebiggers@google.com>2024-04-11 19:23:58 +0300
committerHerbert Xu <herbert@gondor.apana.org.au>2024-04-19 13:54:18 +0300
commit59e62b20acc3161cafe3dce52cd3d6211379c4c5 (patch)
treed875a64b766dd449e772506182e166b61bca35b9 /arch/x86/crypto
parent1b5ddb067df930c8232020cd059b2060275427cf (diff)
downloadlinux-59e62b20acc3161cafe3dce52cd3d6211379c4c5.tar.xz
crypto: x86/sha256-ni - optimize code size
- Load the SHA-256 round constants relative to a pointer that points into the middle of the constants rather than to the beginning. Since x86 instructions use signed offsets, this decreases the instruction length required to access some of the later round constants. - Use punpcklqdq or punpckhqdq instead of longer instructions such as pshufd, pblendw, and palignr. This doesn't harm performance. The end result is that sha256_ni_transform shrinks from 839 bytes to 791 bytes, with no loss in performance. Suggested-by: Stefan Kanthak <stefan.kanthak@nexgo.de> Signed-off-by: Eric Biggers <ebiggers@google.com> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Diffstat (limited to 'arch/x86/crypto')
-rw-r--r--arch/x86/crypto/sha256_ni_asm.S30
1 files changed, 15 insertions, 15 deletions
diff --git a/arch/x86/crypto/sha256_ni_asm.S b/arch/x86/crypto/sha256_ni_asm.S
index b7e7001dafdf..ffc9f1c75c15 100644
--- a/arch/x86/crypto/sha256_ni_asm.S
+++ b/arch/x86/crypto/sha256_ni_asm.S
@@ -84,7 +84,7 @@
.else
movdqa \m0, MSG
.endif
- paddd \i*4(SHA256CONSTANTS), MSG
+ paddd (\i-32)*4(SHA256CONSTANTS), MSG
sha256rnds2 STATE0, STATE1
.if \i >= 12 && \i < 60
movdqa \m0, TMP
@@ -92,7 +92,7 @@
paddd TMP, \m1
sha256msg2 \m0, \m1
.endif
- pshufd $0x0E, MSG, MSG
+ punpckhqdq MSG, MSG
sha256rnds2 STATE1, STATE0
.if \i >= 4 && \i < 52
sha256msg1 \m0, \m3
@@ -128,17 +128,17 @@ SYM_TYPED_FUNC_START(sha256_ni_transform)
* Need to reorder these appropriately
* DCBA, HGFE -> ABEF, CDGH
*/
- movdqu 0*16(DIGEST_PTR), STATE0
- movdqu 1*16(DIGEST_PTR), STATE1
+ movdqu 0*16(DIGEST_PTR), STATE0 /* DCBA */
+ movdqu 1*16(DIGEST_PTR), STATE1 /* HGFE */
- pshufd $0xB1, STATE0, STATE0 /* CDAB */
- pshufd $0x1B, STATE1, STATE1 /* EFGH */
movdqa STATE0, TMP
- palignr $8, STATE1, STATE0 /* ABEF */
- pblendw $0xF0, TMP, STATE1 /* CDGH */
+ punpcklqdq STATE1, STATE0 /* FEBA */
+ punpckhqdq TMP, STATE1 /* DCHG */
+ pshufd $0x1B, STATE0, STATE0 /* ABEF */
+ pshufd $0xB1, STATE1, STATE1 /* CDGH */
movdqa PSHUFFLE_BYTE_FLIP_MASK(%rip), SHUF_MASK
- lea K256(%rip), SHA256CONSTANTS
+ lea K256+32*4(%rip), SHA256CONSTANTS
.Lloop0:
/* Save hash values for addition after rounds */
@@ -162,14 +162,14 @@ SYM_TYPED_FUNC_START(sha256_ni_transform)
jne .Lloop0
/* Write hash values back in the correct order */
- pshufd $0x1B, STATE0, STATE0 /* FEBA */
- pshufd $0xB1, STATE1, STATE1 /* DCHG */
movdqa STATE0, TMP
- pblendw $0xF0, STATE1, STATE0 /* DCBA */
- palignr $8, TMP, STATE1 /* HGFE */
+ punpcklqdq STATE1, STATE0 /* GHEF */
+ punpckhqdq TMP, STATE1 /* ABCD */
+ pshufd $0xB1, STATE0, STATE0 /* HGFE */
+ pshufd $0x1B, STATE1, STATE1 /* DCBA */
- movdqu STATE0, 0*16(DIGEST_PTR)
- movdqu STATE1, 1*16(DIGEST_PTR)
+ movdqu STATE1, 0*16(DIGEST_PTR)
+ movdqu STATE0, 1*16(DIGEST_PTR)
.Ldone_hash: