summaryrefslogtreecommitdiff
path: root/arch/x86/crypto
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/crypto')
-rw-r--r--arch/x86/crypto/Makefile4
-rw-r--r--arch/x86/crypto/aegis128-aesni-asm.S48
-rw-r--r--arch/x86/crypto/aes_ctrby8_avx-x86_64.S2
-rw-r--r--arch/x86/crypto/aesni-intel_asm.S56
-rw-r--r--arch/x86/crypto/aesni-intel_avx-x86_64.S40
-rw-r--r--arch/x86/crypto/aesni-intel_glue.c4
-rw-r--r--arch/x86/crypto/blake2s-core.S4
-rw-r--r--arch/x86/crypto/blake2s-glue.c68
-rw-r--r--arch/x86/crypto/blake2s-shash.c77
-rw-r--r--arch/x86/crypto/blowfish-x86_64-asm_64.S12
-rw-r--r--arch/x86/crypto/camellia-aesni-avx-asm_64.S14
-rw-r--r--arch/x86/crypto/camellia-aesni-avx2-asm_64.S14
-rw-r--r--arch/x86/crypto/camellia-x86_64-asm_64.S12
-rw-r--r--arch/x86/crypto/cast5-avx-x86_64-asm_64.S12
-rw-r--r--arch/x86/crypto/cast6-avx-x86_64-asm_64.S10
-rw-r--r--arch/x86/crypto/chacha-avx2-x86_64.S6
-rw-r--r--arch/x86/crypto/chacha-avx512vl-x86_64.S6
-rw-r--r--arch/x86/crypto/chacha-ssse3-x86_64.S8
-rw-r--r--arch/x86/crypto/crc32-pclmul_asm.S2
-rw-r--r--arch/x86/crypto/crc32c-pcl-intel-asm_64.S2
-rw-r--r--arch/x86/crypto/crct10dif-pcl-asm_64.S2
-rw-r--r--arch/x86/crypto/curve25519-x86_64.c767
-rw-r--r--arch/x86/crypto/des3_ede-asm_64.S4
-rw-r--r--arch/x86/crypto/des3_ede_glue.c4
-rw-r--r--arch/x86/crypto/ghash-clmulni-intel_asm.S6
-rw-r--r--arch/x86/crypto/nh-avx2-x86_64.S2
-rw-r--r--arch/x86/crypto/nh-sse2-x86_64.S2
-rw-r--r--arch/x86/crypto/serpent-avx-x86_64-asm_64.S10
-rw-r--r--arch/x86/crypto/serpent-avx2-asm_64.S10
-rw-r--r--arch/x86/crypto/serpent-sse2-i586-asm_32.S6
-rw-r--r--arch/x86/crypto/serpent-sse2-x86_64-asm_64.S6
-rw-r--r--arch/x86/crypto/sha1_avx2_x86_64_asm.S2
-rw-r--r--arch/x86/crypto/sha1_ni_asm.S2
-rw-r--r--arch/x86/crypto/sha1_ssse3_asm.S2
-rw-r--r--arch/x86/crypto/sha256-avx-asm.S2
-rw-r--r--arch/x86/crypto/sha256-avx2-asm.S2
-rw-r--r--arch/x86/crypto/sha256-ssse3-asm.S2
-rw-r--r--arch/x86/crypto/sha256_ni_asm.S2
-rw-r--r--arch/x86/crypto/sha512-avx-asm.S2
-rw-r--r--arch/x86/crypto/sha512-avx2-asm.S2
-rw-r--r--arch/x86/crypto/sha512-ssse3-asm.S2
-rw-r--r--arch/x86/crypto/sm4-aesni-avx-asm_64.S12
-rw-r--r--arch/x86/crypto/sm4-aesni-avx2-asm_64.S8
-rw-r--r--arch/x86/crypto/twofish-avx-x86_64-asm_64.S10
-rw-r--r--arch/x86/crypto/twofish-i586-asm_32.S4
-rw-r--r--arch/x86/crypto/twofish-x86_64-asm_64-3way.S6
-rw-r--r--arch/x86/crypto/twofish-x86_64-asm_64.S4
47 files changed, 759 insertions, 525 deletions
diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile
index f307c93fc90a..c3af959648e6 100644
--- a/arch/x86/crypto/Makefile
+++ b/arch/x86/crypto/Makefile
@@ -62,7 +62,9 @@ obj-$(CONFIG_CRYPTO_SHA512_SSSE3) += sha512-ssse3.o
sha512-ssse3-y := sha512-ssse3-asm.o sha512-avx-asm.o sha512-avx2-asm.o sha512_ssse3_glue.o
obj-$(CONFIG_CRYPTO_BLAKE2S_X86) += blake2s-x86_64.o
-blake2s-x86_64-y := blake2s-core.o blake2s-glue.o
+blake2s-x86_64-y := blake2s-shash.o
+obj-$(if $(CONFIG_CRYPTO_BLAKE2S_X86),y) += libblake2s-x86_64.o
+libblake2s-x86_64-y := blake2s-core.o blake2s-glue.o
obj-$(CONFIG_CRYPTO_GHASH_CLMUL_NI_INTEL) += ghash-clmulni-intel.o
ghash-clmulni-intel-y := ghash-clmulni-intel_asm.o ghash-clmulni-intel_glue.o
diff --git a/arch/x86/crypto/aegis128-aesni-asm.S b/arch/x86/crypto/aegis128-aesni-asm.S
index 51d46d93efbc..b48ddebb4748 100644
--- a/arch/x86/crypto/aegis128-aesni-asm.S
+++ b/arch/x86/crypto/aegis128-aesni-asm.S
@@ -122,7 +122,7 @@ SYM_FUNC_START_LOCAL(__load_partial)
pxor T0, MSG
.Lld_partial_8:
- ret
+ RET
SYM_FUNC_END(__load_partial)
/*
@@ -180,7 +180,7 @@ SYM_FUNC_START_LOCAL(__store_partial)
mov %r10b, (%r9)
.Lst_partial_1:
- ret
+ RET
SYM_FUNC_END(__store_partial)
/*
@@ -225,7 +225,7 @@ SYM_FUNC_START(crypto_aegis128_aesni_init)
movdqu STATE4, 0x40(STATEP)
FRAME_END
- ret
+ RET
SYM_FUNC_END(crypto_aegis128_aesni_init)
/*
@@ -337,7 +337,7 @@ SYM_FUNC_START(crypto_aegis128_aesni_ad)
movdqu STATE3, 0x30(STATEP)
movdqu STATE4, 0x40(STATEP)
FRAME_END
- ret
+ RET
.Lad_out_1:
movdqu STATE4, 0x00(STATEP)
@@ -346,7 +346,7 @@ SYM_FUNC_START(crypto_aegis128_aesni_ad)
movdqu STATE2, 0x30(STATEP)
movdqu STATE3, 0x40(STATEP)
FRAME_END
- ret
+ RET
.Lad_out_2:
movdqu STATE3, 0x00(STATEP)
@@ -355,7 +355,7 @@ SYM_FUNC_START(crypto_aegis128_aesni_ad)
movdqu STATE1, 0x30(STATEP)
movdqu STATE2, 0x40(STATEP)
FRAME_END
- ret
+ RET
.Lad_out_3:
movdqu STATE2, 0x00(STATEP)
@@ -364,7 +364,7 @@ SYM_FUNC_START(crypto_aegis128_aesni_ad)
movdqu STATE0, 0x30(STATEP)
movdqu STATE1, 0x40(STATEP)
FRAME_END
- ret
+ RET
.Lad_out_4:
movdqu STATE1, 0x00(STATEP)
@@ -373,11 +373,11 @@ SYM_FUNC_START(crypto_aegis128_aesni_ad)
movdqu STATE4, 0x30(STATEP)
movdqu STATE0, 0x40(STATEP)
FRAME_END
- ret
+ RET
.Lad_out:
FRAME_END
- ret
+ RET
SYM_FUNC_END(crypto_aegis128_aesni_ad)
.macro encrypt_block a s0 s1 s2 s3 s4 i
@@ -452,7 +452,7 @@ SYM_FUNC_START(crypto_aegis128_aesni_enc)
movdqu STATE2, 0x30(STATEP)
movdqu STATE3, 0x40(STATEP)
FRAME_END
- ret
+ RET
.Lenc_out_1:
movdqu STATE3, 0x00(STATEP)
@@ -461,7 +461,7 @@ SYM_FUNC_START(crypto_aegis128_aesni_enc)
movdqu STATE1, 0x30(STATEP)
movdqu STATE2, 0x40(STATEP)
FRAME_END
- ret
+ RET
.Lenc_out_2:
movdqu STATE2, 0x00(STATEP)
@@ -470,7 +470,7 @@ SYM_FUNC_START(crypto_aegis128_aesni_enc)
movdqu STATE0, 0x30(STATEP)
movdqu STATE1, 0x40(STATEP)
FRAME_END
- ret
+ RET
.Lenc_out_3:
movdqu STATE1, 0x00(STATEP)
@@ -479,7 +479,7 @@ SYM_FUNC_START(crypto_aegis128_aesni_enc)
movdqu STATE4, 0x30(STATEP)
movdqu STATE0, 0x40(STATEP)
FRAME_END
- ret
+ RET
.Lenc_out_4:
movdqu STATE0, 0x00(STATEP)
@@ -488,11 +488,11 @@ SYM_FUNC_START(crypto_aegis128_aesni_enc)
movdqu STATE3, 0x30(STATEP)
movdqu STATE4, 0x40(STATEP)
FRAME_END
- ret
+ RET
.Lenc_out:
FRAME_END
- ret
+ RET
SYM_FUNC_END(crypto_aegis128_aesni_enc)
/*
@@ -532,7 +532,7 @@ SYM_FUNC_START(crypto_aegis128_aesni_enc_tail)
movdqu STATE3, 0x40(STATEP)
FRAME_END
- ret
+ RET
SYM_FUNC_END(crypto_aegis128_aesni_enc_tail)
.macro decrypt_block a s0 s1 s2 s3 s4 i
@@ -606,7 +606,7 @@ SYM_FUNC_START(crypto_aegis128_aesni_dec)
movdqu STATE2, 0x30(STATEP)
movdqu STATE3, 0x40(STATEP)
FRAME_END
- ret
+ RET
.Ldec_out_1:
movdqu STATE3, 0x00(STATEP)
@@ -615,7 +615,7 @@ SYM_FUNC_START(crypto_aegis128_aesni_dec)
movdqu STATE1, 0x30(STATEP)
movdqu STATE2, 0x40(STATEP)
FRAME_END
- ret
+ RET
.Ldec_out_2:
movdqu STATE2, 0x00(STATEP)
@@ -624,7 +624,7 @@ SYM_FUNC_START(crypto_aegis128_aesni_dec)
movdqu STATE0, 0x30(STATEP)
movdqu STATE1, 0x40(STATEP)
FRAME_END
- ret
+ RET
.Ldec_out_3:
movdqu STATE1, 0x00(STATEP)
@@ -633,7 +633,7 @@ SYM_FUNC_START(crypto_aegis128_aesni_dec)
movdqu STATE4, 0x30(STATEP)
movdqu STATE0, 0x40(STATEP)
FRAME_END
- ret
+ RET
.Ldec_out_4:
movdqu STATE0, 0x00(STATEP)
@@ -642,11 +642,11 @@ SYM_FUNC_START(crypto_aegis128_aesni_dec)
movdqu STATE3, 0x30(STATEP)
movdqu STATE4, 0x40(STATEP)
FRAME_END
- ret
+ RET
.Ldec_out:
FRAME_END
- ret
+ RET
SYM_FUNC_END(crypto_aegis128_aesni_dec)
/*
@@ -696,7 +696,7 @@ SYM_FUNC_START(crypto_aegis128_aesni_dec_tail)
movdqu STATE3, 0x40(STATEP)
FRAME_END
- ret
+ RET
SYM_FUNC_END(crypto_aegis128_aesni_dec_tail)
/*
@@ -743,5 +743,5 @@ SYM_FUNC_START(crypto_aegis128_aesni_final)
movdqu MSG, (%rsi)
FRAME_END
- ret
+ RET
SYM_FUNC_END(crypto_aegis128_aesni_final)
diff --git a/arch/x86/crypto/aes_ctrby8_avx-x86_64.S b/arch/x86/crypto/aes_ctrby8_avx-x86_64.S
index 3f0fc7dd87d7..c799838242a6 100644
--- a/arch/x86/crypto/aes_ctrby8_avx-x86_64.S
+++ b/arch/x86/crypto/aes_ctrby8_avx-x86_64.S
@@ -525,7 +525,7 @@ ddq_add_8:
/* return updated IV */
vpshufb xbyteswap, xcounter, xcounter
vmovdqu xcounter, (p_iv)
- ret
+ RET
.endm
/*
diff --git a/arch/x86/crypto/aesni-intel_asm.S b/arch/x86/crypto/aesni-intel_asm.S
index 4e3972570916..363699dd7220 100644
--- a/arch/x86/crypto/aesni-intel_asm.S
+++ b/arch/x86/crypto/aesni-intel_asm.S
@@ -1594,7 +1594,7 @@ SYM_FUNC_START(aesni_gcm_dec)
GCM_ENC_DEC dec
GCM_COMPLETE arg10, arg11
FUNC_RESTORE
- ret
+ RET
SYM_FUNC_END(aesni_gcm_dec)
@@ -1683,7 +1683,7 @@ SYM_FUNC_START(aesni_gcm_enc)
GCM_COMPLETE arg10, arg11
FUNC_RESTORE
- ret
+ RET
SYM_FUNC_END(aesni_gcm_enc)
/*****************************************************************************
@@ -1701,7 +1701,7 @@ SYM_FUNC_START(aesni_gcm_init)
FUNC_SAVE
GCM_INIT %arg3, %arg4,%arg5, %arg6
FUNC_RESTORE
- ret
+ RET
SYM_FUNC_END(aesni_gcm_init)
/*****************************************************************************
@@ -1716,7 +1716,7 @@ SYM_FUNC_START(aesni_gcm_enc_update)
FUNC_SAVE
GCM_ENC_DEC enc
FUNC_RESTORE
- ret
+ RET
SYM_FUNC_END(aesni_gcm_enc_update)
/*****************************************************************************
@@ -1731,7 +1731,7 @@ SYM_FUNC_START(aesni_gcm_dec_update)
FUNC_SAVE
GCM_ENC_DEC dec
FUNC_RESTORE
- ret
+ RET
SYM_FUNC_END(aesni_gcm_dec_update)
/*****************************************************************************
@@ -1746,7 +1746,7 @@ SYM_FUNC_START(aesni_gcm_finalize)
FUNC_SAVE
GCM_COMPLETE %arg3 %arg4
FUNC_RESTORE
- ret
+ RET
SYM_FUNC_END(aesni_gcm_finalize)
#endif
@@ -1762,7 +1762,7 @@ SYM_FUNC_START_LOCAL(_key_expansion_256a)
pxor %xmm1, %xmm0
movaps %xmm0, (TKEYP)
add $0x10, TKEYP
- ret
+ RET
SYM_FUNC_END(_key_expansion_256a)
SYM_FUNC_END_ALIAS(_key_expansion_128)
@@ -1787,7 +1787,7 @@ SYM_FUNC_START_LOCAL(_key_expansion_192a)
shufps $0b01001110, %xmm2, %xmm1
movaps %xmm1, 0x10(TKEYP)
add $0x20, TKEYP
- ret
+ RET
SYM_FUNC_END(_key_expansion_192a)
SYM_FUNC_START_LOCAL(_key_expansion_192b)
@@ -1806,7 +1806,7 @@ SYM_FUNC_START_LOCAL(_key_expansion_192b)
movaps %xmm0, (TKEYP)
add $0x10, TKEYP
- ret
+ RET
SYM_FUNC_END(_key_expansion_192b)
SYM_FUNC_START_LOCAL(_key_expansion_256b)
@@ -1818,7 +1818,7 @@ SYM_FUNC_START_LOCAL(_key_expansion_256b)
pxor %xmm1, %xmm2
movaps %xmm2, (TKEYP)
add $0x10, TKEYP
- ret
+ RET
SYM_FUNC_END(_key_expansion_256b)
/*
@@ -1933,7 +1933,7 @@ SYM_FUNC_START(aesni_set_key)
popl KEYP
#endif
FRAME_END
- ret
+ RET
SYM_FUNC_END(aesni_set_key)
/*
@@ -1957,7 +1957,7 @@ SYM_FUNC_START(aesni_enc)
popl KEYP
#endif
FRAME_END
- ret
+ RET
SYM_FUNC_END(aesni_enc)
/*
@@ -2014,7 +2014,7 @@ SYM_FUNC_START_LOCAL(_aesni_enc1)
aesenc KEY, STATE
movaps 0x70(TKEYP), KEY
aesenclast KEY, STATE
- ret
+ RET
SYM_FUNC_END(_aesni_enc1)
/*
@@ -2122,7 +2122,7 @@ SYM_FUNC_START_LOCAL(_aesni_enc4)
aesenclast KEY, STATE2
aesenclast KEY, STATE3
aesenclast KEY, STATE4
- ret
+ RET
SYM_FUNC_END(_aesni_enc4)
/*
@@ -2147,7 +2147,7 @@ SYM_FUNC_START(aesni_dec)
popl KEYP
#endif
FRAME_END
- ret
+ RET
SYM_FUNC_END(aesni_dec)
/*
@@ -2204,7 +2204,7 @@ SYM_FUNC_START_LOCAL(_aesni_dec1)
aesdec KEY, STATE
movaps 0x70(TKEYP), KEY
aesdeclast KEY, STATE
- ret
+ RET
SYM_FUNC_END(_aesni_dec1)
/*
@@ -2312,7 +2312,7 @@ SYM_FUNC_START_LOCAL(_aesni_dec4)
aesdeclast KEY, STATE2
aesdeclast KEY, STATE3
aesdeclast KEY, STATE4
- ret
+ RET
SYM_FUNC_END(_aesni_dec4)
/*
@@ -2372,7 +2372,7 @@ SYM_FUNC_START(aesni_ecb_enc)
popl LEN
#endif
FRAME_END
- ret
+ RET
SYM_FUNC_END(aesni_ecb_enc)
/*
@@ -2433,7 +2433,7 @@ SYM_FUNC_START(aesni_ecb_dec)
popl LEN
#endif
FRAME_END
- ret
+ RET
SYM_FUNC_END(aesni_ecb_dec)
/*
@@ -2477,7 +2477,7 @@ SYM_FUNC_START(aesni_cbc_enc)
popl IVP
#endif
FRAME_END
- ret
+ RET
SYM_FUNC_END(aesni_cbc_enc)
/*
@@ -2570,7 +2570,7 @@ SYM_FUNC_START(aesni_cbc_dec)
popl IVP
#endif
FRAME_END
- ret
+ RET
SYM_FUNC_END(aesni_cbc_dec)
/*
@@ -2627,7 +2627,7 @@ SYM_FUNC_START(aesni_cts_cbc_enc)
popl IVP
#endif
FRAME_END
- ret
+ RET
SYM_FUNC_END(aesni_cts_cbc_enc)
/*
@@ -2688,7 +2688,7 @@ SYM_FUNC_START(aesni_cts_cbc_dec)
popl IVP
#endif
FRAME_END
- ret
+ RET
SYM_FUNC_END(aesni_cts_cbc_dec)
.pushsection .rodata
@@ -2725,7 +2725,7 @@ SYM_FUNC_START_LOCAL(_aesni_inc_init)
mov $1, TCTR_LOW
movq TCTR_LOW, INC
movq CTR, TCTR_LOW
- ret
+ RET
SYM_FUNC_END(_aesni_inc_init)
/*
@@ -2753,7 +2753,7 @@ SYM_FUNC_START_LOCAL(_aesni_inc)
.Linc_low:
movaps CTR, IV
pshufb BSWAP_MASK, IV
- ret
+ RET
SYM_FUNC_END(_aesni_inc)
/*
@@ -2816,7 +2816,7 @@ SYM_FUNC_START(aesni_ctr_enc)
movups IV, (IVP)
.Lctr_enc_just_ret:
FRAME_END
- ret
+ RET
SYM_FUNC_END(aesni_ctr_enc)
#endif
@@ -2932,7 +2932,7 @@ SYM_FUNC_START(aesni_xts_encrypt)
popl IVP
#endif
FRAME_END
- ret
+ RET
.Lxts_enc_1x:
add $64, LEN
@@ -3092,7 +3092,7 @@ SYM_FUNC_START(aesni_xts_decrypt)
popl IVP
#endif
FRAME_END
- ret
+ RET
.Lxts_dec_1x:
add $64, LEN
diff --git a/arch/x86/crypto/aesni-intel_avx-x86_64.S b/arch/x86/crypto/aesni-intel_avx-x86_64.S
index 98e3552b6e03..0852ab573fd3 100644
--- a/arch/x86/crypto/aesni-intel_avx-x86_64.S
+++ b/arch/x86/crypto/aesni-intel_avx-x86_64.S
@@ -1767,7 +1767,7 @@ SYM_FUNC_START(aesni_gcm_init_avx_gen2)
FUNC_SAVE
INIT GHASH_MUL_AVX, PRECOMPUTE_AVX
FUNC_RESTORE
- ret
+ RET
SYM_FUNC_END(aesni_gcm_init_avx_gen2)
###############################################################################
@@ -1788,15 +1788,15 @@ SYM_FUNC_START(aesni_gcm_enc_update_avx_gen2)
# must be 192
GCM_ENC_DEC INITIAL_BLOCKS_AVX, GHASH_8_ENCRYPT_8_PARALLEL_AVX, GHASH_LAST_8_AVX, GHASH_MUL_AVX, ENC, 11
FUNC_RESTORE
- ret
+ RET
key_128_enc_update:
GCM_ENC_DEC INITIAL_BLOCKS_AVX, GHASH_8_ENCRYPT_8_PARALLEL_AVX, GHASH_LAST_8_AVX, GHASH_MUL_AVX, ENC, 9
FUNC_RESTORE
- ret
+ RET
key_256_enc_update:
GCM_ENC_DEC INITIAL_BLOCKS_AVX, GHASH_8_ENCRYPT_8_PARALLEL_AVX, GHASH_LAST_8_AVX, GHASH_MUL_AVX, ENC, 13
FUNC_RESTORE
- ret
+ RET
SYM_FUNC_END(aesni_gcm_enc_update_avx_gen2)
###############################################################################
@@ -1817,15 +1817,15 @@ SYM_FUNC_START(aesni_gcm_dec_update_avx_gen2)
# must be 192
GCM_ENC_DEC INITIAL_BLOCKS_AVX, GHASH_8_ENCRYPT_8_PARALLEL_AVX, GHASH_LAST_8_AVX, GHASH_MUL_AVX, DEC, 11
FUNC_RESTORE
- ret
+ RET
key_128_dec_update:
GCM_ENC_DEC INITIAL_BLOCKS_AVX, GHASH_8_ENCRYPT_8_PARALLEL_AVX, GHASH_LAST_8_AVX, GHASH_MUL_AVX, DEC, 9
FUNC_RESTORE
- ret
+ RET
key_256_dec_update:
GCM_ENC_DEC INITIAL_BLOCKS_AVX, GHASH_8_ENCRYPT_8_PARALLEL_AVX, GHASH_LAST_8_AVX, GHASH_MUL_AVX, DEC, 13
FUNC_RESTORE
- ret
+ RET
SYM_FUNC_END(aesni_gcm_dec_update_avx_gen2)
###############################################################################
@@ -1846,15 +1846,15 @@ SYM_FUNC_START(aesni_gcm_finalize_avx_gen2)
# must be 192
GCM_COMPLETE GHASH_MUL_AVX, 11, arg3, arg4
FUNC_RESTORE
- ret
+ RET
key_128_finalize:
GCM_COMPLETE GHASH_MUL_AVX, 9, arg3, arg4
FUNC_RESTORE
- ret
+ RET
key_256_finalize:
GCM_COMPLETE GHASH_MUL_AVX, 13, arg3, arg4
FUNC_RESTORE
- ret
+ RET
SYM_FUNC_END(aesni_gcm_finalize_avx_gen2)
###############################################################################
@@ -2735,7 +2735,7 @@ SYM_FUNC_START(aesni_gcm_init_avx_gen4)
FUNC_SAVE
INIT GHASH_MUL_AVX2, PRECOMPUTE_AVX2
FUNC_RESTORE
- ret
+ RET
SYM_FUNC_END(aesni_gcm_init_avx_gen4)
###############################################################################
@@ -2756,15 +2756,15 @@ SYM_FUNC_START(aesni_gcm_enc_update_avx_gen4)
# must be 192
GCM_ENC_DEC INITIAL_BLOCKS_AVX2, GHASH_8_ENCRYPT_8_PARALLEL_AVX2, GHASH_LAST_8_AVX2, GHASH_MUL_AVX2, ENC, 11
FUNC_RESTORE
- ret
+ RET
key_128_enc_update4:
GCM_ENC_DEC INITIAL_BLOCKS_AVX2, GHASH_8_ENCRYPT_8_PARALLEL_AVX2, GHASH_LAST_8_AVX2, GHASH_MUL_AVX2, ENC, 9
FUNC_RESTORE
- ret
+ RET
key_256_enc_update4:
GCM_ENC_DEC INITIAL_BLOCKS_AVX2, GHASH_8_ENCRYPT_8_PARALLEL_AVX2, GHASH_LAST_8_AVX2, GHASH_MUL_AVX2, ENC, 13
FUNC_RESTORE
- ret
+ RET
SYM_FUNC_END(aesni_gcm_enc_update_avx_gen4)
###############################################################################
@@ -2785,15 +2785,15 @@ SYM_FUNC_START(aesni_gcm_dec_update_avx_gen4)
# must be 192
GCM_ENC_DEC INITIAL_BLOCKS_AVX2, GHASH_8_ENCRYPT_8_PARALLEL_AVX2, GHASH_LAST_8_AVX2, GHASH_MUL_AVX2, DEC, 11
FUNC_RESTORE
- ret
+ RET
key_128_dec_update4:
GCM_ENC_DEC INITIAL_BLOCKS_AVX2, GHASH_8_ENCRYPT_8_PARALLEL_AVX2, GHASH_LAST_8_AVX2, GHASH_MUL_AVX2, DEC, 9
FUNC_RESTORE
- ret
+ RET
key_256_dec_update4:
GCM_ENC_DEC INITIAL_BLOCKS_AVX2, GHASH_8_ENCRYPT_8_PARALLEL_AVX2, GHASH_LAST_8_AVX2, GHASH_MUL_AVX2, DEC, 13
FUNC_RESTORE
- ret
+ RET
SYM_FUNC_END(aesni_gcm_dec_update_avx_gen4)
###############################################################################
@@ -2814,13 +2814,13 @@ SYM_FUNC_START(aesni_gcm_finalize_avx_gen4)
# must be 192
GCM_COMPLETE GHASH_MUL_AVX2, 11, arg3, arg4
FUNC_RESTORE
- ret
+ RET
key_128_finalize4:
GCM_COMPLETE GHASH_MUL_AVX2, 9, arg3, arg4
FUNC_RESTORE
- ret
+ RET
key_256_finalize4:
GCM_COMPLETE GHASH_MUL_AVX2, 13, arg3, arg4
FUNC_RESTORE
- ret
+ RET
SYM_FUNC_END(aesni_gcm_finalize_avx_gen4)
diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c
index e09f4672dd38..41901ba9d3a2 100644
--- a/arch/x86/crypto/aesni-intel_glue.c
+++ b/arch/x86/crypto/aesni-intel_glue.c
@@ -1107,7 +1107,7 @@ static struct aead_alg aesni_aeads[] = { {
.cra_flags = CRYPTO_ALG_INTERNAL,
.cra_blocksize = 1,
.cra_ctxsize = sizeof(struct aesni_rfc4106_gcm_ctx),
- .cra_alignmask = AESNI_ALIGN - 1,
+ .cra_alignmask = 0,
.cra_module = THIS_MODULE,
},
}, {
@@ -1124,7 +1124,7 @@ static struct aead_alg aesni_aeads[] = { {
.cra_flags = CRYPTO_ALG_INTERNAL,
.cra_blocksize = 1,
.cra_ctxsize = sizeof(struct generic_gcmaes_ctx),
- .cra_alignmask = AESNI_ALIGN - 1,
+ .cra_alignmask = 0,
.cra_module = THIS_MODULE,
},
} };
diff --git a/arch/x86/crypto/blake2s-core.S b/arch/x86/crypto/blake2s-core.S
index 2ca79974f819..b50b35ff1fdb 100644
--- a/arch/x86/crypto/blake2s-core.S
+++ b/arch/x86/crypto/blake2s-core.S
@@ -171,7 +171,7 @@ SYM_FUNC_START(blake2s_compress_ssse3)
movdqu %xmm1,0x10(%rdi)
movdqu %xmm14,0x20(%rdi)
.Lendofloop:
- ret
+ RET
SYM_FUNC_END(blake2s_compress_ssse3)
#ifdef CONFIG_AS_AVX512
@@ -251,6 +251,6 @@ SYM_FUNC_START(blake2s_compress_avx512)
vmovdqu %xmm1,0x10(%rdi)
vmovdqu %xmm4,0x20(%rdi)
vzeroupper
- retq
+ RET
SYM_FUNC_END(blake2s_compress_avx512)
#endif /* CONFIG_AS_AVX512 */
diff --git a/arch/x86/crypto/blake2s-glue.c b/arch/x86/crypto/blake2s-glue.c
index a40365ab301e..69853c13e8fb 100644
--- a/arch/x86/crypto/blake2s-glue.c
+++ b/arch/x86/crypto/blake2s-glue.c
@@ -5,7 +5,6 @@
#include <crypto/internal/blake2s.h>
#include <crypto/internal/simd.h>
-#include <crypto/internal/hash.h>
#include <linux/types.h>
#include <linux/jump_label.h>
@@ -28,9 +27,8 @@ asmlinkage void blake2s_compress_avx512(struct blake2s_state *state,
static __ro_after_init DEFINE_STATIC_KEY_FALSE(blake2s_use_ssse3);
static __ro_after_init DEFINE_STATIC_KEY_FALSE(blake2s_use_avx512);
-void blake2s_compress_arch(struct blake2s_state *state,
- const u8 *block, size_t nblocks,
- const u32 inc)
+void blake2s_compress(struct blake2s_state *state, const u8 *block,
+ size_t nblocks, const u32 inc)
{
/* SIMD disables preemption, so relax after processing each page. */
BUILD_BUG_ON(SZ_4K / BLAKE2S_BLOCK_SIZE < 8);
@@ -56,49 +54,12 @@ void blake2s_compress_arch(struct blake2s_state *state,
block += blocks * BLAKE2S_BLOCK_SIZE;
} while (nblocks);
}
-EXPORT_SYMBOL(blake2s_compress_arch);
-
-static int crypto_blake2s_update_x86(struct shash_desc *desc,
- const u8 *in, unsigned int inlen)
-{
- return crypto_blake2s_update(desc, in, inlen, blake2s_compress_arch);
-}
-
-static int crypto_blake2s_final_x86(struct shash_desc *desc, u8 *out)
-{
- return crypto_blake2s_final(desc, out, blake2s_compress_arch);
-}
-
-#define BLAKE2S_ALG(name, driver_name, digest_size) \
- { \
- .base.cra_name = name, \
- .base.cra_driver_name = driver_name, \
- .base.cra_priority = 200, \
- .base.cra_flags = CRYPTO_ALG_OPTIONAL_KEY, \
- .base.cra_blocksize = BLAKE2S_BLOCK_SIZE, \
- .base.cra_ctxsize = sizeof(struct blake2s_tfm_ctx), \
- .base.cra_module = THIS_MODULE, \
- .digestsize = digest_size, \
- .setkey = crypto_blake2s_setkey, \
- .init = crypto_blake2s_init, \
- .update = crypto_blake2s_update_x86, \
- .final = crypto_blake2s_final_x86, \
- .descsize = sizeof(struct blake2s_state), \
- }
-
-static struct shash_alg blake2s_algs[] = {
- BLAKE2S_ALG("blake2s-128", "blake2s-128-x86", BLAKE2S_128_HASH_SIZE),
- BLAKE2S_ALG("blake2s-160", "blake2s-160-x86", BLAKE2S_160_HASH_SIZE),
- BLAKE2S_ALG("blake2s-224", "blake2s-224-x86", BLAKE2S_224_HASH_SIZE),
- BLAKE2S_ALG("blake2s-256", "blake2s-256-x86", BLAKE2S_256_HASH_SIZE),
-};
+EXPORT_SYMBOL(blake2s_compress);
static int __init blake2s_mod_init(void)
{
- if (!boot_cpu_has(X86_FEATURE_SSSE3))
- return 0;
-
- static_branch_enable(&blake2s_use_ssse3);
+ if (boot_cpu_has(X86_FEATURE_SSSE3))
+ static_branch_enable(&blake2s_use_ssse3);
if (IS_ENABLED(CONFIG_AS_AVX512) &&
boot_cpu_has(X86_FEATURE_AVX) &&
@@ -109,26 +70,9 @@ static int __init blake2s_mod_init(void)
XFEATURE_MASK_AVX512, NULL))
static_branch_enable(&blake2s_use_avx512);
- return IS_REACHABLE(CONFIG_CRYPTO_HASH) ?
- crypto_register_shashes(blake2s_algs,
- ARRAY_SIZE(blake2s_algs)) : 0;
-}
-
-static void __exit blake2s_mod_exit(void)
-{
- if (IS_REACHABLE(CONFIG_CRYPTO_HASH) && boot_cpu_has(X86_FEATURE_SSSE3))
- crypto_unregister_shashes(blake2s_algs, ARRAY_SIZE(blake2s_algs));
+ return 0;
}
module_init(blake2s_mod_init);
-module_exit(blake2s_mod_exit);
-MODULE_ALIAS_CRYPTO("blake2s-128");
-MODULE_ALIAS_CRYPTO("blake2s-128-x86");
-MODULE_ALIAS_CRYPTO("blake2s-160");
-MODULE_ALIAS_CRYPTO("blake2s-160-x86");
-MODULE_ALIAS_CRYPTO("blake2s-224");
-MODULE_ALIAS_CRYPTO("blake2s-224-x86");
-MODULE_ALIAS_CRYPTO("blake2s-256");
-MODULE_ALIAS_CRYPTO("blake2s-256-x86");
MODULE_LICENSE("GPL v2");
diff --git a/arch/x86/crypto/blake2s-shash.c b/arch/x86/crypto/blake2s-shash.c
new file mode 100644
index 000000000000..f9e2fecdb761
--- /dev/null
+++ b/arch/x86/crypto/blake2s-shash.c
@@ -0,0 +1,77 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+/*
+ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+ */
+
+#include <crypto/internal/blake2s.h>
+#include <crypto/internal/simd.h>
+#include <crypto/internal/hash.h>
+
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/sizes.h>
+
+#include <asm/cpufeature.h>
+#include <asm/processor.h>
+
+static int crypto_blake2s_update_x86(struct shash_desc *desc,
+ const u8 *in, unsigned int inlen)
+{
+ return crypto_blake2s_update(desc, in, inlen, blake2s_compress);
+}
+
+static int crypto_blake2s_final_x86(struct shash_desc *desc, u8 *out)
+{
+ return crypto_blake2s_final(desc, out, blake2s_compress);
+}
+
+#define BLAKE2S_ALG(name, driver_name, digest_size) \
+ { \
+ .base.cra_name = name, \
+ .base.cra_driver_name = driver_name, \
+ .base.cra_priority = 200, \
+ .base.cra_flags = CRYPTO_ALG_OPTIONAL_KEY, \
+ .base.cra_blocksize = BLAKE2S_BLOCK_SIZE, \
+ .base.cra_ctxsize = sizeof(struct blake2s_tfm_ctx), \
+ .base.cra_module = THIS_MODULE, \
+ .digestsize = digest_size, \
+ .setkey = crypto_blake2s_setkey, \
+ .init = crypto_blake2s_init, \
+ .update = crypto_blake2s_update_x86, \
+ .final = crypto_blake2s_final_x86, \
+ .descsize = sizeof(struct blake2s_state), \
+ }
+
+static struct shash_alg blake2s_algs[] = {
+ BLAKE2S_ALG("blake2s-128", "blake2s-128-x86", BLAKE2S_128_HASH_SIZE),
+ BLAKE2S_ALG("blake2s-160", "blake2s-160-x86", BLAKE2S_160_HASH_SIZE),
+ BLAKE2S_ALG("blake2s-224", "blake2s-224-x86", BLAKE2S_224_HASH_SIZE),
+ BLAKE2S_ALG("blake2s-256", "blake2s-256-x86", BLAKE2S_256_HASH_SIZE),
+};
+
+static int __init blake2s_mod_init(void)
+{
+ if (IS_REACHABLE(CONFIG_CRYPTO_HASH) && boot_cpu_has(X86_FEATURE_SSSE3))
+ return crypto_register_shashes(blake2s_algs, ARRAY_SIZE(blake2s_algs));
+ return 0;
+}
+
+static void __exit blake2s_mod_exit(void)
+{
+ if (IS_REACHABLE(CONFIG_CRYPTO_HASH) && boot_cpu_has(X86_FEATURE_SSSE3))
+ crypto_unregister_shashes(blake2s_algs, ARRAY_SIZE(blake2s_algs));
+}
+
+module_init(blake2s_mod_init);
+module_exit(blake2s_mod_exit);
+
+MODULE_ALIAS_CRYPTO("blake2s-128");
+MODULE_ALIAS_CRYPTO("blake2s-128-x86");
+MODULE_ALIAS_CRYPTO("blake2s-160");
+MODULE_ALIAS_CRYPTO("blake2s-160-x86");
+MODULE_ALIAS_CRYPTO("blake2s-224");
+MODULE_ALIAS_CRYPTO("blake2s-224-x86");
+MODULE_ALIAS_CRYPTO("blake2s-256");
+MODULE_ALIAS_CRYPTO("blake2s-256-x86");
+MODULE_LICENSE("GPL v2");
diff --git a/arch/x86/crypto/blowfish-x86_64-asm_64.S b/arch/x86/crypto/blowfish-x86_64-asm_64.S
index 4222ac6d6584..802d71582689 100644
--- a/arch/x86/crypto/blowfish-x86_64-asm_64.S
+++ b/arch/x86/crypto/blowfish-x86_64-asm_64.S
@@ -135,10 +135,10 @@ SYM_FUNC_START(__blowfish_enc_blk)
jnz .L__enc_xor;
write_block();
- ret;
+ RET;
.L__enc_xor:
xor_block();
- ret;
+ RET;
SYM_FUNC_END(__blowfish_enc_blk)
SYM_FUNC_START(blowfish_dec_blk)
@@ -170,7 +170,7 @@ SYM_FUNC_START(blowfish_dec_blk)
movq %r11, %r12;
- ret;
+ RET;
SYM_FUNC_END(blowfish_dec_blk)
/**********************************************************************
@@ -322,14 +322,14 @@ SYM_FUNC_START(__blowfish_enc_blk_4way)
popq %rbx;
popq %r12;
- ret;
+ RET;
.L__enc_xor4:
xor_block4();
popq %rbx;
popq %r12;
- ret;
+ RET;
SYM_FUNC_END(__blowfish_enc_blk_4way)
SYM_FUNC_START(blowfish_dec_blk_4way)
@@ -364,5 +364,5 @@ SYM_FUNC_START(blowfish_dec_blk_4way)
popq %rbx;
popq %r12;
- ret;
+ RET;
SYM_FUNC_END(blowfish_dec_blk_4way)
diff --git a/arch/x86/crypto/camellia-aesni-avx-asm_64.S b/arch/x86/crypto/camellia-aesni-avx-asm_64.S
index e2a0e0f4bf9d..2e1658ddbe1a 100644
--- a/arch/x86/crypto/camellia-aesni-avx-asm_64.S
+++ b/arch/x86/crypto/camellia-aesni-avx-asm_64.S
@@ -192,7 +192,7 @@ SYM_FUNC_START_LOCAL(roundsm16_x0_x1_x2_x3_x4_x5_x6_x7_y0_y1_y2_y3_y4_y5_y6_y7_c
roundsm16(%xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7,
%xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, %xmm15,
%rcx, (%r9));
- ret;
+ RET;
SYM_FUNC_END(roundsm16_x0_x1_x2_x3_x4_x5_x6_x7_y0_y1_y2_y3_y4_y5_y6_y7_cd)
.align 8
@@ -200,7 +200,7 @@ SYM_FUNC_START_LOCAL(roundsm16_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_a
roundsm16(%xmm4, %xmm5, %xmm6, %xmm7, %xmm0, %xmm1, %xmm2, %xmm3,
%xmm12, %xmm13, %xmm14, %xmm15, %xmm8, %xmm9, %xmm10, %xmm11,
%rax, (%r9));
- ret;
+ RET;
SYM_FUNC_END(roundsm16_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab)
/*
@@ -778,7 +778,7 @@ SYM_FUNC_START_LOCAL(__camellia_enc_blk16)
%xmm15, (key_table)(CTX, %r8, 8), (%rax), 1 * 16(%rax));
FRAME_END
- ret;
+ RET;
.align 8
.Lenc_max32:
@@ -865,7 +865,7 @@ SYM_FUNC_START_LOCAL(__camellia_dec_blk16)
%xmm15, (key_table)(CTX), (%rax), 1 * 16(%rax));
FRAME_END
- ret;
+ RET;
.align 8
.Ldec_max32:
@@ -906,7 +906,7 @@ SYM_FUNC_START(camellia_ecb_enc_16way)
%xmm8, %rsi);
FRAME_END
- ret;
+ RET;
SYM_FUNC_END(camellia_ecb_enc_16way)
SYM_FUNC_START(camellia_ecb_dec_16way)
@@ -936,7 +936,7 @@ SYM_FUNC_START(camellia_ecb_dec_16way)
%xmm8, %rsi);
FRAME_END
- ret;
+ RET;
SYM_FUNC_END(camellia_ecb_dec_16way)
SYM_FUNC_START(camellia_cbc_dec_16way)
@@ -987,5 +987,5 @@ SYM_FUNC_START(camellia_cbc_dec_16way)
%xmm8, %rsi);
FRAME_END
- ret;
+ RET;
SYM_FUNC_END(camellia_cbc_dec_16way)
diff --git a/arch/x86/crypto/camellia-aesni-avx2-asm_64.S b/arch/x86/crypto/camellia-aesni-avx2-asm_64.S
index 706f70829a07..0e4e9abbf4de 100644
--- a/arch/x86/crypto/camellia-aesni-avx2-asm_64.S
+++ b/arch/x86/crypto/camellia-aesni-avx2-asm_64.S
@@ -226,7 +226,7 @@ SYM_FUNC_START_LOCAL(roundsm32_x0_x1_x2_x3_x4_x5_x6_x7_y0_y1_y2_y3_y4_y5_y6_y7_c
roundsm32(%ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7,
%ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14, %ymm15,
%rcx, (%r9));
- ret;
+ RET;
SYM_FUNC_END(roundsm32_x0_x1_x2_x3_x4_x5_x6_x7_y0_y1_y2_y3_y4_y5_y6_y7_cd)
.align 8
@@ -234,7 +234,7 @@ SYM_FUNC_START_LOCAL(roundsm32_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_a
roundsm32(%ymm4, %ymm5, %ymm6, %ymm7, %ymm0, %ymm1, %ymm2, %ymm3,
%ymm12, %ymm13, %ymm14, %ymm15, %ymm8, %ymm9, %ymm10, %ymm11,
%rax, (%r9));
- ret;
+ RET;
SYM_FUNC_END(roundsm32_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab)
/*
@@ -814,7 +814,7 @@ SYM_FUNC_START_LOCAL(__camellia_enc_blk32)
%ymm15, (key_table)(CTX, %r8, 8), (%rax), 1 * 32(%rax));
FRAME_END
- ret;
+ RET;
.align 8
.Lenc_max32:
@@ -901,7 +901,7 @@ SYM_FUNC_START_LOCAL(__camellia_dec_blk32)
%ymm15, (key_table)(CTX), (%rax), 1 * 32(%rax));
FRAME_END
- ret;
+ RET;
.align 8
.Ldec_max32:
@@ -946,7 +946,7 @@ SYM_FUNC_START(camellia_ecb_enc_32way)
vzeroupper;
FRAME_END
- ret;
+ RET;
SYM_FUNC_END(camellia_ecb_enc_32way)
SYM_FUNC_START(camellia_ecb_dec_32way)
@@ -980,7 +980,7 @@ SYM_FUNC_START(camellia_ecb_dec_32way)
vzeroupper;
FRAME_END
- ret;
+ RET;
SYM_FUNC_END(camellia_ecb_dec_32way)
SYM_FUNC_START(camellia_cbc_dec_32way)
@@ -1047,5 +1047,5 @@ SYM_FUNC_START(camellia_cbc_dec_32way)
addq $(16 * 32), %rsp;
FRAME_END
- ret;
+ RET;
SYM_FUNC_END(camellia_cbc_dec_32way)
diff --git a/arch/x86/crypto/camellia-x86_64-asm_64.S b/arch/x86/crypto/camellia-x86_64-asm_64.S
index 1372e6408850..347c059f5940 100644
--- a/arch/x86/crypto/camellia-x86_64-asm_64.S
+++ b/arch/x86/crypto/camellia-x86_64-asm_64.S
@@ -213,13 +213,13 @@ SYM_FUNC_START(__camellia_enc_blk)
enc_outunpack(mov, RT1);
movq RR12, %r12;
- ret;
+ RET;
.L__enc_xor:
enc_outunpack(xor, RT1);
movq RR12, %r12;
- ret;
+ RET;
SYM_FUNC_END(__camellia_enc_blk)
SYM_FUNC_START(camellia_dec_blk)
@@ -257,7 +257,7 @@ SYM_FUNC_START(camellia_dec_blk)
dec_outunpack();
movq RR12, %r12;
- ret;
+ RET;
SYM_FUNC_END(camellia_dec_blk)
/**********************************************************************
@@ -448,14 +448,14 @@ SYM_FUNC_START(__camellia_enc_blk_2way)
movq RR12, %r12;
popq %rbx;
- ret;
+ RET;
.L__enc2_xor:
enc_outunpack2(xor, RT2);
movq RR12, %r12;
popq %rbx;
- ret;
+ RET;
SYM_FUNC_END(__camellia_enc_blk_2way)
SYM_FUNC_START(camellia_dec_blk_2way)
@@ -495,5 +495,5 @@ SYM_FUNC_START(camellia_dec_blk_2way)
movq RR12, %r12;
movq RXOR, %rbx;
- ret;
+ RET;
SYM_FUNC_END(camellia_dec_blk_2way)
diff --git a/arch/x86/crypto/cast5-avx-x86_64-asm_64.S b/arch/x86/crypto/cast5-avx-x86_64-asm_64.S
index 8a6181b08b59..b258af420c92 100644
--- a/arch/x86/crypto/cast5-avx-x86_64-asm_64.S
+++ b/arch/x86/crypto/cast5-avx-x86_64-asm_64.S
@@ -279,7 +279,7 @@ SYM_FUNC_START_LOCAL(__cast5_enc_blk16)
outunpack_blocks(RR3, RL3, RTMP, RX, RKM);
outunpack_blocks(RR4, RL4, RTMP, RX, RKM);
- ret;
+ RET;
SYM_FUNC_END(__cast5_enc_blk16)
.align 16
@@ -352,7 +352,7 @@ SYM_FUNC_START_LOCAL(__cast5_dec_blk16)
outunpack_blocks(RR3, RL3, RTMP, RX, RKM);
outunpack_blocks(RR4, RL4, RTMP, RX, RKM);
- ret;
+ RET;
.L__skip_dec:
vpsrldq $4, RKR, RKR;
@@ -393,7 +393,7 @@ SYM_FUNC_START(cast5_ecb_enc_16way)
popq %r15;
FRAME_END
- ret;
+ RET;
SYM_FUNC_END(cast5_ecb_enc_16way)
SYM_FUNC_START(cast5_ecb_dec_16way)
@@ -431,7 +431,7 @@ SYM_FUNC_START(cast5_ecb_dec_16way)
popq %r15;
FRAME_END
- ret;
+ RET;
SYM_FUNC_END(cast5_ecb_dec_16way)
SYM_FUNC_START(cast5_cbc_dec_16way)
@@ -483,7 +483,7 @@ SYM_FUNC_START(cast5_cbc_dec_16way)
popq %r15;
popq %r12;
FRAME_END
- ret;
+ RET;
SYM_FUNC_END(cast5_cbc_dec_16way)
SYM_FUNC_START(cast5_ctr_16way)
@@ -559,5 +559,5 @@ SYM_FUNC_START(cast5_ctr_16way)
popq %r15;
popq %r12;
FRAME_END
- ret;
+ RET;
SYM_FUNC_END(cast5_ctr_16way)
diff --git a/arch/x86/crypto/cast6-avx-x86_64-asm_64.S b/arch/x86/crypto/cast6-avx-x86_64-asm_64.S
index fbddcecc3e3f..82b716fd5dba 100644
--- a/arch/x86/crypto/cast6-avx-x86_64-asm_64.S
+++ b/arch/x86/crypto/cast6-avx-x86_64-asm_64.S
@@ -289,7 +289,7 @@ SYM_FUNC_START_LOCAL(__cast6_enc_blk8)
outunpack_blocks(RA1, RB1, RC1, RD1, RTMP, RX, RKRF, RKM);
outunpack_blocks(RA2, RB2, RC2, RD2, RTMP, RX, RKRF, RKM);
- ret;
+ RET;
SYM_FUNC_END(__cast6_enc_blk8)
.align 8
@@ -336,7 +336,7 @@ SYM_FUNC_START_LOCAL(__cast6_dec_blk8)
outunpack_blocks(RA1, RB1, RC1, RD1, RTMP, RX, RKRF, RKM);
outunpack_blocks(RA2, RB2, RC2, RD2, RTMP, RX, RKRF, RKM);
- ret;
+ RET;
SYM_FUNC_END(__cast6_dec_blk8)
SYM_FUNC_START(cast6_ecb_enc_8way)
@@ -359,7 +359,7 @@ SYM_FUNC_START(cast6_ecb_enc_8way)
popq %r15;
FRAME_END
- ret;
+ RET;
SYM_FUNC_END(cast6_ecb_enc_8way)
SYM_FUNC_START(cast6_ecb_dec_8way)
@@ -382,7 +382,7 @@ SYM_FUNC_START(cast6_ecb_dec_8way)
popq %r15;
FRAME_END
- ret;
+ RET;
SYM_FUNC_END(cast6_ecb_dec_8way)
SYM_FUNC_START(cast6_cbc_dec_8way)
@@ -408,5 +408,5 @@ SYM_FUNC_START(cast6_cbc_dec_8way)
popq %r15;
popq %r12;
FRAME_END
- ret;
+ RET;
SYM_FUNC_END(cast6_cbc_dec_8way)
diff --git a/arch/x86/crypto/chacha-avx2-x86_64.S b/arch/x86/crypto/chacha-avx2-x86_64.S
index ee9a40ab4109..f3d8fc018249 100644
--- a/arch/x86/crypto/chacha-avx2-x86_64.S
+++ b/arch/x86/crypto/chacha-avx2-x86_64.S
@@ -193,7 +193,7 @@ SYM_FUNC_START(chacha_2block_xor_avx2)
.Ldone2:
vzeroupper
- ret
+ RET
.Lxorpart2:
# xor remaining bytes from partial register into output
@@ -498,7 +498,7 @@ SYM_FUNC_START(chacha_4block_xor_avx2)
.Ldone4:
vzeroupper
- ret
+ RET
.Lxorpart4:
# xor remaining bytes from partial register into output
@@ -992,7 +992,7 @@ SYM_FUNC_START(chacha_8block_xor_avx2)
.Ldone8:
vzeroupper
lea -8(%r10),%rsp
- ret
+ RET
.Lxorpart8:
# xor remaining bytes from partial register into output
diff --git a/arch/x86/crypto/chacha-avx512vl-x86_64.S b/arch/x86/crypto/chacha-avx512vl-x86_64.S
index bb193fde123a..946f74dd6fba 100644
--- a/arch/x86/crypto/chacha-avx512vl-x86_64.S
+++ b/arch/x86/crypto/chacha-avx512vl-x86_64.S
@@ -166,7 +166,7 @@ SYM_FUNC_START(chacha_2block_xor_avx512vl)
.Ldone2:
vzeroupper
- ret
+ RET
.Lxorpart2:
# xor remaining bytes from partial register into output
@@ -432,7 +432,7 @@ SYM_FUNC_START(chacha_4block_xor_avx512vl)
.Ldone4:
vzeroupper
- ret
+ RET
.Lxorpart4:
# xor remaining bytes from partial register into output
@@ -812,7 +812,7 @@ SYM_FUNC_START(chacha_8block_xor_avx512vl)
.Ldone8:
vzeroupper
- ret
+ RET
.Lxorpart8:
# xor remaining bytes from partial register into output
diff --git a/arch/x86/crypto/chacha-ssse3-x86_64.S b/arch/x86/crypto/chacha-ssse3-x86_64.S
index ca1788bfee16..7111949cd5b9 100644
--- a/arch/x86/crypto/chacha-ssse3-x86_64.S
+++ b/arch/x86/crypto/chacha-ssse3-x86_64.S
@@ -108,7 +108,7 @@ SYM_FUNC_START_LOCAL(chacha_permute)
sub $2,%r8d
jnz .Ldoubleround
- ret
+ RET
SYM_FUNC_END(chacha_permute)
SYM_FUNC_START(chacha_block_xor_ssse3)
@@ -166,7 +166,7 @@ SYM_FUNC_START(chacha_block_xor_ssse3)
.Ldone:
FRAME_END
- ret
+ RET
.Lxorpart:
# xor remaining bytes from partial register into output
@@ -217,7 +217,7 @@ SYM_FUNC_START(hchacha_block_ssse3)
movdqu %xmm3,0x10(%rsi)
FRAME_END
- ret
+ RET
SYM_FUNC_END(hchacha_block_ssse3)
SYM_FUNC_START(chacha_4block_xor_ssse3)
@@ -762,7 +762,7 @@ SYM_FUNC_START(chacha_4block_xor_ssse3)
.Ldone4:
lea -8(%r10),%rsp
- ret
+ RET
.Lxorpart4:
# xor remaining bytes from partial register into output
diff --git a/arch/x86/crypto/crc32-pclmul_asm.S b/arch/x86/crypto/crc32-pclmul_asm.S
index 6e7d4c4d3208..c392a6edbfff 100644
--- a/arch/x86/crypto/crc32-pclmul_asm.S
+++ b/arch/x86/crypto/crc32-pclmul_asm.S
@@ -236,5 +236,5 @@ fold_64:
pxor %xmm2, %xmm1
pextrd $0x01, %xmm1, %eax
- ret
+ RET
SYM_FUNC_END(crc32_pclmul_le_16)
diff --git a/arch/x86/crypto/crc32c-pcl-intel-asm_64.S b/arch/x86/crypto/crc32c-pcl-intel-asm_64.S
index ac1f303eed0f..80c0d22fc42c 100644
--- a/arch/x86/crypto/crc32c-pcl-intel-asm_64.S
+++ b/arch/x86/crypto/crc32c-pcl-intel-asm_64.S
@@ -306,7 +306,7 @@ do_return:
popq %rsi
popq %rdi
popq %rbx
- ret
+ RET
SYM_FUNC_END(crc_pcl)
.section .rodata, "a", @progbits
diff --git a/arch/x86/crypto/crct10dif-pcl-asm_64.S b/arch/x86/crypto/crct10dif-pcl-asm_64.S
index b2533d63030e..721474abfb71 100644
--- a/arch/x86/crypto/crct10dif-pcl-asm_64.S
+++ b/arch/x86/crypto/crct10dif-pcl-asm_64.S
@@ -257,7 +257,7 @@ SYM_FUNC_START(crc_t10dif_pcl)
# Final CRC value (x^16 * M(x)) mod G(x) is in low 16 bits of xmm0.
pextrw $0, %xmm0, %eax
- ret
+ RET
.align 16
.Lless_than_256_bytes:
diff --git a/arch/x86/crypto/curve25519-x86_64.c b/arch/x86/crypto/curve25519-x86_64.c
index 38caf61cd5b7..d55fa9e9b9e6 100644
--- a/arch/x86/crypto/curve25519-x86_64.c
+++ b/arch/x86/crypto/curve25519-x86_64.c
@@ -64,10 +64,9 @@ static inline u64 add_scalar(u64 *out, const u64 *f1, u64 f2)
/* Return the carry bit in a register */
" adcx %%r11, %1;"
- : "+&r" (f2), "=&r" (carry_r)
- : "r" (out), "r" (f1)
- : "%r8", "%r9", "%r10", "%r11", "memory", "cc"
- );
+ : "+&r"(f2), "=&r"(carry_r)
+ : "r"(out), "r"(f1)
+ : "%r8", "%r9", "%r10", "%r11", "memory", "cc");
return carry_r;
}
@@ -108,10 +107,9 @@ static inline void fadd(u64 *out, const u64 *f1, const u64 *f2)
" cmovc %0, %%rax;"
" add %%rax, %%r8;"
" movq %%r8, 0(%1);"
- : "+&r" (f2)
- : "r" (out), "r" (f1)
- : "%rax", "%rcx", "%r8", "%r9", "%r10", "%r11", "memory", "cc"
- );
+ : "+&r"(f2)
+ : "r"(out), "r"(f1)
+ : "%rax", "%rcx", "%r8", "%r9", "%r10", "%r11", "memory", "cc");
}
/* Computes the field subtraction of two field elements */
@@ -151,10 +149,9 @@ static inline void fsub(u64 *out, const u64 *f1, const u64 *f2)
" movq %%r9, 8(%0);"
" movq %%r10, 16(%0);"
" movq %%r11, 24(%0);"
- :
- : "r" (out), "r" (f1), "r" (f2)
- : "%rax", "%rcx", "%r8", "%r9", "%r10", "%r11", "memory", "cc"
- );
+ :
+ : "r"(out), "r"(f1), "r"(f2)
+ : "%rax", "%rcx", "%r8", "%r9", "%r10", "%r11", "memory", "cc");
}
/* Computes a field multiplication: out <- f1 * f2
@@ -162,239 +159,400 @@ static inline void fsub(u64 *out, const u64 *f1, const u64 *f2)
static inline void fmul(u64 *out, const u64 *f1, const u64 *f2, u64 *tmp)
{
asm volatile(
+
/* Compute the raw multiplication: tmp <- src1 * src2 */
/* Compute src1[0] * src2 */
- " movq 0(%1), %%rdx;"
- " mulxq 0(%3), %%r8, %%r9;" " xor %%r10d, %%r10d;" " movq %%r8, 0(%0);"
- " mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " movq %%r10, 8(%0);"
- " mulxq 16(%3), %%rbx, %%r13;" " adox %%r11, %%rbx;"
- " mulxq 24(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " mov $0, %%rax;"
- " adox %%rdx, %%rax;"
+ " movq 0(%0), %%rdx;"
+ " mulxq 0(%1), %%r8, %%r9;"
+ " xor %%r10d, %%r10d;"
+ " movq %%r8, 0(%2);"
+ " mulxq 8(%1), %%r10, %%r11;"
+ " adox %%r9, %%r10;"
+ " movq %%r10, 8(%2);"
+ " mulxq 16(%1), %%rbx, %%r13;"
+ " adox %%r11, %%rbx;"
+ " mulxq 24(%1), %%r14, %%rdx;"
+ " adox %%r13, %%r14;"
+ " mov $0, %%rax;"
+ " adox %%rdx, %%rax;"
+
/* Compute src1[1] * src2 */
- " movq 8(%1), %%rdx;"
- " mulxq 0(%3), %%r8, %%r9;" " xor %%r10d, %%r10d;" " adcxq 8(%0), %%r8;" " movq %%r8, 8(%0);"
- " mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%rbx, %%r10;" " movq %%r10, 16(%0);"
- " mulxq 16(%3), %%rbx, %%r13;" " adox %%r11, %%rbx;" " adcx %%r14, %%rbx;" " mov $0, %%r8;"
- " mulxq 24(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " adcx %%rax, %%r14;" " mov $0, %%rax;"
- " adox %%rdx, %%rax;" " adcx %%r8, %%rax;"
+ " movq 8(%0), %%rdx;"
+ " mulxq 0(%1), %%r8, %%r9;"
+ " xor %%r10d, %%r10d;"
+ " adcxq 8(%2), %%r8;"
+ " movq %%r8, 8(%2);"
+ " mulxq 8(%1), %%r10, %%r11;"
+ " adox %%r9, %%r10;"
+ " adcx %%rbx, %%r10;"
+ " movq %%r10, 16(%2);"
+ " mulxq 16(%1), %%rbx, %%r13;"
+ " adox %%r11, %%rbx;"
+ " adcx %%r14, %%rbx;"
+ " mov $0, %%r8;"
+ " mulxq 24(%1), %%r14, %%rdx;"
+ " adox %%r13, %%r14;"
+ " adcx %%rax, %%r14;"
+ " mov $0, %%rax;"
+ " adox %%rdx, %%rax;"
+ " adcx %%r8, %%rax;"
+
/* Compute src1[2] * src2 */
- " movq 16(%1), %%rdx;"
- " mulxq 0(%3), %%r8, %%r9;" " xor %%r10d, %%r10d;" " adcxq 16(%0), %%r8;" " movq %%r8, 16(%0);"
- " mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%rbx, %%r10;" " movq %%r10, 24(%0);"
- " mulxq 16(%3), %%rbx, %%r13;" " adox %%r11, %%rbx;" " adcx %%r14, %%rbx;" " mov $0, %%r8;"
- " mulxq 24(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " adcx %%rax, %%r14;" " mov $0, %%rax;"
- " adox %%rdx, %%rax;" " adcx %%r8, %%rax;"
+ " movq 16(%0), %%rdx;"
+ " mulxq 0(%1), %%r8, %%r9;"
+ " xor %%r10d, %%r10d;"
+ " adcxq 16(%2), %%r8;"
+ " movq %%r8, 16(%2);"
+ " mulxq 8(%1), %%r10, %%r11;"
+ " adox %%r9, %%r10;"
+ " adcx %%rbx, %%r10;"
+ " movq %%r10, 24(%2);"
+ " mulxq 16(%1), %%rbx, %%r13;"
+ " adox %%r11, %%rbx;"
+ " adcx %%r14, %%rbx;"
+ " mov $0, %%r8;"
+ " mulxq 24(%1), %%r14, %%rdx;"
+ " adox %%r13, %%r14;"
+ " adcx %%rax, %%r14;"
+ " mov $0, %%rax;"
+ " adox %%rdx, %%rax;"
+ " adcx %%r8, %%rax;"
+
/* Compute src1[3] * src2 */
- " movq 24(%1), %%rdx;"
- " mulxq 0(%3), %%r8, %%r9;" " xor %%r10d, %%r10d;" " adcxq 24(%0), %%r8;" " movq %%r8, 24(%0);"
- " mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%rbx, %%r10;" " movq %%r10, 32(%0);"
- " mulxq 16(%3), %%rbx, %%r13;" " adox %%r11, %%rbx;" " adcx %%r14, %%rbx;" " movq %%rbx, 40(%0);" " mov $0, %%r8;"
- " mulxq 24(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " adcx %%rax, %%r14;" " movq %%r14, 48(%0);" " mov $0, %%rax;"
- " adox %%rdx, %%rax;" " adcx %%r8, %%rax;" " movq %%rax, 56(%0);"
+ " movq 24(%0), %%rdx;"
+ " mulxq 0(%1), %%r8, %%r9;"
+ " xor %%r10d, %%r10d;"
+ " adcxq 24(%2), %%r8;"
+ " movq %%r8, 24(%2);"
+ " mulxq 8(%1), %%r10, %%r11;"
+ " adox %%r9, %%r10;"
+ " adcx %%rbx, %%r10;"
+ " movq %%r10, 32(%2);"
+ " mulxq 16(%1), %%rbx, %%r13;"
+ " adox %%r11, %%rbx;"
+ " adcx %%r14, %%rbx;"
+ " movq %%rbx, 40(%2);"
+ " mov $0, %%r8;"
+ " mulxq 24(%1), %%r14, %%rdx;"
+ " adox %%r13, %%r14;"
+ " adcx %%rax, %%r14;"
+ " movq %%r14, 48(%2);"
+ " mov $0, %%rax;"
+ " adox %%rdx, %%rax;"
+ " adcx %%r8, %%rax;"
+ " movq %%rax, 56(%2);"
+
/* Line up pointers */
- " mov %0, %1;"
" mov %2, %0;"
+ " mov %3, %2;"
/* Wrap the result back into the field */
/* Step 1: Compute dst + carry == tmp_hi * 38 + tmp_lo */
" mov $38, %%rdx;"
- " mulxq 32(%1), %%r8, %%r13;"
- " xor %k3, %k3;"
- " adoxq 0(%1), %%r8;"
- " mulxq 40(%1), %%r9, %%rbx;"
+ " mulxq 32(%0), %%r8, %%r13;"
+ " xor %k1, %k1;"
+ " adoxq 0(%0), %%r8;"
+ " mulxq 40(%0), %%r9, %%rbx;"
" adcx %%r13, %%r9;"
- " adoxq 8(%1), %%r9;"
- " mulxq 48(%1), %%r10, %%r13;"
+ " adoxq 8(%0), %%r9;"
+ " mulxq 48(%0), %%r10, %%r13;"
" adcx %%rbx, %%r10;"
- " adoxq 16(%1), %%r10;"
- " mulxq 56(%1), %%r11, %%rax;"
+ " adoxq 16(%0), %%r10;"
+ " mulxq 56(%0), %%r11, %%rax;"
" adcx %%r13, %%r11;"
- " adoxq 24(%1), %%r11;"
- " adcx %3, %%rax;"
- " adox %3, %%rax;"
+ " adoxq 24(%0), %%r11;"
+ " adcx %1, %%rax;"
+ " adox %1, %%rax;"
" imul %%rdx, %%rax;"
/* Step 2: Fold the carry back into dst */
" add %%rax, %%r8;"
- " adcx %3, %%r9;"
- " movq %%r9, 8(%0);"
- " adcx %3, %%r10;"
- " movq %%r10, 16(%0);"
- " adcx %3, %%r11;"
- " movq %%r11, 24(%0);"
+ " adcx %1, %%r9;"
+ " movq %%r9, 8(%2);"
+ " adcx %1, %%r10;"
+ " movq %%r10, 16(%2);"
+ " adcx %1, %%r11;"
+ " movq %%r11, 24(%2);"
/* Step 3: Fold the carry bit back in; guaranteed not to carry at this point */
" mov $0, %%rax;"
" cmovc %%rdx, %%rax;"
" add %%rax, %%r8;"
- " movq %%r8, 0(%0);"
- : "+&r" (tmp), "+&r" (f1), "+&r" (out), "+&r" (f2)
- :
- : "%rax", "%rdx", "%r8", "%r9", "%r10", "%r11", "%rbx", "%r13", "%r14", "memory", "cc"
- );
+ " movq %%r8, 0(%2);"
+ : "+&r"(f1), "+&r"(f2), "+&r"(tmp)
+ : "r"(out)
+ : "%rax", "%rbx", "%rdx", "%r8", "%r9", "%r10", "%r11", "%r13",
+ "%r14", "memory", "cc");
}
/* Computes two field multiplications:
- * out[0] <- f1[0] * f2[0]
- * out[1] <- f1[1] * f2[1]
- * Uses the 16-element buffer tmp for intermediate results. */
+ * out[0] <- f1[0] * f2[0]
+ * out[1] <- f1[1] * f2[1]
+ * Uses the 16-element buffer tmp for intermediate results: */
static inline void fmul2(u64 *out, const u64 *f1, const u64 *f2, u64 *tmp)
{
asm volatile(
+
/* Compute the raw multiplication tmp[0] <- f1[0] * f2[0] */
/* Compute src1[0] * src2 */
- " movq 0(%1), %%rdx;"
- " mulxq 0(%3), %%r8, %%r9;" " xor %%r10d, %%r10d;" " movq %%r8, 0(%0);"
- " mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " movq %%r10, 8(%0);"
- " mulxq 16(%3), %%rbx, %%r13;" " adox %%r11, %%rbx;"
- " mulxq 24(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " mov $0, %%rax;"
- " adox %%rdx, %%rax;"
+ " movq 0(%0), %%rdx;"
+ " mulxq 0(%1), %%r8, %%r9;"
+ " xor %%r10d, %%r10d;"
+ " movq %%r8, 0(%2);"
+ " mulxq 8(%1), %%r10, %%r11;"
+ " adox %%r9, %%r10;"
+ " movq %%r10, 8(%2);"
+ " mulxq 16(%1), %%rbx, %%r13;"
+ " adox %%r11, %%rbx;"
+ " mulxq 24(%1), %%r14, %%rdx;"
+ " adox %%r13, %%r14;"
+ " mov $0, %%rax;"
+ " adox %%rdx, %%rax;"
+
/* Compute src1[1] * src2 */
- " movq 8(%1), %%rdx;"
- " mulxq 0(%3), %%r8, %%r9;" " xor %%r10d, %%r10d;" " adcxq 8(%0), %%r8;" " movq %%r8, 8(%0);"
- " mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%rbx, %%r10;" " movq %%r10, 16(%0);"
- " mulxq 16(%3), %%rbx, %%r13;" " adox %%r11, %%rbx;" " adcx %%r14, %%rbx;" " mov $0, %%r8;"
- " mulxq 24(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " adcx %%rax, %%r14;" " mov $0, %%rax;"
- " adox %%rdx, %%rax;" " adcx %%r8, %%rax;"
+ " movq 8(%0), %%rdx;"
+ " mulxq 0(%1), %%r8, %%r9;"
+ " xor %%r10d, %%r10d;"
+ " adcxq 8(%2), %%r8;"
+ " movq %%r8, 8(%2);"
+ " mulxq 8(%1), %%r10, %%r11;"
+ " adox %%r9, %%r10;"
+ " adcx %%rbx, %%r10;"
+ " movq %%r10, 16(%2);"
+ " mulxq 16(%1), %%rbx, %%r13;"
+ " adox %%r11, %%rbx;"
+ " adcx %%r14, %%rbx;"
+ " mov $0, %%r8;"
+ " mulxq 24(%1), %%r14, %%rdx;"
+ " adox %%r13, %%r14;"
+ " adcx %%rax, %%r14;"
+ " mov $0, %%rax;"
+ " adox %%rdx, %%rax;"
+ " adcx %%r8, %%rax;"
+
/* Compute src1[2] * src2 */
- " movq 16(%1), %%rdx;"
- " mulxq 0(%3), %%r8, %%r9;" " xor %%r10d, %%r10d;" " adcxq 16(%0), %%r8;" " movq %%r8, 16(%0);"
- " mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%rbx, %%r10;" " movq %%r10, 24(%0);"
- " mulxq 16(%3), %%rbx, %%r13;" " adox %%r11, %%rbx;" " adcx %%r14, %%rbx;" " mov $0, %%r8;"
- " mulxq 24(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " adcx %%rax, %%r14;" " mov $0, %%rax;"
- " adox %%rdx, %%rax;" " adcx %%r8, %%rax;"
+ " movq 16(%0), %%rdx;"
+ " mulxq 0(%1), %%r8, %%r9;"
+ " xor %%r10d, %%r10d;"
+ " adcxq 16(%2), %%r8;"
+ " movq %%r8, 16(%2);"
+ " mulxq 8(%1), %%r10, %%r11;"
+ " adox %%r9, %%r10;"
+ " adcx %%rbx, %%r10;"
+ " movq %%r10, 24(%2);"
+ " mulxq 16(%1), %%rbx, %%r13;"
+ " adox %%r11, %%rbx;"
+ " adcx %%r14, %%rbx;"
+ " mov $0, %%r8;"
+ " mulxq 24(%1), %%r14, %%rdx;"
+ " adox %%r13, %%r14;"
+ " adcx %%rax, %%r14;"
+ " mov $0, %%rax;"
+ " adox %%rdx, %%rax;"
+ " adcx %%r8, %%rax;"
+
/* Compute src1[3] * src2 */
- " movq 24(%1), %%rdx;"
- " mulxq 0(%3), %%r8, %%r9;" " xor %%r10d, %%r10d;" " adcxq 24(%0), %%r8;" " movq %%r8, 24(%0);"
- " mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%rbx, %%r10;" " movq %%r10, 32(%0);"
- " mulxq 16(%3), %%rbx, %%r13;" " adox %%r11, %%rbx;" " adcx %%r14, %%rbx;" " movq %%rbx, 40(%0);" " mov $0, %%r8;"
- " mulxq 24(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " adcx %%rax, %%r14;" " movq %%r14, 48(%0);" " mov $0, %%rax;"
- " adox %%rdx, %%rax;" " adcx %%r8, %%rax;" " movq %%rax, 56(%0);"
+ " movq 24(%0), %%rdx;"
+ " mulxq 0(%1), %%r8, %%r9;"
+ " xor %%r10d, %%r10d;"
+ " adcxq 24(%2), %%r8;"
+ " movq %%r8, 24(%2);"
+ " mulxq 8(%1), %%r10, %%r11;"
+ " adox %%r9, %%r10;"
+ " adcx %%rbx, %%r10;"
+ " movq %%r10, 32(%2);"
+ " mulxq 16(%1), %%rbx, %%r13;"
+ " adox %%r11, %%rbx;"
+ " adcx %%r14, %%rbx;"
+ " movq %%rbx, 40(%2);"
+ " mov $0, %%r8;"
+ " mulxq 24(%1), %%r14, %%rdx;"
+ " adox %%r13, %%r14;"
+ " adcx %%rax, %%r14;"
+ " movq %%r14, 48(%2);"
+ " mov $0, %%rax;"
+ " adox %%rdx, %%rax;"
+ " adcx %%r8, %%rax;"
+ " movq %%rax, 56(%2);"
/* Compute the raw multiplication tmp[1] <- f1[1] * f2[1] */
/* Compute src1[0] * src2 */
- " movq 32(%1), %%rdx;"
- " mulxq 32(%3), %%r8, %%r9;" " xor %%r10d, %%r10d;" " movq %%r8, 64(%0);"
- " mulxq 40(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " movq %%r10, 72(%0);"
- " mulxq 48(%3), %%rbx, %%r13;" " adox %%r11, %%rbx;"
- " mulxq 56(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " mov $0, %%rax;"
- " adox %%rdx, %%rax;"
+ " movq 32(%0), %%rdx;"
+ " mulxq 32(%1), %%r8, %%r9;"
+ " xor %%r10d, %%r10d;"
+ " movq %%r8, 64(%2);"
+ " mulxq 40(%1), %%r10, %%r11;"
+ " adox %%r9, %%r10;"
+ " movq %%r10, 72(%2);"
+ " mulxq 48(%1), %%rbx, %%r13;"
+ " adox %%r11, %%rbx;"
+ " mulxq 56(%1), %%r14, %%rdx;"
+ " adox %%r13, %%r14;"
+ " mov $0, %%rax;"
+ " adox %%rdx, %%rax;"
+
/* Compute src1[1] * src2 */
- " movq 40(%1), %%rdx;"
- " mulxq 32(%3), %%r8, %%r9;" " xor %%r10d, %%r10d;" " adcxq 72(%0), %%r8;" " movq %%r8, 72(%0);"
- " mulxq 40(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%rbx, %%r10;" " movq %%r10, 80(%0);"
- " mulxq 48(%3), %%rbx, %%r13;" " adox %%r11, %%rbx;" " adcx %%r14, %%rbx;" " mov $0, %%r8;"
- " mulxq 56(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " adcx %%rax, %%r14;" " mov $0, %%rax;"
- " adox %%rdx, %%rax;" " adcx %%r8, %%rax;"
+ " movq 40(%0), %%rdx;"
+ " mulxq 32(%1), %%r8, %%r9;"
+ " xor %%r10d, %%r10d;"
+ " adcxq 72(%2), %%r8;"
+ " movq %%r8, 72(%2);"
+ " mulxq 40(%1), %%r10, %%r11;"
+ " adox %%r9, %%r10;"
+ " adcx %%rbx, %%r10;"
+ " movq %%r10, 80(%2);"
+ " mulxq 48(%1), %%rbx, %%r13;"
+ " adox %%r11, %%rbx;"
+ " adcx %%r14, %%rbx;"
+ " mov $0, %%r8;"
+ " mulxq 56(%1), %%r14, %%rdx;"
+ " adox %%r13, %%r14;"
+ " adcx %%rax, %%r14;"
+ " mov $0, %%rax;"
+ " adox %%rdx, %%rax;"
+ " adcx %%r8, %%rax;"
+
/* Compute src1[2] * src2 */
- " movq 48(%1), %%rdx;"
- " mulxq 32(%3), %%r8, %%r9;" " xor %%r10d, %%r10d;" " adcxq 80(%0), %%r8;" " movq %%r8, 80(%0);"
- " mulxq 40(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%rbx, %%r10;" " movq %%r10, 88(%0);"
- " mulxq 48(%3), %%rbx, %%r13;" " adox %%r11, %%rbx;" " adcx %%r14, %%rbx;" " mov $0, %%r8;"
- " mulxq 56(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " adcx %%rax, %%r14;" " mov $0, %%rax;"
- " adox %%rdx, %%rax;" " adcx %%r8, %%rax;"
+ " movq 48(%0), %%rdx;"
+ " mulxq 32(%1), %%r8, %%r9;"
+ " xor %%r10d, %%r10d;"
+ " adcxq 80(%2), %%r8;"
+ " movq %%r8, 80(%2);"
+ " mulxq 40(%1), %%r10, %%r11;"
+ " adox %%r9, %%r10;"
+ " adcx %%rbx, %%r10;"
+ " movq %%r10, 88(%2);"
+ " mulxq 48(%1), %%rbx, %%r13;"
+ " adox %%r11, %%rbx;"
+ " adcx %%r14, %%rbx;"
+ " mov $0, %%r8;"
+ " mulxq 56(%1), %%r14, %%rdx;"
+ " adox %%r13, %%r14;"
+ " adcx %%rax, %%r14;"
+ " mov $0, %%rax;"
+ " adox %%rdx, %%rax;"
+ " adcx %%r8, %%rax;"
+
/* Compute src1[3] * src2 */
- " movq 56(%1), %%rdx;"
- " mulxq 32(%3), %%r8, %%r9;" " xor %%r10d, %%r10d;" " adcxq 88(%0), %%r8;" " movq %%r8, 88(%0);"
- " mulxq 40(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%rbx, %%r10;" " movq %%r10, 96(%0);"
- " mulxq 48(%3), %%rbx, %%r13;" " adox %%r11, %%rbx;" " adcx %%r14, %%rbx;" " movq %%rbx, 104(%0);" " mov $0, %%r8;"
- " mulxq 56(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " adcx %%rax, %%r14;" " movq %%r14, 112(%0);" " mov $0, %%rax;"
- " adox %%rdx, %%rax;" " adcx %%r8, %%rax;" " movq %%rax, 120(%0);"
+ " movq 56(%0), %%rdx;"
+ " mulxq 32(%1), %%r8, %%r9;"
+ " xor %%r10d, %%r10d;"
+ " adcxq 88(%2), %%r8;"
+ " movq %%r8, 88(%2);"
+ " mulxq 40(%1), %%r10, %%r11;"
+ " adox %%r9, %%r10;"
+ " adcx %%rbx, %%r10;"
+ " movq %%r10, 96(%2);"
+ " mulxq 48(%1), %%rbx, %%r13;"
+ " adox %%r11, %%rbx;"
+ " adcx %%r14, %%rbx;"
+ " movq %%rbx, 104(%2);"
+ " mov $0, %%r8;"
+ " mulxq 56(%1), %%r14, %%rdx;"
+ " adox %%r13, %%r14;"
+ " adcx %%rax, %%r14;"
+ " movq %%r14, 112(%2);"
+ " mov $0, %%rax;"
+ " adox %%rdx, %%rax;"
+ " adcx %%r8, %%rax;"
+ " movq %%rax, 120(%2);"
+
/* Line up pointers */
- " mov %0, %1;"
" mov %2, %0;"
+ " mov %3, %2;"
/* Wrap the results back into the field */
/* Step 1: Compute dst + carry == tmp_hi * 38 + tmp_lo */
" mov $38, %%rdx;"
- " mulxq 32(%1), %%r8, %%r13;"
- " xor %k3, %k3;"
- " adoxq 0(%1), %%r8;"
- " mulxq 40(%1), %%r9, %%rbx;"
+ " mulxq 32(%0), %%r8, %%r13;"
+ " xor %k1, %k1;"
+ " adoxq 0(%0), %%r8;"
+ " mulxq 40(%0), %%r9, %%rbx;"
" adcx %%r13, %%r9;"
- " adoxq 8(%1), %%r9;"
- " mulxq 48(%1), %%r10, %%r13;"
+ " adoxq 8(%0), %%r9;"
+ " mulxq 48(%0), %%r10, %%r13;"
" adcx %%rbx, %%r10;"
- " adoxq 16(%1), %%r10;"
- " mulxq 56(%1), %%r11, %%rax;"
+ " adoxq 16(%0), %%r10;"
+ " mulxq 56(%0), %%r11, %%rax;"
" adcx %%r13, %%r11;"
- " adoxq 24(%1), %%r11;"
- " adcx %3, %%rax;"
- " adox %3, %%rax;"
+ " adoxq 24(%0), %%r11;"
+ " adcx %1, %%rax;"
+ " adox %1, %%rax;"
" imul %%rdx, %%rax;"
/* Step 2: Fold the carry back into dst */
" add %%rax, %%r8;"
- " adcx %3, %%r9;"
- " movq %%r9, 8(%0);"
- " adcx %3, %%r10;"
- " movq %%r10, 16(%0);"
- " adcx %3, %%r11;"
- " movq %%r11, 24(%0);"
+ " adcx %1, %%r9;"
+ " movq %%r9, 8(%2);"
+ " adcx %1, %%r10;"
+ " movq %%r10, 16(%2);"
+ " adcx %1, %%r11;"
+ " movq %%r11, 24(%2);"
/* Step 3: Fold the carry bit back in; guaranteed not to carry at this point */
" mov $0, %%rax;"
" cmovc %%rdx, %%rax;"
" add %%rax, %%r8;"
- " movq %%r8, 0(%0);"
+ " movq %%r8, 0(%2);"
/* Step 1: Compute dst + carry == tmp_hi * 38 + tmp_lo */
" mov $38, %%rdx;"
- " mulxq 96(%1), %%r8, %%r13;"
- " xor %k3, %k3;"
- " adoxq 64(%1), %%r8;"
- " mulxq 104(%1), %%r9, %%rbx;"
+ " mulxq 96(%0), %%r8, %%r13;"
+ " xor %k1, %k1;"
+ " adoxq 64(%0), %%r8;"
+ " mulxq 104(%0), %%r9, %%rbx;"
" adcx %%r13, %%r9;"
- " adoxq 72(%1), %%r9;"
- " mulxq 112(%1), %%r10, %%r13;"
+ " adoxq 72(%0), %%r9;"
+ " mulxq 112(%0), %%r10, %%r13;"
" adcx %%rbx, %%r10;"
- " adoxq 80(%1), %%r10;"
- " mulxq 120(%1), %%r11, %%rax;"
+ " adoxq 80(%0), %%r10;"
+ " mulxq 120(%0), %%r11, %%rax;"
" adcx %%r13, %%r11;"
- " adoxq 88(%1), %%r11;"
- " adcx %3, %%rax;"
- " adox %3, %%rax;"
+ " adoxq 88(%0), %%r11;"
+ " adcx %1, %%rax;"
+ " adox %1, %%rax;"
" imul %%rdx, %%rax;"
/* Step 2: Fold the carry back into dst */
" add %%rax, %%r8;"
- " adcx %3, %%r9;"
- " movq %%r9, 40(%0);"
- " adcx %3, %%r10;"
- " movq %%r10, 48(%0);"
- " adcx %3, %%r11;"
- " movq %%r11, 56(%0);"
+ " adcx %1, %%r9;"
+ " movq %%r9, 40(%2);"
+ " adcx %1, %%r10;"
+ " movq %%r10, 48(%2);"
+ " adcx %1, %%r11;"
+ " movq %%r11, 56(%2);"
/* Step 3: Fold the carry bit back in; guaranteed not to carry at this point */
" mov $0, %%rax;"
" cmovc %%rdx, %%rax;"
" add %%rax, %%r8;"
- " movq %%r8, 32(%0);"
- : "+&r" (tmp), "+&r" (f1), "+&r" (out), "+&r" (f2)
- :
- : "%rax", "%rdx", "%r8", "%r9", "%r10", "%r11", "%rbx", "%r13", "%r14", "memory", "cc"
- );
+ " movq %%r8, 32(%2);"
+ : "+&r"(f1), "+&r"(f2), "+&r"(tmp)
+ : "r"(out)
+ : "%rax", "%rbx", "%rdx", "%r8", "%r9", "%r10", "%r11", "%r13",
+ "%r14", "memory", "cc");
}
-/* Computes the field multiplication of four-element f1 with value in f2 */
+/* Computes the field multiplication of four-element f1 with value in f2
+ * Requires f2 to be smaller than 2^17 */
static inline void fmul_scalar(u64 *out, const u64 *f1, u64 f2)
{
register u64 f2_r asm("rdx") = f2;
asm volatile(
/* Compute the raw multiplication of f1*f2 */
- " mulxq 0(%2), %%r8, %%rcx;" /* f1[0]*f2 */
- " mulxq 8(%2), %%r9, %%rbx;" /* f1[1]*f2 */
+ " mulxq 0(%2), %%r8, %%rcx;" /* f1[0]*f2 */
+ " mulxq 8(%2), %%r9, %%rbx;" /* f1[1]*f2 */
" add %%rcx, %%r9;"
" mov $0, %%rcx;"
- " mulxq 16(%2), %%r10, %%r13;" /* f1[2]*f2 */
+ " mulxq 16(%2), %%r10, %%r13;" /* f1[2]*f2 */
" adcx %%rbx, %%r10;"
- " mulxq 24(%2), %%r11, %%rax;" /* f1[3]*f2 */
+ " mulxq 24(%2), %%r11, %%rax;" /* f1[3]*f2 */
" adcx %%r13, %%r11;"
" adcx %%rcx, %%rax;"
@@ -418,17 +576,17 @@ static inline void fmul_scalar(u64 *out, const u64 *f1, u64 f2)
" cmovc %%rdx, %%rax;"
" add %%rax, %%r8;"
" movq %%r8, 0(%1);"
- : "+&r" (f2_r)
- : "r" (out), "r" (f1)
- : "%rax", "%rcx", "%r8", "%r9", "%r10", "%r11", "%rbx", "%r13", "memory", "cc"
- );
+ : "+&r"(f2_r)
+ : "r"(out), "r"(f1)
+ : "%rax", "%rbx", "%rcx", "%r8", "%r9", "%r10", "%r11", "%r13",
+ "memory", "cc");
}
/* Computes p1 <- bit ? p2 : p1 in constant time */
static inline void cswap2(u64 bit, const u64 *p1, const u64 *p2)
{
asm volatile(
- /* Invert the polarity of bit to match cmov expectations */
+ /* Transfer bit into CF flag */
" add $18446744073709551615, %0;"
/* cswap p1[0], p2[0] */
@@ -502,10 +660,9 @@ static inline void cswap2(u64 bit, const u64 *p1, const u64 *p2)
" cmovc %%r10, %%r9;"
" movq %%r8, 56(%1);"
" movq %%r9, 56(%2);"
- : "+&r" (bit)
- : "r" (p1), "r" (p2)
- : "%r8", "%r9", "%r10", "memory", "cc"
- );
+ : "+&r"(bit)
+ : "r"(p1), "r"(p2)
+ : "%r8", "%r9", "%r10", "memory", "cc");
}
/* Computes the square of a field element: out <- f * f
@@ -516,15 +673,22 @@ static inline void fsqr(u64 *out, const u64 *f, u64 *tmp)
/* Compute the raw multiplication: tmp <- f * f */
/* Step 1: Compute all partial products */
- " movq 0(%1), %%rdx;" /* f[0] */
- " mulxq 8(%1), %%r8, %%r14;" " xor %%r15d, %%r15d;" /* f[1]*f[0] */
- " mulxq 16(%1), %%r9, %%r10;" " adcx %%r14, %%r9;" /* f[2]*f[0] */
- " mulxq 24(%1), %%rax, %%rcx;" " adcx %%rax, %%r10;" /* f[3]*f[0] */
- " movq 24(%1), %%rdx;" /* f[3] */
- " mulxq 8(%1), %%r11, %%rbx;" " adcx %%rcx, %%r11;" /* f[1]*f[3] */
- " mulxq 16(%1), %%rax, %%r13;" " adcx %%rax, %%rbx;" /* f[2]*f[3] */
- " movq 8(%1), %%rdx;" " adcx %%r15, %%r13;" /* f1 */
- " mulxq 16(%1), %%rax, %%rcx;" " mov $0, %%r14;" /* f[2]*f[1] */
+ " movq 0(%0), %%rdx;" /* f[0] */
+ " mulxq 8(%0), %%r8, %%r14;"
+ " xor %%r15d, %%r15d;" /* f[1]*f[0] */
+ " mulxq 16(%0), %%r9, %%r10;"
+ " adcx %%r14, %%r9;" /* f[2]*f[0] */
+ " mulxq 24(%0), %%rax, %%rcx;"
+ " adcx %%rax, %%r10;" /* f[3]*f[0] */
+ " movq 24(%0), %%rdx;" /* f[3] */
+ " mulxq 8(%0), %%r11, %%rbx;"
+ " adcx %%rcx, %%r11;" /* f[1]*f[3] */
+ " mulxq 16(%0), %%rax, %%r13;"
+ " adcx %%rax, %%rbx;" /* f[2]*f[3] */
+ " movq 8(%0), %%rdx;"
+ " adcx %%r15, %%r13;" /* f1 */
+ " mulxq 16(%0), %%rax, %%rcx;"
+ " mov $0, %%r14;" /* f[2]*f[1] */
/* Step 2: Compute two parallel carry chains */
" xor %%r15d, %%r15d;"
@@ -542,39 +706,50 @@ static inline void fsqr(u64 *out, const u64 *f, u64 *tmp)
" adcx %%r14, %%r14;"
/* Step 3: Compute intermediate squares */
- " movq 0(%1), %%rdx;" " mulx %%rdx, %%rax, %%rcx;" /* f[0]^2 */
- " movq %%rax, 0(%0);"
- " add %%rcx, %%r8;" " movq %%r8, 8(%0);"
- " movq 8(%1), %%rdx;" " mulx %%rdx, %%rax, %%rcx;" /* f[1]^2 */
- " adcx %%rax, %%r9;" " movq %%r9, 16(%0);"
- " adcx %%rcx, %%r10;" " movq %%r10, 24(%0);"
- " movq 16(%1), %%rdx;" " mulx %%rdx, %%rax, %%rcx;" /* f[2]^2 */
- " adcx %%rax, %%r11;" " movq %%r11, 32(%0);"
- " adcx %%rcx, %%rbx;" " movq %%rbx, 40(%0);"
- " movq 24(%1), %%rdx;" " mulx %%rdx, %%rax, %%rcx;" /* f[3]^2 */
- " adcx %%rax, %%r13;" " movq %%r13, 48(%0);"
- " adcx %%rcx, %%r14;" " movq %%r14, 56(%0);"
+ " movq 0(%0), %%rdx;"
+ " mulx %%rdx, %%rax, %%rcx;" /* f[0]^2 */
+ " movq %%rax, 0(%1);"
+ " add %%rcx, %%r8;"
+ " movq %%r8, 8(%1);"
+ " movq 8(%0), %%rdx;"
+ " mulx %%rdx, %%rax, %%rcx;" /* f[1]^2 */
+ " adcx %%rax, %%r9;"
+ " movq %%r9, 16(%1);"
+ " adcx %%rcx, %%r10;"
+ " movq %%r10, 24(%1);"
+ " movq 16(%0), %%rdx;"
+ " mulx %%rdx, %%rax, %%rcx;" /* f[2]^2 */
+ " adcx %%rax, %%r11;"
+ " movq %%r11, 32(%1);"
+ " adcx %%rcx, %%rbx;"
+ " movq %%rbx, 40(%1);"
+ " movq 24(%0), %%rdx;"
+ " mulx %%rdx, %%rax, %%rcx;" /* f[3]^2 */
+ " adcx %%rax, %%r13;"
+ " movq %%r13, 48(%1);"
+ " adcx %%rcx, %%r14;"
+ " movq %%r14, 56(%1);"
/* Line up pointers */
- " mov %0, %1;"
- " mov %2, %0;"
+ " mov %1, %0;"
+ " mov %2, %1;"
/* Wrap the result back into the field */
/* Step 1: Compute dst + carry == tmp_hi * 38 + tmp_lo */
" mov $38, %%rdx;"
- " mulxq 32(%1), %%r8, %%r13;"
+ " mulxq 32(%0), %%r8, %%r13;"
" xor %%ecx, %%ecx;"
- " adoxq 0(%1), %%r8;"
- " mulxq 40(%1), %%r9, %%rbx;"
+ " adoxq 0(%0), %%r8;"
+ " mulxq 40(%0), %%r9, %%rbx;"
" adcx %%r13, %%r9;"
- " adoxq 8(%1), %%r9;"
- " mulxq 48(%1), %%r10, %%r13;"
+ " adoxq 8(%0), %%r9;"
+ " mulxq 48(%0), %%r10, %%r13;"
" adcx %%rbx, %%r10;"
- " adoxq 16(%1), %%r10;"
- " mulxq 56(%1), %%r11, %%rax;"
+ " adoxq 16(%0), %%r10;"
+ " mulxq 56(%0), %%r11, %%rax;"
" adcx %%r13, %%r11;"
- " adoxq 24(%1), %%r11;"
+ " adoxq 24(%0), %%r11;"
" adcx %%rcx, %%rax;"
" adox %%rcx, %%rax;"
" imul %%rdx, %%rax;"
@@ -582,40 +757,47 @@ static inline void fsqr(u64 *out, const u64 *f, u64 *tmp)
/* Step 2: Fold the carry back into dst */
" add %%rax, %%r8;"
" adcx %%rcx, %%r9;"
- " movq %%r9, 8(%0);"
+ " movq %%r9, 8(%1);"
" adcx %%rcx, %%r10;"
- " movq %%r10, 16(%0);"
+ " movq %%r10, 16(%1);"
" adcx %%rcx, %%r11;"
- " movq %%r11, 24(%0);"
+ " movq %%r11, 24(%1);"
/* Step 3: Fold the carry bit back in; guaranteed not to carry at this point */
" mov $0, %%rax;"
" cmovc %%rdx, %%rax;"
" add %%rax, %%r8;"
- " movq %%r8, 0(%0);"
- : "+&r" (tmp), "+&r" (f), "+&r" (out)
- :
- : "%rax", "%rcx", "%rdx", "%r8", "%r9", "%r10", "%r11", "%rbx", "%r13", "%r14", "%r15", "memory", "cc"
- );
+ " movq %%r8, 0(%1);"
+ : "+&r"(f), "+&r"(tmp)
+ : "r"(out)
+ : "%rax", "%rbx", "%rcx", "%rdx", "%r8", "%r9", "%r10", "%r11",
+ "%r13", "%r14", "%r15", "memory", "cc");
}
/* Computes two field squarings:
- * out[0] <- f[0] * f[0]
- * out[1] <- f[1] * f[1]
+ * out[0] <- f[0] * f[0]
+ * out[1] <- f[1] * f[1]
* Uses the 16-element buffer tmp for intermediate results */
static inline void fsqr2(u64 *out, const u64 *f, u64 *tmp)
{
asm volatile(
/* Step 1: Compute all partial products */
- " movq 0(%1), %%rdx;" /* f[0] */
- " mulxq 8(%1), %%r8, %%r14;" " xor %%r15d, %%r15d;" /* f[1]*f[0] */
- " mulxq 16(%1), %%r9, %%r10;" " adcx %%r14, %%r9;" /* f[2]*f[0] */
- " mulxq 24(%1), %%rax, %%rcx;" " adcx %%rax, %%r10;" /* f[3]*f[0] */
- " movq 24(%1), %%rdx;" /* f[3] */
- " mulxq 8(%1), %%r11, %%rbx;" " adcx %%rcx, %%r11;" /* f[1]*f[3] */
- " mulxq 16(%1), %%rax, %%r13;" " adcx %%rax, %%rbx;" /* f[2]*f[3] */
- " movq 8(%1), %%rdx;" " adcx %%r15, %%r13;" /* f1 */
- " mulxq 16(%1), %%rax, %%rcx;" " mov $0, %%r14;" /* f[2]*f[1] */
+ " movq 0(%0), %%rdx;" /* f[0] */
+ " mulxq 8(%0), %%r8, %%r14;"
+ " xor %%r15d, %%r15d;" /* f[1]*f[0] */
+ " mulxq 16(%0), %%r9, %%r10;"
+ " adcx %%r14, %%r9;" /* f[2]*f[0] */
+ " mulxq 24(%0), %%rax, %%rcx;"
+ " adcx %%rax, %%r10;" /* f[3]*f[0] */
+ " movq 24(%0), %%rdx;" /* f[3] */
+ " mulxq 8(%0), %%r11, %%rbx;"
+ " adcx %%rcx, %%r11;" /* f[1]*f[3] */
+ " mulxq 16(%0), %%rax, %%r13;"
+ " adcx %%rax, %%rbx;" /* f[2]*f[3] */
+ " movq 8(%0), %%rdx;"
+ " adcx %%r15, %%r13;" /* f1 */
+ " mulxq 16(%0), %%rax, %%rcx;"
+ " mov $0, %%r14;" /* f[2]*f[1] */
/* Step 2: Compute two parallel carry chains */
" xor %%r15d, %%r15d;"
@@ -633,29 +815,47 @@ static inline void fsqr2(u64 *out, const u64 *f, u64 *tmp)
" adcx %%r14, %%r14;"
/* Step 3: Compute intermediate squares */
- " movq 0(%1), %%rdx;" " mulx %%rdx, %%rax, %%rcx;" /* f[0]^2 */
- " movq %%rax, 0(%0);"
- " add %%rcx, %%r8;" " movq %%r8, 8(%0);"
- " movq 8(%1), %%rdx;" " mulx %%rdx, %%rax, %%rcx;" /* f[1]^2 */
- " adcx %%rax, %%r9;" " movq %%r9, 16(%0);"
- " adcx %%rcx, %%r10;" " movq %%r10, 24(%0);"
- " movq 16(%1), %%rdx;" " mulx %%rdx, %%rax, %%rcx;" /* f[2]^2 */
- " adcx %%rax, %%r11;" " movq %%r11, 32(%0);"
- " adcx %%rcx, %%rbx;" " movq %%rbx, 40(%0);"
- " movq 24(%1), %%rdx;" " mulx %%rdx, %%rax, %%rcx;" /* f[3]^2 */
- " adcx %%rax, %%r13;" " movq %%r13, 48(%0);"
- " adcx %%rcx, %%r14;" " movq %%r14, 56(%0);"
+ " movq 0(%0), %%rdx;"
+ " mulx %%rdx, %%rax, %%rcx;" /* f[0]^2 */
+ " movq %%rax, 0(%1);"
+ " add %%rcx, %%r8;"
+ " movq %%r8, 8(%1);"
+ " movq 8(%0), %%rdx;"
+ " mulx %%rdx, %%rax, %%rcx;" /* f[1]^2 */
+ " adcx %%rax, %%r9;"
+ " movq %%r9, 16(%1);"
+ " adcx %%rcx, %%r10;"
+ " movq %%r10, 24(%1);"
+ " movq 16(%0), %%rdx;"
+ " mulx %%rdx, %%rax, %%rcx;" /* f[2]^2 */
+ " adcx %%rax, %%r11;"
+ " movq %%r11, 32(%1);"
+ " adcx %%rcx, %%rbx;"
+ " movq %%rbx, 40(%1);"
+ " movq 24(%0), %%rdx;"
+ " mulx %%rdx, %%rax, %%rcx;" /* f[3]^2 */
+ " adcx %%rax, %%r13;"
+ " movq %%r13, 48(%1);"
+ " adcx %%rcx, %%r14;"
+ " movq %%r14, 56(%1);"
/* Step 1: Compute all partial products */
- " movq 32(%1), %%rdx;" /* f[0] */
- " mulxq 40(%1), %%r8, %%r14;" " xor %%r15d, %%r15d;" /* f[1]*f[0] */
- " mulxq 48(%1), %%r9, %%r10;" " adcx %%r14, %%r9;" /* f[2]*f[0] */
- " mulxq 56(%1), %%rax, %%rcx;" " adcx %%rax, %%r10;" /* f[3]*f[0] */
- " movq 56(%1), %%rdx;" /* f[3] */
- " mulxq 40(%1), %%r11, %%rbx;" " adcx %%rcx, %%r11;" /* f[1]*f[3] */
- " mulxq 48(%1), %%rax, %%r13;" " adcx %%rax, %%rbx;" /* f[2]*f[3] */
- " movq 40(%1), %%rdx;" " adcx %%r15, %%r13;" /* f1 */
- " mulxq 48(%1), %%rax, %%rcx;" " mov $0, %%r14;" /* f[2]*f[1] */
+ " movq 32(%0), %%rdx;" /* f[0] */
+ " mulxq 40(%0), %%r8, %%r14;"
+ " xor %%r15d, %%r15d;" /* f[1]*f[0] */
+ " mulxq 48(%0), %%r9, %%r10;"
+ " adcx %%r14, %%r9;" /* f[2]*f[0] */
+ " mulxq 56(%0), %%rax, %%rcx;"
+ " adcx %%rax, %%r10;" /* f[3]*f[0] */
+ " movq 56(%0), %%rdx;" /* f[3] */
+ " mulxq 40(%0), %%r11, %%rbx;"
+ " adcx %%rcx, %%r11;" /* f[1]*f[3] */
+ " mulxq 48(%0), %%rax, %%r13;"
+ " adcx %%rax, %%rbx;" /* f[2]*f[3] */
+ " movq 40(%0), %%rdx;"
+ " adcx %%r15, %%r13;" /* f1 */
+ " mulxq 48(%0), %%rax, %%rcx;"
+ " mov $0, %%r14;" /* f[2]*f[1] */
/* Step 2: Compute two parallel carry chains */
" xor %%r15d, %%r15d;"
@@ -673,37 +873,48 @@ static inline void fsqr2(u64 *out, const u64 *f, u64 *tmp)
" adcx %%r14, %%r14;"
/* Step 3: Compute intermediate squares */
- " movq 32(%1), %%rdx;" " mulx %%rdx, %%rax, %%rcx;" /* f[0]^2 */
- " movq %%rax, 64(%0);"
- " add %%rcx, %%r8;" " movq %%r8, 72(%0);"
- " movq 40(%1), %%rdx;" " mulx %%rdx, %%rax, %%rcx;" /* f[1]^2 */
- " adcx %%rax, %%r9;" " movq %%r9, 80(%0);"
- " adcx %%rcx, %%r10;" " movq %%r10, 88(%0);"
- " movq 48(%1), %%rdx;" " mulx %%rdx, %%rax, %%rcx;" /* f[2]^2 */
- " adcx %%rax, %%r11;" " movq %%r11, 96(%0);"
- " adcx %%rcx, %%rbx;" " movq %%rbx, 104(%0);"
- " movq 56(%1), %%rdx;" " mulx %%rdx, %%rax, %%rcx;" /* f[3]^2 */
- " adcx %%rax, %%r13;" " movq %%r13, 112(%0);"
- " adcx %%rcx, %%r14;" " movq %%r14, 120(%0);"
+ " movq 32(%0), %%rdx;"
+ " mulx %%rdx, %%rax, %%rcx;" /* f[0]^2 */
+ " movq %%rax, 64(%1);"
+ " add %%rcx, %%r8;"
+ " movq %%r8, 72(%1);"
+ " movq 40(%0), %%rdx;"
+ " mulx %%rdx, %%rax, %%rcx;" /* f[1]^2 */
+ " adcx %%rax, %%r9;"
+ " movq %%r9, 80(%1);"
+ " adcx %%rcx, %%r10;"
+ " movq %%r10, 88(%1);"
+ " movq 48(%0), %%rdx;"
+ " mulx %%rdx, %%rax, %%rcx;" /* f[2]^2 */
+ " adcx %%rax, %%r11;"
+ " movq %%r11, 96(%1);"
+ " adcx %%rcx, %%rbx;"
+ " movq %%rbx, 104(%1);"
+ " movq 56(%0), %%rdx;"
+ " mulx %%rdx, %%rax, %%rcx;" /* f[3]^2 */
+ " adcx %%rax, %%r13;"
+ " movq %%r13, 112(%1);"
+ " adcx %%rcx, %%r14;"
+ " movq %%r14, 120(%1);"
/* Line up pointers */
- " mov %0, %1;"
- " mov %2, %0;"
+ " mov %1, %0;"
+ " mov %2, %1;"
/* Step 1: Compute dst + carry == tmp_hi * 38 + tmp_lo */
" mov $38, %%rdx;"
- " mulxq 32(%1), %%r8, %%r13;"
+ " mulxq 32(%0), %%r8, %%r13;"
" xor %%ecx, %%ecx;"
- " adoxq 0(%1), %%r8;"
- " mulxq 40(%1), %%r9, %%rbx;"
+ " adoxq 0(%0), %%r8;"
+ " mulxq 40(%0), %%r9, %%rbx;"
" adcx %%r13, %%r9;"
- " adoxq 8(%1), %%r9;"
- " mulxq 48(%1), %%r10, %%r13;"
+ " adoxq 8(%0), %%r9;"
+ " mulxq 48(%0), %%r10, %%r13;"
" adcx %%rbx, %%r10;"
- " adoxq 16(%1), %%r10;"
- " mulxq 56(%1), %%r11, %%rax;"
+ " adoxq 16(%0), %%r10;"
+ " mulxq 56(%0), %%r11, %%rax;"
" adcx %%r13, %%r11;"
- " adoxq 24(%1), %%r11;"
+ " adoxq 24(%0), %%r11;"
" adcx %%rcx, %%rax;"
" adox %%rcx, %%rax;"
" imul %%rdx, %%rax;"
@@ -711,32 +922,32 @@ static inline void fsqr2(u64 *out, const u64 *f, u64 *tmp)
/* Step 2: Fold the carry back into dst */
" add %%rax, %%r8;"
" adcx %%rcx, %%r9;"
- " movq %%r9, 8(%0);"
+ " movq %%r9, 8(%1);"
" adcx %%rcx, %%r10;"
- " movq %%r10, 16(%0);"
+ " movq %%r10, 16(%1);"
" adcx %%rcx, %%r11;"
- " movq %%r11, 24(%0);"
+ " movq %%r11, 24(%1);"
/* Step 3: Fold the carry bit back in; guaranteed not to carry at this point */
" mov $0, %%rax;"
" cmovc %%rdx, %%rax;"
" add %%rax, %%r8;"
- " movq %%r8, 0(%0);"
+ " movq %%r8, 0(%1);"
/* Step 1: Compute dst + carry == tmp_hi * 38 + tmp_lo */
" mov $38, %%rdx;"
- " mulxq 96(%1), %%r8, %%r13;"
+ " mulxq 96(%0), %%r8, %%r13;"
" xor %%ecx, %%ecx;"
- " adoxq 64(%1), %%r8;"
- " mulxq 104(%1), %%r9, %%rbx;"
+ " adoxq 64(%0), %%r8;"
+ " mulxq 104(%0), %%r9, %%rbx;"
" adcx %%r13, %%r9;"
- " adoxq 72(%1), %%r9;"
- " mulxq 112(%1), %%r10, %%r13;"
+ " adoxq 72(%0), %%r9;"
+ " mulxq 112(%0), %%r10, %%r13;"
" adcx %%rbx, %%r10;"
- " adoxq 80(%1), %%r10;"
- " mulxq 120(%1), %%r11, %%rax;"
+ " adoxq 80(%0), %%r10;"
+ " mulxq 120(%0), %%r11, %%rax;"
" adcx %%r13, %%r11;"
- " adoxq 88(%1), %%r11;"
+ " adoxq 88(%0), %%r11;"
" adcx %%rcx, %%rax;"
" adox %%rcx, %%rax;"
" imul %%rdx, %%rax;"
@@ -744,21 +955,21 @@ static inline void fsqr2(u64 *out, const u64 *f, u64 *tmp)
/* Step 2: Fold the carry back into dst */
" add %%rax, %%r8;"
" adcx %%rcx, %%r9;"
- " movq %%r9, 40(%0);"
+ " movq %%r9, 40(%1);"
" adcx %%rcx, %%r10;"
- " movq %%r10, 48(%0);"
+ " movq %%r10, 48(%1);"
" adcx %%rcx, %%r11;"
- " movq %%r11, 56(%0);"
+ " movq %%r11, 56(%1);"
/* Step 3: Fold the carry bit back in; guaranteed not to carry at this point */
" mov $0, %%rax;"
" cmovc %%rdx, %%rax;"
" add %%rax, %%r8;"
- " movq %%r8, 32(%0);"
- : "+&r" (tmp), "+&r" (f), "+&r" (out)
- :
- : "%rax", "%rcx", "%rdx", "%r8", "%r9", "%r10", "%r11", "%rbx", "%r13", "%r14", "%r15", "memory", "cc"
- );
+ " movq %%r8, 32(%1);"
+ : "+&r"(f), "+&r"(tmp)
+ : "r"(out)
+ : "%rax", "%rbx", "%rcx", "%rdx", "%r8", "%r9", "%r10", "%r11",
+ "%r13", "%r14", "%r15", "memory", "cc");
}
static void point_add_and_double(u64 *q, u64 *p01_tmp1, u64 *tmp2)
diff --git a/arch/x86/crypto/des3_ede-asm_64.S b/arch/x86/crypto/des3_ede-asm_64.S
index fac0fdc3f25d..f4c760f4cade 100644
--- a/arch/x86/crypto/des3_ede-asm_64.S
+++ b/arch/x86/crypto/des3_ede-asm_64.S
@@ -243,7 +243,7 @@ SYM_FUNC_START(des3_ede_x86_64_crypt_blk)
popq %r12;
popq %rbx;
- ret;
+ RET;
SYM_FUNC_END(des3_ede_x86_64_crypt_blk)
/***********************************************************************
@@ -528,7 +528,7 @@ SYM_FUNC_START(des3_ede_x86_64_crypt_blk_3way)
popq %r12;
popq %rbx;
- ret;
+ RET;
SYM_FUNC_END(des3_ede_x86_64_crypt_blk_3way)
.section .rodata, "a", @progbits
diff --git a/arch/x86/crypto/des3_ede_glue.c b/arch/x86/crypto/des3_ede_glue.c
index e7cb68a3db3b..787c234d2469 100644
--- a/arch/x86/crypto/des3_ede_glue.c
+++ b/arch/x86/crypto/des3_ede_glue.c
@@ -164,7 +164,7 @@ static int cbc_encrypt(struct skcipher_request *req)
err = skcipher_walk_virt(&walk, req, false);
- while ((nbytes = walk.nbytes)) {
+ while (walk.nbytes) {
nbytes = __cbc_encrypt(ctx, &walk);
err = skcipher_walk_done(&walk, nbytes);
}
@@ -243,7 +243,7 @@ static int cbc_decrypt(struct skcipher_request *req)
err = skcipher_walk_virt(&walk, req, false);
- while ((nbytes = walk.nbytes)) {
+ while (walk.nbytes) {
nbytes = __cbc_decrypt(ctx, &walk);
err = skcipher_walk_done(&walk, nbytes);
}
diff --git a/arch/x86/crypto/ghash-clmulni-intel_asm.S b/arch/x86/crypto/ghash-clmulni-intel_asm.S
index 99ac25e18e09..2bf871899920 100644
--- a/arch/x86/crypto/ghash-clmulni-intel_asm.S
+++ b/arch/x86/crypto/ghash-clmulni-intel_asm.S
@@ -85,7 +85,7 @@ SYM_FUNC_START_LOCAL(__clmul_gf128mul_ble)
psrlq $1, T2
pxor T2, T1
pxor T1, DATA
- ret
+ RET
SYM_FUNC_END(__clmul_gf128mul_ble)
/* void clmul_ghash_mul(char *dst, const u128 *shash) */
@@ -99,7 +99,7 @@ SYM_FUNC_START(clmul_ghash_mul)
pshufb BSWAP, DATA
movups DATA, (%rdi)
FRAME_END
- ret
+ RET
SYM_FUNC_END(clmul_ghash_mul)
/*
@@ -128,5 +128,5 @@ SYM_FUNC_START(clmul_ghash_update)
movups DATA, (%rdi)
.Lupdate_just_ret:
FRAME_END
- ret
+ RET
SYM_FUNC_END(clmul_ghash_update)
diff --git a/arch/x86/crypto/nh-avx2-x86_64.S b/arch/x86/crypto/nh-avx2-x86_64.S
index b22c7b936272..6a0b15e7196a 100644
--- a/arch/x86/crypto/nh-avx2-x86_64.S
+++ b/arch/x86/crypto/nh-avx2-x86_64.S
@@ -153,5 +153,5 @@ SYM_FUNC_START(nh_avx2)
vpaddq T1, T0, T0
vpaddq T4, T0, T0
vmovdqu T0, (HASH)
- ret
+ RET
SYM_FUNC_END(nh_avx2)
diff --git a/arch/x86/crypto/nh-sse2-x86_64.S b/arch/x86/crypto/nh-sse2-x86_64.S
index d7ae22dd6683..34c567bbcb4f 100644
--- a/arch/x86/crypto/nh-sse2-x86_64.S
+++ b/arch/x86/crypto/nh-sse2-x86_64.S
@@ -119,5 +119,5 @@ SYM_FUNC_START(nh_sse2)
paddq PASS2_SUMS, T1
movdqu T0, 0x00(HASH)
movdqu T1, 0x10(HASH)
- ret
+ RET
SYM_FUNC_END(nh_sse2)
diff --git a/arch/x86/crypto/serpent-avx-x86_64-asm_64.S b/arch/x86/crypto/serpent-avx-x86_64-asm_64.S
index b7ee24df7fba..82f2313f512b 100644
--- a/arch/x86/crypto/serpent-avx-x86_64-asm_64.S
+++ b/arch/x86/crypto/serpent-avx-x86_64-asm_64.S
@@ -601,7 +601,7 @@ SYM_FUNC_START_LOCAL(__serpent_enc_blk8_avx)
write_blocks(RA1, RB1, RC1, RD1, RK0, RK1, RK2);
write_blocks(RA2, RB2, RC2, RD2, RK0, RK1, RK2);
- ret;
+ RET;
SYM_FUNC_END(__serpent_enc_blk8_avx)
.align 8
@@ -655,7 +655,7 @@ SYM_FUNC_START_LOCAL(__serpent_dec_blk8_avx)
write_blocks(RC1, RD1, RB1, RE1, RK0, RK1, RK2);
write_blocks(RC2, RD2, RB2, RE2, RK0, RK1, RK2);
- ret;
+ RET;
SYM_FUNC_END(__serpent_dec_blk8_avx)
SYM_FUNC_START(serpent_ecb_enc_8way_avx)
@@ -673,7 +673,7 @@ SYM_FUNC_START(serpent_ecb_enc_8way_avx)
store_8way(%rsi, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
FRAME_END
- ret;
+ RET;
SYM_FUNC_END(serpent_ecb_enc_8way_avx)
SYM_FUNC_START(serpent_ecb_dec_8way_avx)
@@ -691,7 +691,7 @@ SYM_FUNC_START(serpent_ecb_dec_8way_avx)
store_8way(%rsi, RC1, RD1, RB1, RE1, RC2, RD2, RB2, RE2);
FRAME_END
- ret;
+ RET;
SYM_FUNC_END(serpent_ecb_dec_8way_avx)
SYM_FUNC_START(serpent_cbc_dec_8way_avx)
@@ -709,5 +709,5 @@ SYM_FUNC_START(serpent_cbc_dec_8way_avx)
store_cbc_8way(%rdx, %rsi, RC1, RD1, RB1, RE1, RC2, RD2, RB2, RE2);
FRAME_END
- ret;
+ RET;
SYM_FUNC_END(serpent_cbc_dec_8way_avx)
diff --git a/arch/x86/crypto/serpent-avx2-asm_64.S b/arch/x86/crypto/serpent-avx2-asm_64.S
index 9161b6e441f3..8ea34c9b9316 100644
--- a/arch/x86/crypto/serpent-avx2-asm_64.S
+++ b/arch/x86/crypto/serpent-avx2-asm_64.S
@@ -601,7 +601,7 @@ SYM_FUNC_START_LOCAL(__serpent_enc_blk16)
write_blocks(RA1, RB1, RC1, RD1, RK0, RK1, RK2);
write_blocks(RA2, RB2, RC2, RD2, RK0, RK1, RK2);
- ret;
+ RET;
SYM_FUNC_END(__serpent_enc_blk16)
.align 8
@@ -655,7 +655,7 @@ SYM_FUNC_START_LOCAL(__serpent_dec_blk16)
write_blocks(RC1, RD1, RB1, RE1, RK0, RK1, RK2);
write_blocks(RC2, RD2, RB2, RE2, RK0, RK1, RK2);
- ret;
+ RET;
SYM_FUNC_END(__serpent_dec_blk16)
SYM_FUNC_START(serpent_ecb_enc_16way)
@@ -677,7 +677,7 @@ SYM_FUNC_START(serpent_ecb_enc_16way)
vzeroupper;
FRAME_END
- ret;
+ RET;
SYM_FUNC_END(serpent_ecb_enc_16way)
SYM_FUNC_START(serpent_ecb_dec_16way)
@@ -699,7 +699,7 @@ SYM_FUNC_START(serpent_ecb_dec_16way)
vzeroupper;
FRAME_END
- ret;
+ RET;
SYM_FUNC_END(serpent_ecb_dec_16way)
SYM_FUNC_START(serpent_cbc_dec_16way)
@@ -722,5 +722,5 @@ SYM_FUNC_START(serpent_cbc_dec_16way)
vzeroupper;
FRAME_END
- ret;
+ RET;
SYM_FUNC_END(serpent_cbc_dec_16way)
diff --git a/arch/x86/crypto/serpent-sse2-i586-asm_32.S b/arch/x86/crypto/serpent-sse2-i586-asm_32.S
index 6379b99cb722..8ccb03ad7cef 100644
--- a/arch/x86/crypto/serpent-sse2-i586-asm_32.S
+++ b/arch/x86/crypto/serpent-sse2-i586-asm_32.S
@@ -553,12 +553,12 @@ SYM_FUNC_START(__serpent_enc_blk_4way)
write_blocks(%eax, RA, RB, RC, RD, RT0, RT1, RE);
- ret;
+ RET;
.L__enc_xor4:
xor_blocks(%eax, RA, RB, RC, RD, RT0, RT1, RE);
- ret;
+ RET;
SYM_FUNC_END(__serpent_enc_blk_4way)
SYM_FUNC_START(serpent_dec_blk_4way)
@@ -612,5 +612,5 @@ SYM_FUNC_START(serpent_dec_blk_4way)
movl arg_dst(%esp), %eax;
write_blocks(%eax, RC, RD, RB, RE, RT0, RT1, RA);
- ret;
+ RET;
SYM_FUNC_END(serpent_dec_blk_4way)
diff --git a/arch/x86/crypto/serpent-sse2-x86_64-asm_64.S b/arch/x86/crypto/serpent-sse2-x86_64-asm_64.S
index efb6dc17dc90..e0998a011d1d 100644
--- a/arch/x86/crypto/serpent-sse2-x86_64-asm_64.S
+++ b/arch/x86/crypto/serpent-sse2-x86_64-asm_64.S
@@ -675,13 +675,13 @@ SYM_FUNC_START(__serpent_enc_blk_8way)
write_blocks(%rsi, RA1, RB1, RC1, RD1, RK0, RK1, RK2);
write_blocks(%rax, RA2, RB2, RC2, RD2, RK0, RK1, RK2);
- ret;
+ RET;
.L__enc_xor8:
xor_blocks(%rsi, RA1, RB1, RC1, RD1, RK0, RK1, RK2);
xor_blocks(%rax, RA2, RB2, RC2, RD2, RK0, RK1, RK2);
- ret;
+ RET;
SYM_FUNC_END(__serpent_enc_blk_8way)
SYM_FUNC_START(serpent_dec_blk_8way)
@@ -735,5 +735,5 @@ SYM_FUNC_START(serpent_dec_blk_8way)
write_blocks(%rsi, RC1, RD1, RB1, RE1, RK0, RK1, RK2);
write_blocks(%rax, RC2, RD2, RB2, RE2, RK0, RK1, RK2);
- ret;
+ RET;
SYM_FUNC_END(serpent_dec_blk_8way)
diff --git a/arch/x86/crypto/sha1_avx2_x86_64_asm.S b/arch/x86/crypto/sha1_avx2_x86_64_asm.S
index 5eed620f4676..a96b2fd26dab 100644
--- a/arch/x86/crypto/sha1_avx2_x86_64_asm.S
+++ b/arch/x86/crypto/sha1_avx2_x86_64_asm.S
@@ -674,7 +674,7 @@ _loop3:
pop %r12
pop %rbx
- ret
+ RET
SYM_FUNC_END(\name)
.endm
diff --git a/arch/x86/crypto/sha1_ni_asm.S b/arch/x86/crypto/sha1_ni_asm.S
index 5d8415f482bd..2f94ec0e763b 100644
--- a/arch/x86/crypto/sha1_ni_asm.S
+++ b/arch/x86/crypto/sha1_ni_asm.S
@@ -290,7 +290,7 @@ SYM_FUNC_START(sha1_ni_transform)
mov %rbp, %rsp
pop %rbp
- ret
+ RET
SYM_FUNC_END(sha1_ni_transform)
.section .rodata.cst16.PSHUFFLE_BYTE_FLIP_MASK, "aM", @progbits, 16
diff --git a/arch/x86/crypto/sha1_ssse3_asm.S b/arch/x86/crypto/sha1_ssse3_asm.S
index d25668d2a1e9..263f916362e0 100644
--- a/arch/x86/crypto/sha1_ssse3_asm.S
+++ b/arch/x86/crypto/sha1_ssse3_asm.S
@@ -99,7 +99,7 @@
pop %rbp
pop %r12
pop %rbx
- ret
+ RET
SYM_FUNC_END(\name)
.endm
diff --git a/arch/x86/crypto/sha256-avx-asm.S b/arch/x86/crypto/sha256-avx-asm.S
index 4739cd31b9db..3baa1ec39097 100644
--- a/arch/x86/crypto/sha256-avx-asm.S
+++ b/arch/x86/crypto/sha256-avx-asm.S
@@ -458,7 +458,7 @@ done_hash:
popq %r13
popq %r12
popq %rbx
- ret
+ RET
SYM_FUNC_END(sha256_transform_avx)
.section .rodata.cst256.K256, "aM", @progbits, 256
diff --git a/arch/x86/crypto/sha256-avx2-asm.S b/arch/x86/crypto/sha256-avx2-asm.S
index 4087f7432a7e..9bcdbc47b8b4 100644
--- a/arch/x86/crypto/sha256-avx2-asm.S
+++ b/arch/x86/crypto/sha256-avx2-asm.S
@@ -710,7 +710,7 @@ done_hash:
popq %r13
popq %r12
popq %rbx
- ret
+ RET
SYM_FUNC_END(sha256_transform_rorx)
.section .rodata.cst512.K256, "aM", @progbits, 512
diff --git a/arch/x86/crypto/sha256-ssse3-asm.S b/arch/x86/crypto/sha256-ssse3-asm.S
index ddfa863b4ee3..c4a5db612c32 100644
--- a/arch/x86/crypto/sha256-ssse3-asm.S
+++ b/arch/x86/crypto/sha256-ssse3-asm.S
@@ -472,7 +472,7 @@ done_hash:
popq %r12
popq %rbx
- ret
+ RET
SYM_FUNC_END(sha256_transform_ssse3)
.section .rodata.cst256.K256, "aM", @progbits, 256
diff --git a/arch/x86/crypto/sha256_ni_asm.S b/arch/x86/crypto/sha256_ni_asm.S
index 7abade04a3a3..94d50dd27cb5 100644
--- a/arch/x86/crypto/sha256_ni_asm.S
+++ b/arch/x86/crypto/sha256_ni_asm.S
@@ -326,7 +326,7 @@ SYM_FUNC_START(sha256_ni_transform)
.Ldone_hash:
- ret
+ RET
SYM_FUNC_END(sha256_ni_transform)
.section .rodata.cst256.K256, "aM", @progbits, 256
diff --git a/arch/x86/crypto/sha512-avx-asm.S b/arch/x86/crypto/sha512-avx-asm.S
index 3d8f0fd4eea8..1fefe6dd3a9e 100644
--- a/arch/x86/crypto/sha512-avx-asm.S
+++ b/arch/x86/crypto/sha512-avx-asm.S
@@ -361,7 +361,7 @@ updateblock:
pop %rbx
nowork:
- ret
+ RET
SYM_FUNC_END(sha512_transform_avx)
########################################################################
diff --git a/arch/x86/crypto/sha512-avx2-asm.S b/arch/x86/crypto/sha512-avx2-asm.S
index 072cb0f0deae..5cdaab7d6901 100644
--- a/arch/x86/crypto/sha512-avx2-asm.S
+++ b/arch/x86/crypto/sha512-avx2-asm.S
@@ -679,7 +679,7 @@ done_hash:
pop %r12
pop %rbx
- ret
+ RET
SYM_FUNC_END(sha512_transform_rorx)
########################################################################
diff --git a/arch/x86/crypto/sha512-ssse3-asm.S b/arch/x86/crypto/sha512-ssse3-asm.S
index bd51c9070bed..b84c22e06c5f 100644
--- a/arch/x86/crypto/sha512-ssse3-asm.S
+++ b/arch/x86/crypto/sha512-ssse3-asm.S
@@ -363,7 +363,7 @@ updateblock:
pop %rbx
nowork:
- ret
+ RET
SYM_FUNC_END(sha512_transform_ssse3)
########################################################################
diff --git a/arch/x86/crypto/sm4-aesni-avx-asm_64.S b/arch/x86/crypto/sm4-aesni-avx-asm_64.S
index 1cc72b4804fa..4767ab61ff48 100644
--- a/arch/x86/crypto/sm4-aesni-avx-asm_64.S
+++ b/arch/x86/crypto/sm4-aesni-avx-asm_64.S
@@ -246,7 +246,7 @@ SYM_FUNC_START(sm4_aesni_avx_crypt4)
.Lblk4_store_output_done:
vzeroall;
FRAME_END
- ret;
+ RET;
SYM_FUNC_END(sm4_aesni_avx_crypt4)
.align 8
@@ -356,7 +356,7 @@ SYM_FUNC_START_LOCAL(__sm4_crypt_blk8)
vpshufb RTMP2, RB3, RB3;
FRAME_END
- ret;
+ RET;
SYM_FUNC_END(__sm4_crypt_blk8)
/*
@@ -412,7 +412,7 @@ SYM_FUNC_START(sm4_aesni_avx_crypt8)
.Lblk8_store_output_done:
vzeroall;
FRAME_END
- ret;
+ RET;
SYM_FUNC_END(sm4_aesni_avx_crypt8)
/*
@@ -487,7 +487,7 @@ SYM_FUNC_START(sm4_aesni_avx_ctr_enc_blk8)
vzeroall;
FRAME_END
- ret;
+ RET;
SYM_FUNC_END(sm4_aesni_avx_ctr_enc_blk8)
/*
@@ -537,7 +537,7 @@ SYM_FUNC_START(sm4_aesni_avx_cbc_dec_blk8)
vzeroall;
FRAME_END
- ret;
+ RET;
SYM_FUNC_END(sm4_aesni_avx_cbc_dec_blk8)
/*
@@ -590,5 +590,5 @@ SYM_FUNC_START(sm4_aesni_avx_cfb_dec_blk8)
vzeroall;
FRAME_END
- ret;
+ RET;
SYM_FUNC_END(sm4_aesni_avx_cfb_dec_blk8)
diff --git a/arch/x86/crypto/sm4-aesni-avx2-asm_64.S b/arch/x86/crypto/sm4-aesni-avx2-asm_64.S
index 9c5d3f3ad45a..4732fe8bb65b 100644
--- a/arch/x86/crypto/sm4-aesni-avx2-asm_64.S
+++ b/arch/x86/crypto/sm4-aesni-avx2-asm_64.S
@@ -268,7 +268,7 @@ SYM_FUNC_START_LOCAL(__sm4_crypt_blk16)
vpshufb RTMP2, RB3, RB3;
FRAME_END
- ret;
+ RET;
SYM_FUNC_END(__sm4_crypt_blk16)
#define inc_le128(x, minus_one, tmp) \
@@ -387,7 +387,7 @@ SYM_FUNC_START(sm4_aesni_avx2_ctr_enc_blk16)
vzeroall;
FRAME_END
- ret;
+ RET;
SYM_FUNC_END(sm4_aesni_avx2_ctr_enc_blk16)
/*
@@ -441,7 +441,7 @@ SYM_FUNC_START(sm4_aesni_avx2_cbc_dec_blk16)
vzeroall;
FRAME_END
- ret;
+ RET;
SYM_FUNC_END(sm4_aesni_avx2_cbc_dec_blk16)
/*
@@ -497,5 +497,5 @@ SYM_FUNC_START(sm4_aesni_avx2_cfb_dec_blk16)
vzeroall;
FRAME_END
- ret;
+ RET;
SYM_FUNC_END(sm4_aesni_avx2_cfb_dec_blk16)
diff --git a/arch/x86/crypto/twofish-avx-x86_64-asm_64.S b/arch/x86/crypto/twofish-avx-x86_64-asm_64.S
index 37e63b3c664e..31f9b2ec3857 100644
--- a/arch/x86/crypto/twofish-avx-x86_64-asm_64.S
+++ b/arch/x86/crypto/twofish-avx-x86_64-asm_64.S
@@ -267,7 +267,7 @@ SYM_FUNC_START_LOCAL(__twofish_enc_blk8)
outunpack_blocks(RC1, RD1, RA1, RB1, RK1, RX0, RY0, RK2);
outunpack_blocks(RC2, RD2, RA2, RB2, RK1, RX0, RY0, RK2);
- ret;
+ RET;
SYM_FUNC_END(__twofish_enc_blk8)
.align 8
@@ -307,7 +307,7 @@ SYM_FUNC_START_LOCAL(__twofish_dec_blk8)
outunpack_blocks(RA1, RB1, RC1, RD1, RK1, RX0, RY0, RK2);
outunpack_blocks(RA2, RB2, RC2, RD2, RK1, RX0, RY0, RK2);
- ret;
+ RET;
SYM_FUNC_END(__twofish_dec_blk8)
SYM_FUNC_START(twofish_ecb_enc_8way)
@@ -327,7 +327,7 @@ SYM_FUNC_START(twofish_ecb_enc_8way)
store_8way(%r11, RC1, RD1, RA1, RB1, RC2, RD2, RA2, RB2);
FRAME_END
- ret;
+ RET;
SYM_FUNC_END(twofish_ecb_enc_8way)
SYM_FUNC_START(twofish_ecb_dec_8way)
@@ -347,7 +347,7 @@ SYM_FUNC_START(twofish_ecb_dec_8way)
store_8way(%r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
FRAME_END
- ret;
+ RET;
SYM_FUNC_END(twofish_ecb_dec_8way)
SYM_FUNC_START(twofish_cbc_dec_8way)
@@ -372,5 +372,5 @@ SYM_FUNC_START(twofish_cbc_dec_8way)
popq %r12;
FRAME_END
- ret;
+ RET;
SYM_FUNC_END(twofish_cbc_dec_8way)
diff --git a/arch/x86/crypto/twofish-i586-asm_32.S b/arch/x86/crypto/twofish-i586-asm_32.S
index a6f09e4f2e46..3abcad661884 100644
--- a/arch/x86/crypto/twofish-i586-asm_32.S
+++ b/arch/x86/crypto/twofish-i586-asm_32.S
@@ -260,7 +260,7 @@ SYM_FUNC_START(twofish_enc_blk)
pop %ebx
pop %ebp
mov $1, %eax
- ret
+ RET
SYM_FUNC_END(twofish_enc_blk)
SYM_FUNC_START(twofish_dec_blk)
@@ -317,5 +317,5 @@ SYM_FUNC_START(twofish_dec_blk)
pop %ebx
pop %ebp
mov $1, %eax
- ret
+ RET
SYM_FUNC_END(twofish_dec_blk)
diff --git a/arch/x86/crypto/twofish-x86_64-asm_64-3way.S b/arch/x86/crypto/twofish-x86_64-asm_64-3way.S
index bca4cea757ce..d2288bf38a8a 100644
--- a/arch/x86/crypto/twofish-x86_64-asm_64-3way.S
+++ b/arch/x86/crypto/twofish-x86_64-asm_64-3way.S
@@ -258,7 +258,7 @@ SYM_FUNC_START(__twofish_enc_blk_3way)
popq %rbx;
popq %r12;
popq %r13;
- ret;
+ RET;
.L__enc_xor3:
outunpack_enc3(xor);
@@ -266,7 +266,7 @@ SYM_FUNC_START(__twofish_enc_blk_3way)
popq %rbx;
popq %r12;
popq %r13;
- ret;
+ RET;
SYM_FUNC_END(__twofish_enc_blk_3way)
SYM_FUNC_START(twofish_dec_blk_3way)
@@ -301,5 +301,5 @@ SYM_FUNC_START(twofish_dec_blk_3way)
popq %rbx;
popq %r12;
popq %r13;
- ret;
+ RET;
SYM_FUNC_END(twofish_dec_blk_3way)
diff --git a/arch/x86/crypto/twofish-x86_64-asm_64.S b/arch/x86/crypto/twofish-x86_64-asm_64.S
index d2e56232494a..775af290cd19 100644
--- a/arch/x86/crypto/twofish-x86_64-asm_64.S
+++ b/arch/x86/crypto/twofish-x86_64-asm_64.S
@@ -252,7 +252,7 @@ SYM_FUNC_START(twofish_enc_blk)
popq R1
movl $1,%eax
- ret
+ RET
SYM_FUNC_END(twofish_enc_blk)
SYM_FUNC_START(twofish_dec_blk)
@@ -304,5 +304,5 @@ SYM_FUNC_START(twofish_dec_blk)
popq R1
movl $1,%eax
- ret
+ RET
SYM_FUNC_END(twofish_dec_blk)