From 7a95cbd9a984644744dd32a0195b22bf0ef714f9 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Wed, 24 Jan 2018 19:10:08 -0800 Subject: crypto: sha1-mb - remove HASH_FIRST flag The HASH_FIRST flag is never set. Remove it. Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu --- arch/x86/crypto/sha1-mb/sha1_mb.c | 28 +++------------------------- arch/x86/crypto/sha1-mb/sha1_mb_ctx.h | 8 +++----- 2 files changed, 6 insertions(+), 30 deletions(-) (limited to 'arch') diff --git a/arch/x86/crypto/sha1-mb/sha1_mb.c b/arch/x86/crypto/sha1-mb/sha1_mb.c index acf9fdf01671..e17655ffde79 100644 --- a/arch/x86/crypto/sha1-mb/sha1_mb.c +++ b/arch/x86/crypto/sha1-mb/sha1_mb.c @@ -106,13 +106,6 @@ static asmlinkage struct job_sha1* (*sha1_job_mgr_flush) static asmlinkage struct job_sha1* (*sha1_job_mgr_get_comp_job) (struct sha1_mb_mgr *state); -static inline void sha1_init_digest(uint32_t *digest) -{ - static const uint32_t initial_digest[SHA1_DIGEST_LENGTH] = {SHA1_H0, - SHA1_H1, SHA1_H2, SHA1_H3, SHA1_H4 }; - memcpy(digest, initial_digest, sizeof(initial_digest)); -} - static inline uint32_t sha1_pad(uint8_t padblock[SHA1_BLOCK_SIZE * 2], uint64_t total_len) { @@ -244,11 +237,8 @@ static struct sha1_hash_ctx *sha1_ctx_mgr_submit(struct sha1_ctx_mgr *mgr, uint32_t len, int flags) { - if (flags & (~HASH_ENTIRE)) { - /* - * User should not pass anything other than FIRST, UPDATE, or - * LAST - */ + if (flags & ~(HASH_UPDATE | HASH_LAST)) { + /* User should not pass anything other than UPDATE or LAST */ ctx->error = HASH_CTX_ERROR_INVALID_FLAGS; return ctx; } @@ -259,24 +249,12 @@ static struct sha1_hash_ctx *sha1_ctx_mgr_submit(struct sha1_ctx_mgr *mgr, return ctx; } - if ((ctx->status & HASH_CTX_STS_COMPLETE) && !(flags & HASH_FIRST)) { + if (ctx->status & HASH_CTX_STS_COMPLETE) { /* Cannot update a finished job. */ ctx->error = HASH_CTX_ERROR_ALREADY_COMPLETED; return ctx; } - - if (flags & HASH_FIRST) { - /* Init digest */ - sha1_init_digest(ctx->job.result_digest); - - /* Reset byte counter */ - ctx->total_length = 0; - - /* Clear extra blocks */ - ctx->partial_block_buffer_length = 0; - } - /* * If we made it here, there were no errors during this call to * submit diff --git a/arch/x86/crypto/sha1-mb/sha1_mb_ctx.h b/arch/x86/crypto/sha1-mb/sha1_mb_ctx.h index 13590ccf965c..9454bd16f9f8 100644 --- a/arch/x86/crypto/sha1-mb/sha1_mb_ctx.h +++ b/arch/x86/crypto/sha1-mb/sha1_mb_ctx.h @@ -57,11 +57,9 @@ #include "sha1_mb_mgr.h" #define HASH_UPDATE 0x00 -#define HASH_FIRST 0x01 -#define HASH_LAST 0x02 -#define HASH_ENTIRE 0x03 -#define HASH_DONE 0x04 -#define HASH_FINAL 0x08 +#define HASH_LAST 0x01 +#define HASH_DONE 0x02 +#define HASH_FINAL 0x04 #define HASH_CTX_STS_IDLE 0x00 #define HASH_CTX_STS_PROCESSING 0x01 -- cgit v1.2.3 From ee689d164abb3ae6951f6c5e8c78f9212a265449 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Wed, 24 Jan 2018 19:10:15 -0800 Subject: crypto: sha256-mb - remove HASH_FIRST flag The HASH_FIRST flag is never set. Remove it. 
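With HASH_FIRST gone, HASH_UPDATE (0x00) and HASH_LAST (0x01) are the only values a caller may legitimately pass, so the submit-time check reduces to flags & ~(HASH_UPDATE | HASH_LAST). A standalone C sketch of that check (not kernel code, just the arithmetic implied by the reworked #defines):

/* Standalone sketch, not kernel code: mirrors the reworked flag values to
 * show what the simplified check in sha*_ctx_mgr_submit() accepts. */
#include <stdio.h>

#define HASH_UPDATE 0x00
#define HASH_LAST   0x01
#define HASH_DONE   0x02        /* internal status bits, never valid input */
#define HASH_FINAL  0x04

static int flags_valid(int flags)
{
        return !(flags & ~(HASH_UPDATE | HASH_LAST));
}

int main(void)
{
        printf("%d %d %d %d\n",
               flags_valid(HASH_UPDATE),                /* 1 */
               flags_valid(HASH_LAST),                  /* 1 */
               flags_valid(HASH_DONE),                  /* 0 */
               flags_valid(HASH_LAST | HASH_FINAL));    /* 0 */
        return 0;
}
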
Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu --- arch/x86/crypto/sha256-mb/sha256_mb.c | 27 +++------------------------ arch/x86/crypto/sha256-mb/sha256_mb_ctx.h | 8 +++----- 2 files changed, 6 insertions(+), 29 deletions(-) (limited to 'arch') diff --git a/arch/x86/crypto/sha256-mb/sha256_mb.c b/arch/x86/crypto/sha256-mb/sha256_mb.c index 7926a226b120..4c46ac1b6653 100644 --- a/arch/x86/crypto/sha256-mb/sha256_mb.c +++ b/arch/x86/crypto/sha256-mb/sha256_mb.c @@ -106,14 +106,6 @@ static asmlinkage struct job_sha256* (*sha256_job_mgr_flush) static asmlinkage struct job_sha256* (*sha256_job_mgr_get_comp_job) (struct sha256_mb_mgr *state); -inline void sha256_init_digest(uint32_t *digest) -{ - static const uint32_t initial_digest[SHA256_DIGEST_LENGTH] = { - SHA256_H0, SHA256_H1, SHA256_H2, SHA256_H3, - SHA256_H4, SHA256_H5, SHA256_H6, SHA256_H7}; - memcpy(digest, initial_digest, sizeof(initial_digest)); -} - inline uint32_t sha256_pad(uint8_t padblock[SHA256_BLOCK_SIZE * 2], uint64_t total_len) { @@ -245,10 +237,8 @@ static struct sha256_hash_ctx *sha256_ctx_mgr_submit(struct sha256_ctx_mgr *mgr, uint32_t len, int flags) { - if (flags & (~HASH_ENTIRE)) { - /* User should not pass anything other than FIRST, UPDATE - * or LAST - */ + if (flags & ~(HASH_UPDATE | HASH_LAST)) { + /* User should not pass anything other than UPDATE or LAST */ ctx->error = HASH_CTX_ERROR_INVALID_FLAGS; return ctx; } @@ -259,23 +249,12 @@ static struct sha256_hash_ctx *sha256_ctx_mgr_submit(struct sha256_ctx_mgr *mgr, return ctx; } - if ((ctx->status & HASH_CTX_STS_COMPLETE) && !(flags & HASH_FIRST)) { + if (ctx->status & HASH_CTX_STS_COMPLETE) { /* Cannot update a finished job. */ ctx->error = HASH_CTX_ERROR_ALREADY_COMPLETED; return ctx; } - if (flags & HASH_FIRST) { - /* Init digest */ - sha256_init_digest(ctx->job.result_digest); - - /* Reset byte counter */ - ctx->total_length = 0; - - /* Clear extra blocks */ - ctx->partial_block_buffer_length = 0; - } - /* If we made it here, there was no error during this call to submit */ ctx->error = HASH_CTX_ERROR_NONE; diff --git a/arch/x86/crypto/sha256-mb/sha256_mb_ctx.h b/arch/x86/crypto/sha256-mb/sha256_mb_ctx.h index aabb30320af0..7c432543dc7f 100644 --- a/arch/x86/crypto/sha256-mb/sha256_mb_ctx.h +++ b/arch/x86/crypto/sha256-mb/sha256_mb_ctx.h @@ -57,11 +57,9 @@ #include "sha256_mb_mgr.h" #define HASH_UPDATE 0x00 -#define HASH_FIRST 0x01 -#define HASH_LAST 0x02 -#define HASH_ENTIRE 0x03 -#define HASH_DONE 0x04 -#define HASH_FINAL 0x08 +#define HASH_LAST 0x01 +#define HASH_DONE 0x02 +#define HASH_FINAL 0x04 #define HASH_CTX_STS_IDLE 0x00 #define HASH_CTX_STS_PROCESSING 0x01 -- cgit v1.2.3 From c7f582f5de82f16b5e2eae80d0228e9fd0749040 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Wed, 24 Jan 2018 19:10:21 -0800 Subject: crypto: sha512-mb - remove HASH_FIRST flag The HASH_FIRST flag is never set. Remove it. 
Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu --- arch/x86/crypto/sha512-mb/sha512_mb.c | 30 +++--------------------------- arch/x86/crypto/sha512-mb/sha512_mb_ctx.h | 8 +++----- 2 files changed, 6 insertions(+), 32 deletions(-) (limited to 'arch') diff --git a/arch/x86/crypto/sha512-mb/sha512_mb.c b/arch/x86/crypto/sha512-mb/sha512_mb.c index 458409b7568d..39e2bbdc1836 100644 --- a/arch/x86/crypto/sha512-mb/sha512_mb.c +++ b/arch/x86/crypto/sha512-mb/sha512_mb.c @@ -107,15 +107,6 @@ static asmlinkage struct job_sha512* (*sha512_job_mgr_flush) static asmlinkage struct job_sha512* (*sha512_job_mgr_get_comp_job) (struct sha512_mb_mgr *state); -inline void sha512_init_digest(uint64_t *digest) -{ - static const uint64_t initial_digest[SHA512_DIGEST_LENGTH] = { - SHA512_H0, SHA512_H1, SHA512_H2, - SHA512_H3, SHA512_H4, SHA512_H5, - SHA512_H6, SHA512_H7 }; - memcpy(digest, initial_digest, sizeof(initial_digest)); -} - inline uint32_t sha512_pad(uint8_t padblock[SHA512_BLOCK_SIZE * 2], uint64_t total_len) { @@ -263,11 +254,8 @@ static struct sha512_hash_ctx mgr = cstate->mgr; spin_lock_irqsave(&cstate->work_lock, irqflags); - if (flags & (~HASH_ENTIRE)) { - /* - * User should not pass anything other than FIRST, UPDATE, or - * LAST - */ + if (flags & ~(HASH_UPDATE | HASH_LAST)) { + /* User should not pass anything other than UPDATE or LAST */ ctx->error = HASH_CTX_ERROR_INVALID_FLAGS; goto unlock; } @@ -278,24 +266,12 @@ static struct sha512_hash_ctx goto unlock; } - if ((ctx->status & HASH_CTX_STS_COMPLETE) && !(flags & HASH_FIRST)) { + if (ctx->status & HASH_CTX_STS_COMPLETE) { /* Cannot update a finished job. */ ctx->error = HASH_CTX_ERROR_ALREADY_COMPLETED; goto unlock; } - - if (flags & HASH_FIRST) { - /* Init digest */ - sha512_init_digest(ctx->job.result_digest); - - /* Reset byte counter */ - ctx->total_length = 0; - - /* Clear extra blocks */ - ctx->partial_block_buffer_length = 0; - } - /* * If we made it here, there were no errors during this call to * submit diff --git a/arch/x86/crypto/sha512-mb/sha512_mb_ctx.h b/arch/x86/crypto/sha512-mb/sha512_mb_ctx.h index e4653f5eec3f..e5c465bd821e 100644 --- a/arch/x86/crypto/sha512-mb/sha512_mb_ctx.h +++ b/arch/x86/crypto/sha512-mb/sha512_mb_ctx.h @@ -57,11 +57,9 @@ #include "sha512_mb_mgr.h" #define HASH_UPDATE 0x00 -#define HASH_FIRST 0x01 -#define HASH_LAST 0x02 -#define HASH_ENTIRE 0x03 -#define HASH_DONE 0x04 -#define HASH_FINAL 0x08 +#define HASH_LAST 0x01 +#define HASH_DONE 0x02 +#define HASH_FINAL 0x04 #define HASH_CTX_STS_IDLE 0x00 #define HASH_CTX_STS_PROCESSING 0x01 -- cgit v1.2.3 From 4ff8b1dd814ba4c2dc4a8ce3cf77274e01bd1c93 Mon Sep 17 00:00:00 2001 From: Jinbum Park Date: Mon, 12 Feb 2018 22:52:37 +0900 Subject: crypto: arm/aes-cipher - move S-box to .rodata section Move the AES inverse S-box to the .rodata section where it is safe from abuse by speculation. 
Signed-off-by: Jinbum Park Acked-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- arch/arm/crypto/aes-cipher-core.S | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) (limited to 'arch') diff --git a/arch/arm/crypto/aes-cipher-core.S b/arch/arm/crypto/aes-cipher-core.S index 54b384084637..184d6c2d15d5 100644 --- a/arch/arm/crypto/aes-cipher-core.S +++ b/arch/arm/crypto/aes-cipher-core.S @@ -174,6 +174,16 @@ .ltorg .endm +ENTRY(__aes_arm_encrypt) + do_crypt fround, crypto_ft_tab, crypto_ft_tab + 1, 2 +ENDPROC(__aes_arm_encrypt) + + .align 5 +ENTRY(__aes_arm_decrypt) + do_crypt iround, crypto_it_tab, __aes_arm_inverse_sbox, 0 +ENDPROC(__aes_arm_decrypt) + + .section ".rodata", "a" .align L1_CACHE_SHIFT .type __aes_arm_inverse_sbox, %object __aes_arm_inverse_sbox: @@ -210,12 +220,3 @@ __aes_arm_inverse_sbox: .byte 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26 .byte 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d .size __aes_arm_inverse_sbox, . - __aes_arm_inverse_sbox - -ENTRY(__aes_arm_encrypt) - do_crypt fround, crypto_ft_tab, crypto_ft_tab + 1, 2 -ENDPROC(__aes_arm_encrypt) - - .align 5 -ENTRY(__aes_arm_decrypt) - do_crypt iround, crypto_it_tab, __aes_arm_inverse_sbox, 0 -ENDPROC(__aes_arm_decrypt) -- cgit v1.2.3 From e1fd316fbae104f96cc9c36073fe102d298f8576 Mon Sep 17 00:00:00 2001 From: Dave Watson Date: Wed, 14 Feb 2018 09:38:12 -0800 Subject: crypto: aesni - Merge INITIAL_BLOCKS_ENC/DEC Use macro operations to merge implemetations of INITIAL_BLOCKS, since they differ by only a small handful of lines. Use macro counter \@ to simplify implementation. Signed-off-by: Dave Watson Signed-off-by: Herbert Xu --- arch/x86/crypto/aesni-intel_asm.S | 298 ++++++-------------------------------- 1 file changed, 48 insertions(+), 250 deletions(-) (limited to 'arch') diff --git a/arch/x86/crypto/aesni-intel_asm.S b/arch/x86/crypto/aesni-intel_asm.S index 12e8484a8ee7..251a65f84198 100644 --- a/arch/x86/crypto/aesni-intel_asm.S +++ b/arch/x86/crypto/aesni-intel_asm.S @@ -276,234 +276,7 @@ _done_read_partial_block_\@: */ -.macro INITIAL_BLOCKS_DEC num_initial_blocks TMP1 TMP2 TMP3 TMP4 TMP5 XMM0 XMM1 \ -XMM2 XMM3 XMM4 XMMDst TMP6 TMP7 i i_seq operation - MOVADQ SHUF_MASK(%rip), %xmm14 - mov arg7, %r10 # %r10 = AAD - mov arg8, %r11 # %r11 = aadLen - pxor %xmm\i, %xmm\i - pxor \XMM2, \XMM2 - - cmp $16, %r11 - jl _get_AAD_rest\num_initial_blocks\operation -_get_AAD_blocks\num_initial_blocks\operation: - movdqu (%r10), %xmm\i - PSHUFB_XMM %xmm14, %xmm\i # byte-reflect the AAD data - pxor %xmm\i, \XMM2 - GHASH_MUL \XMM2, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1 - add $16, %r10 - sub $16, %r11 - cmp $16, %r11 - jge _get_AAD_blocks\num_initial_blocks\operation - - movdqu \XMM2, %xmm\i - - /* read the last <16B of AAD */ -_get_AAD_rest\num_initial_blocks\operation: - cmp $0, %r11 - je _get_AAD_done\num_initial_blocks\operation - - READ_PARTIAL_BLOCK %r10, %r11, \TMP1, %xmm\i - PSHUFB_XMM %xmm14, %xmm\i # byte-reflect the AAD data - pxor \XMM2, %xmm\i - GHASH_MUL %xmm\i, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1 - -_get_AAD_done\num_initial_blocks\operation: - xor %r11, %r11 # initialise the data pointer offset as zero - # start AES for num_initial_blocks blocks - - mov %arg5, %rax # %rax = *Y0 - movdqu (%rax), \XMM0 # XMM0 = Y0 - PSHUFB_XMM %xmm14, \XMM0 - -.if (\i == 5) || (\i == 6) || (\i == 7) - MOVADQ ONE(%RIP),\TMP1 - MOVADQ (%arg1),\TMP2 -.irpc index, \i_seq - paddd \TMP1, \XMM0 # INCR Y0 - movdqa \XMM0, %xmm\index - PSHUFB_XMM %xmm14, %xmm\index # perform a 16 byte swap - pxor 
\TMP2, %xmm\index -.endr - lea 0x10(%arg1),%r10 - mov keysize,%eax - shr $2,%eax # 128->4, 192->6, 256->8 - add $5,%eax # 128->9, 192->11, 256->13 - -aes_loop_initial_dec\num_initial_blocks: - MOVADQ (%r10),\TMP1 -.irpc index, \i_seq - AESENC \TMP1, %xmm\index -.endr - add $16,%r10 - sub $1,%eax - jnz aes_loop_initial_dec\num_initial_blocks - - MOVADQ (%r10), \TMP1 -.irpc index, \i_seq - AESENCLAST \TMP1, %xmm\index # Last Round -.endr -.irpc index, \i_seq - movdqu (%arg3 , %r11, 1), \TMP1 - pxor \TMP1, %xmm\index - movdqu %xmm\index, (%arg2 , %r11, 1) - # write back plaintext/ciphertext for num_initial_blocks - add $16, %r11 - - movdqa \TMP1, %xmm\index - PSHUFB_XMM %xmm14, %xmm\index - # prepare plaintext/ciphertext for GHASH computation -.endr -.endif - - # apply GHASH on num_initial_blocks blocks - -.if \i == 5 - pxor %xmm5, %xmm6 - GHASH_MUL %xmm6, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1 - pxor %xmm6, %xmm7 - GHASH_MUL %xmm7, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1 - pxor %xmm7, %xmm8 - GHASH_MUL %xmm8, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1 -.elseif \i == 6 - pxor %xmm6, %xmm7 - GHASH_MUL %xmm7, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1 - pxor %xmm7, %xmm8 - GHASH_MUL %xmm8, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1 -.elseif \i == 7 - pxor %xmm7, %xmm8 - GHASH_MUL %xmm8, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1 -.endif - cmp $64, %r13 - jl _initial_blocks_done\num_initial_blocks\operation - # no need for precomputed values -/* -* -* Precomputations for HashKey parallel with encryption of first 4 blocks. -* Haskey_i_k holds XORed values of the low and high parts of the Haskey_i -*/ - MOVADQ ONE(%rip), \TMP1 - paddd \TMP1, \XMM0 # INCR Y0 - MOVADQ \XMM0, \XMM1 - PSHUFB_XMM %xmm14, \XMM1 # perform a 16 byte swap - - paddd \TMP1, \XMM0 # INCR Y0 - MOVADQ \XMM0, \XMM2 - PSHUFB_XMM %xmm14, \XMM2 # perform a 16 byte swap - - paddd \TMP1, \XMM0 # INCR Y0 - MOVADQ \XMM0, \XMM3 - PSHUFB_XMM %xmm14, \XMM3 # perform a 16 byte swap - - paddd \TMP1, \XMM0 # INCR Y0 - MOVADQ \XMM0, \XMM4 - PSHUFB_XMM %xmm14, \XMM4 # perform a 16 byte swap - - MOVADQ 0(%arg1),\TMP1 - pxor \TMP1, \XMM1 - pxor \TMP1, \XMM2 - pxor \TMP1, \XMM3 - pxor \TMP1, \XMM4 - movdqa \TMP3, \TMP5 - pshufd $78, \TMP3, \TMP1 - pxor \TMP3, \TMP1 - movdqa \TMP1, HashKey_k(%rsp) - GHASH_MUL \TMP5, \TMP3, \TMP1, \TMP2, \TMP4, \TMP6, \TMP7 -# TMP5 = HashKey^2<<1 (mod poly) - movdqa \TMP5, HashKey_2(%rsp) -# HashKey_2 = HashKey^2<<1 (mod poly) - pshufd $78, \TMP5, \TMP1 - pxor \TMP5, \TMP1 - movdqa \TMP1, HashKey_2_k(%rsp) -.irpc index, 1234 # do 4 rounds - movaps 0x10*\index(%arg1), \TMP1 - AESENC \TMP1, \XMM1 - AESENC \TMP1, \XMM2 - AESENC \TMP1, \XMM3 - AESENC \TMP1, \XMM4 -.endr - GHASH_MUL \TMP5, \TMP3, \TMP1, \TMP2, \TMP4, \TMP6, \TMP7 -# TMP5 = HashKey^3<<1 (mod poly) - movdqa \TMP5, HashKey_3(%rsp) - pshufd $78, \TMP5, \TMP1 - pxor \TMP5, \TMP1 - movdqa \TMP1, HashKey_3_k(%rsp) -.irpc index, 56789 # do next 5 rounds - movaps 0x10*\index(%arg1), \TMP1 - AESENC \TMP1, \XMM1 - AESENC \TMP1, \XMM2 - AESENC \TMP1, \XMM3 - AESENC \TMP1, \XMM4 -.endr - GHASH_MUL \TMP5, \TMP3, \TMP1, \TMP2, \TMP4, \TMP6, \TMP7 -# TMP5 = HashKey^3<<1 (mod poly) - movdqa \TMP5, HashKey_4(%rsp) - pshufd $78, \TMP5, \TMP1 - pxor \TMP5, \TMP1 - movdqa \TMP1, HashKey_4_k(%rsp) - lea 0xa0(%arg1),%r10 - mov keysize,%eax - shr $2,%eax # 128->4, 192->6, 256->8 - sub $4,%eax # 128->0, 192->2, 256->4 - jz aes_loop_pre_dec_done\num_initial_blocks - -aes_loop_pre_dec\num_initial_blocks: - MOVADQ (%r10),\TMP2 -.irpc index, 1234 - AESENC \TMP2, %xmm\index -.endr - add 
$16,%r10 - sub $1,%eax - jnz aes_loop_pre_dec\num_initial_blocks - -aes_loop_pre_dec_done\num_initial_blocks: - MOVADQ (%r10), \TMP2 - AESENCLAST \TMP2, \XMM1 - AESENCLAST \TMP2, \XMM2 - AESENCLAST \TMP2, \XMM3 - AESENCLAST \TMP2, \XMM4 - movdqu 16*0(%arg3 , %r11 , 1), \TMP1 - pxor \TMP1, \XMM1 - movdqu \XMM1, 16*0(%arg2 , %r11 , 1) - movdqa \TMP1, \XMM1 - movdqu 16*1(%arg3 , %r11 , 1), \TMP1 - pxor \TMP1, \XMM2 - movdqu \XMM2, 16*1(%arg2 , %r11 , 1) - movdqa \TMP1, \XMM2 - movdqu 16*2(%arg3 , %r11 , 1), \TMP1 - pxor \TMP1, \XMM3 - movdqu \XMM3, 16*2(%arg2 , %r11 , 1) - movdqa \TMP1, \XMM3 - movdqu 16*3(%arg3 , %r11 , 1), \TMP1 - pxor \TMP1, \XMM4 - movdqu \XMM4, 16*3(%arg2 , %r11 , 1) - movdqa \TMP1, \XMM4 - add $64, %r11 - PSHUFB_XMM %xmm14, \XMM1 # perform a 16 byte swap - pxor \XMMDst, \XMM1 -# combine GHASHed value with the corresponding ciphertext - PSHUFB_XMM %xmm14, \XMM2 # perform a 16 byte swap - PSHUFB_XMM %xmm14, \XMM3 # perform a 16 byte swap - PSHUFB_XMM %xmm14, \XMM4 # perform a 16 byte swap - -_initial_blocks_done\num_initial_blocks\operation: - -.endm - - -/* -* if a = number of total plaintext bytes -* b = floor(a/16) -* num_initial_blocks = b mod 4 -* encrypt the initial num_initial_blocks blocks and apply ghash on -* the ciphertext -* %r10, %r11, %r12, %rax, %xmm5, %xmm6, %xmm7, %xmm8, %xmm9 registers -* are clobbered -* arg1, %arg2, %arg3, %r14 are used as a pointer only, not modified -*/ - - -.macro INITIAL_BLOCKS_ENC num_initial_blocks TMP1 TMP2 TMP3 TMP4 TMP5 XMM0 XMM1 \ +.macro INITIAL_BLOCKS_ENC_DEC TMP1 TMP2 TMP3 TMP4 TMP5 XMM0 XMM1 \ XMM2 XMM3 XMM4 XMMDst TMP6 TMP7 i i_seq operation MOVADQ SHUF_MASK(%rip), %xmm14 mov arg7, %r10 # %r10 = AAD @@ -512,8 +285,8 @@ XMM2 XMM3 XMM4 XMMDst TMP6 TMP7 i i_seq operation pxor \XMM2, \XMM2 cmp $16, %r11 - jl _get_AAD_rest\num_initial_blocks\operation -_get_AAD_blocks\num_initial_blocks\operation: + jl _get_AAD_rest\@ +_get_AAD_blocks\@: movdqu (%r10), %xmm\i PSHUFB_XMM %xmm14, %xmm\i # byte-reflect the AAD data pxor %xmm\i, \XMM2 @@ -521,21 +294,21 @@ _get_AAD_blocks\num_initial_blocks\operation: add $16, %r10 sub $16, %r11 cmp $16, %r11 - jge _get_AAD_blocks\num_initial_blocks\operation + jge _get_AAD_blocks\@ movdqu \XMM2, %xmm\i /* read the last <16B of AAD */ -_get_AAD_rest\num_initial_blocks\operation: +_get_AAD_rest\@: cmp $0, %r11 - je _get_AAD_done\num_initial_blocks\operation + je _get_AAD_done\@ READ_PARTIAL_BLOCK %r10, %r11, \TMP1, %xmm\i PSHUFB_XMM %xmm14, %xmm\i # byte-reflect the AAD data pxor \XMM2, %xmm\i GHASH_MUL %xmm\i, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1 -_get_AAD_done\num_initial_blocks\operation: +_get_AAD_done\@: xor %r11, %r11 # initialise the data pointer offset as zero # start AES for num_initial_blocks blocks @@ -549,7 +322,11 @@ _get_AAD_done\num_initial_blocks\operation: MOVADQ 0(%arg1),\TMP2 .irpc index, \i_seq paddd \TMP1, \XMM0 # INCR Y0 +.ifc \operation, dec + movdqa \XMM0, %xmm\index +.else MOVADQ \XMM0, %xmm\index +.endif PSHUFB_XMM %xmm14, %xmm\index # perform a 16 byte swap pxor \TMP2, %xmm\index .endr @@ -558,14 +335,14 @@ _get_AAD_done\num_initial_blocks\operation: shr $2,%eax # 128->4, 192->6, 256->8 add $5,%eax # 128->9, 192->11, 256->13 -aes_loop_initial_enc\num_initial_blocks: +aes_loop_initial_\@: MOVADQ (%r10),\TMP1 .irpc index, \i_seq AESENC \TMP1, %xmm\index .endr add $16,%r10 sub $1,%eax - jnz aes_loop_initial_enc\num_initial_blocks + jnz aes_loop_initial_\@ MOVADQ (%r10), \TMP1 .irpc index, \i_seq @@ -577,6 +354,10 @@ aes_loop_initial_enc\num_initial_blocks: movdqu 
%xmm\index, (%arg2 , %r11, 1) # write back plaintext/ciphertext for num_initial_blocks add $16, %r11 + +.ifc \operation, dec + movdqa \TMP1, %xmm\index +.endif PSHUFB_XMM %xmm14, %xmm\index # prepare plaintext/ciphertext for GHASH computation @@ -602,7 +383,7 @@ aes_loop_initial_enc\num_initial_blocks: GHASH_MUL %xmm8, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1 .endif cmp $64, %r13 - jl _initial_blocks_done\num_initial_blocks\operation + jl _initial_blocks_done\@ # no need for precomputed values /* * @@ -672,18 +453,18 @@ aes_loop_initial_enc\num_initial_blocks: mov keysize,%eax shr $2,%eax # 128->4, 192->6, 256->8 sub $4,%eax # 128->0, 192->2, 256->4 - jz aes_loop_pre_enc_done\num_initial_blocks + jz aes_loop_pre_done\@ -aes_loop_pre_enc\num_initial_blocks: +aes_loop_pre_\@: MOVADQ (%r10),\TMP2 .irpc index, 1234 AESENC \TMP2, %xmm\index .endr add $16,%r10 sub $1,%eax - jnz aes_loop_pre_enc\num_initial_blocks + jnz aes_loop_pre_\@ -aes_loop_pre_enc_done\num_initial_blocks: +aes_loop_pre_done\@: MOVADQ (%r10), \TMP2 AESENCLAST \TMP2, \XMM1 AESENCLAST \TMP2, \XMM2 @@ -691,16 +472,33 @@ aes_loop_pre_enc_done\num_initial_blocks: AESENCLAST \TMP2, \XMM4 movdqu 16*0(%arg3 , %r11 , 1), \TMP1 pxor \TMP1, \XMM1 +.ifc \operation, dec + movdqu \XMM1, 16*0(%arg2 , %r11 , 1) + movdqa \TMP1, \XMM1 +.endif movdqu 16*1(%arg3 , %r11 , 1), \TMP1 pxor \TMP1, \XMM2 +.ifc \operation, dec + movdqu \XMM2, 16*1(%arg2 , %r11 , 1) + movdqa \TMP1, \XMM2 +.endif movdqu 16*2(%arg3 , %r11 , 1), \TMP1 pxor \TMP1, \XMM3 +.ifc \operation, dec + movdqu \XMM3, 16*2(%arg2 , %r11 , 1) + movdqa \TMP1, \XMM3 +.endif movdqu 16*3(%arg3 , %r11 , 1), \TMP1 pxor \TMP1, \XMM4 +.ifc \operation, dec + movdqu \XMM4, 16*3(%arg2 , %r11 , 1) + movdqa \TMP1, \XMM4 +.else movdqu \XMM1, 16*0(%arg2 , %r11 , 1) movdqu \XMM2, 16*1(%arg2 , %r11 , 1) movdqu \XMM3, 16*2(%arg2 , %r11 , 1) movdqu \XMM4, 16*3(%arg2 , %r11 , 1) +.endif add $64, %r11 PSHUFB_XMM %xmm14, \XMM1 # perform a 16 byte swap @@ -710,7 +508,7 @@ aes_loop_pre_enc_done\num_initial_blocks: PSHUFB_XMM %xmm14, \XMM3 # perform a 16 byte swap PSHUFB_XMM %xmm14, \XMM4 # perform a 16 byte swap -_initial_blocks_done\num_initial_blocks\operation: +_initial_blocks_done\@: .endm @@ -1379,22 +1177,22 @@ ENTRY(aesni_gcm_dec) jb _initial_num_blocks_is_1_decrypt je _initial_num_blocks_is_2_decrypt _initial_num_blocks_is_3_decrypt: - INITIAL_BLOCKS_DEC 3, %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \ + INITIAL_BLOCKS_ENC_DEC %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \ %xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 5, 678, dec sub $48, %r13 jmp _initial_blocks_decrypted _initial_num_blocks_is_2_decrypt: - INITIAL_BLOCKS_DEC 2, %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \ + INITIAL_BLOCKS_ENC_DEC %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \ %xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 6, 78, dec sub $32, %r13 jmp _initial_blocks_decrypted _initial_num_blocks_is_1_decrypt: - INITIAL_BLOCKS_DEC 1, %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \ + INITIAL_BLOCKS_ENC_DEC %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \ %xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 7, 8, dec sub $16, %r13 jmp _initial_blocks_decrypted _initial_num_blocks_is_0_decrypt: - INITIAL_BLOCKS_DEC 0, %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \ + INITIAL_BLOCKS_ENC_DEC %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \ %xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 8, 0, dec _initial_blocks_decrypted: cmp $0, %r13 @@ -1641,22 +1439,22 @@ ENTRY(aesni_gcm_enc) jb _initial_num_blocks_is_1_encrypt je 
_initial_num_blocks_is_2_encrypt _initial_num_blocks_is_3_encrypt: - INITIAL_BLOCKS_ENC 3, %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \ + INITIAL_BLOCKS_ENC_DEC %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \ %xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 5, 678, enc sub $48, %r13 jmp _initial_blocks_encrypted _initial_num_blocks_is_2_encrypt: - INITIAL_BLOCKS_ENC 2, %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \ + INITIAL_BLOCKS_ENC_DEC %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \ %xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 6, 78, enc sub $32, %r13 jmp _initial_blocks_encrypted _initial_num_blocks_is_1_encrypt: - INITIAL_BLOCKS_ENC 1, %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \ + INITIAL_BLOCKS_ENC_DEC %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \ %xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 7, 8, enc sub $16, %r13 jmp _initial_blocks_encrypted _initial_num_blocks_is_0_encrypt: - INITIAL_BLOCKS_ENC 0, %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \ + INITIAL_BLOCKS_ENC_DEC %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \ %xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 8, 0, enc _initial_blocks_encrypted: -- cgit v1.2.3 From 6c2c86b3e05822abec2fc0d7a5c31637b57cb126 Mon Sep 17 00:00:00 2001 From: Dave Watson Date: Wed, 14 Feb 2018 09:38:35 -0800 Subject: crypto: aesni - Macro-ify func save/restore Macro-ify function save and restore. These will be used in new functions added for scatter/gather update operations. Signed-off-by: Dave Watson Signed-off-by: Herbert Xu --- arch/x86/crypto/aesni-intel_asm.S | 53 ++++++++++++++++++--------------------- 1 file changed, 24 insertions(+), 29 deletions(-) (limited to 'arch') diff --git a/arch/x86/crypto/aesni-intel_asm.S b/arch/x86/crypto/aesni-intel_asm.S index 251a65f84198..d2645f62720e 100644 --- a/arch/x86/crypto/aesni-intel_asm.S +++ b/arch/x86/crypto/aesni-intel_asm.S @@ -171,6 +171,26 @@ ALL_F: .octa 0xffffffffffffffffffffffffffffffff #define TKEYP T1 #endif +.macro FUNC_SAVE + push %r12 + push %r13 + push %r14 + mov %rsp, %r14 +# +# states of %xmm registers %xmm6:%xmm15 not saved +# all %xmm registers are clobbered +# + sub $VARIABLE_OFFSET, %rsp + and $~63, %rsp +.endm + + +.macro FUNC_RESTORE + mov %r14, %rsp + pop %r14 + pop %r13 + pop %r12 +.endm #ifdef __x86_64__ /* GHASH_MUL MACRO to implement: Data*HashKey mod (128,127,126,121,0) @@ -1131,16 +1151,7 @@ _esb_loop_\@: * *****************************************************************************/ ENTRY(aesni_gcm_dec) - push %r12 - push %r13 - push %r14 - mov %rsp, %r14 -/* -* states of %xmm registers %xmm6:%xmm15 not saved -* all %xmm registers are clobbered -*/ - sub $VARIABLE_OFFSET, %rsp - and $~63, %rsp # align rsp to 64 bytes + FUNC_SAVE mov %arg6, %r12 movdqu (%r12), %xmm13 # %xmm13 = HashKey movdqa SHUF_MASK(%rip), %xmm2 @@ -1310,10 +1321,7 @@ _T_1_decrypt: _T_16_decrypt: movdqu %xmm0, (%r10) _return_T_done_decrypt: - mov %r14, %rsp - pop %r14 - pop %r13 - pop %r12 + FUNC_RESTORE ret ENDPROC(aesni_gcm_dec) @@ -1394,22 +1402,12 @@ ENDPROC(aesni_gcm_dec) * poly = x^128 + x^127 + x^126 + x^121 + 1 ***************************************************************************/ ENTRY(aesni_gcm_enc) - push %r12 - push %r13 - push %r14 - mov %rsp, %r14 -# -# states of %xmm registers %xmm6:%xmm15 not saved -# all %xmm registers are clobbered -# - sub $VARIABLE_OFFSET, %rsp - and $~63, %rsp + FUNC_SAVE mov %arg6, %r12 movdqu (%r12), %xmm13 movdqa SHUF_MASK(%rip), %xmm2 PSHUFB_XMM %xmm2, %xmm13 - # precompute HashKey<<1 mod poly from the HashKey (required for GHASH) 
movdqa %xmm13, %xmm2 @@ -1577,10 +1575,7 @@ _T_1_encrypt: _T_16_encrypt: movdqu %xmm0, (%r10) _return_T_done_encrypt: - mov %r14, %rsp - pop %r14 - pop %r13 - pop %r12 + FUNC_RESTORE ret ENDPROC(aesni_gcm_enc) -- cgit v1.2.3 From 7af964c2fc7f415b334eb9d2e52ebdf258df662e Mon Sep 17 00:00:00 2001 From: Dave Watson Date: Wed, 14 Feb 2018 09:38:45 -0800 Subject: crypto: aesni - Add GCM_INIT macro Reduce code duplication by introducting GCM_INIT macro. This macro will also be exposed as a function for implementing scatter/gather support, since INIT only needs to be called once for the full operation. Signed-off-by: Dave Watson Signed-off-by: Herbert Xu --- arch/x86/crypto/aesni-intel_asm.S | 84 +++++++++++++++------------------------ 1 file changed, 33 insertions(+), 51 deletions(-) (limited to 'arch') diff --git a/arch/x86/crypto/aesni-intel_asm.S b/arch/x86/crypto/aesni-intel_asm.S index d2645f62720e..d73fee82e630 100644 --- a/arch/x86/crypto/aesni-intel_asm.S +++ b/arch/x86/crypto/aesni-intel_asm.S @@ -192,6 +192,37 @@ ALL_F: .octa 0xffffffffffffffffffffffffffffffff pop %r12 .endm + +# GCM_INIT initializes a gcm_context struct to prepare for encoding/decoding. +# Clobbers rax, r10-r13 and xmm0-xmm6, %xmm13 +.macro GCM_INIT + mov %arg6, %r12 + movdqu (%r12), %xmm13 + movdqa SHUF_MASK(%rip), %xmm2 + PSHUFB_XMM %xmm2, %xmm13 + + # precompute HashKey<<1 mod poly from the HashKey (required for GHASH) + + movdqa %xmm13, %xmm2 + psllq $1, %xmm13 + psrlq $63, %xmm2 + movdqa %xmm2, %xmm1 + pslldq $8, %xmm2 + psrldq $8, %xmm1 + por %xmm2, %xmm13 + + # reduce HashKey<<1 + + pshufd $0x24, %xmm1, %xmm2 + pcmpeqd TWOONE(%rip), %xmm2 + pand POLY(%rip), %xmm2 + pxor %xmm2, %xmm13 + movdqa %xmm13, HashKey(%rsp) + mov %arg4, %r13 # %xmm13 holds HashKey<<1 (mod poly) + and $-16, %r13 + mov %r13, %r12 +.endm + #ifdef __x86_64__ /* GHASH_MUL MACRO to implement: Data*HashKey mod (128,127,126,121,0) * @@ -1152,36 +1183,11 @@ _esb_loop_\@: *****************************************************************************/ ENTRY(aesni_gcm_dec) FUNC_SAVE - mov %arg6, %r12 - movdqu (%r12), %xmm13 # %xmm13 = HashKey - movdqa SHUF_MASK(%rip), %xmm2 - PSHUFB_XMM %xmm2, %xmm13 - - -# Precompute HashKey<<1 (mod poly) from the hash key (required for GHASH) - - movdqa %xmm13, %xmm2 - psllq $1, %xmm13 - psrlq $63, %xmm2 - movdqa %xmm2, %xmm1 - pslldq $8, %xmm2 - psrldq $8, %xmm1 - por %xmm2, %xmm13 - - # Reduction - - pshufd $0x24, %xmm1, %xmm2 - pcmpeqd TWOONE(%rip), %xmm2 - pand POLY(%rip), %xmm2 - pxor %xmm2, %xmm13 # %xmm13 holds the HashKey<<1 (mod poly) + GCM_INIT # Decrypt first few blocks - movdqa %xmm13, HashKey(%rsp) # store HashKey<<1 (mod poly) - mov %arg4, %r13 # save the number of bytes of plaintext/ciphertext - and $-16, %r13 # %r13 = %r13 - (%r13 mod 16) - mov %r13, %r12 and $(3<<4), %r12 jz _initial_num_blocks_is_0_decrypt cmp $(2<<4), %r12 @@ -1403,32 +1409,8 @@ ENDPROC(aesni_gcm_dec) ***************************************************************************/ ENTRY(aesni_gcm_enc) FUNC_SAVE - mov %arg6, %r12 - movdqu (%r12), %xmm13 - movdqa SHUF_MASK(%rip), %xmm2 - PSHUFB_XMM %xmm2, %xmm13 - -# precompute HashKey<<1 mod poly from the HashKey (required for GHASH) - - movdqa %xmm13, %xmm2 - psllq $1, %xmm13 - psrlq $63, %xmm2 - movdqa %xmm2, %xmm1 - pslldq $8, %xmm2 - psrldq $8, %xmm1 - por %xmm2, %xmm13 - - # reduce HashKey<<1 - - pshufd $0x24, %xmm1, %xmm2 - pcmpeqd TWOONE(%rip), %xmm2 - pand POLY(%rip), %xmm2 - pxor %xmm2, %xmm13 - movdqa %xmm13, HashKey(%rsp) - mov %arg4, %r13 # %xmm13 holds HashKey<<1 (mod 
poly) - and $-16, %r13 - mov %r13, %r12 + GCM_INIT # Encrypt first few blocks and $(3<<4), %r12 -- cgit v1.2.3 From adcadab3698d5104e68899c6d2a2230399780807 Mon Sep 17 00:00:00 2001 From: Dave Watson Date: Wed, 14 Feb 2018 09:38:57 -0800 Subject: crypto: aesni - Add GCM_COMPLETE macro Merge encode and decode tag calculations in GCM_COMPLETE macro. Scatter/gather routines will call this once at the end of encryption or decryption. Signed-off-by: Dave Watson Signed-off-by: Herbert Xu --- arch/x86/crypto/aesni-intel_asm.S | 172 ++++++++++++++------------------------ 1 file changed, 63 insertions(+), 109 deletions(-) (limited to 'arch') diff --git a/arch/x86/crypto/aesni-intel_asm.S b/arch/x86/crypto/aesni-intel_asm.S index d73fee82e630..b2e2793ecd28 100644 --- a/arch/x86/crypto/aesni-intel_asm.S +++ b/arch/x86/crypto/aesni-intel_asm.S @@ -223,6 +223,67 @@ ALL_F: .octa 0xffffffffffffffffffffffffffffffff mov %r13, %r12 .endm +# GCM_COMPLETE Finishes update of tag of last partial block +# Output: Authorization Tag (AUTH_TAG) +# Clobbers rax, r10-r12, and xmm0, xmm1, xmm5-xmm15 +.macro GCM_COMPLETE + mov arg8, %r12 # %r13 = aadLen (number of bytes) + shl $3, %r12 # convert into number of bits + movd %r12d, %xmm15 # len(A) in %xmm15 + shl $3, %arg4 # len(C) in bits (*128) + MOVQ_R64_XMM %arg4, %xmm1 + pslldq $8, %xmm15 # %xmm15 = len(A)||0x0000000000000000 + pxor %xmm1, %xmm15 # %xmm15 = len(A)||len(C) + pxor %xmm15, %xmm8 + GHASH_MUL %xmm8, %xmm13, %xmm9, %xmm10, %xmm11, %xmm5, %xmm6 + # final GHASH computation + movdqa SHUF_MASK(%rip), %xmm10 + PSHUFB_XMM %xmm10, %xmm8 + + mov %arg5, %rax # %rax = *Y0 + movdqu (%rax), %xmm0 # %xmm0 = Y0 + ENCRYPT_SINGLE_BLOCK %xmm0, %xmm1 # E(K, Y0) + pxor %xmm8, %xmm0 +_return_T_\@: + mov arg9, %r10 # %r10 = authTag + mov arg10, %r11 # %r11 = auth_tag_len + cmp $16, %r11 + je _T_16_\@ + cmp $8, %r11 + jl _T_4_\@ +_T_8_\@: + MOVQ_R64_XMM %xmm0, %rax + mov %rax, (%r10) + add $8, %r10 + sub $8, %r11 + psrldq $8, %xmm0 + cmp $0, %r11 + je _return_T_done_\@ +_T_4_\@: + movd %xmm0, %eax + mov %eax, (%r10) + add $4, %r10 + sub $4, %r11 + psrldq $4, %xmm0 + cmp $0, %r11 + je _return_T_done_\@ +_T_123_\@: + movd %xmm0, %eax + cmp $2, %r11 + jl _T_1_\@ + mov %ax, (%r10) + cmp $2, %r11 + je _return_T_done_\@ + add $2, %r10 + sar $16, %eax +_T_1_\@: + mov %al, (%r10) + jmp _return_T_done_\@ +_T_16_\@: + movdqu %xmm0, (%r10) +_return_T_done_\@: +.endm + #ifdef __x86_64__ /* GHASH_MUL MACRO to implement: Data*HashKey mod (128,127,126,121,0) * @@ -1272,61 +1333,7 @@ _less_than_8_bytes_left_decrypt: sub $1, %r13 jne _less_than_8_bytes_left_decrypt _multiple_of_16_bytes_decrypt: - mov arg8, %r12 # %r13 = aadLen (number of bytes) - shl $3, %r12 # convert into number of bits - movd %r12d, %xmm15 # len(A) in %xmm15 - shl $3, %arg4 # len(C) in bits (*128) - MOVQ_R64_XMM %arg4, %xmm1 - pslldq $8, %xmm15 # %xmm15 = len(A)||0x0000000000000000 - pxor %xmm1, %xmm15 # %xmm15 = len(A)||len(C) - pxor %xmm15, %xmm8 - GHASH_MUL %xmm8, %xmm13, %xmm9, %xmm10, %xmm11, %xmm5, %xmm6 - # final GHASH computation - movdqa SHUF_MASK(%rip), %xmm10 - PSHUFB_XMM %xmm10, %xmm8 - - mov %arg5, %rax # %rax = *Y0 - movdqu (%rax), %xmm0 # %xmm0 = Y0 - ENCRYPT_SINGLE_BLOCK %xmm0, %xmm1 # E(K, Y0) - pxor %xmm8, %xmm0 -_return_T_decrypt: - mov arg9, %r10 # %r10 = authTag - mov arg10, %r11 # %r11 = auth_tag_len - cmp $16, %r11 - je _T_16_decrypt - cmp $8, %r11 - jl _T_4_decrypt -_T_8_decrypt: - MOVQ_R64_XMM %xmm0, %rax - mov %rax, (%r10) - add $8, %r10 - sub $8, %r11 - psrldq $8, %xmm0 - cmp $0, %r11 - je 
_return_T_done_decrypt -_T_4_decrypt: - movd %xmm0, %eax - mov %eax, (%r10) - add $4, %r10 - sub $4, %r11 - psrldq $4, %xmm0 - cmp $0, %r11 - je _return_T_done_decrypt -_T_123_decrypt: - movd %xmm0, %eax - cmp $2, %r11 - jl _T_1_decrypt - mov %ax, (%r10) - cmp $2, %r11 - je _return_T_done_decrypt - add $2, %r10 - sar $16, %eax -_T_1_decrypt: - mov %al, (%r10) - jmp _return_T_done_decrypt -_T_16_decrypt: - movdqu %xmm0, (%r10) -_return_T_done_decrypt: + GCM_COMPLETE FUNC_RESTORE ret ENDPROC(aesni_gcm_dec) @@ -1502,61 +1509,8 @@ _less_than_8_bytes_left_encrypt: sub $1, %r13 jne _less_than_8_bytes_left_encrypt _multiple_of_16_bytes_encrypt: - mov arg8, %r12 # %r12 = addLen (number of bytes) - shl $3, %r12 - movd %r12d, %xmm15 # len(A) in %xmm15 - shl $3, %arg4 # len(C) in bits (*128) - MOVQ_R64_XMM %arg4, %xmm1 - pslldq $8, %xmm15 # %xmm15 = len(A)||0x0000000000000000 - pxor %xmm1, %xmm15 # %xmm15 = len(A)||len(C) - pxor %xmm15, %xmm8 - GHASH_MUL %xmm8, %xmm13, %xmm9, %xmm10, %xmm11, %xmm5, %xmm6 - # final GHASH computation - movdqa SHUF_MASK(%rip), %xmm10 - PSHUFB_XMM %xmm10, %xmm8 # perform a 16 byte swap - - mov %arg5, %rax # %rax = *Y0 - movdqu (%rax), %xmm0 # %xmm0 = Y0 - ENCRYPT_SINGLE_BLOCK %xmm0, %xmm15 # Encrypt(K, Y0) - pxor %xmm8, %xmm0 _return_T_encrypt: - mov arg9, %r10 # %r10 = authTag - mov arg10, %r11 # %r11 = auth_tag_len - cmp $16, %r11 - je _T_16_encrypt - cmp $8, %r11 - jl _T_4_encrypt -_T_8_encrypt: - MOVQ_R64_XMM %xmm0, %rax - mov %rax, (%r10) - add $8, %r10 - sub $8, %r11 - psrldq $8, %xmm0 - cmp $0, %r11 - je _return_T_done_encrypt -_T_4_encrypt: - movd %xmm0, %eax - mov %eax, (%r10) - add $4, %r10 - sub $4, %r11 - psrldq $4, %xmm0 - cmp $0, %r11 - je _return_T_done_encrypt -_T_123_encrypt: - movd %xmm0, %eax - cmp $2, %r11 - jl _T_1_encrypt - mov %ax, (%r10) - cmp $2, %r11 - je _return_T_done_encrypt - add $2, %r10 - sar $16, %eax -_T_1_encrypt: - mov %al, (%r10) - jmp _return_T_done_encrypt -_T_16_encrypt: - movdqu %xmm0, (%r10) -_return_T_done_encrypt: + GCM_COMPLETE FUNC_RESTORE ret ENDPROC(aesni_gcm_enc) -- cgit v1.2.3 From ba45833e3e76aac409b7ebc3a428515a1b53ebda Mon Sep 17 00:00:00 2001 From: Dave Watson Date: Wed, 14 Feb 2018 09:39:10 -0800 Subject: crypto: aesni - Merge encode and decode to GCM_ENC_DEC macro Make a macro for the main encode/decode routine. Only a small handful of lines differ for enc and dec. This will also become the main scatter/gather update routine. Signed-off-by: Dave Watson Signed-off-by: Herbert Xu --- arch/x86/crypto/aesni-intel_asm.S | 293 +++++++++++++++----------------------- 1 file changed, 114 insertions(+), 179 deletions(-) (limited to 'arch') diff --git a/arch/x86/crypto/aesni-intel_asm.S b/arch/x86/crypto/aesni-intel_asm.S index b2e2793ecd28..796d1a52cf03 100644 --- a/arch/x86/crypto/aesni-intel_asm.S +++ b/arch/x86/crypto/aesni-intel_asm.S @@ -223,6 +223,118 @@ ALL_F: .octa 0xffffffffffffffffffffffffffffffff mov %r13, %r12 .endm +# GCM_ENC_DEC Encodes/Decodes given data. Assumes that the passed gcm_context +# struct has been initialized by GCM_INIT. 
+# Requires the input data be at least 1 byte long because of READ_PARTIAL_BLOCK +# Clobbers rax, r10-r13, and xmm0-xmm15 +.macro GCM_ENC_DEC operation + # Encrypt/Decrypt first few blocks + + and $(3<<4), %r12 + jz _initial_num_blocks_is_0_\@ + cmp $(2<<4), %r12 + jb _initial_num_blocks_is_1_\@ + je _initial_num_blocks_is_2_\@ +_initial_num_blocks_is_3_\@: + INITIAL_BLOCKS_ENC_DEC %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \ +%xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 5, 678, \operation + sub $48, %r13 + jmp _initial_blocks_\@ +_initial_num_blocks_is_2_\@: + INITIAL_BLOCKS_ENC_DEC %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \ +%xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 6, 78, \operation + sub $32, %r13 + jmp _initial_blocks_\@ +_initial_num_blocks_is_1_\@: + INITIAL_BLOCKS_ENC_DEC %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \ +%xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 7, 8, \operation + sub $16, %r13 + jmp _initial_blocks_\@ +_initial_num_blocks_is_0_\@: + INITIAL_BLOCKS_ENC_DEC %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \ +%xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 8, 0, \operation +_initial_blocks_\@: + + # Main loop - Encrypt/Decrypt remaining blocks + + cmp $0, %r13 + je _zero_cipher_left_\@ + sub $64, %r13 + je _four_cipher_left_\@ +_crypt_by_4_\@: + GHASH_4_ENCRYPT_4_PARALLEL_\operation %xmm9, %xmm10, %xmm11, %xmm12, \ + %xmm13, %xmm14, %xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, \ + %xmm7, %xmm8, enc + add $64, %r11 + sub $64, %r13 + jne _crypt_by_4_\@ +_four_cipher_left_\@: + GHASH_LAST_4 %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, \ +%xmm15, %xmm1, %xmm2, %xmm3, %xmm4, %xmm8 +_zero_cipher_left_\@: + mov %arg4, %r13 + and $15, %r13 # %r13 = arg4 (mod 16) + je _multiple_of_16_bytes_\@ + + # Handle the last <16 Byte block separately + paddd ONE(%rip), %xmm0 # INCR CNT to get Yn + movdqa SHUF_MASK(%rip), %xmm10 + PSHUFB_XMM %xmm10, %xmm0 + + ENCRYPT_SINGLE_BLOCK %xmm0, %xmm1 # Encrypt(K, Yn) + + lea (%arg3,%r11,1), %r10 + mov %r13, %r12 + READ_PARTIAL_BLOCK %r10 %r12 %xmm2 %xmm1 + + lea ALL_F+16(%rip), %r12 + sub %r13, %r12 +.ifc \operation, dec + movdqa %xmm1, %xmm2 +.endif + pxor %xmm1, %xmm0 # XOR Encrypt(K, Yn) + movdqu (%r12), %xmm1 + # get the appropriate mask to mask out top 16-r13 bytes of xmm0 + pand %xmm1, %xmm0 # mask out top 16-r13 bytes of xmm0 +.ifc \operation, dec + pand %xmm1, %xmm2 + movdqa SHUF_MASK(%rip), %xmm10 + PSHUFB_XMM %xmm10 ,%xmm2 + + pxor %xmm2, %xmm8 +.else + movdqa SHUF_MASK(%rip), %xmm10 + PSHUFB_XMM %xmm10,%xmm0 + + pxor %xmm0, %xmm8 +.endif + + GHASH_MUL %xmm8, %xmm13, %xmm9, %xmm10, %xmm11, %xmm5, %xmm6 +.ifc \operation, enc + # GHASH computation for the last <16 byte block + movdqa SHUF_MASK(%rip), %xmm10 + # shuffle xmm0 back to output as ciphertext + PSHUFB_XMM %xmm10, %xmm0 +.endif + + # Output %r13 bytes + MOVQ_R64_XMM %xmm0, %rax + cmp $8, %r13 + jle _less_than_8_bytes_left_\@ + mov %rax, (%arg2 , %r11, 1) + add $8, %r11 + psrldq $8, %xmm0 + MOVQ_R64_XMM %xmm0, %rax + sub $8, %r13 +_less_than_8_bytes_left_\@: + mov %al, (%arg2, %r11, 1) + add $1, %r11 + shr $8, %rax + sub $1, %r13 + jne _less_than_8_bytes_left_\@ +_multiple_of_16_bytes_\@: +.endm + # GCM_COMPLETE Finishes update of tag of last partial block # Output: Authorization Tag (AUTH_TAG) # Clobbers rax, r10-r12, and xmm0, xmm1, xmm5-xmm15 @@ -1246,93 +1358,7 @@ ENTRY(aesni_gcm_dec) FUNC_SAVE GCM_INIT - - # Decrypt first few blocks - - and $(3<<4), %r12 - jz _initial_num_blocks_is_0_decrypt - cmp $(2<<4), %r12 - jb 
_initial_num_blocks_is_1_decrypt - je _initial_num_blocks_is_2_decrypt -_initial_num_blocks_is_3_decrypt: - INITIAL_BLOCKS_ENC_DEC %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \ -%xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 5, 678, dec - sub $48, %r13 - jmp _initial_blocks_decrypted -_initial_num_blocks_is_2_decrypt: - INITIAL_BLOCKS_ENC_DEC %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \ -%xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 6, 78, dec - sub $32, %r13 - jmp _initial_blocks_decrypted -_initial_num_blocks_is_1_decrypt: - INITIAL_BLOCKS_ENC_DEC %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \ -%xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 7, 8, dec - sub $16, %r13 - jmp _initial_blocks_decrypted -_initial_num_blocks_is_0_decrypt: - INITIAL_BLOCKS_ENC_DEC %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \ -%xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 8, 0, dec -_initial_blocks_decrypted: - cmp $0, %r13 - je _zero_cipher_left_decrypt - sub $64, %r13 - je _four_cipher_left_decrypt -_decrypt_by_4: - GHASH_4_ENCRYPT_4_PARALLEL_DEC %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, \ -%xmm14, %xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, dec - add $64, %r11 - sub $64, %r13 - jne _decrypt_by_4 -_four_cipher_left_decrypt: - GHASH_LAST_4 %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, \ -%xmm15, %xmm1, %xmm2, %xmm3, %xmm4, %xmm8 -_zero_cipher_left_decrypt: - mov %arg4, %r13 - and $15, %r13 # %r13 = arg4 (mod 16) - je _multiple_of_16_bytes_decrypt - - # Handle the last <16 byte block separately - - paddd ONE(%rip), %xmm0 # increment CNT to get Yn - movdqa SHUF_MASK(%rip), %xmm10 - PSHUFB_XMM %xmm10, %xmm0 - - ENCRYPT_SINGLE_BLOCK %xmm0, %xmm1 # E(K, Yn) - - lea (%arg3,%r11,1), %r10 - mov %r13, %r12 - READ_PARTIAL_BLOCK %r10 %r12 %xmm2 %xmm1 - - lea ALL_F+16(%rip), %r12 - sub %r13, %r12 - movdqa %xmm1, %xmm2 - pxor %xmm1, %xmm0 # Ciphertext XOR E(K, Yn) - movdqu (%r12), %xmm1 - # get the appropriate mask to mask out top 16-%r13 bytes of %xmm0 - pand %xmm1, %xmm0 # mask out top 16-%r13 bytes of %xmm0 - pand %xmm1, %xmm2 - movdqa SHUF_MASK(%rip), %xmm10 - PSHUFB_XMM %xmm10 ,%xmm2 - - pxor %xmm2, %xmm8 - GHASH_MUL %xmm8, %xmm13, %xmm9, %xmm10, %xmm11, %xmm5, %xmm6 - - # output %r13 bytes - MOVQ_R64_XMM %xmm0, %rax - cmp $8, %r13 - jle _less_than_8_bytes_left_decrypt - mov %rax, (%arg2 , %r11, 1) - add $8, %r11 - psrldq $8, %xmm0 - MOVQ_R64_XMM %xmm0, %rax - sub $8, %r13 -_less_than_8_bytes_left_decrypt: - mov %al, (%arg2, %r11, 1) - add $1, %r11 - shr $8, %rax - sub $1, %r13 - jne _less_than_8_bytes_left_decrypt -_multiple_of_16_bytes_decrypt: + GCM_ENC_DEC dec GCM_COMPLETE FUNC_RESTORE ret @@ -1418,98 +1444,7 @@ ENTRY(aesni_gcm_enc) FUNC_SAVE GCM_INIT - # Encrypt first few blocks - - and $(3<<4), %r12 - jz _initial_num_blocks_is_0_encrypt - cmp $(2<<4), %r12 - jb _initial_num_blocks_is_1_encrypt - je _initial_num_blocks_is_2_encrypt -_initial_num_blocks_is_3_encrypt: - INITIAL_BLOCKS_ENC_DEC %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \ -%xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 5, 678, enc - sub $48, %r13 - jmp _initial_blocks_encrypted -_initial_num_blocks_is_2_encrypt: - INITIAL_BLOCKS_ENC_DEC %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \ -%xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 6, 78, enc - sub $32, %r13 - jmp _initial_blocks_encrypted -_initial_num_blocks_is_1_encrypt: - INITIAL_BLOCKS_ENC_DEC %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \ -%xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 7, 8, enc - sub $16, %r13 - jmp _initial_blocks_encrypted 
-_initial_num_blocks_is_0_encrypt: - INITIAL_BLOCKS_ENC_DEC %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \ -%xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 8, 0, enc -_initial_blocks_encrypted: - - # Main loop - Encrypt remaining blocks - - cmp $0, %r13 - je _zero_cipher_left_encrypt - sub $64, %r13 - je _four_cipher_left_encrypt -_encrypt_by_4_encrypt: - GHASH_4_ENCRYPT_4_PARALLEL_ENC %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, \ -%xmm14, %xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, enc - add $64, %r11 - sub $64, %r13 - jne _encrypt_by_4_encrypt -_four_cipher_left_encrypt: - GHASH_LAST_4 %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, \ -%xmm15, %xmm1, %xmm2, %xmm3, %xmm4, %xmm8 -_zero_cipher_left_encrypt: - mov %arg4, %r13 - and $15, %r13 # %r13 = arg4 (mod 16) - je _multiple_of_16_bytes_encrypt - - # Handle the last <16 Byte block separately - paddd ONE(%rip), %xmm0 # INCR CNT to get Yn - movdqa SHUF_MASK(%rip), %xmm10 - PSHUFB_XMM %xmm10, %xmm0 - - ENCRYPT_SINGLE_BLOCK %xmm0, %xmm1 # Encrypt(K, Yn) - - lea (%arg3,%r11,1), %r10 - mov %r13, %r12 - READ_PARTIAL_BLOCK %r10 %r12 %xmm2 %xmm1 - - lea ALL_F+16(%rip), %r12 - sub %r13, %r12 - pxor %xmm1, %xmm0 # Plaintext XOR Encrypt(K, Yn) - movdqu (%r12), %xmm1 - # get the appropriate mask to mask out top 16-r13 bytes of xmm0 - pand %xmm1, %xmm0 # mask out top 16-r13 bytes of xmm0 - movdqa SHUF_MASK(%rip), %xmm10 - PSHUFB_XMM %xmm10,%xmm0 - - pxor %xmm0, %xmm8 - GHASH_MUL %xmm8, %xmm13, %xmm9, %xmm10, %xmm11, %xmm5, %xmm6 - # GHASH computation for the last <16 byte block - movdqa SHUF_MASK(%rip), %xmm10 - PSHUFB_XMM %xmm10, %xmm0 - - # shuffle xmm0 back to output as ciphertext - - # Output %r13 bytes - MOVQ_R64_XMM %xmm0, %rax - cmp $8, %r13 - jle _less_than_8_bytes_left_encrypt - mov %rax, (%arg2 , %r11, 1) - add $8, %r11 - psrldq $8, %xmm0 - MOVQ_R64_XMM %xmm0, %rax - sub $8, %r13 -_less_than_8_bytes_left_encrypt: - mov %al, (%arg2, %r11, 1) - add $1, %r11 - shr $8, %rax - sub $1, %r13 - jne _less_than_8_bytes_left_encrypt -_multiple_of_16_bytes_encrypt: -_return_T_encrypt: + GCM_ENC_DEC enc GCM_COMPLETE FUNC_RESTORE ret -- cgit v1.2.3 From 9ee4a5df220919c0b62165cb2813d63a7bdcb83d Mon Sep 17 00:00:00 2001 From: Dave Watson Date: Wed, 14 Feb 2018 09:39:23 -0800 Subject: crypto: aesni - Introduce gcm_context_data Introduce a gcm_context_data struct that will be used to pass context data between scatter/gather update calls. It is passed as the second argument (after crypto keys), other args are renumbered. 
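The new struct and the matching assembler offsets (AadHash, AadLen, InLen, ...) both appear in the diff below; the correspondence can be sanity-checked with a standalone sketch that reproduces the layout with stdint types standing in for the kernel's u8/u64:

/* Standalone sketch: rebuilds the gcm_context_data layout from this patch
 * and checks it against the offsets the assembly file defines
 * (16*0, 16*1, 16*1+8, 16*2, 16*3, 16*4, 16*5). */
#include <assert.h>
#include <stddef.h>
#include <stdint.h>

#define GCM_BLOCK_LEN 16

struct gcm_context_data {
        uint8_t  aad_hash[GCM_BLOCK_LEN];               /* AadHash      = 16*0     */
        uint64_t aad_length;                            /* AadLen       = 16*1     */
        uint64_t in_length;                             /* InLen        = 16*1 + 8 */
        uint8_t  partial_block_enc_key[GCM_BLOCK_LEN];  /* PBlockEncKey = 16*2     */
        uint8_t  orig_IV[GCM_BLOCK_LEN];                /* OrigIV       = 16*3     */
        uint8_t  current_counter[GCM_BLOCK_LEN];        /* CurCount     = 16*4     */
        uint64_t partial_block_len;                     /* PBlockLen    = 16*5     */
        uint64_t unused;
        uint8_t  hash_keys[GCM_BLOCK_LEN * 8];
};

int main(void)
{
        assert(offsetof(struct gcm_context_data, aad_hash)              == 16 * 0);
        assert(offsetof(struct gcm_context_data, aad_length)            == 16 * 1);
        assert(offsetof(struct gcm_context_data, in_length)             == 16 * 1 + 8);
        assert(offsetof(struct gcm_context_data, partial_block_enc_key) == 16 * 2);
        assert(offsetof(struct gcm_context_data, orig_IV)               == 16 * 3);
        assert(offsetof(struct gcm_context_data, current_counter)       == 16 * 4);
        assert(offsetof(struct gcm_context_data, partial_block_len)     == 16 * 5);
        return 0;
}
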
Signed-off-by: Dave Watson Signed-off-by: Herbert Xu --- arch/x86/crypto/aesni-intel_asm.S | 115 +++++++++++++++++++++---------------- arch/x86/crypto/aesni-intel_glue.c | 81 ++++++++++++++++++-------- 2 files changed, 121 insertions(+), 75 deletions(-) (limited to 'arch') diff --git a/arch/x86/crypto/aesni-intel_asm.S b/arch/x86/crypto/aesni-intel_asm.S index 796d1a52cf03..8c73ab61b5da 100644 --- a/arch/x86/crypto/aesni-intel_asm.S +++ b/arch/x86/crypto/aesni-intel_asm.S @@ -112,6 +112,14 @@ ALL_F: .octa 0xffffffffffffffffffffffffffffffff // (for Karatsuba purposes) #define VARIABLE_OFFSET 16*8 +#define AadHash 16*0 +#define AadLen 16*1 +#define InLen (16*1)+8 +#define PBlockEncKey 16*2 +#define OrigIV 16*3 +#define CurCount 16*4 +#define PBlockLen 16*5 + #define arg1 rdi #define arg2 rsi #define arg3 rdx @@ -122,6 +130,7 @@ ALL_F: .octa 0xffffffffffffffffffffffffffffffff #define arg8 STACK_OFFSET+16(%r14) #define arg9 STACK_OFFSET+24(%r14) #define arg10 STACK_OFFSET+32(%r14) +#define arg11 STACK_OFFSET+40(%r14) #define keysize 2*15*16(%arg1) #endif @@ -196,9 +205,9 @@ ALL_F: .octa 0xffffffffffffffffffffffffffffffff # GCM_INIT initializes a gcm_context struct to prepare for encoding/decoding. # Clobbers rax, r10-r13 and xmm0-xmm6, %xmm13 .macro GCM_INIT - mov %arg6, %r12 + mov arg7, %r12 movdqu (%r12), %xmm13 - movdqa SHUF_MASK(%rip), %xmm2 + movdqa SHUF_MASK(%rip), %xmm2 PSHUFB_XMM %xmm2, %xmm13 # precompute HashKey<<1 mod poly from the HashKey (required for GHASH) @@ -218,7 +227,7 @@ ALL_F: .octa 0xffffffffffffffffffffffffffffffff pand POLY(%rip), %xmm2 pxor %xmm2, %xmm13 movdqa %xmm13, HashKey(%rsp) - mov %arg4, %r13 # %xmm13 holds HashKey<<1 (mod poly) + mov %arg5, %r13 # %xmm13 holds HashKey<<1 (mod poly) and $-16, %r13 mov %r13, %r12 .endm @@ -272,18 +281,18 @@ _four_cipher_left_\@: GHASH_LAST_4 %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, \ %xmm15, %xmm1, %xmm2, %xmm3, %xmm4, %xmm8 _zero_cipher_left_\@: - mov %arg4, %r13 - and $15, %r13 # %r13 = arg4 (mod 16) + mov %arg5, %r13 + and $15, %r13 # %r13 = arg5 (mod 16) je _multiple_of_16_bytes_\@ # Handle the last <16 Byte block separately paddd ONE(%rip), %xmm0 # INCR CNT to get Yn - movdqa SHUF_MASK(%rip), %xmm10 + movdqa SHUF_MASK(%rip), %xmm10 PSHUFB_XMM %xmm10, %xmm0 ENCRYPT_SINGLE_BLOCK %xmm0, %xmm1 # Encrypt(K, Yn) - lea (%arg3,%r11,1), %r10 + lea (%arg4,%r11,1), %r10 mov %r13, %r12 READ_PARTIAL_BLOCK %r10 %r12 %xmm2 %xmm1 @@ -321,13 +330,13 @@ _zero_cipher_left_\@: MOVQ_R64_XMM %xmm0, %rax cmp $8, %r13 jle _less_than_8_bytes_left_\@ - mov %rax, (%arg2 , %r11, 1) + mov %rax, (%arg3 , %r11, 1) add $8, %r11 psrldq $8, %xmm0 MOVQ_R64_XMM %xmm0, %rax sub $8, %r13 _less_than_8_bytes_left_\@: - mov %al, (%arg2, %r11, 1) + mov %al, (%arg3, %r11, 1) add $1, %r11 shr $8, %rax sub $1, %r13 @@ -339,11 +348,11 @@ _multiple_of_16_bytes_\@: # Output: Authorization Tag (AUTH_TAG) # Clobbers rax, r10-r12, and xmm0, xmm1, xmm5-xmm15 .macro GCM_COMPLETE - mov arg8, %r12 # %r13 = aadLen (number of bytes) + mov arg9, %r12 # %r13 = aadLen (number of bytes) shl $3, %r12 # convert into number of bits movd %r12d, %xmm15 # len(A) in %xmm15 - shl $3, %arg4 # len(C) in bits (*128) - MOVQ_R64_XMM %arg4, %xmm1 + shl $3, %arg5 # len(C) in bits (*128) + MOVQ_R64_XMM %arg5, %xmm1 pslldq $8, %xmm15 # %xmm15 = len(A)||0x0000000000000000 pxor %xmm1, %xmm15 # %xmm15 = len(A)||len(C) pxor %xmm15, %xmm8 @@ -352,13 +361,13 @@ _multiple_of_16_bytes_\@: movdqa SHUF_MASK(%rip), %xmm10 PSHUFB_XMM %xmm10, %xmm8 - mov %arg5, %rax # %rax = *Y0 + mov %arg6, %rax # %rax = 
*Y0 movdqu (%rax), %xmm0 # %xmm0 = Y0 ENCRYPT_SINGLE_BLOCK %xmm0, %xmm1 # E(K, Y0) pxor %xmm8, %xmm0 _return_T_\@: - mov arg9, %r10 # %r10 = authTag - mov arg10, %r11 # %r11 = auth_tag_len + mov arg10, %r10 # %r10 = authTag + mov arg11, %r11 # %r11 = auth_tag_len cmp $16, %r11 je _T_16_\@ cmp $8, %r11 @@ -496,15 +505,15 @@ _done_read_partial_block_\@: * the ciphertext * %r10, %r11, %r12, %rax, %xmm5, %xmm6, %xmm7, %xmm8, %xmm9 registers * are clobbered -* arg1, %arg2, %arg3, %r14 are used as a pointer only, not modified +* arg1, %arg3, %arg4, %r14 are used as a pointer only, not modified */ .macro INITIAL_BLOCKS_ENC_DEC TMP1 TMP2 TMP3 TMP4 TMP5 XMM0 XMM1 \ XMM2 XMM3 XMM4 XMMDst TMP6 TMP7 i i_seq operation MOVADQ SHUF_MASK(%rip), %xmm14 - mov arg7, %r10 # %r10 = AAD - mov arg8, %r11 # %r11 = aadLen + mov arg8, %r10 # %r10 = AAD + mov arg9, %r11 # %r11 = aadLen pxor %xmm\i, %xmm\i pxor \XMM2, \XMM2 @@ -536,7 +545,7 @@ _get_AAD_done\@: xor %r11, %r11 # initialise the data pointer offset as zero # start AES for num_initial_blocks blocks - mov %arg5, %rax # %rax = *Y0 + mov %arg6, %rax # %rax = *Y0 movdqu (%rax), \XMM0 # XMM0 = Y0 PSHUFB_XMM %xmm14, \XMM0 @@ -573,9 +582,9 @@ aes_loop_initial_\@: AESENCLAST \TMP1, %xmm\index # Last Round .endr .irpc index, \i_seq - movdqu (%arg3 , %r11, 1), \TMP1 + movdqu (%arg4 , %r11, 1), \TMP1 pxor \TMP1, %xmm\index - movdqu %xmm\index, (%arg2 , %r11, 1) + movdqu %xmm\index, (%arg3 , %r11, 1) # write back plaintext/ciphertext for num_initial_blocks add $16, %r11 @@ -694,34 +703,34 @@ aes_loop_pre_done\@: AESENCLAST \TMP2, \XMM2 AESENCLAST \TMP2, \XMM3 AESENCLAST \TMP2, \XMM4 - movdqu 16*0(%arg3 , %r11 , 1), \TMP1 + movdqu 16*0(%arg4 , %r11 , 1), \TMP1 pxor \TMP1, \XMM1 .ifc \operation, dec - movdqu \XMM1, 16*0(%arg2 , %r11 , 1) + movdqu \XMM1, 16*0(%arg3 , %r11 , 1) movdqa \TMP1, \XMM1 .endif - movdqu 16*1(%arg3 , %r11 , 1), \TMP1 + movdqu 16*1(%arg4 , %r11 , 1), \TMP1 pxor \TMP1, \XMM2 .ifc \operation, dec - movdqu \XMM2, 16*1(%arg2 , %r11 , 1) + movdqu \XMM2, 16*1(%arg3 , %r11 , 1) movdqa \TMP1, \XMM2 .endif - movdqu 16*2(%arg3 , %r11 , 1), \TMP1 + movdqu 16*2(%arg4 , %r11 , 1), \TMP1 pxor \TMP1, \XMM3 .ifc \operation, dec - movdqu \XMM3, 16*2(%arg2 , %r11 , 1) + movdqu \XMM3, 16*2(%arg3 , %r11 , 1) movdqa \TMP1, \XMM3 .endif - movdqu 16*3(%arg3 , %r11 , 1), \TMP1 + movdqu 16*3(%arg4 , %r11 , 1), \TMP1 pxor \TMP1, \XMM4 .ifc \operation, dec - movdqu \XMM4, 16*3(%arg2 , %r11 , 1) + movdqu \XMM4, 16*3(%arg3 , %r11 , 1) movdqa \TMP1, \XMM4 .else - movdqu \XMM1, 16*0(%arg2 , %r11 , 1) - movdqu \XMM2, 16*1(%arg2 , %r11 , 1) - movdqu \XMM3, 16*2(%arg2 , %r11 , 1) - movdqu \XMM4, 16*3(%arg2 , %r11 , 1) + movdqu \XMM1, 16*0(%arg3 , %r11 , 1) + movdqu \XMM2, 16*1(%arg3 , %r11 , 1) + movdqu \XMM3, 16*2(%arg3 , %r11 , 1) + movdqu \XMM4, 16*3(%arg3 , %r11 , 1) .endif add $64, %r11 @@ -739,7 +748,7 @@ _initial_blocks_done\@: /* * encrypt 4 blocks at a time * ghash the 4 previously encrypted ciphertext blocks -* arg1, %arg2, %arg3 are used as pointers only, not modified +* arg1, %arg3, %arg4 are used as pointers only, not modified * %r11 is the data offset value */ .macro GHASH_4_ENCRYPT_4_PARALLEL_ENC TMP1 TMP2 TMP3 TMP4 TMP5 \ @@ -883,18 +892,18 @@ aes_loop_par_enc_done: AESENCLAST \TMP3, \XMM4 movdqa HashKey_k(%rsp), \TMP5 PCLMULQDQ 0x00, \TMP5, \TMP2 # TMP2 = (a1+a0)*(b1+b0) - movdqu (%arg3,%r11,1), \TMP3 + movdqu (%arg4,%r11,1), \TMP3 pxor \TMP3, \XMM1 # Ciphertext/Plaintext XOR EK - movdqu 16(%arg3,%r11,1), \TMP3 + movdqu 16(%arg4,%r11,1), \TMP3 pxor \TMP3, \XMM2 # 
Ciphertext/Plaintext XOR EK - movdqu 32(%arg3,%r11,1), \TMP3 + movdqu 32(%arg4,%r11,1), \TMP3 pxor \TMP3, \XMM3 # Ciphertext/Plaintext XOR EK - movdqu 48(%arg3,%r11,1), \TMP3 + movdqu 48(%arg4,%r11,1), \TMP3 pxor \TMP3, \XMM4 # Ciphertext/Plaintext XOR EK - movdqu \XMM1, (%arg2,%r11,1) # Write to the ciphertext buffer - movdqu \XMM2, 16(%arg2,%r11,1) # Write to the ciphertext buffer - movdqu \XMM3, 32(%arg2,%r11,1) # Write to the ciphertext buffer - movdqu \XMM4, 48(%arg2,%r11,1) # Write to the ciphertext buffer + movdqu \XMM1, (%arg3,%r11,1) # Write to the ciphertext buffer + movdqu \XMM2, 16(%arg3,%r11,1) # Write to the ciphertext buffer + movdqu \XMM3, 32(%arg3,%r11,1) # Write to the ciphertext buffer + movdqu \XMM4, 48(%arg3,%r11,1) # Write to the ciphertext buffer PSHUFB_XMM %xmm15, \XMM1 # perform a 16 byte swap PSHUFB_XMM %xmm15, \XMM2 # perform a 16 byte swap PSHUFB_XMM %xmm15, \XMM3 # perform a 16 byte swap @@ -947,7 +956,7 @@ aes_loop_par_enc_done: /* * decrypt 4 blocks at a time * ghash the 4 previously decrypted ciphertext blocks -* arg1, %arg2, %arg3 are used as pointers only, not modified +* arg1, %arg3, %arg4 are used as pointers only, not modified * %r11 is the data offset value */ .macro GHASH_4_ENCRYPT_4_PARALLEL_DEC TMP1 TMP2 TMP3 TMP4 TMP5 \ @@ -1091,21 +1100,21 @@ aes_loop_par_dec_done: AESENCLAST \TMP3, \XMM4 movdqa HashKey_k(%rsp), \TMP5 PCLMULQDQ 0x00, \TMP5, \TMP2 # TMP2 = (a1+a0)*(b1+b0) - movdqu (%arg3,%r11,1), \TMP3 + movdqu (%arg4,%r11,1), \TMP3 pxor \TMP3, \XMM1 # Ciphertext/Plaintext XOR EK - movdqu \XMM1, (%arg2,%r11,1) # Write to plaintext buffer + movdqu \XMM1, (%arg3,%r11,1) # Write to plaintext buffer movdqa \TMP3, \XMM1 - movdqu 16(%arg3,%r11,1), \TMP3 + movdqu 16(%arg4,%r11,1), \TMP3 pxor \TMP3, \XMM2 # Ciphertext/Plaintext XOR EK - movdqu \XMM2, 16(%arg2,%r11,1) # Write to plaintext buffer + movdqu \XMM2, 16(%arg3,%r11,1) # Write to plaintext buffer movdqa \TMP3, \XMM2 - movdqu 32(%arg3,%r11,1), \TMP3 + movdqu 32(%arg4,%r11,1), \TMP3 pxor \TMP3, \XMM3 # Ciphertext/Plaintext XOR EK - movdqu \XMM3, 32(%arg2,%r11,1) # Write to plaintext buffer + movdqu \XMM3, 32(%arg3,%r11,1) # Write to plaintext buffer movdqa \TMP3, \XMM3 - movdqu 48(%arg3,%r11,1), \TMP3 + movdqu 48(%arg4,%r11,1), \TMP3 pxor \TMP3, \XMM4 # Ciphertext/Plaintext XOR EK - movdqu \XMM4, 48(%arg2,%r11,1) # Write to plaintext buffer + movdqu \XMM4, 48(%arg3,%r11,1) # Write to plaintext buffer movdqa \TMP3, \XMM4 PSHUFB_XMM %xmm15, \XMM1 # perform a 16 byte swap PSHUFB_XMM %xmm15, \XMM2 # perform a 16 byte swap @@ -1278,6 +1287,8 @@ _esb_loop_\@: .endm /***************************************************************************** * void aesni_gcm_dec(void *aes_ctx, // AES Key schedule. Starts on a 16 byte boundary. +* struct gcm_context_data *data +* // Context data * u8 *out, // Plaintext output. Encrypt in-place is allowed. * const u8 *in, // Ciphertext input * u64 plaintext_len, // Length of data in bytes for decryption. @@ -1367,6 +1378,8 @@ ENDPROC(aesni_gcm_dec) /***************************************************************************** * void aesni_gcm_enc(void *aes_ctx, // AES Key schedule. Starts on a 16 byte boundary. +* struct gcm_context_data *data +* // Context data * u8 *out, // Ciphertext output. Encrypt in-place is allowed. * const u8 *in, // Plaintext input * u64 plaintext_len, // Length of data in bytes for encryption. 
diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c index 34cf1c1f8c98..4dd5b9bbbb83 100644 --- a/arch/x86/crypto/aesni-intel_glue.c +++ b/arch/x86/crypto/aesni-intel_glue.c @@ -72,6 +72,21 @@ struct aesni_xts_ctx { u8 raw_crypt_ctx[sizeof(struct crypto_aes_ctx)] AESNI_ALIGN_ATTR; }; +#define GCM_BLOCK_LEN 16 + +struct gcm_context_data { + /* init, update and finalize context data */ + u8 aad_hash[GCM_BLOCK_LEN]; + u64 aad_length; + u64 in_length; + u8 partial_block_enc_key[GCM_BLOCK_LEN]; + u8 orig_IV[GCM_BLOCK_LEN]; + u8 current_counter[GCM_BLOCK_LEN]; + u64 partial_block_len; + u64 unused; + u8 hash_keys[GCM_BLOCK_LEN * 8]; +}; + asmlinkage int aesni_set_key(struct crypto_aes_ctx *ctx, const u8 *in_key, unsigned int key_len); asmlinkage void aesni_enc(struct crypto_aes_ctx *ctx, u8 *out, @@ -105,6 +120,7 @@ asmlinkage void aesni_xts_crypt8(struct crypto_aes_ctx *ctx, u8 *out, /* asmlinkage void aesni_gcm_enc() * void *ctx, AES Key schedule. Starts on a 16 byte boundary. + * struct gcm_context_data. May be uninitialized. * u8 *out, Ciphertext output. Encrypt in-place is allowed. * const u8 *in, Plaintext input * unsigned long plaintext_len, Length of data in bytes for encryption. @@ -117,13 +133,15 @@ asmlinkage void aesni_xts_crypt8(struct crypto_aes_ctx *ctx, u8 *out, * unsigned long auth_tag_len), Authenticated Tag Length in bytes. * Valid values are 16 (most likely), 12 or 8. */ -asmlinkage void aesni_gcm_enc(void *ctx, u8 *out, +asmlinkage void aesni_gcm_enc(void *ctx, + struct gcm_context_data *gdata, u8 *out, const u8 *in, unsigned long plaintext_len, u8 *iv, u8 *hash_subkey, const u8 *aad, unsigned long aad_len, u8 *auth_tag, unsigned long auth_tag_len); /* asmlinkage void aesni_gcm_dec() * void *ctx, AES Key schedule. Starts on a 16 byte boundary. + * struct gcm_context_data. May be uninitialized. * u8 *out, Plaintext output. Decrypt in-place is allowed. * const u8 *in, Ciphertext input * unsigned long ciphertext_len, Length of data in bytes for decryption. @@ -137,7 +155,8 @@ asmlinkage void aesni_gcm_enc(void *ctx, u8 *out, * unsigned long auth_tag_len) Authenticated Tag Length in bytes. * Valid values are 16 (most likely), 12 or 8. 
*/ -asmlinkage void aesni_gcm_dec(void *ctx, u8 *out, +asmlinkage void aesni_gcm_dec(void *ctx, + struct gcm_context_data *gdata, u8 *out, const u8 *in, unsigned long ciphertext_len, u8 *iv, u8 *hash_subkey, const u8 *aad, unsigned long aad_len, u8 *auth_tag, unsigned long auth_tag_len); @@ -167,15 +186,17 @@ asmlinkage void aesni_gcm_dec_avx_gen2(void *ctx, u8 *out, const u8 *aad, unsigned long aad_len, u8 *auth_tag, unsigned long auth_tag_len); -static void aesni_gcm_enc_avx(void *ctx, u8 *out, +static void aesni_gcm_enc_avx(void *ctx, + struct gcm_context_data *data, u8 *out, const u8 *in, unsigned long plaintext_len, u8 *iv, u8 *hash_subkey, const u8 *aad, unsigned long aad_len, u8 *auth_tag, unsigned long auth_tag_len) { struct crypto_aes_ctx *aes_ctx = (struct crypto_aes_ctx*)ctx; if ((plaintext_len < AVX_GEN2_OPTSIZE) || (aes_ctx-> key_length != AES_KEYSIZE_128)){ - aesni_gcm_enc(ctx, out, in, plaintext_len, iv, hash_subkey, aad, - aad_len, auth_tag, auth_tag_len); + aesni_gcm_enc(ctx, data, out, in, + plaintext_len, iv, hash_subkey, aad, + aad_len, auth_tag, auth_tag_len); } else { aesni_gcm_precomp_avx_gen2(ctx, hash_subkey); aesni_gcm_enc_avx_gen2(ctx, out, in, plaintext_len, iv, aad, @@ -183,15 +204,17 @@ static void aesni_gcm_enc_avx(void *ctx, u8 *out, } } -static void aesni_gcm_dec_avx(void *ctx, u8 *out, +static void aesni_gcm_dec_avx(void *ctx, + struct gcm_context_data *data, u8 *out, const u8 *in, unsigned long ciphertext_len, u8 *iv, u8 *hash_subkey, const u8 *aad, unsigned long aad_len, u8 *auth_tag, unsigned long auth_tag_len) { struct crypto_aes_ctx *aes_ctx = (struct crypto_aes_ctx*)ctx; if ((ciphertext_len < AVX_GEN2_OPTSIZE) || (aes_ctx-> key_length != AES_KEYSIZE_128)) { - aesni_gcm_dec(ctx, out, in, ciphertext_len, iv, hash_subkey, aad, - aad_len, auth_tag, auth_tag_len); + aesni_gcm_dec(ctx, data, out, in, + ciphertext_len, iv, hash_subkey, aad, + aad_len, auth_tag, auth_tag_len); } else { aesni_gcm_precomp_avx_gen2(ctx, hash_subkey); aesni_gcm_dec_avx_gen2(ctx, out, in, ciphertext_len, iv, aad, @@ -218,15 +241,17 @@ asmlinkage void aesni_gcm_dec_avx_gen4(void *ctx, u8 *out, const u8 *aad, unsigned long aad_len, u8 *auth_tag, unsigned long auth_tag_len); -static void aesni_gcm_enc_avx2(void *ctx, u8 *out, +static void aesni_gcm_enc_avx2(void *ctx, + struct gcm_context_data *data, u8 *out, const u8 *in, unsigned long plaintext_len, u8 *iv, u8 *hash_subkey, const u8 *aad, unsigned long aad_len, u8 *auth_tag, unsigned long auth_tag_len) { struct crypto_aes_ctx *aes_ctx = (struct crypto_aes_ctx*)ctx; if ((plaintext_len < AVX_GEN2_OPTSIZE) || (aes_ctx-> key_length != AES_KEYSIZE_128)) { - aesni_gcm_enc(ctx, out, in, plaintext_len, iv, hash_subkey, aad, - aad_len, auth_tag, auth_tag_len); + aesni_gcm_enc(ctx, data, out, in, + plaintext_len, iv, hash_subkey, aad, + aad_len, auth_tag, auth_tag_len); } else if (plaintext_len < AVX_GEN4_OPTSIZE) { aesni_gcm_precomp_avx_gen2(ctx, hash_subkey); aesni_gcm_enc_avx_gen2(ctx, out, in, plaintext_len, iv, aad, @@ -238,15 +263,17 @@ static void aesni_gcm_enc_avx2(void *ctx, u8 *out, } } -static void aesni_gcm_dec_avx2(void *ctx, u8 *out, +static void aesni_gcm_dec_avx2(void *ctx, + struct gcm_context_data *data, u8 *out, const u8 *in, unsigned long ciphertext_len, u8 *iv, u8 *hash_subkey, const u8 *aad, unsigned long aad_len, u8 *auth_tag, unsigned long auth_tag_len) { struct crypto_aes_ctx *aes_ctx = (struct crypto_aes_ctx*)ctx; if ((ciphertext_len < AVX_GEN2_OPTSIZE) || (aes_ctx-> key_length != AES_KEYSIZE_128)) { - 
aesni_gcm_dec(ctx, out, in, ciphertext_len, iv, hash_subkey, - aad, aad_len, auth_tag, auth_tag_len); + aesni_gcm_dec(ctx, data, out, in, + ciphertext_len, iv, hash_subkey, + aad, aad_len, auth_tag, auth_tag_len); } else if (ciphertext_len < AVX_GEN4_OPTSIZE) { aesni_gcm_precomp_avx_gen2(ctx, hash_subkey); aesni_gcm_dec_avx_gen2(ctx, out, in, ciphertext_len, iv, aad, @@ -259,15 +286,19 @@ static void aesni_gcm_dec_avx2(void *ctx, u8 *out, } #endif -static void (*aesni_gcm_enc_tfm)(void *ctx, u8 *out, - const u8 *in, unsigned long plaintext_len, u8 *iv, - u8 *hash_subkey, const u8 *aad, unsigned long aad_len, - u8 *auth_tag, unsigned long auth_tag_len); +static void (*aesni_gcm_enc_tfm)(void *ctx, + struct gcm_context_data *data, u8 *out, + const u8 *in, unsigned long plaintext_len, + u8 *iv, u8 *hash_subkey, const u8 *aad, + unsigned long aad_len, u8 *auth_tag, + unsigned long auth_tag_len); -static void (*aesni_gcm_dec_tfm)(void *ctx, u8 *out, - const u8 *in, unsigned long ciphertext_len, u8 *iv, - u8 *hash_subkey, const u8 *aad, unsigned long aad_len, - u8 *auth_tag, unsigned long auth_tag_len); +static void (*aesni_gcm_dec_tfm)(void *ctx, + struct gcm_context_data *data, u8 *out, + const u8 *in, unsigned long ciphertext_len, + u8 *iv, u8 *hash_subkey, const u8 *aad, + unsigned long aad_len, u8 *auth_tag, + unsigned long auth_tag_len); static inline struct aesni_rfc4106_gcm_ctx *aesni_rfc4106_gcm_ctx_get(struct crypto_aead *tfm) @@ -753,6 +784,7 @@ static int gcmaes_encrypt(struct aead_request *req, unsigned int assoclen, unsigned long auth_tag_len = crypto_aead_authsize(tfm); struct scatter_walk src_sg_walk; struct scatter_walk dst_sg_walk = {}; + struct gcm_context_data data AESNI_ALIGN_ATTR; if (sg_is_last(req->src) && (!PageHighMem(sg_page(req->src)) || @@ -782,7 +814,7 @@ static int gcmaes_encrypt(struct aead_request *req, unsigned int assoclen, } kernel_fpu_begin(); - aesni_gcm_enc_tfm(aes_ctx, dst, src, req->cryptlen, iv, + aesni_gcm_enc_tfm(aes_ctx, &data, dst, src, req->cryptlen, iv, hash_subkey, assoc, assoclen, dst + req->cryptlen, auth_tag_len); kernel_fpu_end(); @@ -817,6 +849,7 @@ static int gcmaes_decrypt(struct aead_request *req, unsigned int assoclen, u8 authTag[16]; struct scatter_walk src_sg_walk; struct scatter_walk dst_sg_walk = {}; + struct gcm_context_data data AESNI_ALIGN_ATTR; int retval = 0; tempCipherLen = (unsigned long)(req->cryptlen - auth_tag_len); @@ -849,7 +882,7 @@ static int gcmaes_decrypt(struct aead_request *req, unsigned int assoclen, kernel_fpu_begin(); - aesni_gcm_dec_tfm(aes_ctx, dst, src, tempCipherLen, iv, + aesni_gcm_dec_tfm(aes_ctx, &data, dst, src, tempCipherLen, iv, hash_subkey, assoc, assoclen, authTag, auth_tag_len); kernel_fpu_end(); -- cgit v1.2.3 From c594c5408552ed8d735b402f5a69b45be7d44ae3 Mon Sep 17 00:00:00 2001 From: Dave Watson Date: Wed, 14 Feb 2018 09:39:36 -0800 Subject: crypto: aesni - Split AAD hash calculation to separate macro AAD hash only needs to be calculated once for each scatter/gather operation. Move it to its own macro, and call it from GCM_INIT instead of INITIAL_BLOCKS. 
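
For reference, the value CALC_AAD_HASH produces is the usual GHASH chain over the
AAD, computed once and stored in gcm_context_data. Roughly, in C (gf128_mul() is an
illustrative stand-in for a GF(2^128) multiply by the hash subkey, and the
byte-reflection the asm does with PSHUFB is glossed over):

	/* X = 0; for each 16-byte AAD block A: X = (X ^ A) * H */
	static void ghash_aad(u8 hash[16], const u8 h[16], const u8 *aad, u64 aadlen)
	{
		int i;

		memset(hash, 0, 16);
		while (aadlen) {
			u64 n = aadlen < 16 ? aadlen : 16;
			u8 block[16] = {};

			memcpy(block, aad, n);	/* final short block is zero-padded */
			for (i = 0; i < 16; i++)
				hash[i] ^= block[i];
			gf128_mul(hash, h);	/* hypothetical helper */
			aad += n;
			aadlen -= n;
		}
	}

Since the result is saved in AadHash(%arg2), later GCM_ENC_DEC calls for the same
scatter/gather operation can pick it up instead of rehashing the AAD.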
Signed-off-by: Dave Watson Signed-off-by: Herbert Xu --- arch/x86/crypto/aesni-intel_asm.S | 71 ++++++++++++++++++++++++--------------- 1 file changed, 43 insertions(+), 28 deletions(-) (limited to 'arch') diff --git a/arch/x86/crypto/aesni-intel_asm.S b/arch/x86/crypto/aesni-intel_asm.S index 8c73ab61b5da..9df4d98e6d3b 100644 --- a/arch/x86/crypto/aesni-intel_asm.S +++ b/arch/x86/crypto/aesni-intel_asm.S @@ -230,6 +230,10 @@ ALL_F: .octa 0xffffffffffffffffffffffffffffffff mov %arg5, %r13 # %xmm13 holds HashKey<<1 (mod poly) and $-16, %r13 mov %r13, %r12 + + CALC_AAD_HASH %xmm13 %xmm0 %xmm1 %xmm2 %xmm3 %xmm4 \ + %xmm5 %xmm6 + mov %r13, %r12 .endm # GCM_ENC_DEC Encodes/Decodes given data. Assumes that the passed gcm_context @@ -497,51 +501,62 @@ _read_next_byte_lt8_\@: _done_read_partial_block_\@: .endm -/* -* if a = number of total plaintext bytes -* b = floor(a/16) -* num_initial_blocks = b mod 4 -* encrypt the initial num_initial_blocks blocks and apply ghash on -* the ciphertext -* %r10, %r11, %r12, %rax, %xmm5, %xmm6, %xmm7, %xmm8, %xmm9 registers -* are clobbered -* arg1, %arg3, %arg4, %r14 are used as a pointer only, not modified -*/ - - -.macro INITIAL_BLOCKS_ENC_DEC TMP1 TMP2 TMP3 TMP4 TMP5 XMM0 XMM1 \ -XMM2 XMM3 XMM4 XMMDst TMP6 TMP7 i i_seq operation - MOVADQ SHUF_MASK(%rip), %xmm14 - mov arg8, %r10 # %r10 = AAD - mov arg9, %r11 # %r11 = aadLen - pxor %xmm\i, %xmm\i - pxor \XMM2, \XMM2 +# CALC_AAD_HASH: Calculates the hash of the data which will not be encrypted. +# clobbers r10-11, xmm14 +.macro CALC_AAD_HASH HASHKEY TMP1 TMP2 TMP3 TMP4 TMP5 \ + TMP6 TMP7 + MOVADQ SHUF_MASK(%rip), %xmm14 + mov arg8, %r10 # %r10 = AAD + mov arg9, %r11 # %r11 = aadLen + pxor \TMP7, \TMP7 + pxor \TMP6, \TMP6 cmp $16, %r11 jl _get_AAD_rest\@ _get_AAD_blocks\@: - movdqu (%r10), %xmm\i - PSHUFB_XMM %xmm14, %xmm\i # byte-reflect the AAD data - pxor %xmm\i, \XMM2 - GHASH_MUL \XMM2, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1 + movdqu (%r10), \TMP7 + PSHUFB_XMM %xmm14, \TMP7 # byte-reflect the AAD data + pxor \TMP7, \TMP6 + GHASH_MUL \TMP6, \HASHKEY, \TMP1, \TMP2, \TMP3, \TMP4, \TMP5 add $16, %r10 sub $16, %r11 cmp $16, %r11 jge _get_AAD_blocks\@ - movdqu \XMM2, %xmm\i + movdqu \TMP6, \TMP7 /* read the last <16B of AAD */ _get_AAD_rest\@: cmp $0, %r11 je _get_AAD_done\@ - READ_PARTIAL_BLOCK %r10, %r11, \TMP1, %xmm\i - PSHUFB_XMM %xmm14, %xmm\i # byte-reflect the AAD data - pxor \XMM2, %xmm\i - GHASH_MUL %xmm\i, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1 + READ_PARTIAL_BLOCK %r10, %r11, \TMP1, \TMP7 + PSHUFB_XMM %xmm14, \TMP7 # byte-reflect the AAD data + pxor \TMP6, \TMP7 + GHASH_MUL \TMP7, \HASHKEY, \TMP1, \TMP2, \TMP3, \TMP4, \TMP5 + movdqu \TMP7, \TMP6 _get_AAD_done\@: + movdqu \TMP6, AadHash(%arg2) +.endm + +/* +* if a = number of total plaintext bytes +* b = floor(a/16) +* num_initial_blocks = b mod 4 +* encrypt the initial num_initial_blocks blocks and apply ghash on +* the ciphertext +* %r10, %r11, %r12, %rax, %xmm5, %xmm6, %xmm7, %xmm8, %xmm9 registers +* are clobbered +* arg1, %arg2, %arg3, %r14 are used as a pointer only, not modified +*/ + + +.macro INITIAL_BLOCKS_ENC_DEC TMP1 TMP2 TMP3 TMP4 TMP5 XMM0 XMM1 \ + XMM2 XMM3 XMM4 XMMDst TMP6 TMP7 i i_seq operation + + movdqu AadHash(%arg2), %xmm\i # XMM0 = Y0 + xor %r11, %r11 # initialise the data pointer offset as zero # start AES for num_initial_blocks blocks -- cgit v1.2.3 From 9660474b0e635196628a86a43e205d37e8936587 Mon Sep 17 00:00:00 2001 From: Dave Watson Date: Wed, 14 Feb 2018 09:39:45 -0800 Subject: crypto: aesni - Fill in new context data 
structures Fill in aadhash, aadlen, pblocklen, curcount with appropriate values. pblocklen, aadhash, and pblockenckey are also updated at the end of each scatter/gather operation, to be carried over to the next operation. Signed-off-by: Dave Watson Signed-off-by: Herbert Xu --- arch/x86/crypto/aesni-intel_asm.S | 51 ++++++++++++++++++++++++++++++--------- 1 file changed, 39 insertions(+), 12 deletions(-) (limited to 'arch') diff --git a/arch/x86/crypto/aesni-intel_asm.S b/arch/x86/crypto/aesni-intel_asm.S index 9df4d98e6d3b..c2fe5b77705c 100644 --- a/arch/x86/crypto/aesni-intel_asm.S +++ b/arch/x86/crypto/aesni-intel_asm.S @@ -205,6 +205,21 @@ ALL_F: .octa 0xffffffffffffffffffffffffffffffff # GCM_INIT initializes a gcm_context struct to prepare for encoding/decoding. # Clobbers rax, r10-r13 and xmm0-xmm6, %xmm13 .macro GCM_INIT + + mov arg9, %r11 + mov %r11, AadLen(%arg2) # ctx_data.aad_length = aad_length + xor %r11, %r11 + mov %r11, InLen(%arg2) # ctx_data.in_length = 0 + mov %r11, PBlockLen(%arg2) # ctx_data.partial_block_length = 0 + mov %r11, PBlockEncKey(%arg2) # ctx_data.partial_block_enc_key = 0 + mov %arg6, %rax + movdqu (%rax), %xmm0 + movdqu %xmm0, OrigIV(%arg2) # ctx_data.orig_IV = iv + + movdqa SHUF_MASK(%rip), %xmm2 + PSHUFB_XMM %xmm2, %xmm0 + movdqu %xmm0, CurCount(%arg2) # ctx_data.current_counter = iv + mov arg7, %r12 movdqu (%r12), %xmm13 movdqa SHUF_MASK(%rip), %xmm2 @@ -227,13 +242,9 @@ ALL_F: .octa 0xffffffffffffffffffffffffffffffff pand POLY(%rip), %xmm2 pxor %xmm2, %xmm13 movdqa %xmm13, HashKey(%rsp) - mov %arg5, %r13 # %xmm13 holds HashKey<<1 (mod poly) - and $-16, %r13 - mov %r13, %r12 CALC_AAD_HASH %xmm13 %xmm0 %xmm1 %xmm2 %xmm3 %xmm4 \ %xmm5 %xmm6 - mov %r13, %r12 .endm # GCM_ENC_DEC Encodes/Decodes given data. Assumes that the passed gcm_context @@ -241,6 +252,12 @@ ALL_F: .octa 0xffffffffffffffffffffffffffffffff # Requires the input data be at least 1 byte long because of READ_PARTIAL_BLOCK # Clobbers rax, r10-r13, and xmm0-xmm15 .macro GCM_ENC_DEC operation + movdqu AadHash(%arg2), %xmm8 + movdqu HashKey(%rsp), %xmm13 + add %arg5, InLen(%arg2) + mov %arg5, %r13 # save the number of bytes + and $-16, %r13 # %r13 = %r13 - (%r13 mod 16) + mov %r13, %r12 # Encrypt/Decrypt first few blocks and $(3<<4), %r12 @@ -285,16 +302,23 @@ _four_cipher_left_\@: GHASH_LAST_4 %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, \ %xmm15, %xmm1, %xmm2, %xmm3, %xmm4, %xmm8 _zero_cipher_left_\@: + movdqu %xmm8, AadHash(%arg2) + movdqu %xmm0, CurCount(%arg2) + mov %arg5, %r13 and $15, %r13 # %r13 = arg5 (mod 16) je _multiple_of_16_bytes_\@ + mov %r13, PBlockLen(%arg2) + # Handle the last <16 Byte block separately paddd ONE(%rip), %xmm0 # INCR CNT to get Yn + movdqu %xmm0, CurCount(%arg2) movdqa SHUF_MASK(%rip), %xmm10 PSHUFB_XMM %xmm10, %xmm0 ENCRYPT_SINGLE_BLOCK %xmm0, %xmm1 # Encrypt(K, Yn) + movdqu %xmm0, PBlockEncKey(%arg2) lea (%arg4,%r11,1), %r10 mov %r13, %r12 @@ -323,6 +347,7 @@ _zero_cipher_left_\@: .endif GHASH_MUL %xmm8, %xmm13, %xmm9, %xmm10, %xmm11, %xmm5, %xmm6 + movdqu %xmm8, AadHash(%arg2) .ifc \operation, enc # GHASH computation for the last <16 byte block movdqa SHUF_MASK(%rip), %xmm10 @@ -352,11 +377,15 @@ _multiple_of_16_bytes_\@: # Output: Authorization Tag (AUTH_TAG) # Clobbers rax, r10-r12, and xmm0, xmm1, xmm5-xmm15 .macro GCM_COMPLETE - mov arg9, %r12 # %r13 = aadLen (number of bytes) + movdqu AadHash(%arg2), %xmm8 + movdqu HashKey(%rsp), %xmm13 + mov AadLen(%arg2), %r12 # %r13 = aadLen (number of bytes) shl $3, %r12 # convert into number of bits movd %r12d, 
%xmm15 # len(A) in %xmm15 - shl $3, %arg5 # len(C) in bits (*128) - MOVQ_R64_XMM %arg5, %xmm1 + mov InLen(%arg2), %r12 + shl $3, %r12 # len(C) in bits (*128) + MOVQ_R64_XMM %r12, %xmm1 + pslldq $8, %xmm15 # %xmm15 = len(A)||0x0000000000000000 pxor %xmm1, %xmm15 # %xmm15 = len(A)||len(C) pxor %xmm15, %xmm8 @@ -365,8 +394,7 @@ _multiple_of_16_bytes_\@: movdqa SHUF_MASK(%rip), %xmm10 PSHUFB_XMM %xmm10, %xmm8 - mov %arg6, %rax # %rax = *Y0 - movdqu (%rax), %xmm0 # %xmm0 = Y0 + movdqu OrigIV(%arg2), %xmm0 # %xmm0 = Y0 ENCRYPT_SINGLE_BLOCK %xmm0, %xmm1 # E(K, Y0) pxor %xmm8, %xmm0 _return_T_\@: @@ -554,15 +582,14 @@ _get_AAD_done\@: .macro INITIAL_BLOCKS_ENC_DEC TMP1 TMP2 TMP3 TMP4 TMP5 XMM0 XMM1 \ XMM2 XMM3 XMM4 XMMDst TMP6 TMP7 i i_seq operation + MOVADQ SHUF_MASK(%rip), %xmm14 movdqu AadHash(%arg2), %xmm\i # XMM0 = Y0 xor %r11, %r11 # initialise the data pointer offset as zero # start AES for num_initial_blocks blocks - mov %arg6, %rax # %rax = *Y0 - movdqu (%rax), \XMM0 # XMM0 = Y0 - PSHUFB_XMM %xmm14, \XMM0 + movdqu CurCount(%arg2), \XMM0 # XMM0 = Y0 .if (\i == 5) || (\i == 6) || (\i == 7) -- cgit v1.2.3 From e2e34b0856463727292498d756308cba957fe477 Mon Sep 17 00:00:00 2001 From: Dave Watson Date: Wed, 14 Feb 2018 09:39:55 -0800 Subject: crypto: aesni - Move ghash_mul to GCM_COMPLETE Prepare to handle partial blocks between scatter/gather calls. For the last partial block, we only want to calculate the aadhash in GCM_COMPLETE, and a new partial block macro will handle both aadhash update and encrypting partial blocks between calls. Signed-off-by: Dave Watson Signed-off-by: Herbert Xu --- arch/x86/crypto/aesni-intel_asm.S | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/crypto/aesni-intel_asm.S b/arch/x86/crypto/aesni-intel_asm.S index c2fe5b77705c..aabc739eca19 100644 --- a/arch/x86/crypto/aesni-intel_asm.S +++ b/arch/x86/crypto/aesni-intel_asm.S @@ -346,7 +346,6 @@ _zero_cipher_left_\@: pxor %xmm0, %xmm8 .endif - GHASH_MUL %xmm8, %xmm13, %xmm9, %xmm10, %xmm11, %xmm5, %xmm6 movdqu %xmm8, AadHash(%arg2) .ifc \operation, enc # GHASH computation for the last <16 byte block @@ -379,6 +378,15 @@ _multiple_of_16_bytes_\@: .macro GCM_COMPLETE movdqu AadHash(%arg2), %xmm8 movdqu HashKey(%rsp), %xmm13 + + mov PBlockLen(%arg2), %r12 + + cmp $0, %r12 + je _partial_done\@ + + GHASH_MUL %xmm8, %xmm13, %xmm9, %xmm10, %xmm11, %xmm5, %xmm6 + +_partial_done\@: mov AadLen(%arg2), %r12 # %r13 = aadLen (number of bytes) shl $3, %r12 # convert into number of bits movd %r12d, %xmm15 # len(A) in %xmm15 -- cgit v1.2.3 From 1476db2d129d5e4fc59e93a7abd22edcb26b52f5 Mon Sep 17 00:00:00 2001 From: Dave Watson Date: Wed, 14 Feb 2018 09:40:10 -0800 Subject: crypto: aesni - Move HashKey computation from stack to gcm_context HashKey computation only needs to happen once per scatter/gather operation, save it between calls in gcm_context struct instead of on the stack. Since the asm no longer stores anything on the stack, we can use %rsp directly, and clean up the frame save/restore macros a bit. Hashkeys actually only need to be calculated once per key and could be moved to when set_key is called, however, the current glue code falls back to generic aes code if fpu is disabled. 
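
A compressed C view of what the new PRECOMPUTE macro stores once per key (the
<<1-mod-poly pre-shift and the HashKey_*_k Karatsuba terms are omitted; the struct
and gf128_mul() are illustrative stand-ins, not the kernel's actual types):

	struct gcm_hash_powers {
		u8 h1[16];	/* HashKey   */
		u8 h2[16];	/* HashKey^2 */
		u8 h3[16];	/* HashKey^3 */
		u8 h4[16];	/* HashKey^4 */
	};

	static void precompute_hash_powers(struct gcm_hash_powers *p, const u8 h[16])
	{
		memcpy(p->h1, h, 16);
		memcpy(p->h2, h, 16);
		gf128_mul(p->h2, h);		/* H^2 */
		memcpy(p->h3, p->h2, 16);
		gf128_mul(p->h3, h);		/* H^3 */
		memcpy(p->h4, p->h3, 16);
		gf128_mul(p->h4, h);		/* H^4 */
	}

Keeping these powers in gcm_context_data is what empties the stack scratch area and
lets the macros address everything relative to %arg2 rather than %rsp.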
Signed-off-by: Dave Watson Signed-off-by: Herbert Xu --- arch/x86/crypto/aesni-intel_asm.S | 205 ++++++++++++++++++++------------------ 1 file changed, 106 insertions(+), 99 deletions(-) (limited to 'arch') diff --git a/arch/x86/crypto/aesni-intel_asm.S b/arch/x86/crypto/aesni-intel_asm.S index aabc739eca19..af9013401387 100644 --- a/arch/x86/crypto/aesni-intel_asm.S +++ b/arch/x86/crypto/aesni-intel_asm.S @@ -94,23 +94,6 @@ ALL_F: .octa 0xffffffffffffffffffffffffffffffff #define STACK_OFFSET 8*3 -#define HashKey 16*0 // store HashKey <<1 mod poly here -#define HashKey_2 16*1 // store HashKey^2 <<1 mod poly here -#define HashKey_3 16*2 // store HashKey^3 <<1 mod poly here -#define HashKey_4 16*3 // store HashKey^4 <<1 mod poly here -#define HashKey_k 16*4 // store XOR of High 64 bits and Low 64 - // bits of HashKey <<1 mod poly here - //(for Karatsuba purposes) -#define HashKey_2_k 16*5 // store XOR of High 64 bits and Low 64 - // bits of HashKey^2 <<1 mod poly here - // (for Karatsuba purposes) -#define HashKey_3_k 16*6 // store XOR of High 64 bits and Low 64 - // bits of HashKey^3 <<1 mod poly here - // (for Karatsuba purposes) -#define HashKey_4_k 16*7 // store XOR of High 64 bits and Low 64 - // bits of HashKey^4 <<1 mod poly here - // (for Karatsuba purposes) -#define VARIABLE_OFFSET 16*8 #define AadHash 16*0 #define AadLen 16*1 @@ -119,6 +102,22 @@ ALL_F: .octa 0xffffffffffffffffffffffffffffffff #define OrigIV 16*3 #define CurCount 16*4 #define PBlockLen 16*5 +#define HashKey 16*6 // store HashKey <<1 mod poly here +#define HashKey_2 16*7 // store HashKey^2 <<1 mod poly here +#define HashKey_3 16*8 // store HashKey^3 <<1 mod poly here +#define HashKey_4 16*9 // store HashKey^4 <<1 mod poly here +#define HashKey_k 16*10 // store XOR of High 64 bits and Low 64 + // bits of HashKey <<1 mod poly here + //(for Karatsuba purposes) +#define HashKey_2_k 16*11 // store XOR of High 64 bits and Low 64 + // bits of HashKey^2 <<1 mod poly here + // (for Karatsuba purposes) +#define HashKey_3_k 16*12 // store XOR of High 64 bits and Low 64 + // bits of HashKey^3 <<1 mod poly here + // (for Karatsuba purposes) +#define HashKey_4_k 16*13 // store XOR of High 64 bits and Low 64 + // bits of HashKey^4 <<1 mod poly here + // (for Karatsuba purposes) #define arg1 rdi #define arg2 rsi @@ -126,11 +125,11 @@ ALL_F: .octa 0xffffffffffffffffffffffffffffffff #define arg4 rcx #define arg5 r8 #define arg6 r9 -#define arg7 STACK_OFFSET+8(%r14) -#define arg8 STACK_OFFSET+16(%r14) -#define arg9 STACK_OFFSET+24(%r14) -#define arg10 STACK_OFFSET+32(%r14) -#define arg11 STACK_OFFSET+40(%r14) +#define arg7 STACK_OFFSET+8(%rsp) +#define arg8 STACK_OFFSET+16(%rsp) +#define arg9 STACK_OFFSET+24(%rsp) +#define arg10 STACK_OFFSET+32(%rsp) +#define arg11 STACK_OFFSET+40(%rsp) #define keysize 2*15*16(%arg1) #endif @@ -184,28 +183,79 @@ ALL_F: .octa 0xffffffffffffffffffffffffffffffff push %r12 push %r13 push %r14 - mov %rsp, %r14 # # states of %xmm registers %xmm6:%xmm15 not saved # all %xmm registers are clobbered # - sub $VARIABLE_OFFSET, %rsp - and $~63, %rsp .endm .macro FUNC_RESTORE - mov %r14, %rsp pop %r14 pop %r13 pop %r12 .endm +# Precompute hashkeys. +# Input: Hash subkey. +# Output: HashKeys stored in gcm_context_data. Only needs to be called +# once per key. +# clobbers r12, and tmp xmm registers. 
+.macro PRECOMPUTE TMP1 TMP2 TMP3 TMP4 TMP5 TMP6 TMP7 + mov arg7, %r12 + movdqu (%r12), \TMP3 + movdqa SHUF_MASK(%rip), \TMP2 + PSHUFB_XMM \TMP2, \TMP3 + + # precompute HashKey<<1 mod poly from the HashKey (required for GHASH) + + movdqa \TMP3, \TMP2 + psllq $1, \TMP3 + psrlq $63, \TMP2 + movdqa \TMP2, \TMP1 + pslldq $8, \TMP2 + psrldq $8, \TMP1 + por \TMP2, \TMP3 + + # reduce HashKey<<1 + + pshufd $0x24, \TMP1, \TMP2 + pcmpeqd TWOONE(%rip), \TMP2 + pand POLY(%rip), \TMP2 + pxor \TMP2, \TMP3 + movdqa \TMP3, HashKey(%arg2) + + movdqa \TMP3, \TMP5 + pshufd $78, \TMP3, \TMP1 + pxor \TMP3, \TMP1 + movdqa \TMP1, HashKey_k(%arg2) + + GHASH_MUL \TMP5, \TMP3, \TMP1, \TMP2, \TMP4, \TMP6, \TMP7 +# TMP5 = HashKey^2<<1 (mod poly) + movdqa \TMP5, HashKey_2(%arg2) +# HashKey_2 = HashKey^2<<1 (mod poly) + pshufd $78, \TMP5, \TMP1 + pxor \TMP5, \TMP1 + movdqa \TMP1, HashKey_2_k(%arg2) + + GHASH_MUL \TMP5, \TMP3, \TMP1, \TMP2, \TMP4, \TMP6, \TMP7 +# TMP5 = HashKey^3<<1 (mod poly) + movdqa \TMP5, HashKey_3(%arg2) + pshufd $78, \TMP5, \TMP1 + pxor \TMP5, \TMP1 + movdqa \TMP1, HashKey_3_k(%arg2) + + GHASH_MUL \TMP5, \TMP3, \TMP1, \TMP2, \TMP4, \TMP6, \TMP7 +# TMP5 = HashKey^3<<1 (mod poly) + movdqa \TMP5, HashKey_4(%arg2) + pshufd $78, \TMP5, \TMP1 + pxor \TMP5, \TMP1 + movdqa \TMP1, HashKey_4_k(%arg2) +.endm # GCM_INIT initializes a gcm_context struct to prepare for encoding/decoding. # Clobbers rax, r10-r13 and xmm0-xmm6, %xmm13 .macro GCM_INIT - mov arg9, %r11 mov %r11, AadLen(%arg2) # ctx_data.aad_length = aad_length xor %r11, %r11 @@ -220,28 +270,8 @@ ALL_F: .octa 0xffffffffffffffffffffffffffffffff PSHUFB_XMM %xmm2, %xmm0 movdqu %xmm0, CurCount(%arg2) # ctx_data.current_counter = iv - mov arg7, %r12 - movdqu (%r12), %xmm13 - movdqa SHUF_MASK(%rip), %xmm2 - PSHUFB_XMM %xmm2, %xmm13 - - # precompute HashKey<<1 mod poly from the HashKey (required for GHASH) - - movdqa %xmm13, %xmm2 - psllq $1, %xmm13 - psrlq $63, %xmm2 - movdqa %xmm2, %xmm1 - pslldq $8, %xmm2 - psrldq $8, %xmm1 - por %xmm2, %xmm13 - - # reduce HashKey<<1 - - pshufd $0x24, %xmm1, %xmm2 - pcmpeqd TWOONE(%rip), %xmm2 - pand POLY(%rip), %xmm2 - pxor %xmm2, %xmm13 - movdqa %xmm13, HashKey(%rsp) + PRECOMPUTE %xmm1 %xmm2 %xmm3 %xmm4 %xmm5 %xmm6 %xmm7 + movdqa HashKey(%arg2), %xmm13 CALC_AAD_HASH %xmm13 %xmm0 %xmm1 %xmm2 %xmm3 %xmm4 \ %xmm5 %xmm6 @@ -253,7 +283,7 @@ ALL_F: .octa 0xffffffffffffffffffffffffffffffff # Clobbers rax, r10-r13, and xmm0-xmm15 .macro GCM_ENC_DEC operation movdqu AadHash(%arg2), %xmm8 - movdqu HashKey(%rsp), %xmm13 + movdqu HashKey(%arg2), %xmm13 add %arg5, InLen(%arg2) mov %arg5, %r13 # save the number of bytes and $-16, %r13 # %r13 = %r13 - (%r13 mod 16) @@ -377,7 +407,7 @@ _multiple_of_16_bytes_\@: # Clobbers rax, r10-r12, and xmm0, xmm1, xmm5-xmm15 .macro GCM_COMPLETE movdqu AadHash(%arg2), %xmm8 - movdqu HashKey(%rsp), %xmm13 + movdqu HashKey(%arg2), %xmm13 mov PBlockLen(%arg2), %r12 @@ -584,7 +614,7 @@ _get_AAD_done\@: * the ciphertext * %r10, %r11, %r12, %rax, %xmm5, %xmm6, %xmm7, %xmm8, %xmm9 registers * are clobbered -* arg1, %arg2, %arg3, %r14 are used as a pointer only, not modified +* arg1, %arg2, %arg3 are used as a pointer only, not modified */ @@ -695,17 +725,6 @@ aes_loop_initial_\@: pxor \TMP1, \XMM2 pxor \TMP1, \XMM3 pxor \TMP1, \XMM4 - movdqa \TMP3, \TMP5 - pshufd $78, \TMP3, \TMP1 - pxor \TMP3, \TMP1 - movdqa \TMP1, HashKey_k(%rsp) - GHASH_MUL \TMP5, \TMP3, \TMP1, \TMP2, \TMP4, \TMP6, \TMP7 -# TMP5 = HashKey^2<<1 (mod poly) - movdqa \TMP5, HashKey_2(%rsp) -# HashKey_2 = HashKey^2<<1 (mod poly) - pshufd 
$78, \TMP5, \TMP1 - pxor \TMP5, \TMP1 - movdqa \TMP1, HashKey_2_k(%rsp) .irpc index, 1234 # do 4 rounds movaps 0x10*\index(%arg1), \TMP1 AESENC \TMP1, \XMM1 @@ -713,12 +732,6 @@ aes_loop_initial_\@: AESENC \TMP1, \XMM3 AESENC \TMP1, \XMM4 .endr - GHASH_MUL \TMP5, \TMP3, \TMP1, \TMP2, \TMP4, \TMP6, \TMP7 -# TMP5 = HashKey^3<<1 (mod poly) - movdqa \TMP5, HashKey_3(%rsp) - pshufd $78, \TMP5, \TMP1 - pxor \TMP5, \TMP1 - movdqa \TMP1, HashKey_3_k(%rsp) .irpc index, 56789 # do next 5 rounds movaps 0x10*\index(%arg1), \TMP1 AESENC \TMP1, \XMM1 @@ -726,12 +739,6 @@ aes_loop_initial_\@: AESENC \TMP1, \XMM3 AESENC \TMP1, \XMM4 .endr - GHASH_MUL \TMP5, \TMP3, \TMP1, \TMP2, \TMP4, \TMP6, \TMP7 -# TMP5 = HashKey^3<<1 (mod poly) - movdqa \TMP5, HashKey_4(%rsp) - pshufd $78, \TMP5, \TMP1 - pxor \TMP5, \TMP1 - movdqa \TMP1, HashKey_4_k(%rsp) lea 0xa0(%arg1),%r10 mov keysize,%eax shr $2,%eax # 128->4, 192->6, 256->8 @@ -816,7 +823,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation pshufd $78, \XMM5, \TMP6 pxor \XMM5, \TMP6 paddd ONE(%rip), \XMM0 # INCR CNT - movdqa HashKey_4(%rsp), \TMP5 + movdqa HashKey_4(%arg2), \TMP5 PCLMULQDQ 0x11, \TMP5, \TMP4 # TMP4 = a1*b1 movdqa \XMM0, \XMM1 paddd ONE(%rip), \XMM0 # INCR CNT @@ -835,7 +842,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation pxor (%arg1), \XMM2 pxor (%arg1), \XMM3 pxor (%arg1), \XMM4 - movdqa HashKey_4_k(%rsp), \TMP5 + movdqa HashKey_4_k(%arg2), \TMP5 PCLMULQDQ 0x00, \TMP5, \TMP6 # TMP6 = (a1+a0)*(b1+b0) movaps 0x10(%arg1), \TMP1 AESENC \TMP1, \XMM1 # Round 1 @@ -850,7 +857,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation movdqa \XMM6, \TMP1 pshufd $78, \XMM6, \TMP2 pxor \XMM6, \TMP2 - movdqa HashKey_3(%rsp), \TMP5 + movdqa HashKey_3(%arg2), \TMP5 PCLMULQDQ 0x11, \TMP5, \TMP1 # TMP1 = a1 * b1 movaps 0x30(%arg1), \TMP3 AESENC \TMP3, \XMM1 # Round 3 @@ -863,7 +870,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation AESENC \TMP3, \XMM2 AESENC \TMP3, \XMM3 AESENC \TMP3, \XMM4 - movdqa HashKey_3_k(%rsp), \TMP5 + movdqa HashKey_3_k(%arg2), \TMP5 PCLMULQDQ 0x00, \TMP5, \TMP2 # TMP2 = (a1+a0)*(b1+b0) movaps 0x50(%arg1), \TMP3 AESENC \TMP3, \XMM1 # Round 5 @@ -877,7 +884,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation movdqa \XMM7, \TMP1 pshufd $78, \XMM7, \TMP2 pxor \XMM7, \TMP2 - movdqa HashKey_2(%rsp ), \TMP5 + movdqa HashKey_2(%arg2), \TMP5 # Multiply TMP5 * HashKey using karatsuba @@ -893,7 +900,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation AESENC \TMP3, \XMM2 AESENC \TMP3, \XMM3 AESENC \TMP3, \XMM4 - movdqa HashKey_2_k(%rsp), \TMP5 + movdqa HashKey_2_k(%arg2), \TMP5 PCLMULQDQ 0x00, \TMP5, \TMP2 # TMP2 = (a1+a0)*(b1+b0) movaps 0x80(%arg1), \TMP3 AESENC \TMP3, \XMM1 # Round 8 @@ -911,7 +918,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation movdqa \XMM8, \TMP1 pshufd $78, \XMM8, \TMP2 pxor \XMM8, \TMP2 - movdqa HashKey(%rsp), \TMP5 + movdqa HashKey(%arg2), \TMP5 PCLMULQDQ 0x11, \TMP5, \TMP1 # TMP1 = a1*b1 movaps 0x90(%arg1), \TMP3 AESENC \TMP3, \XMM1 # Round 9 @@ -940,7 +947,7 @@ aes_loop_par_enc_done: AESENCLAST \TMP3, \XMM2 AESENCLAST \TMP3, \XMM3 AESENCLAST \TMP3, \XMM4 - movdqa HashKey_k(%rsp), \TMP5 + movdqa HashKey_k(%arg2), \TMP5 PCLMULQDQ 0x00, \TMP5, \TMP2 # TMP2 = (a1+a0)*(b1+b0) movdqu (%arg4,%r11,1), \TMP3 pxor \TMP3, \XMM1 # Ciphertext/Plaintext XOR EK @@ -1024,7 +1031,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation pshufd $78, \XMM5, \TMP6 pxor \XMM5, \TMP6 paddd ONE(%rip), \XMM0 # INCR CNT - movdqa HashKey_4(%rsp), 
\TMP5 + movdqa HashKey_4(%arg2), \TMP5 PCLMULQDQ 0x11, \TMP5, \TMP4 # TMP4 = a1*b1 movdqa \XMM0, \XMM1 paddd ONE(%rip), \XMM0 # INCR CNT @@ -1043,7 +1050,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation pxor (%arg1), \XMM2 pxor (%arg1), \XMM3 pxor (%arg1), \XMM4 - movdqa HashKey_4_k(%rsp), \TMP5 + movdqa HashKey_4_k(%arg2), \TMP5 PCLMULQDQ 0x00, \TMP5, \TMP6 # TMP6 = (a1+a0)*(b1+b0) movaps 0x10(%arg1), \TMP1 AESENC \TMP1, \XMM1 # Round 1 @@ -1058,7 +1065,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation movdqa \XMM6, \TMP1 pshufd $78, \XMM6, \TMP2 pxor \XMM6, \TMP2 - movdqa HashKey_3(%rsp), \TMP5 + movdqa HashKey_3(%arg2), \TMP5 PCLMULQDQ 0x11, \TMP5, \TMP1 # TMP1 = a1 * b1 movaps 0x30(%arg1), \TMP3 AESENC \TMP3, \XMM1 # Round 3 @@ -1071,7 +1078,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation AESENC \TMP3, \XMM2 AESENC \TMP3, \XMM3 AESENC \TMP3, \XMM4 - movdqa HashKey_3_k(%rsp), \TMP5 + movdqa HashKey_3_k(%arg2), \TMP5 PCLMULQDQ 0x00, \TMP5, \TMP2 # TMP2 = (a1+a0)*(b1+b0) movaps 0x50(%arg1), \TMP3 AESENC \TMP3, \XMM1 # Round 5 @@ -1085,7 +1092,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation movdqa \XMM7, \TMP1 pshufd $78, \XMM7, \TMP2 pxor \XMM7, \TMP2 - movdqa HashKey_2(%rsp ), \TMP5 + movdqa HashKey_2(%arg2), \TMP5 # Multiply TMP5 * HashKey using karatsuba @@ -1101,7 +1108,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation AESENC \TMP3, \XMM2 AESENC \TMP3, \XMM3 AESENC \TMP3, \XMM4 - movdqa HashKey_2_k(%rsp), \TMP5 + movdqa HashKey_2_k(%arg2), \TMP5 PCLMULQDQ 0x00, \TMP5, \TMP2 # TMP2 = (a1+a0)*(b1+b0) movaps 0x80(%arg1), \TMP3 AESENC \TMP3, \XMM1 # Round 8 @@ -1119,7 +1126,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation movdqa \XMM8, \TMP1 pshufd $78, \XMM8, \TMP2 pxor \XMM8, \TMP2 - movdqa HashKey(%rsp), \TMP5 + movdqa HashKey(%arg2), \TMP5 PCLMULQDQ 0x11, \TMP5, \TMP1 # TMP1 = a1*b1 movaps 0x90(%arg1), \TMP3 AESENC \TMP3, \XMM1 # Round 9 @@ -1148,7 +1155,7 @@ aes_loop_par_dec_done: AESENCLAST \TMP3, \XMM2 AESENCLAST \TMP3, \XMM3 AESENCLAST \TMP3, \XMM4 - movdqa HashKey_k(%rsp), \TMP5 + movdqa HashKey_k(%arg2), \TMP5 PCLMULQDQ 0x00, \TMP5, \TMP2 # TMP2 = (a1+a0)*(b1+b0) movdqu (%arg4,%r11,1), \TMP3 pxor \TMP3, \XMM1 # Ciphertext/Plaintext XOR EK @@ -1224,10 +1231,10 @@ TMP7 XMM1 XMM2 XMM3 XMM4 XMMDst movdqa \XMM1, \TMP6 pshufd $78, \XMM1, \TMP2 pxor \XMM1, \TMP2 - movdqa HashKey_4(%rsp), \TMP5 + movdqa HashKey_4(%arg2), \TMP5 PCLMULQDQ 0x11, \TMP5, \TMP6 # TMP6 = a1*b1 PCLMULQDQ 0x00, \TMP5, \XMM1 # XMM1 = a0*b0 - movdqa HashKey_4_k(%rsp), \TMP4 + movdqa HashKey_4_k(%arg2), \TMP4 PCLMULQDQ 0x00, \TMP4, \TMP2 # TMP2 = (a1+a0)*(b1+b0) movdqa \XMM1, \XMMDst movdqa \TMP2, \XMM1 # result in TMP6, XMMDst, XMM1 @@ -1237,10 +1244,10 @@ TMP7 XMM1 XMM2 XMM3 XMM4 XMMDst movdqa \XMM2, \TMP1 pshufd $78, \XMM2, \TMP2 pxor \XMM2, \TMP2 - movdqa HashKey_3(%rsp), \TMP5 + movdqa HashKey_3(%arg2), \TMP5 PCLMULQDQ 0x11, \TMP5, \TMP1 # TMP1 = a1*b1 PCLMULQDQ 0x00, \TMP5, \XMM2 # XMM2 = a0*b0 - movdqa HashKey_3_k(%rsp), \TMP4 + movdqa HashKey_3_k(%arg2), \TMP4 PCLMULQDQ 0x00, \TMP4, \TMP2 # TMP2 = (a1+a0)*(b1+b0) pxor \TMP1, \TMP6 pxor \XMM2, \XMMDst @@ -1252,10 +1259,10 @@ TMP7 XMM1 XMM2 XMM3 XMM4 XMMDst movdqa \XMM3, \TMP1 pshufd $78, \XMM3, \TMP2 pxor \XMM3, \TMP2 - movdqa HashKey_2(%rsp), \TMP5 + movdqa HashKey_2(%arg2), \TMP5 PCLMULQDQ 0x11, \TMP5, \TMP1 # TMP1 = a1*b1 PCLMULQDQ 0x00, \TMP5, \XMM3 # XMM3 = a0*b0 - movdqa HashKey_2_k(%rsp), \TMP4 + movdqa HashKey_2_k(%arg2), \TMP4 PCLMULQDQ 0x00, \TMP4, \TMP2 
# TMP2 = (a1+a0)*(b1+b0) pxor \TMP1, \TMP6 pxor \XMM3, \XMMDst @@ -1265,10 +1272,10 @@ TMP7 XMM1 XMM2 XMM3 XMM4 XMMDst movdqa \XMM4, \TMP1 pshufd $78, \XMM4, \TMP2 pxor \XMM4, \TMP2 - movdqa HashKey(%rsp), \TMP5 + movdqa HashKey(%arg2), \TMP5 PCLMULQDQ 0x11, \TMP5, \TMP1 # TMP1 = a1*b1 PCLMULQDQ 0x00, \TMP5, \XMM4 # XMM4 = a0*b0 - movdqa HashKey_k(%rsp), \TMP4 + movdqa HashKey_k(%arg2), \TMP4 PCLMULQDQ 0x00, \TMP4, \TMP2 # TMP2 = (a1+a0)*(b1+b0) pxor \TMP1, \TMP6 pxor \XMM4, \XMMDst -- cgit v1.2.3 From ae952c5ec655b2b8bb337acb554d53481b0e01bc Mon Sep 17 00:00:00 2001 From: Dave Watson Date: Wed, 14 Feb 2018 09:40:19 -0800 Subject: crypto: aesni - Introduce partial block macro Before this diff, multiple calls to GCM_ENC_DEC will succeed, but only if all calls are a multiple of 16 bytes. Handle partial blocks at the start of GCM_ENC_DEC, and update aadhash as appropriate. The data offset %r11 is also updated after the partial block. Signed-off-by: Dave Watson Signed-off-by: Herbert Xu --- arch/x86/crypto/aesni-intel_asm.S | 151 +++++++++++++++++++++++++++++++++++++- 1 file changed, 150 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/crypto/aesni-intel_asm.S b/arch/x86/crypto/aesni-intel_asm.S index af9013401387..760a5174e308 100644 --- a/arch/x86/crypto/aesni-intel_asm.S +++ b/arch/x86/crypto/aesni-intel_asm.S @@ -285,7 +285,13 @@ ALL_F: .octa 0xffffffffffffffffffffffffffffffff movdqu AadHash(%arg2), %xmm8 movdqu HashKey(%arg2), %xmm13 add %arg5, InLen(%arg2) + + xor %r11, %r11 # initialise the data pointer offset as zero + PARTIAL_BLOCK %arg3 %arg4 %arg5 %r11 %xmm8 \operation + + sub %r11, %arg5 # sub partial block data used mov %arg5, %r13 # save the number of bytes + and $-16, %r13 # %r13 = %r13 - (%r13 mod 16) mov %r13, %r12 # Encrypt/Decrypt first few blocks @@ -606,6 +612,150 @@ _get_AAD_done\@: movdqu \TMP6, AadHash(%arg2) .endm +# PARTIAL_BLOCK: Handles encryption/decryption and the tag partial blocks +# between update calls. 
+# Requires the input data be at least 1 byte long due to READ_PARTIAL_BLOCK +# Outputs encrypted bytes, and updates hash and partial info in gcm_data_context +# Clobbers rax, r10, r12, r13, xmm0-6, xmm9-13 +.macro PARTIAL_BLOCK CYPH_PLAIN_OUT PLAIN_CYPH_IN PLAIN_CYPH_LEN DATA_OFFSET \ + AAD_HASH operation + mov PBlockLen(%arg2), %r13 + cmp $0, %r13 + je _partial_block_done_\@ # Leave Macro if no partial blocks + # Read in input data without over reading + cmp $16, \PLAIN_CYPH_LEN + jl _fewer_than_16_bytes_\@ + movups (\PLAIN_CYPH_IN), %xmm1 # If more than 16 bytes, just fill xmm + jmp _data_read_\@ + +_fewer_than_16_bytes_\@: + lea (\PLAIN_CYPH_IN, \DATA_OFFSET, 1), %r10 + mov \PLAIN_CYPH_LEN, %r12 + READ_PARTIAL_BLOCK %r10 %r12 %xmm0 %xmm1 + + mov PBlockLen(%arg2), %r13 + +_data_read_\@: # Finished reading in data + + movdqu PBlockEncKey(%arg2), %xmm9 + movdqu HashKey(%arg2), %xmm13 + + lea SHIFT_MASK(%rip), %r12 + + # adjust the shuffle mask pointer to be able to shift r13 bytes + # r16-r13 is the number of bytes in plaintext mod 16) + add %r13, %r12 + movdqu (%r12), %xmm2 # get the appropriate shuffle mask + PSHUFB_XMM %xmm2, %xmm9 # shift right r13 bytes + +.ifc \operation, dec + movdqa %xmm1, %xmm3 + pxor %xmm1, %xmm9 # Cyphertext XOR E(K, Yn) + + mov \PLAIN_CYPH_LEN, %r10 + add %r13, %r10 + # Set r10 to be the amount of data left in CYPH_PLAIN_IN after filling + sub $16, %r10 + # Determine if if partial block is not being filled and + # shift mask accordingly + jge _no_extra_mask_1_\@ + sub %r10, %r12 +_no_extra_mask_1_\@: + + movdqu ALL_F-SHIFT_MASK(%r12), %xmm1 + # get the appropriate mask to mask out bottom r13 bytes of xmm9 + pand %xmm1, %xmm9 # mask out bottom r13 bytes of xmm9 + + pand %xmm1, %xmm3 + movdqa SHUF_MASK(%rip), %xmm10 + PSHUFB_XMM %xmm10, %xmm3 + PSHUFB_XMM %xmm2, %xmm3 + pxor %xmm3, \AAD_HASH + + cmp $0, %r10 + jl _partial_incomplete_1_\@ + + # GHASH computation for the last <16 Byte block + GHASH_MUL \AAD_HASH, %xmm13, %xmm0, %xmm10, %xmm11, %xmm5, %xmm6 + xor %rax,%rax + + mov %rax, PBlockLen(%arg2) + jmp _dec_done_\@ +_partial_incomplete_1_\@: + add \PLAIN_CYPH_LEN, PBlockLen(%arg2) +_dec_done_\@: + movdqu \AAD_HASH, AadHash(%arg2) +.else + pxor %xmm1, %xmm9 # Plaintext XOR E(K, Yn) + + mov \PLAIN_CYPH_LEN, %r10 + add %r13, %r10 + # Set r10 to be the amount of data left in CYPH_PLAIN_IN after filling + sub $16, %r10 + # Determine if if partial block is not being filled and + # shift mask accordingly + jge _no_extra_mask_2_\@ + sub %r10, %r12 +_no_extra_mask_2_\@: + + movdqu ALL_F-SHIFT_MASK(%r12), %xmm1 + # get the appropriate mask to mask out bottom r13 bytes of xmm9 + pand %xmm1, %xmm9 + + movdqa SHUF_MASK(%rip), %xmm1 + PSHUFB_XMM %xmm1, %xmm9 + PSHUFB_XMM %xmm2, %xmm9 + pxor %xmm9, \AAD_HASH + + cmp $0, %r10 + jl _partial_incomplete_2_\@ + + # GHASH computation for the last <16 Byte block + GHASH_MUL \AAD_HASH, %xmm13, %xmm0, %xmm10, %xmm11, %xmm5, %xmm6 + xor %rax,%rax + + mov %rax, PBlockLen(%arg2) + jmp _encode_done_\@ +_partial_incomplete_2_\@: + add \PLAIN_CYPH_LEN, PBlockLen(%arg2) +_encode_done_\@: + movdqu \AAD_HASH, AadHash(%arg2) + + movdqa SHUF_MASK(%rip), %xmm10 + # shuffle xmm9 back to output as ciphertext + PSHUFB_XMM %xmm10, %xmm9 + PSHUFB_XMM %xmm2, %xmm9 +.endif + # output encrypted Bytes + cmp $0, %r10 + jl _partial_fill_\@ + mov %r13, %r12 + mov $16, %r13 + # Set r13 to be the number of bytes to write out + sub %r12, %r13 + jmp _count_set_\@ +_partial_fill_\@: + mov \PLAIN_CYPH_LEN, %r13 +_count_set_\@: + movdqa %xmm9, %xmm0 + 
MOVQ_R64_XMM %xmm0, %rax + cmp $8, %r13 + jle _less_than_8_bytes_left_\@ + + mov %rax, (\CYPH_PLAIN_OUT, \DATA_OFFSET, 1) + add $8, \DATA_OFFSET + psrldq $8, %xmm0 + MOVQ_R64_XMM %xmm0, %rax + sub $8, %r13 +_less_than_8_bytes_left_\@: + movb %al, (\CYPH_PLAIN_OUT, \DATA_OFFSET, 1) + add $1, \DATA_OFFSET + shr $8, %rax + sub $1, %r13 + jne _less_than_8_bytes_left_\@ +_partial_block_done_\@: +.endm # PARTIAL_BLOCK + /* * if a = number of total plaintext bytes * b = floor(a/16) @@ -624,7 +774,6 @@ _get_AAD_done\@: movdqu AadHash(%arg2), %xmm\i # XMM0 = Y0 - xor %r11, %r11 # initialise the data pointer offset as zero # start AES for num_initial_blocks blocks movdqu CurCount(%arg2), \XMM0 # XMM0 = Y0 -- cgit v1.2.3 From 933d6aefd54e4b49c347eb1a6cf8f8785814e90e Mon Sep 17 00:00:00 2001 From: Dave Watson Date: Wed, 14 Feb 2018 09:40:31 -0800 Subject: crypto: aesni - Add fast path for > 16 byte update We can fast-path any < 16 byte read if the full message is > 16 bytes, and shift over by the appropriate amount. Usually we are reading > 16 bytes, so this should be faster than the READ_PARTIAL macro introduced in b20209c91e2 for the average case. Signed-off-by: Dave Watson Signed-off-by: Herbert Xu --- arch/x86/crypto/aesni-intel_asm.S | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) (limited to 'arch') diff --git a/arch/x86/crypto/aesni-intel_asm.S b/arch/x86/crypto/aesni-intel_asm.S index 760a5174e308..c00f32c39360 100644 --- a/arch/x86/crypto/aesni-intel_asm.S +++ b/arch/x86/crypto/aesni-intel_asm.S @@ -356,12 +356,37 @@ _zero_cipher_left_\@: ENCRYPT_SINGLE_BLOCK %xmm0, %xmm1 # Encrypt(K, Yn) movdqu %xmm0, PBlockEncKey(%arg2) + cmp $16, %arg5 + jge _large_enough_update_\@ + lea (%arg4,%r11,1), %r10 mov %r13, %r12 READ_PARTIAL_BLOCK %r10 %r12 %xmm2 %xmm1 + jmp _data_read_\@ + +_large_enough_update_\@: + sub $16, %r11 + add %r13, %r11 + + # receive the last <16 Byte block + movdqu (%arg4, %r11, 1), %xmm1 + sub %r13, %r11 + add $16, %r11 + + lea SHIFT_MASK+16(%rip), %r12 + # adjust the shuffle mask pointer to be able to shift 16-r13 bytes + # (r13 is the number of bytes in plaintext mod 16) + sub %r13, %r12 + # get the appropriate shuffle mask + movdqu (%r12), %xmm2 + # shift right 16-r13 bytes + PSHUFB_XMM %xmm2, %xmm1 + +_data_read_\@: lea ALL_F+16(%rip), %r12 sub %r13, %r12 + .ifc \operation, dec movdqa %xmm1, %xmm2 .endif -- cgit v1.2.3 From fb8986e6430a6b226917ec5370c2be06a72803eb Mon Sep 17 00:00:00 2001 From: Dave Watson Date: Wed, 14 Feb 2018 09:40:47 -0800 Subject: crypto: aesni - Introduce scatter/gather asm function stubs The asm macros are all set up now, introduce entry points. GCM_INIT and GCM_COMPLETE have arguments supplied, so that the new scatter/gather entry points don't have to take all the arguments, and only the ones they need. Signed-off-by: Dave Watson Signed-off-by: Herbert Xu --- arch/x86/crypto/aesni-intel_asm.S | 116 ++++++++++++++++++++++++++++--------- arch/x86/crypto/aesni-intel_glue.c | 16 +++++ 2 files changed, 106 insertions(+), 26 deletions(-) (limited to 'arch') diff --git a/arch/x86/crypto/aesni-intel_asm.S b/arch/x86/crypto/aesni-intel_asm.S index c00f32c39360..e762ef417562 100644 --- a/arch/x86/crypto/aesni-intel_asm.S +++ b/arch/x86/crypto/aesni-intel_asm.S @@ -201,8 +201,8 @@ ALL_F: .octa 0xffffffffffffffffffffffffffffffff # Output: HashKeys stored in gcm_context_data. Only needs to be called # once per key. # clobbers r12, and tmp xmm registers. 
-.macro PRECOMPUTE TMP1 TMP2 TMP3 TMP4 TMP5 TMP6 TMP7 - mov arg7, %r12 +.macro PRECOMPUTE SUBKEY TMP1 TMP2 TMP3 TMP4 TMP5 TMP6 TMP7 + mov \SUBKEY, %r12 movdqu (%r12), \TMP3 movdqa SHUF_MASK(%rip), \TMP2 PSHUFB_XMM \TMP2, \TMP3 @@ -255,14 +255,14 @@ ALL_F: .octa 0xffffffffffffffffffffffffffffffff # GCM_INIT initializes a gcm_context struct to prepare for encoding/decoding. # Clobbers rax, r10-r13 and xmm0-xmm6, %xmm13 -.macro GCM_INIT - mov arg9, %r11 +.macro GCM_INIT Iv SUBKEY AAD AADLEN + mov \AADLEN, %r11 mov %r11, AadLen(%arg2) # ctx_data.aad_length = aad_length xor %r11, %r11 mov %r11, InLen(%arg2) # ctx_data.in_length = 0 mov %r11, PBlockLen(%arg2) # ctx_data.partial_block_length = 0 mov %r11, PBlockEncKey(%arg2) # ctx_data.partial_block_enc_key = 0 - mov %arg6, %rax + mov \Iv, %rax movdqu (%rax), %xmm0 movdqu %xmm0, OrigIV(%arg2) # ctx_data.orig_IV = iv @@ -270,11 +270,11 @@ ALL_F: .octa 0xffffffffffffffffffffffffffffffff PSHUFB_XMM %xmm2, %xmm0 movdqu %xmm0, CurCount(%arg2) # ctx_data.current_counter = iv - PRECOMPUTE %xmm1 %xmm2 %xmm3 %xmm4 %xmm5 %xmm6 %xmm7 + PRECOMPUTE \SUBKEY, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, movdqa HashKey(%arg2), %xmm13 - CALC_AAD_HASH %xmm13 %xmm0 %xmm1 %xmm2 %xmm3 %xmm4 \ - %xmm5 %xmm6 + CALC_AAD_HASH %xmm13, \AAD, \AADLEN, %xmm0, %xmm1, %xmm2, %xmm3, \ + %xmm4, %xmm5, %xmm6 .endm # GCM_ENC_DEC Encodes/Decodes given data. Assumes that the passed gcm_context @@ -436,7 +436,7 @@ _multiple_of_16_bytes_\@: # GCM_COMPLETE Finishes update of tag of last partial block # Output: Authorization Tag (AUTH_TAG) # Clobbers rax, r10-r12, and xmm0, xmm1, xmm5-xmm15 -.macro GCM_COMPLETE +.macro GCM_COMPLETE AUTHTAG AUTHTAGLEN movdqu AadHash(%arg2), %xmm8 movdqu HashKey(%arg2), %xmm13 @@ -467,8 +467,8 @@ _partial_done\@: ENCRYPT_SINGLE_BLOCK %xmm0, %xmm1 # E(K, Y0) pxor %xmm8, %xmm0 _return_T_\@: - mov arg10, %r10 # %r10 = authTag - mov arg11, %r11 # %r11 = auth_tag_len + mov \AUTHTAG, %r10 # %r10 = authTag + mov \AUTHTAGLEN, %r11 # %r11 = auth_tag_len cmp $16, %r11 je _T_16_\@ cmp $8, %r11 @@ -600,11 +600,11 @@ _done_read_partial_block_\@: # CALC_AAD_HASH: Calculates the hash of the data which will not be encrypted. 
# clobbers r10-11, xmm14 -.macro CALC_AAD_HASH HASHKEY TMP1 TMP2 TMP3 TMP4 TMP5 \ +.macro CALC_AAD_HASH HASHKEY AAD AADLEN TMP1 TMP2 TMP3 TMP4 TMP5 \ TMP6 TMP7 MOVADQ SHUF_MASK(%rip), %xmm14 - mov arg8, %r10 # %r10 = AAD - mov arg9, %r11 # %r11 = aadLen + mov \AAD, %r10 # %r10 = AAD + mov \AADLEN, %r11 # %r11 = aadLen pxor \TMP7, \TMP7 pxor \TMP6, \TMP6 @@ -1104,18 +1104,18 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation mov keysize,%eax shr $2,%eax # 128->4, 192->6, 256->8 sub $4,%eax # 128->0, 192->2, 256->4 - jz aes_loop_par_enc_done + jz aes_loop_par_enc_done\@ -aes_loop_par_enc: +aes_loop_par_enc\@: MOVADQ (%r10),\TMP3 .irpc index, 1234 AESENC \TMP3, %xmm\index .endr add $16,%r10 sub $1,%eax - jnz aes_loop_par_enc + jnz aes_loop_par_enc\@ -aes_loop_par_enc_done: +aes_loop_par_enc_done\@: MOVADQ (%r10), \TMP3 AESENCLAST \TMP3, \XMM1 # Round 10 AESENCLAST \TMP3, \XMM2 @@ -1312,18 +1312,18 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation mov keysize,%eax shr $2,%eax # 128->4, 192->6, 256->8 sub $4,%eax # 128->0, 192->2, 256->4 - jz aes_loop_par_dec_done + jz aes_loop_par_dec_done\@ -aes_loop_par_dec: +aes_loop_par_dec\@: MOVADQ (%r10),\TMP3 .irpc index, 1234 AESENC \TMP3, %xmm\index .endr add $16,%r10 sub $1,%eax - jnz aes_loop_par_dec + jnz aes_loop_par_dec\@ -aes_loop_par_dec_done: +aes_loop_par_dec_done\@: MOVADQ (%r10), \TMP3 AESENCLAST \TMP3, \XMM1 # last round AESENCLAST \TMP3, \XMM2 @@ -1599,9 +1599,9 @@ _esb_loop_\@: ENTRY(aesni_gcm_dec) FUNC_SAVE - GCM_INIT + GCM_INIT %arg6, arg7, arg8, arg9 GCM_ENC_DEC dec - GCM_COMPLETE + GCM_COMPLETE arg10, arg11 FUNC_RESTORE ret ENDPROC(aesni_gcm_dec) @@ -1687,13 +1687,77 @@ ENDPROC(aesni_gcm_dec) ENTRY(aesni_gcm_enc) FUNC_SAVE - GCM_INIT + GCM_INIT %arg6, arg7, arg8, arg9 GCM_ENC_DEC enc - GCM_COMPLETE + + GCM_COMPLETE arg10, arg11 FUNC_RESTORE ret ENDPROC(aesni_gcm_enc) +/***************************************************************************** +* void aesni_gcm_init(void *aes_ctx, // AES Key schedule. Starts on a 16 byte boundary. +* struct gcm_context_data *data, +* // context data +* u8 *iv, // Pre-counter block j0: 4 byte salt (from Security Association) +* // concatenated with 8 byte Initialisation Vector (from IPSec ESP Payload) +* // concatenated with 0x00000001. 16-byte aligned pointer. +* u8 *hash_subkey, // H, the Hash sub key input. Data starts on a 16-byte boundary. +* const u8 *aad, // Additional Authentication Data (AAD) +* u64 aad_len) // Length of AAD in bytes. +*/ +ENTRY(aesni_gcm_init) + FUNC_SAVE + GCM_INIT %arg3, %arg4,%arg5, %arg6 + FUNC_RESTORE + ret +ENDPROC(aesni_gcm_init) + +/***************************************************************************** +* void aesni_gcm_enc_update(void *aes_ctx, // AES Key schedule. Starts on a 16 byte boundary. +* struct gcm_context_data *data, +* // context data +* u8 *out, // Ciphertext output. Encrypt in-place is allowed. +* const u8 *in, // Plaintext input +* u64 plaintext_len, // Length of data in bytes for encryption. +*/ +ENTRY(aesni_gcm_enc_update) + FUNC_SAVE + GCM_ENC_DEC enc + FUNC_RESTORE + ret +ENDPROC(aesni_gcm_enc_update) + +/***************************************************************************** +* void aesni_gcm_dec_update(void *aes_ctx, // AES Key schedule. Starts on a 16 byte boundary. +* struct gcm_context_data *data, +* // context data +* u8 *out, // Ciphertext output. Encrypt in-place is allowed. +* const u8 *in, // Plaintext input +* u64 plaintext_len, // Length of data in bytes for encryption. 
+*/ +ENTRY(aesni_gcm_dec_update) + FUNC_SAVE + GCM_ENC_DEC dec + FUNC_RESTORE + ret +ENDPROC(aesni_gcm_dec_update) + +/***************************************************************************** +* void aesni_gcm_finalize(void *aes_ctx, // AES Key schedule. Starts on a 16 byte boundary. +* struct gcm_context_data *data, +* // context data +* u8 *auth_tag, // Authenticated Tag output. +* u64 auth_tag_len); // Authenticated Tag Length in bytes. Valid values are 16 (most likely), +* // 12 or 8. +*/ +ENTRY(aesni_gcm_finalize) + FUNC_SAVE + GCM_COMPLETE %arg3 %arg4 + FUNC_RESTORE + ret +ENDPROC(aesni_gcm_finalize) + #endif diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c index 4dd5b9bbbb83..de986f9395d5 100644 --- a/arch/x86/crypto/aesni-intel_glue.c +++ b/arch/x86/crypto/aesni-intel_glue.c @@ -161,6 +161,22 @@ asmlinkage void aesni_gcm_dec(void *ctx, u8 *hash_subkey, const u8 *aad, unsigned long aad_len, u8 *auth_tag, unsigned long auth_tag_len); +/* Scatter / Gather routines, with args similar to above */ +asmlinkage void aesni_gcm_init(void *ctx, + struct gcm_context_data *gdata, + u8 *iv, + u8 *hash_subkey, const u8 *aad, + unsigned long aad_len); +asmlinkage void aesni_gcm_enc_update(void *ctx, + struct gcm_context_data *gdata, u8 *out, + const u8 *in, unsigned long plaintext_len); +asmlinkage void aesni_gcm_dec_update(void *ctx, + struct gcm_context_data *gdata, u8 *out, + const u8 *in, + unsigned long ciphertext_len); +asmlinkage void aesni_gcm_finalize(void *ctx, + struct gcm_context_data *gdata, + u8 *auth_tag, unsigned long auth_tag_len); #ifdef CONFIG_AS_AVX asmlinkage void aes_ctr_enc_128_avx_by8(const u8 *in, u8 *iv, -- cgit v1.2.3 From e845520707f85c539ce04bb73c6070e9441480be Mon Sep 17 00:00:00 2001 From: Dave Watson Date: Wed, 14 Feb 2018 09:40:58 -0800 Subject: crypto: aesni - Update aesni-intel_glue to use scatter/gather Add gcmaes_crypt_by_sg routine, that will do scatter/gather by sg. Either src or dst may contain multiple buffers, so iterate over both at the same time if they are different. If the input is the same as the output, iterate only over one. Currently both the AAD and TAG must be linear, so copy them out with scatterlist_map_and_copy. If first buffer contains the entire AAD, we can optimize and not copy. Since the AAD can be any size, if copied it must be on the heap. TAG can be on the stack since it is always < 16 bytes. Only the SSE routines are updated so far, so leave the previous gcmaes_en/decrypt routines, and branch to the sg ones if the keysize is inappropriate for avx, or we are SSE only. 
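
The overall shape of the new path, with the scatterwalk mapping, clamping and error
handling stripped out (a condensed sketch of the gcmaes_crypt_by_sg() routine below,
not a drop-in replacement for it):

	kernel_fpu_begin();
	aesni_gcm_init(aes_ctx, &data, iv, hash_subkey, assoc, assoclen);
	while (left) {				/* walk src and dst together */
		len = min(srclen, dstlen);	/* largest piece mapped from both */
		if (enc)
			aesni_gcm_enc_update(aes_ctx, &data, dst, src, len);
		else
			aesni_gcm_dec_update(aes_ctx, &data, dst, src, len);
		left -= len;
	}
	aesni_gcm_finalize(aes_ctx, &data, authTag, auth_tag_len);
	kernel_fpu_end();

The gcm_context_data block carries the running AAD hash, counter and partial-block
state between the update calls, so the individual segments do not have to be
multiples of 16 bytes.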
Signed-off-by: Dave Watson Signed-off-by: Herbert Xu --- arch/x86/crypto/aesni-intel_glue.c | 133 +++++++++++++++++++++++++++++++++++++ 1 file changed, 133 insertions(+) (limited to 'arch') diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c index de986f9395d5..acbe7e8336d8 100644 --- a/arch/x86/crypto/aesni-intel_glue.c +++ b/arch/x86/crypto/aesni-intel_glue.c @@ -791,6 +791,127 @@ static int generic_gcmaes_set_authsize(struct crypto_aead *tfm, return 0; } +static int gcmaes_crypt_by_sg(bool enc, struct aead_request *req, + unsigned int assoclen, u8 *hash_subkey, + u8 *iv, void *aes_ctx) +{ + struct crypto_aead *tfm = crypto_aead_reqtfm(req); + unsigned long auth_tag_len = crypto_aead_authsize(tfm); + struct gcm_context_data data AESNI_ALIGN_ATTR; + struct scatter_walk dst_sg_walk = {}; + unsigned long left = req->cryptlen; + unsigned long len, srclen, dstlen; + struct scatter_walk assoc_sg_walk; + struct scatter_walk src_sg_walk; + struct scatterlist src_start[2]; + struct scatterlist dst_start[2]; + struct scatterlist *src_sg; + struct scatterlist *dst_sg; + u8 *src, *dst, *assoc; + u8 *assocmem = NULL; + u8 authTag[16]; + + if (!enc) + left -= auth_tag_len; + + /* Linearize assoc, if not already linear */ + if (req->src->length >= assoclen && req->src->length && + (!PageHighMem(sg_page(req->src)) || + req->src->offset + req->src->length < PAGE_SIZE)) { + scatterwalk_start(&assoc_sg_walk, req->src); + assoc = scatterwalk_map(&assoc_sg_walk); + } else { + /* assoc can be any length, so must be on heap */ + assocmem = kmalloc(assoclen, GFP_ATOMIC); + if (unlikely(!assocmem)) + return -ENOMEM; + assoc = assocmem; + + scatterwalk_map_and_copy(assoc, req->src, 0, assoclen, 0); + } + + src_sg = scatterwalk_ffwd(src_start, req->src, req->assoclen); + scatterwalk_start(&src_sg_walk, src_sg); + if (req->src != req->dst) { + dst_sg = scatterwalk_ffwd(dst_start, req->dst, req->assoclen); + scatterwalk_start(&dst_sg_walk, dst_sg); + } + + kernel_fpu_begin(); + aesni_gcm_init(aes_ctx, &data, iv, + hash_subkey, assoc, assoclen); + if (req->src != req->dst) { + while (left) { + src = scatterwalk_map(&src_sg_walk); + dst = scatterwalk_map(&dst_sg_walk); + srclen = scatterwalk_clamp(&src_sg_walk, left); + dstlen = scatterwalk_clamp(&dst_sg_walk, left); + len = min(srclen, dstlen); + if (len) { + if (enc) + aesni_gcm_enc_update(aes_ctx, &data, + dst, src, len); + else + aesni_gcm_dec_update(aes_ctx, &data, + dst, src, len); + } + left -= len; + + scatterwalk_unmap(src); + scatterwalk_unmap(dst); + scatterwalk_advance(&src_sg_walk, len); + scatterwalk_advance(&dst_sg_walk, len); + scatterwalk_done(&src_sg_walk, 0, left); + scatterwalk_done(&dst_sg_walk, 1, left); + } + } else { + while (left) { + dst = src = scatterwalk_map(&src_sg_walk); + len = scatterwalk_clamp(&src_sg_walk, left); + if (len) { + if (enc) + aesni_gcm_enc_update(aes_ctx, &data, + src, src, len); + else + aesni_gcm_dec_update(aes_ctx, &data, + src, src, len); + } + left -= len; + scatterwalk_unmap(src); + scatterwalk_advance(&src_sg_walk, len); + scatterwalk_done(&src_sg_walk, 1, left); + } + } + aesni_gcm_finalize(aes_ctx, &data, authTag, auth_tag_len); + kernel_fpu_end(); + + if (!assocmem) + scatterwalk_unmap(assoc); + else + kfree(assocmem); + + if (!enc) { + u8 authTagMsg[16]; + + /* Copy out original authTag */ + scatterwalk_map_and_copy(authTagMsg, req->src, + req->assoclen + req->cryptlen - + auth_tag_len, + auth_tag_len, 0); + + /* Compare generated tag with passed in tag. 
*/ + return crypto_memneq(authTagMsg, authTag, auth_tag_len) ? + -EBADMSG : 0; + } + + /* Copy in the authTag */ + scatterwalk_map_and_copy(authTag, req->dst, + req->assoclen + req->cryptlen, + auth_tag_len, 1); + + return 0; +} + static int gcmaes_encrypt(struct aead_request *req, unsigned int assoclen, u8 *hash_subkey, u8 *iv, void *aes_ctx) { @@ -802,6 +923,12 @@ static int gcmaes_encrypt(struct aead_request *req, unsigned int assoclen, struct scatter_walk dst_sg_walk = {}; struct gcm_context_data data AESNI_ALIGN_ATTR; + if (((struct crypto_aes_ctx *)aes_ctx)->key_length != AES_KEYSIZE_128 || + aesni_gcm_enc_tfm == aesni_gcm_enc || + req->cryptlen < AVX_GEN2_OPTSIZE) { + return gcmaes_crypt_by_sg(true, req, assoclen, hash_subkey, iv, + aes_ctx); + } if (sg_is_last(req->src) && (!PageHighMem(sg_page(req->src)) || req->src->offset + req->src->length <= PAGE_SIZE) && @@ -868,6 +995,12 @@ static int gcmaes_decrypt(struct aead_request *req, unsigned int assoclen, struct gcm_context_data data AESNI_ALIGN_ATTR; int retval = 0; + if (((struct crypto_aes_ctx *)aes_ctx)->key_length != AES_KEYSIZE_128 || + aesni_gcm_enc_tfm == aesni_gcm_enc || + req->cryptlen < AVX_GEN2_OPTSIZE) { + return gcmaes_crypt_by_sg(false, req, assoclen, hash_subkey, iv, + aes_ctx); + } tempCipherLen = (unsigned long)(req->cryptlen - auth_tag_len); if (sg_is_last(req->src) && -- cgit v1.2.3 From ede9622162fac42eacde231d64e94c926f4be45d Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Wed, 14 Feb 2018 10:42:21 -0800 Subject: crypto: arm/speck - add NEON-accelerated implementation of Speck-XTS Add an ARM NEON-accelerated implementation of Speck-XTS. It operates on 128-byte chunks at a time, i.e. 8 blocks for Speck128 or 16 blocks for Speck64. Each 128-byte chunk goes through XTS preprocessing, then is encrypted/decrypted (doing one cipher round for all the blocks, then the next round, etc.), then goes through XTS postprocessing. The performance depends on the processor but can be about 3 times faster than the generic code. For example, on an ARMv7 processor we observe the following performance with Speck128/256-XTS: xts-speck128-neon: Encryption 107.9 MB/s, Decryption 108.1 MB/s xts(speck128-generic): Encryption 32.1 MB/s, Decryption 36.6 MB/s In comparison to AES-256-XTS without the Cryptography Extensions: xts-aes-neonbs: Encryption 41.2 MB/s, Decryption 36.7 MB/s xts(aes-asm): Encryption 31.7 MB/s, Decryption 30.8 MB/s xts(aes-generic): Encryption 21.2 MB/s, Decryption 20.9 MB/s Speck64/128-XTS is even faster: xts-speck64-neon: Encryption 138.6 MB/s, Decryption 139.1 MB/s Note that as with the generic code, only the Speck128 and Speck64 variants are supported. Also, for now only the XTS mode of operation is supported, to target the disk and file encryption use cases. The NEON code also only handles the portion of the data that is evenly divisible into 128-byte chunks, with any remainder handled by a C fallback. Of course, other modes of operation could be added later if needed, and/or the NEON code could be updated to handle other buffer sizes. The XTS specification is only defined for AES which has a 128-bit block size, so for the GF(2^64) math needed for Speck64-XTS we use the reducing polynomial 'x^64 + x^4 + x^3 + x + 1' given by the original XEX paper. Of course, when possible users should use Speck128-XTS, but even that may be too slow on some processors; Speck64-XTS can be faster. 
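
For orientation, the scalar operations that the NEON macros vectorize, as a hedged C
sketch (Speck64 shown with 32-bit words; Speck128 is the same with u64 words and
64-bit rotates; ror32/rol32 as in <linux/bitops.h>; this is not the kernel's generic
Speck code):

	/* One Speck round: x = (ror(x, 8) + y) ^ k;  y = rol(y, 3) ^ x */
	static void speck64_round(u32 *x, u32 *y, u32 k)
	{
		*x = ror32(*x, 8);
		*x += *y;
		*x ^= k;
		*y = rol32(*y, 3);
		*y ^= *x;
	}

	/* Next XTS tweak: multiply by x in GF(2^64), p(x) = x^64 + x^4 + x^3 + x + 1.
	 * The reduction constant 0x1B is the value loaded from .Lgf64mul_table. */
	static u64 speck64_xts_mul_x(u64 t)
	{
		return (t << 1) ^ ((t >> 63) ? 0x1B : 0);
	}

The decryption round in _speck_unround_128bytes() is simply the inverse sequence
(y ^= x; y = ror(y, 3); x ^= k; x -= y; x = rol(x, 8)).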
Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu --- arch/arm/crypto/Kconfig | 6 + arch/arm/crypto/Makefile | 2 + arch/arm/crypto/speck-neon-core.S | 432 ++++++++++++++++++++++++++++++++++++++ arch/arm/crypto/speck-neon-glue.c | 288 +++++++++++++++++++++++++ 4 files changed, 728 insertions(+) create mode 100644 arch/arm/crypto/speck-neon-core.S create mode 100644 arch/arm/crypto/speck-neon-glue.c (limited to 'arch') diff --git a/arch/arm/crypto/Kconfig b/arch/arm/crypto/Kconfig index b8e69fe282b8..925d1364727a 100644 --- a/arch/arm/crypto/Kconfig +++ b/arch/arm/crypto/Kconfig @@ -121,4 +121,10 @@ config CRYPTO_CHACHA20_NEON select CRYPTO_BLKCIPHER select CRYPTO_CHACHA20 +config CRYPTO_SPECK_NEON + tristate "NEON accelerated Speck cipher algorithms" + depends on KERNEL_MODE_NEON + select CRYPTO_BLKCIPHER + select CRYPTO_SPECK + endif diff --git a/arch/arm/crypto/Makefile b/arch/arm/crypto/Makefile index 30ef8e291271..a758107c5525 100644 --- a/arch/arm/crypto/Makefile +++ b/arch/arm/crypto/Makefile @@ -10,6 +10,7 @@ obj-$(CONFIG_CRYPTO_SHA1_ARM_NEON) += sha1-arm-neon.o obj-$(CONFIG_CRYPTO_SHA256_ARM) += sha256-arm.o obj-$(CONFIG_CRYPTO_SHA512_ARM) += sha512-arm.o obj-$(CONFIG_CRYPTO_CHACHA20_NEON) += chacha20-neon.o +obj-$(CONFIG_CRYPTO_SPECK_NEON) += speck-neon.o ce-obj-$(CONFIG_CRYPTO_AES_ARM_CE) += aes-arm-ce.o ce-obj-$(CONFIG_CRYPTO_SHA1_ARM_CE) += sha1-arm-ce.o @@ -53,6 +54,7 @@ ghash-arm-ce-y := ghash-ce-core.o ghash-ce-glue.o crct10dif-arm-ce-y := crct10dif-ce-core.o crct10dif-ce-glue.o crc32-arm-ce-y:= crc32-ce-core.o crc32-ce-glue.o chacha20-neon-y := chacha20-neon-core.o chacha20-neon-glue.o +speck-neon-y := speck-neon-core.o speck-neon-glue.o quiet_cmd_perl = PERL $@ cmd_perl = $(PERL) $(<) > $(@) diff --git a/arch/arm/crypto/speck-neon-core.S b/arch/arm/crypto/speck-neon-core.S new file mode 100644 index 000000000000..3c1e203e53b9 --- /dev/null +++ b/arch/arm/crypto/speck-neon-core.S @@ -0,0 +1,432 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * NEON-accelerated implementation of Speck128-XTS and Speck64-XTS + * + * Copyright (c) 2018 Google, Inc + * + * Author: Eric Biggers + */ + +#include + + .text + .fpu neon + + // arguments + ROUND_KEYS .req r0 // const {u64,u32} *round_keys + NROUNDS .req r1 // int nrounds + DST .req r2 // void *dst + SRC .req r3 // const void *src + NBYTES .req r4 // unsigned int nbytes + TWEAK .req r5 // void *tweak + + // registers which hold the data being encrypted/decrypted + X0 .req q0 + X0_L .req d0 + X0_H .req d1 + Y0 .req q1 + Y0_H .req d3 + X1 .req q2 + X1_L .req d4 + X1_H .req d5 + Y1 .req q3 + Y1_H .req d7 + X2 .req q4 + X2_L .req d8 + X2_H .req d9 + Y2 .req q5 + Y2_H .req d11 + X3 .req q6 + X3_L .req d12 + X3_H .req d13 + Y3 .req q7 + Y3_H .req d15 + + // the round key, duplicated in all lanes + ROUND_KEY .req q8 + ROUND_KEY_L .req d16 + ROUND_KEY_H .req d17 + + // index vector for vtbl-based 8-bit rotates + ROTATE_TABLE .req d18 + + // multiplication table for updating XTS tweaks + GF128MUL_TABLE .req d19 + GF64MUL_TABLE .req d19 + + // current XTS tweak value(s) + TWEAKV .req q10 + TWEAKV_L .req d20 + TWEAKV_H .req d21 + + TMP0 .req q12 + TMP0_L .req d24 + TMP0_H .req d25 + TMP1 .req q13 + TMP2 .req q14 + TMP3 .req q15 + + .align 4 +.Lror64_8_table: + .byte 1, 2, 3, 4, 5, 6, 7, 0 +.Lror32_8_table: + .byte 1, 2, 3, 0, 5, 6, 7, 4 +.Lrol64_8_table: + .byte 7, 0, 1, 2, 3, 4, 5, 6 +.Lrol32_8_table: + .byte 3, 0, 1, 2, 7, 4, 5, 6 +.Lgf128mul_table: + .byte 0, 0x87 + .fill 14 +.Lgf64mul_table: + .byte 0, 0x1b, (0x1b << 1), (0x1b << 1) ^ 0x1b 
+ .fill 12 + +/* + * _speck_round_128bytes() - Speck encryption round on 128 bytes at a time + * + * Do one Speck encryption round on the 128 bytes (8 blocks for Speck128, 16 for + * Speck64) stored in X0-X3 and Y0-Y3, using the round key stored in all lanes + * of ROUND_KEY. 'n' is the lane size: 64 for Speck128, or 32 for Speck64. + * + * The 8-bit rotates are implemented using vtbl instead of vshr + vsli because + * the vtbl approach is faster on some processors and the same speed on others. + */ +.macro _speck_round_128bytes n + + // x = ror(x, 8) + vtbl.8 X0_L, {X0_L}, ROTATE_TABLE + vtbl.8 X0_H, {X0_H}, ROTATE_TABLE + vtbl.8 X1_L, {X1_L}, ROTATE_TABLE + vtbl.8 X1_H, {X1_H}, ROTATE_TABLE + vtbl.8 X2_L, {X2_L}, ROTATE_TABLE + vtbl.8 X2_H, {X2_H}, ROTATE_TABLE + vtbl.8 X3_L, {X3_L}, ROTATE_TABLE + vtbl.8 X3_H, {X3_H}, ROTATE_TABLE + + // x += y + vadd.u\n X0, Y0 + vadd.u\n X1, Y1 + vadd.u\n X2, Y2 + vadd.u\n X3, Y3 + + // x ^= k + veor X0, ROUND_KEY + veor X1, ROUND_KEY + veor X2, ROUND_KEY + veor X3, ROUND_KEY + + // y = rol(y, 3) + vshl.u\n TMP0, Y0, #3 + vshl.u\n TMP1, Y1, #3 + vshl.u\n TMP2, Y2, #3 + vshl.u\n TMP3, Y3, #3 + vsri.u\n TMP0, Y0, #(\n - 3) + vsri.u\n TMP1, Y1, #(\n - 3) + vsri.u\n TMP2, Y2, #(\n - 3) + vsri.u\n TMP3, Y3, #(\n - 3) + + // y ^= x + veor Y0, TMP0, X0 + veor Y1, TMP1, X1 + veor Y2, TMP2, X2 + veor Y3, TMP3, X3 +.endm + +/* + * _speck_unround_128bytes() - Speck decryption round on 128 bytes at a time + * + * This is the inverse of _speck_round_128bytes(). + */ +.macro _speck_unround_128bytes n + + // y ^= x + veor TMP0, Y0, X0 + veor TMP1, Y1, X1 + veor TMP2, Y2, X2 + veor TMP3, Y3, X3 + + // y = ror(y, 3) + vshr.u\n Y0, TMP0, #3 + vshr.u\n Y1, TMP1, #3 + vshr.u\n Y2, TMP2, #3 + vshr.u\n Y3, TMP3, #3 + vsli.u\n Y0, TMP0, #(\n - 3) + vsli.u\n Y1, TMP1, #(\n - 3) + vsli.u\n Y2, TMP2, #(\n - 3) + vsli.u\n Y3, TMP3, #(\n - 3) + + // x ^= k + veor X0, ROUND_KEY + veor X1, ROUND_KEY + veor X2, ROUND_KEY + veor X3, ROUND_KEY + + // x -= y + vsub.u\n X0, Y0 + vsub.u\n X1, Y1 + vsub.u\n X2, Y2 + vsub.u\n X3, Y3 + + // x = rol(x, 8); + vtbl.8 X0_L, {X0_L}, ROTATE_TABLE + vtbl.8 X0_H, {X0_H}, ROTATE_TABLE + vtbl.8 X1_L, {X1_L}, ROTATE_TABLE + vtbl.8 X1_H, {X1_H}, ROTATE_TABLE + vtbl.8 X2_L, {X2_L}, ROTATE_TABLE + vtbl.8 X2_H, {X2_H}, ROTATE_TABLE + vtbl.8 X3_L, {X3_L}, ROTATE_TABLE + vtbl.8 X3_H, {X3_H}, ROTATE_TABLE +.endm + +.macro _xts128_precrypt_one dst_reg, tweak_buf, tmp + + // Load the next source block + vld1.8 {\dst_reg}, [SRC]! + + // Save the current tweak in the tweak buffer + vst1.8 {TWEAKV}, [\tweak_buf:128]! + + // XOR the next source block with the current tweak + veor \dst_reg, TWEAKV + + /* + * Calculate the next tweak by multiplying the current one by x, + * modulo p(x) = x^128 + x^7 + x^2 + x + 1. + */ + vshr.u64 \tmp, TWEAKV, #63 + vshl.u64 TWEAKV, #1 + veor TWEAKV_H, \tmp\()_L + vtbl.8 \tmp\()_H, {GF128MUL_TABLE}, \tmp\()_H + veor TWEAKV_L, \tmp\()_H +.endm + +.macro _xts64_precrypt_two dst_reg, tweak_buf, tmp + + // Load the next two source blocks + vld1.8 {\dst_reg}, [SRC]! + + // Save the current two tweaks in the tweak buffer + vst1.8 {TWEAKV}, [\tweak_buf:128]! + + // XOR the next two source blocks with the current two tweaks + veor \dst_reg, TWEAKV + + /* + * Calculate the next two tweaks by multiplying the current ones by x^2, + * modulo p(x) = x^64 + x^4 + x^3 + x + 1. 
+ */ + vshr.u64 \tmp, TWEAKV, #62 + vshl.u64 TWEAKV, #2 + vtbl.8 \tmp\()_L, {GF64MUL_TABLE}, \tmp\()_L + vtbl.8 \tmp\()_H, {GF64MUL_TABLE}, \tmp\()_H + veor TWEAKV, \tmp +.endm + +/* + * _speck_xts_crypt() - Speck-XTS encryption/decryption + * + * Encrypt or decrypt NBYTES bytes of data from the SRC buffer to the DST buffer + * using Speck-XTS, specifically the variant with a block size of '2n' and round + * count given by NROUNDS. The expanded round keys are given in ROUND_KEYS, and + * the current XTS tweak value is given in TWEAK. It's assumed that NBYTES is a + * nonzero multiple of 128. + */ +.macro _speck_xts_crypt n, decrypting + push {r4-r7} + mov r7, sp + + /* + * The first four parameters were passed in registers r0-r3. Load the + * additional parameters, which were passed on the stack. + */ + ldr NBYTES, [sp, #16] + ldr TWEAK, [sp, #20] + + /* + * If decrypting, modify the ROUND_KEYS parameter to point to the last + * round key rather than the first, since for decryption the round keys + * are used in reverse order. + */ +.if \decrypting +.if \n == 64 + add ROUND_KEYS, ROUND_KEYS, NROUNDS, lsl #3 + sub ROUND_KEYS, #8 +.else + add ROUND_KEYS, ROUND_KEYS, NROUNDS, lsl #2 + sub ROUND_KEYS, #4 +.endif +.endif + + // Load the index vector for vtbl-based 8-bit rotates +.if \decrypting + ldr r12, =.Lrol\n\()_8_table +.else + ldr r12, =.Lror\n\()_8_table +.endif + vld1.8 {ROTATE_TABLE}, [r12:64] + + // One-time XTS preparation + + /* + * Allocate stack space to store 128 bytes worth of tweaks. For + * performance, this space is aligned to a 16-byte boundary so that we + * can use the load/store instructions that declare 16-byte alignment. + */ + sub sp, #128 + bic sp, #0xf + +.if \n == 64 + // Load first tweak + vld1.8 {TWEAKV}, [TWEAK] + + // Load GF(2^128) multiplication table + ldr r12, =.Lgf128mul_table + vld1.8 {GF128MUL_TABLE}, [r12:64] +.else + // Load first tweak + vld1.8 {TWEAKV_L}, [TWEAK] + + // Load GF(2^64) multiplication table + ldr r12, =.Lgf64mul_table + vld1.8 {GF64MUL_TABLE}, [r12:64] + + // Calculate second tweak, packing it together with the first + vshr.u64 TMP0_L, TWEAKV_L, #63 + vtbl.u8 TMP0_L, {GF64MUL_TABLE}, TMP0_L + vshl.u64 TWEAKV_H, TWEAKV_L, #1 + veor TWEAKV_H, TMP0_L +.endif + +.Lnext_128bytes_\@: + + /* + * Load the source blocks into {X,Y}[0-3], XOR them with their XTS tweak + * values, and save the tweaks on the stack for later. Then + * de-interleave the 'x' and 'y' elements of each block, i.e. make it so + * that the X[0-3] registers contain only the second halves of blocks, + * and the Y[0-3] registers contain only the first halves of blocks. + * (Speck uses the order (y, x) rather than the more intuitive (x, y).) 
+ */ + mov r12, sp +.if \n == 64 + _xts128_precrypt_one X0, r12, TMP0 + _xts128_precrypt_one Y0, r12, TMP0 + _xts128_precrypt_one X1, r12, TMP0 + _xts128_precrypt_one Y1, r12, TMP0 + _xts128_precrypt_one X2, r12, TMP0 + _xts128_precrypt_one Y2, r12, TMP0 + _xts128_precrypt_one X3, r12, TMP0 + _xts128_precrypt_one Y3, r12, TMP0 + vswp X0_L, Y0_H + vswp X1_L, Y1_H + vswp X2_L, Y2_H + vswp X3_L, Y3_H +.else + _xts64_precrypt_two X0, r12, TMP0 + _xts64_precrypt_two Y0, r12, TMP0 + _xts64_precrypt_two X1, r12, TMP0 + _xts64_precrypt_two Y1, r12, TMP0 + _xts64_precrypt_two X2, r12, TMP0 + _xts64_precrypt_two Y2, r12, TMP0 + _xts64_precrypt_two X3, r12, TMP0 + _xts64_precrypt_two Y3, r12, TMP0 + vuzp.32 Y0, X0 + vuzp.32 Y1, X1 + vuzp.32 Y2, X2 + vuzp.32 Y3, X3 +.endif + + // Do the cipher rounds + + mov r12, ROUND_KEYS + mov r6, NROUNDS + +.Lnext_round_\@: +.if \decrypting +.if \n == 64 + vld1.64 ROUND_KEY_L, [r12] + sub r12, #8 + vmov ROUND_KEY_H, ROUND_KEY_L +.else + vld1.32 {ROUND_KEY_L[],ROUND_KEY_H[]}, [r12] + sub r12, #4 +.endif + _speck_unround_128bytes \n +.else +.if \n == 64 + vld1.64 ROUND_KEY_L, [r12]! + vmov ROUND_KEY_H, ROUND_KEY_L +.else + vld1.32 {ROUND_KEY_L[],ROUND_KEY_H[]}, [r12]! +.endif + _speck_round_128bytes \n +.endif + subs r6, r6, #1 + bne .Lnext_round_\@ + + // Re-interleave the 'x' and 'y' elements of each block +.if \n == 64 + vswp X0_L, Y0_H + vswp X1_L, Y1_H + vswp X2_L, Y2_H + vswp X3_L, Y3_H +.else + vzip.32 Y0, X0 + vzip.32 Y1, X1 + vzip.32 Y2, X2 + vzip.32 Y3, X3 +.endif + + // XOR the encrypted/decrypted blocks with the tweaks we saved earlier + mov r12, sp + vld1.8 {TMP0, TMP1}, [r12:128]! + vld1.8 {TMP2, TMP3}, [r12:128]! + veor X0, TMP0 + veor Y0, TMP1 + veor X1, TMP2 + veor Y1, TMP3 + vld1.8 {TMP0, TMP1}, [r12:128]! + vld1.8 {TMP2, TMP3}, [r12:128]! + veor X2, TMP0 + veor Y2, TMP1 + veor X3, TMP2 + veor Y3, TMP3 + + // Store the ciphertext in the destination buffer + vst1.8 {X0, Y0}, [DST]! + vst1.8 {X1, Y1}, [DST]! + vst1.8 {X2, Y2}, [DST]! + vst1.8 {X3, Y3}, [DST]! + + // Continue if there are more 128-byte chunks remaining, else return + subs NBYTES, #128 + bne .Lnext_128bytes_\@ + + // Store the next tweak +.if \n == 64 + vst1.8 {TWEAKV}, [TWEAK] +.else + vst1.8 {TWEAKV_L}, [TWEAK] +.endif + + mov sp, r7 + pop {r4-r7} + bx lr +.endm + +ENTRY(speck128_xts_encrypt_neon) + _speck_xts_crypt n=64, decrypting=0 +ENDPROC(speck128_xts_encrypt_neon) + +ENTRY(speck128_xts_decrypt_neon) + _speck_xts_crypt n=64, decrypting=1 +ENDPROC(speck128_xts_decrypt_neon) + +ENTRY(speck64_xts_encrypt_neon) + _speck_xts_crypt n=32, decrypting=0 +ENDPROC(speck64_xts_encrypt_neon) + +ENTRY(speck64_xts_decrypt_neon) + _speck_xts_crypt n=32, decrypting=1 +ENDPROC(speck64_xts_decrypt_neon) diff --git a/arch/arm/crypto/speck-neon-glue.c b/arch/arm/crypto/speck-neon-glue.c new file mode 100644 index 000000000000..f012c3ea998f --- /dev/null +++ b/arch/arm/crypto/speck-neon-glue.c @@ -0,0 +1,288 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * NEON-accelerated implementation of Speck128-XTS and Speck64-XTS + * + * Copyright (c) 2018 Google, Inc + * + * Note: the NIST recommendation for XTS only specifies a 128-bit block size, + * but a 64-bit version (needed for Speck64) is fairly straightforward; the math + * is just done in GF(2^64) instead of GF(2^128), with the reducing polynomial + * x^64 + x^4 + x^3 + x + 1 from the original XEX paper (Rogaway, 2004: + * "Efficient Instantiations of Tweakable Blockciphers and Refinements to Modes + * OCB and PMAC"), represented as 0x1B. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* The assembly functions only handle multiples of 128 bytes */ +#define SPECK_NEON_CHUNK_SIZE 128 + +/* Speck128 */ + +struct speck128_xts_tfm_ctx { + struct speck128_tfm_ctx main_key; + struct speck128_tfm_ctx tweak_key; +}; + +asmlinkage void speck128_xts_encrypt_neon(const u64 *round_keys, int nrounds, + void *dst, const void *src, + unsigned int nbytes, void *tweak); + +asmlinkage void speck128_xts_decrypt_neon(const u64 *round_keys, int nrounds, + void *dst, const void *src, + unsigned int nbytes, void *tweak); + +typedef void (*speck128_crypt_one_t)(const struct speck128_tfm_ctx *, + u8 *, const u8 *); +typedef void (*speck128_xts_crypt_many_t)(const u64 *, int, void *, + const void *, unsigned int, void *); + +static __always_inline int +__speck128_xts_crypt(struct skcipher_request *req, + speck128_crypt_one_t crypt_one, + speck128_xts_crypt_many_t crypt_many) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + const struct speck128_xts_tfm_ctx *ctx = crypto_skcipher_ctx(tfm); + struct skcipher_walk walk; + le128 tweak; + int err; + + err = skcipher_walk_virt(&walk, req, true); + + crypto_speck128_encrypt(&ctx->tweak_key, (u8 *)&tweak, walk.iv); + + while (walk.nbytes > 0) { + unsigned int nbytes = walk.nbytes; + u8 *dst = walk.dst.virt.addr; + const u8 *src = walk.src.virt.addr; + + if (nbytes >= SPECK_NEON_CHUNK_SIZE && may_use_simd()) { + unsigned int count; + + count = round_down(nbytes, SPECK_NEON_CHUNK_SIZE); + kernel_neon_begin(); + (*crypt_many)(ctx->main_key.round_keys, + ctx->main_key.nrounds, + dst, src, count, &tweak); + kernel_neon_end(); + dst += count; + src += count; + nbytes -= count; + } + + /* Handle any remainder with generic code */ + while (nbytes >= sizeof(tweak)) { + le128_xor((le128 *)dst, (const le128 *)src, &tweak); + (*crypt_one)(&ctx->main_key, dst, dst); + le128_xor((le128 *)dst, (const le128 *)dst, &tweak); + gf128mul_x_ble(&tweak, &tweak); + + dst += sizeof(tweak); + src += sizeof(tweak); + nbytes -= sizeof(tweak); + } + err = skcipher_walk_done(&walk, nbytes); + } + + return err; +} + +static int speck128_xts_encrypt(struct skcipher_request *req) +{ + return __speck128_xts_crypt(req, crypto_speck128_encrypt, + speck128_xts_encrypt_neon); +} + +static int speck128_xts_decrypt(struct skcipher_request *req) +{ + return __speck128_xts_crypt(req, crypto_speck128_decrypt, + speck128_xts_decrypt_neon); +} + +static int speck128_xts_setkey(struct crypto_skcipher *tfm, const u8 *key, + unsigned int keylen) +{ + struct speck128_xts_tfm_ctx *ctx = crypto_skcipher_ctx(tfm); + int err; + + err = xts_verify_key(tfm, key, keylen); + if (err) + return err; + + keylen /= 2; + + err = crypto_speck128_setkey(&ctx->main_key, key, keylen); + if (err) + return err; + + return crypto_speck128_setkey(&ctx->tweak_key, key + keylen, keylen); +} + +/* Speck64 */ + +struct speck64_xts_tfm_ctx { + struct speck64_tfm_ctx main_key; + struct speck64_tfm_ctx tweak_key; +}; + +asmlinkage void speck64_xts_encrypt_neon(const u32 *round_keys, int nrounds, + void *dst, const void *src, + unsigned int nbytes, void *tweak); + +asmlinkage void speck64_xts_decrypt_neon(const u32 *round_keys, int nrounds, + void *dst, const void *src, + unsigned int nbytes, void *tweak); + +typedef void (*speck64_crypt_one_t)(const struct speck64_tfm_ctx *, + u8 *, const u8 *); +typedef void (*speck64_xts_crypt_many_t)(const u32 *, int, void *, + const void *, unsigned int, void *); + 
+static __always_inline int +__speck64_xts_crypt(struct skcipher_request *req, speck64_crypt_one_t crypt_one, + speck64_xts_crypt_many_t crypt_many) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + const struct speck64_xts_tfm_ctx *ctx = crypto_skcipher_ctx(tfm); + struct skcipher_walk walk; + __le64 tweak; + int err; + + err = skcipher_walk_virt(&walk, req, true); + + crypto_speck64_encrypt(&ctx->tweak_key, (u8 *)&tweak, walk.iv); + + while (walk.nbytes > 0) { + unsigned int nbytes = walk.nbytes; + u8 *dst = walk.dst.virt.addr; + const u8 *src = walk.src.virt.addr; + + if (nbytes >= SPECK_NEON_CHUNK_SIZE && may_use_simd()) { + unsigned int count; + + count = round_down(nbytes, SPECK_NEON_CHUNK_SIZE); + kernel_neon_begin(); + (*crypt_many)(ctx->main_key.round_keys, + ctx->main_key.nrounds, + dst, src, count, &tweak); + kernel_neon_end(); + dst += count; + src += count; + nbytes -= count; + } + + /* Handle any remainder with generic code */ + while (nbytes >= sizeof(tweak)) { + *(__le64 *)dst = *(__le64 *)src ^ tweak; + (*crypt_one)(&ctx->main_key, dst, dst); + *(__le64 *)dst ^= tweak; + tweak = cpu_to_le64((le64_to_cpu(tweak) << 1) ^ + ((tweak & cpu_to_le64(1ULL << 63)) ? + 0x1B : 0)); + dst += sizeof(tweak); + src += sizeof(tweak); + nbytes -= sizeof(tweak); + } + err = skcipher_walk_done(&walk, nbytes); + } + + return err; +} + +static int speck64_xts_encrypt(struct skcipher_request *req) +{ + return __speck64_xts_crypt(req, crypto_speck64_encrypt, + speck64_xts_encrypt_neon); +} + +static int speck64_xts_decrypt(struct skcipher_request *req) +{ + return __speck64_xts_crypt(req, crypto_speck64_decrypt, + speck64_xts_decrypt_neon); +} + +static int speck64_xts_setkey(struct crypto_skcipher *tfm, const u8 *key, + unsigned int keylen) +{ + struct speck64_xts_tfm_ctx *ctx = crypto_skcipher_ctx(tfm); + int err; + + err = xts_verify_key(tfm, key, keylen); + if (err) + return err; + + keylen /= 2; + + err = crypto_speck64_setkey(&ctx->main_key, key, keylen); + if (err) + return err; + + return crypto_speck64_setkey(&ctx->tweak_key, key + keylen, keylen); +} + +static struct skcipher_alg speck_algs[] = { + { + .base.cra_name = "xts(speck128)", + .base.cra_driver_name = "xts-speck128-neon", + .base.cra_priority = 300, + .base.cra_blocksize = SPECK128_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct speck128_xts_tfm_ctx), + .base.cra_alignmask = 7, + .base.cra_module = THIS_MODULE, + .min_keysize = 2 * SPECK128_128_KEY_SIZE, + .max_keysize = 2 * SPECK128_256_KEY_SIZE, + .ivsize = SPECK128_BLOCK_SIZE, + .walksize = SPECK_NEON_CHUNK_SIZE, + .setkey = speck128_xts_setkey, + .encrypt = speck128_xts_encrypt, + .decrypt = speck128_xts_decrypt, + }, { + .base.cra_name = "xts(speck64)", + .base.cra_driver_name = "xts-speck64-neon", + .base.cra_priority = 300, + .base.cra_blocksize = SPECK64_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct speck64_xts_tfm_ctx), + .base.cra_alignmask = 7, + .base.cra_module = THIS_MODULE, + .min_keysize = 2 * SPECK64_96_KEY_SIZE, + .max_keysize = 2 * SPECK64_128_KEY_SIZE, + .ivsize = SPECK64_BLOCK_SIZE, + .walksize = SPECK_NEON_CHUNK_SIZE, + .setkey = speck64_xts_setkey, + .encrypt = speck64_xts_encrypt, + .decrypt = speck64_xts_decrypt, + } +}; + +static int __init speck_neon_module_init(void) +{ + if (!(elf_hwcap & HWCAP_NEON)) + return -ENODEV; + return crypto_register_skciphers(speck_algs, ARRAY_SIZE(speck_algs)); +} + +static void __exit speck_neon_module_exit(void) +{ + crypto_unregister_skciphers(speck_algs, ARRAY_SIZE(speck_algs)); +} + 
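For reference, the inline tweak update in __speck64_xts_crypt() and the gf128mul_x_ble() call in __speck128_xts_crypt() are the same multiply-by-x step that the assembly performs with GF128MUL_TABLE and GF64MUL_TABLE (_xts64_precrypt_two just applies the 64-bit step twice, since it advances two tweaks at a time). A plain C sketch of that arithmetic, illustrative only and not part of the patch, with the tweak viewed as a little-endian integer:

    /*
     * Sketch only: multiply an XTS tweak by x in GF(2^128) modulo
     * x^128 + x^7 + x^2 + x + 1 (reduction constant 0x87), or in
     * GF(2^64) modulo x^64 + x^4 + x^3 + x + 1 (constant 0x1B),
     * matching the .Lgf128mul_table / .Lgf64mul_table bytes above.
     */
    #include <stdint.h>

    /* t[0] = low 64 bits, t[1] = high 64 bits of the 128-bit tweak */
    static void xts128_next_tweak(uint64_t t[2])
    {
            uint64_t carry = t[1] >> 63;    /* bit shifted out of the top */

            t[1] = (t[1] << 1) | (t[0] >> 63);
            t[0] = (t[0] << 1) ^ (carry ? 0x87 : 0);
    }

    static uint64_t xts64_next_tweak(uint64_t t)
    {
            return (t << 1) ^ ((t >> 63) ? 0x1B : 0);
    }

The vtbl form in the assembly exists because, after the vector shift, the carried-out bit(s) sit in their own lane as a small integer, so a single table lookup turns them into the needed reduction constant.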
+module_init(speck_neon_module_init); +module_exit(speck_neon_module_exit); + +MODULE_DESCRIPTION("Speck block cipher (NEON-accelerated)"); +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Eric Biggers "); +MODULE_ALIAS_CRYPTO("xts(speck128)"); +MODULE_ALIAS_CRYPTO("xts-speck128-neon"); +MODULE_ALIAS_CRYPTO("xts(speck64)"); +MODULE_ALIAS_CRYPTO("xts-speck64-neon"); -- cgit v1.2.3 From f15f2a2542839adc45d5f216ddd9a895062707fe Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 19 Feb 2018 23:48:00 -0800 Subject: crypto: x86/glue_helper - add skcipher_walk functions Add ECB, CBC, and CTR functions to glue_helper which use skcipher_walk rather than blkcipher_walk. This will allow converting the remaining x86 algorithms from the blkcipher interface over to the skcipher interface, after which we'll be able to remove the blkcipher_walk versions of these functions. Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu --- arch/x86/crypto/glue_helper.c | 207 ++++++++++++++++++++++++++++++ arch/x86/include/asm/crypto/glue_helper.h | 12 ++ 2 files changed, 219 insertions(+) (limited to 'arch') diff --git a/arch/x86/crypto/glue_helper.c b/arch/x86/crypto/glue_helper.c index d61e57960fe0..5b909790bf8a 100644 --- a/arch/x86/crypto/glue_helper.c +++ b/arch/x86/crypto/glue_helper.c @@ -90,6 +90,52 @@ int glue_ecb_crypt_128bit(const struct common_glue_ctx *gctx, } EXPORT_SYMBOL_GPL(glue_ecb_crypt_128bit); +int glue_ecb_req_128bit(const struct common_glue_ctx *gctx, + struct skcipher_request *req) +{ + void *ctx = crypto_skcipher_ctx(crypto_skcipher_reqtfm(req)); + const unsigned int bsize = 128 / 8; + struct skcipher_walk walk; + bool fpu_enabled = false; + unsigned int nbytes; + int err; + + err = skcipher_walk_virt(&walk, req, false); + + while ((nbytes = walk.nbytes)) { + const u8 *src = walk.src.virt.addr; + u8 *dst = walk.dst.virt.addr; + unsigned int func_bytes; + unsigned int i; + + fpu_enabled = glue_skwalk_fpu_begin(bsize, + gctx->fpu_blocks_limit, + &walk, fpu_enabled, nbytes); + for (i = 0; i < gctx->num_funcs; i++) { + func_bytes = bsize * gctx->funcs[i].num_blocks; + + if (nbytes < func_bytes) + continue; + + /* Process multi-block batch */ + do { + gctx->funcs[i].fn_u.ecb(ctx, dst, src); + src += func_bytes; + dst += func_bytes; + nbytes -= func_bytes; + } while (nbytes >= func_bytes); + + if (nbytes < bsize) + break; + } + err = skcipher_walk_done(&walk, nbytes); + } + + glue_fpu_end(fpu_enabled); + return err; +} +EXPORT_SYMBOL_GPL(glue_ecb_req_128bit); + static unsigned int __glue_cbc_encrypt_128bit(const common_glue_func_t fn, struct blkcipher_desc *desc, struct blkcipher_walk *walk) @@ -135,6 +181,38 @@ int glue_cbc_encrypt_128bit(const common_glue_func_t fn, } EXPORT_SYMBOL_GPL(glue_cbc_encrypt_128bit); +int glue_cbc_encrypt_req_128bit(const common_glue_func_t fn, + struct skcipher_request *req) +{ + void *ctx = crypto_skcipher_ctx(crypto_skcipher_reqtfm(req)); + const unsigned int bsize = 128 / 8; + struct skcipher_walk walk; + unsigned int nbytes; + int err; + + err = skcipher_walk_virt(&walk, req, false); + + while ((nbytes = walk.nbytes)) { + const u128 *src = (u128 *)walk.src.virt.addr; + u128 *dst = (u128 *)walk.dst.virt.addr; + u128 *iv = (u128 *)walk.iv; + + do { + u128_xor(dst, src, iv); + fn(ctx, (u8 *)dst, (u8 *)dst); + iv = dst; + src++; + dst++; + nbytes -= bsize; + } while (nbytes >= bsize); + + *(u128 *)walk.iv = *iv; + err = skcipher_walk_done(&walk, nbytes); + } + return err; +} +EXPORT_SYMBOL_GPL(glue_cbc_encrypt_req_128bit); + static unsigned int 
__glue_cbc_decrypt_128bit(const struct common_glue_ctx *gctx, struct blkcipher_desc *desc, @@ -211,6 +289,67 @@ int glue_cbc_decrypt_128bit(const struct common_glue_ctx *gctx, } EXPORT_SYMBOL_GPL(glue_cbc_decrypt_128bit); +int glue_cbc_decrypt_req_128bit(const struct common_glue_ctx *gctx, + struct skcipher_request *req) +{ + void *ctx = crypto_skcipher_ctx(crypto_skcipher_reqtfm(req)); + const unsigned int bsize = 128 / 8; + struct skcipher_walk walk; + bool fpu_enabled = false; + unsigned int nbytes; + int err; + + err = skcipher_walk_virt(&walk, req, false); + + while ((nbytes = walk.nbytes)) { + const u128 *src = walk.src.virt.addr; + u128 *dst = walk.dst.virt.addr; + unsigned int func_bytes, num_blocks; + unsigned int i; + u128 last_iv; + + fpu_enabled = glue_skwalk_fpu_begin(bsize, + gctx->fpu_blocks_limit, + &walk, fpu_enabled, nbytes); + /* Start of the last block. */ + src += nbytes / bsize - 1; + dst += nbytes / bsize - 1; + + last_iv = *src; + + for (i = 0; i < gctx->num_funcs; i++) { + num_blocks = gctx->funcs[i].num_blocks; + func_bytes = bsize * num_blocks; + + if (nbytes < func_bytes) + continue; + + /* Process multi-block batch */ + do { + src -= num_blocks - 1; + dst -= num_blocks - 1; + + gctx->funcs[i].fn_u.cbc(ctx, dst, src); + + nbytes -= func_bytes; + if (nbytes < bsize) + goto done; + + u128_xor(dst, dst, --src); + dst--; + } while (nbytes >= func_bytes); + } +done: + u128_xor(dst, dst, (u128 *)walk.iv); + *(u128 *)walk.iv = last_iv; + err = skcipher_walk_done(&walk, nbytes); + } + + glue_fpu_end(fpu_enabled); + return err; +} +EXPORT_SYMBOL_GPL(glue_cbc_decrypt_req_128bit); + static void glue_ctr_crypt_final_128bit(const common_glue_ctr_func_t fn_ctr, struct blkcipher_desc *desc, struct blkcipher_walk *walk) @@ -301,6 +440,74 @@ int glue_ctr_crypt_128bit(const struct common_glue_ctx *gctx, } EXPORT_SYMBOL_GPL(glue_ctr_crypt_128bit); +int glue_ctr_req_128bit(const struct common_glue_ctx *gctx, + struct skcipher_request *req) +{ + void *ctx = crypto_skcipher_ctx(crypto_skcipher_reqtfm(req)); + const unsigned int bsize = 128 / 8; + struct skcipher_walk walk; + bool fpu_enabled = false; + unsigned int nbytes; + int err; + + err = skcipher_walk_virt(&walk, req, false); + + while ((nbytes = walk.nbytes) >= bsize) { + const u128 *src = walk.src.virt.addr; + u128 *dst = walk.dst.virt.addr; + unsigned int func_bytes, num_blocks; + unsigned int i; + le128 ctrblk; + + fpu_enabled = glue_skwalk_fpu_begin(bsize, + gctx->fpu_blocks_limit, + &walk, fpu_enabled, nbytes); + + be128_to_le128(&ctrblk, (be128 *)walk.iv); + + for (i = 0; i < gctx->num_funcs; i++) { + num_blocks = gctx->funcs[i].num_blocks; + func_bytes = bsize * num_blocks; + + if (nbytes < func_bytes) + continue; + + /* Process multi-block batch */ + do { + gctx->funcs[i].fn_u.ctr(ctx, dst, src, &ctrblk); + src += num_blocks; + dst += num_blocks; + nbytes -= func_bytes; + } while (nbytes >= func_bytes); + + if (nbytes < bsize) + break; + } + + le128_to_be128((be128 *)walk.iv, &ctrblk); + err = skcipher_walk_done(&walk, nbytes); + } + + glue_fpu_end(fpu_enabled); + + if (nbytes) { + le128 ctrblk; + u128 tmp; + + be128_to_le128(&ctrblk, (be128 *)walk.iv); + memcpy(&tmp, walk.src.virt.addr, nbytes); + gctx->funcs[gctx->num_funcs - 1].fn_u.ctr(ctx, &tmp, &tmp, + &ctrblk); + memcpy(walk.dst.virt.addr, &tmp, nbytes); + le128_to_be128((be128 *)walk.iv, &ctrblk); + + err = skcipher_walk_done(&walk, 0); + } + + return err; +} +EXPORT_SYMBOL_GPL(glue_ctr_req_128bit); + static unsigned int __glue_xts_crypt_128bit(const struct 
common_glue_ctx *gctx, void *ctx, struct blkcipher_desc *desc, diff --git a/arch/x86/include/asm/crypto/glue_helper.h b/arch/x86/include/asm/crypto/glue_helper.h index 553a03de55c3..91c623e6ddbd 100644 --- a/arch/x86/include/asm/crypto/glue_helper.h +++ b/arch/x86/include/asm/crypto/glue_helper.h @@ -131,23 +131,35 @@ extern int glue_ecb_crypt_128bit(const struct common_glue_ctx *gctx, struct scatterlist *dst, struct scatterlist *src, unsigned int nbytes); +extern int glue_ecb_req_128bit(const struct common_glue_ctx *gctx, + struct skcipher_request *req); + extern int glue_cbc_encrypt_128bit(const common_glue_func_t fn, struct blkcipher_desc *desc, struct scatterlist *dst, struct scatterlist *src, unsigned int nbytes); +extern int glue_cbc_encrypt_req_128bit(const common_glue_func_t fn, + struct skcipher_request *req); + extern int glue_cbc_decrypt_128bit(const struct common_glue_ctx *gctx, struct blkcipher_desc *desc, struct scatterlist *dst, struct scatterlist *src, unsigned int nbytes); +extern int glue_cbc_decrypt_req_128bit(const struct common_glue_ctx *gctx, + struct skcipher_request *req); + extern int glue_ctr_crypt_128bit(const struct common_glue_ctx *gctx, struct blkcipher_desc *desc, struct scatterlist *dst, struct scatterlist *src, unsigned int nbytes); +extern int glue_ctr_req_128bit(const struct common_glue_ctx *gctx, + struct skcipher_request *req); + extern int glue_xts_crypt_128bit(const struct common_glue_ctx *gctx, struct blkcipher_desc *desc, struct scatterlist *dst, -- cgit v1.2.3 From 2a05cfc35f5e251479c4fa86538461c1e4139e46 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 19 Feb 2018 23:48:01 -0800 Subject: crypto: x86/serpent-sse2 - remove LRW algorithm The LRW template now wraps an ECB mode algorithm rather than the block cipher directly. Therefore it is now redundant for crypto modules to wrap their ECB code with generic LRW code themselves via lrw_crypt(). Remove the lrw-serpent-sse2 algorithm which did this. Users who request lrw(serpent) and previously would have gotten lrw-serpent-sse2 will now get lrw(ecb-serpent-sse2) instead, which is just as fast. 
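Nothing changes for callers at the API level. As a minimal sketch (illustrative only, not part of the patch), allocating the composed algorithm still works the same way, and the key layout is still the Serpent key followed by one 16-byte LRW tweak key, exactly what the removed lrw_serpent_setkey() expected:

    #include <linux/err.h>
    #include <crypto/skcipher.h>

    static int lrw_serpent_key_example(const u8 *key, unsigned int keylen)
    {
            /* keylen = Serpent key length + 16-byte LRW tweak key */
            struct crypto_skcipher *tfm;
            int err;

            /* Per the text above, this now resolves to lrw(ecb-serpent-sse2). */
            tfm = crypto_alloc_skcipher("lrw(serpent)", 0, 0);
            if (IS_ERR(tfm))
                    return PTR_ERR(tfm);

            err = crypto_skcipher_setkey(tfm, key, keylen);
            crypto_free_skcipher(tfm);
            return err;
    }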
Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu --- arch/x86/crypto/serpent_sse2_glue.c | 130 +----------------------------------- crypto/Kconfig | 2 - 2 files changed, 1 insertion(+), 131 deletions(-) (limited to 'arch') diff --git a/arch/x86/crypto/serpent_sse2_glue.c b/arch/x86/crypto/serpent_sse2_glue.c index ac0e831943f5..e9de69b45184 100644 --- a/arch/x86/crypto/serpent_sse2_glue.c +++ b/arch/x86/crypto/serpent_sse2_glue.c @@ -40,7 +40,6 @@ #include #include #include -#include #include #include #include @@ -221,85 +220,6 @@ static void decrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes) __serpent_decrypt(ctx->ctx, srcdst, srcdst); } -struct serpent_lrw_ctx { - struct lrw_table_ctx lrw_table; - struct serpent_ctx serpent_ctx; -}; - -static int lrw_serpent_setkey(struct crypto_tfm *tfm, const u8 *key, - unsigned int keylen) -{ - struct serpent_lrw_ctx *ctx = crypto_tfm_ctx(tfm); - int err; - - err = __serpent_setkey(&ctx->serpent_ctx, key, keylen - - SERPENT_BLOCK_SIZE); - if (err) - return err; - - return lrw_init_table(&ctx->lrw_table, key + keylen - - SERPENT_BLOCK_SIZE); -} - -static int lrw_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) -{ - struct serpent_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - be128 buf[SERPENT_PARALLEL_BLOCKS]; - struct crypt_priv crypt_ctx = { - .ctx = &ctx->serpent_ctx, - .fpu_enabled = false, - }; - struct lrw_crypt_req req = { - .tbuf = buf, - .tbuflen = sizeof(buf), - - .table_ctx = &ctx->lrw_table, - .crypt_ctx = &crypt_ctx, - .crypt_fn = encrypt_callback, - }; - int ret; - - desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; - ret = lrw_crypt(desc, dst, src, nbytes, &req); - serpent_fpu_end(crypt_ctx.fpu_enabled); - - return ret; -} - -static int lrw_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) -{ - struct serpent_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - be128 buf[SERPENT_PARALLEL_BLOCKS]; - struct crypt_priv crypt_ctx = { - .ctx = &ctx->serpent_ctx, - .fpu_enabled = false, - }; - struct lrw_crypt_req req = { - .tbuf = buf, - .tbuflen = sizeof(buf), - - .table_ctx = &ctx->lrw_table, - .crypt_ctx = &crypt_ctx, - .crypt_fn = decrypt_callback, - }; - int ret; - - desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; - ret = lrw_crypt(desc, dst, src, nbytes, &req); - serpent_fpu_end(crypt_ctx.fpu_enabled); - - return ret; -} - -static void lrw_exit_tfm(struct crypto_tfm *tfm) -{ - struct serpent_lrw_ctx *ctx = crypto_tfm_ctx(tfm); - - lrw_free_table(&ctx->lrw_table); -} - struct serpent_xts_ctx { struct serpent_ctx tweak_ctx; struct serpent_ctx crypt_ctx; @@ -378,7 +298,7 @@ static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, return ret; } -static struct crypto_alg serpent_algs[10] = { { +static struct crypto_alg serpent_algs[] = { { .cra_name = "__ecb-serpent-sse2", .cra_driver_name = "__driver-ecb-serpent-sse2", .cra_priority = 0, @@ -439,30 +359,6 @@ static struct crypto_alg serpent_algs[10] = { { .decrypt = ctr_crypt, }, }, -}, { - .cra_name = "__lrw-serpent-sse2", - .cra_driver_name = "__driver-lrw-serpent-sse2", - .cra_priority = 0, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | - CRYPTO_ALG_INTERNAL, - .cra_blocksize = SERPENT_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct serpent_lrw_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_exit = lrw_exit_tfm, - .cra_u = { - .blkcipher = { - .min_keysize = SERPENT_MIN_KEY_SIZE + - SERPENT_BLOCK_SIZE, 
- .max_keysize = SERPENT_MAX_KEY_SIZE + - SERPENT_BLOCK_SIZE, - .ivsize = SERPENT_BLOCK_SIZE, - .setkey = lrw_serpent_setkey, - .encrypt = lrw_encrypt, - .decrypt = lrw_decrypt, - }, - }, }, { .cra_name = "__xts-serpent-sse2", .cra_driver_name = "__driver-xts-serpent-sse2", @@ -550,30 +446,6 @@ static struct crypto_alg serpent_algs[10] = { { .geniv = "chainiv", }, }, -}, { - .cra_name = "lrw(serpent)", - .cra_driver_name = "lrw-serpent-sse2", - .cra_priority = 400, - .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, - .cra_blocksize = SERPENT_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct async_helper_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_ablkcipher_type, - .cra_module = THIS_MODULE, - .cra_init = ablk_init, - .cra_exit = ablk_exit, - .cra_u = { - .ablkcipher = { - .min_keysize = SERPENT_MIN_KEY_SIZE + - SERPENT_BLOCK_SIZE, - .max_keysize = SERPENT_MAX_KEY_SIZE + - SERPENT_BLOCK_SIZE, - .ivsize = SERPENT_BLOCK_SIZE, - .setkey = ablk_set_key, - .encrypt = ablk_encrypt, - .decrypt = ablk_decrypt, - }, - }, }, { .cra_name = "xts(serpent)", .cra_driver_name = "xts-serpent-sse2", diff --git a/crypto/Kconfig b/crypto/Kconfig index 558eff07b799..ecc4e56e80ac 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -1427,7 +1427,6 @@ config CRYPTO_SERPENT_SSE2_X86_64 select CRYPTO_ABLK_HELPER select CRYPTO_GLUE_HELPER_X86 select CRYPTO_SERPENT - select CRYPTO_LRW select CRYPTO_XTS help Serpent cipher algorithm, by Anderson, Biham & Knudsen. @@ -1449,7 +1448,6 @@ config CRYPTO_SERPENT_SSE2_586 select CRYPTO_ABLK_HELPER select CRYPTO_GLUE_HELPER_X86 select CRYPTO_SERPENT - select CRYPTO_LRW select CRYPTO_XTS help Serpent cipher algorithm, by Anderson, Biham & Knudsen. -- cgit v1.2.3 From 8bab4e3cd51f2143a10d5ca6b0ae5b8fc08c72bd Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 19 Feb 2018 23:48:02 -0800 Subject: crypto: x86/serpent-sse2 - remove XTS algorithm The XTS template now wraps an ECB mode algorithm rather than the block cipher directly. Therefore it is now redundant for crypto modules to wrap their ECB code with generic XTS code themselves via xts_crypt(). Remove the xts-serpent-sse2 algorithm which did this. Users who request xts(serpent) and previously would have gotten xts-serpent-sse2 will now get xts(ecb-serpent-sse2) instead, which is just as fast. 
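For anyone who wants to confirm which implementation ends up behind the composition at runtime, here is a sketch (illustrative only, not part of the patch) that prints the backing driver name, which after this change should be the xts(ecb-serpent-sse2) instance mentioned above; the same information is also visible in /proc/crypto:

    #include <linux/err.h>
    #include <linux/printk.h>
    #include <crypto/skcipher.h>

    static void report_xts_serpent_backend(void)
    {
            struct crypto_skcipher *tfm;

            tfm = crypto_alloc_skcipher("xts(serpent)", 0, 0);
            if (IS_ERR(tfm))
                    return;

            pr_info("xts(serpent) backed by %s\n",
                    crypto_tfm_alg_driver_name(crypto_skcipher_tfm(tfm)));
            crypto_free_skcipher(tfm);
    }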
Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu --- arch/x86/crypto/serpent_sse2_glue.c | 172 ------------------------------------ crypto/Kconfig | 2 - 2 files changed, 174 deletions(-) (limited to 'arch') diff --git a/arch/x86/crypto/serpent_sse2_glue.c b/arch/x86/crypto/serpent_sse2_glue.c index e9de69b45184..9e2734384ce5 100644 --- a/arch/x86/crypto/serpent_sse2_glue.c +++ b/arch/x86/crypto/serpent_sse2_glue.c @@ -40,7 +40,6 @@ #include #include #include -#include #include #include @@ -170,134 +169,6 @@ static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst, return glue_ctr_crypt_128bit(&serpent_ctr, desc, dst, src, nbytes); } -static inline bool serpent_fpu_begin(bool fpu_enabled, unsigned int nbytes) -{ - return glue_fpu_begin(SERPENT_BLOCK_SIZE, SERPENT_PARALLEL_BLOCKS, - NULL, fpu_enabled, nbytes); -} - -static inline void serpent_fpu_end(bool fpu_enabled) -{ - glue_fpu_end(fpu_enabled); -} - -struct crypt_priv { - struct serpent_ctx *ctx; - bool fpu_enabled; -}; - -static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes) -{ - const unsigned int bsize = SERPENT_BLOCK_SIZE; - struct crypt_priv *ctx = priv; - int i; - - ctx->fpu_enabled = serpent_fpu_begin(ctx->fpu_enabled, nbytes); - - if (nbytes == bsize * SERPENT_PARALLEL_BLOCKS) { - serpent_enc_blk_xway(ctx->ctx, srcdst, srcdst); - return; - } - - for (i = 0; i < nbytes / bsize; i++, srcdst += bsize) - __serpent_encrypt(ctx->ctx, srcdst, srcdst); -} - -static void decrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes) -{ - const unsigned int bsize = SERPENT_BLOCK_SIZE; - struct crypt_priv *ctx = priv; - int i; - - ctx->fpu_enabled = serpent_fpu_begin(ctx->fpu_enabled, nbytes); - - if (nbytes == bsize * SERPENT_PARALLEL_BLOCKS) { - serpent_dec_blk_xway(ctx->ctx, srcdst, srcdst); - return; - } - - for (i = 0; i < nbytes / bsize; i++, srcdst += bsize) - __serpent_decrypt(ctx->ctx, srcdst, srcdst); -} - -struct serpent_xts_ctx { - struct serpent_ctx tweak_ctx; - struct serpent_ctx crypt_ctx; -}; - -static int xts_serpent_setkey(struct crypto_tfm *tfm, const u8 *key, - unsigned int keylen) -{ - struct serpent_xts_ctx *ctx = crypto_tfm_ctx(tfm); - int err; - - err = xts_check_key(tfm, key, keylen); - if (err) - return err; - - /* first half of xts-key is for crypt */ - err = __serpent_setkey(&ctx->crypt_ctx, key, keylen / 2); - if (err) - return err; - - /* second half of xts-key is for tweak */ - return __serpent_setkey(&ctx->tweak_ctx, key + keylen / 2, keylen / 2); -} - -static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) -{ - struct serpent_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - le128 buf[SERPENT_PARALLEL_BLOCKS]; - struct crypt_priv crypt_ctx = { - .ctx = &ctx->crypt_ctx, - .fpu_enabled = false, - }; - struct xts_crypt_req req = { - .tbuf = buf, - .tbuflen = sizeof(buf), - - .tweak_ctx = &ctx->tweak_ctx, - .tweak_fn = XTS_TWEAK_CAST(__serpent_encrypt), - .crypt_ctx = &crypt_ctx, - .crypt_fn = encrypt_callback, - }; - int ret; - - desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; - ret = xts_crypt(desc, dst, src, nbytes, &req); - serpent_fpu_end(crypt_ctx.fpu_enabled); - - return ret; -} - -static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) -{ - struct serpent_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - le128 buf[SERPENT_PARALLEL_BLOCKS]; - struct crypt_priv crypt_ctx = { - .ctx = &ctx->crypt_ctx, - .fpu_enabled = false, - }; - 
struct xts_crypt_req req = { - .tbuf = buf, - .tbuflen = sizeof(buf), - - .tweak_ctx = &ctx->tweak_ctx, - .tweak_fn = XTS_TWEAK_CAST(__serpent_encrypt), - .crypt_ctx = &crypt_ctx, - .crypt_fn = decrypt_callback, - }; - int ret; - - desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; - ret = xts_crypt(desc, dst, src, nbytes, &req); - serpent_fpu_end(crypt_ctx.fpu_enabled); - - return ret; -} - static struct crypto_alg serpent_algs[] = { { .cra_name = "__ecb-serpent-sse2", .cra_driver_name = "__driver-ecb-serpent-sse2", @@ -359,27 +230,6 @@ static struct crypto_alg serpent_algs[] = { { .decrypt = ctr_crypt, }, }, -}, { - .cra_name = "__xts-serpent-sse2", - .cra_driver_name = "__driver-xts-serpent-sse2", - .cra_priority = 0, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | - CRYPTO_ALG_INTERNAL, - .cra_blocksize = SERPENT_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct serpent_xts_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_u = { - .blkcipher = { - .min_keysize = SERPENT_MIN_KEY_SIZE * 2, - .max_keysize = SERPENT_MAX_KEY_SIZE * 2, - .ivsize = SERPENT_BLOCK_SIZE, - .setkey = xts_serpent_setkey, - .encrypt = xts_encrypt, - .decrypt = xts_decrypt, - }, - }, }, { .cra_name = "ecb(serpent)", .cra_driver_name = "ecb-serpent-sse2", @@ -446,28 +296,6 @@ static struct crypto_alg serpent_algs[] = { { .geniv = "chainiv", }, }, -}, { - .cra_name = "xts(serpent)", - .cra_driver_name = "xts-serpent-sse2", - .cra_priority = 400, - .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, - .cra_blocksize = SERPENT_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct async_helper_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_ablkcipher_type, - .cra_module = THIS_MODULE, - .cra_init = ablk_init, - .cra_exit = ablk_exit, - .cra_u = { - .ablkcipher = { - .min_keysize = SERPENT_MIN_KEY_SIZE * 2, - .max_keysize = SERPENT_MAX_KEY_SIZE * 2, - .ivsize = SERPENT_BLOCK_SIZE, - .setkey = ablk_set_key, - .encrypt = ablk_encrypt, - .decrypt = ablk_decrypt, - }, - }, } }; static int __init serpent_sse2_init(void) diff --git a/crypto/Kconfig b/crypto/Kconfig index ecc4e56e80ac..4b2026e07bac 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -1427,7 +1427,6 @@ config CRYPTO_SERPENT_SSE2_X86_64 select CRYPTO_ABLK_HELPER select CRYPTO_GLUE_HELPER_X86 select CRYPTO_SERPENT - select CRYPTO_XTS help Serpent cipher algorithm, by Anderson, Biham & Knudsen. @@ -1448,7 +1447,6 @@ config CRYPTO_SERPENT_SSE2_586 select CRYPTO_ABLK_HELPER select CRYPTO_GLUE_HELPER_X86 select CRYPTO_SERPENT - select CRYPTO_XTS help Serpent cipher algorithm, by Anderson, Biham & Knudsen. -- cgit v1.2.3 From e0f409dcb82e463663affa461342d54a23ac8456 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 19 Feb 2018 23:48:03 -0800 Subject: crypto: x86/serpent-sse2 - convert to skcipher interface Convert the SSE2 implementation of Serpent from the (deprecated) ablkcipher and blkcipher interfaces over to the skcipher interface. Note that this includes replacing the use of ablk_helper with crypto_simd. 
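For context, the caller-visible difference is that the skcipher interface is request based rather than desc based. A minimal synchronous caller of the converted code might look like the following sketch (illustrative only, not part of the patch; buffer setup and most error handling elided):

    #include <linux/err.h>
    #include <linux/scatterlist.h>
    #include <linux/slab.h>
    #include <crypto/skcipher.h>

    static int cbc_serpent_encrypt_example(struct scatterlist *src,
                                           struct scatterlist *dst,
                                           unsigned int len, u8 *iv,
                                           const u8 *key, unsigned int keylen)
    {
            struct crypto_skcipher *tfm;
            struct skcipher_request *req;
            DECLARE_CRYPTO_WAIT(wait);
            int err;

            tfm = crypto_alloc_skcipher("cbc(serpent)", 0, 0);
            if (IS_ERR(tfm))
                    return PTR_ERR(tfm);

            err = crypto_skcipher_setkey(tfm, key, keylen);
            if (err)
                    goto out_free_tfm;

            req = skcipher_request_alloc(tfm, GFP_KERNEL);
            if (!req) {
                    err = -ENOMEM;
                    goto out_free_tfm;
            }

            skcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG |
                                               CRYPTO_TFM_REQ_MAY_SLEEP,
                                          crypto_req_done, &wait);
            skcipher_request_set_crypt(req, src, dst, len, iv);

            /* Works for both the async simd wrapper and sync fallbacks. */
            err = crypto_wait_req(crypto_skcipher_encrypt(req), &wait);

            skcipher_request_free(req);
    out_free_tfm:
            crypto_free_skcipher(tfm);
            return err;
    }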
Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu --- arch/x86/crypto/serpent_sse2_glue.c | 221 ++++++++++++------------------------ crypto/Kconfig | 10 +- 2 files changed, 74 insertions(+), 157 deletions(-) (limited to 'arch') diff --git a/arch/x86/crypto/serpent_sse2_glue.c b/arch/x86/crypto/serpent_sse2_glue.c index 9e2734384ce5..3dafe137596a 100644 --- a/arch/x86/crypto/serpent_sse2_glue.c +++ b/arch/x86/crypto/serpent_sse2_glue.c @@ -30,19 +30,22 @@ */ #include -#include #include #include #include -#include #include -#include -#include #include -#include +#include +#include #include #include +static int serpent_setkey_skcipher(struct crypto_skcipher *tfm, + const u8 *key, unsigned int keylen) +{ + return __serpent_setkey(crypto_skcipher_ctx(tfm), key, keylen); +} + static void serpent_decrypt_cbc_xway(void *ctx, u128 *dst, const u128 *src) { u128 ivs[SERPENT_PARALLEL_BLOCKS - 1]; @@ -137,166 +140,79 @@ static const struct common_glue_ctx serpent_dec_cbc = { } } }; -static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int ecb_encrypt(struct skcipher_request *req) { - return glue_ecb_crypt_128bit(&serpent_enc, desc, dst, src, nbytes); + return glue_ecb_req_128bit(&serpent_enc, req); } -static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int ecb_decrypt(struct skcipher_request *req) { - return glue_ecb_crypt_128bit(&serpent_dec, desc, dst, src, nbytes); + return glue_ecb_req_128bit(&serpent_dec, req); } -static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int cbc_encrypt(struct skcipher_request *req) { - return glue_cbc_encrypt_128bit(GLUE_FUNC_CAST(__serpent_encrypt), desc, - dst, src, nbytes); + return glue_cbc_encrypt_req_128bit(GLUE_FUNC_CAST(__serpent_encrypt), + req); } -static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int cbc_decrypt(struct skcipher_request *req) { - return glue_cbc_decrypt_128bit(&serpent_dec_cbc, desc, dst, src, - nbytes); + return glue_cbc_decrypt_req_128bit(&serpent_dec_cbc, req); } -static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int ctr_crypt(struct skcipher_request *req) { - return glue_ctr_crypt_128bit(&serpent_ctr, desc, dst, src, nbytes); + return glue_ctr_req_128bit(&serpent_ctr, req); } -static struct crypto_alg serpent_algs[] = { { - .cra_name = "__ecb-serpent-sse2", - .cra_driver_name = "__driver-ecb-serpent-sse2", - .cra_priority = 0, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | - CRYPTO_ALG_INTERNAL, - .cra_blocksize = SERPENT_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct serpent_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_u = { - .blkcipher = { - .min_keysize = SERPENT_MIN_KEY_SIZE, - .max_keysize = SERPENT_MAX_KEY_SIZE, - .setkey = serpent_setkey, - .encrypt = ecb_encrypt, - .decrypt = ecb_decrypt, - }, - }, -}, { - .cra_name = "__cbc-serpent-sse2", - .cra_driver_name = "__driver-cbc-serpent-sse2", - .cra_priority = 0, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | - CRYPTO_ALG_INTERNAL, - .cra_blocksize = SERPENT_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct serpent_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_u = { - .blkcipher = { - 
.min_keysize = SERPENT_MIN_KEY_SIZE, - .max_keysize = SERPENT_MAX_KEY_SIZE, - .setkey = serpent_setkey, - .encrypt = cbc_encrypt, - .decrypt = cbc_decrypt, - }, - }, -}, { - .cra_name = "__ctr-serpent-sse2", - .cra_driver_name = "__driver-ctr-serpent-sse2", - .cra_priority = 0, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | - CRYPTO_ALG_INTERNAL, - .cra_blocksize = 1, - .cra_ctxsize = sizeof(struct serpent_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_u = { - .blkcipher = { - .min_keysize = SERPENT_MIN_KEY_SIZE, - .max_keysize = SERPENT_MAX_KEY_SIZE, - .ivsize = SERPENT_BLOCK_SIZE, - .setkey = serpent_setkey, - .encrypt = ctr_crypt, - .decrypt = ctr_crypt, - }, - }, -}, { - .cra_name = "ecb(serpent)", - .cra_driver_name = "ecb-serpent-sse2", - .cra_priority = 400, - .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, - .cra_blocksize = SERPENT_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct async_helper_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_ablkcipher_type, - .cra_module = THIS_MODULE, - .cra_init = ablk_init, - .cra_exit = ablk_exit, - .cra_u = { - .ablkcipher = { - .min_keysize = SERPENT_MIN_KEY_SIZE, - .max_keysize = SERPENT_MAX_KEY_SIZE, - .setkey = ablk_set_key, - .encrypt = ablk_encrypt, - .decrypt = ablk_decrypt, - }, - }, -}, { - .cra_name = "cbc(serpent)", - .cra_driver_name = "cbc-serpent-sse2", - .cra_priority = 400, - .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, - .cra_blocksize = SERPENT_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct async_helper_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_ablkcipher_type, - .cra_module = THIS_MODULE, - .cra_init = ablk_init, - .cra_exit = ablk_exit, - .cra_u = { - .ablkcipher = { - .min_keysize = SERPENT_MIN_KEY_SIZE, - .max_keysize = SERPENT_MAX_KEY_SIZE, - .ivsize = SERPENT_BLOCK_SIZE, - .setkey = ablk_set_key, - .encrypt = __ablk_encrypt, - .decrypt = ablk_decrypt, - }, - }, -}, { - .cra_name = "ctr(serpent)", - .cra_driver_name = "ctr-serpent-sse2", - .cra_priority = 400, - .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, - .cra_blocksize = 1, - .cra_ctxsize = sizeof(struct async_helper_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_ablkcipher_type, - .cra_module = THIS_MODULE, - .cra_init = ablk_init, - .cra_exit = ablk_exit, - .cra_u = { - .ablkcipher = { - .min_keysize = SERPENT_MIN_KEY_SIZE, - .max_keysize = SERPENT_MAX_KEY_SIZE, - .ivsize = SERPENT_BLOCK_SIZE, - .setkey = ablk_set_key, - .encrypt = ablk_encrypt, - .decrypt = ablk_encrypt, - .geniv = "chainiv", - }, +static struct skcipher_alg serpent_algs[] = { + { + .base.cra_name = "__ecb(serpent)", + .base.cra_driver_name = "__ecb-serpent-sse2", + .base.cra_priority = 400, + .base.cra_flags = CRYPTO_ALG_INTERNAL, + .base.cra_blocksize = SERPENT_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct serpent_ctx), + .base.cra_module = THIS_MODULE, + .min_keysize = SERPENT_MIN_KEY_SIZE, + .max_keysize = SERPENT_MAX_KEY_SIZE, + .setkey = serpent_setkey_skcipher, + .encrypt = ecb_encrypt, + .decrypt = ecb_decrypt, + }, { + .base.cra_name = "__cbc(serpent)", + .base.cra_driver_name = "__cbc-serpent-sse2", + .base.cra_priority = 400, + .base.cra_flags = CRYPTO_ALG_INTERNAL, + .base.cra_blocksize = SERPENT_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct serpent_ctx), + .base.cra_module = THIS_MODULE, + .min_keysize = SERPENT_MIN_KEY_SIZE, + .max_keysize = SERPENT_MAX_KEY_SIZE, + .ivsize = SERPENT_BLOCK_SIZE, + .setkey = serpent_setkey_skcipher, + .encrypt = cbc_encrypt, + .decrypt = cbc_decrypt, + 
}, { + .base.cra_name = "__ctr(serpent)", + .base.cra_driver_name = "__ctr-serpent-sse2", + .base.cra_priority = 400, + .base.cra_flags = CRYPTO_ALG_INTERNAL, + .base.cra_blocksize = 1, + .base.cra_ctxsize = sizeof(struct serpent_ctx), + .base.cra_module = THIS_MODULE, + .min_keysize = SERPENT_MIN_KEY_SIZE, + .max_keysize = SERPENT_MAX_KEY_SIZE, + .ivsize = SERPENT_BLOCK_SIZE, + .chunksize = SERPENT_BLOCK_SIZE, + .setkey = serpent_setkey_skcipher, + .encrypt = ctr_crypt, + .decrypt = ctr_crypt, }, -} }; +}; + +static struct simd_skcipher_alg *serpent_simd_algs[ARRAY_SIZE(serpent_algs)]; static int __init serpent_sse2_init(void) { @@ -305,12 +221,15 @@ static int __init serpent_sse2_init(void) return -ENODEV; } - return crypto_register_algs(serpent_algs, ARRAY_SIZE(serpent_algs)); + return simd_register_skciphers_compat(serpent_algs, + ARRAY_SIZE(serpent_algs), + serpent_simd_algs); } static void __exit serpent_sse2_exit(void) { - crypto_unregister_algs(serpent_algs, ARRAY_SIZE(serpent_algs)); + simd_unregister_skciphers(serpent_algs, ARRAY_SIZE(serpent_algs), + serpent_simd_algs); } module_init(serpent_sse2_init); diff --git a/crypto/Kconfig b/crypto/Kconfig index 4b2026e07bac..f6d3eec494ee 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -1422,11 +1422,10 @@ config CRYPTO_SERPENT config CRYPTO_SERPENT_SSE2_X86_64 tristate "Serpent cipher algorithm (x86_64/SSE2)" depends on X86 && 64BIT - select CRYPTO_ALGAPI - select CRYPTO_CRYPTD - select CRYPTO_ABLK_HELPER + select CRYPTO_BLKCIPHER select CRYPTO_GLUE_HELPER_X86 select CRYPTO_SERPENT + select CRYPTO_SIMD help Serpent cipher algorithm, by Anderson, Biham & Knudsen. @@ -1442,11 +1441,10 @@ config CRYPTO_SERPENT_SSE2_X86_64 config CRYPTO_SERPENT_SSE2_586 tristate "Serpent cipher algorithm (i586/SSE2)" depends on X86 && !64BIT - select CRYPTO_ALGAPI - select CRYPTO_CRYPTD - select CRYPTO_ABLK_HELPER + select CRYPTO_BLKCIPHER select CRYPTO_GLUE_HELPER_X86 select CRYPTO_SERPENT + select CRYPTO_SIMD help Serpent cipher algorithm, by Anderson, Biham & Knudsen. -- cgit v1.2.3 From e5f382e643811dce87d0f47f87e8bd1102a0feb5 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 19 Feb 2018 23:48:04 -0800 Subject: crypto: x86/serpent-avx2 - remove LRW algorithm The LRW template now wraps an ECB mode algorithm rather than the block cipher directly. Therefore it is now redundant for crypto modules to wrap their ECB code with generic LRW code themselves via lrw_crypt(). Remove the lrw-serpent-avx2 algorithm which did this. Users who request lrw(serpent) and previously would have gotten lrw-serpent-avx2 will now get lrw(ecb-serpent-avx2) instead, which is just as fast. 
Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu --- arch/x86/crypto/serpent_avx2_glue.c | 176 +----------------------------------- crypto/Kconfig | 1 - 2 files changed, 1 insertion(+), 176 deletions(-) (limited to 'arch') diff --git a/arch/x86/crypto/serpent_avx2_glue.c b/arch/x86/crypto/serpent_avx2_glue.c index 870f6d812a2d..2bd0f0459db4 100644 --- a/arch/x86/crypto/serpent_avx2_glue.c +++ b/arch/x86/crypto/serpent_avx2_glue.c @@ -168,122 +168,6 @@ static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst, return glue_ctr_crypt_128bit(&serpent_ctr, desc, dst, src, nbytes); } -static inline bool serpent_fpu_begin(bool fpu_enabled, unsigned int nbytes) -{ - /* since reusing AVX functions, starts using FPU at 8 parallel blocks */ - return glue_fpu_begin(SERPENT_BLOCK_SIZE, 8, NULL, fpu_enabled, nbytes); -} - -static inline void serpent_fpu_end(bool fpu_enabled) -{ - glue_fpu_end(fpu_enabled); -} - -struct crypt_priv { - struct serpent_ctx *ctx; - bool fpu_enabled; -}; - -static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes) -{ - const unsigned int bsize = SERPENT_BLOCK_SIZE; - struct crypt_priv *ctx = priv; - int i; - - ctx->fpu_enabled = serpent_fpu_begin(ctx->fpu_enabled, nbytes); - - if (nbytes >= SERPENT_AVX2_PARALLEL_BLOCKS * bsize) { - serpent_ecb_enc_16way(ctx->ctx, srcdst, srcdst); - srcdst += bsize * SERPENT_AVX2_PARALLEL_BLOCKS; - nbytes -= bsize * SERPENT_AVX2_PARALLEL_BLOCKS; - } - - while (nbytes >= SERPENT_PARALLEL_BLOCKS * bsize) { - serpent_ecb_enc_8way_avx(ctx->ctx, srcdst, srcdst); - srcdst += bsize * SERPENT_PARALLEL_BLOCKS; - nbytes -= bsize * SERPENT_PARALLEL_BLOCKS; - } - - for (i = 0; i < nbytes / bsize; i++, srcdst += bsize) - __serpent_encrypt(ctx->ctx, srcdst, srcdst); -} - -static void decrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes) -{ - const unsigned int bsize = SERPENT_BLOCK_SIZE; - struct crypt_priv *ctx = priv; - int i; - - ctx->fpu_enabled = serpent_fpu_begin(ctx->fpu_enabled, nbytes); - - if (nbytes >= SERPENT_AVX2_PARALLEL_BLOCKS * bsize) { - serpent_ecb_dec_16way(ctx->ctx, srcdst, srcdst); - srcdst += bsize * SERPENT_AVX2_PARALLEL_BLOCKS; - nbytes -= bsize * SERPENT_AVX2_PARALLEL_BLOCKS; - } - - while (nbytes >= SERPENT_PARALLEL_BLOCKS * bsize) { - serpent_ecb_dec_8way_avx(ctx->ctx, srcdst, srcdst); - srcdst += bsize * SERPENT_PARALLEL_BLOCKS; - nbytes -= bsize * SERPENT_PARALLEL_BLOCKS; - } - - for (i = 0; i < nbytes / bsize; i++, srcdst += bsize) - __serpent_decrypt(ctx->ctx, srcdst, srcdst); -} - -static int lrw_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) -{ - struct serpent_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - be128 buf[SERPENT_AVX2_PARALLEL_BLOCKS]; - struct crypt_priv crypt_ctx = { - .ctx = &ctx->serpent_ctx, - .fpu_enabled = false, - }; - struct lrw_crypt_req req = { - .tbuf = buf, - .tbuflen = sizeof(buf), - - .table_ctx = &ctx->lrw_table, - .crypt_ctx = &crypt_ctx, - .crypt_fn = encrypt_callback, - }; - int ret; - - desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; - ret = lrw_crypt(desc, dst, src, nbytes, &req); - serpent_fpu_end(crypt_ctx.fpu_enabled); - - return ret; -} - -static int lrw_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) -{ - struct serpent_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - be128 buf[SERPENT_AVX2_PARALLEL_BLOCKS]; - struct crypt_priv crypt_ctx = { - .ctx = &ctx->serpent_ctx, - .fpu_enabled = false, - }; - struct lrw_crypt_req 
req = { - .tbuf = buf, - .tbuflen = sizeof(buf), - - .table_ctx = &ctx->lrw_table, - .crypt_ctx = &crypt_ctx, - .crypt_fn = decrypt_callback, - }; - int ret; - - desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; - ret = lrw_crypt(desc, dst, src, nbytes, &req); - serpent_fpu_end(crypt_ctx.fpu_enabled); - - return ret; -} - static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, struct scatterlist *src, unsigned int nbytes) { @@ -304,7 +188,7 @@ static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, &ctx->tweak_ctx, &ctx->crypt_ctx); } -static struct crypto_alg srp_algs[10] = { { +static struct crypto_alg srp_algs[] = { { .cra_name = "__ecb-serpent-avx2", .cra_driver_name = "__driver-ecb-serpent-avx2", .cra_priority = 0, @@ -315,7 +199,6 @@ static struct crypto_alg srp_algs[10] = { { .cra_alignmask = 0, .cra_type = &crypto_blkcipher_type, .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(srp_algs[0].cra_list), .cra_u = { .blkcipher = { .min_keysize = SERPENT_MIN_KEY_SIZE, @@ -336,7 +219,6 @@ static struct crypto_alg srp_algs[10] = { { .cra_alignmask = 0, .cra_type = &crypto_blkcipher_type, .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(srp_algs[1].cra_list), .cra_u = { .blkcipher = { .min_keysize = SERPENT_MIN_KEY_SIZE, @@ -357,7 +239,6 @@ static struct crypto_alg srp_algs[10] = { { .cra_alignmask = 0, .cra_type = &crypto_blkcipher_type, .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(srp_algs[2].cra_list), .cra_u = { .blkcipher = { .min_keysize = SERPENT_MIN_KEY_SIZE, @@ -368,31 +249,6 @@ static struct crypto_alg srp_algs[10] = { { .decrypt = ctr_crypt, }, }, -}, { - .cra_name = "__lrw-serpent-avx2", - .cra_driver_name = "__driver-lrw-serpent-avx2", - .cra_priority = 0, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | - CRYPTO_ALG_INTERNAL, - .cra_blocksize = SERPENT_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct serpent_lrw_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(srp_algs[3].cra_list), - .cra_exit = lrw_serpent_exit_tfm, - .cra_u = { - .blkcipher = { - .min_keysize = SERPENT_MIN_KEY_SIZE + - SERPENT_BLOCK_SIZE, - .max_keysize = SERPENT_MAX_KEY_SIZE + - SERPENT_BLOCK_SIZE, - .ivsize = SERPENT_BLOCK_SIZE, - .setkey = lrw_serpent_setkey, - .encrypt = lrw_encrypt, - .decrypt = lrw_decrypt, - }, - }, }, { .cra_name = "__xts-serpent-avx2", .cra_driver_name = "__driver-xts-serpent-avx2", @@ -404,7 +260,6 @@ static struct crypto_alg srp_algs[10] = { { .cra_alignmask = 0, .cra_type = &crypto_blkcipher_type, .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(srp_algs[4].cra_list), .cra_u = { .blkcipher = { .min_keysize = SERPENT_MIN_KEY_SIZE * 2, @@ -425,7 +280,6 @@ static struct crypto_alg srp_algs[10] = { { .cra_alignmask = 0, .cra_type = &crypto_ablkcipher_type, .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(srp_algs[5].cra_list), .cra_init = ablk_init, .cra_exit = ablk_exit, .cra_u = { @@ -447,7 +301,6 @@ static struct crypto_alg srp_algs[10] = { { .cra_alignmask = 0, .cra_type = &crypto_ablkcipher_type, .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(srp_algs[6].cra_list), .cra_init = ablk_init, .cra_exit = ablk_exit, .cra_u = { @@ -470,7 +323,6 @@ static struct crypto_alg srp_algs[10] = { { .cra_alignmask = 0, .cra_type = &crypto_ablkcipher_type, .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(srp_algs[7].cra_list), .cra_init = ablk_init, .cra_exit = ablk_exit, .cra_u = { @@ -484,31 +336,6 @@ static struct crypto_alg srp_algs[10] = { 
{ .geniv = "chainiv", }, }, -}, { - .cra_name = "lrw(serpent)", - .cra_driver_name = "lrw-serpent-avx2", - .cra_priority = 600, - .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, - .cra_blocksize = SERPENT_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct async_helper_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_ablkcipher_type, - .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(srp_algs[8].cra_list), - .cra_init = ablk_init, - .cra_exit = ablk_exit, - .cra_u = { - .ablkcipher = { - .min_keysize = SERPENT_MIN_KEY_SIZE + - SERPENT_BLOCK_SIZE, - .max_keysize = SERPENT_MAX_KEY_SIZE + - SERPENT_BLOCK_SIZE, - .ivsize = SERPENT_BLOCK_SIZE, - .setkey = ablk_set_key, - .encrypt = ablk_encrypt, - .decrypt = ablk_decrypt, - }, - }, }, { .cra_name = "xts(serpent)", .cra_driver_name = "xts-serpent-avx2", @@ -519,7 +346,6 @@ static struct crypto_alg srp_algs[10] = { { .cra_alignmask = 0, .cra_type = &crypto_ablkcipher_type, .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(srp_algs[9].cra_list), .cra_init = ablk_init, .cra_exit = ablk_exit, .cra_u = { diff --git a/crypto/Kconfig b/crypto/Kconfig index f6d3eec494ee..fc733a5aa47b 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -1488,7 +1488,6 @@ config CRYPTO_SERPENT_AVX2_X86_64 select CRYPTO_GLUE_HELPER_X86 select CRYPTO_SERPENT select CRYPTO_SERPENT_AVX_X86_64 - select CRYPTO_LRW select CRYPTO_XTS help Serpent cipher algorithm, by Anderson, Biham & Knudsen. -- cgit v1.2.3 From 340b83032634c37f10626be7388052583a9a4d95 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 19 Feb 2018 23:48:05 -0800 Subject: crypto: x86/serpent-avx - remove LRW algorithm The LRW template now wraps an ECB mode algorithm rather than the block cipher directly. Therefore it is now redundant for crypto modules to wrap their ECB code with generic LRW code themselves via lrw_crypt(). Remove the lrw-serpent-avx algorithm which did this. Users who request lrw(serpent) and previously would have gotten lrw-serpent-avx will now get lrw(ecb-serpent-avx) instead, which is just as fast. 
Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu --- arch/x86/crypto/serpent_avx_glue.c | 177 +----------------------------- arch/x86/include/asm/crypto/serpent-avx.h | 10 -- crypto/Kconfig | 1 - 3 files changed, 1 insertion(+), 187 deletions(-) (limited to 'arch') diff --git a/arch/x86/crypto/serpent_avx_glue.c b/arch/x86/crypto/serpent_avx_glue.c index 6f778d3daa22..494cfa4382e0 100644 --- a/arch/x86/crypto/serpent_avx_glue.c +++ b/arch/x86/crypto/serpent_avx_glue.c @@ -34,7 +34,6 @@ #include #include #include -#include #include #include #include @@ -202,132 +201,6 @@ static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst, return glue_ctr_crypt_128bit(&serpent_ctr, desc, dst, src, nbytes); } -static inline bool serpent_fpu_begin(bool fpu_enabled, unsigned int nbytes) -{ - return glue_fpu_begin(SERPENT_BLOCK_SIZE, SERPENT_PARALLEL_BLOCKS, - NULL, fpu_enabled, nbytes); -} - -static inline void serpent_fpu_end(bool fpu_enabled) -{ - glue_fpu_end(fpu_enabled); -} - -struct crypt_priv { - struct serpent_ctx *ctx; - bool fpu_enabled; -}; - -static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes) -{ - const unsigned int bsize = SERPENT_BLOCK_SIZE; - struct crypt_priv *ctx = priv; - int i; - - ctx->fpu_enabled = serpent_fpu_begin(ctx->fpu_enabled, nbytes); - - if (nbytes == bsize * SERPENT_PARALLEL_BLOCKS) { - serpent_ecb_enc_8way_avx(ctx->ctx, srcdst, srcdst); - return; - } - - for (i = 0; i < nbytes / bsize; i++, srcdst += bsize) - __serpent_encrypt(ctx->ctx, srcdst, srcdst); -} - -static void decrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes) -{ - const unsigned int bsize = SERPENT_BLOCK_SIZE; - struct crypt_priv *ctx = priv; - int i; - - ctx->fpu_enabled = serpent_fpu_begin(ctx->fpu_enabled, nbytes); - - if (nbytes == bsize * SERPENT_PARALLEL_BLOCKS) { - serpent_ecb_dec_8way_avx(ctx->ctx, srcdst, srcdst); - return; - } - - for (i = 0; i < nbytes / bsize; i++, srcdst += bsize) - __serpent_decrypt(ctx->ctx, srcdst, srcdst); -} - -int lrw_serpent_setkey(struct crypto_tfm *tfm, const u8 *key, - unsigned int keylen) -{ - struct serpent_lrw_ctx *ctx = crypto_tfm_ctx(tfm); - int err; - - err = __serpent_setkey(&ctx->serpent_ctx, key, keylen - - SERPENT_BLOCK_SIZE); - if (err) - return err; - - return lrw_init_table(&ctx->lrw_table, key + keylen - - SERPENT_BLOCK_SIZE); -} -EXPORT_SYMBOL_GPL(lrw_serpent_setkey); - -static int lrw_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) -{ - struct serpent_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - be128 buf[SERPENT_PARALLEL_BLOCKS]; - struct crypt_priv crypt_ctx = { - .ctx = &ctx->serpent_ctx, - .fpu_enabled = false, - }; - struct lrw_crypt_req req = { - .tbuf = buf, - .tbuflen = sizeof(buf), - - .table_ctx = &ctx->lrw_table, - .crypt_ctx = &crypt_ctx, - .crypt_fn = encrypt_callback, - }; - int ret; - - desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; - ret = lrw_crypt(desc, dst, src, nbytes, &req); - serpent_fpu_end(crypt_ctx.fpu_enabled); - - return ret; -} - -static int lrw_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) -{ - struct serpent_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - be128 buf[SERPENT_PARALLEL_BLOCKS]; - struct crypt_priv crypt_ctx = { - .ctx = &ctx->serpent_ctx, - .fpu_enabled = false, - }; - struct lrw_crypt_req req = { - .tbuf = buf, - .tbuflen = sizeof(buf), - - .table_ctx = &ctx->lrw_table, - .crypt_ctx = &crypt_ctx, - .crypt_fn = decrypt_callback, - }; 
- int ret; - - desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; - ret = lrw_crypt(desc, dst, src, nbytes, &req); - serpent_fpu_end(crypt_ctx.fpu_enabled); - - return ret; -} - -void lrw_serpent_exit_tfm(struct crypto_tfm *tfm) -{ - struct serpent_lrw_ctx *ctx = crypto_tfm_ctx(tfm); - - lrw_free_table(&ctx->lrw_table); -} -EXPORT_SYMBOL_GPL(lrw_serpent_exit_tfm); - int xts_serpent_setkey(struct crypto_tfm *tfm, const u8 *key, unsigned int keylen) { @@ -368,7 +241,7 @@ static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, &ctx->tweak_ctx, &ctx->crypt_ctx); } -static struct crypto_alg serpent_algs[10] = { { +static struct crypto_alg serpent_algs[] = { { .cra_name = "__ecb-serpent-avx", .cra_driver_name = "__driver-ecb-serpent-avx", .cra_priority = 0, @@ -429,30 +302,6 @@ static struct crypto_alg serpent_algs[10] = { { .decrypt = ctr_crypt, }, }, -}, { - .cra_name = "__lrw-serpent-avx", - .cra_driver_name = "__driver-lrw-serpent-avx", - .cra_priority = 0, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | - CRYPTO_ALG_INTERNAL, - .cra_blocksize = SERPENT_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct serpent_lrw_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_exit = lrw_serpent_exit_tfm, - .cra_u = { - .blkcipher = { - .min_keysize = SERPENT_MIN_KEY_SIZE + - SERPENT_BLOCK_SIZE, - .max_keysize = SERPENT_MAX_KEY_SIZE + - SERPENT_BLOCK_SIZE, - .ivsize = SERPENT_BLOCK_SIZE, - .setkey = lrw_serpent_setkey, - .encrypt = lrw_encrypt, - .decrypt = lrw_decrypt, - }, - }, }, { .cra_name = "__xts-serpent-avx", .cra_driver_name = "__driver-xts-serpent-avx", @@ -540,30 +389,6 @@ static struct crypto_alg serpent_algs[10] = { { .geniv = "chainiv", }, }, -}, { - .cra_name = "lrw(serpent)", - .cra_driver_name = "lrw-serpent-avx", - .cra_priority = 500, - .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, - .cra_blocksize = SERPENT_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct async_helper_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_ablkcipher_type, - .cra_module = THIS_MODULE, - .cra_init = ablk_init, - .cra_exit = ablk_exit, - .cra_u = { - .ablkcipher = { - .min_keysize = SERPENT_MIN_KEY_SIZE + - SERPENT_BLOCK_SIZE, - .max_keysize = SERPENT_MAX_KEY_SIZE + - SERPENT_BLOCK_SIZE, - .ivsize = SERPENT_BLOCK_SIZE, - .setkey = ablk_set_key, - .encrypt = ablk_encrypt, - .decrypt = ablk_decrypt, - }, - }, }, { .cra_name = "xts(serpent)", .cra_driver_name = "xts-serpent-avx", diff --git a/arch/x86/include/asm/crypto/serpent-avx.h b/arch/x86/include/asm/crypto/serpent-avx.h index c958b7bd0fcb..527e5edd0404 100644 --- a/arch/x86/include/asm/crypto/serpent-avx.h +++ b/arch/x86/include/asm/crypto/serpent-avx.h @@ -7,11 +7,6 @@ #define SERPENT_PARALLEL_BLOCKS 8 -struct serpent_lrw_ctx { - struct lrw_table_ctx lrw_table; - struct serpent_ctx serpent_ctx; -}; - struct serpent_xts_ctx { struct serpent_ctx tweak_ctx; struct serpent_ctx crypt_ctx; @@ -38,11 +33,6 @@ extern void __serpent_crypt_ctr(void *ctx, u128 *dst, const u128 *src, extern void serpent_xts_enc(void *ctx, u128 *dst, const u128 *src, le128 *iv); extern void serpent_xts_dec(void *ctx, u128 *dst, const u128 *src, le128 *iv); -extern int lrw_serpent_setkey(struct crypto_tfm *tfm, const u8 *key, - unsigned int keylen); - -extern void lrw_serpent_exit_tfm(struct crypto_tfm *tfm); - extern int xts_serpent_setkey(struct crypto_tfm *tfm, const u8 *key, unsigned int keylen); diff --git a/crypto/Kconfig b/crypto/Kconfig index fc733a5aa47b..65012ebec0ab 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig 
@@ -1465,7 +1465,6 @@ config CRYPTO_SERPENT_AVX_X86_64 select CRYPTO_ABLK_HELPER select CRYPTO_GLUE_HELPER_X86 select CRYPTO_SERPENT - select CRYPTO_LRW select CRYPTO_XTS help Serpent cipher algorithm, by Anderson, Biham & Knudsen. -- cgit v1.2.3 From e16bf974b3d965edc9bd76fc645c3ee2c40c33b8 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 19 Feb 2018 23:48:06 -0800 Subject: crypto: x86/serpent-avx,avx2 - convert to skcipher interface Convert the AVX and AVX2 implementations of Serpent from the (deprecated) ablkcipher and blkcipher interfaces over to the skcipher interface. Note that this includes replacing the use of ablk_helper with crypto_simd. Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu --- arch/x86/crypto/serpent_avx2_glue.c | 304 +++++++++----------------- arch/x86/crypto/serpent_avx_glue.c | 343 ++++++++++-------------------- arch/x86/include/asm/crypto/serpent-avx.h | 7 +- crypto/Kconfig | 11 +- 4 files changed, 219 insertions(+), 446 deletions(-) (limited to 'arch') diff --git a/arch/x86/crypto/serpent_avx2_glue.c b/arch/x86/crypto/serpent_avx2_glue.c index 2bd0f0459db4..03347b16ac9d 100644 --- a/arch/x86/crypto/serpent_avx2_glue.c +++ b/arch/x86/crypto/serpent_avx2_glue.c @@ -14,15 +14,12 @@ #include #include #include -#include #include -#include -#include -#include +#include #include -#include -#include +#include #include +#include #define SERPENT_AVX2_PARALLEL_BLOCKS 16 @@ -40,6 +37,12 @@ asmlinkage void serpent_xts_enc_16way(struct serpent_ctx *ctx, u8 *dst, asmlinkage void serpent_xts_dec_16way(struct serpent_ctx *ctx, u8 *dst, const u8 *src, le128 *iv); +static int serpent_setkey_skcipher(struct crypto_skcipher *tfm, + const u8 *key, unsigned int keylen) +{ + return __serpent_setkey(crypto_skcipher_ctx(tfm), key, keylen); +} + static const struct common_glue_ctx serpent_enc = { .num_funcs = 3, .fpu_blocks_limit = 8, @@ -136,229 +139,113 @@ static const struct common_glue_ctx serpent_dec_xts = { } } }; -static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int ecb_encrypt(struct skcipher_request *req) { - return glue_ecb_crypt_128bit(&serpent_enc, desc, dst, src, nbytes); + return glue_ecb_req_128bit(&serpent_enc, req); } -static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int ecb_decrypt(struct skcipher_request *req) { - return glue_ecb_crypt_128bit(&serpent_dec, desc, dst, src, nbytes); + return glue_ecb_req_128bit(&serpent_dec, req); } -static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int cbc_encrypt(struct skcipher_request *req) { - return glue_cbc_encrypt_128bit(GLUE_FUNC_CAST(__serpent_encrypt), desc, - dst, src, nbytes); + return glue_cbc_encrypt_req_128bit(GLUE_FUNC_CAST(__serpent_encrypt), + req); } -static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int cbc_decrypt(struct skcipher_request *req) { - return glue_cbc_decrypt_128bit(&serpent_dec_cbc, desc, dst, src, - nbytes); + return glue_cbc_decrypt_req_128bit(&serpent_dec_cbc, req); } -static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int ctr_crypt(struct skcipher_request *req) { - return glue_ctr_crypt_128bit(&serpent_ctr, desc, dst, src, nbytes); + return glue_ctr_req_128bit(&serpent_ctr, req); } 
-static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int xts_encrypt(struct skcipher_request *req) { - struct serpent_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct serpent_xts_ctx *ctx = crypto_skcipher_ctx(tfm); - return glue_xts_crypt_128bit(&serpent_enc_xts, desc, dst, src, nbytes, - XTS_TWEAK_CAST(__serpent_encrypt), - &ctx->tweak_ctx, &ctx->crypt_ctx); + return glue_xts_req_128bit(&serpent_enc_xts, req, + XTS_TWEAK_CAST(__serpent_encrypt), + &ctx->tweak_ctx, &ctx->crypt_ctx); } -static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int xts_decrypt(struct skcipher_request *req) { - struct serpent_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct serpent_xts_ctx *ctx = crypto_skcipher_ctx(tfm); - return glue_xts_crypt_128bit(&serpent_dec_xts, desc, dst, src, nbytes, - XTS_TWEAK_CAST(__serpent_encrypt), - &ctx->tweak_ctx, &ctx->crypt_ctx); + return glue_xts_req_128bit(&serpent_dec_xts, req, + XTS_TWEAK_CAST(__serpent_encrypt), + &ctx->tweak_ctx, &ctx->crypt_ctx); } -static struct crypto_alg srp_algs[] = { { - .cra_name = "__ecb-serpent-avx2", - .cra_driver_name = "__driver-ecb-serpent-avx2", - .cra_priority = 0, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | - CRYPTO_ALG_INTERNAL, - .cra_blocksize = SERPENT_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct serpent_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_u = { - .blkcipher = { - .min_keysize = SERPENT_MIN_KEY_SIZE, - .max_keysize = SERPENT_MAX_KEY_SIZE, - .setkey = serpent_setkey, - .encrypt = ecb_encrypt, - .decrypt = ecb_decrypt, - }, - }, -}, { - .cra_name = "__cbc-serpent-avx2", - .cra_driver_name = "__driver-cbc-serpent-avx2", - .cra_priority = 0, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | - CRYPTO_ALG_INTERNAL, - .cra_blocksize = SERPENT_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct serpent_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_u = { - .blkcipher = { - .min_keysize = SERPENT_MIN_KEY_SIZE, - .max_keysize = SERPENT_MAX_KEY_SIZE, - .setkey = serpent_setkey, - .encrypt = cbc_encrypt, - .decrypt = cbc_decrypt, - }, - }, -}, { - .cra_name = "__ctr-serpent-avx2", - .cra_driver_name = "__driver-ctr-serpent-avx2", - .cra_priority = 0, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | - CRYPTO_ALG_INTERNAL, - .cra_blocksize = 1, - .cra_ctxsize = sizeof(struct serpent_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_u = { - .blkcipher = { - .min_keysize = SERPENT_MIN_KEY_SIZE, - .max_keysize = SERPENT_MAX_KEY_SIZE, - .ivsize = SERPENT_BLOCK_SIZE, - .setkey = serpent_setkey, - .encrypt = ctr_crypt, - .decrypt = ctr_crypt, - }, - }, -}, { - .cra_name = "__xts-serpent-avx2", - .cra_driver_name = "__driver-xts-serpent-avx2", - .cra_priority = 0, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | - CRYPTO_ALG_INTERNAL, - .cra_blocksize = SERPENT_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct serpent_xts_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_u = { - .blkcipher = { - .min_keysize = SERPENT_MIN_KEY_SIZE * 2, - .max_keysize = SERPENT_MAX_KEY_SIZE * 2, - .ivsize = SERPENT_BLOCK_SIZE, - .setkey = xts_serpent_setkey, - .encrypt = xts_encrypt, - .decrypt = 
xts_decrypt, - }, - }, -}, { - .cra_name = "ecb(serpent)", - .cra_driver_name = "ecb-serpent-avx2", - .cra_priority = 600, - .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, - .cra_blocksize = SERPENT_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct async_helper_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_ablkcipher_type, - .cra_module = THIS_MODULE, - .cra_init = ablk_init, - .cra_exit = ablk_exit, - .cra_u = { - .ablkcipher = { - .min_keysize = SERPENT_MIN_KEY_SIZE, - .max_keysize = SERPENT_MAX_KEY_SIZE, - .setkey = ablk_set_key, - .encrypt = ablk_encrypt, - .decrypt = ablk_decrypt, - }, - }, -}, { - .cra_name = "cbc(serpent)", - .cra_driver_name = "cbc-serpent-avx2", - .cra_priority = 600, - .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, - .cra_blocksize = SERPENT_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct async_helper_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_ablkcipher_type, - .cra_module = THIS_MODULE, - .cra_init = ablk_init, - .cra_exit = ablk_exit, - .cra_u = { - .ablkcipher = { - .min_keysize = SERPENT_MIN_KEY_SIZE, - .max_keysize = SERPENT_MAX_KEY_SIZE, - .ivsize = SERPENT_BLOCK_SIZE, - .setkey = ablk_set_key, - .encrypt = __ablk_encrypt, - .decrypt = ablk_decrypt, - }, - }, -}, { - .cra_name = "ctr(serpent)", - .cra_driver_name = "ctr-serpent-avx2", - .cra_priority = 600, - .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, - .cra_blocksize = 1, - .cra_ctxsize = sizeof(struct async_helper_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_ablkcipher_type, - .cra_module = THIS_MODULE, - .cra_init = ablk_init, - .cra_exit = ablk_exit, - .cra_u = { - .ablkcipher = { - .min_keysize = SERPENT_MIN_KEY_SIZE, - .max_keysize = SERPENT_MAX_KEY_SIZE, - .ivsize = SERPENT_BLOCK_SIZE, - .setkey = ablk_set_key, - .encrypt = ablk_encrypt, - .decrypt = ablk_encrypt, - .geniv = "chainiv", - }, - }, -}, { - .cra_name = "xts(serpent)", - .cra_driver_name = "xts-serpent-avx2", - .cra_priority = 600, - .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, - .cra_blocksize = SERPENT_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct async_helper_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_ablkcipher_type, - .cra_module = THIS_MODULE, - .cra_init = ablk_init, - .cra_exit = ablk_exit, - .cra_u = { - .ablkcipher = { - .min_keysize = SERPENT_MIN_KEY_SIZE * 2, - .max_keysize = SERPENT_MAX_KEY_SIZE * 2, - .ivsize = SERPENT_BLOCK_SIZE, - .setkey = ablk_set_key, - .encrypt = ablk_encrypt, - .decrypt = ablk_decrypt, - }, +static struct skcipher_alg serpent_algs[] = { + { + .base.cra_name = "__ecb(serpent)", + .base.cra_driver_name = "__ecb-serpent-avx2", + .base.cra_priority = 600, + .base.cra_flags = CRYPTO_ALG_INTERNAL, + .base.cra_blocksize = SERPENT_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct serpent_ctx), + .base.cra_module = THIS_MODULE, + .min_keysize = SERPENT_MIN_KEY_SIZE, + .max_keysize = SERPENT_MAX_KEY_SIZE, + .setkey = serpent_setkey_skcipher, + .encrypt = ecb_encrypt, + .decrypt = ecb_decrypt, + }, { + .base.cra_name = "__cbc(serpent)", + .base.cra_driver_name = "__cbc-serpent-avx2", + .base.cra_priority = 600, + .base.cra_flags = CRYPTO_ALG_INTERNAL, + .base.cra_blocksize = SERPENT_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct serpent_ctx), + .base.cra_module = THIS_MODULE, + .min_keysize = SERPENT_MIN_KEY_SIZE, + .max_keysize = SERPENT_MAX_KEY_SIZE, + .ivsize = SERPENT_BLOCK_SIZE, + .setkey = serpent_setkey_skcipher, + .encrypt = cbc_encrypt, + .decrypt = cbc_decrypt, + }, { + .base.cra_name = "__ctr(serpent)", + .base.cra_driver_name = 
"__ctr-serpent-avx2", + .base.cra_priority = 600, + .base.cra_flags = CRYPTO_ALG_INTERNAL, + .base.cra_blocksize = 1, + .base.cra_ctxsize = sizeof(struct serpent_ctx), + .base.cra_module = THIS_MODULE, + .min_keysize = SERPENT_MIN_KEY_SIZE, + .max_keysize = SERPENT_MAX_KEY_SIZE, + .ivsize = SERPENT_BLOCK_SIZE, + .chunksize = SERPENT_BLOCK_SIZE, + .setkey = serpent_setkey_skcipher, + .encrypt = ctr_crypt, + .decrypt = ctr_crypt, + }, { + .base.cra_name = "__xts(serpent)", + .base.cra_driver_name = "__xts-serpent-avx2", + .base.cra_priority = 600, + .base.cra_flags = CRYPTO_ALG_INTERNAL, + .base.cra_blocksize = SERPENT_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct serpent_xts_ctx), + .base.cra_module = THIS_MODULE, + .min_keysize = 2 * SERPENT_MIN_KEY_SIZE, + .max_keysize = 2 * SERPENT_MAX_KEY_SIZE, + .ivsize = SERPENT_BLOCK_SIZE, + .setkey = xts_serpent_setkey, + .encrypt = xts_encrypt, + .decrypt = xts_decrypt, }, -} }; +}; + +static struct simd_skcipher_alg *serpent_simd_algs[ARRAY_SIZE(serpent_algs)]; static int __init init(void) { @@ -374,12 +261,15 @@ static int __init init(void) return -ENODEV; } - return crypto_register_algs(srp_algs, ARRAY_SIZE(srp_algs)); + return simd_register_skciphers_compat(serpent_algs, + ARRAY_SIZE(serpent_algs), + serpent_simd_algs); } static void __exit fini(void) { - crypto_unregister_algs(srp_algs, ARRAY_SIZE(srp_algs)); + simd_unregister_skciphers(serpent_algs, ARRAY_SIZE(serpent_algs), + serpent_simd_algs); } module_init(init); diff --git a/arch/x86/crypto/serpent_avx_glue.c b/arch/x86/crypto/serpent_avx_glue.c index 494cfa4382e0..458567ecf76c 100644 --- a/arch/x86/crypto/serpent_avx_glue.c +++ b/arch/x86/crypto/serpent_avx_glue.c @@ -24,20 +24,15 @@ */ #include -#include #include #include #include -#include #include +#include #include -#include -#include -#include #include -#include -#include #include +#include /* 8-way parallel cipher functions */ asmlinkage void serpent_ecb_enc_8way_avx(struct serpent_ctx *ctx, u8 *dst, @@ -90,6 +85,31 @@ void serpent_xts_dec(void *ctx, u128 *dst, const u128 *src, le128 *iv) } EXPORT_SYMBOL_GPL(serpent_xts_dec); +static int serpent_setkey_skcipher(struct crypto_skcipher *tfm, + const u8 *key, unsigned int keylen) +{ + return __serpent_setkey(crypto_skcipher_ctx(tfm), key, keylen); +} + +int xts_serpent_setkey(struct crypto_skcipher *tfm, const u8 *key, + unsigned int keylen) +{ + struct serpent_xts_ctx *ctx = crypto_skcipher_ctx(tfm); + int err; + + err = xts_verify_key(tfm, key, keylen); + if (err) + return err; + + /* first half of xts-key is for crypt */ + err = __serpent_setkey(&ctx->crypt_ctx, key, keylen / 2); + if (err) + return err; + + /* second half of xts-key is for tweak */ + return __serpent_setkey(&ctx->tweak_ctx, key + keylen / 2, keylen / 2); +} +EXPORT_SYMBOL_GPL(xts_serpent_setkey); static const struct common_glue_ctx serpent_enc = { .num_funcs = 2, @@ -169,249 +189,113 @@ static const struct common_glue_ctx serpent_dec_xts = { } } }; -static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) -{ - return glue_ecb_crypt_128bit(&serpent_enc, desc, dst, src, nbytes); -} - -static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int ecb_encrypt(struct skcipher_request *req) { - return glue_ecb_crypt_128bit(&serpent_dec, desc, dst, src, nbytes); + return glue_ecb_req_128bit(&serpent_enc, req); } -static int cbc_encrypt(struct blkcipher_desc *desc, struct 
scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int ecb_decrypt(struct skcipher_request *req) { - return glue_cbc_encrypt_128bit(GLUE_FUNC_CAST(__serpent_encrypt), desc, - dst, src, nbytes); + return glue_ecb_req_128bit(&serpent_dec, req); } -static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int cbc_encrypt(struct skcipher_request *req) { - return glue_cbc_decrypt_128bit(&serpent_dec_cbc, desc, dst, src, - nbytes); + return glue_cbc_encrypt_req_128bit(GLUE_FUNC_CAST(__serpent_encrypt), + req); } -static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int cbc_decrypt(struct skcipher_request *req) { - return glue_ctr_crypt_128bit(&serpent_ctr, desc, dst, src, nbytes); + return glue_cbc_decrypt_req_128bit(&serpent_dec_cbc, req); } -int xts_serpent_setkey(struct crypto_tfm *tfm, const u8 *key, - unsigned int keylen) +static int ctr_crypt(struct skcipher_request *req) { - struct serpent_xts_ctx *ctx = crypto_tfm_ctx(tfm); - int err; - - err = xts_check_key(tfm, key, keylen); - if (err) - return err; - - /* first half of xts-key is for crypt */ - err = __serpent_setkey(&ctx->crypt_ctx, key, keylen / 2); - if (err) - return err; - - /* second half of xts-key is for tweak */ - return __serpent_setkey(&ctx->tweak_ctx, key + keylen / 2, keylen / 2); + return glue_ctr_req_128bit(&serpent_ctr, req); } -EXPORT_SYMBOL_GPL(xts_serpent_setkey); -static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int xts_encrypt(struct skcipher_request *req) { - struct serpent_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct serpent_xts_ctx *ctx = crypto_skcipher_ctx(tfm); - return glue_xts_crypt_128bit(&serpent_enc_xts, desc, dst, src, nbytes, - XTS_TWEAK_CAST(__serpent_encrypt), - &ctx->tweak_ctx, &ctx->crypt_ctx); + return glue_xts_req_128bit(&serpent_enc_xts, req, + XTS_TWEAK_CAST(__serpent_encrypt), + &ctx->tweak_ctx, &ctx->crypt_ctx); } -static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int xts_decrypt(struct skcipher_request *req) { - struct serpent_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct serpent_xts_ctx *ctx = crypto_skcipher_ctx(tfm); - return glue_xts_crypt_128bit(&serpent_dec_xts, desc, dst, src, nbytes, - XTS_TWEAK_CAST(__serpent_encrypt), - &ctx->tweak_ctx, &ctx->crypt_ctx); + return glue_xts_req_128bit(&serpent_dec_xts, req, + XTS_TWEAK_CAST(__serpent_encrypt), + &ctx->tweak_ctx, &ctx->crypt_ctx); } -static struct crypto_alg serpent_algs[] = { { - .cra_name = "__ecb-serpent-avx", - .cra_driver_name = "__driver-ecb-serpent-avx", - .cra_priority = 0, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | - CRYPTO_ALG_INTERNAL, - .cra_blocksize = SERPENT_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct serpent_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_u = { - .blkcipher = { - .min_keysize = SERPENT_MIN_KEY_SIZE, - .max_keysize = SERPENT_MAX_KEY_SIZE, - .setkey = serpent_setkey, - .encrypt = ecb_encrypt, - .decrypt = ecb_decrypt, - }, - }, -}, { - .cra_name = "__cbc-serpent-avx", - .cra_driver_name = "__driver-cbc-serpent-avx", - .cra_priority = 0, - .cra_flags = 
CRYPTO_ALG_TYPE_BLKCIPHER | - CRYPTO_ALG_INTERNAL, - .cra_blocksize = SERPENT_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct serpent_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_u = { - .blkcipher = { - .min_keysize = SERPENT_MIN_KEY_SIZE, - .max_keysize = SERPENT_MAX_KEY_SIZE, - .setkey = serpent_setkey, - .encrypt = cbc_encrypt, - .decrypt = cbc_decrypt, - }, - }, -}, { - .cra_name = "__ctr-serpent-avx", - .cra_driver_name = "__driver-ctr-serpent-avx", - .cra_priority = 0, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | - CRYPTO_ALG_INTERNAL, - .cra_blocksize = 1, - .cra_ctxsize = sizeof(struct serpent_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_u = { - .blkcipher = { - .min_keysize = SERPENT_MIN_KEY_SIZE, - .max_keysize = SERPENT_MAX_KEY_SIZE, - .ivsize = SERPENT_BLOCK_SIZE, - .setkey = serpent_setkey, - .encrypt = ctr_crypt, - .decrypt = ctr_crypt, - }, - }, -}, { - .cra_name = "__xts-serpent-avx", - .cra_driver_name = "__driver-xts-serpent-avx", - .cra_priority = 0, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | - CRYPTO_ALG_INTERNAL, - .cra_blocksize = SERPENT_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct serpent_xts_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_u = { - .blkcipher = { - .min_keysize = SERPENT_MIN_KEY_SIZE * 2, - .max_keysize = SERPENT_MAX_KEY_SIZE * 2, - .ivsize = SERPENT_BLOCK_SIZE, - .setkey = xts_serpent_setkey, - .encrypt = xts_encrypt, - .decrypt = xts_decrypt, - }, - }, -}, { - .cra_name = "ecb(serpent)", - .cra_driver_name = "ecb-serpent-avx", - .cra_priority = 500, - .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, - .cra_blocksize = SERPENT_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct async_helper_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_ablkcipher_type, - .cra_module = THIS_MODULE, - .cra_init = ablk_init, - .cra_exit = ablk_exit, - .cra_u = { - .ablkcipher = { - .min_keysize = SERPENT_MIN_KEY_SIZE, - .max_keysize = SERPENT_MAX_KEY_SIZE, - .setkey = ablk_set_key, - .encrypt = ablk_encrypt, - .decrypt = ablk_decrypt, - }, - }, -}, { - .cra_name = "cbc(serpent)", - .cra_driver_name = "cbc-serpent-avx", - .cra_priority = 500, - .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, - .cra_blocksize = SERPENT_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct async_helper_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_ablkcipher_type, - .cra_module = THIS_MODULE, - .cra_init = ablk_init, - .cra_exit = ablk_exit, - .cra_u = { - .ablkcipher = { - .min_keysize = SERPENT_MIN_KEY_SIZE, - .max_keysize = SERPENT_MAX_KEY_SIZE, - .ivsize = SERPENT_BLOCK_SIZE, - .setkey = ablk_set_key, - .encrypt = __ablk_encrypt, - .decrypt = ablk_decrypt, - }, - }, -}, { - .cra_name = "ctr(serpent)", - .cra_driver_name = "ctr-serpent-avx", - .cra_priority = 500, - .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, - .cra_blocksize = 1, - .cra_ctxsize = sizeof(struct async_helper_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_ablkcipher_type, - .cra_module = THIS_MODULE, - .cra_init = ablk_init, - .cra_exit = ablk_exit, - .cra_u = { - .ablkcipher = { - .min_keysize = SERPENT_MIN_KEY_SIZE, - .max_keysize = SERPENT_MAX_KEY_SIZE, - .ivsize = SERPENT_BLOCK_SIZE, - .setkey = ablk_set_key, - .encrypt = ablk_encrypt, - .decrypt = ablk_encrypt, - .geniv = "chainiv", - }, - }, -}, { - .cra_name = "xts(serpent)", - .cra_driver_name = "xts-serpent-avx", - .cra_priority = 500, - .cra_flags = 
CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, - .cra_blocksize = SERPENT_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct async_helper_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_ablkcipher_type, - .cra_module = THIS_MODULE, - .cra_init = ablk_init, - .cra_exit = ablk_exit, - .cra_u = { - .ablkcipher = { - .min_keysize = SERPENT_MIN_KEY_SIZE * 2, - .max_keysize = SERPENT_MAX_KEY_SIZE * 2, - .ivsize = SERPENT_BLOCK_SIZE, - .setkey = ablk_set_key, - .encrypt = ablk_encrypt, - .decrypt = ablk_decrypt, - }, +static struct skcipher_alg serpent_algs[] = { + { + .base.cra_name = "__ecb(serpent)", + .base.cra_driver_name = "__ecb-serpent-avx", + .base.cra_priority = 500, + .base.cra_flags = CRYPTO_ALG_INTERNAL, + .base.cra_blocksize = SERPENT_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct serpent_ctx), + .base.cra_module = THIS_MODULE, + .min_keysize = SERPENT_MIN_KEY_SIZE, + .max_keysize = SERPENT_MAX_KEY_SIZE, + .setkey = serpent_setkey_skcipher, + .encrypt = ecb_encrypt, + .decrypt = ecb_decrypt, + }, { + .base.cra_name = "__cbc(serpent)", + .base.cra_driver_name = "__cbc-serpent-avx", + .base.cra_priority = 500, + .base.cra_flags = CRYPTO_ALG_INTERNAL, + .base.cra_blocksize = SERPENT_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct serpent_ctx), + .base.cra_module = THIS_MODULE, + .min_keysize = SERPENT_MIN_KEY_SIZE, + .max_keysize = SERPENT_MAX_KEY_SIZE, + .ivsize = SERPENT_BLOCK_SIZE, + .setkey = serpent_setkey_skcipher, + .encrypt = cbc_encrypt, + .decrypt = cbc_decrypt, + }, { + .base.cra_name = "__ctr(serpent)", + .base.cra_driver_name = "__ctr-serpent-avx", + .base.cra_priority = 500, + .base.cra_flags = CRYPTO_ALG_INTERNAL, + .base.cra_blocksize = 1, + .base.cra_ctxsize = sizeof(struct serpent_ctx), + .base.cra_module = THIS_MODULE, + .min_keysize = SERPENT_MIN_KEY_SIZE, + .max_keysize = SERPENT_MAX_KEY_SIZE, + .ivsize = SERPENT_BLOCK_SIZE, + .chunksize = SERPENT_BLOCK_SIZE, + .setkey = serpent_setkey_skcipher, + .encrypt = ctr_crypt, + .decrypt = ctr_crypt, + }, { + .base.cra_name = "__xts(serpent)", + .base.cra_driver_name = "__xts-serpent-avx", + .base.cra_priority = 500, + .base.cra_flags = CRYPTO_ALG_INTERNAL, + .base.cra_blocksize = SERPENT_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct serpent_xts_ctx), + .base.cra_module = THIS_MODULE, + .min_keysize = 2 * SERPENT_MIN_KEY_SIZE, + .max_keysize = 2 * SERPENT_MAX_KEY_SIZE, + .ivsize = SERPENT_BLOCK_SIZE, + .setkey = xts_serpent_setkey, + .encrypt = xts_encrypt, + .decrypt = xts_decrypt, }, -} }; +}; + +static struct simd_skcipher_alg *serpent_simd_algs[ARRAY_SIZE(serpent_algs)]; static int __init serpent_init(void) { @@ -423,12 +307,15 @@ static int __init serpent_init(void) return -ENODEV; } - return crypto_register_algs(serpent_algs, ARRAY_SIZE(serpent_algs)); + return simd_register_skciphers_compat(serpent_algs, + ARRAY_SIZE(serpent_algs), + serpent_simd_algs); } static void __exit serpent_exit(void) { - crypto_unregister_algs(serpent_algs, ARRAY_SIZE(serpent_algs)); + simd_unregister_skciphers(serpent_algs, ARRAY_SIZE(serpent_algs), + serpent_simd_algs); } module_init(serpent_init); diff --git a/arch/x86/include/asm/crypto/serpent-avx.h b/arch/x86/include/asm/crypto/serpent-avx.h index 527e5edd0404..db7c9cc32234 100644 --- a/arch/x86/include/asm/crypto/serpent-avx.h +++ b/arch/x86/include/asm/crypto/serpent-avx.h @@ -2,8 +2,11 @@ #ifndef ASM_X86_SERPENT_AVX_H #define ASM_X86_SERPENT_AVX_H -#include +#include #include +#include + +struct crypto_skcipher; #define SERPENT_PARALLEL_BLOCKS 8 @@ -33,7 +36,7 @@ extern void 
__serpent_crypt_ctr(void *ctx, u128 *dst, const u128 *src, extern void serpent_xts_enc(void *ctx, u128 *dst, const u128 *src, le128 *iv); extern void serpent_xts_dec(void *ctx, u128 *dst, const u128 *src, le128 *iv); -extern int xts_serpent_setkey(struct crypto_tfm *tfm, const u8 *key, +extern int xts_serpent_setkey(struct crypto_skcipher *tfm, const u8 *key, unsigned int keylen); #endif diff --git a/crypto/Kconfig b/crypto/Kconfig index 65012ebec0ab..cca8271d8894 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -1460,11 +1460,10 @@ config CRYPTO_SERPENT_SSE2_586 config CRYPTO_SERPENT_AVX_X86_64 tristate "Serpent cipher algorithm (x86_64/AVX)" depends on X86 && 64BIT - select CRYPTO_ALGAPI - select CRYPTO_CRYPTD - select CRYPTO_ABLK_HELPER + select CRYPTO_BLKCIPHER select CRYPTO_GLUE_HELPER_X86 select CRYPTO_SERPENT + select CRYPTO_SIMD select CRYPTO_XTS help Serpent cipher algorithm, by Anderson, Biham & Knudsen. @@ -1481,13 +1480,7 @@ config CRYPTO_SERPENT_AVX_X86_64 config CRYPTO_SERPENT_AVX2_X86_64 tristate "Serpent cipher algorithm (x86_64/AVX2)" depends on X86 && 64BIT - select CRYPTO_ALGAPI - select CRYPTO_CRYPTD - select CRYPTO_ABLK_HELPER - select CRYPTO_GLUE_HELPER_X86 - select CRYPTO_SERPENT select CRYPTO_SERPENT_AVX_X86_64 - select CRYPTO_XTS help Serpent cipher algorithm, by Anderson, Biham & Knudsen. -- cgit v1.2.3 From 68bfc4924b536ed052198121aef01c525ff6831d Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 19 Feb 2018 23:48:07 -0800 Subject: crypto: x86/twofish-3way - remove LRW algorithm The LRW template now wraps an ECB mode algorithm rather than the block cipher directly. Therefore it is now redundant for crypto modules to wrap their ECB code with generic LRW code themselves via lrw_crypt(). Remove the lrw-twofish-3way algorithm which did this. Users who request lrw(twofish) and previously would have gotten lrw-twofish-3way will now get lrw(ecb-twofish-3way) instead, which is just as fast. 
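As a reminder of the key format involved (a hedged sketch, not code from this patch): both the removed lrw-twofish-3way driver and the generic LRW template take the Twofish key followed by one extra block of tweak-key material, which is why the keysize limits are TF_MIN/MAX_KEY_SIZE + TF_BLOCK_SIZE:

/*
 * Illustrative only: the key layout is cipher key || tweak key, matching the
 * removed setkey code that split at keylen - TF_BLOCK_SIZE.
 */
#include <crypto/skcipher.h>
#include <crypto/twofish.h>

static int lrw_twofish_setkey_demo(struct crypto_skcipher *tfm)
{
        /* 32-byte Twofish key followed by a 16-byte LRW tweak key (dummy, all zero). */
        static const u8 key[32 + TF_BLOCK_SIZE];

        return crypto_skcipher_setkey(tfm, key, sizeof(key));
}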
Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu --- arch/x86/crypto/twofish_avx_glue.c | 26 +++++++++++ arch/x86/crypto/twofish_glue_3way.c | 81 +---------------------------------- arch/x86/include/asm/crypto/twofish.h | 11 ----- crypto/Kconfig | 1 - 4 files changed, 27 insertions(+), 92 deletions(-) (limited to 'arch') diff --git a/arch/x86/crypto/twofish_avx_glue.c b/arch/x86/crypto/twofish_avx_glue.c index b7a3904b953c..3358e23f9ffb 100644 --- a/arch/x86/crypto/twofish_avx_glue.c +++ b/arch/x86/crypto/twofish_avx_glue.c @@ -262,6 +262,32 @@ static void decrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes) twofish_dec_blk(ctx->ctx, srcdst, srcdst); } +struct twofish_lrw_ctx { + struct lrw_table_ctx lrw_table; + struct twofish_ctx twofish_ctx; +}; + +static int lrw_twofish_setkey(struct crypto_tfm *tfm, const u8 *key, + unsigned int keylen) +{ + struct twofish_lrw_ctx *ctx = crypto_tfm_ctx(tfm); + int err; + + err = __twofish_setkey(&ctx->twofish_ctx, key, keylen - TF_BLOCK_SIZE, + &tfm->crt_flags); + if (err) + return err; + + return lrw_init_table(&ctx->lrw_table, key + keylen - TF_BLOCK_SIZE); +} + +static void lrw_twofish_exit_tfm(struct crypto_tfm *tfm) +{ + struct twofish_lrw_ctx *ctx = crypto_tfm_ctx(tfm); + + lrw_free_table(&ctx->lrw_table); +} + static int lrw_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, struct scatterlist *src, unsigned int nbytes) { diff --git a/arch/x86/crypto/twofish_glue_3way.c b/arch/x86/crypto/twofish_glue_3way.c index 243e90a4b5d9..2d6dc5d7f0aa 100644 --- a/arch/x86/crypto/twofish_glue_3way.c +++ b/arch/x86/crypto/twofish_glue_3way.c @@ -30,7 +30,6 @@ #include #include #include -#include #include EXPORT_SYMBOL_GPL(__twofish_enc_blk_3way); @@ -213,63 +212,6 @@ static void decrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes) twofish_dec_blk(ctx, srcdst, srcdst); } -int lrw_twofish_setkey(struct crypto_tfm *tfm, const u8 *key, - unsigned int keylen) -{ - struct twofish_lrw_ctx *ctx = crypto_tfm_ctx(tfm); - int err; - - err = __twofish_setkey(&ctx->twofish_ctx, key, keylen - TF_BLOCK_SIZE, - &tfm->crt_flags); - if (err) - return err; - - return lrw_init_table(&ctx->lrw_table, key + keylen - TF_BLOCK_SIZE); -} -EXPORT_SYMBOL_GPL(lrw_twofish_setkey); - -static int lrw_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) -{ - struct twofish_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - be128 buf[3]; - struct lrw_crypt_req req = { - .tbuf = buf, - .tbuflen = sizeof(buf), - - .table_ctx = &ctx->lrw_table, - .crypt_ctx = &ctx->twofish_ctx, - .crypt_fn = encrypt_callback, - }; - - return lrw_crypt(desc, dst, src, nbytes, &req); -} - -static int lrw_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) -{ - struct twofish_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - be128 buf[3]; - struct lrw_crypt_req req = { - .tbuf = buf, - .tbuflen = sizeof(buf), - - .table_ctx = &ctx->lrw_table, - .crypt_ctx = &ctx->twofish_ctx, - .crypt_fn = decrypt_callback, - }; - - return lrw_crypt(desc, dst, src, nbytes, &req); -} - -void lrw_twofish_exit_tfm(struct crypto_tfm *tfm) -{ - struct twofish_lrw_ctx *ctx = crypto_tfm_ctx(tfm); - - lrw_free_table(&ctx->lrw_table); -} -EXPORT_SYMBOL_GPL(lrw_twofish_exit_tfm); - int xts_twofish_setkey(struct crypto_tfm *tfm, const u8 *key, unsigned int keylen) { @@ -328,7 +270,7 @@ static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, return xts_crypt(desc, dst, src, 
nbytes, &req); } -static struct crypto_alg tf_algs[5] = { { +static struct crypto_alg tf_algs[] = { { .cra_name = "ecb(twofish)", .cra_driver_name = "ecb-twofish-3way", .cra_priority = 300, @@ -387,27 +329,6 @@ static struct crypto_alg tf_algs[5] = { { .decrypt = ctr_crypt, }, }, -}, { - .cra_name = "lrw(twofish)", - .cra_driver_name = "lrw-twofish-3way", - .cra_priority = 300, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, - .cra_blocksize = TF_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct twofish_lrw_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_exit = lrw_twofish_exit_tfm, - .cra_u = { - .blkcipher = { - .min_keysize = TF_MIN_KEY_SIZE + TF_BLOCK_SIZE, - .max_keysize = TF_MAX_KEY_SIZE + TF_BLOCK_SIZE, - .ivsize = TF_BLOCK_SIZE, - .setkey = lrw_twofish_setkey, - .encrypt = lrw_encrypt, - .decrypt = lrw_decrypt, - }, - }, }, { .cra_name = "xts(twofish)", .cra_driver_name = "xts-twofish-3way", diff --git a/arch/x86/include/asm/crypto/twofish.h b/arch/x86/include/asm/crypto/twofish.h index 65bb80adba3e..3c904116ee00 100644 --- a/arch/x86/include/asm/crypto/twofish.h +++ b/arch/x86/include/asm/crypto/twofish.h @@ -4,14 +4,8 @@ #include #include -#include #include -struct twofish_lrw_ctx { - struct lrw_table_ctx lrw_table; - struct twofish_ctx twofish_ctx; -}; - struct twofish_xts_ctx { struct twofish_ctx tweak_ctx; struct twofish_ctx crypt_ctx; @@ -36,11 +30,6 @@ extern void twofish_enc_blk_ctr(void *ctx, u128 *dst, const u128 *src, extern void twofish_enc_blk_ctr_3way(void *ctx, u128 *dst, const u128 *src, le128 *iv); -extern int lrw_twofish_setkey(struct crypto_tfm *tfm, const u8 *key, - unsigned int keylen); - -extern void lrw_twofish_exit_tfm(struct crypto_tfm *tfm); - extern int xts_twofish_setkey(struct crypto_tfm *tfm, const u8 *key, unsigned int keylen); diff --git a/crypto/Kconfig b/crypto/Kconfig index cca8271d8894..eba78fa85147 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -1584,7 +1584,6 @@ config CRYPTO_TWOFISH_X86_64_3WAY select CRYPTO_TWOFISH_COMMON select CRYPTO_TWOFISH_X86_64 select CRYPTO_GLUE_HELPER_X86 - select CRYPTO_LRW select CRYPTO_XTS help Twofish cipher algorithm (x86_64, 3-way parallel). -- cgit v1.2.3 From ebeea983dd185a29e237ce1a5217fa7196cf64ad Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 19 Feb 2018 23:48:08 -0800 Subject: crypto: x86/twofish-3way - remove XTS algorithm The XTS template now wraps an ECB mode algorithm rather than the block cipher directly. Therefore it is now redundant for crypto modules to wrap their ECB code with generic XTS code themselves via xts_crypt(). Remove the xts-twofish-3way algorithm which did this. Users who request xts(twofish) and previously would have gotten xts-twofish-3way will now get xts(ecb-twofish-3way) instead, which is just as fast. 
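For reference, a hedged sketch (not part of this patch) of a synchronous xts(twofish) encryption through the skcipher API, which after this change is served by the XTS template over ecb-twofish-3way; the function and buffer names are illustrative, error handling is minimal, and the buffer must be addressable by a scatterlist (not on the stack):

/*
 * Illustrative only: encrypt one buffer in place with "xts(twofish)".
 */
#include <crypto/skcipher.h>
#include <linux/scatterlist.h>
#include <linux/err.h>

static int xts_twofish_encrypt_demo(u8 *buf, unsigned int len,
                                    const u8 *key, unsigned int keylen,
                                    u8 iv[16])
{
        struct crypto_skcipher *tfm;
        struct skcipher_request *req;
        struct scatterlist sg;
        DECLARE_CRYPTO_WAIT(wait);
        int err;

        tfm = crypto_alloc_skcipher("xts(twofish)", 0, 0);
        if (IS_ERR(tfm))
                return PTR_ERR(tfm);

        /* XTS keys are two cipher keys back to back, e.g. 2 * 32 bytes. */
        err = crypto_skcipher_setkey(tfm, key, keylen);
        if (err)
                goto out_free_tfm;

        req = skcipher_request_alloc(tfm, GFP_KERNEL);
        if (!req) {
                err = -ENOMEM;
                goto out_free_tfm;
        }

        sg_init_one(&sg, buf, len);
        skcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_SLEEP,
                                      crypto_req_done, &wait);
        skcipher_request_set_crypt(req, &sg, &sg, len, iv);

        /* Wait for completion even if an async provider picked up the request. */
        err = crypto_wait_req(crypto_skcipher_encrypt(req), &wait);

        skcipher_request_free(req);
out_free_tfm:
        crypto_free_skcipher(tfm);
        return err;
}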
Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu --- arch/x86/crypto/twofish_avx_glue.c | 25 ++++++++ arch/x86/crypto/twofish_glue_3way.c | 109 ---------------------------------- arch/x86/include/asm/crypto/twofish.h | 8 --- crypto/Kconfig | 1 - 4 files changed, 25 insertions(+), 118 deletions(-) (limited to 'arch') diff --git a/arch/x86/crypto/twofish_avx_glue.c b/arch/x86/crypto/twofish_avx_glue.c index 3358e23f9ffb..3750b2cd3019 100644 --- a/arch/x86/crypto/twofish_avx_glue.c +++ b/arch/x86/crypto/twofish_avx_glue.c @@ -79,6 +79,31 @@ static void twofish_xts_dec(void *ctx, u128 *dst, const u128 *src, le128 *iv) GLUE_FUNC_CAST(twofish_dec_blk)); } +struct twofish_xts_ctx { + struct twofish_ctx tweak_ctx; + struct twofish_ctx crypt_ctx; +}; + +static int xts_twofish_setkey(struct crypto_tfm *tfm, const u8 *key, + unsigned int keylen) +{ + struct twofish_xts_ctx *ctx = crypto_tfm_ctx(tfm); + u32 *flags = &tfm->crt_flags; + int err; + + err = xts_check_key(tfm, key, keylen); + if (err) + return err; + + /* first half of xts-key is for crypt */ + err = __twofish_setkey(&ctx->crypt_ctx, key, keylen / 2, flags); + if (err) + return err; + + /* second half of xts-key is for tweak */ + return __twofish_setkey(&ctx->tweak_ctx, key + keylen / 2, keylen / 2, + flags); +} static const struct common_glue_ctx twofish_enc = { .num_funcs = 3, diff --git a/arch/x86/crypto/twofish_glue_3way.c b/arch/x86/crypto/twofish_glue_3way.c index 2d6dc5d7f0aa..bdbd2f6ab9d7 100644 --- a/arch/x86/crypto/twofish_glue_3way.c +++ b/arch/x86/crypto/twofish_glue_3way.c @@ -30,7 +30,6 @@ #include #include #include -#include EXPORT_SYMBOL_GPL(__twofish_enc_blk_3way); EXPORT_SYMBOL_GPL(twofish_dec_blk_3way); @@ -182,94 +181,6 @@ static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst, return glue_ctr_crypt_128bit(&twofish_ctr, desc, dst, src, nbytes); } -static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes) -{ - const unsigned int bsize = TF_BLOCK_SIZE; - struct twofish_ctx *ctx = priv; - int i; - - if (nbytes == 3 * bsize) { - twofish_enc_blk_3way(ctx, srcdst, srcdst); - return; - } - - for (i = 0; i < nbytes / bsize; i++, srcdst += bsize) - twofish_enc_blk(ctx, srcdst, srcdst); -} - -static void decrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes) -{ - const unsigned int bsize = TF_BLOCK_SIZE; - struct twofish_ctx *ctx = priv; - int i; - - if (nbytes == 3 * bsize) { - twofish_dec_blk_3way(ctx, srcdst, srcdst); - return; - } - - for (i = 0; i < nbytes / bsize; i++, srcdst += bsize) - twofish_dec_blk(ctx, srcdst, srcdst); -} - -int xts_twofish_setkey(struct crypto_tfm *tfm, const u8 *key, - unsigned int keylen) -{ - struct twofish_xts_ctx *ctx = crypto_tfm_ctx(tfm); - u32 *flags = &tfm->crt_flags; - int err; - - err = xts_check_key(tfm, key, keylen); - if (err) - return err; - - /* first half of xts-key is for crypt */ - err = __twofish_setkey(&ctx->crypt_ctx, key, keylen / 2, flags); - if (err) - return err; - - /* second half of xts-key is for tweak */ - return __twofish_setkey(&ctx->tweak_ctx, key + keylen / 2, keylen / 2, - flags); -} -EXPORT_SYMBOL_GPL(xts_twofish_setkey); - -static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) -{ - struct twofish_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - le128 buf[3]; - struct xts_crypt_req req = { - .tbuf = buf, - .tbuflen = sizeof(buf), - - .tweak_ctx = &ctx->tweak_ctx, - .tweak_fn = XTS_TWEAK_CAST(twofish_enc_blk), - .crypt_ctx = &ctx->crypt_ctx, - 
.crypt_fn = encrypt_callback, - }; - - return xts_crypt(desc, dst, src, nbytes, &req); -} - -static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) -{ - struct twofish_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - le128 buf[3]; - struct xts_crypt_req req = { - .tbuf = buf, - .tbuflen = sizeof(buf), - - .tweak_ctx = &ctx->tweak_ctx, - .tweak_fn = XTS_TWEAK_CAST(twofish_enc_blk), - .crypt_ctx = &ctx->crypt_ctx, - .crypt_fn = decrypt_callback, - }; - - return xts_crypt(desc, dst, src, nbytes, &req); -} - static struct crypto_alg tf_algs[] = { { .cra_name = "ecb(twofish)", .cra_driver_name = "ecb-twofish-3way", @@ -329,26 +240,6 @@ static struct crypto_alg tf_algs[] = { { .decrypt = ctr_crypt, }, }, -}, { - .cra_name = "xts(twofish)", - .cra_driver_name = "xts-twofish-3way", - .cra_priority = 300, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, - .cra_blocksize = TF_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct twofish_xts_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_u = { - .blkcipher = { - .min_keysize = TF_MIN_KEY_SIZE * 2, - .max_keysize = TF_MAX_KEY_SIZE * 2, - .ivsize = TF_BLOCK_SIZE, - .setkey = xts_twofish_setkey, - .encrypt = xts_encrypt, - .decrypt = xts_decrypt, - }, - }, } }; static bool is_blacklisted_cpu(void) diff --git a/arch/x86/include/asm/crypto/twofish.h b/arch/x86/include/asm/crypto/twofish.h index 3c904116ee00..f618bf272b90 100644 --- a/arch/x86/include/asm/crypto/twofish.h +++ b/arch/x86/include/asm/crypto/twofish.h @@ -6,11 +6,6 @@ #include #include -struct twofish_xts_ctx { - struct twofish_ctx tweak_ctx; - struct twofish_ctx crypt_ctx; -}; - /* regular block cipher functions from twofish_x86_64 module */ asmlinkage void twofish_enc_blk(struct twofish_ctx *ctx, u8 *dst, const u8 *src); @@ -30,7 +25,4 @@ extern void twofish_enc_blk_ctr(void *ctx, u128 *dst, const u128 *src, extern void twofish_enc_blk_ctr_3way(void *ctx, u128 *dst, const u128 *src, le128 *iv); -extern int xts_twofish_setkey(struct crypto_tfm *tfm, const u8 *key, - unsigned int keylen); - #endif /* ASM_X86_TWOFISH_H */ diff --git a/crypto/Kconfig b/crypto/Kconfig index eba78fa85147..6a18aa26bbc7 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -1584,7 +1584,6 @@ config CRYPTO_TWOFISH_X86_64_3WAY select CRYPTO_TWOFISH_COMMON select CRYPTO_TWOFISH_X86_64 select CRYPTO_GLUE_HELPER_X86 - select CRYPTO_XTS help Twofish cipher algorithm (x86_64, 3-way parallel). -- cgit v1.2.3 From 37992fa47f9ac7e42650eec748d50da0398afa70 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 19 Feb 2018 23:48:09 -0800 Subject: crypto: x86/twofish-3way - convert to skcipher interface Convert the 3-way implementation of Twofish from the (deprecated) blkcipher interface over to the skcipher interface. 
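The glue helper hides the scatterlist walking here, but for reference this is roughly the handler shape the skcipher interface expects; a hedged sketch with a stub in place of the real block-processing code, not code taken from this patch:

/*
 * Illustrative only: the request carries tfm, src/dst scatterlists, length
 * and IV, and the handler iterates with the skcipher_walk helpers.
 */
#include <crypto/internal/skcipher.h>
#include <crypto/twofish.h>

/* Stand-in for the real block-processing code (e.g. the 3-way asm). */
static void demo_process_blocks(struct twofish_ctx *ctx, u8 *dst,
                                const u8 *src, unsigned int nbytes)
{
}

static int demo_ecb_crypt(struct skcipher_request *req)
{
        struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
        struct twofish_ctx *ctx = crypto_skcipher_ctx(tfm);
        struct skcipher_walk walk;
        unsigned int nbytes;
        int err;

        err = skcipher_walk_virt(&walk, req, false);

        while ((nbytes = walk.nbytes) != 0) {
                unsigned int n = nbytes - (nbytes % TF_BLOCK_SIZE);

                demo_process_blocks(ctx, walk.dst.virt.addr,
                                    walk.src.virt.addr, n);

                err = skcipher_walk_done(&walk, nbytes - n);
        }

        return err;
}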
Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu --- arch/x86/crypto/twofish_glue_3way.c | 151 ++++++++++++++++-------------------- crypto/Kconfig | 2 +- 2 files changed, 68 insertions(+), 85 deletions(-) (limited to 'arch') diff --git a/arch/x86/crypto/twofish_glue_3way.c b/arch/x86/crypto/twofish_glue_3way.c index bdbd2f6ab9d7..571485502ec8 100644 --- a/arch/x86/crypto/twofish_glue_3way.c +++ b/arch/x86/crypto/twofish_glue_3way.c @@ -20,20 +20,26 @@ * */ -#include +#include +#include +#include +#include +#include +#include #include #include #include #include -#include -#include -#include -#include -#include EXPORT_SYMBOL_GPL(__twofish_enc_blk_3way); EXPORT_SYMBOL_GPL(twofish_dec_blk_3way); +static int twofish_setkey_skcipher(struct crypto_skcipher *tfm, + const u8 *key, unsigned int keylen) +{ + return twofish_setkey(&tfm->base, key, keylen); +} + static inline void twofish_enc_blk_3way(struct twofish_ctx *ctx, u8 *dst, const u8 *src) { @@ -149,98 +155,74 @@ static const struct common_glue_ctx twofish_dec_cbc = { } } }; -static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int ecb_encrypt(struct skcipher_request *req) { - return glue_ecb_crypt_128bit(&twofish_enc, desc, dst, src, nbytes); + return glue_ecb_req_128bit(&twofish_enc, req); } -static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int ecb_decrypt(struct skcipher_request *req) { - return glue_ecb_crypt_128bit(&twofish_dec, desc, dst, src, nbytes); + return glue_ecb_req_128bit(&twofish_dec, req); } -static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int cbc_encrypt(struct skcipher_request *req) { - return glue_cbc_encrypt_128bit(GLUE_FUNC_CAST(twofish_enc_blk), desc, - dst, src, nbytes); + return glue_cbc_encrypt_req_128bit(GLUE_FUNC_CAST(twofish_enc_blk), + req); } -static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int cbc_decrypt(struct skcipher_request *req) { - return glue_cbc_decrypt_128bit(&twofish_dec_cbc, desc, dst, src, - nbytes); + return glue_cbc_decrypt_req_128bit(&twofish_dec_cbc, req); } -static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int ctr_crypt(struct skcipher_request *req) { - return glue_ctr_crypt_128bit(&twofish_ctr, desc, dst, src, nbytes); + return glue_ctr_req_128bit(&twofish_ctr, req); } -static struct crypto_alg tf_algs[] = { { - .cra_name = "ecb(twofish)", - .cra_driver_name = "ecb-twofish-3way", - .cra_priority = 300, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, - .cra_blocksize = TF_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct twofish_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_u = { - .blkcipher = { - .min_keysize = TF_MIN_KEY_SIZE, - .max_keysize = TF_MAX_KEY_SIZE, - .setkey = twofish_setkey, - .encrypt = ecb_encrypt, - .decrypt = ecb_decrypt, - }, - }, -}, { - .cra_name = "cbc(twofish)", - .cra_driver_name = "cbc-twofish-3way", - .cra_priority = 300, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, - .cra_blocksize = TF_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct twofish_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_u = { - .blkcipher = { - .min_keysize = TF_MIN_KEY_SIZE, - .max_keysize = 
TF_MAX_KEY_SIZE, - .ivsize = TF_BLOCK_SIZE, - .setkey = twofish_setkey, - .encrypt = cbc_encrypt, - .decrypt = cbc_decrypt, - }, - }, -}, { - .cra_name = "ctr(twofish)", - .cra_driver_name = "ctr-twofish-3way", - .cra_priority = 300, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, - .cra_blocksize = 1, - .cra_ctxsize = sizeof(struct twofish_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_u = { - .blkcipher = { - .min_keysize = TF_MIN_KEY_SIZE, - .max_keysize = TF_MAX_KEY_SIZE, - .ivsize = TF_BLOCK_SIZE, - .setkey = twofish_setkey, - .encrypt = ctr_crypt, - .decrypt = ctr_crypt, - }, +static struct skcipher_alg tf_skciphers[] = { + { + .base.cra_name = "ecb(twofish)", + .base.cra_driver_name = "ecb-twofish-3way", + .base.cra_priority = 300, + .base.cra_blocksize = TF_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct twofish_ctx), + .base.cra_module = THIS_MODULE, + .min_keysize = TF_MIN_KEY_SIZE, + .max_keysize = TF_MAX_KEY_SIZE, + .setkey = twofish_setkey_skcipher, + .encrypt = ecb_encrypt, + .decrypt = ecb_decrypt, + }, { + .base.cra_name = "cbc(twofish)", + .base.cra_driver_name = "cbc-twofish-3way", + .base.cra_priority = 300, + .base.cra_blocksize = TF_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct twofish_ctx), + .base.cra_module = THIS_MODULE, + .min_keysize = TF_MIN_KEY_SIZE, + .max_keysize = TF_MAX_KEY_SIZE, + .ivsize = TF_BLOCK_SIZE, + .setkey = twofish_setkey_skcipher, + .encrypt = cbc_encrypt, + .decrypt = cbc_decrypt, + }, { + .base.cra_name = "ctr(twofish)", + .base.cra_driver_name = "ctr-twofish-3way", + .base.cra_priority = 300, + .base.cra_blocksize = 1, + .base.cra_ctxsize = sizeof(struct twofish_ctx), + .base.cra_module = THIS_MODULE, + .min_keysize = TF_MIN_KEY_SIZE, + .max_keysize = TF_MAX_KEY_SIZE, + .ivsize = TF_BLOCK_SIZE, + .chunksize = TF_BLOCK_SIZE, + .setkey = twofish_setkey_skcipher, + .encrypt = ctr_crypt, + .decrypt = ctr_crypt, }, -} }; +}; static bool is_blacklisted_cpu(void) { @@ -290,12 +272,13 @@ static int __init init(void) return -ENODEV; } - return crypto_register_algs(tf_algs, ARRAY_SIZE(tf_algs)); + return crypto_register_skciphers(tf_skciphers, + ARRAY_SIZE(tf_skciphers)); } static void __exit fini(void) { - crypto_unregister_algs(tf_algs, ARRAY_SIZE(tf_algs)); + crypto_unregister_skciphers(tf_skciphers, ARRAY_SIZE(tf_skciphers)); } module_init(init); diff --git a/crypto/Kconfig b/crypto/Kconfig index 6a18aa26bbc7..c0a26c6cbb0c 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -1580,7 +1580,7 @@ config CRYPTO_TWOFISH_X86_64 config CRYPTO_TWOFISH_X86_64_3WAY tristate "Twofish cipher algorithm (x86_64, 3-way parallel)" depends on X86 && 64BIT - select CRYPTO_ALGAPI + select CRYPTO_BLKCIPHER select CRYPTO_TWOFISH_COMMON select CRYPTO_TWOFISH_X86_64 select CRYPTO_GLUE_HELPER_X86 -- cgit v1.2.3 From 876e9f0c12778bcdc64040031974e0dcf1d99bd5 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 19 Feb 2018 23:48:10 -0800 Subject: crypto: x86/twofish-avx - remove LRW algorithm The LRW template now wraps an ECB mode algorithm rather than the block cipher directly. Therefore it is now redundant for crypto modules to wrap their ECB code with generic LRW code themselves via lrw_crypt(). Remove the lrw-twofish-avx algorithm which did this. Users who request lrw(twofish) and previously would have gotten lrw-twofish-avx will now get lrw(ecb-twofish-avx) instead, which is just as fast. 
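A userspace sketch (not part of this patch, details assumed) showing that lrw(twofish) remains reachable through AF_ALG once the dedicated driver is gone; the key is a dummy and error handling is omitted:

/*
 * Illustrative only: bind an AF_ALG skcipher socket to "lrw(twofish)".
 */
#include <unistd.h>
#include <sys/socket.h>
#include <linux/if_alg.h>

#ifndef SOL_ALG
#define SOL_ALG 279
#endif

int main(void)
{
        struct sockaddr_alg sa = {
                .salg_family = AF_ALG,
                .salg_type   = "skcipher",
                .salg_name   = "lrw(twofish)",
        };
        /* 32-byte Twofish key + 16-byte LRW tweak key, all zero for the demo. */
        unsigned char key[48] = { 0 };
        int tfmfd, opfd;

        tfmfd = socket(AF_ALG, SOCK_SEQPACKET, 0);
        bind(tfmfd, (struct sockaddr *)&sa, sizeof(sa));
        setsockopt(tfmfd, SOL_ALG, ALG_SET_KEY, key, sizeof(key));
        opfd = accept(tfmfd, NULL, 0);

        /* Data would now go through sendmsg() with ALG_SET_OP/ALG_SET_IV and read(). */
        close(opfd);
        close(tfmfd);
        return 0;
}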
Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu --- arch/x86/crypto/twofish_avx_glue.c | 189 +------------------------------------ crypto/Kconfig | 1 - 2 files changed, 1 insertion(+), 189 deletions(-) (limited to 'arch') diff --git a/arch/x86/crypto/twofish_avx_glue.c b/arch/x86/crypto/twofish_avx_glue.c index 3750b2cd3019..35de5e5a87b3 100644 --- a/arch/x86/crypto/twofish_avx_glue.c +++ b/arch/x86/crypto/twofish_avx_glue.c @@ -34,7 +34,6 @@ #include #include #include -#include #include #include #include @@ -227,144 +226,6 @@ static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst, return glue_ctr_crypt_128bit(&twofish_ctr, desc, dst, src, nbytes); } -static inline bool twofish_fpu_begin(bool fpu_enabled, unsigned int nbytes) -{ - return glue_fpu_begin(TF_BLOCK_SIZE, TWOFISH_PARALLEL_BLOCKS, NULL, - fpu_enabled, nbytes); -} - -static inline void twofish_fpu_end(bool fpu_enabled) -{ - glue_fpu_end(fpu_enabled); -} - -struct crypt_priv { - struct twofish_ctx *ctx; - bool fpu_enabled; -}; - -static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes) -{ - const unsigned int bsize = TF_BLOCK_SIZE; - struct crypt_priv *ctx = priv; - int i; - - ctx->fpu_enabled = twofish_fpu_begin(ctx->fpu_enabled, nbytes); - - if (nbytes == bsize * TWOFISH_PARALLEL_BLOCKS) { - twofish_ecb_enc_8way(ctx->ctx, srcdst, srcdst); - return; - } - - for (i = 0; i < nbytes / (bsize * 3); i++, srcdst += bsize * 3) - twofish_enc_blk_3way(ctx->ctx, srcdst, srcdst); - - nbytes %= bsize * 3; - - for (i = 0; i < nbytes / bsize; i++, srcdst += bsize) - twofish_enc_blk(ctx->ctx, srcdst, srcdst); -} - -static void decrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes) -{ - const unsigned int bsize = TF_BLOCK_SIZE; - struct crypt_priv *ctx = priv; - int i; - - ctx->fpu_enabled = twofish_fpu_begin(ctx->fpu_enabled, nbytes); - - if (nbytes == bsize * TWOFISH_PARALLEL_BLOCKS) { - twofish_ecb_dec_8way(ctx->ctx, srcdst, srcdst); - return; - } - - for (i = 0; i < nbytes / (bsize * 3); i++, srcdst += bsize * 3) - twofish_dec_blk_3way(ctx->ctx, srcdst, srcdst); - - nbytes %= bsize * 3; - - for (i = 0; i < nbytes / bsize; i++, srcdst += bsize) - twofish_dec_blk(ctx->ctx, srcdst, srcdst); -} - -struct twofish_lrw_ctx { - struct lrw_table_ctx lrw_table; - struct twofish_ctx twofish_ctx; -}; - -static int lrw_twofish_setkey(struct crypto_tfm *tfm, const u8 *key, - unsigned int keylen) -{ - struct twofish_lrw_ctx *ctx = crypto_tfm_ctx(tfm); - int err; - - err = __twofish_setkey(&ctx->twofish_ctx, key, keylen - TF_BLOCK_SIZE, - &tfm->crt_flags); - if (err) - return err; - - return lrw_init_table(&ctx->lrw_table, key + keylen - TF_BLOCK_SIZE); -} - -static void lrw_twofish_exit_tfm(struct crypto_tfm *tfm) -{ - struct twofish_lrw_ctx *ctx = crypto_tfm_ctx(tfm); - - lrw_free_table(&ctx->lrw_table); -} - -static int lrw_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) -{ - struct twofish_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - be128 buf[TWOFISH_PARALLEL_BLOCKS]; - struct crypt_priv crypt_ctx = { - .ctx = &ctx->twofish_ctx, - .fpu_enabled = false, - }; - struct lrw_crypt_req req = { - .tbuf = buf, - .tbuflen = sizeof(buf), - - .table_ctx = &ctx->lrw_table, - .crypt_ctx = &crypt_ctx, - .crypt_fn = encrypt_callback, - }; - int ret; - - desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; - ret = lrw_crypt(desc, dst, src, nbytes, &req); - twofish_fpu_end(crypt_ctx.fpu_enabled); - - return ret; -} - -static int lrw_decrypt(struct blkcipher_desc 
*desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) -{ - struct twofish_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - be128 buf[TWOFISH_PARALLEL_BLOCKS]; - struct crypt_priv crypt_ctx = { - .ctx = &ctx->twofish_ctx, - .fpu_enabled = false, - }; - struct lrw_crypt_req req = { - .tbuf = buf, - .tbuflen = sizeof(buf), - - .table_ctx = &ctx->lrw_table, - .crypt_ctx = &crypt_ctx, - .crypt_fn = decrypt_callback, - }; - int ret; - - desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; - ret = lrw_crypt(desc, dst, src, nbytes, &req); - twofish_fpu_end(crypt_ctx.fpu_enabled); - - return ret; -} - static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, struct scatterlist *src, unsigned int nbytes) { @@ -385,7 +246,7 @@ static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, &ctx->tweak_ctx, &ctx->crypt_ctx); } -static struct crypto_alg twofish_algs[10] = { { +static struct crypto_alg twofish_algs[] = { { .cra_name = "__ecb-twofish-avx", .cra_driver_name = "__driver-ecb-twofish-avx", .cra_priority = 0, @@ -446,30 +307,6 @@ static struct crypto_alg twofish_algs[10] = { { .decrypt = ctr_crypt, }, }, -}, { - .cra_name = "__lrw-twofish-avx", - .cra_driver_name = "__driver-lrw-twofish-avx", - .cra_priority = 0, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | - CRYPTO_ALG_INTERNAL, - .cra_blocksize = TF_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct twofish_lrw_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_exit = lrw_twofish_exit_tfm, - .cra_u = { - .blkcipher = { - .min_keysize = TF_MIN_KEY_SIZE + - TF_BLOCK_SIZE, - .max_keysize = TF_MAX_KEY_SIZE + - TF_BLOCK_SIZE, - .ivsize = TF_BLOCK_SIZE, - .setkey = lrw_twofish_setkey, - .encrypt = lrw_encrypt, - .decrypt = lrw_decrypt, - }, - }, }, { .cra_name = "__xts-twofish-avx", .cra_driver_name = "__driver-xts-twofish-avx", @@ -557,30 +394,6 @@ static struct crypto_alg twofish_algs[10] = { { .geniv = "chainiv", }, }, -}, { - .cra_name = "lrw(twofish)", - .cra_driver_name = "lrw-twofish-avx", - .cra_priority = 400, - .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, - .cra_blocksize = TF_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct async_helper_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_ablkcipher_type, - .cra_module = THIS_MODULE, - .cra_init = ablk_init, - .cra_exit = ablk_exit, - .cra_u = { - .ablkcipher = { - .min_keysize = TF_MIN_KEY_SIZE + - TF_BLOCK_SIZE, - .max_keysize = TF_MAX_KEY_SIZE + - TF_BLOCK_SIZE, - .ivsize = TF_BLOCK_SIZE, - .setkey = ablk_set_key, - .encrypt = ablk_encrypt, - .decrypt = ablk_decrypt, - }, - }, }, { .cra_name = "xts(twofish)", .cra_driver_name = "xts-twofish-avx", diff --git a/crypto/Kconfig b/crypto/Kconfig index c0a26c6cbb0c..ac19f3ed8692 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -1608,7 +1608,6 @@ config CRYPTO_TWOFISH_AVX_X86_64 select CRYPTO_TWOFISH_COMMON select CRYPTO_TWOFISH_X86_64 select CRYPTO_TWOFISH_X86_64_3WAY - select CRYPTO_LRW select CRYPTO_XTS help Twofish cipher algorithm (x86_64/AVX). -- cgit v1.2.3 From 0e6ab46dadb0ec7b76ab3aa41eae3920cdd4d82b Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 19 Feb 2018 23:48:11 -0800 Subject: crypto: x86/twofish-avx - convert to skcipher interface Convert the AVX implementation of Twofish from the (deprecated) ablkcipher and blkcipher interfaces over to the skcipher interface. Note that this includes replacing the use of ablk_helper with crypto_simd. 
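Illustrative sketch, not part of this patch: with crypto_simd in place of ablk_helper, the public algorithms (for example "xts(twofish)") are exposed as asynchronous skciphers wrapping the internal __-prefixed implementations, so an in-kernel caller waits for completion explicitly. All example_* names are hypothetical; the crypto API calls themselves (crypto_alloc_skcipher, crypto_wait_req, etc.) are existing interfaces.

#include <crypto/skcipher.h>
#include <linux/crypto.h>
#include <linux/err.h>
#include <linux/scatterlist.h>
#include <linux/slab.h>

static int example_xts_twofish_encrypt(const u8 *key, unsigned int keylen,
                                       struct scatterlist *src,
                                       struct scatterlist *dst,
                                       unsigned int len, u8 *iv)
{
        struct crypto_skcipher *tfm;
        struct skcipher_request *req;
        DECLARE_CRYPTO_WAIT(wait);
        int err;

        tfm = crypto_alloc_skcipher("xts(twofish)", 0, 0);
        if (IS_ERR(tfm))
                return PTR_ERR(tfm);

        err = crypto_skcipher_setkey(tfm, key, keylen);
        if (err)
                goto out_free_tfm;

        req = skcipher_request_alloc(tfm, GFP_KERNEL);
        if (!req) {
                err = -ENOMEM;
                goto out_free_tfm;
        }

        /* Wait synchronously for the (possibly async) SIMD implementation. */
        skcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
                                      crypto_req_done, &wait);
        skcipher_request_set_crypt(req, src, dst, len, iv);
        err = crypto_wait_req(crypto_skcipher_encrypt(req), &wait);

        skcipher_request_free(req);
out_free_tfm:
        crypto_free_skcipher(tfm);
        return err;
}
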
Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu --- arch/x86/crypto/twofish_avx_glue.c | 315 ++++++++++++------------------------- crypto/Kconfig | 6 +- 2 files changed, 102 insertions(+), 219 deletions(-) (limited to 'arch') diff --git a/arch/x86/crypto/twofish_avx_glue.c b/arch/x86/crypto/twofish_avx_glue.c index 35de5e5a87b3..66d989230d10 100644 --- a/arch/x86/crypto/twofish_avx_glue.c +++ b/arch/x86/crypto/twofish_avx_glue.c @@ -24,23 +24,15 @@ */ #include -#include #include #include #include -#include #include +#include #include -#include -#include -#include #include -#include -#include #include -#include -#include -#include +#include #define TWOFISH_PARALLEL_BLOCKS 8 @@ -60,6 +52,12 @@ asmlinkage void twofish_xts_enc_8way(struct twofish_ctx *ctx, u8 *dst, asmlinkage void twofish_xts_dec_8way(struct twofish_ctx *ctx, u8 *dst, const u8 *src, le128 *iv); +static int twofish_setkey_skcipher(struct crypto_skcipher *tfm, + const u8 *key, unsigned int keylen) +{ + return twofish_setkey(&tfm->base, key, keylen); +} + static inline void twofish_enc_blk_3way(struct twofish_ctx *ctx, u8 *dst, const u8 *src) { @@ -83,14 +81,14 @@ struct twofish_xts_ctx { struct twofish_ctx crypt_ctx; }; -static int xts_twofish_setkey(struct crypto_tfm *tfm, const u8 *key, +static int xts_twofish_setkey(struct crypto_skcipher *tfm, const u8 *key, unsigned int keylen) { - struct twofish_xts_ctx *ctx = crypto_tfm_ctx(tfm); - u32 *flags = &tfm->crt_flags; + struct twofish_xts_ctx *ctx = crypto_skcipher_ctx(tfm); + u32 *flags = &tfm->base.crt_flags; int err; - err = xts_check_key(tfm, key, keylen); + err = xts_verify_key(tfm, key, keylen); if (err) return err; @@ -194,229 +192,113 @@ static const struct common_glue_ctx twofish_dec_xts = { } } }; -static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int ecb_encrypt(struct skcipher_request *req) { - return glue_ecb_crypt_128bit(&twofish_enc, desc, dst, src, nbytes); + return glue_ecb_req_128bit(&twofish_enc, req); } -static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int ecb_decrypt(struct skcipher_request *req) { - return glue_ecb_crypt_128bit(&twofish_dec, desc, dst, src, nbytes); + return glue_ecb_req_128bit(&twofish_dec, req); } -static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int cbc_encrypt(struct skcipher_request *req) { - return glue_cbc_encrypt_128bit(GLUE_FUNC_CAST(twofish_enc_blk), desc, - dst, src, nbytes); + return glue_cbc_encrypt_req_128bit(GLUE_FUNC_CAST(twofish_enc_blk), + req); } -static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int cbc_decrypt(struct skcipher_request *req) { - return glue_cbc_decrypt_128bit(&twofish_dec_cbc, desc, dst, src, - nbytes); + return glue_cbc_decrypt_req_128bit(&twofish_dec_cbc, req); } -static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int ctr_crypt(struct skcipher_request *req) { - return glue_ctr_crypt_128bit(&twofish_ctr, desc, dst, src, nbytes); + return glue_ctr_req_128bit(&twofish_ctr, req); } -static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int xts_encrypt(struct skcipher_request *req) { - struct twofish_xts_ctx *ctx = 
crypto_blkcipher_ctx(desc->tfm); + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct twofish_xts_ctx *ctx = crypto_skcipher_ctx(tfm); - return glue_xts_crypt_128bit(&twofish_enc_xts, desc, dst, src, nbytes, - XTS_TWEAK_CAST(twofish_enc_blk), - &ctx->tweak_ctx, &ctx->crypt_ctx); + return glue_xts_req_128bit(&twofish_enc_xts, req, + XTS_TWEAK_CAST(twofish_enc_blk), + &ctx->tweak_ctx, &ctx->crypt_ctx); } -static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int xts_decrypt(struct skcipher_request *req) { - struct twofish_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct twofish_xts_ctx *ctx = crypto_skcipher_ctx(tfm); - return glue_xts_crypt_128bit(&twofish_dec_xts, desc, dst, src, nbytes, - XTS_TWEAK_CAST(twofish_enc_blk), - &ctx->tweak_ctx, &ctx->crypt_ctx); + return glue_xts_req_128bit(&twofish_dec_xts, req, + XTS_TWEAK_CAST(twofish_enc_blk), + &ctx->tweak_ctx, &ctx->crypt_ctx); } -static struct crypto_alg twofish_algs[] = { { - .cra_name = "__ecb-twofish-avx", - .cra_driver_name = "__driver-ecb-twofish-avx", - .cra_priority = 0, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | - CRYPTO_ALG_INTERNAL, - .cra_blocksize = TF_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct twofish_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_u = { - .blkcipher = { - .min_keysize = TF_MIN_KEY_SIZE, - .max_keysize = TF_MAX_KEY_SIZE, - .setkey = twofish_setkey, - .encrypt = ecb_encrypt, - .decrypt = ecb_decrypt, - }, - }, -}, { - .cra_name = "__cbc-twofish-avx", - .cra_driver_name = "__driver-cbc-twofish-avx", - .cra_priority = 0, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | - CRYPTO_ALG_INTERNAL, - .cra_blocksize = TF_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct twofish_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_u = { - .blkcipher = { - .min_keysize = TF_MIN_KEY_SIZE, - .max_keysize = TF_MAX_KEY_SIZE, - .setkey = twofish_setkey, - .encrypt = cbc_encrypt, - .decrypt = cbc_decrypt, - }, - }, -}, { - .cra_name = "__ctr-twofish-avx", - .cra_driver_name = "__driver-ctr-twofish-avx", - .cra_priority = 0, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | - CRYPTO_ALG_INTERNAL, - .cra_blocksize = 1, - .cra_ctxsize = sizeof(struct twofish_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_u = { - .blkcipher = { - .min_keysize = TF_MIN_KEY_SIZE, - .max_keysize = TF_MAX_KEY_SIZE, - .ivsize = TF_BLOCK_SIZE, - .setkey = twofish_setkey, - .encrypt = ctr_crypt, - .decrypt = ctr_crypt, - }, - }, -}, { - .cra_name = "__xts-twofish-avx", - .cra_driver_name = "__driver-xts-twofish-avx", - .cra_priority = 0, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | - CRYPTO_ALG_INTERNAL, - .cra_blocksize = TF_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct twofish_xts_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_u = { - .blkcipher = { - .min_keysize = TF_MIN_KEY_SIZE * 2, - .max_keysize = TF_MAX_KEY_SIZE * 2, - .ivsize = TF_BLOCK_SIZE, - .setkey = xts_twofish_setkey, - .encrypt = xts_encrypt, - .decrypt = xts_decrypt, - }, - }, -}, { - .cra_name = "ecb(twofish)", - .cra_driver_name = "ecb-twofish-avx", - .cra_priority = 400, - .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, - .cra_blocksize = TF_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct async_helper_ctx), - .cra_alignmask = 0, 
- .cra_type = &crypto_ablkcipher_type, - .cra_module = THIS_MODULE, - .cra_init = ablk_init, - .cra_exit = ablk_exit, - .cra_u = { - .ablkcipher = { - .min_keysize = TF_MIN_KEY_SIZE, - .max_keysize = TF_MAX_KEY_SIZE, - .setkey = ablk_set_key, - .encrypt = ablk_encrypt, - .decrypt = ablk_decrypt, - }, - }, -}, { - .cra_name = "cbc(twofish)", - .cra_driver_name = "cbc-twofish-avx", - .cra_priority = 400, - .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, - .cra_blocksize = TF_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct async_helper_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_ablkcipher_type, - .cra_module = THIS_MODULE, - .cra_init = ablk_init, - .cra_exit = ablk_exit, - .cra_u = { - .ablkcipher = { - .min_keysize = TF_MIN_KEY_SIZE, - .max_keysize = TF_MAX_KEY_SIZE, - .ivsize = TF_BLOCK_SIZE, - .setkey = ablk_set_key, - .encrypt = __ablk_encrypt, - .decrypt = ablk_decrypt, - }, - }, -}, { - .cra_name = "ctr(twofish)", - .cra_driver_name = "ctr-twofish-avx", - .cra_priority = 400, - .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, - .cra_blocksize = 1, - .cra_ctxsize = sizeof(struct async_helper_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_ablkcipher_type, - .cra_module = THIS_MODULE, - .cra_init = ablk_init, - .cra_exit = ablk_exit, - .cra_u = { - .ablkcipher = { - .min_keysize = TF_MIN_KEY_SIZE, - .max_keysize = TF_MAX_KEY_SIZE, - .ivsize = TF_BLOCK_SIZE, - .setkey = ablk_set_key, - .encrypt = ablk_encrypt, - .decrypt = ablk_encrypt, - .geniv = "chainiv", - }, - }, -}, { - .cra_name = "xts(twofish)", - .cra_driver_name = "xts-twofish-avx", - .cra_priority = 400, - .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, - .cra_blocksize = TF_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct async_helper_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_ablkcipher_type, - .cra_module = THIS_MODULE, - .cra_init = ablk_init, - .cra_exit = ablk_exit, - .cra_u = { - .ablkcipher = { - .min_keysize = TF_MIN_KEY_SIZE * 2, - .max_keysize = TF_MAX_KEY_SIZE * 2, - .ivsize = TF_BLOCK_SIZE, - .setkey = ablk_set_key, - .encrypt = ablk_encrypt, - .decrypt = ablk_decrypt, - }, +static struct skcipher_alg twofish_algs[] = { + { + .base.cra_name = "__ecb(twofish)", + .base.cra_driver_name = "__ecb-twofish-avx", + .base.cra_priority = 400, + .base.cra_flags = CRYPTO_ALG_INTERNAL, + .base.cra_blocksize = TF_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct twofish_ctx), + .base.cra_module = THIS_MODULE, + .min_keysize = TF_MIN_KEY_SIZE, + .max_keysize = TF_MAX_KEY_SIZE, + .setkey = twofish_setkey_skcipher, + .encrypt = ecb_encrypt, + .decrypt = ecb_decrypt, + }, { + .base.cra_name = "__cbc(twofish)", + .base.cra_driver_name = "__cbc-twofish-avx", + .base.cra_priority = 400, + .base.cra_flags = CRYPTO_ALG_INTERNAL, + .base.cra_blocksize = TF_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct twofish_ctx), + .base.cra_module = THIS_MODULE, + .min_keysize = TF_MIN_KEY_SIZE, + .max_keysize = TF_MAX_KEY_SIZE, + .ivsize = TF_BLOCK_SIZE, + .setkey = twofish_setkey_skcipher, + .encrypt = cbc_encrypt, + .decrypt = cbc_decrypt, + }, { + .base.cra_name = "__ctr(twofish)", + .base.cra_driver_name = "__ctr-twofish-avx", + .base.cra_priority = 400, + .base.cra_flags = CRYPTO_ALG_INTERNAL, + .base.cra_blocksize = 1, + .base.cra_ctxsize = sizeof(struct twofish_ctx), + .base.cra_module = THIS_MODULE, + .min_keysize = TF_MIN_KEY_SIZE, + .max_keysize = TF_MAX_KEY_SIZE, + .ivsize = TF_BLOCK_SIZE, + .chunksize = TF_BLOCK_SIZE, + .setkey = twofish_setkey_skcipher, + .encrypt = ctr_crypt, + .decrypt = 
ctr_crypt, + }, { + .base.cra_name = "__xts(twofish)", + .base.cra_driver_name = "__xts-twofish-avx", + .base.cra_priority = 400, + .base.cra_flags = CRYPTO_ALG_INTERNAL, + .base.cra_blocksize = TF_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct twofish_xts_ctx), + .base.cra_module = THIS_MODULE, + .min_keysize = 2 * TF_MIN_KEY_SIZE, + .max_keysize = 2 * TF_MAX_KEY_SIZE, + .ivsize = TF_BLOCK_SIZE, + .setkey = xts_twofish_setkey, + .encrypt = xts_encrypt, + .decrypt = xts_decrypt, }, -} }; +}; + +static struct simd_skcipher_alg *twofish_simd_algs[ARRAY_SIZE(twofish_algs)]; static int __init twofish_init(void) { @@ -427,12 +309,15 @@ static int __init twofish_init(void) return -ENODEV; } - return crypto_register_algs(twofish_algs, ARRAY_SIZE(twofish_algs)); + return simd_register_skciphers_compat(twofish_algs, + ARRAY_SIZE(twofish_algs), + twofish_simd_algs); } static void __exit twofish_exit(void) { - crypto_unregister_algs(twofish_algs, ARRAY_SIZE(twofish_algs)); + simd_unregister_skciphers(twofish_algs, ARRAY_SIZE(twofish_algs), + twofish_simd_algs); } module_init(twofish_init); diff --git a/crypto/Kconfig b/crypto/Kconfig index ac19f3ed8692..03ebb34b590c 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -1601,14 +1601,12 @@ config CRYPTO_TWOFISH_X86_64_3WAY config CRYPTO_TWOFISH_AVX_X86_64 tristate "Twofish cipher algorithm (x86_64/AVX)" depends on X86 && 64BIT - select CRYPTO_ALGAPI - select CRYPTO_CRYPTD - select CRYPTO_ABLK_HELPER + select CRYPTO_BLKCIPHER select CRYPTO_GLUE_HELPER_X86 + select CRYPTO_SIMD select CRYPTO_TWOFISH_COMMON select CRYPTO_TWOFISH_X86_64 select CRYPTO_TWOFISH_X86_64_3WAY - select CRYPTO_XTS help Twofish cipher algorithm (x86_64/AVX). -- cgit v1.2.3 From 8f461b1e02ed546fbd0f11611138da67fd85a30f Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 19 Feb 2018 23:48:12 -0800 Subject: crypto: x86/cast5-avx - fix ECB encryption when long sg follows short one With ecb-cast5-avx, if a 128+ byte scatterlist element followed a shorter one, then the algorithm accidentally encrypted/decrypted only 8 bytes instead of the expected 128 bytes. Fix it by setting the encryption/decryption 'fn' correctly. Fixes: c12ab20b162c ("crypto: cast5/avx - avoid using temporary stack buffers") Cc: # v3.8+ Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu --- arch/x86/crypto/cast5_avx_glue.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/crypto/cast5_avx_glue.c b/arch/x86/crypto/cast5_avx_glue.c index dbea6020ffe7..575292a33bdf 100644 --- a/arch/x86/crypto/cast5_avx_glue.c +++ b/arch/x86/crypto/cast5_avx_glue.c @@ -66,8 +66,6 @@ static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk, void (*fn)(struct cast5_ctx *ctx, u8 *dst, const u8 *src); int err; - fn = (enc) ? cast5_ecb_enc_16way : cast5_ecb_dec_16way; - err = blkcipher_walk_virt(desc, walk); desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; @@ -79,6 +77,7 @@ static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk, /* Process multi-block batch */ if (nbytes >= bsize * CAST5_PARALLEL_BLOCKS) { + fn = (enc) ? cast5_ecb_enc_16way : cast5_ecb_dec_16way; do { fn(ctx, wdst, wsrc); -- cgit v1.2.3 From 1e63183a203dba8333677c9490455df48f937ea0 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 19 Feb 2018 23:48:13 -0800 Subject: crypto: x86/cast5-avx - convert to skcipher interface Convert the AVX implementation of CAST5 from the (deprecated) ablkcipher and blkcipher interfaces over to the skcipher interface. 
Note that this includes replacing the use of ablk_helper with crypto_simd. Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu --- arch/x86/crypto/cast5_avx_glue.c | 351 ++++++++++++++------------------------- crypto/Kconfig | 7 +- 2 files changed, 130 insertions(+), 228 deletions(-) (limited to 'arch') diff --git a/arch/x86/crypto/cast5_avx_glue.c b/arch/x86/crypto/cast5_avx_glue.c index 575292a33bdf..56f2a1b0ccf5 100644 --- a/arch/x86/crypto/cast5_avx_glue.c +++ b/arch/x86/crypto/cast5_avx_glue.c @@ -21,18 +21,14 @@ * */ -#include -#include -#include -#include -#include -#include +#include #include #include -#include -#include -#include -#include +#include +#include +#include +#include +#include #define CAST5_PARALLEL_BLOCKS 16 @@ -45,10 +41,17 @@ asmlinkage void cast5_cbc_dec_16way(struct cast5_ctx *ctx, u8 *dst, asmlinkage void cast5_ctr_16way(struct cast5_ctx *ctx, u8 *dst, const u8 *src, __be64 *iv); -static inline bool cast5_fpu_begin(bool fpu_enabled, unsigned int nbytes) +static int cast5_setkey_skcipher(struct crypto_skcipher *tfm, const u8 *key, + unsigned int keylen) { - return glue_fpu_begin(CAST5_BLOCK_SIZE, CAST5_PARALLEL_BLOCKS, - NULL, fpu_enabled, nbytes); + return cast5_setkey(&tfm->base, key, keylen); +} + +static inline bool cast5_fpu_begin(bool fpu_enabled, struct skcipher_walk *walk, + unsigned int nbytes) +{ + return glue_skwalk_fpu_begin(CAST5_BLOCK_SIZE, CAST5_PARALLEL_BLOCKS, + walk, fpu_enabled, nbytes); } static inline void cast5_fpu_end(bool fpu_enabled) @@ -56,24 +59,24 @@ static inline void cast5_fpu_end(bool fpu_enabled) return glue_fpu_end(fpu_enabled); } -static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk, - bool enc) +static int ecb_crypt(struct skcipher_request *req, bool enc) { bool fpu_enabled = false; - struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct cast5_ctx *ctx = crypto_skcipher_ctx(tfm); + struct skcipher_walk walk; const unsigned int bsize = CAST5_BLOCK_SIZE; unsigned int nbytes; void (*fn)(struct cast5_ctx *ctx, u8 *dst, const u8 *src); int err; - err = blkcipher_walk_virt(desc, walk); - desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; + err = skcipher_walk_virt(&walk, req, false); - while ((nbytes = walk->nbytes)) { - u8 *wsrc = walk->src.virt.addr; - u8 *wdst = walk->dst.virt.addr; + while ((nbytes = walk.nbytes)) { + u8 *wsrc = walk.src.virt.addr; + u8 *wdst = walk.dst.virt.addr; - fpu_enabled = cast5_fpu_begin(fpu_enabled, nbytes); + fpu_enabled = cast5_fpu_begin(fpu_enabled, &walk, nbytes); /* Process multi-block batch */ if (nbytes >= bsize * CAST5_PARALLEL_BLOCKS) { @@ -102,76 +105,58 @@ static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk, } while (nbytes >= bsize); done: - err = blkcipher_walk_done(desc, walk, nbytes); + err = skcipher_walk_done(&walk, nbytes); } cast5_fpu_end(fpu_enabled); return err; } -static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int ecb_encrypt(struct skcipher_request *req) { - struct blkcipher_walk walk; - - blkcipher_walk_init(&walk, dst, src, nbytes); - return ecb_crypt(desc, &walk, true); + return ecb_crypt(req, true); } -static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int ecb_decrypt(struct skcipher_request *req) { - struct blkcipher_walk walk; - - blkcipher_walk_init(&walk, dst, src, nbytes); - return 
ecb_crypt(desc, &walk, false); + return ecb_crypt(req, false); } -static unsigned int __cbc_encrypt(struct blkcipher_desc *desc, - struct blkcipher_walk *walk) +static int cbc_encrypt(struct skcipher_request *req) { - struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); const unsigned int bsize = CAST5_BLOCK_SIZE; - unsigned int nbytes = walk->nbytes; - u64 *src = (u64 *)walk->src.virt.addr; - u64 *dst = (u64 *)walk->dst.virt.addr; - u64 *iv = (u64 *)walk->iv; - - do { - *dst = *src ^ *iv; - __cast5_encrypt(ctx, (u8 *)dst, (u8 *)dst); - iv = dst; - - src += 1; - dst += 1; - nbytes -= bsize; - } while (nbytes >= bsize); - - *(u64 *)walk->iv = *iv; - return nbytes; -} - -static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) -{ - struct blkcipher_walk walk; + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct cast5_ctx *ctx = crypto_skcipher_ctx(tfm); + struct skcipher_walk walk; + unsigned int nbytes; int err; - blkcipher_walk_init(&walk, dst, src, nbytes); - err = blkcipher_walk_virt(desc, &walk); + err = skcipher_walk_virt(&walk, req, false); while ((nbytes = walk.nbytes)) { - nbytes = __cbc_encrypt(desc, &walk); - err = blkcipher_walk_done(desc, &walk, nbytes); + u64 *src = (u64 *)walk.src.virt.addr; + u64 *dst = (u64 *)walk.dst.virt.addr; + u64 *iv = (u64 *)walk.iv; + + do { + *dst = *src ^ *iv; + __cast5_encrypt(ctx, (u8 *)dst, (u8 *)dst); + iv = dst; + src++; + dst++; + nbytes -= bsize; + } while (nbytes >= bsize); + + *(u64 *)walk.iv = *iv; + err = skcipher_walk_done(&walk, nbytes); } return err; } -static unsigned int __cbc_decrypt(struct blkcipher_desc *desc, - struct blkcipher_walk *walk) +static unsigned int __cbc_decrypt(struct cast5_ctx *ctx, + struct skcipher_walk *walk) { - struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); const unsigned int bsize = CAST5_BLOCK_SIZE; unsigned int nbytes = walk->nbytes; u64 *src = (u64 *)walk->src.virt.addr; @@ -223,31 +208,29 @@ done: return nbytes; } -static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int cbc_decrypt(struct skcipher_request *req) { + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct cast5_ctx *ctx = crypto_skcipher_ctx(tfm); bool fpu_enabled = false; - struct blkcipher_walk walk; + struct skcipher_walk walk; + unsigned int nbytes; int err; - blkcipher_walk_init(&walk, dst, src, nbytes); - err = blkcipher_walk_virt(desc, &walk); - desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; + err = skcipher_walk_virt(&walk, req, false); while ((nbytes = walk.nbytes)) { - fpu_enabled = cast5_fpu_begin(fpu_enabled, nbytes); - nbytes = __cbc_decrypt(desc, &walk); - err = blkcipher_walk_done(desc, &walk, nbytes); + fpu_enabled = cast5_fpu_begin(fpu_enabled, &walk, nbytes); + nbytes = __cbc_decrypt(ctx, &walk); + err = skcipher_walk_done(&walk, nbytes); } cast5_fpu_end(fpu_enabled); return err; } -static void ctr_crypt_final(struct blkcipher_desc *desc, - struct blkcipher_walk *walk) +static void ctr_crypt_final(struct skcipher_walk *walk, struct cast5_ctx *ctx) { - struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); u8 *ctrblk = walk->iv; u8 keystream[CAST5_BLOCK_SIZE]; u8 *src = walk->src.virt.addr; @@ -260,10 +243,9 @@ static void ctr_crypt_final(struct blkcipher_desc *desc, crypto_inc(ctrblk, CAST5_BLOCK_SIZE); } -static unsigned int __ctr_crypt(struct blkcipher_desc *desc, - struct blkcipher_walk *walk) +static unsigned int __ctr_crypt(struct 
skcipher_walk *walk, + struct cast5_ctx *ctx) { - struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); const unsigned int bsize = CAST5_BLOCK_SIZE; unsigned int nbytes = walk->nbytes; u64 *src = (u64 *)walk->src.virt.addr; @@ -306,162 +288,80 @@ done: return nbytes; } -static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int ctr_crypt(struct skcipher_request *req) { + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct cast5_ctx *ctx = crypto_skcipher_ctx(tfm); bool fpu_enabled = false; - struct blkcipher_walk walk; + struct skcipher_walk walk; + unsigned int nbytes; int err; - blkcipher_walk_init(&walk, dst, src, nbytes); - err = blkcipher_walk_virt_block(desc, &walk, CAST5_BLOCK_SIZE); - desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; + err = skcipher_walk_virt(&walk, req, false); while ((nbytes = walk.nbytes) >= CAST5_BLOCK_SIZE) { - fpu_enabled = cast5_fpu_begin(fpu_enabled, nbytes); - nbytes = __ctr_crypt(desc, &walk); - err = blkcipher_walk_done(desc, &walk, nbytes); + fpu_enabled = cast5_fpu_begin(fpu_enabled, &walk, nbytes); + nbytes = __ctr_crypt(&walk, ctx); + err = skcipher_walk_done(&walk, nbytes); } cast5_fpu_end(fpu_enabled); if (walk.nbytes) { - ctr_crypt_final(desc, &walk); - err = blkcipher_walk_done(desc, &walk, 0); + ctr_crypt_final(&walk, ctx); + err = skcipher_walk_done(&walk, 0); } return err; } +static struct skcipher_alg cast5_algs[] = { + { + .base.cra_name = "__ecb(cast5)", + .base.cra_driver_name = "__ecb-cast5-avx", + .base.cra_priority = 200, + .base.cra_flags = CRYPTO_ALG_INTERNAL, + .base.cra_blocksize = CAST5_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct cast5_ctx), + .base.cra_module = THIS_MODULE, + .min_keysize = CAST5_MIN_KEY_SIZE, + .max_keysize = CAST5_MAX_KEY_SIZE, + .setkey = cast5_setkey_skcipher, + .encrypt = ecb_encrypt, + .decrypt = ecb_decrypt, + }, { + .base.cra_name = "__cbc(cast5)", + .base.cra_driver_name = "__cbc-cast5-avx", + .base.cra_priority = 200, + .base.cra_flags = CRYPTO_ALG_INTERNAL, + .base.cra_blocksize = CAST5_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct cast5_ctx), + .base.cra_module = THIS_MODULE, + .min_keysize = CAST5_MIN_KEY_SIZE, + .max_keysize = CAST5_MAX_KEY_SIZE, + .ivsize = CAST5_BLOCK_SIZE, + .setkey = cast5_setkey_skcipher, + .encrypt = cbc_encrypt, + .decrypt = cbc_decrypt, + }, { + .base.cra_name = "__ctr(cast5)", + .base.cra_driver_name = "__ctr-cast5-avx", + .base.cra_priority = 200, + .base.cra_flags = CRYPTO_ALG_INTERNAL, + .base.cra_blocksize = 1, + .base.cra_ctxsize = sizeof(struct cast5_ctx), + .base.cra_module = THIS_MODULE, + .min_keysize = CAST5_MIN_KEY_SIZE, + .max_keysize = CAST5_MAX_KEY_SIZE, + .ivsize = CAST5_BLOCK_SIZE, + .chunksize = CAST5_BLOCK_SIZE, + .setkey = cast5_setkey_skcipher, + .encrypt = ctr_crypt, + .decrypt = ctr_crypt, + } +}; -static struct crypto_alg cast5_algs[6] = { { - .cra_name = "__ecb-cast5-avx", - .cra_driver_name = "__driver-ecb-cast5-avx", - .cra_priority = 0, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | - CRYPTO_ALG_INTERNAL, - .cra_blocksize = CAST5_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct cast5_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_u = { - .blkcipher = { - .min_keysize = CAST5_MIN_KEY_SIZE, - .max_keysize = CAST5_MAX_KEY_SIZE, - .setkey = cast5_setkey, - .encrypt = ecb_encrypt, - .decrypt = ecb_decrypt, - }, - }, -}, { - .cra_name = "__cbc-cast5-avx", - .cra_driver_name = "__driver-cbc-cast5-avx", - .cra_priority 
= 0, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | - CRYPTO_ALG_INTERNAL, - .cra_blocksize = CAST5_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct cast5_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_u = { - .blkcipher = { - .min_keysize = CAST5_MIN_KEY_SIZE, - .max_keysize = CAST5_MAX_KEY_SIZE, - .setkey = cast5_setkey, - .encrypt = cbc_encrypt, - .decrypt = cbc_decrypt, - }, - }, -}, { - .cra_name = "__ctr-cast5-avx", - .cra_driver_name = "__driver-ctr-cast5-avx", - .cra_priority = 0, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | - CRYPTO_ALG_INTERNAL, - .cra_blocksize = 1, - .cra_ctxsize = sizeof(struct cast5_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_u = { - .blkcipher = { - .min_keysize = CAST5_MIN_KEY_SIZE, - .max_keysize = CAST5_MAX_KEY_SIZE, - .ivsize = CAST5_BLOCK_SIZE, - .setkey = cast5_setkey, - .encrypt = ctr_crypt, - .decrypt = ctr_crypt, - }, - }, -}, { - .cra_name = "ecb(cast5)", - .cra_driver_name = "ecb-cast5-avx", - .cra_priority = 200, - .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, - .cra_blocksize = CAST5_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct async_helper_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_ablkcipher_type, - .cra_module = THIS_MODULE, - .cra_init = ablk_init, - .cra_exit = ablk_exit, - .cra_u = { - .ablkcipher = { - .min_keysize = CAST5_MIN_KEY_SIZE, - .max_keysize = CAST5_MAX_KEY_SIZE, - .setkey = ablk_set_key, - .encrypt = ablk_encrypt, - .decrypt = ablk_decrypt, - }, - }, -}, { - .cra_name = "cbc(cast5)", - .cra_driver_name = "cbc-cast5-avx", - .cra_priority = 200, - .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, - .cra_blocksize = CAST5_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct async_helper_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_ablkcipher_type, - .cra_module = THIS_MODULE, - .cra_init = ablk_init, - .cra_exit = ablk_exit, - .cra_u = { - .ablkcipher = { - .min_keysize = CAST5_MIN_KEY_SIZE, - .max_keysize = CAST5_MAX_KEY_SIZE, - .ivsize = CAST5_BLOCK_SIZE, - .setkey = ablk_set_key, - .encrypt = __ablk_encrypt, - .decrypt = ablk_decrypt, - }, - }, -}, { - .cra_name = "ctr(cast5)", - .cra_driver_name = "ctr-cast5-avx", - .cra_priority = 200, - .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, - .cra_blocksize = 1, - .cra_ctxsize = sizeof(struct async_helper_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_ablkcipher_type, - .cra_module = THIS_MODULE, - .cra_init = ablk_init, - .cra_exit = ablk_exit, - .cra_u = { - .ablkcipher = { - .min_keysize = CAST5_MIN_KEY_SIZE, - .max_keysize = CAST5_MAX_KEY_SIZE, - .ivsize = CAST5_BLOCK_SIZE, - .setkey = ablk_set_key, - .encrypt = ablk_encrypt, - .decrypt = ablk_encrypt, - .geniv = "chainiv", - }, - }, -} }; +static struct simd_skcipher_alg *cast5_simd_algs[ARRAY_SIZE(cast5_algs)]; static int __init cast5_init(void) { @@ -473,12 +373,15 @@ static int __init cast5_init(void) return -ENODEV; } - return crypto_register_algs(cast5_algs, ARRAY_SIZE(cast5_algs)); + return simd_register_skciphers_compat(cast5_algs, + ARRAY_SIZE(cast5_algs), + cast5_simd_algs); } static void __exit cast5_exit(void) { - crypto_unregister_algs(cast5_algs, ARRAY_SIZE(cast5_algs)); + simd_unregister_skciphers(cast5_algs, ARRAY_SIZE(cast5_algs), + cast5_simd_algs); } module_init(cast5_init); diff --git a/crypto/Kconfig b/crypto/Kconfig index 03ebb34b590c..7bd720fcc8d0 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -1238,11 +1238,10 @@ config CRYPTO_CAST5 config 
CRYPTO_CAST5_AVX_X86_64 tristate "CAST5 (CAST-128) cipher algorithm (x86_64/AVX)" depends on X86 && 64BIT - select CRYPTO_ALGAPI - select CRYPTO_CRYPTD - select CRYPTO_ABLK_HELPER - select CRYPTO_CAST_COMMON + select CRYPTO_BLKCIPHER select CRYPTO_CAST5 + select CRYPTO_CAST_COMMON + select CRYPTO_SIMD help The CAST5 encryption algorithm (synonymous with CAST-128) is described in RFC2144. -- cgit v1.2.3 From f51a1fa43972c93e08a608df51182d90ab8d7594 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 19 Feb 2018 23:48:14 -0800 Subject: crypto: x86/cast6-avx - remove LRW algorithm The LRW template now wraps an ECB mode algorithm rather than the block cipher directly. Therefore it is now redundant for crypto modules to wrap their ECB code with generic LRW code themselves via lrw_crypt(). Remove the lrw-cast6-avx algorithm which did this. Users who request lrw(cast6) and previously would have gotten lrw-cast6-avx will now get lrw(ecb-cast6-avx) instead, which is just as fast. Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu --- arch/x86/crypto/cast6_avx_glue.c | 180 +-------------------------------------- crypto/Kconfig | 1 - 2 files changed, 1 insertion(+), 180 deletions(-) (limited to 'arch') diff --git a/arch/x86/crypto/cast6_avx_glue.c b/arch/x86/crypto/cast6_avx_glue.c index 50e684768c55..d2fbf2be771e 100644 --- a/arch/x86/crypto/cast6_avx_glue.c +++ b/arch/x86/crypto/cast6_avx_glue.c @@ -34,9 +34,7 @@ #include #include #include -#include #include -#include #include #define CAST6_PARALLEL_BLOCKS 8 @@ -189,134 +187,6 @@ static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst, return glue_ctr_crypt_128bit(&cast6_ctr, desc, dst, src, nbytes); } -static inline bool cast6_fpu_begin(bool fpu_enabled, unsigned int nbytes) -{ - return glue_fpu_begin(CAST6_BLOCK_SIZE, CAST6_PARALLEL_BLOCKS, - NULL, fpu_enabled, nbytes); -} - -static inline void cast6_fpu_end(bool fpu_enabled) -{ - glue_fpu_end(fpu_enabled); -} - -struct crypt_priv { - struct cast6_ctx *ctx; - bool fpu_enabled; -}; - -static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes) -{ - const unsigned int bsize = CAST6_BLOCK_SIZE; - struct crypt_priv *ctx = priv; - int i; - - ctx->fpu_enabled = cast6_fpu_begin(ctx->fpu_enabled, nbytes); - - if (nbytes == bsize * CAST6_PARALLEL_BLOCKS) { - cast6_ecb_enc_8way(ctx->ctx, srcdst, srcdst); - return; - } - - for (i = 0; i < nbytes / bsize; i++, srcdst += bsize) - __cast6_encrypt(ctx->ctx, srcdst, srcdst); -} - -static void decrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes) -{ - const unsigned int bsize = CAST6_BLOCK_SIZE; - struct crypt_priv *ctx = priv; - int i; - - ctx->fpu_enabled = cast6_fpu_begin(ctx->fpu_enabled, nbytes); - - if (nbytes == bsize * CAST6_PARALLEL_BLOCKS) { - cast6_ecb_dec_8way(ctx->ctx, srcdst, srcdst); - return; - } - - for (i = 0; i < nbytes / bsize; i++, srcdst += bsize) - __cast6_decrypt(ctx->ctx, srcdst, srcdst); -} - -struct cast6_lrw_ctx { - struct lrw_table_ctx lrw_table; - struct cast6_ctx cast6_ctx; -}; - -static int lrw_cast6_setkey(struct crypto_tfm *tfm, const u8 *key, - unsigned int keylen) -{ - struct cast6_lrw_ctx *ctx = crypto_tfm_ctx(tfm); - int err; - - err = __cast6_setkey(&ctx->cast6_ctx, key, keylen - CAST6_BLOCK_SIZE, - &tfm->crt_flags); - if (err) - return err; - - return lrw_init_table(&ctx->lrw_table, key + keylen - CAST6_BLOCK_SIZE); -} - -static int lrw_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) -{ - struct cast6_lrw_ctx *ctx = 
crypto_blkcipher_ctx(desc->tfm); - be128 buf[CAST6_PARALLEL_BLOCKS]; - struct crypt_priv crypt_ctx = { - .ctx = &ctx->cast6_ctx, - .fpu_enabled = false, - }; - struct lrw_crypt_req req = { - .tbuf = buf, - .tbuflen = sizeof(buf), - - .table_ctx = &ctx->lrw_table, - .crypt_ctx = &crypt_ctx, - .crypt_fn = encrypt_callback, - }; - int ret; - - desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; - ret = lrw_crypt(desc, dst, src, nbytes, &req); - cast6_fpu_end(crypt_ctx.fpu_enabled); - - return ret; -} - -static int lrw_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) -{ - struct cast6_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - be128 buf[CAST6_PARALLEL_BLOCKS]; - struct crypt_priv crypt_ctx = { - .ctx = &ctx->cast6_ctx, - .fpu_enabled = false, - }; - struct lrw_crypt_req req = { - .tbuf = buf, - .tbuflen = sizeof(buf), - - .table_ctx = &ctx->lrw_table, - .crypt_ctx = &crypt_ctx, - .crypt_fn = decrypt_callback, - }; - int ret; - - desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; - ret = lrw_crypt(desc, dst, src, nbytes, &req); - cast6_fpu_end(crypt_ctx.fpu_enabled); - - return ret; -} - -static void lrw_exit_tfm(struct crypto_tfm *tfm) -{ - struct cast6_lrw_ctx *ctx = crypto_tfm_ctx(tfm); - - lrw_free_table(&ctx->lrw_table); -} - struct cast6_xts_ctx { struct cast6_ctx tweak_ctx; struct cast6_ctx crypt_ctx; @@ -363,7 +233,7 @@ static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, &ctx->tweak_ctx, &ctx->crypt_ctx); } -static struct crypto_alg cast6_algs[10] = { { +static struct crypto_alg cast6_algs[] = { { .cra_name = "__ecb-cast6-avx", .cra_driver_name = "__driver-ecb-cast6-avx", .cra_priority = 0, @@ -424,30 +294,6 @@ static struct crypto_alg cast6_algs[10] = { { .decrypt = ctr_crypt, }, }, -}, { - .cra_name = "__lrw-cast6-avx", - .cra_driver_name = "__driver-lrw-cast6-avx", - .cra_priority = 0, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | - CRYPTO_ALG_INTERNAL, - .cra_blocksize = CAST6_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct cast6_lrw_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_exit = lrw_exit_tfm, - .cra_u = { - .blkcipher = { - .min_keysize = CAST6_MIN_KEY_SIZE + - CAST6_BLOCK_SIZE, - .max_keysize = CAST6_MAX_KEY_SIZE + - CAST6_BLOCK_SIZE, - .ivsize = CAST6_BLOCK_SIZE, - .setkey = lrw_cast6_setkey, - .encrypt = lrw_encrypt, - .decrypt = lrw_decrypt, - }, - }, }, { .cra_name = "__xts-cast6-avx", .cra_driver_name = "__driver-xts-cast6-avx", @@ -535,30 +381,6 @@ static struct crypto_alg cast6_algs[10] = { { .geniv = "chainiv", }, }, -}, { - .cra_name = "lrw(cast6)", - .cra_driver_name = "lrw-cast6-avx", - .cra_priority = 200, - .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, - .cra_blocksize = CAST6_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct async_helper_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_ablkcipher_type, - .cra_module = THIS_MODULE, - .cra_init = ablk_init, - .cra_exit = ablk_exit, - .cra_u = { - .ablkcipher = { - .min_keysize = CAST6_MIN_KEY_SIZE + - CAST6_BLOCK_SIZE, - .max_keysize = CAST6_MAX_KEY_SIZE + - CAST6_BLOCK_SIZE, - .ivsize = CAST6_BLOCK_SIZE, - .setkey = ablk_set_key, - .encrypt = ablk_encrypt, - .decrypt = ablk_decrypt, - }, - }, }, { .cra_name = "xts(cast6)", .cra_driver_name = "xts-cast6-avx", diff --git a/crypto/Kconfig b/crypto/Kconfig index 7bd720fcc8d0..1e5ee62a11f6 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -1266,7 +1266,6 @@ config CRYPTO_CAST6_AVX_X86_64 select CRYPTO_GLUE_HELPER_X86 select 
CRYPTO_CAST_COMMON select CRYPTO_CAST6 - select CRYPTO_LRW select CRYPTO_XTS help The CAST6 encryption algorithm (synonymous with CAST-256) is -- cgit v1.2.3 From 4bd969243143e224d5aa450fdea1c882a75aa58e Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 19 Feb 2018 23:48:15 -0800 Subject: crypto: x86/cast6-avx - convert to skcipher interface Convert the AVX implementation of CAST6 from the (deprecated) ablkcipher and blkcipher interfaces over to the skcipher interface. Note that this includes replacing the use of ablk_helper with crypto_simd. Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu --- arch/x86/crypto/cast6_avx_glue.c | 311 +++++++++++++-------------------------- crypto/Kconfig | 9 +- 2 files changed, 104 insertions(+), 216 deletions(-) (limited to 'arch') diff --git a/arch/x86/crypto/cast6_avx_glue.c b/arch/x86/crypto/cast6_avx_glue.c index d2fbf2be771e..9fb66b5e94b2 100644 --- a/arch/x86/crypto/cast6_avx_glue.c +++ b/arch/x86/crypto/cast6_avx_glue.c @@ -24,16 +24,12 @@ */ #include -#include #include #include #include -#include #include #include -#include -#include -#include +#include #include #include @@ -54,6 +50,12 @@ asmlinkage void cast6_xts_enc_8way(struct cast6_ctx *ctx, u8 *dst, asmlinkage void cast6_xts_dec_8way(struct cast6_ctx *ctx, u8 *dst, const u8 *src, le128 *iv); +static int cast6_setkey_skcipher(struct crypto_skcipher *tfm, + const u8 *key, unsigned int keylen) +{ + return cast6_setkey(&tfm->base, key, keylen); +} + static void cast6_xts_enc(void *ctx, u128 *dst, const u128 *src, le128 *iv) { glue_xts_crypt_128bit_one(ctx, dst, src, iv, @@ -155,36 +157,30 @@ static const struct common_glue_ctx cast6_dec_xts = { } } }; -static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int ecb_encrypt(struct skcipher_request *req) { - return glue_ecb_crypt_128bit(&cast6_enc, desc, dst, src, nbytes); + return glue_ecb_req_128bit(&cast6_enc, req); } -static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int ecb_decrypt(struct skcipher_request *req) { - return glue_ecb_crypt_128bit(&cast6_dec, desc, dst, src, nbytes); + return glue_ecb_req_128bit(&cast6_dec, req); } -static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int cbc_encrypt(struct skcipher_request *req) { - return glue_cbc_encrypt_128bit(GLUE_FUNC_CAST(__cast6_encrypt), desc, - dst, src, nbytes); + return glue_cbc_encrypt_req_128bit(GLUE_FUNC_CAST(__cast6_encrypt), + req); } -static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int cbc_decrypt(struct skcipher_request *req) { - return glue_cbc_decrypt_128bit(&cast6_dec_cbc, desc, dst, src, - nbytes); + return glue_cbc_decrypt_req_128bit(&cast6_dec_cbc, req); } -static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int ctr_crypt(struct skcipher_request *req) { - return glue_ctr_crypt_128bit(&cast6_ctr, desc, dst, src, nbytes); + return glue_ctr_req_128bit(&cast6_ctr, req); } struct cast6_xts_ctx { @@ -192,14 +188,14 @@ struct cast6_xts_ctx { struct cast6_ctx crypt_ctx; }; -static int xts_cast6_setkey(struct crypto_tfm *tfm, const u8 *key, - unsigned int keylen) +static int xts_cast6_setkey(struct crypto_skcipher *tfm, const u8 *key, + unsigned int keylen) { - struct 
cast6_xts_ctx *ctx = crypto_tfm_ctx(tfm); - u32 *flags = &tfm->crt_flags; + struct cast6_xts_ctx *ctx = crypto_skcipher_ctx(tfm); + u32 *flags = &tfm->base.crt_flags; int err; - err = xts_check_key(tfm, key, keylen); + err = xts_verify_key(tfm, key, keylen); if (err) return err; @@ -213,197 +209,87 @@ static int xts_cast6_setkey(struct crypto_tfm *tfm, const u8 *key, flags); } -static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int xts_encrypt(struct skcipher_request *req) { - struct cast6_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct cast6_xts_ctx *ctx = crypto_skcipher_ctx(tfm); - return glue_xts_crypt_128bit(&cast6_enc_xts, desc, dst, src, nbytes, - XTS_TWEAK_CAST(__cast6_encrypt), - &ctx->tweak_ctx, &ctx->crypt_ctx); + return glue_xts_req_128bit(&cast6_enc_xts, req, + XTS_TWEAK_CAST(__cast6_encrypt), + &ctx->tweak_ctx, &ctx->crypt_ctx); } -static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int xts_decrypt(struct skcipher_request *req) { - struct cast6_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct cast6_xts_ctx *ctx = crypto_skcipher_ctx(tfm); - return glue_xts_crypt_128bit(&cast6_dec_xts, desc, dst, src, nbytes, - XTS_TWEAK_CAST(__cast6_encrypt), - &ctx->tweak_ctx, &ctx->crypt_ctx); + return glue_xts_req_128bit(&cast6_dec_xts, req, + XTS_TWEAK_CAST(__cast6_encrypt), + &ctx->tweak_ctx, &ctx->crypt_ctx); } -static struct crypto_alg cast6_algs[] = { { - .cra_name = "__ecb-cast6-avx", - .cra_driver_name = "__driver-ecb-cast6-avx", - .cra_priority = 0, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | - CRYPTO_ALG_INTERNAL, - .cra_blocksize = CAST6_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct cast6_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_u = { - .blkcipher = { - .min_keysize = CAST6_MIN_KEY_SIZE, - .max_keysize = CAST6_MAX_KEY_SIZE, - .setkey = cast6_setkey, - .encrypt = ecb_encrypt, - .decrypt = ecb_decrypt, - }, - }, -}, { - .cra_name = "__cbc-cast6-avx", - .cra_driver_name = "__driver-cbc-cast6-avx", - .cra_priority = 0, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | - CRYPTO_ALG_INTERNAL, - .cra_blocksize = CAST6_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct cast6_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_u = { - .blkcipher = { - .min_keysize = CAST6_MIN_KEY_SIZE, - .max_keysize = CAST6_MAX_KEY_SIZE, - .setkey = cast6_setkey, - .encrypt = cbc_encrypt, - .decrypt = cbc_decrypt, - }, - }, -}, { - .cra_name = "__ctr-cast6-avx", - .cra_driver_name = "__driver-ctr-cast6-avx", - .cra_priority = 0, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | - CRYPTO_ALG_INTERNAL, - .cra_blocksize = 1, - .cra_ctxsize = sizeof(struct cast6_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_u = { - .blkcipher = { - .min_keysize = CAST6_MIN_KEY_SIZE, - .max_keysize = CAST6_MAX_KEY_SIZE, - .ivsize = CAST6_BLOCK_SIZE, - .setkey = cast6_setkey, - .encrypt = ctr_crypt, - .decrypt = ctr_crypt, - }, - }, -}, { - .cra_name = "__xts-cast6-avx", - .cra_driver_name = "__driver-xts-cast6-avx", - .cra_priority = 0, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | - CRYPTO_ALG_INTERNAL, - .cra_blocksize = CAST6_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct cast6_xts_ctx), - 
.cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_u = { - .blkcipher = { - .min_keysize = CAST6_MIN_KEY_SIZE * 2, - .max_keysize = CAST6_MAX_KEY_SIZE * 2, - .ivsize = CAST6_BLOCK_SIZE, - .setkey = xts_cast6_setkey, - .encrypt = xts_encrypt, - .decrypt = xts_decrypt, - }, - }, -}, { - .cra_name = "ecb(cast6)", - .cra_driver_name = "ecb-cast6-avx", - .cra_priority = 200, - .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, - .cra_blocksize = CAST6_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct async_helper_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_ablkcipher_type, - .cra_module = THIS_MODULE, - .cra_init = ablk_init, - .cra_exit = ablk_exit, - .cra_u = { - .ablkcipher = { - .min_keysize = CAST6_MIN_KEY_SIZE, - .max_keysize = CAST6_MAX_KEY_SIZE, - .setkey = ablk_set_key, - .encrypt = ablk_encrypt, - .decrypt = ablk_decrypt, - }, - }, -}, { - .cra_name = "cbc(cast6)", - .cra_driver_name = "cbc-cast6-avx", - .cra_priority = 200, - .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, - .cra_blocksize = CAST6_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct async_helper_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_ablkcipher_type, - .cra_module = THIS_MODULE, - .cra_init = ablk_init, - .cra_exit = ablk_exit, - .cra_u = { - .ablkcipher = { - .min_keysize = CAST6_MIN_KEY_SIZE, - .max_keysize = CAST6_MAX_KEY_SIZE, - .ivsize = CAST6_BLOCK_SIZE, - .setkey = ablk_set_key, - .encrypt = __ablk_encrypt, - .decrypt = ablk_decrypt, - }, - }, -}, { - .cra_name = "ctr(cast6)", - .cra_driver_name = "ctr-cast6-avx", - .cra_priority = 200, - .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, - .cra_blocksize = 1, - .cra_ctxsize = sizeof(struct async_helper_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_ablkcipher_type, - .cra_module = THIS_MODULE, - .cra_init = ablk_init, - .cra_exit = ablk_exit, - .cra_u = { - .ablkcipher = { - .min_keysize = CAST6_MIN_KEY_SIZE, - .max_keysize = CAST6_MAX_KEY_SIZE, - .ivsize = CAST6_BLOCK_SIZE, - .setkey = ablk_set_key, - .encrypt = ablk_encrypt, - .decrypt = ablk_encrypt, - .geniv = "chainiv", - }, - }, -}, { - .cra_name = "xts(cast6)", - .cra_driver_name = "xts-cast6-avx", - .cra_priority = 200, - .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, - .cra_blocksize = CAST6_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct async_helper_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_ablkcipher_type, - .cra_module = THIS_MODULE, - .cra_init = ablk_init, - .cra_exit = ablk_exit, - .cra_u = { - .ablkcipher = { - .min_keysize = CAST6_MIN_KEY_SIZE * 2, - .max_keysize = CAST6_MAX_KEY_SIZE * 2, - .ivsize = CAST6_BLOCK_SIZE, - .setkey = ablk_set_key, - .encrypt = ablk_encrypt, - .decrypt = ablk_decrypt, - }, +static struct skcipher_alg cast6_algs[] = { + { + .base.cra_name = "__ecb(cast6)", + .base.cra_driver_name = "__ecb-cast6-avx", + .base.cra_priority = 200, + .base.cra_flags = CRYPTO_ALG_INTERNAL, + .base.cra_blocksize = CAST6_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct cast6_ctx), + .base.cra_module = THIS_MODULE, + .min_keysize = CAST6_MIN_KEY_SIZE, + .max_keysize = CAST6_MAX_KEY_SIZE, + .setkey = cast6_setkey_skcipher, + .encrypt = ecb_encrypt, + .decrypt = ecb_decrypt, + }, { + .base.cra_name = "__cbc(cast6)", + .base.cra_driver_name = "__cbc-cast6-avx", + .base.cra_priority = 200, + .base.cra_flags = CRYPTO_ALG_INTERNAL, + .base.cra_blocksize = CAST6_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct cast6_ctx), + .base.cra_module = THIS_MODULE, + .min_keysize = CAST6_MIN_KEY_SIZE, + .max_keysize 
= CAST6_MAX_KEY_SIZE, + .ivsize = CAST6_BLOCK_SIZE, + .setkey = cast6_setkey_skcipher, + .encrypt = cbc_encrypt, + .decrypt = cbc_decrypt, + }, { + .base.cra_name = "__ctr(cast6)", + .base.cra_driver_name = "__ctr-cast6-avx", + .base.cra_priority = 200, + .base.cra_flags = CRYPTO_ALG_INTERNAL, + .base.cra_blocksize = 1, + .base.cra_ctxsize = sizeof(struct cast6_ctx), + .base.cra_module = THIS_MODULE, + .min_keysize = CAST6_MIN_KEY_SIZE, + .max_keysize = CAST6_MAX_KEY_SIZE, + .ivsize = CAST6_BLOCK_SIZE, + .chunksize = CAST6_BLOCK_SIZE, + .setkey = cast6_setkey_skcipher, + .encrypt = ctr_crypt, + .decrypt = ctr_crypt, + }, { + .base.cra_name = "__xts(cast6)", + .base.cra_driver_name = "__xts-cast6-avx", + .base.cra_priority = 200, + .base.cra_flags = CRYPTO_ALG_INTERNAL, + .base.cra_blocksize = CAST6_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct cast6_xts_ctx), + .base.cra_module = THIS_MODULE, + .min_keysize = 2 * CAST6_MIN_KEY_SIZE, + .max_keysize = 2 * CAST6_MAX_KEY_SIZE, + .ivsize = CAST6_BLOCK_SIZE, + .setkey = xts_cast6_setkey, + .encrypt = xts_encrypt, + .decrypt = xts_decrypt, }, -} }; +}; + +static struct simd_skcipher_alg *cast6_simd_algs[ARRAY_SIZE(cast6_algs)]; static int __init cast6_init(void) { @@ -415,12 +301,15 @@ static int __init cast6_init(void) return -ENODEV; } - return crypto_register_algs(cast6_algs, ARRAY_SIZE(cast6_algs)); + return simd_register_skciphers_compat(cast6_algs, + ARRAY_SIZE(cast6_algs), + cast6_simd_algs); } static void __exit cast6_exit(void) { - crypto_unregister_algs(cast6_algs, ARRAY_SIZE(cast6_algs)); + simd_unregister_skciphers(cast6_algs, ARRAY_SIZE(cast6_algs), + cast6_simd_algs); } module_init(cast6_init); diff --git a/crypto/Kconfig b/crypto/Kconfig index 1e5ee62a11f6..536a7fac205f 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -1260,12 +1260,11 @@ config CRYPTO_CAST6 config CRYPTO_CAST6_AVX_X86_64 tristate "CAST6 (CAST-256) cipher algorithm (x86_64/AVX)" depends on X86 && 64BIT - select CRYPTO_ALGAPI - select CRYPTO_CRYPTD - select CRYPTO_ABLK_HELPER - select CRYPTO_GLUE_HELPER_X86 - select CRYPTO_CAST_COMMON + select CRYPTO_BLKCIPHER select CRYPTO_CAST6 + select CRYPTO_CAST_COMMON + select CRYPTO_GLUE_HELPER_X86 + select CRYPTO_SIMD select CRYPTO_XTS help The CAST6 encryption algorithm (synonymous with CAST-256) is -- cgit v1.2.3 From c1679171c61c2998a16dc4cc8b0ed0569db1fba9 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 19 Feb 2018 23:48:16 -0800 Subject: crypto: x86/blowfish: convert to skcipher interface Convert the x86 asm implementation of Blowfish from the (deprecated) blkcipher interface over to the skcipher interface. 
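Illustrative caller-side sketch, not part of this patch: with the blkcipher interface gone, in-kernel users reach "cbc(blowfish)" through the skcipher API. The CRYPTO_ALG_ASYNC mask below asks for a synchronous implementation so an on-stack request is safe; all example_* names are hypothetical.

#include <crypto/blowfish.h>
#include <crypto/skcipher.h>
#include <linux/crypto.h>
#include <linux/err.h>
#include <linux/scatterlist.h>

static int example_cbc_blowfish_encrypt(const u8 *key, unsigned int keylen,
                                        struct scatterlist *sg,
                                        unsigned int len,
                                        u8 iv[BF_BLOCK_SIZE])
{
        struct crypto_skcipher *tfm;
        int err;

        tfm = crypto_alloc_skcipher("cbc(blowfish)", 0, CRYPTO_ALG_ASYNC);
        if (IS_ERR(tfm))
                return PTR_ERR(tfm);

        err = crypto_skcipher_setkey(tfm, key, keylen);
        if (!err) {
                SKCIPHER_REQUEST_ON_STACK(req, tfm);

                skcipher_request_set_tfm(req, tfm);
                skcipher_request_set_callback(req, 0, NULL, NULL);
                /* Encrypt len bytes in place; iv is updated by the walk. */
                skcipher_request_set_crypt(req, sg, sg, len, iv);
                err = crypto_skcipher_encrypt(req);
                skcipher_request_zero(req);
        }

        crypto_free_skcipher(tfm);
        return err;
}
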
Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu --- arch/x86/crypto/blowfish_glue.c | 230 ++++++++++++++++++++-------------------- crypto/Kconfig | 2 +- 2 files changed, 114 insertions(+), 118 deletions(-) (limited to 'arch') diff --git a/arch/x86/crypto/blowfish_glue.c b/arch/x86/crypto/blowfish_glue.c index f9eca34301e2..3e0c07cc9124 100644 --- a/arch/x86/crypto/blowfish_glue.c +++ b/arch/x86/crypto/blowfish_glue.c @@ -25,13 +25,13 @@ * */ -#include +#include #include +#include #include #include #include #include -#include /* regular block cipher functions */ asmlinkage void __blowfish_enc_blk(struct bf_ctx *ctx, u8 *dst, const u8 *src, @@ -77,20 +77,28 @@ static void blowfish_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) blowfish_dec_blk(crypto_tfm_ctx(tfm), dst, src); } -static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk, +static int blowfish_setkey_skcipher(struct crypto_skcipher *tfm, + const u8 *key, unsigned int keylen) +{ + return blowfish_setkey(&tfm->base, key, keylen); +} + +static int ecb_crypt(struct skcipher_request *req, void (*fn)(struct bf_ctx *, u8 *, const u8 *), void (*fn_4way)(struct bf_ctx *, u8 *, const u8 *)) { - struct bf_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); unsigned int bsize = BF_BLOCK_SIZE; + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct bf_ctx *ctx = crypto_skcipher_ctx(tfm); + struct skcipher_walk walk; unsigned int nbytes; int err; - err = blkcipher_walk_virt(desc, walk); + err = skcipher_walk_virt(&walk, req, false); - while ((nbytes = walk->nbytes)) { - u8 *wsrc = walk->src.virt.addr; - u8 *wdst = walk->dst.virt.addr; + while ((nbytes = walk.nbytes)) { + u8 *wsrc = walk.src.virt.addr; + u8 *wdst = walk.dst.virt.addr; /* Process four block batch */ if (nbytes >= bsize * 4) { @@ -116,34 +124,25 @@ static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk, } while (nbytes >= bsize); done: - err = blkcipher_walk_done(desc, walk, nbytes); + err = skcipher_walk_done(&walk, nbytes); } return err; } -static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int ecb_encrypt(struct skcipher_request *req) { - struct blkcipher_walk walk; - - blkcipher_walk_init(&walk, dst, src, nbytes); - return ecb_crypt(desc, &walk, blowfish_enc_blk, blowfish_enc_blk_4way); + return ecb_crypt(req, blowfish_enc_blk, blowfish_enc_blk_4way); } -static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int ecb_decrypt(struct skcipher_request *req) { - struct blkcipher_walk walk; - - blkcipher_walk_init(&walk, dst, src, nbytes); - return ecb_crypt(desc, &walk, blowfish_dec_blk, blowfish_dec_blk_4way); + return ecb_crypt(req, blowfish_dec_blk, blowfish_dec_blk_4way); } -static unsigned int __cbc_encrypt(struct blkcipher_desc *desc, - struct blkcipher_walk *walk) +static unsigned int __cbc_encrypt(struct bf_ctx *ctx, + struct skcipher_walk *walk) { - struct bf_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); unsigned int bsize = BF_BLOCK_SIZE; unsigned int nbytes = walk->nbytes; u64 *src = (u64 *)walk->src.virt.addr; @@ -164,27 +163,27 @@ static unsigned int __cbc_encrypt(struct blkcipher_desc *desc, return nbytes; } -static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int cbc_encrypt(struct skcipher_request *req) { - struct blkcipher_walk walk; + struct crypto_skcipher *tfm = 
crypto_skcipher_reqtfm(req); + struct bf_ctx *ctx = crypto_skcipher_ctx(tfm); + struct skcipher_walk walk; + unsigned int nbytes; int err; - blkcipher_walk_init(&walk, dst, src, nbytes); - err = blkcipher_walk_virt(desc, &walk); + err = skcipher_walk_virt(&walk, req, false); while ((nbytes = walk.nbytes)) { - nbytes = __cbc_encrypt(desc, &walk); - err = blkcipher_walk_done(desc, &walk, nbytes); + nbytes = __cbc_encrypt(ctx, &walk); + err = skcipher_walk_done(&walk, nbytes); } return err; } -static unsigned int __cbc_decrypt(struct blkcipher_desc *desc, - struct blkcipher_walk *walk) +static unsigned int __cbc_decrypt(struct bf_ctx *ctx, + struct skcipher_walk *walk) { - struct bf_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); unsigned int bsize = BF_BLOCK_SIZE; unsigned int nbytes = walk->nbytes; u64 *src = (u64 *)walk->src.virt.addr; @@ -245,24 +244,25 @@ done: return nbytes; } -static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int cbc_decrypt(struct skcipher_request *req) { - struct blkcipher_walk walk; + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct bf_ctx *ctx = crypto_skcipher_ctx(tfm); + struct skcipher_walk walk; + unsigned int nbytes; int err; - blkcipher_walk_init(&walk, dst, src, nbytes); - err = blkcipher_walk_virt(desc, &walk); + err = skcipher_walk_virt(&walk, req, false); while ((nbytes = walk.nbytes)) { - nbytes = __cbc_decrypt(desc, &walk); - err = blkcipher_walk_done(desc, &walk, nbytes); + nbytes = __cbc_decrypt(ctx, &walk); + err = skcipher_walk_done(&walk, nbytes); } return err; } -static void ctr_crypt_final(struct bf_ctx *ctx, struct blkcipher_walk *walk) +static void ctr_crypt_final(struct bf_ctx *ctx, struct skcipher_walk *walk) { u8 *ctrblk = walk->iv; u8 keystream[BF_BLOCK_SIZE]; @@ -276,10 +276,8 @@ static void ctr_crypt_final(struct bf_ctx *ctx, struct blkcipher_walk *walk) crypto_inc(ctrblk, BF_BLOCK_SIZE); } -static unsigned int __ctr_crypt(struct blkcipher_desc *desc, - struct blkcipher_walk *walk) +static unsigned int __ctr_crypt(struct bf_ctx *ctx, struct skcipher_walk *walk) { - struct bf_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); unsigned int bsize = BF_BLOCK_SIZE; unsigned int nbytes = walk->nbytes; u64 *src = (u64 *)walk->src.virt.addr; @@ -332,29 +330,30 @@ done: return nbytes; } -static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int ctr_crypt(struct skcipher_request *req) { - struct blkcipher_walk walk; + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct bf_ctx *ctx = crypto_skcipher_ctx(tfm); + struct skcipher_walk walk; + unsigned int nbytes; int err; - blkcipher_walk_init(&walk, dst, src, nbytes); - err = blkcipher_walk_virt_block(desc, &walk, BF_BLOCK_SIZE); + err = skcipher_walk_virt(&walk, req, false); while ((nbytes = walk.nbytes) >= BF_BLOCK_SIZE) { - nbytes = __ctr_crypt(desc, &walk); - err = blkcipher_walk_done(desc, &walk, nbytes); + nbytes = __ctr_crypt(ctx, &walk); + err = skcipher_walk_done(&walk, nbytes); } - if (walk.nbytes) { - ctr_crypt_final(crypto_blkcipher_ctx(desc->tfm), &walk); - err = blkcipher_walk_done(desc, &walk, 0); + if (nbytes) { + ctr_crypt_final(ctx, &walk); + err = skcipher_walk_done(&walk, 0); } return err; } -static struct crypto_alg bf_algs[4] = { { +static struct crypto_alg bf_cipher_alg = { .cra_name = "blowfish", .cra_driver_name = "blowfish-asm", .cra_priority = 200, @@ -372,66 +371,50 @@ static struct crypto_alg 
bf_algs[4] = { { .cia_decrypt = blowfish_decrypt, } } -}, { - .cra_name = "ecb(blowfish)", - .cra_driver_name = "ecb-blowfish-asm", - .cra_priority = 300, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, - .cra_blocksize = BF_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct bf_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_u = { - .blkcipher = { - .min_keysize = BF_MIN_KEY_SIZE, - .max_keysize = BF_MAX_KEY_SIZE, - .setkey = blowfish_setkey, - .encrypt = ecb_encrypt, - .decrypt = ecb_decrypt, - }, +}; + +static struct skcipher_alg bf_skcipher_algs[] = { + { + .base.cra_name = "ecb(blowfish)", + .base.cra_driver_name = "ecb-blowfish-asm", + .base.cra_priority = 300, + .base.cra_blocksize = BF_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct bf_ctx), + .base.cra_module = THIS_MODULE, + .min_keysize = BF_MIN_KEY_SIZE, + .max_keysize = BF_MAX_KEY_SIZE, + .setkey = blowfish_setkey_skcipher, + .encrypt = ecb_encrypt, + .decrypt = ecb_decrypt, + }, { + .base.cra_name = "cbc(blowfish)", + .base.cra_driver_name = "cbc-blowfish-asm", + .base.cra_priority = 300, + .base.cra_blocksize = BF_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct bf_ctx), + .base.cra_module = THIS_MODULE, + .min_keysize = BF_MIN_KEY_SIZE, + .max_keysize = BF_MAX_KEY_SIZE, + .ivsize = BF_BLOCK_SIZE, + .setkey = blowfish_setkey_skcipher, + .encrypt = cbc_encrypt, + .decrypt = cbc_decrypt, + }, { + .base.cra_name = "ctr(blowfish)", + .base.cra_driver_name = "ctr-blowfish-asm", + .base.cra_priority = 300, + .base.cra_blocksize = 1, + .base.cra_ctxsize = sizeof(struct bf_ctx), + .base.cra_module = THIS_MODULE, + .min_keysize = BF_MIN_KEY_SIZE, + .max_keysize = BF_MAX_KEY_SIZE, + .ivsize = BF_BLOCK_SIZE, + .chunksize = BF_BLOCK_SIZE, + .setkey = blowfish_setkey_skcipher, + .encrypt = ctr_crypt, + .decrypt = ctr_crypt, }, -}, { - .cra_name = "cbc(blowfish)", - .cra_driver_name = "cbc-blowfish-asm", - .cra_priority = 300, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, - .cra_blocksize = BF_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct bf_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_u = { - .blkcipher = { - .min_keysize = BF_MIN_KEY_SIZE, - .max_keysize = BF_MAX_KEY_SIZE, - .ivsize = BF_BLOCK_SIZE, - .setkey = blowfish_setkey, - .encrypt = cbc_encrypt, - .decrypt = cbc_decrypt, - }, - }, -}, { - .cra_name = "ctr(blowfish)", - .cra_driver_name = "ctr-blowfish-asm", - .cra_priority = 300, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, - .cra_blocksize = 1, - .cra_ctxsize = sizeof(struct bf_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_u = { - .blkcipher = { - .min_keysize = BF_MIN_KEY_SIZE, - .max_keysize = BF_MAX_KEY_SIZE, - .ivsize = BF_BLOCK_SIZE, - .setkey = blowfish_setkey, - .encrypt = ctr_crypt, - .decrypt = ctr_crypt, - }, - }, -} }; +}; static bool is_blacklisted_cpu(void) { @@ -456,6 +439,8 @@ MODULE_PARM_DESC(force, "Force module load, ignore CPU blacklist"); static int __init init(void) { + int err; + if (!force && is_blacklisted_cpu()) { printk(KERN_INFO "blowfish-x86_64: performance on this CPU " @@ -464,12 +449,23 @@ static int __init init(void) return -ENODEV; } - return crypto_register_algs(bf_algs, ARRAY_SIZE(bf_algs)); + err = crypto_register_alg(&bf_cipher_alg); + if (err) + return err; + + err = crypto_register_skciphers(bf_skcipher_algs, + ARRAY_SIZE(bf_skcipher_algs)); + if (err) + crypto_unregister_alg(&bf_cipher_alg); + + return err; } static void __exit fini(void) { - 
crypto_unregister_algs(bf_algs, ARRAY_SIZE(bf_algs)); + crypto_unregister_alg(&bf_cipher_alg); + crypto_unregister_skciphers(bf_skcipher_algs, + ARRAY_SIZE(bf_skcipher_algs)); } module_init(init); diff --git a/crypto/Kconfig b/crypto/Kconfig index 536a7fac205f..fb58cb4b6826 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -1114,7 +1114,7 @@ config CRYPTO_BLOWFISH_COMMON config CRYPTO_BLOWFISH_X86_64 tristate "Blowfish cipher algorithm (x86_64)" depends on X86 && 64BIT - select CRYPTO_ALGAPI + select CRYPTO_BLKCIPHER select CRYPTO_BLOWFISH_COMMON help Blowfish cipher algorithm (x86_64), by Bruce Schneier. -- cgit v1.2.3 From 09c0f03bf8ce9304e0d17131c46690b2c95209f4 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 19 Feb 2018 23:48:17 -0800 Subject: crypto: x86/des3_ede - convert to skcipher interface Convert the x86 asm implementation of Triple DES from the (deprecated) blkcipher interface over to the skcipher interface. Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu --- arch/x86/crypto/des3_ede_glue.c | 238 ++++++++++++++++++++-------------------- crypto/Kconfig | 2 +- 2 files changed, 120 insertions(+), 120 deletions(-) (limited to 'arch') diff --git a/arch/x86/crypto/des3_ede_glue.c b/arch/x86/crypto/des3_ede_glue.c index 30c0a37f4882..f9c7bdc5be5a 100644 --- a/arch/x86/crypto/des3_ede_glue.c +++ b/arch/x86/crypto/des3_ede_glue.c @@ -20,13 +20,13 @@ * */ -#include +#include #include +#include #include #include #include #include -#include struct des3_ede_x86_ctx { u32 enc_expkey[DES3_EDE_EXPKEY_WORDS]; @@ -83,18 +83,18 @@ static void des3_ede_x86_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) des3_ede_dec_blk(crypto_tfm_ctx(tfm), dst, src); } -static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk, - const u32 *expkey) +static int ecb_crypt(struct skcipher_request *req, const u32 *expkey) { - unsigned int bsize = DES3_EDE_BLOCK_SIZE; + const unsigned int bsize = DES3_EDE_BLOCK_SIZE; + struct skcipher_walk walk; unsigned int nbytes; int err; - err = blkcipher_walk_virt(desc, walk); + err = skcipher_walk_virt(&walk, req, false); - while ((nbytes = walk->nbytes)) { - u8 *wsrc = walk->src.virt.addr; - u8 *wdst = walk->dst.virt.addr; + while ((nbytes = walk.nbytes)) { + u8 *wsrc = walk.src.virt.addr; + u8 *wdst = walk.dst.virt.addr; /* Process four block batch */ if (nbytes >= bsize * 3) { @@ -121,36 +121,31 @@ static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk, } while (nbytes >= bsize); done: - err = blkcipher_walk_done(desc, walk, nbytes); + err = skcipher_walk_done(&walk, nbytes); } return err; } -static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int ecb_encrypt(struct skcipher_request *req) { - struct des3_ede_x86_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - struct blkcipher_walk walk; + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct des3_ede_x86_ctx *ctx = crypto_skcipher_ctx(tfm); - blkcipher_walk_init(&walk, dst, src, nbytes); - return ecb_crypt(desc, &walk, ctx->enc_expkey); + return ecb_crypt(req, ctx->enc_expkey); } -static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int ecb_decrypt(struct skcipher_request *req) { - struct des3_ede_x86_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - struct blkcipher_walk walk; + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct des3_ede_x86_ctx *ctx = crypto_skcipher_ctx(tfm); 
- blkcipher_walk_init(&walk, dst, src, nbytes); - return ecb_crypt(desc, &walk, ctx->dec_expkey); + return ecb_crypt(req, ctx->dec_expkey); } -static unsigned int __cbc_encrypt(struct blkcipher_desc *desc, - struct blkcipher_walk *walk) +static unsigned int __cbc_encrypt(struct des3_ede_x86_ctx *ctx, + struct skcipher_walk *walk) { - struct des3_ede_x86_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); unsigned int bsize = DES3_EDE_BLOCK_SIZE; unsigned int nbytes = walk->nbytes; u64 *src = (u64 *)walk->src.virt.addr; @@ -171,27 +166,27 @@ static unsigned int __cbc_encrypt(struct blkcipher_desc *desc, return nbytes; } -static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int cbc_encrypt(struct skcipher_request *req) { - struct blkcipher_walk walk; + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct des3_ede_x86_ctx *ctx = crypto_skcipher_ctx(tfm); + struct skcipher_walk walk; + unsigned int nbytes; int err; - blkcipher_walk_init(&walk, dst, src, nbytes); - err = blkcipher_walk_virt(desc, &walk); + err = skcipher_walk_virt(&walk, req, false); while ((nbytes = walk.nbytes)) { - nbytes = __cbc_encrypt(desc, &walk); - err = blkcipher_walk_done(desc, &walk, nbytes); + nbytes = __cbc_encrypt(ctx, &walk); + err = skcipher_walk_done(&walk, nbytes); } return err; } -static unsigned int __cbc_decrypt(struct blkcipher_desc *desc, - struct blkcipher_walk *walk) +static unsigned int __cbc_decrypt(struct des3_ede_x86_ctx *ctx, + struct skcipher_walk *walk) { - struct des3_ede_x86_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); unsigned int bsize = DES3_EDE_BLOCK_SIZE; unsigned int nbytes = walk->nbytes; u64 *src = (u64 *)walk->src.virt.addr; @@ -250,25 +245,26 @@ done: return nbytes; } -static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int cbc_decrypt(struct skcipher_request *req) { - struct blkcipher_walk walk; + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct des3_ede_x86_ctx *ctx = crypto_skcipher_ctx(tfm); + struct skcipher_walk walk; + unsigned int nbytes; int err; - blkcipher_walk_init(&walk, dst, src, nbytes); - err = blkcipher_walk_virt(desc, &walk); + err = skcipher_walk_virt(&walk, req, false); while ((nbytes = walk.nbytes)) { - nbytes = __cbc_decrypt(desc, &walk); - err = blkcipher_walk_done(desc, &walk, nbytes); + nbytes = __cbc_decrypt(ctx, &walk); + err = skcipher_walk_done(&walk, nbytes); } return err; } static void ctr_crypt_final(struct des3_ede_x86_ctx *ctx, - struct blkcipher_walk *walk) + struct skcipher_walk *walk) { u8 *ctrblk = walk->iv; u8 keystream[DES3_EDE_BLOCK_SIZE]; @@ -282,10 +278,9 @@ static void ctr_crypt_final(struct des3_ede_x86_ctx *ctx, crypto_inc(ctrblk, DES3_EDE_BLOCK_SIZE); } -static unsigned int __ctr_crypt(struct blkcipher_desc *desc, - struct blkcipher_walk *walk) +static unsigned int __ctr_crypt(struct des3_ede_x86_ctx *ctx, + struct skcipher_walk *walk) { - struct des3_ede_x86_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); unsigned int bsize = DES3_EDE_BLOCK_SIZE; unsigned int nbytes = walk->nbytes; __be64 *src = (__be64 *)walk->src.virt.addr; @@ -333,23 +328,24 @@ done: return nbytes; } -static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int ctr_crypt(struct skcipher_request *req) { - struct blkcipher_walk walk; + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct des3_ede_x86_ctx 
*ctx = crypto_skcipher_ctx(tfm); + struct skcipher_walk walk; + unsigned int nbytes; int err; - blkcipher_walk_init(&walk, dst, src, nbytes); - err = blkcipher_walk_virt_block(desc, &walk, DES3_EDE_BLOCK_SIZE); + err = skcipher_walk_virt(&walk, req, false); while ((nbytes = walk.nbytes) >= DES3_EDE_BLOCK_SIZE) { - nbytes = __ctr_crypt(desc, &walk); - err = blkcipher_walk_done(desc, &walk, nbytes); + nbytes = __ctr_crypt(ctx, &walk); + err = skcipher_walk_done(&walk, nbytes); } - if (walk.nbytes) { - ctr_crypt_final(crypto_blkcipher_ctx(desc->tfm), &walk); - err = blkcipher_walk_done(desc, &walk, 0); + if (nbytes) { + ctr_crypt_final(ctx, &walk); + err = skcipher_walk_done(&walk, 0); } return err; @@ -381,7 +377,14 @@ static int des3_ede_x86_setkey(struct crypto_tfm *tfm, const u8 *key, return 0; } -static struct crypto_alg des3_ede_algs[4] = { { +static int des3_ede_x86_setkey_skcipher(struct crypto_skcipher *tfm, + const u8 *key, + unsigned int keylen) +{ + return des3_ede_x86_setkey(&tfm->base, key, keylen); +} + +static struct crypto_alg des3_ede_cipher = { .cra_name = "des3_ede", .cra_driver_name = "des3_ede-asm", .cra_priority = 200, @@ -399,66 +402,50 @@ static struct crypto_alg des3_ede_algs[4] = { { .cia_decrypt = des3_ede_x86_decrypt, } } -}, { - .cra_name = "ecb(des3_ede)", - .cra_driver_name = "ecb-des3_ede-asm", - .cra_priority = 300, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, - .cra_blocksize = DES3_EDE_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct des3_ede_x86_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_u = { - .blkcipher = { - .min_keysize = DES3_EDE_KEY_SIZE, - .max_keysize = DES3_EDE_KEY_SIZE, - .setkey = des3_ede_x86_setkey, - .encrypt = ecb_encrypt, - .decrypt = ecb_decrypt, - }, - }, -}, { - .cra_name = "cbc(des3_ede)", - .cra_driver_name = "cbc-des3_ede-asm", - .cra_priority = 300, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, - .cra_blocksize = DES3_EDE_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct des3_ede_x86_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_u = { - .blkcipher = { - .min_keysize = DES3_EDE_KEY_SIZE, - .max_keysize = DES3_EDE_KEY_SIZE, - .ivsize = DES3_EDE_BLOCK_SIZE, - .setkey = des3_ede_x86_setkey, - .encrypt = cbc_encrypt, - .decrypt = cbc_decrypt, - }, - }, -}, { - .cra_name = "ctr(des3_ede)", - .cra_driver_name = "ctr-des3_ede-asm", - .cra_priority = 300, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, - .cra_blocksize = 1, - .cra_ctxsize = sizeof(struct des3_ede_x86_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_u = { - .blkcipher = { - .min_keysize = DES3_EDE_KEY_SIZE, - .max_keysize = DES3_EDE_KEY_SIZE, - .ivsize = DES3_EDE_BLOCK_SIZE, - .setkey = des3_ede_x86_setkey, - .encrypt = ctr_crypt, - .decrypt = ctr_crypt, - }, - }, -} }; +}; + +struct skcipher_alg des3_ede_skciphers[] = { + { + .base.cra_name = "ecb(des3_ede)", + .base.cra_driver_name = "ecb-des3_ede-asm", + .base.cra_priority = 300, + .base.cra_blocksize = DES3_EDE_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct des3_ede_x86_ctx), + .base.cra_module = THIS_MODULE, + .min_keysize = DES3_EDE_KEY_SIZE, + .max_keysize = DES3_EDE_KEY_SIZE, + .setkey = des3_ede_x86_setkey_skcipher, + .encrypt = ecb_encrypt, + .decrypt = ecb_decrypt, + }, { + .base.cra_name = "cbc(des3_ede)", + .base.cra_driver_name = "cbc-des3_ede-asm", + .base.cra_priority = 300, + .base.cra_blocksize = DES3_EDE_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct 
des3_ede_x86_ctx), + .base.cra_module = THIS_MODULE, + .min_keysize = DES3_EDE_KEY_SIZE, + .max_keysize = DES3_EDE_KEY_SIZE, + .ivsize = DES3_EDE_BLOCK_SIZE, + .setkey = des3_ede_x86_setkey_skcipher, + .encrypt = cbc_encrypt, + .decrypt = cbc_decrypt, + }, { + .base.cra_name = "ctr(des3_ede)", + .base.cra_driver_name = "ctr-des3_ede-asm", + .base.cra_priority = 300, + .base.cra_blocksize = 1, + .base.cra_ctxsize = sizeof(struct des3_ede_x86_ctx), + .base.cra_module = THIS_MODULE, + .min_keysize = DES3_EDE_KEY_SIZE, + .max_keysize = DES3_EDE_KEY_SIZE, + .ivsize = DES3_EDE_BLOCK_SIZE, + .chunksize = DES3_EDE_BLOCK_SIZE, + .setkey = des3_ede_x86_setkey_skcipher, + .encrypt = ctr_crypt, + .decrypt = ctr_crypt, + } +}; static bool is_blacklisted_cpu(void) { @@ -483,17 +470,30 @@ MODULE_PARM_DESC(force, "Force module load, ignore CPU blacklist"); static int __init des3_ede_x86_init(void) { + int err; + if (!force && is_blacklisted_cpu()) { pr_info("des3_ede-x86_64: performance on this CPU would be suboptimal: disabling des3_ede-x86_64.\n"); return -ENODEV; } - return crypto_register_algs(des3_ede_algs, ARRAY_SIZE(des3_ede_algs)); + err = crypto_register_alg(&des3_ede_cipher); + if (err) + return err; + + err = crypto_register_skciphers(des3_ede_skciphers, + ARRAY_SIZE(des3_ede_skciphers)); + if (err) + crypto_unregister_alg(&des3_ede_cipher); + + return err; } static void __exit des3_ede_x86_fini(void) { - crypto_unregister_algs(des3_ede_algs, ARRAY_SIZE(des3_ede_algs)); + crypto_unregister_alg(&des3_ede_cipher); + crypto_unregister_skciphers(des3_ede_skciphers, + ARRAY_SIZE(des3_ede_skciphers)); } module_init(des3_ede_x86_init); diff --git a/crypto/Kconfig b/crypto/Kconfig index fb58cb4b6826..12b92328f5c5 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -1291,7 +1291,7 @@ config CRYPTO_DES_SPARC64 config CRYPTO_DES3_EDE_X86_64 tristate "Triple DES EDE cipher algorithm (x86-64)" depends on X86 && 64BIT - select CRYPTO_ALGAPI + select CRYPTO_BLKCIPHER select CRYPTO_DES help Triple DES EDE (FIPS 46-3) algorithm. -- cgit v1.2.3 From 6fcb81b562cf5b75c543a9e29fea30d439af3560 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 19 Feb 2018 23:48:18 -0800 Subject: crypto: x86/camellia-aesni-avx - remove LRW algorithm The LRW template now wraps an ECB mode algorithm rather than the block cipher directly. Therefore it is now redundant for crypto modules to wrap their ECB code with generic LRW code themselves via lrw_crypt(). Remove the lrw-camellia-aesni algorithm which did this. Users who request lrw(camellia) and previously would have gotten lrw-camellia-aesni will now get lrw(ecb-camellia-aesni) instead, which is just as fast. 
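For illustration only (not part of this patch): callers keep requesting the
composed mode by name, so nothing changes for users of the API; only the
implementation that the LRW template binds to does. A minimal, hypothetical
in-kernel user, with the usual LRW key layout of the cipher key followed by
one 16-byte tweak key:

#include <crypto/skcipher.h>

static int example_use_lrw_camellia(const u8 *key, unsigned int keylen)
{
	struct crypto_skcipher *tfm;
	int err;

	/* Now served by e.g. lrw(ecb-camellia-aesni) instead of the
	 * removed lrw-camellia-aesni glue algorithm. */
	tfm = crypto_alloc_skcipher("lrw(camellia)", 0, 0);
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);

	err = crypto_skcipher_setkey(tfm, key, keylen);

	/* ... build an skcipher_request and encrypt/decrypt as usual ... */

	crypto_free_skcipher(tfm);
	return err;
}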
Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu --- arch/x86/crypto/camellia_aesni_avx_glue.c | 168 +----------------------------- arch/x86/include/asm/crypto/camellia.h | 1 + crypto/Kconfig | 1 - 3 files changed, 2 insertions(+), 168 deletions(-) (limited to 'arch') diff --git a/arch/x86/crypto/camellia_aesni_avx_glue.c b/arch/x86/crypto/camellia_aesni_avx_glue.c index d96429da88eb..038bc2ae5313 100644 --- a/arch/x86/crypto/camellia_aesni_avx_glue.c +++ b/arch/x86/crypto/camellia_aesni_avx_glue.c @@ -17,7 +17,6 @@ #include #include #include -#include #include #include #include @@ -186,18 +185,6 @@ static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst, return glue_ctr_crypt_128bit(&camellia_ctr, desc, dst, src, nbytes); } -static inline bool camellia_fpu_begin(bool fpu_enabled, unsigned int nbytes) -{ - return glue_fpu_begin(CAMELLIA_BLOCK_SIZE, - CAMELLIA_AESNI_PARALLEL_BLOCKS, NULL, fpu_enabled, - nbytes); -} - -static inline void camellia_fpu_end(bool fpu_enabled) -{ - glue_fpu_end(fpu_enabled); -} - static int camellia_setkey(struct crypto_tfm *tfm, const u8 *in_key, unsigned int key_len) { @@ -205,111 +192,6 @@ static int camellia_setkey(struct crypto_tfm *tfm, const u8 *in_key, &tfm->crt_flags); } -struct crypt_priv { - struct camellia_ctx *ctx; - bool fpu_enabled; -}; - -static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes) -{ - const unsigned int bsize = CAMELLIA_BLOCK_SIZE; - struct crypt_priv *ctx = priv; - int i; - - ctx->fpu_enabled = camellia_fpu_begin(ctx->fpu_enabled, nbytes); - - if (nbytes >= CAMELLIA_AESNI_PARALLEL_BLOCKS * bsize) { - camellia_ecb_enc_16way(ctx->ctx, srcdst, srcdst); - srcdst += bsize * CAMELLIA_AESNI_PARALLEL_BLOCKS; - nbytes -= bsize * CAMELLIA_AESNI_PARALLEL_BLOCKS; - } - - while (nbytes >= CAMELLIA_PARALLEL_BLOCKS * bsize) { - camellia_enc_blk_2way(ctx->ctx, srcdst, srcdst); - srcdst += bsize * CAMELLIA_PARALLEL_BLOCKS; - nbytes -= bsize * CAMELLIA_PARALLEL_BLOCKS; - } - - for (i = 0; i < nbytes / bsize; i++, srcdst += bsize) - camellia_enc_blk(ctx->ctx, srcdst, srcdst); -} - -static void decrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes) -{ - const unsigned int bsize = CAMELLIA_BLOCK_SIZE; - struct crypt_priv *ctx = priv; - int i; - - ctx->fpu_enabled = camellia_fpu_begin(ctx->fpu_enabled, nbytes); - - if (nbytes >= CAMELLIA_AESNI_PARALLEL_BLOCKS * bsize) { - camellia_ecb_dec_16way(ctx->ctx, srcdst, srcdst); - srcdst += bsize * CAMELLIA_AESNI_PARALLEL_BLOCKS; - nbytes -= bsize * CAMELLIA_AESNI_PARALLEL_BLOCKS; - } - - while (nbytes >= CAMELLIA_PARALLEL_BLOCKS * bsize) { - camellia_dec_blk_2way(ctx->ctx, srcdst, srcdst); - srcdst += bsize * CAMELLIA_PARALLEL_BLOCKS; - nbytes -= bsize * CAMELLIA_PARALLEL_BLOCKS; - } - - for (i = 0; i < nbytes / bsize; i++, srcdst += bsize) - camellia_dec_blk(ctx->ctx, srcdst, srcdst); -} - -static int lrw_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) -{ - struct camellia_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - be128 buf[CAMELLIA_AESNI_PARALLEL_BLOCKS]; - struct crypt_priv crypt_ctx = { - .ctx = &ctx->camellia_ctx, - .fpu_enabled = false, - }; - struct lrw_crypt_req req = { - .tbuf = buf, - .tbuflen = sizeof(buf), - - .table_ctx = &ctx->lrw_table, - .crypt_ctx = &crypt_ctx, - .crypt_fn = encrypt_callback, - }; - int ret; - - desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; - ret = lrw_crypt(desc, dst, src, nbytes, &req); - camellia_fpu_end(crypt_ctx.fpu_enabled); - - return ret; -} - -static int 
lrw_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) -{ - struct camellia_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - be128 buf[CAMELLIA_AESNI_PARALLEL_BLOCKS]; - struct crypt_priv crypt_ctx = { - .ctx = &ctx->camellia_ctx, - .fpu_enabled = false, - }; - struct lrw_crypt_req req = { - .tbuf = buf, - .tbuflen = sizeof(buf), - - .table_ctx = &ctx->lrw_table, - .crypt_ctx = &crypt_ctx, - .crypt_fn = decrypt_callback, - }; - int ret; - - desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; - ret = lrw_crypt(desc, dst, src, nbytes, &req); - camellia_fpu_end(crypt_ctx.fpu_enabled); - - return ret; -} - static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, struct scatterlist *src, unsigned int nbytes) { @@ -330,7 +212,7 @@ static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, &ctx->tweak_ctx, &ctx->crypt_ctx); } -static struct crypto_alg cmll_algs[10] = { { +static struct crypto_alg cmll_algs[] = { { .cra_name = "__ecb-camellia-aesni", .cra_driver_name = "__driver-ecb-camellia-aesni", .cra_priority = 0, @@ -391,30 +273,6 @@ static struct crypto_alg cmll_algs[10] = { { .decrypt = ctr_crypt, }, }, -}, { - .cra_name = "__lrw-camellia-aesni", - .cra_driver_name = "__driver-lrw-camellia-aesni", - .cra_priority = 0, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | - CRYPTO_ALG_INTERNAL, - .cra_blocksize = CAMELLIA_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct camellia_lrw_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_exit = lrw_camellia_exit_tfm, - .cra_u = { - .blkcipher = { - .min_keysize = CAMELLIA_MIN_KEY_SIZE + - CAMELLIA_BLOCK_SIZE, - .max_keysize = CAMELLIA_MAX_KEY_SIZE + - CAMELLIA_BLOCK_SIZE, - .ivsize = CAMELLIA_BLOCK_SIZE, - .setkey = lrw_camellia_setkey, - .encrypt = lrw_encrypt, - .decrypt = lrw_decrypt, - }, - }, }, { .cra_name = "__xts-camellia-aesni", .cra_driver_name = "__driver-xts-camellia-aesni", @@ -502,30 +360,6 @@ static struct crypto_alg cmll_algs[10] = { { .geniv = "chainiv", }, }, -}, { - .cra_name = "lrw(camellia)", - .cra_driver_name = "lrw-camellia-aesni", - .cra_priority = 400, - .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, - .cra_blocksize = CAMELLIA_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct async_helper_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_ablkcipher_type, - .cra_module = THIS_MODULE, - .cra_init = ablk_init, - .cra_exit = ablk_exit, - .cra_u = { - .ablkcipher = { - .min_keysize = CAMELLIA_MIN_KEY_SIZE + - CAMELLIA_BLOCK_SIZE, - .max_keysize = CAMELLIA_MAX_KEY_SIZE + - CAMELLIA_BLOCK_SIZE, - .ivsize = CAMELLIA_BLOCK_SIZE, - .setkey = ablk_set_key, - .encrypt = ablk_encrypt, - .decrypt = ablk_decrypt, - }, - }, }, { .cra_name = "xts(camellia)", .cra_driver_name = "xts-camellia-aesni", diff --git a/arch/x86/include/asm/crypto/camellia.h b/arch/x86/include/asm/crypto/camellia.h index 10f8d590bcfe..24eff92a5356 100644 --- a/arch/x86/include/asm/crypto/camellia.h +++ b/arch/x86/include/asm/crypto/camellia.h @@ -2,6 +2,7 @@ #ifndef ASM_X86_CAMELLIA_H #define ASM_X86_CAMELLIA_H +#include #include #include diff --git a/crypto/Kconfig b/crypto/Kconfig index 12b92328f5c5..b7c33bc0fb3b 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -1169,7 +1169,6 @@ config CRYPTO_CAMELLIA_AESNI_AVX_X86_64 select CRYPTO_ABLK_HELPER select CRYPTO_GLUE_HELPER_X86 select CRYPTO_CAMELLIA_X86_64 - select CRYPTO_LRW select CRYPTO_XTS help Camellia cipher algorithm module (x86_64/AES-NI/AVX). 
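A hypothetical debugging snippet (not from this patch) to confirm the
substitution at run time by printing the driver that ends up backing the
template instantiation:

#include <linux/kernel.h>
#include <crypto/skcipher.h>

static void example_report_lrw_backend(void)
{
	struct crypto_skcipher *tfm;

	tfm = crypto_alloc_skcipher("lrw(camellia)", 0, 0);
	if (IS_ERR(tfm))
		return;

	/* The exact driver depends on CPU features and on which patches of
	 * this series are applied; after this change it is a template
	 * instantiation such as lrw(ecb-camellia-aesni) rather than the
	 * removed lrw-camellia-aesni algorithm. */
	pr_info("lrw(camellia) -> %s\n",
		crypto_tfm_alg_driver_name(crypto_skcipher_tfm(tfm)));

	crypto_free_skcipher(tfm);
}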
-- cgit v1.2.3 From 44c9b75409819707a94b377e69ff7948928e4b1b Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 19 Feb 2018 23:48:19 -0800 Subject: crypto: x86/camellia-aesni-avx2 - remove LRW algorithm The LRW template now wraps an ECB mode algorithm rather than the block cipher directly. Therefore it is now redundant for crypto modules to wrap their ECB code with generic LRW code themselves via lrw_crypt(). Remove the lrw-camellia-aesni-avx2 algorithm which did this. Users who request lrw(camellia) and previously would have gotten lrw-camellia-aesni-avx2 will now get lrw(ecb-camellia-aesni-avx2) instead, which is just as fast. Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu --- arch/x86/crypto/camellia_aesni_avx2_glue.c | 180 +---------------------------- crypto/Kconfig | 1 - 2 files changed, 1 insertion(+), 180 deletions(-) (limited to 'arch') diff --git a/arch/x86/crypto/camellia_aesni_avx2_glue.c b/arch/x86/crypto/camellia_aesni_avx2_glue.c index 60907c139c4e..42a488dff50c 100644 --- a/arch/x86/crypto/camellia_aesni_avx2_glue.c +++ b/arch/x86/crypto/camellia_aesni_avx2_glue.c @@ -17,7 +17,6 @@ #include #include #include -#include #include #include #include @@ -182,18 +181,6 @@ static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst, return glue_ctr_crypt_128bit(&camellia_ctr, desc, dst, src, nbytes); } -static inline bool camellia_fpu_begin(bool fpu_enabled, unsigned int nbytes) -{ - return glue_fpu_begin(CAMELLIA_BLOCK_SIZE, - CAMELLIA_AESNI_PARALLEL_BLOCKS, NULL, fpu_enabled, - nbytes); -} - -static inline void camellia_fpu_end(bool fpu_enabled) -{ - glue_fpu_end(fpu_enabled); -} - static int camellia_setkey(struct crypto_tfm *tfm, const u8 *in_key, unsigned int key_len) { @@ -201,123 +188,6 @@ static int camellia_setkey(struct crypto_tfm *tfm, const u8 *in_key, &tfm->crt_flags); } -struct crypt_priv { - struct camellia_ctx *ctx; - bool fpu_enabled; -}; - -static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes) -{ - const unsigned int bsize = CAMELLIA_BLOCK_SIZE; - struct crypt_priv *ctx = priv; - int i; - - ctx->fpu_enabled = camellia_fpu_begin(ctx->fpu_enabled, nbytes); - - if (nbytes >= CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS * bsize) { - camellia_ecb_enc_32way(ctx->ctx, srcdst, srcdst); - srcdst += bsize * CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS; - nbytes -= bsize * CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS; - } - - if (nbytes >= CAMELLIA_AESNI_PARALLEL_BLOCKS * bsize) { - camellia_ecb_enc_16way(ctx->ctx, srcdst, srcdst); - srcdst += bsize * CAMELLIA_AESNI_PARALLEL_BLOCKS; - nbytes -= bsize * CAMELLIA_AESNI_PARALLEL_BLOCKS; - } - - while (nbytes >= CAMELLIA_PARALLEL_BLOCKS * bsize) { - camellia_enc_blk_2way(ctx->ctx, srcdst, srcdst); - srcdst += bsize * CAMELLIA_PARALLEL_BLOCKS; - nbytes -= bsize * CAMELLIA_PARALLEL_BLOCKS; - } - - for (i = 0; i < nbytes / bsize; i++, srcdst += bsize) - camellia_enc_blk(ctx->ctx, srcdst, srcdst); -} - -static void decrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes) -{ - const unsigned int bsize = CAMELLIA_BLOCK_SIZE; - struct crypt_priv *ctx = priv; - int i; - - ctx->fpu_enabled = camellia_fpu_begin(ctx->fpu_enabled, nbytes); - - if (nbytes >= CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS * bsize) { - camellia_ecb_dec_32way(ctx->ctx, srcdst, srcdst); - srcdst += bsize * CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS; - nbytes -= bsize * CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS; - } - - if (nbytes >= CAMELLIA_AESNI_PARALLEL_BLOCKS * bsize) { - camellia_ecb_dec_16way(ctx->ctx, srcdst, srcdst); - srcdst += bsize * 
CAMELLIA_AESNI_PARALLEL_BLOCKS; - nbytes -= bsize * CAMELLIA_AESNI_PARALLEL_BLOCKS; - } - - while (nbytes >= CAMELLIA_PARALLEL_BLOCKS * bsize) { - camellia_dec_blk_2way(ctx->ctx, srcdst, srcdst); - srcdst += bsize * CAMELLIA_PARALLEL_BLOCKS; - nbytes -= bsize * CAMELLIA_PARALLEL_BLOCKS; - } - - for (i = 0; i < nbytes / bsize; i++, srcdst += bsize) - camellia_dec_blk(ctx->ctx, srcdst, srcdst); -} - -static int lrw_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) -{ - struct camellia_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - be128 buf[CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS]; - struct crypt_priv crypt_ctx = { - .ctx = &ctx->camellia_ctx, - .fpu_enabled = false, - }; - struct lrw_crypt_req req = { - .tbuf = buf, - .tbuflen = sizeof(buf), - - .table_ctx = &ctx->lrw_table, - .crypt_ctx = &crypt_ctx, - .crypt_fn = encrypt_callback, - }; - int ret; - - desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; - ret = lrw_crypt(desc, dst, src, nbytes, &req); - camellia_fpu_end(crypt_ctx.fpu_enabled); - - return ret; -} - -static int lrw_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) -{ - struct camellia_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - be128 buf[CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS]; - struct crypt_priv crypt_ctx = { - .ctx = &ctx->camellia_ctx, - .fpu_enabled = false, - }; - struct lrw_crypt_req req = { - .tbuf = buf, - .tbuflen = sizeof(buf), - - .table_ctx = &ctx->lrw_table, - .crypt_ctx = &crypt_ctx, - .crypt_fn = decrypt_callback, - }; - int ret; - - desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; - ret = lrw_crypt(desc, dst, src, nbytes, &req); - camellia_fpu_end(crypt_ctx.fpu_enabled); - - return ret; -} - static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, struct scatterlist *src, unsigned int nbytes) { @@ -338,7 +208,7 @@ static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, &ctx->tweak_ctx, &ctx->crypt_ctx); } -static struct crypto_alg cmll_algs[10] = { { +static struct crypto_alg cmll_algs[] = { { .cra_name = "__ecb-camellia-aesni-avx2", .cra_driver_name = "__driver-ecb-camellia-aesni-avx2", .cra_priority = 0, @@ -399,30 +269,6 @@ static struct crypto_alg cmll_algs[10] = { { .decrypt = ctr_crypt, }, }, -}, { - .cra_name = "__lrw-camellia-aesni-avx2", - .cra_driver_name = "__driver-lrw-camellia-aesni-avx2", - .cra_priority = 0, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | - CRYPTO_ALG_INTERNAL, - .cra_blocksize = CAMELLIA_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct camellia_lrw_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_exit = lrw_camellia_exit_tfm, - .cra_u = { - .blkcipher = { - .min_keysize = CAMELLIA_MIN_KEY_SIZE + - CAMELLIA_BLOCK_SIZE, - .max_keysize = CAMELLIA_MAX_KEY_SIZE + - CAMELLIA_BLOCK_SIZE, - .ivsize = CAMELLIA_BLOCK_SIZE, - .setkey = lrw_camellia_setkey, - .encrypt = lrw_encrypt, - .decrypt = lrw_decrypt, - }, - }, }, { .cra_name = "__xts-camellia-aesni-avx2", .cra_driver_name = "__driver-xts-camellia-aesni-avx2", @@ -510,30 +356,6 @@ static struct crypto_alg cmll_algs[10] = { { .geniv = "chainiv", }, }, -}, { - .cra_name = "lrw(camellia)", - .cra_driver_name = "lrw-camellia-aesni-avx2", - .cra_priority = 500, - .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, - .cra_blocksize = CAMELLIA_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct async_helper_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_ablkcipher_type, - .cra_module = THIS_MODULE, - 
.cra_init = ablk_init, - .cra_exit = ablk_exit, - .cra_u = { - .ablkcipher = { - .min_keysize = CAMELLIA_MIN_KEY_SIZE + - CAMELLIA_BLOCK_SIZE, - .max_keysize = CAMELLIA_MAX_KEY_SIZE + - CAMELLIA_BLOCK_SIZE, - .ivsize = CAMELLIA_BLOCK_SIZE, - .setkey = ablk_set_key, - .encrypt = ablk_encrypt, - .decrypt = ablk_decrypt, - }, - }, }, { .cra_name = "xts(camellia)", .cra_driver_name = "xts-camellia-aesni-avx2", diff --git a/crypto/Kconfig b/crypto/Kconfig index b7c33bc0fb3b..b4f520787122 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -1191,7 +1191,6 @@ config CRYPTO_CAMELLIA_AESNI_AVX2_X86_64 select CRYPTO_GLUE_HELPER_X86 select CRYPTO_CAMELLIA_X86_64 select CRYPTO_CAMELLIA_AESNI_AVX_X86_64 - select CRYPTO_LRW select CRYPTO_XTS help Camellia cipher algorithm module (x86_64/AES-NI/AVX2). -- cgit v1.2.3 From 6043d341f0b57034ec92e26d353ccb450563d18e Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 19 Feb 2018 23:48:20 -0800 Subject: crypto: x86/camellia - remove LRW algorithm The LRW template now wraps an ECB mode algorithm rather than the block cipher directly. Therefore it is now redundant for crypto modules to wrap their ECB code with generic LRW code themselves via lrw_crypt(). Remove the lrw-camellia-asm algorithm which did this. Users who request lrw(camellia) and previously would have gotten lrw-camellia-asm will now get lrw(ecb-camellia-asm) instead, which is just as fast. Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu --- arch/x86/crypto/camellia_glue.c | 85 +--------------------------------- arch/x86/include/asm/crypto/camellia.h | 10 ---- crypto/Kconfig | 1 - 3 files changed, 1 insertion(+), 95 deletions(-) (limited to 'arch') diff --git a/arch/x86/crypto/camellia_glue.c b/arch/x86/crypto/camellia_glue.c index af4840ab2a3d..9ae2af27e759 100644 --- a/arch/x86/crypto/camellia_glue.c +++ b/arch/x86/crypto/camellia_glue.c @@ -30,7 +30,6 @@ #include #include #include -#include #include #include #include @@ -1437,65 +1436,6 @@ static void decrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes) camellia_dec_blk(ctx, srcdst, srcdst); } -int lrw_camellia_setkey(struct crypto_tfm *tfm, const u8 *key, - unsigned int keylen) -{ - struct camellia_lrw_ctx *ctx = crypto_tfm_ctx(tfm); - int err; - - err = __camellia_setkey(&ctx->camellia_ctx, key, - keylen - CAMELLIA_BLOCK_SIZE, - &tfm->crt_flags); - if (err) - return err; - - return lrw_init_table(&ctx->lrw_table, - key + keylen - CAMELLIA_BLOCK_SIZE); -} -EXPORT_SYMBOL_GPL(lrw_camellia_setkey); - -static int lrw_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) -{ - struct camellia_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - be128 buf[2 * 4]; - struct lrw_crypt_req req = { - .tbuf = buf, - .tbuflen = sizeof(buf), - - .table_ctx = &ctx->lrw_table, - .crypt_ctx = &ctx->camellia_ctx, - .crypt_fn = encrypt_callback, - }; - - return lrw_crypt(desc, dst, src, nbytes, &req); -} - -static int lrw_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) -{ - struct camellia_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - be128 buf[2 * 4]; - struct lrw_crypt_req req = { - .tbuf = buf, - .tbuflen = sizeof(buf), - - .table_ctx = &ctx->lrw_table, - .crypt_ctx = &ctx->camellia_ctx, - .crypt_fn = decrypt_callback, - }; - - return lrw_crypt(desc, dst, src, nbytes, &req); -} - -void lrw_camellia_exit_tfm(struct crypto_tfm *tfm) -{ - struct camellia_lrw_ctx *ctx = crypto_tfm_ctx(tfm); - - lrw_free_table(&ctx->lrw_table); -} 
-EXPORT_SYMBOL_GPL(lrw_camellia_exit_tfm); - int xts_camellia_setkey(struct crypto_tfm *tfm, const u8 *key, unsigned int keylen) { @@ -1554,7 +1494,7 @@ static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, return xts_crypt(desc, dst, src, nbytes, &req); } -static struct crypto_alg camellia_algs[6] = { { +static struct crypto_alg camellia_algs[] = { { .cra_name = "camellia", .cra_driver_name = "camellia-asm", .cra_priority = 200, @@ -1631,29 +1571,6 @@ static struct crypto_alg camellia_algs[6] = { { .decrypt = ctr_crypt, }, }, -}, { - .cra_name = "lrw(camellia)", - .cra_driver_name = "lrw-camellia-asm", - .cra_priority = 300, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, - .cra_blocksize = CAMELLIA_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct camellia_lrw_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_exit = lrw_camellia_exit_tfm, - .cra_u = { - .blkcipher = { - .min_keysize = CAMELLIA_MIN_KEY_SIZE + - CAMELLIA_BLOCK_SIZE, - .max_keysize = CAMELLIA_MAX_KEY_SIZE + - CAMELLIA_BLOCK_SIZE, - .ivsize = CAMELLIA_BLOCK_SIZE, - .setkey = lrw_camellia_setkey, - .encrypt = lrw_encrypt, - .decrypt = lrw_decrypt, - }, - }, }, { .cra_name = "xts(camellia)", .cra_driver_name = "xts-camellia-asm", diff --git a/arch/x86/include/asm/crypto/camellia.h b/arch/x86/include/asm/crypto/camellia.h index 24eff92a5356..08d71444edc2 100644 --- a/arch/x86/include/asm/crypto/camellia.h +++ b/arch/x86/include/asm/crypto/camellia.h @@ -2,7 +2,6 @@ #ifndef ASM_X86_CAMELLIA_H #define ASM_X86_CAMELLIA_H -#include #include #include @@ -17,11 +16,6 @@ struct camellia_ctx { u32 key_length; }; -struct camellia_lrw_ctx { - struct lrw_table_ctx lrw_table; - struct camellia_ctx camellia_ctx; -}; - struct camellia_xts_ctx { struct camellia_ctx tweak_ctx; struct camellia_ctx crypt_ctx; @@ -31,10 +25,6 @@ extern int __camellia_setkey(struct camellia_ctx *cctx, const unsigned char *key, unsigned int key_len, u32 *flags); -extern int lrw_camellia_setkey(struct crypto_tfm *tfm, const u8 *key, - unsigned int keylen); -extern void lrw_camellia_exit_tfm(struct crypto_tfm *tfm); - extern int xts_camellia_setkey(struct crypto_tfm *tfm, const u8 *key, unsigned int keylen); diff --git a/crypto/Kconfig b/crypto/Kconfig index b4f520787122..d2e8fac596da 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -1147,7 +1147,6 @@ config CRYPTO_CAMELLIA_X86_64 depends on CRYPTO select CRYPTO_ALGAPI select CRYPTO_GLUE_HELPER_X86 - select CRYPTO_LRW select CRYPTO_XTS help Camellia cipher algorithm module (x86_64). -- cgit v1.2.3 From 451cc493246e59e265715dd9d2f8e1a25b23b83d Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 19 Feb 2018 23:48:21 -0800 Subject: crypto: x86/camellia - remove XTS algorithm The XTS template now wraps an ECB mode algorithm rather than the block cipher directly. Therefore it is now redundant for crypto modules to wrap their ECB code with generic XTS code themselves via xts_crypt(). Remove the xts-camellia-asm algorithm which did this. Users who request xts(camellia) and previously would have gotten xts-camellia-asm will now get xts(ecb-camellia-asm) instead, which is just as fast. 
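The key convention is unchanged: as the xts_camellia_setkey() helper (moved
into the AVX glue by this patch) shows, an XTS key is one buffer of twice the
cipher key length, the first half keying the data cipher and the second half
the tweak cipher. A hypothetical caller, for illustration only:

#include <linux/random.h>
#include <crypto/skcipher.h>

/* Key xts(camellia) with two concatenated 32-byte (maximum-size) keys. */
static int example_key_xts_camellia(struct crypto_skcipher *tfm)
{
	u8 xts_key[2 * 32];	/* data key || tweak key */

	get_random_bytes(xts_key, sizeof(xts_key));
	return crypto_skcipher_setkey(tfm, xts_key, sizeof(xts_key));
}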
Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu --- arch/x86/crypto/camellia_aesni_avx_glue.c | 22 ++++++ arch/x86/crypto/camellia_glue.c | 111 ------------------------------ arch/x86/include/asm/crypto/camellia.h | 3 +- crypto/Kconfig | 1 - 4 files changed, 24 insertions(+), 113 deletions(-) (limited to 'arch') diff --git a/arch/x86/crypto/camellia_aesni_avx_glue.c b/arch/x86/crypto/camellia_aesni_avx_glue.c index 038bc2ae5313..72911a706923 100644 --- a/arch/x86/crypto/camellia_aesni_avx_glue.c +++ b/arch/x86/crypto/camellia_aesni_avx_glue.c @@ -192,6 +192,28 @@ static int camellia_setkey(struct crypto_tfm *tfm, const u8 *in_key, &tfm->crt_flags); } +int xts_camellia_setkey(struct crypto_tfm *tfm, const u8 *key, + unsigned int keylen) +{ + struct camellia_xts_ctx *ctx = crypto_tfm_ctx(tfm); + u32 *flags = &tfm->crt_flags; + int err; + + err = xts_check_key(tfm, key, keylen); + if (err) + return err; + + /* first half of xts-key is for crypt */ + err = __camellia_setkey(&ctx->crypt_ctx, key, keylen / 2, flags); + if (err) + return err; + + /* second half of xts-key is for tweak */ + return __camellia_setkey(&ctx->tweak_ctx, key + keylen / 2, keylen / 2, + flags); +} +EXPORT_SYMBOL_GPL(xts_camellia_setkey); + static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, struct scatterlist *src, unsigned int nbytes) { diff --git a/arch/x86/crypto/camellia_glue.c b/arch/x86/crypto/camellia_glue.c index 9ae2af27e759..804ca69d5d0a 100644 --- a/arch/x86/crypto/camellia_glue.c +++ b/arch/x86/crypto/camellia_glue.c @@ -30,7 +30,6 @@ #include #include #include -#include #include #include @@ -1404,96 +1403,6 @@ static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst, return glue_ctr_crypt_128bit(&camellia_ctr, desc, dst, src, nbytes); } -static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes) -{ - const unsigned int bsize = CAMELLIA_BLOCK_SIZE; - struct camellia_ctx *ctx = priv; - int i; - - while (nbytes >= 2 * bsize) { - camellia_enc_blk_2way(ctx, srcdst, srcdst); - srcdst += bsize * 2; - nbytes -= bsize * 2; - } - - for (i = 0; i < nbytes / bsize; i++, srcdst += bsize) - camellia_enc_blk(ctx, srcdst, srcdst); -} - -static void decrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes) -{ - const unsigned int bsize = CAMELLIA_BLOCK_SIZE; - struct camellia_ctx *ctx = priv; - int i; - - while (nbytes >= 2 * bsize) { - camellia_dec_blk_2way(ctx, srcdst, srcdst); - srcdst += bsize * 2; - nbytes -= bsize * 2; - } - - for (i = 0; i < nbytes / bsize; i++, srcdst += bsize) - camellia_dec_blk(ctx, srcdst, srcdst); -} - -int xts_camellia_setkey(struct crypto_tfm *tfm, const u8 *key, - unsigned int keylen) -{ - struct camellia_xts_ctx *ctx = crypto_tfm_ctx(tfm); - u32 *flags = &tfm->crt_flags; - int err; - - err = xts_check_key(tfm, key, keylen); - if (err) - return err; - - /* first half of xts-key is for crypt */ - err = __camellia_setkey(&ctx->crypt_ctx, key, keylen / 2, flags); - if (err) - return err; - - /* second half of xts-key is for tweak */ - return __camellia_setkey(&ctx->tweak_ctx, key + keylen / 2, keylen / 2, - flags); -} -EXPORT_SYMBOL_GPL(xts_camellia_setkey); - -static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) -{ - struct camellia_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - le128 buf[2 * 4]; - struct xts_crypt_req req = { - .tbuf = buf, - .tbuflen = sizeof(buf), - - .tweak_ctx = &ctx->tweak_ctx, - .tweak_fn = XTS_TWEAK_CAST(camellia_enc_blk), - 
.crypt_ctx = &ctx->crypt_ctx, - .crypt_fn = encrypt_callback, - }; - - return xts_crypt(desc, dst, src, nbytes, &req); -} - -static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) -{ - struct camellia_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - le128 buf[2 * 4]; - struct xts_crypt_req req = { - .tbuf = buf, - .tbuflen = sizeof(buf), - - .tweak_ctx = &ctx->tweak_ctx, - .tweak_fn = XTS_TWEAK_CAST(camellia_enc_blk), - .crypt_ctx = &ctx->crypt_ctx, - .crypt_fn = decrypt_callback, - }; - - return xts_crypt(desc, dst, src, nbytes, &req); -} - static struct crypto_alg camellia_algs[] = { { .cra_name = "camellia", .cra_driver_name = "camellia-asm", @@ -1571,26 +1480,6 @@ static struct crypto_alg camellia_algs[] = { { .decrypt = ctr_crypt, }, }, -}, { - .cra_name = "xts(camellia)", - .cra_driver_name = "xts-camellia-asm", - .cra_priority = 300, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, - .cra_blocksize = CAMELLIA_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct camellia_xts_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_u = { - .blkcipher = { - .min_keysize = CAMELLIA_MIN_KEY_SIZE * 2, - .max_keysize = CAMELLIA_MAX_KEY_SIZE * 2, - .ivsize = CAMELLIA_BLOCK_SIZE, - .setkey = xts_camellia_setkey, - .encrypt = xts_encrypt, - .decrypt = xts_decrypt, - }, - }, } }; static bool is_blacklisted_cpu(void) diff --git a/arch/x86/include/asm/crypto/camellia.h b/arch/x86/include/asm/crypto/camellia.h index 08d71444edc2..28080ccbff9f 100644 --- a/arch/x86/include/asm/crypto/camellia.h +++ b/arch/x86/include/asm/crypto/camellia.h @@ -2,8 +2,9 @@ #ifndef ASM_X86_CAMELLIA_H #define ASM_X86_CAMELLIA_H -#include +#include #include +#include #define CAMELLIA_MIN_KEY_SIZE 16 #define CAMELLIA_MAX_KEY_SIZE 32 diff --git a/crypto/Kconfig b/crypto/Kconfig index d2e8fac596da..4062efc8b31b 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -1147,7 +1147,6 @@ config CRYPTO_CAMELLIA_X86_64 depends on CRYPTO select CRYPTO_ALGAPI select CRYPTO_GLUE_HELPER_X86 - select CRYPTO_XTS help Camellia cipher algorithm module (x86_64). -- cgit v1.2.3 From 1af6d03710126392e7333417fcaf2427bfc8f169 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 19 Feb 2018 23:48:22 -0800 Subject: crypto: x86/camellia - convert to skcipher interface Convert the x86 asm implementation of Camellia from the (deprecated) blkcipher interface over to the skcipher interface. 
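The mechanical shape of such a conversion, distilled from the Blowfish and
DES3 patches earlier in this series (example_ctx and __example_cbc_encrypt
are placeholder names, not symbols from the patch): a blkcipher handler
taking (desc, dst, src, nbytes) becomes a request handler that walks the
request with the skcipher_walk API. The Camellia glue itself reaches the
same point through the request-based glue_helper entry points such as
glue_ecb_req_128bit() rather than open-coding the walk.

#include <crypto/internal/skcipher.h>

struct example_ctx;	/* placeholder for the algorithm's key material */

/* Placeholder worker; processes full blocks in the current walk chunk and
 * returns the number of bytes left over. */
static unsigned int __example_cbc_encrypt(struct example_ctx *ctx,
					  struct skcipher_walk *walk);

static int example_cbc_encrypt(struct skcipher_request *req)
{
	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
	struct example_ctx *ctx = crypto_skcipher_ctx(tfm);
	struct skcipher_walk walk;
	unsigned int nbytes;
	int err;

	err = skcipher_walk_virt(&walk, req, false);

	while ((nbytes = walk.nbytes)) {
		nbytes = __example_cbc_encrypt(ctx, &walk);
		err = skcipher_walk_done(&walk, nbytes);
	}

	return err;
}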
Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu --- arch/x86/crypto/camellia_glue.c | 162 ++++++++++++++++++++-------------------- crypto/Kconfig | 2 +- 2 files changed, 80 insertions(+), 84 deletions(-) (limited to 'arch') diff --git a/arch/x86/crypto/camellia_glue.c b/arch/x86/crypto/camellia_glue.c index 804ca69d5d0a..dcd5e0f71b00 100644 --- a/arch/x86/crypto/camellia_glue.c +++ b/arch/x86/crypto/camellia_glue.c @@ -23,7 +23,6 @@ * */ -#include #include #include #include @@ -1270,13 +1269,19 @@ int __camellia_setkey(struct camellia_ctx *cctx, const unsigned char *key, } EXPORT_SYMBOL_GPL(__camellia_setkey); -static int camellia_setkey(struct crypto_tfm *tfm, const u8 *in_key, +static int camellia_setkey(struct crypto_tfm *tfm, const u8 *key, unsigned int key_len) { - return __camellia_setkey(crypto_tfm_ctx(tfm), in_key, key_len, + return __camellia_setkey(crypto_tfm_ctx(tfm), key, key_len, &tfm->crt_flags); } +static int camellia_setkey_skcipher(struct crypto_skcipher *tfm, const u8 *key, + unsigned int key_len) +{ + return camellia_setkey(&tfm->base, key, key_len); +} + void camellia_decrypt_cbc_2way(void *ctx, u128 *dst, const u128 *src) { u128 iv = *src; @@ -1371,39 +1376,33 @@ static const struct common_glue_ctx camellia_dec_cbc = { } } }; -static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int ecb_encrypt(struct skcipher_request *req) { - return glue_ecb_crypt_128bit(&camellia_enc, desc, dst, src, nbytes); + return glue_ecb_req_128bit(&camellia_enc, req); } -static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int ecb_decrypt(struct skcipher_request *req) { - return glue_ecb_crypt_128bit(&camellia_dec, desc, dst, src, nbytes); + return glue_ecb_req_128bit(&camellia_dec, req); } -static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int cbc_encrypt(struct skcipher_request *req) { - return glue_cbc_encrypt_128bit(GLUE_FUNC_CAST(camellia_enc_blk), desc, - dst, src, nbytes); + return glue_cbc_encrypt_req_128bit(GLUE_FUNC_CAST(camellia_enc_blk), + req); } -static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int cbc_decrypt(struct skcipher_request *req) { - return glue_cbc_decrypt_128bit(&camellia_dec_cbc, desc, dst, src, - nbytes); + return glue_cbc_decrypt_req_128bit(&camellia_dec_cbc, req); } -static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int ctr_crypt(struct skcipher_request *req) { - return glue_ctr_crypt_128bit(&camellia_ctr, desc, dst, src, nbytes); + return glue_ctr_req_128bit(&camellia_ctr, req); } -static struct crypto_alg camellia_algs[] = { { +static struct crypto_alg camellia_cipher_alg = { .cra_name = "camellia", .cra_driver_name = "camellia-asm", .cra_priority = 200, @@ -1421,66 +1420,50 @@ static struct crypto_alg camellia_algs[] = { { .cia_decrypt = camellia_decrypt } } -}, { - .cra_name = "ecb(camellia)", - .cra_driver_name = "ecb-camellia-asm", - .cra_priority = 300, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, - .cra_blocksize = CAMELLIA_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct camellia_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_u = { - .blkcipher = { - .min_keysize = CAMELLIA_MIN_KEY_SIZE, - .max_keysize 
= CAMELLIA_MAX_KEY_SIZE, - .setkey = camellia_setkey, - .encrypt = ecb_encrypt, - .decrypt = ecb_decrypt, - }, - }, -}, { - .cra_name = "cbc(camellia)", - .cra_driver_name = "cbc-camellia-asm", - .cra_priority = 300, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, - .cra_blocksize = CAMELLIA_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct camellia_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_u = { - .blkcipher = { - .min_keysize = CAMELLIA_MIN_KEY_SIZE, - .max_keysize = CAMELLIA_MAX_KEY_SIZE, - .ivsize = CAMELLIA_BLOCK_SIZE, - .setkey = camellia_setkey, - .encrypt = cbc_encrypt, - .decrypt = cbc_decrypt, - }, - }, -}, { - .cra_name = "ctr(camellia)", - .cra_driver_name = "ctr-camellia-asm", - .cra_priority = 300, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, - .cra_blocksize = 1, - .cra_ctxsize = sizeof(struct camellia_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_u = { - .blkcipher = { - .min_keysize = CAMELLIA_MIN_KEY_SIZE, - .max_keysize = CAMELLIA_MAX_KEY_SIZE, - .ivsize = CAMELLIA_BLOCK_SIZE, - .setkey = camellia_setkey, - .encrypt = ctr_crypt, - .decrypt = ctr_crypt, - }, - }, -} }; +}; + +static struct skcipher_alg camellia_skcipher_algs[] = { + { + .base.cra_name = "ecb(camellia)", + .base.cra_driver_name = "ecb-camellia-asm", + .base.cra_priority = 300, + .base.cra_blocksize = CAMELLIA_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct camellia_ctx), + .base.cra_module = THIS_MODULE, + .min_keysize = CAMELLIA_MIN_KEY_SIZE, + .max_keysize = CAMELLIA_MAX_KEY_SIZE, + .setkey = camellia_setkey_skcipher, + .encrypt = ecb_encrypt, + .decrypt = ecb_decrypt, + }, { + .base.cra_name = "cbc(camellia)", + .base.cra_driver_name = "cbc-camellia-asm", + .base.cra_priority = 300, + .base.cra_blocksize = CAMELLIA_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct camellia_ctx), + .base.cra_module = THIS_MODULE, + .min_keysize = CAMELLIA_MIN_KEY_SIZE, + .max_keysize = CAMELLIA_MAX_KEY_SIZE, + .ivsize = CAMELLIA_BLOCK_SIZE, + .setkey = camellia_setkey_skcipher, + .encrypt = cbc_encrypt, + .decrypt = cbc_decrypt, + }, { + .base.cra_name = "ctr(camellia)", + .base.cra_driver_name = "ctr-camellia-asm", + .base.cra_priority = 300, + .base.cra_blocksize = 1, + .base.cra_ctxsize = sizeof(struct camellia_ctx), + .base.cra_module = THIS_MODULE, + .min_keysize = CAMELLIA_MIN_KEY_SIZE, + .max_keysize = CAMELLIA_MAX_KEY_SIZE, + .ivsize = CAMELLIA_BLOCK_SIZE, + .chunksize = CAMELLIA_BLOCK_SIZE, + .setkey = camellia_setkey_skcipher, + .encrypt = ctr_crypt, + .decrypt = ctr_crypt, + } +}; static bool is_blacklisted_cpu(void) { @@ -1506,6 +1489,8 @@ MODULE_PARM_DESC(force, "Force module load, ignore CPU blacklist"); static int __init init(void) { + int err; + if (!force && is_blacklisted_cpu()) { printk(KERN_INFO "camellia-x86_64: performance on this CPU " @@ -1514,12 +1499,23 @@ static int __init init(void) return -ENODEV; } - return crypto_register_algs(camellia_algs, ARRAY_SIZE(camellia_algs)); + err = crypto_register_alg(&camellia_cipher_alg); + if (err) + return err; + + err = crypto_register_skciphers(camellia_skcipher_algs, + ARRAY_SIZE(camellia_skcipher_algs)); + if (err) + crypto_unregister_alg(&camellia_cipher_alg); + + return err; } static void __exit fini(void) { - crypto_unregister_algs(camellia_algs, ARRAY_SIZE(camellia_algs)); + crypto_unregister_alg(&camellia_cipher_alg); + crypto_unregister_skciphers(camellia_skcipher_algs, + ARRAY_SIZE(camellia_skcipher_algs)); } module_init(init); diff --git 
a/crypto/Kconfig b/crypto/Kconfig index 4062efc8b31b..7227c20a9ba5 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -1145,7 +1145,7 @@ config CRYPTO_CAMELLIA_X86_64 tristate "Camellia cipher algorithm (x86_64)" depends on X86 && 64BIT depends on CRYPTO - select CRYPTO_ALGAPI + select CRYPTO_BLKCIPHER select CRYPTO_GLUE_HELPER_X86 help Camellia cipher algorithm module (x86_64). -- cgit v1.2.3 From 44893bc2962363417ff9bed7876e0e58741e4f76 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 19 Feb 2018 23:48:23 -0800 Subject: crypto: x86/camellia-aesni-avx, avx2 - convert to skcipher interface Convert the AESNI AVX and AESNI AVX2 implementations of Camellia from the (deprecated) ablkcipher and blkcipher interfaces over to the skcipher interface. Note that this includes replacing the use of ablk_helper with crypto_simd. Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu --- arch/x86/crypto/camellia_aesni_avx2_glue.c | 313 +++++++++------------------- arch/x86/crypto/camellia_aesni_avx_glue.c | 321 +++++++++-------------------- arch/x86/include/asm/crypto/camellia.h | 4 +- crypto/Kconfig | 13 +- 4 files changed, 208 insertions(+), 443 deletions(-) (limited to 'arch') diff --git a/arch/x86/crypto/camellia_aesni_avx2_glue.c b/arch/x86/crypto/camellia_aesni_avx2_glue.c index 42a488dff50c..d4992e458f92 100644 --- a/arch/x86/crypto/camellia_aesni_avx2_glue.c +++ b/arch/x86/crypto/camellia_aesni_avx2_glue.c @@ -10,17 +10,15 @@ * */ -#include -#include -#include -#include -#include -#include -#include -#include -#include #include #include +#include +#include +#include +#include +#include +#include +#include #define CAMELLIA_AESNI_PARALLEL_BLOCKS 16 #define CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS 32 @@ -149,236 +147,120 @@ static const struct common_glue_ctx camellia_dec_xts = { } } }; -static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int camellia_setkey(struct crypto_skcipher *tfm, const u8 *key, + unsigned int keylen) { - return glue_ecb_crypt_128bit(&camellia_enc, desc, dst, src, nbytes); + return __camellia_setkey(crypto_skcipher_ctx(tfm), key, keylen, + &tfm->base.crt_flags); } -static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int ecb_encrypt(struct skcipher_request *req) { - return glue_ecb_crypt_128bit(&camellia_dec, desc, dst, src, nbytes); + return glue_ecb_req_128bit(&camellia_enc, req); } -static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int ecb_decrypt(struct skcipher_request *req) { - return glue_cbc_encrypt_128bit(GLUE_FUNC_CAST(camellia_enc_blk), desc, - dst, src, nbytes); + return glue_ecb_req_128bit(&camellia_dec, req); } -static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int cbc_encrypt(struct skcipher_request *req) { - return glue_cbc_decrypt_128bit(&camellia_dec_cbc, desc, dst, src, - nbytes); + return glue_cbc_encrypt_req_128bit(GLUE_FUNC_CAST(camellia_enc_blk), + req); } -static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int cbc_decrypt(struct skcipher_request *req) { - return glue_ctr_crypt_128bit(&camellia_ctr, desc, dst, src, nbytes); + return glue_cbc_decrypt_req_128bit(&camellia_dec_cbc, req); } -static int camellia_setkey(struct crypto_tfm *tfm, const u8 
*in_key, - unsigned int key_len) +static int ctr_crypt(struct skcipher_request *req) { - return __camellia_setkey(crypto_tfm_ctx(tfm), in_key, key_len, - &tfm->crt_flags); + return glue_ctr_req_128bit(&camellia_ctr, req); } -static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int xts_encrypt(struct skcipher_request *req) { - struct camellia_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct camellia_xts_ctx *ctx = crypto_skcipher_ctx(tfm); - return glue_xts_crypt_128bit(&camellia_enc_xts, desc, dst, src, nbytes, - XTS_TWEAK_CAST(camellia_enc_blk), - &ctx->tweak_ctx, &ctx->crypt_ctx); + return glue_xts_req_128bit(&camellia_enc_xts, req, + XTS_TWEAK_CAST(camellia_enc_blk), + &ctx->tweak_ctx, &ctx->crypt_ctx); } -static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int xts_decrypt(struct skcipher_request *req) { - struct camellia_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct camellia_xts_ctx *ctx = crypto_skcipher_ctx(tfm); - return glue_xts_crypt_128bit(&camellia_dec_xts, desc, dst, src, nbytes, - XTS_TWEAK_CAST(camellia_enc_blk), - &ctx->tweak_ctx, &ctx->crypt_ctx); + return glue_xts_req_128bit(&camellia_dec_xts, req, + XTS_TWEAK_CAST(camellia_enc_blk), + &ctx->tweak_ctx, &ctx->crypt_ctx); } -static struct crypto_alg cmll_algs[] = { { - .cra_name = "__ecb-camellia-aesni-avx2", - .cra_driver_name = "__driver-ecb-camellia-aesni-avx2", - .cra_priority = 0, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | - CRYPTO_ALG_INTERNAL, - .cra_blocksize = CAMELLIA_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct camellia_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_u = { - .blkcipher = { - .min_keysize = CAMELLIA_MIN_KEY_SIZE, - .max_keysize = CAMELLIA_MAX_KEY_SIZE, - .setkey = camellia_setkey, - .encrypt = ecb_encrypt, - .decrypt = ecb_decrypt, - }, - }, -}, { - .cra_name = "__cbc-camellia-aesni-avx2", - .cra_driver_name = "__driver-cbc-camellia-aesni-avx2", - .cra_priority = 0, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | - CRYPTO_ALG_INTERNAL, - .cra_blocksize = CAMELLIA_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct camellia_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_u = { - .blkcipher = { - .min_keysize = CAMELLIA_MIN_KEY_SIZE, - .max_keysize = CAMELLIA_MAX_KEY_SIZE, - .setkey = camellia_setkey, - .encrypt = cbc_encrypt, - .decrypt = cbc_decrypt, - }, - }, -}, { - .cra_name = "__ctr-camellia-aesni-avx2", - .cra_driver_name = "__driver-ctr-camellia-aesni-avx2", - .cra_priority = 0, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | - CRYPTO_ALG_INTERNAL, - .cra_blocksize = 1, - .cra_ctxsize = sizeof(struct camellia_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_u = { - .blkcipher = { - .min_keysize = CAMELLIA_MIN_KEY_SIZE, - .max_keysize = CAMELLIA_MAX_KEY_SIZE, - .ivsize = CAMELLIA_BLOCK_SIZE, - .setkey = camellia_setkey, - .encrypt = ctr_crypt, - .decrypt = ctr_crypt, - }, - }, -}, { - .cra_name = "__xts-camellia-aesni-avx2", - .cra_driver_name = "__driver-xts-camellia-aesni-avx2", - .cra_priority = 0, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | - CRYPTO_ALG_INTERNAL, - .cra_blocksize = CAMELLIA_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct camellia_xts_ctx), - 
.cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_u = { - .blkcipher = { - .min_keysize = CAMELLIA_MIN_KEY_SIZE * 2, - .max_keysize = CAMELLIA_MAX_KEY_SIZE * 2, - .ivsize = CAMELLIA_BLOCK_SIZE, - .setkey = xts_camellia_setkey, - .encrypt = xts_encrypt, - .decrypt = xts_decrypt, - }, - }, -}, { - .cra_name = "ecb(camellia)", - .cra_driver_name = "ecb-camellia-aesni-avx2", - .cra_priority = 500, - .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, - .cra_blocksize = CAMELLIA_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct async_helper_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_ablkcipher_type, - .cra_module = THIS_MODULE, - .cra_init = ablk_init, - .cra_exit = ablk_exit, - .cra_u = { - .ablkcipher = { - .min_keysize = CAMELLIA_MIN_KEY_SIZE, - .max_keysize = CAMELLIA_MAX_KEY_SIZE, - .setkey = ablk_set_key, - .encrypt = ablk_encrypt, - .decrypt = ablk_decrypt, - }, - }, -}, { - .cra_name = "cbc(camellia)", - .cra_driver_name = "cbc-camellia-aesni-avx2", - .cra_priority = 500, - .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, - .cra_blocksize = CAMELLIA_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct async_helper_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_ablkcipher_type, - .cra_module = THIS_MODULE, - .cra_init = ablk_init, - .cra_exit = ablk_exit, - .cra_u = { - .ablkcipher = { - .min_keysize = CAMELLIA_MIN_KEY_SIZE, - .max_keysize = CAMELLIA_MAX_KEY_SIZE, - .ivsize = CAMELLIA_BLOCK_SIZE, - .setkey = ablk_set_key, - .encrypt = __ablk_encrypt, - .decrypt = ablk_decrypt, - }, - }, -}, { - .cra_name = "ctr(camellia)", - .cra_driver_name = "ctr-camellia-aesni-avx2", - .cra_priority = 500, - .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, - .cra_blocksize = 1, - .cra_ctxsize = sizeof(struct async_helper_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_ablkcipher_type, - .cra_module = THIS_MODULE, - .cra_init = ablk_init, - .cra_exit = ablk_exit, - .cra_u = { - .ablkcipher = { - .min_keysize = CAMELLIA_MIN_KEY_SIZE, - .max_keysize = CAMELLIA_MAX_KEY_SIZE, - .ivsize = CAMELLIA_BLOCK_SIZE, - .setkey = ablk_set_key, - .encrypt = ablk_encrypt, - .decrypt = ablk_encrypt, - .geniv = "chainiv", - }, - }, -}, { - .cra_name = "xts(camellia)", - .cra_driver_name = "xts-camellia-aesni-avx2", - .cra_priority = 500, - .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, - .cra_blocksize = CAMELLIA_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct async_helper_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_ablkcipher_type, - .cra_module = THIS_MODULE, - .cra_init = ablk_init, - .cra_exit = ablk_exit, - .cra_u = { - .ablkcipher = { - .min_keysize = CAMELLIA_MIN_KEY_SIZE * 2, - .max_keysize = CAMELLIA_MAX_KEY_SIZE * 2, - .ivsize = CAMELLIA_BLOCK_SIZE, - .setkey = ablk_set_key, - .encrypt = ablk_encrypt, - .decrypt = ablk_decrypt, - }, +static struct skcipher_alg camellia_algs[] = { + { + .base.cra_name = "__ecb(camellia)", + .base.cra_driver_name = "__ecb-camellia-aesni-avx2", + .base.cra_priority = 500, + .base.cra_flags = CRYPTO_ALG_INTERNAL, + .base.cra_blocksize = CAMELLIA_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct camellia_ctx), + .base.cra_module = THIS_MODULE, + .min_keysize = CAMELLIA_MIN_KEY_SIZE, + .max_keysize = CAMELLIA_MAX_KEY_SIZE, + .setkey = camellia_setkey, + .encrypt = ecb_encrypt, + .decrypt = ecb_decrypt, + }, { + .base.cra_name = "__cbc(camellia)", + .base.cra_driver_name = "__cbc-camellia-aesni-avx2", + .base.cra_priority = 500, + .base.cra_flags = CRYPTO_ALG_INTERNAL, + .base.cra_blocksize = 
CAMELLIA_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct camellia_ctx), + .base.cra_module = THIS_MODULE, + .min_keysize = CAMELLIA_MIN_KEY_SIZE, + .max_keysize = CAMELLIA_MAX_KEY_SIZE, + .ivsize = CAMELLIA_BLOCK_SIZE, + .setkey = camellia_setkey, + .encrypt = cbc_encrypt, + .decrypt = cbc_decrypt, + }, { + .base.cra_name = "__ctr(camellia)", + .base.cra_driver_name = "__ctr-camellia-aesni-avx2", + .base.cra_priority = 500, + .base.cra_flags = CRYPTO_ALG_INTERNAL, + .base.cra_blocksize = 1, + .base.cra_ctxsize = sizeof(struct camellia_ctx), + .base.cra_module = THIS_MODULE, + .min_keysize = CAMELLIA_MIN_KEY_SIZE, + .max_keysize = CAMELLIA_MAX_KEY_SIZE, + .ivsize = CAMELLIA_BLOCK_SIZE, + .chunksize = CAMELLIA_BLOCK_SIZE, + .setkey = camellia_setkey, + .encrypt = ctr_crypt, + .decrypt = ctr_crypt, + }, { + .base.cra_name = "__xts(camellia)", + .base.cra_driver_name = "__xts-camellia-aesni-avx2", + .base.cra_priority = 500, + .base.cra_flags = CRYPTO_ALG_INTERNAL, + .base.cra_blocksize = CAMELLIA_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct camellia_xts_ctx), + .base.cra_module = THIS_MODULE, + .min_keysize = 2 * CAMELLIA_MIN_KEY_SIZE, + .max_keysize = 2 * CAMELLIA_MAX_KEY_SIZE, + .ivsize = CAMELLIA_BLOCK_SIZE, + .setkey = xts_camellia_setkey, + .encrypt = xts_encrypt, + .decrypt = xts_decrypt, }, -} }; +}; + +static struct simd_skcipher_alg *camellia_simd_algs[ARRAY_SIZE(camellia_algs)]; static int __init camellia_aesni_init(void) { @@ -398,12 +280,15 @@ static int __init camellia_aesni_init(void) return -ENODEV; } - return crypto_register_algs(cmll_algs, ARRAY_SIZE(cmll_algs)); + return simd_register_skciphers_compat(camellia_algs, + ARRAY_SIZE(camellia_algs), + camellia_simd_algs); } static void __exit camellia_aesni_fini(void) { - crypto_unregister_algs(cmll_algs, ARRAY_SIZE(cmll_algs)); + simd_unregister_skciphers(camellia_algs, ARRAY_SIZE(camellia_algs), + camellia_simd_algs); } module_init(camellia_aesni_init); diff --git a/arch/x86/crypto/camellia_aesni_avx_glue.c b/arch/x86/crypto/camellia_aesni_avx_glue.c index 72911a706923..d09f6521466a 100644 --- a/arch/x86/crypto/camellia_aesni_avx_glue.c +++ b/arch/x86/crypto/camellia_aesni_avx_glue.c @@ -10,17 +10,15 @@ * */ -#include -#include -#include -#include -#include -#include -#include -#include -#include #include #include +#include +#include +#include +#include +#include +#include +#include #define CAMELLIA_AESNI_PARALLEL_BLOCKS 16 @@ -153,53 +151,47 @@ static const struct common_glue_ctx camellia_dec_xts = { } } }; -static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int camellia_setkey(struct crypto_skcipher *tfm, const u8 *key, + unsigned int keylen) { - return glue_ecb_crypt_128bit(&camellia_enc, desc, dst, src, nbytes); + return __camellia_setkey(crypto_skcipher_ctx(tfm), key, keylen, + &tfm->base.crt_flags); } -static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int ecb_encrypt(struct skcipher_request *req) { - return glue_ecb_crypt_128bit(&camellia_dec, desc, dst, src, nbytes); + return glue_ecb_req_128bit(&camellia_enc, req); } -static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int ecb_decrypt(struct skcipher_request *req) { - return glue_cbc_encrypt_128bit(GLUE_FUNC_CAST(camellia_enc_blk), desc, - dst, src, nbytes); + return glue_ecb_req_128bit(&camellia_dec, req); } -static int 
cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int cbc_encrypt(struct skcipher_request *req) { - return glue_cbc_decrypt_128bit(&camellia_dec_cbc, desc, dst, src, - nbytes); + return glue_cbc_encrypt_req_128bit(GLUE_FUNC_CAST(camellia_enc_blk), + req); } -static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int cbc_decrypt(struct skcipher_request *req) { - return glue_ctr_crypt_128bit(&camellia_ctr, desc, dst, src, nbytes); + return glue_cbc_decrypt_req_128bit(&camellia_dec_cbc, req); } -static int camellia_setkey(struct crypto_tfm *tfm, const u8 *in_key, - unsigned int key_len) +static int ctr_crypt(struct skcipher_request *req) { - return __camellia_setkey(crypto_tfm_ctx(tfm), in_key, key_len, - &tfm->crt_flags); + return glue_ctr_req_128bit(&camellia_ctr, req); } -int xts_camellia_setkey(struct crypto_tfm *tfm, const u8 *key, +int xts_camellia_setkey(struct crypto_skcipher *tfm, const u8 *key, unsigned int keylen) { - struct camellia_xts_ctx *ctx = crypto_tfm_ctx(tfm); - u32 *flags = &tfm->crt_flags; + struct camellia_xts_ctx *ctx = crypto_skcipher_ctx(tfm); + u32 *flags = &tfm->base.crt_flags; int err; - err = xts_check_key(tfm, key, keylen); + err = xts_verify_key(tfm, key, keylen); if (err) return err; @@ -214,197 +206,87 @@ int xts_camellia_setkey(struct crypto_tfm *tfm, const u8 *key, } EXPORT_SYMBOL_GPL(xts_camellia_setkey); -static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int xts_encrypt(struct skcipher_request *req) { - struct camellia_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct camellia_xts_ctx *ctx = crypto_skcipher_ctx(tfm); - return glue_xts_crypt_128bit(&camellia_enc_xts, desc, dst, src, nbytes, - XTS_TWEAK_CAST(camellia_enc_blk), - &ctx->tweak_ctx, &ctx->crypt_ctx); + return glue_xts_req_128bit(&camellia_enc_xts, req, + XTS_TWEAK_CAST(camellia_enc_blk), + &ctx->tweak_ctx, &ctx->crypt_ctx); } -static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int xts_decrypt(struct skcipher_request *req) { - struct camellia_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct camellia_xts_ctx *ctx = crypto_skcipher_ctx(tfm); - return glue_xts_crypt_128bit(&camellia_dec_xts, desc, dst, src, nbytes, - XTS_TWEAK_CAST(camellia_enc_blk), - &ctx->tweak_ctx, &ctx->crypt_ctx); + return glue_xts_req_128bit(&camellia_dec_xts, req, + XTS_TWEAK_CAST(camellia_enc_blk), + &ctx->tweak_ctx, &ctx->crypt_ctx); } -static struct crypto_alg cmll_algs[] = { { - .cra_name = "__ecb-camellia-aesni", - .cra_driver_name = "__driver-ecb-camellia-aesni", - .cra_priority = 0, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | - CRYPTO_ALG_INTERNAL, - .cra_blocksize = CAMELLIA_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct camellia_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_u = { - .blkcipher = { - .min_keysize = CAMELLIA_MIN_KEY_SIZE, - .max_keysize = CAMELLIA_MAX_KEY_SIZE, - .setkey = camellia_setkey, - .encrypt = ecb_encrypt, - .decrypt = ecb_decrypt, - }, - }, -}, { - .cra_name = "__cbc-camellia-aesni", - .cra_driver_name = "__driver-cbc-camellia-aesni", - .cra_priority = 0, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | - 
CRYPTO_ALG_INTERNAL, - .cra_blocksize = CAMELLIA_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct camellia_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_u = { - .blkcipher = { - .min_keysize = CAMELLIA_MIN_KEY_SIZE, - .max_keysize = CAMELLIA_MAX_KEY_SIZE, - .setkey = camellia_setkey, - .encrypt = cbc_encrypt, - .decrypt = cbc_decrypt, - }, - }, -}, { - .cra_name = "__ctr-camellia-aesni", - .cra_driver_name = "__driver-ctr-camellia-aesni", - .cra_priority = 0, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | - CRYPTO_ALG_INTERNAL, - .cra_blocksize = 1, - .cra_ctxsize = sizeof(struct camellia_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_u = { - .blkcipher = { - .min_keysize = CAMELLIA_MIN_KEY_SIZE, - .max_keysize = CAMELLIA_MAX_KEY_SIZE, - .ivsize = CAMELLIA_BLOCK_SIZE, - .setkey = camellia_setkey, - .encrypt = ctr_crypt, - .decrypt = ctr_crypt, - }, - }, -}, { - .cra_name = "__xts-camellia-aesni", - .cra_driver_name = "__driver-xts-camellia-aesni", - .cra_priority = 0, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | - CRYPTO_ALG_INTERNAL, - .cra_blocksize = CAMELLIA_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct camellia_xts_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_u = { - .blkcipher = { - .min_keysize = CAMELLIA_MIN_KEY_SIZE * 2, - .max_keysize = CAMELLIA_MAX_KEY_SIZE * 2, - .ivsize = CAMELLIA_BLOCK_SIZE, - .setkey = xts_camellia_setkey, - .encrypt = xts_encrypt, - .decrypt = xts_decrypt, - }, - }, -}, { - .cra_name = "ecb(camellia)", - .cra_driver_name = "ecb-camellia-aesni", - .cra_priority = 400, - .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, - .cra_blocksize = CAMELLIA_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct async_helper_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_ablkcipher_type, - .cra_module = THIS_MODULE, - .cra_init = ablk_init, - .cra_exit = ablk_exit, - .cra_u = { - .ablkcipher = { - .min_keysize = CAMELLIA_MIN_KEY_SIZE, - .max_keysize = CAMELLIA_MAX_KEY_SIZE, - .setkey = ablk_set_key, - .encrypt = ablk_encrypt, - .decrypt = ablk_decrypt, - }, - }, -}, { - .cra_name = "cbc(camellia)", - .cra_driver_name = "cbc-camellia-aesni", - .cra_priority = 400, - .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, - .cra_blocksize = CAMELLIA_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct async_helper_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_ablkcipher_type, - .cra_module = THIS_MODULE, - .cra_init = ablk_init, - .cra_exit = ablk_exit, - .cra_u = { - .ablkcipher = { - .min_keysize = CAMELLIA_MIN_KEY_SIZE, - .max_keysize = CAMELLIA_MAX_KEY_SIZE, - .ivsize = CAMELLIA_BLOCK_SIZE, - .setkey = ablk_set_key, - .encrypt = __ablk_encrypt, - .decrypt = ablk_decrypt, - }, - }, -}, { - .cra_name = "ctr(camellia)", - .cra_driver_name = "ctr-camellia-aesni", - .cra_priority = 400, - .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, - .cra_blocksize = 1, - .cra_ctxsize = sizeof(struct async_helper_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_ablkcipher_type, - .cra_module = THIS_MODULE, - .cra_init = ablk_init, - .cra_exit = ablk_exit, - .cra_u = { - .ablkcipher = { - .min_keysize = CAMELLIA_MIN_KEY_SIZE, - .max_keysize = CAMELLIA_MAX_KEY_SIZE, - .ivsize = CAMELLIA_BLOCK_SIZE, - .setkey = ablk_set_key, - .encrypt = ablk_encrypt, - .decrypt = ablk_encrypt, - .geniv = "chainiv", - }, - }, -}, { - .cra_name = "xts(camellia)", - .cra_driver_name = "xts-camellia-aesni", - .cra_priority = 400, - .cra_flags = 
CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, - .cra_blocksize = CAMELLIA_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct async_helper_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_ablkcipher_type, - .cra_module = THIS_MODULE, - .cra_init = ablk_init, - .cra_exit = ablk_exit, - .cra_u = { - .ablkcipher = { - .min_keysize = CAMELLIA_MIN_KEY_SIZE * 2, - .max_keysize = CAMELLIA_MAX_KEY_SIZE * 2, - .ivsize = CAMELLIA_BLOCK_SIZE, - .setkey = ablk_set_key, - .encrypt = ablk_encrypt, - .decrypt = ablk_decrypt, - }, +static struct skcipher_alg camellia_algs[] = { + { + .base.cra_name = "__ecb(camellia)", + .base.cra_driver_name = "__ecb-camellia-aesni", + .base.cra_priority = 400, + .base.cra_flags = CRYPTO_ALG_INTERNAL, + .base.cra_blocksize = CAMELLIA_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct camellia_ctx), + .base.cra_module = THIS_MODULE, + .min_keysize = CAMELLIA_MIN_KEY_SIZE, + .max_keysize = CAMELLIA_MAX_KEY_SIZE, + .setkey = camellia_setkey, + .encrypt = ecb_encrypt, + .decrypt = ecb_decrypt, + }, { + .base.cra_name = "__cbc(camellia)", + .base.cra_driver_name = "__cbc-camellia-aesni", + .base.cra_priority = 400, + .base.cra_flags = CRYPTO_ALG_INTERNAL, + .base.cra_blocksize = CAMELLIA_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct camellia_ctx), + .base.cra_module = THIS_MODULE, + .min_keysize = CAMELLIA_MIN_KEY_SIZE, + .max_keysize = CAMELLIA_MAX_KEY_SIZE, + .ivsize = CAMELLIA_BLOCK_SIZE, + .setkey = camellia_setkey, + .encrypt = cbc_encrypt, + .decrypt = cbc_decrypt, + }, { + .base.cra_name = "__ctr(camellia)", + .base.cra_driver_name = "__ctr-camellia-aesni", + .base.cra_priority = 400, + .base.cra_flags = CRYPTO_ALG_INTERNAL, + .base.cra_blocksize = 1, + .base.cra_ctxsize = sizeof(struct camellia_ctx), + .base.cra_module = THIS_MODULE, + .min_keysize = CAMELLIA_MIN_KEY_SIZE, + .max_keysize = CAMELLIA_MAX_KEY_SIZE, + .ivsize = CAMELLIA_BLOCK_SIZE, + .chunksize = CAMELLIA_BLOCK_SIZE, + .setkey = camellia_setkey, + .encrypt = ctr_crypt, + .decrypt = ctr_crypt, + }, { + .base.cra_name = "__xts(camellia)", + .base.cra_driver_name = "__xts-camellia-aesni", + .base.cra_priority = 400, + .base.cra_flags = CRYPTO_ALG_INTERNAL, + .base.cra_blocksize = CAMELLIA_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct camellia_xts_ctx), + .base.cra_module = THIS_MODULE, + .min_keysize = 2 * CAMELLIA_MIN_KEY_SIZE, + .max_keysize = 2 * CAMELLIA_MAX_KEY_SIZE, + .ivsize = CAMELLIA_BLOCK_SIZE, + .setkey = xts_camellia_setkey, + .encrypt = xts_encrypt, + .decrypt = xts_decrypt, }, -} }; +}; + +static struct simd_skcipher_alg *camellia_simd_algs[ARRAY_SIZE(camellia_algs)]; static int __init camellia_aesni_init(void) { @@ -423,12 +305,15 @@ static int __init camellia_aesni_init(void) return -ENODEV; } - return crypto_register_algs(cmll_algs, ARRAY_SIZE(cmll_algs)); + return simd_register_skciphers_compat(camellia_algs, + ARRAY_SIZE(camellia_algs), + camellia_simd_algs); } static void __exit camellia_aesni_fini(void) { - crypto_unregister_algs(cmll_algs, ARRAY_SIZE(cmll_algs)); + simd_unregister_skciphers(camellia_algs, ARRAY_SIZE(camellia_algs), + camellia_simd_algs); } module_init(camellia_aesni_init); diff --git a/arch/x86/include/asm/crypto/camellia.h b/arch/x86/include/asm/crypto/camellia.h index 28080ccbff9f..a5d86fc0593f 100644 --- a/arch/x86/include/asm/crypto/camellia.h +++ b/arch/x86/include/asm/crypto/camellia.h @@ -12,6 +12,8 @@ #define CAMELLIA_TABLE_BYTE_LEN 272 #define CAMELLIA_PARALLEL_BLOCKS 2 +struct crypto_skcipher; + struct camellia_ctx { u64 key_table[CAMELLIA_TABLE_BYTE_LEN / 
sizeof(u64)]; u32 key_length; @@ -26,7 +28,7 @@ extern int __camellia_setkey(struct camellia_ctx *cctx, const unsigned char *key, unsigned int key_len, u32 *flags); -extern int xts_camellia_setkey(struct crypto_tfm *tfm, const u8 *key, +extern int xts_camellia_setkey(struct crypto_skcipher *tfm, const u8 *key, unsigned int keylen); /* regular block cipher functions */ diff --git a/crypto/Kconfig b/crypto/Kconfig index 7227c20a9ba5..8783dcf20fc3 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -1162,11 +1162,10 @@ config CRYPTO_CAMELLIA_AESNI_AVX_X86_64 tristate "Camellia cipher algorithm (x86_64/AES-NI/AVX)" depends on X86 && 64BIT depends on CRYPTO - select CRYPTO_ALGAPI - select CRYPTO_CRYPTD - select CRYPTO_ABLK_HELPER - select CRYPTO_GLUE_HELPER_X86 + select CRYPTO_BLKCIPHER select CRYPTO_CAMELLIA_X86_64 + select CRYPTO_GLUE_HELPER_X86 + select CRYPTO_SIMD select CRYPTO_XTS help Camellia cipher algorithm module (x86_64/AES-NI/AVX). @@ -1183,13 +1182,7 @@ config CRYPTO_CAMELLIA_AESNI_AVX2_X86_64 tristate "Camellia cipher algorithm (x86_64/AES-NI/AVX2)" depends on X86 && 64BIT depends on CRYPTO - select CRYPTO_ALGAPI - select CRYPTO_CRYPTD - select CRYPTO_ABLK_HELPER - select CRYPTO_GLUE_HELPER_X86 - select CRYPTO_CAMELLIA_X86_64 select CRYPTO_CAMELLIA_AESNI_AVX_X86_64 - select CRYPTO_XTS help Camellia cipher algorithm module (x86_64/AES-NI/AVX2). -- cgit v1.2.3 From 217afccf65064709fb032652ee17cc0a8f68b7b5 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 19 Feb 2018 23:48:25 -0800 Subject: crypto: lrw - remove lrw_crypt() Now that all users of lrw_crypt() have been removed in favor of the LRW template wrapping an ECB mode algorithm, remove lrw_crypt(). Also remove crypto/lrw.h as that is no longer needed either; and fold 'struct lrw_table_ctx' into 'struct priv', lrw_init_table() into setkey(), and lrw_free_table() into exit_tfm(). Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu --- arch/x86/crypto/glue_helper.c | 1 - crypto/lrw.c | 152 +++++++++++------------------------------- include/crypto/lrw.h | 44 ------------ 3 files changed, 39 insertions(+), 158 deletions(-) delete mode 100644 include/crypto/lrw.h (limited to 'arch') diff --git a/arch/x86/crypto/glue_helper.c b/arch/x86/crypto/glue_helper.c index 5b909790bf8a..cd5e7cebdb9f 100644 --- a/arch/x86/crypto/glue_helper.c +++ b/arch/x86/crypto/glue_helper.c @@ -29,7 +29,6 @@ #include #include #include -#include #include #include diff --git a/crypto/lrw.c b/crypto/lrw.c index cbbd7c50ad19..a09cdaa6ddf3 100644 --- a/crypto/lrw.c +++ b/crypto/lrw.c @@ -28,13 +28,31 @@ #include #include -#include #define LRW_BUFFER_SIZE 128u +#define LRW_BLOCK_SIZE 16 + struct priv { struct crypto_skcipher *child; - struct lrw_table_ctx table; + + /* + * optimizes multiplying a random (non incrementing, as at the + * start of a new sector) value with key2, we could also have + * used 4k optimization tables or no optimization at all. 
In the + * latter case we would have to store key2 here + */ + struct gf128mul_64k *table; + + /* + * stores: + * key2*{ 0,0,...0,0,0,0,1 }, key2*{ 0,0,...0,0,0,1,1 }, + * key2*{ 0,0,...0,0,1,1,1 }, key2*{ 0,0,...0,1,1,1,1 } + * key2*{ 0,0,...1,1,1,1,1 }, etc + * needed for optimized multiplication of incrementing values + * with key2 + */ + be128 mulinc[128]; }; struct rctx { @@ -65,11 +83,25 @@ static inline void setbit128_bbe(void *b, int bit) ), b); } -int lrw_init_table(struct lrw_table_ctx *ctx, const u8 *tweak) +static int setkey(struct crypto_skcipher *parent, const u8 *key, + unsigned int keylen) { + struct priv *ctx = crypto_skcipher_ctx(parent); + struct crypto_skcipher *child = ctx->child; + int err, bsize = LRW_BLOCK_SIZE; + const u8 *tweak = key + keylen - bsize; be128 tmp = { 0 }; int i; + crypto_skcipher_clear_flags(child, CRYPTO_TFM_REQ_MASK); + crypto_skcipher_set_flags(child, crypto_skcipher_get_flags(parent) & + CRYPTO_TFM_REQ_MASK); + err = crypto_skcipher_setkey(child, key, keylen - bsize); + crypto_skcipher_set_flags(parent, crypto_skcipher_get_flags(child) & + CRYPTO_TFM_RES_MASK); + if (err) + return err; + if (ctx->table) gf128mul_free_64k(ctx->table); @@ -87,34 +119,6 @@ int lrw_init_table(struct lrw_table_ctx *ctx, const u8 *tweak) return 0; } -EXPORT_SYMBOL_GPL(lrw_init_table); - -void lrw_free_table(struct lrw_table_ctx *ctx) -{ - if (ctx->table) - gf128mul_free_64k(ctx->table); -} -EXPORT_SYMBOL_GPL(lrw_free_table); - -static int setkey(struct crypto_skcipher *parent, const u8 *key, - unsigned int keylen) -{ - struct priv *ctx = crypto_skcipher_ctx(parent); - struct crypto_skcipher *child = ctx->child; - int err, bsize = LRW_BLOCK_SIZE; - const u8 *tweak = key + keylen - bsize; - - crypto_skcipher_clear_flags(child, CRYPTO_TFM_REQ_MASK); - crypto_skcipher_set_flags(child, crypto_skcipher_get_flags(parent) & - CRYPTO_TFM_REQ_MASK); - err = crypto_skcipher_setkey(child, key, keylen - bsize); - crypto_skcipher_set_flags(parent, crypto_skcipher_get_flags(child) & - CRYPTO_TFM_RES_MASK); - if (err) - return err; - - return lrw_init_table(&ctx->table, tweak); -} static inline void inc(be128 *iv) { @@ -238,7 +242,7 @@ static int pre_crypt(struct skcipher_request *req) /* T <- I*Key2, using the optimization * discussed in the specification */ be128_xor(&rctx->t, &rctx->t, - &ctx->table.mulinc[get_index128(iv)]); + &ctx->mulinc[get_index128(iv)]); inc(iv); } while ((avail -= bs) >= bs); @@ -301,7 +305,7 @@ static int init_crypt(struct skcipher_request *req, crypto_completion_t done) memcpy(&rctx->t, req->iv, sizeof(rctx->t)); /* T <- I*Key2 */ - gf128mul_64k_bbe(&rctx->t, ctx->table.table); + gf128mul_64k_bbe(&rctx->t, ctx->table); return 0; } @@ -416,85 +420,6 @@ static int decrypt(struct skcipher_request *req) return do_decrypt(req, init_crypt(req, decrypt_done)); } -int lrw_crypt(struct blkcipher_desc *desc, struct scatterlist *sdst, - struct scatterlist *ssrc, unsigned int nbytes, - struct lrw_crypt_req *req) -{ - const unsigned int bsize = LRW_BLOCK_SIZE; - const unsigned int max_blks = req->tbuflen / bsize; - struct lrw_table_ctx *ctx = req->table_ctx; - struct blkcipher_walk walk; - unsigned int nblocks; - be128 *iv, *src, *dst, *t; - be128 *t_buf = req->tbuf; - int err, i; - - BUG_ON(max_blks < 1); - - blkcipher_walk_init(&walk, sdst, ssrc, nbytes); - - err = blkcipher_walk_virt(desc, &walk); - nbytes = walk.nbytes; - if (!nbytes) - return err; - - nblocks = min(walk.nbytes / bsize, max_blks); - src = (be128 *)walk.src.virt.addr; - dst = (be128 
*)walk.dst.virt.addr; - - /* calculate first value of T */ - iv = (be128 *)walk.iv; - t_buf[0] = *iv; - - /* T <- I*Key2 */ - gf128mul_64k_bbe(&t_buf[0], ctx->table); - - i = 0; - goto first; - - for (;;) { - do { - for (i = 0; i < nblocks; i++) { - /* T <- I*Key2, using the optimization - * discussed in the specification */ - be128_xor(&t_buf[i], t, - &ctx->mulinc[get_index128(iv)]); - inc(iv); -first: - t = &t_buf[i]; - - /* PP <- T xor P */ - be128_xor(dst + i, t, src + i); - } - - /* CC <- E(Key2,PP) */ - req->crypt_fn(req->crypt_ctx, (u8 *)dst, - nblocks * bsize); - - /* C <- T xor CC */ - for (i = 0; i < nblocks; i++) - be128_xor(dst + i, dst + i, &t_buf[i]); - - src += nblocks; - dst += nblocks; - nbytes -= nblocks * bsize; - nblocks = min(nbytes / bsize, max_blks); - } while (nblocks > 0); - - err = blkcipher_walk_done(desc, &walk, nbytes); - nbytes = walk.nbytes; - if (!nbytes) - break; - - nblocks = min(nbytes / bsize, max_blks); - src = (be128 *)walk.src.virt.addr; - dst = (be128 *)walk.dst.virt.addr; - } - - return err; -} -EXPORT_SYMBOL_GPL(lrw_crypt); - static int init_tfm(struct crypto_skcipher *tfm) { struct skcipher_instance *inst = skcipher_alg_instance(tfm); @@ -518,7 +443,8 @@ static void exit_tfm(struct crypto_skcipher *tfm) { struct priv *ctx = crypto_skcipher_ctx(tfm); - lrw_free_table(&ctx->table); + if (ctx->table) + gf128mul_free_64k(ctx->table); crypto_free_skcipher(ctx->child); } diff --git a/include/crypto/lrw.h b/include/crypto/lrw.h deleted file mode 100644 index a9d44c06d081..000000000000 --- a/include/crypto/lrw.h +++ /dev/null @@ -1,44 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _CRYPTO_LRW_H -#define _CRYPTO_LRW_H - -#include - -struct scatterlist; -struct gf128mul_64k; -struct blkcipher_desc; - -#define LRW_BLOCK_SIZE 16 - -struct lrw_table_ctx { - /* optimizes multiplying a random (non incrementing, as at the - * start of a new sector) value with key2, we could also have - * used 4k optimization tables or no optimization at all. In the - * latter case we would have to store key2 here */ - struct gf128mul_64k *table; - /* stores: - * key2*{ 0,0,...0,0,0,0,1 }, key2*{ 0,0,...0,0,0,1,1 }, - * key2*{ 0,0,...0,0,1,1,1 }, key2*{ 0,0,...0,1,1,1,1 } - * key2*{ 0,0,...1,1,1,1,1 }, etc - * needed for optimized multiplication of incrementing values - * with key2 */ - be128 mulinc[128]; -}; - -int lrw_init_table(struct lrw_table_ctx *ctx, const u8 *tweak); -void lrw_free_table(struct lrw_table_ctx *ctx); - -struct lrw_crypt_req { - be128 *tbuf; - unsigned int tbuflen; - - struct lrw_table_ctx *table_ctx; - void *crypt_ctx; - void (*crypt_fn)(void *ctx, u8 *blks, unsigned int nbytes); -}; - -int lrw_crypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes, - struct lrw_crypt_req *req); - -#endif /* _CRYPTO_LRW_H */ -- cgit v1.2.3 From 0d87d0f4254cc581d3fac51d1e11b51b2549e42a Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 19 Feb 2018 23:48:26 -0800 Subject: crypto: x86/glue_helper - remove blkcipher_walk functions Now that all glue_helper users have been switched from the blkcipher interface over to the skcipher interface, remove the versions of the glue_helper functions that handled the blkcipher interface. 
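For reference, the request-based helpers that remain all follow the same skcipher_walk pattern; a simplified sketch of the ECB case (illustrative only: the function name here is made up, and the per-function-entry FPU batching done by the real helpers is omitted) looks roughly like:

	/* Sketch only, not part of this patch; mirrors the surviving
	 * glue_ecb_req_128bit() shape with one block processed per call. */
	static int example_ecb_req_128bit(const struct common_glue_ctx *gctx,
					  struct skcipher_request *req)
	{
		void *ctx = crypto_skcipher_ctx(crypto_skcipher_reqtfm(req));
		const unsigned int bsize = 128 / 8;
		struct skcipher_walk walk;
		unsigned int nbytes;
		int err;

		err = skcipher_walk_virt(&walk, req, false);

		while ((nbytes = walk.nbytes)) {
			u8 *src = walk.src.virt.addr;
			u8 *dst = walk.dst.virt.addr;

			/* last funcs[] entry handles a single block at a time;
			 * the real helper batches larger multi-block entries
			 * first and only falls back to this one for the tail */
			while (nbytes >= bsize) {
				gctx->funcs[gctx->num_funcs - 1].fn_u.ecb(ctx,
									  dst,
									  src);
				src += bsize;
				dst += bsize;
				nbytes -= bsize;
			}

			err = skcipher_walk_done(&walk, nbytes);
		}

		return err;
	}
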
Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu --- arch/x86/crypto/glue_helper.c | 344 ------------------------------ arch/x86/include/asm/crypto/glue_helper.h | 62 ------ 2 files changed, 406 deletions(-) (limited to 'arch') diff --git a/arch/x86/crypto/glue_helper.c b/arch/x86/crypto/glue_helper.c index cd5e7cebdb9f..fab5fa1aed77 100644 --- a/arch/x86/crypto/glue_helper.c +++ b/arch/x86/crypto/glue_helper.c @@ -32,63 +32,6 @@ #include #include -static int __glue_ecb_crypt_128bit(const struct common_glue_ctx *gctx, - struct blkcipher_desc *desc, - struct blkcipher_walk *walk) -{ - void *ctx = crypto_blkcipher_ctx(desc->tfm); - const unsigned int bsize = 128 / 8; - unsigned int nbytes, i, func_bytes; - bool fpu_enabled = false; - int err; - - err = blkcipher_walk_virt(desc, walk); - - while ((nbytes = walk->nbytes)) { - u8 *wsrc = walk->src.virt.addr; - u8 *wdst = walk->dst.virt.addr; - - fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit, - desc, fpu_enabled, nbytes); - - for (i = 0; i < gctx->num_funcs; i++) { - func_bytes = bsize * gctx->funcs[i].num_blocks; - - /* Process multi-block batch */ - if (nbytes >= func_bytes) { - do { - gctx->funcs[i].fn_u.ecb(ctx, wdst, - wsrc); - - wsrc += func_bytes; - wdst += func_bytes; - nbytes -= func_bytes; - } while (nbytes >= func_bytes); - - if (nbytes < bsize) - goto done; - } - } - -done: - err = blkcipher_walk_done(desc, walk, nbytes); - } - - glue_fpu_end(fpu_enabled); - return err; -} - -int glue_ecb_crypt_128bit(const struct common_glue_ctx *gctx, - struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) -{ - struct blkcipher_walk walk; - - blkcipher_walk_init(&walk, dst, src, nbytes); - return __glue_ecb_crypt_128bit(gctx, desc, &walk); -} -EXPORT_SYMBOL_GPL(glue_ecb_crypt_128bit); - int glue_ecb_req_128bit(const struct common_glue_ctx *gctx, struct skcipher_request *req) { @@ -135,51 +78,6 @@ int glue_ecb_req_128bit(const struct common_glue_ctx *gctx, } EXPORT_SYMBOL_GPL(glue_ecb_req_128bit); -static unsigned int __glue_cbc_encrypt_128bit(const common_glue_func_t fn, - struct blkcipher_desc *desc, - struct blkcipher_walk *walk) -{ - void *ctx = crypto_blkcipher_ctx(desc->tfm); - const unsigned int bsize = 128 / 8; - unsigned int nbytes = walk->nbytes; - u128 *src = (u128 *)walk->src.virt.addr; - u128 *dst = (u128 *)walk->dst.virt.addr; - u128 *iv = (u128 *)walk->iv; - - do { - u128_xor(dst, src, iv); - fn(ctx, (u8 *)dst, (u8 *)dst); - iv = dst; - - src += 1; - dst += 1; - nbytes -= bsize; - } while (nbytes >= bsize); - - *(u128 *)walk->iv = *iv; - return nbytes; -} - -int glue_cbc_encrypt_128bit(const common_glue_func_t fn, - struct blkcipher_desc *desc, - struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) -{ - struct blkcipher_walk walk; - int err; - - blkcipher_walk_init(&walk, dst, src, nbytes); - err = blkcipher_walk_virt(desc, &walk); - - while ((nbytes = walk.nbytes)) { - nbytes = __glue_cbc_encrypt_128bit(fn, desc, &walk); - err = blkcipher_walk_done(desc, &walk, nbytes); - } - - return err; -} -EXPORT_SYMBOL_GPL(glue_cbc_encrypt_128bit); - int glue_cbc_encrypt_req_128bit(const common_glue_func_t fn, struct skcipher_request *req) { @@ -212,82 +110,6 @@ int glue_cbc_encrypt_req_128bit(const common_glue_func_t fn, } EXPORT_SYMBOL_GPL(glue_cbc_encrypt_req_128bit); -static unsigned int -__glue_cbc_decrypt_128bit(const struct common_glue_ctx *gctx, - struct blkcipher_desc *desc, - struct blkcipher_walk *walk) -{ - void *ctx = 
crypto_blkcipher_ctx(desc->tfm); - const unsigned int bsize = 128 / 8; - unsigned int nbytes = walk->nbytes; - u128 *src = (u128 *)walk->src.virt.addr; - u128 *dst = (u128 *)walk->dst.virt.addr; - u128 last_iv; - unsigned int num_blocks, func_bytes; - unsigned int i; - - /* Start of the last block. */ - src += nbytes / bsize - 1; - dst += nbytes / bsize - 1; - - last_iv = *src; - - for (i = 0; i < gctx->num_funcs; i++) { - num_blocks = gctx->funcs[i].num_blocks; - func_bytes = bsize * num_blocks; - - /* Process multi-block batch */ - if (nbytes >= func_bytes) { - do { - nbytes -= func_bytes - bsize; - src -= num_blocks - 1; - dst -= num_blocks - 1; - - gctx->funcs[i].fn_u.cbc(ctx, dst, src); - - nbytes -= bsize; - if (nbytes < bsize) - goto done; - - u128_xor(dst, dst, src - 1); - src -= 1; - dst -= 1; - } while (nbytes >= func_bytes); - } - } - -done: - u128_xor(dst, dst, (u128 *)walk->iv); - *(u128 *)walk->iv = last_iv; - - return nbytes; -} - -int glue_cbc_decrypt_128bit(const struct common_glue_ctx *gctx, - struct blkcipher_desc *desc, - struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) -{ - const unsigned int bsize = 128 / 8; - bool fpu_enabled = false; - struct blkcipher_walk walk; - int err; - - blkcipher_walk_init(&walk, dst, src, nbytes); - err = blkcipher_walk_virt(desc, &walk); - - while ((nbytes = walk.nbytes)) { - fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit, - desc, fpu_enabled, nbytes); - nbytes = __glue_cbc_decrypt_128bit(gctx, desc, &walk); - err = blkcipher_walk_done(desc, &walk, nbytes); - } - - glue_fpu_end(fpu_enabled); - return err; -} -EXPORT_SYMBOL_GPL(glue_cbc_decrypt_128bit); - int glue_cbc_decrypt_req_128bit(const struct common_glue_ctx *gctx, struct skcipher_request *req) { @@ -349,96 +171,6 @@ done: } EXPORT_SYMBOL_GPL(glue_cbc_decrypt_req_128bit); -static void glue_ctr_crypt_final_128bit(const common_glue_ctr_func_t fn_ctr, - struct blkcipher_desc *desc, - struct blkcipher_walk *walk) -{ - void *ctx = crypto_blkcipher_ctx(desc->tfm); - u8 *src = (u8 *)walk->src.virt.addr; - u8 *dst = (u8 *)walk->dst.virt.addr; - unsigned int nbytes = walk->nbytes; - le128 ctrblk; - u128 tmp; - - be128_to_le128(&ctrblk, (be128 *)walk->iv); - - memcpy(&tmp, src, nbytes); - fn_ctr(ctx, &tmp, &tmp, &ctrblk); - memcpy(dst, &tmp, nbytes); - - le128_to_be128((be128 *)walk->iv, &ctrblk); -} - -static unsigned int __glue_ctr_crypt_128bit(const struct common_glue_ctx *gctx, - struct blkcipher_desc *desc, - struct blkcipher_walk *walk) -{ - const unsigned int bsize = 128 / 8; - void *ctx = crypto_blkcipher_ctx(desc->tfm); - unsigned int nbytes = walk->nbytes; - u128 *src = (u128 *)walk->src.virt.addr; - u128 *dst = (u128 *)walk->dst.virt.addr; - le128 ctrblk; - unsigned int num_blocks, func_bytes; - unsigned int i; - - be128_to_le128(&ctrblk, (be128 *)walk->iv); - - /* Process multi-block batch */ - for (i = 0; i < gctx->num_funcs; i++) { - num_blocks = gctx->funcs[i].num_blocks; - func_bytes = bsize * num_blocks; - - if (nbytes >= func_bytes) { - do { - gctx->funcs[i].fn_u.ctr(ctx, dst, src, &ctrblk); - - src += num_blocks; - dst += num_blocks; - nbytes -= func_bytes; - } while (nbytes >= func_bytes); - - if (nbytes < bsize) - goto done; - } - } - -done: - le128_to_be128((be128 *)walk->iv, &ctrblk); - return nbytes; -} - -int glue_ctr_crypt_128bit(const struct common_glue_ctx *gctx, - struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) -{ - const unsigned int bsize = 128 / 8; - bool fpu_enabled = 
false; - struct blkcipher_walk walk; - int err; - - blkcipher_walk_init(&walk, dst, src, nbytes); - err = blkcipher_walk_virt_block(desc, &walk, bsize); - - while ((nbytes = walk.nbytes) >= bsize) { - fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit, - desc, fpu_enabled, nbytes); - nbytes = __glue_ctr_crypt_128bit(gctx, desc, &walk); - err = blkcipher_walk_done(desc, &walk, nbytes); - } - - glue_fpu_end(fpu_enabled); - - if (walk.nbytes) { - glue_ctr_crypt_final_128bit( - gctx->funcs[gctx->num_funcs - 1].fn_u.ctr, desc, &walk); - err = blkcipher_walk_done(desc, &walk, 0); - } - - return err; -} -EXPORT_SYMBOL_GPL(glue_ctr_crypt_128bit); - int glue_ctr_req_128bit(const struct common_glue_ctx *gctx, struct skcipher_request *req) { @@ -507,42 +239,6 @@ int glue_ctr_req_128bit(const struct common_glue_ctx *gctx, } EXPORT_SYMBOL_GPL(glue_ctr_req_128bit); -static unsigned int __glue_xts_crypt_128bit(const struct common_glue_ctx *gctx, - void *ctx, - struct blkcipher_desc *desc, - struct blkcipher_walk *walk) -{ - const unsigned int bsize = 128 / 8; - unsigned int nbytes = walk->nbytes; - u128 *src = (u128 *)walk->src.virt.addr; - u128 *dst = (u128 *)walk->dst.virt.addr; - unsigned int num_blocks, func_bytes; - unsigned int i; - - /* Process multi-block batch */ - for (i = 0; i < gctx->num_funcs; i++) { - num_blocks = gctx->funcs[i].num_blocks; - func_bytes = bsize * num_blocks; - - if (nbytes >= func_bytes) { - do { - gctx->funcs[i].fn_u.xts(ctx, dst, src, - (le128 *)walk->iv); - - src += num_blocks; - dst += num_blocks; - nbytes -= func_bytes; - } while (nbytes >= func_bytes); - - if (nbytes < bsize) - goto done; - } - } - -done: - return nbytes; -} - static unsigned int __glue_xts_req_128bit(const struct common_glue_ctx *gctx, void *ctx, struct skcipher_walk *walk) @@ -578,46 +274,6 @@ done: return nbytes; } -/* for implementations implementing faster XTS IV generator */ -int glue_xts_crypt_128bit(const struct common_glue_ctx *gctx, - struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes, - void (*tweak_fn)(void *ctx, u8 *dst, const u8 *src), - void *tweak_ctx, void *crypt_ctx) -{ - const unsigned int bsize = 128 / 8; - bool fpu_enabled = false; - struct blkcipher_walk walk; - int err; - - blkcipher_walk_init(&walk, dst, src, nbytes); - - err = blkcipher_walk_virt(desc, &walk); - nbytes = walk.nbytes; - if (!nbytes) - return err; - - /* set minimum length to bsize, for tweak_fn */ - fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit, - desc, fpu_enabled, - nbytes < bsize ? 
bsize : nbytes); - - /* calculate first value of T */ - tweak_fn(tweak_ctx, walk.iv, walk.iv); - - while (nbytes) { - nbytes = __glue_xts_crypt_128bit(gctx, crypt_ctx, desc, &walk); - - err = blkcipher_walk_done(desc, &walk, nbytes); - nbytes = walk.nbytes; - } - - glue_fpu_end(fpu_enabled); - - return err; -} -EXPORT_SYMBOL_GPL(glue_xts_crypt_128bit); - int glue_xts_req_128bit(const struct common_glue_ctx *gctx, struct skcipher_request *req, common_glue_func_t tweak_fn, void *tweak_ctx, diff --git a/arch/x86/include/asm/crypto/glue_helper.h b/arch/x86/include/asm/crypto/glue_helper.h index 91c623e6ddbd..b925a6363b3f 100644 --- a/arch/x86/include/asm/crypto/glue_helper.h +++ b/arch/x86/include/asm/crypto/glue_helper.h @@ -44,32 +44,6 @@ struct common_glue_ctx { struct common_glue_func_entry funcs[]; }; -static inline bool glue_fpu_begin(unsigned int bsize, int fpu_blocks_limit, - struct blkcipher_desc *desc, - bool fpu_enabled, unsigned int nbytes) -{ - if (likely(fpu_blocks_limit < 0)) - return false; - - if (fpu_enabled) - return true; - - /* - * Vector-registers are only used when chunk to be processed is large - * enough, so do not enable FPU until it is necessary. - */ - if (nbytes < bsize * (unsigned int)fpu_blocks_limit) - return false; - - if (desc) { - /* prevent sleeping if FPU is in use */ - desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; - } - - kernel_fpu_begin(); - return true; -} - static inline bool glue_skwalk_fpu_begin(unsigned int bsize, int fpu_blocks_limit, struct skcipher_walk *walk, @@ -126,54 +100,18 @@ static inline void le128_inc(le128 *i) i->b = cpu_to_le64(b); } -extern int glue_ecb_crypt_128bit(const struct common_glue_ctx *gctx, - struct blkcipher_desc *desc, - struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes); - extern int glue_ecb_req_128bit(const struct common_glue_ctx *gctx, struct skcipher_request *req); -extern int glue_cbc_encrypt_128bit(const common_glue_func_t fn, - struct blkcipher_desc *desc, - struct scatterlist *dst, - struct scatterlist *src, - unsigned int nbytes); - extern int glue_cbc_encrypt_req_128bit(const common_glue_func_t fn, struct skcipher_request *req); -extern int glue_cbc_decrypt_128bit(const struct common_glue_ctx *gctx, - struct blkcipher_desc *desc, - struct scatterlist *dst, - struct scatterlist *src, - unsigned int nbytes); - extern int glue_cbc_decrypt_req_128bit(const struct common_glue_ctx *gctx, struct skcipher_request *req); -extern int glue_ctr_crypt_128bit(const struct common_glue_ctx *gctx, - struct blkcipher_desc *desc, - struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes); - extern int glue_ctr_req_128bit(const struct common_glue_ctx *gctx, struct skcipher_request *req); -extern int glue_xts_crypt_128bit(const struct common_glue_ctx *gctx, - struct blkcipher_desc *desc, - struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes, - common_glue_func_t tweak_fn, void *tweak_ctx, - void *crypt_ctx); - -extern int glue_xts_crypt_128bit(const struct common_glue_ctx *gctx, - struct blkcipher_desc *desc, - struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes, - common_glue_func_t tweak_fn, void *tweak_ctx, - void *crypt_ctx); - extern int glue_xts_req_128bit(const struct common_glue_ctx *gctx, struct skcipher_request *req, common_glue_func_t tweak_fn, void *tweak_ctx, -- cgit v1.2.3 From 75d8a5532fc6db34e5aa712ec8117c9f9cb83088 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 19 Feb 2018 23:48:27 -0800 Subject: crypto: x86/glue_helper - rename 
glue_skwalk_fpu_begin() There are no users of the original glue_fpu_begin() anymore, so rename glue_skwalk_fpu_begin() to glue_fpu_begin() so that it matches glue_fpu_end() again. Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu --- arch/x86/crypto/cast5_avx_glue.c | 4 ++-- arch/x86/crypto/glue_helper.c | 21 +++++++++------------ arch/x86/include/asm/crypto/glue_helper.h | 7 +++---- 3 files changed, 14 insertions(+), 18 deletions(-) (limited to 'arch') diff --git a/arch/x86/crypto/cast5_avx_glue.c b/arch/x86/crypto/cast5_avx_glue.c index 56f2a1b0ccf5..41034745d6a2 100644 --- a/arch/x86/crypto/cast5_avx_glue.c +++ b/arch/x86/crypto/cast5_avx_glue.c @@ -50,8 +50,8 @@ static int cast5_setkey_skcipher(struct crypto_skcipher *tfm, const u8 *key, static inline bool cast5_fpu_begin(bool fpu_enabled, struct skcipher_walk *walk, unsigned int nbytes) { - return glue_skwalk_fpu_begin(CAST5_BLOCK_SIZE, CAST5_PARALLEL_BLOCKS, - walk, fpu_enabled, nbytes); + return glue_fpu_begin(CAST5_BLOCK_SIZE, CAST5_PARALLEL_BLOCKS, + walk, fpu_enabled, nbytes); } static inline void cast5_fpu_end(bool fpu_enabled) diff --git a/arch/x86/crypto/glue_helper.c b/arch/x86/crypto/glue_helper.c index fab5fa1aed77..a78ef99a9981 100644 --- a/arch/x86/crypto/glue_helper.c +++ b/arch/x86/crypto/glue_helper.c @@ -50,9 +50,8 @@ int glue_ecb_req_128bit(const struct common_glue_ctx *gctx, unsigned int func_bytes; unsigned int i; - fpu_enabled = glue_skwalk_fpu_begin(bsize, - gctx->fpu_blocks_limit, - &walk, fpu_enabled, nbytes); + fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit, + &walk, fpu_enabled, nbytes); for (i = 0; i < gctx->num_funcs; i++) { func_bytes = bsize * gctx->funcs[i].num_blocks; @@ -129,9 +128,8 @@ int glue_cbc_decrypt_req_128bit(const struct common_glue_ctx *gctx, unsigned int i; u128 last_iv; - fpu_enabled = glue_skwalk_fpu_begin(bsize, - gctx->fpu_blocks_limit, - &walk, fpu_enabled, nbytes); + fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit, + &walk, fpu_enabled, nbytes); /* Start of the last block. */ src += nbytes / bsize - 1; dst += nbytes / bsize - 1; @@ -190,9 +188,8 @@ int glue_ctr_req_128bit(const struct common_glue_ctx *gctx, unsigned int i; le128 ctrblk; - fpu_enabled = glue_skwalk_fpu_begin(bsize, - gctx->fpu_blocks_limit, - &walk, fpu_enabled, nbytes); + fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit, + &walk, fpu_enabled, nbytes); be128_to_le128(&ctrblk, (be128 *)walk.iv); @@ -291,9 +288,9 @@ int glue_xts_req_128bit(const struct common_glue_ctx *gctx, return err; /* set minimum length to bsize, for tweak_fn */ - fpu_enabled = glue_skwalk_fpu_begin(bsize, gctx->fpu_blocks_limit, - &walk, fpu_enabled, - nbytes < bsize ? bsize : nbytes); + fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit, + &walk, fpu_enabled, + nbytes < bsize ? 
bsize : nbytes); /* calculate first value of T */ tweak_fn(tweak_ctx, walk.iv, walk.iv); diff --git a/arch/x86/include/asm/crypto/glue_helper.h b/arch/x86/include/asm/crypto/glue_helper.h index b925a6363b3f..d1818634ae7e 100644 --- a/arch/x86/include/asm/crypto/glue_helper.h +++ b/arch/x86/include/asm/crypto/glue_helper.h @@ -44,10 +44,9 @@ struct common_glue_ctx { struct common_glue_func_entry funcs[]; }; -static inline bool glue_skwalk_fpu_begin(unsigned int bsize, - int fpu_blocks_limit, - struct skcipher_walk *walk, - bool fpu_enabled, unsigned int nbytes) +static inline bool glue_fpu_begin(unsigned int bsize, int fpu_blocks_limit, + struct skcipher_walk *walk, + bool fpu_enabled, unsigned int nbytes) { if (likely(fpu_blocks_limit < 0)) return false; -- cgit v1.2.3 From 9cc16b4d32f9f91dc7000aa726605a1add1899c7 Mon Sep 17 00:00:00 2001 From: Wu Fengguang Date: Sat, 3 Mar 2018 04:29:46 +0800 Subject: crypto: x86/des3_ede - des3_ede_skciphers[] can be static Fixes: 09c0f03bf8ce ("crypto: x86/des3_ede - convert to skcipher interface") Signed-off-by: Fengguang Wu Acked-by: Eric Biggers Signed-off-by: Herbert Xu --- arch/x86/crypto/des3_ede_glue.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/crypto/des3_ede_glue.c b/arch/x86/crypto/des3_ede_glue.c index f9c7bdc5be5a..5c610d4ef9fc 100644 --- a/arch/x86/crypto/des3_ede_glue.c +++ b/arch/x86/crypto/des3_ede_glue.c @@ -404,7 +404,7 @@ static struct crypto_alg des3_ede_cipher = { } }; -struct skcipher_alg des3_ede_skciphers[] = { +static struct skcipher_alg des3_ede_skciphers[] = { { .base.cra_name = "ecb(des3_ede)", .base.cra_driver_name = "ecb-des3_ede-asm", -- cgit v1.2.3 From 91a2abb78f940ac821345cb7cc376dca94336c2f Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 5 Mar 2018 11:17:07 -0800 Subject: crypto: arm64/speck - add NEON-accelerated implementation of Speck-XTS Add a NEON-accelerated implementation of Speck128-XTS and Speck64-XTS for ARM64. This is ported from the 32-bit version. It may be useful on devices with 64-bit ARM CPUs that don't have the Cryptography Extensions, so cannot do AES efficiently -- e.g. the Cortex-A53 processor on the Raspberry Pi 3. It generally works the same way as the 32-bit version, but there are some slight differences due to the different instructions, registers, and syntax available in ARM64 vs. in ARM32. For example, in the 64-bit version there are enough registers to hold the XTS tweaks for each 128-byte chunk, so they don't need to be saved on the stack. Benchmarks on a Raspberry Pi 3 running a 64-bit kernel: Algorithm Encryption Decryption --------- ---------- ---------- Speck64/128-XTS (NEON) 92.2 MB/s 92.2 MB/s Speck128/256-XTS (NEON) 75.0 MB/s 75.0 MB/s Speck128/256-XTS (generic) 47.4 MB/s 35.6 MB/s AES-128-XTS (NEON bit-sliced) 33.4 MB/s 29.6 MB/s AES-256-XTS (NEON bit-sliced) 24.6 MB/s 21.7 MB/s The code performs well on higher-end ARM64 processors as well, though such processors tend to have the Crypto Extensions which make AES preferred. 
For example, here are the same benchmarks run on a HiKey960 (with CPU affinity set for the A73 cores), with the Crypto Extensions implementation of AES-256-XTS added: Algorithm Encryption Decryption --------- ----------- ----------- AES-256-XTS (Crypto Extensions) 1273.3 MB/s 1274.7 MB/s Speck64/128-XTS (NEON) 359.8 MB/s 348.0 MB/s Speck128/256-XTS (NEON) 292.5 MB/s 286.1 MB/s Speck128/256-XTS (generic) 186.3 MB/s 181.8 MB/s AES-128-XTS (NEON bit-sliced) 142.0 MB/s 124.3 MB/s AES-256-XTS (NEON bit-sliced) 104.7 MB/s 91.1 MB/s Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu --- arch/arm64/crypto/Kconfig | 6 + arch/arm64/crypto/Makefile | 3 + arch/arm64/crypto/speck-neon-core.S | 352 ++++++++++++++++++++++++++++++++++++ arch/arm64/crypto/speck-neon-glue.c | 282 +++++++++++++++++++++++++++++ 4 files changed, 643 insertions(+) create mode 100644 arch/arm64/crypto/speck-neon-core.S create mode 100644 arch/arm64/crypto/speck-neon-glue.c (limited to 'arch') diff --git a/arch/arm64/crypto/Kconfig b/arch/arm64/crypto/Kconfig index 285c36c7b408..cb5a243110c4 100644 --- a/arch/arm64/crypto/Kconfig +++ b/arch/arm64/crypto/Kconfig @@ -113,4 +113,10 @@ config CRYPTO_AES_ARM64_BS select CRYPTO_AES_ARM64 select CRYPTO_SIMD +config CRYPTO_SPECK_NEON + tristate "NEON accelerated Speck cipher algorithms" + depends on KERNEL_MODE_NEON + select CRYPTO_BLKCIPHER + select CRYPTO_SPECK + endif diff --git a/arch/arm64/crypto/Makefile b/arch/arm64/crypto/Makefile index cee9b8d9830b..d94ebd15a859 100644 --- a/arch/arm64/crypto/Makefile +++ b/arch/arm64/crypto/Makefile @@ -53,6 +53,9 @@ sha512-arm64-y := sha512-glue.o sha512-core.o obj-$(CONFIG_CRYPTO_CHACHA20_NEON) += chacha20-neon.o chacha20-neon-y := chacha20-neon-core.o chacha20-neon-glue.o +obj-$(CONFIG_CRYPTO_SPECK_NEON) += speck-neon.o +speck-neon-y := speck-neon-core.o speck-neon-glue.o + obj-$(CONFIG_CRYPTO_AES_ARM64) += aes-arm64.o aes-arm64-y := aes-cipher-core.o aes-cipher-glue.o diff --git a/arch/arm64/crypto/speck-neon-core.S b/arch/arm64/crypto/speck-neon-core.S new file mode 100644 index 000000000000..b14463438b09 --- /dev/null +++ b/arch/arm64/crypto/speck-neon-core.S @@ -0,0 +1,352 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * ARM64 NEON-accelerated implementation of Speck128-XTS and Speck64-XTS + * + * Copyright (c) 2018 Google, Inc + * + * Author: Eric Biggers + */ + +#include + + .text + + // arguments + ROUND_KEYS .req x0 // const {u64,u32} *round_keys + NROUNDS .req w1 // int nrounds + NROUNDS_X .req x1 + DST .req x2 // void *dst + SRC .req x3 // const void *src + NBYTES .req w4 // unsigned int nbytes + TWEAK .req x5 // void *tweak + + // registers which hold the data being encrypted/decrypted + // (underscores avoid a naming collision with ARM64 registers x0-x3) + X_0 .req v0 + Y_0 .req v1 + X_1 .req v2 + Y_1 .req v3 + X_2 .req v4 + Y_2 .req v5 + X_3 .req v6 + Y_3 .req v7 + + // the round key, duplicated in all lanes + ROUND_KEY .req v8 + + // index vector for tbl-based 8-bit rotates + ROTATE_TABLE .req v9 + ROTATE_TABLE_Q .req q9 + + // temporary registers + TMP0 .req v10 + TMP1 .req v11 + TMP2 .req v12 + TMP3 .req v13 + + // multiplication table for updating XTS tweaks + GFMUL_TABLE .req v14 + GFMUL_TABLE_Q .req q14 + + // next XTS tweak value(s) + TWEAKV_NEXT .req v15 + + // XTS tweaks for the blocks currently being encrypted/decrypted + TWEAKV0 .req v16 + TWEAKV1 .req v17 + TWEAKV2 .req v18 + TWEAKV3 .req v19 + TWEAKV4 .req v20 + TWEAKV5 .req v21 + TWEAKV6 .req v22 + TWEAKV7 .req v23 + + .align 4 +.Lror64_8_table: + .octa 
0x080f0e0d0c0b0a090007060504030201 +.Lror32_8_table: + .octa 0x0c0f0e0d080b0a090407060500030201 +.Lrol64_8_table: + .octa 0x0e0d0c0b0a09080f0605040302010007 +.Lrol32_8_table: + .octa 0x0e0d0c0f0a09080b0605040702010003 +.Lgf128mul_table: + .octa 0x00000000000000870000000000000001 +.Lgf64mul_table: + .octa 0x0000000000000000000000002d361b00 + +/* + * _speck_round_128bytes() - Speck encryption round on 128 bytes at a time + * + * Do one Speck encryption round on the 128 bytes (8 blocks for Speck128, 16 for + * Speck64) stored in X0-X3 and Y0-Y3, using the round key stored in all lanes + * of ROUND_KEY. 'n' is the lane size: 64 for Speck128, or 32 for Speck64. + * 'lanes' is the lane specifier: "2d" for Speck128 or "4s" for Speck64. + */ +.macro _speck_round_128bytes n, lanes + + // x = ror(x, 8) + tbl X_0.16b, {X_0.16b}, ROTATE_TABLE.16b + tbl X_1.16b, {X_1.16b}, ROTATE_TABLE.16b + tbl X_2.16b, {X_2.16b}, ROTATE_TABLE.16b + tbl X_3.16b, {X_3.16b}, ROTATE_TABLE.16b + + // x += y + add X_0.\lanes, X_0.\lanes, Y_0.\lanes + add X_1.\lanes, X_1.\lanes, Y_1.\lanes + add X_2.\lanes, X_2.\lanes, Y_2.\lanes + add X_3.\lanes, X_3.\lanes, Y_3.\lanes + + // x ^= k + eor X_0.16b, X_0.16b, ROUND_KEY.16b + eor X_1.16b, X_1.16b, ROUND_KEY.16b + eor X_2.16b, X_2.16b, ROUND_KEY.16b + eor X_3.16b, X_3.16b, ROUND_KEY.16b + + // y = rol(y, 3) + shl TMP0.\lanes, Y_0.\lanes, #3 + shl TMP1.\lanes, Y_1.\lanes, #3 + shl TMP2.\lanes, Y_2.\lanes, #3 + shl TMP3.\lanes, Y_3.\lanes, #3 + sri TMP0.\lanes, Y_0.\lanes, #(\n - 3) + sri TMP1.\lanes, Y_1.\lanes, #(\n - 3) + sri TMP2.\lanes, Y_2.\lanes, #(\n - 3) + sri TMP3.\lanes, Y_3.\lanes, #(\n - 3) + + // y ^= x + eor Y_0.16b, TMP0.16b, X_0.16b + eor Y_1.16b, TMP1.16b, X_1.16b + eor Y_2.16b, TMP2.16b, X_2.16b + eor Y_3.16b, TMP3.16b, X_3.16b +.endm + +/* + * _speck_unround_128bytes() - Speck decryption round on 128 bytes at a time + * + * This is the inverse of _speck_round_128bytes(). + */ +.macro _speck_unround_128bytes n, lanes + + // y ^= x + eor TMP0.16b, Y_0.16b, X_0.16b + eor TMP1.16b, Y_1.16b, X_1.16b + eor TMP2.16b, Y_2.16b, X_2.16b + eor TMP3.16b, Y_3.16b, X_3.16b + + // y = ror(y, 3) + ushr Y_0.\lanes, TMP0.\lanes, #3 + ushr Y_1.\lanes, TMP1.\lanes, #3 + ushr Y_2.\lanes, TMP2.\lanes, #3 + ushr Y_3.\lanes, TMP3.\lanes, #3 + sli Y_0.\lanes, TMP0.\lanes, #(\n - 3) + sli Y_1.\lanes, TMP1.\lanes, #(\n - 3) + sli Y_2.\lanes, TMP2.\lanes, #(\n - 3) + sli Y_3.\lanes, TMP3.\lanes, #(\n - 3) + + // x ^= k + eor X_0.16b, X_0.16b, ROUND_KEY.16b + eor X_1.16b, X_1.16b, ROUND_KEY.16b + eor X_2.16b, X_2.16b, ROUND_KEY.16b + eor X_3.16b, X_3.16b, ROUND_KEY.16b + + // x -= y + sub X_0.\lanes, X_0.\lanes, Y_0.\lanes + sub X_1.\lanes, X_1.\lanes, Y_1.\lanes + sub X_2.\lanes, X_2.\lanes, Y_2.\lanes + sub X_3.\lanes, X_3.\lanes, Y_3.\lanes + + // x = rol(x, 8) + tbl X_0.16b, {X_0.16b}, ROTATE_TABLE.16b + tbl X_1.16b, {X_1.16b}, ROTATE_TABLE.16b + tbl X_2.16b, {X_2.16b}, ROTATE_TABLE.16b + tbl X_3.16b, {X_3.16b}, ROTATE_TABLE.16b +.endm + +.macro _next_xts_tweak next, cur, tmp, n +.if \n == 64 + /* + * Calculate the next tweak by multiplying the current one by x, + * modulo p(x) = x^128 + x^7 + x^2 + x + 1. + */ + sshr \tmp\().2d, \cur\().2d, #63 + and \tmp\().16b, \tmp\().16b, GFMUL_TABLE.16b + shl \next\().2d, \cur\().2d, #1 + ext \tmp\().16b, \tmp\().16b, \tmp\().16b, #8 + eor \next\().16b, \next\().16b, \tmp\().16b +.else + /* + * Calculate the next two tweaks by multiplying the current ones by x^2, + * modulo p(x) = x^64 + x^4 + x^3 + x + 1. 
+ */ + ushr \tmp\().2d, \cur\().2d, #62 + shl \next\().2d, \cur\().2d, #2 + tbl \tmp\().16b, {GFMUL_TABLE.16b}, \tmp\().16b + eor \next\().16b, \next\().16b, \tmp\().16b +.endif +.endm + +/* + * _speck_xts_crypt() - Speck-XTS encryption/decryption + * + * Encrypt or decrypt NBYTES bytes of data from the SRC buffer to the DST buffer + * using Speck-XTS, specifically the variant with a block size of '2n' and round + * count given by NROUNDS. The expanded round keys are given in ROUND_KEYS, and + * the current XTS tweak value is given in TWEAK. It's assumed that NBYTES is a + * nonzero multiple of 128. + */ +.macro _speck_xts_crypt n, lanes, decrypting + + /* + * If decrypting, modify the ROUND_KEYS parameter to point to the last + * round key rather than the first, since for decryption the round keys + * are used in reverse order. + */ +.if \decrypting + mov NROUNDS, NROUNDS /* zero the high 32 bits */ +.if \n == 64 + add ROUND_KEYS, ROUND_KEYS, NROUNDS_X, lsl #3 + sub ROUND_KEYS, ROUND_KEYS, #8 +.else + add ROUND_KEYS, ROUND_KEYS, NROUNDS_X, lsl #2 + sub ROUND_KEYS, ROUND_KEYS, #4 +.endif +.endif + + // Load the index vector for tbl-based 8-bit rotates +.if \decrypting + ldr ROTATE_TABLE_Q, .Lrol\n\()_8_table +.else + ldr ROTATE_TABLE_Q, .Lror\n\()_8_table +.endif + + // One-time XTS preparation +.if \n == 64 + // Load first tweak + ld1 {TWEAKV0.16b}, [TWEAK] + + // Load GF(2^128) multiplication table + ldr GFMUL_TABLE_Q, .Lgf128mul_table +.else + // Load first tweak + ld1 {TWEAKV0.8b}, [TWEAK] + + // Load GF(2^64) multiplication table + ldr GFMUL_TABLE_Q, .Lgf64mul_table + + // Calculate second tweak, packing it together with the first + ushr TMP0.2d, TWEAKV0.2d, #63 + shl TMP1.2d, TWEAKV0.2d, #1 + tbl TMP0.8b, {GFMUL_TABLE.16b}, TMP0.8b + eor TMP0.8b, TMP0.8b, TMP1.8b + mov TWEAKV0.d[1], TMP0.d[0] +.endif + +.Lnext_128bytes_\@: + + // Calculate XTS tweaks for next 128 bytes + _next_xts_tweak TWEAKV1, TWEAKV0, TMP0, \n + _next_xts_tweak TWEAKV2, TWEAKV1, TMP0, \n + _next_xts_tweak TWEAKV3, TWEAKV2, TMP0, \n + _next_xts_tweak TWEAKV4, TWEAKV3, TMP0, \n + _next_xts_tweak TWEAKV5, TWEAKV4, TMP0, \n + _next_xts_tweak TWEAKV6, TWEAKV5, TMP0, \n + _next_xts_tweak TWEAKV7, TWEAKV6, TMP0, \n + _next_xts_tweak TWEAKV_NEXT, TWEAKV7, TMP0, \n + + // Load the next source blocks into {X,Y}[0-3] + ld1 {X_0.16b-Y_1.16b}, [SRC], #64 + ld1 {X_2.16b-Y_3.16b}, [SRC], #64 + + // XOR the source blocks with their XTS tweaks + eor TMP0.16b, X_0.16b, TWEAKV0.16b + eor Y_0.16b, Y_0.16b, TWEAKV1.16b + eor TMP1.16b, X_1.16b, TWEAKV2.16b + eor Y_1.16b, Y_1.16b, TWEAKV3.16b + eor TMP2.16b, X_2.16b, TWEAKV4.16b + eor Y_2.16b, Y_2.16b, TWEAKV5.16b + eor TMP3.16b, X_3.16b, TWEAKV6.16b + eor Y_3.16b, Y_3.16b, TWEAKV7.16b + + /* + * De-interleave the 'x' and 'y' elements of each block, i.e. make it so + * that the X[0-3] registers contain only the second halves of blocks, + * and the Y[0-3] registers contain only the first halves of blocks. + * (Speck uses the order (y, x) rather than the more intuitive (x, y).) 
+ */ + uzp2 X_0.\lanes, TMP0.\lanes, Y_0.\lanes + uzp1 Y_0.\lanes, TMP0.\lanes, Y_0.\lanes + uzp2 X_1.\lanes, TMP1.\lanes, Y_1.\lanes + uzp1 Y_1.\lanes, TMP1.\lanes, Y_1.\lanes + uzp2 X_2.\lanes, TMP2.\lanes, Y_2.\lanes + uzp1 Y_2.\lanes, TMP2.\lanes, Y_2.\lanes + uzp2 X_3.\lanes, TMP3.\lanes, Y_3.\lanes + uzp1 Y_3.\lanes, TMP3.\lanes, Y_3.\lanes + + // Do the cipher rounds + mov x6, ROUND_KEYS + mov w7, NROUNDS +.Lnext_round_\@: +.if \decrypting + ld1r {ROUND_KEY.\lanes}, [x6] + sub x6, x6, #( \n / 8 ) + _speck_unround_128bytes \n, \lanes +.else + ld1r {ROUND_KEY.\lanes}, [x6], #( \n / 8 ) + _speck_round_128bytes \n, \lanes +.endif + subs w7, w7, #1 + bne .Lnext_round_\@ + + // Re-interleave the 'x' and 'y' elements of each block + zip1 TMP0.\lanes, Y_0.\lanes, X_0.\lanes + zip2 Y_0.\lanes, Y_0.\lanes, X_0.\lanes + zip1 TMP1.\lanes, Y_1.\lanes, X_1.\lanes + zip2 Y_1.\lanes, Y_1.\lanes, X_1.\lanes + zip1 TMP2.\lanes, Y_2.\lanes, X_2.\lanes + zip2 Y_2.\lanes, Y_2.\lanes, X_2.\lanes + zip1 TMP3.\lanes, Y_3.\lanes, X_3.\lanes + zip2 Y_3.\lanes, Y_3.\lanes, X_3.\lanes + + // XOR the encrypted/decrypted blocks with the tweaks calculated earlier + eor X_0.16b, TMP0.16b, TWEAKV0.16b + eor Y_0.16b, Y_0.16b, TWEAKV1.16b + eor X_1.16b, TMP1.16b, TWEAKV2.16b + eor Y_1.16b, Y_1.16b, TWEAKV3.16b + eor X_2.16b, TMP2.16b, TWEAKV4.16b + eor Y_2.16b, Y_2.16b, TWEAKV5.16b + eor X_3.16b, TMP3.16b, TWEAKV6.16b + eor Y_3.16b, Y_3.16b, TWEAKV7.16b + mov TWEAKV0.16b, TWEAKV_NEXT.16b + + // Store the ciphertext in the destination buffer + st1 {X_0.16b-Y_1.16b}, [DST], #64 + st1 {X_2.16b-Y_3.16b}, [DST], #64 + + // Continue if there are more 128-byte chunks remaining + subs NBYTES, NBYTES, #128 + bne .Lnext_128bytes_\@ + + // Store the next tweak and return +.if \n == 64 + st1 {TWEAKV_NEXT.16b}, [TWEAK] +.else + st1 {TWEAKV_NEXT.8b}, [TWEAK] +.endif + ret +.endm + +ENTRY(speck128_xts_encrypt_neon) + _speck_xts_crypt n=64, lanes=2d, decrypting=0 +ENDPROC(speck128_xts_encrypt_neon) + +ENTRY(speck128_xts_decrypt_neon) + _speck_xts_crypt n=64, lanes=2d, decrypting=1 +ENDPROC(speck128_xts_decrypt_neon) + +ENTRY(speck64_xts_encrypt_neon) + _speck_xts_crypt n=32, lanes=4s, decrypting=0 +ENDPROC(speck64_xts_encrypt_neon) + +ENTRY(speck64_xts_decrypt_neon) + _speck_xts_crypt n=32, lanes=4s, decrypting=1 +ENDPROC(speck64_xts_decrypt_neon) diff --git a/arch/arm64/crypto/speck-neon-glue.c b/arch/arm64/crypto/speck-neon-glue.c new file mode 100644 index 000000000000..6e233aeb4ff4 --- /dev/null +++ b/arch/arm64/crypto/speck-neon-glue.c @@ -0,0 +1,282 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * NEON-accelerated implementation of Speck128-XTS and Speck64-XTS + * (64-bit version; based on the 32-bit version) + * + * Copyright (c) 2018 Google, Inc + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* The assembly functions only handle multiples of 128 bytes */ +#define SPECK_NEON_CHUNK_SIZE 128 + +/* Speck128 */ + +struct speck128_xts_tfm_ctx { + struct speck128_tfm_ctx main_key; + struct speck128_tfm_ctx tweak_key; +}; + +asmlinkage void speck128_xts_encrypt_neon(const u64 *round_keys, int nrounds, + void *dst, const void *src, + unsigned int nbytes, void *tweak); + +asmlinkage void speck128_xts_decrypt_neon(const u64 *round_keys, int nrounds, + void *dst, const void *src, + unsigned int nbytes, void *tweak); + +typedef void (*speck128_crypt_one_t)(const struct speck128_tfm_ctx *, + u8 *, const u8 *); +typedef void (*speck128_xts_crypt_many_t)(const u64 *, int, void *, 
+ const void *, unsigned int, void *); + +static __always_inline int +__speck128_xts_crypt(struct skcipher_request *req, + speck128_crypt_one_t crypt_one, + speck128_xts_crypt_many_t crypt_many) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + const struct speck128_xts_tfm_ctx *ctx = crypto_skcipher_ctx(tfm); + struct skcipher_walk walk; + le128 tweak; + int err; + + err = skcipher_walk_virt(&walk, req, true); + + crypto_speck128_encrypt(&ctx->tweak_key, (u8 *)&tweak, walk.iv); + + while (walk.nbytes > 0) { + unsigned int nbytes = walk.nbytes; + u8 *dst = walk.dst.virt.addr; + const u8 *src = walk.src.virt.addr; + + if (nbytes >= SPECK_NEON_CHUNK_SIZE && may_use_simd()) { + unsigned int count; + + count = round_down(nbytes, SPECK_NEON_CHUNK_SIZE); + kernel_neon_begin(); + (*crypt_many)(ctx->main_key.round_keys, + ctx->main_key.nrounds, + dst, src, count, &tweak); + kernel_neon_end(); + dst += count; + src += count; + nbytes -= count; + } + + /* Handle any remainder with generic code */ + while (nbytes >= sizeof(tweak)) { + le128_xor((le128 *)dst, (const le128 *)src, &tweak); + (*crypt_one)(&ctx->main_key, dst, dst); + le128_xor((le128 *)dst, (const le128 *)dst, &tweak); + gf128mul_x_ble(&tweak, &tweak); + + dst += sizeof(tweak); + src += sizeof(tweak); + nbytes -= sizeof(tweak); + } + err = skcipher_walk_done(&walk, nbytes); + } + + return err; +} + +static int speck128_xts_encrypt(struct skcipher_request *req) +{ + return __speck128_xts_crypt(req, crypto_speck128_encrypt, + speck128_xts_encrypt_neon); +} + +static int speck128_xts_decrypt(struct skcipher_request *req) +{ + return __speck128_xts_crypt(req, crypto_speck128_decrypt, + speck128_xts_decrypt_neon); +} + +static int speck128_xts_setkey(struct crypto_skcipher *tfm, const u8 *key, + unsigned int keylen) +{ + struct speck128_xts_tfm_ctx *ctx = crypto_skcipher_ctx(tfm); + int err; + + err = xts_verify_key(tfm, key, keylen); + if (err) + return err; + + keylen /= 2; + + err = crypto_speck128_setkey(&ctx->main_key, key, keylen); + if (err) + return err; + + return crypto_speck128_setkey(&ctx->tweak_key, key + keylen, keylen); +} + +/* Speck64 */ + +struct speck64_xts_tfm_ctx { + struct speck64_tfm_ctx main_key; + struct speck64_tfm_ctx tweak_key; +}; + +asmlinkage void speck64_xts_encrypt_neon(const u32 *round_keys, int nrounds, + void *dst, const void *src, + unsigned int nbytes, void *tweak); + +asmlinkage void speck64_xts_decrypt_neon(const u32 *round_keys, int nrounds, + void *dst, const void *src, + unsigned int nbytes, void *tweak); + +typedef void (*speck64_crypt_one_t)(const struct speck64_tfm_ctx *, + u8 *, const u8 *); +typedef void (*speck64_xts_crypt_many_t)(const u32 *, int, void *, + const void *, unsigned int, void *); + +static __always_inline int +__speck64_xts_crypt(struct skcipher_request *req, speck64_crypt_one_t crypt_one, + speck64_xts_crypt_many_t crypt_many) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + const struct speck64_xts_tfm_ctx *ctx = crypto_skcipher_ctx(tfm); + struct skcipher_walk walk; + __le64 tweak; + int err; + + err = skcipher_walk_virt(&walk, req, true); + + crypto_speck64_encrypt(&ctx->tweak_key, (u8 *)&tweak, walk.iv); + + while (walk.nbytes > 0) { + unsigned int nbytes = walk.nbytes; + u8 *dst = walk.dst.virt.addr; + const u8 *src = walk.src.virt.addr; + + if (nbytes >= SPECK_NEON_CHUNK_SIZE && may_use_simd()) { + unsigned int count; + + count = round_down(nbytes, SPECK_NEON_CHUNK_SIZE); + kernel_neon_begin(); + (*crypt_many)(ctx->main_key.round_keys, + 
ctx->main_key.nrounds, + dst, src, count, &tweak); + kernel_neon_end(); + dst += count; + src += count; + nbytes -= count; + } + + /* Handle any remainder with generic code */ + while (nbytes >= sizeof(tweak)) { + *(__le64 *)dst = *(__le64 *)src ^ tweak; + (*crypt_one)(&ctx->main_key, dst, dst); + *(__le64 *)dst ^= tweak; + tweak = cpu_to_le64((le64_to_cpu(tweak) << 1) ^ + ((tweak & cpu_to_le64(1ULL << 63)) ? + 0x1B : 0)); + dst += sizeof(tweak); + src += sizeof(tweak); + nbytes -= sizeof(tweak); + } + err = skcipher_walk_done(&walk, nbytes); + } + + return err; +} + +static int speck64_xts_encrypt(struct skcipher_request *req) +{ + return __speck64_xts_crypt(req, crypto_speck64_encrypt, + speck64_xts_encrypt_neon); +} + +static int speck64_xts_decrypt(struct skcipher_request *req) +{ + return __speck64_xts_crypt(req, crypto_speck64_decrypt, + speck64_xts_decrypt_neon); +} + +static int speck64_xts_setkey(struct crypto_skcipher *tfm, const u8 *key, + unsigned int keylen) +{ + struct speck64_xts_tfm_ctx *ctx = crypto_skcipher_ctx(tfm); + int err; + + err = xts_verify_key(tfm, key, keylen); + if (err) + return err; + + keylen /= 2; + + err = crypto_speck64_setkey(&ctx->main_key, key, keylen); + if (err) + return err; + + return crypto_speck64_setkey(&ctx->tweak_key, key + keylen, keylen); +} + +static struct skcipher_alg speck_algs[] = { + { + .base.cra_name = "xts(speck128)", + .base.cra_driver_name = "xts-speck128-neon", + .base.cra_priority = 300, + .base.cra_blocksize = SPECK128_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct speck128_xts_tfm_ctx), + .base.cra_alignmask = 7, + .base.cra_module = THIS_MODULE, + .min_keysize = 2 * SPECK128_128_KEY_SIZE, + .max_keysize = 2 * SPECK128_256_KEY_SIZE, + .ivsize = SPECK128_BLOCK_SIZE, + .walksize = SPECK_NEON_CHUNK_SIZE, + .setkey = speck128_xts_setkey, + .encrypt = speck128_xts_encrypt, + .decrypt = speck128_xts_decrypt, + }, { + .base.cra_name = "xts(speck64)", + .base.cra_driver_name = "xts-speck64-neon", + .base.cra_priority = 300, + .base.cra_blocksize = SPECK64_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct speck64_xts_tfm_ctx), + .base.cra_alignmask = 7, + .base.cra_module = THIS_MODULE, + .min_keysize = 2 * SPECK64_96_KEY_SIZE, + .max_keysize = 2 * SPECK64_128_KEY_SIZE, + .ivsize = SPECK64_BLOCK_SIZE, + .walksize = SPECK_NEON_CHUNK_SIZE, + .setkey = speck64_xts_setkey, + .encrypt = speck64_xts_encrypt, + .decrypt = speck64_xts_decrypt, + } +}; + +static int __init speck_neon_module_init(void) +{ + if (!(elf_hwcap & HWCAP_ASIMD)) + return -ENODEV; + return crypto_register_skciphers(speck_algs, ARRAY_SIZE(speck_algs)); +} + +static void __exit speck_neon_module_exit(void) +{ + crypto_unregister_skciphers(speck_algs, ARRAY_SIZE(speck_algs)); +} + +module_init(speck_neon_module_init); +module_exit(speck_neon_module_exit); + +MODULE_DESCRIPTION("Speck block cipher (NEON-accelerated)"); +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Eric Biggers "); +MODULE_ALIAS_CRYPTO("xts(speck128)"); +MODULE_ALIAS_CRYPTO("xts-speck128-neon"); +MODULE_ALIAS_CRYPTO("xts(speck64)"); +MODULE_ALIAS_CRYPTO("xts-speck64-neon"); -- cgit v1.2.3 From bd2ad885e30d2c72996e051d9a81bd1f2694eba0 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Sat, 10 Mar 2018 15:21:47 +0000 Subject: crypto: arm64/aes-ce-ccm - move kernel mode neon en/disable into loop When kernel mode NEON was first introduced on arm64, the preserve and restore of the userland NEON state was completely unoptimized, and involved saving all registers on each call to kernel_neon_begin(), and restoring them on 
each call to kernel_neon_end(). For this reason, the NEON crypto code that was introduced at the time keeps the NEON enabled throughout the execution of the crypto API methods, which may include calls back into the crypto API that could result in memory allocation or other actions that we should avoid when running with preemption disabled. Since then, we have optimized the kernel mode NEON handling, which now restores lazily (upon return to userland), and so the preserve action is only costly the first time it is called after entering the kernel. So let's put the kernel_neon_begin() and kernel_neon_end() calls around the actual invocations of the NEON crypto code, and run the remainder of the code with kernel mode NEON disabled (and preemption enabled) Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- arch/arm64/crypto/aes-ce-ccm-glue.c | 47 ++++++++++++++++++------------------- 1 file changed, 23 insertions(+), 24 deletions(-) (limited to 'arch') diff --git a/arch/arm64/crypto/aes-ce-ccm-glue.c b/arch/arm64/crypto/aes-ce-ccm-glue.c index a1254036f2b1..68b11aa690e4 100644 --- a/arch/arm64/crypto/aes-ce-ccm-glue.c +++ b/arch/arm64/crypto/aes-ce-ccm-glue.c @@ -107,11 +107,13 @@ static int ccm_init_mac(struct aead_request *req, u8 maciv[], u32 msglen) } static void ccm_update_mac(struct crypto_aes_ctx *key, u8 mac[], u8 const in[], - u32 abytes, u32 *macp, bool use_neon) + u32 abytes, u32 *macp) { - if (likely(use_neon)) { + if (may_use_simd()) { + kernel_neon_begin(); ce_aes_ccm_auth_data(mac, in, abytes, macp, key->key_enc, num_rounds(key)); + kernel_neon_end(); } else { if (*macp > 0 && *macp < AES_BLOCK_SIZE) { int added = min(abytes, AES_BLOCK_SIZE - *macp); @@ -143,8 +145,7 @@ static void ccm_update_mac(struct crypto_aes_ctx *key, u8 mac[], u8 const in[], } } -static void ccm_calculate_auth_mac(struct aead_request *req, u8 mac[], - bool use_neon) +static void ccm_calculate_auth_mac(struct aead_request *req, u8 mac[]) { struct crypto_aead *aead = crypto_aead_reqtfm(req); struct crypto_aes_ctx *ctx = crypto_aead_ctx(aead); @@ -163,7 +164,7 @@ static void ccm_calculate_auth_mac(struct aead_request *req, u8 mac[], ltag.len = 6; } - ccm_update_mac(ctx, mac, (u8 *)<ag, ltag.len, &macp, use_neon); + ccm_update_mac(ctx, mac, (u8 *)<ag, ltag.len, &macp); scatterwalk_start(&walk, req->src); do { @@ -175,7 +176,7 @@ static void ccm_calculate_auth_mac(struct aead_request *req, u8 mac[], n = scatterwalk_clamp(&walk, len); } p = scatterwalk_map(&walk); - ccm_update_mac(ctx, mac, p, n, &macp, use_neon); + ccm_update_mac(ctx, mac, p, n, &macp); len -= n; scatterwalk_unmap(p); @@ -242,43 +243,42 @@ static int ccm_encrypt(struct aead_request *req) u8 __aligned(8) mac[AES_BLOCK_SIZE]; u8 buf[AES_BLOCK_SIZE]; u32 len = req->cryptlen; - bool use_neon = may_use_simd(); int err; err = ccm_init_mac(req, mac, len); if (err) return err; - if (likely(use_neon)) - kernel_neon_begin(); - if (req->assoclen) - ccm_calculate_auth_mac(req, mac, use_neon); + ccm_calculate_auth_mac(req, mac); /* preserve the original iv for the final round */ memcpy(buf, req->iv, AES_BLOCK_SIZE); err = skcipher_walk_aead_encrypt(&walk, req, true); - if (likely(use_neon)) { + if (may_use_simd()) { while (walk.nbytes) { u32 tail = walk.nbytes % AES_BLOCK_SIZE; if (walk.nbytes == walk.total) tail = 0; + kernel_neon_begin(); ce_aes_ccm_encrypt(walk.dst.virt.addr, walk.src.virt.addr, walk.nbytes - tail, ctx->key_enc, num_rounds(ctx), mac, walk.iv); + kernel_neon_end(); err = skcipher_walk_done(&walk, tail); } - if (!err) + if (!err) 
{ + kernel_neon_begin(); ce_aes_ccm_final(mac, buf, ctx->key_enc, num_rounds(ctx)); - - kernel_neon_end(); + kernel_neon_end(); + } } else { err = ccm_crypt_fallback(&walk, mac, buf, ctx, true); } @@ -301,43 +301,42 @@ static int ccm_decrypt(struct aead_request *req) u8 __aligned(8) mac[AES_BLOCK_SIZE]; u8 buf[AES_BLOCK_SIZE]; u32 len = req->cryptlen - authsize; - bool use_neon = may_use_simd(); int err; err = ccm_init_mac(req, mac, len); if (err) return err; - if (likely(use_neon)) - kernel_neon_begin(); - if (req->assoclen) - ccm_calculate_auth_mac(req, mac, use_neon); + ccm_calculate_auth_mac(req, mac); /* preserve the original iv for the final round */ memcpy(buf, req->iv, AES_BLOCK_SIZE); err = skcipher_walk_aead_decrypt(&walk, req, true); - if (likely(use_neon)) { + if (may_use_simd()) { while (walk.nbytes) { u32 tail = walk.nbytes % AES_BLOCK_SIZE; if (walk.nbytes == walk.total) tail = 0; + kernel_neon_begin(); ce_aes_ccm_decrypt(walk.dst.virt.addr, walk.src.virt.addr, walk.nbytes - tail, ctx->key_enc, num_rounds(ctx), mac, walk.iv); + kernel_neon_end(); err = skcipher_walk_done(&walk, tail); } - if (!err) + if (!err) { + kernel_neon_begin(); ce_aes_ccm_final(mac, buf, ctx->key_enc, num_rounds(ctx)); - - kernel_neon_end(); + kernel_neon_end(); + } } else { err = ccm_crypt_fallback(&walk, mac, buf, ctx, false); } -- cgit v1.2.3 From 6833817472702658bdce64ea56bb90813be85557 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Sat, 10 Mar 2018 15:21:48 +0000 Subject: crypto: arm64/aes-blk - move kernel mode neon en/disable into loop When kernel mode NEON was first introduced on arm64, the preserve and restore of the userland NEON state was completely unoptimized, and involved saving all registers on each call to kernel_neon_begin(), and restoring them on each call to kernel_neon_end(). For this reason, the NEON crypto code that was introduced at the time keeps the NEON enabled throughout the execution of the crypto API methods, which may include calls back into the crypto API that could result in memory allocation or other actions that we should avoid when running with preemption disabled. Since then, we have optimized the kernel mode NEON handling, which now restores lazily (upon return to userland), and so the preserve action is only costly the first time it is called after entering the kernel. So let's put the kernel_neon_begin() and kernel_neon_end() calls around the actual invocations of the NEON crypto code, and run the remainder of the code with kernel mode NEON disabled (and preemption enabled) Note that this requires some reshuffling of the registers in the asm code, because the XTS routines can no longer rely on the registers to retain their contents between invocations. 
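To make the resulting pattern explicit, every affected glue routine ends up with roughly the following before/after shape (condensed from the ecb_encrypt() hunk below; the (u8 *) casts and the now-removed 'first' argument bookkeeping are elided):

    /* before: NEON claimed across the whole walk, so the walk may not sleep */
    err = skcipher_walk_virt(&walk, req, true);
    kernel_neon_begin();
    while ((blocks = walk.nbytes / AES_BLOCK_SIZE)) {
            aes_ecb_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
                            ctx->key_enc, rounds, blocks);
            err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
    }
    kernel_neon_end();

    /* after: NEON claimed only around the asm helper; the walk may sleep */
    err = skcipher_walk_virt(&walk, req, false);
    while ((blocks = walk.nbytes / AES_BLOCK_SIZE)) {
            kernel_neon_begin();
            aes_ecb_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
                            ctx->key_enc, rounds, blocks);
            kernel_neon_end();
            err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
    }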
Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- arch/arm64/crypto/aes-glue.c | 95 ++++++++++++++++++------------------- arch/arm64/crypto/aes-modes.S | 90 +++++++++++++++++------------------ arch/arm64/crypto/aes-neonbs-glue.c | 14 +++--- 3 files changed, 97 insertions(+), 102 deletions(-) (limited to 'arch') diff --git a/arch/arm64/crypto/aes-glue.c b/arch/arm64/crypto/aes-glue.c index 2fa850e86aa8..253188fb8cb0 100644 --- a/arch/arm64/crypto/aes-glue.c +++ b/arch/arm64/crypto/aes-glue.c @@ -64,17 +64,17 @@ MODULE_LICENSE("GPL v2"); /* defined in aes-modes.S */ asmlinkage void aes_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[], - int rounds, int blocks, int first); + int rounds, int blocks); asmlinkage void aes_ecb_decrypt(u8 out[], u8 const in[], u8 const rk[], - int rounds, int blocks, int first); + int rounds, int blocks); asmlinkage void aes_cbc_encrypt(u8 out[], u8 const in[], u8 const rk[], - int rounds, int blocks, u8 iv[], int first); + int rounds, int blocks, u8 iv[]); asmlinkage void aes_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[], - int rounds, int blocks, u8 iv[], int first); + int rounds, int blocks, u8 iv[]); asmlinkage void aes_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[], - int rounds, int blocks, u8 ctr[], int first); + int rounds, int blocks, u8 ctr[]); asmlinkage void aes_xts_encrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds, int blocks, u8 const rk2[], u8 iv[], @@ -133,19 +133,19 @@ static int ecb_encrypt(struct skcipher_request *req) { struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm); - int err, first, rounds = 6 + ctx->key_length / 4; + int err, rounds = 6 + ctx->key_length / 4; struct skcipher_walk walk; unsigned int blocks; - err = skcipher_walk_virt(&walk, req, true); + err = skcipher_walk_virt(&walk, req, false); - kernel_neon_begin(); - for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) { + while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) { + kernel_neon_begin(); aes_ecb_encrypt(walk.dst.virt.addr, walk.src.virt.addr, - (u8 *)ctx->key_enc, rounds, blocks, first); + (u8 *)ctx->key_enc, rounds, blocks); + kernel_neon_end(); err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE); } - kernel_neon_end(); return err; } @@ -153,19 +153,19 @@ static int ecb_decrypt(struct skcipher_request *req) { struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm); - int err, first, rounds = 6 + ctx->key_length / 4; + int err, rounds = 6 + ctx->key_length / 4; struct skcipher_walk walk; unsigned int blocks; - err = skcipher_walk_virt(&walk, req, true); + err = skcipher_walk_virt(&walk, req, false); - kernel_neon_begin(); - for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) { + while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) { + kernel_neon_begin(); aes_ecb_decrypt(walk.dst.virt.addr, walk.src.virt.addr, - (u8 *)ctx->key_dec, rounds, blocks, first); + (u8 *)ctx->key_dec, rounds, blocks); + kernel_neon_end(); err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE); } - kernel_neon_end(); return err; } @@ -173,20 +173,19 @@ static int cbc_encrypt(struct skcipher_request *req) { struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm); - int err, first, rounds = 6 + ctx->key_length / 4; + int err, rounds = 6 + ctx->key_length / 4; struct skcipher_walk walk; unsigned int blocks; - err = skcipher_walk_virt(&walk, req, 
true); + err = skcipher_walk_virt(&walk, req, false); - kernel_neon_begin(); - for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) { + while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) { + kernel_neon_begin(); aes_cbc_encrypt(walk.dst.virt.addr, walk.src.virt.addr, - (u8 *)ctx->key_enc, rounds, blocks, walk.iv, - first); + (u8 *)ctx->key_enc, rounds, blocks, walk.iv); + kernel_neon_end(); err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE); } - kernel_neon_end(); return err; } @@ -194,20 +193,19 @@ static int cbc_decrypt(struct skcipher_request *req) { struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm); - int err, first, rounds = 6 + ctx->key_length / 4; + int err, rounds = 6 + ctx->key_length / 4; struct skcipher_walk walk; unsigned int blocks; - err = skcipher_walk_virt(&walk, req, true); + err = skcipher_walk_virt(&walk, req, false); - kernel_neon_begin(); - for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) { + while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) { + kernel_neon_begin(); aes_cbc_decrypt(walk.dst.virt.addr, walk.src.virt.addr, - (u8 *)ctx->key_dec, rounds, blocks, walk.iv, - first); + (u8 *)ctx->key_dec, rounds, blocks, walk.iv); + kernel_neon_end(); err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE); } - kernel_neon_end(); return err; } @@ -215,20 +213,18 @@ static int ctr_encrypt(struct skcipher_request *req) { struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm); - int err, first, rounds = 6 + ctx->key_length / 4; + int err, rounds = 6 + ctx->key_length / 4; struct skcipher_walk walk; int blocks; - err = skcipher_walk_virt(&walk, req, true); + err = skcipher_walk_virt(&walk, req, false); - first = 1; - kernel_neon_begin(); while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) { + kernel_neon_begin(); aes_ctr_encrypt(walk.dst.virt.addr, walk.src.virt.addr, - (u8 *)ctx->key_enc, rounds, blocks, walk.iv, - first); + (u8 *)ctx->key_enc, rounds, blocks, walk.iv); err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE); - first = 0; + kernel_neon_end(); } if (walk.nbytes) { u8 __aligned(8) tail[AES_BLOCK_SIZE]; @@ -241,12 +237,13 @@ static int ctr_encrypt(struct skcipher_request *req) */ blocks = -1; + kernel_neon_begin(); aes_ctr_encrypt(tail, NULL, (u8 *)ctx->key_enc, rounds, - blocks, walk.iv, first); + blocks, walk.iv); + kernel_neon_end(); crypto_xor_cpy(tdst, tsrc, tail, nbytes); err = skcipher_walk_done(&walk, 0); } - kernel_neon_end(); return err; } @@ -270,16 +267,16 @@ static int xts_encrypt(struct skcipher_request *req) struct skcipher_walk walk; unsigned int blocks; - err = skcipher_walk_virt(&walk, req, true); + err = skcipher_walk_virt(&walk, req, false); - kernel_neon_begin(); for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) { + kernel_neon_begin(); aes_xts_encrypt(walk.dst.virt.addr, walk.src.virt.addr, (u8 *)ctx->key1.key_enc, rounds, blocks, (u8 *)ctx->key2.key_enc, walk.iv, first); + kernel_neon_end(); err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE); } - kernel_neon_end(); return err; } @@ -292,16 +289,16 @@ static int xts_decrypt(struct skcipher_request *req) struct skcipher_walk walk; unsigned int blocks; - err = skcipher_walk_virt(&walk, req, true); + err = skcipher_walk_virt(&walk, req, false); - kernel_neon_begin(); for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) { + kernel_neon_begin(); 
aes_xts_decrypt(walk.dst.virt.addr, walk.src.virt.addr, (u8 *)ctx->key1.key_dec, rounds, blocks, (u8 *)ctx->key2.key_enc, walk.iv, first); + kernel_neon_end(); err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE); } - kernel_neon_end(); return err; } @@ -425,7 +422,7 @@ static int cmac_setkey(struct crypto_shash *tfm, const u8 *in_key, /* encrypt the zero vector */ kernel_neon_begin(); - aes_ecb_encrypt(ctx->consts, (u8[AES_BLOCK_SIZE]){}, rk, rounds, 1, 1); + aes_ecb_encrypt(ctx->consts, (u8[AES_BLOCK_SIZE]){}, rk, rounds, 1); kernel_neon_end(); cmac_gf128_mul_by_x(consts, consts); @@ -454,8 +451,8 @@ static int xcbc_setkey(struct crypto_shash *tfm, const u8 *in_key, return err; kernel_neon_begin(); - aes_ecb_encrypt(key, ks[0], rk, rounds, 1, 1); - aes_ecb_encrypt(ctx->consts, ks[1], rk, rounds, 2, 0); + aes_ecb_encrypt(key, ks[0], rk, rounds, 1); + aes_ecb_encrypt(ctx->consts, ks[1], rk, rounds, 2); kernel_neon_end(); return cbcmac_setkey(tfm, key, sizeof(key)); diff --git a/arch/arm64/crypto/aes-modes.S b/arch/arm64/crypto/aes-modes.S index 2674d43d1384..65b273667b34 100644 --- a/arch/arm64/crypto/aes-modes.S +++ b/arch/arm64/crypto/aes-modes.S @@ -40,24 +40,24 @@ #if INTERLEAVE == 2 aes_encrypt_block2x: - encrypt_block2x v0, v1, w3, x2, x6, w7 + encrypt_block2x v0, v1, w3, x2, x8, w7 ret ENDPROC(aes_encrypt_block2x) aes_decrypt_block2x: - decrypt_block2x v0, v1, w3, x2, x6, w7 + decrypt_block2x v0, v1, w3, x2, x8, w7 ret ENDPROC(aes_decrypt_block2x) #elif INTERLEAVE == 4 aes_encrypt_block4x: - encrypt_block4x v0, v1, v2, v3, w3, x2, x6, w7 + encrypt_block4x v0, v1, v2, v3, w3, x2, x8, w7 ret ENDPROC(aes_encrypt_block4x) aes_decrypt_block4x: - decrypt_block4x v0, v1, v2, v3, w3, x2, x6, w7 + decrypt_block4x v0, v1, v2, v3, w3, x2, x8, w7 ret ENDPROC(aes_decrypt_block4x) @@ -86,33 +86,32 @@ ENDPROC(aes_decrypt_block4x) #define FRAME_POP .macro do_encrypt_block2x - encrypt_block2x v0, v1, w3, x2, x6, w7 + encrypt_block2x v0, v1, w3, x2, x8, w7 .endm .macro do_decrypt_block2x - decrypt_block2x v0, v1, w3, x2, x6, w7 + decrypt_block2x v0, v1, w3, x2, x8, w7 .endm .macro do_encrypt_block4x - encrypt_block4x v0, v1, v2, v3, w3, x2, x6, w7 + encrypt_block4x v0, v1, v2, v3, w3, x2, x8, w7 .endm .macro do_decrypt_block4x - decrypt_block4x v0, v1, v2, v3, w3, x2, x6, w7 + decrypt_block4x v0, v1, v2, v3, w3, x2, x8, w7 .endm #endif /* * aes_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds, - * int blocks, int first) + * int blocks) * aes_ecb_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds, - * int blocks, int first) + * int blocks) */ AES_ENTRY(aes_ecb_encrypt) FRAME_PUSH - cbz w5, .LecbencloopNx enc_prepare w3, x2, x5 @@ -148,7 +147,6 @@ AES_ENDPROC(aes_ecb_encrypt) AES_ENTRY(aes_ecb_decrypt) FRAME_PUSH - cbz w5, .LecbdecloopNx dec_prepare w3, x2, x5 @@ -184,14 +182,12 @@ AES_ENDPROC(aes_ecb_decrypt) /* * aes_cbc_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds, - * int blocks, u8 iv[], int first) + * int blocks, u8 iv[]) * aes_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds, - * int blocks, u8 iv[], int first) + * int blocks, u8 iv[]) */ AES_ENTRY(aes_cbc_encrypt) - cbz w6, .Lcbcencloop - ld1 {v0.16b}, [x5] /* get iv */ enc_prepare w3, x2, x6 @@ -209,7 +205,6 @@ AES_ENDPROC(aes_cbc_encrypt) AES_ENTRY(aes_cbc_decrypt) FRAME_PUSH - cbz w6, .LcbcdecloopNx ld1 {v7.16b}, [x5] /* get iv */ dec_prepare w3, x2, x6 @@ -264,20 +259,19 @@ AES_ENDPROC(aes_cbc_decrypt) /* * aes_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds, - * int 
blocks, u8 ctr[], int first) + * int blocks, u8 ctr[]) */ AES_ENTRY(aes_ctr_encrypt) FRAME_PUSH - cbz w6, .Lctrnotfirst /* 1st time around? */ + enc_prepare w3, x2, x6 ld1 {v4.16b}, [x5] -.Lctrnotfirst: - umov x8, v4.d[1] /* keep swabbed ctr in reg */ - rev x8, x8 + umov x6, v4.d[1] /* keep swabbed ctr in reg */ + rev x6, x6 #if INTERLEAVE >= 2 - cmn w8, w4 /* 32 bit overflow? */ + cmn w6, w4 /* 32 bit overflow? */ bcs .Lctrloop .LctrloopNx: subs w4, w4, #INTERLEAVE @@ -285,11 +279,11 @@ AES_ENTRY(aes_ctr_encrypt) #if INTERLEAVE == 2 mov v0.8b, v4.8b mov v1.8b, v4.8b - rev x7, x8 - add x8, x8, #1 + rev x7, x6 + add x6, x6, #1 ins v0.d[1], x7 - rev x7, x8 - add x8, x8, #1 + rev x7, x6 + add x6, x6, #1 ins v1.d[1], x7 ld1 {v2.16b-v3.16b}, [x1], #32 /* get 2 input blocks */ do_encrypt_block2x @@ -298,7 +292,7 @@ AES_ENTRY(aes_ctr_encrypt) st1 {v0.16b-v1.16b}, [x0], #32 #else ldr q8, =0x30000000200000001 /* addends 1,2,3[,0] */ - dup v7.4s, w8 + dup v7.4s, w6 mov v0.16b, v4.16b add v7.4s, v7.4s, v8.4s mov v1.16b, v4.16b @@ -316,9 +310,9 @@ AES_ENTRY(aes_ctr_encrypt) eor v2.16b, v7.16b, v2.16b eor v3.16b, v5.16b, v3.16b st1 {v0.16b-v3.16b}, [x0], #64 - add x8, x8, #INTERLEAVE + add x6, x6, #INTERLEAVE #endif - rev x7, x8 + rev x7, x6 ins v4.d[1], x7 cbz w4, .Lctrout b .LctrloopNx @@ -328,10 +322,10 @@ AES_ENTRY(aes_ctr_encrypt) #endif .Lctrloop: mov v0.16b, v4.16b - encrypt_block v0, w3, x2, x6, w7 + encrypt_block v0, w3, x2, x8, w7 - adds x8, x8, #1 /* increment BE ctr */ - rev x7, x8 + adds x6, x6, #1 /* increment BE ctr */ + rev x7, x6 ins v4.d[1], x7 bcs .Lctrcarry /* overflow? */ @@ -385,15 +379,17 @@ CPU_BE( .quad 0x87, 1 ) AES_ENTRY(aes_xts_encrypt) FRAME_PUSH - cbz w7, .LxtsencloopNx - ld1 {v4.16b}, [x6] - enc_prepare w3, x5, x6 - encrypt_block v4, w3, x5, x6, w7 /* first tweak */ - enc_switch_key w3, x2, x6 + cbz w7, .Lxtsencnotfirst + + enc_prepare w3, x5, x8 + encrypt_block v4, w3, x5, x8, w7 /* first tweak */ + enc_switch_key w3, x2, x8 ldr q7, .Lxts_mul_x b .LxtsencNx +.Lxtsencnotfirst: + enc_prepare w3, x2, x8 .LxtsencloopNx: ldr q7, .Lxts_mul_x next_tweak v4, v4, v7, v8 @@ -442,7 +438,7 @@ AES_ENTRY(aes_xts_encrypt) .Lxtsencloop: ld1 {v1.16b}, [x1], #16 eor v0.16b, v1.16b, v4.16b - encrypt_block v0, w3, x2, x6, w7 + encrypt_block v0, w3, x2, x8, w7 eor v0.16b, v0.16b, v4.16b st1 {v0.16b}, [x0], #16 subs w4, w4, #1 @@ -450,6 +446,7 @@ AES_ENTRY(aes_xts_encrypt) next_tweak v4, v4, v7, v8 b .Lxtsencloop .Lxtsencout: + st1 {v4.16b}, [x6] FRAME_POP ret AES_ENDPROC(aes_xts_encrypt) @@ -457,15 +454,17 @@ AES_ENDPROC(aes_xts_encrypt) AES_ENTRY(aes_xts_decrypt) FRAME_PUSH - cbz w7, .LxtsdecloopNx - ld1 {v4.16b}, [x6] - enc_prepare w3, x5, x6 - encrypt_block v4, w3, x5, x6, w7 /* first tweak */ - dec_prepare w3, x2, x6 + cbz w7, .Lxtsdecnotfirst + + enc_prepare w3, x5, x8 + encrypt_block v4, w3, x5, x8, w7 /* first tweak */ + dec_prepare w3, x2, x8 ldr q7, .Lxts_mul_x b .LxtsdecNx +.Lxtsdecnotfirst: + dec_prepare w3, x2, x8 .LxtsdecloopNx: ldr q7, .Lxts_mul_x next_tweak v4, v4, v7, v8 @@ -514,7 +513,7 @@ AES_ENTRY(aes_xts_decrypt) .Lxtsdecloop: ld1 {v1.16b}, [x1], #16 eor v0.16b, v1.16b, v4.16b - decrypt_block v0, w3, x2, x6, w7 + decrypt_block v0, w3, x2, x8, w7 eor v0.16b, v0.16b, v4.16b st1 {v0.16b}, [x0], #16 subs w4, w4, #1 @@ -522,6 +521,7 @@ AES_ENTRY(aes_xts_decrypt) next_tweak v4, v4, v7, v8 b .Lxtsdecloop .Lxtsdecout: + st1 {v4.16b}, [x6] FRAME_POP ret AES_ENDPROC(aes_xts_decrypt) diff --git a/arch/arm64/crypto/aes-neonbs-glue.c b/arch/arm64/crypto/aes-neonbs-glue.c index 
c55d68ccb89f..9d823c77ec84 100644 --- a/arch/arm64/crypto/aes-neonbs-glue.c +++ b/arch/arm64/crypto/aes-neonbs-glue.c @@ -46,10 +46,9 @@ asmlinkage void aesbs_xts_decrypt(u8 out[], u8 const in[], u8 const rk[], /* borrowed from aes-neon-blk.ko */ asmlinkage void neon_aes_ecb_encrypt(u8 out[], u8 const in[], u32 const rk[], - int rounds, int blocks, int first); + int rounds, int blocks); asmlinkage void neon_aes_cbc_encrypt(u8 out[], u8 const in[], u32 const rk[], - int rounds, int blocks, u8 iv[], - int first); + int rounds, int blocks, u8 iv[]); struct aesbs_ctx { u8 rk[13 * (8 * AES_BLOCK_SIZE) + 32]; @@ -157,7 +156,7 @@ static int cbc_encrypt(struct skcipher_request *req) struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); struct aesbs_cbc_ctx *ctx = crypto_skcipher_ctx(tfm); struct skcipher_walk walk; - int err, first = 1; + int err; err = skcipher_walk_virt(&walk, req, true); @@ -167,10 +166,9 @@ static int cbc_encrypt(struct skcipher_request *req) /* fall back to the non-bitsliced NEON implementation */ neon_aes_cbc_encrypt(walk.dst.virt.addr, walk.src.virt.addr, - ctx->enc, ctx->key.rounds, blocks, walk.iv, - first); + ctx->enc, ctx->key.rounds, blocks, + walk.iv); err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE); - first = 0; } kernel_neon_end(); return err; @@ -311,7 +309,7 @@ static int __xts_crypt(struct skcipher_request *req, kernel_neon_begin(); neon_aes_ecb_encrypt(walk.iv, walk.iv, ctx->twkey, - ctx->key.rounds, 1, 1); + ctx->key.rounds, 1); while (walk.nbytes >= AES_BLOCK_SIZE) { unsigned int blocks = walk.nbytes / AES_BLOCK_SIZE; -- cgit v1.2.3 From 78ad7b08d8e096e5f00bc2cbeba9ef6d24be1bc4 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Sat, 10 Mar 2018 15:21:49 +0000 Subject: crypto: arm64/aes-bs - move kernel mode neon en/disable into loop When kernel mode NEON was first introduced on arm64, the preserve and restore of the userland NEON state was completely unoptimized, and involved saving all registers on each call to kernel_neon_begin(), and restoring them on each call to kernel_neon_end(). For this reason, the NEON crypto code that was introduced at the time keeps the NEON enabled throughout the execution of the crypto API methods, which may include calls back into the crypto API that could result in memory allocation or other actions that we should avoid when running with preemption disabled. Since then, we have optimized the kernel mode NEON handling, which now restores lazily (upon return to userland), and so the preserve action is only costly the first time it is called after entering the kernel. 
So let's put the kernel_neon_begin() and kernel_neon_end() calls around the actual invocations of the NEON crypto code, and run the remainder of the code with kernel mode NEON disabled (and preemption enabled) Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- arch/arm64/crypto/aes-neonbs-glue.c | 36 +++++++++++++++++------------------- 1 file changed, 17 insertions(+), 19 deletions(-) (limited to 'arch') diff --git a/arch/arm64/crypto/aes-neonbs-glue.c b/arch/arm64/crypto/aes-neonbs-glue.c index 9d823c77ec84..e7a95a566462 100644 --- a/arch/arm64/crypto/aes-neonbs-glue.c +++ b/arch/arm64/crypto/aes-neonbs-glue.c @@ -99,9 +99,8 @@ static int __ecb_crypt(struct skcipher_request *req, struct skcipher_walk walk; int err; - err = skcipher_walk_virt(&walk, req, true); + err = skcipher_walk_virt(&walk, req, false); - kernel_neon_begin(); while (walk.nbytes >= AES_BLOCK_SIZE) { unsigned int blocks = walk.nbytes / AES_BLOCK_SIZE; @@ -109,12 +108,13 @@ static int __ecb_crypt(struct skcipher_request *req, blocks = round_down(blocks, walk.stride / AES_BLOCK_SIZE); + kernel_neon_begin(); fn(walk.dst.virt.addr, walk.src.virt.addr, ctx->rk, ctx->rounds, blocks); + kernel_neon_end(); err = skcipher_walk_done(&walk, walk.nbytes - blocks * AES_BLOCK_SIZE); } - kernel_neon_end(); return err; } @@ -158,19 +158,19 @@ static int cbc_encrypt(struct skcipher_request *req) struct skcipher_walk walk; int err; - err = skcipher_walk_virt(&walk, req, true); + err = skcipher_walk_virt(&walk, req, false); - kernel_neon_begin(); while (walk.nbytes >= AES_BLOCK_SIZE) { unsigned int blocks = walk.nbytes / AES_BLOCK_SIZE; /* fall back to the non-bitsliced NEON implementation */ + kernel_neon_begin(); neon_aes_cbc_encrypt(walk.dst.virt.addr, walk.src.virt.addr, ctx->enc, ctx->key.rounds, blocks, walk.iv); + kernel_neon_end(); err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE); } - kernel_neon_end(); return err; } @@ -181,9 +181,8 @@ static int cbc_decrypt(struct skcipher_request *req) struct skcipher_walk walk; int err; - err = skcipher_walk_virt(&walk, req, true); + err = skcipher_walk_virt(&walk, req, false); - kernel_neon_begin(); while (walk.nbytes >= AES_BLOCK_SIZE) { unsigned int blocks = walk.nbytes / AES_BLOCK_SIZE; @@ -191,13 +190,14 @@ static int cbc_decrypt(struct skcipher_request *req) blocks = round_down(blocks, walk.stride / AES_BLOCK_SIZE); + kernel_neon_begin(); aesbs_cbc_decrypt(walk.dst.virt.addr, walk.src.virt.addr, ctx->key.rk, ctx->key.rounds, blocks, walk.iv); + kernel_neon_end(); err = skcipher_walk_done(&walk, walk.nbytes - blocks * AES_BLOCK_SIZE); } - kernel_neon_end(); return err; } @@ -229,9 +229,8 @@ static int ctr_encrypt(struct skcipher_request *req) u8 buf[AES_BLOCK_SIZE]; int err; - err = skcipher_walk_virt(&walk, req, true); + err = skcipher_walk_virt(&walk, req, false); - kernel_neon_begin(); while (walk.nbytes > 0) { unsigned int blocks = walk.nbytes / AES_BLOCK_SIZE; u8 *final = (walk.total % AES_BLOCK_SIZE) ? 
buf : NULL; @@ -242,8 +241,10 @@ static int ctr_encrypt(struct skcipher_request *req) final = NULL; } + kernel_neon_begin(); aesbs_ctr_encrypt(walk.dst.virt.addr, walk.src.virt.addr, ctx->rk, ctx->rounds, blocks, walk.iv, final); + kernel_neon_end(); if (final) { u8 *dst = walk.dst.virt.addr + blocks * AES_BLOCK_SIZE; @@ -258,8 +259,6 @@ static int ctr_encrypt(struct skcipher_request *req) err = skcipher_walk_done(&walk, walk.nbytes - blocks * AES_BLOCK_SIZE); } - kernel_neon_end(); - return err; } @@ -304,12 +303,11 @@ static int __xts_crypt(struct skcipher_request *req, struct skcipher_walk walk; int err; - err = skcipher_walk_virt(&walk, req, true); + err = skcipher_walk_virt(&walk, req, false); kernel_neon_begin(); - - neon_aes_ecb_encrypt(walk.iv, walk.iv, ctx->twkey, - ctx->key.rounds, 1); + neon_aes_ecb_encrypt(walk.iv, walk.iv, ctx->twkey, ctx->key.rounds, 1); + kernel_neon_end(); while (walk.nbytes >= AES_BLOCK_SIZE) { unsigned int blocks = walk.nbytes / AES_BLOCK_SIZE; @@ -318,13 +316,13 @@ static int __xts_crypt(struct skcipher_request *req, blocks = round_down(blocks, walk.stride / AES_BLOCK_SIZE); + kernel_neon_begin(); fn(walk.dst.virt.addr, walk.src.virt.addr, ctx->key.rk, ctx->key.rounds, blocks, walk.iv); + kernel_neon_end(); err = skcipher_walk_done(&walk, walk.nbytes - blocks * AES_BLOCK_SIZE); } - kernel_neon_end(); - return err; } -- cgit v1.2.3 From 4bf7e7a19df127391ddc5c47888bc62ef1bfd0ad Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Sat, 10 Mar 2018 15:21:50 +0000 Subject: crypto: arm64/chacha20 - move kernel mode neon en/disable into loop When kernel mode NEON was first introduced on arm64, the preserve and restore of the userland NEON state was completely unoptimized, and involved saving all registers on each call to kernel_neon_begin(), and restoring them on each call to kernel_neon_end(). For this reason, the NEON crypto code that was introduced at the time keeps the NEON enabled throughout the execution of the crypto API methods, which may include calls back into the crypto API that could result in memory allocation or other actions that we should avoid when running with preemption disabled. Since then, we have optimized the kernel mode NEON handling, which now restores lazily (upon return to userland), and so the preserve action is only costly the first time it is called after entering the kernel. 
So let's put the kernel_neon_begin() and kernel_neon_end() calls around the actual invocations of the NEON crypto code, and run the remainder of the code with kernel mode NEON disabled (and preemption enabled) Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- arch/arm64/crypto/chacha20-neon-glue.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/arm64/crypto/chacha20-neon-glue.c b/arch/arm64/crypto/chacha20-neon-glue.c index cbdb75d15cd0..727579c93ded 100644 --- a/arch/arm64/crypto/chacha20-neon-glue.c +++ b/arch/arm64/crypto/chacha20-neon-glue.c @@ -37,12 +37,19 @@ static void chacha20_doneon(u32 *state, u8 *dst, const u8 *src, u8 buf[CHACHA20_BLOCK_SIZE]; while (bytes >= CHACHA20_BLOCK_SIZE * 4) { + kernel_neon_begin(); chacha20_4block_xor_neon(state, dst, src); + kernel_neon_end(); bytes -= CHACHA20_BLOCK_SIZE * 4; src += CHACHA20_BLOCK_SIZE * 4; dst += CHACHA20_BLOCK_SIZE * 4; state[12] += 4; } + + if (!bytes) + return; + + kernel_neon_begin(); while (bytes >= CHACHA20_BLOCK_SIZE) { chacha20_block_xor_neon(state, dst, src); bytes -= CHACHA20_BLOCK_SIZE; @@ -55,6 +62,7 @@ static void chacha20_doneon(u32 *state, u8 *dst, const u8 *src, chacha20_block_xor_neon(state, buf, buf); memcpy(dst, buf, bytes); } + kernel_neon_end(); } static int chacha20_neon(struct skcipher_request *req) @@ -68,11 +76,10 @@ static int chacha20_neon(struct skcipher_request *req) if (!may_use_simd() || req->cryptlen <= CHACHA20_BLOCK_SIZE) return crypto_chacha20_crypt(req); - err = skcipher_walk_virt(&walk, req, true); + err = skcipher_walk_virt(&walk, req, false); crypto_chacha20_init(state, ctx, walk.iv); - kernel_neon_begin(); while (walk.nbytes > 0) { unsigned int nbytes = walk.nbytes; @@ -83,7 +90,6 @@ static int chacha20_neon(struct skcipher_request *req) nbytes); err = skcipher_walk_done(&walk, walk.nbytes - nbytes); } - kernel_neon_end(); return err; } -- cgit v1.2.3 From 55868b45cffdb7b4d7f9eddd3d4f1262cf645885 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Sat, 10 Mar 2018 15:21:51 +0000 Subject: crypto: arm64/aes-blk - remove configurable interleave The AES block mode implementation using Crypto Extensions or plain NEON was written before real hardware existed, and so its interleave factor was made build time configurable (as well as an option to instantiate all interleaved sequences inline rather than as subroutines) We ended up using INTERLEAVE=4 with inlining disabled for both flavors of the core AES routines, so let's stick with that, and remove the option to configure this at build time. This makes the code easier to modify, which is nice now that we're adding yield support. 
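For reference, the 4-way structure that is kept is the usual "groups of four plus a scalar tail" loop. A rough C analogue of what the assembly does (illustrative only; crypt_block4x() and crypt_block() are stand-ins for the encrypt/decrypt_block4x and encrypt/decrypt_block macros):

    static void ecb_crypt(u8 dst[], u8 const src[], int blocks)
    {
            while (blocks >= 4) {
                    /* four blocks in flight to hide the AES instruction latency */
                    crypt_block4x(dst, src);
                    dst += 4 * AES_BLOCK_SIZE;
                    src += 4 * AES_BLOCK_SIZE;
                    blocks -= 4;
            }
            while (blocks-- > 0) {
                    /* remaining blocks, one at a time */
                    crypt_block(dst, src);
                    dst += AES_BLOCK_SIZE;
                    src += AES_BLOCK_SIZE;
            }
    }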
Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- arch/arm64/crypto/Makefile | 3 - arch/arm64/crypto/aes-modes.S | 237 +++++++----------------------------------- 2 files changed, 40 insertions(+), 200 deletions(-) (limited to 'arch') diff --git a/arch/arm64/crypto/Makefile b/arch/arm64/crypto/Makefile index d94ebd15a859..318346a0c1dd 100644 --- a/arch/arm64/crypto/Makefile +++ b/arch/arm64/crypto/Makefile @@ -62,9 +62,6 @@ aes-arm64-y := aes-cipher-core.o aes-cipher-glue.o obj-$(CONFIG_CRYPTO_AES_ARM64_BS) += aes-neon-bs.o aes-neon-bs-y := aes-neonbs-core.o aes-neonbs-glue.o -AFLAGS_aes-ce.o := -DINTERLEAVE=4 -AFLAGS_aes-neon.o := -DINTERLEAVE=4 - CFLAGS_aes-glue-ce.o := -DUSE_V8_CRYPTO_EXTENSIONS $(obj)/aes-glue-%.o: $(src)/aes-glue.c FORCE diff --git a/arch/arm64/crypto/aes-modes.S b/arch/arm64/crypto/aes-modes.S index 65b273667b34..27a235b2ddee 100644 --- a/arch/arm64/crypto/aes-modes.S +++ b/arch/arm64/crypto/aes-modes.S @@ -13,44 +13,6 @@ .text .align 4 -/* - * There are several ways to instantiate this code: - * - no interleave, all inline - * - 2-way interleave, 2x calls out of line (-DINTERLEAVE=2) - * - 2-way interleave, all inline (-DINTERLEAVE=2 -DINTERLEAVE_INLINE) - * - 4-way interleave, 4x calls out of line (-DINTERLEAVE=4) - * - 4-way interleave, all inline (-DINTERLEAVE=4 -DINTERLEAVE_INLINE) - * - * Macros imported by this code: - * - enc_prepare - setup NEON registers for encryption - * - dec_prepare - setup NEON registers for decryption - * - enc_switch_key - change to new key after having prepared for encryption - * - encrypt_block - encrypt a single block - * - decrypt block - decrypt a single block - * - encrypt_block2x - encrypt 2 blocks in parallel (if INTERLEAVE == 2) - * - decrypt_block2x - decrypt 2 blocks in parallel (if INTERLEAVE == 2) - * - encrypt_block4x - encrypt 4 blocks in parallel (if INTERLEAVE == 4) - * - decrypt_block4x - decrypt 4 blocks in parallel (if INTERLEAVE == 4) - */ - -#if defined(INTERLEAVE) && !defined(INTERLEAVE_INLINE) -#define FRAME_PUSH stp x29, x30, [sp,#-16]! ; mov x29, sp -#define FRAME_POP ldp x29, x30, [sp],#16 - -#if INTERLEAVE == 2 - -aes_encrypt_block2x: - encrypt_block2x v0, v1, w3, x2, x8, w7 - ret -ENDPROC(aes_encrypt_block2x) - -aes_decrypt_block2x: - decrypt_block2x v0, v1, w3, x2, x8, w7 - ret -ENDPROC(aes_decrypt_block2x) - -#elif INTERLEAVE == 4 - aes_encrypt_block4x: encrypt_block4x v0, v1, v2, v3, w3, x2, x8, w7 ret @@ -61,48 +23,6 @@ aes_decrypt_block4x: ret ENDPROC(aes_decrypt_block4x) -#else -#error INTERLEAVE should equal 2 or 4 -#endif - - .macro do_encrypt_block2x - bl aes_encrypt_block2x - .endm - - .macro do_decrypt_block2x - bl aes_decrypt_block2x - .endm - - .macro do_encrypt_block4x - bl aes_encrypt_block4x - .endm - - .macro do_decrypt_block4x - bl aes_decrypt_block4x - .endm - -#else -#define FRAME_PUSH -#define FRAME_POP - - .macro do_encrypt_block2x - encrypt_block2x v0, v1, w3, x2, x8, w7 - .endm - - .macro do_decrypt_block2x - decrypt_block2x v0, v1, w3, x2, x8, w7 - .endm - - .macro do_encrypt_block4x - encrypt_block4x v0, v1, v2, v3, w3, x2, x8, w7 - .endm - - .macro do_decrypt_block4x - decrypt_block4x v0, v1, v2, v3, w3, x2, x8, w7 - .endm - -#endif - /* * aes_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds, * int blocks) @@ -111,28 +31,21 @@ ENDPROC(aes_decrypt_block4x) */ AES_ENTRY(aes_ecb_encrypt) - FRAME_PUSH + stp x29, x30, [sp, #-16]! 
+ mov x29, sp enc_prepare w3, x2, x5 .LecbencloopNx: -#if INTERLEAVE >= 2 - subs w4, w4, #INTERLEAVE + subs w4, w4, #4 bmi .Lecbenc1x -#if INTERLEAVE == 2 - ld1 {v0.16b-v1.16b}, [x1], #32 /* get 2 pt blocks */ - do_encrypt_block2x - st1 {v0.16b-v1.16b}, [x0], #32 -#else ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 pt blocks */ - do_encrypt_block4x + bl aes_encrypt_block4x st1 {v0.16b-v3.16b}, [x0], #64 -#endif b .LecbencloopNx .Lecbenc1x: - adds w4, w4, #INTERLEAVE + adds w4, w4, #4 beq .Lecbencout -#endif .Lecbencloop: ld1 {v0.16b}, [x1], #16 /* get next pt block */ encrypt_block v0, w3, x2, x5, w6 @@ -140,34 +53,27 @@ AES_ENTRY(aes_ecb_encrypt) subs w4, w4, #1 bne .Lecbencloop .Lecbencout: - FRAME_POP + ldp x29, x30, [sp], #16 ret AES_ENDPROC(aes_ecb_encrypt) AES_ENTRY(aes_ecb_decrypt) - FRAME_PUSH + stp x29, x30, [sp, #-16]! + mov x29, sp dec_prepare w3, x2, x5 .LecbdecloopNx: -#if INTERLEAVE >= 2 - subs w4, w4, #INTERLEAVE + subs w4, w4, #4 bmi .Lecbdec1x -#if INTERLEAVE == 2 - ld1 {v0.16b-v1.16b}, [x1], #32 /* get 2 ct blocks */ - do_decrypt_block2x - st1 {v0.16b-v1.16b}, [x0], #32 -#else ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 ct blocks */ - do_decrypt_block4x + bl aes_decrypt_block4x st1 {v0.16b-v3.16b}, [x0], #64 -#endif b .LecbdecloopNx .Lecbdec1x: - adds w4, w4, #INTERLEAVE + adds w4, w4, #4 beq .Lecbdecout -#endif .Lecbdecloop: ld1 {v0.16b}, [x1], #16 /* get next ct block */ decrypt_block v0, w3, x2, x5, w6 @@ -175,7 +81,7 @@ AES_ENTRY(aes_ecb_decrypt) subs w4, w4, #1 bne .Lecbdecloop .Lecbdecout: - FRAME_POP + ldp x29, x30, [sp], #16 ret AES_ENDPROC(aes_ecb_decrypt) @@ -204,30 +110,20 @@ AES_ENDPROC(aes_cbc_encrypt) AES_ENTRY(aes_cbc_decrypt) - FRAME_PUSH + stp x29, x30, [sp, #-16]! + mov x29, sp ld1 {v7.16b}, [x5] /* get iv */ dec_prepare w3, x2, x6 .LcbcdecloopNx: -#if INTERLEAVE >= 2 - subs w4, w4, #INTERLEAVE + subs w4, w4, #4 bmi .Lcbcdec1x -#if INTERLEAVE == 2 - ld1 {v0.16b-v1.16b}, [x1], #32 /* get 2 ct blocks */ - mov v2.16b, v0.16b - mov v3.16b, v1.16b - do_decrypt_block2x - eor v0.16b, v0.16b, v7.16b - eor v1.16b, v1.16b, v2.16b - mov v7.16b, v3.16b - st1 {v0.16b-v1.16b}, [x0], #32 -#else ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 ct blocks */ mov v4.16b, v0.16b mov v5.16b, v1.16b mov v6.16b, v2.16b - do_decrypt_block4x + bl aes_decrypt_block4x sub x1, x1, #16 eor v0.16b, v0.16b, v7.16b eor v1.16b, v1.16b, v4.16b @@ -235,12 +131,10 @@ AES_ENTRY(aes_cbc_decrypt) eor v2.16b, v2.16b, v5.16b eor v3.16b, v3.16b, v6.16b st1 {v0.16b-v3.16b}, [x0], #64 -#endif b .LcbcdecloopNx .Lcbcdec1x: - adds w4, w4, #INTERLEAVE + adds w4, w4, #4 beq .Lcbcdecout -#endif .Lcbcdecloop: ld1 {v1.16b}, [x1], #16 /* get next ct block */ mov v0.16b, v1.16b /* ...and copy to v0 */ @@ -251,8 +145,8 @@ AES_ENTRY(aes_cbc_decrypt) subs w4, w4, #1 bne .Lcbcdecloop .Lcbcdecout: - FRAME_POP st1 {v7.16b}, [x5] /* return iv */ + ldp x29, x30, [sp], #16 ret AES_ENDPROC(aes_cbc_decrypt) @@ -263,34 +157,19 @@ AES_ENDPROC(aes_cbc_decrypt) */ AES_ENTRY(aes_ctr_encrypt) - FRAME_PUSH + stp x29, x30, [sp, #-16]! + mov x29, sp enc_prepare w3, x2, x6 ld1 {v4.16b}, [x5] umov x6, v4.d[1] /* keep swabbed ctr in reg */ rev x6, x6 -#if INTERLEAVE >= 2 cmn w6, w4 /* 32 bit overflow? 
*/ bcs .Lctrloop .LctrloopNx: - subs w4, w4, #INTERLEAVE + subs w4, w4, #4 bmi .Lctr1x -#if INTERLEAVE == 2 - mov v0.8b, v4.8b - mov v1.8b, v4.8b - rev x7, x6 - add x6, x6, #1 - ins v0.d[1], x7 - rev x7, x6 - add x6, x6, #1 - ins v1.d[1], x7 - ld1 {v2.16b-v3.16b}, [x1], #32 /* get 2 input blocks */ - do_encrypt_block2x - eor v0.16b, v0.16b, v2.16b - eor v1.16b, v1.16b, v3.16b - st1 {v0.16b-v1.16b}, [x0], #32 -#else ldr q8, =0x30000000200000001 /* addends 1,2,3[,0] */ dup v7.4s, w6 mov v0.16b, v4.16b @@ -303,23 +182,21 @@ AES_ENTRY(aes_ctr_encrypt) mov v2.s[3], v8.s[1] mov v3.s[3], v8.s[2] ld1 {v5.16b-v7.16b}, [x1], #48 /* get 3 input blocks */ - do_encrypt_block4x + bl aes_encrypt_block4x eor v0.16b, v5.16b, v0.16b ld1 {v5.16b}, [x1], #16 /* get 1 input block */ eor v1.16b, v6.16b, v1.16b eor v2.16b, v7.16b, v2.16b eor v3.16b, v5.16b, v3.16b st1 {v0.16b-v3.16b}, [x0], #64 - add x6, x6, #INTERLEAVE -#endif + add x6, x6, #4 rev x7, x6 ins v4.d[1], x7 cbz w4, .Lctrout b .LctrloopNx .Lctr1x: - adds w4, w4, #INTERLEAVE + adds w4, w4, #4 beq .Lctrout -#endif .Lctrloop: mov v0.16b, v4.16b encrypt_block v0, w3, x2, x8, w7 @@ -339,12 +216,12 @@ AES_ENTRY(aes_ctr_encrypt) .Lctrout: st1 {v4.16b}, [x5] /* return next CTR value */ - FRAME_POP + ldp x29, x30, [sp], #16 ret .Lctrtailblock: st1 {v0.16b}, [x0] - FRAME_POP + ldp x29, x30, [sp], #16 ret .Lctrcarry: @@ -378,7 +255,9 @@ CPU_LE( .quad 1, 0x87 ) CPU_BE( .quad 0x87, 1 ) AES_ENTRY(aes_xts_encrypt) - FRAME_PUSH + stp x29, x30, [sp, #-16]! + mov x29, sp + ld1 {v4.16b}, [x6] cbz w7, .Lxtsencnotfirst @@ -394,25 +273,8 @@ AES_ENTRY(aes_xts_encrypt) ldr q7, .Lxts_mul_x next_tweak v4, v4, v7, v8 .LxtsencNx: -#if INTERLEAVE >= 2 - subs w4, w4, #INTERLEAVE + subs w4, w4, #4 bmi .Lxtsenc1x -#if INTERLEAVE == 2 - ld1 {v0.16b-v1.16b}, [x1], #32 /* get 2 pt blocks */ - next_tweak v5, v4, v7, v8 - eor v0.16b, v0.16b, v4.16b - eor v1.16b, v1.16b, v5.16b - do_encrypt_block2x - eor v0.16b, v0.16b, v4.16b - eor v1.16b, v1.16b, v5.16b - st1 {v0.16b-v1.16b}, [x0], #32 - cbz w4, .LxtsencoutNx - next_tweak v4, v5, v7, v8 - b .LxtsencNx -.LxtsencoutNx: - mov v4.16b, v5.16b - b .Lxtsencout -#else ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 pt blocks */ next_tweak v5, v4, v7, v8 eor v0.16b, v0.16b, v4.16b @@ -421,7 +283,7 @@ AES_ENTRY(aes_xts_encrypt) eor v2.16b, v2.16b, v6.16b next_tweak v7, v6, v7, v8 eor v3.16b, v3.16b, v7.16b - do_encrypt_block4x + bl aes_encrypt_block4x eor v3.16b, v3.16b, v7.16b eor v0.16b, v0.16b, v4.16b eor v1.16b, v1.16b, v5.16b @@ -430,11 +292,9 @@ AES_ENTRY(aes_xts_encrypt) mov v4.16b, v7.16b cbz w4, .Lxtsencout b .LxtsencloopNx -#endif .Lxtsenc1x: - adds w4, w4, #INTERLEAVE + adds w4, w4, #4 beq .Lxtsencout -#endif .Lxtsencloop: ld1 {v1.16b}, [x1], #16 eor v0.16b, v1.16b, v4.16b @@ -447,13 +307,15 @@ AES_ENTRY(aes_xts_encrypt) b .Lxtsencloop .Lxtsencout: st1 {v4.16b}, [x6] - FRAME_POP + ldp x29, x30, [sp], #16 ret AES_ENDPROC(aes_xts_encrypt) AES_ENTRY(aes_xts_decrypt) - FRAME_PUSH + stp x29, x30, [sp, #-16]! 
+ mov x29, sp + ld1 {v4.16b}, [x6] cbz w7, .Lxtsdecnotfirst @@ -469,25 +331,8 @@ AES_ENTRY(aes_xts_decrypt) ldr q7, .Lxts_mul_x next_tweak v4, v4, v7, v8 .LxtsdecNx: -#if INTERLEAVE >= 2 - subs w4, w4, #INTERLEAVE + subs w4, w4, #4 bmi .Lxtsdec1x -#if INTERLEAVE == 2 - ld1 {v0.16b-v1.16b}, [x1], #32 /* get 2 ct blocks */ - next_tweak v5, v4, v7, v8 - eor v0.16b, v0.16b, v4.16b - eor v1.16b, v1.16b, v5.16b - do_decrypt_block2x - eor v0.16b, v0.16b, v4.16b - eor v1.16b, v1.16b, v5.16b - st1 {v0.16b-v1.16b}, [x0], #32 - cbz w4, .LxtsdecoutNx - next_tweak v4, v5, v7, v8 - b .LxtsdecNx -.LxtsdecoutNx: - mov v4.16b, v5.16b - b .Lxtsdecout -#else ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 ct blocks */ next_tweak v5, v4, v7, v8 eor v0.16b, v0.16b, v4.16b @@ -496,7 +341,7 @@ AES_ENTRY(aes_xts_decrypt) eor v2.16b, v2.16b, v6.16b next_tweak v7, v6, v7, v8 eor v3.16b, v3.16b, v7.16b - do_decrypt_block4x + bl aes_decrypt_block4x eor v3.16b, v3.16b, v7.16b eor v0.16b, v0.16b, v4.16b eor v1.16b, v1.16b, v5.16b @@ -505,11 +350,9 @@ AES_ENTRY(aes_xts_decrypt) mov v4.16b, v7.16b cbz w4, .Lxtsdecout b .LxtsdecloopNx -#endif .Lxtsdec1x: - adds w4, w4, #INTERLEAVE + adds w4, w4, #4 beq .Lxtsdecout -#endif .Lxtsdecloop: ld1 {v1.16b}, [x1], #16 eor v0.16b, v1.16b, v4.16b @@ -522,7 +365,7 @@ AES_ENTRY(aes_xts_decrypt) b .Lxtsdecloop .Lxtsdecout: st1 {v4.16b}, [x6] - FRAME_POP + ldp x29, x30, [sp], #16 ret AES_ENDPROC(aes_xts_decrypt) -- cgit v1.2.3 From a8f8a69e82b6ecfbaa869987955b1dbd5ae7f612 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Sat, 10 Mar 2018 15:21:52 +0000 Subject: crypto: arm64/aes-blk - add 4 way interleave to CBC encrypt path CBC encryption is strictly sequential, and so the current AES code simply processes the input one block at a time. However, we are about to add yield support, which adds a bit of overhead, and which we prefer to align with other modes in terms of granularity (i.e., it is better to have all routines yield every 64 bytes and not have an exception for CBC encrypt which yields every 16 bytes) So unroll the loop by 4. We still cannot perform the AES algorithm in parallel, but we can at least merge the loads and stores. 
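In C terms the unrolled loop still carries a strict serial dependency through the AES core; only the memory accesses are batched. A simplified sketch (aes_encrypt_one() is a stand-in for the encrypt_block macro, iv holds the previous ciphertext block):

    while (blocks >= 4) {
            u8 buf[4][AES_BLOCK_SIZE];
            int i;

            memcpy(buf, src, sizeof(buf));            /* one 64-byte load */
            for (i = 0; i < 4; i++) {
                    crypto_xor(iv, buf[i], AES_BLOCK_SIZE);
                    aes_encrypt_one(ctx, iv, iv);     /* needs the previous ct block */
                    memcpy(buf[i], iv, AES_BLOCK_SIZE);
            }
            memcpy(dst, buf, sizeof(buf));            /* one 64-byte store */
            src += sizeof(buf);
            dst += sizeof(buf);
            blocks -= 4;
    }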
Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- arch/arm64/crypto/aes-modes.S | 31 +++++++++++++++++++++++++------ 1 file changed, 25 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/arm64/crypto/aes-modes.S b/arch/arm64/crypto/aes-modes.S index 27a235b2ddee..e86535a1329d 100644 --- a/arch/arm64/crypto/aes-modes.S +++ b/arch/arm64/crypto/aes-modes.S @@ -94,17 +94,36 @@ AES_ENDPROC(aes_ecb_decrypt) */ AES_ENTRY(aes_cbc_encrypt) - ld1 {v0.16b}, [x5] /* get iv */ + ld1 {v4.16b}, [x5] /* get iv */ enc_prepare w3, x2, x6 -.Lcbcencloop: - ld1 {v1.16b}, [x1], #16 /* get next pt block */ - eor v0.16b, v0.16b, v1.16b /* ..and xor with iv */ +.Lcbcencloop4x: + subs w4, w4, #4 + bmi .Lcbcenc1x + ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 pt blocks */ + eor v0.16b, v0.16b, v4.16b /* ..and xor with iv */ encrypt_block v0, w3, x2, x6, w7 - st1 {v0.16b}, [x0], #16 + eor v1.16b, v1.16b, v0.16b + encrypt_block v1, w3, x2, x6, w7 + eor v2.16b, v2.16b, v1.16b + encrypt_block v2, w3, x2, x6, w7 + eor v3.16b, v3.16b, v2.16b + encrypt_block v3, w3, x2, x6, w7 + st1 {v0.16b-v3.16b}, [x0], #64 + mov v4.16b, v3.16b + b .Lcbcencloop4x +.Lcbcenc1x: + adds w4, w4, #4 + beq .Lcbcencout +.Lcbcencloop: + ld1 {v0.16b}, [x1], #16 /* get next pt block */ + eor v4.16b, v4.16b, v0.16b /* ..and xor with iv */ + encrypt_block v4, w3, x2, x6, w7 + st1 {v4.16b}, [x0], #16 subs w4, w4, #1 bne .Lcbcencloop - st1 {v0.16b}, [x5] /* return iv */ +.Lcbcencout: + st1 {v4.16b}, [x5] /* return iv */ ret AES_ENDPROC(aes_cbc_encrypt) -- cgit v1.2.3 From 870c163a0ee6486bf0a313d47da927400bcb131b Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Sat, 10 Mar 2018 15:21:53 +0000 Subject: crypto: arm64/aes-blk - add 4 way interleave to CBC-MAC encrypt path CBC MAC is strictly sequential, and so the current AES code simply processes the input one block at a time. However, we are about to add yield support, which adds a bit of overhead, and which we prefer to align with other modes in terms of granularity (i.e., it is better to have all routines yield every 64 bytes and not have an exception for CBC MAC which yields every 16 bytes) So unroll the loop by 4. We still cannot perform the AES algorithm in parallel, but we can at least merge the loads and stores. 
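Here the recurrence is just dg = E(K, dg ^ m[i]) with no data output, so only the input load can be merged. Sketched in C (illustrative only; aes_encrypt_one() stands in for the encrypt_block macro, dg is the running CBC-MAC digest; the actual code defers the final encryption of a group until it knows more input follows, as the hunk below shows):

    while (blocks >= 4) {
            u8 m[4][AES_BLOCK_SIZE];
            int i;

            memcpy(m, src, sizeof(m));                /* one 64-byte load */
            for (i = 0; i < 4; i++) {
                    crypto_xor(dg, m[i], AES_BLOCK_SIZE);
                    aes_encrypt_one(ctx, dg, dg);     /* dg = E(dg ^ m[i]) */
            }
            src += sizeof(m);
            blocks -= 4;
    }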
Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- arch/arm64/crypto/aes-modes.S | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm64/crypto/aes-modes.S b/arch/arm64/crypto/aes-modes.S index e86535a1329d..a68412e1e3a4 100644 --- a/arch/arm64/crypto/aes-modes.S +++ b/arch/arm64/crypto/aes-modes.S @@ -395,8 +395,28 @@ AES_ENDPROC(aes_xts_decrypt) AES_ENTRY(aes_mac_update) ld1 {v0.16b}, [x4] /* get dg */ enc_prepare w2, x1, x7 - cbnz w5, .Lmacenc + cbz w5, .Lmacloop4x + encrypt_block v0, w2, x1, x7, w8 + +.Lmacloop4x: + subs w3, w3, #4 + bmi .Lmac1x + ld1 {v1.16b-v4.16b}, [x0], #64 /* get next pt block */ + eor v0.16b, v0.16b, v1.16b /* ..and xor with dg */ + encrypt_block v0, w2, x1, x7, w8 + eor v0.16b, v0.16b, v2.16b + encrypt_block v0, w2, x1, x7, w8 + eor v0.16b, v0.16b, v3.16b + encrypt_block v0, w2, x1, x7, w8 + eor v0.16b, v0.16b, v4.16b + cmp w3, wzr + csinv x5, x6, xzr, eq + cbz w5, .Lmacout + encrypt_block v0, w2, x1, x7, w8 + b .Lmacloop4x +.Lmac1x: + add w3, w3, #4 .Lmacloop: cbz w3, .Lmacout ld1 {v1.16b}, [x0], #16 /* get next pt block */ @@ -406,7 +426,6 @@ AES_ENTRY(aes_mac_update) csinv x5, x6, xzr, eq cbz w5, .Lmacout -.Lmacenc: encrypt_block v0, w2, x1, x7, w8 b .Lmacloop -- cgit v1.2.3 From 1a3713c7cd8c903264065a79c9b29517eec5152e Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Sat, 10 Mar 2018 15:21:54 +0000 Subject: crypto: arm64/sha256-neon - play nice with CONFIG_PREEMPT kernels Tweak the SHA256 update routines to invoke the SHA256 block transform block by block, to avoid excessive scheduling delays caused by the NEON algorithm running with preemption disabled. Also, remove a stale comment which no longer applies now that kernel mode NEON is actually disallowed in some contexts. Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- arch/arm64/crypto/sha256-glue.c | 36 +++++++++++++++++++++++------------- 1 file changed, 23 insertions(+), 13 deletions(-) (limited to 'arch') diff --git a/arch/arm64/crypto/sha256-glue.c b/arch/arm64/crypto/sha256-glue.c index b064d925fe2a..e8880ccdc71f 100644 --- a/arch/arm64/crypto/sha256-glue.c +++ b/arch/arm64/crypto/sha256-glue.c @@ -89,21 +89,32 @@ static struct shash_alg algs[] = { { static int sha256_update_neon(struct shash_desc *desc, const u8 *data, unsigned int len) { - /* - * Stacking and unstacking a substantial slice of the NEON register - * file may significantly affect performance for small updates when - * executing in interrupt context, so fall back to the scalar code - * in that case. - */ + struct sha256_state *sctx = shash_desc_ctx(desc); + if (!may_use_simd()) return sha256_base_do_update(desc, data, len, (sha256_block_fn *)sha256_block_data_order); - kernel_neon_begin(); - sha256_base_do_update(desc, data, len, - (sha256_block_fn *)sha256_block_neon); - kernel_neon_end(); + while (len > 0) { + unsigned int chunk = len; + + /* + * Don't hog the CPU for the entire time it takes to process all + * input when running on a preemptible kernel, but process the + * data block by block instead. 
+ */ + if (IS_ENABLED(CONFIG_PREEMPT) && + chunk + sctx->count % SHA256_BLOCK_SIZE > SHA256_BLOCK_SIZE) + chunk = SHA256_BLOCK_SIZE - + sctx->count % SHA256_BLOCK_SIZE; + kernel_neon_begin(); + sha256_base_do_update(desc, data, chunk, + (sha256_block_fn *)sha256_block_neon); + kernel_neon_end(); + data += chunk; + len -= chunk; + } return 0; } @@ -117,10 +128,9 @@ static int sha256_finup_neon(struct shash_desc *desc, const u8 *data, sha256_base_do_finalize(desc, (sha256_block_fn *)sha256_block_data_order); } else { - kernel_neon_begin(); if (len) - sha256_base_do_update(desc, data, len, - (sha256_block_fn *)sha256_block_neon); + sha256_update_neon(desc, data, len); + kernel_neon_begin(); sha256_base_do_finalize(desc, (sha256_block_fn *)sha256_block_neon); kernel_neon_end(); -- cgit v1.2.3 From 6aaf49b495b446ff6eec0ac983f781ca0dc56a73 Mon Sep 17 00:00:00 2001 From: Leonard Crestez Date: Tue, 13 Mar 2018 22:17:23 +0200 Subject: crypto: arm,arm64 - Fix random regeneration of S_shipped The decision to rebuild .S_shipped is made based on the relative timestamps of .S_shipped and .pl files but git makes this essentially random. This means that the perl script might run anyway (usually at most once per checkout), defeating the whole purpose of _shipped. Fix by skipping the rule unless explicit make variables are provided: REGENERATE_ARM_CRYPTO or REGENERATE_ARM64_CRYPTO. This can produce nasty occasional build failures downstream, for example for toolchains with broken perl. The solution is minimally intrusive to make it easier to push into stable. Another report on a similar issue here: https://lkml.org/lkml/2018/3/8/1379 Signed-off-by: Leonard Crestez Cc: Reviewed-by: Masahiro Yamada Acked-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- arch/arm/crypto/Makefile | 2 ++ arch/arm64/crypto/Makefile | 2 ++ 2 files changed, 4 insertions(+) (limited to 'arch') diff --git a/arch/arm/crypto/Makefile b/arch/arm/crypto/Makefile index a758107c5525..3304e671918d 100644 --- a/arch/arm/crypto/Makefile +++ b/arch/arm/crypto/Makefile @@ -56,6 +56,7 @@ crc32-arm-ce-y:= crc32-ce-core.o crc32-ce-glue.o chacha20-neon-y := chacha20-neon-core.o chacha20-neon-glue.o speck-neon-y := speck-neon-core.o speck-neon-glue.o +ifdef REGENERATE_ARM_CRYPTO quiet_cmd_perl = PERL $@ cmd_perl = $(PERL) $(<) > $(@) @@ -64,5 +65,6 @@ $(src)/sha256-core.S_shipped: $(src)/sha256-armv4.pl $(src)/sha512-core.S_shipped: $(src)/sha512-armv4.pl $(call cmd,perl) +endif .PRECIOUS: $(obj)/sha256-core.S $(obj)/sha512-core.S diff --git a/arch/arm64/crypto/Makefile b/arch/arm64/crypto/Makefile index 318346a0c1dd..8df9f326f449 100644 --- a/arch/arm64/crypto/Makefile +++ b/arch/arm64/crypto/Makefile @@ -67,6 +67,7 @@ CFLAGS_aes-glue-ce.o := -DUSE_V8_CRYPTO_EXTENSIONS $(obj)/aes-glue-%.o: $(src)/aes-glue.c FORCE $(call if_changed_rule,cc_o_c) +ifdef REGENERATE_ARM64_CRYPTO quiet_cmd_perlasm = PERLASM $@ cmd_perlasm = $(PERL) $(<) void $(@) @@ -75,5 +76,6 @@ $(src)/sha256-core.S_shipped: $(src)/sha512-armv8.pl $(src)/sha512-core.S_shipped: $(src)/sha512-armv8.pl $(call cmd,perlasm) +endif .PRECIOUS: $(obj)/sha256-core.S $(obj)/sha512-core.S -- cgit v1.2.3
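One detail of the sha256-neon change above that is easy to miss: the chunk clamp always splits at SHA256 block boundaries, so on a preemptible kernel each kernel_neon_begin()/kernel_neon_end() section covers at most one block transform. For example, with 40 bytes already buffered (sctx->count % 64 == 40) and a 200-byte update, the clamp

    unsigned int chunk = len;

    if (IS_ENABLED(CONFIG_PREEMPT) &&
        chunk + sctx->count % SHA256_BLOCK_SIZE > SHA256_BLOCK_SIZE)
            chunk = SHA256_BLOCK_SIZE - sctx->count % SHA256_BLOCK_SIZE;

produces successive chunks of 24 (completing the buffered block), then 64, 64 and 48 bytes, each handed to sha256_base_do_update() inside its own NEON section. The byte counts here are a made-up example, not taken from the patch.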