From 8435a3c3003c00c43f1b267368bbe1d8dada35d1 Mon Sep 17 00:00:00 2001 From: Jussi Kivilinna Date: Sat, 20 Oct 2012 15:06:46 +0300 Subject: crypto: twofish/avx - avoid using temporary stack buffers Introduce new assembler functions to avoid use temporary stack buffers in glue code. This also allows use of vector instructions for xoring output in CTR and CBC modes and construction of IVs for CTR mode. ECB mode sees ~0.2% decrease in speed because added one extra function call. CBC mode decryption and CTR mode benefit from vector operations and gain ~3%. Signed-off-by: Jussi Kivilinna Signed-off-by: Herbert Xu --- arch/x86/crypto/twofish_avx_glue.c | 73 ++++++++------------------------------ 1 file changed, 15 insertions(+), 58 deletions(-) (limited to 'arch/x86/crypto/twofish_avx_glue.c') diff --git a/arch/x86/crypto/twofish_avx_glue.c b/arch/x86/crypto/twofish_avx_glue.c index 810e45d51186..94ac91d26e47 100644 --- a/arch/x86/crypto/twofish_avx_glue.c +++ b/arch/x86/crypto/twofish_avx_glue.c @@ -45,66 +45,23 @@ #define TWOFISH_PARALLEL_BLOCKS 8 -static inline void twofish_enc_blk_3way(struct twofish_ctx *ctx, u8 *dst, - const u8 *src) -{ - __twofish_enc_blk_3way(ctx, dst, src, false); -} - /* 8-way parallel cipher functions */ -asmlinkage void __twofish_enc_blk_8way(struct twofish_ctx *ctx, u8 *dst, - const u8 *src, bool xor); -asmlinkage void twofish_dec_blk_8way(struct twofish_ctx *ctx, u8 *dst, +asmlinkage void twofish_ecb_enc_8way(struct twofish_ctx *ctx, u8 *dst, + const u8 *src); +asmlinkage void twofish_ecb_dec_8way(struct twofish_ctx *ctx, u8 *dst, const u8 *src); -static inline void twofish_enc_blk_xway(struct twofish_ctx *ctx, u8 *dst, - const u8 *src) -{ - __twofish_enc_blk_8way(ctx, dst, src, false); -} - -static inline void twofish_enc_blk_xway_xor(struct twofish_ctx *ctx, u8 *dst, - const u8 *src) -{ - __twofish_enc_blk_8way(ctx, dst, src, true); -} +asmlinkage void twofish_cbc_dec_8way(struct twofish_ctx *ctx, u8 *dst, + const u8 *src); +asmlinkage void twofish_ctr_8way(struct twofish_ctx *ctx, u8 *dst, + const u8 *src, le128 *iv); -static inline void twofish_dec_blk_xway(struct twofish_ctx *ctx, u8 *dst, +static inline void twofish_enc_blk_3way(struct twofish_ctx *ctx, u8 *dst, const u8 *src) { - twofish_dec_blk_8way(ctx, dst, src); -} - -static void twofish_dec_blk_cbc_xway(void *ctx, u128 *dst, const u128 *src) -{ - u128 ivs[TWOFISH_PARALLEL_BLOCKS - 1]; - unsigned int j; - - for (j = 0; j < TWOFISH_PARALLEL_BLOCKS - 1; j++) - ivs[j] = src[j]; - - twofish_dec_blk_xway(ctx, (u8 *)dst, (u8 *)src); - - for (j = 0; j < TWOFISH_PARALLEL_BLOCKS - 1; j++) - u128_xor(dst + (j + 1), dst + (j + 1), ivs + j); + __twofish_enc_blk_3way(ctx, dst, src, false); } -static void twofish_enc_blk_ctr_xway(void *ctx, u128 *dst, const u128 *src, - le128 *iv) -{ - be128 ctrblks[TWOFISH_PARALLEL_BLOCKS]; - unsigned int i; - - for (i = 0; i < TWOFISH_PARALLEL_BLOCKS; i++) { - if (dst != src) - dst[i] = src[i]; - - le128_to_be128(&ctrblks[i], iv); - le128_inc(iv); - } - - twofish_enc_blk_xway_xor(ctx, (u8 *)dst, (u8 *)ctrblks); -} static const struct common_glue_ctx twofish_enc = { .num_funcs = 3, @@ -112,7 +69,7 @@ static const struct common_glue_ctx twofish_enc = { .funcs = { { .num_blocks = TWOFISH_PARALLEL_BLOCKS, - .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_enc_blk_xway) } + .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_ecb_enc_8way) } }, { .num_blocks = 3, .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_enc_blk_3way) } @@ -128,7 +85,7 @@ static const struct common_glue_ctx twofish_ctr = { .funcs = { { .num_blocks = TWOFISH_PARALLEL_BLOCKS, - .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(twofish_enc_blk_ctr_xway) } + .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(twofish_ctr_8way) } }, { .num_blocks = 3, .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(twofish_enc_blk_ctr_3way) } @@ -144,7 +101,7 @@ static const struct common_glue_ctx twofish_dec = { .funcs = { { .num_blocks = TWOFISH_PARALLEL_BLOCKS, - .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_dec_blk_xway) } + .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_ecb_dec_8way) } }, { .num_blocks = 3, .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_dec_blk_3way) } @@ -160,7 +117,7 @@ static const struct common_glue_ctx twofish_dec_cbc = { .funcs = { { .num_blocks = TWOFISH_PARALLEL_BLOCKS, - .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(twofish_dec_blk_cbc_xway) } + .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(twofish_cbc_dec_8way) } }, { .num_blocks = 3, .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(twofish_dec_blk_cbc_3way) } @@ -227,7 +184,7 @@ static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes) ctx->fpu_enabled = twofish_fpu_begin(ctx->fpu_enabled, nbytes); if (nbytes == bsize * TWOFISH_PARALLEL_BLOCKS) { - twofish_enc_blk_xway(ctx->ctx, srcdst, srcdst); + twofish_ecb_enc_8way(ctx->ctx, srcdst, srcdst); return; } @@ -249,7 +206,7 @@ static void decrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes) ctx->fpu_enabled = twofish_fpu_begin(ctx->fpu_enabled, nbytes); if (nbytes == bsize * TWOFISH_PARALLEL_BLOCKS) { - twofish_dec_blk_xway(ctx->ctx, srcdst, srcdst); + twofish_ecb_dec_8way(ctx->ctx, srcdst, srcdst); return; } -- cgit v1.2.3