From f2f770d74a8d12265e023f2792ad2eb996cabe1a Mon Sep 17 00:00:00 2001 From: Sami Tolvanen Date: Fri, 3 Apr 2015 18:03:40 +0800 Subject: crypto: arm/sha256 - Add optimized SHA-256/224 Add Andy Polyakov's optimized assembly and NEON implementations for SHA-256/224. The sha256-armv4.pl script for generating the assembly code is from OpenSSL commit 51f8d095562f36cdaa6893597b5c609e943b0565. Compared to sha256-generic these implementations have the following tcrypt speed improvements on Motorola Nexus 6 (Snapdragon 805): bs b/u sha256-neon sha256-asm 16 16 x1.32 x1.19 64 16 x1.27 x1.15 64 64 x1.36 x1.20 256 16 x1.22 x1.11 256 64 x1.36 x1.19 256 256 x1.59 x1.23 1024 16 x1.21 x1.10 1024 256 x1.65 x1.23 1024 1024 x1.76 x1.25 2048 16 x1.21 x1.10 2048 256 x1.66 x1.23 2048 1024 x1.78 x1.25 2048 2048 x1.79 x1.25 4096 16 x1.20 x1.09 4096 256 x1.66 x1.23 4096 1024 x1.79 x1.26 4096 4096 x1.82 x1.26 8192 16 x1.20 x1.09 8192 256 x1.67 x1.23 8192 1024 x1.80 x1.26 8192 4096 x1.85 x1.28 8192 8192 x1.85 x1.27 Where bs refers to block size and b/u to bytes per update. Signed-off-by: Sami Tolvanen Cc: Andy Polyakov Signed-off-by: Herbert Xu --- arch/arm/crypto/sha256_glue.c | 246 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 246 insertions(+) create mode 100644 arch/arm/crypto/sha256_glue.c (limited to 'arch/arm/crypto/sha256_glue.c') diff --git a/arch/arm/crypto/sha256_glue.c b/arch/arm/crypto/sha256_glue.c new file mode 100644 index 000000000000..ccef5e25bbcb --- /dev/null +++ b/arch/arm/crypto/sha256_glue.c @@ -0,0 +1,246 @@ +/* + * Glue code for the SHA256 Secure Hash Algorithm assembly implementation + * using optimized ARM assembler and NEON instructions. + * + * Copyright © 2015 Google Inc. + * + * This file is based on sha256_ssse3_glue.c: + * Copyright (C) 2013 Intel Corporation + * Author: Tim Chen + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "sha256_glue.h" + +asmlinkage void sha256_block_data_order(u32 *digest, const void *data, + unsigned int num_blks); + + +int sha256_init(struct shash_desc *desc) +{ + struct sha256_state *sctx = shash_desc_ctx(desc); + + sctx->state[0] = SHA256_H0; + sctx->state[1] = SHA256_H1; + sctx->state[2] = SHA256_H2; + sctx->state[3] = SHA256_H3; + sctx->state[4] = SHA256_H4; + sctx->state[5] = SHA256_H5; + sctx->state[6] = SHA256_H6; + sctx->state[7] = SHA256_H7; + sctx->count = 0; + + return 0; +} + +int sha224_init(struct shash_desc *desc) +{ + struct sha256_state *sctx = shash_desc_ctx(desc); + + sctx->state[0] = SHA224_H0; + sctx->state[1] = SHA224_H1; + sctx->state[2] = SHA224_H2; + sctx->state[3] = SHA224_H3; + sctx->state[4] = SHA224_H4; + sctx->state[5] = SHA224_H5; + sctx->state[6] = SHA224_H6; + sctx->state[7] = SHA224_H7; + sctx->count = 0; + + return 0; +} + +int __sha256_update(struct shash_desc *desc, const u8 *data, unsigned int len, + unsigned int partial) +{ + struct sha256_state *sctx = shash_desc_ctx(desc); + unsigned int done = 0; + + sctx->count += len; + + if (partial) { + done = SHA256_BLOCK_SIZE - partial; + memcpy(sctx->buf + partial, data, done); + sha256_block_data_order(sctx->state, sctx->buf, 1); + } + + if (len - done >= SHA256_BLOCK_SIZE) { + const unsigned int rounds = (len - done) / SHA256_BLOCK_SIZE; + + sha256_block_data_order(sctx->state, data + done, rounds); + done += rounds * SHA256_BLOCK_SIZE; + } + + memcpy(sctx->buf, data + done, len - done); + + return 0; +} + +int sha256_update(struct shash_desc *desc, const u8 *data, unsigned int len) +{ + struct sha256_state *sctx = shash_desc_ctx(desc); + unsigned int partial = sctx->count % SHA256_BLOCK_SIZE; + + /* Handle the fast case right here */ + if (partial + len < SHA256_BLOCK_SIZE) { + sctx->count += len; + memcpy(sctx->buf + partial, data, len); + + return 0; + } + + return __sha256_update(desc, data, len, partial); +} + +/* Add padding and return the message digest. */ +static int sha256_final(struct shash_desc *desc, u8 *out) +{ + struct sha256_state *sctx = shash_desc_ctx(desc); + unsigned int i, index, padlen; + __be32 *dst = (__be32 *)out; + __be64 bits; + static const u8 padding[SHA256_BLOCK_SIZE] = { 0x80, }; + + /* save number of bits */ + bits = cpu_to_be64(sctx->count << 3); + + /* Pad out to 56 mod 64 and append length */ + index = sctx->count % SHA256_BLOCK_SIZE; + padlen = (index < 56) ? (56 - index) : ((SHA256_BLOCK_SIZE+56)-index); + + /* We need to fill a whole block for __sha256_update */ + if (padlen <= 56) { + sctx->count += padlen; + memcpy(sctx->buf + index, padding, padlen); + } else { + __sha256_update(desc, padding, padlen, index); + } + __sha256_update(desc, (const u8 *)&bits, sizeof(bits), 56); + + /* Store state in digest */ + for (i = 0; i < 8; i++) + dst[i] = cpu_to_be32(sctx->state[i]); + + /* Wipe context */ + memset(sctx, 0, sizeof(*sctx)); + + return 0; +} + +static int sha224_final(struct shash_desc *desc, u8 *out) +{ + u8 D[SHA256_DIGEST_SIZE]; + + sha256_final(desc, D); + + memcpy(out, D, SHA224_DIGEST_SIZE); + memzero_explicit(D, SHA256_DIGEST_SIZE); + + return 0; +} + +int sha256_export(struct shash_desc *desc, void *out) +{ + struct sha256_state *sctx = shash_desc_ctx(desc); + + memcpy(out, sctx, sizeof(*sctx)); + + return 0; +} + +int sha256_import(struct shash_desc *desc, const void *in) +{ + struct sha256_state *sctx = shash_desc_ctx(desc); + + memcpy(sctx, in, sizeof(*sctx)); + + return 0; +} + +static struct shash_alg algs[] = { { + .digestsize = SHA256_DIGEST_SIZE, + .init = sha256_init, + .update = sha256_update, + .final = sha256_final, + .export = sha256_export, + .import = sha256_import, + .descsize = sizeof(struct sha256_state), + .statesize = sizeof(struct sha256_state), + .base = { + .cra_name = "sha256", + .cra_driver_name = "sha256-asm", + .cra_priority = 150, + .cra_flags = CRYPTO_ALG_TYPE_SHASH, + .cra_blocksize = SHA256_BLOCK_SIZE, + .cra_module = THIS_MODULE, + } +}, { + .digestsize = SHA224_DIGEST_SIZE, + .init = sha224_init, + .update = sha256_update, + .final = sha224_final, + .export = sha256_export, + .import = sha256_import, + .descsize = sizeof(struct sha256_state), + .statesize = sizeof(struct sha256_state), + .base = { + .cra_name = "sha224", + .cra_driver_name = "sha224-asm", + .cra_priority = 150, + .cra_flags = CRYPTO_ALG_TYPE_SHASH, + .cra_blocksize = SHA224_BLOCK_SIZE, + .cra_module = THIS_MODULE, + } +} }; + +static int __init sha256_mod_init(void) +{ + int res = crypto_register_shashes(algs, ARRAY_SIZE(algs)); + + if (res < 0) + return res; + + if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && cpu_has_neon()) { + res = crypto_register_shashes(sha256_neon_algs, + ARRAY_SIZE(sha256_neon_algs)); + + if (res < 0) + crypto_unregister_shashes(algs, ARRAY_SIZE(algs)); + } + + return res; +} + +static void __exit sha256_mod_fini(void) +{ + crypto_unregister_shashes(algs, ARRAY_SIZE(algs)); + + if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && cpu_has_neon()) + crypto_unregister_shashes(sha256_neon_algs, + ARRAY_SIZE(sha256_neon_algs)); +} + +module_init(sha256_mod_init); +module_exit(sha256_mod_fini); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("SHA256 Secure Hash Algorithm (ARM), including NEON"); + +MODULE_ALIAS_CRYPTO("sha256"); -- cgit v1.2.3 From b59e2ae3690c8ef5f8ddeeb0b6b3313521b915e6 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Thu, 9 Apr 2015 12:55:42 +0200 Subject: crypto: arm/sha256 - move SHA-224/256 ASM/NEON implementation to base layer This removes all the boilerplate from the existing implementation, and replaces it with calls into the base layer. Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- arch/arm/crypto/sha256_glue.c | 170 ++++++------------------------------- arch/arm/crypto/sha256_glue.h | 17 +--- arch/arm/crypto/sha256_neon_glue.c | 143 ++++++++----------------------- 3 files changed, 66 insertions(+), 264 deletions(-) (limited to 'arch/arm/crypto/sha256_glue.c') diff --git a/arch/arm/crypto/sha256_glue.c b/arch/arm/crypto/sha256_glue.c index ccef5e25bbcb..a84e869ef900 100644 --- a/arch/arm/crypto/sha256_glue.c +++ b/arch/arm/crypto/sha256_glue.c @@ -24,165 +24,49 @@ #include #include #include -#include +#include #include #include + #include "sha256_glue.h" asmlinkage void sha256_block_data_order(u32 *digest, const void *data, - unsigned int num_blks); - - -int sha256_init(struct shash_desc *desc) -{ - struct sha256_state *sctx = shash_desc_ctx(desc); - - sctx->state[0] = SHA256_H0; - sctx->state[1] = SHA256_H1; - sctx->state[2] = SHA256_H2; - sctx->state[3] = SHA256_H3; - sctx->state[4] = SHA256_H4; - sctx->state[5] = SHA256_H5; - sctx->state[6] = SHA256_H6; - sctx->state[7] = SHA256_H7; - sctx->count = 0; - - return 0; -} - -int sha224_init(struct shash_desc *desc) -{ - struct sha256_state *sctx = shash_desc_ctx(desc); - - sctx->state[0] = SHA224_H0; - sctx->state[1] = SHA224_H1; - sctx->state[2] = SHA224_H2; - sctx->state[3] = SHA224_H3; - sctx->state[4] = SHA224_H4; - sctx->state[5] = SHA224_H5; - sctx->state[6] = SHA224_H6; - sctx->state[7] = SHA224_H7; - sctx->count = 0; - - return 0; -} - -int __sha256_update(struct shash_desc *desc, const u8 *data, unsigned int len, - unsigned int partial) -{ - struct sha256_state *sctx = shash_desc_ctx(desc); - unsigned int done = 0; + unsigned int num_blks); - sctx->count += len; - - if (partial) { - done = SHA256_BLOCK_SIZE - partial; - memcpy(sctx->buf + partial, data, done); - sha256_block_data_order(sctx->state, sctx->buf, 1); - } - - if (len - done >= SHA256_BLOCK_SIZE) { - const unsigned int rounds = (len - done) / SHA256_BLOCK_SIZE; - - sha256_block_data_order(sctx->state, data + done, rounds); - done += rounds * SHA256_BLOCK_SIZE; - } - - memcpy(sctx->buf, data + done, len - done); - - return 0; -} - -int sha256_update(struct shash_desc *desc, const u8 *data, unsigned int len) +int crypto_sha256_arm_update(struct shash_desc *desc, const u8 *data, + unsigned int len) { - struct sha256_state *sctx = shash_desc_ctx(desc); - unsigned int partial = sctx->count % SHA256_BLOCK_SIZE; - - /* Handle the fast case right here */ - if (partial + len < SHA256_BLOCK_SIZE) { - sctx->count += len; - memcpy(sctx->buf + partial, data, len); + /* make sure casting to sha256_block_fn() is safe */ + BUILD_BUG_ON(offsetof(struct sha256_state, state) != 0); - return 0; - } - - return __sha256_update(desc, data, len, partial); + return sha256_base_do_update(desc, data, len, + (sha256_block_fn *)sha256_block_data_order); } +EXPORT_SYMBOL(crypto_sha256_arm_update); -/* Add padding and return the message digest. */ static int sha256_final(struct shash_desc *desc, u8 *out) { - struct sha256_state *sctx = shash_desc_ctx(desc); - unsigned int i, index, padlen; - __be32 *dst = (__be32 *)out; - __be64 bits; - static const u8 padding[SHA256_BLOCK_SIZE] = { 0x80, }; - - /* save number of bits */ - bits = cpu_to_be64(sctx->count << 3); - - /* Pad out to 56 mod 64 and append length */ - index = sctx->count % SHA256_BLOCK_SIZE; - padlen = (index < 56) ? (56 - index) : ((SHA256_BLOCK_SIZE+56)-index); - - /* We need to fill a whole block for __sha256_update */ - if (padlen <= 56) { - sctx->count += padlen; - memcpy(sctx->buf + index, padding, padlen); - } else { - __sha256_update(desc, padding, padlen, index); - } - __sha256_update(desc, (const u8 *)&bits, sizeof(bits), 56); - - /* Store state in digest */ - for (i = 0; i < 8; i++) - dst[i] = cpu_to_be32(sctx->state[i]); - - /* Wipe context */ - memset(sctx, 0, sizeof(*sctx)); - - return 0; -} - -static int sha224_final(struct shash_desc *desc, u8 *out) -{ - u8 D[SHA256_DIGEST_SIZE]; - - sha256_final(desc, D); - - memcpy(out, D, SHA224_DIGEST_SIZE); - memzero_explicit(D, SHA256_DIGEST_SIZE); - - return 0; -} - -int sha256_export(struct shash_desc *desc, void *out) -{ - struct sha256_state *sctx = shash_desc_ctx(desc); - - memcpy(out, sctx, sizeof(*sctx)); - - return 0; + sha256_base_do_finalize(desc, + (sha256_block_fn *)sha256_block_data_order); + return sha256_base_finish(desc, out); } -int sha256_import(struct shash_desc *desc, const void *in) +int crypto_sha256_arm_finup(struct shash_desc *desc, const u8 *data, + unsigned int len, u8 *out) { - struct sha256_state *sctx = shash_desc_ctx(desc); - - memcpy(sctx, in, sizeof(*sctx)); - - return 0; + sha256_base_do_update(desc, data, len, + (sha256_block_fn *)sha256_block_data_order); + return sha256_final(desc, out); } +EXPORT_SYMBOL(crypto_sha256_arm_finup); static struct shash_alg algs[] = { { .digestsize = SHA256_DIGEST_SIZE, - .init = sha256_init, - .update = sha256_update, + .init = sha256_base_init, + .update = crypto_sha256_arm_update, .final = sha256_final, - .export = sha256_export, - .import = sha256_import, + .finup = crypto_sha256_arm_finup, .descsize = sizeof(struct sha256_state), - .statesize = sizeof(struct sha256_state), .base = { .cra_name = "sha256", .cra_driver_name = "sha256-asm", @@ -193,13 +77,11 @@ static struct shash_alg algs[] = { { } }, { .digestsize = SHA224_DIGEST_SIZE, - .init = sha224_init, - .update = sha256_update, - .final = sha224_final, - .export = sha256_export, - .import = sha256_import, + .init = sha224_base_init, + .update = crypto_sha256_arm_update, + .final = sha256_final, + .finup = crypto_sha256_arm_finup, .descsize = sizeof(struct sha256_state), - .statesize = sizeof(struct sha256_state), .base = { .cra_name = "sha224", .cra_driver_name = "sha224-asm", diff --git a/arch/arm/crypto/sha256_glue.h b/arch/arm/crypto/sha256_glue.h index 0312f4ffe8cc..7cf0bf786ada 100644 --- a/arch/arm/crypto/sha256_glue.h +++ b/arch/arm/crypto/sha256_glue.h @@ -2,22 +2,13 @@ #define _CRYPTO_SHA256_GLUE_H #include -#include extern struct shash_alg sha256_neon_algs[2]; -extern int sha256_init(struct shash_desc *desc); +int crypto_sha256_arm_update(struct shash_desc *desc, const u8 *data, + unsigned int len); -extern int sha224_init(struct shash_desc *desc); - -extern int __sha256_update(struct shash_desc *desc, const u8 *data, - unsigned int len, unsigned int partial); - -extern int sha256_update(struct shash_desc *desc, const u8 *data, - unsigned int len); - -extern int sha256_export(struct shash_desc *desc, void *out); - -extern int sha256_import(struct shash_desc *desc, const void *in); +int crypto_sha256_arm_finup(struct shash_desc *desc, const u8 *data, + unsigned int len, u8 *hash); #endif /* _CRYPTO_SHA256_GLUE_H */ diff --git a/arch/arm/crypto/sha256_neon_glue.c b/arch/arm/crypto/sha256_neon_glue.c index c4da10090eee..39ccd658817e 100644 --- a/arch/arm/crypto/sha256_neon_glue.c +++ b/arch/arm/crypto/sha256_neon_glue.c @@ -19,131 +19,62 @@ #include #include #include +#include #include #include #include + #include "sha256_glue.h" asmlinkage void sha256_block_data_order_neon(u32 *digest, const void *data, - unsigned int num_blks); - + unsigned int num_blks); -static int __sha256_neon_update(struct shash_desc *desc, const u8 *data, - unsigned int len, unsigned int partial) +static int sha256_update(struct shash_desc *desc, const u8 *data, + unsigned int len) { struct sha256_state *sctx = shash_desc_ctx(desc); - unsigned int done = 0; - - sctx->count += len; - - if (partial) { - done = SHA256_BLOCK_SIZE - partial; - memcpy(sctx->buf + partial, data, done); - sha256_block_data_order_neon(sctx->state, sctx->buf, 1); - } - - if (len - done >= SHA256_BLOCK_SIZE) { - const unsigned int rounds = (len - done) / SHA256_BLOCK_SIZE; - sha256_block_data_order_neon(sctx->state, data + done, rounds); - done += rounds * SHA256_BLOCK_SIZE; - } + if (!may_use_simd() || + (sctx->count % SHA256_BLOCK_SIZE) + len < SHA256_BLOCK_SIZE) + return crypto_sha256_arm_update(desc, data, len); - memcpy(sctx->buf, data + done, len - done); + kernel_neon_begin(); + sha256_base_do_update(desc, data, len, + (sha256_block_fn *)sha256_block_data_order_neon); + kernel_neon_end(); return 0; } -static int sha256_neon_update(struct shash_desc *desc, const u8 *data, - unsigned int len) -{ - struct sha256_state *sctx = shash_desc_ctx(desc); - unsigned int partial = sctx->count % SHA256_BLOCK_SIZE; - int res; - - /* Handle the fast case right here */ - if (partial + len < SHA256_BLOCK_SIZE) { - sctx->count += len; - memcpy(sctx->buf + partial, data, len); - - return 0; - } - - if (!may_use_simd()) { - res = __sha256_update(desc, data, len, partial); - } else { - kernel_neon_begin(); - res = __sha256_neon_update(desc, data, len, partial); - kernel_neon_end(); - } - - return res; -} - -/* Add padding and return the message digest. */ -static int sha256_neon_final(struct shash_desc *desc, u8 *out) +static int sha256_finup(struct shash_desc *desc, const u8 *data, + unsigned int len, u8 *out) { - struct sha256_state *sctx = shash_desc_ctx(desc); - unsigned int i, index, padlen; - __be32 *dst = (__be32 *)out; - __be64 bits; - static const u8 padding[SHA256_BLOCK_SIZE] = { 0x80, }; - - /* save number of bits */ - bits = cpu_to_be64(sctx->count << 3); - - /* Pad out to 56 mod 64 and append length */ - index = sctx->count % SHA256_BLOCK_SIZE; - padlen = (index < 56) ? (56 - index) : ((SHA256_BLOCK_SIZE+56)-index); - - if (!may_use_simd()) { - sha256_update(desc, padding, padlen); - sha256_update(desc, (const u8 *)&bits, sizeof(bits)); - } else { - kernel_neon_begin(); - /* We need to fill a whole block for __sha256_neon_update() */ - if (padlen <= 56) { - sctx->count += padlen; - memcpy(sctx->buf + index, padding, padlen); - } else { - __sha256_neon_update(desc, padding, padlen, index); - } - __sha256_neon_update(desc, (const u8 *)&bits, - sizeof(bits), 56); - kernel_neon_end(); - } - - /* Store state in digest */ - for (i = 0; i < 8; i++) - dst[i] = cpu_to_be32(sctx->state[i]); - - /* Wipe context */ - memzero_explicit(sctx, sizeof(*sctx)); - - return 0; + if (!may_use_simd()) + return crypto_sha256_arm_finup(desc, data, len, out); + + kernel_neon_begin(); + if (len) + sha256_base_do_update(desc, data, len, + (sha256_block_fn *)sha256_block_data_order_neon); + sha256_base_do_finalize(desc, + (sha256_block_fn *)sha256_block_data_order_neon); + kernel_neon_end(); + + return sha256_base_finish(desc, out); } -static int sha224_neon_final(struct shash_desc *desc, u8 *out) +static int sha256_final(struct shash_desc *desc, u8 *out) { - u8 D[SHA256_DIGEST_SIZE]; - - sha256_neon_final(desc, D); - - memcpy(out, D, SHA224_DIGEST_SIZE); - memzero_explicit(D, SHA256_DIGEST_SIZE); - - return 0; + return sha256_finup(desc, NULL, 0, out); } struct shash_alg sha256_neon_algs[] = { { .digestsize = SHA256_DIGEST_SIZE, - .init = sha256_init, - .update = sha256_neon_update, - .final = sha256_neon_final, - .export = sha256_export, - .import = sha256_import, + .init = sha256_base_init, + .update = sha256_update, + .final = sha256_final, + .finup = sha256_finup, .descsize = sizeof(struct sha256_state), - .statesize = sizeof(struct sha256_state), .base = { .cra_name = "sha256", .cra_driver_name = "sha256-neon", @@ -154,13 +85,11 @@ struct shash_alg sha256_neon_algs[] = { { } }, { .digestsize = SHA224_DIGEST_SIZE, - .init = sha224_init, - .update = sha256_neon_update, - .final = sha224_neon_final, - .export = sha256_export, - .import = sha256_import, + .init = sha224_base_init, + .update = sha256_update, + .final = sha256_final, + .finup = sha256_finup, .descsize = sizeof(struct sha256_state), - .statesize = sizeof(struct sha256_state), .base = { .cra_name = "sha224", .cra_driver_name = "sha224-neon", -- cgit v1.2.3