diff options
author | Ard Biesheuvel <ard.biesheuvel@linaro.org> | 2015-05-08 11:46:21 +0300 |
---|---|---|
committer | Herbert Xu <herbert@gondor.apana.org.au> | 2015-05-11 10:08:01 +0300 |
commit | c80ae7ca372606a3971dcdfa3420275cf17ef6b6 (patch) | |
tree | 369824cb056ee2f8c76bbbce405b43e24b66ad57 /arch/arm/crypto/sha512_neon_glue.c | |
parent | 551d7ed2fdee1fb7bba2dbd55b61e821a10a7c51 (diff) | |
download | linux-c80ae7ca372606a3971dcdfa3420275cf17ef6b6.tar.xz |
crypto: arm/sha512 - accelerated SHA-512 using ARM generic ASM and NEON
This replaces the SHA-512 NEON module with the faster and more
versatile implementation from the OpenSSL project. It consists
of both a NEON and a generic ASM version of the core SHA-512
transform, where the NEON version reverts to the ASM version
when invoked in non-process context.
This patch is based on the OpenSSL upstream version b1a5d1c65208
of sha512-armv4.pl, which can be found here:
https://git.openssl.org/gitweb/?p=openssl.git;h=b1a5d1c65208
Performance relative to the generic implementation (measured
using tcrypt.ko mode=306 sec=1 running on a Cortex-A57 under
KVM):
input size block size asm neon old neon
16 16 1.39 2.54 2.21
64 16 1.32 2.33 2.09
64 64 1.38 2.53 2.19
256 16 1.31 2.28 2.06
256 64 1.38 2.54 2.25
256 256 1.40 2.77 2.39
1024 16 1.29 2.22 2.01
1024 256 1.40 2.82 2.45
1024 1024 1.41 2.93 2.53
2048 16 1.33 2.21 2.00
2048 256 1.40 2.84 2.46
2048 1024 1.41 2.96 2.55
2048 2048 1.41 2.98 2.56
4096 16 1.34 2.20 1.99
4096 256 1.40 2.84 2.46
4096 1024 1.41 2.97 2.56
4096 4096 1.41 3.01 2.58
8192 16 1.34 2.19 1.99
8192 256 1.40 2.85 2.47
8192 1024 1.41 2.98 2.56
8192 4096 1.41 2.71 2.59
8192 8192 1.51 3.51 2.69
Acked-by: Jussi Kivilinna <jussi.kivilinna@iki.fi>
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Diffstat (limited to 'arch/arm/crypto/sha512_neon_glue.c')
-rw-r--r-- | arch/arm/crypto/sha512_neon_glue.c | 305 |
1 files changed, 0 insertions, 305 deletions
diff --git a/arch/arm/crypto/sha512_neon_glue.c b/arch/arm/crypto/sha512_neon_glue.c deleted file mode 100644 index b124dce838d6..000000000000 --- a/arch/arm/crypto/sha512_neon_glue.c +++ /dev/null @@ -1,305 +0,0 @@ -/* - * Glue code for the SHA512 Secure Hash Algorithm assembly implementation - * using NEON instructions. - * - * Copyright © 2014 Jussi Kivilinna <jussi.kivilinna@iki.fi> - * - * This file is based on sha512_ssse3_glue.c: - * Copyright (C) 2013 Intel Corporation - * Author: Tim Chen <tim.c.chen@linux.intel.com> - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the Free - * Software Foundation; either version 2 of the License, or (at your option) - * any later version. - * - */ - -#include <crypto/internal/hash.h> -#include <linux/init.h> -#include <linux/module.h> -#include <linux/mm.h> -#include <linux/cryptohash.h> -#include <linux/types.h> -#include <linux/string.h> -#include <crypto/sha.h> -#include <asm/byteorder.h> -#include <asm/simd.h> -#include <asm/neon.h> - - -static const u64 sha512_k[] = { - 0x428a2f98d728ae22ULL, 0x7137449123ef65cdULL, - 0xb5c0fbcfec4d3b2fULL, 0xe9b5dba58189dbbcULL, - 0x3956c25bf348b538ULL, 0x59f111f1b605d019ULL, - 0x923f82a4af194f9bULL, 0xab1c5ed5da6d8118ULL, - 0xd807aa98a3030242ULL, 0x12835b0145706fbeULL, - 0x243185be4ee4b28cULL, 0x550c7dc3d5ffb4e2ULL, - 0x72be5d74f27b896fULL, 0x80deb1fe3b1696b1ULL, - 0x9bdc06a725c71235ULL, 0xc19bf174cf692694ULL, - 0xe49b69c19ef14ad2ULL, 0xefbe4786384f25e3ULL, - 0x0fc19dc68b8cd5b5ULL, 0x240ca1cc77ac9c65ULL, - 0x2de92c6f592b0275ULL, 0x4a7484aa6ea6e483ULL, - 0x5cb0a9dcbd41fbd4ULL, 0x76f988da831153b5ULL, - 0x983e5152ee66dfabULL, 0xa831c66d2db43210ULL, - 0xb00327c898fb213fULL, 0xbf597fc7beef0ee4ULL, - 0xc6e00bf33da88fc2ULL, 0xd5a79147930aa725ULL, - 0x06ca6351e003826fULL, 0x142929670a0e6e70ULL, - 0x27b70a8546d22ffcULL, 0x2e1b21385c26c926ULL, - 0x4d2c6dfc5ac42aedULL, 0x53380d139d95b3dfULL, - 0x650a73548baf63deULL, 0x766a0abb3c77b2a8ULL, - 0x81c2c92e47edaee6ULL, 0x92722c851482353bULL, - 0xa2bfe8a14cf10364ULL, 0xa81a664bbc423001ULL, - 0xc24b8b70d0f89791ULL, 0xc76c51a30654be30ULL, - 0xd192e819d6ef5218ULL, 0xd69906245565a910ULL, - 0xf40e35855771202aULL, 0x106aa07032bbd1b8ULL, - 0x19a4c116b8d2d0c8ULL, 0x1e376c085141ab53ULL, - 0x2748774cdf8eeb99ULL, 0x34b0bcb5e19b48a8ULL, - 0x391c0cb3c5c95a63ULL, 0x4ed8aa4ae3418acbULL, - 0x5b9cca4f7763e373ULL, 0x682e6ff3d6b2b8a3ULL, - 0x748f82ee5defb2fcULL, 0x78a5636f43172f60ULL, - 0x84c87814a1f0ab72ULL, 0x8cc702081a6439ecULL, - 0x90befffa23631e28ULL, 0xa4506cebde82bde9ULL, - 0xbef9a3f7b2c67915ULL, 0xc67178f2e372532bULL, - 0xca273eceea26619cULL, 0xd186b8c721c0c207ULL, - 0xeada7dd6cde0eb1eULL, 0xf57d4f7fee6ed178ULL, - 0x06f067aa72176fbaULL, 0x0a637dc5a2c898a6ULL, - 0x113f9804bef90daeULL, 0x1b710b35131c471bULL, - 0x28db77f523047d84ULL, 0x32caab7b40c72493ULL, - 0x3c9ebe0a15c9bebcULL, 0x431d67c49c100d4cULL, - 0x4cc5d4becb3e42b6ULL, 0x597f299cfc657e2aULL, - 0x5fcb6fab3ad6faecULL, 0x6c44198c4a475817ULL -}; - - -asmlinkage void sha512_transform_neon(u64 *digest, const void *data, - const u64 k[], unsigned int num_blks); - - -static int sha512_neon_init(struct shash_desc *desc) -{ - struct sha512_state *sctx = shash_desc_ctx(desc); - - sctx->state[0] = SHA512_H0; - sctx->state[1] = SHA512_H1; - sctx->state[2] = SHA512_H2; - sctx->state[3] = SHA512_H3; - sctx->state[4] = SHA512_H4; - sctx->state[5] = SHA512_H5; - sctx->state[6] = SHA512_H6; - sctx->state[7] = SHA512_H7; - sctx->count[0] = sctx->count[1] = 0; - - return 0; -} - -static int __sha512_neon_update(struct shash_desc *desc, const u8 *data, - unsigned int len, unsigned int partial) -{ - struct sha512_state *sctx = shash_desc_ctx(desc); - unsigned int done = 0; - - sctx->count[0] += len; - if (sctx->count[0] < len) - sctx->count[1]++; - - if (partial) { - done = SHA512_BLOCK_SIZE - partial; - memcpy(sctx->buf + partial, data, done); - sha512_transform_neon(sctx->state, sctx->buf, sha512_k, 1); - } - - if (len - done >= SHA512_BLOCK_SIZE) { - const unsigned int rounds = (len - done) / SHA512_BLOCK_SIZE; - - sha512_transform_neon(sctx->state, data + done, sha512_k, - rounds); - - done += rounds * SHA512_BLOCK_SIZE; - } - - memcpy(sctx->buf, data + done, len - done); - - return 0; -} - -static int sha512_neon_update(struct shash_desc *desc, const u8 *data, - unsigned int len) -{ - struct sha512_state *sctx = shash_desc_ctx(desc); - unsigned int partial = sctx->count[0] % SHA512_BLOCK_SIZE; - int res; - - /* Handle the fast case right here */ - if (partial + len < SHA512_BLOCK_SIZE) { - sctx->count[0] += len; - if (sctx->count[0] < len) - sctx->count[1]++; - memcpy(sctx->buf + partial, data, len); - - return 0; - } - - if (!may_use_simd()) { - res = crypto_sha512_update(desc, data, len); - } else { - kernel_neon_begin(); - res = __sha512_neon_update(desc, data, len, partial); - kernel_neon_end(); - } - - return res; -} - - -/* Add padding and return the message digest. */ -static int sha512_neon_final(struct shash_desc *desc, u8 *out) -{ - struct sha512_state *sctx = shash_desc_ctx(desc); - unsigned int i, index, padlen; - __be64 *dst = (__be64 *)out; - __be64 bits[2]; - static const u8 padding[SHA512_BLOCK_SIZE] = { 0x80, }; - - /* save number of bits */ - bits[1] = cpu_to_be64(sctx->count[0] << 3); - bits[0] = cpu_to_be64(sctx->count[1] << 3 | sctx->count[0] >> 61); - - /* Pad out to 112 mod 128 and append length */ - index = sctx->count[0] & 0x7f; - padlen = (index < 112) ? (112 - index) : ((128+112) - index); - - if (!may_use_simd()) { - crypto_sha512_update(desc, padding, padlen); - crypto_sha512_update(desc, (const u8 *)&bits, sizeof(bits)); - } else { - kernel_neon_begin(); - /* We need to fill a whole block for __sha512_neon_update() */ - if (padlen <= 112) { - sctx->count[0] += padlen; - if (sctx->count[0] < padlen) - sctx->count[1]++; - memcpy(sctx->buf + index, padding, padlen); - } else { - __sha512_neon_update(desc, padding, padlen, index); - } - __sha512_neon_update(desc, (const u8 *)&bits, - sizeof(bits), 112); - kernel_neon_end(); - } - - /* Store state in digest */ - for (i = 0; i < 8; i++) - dst[i] = cpu_to_be64(sctx->state[i]); - - /* Wipe context */ - memset(sctx, 0, sizeof(*sctx)); - - return 0; -} - -static int sha512_neon_export(struct shash_desc *desc, void *out) -{ - struct sha512_state *sctx = shash_desc_ctx(desc); - - memcpy(out, sctx, sizeof(*sctx)); - - return 0; -} - -static int sha512_neon_import(struct shash_desc *desc, const void *in) -{ - struct sha512_state *sctx = shash_desc_ctx(desc); - - memcpy(sctx, in, sizeof(*sctx)); - - return 0; -} - -static int sha384_neon_init(struct shash_desc *desc) -{ - struct sha512_state *sctx = shash_desc_ctx(desc); - - sctx->state[0] = SHA384_H0; - sctx->state[1] = SHA384_H1; - sctx->state[2] = SHA384_H2; - sctx->state[3] = SHA384_H3; - sctx->state[4] = SHA384_H4; - sctx->state[5] = SHA384_H5; - sctx->state[6] = SHA384_H6; - sctx->state[7] = SHA384_H7; - - sctx->count[0] = sctx->count[1] = 0; - - return 0; -} - -static int sha384_neon_final(struct shash_desc *desc, u8 *hash) -{ - u8 D[SHA512_DIGEST_SIZE]; - - sha512_neon_final(desc, D); - - memcpy(hash, D, SHA384_DIGEST_SIZE); - memzero_explicit(D, SHA512_DIGEST_SIZE); - - return 0; -} - -static struct shash_alg algs[] = { { - .digestsize = SHA512_DIGEST_SIZE, - .init = sha512_neon_init, - .update = sha512_neon_update, - .final = sha512_neon_final, - .export = sha512_neon_export, - .import = sha512_neon_import, - .descsize = sizeof(struct sha512_state), - .statesize = sizeof(struct sha512_state), - .base = { - .cra_name = "sha512", - .cra_driver_name = "sha512-neon", - .cra_priority = 250, - .cra_flags = CRYPTO_ALG_TYPE_SHASH, - .cra_blocksize = SHA512_BLOCK_SIZE, - .cra_module = THIS_MODULE, - } -}, { - .digestsize = SHA384_DIGEST_SIZE, - .init = sha384_neon_init, - .update = sha512_neon_update, - .final = sha384_neon_final, - .export = sha512_neon_export, - .import = sha512_neon_import, - .descsize = sizeof(struct sha512_state), - .statesize = sizeof(struct sha512_state), - .base = { - .cra_name = "sha384", - .cra_driver_name = "sha384-neon", - .cra_priority = 250, - .cra_flags = CRYPTO_ALG_TYPE_SHASH, - .cra_blocksize = SHA384_BLOCK_SIZE, - .cra_module = THIS_MODULE, - } -} }; - -static int __init sha512_neon_mod_init(void) -{ - if (!cpu_has_neon()) - return -ENODEV; - - return crypto_register_shashes(algs, ARRAY_SIZE(algs)); -} - -static void __exit sha512_neon_mod_fini(void) -{ - crypto_unregister_shashes(algs, ARRAY_SIZE(algs)); -} - -module_init(sha512_neon_mod_init); -module_exit(sha512_neon_mod_fini); - -MODULE_LICENSE("GPL"); -MODULE_DESCRIPTION("SHA512 Secure Hash Algorithm, NEON accelerated"); - -MODULE_ALIAS_CRYPTO("sha512"); -MODULE_ALIAS_CRYPTO("sha384"); |