diff options
author | Ard Biesheuvel <ardb@kernel.org> | 2022-01-27 14:35:45 +0300 |
---|---|---|
committer | Herbert Xu <herbert@gondor.apana.org.au> | 2022-02-05 07:10:51 +0300 |
commit | dfc6031ec917b7c34a7549c3120f841b2e2be6db (patch) | |
tree | 70227f3378c4f81f79d93237c6586650ac9a1e41 /arch/arm64/crypto/aes-neonbs-glue.c | |
parent | fc074e130051015e39245a4241956ff122e2f465 (diff) | |
download | linux-dfc6031ec917b7c34a7549c3120f841b2e2be6db.tar.xz |
crypto: arm64/aes-neonbs-xts - use plain NEON for non-power-of-2 input sizes
Even though the kernel's implementations of AES-XTS were updated to
implement ciphertext stealing and can operate on inputs of any size
larger than or equal to the AES block size, this feature is rarely used
in practice.
In fact, in the kernel, AES-XTS is only used to operate on 4096 or 512
byte blocks, which means that not only is the ciphertext stealing
effectively dead code, but the logic in the bit-sliced NEON implementation
to deal with fewer than 8 blocks at a time is also never used.
Since the bit-sliced NEON driver already depends on the plain NEON
version, which is slower but can operate on smaller data quantities more
straightforwardly, let's fall back to the plain NEON implementation of
XTS for any residual inputs that are not multiples of 128 bytes. This
allows us to remove a lot of complicated logic that rarely gets
exercised in practice.
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Diffstat (limited to 'arch/arm64/crypto/aes-neonbs-glue.c')
-rw-r--r-- | arch/arm64/crypto/aes-neonbs-glue.c | 33 |
1 files changed, 17 insertions, 16 deletions
diff --git a/arch/arm64/crypto/aes-neonbs-glue.c b/arch/arm64/crypto/aes-neonbs-glue.c index 3189003e1cbe..bac4cabef607 100644 --- a/arch/arm64/crypto/aes-neonbs-glue.c +++ b/arch/arm64/crypto/aes-neonbs-glue.c @@ -302,23 +302,18 @@ static int __xts_crypt(struct skcipher_request *req, bool encrypt, return err; while (walk.nbytes >= AES_BLOCK_SIZE) { - unsigned int blocks = walk.nbytes / AES_BLOCK_SIZE; - - if (walk.nbytes < walk.total || walk.nbytes % AES_BLOCK_SIZE) - blocks = round_down(blocks, - walk.stride / AES_BLOCK_SIZE); - + int blocks = (walk.nbytes / AES_BLOCK_SIZE) & ~7; out = walk.dst.virt.addr; in = walk.src.virt.addr; nbytes = walk.nbytes; kernel_neon_begin(); - if (likely(blocks > 6)) { /* plain NEON is faster otherwise */ - if (first) + if (blocks >= 8) { + if (first == 1) neon_aes_ecb_encrypt(walk.iv, walk.iv, ctx->twkey, ctx->key.rounds, 1); - first = 0; + first = 2; fn(out, in, ctx->key.rk, ctx->key.rounds, blocks, walk.iv); @@ -327,10 +322,17 @@ static int __xts_crypt(struct skcipher_request *req, bool encrypt, in += blocks * AES_BLOCK_SIZE; nbytes -= blocks * AES_BLOCK_SIZE; } - - if (walk.nbytes == walk.total && nbytes > 0) - goto xts_tail; - + if (walk.nbytes == walk.total && nbytes > 0) { + if (encrypt) + neon_aes_xts_encrypt(out, in, ctx->cts.key_enc, + ctx->key.rounds, nbytes, + ctx->twkey, walk.iv, first); + else + neon_aes_xts_decrypt(out, in, ctx->cts.key_dec, + ctx->key.rounds, nbytes, + ctx->twkey, walk.iv, first); + nbytes = first = 0; + } kernel_neon_end(); err = skcipher_walk_done(&walk, nbytes); } @@ -355,13 +357,12 @@ static int __xts_crypt(struct skcipher_request *req, bool encrypt, nbytes = walk.nbytes; kernel_neon_begin(); -xts_tail: if (encrypt) neon_aes_xts_encrypt(out, in, ctx->cts.key_enc, ctx->key.rounds, - nbytes, ctx->twkey, walk.iv, first ?: 2); + nbytes, ctx->twkey, walk.iv, first); else neon_aes_xts_decrypt(out, in, ctx->cts.key_dec, ctx->key.rounds, - nbytes, ctx->twkey, walk.iv, first ?: 2); + nbytes, ctx->twkey, 
walk.iv, first); kernel_neon_end(); return skcipher_walk_done(&walk, 0); |