| author | Eric Biggers <ebiggers@kernel.org> | 2026-01-12 22:20:08 +0300 |
|---|---|---|
| committer | Eric Biggers <ebiggers@kernel.org> | 2026-01-12 22:39:58 +0300 |
| commit | fa2297750c2cc61788d1843f358dbfecaa42944f (patch) | |
| tree | d619f66893a79c8421cb744e43af99e03229a49a /lib | |
| parent | a2484474272ef98d9580d8c610b0f7c6ed2f146c (diff) | |
| download | linux-fa2297750c2cc61788d1843f358dbfecaa42944f.tar.xz | |
lib/crypto: arm/aes: Migrate optimized code into library
Move the ARM optimized single-block AES en/decryption code into
lib/crypto/, wire it up to the AES library API, and remove the
superseded "aes-arm" crypto_cipher algorithm.
The result is that both the AES library and crypto_cipher APIs are now
optimized for ARM, whereas previously only crypto_cipher was (and the
optimizations weren't enabled by default, which this fixes as well).
Acked-by: Ard Biesheuvel <ardb@kernel.org>
Link: https://lore.kernel.org/r/20260112192035.10427-11-ebiggers@kernel.org
Signed-off-by: Eric Biggers <ebiggers@kernel.org>
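
For reference, here is a minimal sketch of how kernel code reaches this implementation through the AES library interface. It assumes the long-standing `crypto_aes_ctx`-based entry points from `<crypto/aes.h>` (`aes_expandkey()` and `aes_encrypt()`); `demo_aes_one_block()` is a hypothetical caller, not part of this patch:

```c
#include <crypto/aes.h>
#include <linux/string.h>

/* Hypothetical example caller; not part of this patch. */
static int demo_aes_one_block(const u8 *key, unsigned int key_len,
			      const u8 in[AES_BLOCK_SIZE],
			      u8 out[AES_BLOCK_SIZE])
{
	struct crypto_aes_ctx ctx;
	int err;

	/* Expand the user key into encryption and decryption round keys. */
	err = aes_expandkey(&ctx, key, key_len);
	if (err)
		return err;

	/* Encrypt one 16-byte block; on ARM this now uses the asm code. */
	aes_encrypt(&ctx, out, in);

	/* Wipe the expanded key material from the stack. */
	memzero_explicit(&ctx, sizeof(ctx));
	return 0;
}
```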
Diffstat (limited to 'lib')
| -rw-r--r-- | lib/crypto/Kconfig | 1 |
| -rw-r--r-- | lib/crypto/Makefile | 3 |
| -rw-r--r-- | lib/crypto/arm/aes-cipher-core.S | 201 |
| -rw-r--r-- | lib/crypto/arm/aes.h | 56 |
4 files changed, 261 insertions, 0 deletions
```diff
diff --git a/lib/crypto/Kconfig b/lib/crypto/Kconfig
index 4efad77daa24..60420b421e04 100644
--- a/lib/crypto/Kconfig
+++ b/lib/crypto/Kconfig
@@ -14,6 +14,7 @@ config CRYPTO_LIB_AES
 config CRYPTO_LIB_AES_ARCH
 	bool
 	depends on CRYPTO_LIB_AES && !UML && !KMSAN
+	default y if ARM
 
 config CRYPTO_LIB_AESCFB
 	tristate
diff --git a/lib/crypto/Makefile b/lib/crypto/Makefile
index 01193b3f47ba..2f6b0f59eb1b 100644
--- a/lib/crypto/Makefile
+++ b/lib/crypto/Makefile
@@ -21,6 +21,9 @@ obj-$(CONFIG_CRYPTO_LIB_AES) += libaes.o
 libaes-y := aes.o
 ifeq ($(CONFIG_CRYPTO_LIB_AES_ARCH),y)
 CFLAGS_aes.o += -I$(src)/$(SRCARCH)
+
+libaes-$(CONFIG_ARM) += arm/aes-cipher-core.o
+
 endif # CONFIG_CRYPTO_LIB_AES_ARCH
 
 ################################################################################
diff --git a/lib/crypto/arm/aes-cipher-core.S b/lib/crypto/arm/aes-cipher-core.S
new file mode 100644
index 000000000000..87567d6822ba
--- /dev/null
+++ b/lib/crypto/arm/aes-cipher-core.S
@@ -0,0 +1,201 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Scalar AES core transform
+ *
+ * Copyright (C) 2017 Linaro Ltd.
+ * Author: Ard Biesheuvel <ard.biesheuvel@linaro.org>
+ */
+
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+#include <asm/cache.h>
+
+	.text
+	.align		5
+
+	rk		.req	r0
+	rounds		.req	r1
+	in		.req	r2
+	out		.req	r3
+	ttab		.req	ip
+
+	t0		.req	lr
+	t1		.req	r2
+	t2		.req	r3
+
+	.macro		__select, out, in, idx
+	.if		__LINUX_ARM_ARCH__ < 7
+	and		\out, \in, #0xff << (8 * \idx)
+	.else
+	ubfx		\out, \in, #(8 * \idx), #8
+	.endif
+	.endm
+
+	.macro		__load, out, in, idx, sz, op
+	.if		__LINUX_ARM_ARCH__ < 7 && \idx > 0
+	ldr\op		\out, [ttab, \in, lsr #(8 * \idx) - \sz]
+	.else
+	ldr\op		\out, [ttab, \in, lsl #\sz]
+	.endif
+	.endm
+
+	.macro		__hround, out0, out1, in0, in1, in2, in3, t3, t4, enc, sz, op, oldcpsr
+	__select	\out0, \in0, 0
+	__select	t0, \in1, 1
+	__load		\out0, \out0, 0, \sz, \op
+	__load		t0, t0, 1, \sz, \op
+
+	.if		\enc
+	__select	\out1, \in1, 0
+	__select	t1, \in2, 1
+	.else
+	__select	\out1, \in3, 0
+	__select	t1, \in0, 1
+	.endif
+	__load		\out1, \out1, 0, \sz, \op
+	__select	t2, \in2, 2
+	__load		t1, t1, 1, \sz, \op
+	__load		t2, t2, 2, \sz, \op
+
+	eor		\out0, \out0, t0, ror #24
+
+	__select	t0, \in3, 3
+	.if		\enc
+	__select	\t3, \in3, 2
+	__select	\t4, \in0, 3
+	.else
+	__select	\t3, \in1, 2
+	__select	\t4, \in2, 3
+	.endif
+	__load		\t3, \t3, 2, \sz, \op
+	__load		t0, t0, 3, \sz, \op
+	__load		\t4, \t4, 3, \sz, \op
+
+	.ifnb		\oldcpsr
+	/*
+	 * This is the final round and we're done with all data-dependent table
+	 * lookups, so we can safely re-enable interrupts.
+	 */
+	restore_irqs	\oldcpsr
+	.endif
+
+	eor		\out1, \out1, t1, ror #24
+	eor		\out0, \out0, t2, ror #16
+	ldm		rk!, {t1, t2}
+	eor		\out1, \out1, \t3, ror #16
+	eor		\out0, \out0, t0, ror #8
+	eor		\out1, \out1, \t4, ror #8
+	eor		\out0, \out0, t1
+	eor		\out1, \out1, t2
+	.endm

+	.macro		fround, out0, out1, out2, out3, in0, in1, in2, in3, sz=2, op, oldcpsr
+	__hround	\out0, \out1, \in0, \in1, \in2, \in3, \out2, \out3, 1, \sz, \op
+	__hround	\out2, \out3, \in2, \in3, \in0, \in1, \in1, \in2, 1, \sz, \op, \oldcpsr
+	.endm
+
+	.macro		iround, out0, out1, out2, out3, in0, in1, in2, in3, sz=2, op, oldcpsr
+	__hround	\out0, \out1, \in0, \in3, \in2, \in1, \out2, \out3, 0, \sz, \op
+	__hround	\out2, \out3, \in2, \in1, \in0, \in3, \in1, \in0, 0, \sz, \op, \oldcpsr
+	.endm
+
+	.macro		do_crypt, round, ttab, ltab, bsz
+	push		{r3-r11, lr}
+
+	// Load keys first, to reduce latency in case they're not cached yet.
+	ldm		rk!, {r8-r11}
+
+	ldr		r4, [in]
+	ldr		r5, [in, #4]
+	ldr		r6, [in, #8]
+	ldr		r7, [in, #12]
+
+#ifdef CONFIG_CPU_BIG_ENDIAN
+	rev_l		r4, t0
+	rev_l		r5, t0
+	rev_l		r6, t0
+	rev_l		r7, t0
+#endif
+
+	eor		r4, r4, r8
+	eor		r5, r5, r9
+	eor		r6, r6, r10
+	eor		r7, r7, r11
+
+	mov_l		ttab, \ttab
+	/*
+	 * Disable interrupts and prefetch the 1024-byte 'ft' or 'it' table into
+	 * L1 cache, assuming cacheline size >= 32.  This is a hardening measure
+	 * intended to make cache-timing attacks more difficult.  They may not
+	 * be fully prevented, however; see the paper
+	 * https://cr.yp.to/antiforgery/cachetiming-20050414.pdf
+	 * ("Cache-timing attacks on AES") for a discussion of the many
+	 * difficulties involved in writing truly constant-time AES software.
+	 */
+	save_and_disable_irqs	t0
+	.set		i, 0
+	.rept		1024 / 128
+	ldr		r8, [ttab, #i + 0]
+	ldr		r9, [ttab, #i + 32]
+	ldr		r10, [ttab, #i + 64]
+	ldr		r11, [ttab, #i + 96]
+	.set		i, i + 128
+	.endr
+	push		{t0}		// oldcpsr
+
+	tst		rounds, #2
+	bne		1f
+
+0:	\round		r8, r9, r10, r11, r4, r5, r6, r7
+	\round		r4, r5, r6, r7, r8, r9, r10, r11
+
+1:	subs		rounds, rounds, #4
+	\round		r8, r9, r10, r11, r4, r5, r6, r7
+	bls		2f
+	\round		r4, r5, r6, r7, r8, r9, r10, r11
+	b		0b
+
+2:	.ifb		\ltab
+	add		ttab, ttab, #1
+	.else
+	mov_l		ttab, \ltab
+	// Prefetch inverse S-box for final round; see explanation above
+	.set		i, 0
+	.rept		256 / 64
+	ldr		t0, [ttab, #i + 0]
+	ldr		t1, [ttab, #i + 32]
+	.set		i, i + 64
+	.endr
+	.endif
+
+	pop		{rounds}	// oldcpsr
+	\round		r4, r5, r6, r7, r8, r9, r10, r11, \bsz, b, rounds
+
+#ifdef CONFIG_CPU_BIG_ENDIAN
+	rev_l		r4, t0
+	rev_l		r5, t0
+	rev_l		r6, t0
+	rev_l		r7, t0
+#endif
+
+	ldr		out, [sp]
+
+	str		r4, [out]
+	str		r5, [out, #4]
+	str		r6, [out, #8]
+	str		r7, [out, #12]
+
+	pop		{r3-r11, pc}
+
+	.align		3
+	.ltorg
+	.endm
+
+ENTRY(__aes_arm_encrypt)
+	do_crypt	fround, aes_enc_tab,, 2
+ENDPROC(__aes_arm_encrypt)
+
+	.align		5
+ENTRY(__aes_arm_decrypt)
+	do_crypt	iround, aes_dec_tab, crypto_aes_inv_sbox, 0
+ENDPROC(__aes_arm_decrypt)
diff --git a/lib/crypto/arm/aes.h b/lib/crypto/arm/aes.h
new file mode 100644
index 000000000000..1dd7dfa657bb
--- /dev/null
+++ b/lib/crypto/arm/aes.h
@@ -0,0 +1,56 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * AES block cipher, optimized for ARM
+ *
+ * Copyright (C) 2017 Linaro Ltd.
+ * Copyright 2026 Google LLC
+ */
+
+asmlinkage void __aes_arm_encrypt(const u32 rk[], int rounds,
+				  const u8 in[AES_BLOCK_SIZE],
+				  u8 out[AES_BLOCK_SIZE]);
+asmlinkage void __aes_arm_decrypt(const u32 inv_rk[], int rounds,
+				  const u8 in[AES_BLOCK_SIZE],
+				  u8 out[AES_BLOCK_SIZE]);
+
+static void aes_preparekey_arch(union aes_enckey_arch *k,
+				union aes_invkey_arch *inv_k,
+				const u8 *in_key, int key_len, int nrounds)
+{
+	aes_expandkey_generic(k->rndkeys, inv_k ? inv_k->inv_rndkeys : NULL,
+			      in_key, key_len);
+}
+
+static void aes_encrypt_arch(const struct aes_enckey *key,
+			     u8 out[AES_BLOCK_SIZE],
+			     const u8 in[AES_BLOCK_SIZE])
+{
+	if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) &&
+	    !IS_ALIGNED((uintptr_t)out | (uintptr_t)in, 4)) {
+		u8 bounce_buf[AES_BLOCK_SIZE] __aligned(4);
+
+		memcpy(bounce_buf, in, AES_BLOCK_SIZE);
+		__aes_arm_encrypt(key->k.rndkeys, key->nrounds, bounce_buf,
+				  bounce_buf);
+		memcpy(out, bounce_buf, AES_BLOCK_SIZE);
+		return;
+	}
+	__aes_arm_encrypt(key->k.rndkeys, key->nrounds, in, out);
+}
+
+static void aes_decrypt_arch(const struct aes_key *key,
+			     u8 out[AES_BLOCK_SIZE],
+			     const u8 in[AES_BLOCK_SIZE])
+{
+	if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) &&
+	    !IS_ALIGNED((uintptr_t)out | (uintptr_t)in, 4)) {
+		u8 bounce_buf[AES_BLOCK_SIZE] __aligned(4);
+
+		memcpy(bounce_buf, in, AES_BLOCK_SIZE);
+		__aes_arm_decrypt(key->inv_k.inv_rndkeys, key->nrounds,
+				  bounce_buf, bounce_buf);
+		memcpy(out, bounce_buf, AES_BLOCK_SIZE);
+		return;
+	}
+	__aes_arm_decrypt(key->inv_k.inv_rndkeys, key->nrounds, in, out);
+}
```
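
A note on the `aes.h` glue above: the asm routines use 32-bit loads and stores, so on CPUs without efficient unaligned access the wrappers bounce misaligned buffers through a 4-byte-aligned stack block; ORing the two pointers lets a single `IS_ALIGNED()` test cover both. The following standalone sketch shows the same pattern, with a hypothetical `process16()` standing in for the real asm entry points:

```c
#include <linux/align.h>
#include <linux/string.h>
#include <linux/types.h>

/* Hypothetical core transform that relies on 4-byte-aligned 32-bit accesses. */
static void process16(const u8 *in, u8 *out)
{
	const u32 *src = (const u32 *)in;
	u32 *dst = (u32 *)out;

	for (int i = 0; i < 4; i++)
		dst[i] = src[i] ^ 0x01010101;	/* placeholder transform */
}

/* Wrapper that tolerates any alignment, mirroring aes_encrypt_arch() above. */
static void process16_any_alignment(const u8 *in, u8 *out)
{
	if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) &&
	    !IS_ALIGNED((uintptr_t)out | (uintptr_t)in, 4)) {
		u8 bounce_buf[16] __aligned(4);

		/* Copy in, transform in place, then copy back out. */
		memcpy(bounce_buf, in, sizeof(bounce_buf));
		process16(bounce_buf, bounce_buf);
		memcpy(out, bounce_buf, sizeof(bounce_buf));
		return;
	}
	process16(in, out);
}
```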
