From fa2297750c2cc61788d1843f358dbfecaa42944f Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@kernel.org>
Date: Mon, 12 Jan 2026 11:20:08 -0800
Subject: lib/crypto: arm/aes: Migrate optimized code into library

Move the ARM optimized single-block AES en/decryption code into
lib/crypto/, wire it up to the AES library API, and remove the
superseded "aes-arm" crypto_cipher algorithm.  The result is that both
the AES library and crypto_cipher APIs are now optimized for ARM,
whereas previously only crypto_cipher was (and the optimizations
weren't enabled by default, which this fixes as well).

Acked-by: Ard Biesheuvel <ardb@kernel.org>
Link: https://lore.kernel.org/r/20260112192035.10427-11-ebiggers@kernel.org
Signed-off-by: Eric Biggers <ebiggers@kernel.org>
---
 arch/arm/configs/milbeaut_m10v_defconfig |   1 -
 arch/arm/configs/multi_v7_defconfig      |   2 +-
 arch/arm/configs/omap2plus_defconfig     |   2 +-
 arch/arm/configs/pxa_defconfig           |   2 +-
 arch/arm/crypto/Kconfig                  |  18 ---
 arch/arm/crypto/Makefile                 |   2 -
 arch/arm/crypto/aes-cipher-core.S        | 201 -------------------------------
 arch/arm/crypto/aes-cipher-glue.c        |  77 ------------
 arch/arm/crypto/aes-cipher.h             |  13 --
 lib/crypto/Kconfig                       |   1 +
 lib/crypto/Makefile                      |   3 +
 lib/crypto/arm/aes-cipher-core.S         | 201 +++++++++++++++++++++++++++++++
 lib/crypto/arm/aes.h                     |  56 +++++++++
 13 files changed, 264 insertions(+), 315 deletions(-)
 delete mode 100644 arch/arm/crypto/aes-cipher-core.S
 delete mode 100644 arch/arm/crypto/aes-cipher-glue.c
 delete mode 100644 arch/arm/crypto/aes-cipher.h
 create mode 100644 lib/crypto/arm/aes-cipher-core.S
 create mode 100644 lib/crypto/arm/aes.h

diff --git a/arch/arm/configs/milbeaut_m10v_defconfig b/arch/arm/configs/milbeaut_m10v_defconfig
index a2995eb390c6..77b69d672d40 100644
--- a/arch/arm/configs/milbeaut_m10v_defconfig
+++ b/arch/arm/configs/milbeaut_m10v_defconfig
@@ -98,7 +98,6 @@ CONFIG_CRYPTO_SELFTESTS=y
 CONFIG_CRYPTO_AES=y
 CONFIG_CRYPTO_SEQIV=m
 CONFIG_CRYPTO_GHASH_ARM_CE=m
-CONFIG_CRYPTO_AES_ARM=m
 CONFIG_CRYPTO_AES_ARM_BS=m
 CONFIG_CRYPTO_AES_ARM_CE=m
 # CONFIG_CRYPTO_HW is not set
diff --git a/arch/arm/configs/multi_v7_defconfig b/arch/arm/configs/multi_v7_defconfig
index 7f1fa9dd88c9..b6d3e20926bb 100644
--- a/arch/arm/configs/multi_v7_defconfig
+++ b/arch/arm/configs/multi_v7_defconfig
@@ -1286,7 +1286,7 @@ CONFIG_CRYPTO_USER_API_SKCIPHER=m
 CONFIG_CRYPTO_USER_API_RNG=m
 CONFIG_CRYPTO_USER_API_AEAD=m
 CONFIG_CRYPTO_GHASH_ARM_CE=m
-CONFIG_CRYPTO_AES_ARM=m
+CONFIG_CRYPTO_AES=m
 CONFIG_CRYPTO_AES_ARM_BS=m
 CONFIG_CRYPTO_AES_ARM_CE=m
 CONFIG_CRYPTO_DEV_SUN4I_SS=m
diff --git a/arch/arm/configs/omap2plus_defconfig b/arch/arm/configs/omap2plus_defconfig
index 4e53c331cd84..0464f6552169 100644
--- a/arch/arm/configs/omap2plus_defconfig
+++ b/arch/arm/configs/omap2plus_defconfig
@@ -706,7 +706,7 @@ CONFIG_NLS_ISO8859_1=y
 CONFIG_SECURITY=y
 CONFIG_CRYPTO_MICHAEL_MIC=y
 CONFIG_CRYPTO_GHASH_ARM_CE=m
-CONFIG_CRYPTO_AES_ARM=m
+CONFIG_CRYPTO_AES=m
 CONFIG_CRYPTO_AES_ARM_BS=m
 CONFIG_CRYPTO_DEV_OMAP=m
 CONFIG_CRYPTO_DEV_OMAP_SHAM=m
diff --git a/arch/arm/configs/pxa_defconfig b/arch/arm/configs/pxa_defconfig
index 3ea189f1f42f..eacd08fd87ad 100644
--- a/arch/arm/configs/pxa_defconfig
+++ b/arch/arm/configs/pxa_defconfig
@@ -657,7 +657,7 @@ CONFIG_CRYPTO_ANUBIS=m
 CONFIG_CRYPTO_XCBC=m
 CONFIG_CRYPTO_DEFLATE=y
 CONFIG_CRYPTO_LZO=y
-CONFIG_CRYPTO_AES_ARM=m
+CONFIG_CRYPTO_AES=m
 CONFIG_FONTS=y
 CONFIG_FONT_8x8=y
 CONFIG_FONT_8x16=y
diff --git a/arch/arm/crypto/Kconfig b/arch/arm/crypto/Kconfig
index 167a648a9def..b9c28c818b7c 100644
--- a/arch/arm/crypto/Kconfig
+++ b/arch/arm/crypto/Kconfig
@@ -23,24 +23,6 @@ config CRYPTO_GHASH_ARM_CE
 	  that is part of the ARMv8 Crypto Extensions, or a slower variant that
 	  uses the vmull.p8 instruction that is part of the basic NEON ISA.
 
-config CRYPTO_AES_ARM
-	tristate "Ciphers: AES"
-	select CRYPTO_ALGAPI
-	select CRYPTO_AES
-	help
-	  Block ciphers: AES cipher algorithms (FIPS-197)
-
-	  Architecture: arm
-
-	  On ARM processors without the Crypto Extensions, this is the
-	  fastest AES implementation for single blocks.  For multiple
-	  blocks, the NEON bit-sliced implementation is usually faster.
-
-	  This implementation may be vulnerable to cache timing attacks,
-	  since it uses lookup tables.  However, as countermeasures it
-	  disables IRQs and preloads the tables; it is hoped this makes
-	  such attacks very difficult.
-
 config CRYPTO_AES_ARM_BS
 	tristate "Ciphers: AES, modes: ECB/CBC/CTR/XTS (bit-sliced NEON)"
 	depends on KERNEL_MODE_NEON
diff --git a/arch/arm/crypto/Makefile b/arch/arm/crypto/Makefile
index d6683e9d4992..e73099e120b3 100644
--- a/arch/arm/crypto/Makefile
+++ b/arch/arm/crypto/Makefile
@@ -3,13 +3,11 @@
 # Arch-specific CryptoAPI modules.
 #
 
-obj-$(CONFIG_CRYPTO_AES_ARM) += aes-arm.o
 obj-$(CONFIG_CRYPTO_AES_ARM_BS) += aes-arm-bs.o
 obj-$(CONFIG_CRYPTO_AES_ARM_CE) += aes-arm-ce.o
 obj-$(CONFIG_CRYPTO_GHASH_ARM_CE) += ghash-arm-ce.o
 
-aes-arm-y := aes-cipher-core.o aes-cipher-glue.o
 aes-arm-bs-y := aes-neonbs-core.o aes-neonbs-glue.o
 aes-arm-ce-y := aes-ce-core.o aes-ce-glue.o
 ghash-arm-ce-y := ghash-ce-core.o ghash-ce-glue.o
 
diff --git a/arch/arm/crypto/aes-cipher-core.S b/arch/arm/crypto/aes-cipher-core.S
deleted file mode 100644
index 87567d6822ba..000000000000
--- a/arch/arm/crypto/aes-cipher-core.S
+++ /dev/null
@@ -1,201 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Scalar AES core transform
- *
- * Copyright (C) 2017 Linaro Ltd.
- * Author: Ard Biesheuvel <ard.biesheuvel@linaro.org>
- */
-
-#include <linux/linkage.h>
-#include <asm/assembler.h>
-#include <asm/cache.h>
-
-	.text
-	.align		5
-
-	rk		.req	r0
-	rounds		.req	r1
-	in		.req	r2
-	out		.req	r3
-	ttab		.req	ip
-
-	t0		.req	lr
-	t1		.req	r2
-	t2		.req	r3
-
-	.macro		__select, out, in, idx
-	.if		__LINUX_ARM_ARCH__ < 7
-	and		\out, \in, #0xff << (8 * \idx)
-	.else
-	ubfx		\out, \in, #(8 * \idx), #8
-	.endif
-	.endm
-
-	.macro		__load, out, in, idx, sz, op
-	.if		__LINUX_ARM_ARCH__ < 7 && \idx > 0
-	ldr\op		\out, [ttab, \in, lsr #(8 * \idx) - \sz]
-	.else
-	ldr\op		\out, [ttab, \in, lsl #\sz]
-	.endif
-	.endm
-
-	.macro		__hround, out0, out1, in0, in1, in2, in3, t3, t4, enc, sz, op, oldcpsr
-	__select	\out0, \in0, 0
-	__select	t0, \in1, 1
-	__load		\out0, \out0, 0, \sz, \op
-	__load		t0, t0, 1, \sz, \op
-
-	.if		\enc
-	__select	\out1, \in1, 0
-	__select	t1, \in2, 1
-	.else
-	__select	\out1, \in3, 0
-	__select	t1, \in0, 1
-	.endif
-	__load		\out1, \out1, 0, \sz, \op
-	__select	t2, \in2, 2
-	__load		t1, t1, 1, \sz, \op
-	__load		t2, t2, 2, \sz, \op
-
-	eor		\out0, \out0, t0, ror #24
-
-	__select	t0, \in3, 3
-	.if		\enc
-	__select	\t3, \in3, 2
-	__select	\t4, \in0, 3
-	.else
-	__select	\t3, \in1, 2
-	__select	\t4, \in2, 3
-	.endif
-	__load		\t3, \t3, 2, \sz, \op
-	__load		t0, t0, 3, \sz, \op
-	__load		\t4, \t4, 3, \sz, \op
-
-	.ifnb		\oldcpsr
-	/*
-	 * This is the final round and we're done with all data-dependent table
-	 * lookups, so we can safely re-enable interrupts.
-	 */
-	restore_irqs	\oldcpsr
-	.endif
-
-	eor		\out1, \out1, t1, ror #24
-	eor		\out0, \out0, t2, ror #16
-	ldm		rk!, {t1, t2}
-	eor		\out1, \out1, \t3, ror #16
-	eor		\out0, \out0, t0, ror #8
-	eor		\out1, \out1, \t4, ror #8
-	eor		\out0, \out0, t1
-	eor		\out1, \out1, t2
-	.endm
-
-	.macro		fround, out0, out1, out2, out3, in0, in1, in2, in3, sz=2, op, oldcpsr
-	__hround	\out0, \out1, \in0, \in1, \in2, \in3, \out2, \out3, 1, \sz, \op
-	__hround	\out2, \out3, \in2, \in3, \in0, \in1, \in1, \in2, 1, \sz, \op, \oldcpsr
-	.endm
-
-	.macro		iround, out0, out1, out2, out3, in0, in1, in2, in3, sz=2, op, oldcpsr
-	__hround	\out0, \out1, \in0, \in3, \in2, \in1, \out2, \out3, 0, \sz, \op
-	__hround	\out2, \out3, \in2, \in1, \in0, \in3, \in1, \in0, 0, \sz, \op, \oldcpsr
-	.endm
-
-	.macro		do_crypt, round, ttab, ltab, bsz
-	push		{r3-r11, lr}
-
-	// Load keys first, to reduce latency in case they're not cached yet.
-	ldm		rk!, {r8-r11}
-
-	ldr		r4, [in]
-	ldr		r5, [in, #4]
-	ldr		r6, [in, #8]
-	ldr		r7, [in, #12]
-
-#ifdef CONFIG_CPU_BIG_ENDIAN
-	rev_l		r4, t0
-	rev_l		r5, t0
-	rev_l		r6, t0
-	rev_l		r7, t0
-#endif
-
-	eor		r4, r4, r8
-	eor		r5, r5, r9
-	eor		r6, r6, r10
-	eor		r7, r7, r11
-
-	mov_l		ttab, \ttab
-	/*
-	 * Disable interrupts and prefetch the 1024-byte 'ft' or 'it' table into
-	 * L1 cache, assuming cacheline size >= 32.  This is a hardening measure
-	 * intended to make cache-timing attacks more difficult.  They may not
-	 * be fully prevented, however; see the paper
-	 * https://cr.yp.to/antiforgery/cachetiming-20050414.pdf
-	 * ("Cache-timing attacks on AES") for a discussion of the many
-	 * difficulties involved in writing truly constant-time AES software.
-	 */
-	save_and_disable_irqs	t0
-	.set		i, 0
-	.rept		1024 / 128
-	ldr		r8, [ttab, #i + 0]
-	ldr		r9, [ttab, #i + 32]
-	ldr		r10, [ttab, #i + 64]
-	ldr		r11, [ttab, #i + 96]
-	.set		i, i + 128
-	.endr
-	push		{t0}		// oldcpsr
-
-	tst		rounds, #2
-	bne		1f
-
-0:	\round		r8, r9, r10, r11, r4, r5, r6, r7
-	\round		r4, r5, r6, r7, r8, r9, r10, r11
-
-1:	subs		rounds, rounds, #4
-	\round		r8, r9, r10, r11, r4, r5, r6, r7
-	bls		2f
-	\round		r4, r5, r6, r7, r8, r9, r10, r11
-	b		0b
-
-2:	.ifb		\ltab
-	add		ttab, ttab, #1
-	.else
-	mov_l		ttab, \ltab
-	// Prefetch inverse S-box for final round; see explanation above
-	.set		i, 0
-	.rept		256 / 64
-	ldr		t0, [ttab, #i + 0]
-	ldr		t1, [ttab, #i + 32]
-	.set		i, i + 64
-	.endr
-	.endif
-
-	pop		{rounds}	// oldcpsr
-	\round		r4, r5, r6, r7, r8, r9, r10, r11, \bsz, b, rounds
-
-#ifdef CONFIG_CPU_BIG_ENDIAN
-	rev_l		r4, t0
-	rev_l		r5, t0
-	rev_l		r6, t0
-	rev_l		r7, t0
-#endif
-
-	ldr		out, [sp]
-
-	str		r4, [out]
-	str		r5, [out, #4]
-	str		r6, [out, #8]
-	str		r7, [out, #12]
-
-	pop		{r3-r11, pc}
-
-	.align		3
-	.ltorg
-	.endm
-
-ENTRY(__aes_arm_encrypt)
-	do_crypt	fround, aes_enc_tab,, 2
-ENDPROC(__aes_arm_encrypt)
-
-	.align		5
-ENTRY(__aes_arm_decrypt)
-	do_crypt	iround, aes_dec_tab, crypto_aes_inv_sbox, 0
-ENDPROC(__aes_arm_decrypt)
diff --git a/arch/arm/crypto/aes-cipher-glue.c b/arch/arm/crypto/aes-cipher-glue.c
deleted file mode 100644
index f302db808cd3..000000000000
--- a/arch/arm/crypto/aes-cipher-glue.c
+++ /dev/null
@@ -1,77 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Scalar AES core transform
- *
- * Copyright (C) 2017 Linaro Ltd.
- * Author: Ard Biesheuvel <ard.biesheuvel@linaro.org>
- */
-
-#include <crypto/aes.h>
-#include <crypto/algapi.h>
-#include <linux/module.h>
-#include "aes-cipher.h"
-
-EXPORT_SYMBOL_GPL(__aes_arm_encrypt);
-EXPORT_SYMBOL_GPL(__aes_arm_decrypt);
-
-static int aes_arm_setkey(struct crypto_tfm *tfm, const u8 *in_key,
-			  unsigned int key_len)
-{
-	struct crypto_aes_ctx *ctx = crypto_tfm_ctx(tfm);
-
-	return aes_expandkey(ctx, in_key, key_len);
-}
-
-static void aes_arm_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
-{
-	struct crypto_aes_ctx *ctx = crypto_tfm_ctx(tfm);
-	int rounds = 6 + ctx->key_length / 4;
-
-	__aes_arm_encrypt(ctx->key_enc, rounds, in, out);
-}
-
-static void aes_arm_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
-{
-	struct crypto_aes_ctx *ctx = crypto_tfm_ctx(tfm);
-	int rounds = 6 + ctx->key_length / 4;
-
-	__aes_arm_decrypt(ctx->key_dec, rounds, in, out);
-}
-
-static struct crypto_alg aes_alg = {
-	.cra_name			= "aes",
-	.cra_driver_name		= "aes-arm",
-	.cra_priority			= 200,
-	.cra_flags			= CRYPTO_ALG_TYPE_CIPHER,
-	.cra_blocksize			= AES_BLOCK_SIZE,
-	.cra_ctxsize			= sizeof(struct crypto_aes_ctx),
-	.cra_module			= THIS_MODULE,
-
-	.cra_cipher.cia_min_keysize	= AES_MIN_KEY_SIZE,
-	.cra_cipher.cia_max_keysize	= AES_MAX_KEY_SIZE,
-	.cra_cipher.cia_setkey		= aes_arm_setkey,
-	.cra_cipher.cia_encrypt		= aes_arm_encrypt,
-	.cra_cipher.cia_decrypt		= aes_arm_decrypt,
-
-#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
-	.cra_alignmask			= 3,
-#endif
-};
-
-static int __init aes_init(void)
-{
-	return crypto_register_alg(&aes_alg);
-}
-
-static void __exit aes_fini(void)
-{
-	crypto_unregister_alg(&aes_alg);
-}
-
-module_init(aes_init);
-module_exit(aes_fini);
-
-MODULE_DESCRIPTION("Scalar AES cipher for ARM");
-MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
-MODULE_LICENSE("GPL v2");
-MODULE_ALIAS_CRYPTO("aes");
diff --git a/arch/arm/crypto/aes-cipher.h b/arch/arm/crypto/aes-cipher.h
deleted file mode 100644
index d5db2b87eb69..000000000000
--- a/arch/arm/crypto/aes-cipher.h
+++ /dev/null
@@ -1,13 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-#ifndef ARM_CRYPTO_AES_CIPHER_H
-#define ARM_CRYPTO_AES_CIPHER_H
-
-#include <linux/linkage.h>
-#include <linux/types.h>
-
-asmlinkage void __aes_arm_encrypt(const u32 rk[], int rounds,
-				  const u8 *in, u8 *out);
-asmlinkage void __aes_arm_decrypt(const u32 rk[], int rounds,
-				  const u8 *in, u8 *out);
-
-#endif /* ARM_CRYPTO_AES_CIPHER_H */
diff --git a/lib/crypto/Kconfig b/lib/crypto/Kconfig
index 4efad77daa24..60420b421e04 100644
--- a/lib/crypto/Kconfig
+++ b/lib/crypto/Kconfig
@@ -14,6 +14,7 @@ config CRYPTO_LIB_AES
 config CRYPTO_LIB_AES_ARCH
 	bool
 	depends on CRYPTO_LIB_AES && !UML && !KMSAN
+	default y if ARM
 
 config CRYPTO_LIB_AESCFB
 	tristate
diff --git a/lib/crypto/Makefile b/lib/crypto/Makefile
index 01193b3f47ba..2f6b0f59eb1b 100644
--- a/lib/crypto/Makefile
+++ b/lib/crypto/Makefile
@@ -21,6 +21,9 @@ obj-$(CONFIG_CRYPTO_LIB_AES) += libaes.o
 libaes-y := aes.o
 ifeq ($(CONFIG_CRYPTO_LIB_AES_ARCH),y)
 CFLAGS_aes.o += -I$(src)/$(SRCARCH)
+
+libaes-$(CONFIG_ARM) += arm/aes-cipher-core.o
+
 endif # CONFIG_CRYPTO_LIB_AES_ARCH
 
 ################################################################################
diff --git a/lib/crypto/arm/aes-cipher-core.S b/lib/crypto/arm/aes-cipher-core.S
new file mode 100644
index 000000000000..87567d6822ba
--- /dev/null
+++ b/lib/crypto/arm/aes-cipher-core.S
@@ -0,0 +1,201 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Scalar AES core transform
+ *
+ * Copyright (C) 2017 Linaro Ltd.
+ * Author: Ard Biesheuvel <ard.biesheuvel@linaro.org>
+ */
+
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+#include <asm/cache.h>
+
+	.text
+	.align		5
+
+	rk		.req	r0
+	rounds		.req	r1
+	in		.req	r2
+	out		.req	r3
+	ttab		.req	ip
+
+	t0		.req	lr
+	t1		.req	r2
+	t2		.req	r3
+
+	.macro		__select, out, in, idx
+	.if		__LINUX_ARM_ARCH__ < 7
+	and		\out, \in, #0xff << (8 * \idx)
+	.else
+	ubfx		\out, \in, #(8 * \idx), #8
+	.endif
+	.endm
+
+	.macro		__load, out, in, idx, sz, op
+	.if		__LINUX_ARM_ARCH__ < 7 && \idx > 0
+	ldr\op		\out, [ttab, \in, lsr #(8 * \idx) - \sz]
+	.else
+	ldr\op		\out, [ttab, \in, lsl #\sz]
+	.endif
+	.endm
+
+	.macro		__hround, out0, out1, in0, in1, in2, in3, t3, t4, enc, sz, op, oldcpsr
+	__select	\out0, \in0, 0
+	__select	t0, \in1, 1
+	__load		\out0, \out0, 0, \sz, \op
+	__load		t0, t0, 1, \sz, \op
+
+	.if		\enc
+	__select	\out1, \in1, 0
+	__select	t1, \in2, 1
+	.else
+	__select	\out1, \in3, 0
+	__select	t1, \in0, 1
+	.endif
+	__load		\out1, \out1, 0, \sz, \op
+	__select	t2, \in2, 2
+	__load		t1, t1, 1, \sz, \op
+	__load		t2, t2, 2, \sz, \op
+
+	eor		\out0, \out0, t0, ror #24
+
+	__select	t0, \in3, 3
+	.if		\enc
+	__select	\t3, \in3, 2
+	__select	\t4, \in0, 3
+	.else
+	__select	\t3, \in1, 2
+	__select	\t4, \in2, 3
+	.endif
+	__load		\t3, \t3, 2, \sz, \op
+	__load		t0, t0, 3, \sz, \op
+	__load		\t4, \t4, 3, \sz, \op
+
+	.ifnb		\oldcpsr
+	/*
+	 * This is the final round and we're done with all data-dependent table
+	 * lookups, so we can safely re-enable interrupts.
+	 */
+	restore_irqs	\oldcpsr
+	.endif
+
+	eor		\out1, \out1, t1, ror #24
+	eor		\out0, \out0, t2, ror #16
+	ldm		rk!, {t1, t2}
+	eor		\out1, \out1, \t3, ror #16
+	eor		\out0, \out0, t0, ror #8
+	eor		\out1, \out1, \t4, ror #8
+	eor		\out0, \out0, t1
+	eor		\out1, \out1, t2
+	.endm
+
+	.macro		fround, out0, out1, out2, out3, in0, in1, in2, in3, sz=2, op, oldcpsr
+	__hround	\out0, \out1, \in0, \in1, \in2, \in3, \out2, \out3, 1, \sz, \op
+	__hround	\out2, \out3, \in2, \in3, \in0, \in1, \in1, \in2, 1, \sz, \op, \oldcpsr
+	.endm
+
+	.macro		iround, out0, out1, out2, out3, in0, in1, in2, in3, sz=2, op, oldcpsr
+	__hround	\out0, \out1, \in0, \in3, \in2, \in1, \out2, \out3, 0, \sz, \op
+	__hround	\out2, \out3, \in2, \in1, \in0, \in3, \in1, \in0, 0, \sz, \op, \oldcpsr
+	.endm
+
+	.macro		do_crypt, round, ttab, ltab, bsz
+	push		{r3-r11, lr}
+
+	// Load keys first, to reduce latency in case they're not cached yet.
+	ldm		rk!, {r8-r11}
+
+	ldr		r4, [in]
+	ldr		r5, [in, #4]
+	ldr		r6, [in, #8]
+	ldr		r7, [in, #12]
+
+#ifdef CONFIG_CPU_BIG_ENDIAN
+	rev_l		r4, t0
+	rev_l		r5, t0
+	rev_l		r6, t0
+	rev_l		r7, t0
+#endif
+
+	eor		r4, r4, r8
+	eor		r5, r5, r9
+	eor		r6, r6, r10
+	eor		r7, r7, r11
+
+	mov_l		ttab, \ttab
+	/*
+	 * Disable interrupts and prefetch the 1024-byte 'ft' or 'it' table into
+	 * L1 cache, assuming cacheline size >= 32.  This is a hardening measure
+	 * intended to make cache-timing attacks more difficult.  They may not
+	 * be fully prevented, however; see the paper
+	 * https://cr.yp.to/antiforgery/cachetiming-20050414.pdf
+	 * ("Cache-timing attacks on AES") for a discussion of the many
+	 * difficulties involved in writing truly constant-time AES software.
+	 */
+	save_and_disable_irqs	t0
+	.set		i, 0
+	.rept		1024 / 128
+	ldr		r8, [ttab, #i + 0]
+	ldr		r9, [ttab, #i + 32]
+	ldr		r10, [ttab, #i + 64]
+	ldr		r11, [ttab, #i + 96]
+	.set		i, i + 128
+	.endr
+	push		{t0}		// oldcpsr
+
+	tst		rounds, #2
+	bne		1f
+
+0:	\round		r8, r9, r10, r11, r4, r5, r6, r7
+	\round		r4, r5, r6, r7, r8, r9, r10, r11
+
+1:	subs		rounds, rounds, #4
+	\round		r8, r9, r10, r11, r4, r5, r6, r7
+	bls		2f
+	\round		r4, r5, r6, r7, r8, r9, r10, r11
+	b		0b
+
+2:	.ifb		\ltab
+	add		ttab, ttab, #1
+	.else
+	mov_l		ttab, \ltab
+	// Prefetch inverse S-box for final round; see explanation above
+	.set		i, 0
+	.rept		256 / 64
+	ldr		t0, [ttab, #i + 0]
+	ldr		t1, [ttab, #i + 32]
+	.set		i, i + 64
+	.endr
+	.endif
+
+	pop		{rounds}	// oldcpsr
+	\round		r4, r5, r6, r7, r8, r9, r10, r11, \bsz, b, rounds
+
+#ifdef CONFIG_CPU_BIG_ENDIAN
+	rev_l		r4, t0
+	rev_l		r5, t0
+	rev_l		r6, t0
+	rev_l		r7, t0
+#endif
+
+	ldr		out, [sp]
+
+	str		r4, [out]
+	str		r5, [out, #4]
+	str		r6, [out, #8]
+	str		r7, [out, #12]
+
+	pop		{r3-r11, pc}
+
+	.align		3
+	.ltorg
+	.endm
+
+ENTRY(__aes_arm_encrypt)
+	do_crypt	fround, aes_enc_tab,, 2
+ENDPROC(__aes_arm_encrypt)
+
+	.align		5
+ENTRY(__aes_arm_decrypt)
+	do_crypt	iround, aes_dec_tab, crypto_aes_inv_sbox, 0
+ENDPROC(__aes_arm_decrypt)
diff --git a/lib/crypto/arm/aes.h b/lib/crypto/arm/aes.h
new file mode 100644
index 000000000000..1dd7dfa657bb
--- /dev/null
+++ b/lib/crypto/arm/aes.h
@@ -0,0 +1,56 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * AES block cipher, optimized for ARM
+ *
+ * Copyright (C) 2017 Linaro Ltd.
+ * Copyright 2026 Google LLC
+ */
+
+asmlinkage void __aes_arm_encrypt(const u32 rk[], int rounds,
+				  const u8 in[AES_BLOCK_SIZE],
+				  u8 out[AES_BLOCK_SIZE]);
+asmlinkage void __aes_arm_decrypt(const u32 inv_rk[], int rounds,
+				  const u8 in[AES_BLOCK_SIZE],
+				  u8 out[AES_BLOCK_SIZE]);
+
+static void aes_preparekey_arch(union aes_enckey_arch *k,
+				union aes_invkey_arch *inv_k,
+				const u8 *in_key, int key_len, int nrounds)
+{
+	aes_expandkey_generic(k->rndkeys, inv_k ? inv_k->inv_rndkeys : NULL,
+			      in_key, key_len);
+}
+
+static void aes_encrypt_arch(const struct aes_enckey *key,
+			     u8 out[AES_BLOCK_SIZE],
+			     const u8 in[AES_BLOCK_SIZE])
+{
+	if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) &&
+	    !IS_ALIGNED((uintptr_t)out | (uintptr_t)in, 4)) {
+		u8 bounce_buf[AES_BLOCK_SIZE] __aligned(4);
+
+		memcpy(bounce_buf, in, AES_BLOCK_SIZE);
+		__aes_arm_encrypt(key->k.rndkeys, key->nrounds, bounce_buf,
+				  bounce_buf);
+		memcpy(out, bounce_buf, AES_BLOCK_SIZE);
+		return;
+	}
+	__aes_arm_encrypt(key->k.rndkeys, key->nrounds, in, out);
+}
+
+static void aes_decrypt_arch(const struct aes_key *key,
+			     u8 out[AES_BLOCK_SIZE],
+			     const u8 in[AES_BLOCK_SIZE])
+{
+	if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) &&
+	    !IS_ALIGNED((uintptr_t)out | (uintptr_t)in, 4)) {
+		u8 bounce_buf[AES_BLOCK_SIZE] __aligned(4);
+
+		memcpy(bounce_buf, in, AES_BLOCK_SIZE);
+		__aes_arm_decrypt(key->inv_k.inv_rndkeys, key->nrounds,
+				  bounce_buf, bounce_buf);
+		memcpy(out, bounce_buf, AES_BLOCK_SIZE);
+		return;
+	}
+	__aes_arm_decrypt(key->inv_k.inv_rndkeys, key->nrounds, in, out);
+}
-- 
cgit v1.2.3
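
[Usage note, not part of the patch] For readers wondering what "wire it up
to the AES library API" buys callers: kernel code that uses the AES library
through <crypto/aes.h> now picks up this scalar ARM assembly automatically
once CRYPTO_LIB_AES_ARCH defaults to y.  Below is a minimal sketch of such a
caller, using the long-standing crypto_aes_ctx-based entry points
(aes_expandkey/aes_encrypt).  The function name demo_aes_one_block is made
up for illustration, and note that this series also adds new key-preparation
hooks (aes_preparekey_arch and the aes_enckey/aes_key types visible in
lib/crypto/arm/aes.h), so the library's internal plumbing may differ from
what this sketch shows.

	#include <crypto/aes.h>
	#include <linux/string.h>

	/*
	 * Encrypt one 16-byte block with the AES library API.  On ARM this
	 * now ends up in the scalar code moved into lib/crypto/arm/ by this
	 * patch.
	 */
	static int demo_aes_one_block(const u8 *key, unsigned int key_len,
				      const u8 in[AES_BLOCK_SIZE],
				      u8 out[AES_BLOCK_SIZE])
	{
		struct crypto_aes_ctx ctx;
		int err;

		err = aes_expandkey(&ctx, key, key_len);  /* 128/192/256-bit keys */
		if (err)
			return err;

		aes_encrypt(&ctx, out, in);
		memzero_explicit(&ctx, sizeof(ctx));      /* wipe the expanded key */
		return 0;
	}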