summaryrefslogtreecommitdiff
path: root/lib
diff options
context:
space:
mode:
Diffstat (limited to 'lib')
-rw-r--r--lib/crypto/Kconfig1
-rw-r--r--lib/crypto/Makefile3
-rw-r--r--lib/crypto/arm/aes-cipher-core.S201
-rw-r--r--lib/crypto/arm/aes.h56
4 files changed, 261 insertions, 0 deletions
diff --git a/lib/crypto/Kconfig b/lib/crypto/Kconfig
index 4efad77daa24..60420b421e04 100644
--- a/lib/crypto/Kconfig
+++ b/lib/crypto/Kconfig
@@ -14,6 +14,7 @@ config CRYPTO_LIB_AES
config CRYPTO_LIB_AES_ARCH
bool
depends on CRYPTO_LIB_AES && !UML && !KMSAN
+ default y if ARM
config CRYPTO_LIB_AESCFB
tristate
diff --git a/lib/crypto/Makefile b/lib/crypto/Makefile
index 01193b3f47ba..2f6b0f59eb1b 100644
--- a/lib/crypto/Makefile
+++ b/lib/crypto/Makefile
@@ -21,6 +21,9 @@ obj-$(CONFIG_CRYPTO_LIB_AES) += libaes.o
libaes-y := aes.o
ifeq ($(CONFIG_CRYPTO_LIB_AES_ARCH),y)
CFLAGS_aes.o += -I$(src)/$(SRCARCH)
+
+libaes-$(CONFIG_ARM) += arm/aes-cipher-core.o
+
endif # CONFIG_CRYPTO_LIB_AES_ARCH
################################################################################
diff --git a/lib/crypto/arm/aes-cipher-core.S b/lib/crypto/arm/aes-cipher-core.S
new file mode 100644
index 000000000000..87567d6822ba
--- /dev/null
+++ b/lib/crypto/arm/aes-cipher-core.S
@@ -0,0 +1,201 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Scalar AES core transform
+ *
+ * Copyright (C) 2017 Linaro Ltd.
+ * Author: Ard Biesheuvel <ard.biesheuvel@linaro.org>
+ */
+
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+#include <asm/cache.h>
+
+ .text
+ .align 5
+
+ rk .req r0
+ rounds .req r1
+ in .req r2
+ out .req r3
+ ttab .req ip
+
+ t0 .req lr
+ t1 .req r2
+ t2 .req r3
+
+ .macro __select, out, in, idx
+ .if __LINUX_ARM_ARCH__ < 7
+ and \out, \in, #0xff << (8 * \idx)
+ .else
+ ubfx \out, \in, #(8 * \idx), #8
+ .endif
+ .endm
+
+ .macro __load, out, in, idx, sz, op
+ .if __LINUX_ARM_ARCH__ < 7 && \idx > 0
+ ldr\op \out, [ttab, \in, lsr #(8 * \idx) - \sz]
+ .else
+ ldr\op \out, [ttab, \in, lsl #\sz]
+ .endif
+ .endm
+
+ .macro __hround, out0, out1, in0, in1, in2, in3, t3, t4, enc, sz, op, oldcpsr
+ __select \out0, \in0, 0
+ __select t0, \in1, 1
+ __load \out0, \out0, 0, \sz, \op
+ __load t0, t0, 1, \sz, \op
+
+ .if \enc
+ __select \out1, \in1, 0
+ __select t1, \in2, 1
+ .else
+ __select \out1, \in3, 0
+ __select t1, \in0, 1
+ .endif
+ __load \out1, \out1, 0, \sz, \op
+ __select t2, \in2, 2
+ __load t1, t1, 1, \sz, \op
+ __load t2, t2, 2, \sz, \op
+
+ eor \out0, \out0, t0, ror #24
+
+ __select t0, \in3, 3
+ .if \enc
+ __select \t3, \in3, 2
+ __select \t4, \in0, 3
+ .else
+ __select \t3, \in1, 2
+ __select \t4, \in2, 3
+ .endif
+ __load \t3, \t3, 2, \sz, \op
+ __load t0, t0, 3, \sz, \op
+ __load \t4, \t4, 3, \sz, \op
+
+ .ifnb \oldcpsr
+ /*
+ * This is the final round and we're done with all data-dependent table
+ * lookups, so we can safely re-enable interrupts.
+ */
+ restore_irqs \oldcpsr
+ .endif
+
+ eor \out1, \out1, t1, ror #24
+ eor \out0, \out0, t2, ror #16
+ ldm rk!, {t1, t2}
+ eor \out1, \out1, \t3, ror #16
+ eor \out0, \out0, t0, ror #8
+ eor \out1, \out1, \t4, ror #8
+ eor \out0, \out0, t1
+ eor \out1, \out1, t2
+ .endm
+
+ .macro fround, out0, out1, out2, out3, in0, in1, in2, in3, sz=2, op, oldcpsr
+ __hround \out0, \out1, \in0, \in1, \in2, \in3, \out2, \out3, 1, \sz, \op
+ __hround \out2, \out3, \in2, \in3, \in0, \in1, \in1, \in2, 1, \sz, \op, \oldcpsr
+ .endm
+
+ .macro iround, out0, out1, out2, out3, in0, in1, in2, in3, sz=2, op, oldcpsr
+ __hround \out0, \out1, \in0, \in3, \in2, \in1, \out2, \out3, 0, \sz, \op
+ __hround \out2, \out3, \in2, \in1, \in0, \in3, \in1, \in0, 0, \sz, \op, \oldcpsr
+ .endm
+
+ .macro do_crypt, round, ttab, ltab, bsz
+ push {r3-r11, lr}
+
+ // Load keys first, to reduce latency in case they're not cached yet.
+ ldm rk!, {r8-r11}
+
+ ldr r4, [in]
+ ldr r5, [in, #4]
+ ldr r6, [in, #8]
+ ldr r7, [in, #12]
+
+#ifdef CONFIG_CPU_BIG_ENDIAN
+ rev_l r4, t0
+ rev_l r5, t0
+ rev_l r6, t0
+ rev_l r7, t0
+#endif
+
+ eor r4, r4, r8
+ eor r5, r5, r9
+ eor r6, r6, r10
+ eor r7, r7, r11
+
+ mov_l ttab, \ttab
+ /*
+ * Disable interrupts and prefetch the 1024-byte 'ft' or 'it' table into
+ * L1 cache, assuming cacheline size >= 32. This is a hardening measure
+ * intended to make cache-timing attacks more difficult. They may not
+ * be fully prevented, however; see the paper
+ * https://cr.yp.to/antiforgery/cachetiming-20050414.pdf
+ * ("Cache-timing attacks on AES") for a discussion of the many
+ * difficulties involved in writing truly constant-time AES software.
+ */
+ save_and_disable_irqs t0
+ .set i, 0
+ .rept 1024 / 128
+ ldr r8, [ttab, #i + 0]
+ ldr r9, [ttab, #i + 32]
+ ldr r10, [ttab, #i + 64]
+ ldr r11, [ttab, #i + 96]
+ .set i, i + 128
+ .endr
+ push {t0} // oldcpsr
+
+ tst rounds, #2
+ bne 1f
+
+0: \round r8, r9, r10, r11, r4, r5, r6, r7
+ \round r4, r5, r6, r7, r8, r9, r10, r11
+
+1: subs rounds, rounds, #4
+ \round r8, r9, r10, r11, r4, r5, r6, r7
+ bls 2f
+ \round r4, r5, r6, r7, r8, r9, r10, r11
+ b 0b
+
+2: .ifb \ltab
+ add ttab, ttab, #1
+ .else
+ mov_l ttab, \ltab
+ // Prefetch inverse S-box for final round; see explanation above
+ .set i, 0
+ .rept 256 / 64
+ ldr t0, [ttab, #i + 0]
+ ldr t1, [ttab, #i + 32]
+ .set i, i + 64
+ .endr
+ .endif
+
+ pop {rounds} // oldcpsr
+ \round r4, r5, r6, r7, r8, r9, r10, r11, \bsz, b, rounds
+
+#ifdef CONFIG_CPU_BIG_ENDIAN
+ rev_l r4, t0
+ rev_l r5, t0
+ rev_l r6, t0
+ rev_l r7, t0
+#endif
+
+ ldr out, [sp]
+
+ str r4, [out]
+ str r5, [out, #4]
+ str r6, [out, #8]
+ str r7, [out, #12]
+
+ pop {r3-r11, pc}
+
+ .align 3
+ .ltorg
+ .endm
+
+ENTRY(__aes_arm_encrypt)
+ do_crypt fround, aes_enc_tab,, 2
+ENDPROC(__aes_arm_encrypt)
+
+ .align 5
+ENTRY(__aes_arm_decrypt)
+ do_crypt iround, aes_dec_tab, crypto_aes_inv_sbox, 0
+ENDPROC(__aes_arm_decrypt)
diff --git a/lib/crypto/arm/aes.h b/lib/crypto/arm/aes.h
new file mode 100644
index 000000000000..1dd7dfa657bb
--- /dev/null
+++ b/lib/crypto/arm/aes.h
@@ -0,0 +1,56 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * AES block cipher, optimized for ARM
+ *
+ * Copyright (C) 2017 Linaro Ltd.
+ * Copyright 2026 Google LLC
+ */
+
+asmlinkage void __aes_arm_encrypt(const u32 rk[], int rounds,
+ const u8 in[AES_BLOCK_SIZE],
+ u8 out[AES_BLOCK_SIZE]);
+asmlinkage void __aes_arm_decrypt(const u32 inv_rk[], int rounds,
+ const u8 in[AES_BLOCK_SIZE],
+ u8 out[AES_BLOCK_SIZE]);
+
+static void aes_preparekey_arch(union aes_enckey_arch *k,
+ union aes_invkey_arch *inv_k,
+ const u8 *in_key, int key_len, int nrounds)
+{
+ aes_expandkey_generic(k->rndkeys, inv_k ? inv_k->inv_rndkeys : NULL,
+ in_key, key_len);
+}
+
+static void aes_encrypt_arch(const struct aes_enckey *key,
+ u8 out[AES_BLOCK_SIZE],
+ const u8 in[AES_BLOCK_SIZE])
+{
+ if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) &&
+ !IS_ALIGNED((uintptr_t)out | (uintptr_t)in, 4)) {
+ u8 bounce_buf[AES_BLOCK_SIZE] __aligned(4);
+
+ memcpy(bounce_buf, in, AES_BLOCK_SIZE);
+ __aes_arm_encrypt(key->k.rndkeys, key->nrounds, bounce_buf,
+ bounce_buf);
+ memcpy(out, bounce_buf, AES_BLOCK_SIZE);
+ return;
+ }
+ __aes_arm_encrypt(key->k.rndkeys, key->nrounds, in, out);
+}
+
+static void aes_decrypt_arch(const struct aes_key *key,
+ u8 out[AES_BLOCK_SIZE],
+ const u8 in[AES_BLOCK_SIZE])
+{
+ if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) &&
+ !IS_ALIGNED((uintptr_t)out | (uintptr_t)in, 4)) {
+ u8 bounce_buf[AES_BLOCK_SIZE] __aligned(4);
+
+ memcpy(bounce_buf, in, AES_BLOCK_SIZE);
+ __aes_arm_decrypt(key->inv_k.inv_rndkeys, key->nrounds,
+ bounce_buf, bounce_buf);
+ memcpy(out, bounce_buf, AES_BLOCK_SIZE);
+ return;
+ }
+ __aes_arm_decrypt(key->inv_k.inv_rndkeys, key->nrounds, in, out);
+}