author | Linus Torvalds <torvalds@linux-foundation.org> | 2019-09-18 22:11:14 +0300
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2019-09-18 22:11:14 +0300
commit | 8b53c76533aa4356602aea98f98a2f3b4051464c (patch)
tree | ab10ba58e21501407f8108a6bb9003daa2176962 /arch/arm64
parent | 6cfae0c26b21dce323fe8799b66cf4bc996e3565 (diff)
parent | 9575d1a5c0780ea26ff8dd29c94a32be32ce3c85 (diff)
download | linux-8b53c76533aa4356602aea98f98a2f3b4051464c.tar.xz
Merge branch 'linus' of git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6
Pull crypto updates from Herbert Xu:
"API:
- Add the ability to abort a skcipher walk.
Algorithms:
- Fix XTS to actually do the stealing.
- Add library helpers for AES and DES for single-block users.
- Add library helpers for SHA256.
- Add new DES key verification helper.
- Add surrounding bits for ESSIV generator.
- Add accelerations for aegis128.
- Add test vectors for lzo-rle.
Drivers:
- Add i.MX8MQ support to caam.
- Add gcm/ccm/cfb/ofb aes support in inside-secure.
- Add ofb/cfb aes support in mediatek.
- Add HiSilicon ZIP accelerator support.
Others:
- Fix potential race condition in padata.
- Use unbound workqueues in padata"
* 'linus' of git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6: (311 commits)
crypto: caam - Cast to long first before pointer conversion
crypto: ccree - enable CTS support in AES-XTS
crypto: inside-secure - Probe transform record cache RAM sizes
crypto: inside-secure - Base RD fetchcount on actual RD FIFO size
crypto: inside-secure - Base CD fetchcount on actual CD FIFO size
crypto: inside-secure - Enable extended algorithms on newer HW
crypto: inside-secure - Corrected configuration of EIP96_TOKEN_CTRL
crypto: inside-secure - Add EIP97/EIP197 and endianness detection
padata: remove cpu_index from the parallel_queue
padata: unbind parallel jobs from specific CPUs
padata: use separate workqueues for parallel and serial work
padata, pcrypt: take CPU hotplug lock internally in padata_alloc_possible
crypto: pcrypt - remove padata cpumask notifier
padata: make padata_do_parallel find alternate callback CPU
workqueue: require CPU hotplug read exclusion for apply_workqueue_attrs
workqueue: unconfine alloc/apply/free_workqueue_attrs()
padata: allocate workqueue internally
arm64: dts: imx8mq: Add CAAM node
random: Use wait_event_freezable() in add_hwgenerator_randomness()
crypto: ux500 - Fix COMPILE_TEST warnings
...
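Most of the arch/arm64 churn in this pull comes from the "library helpers for AES and DES for single-block users" item above: the glue code below drops its private __aes_arm64_encrypt/__aes_arm64_decrypt fallbacks and calls the new <crypto/aes.h> library routines instead. As orientation, here is a minimal sketch of that single-block API, assuming CONFIG_CRYPTO_LIB_AES is selected; the demo_* name is illustrative, not a kernel symbol.

#include <crypto/aes.h>
#include <linux/string.h>

static int demo_aes_one_block(const u8 *key, unsigned int key_len,
			      const u8 in[AES_BLOCK_SIZE],
			      u8 out[AES_BLOCK_SIZE])
{
	struct crypto_aes_ctx ctx;
	int err;

	/* expand the key once ... */
	err = aes_expandkey(&ctx, key, key_len);
	if (err)
		return err;

	/* ... then process single blocks without going through the crypto API */
	aes_encrypt(&ctx, out, in);	/* out = AES-ENC(key, in) */
	aes_decrypt(&ctx, out, out);	/* decrypt in place: back to 'in' */

	memzero_explicit(&ctx, sizeof(ctx));
	return 0;
}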
Diffstat (limited to 'arch/arm64')
-rw-r--r-- | arch/arm64/boot/dts/freescale/imx8mq.dtsi | 30
-rw-r--r-- | arch/arm64/crypto/Kconfig | 10
-rw-r--r-- | arch/arm64/crypto/aes-ce-ccm-glue.c | 18
-rw-r--r-- | arch/arm64/crypto/aes-ce-glue.c | 7
-rw-r--r-- | arch/arm64/crypto/aes-ce.S | 3
-rw-r--r-- | arch/arm64/crypto/aes-cipher-core.S | 40
-rw-r--r-- | arch/arm64/crypto/aes-cipher-glue.c | 11
-rw-r--r-- | arch/arm64/crypto/aes-ctr-fallback.h | 50
-rw-r--r-- | arch/arm64/crypto/aes-glue.c | 470
-rw-r--r-- | arch/arm64/crypto/aes-modes.S | 135
-rw-r--r-- | arch/arm64/crypto/aes-neon.S | 79
-rw-r--r-- | arch/arm64/crypto/aes-neonbs-core.S | 9
-rw-r--r-- | arch/arm64/crypto/aes-neonbs-glue.c | 140
-rw-r--r-- | arch/arm64/crypto/ghash-ce-glue.c | 30
-rw-r--r-- | arch/arm64/crypto/sha256-glue.c | 24
15 files changed, 668 insertions, 388 deletions
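One addition that is easy to lose in the flattened aes-glue.c hunk below is the ESSIV glue: the new essiv_cbc_set_key() derives the IV-generator key by hashing the user key with SHA-256 and expanding the 32-byte digest as an AES-256 key. Restated here as a self-contained sketch (demo_* names are illustrative and the error-flag handling is simplified):

#include <crypto/aes.h>
#include <crypto/hash.h>
#include <crypto/sha.h>

struct demo_essiv_ctx {			/* mirrors crypto_aes_essiv_cbc_ctx */
	struct crypto_aes_ctx key1;	/* bulk CBC key */
	struct crypto_aes_ctx key2;	/* key derived from SHA-256(key1) */
	struct crypto_shash *hash;	/* "sha256", allocated in .init */
};

static int demo_essiv_set_key(struct demo_essiv_ctx *ctx,
			      const u8 *in_key, unsigned int key_len)
{
	SHASH_DESC_ON_STACK(desc, ctx->hash);
	u8 digest[SHA256_DIGEST_SIZE];
	int err;

	err = aes_expandkey(&ctx->key1, in_key, key_len);
	if (err)
		return err;

	desc->tfm = ctx->hash;
	err = crypto_shash_digest(desc, in_key, key_len, digest);
	if (err)
		return err;

	/* the digest is always 32 bytes, i.e. an AES-256 key */
	return aes_expandkey(&ctx->key2, digest, sizeof(digest));
}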
diff --git a/arch/arm64/boot/dts/freescale/imx8mq.dtsi b/arch/arm64/boot/dts/freescale/imx8mq.dtsi index 3f3594d9485c..04115ca6bfb5 100644 --- a/arch/arm64/boot/dts/freescale/imx8mq.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mq.dtsi @@ -751,6 +751,36 @@ status = "disabled"; }; + crypto: crypto@30900000 { + compatible = "fsl,sec-v4.0"; + #address-cells = <1>; + #size-cells = <1>; + reg = <0x30900000 0x40000>; + ranges = <0 0x30900000 0x40000>; + interrupts = <GIC_SPI 91 IRQ_TYPE_LEVEL_HIGH>; + clocks = <&clk IMX8MQ_CLK_AHB>, + <&clk IMX8MQ_CLK_IPG_ROOT>; + clock-names = "aclk", "ipg"; + + sec_jr0: jr@1000 { + compatible = "fsl,sec-v4.0-job-ring"; + reg = <0x1000 0x1000>; + interrupts = <GIC_SPI 105 IRQ_TYPE_LEVEL_HIGH>; + }; + + sec_jr1: jr@2000 { + compatible = "fsl,sec-v4.0-job-ring"; + reg = <0x2000 0x1000>; + interrupts = <GIC_SPI 106 IRQ_TYPE_LEVEL_HIGH>; + }; + + sec_jr2: jr@3000 { + compatible = "fsl,sec-v4.0-job-ring"; + reg = <0x3000 0x1000>; + interrupts = <GIC_SPI 114 IRQ_TYPE_LEVEL_HIGH>; + }; + }; + dphy: dphy@30a00300 { compatible = "fsl,imx8mq-mipi-dphy"; reg = <0x30a00300 0x100>; diff --git a/arch/arm64/crypto/Kconfig b/arch/arm64/crypto/Kconfig index d9a523ecdd83..4922c4451e7c 100644 --- a/arch/arm64/crypto/Kconfig +++ b/arch/arm64/crypto/Kconfig @@ -58,8 +58,7 @@ config CRYPTO_GHASH_ARM64_CE depends on KERNEL_MODE_NEON select CRYPTO_HASH select CRYPTO_GF128MUL - select CRYPTO_AES - select CRYPTO_AES_ARM64 + select CRYPTO_LIB_AES config CRYPTO_CRCT10DIF_ARM64_CE tristate "CRCT10DIF digest algorithm using PMULL instructions" @@ -74,15 +73,15 @@ config CRYPTO_AES_ARM64_CE tristate "AES core cipher using ARMv8 Crypto Extensions" depends on ARM64 && KERNEL_MODE_NEON select CRYPTO_ALGAPI - select CRYPTO_AES_ARM64 + select CRYPTO_LIB_AES config CRYPTO_AES_ARM64_CE_CCM tristate "AES in CCM mode using ARMv8 Crypto Extensions" depends on ARM64 && KERNEL_MODE_NEON select CRYPTO_ALGAPI select CRYPTO_AES_ARM64_CE - select CRYPTO_AES_ARM64 select CRYPTO_AEAD + select CRYPTO_LIB_AES config CRYPTO_AES_ARM64_CE_BLK tristate "AES in ECB/CBC/CTR/XTS modes using ARMv8 Crypto Extensions" @@ -97,7 +96,7 @@ config CRYPTO_AES_ARM64_NEON_BLK depends on KERNEL_MODE_NEON select CRYPTO_BLKCIPHER select CRYPTO_AES_ARM64 - select CRYPTO_AES + select CRYPTO_LIB_AES select CRYPTO_SIMD config CRYPTO_CHACHA20_NEON @@ -117,6 +116,7 @@ config CRYPTO_AES_ARM64_BS select CRYPTO_BLKCIPHER select CRYPTO_AES_ARM64_NEON_BLK select CRYPTO_AES_ARM64 + select CRYPTO_LIB_AES select CRYPTO_SIMD endif diff --git a/arch/arm64/crypto/aes-ce-ccm-glue.c b/arch/arm64/crypto/aes-ce-ccm-glue.c index 827e5473e5de..541cf9165748 100644 --- a/arch/arm64/crypto/aes-ce-ccm-glue.c +++ b/arch/arm64/crypto/aes-ce-ccm-glue.c @@ -43,8 +43,6 @@ asmlinkage void ce_aes_ccm_decrypt(u8 out[], u8 const in[], u32 cbytes, asmlinkage void ce_aes_ccm_final(u8 mac[], u8 const ctr[], u32 const rk[], u32 rounds); -asmlinkage void __aes_arm64_encrypt(u32 *rk, u8 *out, const u8 *in, int rounds); - static int ccm_setkey(struct crypto_aead *tfm, const u8 *in_key, unsigned int key_len) { @@ -124,8 +122,7 @@ static void ccm_update_mac(struct crypto_aes_ctx *key, u8 mac[], u8 const in[], } while (abytes >= AES_BLOCK_SIZE) { - __aes_arm64_encrypt(key->key_enc, mac, mac, - num_rounds(key)); + aes_encrypt(key, mac, mac); crypto_xor(mac, in, AES_BLOCK_SIZE); in += AES_BLOCK_SIZE; @@ -133,8 +130,7 @@ static void ccm_update_mac(struct crypto_aes_ctx *key, u8 mac[], u8 const in[], } if (abytes > 0) { - __aes_arm64_encrypt(key->key_enc, mac, mac, - 
num_rounds(key)); + aes_encrypt(key, mac, mac); crypto_xor(mac, in, abytes); *macp = abytes; } @@ -206,10 +202,8 @@ static int ccm_crypt_fallback(struct skcipher_walk *walk, u8 mac[], u8 iv0[], bsize = nbytes; crypto_inc(walk->iv, AES_BLOCK_SIZE); - __aes_arm64_encrypt(ctx->key_enc, buf, walk->iv, - num_rounds(ctx)); - __aes_arm64_encrypt(ctx->key_enc, mac, mac, - num_rounds(ctx)); + aes_encrypt(ctx, buf, walk->iv); + aes_encrypt(ctx, mac, mac); if (enc) crypto_xor(mac, src, bsize); crypto_xor_cpy(dst, src, buf, bsize); @@ -224,8 +218,8 @@ static int ccm_crypt_fallback(struct skcipher_walk *walk, u8 mac[], u8 iv0[], } if (!err) { - __aes_arm64_encrypt(ctx->key_enc, buf, iv0, num_rounds(ctx)); - __aes_arm64_encrypt(ctx->key_enc, mac, mac, num_rounds(ctx)); + aes_encrypt(ctx, buf, iv0); + aes_encrypt(ctx, mac, mac); crypto_xor(mac, buf, AES_BLOCK_SIZE); } return err; diff --git a/arch/arm64/crypto/aes-ce-glue.c b/arch/arm64/crypto/aes-ce-glue.c index d3bc97afde20..6d085dc56c51 100644 --- a/arch/arm64/crypto/aes-ce-glue.c +++ b/arch/arm64/crypto/aes-ce-glue.c @@ -20,9 +20,6 @@ MODULE_DESCRIPTION("Synchronous AES cipher using ARMv8 Crypto Extensions"); MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>"); MODULE_LICENSE("GPL v2"); -asmlinkage void __aes_arm64_encrypt(u32 *rk, u8 *out, const u8 *in, int rounds); -asmlinkage void __aes_arm64_decrypt(u32 *rk, u8 *out, const u8 *in, int rounds); - struct aes_block { u8 b[AES_BLOCK_SIZE]; }; @@ -51,7 +48,7 @@ static void aes_cipher_encrypt(struct crypto_tfm *tfm, u8 dst[], u8 const src[]) struct crypto_aes_ctx *ctx = crypto_tfm_ctx(tfm); if (!crypto_simd_usable()) { - __aes_arm64_encrypt(ctx->key_enc, dst, src, num_rounds(ctx)); + aes_encrypt(ctx, dst, src); return; } @@ -65,7 +62,7 @@ static void aes_cipher_decrypt(struct crypto_tfm *tfm, u8 dst[], u8 const src[]) struct crypto_aes_ctx *ctx = crypto_tfm_ctx(tfm); if (!crypto_simd_usable()) { - __aes_arm64_decrypt(ctx->key_dec, dst, src, num_rounds(ctx)); + aes_decrypt(ctx, dst, src); return; } diff --git a/arch/arm64/crypto/aes-ce.S b/arch/arm64/crypto/aes-ce.S index 00bd2885feaa..c132c49c89a8 100644 --- a/arch/arm64/crypto/aes-ce.S +++ b/arch/arm64/crypto/aes-ce.S @@ -21,6 +21,9 @@ .macro xts_reload_mask, tmp .endm + .macro xts_cts_skip_tw, reg, lbl + .endm + /* preload all round keys */ .macro load_round_keys, rounds, rk cmp \rounds, #12 diff --git a/arch/arm64/crypto/aes-cipher-core.S b/arch/arm64/crypto/aes-cipher-core.S index f06df0d2080c..423d0aebc570 100644 --- a/arch/arm64/crypto/aes-cipher-core.S +++ b/arch/arm64/crypto/aes-cipher-core.S @@ -128,43 +128,5 @@ ENDPROC(__aes_arm64_encrypt) .align 5 ENTRY(__aes_arm64_decrypt) - do_crypt iround, crypto_it_tab, __aes_arm64_inverse_sbox, 0 + do_crypt iround, crypto_it_tab, crypto_aes_inv_sbox, 0 ENDPROC(__aes_arm64_decrypt) - - .section ".rodata", "a" - .align L1_CACHE_SHIFT - .type __aes_arm64_inverse_sbox, %object -__aes_arm64_inverse_sbox: - .byte 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38 - .byte 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb - .byte 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87 - .byte 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb - .byte 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d - .byte 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e - .byte 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2 - .byte 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25 - .byte 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16 - .byte 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92 - .byte 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda - 
.byte 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84 - .byte 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a - .byte 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06 - .byte 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02 - .byte 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b - .byte 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea - .byte 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73 - .byte 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85 - .byte 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e - .byte 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89 - .byte 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b - .byte 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20 - .byte 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4 - .byte 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31 - .byte 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f - .byte 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d - .byte 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef - .byte 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0 - .byte 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61 - .byte 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26 - .byte 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d - .size __aes_arm64_inverse_sbox, . - __aes_arm64_inverse_sbox diff --git a/arch/arm64/crypto/aes-cipher-glue.c b/arch/arm64/crypto/aes-cipher-glue.c index 0913966aa6fa..8caf6dfefce8 100644 --- a/arch/arm64/crypto/aes-cipher-glue.c +++ b/arch/arm64/crypto/aes-cipher-glue.c @@ -10,12 +10,9 @@ #include <linux/module.h> asmlinkage void __aes_arm64_encrypt(u32 *rk, u8 *out, const u8 *in, int rounds); -EXPORT_SYMBOL(__aes_arm64_encrypt); - asmlinkage void __aes_arm64_decrypt(u32 *rk, u8 *out, const u8 *in, int rounds); -EXPORT_SYMBOL(__aes_arm64_decrypt); -static void aes_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) +static void aes_arm64_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) { struct crypto_aes_ctx *ctx = crypto_tfm_ctx(tfm); int rounds = 6 + ctx->key_length / 4; @@ -23,7 +20,7 @@ static void aes_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) __aes_arm64_encrypt(ctx->key_enc, out, in, rounds); } -static void aes_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) +static void aes_arm64_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) { struct crypto_aes_ctx *ctx = crypto_tfm_ctx(tfm); int rounds = 6 + ctx->key_length / 4; @@ -43,8 +40,8 @@ static struct crypto_alg aes_alg = { .cra_cipher.cia_min_keysize = AES_MIN_KEY_SIZE, .cra_cipher.cia_max_keysize = AES_MAX_KEY_SIZE, .cra_cipher.cia_setkey = crypto_aes_set_key, - .cra_cipher.cia_encrypt = aes_encrypt, - .cra_cipher.cia_decrypt = aes_decrypt + .cra_cipher.cia_encrypt = aes_arm64_encrypt, + .cra_cipher.cia_decrypt = aes_arm64_decrypt }; static int __init aes_init(void) diff --git a/arch/arm64/crypto/aes-ctr-fallback.h b/arch/arm64/crypto/aes-ctr-fallback.h deleted file mode 100644 index 3ac911990ec7..000000000000 --- a/arch/arm64/crypto/aes-ctr-fallback.h +++ /dev/null @@ -1,50 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Fallback for sync aes(ctr) in contexts where kernel mode NEON - * is not allowed - * - * Copyright (C) 2017 Linaro Ltd <ard.biesheuvel@linaro.org> - */ - -#include <crypto/aes.h> -#include <crypto/internal/skcipher.h> - -asmlinkage void __aes_arm64_encrypt(u32 *rk, u8 *out, const u8 *in, int rounds); - -static inline int aes_ctr_encrypt_fallback(struct crypto_aes_ctx *ctx, - struct skcipher_request *req) -{ - struct skcipher_walk walk; - u8 buf[AES_BLOCK_SIZE]; - int err; - - err = skcipher_walk_virt(&walk, req, true); - - while 
(walk.nbytes > 0) { - u8 *dst = walk.dst.virt.addr; - u8 *src = walk.src.virt.addr; - int nbytes = walk.nbytes; - int tail = 0; - - if (nbytes < walk.total) { - nbytes = round_down(nbytes, AES_BLOCK_SIZE); - tail = walk.nbytes % AES_BLOCK_SIZE; - } - - do { - int bsize = min(nbytes, AES_BLOCK_SIZE); - - __aes_arm64_encrypt(ctx->key_enc, buf, walk.iv, - 6 + ctx->key_length / 4); - crypto_xor_cpy(dst, src, buf, bsize); - crypto_inc(walk.iv, AES_BLOCK_SIZE); - - dst += AES_BLOCK_SIZE; - src += AES_BLOCK_SIZE; - nbytes -= AES_BLOCK_SIZE; - } while (nbytes > 0); - - err = skcipher_walk_done(&walk, tail); - } - return err; -} diff --git a/arch/arm64/crypto/aes-glue.c b/arch/arm64/crypto/aes-glue.c index 8d6c8932c841..aa57dc639f77 100644 --- a/arch/arm64/crypto/aes-glue.c +++ b/arch/arm64/crypto/aes-glue.c @@ -9,6 +9,8 @@ #include <asm/hwcap.h> #include <asm/simd.h> #include <crypto/aes.h> +#include <crypto/ctr.h> +#include <crypto/sha.h> #include <crypto/internal/hash.h> #include <crypto/internal/simd.h> #include <crypto/internal/skcipher.h> @@ -18,12 +20,10 @@ #include <crypto/xts.h> #include "aes-ce-setkey.h" -#include "aes-ctr-fallback.h" #ifdef USE_V8_CRYPTO_EXTENSIONS #define MODE "ce" #define PRIO 300 -#define aes_setkey ce_aes_setkey #define aes_expandkey ce_aes_expandkey #define aes_ecb_encrypt ce_aes_ecb_encrypt #define aes_ecb_decrypt ce_aes_ecb_decrypt @@ -31,6 +31,8 @@ #define aes_cbc_decrypt ce_aes_cbc_decrypt #define aes_cbc_cts_encrypt ce_aes_cbc_cts_encrypt #define aes_cbc_cts_decrypt ce_aes_cbc_cts_decrypt +#define aes_essiv_cbc_encrypt ce_aes_essiv_cbc_encrypt +#define aes_essiv_cbc_decrypt ce_aes_essiv_cbc_decrypt #define aes_ctr_encrypt ce_aes_ctr_encrypt #define aes_xts_encrypt ce_aes_xts_encrypt #define aes_xts_decrypt ce_aes_xts_decrypt @@ -39,27 +41,31 @@ MODULE_DESCRIPTION("AES-ECB/CBC/CTR/XTS using ARMv8 Crypto Extensions"); #else #define MODE "neon" #define PRIO 200 -#define aes_setkey crypto_aes_set_key -#define aes_expandkey crypto_aes_expand_key #define aes_ecb_encrypt neon_aes_ecb_encrypt #define aes_ecb_decrypt neon_aes_ecb_decrypt #define aes_cbc_encrypt neon_aes_cbc_encrypt #define aes_cbc_decrypt neon_aes_cbc_decrypt #define aes_cbc_cts_encrypt neon_aes_cbc_cts_encrypt #define aes_cbc_cts_decrypt neon_aes_cbc_cts_decrypt +#define aes_essiv_cbc_encrypt neon_aes_essiv_cbc_encrypt +#define aes_essiv_cbc_decrypt neon_aes_essiv_cbc_decrypt #define aes_ctr_encrypt neon_aes_ctr_encrypt #define aes_xts_encrypt neon_aes_xts_encrypt #define aes_xts_decrypt neon_aes_xts_decrypt #define aes_mac_update neon_aes_mac_update MODULE_DESCRIPTION("AES-ECB/CBC/CTR/XTS using ARMv8 NEON"); +#endif +#if defined(USE_V8_CRYPTO_EXTENSIONS) || !defined(CONFIG_CRYPTO_AES_ARM64_BS) MODULE_ALIAS_CRYPTO("ecb(aes)"); MODULE_ALIAS_CRYPTO("cbc(aes)"); MODULE_ALIAS_CRYPTO("ctr(aes)"); MODULE_ALIAS_CRYPTO("xts(aes)"); +#endif +MODULE_ALIAS_CRYPTO("cts(cbc(aes))"); +MODULE_ALIAS_CRYPTO("essiv(cbc(aes),sha256)"); MODULE_ALIAS_CRYPTO("cmac(aes)"); MODULE_ALIAS_CRYPTO("xcbc(aes)"); MODULE_ALIAS_CRYPTO("cbcmac(aes)"); -#endif MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>"); MODULE_LICENSE("GPL v2"); @@ -84,25 +90,32 @@ asmlinkage void aes_ctr_encrypt(u8 out[], u8 const in[], u32 const rk[], int rounds, int blocks, u8 ctr[]); asmlinkage void aes_xts_encrypt(u8 out[], u8 const in[], u32 const rk1[], - int rounds, int blocks, u32 const rk2[], u8 iv[], + int rounds, int bytes, u32 const rk2[], u8 iv[], int first); asmlinkage void aes_xts_decrypt(u8 out[], u8 const in[], u32 const rk1[], - int 
rounds, int blocks, u32 const rk2[], u8 iv[], + int rounds, int bytes, u32 const rk2[], u8 iv[], int first); +asmlinkage void aes_essiv_cbc_encrypt(u8 out[], u8 const in[], u32 const rk1[], + int rounds, int blocks, u8 iv[], + u32 const rk2[]); +asmlinkage void aes_essiv_cbc_decrypt(u8 out[], u8 const in[], u32 const rk1[], + int rounds, int blocks, u8 iv[], + u32 const rk2[]); + asmlinkage void aes_mac_update(u8 const in[], u32 const rk[], int rounds, int blocks, u8 dg[], int enc_before, int enc_after); -struct cts_cbc_req_ctx { - struct scatterlist sg_src[2]; - struct scatterlist sg_dst[2]; - struct skcipher_request subreq; +struct crypto_aes_xts_ctx { + struct crypto_aes_ctx key1; + struct crypto_aes_ctx __aligned(8) key2; }; -struct crypto_aes_xts_ctx { +struct crypto_aes_essiv_cbc_ctx { struct crypto_aes_ctx key1; struct crypto_aes_ctx __aligned(8) key2; + struct crypto_shash *hash; }; struct mac_tfm_ctx { @@ -118,11 +131,18 @@ struct mac_desc_ctx { static int skcipher_aes_setkey(struct crypto_skcipher *tfm, const u8 *in_key, unsigned int key_len) { - return aes_setkey(crypto_skcipher_tfm(tfm), in_key, key_len); + struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm); + int ret; + + ret = aes_expandkey(ctx, in_key, key_len); + if (ret) + crypto_skcipher_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN); + + return ret; } -static int xts_set_key(struct crypto_skcipher *tfm, const u8 *in_key, - unsigned int key_len) +static int __maybe_unused xts_set_key(struct crypto_skcipher *tfm, + const u8 *in_key, unsigned int key_len) { struct crypto_aes_xts_ctx *ctx = crypto_skcipher_ctx(tfm); int ret; @@ -142,7 +162,33 @@ static int xts_set_key(struct crypto_skcipher *tfm, const u8 *in_key, return -EINVAL; } -static int ecb_encrypt(struct skcipher_request *req) +static int __maybe_unused essiv_cbc_set_key(struct crypto_skcipher *tfm, + const u8 *in_key, + unsigned int key_len) +{ + struct crypto_aes_essiv_cbc_ctx *ctx = crypto_skcipher_ctx(tfm); + SHASH_DESC_ON_STACK(desc, ctx->hash); + u8 digest[SHA256_DIGEST_SIZE]; + int ret; + + ret = aes_expandkey(&ctx->key1, in_key, key_len); + if (ret) + goto out; + + desc->tfm = ctx->hash; + crypto_shash_digest(desc, in_key, key_len, digest); + + ret = aes_expandkey(&ctx->key2, digest, sizeof(digest)); + if (ret) + goto out; + + return 0; +out: + crypto_skcipher_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN); + return -EINVAL; +} + +static int __maybe_unused ecb_encrypt(struct skcipher_request *req) { struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm); @@ -162,7 +208,7 @@ static int ecb_encrypt(struct skcipher_request *req) return err; } -static int ecb_decrypt(struct skcipher_request *req) +static int __maybe_unused ecb_decrypt(struct skcipher_request *req) { struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm); @@ -182,63 +228,78 @@ static int ecb_decrypt(struct skcipher_request *req) return err; } -static int cbc_encrypt(struct skcipher_request *req) +static int cbc_encrypt_walk(struct skcipher_request *req, + struct skcipher_walk *walk) { struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm); - int err, rounds = 6 + ctx->key_length / 4; - struct skcipher_walk walk; + int err = 0, rounds = 6 + ctx->key_length / 4; unsigned int blocks; - err = skcipher_walk_virt(&walk, req, false); - - while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) { + while ((blocks = (walk->nbytes / 
AES_BLOCK_SIZE))) { kernel_neon_begin(); - aes_cbc_encrypt(walk.dst.virt.addr, walk.src.virt.addr, - ctx->key_enc, rounds, blocks, walk.iv); + aes_cbc_encrypt(walk->dst.virt.addr, walk->src.virt.addr, + ctx->key_enc, rounds, blocks, walk->iv); kernel_neon_end(); - err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE); + err = skcipher_walk_done(walk, walk->nbytes % AES_BLOCK_SIZE); } return err; } -static int cbc_decrypt(struct skcipher_request *req) +static int __maybe_unused cbc_encrypt(struct skcipher_request *req) { - struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); - struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm); - int err, rounds = 6 + ctx->key_length / 4; struct skcipher_walk walk; - unsigned int blocks; + int err; err = skcipher_walk_virt(&walk, req, false); + if (err) + return err; + return cbc_encrypt_walk(req, &walk); +} - while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) { +static int cbc_decrypt_walk(struct skcipher_request *req, + struct skcipher_walk *walk) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm); + int err = 0, rounds = 6 + ctx->key_length / 4; + unsigned int blocks; + + while ((blocks = (walk->nbytes / AES_BLOCK_SIZE))) { kernel_neon_begin(); - aes_cbc_decrypt(walk.dst.virt.addr, walk.src.virt.addr, - ctx->key_dec, rounds, blocks, walk.iv); + aes_cbc_decrypt(walk->dst.virt.addr, walk->src.virt.addr, + ctx->key_dec, rounds, blocks, walk->iv); kernel_neon_end(); - err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE); + err = skcipher_walk_done(walk, walk->nbytes % AES_BLOCK_SIZE); } return err; } -static int cts_cbc_init_tfm(struct crypto_skcipher *tfm) +static int __maybe_unused cbc_decrypt(struct skcipher_request *req) { - crypto_skcipher_set_reqsize(tfm, sizeof(struct cts_cbc_req_ctx)); - return 0; + struct skcipher_walk walk; + int err; + + err = skcipher_walk_virt(&walk, req, false); + if (err) + return err; + return cbc_decrypt_walk(req, &walk); } static int cts_cbc_encrypt(struct skcipher_request *req) { struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm); - struct cts_cbc_req_ctx *rctx = skcipher_request_ctx(req); int err, rounds = 6 + ctx->key_length / 4; int cbc_blocks = DIV_ROUND_UP(req->cryptlen, AES_BLOCK_SIZE) - 2; struct scatterlist *src = req->src, *dst = req->dst; + struct scatterlist sg_src[2], sg_dst[2]; + struct skcipher_request subreq; struct skcipher_walk walk; - skcipher_request_set_tfm(&rctx->subreq, tfm); + skcipher_request_set_tfm(&subreq, tfm); + skcipher_request_set_callback(&subreq, skcipher_request_flags(req), + NULL, NULL); if (req->cryptlen <= AES_BLOCK_SIZE) { if (req->cryptlen < AES_BLOCK_SIZE) @@ -247,41 +308,30 @@ static int cts_cbc_encrypt(struct skcipher_request *req) } if (cbc_blocks > 0) { - unsigned int blocks; - - skcipher_request_set_crypt(&rctx->subreq, req->src, req->dst, + skcipher_request_set_crypt(&subreq, req->src, req->dst, cbc_blocks * AES_BLOCK_SIZE, req->iv); - err = skcipher_walk_virt(&walk, &rctx->subreq, false); - - while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) { - kernel_neon_begin(); - aes_cbc_encrypt(walk.dst.virt.addr, walk.src.virt.addr, - ctx->key_enc, rounds, blocks, walk.iv); - kernel_neon_end(); - err = skcipher_walk_done(&walk, - walk.nbytes % AES_BLOCK_SIZE); - } + err = skcipher_walk_virt(&walk, &subreq, false) ?: + cbc_encrypt_walk(&subreq, &walk); if (err) return err; if (req->cryptlen == AES_BLOCK_SIZE) return 0; - dst = src = 
scatterwalk_ffwd(rctx->sg_src, req->src, - rctx->subreq.cryptlen); + dst = src = scatterwalk_ffwd(sg_src, req->src, subreq.cryptlen); if (req->dst != req->src) - dst = scatterwalk_ffwd(rctx->sg_dst, req->dst, - rctx->subreq.cryptlen); + dst = scatterwalk_ffwd(sg_dst, req->dst, + subreq.cryptlen); } /* handle ciphertext stealing */ - skcipher_request_set_crypt(&rctx->subreq, src, dst, + skcipher_request_set_crypt(&subreq, src, dst, req->cryptlen - cbc_blocks * AES_BLOCK_SIZE, req->iv); - err = skcipher_walk_virt(&walk, &rctx->subreq, false); + err = skcipher_walk_virt(&walk, &subreq, false); if (err) return err; @@ -297,13 +347,16 @@ static int cts_cbc_decrypt(struct skcipher_request *req) { struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm); - struct cts_cbc_req_ctx *rctx = skcipher_request_ctx(req); int err, rounds = 6 + ctx->key_length / 4; int cbc_blocks = DIV_ROUND_UP(req->cryptlen, AES_BLOCK_SIZE) - 2; struct scatterlist *src = req->src, *dst = req->dst; + struct scatterlist sg_src[2], sg_dst[2]; + struct skcipher_request subreq; struct skcipher_walk walk; - skcipher_request_set_tfm(&rctx->subreq, tfm); + skcipher_request_set_tfm(&subreq, tfm); + skcipher_request_set_callback(&subreq, skcipher_request_flags(req), + NULL, NULL); if (req->cryptlen <= AES_BLOCK_SIZE) { if (req->cryptlen < AES_BLOCK_SIZE) @@ -312,41 +365,30 @@ static int cts_cbc_decrypt(struct skcipher_request *req) } if (cbc_blocks > 0) { - unsigned int blocks; - - skcipher_request_set_crypt(&rctx->subreq, req->src, req->dst, + skcipher_request_set_crypt(&subreq, req->src, req->dst, cbc_blocks * AES_BLOCK_SIZE, req->iv); - err = skcipher_walk_virt(&walk, &rctx->subreq, false); - - while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) { - kernel_neon_begin(); - aes_cbc_decrypt(walk.dst.virt.addr, walk.src.virt.addr, - ctx->key_dec, rounds, blocks, walk.iv); - kernel_neon_end(); - err = skcipher_walk_done(&walk, - walk.nbytes % AES_BLOCK_SIZE); - } + err = skcipher_walk_virt(&walk, &subreq, false) ?: + cbc_decrypt_walk(&subreq, &walk); if (err) return err; if (req->cryptlen == AES_BLOCK_SIZE) return 0; - dst = src = scatterwalk_ffwd(rctx->sg_src, req->src, - rctx->subreq.cryptlen); + dst = src = scatterwalk_ffwd(sg_src, req->src, subreq.cryptlen); if (req->dst != req->src) - dst = scatterwalk_ffwd(rctx->sg_dst, req->dst, - rctx->subreq.cryptlen); + dst = scatterwalk_ffwd(sg_dst, req->dst, + subreq.cryptlen); } /* handle ciphertext stealing */ - skcipher_request_set_crypt(&rctx->subreq, src, dst, + skcipher_request_set_crypt(&subreq, src, dst, req->cryptlen - cbc_blocks * AES_BLOCK_SIZE, req->iv); - err = skcipher_walk_virt(&walk, &rctx->subreq, false); + err = skcipher_walk_virt(&walk, &subreq, false); if (err) return err; @@ -358,6 +400,66 @@ static int cts_cbc_decrypt(struct skcipher_request *req) return skcipher_walk_done(&walk, 0); } +static int __maybe_unused essiv_cbc_init_tfm(struct crypto_skcipher *tfm) +{ + struct crypto_aes_essiv_cbc_ctx *ctx = crypto_skcipher_ctx(tfm); + + ctx->hash = crypto_alloc_shash("sha256", 0, 0); + + return PTR_ERR_OR_ZERO(ctx->hash); +} + +static void __maybe_unused essiv_cbc_exit_tfm(struct crypto_skcipher *tfm) +{ + struct crypto_aes_essiv_cbc_ctx *ctx = crypto_skcipher_ctx(tfm); + + crypto_free_shash(ctx->hash); +} + +static int __maybe_unused essiv_cbc_encrypt(struct skcipher_request *req) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct crypto_aes_essiv_cbc_ctx *ctx = crypto_skcipher_ctx(tfm); + int 
err, rounds = 6 + ctx->key1.key_length / 4; + struct skcipher_walk walk; + unsigned int blocks; + + err = skcipher_walk_virt(&walk, req, false); + + blocks = walk.nbytes / AES_BLOCK_SIZE; + if (blocks) { + kernel_neon_begin(); + aes_essiv_cbc_encrypt(walk.dst.virt.addr, walk.src.virt.addr, + ctx->key1.key_enc, rounds, blocks, + req->iv, ctx->key2.key_enc); + kernel_neon_end(); + err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE); + } + return err ?: cbc_encrypt_walk(req, &walk); +} + +static int __maybe_unused essiv_cbc_decrypt(struct skcipher_request *req) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct crypto_aes_essiv_cbc_ctx *ctx = crypto_skcipher_ctx(tfm); + int err, rounds = 6 + ctx->key1.key_length / 4; + struct skcipher_walk walk; + unsigned int blocks; + + err = skcipher_walk_virt(&walk, req, false); + + blocks = walk.nbytes / AES_BLOCK_SIZE; + if (blocks) { + kernel_neon_begin(); + aes_essiv_cbc_decrypt(walk.dst.virt.addr, walk.src.virt.addr, + ctx->key1.key_dec, rounds, blocks, + req->iv, ctx->key2.key_enc); + kernel_neon_end(); + err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE); + } + return err ?: cbc_decrypt_walk(req, &walk); +} + static int ctr_encrypt(struct skcipher_request *req) { struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); @@ -397,62 +499,176 @@ static int ctr_encrypt(struct skcipher_request *req) return err; } -static int ctr_encrypt_sync(struct skcipher_request *req) +static void ctr_encrypt_one(struct crypto_skcipher *tfm, const u8 *src, u8 *dst) { - struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); - struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm); + const struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm); + unsigned long flags; + + /* + * Temporarily disable interrupts to avoid races where + * cachelines are evicted when the CPU is interrupted + * to do something else. 
+ */ + local_irq_save(flags); + aes_encrypt(ctx, dst, src); + local_irq_restore(flags); +} +static int __maybe_unused ctr_encrypt_sync(struct skcipher_request *req) +{ if (!crypto_simd_usable()) - return aes_ctr_encrypt_fallback(ctx, req); + return crypto_ctr_encrypt_walk(req, ctr_encrypt_one); return ctr_encrypt(req); } -static int xts_encrypt(struct skcipher_request *req) +static int __maybe_unused xts_encrypt(struct skcipher_request *req) { struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); struct crypto_aes_xts_ctx *ctx = crypto_skcipher_ctx(tfm); int err, first, rounds = 6 + ctx->key1.key_length / 4; + int tail = req->cryptlen % AES_BLOCK_SIZE; + struct scatterlist sg_src[2], sg_dst[2]; + struct skcipher_request subreq; + struct scatterlist *src, *dst; struct skcipher_walk walk; - unsigned int blocks; + + if (req->cryptlen < AES_BLOCK_SIZE) + return -EINVAL; err = skcipher_walk_virt(&walk, req, false); - for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) { + if (unlikely(tail > 0 && walk.nbytes < walk.total)) { + int xts_blocks = DIV_ROUND_UP(req->cryptlen, + AES_BLOCK_SIZE) - 2; + + skcipher_walk_abort(&walk); + + skcipher_request_set_tfm(&subreq, tfm); + skcipher_request_set_callback(&subreq, + skcipher_request_flags(req), + NULL, NULL); + skcipher_request_set_crypt(&subreq, req->src, req->dst, + xts_blocks * AES_BLOCK_SIZE, + req->iv); + req = &subreq; + err = skcipher_walk_virt(&walk, req, false); + } else { + tail = 0; + } + + for (first = 1; walk.nbytes >= AES_BLOCK_SIZE; first = 0) { + int nbytes = walk.nbytes; + + if (walk.nbytes < walk.total) + nbytes &= ~(AES_BLOCK_SIZE - 1); + kernel_neon_begin(); aes_xts_encrypt(walk.dst.virt.addr, walk.src.virt.addr, - ctx->key1.key_enc, rounds, blocks, + ctx->key1.key_enc, rounds, nbytes, ctx->key2.key_enc, walk.iv, first); kernel_neon_end(); - err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE); + err = skcipher_walk_done(&walk, walk.nbytes - nbytes); } - return err; + if (err || likely(!tail)) + return err; + + dst = src = scatterwalk_ffwd(sg_src, req->src, req->cryptlen); + if (req->dst != req->src) + dst = scatterwalk_ffwd(sg_dst, req->dst, req->cryptlen); + + skcipher_request_set_crypt(req, src, dst, AES_BLOCK_SIZE + tail, + req->iv); + + err = skcipher_walk_virt(&walk, &subreq, false); + if (err) + return err; + + kernel_neon_begin(); + aes_xts_encrypt(walk.dst.virt.addr, walk.src.virt.addr, + ctx->key1.key_enc, rounds, walk.nbytes, + ctx->key2.key_enc, walk.iv, first); + kernel_neon_end(); + + return skcipher_walk_done(&walk, 0); } -static int xts_decrypt(struct skcipher_request *req) +static int __maybe_unused xts_decrypt(struct skcipher_request *req) { struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); struct crypto_aes_xts_ctx *ctx = crypto_skcipher_ctx(tfm); int err, first, rounds = 6 + ctx->key1.key_length / 4; + int tail = req->cryptlen % AES_BLOCK_SIZE; + struct scatterlist sg_src[2], sg_dst[2]; + struct skcipher_request subreq; + struct scatterlist *src, *dst; struct skcipher_walk walk; - unsigned int blocks; + + if (req->cryptlen < AES_BLOCK_SIZE) + return -EINVAL; err = skcipher_walk_virt(&walk, req, false); - for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) { + if (unlikely(tail > 0 && walk.nbytes < walk.total)) { + int xts_blocks = DIV_ROUND_UP(req->cryptlen, + AES_BLOCK_SIZE) - 2; + + skcipher_walk_abort(&walk); + + skcipher_request_set_tfm(&subreq, tfm); + skcipher_request_set_callback(&subreq, + skcipher_request_flags(req), + NULL, NULL); + 
skcipher_request_set_crypt(&subreq, req->src, req->dst, + xts_blocks * AES_BLOCK_SIZE, + req->iv); + req = &subreq; + err = skcipher_walk_virt(&walk, req, false); + } else { + tail = 0; + } + + for (first = 1; walk.nbytes >= AES_BLOCK_SIZE; first = 0) { + int nbytes = walk.nbytes; + + if (walk.nbytes < walk.total) + nbytes &= ~(AES_BLOCK_SIZE - 1); + kernel_neon_begin(); aes_xts_decrypt(walk.dst.virt.addr, walk.src.virt.addr, - ctx->key1.key_dec, rounds, blocks, + ctx->key1.key_dec, rounds, nbytes, ctx->key2.key_enc, walk.iv, first); kernel_neon_end(); - err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE); + err = skcipher_walk_done(&walk, walk.nbytes - nbytes); } - return err; + if (err || likely(!tail)) + return err; + + dst = src = scatterwalk_ffwd(sg_src, req->src, req->cryptlen); + if (req->dst != req->src) + dst = scatterwalk_ffwd(sg_dst, req->dst, req->cryptlen); + + skcipher_request_set_crypt(req, src, dst, AES_BLOCK_SIZE + tail, + req->iv); + + err = skcipher_walk_virt(&walk, &subreq, false); + if (err) + return err; + + + kernel_neon_begin(); + aes_xts_decrypt(walk.dst.virt.addr, walk.src.virt.addr, + ctx->key1.key_dec, rounds, walk.nbytes, + ctx->key2.key_enc, walk.iv, first); + kernel_neon_end(); + + return skcipher_walk_done(&walk, 0); } static struct skcipher_alg aes_algs[] = { { +#if defined(USE_V8_CRYPTO_EXTENSIONS) || !defined(CONFIG_CRYPTO_AES_ARM64_BS) .base = { .cra_name = "__ecb(aes)", .cra_driver_name = "__ecb-aes-" MODE, @@ -485,24 +701,6 @@ static struct skcipher_alg aes_algs[] = { { .decrypt = cbc_decrypt, }, { .base = { - .cra_name = "__cts(cbc(aes))", - .cra_driver_name = "__cts-cbc-aes-" MODE, - .cra_priority = PRIO, - .cra_flags = CRYPTO_ALG_INTERNAL, - .cra_blocksize = AES_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct crypto_aes_ctx), - .cra_module = THIS_MODULE, - }, - .min_keysize = AES_MIN_KEY_SIZE, - .max_keysize = AES_MAX_KEY_SIZE, - .ivsize = AES_BLOCK_SIZE, - .walksize = 2 * AES_BLOCK_SIZE, - .setkey = skcipher_aes_setkey, - .encrypt = cts_cbc_encrypt, - .decrypt = cts_cbc_decrypt, - .init = cts_cbc_init_tfm, -}, { - .base = { .cra_name = "__ctr(aes)", .cra_driver_name = "__ctr-aes-" MODE, .cra_priority = PRIO, @@ -547,9 +745,46 @@ static struct skcipher_alg aes_algs[] = { { .min_keysize = 2 * AES_MIN_KEY_SIZE, .max_keysize = 2 * AES_MAX_KEY_SIZE, .ivsize = AES_BLOCK_SIZE, + .walksize = 2 * AES_BLOCK_SIZE, .setkey = xts_set_key, .encrypt = xts_encrypt, .decrypt = xts_decrypt, +}, { +#endif + .base = { + .cra_name = "__cts(cbc(aes))", + .cra_driver_name = "__cts-cbc-aes-" MODE, + .cra_priority = PRIO, + .cra_flags = CRYPTO_ALG_INTERNAL, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct crypto_aes_ctx), + .cra_module = THIS_MODULE, + }, + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, + .walksize = 2 * AES_BLOCK_SIZE, + .setkey = skcipher_aes_setkey, + .encrypt = cts_cbc_encrypt, + .decrypt = cts_cbc_decrypt, +}, { + .base = { + .cra_name = "__essiv(cbc(aes),sha256)", + .cra_driver_name = "__essiv-cbc-aes-sha256-" MODE, + .cra_priority = PRIO + 1, + .cra_flags = CRYPTO_ALG_INTERNAL, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct crypto_aes_essiv_cbc_ctx), + .cra_module = THIS_MODULE, + }, + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, + .setkey = essiv_cbc_set_key, + .encrypt = essiv_cbc_encrypt, + .decrypt = essiv_cbc_decrypt, + .init = essiv_cbc_init_tfm, + .exit = essiv_cbc_exit_tfm, } }; static int 
cbcmac_setkey(struct crypto_shash *tfm, const u8 *in_key, @@ -646,15 +881,14 @@ static void mac_do_update(struct crypto_aes_ctx *ctx, u8 const in[], int blocks, kernel_neon_end(); } else { if (enc_before) - __aes_arm64_encrypt(ctx->key_enc, dg, dg, rounds); + aes_encrypt(ctx, dg, dg); while (blocks--) { crypto_xor(dg, in, AES_BLOCK_SIZE); in += AES_BLOCK_SIZE; if (blocks || enc_after) - __aes_arm64_encrypt(ctx->key_enc, dg, dg, - rounds); + aes_encrypt(ctx, dg, dg); } } } @@ -837,5 +1071,7 @@ module_cpu_feature_match(AES, aes_init); module_init(aes_init); EXPORT_SYMBOL(neon_aes_ecb_encrypt); EXPORT_SYMBOL(neon_aes_cbc_encrypt); +EXPORT_SYMBOL(neon_aes_xts_encrypt); +EXPORT_SYMBOL(neon_aes_xts_decrypt); #endif module_exit(aes_exit); diff --git a/arch/arm64/crypto/aes-modes.S b/arch/arm64/crypto/aes-modes.S index 324039b72094..131618389f1f 100644 --- a/arch/arm64/crypto/aes-modes.S +++ b/arch/arm64/crypto/aes-modes.S @@ -118,8 +118,23 @@ AES_ENDPROC(aes_ecb_decrypt) * int blocks, u8 iv[]) * aes_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds, * int blocks, u8 iv[]) + * aes_essiv_cbc_encrypt(u8 out[], u8 const in[], u32 const rk1[], + * int rounds, int blocks, u8 iv[], + * u32 const rk2[]); + * aes_essiv_cbc_decrypt(u8 out[], u8 const in[], u32 const rk1[], + * int rounds, int blocks, u8 iv[], + * u32 const rk2[]); */ +AES_ENTRY(aes_essiv_cbc_encrypt) + ld1 {v4.16b}, [x5] /* get iv */ + + mov w8, #14 /* AES-256: 14 rounds */ + enc_prepare w8, x6, x7 + encrypt_block v4, w8, x6, x7, w9 + enc_switch_key w3, x2, x6 + b .Lcbcencloop4x + AES_ENTRY(aes_cbc_encrypt) ld1 {v4.16b}, [x5] /* get iv */ enc_prepare w3, x2, x6 @@ -153,13 +168,25 @@ AES_ENTRY(aes_cbc_encrypt) st1 {v4.16b}, [x5] /* return iv */ ret AES_ENDPROC(aes_cbc_encrypt) +AES_ENDPROC(aes_essiv_cbc_encrypt) +AES_ENTRY(aes_essiv_cbc_decrypt) + stp x29, x30, [sp, #-16]! + mov x29, sp + + ld1 {cbciv.16b}, [x5] /* get iv */ + + mov w8, #14 /* AES-256: 14 rounds */ + enc_prepare w8, x6, x7 + encrypt_block cbciv, w8, x6, x7, w9 + b .Lessivcbcdecstart AES_ENTRY(aes_cbc_decrypt) stp x29, x30, [sp, #-16]! 
mov x29, sp ld1 {cbciv.16b}, [x5] /* get iv */ +.Lessivcbcdecstart: dec_prepare w3, x2, x6 .LcbcdecloopNx: @@ -212,6 +239,7 @@ ST5( st1 {v4.16b}, [x0], #16 ) ldp x29, x30, [sp], #16 ret AES_ENDPROC(aes_cbc_decrypt) +AES_ENDPROC(aes_essiv_cbc_decrypt) /* @@ -265,12 +293,11 @@ AES_ENTRY(aes_cbc_cts_decrypt) ld1 {v5.16b}, [x5] /* get iv */ dec_prepare w3, x2, x6 - tbl v2.16b, {v1.16b}, v4.16b decrypt_block v0, w3, x2, x6, w7 - eor v2.16b, v2.16b, v0.16b + tbl v2.16b, {v0.16b}, v3.16b + eor v2.16b, v2.16b, v1.16b tbx v0.16b, {v1.16b}, v4.16b - tbl v2.16b, {v2.16b}, v3.16b decrypt_block v0, w3, x2, x6, w7 eor v0.16b, v0.16b, v5.16b /* xor with iv */ @@ -386,10 +413,10 @@ AES_ENDPROC(aes_ctr_encrypt) /* + * aes_xts_encrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds, + * int bytes, u8 const rk2[], u8 iv[], int first) * aes_xts_decrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds, - * int blocks, u8 const rk2[], u8 iv[], int first) - * aes_xts_decrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds, - * int blocks, u8 const rk2[], u8 iv[], int first) + * int bytes, u8 const rk2[], u8 iv[], int first) */ .macro next_tweak, out, in, tmp @@ -415,6 +442,7 @@ AES_ENTRY(aes_xts_encrypt) cbz w7, .Lxtsencnotfirst enc_prepare w3, x5, x8 + xts_cts_skip_tw w7, .LxtsencNx encrypt_block v4, w3, x5, x8, w7 /* first tweak */ enc_switch_key w3, x2, x8 b .LxtsencNx @@ -424,7 +452,7 @@ AES_ENTRY(aes_xts_encrypt) .LxtsencloopNx: next_tweak v4, v4, v8 .LxtsencNx: - subs w4, w4, #4 + subs w4, w4, #64 bmi .Lxtsenc1x ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 pt blocks */ next_tweak v5, v4, v8 @@ -441,39 +469,74 @@ AES_ENTRY(aes_xts_encrypt) eor v2.16b, v2.16b, v6.16b st1 {v0.16b-v3.16b}, [x0], #64 mov v4.16b, v7.16b - cbz w4, .Lxtsencout + cbz w4, .Lxtsencret xts_reload_mask v8 b .LxtsencloopNx .Lxtsenc1x: - adds w4, w4, #4 + adds w4, w4, #64 beq .Lxtsencout + subs w4, w4, #16 + bmi .LxtsencctsNx .Lxtsencloop: - ld1 {v1.16b}, [x1], #16 - eor v0.16b, v1.16b, v4.16b + ld1 {v0.16b}, [x1], #16 +.Lxtsencctsout: + eor v0.16b, v0.16b, v4.16b encrypt_block v0, w3, x2, x8, w7 eor v0.16b, v0.16b, v4.16b - st1 {v0.16b}, [x0], #16 - subs w4, w4, #1 - beq .Lxtsencout + cbz w4, .Lxtsencout + subs w4, w4, #16 next_tweak v4, v4, v8 + bmi .Lxtsenccts + st1 {v0.16b}, [x0], #16 b .Lxtsencloop .Lxtsencout: + st1 {v0.16b}, [x0] +.Lxtsencret: st1 {v4.16b}, [x6] ldp x29, x30, [sp], #16 ret -AES_ENDPROC(aes_xts_encrypt) +.LxtsencctsNx: + mov v0.16b, v3.16b + sub x0, x0, #16 +.Lxtsenccts: + adr_l x8, .Lcts_permute_table + + add x1, x1, w4, sxtw /* rewind input pointer */ + add w4, w4, #16 /* # bytes in final block */ + add x9, x8, #32 + add x8, x8, x4 + sub x9, x9, x4 + add x4, x0, x4 /* output address of final block */ + + ld1 {v1.16b}, [x1] /* load final block */ + ld1 {v2.16b}, [x8] + ld1 {v3.16b}, [x9] + + tbl v2.16b, {v0.16b}, v2.16b + tbx v0.16b, {v1.16b}, v3.16b + st1 {v2.16b}, [x4] /* overlapping stores */ + mov w4, wzr + b .Lxtsencctsout +AES_ENDPROC(aes_xts_encrypt) AES_ENTRY(aes_xts_decrypt) stp x29, x30, [sp, #-16]! 
mov x29, sp + /* subtract 16 bytes if we are doing CTS */ + sub w8, w4, #0x10 + tst w4, #0xf + csel w4, w4, w8, eq + ld1 {v4.16b}, [x6] xts_load_mask v8 + xts_cts_skip_tw w7, .Lxtsdecskiptw cbz w7, .Lxtsdecnotfirst enc_prepare w3, x5, x8 encrypt_block v4, w3, x5, x8, w7 /* first tweak */ +.Lxtsdecskiptw: dec_prepare w3, x2, x8 b .LxtsdecNx @@ -482,7 +545,7 @@ AES_ENTRY(aes_xts_decrypt) .LxtsdecloopNx: next_tweak v4, v4, v8 .LxtsdecNx: - subs w4, w4, #4 + subs w4, w4, #64 bmi .Lxtsdec1x ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 ct blocks */ next_tweak v5, v4, v8 @@ -503,22 +566,52 @@ AES_ENTRY(aes_xts_decrypt) xts_reload_mask v8 b .LxtsdecloopNx .Lxtsdec1x: - adds w4, w4, #4 + adds w4, w4, #64 beq .Lxtsdecout + subs w4, w4, #16 .Lxtsdecloop: - ld1 {v1.16b}, [x1], #16 - eor v0.16b, v1.16b, v4.16b + ld1 {v0.16b}, [x1], #16 + bmi .Lxtsdeccts +.Lxtsdecctsout: + eor v0.16b, v0.16b, v4.16b decrypt_block v0, w3, x2, x8, w7 eor v0.16b, v0.16b, v4.16b st1 {v0.16b}, [x0], #16 - subs w4, w4, #1 - beq .Lxtsdecout + cbz w4, .Lxtsdecout + subs w4, w4, #16 next_tweak v4, v4, v8 b .Lxtsdecloop .Lxtsdecout: st1 {v4.16b}, [x6] ldp x29, x30, [sp], #16 ret + +.Lxtsdeccts: + adr_l x8, .Lcts_permute_table + + add x1, x1, w4, sxtw /* rewind input pointer */ + add w4, w4, #16 /* # bytes in final block */ + add x9, x8, #32 + add x8, x8, x4 + sub x9, x9, x4 + add x4, x0, x4 /* output address of final block */ + + next_tweak v5, v4, v8 + + ld1 {v1.16b}, [x1] /* load final block */ + ld1 {v2.16b}, [x8] + ld1 {v3.16b}, [x9] + + eor v0.16b, v0.16b, v5.16b + decrypt_block v0, w3, x2, x8, w7 + eor v0.16b, v0.16b, v5.16b + + tbl v2.16b, {v0.16b}, v2.16b + tbx v0.16b, {v1.16b}, v3.16b + + st1 {v2.16b}, [x4] /* overlapping stores */ + mov w4, wzr + b .Lxtsdecctsout AES_ENDPROC(aes_xts_decrypt) /* diff --git a/arch/arm64/crypto/aes-neon.S b/arch/arm64/crypto/aes-neon.S index 2bebccc73869..22d9b110cf78 100644 --- a/arch/arm64/crypto/aes-neon.S +++ b/arch/arm64/crypto/aes-neon.S @@ -19,6 +19,11 @@ xts_load_mask \tmp .endm + /* special case for the neon-bs driver calling into this one for CTS */ + .macro xts_cts_skip_tw, reg, lbl + tbnz \reg, #1, \lbl + .endm + /* multiply by polynomial 'x' in GF(2^8) */ .macro mul_by_x, out, in, temp, const sshr \temp, \in, #7 @@ -49,7 +54,7 @@ /* do preload for encryption */ .macro enc_prepare, ignore0, ignore1, temp - prepare .LForward_Sbox, .LForward_ShiftRows, \temp + prepare crypto_aes_sbox, .LForward_ShiftRows, \temp .endm .macro enc_switch_key, ignore0, ignore1, temp @@ -58,7 +63,7 @@ /* do preload for decryption */ .macro dec_prepare, ignore0, ignore1, temp - prepare .LReverse_Sbox, .LReverse_ShiftRows, \temp + prepare crypto_aes_inv_sbox, .LReverse_ShiftRows, \temp .endm /* apply SubBytes transformation using the the preloaded Sbox */ @@ -234,75 +239,7 @@ #include "aes-modes.S" .section ".rodata", "a" - .align 6 -.LForward_Sbox: - .byte 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5 - .byte 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76 - .byte 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0 - .byte 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0 - .byte 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc - .byte 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15 - .byte 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a - .byte 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75 - .byte 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0 - .byte 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84 - .byte 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b - .byte 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf - .byte 
0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85 - .byte 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8 - .byte 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5 - .byte 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2 - .byte 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17 - .byte 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73 - .byte 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88 - .byte 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb - .byte 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c - .byte 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79 - .byte 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9 - .byte 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08 - .byte 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6 - .byte 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a - .byte 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e - .byte 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e - .byte 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94 - .byte 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf - .byte 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68 - .byte 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16 - -.LReverse_Sbox: - .byte 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38 - .byte 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb - .byte 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87 - .byte 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb - .byte 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d - .byte 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e - .byte 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2 - .byte 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25 - .byte 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16 - .byte 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92 - .byte 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda - .byte 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84 - .byte 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a - .byte 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06 - .byte 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02 - .byte 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b - .byte 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea - .byte 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73 - .byte 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85 - .byte 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e - .byte 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89 - .byte 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b - .byte 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20 - .byte 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4 - .byte 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31 - .byte 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f - .byte 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d - .byte 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef - .byte 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0 - .byte 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61 - .byte 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26 - .byte 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d - + .align 4 .LForward_ShiftRows: .octa 0x0b06010c07020d08030e09040f0a0500 diff --git a/arch/arm64/crypto/aes-neonbs-core.S b/arch/arm64/crypto/aes-neonbs-core.S index cf10ff8878a3..65982039fa36 100644 --- a/arch/arm64/crypto/aes-neonbs-core.S +++ b/arch/arm64/crypto/aes-neonbs-core.S @@ -730,11 +730,6 @@ ENDPROC(aesbs_cbc_decrypt) eor \out\().16b, \out\().16b, \tmp\().16b .endm - .align 4 -.Lxts_mul_x: -CPU_LE( .quad 1, 0x87 ) -CPU_BE( .quad 0x87, 1 ) - /* * aesbs_xts_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds, * int blocks, u8 iv[]) @@ -806,7 +801,9 @@ ENDPROC(__xts_crypt8) mov x23, x4 mov x24, x5 -0: ldr q30, .Lxts_mul_x +0: movi v30.2s, #0x1 + movi 
v25.2s, #0x87 + uzp1 v30.4s, v30.4s, v25.4s ld1 {v25.16b}, [x24] 99: adr x7, \do8 diff --git a/arch/arm64/crypto/aes-neonbs-glue.c b/arch/arm64/crypto/aes-neonbs-glue.c index 281d23087697..ea873b8904c4 100644 --- a/arch/arm64/crypto/aes-neonbs-glue.c +++ b/arch/arm64/crypto/aes-neonbs-glue.c @@ -8,13 +8,13 @@ #include <asm/neon.h> #include <asm/simd.h> #include <crypto/aes.h> +#include <crypto/ctr.h> #include <crypto/internal/simd.h> #include <crypto/internal/skcipher.h> +#include <crypto/scatterwalk.h> #include <crypto/xts.h> #include <linux/module.h> -#include "aes-ctr-fallback.h" - MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>"); MODULE_LICENSE("GPL v2"); @@ -46,6 +46,12 @@ asmlinkage void neon_aes_ecb_encrypt(u8 out[], u8 const in[], u32 const rk[], int rounds, int blocks); asmlinkage void neon_aes_cbc_encrypt(u8 out[], u8 const in[], u32 const rk[], int rounds, int blocks, u8 iv[]); +asmlinkage void neon_aes_xts_encrypt(u8 out[], u8 const in[], + u32 const rk1[], int rounds, int bytes, + u32 const rk2[], u8 iv[], int first); +asmlinkage void neon_aes_xts_decrypt(u8 out[], u8 const in[], + u32 const rk1[], int rounds, int bytes, + u32 const rk2[], u8 iv[], int first); struct aesbs_ctx { u8 rk[13 * (8 * AES_BLOCK_SIZE) + 32]; @@ -65,6 +71,7 @@ struct aesbs_ctr_ctx { struct aesbs_xts_ctx { struct aesbs_ctx key; u32 twkey[AES_MAX_KEYLENGTH_U32]; + struct crypto_aes_ctx cts; }; static int aesbs_setkey(struct crypto_skcipher *tfm, const u8 *in_key, @@ -74,7 +81,7 @@ static int aesbs_setkey(struct crypto_skcipher *tfm, const u8 *in_key, struct crypto_aes_ctx rk; int err; - err = crypto_aes_expand_key(&rk, in_key, key_len); + err = aes_expandkey(&rk, in_key, key_len); if (err) return err; @@ -133,7 +140,7 @@ static int aesbs_cbc_setkey(struct crypto_skcipher *tfm, const u8 *in_key, struct crypto_aes_ctx rk; int err; - err = crypto_aes_expand_key(&rk, in_key, key_len); + err = aes_expandkey(&rk, in_key, key_len); if (err) return err; @@ -205,7 +212,7 @@ static int aesbs_ctr_setkey_sync(struct crypto_skcipher *tfm, const u8 *in_key, struct aesbs_ctr_ctx *ctx = crypto_skcipher_ctx(tfm); int err; - err = crypto_aes_expand_key(&ctx->fallback, in_key, key_len); + err = aes_expandkey(&ctx->fallback, in_key, key_len); if (err) return err; @@ -271,7 +278,11 @@ static int aesbs_xts_setkey(struct crypto_skcipher *tfm, const u8 *in_key, return err; key_len /= 2; - err = crypto_aes_expand_key(&rk, in_key + key_len, key_len); + err = aes_expandkey(&ctx->cts, in_key, key_len); + if (err) + return err; + + err = aes_expandkey(&rk, in_key + key_len, key_len); if (err) return err; @@ -280,59 +291,142 @@ static int aesbs_xts_setkey(struct crypto_skcipher *tfm, const u8 *in_key, return aesbs_setkey(tfm, in_key, key_len); } -static int ctr_encrypt_sync(struct skcipher_request *req) +static void ctr_encrypt_one(struct crypto_skcipher *tfm, const u8 *src, u8 *dst) { - struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); struct aesbs_ctr_ctx *ctx = crypto_skcipher_ctx(tfm); + unsigned long flags; + + /* + * Temporarily disable interrupts to avoid races where + * cachelines are evicted when the CPU is interrupted + * to do something else. 
+ */ + local_irq_save(flags); + aes_encrypt(&ctx->fallback, dst, src); + local_irq_restore(flags); +} +static int ctr_encrypt_sync(struct skcipher_request *req) +{ if (!crypto_simd_usable()) - return aes_ctr_encrypt_fallback(&ctx->fallback, req); + return crypto_ctr_encrypt_walk(req, ctr_encrypt_one); return ctr_encrypt(req); } -static int __xts_crypt(struct skcipher_request *req, +static int __xts_crypt(struct skcipher_request *req, bool encrypt, void (*fn)(u8 out[], u8 const in[], u8 const rk[], int rounds, int blocks, u8 iv[])) { struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); struct aesbs_xts_ctx *ctx = crypto_skcipher_ctx(tfm); + int tail = req->cryptlen % (8 * AES_BLOCK_SIZE); + struct scatterlist sg_src[2], sg_dst[2]; + struct skcipher_request subreq; + struct scatterlist *src, *dst; struct skcipher_walk walk; - int err; + int nbytes, err; + int first = 1; + u8 *out, *in; + + if (req->cryptlen < AES_BLOCK_SIZE) + return -EINVAL; + + /* ensure that the cts tail is covered by a single step */ + if (unlikely(tail > 0 && tail < AES_BLOCK_SIZE)) { + int xts_blocks = DIV_ROUND_UP(req->cryptlen, + AES_BLOCK_SIZE) - 2; + + skcipher_request_set_tfm(&subreq, tfm); + skcipher_request_set_callback(&subreq, + skcipher_request_flags(req), + NULL, NULL); + skcipher_request_set_crypt(&subreq, req->src, req->dst, + xts_blocks * AES_BLOCK_SIZE, + req->iv); + req = &subreq; + } else { + tail = 0; + } err = skcipher_walk_virt(&walk, req, false); if (err) return err; - kernel_neon_begin(); - neon_aes_ecb_encrypt(walk.iv, walk.iv, ctx->twkey, ctx->key.rounds, 1); - kernel_neon_end(); - while (walk.nbytes >= AES_BLOCK_SIZE) { unsigned int blocks = walk.nbytes / AES_BLOCK_SIZE; - if (walk.nbytes < walk.total) + if (walk.nbytes < walk.total || walk.nbytes % AES_BLOCK_SIZE) blocks = round_down(blocks, walk.stride / AES_BLOCK_SIZE); + out = walk.dst.virt.addr; + in = walk.src.virt.addr; + nbytes = walk.nbytes; + kernel_neon_begin(); - fn(walk.dst.virt.addr, walk.src.virt.addr, ctx->key.rk, - ctx->key.rounds, blocks, walk.iv); + if (likely(blocks > 6)) { /* plain NEON is faster otherwise */ + if (first) + neon_aes_ecb_encrypt(walk.iv, walk.iv, + ctx->twkey, + ctx->key.rounds, 1); + first = 0; + + fn(out, in, ctx->key.rk, ctx->key.rounds, blocks, + walk.iv); + + out += blocks * AES_BLOCK_SIZE; + in += blocks * AES_BLOCK_SIZE; + nbytes -= blocks * AES_BLOCK_SIZE; + } + + if (walk.nbytes == walk.total && nbytes > 0) + goto xts_tail; + kernel_neon_end(); - err = skcipher_walk_done(&walk, - walk.nbytes - blocks * AES_BLOCK_SIZE); + skcipher_walk_done(&walk, nbytes); } - return err; + + if (err || likely(!tail)) + return err; + + /* handle ciphertext stealing */ + dst = src = scatterwalk_ffwd(sg_src, req->src, req->cryptlen); + if (req->dst != req->src) + dst = scatterwalk_ffwd(sg_dst, req->dst, req->cryptlen); + + skcipher_request_set_crypt(req, src, dst, AES_BLOCK_SIZE + tail, + req->iv); + + err = skcipher_walk_virt(&walk, req, false); + if (err) + return err; + + out = walk.dst.virt.addr; + in = walk.src.virt.addr; + nbytes = walk.nbytes; + + kernel_neon_begin(); +xts_tail: + if (encrypt) + neon_aes_xts_encrypt(out, in, ctx->cts.key_enc, ctx->key.rounds, + nbytes, ctx->twkey, walk.iv, first ?: 2); + else + neon_aes_xts_decrypt(out, in, ctx->cts.key_dec, ctx->key.rounds, + nbytes, ctx->twkey, walk.iv, first ?: 2); + kernel_neon_end(); + + return skcipher_walk_done(&walk, 0); } static int xts_encrypt(struct skcipher_request *req) { - return __xts_crypt(req, aesbs_xts_encrypt); + return __xts_crypt(req, 
true, aesbs_xts_encrypt); } static int xts_decrypt(struct skcipher_request *req) { - return __xts_crypt(req, aesbs_xts_decrypt); + return __xts_crypt(req, false, aesbs_xts_decrypt); } static struct skcipher_alg aes_algs[] = { { diff --git a/arch/arm64/crypto/ghash-ce-glue.c b/arch/arm64/crypto/ghash-ce-glue.c index 16c5da9be9fb..70b1469783f9 100644 --- a/arch/arm64/crypto/ghash-ce-glue.c +++ b/arch/arm64/crypto/ghash-ce-glue.c @@ -70,8 +70,6 @@ asmlinkage void pmull_gcm_decrypt(int blocks, u64 dg[], u8 dst[], asmlinkage void pmull_gcm_encrypt_block(u8 dst[], u8 const src[], u32 const rk[], int rounds); -asmlinkage void __aes_arm64_encrypt(u32 *rk, u8 *out, const u8 *in, int rounds); - static int ghash_init(struct shash_desc *desc) { struct ghash_desc_ctx *ctx = shash_desc_ctx(desc); @@ -309,14 +307,13 @@ static int gcm_setkey(struct crypto_aead *tfm, const u8 *inkey, u8 key[GHASH_BLOCK_SIZE]; int ret; - ret = crypto_aes_expand_key(&ctx->aes_key, inkey, keylen); + ret = aes_expandkey(&ctx->aes_key, inkey, keylen); if (ret) { tfm->base.crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; return -EINVAL; } - __aes_arm64_encrypt(ctx->aes_key.key_enc, key, (u8[AES_BLOCK_SIZE]){}, - num_rounds(&ctx->aes_key)); + aes_encrypt(&ctx->aes_key, key, (u8[AES_BLOCK_SIZE]){}); return __ghash_setkey(&ctx->ghash_key, key, sizeof(be128)); } @@ -467,7 +464,7 @@ static int gcm_encrypt(struct aead_request *req) rk = ctx->aes_key.key_enc; } while (walk.nbytes >= 2 * AES_BLOCK_SIZE); } else { - __aes_arm64_encrypt(ctx->aes_key.key_enc, tag, iv, nrounds); + aes_encrypt(&ctx->aes_key, tag, iv); put_unaligned_be32(2, iv + GCM_IV_SIZE); while (walk.nbytes >= (2 * AES_BLOCK_SIZE)) { @@ -478,8 +475,7 @@ static int gcm_encrypt(struct aead_request *req) int remaining = blocks; do { - __aes_arm64_encrypt(ctx->aes_key.key_enc, - ks, iv, nrounds); + aes_encrypt(&ctx->aes_key, ks, iv); crypto_xor_cpy(dst, src, ks, AES_BLOCK_SIZE); crypto_inc(iv, AES_BLOCK_SIZE); @@ -495,13 +491,10 @@ static int gcm_encrypt(struct aead_request *req) walk.nbytes % (2 * AES_BLOCK_SIZE)); } if (walk.nbytes) { - __aes_arm64_encrypt(ctx->aes_key.key_enc, ks, iv, - nrounds); + aes_encrypt(&ctx->aes_key, ks, iv); if (walk.nbytes > AES_BLOCK_SIZE) { crypto_inc(iv, AES_BLOCK_SIZE); - __aes_arm64_encrypt(ctx->aes_key.key_enc, - ks + AES_BLOCK_SIZE, iv, - nrounds); + aes_encrypt(&ctx->aes_key, ks + AES_BLOCK_SIZE, iv); } } } @@ -605,7 +598,7 @@ static int gcm_decrypt(struct aead_request *req) rk = ctx->aes_key.key_enc; } while (walk.nbytes >= 2 * AES_BLOCK_SIZE); } else { - __aes_arm64_encrypt(ctx->aes_key.key_enc, tag, iv, nrounds); + aes_encrypt(&ctx->aes_key, tag, iv); put_unaligned_be32(2, iv + GCM_IV_SIZE); while (walk.nbytes >= (2 * AES_BLOCK_SIZE)) { @@ -618,8 +611,7 @@ static int gcm_decrypt(struct aead_request *req) pmull_ghash_update_p64); do { - __aes_arm64_encrypt(ctx->aes_key.key_enc, - buf, iv, nrounds); + aes_encrypt(&ctx->aes_key, buf, iv); crypto_xor_cpy(dst, src, buf, AES_BLOCK_SIZE); crypto_inc(iv, AES_BLOCK_SIZE); @@ -637,11 +629,9 @@ static int gcm_decrypt(struct aead_request *req) memcpy(iv2, iv, AES_BLOCK_SIZE); crypto_inc(iv2, AES_BLOCK_SIZE); - __aes_arm64_encrypt(ctx->aes_key.key_enc, iv2, - iv2, nrounds); + aes_encrypt(&ctx->aes_key, iv2, iv2); } - __aes_arm64_encrypt(ctx->aes_key.key_enc, iv, iv, - nrounds); + aes_encrypt(&ctx->aes_key, iv, iv); } } diff --git a/arch/arm64/crypto/sha256-glue.c b/arch/arm64/crypto/sha256-glue.c index 04b9d17b0733..e273faca924f 100644 --- a/arch/arm64/crypto/sha256-glue.c +++ 
b/arch/arm64/crypto/sha256-glue.c @@ -30,15 +30,15 @@ EXPORT_SYMBOL(sha256_block_data_order); asmlinkage void sha256_block_neon(u32 *digest, const void *data, unsigned int num_blks); -static int sha256_update(struct shash_desc *desc, const u8 *data, - unsigned int len) +static int crypto_sha256_arm64_update(struct shash_desc *desc, const u8 *data, + unsigned int len) { return sha256_base_do_update(desc, data, len, (sha256_block_fn *)sha256_block_data_order); } -static int sha256_finup(struct shash_desc *desc, const u8 *data, - unsigned int len, u8 *out) +static int crypto_sha256_arm64_finup(struct shash_desc *desc, const u8 *data, + unsigned int len, u8 *out) { if (len) sha256_base_do_update(desc, data, len, @@ -49,17 +49,17 @@ static int sha256_finup(struct shash_desc *desc, const u8 *data, return sha256_base_finish(desc, out); } -static int sha256_final(struct shash_desc *desc, u8 *out) +static int crypto_sha256_arm64_final(struct shash_desc *desc, u8 *out) { - return sha256_finup(desc, NULL, 0, out); + return crypto_sha256_arm64_finup(desc, NULL, 0, out); } static struct shash_alg algs[] = { { .digestsize = SHA256_DIGEST_SIZE, .init = sha256_base_init, - .update = sha256_update, - .final = sha256_final, - .finup = sha256_finup, + .update = crypto_sha256_arm64_update, + .final = crypto_sha256_arm64_final, + .finup = crypto_sha256_arm64_finup, .descsize = sizeof(struct sha256_state), .base.cra_name = "sha256", .base.cra_driver_name = "sha256-arm64", @@ -69,9 +69,9 @@ static struct shash_alg algs[] = { { }, { .digestsize = SHA224_DIGEST_SIZE, .init = sha224_base_init, - .update = sha256_update, - .final = sha256_final, - .finup = sha256_finup, + .update = crypto_sha256_arm64_update, + .final = crypto_sha256_arm64_final, + .finup = crypto_sha256_arm64_finup, .descsize = sizeof(struct sha256_state), .base.cra_name = "sha224", .base.cra_driver_name = "sha224-arm64", |
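For completeness, a hedged caller-side sketch of how the essiv(cbc(aes),sha256) transform registered above could be requested through the regular skcipher API. The demo_* function, the assumption that this arm64 driver wins the priority-based algorithm resolution, and the dm-crypt-style use of the IV as a sector number are illustrative, not taken from this diff.

#include <crypto/aes.h>
#include <crypto/skcipher.h>
#include <linux/err.h>
#include <linux/gfp.h>
#include <linux/scatterlist.h>

static int demo_essiv_encrypt_sector(const u8 *key, unsigned int key_len,
				     u8 *sector, unsigned int sector_len,
				     u8 iv[AES_BLOCK_SIZE])
{
	DECLARE_CRYPTO_WAIT(wait);
	struct crypto_skcipher *tfm;
	struct skcipher_request *req;
	struct scatterlist sg;
	int err;

	/* on a CE-capable CPU this is expected to resolve to the
	 * simd-wrapped essiv-cbc-aes-sha256-ce instance added above */
	tfm = crypto_alloc_skcipher("essiv(cbc(aes),sha256)", 0, 0);
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);

	err = crypto_skcipher_setkey(tfm, key, key_len);
	if (err)
		goto out_free_tfm;

	req = skcipher_request_alloc(tfm, GFP_KERNEL);
	if (!req) {
		err = -ENOMEM;
		goto out_free_tfm;
	}

	sg_init_one(&sg, sector, sector_len);
	/* iv carries the sector number; the implementation encrypts it
	 * with the SHA-256-derived key before running cbc(aes) */
	skcipher_request_set_crypt(req, &sg, &sg, sector_len, iv);
	skcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_SLEEP,
				      crypto_req_done, &wait);

	err = crypto_wait_req(crypto_skcipher_encrypt(req), &wait);

	skcipher_request_free(req);
out_free_tfm:
	crypto_free_skcipher(tfm);
	return err;
}

crypto_wait_req() is used because the simd wrapper registers an asynchronous instance, so the request may complete through the callback rather than inline.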