diff options
Diffstat (limited to 'arch')
58 files changed, 326 insertions, 257 deletions
diff --git a/arch/arm/crypto/aes-ce-core.S b/arch/arm/crypto/aes-ce-core.S index 4d1707388d94..312428d83eed 100644 --- a/arch/arm/crypto/aes-ce-core.S +++ b/arch/arm/crypto/aes-ce-core.S @@ -386,20 +386,32 @@ ENTRY(ce_aes_ctr_encrypt) .Lctrloop4x: subs r4, r4, #4 bmi .Lctr1x - add r6, r6, #1 + + /* + * NOTE: the sequence below has been carefully tweaked to avoid + * a silicon erratum that exists in Cortex-A57 (#1742098) and + * Cortex-A72 (#1655431) cores, where AESE/AESMC instruction pairs + * may produce an incorrect result if they take their input from a + * register of which a single 32-bit lane has been updated the last + * time it was modified. To work around this, the lanes of registers + * q0-q3 below are not manipulated individually, and the different + * counter values are prepared by successive manipulations of q7. + */ + add ip, r6, #1 vmov q0, q7 + rev ip, ip + add lr, r6, #2 + vmov s31, ip @ set lane 3 of q1 via q7 + add ip, r6, #3 + rev lr, lr vmov q1, q7 - rev ip, r6 - add r6, r6, #1 + vmov s31, lr @ set lane 3 of q2 via q7 + rev ip, ip vmov q2, q7 - vmov s7, ip - rev ip, r6 - add r6, r6, #1 + vmov s31, ip @ set lane 3 of q3 via q7 + add r6, r6, #4 vmov q3, q7 - vmov s11, ip - rev ip, r6 - add r6, r6, #1 - vmov s15, ip + vld1.8 {q4-q5}, [r1]! vld1.8 {q6}, [r1]! vld1.8 {q15}, [r1]! diff --git a/arch/arm/crypto/aes-neonbs-glue.c b/arch/arm/crypto/aes-neonbs-glue.c index bda8bf17631e..f70af1d0514b 100644 --- a/arch/arm/crypto/aes-neonbs-glue.c +++ b/arch/arm/crypto/aes-neonbs-glue.c @@ -19,7 +19,7 @@ MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>"); MODULE_LICENSE("GPL v2"); MODULE_ALIAS_CRYPTO("ecb(aes)"); -MODULE_ALIAS_CRYPTO("cbc(aes)"); +MODULE_ALIAS_CRYPTO("cbc(aes)-all"); MODULE_ALIAS_CRYPTO("ctr(aes)"); MODULE_ALIAS_CRYPTO("xts(aes)"); @@ -191,7 +191,8 @@ static int cbc_init(struct crypto_skcipher *tfm) struct aesbs_cbc_ctx *ctx = crypto_skcipher_ctx(tfm); unsigned int reqsize; - ctx->enc_tfm = crypto_alloc_skcipher("cbc(aes)", 0, CRYPTO_ALG_ASYNC); + ctx->enc_tfm = crypto_alloc_skcipher("cbc(aes)", 0, CRYPTO_ALG_ASYNC | + CRYPTO_ALG_NEED_FALLBACK); if (IS_ERR(ctx->enc_tfm)) return PTR_ERR(ctx->enc_tfm); @@ -441,7 +442,8 @@ static struct skcipher_alg aes_algs[] = { { .base.cra_blocksize = AES_BLOCK_SIZE, .base.cra_ctxsize = sizeof(struct aesbs_cbc_ctx), .base.cra_module = THIS_MODULE, - .base.cra_flags = CRYPTO_ALG_INTERNAL, + .base.cra_flags = CRYPTO_ALG_INTERNAL | + CRYPTO_ALG_NEED_FALLBACK, .min_keysize = AES_MIN_KEY_SIZE, .max_keysize = AES_MAX_KEY_SIZE, diff --git a/arch/arm/crypto/chacha-glue.c b/arch/arm/crypto/chacha-glue.c index 59da6c0b63b6..7b5cf8430c6d 100644 --- a/arch/arm/crypto/chacha-glue.c +++ b/arch/arm/crypto/chacha-glue.c @@ -23,7 +23,7 @@ asmlinkage void chacha_block_xor_neon(const u32 *state, u8 *dst, const u8 *src, int nrounds); asmlinkage void chacha_4block_xor_neon(const u32 *state, u8 *dst, const u8 *src, - int nrounds); + int nrounds, unsigned int nbytes); asmlinkage void hchacha_block_arm(const u32 *state, u32 *out, int nrounds); asmlinkage void hchacha_block_neon(const u32 *state, u32 *out, int nrounds); @@ -42,24 +42,24 @@ static void chacha_doneon(u32 *state, u8 *dst, const u8 *src, { u8 buf[CHACHA_BLOCK_SIZE]; - while (bytes >= CHACHA_BLOCK_SIZE * 4) { - chacha_4block_xor_neon(state, dst, src, nrounds); - bytes -= CHACHA_BLOCK_SIZE * 4; - src += CHACHA_BLOCK_SIZE * 4; - dst += CHACHA_BLOCK_SIZE * 4; - state[12] += 4; - } - while (bytes >= CHACHA_BLOCK_SIZE) { - chacha_block_xor_neon(state, dst, src, nrounds); - bytes -= CHACHA_BLOCK_SIZE; - src += CHACHA_BLOCK_SIZE; - dst += CHACHA_BLOCK_SIZE; - state[12]++; + while (bytes > CHACHA_BLOCK_SIZE) { + unsigned int l = min(bytes, CHACHA_BLOCK_SIZE * 4U); + + chacha_4block_xor_neon(state, dst, src, nrounds, l); + bytes -= l; + src += l; + dst += l; + state[12] += DIV_ROUND_UP(l, CHACHA_BLOCK_SIZE); } if (bytes) { - memcpy(buf, src, bytes); - chacha_block_xor_neon(state, buf, buf, nrounds); - memcpy(dst, buf, bytes); + const u8 *s = src; + u8 *d = dst; + + if (bytes != CHACHA_BLOCK_SIZE) + s = d = memcpy(buf, src, bytes); + chacha_block_xor_neon(state, d, s, nrounds); + if (d != dst) + memcpy(dst, buf, bytes); } } diff --git a/arch/arm/crypto/chacha-neon-core.S b/arch/arm/crypto/chacha-neon-core.S index eb22926d4912..13d12f672656 100644 --- a/arch/arm/crypto/chacha-neon-core.S +++ b/arch/arm/crypto/chacha-neon-core.S @@ -47,6 +47,7 @@ */ #include <linux/linkage.h> +#include <asm/cache.h> .text .fpu neon @@ -205,7 +206,7 @@ ENDPROC(hchacha_block_neon) .align 5 ENTRY(chacha_4block_xor_neon) - push {r4-r5} + push {r4, lr} mov r4, sp // preserve the stack pointer sub ip, sp, #0x20 // allocate a 32 byte buffer bic ip, ip, #0x1f // aligned to 32 bytes @@ -229,10 +230,10 @@ ENTRY(chacha_4block_xor_neon) vld1.32 {q0-q1}, [r0] vld1.32 {q2-q3}, [ip] - adr r5, .Lctrinc + adr lr, .Lctrinc vdup.32 q15, d7[1] vdup.32 q14, d7[0] - vld1.32 {q4}, [r5, :128] + vld1.32 {q4}, [lr, :128] vdup.32 q13, d6[1] vdup.32 q12, d6[0] vdup.32 q11, d5[1] @@ -455,7 +456,7 @@ ENTRY(chacha_4block_xor_neon) // Re-interleave the words in the first two rows of each block (x0..7). // Also add the counter values 0-3 to x12[0-3]. - vld1.32 {q8}, [r5, :128] // load counter values 0-3 + vld1.32 {q8}, [lr, :128] // load counter values 0-3 vzip.32 q0, q1 // => (0 1 0 1) (0 1 0 1) vzip.32 q2, q3 // => (2 3 2 3) (2 3 2 3) vzip.32 q4, q5 // => (4 5 4 5) (4 5 4 5) @@ -493,6 +494,8 @@ ENTRY(chacha_4block_xor_neon) // Re-interleave the words in the last two rows of each block (x8..15). vld1.32 {q8-q9}, [sp, :256] + mov sp, r4 // restore original stack pointer + ldr r4, [r4, #8] // load number of bytes vzip.32 q12, q13 // => (12 13 12 13) (12 13 12 13) vzip.32 q14, q15 // => (14 15 14 15) (14 15 14 15) vzip.32 q8, q9 // => (8 9 8 9) (8 9 8 9) @@ -520,41 +523,121 @@ ENTRY(chacha_4block_xor_neon) // XOR the rest of the data with the keystream vld1.8 {q0-q1}, [r2]! + subs r4, r4, #96 veor q0, q0, q8 veor q1, q1, q12 + ble .Lle96 vst1.8 {q0-q1}, [r1]! vld1.8 {q0-q1}, [r2]! + subs r4, r4, #32 veor q0, q0, q2 veor q1, q1, q6 + ble .Lle128 vst1.8 {q0-q1}, [r1]! vld1.8 {q0-q1}, [r2]! + subs r4, r4, #32 veor q0, q0, q10 veor q1, q1, q14 + ble .Lle160 vst1.8 {q0-q1}, [r1]! vld1.8 {q0-q1}, [r2]! + subs r4, r4, #32 veor q0, q0, q4 veor q1, q1, q5 + ble .Lle192 vst1.8 {q0-q1}, [r1]! vld1.8 {q0-q1}, [r2]! + subs r4, r4, #32 veor q0, q0, q9 veor q1, q1, q13 + ble .Lle224 vst1.8 {q0-q1}, [r1]! vld1.8 {q0-q1}, [r2]! + subs r4, r4, #32 veor q0, q0, q3 veor q1, q1, q7 + blt .Llt256 +.Lout: vst1.8 {q0-q1}, [r1]! vld1.8 {q0-q1}, [r2] - mov sp, r4 // restore original stack pointer veor q0, q0, q11 veor q1, q1, q15 vst1.8 {q0-q1}, [r1] - pop {r4-r5} - bx lr + pop {r4, pc} + +.Lle192: + vmov q4, q9 + vmov q5, q13 + +.Lle160: + // nothing to do + +.Lfinalblock: + // Process the final block if processing less than 4 full blocks. + // Entered with 32 bytes of ChaCha cipher stream in q4-q5, and the + // previous 32 byte output block that still needs to be written at + // [r1] in q0-q1. + beq .Lfullblock + +.Lpartialblock: + adr lr, .Lpermute + 32 + add r2, r2, r4 + add lr, lr, r4 + add r4, r4, r1 + + vld1.8 {q2-q3}, [lr] + vld1.8 {q6-q7}, [r2] + + add r4, r4, #32 + + vtbl.8 d4, {q4-q5}, d4 + vtbl.8 d5, {q4-q5}, d5 + vtbl.8 d6, {q4-q5}, d6 + vtbl.8 d7, {q4-q5}, d7 + + veor q6, q6, q2 + veor q7, q7, q3 + + vst1.8 {q6-q7}, [r4] // overlapping stores + vst1.8 {q0-q1}, [r1] + pop {r4, pc} + +.Lfullblock: + vmov q11, q4 + vmov q15, q5 + b .Lout +.Lle96: + vmov q4, q2 + vmov q5, q6 + b .Lfinalblock +.Lle128: + vmov q4, q10 + vmov q5, q14 + b .Lfinalblock +.Lle224: + vmov q4, q3 + vmov q5, q7 + b .Lfinalblock +.Llt256: + vmov q4, q11 + vmov q5, q15 + b .Lpartialblock ENDPROC(chacha_4block_xor_neon) + + .align L1_CACHE_SHIFT +.Lpermute: + .byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07 + .byte 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f + .byte 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17 + .byte 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f + .byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07 + .byte 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f + .byte 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17 + .byte 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f diff --git a/arch/arm/crypto/sha1-ce-glue.c b/arch/arm/crypto/sha1-ce-glue.c index e79b1fb4b4dc..de9100c67b37 100644 --- a/arch/arm/crypto/sha1-ce-glue.c +++ b/arch/arm/crypto/sha1-ce-glue.c @@ -7,7 +7,7 @@ #include <crypto/internal/hash.h> #include <crypto/internal/simd.h> -#include <crypto/sha.h> +#include <crypto/sha1.h> #include <crypto/sha1_base.h> #include <linux/cpufeature.h> #include <linux/crypto.h> diff --git a/arch/arm/crypto/sha1.h b/arch/arm/crypto/sha1.h index 758db3e9ff0a..b1b7e21da2c3 100644 --- a/arch/arm/crypto/sha1.h +++ b/arch/arm/crypto/sha1.h @@ -3,7 +3,7 @@ #define ASM_ARM_CRYPTO_SHA1_H #include <linux/crypto.h> -#include <crypto/sha.h> +#include <crypto/sha1.h> extern int sha1_update_arm(struct shash_desc *desc, const u8 *data, unsigned int len); diff --git a/arch/arm/crypto/sha1_glue.c b/arch/arm/crypto/sha1_glue.c index 4e954b3f7ecd..6c2b849e459d 100644 --- a/arch/arm/crypto/sha1_glue.c +++ b/arch/arm/crypto/sha1_glue.c @@ -15,7 +15,7 @@ #include <linux/init.h> #include <linux/module.h> #include <linux/types.h> -#include <crypto/sha.h> +#include <crypto/sha1.h> #include <crypto/sha1_base.h> #include <asm/byteorder.h> diff --git a/arch/arm/crypto/sha1_neon_glue.c b/arch/arm/crypto/sha1_neon_glue.c index 0071e5e4411a..cfe36ae0f3f5 100644 --- a/arch/arm/crypto/sha1_neon_glue.c +++ b/arch/arm/crypto/sha1_neon_glue.c @@ -19,7 +19,7 @@ #include <linux/module.h> #include <linux/mm.h> #include <linux/types.h> -#include <crypto/sha.h> +#include <crypto/sha1.h> #include <crypto/sha1_base.h> #include <asm/neon.h> #include <asm/simd.h> diff --git a/arch/arm/crypto/sha2-ce-glue.c b/arch/arm/crypto/sha2-ce-glue.c index 87f0b62386c6..c62ce89dd3e0 100644 --- a/arch/arm/crypto/sha2-ce-glue.c +++ b/arch/arm/crypto/sha2-ce-glue.c @@ -7,7 +7,7 @@ #include <crypto/internal/hash.h> #include <crypto/internal/simd.h> -#include <crypto/sha.h> +#include <crypto/sha2.h> #include <crypto/sha256_base.h> #include <linux/cpufeature.h> #include <linux/crypto.h> diff --git a/arch/arm/crypto/sha256_glue.c b/arch/arm/crypto/sha256_glue.c index b8a4f79020cf..433ee4ddce6c 100644 --- a/arch/arm/crypto/sha256_glue.c +++ b/arch/arm/crypto/sha256_glue.c @@ -17,7 +17,7 @@ #include <linux/mm.h> #include <linux/types.h> #include <linux/string.h> -#include <crypto/sha.h> +#include <crypto/sha2.h> #include <crypto/sha256_base.h> #include <asm/simd.h> #include <asm/neon.h> diff --git a/arch/arm/crypto/sha256_neon_glue.c b/arch/arm/crypto/sha256_neon_glue.c index 79820b9e2541..701706262ef3 100644 --- a/arch/arm/crypto/sha256_neon_glue.c +++ b/arch/arm/crypto/sha256_neon_glue.c @@ -13,7 +13,7 @@ #include <crypto/internal/simd.h> #include <linux/types.h> #include <linux/string.h> -#include <crypto/sha.h> +#include <crypto/sha2.h> #include <crypto/sha256_base.h> #include <asm/byteorder.h> #include <asm/simd.h> diff --git a/arch/arm/crypto/sha512-glue.c b/arch/arm/crypto/sha512-glue.c index 8775aa42bbbe..0635a65aa488 100644 --- a/arch/arm/crypto/sha512-glue.c +++ b/arch/arm/crypto/sha512-glue.c @@ -6,7 +6,7 @@ */ #include <crypto/internal/hash.h> -#include <crypto/sha.h> +#include <crypto/sha2.h> #include <crypto/sha512_base.h> #include <linux/crypto.h> #include <linux/module.h> diff --git a/arch/arm/crypto/sha512-neon-glue.c b/arch/arm/crypto/sha512-neon-glue.c index 96cb94403540..c879ad32db51 100644 --- a/arch/arm/crypto/sha512-neon-glue.c +++ b/arch/arm/crypto/sha512-neon-glue.c @@ -7,7 +7,7 @@ #include <crypto/internal/hash.h> #include <crypto/internal/simd.h> -#include <crypto/sha.h> +#include <crypto/sha2.h> #include <crypto/sha512_base.h> #include <linux/crypto.h> #include <linux/module.h> diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig index 5cfe3cf6f2ac..5e7d86cf5dfa 100644 --- a/arch/arm64/configs/defconfig +++ b/arch/arm64/configs/defconfig @@ -1082,6 +1082,7 @@ CONFIG_CRYPTO_DEV_CCREE=m CONFIG_CRYPTO_DEV_HISI_SEC2=m CONFIG_CRYPTO_DEV_HISI_ZIP=m CONFIG_CRYPTO_DEV_HISI_HPRE=m +CONFIG_CRYPTO_DEV_HISI_TRNG=m CONFIG_CMA_SIZE_MBYTES=32 CONFIG_PRINTK_TIME=y CONFIG_DEBUG_INFO=y diff --git a/arch/arm64/crypto/aes-glue.c b/arch/arm64/crypto/aes-glue.c index 395bbf64b2ab..34b8a89197be 100644 --- a/arch/arm64/crypto/aes-glue.c +++ b/arch/arm64/crypto/aes-glue.c @@ -10,7 +10,7 @@ #include <asm/simd.h> #include <crypto/aes.h> #include <crypto/ctr.h> -#include <crypto/sha.h> +#include <crypto/sha2.h> #include <crypto/internal/hash.h> #include <crypto/internal/simd.h> #include <crypto/internal/skcipher.h> diff --git a/arch/arm64/crypto/chacha-neon-core.S b/arch/arm64/crypto/chacha-neon-core.S index e90386a7db8e..b70ac76f2610 100644 --- a/arch/arm64/crypto/chacha-neon-core.S +++ b/arch/arm64/crypto/chacha-neon-core.S @@ -195,7 +195,6 @@ SYM_FUNC_START(chacha_4block_xor_neon) adr_l x10, .Lpermute and x5, x4, #63 add x10, x10, x5 - add x11, x10, #64 // // This function encrypts four consecutive ChaCha blocks by loading @@ -645,11 +644,11 @@ CPU_BE( rev a15, a15 ) zip2 v31.4s, v14.4s, v15.4s eor a15, a15, w9 - mov x3, #64 + add x3, x2, x4 + sub x3, x3, #128 // start of last block + subs x5, x4, #128 - add x6, x5, x2 - csel x3, x3, xzr, ge - csel x2, x2, x6, ge + csel x2, x2, x3, ge // interleave 64-bit words in state n, n+2 zip1 v0.2d, v16.2d, v18.2d @@ -658,13 +657,10 @@ CPU_BE( rev a15, a15 ) zip1 v8.2d, v17.2d, v19.2d zip2 v12.2d, v17.2d, v19.2d stp a2, a3, [x1, #-56] - ld1 {v16.16b-v19.16b}, [x2], x3 subs x6, x4, #192 - ccmp x3, xzr, #4, lt - add x7, x6, x2 - csel x3, x3, xzr, eq - csel x2, x2, x7, eq + ld1 {v16.16b-v19.16b}, [x2], #64 + csel x2, x2, x3, ge zip1 v1.2d, v20.2d, v22.2d zip2 v5.2d, v20.2d, v22.2d @@ -672,13 +668,10 @@ CPU_BE( rev a15, a15 ) zip1 v9.2d, v21.2d, v23.2d zip2 v13.2d, v21.2d, v23.2d stp a6, a7, [x1, #-40] - ld1 {v20.16b-v23.16b}, [x2], x3 subs x7, x4, #256 - ccmp x3, xzr, #4, lt - add x8, x7, x2 - csel x3, x3, xzr, eq - csel x2, x2, x8, eq + ld1 {v20.16b-v23.16b}, [x2], #64 + csel x2, x2, x3, ge zip1 v2.2d, v24.2d, v26.2d zip2 v6.2d, v24.2d, v26.2d @@ -686,12 +679,10 @@ CPU_BE( rev a15, a15 ) zip1 v10.2d, v25.2d, v27.2d zip2 v14.2d, v25.2d, v27.2d stp a10, a11, [x1, #-24] - ld1 {v24.16b-v27.16b}, [x2], x3 subs x8, x4, #320 - ccmp x3, xzr, #4, lt - add x9, x8, x2 - csel x2, x2, x9, eq + ld1 {v24.16b-v27.16b}, [x2], #64 + csel x2, x2, x3, ge zip1 v3.2d, v28.2d, v30.2d zip2 v7.2d, v28.2d, v30.2d @@ -699,151 +690,105 @@ CPU_BE( rev a15, a15 ) zip1 v11.2d, v29.2d, v31.2d zip2 v15.2d, v29.2d, v31.2d stp a14, a15, [x1, #-8] + + tbnz x5, #63, .Lt128 ld1 {v28.16b-v31.16b}, [x2] // xor with corresponding input, write to output - tbnz x5, #63, 0f eor v16.16b, v16.16b, v0.16b eor v17.16b, v17.16b, v1.16b eor v18.16b, v18.16b, v2.16b eor v19.16b, v19.16b, v3.16b - st1 {v16.16b-v19.16b}, [x1], #64 - cbz x5, .Lout - tbnz x6, #63, 1f + tbnz x6, #63, .Lt192 + eor v20.16b, v20.16b, v4.16b eor v21.16b, v21.16b, v5.16b eor v22.16b, v22.16b, v6.16b eor v23.16b, v23.16b, v7.16b - st1 {v20.16b-v23.16b}, [x1], #64 - cbz x6, .Lout - tbnz x7, #63, 2f + st1 {v16.16b-v19.16b}, [x1], #64 + tbnz x7, #63, .Lt256 + eor v24.16b, v24.16b, v8.16b eor v25.16b, v25.16b, v9.16b eor v26.16b, v26.16b, v10.16b eor v27.16b, v27.16b, v11.16b - st1 {v24.16b-v27.16b}, [x1], #64 - cbz x7, .Lout - tbnz x8, #63, 3f + st1 {v20.16b-v23.16b}, [x1], #64 + tbnz x8, #63, .Lt320 + eor v28.16b, v28.16b, v12.16b eor v29.16b, v29.16b, v13.16b eor v30.16b, v30.16b, v14.16b eor v31.16b, v31.16b, v15.16b + + st1 {v24.16b-v27.16b}, [x1], #64 st1 {v28.16b-v31.16b}, [x1] .Lout: frame_pop ret - // fewer than 128 bytes of in/output -0: ld1 {v8.16b}, [x10] - ld1 {v9.16b}, [x11] - movi v10.16b, #16 - sub x2, x1, #64 - add x1, x1, x5 - ld1 {v16.16b-v19.16b}, [x2] - tbl v4.16b, {v0.16b-v3.16b}, v8.16b - tbx v20.16b, {v16.16b-v19.16b}, v9.16b - add v8.16b, v8.16b, v10.16b - add v9.16b, v9.16b, v10.16b - tbl v5.16b, {v0.16b-v3.16b}, v8.16b - tbx v21.16b, {v16.16b-v19.16b}, v9.16b - add v8.16b, v8.16b, v10.16b - add v9.16b, v9.16b, v10.16b - tbl v6.16b, {v0.16b-v3.16b}, v8.16b - tbx v22.16b, {v16.16b-v19.16b}, v9.16b - add v8.16b, v8.16b, v10.16b - add v9.16b, v9.16b, v10.16b - tbl v7.16b, {v0.16b-v3.16b}, v8.16b - tbx v23.16b, {v16.16b-v19.16b}, v9.16b - - eor v20.16b, v20.16b, v4.16b - eor v21.16b, v21.16b, v5.16b - eor v22.16b, v22.16b, v6.16b - eor v23.16b, v23.16b, v7.16b - st1 {v20.16b-v23.16b}, [x1] - b .Lout - // fewer than 192 bytes of in/output -1: ld1 {v8.16b}, [x10] - ld1 {v9.16b}, [x11] - movi v10.16b, #16 - add x1, x1, x6 - tbl v0.16b, {v4.16b-v7.16b}, v8.16b - tbx v20.16b, {v16.16b-v19.16b}, v9.16b - add v8.16b, v8.16b, v10.16b - add v9.16b, v9.16b, v10.16b - tbl v1.16b, {v4.16b-v7.16b}, v8.16b - tbx v21.16b, {v16.16b-v19.16b}, v9.16b - add v8.16b, v8.16b, v10.16b - add v9.16b, v9.16b, v10.16b - tbl v2.16b, {v4.16b-v7.16b}, v8.16b - tbx v22.16b, {v16.16b-v19.16b}, v9.16b - add v8.16b, v8.16b, v10.16b - add v9.16b, v9.16b, v10.16b - tbl v3.16b, {v4.16b-v7.16b}, v8.16b - tbx v23.16b, {v16.16b-v19.16b}, v9.16b - - eor v20.16b, v20.16b, v0.16b - eor v21.16b, v21.16b, v1.16b - eor v22.16b, v22.16b, v2.16b - eor v23.16b, v23.16b, v3.16b - st1 {v20.16b-v23.16b}, [x1] +.Lt192: cbz x5, 1f // exactly 128 bytes? + ld1 {v28.16b-v31.16b}, [x10] + add x5, x5, x1 + tbl v28.16b, {v4.16b-v7.16b}, v28.16b + tbl v29.16b, {v4.16b-v7.16b}, v29.16b + tbl v30.16b, {v4.16b-v7.16b}, v30.16b + tbl v31.16b, {v4.16b-v7.16b}, v31.16b + +0: eor v20.16b, v20.16b, v28.16b + eor v21.16b, v21.16b, v29.16b + eor v22.16b, v22.16b, v30.16b + eor v23.16b, v23.16b, v31.16b + st1 {v20.16b-v23.16b}, [x5] // overlapping stores +1: st1 {v16.16b-v19.16b}, [x1] b .Lout + // fewer than 128 bytes of in/output +.Lt128: ld1 {v28.16b-v31.16b}, [x10] + add x5, x5, x1 + sub x1, x1, #64 + tbl v28.16b, {v0.16b-v3.16b}, v28.16b + tbl v29.16b, {v0.16b-v3.16b}, v29.16b + tbl v30.16b, {v0.16b-v3.16b}, v30.16b + tbl v31.16b, {v0.16b-v3.16b}, v31.16b + ld1 {v16.16b-v19.16b}, [x1] // reload first output block + b 0b + // fewer than 256 bytes of in/output -2: ld1 {v4.16b}, [x10] - ld1 {v5.16b}, [x11] - movi v6.16b, #16 - add x1, x1, x7 +.Lt256: cbz x6, 2f // exactly 192 bytes? + ld1 {v4.16b-v7.16b}, [x10] + add x6, x6, x1 tbl v0.16b, {v8.16b-v11.16b}, v4.16b - tbx v24.16b, {v20.16b-v23.16b}, v5.16b - add v4.16b, v4.16b, v6.16b - add v5.16b, v5.16b, v6.16b - tbl v1.16b, {v8.16b-v11.16b}, v4.16b - tbx v25.16b, {v20.16b-v23.16b}, v5.16b - add v4.16b, v4.16b, v6.16b - add v5.16b, v5.16b, v6.16b - tbl v2.16b, {v8.16b-v11.16b}, v4.16b - tbx v26.16b, {v20.16b-v23.16b}, v5.16b - add v4.16b, v4.16b, v6.16b - add v5.16b, v5.16b, v6.16b - tbl v3.16b, {v8.16b-v11.16b}, v4.16b - tbx v27.16b, {v20.16b-v23.16b}, v5.16b - - eor v24.16b, v24.16b, v0.16b - eor v25.16b, v25.16b, v1.16b - eor v26.16b, v26.16b, v2.16b - eor v27.16b, v27.16b, v3.16b - st1 {v24.16b-v27.16b}, [x1] + tbl v1.16b, {v8.16b-v11.16b}, v5.16b + tbl v2.16b, {v8.16b-v11.16b}, v6.16b + tbl v3.16b, {v8.16b-v11.16b}, v7.16b + + eor v28.16b, v28.16b, v0.16b + eor v29.16b, v29.16b, v1.16b + eor v30.16b, v30.16b, v2.16b + eor v31.16b, v31.16b, v3.16b + st1 {v28.16b-v31.16b}, [x6] // overlapping stores +2: st1 {v20.16b-v23.16b}, [x1] b .Lout // fewer than 320 bytes of in/output -3: ld1 {v4.16b}, [x10] - ld1 {v5.16b}, [x11] - movi v6.16b, #16 - add x1, x1, x8 +.Lt320: cbz x7, 3f // exactly 256 bytes? + ld1 {v4.16b-v7.16b}, [x10] + add x7, x7, x1 tbl v0.16b, {v12.16b-v15.16b}, v4.16b - tbx v28.16b, {v24.16b-v27.16b}, v5.16b - add v4.16b, v4.16b, v6.16b - add v5.16b, v5.16b, v6.16b - tbl v1.16b, {v12.16b-v15.16b}, v4.16b - tbx v29.16b, {v24.16b-v27.16b}, v5.16b - add v4.16b, v4.16b, v6.16b - add v5.16b, v5.16b, v6.16b - tbl v2.16b, {v12.16b-v15.16b}, v4.16b - tbx v30.16b, {v24.16b-v27.16b}, v5.16b - add v4.16b, v4.16b, v6.16b - add v5.16b, v5.16b, v6.16b - tbl v3.16b, {v12.16b-v15.16b}, v4.16b - tbx v31.16b, {v24.16b-v27.16b}, v5.16b + tbl v1.16b, {v12.16b-v15.16b}, v5.16b + tbl v2.16b, {v12.16b-v15.16b}, v6.16b + tbl v3.16b, {v12.16b-v15.16b}, v7.16b eor v28.16b, v28.16b, v0.16b eor v29.16b, v29.16b, v1.16b eor v30.16b, v30.16b, v2.16b eor v31.16b, v31.16b, v3.16b - st1 {v28.16b-v31.16b}, [x1] + st1 {v28.16b-v31.16b}, [x7] // overlapping stores +3: st1 {v24.16b-v27.16b}, [x1] b .Lout SYM_FUNC_END(chacha_4block_xor_neon) @@ -851,7 +796,7 @@ SYM_FUNC_END(chacha_4block_xor_neon) .align L1_CACHE_SHIFT .Lpermute: .set .Li, 0 - .rept 192 + .rept 128 .byte (.Li - 64) .set .Li, .Li + 1 .endr diff --git a/arch/arm64/crypto/ghash-ce-core.S b/arch/arm64/crypto/ghash-ce-core.S index 6b958dcdf136..7868330dd54e 100644 --- a/arch/arm64/crypto/ghash-ce-core.S +++ b/arch/arm64/crypto/ghash-ce-core.S @@ -544,7 +544,22 @@ CPU_LE( rev w8, w8 ) ext XL.16b, XL.16b, XL.16b, #8 rev64 XL.16b, XL.16b eor XL.16b, XL.16b, KS0.16b + + .if \enc == 1 st1 {XL.16b}, [x10] // store tag + .else + ldp x11, x12, [sp, #40] // load tag pointer and authsize + adr_l x17, .Lpermute_table + ld1 {KS0.16b}, [x11] // load supplied tag + add x17, x17, x12 + ld1 {KS1.16b}, [x17] // load permute vector + + cmeq XL.16b, XL.16b, KS0.16b // compare tags + mvn XL.16b, XL.16b // -1 for fail, 0 for pass + tbl XL.16b, {XL.16b}, KS1.16b // keep authsize bytes only + sminv b0, XL.16b // signed minimum across XL + smov w0, v0.b[0] // return b0 + .endif 4: ldp x29, x30, [sp], #32 ret diff --git a/arch/arm64/crypto/ghash-ce-glue.c b/arch/arm64/crypto/ghash-ce-glue.c index 8536008e3e35..720cd3a58da3 100644 --- a/arch/arm64/crypto/ghash-ce-glue.c +++ b/arch/arm64/crypto/ghash-ce-glue.c @@ -55,10 +55,10 @@ asmlinkage void pmull_ghash_update_p8(int blocks, u64 dg[], const char *src, asmlinkage void pmull_gcm_encrypt(int bytes, u8 dst[], const u8 src[], u64 const h[][2], u64 dg[], u8 ctr[], u32 const rk[], int rounds, u8 tag[]); - -asmlinkage void pmull_gcm_decrypt(int bytes, u8 dst[], const u8 src[], - u64 const h[][2], u64 dg[], u8 ctr[], - u32 const rk[], int rounds, u8 tag[]); +asmlinkage int pmull_gcm_decrypt(int bytes, u8 dst[], const u8 src[], + u64 const h[][2], u64 dg[], u8 ctr[], + u32 const rk[], int rounds, const u8 l[], + const u8 tag[], u64 authsize); static int ghash_init(struct shash_desc *desc) { @@ -168,7 +168,7 @@ static int ghash_final(struct shash_desc *desc, u8 *dst) put_unaligned_be64(ctx->digest[1], dst); put_unaligned_be64(ctx->digest[0], dst + 8); - *ctx = (struct ghash_desc_ctx){}; + memzero_explicit(ctx, sizeof(*ctx)); return 0; } @@ -458,6 +458,7 @@ static int gcm_decrypt(struct aead_request *req) unsigned int authsize = crypto_aead_authsize(aead); int nrounds = num_rounds(&ctx->aes_key); struct skcipher_walk walk; + u8 otag[AES_BLOCK_SIZE]; u8 buf[AES_BLOCK_SIZE]; u8 iv[AES_BLOCK_SIZE]; u64 dg[2] = {}; @@ -474,9 +475,15 @@ static int gcm_decrypt(struct aead_request *req) memcpy(iv, req->iv, GCM_IV_SIZE); put_unaligned_be32(2, iv + GCM_IV_SIZE); + scatterwalk_map_and_copy(otag, req->src, + req->assoclen + req->cryptlen - authsize, + authsize, 0); + err = skcipher_walk_aead_decrypt(&walk, req, false); if (likely(crypto_simd_usable())) { + int ret; + do { const u8 *src = walk.src.virt.addr; u8 *dst = walk.dst.virt.addr; @@ -493,9 +500,10 @@ static int gcm_decrypt(struct aead_request *req) } kernel_neon_begin(); - pmull_gcm_decrypt(nbytes, dst, src, ctx->ghash_key.h, - dg, iv, ctx->aes_key.key_enc, nrounds, - tag); + ret = pmull_gcm_decrypt(nbytes, dst, src, + ctx->ghash_key.h, + dg, iv, ctx->aes_key.key_enc, + nrounds, tag, otag, authsize); kernel_neon_end(); if (unlikely(!nbytes)) @@ -507,6 +515,11 @@ static int gcm_decrypt(struct aead_request *req) err = skcipher_walk_done(&walk, walk.nbytes - nbytes); } while (walk.nbytes); + + if (err) + return err; + if (ret) + return -EBADMSG; } else { while (walk.nbytes >= AES_BLOCK_SIZE) { int blocks = walk.nbytes / AES_BLOCK_SIZE; @@ -548,23 +561,20 @@ static int gcm_decrypt(struct aead_request *req) err = skcipher_walk_done(&walk, 0); } + if (err) + return err; + put_unaligned_be64(dg[1], tag); put_unaligned_be64(dg[0], tag + 8); put_unaligned_be32(1, iv + GCM_IV_SIZE); aes_encrypt(&ctx->aes_key, iv, iv); crypto_xor(tag, iv, AES_BLOCK_SIZE); - } - - if (err) - return err; - /* compare calculated auth tag with the stored one */ - scatterwalk_map_and_copy(buf, req->src, - req->assoclen + req->cryptlen - authsize, - authsize, 0); - - if (crypto_memneq(tag, buf, authsize)) - return -EBADMSG; + if (crypto_memneq(tag, otag, authsize)) { + memzero_explicit(tag, AES_BLOCK_SIZE); + return -EBADMSG; + } + } return 0; } diff --git a/arch/arm64/crypto/poly1305-armv8.pl b/arch/arm64/crypto/poly1305-armv8.pl index 6e5576d19af8..cbc980fb02e3 100644 --- a/arch/arm64/crypto/poly1305-armv8.pl +++ b/arch/arm64/crypto/poly1305-armv8.pl @@ -840,7 +840,6 @@ poly1305_blocks_neon: ldp d14,d15,[sp,#64] addp $ACC2,$ACC2,$ACC2 ldr x30,[sp,#8] - .inst 0xd50323bf // autiasp //////////////////////////////////////////////////////////////// // lazy reduction, but without narrowing @@ -882,6 +881,7 @@ poly1305_blocks_neon: str x4,[$ctx,#8] // set is_base2_26 ldr x29,[sp],#80 + .inst 0xd50323bf // autiasp ret .size poly1305_blocks_neon,.-poly1305_blocks_neon diff --git a/arch/arm64/crypto/poly1305-core.S_shipped b/arch/arm64/crypto/poly1305-core.S_shipped index 8d1c4e420ccd..fb2822abf63a 100644 --- a/arch/arm64/crypto/poly1305-core.S_shipped +++ b/arch/arm64/crypto/poly1305-core.S_shipped @@ -779,7 +779,6 @@ poly1305_blocks_neon: ldp d14,d15,[sp,#64] addp v21.2d,v21.2d,v21.2d ldr x30,[sp,#8] - .inst 0xd50323bf // autiasp //////////////////////////////////////////////////////////////// // lazy reduction, but without narrowing @@ -821,6 +820,7 @@ poly1305_blocks_neon: str x4,[x0,#8] // set is_base2_26 ldr x29,[sp],#80 + .inst 0xd50323bf // autiasp ret .size poly1305_blocks_neon,.-poly1305_blocks_neon diff --git a/arch/arm64/crypto/poly1305-glue.c b/arch/arm64/crypto/poly1305-glue.c index f33ada70c4ed..683de671741a 100644 --- a/arch/arm64/crypto/poly1305-glue.c +++ b/arch/arm64/crypto/poly1305-glue.c @@ -177,7 +177,7 @@ void poly1305_final_arch(struct poly1305_desc_ctx *dctx, u8 *dst) } poly1305_emit(&dctx->h, dst, dctx->s); - *dctx = (struct poly1305_desc_ctx){}; + memzero_explicit(dctx, sizeof(*dctx)); } EXPORT_SYMBOL(poly1305_final_arch); diff --git a/arch/arm64/crypto/sha1-ce-glue.c b/arch/arm64/crypto/sha1-ce-glue.c index c63b99211db3..c93121bcfdeb 100644 --- a/arch/arm64/crypto/sha1-ce-glue.c +++ b/arch/arm64/crypto/sha1-ce-glue.c @@ -10,7 +10,7 @@ #include <asm/unaligned.h> #include <crypto/internal/hash.h> #include <crypto/internal/simd.h> -#include <crypto/sha.h> +#include <crypto/sha1.h> #include <crypto/sha1_base.h> #include <linux/cpufeature.h> #include <linux/crypto.h> diff --git a/arch/arm64/crypto/sha2-ce-glue.c b/arch/arm64/crypto/sha2-ce-glue.c index 5e956d7582a5..31ba3da5e61b 100644 --- a/arch/arm64/crypto/sha2-ce-glue.c +++ b/arch/arm64/crypto/sha2-ce-glue.c @@ -10,7 +10,7 @@ #include <asm/unaligned.h> #include <crypto/internal/hash.h> #include <crypto/internal/simd.h> -#include <crypto/sha.h> +#include <crypto/sha2.h> #include <crypto/sha256_base.h> #include <linux/cpufeature.h> #include <linux/crypto.h> diff --git a/arch/arm64/crypto/sha256-glue.c b/arch/arm64/crypto/sha256-glue.c index 77bc6e72abae..9462f6088b3f 100644 --- a/arch/arm64/crypto/sha256-glue.c +++ b/arch/arm64/crypto/sha256-glue.c @@ -10,7 +10,7 @@ #include <asm/simd.h> #include <crypto/internal/hash.h> #include <crypto/internal/simd.h> -#include <crypto/sha.h> +#include <crypto/sha2.h> #include <crypto/sha256_base.h> #include <linux/types.h> #include <linux/string.h> diff --git a/arch/arm64/crypto/sha3-ce-glue.c b/arch/arm64/crypto/sha3-ce-glue.c index 9a4bbfc45f40..e5a2936f0886 100644 --- a/arch/arm64/crypto/sha3-ce-glue.c +++ b/arch/arm64/crypto/sha3-ce-glue.c @@ -94,7 +94,7 @@ static int sha3_final(struct shash_desc *desc, u8 *out) if (digest_size & 4) put_unaligned_le32(sctx->st[i], (__le32 *)digest); - *sctx = (struct sha3_state){}; + memzero_explicit(sctx, sizeof(*sctx)); return 0; } diff --git a/arch/arm64/crypto/sha512-ce-glue.c b/arch/arm64/crypto/sha512-ce-glue.c index dc890a719f54..faa83f6cf376 100644 --- a/arch/arm64/crypto/sha512-ce-glue.c +++ b/arch/arm64/crypto/sha512-ce-glue.c @@ -14,7 +14,7 @@ #include <asm/unaligned.h> #include <crypto/internal/hash.h> #include <crypto/internal/simd.h> -#include <crypto/sha.h> +#include <crypto/sha2.h> #include <crypto/sha512_base.h> #include <linux/cpufeature.h> #include <linux/crypto.h> diff --git a/arch/arm64/crypto/sha512-glue.c b/arch/arm64/crypto/sha512-glue.c index 370ccb29602f..2acff1c7df5d 100644 --- a/arch/arm64/crypto/sha512-glue.c +++ b/arch/arm64/crypto/sha512-glue.c @@ -8,7 +8,7 @@ #include <crypto/internal/hash.h> #include <linux/types.h> #include <linux/string.h> -#include <crypto/sha.h> +#include <crypto/sha2.h> #include <crypto/sha512_base.h> #include <asm/neon.h> diff --git a/arch/mips/cavium-octeon/crypto/octeon-crypto.h b/arch/mips/cavium-octeon/crypto/octeon-crypto.h index 7315cc307397..cb68f9e284bb 100644 --- a/arch/mips/cavium-octeon/crypto/octeon-crypto.h +++ b/arch/mips/cavium-octeon/crypto/octeon-crypto.h @@ -41,7 +41,7 @@ do { \ */ #define read_octeon_64bit_hash_dword(index) \ ({ \ - u64 __value; \ + __be64 __value; \ \ __asm__ __volatile__ ( \ "dmfc2 %[rt],0x0048+" STR(index) \ diff --git a/arch/mips/cavium-octeon/crypto/octeon-md5.c b/arch/mips/cavium-octeon/crypto/octeon-md5.c index 8c8ea139653e..5ee4ade99b99 100644 --- a/arch/mips/cavium-octeon/crypto/octeon-md5.c +++ b/arch/mips/cavium-octeon/crypto/octeon-md5.c @@ -68,10 +68,11 @@ static int octeon_md5_init(struct shash_desc *desc) { struct md5_state *mctx = shash_desc_ctx(desc); - mctx->hash[0] = cpu_to_le32(MD5_H0); - mctx->hash[1] = cpu_to_le32(MD5_H1); - mctx->hash[2] = cpu_to_le32(MD5_H2); - mctx->hash[3] = cpu_to_le32(MD5_H3); + mctx->hash[0] = MD5_H0; + mctx->hash[1] = MD5_H1; + mctx->hash[2] = MD5_H2; + mctx->hash[3] = MD5_H3; + cpu_to_le32_array(mctx->hash, 4); mctx->byte_count = 0; return 0; @@ -139,8 +140,9 @@ static int octeon_md5_final(struct shash_desc *desc, u8 *out) } memset(p, 0, padding); - mctx->block[14] = cpu_to_le32(mctx->byte_count << 3); - mctx->block[15] = cpu_to_le32(mctx->byte_count >> 29); + mctx->block[14] = mctx->byte_count << 3; + mctx->block[15] = mctx->byte_count >> 29; + cpu_to_le32_array(mctx->block + 14, 2); octeon_md5_transform(mctx->block); octeon_md5_read_hash(mctx); diff --git a/arch/mips/cavium-octeon/crypto/octeon-sha1.c b/arch/mips/cavium-octeon/crypto/octeon-sha1.c index 75e79b47abfe..30f1d75208a5 100644 --- a/arch/mips/cavium-octeon/crypto/octeon-sha1.c +++ b/arch/mips/cavium-octeon/crypto/octeon-sha1.c @@ -14,7 +14,7 @@ */ #include <linux/mm.h> -#include <crypto/sha.h> +#include <crypto/sha1.h> #include <linux/init.h> #include <linux/types.h> #include <linux/module.h> diff --git a/arch/mips/cavium-octeon/crypto/octeon-sha256.c b/arch/mips/cavium-octeon/crypto/octeon-sha256.c index a682ce76716a..36cb92895d72 100644 --- a/arch/mips/cavium-octeon/crypto/octeon-sha256.c +++ b/arch/mips/cavium-octeon/crypto/octeon-sha256.c @@ -15,7 +15,7 @@ */ #include <linux/mm.h> -#include <crypto/sha.h> +#include <crypto/sha2.h> #include <linux/init.h> #include <linux/types.h> #include <linux/module.h> diff --git a/arch/mips/cavium-octeon/crypto/octeon-sha512.c b/arch/mips/cavium-octeon/crypto/octeon-sha512.c index 50722a0cfb53..359f039820d8 100644 --- a/arch/mips/cavium-octeon/crypto/octeon-sha512.c +++ b/arch/mips/cavium-octeon/crypto/octeon-sha512.c @@ -14,7 +14,7 @@ */ #include <linux/mm.h> -#include <crypto/sha.h> +#include <crypto/sha2.h> #include <linux/init.h> #include <linux/types.h> #include <linux/module.h> diff --git a/arch/powerpc/crypto/sha1-spe-glue.c b/arch/powerpc/crypto/sha1-spe-glue.c index cb57be4ada61..b1e577cbf00c 100644 --- a/arch/powerpc/crypto/sha1-spe-glue.c +++ b/arch/powerpc/crypto/sha1-spe-glue.c @@ -12,7 +12,7 @@ #include <linux/module.h> #include <linux/mm.h> #include <linux/types.h> -#include <crypto/sha.h> +#include <crypto/sha1.h> #include <asm/byteorder.h> #include <asm/switch_to.h> #include <linux/hardirq.h> diff --git a/arch/powerpc/crypto/sha1.c b/arch/powerpc/crypto/sha1.c index b40dc50a6908..7a55d790cdb1 100644 --- a/arch/powerpc/crypto/sha1.c +++ b/arch/powerpc/crypto/sha1.c @@ -17,7 +17,7 @@ #include <linux/module.h> #include <linux/mm.h> #include <linux/types.h> -#include <crypto/sha.h> +#include <crypto/sha1.h> #include <asm/byteorder.h> void powerpc_sha_transform(u32 *state, const u8 *src); diff --git a/arch/powerpc/crypto/sha256-spe-glue.c b/arch/powerpc/crypto/sha256-spe-glue.c index ceb0b6c980b3..a6e650a97d8f 100644 --- a/arch/powerpc/crypto/sha256-spe-glue.c +++ b/arch/powerpc/crypto/sha256-spe-glue.c @@ -13,7 +13,7 @@ #include <linux/module.h> #include <linux/mm.h> #include <linux/types.h> -#include <crypto/sha.h> +#include <crypto/sha2.h> #include <asm/byteorder.h> #include <asm/switch_to.h> #include <linux/hardirq.h> @@ -177,7 +177,7 @@ static int ppc_spe_sha256_final(struct shash_desc *desc, u8 *out) static int ppc_spe_sha224_final(struct shash_desc *desc, u8 *out) { - u32 D[SHA256_DIGEST_SIZE >> 2]; + __be32 D[SHA256_DIGEST_SIZE >> 2]; __be32 *dst = (__be32 *)out; ppc_spe_sha256_final(desc, (u8 *)D); diff --git a/arch/s390/crypto/sha.h b/arch/s390/crypto/sha.h index ada2f98c27b7..65ea12fc87a1 100644 --- a/arch/s390/crypto/sha.h +++ b/arch/s390/crypto/sha.h @@ -11,7 +11,8 @@ #define _CRYPTO_ARCH_S390_SHA_H #include <linux/crypto.h> -#include <crypto/sha.h> +#include <crypto/sha1.h> +#include <crypto/sha2.h> #include <crypto/sha3.h> /* must be big enough for the largest SHA variant */ diff --git a/arch/s390/crypto/sha1_s390.c b/arch/s390/crypto/sha1_s390.c index 698b1e6d3c14..a3fabf310a38 100644 --- a/arch/s390/crypto/sha1_s390.c +++ b/arch/s390/crypto/sha1_s390.c @@ -22,7 +22,7 @@ #include <linux/init.h> #include <linux/module.h> #include <linux/cpufeature.h> -#include <crypto/sha.h> +#include <crypto/sha1.h> #include <asm/cpacf.h> #include "sha.h" diff --git a/arch/s390/crypto/sha256_s390.c b/arch/s390/crypto/sha256_s390.c index b52c87e44939..24983f175676 100644 --- a/arch/s390/crypto/sha256_s390.c +++ b/arch/s390/crypto/sha256_s390.c @@ -12,7 +12,7 @@ #include <linux/init.h> #include <linux/module.h> #include <linux/cpufeature.h> -#include <crypto/sha.h> +#include <crypto/sha2.h> #include <asm/cpacf.h> #include "sha.h" diff --git a/arch/s390/crypto/sha3_256_s390.c b/arch/s390/crypto/sha3_256_s390.c index 460cbbbaa44a..30ac49b635bf 100644 --- a/arch/s390/crypto/sha3_256_s390.c +++ b/arch/s390/crypto/sha3_256_s390.c @@ -12,7 +12,6 @@ #include <linux/init.h> #include <linux/module.h> #include <linux/cpufeature.h> -#include <crypto/sha.h> #include <crypto/sha3.h> #include <asm/cpacf.h> diff --git a/arch/s390/crypto/sha3_512_s390.c b/arch/s390/crypto/sha3_512_s390.c index 72cf460a53e5..e70d50f7620f 100644 --- a/arch/s390/crypto/sha3_512_s390.c +++ b/arch/s390/crypto/sha3_512_s390.c @@ -11,7 +11,6 @@ #include <linux/init.h> #include <linux/module.h> #include <linux/cpufeature.h> -#include <crypto/sha.h> #include <crypto/sha3.h> #include <asm/cpacf.h> diff --git a/arch/s390/crypto/sha512_s390.c b/arch/s390/crypto/sha512_s390.c index ad29db085a18..29a6bd404c59 100644 --- a/arch/s390/crypto/sha512_s390.c +++ b/arch/s390/crypto/sha512_s390.c @@ -8,7 +8,7 @@ * Author(s): Jan Glauber (jang@de.ibm.com) */ #include <crypto/internal/hash.h> -#include <crypto/sha.h> +#include <crypto/sha2.h> #include <linux/errno.h> #include <linux/init.h> #include <linux/kernel.h> diff --git a/arch/s390/purgatory/purgatory.c b/arch/s390/purgatory/purgatory.c index 0a423bcf6746..030efda05dbe 100644 --- a/arch/s390/purgatory/purgatory.c +++ b/arch/s390/purgatory/purgatory.c @@ -9,7 +9,7 @@ #include <linux/kexec.h> #include <linux/string.h> -#include <crypto/sha.h> +#include <crypto/sha2.h> #include <asm/purgatory.h> int verify_sha256_digest(void) diff --git a/arch/sparc/crypto/crc32c_glue.c b/arch/sparc/crypto/crc32c_glue.c index 4e9323229e71..82efb7f81c28 100644 --- a/arch/sparc/crypto/crc32c_glue.c +++ b/arch/sparc/crypto/crc32c_glue.c @@ -35,7 +35,7 @@ static int crc32c_sparc64_setkey(struct crypto_shash *hash, const u8 *key, if (keylen != sizeof(u32)) return -EINVAL; - *(__le32 *)mctx = le32_to_cpup((__le32 *)key); + *mctx = le32_to_cpup((__le32 *)key); return 0; } diff --git a/arch/sparc/crypto/md5_glue.c b/arch/sparc/crypto/md5_glue.c index 111283fe837e..511db98d590a 100644 --- a/arch/sparc/crypto/md5_glue.c +++ b/arch/sparc/crypto/md5_glue.c @@ -33,10 +33,11 @@ static int md5_sparc64_init(struct shash_desc *desc) { struct md5_state *mctx = shash_desc_ctx(desc); - mctx->hash[0] = cpu_to_le32(MD5_H0); - mctx->hash[1] = cpu_to_le32(MD5_H1); - mctx->hash[2] = cpu_to_le32(MD5_H2); - mctx->hash[3] = cpu_to_le32(MD5_H3); + mctx->hash[0] = MD5_H0; + mctx->hash[1] = MD5_H1; + mctx->hash[2] = MD5_H2; + mctx->hash[3] = MD5_H3; + le32_to_cpu_array(mctx->hash, 4); mctx->byte_count = 0; return 0; diff --git a/arch/sparc/crypto/sha1_glue.c b/arch/sparc/crypto/sha1_glue.c index dc017782be52..86a654cce5ab 100644 --- a/arch/sparc/crypto/sha1_glue.c +++ b/arch/sparc/crypto/sha1_glue.c @@ -16,7 +16,7 @@ #include <linux/module.h> #include <linux/mm.h> #include <linux/types.h> -#include <crypto/sha.h> +#include <crypto/sha1.h> #include <asm/pstate.h> #include <asm/elf.h> diff --git a/arch/sparc/crypto/sha256_glue.c b/arch/sparc/crypto/sha256_glue.c index ca2547df9652..60ec524cf9ca 100644 --- a/arch/sparc/crypto/sha256_glue.c +++ b/arch/sparc/crypto/sha256_glue.c @@ -16,7 +16,7 @@ #include <linux/module.h> #include <linux/mm.h> #include <linux/types.h> -#include <crypto/sha.h> +#include <crypto/sha2.h> #include <asm/pstate.h> #include <asm/elf.h> diff --git a/arch/sparc/crypto/sha512_glue.c b/arch/sparc/crypto/sha512_glue.c index 3b2ca732ff7a..273ce21918c1 100644 --- a/arch/sparc/crypto/sha512_glue.c +++ b/arch/sparc/crypto/sha512_glue.c @@ -15,7 +15,7 @@ #include <linux/module.h> #include <linux/mm.h> #include <linux/types.h> -#include <crypto/sha.h> +#include <crypto/sha2.h> #include <asm/pstate.h> #include <asm/elf.h> diff --git a/arch/x86/crypto/aes_glue.c b/arch/x86/crypto/aes_glue.c deleted file mode 100644 index 7b7dc05fa1a4..000000000000 --- a/arch/x86/crypto/aes_glue.c +++ /dev/null @@ -1 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only diff --git a/arch/x86/crypto/aesni-intel_asm.S b/arch/x86/crypto/aesni-intel_asm.S index 1852b19a73a0..d1436c37008b 100644 --- a/arch/x86/crypto/aesni-intel_asm.S +++ b/arch/x86/crypto/aesni-intel_asm.S @@ -318,7 +318,7 @@ _initial_blocks_\@: # Main loop - Encrypt/Decrypt remaining blocks - cmp $0, %r13 + test %r13, %r13 je _zero_cipher_left_\@ sub $64, %r13 je _four_cipher_left_\@ @@ -437,7 +437,7 @@ _multiple_of_16_bytes_\@: mov PBlockLen(%arg2), %r12 - cmp $0, %r12 + test %r12, %r12 je _partial_done\@ GHASH_MUL %xmm8, %xmm13, %xmm9, %xmm10, %xmm11, %xmm5, %xmm6 @@ -474,7 +474,7 @@ _T_8_\@: add $8, %r10 sub $8, %r11 psrldq $8, %xmm0 - cmp $0, %r11 + test %r11, %r11 je _return_T_done_\@ _T_4_\@: movd %xmm0, %eax @@ -482,7 +482,7 @@ _T_4_\@: add $4, %r10 sub $4, %r11 psrldq $4, %xmm0 - cmp $0, %r11 + test %r11, %r11 je _return_T_done_\@ _T_123_\@: movd %xmm0, %eax @@ -619,7 +619,7 @@ _get_AAD_blocks\@: /* read the last <16B of AAD */ _get_AAD_rest\@: - cmp $0, %r11 + test %r11, %r11 je _get_AAD_done\@ READ_PARTIAL_BLOCK %r10, %r11, \TMP1, \TMP7 @@ -640,7 +640,7 @@ _get_AAD_done\@: .macro PARTIAL_BLOCK CYPH_PLAIN_OUT PLAIN_CYPH_IN PLAIN_CYPH_LEN DATA_OFFSET \ AAD_HASH operation mov PBlockLen(%arg2), %r13 - cmp $0, %r13 + test %r13, %r13 je _partial_block_done_\@ # Leave Macro if no partial blocks # Read in input data without over reading cmp $16, \PLAIN_CYPH_LEN @@ -692,7 +692,7 @@ _no_extra_mask_1_\@: pshufb %xmm2, %xmm3 pxor %xmm3, \AAD_HASH - cmp $0, %r10 + test %r10, %r10 jl _partial_incomplete_1_\@ # GHASH computation for the last <16 Byte block @@ -727,7 +727,7 @@ _no_extra_mask_2_\@: pshufb %xmm2, %xmm9 pxor %xmm9, \AAD_HASH - cmp $0, %r10 + test %r10, %r10 jl _partial_incomplete_2_\@ # GHASH computation for the last <16 Byte block @@ -747,7 +747,7 @@ _encode_done_\@: pshufb %xmm2, %xmm9 .endif # output encrypted Bytes - cmp $0, %r10 + test %r10, %r10 jl _partial_fill_\@ mov %r13, %r12 mov $16, %r13 @@ -2720,7 +2720,7 @@ SYM_FUNC_END(aesni_ctr_enc) */ SYM_FUNC_START(aesni_xts_crypt8) FRAME_BEGIN - cmpb $0, %cl + testb %cl, %cl movl $0, %ecx movl $240, %r10d leaq _aesni_enc4, %r11 diff --git a/arch/x86/crypto/aesni-intel_avx-x86_64.S b/arch/x86/crypto/aesni-intel_avx-x86_64.S index 5fee47956f3b..2cf8e94d986a 100644 --- a/arch/x86/crypto/aesni-intel_avx-x86_64.S +++ b/arch/x86/crypto/aesni-intel_avx-x86_64.S @@ -369,7 +369,7 @@ _initial_num_blocks_is_0\@: _initial_blocks_encrypted\@: - cmp $0, %r13 + test %r13, %r13 je _zero_cipher_left\@ sub $128, %r13 @@ -528,7 +528,7 @@ _multiple_of_16_bytes\@: vmovdqu HashKey(arg2), %xmm13 mov PBlockLen(arg2), %r12 - cmp $0, %r12 + test %r12, %r12 je _partial_done\@ #GHASH computation for the last <16 Byte block @@ -573,7 +573,7 @@ _T_8\@: add $8, %r10 sub $8, %r11 vpsrldq $8, %xmm9, %xmm9 - cmp $0, %r11 + test %r11, %r11 je _return_T_done\@ _T_4\@: vmovd %xmm9, %eax @@ -581,7 +581,7 @@ _T_4\@: add $4, %r10 sub $4, %r11 vpsrldq $4, %xmm9, %xmm9 - cmp $0, %r11 + test %r11, %r11 je _return_T_done\@ _T_123\@: vmovd %xmm9, %eax @@ -625,7 +625,7 @@ _get_AAD_blocks\@: cmp $16, %r11 jge _get_AAD_blocks\@ vmovdqu \T8, \T7 - cmp $0, %r11 + test %r11, %r11 je _get_AAD_done\@ vpxor \T7, \T7, \T7 @@ -644,7 +644,7 @@ _get_AAD_rest8\@: vpxor \T1, \T7, \T7 jmp _get_AAD_rest8\@ _get_AAD_rest4\@: - cmp $0, %r11 + test %r11, %r11 jle _get_AAD_rest0\@ mov (%r10), %eax movq %rax, \T1 @@ -749,7 +749,7 @@ _done_read_partial_block_\@: .macro PARTIAL_BLOCK GHASH_MUL CYPH_PLAIN_OUT PLAIN_CYPH_IN PLAIN_CYPH_LEN DATA_OFFSET \ AAD_HASH ENC_DEC mov PBlockLen(arg2), %r13 - cmp $0, %r13 + test %r13, %r13 je _partial_block_done_\@ # Leave Macro if no partial blocks # Read in input data without over reading cmp $16, \PLAIN_CYPH_LEN @@ -801,7 +801,7 @@ _no_extra_mask_1_\@: vpshufb %xmm2, %xmm3, %xmm3 vpxor %xmm3, \AAD_HASH, \AAD_HASH - cmp $0, %r10 + test %r10, %r10 jl _partial_incomplete_1_\@ # GHASH computation for the last <16 Byte block @@ -836,7 +836,7 @@ _no_extra_mask_2_\@: vpshufb %xmm2, %xmm9, %xmm9 vpxor %xmm9, \AAD_HASH, \AAD_HASH - cmp $0, %r10 + test %r10, %r10 jl _partial_incomplete_2_\@ # GHASH computation for the last <16 Byte block @@ -856,7 +856,7 @@ _encode_done_\@: vpshufb %xmm2, %xmm9, %xmm9 .endif # output encrypted Bytes - cmp $0, %r10 + test %r10, %r10 jl _partial_fill_\@ mov %r13, %r12 mov $16, %r13 diff --git a/arch/x86/crypto/poly1305-x86_64-cryptogams.pl b/arch/x86/crypto/poly1305-x86_64-cryptogams.pl index 7d568012cc15..71fae5a09e56 100644 --- a/arch/x86/crypto/poly1305-x86_64-cryptogams.pl +++ b/arch/x86/crypto/poly1305-x86_64-cryptogams.pl @@ -251,7 +251,7 @@ $code.=<<___; mov %rax,8($ctx) mov %rax,16($ctx) - cmp \$0,$inp + test $inp,$inp je .Lno_key ___ $code.=<<___ if (!$kernel); diff --git a/arch/x86/crypto/poly1305_glue.c b/arch/x86/crypto/poly1305_glue.c index c44aba290fbb..646da46e8d10 100644 --- a/arch/x86/crypto/poly1305_glue.c +++ b/arch/x86/crypto/poly1305_glue.c @@ -210,7 +210,7 @@ void poly1305_final_arch(struct poly1305_desc_ctx *dctx, u8 *dst) } poly1305_simd_emit(&dctx->h, dst, dctx->s); - *dctx = (struct poly1305_desc_ctx){}; + memzero_explicit(dctx, sizeof(*dctx)); } EXPORT_SYMBOL(poly1305_final_arch); diff --git a/arch/x86/crypto/sha1_ssse3_glue.c b/arch/x86/crypto/sha1_ssse3_glue.c index 18200135603f..44340a1139e0 100644 --- a/arch/x86/crypto/sha1_ssse3_glue.c +++ b/arch/x86/crypto/sha1_ssse3_glue.c @@ -22,7 +22,7 @@ #include <linux/module.h> #include <linux/mm.h> #include <linux/types.h> -#include <crypto/sha.h> +#include <crypto/sha1.h> #include <crypto/sha1_base.h> #include <asm/simd.h> diff --git a/arch/x86/crypto/sha256_ssse3_glue.c b/arch/x86/crypto/sha256_ssse3_glue.c index dd06249229e1..3a5f6be7dbba 100644 --- a/arch/x86/crypto/sha256_ssse3_glue.c +++ b/arch/x86/crypto/sha256_ssse3_glue.c @@ -35,7 +35,7 @@ #include <linux/module.h> #include <linux/mm.h> #include <linux/types.h> -#include <crypto/sha.h> +#include <crypto/sha2.h> #include <crypto/sha256_base.h> #include <linux/string.h> #include <asm/simd.h> diff --git a/arch/x86/crypto/sha512-avx-asm.S b/arch/x86/crypto/sha512-avx-asm.S index 63470fd6ae32..684d58c8bc4f 100644 --- a/arch/x86/crypto/sha512-avx-asm.S +++ b/arch/x86/crypto/sha512-avx-asm.S @@ -278,7 +278,7 @@ frame_size = frame_GPRSAVE + GPRSAVE_SIZE # "blocks" is the message length in SHA512 blocks ######################################################################## SYM_FUNC_START(sha512_transform_avx) - cmp $0, msglen + test msglen, msglen je nowork # Allocate Stack Space diff --git a/arch/x86/crypto/sha512-ssse3-asm.S b/arch/x86/crypto/sha512-ssse3-asm.S index 7946a1bee85b..50812af0b083 100644 --- a/arch/x86/crypto/sha512-ssse3-asm.S +++ b/arch/x86/crypto/sha512-ssse3-asm.S @@ -280,7 +280,7 @@ frame_size = frame_GPRSAVE + GPRSAVE_SIZE ######################################################################## SYM_FUNC_START(sha512_transform_ssse3) - cmp $0, msglen + test msglen, msglen je nowork # Allocate Stack Space diff --git a/arch/x86/crypto/sha512_ssse3_glue.c b/arch/x86/crypto/sha512_ssse3_glue.c index b0b05c93409e..30e70f4fe2f7 100644 --- a/arch/x86/crypto/sha512_ssse3_glue.c +++ b/arch/x86/crypto/sha512_ssse3_glue.c @@ -34,7 +34,7 @@ #include <linux/mm.h> #include <linux/string.h> #include <linux/types.h> -#include <crypto/sha.h> +#include <crypto/sha2.h> #include <crypto/sha512_base.h> #include <asm/simd.h> diff --git a/arch/x86/purgatory/purgatory.c b/arch/x86/purgatory/purgatory.c index 7b37a412f829..f03b64d9cb51 100644 --- a/arch/x86/purgatory/purgatory.c +++ b/arch/x86/purgatory/purgatory.c @@ -9,7 +9,7 @@ */ #include <linux/bug.h> -#include <crypto/sha.h> +#include <crypto/sha2.h> #include <asm/purgatory.h> #include "../boot/string.h" |