// SPDX-License-Identifier: GPL-2.0
/*
 * ChaCha and HChaCha functions (ARM optimized)
 *
 * Copyright (C) 2016-2019 Linaro, Ltd. <ard.biesheuvel@linaro.org>
 * Copyright (C) 2015 Martin Willi
 */

#include <crypto/chacha.h>
#include <crypto/internal/simd.h>
#include <linux/jump_label.h>
#include <linux/kernel.h>
#include <linux/module.h>

#include <asm/cputype.h>
#include <asm/hwcap.h>
#include <asm/neon.h>
#include <asm/simd.h>

asmlinkage void chacha_block_xor_neon(const struct chacha_state *state,
				      u8 *dst, const u8 *src, int nrounds);
asmlinkage void chacha_4block_xor_neon(const struct chacha_state *state,
				       u8 *dst, const u8 *src,
				       int nrounds, unsigned int nbytes);
asmlinkage void hchacha_block_arm(const struct chacha_state *state,
				  u32 out[HCHACHA_OUT_WORDS], int nrounds);
asmlinkage void hchacha_block_neon(const struct chacha_state *state,
				   u32 out[HCHACHA_OUT_WORDS], int nrounds);

asmlinkage void chacha_doarm(u8 *dst, const u8 *src, unsigned int bytes,
			     const struct chacha_state *state, int nrounds);

static __ro_after_init DEFINE_STATIC_KEY_FALSE(use_neon);

static inline bool neon_usable(void)
{
	return static_branch_likely(&use_neon) && crypto_simd_usable();
}

static void chacha_doneon(struct chacha_state *state, u8 *dst, const u8 *src,
			  unsigned int bytes, int nrounds)
{
	u8 buf[CHACHA_BLOCK_SIZE];

	while (bytes > CHACHA_BLOCK_SIZE) {
		unsigned int l = min(bytes, CHACHA_BLOCK_SIZE * 4U);

		chacha_4block_xor_neon(state, dst, src, nrounds, l);
		bytes -= l;
		src += l;
		dst += l;
		state->x[12] += DIV_ROUND_UP(l, CHACHA_BLOCK_SIZE);
	}
	if (bytes) {
		const u8 *s = src;
		u8 *d = dst;

		/* Bounce a partial final block through a stack buffer. */
		if (bytes != CHACHA_BLOCK_SIZE)
			s = d = memcpy(buf, src, bytes);
		chacha_block_xor_neon(state, d, s, nrounds);
		if (d != dst)
			memcpy(dst, buf, bytes);
		state->x[12]++;
	}
}

void hchacha_block_arch(const struct chacha_state *state,
			u32 out[HCHACHA_OUT_WORDS], int nrounds)
{
	if (!IS_ENABLED(CONFIG_KERNEL_MODE_NEON) || !neon_usable()) {
		hchacha_block_arm(state, out, nrounds);
	} else {
		kernel_neon_begin();
		hchacha_block_neon(state, out, nrounds);
		kernel_neon_end();
	}
}
EXPORT_SYMBOL(hchacha_block_arch);

void chacha_crypt_arch(struct chacha_state *state, u8 *dst, const u8 *src,
		       unsigned int bytes, int nrounds)
{
	if (!IS_ENABLED(CONFIG_KERNEL_MODE_NEON) || !neon_usable() ||
	    bytes <= CHACHA_BLOCK_SIZE) {
		chacha_doarm(dst, src, bytes, state, nrounds);
		state->x[12] += DIV_ROUND_UP(bytes, CHACHA_BLOCK_SIZE);
		return;
	}

	do {
		/*
		 * Limit each NEON section to 4 KiB so that preemption is
		 * not disabled for too long at a time.
		 */
		unsigned int todo = min_t(unsigned int, bytes, SZ_4K);

		kernel_neon_begin();
		chacha_doneon(state, dst, src, todo, nrounds);
		kernel_neon_end();

		bytes -= todo;
		src += todo;
		dst += todo;
	} while (bytes);
}
EXPORT_SYMBOL(chacha_crypt_arch);

bool chacha_is_arch_optimized(void)
{
	/* We can always use at least the ARM scalar implementation. */
	return true;
}
EXPORT_SYMBOL(chacha_is_arch_optimized);

static int __init chacha_arm_mod_init(void)
{
	if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && (elf_hwcap & HWCAP_NEON)) {
		switch (read_cpuid_part()) {
		case ARM_CPU_PART_CORTEX_A7:
		case ARM_CPU_PART_CORTEX_A5:
			/*
			 * The Cortex-A7 and Cortex-A5 do not perform well
			 * with the NEON implementation but do incredibly
			 * well with the scalar one and use less power.
			 */
			break;
		default:
			static_branch_enable(&use_neon);
		}
	}
	return 0;
}
subsys_initcall(chacha_arm_mod_init);

static void __exit chacha_arm_mod_exit(void)
{
}
module_exit(chacha_arm_mod_exit);

MODULE_DESCRIPTION("ChaCha and HChaCha functions (ARM optimized)");
MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
MODULE_LICENSE("GPL v2");
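
/*
 * Illustrative sketch, not part of the file above: one way a caller could
 * drive the exported chacha_crypt_arch() for a one-shot ChaCha20 encryption.
 * It assumes the chacha_init() helper, CHACHA_KEY_WORDS, and CHACHA_IV_SIZE
 * from <crypto/chacha.h>, plus memzero_explicit() from <linux/string.h>;
 * chacha20_encrypt_example() itself is a hypothetical name, not an upstream
 * function.
 */
static void __maybe_unused
chacha20_encrypt_example(u8 *dst, const u8 *src, unsigned int len,
			 const u32 key[CHACHA_KEY_WORDS],
			 const u8 iv[CHACHA_IV_SIZE])
{
	struct chacha_state state;

	/* Load the constants, the key, and the 16-byte IV (counter + nonce). */
	chacha_init(&state, key, iv);
	/* 20 rounds selects ChaCha20; 12 would select ChaCha12. */
	chacha_crypt_arch(&state, dst, src, len, 20);
	/* Wipe the expanded key material from the stack. */
	memzero_explicit(&state, sizeof(state));
}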