diff options
Diffstat (limited to 'lib/crypto/x86/chacha_glue.c')
| -rw-r--r-- | lib/crypto/x86/chacha_glue.c | 196 | 
1 files changed, 196 insertions, 0 deletions
diff --git a/lib/crypto/x86/chacha_glue.c b/lib/crypto/x86/chacha_glue.c new file mode 100644 index 000000000000..10b2c945f541 --- /dev/null +++ b/lib/crypto/x86/chacha_glue.c @@ -0,0 +1,196 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * ChaCha and HChaCha functions (x86_64 optimized) + * + * Copyright (C) 2015 Martin Willi + */ + +#include <asm/simd.h> +#include <crypto/chacha.h> +#include <linux/jump_label.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/sizes.h> + +asmlinkage void chacha_block_xor_ssse3(const struct chacha_state *state, +				       u8 *dst, const u8 *src, +				       unsigned int len, int nrounds); +asmlinkage void chacha_4block_xor_ssse3(const struct chacha_state *state, +					u8 *dst, const u8 *src, +					unsigned int len, int nrounds); +asmlinkage void hchacha_block_ssse3(const struct chacha_state *state, +				    u32 out[HCHACHA_OUT_WORDS], int nrounds); + +asmlinkage void chacha_2block_xor_avx2(const struct chacha_state *state, +				       u8 *dst, const u8 *src, +				       unsigned int len, int nrounds); +asmlinkage void chacha_4block_xor_avx2(const struct chacha_state *state, +				       u8 *dst, const u8 *src, +				       unsigned int len, int nrounds); +asmlinkage void chacha_8block_xor_avx2(const struct chacha_state *state, +				       u8 *dst, const u8 *src, +				       unsigned int len, int nrounds); + +asmlinkage void chacha_2block_xor_avx512vl(const struct chacha_state *state, +					   u8 *dst, const u8 *src, +					   unsigned int len, int nrounds); +asmlinkage void chacha_4block_xor_avx512vl(const struct chacha_state *state, +					   u8 *dst, const u8 *src, +					   unsigned int len, int nrounds); +asmlinkage void chacha_8block_xor_avx512vl(const struct chacha_state *state, +					   u8 *dst, const u8 *src, +					   unsigned int len, int nrounds); + +static __ro_after_init DEFINE_STATIC_KEY_FALSE(chacha_use_simd); +static __ro_after_init DEFINE_STATIC_KEY_FALSE(chacha_use_avx2); +static __ro_after_init DEFINE_STATIC_KEY_FALSE(chacha_use_avx512vl); + +static unsigned int chacha_advance(unsigned int len, unsigned int maxblocks) +{ +	len = min(len, maxblocks * CHACHA_BLOCK_SIZE); +	return round_up(len, CHACHA_BLOCK_SIZE) / CHACHA_BLOCK_SIZE; +} + +static void chacha_dosimd(struct chacha_state *state, u8 *dst, const u8 *src, +			  unsigned int bytes, int nrounds) +{ +	if (static_branch_likely(&chacha_use_avx512vl)) { +		while (bytes >= CHACHA_BLOCK_SIZE * 8) { +			chacha_8block_xor_avx512vl(state, dst, src, bytes, +						   nrounds); +			bytes -= CHACHA_BLOCK_SIZE * 8; +			src += CHACHA_BLOCK_SIZE * 8; +			dst += CHACHA_BLOCK_SIZE * 8; +			state->x[12] += 8; +		} +		if (bytes > CHACHA_BLOCK_SIZE * 4) { +			chacha_8block_xor_avx512vl(state, dst, src, bytes, +						   nrounds); +			state->x[12] += chacha_advance(bytes, 8); +			return; +		} +		if (bytes > CHACHA_BLOCK_SIZE * 2) { +			chacha_4block_xor_avx512vl(state, dst, src, bytes, +						   nrounds); +			state->x[12] += chacha_advance(bytes, 4); +			return; +		} +		if (bytes) { +			chacha_2block_xor_avx512vl(state, dst, src, bytes, +						   nrounds); +			state->x[12] += chacha_advance(bytes, 2); +			return; +		} +	} + +	if (static_branch_likely(&chacha_use_avx2)) { +		while (bytes >= CHACHA_BLOCK_SIZE * 8) { +			chacha_8block_xor_avx2(state, dst, src, bytes, nrounds); +			bytes -= CHACHA_BLOCK_SIZE * 8; +			src += CHACHA_BLOCK_SIZE * 8; +			dst += CHACHA_BLOCK_SIZE * 8; +			state->x[12] += 8; +		} +		if (bytes > CHACHA_BLOCK_SIZE * 4) { +			chacha_8block_xor_avx2(state, dst, src, bytes, nrounds); +			state->x[12] += chacha_advance(bytes, 8); +			return; +		} +		if (bytes > CHACHA_BLOCK_SIZE * 2) { +			chacha_4block_xor_avx2(state, dst, src, bytes, nrounds); +			state->x[12] += chacha_advance(bytes, 4); +			return; +		} +		if (bytes > CHACHA_BLOCK_SIZE) { +			chacha_2block_xor_avx2(state, dst, src, bytes, nrounds); +			state->x[12] += chacha_advance(bytes, 2); +			return; +		} +	} + +	while (bytes >= CHACHA_BLOCK_SIZE * 4) { +		chacha_4block_xor_ssse3(state, dst, src, bytes, nrounds); +		bytes -= CHACHA_BLOCK_SIZE * 4; +		src += CHACHA_BLOCK_SIZE * 4; +		dst += CHACHA_BLOCK_SIZE * 4; +		state->x[12] += 4; +	} +	if (bytes > CHACHA_BLOCK_SIZE) { +		chacha_4block_xor_ssse3(state, dst, src, bytes, nrounds); +		state->x[12] += chacha_advance(bytes, 4); +		return; +	} +	if (bytes) { +		chacha_block_xor_ssse3(state, dst, src, bytes, nrounds); +		state->x[12]++; +	} +} + +void hchacha_block_arch(const struct chacha_state *state, +			u32 out[HCHACHA_OUT_WORDS], int nrounds) +{ +	if (!static_branch_likely(&chacha_use_simd)) { +		hchacha_block_generic(state, out, nrounds); +	} else { +		kernel_fpu_begin(); +		hchacha_block_ssse3(state, out, nrounds); +		kernel_fpu_end(); +	} +} +EXPORT_SYMBOL(hchacha_block_arch); + +void chacha_crypt_arch(struct chacha_state *state, u8 *dst, const u8 *src, +		       unsigned int bytes, int nrounds) +{ +	if (!static_branch_likely(&chacha_use_simd) || +	    bytes <= CHACHA_BLOCK_SIZE) +		return chacha_crypt_generic(state, dst, src, bytes, nrounds); + +	do { +		unsigned int todo = min_t(unsigned int, bytes, SZ_4K); + +		kernel_fpu_begin(); +		chacha_dosimd(state, dst, src, todo, nrounds); +		kernel_fpu_end(); + +		bytes -= todo; +		src += todo; +		dst += todo; +	} while (bytes); +} +EXPORT_SYMBOL(chacha_crypt_arch); + +bool chacha_is_arch_optimized(void) +{ +	return static_key_enabled(&chacha_use_simd); +} +EXPORT_SYMBOL(chacha_is_arch_optimized); + +static int __init chacha_simd_mod_init(void) +{ +	if (!boot_cpu_has(X86_FEATURE_SSSE3)) +		return 0; + +	static_branch_enable(&chacha_use_simd); + +	if (boot_cpu_has(X86_FEATURE_AVX) && +	    boot_cpu_has(X86_FEATURE_AVX2) && +	    cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL)) { +		static_branch_enable(&chacha_use_avx2); + +		if (boot_cpu_has(X86_FEATURE_AVX512VL) && +		    boot_cpu_has(X86_FEATURE_AVX512BW)) /* kmovq */ +			static_branch_enable(&chacha_use_avx512vl); +	} +	return 0; +} +subsys_initcall(chacha_simd_mod_init); + +static void __exit chacha_simd_mod_exit(void) +{ +} +module_exit(chacha_simd_mod_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Martin Willi <martin@strongswan.org>"); +MODULE_DESCRIPTION("ChaCha and HChaCha functions (x86_64 optimized)");  | 
