diff options
Diffstat (limited to 'lib/raid')
| -rw-r--r-- | lib/raid/xor/Makefile | 5 | ||||
| -rw-r--r-- | lib/raid/xor/powerpc/xor_vmx.c | 156 | ||||
| -rw-r--r-- | lib/raid/xor/powerpc/xor_vmx.h | 22 | ||||
| -rw-r--r-- | lib/raid/xor/powerpc/xor_vmx_glue.c | 67 |
4 files changed, 250 insertions, 0 deletions
diff --git a/lib/raid/xor/Makefile b/lib/raid/xor/Makefile index e8868f5fc396..006b44ce46bf 100644 --- a/lib/raid/xor/Makefile +++ b/lib/raid/xor/Makefile @@ -16,6 +16,7 @@ endif xor-$(CONFIG_ARM64) += arm64/xor-neon.o arm64/xor-neon-glue.o xor-$(CONFIG_CPU_HAS_LSX) += loongarch/xor_simd.o xor-$(CONFIG_CPU_HAS_LSX) += loongarch/xor_simd_glue.o +xor-$(CONFIG_ALTIVEC) += powerpc/xor_vmx.o powerpc/xor_vmx_glue.o CFLAGS_arm/xor-neon.o += $(CC_FLAGS_FPU) @@ -23,3 +24,7 @@ CFLAGS_REMOVE_arm/xor-neon.o += $(CC_FLAGS_NO_FPU) CFLAGS_arm64/xor-neon.o += $(CC_FLAGS_FPU) CFLAGS_REMOVE_arm64/xor-neon.o += $(CC_FLAGS_NO_FPU) + +CFLAGS_powerpc/xor_vmx.o += -mhard-float -maltivec \ + $(call cc-option,-mabi=altivec) \ + -isystem $(shell $(CC) -print-file-name=include) diff --git a/lib/raid/xor/powerpc/xor_vmx.c b/lib/raid/xor/powerpc/xor_vmx.c new file mode 100644 index 000000000000..aab49d056d18 --- /dev/null +++ b/lib/raid/xor/powerpc/xor_vmx.c @@ -0,0 +1,156 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * + * Copyright (C) IBM Corporation, 2012 + * + * Author: Anton Blanchard <anton@au.ibm.com> + */ + +/* + * Sparse (as at v0.5.0) gets very, very confused by this file. + * Make it a bit simpler for it. + */ +#if !defined(__CHECKER__) +#include <altivec.h> +#else +#define vec_xor(a, b) a ^ b +#define vector __attribute__((vector_size(16))) +#endif + +#include "xor_vmx.h" + +typedef vector signed char unative_t; + +#define DEFINE(V) \ + unative_t *V = (unative_t *)V##_in; \ + unative_t V##_0, V##_1, V##_2, V##_3 + +#define LOAD(V) \ + do { \ + V##_0 = V[0]; \ + V##_1 = V[1]; \ + V##_2 = V[2]; \ + V##_3 = V[3]; \ + } while (0) + +#define STORE(V) \ + do { \ + V[0] = V##_0; \ + V[1] = V##_1; \ + V[2] = V##_2; \ + V[3] = V##_3; \ + } while (0) + +#define XOR(V1, V2) \ + do { \ + V1##_0 = vec_xor(V1##_0, V2##_0); \ + V1##_1 = vec_xor(V1##_1, V2##_1); \ + V1##_2 = vec_xor(V1##_2, V2##_2); \ + V1##_3 = vec_xor(V1##_3, V2##_3); \ + } while (0) + +void __xor_altivec_2(unsigned long bytes, + unsigned long * __restrict v1_in, + const unsigned long * __restrict v2_in) +{ + DEFINE(v1); + DEFINE(v2); + unsigned long lines = bytes / (sizeof(unative_t)) / 4; + + do { + LOAD(v1); + LOAD(v2); + XOR(v1, v2); + STORE(v1); + + v1 += 4; + v2 += 4; + } while (--lines > 0); +} + +void __xor_altivec_3(unsigned long bytes, + unsigned long * __restrict v1_in, + const unsigned long * __restrict v2_in, + const unsigned long * __restrict v3_in) +{ + DEFINE(v1); + DEFINE(v2); + DEFINE(v3); + unsigned long lines = bytes / (sizeof(unative_t)) / 4; + + do { + LOAD(v1); + LOAD(v2); + LOAD(v3); + XOR(v1, v2); + XOR(v1, v3); + STORE(v1); + + v1 += 4; + v2 += 4; + v3 += 4; + } while (--lines > 0); +} + +void __xor_altivec_4(unsigned long bytes, + unsigned long * __restrict v1_in, + const unsigned long * __restrict v2_in, + const unsigned long * __restrict v3_in, + const unsigned long * __restrict v4_in) +{ + DEFINE(v1); + DEFINE(v2); + DEFINE(v3); + DEFINE(v4); + unsigned long lines = bytes / (sizeof(unative_t)) / 4; + + do { + LOAD(v1); + LOAD(v2); + LOAD(v3); + LOAD(v4); + XOR(v1, v2); + XOR(v3, v4); + XOR(v1, v3); + STORE(v1); + + v1 += 4; + v2 += 4; + v3 += 4; + v4 += 4; + } while (--lines > 0); +} + +void __xor_altivec_5(unsigned long bytes, + unsigned long * __restrict v1_in, + const unsigned long * __restrict v2_in, + const unsigned long * __restrict v3_in, + const unsigned long * __restrict v4_in, + const unsigned long * __restrict v5_in) +{ + DEFINE(v1); + DEFINE(v2); + DEFINE(v3); + DEFINE(v4); + DEFINE(v5); + unsigned long lines = bytes / (sizeof(unative_t)) / 4; + + do { + LOAD(v1); + LOAD(v2); + LOAD(v3); + LOAD(v4); + LOAD(v5); + XOR(v1, v2); + XOR(v3, v4); + XOR(v1, v5); + XOR(v1, v3); + STORE(v1); + + v1 += 4; + v2 += 4; + v3 += 4; + v4 += 4; + v5 += 4; + } while (--lines > 0); +} diff --git a/lib/raid/xor/powerpc/xor_vmx.h b/lib/raid/xor/powerpc/xor_vmx.h new file mode 100644 index 000000000000..573c41d90dac --- /dev/null +++ b/lib/raid/xor/powerpc/xor_vmx.h @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Simple interface to link xor_vmx.c and xor_vmx_glue.c + * + * Separating these file ensures that no altivec instructions are run + * outside of the enable/disable altivec block. + */ + +void __xor_altivec_2(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2); +void __xor_altivec_3(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3); +void __xor_altivec_4(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3, + const unsigned long * __restrict p4); +void __xor_altivec_5(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3, + const unsigned long * __restrict p4, + const unsigned long * __restrict p5); diff --git a/lib/raid/xor/powerpc/xor_vmx_glue.c b/lib/raid/xor/powerpc/xor_vmx_glue.c new file mode 100644 index 000000000000..c41e38340700 --- /dev/null +++ b/lib/raid/xor/powerpc/xor_vmx_glue.c @@ -0,0 +1,67 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Altivec XOR operations + * + * Copyright 2017 IBM Corp. + */ + +#include <linux/preempt.h> +#include <linux/sched.h> +#include <linux/raid/xor_impl.h> +#include <asm/switch_to.h> +#include <asm/xor.h> +#include "xor_vmx.h" + +static void xor_altivec_2(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2) +{ + preempt_disable(); + enable_kernel_altivec(); + __xor_altivec_2(bytes, p1, p2); + disable_kernel_altivec(); + preempt_enable(); +} + +static void xor_altivec_3(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3) +{ + preempt_disable(); + enable_kernel_altivec(); + __xor_altivec_3(bytes, p1, p2, p3); + disable_kernel_altivec(); + preempt_enable(); +} + +static void xor_altivec_4(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3, + const unsigned long * __restrict p4) +{ + preempt_disable(); + enable_kernel_altivec(); + __xor_altivec_4(bytes, p1, p2, p3, p4); + disable_kernel_altivec(); + preempt_enable(); +} + +static void xor_altivec_5(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3, + const unsigned long * __restrict p4, + const unsigned long * __restrict p5) +{ + preempt_disable(); + enable_kernel_altivec(); + __xor_altivec_5(bytes, p1, p2, p3, p4, p5); + disable_kernel_altivec(); + preempt_enable(); +} + +struct xor_block_template xor_block_altivec = { + .name = "altivec", + .do_2 = xor_altivec_2, + .do_3 = xor_altivec_3, + .do_4 = xor_altivec_4, + .do_5 = xor_altivec_5, +}; |
