From 3f276cece4dd9e8bf199d9bf3901eef8ca904c2d Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 27 Mar 2026 07:16:47 +0100 Subject: powerpc: move the XOR code to lib/raid/ Move the optimized XOR into lib/raid and include it in xor.ko instead of always building it into the main kernel image. Link: https://lkml.kernel.org/r/20260327061704.3707577-16-hch@lst.de Signed-off-by: Christoph Hellwig Reviewed-by: Eric Biggers Tested-by: Eric Biggers Cc: Albert Ou Cc: Alexander Gordeev Cc: Alexandre Ghiti Cc: Andreas Larsson Cc: Anton Ivanov Cc: Ard Biesheuvel Cc: Arnd Bergmann Cc: "Borislav Petkov (AMD)" Cc: Catalin Marinas Cc: Chris Mason Cc: Christian Borntraeger Cc: Dan Williams Cc: David S. Miller Cc: David Sterba Cc: Heiko Carstens Cc: Herbert Xu Cc: "H. Peter Anvin" Cc: Huacai Chen Cc: Ingo Molnar Cc: Jason A. Donenfeld Cc: Johannes Berg Cc: Li Nan Cc: Madhavan Srinivasan Cc: Magnus Lindholm Cc: Matt Turner Cc: Michael Ellerman Cc: Nicholas Piggin Cc: Palmer Dabbelt Cc: Richard Henderson Cc: Richard Weinberger Cc: Russell King Cc: Song Liu Cc: Sven Schnelle Cc: Ted Ts'o Cc: Vasily Gorbik Cc: WANG Xuerui Cc: Will Deacon Signed-off-by: Andrew Morton --- arch/powerpc/include/asm/xor.h | 17 +--- arch/powerpc/include/asm/xor_altivec.h | 22 ----- arch/powerpc/lib/Makefile | 5 -- arch/powerpc/lib/xor_vmx.c | 156 --------------------------------- arch/powerpc/lib/xor_vmx.h | 22 ----- arch/powerpc/lib/xor_vmx_glue.c | 63 ------------- lib/raid/xor/Makefile | 5 ++ lib/raid/xor/powerpc/xor_vmx.c | 156 +++++++++++++++++++++++++++++++++ lib/raid/xor/powerpc/xor_vmx.h | 22 +++++ lib/raid/xor/powerpc/xor_vmx_glue.c | 67 ++++++++++++++ 10 files changed, 252 insertions(+), 283 deletions(-) delete mode 100644 arch/powerpc/include/asm/xor_altivec.h delete mode 100644 arch/powerpc/lib/xor_vmx.c delete mode 100644 arch/powerpc/lib/xor_vmx.h delete mode 100644 arch/powerpc/lib/xor_vmx_glue.c create mode 100644 lib/raid/xor/powerpc/xor_vmx.c create mode 100644 
lib/raid/xor/powerpc/xor_vmx.h create mode 100644 lib/raid/xor/powerpc/xor_vmx_glue.c diff --git a/arch/powerpc/include/asm/xor.h b/arch/powerpc/include/asm/xor.h index 30224c5279c4..3293ac87181c 100644 --- a/arch/powerpc/include/asm/xor.h +++ b/arch/powerpc/include/asm/xor.h @@ -8,24 +8,11 @@ #ifndef _ASM_POWERPC_XOR_H #define _ASM_POWERPC_XOR_H -#ifdef CONFIG_ALTIVEC - -#include #include -#include - -static struct xor_block_template xor_block_altivec = { - .name = "altivec", - .do_2 = xor_altivec_2, - .do_3 = xor_altivec_3, - .do_4 = xor_altivec_4, - .do_5 = xor_altivec_5, -}; -#endif /* CONFIG_ALTIVEC */ - -/* Also try the generic routines. */ #include +extern struct xor_block_template xor_block_altivec; + #define arch_xor_init arch_xor_init static __always_inline void __init arch_xor_init(void) { diff --git a/arch/powerpc/include/asm/xor_altivec.h b/arch/powerpc/include/asm/xor_altivec.h deleted file mode 100644 index 294620a25f80..000000000000 --- a/arch/powerpc/include/asm/xor_altivec.h +++ /dev/null @@ -1,22 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_POWERPC_XOR_ALTIVEC_H -#define _ASM_POWERPC_XOR_ALTIVEC_H - -#ifdef CONFIG_ALTIVEC -void xor_altivec_2(unsigned long bytes, unsigned long * __restrict p1, - const unsigned long * __restrict p2); -void xor_altivec_3(unsigned long bytes, unsigned long * __restrict p1, - const unsigned long * __restrict p2, - const unsigned long * __restrict p3); -void xor_altivec_4(unsigned long bytes, unsigned long * __restrict p1, - const unsigned long * __restrict p2, - const unsigned long * __restrict p3, - const unsigned long * __restrict p4); -void xor_altivec_5(unsigned long bytes, unsigned long * __restrict p1, - const unsigned long * __restrict p2, - const unsigned long * __restrict p3, - const unsigned long * __restrict p4, - const unsigned long * __restrict p5); - -#endif -#endif /* _ASM_POWERPC_XOR_ALTIVEC_H */ diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile index 
f14ecab674a3..002edc3f01d5 100644 --- a/arch/powerpc/lib/Makefile +++ b/arch/powerpc/lib/Makefile @@ -73,9 +73,4 @@ obj-$(CONFIG_PPC_LIB_RHEAP) += rheap.o obj-$(CONFIG_FTR_FIXUP_SELFTEST) += feature-fixups-test.o -obj-$(CONFIG_ALTIVEC) += xor_vmx.o xor_vmx_glue.o -CFLAGS_xor_vmx.o += -mhard-float -maltivec $(call cc-option,-mabi=altivec) -# Enable -CFLAGS_xor_vmx.o += -isystem $(shell $(CC) -print-file-name=include) - obj-$(CONFIG_PPC64) += $(obj64-y) diff --git a/arch/powerpc/lib/xor_vmx.c b/arch/powerpc/lib/xor_vmx.c deleted file mode 100644 index aab49d056d18..000000000000 --- a/arch/powerpc/lib/xor_vmx.c +++ /dev/null @@ -1,156 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * - * Copyright (C) IBM Corporation, 2012 - * - * Author: Anton Blanchard - */ - -/* - * Sparse (as at v0.5.0) gets very, very confused by this file. - * Make it a bit simpler for it. - */ -#if !defined(__CHECKER__) -#include -#else -#define vec_xor(a, b) a ^ b -#define vector __attribute__((vector_size(16))) -#endif - -#include "xor_vmx.h" - -typedef vector signed char unative_t; - -#define DEFINE(V) \ - unative_t *V = (unative_t *)V##_in; \ - unative_t V##_0, V##_1, V##_2, V##_3 - -#define LOAD(V) \ - do { \ - V##_0 = V[0]; \ - V##_1 = V[1]; \ - V##_2 = V[2]; \ - V##_3 = V[3]; \ - } while (0) - -#define STORE(V) \ - do { \ - V[0] = V##_0; \ - V[1] = V##_1; \ - V[2] = V##_2; \ - V[3] = V##_3; \ - } while (0) - -#define XOR(V1, V2) \ - do { \ - V1##_0 = vec_xor(V1##_0, V2##_0); \ - V1##_1 = vec_xor(V1##_1, V2##_1); \ - V1##_2 = vec_xor(V1##_2, V2##_2); \ - V1##_3 = vec_xor(V1##_3, V2##_3); \ - } while (0) - -void __xor_altivec_2(unsigned long bytes, - unsigned long * __restrict v1_in, - const unsigned long * __restrict v2_in) -{ - DEFINE(v1); - DEFINE(v2); - unsigned long lines = bytes / (sizeof(unative_t)) / 4; - - do { - LOAD(v1); - LOAD(v2); - XOR(v1, v2); - STORE(v1); - - v1 += 4; - v2 += 4; - } while (--lines > 0); -} - -void __xor_altivec_3(unsigned long bytes, - unsigned 
long * __restrict v1_in, - const unsigned long * __restrict v2_in, - const unsigned long * __restrict v3_in) -{ - DEFINE(v1); - DEFINE(v2); - DEFINE(v3); - unsigned long lines = bytes / (sizeof(unative_t)) / 4; - - do { - LOAD(v1); - LOAD(v2); - LOAD(v3); - XOR(v1, v2); - XOR(v1, v3); - STORE(v1); - - v1 += 4; - v2 += 4; - v3 += 4; - } while (--lines > 0); -} - -void __xor_altivec_4(unsigned long bytes, - unsigned long * __restrict v1_in, - const unsigned long * __restrict v2_in, - const unsigned long * __restrict v3_in, - const unsigned long * __restrict v4_in) -{ - DEFINE(v1); - DEFINE(v2); - DEFINE(v3); - DEFINE(v4); - unsigned long lines = bytes / (sizeof(unative_t)) / 4; - - do { - LOAD(v1); - LOAD(v2); - LOAD(v3); - LOAD(v4); - XOR(v1, v2); - XOR(v3, v4); - XOR(v1, v3); - STORE(v1); - - v1 += 4; - v2 += 4; - v3 += 4; - v4 += 4; - } while (--lines > 0); -} - -void __xor_altivec_5(unsigned long bytes, - unsigned long * __restrict v1_in, - const unsigned long * __restrict v2_in, - const unsigned long * __restrict v3_in, - const unsigned long * __restrict v4_in, - const unsigned long * __restrict v5_in) -{ - DEFINE(v1); - DEFINE(v2); - DEFINE(v3); - DEFINE(v4); - DEFINE(v5); - unsigned long lines = bytes / (sizeof(unative_t)) / 4; - - do { - LOAD(v1); - LOAD(v2); - LOAD(v3); - LOAD(v4); - LOAD(v5); - XOR(v1, v2); - XOR(v3, v4); - XOR(v1, v5); - XOR(v1, v3); - STORE(v1); - - v1 += 4; - v2 += 4; - v3 += 4; - v4 += 4; - v5 += 4; - } while (--lines > 0); -} diff --git a/arch/powerpc/lib/xor_vmx.h b/arch/powerpc/lib/xor_vmx.h deleted file mode 100644 index 573c41d90dac..000000000000 --- a/arch/powerpc/lib/xor_vmx.h +++ /dev/null @@ -1,22 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Simple interface to link xor_vmx.c and xor_vmx_glue.c - * - * Separating these file ensures that no altivec instructions are run - * outside of the enable/disable altivec block. 
- */ - -void __xor_altivec_2(unsigned long bytes, unsigned long * __restrict p1, - const unsigned long * __restrict p2); -void __xor_altivec_3(unsigned long bytes, unsigned long * __restrict p1, - const unsigned long * __restrict p2, - const unsigned long * __restrict p3); -void __xor_altivec_4(unsigned long bytes, unsigned long * __restrict p1, - const unsigned long * __restrict p2, - const unsigned long * __restrict p3, - const unsigned long * __restrict p4); -void __xor_altivec_5(unsigned long bytes, unsigned long * __restrict p1, - const unsigned long * __restrict p2, - const unsigned long * __restrict p3, - const unsigned long * __restrict p4, - const unsigned long * __restrict p5); diff --git a/arch/powerpc/lib/xor_vmx_glue.c b/arch/powerpc/lib/xor_vmx_glue.c deleted file mode 100644 index 35d917ece4d1..000000000000 --- a/arch/powerpc/lib/xor_vmx_glue.c +++ /dev/null @@ -1,63 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Altivec XOR operations - * - * Copyright 2017 IBM Corp. 
- */ - -#include -#include -#include -#include -#include -#include "xor_vmx.h" - -void xor_altivec_2(unsigned long bytes, unsigned long * __restrict p1, - const unsigned long * __restrict p2) -{ - preempt_disable(); - enable_kernel_altivec(); - __xor_altivec_2(bytes, p1, p2); - disable_kernel_altivec(); - preempt_enable(); -} -EXPORT_SYMBOL(xor_altivec_2); - -void xor_altivec_3(unsigned long bytes, unsigned long * __restrict p1, - const unsigned long * __restrict p2, - const unsigned long * __restrict p3) -{ - preempt_disable(); - enable_kernel_altivec(); - __xor_altivec_3(bytes, p1, p2, p3); - disable_kernel_altivec(); - preempt_enable(); -} -EXPORT_SYMBOL(xor_altivec_3); - -void xor_altivec_4(unsigned long bytes, unsigned long * __restrict p1, - const unsigned long * __restrict p2, - const unsigned long * __restrict p3, - const unsigned long * __restrict p4) -{ - preempt_disable(); - enable_kernel_altivec(); - __xor_altivec_4(bytes, p1, p2, p3, p4); - disable_kernel_altivec(); - preempt_enable(); -} -EXPORT_SYMBOL(xor_altivec_4); - -void xor_altivec_5(unsigned long bytes, unsigned long * __restrict p1, - const unsigned long * __restrict p2, - const unsigned long * __restrict p3, - const unsigned long * __restrict p4, - const unsigned long * __restrict p5) -{ - preempt_disable(); - enable_kernel_altivec(); - __xor_altivec_5(bytes, p1, p2, p3, p4, p5); - disable_kernel_altivec(); - preempt_enable(); -} -EXPORT_SYMBOL(xor_altivec_5); diff --git a/lib/raid/xor/Makefile b/lib/raid/xor/Makefile index e8868f5fc396..006b44ce46bf 100644 --- a/lib/raid/xor/Makefile +++ b/lib/raid/xor/Makefile @@ -16,6 +16,7 @@ endif xor-$(CONFIG_ARM64) += arm64/xor-neon.o arm64/xor-neon-glue.o xor-$(CONFIG_CPU_HAS_LSX) += loongarch/xor_simd.o xor-$(CONFIG_CPU_HAS_LSX) += loongarch/xor_simd_glue.o +xor-$(CONFIG_ALTIVEC) += powerpc/xor_vmx.o powerpc/xor_vmx_glue.o CFLAGS_arm/xor-neon.o += $(CC_FLAGS_FPU) @@ -23,3 +24,7 @@ CFLAGS_REMOVE_arm/xor-neon.o += $(CC_FLAGS_NO_FPU) 
CFLAGS_arm64/xor-neon.o += $(CC_FLAGS_FPU) CFLAGS_REMOVE_arm64/xor-neon.o += $(CC_FLAGS_NO_FPU) + +CFLAGS_powerpc/xor_vmx.o += -mhard-float -maltivec \ + $(call cc-option,-mabi=altivec) \ + -isystem $(shell $(CC) -print-file-name=include) diff --git a/lib/raid/xor/powerpc/xor_vmx.c b/lib/raid/xor/powerpc/xor_vmx.c new file mode 100644 index 000000000000..aab49d056d18 --- /dev/null +++ b/lib/raid/xor/powerpc/xor_vmx.c @@ -0,0 +1,156 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * + * Copyright (C) IBM Corporation, 2012 + * + * Author: Anton Blanchard + */ + +/* + * Sparse (as at v0.5.0) gets very, very confused by this file. + * Make it a bit simpler for it. + */ +#if !defined(__CHECKER__) +#include +#else +#define vec_xor(a, b) a ^ b +#define vector __attribute__((vector_size(16))) +#endif + +#include "xor_vmx.h" + +typedef vector signed char unative_t; + +#define DEFINE(V) \ + unative_t *V = (unative_t *)V##_in; \ + unative_t V##_0, V##_1, V##_2, V##_3 + +#define LOAD(V) \ + do { \ + V##_0 = V[0]; \ + V##_1 = V[1]; \ + V##_2 = V[2]; \ + V##_3 = V[3]; \ + } while (0) + +#define STORE(V) \ + do { \ + V[0] = V##_0; \ + V[1] = V##_1; \ + V[2] = V##_2; \ + V[3] = V##_3; \ + } while (0) + +#define XOR(V1, V2) \ + do { \ + V1##_0 = vec_xor(V1##_0, V2##_0); \ + V1##_1 = vec_xor(V1##_1, V2##_1); \ + V1##_2 = vec_xor(V1##_2, V2##_2); \ + V1##_3 = vec_xor(V1##_3, V2##_3); \ + } while (0) + +void __xor_altivec_2(unsigned long bytes, + unsigned long * __restrict v1_in, + const unsigned long * __restrict v2_in) +{ + DEFINE(v1); + DEFINE(v2); + unsigned long lines = bytes / (sizeof(unative_t)) / 4; + + do { + LOAD(v1); + LOAD(v2); + XOR(v1, v2); + STORE(v1); + + v1 += 4; + v2 += 4; + } while (--lines > 0); +} + +void __xor_altivec_3(unsigned long bytes, + unsigned long * __restrict v1_in, + const unsigned long * __restrict v2_in, + const unsigned long * __restrict v3_in) +{ + DEFINE(v1); + DEFINE(v2); + DEFINE(v3); + unsigned long lines = bytes / (sizeof(unative_t)) / 
4; + + do { + LOAD(v1); + LOAD(v2); + LOAD(v3); + XOR(v1, v2); + XOR(v1, v3); + STORE(v1); + + v1 += 4; + v2 += 4; + v3 += 4; + } while (--lines > 0); +} + +void __xor_altivec_4(unsigned long bytes, + unsigned long * __restrict v1_in, + const unsigned long * __restrict v2_in, + const unsigned long * __restrict v3_in, + const unsigned long * __restrict v4_in) +{ + DEFINE(v1); + DEFINE(v2); + DEFINE(v3); + DEFINE(v4); + unsigned long lines = bytes / (sizeof(unative_t)) / 4; + + do { + LOAD(v1); + LOAD(v2); + LOAD(v3); + LOAD(v4); + XOR(v1, v2); + XOR(v3, v4); + XOR(v1, v3); + STORE(v1); + + v1 += 4; + v2 += 4; + v3 += 4; + v4 += 4; + } while (--lines > 0); +} + +void __xor_altivec_5(unsigned long bytes, + unsigned long * __restrict v1_in, + const unsigned long * __restrict v2_in, + const unsigned long * __restrict v3_in, + const unsigned long * __restrict v4_in, + const unsigned long * __restrict v5_in) +{ + DEFINE(v1); + DEFINE(v2); + DEFINE(v3); + DEFINE(v4); + DEFINE(v5); + unsigned long lines = bytes / (sizeof(unative_t)) / 4; + + do { + LOAD(v1); + LOAD(v2); + LOAD(v3); + LOAD(v4); + LOAD(v5); + XOR(v1, v2); + XOR(v3, v4); + XOR(v1, v5); + XOR(v1, v3); + STORE(v1); + + v1 += 4; + v2 += 4; + v3 += 4; + v4 += 4; + v5 += 4; + } while (--lines > 0); +} diff --git a/lib/raid/xor/powerpc/xor_vmx.h b/lib/raid/xor/powerpc/xor_vmx.h new file mode 100644 index 000000000000..573c41d90dac --- /dev/null +++ b/lib/raid/xor/powerpc/xor_vmx.h @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Simple interface to link xor_vmx.c and xor_vmx_glue.c + * + * Separating these file ensures that no altivec instructions are run + * outside of the enable/disable altivec block. 
+ */ + +void __xor_altivec_2(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2); +void __xor_altivec_3(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3); +void __xor_altivec_4(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3, + const unsigned long * __restrict p4); +void __xor_altivec_5(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3, + const unsigned long * __restrict p4, + const unsigned long * __restrict p5); diff --git a/lib/raid/xor/powerpc/xor_vmx_glue.c b/lib/raid/xor/powerpc/xor_vmx_glue.c new file mode 100644 index 000000000000..c41e38340700 --- /dev/null +++ b/lib/raid/xor/powerpc/xor_vmx_glue.c @@ -0,0 +1,67 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Altivec XOR operations + * + * Copyright 2017 IBM Corp. 
+ */ + +#include +#include +#include +#include +#include +#include "xor_vmx.h" + +static void xor_altivec_2(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2) +{ + preempt_disable(); + enable_kernel_altivec(); + __xor_altivec_2(bytes, p1, p2); + disable_kernel_altivec(); + preempt_enable(); +} + +static void xor_altivec_3(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3) +{ + preempt_disable(); + enable_kernel_altivec(); + __xor_altivec_3(bytes, p1, p2, p3); + disable_kernel_altivec(); + preempt_enable(); +} + +static void xor_altivec_4(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3, + const unsigned long * __restrict p4) +{ + preempt_disable(); + enable_kernel_altivec(); + __xor_altivec_4(bytes, p1, p2, p3, p4); + disable_kernel_altivec(); + preempt_enable(); +} + +static void xor_altivec_5(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3, + const unsigned long * __restrict p4, + const unsigned long * __restrict p5) +{ + preempt_disable(); + enable_kernel_altivec(); + __xor_altivec_5(bytes, p1, p2, p3, p4, p5); + disable_kernel_altivec(); + preempt_enable(); +} + +struct xor_block_template xor_block_altivec = { + .name = "altivec", + .do_2 = xor_altivec_2, + .do_3 = xor_altivec_3, + .do_4 = xor_altivec_4, + .do_5 = xor_altivec_5, +}; -- cgit v1.2.3