From f36bbf21e8b911b3c629fd36d4d217105b47a20e Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Wed, 30 May 2018 07:06:13 +0000 Subject: powerpc/lib: optimise 32 bits __clear_user() Rewrite clear_user() on the same principle as memset(0), making use of dcbz to clear complete cache lines. This code is a copy/paste of memset(), with some modifications in order to retrieve the remaining number of bytes to be cleared, as it needs to be returned in case of error. In the same way as done on PPC64 in commit 17968fbbd19f1 ("powerpc: 64bit optimised __clear_user"), the patch moves __clear_user() into a dedicated file string_32.S. On a MPC885, throughput is almost doubled: Before: ~# dd if=/dev/zero of=/dev/null bs=1M count=1000 1048576000 bytes (1000.0MB) copied, 18.990779 seconds, 52.7MB/s After: ~# dd if=/dev/zero of=/dev/null bs=1M count=1000 1048576000 bytes (1000.0MB) copied, 9.611468 seconds, 104.0MB/s On a MPC8321, throughput is multiplied by 2.12: Before: root@vgoippro:~# dd if=/dev/zero of=/dev/null bs=1M count=1000 1048576000 bytes (1000.0MB) copied, 6.844352 seconds, 146.1MB/s After: root@vgoippro:~# dd if=/dev/zero of=/dev/null bs=1M count=1000 1048576000 bytes (1000.0MB) copied, 3.218854 seconds, 310.7MB/s Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman --- arch/powerpc/lib/Makefile | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'arch/powerpc/lib/Makefile') diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile index 653901042ad7..2c9b8c0adf22 100644 --- a/arch/powerpc/lib/Makefile +++ b/arch/powerpc/lib/Makefile @@ -26,13 +26,14 @@ obj-$(CONFIG_PPC_BOOK3S_64) += copyuser_power7.o copypage_power7.o \ memcpy_power7.o obj64-y += copypage_64.o copyuser_64.o mem_64.o hweight_64.o \ - string_64.o memcpy_64.o memcmp_64.o pmem.o + memcpy_64.o memcmp_64.o pmem.o obj64-$(CONFIG_SMP) += locks.o obj64-$(CONFIG_ALTIVEC) += vmx-helper.o obj64-$(CONFIG_KPROBES_SANITY_TEST) += test_emulate_step.o -obj-y += checksum_$(BITS).o 
checksum_wrappers.o +obj-y += checksum_$(BITS).o checksum_wrappers.o \ + string_$(BITS).o obj-y += sstep.o ldstfp.o quad.o obj64-y += quad.o -- cgit v1.2.3