powerpc/64: Use optimized checksum routines on little-endian

Currently we have optimized hand-coded assembly checksum routines for big-endian 64-bit systems, but for little-endian we use the generic C routines. This modifies the optimized routines to work for little-endian. With this, we no longer need to enable CONFIG_GENERIC_CSUM. This also fixes a couple of comments in checksum_64.S so they accurately reflect what the associated instruction does.

Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
[mpe: Use the more common __BIG_ENDIAN__]
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
author: Paul Mackerras <paulus@ozlabs.org> 2016-11-03 08:15:42 +0300
committer: Michael Ellerman <mpe@ellerman.id.au> 2017-01-25 05:34:18 +0300
commit: d4fde568a34a93897dfb9ae64cfe9dda9d5c908c (patch)
tree: 88beaa27d42bc3b780167930f79129ecd29fab38 /arch
parent: b492f7e4e07a28e706db26cf4943bb0911435426 (diff)
download: linux-d4fde568a34a93897dfb9ae64cfe9dda9d5c908c.tar.xz
4 files changed, 15 insertions, 5 deletions
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index a8ee573fe610..e022859340b7 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -167,7 +167,7 @@ config PPC
 	select HAVE_CC_STACKPROTECTOR
 
 config GENERIC_CSUM
-	def_bool CPU_LITTLE_ENDIAN
+	def_bool n
 
 config EARLY_PRINTK
 	bool
diff --git a/arch/powerpc/include/asm/checksum.h b/arch/powerpc/include/asm/checksum.h
index 5b1a6e39afa7..4e63787dc3be 100644
--- a/arch/powerpc/include/asm/checksum.h
+++ b/arch/powerpc/include/asm/checksum.h
@@ -70,7 +70,11 @@ static inline __wsum csum_tcpudp_nofold(__be32 saddr, __be32 daddr, __u32 len,
 
 	s += (__force u32)saddr;
 	s += (__force u32)daddr;
+#ifdef __BIG_ENDIAN__
 	s += proto + len;
+#else
+	s += (proto + len) << 8;
+#endif
 	return (__force __wsum) from64to32(s);
 #else
     __asm__("\n\
diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile
index 309361e86523..0e649d72fe8d 100644
--- a/arch/powerpc/lib/Makefile
+++ b/arch/powerpc/lib/Makefile
@@ -21,9 +21,7 @@ obj64-y	+= copypage_64.o copyuser_64.o usercopy_64.o mem_64.o hweight_64.o \
 obj64-$(CONFIG_SMP)	+= locks.o
 obj64-$(CONFIG_ALTIVEC)	+= vmx-helper.o
 
-ifeq ($(CONFIG_GENERIC_CSUM),)
 obj-y			+= checksum_$(BITS).o checksum_wrappers.o
-endif
 
 obj-$(CONFIG_PPC_EMULATE_SSTEP)	+= sstep.o ldstfp.o
 
diff --git a/arch/powerpc/lib/checksum_64.S b/arch/powerpc/lib/checksum_64.S
index d0d311e108ff..d7f1a966136e 100644
--- a/arch/powerpc/lib/checksum_64.S
+++ b/arch/powerpc/lib/checksum_64.S
@@ -36,7 +36,7 @@ _GLOBAL(__csum_partial)
 	 * work to calculate the correct checksum, we ignore that case
 	 * and take the potential slowdown of unaligned loads.
 	 */
-	rldicl. r6,r3,64-1,64-2		/* r6 = (r3 & 0x3) >> 1 */
+	rldicl. r6,r3,64-1,64-2		/* r6 = (r3 >> 1) & 0x3 */
 	beq	.Lcsum_aligned
 
 	li	r7,4
@@ -168,8 +168,12 @@ _GLOBAL(__csum_partial)
 	beq	.Lcsum_finish
 
 	lbz	r6,0(r3)
+#ifdef __BIG_ENDIAN__
 	sldi	r9,r6,8			/* Pad the byte out to 16 bits */
 	adde	r0,r0,r9
+#else
+	adde	r0,r0,r6
+#endif
 
 .Lcsum_finish:
 	addze	r0,r0			/* add in final carry */
@@ -224,7 +228,7 @@ _GLOBAL(csum_partial_copy_generic)
 	 * If the source and destination are relatively unaligned we only
 	 * align the source. This keeps things simple.
 	 */
-	rldicl. r6,r3,64-1,64-2		/* r6 = (r3 & 0x3) >> 1 */
+	rldicl. r6,r3,64-1,64-2		/* r6 = (r3 >> 1) & 0x3 */
 	beq	.Lcopy_aligned
 
 	li	r9,4
@@ -386,8 +390,12 @@ dstnr;	sth	r6,0(r4)
 	beq	.Lcopy_finish
 
 srcnr;	lbz	r6,0(r3)
+#ifdef __BIG_ENDIAN__
 	sldi	r9,r6,8			/* Pad the byte out to 16 bits */
 	adde	r0,r0,r9
+#else
+	adde	r0,r0,r6
+#endif
 dstnr;	stb	r6,0(r4)
 
 .Lcopy_finish:
author	Paul Mackerras <paulus@ozlabs.org>	2016-11-03 08:15:42 +0300
committer	Michael Ellerman <mpe@ellerman.id.au>	2017-01-25 05:34:18 +0300
commit	d4fde568a34a93897dfb9ae64cfe9dda9d5c908c (patch)
tree	88beaa27d42bc3b780167930f79129ecd29fab38 /arch
parent	b492f7e4e07a28e706db26cf4943bb0911435426 (diff)
download	linux-d4fde568a34a93897dfb9ae64cfe9dda9d5c908c.tar.xz