diff options
author | Paul Mackerras <paulus@ozlabs.org> | 2016-11-03 08:15:42 +0300 |
---|---|---|
committer | Michael Ellerman <mpe@ellerman.id.au> | 2017-01-25 05:34:18 +0300 |
commit | d4fde568a34a93897dfb9ae64cfe9dda9d5c908c (patch) | |
tree | 88beaa27d42bc3b780167930f79129ecd29fab38 /arch | |
parent | b492f7e4e07a28e706db26cf4943bb0911435426 (diff) | |
download | linux-d4fde568a34a93897dfb9ae64cfe9dda9d5c908c.tar.xz |
powerpc/64: Use optimized checksum routines on little-endian
Currently we have optimized hand-coded assembly checksum routines for
big-endian 64-bit systems, but for little-endian we use the generic C
routines. This modifies the optimized routines to work for
little-endian. With this, we no longer need to enable
CONFIG_GENERIC_CSUM. This also fixes a couple of comments in
checksum_64.S so they accurately reflect what the associated instruction
does.
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
[mpe: Use the more common __BIG_ENDIAN__]
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Diffstat (limited to 'arch')
-rw-r--r-- | arch/powerpc/Kconfig | 2 | ||||
-rw-r--r-- | arch/powerpc/include/asm/checksum.h | 4 | ||||
-rw-r--r-- | arch/powerpc/lib/Makefile | 2 | ||||
-rw-r--r-- | arch/powerpc/lib/checksum_64.S | 12 |
4 files changed, 15 insertions, 5 deletions
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index a8ee573fe610..e022859340b7 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -167,7 +167,7 @@ config PPC select HAVE_CC_STACKPROTECTOR config GENERIC_CSUM - def_bool CPU_LITTLE_ENDIAN + def_bool n config EARLY_PRINTK bool diff --git a/arch/powerpc/include/asm/checksum.h b/arch/powerpc/include/asm/checksum.h index 5b1a6e39afa7..4e63787dc3be 100644 --- a/arch/powerpc/include/asm/checksum.h +++ b/arch/powerpc/include/asm/checksum.h @@ -70,7 +70,11 @@ static inline __wsum csum_tcpudp_nofold(__be32 saddr, __be32 daddr, __u32 len, s += (__force u32)saddr; s += (__force u32)daddr; +#ifdef __BIG_ENDIAN__ s += proto + len; +#else + s += (proto + len) << 8; +#endif return (__force __wsum) from64to32(s); #else __asm__("\n\ diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile index 309361e86523..0e649d72fe8d 100644 --- a/arch/powerpc/lib/Makefile +++ b/arch/powerpc/lib/Makefile @@ -21,9 +21,7 @@ obj64-y += copypage_64.o copyuser_64.o usercopy_64.o mem_64.o hweight_64.o \ obj64-$(CONFIG_SMP) += locks.o obj64-$(CONFIG_ALTIVEC) += vmx-helper.o -ifeq ($(CONFIG_GENERIC_CSUM),) obj-y += checksum_$(BITS).o checksum_wrappers.o -endif obj-$(CONFIG_PPC_EMULATE_SSTEP) += sstep.o ldstfp.o diff --git a/arch/powerpc/lib/checksum_64.S b/arch/powerpc/lib/checksum_64.S index d0d311e108ff..d7f1a966136e 100644 --- a/arch/powerpc/lib/checksum_64.S +++ b/arch/powerpc/lib/checksum_64.S @@ -36,7 +36,7 @@ _GLOBAL(__csum_partial) * work to calculate the correct checksum, we ignore that case * and take the potential slowdown of unaligned loads. */ - rldicl. r6,r3,64-1,64-2 /* r6 = (r3 & 0x3) >> 1 */ + rldicl. r6,r3,64-1,64-2 /* r6 = (r3 >> 1) & 0x3 */ beq .Lcsum_aligned li r7,4 @@ -168,8 +168,12 @@ _GLOBAL(__csum_partial) beq .Lcsum_finish lbz r6,0(r3) +#ifdef __BIG_ENDIAN__ sldi r9,r6,8 /* Pad the byte out to 16 bits */ adde r0,r0,r9 +#else + adde r0,r0,r6 +#endif .Lcsum_finish: addze r0,r0 /* add in final carry */ @@ -224,7 +228,7 @@ _GLOBAL(csum_partial_copy_generic) * If the source and destination are relatively unaligned we only * align the source. This keeps things simple. */ - rldicl. r6,r3,64-1,64-2 /* r6 = (r3 & 0x3) >> 1 */ + rldicl. r6,r3,64-1,64-2 /* r6 = (r3 >> 1) & 0x3 */ beq .Lcopy_aligned li r9,4 @@ -386,8 +390,12 @@ dstnr; sth r6,0(r4) beq .Lcopy_finish srcnr; lbz r6,0(r3) +#ifdef __BIG_ENDIAN__ sldi r9,r6,8 /* Pad the byte out to 16 bits */ adde r0,r0,r9 +#else + adde r0,r0,r6 +#endif dstnr; stb r6,0(r4) .Lcopy_finish: |