author | Heiko Carstens <hca@linux.ibm.com> | 2024-02-03 13:45:23 +0300 |
---|---|---|
committer | Heiko Carstens <hca@linux.ibm.com> | 2024-02-16 16:30:17 +0300 |
commit | dcd3e1de9d17dc43dfed87a9fc814b9dec508043 (patch) | |
tree | f4dc28be4bafb8cde3a619428250517fac82244d /arch/s390/lib | |
parent | cb2a1dd589a0ce97429bf2beeb560e5b030c2ccc (diff) | |
download | linux-dcd3e1de9d17dc43dfed87a9fc814b9dec508043.tar.xz |
s390/checksum: provide csum_partial_copy_nocheck()
With csum_partial(), which reads all bytes into registers, it is easy to also
implement csum_partial_copy_nocheck(), which copies the buffer while
calculating its checksum.
For a 512-byte buffer this reduces the runtime by 19%. Compared to the old
generic variant (memcpy() + cksm instruction), runtime is reduced by 42%.
Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
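For illustration only (not part of the patch): a minimal user-space C sketch of the general idea, accumulating the Internet checksum while copying, so the buffer is traversed once instead of a memcpy() followed by a separate checksum pass. All names below are hypothetical and byte-order details are glossed over.

```c
#include <stdint.h>
#include <stddef.h>
#include <string.h>

/* Fold the accumulator down to 16 bits (ones' complement addition). */
static uint16_t fold_sum(uint64_t sum)
{
	while (sum >> 16)
		sum = (sum & 0xffff) + (sum >> 16);
	return (uint16_t)sum;
}

/*
 * Copy len bytes from src to dst and accumulate the checksum in the
 * same pass; the point is only that copy and checksum share a single
 * traversal of the data.
 */
static uint16_t csum_copy_sketch(void *dst, const void *src, size_t len)
{
	const uint8_t *s = src;
	uint8_t *d = dst;
	uint64_t sum = 0;
	uint16_t w;

	while (len >= 2) {
		memcpy(&w, s, 2);	/* alignment-safe 16-bit load */
		memcpy(d, &w, 2);	/* store the same word to the destination */
		sum += w;
		s += 2;
		d += 2;
		len -= 2;
	}
	if (len) {			/* odd trailing byte */
		*d = *s;
		sum += *s;
	}
	return fold_sum(sum);
}
```

The patch applies the same single-pass idea, but using the vector checksum instruction (fpu_vcksm) together with vector loads and stores, falling back to memcpy() plus the cksm instruction when the vector facility is not available.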
Diffstat (limited to 'arch/s390/lib')
-rw-r--r-- | arch/s390/lib/csum-partial.c | 54 |
1 file changed, 41 insertions, 13 deletions
diff --git a/arch/s390/lib/csum-partial.c b/arch/s390/lib/csum-partial.c
index 3ea009cbc3b7..458abd9bac70 100644
--- a/arch/s390/lib/csum-partial.c
+++ b/arch/s390/lib/csum-partial.c
@@ -5,8 +5,8 @@
 #include <asm/fpu.h>
 
 /*
- * Computes the checksum of a memory block at buff, length len,
- * and adds in "sum" (32-bit).
+ * Computes the checksum of a memory block at src, length len,
+ * and adds in "sum" (32-bit). If copy is true copies to dst.
  *
  * Returns a 32-bit number suitable for feeding into itself
  * or csum_tcpudp_magic.
@@ -14,43 +14,60 @@
  * This function must be called with even lengths, except
  * for the last fragment, which may be odd.
  *
- * It's best to have buff aligned on a 64-bit boundary.
+ * It's best to have src and dst aligned on a 64-bit boundary.
  */
-__wsum csum_partial(const void *buff, int len, __wsum sum)
+static __always_inline __wsum csum_copy(void *dst, const void *src, int len, __wsum sum, bool copy)
 {
 	DECLARE_KERNEL_FPU_ONSTACK8(vxstate);
 
-	if (!cpu_has_vx())
-		return cksm(buff, len, sum);
+	if (!cpu_has_vx()) {
+		if (copy)
+			memcpy(dst, src, len);
+		return cksm(dst, len, sum);
+	}
 	kernel_fpu_begin(&vxstate, KERNEL_VXR_V16V23);
 	fpu_vlvgf(16, (__force u32)sum, 1);
 	fpu_vzero(17);
 	fpu_vzero(18);
 	fpu_vzero(19);
 	while (len >= 64) {
-		fpu_vlm(20, 23, buff);
+		fpu_vlm(20, 23, src);
+		if (copy) {
+			fpu_vstm(20, 23, dst);
+			dst += 64;
+		}
 		fpu_vcksm(16, 20, 16);
 		fpu_vcksm(17, 21, 17);
 		fpu_vcksm(18, 22, 18);
 		fpu_vcksm(19, 23, 19);
-		buff += 64;
+		src += 64;
 		len -= 64;
 	}
 	while (len >= 32) {
-		fpu_vlm(20, 21, buff);
+		fpu_vlm(20, 21, src);
+		if (copy) {
+			fpu_vstm(20, 21, dst);
+			dst += 32;
+		}
 		fpu_vcksm(16, 20, 16);
 		fpu_vcksm(17, 21, 17);
-		buff += 32;
+		src += 32;
 		len -= 32;
 	}
 	while (len >= 16) {
-		fpu_vl(20, buff);
+		fpu_vl(20, src);
+		if (copy) {
+			fpu_vst(20, dst);
+			dst += 16;
+		}
 		fpu_vcksm(16, 20, 16);
-		buff += 16;
+		src += 16;
 		len -= 16;
 	}
 	if (len) {
-		fpu_vll(20, len - 1, buff);
+		fpu_vll(20, len - 1, src);
+		if (copy)
+			fpu_vstl(20, len - 1, dst);
 		fpu_vcksm(16, 20, 16);
 	}
 	fpu_vcksm(18, 19, 18);
@@ -60,4 +77,15 @@ __wsum csum_partial(const void *buff, int len, __wsum sum)
 	kernel_fpu_end(&vxstate, KERNEL_VXR_V16V23);
 	return sum;
 }
+
+__wsum csum_partial(const void *buff, int len, __wsum sum)
+{
+	return csum_copy(NULL, buff, len, sum, false);
+}
 EXPORT_SYMBOL(csum_partial);
+
+__wsum csum_partial_copy_nocheck(const void *src, void *dst, int len)
+{
+	return csum_copy(dst, src, len, 0, true);
+}
+EXPORT_SYMBOL(csum_partial_copy_nocheck);
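A hypothetical caller sketch (not part of this patch) showing how the new export could be combined with the generic csum_fold() helper, which folds a 32-bit partial sum into the final 16-bit checksum; the wrapper name is made up for illustration.

```c
#include <asm/checksum.h>

/* Copy a buffer and obtain its final 16-bit Internet checksum in one pass. */
static __sum16 copy_and_checksum(void *dst, const void *src, int len)
{
	__wsum sum = csum_partial_copy_nocheck(src, dst, len);

	return csum_fold(sum);
}
```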