summaryrefslogtreecommitdiff
path: root/arch/s390/lib
diff options
context:
space:
mode:
authorHeiko Carstens <hca@linux.ibm.com>2024-02-03 13:45:23 +0300
committerHeiko Carstens <hca@linux.ibm.com>2024-02-16 16:30:17 +0300
commitdcd3e1de9d17dc43dfed87a9fc814b9dec508043 (patch)
treef4dc28be4bafb8cde3a619428250517fac82244d /arch/s390/lib
parentcb2a1dd589a0ce97429bf2beeb560e5b030c2ccc (diff)
downloadlinux-dcd3e1de9d17dc43dfed87a9fc814b9dec508043.tar.xz
s390/checksum: provide csum_partial_copy_nocheck()
With csum_partial(), which reads all bytes into registers, it is easy to also implement csum_partial_copy_nocheck(), which copies the buffer while calculating its checksum. For a 512 byte buffer this reduces the runtime by 19%. Compared to the old generic variant (memcpy() + cksm instruction), runtime is reduced by 42%. Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
Diffstat (limited to 'arch/s390/lib')
-rw-r--r--arch/s390/lib/csum-partial.c54
1 files changed, 41 insertions, 13 deletions
diff --git a/arch/s390/lib/csum-partial.c b/arch/s390/lib/csum-partial.c
index 3ea009cbc3b7..458abd9bac70 100644
--- a/arch/s390/lib/csum-partial.c
+++ b/arch/s390/lib/csum-partial.c
@@ -5,8 +5,8 @@
#include <asm/fpu.h>
/*
- * Computes the checksum of a memory block at buff, length len,
- * and adds in "sum" (32-bit).
+ * Computes the checksum of a memory block at src, length len,
+ * and adds in "sum" (32-bit). If copy is true copies to dst.
*
* Returns a 32-bit number suitable for feeding into itself
* or csum_tcpudp_magic.
@@ -14,43 +14,60 @@
* This function must be called with even lengths, except
* for the last fragment, which may be odd.
*
- * It's best to have buff aligned on a 64-bit boundary.
+ * It's best to have src and dst aligned on a 64-bit boundary.
*/
-__wsum csum_partial(const void *buff, int len, __wsum sum)
+static __always_inline __wsum csum_copy(void *dst, const void *src, int len, __wsum sum, bool copy)
{
DECLARE_KERNEL_FPU_ONSTACK8(vxstate);
- if (!cpu_has_vx())
- return cksm(buff, len, sum);
+ if (!cpu_has_vx()) {
+ if (copy)
+ memcpy(dst, src, len);
+ return cksm(src, len, sum); /* dst may be NULL if !copy */
+ }
kernel_fpu_begin(&vxstate, KERNEL_VXR_V16V23);
fpu_vlvgf(16, (__force u32)sum, 1);
fpu_vzero(17);
fpu_vzero(18);
fpu_vzero(19);
while (len >= 64) {
- fpu_vlm(20, 23, buff);
+ fpu_vlm(20, 23, src);
+ if (copy) {
+ fpu_vstm(20, 23, dst);
+ dst += 64;
+ }
fpu_vcksm(16, 20, 16);
fpu_vcksm(17, 21, 17);
fpu_vcksm(18, 22, 18);
fpu_vcksm(19, 23, 19);
- buff += 64;
+ src += 64;
len -= 64;
}
while (len >= 32) {
- fpu_vlm(20, 21, buff);
+ fpu_vlm(20, 21, src);
+ if (copy) {
+ fpu_vstm(20, 21, dst);
+ dst += 32;
+ }
fpu_vcksm(16, 20, 16);
fpu_vcksm(17, 21, 17);
- buff += 32;
+ src += 32;
len -= 32;
}
while (len >= 16) {
- fpu_vl(20, buff);
+ fpu_vl(20, src);
+ if (copy) {
+ fpu_vst(20, dst);
+ dst += 16;
+ }
fpu_vcksm(16, 20, 16);
- buff += 16;
+ src += 16;
len -= 16;
}
if (len) {
- fpu_vll(20, len - 1, buff);
+ fpu_vll(20, len - 1, src);
+ if (copy)
+ fpu_vstl(20, len - 1, dst);
fpu_vcksm(16, 20, 16);
}
fpu_vcksm(18, 19, 18);
@@ -60,4 +77,15 @@ __wsum csum_partial(const void *buff, int len, __wsum sum)
kernel_fpu_end(&vxstate, KERNEL_VXR_V16V23);
return sum;
}
+
+__wsum csum_partial(const void *buff, int len, __wsum sum)
+{
+ return csum_copy(NULL, buff, len, sum, false);
+}
EXPORT_SYMBOL(csum_partial);
+
+__wsum csum_partial_copy_nocheck(const void *src, void *dst, int len)
+{
+ return csum_copy(dst, src, len, 0, true);
+}
+EXPORT_SYMBOL(csum_partial_copy_nocheck);