author     Nick Terrell <terrelln@meta.com>   2025-03-08 23:09:33 +0300
committer  Nick Terrell <terrelln@meta.com>   2025-03-13 23:25:58 +0300
commit     65d1f5507ed2c78c64fce40e44e5574a9419eb09 (patch)
tree       4a1b819db2ea7f2a322e9bcc7582d946d0a4ea29 /lib/zstd/common/entropy_common.c
parent     7eb172143d5508b4da468ed59ee857c6e5e01da6 (diff)
download   linux-65d1f5507ed2c78c64fce40e44e5574a9419eb09.tar.xz
zstd: Import upstream v1.5.7
In addition to keeping the kernel's copy of zstd up to date, this update
was requested by Intel to expose upstream's APIs that allow QAT to accelerate
the LZ match finding stage of Zstd.
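As a rough illustration of how that acceleration path is meant to be used, here is a
minimal sketch. It assumes the kernel wrapper zstd_compress_sequences_and_literals()
mirrors upstream's experimental ZSTD_compressSequencesAndLiterals(); upstream names are
used below because the exact kernel prototype is not part of this file's diff, so treat
the parameter list and the helper itself as assumptions rather than the definitive API.

#define ZSTD_STATIC_LINKING_ONLY   /* the upstream entry point is experimental API */
#include <zstd.h>

/* Sketch only: an offload engine (e.g. QAT) has already done the LZ
 * match finding and produced `seqs` + `literals`; zstd then runs just
 * the entropy (FSE/Huffman) stage.  Parameter order is assumed. */
static size_t entropy_only_compress(ZSTD_CCtx *cctx,
                                    void *dst, size_t dstCapacity,
                                    const ZSTD_Sequence *seqs, size_t nbSeqs,
                                    const void *literals, size_t litSize,
                                    size_t litCapacity, size_t srcSize)
{
    return ZSTD_compressSequencesAndLiterals(cctx, dst, dstCapacity,
                                             seqs, nbSeqs,
                                             literals, litSize,
                                             litCapacity, srcSize);
}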
This patch is imported from the upstream tag v1.5.7-kernel [0], which is signed
with upstream's signing key EF8FE99528B52FFD [1]. It was imported from upstream
using these commands:
export ZSTD=/path/to/repo/zstd/
export LINUX=/path/to/repo/linux/
cd "$ZSTD/contrib/linux-kernel"
git checkout v1.5.7-kernel
make import LINUX="$LINUX"
This patch has been tested on x86-64, and has been boot tested with
a zstd-compressed kernel & initramfs on i386 and aarch64. I benchmarked
the patch on x86-64 with gcc-14.2.1 on an Intel i9-9900K by measuring the
performance of compressed filesystem reads and writes.
Component,  Level,  Size delta,  C. time delta,  D. time delta
Btrfs,          1,      +0.00%,          -6.1%,          +1.4%
Btrfs,          3,      +0.00%,          -9.8%,          +3.0%
Btrfs,          5,      +0.00%,          +1.7%,          +1.4%
Btrfs,          7,      +0.00%,          -1.9%,          +2.7%
Btrfs,          9,      +0.00%,          -3.4%,          +3.7%
Btrfs,         15,      +0.00%,          -0.3%,          +3.6%
SquashFS,       1,      +0.00%,            N/A,          +1.9%
The major changes that impact the kernel use cases for each version are:
v1.5.7: https://github.com/facebook/zstd/releases/tag/v1.5.7
* Add zstd_compress_sequences_and_literals() for use by Intel's QAT driver
to implement Zstd compression acceleration in the kernel.
* Fix an underflow bug in 32-bit builds that can cause data corruption when
processing more than 4GB of data with a single `ZSTD_CCtx` object, when an
input crosses the 4GB boundary. I don't believe this impacts any current kernel
use cases, because the `ZSTD_CCtx` is typically reconstructed between
compressions (see the sketch after this list).
* Levels 1-4 see 5-10% compression speed improvements for inputs smaller than
128KB.
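To make the `ZSTD_CCtx` note above concrete, here is a minimal sketch of the usual
in-kernel pattern, loosely modeled on how filesystems drive the lib/zstd API: a context
is carved out of a fresh workspace for each compression, so a single context never
accumulates more than one bounded input. The wrapper names are the public
include/linux/zstd.h API; the helper itself and its error handling are illustrative only.

#include <linux/zstd.h>
#include <linux/vmalloc.h>

/* Illustrative helper (not from this patch): compress one buffer with a
 * freshly initialized zstd_cctx, then throw the context away. */
static size_t compress_one_buffer(void *dst, size_t dst_capacity,
                                  const void *src, size_t src_size, int level)
{
        zstd_parameters params = zstd_get_params(level, src_size);
        size_t wksp_size = zstd_cctx_workspace_bound(&params.cParams);
        void *wksp = vmalloc(wksp_size);
        zstd_cctx *cctx;
        size_t ret;

        if (!wksp)
                return 0;
        cctx = zstd_init_cctx(wksp, wksp_size);   /* fresh context each time */
        if (!cctx) {
                vfree(wksp);
                return 0;
        }
        ret = zstd_compress_cctx(cctx, dst, dst_capacity, src, src_size, &params);
        vfree(wksp);
        return zstd_is_error(ret) ? 0 : ret;
}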
v1.5.6: https://github.com/facebook/zstd/releases/tag/v1.5.6
* Improved compression ratio for the highest compression levels. I don't expect
these to see much use, however, due to their slow speeds.
v1.5.5: https://github.com/facebook/zstd/releases/tag/v1.5.5
* Fix a rare corruption bug that can trigger on levels 13 and above.
* Improve compression speed of levels 5-11 on incompressible data.
v1.5.4: https://github.com/facebook/zstd/releases/tag/v1.5.4
* Improve compression speed of levels 5-11 on ARM.
* Improve dictionary compression speed.
Signed-off-by: Nick Terrell <terrelln@fb.com>
Diffstat (limited to 'lib/zstd/common/entropy_common.c')
-rw-r--r--   lib/zstd/common/entropy_common.c | 42
1 file changed, 13 insertions(+), 29 deletions(-)
diff --git a/lib/zstd/common/entropy_common.c b/lib/zstd/common/entropy_common.c
index fef67056f052..6cdd82233fb5 100644
--- a/lib/zstd/common/entropy_common.c
+++ b/lib/zstd/common/entropy_common.c
@@ -1,6 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause
 /* ******************************************************************
  * Common functions of New Generation Entropy library
- * Copyright (c) Yann Collet, Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
  *
  * You can contact the author at :
  * - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy
@@ -19,8 +20,8 @@
 #include "error_private.h"       /* ERR_*, ERROR */
 #define FSE_STATIC_LINKING_ONLY  /* FSE_MIN_TABLELOG */
 #include "fse.h"
-#define HUF_STATIC_LINKING_ONLY  /* HUF_TABLELOG_ABSOLUTEMAX */
 #include "huf.h"
+#include "bits.h"                /* ZSDT_highbit32, ZSTD_countTrailingZeros32 */
 
 
 /*=== Version ===*/
@@ -38,23 +39,6 @@ const char* HUF_getErrorName(size_t code) { return ERR_getErrorName(code); }
 /*-**************************************************************
 *  FSE NCount encoding-decoding
 ****************************************************************/
-static U32 FSE_ctz(U32 val)
-{
-    assert(val != 0);
-    {
-#   if (__GNUC__ >= 3)   /* GCC Intrinsic */
-        return __builtin_ctz(val);
-#   else   /* Software version */
-        U32 count = 0;
-        while ((val & 1) == 0) {
-            val >>= 1;
-            ++count;
-        }
-        return count;
-#   endif
-    }
-}
-
 FORCE_INLINE_TEMPLATE
 size_t FSE_readNCount_body(short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr,
                            const void* headerBuffer, size_t hbSize)
@@ -102,7 +86,7 @@ size_t FSE_readNCount_body(short* normalizedCounter, unsigned* maxSVPtr, unsigne
              * repeat.
              * Avoid UB by setting the high bit to 1.
              */
-            int repeats = FSE_ctz(~bitStream | 0x80000000) >> 1;
+            int repeats = ZSTD_countTrailingZeros32(~bitStream | 0x80000000) >> 1;
             while (repeats >= 12) {
                 charnum += 3 * 12;
                 if (LIKELY(ip <= iend-7)) {
@@ -113,7 +97,7 @@ size_t FSE_readNCount_body(short* normalizedCounter, unsigned* maxSVPtr, unsigne
                     ip = iend - 4;
                 }
                 bitStream = MEM_readLE32(ip) >> bitCount;
-                repeats = FSE_ctz(~bitStream | 0x80000000) >> 1;
+                repeats = ZSTD_countTrailingZeros32(~bitStream | 0x80000000) >> 1;
             }
             charnum += 3 * repeats;
             bitStream >>= 2 * repeats;
@@ -178,7 +162,7 @@ size_t FSE_readNCount_body(short* normalizedCounter, unsigned* maxSVPtr, unsigne
                  * know that threshold > 1.
                  */
                 if (remaining <= 1) break;
-                nbBits = BIT_highbit32(remaining) + 1;
+                nbBits = ZSTD_highbit32(remaining) + 1;
                 threshold = 1 << (nbBits - 1);
             }
             if (charnum >= maxSV1) break;
@@ -253,7 +237,7 @@ size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, U32* rankStats,
                      const void* src, size_t srcSize)
 {
     U32 wksp[HUF_READ_STATS_WORKSPACE_SIZE_U32];
-    return HUF_readStats_wksp(huffWeight, hwSize, rankStats, nbSymbolsPtr, tableLogPtr, src, srcSize, wksp, sizeof(wksp), /* bmi2 */ 0);
+    return HUF_readStats_wksp(huffWeight, hwSize, rankStats, nbSymbolsPtr, tableLogPtr, src, srcSize, wksp, sizeof(wksp), /* flags */ 0);
 }
 
 FORCE_INLINE_TEMPLATE size_t
@@ -301,14 +285,14 @@ HUF_readStats_body(BYTE* huffWeight, size_t hwSize, U32* rankStats,
     if (weightTotal == 0) return ERROR(corruption_detected);
 
     /* get last non-null symbol weight (implied, total must be 2^n) */
-    {   U32 const tableLog = BIT_highbit32(weightTotal) + 1;
+    {   U32 const tableLog = ZSTD_highbit32(weightTotal) + 1;
         if (tableLog > HUF_TABLELOG_MAX) return ERROR(corruption_detected);
         *tableLogPtr = tableLog;
         /* determine last weight */
         {   U32 const total = 1 << tableLog;
             U32 const rest = total - weightTotal;
-            U32 const verif = 1 << BIT_highbit32(rest);
-            U32 const lastWeight = BIT_highbit32(rest) + 1;
+            U32 const verif = 1 << ZSTD_highbit32(rest);
+            U32 const lastWeight = ZSTD_highbit32(rest) + 1;
             if (verif != rest) return ERROR(corruption_detected);    /* last value must be a clean power of 2 */
             huffWeight[oSize] = (BYTE)lastWeight;
             rankStats[lastWeight]++;
@@ -345,13 +329,13 @@ size_t HUF_readStats_wksp(BYTE* huffWeight, size_t hwSize, U32* rankStats,
                           U32* nbSymbolsPtr, U32* tableLogPtr,
                           const void* src, size_t srcSize,
                           void* workSpace, size_t wkspSize,
-                          int bmi2)
+                          int flags)
 {
 #if DYNAMIC_BMI2
-    if (bmi2) {
+    if (flags & HUF_flags_bmi2) {
         return HUF_readStats_body_bmi2(huffWeight, hwSize, rankStats, nbSymbolsPtr, tableLogPtr, src, srcSize, workSpace, wkspSize);
     }
 #endif
-    (void)bmi2;
+    (void)flags;
     return HUF_readStats_body_default(huffWeight, hwSize, rankStats, nbSymbolsPtr, tableLogPtr, src, srcSize, workSpace, wkspSize);
 }
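For readers tracking the mechanical renames in this hunk: ZSTD_countTrailingZeros32() and
ZSTD_highbit32() from the new bits.h replace the local FSE_ctz() and the old
BIT_highbit32(). A portable sketch of their semantics follows; the real helpers prefer
compiler intrinsics (as the removed FSE_ctz() did), so this is a reference model, not the
in-tree implementation, and the _ref names are made up for illustration.

/* Reference semantics only; lib/zstd/common/bits.h uses intrinsics when
 * the compiler provides them. */
static unsigned countTrailingZeros32_ref(unsigned val)   /* requires val != 0 */
{
    unsigned n = 0;
    while ((val & 1) == 0) { val >>= 1; ++n; }           /* count low zero bits */
    return n;
}

static unsigned highbit32_ref(unsigned val)              /* requires val != 0 */
{
    unsigned n = 0;
    while (val >>= 1) ++n;                               /* index of top set bit */
    return n;
}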