summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r--drivers/crypto/intel/qat/Kconfig1
-rw-r--r--drivers/crypto/intel/qat/qat_420xx/adf_420xx_hw_data.c1
-rw-r--r--drivers/crypto/intel/qat/qat_4xxx/adf_4xxx_hw_data.c1
-rw-r--r--drivers/crypto/intel/qat/qat_6xxx/adf_6xxx_hw_data.c17
-rw-r--r--drivers/crypto/intel/qat/qat_common/Makefile1
-rw-r--r--drivers/crypto/intel/qat/qat_common/adf_accel_devices.h6
-rw-r--r--drivers/crypto/intel/qat/qat_common/adf_common_drv.h6
-rw-r--r--drivers/crypto/intel/qat/qat_common/adf_gen4_hw_data.c18
-rw-r--r--drivers/crypto/intel/qat/qat_common/adf_init.c6
-rw-r--r--drivers/crypto/intel/qat/qat_common/icp_qat_fw.h7
-rw-r--r--drivers/crypto/intel/qat/qat_common/icp_qat_fw_comp.h2
-rw-r--r--drivers/crypto/intel/qat/qat_common/icp_qat_hw.h3
-rw-r--r--drivers/crypto/intel/qat/qat_common/qat_comp_algs.c524
-rw-r--r--drivers/crypto/intel/qat/qat_common/qat_comp_req.h9
-rw-r--r--drivers/crypto/intel/qat/qat_common/qat_comp_zstd_utils.c165
-rw-r--r--drivers/crypto/intel/qat/qat_common/qat_comp_zstd_utils.h13
-rw-r--r--drivers/crypto/intel/qat/qat_common/qat_compression.c23
17 files changed, 773 insertions(+), 30 deletions(-)
diff --git a/drivers/crypto/intel/qat/Kconfig b/drivers/crypto/intel/qat/Kconfig
index 6734b746ea70..9d6e6f52d2dc 100644
--- a/drivers/crypto/intel/qat/Kconfig
+++ b/drivers/crypto/intel/qat/Kconfig
@@ -12,6 +12,7 @@ config CRYPTO_DEV_QAT
select CRYPTO_LIB_SHA1
select CRYPTO_LIB_SHA256
select CRYPTO_LIB_SHA512
+ select CRYPTO_ZSTD
select FW_LOADER
select CRC8
diff --git a/drivers/crypto/intel/qat/qat_420xx/adf_420xx_hw_data.c b/drivers/crypto/intel/qat/qat_420xx/adf_420xx_hw_data.c
index 0002122219bc..19f9f738630b 100644
--- a/drivers/crypto/intel/qat/qat_420xx/adf_420xx_hw_data.c
+++ b/drivers/crypto/intel/qat/qat_420xx/adf_420xx_hw_data.c
@@ -488,6 +488,7 @@ void adf_init_hw_data_420xx(struct adf_hw_device_data *hw_data, u32 dev_id)
hw_data->clock_frequency = ADF_420XX_AE_FREQ;
hw_data->services_supported = adf_gen4_services_supported;
hw_data->get_svc_slice_cnt = adf_gen4_get_svc_slice_cnt;
+ hw_data->accel_capabilities_ext_mask = ADF_ACCEL_CAPABILITIES_EXT_ZSTD_LZ4S;
adf_gen4_set_err_mask(&hw_data->dev_err_mask);
adf_gen4_init_hw_csr_ops(&hw_data->csr_ops);
diff --git a/drivers/crypto/intel/qat/qat_4xxx/adf_4xxx_hw_data.c b/drivers/crypto/intel/qat/qat_4xxx/adf_4xxx_hw_data.c
index 900f19b90b2d..49b425be34c8 100644
--- a/drivers/crypto/intel/qat/qat_4xxx/adf_4xxx_hw_data.c
+++ b/drivers/crypto/intel/qat/qat_4xxx/adf_4xxx_hw_data.c
@@ -473,6 +473,7 @@ void adf_init_hw_data_4xxx(struct adf_hw_device_data *hw_data, u32 dev_id)
hw_data->clock_frequency = ADF_4XXX_AE_FREQ;
hw_data->services_supported = adf_gen4_services_supported;
hw_data->get_svc_slice_cnt = adf_gen4_get_svc_slice_cnt;
+ hw_data->accel_capabilities_ext_mask = ADF_ACCEL_CAPABILITIES_EXT_ZSTD_LZ4S;
adf_gen4_set_err_mask(&hw_data->dev_err_mask);
adf_gen4_init_hw_csr_ops(&hw_data->csr_ops);
diff --git a/drivers/crypto/intel/qat/qat_6xxx/adf_6xxx_hw_data.c b/drivers/crypto/intel/qat/qat_6xxx/adf_6xxx_hw_data.c
index 177bc4eb3c24..205680797e2c 100644
--- a/drivers/crypto/intel/qat/qat_6xxx/adf_6xxx_hw_data.c
+++ b/drivers/crypto/intel/qat/qat_6xxx/adf_6xxx_hw_data.c
@@ -33,6 +33,8 @@
#define ADF_AE_GROUP_1 GENMASK(7, 4)
#define ADF_AE_GROUP_2 BIT(8)
+#define ASB_MULTIPLIER 9
+
struct adf_ring_config {
u32 ring_mask;
enum adf_cfg_service_type ring_type;
@@ -509,6 +511,9 @@ static int build_comp_block(void *ctx, enum adf_dc_algo algo)
case QAT_DEFLATE:
header->service_cmd_id = ICP_QAT_FW_COMP_CMD_DYNAMIC;
break;
+ case QAT_ZSTD:
+ header->service_cmd_id = ICP_QAT_FW_COMP_CMD_ZSTD_COMPRESS;
+ break;
default:
return -EINVAL;
}
@@ -519,6 +524,13 @@ static int build_comp_block(void *ctx, enum adf_dc_algo algo)
cd_pars->u.sl.comp_slice_cfg_word[0] = lower_val;
cd_pars->u.sl.comp_slice_cfg_word[1] = 0;
+ /*
+ * Store Auto Select Best (ASB) multiplier in the request template.
+ * This will be used in the data path to set the actual threshold
+ * value based on the input data size.
+ */
+ req_tmpl->u3.asb_threshold.asb_value = ASB_MULTIPLIER;
+
return 0;
}
@@ -532,12 +544,16 @@ static int build_decomp_block(void *ctx, enum adf_dc_algo algo)
case QAT_DEFLATE:
header->service_cmd_id = ICP_QAT_FW_COMP_CMD_DECOMPRESS;
break;
+ case QAT_ZSTD:
+ header->service_cmd_id = ICP_QAT_FW_COMP_CMD_ZSTD_DECOMPRESS;
+ break;
default:
return -EINVAL;
}
cd_pars->u.sl.comp_slice_cfg_word[0] = 0;
cd_pars->u.sl.comp_slice_cfg_word[1] = 0;
+ req_tmpl->u3.asb_threshold.asb_value = 0;
return 0;
}
@@ -1030,6 +1046,7 @@ void adf_init_hw_data_6xxx(struct adf_hw_device_data *hw_data)
hw_data->num_rps = ADF_GEN6_ETR_MAX_BANKS;
hw_data->clock_frequency = ADF_6XXX_AE_FREQ;
hw_data->get_svc_slice_cnt = adf_gen6_get_svc_slice_cnt;
+ hw_data->accel_capabilities_ext_mask = ADF_ACCEL_CAPABILITIES_EXT_ZSTD;
adf_gen6_init_services_supported(hw_data);
adf_gen6_init_hw_csr_ops(&hw_data->csr_ops);
diff --git a/drivers/crypto/intel/qat/qat_common/Makefile b/drivers/crypto/intel/qat/qat_common/Makefile
index 016b81e60cfb..9478111c8437 100644
--- a/drivers/crypto/intel/qat/qat_common/Makefile
+++ b/drivers/crypto/intel/qat/qat_common/Makefile
@@ -41,6 +41,7 @@ intel_qat-y := adf_accel_engine.o \
qat_bl.o \
qat_comp_algs.o \
qat_compression.o \
+ qat_comp_zstd_utils.o \
qat_crypto.o \
qat_hal.o \
qat_mig_dev.o \
diff --git a/drivers/crypto/intel/qat/qat_common/adf_accel_devices.h b/drivers/crypto/intel/qat/qat_common/adf_accel_devices.h
index cac110215c5e..03a4e9690208 100644
--- a/drivers/crypto/intel/qat/qat_common/adf_accel_devices.h
+++ b/drivers/crypto/intel/qat/qat_common/adf_accel_devices.h
@@ -59,6 +59,11 @@ enum adf_accel_capabilities {
ADF_ACCEL_CAPABILITIES_RANDOM_NUMBER = 128
};
+enum adf_accel_capabilities_ext {
+ ADF_ACCEL_CAPABILITIES_EXT_ZSTD_LZ4S = BIT(0),
+ ADF_ACCEL_CAPABILITIES_EXT_ZSTD = BIT(1),
+};
+
enum adf_fuses {
ADF_FUSECTL0,
ADF_FUSECTL1,
@@ -336,6 +341,7 @@ struct adf_hw_device_data {
u32 fuses[ADF_MAX_FUSES];
u32 straps;
u32 accel_capabilities_mask;
+ u32 accel_capabilities_ext_mask;
u32 extended_dc_capabilities;
u16 fw_capabilities;
u32 clock_frequency;
diff --git a/drivers/crypto/intel/qat/qat_common/adf_common_drv.h b/drivers/crypto/intel/qat/qat_common/adf_common_drv.h
index 6cf3a95489e8..7b8b295ac459 100644
--- a/drivers/crypto/intel/qat/qat_common/adf_common_drv.h
+++ b/drivers/crypto/intel/qat/qat_common/adf_common_drv.h
@@ -111,12 +111,12 @@ void qat_algs_unregister(void);
int qat_asym_algs_register(void);
void qat_asym_algs_unregister(void);
-struct qat_compression_instance *qat_compression_get_instance_node(int node);
+struct qat_compression_instance *qat_compression_get_instance_node(int node, int alg);
void qat_compression_put_instance(struct qat_compression_instance *inst);
int qat_compression_register(void);
int qat_compression_unregister(void);
-int qat_comp_algs_register(void);
-void qat_comp_algs_unregister(void);
+int qat_comp_algs_register(u32 caps);
+void qat_comp_algs_unregister(u32 caps);
void qat_comp_alg_callback(void *resp);
int adf_isr_resource_alloc(struct adf_accel_dev *accel_dev);
diff --git a/drivers/crypto/intel/qat/qat_common/adf_gen4_hw_data.c b/drivers/crypto/intel/qat/qat_common/adf_gen4_hw_data.c
index 349fdb323763..f4a58f04071a 100644
--- a/drivers/crypto/intel/qat/qat_common/adf_gen4_hw_data.c
+++ b/drivers/crypto/intel/qat/qat_common/adf_gen4_hw_data.c
@@ -504,14 +504,20 @@ static int adf_gen4_build_comp_block(void *ctx, enum adf_dc_algo algo)
switch (algo) {
case QAT_DEFLATE:
header->service_cmd_id = ICP_QAT_FW_COMP_CMD_DYNAMIC;
+ hw_comp_lower_csr.algo = ICP_QAT_HW_COMP_20_HW_COMP_FORMAT_ILZ77;
+ hw_comp_lower_csr.lllbd = ICP_QAT_HW_COMP_20_LLLBD_CTRL_LLLBD_ENABLED;
+ hw_comp_lower_csr.skip_ctrl = ICP_QAT_HW_COMP_20_BYTE_SKIP_3BYTE_LITERAL;
+ break;
+ case QAT_LZ4S:
+ header->service_cmd_id = ICP_QAT_FW_COMP_20_CMD_LZ4S_COMPRESS;
+ hw_comp_lower_csr.algo = ICP_QAT_HW_COMP_20_HW_COMP_FORMAT_LZ4S;
+ hw_comp_lower_csr.lllbd = ICP_QAT_HW_COMP_20_LLLBD_CTRL_LLLBD_DISABLED;
+ hw_comp_lower_csr.abd = ICP_QAT_HW_COMP_20_ABD_ABD_DISABLED;
break;
default:
return -EINVAL;
}
- hw_comp_lower_csr.skip_ctrl = ICP_QAT_HW_COMP_20_BYTE_SKIP_3BYTE_LITERAL;
- hw_comp_lower_csr.algo = ICP_QAT_HW_COMP_20_HW_COMP_FORMAT_ILZ77;
- hw_comp_lower_csr.lllbd = ICP_QAT_HW_COMP_20_LLLBD_CTRL_LLLBD_ENABLED;
hw_comp_lower_csr.sd = ICP_QAT_HW_COMP_20_SEARCH_DEPTH_LEVEL_1;
hw_comp_lower_csr.hash_update = ICP_QAT_HW_COMP_20_SKIP_HASH_UPDATE_DONT_ALLOW;
hw_comp_lower_csr.edmm = ICP_QAT_HW_COMP_20_EXTENDED_DELAY_MATCH_MODE_EDMM_ENABLED;
@@ -538,12 +544,16 @@ static int adf_gen4_build_decomp_block(void *ctx, enum adf_dc_algo algo)
switch (algo) {
case QAT_DEFLATE:
header->service_cmd_id = ICP_QAT_FW_COMP_CMD_DECOMPRESS;
+ hw_decomp_lower_csr.algo = ICP_QAT_HW_DECOMP_20_HW_DECOMP_FORMAT_DEFLATE;
+ break;
+ case QAT_LZ4S:
+ header->service_cmd_id = ICP_QAT_FW_COMP_20_CMD_LZ4S_DECOMPRESS;
+ hw_decomp_lower_csr.algo = ICP_QAT_HW_DECOMP_20_HW_DECOMP_FORMAT_LZ4S;
break;
default:
return -EINVAL;
}
- hw_decomp_lower_csr.algo = ICP_QAT_HW_DECOMP_20_HW_DECOMP_FORMAT_DEFLATE;
lower_val = ICP_QAT_FW_DECOMP_20_BUILD_CONFIG_LOWER(hw_decomp_lower_csr);
cd_pars->u.sl.comp_slice_cfg_word[0] = lower_val;
diff --git a/drivers/crypto/intel/qat/qat_common/adf_init.c b/drivers/crypto/intel/qat/qat_common/adf_init.c
index ec376583b3ae..f8088388cf12 100644
--- a/drivers/crypto/intel/qat/qat_common/adf_init.c
+++ b/drivers/crypto/intel/qat/qat_common/adf_init.c
@@ -180,6 +180,7 @@ static int adf_dev_start(struct adf_accel_dev *accel_dev)
{
struct adf_hw_device_data *hw_data = accel_dev->hw_device;
struct service_hndl *service;
+ u32 caps;
int ret;
set_bit(ADF_STATUS_STARTING, &accel_dev->status);
@@ -253,7 +254,8 @@ static int adf_dev_start(struct adf_accel_dev *accel_dev)
}
set_bit(ADF_STATUS_CRYPTO_ALGS_REGISTERED, &accel_dev->status);
- if (!list_empty(&accel_dev->compression_list) && qat_comp_algs_register()) {
+ caps = hw_data->accel_capabilities_ext_mask;
+ if (!list_empty(&accel_dev->compression_list) && qat_comp_algs_register(caps)) {
dev_err(&GET_DEV(accel_dev),
"Failed to register compression algs\n");
set_bit(ADF_STATUS_STARTING, &accel_dev->status);
@@ -308,7 +310,7 @@ static void adf_dev_stop(struct adf_accel_dev *accel_dev)
if (!list_empty(&accel_dev->compression_list) &&
test_bit(ADF_STATUS_COMP_ALGS_REGISTERED, &accel_dev->status))
- qat_comp_algs_unregister();
+ qat_comp_algs_unregister(hw_data->accel_capabilities_ext_mask);
clear_bit(ADF_STATUS_COMP_ALGS_REGISTERED, &accel_dev->status);
list_for_each_entry(service, &service_table, list) {
diff --git a/drivers/crypto/intel/qat/qat_common/icp_qat_fw.h b/drivers/crypto/intel/qat/qat_common/icp_qat_fw.h
index c141160421e1..2fea30a78340 100644
--- a/drivers/crypto/intel/qat/qat_common/icp_qat_fw.h
+++ b/drivers/crypto/intel/qat/qat_common/icp_qat_fw.h
@@ -151,6 +151,13 @@ struct icp_qat_fw_comn_resp {
ICP_QAT_FW_COMN_CNV_FLAG_BITPOS, \
ICP_QAT_FW_COMN_CNV_FLAG_MASK)
+#define ICP_QAT_FW_COMN_ST_BLK_FLAG_BITPOS 4
+#define ICP_QAT_FW_COMN_ST_BLK_FLAG_MASK 0x1
+#define ICP_QAT_FW_COMN_HDR_ST_BLK_FLAG_GET(hdr_flags) \
+ QAT_FIELD_GET(hdr_flags, \
+ ICP_QAT_FW_COMN_ST_BLK_FLAG_BITPOS, \
+ ICP_QAT_FW_COMN_ST_BLK_FLAG_MASK)
+
#define ICP_QAT_FW_COMN_HDR_CNV_FLAG_SET(hdr_t, val) \
QAT_FIELD_SET((hdr_t.hdr_flags), (val), \
ICP_QAT_FW_COMN_CNV_FLAG_BITPOS, \
diff --git a/drivers/crypto/intel/qat/qat_common/icp_qat_fw_comp.h b/drivers/crypto/intel/qat/qat_common/icp_qat_fw_comp.h
index 81969c515a17..2526053ee630 100644
--- a/drivers/crypto/intel/qat/qat_common/icp_qat_fw_comp.h
+++ b/drivers/crypto/intel/qat/qat_common/icp_qat_fw_comp.h
@@ -8,6 +8,8 @@ enum icp_qat_fw_comp_cmd_id {
ICP_QAT_FW_COMP_CMD_STATIC = 0,
ICP_QAT_FW_COMP_CMD_DYNAMIC = 1,
ICP_QAT_FW_COMP_CMD_DECOMPRESS = 2,
+ ICP_QAT_FW_COMP_CMD_ZSTD_COMPRESS = 10,
+ ICP_QAT_FW_COMP_CMD_ZSTD_DECOMPRESS = 11,
ICP_QAT_FW_COMP_CMD_DELIMITER
};
diff --git a/drivers/crypto/intel/qat/qat_common/icp_qat_hw.h b/drivers/crypto/intel/qat/qat_common/icp_qat_hw.h
index 0223bd541f1f..16ef6d98fa42 100644
--- a/drivers/crypto/intel/qat/qat_common/icp_qat_hw.h
+++ b/drivers/crypto/intel/qat/qat_common/icp_qat_hw.h
@@ -336,7 +336,8 @@ enum icp_qat_hw_compression_delayed_match {
enum icp_qat_hw_compression_algo {
ICP_QAT_HW_COMPRESSION_ALGO_DEFLATE = 0,
ICP_QAT_HW_COMPRESSION_ALGO_LZS = 1,
- ICP_QAT_HW_COMPRESSION_ALGO_DELIMITER = 2
+ ICP_QAT_HW_COMPRESSION_ALGO_ZSTD = 2,
+ ICP_QAT_HW_COMPRESSION_ALGO_DELIMITER
};
enum icp_qat_hw_compression_depth {
diff --git a/drivers/crypto/intel/qat/qat_common/qat_comp_algs.c b/drivers/crypto/intel/qat/qat_common/qat_comp_algs.c
index bfc820a08ada..e0d003b50358 100644
--- a/drivers/crypto/intel/qat/qat_common/qat_comp_algs.c
+++ b/drivers/crypto/intel/qat/qat_common/qat_comp_algs.c
@@ -6,6 +6,7 @@
#include <crypto/scatterwalk.h>
#include <linux/dma-mapping.h>
#include <linux/workqueue.h>
+#include <linux/zstd.h>
#include "adf_accel_devices.h"
#include "adf_common_drv.h"
#include "adf_dc.h"
@@ -13,9 +14,104 @@
#include "qat_comp_req.h"
#include "qat_compression.h"
#include "qat_algs_send.h"
+#include "qat_comp_zstd_utils.h"
+
+#define QAT_ZSTD_SCRATCH_SIZE 524288
+#define QAT_ZSTD_MAX_BLOCK_SIZE 65535
+#define QAT_ZSTD_MAX_CONTENT_SIZE 4096
+#define QAT_LZ4S_MIN_INPUT_SIZE 8192
+#define QAT_LZ4S_MAX_OUTPUT_SIZE QAT_ZSTD_SCRATCH_SIZE
+#define QAT_MAX_SEQUENCES (128 * 1024)
static DEFINE_MUTEX(algs_lock);
-static unsigned int active_devs;
+static unsigned int active_devs_deflate;
+static unsigned int active_devs_lz4s;
+static unsigned int active_devs_zstd;
+
+struct qat_zstd_scratch {
+ size_t cctx_buffer_size;
+ void *lz4s;
+ void *literals;
+ void *out_seqs;
+ void *workspace;
+ ZSTD_CCtx *ctx;
+};
+
+static void *qat_zstd_alloc_scratch(void)
+{
+ struct qat_zstd_scratch *scratch;
+ ZSTD_parameters params;
+ size_t cctx_size;
+ ZSTD_CCtx *ctx;
+ size_t zret;
+ int ret;
+
+ ret = -ENOMEM;
+ scratch = kzalloc_obj(*scratch);
+ if (!scratch)
+ return ERR_PTR(ret);
+
+ scratch->lz4s = kvmalloc(QAT_ZSTD_SCRATCH_SIZE, GFP_KERNEL);
+ if (!scratch->lz4s)
+ goto error;
+
+ scratch->literals = kvmalloc(QAT_ZSTD_SCRATCH_SIZE, GFP_KERNEL);
+ if (!scratch->literals)
+ goto error;
+
+ scratch->out_seqs = kvcalloc(QAT_MAX_SEQUENCES, sizeof(ZSTD_Sequence),
+ GFP_KERNEL);
+ if (!scratch->out_seqs)
+ goto error;
+
+ params = zstd_get_params(zstd_max_clevel(), QAT_ZSTD_SCRATCH_SIZE);
+ cctx_size = zstd_cctx_workspace_bound(&params.cParams);
+
+ scratch->workspace = kvmalloc(cctx_size, GFP_KERNEL | __GFP_ZERO);
+ if (!scratch->workspace)
+ goto error;
+
+ ret = -EINVAL;
+ ctx = zstd_init_cctx(scratch->workspace, cctx_size);
+ if (!ctx)
+ goto error;
+
+ scratch->ctx = ctx;
+ scratch->cctx_buffer_size = cctx_size;
+
+ zret = zstd_cctx_set_param(ctx, ZSTD_c_blockDelimiters, ZSTD_sf_explicitBlockDelimiters);
+ if (zstd_is_error(zret))
+ goto error;
+
+ return scratch;
+
+error:
+ kvfree(scratch->lz4s);
+ kvfree(scratch->literals);
+ kvfree(scratch->out_seqs);
+ kvfree(scratch->workspace);
+ kfree(scratch);
+ return ERR_PTR(ret);
+}
+
+static void qat_zstd_free_scratch(void *ctx)
+{
+ struct qat_zstd_scratch *scratch = ctx;
+
+ if (!scratch)
+ return;
+
+ kvfree(scratch->lz4s);
+ kvfree(scratch->literals);
+ kvfree(scratch->out_seqs);
+ kvfree(scratch->workspace);
+ kfree(scratch);
+}
+
+static struct crypto_acomp_streams qat_zstd_streams = {
+ .alloc_ctx = qat_zstd_alloc_scratch,
+ .free_ctx = qat_zstd_free_scratch,
+};
enum direction {
DECOMPRESSION = 0,
@@ -24,10 +120,18 @@ enum direction {
struct qat_compression_req;
+struct qat_callback_params {
+ unsigned int produced;
+ unsigned int dlen;
+ bool plain;
+};
+
struct qat_compression_ctx {
u8 comp_ctx[QAT_COMP_CTX_SIZE];
struct qat_compression_instance *inst;
- int (*qat_comp_callback)(struct qat_compression_req *qat_req, void *resp);
+ int (*qat_comp_callback)(struct qat_compression_req *qat_req, void *resp,
+ struct qat_callback_params *params);
+ struct crypto_acomp *ftfm;
};
struct qat_compression_req {
@@ -62,6 +166,7 @@ static void qat_comp_generic_callback(struct qat_compression_req *qat_req,
struct adf_accel_dev *accel_dev = ctx->inst->accel_dev;
struct crypto_acomp *tfm = crypto_acomp_reqtfm(areq);
struct qat_compression_instance *inst = ctx->inst;
+ struct qat_callback_params params = { };
int consumed, produced;
s8 cmp_err, xlt_err;
int res = -EBADMSG;
@@ -76,6 +181,10 @@ static void qat_comp_generic_callback(struct qat_compression_req *qat_req,
consumed = qat_comp_get_consumed_ctr(resp);
produced = qat_comp_get_produced_ctr(resp);
+ /* Cache parameters for algorithm specific callback */
+ params.produced = produced;
+ params.dlen = areq->dlen;
+
dev_dbg(&GET_DEV(accel_dev),
"[%s][%s][%s] slen = %8d dlen = %8d consumed = %8d produced = %8d cmp_err = %3d xlt_err = %3d",
crypto_tfm_alg_driver_name(crypto_acomp_tfm(tfm)),
@@ -83,16 +192,20 @@ static void qat_comp_generic_callback(struct qat_compression_req *qat_req,
status ? "ERR" : "OK ",
areq->slen, areq->dlen, consumed, produced, cmp_err, xlt_err);
- areq->dlen = 0;
+ if (unlikely(status != ICP_QAT_FW_COMN_STATUS_FLAG_OK)) {
+ if (cmp_err == ERR_CODE_OVERFLOW_ERROR || xlt_err == ERR_CODE_OVERFLOW_ERROR)
+ res = -E2BIG;
- if (unlikely(status != ICP_QAT_FW_COMN_STATUS_FLAG_OK))
+ areq->dlen = 0;
goto end;
+ }
if (qat_req->dir == COMPRESSION) {
cnv = qat_comp_get_cmp_cnv_flag(resp);
if (unlikely(!cnv)) {
dev_err(&GET_DEV(accel_dev),
"Verified compression not supported\n");
+ areq->dlen = 0;
goto end;
}
@@ -102,33 +215,36 @@ static void qat_comp_generic_callback(struct qat_compression_req *qat_req,
dev_dbg(&GET_DEV(accel_dev),
"Actual buffer overflow: produced=%d, dlen=%d\n",
produced, qat_req->actual_dlen);
+
+ res = -E2BIG;
+ areq->dlen = 0;
goto end;
}
+
+ params.plain = !!qat_comp_get_cmp_uncomp_flag(resp);
}
res = 0;
areq->dlen = produced;
if (ctx->qat_comp_callback)
- res = ctx->qat_comp_callback(qat_req, resp);
+ res = ctx->qat_comp_callback(qat_req, resp, &params);
end:
qat_bl_free_bufl(accel_dev, &qat_req->buf);
acomp_request_complete(areq, res);
+ qat_alg_send_backlog(qat_req->alg_req.backlog);
}
void qat_comp_alg_callback(void *resp)
{
struct qat_compression_req *qat_req =
(void *)(__force long)qat_comp_get_opaque(resp);
- struct qat_instance_backlog *backlog = qat_req->alg_req.backlog;
qat_comp_generic_callback(qat_req, resp);
-
- qat_alg_send_backlog(backlog);
}
-static int qat_comp_alg_init_tfm(struct crypto_acomp *acomp_tfm)
+static int qat_comp_alg_init_tfm(struct crypto_acomp *acomp_tfm, int alg)
{
struct qat_compression_ctx *ctx = acomp_tfm_ctx(acomp_tfm);
struct crypto_tfm *tfm = crypto_acomp_tfm(acomp_tfm);
@@ -141,12 +257,12 @@ static int qat_comp_alg_init_tfm(struct crypto_acomp *acomp_tfm)
node = tfm->node;
memset(ctx, 0, sizeof(*ctx));
- inst = qat_compression_get_instance_node(node);
+ inst = qat_compression_get_instance_node(node, alg);
if (!inst)
return -EINVAL;
ctx->inst = inst;
- ret = qat_comp_build_ctx(inst->accel_dev, ctx->comp_ctx, QAT_DEFLATE);
+ ret = qat_comp_build_ctx(inst->accel_dev, ctx->comp_ctx, alg);
if (ret) {
qat_compression_put_instance(inst);
memset(ctx, 0, sizeof(*ctx));
@@ -155,6 +271,11 @@ static int qat_comp_alg_init_tfm(struct crypto_acomp *acomp_tfm)
return ret;
}
+static int qat_comp_alg_deflate_init_tfm(struct crypto_acomp *acomp_tfm)
+{
+ return qat_comp_alg_init_tfm(acomp_tfm, QAT_DEFLATE);
+}
+
static void qat_comp_alg_exit_tfm(struct crypto_acomp *acomp_tfm)
{
struct qat_compression_ctx *ctx = acomp_tfm_ctx(acomp_tfm);
@@ -237,7 +358,234 @@ static int qat_comp_alg_decompress(struct acomp_req *req)
return qat_comp_alg_compress_decompress(req, DECOMPRESSION, 0, 0, 0, 0);
}
-static struct acomp_alg qat_acomp[] = { {
+static int qat_comp_alg_zstd_decompress(struct acomp_req *req)
+{
+ struct crypto_acomp *acomp_tfm = crypto_acomp_reqtfm(req);
+ struct qat_compression_ctx *ctx = acomp_tfm_ctx(acomp_tfm);
+ struct acomp_req *nreq = acomp_request_ctx(req);
+ zstd_frame_header header;
+ void *buffer;
+ size_t zret;
+ int ret;
+
+ buffer = kmap_local_page(sg_page(req->src)) + req->src->offset;
+ zret = zstd_get_frame_header(&header, buffer, req->src->length);
+ kunmap_local(buffer);
+ if (zret) {
+ dev_err(&GET_DEV(ctx->inst->accel_dev),
+ "ZSTD-compressed data has an incomplete frame header\n");
+ return -EINVAL;
+ }
+
+ if (header.windowSize > QAT_ZSTD_MAX_BLOCK_SIZE ||
+ header.frameContentSize >= QAT_ZSTD_MAX_CONTENT_SIZE) {
+ dev_dbg(&GET_DEV(ctx->inst->accel_dev), "Window size=0x%llx\n",
+ header.windowSize);
+
+ memcpy(nreq, req, sizeof(*req));
+ acomp_request_set_tfm(nreq, ctx->ftfm);
+
+ ret = crypto_acomp_decompress(nreq);
+ req->dlen = nreq->dlen;
+
+ return ret;
+ }
+
+ return qat_comp_alg_compress_decompress(req, DECOMPRESSION, 0, 0, 0, 0);
+}
+
+static int qat_comp_lz4s_zstd_callback(struct qat_compression_req *qat_req, void *resp,
+ struct qat_callback_params *params)
+{
+ struct qat_compression_ctx *qat_ctx = qat_req->qat_compression_ctx;
+ struct acomp_req *areq = qat_req->acompress_req;
+ struct qat_zstd_scratch *scratch;
+ struct crypto_acomp_stream *s;
+ unsigned int lit_len = 0;
+ ZSTD_Sequence *out_seqs;
+ void *lz4s, *zstd;
+ size_t comp_size;
+ ZSTD_CCtx *ctx;
+ void *literals;
+ int seq_count;
+ int ret = 0;
+
+ if (params->produced + QAT_ZSTD_LIT_COPY_LEN > QAT_ZSTD_SCRATCH_SIZE) {
+ dev_dbg(&GET_DEV(qat_ctx->inst->accel_dev),
+ "LZ4s-ZSTD: produced size (%u) + COPY_SIZE > QAT_ZSTD_SCRATCH_SIZE (%u)\n",
+ params->produced, QAT_ZSTD_SCRATCH_SIZE);
+ areq->dlen = 0;
+ return -E2BIG;
+ }
+
+ s = crypto_acomp_lock_stream_bh(&qat_zstd_streams);
+ scratch = s->ctx;
+
+ lz4s = scratch->lz4s;
+ zstd = lz4s; /* Output buffer is same as lz4s */
+ out_seqs = scratch->out_seqs;
+ ctx = scratch->ctx;
+ literals = scratch->literals;
+
+ if (likely(!params->plain)) {
+ if (likely(sg_nents(areq->dst) == 1)) {
+ zstd = sg_virt(areq->dst);
+ lz4s = zstd;
+ } else {
+ memcpy_from_sglist(lz4s, areq->dst, 0, params->produced);
+ }
+
+ seq_count = qat_alg_dec_lz4s(out_seqs, QAT_MAX_SEQUENCES, lz4s,
+ params->produced, literals, &lit_len);
+ if (seq_count < 0) {
+ ret = seq_count;
+ comp_size = 0;
+ goto out;
+ }
+ } else {
+ out_seqs[0].litLength = areq->slen;
+ out_seqs[0].offset = 0;
+ out_seqs[0].matchLength = 0;
+
+ seq_count = 1;
+ }
+
+ comp_size = zstd_compress_sequences_and_literals(ctx, zstd, params->dlen,
+ out_seqs, seq_count,
+ literals, lit_len,
+ QAT_ZSTD_SCRATCH_SIZE,
+ areq->slen);
+ if (zstd_is_error(comp_size)) {
+ if (comp_size == ZSTD_error_cannotProduce_uncompressedBlock)
+ ret = -E2BIG;
+ else
+ ret = -EOPNOTSUPP;
+
+ comp_size = 0;
+ goto out;
+ }
+
+ if (comp_size > params->dlen) {
+ dev_dbg(&GET_DEV(qat_ctx->inst->accel_dev),
+ "LZ4s-ZSTD: compressed_size (%u) > output buffer size (%u)\n",
+ (unsigned int)comp_size, params->dlen);
+ ret = -EOVERFLOW;
+ goto out;
+ }
+
+ if (unlikely(sg_nents(areq->dst) != 1))
+ memcpy_to_sglist(areq->dst, 0, zstd, comp_size);
+
+out:
+ areq->dlen = comp_size;
+ crypto_acomp_unlock_stream_bh(s);
+
+ return ret;
+}
+
+static int qat_comp_alg_lz4s_zstd_init_tfm(struct crypto_acomp *acomp_tfm)
+{
+ struct qat_compression_ctx *ctx = acomp_tfm_ctx(acomp_tfm);
+ struct crypto_tfm *tfm = crypto_acomp_tfm(acomp_tfm);
+ int reqsize;
+ int ret;
+
+ /* qat_comp_alg_init_tfm() wipes out the ctx */
+ ret = qat_comp_alg_init_tfm(acomp_tfm, QAT_LZ4S);
+ if (ret)
+ return ret;
+
+ ctx->ftfm = crypto_alloc_acomp_node("zstd", 0, CRYPTO_ALG_NEED_FALLBACK,
+ tfm->node);
+ if (IS_ERR(ctx->ftfm)) {
+ qat_comp_alg_exit_tfm(acomp_tfm);
+ return PTR_ERR(ctx->ftfm);
+ }
+
+ reqsize = max(sizeof(struct qat_compression_req),
+ sizeof(struct acomp_req) + crypto_acomp_reqsize(ctx->ftfm));
+
+ acomp_tfm->reqsize = reqsize;
+
+ ctx->qat_comp_callback = qat_comp_lz4s_zstd_callback;
+
+ return 0;
+}
+
+static int qat_comp_alg_zstd_init_tfm(struct crypto_acomp *acomp_tfm)
+{
+ struct qat_compression_ctx *ctx = acomp_tfm_ctx(acomp_tfm);
+ struct crypto_tfm *tfm = crypto_acomp_tfm(acomp_tfm);
+ int reqsize;
+ int ret;
+
+ /* qat_comp_alg_init_tfm() wipes out the ctx */
+ ret = qat_comp_alg_init_tfm(acomp_tfm, QAT_ZSTD);
+ if (ret)
+ return ret;
+
+ ctx->ftfm = crypto_alloc_acomp_node("zstd", 0, CRYPTO_ALG_NEED_FALLBACK,
+ tfm->node);
+ if (IS_ERR(ctx->ftfm)) {
+ qat_comp_alg_exit_tfm(acomp_tfm);
+ return PTR_ERR(ctx->ftfm);
+ }
+
+ reqsize = max(sizeof(struct qat_compression_req),
+ sizeof(struct acomp_req) + crypto_acomp_reqsize(ctx->ftfm));
+
+ acomp_tfm->reqsize = reqsize;
+
+ return 0;
+}
+
+static void qat_comp_alg_zstd_exit_tfm(struct crypto_acomp *acomp_tfm)
+{
+ struct qat_compression_ctx *ctx = acomp_tfm_ctx(acomp_tfm);
+
+ if (ctx->ftfm)
+ crypto_free_acomp(ctx->ftfm);
+
+ qat_comp_alg_exit_tfm(acomp_tfm);
+}
+
+static int qat_comp_alg_lz4s_zstd_compress(struct acomp_req *req)
+{
+ struct crypto_acomp *acomp_tfm = crypto_acomp_reqtfm(req);
+ struct qat_compression_ctx *ctx = acomp_tfm_ctx(acomp_tfm);
+ struct acomp_req *nreq = acomp_request_ctx(req);
+ int ret;
+
+ if (req->slen >= QAT_LZ4S_MIN_INPUT_SIZE && req->dlen >= QAT_LZ4S_MIN_INPUT_SIZE &&
+ req->slen <= QAT_LZ4S_MAX_OUTPUT_SIZE && req->dlen <= QAT_LZ4S_MAX_OUTPUT_SIZE)
+ return qat_comp_alg_compress(req);
+
+ memcpy(nreq, req, sizeof(*req));
+ acomp_request_set_tfm(nreq, ctx->ftfm);
+
+ ret = crypto_acomp_compress(nreq);
+ req->dlen = nreq->dlen;
+
+ return ret;
+}
+
+static int qat_comp_alg_sw_decompress(struct acomp_req *req)
+{
+ struct crypto_acomp *acomp_tfm = crypto_acomp_reqtfm(req);
+ struct qat_compression_ctx *ctx = acomp_tfm_ctx(acomp_tfm);
+ struct acomp_req *nreq = acomp_request_ctx(req);
+ int ret;
+
+ memcpy(nreq, req, sizeof(*req));
+ acomp_request_set_tfm(nreq, ctx->ftfm);
+
+ ret = crypto_acomp_decompress(nreq);
+ req->dlen = nreq->dlen;
+
+ return ret;
+}
+
+static struct acomp_alg qat_acomp_deflate[] = { {
.base = {
.cra_name = "deflate",
.cra_driver_name = "qat_deflate",
@@ -247,27 +595,165 @@ static struct acomp_alg qat_acomp[] = { {
.cra_reqsize = sizeof(struct qat_compression_req),
.cra_module = THIS_MODULE,
},
- .init = qat_comp_alg_init_tfm,
+ .init = qat_comp_alg_deflate_init_tfm,
.exit = qat_comp_alg_exit_tfm,
.compress = qat_comp_alg_compress,
.decompress = qat_comp_alg_decompress,
}};
-int qat_comp_algs_register(void)
+static struct acomp_alg qat_acomp_zstd_lz4s = {
+ .base = {
+ .cra_name = "zstd",
+ .cra_driver_name = "qat_zstd",
+ .cra_priority = 4001,
+ .cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_ALLOCATES_MEMORY |
+ CRYPTO_ALG_NEED_FALLBACK,
+ .cra_reqsize = sizeof(struct qat_compression_req),
+ .cra_ctxsize = sizeof(struct qat_compression_ctx),
+ .cra_module = THIS_MODULE,
+ },
+ .init = qat_comp_alg_lz4s_zstd_init_tfm,
+ .exit = qat_comp_alg_zstd_exit_tfm,
+ .compress = qat_comp_alg_lz4s_zstd_compress,
+ .decompress = qat_comp_alg_sw_decompress,
+};
+
+static struct acomp_alg qat_acomp_zstd_native = {
+ .base = {
+ .cra_name = "zstd",
+ .cra_driver_name = "qat_zstd",
+ .cra_priority = 4001,
+ .cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_ALLOCATES_MEMORY |
+ CRYPTO_ALG_NEED_FALLBACK,
+ .cra_reqsize = sizeof(struct qat_compression_req),
+ .cra_ctxsize = sizeof(struct qat_compression_ctx),
+ .cra_module = THIS_MODULE,
+ },
+ .init = qat_comp_alg_zstd_init_tfm,
+ .exit = qat_comp_alg_zstd_exit_tfm,
+ .compress = qat_comp_alg_compress,
+ .decompress = qat_comp_alg_zstd_decompress,
+};
+
+static int qat_comp_algs_register_deflate(void)
+{
+ int ret = 0;
+
+ mutex_lock(&algs_lock);
+ if (++active_devs_deflate == 1) {
+ ret = crypto_register_acomps(qat_acomp_deflate,
+ ARRAY_SIZE(qat_acomp_deflate));
+ if (ret)
+ active_devs_deflate--;
+ }
+ mutex_unlock(&algs_lock);
+
+ return ret;
+}
+
+static void qat_comp_algs_unregister_deflate(void)
+{
+ mutex_lock(&algs_lock);
+ if (--active_devs_deflate == 0)
+ crypto_unregister_acomps(qat_acomp_deflate, ARRAY_SIZE(qat_acomp_deflate));
+ mutex_unlock(&algs_lock);
+}
+
+static int qat_comp_algs_register_lz4s(void)
{
int ret = 0;
mutex_lock(&algs_lock);
- if (++active_devs == 1)
- ret = crypto_register_acomps(qat_acomp, ARRAY_SIZE(qat_acomp));
+ if (++active_devs_lz4s == 1) {
+ ret = crypto_acomp_alloc_streams(&qat_zstd_streams);
+ if (ret) {
+ active_devs_lz4s--;
+ goto unlock;
+ }
+
+ ret = crypto_register_acomp(&qat_acomp_zstd_lz4s);
+ if (ret) {
+ crypto_acomp_free_streams(&qat_zstd_streams);
+ active_devs_lz4s--;
+ }
+ }
+unlock:
mutex_unlock(&algs_lock);
+
return ret;
}
-void qat_comp_algs_unregister(void)
+static void qat_comp_algs_unregister_lz4s(void)
{
mutex_lock(&algs_lock);
- if (--active_devs == 0)
- crypto_unregister_acomps(qat_acomp, ARRAY_SIZE(qat_acomp));
+ if (--active_devs_lz4s == 0) {
+ crypto_unregister_acomp(&qat_acomp_zstd_lz4s);
+ crypto_acomp_free_streams(&qat_zstd_streams);
+ }
+ mutex_unlock(&algs_lock);
+}
+
+static int qat_comp_algs_register_zstd(void)
+{
+ int ret = 0;
+
+ mutex_lock(&algs_lock);
+ if (++active_devs_zstd == 1) {
+ ret = crypto_register_acomp(&qat_acomp_zstd_native);
+ if (ret)
+ active_devs_zstd--;
+ }
mutex_unlock(&algs_lock);
+
+ return ret;
+}
+
+static void qat_comp_algs_unregister_zstd(void)
+{
+ mutex_lock(&algs_lock);
+ if (--active_devs_zstd == 0)
+ crypto_unregister_acomp(&qat_acomp_zstd_native);
+ mutex_unlock(&algs_lock);
+}
+
+int qat_comp_algs_register(u32 caps)
+{
+ int ret;
+
+ ret = qat_comp_algs_register_deflate();
+ if (ret)
+ return ret;
+
+ if (caps & ADF_ACCEL_CAPABILITIES_EXT_ZSTD_LZ4S) {
+ ret = qat_comp_algs_register_lz4s();
+ if (ret)
+ goto err_unregister_deflate;
+ }
+
+ if (caps & ADF_ACCEL_CAPABILITIES_EXT_ZSTD) {
+ ret = qat_comp_algs_register_zstd();
+ if (ret)
+ goto err_unregister_lz4s;
+ }
+
+ return ret;
+
+err_unregister_lz4s:
+ if (caps & ADF_ACCEL_CAPABILITIES_EXT_ZSTD_LZ4S)
+ qat_comp_algs_unregister_lz4s();
+err_unregister_deflate:
+ qat_comp_algs_unregister_deflate();
+
+ return ret;
+}
+
+void qat_comp_algs_unregister(u32 caps)
+{
+ qat_comp_algs_unregister_deflate();
+
+ if (caps & ADF_ACCEL_CAPABILITIES_EXT_ZSTD_LZ4S)
+ qat_comp_algs_unregister_lz4s();
+
+ if (caps & ADF_ACCEL_CAPABILITIES_EXT_ZSTD)
+ qat_comp_algs_unregister_zstd();
}
diff --git a/drivers/crypto/intel/qat/qat_common/qat_comp_req.h b/drivers/crypto/intel/qat/qat_common/qat_comp_req.h
index 18a1f33a6db9..f165d28aaaf4 100644
--- a/drivers/crypto/intel/qat/qat_common/qat_comp_req.h
+++ b/drivers/crypto/intel/qat/qat_common/qat_comp_req.h
@@ -23,6 +23,7 @@ static inline void qat_comp_create_req(void *ctx, void *req, u64 src, u32 slen,
fw_req->comn_mid.opaque_data = opaque;
req_pars->comp_len = slen;
req_pars->out_buffer_sz = dlen;
+ fw_req->u3.asb_threshold.asb_value *= slen >> 4;
}
static inline void qat_comp_create_compression_req(void *ctx, void *req,
@@ -110,4 +111,12 @@ static inline u8 qat_comp_get_cmp_cnv_flag(void *resp)
return ICP_QAT_FW_COMN_HDR_CNV_FLAG_GET(flags);
}
+static inline u8 qat_comp_get_cmp_uncomp_flag(void *resp)
+{
+ struct icp_qat_fw_comp_resp *qat_resp = resp;
+ u8 flags = qat_resp->comn_resp.hdr_flags;
+
+ return ICP_QAT_FW_COMN_HDR_ST_BLK_FLAG_GET(flags);
+}
+
#endif
diff --git a/drivers/crypto/intel/qat/qat_common/qat_comp_zstd_utils.c b/drivers/crypto/intel/qat/qat_common/qat_comp_zstd_utils.c
new file mode 100644
index 000000000000..62ec2d5c3ab8
--- /dev/null
+++ b/drivers/crypto/intel/qat/qat_common/qat_comp_zstd_utils.c
@@ -0,0 +1,165 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright(c) 2026 Intel Corporation */
+#include <linux/errno.h>
+#include <linux/printk.h>
+#include <linux/string.h>
+#include <linux/unaligned.h>
+#include <linux/zstd.h>
+
+#include "qat_comp_zstd_utils.h"
+
+#define ML_BITS 4
+#define ML_MASK ((1U << ML_BITS) - 1)
+#define RUN_BITS (8 - ML_BITS)
+#define RUN_MASK ((1U << RUN_BITS) - 1)
+#define LZ4S_MINMATCH 2
+
+/*
+ * ZSTD blocks can decompress to at most min(windowSize, 128KB) bytes.
+ * Insert explicit block delimiters to keep blocks within this limit.
+ */
+#define QAT_ZSTD_BLOCK_MAX ZSTD_BLOCKSIZE_MAX
+
+/*
+ * Append an all-zero ZSTD_Sequence (offset == litLength == matchLength == 0),
+ * which the zstd sequence-compression API interprets as an explicit block
+ * delimiter, closing the current block.  The "capacity - 1" check keeps one
+ * slot free past the delimiter for the sequence the caller writes next.
+ *
+ * Return: 0 on success, -EOVERFLOW if @out_seqs cannot take another entry
+ * (the lz4s buffer size is logged only to aid debugging).
+ */
+static int emit_delimiter(ZSTD_Sequence *out_seqs, size_t *seqs_idx,
+			  size_t out_seqs_capacity, unsigned int lz4s_buff_size)
+{
+	if (*seqs_idx >= out_seqs_capacity - 1) {
+		pr_debug("QAT ZSTD: sequence overflow (seqs_idx:%zu, capacity:%zu, lz4s_size:%u)\n",
+			 *seqs_idx, out_seqs_capacity, lz4s_buff_size);
+		return -EOVERFLOW;
+	}
+
+	out_seqs[*seqs_idx].offset = 0;
+	out_seqs[*seqs_idx].litLength = 0;
+	out_seqs[*seqs_idx].matchLength = 0;
+	(*seqs_idx)++;
+
+	return 0;
+}
+
+/**
+ * qat_alg_dec_lz4s() - translate an LZ4S stream into ZSTD sequences
+ * @out_seqs: output array of ZSTD_Sequence entries
+ * @out_seqs_capacity: number of entries available in @out_seqs
+ * @lz4s_buff: LZ4S-encoded input (as produced by the QAT accelerator)
+ * @lz4s_buff_size: size of @lz4s_buff in bytes
+ * @literals: output buffer receiving the concatenated literal bytes
+ * @lit_len: set to the total number of literal bytes copied out
+ *
+ * Walks the LZ4S token stream (high nibble = literal length, low nibble =
+ * match length, both 255-escape extended; 2-byte little-endian offsets)
+ * and emits one ZSTD_Sequence per match.  All-zero delimiter sequences
+ * are inserted via emit_delimiter() so that no single block decompresses
+ * to more than QAT_ZSTD_BLOCK_MAX bytes.
+ *
+ * NOTE(review): literals are copied in QAT_ZSTD_LIT_COPY_LEN-byte chunks
+ * with a do/while, so up to QAT_ZSTD_LIT_COPY_LEN bytes may be written
+ * past the end of a literal run (and read past it in @lz4s_buff) — even
+ * for a zero-length run the loop body executes once.  Callers must pad
+ * both buffers accordingly; confirm against the allocation sites.
+ *
+ * Return: number of sequences written (seqs_idx + 1), 0 for empty input,
+ * or -EOVERFLOW if @out_seqs is too small.
+ */
+int qat_alg_dec_lz4s(ZSTD_Sequence *out_seqs, size_t out_seqs_capacity,
+		     unsigned char *lz4s_buff, unsigned int lz4s_buff_size,
+		     unsigned char *literals, unsigned int *lit_len)
+{
+	unsigned char *end_ip = lz4s_buff + lz4s_buff_size;
+	unsigned char *start, *dest, *dest_end;
+	unsigned int hist_literal_len = 0;
+	unsigned char *ip = lz4s_buff;
+	size_t block_decomp_size = 0;
+	size_t seqs_idx = 0;
+	int ret;
+
+	*lit_len = 0;
+
+	if (!lz4s_buff_size)
+		return 0;
+
+	while (ip < end_ip) {
+		size_t literal_len = 0, match_len = 0;
+		const unsigned int token = *ip++;
+		size_t length = 0;
+		size_t offset = 0;
+
+		/* Get literal length */
+		length = token >> ML_BITS;
+		if (length == RUN_MASK) {
+			unsigned int s;
+
+			/* RUN_MASK marks an extension: add bytes until one is != 255 */
+			do {
+				s = *ip++;
+				length += s;
+			} while (s == 255);
+		}
+
+		literal_len = length;
+
+		start = ip;
+		dest = literals;
+		dest_end = literals + length;
+
+		/*
+		 * Unrolled "wild copy": moves QAT_ZSTD_LIT_COPY_LEN bytes per
+		 * iteration and may overshoot dest_end (see NOTE above re
+		 * buffer padding); runs at least once even when length == 0.
+		 */
+		do {
+			memcpy(dest, start, QAT_ZSTD_LIT_COPY_LEN);
+			dest += QAT_ZSTD_LIT_COPY_LEN;
+			start += QAT_ZSTD_LIT_COPY_LEN;
+		} while (dest < dest_end);
+
+		literals += length;
+		*lit_len += length;
+
+		ip += length;
+		/* Input exhausted: flush trailing literals as the final sequence. */
+		if (ip == end_ip) {
+			literal_len += hist_literal_len;
+			/*
+			 * If adding trailing literals would overflow the
+			 * current block, close it first.
+			 */
+			if (block_decomp_size + literal_len > QAT_ZSTD_BLOCK_MAX) {
+				ret = emit_delimiter(out_seqs, &seqs_idx,
+						     out_seqs_capacity,
+						     lz4s_buff_size);
+				if (ret)
+					return ret;
+			}
+			/* offset/match_len are still 0 here: literals-only sequence. */
+			out_seqs[seqs_idx].litLength = literal_len;
+			out_seqs[seqs_idx].offset = offset;
+			out_seqs[seqs_idx].matchLength = match_len;
+			break;
+		}
+
+		/* 2-byte little-endian match offset follows the literals. */
+		offset = get_unaligned_le16(ip);
+		ip += 2;
+
+		/* Get match length (low nibble, 255-escape extended like above). */
+		length = token & ML_MASK;
+		if (length == ML_MASK) {
+			unsigned int s;
+
+			do {
+				s = *ip++;
+				length += s;
+			} while (s == 255);
+		}
+		if (length != 0) {
+			/* Encoded lengths are biased by the LZ4S minimum match. */
+			length += LZ4S_MINMATCH;
+			match_len = (unsigned short)length;
+			/* Fold in literals deferred from earlier match-less tokens. */
+			literal_len += hist_literal_len;
+
+			/*
+			 * If this sequence would push the current block past
+			 * the ZSTD maximum, close the block first.
+			 */
+			if (block_decomp_size + literal_len + match_len > QAT_ZSTD_BLOCK_MAX) {
+				ret = emit_delimiter(out_seqs, &seqs_idx,
+						     out_seqs_capacity,
+						     lz4s_buff_size);
+				if (ret)
+					return ret;
+
+				block_decomp_size = 0;
+			}
+
+			out_seqs[seqs_idx].offset = offset;
+			out_seqs[seqs_idx].litLength = literal_len;
+			out_seqs[seqs_idx].matchLength = match_len;
+			hist_literal_len = 0;
+			seqs_idx++;
+			/* Keep two slots free: one delimiter + the final sequence. */
+			if (seqs_idx >= out_seqs_capacity - 1) {
+				pr_debug("QAT ZSTD: sequence overflow (seqs_idx:%zu, capacity:%zu, lz4s_size:%u)\n",
+					 seqs_idx, out_seqs_capacity, lz4s_buff_size);
+				return -EOVERFLOW;
+			}
+
+			block_decomp_size += literal_len + match_len;
+		} else {
+			if (literal_len > 0) {
+				/*
+				 * When match length is 0, the literal length needs
+				 * to be temporarily stored and processed together
+				 * with the next data block.
+				 */
+				hist_literal_len += literal_len;
+			}
+		}
+	}
+
+	/* seqs_idx indexes the final (trailing-literal) sequence; return count. */
+	return seqs_idx + 1;
+}
diff --git a/drivers/crypto/intel/qat/qat_common/qat_comp_zstd_utils.h b/drivers/crypto/intel/qat/qat_common/qat_comp_zstd_utils.h
new file mode 100644
index 000000000000..55c7a1b9b848
--- /dev/null
+++ b/drivers/crypto/intel/qat/qat_common/qat_comp_zstd_utils.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/* Copyright(c) 2026 Intel Corporation */
+#ifndef QAT_COMP_ZSTD_UTILS_H_
+#define QAT_COMP_ZSTD_UTILS_H_
+#include <linux/zstd_lib.h>
+
+/* Chunk size (bytes) of the unrolled literal copy in qat_alg_dec_lz4s(). */
+#define QAT_ZSTD_LIT_COPY_LEN 8
+
+/*
+ * Decode an LZ4S buffer into an array of ZSTD sequences plus a flat
+ * literal buffer.  Returns the number of sequences produced, 0 for empty
+ * input, or a negative errno on overflow of @out_seqs.
+ */
+int qat_alg_dec_lz4s(ZSTD_Sequence *out_seqs, size_t out_seqs_capacity,
+		     unsigned char *lz4s_buff, unsigned int lz4s_buff_size,
+		     unsigned char *literals, unsigned int *lit_len);
+
+#endif
diff --git a/drivers/crypto/intel/qat/qat_common/qat_compression.c b/drivers/crypto/intel/qat/qat_common/qat_compression.c
index 53a4db5507ec..1424d7a9bcd3 100644
--- a/drivers/crypto/intel/qat/qat_common/qat_compression.c
+++ b/drivers/crypto/intel/qat/qat_common/qat_compression.c
@@ -46,12 +46,14 @@ static int qat_compression_free_instances(struct adf_accel_dev *accel_dev)
return 0;
}
-struct qat_compression_instance *qat_compression_get_instance_node(int node)
+struct qat_compression_instance *qat_compression_get_instance_node(int node, int alg)
{
struct qat_compression_instance *inst = NULL;
+ struct adf_hw_device_data *hw_data = NULL;
struct adf_accel_dev *accel_dev = NULL;
unsigned long best = ~0;
struct list_head *itr;
+ u32 caps, mask;
list_for_each(itr, adf_devmgr_get_head()) {
struct adf_accel_dev *tmp_dev;
@@ -61,6 +63,15 @@ struct qat_compression_instance *qat_compression_get_instance_node(int node)
tmp_dev = list_entry(itr, struct adf_accel_dev, list);
tmp_dev_node = dev_to_node(&GET_DEV(tmp_dev));
+ if (alg == QAT_ZSTD || alg == QAT_LZ4S) {
+ hw_data = tmp_dev->hw_device;
+ caps = hw_data->accel_capabilities_ext_mask;
+ mask = ADF_ACCEL_CAPABILITIES_EXT_ZSTD |
+ ADF_ACCEL_CAPABILITIES_EXT_ZSTD_LZ4S;
+ if (!(caps & mask))
+ continue;
+ }
+
if ((node == tmp_dev_node || tmp_dev_node < 0) &&
adf_dev_started(tmp_dev) && !list_empty(&tmp_dev->compression_list)) {
ctr = atomic_read(&tmp_dev->ref_count);
@@ -78,6 +89,16 @@ struct qat_compression_instance *qat_compression_get_instance_node(int node)
struct adf_accel_dev *tmp_dev;
tmp_dev = list_entry(itr, struct adf_accel_dev, list);
+
+ if (alg == QAT_ZSTD || alg == QAT_LZ4S) {
+ hw_data = tmp_dev->hw_device;
+ caps = hw_data->accel_capabilities_ext_mask;
+ mask = ADF_ACCEL_CAPABILITIES_EXT_ZSTD |
+ ADF_ACCEL_CAPABILITIES_EXT_ZSTD_LZ4S;
+ if (!(caps & mask))
+ continue;
+ }
+
if (adf_dev_started(tmp_dev) &&
!list_empty(&tmp_dev->compression_list)) {
accel_dev = tmp_dev;