| author | Linus Torvalds <torvalds@linux-foundation.org> | 2020-04-02 00:47:40 +0300 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2020-04-02 00:47:40 +0300 |
| commit | 72f35423e8a6a2451c202f52cb8adb92b08592ec (patch) | |
| tree | 2cc5c715631a59d51b6445143e03a187e8e394f6 /drivers | |
| parent | 890f0b0d27dc400679b9a91d04ca44f5ee4c19c0 (diff) | |
| parent | fcb90d51c375d09a034993cda262b68499e233a4 (diff) | |
| download | linux-72f35423e8a6a2451c202f52cb8adb92b08592ec.tar.xz | |
Merge branch 'linus' of git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6
Pull crypto updates from Herbert Xu:
"API:
- Fix out-of-sync IVs in self-test for IPsec AEAD algorithms
Algorithms:
- Use formally verified implementation of x86/curve25519
Drivers:
- Enhance hwrng support in caam
- Use crypto_engine for skcipher/aead/rsa/hash in caam
- Add Xilinx AES driver
- Add uacce driver
- Register zip engine to uacce in hisilicon
- Add support for OCTEON TX CPT engine in marvell"
* 'linus' of git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6: (162 commits)
crypto: af_alg - bool type cosmetics
crypto: arm[64]/poly1305 - add artifact to .gitignore files
crypto: caam - limit single JD RNG output to maximum of 16 bytes
crypto: caam - enable prediction resistance in HRWNG
bus: fsl-mc: add api to retrieve mc version
crypto: caam - invalidate entropy register during RNG initialization
crypto: caam - check if RNG job failed
crypto: caam - simplify RNG implementation
crypto: caam - drop global context pointer and init_done
crypto: caam - use struct hwrng's .init for initialization
crypto: caam - allocate RNG instantiation descriptor with GFP_DMA
crypto: ccree - remove duplicated include from cc_aead.c
crypto: chelsio - remove set but not used variable 'adap'
crypto: marvell - enable OcteonTX cpt options for build
crypto: marvell - add the Virtual Function driver for CPT
crypto: marvell - add support for OCTEON TX CPT engine
crypto: marvell - create common Kconfig and Makefile for Marvell
crypto: arm/neon - memzero_explicit aes-cbc key
crypto: bcm - Use scnprintf() for avoiding potential buffer overflow
crypto: atmel-i2c - Fix wakeup fail
...
Diffstat (limited to 'drivers')
112 files changed, 11734 insertions, 2747 deletions
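Much of the diff below implements the caam crypto_engine conversion noted in the pull message above. The pattern used there is: embed a struct crypto_engine_ctx as the first member of the transform context, point .op.do_one_request at a callback that pushes the already-built job descriptor to hardware, and route only CRYPTO_TFM_REQ_MAY_BACKLOG requests through the engine (the job ring takes everything else directly). A minimal sketch of that wiring, under stated assumptions, is shown below; my_engine, my_ctx and my_hw_submit() are hypothetical placeholders (my_hw_submit() stands in for caam_jr_enqueue()), not code from this merge.

```c
#include <linux/crypto.h>
#include <crypto/engine.h>
#include <crypto/internal/skcipher.h>

static struct crypto_engine *my_engine;	/* from crypto_engine_alloc_init() at probe */

/* Hypothetical: push an already-built job descriptor to the hardware. */
static int my_hw_submit(struct skcipher_request *req)
{
	return -EINPROGRESS;
}

struct my_ctx {
	struct crypto_engine_ctx enginectx;	/* must be the first member */
	/* keys, shared descriptors, device handle, ... */
};

/* Called by the engine for requests that were queued with backlog. */
static int my_do_one_request(struct crypto_engine *engine, void *areq)
{
	struct skcipher_request *req = skcipher_request_cast(areq);
	int ret = my_hw_submit(req);

	/* the engine expects 0 once the request is owned by the hardware */
	return ret == -EINPROGRESS ? 0 : ret;
}

static int my_init_tfm(struct crypto_skcipher *tfm)
{
	struct my_ctx *ctx = crypto_skcipher_ctx(tfm);

	ctx->enginectx.op.do_one_request = my_do_one_request;
	return 0;
}

static int my_encrypt(struct skcipher_request *req)
{
	/*
	 * Same policy as the caam patches: only requests that may backlog
	 * go through the engine; the rest are sent straight to hardware.
	 */
	if (req->base.flags & CRYPTO_TFM_REQ_MAY_BACKLOG)
		return crypto_transfer_skcipher_request_to_engine(my_engine,
								  req);

	return my_hw_submit(req);
}
```

When the hardware completion fires, a request that went through the engine is finished with crypto_finalize_skcipher_request() rather than skcipher_request_complete(); that is the purpose of the edesc->bklog flag added throughout the caam completion callbacks in the diff below.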
diff --git a/drivers/bus/fsl-mc/fsl-mc-bus.c b/drivers/bus/fsl-mc/fsl-mc-bus.c
index c78d10ea641f..40526da5c6a6 100644
--- a/drivers/bus/fsl-mc/fsl-mc-bus.c
+++ b/drivers/bus/fsl-mc/fsl-mc-bus.c
@@ -26,6 +26,8 @@
  */
 #define FSL_MC_DEFAULT_DMA_MASK	(~0ULL)
 
+static struct fsl_mc_version mc_version;
+
 /**
  * struct fsl_mc - Private data of a "fsl,qoriq-mc" platform device
  * @root_mc_bus_dev: fsl-mc device representing the root DPRC
@@ -55,20 +57,6 @@ struct fsl_mc_addr_translation_range {
 };
 
 /**
- * struct mc_version
- * @major: Major version number: incremented on API compatibility changes
- * @minor: Minor version number: incremented on API additions (that are
- *	backward compatible); reset when major version is incremented
- * @revision: Internal revision number: incremented on implementation changes
- *	and/or bug fixes that have no impact on API
- */
-struct mc_version {
-	u32 major;
-	u32 minor;
-	u32 revision;
-};
-
-/**
  * fsl_mc_bus_match - device to driver matching callback
  * @dev: the fsl-mc device to match against
  * @drv: the device driver to search for matching fsl-mc object type
@@ -338,7 +326,7 @@ EXPORT_SYMBOL_GPL(fsl_mc_driver_unregister);
  */
 static int mc_get_version(struct fsl_mc_io *mc_io,
 			  u32 cmd_flags,
-			  struct mc_version *mc_ver_info)
+			  struct fsl_mc_version *mc_ver_info)
 {
 	struct fsl_mc_command cmd = { 0 };
 	struct dpmng_rsp_get_version *rsp_params;
@@ -364,6 +352,20 @@ static int mc_get_version(struct fsl_mc_io *mc_io,
 }
 
 /**
+ * fsl_mc_get_version - function to retrieve the MC f/w version information
+ *
+ * Return: mc version when called after fsl-mc-bus probe; NULL otherwise.
+ */
+struct fsl_mc_version *fsl_mc_get_version(void)
+{
+	if (mc_version.major)
+		return &mc_version;
+
+	return NULL;
+}
+EXPORT_SYMBOL_GPL(fsl_mc_get_version);
+
+/**
  * fsl_mc_get_root_dprc - function to traverse to the root dprc
  */
 static void fsl_mc_get_root_dprc(struct device *dev,
@@ -862,7 +864,6 @@ static int fsl_mc_bus_probe(struct platform_device *pdev)
 	int container_id;
 	phys_addr_t mc_portal_phys_addr;
 	u32 mc_portal_size;
-	struct mc_version mc_version;
 	struct resource res;
 
 	mc = devm_kzalloc(&pdev->dev, sizeof(*mc), GFP_KERNEL);
diff --git a/drivers/char/hw_random/Kconfig b/drivers/char/hw_random/Kconfig
index 914e293ba62b..9bc46da8d77a 100644
--- a/drivers/char/hw_random/Kconfig
+++ b/drivers/char/hw_random/Kconfig
@@ -244,7 +244,8 @@ config HW_RANDOM_MXC_RNGA
 
 config HW_RANDOM_IMX_RNGC
 	tristate "Freescale i.MX RNGC Random Number Generator"
-	depends on ARCH_MXC
+	depends on HAS_IOMEM && HAVE_CLK
+	depends on SOC_IMX25 || COMPILE_TEST
 	default HW_RANDOM
 	---help---
 	  This driver provides kernel-side support for the Random Number
@@ -466,6 +467,13 @@ config HW_RANDOM_NPCM
 
 	  If unsure, say Y.
 
+config HW_RANDOM_KEYSTONE
+	depends on ARCH_KEYSTONE || COMPILE_TEST
+	default HW_RANDOM
+	tristate "TI Keystone NETCP SA Hardware random number generator"
+	help
+	  This option enables Keystone's hardware random generator.
+
 endif # HW_RANDOM
 
 config UML_RANDOM
@@ -482,10 +490,3 @@ config UML_RANDOM
 	  (check your distro, or download from
 	  http://sourceforge.net/projects/gkernel/).  rngd periodically reads
 	  /dev/hwrng and injects the entropy into /dev/random.
-
-config HW_RANDOM_KEYSTONE
-	depends on ARCH_KEYSTONE || COMPILE_TEST
-	default HW_RANDOM
-	tristate "TI Keystone NETCP SA Hardware random number generator"
-	help
-	  This option enables Keystone's hardware random generator.
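The fsl-mc-bus hunks above cache the firmware version at probe time and export fsl_mc_get_version(), which returns a pointer to the cached struct fsl_mc_version once the fsl-mc bus has probed, or NULL before that. The matching declaration presumably lives in include/linux/fsl/mc.h, outside this drivers-limited diffstat. A consumer could gate behaviour on the firmware level roughly as follows; mc_fw_at_least() is an illustrative helper, not code from the series.

```c
#include <linux/types.h>
#include <linux/fsl/mc.h>

/* Illustrative helper: is the MC firmware at least version maj.min? */
static bool mc_fw_at_least(u32 maj, u32 min)
{
	struct fsl_mc_version *ver = fsl_mc_get_version();

	if (!ver)	/* fsl-mc bus not probed yet */
		return false;

	return ver->major > maj ||
	       (ver->major == maj && ver->minor >= min);
}
```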
diff --git a/drivers/char/hw_random/imx-rngc.c b/drivers/char/hw_random/imx-rngc.c index 30cf00f8e9a0..9c47e431ce90 100644 --- a/drivers/char/hw_random/imx-rngc.c +++ b/drivers/char/hw_random/imx-rngc.c @@ -18,12 +18,22 @@ #include <linux/completion.h> #include <linux/io.h> +#define RNGC_VER_ID 0x0000 #define RNGC_COMMAND 0x0004 #define RNGC_CONTROL 0x0008 #define RNGC_STATUS 0x000C #define RNGC_ERROR 0x0010 #define RNGC_FIFO 0x0014 +/* the fields in the ver id register */ +#define RNGC_TYPE_SHIFT 28 +#define RNGC_VER_MAJ_SHIFT 8 + +/* the rng_type field */ +#define RNGC_TYPE_RNGB 0x1 +#define RNGC_TYPE_RNGC 0x2 + + #define RNGC_CMD_CLR_ERR 0x00000020 #define RNGC_CMD_CLR_INT 0x00000010 #define RNGC_CMD_SEED 0x00000002 @@ -31,6 +41,7 @@ #define RNGC_CTRL_MASK_ERROR 0x00000040 #define RNGC_CTRL_MASK_DONE 0x00000020 +#define RNGC_CTRL_AUTO_SEED 0x00000010 #define RNGC_STATUS_ERROR 0x00010000 #define RNGC_STATUS_FIFO_LEVEL_MASK 0x00000f00 @@ -100,15 +111,11 @@ static int imx_rngc_self_test(struct imx_rngc *rngc) writel(cmd | RNGC_CMD_SELF_TEST, rngc->base + RNGC_COMMAND); ret = wait_for_completion_timeout(&rngc->rng_op_done, RNGC_TIMEOUT); - if (!ret) { - imx_rngc_irq_mask_clear(rngc); + imx_rngc_irq_mask_clear(rngc); + if (!ret) return -ETIMEDOUT; - } - if (rngc->err_reg != 0) - return -EIO; - - return 0; + return rngc->err_reg ? -EIO : 0; } static int imx_rngc_read(struct hwrng *rng, void *data, size_t max, bool wait) @@ -165,17 +172,17 @@ static irqreturn_t imx_rngc_irq(int irq, void *priv) static int imx_rngc_init(struct hwrng *rng) { struct imx_rngc *rngc = container_of(rng, struct imx_rngc, rng); - u32 cmd; + u32 cmd, ctrl; int ret; /* clear error */ cmd = readl(rngc->base + RNGC_COMMAND); writel(cmd | RNGC_CMD_CLR_ERR, rngc->base + RNGC_COMMAND); + imx_rngc_irq_unmask(rngc); + /* create seed, repeat while there is some statistical error */ do { - imx_rngc_irq_unmask(rngc); - /* seed creation */ cmd = readl(rngc->base + RNGC_COMMAND); writel(cmd | RNGC_CMD_SEED, rngc->base + RNGC_COMMAND); @@ -184,13 +191,42 @@ static int imx_rngc_init(struct hwrng *rng) RNGC_TIMEOUT); if (!ret) { - imx_rngc_irq_mask_clear(rngc); - return -ETIMEDOUT; + ret = -ETIMEDOUT; + goto err; } } while (rngc->err_reg == RNGC_ERROR_STATUS_STAT_ERR); - return rngc->err_reg ? -EIO : 0; + if (rngc->err_reg) { + ret = -EIO; + goto err; + } + + /* + * enable automatic seeding, the rngc creates a new seed automatically + * after serving 2^20 random 160-bit words + */ + ctrl = readl(rngc->base + RNGC_CONTROL); + ctrl |= RNGC_CTRL_AUTO_SEED; + writel(ctrl, rngc->base + RNGC_CONTROL); + + /* + * if initialisation was successful, we keep the interrupt + * unmasked until imx_rngc_cleanup is called + * we mask the interrupt ourselves if we return an error + */ + return 0; + +err: + imx_rngc_irq_mask_clear(rngc); + return ret; +} + +static void imx_rngc_cleanup(struct hwrng *rng) +{ + struct imx_rngc *rngc = container_of(rng, struct imx_rngc, rng); + + imx_rngc_irq_mask_clear(rngc); } static int imx_rngc_probe(struct platform_device *pdev) @@ -198,6 +234,8 @@ static int imx_rngc_probe(struct platform_device *pdev) struct imx_rngc *rngc; int ret; int irq; + u32 ver_id; + u8 rng_type; rngc = devm_kzalloc(&pdev->dev, sizeof(*rngc), GFP_KERNEL); if (!rngc) @@ -223,6 +261,17 @@ static int imx_rngc_probe(struct platform_device *pdev) if (ret) return ret; + ver_id = readl(rngc->base + RNGC_VER_ID); + rng_type = ver_id >> RNGC_TYPE_SHIFT; + /* + * This driver supports only RNGC and RNGB. (There's a different + * driver for RNGA.) 
+ */ + if (rng_type != RNGC_TYPE_RNGC && rng_type != RNGC_TYPE_RNGB) { + ret = -ENODEV; + goto err; + } + ret = devm_request_irq(&pdev->dev, irq, imx_rngc_irq, 0, pdev->name, (void *)rngc); if (ret) { @@ -235,6 +284,7 @@ static int imx_rngc_probe(struct platform_device *pdev) rngc->rng.name = pdev->name; rngc->rng.init = imx_rngc_init; rngc->rng.read = imx_rngc_read; + rngc->rng.cleanup = imx_rngc_cleanup; rngc->dev = &pdev->dev; platform_set_drvdata(pdev, rngc); @@ -244,18 +294,21 @@ static int imx_rngc_probe(struct platform_device *pdev) if (self_test) { ret = imx_rngc_self_test(rngc); if (ret) { - dev_err(rngc->dev, "FSL RNGC self test failed.\n"); + dev_err(rngc->dev, "self test failed\n"); goto err; } } ret = hwrng_register(&rngc->rng); if (ret) { - dev_err(&pdev->dev, "FSL RNGC registering failed (%d)\n", ret); + dev_err(&pdev->dev, "hwrng registration failed\n"); goto err; } - dev_info(&pdev->dev, "Freescale RNGC registered.\n"); + dev_info(&pdev->dev, + "Freescale RNG%c registered (HW revision %d.%02d)\n", + rng_type == RNGC_TYPE_RNGB ? 'B' : 'C', + (ver_id >> RNGC_VER_MAJ_SHIFT) & 0xff, ver_id & 0xff); return 0; err: diff --git a/drivers/char/hw_random/omap3-rom-rng.c b/drivers/char/hw_random/omap3-rom-rng.c index e08a8887e718..a431c5cbe2be 100644 --- a/drivers/char/hw_random/omap3-rom-rng.c +++ b/drivers/char/hw_random/omap3-rom-rng.c @@ -18,6 +18,7 @@ #include <linux/workqueue.h> #include <linux/clk.h> #include <linux/err.h> +#include <linux/io.h> #include <linux/of.h> #include <linux/of_device.h> #include <linux/platform_device.h> diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig index c2767ed54dfe..2c887e4d005a 100644 --- a/drivers/crypto/Kconfig +++ b/drivers/crypto/Kconfig @@ -233,20 +233,6 @@ config CRYPTO_CRC32_S390 It is available with IBM z13 or later. -config CRYPTO_DEV_MARVELL_CESA - tristate "Marvell's Cryptographic Engine driver" - depends on PLAT_ORION || ARCH_MVEBU - select CRYPTO_LIB_AES - select CRYPTO_LIB_DES - select CRYPTO_SKCIPHER - select CRYPTO_HASH - select SRAM - help - This driver allows you to utilize the Cryptographic Engines and - Security Accelerator (CESA) which can be found on MVEBU and ORION - platforms. - This driver supports CPU offload through DMA transfers. - config CRYPTO_DEV_NIAGARA2 tristate "Niagara2 Stream Processing Unit driver" select CRYPTO_LIB_DES @@ -606,6 +592,7 @@ config CRYPTO_DEV_MXS_DCP source "drivers/crypto/qat/Kconfig" source "drivers/crypto/cavium/cpt/Kconfig" source "drivers/crypto/cavium/nitrox/Kconfig" +source "drivers/crypto/marvell/Kconfig" config CRYPTO_DEV_CAVIUM_ZIP tristate "Cavium ZIP driver" @@ -685,6 +672,29 @@ choice endchoice +config CRYPTO_DEV_QCE_SW_MAX_LEN + int "Default maximum request size to use software for AES" + depends on CRYPTO_DEV_QCE && CRYPTO_DEV_QCE_SKCIPHER + default 512 + help + This sets the default maximum request size to perform AES requests + using software instead of the crypto engine. It can be changed by + setting the aes_sw_max_len parameter. + + Small blocks are processed faster in software than hardware. + Considering the 256-bit ciphers, software is 2-3 times faster than + qce at 256-bytes, 30% faster at 512, and about even at 768-bytes. + With 128-bit keys, the break-even point would be around 1024-bytes. + + The default is set a little lower, to 512 bytes, to balance the + cost in CPU usage. The minimum recommended setting is 16-bytes + (1 AES block), since AES-GCM will fail if you set it lower. + Setting this to zero will send all requests to the hardware. 
+ + Note that 192-bit keys are not supported by the hardware and are + always processed by the software fallback, and all DES requests + are done by the hardware. + config CRYPTO_DEV_QCOM_RNG tristate "Qualcomm Random Number Generator Driver" depends on ARCH_QCOM || COMPILE_TEST @@ -731,6 +741,18 @@ config CRYPTO_DEV_ROCKCHIP This driver interfaces with the hardware crypto accelerator. Supporting cbc/ecb chainmode, and aes/des/des3_ede cipher mode. +config CRYPTO_DEV_ZYNQMP_AES + tristate "Support for Xilinx ZynqMP AES hw accelerator" + depends on ZYNQMP_FIRMWARE || COMPILE_TEST + select CRYPTO_AES + select CRYPTO_ENGINE + select CRYPTO_AEAD + help + Xilinx ZynqMP has AES-GCM engine used for symmetric key + encryption and decryption. This driver interfaces with AES hw + accelerator. Select this if you want to use the ZynqMP module + for AES algorithms. + config CRYPTO_DEV_MEDIATEK tristate "MediaTek's EIP97 Cryptographic Engine driver" depends on (ARM && ARCH_MEDIATEK) || COMPILE_TEST diff --git a/drivers/crypto/Makefile b/drivers/crypto/Makefile index 40229d499476..944ed7226e37 100644 --- a/drivers/crypto/Makefile +++ b/drivers/crypto/Makefile @@ -18,7 +18,7 @@ obj-$(CONFIG_CRYPTO_DEV_GEODE) += geode-aes.o obj-$(CONFIG_CRYPTO_DEV_HIFN_795X) += hifn_795x.o obj-$(CONFIG_CRYPTO_DEV_IMGTEC_HASH) += img-hash.o obj-$(CONFIG_CRYPTO_DEV_IXP4XX) += ixp4xx_crypto.o -obj-$(CONFIG_CRYPTO_DEV_MARVELL_CESA) += marvell/ +obj-$(CONFIG_CRYPTO_DEV_MARVELL) += marvell/ obj-$(CONFIG_CRYPTO_DEV_MEDIATEK) += mediatek/ obj-$(CONFIG_CRYPTO_DEV_MXS_DCP) += mxs-dcp.o obj-$(CONFIG_CRYPTO_DEV_NIAGARA2) += n2_crypto.o @@ -47,5 +47,6 @@ obj-$(CONFIG_CRYPTO_DEV_VMX) += vmx/ obj-$(CONFIG_CRYPTO_DEV_BCM_SPU) += bcm/ obj-$(CONFIG_CRYPTO_DEV_SAFEXCEL) += inside-secure/ obj-$(CONFIG_CRYPTO_DEV_ARTPEC6) += axis/ +obj-$(CONFIG_CRYPTO_DEV_ZYNQMP_AES) += xilinx/ obj-y += hisilicon/ obj-$(CONFIG_CRYPTO_DEV_AMLOGIC_GXL) += amlogic/ diff --git a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-core.c b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-core.c index f72346a44e69..3e4e4bbda34c 100644 --- a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-core.c +++ b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-core.c @@ -565,10 +565,8 @@ static int sun8i_ce_probe(struct platform_device *pdev) /* Get Non Secure IRQ */ irq = platform_get_irq(pdev, 0); - if (irq < 0) { - dev_err(ce->dev, "Cannot get CryptoEngine Non-secure IRQ\n"); + if (irq < 0) return irq; - } ce->reset = devm_reset_control_get(&pdev->dev, NULL); if (IS_ERR(ce->reset)) { diff --git a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce.h b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce.h index 8f8404c84a4d..0e9eac397e1b 100644 --- a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce.h +++ b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce.h @@ -214,7 +214,7 @@ struct sun8i_cipher_tfm_ctx { * this template * @alg: one of sub struct must be used * @stat_req: number of request done on this template - * @stat_fb: total of all data len done on this template + * @stat_fb: number of request which has fallbacked */ struct sun8i_ce_alg_template { u32 type; diff --git a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss.h b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss.h index b5f855f3de10..29c44f279112 100644 --- a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss.h +++ b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss.h @@ -186,7 +186,7 @@ struct sun8i_cipher_tfm_ctx { * this template * @alg: one of sub struct must be used * @stat_req: number of request done on this template - * @stat_fb: total of all data len done on this template + * @stat_fb: 
number of request which has fallbacked */ struct sun8i_ss_alg_template { u32 type; diff --git a/drivers/crypto/atmel-i2c.c b/drivers/crypto/atmel-i2c.c index 1d3355913b40..e8e8281e027d 100644 --- a/drivers/crypto/atmel-i2c.c +++ b/drivers/crypto/atmel-i2c.c @@ -176,7 +176,8 @@ static int atmel_i2c_wakeup(struct i2c_client *client) * device is idle, asleep or during waking up. Don't check for error * when waking up the device. */ - i2c_master_send(client, i2c_priv->wake_token, i2c_priv->wake_token_sz); + i2c_transfer_buffer_flags(client, i2c_priv->wake_token, + i2c_priv->wake_token_sz, I2C_M_IGNORE_NAK); /* * Wait to wake the device. Typical execution times for ecdh and genkey diff --git a/drivers/crypto/bcm/util.c b/drivers/crypto/bcm/util.c index cd7504101acd..2b304fc78059 100644 --- a/drivers/crypto/bcm/util.c +++ b/drivers/crypto/bcm/util.c @@ -366,88 +366,88 @@ static ssize_t spu_debugfs_read(struct file *filp, char __user *ubuf, ipriv = filp->private_data; out_offset = 0; - out_offset += snprintf(buf + out_offset, out_count - out_offset, + out_offset += scnprintf(buf + out_offset, out_count - out_offset, "Number of SPUs.........%u\n", ipriv->spu.num_spu); - out_offset += snprintf(buf + out_offset, out_count - out_offset, + out_offset += scnprintf(buf + out_offset, out_count - out_offset, "Current sessions.......%u\n", atomic_read(&ipriv->session_count)); - out_offset += snprintf(buf + out_offset, out_count - out_offset, + out_offset += scnprintf(buf + out_offset, out_count - out_offset, "Session count..........%u\n", atomic_read(&ipriv->stream_count)); - out_offset += snprintf(buf + out_offset, out_count - out_offset, + out_offset += scnprintf(buf + out_offset, out_count - out_offset, "Cipher setkey..........%u\n", atomic_read(&ipriv->setkey_cnt[SPU_OP_CIPHER])); - out_offset += snprintf(buf + out_offset, out_count - out_offset, + out_offset += scnprintf(buf + out_offset, out_count - out_offset, "Cipher Ops.............%u\n", atomic_read(&ipriv->op_counts[SPU_OP_CIPHER])); for (alg = 0; alg < CIPHER_ALG_LAST; alg++) { for (mode = 0; mode < CIPHER_MODE_LAST; mode++) { op_cnt = atomic_read(&ipriv->cipher_cnt[alg][mode]); if (op_cnt) { - out_offset += snprintf(buf + out_offset, + out_offset += scnprintf(buf + out_offset, out_count - out_offset, " %-13s%11u\n", spu_alg_name(alg, mode), op_cnt); } } } - out_offset += snprintf(buf + out_offset, out_count - out_offset, + out_offset += scnprintf(buf + out_offset, out_count - out_offset, "Hash Ops...............%u\n", atomic_read(&ipriv->op_counts[SPU_OP_HASH])); for (alg = 0; alg < HASH_ALG_LAST; alg++) { op_cnt = atomic_read(&ipriv->hash_cnt[alg]); if (op_cnt) { - out_offset += snprintf(buf + out_offset, + out_offset += scnprintf(buf + out_offset, out_count - out_offset, " %-13s%11u\n", hash_alg_name[alg], op_cnt); } } - out_offset += snprintf(buf + out_offset, out_count - out_offset, + out_offset += scnprintf(buf + out_offset, out_count - out_offset, "HMAC setkey............%u\n", atomic_read(&ipriv->setkey_cnt[SPU_OP_HMAC])); - out_offset += snprintf(buf + out_offset, out_count - out_offset, + out_offset += scnprintf(buf + out_offset, out_count - out_offset, "HMAC Ops...............%u\n", atomic_read(&ipriv->op_counts[SPU_OP_HMAC])); for (alg = 0; alg < HASH_ALG_LAST; alg++) { op_cnt = atomic_read(&ipriv->hmac_cnt[alg]); if (op_cnt) { - out_offset += snprintf(buf + out_offset, + out_offset += scnprintf(buf + out_offset, out_count - out_offset, " %-13s%11u\n", hash_alg_name[alg], op_cnt); } } - out_offset += snprintf(buf + out_offset, 
out_count - out_offset, + out_offset += scnprintf(buf + out_offset, out_count - out_offset, "AEAD setkey............%u\n", atomic_read(&ipriv->setkey_cnt[SPU_OP_AEAD])); - out_offset += snprintf(buf + out_offset, out_count - out_offset, + out_offset += scnprintf(buf + out_offset, out_count - out_offset, "AEAD Ops...............%u\n", atomic_read(&ipriv->op_counts[SPU_OP_AEAD])); for (alg = 0; alg < AEAD_TYPE_LAST; alg++) { op_cnt = atomic_read(&ipriv->aead_cnt[alg]); if (op_cnt) { - out_offset += snprintf(buf + out_offset, + out_offset += scnprintf(buf + out_offset, out_count - out_offset, " %-13s%11u\n", aead_alg_name[alg], op_cnt); } } - out_offset += snprintf(buf + out_offset, out_count - out_offset, + out_offset += scnprintf(buf + out_offset, out_count - out_offset, "Bytes of req data......%llu\n", (u64)atomic64_read(&ipriv->bytes_out)); - out_offset += snprintf(buf + out_offset, out_count - out_offset, + out_offset += scnprintf(buf + out_offset, out_count - out_offset, "Bytes of resp data.....%llu\n", (u64)atomic64_read(&ipriv->bytes_in)); - out_offset += snprintf(buf + out_offset, out_count - out_offset, + out_offset += scnprintf(buf + out_offset, out_count - out_offset, "Mailbox full...........%u\n", atomic_read(&ipriv->mb_no_spc)); - out_offset += snprintf(buf + out_offset, out_count - out_offset, + out_offset += scnprintf(buf + out_offset, out_count - out_offset, "Mailbox send failures..%u\n", atomic_read(&ipriv->mb_send_fail)); - out_offset += snprintf(buf + out_offset, out_count - out_offset, + out_offset += scnprintf(buf + out_offset, out_count - out_offset, "Check ICV errors.......%u\n", atomic_read(&ipriv->bad_icv)); if (ipriv->spu.spu_type == SPU_TYPE_SPUM) @@ -455,7 +455,7 @@ static ssize_t spu_debugfs_read(struct file *filp, char __user *ubuf, spu_ofifo_ctrl = ioread32(ipriv->spu.reg_vbase[i] + SPU_OFIFO_CTRL); fifo_len = spu_ofifo_ctrl & SPU_FIFO_WATERMARK; - out_offset += snprintf(buf + out_offset, + out_offset += scnprintf(buf + out_offset, out_count - out_offset, "SPU %d output FIFO high water.....%u\n", i, fifo_len); diff --git a/drivers/crypto/caam/Kconfig b/drivers/crypto/caam/Kconfig index fac5b2e26610..a62f228be6da 100644 --- a/drivers/crypto/caam/Kconfig +++ b/drivers/crypto/caam/Kconfig @@ -13,6 +13,7 @@ config CRYPTO_DEV_FSL_CAAM depends on FSL_SOC || ARCH_MXC || ARCH_LAYERSCAPE select SOC_BUS select CRYPTO_DEV_FSL_CAAM_COMMON + imply FSL_MC_BUS help Enables the driver module for Freescale's Cryptographic Accelerator and Assurance Module (CAAM), also known as the SEC version 4 (SEC4). 
@@ -33,6 +34,7 @@ config CRYPTO_DEV_FSL_CAAM_DEBUG menuconfig CRYPTO_DEV_FSL_CAAM_JR tristate "Freescale CAAM Job Ring driver backend" + select CRYPTO_ENGINE default y help Enables the driver module for Job Rings which are part of diff --git a/drivers/crypto/caam/caamalg.c b/drivers/crypto/caam/caamalg.c index ef1a65f4fc92..b7bb7c30adeb 100644 --- a/drivers/crypto/caam/caamalg.c +++ b/drivers/crypto/caam/caamalg.c @@ -56,6 +56,7 @@ #include "sg_sw_sec4.h" #include "key_gen.h" #include "caamalg_desc.h" +#include <crypto/engine.h> /* * crypto alg @@ -101,6 +102,7 @@ struct caam_skcipher_alg { * per-session context */ struct caam_ctx { + struct crypto_engine_ctx enginectx; u32 sh_desc_enc[DESC_MAX_USED_LEN]; u32 sh_desc_dec[DESC_MAX_USED_LEN]; u8 key[CAAM_MAX_KEY_SIZE]; @@ -114,6 +116,14 @@ struct caam_ctx { unsigned int authsize; }; +struct caam_skcipher_req_ctx { + struct skcipher_edesc *edesc; +}; + +struct caam_aead_req_ctx { + struct aead_edesc *edesc; +}; + static int aead_null_set_sh_desc(struct crypto_aead *aead) { struct caam_ctx *ctx = crypto_aead_ctx(aead); @@ -858,6 +868,7 @@ static int xts_skcipher_setkey(struct crypto_skcipher *skcipher, const u8 *key, * @mapped_src_nents: number of segments in input h/w link table * @mapped_dst_nents: number of segments in output h/w link table * @sec4_sg_bytes: length of dma mapped sec4_sg space + * @bklog: stored to determine if the request needs backlog * @sec4_sg_dma: bus physical mapped address of h/w link table * @sec4_sg: pointer to h/w link table * @hw_desc: the h/w job descriptor followed by any referenced link tables @@ -868,6 +879,7 @@ struct aead_edesc { int mapped_src_nents; int mapped_dst_nents; int sec4_sg_bytes; + bool bklog; dma_addr_t sec4_sg_dma; struct sec4_sg_entry *sec4_sg; u32 hw_desc[]; @@ -881,6 +893,7 @@ struct aead_edesc { * @mapped_dst_nents: number of segments in output h/w link table * @iv_dma: dma address of iv for checking continuity and link table * @sec4_sg_bytes: length of dma mapped sec4_sg space + * @bklog: stored to determine if the request needs backlog * @sec4_sg_dma: bus physical mapped address of h/w link table * @sec4_sg: pointer to h/w link table * @hw_desc: the h/w job descriptor followed by any referenced link tables @@ -893,9 +906,10 @@ struct skcipher_edesc { int mapped_dst_nents; dma_addr_t iv_dma; int sec4_sg_bytes; + bool bklog; dma_addr_t sec4_sg_dma; struct sec4_sg_entry *sec4_sg; - u32 hw_desc[0]; + u32 hw_desc[]; }; static void caam_unmap(struct device *dev, struct scatterlist *src, @@ -941,37 +955,18 @@ static void skcipher_unmap(struct device *dev, struct skcipher_edesc *edesc, edesc->sec4_sg_dma, edesc->sec4_sg_bytes); } -static void aead_encrypt_done(struct device *jrdev, u32 *desc, u32 err, - void *context) -{ - struct aead_request *req = context; - struct aead_edesc *edesc; - int ecode = 0; - - dev_dbg(jrdev, "%s %d: err 0x%x\n", __func__, __LINE__, err); - - edesc = container_of(desc, struct aead_edesc, hw_desc[0]); - - if (err) - ecode = caam_jr_strstatus(jrdev, err); - - aead_unmap(jrdev, edesc, req); - - kfree(edesc); - - aead_request_complete(req, ecode); -} - -static void aead_decrypt_done(struct device *jrdev, u32 *desc, u32 err, - void *context) +static void aead_crypt_done(struct device *jrdev, u32 *desc, u32 err, + void *context) { struct aead_request *req = context; + struct caam_aead_req_ctx *rctx = aead_request_ctx(req); + struct caam_drv_private_jr *jrp = dev_get_drvdata(jrdev); struct aead_edesc *edesc; int ecode = 0; dev_dbg(jrdev, "%s %d: err 0x%x\n", __func__, 
__LINE__, err); - edesc = container_of(desc, struct aead_edesc, hw_desc[0]); + edesc = rctx->edesc; if (err) ecode = caam_jr_strstatus(jrdev, err); @@ -980,61 +975,30 @@ static void aead_decrypt_done(struct device *jrdev, u32 *desc, u32 err, kfree(edesc); - aead_request_complete(req, ecode); -} - -static void skcipher_encrypt_done(struct device *jrdev, u32 *desc, u32 err, - void *context) -{ - struct skcipher_request *req = context; - struct skcipher_edesc *edesc; - struct crypto_skcipher *skcipher = crypto_skcipher_reqtfm(req); - int ivsize = crypto_skcipher_ivsize(skcipher); - int ecode = 0; - - dev_dbg(jrdev, "%s %d: err 0x%x\n", __func__, __LINE__, err); - - edesc = container_of(desc, struct skcipher_edesc, hw_desc[0]); - - if (err) - ecode = caam_jr_strstatus(jrdev, err); - - skcipher_unmap(jrdev, edesc, req); - /* - * The crypto API expects us to set the IV (req->iv) to the last - * ciphertext block (CBC mode) or last counter (CTR mode). - * This is used e.g. by the CTS mode. + * If no backlog flag, the completion of the request is done + * by CAAM, not crypto engine. */ - if (ivsize && !ecode) { - memcpy(req->iv, (u8 *)edesc->sec4_sg + edesc->sec4_sg_bytes, - ivsize); - print_hex_dump_debug("dstiv @"__stringify(__LINE__)": ", - DUMP_PREFIX_ADDRESS, 16, 4, req->iv, - edesc->src_nents > 1 ? 100 : ivsize, 1); - } - - caam_dump_sg("dst @" __stringify(__LINE__)": ", - DUMP_PREFIX_ADDRESS, 16, 4, req->dst, - edesc->dst_nents > 1 ? 100 : req->cryptlen, 1); - - kfree(edesc); - - skcipher_request_complete(req, ecode); + if (!edesc->bklog) + aead_request_complete(req, ecode); + else + crypto_finalize_aead_request(jrp->engine, req, ecode); } -static void skcipher_decrypt_done(struct device *jrdev, u32 *desc, u32 err, - void *context) +static void skcipher_crypt_done(struct device *jrdev, u32 *desc, u32 err, + void *context) { struct skcipher_request *req = context; struct skcipher_edesc *edesc; + struct caam_skcipher_req_ctx *rctx = skcipher_request_ctx(req); struct crypto_skcipher *skcipher = crypto_skcipher_reqtfm(req); + struct caam_drv_private_jr *jrp = dev_get_drvdata(jrdev); int ivsize = crypto_skcipher_ivsize(skcipher); int ecode = 0; dev_dbg(jrdev, "%s %d: err 0x%x\n", __func__, __LINE__, err); - edesc = container_of(desc, struct skcipher_edesc, hw_desc[0]); + edesc = rctx->edesc; if (err) ecode = caam_jr_strstatus(jrdev, err); @@ -1060,7 +1024,14 @@ static void skcipher_decrypt_done(struct device *jrdev, u32 *desc, u32 err, kfree(edesc); - skcipher_request_complete(req, ecode); + /* + * If no backlog flag, the completion of the request is done + * by CAAM, not crypto engine. + */ + if (!edesc->bklog) + skcipher_request_complete(req, ecode); + else + crypto_finalize_skcipher_request(jrp->engine, req, ecode); } /* @@ -1306,6 +1277,7 @@ static struct aead_edesc *aead_edesc_alloc(struct aead_request *req, struct crypto_aead *aead = crypto_aead_reqtfm(req); struct caam_ctx *ctx = crypto_aead_ctx(aead); struct device *jrdev = ctx->jrdev; + struct caam_aead_req_ctx *rctx = aead_request_ctx(req); gfp_t flags = (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ? 
GFP_KERNEL : GFP_ATOMIC; int src_nents, mapped_src_nents, dst_nents = 0, mapped_dst_nents = 0; @@ -1406,6 +1378,9 @@ static struct aead_edesc *aead_edesc_alloc(struct aead_request *req, edesc->mapped_dst_nents = mapped_dst_nents; edesc->sec4_sg = (void *)edesc + sizeof(struct aead_edesc) + desc_bytes; + + rctx->edesc = edesc; + *all_contig_ptr = !(mapped_src_nents > 1); sec4_sg_index = 0; @@ -1436,41 +1411,34 @@ static struct aead_edesc *aead_edesc_alloc(struct aead_request *req, return edesc; } -static int gcm_encrypt(struct aead_request *req) +static int aead_enqueue_req(struct device *jrdev, struct aead_request *req) { - struct aead_edesc *edesc; - struct crypto_aead *aead = crypto_aead_reqtfm(req); - struct caam_ctx *ctx = crypto_aead_ctx(aead); - struct device *jrdev = ctx->jrdev; - bool all_contig; - u32 *desc; - int ret = 0; - - /* allocate extended descriptor */ - edesc = aead_edesc_alloc(req, GCM_DESC_JOB_IO_LEN, &all_contig, true); - if (IS_ERR(edesc)) - return PTR_ERR(edesc); - - /* Create and submit job descriptor */ - init_gcm_job(req, edesc, all_contig, true); + struct caam_drv_private_jr *jrpriv = dev_get_drvdata(jrdev); + struct caam_aead_req_ctx *rctx = aead_request_ctx(req); + struct aead_edesc *edesc = rctx->edesc; + u32 *desc = edesc->hw_desc; + int ret; - print_hex_dump_debug("aead jobdesc@"__stringify(__LINE__)": ", - DUMP_PREFIX_ADDRESS, 16, 4, edesc->hw_desc, - desc_bytes(edesc->hw_desc), 1); + /* + * Only the backlog request are sent to crypto-engine since the others + * can be handled by CAAM, if free, especially since JR has up to 1024 + * entries (more than the 10 entries from crypto-engine). + */ + if (req->base.flags & CRYPTO_TFM_REQ_MAY_BACKLOG) + ret = crypto_transfer_aead_request_to_engine(jrpriv->engine, + req); + else + ret = caam_jr_enqueue(jrdev, desc, aead_crypt_done, req); - desc = edesc->hw_desc; - ret = caam_jr_enqueue(jrdev, desc, aead_encrypt_done, req); - if (!ret) { - ret = -EINPROGRESS; - } else { + if ((ret != -EINPROGRESS) && (ret != -EBUSY)) { aead_unmap(jrdev, edesc, req); - kfree(edesc); + kfree(rctx->edesc); } return ret; } -static int chachapoly_encrypt(struct aead_request *req) +static inline int chachapoly_crypt(struct aead_request *req, bool encrypt) { struct aead_edesc *edesc; struct crypto_aead *aead = crypto_aead_reqtfm(req); @@ -1478,180 +1446,130 @@ static int chachapoly_encrypt(struct aead_request *req) struct device *jrdev = ctx->jrdev; bool all_contig; u32 *desc; - int ret; edesc = aead_edesc_alloc(req, CHACHAPOLY_DESC_JOB_IO_LEN, &all_contig, - true); + encrypt); if (IS_ERR(edesc)) return PTR_ERR(edesc); desc = edesc->hw_desc; - init_chachapoly_job(req, edesc, all_contig, true); + init_chachapoly_job(req, edesc, all_contig, encrypt); print_hex_dump_debug("chachapoly jobdesc@" __stringify(__LINE__)": ", DUMP_PREFIX_ADDRESS, 16, 4, desc, desc_bytes(desc), 1); - ret = caam_jr_enqueue(jrdev, desc, aead_encrypt_done, req); - if (!ret) { - ret = -EINPROGRESS; - } else { - aead_unmap(jrdev, edesc, req); - kfree(edesc); - } - - return ret; + return aead_enqueue_req(jrdev, req); } -static int chachapoly_decrypt(struct aead_request *req) +static int chachapoly_encrypt(struct aead_request *req) { - struct aead_edesc *edesc; - struct crypto_aead *aead = crypto_aead_reqtfm(req); - struct caam_ctx *ctx = crypto_aead_ctx(aead); - struct device *jrdev = ctx->jrdev; - bool all_contig; - u32 *desc; - int ret; - - edesc = aead_edesc_alloc(req, CHACHAPOLY_DESC_JOB_IO_LEN, &all_contig, - false); - if (IS_ERR(edesc)) - return PTR_ERR(edesc); - - 
desc = edesc->hw_desc; - - init_chachapoly_job(req, edesc, all_contig, false); - print_hex_dump_debug("chachapoly jobdesc@" __stringify(__LINE__)": ", - DUMP_PREFIX_ADDRESS, 16, 4, desc, desc_bytes(desc), - 1); - - ret = caam_jr_enqueue(jrdev, desc, aead_decrypt_done, req); - if (!ret) { - ret = -EINPROGRESS; - } else { - aead_unmap(jrdev, edesc, req); - kfree(edesc); - } - - return ret; + return chachapoly_crypt(req, true); } -static int ipsec_gcm_encrypt(struct aead_request *req) +static int chachapoly_decrypt(struct aead_request *req) { - return crypto_ipsec_check_assoclen(req->assoclen) ? : gcm_encrypt(req); + return chachapoly_crypt(req, false); } -static int aead_encrypt(struct aead_request *req) +static inline int aead_crypt(struct aead_request *req, bool encrypt) { struct aead_edesc *edesc; struct crypto_aead *aead = crypto_aead_reqtfm(req); struct caam_ctx *ctx = crypto_aead_ctx(aead); struct device *jrdev = ctx->jrdev; bool all_contig; - u32 *desc; - int ret = 0; /* allocate extended descriptor */ edesc = aead_edesc_alloc(req, AUTHENC_DESC_JOB_IO_LEN, - &all_contig, true); + &all_contig, encrypt); if (IS_ERR(edesc)) return PTR_ERR(edesc); /* Create and submit job descriptor */ - init_authenc_job(req, edesc, all_contig, true); + init_authenc_job(req, edesc, all_contig, encrypt); print_hex_dump_debug("aead jobdesc@"__stringify(__LINE__)": ", DUMP_PREFIX_ADDRESS, 16, 4, edesc->hw_desc, desc_bytes(edesc->hw_desc), 1); - desc = edesc->hw_desc; - ret = caam_jr_enqueue(jrdev, desc, aead_encrypt_done, req); - if (!ret) { - ret = -EINPROGRESS; - } else { - aead_unmap(jrdev, edesc, req); - kfree(edesc); - } + return aead_enqueue_req(jrdev, req); +} - return ret; +static int aead_encrypt(struct aead_request *req) +{ + return aead_crypt(req, true); } -static int gcm_decrypt(struct aead_request *req) +static int aead_decrypt(struct aead_request *req) { - struct aead_edesc *edesc; - struct crypto_aead *aead = crypto_aead_reqtfm(req); - struct caam_ctx *ctx = crypto_aead_ctx(aead); - struct device *jrdev = ctx->jrdev; - bool all_contig; - u32 *desc; - int ret = 0; + return aead_crypt(req, false); +} - /* allocate extended descriptor */ - edesc = aead_edesc_alloc(req, GCM_DESC_JOB_IO_LEN, &all_contig, false); - if (IS_ERR(edesc)) - return PTR_ERR(edesc); +static int aead_do_one_req(struct crypto_engine *engine, void *areq) +{ + struct aead_request *req = aead_request_cast(areq); + struct caam_ctx *ctx = crypto_aead_ctx(crypto_aead_reqtfm(req)); + struct caam_aead_req_ctx *rctx = aead_request_ctx(req); + u32 *desc = rctx->edesc->hw_desc; + int ret; - /* Create and submit job descriptor*/ - init_gcm_job(req, edesc, all_contig, false); + rctx->edesc->bklog = true; - print_hex_dump_debug("aead jobdesc@"__stringify(__LINE__)": ", - DUMP_PREFIX_ADDRESS, 16, 4, edesc->hw_desc, - desc_bytes(edesc->hw_desc), 1); + ret = caam_jr_enqueue(ctx->jrdev, desc, aead_crypt_done, req); - desc = edesc->hw_desc; - ret = caam_jr_enqueue(jrdev, desc, aead_decrypt_done, req); - if (!ret) { - ret = -EINPROGRESS; + if (ret != -EINPROGRESS) { + aead_unmap(ctx->jrdev, rctx->edesc, req); + kfree(rctx->edesc); } else { - aead_unmap(jrdev, edesc, req); - kfree(edesc); + ret = 0; } return ret; } -static int ipsec_gcm_decrypt(struct aead_request *req) -{ - return crypto_ipsec_check_assoclen(req->assoclen) ? 
: gcm_decrypt(req); -} - -static int aead_decrypt(struct aead_request *req) +static inline int gcm_crypt(struct aead_request *req, bool encrypt) { struct aead_edesc *edesc; struct crypto_aead *aead = crypto_aead_reqtfm(req); struct caam_ctx *ctx = crypto_aead_ctx(aead); struct device *jrdev = ctx->jrdev; bool all_contig; - u32 *desc; - int ret = 0; - - caam_dump_sg("dec src@" __stringify(__LINE__)": ", - DUMP_PREFIX_ADDRESS, 16, 4, req->src, - req->assoclen + req->cryptlen, 1); /* allocate extended descriptor */ - edesc = aead_edesc_alloc(req, AUTHENC_DESC_JOB_IO_LEN, - &all_contig, false); + edesc = aead_edesc_alloc(req, GCM_DESC_JOB_IO_LEN, &all_contig, + encrypt); if (IS_ERR(edesc)) return PTR_ERR(edesc); - /* Create and submit job descriptor*/ - init_authenc_job(req, edesc, all_contig, false); + /* Create and submit job descriptor */ + init_gcm_job(req, edesc, all_contig, encrypt); print_hex_dump_debug("aead jobdesc@"__stringify(__LINE__)": ", DUMP_PREFIX_ADDRESS, 16, 4, edesc->hw_desc, desc_bytes(edesc->hw_desc), 1); - desc = edesc->hw_desc; - ret = caam_jr_enqueue(jrdev, desc, aead_decrypt_done, req); - if (!ret) { - ret = -EINPROGRESS; - } else { - aead_unmap(jrdev, edesc, req); - kfree(edesc); - } + return aead_enqueue_req(jrdev, req); +} - return ret; +static int gcm_encrypt(struct aead_request *req) +{ + return gcm_crypt(req, true); +} + +static int gcm_decrypt(struct aead_request *req) +{ + return gcm_crypt(req, false); +} + +static int ipsec_gcm_encrypt(struct aead_request *req) +{ + return crypto_ipsec_check_assoclen(req->assoclen) ? : gcm_encrypt(req); +} + +static int ipsec_gcm_decrypt(struct aead_request *req) +{ + return crypto_ipsec_check_assoclen(req->assoclen) ? : gcm_decrypt(req); } /* @@ -1662,6 +1580,7 @@ static struct skcipher_edesc *skcipher_edesc_alloc(struct skcipher_request *req, { struct crypto_skcipher *skcipher = crypto_skcipher_reqtfm(req); struct caam_ctx *ctx = crypto_skcipher_ctx(skcipher); + struct caam_skcipher_req_ctx *rctx = skcipher_request_ctx(req); struct device *jrdev = ctx->jrdev; gfp_t flags = (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ? 
GFP_KERNEL : GFP_ATOMIC; @@ -1760,6 +1679,7 @@ static struct skcipher_edesc *skcipher_edesc_alloc(struct skcipher_request *req, edesc->sec4_sg_bytes = sec4_sg_bytes; edesc->sec4_sg = (struct sec4_sg_entry *)((u8 *)edesc->hw_desc + desc_bytes); + rctx->edesc = edesc; /* Make sure IV is located in a DMAable area */ if (ivsize) { @@ -1815,49 +1735,35 @@ static struct skcipher_edesc *skcipher_edesc_alloc(struct skcipher_request *req, return edesc; } -static int skcipher_encrypt(struct skcipher_request *req) +static int skcipher_do_one_req(struct crypto_engine *engine, void *areq) { - struct skcipher_edesc *edesc; - struct crypto_skcipher *skcipher = crypto_skcipher_reqtfm(req); - struct caam_ctx *ctx = crypto_skcipher_ctx(skcipher); - struct device *jrdev = ctx->jrdev; - u32 *desc; - int ret = 0; - - if (!req->cryptlen) - return 0; - - /* allocate extended descriptor */ - edesc = skcipher_edesc_alloc(req, DESC_JOB_IO_LEN * CAAM_CMD_SZ); - if (IS_ERR(edesc)) - return PTR_ERR(edesc); - - /* Create and submit job descriptor*/ - init_skcipher_job(req, edesc, true); + struct skcipher_request *req = skcipher_request_cast(areq); + struct caam_ctx *ctx = crypto_skcipher_ctx(crypto_skcipher_reqtfm(req)); + struct caam_skcipher_req_ctx *rctx = skcipher_request_ctx(req); + u32 *desc = rctx->edesc->hw_desc; + int ret; - print_hex_dump_debug("skcipher jobdesc@" __stringify(__LINE__)": ", - DUMP_PREFIX_ADDRESS, 16, 4, edesc->hw_desc, - desc_bytes(edesc->hw_desc), 1); + rctx->edesc->bklog = true; - desc = edesc->hw_desc; - ret = caam_jr_enqueue(jrdev, desc, skcipher_encrypt_done, req); + ret = caam_jr_enqueue(ctx->jrdev, desc, skcipher_crypt_done, req); - if (!ret) { - ret = -EINPROGRESS; + if (ret != -EINPROGRESS) { + skcipher_unmap(ctx->jrdev, rctx->edesc, req); + kfree(rctx->edesc); } else { - skcipher_unmap(jrdev, edesc, req); - kfree(edesc); + ret = 0; } return ret; } -static int skcipher_decrypt(struct skcipher_request *req) +static inline int skcipher_crypt(struct skcipher_request *req, bool encrypt) { struct skcipher_edesc *edesc; struct crypto_skcipher *skcipher = crypto_skcipher_reqtfm(req); struct caam_ctx *ctx = crypto_skcipher_ctx(skcipher); struct device *jrdev = ctx->jrdev; + struct caam_drv_private_jr *jrpriv = dev_get_drvdata(jrdev); u32 *desc; int ret = 0; @@ -1870,17 +1776,25 @@ static int skcipher_decrypt(struct skcipher_request *req) return PTR_ERR(edesc); /* Create and submit job descriptor*/ - init_skcipher_job(req, edesc, false); - desc = edesc->hw_desc; + init_skcipher_job(req, edesc, encrypt); print_hex_dump_debug("skcipher jobdesc@" __stringify(__LINE__)": ", DUMP_PREFIX_ADDRESS, 16, 4, edesc->hw_desc, desc_bytes(edesc->hw_desc), 1); - ret = caam_jr_enqueue(jrdev, desc, skcipher_decrypt_done, req); - if (!ret) { - ret = -EINPROGRESS; - } else { + desc = edesc->hw_desc; + /* + * Only the backlog request are sent to crypto-engine since the others + * can be handled by CAAM, if free, especially since JR has up to 1024 + * entries (more than the 10 entries from crypto-engine). 
+ */ + if (req->base.flags & CRYPTO_TFM_REQ_MAY_BACKLOG) + ret = crypto_transfer_skcipher_request_to_engine(jrpriv->engine, + req); + else + ret = caam_jr_enqueue(jrdev, desc, skcipher_crypt_done, req); + + if ((ret != -EINPROGRESS) && (ret != -EBUSY)) { skcipher_unmap(jrdev, edesc, req); kfree(edesc); } @@ -1888,6 +1802,16 @@ static int skcipher_decrypt(struct skcipher_request *req) return ret; } +static int skcipher_encrypt(struct skcipher_request *req) +{ + return skcipher_crypt(req, true); +} + +static int skcipher_decrypt(struct skcipher_request *req) +{ + return skcipher_crypt(req, false); +} + static struct caam_skcipher_alg driver_algs[] = { { .skcipher = { @@ -3391,6 +3315,8 @@ static int caam_init_common(struct caam_ctx *ctx, struct caam_alg_entry *caam, { dma_addr_t dma_addr; struct caam_drv_private *priv; + const size_t sh_desc_enc_offset = offsetof(struct caam_ctx, + sh_desc_enc); ctx->jrdev = caam_jr_alloc(); if (IS_ERR(ctx->jrdev)) { @@ -3406,7 +3332,8 @@ static int caam_init_common(struct caam_ctx *ctx, struct caam_alg_entry *caam, dma_addr = dma_map_single_attrs(ctx->jrdev, ctx->sh_desc_enc, offsetof(struct caam_ctx, - sh_desc_enc_dma), + sh_desc_enc_dma) - + sh_desc_enc_offset, ctx->dir, DMA_ATTR_SKIP_CPU_SYNC); if (dma_mapping_error(ctx->jrdev, dma_addr)) { dev_err(ctx->jrdev, "unable to map key, shared descriptors\n"); @@ -3416,8 +3343,10 @@ static int caam_init_common(struct caam_ctx *ctx, struct caam_alg_entry *caam, ctx->sh_desc_enc_dma = dma_addr; ctx->sh_desc_dec_dma = dma_addr + offsetof(struct caam_ctx, - sh_desc_dec); - ctx->key_dma = dma_addr + offsetof(struct caam_ctx, key); + sh_desc_dec) - + sh_desc_enc_offset; + ctx->key_dma = dma_addr + offsetof(struct caam_ctx, key) - + sh_desc_enc_offset; /* copy descriptor header template value */ ctx->cdata.algtype = OP_TYPE_CLASS1_ALG | caam->class1_alg_type; @@ -3431,6 +3360,11 @@ static int caam_cra_init(struct crypto_skcipher *tfm) struct skcipher_alg *alg = crypto_skcipher_alg(tfm); struct caam_skcipher_alg *caam_alg = container_of(alg, typeof(*caam_alg), skcipher); + struct caam_ctx *ctx = crypto_skcipher_ctx(tfm); + + crypto_skcipher_set_reqsize(tfm, sizeof(struct caam_skcipher_req_ctx)); + + ctx->enginectx.op.do_one_request = skcipher_do_one_req; return caam_init_common(crypto_skcipher_ctx(tfm), &caam_alg->caam, false); @@ -3443,13 +3377,18 @@ static int caam_aead_init(struct crypto_aead *tfm) container_of(alg, struct caam_aead_alg, aead); struct caam_ctx *ctx = crypto_aead_ctx(tfm); + crypto_aead_set_reqsize(tfm, sizeof(struct caam_aead_req_ctx)); + + ctx->enginectx.op.do_one_request = aead_do_one_req; + return caam_init_common(ctx, &caam_alg->caam, !caam_alg->caam.nodkp); } static void caam_exit_common(struct caam_ctx *ctx) { dma_unmap_single_attrs(ctx->jrdev, ctx->sh_desc_enc_dma, - offsetof(struct caam_ctx, sh_desc_enc_dma), + offsetof(struct caam_ctx, sh_desc_enc_dma) - + offsetof(struct caam_ctx, sh_desc_enc), ctx->dir, DMA_ATTR_SKIP_CPU_SYNC); caam_jr_free(ctx->jrdev); } diff --git a/drivers/crypto/caam/caamalg_desc.c b/drivers/crypto/caam/caamalg_desc.c index aa9ccca67045..d6c58184bb57 100644 --- a/drivers/crypto/caam/caamalg_desc.c +++ b/drivers/crypto/caam/caamalg_desc.c @@ -1379,6 +1379,9 @@ void cnstr_shdsc_skcipher_encap(u32 * const desc, struct alginfo *cdata, const u32 ctx1_iv_off) { u32 *key_jump_cmd; + u32 options = cdata->algtype | OP_ALG_AS_INIT | OP_ALG_ENCRYPT; + bool is_chacha20 = ((cdata->algtype & OP_ALG_ALGSEL_MASK) == + OP_ALG_ALGSEL_CHACHA20); init_sh_desc(desc, HDR_SHARE_SERIAL | 
HDR_SAVECTX); /* Skip if already shared */ @@ -1417,14 +1420,15 @@ void cnstr_shdsc_skcipher_encap(u32 * const desc, struct alginfo *cdata, LDST_OFFSET_SHIFT)); /* Load operation */ - append_operation(desc, cdata->algtype | OP_ALG_AS_INIT | - OP_ALG_ENCRYPT); + if (is_chacha20) + options |= OP_ALG_AS_FINALIZE; + append_operation(desc, options); /* Perform operation */ skcipher_append_src_dst(desc); /* Store IV */ - if (ivsize) + if (!is_chacha20 && ivsize) append_seq_store(desc, ivsize, LDST_SRCDST_BYTE_CONTEXT | LDST_CLASS_1_CCB | (ctx1_iv_off << LDST_OFFSET_SHIFT)); @@ -1451,6 +1455,8 @@ void cnstr_shdsc_skcipher_decap(u32 * const desc, struct alginfo *cdata, const u32 ctx1_iv_off) { u32 *key_jump_cmd; + bool is_chacha20 = ((cdata->algtype & OP_ALG_ALGSEL_MASK) == + OP_ALG_ALGSEL_CHACHA20); init_sh_desc(desc, HDR_SHARE_SERIAL | HDR_SAVECTX); /* Skip if already shared */ @@ -1499,7 +1505,7 @@ void cnstr_shdsc_skcipher_decap(u32 * const desc, struct alginfo *cdata, skcipher_append_src_dst(desc); /* Store IV */ - if (ivsize) + if (!is_chacha20 && ivsize) append_seq_store(desc, ivsize, LDST_SRCDST_BYTE_CONTEXT | LDST_CLASS_1_CCB | (ctx1_iv_off << LDST_OFFSET_SHIFT)); @@ -1518,7 +1524,13 @@ EXPORT_SYMBOL(cnstr_shdsc_skcipher_decap); */ void cnstr_shdsc_xts_skcipher_encap(u32 * const desc, struct alginfo *cdata) { - __be64 sector_size = cpu_to_be64(512); + /* + * Set sector size to a big value, practically disabling + * sector size segmentation in xts implementation. We cannot + * take full advantage of this HW feature with existing + * crypto API / dm-crypt SW architecture. + */ + __be64 sector_size = cpu_to_be64(BIT(15)); u32 *key_jump_cmd; init_sh_desc(desc, HDR_SHARE_SERIAL | HDR_SAVECTX); @@ -1571,7 +1583,13 @@ EXPORT_SYMBOL(cnstr_shdsc_xts_skcipher_encap); */ void cnstr_shdsc_xts_skcipher_decap(u32 * const desc, struct alginfo *cdata) { - __be64 sector_size = cpu_to_be64(512); + /* + * Set sector size to a big value, practically disabling + * sector size segmentation in xts implementation. We cannot + * take full advantage of this HW feature with existing + * crypto API / dm-crypt SW architecture. 
+ */ + __be64 sector_size = cpu_to_be64(BIT(15)); u32 *key_jump_cmd; init_sh_desc(desc, HDR_SHARE_SERIAL | HDR_SAVECTX); diff --git a/drivers/crypto/caam/caamalg_qi.c b/drivers/crypto/caam/caamalg_qi.c index 4a29e0ef9d63..27e36bdf6163 100644 --- a/drivers/crypto/caam/caamalg_qi.c +++ b/drivers/crypto/caam/caamalg_qi.c @@ -783,7 +783,7 @@ struct aead_edesc { unsigned int assoclen; dma_addr_t assoclen_dma; struct caam_drv_req drv_req; - struct qm_sg_entry sgt[0]; + struct qm_sg_entry sgt[]; }; /* @@ -803,7 +803,7 @@ struct skcipher_edesc { int qm_sg_bytes; dma_addr_t qm_sg_dma; struct caam_drv_req drv_req; - struct qm_sg_entry sgt[0]; + struct qm_sg_entry sgt[]; }; static struct caam_drv_ctx *get_drv_ctx(struct caam_ctx *ctx, diff --git a/drivers/crypto/caam/caamalg_qi2.h b/drivers/crypto/caam/caamalg_qi2.h index 706736776b47..f29cb7bd7dd3 100644 --- a/drivers/crypto/caam/caamalg_qi2.h +++ b/drivers/crypto/caam/caamalg_qi2.h @@ -114,7 +114,7 @@ struct aead_edesc { dma_addr_t qm_sg_dma; unsigned int assoclen; dma_addr_t assoclen_dma; - struct dpaa2_sg_entry sgt[0]; + struct dpaa2_sg_entry sgt[]; }; /* @@ -132,7 +132,7 @@ struct skcipher_edesc { dma_addr_t iv_dma; int qm_sg_bytes; dma_addr_t qm_sg_dma; - struct dpaa2_sg_entry sgt[0]; + struct dpaa2_sg_entry sgt[]; }; /* @@ -146,7 +146,7 @@ struct ahash_edesc { dma_addr_t qm_sg_dma; int src_nents; int qm_sg_bytes; - struct dpaa2_sg_entry sgt[0]; + struct dpaa2_sg_entry sgt[]; }; /** diff --git a/drivers/crypto/caam/caamhash.c b/drivers/crypto/caam/caamhash.c index 8d9143407fc5..943bc0296267 100644 --- a/drivers/crypto/caam/caamhash.c +++ b/drivers/crypto/caam/caamhash.c @@ -65,6 +65,7 @@ #include "sg_sw_sec4.h" #include "key_gen.h" #include "caamhash_desc.h" +#include <crypto/engine.h> #define CAAM_CRA_PRIORITY 3000 @@ -86,6 +87,7 @@ static struct list_head hash_list; /* ahash per-session context */ struct caam_hash_ctx { + struct crypto_engine_ctx enginectx; u32 sh_desc_update[DESC_HASH_MAX_USED_LEN] ____cacheline_aligned; u32 sh_desc_update_first[DESC_HASH_MAX_USED_LEN] ____cacheline_aligned; u32 sh_desc_fin[DESC_HASH_MAX_USED_LEN] ____cacheline_aligned; @@ -111,9 +113,12 @@ struct caam_hash_state { int buflen; int next_buflen; u8 caam_ctx[MAX_CTX_LEN] ____cacheline_aligned; - int (*update)(struct ahash_request *req); + int (*update)(struct ahash_request *req) ____cacheline_aligned; int (*final)(struct ahash_request *req); int (*finup)(struct ahash_request *req); + struct ahash_edesc *edesc; + void (*ahash_op_done)(struct device *jrdev, u32 *desc, u32 err, + void *context); }; struct caam_export_state { @@ -395,7 +400,7 @@ static int hash_digest_key(struct caam_hash_ctx *ctx, u32 *keylen, u8 *key, init_completion(&result.completion); ret = caam_jr_enqueue(jrdev, desc, split_key_done, &result); - if (!ret) { + if (ret == -EINPROGRESS) { /* in progress */ wait_for_completion(&result.completion); ret = result.err; @@ -521,6 +526,7 @@ static int acmac_setkey(struct crypto_ahash *ahash, const u8 *key, * @sec4_sg_dma: physical mapped address of h/w link table * @src_nents: number of segments in input scatterlist * @sec4_sg_bytes: length of dma mapped sec4_sg space + * @bklog: stored to determine if the request needs backlog * @hw_desc: the h/w job descriptor followed by any referenced link tables * @sec4_sg: h/w link table */ @@ -528,8 +534,9 @@ struct ahash_edesc { dma_addr_t sec4_sg_dma; int src_nents; int sec4_sg_bytes; + bool bklog; u32 hw_desc[DESC_JOB_IO_LEN_MAX / sizeof(u32)] ____cacheline_aligned; - struct sec4_sg_entry sec4_sg[0]; + struct 
sec4_sg_entry sec4_sg[]; }; static inline void ahash_unmap(struct device *dev, @@ -565,10 +572,11 @@ static inline void ahash_unmap_ctx(struct device *dev, ahash_unmap(dev, edesc, req, dst_len); } -static void ahash_done(struct device *jrdev, u32 *desc, u32 err, - void *context) +static inline void ahash_done_cpy(struct device *jrdev, u32 *desc, u32 err, + void *context, enum dma_data_direction dir) { struct ahash_request *req = context; + struct caam_drv_private_jr *jrp = dev_get_drvdata(jrdev); struct ahash_edesc *edesc; struct crypto_ahash *ahash = crypto_ahash_reqtfm(req); int digestsize = crypto_ahash_digestsize(ahash); @@ -578,11 +586,12 @@ static void ahash_done(struct device *jrdev, u32 *desc, u32 err, dev_dbg(jrdev, "%s %d: err 0x%x\n", __func__, __LINE__, err); - edesc = container_of(desc, struct ahash_edesc, hw_desc[0]); + edesc = state->edesc; + if (err) ecode = caam_jr_strstatus(jrdev, err); - ahash_unmap_ctx(jrdev, edesc, req, digestsize, DMA_FROM_DEVICE); + ahash_unmap_ctx(jrdev, edesc, req, digestsize, dir); memcpy(req->result, state->caam_ctx, digestsize); kfree(edesc); @@ -590,81 +599,33 @@ static void ahash_done(struct device *jrdev, u32 *desc, u32 err, DUMP_PREFIX_ADDRESS, 16, 4, state->caam_ctx, ctx->ctx_len, 1); - req->base.complete(&req->base, ecode); + /* + * If no backlog flag, the completion of the request is done + * by CAAM, not crypto engine. + */ + if (!edesc->bklog) + req->base.complete(&req->base, ecode); + else + crypto_finalize_hash_request(jrp->engine, req, ecode); } -static void ahash_done_bi(struct device *jrdev, u32 *desc, u32 err, - void *context) +static void ahash_done(struct device *jrdev, u32 *desc, u32 err, + void *context) { - struct ahash_request *req = context; - struct ahash_edesc *edesc; - struct crypto_ahash *ahash = crypto_ahash_reqtfm(req); - struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash); - struct caam_hash_state *state = ahash_request_ctx(req); - int digestsize = crypto_ahash_digestsize(ahash); - int ecode = 0; - - dev_dbg(jrdev, "%s %d: err 0x%x\n", __func__, __LINE__, err); - - edesc = container_of(desc, struct ahash_edesc, hw_desc[0]); - if (err) - ecode = caam_jr_strstatus(jrdev, err); - - ahash_unmap_ctx(jrdev, edesc, req, ctx->ctx_len, DMA_BIDIRECTIONAL); - kfree(edesc); - - scatterwalk_map_and_copy(state->buf, req->src, - req->nbytes - state->next_buflen, - state->next_buflen, 0); - state->buflen = state->next_buflen; - - print_hex_dump_debug("buf@" __stringify(__LINE__)": ", - DUMP_PREFIX_ADDRESS, 16, 4, state->buf, - state->buflen, 1); - - print_hex_dump_debug("ctx@"__stringify(__LINE__)": ", - DUMP_PREFIX_ADDRESS, 16, 4, state->caam_ctx, - ctx->ctx_len, 1); - if (req->result) - print_hex_dump_debug("result@"__stringify(__LINE__)": ", - DUMP_PREFIX_ADDRESS, 16, 4, req->result, - digestsize, 1); - - req->base.complete(&req->base, ecode); + ahash_done_cpy(jrdev, desc, err, context, DMA_FROM_DEVICE); } static void ahash_done_ctx_src(struct device *jrdev, u32 *desc, u32 err, void *context) { - struct ahash_request *req = context; - struct ahash_edesc *edesc; - struct crypto_ahash *ahash = crypto_ahash_reqtfm(req); - int digestsize = crypto_ahash_digestsize(ahash); - struct caam_hash_state *state = ahash_request_ctx(req); - struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash); - int ecode = 0; - - dev_dbg(jrdev, "%s %d: err 0x%x\n", __func__, __LINE__, err); - - edesc = container_of(desc, struct ahash_edesc, hw_desc[0]); - if (err) - ecode = caam_jr_strstatus(jrdev, err); - - ahash_unmap_ctx(jrdev, edesc, req, digestsize, 
DMA_BIDIRECTIONAL); - memcpy(req->result, state->caam_ctx, digestsize); - kfree(edesc); - - print_hex_dump_debug("ctx@"__stringify(__LINE__)": ", - DUMP_PREFIX_ADDRESS, 16, 4, state->caam_ctx, - ctx->ctx_len, 1); - - req->base.complete(&req->base, ecode); + ahash_done_cpy(jrdev, desc, err, context, DMA_BIDIRECTIONAL); } -static void ahash_done_ctx_dst(struct device *jrdev, u32 *desc, u32 err, - void *context) +static inline void ahash_done_switch(struct device *jrdev, u32 *desc, u32 err, + void *context, enum dma_data_direction dir) { struct ahash_request *req = context; + struct caam_drv_private_jr *jrp = dev_get_drvdata(jrdev); struct ahash_edesc *edesc; struct crypto_ahash *ahash = crypto_ahash_reqtfm(req); struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash); @@ -674,11 +635,11 @@ static void ahash_done_ctx_dst(struct device *jrdev, u32 *desc, u32 err, dev_dbg(jrdev, "%s %d: err 0x%x\n", __func__, __LINE__, err); - edesc = container_of(desc, struct ahash_edesc, hw_desc[0]); + edesc = state->edesc; if (err) ecode = caam_jr_strstatus(jrdev, err); - ahash_unmap_ctx(jrdev, edesc, req, ctx->ctx_len, DMA_FROM_DEVICE); + ahash_unmap_ctx(jrdev, edesc, req, ctx->ctx_len, dir); kfree(edesc); scatterwalk_map_and_copy(state->buf, req->src, @@ -698,18 +659,42 @@ static void ahash_done_ctx_dst(struct device *jrdev, u32 *desc, u32 err, DUMP_PREFIX_ADDRESS, 16, 4, req->result, digestsize, 1); - req->base.complete(&req->base, ecode); + /* + * If no backlog flag, the completion of the request is done + * by CAAM, not crypto engine. + */ + if (!edesc->bklog) + req->base.complete(&req->base, ecode); + else + crypto_finalize_hash_request(jrp->engine, req, ecode); + +} + +static void ahash_done_bi(struct device *jrdev, u32 *desc, u32 err, + void *context) +{ + ahash_done_switch(jrdev, desc, err, context, DMA_BIDIRECTIONAL); +} + +static void ahash_done_ctx_dst(struct device *jrdev, u32 *desc, u32 err, + void *context) +{ + ahash_done_switch(jrdev, desc, err, context, DMA_FROM_DEVICE); } /* * Allocate an enhanced descriptor, which contains the hardware descriptor * and space for hardware scatter table containing sg_num entries. */ -static struct ahash_edesc *ahash_edesc_alloc(struct caam_hash_ctx *ctx, +static struct ahash_edesc *ahash_edesc_alloc(struct ahash_request *req, int sg_num, u32 *sh_desc, - dma_addr_t sh_desc_dma, - gfp_t flags) + dma_addr_t sh_desc_dma) { + struct crypto_ahash *ahash = crypto_ahash_reqtfm(req); + struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash); + struct caam_hash_state *state = ahash_request_ctx(req); + gfp_t flags = (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ? 
+ GFP_KERNEL : GFP_ATOMIC; struct ahash_edesc *edesc; unsigned int sg_size = sg_num * sizeof(struct sec4_sg_entry); @@ -719,6 +704,8 @@ static struct ahash_edesc *ahash_edesc_alloc(struct caam_hash_ctx *ctx, return NULL; } + state->edesc = edesc; + init_job_desc_shared(edesc->hw_desc, sh_desc_dma, desc_len(sh_desc), HDR_SHARE_DEFER | HDR_REVERSE); @@ -761,6 +748,62 @@ static int ahash_edesc_add_src(struct caam_hash_ctx *ctx, return 0; } +static int ahash_do_one_req(struct crypto_engine *engine, void *areq) +{ + struct ahash_request *req = ahash_request_cast(areq); + struct caam_hash_ctx *ctx = crypto_ahash_ctx(crypto_ahash_reqtfm(req)); + struct caam_hash_state *state = ahash_request_ctx(req); + struct device *jrdev = ctx->jrdev; + u32 *desc = state->edesc->hw_desc; + int ret; + + state->edesc->bklog = true; + + ret = caam_jr_enqueue(jrdev, desc, state->ahash_op_done, req); + + if (ret != -EINPROGRESS) { + ahash_unmap(jrdev, state->edesc, req, 0); + kfree(state->edesc); + } else { + ret = 0; + } + + return ret; +} + +static int ahash_enqueue_req(struct device *jrdev, + void (*cbk)(struct device *jrdev, u32 *desc, + u32 err, void *context), + struct ahash_request *req, + int dst_len, enum dma_data_direction dir) +{ + struct caam_drv_private_jr *jrpriv = dev_get_drvdata(jrdev); + struct caam_hash_state *state = ahash_request_ctx(req); + struct ahash_edesc *edesc = state->edesc; + u32 *desc = edesc->hw_desc; + int ret; + + state->ahash_op_done = cbk; + + /* + * Only the backlog request are sent to crypto-engine since the others + * can be handled by CAAM, if free, especially since JR has up to 1024 + * entries (more than the 10 entries from crypto-engine). + */ + if (req->base.flags & CRYPTO_TFM_REQ_MAY_BACKLOG) + ret = crypto_transfer_hash_request_to_engine(jrpriv->engine, + req); + else + ret = caam_jr_enqueue(jrdev, desc, cbk, req); + + if ((ret != -EINPROGRESS) && (ret != -EBUSY)) { + ahash_unmap_ctx(jrdev, edesc, req, dst_len, dir); + kfree(edesc); + } + + return ret; +} + /* submit update job descriptor */ static int ahash_update_ctx(struct ahash_request *req) { @@ -768,8 +811,6 @@ static int ahash_update_ctx(struct ahash_request *req) struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash); struct caam_hash_state *state = ahash_request_ctx(req); struct device *jrdev = ctx->jrdev; - gfp_t flags = (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ? 
- GFP_KERNEL : GFP_ATOMIC; u8 *buf = state->buf; int *buflen = &state->buflen; int *next_buflen = &state->next_buflen; @@ -823,8 +864,8 @@ static int ahash_update_ctx(struct ahash_request *req) * allocate space for base edesc and hw desc commands, * link tables */ - edesc = ahash_edesc_alloc(ctx, pad_nents, ctx->sh_desc_update, - ctx->sh_desc_update_dma, flags); + edesc = ahash_edesc_alloc(req, pad_nents, ctx->sh_desc_update, + ctx->sh_desc_update_dma); if (!edesc) { dma_unmap_sg(jrdev, req->src, src_nents, DMA_TO_DEVICE); return -ENOMEM; @@ -870,11 +911,8 @@ static int ahash_update_ctx(struct ahash_request *req) DUMP_PREFIX_ADDRESS, 16, 4, desc, desc_bytes(desc), 1); - ret = caam_jr_enqueue(jrdev, desc, ahash_done_bi, req); - if (ret) - goto unmap_ctx; - - ret = -EINPROGRESS; + ret = ahash_enqueue_req(jrdev, ahash_done_bi, req, + ctx->ctx_len, DMA_BIDIRECTIONAL); } else if (*next_buflen) { scatterwalk_map_and_copy(buf + *buflen, req->src, 0, req->nbytes, 0); @@ -898,8 +936,6 @@ static int ahash_final_ctx(struct ahash_request *req) struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash); struct caam_hash_state *state = ahash_request_ctx(req); struct device *jrdev = ctx->jrdev; - gfp_t flags = (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ? - GFP_KERNEL : GFP_ATOMIC; int buflen = state->buflen; u32 *desc; int sec4_sg_bytes; @@ -911,8 +947,8 @@ static int ahash_final_ctx(struct ahash_request *req) sizeof(struct sec4_sg_entry); /* allocate space for base edesc and hw desc commands, link tables */ - edesc = ahash_edesc_alloc(ctx, 4, ctx->sh_desc_fin, - ctx->sh_desc_fin_dma, flags); + edesc = ahash_edesc_alloc(req, 4, ctx->sh_desc_fin, + ctx->sh_desc_fin_dma); if (!edesc) return -ENOMEM; @@ -947,11 +983,8 @@ static int ahash_final_ctx(struct ahash_request *req) DUMP_PREFIX_ADDRESS, 16, 4, desc, desc_bytes(desc), 1); - ret = caam_jr_enqueue(jrdev, desc, ahash_done_ctx_src, req); - if (ret) - goto unmap_ctx; - - return -EINPROGRESS; + return ahash_enqueue_req(jrdev, ahash_done_ctx_src, req, + digestsize, DMA_BIDIRECTIONAL); unmap_ctx: ahash_unmap_ctx(jrdev, edesc, req, digestsize, DMA_BIDIRECTIONAL); kfree(edesc); @@ -964,8 +997,6 @@ static int ahash_finup_ctx(struct ahash_request *req) struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash); struct caam_hash_state *state = ahash_request_ctx(req); struct device *jrdev = ctx->jrdev; - gfp_t flags = (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ? - GFP_KERNEL : GFP_ATOMIC; int buflen = state->buflen; u32 *desc; int sec4_sg_src_index; @@ -994,9 +1025,8 @@ static int ahash_finup_ctx(struct ahash_request *req) sec4_sg_src_index = 1 + (buflen ? 
1 : 0); /* allocate space for base edesc and hw desc commands, link tables */ - edesc = ahash_edesc_alloc(ctx, sec4_sg_src_index + mapped_nents, - ctx->sh_desc_fin, ctx->sh_desc_fin_dma, - flags); + edesc = ahash_edesc_alloc(req, sec4_sg_src_index + mapped_nents, + ctx->sh_desc_fin, ctx->sh_desc_fin_dma); if (!edesc) { dma_unmap_sg(jrdev, req->src, src_nents, DMA_TO_DEVICE); return -ENOMEM; @@ -1027,11 +1057,8 @@ static int ahash_finup_ctx(struct ahash_request *req) DUMP_PREFIX_ADDRESS, 16, 4, desc, desc_bytes(desc), 1); - ret = caam_jr_enqueue(jrdev, desc, ahash_done_ctx_src, req); - if (ret) - goto unmap_ctx; - - return -EINPROGRESS; + return ahash_enqueue_req(jrdev, ahash_done_ctx_src, req, + digestsize, DMA_BIDIRECTIONAL); unmap_ctx: ahash_unmap_ctx(jrdev, edesc, req, digestsize, DMA_BIDIRECTIONAL); kfree(edesc); @@ -1044,8 +1071,6 @@ static int ahash_digest(struct ahash_request *req) struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash); struct caam_hash_state *state = ahash_request_ctx(req); struct device *jrdev = ctx->jrdev; - gfp_t flags = (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ? - GFP_KERNEL : GFP_ATOMIC; u32 *desc; int digestsize = crypto_ahash_digestsize(ahash); int src_nents, mapped_nents; @@ -1072,9 +1097,8 @@ static int ahash_digest(struct ahash_request *req) } /* allocate space for base edesc and hw desc commands, link tables */ - edesc = ahash_edesc_alloc(ctx, mapped_nents > 1 ? mapped_nents : 0, - ctx->sh_desc_digest, ctx->sh_desc_digest_dma, - flags); + edesc = ahash_edesc_alloc(req, mapped_nents > 1 ? mapped_nents : 0, + ctx->sh_desc_digest, ctx->sh_desc_digest_dma); if (!edesc) { dma_unmap_sg(jrdev, req->src, src_nents, DMA_TO_DEVICE); return -ENOMEM; @@ -1103,15 +1127,8 @@ static int ahash_digest(struct ahash_request *req) DUMP_PREFIX_ADDRESS, 16, 4, desc, desc_bytes(desc), 1); - ret = caam_jr_enqueue(jrdev, desc, ahash_done, req); - if (!ret) { - ret = -EINPROGRESS; - } else { - ahash_unmap_ctx(jrdev, edesc, req, digestsize, DMA_FROM_DEVICE); - kfree(edesc); - } - - return ret; + return ahash_enqueue_req(jrdev, ahash_done, req, digestsize, + DMA_FROM_DEVICE); } /* submit ahash final if it the first job descriptor */ @@ -1121,8 +1138,6 @@ static int ahash_final_no_ctx(struct ahash_request *req) struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash); struct caam_hash_state *state = ahash_request_ctx(req); struct device *jrdev = ctx->jrdev; - gfp_t flags = (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ? 
- GFP_KERNEL : GFP_ATOMIC; u8 *buf = state->buf; int buflen = state->buflen; u32 *desc; @@ -1131,8 +1146,8 @@ static int ahash_final_no_ctx(struct ahash_request *req) int ret; /* allocate space for base edesc and hw desc commands, link tables */ - edesc = ahash_edesc_alloc(ctx, 0, ctx->sh_desc_digest, - ctx->sh_desc_digest_dma, flags); + edesc = ahash_edesc_alloc(req, 0, ctx->sh_desc_digest, + ctx->sh_desc_digest_dma); if (!edesc) return -ENOMEM; @@ -1157,20 +1172,12 @@ static int ahash_final_no_ctx(struct ahash_request *req) DUMP_PREFIX_ADDRESS, 16, 4, desc, desc_bytes(desc), 1); - ret = caam_jr_enqueue(jrdev, desc, ahash_done, req); - if (!ret) { - ret = -EINPROGRESS; - } else { - ahash_unmap_ctx(jrdev, edesc, req, digestsize, DMA_FROM_DEVICE); - kfree(edesc); - } - - return ret; + return ahash_enqueue_req(jrdev, ahash_done, req, + digestsize, DMA_FROM_DEVICE); unmap: ahash_unmap(jrdev, edesc, req, digestsize); kfree(edesc); return -ENOMEM; - } /* submit ahash update if it the first job descriptor after update */ @@ -1180,8 +1187,6 @@ static int ahash_update_no_ctx(struct ahash_request *req) struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash); struct caam_hash_state *state = ahash_request_ctx(req); struct device *jrdev = ctx->jrdev; - gfp_t flags = (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ? - GFP_KERNEL : GFP_ATOMIC; u8 *buf = state->buf; int *buflen = &state->buflen; int *next_buflen = &state->next_buflen; @@ -1234,10 +1239,9 @@ static int ahash_update_no_ctx(struct ahash_request *req) * allocate space for base edesc and hw desc commands, * link tables */ - edesc = ahash_edesc_alloc(ctx, pad_nents, + edesc = ahash_edesc_alloc(req, pad_nents, ctx->sh_desc_update_first, - ctx->sh_desc_update_first_dma, - flags); + ctx->sh_desc_update_first_dma); if (!edesc) { dma_unmap_sg(jrdev, req->src, src_nents, DMA_TO_DEVICE); return -ENOMEM; @@ -1273,11 +1277,10 @@ static int ahash_update_no_ctx(struct ahash_request *req) DUMP_PREFIX_ADDRESS, 16, 4, desc, desc_bytes(desc), 1); - ret = caam_jr_enqueue(jrdev, desc, ahash_done_ctx_dst, req); - if (ret) - goto unmap_ctx; - - ret = -EINPROGRESS; + ret = ahash_enqueue_req(jrdev, ahash_done_ctx_dst, req, + ctx->ctx_len, DMA_TO_DEVICE); + if ((ret != -EINPROGRESS) && (ret != -EBUSY)) + return ret; state->update = ahash_update_ctx; state->finup = ahash_finup_ctx; state->final = ahash_final_ctx; @@ -1305,8 +1308,6 @@ static int ahash_finup_no_ctx(struct ahash_request *req) struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash); struct caam_hash_state *state = ahash_request_ctx(req); struct device *jrdev = ctx->jrdev; - gfp_t flags = (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ? 
- GFP_KERNEL : GFP_ATOMIC; int buflen = state->buflen; u32 *desc; int sec4_sg_bytes, sec4_sg_src_index, src_nents, mapped_nents; @@ -1336,9 +1337,8 @@ static int ahash_finup_no_ctx(struct ahash_request *req) sizeof(struct sec4_sg_entry); /* allocate space for base edesc and hw desc commands, link tables */ - edesc = ahash_edesc_alloc(ctx, sec4_sg_src_index + mapped_nents, - ctx->sh_desc_digest, ctx->sh_desc_digest_dma, - flags); + edesc = ahash_edesc_alloc(req, sec4_sg_src_index + mapped_nents, + ctx->sh_desc_digest, ctx->sh_desc_digest_dma); if (!edesc) { dma_unmap_sg(jrdev, req->src, src_nents, DMA_TO_DEVICE); return -ENOMEM; @@ -1368,15 +1368,8 @@ static int ahash_finup_no_ctx(struct ahash_request *req) DUMP_PREFIX_ADDRESS, 16, 4, desc, desc_bytes(desc), 1); - ret = caam_jr_enqueue(jrdev, desc, ahash_done, req); - if (!ret) { - ret = -EINPROGRESS; - } else { - ahash_unmap_ctx(jrdev, edesc, req, digestsize, DMA_FROM_DEVICE); - kfree(edesc); - } - - return ret; + return ahash_enqueue_req(jrdev, ahash_done, req, + digestsize, DMA_FROM_DEVICE); unmap: ahash_unmap(jrdev, edesc, req, digestsize); kfree(edesc); @@ -1391,8 +1384,6 @@ static int ahash_update_first(struct ahash_request *req) struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash); struct caam_hash_state *state = ahash_request_ctx(req); struct device *jrdev = ctx->jrdev; - gfp_t flags = (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ? - GFP_KERNEL : GFP_ATOMIC; u8 *buf = state->buf; int *buflen = &state->buflen; int *next_buflen = &state->next_buflen; @@ -1440,11 +1431,10 @@ static int ahash_update_first(struct ahash_request *req) * allocate space for base edesc and hw desc commands, * link tables */ - edesc = ahash_edesc_alloc(ctx, mapped_nents > 1 ? + edesc = ahash_edesc_alloc(req, mapped_nents > 1 ? 
mapped_nents : 0, ctx->sh_desc_update_first, - ctx->sh_desc_update_first_dma, - flags); + ctx->sh_desc_update_first_dma); if (!edesc) { dma_unmap_sg(jrdev, req->src, src_nents, DMA_TO_DEVICE); return -ENOMEM; @@ -1467,11 +1457,10 @@ static int ahash_update_first(struct ahash_request *req) DUMP_PREFIX_ADDRESS, 16, 4, desc, desc_bytes(desc), 1); - ret = caam_jr_enqueue(jrdev, desc, ahash_done_ctx_dst, req); - if (ret) - goto unmap_ctx; - - ret = -EINPROGRESS; + ret = ahash_enqueue_req(jrdev, ahash_done_ctx_dst, req, + ctx->ctx_len, DMA_TO_DEVICE); + if ((ret != -EINPROGRESS) && (ret != -EBUSY)) + return ret; state->update = ahash_update_ctx; state->finup = ahash_finup_ctx; state->final = ahash_final_ctx; @@ -1774,6 +1763,8 @@ static int caam_hash_cra_init(struct crypto_tfm *tfm) HASH_MSG_LEN + SHA256_DIGEST_SIZE, HASH_MSG_LEN + 64, HASH_MSG_LEN + SHA512_DIGEST_SIZE }; + const size_t sh_desc_update_offset = offsetof(struct caam_hash_ctx, + sh_desc_update); dma_addr_t dma_addr; struct caam_drv_private *priv; @@ -1826,7 +1817,8 @@ static int caam_hash_cra_init(struct crypto_tfm *tfm) } dma_addr = dma_map_single_attrs(ctx->jrdev, ctx->sh_desc_update, - offsetof(struct caam_hash_ctx, key), + offsetof(struct caam_hash_ctx, key) - + sh_desc_update_offset, ctx->dir, DMA_ATTR_SKIP_CPU_SYNC); if (dma_mapping_error(ctx->jrdev, dma_addr)) { dev_err(ctx->jrdev, "unable to map shared descriptors\n"); @@ -1844,11 +1836,16 @@ static int caam_hash_cra_init(struct crypto_tfm *tfm) ctx->sh_desc_update_dma = dma_addr; ctx->sh_desc_update_first_dma = dma_addr + offsetof(struct caam_hash_ctx, - sh_desc_update_first); + sh_desc_update_first) - + sh_desc_update_offset; ctx->sh_desc_fin_dma = dma_addr + offsetof(struct caam_hash_ctx, - sh_desc_fin); + sh_desc_fin) - + sh_desc_update_offset; ctx->sh_desc_digest_dma = dma_addr + offsetof(struct caam_hash_ctx, - sh_desc_digest); + sh_desc_digest) - + sh_desc_update_offset; + + ctx->enginectx.op.do_one_request = ahash_do_one_req; crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm), sizeof(struct caam_hash_state)); @@ -1865,7 +1862,8 @@ static void caam_hash_cra_exit(struct crypto_tfm *tfm) struct caam_hash_ctx *ctx = crypto_tfm_ctx(tfm); dma_unmap_single_attrs(ctx->jrdev, ctx->sh_desc_update_dma, - offsetof(struct caam_hash_ctx, key), + offsetof(struct caam_hash_ctx, key) - + offsetof(struct caam_hash_ctx, sh_desc_update), ctx->dir, DMA_ATTR_SKIP_CPU_SYNC); if (ctx->key_dir != DMA_NONE) dma_unmap_single_attrs(ctx->jrdev, ctx->adata.key_dma, diff --git a/drivers/crypto/caam/caampkc.c b/drivers/crypto/caam/caampkc.c index 6619c512ef1a..4fcae37a2e33 100644 --- a/drivers/crypto/caam/caampkc.c +++ b/drivers/crypto/caam/caampkc.c @@ -117,76 +117,69 @@ static void rsa_priv_f3_unmap(struct device *dev, struct rsa_edesc *edesc, static void rsa_pub_done(struct device *dev, u32 *desc, u32 err, void *context) { struct akcipher_request *req = context; + struct caam_rsa_req_ctx *req_ctx = akcipher_request_ctx(req); + struct caam_drv_private_jr *jrp = dev_get_drvdata(dev); struct rsa_edesc *edesc; int ecode = 0; if (err) ecode = caam_jr_strstatus(dev, err); - edesc = container_of(desc, struct rsa_edesc, hw_desc[0]); + edesc = req_ctx->edesc; rsa_pub_unmap(dev, edesc, req); rsa_io_unmap(dev, edesc, req); kfree(edesc); - akcipher_request_complete(req, ecode); -} - -static void rsa_priv_f1_done(struct device *dev, u32 *desc, u32 err, - void *context) -{ - struct akcipher_request *req = context; - struct rsa_edesc *edesc; - int ecode = 0; - - if (err) - ecode = caam_jr_strstatus(dev, err); 
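
In the caam_hash_cra_init() hunk above the shared descriptors are now DMA-mapped starting at sh_desc_update instead of at the start of struct caam_hash_ctx, so every per-descriptor DMA address subtracts offsetof(struct caam_hash_ctx, sh_desc_update). A small compilable illustration of that arithmetic with a mock layout (field sizes here are invented for the example):

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

/* Mock layout mirroring only the fields involved; sizes are illustrative. */
struct mock_hash_ctx {
	uint32_t sh_desc_update[16];
	uint32_t sh_desc_update_first[16];
	uint32_t sh_desc_fin[16];
	uint32_t sh_desc_digest[16];
	uint8_t  key[128];
};

int main(void)
{
	/* The mapped window is [sh_desc_update, key), not the whole struct. */
	size_t base = offsetof(struct mock_hash_ctx, sh_desc_update);
	size_t len  = offsetof(struct mock_hash_ctx, key) - base;

	/*
	 * The address returned by dma_map_single() corresponds to 'base', so
	 * each descriptor's bus address is its struct offset minus 'base'.
	 */
	uintptr_t dma_addr = 0x80000000u;     /* pretend bus address */
	uintptr_t fin_dma  = dma_addr +
			     offsetof(struct mock_hash_ctx, sh_desc_fin) - base;

	printf("mapped %zu bytes, sh_desc_fin at dma %#lx\n",
	       len, (unsigned long)fin_dma);
	return 0;
}
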
- - edesc = container_of(desc, struct rsa_edesc, hw_desc[0]); - - rsa_priv_f1_unmap(dev, edesc, req); - rsa_io_unmap(dev, edesc, req); - kfree(edesc); - - akcipher_request_complete(req, ecode); -} - -static void rsa_priv_f2_done(struct device *dev, u32 *desc, u32 err, - void *context) -{ - struct akcipher_request *req = context; - struct rsa_edesc *edesc; - int ecode = 0; - - if (err) - ecode = caam_jr_strstatus(dev, err); - - edesc = container_of(desc, struct rsa_edesc, hw_desc[0]); - - rsa_priv_f2_unmap(dev, edesc, req); - rsa_io_unmap(dev, edesc, req); - kfree(edesc); - - akcipher_request_complete(req, ecode); + /* + * If no backlog flag, the completion of the request is done + * by CAAM, not crypto engine. + */ + if (!edesc->bklog) + akcipher_request_complete(req, ecode); + else + crypto_finalize_akcipher_request(jrp->engine, req, ecode); } -static void rsa_priv_f3_done(struct device *dev, u32 *desc, u32 err, - void *context) +static void rsa_priv_f_done(struct device *dev, u32 *desc, u32 err, + void *context) { struct akcipher_request *req = context; + struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req); + struct caam_drv_private_jr *jrp = dev_get_drvdata(dev); + struct caam_rsa_ctx *ctx = akcipher_tfm_ctx(tfm); + struct caam_rsa_key *key = &ctx->key; + struct caam_rsa_req_ctx *req_ctx = akcipher_request_ctx(req); struct rsa_edesc *edesc; int ecode = 0; if (err) ecode = caam_jr_strstatus(dev, err); - edesc = container_of(desc, struct rsa_edesc, hw_desc[0]); + edesc = req_ctx->edesc; + + switch (key->priv_form) { + case FORM1: + rsa_priv_f1_unmap(dev, edesc, req); + break; + case FORM2: + rsa_priv_f2_unmap(dev, edesc, req); + break; + case FORM3: + rsa_priv_f3_unmap(dev, edesc, req); + } - rsa_priv_f3_unmap(dev, edesc, req); rsa_io_unmap(dev, edesc, req); kfree(edesc); - akcipher_request_complete(req, ecode); + /* + * If no backlog flag, the completion of the request is done + * by CAAM, not crypto engine. 
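
rsa_priv_f_done() above folds the three per-form completion callbacks into one that unmaps according to key->priv_form and then finishes the request on whichever path it was submitted on. A schematic model of that completion decision (stand-in helpers; the real code calls akcipher_request_complete() or crypto_finalize_akcipher_request()):

#include <stdbool.h>
#include <stdio.h>

enum priv_form { FORM1, FORM2, FORM3 };

/* Stand-ins for the driver's two completion paths. */
static void complete_directly(int ecode)   { printf("direct complete: %d\n", ecode); }
static void finalize_via_engine(int ecode) { printf("engine finalize: %d\n", ecode); }

/*
 * One callback unmaps per the RSA private-key form that was used, then
 * completes the request.  A backlogged request must be finalized through
 * crypto-engine so the engine can release its queue slot; a request sent
 * straight to the job ring completes through the normal crypto API callback.
 */
static void rsa_priv_done_model(enum priv_form form, bool bklog, int ecode)
{
	switch (form) {
	case FORM1: printf("rsa_priv_f1_unmap\n"); break;
	case FORM2: printf("rsa_priv_f2_unmap\n"); break;
	case FORM3: printf("rsa_priv_f3_unmap\n"); break;
	}

	if (!bklog)
		complete_directly(ecode);
	else
		finalize_via_engine(ecode);
}

int main(void)
{
	rsa_priv_done_model(FORM3, false, 0);  /* submitted directly to the job ring */
	rsa_priv_done_model(FORM1, true, 0);   /* submitted through crypto-engine    */
	return 0;
}
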
+ */ + if (!edesc->bklog) + akcipher_request_complete(req, ecode); + else + crypto_finalize_akcipher_request(jrp->engine, req, ecode); } /** @@ -334,6 +327,8 @@ static struct rsa_edesc *rsa_edesc_alloc(struct akcipher_request *req, edesc->src_nents = src_nents; edesc->dst_nents = dst_nents; + req_ctx->edesc = edesc; + if (!sec4_sg_bytes) return edesc; @@ -364,6 +359,33 @@ src_fail: return ERR_PTR(-ENOMEM); } +static int akcipher_do_one_req(struct crypto_engine *engine, void *areq) +{ + struct akcipher_request *req = container_of(areq, + struct akcipher_request, + base); + struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req); + struct caam_rsa_req_ctx *req_ctx = akcipher_request_ctx(req); + struct caam_rsa_ctx *ctx = akcipher_tfm_ctx(tfm); + struct device *jrdev = ctx->dev; + u32 *desc = req_ctx->edesc->hw_desc; + int ret; + + req_ctx->edesc->bklog = true; + + ret = caam_jr_enqueue(jrdev, desc, req_ctx->akcipher_op_done, req); + + if (ret != -EINPROGRESS) { + rsa_pub_unmap(jrdev, req_ctx->edesc, req); + rsa_io_unmap(jrdev, req_ctx->edesc, req); + kfree(req_ctx->edesc); + } else { + ret = 0; + } + + return ret; +} + static int set_rsa_pub_pdb(struct akcipher_request *req, struct rsa_edesc *edesc) { @@ -627,6 +649,53 @@ unmap_p: return -ENOMEM; } +static int akcipher_enqueue_req(struct device *jrdev, + void (*cbk)(struct device *jrdev, u32 *desc, + u32 err, void *context), + struct akcipher_request *req) +{ + struct caam_drv_private_jr *jrpriv = dev_get_drvdata(jrdev); + struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req); + struct caam_rsa_ctx *ctx = akcipher_tfm_ctx(tfm); + struct caam_rsa_key *key = &ctx->key; + struct caam_rsa_req_ctx *req_ctx = akcipher_request_ctx(req); + struct rsa_edesc *edesc = req_ctx->edesc; + u32 *desc = edesc->hw_desc; + int ret; + + req_ctx->akcipher_op_done = cbk; + /* + * Only the backlog request are sent to crypto-engine since the others + * can be handled by CAAM, if free, especially since JR has up to 1024 + * entries (more than the 10 entries from crypto-engine). 
+ */ + if (req->base.flags & CRYPTO_TFM_REQ_MAY_BACKLOG) + ret = crypto_transfer_akcipher_request_to_engine(jrpriv->engine, + req); + else + ret = caam_jr_enqueue(jrdev, desc, cbk, req); + + if ((ret != -EINPROGRESS) && (ret != -EBUSY)) { + switch (key->priv_form) { + case FORM1: + rsa_priv_f1_unmap(jrdev, edesc, req); + break; + case FORM2: + rsa_priv_f2_unmap(jrdev, edesc, req); + break; + case FORM3: + rsa_priv_f3_unmap(jrdev, edesc, req); + break; + default: + rsa_pub_unmap(jrdev, edesc, req); + } + rsa_io_unmap(jrdev, edesc, req); + kfree(edesc); + } + + return ret; +} + static int caam_rsa_enc(struct akcipher_request *req) { struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req); @@ -658,11 +727,7 @@ static int caam_rsa_enc(struct akcipher_request *req) /* Initialize Job Descriptor */ init_rsa_pub_desc(edesc->hw_desc, &edesc->pdb.pub); - ret = caam_jr_enqueue(jrdev, edesc->hw_desc, rsa_pub_done, req); - if (!ret) - return -EINPROGRESS; - - rsa_pub_unmap(jrdev, edesc, req); + return akcipher_enqueue_req(jrdev, rsa_pub_done, req); init_fail: rsa_io_unmap(jrdev, edesc, req); @@ -691,11 +756,7 @@ static int caam_rsa_dec_priv_f1(struct akcipher_request *req) /* Initialize Job Descriptor */ init_rsa_priv_f1_desc(edesc->hw_desc, &edesc->pdb.priv_f1); - ret = caam_jr_enqueue(jrdev, edesc->hw_desc, rsa_priv_f1_done, req); - if (!ret) - return -EINPROGRESS; - - rsa_priv_f1_unmap(jrdev, edesc, req); + return akcipher_enqueue_req(jrdev, rsa_priv_f_done, req); init_fail: rsa_io_unmap(jrdev, edesc, req); @@ -724,11 +785,7 @@ static int caam_rsa_dec_priv_f2(struct akcipher_request *req) /* Initialize Job Descriptor */ init_rsa_priv_f2_desc(edesc->hw_desc, &edesc->pdb.priv_f2); - ret = caam_jr_enqueue(jrdev, edesc->hw_desc, rsa_priv_f2_done, req); - if (!ret) - return -EINPROGRESS; - - rsa_priv_f2_unmap(jrdev, edesc, req); + return akcipher_enqueue_req(jrdev, rsa_priv_f_done, req); init_fail: rsa_io_unmap(jrdev, edesc, req); @@ -757,11 +814,7 @@ static int caam_rsa_dec_priv_f3(struct akcipher_request *req) /* Initialize Job Descriptor */ init_rsa_priv_f3_desc(edesc->hw_desc, &edesc->pdb.priv_f3); - ret = caam_jr_enqueue(jrdev, edesc->hw_desc, rsa_priv_f3_done, req); - if (!ret) - return -EINPROGRESS; - - rsa_priv_f3_unmap(jrdev, edesc, req); + return akcipher_enqueue_req(jrdev, rsa_priv_f_done, req); init_fail: rsa_io_unmap(jrdev, edesc, req); @@ -1054,6 +1107,8 @@ static int caam_rsa_init_tfm(struct crypto_akcipher *tfm) return -ENOMEM; } + ctx->enginectx.op.do_one_request = akcipher_do_one_req; + return 0; } diff --git a/drivers/crypto/caam/caampkc.h b/drivers/crypto/caam/caampkc.h index c68fb4c03ee6..cc889a525e2f 100644 --- a/drivers/crypto/caam/caampkc.h +++ b/drivers/crypto/caam/caampkc.h @@ -12,6 +12,7 @@ #define _PKC_DESC_H_ #include "compat.h" #include "pdb.h" +#include <crypto/engine.h> /** * caam_priv_key_form - CAAM RSA private key representation @@ -87,11 +88,13 @@ struct caam_rsa_key { /** * caam_rsa_ctx - per session context. 
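
When crypto-engine later pops a backlogged request it invokes the ->do_one_request() hook registered at tfm init (ctx->enginectx.op.do_one_request = akcipher_do_one_req above); that hook marks the edesc as backlogged and pushes the prepared descriptor to the job ring, cleaning up locally if the ring rejects it. A simplified model of that hand-off (stand-in types, stubbed ring):

#include <errno.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

/* Stand-in for the software descriptor stashed in the request context. */
struct edesc_model {
	bool bklog;
};

/* Stub job ring: accepted jobs report -EINPROGRESS, rejected ones -ENOSPC. */
static int jr_enqueue_stub(bool has_space)
{
	return has_space ? -EINPROGRESS : -ENOSPC;
}

/*
 * Model of the engine callback: set bklog so the completion path knows to
 * finalize via crypto-engine, then enqueue.  Return 0 for "in flight";
 * anything else reports failure after freeing local resources.
 */
static int do_one_request_model(struct edesc_model *e, bool ring_has_space)
{
	int ret;

	e->bklog = true;
	ret = jr_enqueue_stub(ring_has_space);
	if (ret != -EINPROGRESS) {
		free(e);                /* the driver also unmaps DMA here */
		return ret;
	}
	return 0;
}

int main(void)
{
	struct edesc_model *e = calloc(1, sizeof(*e));

	if (!e)
		return 1;
	printf("engine pop -> %d\n", do_one_request_model(e, true));
	free(e);    /* in the driver this happens in the completion callback */
	return 0;
}
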
+ * @enginectx : crypto engine context * @key : RSA key in DMA zone * @dev : device structure * @padding_dma : dma address of padding, for adding it to the input */ struct caam_rsa_ctx { + struct crypto_engine_ctx enginectx; struct caam_rsa_key key; struct device *dev; dma_addr_t padding_dma; @@ -103,11 +106,16 @@ struct caam_rsa_ctx { * @src : input scatterlist (stripped of leading zeros) * @fixup_src : input scatterlist (that might be stripped of leading zeros) * @fixup_src_len : length of the fixup_src input scatterlist + * @edesc : s/w-extended rsa descriptor + * @akcipher_op_done : callback used when operation is done */ struct caam_rsa_req_ctx { struct scatterlist src[2]; struct scatterlist *fixup_src; unsigned int fixup_src_len; + struct rsa_edesc *edesc; + void (*akcipher_op_done)(struct device *jrdev, u32 *desc, u32 err, + void *context); }; /** @@ -117,6 +125,7 @@ struct caam_rsa_req_ctx { * @mapped_src_nents: number of segments in input h/w link table * @mapped_dst_nents: number of segments in output h/w link table * @sec4_sg_bytes : length of h/w link table + * @bklog : stored to determine if the request needs backlog * @sec4_sg_dma : dma address of h/w link table * @sec4_sg : pointer to h/w link table * @pdb : specific RSA Protocol Data Block (PDB) @@ -128,6 +137,7 @@ struct rsa_edesc { int mapped_src_nents; int mapped_dst_nents; int sec4_sg_bytes; + bool bklog; dma_addr_t sec4_sg_dma; struct sec4_sg_entry *sec4_sg; union { diff --git a/drivers/crypto/caam/caamrng.c b/drivers/crypto/caam/caamrng.c index e8baacaabe07..77d048dfe5d0 100644 --- a/drivers/crypto/caam/caamrng.c +++ b/drivers/crypto/caam/caamrng.c @@ -7,35 +7,12 @@ * * Based on caamalg.c crypto API driver. * - * relationship between job descriptors to shared descriptors: - * - * --------------- -------------- - * | JobDesc #0 |-------------------->| ShareDesc | - * | *(buffer 0) | |------------->| (generate) | - * --------------- | | (move) | - * | | (store) | - * --------------- | -------------- - * | JobDesc #1 |------| - * | *(buffer 1) | - * --------------- - * - * A job desc looks like this: - * - * --------------------- - * | Header | - * | ShareDesc Pointer | - * | SEQ_OUT_PTR | - * | (output buffer) | - * --------------------- - * - * The SharedDesc never changes, and each job descriptor points to one of two - * buffers for each device, from which the data will be copied into the - * requested destination */ #include <linux/hw_random.h> #include <linux/completion.h> #include <linux/atomic.h> +#include <linux/kfifo.h> #include "compat.h" @@ -45,278 +22,205 @@ #include "jr.h" #include "error.h" +#define CAAM_RNG_MAX_FIFO_STORE_SIZE 16 + /* - * Maximum buffer size: maximum number of random, cache-aligned bytes that - * will be generated and moved to seq out ptr (extlen not allowed) + * Length of used descriptors, see caam_init_desc() */ -#define RN_BUF_SIZE (0xffff / L1_CACHE_BYTES * \ - L1_CACHE_BYTES) - -/* length of descriptors */ -#define DESC_JOB_O_LEN (CAAM_CMD_SZ * 2 + CAAM_PTR_SZ_MAX * 2) -#define DESC_RNG_LEN (3 * CAAM_CMD_SZ) - -/* Buffer, its dma address and lock */ -struct buf_data { - u8 buf[RN_BUF_SIZE] ____cacheline_aligned; - dma_addr_t addr; - struct completion filled; - u32 hw_desc[DESC_JOB_O_LEN]; -#define BUF_NOT_EMPTY 0 -#define BUF_EMPTY 1 -#define BUF_PENDING 2 /* Empty, but with job pending --don't submit another */ - atomic_t empty; -}; +#define CAAM_RNG_DESC_LEN (CAAM_CMD_SZ + \ + CAAM_CMD_SZ + \ + CAAM_CMD_SZ + CAAM_PTR_SZ_MAX) /* rng per-device context */ struct caam_rng_ctx { + 
struct hwrng rng; struct device *jrdev; - dma_addr_t sh_desc_dma; - u32 sh_desc[DESC_RNG_LEN]; - unsigned int cur_buf_idx; - int current_buf; - struct buf_data bufs[2]; + struct device *ctrldev; + void *desc_async; + void *desc_sync; + struct work_struct worker; + struct kfifo fifo; }; -static struct caam_rng_ctx *rng_ctx; - -/* - * Variable used to avoid double free of resources in case - * algorithm registration was unsuccessful - */ -static bool init_done; - -static inline void rng_unmap_buf(struct device *jrdev, struct buf_data *bd) -{ - if (bd->addr) - dma_unmap_single(jrdev, bd->addr, RN_BUF_SIZE, - DMA_FROM_DEVICE); -} +struct caam_rng_job_ctx { + struct completion *done; + int *err; +}; -static inline void rng_unmap_ctx(struct caam_rng_ctx *ctx) +static struct caam_rng_ctx *to_caam_rng_ctx(struct hwrng *r) { - struct device *jrdev = ctx->jrdev; - - if (ctx->sh_desc_dma) - dma_unmap_single(jrdev, ctx->sh_desc_dma, - desc_bytes(ctx->sh_desc), DMA_TO_DEVICE); - rng_unmap_buf(jrdev, &ctx->bufs[0]); - rng_unmap_buf(jrdev, &ctx->bufs[1]); + return (struct caam_rng_ctx *)r->priv; } -static void rng_done(struct device *jrdev, u32 *desc, u32 err, void *context) +static void caam_rng_done(struct device *jrdev, u32 *desc, u32 err, + void *context) { - struct buf_data *bd; - - bd = container_of(desc, struct buf_data, hw_desc[0]); + struct caam_rng_job_ctx *jctx = context; if (err) - caam_jr_strstatus(jrdev, err); + *jctx->err = caam_jr_strstatus(jrdev, err); - atomic_set(&bd->empty, BUF_NOT_EMPTY); - complete(&bd->filled); - - /* Buffer refilled, invalidate cache */ - dma_sync_single_for_cpu(jrdev, bd->addr, RN_BUF_SIZE, DMA_FROM_DEVICE); - - print_hex_dump_debug("rng refreshed buf@: ", DUMP_PREFIX_ADDRESS, 16, 4, - bd->buf, RN_BUF_SIZE, 1); + complete(jctx->done); } -static inline int submit_job(struct caam_rng_ctx *ctx, int to_current) +static u32 *caam_init_desc(u32 *desc, dma_addr_t dst_dma) { - struct buf_data *bd = &ctx->bufs[!(to_current ^ ctx->current_buf)]; - struct device *jrdev = ctx->jrdev; - u32 *desc = bd->hw_desc; - int err; - - dev_dbg(jrdev, "submitting job %d\n", !(to_current ^ ctx->current_buf)); - init_completion(&bd->filled); - err = caam_jr_enqueue(jrdev, desc, rng_done, ctx); - if (err) - complete(&bd->filled); /* don't wait on failed job*/ - else - atomic_inc(&bd->empty); /* note if pending */ - - return err; + init_job_desc(desc, 0); /* + 1 cmd_sz */ + /* Generate random bytes: + 1 cmd_sz */ + append_operation(desc, OP_ALG_ALGSEL_RNG | OP_TYPE_CLASS1_ALG | + OP_ALG_PR_ON); + /* Store bytes: + 1 cmd_sz + caam_ptr_sz */ + append_fifo_store(desc, dst_dma, + CAAM_RNG_MAX_FIFO_STORE_SIZE, FIFOST_TYPE_RNGSTORE); + + print_hex_dump_debug("rng job desc@: ", DUMP_PREFIX_ADDRESS, + 16, 4, desc, desc_bytes(desc), 1); + + return desc; } -static int caam_read(struct hwrng *rng, void *data, size_t max, bool wait) +static int caam_rng_read_one(struct device *jrdev, + void *dst, int len, + void *desc, + struct completion *done) { - struct caam_rng_ctx *ctx = rng_ctx; - struct buf_data *bd = &ctx->bufs[ctx->current_buf]; - int next_buf_idx, copied_idx; - int err; - - if (atomic_read(&bd->empty)) { - /* try to submit job if there wasn't one */ - if (atomic_read(&bd->empty) == BUF_EMPTY) { - err = submit_job(ctx, 1); - /* if can't submit job, can't even wait */ - if (err) - return 0; - } - /* no immediate data, so exit if not waiting */ - if (!wait) - return 0; - - /* waiting for pending job */ - if (atomic_read(&bd->empty)) - wait_for_completion(&bd->filled); + dma_addr_t dst_dma; + int 
err, ret = 0; + struct caam_rng_job_ctx jctx = { + .done = done, + .err = &ret, + }; + + len = CAAM_RNG_MAX_FIFO_STORE_SIZE; + + dst_dma = dma_map_single(jrdev, dst, len, DMA_FROM_DEVICE); + if (dma_mapping_error(jrdev, dst_dma)) { + dev_err(jrdev, "unable to map destination memory\n"); + return -ENOMEM; } - next_buf_idx = ctx->cur_buf_idx + max; - dev_dbg(ctx->jrdev, "%s: start reading at buffer %d, idx %d\n", - __func__, ctx->current_buf, ctx->cur_buf_idx); - - /* if enough data in current buffer */ - if (next_buf_idx < RN_BUF_SIZE) { - memcpy(data, bd->buf + ctx->cur_buf_idx, max); - ctx->cur_buf_idx = next_buf_idx; - return max; + init_completion(done); + err = caam_jr_enqueue(jrdev, + caam_init_desc(desc, dst_dma), + caam_rng_done, &jctx); + if (err == -EINPROGRESS) { + wait_for_completion(done); + err = 0; } - /* else, copy what's left... */ - copied_idx = RN_BUF_SIZE - ctx->cur_buf_idx; - memcpy(data, bd->buf + ctx->cur_buf_idx, copied_idx); - ctx->cur_buf_idx = 0; - atomic_set(&bd->empty, BUF_EMPTY); - - /* ...refill... */ - submit_job(ctx, 1); + dma_unmap_single(jrdev, dst_dma, len, DMA_FROM_DEVICE); - /* and use next buffer */ - ctx->current_buf = !ctx->current_buf; - dev_dbg(ctx->jrdev, "switched to buffer %d\n", ctx->current_buf); - - /* since there already is some data read, don't wait */ - return copied_idx + caam_read(rng, data + copied_idx, - max - copied_idx, false); + return err ?: (ret ?: len); } -static inline int rng_create_sh_desc(struct caam_rng_ctx *ctx) +static void caam_rng_fill_async(struct caam_rng_ctx *ctx) { - struct device *jrdev = ctx->jrdev; - u32 *desc = ctx->sh_desc; - - init_sh_desc(desc, HDR_SHARE_SERIAL); - - /* Generate random bytes */ - append_operation(desc, OP_ALG_ALGSEL_RNG | OP_TYPE_CLASS1_ALG); - - /* Store bytes */ - append_seq_fifo_store(desc, RN_BUF_SIZE, FIFOST_TYPE_RNGSTORE); + struct scatterlist sg[1]; + struct completion done; + int len, nents; + + sg_init_table(sg, ARRAY_SIZE(sg)); + nents = kfifo_dma_in_prepare(&ctx->fifo, sg, ARRAY_SIZE(sg), + CAAM_RNG_MAX_FIFO_STORE_SIZE); + if (!nents) + return; - ctx->sh_desc_dma = dma_map_single(jrdev, desc, desc_bytes(desc), - DMA_TO_DEVICE); - if (dma_mapping_error(jrdev, ctx->sh_desc_dma)) { - dev_err(jrdev, "unable to map shared descriptor\n"); - return -ENOMEM; - } + len = caam_rng_read_one(ctx->jrdev, sg_virt(&sg[0]), + sg[0].length, + ctx->desc_async, + &done); + if (len < 0) + return; - print_hex_dump_debug("rng shdesc@: ", DUMP_PREFIX_ADDRESS, 16, 4, - desc, desc_bytes(desc), 1); + kfifo_dma_in_finish(&ctx->fifo, len); +} - return 0; +static void caam_rng_worker(struct work_struct *work) +{ + struct caam_rng_ctx *ctx = container_of(work, struct caam_rng_ctx, + worker); + caam_rng_fill_async(ctx); } -static inline int rng_create_job_desc(struct caam_rng_ctx *ctx, int buf_id) +static int caam_read(struct hwrng *rng, void *dst, size_t max, bool wait) { - struct device *jrdev = ctx->jrdev; - struct buf_data *bd = &ctx->bufs[buf_id]; - u32 *desc = bd->hw_desc; - int sh_len = desc_len(ctx->sh_desc); + struct caam_rng_ctx *ctx = to_caam_rng_ctx(rng); + int out; - init_job_desc_shared(desc, ctx->sh_desc_dma, sh_len, HDR_SHARE_DEFER | - HDR_REVERSE); + if (wait) { + struct completion done; - bd->addr = dma_map_single(jrdev, bd->buf, RN_BUF_SIZE, DMA_FROM_DEVICE); - if (dma_mapping_error(jrdev, bd->addr)) { - dev_err(jrdev, "unable to map dst\n"); - return -ENOMEM; + return caam_rng_read_one(ctx->jrdev, dst, max, + ctx->desc_sync, &done); } - append_seq_out_ptr_intlen(desc, bd->addr, RN_BUF_SIZE, 
0); - - print_hex_dump_debug("rng job desc@: ", DUMP_PREFIX_ADDRESS, 16, 4, - desc, desc_bytes(desc), 1); + out = kfifo_out(&ctx->fifo, dst, max); + if (kfifo_is_empty(&ctx->fifo)) + schedule_work(&ctx->worker); - return 0; + return out; } static void caam_cleanup(struct hwrng *rng) { - int i; - struct buf_data *bd; - - for (i = 0; i < 2; i++) { - bd = &rng_ctx->bufs[i]; - if (atomic_read(&bd->empty) == BUF_PENDING) - wait_for_completion(&bd->filled); - } + struct caam_rng_ctx *ctx = to_caam_rng_ctx(rng); - rng_unmap_ctx(rng_ctx); + flush_work(&ctx->worker); + caam_jr_free(ctx->jrdev); + kfifo_free(&ctx->fifo); } -static int caam_init_buf(struct caam_rng_ctx *ctx, int buf_id) +static int caam_init(struct hwrng *rng) { - struct buf_data *bd = &ctx->bufs[buf_id]; + struct caam_rng_ctx *ctx = to_caam_rng_ctx(rng); int err; - err = rng_create_job_desc(ctx, buf_id); - if (err) - return err; - - atomic_set(&bd->empty, BUF_EMPTY); - submit_job(ctx, buf_id == ctx->current_buf); - wait_for_completion(&bd->filled); + ctx->desc_sync = devm_kzalloc(ctx->ctrldev, CAAM_RNG_DESC_LEN, + GFP_DMA | GFP_KERNEL); + if (!ctx->desc_sync) + return -ENOMEM; - return 0; -} + ctx->desc_async = devm_kzalloc(ctx->ctrldev, CAAM_RNG_DESC_LEN, + GFP_DMA | GFP_KERNEL); + if (!ctx->desc_async) + return -ENOMEM; -static int caam_init_rng(struct caam_rng_ctx *ctx, struct device *jrdev) -{ - int err; + if (kfifo_alloc(&ctx->fifo, CAAM_RNG_MAX_FIFO_STORE_SIZE, + GFP_DMA | GFP_KERNEL)) + return -ENOMEM; - ctx->jrdev = jrdev; + INIT_WORK(&ctx->worker, caam_rng_worker); - err = rng_create_sh_desc(ctx); - if (err) + ctx->jrdev = caam_jr_alloc(); + err = PTR_ERR_OR_ZERO(ctx->jrdev); + if (err) { + kfifo_free(&ctx->fifo); + pr_err("Job Ring Device allocation for transform failed\n"); return err; + } - ctx->current_buf = 0; - ctx->cur_buf_idx = 0; + /* + * Fill async buffer to have early randomness data for + * hw_random + */ + caam_rng_fill_async(ctx); - err = caam_init_buf(ctx, 0); - if (err) - return err; - - return caam_init_buf(ctx, 1); + return 0; } -static struct hwrng caam_rng = { - .name = "rng-caam", - .cleanup = caam_cleanup, - .read = caam_read, -}; +int caam_rng_init(struct device *ctrldev); -void caam_rng_exit(void) +void caam_rng_exit(struct device *ctrldev) { - if (!init_done) - return; - - caam_jr_free(rng_ctx->jrdev); - hwrng_unregister(&caam_rng); - kfree(rng_ctx); + devres_release_group(ctrldev, caam_rng_init); } int caam_rng_init(struct device *ctrldev) { - struct device *dev; + struct caam_rng_ctx *ctx; u32 rng_inst; struct caam_drv_private *priv = dev_get_drvdata(ctrldev); - int err; - init_done = false; + int ret; /* Check for an instantiated RNG before registration */ if (priv->era < 10) @@ -328,31 +232,30 @@ int caam_rng_init(struct device *ctrldev) if (!rng_inst) return 0; - dev = caam_jr_alloc(); - if (IS_ERR(dev)) { - pr_err("Job Ring Device allocation for transform failed\n"); - return PTR_ERR(dev); - } - rng_ctx = kmalloc(sizeof(*rng_ctx), GFP_DMA | GFP_KERNEL); - if (!rng_ctx) { - err = -ENOMEM; - goto free_caam_alloc; - } - err = caam_init_rng(rng_ctx, dev); - if (err) - goto free_rng_ctx; + if (!devres_open_group(ctrldev, caam_rng_init, GFP_KERNEL)) + return -ENOMEM; - dev_info(dev, "registering rng-caam\n"); + ctx = devm_kzalloc(ctrldev, sizeof(*ctx), GFP_KERNEL); + if (!ctx) + return -ENOMEM; - err = hwrng_register(&caam_rng); - if (!err) { - init_done = true; - return err; + ctx->ctrldev = ctrldev; + + ctx->rng.name = "rng-caam"; + ctx->rng.init = caam_init; + ctx->rng.cleanup = caam_cleanup; + 
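
The rewritten caamrng above drops the double-buffer scheme in favour of a kfifo that a workqueue keeps topped up: non-blocking reads drain the fifo and kick the worker when it runs dry, while blocking reads go straight to the hardware for a 16-byte FIFO store. A rough userspace model of that read policy (the kfifo is modelled with a plain byte buffer; 16 follows CAAM_RNG_MAX_FIFO_STORE_SIZE):

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>
#include <string.h>

#define MAX_STORE 16    /* CAAM_RNG_MAX_FIFO_STORE_SIZE in the driver */

struct prefetch {
	unsigned char buf[MAX_STORE];
	size_t len;
	bool refill_scheduled;
};

/* Stand-in for one synchronous FIFO-store read from the RNG hardware. */
static size_t hw_read(unsigned char *dst, size_t max)
{
	size_t n = max < MAX_STORE ? max : MAX_STORE;

	memset(dst, 0xaa, n);   /* pretend entropy */
	return n;
}

static size_t rng_read(struct prefetch *p, unsigned char *dst, size_t max,
		       bool wait)
{
	size_t n;

	if (wait)                               /* blocking caller: hit hardware */
		return hw_read(dst, max);

	n = max < p->len ? max : p->len;        /* otherwise serve the prefetch  */
	memcpy(dst, p->buf, n);
	memmove(p->buf, p->buf + n, p->len - n);
	p->len -= n;

	if (p->len == 0)                        /* like schedule_work(&worker)   */
		p->refill_scheduled = true;

	return n;
}

int main(void)
{
	struct prefetch p = { .len = 0 };
	unsigned char out[8];
	size_t got = rng_read(&p, out, sizeof(out), false);

	printf("non-blocking read: %zu bytes, refill scheduled: %d\n",
	       got, p.refill_scheduled);
	return 0;
}
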
ctx->rng.read = caam_read; + ctx->rng.priv = (unsigned long)ctx; + ctx->rng.quality = 1024; + + dev_info(ctrldev, "registering rng-caam\n"); + + ret = devm_hwrng_register(ctrldev, &ctx->rng); + if (ret) { + caam_rng_exit(ctrldev); + return ret; } -free_rng_ctx: - kfree(rng_ctx); -free_caam_alloc: - caam_jr_free(dev); - return err; + devres_close_group(ctrldev, caam_rng_init); + return 0; } diff --git a/drivers/crypto/caam/ctrl.c b/drivers/crypto/caam/ctrl.c index 7139366da016..4fcdd262e581 100644 --- a/drivers/crypto/caam/ctrl.c +++ b/drivers/crypto/caam/ctrl.c @@ -10,6 +10,7 @@ #include <linux/of_address.h> #include <linux/of_irq.h> #include <linux/sys_soc.h> +#include <linux/fsl/mc.h> #include "compat.h" #include "regs.h" @@ -36,7 +37,8 @@ static void build_instantiation_desc(u32 *desc, int handle, int do_sk) init_job_desc(desc, 0); op_flags = OP_TYPE_CLASS1_ALG | OP_ALG_ALGSEL_RNG | - (handle << OP_ALG_AAI_SHIFT) | OP_ALG_AS_INIT; + (handle << OP_ALG_AAI_SHIFT) | OP_ALG_AS_INIT | + OP_ALG_PR_ON; /* INIT RNG in non-test mode */ append_operation(desc, op_flags); @@ -196,7 +198,7 @@ static int deinstantiate_rng(struct device *ctrldev, int state_handle_mask) u32 *desc, status; int sh_idx, ret = 0; - desc = kmalloc(CAAM_CMD_SZ * 3, GFP_KERNEL); + desc = kmalloc(CAAM_CMD_SZ * 3, GFP_KERNEL | GFP_DMA); if (!desc) return -ENOMEM; @@ -273,17 +275,30 @@ static int instantiate_rng(struct device *ctrldev, int state_handle_mask, int ret = 0, sh_idx; ctrl = (struct caam_ctrl __iomem *)ctrlpriv->ctrl; - desc = kmalloc(CAAM_CMD_SZ * 7, GFP_KERNEL); + desc = kmalloc(CAAM_CMD_SZ * 7, GFP_KERNEL | GFP_DMA); if (!desc) return -ENOMEM; for (sh_idx = 0; sh_idx < RNG4_MAX_HANDLES; sh_idx++) { + const u32 rdsta_if = RDSTA_IF0 << sh_idx; + const u32 rdsta_pr = RDSTA_PR0 << sh_idx; + const u32 rdsta_mask = rdsta_if | rdsta_pr; /* * If the corresponding bit is set, this state handle * was initialized by somebody else, so it's left alone. */ - if ((1 << sh_idx) & state_handle_mask) - continue; + if (rdsta_if & state_handle_mask) { + if (rdsta_pr & state_handle_mask) + continue; + + dev_info(ctrldev, + "RNG4 SH%d was previously instantiated without prediction resistance. Tearing it down\n", + sh_idx); + + ret = deinstantiate_rng(ctrldev, rdsta_if); + if (ret) + break; + } /* Create the descriptor for instantiating RNG State Handle */ build_instantiation_desc(desc, sh_idx, gen_sk); @@ -303,9 +318,9 @@ static int instantiate_rng(struct device *ctrldev, int state_handle_mask, if (ret) break; - rdsta_val = rd_reg32(&ctrl->r4tst[0].rdsta) & RDSTA_IFMASK; + rdsta_val = rd_reg32(&ctrl->r4tst[0].rdsta) & RDSTA_MASK; if ((status && status != JRSTA_SSRC_JUMP_HALT_CC) || - !(rdsta_val & (1 << sh_idx))) { + (rdsta_val & rdsta_mask) != rdsta_mask) { ret = -EAGAIN; break; } @@ -341,8 +356,12 @@ static void kick_trng(struct platform_device *pdev, int ent_delay) ctrl = (struct caam_ctrl __iomem *)ctrlpriv->ctrl; r4tst = &ctrl->r4tst[0]; - /* put RNG4 into program mode */ - clrsetbits_32(&r4tst->rtmctl, 0, RTMCTL_PRGM); + /* + * Setting both RTMCTL:PRGM and RTMCTL:TRNG_ACC causes TRNG to + * properly invalidate the entropy in the entropy register and + * force re-generation. 
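
The instantiate_rng() loop above now evaluates two RDSTA bits per state handle: IFi says the handle is instantiated, PRi says it was instantiated with prediction resistance, and a handle that has IF set but PR clear is torn down and re-created. A small compilable model of that per-handle decision, reusing the bit layout from the regs.h hunk later in this patch (IF0/IF1 at bits 0-1, PR0/PR1 at bits 4-5):

#include <stdint.h>
#include <stdio.h>

#define BIT(n)            (1u << (n))
#define RDSTA_IF0         BIT(0)
#define RDSTA_PR0         BIT(4)
#define RNG4_MAX_HANDLES  2

enum handle_action { INSTANTIATE, REINSTANTIATE, LEAVE_ALONE };

/* Decide what to do with state handle sh_idx given an RDSTA snapshot. */
static enum handle_action classify(uint32_t rdsta, int sh_idx)
{
	uint32_t inst = RDSTA_IF0 << sh_idx;
	uint32_t pr   = RDSTA_PR0 << sh_idx;

	if (!(rdsta & inst))
		return INSTANTIATE;     /* never instantiated                     */
	if (rdsta & pr)
		return LEAVE_ALONE;     /* already has prediction resistance      */
	return REINSTANTIATE;           /* instantiated without PR: tear it down  */
}

int main(void)
{
	/* e.g. SH0 set up by the boot loader without PR, SH1 untouched */
	uint32_t rdsta = RDSTA_IF0;
	int i;

	for (i = 0; i < RNG4_MAX_HANDLES; i++)
		printf("SH%d -> action %d\n", i, classify(rdsta, i));
	return 0;
}
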
+ */ + clrsetbits_32(&r4tst->rtmctl, 0, RTMCTL_PRGM | RTMCTL_ACC); /* * Performance-wise, it does not make sense to @@ -372,7 +391,8 @@ start_rng: * select raw sampling in both entropy shifter * and statistical checker; ; put RNG4 into run mode */ - clrsetbits_32(&r4tst->rtmctl, RTMCTL_PRGM, RTMCTL_SAMP_MODE_RAW_ES_SC); + clrsetbits_32(&r4tst->rtmctl, RTMCTL_PRGM | RTMCTL_ACC, + RTMCTL_SAMP_MODE_RAW_ES_SC); } static int caam_get_era_from_hw(struct caam_ctrl __iomem *ctrl) @@ -559,6 +579,26 @@ static void caam_remove_debugfs(void *root) } #endif +#ifdef CONFIG_FSL_MC_BUS +static bool check_version(struct fsl_mc_version *mc_version, u32 major, + u32 minor, u32 revision) +{ + if (mc_version->major > major) + return true; + + if (mc_version->major == major) { + if (mc_version->minor > minor) + return true; + + if (mc_version->minor == minor && + mc_version->revision > revision) + return true; + } + + return false; +} +#endif + /* Probe routine for CAAM top (controller) level */ static int caam_probe(struct platform_device *pdev) { @@ -577,6 +617,7 @@ static int caam_probe(struct platform_device *pdev) u8 rng_vid; int pg_size; int BLOCK_OFFSET = 0; + bool pr_support = false; ctrlpriv = devm_kzalloc(&pdev->dev, sizeof(*ctrlpriv), GFP_KERNEL); if (!ctrlpriv) @@ -662,6 +703,21 @@ static int caam_probe(struct platform_device *pdev) /* Get the IRQ of the controller (for security violations only) */ ctrlpriv->secvio_irq = irq_of_parse_and_map(nprop, 0); + np = of_find_compatible_node(NULL, NULL, "fsl,qoriq-mc"); + ctrlpriv->mc_en = !!np; + of_node_put(np); + +#ifdef CONFIG_FSL_MC_BUS + if (ctrlpriv->mc_en) { + struct fsl_mc_version *mc_version; + + mc_version = fsl_mc_get_version(); + if (mc_version) + pr_support = check_version(mc_version, 10, 20, 0); + else + return -EPROBE_DEFER; + } +#endif /* * Enable DECO watchdogs and, if this is a PHYS_ADDR_T_64BIT kernel, @@ -669,10 +725,6 @@ static int caam_probe(struct platform_device *pdev) * In case of SoCs with Management Complex, MC f/w performs * the configuration. */ - np = of_find_compatible_node(NULL, NULL, "fsl,qoriq-mc"); - ctrlpriv->mc_en = !!np; - of_node_put(np); - if (!ctrlpriv->mc_en) clrsetbits_32(&ctrl->mcr, MCFGR_AWCACHE_MASK, MCFGR_AWCACHE_CACH | MCFGR_AWCACHE_BUFF | @@ -779,7 +831,7 @@ static int caam_probe(struct platform_device *pdev) * already instantiated, do RNG instantiation * In case of SoCs with Management Complex, RNG is managed by MC f/w. */ - if (!ctrlpriv->mc_en && rng_vid >= 4) { + if (!(ctrlpriv->mc_en && pr_support) && rng_vid >= 4) { ctrlpriv->rng4_sh_init = rd_reg32(&ctrl->r4tst[0].rdsta); /* @@ -789,11 +841,11 @@ static int caam_probe(struct platform_device *pdev) * to regenerate these keys before the next POR. */ gen_sk = ctrlpriv->rng4_sh_init & RDSTA_SKVN ? 0 : 1; - ctrlpriv->rng4_sh_init &= RDSTA_IFMASK; + ctrlpriv->rng4_sh_init &= RDSTA_MASK; do { int inst_handles = rd_reg32(&ctrl->r4tst[0].rdsta) & - RDSTA_IFMASK; + RDSTA_MASK; /* * If either SH were instantiated by somebody else * (e.g. 
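
On SoCs with a Management Complex, caam_probe() above defers probing until the MC firmware version is available and treats the firmware as handling RNG prediction resistance only when that version is strictly newer than 10.20.0, in which case the driver skips its own RNG instantiation. A stand-alone version of the comparison and the resulting decision (struct name reused for clarity, otherwise plain C):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct fsl_mc_version { uint32_t major, minor, revision; };

/* True when the reported version is strictly newer than major.minor.revision. */
static bool check_version(const struct fsl_mc_version *v,
			  uint32_t major, uint32_t minor, uint32_t revision)
{
	if (v->major > major)
		return true;
	if (v->major == major) {
		if (v->minor > minor)
			return true;
		if (v->minor == minor && v->revision > revision)
			return true;
	}
	return false;
}

int main(void)
{
	struct fsl_mc_version fw = { 10, 22, 0 };

	/* pr_support mirrors the probe: newer MC firmware owns the RNG setup */
	bool pr_support = check_version(&fw, 10, 20, 0);

	printf("MC %u.%u.%u -> pr_support=%d\n",
	       fw.major, fw.minor, fw.revision, pr_support);
	/* had fsl_mc_get_version() returned NULL, probe would -EPROBE_DEFER */
	return 0;
}
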
u-boot) then it is assumed that the entropy @@ -833,7 +885,7 @@ static int caam_probe(struct platform_device *pdev) * Set handles init'ed by this module as the complement of the * already initialized ones */ - ctrlpriv->rng4_sh_init = ~ctrlpriv->rng4_sh_init & RDSTA_IFMASK; + ctrlpriv->rng4_sh_init = ~ctrlpriv->rng4_sh_init & RDSTA_MASK; /* Enable RDB bit so that RNG works faster */ clrsetbits_32(&ctrl->scfgr, 0, SCFGR_RDBENABLE); diff --git a/drivers/crypto/caam/desc.h b/drivers/crypto/caam/desc.h index 4b6854bf896a..e796d3cb9be8 100644 --- a/drivers/crypto/caam/desc.h +++ b/drivers/crypto/caam/desc.h @@ -1254,6 +1254,8 @@ #define OP_ALG_ICV_OFF (0 << OP_ALG_ICV_SHIFT) #define OP_ALG_ICV_ON (1 << OP_ALG_ICV_SHIFT) +#define OP_ALG_PR_ON BIT(1) + #define OP_ALG_DIR_SHIFT 0 #define OP_ALG_DIR_MASK 1 #define OP_ALG_DECRYPT 0 diff --git a/drivers/crypto/caam/intern.h b/drivers/crypto/caam/intern.h index c7c10c90464b..402d6a362e8c 100644 --- a/drivers/crypto/caam/intern.h +++ b/drivers/crypto/caam/intern.h @@ -11,6 +11,7 @@ #define INTERN_H #include "ctrl.h" +#include <crypto/engine.h> /* Currently comes from Kconfig param as a ^2 (driver-required) */ #define JOBR_DEPTH (1 << CONFIG_CRYPTO_DEV_FSL_CAAM_RINGSIZE) @@ -46,6 +47,7 @@ struct caam_drv_private_jr { struct caam_job_ring __iomem *rregs; /* JobR's register space */ struct tasklet_struct irqtask; int irq; /* One per queue */ + bool hwrng; /* Number of scatterlist crypt transforms active on the JobR */ atomic_t tfm_count ____cacheline_aligned; @@ -60,6 +62,7 @@ struct caam_drv_private_jr { int out_ring_read_index; /* Output index "tail" */ int tail; /* entinfo (s/w ring) tail index */ void *outring; /* Base of output ring, DMA-safe */ + struct crypto_engine *engine; }; /* @@ -161,7 +164,7 @@ static inline void caam_pkc_exit(void) #ifdef CONFIG_CRYPTO_DEV_FSL_CAAM_RNG_API int caam_rng_init(struct device *dev); -void caam_rng_exit(void); +void caam_rng_exit(struct device *dev); #else @@ -170,9 +173,7 @@ static inline int caam_rng_init(struct device *dev) return 0; } -static inline void caam_rng_exit(void) -{ -} +static inline void caam_rng_exit(struct device *dev) {} #endif /* CONFIG_CRYPTO_DEV_FSL_CAAM_RNG_API */ diff --git a/drivers/crypto/caam/jr.c b/drivers/crypto/caam/jr.c index fc97cde27059..4af22e7ceb4f 100644 --- a/drivers/crypto/caam/jr.c +++ b/drivers/crypto/caam/jr.c @@ -27,7 +27,8 @@ static struct jr_driver_data driver_data; static DEFINE_MUTEX(algs_lock); static unsigned int active_devs; -static void register_algs(struct device *dev) +static void register_algs(struct caam_drv_private_jr *jrpriv, + struct device *dev) { mutex_lock(&algs_lock); @@ -37,7 +38,7 @@ static void register_algs(struct device *dev) caam_algapi_init(dev); caam_algapi_hash_init(dev); caam_pkc_init(dev); - caam_rng_init(dev); + jrpriv->hwrng = !caam_rng_init(dev); caam_qi_algapi_init(dev); algs_unlock: @@ -53,7 +54,6 @@ static void unregister_algs(void) caam_qi_algapi_exit(); - caam_rng_exit(); caam_pkc_exit(); caam_algapi_hash_exit(); caam_algapi_exit(); @@ -62,6 +62,15 @@ algs_unlock: mutex_unlock(&algs_lock); } +static void caam_jr_crypto_engine_exit(void *data) +{ + struct device *jrdev = data; + struct caam_drv_private_jr *jrpriv = dev_get_drvdata(jrdev); + + /* Free the resources of crypto-engine */ + crypto_engine_exit(jrpriv->engine); +} + static int caam_reset_hw_jr(struct device *dev) { struct caam_drv_private_jr *jrp = dev_get_drvdata(dev); @@ -126,6 +135,9 @@ static int caam_jr_remove(struct platform_device *pdev) jrdev = &pdev->dev; jrpriv = 
dev_get_drvdata(jrdev); + if (jrpriv->hwrng) + caam_rng_exit(jrdev->parent); + /* * Return EBUSY if job ring already allocated. */ @@ -324,8 +336,8 @@ void caam_jr_free(struct device *rdev) EXPORT_SYMBOL(caam_jr_free); /** - * caam_jr_enqueue() - Enqueue a job descriptor head. Returns 0 if OK, - * -EBUSY if the queue is full, -EIO if it cannot map the caller's + * caam_jr_enqueue() - Enqueue a job descriptor head. Returns -EINPROGRESS + * if OK, -ENOSPC if the queue is full, -EIO if it cannot map the caller's * descriptor. * @dev: device of the job ring to be used. This device should have * been assigned prior by caam_jr_register(). @@ -377,7 +389,7 @@ int caam_jr_enqueue(struct device *dev, u32 *desc, CIRC_SPACE(head, tail, JOBR_DEPTH) <= 0) { spin_unlock_bh(&jrp->inplock); dma_unmap_single(dev, desc_dma, desc_size, DMA_TO_DEVICE); - return -EBUSY; + return -ENOSPC; } head_entry = &jrp->entinfo[head]; @@ -414,7 +426,7 @@ int caam_jr_enqueue(struct device *dev, u32 *desc, spin_unlock_bh(&jrp->inplock); - return 0; + return -EINPROGRESS; } EXPORT_SYMBOL(caam_jr_enqueue); @@ -505,7 +517,7 @@ static int caam_jr_probe(struct platform_device *pdev) int error; jrdev = &pdev->dev; - jrpriv = devm_kmalloc(jrdev, sizeof(*jrpriv), GFP_KERNEL); + jrpriv = devm_kzalloc(jrdev, sizeof(*jrpriv), GFP_KERNEL); if (!jrpriv) return -ENOMEM; @@ -538,6 +550,25 @@ static int caam_jr_probe(struct platform_device *pdev) return error; } + /* Initialize crypto engine */ + jrpriv->engine = crypto_engine_alloc_init(jrdev, false); + if (!jrpriv->engine) { + dev_err(jrdev, "Could not init crypto-engine\n"); + return -ENOMEM; + } + + error = devm_add_action_or_reset(jrdev, caam_jr_crypto_engine_exit, + jrdev); + if (error) + return error; + + /* Start crypto engine */ + error = crypto_engine_start(jrpriv->engine); + if (error) { + dev_err(jrdev, "Could not start crypto-engine\n"); + return error; + } + /* Identify the interrupt */ jrpriv->irq = irq_of_parse_and_map(nprop, 0); if (!jrpriv->irq) { @@ -562,7 +593,7 @@ static int caam_jr_probe(struct platform_device *pdev) atomic_set(&jrpriv->tfm_count, 0); - register_algs(jrdev->parent); + register_algs(jrpriv, jrdev->parent); return 0; } diff --git a/drivers/crypto/caam/key_gen.c b/drivers/crypto/caam/key_gen.c index 5a851ddc48fb..b0e8a4939b4f 100644 --- a/drivers/crypto/caam/key_gen.c +++ b/drivers/crypto/caam/key_gen.c @@ -108,7 +108,7 @@ int gen_split_key(struct device *jrdev, u8 *key_out, init_completion(&result.completion); ret = caam_jr_enqueue(jrdev, desc, split_key_done, &result); - if (!ret) { + if (ret == -EINPROGRESS) { /* in progress */ wait_for_completion(&result.completion); ret = result.err; diff --git a/drivers/crypto/caam/qi.c b/drivers/crypto/caam/qi.c index dacf2fa4aa8e..b390b935db6d 100644 --- a/drivers/crypto/caam/qi.c +++ b/drivers/crypto/caam/qi.c @@ -4,7 +4,7 @@ * Queue Interface backend functionality * * Copyright 2013-2016 Freescale Semiconductor, Inc. 
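
The jr.c hunk above changes the caam_jr_enqueue() contract: an accepted job now returns -EINPROGRESS and a full ring returns -ENOSPC instead of -EBUSY, which is what lets callers such as gen_split_key() treat only -EINPROGRESS as "wait for completion". A tiny model of that caller-side pattern (stubbed enqueue, userspace errno values):

#include <errno.h>
#include <stdbool.h>
#include <stdio.h>

/* Stub with the new contract: accepted -> -EINPROGRESS, full -> -ENOSPC. */
static int jr_enqueue(bool ring_full)
{
	return ring_full ? -ENOSPC : -EINPROGRESS;
}

static int split_key_like_caller(bool ring_full)
{
	int ret = jr_enqueue(ring_full);

	if (ret == -EINPROGRESS) {
		/* the driver does wait_for_completion(&result.completion) here */
		printf("job accepted, waiting for completion\n");
		ret = 0;                /* result.err after the callback fires */
	}
	return ret;                     /* -ENOSPC propagated on a full ring   */
}

int main(void)
{
	printf("accepted: %d\n", split_key_like_caller(false));
	printf("full ring: %d\n", split_key_like_caller(true));
	return 0;
}
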
- * Copyright 2016-2017, 2019 NXP + * Copyright 2016-2017, 2019-2020 NXP */ #include <linux/cpumask.h> @@ -124,8 +124,10 @@ int caam_qi_enqueue(struct device *qidev, struct caam_drv_req *req) do { ret = qman_enqueue(req->drv_ctx->req_fq, &fd); - if (likely(!ret)) + if (likely(!ret)) { + refcount_inc(&req->drv_ctx->refcnt); return 0; + } if (ret != -EBUSY) break; @@ -148,11 +150,6 @@ static void caam_fq_ern_cb(struct qman_portal *qm, struct qman_fq *fq, fd = &msg->ern.fd; - if (qm_fd_get_format(fd) != qm_fd_compound) { - dev_err(qidev, "Non-compound FD from CAAM\n"); - return; - } - drv_req = caam_iova_to_virt(priv->domain, qm_fd_addr_get64(fd)); if (!drv_req) { dev_err(qidev, @@ -160,6 +157,13 @@ static void caam_fq_ern_cb(struct qman_portal *qm, struct qman_fq *fq, return; } + refcount_dec(&drv_req->drv_ctx->refcnt); + + if (qm_fd_get_format(fd) != qm_fd_compound) { + dev_err(qidev, "Non-compound FD from CAAM\n"); + return; + } + dma_unmap_single(drv_req->drv_ctx->qidev, qm_fd_addr(fd), sizeof(drv_req->fd_sgt), DMA_BIDIRECTIONAL); @@ -287,9 +291,10 @@ empty_fq: return ret; } -static int empty_caam_fq(struct qman_fq *fq) +static int empty_caam_fq(struct qman_fq *fq, struct caam_drv_ctx *drv_ctx) { int ret; + int retries = 10; struct qm_mcr_queryfq_np np; /* Wait till the older CAAM FQ get empty */ @@ -304,11 +309,18 @@ static int empty_caam_fq(struct qman_fq *fq) msleep(20); } while (1); - /* - * Give extra time for pending jobs from this FQ in holding tanks - * to get processed - */ - msleep(20); + /* Wait until pending jobs from this FQ are processed by CAAM */ + do { + if (refcount_read(&drv_ctx->refcnt) == 1) + break; + + msleep(20); + } while (--retries); + + if (!retries) + dev_warn_once(drv_ctx->qidev, "%d frames from FQID %u still pending in CAAM\n", + refcount_read(&drv_ctx->refcnt), fq->fqid); + return 0; } @@ -340,7 +352,7 @@ int caam_drv_ctx_update(struct caam_drv_ctx *drv_ctx, u32 *sh_desc) drv_ctx->req_fq = new_fq; /* Empty and remove the older FQ */ - ret = empty_caam_fq(old_fq); + ret = empty_caam_fq(old_fq, drv_ctx); if (ret) { dev_err(qidev, "Old CAAM FQ empty failed: %d\n", ret); @@ -453,6 +465,9 @@ struct caam_drv_ctx *caam_drv_ctx_init(struct device *qidev, return ERR_PTR(-ENOMEM); } + /* init reference counter used to track references to request FQ */ + refcount_set(&drv_ctx->refcnt, 1); + drv_ctx->qidev = qidev; return drv_ctx; } @@ -571,6 +586,16 @@ static enum qman_cb_dqrr_result caam_rsp_fq_dqrr_cb(struct qman_portal *p, return qman_cb_dqrr_stop; fd = &dqrr->fd; + + drv_req = caam_iova_to_virt(priv->domain, qm_fd_addr_get64(fd)); + if (unlikely(!drv_req)) { + dev_err(qidev, + "Can't find original request for caam response\n"); + return qman_cb_dqrr_consume; + } + + refcount_dec(&drv_req->drv_ctx->refcnt); + status = be32_to_cpu(fd->status); if (unlikely(status)) { u32 ssrc = status & JRSTA_SSRC_MASK; @@ -588,13 +613,6 @@ static enum qman_cb_dqrr_result caam_rsp_fq_dqrr_cb(struct qman_portal *p, return qman_cb_dqrr_consume; } - drv_req = caam_iova_to_virt(priv->domain, qm_fd_addr_get64(fd)); - if (unlikely(!drv_req)) { - dev_err(qidev, - "Can't find original request for caam response\n"); - return qman_cb_dqrr_consume; - } - dma_unmap_single(drv_req->drv_ctx->qidev, qm_fd_addr(fd), sizeof(drv_req->fd_sgt), DMA_BIDIRECTIONAL); diff --git a/drivers/crypto/caam/qi.h b/drivers/crypto/caam/qi.h index 848958951f68..5894f16f8fe3 100644 --- a/drivers/crypto/caam/qi.h +++ b/drivers/crypto/caam/qi.h @@ -3,7 +3,7 @@ * Public definitions for the CAAM/QI (Queue Interface) 
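
The qi.c changes above track in-flight frames per driver context with a refcount initialized to 1: each successful qman_enqueue() increments it and each response or error-notification callback decrements it, so empty_caam_fq() can poll until the count returns to 1 (with up to ten 20 ms retries) instead of sleeping a fixed 20 ms and hoping. A compact model of that idle test (plain counter standing in for refcount_t):

#include <stdbool.h>
#include <stdio.h>

struct drv_ctx_model {
	int refcnt;     /* refcount_t in the driver, initialized to 1 */
};

static void on_enqueue(struct drv_ctx_model *c)  { c->refcnt++; }
static void on_response(struct drv_ctx_model *c) { c->refcnt--; }

/* The context is idle when only the initial reference remains. */
static bool fq_drained(const struct drv_ctx_model *c)
{
	return c->refcnt == 1;
}

int main(void)
{
	struct drv_ctx_model ctx = { .refcnt = 1 };

	on_enqueue(&ctx);                           /* frame sent to CAAM      */
	printf("drained? %d\n", fq_drained(&ctx));  /* 0: still pending        */
	on_response(&ctx);                          /* response consumed       */
	printf("drained? %d\n", fq_drained(&ctx));  /* 1: safe to retire the FQ */
	return 0;
}
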
backend. * * Copyright 2013-2016 Freescale Semiconductor, Inc. - * Copyright 2016-2017 NXP + * Copyright 2016-2017, 2020 NXP */ #ifndef __QI_H__ @@ -52,6 +52,7 @@ enum optype { * @context_a: shared descriptor dma address * @req_fq: to-CAAM request frame queue * @rsp_fq: from-CAAM response frame queue + * @refcnt: reference counter incremented for each frame enqueued in to-CAAM FQ * @cpu: cpu on which to receive CAAM response * @op_type: operation type * @qidev: device pointer for CAAM/QI backend @@ -62,6 +63,7 @@ struct caam_drv_ctx { dma_addr_t context_a; struct qman_fq *req_fq; struct qman_fq *rsp_fq; + refcount_t refcnt; int cpu; enum optype op_type; struct device *qidev; diff --git a/drivers/crypto/caam/regs.h b/drivers/crypto/caam/regs.h index 05127b70527d..0f810bc13b2b 100644 --- a/drivers/crypto/caam/regs.h +++ b/drivers/crypto/caam/regs.h @@ -487,7 +487,8 @@ struct rngtst { /* RNG4 TRNG test registers */ struct rng4tst { -#define RTMCTL_PRGM 0x00010000 /* 1 -> program mode, 0 -> run mode */ +#define RTMCTL_ACC BIT(5) /* TRNG access mode */ +#define RTMCTL_PRGM BIT(16) /* 1 -> program mode, 0 -> run mode */ #define RTMCTL_SAMP_MODE_VON_NEUMANN_ES_SC 0 /* use von Neumann data in both entropy shifter and statistical checker */ @@ -523,9 +524,11 @@ struct rng4tst { u32 rsvd1[40]; #define RDSTA_SKVT 0x80000000 #define RDSTA_SKVN 0x40000000 +#define RDSTA_PR0 BIT(4) +#define RDSTA_PR1 BIT(5) #define RDSTA_IF0 0x00000001 #define RDSTA_IF1 0x00000002 -#define RDSTA_IFMASK (RDSTA_IF1 | RDSTA_IF0) +#define RDSTA_MASK (RDSTA_PR1 | RDSTA_PR0 | RDSTA_IF1 | RDSTA_IF0) u32 rdsta; u32 rsvd2[15]; }; diff --git a/drivers/crypto/cavium/nitrox/nitrox_main.c b/drivers/crypto/cavium/nitrox/nitrox_main.c index c4632d84c9a1..e91be9b8b083 100644 --- a/drivers/crypto/cavium/nitrox/nitrox_main.c +++ b/drivers/crypto/cavium/nitrox/nitrox_main.c @@ -71,7 +71,7 @@ struct ucode { char version[VERSION_LEN - 1]; __be32 code_size; u8 raz[12]; - u64 code[0]; + u64 code[]; }; /** diff --git a/drivers/crypto/ccp/psp-dev.c b/drivers/crypto/ccp/psp-dev.c index e95e7aa5dbf1..ae7b44599914 100644 --- a/drivers/crypto/ccp/psp-dev.c +++ b/drivers/crypto/ccp/psp-dev.c @@ -215,6 +215,9 @@ void psp_dev_destroy(struct sp_device *sp) tee_dev_destroy(psp); sp_free_psp_irq(sp, psp); + + if (sp->clear_psp_master_device) + sp->clear_psp_master_device(sp); } void psp_set_sev_irq_handler(struct psp_device *psp, psp_irq_handler_t handler, diff --git a/drivers/crypto/ccp/sev-dev.c b/drivers/crypto/ccp/sev-dev.c index e467860f797d..896f190b9a50 100644 --- a/drivers/crypto/ccp/sev-dev.c +++ b/drivers/crypto/ccp/sev-dev.c @@ -283,11 +283,11 @@ static int sev_get_platform_state(int *state, int *error) return rc; } -static int sev_ioctl_do_reset(struct sev_issue_cmd *argp) +static int sev_ioctl_do_reset(struct sev_issue_cmd *argp, bool writable) { int state, rc; - if (!capable(CAP_SYS_ADMIN)) + if (!writable) return -EPERM; /* @@ -331,12 +331,12 @@ static int sev_ioctl_do_platform_status(struct sev_issue_cmd *argp) return ret; } -static int sev_ioctl_do_pek_pdh_gen(int cmd, struct sev_issue_cmd *argp) +static int sev_ioctl_do_pek_pdh_gen(int cmd, struct sev_issue_cmd *argp, bool writable) { struct sev_device *sev = psp_master->sev_data; int rc; - if (!capable(CAP_SYS_ADMIN)) + if (!writable) return -EPERM; if (sev->state == SEV_STATE_UNINIT) { @@ -348,7 +348,7 @@ static int sev_ioctl_do_pek_pdh_gen(int cmd, struct sev_issue_cmd *argp) return __sev_do_cmd_locked(cmd, NULL, &argp->error); } -static int sev_ioctl_do_pek_csr(struct sev_issue_cmd 
*argp) +static int sev_ioctl_do_pek_csr(struct sev_issue_cmd *argp, bool writable) { struct sev_device *sev = psp_master->sev_data; struct sev_user_data_pek_csr input; @@ -356,7 +356,7 @@ static int sev_ioctl_do_pek_csr(struct sev_issue_cmd *argp) void *blob = NULL; int ret; - if (!capable(CAP_SYS_ADMIN)) + if (!writable) return -EPERM; if (copy_from_user(&input, (void __user *)argp->data, sizeof(input))) @@ -539,7 +539,7 @@ fw_err: return ret; } -static int sev_ioctl_do_pek_import(struct sev_issue_cmd *argp) +static int sev_ioctl_do_pek_import(struct sev_issue_cmd *argp, bool writable) { struct sev_device *sev = psp_master->sev_data; struct sev_user_data_pek_cert_import input; @@ -547,7 +547,7 @@ static int sev_ioctl_do_pek_import(struct sev_issue_cmd *argp) void *pek_blob, *oca_blob; int ret; - if (!capable(CAP_SYS_ADMIN)) + if (!writable) return -EPERM; if (copy_from_user(&input, (void __user *)argp->data, sizeof(input))) @@ -698,7 +698,7 @@ static int sev_ioctl_do_get_id(struct sev_issue_cmd *argp) return ret; } -static int sev_ioctl_do_pdh_export(struct sev_issue_cmd *argp) +static int sev_ioctl_do_pdh_export(struct sev_issue_cmd *argp, bool writable) { struct sev_device *sev = psp_master->sev_data; struct sev_user_data_pdh_cert_export input; @@ -708,7 +708,7 @@ static int sev_ioctl_do_pdh_export(struct sev_issue_cmd *argp) /* If platform is not in INIT state then transition it to INIT. */ if (sev->state != SEV_STATE_INIT) { - if (!capable(CAP_SYS_ADMIN)) + if (!writable) return -EPERM; ret = __sev_platform_init_locked(&argp->error); @@ -801,6 +801,7 @@ static long sev_ioctl(struct file *file, unsigned int ioctl, unsigned long arg) void __user *argp = (void __user *)arg; struct sev_issue_cmd input; int ret = -EFAULT; + bool writable = file->f_mode & FMODE_WRITE; if (!psp_master || !psp_master->sev_data) return -ENODEV; @@ -819,25 +820,25 @@ static long sev_ioctl(struct file *file, unsigned int ioctl, unsigned long arg) switch (input.cmd) { case SEV_FACTORY_RESET: - ret = sev_ioctl_do_reset(&input); + ret = sev_ioctl_do_reset(&input, writable); break; case SEV_PLATFORM_STATUS: ret = sev_ioctl_do_platform_status(&input); break; case SEV_PEK_GEN: - ret = sev_ioctl_do_pek_pdh_gen(SEV_CMD_PEK_GEN, &input); + ret = sev_ioctl_do_pek_pdh_gen(SEV_CMD_PEK_GEN, &input, writable); break; case SEV_PDH_GEN: - ret = sev_ioctl_do_pek_pdh_gen(SEV_CMD_PDH_GEN, &input); + ret = sev_ioctl_do_pek_pdh_gen(SEV_CMD_PDH_GEN, &input, writable); break; case SEV_PEK_CSR: - ret = sev_ioctl_do_pek_csr(&input); + ret = sev_ioctl_do_pek_csr(&input, writable); break; case SEV_PEK_CERT_IMPORT: - ret = sev_ioctl_do_pek_import(&input); + ret = sev_ioctl_do_pek_import(&input, writable); break; case SEV_PDH_CERT_EXPORT: - ret = sev_ioctl_do_pdh_export(&input); + ret = sev_ioctl_do_pdh_export(&input, writable); break; case SEV_GET_ID: pr_warn_once("SEV_GET_ID command is deprecated, use SEV_GET_ID2\n"); @@ -896,9 +897,9 @@ EXPORT_SYMBOL_GPL(sev_guest_df_flush); static void sev_exit(struct kref *ref) { - struct sev_misc_dev *misc_dev = container_of(ref, struct sev_misc_dev, refcount); - misc_deregister(&misc_dev->misc); + kfree(misc_dev); + misc_dev = NULL; } static int sev_misc_init(struct sev_device *sev) @@ -916,7 +917,7 @@ static int sev_misc_init(struct sev_device *sev) if (!misc_dev) { struct miscdevice *misc; - misc_dev = devm_kzalloc(dev, sizeof(*misc_dev), GFP_KERNEL); + misc_dev = kzalloc(sizeof(*misc_dev), GFP_KERNEL); if (!misc_dev) return -ENOMEM; diff --git a/drivers/crypto/ccp/sp-dev.h 
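
The sev-dev.c hunks above replace the per-handler capable(CAP_SYS_ADMIN) checks with a single 'writable' flag derived from the opener's file mode, so a read-only file descriptor can still query platform status but cannot mutate platform state. A minimal model of that gate (command set reduced to two illustrative cases):

#include <errno.h>
#include <stdbool.h>
#include <stdio.h>

enum sev_cmd_model { SEV_PLATFORM_STATUS_M, SEV_FACTORY_RESET_M };

/*
 * 'writable' is file->f_mode & FMODE_WRITE in the driver.  Queries are
 * allowed for any opener; state-changing commands need a writable fd.
 */
static int sev_ioctl_model(enum sev_cmd_model cmd, bool writable)
{
	switch (cmd) {
	case SEV_PLATFORM_STATUS_M:
		return 0;                       /* read-only query         */
	case SEV_FACTORY_RESET_M:
		return writable ? 0 : -EPERM;   /* destructive: needs write */
	}
	return -EINVAL;
}

int main(void)
{
	printf("status on read-only fd: %d\n",
	       sev_ioctl_model(SEV_PLATFORM_STATUS_M, false));
	printf("reset on read-only fd: %d\n",
	       sev_ioctl_model(SEV_FACTORY_RESET_M, false));
	return 0;
}
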
b/drivers/crypto/ccp/sp-dev.h index 423594608ad1..f913f1494af9 100644 --- a/drivers/crypto/ccp/sp-dev.h +++ b/drivers/crypto/ccp/sp-dev.h @@ -90,6 +90,7 @@ struct sp_device { /* get and set master device */ struct sp_device*(*get_psp_master_device)(void); void (*set_psp_master_device)(struct sp_device *); + void (*clear_psp_master_device)(struct sp_device *); bool irq_registered; bool use_tasklet; diff --git a/drivers/crypto/ccp/sp-pci.c b/drivers/crypto/ccp/sp-pci.c index 56c1f61c0f84..cb6cb47053f4 100644 --- a/drivers/crypto/ccp/sp-pci.c +++ b/drivers/crypto/ccp/sp-pci.c @@ -146,6 +146,14 @@ static struct sp_device *psp_get_master(void) return sp_dev_master; } +static void psp_clear_master(struct sp_device *sp) +{ + if (sp == sp_dev_master) { + sp_dev_master = NULL; + dev_dbg(sp->dev, "Cleared sp_dev_master\n"); + } +} + static int sp_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) { struct sp_device *sp; @@ -206,6 +214,7 @@ static int sp_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) pci_set_master(pdev); sp->set_psp_master_device = psp_set_master; sp->get_psp_master_device = psp_get_master; + sp->clear_psp_master_device = psp_clear_master; ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(48)); if (ret) { diff --git a/drivers/crypto/ccree/cc_aead.c b/drivers/crypto/ccree/cc_aead.c index 2fc0e0da790b..1cf51edbc4b9 100644 --- a/drivers/crypto/ccree/cc_aead.c +++ b/drivers/crypto/ccree/cc_aead.c @@ -6,8 +6,9 @@ #include <crypto/algapi.h> #include <crypto/internal/aead.h> #include <crypto/authenc.h> -#include <crypto/internal/des.h> +#include <crypto/gcm.h> #include <linux/rtnetlink.h> +#include <crypto/internal/des.h> #include "cc_driver.h" #include "cc_buffer_mgr.h" #include "cc_aead.h" @@ -26,7 +27,7 @@ #define MAX_NONCE_SIZE CTR_RFC3686_NONCE_SIZE struct cc_aead_handle { - cc_sram_addr_t sram_workspace_addr; + u32 sram_workspace_addr; struct list_head aead_list; }; @@ -60,11 +61,6 @@ struct cc_aead_ctx { enum drv_hash_mode auth_mode; }; -static inline bool valid_assoclen(struct aead_request *req) -{ - return ((req->assoclen == 16) || (req->assoclen == 20)); -} - static void cc_aead_exit(struct crypto_aead *tfm) { struct cc_aead_ctx *ctx = crypto_aead_ctx(tfm); @@ -417,7 +413,7 @@ static int cc_get_plain_hmac_key(struct crypto_aead *tfm, const u8 *authkey, dma_addr_t key_dma_addr = 0; struct cc_aead_ctx *ctx = crypto_aead_ctx(tfm); struct device *dev = drvdata_to_dev(ctx->drvdata); - u32 larval_addr = cc_larval_digest_addr(ctx->drvdata, ctx->auth_mode); + u32 larval_addr; struct cc_crypto_req cc_req = {}; unsigned int blocksize; unsigned int digestsize; @@ -448,8 +444,7 @@ static int cc_get_plain_hmac_key(struct crypto_aead *tfm, const u8 *authkey, if (!key) return -ENOMEM; - key_dma_addr = dma_map_single(dev, (void *)key, keylen, - DMA_TO_DEVICE); + key_dma_addr = dma_map_single(dev, key, keylen, DMA_TO_DEVICE); if (dma_mapping_error(dev, key_dma_addr)) { dev_err(dev, "Mapping key va=0x%p len=%u for DMA failed\n", key, keylen); @@ -460,6 +455,8 @@ static int cc_get_plain_hmac_key(struct crypto_aead *tfm, const u8 *authkey, /* Load hash initial state */ hw_desc_init(&desc[idx]); set_cipher_mode(&desc[idx], hashmode); + larval_addr = cc_larval_digest_addr(ctx->drvdata, + ctx->auth_mode); set_din_sram(&desc[idx], larval_addr, digestsize); set_flow_mode(&desc[idx], S_DIN_to_HASH); set_setup_mode(&desc[idx], SETUP_LOAD_STATE0); @@ -796,7 +793,7 @@ static void cc_proc_authen_desc(struct aead_request *areq, * assoc. 
+ iv + data -compact in one table * if assoclen is ZERO only IV perform */ - cc_sram_addr_t mlli_addr = areq_ctx->assoc.sram_addr; + u32 mlli_addr = areq_ctx->assoc.sram_addr; u32 mlli_nents = areq_ctx->assoc.mlli_nents; if (areq_ctx->is_single_pass) { @@ -1170,7 +1167,7 @@ static void cc_mlli_to_sram(struct aead_request *req, req_ctx->data_buff_type == CC_DMA_BUF_MLLI || !req_ctx->is_single_pass) && req_ctx->mlli_params.mlli_len) { dev_dbg(dev, "Copy-to-sram: mlli_dma=%08x, mlli_size=%u\n", - (unsigned int)ctx->drvdata->mlli_sram_addr, + ctx->drvdata->mlli_sram_addr, req_ctx->mlli_params.mlli_len); /* Copy MLLI table host-to-sram */ hw_desc_init(&desc[*seq_size]); @@ -1222,7 +1219,7 @@ static void cc_hmac_authenc(struct aead_request *req, struct cc_hw_desc desc[], req_ctx->is_single_pass); if (req_ctx->is_single_pass) { - /** + /* * Single-pass flow */ cc_set_hmac_desc(req, desc, seq_size); @@ -1234,7 +1231,7 @@ static void cc_hmac_authenc(struct aead_request *req, struct cc_hw_desc desc[], return; } - /** + /* * Double-pass flow * Fallback for unsupported single-pass modes, * i.e. using assoc. data of non-word-multiple @@ -1275,7 +1272,7 @@ cc_xcbc_authenc(struct aead_request *req, struct cc_hw_desc desc[], req_ctx->is_single_pass); if (req_ctx->is_single_pass) { - /** + /* * Single-pass flow */ cc_set_xcbc_desc(req, desc, seq_size); @@ -1286,7 +1283,7 @@ cc_xcbc_authenc(struct aead_request *req, struct cc_hw_desc desc[], return; } - /** + /* * Double-pass flow * Fallback for unsupported single-pass modes, * i.e. using assoc. data of non-word-multiple @@ -1611,7 +1608,6 @@ static void cc_proc_rfc4309_ccm(struct aead_request *req) memcpy(areq_ctx->ctr_iv + CCM_BLOCK_IV_OFFSET, req->iv, CCM_BLOCK_IV_SIZE); req->iv = areq_ctx->ctr_iv; - areq_ctx->assoclen -= CCM_BLOCK_IV_SIZE; } static void cc_set_ghash_desc(struct aead_request *req, @@ -1799,12 +1795,6 @@ static int cc_gcm(struct aead_request *req, struct cc_hw_desc desc[], struct aead_req_ctx *req_ctx = aead_request_ctx(req); unsigned int cipher_flow_mode; - if (req_ctx->gen_ctx.op_type == DRV_CRYPTO_DIRECTION_DECRYPT) { - cipher_flow_mode = AES_and_HASH; - } else { /* Encrypt */ - cipher_flow_mode = AES_to_HASH_and_DOUT; - } - //in RFC4543 no data to encrypt. just copy data from src to dest. if (req_ctx->plaintext_authenticate_only) { cc_proc_cipher_desc(req, BYPASS, desc, seq_size); @@ -1816,6 +1806,12 @@ static int cc_gcm(struct aead_request *req, struct cc_hw_desc desc[], return 0; } + if (req_ctx->gen_ctx.op_type == DRV_CRYPTO_DIRECTION_DECRYPT) { + cipher_flow_mode = AES_and_HASH; + } else { /* Encrypt */ + cipher_flow_mode = AES_to_HASH_and_DOUT; + } + // for gcm and rfc4106. 
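The cc_aead.c hunks above drop redundant (void *) casts around dma_map_single() and keep the usual map-then-check sequence for streaming DMA. For readers less familiar with the DMA API, here is a minimal generic sketch of that pattern; the function and buffer names are placeholders, not part of this driver.

#include <linux/dma-mapping.h>

/* Sketch: map a driver-owned key buffer for device reads, check, then unmap. */
static int example_map_key(struct device *dev, void *key, size_t keylen,
			   dma_addr_t *dma)
{
	*dma = dma_map_single(dev, key, keylen, DMA_TO_DEVICE);
	if (dma_mapping_error(dev, *dma))
		return -ENOMEM;		/* mapping failed, nothing to unmap */

	/* ... point the hardware descriptor at *dma and run the job ... */

	dma_unmap_single(dev, *dma, keylen, DMA_TO_DEVICE);
	return 0;
}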
cc_set_ghash_desc(req, desc, seq_size); /* process(ghash) assoc data */ @@ -1870,8 +1866,7 @@ static int config_gcm_context(struct aead_request *req) */ __be64 temp64; - temp64 = cpu_to_be64((req_ctx->assoclen + - GCM_BLOCK_RFC4_IV_SIZE + cryptlen) * 8); + temp64 = cpu_to_be64((req_ctx->assoclen + cryptlen) * 8); memcpy(&req_ctx->gcm_len_block.len_a, &temp64, sizeof(temp64)); temp64 = 0; memcpy(&req_ctx->gcm_len_block.len_c, &temp64, 8); @@ -1891,7 +1886,6 @@ static void cc_proc_rfc4_gcm(struct aead_request *req) memcpy(areq_ctx->ctr_iv + GCM_BLOCK_RFC4_IV_OFFSET, req->iv, GCM_BLOCK_RFC4_IV_SIZE); req->iv = areq_ctx->ctr_iv; - areq_ctx->assoclen -= GCM_BLOCK_RFC4_IV_SIZE; } static int cc_proc_aead(struct aead_request *req, @@ -1921,8 +1915,8 @@ static int cc_proc_aead(struct aead_request *req, } /* Setup request structure */ - cc_req.user_cb = (void *)cc_aead_complete; - cc_req.user_arg = (void *)req; + cc_req.user_cb = cc_aead_complete; + cc_req.user_arg = req; /* Setup request context */ areq_ctx->gen_ctx.op_type = direct; @@ -1989,7 +1983,6 @@ static int cc_proc_aead(struct aead_request *req, /* Load MLLI tables to SRAM if necessary */ cc_mlli_to_sram(req, desc, &seq_len); - /*TODO: move seq len by reference */ switch (ctx->auth_mode) { case DRV_HASH_SHA1: case DRV_HASH_SHA256: @@ -2034,9 +2027,6 @@ static int cc_aead_encrypt(struct aead_request *req) /* No generated IV required */ areq_ctx->backup_iv = req->iv; areq_ctx->assoclen = req->assoclen; - areq_ctx->is_gcm4543 = false; - - areq_ctx->plaintext_authenticate_only = false; rc = cc_proc_aead(req, DRV_CRYPTO_DIRECTION_ENCRYPT); if (rc != -EINPROGRESS && rc != -EBUSY) @@ -2050,22 +2040,17 @@ static int cc_rfc4309_ccm_encrypt(struct aead_request *req) /* Very similar to cc_aead_encrypt() above. 
*/ struct aead_req_ctx *areq_ctx = aead_request_ctx(req); - struct crypto_aead *tfm = crypto_aead_reqtfm(req); - struct cc_aead_ctx *ctx = crypto_aead_ctx(tfm); - struct device *dev = drvdata_to_dev(ctx->drvdata); - int rc = -EINVAL; + int rc; - if (!valid_assoclen(req)) { - dev_dbg(dev, "invalid Assoclen:%u\n", req->assoclen); + rc = crypto_ipsec_check_assoclen(req->assoclen); + if (rc) goto out; - } memset(areq_ctx, 0, sizeof(*areq_ctx)); /* No generated IV required */ areq_ctx->backup_iv = req->iv; - areq_ctx->assoclen = req->assoclen; - areq_ctx->is_gcm4543 = true; + areq_ctx->assoclen = req->assoclen - CCM_BLOCK_IV_SIZE; cc_proc_rfc4309_ccm(req); @@ -2086,9 +2071,6 @@ static int cc_aead_decrypt(struct aead_request *req) /* No generated IV required */ areq_ctx->backup_iv = req->iv; areq_ctx->assoclen = req->assoclen; - areq_ctx->is_gcm4543 = false; - - areq_ctx->plaintext_authenticate_only = false; rc = cc_proc_aead(req, DRV_CRYPTO_DIRECTION_DECRYPT); if (rc != -EINPROGRESS && rc != -EBUSY) @@ -2099,24 +2081,19 @@ static int cc_aead_decrypt(struct aead_request *req) static int cc_rfc4309_ccm_decrypt(struct aead_request *req) { - struct crypto_aead *tfm = crypto_aead_reqtfm(req); - struct cc_aead_ctx *ctx = crypto_aead_ctx(tfm); - struct device *dev = drvdata_to_dev(ctx->drvdata); struct aead_req_ctx *areq_ctx = aead_request_ctx(req); - int rc = -EINVAL; + int rc; - if (!valid_assoclen(req)) { - dev_dbg(dev, "invalid Assoclen:%u\n", req->assoclen); + rc = crypto_ipsec_check_assoclen(req->assoclen); + if (rc) goto out; - } memset(areq_ctx, 0, sizeof(*areq_ctx)); /* No generated IV required */ areq_ctx->backup_iv = req->iv; - areq_ctx->assoclen = req->assoclen; + areq_ctx->assoclen = req->assoclen - CCM_BLOCK_IV_SIZE; - areq_ctx->is_gcm4543 = true; cc_proc_rfc4309_ccm(req); rc = cc_proc_aead(req, DRV_CRYPTO_DIRECTION_DECRYPT); @@ -2216,28 +2193,20 @@ static int cc_rfc4543_gcm_setauthsize(struct crypto_aead *authenc, static int cc_rfc4106_gcm_encrypt(struct aead_request *req) { - /* Very similar to cc_aead_encrypt() above. */ - - struct crypto_aead *tfm = crypto_aead_reqtfm(req); - struct cc_aead_ctx *ctx = crypto_aead_ctx(tfm); - struct device *dev = drvdata_to_dev(ctx->drvdata); struct aead_req_ctx *areq_ctx = aead_request_ctx(req); - int rc = -EINVAL; + int rc; - if (!valid_assoclen(req)) { - dev_dbg(dev, "invalid Assoclen:%u\n", req->assoclen); + rc = crypto_ipsec_check_assoclen(req->assoclen); + if (rc) goto out; - } memset(areq_ctx, 0, sizeof(*areq_ctx)); /* No generated IV required */ areq_ctx->backup_iv = req->iv; - areq_ctx->assoclen = req->assoclen; - areq_ctx->plaintext_authenticate_only = false; + areq_ctx->assoclen = req->assoclen - GCM_BLOCK_RFC4_IV_SIZE; cc_proc_rfc4_gcm(req); - areq_ctx->is_gcm4543 = true; rc = cc_proc_aead(req, DRV_CRYPTO_DIRECTION_ENCRYPT); if (rc != -EINPROGRESS && rc != -EBUSY) @@ -2248,17 +2217,12 @@ out: static int cc_rfc4543_gcm_encrypt(struct aead_request *req) { - /* Very similar to cc_aead_encrypt() above. 
*/ - struct crypto_aead *tfm = crypto_aead_reqtfm(req); - struct cc_aead_ctx *ctx = crypto_aead_ctx(tfm); - struct device *dev = drvdata_to_dev(ctx->drvdata); struct aead_req_ctx *areq_ctx = aead_request_ctx(req); - int rc = -EINVAL; + int rc; - if (!valid_assoclen(req)) { - dev_dbg(dev, "invalid Assoclen:%u\n", req->assoclen); + rc = crypto_ipsec_check_assoclen(req->assoclen); + if (rc) goto out; - } memset(areq_ctx, 0, sizeof(*areq_ctx)); @@ -2270,7 +2234,6 @@ static int cc_rfc4543_gcm_encrypt(struct aead_request *req) areq_ctx->assoclen = req->assoclen; cc_proc_rfc4_gcm(req); - areq_ctx->is_gcm4543 = true; rc = cc_proc_aead(req, DRV_CRYPTO_DIRECTION_ENCRYPT); if (rc != -EINPROGRESS && rc != -EBUSY) @@ -2281,28 +2244,20 @@ out: static int cc_rfc4106_gcm_decrypt(struct aead_request *req) { - /* Very similar to cc_aead_decrypt() above. */ - - struct crypto_aead *tfm = crypto_aead_reqtfm(req); - struct cc_aead_ctx *ctx = crypto_aead_ctx(tfm); - struct device *dev = drvdata_to_dev(ctx->drvdata); struct aead_req_ctx *areq_ctx = aead_request_ctx(req); - int rc = -EINVAL; + int rc; - if (!valid_assoclen(req)) { - dev_dbg(dev, "invalid Assoclen:%u\n", req->assoclen); + rc = crypto_ipsec_check_assoclen(req->assoclen); + if (rc) goto out; - } memset(areq_ctx, 0, sizeof(*areq_ctx)); /* No generated IV required */ areq_ctx->backup_iv = req->iv; - areq_ctx->assoclen = req->assoclen; - areq_ctx->plaintext_authenticate_only = false; + areq_ctx->assoclen = req->assoclen - GCM_BLOCK_RFC4_IV_SIZE; cc_proc_rfc4_gcm(req); - areq_ctx->is_gcm4543 = true; rc = cc_proc_aead(req, DRV_CRYPTO_DIRECTION_DECRYPT); if (rc != -EINPROGRESS && rc != -EBUSY) @@ -2313,17 +2268,12 @@ out: static int cc_rfc4543_gcm_decrypt(struct aead_request *req) { - /* Very similar to cc_aead_decrypt() above. 
*/ - struct crypto_aead *tfm = crypto_aead_reqtfm(req); - struct cc_aead_ctx *ctx = crypto_aead_ctx(tfm); - struct device *dev = drvdata_to_dev(ctx->drvdata); struct aead_req_ctx *areq_ctx = aead_request_ctx(req); - int rc = -EINVAL; + int rc; - if (!valid_assoclen(req)) { - dev_dbg(dev, "invalid Assoclen:%u\n", req->assoclen); + rc = crypto_ipsec_check_assoclen(req->assoclen); + if (rc) goto out; - } memset(areq_ctx, 0, sizeof(*areq_ctx)); @@ -2335,7 +2285,6 @@ static int cc_rfc4543_gcm_decrypt(struct aead_request *req) areq_ctx->assoclen = req->assoclen; cc_proc_rfc4_gcm(req); - areq_ctx->is_gcm4543 = true; rc = cc_proc_aead(req, DRV_CRYPTO_DIRECTION_DECRYPT); if (rc != -EINPROGRESS && rc != -EBUSY) @@ -2614,7 +2563,7 @@ static struct cc_crypto_alg *cc_create_aead_alg(struct cc_alg_template *tmpl, struct cc_crypto_alg *t_alg; struct aead_alg *alg; - t_alg = kzalloc(sizeof(*t_alg), GFP_KERNEL); + t_alg = devm_kzalloc(dev, sizeof(*t_alg), GFP_KERNEL); if (!t_alg) return ERR_PTR(-ENOMEM); @@ -2628,6 +2577,7 @@ static struct cc_crypto_alg *cc_create_aead_alg(struct cc_alg_template *tmpl, alg->base.cra_ctxsize = sizeof(struct cc_aead_ctx); alg->base.cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_KERN_DRIVER_ONLY; + alg->base.cra_blocksize = tmpl->blocksize; alg->init = cc_aead_init; alg->exit = cc_aead_exit; @@ -2643,19 +2593,12 @@ static struct cc_crypto_alg *cc_create_aead_alg(struct cc_alg_template *tmpl, int cc_aead_free(struct cc_drvdata *drvdata) { struct cc_crypto_alg *t_alg, *n; - struct cc_aead_handle *aead_handle = - (struct cc_aead_handle *)drvdata->aead_handle; - - if (aead_handle) { - /* Remove registered algs */ - list_for_each_entry_safe(t_alg, n, &aead_handle->aead_list, - entry) { - crypto_unregister_aead(&t_alg->aead_alg); - list_del(&t_alg->entry); - kfree(t_alg); - } - kfree(aead_handle); - drvdata->aead_handle = NULL; + struct cc_aead_handle *aead_handle = drvdata->aead_handle; + + /* Remove registered algs */ + list_for_each_entry_safe(t_alg, n, &aead_handle->aead_list, entry) { + crypto_unregister_aead(&t_alg->aead_alg); + list_del(&t_alg->entry); } return 0; @@ -2669,7 +2612,7 @@ int cc_aead_alloc(struct cc_drvdata *drvdata) int alg; struct device *dev = drvdata_to_dev(drvdata); - aead_handle = kmalloc(sizeof(*aead_handle), GFP_KERNEL); + aead_handle = devm_kmalloc(dev, sizeof(*aead_handle), GFP_KERNEL); if (!aead_handle) { rc = -ENOMEM; goto fail0; @@ -2682,7 +2625,6 @@ int cc_aead_alloc(struct cc_drvdata *drvdata) MAX_HMAC_DIGEST_SIZE); if (aead_handle->sram_workspace_addr == NULL_SRAM_ADDR) { - dev_err(dev, "SRAM pool exhausted\n"); rc = -ENOMEM; goto fail1; } @@ -2705,18 +2647,16 @@ int cc_aead_alloc(struct cc_drvdata *drvdata) if (rc) { dev_err(dev, "%s alg registration failed\n", t_alg->aead_alg.base.cra_driver_name); - goto fail2; - } else { - list_add_tail(&t_alg->entry, &aead_handle->aead_list); - dev_dbg(dev, "Registered %s\n", - t_alg->aead_alg.base.cra_driver_name); + goto fail1; } + + list_add_tail(&t_alg->entry, &aead_handle->aead_list); + dev_dbg(dev, "Registered %s\n", + t_alg->aead_alg.base.cra_driver_name); } return 0; -fail2: - kfree(t_alg); fail1: cc_aead_free(drvdata); fail0: diff --git a/drivers/crypto/ccree/cc_aead.h b/drivers/crypto/ccree/cc_aead.h index f12169b57f9d..b69591550730 100644 --- a/drivers/crypto/ccree/cc_aead.h +++ b/drivers/crypto/ccree/cc_aead.h @@ -66,7 +66,7 @@ struct aead_req_ctx { /* used to prevent cache coherence problem */ u8 backup_mac[MAX_MAC_SIZE]; u8 *backup_iv; /* store orig iv */ - u32 assoclen; /* internal assoclen */ + 
u32 assoclen; /* size of AAD buffer to authenticate */ dma_addr_t mac_buf_dma_addr; /* internal ICV DMA buffer */ /* buffer for internal ccm configurations */ dma_addr_t ccm_iv0_dma_addr; @@ -79,7 +79,6 @@ struct aead_req_ctx { dma_addr_t gcm_iv_inc2_dma_addr; dma_addr_t hkey_dma_addr; /* Phys. address of hkey */ dma_addr_t gcm_block_len_dma_addr; /* Phys. address of gcm block len */ - bool is_gcm4543; u8 *icv_virt_addr; /* Virt. address of ICV */ struct async_gen_req_ctx gen_ctx; diff --git a/drivers/crypto/ccree/cc_buffer_mgr.c b/drivers/crypto/ccree/cc_buffer_mgr.c index a72586eccd81..b2bd093e7013 100644 --- a/drivers/crypto/ccree/cc_buffer_mgr.c +++ b/drivers/crypto/ccree/cc_buffer_mgr.c @@ -13,16 +13,6 @@ #include "cc_hash.h" #include "cc_aead.h" -enum dma_buffer_type { - DMA_NULL_TYPE = -1, - DMA_SGL_TYPE = 1, - DMA_BUFF_TYPE = 2, -}; - -struct buff_mgr_handle { - struct dma_pool *mlli_buffs_pool; -}; - union buffer_array_entry { struct scatterlist *sgl; dma_addr_t buffer_dma; @@ -34,7 +24,6 @@ struct buffer_array { unsigned int offset[MAX_NUM_OF_BUFFERS_IN_MLLI]; int nents[MAX_NUM_OF_BUFFERS_IN_MLLI]; int total_data_len[MAX_NUM_OF_BUFFERS_IN_MLLI]; - enum dma_buffer_type type[MAX_NUM_OF_BUFFERS_IN_MLLI]; bool is_last[MAX_NUM_OF_BUFFERS_IN_MLLI]; u32 *mlli_nents[MAX_NUM_OF_BUFFERS_IN_MLLI]; }; @@ -64,11 +53,7 @@ static void cc_copy_mac(struct device *dev, struct aead_request *req, enum cc_sg_cpy_direct dir) { struct aead_req_ctx *areq_ctx = aead_request_ctx(req); - struct crypto_aead *tfm = crypto_aead_reqtfm(req); - u32 skip = areq_ctx->assoclen + req->cryptlen; - - if (areq_ctx->is_gcm4543) - skip += crypto_aead_ivsize(tfm); + u32 skip = req->assoclen + req->cryptlen; cc_copy_sg_portion(dev, areq_ctx->backup_mac, req->src, (skip - areq_ctx->req_authsize), skip, dir); @@ -77,9 +62,13 @@ static void cc_copy_mac(struct device *dev, struct aead_request *req, /** * cc_get_sgl_nents() - Get scatterlist number of entries. * + * @dev: Device object * @sg_list: SG list * @nbytes: [IN] Total SGL data bytes. 
* @lbytes: [OUT] Returns the amount of bytes at the last entry + * + * Return: + * Number of entries in the scatterlist */ static unsigned int cc_get_sgl_nents(struct device *dev, struct scatterlist *sg_list, @@ -87,6 +76,8 @@ static unsigned int cc_get_sgl_nents(struct device *dev, { unsigned int nents = 0; + *lbytes = 0; + while (nbytes && sg_list) { nents++; /* get the number of bytes in the last entry */ @@ -95,6 +86,7 @@ static unsigned int cc_get_sgl_nents(struct device *dev, nbytes : sg_list->length; sg_list = sg_next(sg_list); } + dev_dbg(dev, "nents %d last bytes %d\n", nents, *lbytes); return nents; } @@ -103,11 +95,13 @@ static unsigned int cc_get_sgl_nents(struct device *dev, * cc_copy_sg_portion() - Copy scatter list data, * from to_skip to end, to dest and vice versa * - * @dest: - * @sg: - * @to_skip: - * @end: - * @direct: + * @dev: Device object + * @dest: Buffer to copy to/from + * @sg: SG list + * @to_skip: Number of bytes to skip before copying + * @end: Offset of last byte to copy + * @direct: Transfer direction (true == from SG list to buffer, false == from + * buffer to SG list) */ void cc_copy_sg_portion(struct device *dev, u8 *dest, struct scatterlist *sg, u32 to_skip, u32 end, enum cc_sg_cpy_direct direct) @@ -115,7 +109,7 @@ void cc_copy_sg_portion(struct device *dev, u8 *dest, struct scatterlist *sg, u32 nents; nents = sg_nents_for_len(sg, end); - sg_copy_buffer(sg, nents, (void *)dest, (end - to_skip + 1), to_skip, + sg_copy_buffer(sg, nents, dest, (end - to_skip + 1), to_skip, (direct == CC_SG_TO_BUF)); } @@ -204,21 +198,15 @@ static int cc_generate_mlli(struct device *dev, struct buffer_array *sg_data, goto build_mlli_exit; } /* Point to start of MLLI */ - mlli_p = (u32 *)mlli_params->mlli_virt_addr; + mlli_p = mlli_params->mlli_virt_addr; /* go over all SG's and link it to one MLLI table */ for (i = 0; i < sg_data->num_of_buffers; i++) { union buffer_array_entry *entry = &sg_data->entry[i]; u32 tot_len = sg_data->total_data_len[i]; u32 offset = sg_data->offset[i]; - if (sg_data->type[i] == DMA_SGL_TYPE) - rc = cc_render_sg_to_mlli(dev, entry->sgl, tot_len, - offset, &total_nents, - &mlli_p); - else /*DMA_BUFF_TYPE*/ - rc = cc_render_buff_to_mlli(dev, entry->buffer_dma, - tot_len, &total_nents, - &mlli_p); + rc = cc_render_sg_to_mlli(dev, entry->sgl, tot_len, offset, + &total_nents, &mlli_p); if (rc) return rc; @@ -244,27 +232,6 @@ build_mlli_exit: return rc; } -static void cc_add_buffer_entry(struct device *dev, - struct buffer_array *sgl_data, - dma_addr_t buffer_dma, unsigned int buffer_len, - bool is_last_entry, u32 *mlli_nents) -{ - unsigned int index = sgl_data->num_of_buffers; - - dev_dbg(dev, "index=%u single_buff=%pad buffer_len=0x%08X is_last=%d\n", - index, &buffer_dma, buffer_len, is_last_entry); - sgl_data->nents[index] = 1; - sgl_data->entry[index].buffer_dma = buffer_dma; - sgl_data->offset[index] = 0; - sgl_data->total_data_len[index] = buffer_len; - sgl_data->type[index] = DMA_BUFF_TYPE; - sgl_data->is_last[index] = is_last_entry; - sgl_data->mlli_nents[index] = mlli_nents; - if (sgl_data->mlli_nents[index]) - *sgl_data->mlli_nents[index] = 0; - sgl_data->num_of_buffers++; -} - static void cc_add_sg_entry(struct device *dev, struct buffer_array *sgl_data, unsigned int nents, struct scatterlist *sgl, unsigned int data_len, unsigned int data_offset, @@ -278,7 +245,6 @@ static void cc_add_sg_entry(struct device *dev, struct buffer_array *sgl_data, sgl_data->entry[index].sgl = sgl; sgl_data->offset[index] = data_offset; 
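The kernel-doc added above for cc_copy_sg_portion() describes copying a byte window between a scatterlist and a linear buffer. The same idea can be expressed with the stock scatterlist helpers; the sketch below is illustrative only (the names and the exact window arithmetic are not taken from the driver).

#include <linux/scatterlist.h>

/* Sketch: copy 'len' bytes starting at offset 'skip' from an SG list into 'buf'. */
static int example_sg_to_buf(struct scatterlist *sg, u8 *buf, u32 skip, u32 len)
{
	int nents = sg_nents_for_len(sg, skip + len);

	if (nents < 0)
		return nents;		/* SG list shorter than requested */

	/* Last argument true means scatterlist -> buffer. */
	sg_copy_buffer(sg, nents, buf, len, skip, true);
	return 0;
}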
sgl_data->total_data_len[index] = data_len; - sgl_data->type[index] = DMA_SGL_TYPE; sgl_data->is_last[index] = is_last_table; sgl_data->mlli_nents[index] = mlli_nents; if (sgl_data->mlli_nents[index]) @@ -290,37 +256,25 @@ static int cc_map_sg(struct device *dev, struct scatterlist *sg, unsigned int nbytes, int direction, u32 *nents, u32 max_sg_nents, u32 *lbytes, u32 *mapped_nents) { - if (sg_is_last(sg)) { - /* One entry only case -set to DLLI */ - if (dma_map_sg(dev, sg, 1, direction) != 1) { - dev_err(dev, "dma_map_sg() single buffer failed\n"); - return -ENOMEM; - } - dev_dbg(dev, "Mapped sg: dma_address=%pad page=%p addr=%pK offset=%u length=%u\n", - &sg_dma_address(sg), sg_page(sg), sg_virt(sg), - sg->offset, sg->length); - *lbytes = nbytes; - *nents = 1; - *mapped_nents = 1; - } else { /*sg_is_last*/ - *nents = cc_get_sgl_nents(dev, sg, nbytes, lbytes); - if (*nents > max_sg_nents) { - *nents = 0; - dev_err(dev, "Too many fragments. current %d max %d\n", - *nents, max_sg_nents); - return -ENOMEM; - } - /* In case of mmu the number of mapped nents might - * be changed from the original sgl nents - */ - *mapped_nents = dma_map_sg(dev, sg, *nents, direction); - if (*mapped_nents == 0) { - *nents = 0; - dev_err(dev, "dma_map_sg() sg buffer failed\n"); - return -ENOMEM; - } + int ret = 0; + + *nents = cc_get_sgl_nents(dev, sg, nbytes, lbytes); + if (*nents > max_sg_nents) { + *nents = 0; + dev_err(dev, "Too many fragments. current %d max %d\n", + *nents, max_sg_nents); + return -ENOMEM; + } + + ret = dma_map_sg(dev, sg, *nents, direction); + if (dma_mapping_error(dev, ret)) { + *nents = 0; + dev_err(dev, "dma_map_sg() sg buffer failed %d\n", ret); + return -ENOMEM; } + *mapped_nents = ret; + return 0; } @@ -411,7 +365,6 @@ int cc_map_cipher_request(struct cc_drvdata *drvdata, void *ctx, { struct cipher_req_ctx *req_ctx = (struct cipher_req_ctx *)ctx; struct mlli_params *mlli_params = &req_ctx->mlli_params; - struct buff_mgr_handle *buff_mgr = drvdata->buff_mgr_handle; struct device *dev = drvdata_to_dev(drvdata); struct buffer_array sg_data; u32 dummy = 0; @@ -424,10 +377,9 @@ int cc_map_cipher_request(struct cc_drvdata *drvdata, void *ctx, /* Map IV buffer */ if (ivsize) { - dump_byte_array("iv", (u8 *)info, ivsize); + dump_byte_array("iv", info, ivsize); req_ctx->gen_ctx.iv_dma_addr = - dma_map_single(dev, (void *)info, - ivsize, DMA_BIDIRECTIONAL); + dma_map_single(dev, info, ivsize, DMA_BIDIRECTIONAL); if (dma_mapping_error(dev, req_ctx->gen_ctx.iv_dma_addr)) { dev_err(dev, "Mapping iv %u B at va=%pK for DMA failed\n", ivsize, info); @@ -476,7 +428,7 @@ int cc_map_cipher_request(struct cc_drvdata *drvdata, void *ctx, } if (req_ctx->dma_buf_type == CC_DMA_BUF_MLLI) { - mlli_params->curr_pool = buff_mgr->mlli_buffs_pool; + mlli_params->curr_pool = drvdata->mlli_buffs_pool; rc = cc_generate_mlli(dev, &sg_data, mlli_params, flags); if (rc) goto cipher_exit; @@ -555,11 +507,12 @@ void cc_unmap_aead_request(struct device *dev, struct aead_request *req) sg_virt(req->src), areq_ctx->src.nents, areq_ctx->assoc.nents, areq_ctx->assoclen, req->cryptlen); - dma_unmap_sg(dev, req->src, sg_nents(req->src), DMA_BIDIRECTIONAL); + dma_unmap_sg(dev, req->src, areq_ctx->src.mapped_nents, + DMA_BIDIRECTIONAL); if (req->src != req->dst) { dev_dbg(dev, "Unmapping dst sgl: req->dst=%pK\n", sg_virt(req->dst)); - dma_unmap_sg(dev, req->dst, sg_nents(req->dst), + dma_unmap_sg(dev, req->dst, areq_ctx->dst.mapped_nents, DMA_BIDIRECTIONAL); } if (drvdata->coherent && @@ -614,18 +567,6 @@ static int 
cc_aead_chain_iv(struct cc_drvdata *drvdata, dev_dbg(dev, "Mapped iv %u B at va=%pK to dma=%pad\n", hw_iv_size, req->iv, &areq_ctx->gen_ctx.iv_dma_addr); - // TODO: what about CTR?? ask Ron - if (do_chain && areq_ctx->plaintext_authenticate_only) { - struct crypto_aead *tfm = crypto_aead_reqtfm(req); - unsigned int iv_size_to_authenc = crypto_aead_ivsize(tfm); - unsigned int iv_ofs = GCM_BLOCK_RFC4_IV_OFFSET; - /* Chain to given list */ - cc_add_buffer_entry(dev, sg_data, - (areq_ctx->gen_ctx.iv_dma_addr + iv_ofs), - iv_size_to_authenc, is_last, - &areq_ctx->assoc.mlli_nents); - areq_ctx->assoc_buff_type = CC_DMA_BUF_MLLI; - } chain_iv_exit: return rc; @@ -639,13 +580,8 @@ static int cc_aead_chain_assoc(struct cc_drvdata *drvdata, struct aead_req_ctx *areq_ctx = aead_request_ctx(req); int rc = 0; int mapped_nents = 0; - struct crypto_aead *tfm = crypto_aead_reqtfm(req); - unsigned int size_of_assoc = areq_ctx->assoclen; struct device *dev = drvdata_to_dev(drvdata); - if (areq_ctx->is_gcm4543) - size_of_assoc += crypto_aead_ivsize(tfm); - if (!sg_data) { rc = -EINVAL; goto chain_assoc_exit; @@ -661,7 +597,7 @@ static int cc_aead_chain_assoc(struct cc_drvdata *drvdata, goto chain_assoc_exit; } - mapped_nents = sg_nents_for_len(req->src, size_of_assoc); + mapped_nents = sg_nents_for_len(req->src, areq_ctx->assoclen); if (mapped_nents < 0) return mapped_nents; @@ -854,16 +790,11 @@ static int cc_aead_chain_data(struct cc_drvdata *drvdata, u32 src_mapped_nents = 0, dst_mapped_nents = 0; u32 offset = 0; /* non-inplace mode */ - unsigned int size_for_map = areq_ctx->assoclen + req->cryptlen; - struct crypto_aead *tfm = crypto_aead_reqtfm(req); + unsigned int size_for_map = req->assoclen + req->cryptlen; u32 sg_index = 0; - bool is_gcm4543 = areq_ctx->is_gcm4543; - u32 size_to_skip = areq_ctx->assoclen; + u32 size_to_skip = req->assoclen; struct scatterlist *sgl; - if (is_gcm4543) - size_to_skip += crypto_aead_ivsize(tfm); - offset = size_to_skip; if (!sg_data) @@ -872,16 +803,13 @@ static int cc_aead_chain_data(struct cc_drvdata *drvdata, areq_ctx->src_sgl = req->src; areq_ctx->dst_sgl = req->dst; - if (is_gcm4543) - size_for_map += crypto_aead_ivsize(tfm); - size_for_map += (direct == DRV_CRYPTO_DIRECTION_ENCRYPT) ? authsize : 0; src_mapped_nents = cc_get_sgl_nents(dev, req->src, size_for_map, &src_last_bytes); sg_index = areq_ctx->src_sgl->length; //check where the data starts - while (sg_index <= size_to_skip) { + while (src_mapped_nents && (sg_index <= size_to_skip)) { src_mapped_nents--; offset -= areq_ctx->src_sgl->length; sgl = sg_next(areq_ctx->src_sgl); @@ -901,14 +829,15 @@ static int cc_aead_chain_data(struct cc_drvdata *drvdata, areq_ctx->src_offset = offset; if (req->src != req->dst) { - size_for_map = areq_ctx->assoclen + req->cryptlen; - size_for_map += (direct == DRV_CRYPTO_DIRECTION_ENCRYPT) ? 
- authsize : 0; - if (is_gcm4543) - size_for_map += crypto_aead_ivsize(tfm); + size_for_map = req->assoclen + req->cryptlen; + + if (direct == DRV_CRYPTO_DIRECTION_ENCRYPT) + size_for_map += authsize; + else + size_for_map -= authsize; rc = cc_map_sg(dev, req->dst, size_for_map, DMA_BIDIRECTIONAL, - &areq_ctx->dst.nents, + &areq_ctx->dst.mapped_nents, LLI_MAX_NUM_OF_DATA_ENTRIES, &dst_last_bytes, &dst_mapped_nents); if (rc) @@ -921,7 +850,7 @@ static int cc_aead_chain_data(struct cc_drvdata *drvdata, offset = size_to_skip; //check where the data starts - while (sg_index <= size_to_skip) { + while (dst_mapped_nents && sg_index <= size_to_skip) { dst_mapped_nents--; offset -= areq_ctx->dst_sgl->length; sgl = sg_next(areq_ctx->dst_sgl); @@ -1012,14 +941,11 @@ int cc_map_aead_request(struct cc_drvdata *drvdata, struct aead_request *req) struct device *dev = drvdata_to_dev(drvdata); struct buffer_array sg_data; unsigned int authsize = areq_ctx->req_authsize; - struct buff_mgr_handle *buff_mgr = drvdata->buff_mgr_handle; int rc = 0; - struct crypto_aead *tfm = crypto_aead_reqtfm(req); - bool is_gcm4543 = areq_ctx->is_gcm4543; dma_addr_t dma_addr; u32 mapped_nents = 0; u32 dummy = 0; /*used for the assoc data fragments */ - u32 size_to_map = 0; + u32 size_to_map; gfp_t flags = cc_gfp_flags(&req->base); mlli_params->curr_pool = NULL; @@ -1116,14 +1042,15 @@ int cc_map_aead_request(struct cc_drvdata *drvdata, struct aead_request *req) areq_ctx->gcm_iv_inc2_dma_addr = dma_addr; } - size_to_map = req->cryptlen + areq_ctx->assoclen; - if (areq_ctx->gen_ctx.op_type == DRV_CRYPTO_DIRECTION_ENCRYPT) + size_to_map = req->cryptlen + req->assoclen; + /* If we do in-place encryption, we also need the auth tag */ + if ((areq_ctx->gen_ctx.op_type == DRV_CRYPTO_DIRECTION_ENCRYPT) && + (req->src == req->dst)) { size_to_map += authsize; + } - if (is_gcm4543) - size_to_map += crypto_aead_ivsize(tfm); rc = cc_map_sg(dev, req->src, size_to_map, DMA_BIDIRECTIONAL, - &areq_ctx->src.nents, + &areq_ctx->src.mapped_nents, (LLI_MAX_NUM_OF_ASSOC_DATA_ENTRIES + LLI_MAX_NUM_OF_DATA_ENTRIES), &dummy, &mapped_nents); @@ -1183,7 +1110,7 @@ int cc_map_aead_request(struct cc_drvdata *drvdata, struct aead_request *req) */ if (areq_ctx->assoc_buff_type == CC_DMA_BUF_MLLI || areq_ctx->data_buff_type == CC_DMA_BUF_MLLI) { - mlli_params->curr_pool = buff_mgr->mlli_buffs_pool; + mlli_params->curr_pool = drvdata->mlli_buffs_pool; rc = cc_generate_mlli(dev, &sg_data, mlli_params, flags); if (rc) goto aead_map_failure; @@ -1211,7 +1138,6 @@ int cc_map_hash_request_final(struct cc_drvdata *drvdata, void *ctx, u32 *curr_buff_cnt = cc_hash_buf_cnt(areq_ctx); struct mlli_params *mlli_params = &areq_ctx->mlli_params; struct buffer_array sg_data; - struct buff_mgr_handle *buff_mgr = drvdata->buff_mgr_handle; int rc = 0; u32 dummy = 0; u32 mapped_nents = 0; @@ -1229,7 +1155,6 @@ int cc_map_hash_request_final(struct cc_drvdata *drvdata, void *ctx, return 0; } - /*TODO: copy data in case that buffer is enough for operation */ /* map the previous buffer */ if (*curr_buff_cnt) { rc = cc_set_hash_buf(dev, areq_ctx, curr_buff, *curr_buff_cnt, @@ -1258,7 +1183,7 @@ int cc_map_hash_request_final(struct cc_drvdata *drvdata, void *ctx, /*build mlli */ if (areq_ctx->data_dma_buf_type == CC_DMA_BUF_MLLI) { - mlli_params->curr_pool = buff_mgr->mlli_buffs_pool; + mlli_params->curr_pool = drvdata->mlli_buffs_pool; /* add the src data to the sg_data */ cc_add_sg_entry(dev, &sg_data, areq_ctx->in_nents, src, nbytes, 0, true, &areq_ctx->mlli_nents); @@ -1296,7 
+1221,6 @@ int cc_map_hash_request_update(struct cc_drvdata *drvdata, void *ctx, unsigned int update_data_len; u32 total_in_len = nbytes + *curr_buff_cnt; struct buffer_array sg_data; - struct buff_mgr_handle *buff_mgr = drvdata->buff_mgr_handle; unsigned int swap_index = 0; int rc = 0; u32 dummy = 0; @@ -1371,7 +1295,7 @@ int cc_map_hash_request_update(struct cc_drvdata *drvdata, void *ctx, } if (areq_ctx->data_dma_buf_type == CC_DMA_BUF_MLLI) { - mlli_params->curr_pool = buff_mgr->mlli_buffs_pool; + mlli_params->curr_pool = drvdata->mlli_buffs_pool; /* add the src data to the sg_data */ cc_add_sg_entry(dev, &sg_data, areq_ctx->in_nents, src, (update_data_len - *curr_buff_cnt), 0, true, @@ -1438,39 +1362,22 @@ void cc_unmap_hash_request(struct device *dev, void *ctx, int cc_buffer_mgr_init(struct cc_drvdata *drvdata) { - struct buff_mgr_handle *buff_mgr_handle; struct device *dev = drvdata_to_dev(drvdata); - buff_mgr_handle = kmalloc(sizeof(*buff_mgr_handle), GFP_KERNEL); - if (!buff_mgr_handle) - return -ENOMEM; - - drvdata->buff_mgr_handle = buff_mgr_handle; - - buff_mgr_handle->mlli_buffs_pool = + drvdata->mlli_buffs_pool = dma_pool_create("dx_single_mlli_tables", dev, MAX_NUM_OF_TOTAL_MLLI_ENTRIES * LLI_ENTRY_BYTE_SIZE, MLLI_TABLE_MIN_ALIGNMENT, 0); - if (!buff_mgr_handle->mlli_buffs_pool) - goto error; + if (!drvdata->mlli_buffs_pool) + return -ENOMEM; return 0; - -error: - cc_buffer_mgr_fini(drvdata); - return -ENOMEM; } int cc_buffer_mgr_fini(struct cc_drvdata *drvdata) { - struct buff_mgr_handle *buff_mgr_handle = drvdata->buff_mgr_handle; - - if (buff_mgr_handle) { - dma_pool_destroy(buff_mgr_handle->mlli_buffs_pool); - kfree(drvdata->buff_mgr_handle); - drvdata->buff_mgr_handle = NULL; - } + dma_pool_destroy(drvdata->mlli_buffs_pool); return 0; } diff --git a/drivers/crypto/ccree/cc_buffer_mgr.h b/drivers/crypto/ccree/cc_buffer_mgr.h index af434872c6ff..653441b6542e 100644 --- a/drivers/crypto/ccree/cc_buffer_mgr.h +++ b/drivers/crypto/ccree/cc_buffer_mgr.h @@ -24,14 +24,15 @@ enum cc_sg_cpy_direct { }; struct cc_mlli { - cc_sram_addr_t sram_addr; + u32 sram_addr; + unsigned int mapped_nents; unsigned int nents; //sg nents unsigned int mlli_nents; //mlli nents might be different than the above }; struct mlli_params { struct dma_pool *curr_pool; - u8 *mlli_virt_addr; + void *mlli_virt_addr; dma_addr_t mlli_dma_addr; u32 mlli_len; }; diff --git a/drivers/crypto/ccree/cc_cipher.c b/drivers/crypto/ccree/cc_cipher.c index 7d6252d892d7..a84335328f37 100644 --- a/drivers/crypto/ccree/cc_cipher.c +++ b/drivers/crypto/ccree/cc_cipher.c @@ -20,10 +20,6 @@ #define template_skcipher template_u.skcipher -struct cc_cipher_handle { - struct list_head alg_list; -}; - struct cc_user_key_info { u8 *key; dma_addr_t key_dma_addr; @@ -184,7 +180,7 @@ static int cc_cipher_init(struct crypto_tfm *tfm) ctx_p->user.key); /* Map key buffer */ - ctx_p->user.key_dma_addr = dma_map_single(dev, (void *)ctx_p->user.key, + ctx_p->user.key_dma_addr = dma_map_single(dev, ctx_p->user.key, max_key_buf_size, DMA_TO_DEVICE); if (dma_mapping_error(dev, ctx_p->user.key_dma_addr)) { @@ -284,7 +280,7 @@ static int cc_cipher_sethkey(struct crypto_skcipher *sktfm, const u8 *key, dev_dbg(dev, "Setting HW key in context @%p for %s. 
keylen=%u\n", ctx_p, crypto_tfm_alg_name(tfm), keylen); - dump_byte_array("key", (u8 *)key, keylen); + dump_byte_array("key", key, keylen); /* STAT_PHASE_0: Init and sanity checks */ @@ -387,7 +383,7 @@ static int cc_cipher_setkey(struct crypto_skcipher *sktfm, const u8 *key, dev_dbg(dev, "Setting key in context @%p for %s. keylen=%u\n", ctx_p, crypto_tfm_alg_name(tfm), keylen); - dump_byte_array("key", (u8 *)key, keylen); + dump_byte_array("key", key, keylen); /* STAT_PHASE_0: Init and sanity checks */ @@ -533,14 +529,6 @@ static void cc_setup_state_desc(struct crypto_tfm *tfm, int flow_mode = ctx_p->flow_mode; int direction = req_ctx->gen_ctx.op_type; dma_addr_t iv_dma_addr = req_ctx->gen_ctx.iv_dma_addr; - unsigned int du_size = nbytes; - - struct cc_crypto_alg *cc_alg = - container_of(tfm->__crt_alg, struct cc_crypto_alg, - skcipher_alg.base); - - if (cc_alg->data_unit) - du_size = cc_alg->data_unit; switch (cipher_mode) { case DRV_CIPHER_ECB: @@ -753,7 +741,7 @@ static void cc_setup_mlli_desc(struct crypto_tfm *tfm, dev_dbg(dev, " bypass params addr %pad length 0x%X addr 0x%08X\n", &req_ctx->mlli_params.mlli_dma_addr, req_ctx->mlli_params.mlli_len, - (unsigned int)ctx_p->drvdata->mlli_sram_addr); + ctx_p->drvdata->mlli_sram_addr); hw_desc_init(&desc[*seq_size]); set_din_type(&desc[*seq_size], DMA_DLLI, req_ctx->mlli_params.mlli_dma_addr, @@ -801,16 +789,16 @@ static void cc_setup_flow_desc(struct crypto_tfm *tfm, req_ctx->in_mlli_nents, NS_BIT); if (req_ctx->out_nents == 0) { dev_dbg(dev, " din/dout params addr 0x%08X addr 0x%08X\n", - (unsigned int)ctx_p->drvdata->mlli_sram_addr, - (unsigned int)ctx_p->drvdata->mlli_sram_addr); + ctx_p->drvdata->mlli_sram_addr, + ctx_p->drvdata->mlli_sram_addr); set_dout_mlli(&desc[*seq_size], ctx_p->drvdata->mlli_sram_addr, req_ctx->in_mlli_nents, NS_BIT, (!last_desc ? 0 : 1)); } else { dev_dbg(dev, " din/dout params addr 0x%08X addr 0x%08X\n", - (unsigned int)ctx_p->drvdata->mlli_sram_addr, - (unsigned int)ctx_p->drvdata->mlli_sram_addr + + ctx_p->drvdata->mlli_sram_addr, + ctx_p->drvdata->mlli_sram_addr + (u32)LLI_ENTRY_BYTE_SIZE * req_ctx->in_nents); set_dout_mlli(&desc[*seq_size], (ctx_p->drvdata->mlli_sram_addr + @@ -871,7 +859,6 @@ static int cc_cipher_process(struct skcipher_request *req, /* STAT_PHASE_0: Init and sanity checks */ - /* TODO: check data length according to mode */ if (validate_data_size(ctx_p, nbytes)) { dev_dbg(dev, "Unsupported data size %d.\n", nbytes); rc = -EINVAL; @@ -893,8 +880,8 @@ static int cc_cipher_process(struct skcipher_request *req, } /* Setup request structure */ - cc_req.user_cb = (void *)cc_cipher_complete; - cc_req.user_arg = (void *)req; + cc_req.user_cb = cc_cipher_complete; + cc_req.user_arg = req; /* Setup CPP operation details */ if (ctx_p->key_type == CC_POLICY_PROTECTED_KEY) { @@ -1228,6 +1215,10 @@ static const struct cc_alg_template skcipher_algs[] = { .sec_func = true, }, { + /* See https://www.mail-archive.com/linux-crypto@vger.kernel.org/msg40576.html + * for the reason why this differs from the generic + * implementation. 
+ */ .name = "xts(aes)", .driver_name = "xts-aes-ccree", .blocksize = 1, @@ -1423,7 +1414,7 @@ static const struct cc_alg_template skcipher_algs[] = { { .name = "ofb(aes)", .driver_name = "ofb-aes-ccree", - .blocksize = AES_BLOCK_SIZE, + .blocksize = 1, .template_skcipher = { .setkey = cc_cipher_setkey, .encrypt = cc_cipher_encrypt, @@ -1576,7 +1567,7 @@ static const struct cc_alg_template skcipher_algs[] = { { .name = "ctr(sm4)", .driver_name = "ctr-sm4-ccree", - .blocksize = SM4_BLOCK_SIZE, + .blocksize = 1, .template_skcipher = { .setkey = cc_cipher_setkey, .encrypt = cc_cipher_encrypt, @@ -1634,7 +1625,7 @@ static struct cc_crypto_alg *cc_create_alg(const struct cc_alg_template *tmpl, struct cc_crypto_alg *t_alg; struct skcipher_alg *alg; - t_alg = kzalloc(sizeof(*t_alg), GFP_KERNEL); + t_alg = devm_kzalloc(dev, sizeof(*t_alg), GFP_KERNEL); if (!t_alg) return ERR_PTR(-ENOMEM); @@ -1665,36 +1656,23 @@ static struct cc_crypto_alg *cc_create_alg(const struct cc_alg_template *tmpl, int cc_cipher_free(struct cc_drvdata *drvdata) { struct cc_crypto_alg *t_alg, *n; - struct cc_cipher_handle *cipher_handle = drvdata->cipher_handle; - - if (cipher_handle) { - /* Remove registered algs */ - list_for_each_entry_safe(t_alg, n, &cipher_handle->alg_list, - entry) { - crypto_unregister_skcipher(&t_alg->skcipher_alg); - list_del(&t_alg->entry); - kfree(t_alg); - } - kfree(cipher_handle); - drvdata->cipher_handle = NULL; + + /* Remove registered algs */ + list_for_each_entry_safe(t_alg, n, &drvdata->alg_list, entry) { + crypto_unregister_skcipher(&t_alg->skcipher_alg); + list_del(&t_alg->entry); } return 0; } int cc_cipher_alloc(struct cc_drvdata *drvdata) { - struct cc_cipher_handle *cipher_handle; struct cc_crypto_alg *t_alg; struct device *dev = drvdata_to_dev(drvdata); int rc = -ENOMEM; int alg; - cipher_handle = kmalloc(sizeof(*cipher_handle), GFP_KERNEL); - if (!cipher_handle) - return -ENOMEM; - - INIT_LIST_HEAD(&cipher_handle->alg_list); - drvdata->cipher_handle = cipher_handle; + INIT_LIST_HEAD(&drvdata->alg_list); /* Linux crypto */ dev_dbg(dev, "Number of algorithms = %zu\n", @@ -1723,14 +1701,12 @@ int cc_cipher_alloc(struct cc_drvdata *drvdata) if (rc) { dev_err(dev, "%s alg registration failed\n", t_alg->skcipher_alg.base.cra_driver_name); - kfree(t_alg); goto fail0; - } else { - list_add_tail(&t_alg->entry, - &cipher_handle->alg_list); - dev_dbg(dev, "Registered %s\n", - t_alg->skcipher_alg.base.cra_driver_name); } + + list_add_tail(&t_alg->entry, &drvdata->alg_list); + dev_dbg(dev, "Registered %s\n", + t_alg->skcipher_alg.base.cra_driver_name); } return 0; diff --git a/drivers/crypto/ccree/cc_debugfs.c b/drivers/crypto/ccree/cc_debugfs.c index 566999738698..c454afce7781 100644 --- a/drivers/crypto/ccree/cc_debugfs.c +++ b/drivers/crypto/ccree/cc_debugfs.c @@ -8,10 +8,6 @@ #include "cc_crypto_ctx.h" #include "cc_debugfs.h" -struct cc_debugfs_ctx { - struct dentry *dir; -}; - #define CC_DEBUG_REG(_X) { \ .name = __stringify(_X),\ .offset = CC_REG(_X) \ @@ -67,13 +63,8 @@ void __exit cc_debugfs_global_fini(void) int cc_debugfs_init(struct cc_drvdata *drvdata) { struct device *dev = drvdata_to_dev(drvdata); - struct cc_debugfs_ctx *ctx; struct debugfs_regset32 *regset, *verset; - ctx = devm_kzalloc(dev, sizeof(*ctx), GFP_KERNEL); - if (!ctx) - return -ENOMEM; - regset = devm_kzalloc(dev, sizeof(*regset), GFP_KERNEL); if (!regset) return -ENOMEM; @@ -81,16 +72,18 @@ int cc_debugfs_init(struct cc_drvdata *drvdata) regset->regs = debug_regs; regset->nregs = ARRAY_SIZE(debug_regs); regset->base 
= drvdata->cc_base; + regset->dev = dev; - ctx->dir = debugfs_create_dir(drvdata->plat_dev->name, cc_debugfs_dir); + drvdata->dir = debugfs_create_dir(drvdata->plat_dev->name, + cc_debugfs_dir); - debugfs_create_regset32("regs", 0400, ctx->dir, regset); - debugfs_create_bool("coherent", 0400, ctx->dir, &drvdata->coherent); + debugfs_create_regset32("regs", 0400, drvdata->dir, regset); + debugfs_create_bool("coherent", 0400, drvdata->dir, &drvdata->coherent); verset = devm_kzalloc(dev, sizeof(*verset), GFP_KERNEL); /* Failing here is not important enough to fail the module load */ if (!verset) - goto out; + return 0; if (drvdata->hw_rev <= CC_HW_REV_712) { ver_sig_regs[0].offset = drvdata->sig_offset; @@ -102,17 +95,13 @@ int cc_debugfs_init(struct cc_drvdata *drvdata) verset->nregs = ARRAY_SIZE(pid_cid_regs); } verset->base = drvdata->cc_base; + verset->dev = dev; - debugfs_create_regset32("version", 0400, ctx->dir, verset); - -out: - drvdata->debugfs = ctx; + debugfs_create_regset32("version", 0400, drvdata->dir, verset); return 0; } void cc_debugfs_fini(struct cc_drvdata *drvdata) { - struct cc_debugfs_ctx *ctx = (struct cc_debugfs_ctx *)drvdata->debugfs; - - debugfs_remove_recursive(ctx->dir); + debugfs_remove_recursive(drvdata->dir); } diff --git a/drivers/crypto/ccree/cc_driver.c b/drivers/crypto/ccree/cc_driver.c index 532bc95a8373..2d50991b9a17 100644 --- a/drivers/crypto/ccree/cc_driver.c +++ b/drivers/crypto/ccree/cc_driver.c @@ -14,6 +14,8 @@ #include <linux/of.h> #include <linux/clk.h> #include <linux/of_address.h> +#include <linux/of_device.h> +#include <linux/pm_runtime.h> #include "cc_driver.h" #include "cc_request_mgr.h" @@ -134,7 +136,7 @@ static irqreturn_t cc_isr(int irq, void *dev_id) /* STAT_OP_TYPE_GENERIC STAT_PHASE_0: Interrupt */ /* if driver suspended return, probably shared interrupt */ - if (cc_pm_is_dev_suspended(dev)) + if (pm_runtime_suspended(dev)) return IRQ_NONE; /* read the interrupt status */ @@ -269,7 +271,6 @@ static int init_cc_resources(struct platform_device *plat_dev) u32 val, hw_rev_pidr, sig_cidr; u64 dma_mask; const struct cc_hw_data *hw_rev; - const struct of_device_id *dev_id; struct clk *clk; int irq; int rc = 0; @@ -278,11 +279,7 @@ static int init_cc_resources(struct platform_device *plat_dev) if (!new_drvdata) return -ENOMEM; - dev_id = of_match_node(arm_ccree_dev_of_match, np); - if (!dev_id) - return -ENODEV; - - hw_rev = (struct cc_hw_data *)dev_id->data; + hw_rev = of_device_get_match_data(dev); new_drvdata->hw_rev_name = hw_rev->name; new_drvdata->hw_rev = hw_rev->rev; new_drvdata->std_bodies = hw_rev->std_bodies; @@ -302,22 +299,12 @@ static int init_cc_resources(struct platform_device *plat_dev) platform_set_drvdata(plat_dev, new_drvdata); new_drvdata->plat_dev = plat_dev; - clk = devm_clk_get(dev, NULL); - if (IS_ERR(clk)) - switch (PTR_ERR(clk)) { - /* Clock is optional so this might be fine */ - case -ENOENT: - break; - - /* Clock not available, let's try again soon */ - case -EPROBE_DEFER: - return -EPROBE_DEFER; - - default: - dev_err(dev, "Error getting clock: %ld\n", - PTR_ERR(clk)); - return PTR_ERR(clk); - } + clk = devm_clk_get_optional(dev, NULL); + if (IS_ERR(clk)) { + if (PTR_ERR(clk) != -EPROBE_DEFER) + dev_err(dev, "Error getting clock: %pe\n", clk); + return PTR_ERR(clk); + } new_drvdata->clk = clk; new_drvdata->coherent = of_dma_is_coherent(np); @@ -344,13 +331,13 @@ static int init_cc_resources(struct platform_device *plat_dev) init_completion(&new_drvdata->hw_queue_avail); - if (!plat_dev->dev.dma_mask) - 
plat_dev->dev.dma_mask = &plat_dev->dev.coherent_dma_mask; + if (!dev->dma_mask) + dev->dma_mask = &dev->coherent_dma_mask; dma_mask = DMA_BIT_MASK(DMA_BIT_MASK_LEN); while (dma_mask > 0x7fffffffUL) { - if (dma_supported(&plat_dev->dev, dma_mask)) { - rc = dma_set_coherent_mask(&plat_dev->dev, dma_mask); + if (dma_supported(dev, dma_mask)) { + rc = dma_set_coherent_mask(dev, dma_mask); if (!rc) break; } @@ -362,7 +349,7 @@ static int init_cc_resources(struct platform_device *plat_dev) return rc; } - rc = cc_clk_on(new_drvdata); + rc = clk_prepare_enable(new_drvdata->clk); if (rc) { dev_err(dev, "Failed to enable clock"); return rc; @@ -370,7 +357,17 @@ static int init_cc_resources(struct platform_device *plat_dev) new_drvdata->sec_disabled = cc_sec_disable; - /* wait for Crytpcell reset completion */ + pm_runtime_set_autosuspend_delay(dev, CC_SUSPEND_TIMEOUT); + pm_runtime_use_autosuspend(dev); + pm_runtime_set_active(dev); + pm_runtime_enable(dev); + rc = pm_runtime_get_sync(dev); + if (rc < 0) { + dev_err(dev, "pm_runtime_get_sync() failed: %d\n", rc); + goto post_pm_err; + } + + /* Wait for Cryptocell reset completion */ if (!cc_wait_for_reset_completion(new_drvdata)) { dev_err(dev, "Cryptocell reset not completed"); } @@ -382,7 +379,7 @@ static int init_cc_resources(struct platform_device *plat_dev) dev_err(dev, "Invalid CC signature: SIGNATURE=0x%08X != expected=0x%08X\n", val, hw_rev->sig); rc = -EINVAL; - goto post_clk_err; + goto post_pm_err; } sig_cidr = val; hw_rev_pidr = cc_ioread(new_drvdata, new_drvdata->ver_offset); @@ -393,7 +390,7 @@ static int init_cc_resources(struct platform_device *plat_dev) dev_err(dev, "Invalid CC PIDR: PIDR0124=0x%08X != expected=0x%08X\n", val, hw_rev->pidr_0124); rc = -EINVAL; - goto post_clk_err; + goto post_pm_err; } hw_rev_pidr = val; @@ -402,7 +399,7 @@ static int init_cc_resources(struct platform_device *plat_dev) dev_err(dev, "Invalid CC CIDR: CIDR0123=0x%08X != expected=0x%08X\n", val, hw_rev->cidr_0123); rc = -EINVAL; - goto post_clk_err; + goto post_pm_err; } sig_cidr = val; @@ -421,7 +418,7 @@ static int init_cc_resources(struct platform_device *plat_dev) default: dev_err(dev, "Unsupported engines configuration.\n"); rc = -EINVAL; - goto post_clk_err; + goto post_pm_err; } /* Check security disable state */ @@ -447,14 +444,14 @@ static int init_cc_resources(struct platform_device *plat_dev) new_drvdata); if (rc) { dev_err(dev, "Could not register to interrupt %d\n", irq); - goto post_clk_err; + goto post_pm_err; } dev_dbg(dev, "Registered to IRQ: %d\n", irq); rc = init_cc_regs(new_drvdata, true); if (rc) { dev_err(dev, "init_cc_regs failed\n"); - goto post_clk_err; + goto post_pm_err; } rc = cc_debugfs_init(new_drvdata); @@ -477,15 +474,14 @@ static int init_cc_resources(struct platform_device *plat_dev) new_drvdata->mlli_sram_addr = cc_sram_alloc(new_drvdata, MAX_MLLI_BUFF_SIZE); if (new_drvdata->mlli_sram_addr == NULL_SRAM_ADDR) { - dev_err(dev, "Failed to alloc MLLI Sram buffer\n"); rc = -ENOMEM; - goto post_sram_mgr_err; + goto post_fips_init_err; } rc = cc_req_mgr_init(new_drvdata); if (rc) { dev_err(dev, "cc_req_mgr_init failed\n"); - goto post_sram_mgr_err; + goto post_fips_init_err; } rc = cc_buffer_mgr_init(new_drvdata); @@ -494,12 +490,6 @@ static int init_cc_resources(struct platform_device *plat_dev) goto post_req_mgr_err; } - rc = cc_pm_init(new_drvdata); - if (rc) { - dev_err(dev, "cc_pm_init failed\n"); - goto post_buf_mgr_err; - } - /* Allocate crypto algs */ rc = cc_cipher_alloc(new_drvdata); if (rc) { @@ -520,15 +510,13 
@@ static int init_cc_resources(struct platform_device *plat_dev) goto post_hash_err; } - /* All set, we can allow autosuspend */ - cc_pm_go(new_drvdata); - /* If we got here and FIPS mode is enabled * it means all FIPS test passed, so let TEE * know we're good. */ cc_set_ree_fips_status(new_drvdata, true); + pm_runtime_put(dev); return 0; post_hash_err: @@ -539,16 +527,17 @@ post_buf_mgr_err: cc_buffer_mgr_fini(new_drvdata); post_req_mgr_err: cc_req_mgr_fini(new_drvdata); -post_sram_mgr_err: - cc_sram_mgr_fini(new_drvdata); post_fips_init_err: cc_fips_fini(new_drvdata); post_debugfs_err: cc_debugfs_fini(new_drvdata); post_regs_err: fini_cc_regs(new_drvdata); -post_clk_err: - cc_clk_off(new_drvdata); +post_pm_err: + pm_runtime_put_noidle(dev); + pm_runtime_disable(dev); + pm_runtime_set_suspended(dev); + clk_disable_unprepare(new_drvdata->clk); return rc; } @@ -560,36 +549,22 @@ void fini_cc_regs(struct cc_drvdata *drvdata) static void cleanup_cc_resources(struct platform_device *plat_dev) { + struct device *dev = &plat_dev->dev; struct cc_drvdata *drvdata = (struct cc_drvdata *)platform_get_drvdata(plat_dev); cc_aead_free(drvdata); cc_hash_free(drvdata); cc_cipher_free(drvdata); - cc_pm_fini(drvdata); cc_buffer_mgr_fini(drvdata); cc_req_mgr_fini(drvdata); - cc_sram_mgr_fini(drvdata); cc_fips_fini(drvdata); cc_debugfs_fini(drvdata); fini_cc_regs(drvdata); - cc_clk_off(drvdata); -} - -int cc_clk_on(struct cc_drvdata *drvdata) -{ - struct clk *clk = drvdata->clk; - int rc; - - if (IS_ERR(clk)) - /* Not all devices have a clock associated with CCREE */ - return 0; - - rc = clk_prepare_enable(clk); - if (rc) - return rc; - - return 0; + pm_runtime_put_noidle(dev); + pm_runtime_disable(dev); + pm_runtime_set_suspended(dev); + clk_disable_unprepare(drvdata->clk); } unsigned int cc_get_default_hash_len(struct cc_drvdata *drvdata) @@ -600,17 +575,6 @@ unsigned int cc_get_default_hash_len(struct cc_drvdata *drvdata) return HASH_LEN_SIZE_630; } -void cc_clk_off(struct cc_drvdata *drvdata) -{ - struct clk *clk = drvdata->clk; - - if (IS_ERR(clk)) - /* Not all devices have a clock associated with CCREE */ - return; - - clk_disable_unprepare(clk); -} - static int ccree_probe(struct platform_device *plat_dev) { int rc; @@ -653,7 +617,6 @@ static struct platform_driver ccree_driver = { static int __init ccree_init(void) { - cc_hash_global_init(); cc_debugfs_global_init(); return platform_driver_register(&ccree_driver); diff --git a/drivers/crypto/ccree/cc_driver.h b/drivers/crypto/ccree/cc_driver.h index c227718ba992..d938886390d2 100644 --- a/drivers/crypto/ccree/cc_driver.h +++ b/drivers/crypto/ccree/cc_driver.h @@ -26,7 +26,6 @@ #include <linux/clk.h> #include <linux/platform_device.h> -/* Registers definitions from shared/hw/ree_include */ #include "cc_host_regs.h" #include "cc_crypto_ctx.h" #include "cc_hw_queue_defs.h" @@ -71,9 +70,7 @@ enum cc_std_body { #define CC_NVM_IS_IDLE_MASK BIT(CC_NVM_IS_IDLE_VALUE_BIT_SHIFT) -#define AXIM_MON_COMP_VALUE GENMASK(CC_AXIM_MON_COMP_VALUE_BIT_SIZE + \ - CC_AXIM_MON_COMP_VALUE_BIT_SHIFT, \ - CC_AXIM_MON_COMP_VALUE_BIT_SHIFT) +#define AXIM_MON_COMP_VALUE CC_GENMASK(CC_AXIM_MON_COMP_VALUE) #define CC_CPP_AES_ABORT_MASK ( \ BIT(CC_HOST_IMR_REE_OP_ABORTED_AES_0_MASK_BIT_SHIFT) | \ @@ -139,15 +136,15 @@ struct cc_drvdata { int irq; struct completion hw_queue_avail; /* wait for HW queue availability */ struct platform_device *plat_dev; - cc_sram_addr_t mlli_sram_addr; - void *buff_mgr_handle; - void *cipher_handle; + u32 mlli_sram_addr; + struct dma_pool 
*mlli_buffs_pool; + struct list_head alg_list; void *hash_handle; void *aead_handle; void *request_mgr_handle; void *fips_handle; - void *sram_mgr_handle; - void *debugfs; + u32 sram_free_offset; /* offset to non-allocated area in SRAM */ + struct dentry *dir; /* for debugfs */ struct clk *clk; bool coherent; char *hw_rev_name; @@ -158,7 +155,6 @@ struct cc_drvdata { int std_bodies; bool sec_disabled; u32 comp_mask; - bool pm_on; }; struct cc_crypto_alg { @@ -212,8 +208,6 @@ static inline void dump_byte_array(const char *name, const u8 *the_array, bool cc_wait_for_reset_completion(struct cc_drvdata *drvdata); int init_cc_regs(struct cc_drvdata *drvdata, bool is_probe); void fini_cc_regs(struct cc_drvdata *drvdata); -int cc_clk_on(struct cc_drvdata *drvdata); -void cc_clk_off(struct cc_drvdata *drvdata); unsigned int cc_get_default_hash_len(struct cc_drvdata *drvdata); static inline void cc_iowrite(struct cc_drvdata *drvdata, u32 reg, u32 val) diff --git a/drivers/crypto/ccree/cc_hash.c b/drivers/crypto/ccree/cc_hash.c index 912e5ce5079d..d5310783af15 100644 --- a/drivers/crypto/ccree/cc_hash.c +++ b/drivers/crypto/ccree/cc_hash.c @@ -20,8 +20,8 @@ #define CC_SM3_HASH_LEN_SIZE 8 struct cc_hash_handle { - cc_sram_addr_t digest_len_sram_addr; /* const value in SRAM*/ - cc_sram_addr_t larval_digest_sram_addr; /* const value in SRAM */ + u32 digest_len_sram_addr; /* const value in SRAM*/ + u32 larval_digest_sram_addr; /* const value in SRAM */ struct list_head hash_list; }; @@ -39,12 +39,19 @@ static const u32 cc_sha256_init[] = { SHA256_H3, SHA256_H2, SHA256_H1, SHA256_H0 }; static const u32 cc_digest_len_sha512_init[] = { 0x00000080, 0x00000000, 0x00000000, 0x00000000 }; -static u64 cc_sha384_init[] = { - SHA384_H7, SHA384_H6, SHA384_H5, SHA384_H4, - SHA384_H3, SHA384_H2, SHA384_H1, SHA384_H0 }; -static u64 cc_sha512_init[] = { - SHA512_H7, SHA512_H6, SHA512_H5, SHA512_H4, - SHA512_H3, SHA512_H2, SHA512_H1, SHA512_H0 }; + +/* + * Due to the way the HW works, every double word in the SHA384 and SHA512 + * larval hashes must be stored in hi/lo order + */ +#define hilo(x) upper_32_bits(x), lower_32_bits(x) +static const u32 cc_sha384_init[] = { + hilo(SHA384_H7), hilo(SHA384_H6), hilo(SHA384_H5), hilo(SHA384_H4), + hilo(SHA384_H3), hilo(SHA384_H2), hilo(SHA384_H1), hilo(SHA384_H0) }; +static const u32 cc_sha512_init[] = { + hilo(SHA512_H7), hilo(SHA512_H6), hilo(SHA512_H5), hilo(SHA512_H4), + hilo(SHA512_H3), hilo(SHA512_H2), hilo(SHA512_H1), hilo(SHA512_H0) }; + static const u32 cc_sm3_init[] = { SM3_IVH, SM3_IVG, SM3_IVF, SM3_IVE, SM3_IVD, SM3_IVC, SM3_IVB, SM3_IVA }; @@ -342,7 +349,6 @@ static int cc_fin_result(struct cc_hw_desc *desc, struct ahash_request *req, /* Get final MAC result */ hw_desc_init(&desc[idx]); set_hash_cipher_mode(&desc[idx], ctx->hw_mode, ctx->hash_mode); - /* TODO */ set_dout_dlli(&desc[idx], state->digest_result_dma_addr, digestsize, NS_BIT, 1); set_queue_last_ind(ctx->drvdata, &desc[idx]); @@ -422,8 +428,7 @@ static int cc_hash_digest(struct ahash_request *req) bool is_hmac = ctx->is_hmac; struct cc_crypto_req cc_req = {}; struct cc_hw_desc desc[CC_MAX_HASH_SEQ_LEN]; - cc_sram_addr_t larval_digest_addr = - cc_larval_digest_addr(ctx->drvdata, ctx->hash_mode); + u32 larval_digest_addr; int idx = 0; int rc = 0; gfp_t flags = cc_gfp_flags(&req->base); @@ -465,6 +470,8 @@ static int cc_hash_digest(struct ahash_request *req) set_din_type(&desc[idx], DMA_DLLI, state->digest_buff_dma_addr, ctx->inter_digestsize, NS_BIT); } else { + larval_digest_addr = 
cc_larval_digest_addr(ctx->drvdata, + ctx->hash_mode); set_din_sram(&desc[idx], larval_digest_addr, ctx->inter_digestsize); } @@ -726,7 +733,7 @@ static int cc_hash_setkey(struct crypto_ahash *ahash, const u8 *key, int digestsize = 0; int i, idx = 0, rc = 0; struct cc_hw_desc desc[CC_MAX_HASH_SEQ_LEN]; - cc_sram_addr_t larval_addr; + u32 larval_addr; struct device *dev; ctx = crypto_ahash_ctx(ahash); @@ -752,7 +759,7 @@ static int cc_hash_setkey(struct crypto_ahash *ahash, const u8 *key, return -ENOMEM; ctx->key_params.key_dma_addr = - dma_map_single(dev, (void *)ctx->key_params.key, keylen, + dma_map_single(dev, ctx->key_params.key, keylen, DMA_TO_DEVICE); if (dma_mapping_error(dev, ctx->key_params.key_dma_addr)) { dev_err(dev, "Mapping key va=0x%p len=%u for DMA failed\n", @@ -1067,8 +1074,8 @@ static int cc_alloc_ctx(struct cc_hash_ctx *ctx) ctx->key_params.keylen = 0; ctx->digest_buff_dma_addr = - dma_map_single(dev, (void *)ctx->digest_buff, - sizeof(ctx->digest_buff), DMA_BIDIRECTIONAL); + dma_map_single(dev, ctx->digest_buff, sizeof(ctx->digest_buff), + DMA_BIDIRECTIONAL); if (dma_mapping_error(dev, ctx->digest_buff_dma_addr)) { dev_err(dev, "Mapping digest len %zu B at va=%pK for DMA failed\n", sizeof(ctx->digest_buff), ctx->digest_buff); @@ -1079,7 +1086,7 @@ static int cc_alloc_ctx(struct cc_hash_ctx *ctx) &ctx->digest_buff_dma_addr); ctx->opad_tmp_keys_dma_addr = - dma_map_single(dev, (void *)ctx->opad_tmp_keys_buff, + dma_map_single(dev, ctx->opad_tmp_keys_buff, sizeof(ctx->opad_tmp_keys_buff), DMA_BIDIRECTIONAL); if (dma_mapping_error(dev, ctx->opad_tmp_keys_dma_addr)) { @@ -1196,8 +1203,8 @@ static int cc_mac_update(struct ahash_request *req) idx++; /* Setup request structure */ - cc_req.user_cb = (void *)cc_update_complete; - cc_req.user_arg = (void *)req; + cc_req.user_cb = cc_update_complete; + cc_req.user_arg = req; rc = cc_send_request(ctx->drvdata, &cc_req, desc, idx, &req->base); if (rc != -EINPROGRESS && rc != -EBUSY) { @@ -1254,8 +1261,8 @@ static int cc_mac_final(struct ahash_request *req) } /* Setup request structure */ - cc_req.user_cb = (void *)cc_hash_complete; - cc_req.user_arg = (void *)req; + cc_req.user_cb = cc_hash_complete; + cc_req.user_arg = req; if (state->xcbc_count && rem_cnt == 0) { /* Load key for ECB decryption */ @@ -1311,7 +1318,6 @@ static int cc_mac_final(struct ahash_request *req) /* Get final MAC result */ hw_desc_init(&desc[idx]); - /* TODO */ set_dout_dlli(&desc[idx], state->digest_result_dma_addr, digestsize, NS_BIT, 1); set_queue_last_ind(ctx->drvdata, &desc[idx]); @@ -1369,8 +1375,8 @@ static int cc_mac_finup(struct ahash_request *req) } /* Setup request structure */ - cc_req.user_cb = (void *)cc_hash_complete; - cc_req.user_arg = (void *)req; + cc_req.user_cb = cc_hash_complete; + cc_req.user_arg = req; if (ctx->hw_mode == DRV_CIPHER_XCBC_MAC) { key_len = CC_AES_128_BIT_KEY_SIZE; @@ -1393,7 +1399,6 @@ static int cc_mac_finup(struct ahash_request *req) /* Get final MAC result */ hw_desc_init(&desc[idx]); - /* TODO */ set_dout_dlli(&desc[idx], state->digest_result_dma_addr, digestsize, NS_BIT, 1); set_queue_last_ind(ctx->drvdata, &desc[idx]); @@ -1448,8 +1453,8 @@ static int cc_mac_digest(struct ahash_request *req) } /* Setup request structure */ - cc_req.user_cb = (void *)cc_digest_complete; - cc_req.user_arg = (void *)req; + cc_req.user_cb = cc_digest_complete; + cc_req.user_arg = req; if (ctx->hw_mode == DRV_CIPHER_XCBC_MAC) { key_len = CC_AES_128_BIT_KEY_SIZE; @@ -1820,7 +1825,7 @@ static struct cc_hash_alg *cc_alloc_hash_alg(struct 
cc_hash_template *template, struct crypto_alg *alg; struct ahash_alg *halg; - t_crypto_alg = kzalloc(sizeof(*t_crypto_alg), GFP_KERNEL); + t_crypto_alg = devm_kzalloc(dev, sizeof(*t_crypto_alg), GFP_KERNEL); if (!t_crypto_alg) return ERR_PTR(-ENOMEM); @@ -1857,104 +1862,85 @@ static struct cc_hash_alg *cc_alloc_hash_alg(struct cc_hash_template *template, return t_crypto_alg; } +static int cc_init_copy_sram(struct cc_drvdata *drvdata, const u32 *data, + unsigned int size, u32 *sram_buff_ofs) +{ + struct cc_hw_desc larval_seq[CC_DIGEST_SIZE_MAX / sizeof(u32)]; + unsigned int larval_seq_len = 0; + int rc; + + cc_set_sram_desc(data, *sram_buff_ofs, size / sizeof(*data), + larval_seq, &larval_seq_len); + rc = send_request_init(drvdata, larval_seq, larval_seq_len); + if (rc) + return rc; + + *sram_buff_ofs += size; + return 0; +} + int cc_init_hash_sram(struct cc_drvdata *drvdata) { struct cc_hash_handle *hash_handle = drvdata->hash_handle; - cc_sram_addr_t sram_buff_ofs = hash_handle->digest_len_sram_addr; - unsigned int larval_seq_len = 0; - struct cc_hw_desc larval_seq[CC_DIGEST_SIZE_MAX / sizeof(u32)]; + u32 sram_buff_ofs = hash_handle->digest_len_sram_addr; bool large_sha_supported = (drvdata->hw_rev >= CC_HW_REV_712); bool sm3_supported = (drvdata->hw_rev >= CC_HW_REV_713); int rc = 0; /* Copy-to-sram digest-len */ - cc_set_sram_desc(cc_digest_len_init, sram_buff_ofs, - ARRAY_SIZE(cc_digest_len_init), larval_seq, - &larval_seq_len); - rc = send_request_init(drvdata, larval_seq, larval_seq_len); + rc = cc_init_copy_sram(drvdata, cc_digest_len_init, + sizeof(cc_digest_len_init), &sram_buff_ofs); if (rc) goto init_digest_const_err; - sram_buff_ofs += sizeof(cc_digest_len_init); - larval_seq_len = 0; - if (large_sha_supported) { /* Copy-to-sram digest-len for sha384/512 */ - cc_set_sram_desc(cc_digest_len_sha512_init, sram_buff_ofs, - ARRAY_SIZE(cc_digest_len_sha512_init), - larval_seq, &larval_seq_len); - rc = send_request_init(drvdata, larval_seq, larval_seq_len); + rc = cc_init_copy_sram(drvdata, cc_digest_len_sha512_init, + sizeof(cc_digest_len_sha512_init), + &sram_buff_ofs); if (rc) goto init_digest_const_err; - - sram_buff_ofs += sizeof(cc_digest_len_sha512_init); - larval_seq_len = 0; } /* The initial digests offset */ hash_handle->larval_digest_sram_addr = sram_buff_ofs; /* Copy-to-sram initial SHA* digests */ - cc_set_sram_desc(cc_md5_init, sram_buff_ofs, ARRAY_SIZE(cc_md5_init), - larval_seq, &larval_seq_len); - rc = send_request_init(drvdata, larval_seq, larval_seq_len); + rc = cc_init_copy_sram(drvdata, cc_md5_init, sizeof(cc_md5_init), + &sram_buff_ofs); if (rc) goto init_digest_const_err; - sram_buff_ofs += sizeof(cc_md5_init); - larval_seq_len = 0; - cc_set_sram_desc(cc_sha1_init, sram_buff_ofs, - ARRAY_SIZE(cc_sha1_init), larval_seq, - &larval_seq_len); - rc = send_request_init(drvdata, larval_seq, larval_seq_len); + rc = cc_init_copy_sram(drvdata, cc_sha1_init, sizeof(cc_sha1_init), + &sram_buff_ofs); if (rc) goto init_digest_const_err; - sram_buff_ofs += sizeof(cc_sha1_init); - larval_seq_len = 0; - cc_set_sram_desc(cc_sha224_init, sram_buff_ofs, - ARRAY_SIZE(cc_sha224_init), larval_seq, - &larval_seq_len); - rc = send_request_init(drvdata, larval_seq, larval_seq_len); + rc = cc_init_copy_sram(drvdata, cc_sha224_init, sizeof(cc_sha224_init), + &sram_buff_ofs); if (rc) goto init_digest_const_err; - sram_buff_ofs += sizeof(cc_sha224_init); - larval_seq_len = 0; - cc_set_sram_desc(cc_sha256_init, sram_buff_ofs, - ARRAY_SIZE(cc_sha256_init), larval_seq, - &larval_seq_len); - rc 
= send_request_init(drvdata, larval_seq, larval_seq_len); + rc = cc_init_copy_sram(drvdata, cc_sha256_init, sizeof(cc_sha256_init), + &sram_buff_ofs); if (rc) goto init_digest_const_err; - sram_buff_ofs += sizeof(cc_sha256_init); - larval_seq_len = 0; if (sm3_supported) { - cc_set_sram_desc(cc_sm3_init, sram_buff_ofs, - ARRAY_SIZE(cc_sm3_init), larval_seq, - &larval_seq_len); - rc = send_request_init(drvdata, larval_seq, larval_seq_len); + rc = cc_init_copy_sram(drvdata, cc_sm3_init, + sizeof(cc_sm3_init), &sram_buff_ofs); if (rc) goto init_digest_const_err; - sram_buff_ofs += sizeof(cc_sm3_init); - larval_seq_len = 0; } if (large_sha_supported) { - cc_set_sram_desc((u32 *)cc_sha384_init, sram_buff_ofs, - (ARRAY_SIZE(cc_sha384_init) * 2), larval_seq, - &larval_seq_len); - rc = send_request_init(drvdata, larval_seq, larval_seq_len); + rc = cc_init_copy_sram(drvdata, cc_sha384_init, + sizeof(cc_sha384_init), &sram_buff_ofs); if (rc) goto init_digest_const_err; - sram_buff_ofs += sizeof(cc_sha384_init); - larval_seq_len = 0; - cc_set_sram_desc((u32 *)cc_sha512_init, sram_buff_ofs, - (ARRAY_SIZE(cc_sha512_init) * 2), larval_seq, - &larval_seq_len); - rc = send_request_init(drvdata, larval_seq, larval_seq_len); + rc = cc_init_copy_sram(drvdata, cc_sha512_init, + sizeof(cc_sha512_init), &sram_buff_ofs); if (rc) goto init_digest_const_err; } @@ -1963,38 +1949,16 @@ init_digest_const_err: return rc; } -static void __init cc_swap_dwords(u32 *buf, unsigned long size) -{ - int i; - u32 tmp; - - for (i = 0; i < size; i += 2) { - tmp = buf[i]; - buf[i] = buf[i + 1]; - buf[i + 1] = tmp; - } -} - -/* - * Due to the way the HW works we need to swap every - * double word in the SHA384 and SHA512 larval hashes - */ -void __init cc_hash_global_init(void) -{ - cc_swap_dwords((u32 *)&cc_sha384_init, (ARRAY_SIZE(cc_sha384_init) * 2)); - cc_swap_dwords((u32 *)&cc_sha512_init, (ARRAY_SIZE(cc_sha512_init) * 2)); -} - int cc_hash_alloc(struct cc_drvdata *drvdata) { struct cc_hash_handle *hash_handle; - cc_sram_addr_t sram_buff; + u32 sram_buff; u32 sram_size_to_alloc; struct device *dev = drvdata_to_dev(drvdata); int rc = 0; int alg; - hash_handle = kzalloc(sizeof(*hash_handle), GFP_KERNEL); + hash_handle = devm_kzalloc(dev, sizeof(*hash_handle), GFP_KERNEL); if (!hash_handle) return -ENOMEM; @@ -2016,7 +1980,6 @@ int cc_hash_alloc(struct cc_drvdata *drvdata) sram_buff = cc_sram_alloc(drvdata, sram_size_to_alloc); if (sram_buff == NULL_SRAM_ADDR) { - dev_err(dev, "SRAM pool exhausted\n"); rc = -ENOMEM; goto fail; } @@ -2056,12 +2019,10 @@ int cc_hash_alloc(struct cc_drvdata *drvdata) if (rc) { dev_err(dev, "%s alg registration failed\n", driver_hash[alg].driver_name); - kfree(t_alg); goto fail; - } else { - list_add_tail(&t_alg->entry, - &hash_handle->hash_list); } + + list_add_tail(&t_alg->entry, &hash_handle->hash_list); } if (hw_mode == DRV_CIPHER_XCBC_MAC || hw_mode == DRV_CIPHER_CMAC) @@ -2081,18 +2042,16 @@ int cc_hash_alloc(struct cc_drvdata *drvdata) if (rc) { dev_err(dev, "%s alg registration failed\n", driver_hash[alg].driver_name); - kfree(t_alg); goto fail; - } else { - list_add_tail(&t_alg->entry, &hash_handle->hash_list); } + + list_add_tail(&t_alg->entry, &hash_handle->hash_list); } return 0; fail: - kfree(drvdata->hash_handle); - drvdata->hash_handle = NULL; + cc_hash_free(drvdata); return rc; } @@ -2101,17 +2060,12 @@ int cc_hash_free(struct cc_drvdata *drvdata) struct cc_hash_alg *t_hash_alg, *hash_n; struct cc_hash_handle *hash_handle = drvdata->hash_handle; - if (hash_handle) { - 
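/*
 * The hunk above removes cc_swap_dwords()/cc_hash_global_init(): instead of
 * swapping every 64-bit initial hash word at module init, the SHA-384/512
 * larval tables are now declared directly in the hi/lo order the hardware
 * expects, via the hilo() helper added earlier in this patch.  A minimal
 * sketch of that pattern, assuming only the kernel's upper_32_bits() and
 * lower_32_bits() helpers; EXAMPLE_H0/H1 and example_init[] are illustrative
 * names, not taken from the driver.
 */
#include <linux/kernel.h>	/* upper_32_bits(), lower_32_bits() */
#include <linux/types.h>

#define EXAMPLE_H0	0x6a09e667f3bcc908ULL
#define EXAMPLE_H1	0xbb67ae8584caa73bULL

/* Emit each 64-bit constant as a hi,lo pair of 32-bit words. */
#define hilo(x)		upper_32_bits(x), lower_32_bits(x)

static const u32 example_init[] = {
	hilo(EXAMPLE_H1), hilo(EXAMPLE_H0),	/* high word stored first */
};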
list_for_each_entry_safe(t_hash_alg, hash_n, - &hash_handle->hash_list, entry) { - crypto_unregister_ahash(&t_hash_alg->ahash_alg); - list_del(&t_hash_alg->entry); - kfree(t_hash_alg); - } - - kfree(hash_handle); - drvdata->hash_handle = NULL; + list_for_each_entry_safe(t_hash_alg, hash_n, &hash_handle->hash_list, + entry) { + crypto_unregister_ahash(&t_hash_alg->ahash_alg); + list_del(&t_hash_alg->entry); } + return 0; } @@ -2272,22 +2226,23 @@ static const void *cc_larval_digest(struct device *dev, u32 mode) } } -/*! - * Gets the address of the initial digest in SRAM +/** + * cc_larval_digest_addr() - Get the address of the initial digest in SRAM * according to the given hash mode * - * \param drvdata - * \param mode The Hash mode. Supported modes: MD5/SHA1/SHA224/SHA256 + * @drvdata: Associated device driver context + * @mode: The Hash mode. Supported modes: MD5/SHA1/SHA224/SHA256 * - * \return u32 The address of the initial digest in SRAM + * Return: + * The address of the initial digest in SRAM */ -cc_sram_addr_t cc_larval_digest_addr(void *drvdata, u32 mode) +u32 cc_larval_digest_addr(void *drvdata, u32 mode) { struct cc_drvdata *_drvdata = (struct cc_drvdata *)drvdata; struct cc_hash_handle *hash_handle = _drvdata->hash_handle; struct device *dev = drvdata_to_dev(_drvdata); bool sm3_supported = (_drvdata->hw_rev >= CC_HW_REV_713); - cc_sram_addr_t addr; + u32 addr; switch (mode) { case DRV_HASH_NULL: @@ -2339,12 +2294,11 @@ cc_sram_addr_t cc_larval_digest_addr(void *drvdata, u32 mode) return hash_handle->larval_digest_sram_addr; } -cc_sram_addr_t -cc_digest_len_addr(void *drvdata, u32 mode) +u32 cc_digest_len_addr(void *drvdata, u32 mode) { struct cc_drvdata *_drvdata = (struct cc_drvdata *)drvdata; struct cc_hash_handle *hash_handle = _drvdata->hash_handle; - cc_sram_addr_t digest_len_addr = hash_handle->digest_len_sram_addr; + u32 digest_len_addr = hash_handle->digest_len_sram_addr; switch (mode) { case DRV_HASH_SHA1: diff --git a/drivers/crypto/ccree/cc_hash.h b/drivers/crypto/ccree/cc_hash.h index 0d6dc61484d7..3d0f2179e07e 100644 --- a/drivers/crypto/ccree/cc_hash.h +++ b/drivers/crypto/ccree/cc_hash.h @@ -80,30 +80,27 @@ int cc_hash_alloc(struct cc_drvdata *drvdata); int cc_init_hash_sram(struct cc_drvdata *drvdata); int cc_hash_free(struct cc_drvdata *drvdata); -/*! - * Gets the initial digest length +/** + * cc_digest_len_addr() - Gets the initial digest length * - * \param drvdata - * \param mode The Hash mode. Supported modes: - * MD5/SHA1/SHA224/SHA256/SHA384/SHA512 + * @drvdata: Associated device driver context + * @mode: The Hash mode. Supported modes: MD5/SHA1/SHA224/SHA256/SHA384/SHA512 * - * \return u32 returns the address of the initial digest length in SRAM + * Return: + * Returns the address of the initial digest length in SRAM */ -cc_sram_addr_t -cc_digest_len_addr(void *drvdata, u32 mode); +u32 cc_digest_len_addr(void *drvdata, u32 mode); -/*! - * Gets the address of the initial digest in SRAM +/** + * cc_larval_digest_addr() - Gets the address of the initial digest in SRAM * according to the given hash mode * - * \param drvdata - * \param mode The Hash mode. Supported modes: - * MD5/SHA1/SHA224/SHA256/SHA384/SHA512 + * @drvdata: Associated device driver context + * @mode: The Hash mode. 
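/*
 * cc_hash_free() above no longer kfree()s the per-algorithm wrappers or the
 * hash handle because both are now allocated with devm_kzalloc() and are
 * released by devres when the device is unbound.  A minimal sketch of that
 * allocation pattern, assuming only the core devres API; struct ex_thing and
 * ex_probe_helper() are illustrative, not the driver's code.
 */
#include <linux/device.h>
#include <linux/errno.h>
#include <linux/slab.h>

struct ex_thing {
	int value;
};

static int ex_probe_helper(struct device *dev)
{
	struct ex_thing *t = devm_kzalloc(dev, sizeof(*t), GFP_KERNEL);

	if (!t)
		return -ENOMEM;

	dev_set_drvdata(dev, t);
	/* No matching kfree(): devres frees 't' when the driver detaches. */
	return 0;
}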
Supported modes: MD5/SHA1/SHA224/SHA256/SHA384/SHA512 * - * \return u32 The address of the initial digest in SRAM + * Return: + * The address of the initial digest in SRAM */ -cc_sram_addr_t cc_larval_digest_addr(void *drvdata, u32 mode); - -void cc_hash_global_init(void); +u32 cc_larval_digest_addr(void *drvdata, u32 mode); #endif /*__CC_HASH_H__*/ diff --git a/drivers/crypto/ccree/cc_hw_queue_defs.h b/drivers/crypto/ccree/cc_hw_queue_defs.h index 9f4db9956e91..15df58c66911 100644 --- a/drivers/crypto/ccree/cc_hw_queue_defs.h +++ b/drivers/crypto/ccree/cc_hw_queue_defs.h @@ -17,46 +17,43 @@ /* Define max. available slots in HW queue */ #define HW_QUEUE_SLOTS_MAX 15 -#define CC_REG_LOW(word, name) \ - (CC_DSCRPTR_QUEUE_WORD ## word ## _ ## name ## _BIT_SHIFT) - -#define CC_REG_HIGH(word, name) \ - (CC_REG_LOW(word, name) + \ - CC_DSCRPTR_QUEUE_WORD ## word ## _ ## name ## _BIT_SIZE - 1) - -#define CC_GENMASK(word, name) \ - GENMASK(CC_REG_HIGH(word, name), CC_REG_LOW(word, name)) - -#define WORD0_VALUE CC_GENMASK(0, VALUE) -#define WORD0_CPP_CIPHER_MODE CC_GENMASK(0, CPP_CIPHER_MODE) -#define WORD1_DIN_CONST_VALUE CC_GENMASK(1, DIN_CONST_VALUE) -#define WORD1_DIN_DMA_MODE CC_GENMASK(1, DIN_DMA_MODE) -#define WORD1_DIN_SIZE CC_GENMASK(1, DIN_SIZE) -#define WORD1_NOT_LAST CC_GENMASK(1, NOT_LAST) -#define WORD1_NS_BIT CC_GENMASK(1, NS_BIT) -#define WORD1_LOCK_QUEUE CC_GENMASK(1, LOCK_QUEUE) -#define WORD2_VALUE CC_GENMASK(2, VALUE) -#define WORD3_DOUT_DMA_MODE CC_GENMASK(3, DOUT_DMA_MODE) -#define WORD3_DOUT_LAST_IND CC_GENMASK(3, DOUT_LAST_IND) -#define WORD3_DOUT_SIZE CC_GENMASK(3, DOUT_SIZE) -#define WORD3_HASH_XOR_BIT CC_GENMASK(3, HASH_XOR_BIT) -#define WORD3_NS_BIT CC_GENMASK(3, NS_BIT) -#define WORD3_QUEUE_LAST_IND CC_GENMASK(3, QUEUE_LAST_IND) -#define WORD4_ACK_NEEDED CC_GENMASK(4, ACK_NEEDED) -#define WORD4_AES_SEL_N_HASH CC_GENMASK(4, AES_SEL_N_HASH) -#define WORD4_AES_XOR_CRYPTO_KEY CC_GENMASK(4, AES_XOR_CRYPTO_KEY) -#define WORD4_BYTES_SWAP CC_GENMASK(4, BYTES_SWAP) -#define WORD4_CIPHER_CONF0 CC_GENMASK(4, CIPHER_CONF0) -#define WORD4_CIPHER_CONF1 CC_GENMASK(4, CIPHER_CONF1) -#define WORD4_CIPHER_CONF2 CC_GENMASK(4, CIPHER_CONF2) -#define WORD4_CIPHER_DO CC_GENMASK(4, CIPHER_DO) -#define WORD4_CIPHER_MODE CC_GENMASK(4, CIPHER_MODE) -#define WORD4_CMAC_SIZE0 CC_GENMASK(4, CMAC_SIZE0) -#define WORD4_DATA_FLOW_MODE CC_GENMASK(4, DATA_FLOW_MODE) -#define WORD4_KEY_SIZE CC_GENMASK(4, KEY_SIZE) -#define WORD4_SETUP_OPERATION CC_GENMASK(4, SETUP_OPERATION) -#define WORD5_DIN_ADDR_HIGH CC_GENMASK(5, DIN_ADDR_HIGH) -#define WORD5_DOUT_ADDR_HIGH CC_GENMASK(5, DOUT_ADDR_HIGH) +#define CC_REG_LOW(name) (name ## _BIT_SHIFT) +#define CC_REG_HIGH(name) (CC_REG_LOW(name) + name ## _BIT_SIZE - 1) +#define CC_GENMASK(name) GENMASK(CC_REG_HIGH(name), CC_REG_LOW(name)) + +#define CC_HWQ_GENMASK(word, field) \ + CC_GENMASK(CC_DSCRPTR_QUEUE_WORD ## word ## _ ## field) + +#define WORD0_VALUE CC_HWQ_GENMASK(0, VALUE) +#define WORD0_CPP_CIPHER_MODE CC_HWQ_GENMASK(0, CPP_CIPHER_MODE) +#define WORD1_DIN_CONST_VALUE CC_HWQ_GENMASK(1, DIN_CONST_VALUE) +#define WORD1_DIN_DMA_MODE CC_HWQ_GENMASK(1, DIN_DMA_MODE) +#define WORD1_DIN_SIZE CC_HWQ_GENMASK(1, DIN_SIZE) +#define WORD1_NOT_LAST CC_HWQ_GENMASK(1, NOT_LAST) +#define WORD1_NS_BIT CC_HWQ_GENMASK(1, NS_BIT) +#define WORD1_LOCK_QUEUE CC_HWQ_GENMASK(1, LOCK_QUEUE) +#define WORD2_VALUE CC_HWQ_GENMASK(2, VALUE) +#define WORD3_DOUT_DMA_MODE CC_HWQ_GENMASK(3, DOUT_DMA_MODE) +#define WORD3_DOUT_LAST_IND CC_HWQ_GENMASK(3, DOUT_LAST_IND) +#define WORD3_DOUT_SIZE 
CC_HWQ_GENMASK(3, DOUT_SIZE) +#define WORD3_HASH_XOR_BIT CC_HWQ_GENMASK(3, HASH_XOR_BIT) +#define WORD3_NS_BIT CC_HWQ_GENMASK(3, NS_BIT) +#define WORD3_QUEUE_LAST_IND CC_HWQ_GENMASK(3, QUEUE_LAST_IND) +#define WORD4_ACK_NEEDED CC_HWQ_GENMASK(4, ACK_NEEDED) +#define WORD4_AES_SEL_N_HASH CC_HWQ_GENMASK(4, AES_SEL_N_HASH) +#define WORD4_AES_XOR_CRYPTO_KEY CC_HWQ_GENMASK(4, AES_XOR_CRYPTO_KEY) +#define WORD4_BYTES_SWAP CC_HWQ_GENMASK(4, BYTES_SWAP) +#define WORD4_CIPHER_CONF0 CC_HWQ_GENMASK(4, CIPHER_CONF0) +#define WORD4_CIPHER_CONF1 CC_HWQ_GENMASK(4, CIPHER_CONF1) +#define WORD4_CIPHER_CONF2 CC_HWQ_GENMASK(4, CIPHER_CONF2) +#define WORD4_CIPHER_DO CC_HWQ_GENMASK(4, CIPHER_DO) +#define WORD4_CIPHER_MODE CC_HWQ_GENMASK(4, CIPHER_MODE) +#define WORD4_CMAC_SIZE0 CC_HWQ_GENMASK(4, CMAC_SIZE0) +#define WORD4_DATA_FLOW_MODE CC_HWQ_GENMASK(4, DATA_FLOW_MODE) +#define WORD4_KEY_SIZE CC_HWQ_GENMASK(4, KEY_SIZE) +#define WORD4_SETUP_OPERATION CC_HWQ_GENMASK(4, SETUP_OPERATION) +#define WORD5_DIN_ADDR_HIGH CC_HWQ_GENMASK(5, DIN_ADDR_HIGH) +#define WORD5_DOUT_ADDR_HIGH CC_HWQ_GENMASK(5, DOUT_ADDR_HIGH) /****************************************************************************** * TYPE DEFINITIONS @@ -207,31 +204,32 @@ enum cc_hash_cipher_pad { /* Descriptor packing macros */ /*****************************/ -/* - * Init a HW descriptor struct - * @pdesc: pointer HW descriptor struct +/** + * hw_desc_init() - Init a HW descriptor struct + * @pdesc: pointer to HW descriptor struct */ static inline void hw_desc_init(struct cc_hw_desc *pdesc) { memset(pdesc, 0, sizeof(struct cc_hw_desc)); } -/* - * Indicates the end of current HW descriptors flow and release the HW engines. +/** + * set_queue_last_ind_bit() - Indicate the end of current HW descriptors flow + * and release the HW engines. * - * @pdesc: pointer HW descriptor struct + * @pdesc: Pointer to HW descriptor struct */ static inline void set_queue_last_ind_bit(struct cc_hw_desc *pdesc) { pdesc->word[3] |= FIELD_PREP(WORD3_QUEUE_LAST_IND, 1); } -/* - * Set the DIN field of a HW descriptors +/** + * set_din_type() - Set the DIN field of a HW descriptor * - * @pdesc: pointer HW descriptor struct - * @dma_mode: dmaMode The DMA mode: NO_DMA, SRAM, DLLI, MLLI, CONSTANT - * @addr: dinAdr DIN address + * @pdesc: Pointer to HW descriptor struct + * @dma_mode: The DMA mode: NO_DMA, SRAM, DLLI, MLLI, CONSTANT + * @addr: DIN address * @size: Data size in bytes * @axi_sec: AXI secure bit */ @@ -239,20 +237,20 @@ static inline void set_din_type(struct cc_hw_desc *pdesc, enum cc_dma_mode dma_mode, dma_addr_t addr, u32 size, enum cc_axi_sec axi_sec) { - pdesc->word[0] = (u32)addr; + pdesc->word[0] = lower_32_bits(addr); #ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT - pdesc->word[5] |= FIELD_PREP(WORD5_DIN_ADDR_HIGH, ((u16)(addr >> 32))); + pdesc->word[5] |= FIELD_PREP(WORD5_DIN_ADDR_HIGH, upper_32_bits(addr)); #endif pdesc->word[1] |= FIELD_PREP(WORD1_DIN_DMA_MODE, dma_mode) | FIELD_PREP(WORD1_DIN_SIZE, size) | FIELD_PREP(WORD1_NS_BIT, axi_sec); } -/* - * Set the DIN field of a HW descriptors to NO DMA mode. +/** + * set_din_no_dma() - Set the DIN field of a HW descriptor to NO DMA mode. * Used for NOP descriptor, register patches and other special modes. 
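/*
 * The WORD*_ masks above are now produced by one generic GENMASK() wrapper
 * keyed on the field's _BIT_SHIFT/_BIT_SIZE names, and each setter packs its
 * value with FIELD_PREP().  A self-contained sketch of that macro pattern;
 * the EX_* shift/size constants and ex_pack_din_size() are made-up
 * illustrations, not the driver's real field layout.
 */
#include <linux/bitfield.h>	/* FIELD_PREP() */
#include <linux/bits.h>		/* GENMASK() */
#include <linux/types.h>

#define EX_DIN_SIZE_BIT_SHIFT	2
#define EX_DIN_SIZE_BIT_SIZE	24

#define EX_REG_LOW(name)	(name ## _BIT_SHIFT)
#define EX_REG_HIGH(name)	(EX_REG_LOW(name) + name ## _BIT_SIZE - 1)
#define EX_GENMASK(name)	GENMASK(EX_REG_HIGH(name), EX_REG_LOW(name))

#define EX_WORD1_DIN_SIZE	EX_GENMASK(EX_DIN_SIZE)

static inline u32 ex_pack_din_size(u32 word, u32 size)
{
	/* Place 'size' into bits [25:2] of the descriptor word. */
	return word | FIELD_PREP(EX_WORD1_DIN_SIZE, size);
}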
* - * @pdesc: pointer HW descriptor struct + * @pdesc: Pointer to HW descriptor struct * @addr: DIN address * @size: Data size in bytes */ @@ -262,14 +260,11 @@ static inline void set_din_no_dma(struct cc_hw_desc *pdesc, u32 addr, u32 size) pdesc->word[1] |= FIELD_PREP(WORD1_DIN_SIZE, size); } -/* - * Setup the special CPP descriptor +/** + * set_cpp_crypto_key() - Setup the special CPP descriptor * - * @pdesc: pointer HW descriptor struct - * @alg: cipher used (AES / SM4) - * @mode: mode used (CTR or CBC) - * @slot: slot number - * @ksize: key size + * @pdesc: Pointer to HW descriptor struct + * @slot: Slot number */ static inline void set_cpp_crypto_key(struct cc_hw_desc *pdesc, u8 slot) { @@ -281,27 +276,26 @@ static inline void set_cpp_crypto_key(struct cc_hw_desc *pdesc, u8 slot) pdesc->word[4] |= FIELD_PREP(WORD4_SETUP_OPERATION, slot); } -/* - * Set the DIN field of a HW descriptors to SRAM mode. +/** + * set_din_sram() - Set the DIN field of a HW descriptor to SRAM mode. * Note: No need to check SRAM alignment since host requests do not use SRAM and - * adaptor will enforce alignment check. + * the adaptor will enforce alignment checks. * - * @pdesc: pointer HW descriptor struct + * @pdesc: Pointer to HW descriptor struct * @addr: DIN address - * @size Data size in bytes + * @size: Data size in bytes */ -static inline void set_din_sram(struct cc_hw_desc *pdesc, dma_addr_t addr, - u32 size) +static inline void set_din_sram(struct cc_hw_desc *pdesc, u32 addr, u32 size) { - pdesc->word[0] = (u32)addr; + pdesc->word[0] = addr; pdesc->word[1] |= FIELD_PREP(WORD1_DIN_SIZE, size) | FIELD_PREP(WORD1_DIN_DMA_MODE, DMA_SRAM); } -/* - * Set the DIN field of a HW descriptors to CONST mode +/** + * set_din_const() - Set the DIN field of a HW descriptor to CONST mode * - * @pdesc: pointer HW descriptor struct + * @pdesc: Pointer to HW descriptor struct * @val: DIN const value * @size: Data size in bytes */ @@ -313,20 +307,20 @@ static inline void set_din_const(struct cc_hw_desc *pdesc, u32 val, u32 size) FIELD_PREP(WORD1_DIN_SIZE, size); } -/* - * Set the DIN not last input data indicator +/** + * set_din_not_last_indication() - Set the DIN not last input data indicator * - * @pdesc: pointer HW descriptor struct + * @pdesc: Pointer to HW descriptor struct */ static inline void set_din_not_last_indication(struct cc_hw_desc *pdesc) { pdesc->word[1] |= FIELD_PREP(WORD1_NOT_LAST, 1); } -/* - * Set the DOUT field of a HW descriptors +/** + * set_dout_type() - Set the DOUT field of a HW descriptor * - * @pdesc: pointer HW descriptor struct + * @pdesc: Pointer to HW descriptor struct * @dma_mode: The DMA mode: NO_DMA, SRAM, DLLI, MLLI, CONSTANT * @addr: DOUT address * @size: Data size in bytes @@ -336,24 +330,24 @@ static inline void set_dout_type(struct cc_hw_desc *pdesc, enum cc_dma_mode dma_mode, dma_addr_t addr, u32 size, enum cc_axi_sec axi_sec) { - pdesc->word[2] = (u32)addr; + pdesc->word[2] = lower_32_bits(addr); #ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT - pdesc->word[5] |= FIELD_PREP(WORD5_DOUT_ADDR_HIGH, ((u16)(addr >> 32))); + pdesc->word[5] |= FIELD_PREP(WORD5_DOUT_ADDR_HIGH, upper_32_bits(addr)); #endif pdesc->word[3] |= FIELD_PREP(WORD3_DOUT_DMA_MODE, dma_mode) | FIELD_PREP(WORD3_DOUT_SIZE, size) | FIELD_PREP(WORD3_NS_BIT, axi_sec); } -/* - * Set the DOUT field of a HW descriptors to DLLI type +/** + * set_dout_dlli() - Set the DOUT field of a HW descriptor to DLLI type * The LAST INDICATION is provided by the user * - * @pdesc pointer HW descriptor struct + * @pdesc: Pointer to HW descriptor 
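/*
 * set_din_type()/set_dout_type() above now split the DMA address with
 * lower_32_bits()/upper_32_bits() rather than the old (u32) and (u16) casts.
 * A minimal sketch of just that split, assuming nothing about the real
 * descriptor layout; struct ex_desc and ex_set_addr() are illustrative.
 */
#include <linux/kernel.h>	/* lower_32_bits(), upper_32_bits() */
#include <linux/types.h>	/* dma_addr_t, u32 */

struct ex_desc {
	u32 word[6];
};

static inline void ex_set_addr(struct ex_desc *d, dma_addr_t addr)
{
	d->word[0] = lower_32_bits(addr);	/* low 32 address bits */
	d->word[5] = upper_32_bits(addr);	/* high bits, no narrowing cast */
}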
struct * @addr: DOUT address * @size: Data size in bytes - * @last_ind: The last indication bit * @axi_sec: AXI secure bit + * @last_ind: The last indication bit */ static inline void set_dout_dlli(struct cc_hw_desc *pdesc, dma_addr_t addr, u32 size, enum cc_axi_sec axi_sec, @@ -363,29 +357,28 @@ static inline void set_dout_dlli(struct cc_hw_desc *pdesc, dma_addr_t addr, pdesc->word[3] |= FIELD_PREP(WORD3_DOUT_LAST_IND, last_ind); } -/* - * Set the DOUT field of a HW descriptors to DLLI type +/** + * set_dout_mlli() - Set the DOUT field of a HW descriptor to MLLI type * The LAST INDICATION is provided by the user * - * @pdesc: pointer HW descriptor struct + * @pdesc: Pointer to HW descriptor struct * @addr: DOUT address * @size: Data size in bytes - * @last_ind: The last indication bit * @axi_sec: AXI secure bit + * @last_ind: The last indication bit */ -static inline void set_dout_mlli(struct cc_hw_desc *pdesc, dma_addr_t addr, - u32 size, enum cc_axi_sec axi_sec, - bool last_ind) +static inline void set_dout_mlli(struct cc_hw_desc *pdesc, u32 addr, u32 size, + enum cc_axi_sec axi_sec, bool last_ind) { set_dout_type(pdesc, DMA_MLLI, addr, size, axi_sec); pdesc->word[3] |= FIELD_PREP(WORD3_DOUT_LAST_IND, last_ind); } -/* - * Set the DOUT field of a HW descriptors to NO DMA mode. +/** + * set_dout_no_dma() - Set the DOUT field of a HW descriptor to NO DMA mode. * Used for NOP descriptor, register patches and other special modes. * - * @pdesc: pointer HW descriptor struct + * @pdesc: pointer to HW descriptor struct * @addr: DOUT address * @size: Data size in bytes * @write_enable: Enables a write operation to a register @@ -398,54 +391,55 @@ static inline void set_dout_no_dma(struct cc_hw_desc *pdesc, u32 addr, FIELD_PREP(WORD3_DOUT_LAST_IND, write_enable); } -/* - * Set the word for the XOR operation. +/** + * set_xor_val() - Set the word for the XOR operation. 
* - * @pdesc: pointer HW descriptor struct - * @val: xor data value + * @pdesc: Pointer to HW descriptor struct + * @val: XOR data value */ static inline void set_xor_val(struct cc_hw_desc *pdesc, u32 val) { pdesc->word[2] = val; } -/* - * Sets the XOR indicator bit in the descriptor +/** + * set_xor_active() - Set the XOR indicator bit in the descriptor * - * @pdesc: pointer HW descriptor struct + * @pdesc: Pointer to HW descriptor struct */ static inline void set_xor_active(struct cc_hw_desc *pdesc) { pdesc->word[3] |= FIELD_PREP(WORD3_HASH_XOR_BIT, 1); } -/* - * Select the AES engine instead of HASH engine when setting up combined mode - * with AES XCBC MAC +/** + * set_aes_not_hash_mode() - Select the AES engine instead of HASH engine when + * setting up combined mode with AES XCBC MAC * - * @pdesc: pointer HW descriptor struct + * @pdesc: Pointer to HW descriptor struct */ static inline void set_aes_not_hash_mode(struct cc_hw_desc *pdesc) { pdesc->word[4] |= FIELD_PREP(WORD4_AES_SEL_N_HASH, 1); } -/* - * Set aes xor crypto key, this in some secenrios select SM3 engine +/** + * set_aes_xor_crypto_key() - Set aes xor crypto key, which in some scenarios + * selects the SM3 engine * - * @pdesc: pointer HW descriptor struct + * @pdesc: Pointer to HW descriptor struct */ static inline void set_aes_xor_crypto_key(struct cc_hw_desc *pdesc) { pdesc->word[4] |= FIELD_PREP(WORD4_AES_XOR_CRYPTO_KEY, 1); } -/* - * Set the DOUT field of a HW descriptors to SRAM mode +/** + * set_dout_sram() - Set the DOUT field of a HW descriptor to SRAM mode * Note: No need to check SRAM alignment since host requests do not use SRAM and - * adaptor will enforce alignment check. + * the adaptor will enforce alignment checks. * - * @pdesc: pointer HW descriptor struct + * @pdesc: Pointer to HW descriptor struct * @addr: DOUT address * @size: Data size in bytes */ @@ -456,32 +450,34 @@ static inline void set_dout_sram(struct cc_hw_desc *pdesc, u32 addr, u32 size) FIELD_PREP(WORD3_DOUT_SIZE, size); } -/* - * Sets the data unit size for XEX mode in data_out_addr[15:0] +/** + * set_xex_data_unit_size() - Set the data unit size for XEX mode in + * data_out_addr[15:0] * - * @pdesc: pDesc pointer HW descriptor struct - * @size: data unit size for XEX mode + * @pdesc: Pointer to HW descriptor struct + * @size: Data unit size for XEX mode */ static inline void set_xex_data_unit_size(struct cc_hw_desc *pdesc, u32 size) { pdesc->word[2] = size; } -/* - * Set the number of rounds for Multi2 in data_out_addr[15:0] +/** + * set_multi2_num_rounds() - Set the number of rounds for Multi2 in + * data_out_addr[15:0] * - * @pdesc: pointer HW descriptor struct - * @num: number of rounds for Multi2 + * @pdesc: Pointer to HW descriptor struct + * @num: Number of rounds for Multi2 */ static inline void set_multi2_num_rounds(struct cc_hw_desc *pdesc, u32 num) { pdesc->word[2] = num; } -/* - * Set the flow mode. +/** + * set_flow_mode() - Set the flow mode. * - * @pdesc: pointer HW descriptor struct + * @pdesc: Pointer to HW descriptor struct * @mode: Any one of the modes defined in [CC7x-DESC] */ static inline void set_flow_mode(struct cc_hw_desc *pdesc, @@ -490,22 +486,22 @@ static inline void set_flow_mode(struct cc_hw_desc *pdesc, pdesc->word[4] |= FIELD_PREP(WORD4_DATA_FLOW_MODE, mode); } -/* - * Set the cipher mode. +/** + * set_cipher_mode() - Set the cipher mode. 
* - * @pdesc: pointer HW descriptor struct - * @mode: Any one of the modes defined in [CC7x-DESC] + * @pdesc: Pointer to HW descriptor struct + * @mode: Any one of the modes defined in [CC7x-DESC] */ static inline void set_cipher_mode(struct cc_hw_desc *pdesc, int mode) { pdesc->word[4] |= FIELD_PREP(WORD4_CIPHER_MODE, mode); } -/* - * Set the cipher mode for hash algorithms. +/** + * set_hash_cipher_mode() - Set the cipher mode for hash algorithms. * - * @pdesc: pointer HW descriptor struct - * @cipher_mode: Any one of the modes defined in [CC7x-DESC] + * @pdesc: Pointer to HW descriptor struct + * @cipher_mode: Any one of the modes defined in [CC7x-DESC] * @hash_mode: specifies which hash is being handled */ static inline void set_hash_cipher_mode(struct cc_hw_desc *pdesc, @@ -517,10 +513,10 @@ static inline void set_hash_cipher_mode(struct cc_hw_desc *pdesc, set_aes_xor_crypto_key(pdesc); } -/* - * Set the cipher configuration fields. +/** + * set_cipher_config0() - Set the cipher configuration fields. * - * @pdesc: pointer HW descriptor struct + * @pdesc: Pointer to HW descriptor struct * @mode: Any one of the modes defined in [CC7x-DESC] */ static inline void set_cipher_config0(struct cc_hw_desc *pdesc, int mode) @@ -528,11 +524,11 @@ static inline void set_cipher_config0(struct cc_hw_desc *pdesc, int mode) pdesc->word[4] |= FIELD_PREP(WORD4_CIPHER_CONF0, mode); } -/* - * Set the cipher configuration fields. +/** + * set_cipher_config1() - Set the cipher configuration fields. * - * @pdesc: pointer HW descriptor struct - * @config: Any one of the modes defined in [CC7x-DESC] + * @pdesc: Pointer to HW descriptor struct + * @config: Padding mode */ static inline void set_cipher_config1(struct cc_hw_desc *pdesc, enum cc_hash_conf_pad config) @@ -540,10 +536,10 @@ static inline void set_cipher_config1(struct cc_hw_desc *pdesc, pdesc->word[4] |= FIELD_PREP(WORD4_CIPHER_CONF1, config); } -/* - * Set HW key configuration fields. +/** + * set_hw_crypto_key() - Set HW key configuration fields. * - * @pdesc: pointer HW descriptor struct + * @pdesc: Pointer to HW descriptor struct * @hw_key: The HW key slot asdefined in enum cc_hw_crypto_key */ static inline void set_hw_crypto_key(struct cc_hw_desc *pdesc, @@ -555,64 +551,64 @@ static inline void set_hw_crypto_key(struct cc_hw_desc *pdesc, (hw_key >> HW_KEY_SHIFT_CIPHER_CFG2)); } -/* - * Set byte order of all setup-finalize descriptors. +/** + * set_bytes_swap() - Set byte order of all setup-finalize descriptors. * - * @pdesc: pointer HW descriptor struct - * @config: Any one of the modes defined in [CC7x-DESC] + * @pdesc: Pointer to HW descriptor struct + * @config: True to enable byte swapping */ static inline void set_bytes_swap(struct cc_hw_desc *pdesc, bool config) { pdesc->word[4] |= FIELD_PREP(WORD4_BYTES_SWAP, config); } -/* - * Set CMAC_SIZE0 mode. +/** + * set_cmac_size0_mode() - Set CMAC_SIZE0 mode. * - * @pdesc: pointer HW descriptor struct + * @pdesc: Pointer to HW descriptor struct */ static inline void set_cmac_size0_mode(struct cc_hw_desc *pdesc) { pdesc->word[4] |= FIELD_PREP(WORD4_CMAC_SIZE0, 1); } -/* - * Set key size descriptor field. +/** + * set_key_size() - Set key size descriptor field. * - * @pdesc: pointer HW descriptor struct - * @size: key size in bytes (NOT size code) + * @pdesc: Pointer to HW descriptor struct + * @size: Key size in bytes (NOT size code) */ static inline void set_key_size(struct cc_hw_desc *pdesc, u32 size) { pdesc->word[4] |= FIELD_PREP(WORD4_KEY_SIZE, size); } -/* - * Set AES key size. 
+/** + * set_key_size_aes() - Set AES key size. * - * @pdesc: pointer HW descriptor struct - * @size: key size in bytes (NOT size code) + * @pdesc: Pointer to HW descriptor struct + * @size: Key size in bytes (NOT size code) */ static inline void set_key_size_aes(struct cc_hw_desc *pdesc, u32 size) { set_key_size(pdesc, ((size >> 3) - 2)); } -/* - * Set DES key size. +/** + * set_key_size_des() - Set DES key size. * - * @pdesc: pointer HW descriptor struct - * @size: key size in bytes (NOT size code) + * @pdesc: Pointer to HW descriptor struct + * @size: Key size in bytes (NOT size code) */ static inline void set_key_size_des(struct cc_hw_desc *pdesc, u32 size) { set_key_size(pdesc, ((size >> 3) - 1)); } -/* - * Set the descriptor setup mode +/** + * set_setup_mode() - Set the descriptor setup mode * - * @pdesc: pointer HW descriptor struct + * @pdesc: Pointer to HW descriptor struct * @mode: Any one of the setup modes defined in [CC7x-DESC] */ static inline void set_setup_mode(struct cc_hw_desc *pdesc, @@ -621,10 +617,10 @@ static inline void set_setup_mode(struct cc_hw_desc *pdesc, pdesc->word[4] |= FIELD_PREP(WORD4_SETUP_OPERATION, mode); } -/* - * Set the descriptor cipher DO +/** + * set_cipher_do() - Set the descriptor cipher DO * - * @pdesc: pointer HW descriptor struct + * @pdesc: Pointer to HW descriptor struct * @config: Any one of the cipher do defined in [CC7x-DESC] */ static inline void set_cipher_do(struct cc_hw_desc *pdesc, diff --git a/drivers/crypto/ccree/cc_pm.c b/drivers/crypto/ccree/cc_pm.c index 24c368b866f6..d39e1664fc7e 100644 --- a/drivers/crypto/ccree/cc_pm.c +++ b/drivers/crypto/ccree/cc_pm.c @@ -15,29 +15,25 @@ #define POWER_DOWN_ENABLE 0x01 #define POWER_DOWN_DISABLE 0x00 -const struct dev_pm_ops ccree_pm = { - SET_RUNTIME_PM_OPS(cc_pm_suspend, cc_pm_resume, NULL) -}; - -int cc_pm_suspend(struct device *dev) +static int cc_pm_suspend(struct device *dev) { struct cc_drvdata *drvdata = dev_get_drvdata(dev); dev_dbg(dev, "set HOST_POWER_DOWN_EN\n"); fini_cc_regs(drvdata); cc_iowrite(drvdata, CC_REG(HOST_POWER_DOWN_EN), POWER_DOWN_ENABLE); - cc_clk_off(drvdata); + clk_disable_unprepare(drvdata->clk); return 0; } -int cc_pm_resume(struct device *dev) +static int cc_pm_resume(struct device *dev) { int rc; struct cc_drvdata *drvdata = dev_get_drvdata(dev); dev_dbg(dev, "unset HOST_POWER_DOWN_EN\n"); /* Enables the device source clk */ - rc = cc_clk_on(drvdata); + rc = clk_prepare_enable(drvdata->clk); if (rc) { dev_err(dev, "failed getting clock back on. We're toast.\n"); return rc; @@ -62,53 +58,19 @@ int cc_pm_resume(struct device *dev) return 0; } +const struct dev_pm_ops ccree_pm = { + SET_RUNTIME_PM_OPS(cc_pm_suspend, cc_pm_resume, NULL) +}; + int cc_pm_get(struct device *dev) { - int rc = 0; - struct cc_drvdata *drvdata = dev_get_drvdata(dev); - - if (drvdata->pm_on) - rc = pm_runtime_get_sync(dev); + int rc = pm_runtime_get_sync(dev); return (rc == 1 ? 
0 : rc); } void cc_pm_put_suspend(struct device *dev) { - struct cc_drvdata *drvdata = dev_get_drvdata(dev); - - if (drvdata->pm_on) { - pm_runtime_mark_last_busy(dev); - pm_runtime_put_autosuspend(dev); - } -} - -bool cc_pm_is_dev_suspended(struct device *dev) -{ - /* check device state using runtime api */ - return pm_runtime_suspended(dev); -} - -int cc_pm_init(struct cc_drvdata *drvdata) -{ - struct device *dev = drvdata_to_dev(drvdata); - - /* must be before the enabling to avoid redundant suspending */ - pm_runtime_set_autosuspend_delay(dev, CC_SUSPEND_TIMEOUT); - pm_runtime_use_autosuspend(dev); - /* set us as active - note we won't do PM ops until cc_pm_go()! */ - return pm_runtime_set_active(dev); -} - -/* enable the PM module*/ -void cc_pm_go(struct cc_drvdata *drvdata) -{ - pm_runtime_enable(drvdata_to_dev(drvdata)); - drvdata->pm_on = true; -} - -void cc_pm_fini(struct cc_drvdata *drvdata) -{ - pm_runtime_disable(drvdata_to_dev(drvdata)); - drvdata->pm_on = false; + pm_runtime_mark_last_busy(dev); + pm_runtime_put_autosuspend(dev); } diff --git a/drivers/crypto/ccree/cc_pm.h b/drivers/crypto/ccree/cc_pm.h index 80a18e11cae4..50cac33de118 100644 --- a/drivers/crypto/ccree/cc_pm.h +++ b/drivers/crypto/ccree/cc_pm.h @@ -15,26 +15,11 @@ extern const struct dev_pm_ops ccree_pm; -int cc_pm_init(struct cc_drvdata *drvdata); -void cc_pm_go(struct cc_drvdata *drvdata); -void cc_pm_fini(struct cc_drvdata *drvdata); -int cc_pm_suspend(struct device *dev); -int cc_pm_resume(struct device *dev); int cc_pm_get(struct device *dev); void cc_pm_put_suspend(struct device *dev); -bool cc_pm_is_dev_suspended(struct device *dev); #else -static inline int cc_pm_init(struct cc_drvdata *drvdata) -{ - return 0; -} - -static inline void cc_pm_go(struct cc_drvdata *drvdata) {} - -static inline void cc_pm_fini(struct cc_drvdata *drvdata) {} - static inline int cc_pm_get(struct device *dev) { return 0; @@ -42,12 +27,6 @@ static inline int cc_pm_get(struct device *dev) static inline void cc_pm_put_suspend(struct device *dev) {} -static inline bool cc_pm_is_dev_suspended(struct device *dev) -{ - /* if PM not supported device is never suspend */ - return false; -} - #endif #endif /*__POWER_MGR_H__*/ diff --git a/drivers/crypto/ccree/cc_request_mgr.c b/drivers/crypto/ccree/cc_request_mgr.c index 9d61e6f12478..1d7649ecf44e 100644 --- a/drivers/crypto/ccree/cc_request_mgr.c +++ b/drivers/crypto/ccree/cc_request_mgr.c @@ -206,12 +206,13 @@ static void enqueue_seq(struct cc_drvdata *drvdata, struct cc_hw_desc seq[], } } -/*! - * Completion will take place if and only if user requested completion - * by cc_send_sync_request(). +/** + * request_mgr_complete() - Completion will take place if and only if user + * requested completion by cc_send_sync_request(). * - * \param dev - * \param dx_compl_h The completion event to signal + * @dev: Device pointer + * @dx_compl_h: The completion event to signal + * @dummy: unused error code */ static void request_mgr_complete(struct device *dev, void *dx_compl_h, int dummy) @@ -264,15 +265,15 @@ static int cc_queues_status(struct cc_drvdata *drvdata, return -ENOSPC; } -/*! - * Enqueue caller request to crypto hardware. +/** + * cc_do_send_request() - Enqueue caller request to crypto hardware. 
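/*
 * With cc_clk_on()/cc_clk_off() and the pm_on flag gone, the suspend/resume
 * callbacks above call the clock API directly and only the runtime-PM core
 * decides when they run.  A condensed sketch of that shape, assuming only
 * the generic clk and runtime-PM APIs; struct ex_drvdata and the ex_*
 * callbacks are illustrative stand-ins for the driver's own.
 */
#include <linux/clk.h>
#include <linux/device.h>
#include <linux/pm_runtime.h>

struct ex_drvdata {
	struct clk *clk;
};

static int ex_runtime_suspend(struct device *dev)
{
	struct ex_drvdata *d = dev_get_drvdata(dev);

	clk_disable_unprepare(d->clk);		/* gate the clock when idle */
	return 0;
}

static int ex_runtime_resume(struct device *dev)
{
	struct ex_drvdata *d = dev_get_drvdata(dev);

	return clk_prepare_enable(d->clk);	/* ungate before first use */
}

static const struct dev_pm_ops ex_pm_ops = {
	SET_RUNTIME_PM_OPS(ex_runtime_suspend, ex_runtime_resume, NULL)
};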
* Need to be called with HW lock held and PM running * - * \param drvdata - * \param cc_req The request to enqueue - * \param desc The crypto sequence - * \param len The crypto sequence length - * \param add_comp If "true": add an artificial dout DMA to mark completion + * @drvdata: Associated device driver context + * @cc_req: The request to enqueue + * @desc: The crypto sequence + * @len: The crypto sequence length + * @add_comp: If "true": add an artificial dout DMA to mark completion * */ static void cc_do_send_request(struct cc_drvdata *drvdata, @@ -295,7 +296,6 @@ static void cc_do_send_request(struct cc_drvdata *drvdata, req_mgr_h->req_queue[req_mgr_h->req_queue_head] = *cc_req; req_mgr_h->req_queue_head = (req_mgr_h->req_queue_head + 1) & (MAX_REQUEST_QUEUE_SIZE - 1); - /* TODO: Use circ_buf.h ? */ dev_dbg(dev, "Enqueue request head=%u\n", req_mgr_h->req_queue_head); @@ -377,7 +377,7 @@ static void cc_proc_backlog(struct cc_drvdata *drvdata) rc = cc_queues_status(drvdata, mgr, bli->len); if (rc) { /* - * There is still not room in the FIFO for + * There is still no room in the FIFO for * this request. Bail out. We'll return here * on the next completion irq. */ @@ -476,10 +476,6 @@ int cc_send_sync_request(struct cc_drvdata *drvdata, break; spin_unlock_bh(&mgr->hw_lock); - if (rc != -EAGAIN) { - cc_pm_put_suspend(dev); - return rc; - } wait_for_completion_interruptible(&drvdata->hw_queue_avail); reinit_completion(&drvdata->hw_queue_avail); } @@ -490,16 +486,18 @@ int cc_send_sync_request(struct cc_drvdata *drvdata, return 0; } -/*! - * Enqueue caller request to crypto hardware during init process. - * assume this function is not called in middle of a flow, +/** + * send_request_init() - Enqueue caller request to crypto hardware during init + * process. + * Assume this function is not called in the middle of a flow, * since we set QUEUE_LAST_IND flag in the last descriptor. * - * \param drvdata - * \param desc The crypto sequence - * \param len The crypto sequence length + * @drvdata: Associated device driver context + * @desc: The crypto sequence + * @len: The crypto sequence length * - * \return int Returns "0" upon success + * Return: + * Returns "0" upon success */ int send_request_init(struct cc_drvdata *drvdata, struct cc_hw_desc *desc, unsigned int len) diff --git a/drivers/crypto/ccree/cc_request_mgr.h b/drivers/crypto/ccree/cc_request_mgr.h index ff7746aaaf35..ae25ca843dce 100644 --- a/drivers/crypto/ccree/cc_request_mgr.h +++ b/drivers/crypto/ccree/cc_request_mgr.h @@ -12,18 +12,17 @@ int cc_req_mgr_init(struct cc_drvdata *drvdata); -/*! - * Enqueue caller request to crypto hardware. +/** + * cc_send_request() - Enqueue caller request to crypto hardware. * - * \param drvdata - * \param cc_req The request to enqueue - * \param desc The crypto sequence - * \param len The crypto sequence length - * \param is_dout If "true": completion is handled by the caller - * If "false": this function adds a dummy descriptor completion - * and waits upon completion signal. 
+ * @drvdata: Associated device driver context + * @cc_req: The request to enqueue + * @desc: The crypto sequence + * @len: The crypto sequence length + * @req: Asynchronous crypto request * - * \return int Returns -EINPROGRESS or error + * Return: + * Returns -EINPROGRESS or error */ int cc_send_request(struct cc_drvdata *drvdata, struct cc_crypto_req *cc_req, struct cc_hw_desc *desc, unsigned int len, diff --git a/drivers/crypto/ccree/cc_sram_mgr.c b/drivers/crypto/ccree/cc_sram_mgr.c index 62c885e6e791..37a95856361f 100644 --- a/drivers/crypto/ccree/cc_sram_mgr.c +++ b/drivers/crypto/ccree/cc_sram_mgr.c @@ -5,88 +5,61 @@ #include "cc_sram_mgr.h" /** - * struct cc_sram_ctx -Internal RAM context manager - * @sram_free_offset: the offset to the non-allocated area - */ -struct cc_sram_ctx { - cc_sram_addr_t sram_free_offset; -}; - -/** - * cc_sram_mgr_fini() - Cleanup SRAM pool. - * - * @drvdata: Associated device driver context - */ -void cc_sram_mgr_fini(struct cc_drvdata *drvdata) -{ - /* Nothing needed */ -} - -/** * cc_sram_mgr_init() - Initializes SRAM pool. * The pool starts right at the beginning of SRAM. * Returns zero for success, negative value otherwise. * * @drvdata: Associated device driver context + * + * Return: + * 0 for success, negative error code for failure. */ int cc_sram_mgr_init(struct cc_drvdata *drvdata) { - struct cc_sram_ctx *ctx; - dma_addr_t start = 0; + u32 start = 0; struct device *dev = drvdata_to_dev(drvdata); if (drvdata->hw_rev < CC_HW_REV_712) { /* Pool starts after ROM bytes */ - start = (dma_addr_t)cc_ioread(drvdata, - CC_REG(HOST_SEP_SRAM_THRESHOLD)); - + start = cc_ioread(drvdata, CC_REG(HOST_SEP_SRAM_THRESHOLD)); if ((start & 0x3) != 0) { - dev_err(dev, "Invalid SRAM offset %pad\n", &start); + dev_err(dev, "Invalid SRAM offset 0x%x\n", start); return -EINVAL; } } - /* Allocate "this" context */ - ctx = devm_kzalloc(dev, sizeof(*ctx), GFP_KERNEL); - - if (!ctx) - return -ENOMEM; - - ctx->sram_free_offset = start; - drvdata->sram_mgr_handle = ctx; - + drvdata->sram_free_offset = start; return 0; } -/*! - * Allocated buffer from SRAM pool. - * Note: Caller is responsible to free the LAST allocated buffer. - * This function does not taking care of any fragmentation may occur - * by the order of calls to alloc/free. +/** + * cc_sram_alloc() - Allocate buffer from SRAM pool. + * + * @drvdata: Associated device driver context + * @size: The requested numer of bytes to allocate * - * \param drvdata - * \param size The requested bytes to allocate + * Return: + * Address offset in SRAM or NULL_SRAM_ADDR for failure. 
*/ -cc_sram_addr_t cc_sram_alloc(struct cc_drvdata *drvdata, u32 size) +u32 cc_sram_alloc(struct cc_drvdata *drvdata, u32 size) { - struct cc_sram_ctx *smgr_ctx = drvdata->sram_mgr_handle; struct device *dev = drvdata_to_dev(drvdata); - cc_sram_addr_t p; + u32 p; if ((size & 0x3)) { dev_err(dev, "Requested buffer size (%u) is not multiple of 4", size); return NULL_SRAM_ADDR; } - if (size > (CC_CC_SRAM_SIZE - smgr_ctx->sram_free_offset)) { - dev_err(dev, "Not enough space to allocate %u B (at offset %llu)\n", - size, smgr_ctx->sram_free_offset); + if (size > (CC_CC_SRAM_SIZE - drvdata->sram_free_offset)) { + dev_err(dev, "Not enough space to allocate %u B (at offset %u)\n", + size, drvdata->sram_free_offset); return NULL_SRAM_ADDR; } - p = smgr_ctx->sram_free_offset; - smgr_ctx->sram_free_offset += size; - dev_dbg(dev, "Allocated %u B @ %u\n", size, (unsigned int)p); + p = drvdata->sram_free_offset; + drvdata->sram_free_offset += size; + dev_dbg(dev, "Allocated %u B @ %u\n", size, p); return p; } @@ -97,13 +70,12 @@ cc_sram_addr_t cc_sram_alloc(struct cc_drvdata *drvdata, u32 size) * * @src: A pointer to array of words to set as consts. * @dst: The target SRAM buffer to set into - * @nelements: The number of words in "src" array + * @nelement: The number of words in "src" array * @seq: A pointer to the given IN/OUT descriptor sequence * @seq_len: A pointer to the given IN/OUT sequence length */ -void cc_set_sram_desc(const u32 *src, cc_sram_addr_t dst, - unsigned int nelement, struct cc_hw_desc *seq, - unsigned int *seq_len) +void cc_set_sram_desc(const u32 *src, u32 dst, unsigned int nelement, + struct cc_hw_desc *seq, unsigned int *seq_len) { u32 i; unsigned int idx = *seq_len; diff --git a/drivers/crypto/ccree/cc_sram_mgr.h b/drivers/crypto/ccree/cc_sram_mgr.h index 1d14de9ee8c3..1c965ef83002 100644 --- a/drivers/crypto/ccree/cc_sram_mgr.h +++ b/drivers/crypto/ccree/cc_sram_mgr.h @@ -10,42 +10,30 @@ struct cc_drvdata; -/** - * Address (offset) within CC internal SRAM - */ - -typedef u64 cc_sram_addr_t; - -#define NULL_SRAM_ADDR ((cc_sram_addr_t)-1) +#define NULL_SRAM_ADDR ((u32)-1) -/*! - * Initializes SRAM pool. +/** + * cc_sram_mgr_init() - Initializes SRAM pool. * The first X bytes of SRAM are reserved for ROM usage, hence, pool * starts right after X bytes. * - * \param drvdata + * @drvdata: Associated device driver context * - * \return int Zero for success, negative value otherwise. + * Return: + * Zero for success, negative value otherwise. */ int cc_sram_mgr_init(struct cc_drvdata *drvdata); -/*! - * Uninits SRAM pool. +/** + * cc_sram_alloc() - Allocate buffer from SRAM pool. * - * \param drvdata - */ -void cc_sram_mgr_fini(struct cc_drvdata *drvdata); - -/*! - * Allocated buffer from SRAM pool. - * Note: Caller is responsible to free the LAST allocated buffer. - * This function does not taking care of any fragmentation may occur - * by the order of calls to alloc/free. + * @drvdata: Associated device driver context + * @size: The requested bytes to allocate * - * \param drvdata - * \param size The requested bytes to allocate + * Return: + * Address offset in SRAM or NULL_SRAM_ADDR for failure. */ -cc_sram_addr_t cc_sram_alloc(struct cc_drvdata *drvdata, u32 size); +u32 cc_sram_alloc(struct cc_drvdata *drvdata, u32 size); /** * cc_set_sram_desc() - Create const descriptors sequence to @@ -54,12 +42,11 @@ cc_sram_addr_t cc_sram_alloc(struct cc_drvdata *drvdata, u32 size); * * @src: A pointer to array of words to set as consts. 
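/*
 * cc_sram_alloc() above is a simple bump allocator: SRAM handles are plain
 * u32 offsets now, the free pointer only moves forward, and there is no
 * per-allocation free.  A self-contained sketch of the same policy; the
 * ex_sram_* names and the 4 KiB pool size are illustrative, not the
 * device's real SRAM size.
 */
#include <linux/types.h>

#define EX_SRAM_SIZE	4096U
#define EX_NULL_SRAM	((u32)-1)

struct ex_sram_pool {
	u32 free_offset;	/* offset of the first unused byte */
};

static u32 ex_sram_alloc(struct ex_sram_pool *pool, u32 size)
{
	u32 p;

	if (size & 0x3)				/* keep 4-byte alignment */
		return EX_NULL_SRAM;
	if (size > EX_SRAM_SIZE - pool->free_offset)
		return EX_NULL_SRAM;		/* pool exhausted */

	p = pool->free_offset;
	pool->free_offset += size;
	return p;
}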
* @dst: The target SRAM buffer to set into - * @nelements: The number of words in "src" array + * @nelement: The number of words in "src" array * @seq: A pointer to the given IN/OUT descriptor sequence * @seq_len: A pointer to the given IN/OUT sequence length */ -void cc_set_sram_desc(const u32 *src, cc_sram_addr_t dst, - unsigned int nelement, struct cc_hw_desc *seq, - unsigned int *seq_len); +void cc_set_sram_desc(const u32 *src, u32 dst, unsigned int nelement, + struct cc_hw_desc *seq, unsigned int *seq_len); #endif /*__CC_SRAM_MGR_H__*/ diff --git a/drivers/crypto/chelsio/chcr_algo.c b/drivers/crypto/chelsio/chcr_algo.c index b4b9b22125d1..c29b80dd30d8 100644 --- a/drivers/crypto/chelsio/chcr_algo.c +++ b/drivers/crypto/chelsio/chcr_algo.c @@ -715,6 +715,52 @@ static int chcr_cipher_fallback(struct crypto_sync_skcipher *cipher, return err; } + +static inline int get_qidxs(struct crypto_async_request *req, + unsigned int *txqidx, unsigned int *rxqidx) +{ + struct crypto_tfm *tfm = req->tfm; + int ret = 0; + + switch (tfm->__crt_alg->cra_flags & CRYPTO_ALG_TYPE_MASK) { + case CRYPTO_ALG_TYPE_AEAD: + { + struct aead_request *aead_req = + container_of(req, struct aead_request, base); + struct chcr_aead_reqctx *reqctx = aead_request_ctx(aead_req); + *txqidx = reqctx->txqidx; + *rxqidx = reqctx->rxqidx; + break; + } + case CRYPTO_ALG_TYPE_SKCIPHER: + { + struct skcipher_request *sk_req = + container_of(req, struct skcipher_request, base); + struct chcr_skcipher_req_ctx *reqctx = + skcipher_request_ctx(sk_req); + *txqidx = reqctx->txqidx; + *rxqidx = reqctx->rxqidx; + break; + } + case CRYPTO_ALG_TYPE_AHASH: + { + struct ahash_request *ahash_req = + container_of(req, struct ahash_request, base); + struct chcr_ahash_req_ctx *reqctx = + ahash_request_ctx(ahash_req); + *txqidx = reqctx->txqidx; + *rxqidx = reqctx->rxqidx; + break; + } + default: + ret = -EINVAL; + /* should never get here */ + BUG(); + break; + } + return ret; +} + static inline void create_wreq(struct chcr_context *ctx, struct chcr_wr *chcr_req, struct crypto_async_request *req, @@ -725,7 +771,15 @@ static inline void create_wreq(struct chcr_context *ctx, unsigned int lcb) { struct uld_ctx *u_ctx = ULD_CTX(ctx); - int qid = u_ctx->lldi.rxq_ids[ctx->rx_qidx]; + unsigned int tx_channel_id, rx_channel_id; + unsigned int txqidx = 0, rxqidx = 0; + unsigned int qid, fid; + + get_qidxs(req, &txqidx, &rxqidx); + qid = u_ctx->lldi.rxq_ids[rxqidx]; + fid = u_ctx->lldi.rxq_ids[0]; + tx_channel_id = txqidx / ctx->txq_perchan; + rx_channel_id = rxqidx / ctx->rxq_perchan; chcr_req->wreq.op_to_cctx_size = FILL_WR_OP_CCTX_SIZE; @@ -734,15 +788,12 @@ static inline void create_wreq(struct chcr_context *ctx, chcr_req->wreq.len16_pkd = htonl(FW_CRYPTO_LOOKASIDE_WR_LEN16_V(DIV_ROUND_UP(len16, 16))); chcr_req->wreq.cookie = cpu_to_be64((uintptr_t)req); - chcr_req->wreq.rx_chid_to_rx_q_id = - FILL_WR_RX_Q_ID(ctx->tx_chan_id, qid, - !!lcb, ctx->tx_qidx); + chcr_req->wreq.rx_chid_to_rx_q_id = FILL_WR_RX_Q_ID(rx_channel_id, qid, + !!lcb, txqidx); - chcr_req->ulptx.cmd_dest = FILL_ULPTX_CMD_DEST(ctx->tx_chan_id, - qid); + chcr_req->ulptx.cmd_dest = FILL_ULPTX_CMD_DEST(tx_channel_id, fid); chcr_req->ulptx.len = htonl((DIV_ROUND_UP(len16, 16) - - ((sizeof(chcr_req->wreq)) >> 4))); - + ((sizeof(chcr_req->wreq)) >> 4))); chcr_req->sc_imm.cmd_more = FILL_CMD_MORE(!imm); chcr_req->sc_imm.len = cpu_to_be32(sizeof(struct cpl_tx_sec_pdu) + sizeof(chcr_req->key_ctx) + sc_len); @@ -758,7 +809,8 @@ static inline void create_wreq(struct chcr_context *ctx, static 
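/*
 * get_qidxs() above recovers the algorithm-specific request from the
 * embedded struct crypto_async_request via container_of() and then reads the
 * per-request queue indices out of its context.  A minimal sketch of that
 * container_of() step for the skcipher case only, assuming the standard
 * 'base' embedding; ex_skcipher_from_base() is an illustrative name.
 */
#include <crypto/skcipher.h>
#include <linux/kernel.h>	/* container_of() */

static inline struct skcipher_request *
ex_skcipher_from_base(struct crypto_async_request *areq)
{
	/* 'areq' is the 'base' member embedded in the outer request. */
	return container_of(areq, struct skcipher_request, base);
}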
struct sk_buff *create_cipher_wr(struct cipher_wr_param *wrparam) { struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(wrparam->req); - struct ablk_ctx *ablkctx = ABLK_CTX(c_ctx(tfm)); + struct chcr_context *ctx = c_ctx(tfm); + struct ablk_ctx *ablkctx = ABLK_CTX(ctx); struct sk_buff *skb = NULL; struct chcr_wr *chcr_req; struct cpl_rx_phys_dsgl *phys_cpl; @@ -771,7 +823,8 @@ static struct sk_buff *create_cipher_wr(struct cipher_wr_param *wrparam) unsigned int kctx_len; gfp_t flags = wrparam->req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP ? GFP_KERNEL : GFP_ATOMIC; - struct adapter *adap = padap(c_ctx(tfm)->dev); + struct adapter *adap = padap(ctx->dev); + unsigned int rx_channel_id = reqctx->rxqidx / ctx->rxq_perchan; nents = sg_nents_xlen(reqctx->dstsg, wrparam->bytes, CHCR_DST_SG_SIZE, reqctx->dst_ofst); @@ -791,7 +844,7 @@ static struct sk_buff *create_cipher_wr(struct cipher_wr_param *wrparam) } chcr_req = __skb_put_zero(skb, transhdr_len); chcr_req->sec_cpl.op_ivinsrtofst = - FILL_SEC_CPL_OP_IVINSR(c_ctx(tfm)->tx_chan_id, 2, 1); + FILL_SEC_CPL_OP_IVINSR(rx_channel_id, 2, 1); chcr_req->sec_cpl.pldlen = htonl(IV + wrparam->bytes); chcr_req->sec_cpl.aadstart_cipherstop_hi = @@ -1086,8 +1139,12 @@ static int chcr_final_cipher_iv(struct skcipher_request *req, if (subtype == CRYPTO_ALG_SUB_TYPE_CTR) ctr_add_iv(iv, req->iv, DIV_ROUND_UP(reqctx->processed, AES_BLOCK_SIZE)); - else if (subtype == CRYPTO_ALG_SUB_TYPE_XTS) - ret = chcr_update_tweak(req, iv, 1); + else if (subtype == CRYPTO_ALG_SUB_TYPE_XTS) { + if (!reqctx->partial_req) + memcpy(iv, reqctx->iv, AES_BLOCK_SIZE); + else + ret = chcr_update_tweak(req, iv, 1); + } else if (subtype == CRYPTO_ALG_SUB_TYPE_CBC) { /*Already updated for Decrypt*/ if (!reqctx->op) @@ -1102,12 +1159,13 @@ static int chcr_handle_cipher_resp(struct skcipher_request *req, unsigned char *input, int err) { struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct chcr_context *ctx = c_ctx(tfm); struct uld_ctx *u_ctx = ULD_CTX(c_ctx(tfm)); struct ablk_ctx *ablkctx = ABLK_CTX(c_ctx(tfm)); struct sk_buff *skb; struct cpl_fw6_pld *fw6_pld = (struct cpl_fw6_pld *)input; struct chcr_skcipher_req_ctx *reqctx = skcipher_request_ctx(req); - struct cipher_wr_param wrparam; + struct cipher_wr_param wrparam; struct chcr_dev *dev = c_ctx(tfm)->dev; int bytes; @@ -1152,7 +1210,7 @@ static int chcr_handle_cipher_resp(struct skcipher_request *req, if (get_cryptoalg_subtype(tfm) == CRYPTO_ALG_SUB_TYPE_CTR) bytes = adjust_ctr_overflow(reqctx->iv, bytes); - wrparam.qid = u_ctx->lldi.rxq_ids[c_ctx(tfm)->rx_qidx]; + wrparam.qid = u_ctx->lldi.rxq_ids[reqctx->rxqidx]; wrparam.req = req; wrparam.bytes = bytes; skb = create_cipher_wr(&wrparam); @@ -1162,14 +1220,24 @@ static int chcr_handle_cipher_resp(struct skcipher_request *req, goto unmap; } skb->dev = u_ctx->lldi.ports[0]; - set_wr_txq(skb, CPL_PRIORITY_DATA, c_ctx(tfm)->tx_qidx); + set_wr_txq(skb, CPL_PRIORITY_DATA, reqctx->txqidx); chcr_send_wr(skb); reqctx->last_req_len = bytes; reqctx->processed += bytes; + if (get_cryptoalg_subtype(tfm) == + CRYPTO_ALG_SUB_TYPE_CBC && req->base.flags == + CRYPTO_TFM_REQ_MAY_SLEEP ) { + complete(&ctx->cbc_aes_aio_done); + } return 0; unmap: chcr_cipher_dma_unmap(&ULD_CTX(c_ctx(tfm))->lldi.pdev->dev, req); complete: + if (get_cryptoalg_subtype(tfm) == + CRYPTO_ALG_SUB_TYPE_CBC && req->base.flags == + CRYPTO_TFM_REQ_MAY_SLEEP ) { + complete(&ctx->cbc_aes_aio_done); + } chcr_dec_wrcount(dev); req->base.complete(&req->base, err); return err; @@ -1188,6 +1256,7 @@ static int 
process_cipher(struct skcipher_request *req, int bytes, err = -EINVAL; reqctx->processed = 0; + reqctx->partial_req = 0; if (!req->iv) goto error; if ((ablkctx->enckey_len == 0) || (ivsize > AES_BLOCK_SIZE) || @@ -1278,6 +1347,7 @@ static int process_cipher(struct skcipher_request *req, } reqctx->processed = bytes; reqctx->last_req_len = bytes; + reqctx->partial_req = !!(req->cryptlen - reqctx->processed); return 0; unmap: @@ -1289,31 +1359,43 @@ error: static int chcr_aes_encrypt(struct skcipher_request *req) { struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct chcr_skcipher_req_ctx *reqctx = skcipher_request_ctx(req); struct chcr_dev *dev = c_ctx(tfm)->dev; struct sk_buff *skb = NULL; - int err, isfull = 0; + int err; struct uld_ctx *u_ctx = ULD_CTX(c_ctx(tfm)); + struct chcr_context *ctx = c_ctx(tfm); + unsigned int cpu; + + cpu = get_cpu(); + reqctx->txqidx = cpu % ctx->ntxq; + reqctx->rxqidx = cpu % ctx->nrxq; + put_cpu(); err = chcr_inc_wrcount(dev); if (err) return -ENXIO; if (unlikely(cxgb4_is_crypto_q_full(u_ctx->lldi.ports[0], - c_ctx(tfm)->tx_qidx))) { - isfull = 1; - if (!(req->base.flags & CRYPTO_TFM_REQ_MAY_BACKLOG)) { + reqctx->txqidx) && + (!(req->base.flags & CRYPTO_TFM_REQ_MAY_BACKLOG)))) { err = -ENOSPC; goto error; - } } - err = process_cipher(req, u_ctx->lldi.rxq_ids[c_ctx(tfm)->rx_qidx], + err = process_cipher(req, u_ctx->lldi.rxq_ids[reqctx->rxqidx], &skb, CHCR_ENCRYPT_OP); if (err || !skb) return err; skb->dev = u_ctx->lldi.ports[0]; - set_wr_txq(skb, CPL_PRIORITY_DATA, c_ctx(tfm)->tx_qidx); + set_wr_txq(skb, CPL_PRIORITY_DATA, reqctx->txqidx); chcr_send_wr(skb); - return isfull ? -EBUSY : -EINPROGRESS; + if (get_cryptoalg_subtype(tfm) == + CRYPTO_ALG_SUB_TYPE_CBC && req->base.flags == + CRYPTO_TFM_REQ_MAY_SLEEP ) { + reqctx->partial_req = 1; + wait_for_completion(&ctx->cbc_aes_aio_done); + } + return -EINPROGRESS; error: chcr_dec_wrcount(dev); return err; @@ -1322,44 +1404,45 @@ error: static int chcr_aes_decrypt(struct skcipher_request *req) { struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct chcr_skcipher_req_ctx *reqctx = skcipher_request_ctx(req); struct uld_ctx *u_ctx = ULD_CTX(c_ctx(tfm)); struct chcr_dev *dev = c_ctx(tfm)->dev; struct sk_buff *skb = NULL; - int err, isfull = 0; + int err; + struct chcr_context *ctx = c_ctx(tfm); + unsigned int cpu; + + cpu = get_cpu(); + reqctx->txqidx = cpu % ctx->ntxq; + reqctx->rxqidx = cpu % ctx->nrxq; + put_cpu(); err = chcr_inc_wrcount(dev); if (err) return -ENXIO; if (unlikely(cxgb4_is_crypto_q_full(u_ctx->lldi.ports[0], - c_ctx(tfm)->tx_qidx))) { - isfull = 1; - if (!(req->base.flags & CRYPTO_TFM_REQ_MAY_BACKLOG)) + reqctx->txqidx) && + (!(req->base.flags & CRYPTO_TFM_REQ_MAY_BACKLOG)))) return -ENOSPC; - } - - err = process_cipher(req, u_ctx->lldi.rxq_ids[c_ctx(tfm)->rx_qidx], + err = process_cipher(req, u_ctx->lldi.rxq_ids[reqctx->rxqidx], &skb, CHCR_DECRYPT_OP); if (err || !skb) return err; skb->dev = u_ctx->lldi.ports[0]; - set_wr_txq(skb, CPL_PRIORITY_DATA, c_ctx(tfm)->tx_qidx); + set_wr_txq(skb, CPL_PRIORITY_DATA, reqctx->txqidx); chcr_send_wr(skb); - return isfull ? 
-EBUSY : -EINPROGRESS; + return -EINPROGRESS; } - static int chcr_device_init(struct chcr_context *ctx) { struct uld_ctx *u_ctx = NULL; - unsigned int id; - int txq_perchan, txq_idx, ntxq; - int err = 0, rxq_perchan, rxq_idx; + int txq_perchan, ntxq; + int err = 0, rxq_perchan; - id = smp_processor_id(); if (!ctx->dev) { u_ctx = assign_chcr_device(); if (!u_ctx) { - err = -ENXIO; pr_err("chcr device assignment fails\n"); goto out; } @@ -1367,23 +1450,10 @@ static int chcr_device_init(struct chcr_context *ctx) ntxq = u_ctx->lldi.ntxq; rxq_perchan = u_ctx->lldi.nrxq / u_ctx->lldi.nchan; txq_perchan = ntxq / u_ctx->lldi.nchan; - spin_lock(&ctx->dev->lock_chcr_dev); - ctx->tx_chan_id = ctx->dev->tx_channel_id; - ctx->dev->tx_channel_id = - (ctx->dev->tx_channel_id + 1) % u_ctx->lldi.nchan; - spin_unlock(&ctx->dev->lock_chcr_dev); - rxq_idx = ctx->tx_chan_id * rxq_perchan; - rxq_idx += id % rxq_perchan; - txq_idx = ctx->tx_chan_id * txq_perchan; - txq_idx += id % txq_perchan; - ctx->rx_qidx = rxq_idx; - ctx->tx_qidx = txq_idx; - /* Channel Id used by SGE to forward packet to Host. - * Same value should be used in cpl_fw6_pld RSS_CH field - * by FW. Driver programs PCI channel ID to be used in fw - * at the time of queue allocation with value "pi->tx_chan" - */ - ctx->pci_chan_id = txq_idx / txq_perchan; + ctx->ntxq = ntxq; + ctx->nrxq = u_ctx->lldi.nrxq; + ctx->rxq_perchan = rxq_perchan; + ctx->txq_perchan = txq_perchan; } out: return err; @@ -1401,7 +1471,7 @@ static int chcr_init_tfm(struct crypto_skcipher *tfm) pr_err("failed to allocate fallback for %s\n", alg->base.cra_name); return PTR_ERR(ablkctx->sw_cipher); } - + init_completion(&ctx->cbc_aes_aio_done); crypto_skcipher_set_reqsize(tfm, sizeof(struct chcr_skcipher_req_ctx)); return chcr_device_init(ctx); @@ -1485,9 +1555,10 @@ static struct sk_buff *create_hash_wr(struct ahash_request *req, { struct chcr_ahash_req_ctx *req_ctx = ahash_request_ctx(req); struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); - struct hmac_ctx *hmacctx = HMAC_CTX(h_ctx(tfm)); + struct chcr_context *ctx = h_ctx(tfm); + struct hmac_ctx *hmacctx = HMAC_CTX(ctx); struct sk_buff *skb = NULL; - struct uld_ctx *u_ctx = ULD_CTX(h_ctx(tfm)); + struct uld_ctx *u_ctx = ULD_CTX(ctx); struct chcr_wr *chcr_req; struct ulptx_sgl *ulptx; unsigned int nents = 0, transhdr_len; @@ -1496,6 +1567,7 @@ static struct sk_buff *create_hash_wr(struct ahash_request *req, GFP_ATOMIC; struct adapter *adap = padap(h_ctx(tfm)->dev); int error = 0; + unsigned int rx_channel_id = req_ctx->rxqidx / ctx->rxq_perchan; transhdr_len = HASH_TRANSHDR_SIZE(param->kctx_len); req_ctx->hctx_wr.imm = (transhdr_len + param->bfr_len + @@ -1513,7 +1585,8 @@ static struct sk_buff *create_hash_wr(struct ahash_request *req, chcr_req = __skb_put_zero(skb, transhdr_len); chcr_req->sec_cpl.op_ivinsrtofst = - FILL_SEC_CPL_OP_IVINSR(h_ctx(tfm)->tx_chan_id, 2, 0); + FILL_SEC_CPL_OP_IVINSR(rx_channel_id, 2, 0); + chcr_req->sec_cpl.pldlen = htonl(param->bfr_len + param->sg_len); chcr_req->sec_cpl.aadstart_cipherstop_hi = @@ -1576,16 +1649,22 @@ static int chcr_ahash_update(struct ahash_request *req) { struct chcr_ahash_req_ctx *req_ctx = ahash_request_ctx(req); struct crypto_ahash *rtfm = crypto_ahash_reqtfm(req); - struct uld_ctx *u_ctx = NULL; + struct uld_ctx *u_ctx = ULD_CTX(h_ctx(rtfm)); + struct chcr_context *ctx = h_ctx(rtfm); struct chcr_dev *dev = h_ctx(rtfm)->dev; struct sk_buff *skb; u8 remainder = 0, bs; unsigned int nbytes = req->nbytes; struct hash_wr_param params; - int error, isfull = 0; + int error; 
+ unsigned int cpu; + + cpu = get_cpu(); + req_ctx->txqidx = cpu % ctx->ntxq; + req_ctx->rxqidx = cpu % ctx->nrxq; + put_cpu(); bs = crypto_tfm_alg_blocksize(crypto_ahash_tfm(rtfm)); - u_ctx = ULD_CTX(h_ctx(rtfm)); if (nbytes + req_ctx->reqlen >= bs) { remainder = (nbytes + req_ctx->reqlen) % bs; @@ -1603,12 +1682,10 @@ static int chcr_ahash_update(struct ahash_request *req) * inflight count for dev guarantees that lldi and padap is valid */ if (unlikely(cxgb4_is_crypto_q_full(u_ctx->lldi.ports[0], - h_ctx(rtfm)->tx_qidx))) { - isfull = 1; - if (!(req->base.flags & CRYPTO_TFM_REQ_MAY_BACKLOG)) { + req_ctx->txqidx) && + (!(req->base.flags & CRYPTO_TFM_REQ_MAY_BACKLOG)))) { error = -ENOSPC; goto err; - } } chcr_init_hctx_per_wr(req_ctx); @@ -1650,10 +1727,9 @@ static int chcr_ahash_update(struct ahash_request *req) } req_ctx->reqlen = remainder; skb->dev = u_ctx->lldi.ports[0]; - set_wr_txq(skb, CPL_PRIORITY_DATA, h_ctx(rtfm)->tx_qidx); + set_wr_txq(skb, CPL_PRIORITY_DATA, req_ctx->txqidx); chcr_send_wr(skb); - - return isfull ? -EBUSY : -EINPROGRESS; + return -EINPROGRESS; unmap: chcr_hash_dma_unmap(&u_ctx->lldi.pdev->dev, req); err: @@ -1678,16 +1754,22 @@ static int chcr_ahash_final(struct ahash_request *req) struct chcr_dev *dev = h_ctx(rtfm)->dev; struct hash_wr_param params; struct sk_buff *skb; - struct uld_ctx *u_ctx = NULL; + struct uld_ctx *u_ctx = ULD_CTX(h_ctx(rtfm)); + struct chcr_context *ctx = h_ctx(rtfm); u8 bs = crypto_tfm_alg_blocksize(crypto_ahash_tfm(rtfm)); int error = -EINVAL; + unsigned int cpu; + + cpu = get_cpu(); + req_ctx->txqidx = cpu % ctx->ntxq; + req_ctx->rxqidx = cpu % ctx->nrxq; + put_cpu(); error = chcr_inc_wrcount(dev); if (error) return -ENXIO; chcr_init_hctx_per_wr(req_ctx); - u_ctx = ULD_CTX(h_ctx(rtfm)); if (is_hmac(crypto_ahash_tfm(rtfm))) params.opad_needed = 1; else @@ -1727,7 +1809,7 @@ static int chcr_ahash_final(struct ahash_request *req) } req_ctx->reqlen = 0; skb->dev = u_ctx->lldi.ports[0]; - set_wr_txq(skb, CPL_PRIORITY_DATA, h_ctx(rtfm)->tx_qidx); + set_wr_txq(skb, CPL_PRIORITY_DATA, req_ctx->txqidx); chcr_send_wr(skb); return -EINPROGRESS; err: @@ -1740,25 +1822,29 @@ static int chcr_ahash_finup(struct ahash_request *req) struct chcr_ahash_req_ctx *req_ctx = ahash_request_ctx(req); struct crypto_ahash *rtfm = crypto_ahash_reqtfm(req); struct chcr_dev *dev = h_ctx(rtfm)->dev; - struct uld_ctx *u_ctx = NULL; + struct uld_ctx *u_ctx = ULD_CTX(h_ctx(rtfm)); + struct chcr_context *ctx = h_ctx(rtfm); struct sk_buff *skb; struct hash_wr_param params; u8 bs; - int error, isfull = 0; + int error; + unsigned int cpu; + + cpu = get_cpu(); + req_ctx->txqidx = cpu % ctx->ntxq; + req_ctx->rxqidx = cpu % ctx->nrxq; + put_cpu(); bs = crypto_tfm_alg_blocksize(crypto_ahash_tfm(rtfm)); - u_ctx = ULD_CTX(h_ctx(rtfm)); error = chcr_inc_wrcount(dev); if (error) return -ENXIO; if (unlikely(cxgb4_is_crypto_q_full(u_ctx->lldi.ports[0], - h_ctx(rtfm)->tx_qidx))) { - isfull = 1; - if (!(req->base.flags & CRYPTO_TFM_REQ_MAY_BACKLOG)) { + req_ctx->txqidx) && + (!(req->base.flags & CRYPTO_TFM_REQ_MAY_BACKLOG)))) { error = -ENOSPC; goto err; - } } chcr_init_hctx_per_wr(req_ctx); error = chcr_hash_dma_map(&u_ctx->lldi.pdev->dev, req); @@ -1816,10 +1902,9 @@ static int chcr_ahash_finup(struct ahash_request *req) req_ctx->reqlen = 0; req_ctx->hctx_wr.processed += params.sg_len; skb->dev = u_ctx->lldi.ports[0]; - set_wr_txq(skb, CPL_PRIORITY_DATA, h_ctx(rtfm)->tx_qidx); + set_wr_txq(skb, CPL_PRIORITY_DATA, req_ctx->txqidx); chcr_send_wr(skb); - - return isfull ? 
-EBUSY : -EINPROGRESS; + return -EINPROGRESS; unmap: chcr_hash_dma_unmap(&u_ctx->lldi.pdev->dev, req); err: @@ -1832,11 +1917,18 @@ static int chcr_ahash_digest(struct ahash_request *req) struct chcr_ahash_req_ctx *req_ctx = ahash_request_ctx(req); struct crypto_ahash *rtfm = crypto_ahash_reqtfm(req); struct chcr_dev *dev = h_ctx(rtfm)->dev; - struct uld_ctx *u_ctx = NULL; + struct uld_ctx *u_ctx = ULD_CTX(h_ctx(rtfm)); + struct chcr_context *ctx = h_ctx(rtfm); struct sk_buff *skb; struct hash_wr_param params; u8 bs; - int error, isfull = 0; + int error; + unsigned int cpu; + + cpu = get_cpu(); + req_ctx->txqidx = cpu % ctx->ntxq; + req_ctx->rxqidx = cpu % ctx->nrxq; + put_cpu(); rtfm->init(req); bs = crypto_tfm_alg_blocksize(crypto_ahash_tfm(rtfm)); @@ -1844,14 +1936,11 @@ static int chcr_ahash_digest(struct ahash_request *req) if (error) return -ENXIO; - u_ctx = ULD_CTX(h_ctx(rtfm)); if (unlikely(cxgb4_is_crypto_q_full(u_ctx->lldi.ports[0], - h_ctx(rtfm)->tx_qidx))) { - isfull = 1; - if (!(req->base.flags & CRYPTO_TFM_REQ_MAY_BACKLOG)) { + req_ctx->txqidx) && + (!(req->base.flags & CRYPTO_TFM_REQ_MAY_BACKLOG)))) { error = -ENOSPC; goto err; - } } chcr_init_hctx_per_wr(req_ctx); @@ -1907,9 +1996,9 @@ static int chcr_ahash_digest(struct ahash_request *req) } req_ctx->hctx_wr.processed += params.sg_len; skb->dev = u_ctx->lldi.ports[0]; - set_wr_txq(skb, CPL_PRIORITY_DATA, h_ctx(rtfm)->tx_qidx); + set_wr_txq(skb, CPL_PRIORITY_DATA, req_ctx->txqidx); chcr_send_wr(skb); - return isfull ? -EBUSY : -EINPROGRESS; + return -EINPROGRESS; unmap: chcr_hash_dma_unmap(&u_ctx->lldi.pdev->dev, req); err: @@ -1922,14 +2011,20 @@ static int chcr_ahash_continue(struct ahash_request *req) struct chcr_ahash_req_ctx *reqctx = ahash_request_ctx(req); struct chcr_hctx_per_wr *hctx_wr = &reqctx->hctx_wr; struct crypto_ahash *rtfm = crypto_ahash_reqtfm(req); - struct uld_ctx *u_ctx = NULL; + struct chcr_context *ctx = h_ctx(rtfm); + struct uld_ctx *u_ctx = ULD_CTX(ctx); struct sk_buff *skb; struct hash_wr_param params; u8 bs; int error; + unsigned int cpu; + + cpu = get_cpu(); + reqctx->txqidx = cpu % ctx->ntxq; + reqctx->rxqidx = cpu % ctx->nrxq; + put_cpu(); bs = crypto_tfm_alg_blocksize(crypto_ahash_tfm(rtfm)); - u_ctx = ULD_CTX(h_ctx(rtfm)); get_alg_config(¶ms.alg_prm, crypto_ahash_digestsize(rtfm)); params.kctx_len = roundup(params.alg_prm.result_size, 16); if (is_hmac(crypto_ahash_tfm(rtfm))) { @@ -1969,7 +2064,7 @@ static int chcr_ahash_continue(struct ahash_request *req) } hctx_wr->processed += params.sg_len; skb->dev = u_ctx->lldi.ports[0]; - set_wr_txq(skb, CPL_PRIORITY_DATA, h_ctx(rtfm)->tx_qidx); + set_wr_txq(skb, CPL_PRIORITY_DATA, reqctx->txqidx); chcr_send_wr(skb); return 0; err: @@ -2315,7 +2410,8 @@ static struct sk_buff *create_authenc_wr(struct aead_request *req, int size) { struct crypto_aead *tfm = crypto_aead_reqtfm(req); - struct chcr_aead_ctx *aeadctx = AEAD_CTX(a_ctx(tfm)); + struct chcr_context *ctx = a_ctx(tfm); + struct chcr_aead_ctx *aeadctx = AEAD_CTX(ctx); struct chcr_authenc_ctx *actx = AUTHENC_CTX(aeadctx); struct chcr_aead_reqctx *reqctx = aead_request_ctx(req); struct sk_buff *skb = NULL; @@ -2331,7 +2427,8 @@ static struct sk_buff *create_authenc_wr(struct aead_request *req, int null = 0; gfp_t flags = req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP ? 
GFP_KERNEL : GFP_ATOMIC; - struct adapter *adap = padap(a_ctx(tfm)->dev); + struct adapter *adap = padap(ctx->dev); + unsigned int rx_channel_id = reqctx->rxqidx / ctx->rxq_perchan; if (req->cryptlen == 0) return NULL; @@ -2351,7 +2448,7 @@ static struct sk_buff *create_authenc_wr(struct aead_request *req, snents = sg_nents_xlen(req->src, req->assoclen + req->cryptlen, CHCR_SRC_SG_SIZE, 0); dst_size = get_space_for_phys_dsgl(dnents); - kctx_len = (ntohl(KEY_CONTEXT_CTX_LEN_V(aeadctx->key_ctx_hdr)) << 4) + kctx_len = (KEY_CONTEXT_CTX_LEN_G(ntohl(aeadctx->key_ctx_hdr)) << 4) - sizeof(chcr_req->key_ctx); transhdr_len = CIPHER_TRANSHDR_SIZE(kctx_len, dst_size); reqctx->imm = (transhdr_len + req->assoclen + req->cryptlen) < @@ -2383,7 +2480,7 @@ static struct sk_buff *create_authenc_wr(struct aead_request *req, * to the hardware spec */ chcr_req->sec_cpl.op_ivinsrtofst = - FILL_SEC_CPL_OP_IVINSR(a_ctx(tfm)->tx_chan_id, 2, 1); + FILL_SEC_CPL_OP_IVINSR(rx_channel_id, 2, 1); chcr_req->sec_cpl.pldlen = htonl(req->assoclen + IV + req->cryptlen); chcr_req->sec_cpl.aadstart_cipherstop_hi = FILL_SEC_CPL_CIPHERSTOP_HI( null ? 0 : 1 + IV, @@ -2471,8 +2568,9 @@ int chcr_aead_dma_map(struct device *dev, else reqctx->b0_dma = 0; if (req->src == req->dst) { - error = dma_map_sg(dev, req->src, sg_nents(req->src), - DMA_BIDIRECTIONAL); + error = dma_map_sg(dev, req->src, + sg_nents_for_len(req->src, dst_size), + DMA_BIDIRECTIONAL); if (!error) goto err; } else { @@ -2558,13 +2656,14 @@ void chcr_add_aead_dst_ent(struct aead_request *req, unsigned int authsize = crypto_aead_authsize(tfm); struct chcr_context *ctx = a_ctx(tfm); u32 temp; + unsigned int rx_channel_id = reqctx->rxqidx / ctx->rxq_perchan; dsgl_walk_init(&dsgl_walk, phys_cpl); dsgl_walk_add_page(&dsgl_walk, IV + reqctx->b0_len, reqctx->iv_dma); temp = req->assoclen + req->cryptlen + (reqctx->op ? 
-authsize : authsize); dsgl_walk_add_sg(&dsgl_walk, req->dst, temp, 0); - dsgl_walk_end(&dsgl_walk, qid, ctx->pci_chan_id); + dsgl_walk_end(&dsgl_walk, qid, rx_channel_id); } void chcr_add_cipher_src_ent(struct skcipher_request *req, @@ -2599,14 +2698,14 @@ void chcr_add_cipher_dst_ent(struct skcipher_request *req, struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(wrparam->req); struct chcr_context *ctx = c_ctx(tfm); struct dsgl_walk dsgl_walk; + unsigned int rx_channel_id = reqctx->rxqidx / ctx->rxq_perchan; dsgl_walk_init(&dsgl_walk, phys_cpl); dsgl_walk_add_sg(&dsgl_walk, reqctx->dstsg, wrparam->bytes, reqctx->dst_ofst); reqctx->dstsg = dsgl_walk.last_sg; reqctx->dst_ofst = dsgl_walk.last_sg_len; - - dsgl_walk_end(&dsgl_walk, qid, ctx->pci_chan_id); + dsgl_walk_end(&dsgl_walk, qid, rx_channel_id); } void chcr_add_hash_src_ent(struct ahash_request *req, @@ -2804,10 +2903,12 @@ static void fill_sec_cpl_for_aead(struct cpl_tx_sec_pdu *sec_cpl, unsigned short op_type) { struct crypto_aead *tfm = crypto_aead_reqtfm(req); - struct chcr_aead_ctx *aeadctx = AEAD_CTX(a_ctx(tfm)); + struct chcr_context *ctx = a_ctx(tfm); + struct chcr_aead_ctx *aeadctx = AEAD_CTX(ctx); + struct chcr_aead_reqctx *reqctx = aead_request_ctx(req); unsigned int cipher_mode = CHCR_SCMD_CIPHER_MODE_AES_CCM; unsigned int mac_mode = CHCR_SCMD_AUTH_MODE_CBCMAC; - unsigned int c_id = a_ctx(tfm)->tx_chan_id; + unsigned int rx_channel_id = reqctx->rxqidx / ctx->rxq_perchan; unsigned int ccm_xtra; unsigned char tag_offset = 0, auth_offset = 0; unsigned int assoclen; @@ -2828,9 +2929,7 @@ static void fill_sec_cpl_for_aead(struct cpl_tx_sec_pdu *sec_cpl, auth_offset = 0; } - - sec_cpl->op_ivinsrtofst = FILL_SEC_CPL_OP_IVINSR(c_id, - 2, 1); + sec_cpl->op_ivinsrtofst = FILL_SEC_CPL_OP_IVINSR(rx_channel_id, 2, 1); sec_cpl->pldlen = htonl(req->assoclen + IV + req->cryptlen + ccm_xtra); /* For CCM there wil be b0 always. So AAD start will be 1 always */ @@ -2973,7 +3072,8 @@ static struct sk_buff *create_gcm_wr(struct aead_request *req, int size) { struct crypto_aead *tfm = crypto_aead_reqtfm(req); - struct chcr_aead_ctx *aeadctx = AEAD_CTX(a_ctx(tfm)); + struct chcr_context *ctx = a_ctx(tfm); + struct chcr_aead_ctx *aeadctx = AEAD_CTX(ctx); struct chcr_aead_reqctx *reqctx = aead_request_ctx(req); struct sk_buff *skb = NULL; struct chcr_wr *chcr_req; @@ -2986,7 +3086,8 @@ static struct sk_buff *create_gcm_wr(struct aead_request *req, u8 *ivptr; gfp_t flags = req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP ? GFP_KERNEL : GFP_ATOMIC; - struct adapter *adap = padap(a_ctx(tfm)->dev); + struct adapter *adap = padap(ctx->dev); + unsigned int rx_channel_id = reqctx->rxqidx / ctx->rxq_perchan; if (get_aead_subtype(tfm) == CRYPTO_ALG_SUB_TYPE_AEAD_RFC4106) assoclen = req->assoclen - 8; @@ -3028,7 +3129,7 @@ static struct sk_buff *create_gcm_wr(struct aead_request *req, //Offset of tag from end temp = (reqctx->op == CHCR_ENCRYPT_OP) ? 
0 : authsize; chcr_req->sec_cpl.op_ivinsrtofst = FILL_SEC_CPL_OP_IVINSR( - a_ctx(tfm)->tx_chan_id, 2, 1); + rx_channel_id, 2, 1); chcr_req->sec_cpl.pldlen = htonl(req->assoclen + IV + req->cryptlen); chcr_req->sec_cpl.aadstart_cipherstop_hi = FILL_SEC_CPL_CIPHERSTOP_HI( @@ -3576,9 +3677,9 @@ static int chcr_aead_op(struct aead_request *req, { struct crypto_aead *tfm = crypto_aead_reqtfm(req); struct chcr_aead_reqctx *reqctx = aead_request_ctx(req); - struct uld_ctx *u_ctx; + struct chcr_context *ctx = a_ctx(tfm); + struct uld_ctx *u_ctx = ULD_CTX(ctx); struct sk_buff *skb; - int isfull = 0; struct chcr_dev *cdev; cdev = a_ctx(tfm)->dev; @@ -3594,18 +3695,15 @@ static int chcr_aead_op(struct aead_request *req, return chcr_aead_fallback(req, reqctx->op); } - u_ctx = ULD_CTX(a_ctx(tfm)); if (cxgb4_is_crypto_q_full(u_ctx->lldi.ports[0], - a_ctx(tfm)->tx_qidx)) { - isfull = 1; - if (!(req->base.flags & CRYPTO_TFM_REQ_MAY_BACKLOG)) { + reqctx->txqidx) && + (!(req->base.flags & CRYPTO_TFM_REQ_MAY_BACKLOG))) { chcr_dec_wrcount(cdev); return -ENOSPC; - } } /* Form a WR from req */ - skb = create_wr_fn(req, u_ctx->lldi.rxq_ids[a_ctx(tfm)->rx_qidx], size); + skb = create_wr_fn(req, u_ctx->lldi.rxq_ids[reqctx->rxqidx], size); if (IS_ERR_OR_NULL(skb)) { chcr_dec_wrcount(cdev); @@ -3613,15 +3711,22 @@ static int chcr_aead_op(struct aead_request *req, } skb->dev = u_ctx->lldi.ports[0]; - set_wr_txq(skb, CPL_PRIORITY_DATA, a_ctx(tfm)->tx_qidx); + set_wr_txq(skb, CPL_PRIORITY_DATA, reqctx->txqidx); chcr_send_wr(skb); - return isfull ? -EBUSY : -EINPROGRESS; + return -EINPROGRESS; } static int chcr_aead_encrypt(struct aead_request *req) { struct crypto_aead *tfm = crypto_aead_reqtfm(req); struct chcr_aead_reqctx *reqctx = aead_request_ctx(req); + struct chcr_context *ctx = a_ctx(tfm); + unsigned int cpu; + + cpu = get_cpu(); + reqctx->txqidx = cpu % ctx->ntxq; + reqctx->rxqidx = cpu % ctx->nrxq; + put_cpu(); reqctx->verify = VERIFY_HW; reqctx->op = CHCR_ENCRYPT_OP; @@ -3643,9 +3748,16 @@ static int chcr_aead_encrypt(struct aead_request *req) static int chcr_aead_decrypt(struct aead_request *req) { struct crypto_aead *tfm = crypto_aead_reqtfm(req); - struct chcr_aead_ctx *aeadctx = AEAD_CTX(a_ctx(tfm)); + struct chcr_context *ctx = a_ctx(tfm); + struct chcr_aead_ctx *aeadctx = AEAD_CTX(ctx); struct chcr_aead_reqctx *reqctx = aead_request_ctx(req); int size; + unsigned int cpu; + + cpu = get_cpu(); + reqctx->txqidx = cpu % ctx->ntxq; + reqctx->rxqidx = cpu % ctx->nrxq; + put_cpu(); if (aeadctx->mayverify == VERIFY_SW) { size = crypto_aead_maxauthsize(tfm); diff --git a/drivers/crypto/chelsio/chcr_core.c b/drivers/crypto/chelsio/chcr_core.c index dfb53e746e51..ffd4ec0c7374 100644 --- a/drivers/crypto/chelsio/chcr_core.c +++ b/drivers/crypto/chelsio/chcr_core.c @@ -195,6 +195,7 @@ static void *chcr_uld_add(const struct cxgb4_lld_info *lld) struct uld_ctx *u_ctx; /* Create the device and add it in the device list */ + pr_info_once("%s - version %s\n", DRV_DESC, DRV_VERSION); if (!(lld->ulp_crypto & ULP_CRYPTO_LOOKASIDE)) return ERR_PTR(-EOPNOTSUPP); @@ -287,6 +288,8 @@ static int chcr_uld_state_change(void *handle, enum cxgb4_state state) case CXGB4_STATE_DETACH: chcr_detach_device(u_ctx); + if (!atomic_read(&drv_data.dev_count)) + stop_crypto(); break; case CXGB4_STATE_START_RECOVERY: diff --git a/drivers/crypto/chelsio/chcr_core.h b/drivers/crypto/chelsio/chcr_core.h index b5b371b8d343..2c09672e00a4 100644 --- a/drivers/crypto/chelsio/chcr_core.h +++ b/drivers/crypto/chelsio/chcr_core.h @@ -43,7 +43,8 @@ 
#include "cxgb4_uld.h" #define DRV_MODULE_NAME "chcr" -#define DRV_VERSION "1.0.0.0" +#define DRV_VERSION "1.0.0.0-ko" +#define DRV_DESC "Chelsio T6 Crypto Co-processor Driver" #define MAX_PENDING_REQ_TO_HW 20 #define CHCR_TEST_RESPONSE_TIMEOUT 1000 @@ -67,7 +68,7 @@ struct _key_ctx { __be32 ctx_hdr; u8 salt[MAX_SALT]; __be64 iv_to_auth; - unsigned char key[0]; + unsigned char key[]; }; #define KEYCTX_TX_WR_IV_S 55 @@ -147,7 +148,6 @@ struct chcr_dev { int wqretry; struct delayed_work detach_work; struct completion detach_comp; - unsigned char tx_channel_id; }; struct uld_ctx { diff --git a/drivers/crypto/chelsio/chcr_crypto.h b/drivers/crypto/chelsio/chcr_crypto.h index 6db2df8c8a05..542bebae001f 100644 --- a/drivers/crypto/chelsio/chcr_crypto.h +++ b/drivers/crypto/chelsio/chcr_crypto.h @@ -187,6 +187,8 @@ struct chcr_aead_reqctx { unsigned int op; u16 imm; u16 verify; + u16 txqidx; + u16 rxqidx; u8 iv[CHCR_MAX_CRYPTO_IV_LEN + MAX_SCRATCH_PAD_SIZE]; u8 *scratch_pad; }; @@ -250,10 +252,11 @@ struct __crypto_ctx { struct chcr_context { struct chcr_dev *dev; - unsigned char tx_qidx; - unsigned char rx_qidx; - unsigned char tx_chan_id; - unsigned char pci_chan_id; + unsigned char rxq_perchan; + unsigned char txq_perchan; + unsigned int ntxq; + unsigned int nrxq; + struct completion cbc_aes_aio_done; struct __crypto_ctx crypto_ctx[0]; }; @@ -279,6 +282,8 @@ struct chcr_ahash_req_ctx { u8 *skbfr; /* SKB which is being sent to the hardware for processing */ u64 data_len; /* Data len till time */ + u16 txqidx; + u16 rxqidx; u8 reqlen; u8 partial_hash[CHCR_HASH_MAX_DIGEST_SIZE]; u8 bfr1[CHCR_HASH_MAX_BLOCK_SIZE_128]; @@ -290,12 +295,15 @@ struct chcr_skcipher_req_ctx { struct scatterlist *dstsg; unsigned int processed; unsigned int last_req_len; + unsigned int partial_req; struct scatterlist *srcsg; unsigned int src_ofst; unsigned int dst_ofst; unsigned int op; u16 imm; u8 iv[CHCR_MAX_CRYPTO_IV_LEN]; + u16 txqidx; + u16 rxqidx; }; struct chcr_alg_template { diff --git a/drivers/crypto/chelsio/chtls/chtls_io.c b/drivers/crypto/chelsio/chtls/chtls_io.c index e1651adb9d06..dccef3a2908b 100644 --- a/drivers/crypto/chelsio/chtls/chtls_io.c +++ b/drivers/crypto/chelsio/chtls/chtls_io.c @@ -1110,10 +1110,10 @@ new_buf: pg_size = page_size(page); if (off < pg_size && skb_can_coalesce(skb, i, page, off)) { - merge = 1; + merge = true; goto copy; } - merge = 0; + merge = false; if (i == (is_tls_tx(csk) ? 
(MAX_SKB_FRAGS - 1) : MAX_SKB_FRAGS)) goto new_buf; @@ -1428,6 +1428,8 @@ static int chtls_pt_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, { struct chtls_sock *csk = rcu_dereference_sk_user_data(sk); struct chtls_hws *hws = &csk->tlshws; + struct net_device *dev = csk->egress_dev; + struct adapter *adap = netdev2adap(dev); struct tcp_sock *tp = tcp_sk(sk); unsigned long avail; int buffers_freed; @@ -1585,6 +1587,7 @@ skip_copy: tp->copied_seq += skb->len; hws->rcvpld = skb->hdr_len; } else { + atomic_inc(&adap->chcr_stats.tls_pdu_rx); tp->copied_seq += hws->rcvpld; } chtls_free_skb(sk, skb); diff --git a/drivers/crypto/chelsio/chtls/chtls_main.c b/drivers/crypto/chelsio/chtls/chtls_main.c index a038de90b2ea..2110d0893bc7 100644 --- a/drivers/crypto/chelsio/chtls/chtls_main.c +++ b/drivers/crypto/chelsio/chtls/chtls_main.c @@ -174,9 +174,16 @@ static inline void chtls_dev_release(struct kref *kref) { struct tls_toe_device *dev; struct chtls_dev *cdev; + struct adapter *adap; dev = container_of(kref, struct tls_toe_device, kref); cdev = to_chtls_dev(dev); + + /* Reset tls rx/tx stats */ + adap = pci_get_drvdata(cdev->pdev); + atomic_set(&adap->chcr_stats.tls_pdu_tx, 0); + atomic_set(&adap->chcr_stats.tls_pdu_rx, 0); + chtls_free_uld(cdev); } @@ -229,8 +236,7 @@ static void *chtls_uld_add(const struct cxgb4_lld_info *info) struct chtls_dev *cdev; int i, j; - cdev = kzalloc(sizeof(*cdev) + info->nports * - (sizeof(struct net_device *)), GFP_KERNEL); + cdev = kzalloc(sizeof(*cdev), GFP_KERNEL); if (!cdev) goto out; diff --git a/drivers/crypto/hisilicon/Kconfig b/drivers/crypto/hisilicon/Kconfig index 8851161f722f..095850d01dcc 100644 --- a/drivers/crypto/hisilicon/Kconfig +++ b/drivers/crypto/hisilicon/Kconfig @@ -40,6 +40,7 @@ config CRYPTO_DEV_HISI_QM tristate depends on ARM64 || COMPILE_TEST depends on PCI && PCI_MSI + depends on UACCE || UACCE=n help HiSilicon accelerator engines use a common queue management interface. Specific engine driver may use this module. 
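The chcr_core.h hunk above also turns the trailing "unsigned char key[0]" of struct _key_ctx into a C99 flexible array member, in line with the kernel's move away from GNU zero-length arrays. The short user-space sketch below shows how such a structure is sized and allocated; the struct layout, names and key length are stand-ins for illustration, not the driver's real key context.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct key_ctx_model {             /* stand-in: fixed header + trailing key */
        unsigned int hdr;
        unsigned int keylen;
        unsigned char key[];       /* flexible array member, was key[0]     */
};

static struct key_ctx_model *key_ctx_alloc(const unsigned char *key,
                                           unsigned int keylen)
{
        /* One allocation carries the header plus keylen trailing bytes. */
        struct key_ctx_model *kctx = malloc(sizeof(*kctx) + keylen);

        if (!kctx)
                return NULL;
        kctx->hdr = 0;
        kctx->keylen = keylen;
        memcpy(kctx->key, key, keylen);
        return kctx;
}

int main(void)
{
        static const unsigned char k[16] = { 0x01, 0x02, 0x03 };
        struct key_ctx_model *kctx = key_ctx_alloc(k, sizeof(k));

        if (!kctx)
                return 1;
        printf("%zu header bytes + %u key bytes in one block\n",
               sizeof(*kctx), kctx->keylen);
        free(kctx);
        return 0;
}

The flexible form is standard C, must be the last member, and makes misuse such as sizeof on the array itself a compile error, while the allocation size stays sizeof(struct) plus the run-time payload (in-kernel code usually expresses this with struct_size()).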
@@ -49,6 +50,7 @@ config CRYPTO_DEV_HISI_ZIP depends on PCI && PCI_MSI depends on ARM64 || (COMPILE_TEST && 64BIT) depends on !CPU_BIG_ENDIAN || COMPILE_TEST + depends on UACCE || UACCE=n select CRYPTO_DEV_HISI_QM help Support for HiSilicon ZIP Driver diff --git a/drivers/crypto/hisilicon/hpre/hpre.h b/drivers/crypto/hisilicon/hpre/hpre.h index ddf13ea9862a..03d512ec6336 100644 --- a/drivers/crypto/hisilicon/hpre/hpre.h +++ b/drivers/crypto/hisilicon/hpre/hpre.h @@ -46,7 +46,6 @@ struct hpre_debug { struct hpre { struct hisi_qm qm; - struct list_head list; struct hpre_debug debug; u32 num_vfs; unsigned long status; @@ -76,7 +75,7 @@ struct hpre_sqe { __le32 rsvd1[_HPRE_SQE_ALIGN_EXT]; }; -struct hpre *hpre_find_device(int node); +struct hisi_qp *hpre_create_qp(void); int hpre_algs_register(void); void hpre_algs_unregister(void); diff --git a/drivers/crypto/hisilicon/hpre/hpre_crypto.c b/drivers/crypto/hisilicon/hpre/hpre_crypto.c index 5d400d69e8e4..65425250b2e9 100644 --- a/drivers/crypto/hisilicon/hpre/hpre_crypto.c +++ b/drivers/crypto/hisilicon/hpre/hpre_crypto.c @@ -147,26 +147,18 @@ static void hpre_rm_req_from_ctx(struct hpre_asym_request *hpre_req) static struct hisi_qp *hpre_get_qp_and_start(void) { struct hisi_qp *qp; - struct hpre *hpre; int ret; - /* find the proper hpre device, which is near the current CPU core */ - hpre = hpre_find_device(cpu_to_node(smp_processor_id())); - if (!hpre) { - pr_err("Can not find proper hpre device!\n"); - return ERR_PTR(-ENODEV); - } - - qp = hisi_qm_create_qp(&hpre->qm, 0); - if (IS_ERR(qp)) { - pci_err(hpre->qm.pdev, "Can not create qp!\n"); + qp = hpre_create_qp(); + if (!qp) { + pr_err("Can not create hpre qp!\n"); return ERR_PTR(-ENODEV); } ret = hisi_qm_start_qp(qp, 0); if (ret < 0) { - hisi_qm_release_qp(qp); - pci_err(hpre->qm.pdev, "Can not start qp!\n"); + hisi_qm_free_qps(&qp, 1); + pci_err(qp->qm->pdev, "Can not start qp!\n"); return ERR_PTR(-EINVAL); } @@ -338,7 +330,7 @@ static void hpre_ctx_clear(struct hpre_ctx *ctx, bool is_clear_all) if (is_clear_all) { idr_destroy(&ctx->req_idr); kfree(ctx->req_list); - hisi_qm_release_qp(ctx->qp); + hisi_qm_free_qps(&ctx->qp, 1); } ctx->crt_g2_mode = false; diff --git a/drivers/crypto/hisilicon/hpre/hpre_main.c b/drivers/crypto/hisilicon/hpre/hpre_main.c index 401747de67a8..88be53bf4a38 100644 --- a/drivers/crypto/hisilicon/hpre/hpre_main.c +++ b/drivers/crypto/hisilicon/hpre/hpre_main.c @@ -82,8 +82,7 @@ #define HPRE_VIA_MSI_DSM 1 -static LIST_HEAD(hpre_list); -static DEFINE_MUTEX(hpre_list_lock); +static struct hisi_qm_list hpre_devices; static const char hpre_name[] = "hisi_hpre"; static struct dentry *hpre_debugfs_root; static const struct pci_device_id hpre_dev_ids[] = { @@ -196,43 +195,17 @@ static u32 hpre_pf_q_num = HPRE_PF_DEF_Q_NUM; module_param_cb(hpre_pf_q_num, &hpre_pf_q_num_ops, &hpre_pf_q_num, 0444); MODULE_PARM_DESC(hpre_pf_q_num, "Number of queues in PF of CS(1-1024)"); -static inline void hpre_add_to_list(struct hpre *hpre) +struct hisi_qp *hpre_create_qp(void) { - mutex_lock(&hpre_list_lock); - list_add_tail(&hpre->list, &hpre_list); - mutex_unlock(&hpre_list_lock); -} - -static inline void hpre_remove_from_list(struct hpre *hpre) -{ - mutex_lock(&hpre_list_lock); - list_del(&hpre->list); - mutex_unlock(&hpre_list_lock); -} + int node = cpu_to_node(smp_processor_id()); + struct hisi_qp *qp = NULL; + int ret; -struct hpre *hpre_find_device(int node) -{ - struct hpre *hpre, *ret = NULL; - int min_distance = INT_MAX; - struct device *dev; - int dev_node = 0; - - 
mutex_lock(&hpre_list_lock); - list_for_each_entry(hpre, &hpre_list, list) { - dev = &hpre->qm.pdev->dev; -#ifdef CONFIG_NUMA - dev_node = dev->numa_node; - if (dev_node < 0) - dev_node = 0; -#endif - if (node_distance(dev_node, node) < min_distance) { - ret = hpre; - min_distance = node_distance(dev_node, node); - } - } - mutex_unlock(&hpre_list_lock); + ret = hisi_qm_alloc_qps_node(&hpre_devices, 1, 0, node, &qp); + if (!ret) + return qp; - return ret; + return NULL; } static int hpre_cfg_by_dsm(struct hisi_qm *qm) @@ -349,18 +322,14 @@ static void hpre_cnt_regs_clear(struct hisi_qm *qm) hisi_qm_debug_regs_clear(qm); } -static void hpre_hw_error_disable(struct hpre *hpre) +static void hpre_hw_error_disable(struct hisi_qm *qm) { - struct hisi_qm *qm = &hpre->qm; - /* disable hpre hw error interrupts */ writel(HPRE_CORE_INT_DISABLE, qm->io_base + HPRE_INT_MASK); } -static void hpre_hw_error_enable(struct hpre *hpre) +static void hpre_hw_error_enable(struct hisi_qm *qm) { - struct hisi_qm *qm = &hpre->qm; - /* enable hpre hw error interrupts */ writel(HPRE_CORE_INT_ENABLE, qm->io_base + HPRE_INT_MASK); writel(HPRE_HAC_RAS_CE_ENABLE, qm->io_base + HPRE_RAS_CE_ENB); @@ -713,13 +682,39 @@ static int hpre_qm_pre_init(struct hisi_qm *qm, struct pci_dev *pdev) return 0; } -static void hpre_hw_err_init(struct hpre *hpre) +static void hpre_log_hw_error(struct hisi_qm *qm, u32 err_sts) { - hisi_qm_hw_error_init(&hpre->qm, QM_BASE_CE, QM_BASE_NFE, - 0, QM_DB_RANDOM_INVALID); - hpre_hw_error_enable(hpre); + const struct hpre_hw_error *err = hpre_hw_errors; + struct device *dev = &qm->pdev->dev; + + while (err->msg) { + if (err->int_msk & err_sts) + dev_warn(dev, "%s [error status=0x%x] found\n", + err->msg, err->int_msk); + err++; + } + + writel(err_sts, qm->io_base + HPRE_HAC_SOURCE_INT); +} + +static u32 hpre_get_hw_err_status(struct hisi_qm *qm) +{ + return readl(qm->io_base + HPRE_HAC_INT_STATUS); } +static const struct hisi_qm_err_ini hpre_err_ini = { + .hw_err_enable = hpre_hw_error_enable, + .hw_err_disable = hpre_hw_error_disable, + .get_dev_hw_err_status = hpre_get_hw_err_status, + .log_dev_hw_err = hpre_log_hw_error, + .err_info = { + .ce = QM_BASE_CE, + .nfe = QM_BASE_NFE | QM_ACC_DO_TASK_TIMEOUT, + .fe = 0, + .msi = QM_DB_RANDOM_INVALID, + } +}; + static int hpre_pf_probe_init(struct hpre *hpre) { struct hisi_qm *qm = &hpre->qm; @@ -731,7 +726,8 @@ static int hpre_pf_probe_init(struct hpre *hpre) if (ret) return ret; - hpre_hw_err_init(hpre); + qm->err_ini = &hpre_err_ini; + hisi_qm_dev_err_init(qm); return 0; } @@ -776,22 +772,21 @@ static int hpre_probe(struct pci_dev *pdev, const struct pci_device_id *id) if (ret) dev_warn(&pdev->dev, "init debugfs fail!\n"); - hpre_add_to_list(hpre); + hisi_qm_add_to_list(qm, &hpre_devices); ret = hpre_algs_register(); if (ret < 0) { - hpre_remove_from_list(hpre); pci_err(pdev, "fail to register algs to crypto!\n"); goto err_with_qm_start; } return 0; err_with_qm_start: + hisi_qm_del_from_list(qm, &hpre_devices); hisi_qm_stop(qm); err_with_err_init: - if (pdev->is_physfn) - hpre_hw_error_disable(hpre); + hisi_qm_dev_err_uninit(qm); err_with_qm_init: hisi_qm_uninit(qm); @@ -907,7 +902,7 @@ static void hpre_remove(struct pci_dev *pdev) int ret; hpre_algs_unregister(); - hpre_remove_from_list(hpre); + hisi_qm_del_from_list(qm, &hpre_devices); if (qm->fun_type == QM_HW_PF && hpre->num_vfs != 0) { ret = hpre_sriov_disable(pdev); if (ret) { @@ -922,69 +917,13 @@ static void hpre_remove(struct pci_dev *pdev) hpre_debugfs_exit(hpre); hisi_qm_stop(qm); - if 
(qm->fun_type == QM_HW_PF) - hpre_hw_error_disable(hpre); + hisi_qm_dev_err_uninit(qm); hisi_qm_uninit(qm); } -static void hpre_log_hw_error(struct hpre *hpre, u32 err_sts) -{ - const struct hpre_hw_error *err = hpre_hw_errors; - struct device *dev = &hpre->qm.pdev->dev; - - while (err->msg) { - if (err->int_msk & err_sts) - dev_warn(dev, "%s [error status=0x%x] found\n", - err->msg, err->int_msk); - err++; - } -} - -static pci_ers_result_t hpre_hw_error_handle(struct hpre *hpre) -{ - u32 err_sts; - - /* read err sts */ - err_sts = readl(hpre->qm.io_base + HPRE_HAC_INT_STATUS); - if (err_sts) { - hpre_log_hw_error(hpre, err_sts); - - /* clear error interrupts */ - writel(err_sts, hpre->qm.io_base + HPRE_HAC_SOURCE_INT); - return PCI_ERS_RESULT_NEED_RESET; - } - - return PCI_ERS_RESULT_RECOVERED; -} - -static pci_ers_result_t hpre_process_hw_error(struct pci_dev *pdev) -{ - struct hpre *hpre = pci_get_drvdata(pdev); - pci_ers_result_t qm_ret, hpre_ret; - - /* log qm error */ - qm_ret = hisi_qm_hw_error_handle(&hpre->qm); - - /* log hpre error */ - hpre_ret = hpre_hw_error_handle(hpre); - - return (qm_ret == PCI_ERS_RESULT_NEED_RESET || - hpre_ret == PCI_ERS_RESULT_NEED_RESET) ? - PCI_ERS_RESULT_NEED_RESET : PCI_ERS_RESULT_RECOVERED; -} - -static pci_ers_result_t hpre_error_detected(struct pci_dev *pdev, - pci_channel_state_t state) -{ - pci_info(pdev, "PCI error detected, state(=%d)!!\n", state); - if (state == pci_channel_io_perm_failure) - return PCI_ERS_RESULT_DISCONNECT; - - return hpre_process_hw_error(pdev); -} static const struct pci_error_handlers hpre_err_handler = { - .error_detected = hpre_error_detected, + .error_detected = hisi_qm_dev_err_detected, }; static struct pci_driver hpre_pci_driver = { @@ -1013,6 +952,7 @@ static int __init hpre_init(void) { int ret; + hisi_qm_init_list(&hpre_devices); hpre_register_debugfs(); ret = pci_register_driver(&hpre_pci_driver); diff --git a/drivers/crypto/hisilicon/qm.c b/drivers/crypto/hisilicon/qm.c index b57da5ef8b5b..f795fb557630 100644 --- a/drivers/crypto/hisilicon/qm.c +++ b/drivers/crypto/hisilicon/qm.c @@ -9,6 +9,9 @@ #include <linux/log2.h> #include <linux/seq_file.h> #include <linux/slab.h> +#include <linux/uacce.h> +#include <linux/uaccess.h> +#include <uapi/misc/uacce/hisi_qm.h> #include "qm.h" /* eq/aeq irq enable */ @@ -269,6 +272,12 @@ struct qm_doorbell { __le16 priority; }; +struct hisi_qm_resource { + struct hisi_qm *qm; + int distance; + struct list_head list; +}; + struct hisi_qm_hw_ops { int (*get_vft)(struct hisi_qm *qm, u32 *base, u32 *number); void (*qm_db)(struct hisi_qm *qm, u16 qn, @@ -277,6 +286,7 @@ struct hisi_qm_hw_ops { int (*debug_init)(struct hisi_qm *qm); void (*hw_error_init)(struct hisi_qm *qm, u32 ce, u32 nfe, u32 fe, u32 msi); + void (*hw_error_uninit)(struct hisi_qm *qm); pci_ers_result_t (*hw_error_handle)(struct hisi_qm *qm); }; @@ -465,9 +475,14 @@ static void qm_cq_head_update(struct hisi_qp *qp) static void qm_poll_qp(struct hisi_qp *qp, struct hisi_qm *qm) { - struct qm_cqe *cqe = qp->cqe + qp->qp_status.cq_head; + if (qp->event_cb) { + qp->event_cb(qp); + return; + } if (qp->req_cb) { + struct qm_cqe *cqe = qp->cqe + qp->qp_status.cq_head; + while (QM_CQE_PHASE(cqe) == qp->qp_status.cqc_phase) { dma_rmb(); qp->req_cb(qp, qp->sqe + qm->sqe_size * @@ -485,17 +500,9 @@ static void qm_poll_qp(struct hisi_qp *qp, struct hisi_qm *qm) } } -static void qm_qp_work_func(struct work_struct *work) +static void qm_work_process(struct work_struct *work) { - struct hisi_qp *qp; - - qp = container_of(work, 
struct hisi_qp, work); - qm_poll_qp(qp, qp->qm); -} - -static irqreturn_t qm_irq_handler(int irq, void *data) -{ - struct hisi_qm *qm = data; + struct hisi_qm *qm = container_of(work, struct hisi_qm, work); struct qm_eqe *eqe = qm->eqe + qm->status.eq_head; struct hisi_qp *qp; int eqe_num = 0; @@ -504,7 +511,7 @@ static irqreturn_t qm_irq_handler(int irq, void *data) eqe_num++; qp = qm_to_hisi_qp(qm, eqe); if (qp) - queue_work(qp->wq, &qp->work); + qm_poll_qp(qp, qm); if (qm->status.eq_head == QM_Q_DEPTH - 1) { qm->status.eqc_phase = !qm->status.eqc_phase; @@ -522,6 +529,17 @@ static irqreturn_t qm_irq_handler(int irq, void *data) } qm_db(qm, 0, QM_DOORBELL_CMD_EQ, qm->status.eq_head, 0); +} + +static irqreturn_t do_qm_irq(int irq, void *data) +{ + struct hisi_qm *qm = (struct hisi_qm *)data; + + /* the workqueue created by device driver of QM */ + if (qm->wq) + queue_work(qm->wq, &qm->work); + else + schedule_work(&qm->work); return IRQ_HANDLED; } @@ -531,7 +549,7 @@ static irqreturn_t qm_irq(int irq, void *data) struct hisi_qm *qm = data; if (readl(qm->io_base + QM_VF_EQ_INT_SOURCE)) - return qm_irq_handler(irq, data); + return do_qm_irq(irq, data); dev_err(&qm->pdev->dev, "invalid int source\n"); qm_db(qm, 0, QM_DOORBELL_CMD_EQ, qm->status.eq_head, 0); @@ -1011,43 +1029,45 @@ static void qm_hw_error_init_v2(struct hisi_qm *qm, u32 ce, u32 nfe, u32 fe, writel(irq_unmask, qm->io_base + QM_ABNORMAL_INT_MASK); } +static void qm_hw_error_uninit_v2(struct hisi_qm *qm) +{ + writel(QM_ABNORMAL_INT_MASK_VALUE, qm->io_base + QM_ABNORMAL_INT_MASK); +} + static void qm_log_hw_error(struct hisi_qm *qm, u32 error_status) { - const struct hisi_qm_hw_error *err = qm_hw_error; + const struct hisi_qm_hw_error *err; struct device *dev = &qm->pdev->dev; u32 reg_val, type, vf_num; + int i; - while (err->msg) { - if (err->int_msk & error_status) { - dev_err(dev, "%s [error status=0x%x] found\n", - err->msg, err->int_msk); - - if (error_status & QM_DB_TIMEOUT) { - reg_val = readl(qm->io_base + - QM_ABNORMAL_INF01); - type = (reg_val & QM_DB_TIMEOUT_TYPE) >> - QM_DB_TIMEOUT_TYPE_SHIFT; - vf_num = reg_val & QM_DB_TIMEOUT_VF; - dev_err(dev, "qm %s doorbell timeout in function %u\n", - qm_db_timeout[type], vf_num); - } - - if (error_status & QM_OF_FIFO_OF) { - reg_val = readl(qm->io_base + - QM_ABNORMAL_INF00); - type = (reg_val & QM_FIFO_OVERFLOW_TYPE) >> - QM_FIFO_OVERFLOW_TYPE_SHIFT; - vf_num = reg_val & QM_FIFO_OVERFLOW_VF; - - if (type < ARRAY_SIZE(qm_fifo_overflow)) - dev_err(dev, "qm %s fifo overflow in function %u\n", - qm_fifo_overflow[type], - vf_num); - else - dev_err(dev, "unknown error type\n"); - } + for (i = 0; i < ARRAY_SIZE(qm_hw_error); i++) { + err = &qm_hw_error[i]; + if (!(err->int_msk & error_status)) + continue; + + dev_err(dev, "%s [error status=0x%x] found\n", + err->msg, err->int_msk); + + if (err->int_msk & QM_DB_TIMEOUT) { + reg_val = readl(qm->io_base + QM_ABNORMAL_INF01); + type = (reg_val & QM_DB_TIMEOUT_TYPE) >> + QM_DB_TIMEOUT_TYPE_SHIFT; + vf_num = reg_val & QM_DB_TIMEOUT_VF; + dev_err(dev, "qm %s doorbell timeout in function %u\n", + qm_db_timeout[type], vf_num); + } else if (err->int_msk & QM_OF_FIFO_OF) { + reg_val = readl(qm->io_base + QM_ABNORMAL_INF00); + type = (reg_val & QM_FIFO_OVERFLOW_TYPE) >> + QM_FIFO_OVERFLOW_TYPE_SHIFT; + vf_num = reg_val & QM_FIFO_OVERFLOW_VF; + + if (type < ARRAY_SIZE(qm_fifo_overflow)) + dev_err(dev, "qm %s fifo overflow in function %u\n", + qm_fifo_overflow[type], vf_num); + else + dev_err(dev, "unknown error type\n"); } - err++; } } @@ -1082,6 
+1102,7 @@ static const struct hisi_qm_hw_ops qm_hw_ops_v2 = { .qm_db = qm_db_v2, .get_irq_num = qm_get_irq_num_v2, .hw_error_init = qm_hw_error_init_v2, + .hw_error_uninit = qm_hw_error_uninit_v2, .hw_error_handle = qm_hw_error_handle_v2, }; @@ -1147,20 +1168,9 @@ struct hisi_qp *hisi_qm_create_qp(struct hisi_qm *qm, u8 alg_type) qp->qp_id = qp_id; qp->alg_type = alg_type; - INIT_WORK(&qp->work, qm_qp_work_func); - qp->wq = alloc_workqueue("hisi_qm", WQ_UNBOUND | WQ_HIGHPRI | - WQ_CPU_INTENSIVE | WQ_MEM_RECLAIM, 0); - if (!qp->wq) { - ret = -EFAULT; - goto err_free_qp_mem; - } return qp; -err_free_qp_mem: - if (qm->use_dma_api) - dma_free_coherent(dev, qp->qdma.size, qp->qdma.va, - qp->qdma.dma); err_clear_bit: write_lock(&qm->qps_lock); qm->qp_array[qp_id] = NULL; @@ -1269,7 +1279,7 @@ static int qm_qp_ctx_cfg(struct hisi_qp *qp, int qp_id, int pasid) * @qp: The qp we want to start to run. * @arg: Accelerator specific argument. * - * After this function, qp can receive request from user. Return qp_id if + * After this function, qp can receive request from user. Return 0 if * successful, Return -EBUSY if failed. */ int hisi_qm_start_qp(struct hisi_qp *qp, unsigned long arg) @@ -1314,7 +1324,7 @@ int hisi_qm_start_qp(struct hisi_qp *qp, unsigned long arg) dev_dbg(dev, "queue %d started\n", qp_id); - return qp_id; + return 0; } EXPORT_SYMBOL_GPL(hisi_qm_start_qp); @@ -1395,6 +1405,214 @@ static void hisi_qm_cache_wb(struct hisi_qm *qm) } } +static void qm_qp_event_notifier(struct hisi_qp *qp) +{ + wake_up_interruptible(&qp->uacce_q->wait); +} + +static int hisi_qm_get_available_instances(struct uacce_device *uacce) +{ + int i, ret; + struct hisi_qm *qm = uacce->priv; + + read_lock(&qm->qps_lock); + for (i = 0, ret = 0; i < qm->qp_num; i++) + if (!qm->qp_array[i]) + ret++; + read_unlock(&qm->qps_lock); + + return ret; +} + +static int hisi_qm_uacce_get_queue(struct uacce_device *uacce, + unsigned long arg, + struct uacce_queue *q) +{ + struct hisi_qm *qm = uacce->priv; + struct hisi_qp *qp; + u8 alg_type = 0; + + qp = hisi_qm_create_qp(qm, alg_type); + if (IS_ERR(qp)) + return PTR_ERR(qp); + + q->priv = qp; + q->uacce = uacce; + qp->uacce_q = q; + qp->event_cb = qm_qp_event_notifier; + qp->pasid = arg; + + return 0; +} + +static void hisi_qm_uacce_put_queue(struct uacce_queue *q) +{ + struct hisi_qp *qp = q->priv; + + hisi_qm_cache_wb(qp->qm); + hisi_qm_release_qp(qp); +} + +/* map sq/cq/doorbell to user space */ +static int hisi_qm_uacce_mmap(struct uacce_queue *q, + struct vm_area_struct *vma, + struct uacce_qfile_region *qfr) +{ + struct hisi_qp *qp = q->priv; + struct hisi_qm *qm = qp->qm; + size_t sz = vma->vm_end - vma->vm_start; + struct pci_dev *pdev = qm->pdev; + struct device *dev = &pdev->dev; + unsigned long vm_pgoff; + int ret; + + switch (qfr->type) { + case UACCE_QFRT_MMIO: + if (qm->ver == QM_HW_V2) { + if (sz > PAGE_SIZE * (QM_DOORBELL_PAGE_NR + + QM_DOORBELL_SQ_CQ_BASE_V2 / PAGE_SIZE)) + return -EINVAL; + } else { + if (sz > PAGE_SIZE * QM_DOORBELL_PAGE_NR) + return -EINVAL; + } + + vma->vm_flags |= VM_IO; + + return remap_pfn_range(vma, vma->vm_start, + qm->phys_base >> PAGE_SHIFT, + sz, pgprot_noncached(vma->vm_page_prot)); + case UACCE_QFRT_DUS: + if (sz != qp->qdma.size) + return -EINVAL; + + /* + * dma_mmap_coherent() requires vm_pgoff as 0 + * restore vm_pfoff to initial value for mmap() + */ + vm_pgoff = vma->vm_pgoff; + vma->vm_pgoff = 0; + ret = dma_mmap_coherent(dev, vma, qp->qdma.va, + qp->qdma.dma, sz); + vma->vm_pgoff = vm_pgoff; + return ret; + + default: + 
return -EINVAL; + } +} + +static int hisi_qm_uacce_start_queue(struct uacce_queue *q) +{ + struct hisi_qp *qp = q->priv; + + return hisi_qm_start_qp(qp, qp->pasid); +} + +static void hisi_qm_uacce_stop_queue(struct uacce_queue *q) +{ + hisi_qm_stop_qp(q->priv); +} + +static int qm_set_sqctype(struct uacce_queue *q, u16 type) +{ + struct hisi_qm *qm = q->uacce->priv; + struct hisi_qp *qp = q->priv; + + write_lock(&qm->qps_lock); + qp->alg_type = type; + write_unlock(&qm->qps_lock); + + return 0; +} + +static long hisi_qm_uacce_ioctl(struct uacce_queue *q, unsigned int cmd, + unsigned long arg) +{ + struct hisi_qp *qp = q->priv; + struct hisi_qp_ctx qp_ctx; + + if (cmd == UACCE_CMD_QM_SET_QP_CTX) { + if (copy_from_user(&qp_ctx, (void __user *)arg, + sizeof(struct hisi_qp_ctx))) + return -EFAULT; + + if (qp_ctx.qc_type != 0 && qp_ctx.qc_type != 1) + return -EINVAL; + + qm_set_sqctype(q, qp_ctx.qc_type); + qp_ctx.id = qp->qp_id; + + if (copy_to_user((void __user *)arg, &qp_ctx, + sizeof(struct hisi_qp_ctx))) + return -EFAULT; + } else { + return -EINVAL; + } + + return 0; +} + +static const struct uacce_ops uacce_qm_ops = { + .get_available_instances = hisi_qm_get_available_instances, + .get_queue = hisi_qm_uacce_get_queue, + .put_queue = hisi_qm_uacce_put_queue, + .start_queue = hisi_qm_uacce_start_queue, + .stop_queue = hisi_qm_uacce_stop_queue, + .mmap = hisi_qm_uacce_mmap, + .ioctl = hisi_qm_uacce_ioctl, +}; + +static int qm_alloc_uacce(struct hisi_qm *qm) +{ + struct pci_dev *pdev = qm->pdev; + struct uacce_device *uacce; + unsigned long mmio_page_nr; + unsigned long dus_page_nr; + struct uacce_interface interface = { + .flags = UACCE_DEV_SVA, + .ops = &uacce_qm_ops, + }; + + strncpy(interface.name, pdev->driver->name, sizeof(interface.name)); + + uacce = uacce_alloc(&pdev->dev, &interface); + if (IS_ERR(uacce)) + return PTR_ERR(uacce); + + if (uacce->flags & UACCE_DEV_SVA) { + qm->use_sva = true; + } else { + /* only consider sva case */ + uacce_remove(uacce); + qm->uacce = NULL; + return -EINVAL; + } + + uacce->is_vf = pdev->is_virtfn; + uacce->priv = qm; + uacce->algs = qm->algs; + + if (qm->ver == QM_HW_V1) { + mmio_page_nr = QM_DOORBELL_PAGE_NR; + uacce->api_ver = HISI_QM_API_VER_BASE; + } else { + mmio_page_nr = QM_DOORBELL_PAGE_NR + + QM_DOORBELL_SQ_CQ_BASE_V2 / PAGE_SIZE; + uacce->api_ver = HISI_QM_API_VER2_BASE; + } + + dus_page_nr = (PAGE_SIZE - 1 + qm->sqe_size * QM_Q_DEPTH + + sizeof(struct qm_cqe) * QM_Q_DEPTH) >> PAGE_SHIFT; + + uacce->qf_pg_num[UACCE_QFRT_MMIO] = mmio_page_nr; + uacce->qf_pg_num[UACCE_QFRT_DUS] = dus_page_nr; + + qm->uacce = uacce; + + return 0; +} + /** * hisi_qm_get_free_qp_num() - Get free number of qp in qm. * @qm: The qm which want to get free qp. 
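Several of the hpre_main.c and qm.c hunks in this series replace driver-private hardware-error plumbing (hpre_hw_err_init, hpre_process_hw_error, hpre_error_detected) with a hisi_qm_err_ini callback table that the QM core drives from one shared hisi_qm_dev_err_detected() PCI error handler. The stand-alone program below models that ops-table shape with invented names and a fake status word; it illustrates the pattern only and is not the kernel code.

#include <stdio.h>

enum ers_result { ERS_RECOVERED, ERS_NEED_RESET };

struct dev_model;

struct err_ops {                        /* models struct hisi_qm_err_ini   */
        void (*hw_err_enable)(struct dev_model *d);
        void (*hw_err_disable)(struct dev_model *d);
        unsigned int (*get_status)(struct dev_model *d);
        void (*log_err)(struct dev_model *d, unsigned int sts);
};

struct dev_model {                      /* models struct hisi_qm           */
        const char *name;
        unsigned int fake_status;       /* stands in for the HW status reg */
        const struct err_ops *err_ops;
};

/* Core-side handler: works for any device that fills in err_ops. */
static enum ers_result core_err_detected(struct dev_model *d)
{
        unsigned int sts = d->err_ops->get_status(d);

        if (!sts)
                return ERS_RECOVERED;
        d->err_ops->log_err(d, sts);
        return ERS_NEED_RESET;
}

/* Driver-side callbacks, analogous to the new hpre_err_ini entries. */
static void toy_enable(struct dev_model *d)  { printf("%s: error irqs on\n", d->name); }
static void toy_disable(struct dev_model *d) { printf("%s: error irqs off\n", d->name); }
static unsigned int toy_status(struct dev_model *d) { return d->fake_status; }
static void toy_log(struct dev_model *d, unsigned int sts)
{
        printf("%s: error status=0x%x\n", d->name, sts);
}

static const struct err_ops toy_err_ops = {
        .hw_err_enable  = toy_enable,
        .hw_err_disable = toy_disable,
        .get_status     = toy_status,
        .log_err        = toy_log,
};

int main(void)
{
        struct dev_model d = { "toy-accel", 0x40, &toy_err_ops };

        d.err_ops->hw_err_enable(&d);
        printf("result=%d\n", core_err_detected(&d));
        d.err_ops->hw_err_disable(&d);
        return 0;
}

The visible payoff in the diff is that hpre's probe and remove paths shrink to hisi_qm_dev_err_init()/hisi_qm_dev_err_uninit() calls and its pci_error_handlers entry becomes the common hisi_qm_dev_err_detected().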
@@ -1437,10 +1655,14 @@ int hisi_qm_init(struct hisi_qm *qm) return -EINVAL; } + ret = qm_alloc_uacce(qm); + if (ret < 0) + dev_warn(&pdev->dev, "fail to alloc uacce (%d)\n", ret); + ret = pci_enable_device_mem(pdev); if (ret < 0) { dev_err(&pdev->dev, "Failed to enable device mem!\n"); - return ret; + goto err_remove_uacce; } ret = pci_request_mem_regions(pdev, qm->dev_name); @@ -1449,8 +1671,9 @@ int hisi_qm_init(struct hisi_qm *qm) goto err_disable_pcidev; } - qm->io_base = ioremap(pci_resource_start(pdev, PCI_BAR_2), - pci_resource_len(qm->pdev, PCI_BAR_2)); + qm->phys_base = pci_resource_start(pdev, PCI_BAR_2); + qm->phys_size = pci_resource_len(qm->pdev, PCI_BAR_2); + qm->io_base = ioremap(qm->phys_base, qm->phys_size); if (!qm->io_base) { ret = -EIO; goto err_release_mem_regions; @@ -1479,6 +1702,7 @@ int hisi_qm_init(struct hisi_qm *qm) qm->qp_in_used = 0; mutex_init(&qm->mailbox_lock); rwlock_init(&qm->qps_lock); + INIT_WORK(&qm->work, qm_work_process); dev_dbg(dev, "init qm %s with %s\n", pdev->is_physfn ? "pf" : "vf", qm->use_dma_api ? "dma api" : "iommu api"); @@ -1493,6 +1717,9 @@ err_release_mem_regions: pci_release_mem_regions(pdev); err_disable_pcidev: pci_disable_device(pdev); +err_remove_uacce: + uacce_remove(qm->uacce); + qm->uacce = NULL; return ret; } @@ -1509,6 +1736,9 @@ void hisi_qm_uninit(struct hisi_qm *qm) struct pci_dev *pdev = qm->pdev; struct device *dev = &pdev->dev; + uacce_remove(qm->uacce); + qm->uacce = NULL; + if (qm->use_dma_api && qm->qdma.va) { hisi_qm_cache_wb(qm); dma_free_coherent(dev, qm->qdma.size, @@ -1856,43 +2086,30 @@ void hisi_qm_debug_regs_clear(struct hisi_qm *qm) } EXPORT_SYMBOL_GPL(hisi_qm_debug_regs_clear); -/** - * hisi_qm_hw_error_init() - Configure qm hardware error report method. - * @qm: The qm which we want to configure. - * @ce: Bit mask of correctable error configure. - * @nfe: Bit mask of non-fatal error configure. - * @fe: Bit mask of fatal error configure. - * @msi: Bit mask of error reported by message signal interrupt. - * - * Hardware errors of qm can be reported either by RAS interrupts which will - * be handled by UEFI and then PCIe AER or by device MSI. User can configure - * each error to use either of above two methods. For RAS interrupts, we can - * configure an error as one of correctable error, non-fatal error or - * fatal error. - * - * Bits indicating errors can be configured to ce, nfe, fe and msi to enable - * related report methods. Error report will be masked if related error bit - * does not configure. - */ -void hisi_qm_hw_error_init(struct hisi_qm *qm, u32 ce, u32 nfe, u32 fe, - u32 msi) +static void qm_hw_error_init(struct hisi_qm *qm) { + const struct hisi_qm_err_info *err_info = &qm->err_ini->err_info; + if (!qm->ops->hw_error_init) { dev_err(&qm->pdev->dev, "QM doesn't support hw error handling!\n"); return; } - qm->ops->hw_error_init(qm, ce, nfe, fe, msi); + qm->ops->hw_error_init(qm, err_info->ce, err_info->nfe, + err_info->fe, err_info->msi); } -EXPORT_SYMBOL_GPL(hisi_qm_hw_error_init); -/** - * hisi_qm_hw_error_handle() - Handle qm non-fatal hardware errors. - * @qm: The qm which has non-fatal hardware errors. - * - * Accelerators use this function to handle qm non-fatal hardware errors. 
- */ -pci_ers_result_t hisi_qm_hw_error_handle(struct hisi_qm *qm) +static void qm_hw_error_uninit(struct hisi_qm *qm) +{ + if (!qm->ops->hw_error_uninit) { + dev_err(&qm->pdev->dev, "Unexpected QM hw error uninit!\n"); + return; + } + + qm->ops->hw_error_uninit(qm); +} + +static pci_ers_result_t qm_hw_error_handle(struct hisi_qm *qm) { if (!qm->ops->hw_error_handle) { dev_err(&qm->pdev->dev, "QM doesn't support hw error report!\n"); @@ -1901,7 +2118,6 @@ pci_ers_result_t hisi_qm_hw_error_handle(struct hisi_qm *qm) return qm->ops->hw_error_handle(qm); } -EXPORT_SYMBOL_GPL(hisi_qm_hw_error_handle); /** * hisi_qm_get_hw_version() - Get hardware version of a qm. @@ -1922,6 +2138,229 @@ enum qm_hw_ver hisi_qm_get_hw_version(struct pci_dev *pdev) } EXPORT_SYMBOL_GPL(hisi_qm_get_hw_version); +/** + * hisi_qm_dev_err_init() - Initialize device error configuration. + * @qm: The qm for which we want to do error initialization. + * + * Initialize QM and device error related configuration. + */ +void hisi_qm_dev_err_init(struct hisi_qm *qm) +{ + if (qm->fun_type == QM_HW_VF) + return; + + qm_hw_error_init(qm); + + if (!qm->err_ini->hw_err_enable) { + dev_err(&qm->pdev->dev, "Device doesn't support hw error init!\n"); + return; + } + qm->err_ini->hw_err_enable(qm); +} +EXPORT_SYMBOL_GPL(hisi_qm_dev_err_init); + +/** + * hisi_qm_dev_err_uninit() - Uninitialize device error configuration. + * @qm: The qm for which we want to do error uninitialization. + * + * Uninitialize QM and device error related configuration. + */ +void hisi_qm_dev_err_uninit(struct hisi_qm *qm) +{ + if (qm->fun_type == QM_HW_VF) + return; + + qm_hw_error_uninit(qm); + + if (!qm->err_ini->hw_err_disable) { + dev_err(&qm->pdev->dev, "Unexpected device hw error uninit!\n"); + return; + } + qm->err_ini->hw_err_disable(qm); +} +EXPORT_SYMBOL_GPL(hisi_qm_dev_err_uninit); + +/** + * hisi_qm_free_qps() - free multiple queue pairs. + * @qps: The queue pairs need to be freed. + * @qp_num: The num of queue pairs. + */ +void hisi_qm_free_qps(struct hisi_qp **qps, int qp_num) +{ + int i; + + if (!qps || qp_num <= 0) + return; + + for (i = qp_num - 1; i >= 0; i--) + hisi_qm_release_qp(qps[i]); +} +EXPORT_SYMBOL_GPL(hisi_qm_free_qps); + +static void free_list(struct list_head *head) +{ + struct hisi_qm_resource *res, *tmp; + + list_for_each_entry_safe(res, tmp, head, list) { + list_del(&res->list); + kfree(res); + } +} + +static int hisi_qm_sort_devices(int node, struct list_head *head, + struct hisi_qm_list *qm_list) +{ + struct hisi_qm_resource *res, *tmp; + struct hisi_qm *qm; + struct list_head *n; + struct device *dev; + int dev_node = 0; + + list_for_each_entry(qm, &qm_list->list, list) { + dev = &qm->pdev->dev; + + if (IS_ENABLED(CONFIG_NUMA)) { + dev_node = dev_to_node(dev); + if (dev_node < 0) + dev_node = 0; + } + + res = kzalloc(sizeof(*res), GFP_KERNEL); + if (!res) + return -ENOMEM; + + res->qm = qm; + res->distance = node_distance(dev_node, node); + n = head; + list_for_each_entry(tmp, head, list) { + if (res->distance < tmp->distance) { + n = &tmp->list; + break; + } + } + list_add_tail(&res->list, n); + } + + return 0; +} + +/** + * hisi_qm_alloc_qps_node() - Create multiple queue pairs. + * @qm_list: The list of all available devices. + * @qp_num: The number of queue pairs need created. + * @alg_type: The algorithm type. + * @node: The numa node. + * @qps: The queue pairs need created. + * + * This function will sort all available device according to numa distance. 
+ * Then try to create all queue pairs from one device, if all devices do + * not meet the requirements will return error. + */ +int hisi_qm_alloc_qps_node(struct hisi_qm_list *qm_list, int qp_num, + u8 alg_type, int node, struct hisi_qp **qps) +{ + struct hisi_qm_resource *tmp; + int ret = -ENODEV; + LIST_HEAD(head); + int i; + + if (!qps || !qm_list || qp_num <= 0) + return -EINVAL; + + mutex_lock(&qm_list->lock); + if (hisi_qm_sort_devices(node, &head, qm_list)) { + mutex_unlock(&qm_list->lock); + goto err; + } + + list_for_each_entry(tmp, &head, list) { + for (i = 0; i < qp_num; i++) { + qps[i] = hisi_qm_create_qp(tmp->qm, alg_type); + if (IS_ERR(qps[i])) { + hisi_qm_free_qps(qps, i); + break; + } + } + + if (i == qp_num) { + ret = 0; + break; + } + } + + mutex_unlock(&qm_list->lock); + if (ret) + pr_info("Failed to create qps, node[%d], alg[%d], qp[%d]!\n", + node, alg_type, qp_num); + +err: + free_list(&head); + return ret; +} +EXPORT_SYMBOL_GPL(hisi_qm_alloc_qps_node); + +static pci_ers_result_t qm_dev_err_handle(struct hisi_qm *qm) +{ + u32 err_sts; + + if (!qm->err_ini->get_dev_hw_err_status) { + dev_err(&qm->pdev->dev, "Device doesn't support get hw error status!\n"); + return PCI_ERS_RESULT_NONE; + } + + /* get device hardware error status */ + err_sts = qm->err_ini->get_dev_hw_err_status(qm); + if (err_sts) { + if (!qm->err_ini->log_dev_hw_err) { + dev_err(&qm->pdev->dev, "Device doesn't support log hw error!\n"); + return PCI_ERS_RESULT_NEED_RESET; + } + + qm->err_ini->log_dev_hw_err(qm, err_sts); + return PCI_ERS_RESULT_NEED_RESET; + } + + return PCI_ERS_RESULT_RECOVERED; +} + +static pci_ers_result_t qm_process_dev_error(struct pci_dev *pdev) +{ + struct hisi_qm *qm = pci_get_drvdata(pdev); + pci_ers_result_t qm_ret, dev_ret; + + /* log qm error */ + qm_ret = qm_hw_error_handle(qm); + + /* log device error */ + dev_ret = qm_dev_err_handle(qm); + + return (qm_ret == PCI_ERS_RESULT_NEED_RESET || + dev_ret == PCI_ERS_RESULT_NEED_RESET) ? + PCI_ERS_RESULT_NEED_RESET : PCI_ERS_RESULT_RECOVERED; +} + +/** + * hisi_qm_dev_err_detected() - Get device and qm error status then log it. + * @pdev: The PCI device which need report error. + * @state: The connectivity between CPU and device. + * + * We register this function into PCIe AER handlers, It will report device or + * qm hardware error status when error occur. 
+ */ +pci_ers_result_t hisi_qm_dev_err_detected(struct pci_dev *pdev, + pci_channel_state_t state) +{ + if (pdev->is_virtfn) + return PCI_ERS_RESULT_NONE; + + pci_info(pdev, "PCI error detected, state(=%d)!!\n", state); + if (state == pci_channel_io_perm_failure) + return PCI_ERS_RESULT_DISCONNECT; + + return qm_process_dev_error(pdev); +} +EXPORT_SYMBOL_GPL(hisi_qm_dev_err_detected); + MODULE_LICENSE("GPL v2"); MODULE_AUTHOR("Zhou Wang <wangzhou1@hisilicon.com>"); MODULE_DESCRIPTION("HiSilicon Accelerator queue manager driver"); diff --git a/drivers/crypto/hisilicon/qm.h b/drivers/crypto/hisilicon/qm.h index 078b8f1f1b77..ec5b6f48db6c 100644 --- a/drivers/crypto/hisilicon/qm.h +++ b/drivers/crypto/hisilicon/qm.h @@ -77,6 +77,9 @@ #define HISI_ACC_SGL_SGE_NR_MAX 255 +/* page number for queue file region */ +#define QM_DOORBELL_PAGE_NR 1 + enum qp_state { QP_STOP, }; @@ -125,6 +128,28 @@ struct hisi_qm_status { unsigned long flags; }; +struct hisi_qm; + +struct hisi_qm_err_info { + u32 ce; + u32 nfe; + u32 fe; + u32 msi; +}; + +struct hisi_qm_err_ini { + void (*hw_err_enable)(struct hisi_qm *qm); + void (*hw_err_disable)(struct hisi_qm *qm); + u32 (*get_dev_hw_err_status)(struct hisi_qm *qm); + void (*log_dev_hw_err)(struct hisi_qm *qm, u32 err_sts); + struct hisi_qm_err_info err_info; +}; + +struct hisi_qm_list { + struct mutex lock; + struct list_head list; +}; + struct hisi_qm { enum qm_hw_ver ver; enum qm_fun_type fun_type; @@ -136,6 +161,7 @@ struct hisi_qm { u32 qp_num; u32 qp_in_used; u32 ctrl_qp_num; + struct list_head list; struct qm_dma qdma; struct qm_sqc *sqc; @@ -148,6 +174,7 @@ struct hisi_qm { dma_addr_t aeqe_dma; struct hisi_qm_status status; + const struct hisi_qm_err_ini *err_ini; rwlock_t qps_lock; unsigned long *qp_bitmap; @@ -162,7 +189,15 @@ struct hisi_qm { u32 error_mask; u32 msi_mask; + struct workqueue_struct *wq; + struct work_struct work; + + const char *algs; bool use_dma_api; + bool use_sva; + resource_size_t phys_base; + resource_size_t phys_size; + struct uacce_device *uacce; }; struct hisi_qp_status { @@ -192,12 +227,35 @@ struct hisi_qp { struct hisi_qp_ops *hw_ops; void *qp_ctx; void (*req_cb)(struct hisi_qp *qp, void *data); - struct work_struct work; - struct workqueue_struct *wq; + void (*event_cb)(struct hisi_qp *qp); struct hisi_qm *qm; + u16 pasid; + struct uacce_queue *uacce_q; }; +static inline void hisi_qm_init_list(struct hisi_qm_list *qm_list) +{ + INIT_LIST_HEAD(&qm_list->list); + mutex_init(&qm_list->lock); +} + +static inline void hisi_qm_add_to_list(struct hisi_qm *qm, + struct hisi_qm_list *qm_list) +{ + mutex_lock(&qm_list->lock); + list_add_tail(&qm->list, &qm_list->list); + mutex_unlock(&qm_list->lock); +} + +static inline void hisi_qm_del_from_list(struct hisi_qm *qm, + struct hisi_qm_list *qm_list) +{ + mutex_lock(&qm_list->lock); + list_del(&qm->list); + mutex_unlock(&qm_list->lock); +} + int hisi_qm_init(struct hisi_qm *qm); void hisi_qm_uninit(struct hisi_qm *qm); int hisi_qm_start(struct hisi_qm *qm); @@ -211,11 +269,12 @@ int hisi_qm_get_free_qp_num(struct hisi_qm *qm); int hisi_qm_get_vft(struct hisi_qm *qm, u32 *base, u32 *number); int hisi_qm_set_vft(struct hisi_qm *qm, u32 fun_num, u32 base, u32 number); int hisi_qm_debug_init(struct hisi_qm *qm); -void hisi_qm_hw_error_init(struct hisi_qm *qm, u32 ce, u32 nfe, u32 fe, - u32 msi); -pci_ers_result_t hisi_qm_hw_error_handle(struct hisi_qm *qm); enum qm_hw_ver hisi_qm_get_hw_version(struct pci_dev *pdev); void hisi_qm_debug_regs_clear(struct hisi_qm *qm); +void 
hisi_qm_dev_err_init(struct hisi_qm *qm); +void hisi_qm_dev_err_uninit(struct hisi_qm *qm); +pci_ers_result_t hisi_qm_dev_err_detected(struct pci_dev *pdev, + pci_channel_state_t state); struct hisi_acc_sgl_pool; struct hisi_acc_hw_sgl *hisi_acc_sg_buf_map_to_hw_sgl(struct device *dev, @@ -227,4 +286,7 @@ struct hisi_acc_sgl_pool *hisi_acc_create_sgl_pool(struct device *dev, u32 count, u32 sge_nr); void hisi_acc_free_sgl_pool(struct device *dev, struct hisi_acc_sgl_pool *pool); +int hisi_qm_alloc_qps_node(struct hisi_qm_list *qm_list, int qp_num, + u8 alg_type, int node, struct hisi_qp **qps); +void hisi_qm_free_qps(struct hisi_qp **qps, int qp_num); #endif diff --git a/drivers/crypto/hisilicon/sec2/sec.h b/drivers/crypto/hisilicon/sec2/sec.h index 13e2d8d7be94..3598fa17beb2 100644 --- a/drivers/crypto/hisilicon/sec2/sec.h +++ b/drivers/crypto/hisilicon/sec2/sec.h @@ -11,6 +11,8 @@ /* Algorithm resource per hardware SEC queue */ struct sec_alg_res { + u8 *pbuf; + dma_addr_t pbuf_dma; u8 *c_ivin; dma_addr_t c_ivin_dma; u8 *out_mac; @@ -23,6 +25,8 @@ struct sec_cipher_req { dma_addr_t c_in_dma; struct hisi_acc_hw_sgl *c_out; dma_addr_t c_out_dma; + u8 *c_ivin; + dma_addr_t c_ivin_dma; struct skcipher_request *sk_req; u32 c_len; bool encrypt; @@ -48,6 +52,7 @@ struct sec_req { /* Status of the SEC request */ bool fake_busy; + bool use_pbuf; }; /** @@ -114,6 +119,7 @@ struct sec_ctx { struct sec_qp_ctx *qp_ctx; struct sec_dev *sec; const struct sec_req_op *req_op; + struct hisi_qp **qps; /* Half queues for encipher, and half for decipher */ u32 hlf_q_num; @@ -128,6 +134,7 @@ struct sec_ctx { atomic_t dec_qcyclic; enum sec_alg_type alg_type; + bool pbuf_supported; struct sec_cipher_ctx c_ctx; struct sec_auth_ctx a_ctx; }; @@ -162,14 +169,15 @@ struct sec_debug { struct sec_dev { struct hisi_qm qm; - struct list_head list; struct sec_debug debug; u32 ctx_q_num; + bool iommu_used; u32 num_vfs; unsigned long status; }; -struct sec_dev *sec_find_device(int node); +void sec_destroy_qps(struct hisi_qp **qps, int qp_num); +struct hisi_qp **sec_create_qps(void); int sec_register_to_crypto(void); void sec_unregister_from_crypto(void); #endif diff --git a/drivers/crypto/hisilicon/sec2/sec_crypto.c b/drivers/crypto/hisilicon/sec2/sec_crypto.c index a2cfcc9ccd94..7f1c6a31b82f 100644 --- a/drivers/crypto/hisilicon/sec2/sec_crypto.c +++ b/drivers/crypto/hisilicon/sec2/sec_crypto.c @@ -46,7 +46,21 @@ #define SEC_CIPHER_AUTH 0xfe #define SEC_AUTH_CIPHER 0x1 #define SEC_MAX_MAC_LEN 64 +#define SEC_MAX_AAD_LEN 65535 #define SEC_TOTAL_MAC_SZ (SEC_MAX_MAC_LEN * QM_Q_DEPTH) + +#define SEC_PBUF_SZ 512 +#define SEC_PBUF_IV_OFFSET SEC_PBUF_SZ +#define SEC_PBUF_MAC_OFFSET (SEC_PBUF_SZ + SEC_IV_SIZE) +#define SEC_PBUF_PKG (SEC_PBUF_SZ + SEC_IV_SIZE + \ + SEC_MAX_MAC_LEN * 2) +#define SEC_PBUF_NUM (PAGE_SIZE / SEC_PBUF_PKG) +#define SEC_PBUF_PAGE_NUM (QM_Q_DEPTH / SEC_PBUF_NUM) +#define SEC_PBUF_LEFT_SZ (SEC_PBUF_PKG * (QM_Q_DEPTH - \ + SEC_PBUF_PAGE_NUM * SEC_PBUF_NUM)) +#define SEC_TOTAL_PBUF_SZ (PAGE_SIZE * SEC_PBUF_PAGE_NUM + \ + SEC_PBUF_LEFT_SZ) + #define SEC_SQE_LEN_RATE 4 #define SEC_SQE_CFLAG 2 #define SEC_SQE_AEAD_FLAG 3 @@ -110,12 +124,12 @@ static void sec_free_req_id(struct sec_req *req) mutex_unlock(&qp_ctx->req_lock); } -static int sec_aead_verify(struct sec_req *req, struct sec_qp_ctx *qp_ctx) +static int sec_aead_verify(struct sec_req *req) { struct aead_request *aead_req = req->aead_req.aead_req; struct crypto_aead *tfm = crypto_aead_reqtfm(aead_req); - u8 *mac_out = qp_ctx->res[req->req_id].out_mac; 
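/*
 * The SEC_PBUF_* macros added in sec_crypto.c above pack one bounce buffer
 * per outstanding request -- 512 bytes of data plus an IV slot and two MAC
 * slots -- as many times per page as fit, with a tail allocation for the
 * remainder, so that small (< 512 byte) requests can be copied through a
 * single coherent buffer instead of going through IOMMU-mapped scatterlists.
 * The user-space arithmetic below mirrors that layout; SEC_IV_SIZE,
 * QM_Q_DEPTH and PAGE_SIZE are not visible in this hunk, so the values used
 * here are assumptions for illustration only.
 */
#include <stdio.h>
#include <stddef.h>

#define PAGE_SIZE_M        4096u  /* assumed page size                    */
#define SEC_PBUF_SZ_M       512u  /* from the diff                        */
#define SEC_IV_SIZE_M        24u  /* assumed IV slot size                 */
#define SEC_MAX_MAC_LEN_M    64u  /* from the diff                        */
#define QM_Q_DEPTH_M       1024u  /* assumed queue depth                  */

#define SEC_PBUF_PKG_M      (SEC_PBUF_SZ_M + SEC_IV_SIZE_M + \
                             SEC_MAX_MAC_LEN_M * 2)
#define SEC_PBUF_NUM_M      (PAGE_SIZE_M / SEC_PBUF_PKG_M)
#define SEC_PBUF_PAGE_NUM_M (QM_Q_DEPTH_M / SEC_PBUF_NUM_M)
#define SEC_PBUF_LEFT_SZ_M  (SEC_PBUF_PKG_M * (QM_Q_DEPTH_M - \
                             SEC_PBUF_PAGE_NUM_M * SEC_PBUF_NUM_M))
#define SEC_TOTAL_PBUF_SZ_M (PAGE_SIZE_M * SEC_PBUF_PAGE_NUM_M + \
                             SEC_PBUF_LEFT_SZ_M)

/* Offset of request k's package inside the single coherent allocation. */
static size_t pbuf_offset(unsigned int k)
{
        unsigned int page = k / SEC_PBUF_NUM_M;  /* which page           */
        unsigned int slot = k % SEC_PBUF_NUM_M;  /* which package on it  */

        return (size_t)page * PAGE_SIZE_M + (size_t)slot * SEC_PBUF_PKG_M;
}

int main(void)
{
        printf("pkg=%u B, %u pkgs/page, %u full pages, total=%u B\n",
               SEC_PBUF_PKG_M, SEC_PBUF_NUM_M, SEC_PBUF_PAGE_NUM_M,
               SEC_TOTAL_PBUF_SZ_M);
        printf("request 0 at offset %zu, request 7 at offset %zu\n",
               pbuf_offset(0), pbuf_offset(7));
        return 0;
}
/*
 * With these assumed sizes each page holds 4096 / (512 + 24 + 128) = 6
 * packages, matching the per-page/per-slot indexing that
 * sec_alloc_pbuf_resource() performs further down in this file's diff.
 */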
size_t authsize = crypto_aead_authsize(tfm); + u8 *mac_out = req->aead_req.out_mac; u8 *mac = mac_out + SEC_MAX_MAC_LEN; struct scatterlist *sgl = aead_req->src; size_t sz; @@ -163,7 +177,7 @@ static void sec_req_cb(struct hisi_qp *qp, void *resp) } if (ctx->alg_type == SEC_AEAD && !req->c_req.encrypt) - err = sec_aead_verify(req, qp_ctx); + err = sec_aead_verify(req); atomic64_inc(&ctx->sec->debug.dfx.recv_cnt); @@ -245,6 +259,50 @@ static void sec_free_mac_resource(struct device *dev, struct sec_alg_res *res) res->out_mac, res->out_mac_dma); } +static void sec_free_pbuf_resource(struct device *dev, struct sec_alg_res *res) +{ + if (res->pbuf) + dma_free_coherent(dev, SEC_TOTAL_PBUF_SZ, + res->pbuf, res->pbuf_dma); +} + +/* + * To improve performance, pbuffer is used for + * small packets (< 512Bytes) as IOMMU translation using. + */ +static int sec_alloc_pbuf_resource(struct device *dev, struct sec_alg_res *res) +{ + int pbuf_page_offset; + int i, j, k; + + res->pbuf = dma_alloc_coherent(dev, SEC_TOTAL_PBUF_SZ, + &res->pbuf_dma, GFP_KERNEL); + if (!res->pbuf) + return -ENOMEM; + + /* + * SEC_PBUF_PKG contains data pbuf, iv and + * out_mac : <SEC_PBUF|SEC_IV|SEC_MAC> + * Every PAGE contains six SEC_PBUF_PKG + * The sec_qp_ctx contains QM_Q_DEPTH numbers of SEC_PBUF_PKG + * So we need SEC_PBUF_PAGE_NUM numbers of PAGE + * for the SEC_TOTAL_PBUF_SZ + */ + for (i = 0; i <= SEC_PBUF_PAGE_NUM; i++) { + pbuf_page_offset = PAGE_SIZE * i; + for (j = 0; j < SEC_PBUF_NUM; j++) { + k = i * SEC_PBUF_NUM + j; + if (k == QM_Q_DEPTH) + break; + res[k].pbuf = res->pbuf + + j * SEC_PBUF_PKG + pbuf_page_offset; + res[k].pbuf_dma = res->pbuf_dma + + j * SEC_PBUF_PKG + pbuf_page_offset; + } + } + return 0; +} + static int sec_alg_resource_alloc(struct sec_ctx *ctx, struct sec_qp_ctx *qp_ctx) { @@ -259,11 +317,18 @@ static int sec_alg_resource_alloc(struct sec_ctx *ctx, if (ctx->alg_type == SEC_AEAD) { ret = sec_alloc_mac_resource(dev, res); if (ret) - goto get_fail; + goto alloc_fail; + } + if (ctx->pbuf_supported) { + ret = sec_alloc_pbuf_resource(dev, res); + if (ret) { + dev_err(dev, "fail to alloc pbuf dma resource!\n"); + goto alloc_fail; + } } return 0; -get_fail: +alloc_fail: sec_free_civ_resource(dev, res); return ret; @@ -276,6 +341,8 @@ static void sec_alg_resource_free(struct sec_ctx *ctx, sec_free_civ_resource(dev, qp_ctx->res); + if (ctx->pbuf_supported) + sec_free_pbuf_resource(dev, qp_ctx->res); if (ctx->alg_type == SEC_AEAD) sec_free_mac_resource(dev, qp_ctx->res); } @@ -288,11 +355,8 @@ static int sec_create_qp_ctx(struct hisi_qm *qm, struct sec_ctx *ctx, struct hisi_qp *qp; int ret = -ENOMEM; - qp = hisi_qm_create_qp(qm, alg_type); - if (IS_ERR(qp)) - return PTR_ERR(qp); - qp_ctx = &ctx->qp_ctx[qp_ctx_id]; + qp = ctx->qps[qp_ctx_id]; qp->req_type = 0; qp->qp_ctx = qp_ctx; qp->req_cb = sec_req_cb; @@ -335,7 +399,6 @@ err_free_c_in_pool: hisi_acc_free_sgl_pool(dev, qp_ctx->c_in_pool); err_destroy_idr: idr_destroy(&qp_ctx->req_idr); - hisi_qm_release_qp(qp); return ret; } @@ -352,7 +415,6 @@ static void sec_release_qp_ctx(struct sec_ctx *ctx, hisi_acc_free_sgl_pool(dev, qp_ctx->c_in_pool); idr_destroy(&qp_ctx->req_idr); - hisi_qm_release_qp(qp_ctx->qp); } static int sec_ctx_base_init(struct sec_ctx *ctx) @@ -360,14 +422,18 @@ static int sec_ctx_base_init(struct sec_ctx *ctx) struct sec_dev *sec; int i, ret; - sec = sec_find_device(cpu_to_node(smp_processor_id())); - if (!sec) { - pr_err("Can not find proper Hisilicon SEC device!\n"); + ctx->qps = sec_create_qps(); + if (!ctx->qps) { + 
pr_err("Can not create sec qps!\n"); return -ENODEV; } + + sec = container_of(ctx->qps[0]->qm, struct sec_dev, qm); ctx->sec = sec; ctx->hlf_q_num = sec->ctx_q_num >> 1; + ctx->pbuf_supported = ctx->sec->iommu_used; + /* Half of queue depth is taken as fake requests limit in the queue. */ ctx->fake_req_limit = QM_Q_DEPTH >> 1; ctx->qp_ctx = kcalloc(sec->ctx_q_num, sizeof(struct sec_qp_ctx), @@ -386,6 +452,7 @@ err_sec_release_qp_ctx: for (i = i - 1; i >= 0; i--) sec_release_qp_ctx(ctx, &ctx->qp_ctx[i]); + sec_destroy_qps(ctx->qps, sec->ctx_q_num); kfree(ctx->qp_ctx); return ret; } @@ -397,6 +464,7 @@ static void sec_ctx_base_uninit(struct sec_ctx *ctx) for (i = 0; i < ctx->sec->ctx_q_num; i++) sec_release_qp_ctx(ctx, &ctx->qp_ctx[i]); + sec_destroy_qps(ctx->qps, ctx->sec->ctx_q_num); kfree(ctx->qp_ctx); } @@ -447,7 +515,6 @@ static int sec_skcipher_init(struct crypto_skcipher *tfm) struct sec_ctx *ctx = crypto_skcipher_ctx(tfm); int ret; - ctx = crypto_skcipher_ctx(tfm); ctx->alg_type = SEC_SKCIPHER; crypto_skcipher_set_reqsize(tfm, sizeof(struct sec_req)); ctx->c_ctx.ivsize = crypto_skcipher_ivsize(tfm); @@ -591,11 +658,94 @@ GEN_SEC_SETKEY_FUNC(3des_cbc, SEC_CALG_3DES, SEC_CMODE_CBC) GEN_SEC_SETKEY_FUNC(sm4_xts, SEC_CALG_SM4, SEC_CMODE_XTS) GEN_SEC_SETKEY_FUNC(sm4_cbc, SEC_CALG_SM4, SEC_CMODE_CBC) -static int sec_cipher_map(struct device *dev, struct sec_req *req, +static int sec_cipher_pbuf_map(struct sec_ctx *ctx, struct sec_req *req, + struct scatterlist *src) +{ + struct aead_request *aead_req = req->aead_req.aead_req; + struct sec_cipher_req *c_req = &req->c_req; + struct sec_qp_ctx *qp_ctx = req->qp_ctx; + struct device *dev = SEC_CTX_DEV(ctx); + int copy_size, pbuf_length; + int req_id = req->req_id; + + if (ctx->alg_type == SEC_AEAD) + copy_size = aead_req->cryptlen + aead_req->assoclen; + else + copy_size = c_req->c_len; + + pbuf_length = sg_copy_to_buffer(src, sg_nents(src), + qp_ctx->res[req_id].pbuf, + copy_size); + + if (unlikely(pbuf_length != copy_size)) { + dev_err(dev, "copy src data to pbuf error!\n"); + return -EINVAL; + } + + c_req->c_in_dma = qp_ctx->res[req_id].pbuf_dma; + + if (!c_req->c_in_dma) { + dev_err(dev, "fail to set pbuffer address!\n"); + return -ENOMEM; + } + + c_req->c_out_dma = c_req->c_in_dma; + + return 0; +} + +static void sec_cipher_pbuf_unmap(struct sec_ctx *ctx, struct sec_req *req, + struct scatterlist *dst) +{ + struct aead_request *aead_req = req->aead_req.aead_req; + struct sec_cipher_req *c_req = &req->c_req; + struct sec_qp_ctx *qp_ctx = req->qp_ctx; + struct device *dev = SEC_CTX_DEV(ctx); + int copy_size, pbuf_length; + int req_id = req->req_id; + + if (ctx->alg_type == SEC_AEAD) + copy_size = c_req->c_len + aead_req->assoclen; + else + copy_size = c_req->c_len; + + pbuf_length = sg_copy_from_buffer(dst, sg_nents(dst), + qp_ctx->res[req_id].pbuf, + copy_size); + + if (unlikely(pbuf_length != copy_size)) + dev_err(dev, "copy pbuf data to dst error!\n"); + +} + +static int sec_cipher_map(struct sec_ctx *ctx, struct sec_req *req, struct scatterlist *src, struct scatterlist *dst) { struct sec_cipher_req *c_req = &req->c_req; + struct sec_aead_req *a_req = &req->aead_req; struct sec_qp_ctx *qp_ctx = req->qp_ctx; + struct sec_alg_res *res = &qp_ctx->res[req->req_id]; + struct device *dev = SEC_CTX_DEV(ctx); + int ret; + + if (req->use_pbuf) { + ret = sec_cipher_pbuf_map(ctx, req, src); + c_req->c_ivin = res->pbuf + SEC_PBUF_IV_OFFSET; + c_req->c_ivin_dma = res->pbuf_dma + SEC_PBUF_IV_OFFSET; + if (ctx->alg_type == SEC_AEAD) { + a_req->out_mac = 
res->pbuf + SEC_PBUF_MAC_OFFSET; + a_req->out_mac_dma = res->pbuf_dma + + SEC_PBUF_MAC_OFFSET; + } + + return ret; + } + c_req->c_ivin = res->c_ivin; + c_req->c_ivin_dma = res->c_ivin_dma; + if (ctx->alg_type == SEC_AEAD) { + a_req->out_mac = res->out_mac; + a_req->out_mac_dma = res->out_mac_dma; + } c_req->c_in = hisi_acc_sg_buf_map_to_hw_sgl(dev, src, qp_ctx->c_in_pool, @@ -626,29 +776,34 @@ static int sec_cipher_map(struct device *dev, struct sec_req *req, return 0; } -static void sec_cipher_unmap(struct device *dev, struct sec_cipher_req *req, +static void sec_cipher_unmap(struct sec_ctx *ctx, struct sec_req *req, struct scatterlist *src, struct scatterlist *dst) { - if (dst != src) - hisi_acc_sg_buf_unmap(dev, src, req->c_in); + struct sec_cipher_req *c_req = &req->c_req; + struct device *dev = SEC_CTX_DEV(ctx); + + if (req->use_pbuf) { + sec_cipher_pbuf_unmap(ctx, req, dst); + } else { + if (dst != src) + hisi_acc_sg_buf_unmap(dev, src, c_req->c_in); - hisi_acc_sg_buf_unmap(dev, dst, req->c_out); + hisi_acc_sg_buf_unmap(dev, dst, c_req->c_out); + } } static int sec_skcipher_sgl_map(struct sec_ctx *ctx, struct sec_req *req) { struct skcipher_request *sq = req->c_req.sk_req; - return sec_cipher_map(SEC_CTX_DEV(ctx), req, sq->src, sq->dst); + return sec_cipher_map(ctx, req, sq->src, sq->dst); } static void sec_skcipher_sgl_unmap(struct sec_ctx *ctx, struct sec_req *req) { - struct device *dev = SEC_CTX_DEV(ctx); - struct sec_cipher_req *c_req = &req->c_req; - struct skcipher_request *sk_req = c_req->sk_req; + struct skcipher_request *sq = req->c_req.sk_req; - sec_cipher_unmap(dev, c_req, sk_req->src, sk_req->dst); + sec_cipher_unmap(ctx, req, sq->src, sq->dst); } static int sec_aead_aes_set_key(struct sec_cipher_ctx *c_ctx, @@ -759,16 +914,14 @@ static int sec_aead_sgl_map(struct sec_ctx *ctx, struct sec_req *req) { struct aead_request *aq = req->aead_req.aead_req; - return sec_cipher_map(SEC_CTX_DEV(ctx), req, aq->src, aq->dst); + return sec_cipher_map(ctx, req, aq->src, aq->dst); } static void sec_aead_sgl_unmap(struct sec_ctx *ctx, struct sec_req *req) { - struct device *dev = SEC_CTX_DEV(ctx); - struct sec_cipher_req *cq = &req->c_req; struct aead_request *aq = req->aead_req.aead_req; - sec_cipher_unmap(dev, cq, aq->src, aq->dst); + sec_cipher_unmap(ctx, req, aq->src, aq->dst); } static int sec_request_transfer(struct sec_ctx *ctx, struct sec_req *req) @@ -801,9 +954,9 @@ static void sec_request_untransfer(struct sec_ctx *ctx, struct sec_req *req) static void sec_skcipher_copy_iv(struct sec_ctx *ctx, struct sec_req *req) { struct skcipher_request *sk_req = req->c_req.sk_req; - u8 *c_ivin = req->qp_ctx->res[req->req_id].c_ivin; + struct sec_cipher_req *c_req = &req->c_req; - memcpy(c_ivin, sk_req->iv, ctx->c_ctx.ivsize); + memcpy(c_req->c_ivin, sk_req->iv, ctx->c_ctx.ivsize); } static int sec_skcipher_bd_fill(struct sec_ctx *ctx, struct sec_req *req) @@ -818,8 +971,7 @@ static int sec_skcipher_bd_fill(struct sec_ctx *ctx, struct sec_req *req) memset(sec_sqe, 0, sizeof(struct sec_sqe)); sec_sqe->type2.c_key_addr = cpu_to_le64(c_ctx->c_key_dma); - sec_sqe->type2.c_ivin_addr = - cpu_to_le64(req->qp_ctx->res[req->req_id].c_ivin_dma); + sec_sqe->type2.c_ivin_addr = cpu_to_le64(c_req->c_ivin_dma); sec_sqe->type2.data_src_addr = cpu_to_le64(c_req->c_in_dma); sec_sqe->type2.data_dst_addr = cpu_to_le64(c_req->c_out_dma); @@ -836,7 +988,10 @@ static int sec_skcipher_bd_fill(struct sec_ctx *ctx, struct sec_req *req) cipher = SEC_CIPHER_DEC << SEC_CIPHER_OFFSET; sec_sqe->type_cipher_auth = 
bd_type | cipher; - sa_type = SEC_SGL << SEC_SRC_SGL_OFFSET; + if (req->use_pbuf) + sa_type = SEC_PBUF << SEC_SRC_SGL_OFFSET; + else + sa_type = SEC_SGL << SEC_SRC_SGL_OFFSET; scene = SEC_COMM_SCENE << SEC_SCENE_OFFSET; if (c_req->c_in_dma != c_req->c_out_dma) de = 0x1 << SEC_DE_OFFSET; @@ -844,7 +999,10 @@ static int sec_skcipher_bd_fill(struct sec_ctx *ctx, struct sec_req *req) sec_sqe->sds_sa_type = (de | scene | sa_type); /* Just set DST address type */ - da_type = SEC_SGL << SEC_DST_SGL_OFFSET; + if (req->use_pbuf) + da_type = SEC_PBUF << SEC_DST_SGL_OFFSET; + else + da_type = SEC_SGL << SEC_DST_SGL_OFFSET; sec_sqe->sdm_addr_type |= da_type; sec_sqe->type2.clen_ivhlen |= cpu_to_le32(c_req->c_len); @@ -904,9 +1062,9 @@ static void sec_skcipher_callback(struct sec_ctx *ctx, struct sec_req *req, static void sec_aead_copy_iv(struct sec_ctx *ctx, struct sec_req *req) { struct aead_request *aead_req = req->aead_req.aead_req; - u8 *c_ivin = req->qp_ctx->res[req->req_id].c_ivin; + struct sec_cipher_req *c_req = &req->c_req; - memcpy(c_ivin, aead_req->iv, ctx->c_ctx.ivsize); + memcpy(c_req->c_ivin, aead_req->iv, ctx->c_ctx.ivsize); } static void sec_auth_bd_fill_ex(struct sec_auth_ctx *ctx, int dir, @@ -939,8 +1097,7 @@ static void sec_auth_bd_fill_ex(struct sec_auth_ctx *ctx, int dir, sec_sqe->type2.cipher_src_offset = cpu_to_le16((u16)aq->assoclen); - sec_sqe->type2.mac_addr = - cpu_to_le64(req->qp_ctx->res[req->req_id].out_mac_dma); + sec_sqe->type2.mac_addr = cpu_to_le64(a_req->out_mac_dma); } static int sec_aead_bd_fill(struct sec_ctx *ctx, struct sec_req *req) @@ -964,6 +1121,7 @@ static void sec_aead_callback(struct sec_ctx *c, struct sec_req *req, int err) { struct aead_request *a_req = req->aead_req.aead_req; struct crypto_aead *tfm = crypto_aead_reqtfm(a_req); + struct sec_aead_req *aead_req = &req->aead_req; struct sec_cipher_req *c_req = &req->c_req; size_t authsize = crypto_aead_authsize(tfm); struct sec_qp_ctx *qp_ctx = req->qp_ctx; @@ -979,7 +1137,7 @@ static void sec_aead_callback(struct sec_ctx *c, struct sec_req *req, int err) struct scatterlist *sgl = a_req->dst; sz = sg_pcopy_from_buffer(sgl, sg_nents(sgl), - qp_ctx->res[req->req_id].out_mac, + aead_req->out_mac, authsize, a_req->cryptlen + a_req->assoclen); @@ -1031,6 +1189,7 @@ static int sec_request_init(struct sec_ctx *ctx, struct sec_req *req) static int sec_process(struct sec_ctx *ctx, struct sec_req *req) { + struct sec_cipher_req *c_req = &req->c_req; int ret; ret = sec_request_init(ctx, req); @@ -1057,12 +1216,10 @@ err_send_req: /* As failing, restore the IV from user */ if (ctx->c_ctx.c_mode == SEC_CMODE_CBC && !req->c_req.encrypt) { if (ctx->alg_type == SEC_SKCIPHER) - memcpy(req->c_req.sk_req->iv, - req->qp_ctx->res[req->req_id].c_ivin, + memcpy(req->c_req.sk_req->iv, c_req->c_ivin, ctx->c_ctx.ivsize); else - memcpy(req->aead_req.aead_req->iv, - req->qp_ctx->res[req->req_id].c_ivin, + memcpy(req->aead_req.aead_req->iv, c_req->c_ivin, ctx->c_ctx.ivsize); } @@ -1208,6 +1365,12 @@ static int sec_skcipher_param_check(struct sec_ctx *ctx, struct sec_req *sreq) return -EINVAL; } sreq->c_req.c_len = sk_req->cryptlen; + + if (ctx->pbuf_supported && sk_req->cryptlen <= SEC_PBUF_SZ) + sreq->use_pbuf = true; + else + sreq->use_pbuf = false; + if (c_alg == SEC_CALG_3DES) { if (unlikely(sk_req->cryptlen & (DES3_EDE_BLOCK_SIZE - 1))) { dev_err(dev, "skcipher 3des input length error!\n"); @@ -1321,11 +1484,18 @@ static int sec_aead_param_check(struct sec_ctx *ctx, struct sec_req *sreq) struct crypto_aead *tfm = 
crypto_aead_reqtfm(req); size_t authsize = crypto_aead_authsize(tfm); - if (unlikely(!req->src || !req->dst || !req->cryptlen)) { + if (unlikely(!req->src || !req->dst || !req->cryptlen || + req->assoclen > SEC_MAX_AAD_LEN)) { dev_err(SEC_CTX_DEV(ctx), "aead input param error!\n"); return -EINVAL; } + if (ctx->pbuf_supported && (req->cryptlen + req->assoclen) <= + SEC_PBUF_SZ) + sreq->use_pbuf = true; + else + sreq->use_pbuf = false; + /* Support AES only */ if (unlikely(c_alg != SEC_CALG_AES)) { dev_err(SEC_CTX_DEV(ctx), "aead crypto alg error!\n"); diff --git a/drivers/crypto/hisilicon/sec2/sec_main.c b/drivers/crypto/hisilicon/sec2/sec_main.c index 2bbaf1e2dae7..1f54ebe164b6 100644 --- a/drivers/crypto/hisilicon/sec2/sec_main.c +++ b/drivers/crypto/hisilicon/sec2/sec_main.c @@ -7,6 +7,7 @@ #include <linux/debugfs.h> #include <linux/init.h> #include <linux/io.h> +#include <linux/iommu.h> #include <linux/kernel.h> #include <linux/module.h> #include <linux/pci.h> @@ -89,8 +90,7 @@ struct sec_hw_error { static const char sec_name[] = "hisi_sec2"; static struct dentry *sec_debugfs_root; -static LIST_HEAD(sec_list); -static DEFINE_MUTEX(sec_list_lock); +static struct hisi_qm_list sec_devices; static const struct sec_hw_error sec_hw_errors[] = { {.int_msk = BIT(0), .msg = "sec_axi_rresp_err_rint"}, @@ -105,37 +105,6 @@ static const struct sec_hw_error sec_hw_errors[] = { { /* sentinel */ } }; -struct sec_dev *sec_find_device(int node) -{ -#define SEC_NUMA_MAX_DISTANCE 100 - int min_distance = SEC_NUMA_MAX_DISTANCE; - int dev_node = 0, free_qp_num = 0; - struct sec_dev *sec, *ret = NULL; - struct hisi_qm *qm; - struct device *dev; - - mutex_lock(&sec_list_lock); - list_for_each_entry(sec, &sec_list, list) { - qm = &sec->qm; - dev = &qm->pdev->dev; -#ifdef CONFIG_NUMA - dev_node = dev->numa_node; - if (dev_node < 0) - dev_node = 0; -#endif - if (node_distance(dev_node, node) < min_distance) { - free_qp_num = hisi_qm_get_free_qp_num(qm); - if (free_qp_num >= sec->ctx_q_num) { - ret = sec; - min_distance = node_distance(dev_node, node); - } - } - } - mutex_unlock(&sec_list_lock); - - return ret; -} - static const char * const sec_dbg_file_name[] = { [SEC_CURRENT_QM] = "current_qm", [SEC_CLEAR_ENABLE] = "clear_enable", @@ -238,6 +207,32 @@ static u32 ctx_q_num = SEC_CTX_Q_NUM_DEF; module_param_cb(ctx_q_num, &sec_ctx_q_num_ops, &ctx_q_num, 0444); MODULE_PARM_DESC(ctx_q_num, "Queue num in ctx (24 default, 2, 4, ..., 32)"); +void sec_destroy_qps(struct hisi_qp **qps, int qp_num) +{ + hisi_qm_free_qps(qps, qp_num); + kfree(qps); +} + +struct hisi_qp **sec_create_qps(void) +{ + int node = cpu_to_node(smp_processor_id()); + u32 ctx_num = ctx_q_num; + struct hisi_qp **qps; + int ret; + + qps = kcalloc(ctx_num, sizeof(struct hisi_qp *), GFP_KERNEL); + if (!qps) + return NULL; + + ret = hisi_qm_alloc_qps_node(&sec_devices, ctx_num, 0, node, qps); + if (!ret) + return qps; + + kfree(qps); + return NULL; +} + + static const struct pci_device_id sec_dev_ids[] = { { PCI_DEVICE(PCI_VENDOR_ID_HUAWEI, SEC_PF_PCI_DEVICE_ID) }, { PCI_DEVICE(PCI_VENDOR_ID_HUAWEI, SEC_VF_PCI_DEVICE_ID) }, @@ -245,20 +240,6 @@ static const struct pci_device_id sec_dev_ids[] = { }; MODULE_DEVICE_TABLE(pci, sec_dev_ids); -static inline void sec_add_to_list(struct sec_dev *sec) -{ - mutex_lock(&sec_list_lock); - list_add_tail(&sec->list, &sec_list); - mutex_unlock(&sec_list_lock); -} - -static inline void sec_remove_from_list(struct sec_dev *sec) -{ - mutex_lock(&sec_list_lock); - list_del(&sec->list); - mutex_unlock(&sec_list_lock); -} - 
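For concreteness, the use_pbuf selection added to sec_skcipher_param_check() and sec_aead_param_check() above reduces to a single size test against SEC_PBUF_SZ. A minimal illustrative sketch of that decision follows; the helper name is hypothetical and not part of this patch:

    /* Illustrative only: mirrors the use_pbuf decision shown above.
     * A request with 16 bytes of AAD and a 256-byte payload (272 bytes
     * total) takes the pbuf fast path; a 4 KiB request falls back to
     * the hardware SGL path.
     */
    static bool sec_request_fits_pbuf(bool pbuf_supported, u32 cryptlen, u32 assoclen)
    {
            return pbuf_supported && cryptlen + assoclen <= SEC_PBUF_SZ;
    }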
static u8 sec_get_endian(struct sec_dev *sec) { struct hisi_qm *qm = &sec->qm; @@ -384,9 +365,8 @@ static void sec_debug_regs_clear(struct hisi_qm *qm) hisi_qm_debug_regs_clear(qm); } -static void sec_hw_error_enable(struct sec_dev *sec) +static void sec_hw_error_enable(struct hisi_qm *qm) { - struct hisi_qm *qm = &sec->qm; u32 val; if (qm->ver == QM_HW_V1) { @@ -414,9 +394,8 @@ static void sec_hw_error_enable(struct sec_dev *sec) writel(val, qm->io_base + SEC_CONTROL_REG); } -static void sec_hw_error_disable(struct sec_dev *sec) +static void sec_hw_error_disable(struct hisi_qm *qm) { - struct hisi_qm *qm = &sec->qm; u32 val; val = readl(qm->io_base + SEC_CONTROL_REG); @@ -435,27 +414,6 @@ static void sec_hw_error_disable(struct sec_dev *sec) writel(val, qm->io_base + SEC_CONTROL_REG); } -static void sec_hw_error_init(struct sec_dev *sec) -{ - if (sec->qm.fun_type == QM_HW_VF) - return; - - hisi_qm_hw_error_init(&sec->qm, QM_BASE_CE, - QM_BASE_NFE | QM_ACC_DO_TASK_TIMEOUT - | QM_ACC_WB_NOT_READY_TIMEOUT, 0, - QM_DB_RANDOM_INVALID); - sec_hw_error_enable(sec); -} - -static void sec_hw_error_uninit(struct sec_dev *sec) -{ - if (sec->qm.fun_type == QM_HW_VF) - return; - - sec_hw_error_disable(sec); - writel(GENMASK(12, 0), sec->qm.io_base + SEC_QM_ABNORMAL_INT_MASK); -} - static u32 sec_current_qm_read(struct sec_debug_file *file) { struct hisi_qm *qm = file->qm; @@ -695,6 +653,51 @@ static void sec_debugfs_exit(struct sec_dev *sec) debugfs_remove_recursive(sec->qm.debug.debug_root); } +static void sec_log_hw_error(struct hisi_qm *qm, u32 err_sts) +{ + const struct sec_hw_error *errs = sec_hw_errors; + struct device *dev = &qm->pdev->dev; + u32 err_val; + + while (errs->msg) { + if (errs->int_msk & err_sts) { + dev_err(dev, "%s [error status=0x%x] found\n", + errs->msg, errs->int_msk); + + if (SEC_CORE_INT_STATUS_M_ECC & errs->int_msk) { + err_val = readl(qm->io_base + + SEC_CORE_SRAM_ECC_ERR_INFO); + dev_err(dev, "multi ecc sram num=0x%x\n", + SEC_ECC_NUM(err_val)); + dev_err(dev, "multi ecc sram addr=0x%x\n", + SEC_ECC_ADDR(err_val)); + } + } + errs++; + } + + writel(err_sts, qm->io_base + SEC_CORE_INT_SOURCE); +} + +static u32 sec_get_hw_err_status(struct hisi_qm *qm) +{ + return readl(qm->io_base + SEC_CORE_INT_STATUS); +} + +static const struct hisi_qm_err_ini sec_err_ini = { + .hw_err_enable = sec_hw_error_enable, + .hw_err_disable = sec_hw_error_disable, + .get_dev_hw_err_status = sec_get_hw_err_status, + .log_dev_hw_err = sec_log_hw_error, + .err_info = { + .ce = QM_BASE_CE, + .nfe = QM_BASE_NFE | QM_ACC_DO_TASK_TIMEOUT | + QM_ACC_WB_NOT_READY_TIMEOUT, + .fe = 0, + .msi = QM_DB_RANDOM_INVALID, + } +}; + static int sec_pf_probe_init(struct sec_dev *sec) { struct hisi_qm *qm = &sec->qm; @@ -713,11 +716,13 @@ static int sec_pf_probe_init(struct sec_dev *sec) return -EINVAL; } + qm->err_ini = &sec_err_ini; + ret = sec_set_user_domain_and_cache(sec); if (ret) return ret; - sec_hw_error_init(sec); + hisi_qm_dev_err_init(qm); sec_debug_regs_clear(qm); return 0; @@ -750,12 +755,30 @@ static void sec_qm_uninit(struct hisi_qm *qm) static int sec_probe_init(struct hisi_qm *qm, struct sec_dev *sec) { + int ret; + + /* + * WQ_HIGHPRI: SEC request must be low delayed, + * so need a high priority workqueue. + * WQ_UNBOUND: SEC task is likely with long + * running CPU intensive workloads. 
+ */ + qm->wq = alloc_workqueue("%s", WQ_HIGHPRI | + WQ_MEM_RECLAIM | WQ_UNBOUND, num_online_cpus(), + pci_name(qm->pdev)); + if (!qm->wq) { + pci_err(qm->pdev, "fail to alloc workqueue\n"); + return -ENOMEM; + } + if (qm->fun_type == QM_HW_PF) { qm->qp_base = SEC_PF_DEF_Q_BASE; qm->qp_num = pf_q_num; qm->debug.curr_qm_qp_num = pf_q_num; - return sec_pf_probe_init(sec); + ret = sec_pf_probe_init(sec); + if (ret) + goto err_probe_uninit; } else if (qm->fun_type == QM_HW_VF) { /* * have no way to get qm configure in VM in v1 hardware, @@ -768,18 +791,43 @@ static int sec_probe_init(struct hisi_qm *qm, struct sec_dev *sec) qm->qp_num = SEC_QUEUE_NUM_V1 - SEC_PF_DEF_Q_NUM; } else if (qm->ver == QM_HW_V2) { /* v2 starts to support get vft by mailbox */ - return hisi_qm_get_vft(qm, &qm->qp_base, &qm->qp_num); + ret = hisi_qm_get_vft(qm, &qm->qp_base, &qm->qp_num); + if (ret) + goto err_probe_uninit; } } else { - return -ENODEV; + ret = -ENODEV; + goto err_probe_uninit; } return 0; +err_probe_uninit: + destroy_workqueue(qm->wq); + return ret; } -static void sec_probe_uninit(struct sec_dev *sec) +static void sec_probe_uninit(struct hisi_qm *qm) { - sec_hw_error_uninit(sec); + hisi_qm_dev_err_uninit(qm); + + destroy_workqueue(qm->wq); +} + +static void sec_iommu_used_check(struct sec_dev *sec) +{ + struct iommu_domain *domain; + struct device *dev = &sec->qm.pdev->dev; + + domain = iommu_get_domain_for_dev(dev); + + /* Check if iommu is used */ + sec->iommu_used = false; + if (domain) { + if (domain->type & __IOMMU_DOMAIN_PAGING) + sec->iommu_used = true; + dev_info(dev, "SMMU Opened, the iommu type = %u\n", + domain->type); + } } static int sec_probe(struct pci_dev *pdev, const struct pci_device_id *id) @@ -795,6 +843,7 @@ static int sec_probe(struct pci_dev *pdev, const struct pci_device_id *id) pci_set_drvdata(pdev, sec); sec->ctx_q_num = ctx_q_num; + sec_iommu_used_check(sec); qm = &sec->qm; @@ -820,7 +869,7 @@ static int sec_probe(struct pci_dev *pdev, const struct pci_device_id *id) if (ret) pci_warn(pdev, "Failed to init debugfs!\n"); - sec_add_to_list(sec); + hisi_qm_add_to_list(qm, &sec_devices); ret = sec_register_to_crypto(); if (ret < 0) { @@ -831,12 +880,12 @@ static int sec_probe(struct pci_dev *pdev, const struct pci_device_id *id) return 0; err_remove_from_list: - sec_remove_from_list(sec); + hisi_qm_del_from_list(qm, &sec_devices); sec_debugfs_exit(sec); hisi_qm_stop(qm); err_probe_uninit: - sec_probe_uninit(sec); + sec_probe_uninit(qm); err_qm_uninit: sec_qm_uninit(qm); @@ -955,7 +1004,7 @@ static void sec_remove(struct pci_dev *pdev) sec_unregister_from_crypto(); - sec_remove_from_list(sec); + hisi_qm_del_from_list(qm, &sec_devices); if (qm->fun_type == QM_HW_PF && sec->num_vfs) (void)sec_sriov_disable(pdev); @@ -967,89 +1016,13 @@ static void sec_remove(struct pci_dev *pdev) if (qm->fun_type == QM_HW_PF) sec_debug_regs_clear(qm); - sec_probe_uninit(sec); + sec_probe_uninit(qm); sec_qm_uninit(qm); } -static void sec_log_hw_error(struct sec_dev *sec, u32 err_sts) -{ - const struct sec_hw_error *errs = sec_hw_errors; - struct device *dev = &sec->qm.pdev->dev; - u32 err_val; - - while (errs->msg) { - if (errs->int_msk & err_sts) { - dev_err(dev, "%s [error status=0x%x] found\n", - errs->msg, errs->int_msk); - - if (SEC_CORE_INT_STATUS_M_ECC & err_sts) { - err_val = readl(sec->qm.io_base + - SEC_CORE_SRAM_ECC_ERR_INFO); - dev_err(dev, "multi ecc sram num=0x%x\n", - SEC_ECC_NUM(err_val)); - dev_err(dev, "multi ecc sram addr=0x%x\n", - SEC_ECC_ADDR(err_val)); - } - } - errs++; - } -} - 
-static pci_ers_result_t sec_hw_error_handle(struct sec_dev *sec) -{ - u32 err_sts; - - /* read err sts */ - err_sts = readl(sec->qm.io_base + SEC_CORE_INT_STATUS); - if (err_sts) { - sec_log_hw_error(sec, err_sts); - - /* clear error interrupts */ - writel(err_sts, sec->qm.io_base + SEC_CORE_INT_SOURCE); - - return PCI_ERS_RESULT_NEED_RESET; - } - - return PCI_ERS_RESULT_RECOVERED; -} - -static pci_ers_result_t sec_process_hw_error(struct pci_dev *pdev) -{ - struct sec_dev *sec = pci_get_drvdata(pdev); - pci_ers_result_t qm_ret, sec_ret; - - if (!sec) { - pci_err(pdev, "Can't recover error during device init\n"); - return PCI_ERS_RESULT_NONE; - } - - /* log qm error */ - qm_ret = hisi_qm_hw_error_handle(&sec->qm); - - /* log sec error */ - sec_ret = sec_hw_error_handle(sec); - - return (qm_ret == PCI_ERS_RESULT_NEED_RESET || - sec_ret == PCI_ERS_RESULT_NEED_RESET) ? - PCI_ERS_RESULT_NEED_RESET : PCI_ERS_RESULT_RECOVERED; -} - -static pci_ers_result_t sec_error_detected(struct pci_dev *pdev, - pci_channel_state_t state) -{ - if (pdev->is_virtfn) - return PCI_ERS_RESULT_NONE; - - pci_info(pdev, "PCI error detected, state(=%d)!!\n", state); - if (state == pci_channel_io_perm_failure) - return PCI_ERS_RESULT_DISCONNECT; - - return sec_process_hw_error(pdev); -} - static const struct pci_error_handlers sec_err_handler = { - .error_detected = sec_error_detected, + .error_detected = hisi_qm_dev_err_detected, }; static struct pci_driver sec_pci_driver = { @@ -1078,6 +1051,7 @@ static int __init sec_init(void) { int ret; + hisi_qm_init_list(&sec_devices); sec_register_debugfs(); ret = pci_register_driver(&sec_pci_driver); diff --git a/drivers/crypto/hisilicon/zip/zip.h b/drivers/crypto/hisilicon/zip/zip.h index bc1db26598bb..82dc6f867171 100644 --- a/drivers/crypto/hisilicon/zip/zip.h +++ b/drivers/crypto/hisilicon/zip/zip.h @@ -68,7 +68,7 @@ struct hisi_zip_sqe { u32 rsvd1[4]; }; -struct hisi_zip *find_zip_device(int node); +int zip_create_qps(struct hisi_qp **qps, int ctx_num); int hisi_zip_register_to_crypto(void); void hisi_zip_unregister_from_crypto(void); #endif diff --git a/drivers/crypto/hisilicon/zip/zip_crypto.c b/drivers/crypto/hisilicon/zip/zip_crypto.c index 9815d5e3ccd0..369ec3220574 100644 --- a/drivers/crypto/hisilicon/zip/zip_crypto.c +++ b/drivers/crypto/hisilicon/zip/zip_crypto.c @@ -132,29 +132,25 @@ static void hisi_zip_fill_sqe(struct hisi_zip_sqe *sqe, u8 req_type, sqe->dest_addr_h = upper_32_bits(d_addr); } -static int hisi_zip_create_qp(struct hisi_qm *qm, struct hisi_zip_qp_ctx *ctx, - int alg_type, int req_type) +static int hisi_zip_start_qp(struct hisi_qp *qp, struct hisi_zip_qp_ctx *ctx, + int alg_type, int req_type) { - struct hisi_qp *qp; + struct device *dev = &qp->qm->pdev->dev; int ret; - qp = hisi_qm_create_qp(qm, alg_type); - if (IS_ERR(qp)) - return PTR_ERR(qp); - qp->req_type = req_type; + qp->alg_type = alg_type; qp->qp_ctx = ctx; - ctx->qp = qp; ret = hisi_qm_start_qp(qp, 0); - if (ret < 0) - goto err_release_qp; + if (ret < 0) { + dev_err(dev, "start qp failed!\n"); + return ret; + } - return 0; + ctx->qp = qp; -err_release_qp: - hisi_qm_release_qp(qp); - return ret; + return 0; } static void hisi_zip_release_qp(struct hisi_zip_qp_ctx *ctx) @@ -165,34 +161,34 @@ static void hisi_zip_release_qp(struct hisi_zip_qp_ctx *ctx) static int hisi_zip_ctx_init(struct hisi_zip_ctx *hisi_zip_ctx, u8 req_type) { + struct hisi_qp *qps[HZIP_CTX_Q_NUM] = { NULL }; struct hisi_zip *hisi_zip; - struct hisi_qm *qm; int ret, i, j; - /* find the proper zip device */ - hisi_zip 
= find_zip_device(cpu_to_node(smp_processor_id())); - if (!hisi_zip) { - pr_err("Failed to find a proper ZIP device!\n"); + ret = zip_create_qps(qps, HZIP_CTX_Q_NUM); + if (ret) { + pr_err("Can not create zip qps!\n"); return -ENODEV; } - qm = &hisi_zip->qm; + + hisi_zip = container_of(qps[0]->qm, struct hisi_zip, qm); for (i = 0; i < HZIP_CTX_Q_NUM; i++) { /* alg_type = 0 for compress, 1 for decompress in hw sqe */ - ret = hisi_zip_create_qp(qm, &hisi_zip_ctx->qp_ctx[i], i, - req_type); - if (ret) - goto err; + ret = hisi_zip_start_qp(qps[i], &hisi_zip_ctx->qp_ctx[i], i, + req_type); + if (ret) { + for (j = i - 1; j >= 0; j--) + hisi_qm_stop_qp(hisi_zip_ctx->qp_ctx[j].qp); + + hisi_qm_free_qps(qps, HZIP_CTX_Q_NUM); + return ret; + } hisi_zip_ctx->qp_ctx[i].zip_dev = hisi_zip; } return 0; -err: - for (j = i - 1; j >= 0; j--) - hisi_zip_release_qp(&hisi_zip_ctx->qp_ctx[j]); - - return ret; } static void hisi_zip_ctx_exit(struct hisi_zip_ctx *hisi_zip_ctx) diff --git a/drivers/crypto/hisilicon/zip/zip_main.c b/drivers/crypto/hisilicon/zip/zip_main.c index e1bab1a91333..fcc85d2dbd07 100644 --- a/drivers/crypto/hisilicon/zip/zip_main.c +++ b/drivers/crypto/hisilicon/zip/zip_main.c @@ -11,6 +11,7 @@ #include <linux/pci.h> #include <linux/seq_file.h> #include <linux/topology.h> +#include <linux/uacce.h> #include "zip.h" #define PCI_DEVICE_ID_ZIP_PF 0xa250 @@ -60,13 +61,17 @@ #define HZIP_CORE_DEBUG_DECOMP_5 0x309000 #define HZIP_CORE_INT_SOURCE 0x3010A0 -#define HZIP_CORE_INT_MASK 0x3010A4 +#define HZIP_CORE_INT_MASK_REG 0x3010A4 #define HZIP_CORE_INT_STATUS 0x3010AC #define HZIP_CORE_INT_STATUS_M_ECC BIT(1) #define HZIP_CORE_SRAM_ECC_ERR_INFO 0x301148 -#define SRAM_ECC_ERR_NUM_SHIFT 16 -#define SRAM_ECC_ERR_ADDR_SHIFT 24 -#define HZIP_CORE_INT_DISABLE 0x000007FF +#define HZIP_CORE_INT_RAS_CE_ENB 0x301160 +#define HZIP_CORE_INT_RAS_NFE_ENB 0x301164 +#define HZIP_CORE_INT_RAS_FE_ENB 0x301168 +#define HZIP_CORE_INT_RAS_NFE_ENABLE 0x7FE +#define HZIP_SRAM_ECC_ERR_NUM_SHIFT 16 +#define HZIP_SRAM_ECC_ERR_ADDR_SHIFT 24 +#define HZIP_CORE_INT_MASK_ALL GENMASK(10, 0) #define HZIP_COMP_CORE_NUM 2 #define HZIP_DECOMP_CORE_NUM 6 #define HZIP_CORE_NUM (HZIP_COMP_CORE_NUM + \ @@ -83,77 +88,7 @@ static const char hisi_zip_name[] = "hisi_zip"; static struct dentry *hzip_debugfs_root; -static LIST_HEAD(hisi_zip_list); -static DEFINE_MUTEX(hisi_zip_list_lock); - -struct hisi_zip_resource { - struct hisi_zip *hzip; - int distance; - struct list_head list; -}; - -static void free_list(struct list_head *head) -{ - struct hisi_zip_resource *res, *tmp; - - list_for_each_entry_safe(res, tmp, head, list) { - list_del(&res->list); - kfree(res); - } -} - -struct hisi_zip *find_zip_device(int node) -{ - struct hisi_zip_resource *res, *tmp; - struct hisi_zip *ret = NULL; - struct hisi_zip *hisi_zip; - struct list_head *n; - struct device *dev; - LIST_HEAD(head); - - mutex_lock(&hisi_zip_list_lock); - - if (IS_ENABLED(CONFIG_NUMA)) { - list_for_each_entry(hisi_zip, &hisi_zip_list, list) { - res = kzalloc(sizeof(*res), GFP_KERNEL); - if (!res) - goto err; - - dev = &hisi_zip->qm.pdev->dev; - res->hzip = hisi_zip; - res->distance = node_distance(dev_to_node(dev), node); - - n = &head; - list_for_each_entry(tmp, &head, list) { - if (res->distance < tmp->distance) { - n = &tmp->list; - break; - } - } - list_add_tail(&res->list, n); - } - - list_for_each_entry(tmp, &head, list) { - if (hisi_qm_get_free_qp_num(&tmp->hzip->qm)) { - ret = tmp->hzip; - break; - } - } - - free_list(&head); - } else { - ret = 
list_first_entry(&hisi_zip_list, struct hisi_zip, list); - } - - mutex_unlock(&hisi_zip_list_lock); - - return ret; - -err: - free_list(&head); - mutex_unlock(&hisi_zip_list_lock); - return NULL; -} +static struct hisi_qm_list zip_devices; struct hisi_zip_hw_error { u32 int_msk; @@ -297,9 +232,6 @@ static u32 pf_q_num = HZIP_PF_DEF_Q_NUM; module_param_cb(pf_q_num, &pf_q_num_ops, &pf_q_num, 0444); MODULE_PARM_DESC(pf_q_num, "Number of queues in PF(v1 1-4096, v2 1-1024)"); -static int uacce_mode; -module_param(uacce_mode, int, 0); - static u32 vfs_num; module_param(vfs_num, uint, 0444); MODULE_PARM_DESC(vfs_num, "Number of VFs to enable(1-63)"); @@ -311,18 +243,11 @@ static const struct pci_device_id hisi_zip_dev_ids[] = { }; MODULE_DEVICE_TABLE(pci, hisi_zip_dev_ids); -static inline void hisi_zip_add_to_list(struct hisi_zip *hisi_zip) +int zip_create_qps(struct hisi_qp **qps, int qp_num) { - mutex_lock(&hisi_zip_list_lock); - list_add_tail(&hisi_zip->list, &hisi_zip_list); - mutex_unlock(&hisi_zip_list_lock); -} + int node = cpu_to_node(smp_processor_id()); -static inline void hisi_zip_remove_from_list(struct hisi_zip *hisi_zip) -{ - mutex_lock(&hisi_zip_list_lock); - list_del(&hisi_zip->list); - mutex_unlock(&hisi_zip_list_lock); + return hisi_qm_alloc_qps_node(&zip_devices, qp_num, 0, node, qps); } static void hisi_zip_set_user_domain_and_cache(struct hisi_zip *hisi_zip) @@ -353,8 +278,14 @@ static void hisi_zip_set_user_domain_and_cache(struct hisi_zip *hisi_zip) writel(AXUSER_BASE, base + HZIP_BD_RUSER_32_63); writel(AXUSER_BASE, base + HZIP_SGL_RUSER_32_63); writel(AXUSER_BASE, base + HZIP_BD_WUSER_32_63); - writel(AXUSER_BASE, base + HZIP_DATA_RUSER_32_63); - writel(AXUSER_BASE, base + HZIP_DATA_WUSER_32_63); + + if (hisi_zip->qm.use_sva) { + writel(AXUSER_BASE | AXUSER_SSV, base + HZIP_DATA_RUSER_32_63); + writel(AXUSER_BASE | AXUSER_SSV, base + HZIP_DATA_WUSER_32_63); + } else { + writel(AXUSER_BASE, base + HZIP_DATA_RUSER_32_63); + writel(AXUSER_BASE, base + HZIP_DATA_WUSER_32_63); + } /* let's open all compression/decompression cores */ writel(DECOMP_CHECK_ENABLE | ALL_COMP_DECOMP_EN, @@ -366,27 +297,32 @@ static void hisi_zip_set_user_domain_and_cache(struct hisi_zip *hisi_zip) FIELD_PREP(CQC_CACHE_WB_THRD, 1), base + QM_CACHE_CTL); } -static void hisi_zip_hw_error_set_state(struct hisi_zip *hisi_zip, bool state) +static void hisi_zip_hw_error_enable(struct hisi_qm *qm) { - struct hisi_qm *qm = &hisi_zip->qm; - if (qm->ver == QM_HW_V1) { - writel(HZIP_CORE_INT_DISABLE, qm->io_base + HZIP_CORE_INT_MASK); + writel(HZIP_CORE_INT_MASK_ALL, + qm->io_base + HZIP_CORE_INT_MASK_REG); dev_info(&qm->pdev->dev, "Does not support hw error handle\n"); return; } - if (state) { - /* clear ZIP hw error source if having */ - writel(HZIP_CORE_INT_DISABLE, hisi_zip->qm.io_base + - HZIP_CORE_INT_SOURCE); - /* enable ZIP hw error interrupts */ - writel(0, hisi_zip->qm.io_base + HZIP_CORE_INT_MASK); - } else { - /* disable ZIP hw error interrupts */ - writel(HZIP_CORE_INT_DISABLE, - hisi_zip->qm.io_base + HZIP_CORE_INT_MASK); - } + /* clear ZIP hw error source if having */ + writel(HZIP_CORE_INT_MASK_ALL, qm->io_base + HZIP_CORE_INT_SOURCE); + + /* configure error type */ + writel(0x1, qm->io_base + HZIP_CORE_INT_RAS_CE_ENB); + writel(0x0, qm->io_base + HZIP_CORE_INT_RAS_FE_ENB); + writel(HZIP_CORE_INT_RAS_NFE_ENABLE, + qm->io_base + HZIP_CORE_INT_RAS_NFE_ENB); + + /* enable ZIP hw error interrupts */ + writel(0, qm->io_base + HZIP_CORE_INT_MASK_REG); +} + +static void hisi_zip_hw_error_disable(struct 
hisi_qm *qm) +{ + /* disable ZIP hw error interrupts */ + writel(HZIP_CORE_INT_MASK_ALL, qm->io_base + HZIP_CORE_INT_MASK_REG); } static inline struct hisi_qm *file_to_qm(struct ctrl_debug_file *file) @@ -638,14 +574,53 @@ static void hisi_zip_debugfs_exit(struct hisi_zip *hisi_zip) hisi_zip_debug_regs_clear(hisi_zip); } -static void hisi_zip_hw_error_init(struct hisi_zip *hisi_zip) +static void hisi_zip_log_hw_error(struct hisi_qm *qm, u32 err_sts) +{ + const struct hisi_zip_hw_error *err = zip_hw_error; + struct device *dev = &qm->pdev->dev; + u32 err_val; + + while (err->msg) { + if (err->int_msk & err_sts) { + dev_err(dev, "%s [error status=0x%x] found\n", + err->msg, err->int_msk); + + if (err->int_msk & HZIP_CORE_INT_STATUS_M_ECC) { + err_val = readl(qm->io_base + + HZIP_CORE_SRAM_ECC_ERR_INFO); + dev_err(dev, "hisi-zip multi ecc sram num=0x%x\n", + ((err_val >> + HZIP_SRAM_ECC_ERR_NUM_SHIFT) & 0xFF)); + dev_err(dev, "hisi-zip multi ecc sram addr=0x%x\n", + (err_val >> + HZIP_SRAM_ECC_ERR_ADDR_SHIFT)); + } + } + err++; + } + + writel(err_sts, qm->io_base + HZIP_CORE_INT_SOURCE); +} + +static u32 hisi_zip_get_hw_err_status(struct hisi_qm *qm) { - hisi_qm_hw_error_init(&hisi_zip->qm, QM_BASE_CE, - QM_BASE_NFE | QM_ACC_WB_NOT_READY_TIMEOUT, 0, - QM_DB_RANDOM_INVALID); - hisi_zip_hw_error_set_state(hisi_zip, true); + return readl(qm->io_base + HZIP_CORE_INT_STATUS); } +static const struct hisi_qm_err_ini hisi_zip_err_ini = { + .hw_err_enable = hisi_zip_hw_error_enable, + .hw_err_disable = hisi_zip_hw_error_disable, + .get_dev_hw_err_status = hisi_zip_get_hw_err_status, + .log_dev_hw_err = hisi_zip_log_hw_error, + .err_info = { + .ce = QM_BASE_CE, + .nfe = QM_BASE_NFE | + QM_ACC_WB_NOT_READY_TIMEOUT, + .fe = 0, + .msi = QM_DB_RANDOM_INVALID, + } +}; + static int hisi_zip_pf_probe_init(struct hisi_zip *hisi_zip) { struct hisi_qm *qm = &hisi_zip->qm; @@ -671,8 +646,10 @@ static int hisi_zip_pf_probe_init(struct hisi_zip *hisi_zip) return -EINVAL; } + qm->err_ini = &hisi_zip_err_ini; + hisi_zip_set_user_domain_and_cache(hisi_zip); - hisi_zip_hw_error_init(hisi_zip); + hisi_qm_dev_err_init(qm); hisi_zip_debug_regs_clear(hisi_zip); return 0; @@ -791,27 +768,15 @@ static int hisi_zip_probe(struct pci_dev *pdev, const struct pci_device_id *id) pci_set_drvdata(pdev, hisi_zip); qm = &hisi_zip->qm; + qm->use_dma_api = true; qm->pdev = pdev; qm->ver = rev_id; + qm->algs = "zlib\ngzip"; qm->sqe_size = HZIP_SQE_SIZE; qm->dev_name = hisi_zip_name; qm->fun_type = (pdev->device == PCI_DEVICE_ID_ZIP_PF) ? 
QM_HW_PF : QM_HW_VF; - switch (uacce_mode) { - case 0: - qm->use_dma_api = true; - break; - case 1: - qm->use_dma_api = false; - break; - case 2: - qm->use_dma_api = true; - break; - default: - return -EINVAL; - } - ret = hisi_qm_init(qm); if (ret) { dev_err(&pdev->dev, "Failed to init qm!\n"); @@ -849,7 +814,13 @@ static int hisi_zip_probe(struct pci_dev *pdev, const struct pci_device_id *id) if (ret) dev_err(&pdev->dev, "Failed to init debugfs (%d)!\n", ret); - hisi_zip_add_to_list(hisi_zip); + hisi_qm_add_to_list(qm, &zip_devices); + + if (qm->uacce) { + ret = uacce_register(qm->uacce); + if (ret) + goto err_qm_uninit; + } if (qm->fun_type == QM_HW_PF && vfs_num > 0) { ret = hisi_zip_sriov_enable(pdev, vfs_num); @@ -860,7 +831,7 @@ static int hisi_zip_probe(struct pci_dev *pdev, const struct pci_device_id *id) return 0; err_remove_from_list: - hisi_zip_remove_from_list(hisi_zip); + hisi_qm_del_from_list(qm, &zip_devices); hisi_zip_debugfs_exit(hisi_zip); hisi_qm_stop(qm); err_qm_uninit: @@ -887,92 +858,13 @@ static void hisi_zip_remove(struct pci_dev *pdev) hisi_zip_debugfs_exit(hisi_zip); hisi_qm_stop(qm); - if (qm->fun_type == QM_HW_PF) - hisi_zip_hw_error_set_state(hisi_zip, false); - + hisi_qm_dev_err_uninit(qm); hisi_qm_uninit(qm); - hisi_zip_remove_from_list(hisi_zip); -} - -static void hisi_zip_log_hw_error(struct hisi_zip *hisi_zip, u32 err_sts) -{ - const struct hisi_zip_hw_error *err = zip_hw_error; - struct device *dev = &hisi_zip->qm.pdev->dev; - u32 err_val; - - while (err->msg) { - if (err->int_msk & err_sts) { - dev_warn(dev, "%s [error status=0x%x] found\n", - err->msg, err->int_msk); - - if (HZIP_CORE_INT_STATUS_M_ECC & err->int_msk) { - err_val = readl(hisi_zip->qm.io_base + - HZIP_CORE_SRAM_ECC_ERR_INFO); - dev_warn(dev, "hisi-zip multi ecc sram num=0x%x\n", - ((err_val >> SRAM_ECC_ERR_NUM_SHIFT) & - 0xFF)); - dev_warn(dev, "hisi-zip multi ecc sram addr=0x%x\n", - (err_val >> SRAM_ECC_ERR_ADDR_SHIFT)); - } - } - err++; - } -} - -static pci_ers_result_t hisi_zip_hw_error_handle(struct hisi_zip *hisi_zip) -{ - u32 err_sts; - - /* read err sts */ - err_sts = readl(hisi_zip->qm.io_base + HZIP_CORE_INT_STATUS); - - if (err_sts) { - hisi_zip_log_hw_error(hisi_zip, err_sts); - /* clear error interrupts */ - writel(err_sts, hisi_zip->qm.io_base + HZIP_CORE_INT_SOURCE); - - return PCI_ERS_RESULT_NEED_RESET; - } - - return PCI_ERS_RESULT_RECOVERED; -} - -static pci_ers_result_t hisi_zip_process_hw_error(struct pci_dev *pdev) -{ - struct hisi_zip *hisi_zip = pci_get_drvdata(pdev); - struct device *dev = &pdev->dev; - pci_ers_result_t qm_ret, zip_ret; - - if (!hisi_zip) { - dev_err(dev, - "Can't recover ZIP-error occurred during device init\n"); - return PCI_ERS_RESULT_NONE; - } - - qm_ret = hisi_qm_hw_error_handle(&hisi_zip->qm); - - zip_ret = hisi_zip_hw_error_handle(hisi_zip); - - return (qm_ret == PCI_ERS_RESULT_NEED_RESET || - zip_ret == PCI_ERS_RESULT_NEED_RESET) ? 
- PCI_ERS_RESULT_NEED_RESET : PCI_ERS_RESULT_RECOVERED; -} - -static pci_ers_result_t hisi_zip_error_detected(struct pci_dev *pdev, - pci_channel_state_t state) -{ - if (pdev->is_virtfn) - return PCI_ERS_RESULT_NONE; - - dev_info(&pdev->dev, "PCI error detected, state(=%d)!!\n", state); - if (state == pci_channel_io_perm_failure) - return PCI_ERS_RESULT_DISCONNECT; - - return hisi_zip_process_hw_error(pdev); + hisi_qm_del_from_list(qm, &zip_devices); } static const struct pci_error_handlers hisi_zip_err_handler = { - .error_detected = hisi_zip_error_detected, + .error_detected = hisi_qm_dev_err_detected, }; static struct pci_driver hisi_zip_pci_driver = { @@ -1002,6 +894,7 @@ static int __init hisi_zip_init(void) { int ret; + hisi_qm_init_list(&zip_devices); hisi_zip_register_debugfs(); ret = pci_register_driver(&hisi_zip_pci_driver); @@ -1010,12 +903,10 @@ static int __init hisi_zip_init(void) goto err_pci; } - if (uacce_mode == 0 || uacce_mode == 2) { - ret = hisi_zip_register_to_crypto(); - if (ret < 0) { - pr_err("Failed to register driver to crypto.\n"); - goto err_crypto; - } + ret = hisi_zip_register_to_crypto(); + if (ret < 0) { + pr_err("Failed to register driver to crypto.\n"); + goto err_crypto; } return 0; @@ -1030,8 +921,7 @@ err_pci: static void __exit hisi_zip_exit(void) { - if (uacce_mode == 0 || uacce_mode == 2) - hisi_zip_unregister_from_crypto(); + hisi_zip_unregister_from_crypto(); pci_unregister_driver(&hisi_zip_pci_driver); hisi_zip_unregister_debugfs(); } diff --git a/drivers/crypto/img-hash.c b/drivers/crypto/img-hash.c index 25d5227f74a1..0e25fc3087f3 100644 --- a/drivers/crypto/img-hash.c +++ b/drivers/crypto/img-hash.c @@ -103,7 +103,7 @@ struct img_hash_request_ctx { struct ahash_request fallback_req; /* Zero length buffer must remain last member of struct */ - u8 buffer[0] __aligned(sizeof(u32)); + u8 buffer[] __aligned(sizeof(u32)); }; struct img_hash_ctx { diff --git a/drivers/crypto/marvell/Kconfig b/drivers/crypto/marvell/Kconfig new file mode 100644 index 000000000000..13063384f958 --- /dev/null +++ b/drivers/crypto/marvell/Kconfig @@ -0,0 +1,37 @@ +# +# Marvell crypto drivers configuration +# + +config CRYPTO_DEV_MARVELL + tristate + +config CRYPTO_DEV_MARVELL_CESA + tristate "Marvell's Cryptographic Engine driver" + depends on PLAT_ORION || ARCH_MVEBU + select CRYPTO_LIB_AES + select CRYPTO_LIB_DES + select CRYPTO_SKCIPHER + select CRYPTO_HASH + select SRAM + select CRYPTO_DEV_MARVELL + help + This driver allows you to utilize the Cryptographic Engines and + Security Accelerator (CESA) which can be found on MVEBU and ORION + platforms. + This driver supports CPU offload through DMA transfers. + +config CRYPTO_DEV_OCTEONTX_CPT + tristate "Support for Marvell OcteonTX CPT driver" + depends on ARCH_THUNDER || COMPILE_TEST + depends on PCI_MSI && 64BIT + depends on CRYPTO_LIB_AES + select CRYPTO_SKCIPHER + select CRYPTO_HASH + select CRYPTO_AEAD + select CRYPTO_DEV_MARVELL + help + This driver allows you to utilize the Marvell Cryptographic + Accelerator Unit(CPT) found in OcteonTX series of processors. 
+ + To compile this driver as module, choose M here: + the modules will be called octeontx-cpt and octeontx-cptvf diff --git a/drivers/crypto/marvell/Makefile b/drivers/crypto/marvell/Makefile index b27cab65e696..6c6a1519b0f1 100644 --- a/drivers/crypto/marvell/Makefile +++ b/drivers/crypto/marvell/Makefile @@ -1,3 +1,4 @@ -# SPDX-License-Identifier: GPL-2.0-only -obj-$(CONFIG_CRYPTO_DEV_MARVELL_CESA) += marvell-cesa.o -marvell-cesa-objs := cesa.o cipher.o hash.o tdma.o +# SPDX-License-Identifier: GPL-2.0 + +obj-$(CONFIG_CRYPTO_DEV_MARVELL_CESA) += cesa/ +obj-$(CONFIG_CRYPTO_DEV_OCTEONTX_CPT) += octeontx/ diff --git a/drivers/crypto/marvell/cesa/Makefile b/drivers/crypto/marvell/cesa/Makefile new file mode 100644 index 000000000000..b27cab65e696 --- /dev/null +++ b/drivers/crypto/marvell/cesa/Makefile @@ -0,0 +1,3 @@ +# SPDX-License-Identifier: GPL-2.0-only +obj-$(CONFIG_CRYPTO_DEV_MARVELL_CESA) += marvell-cesa.o +marvell-cesa-objs := cesa.o cipher.o hash.o tdma.o diff --git a/drivers/crypto/marvell/cesa.c b/drivers/crypto/marvell/cesa/cesa.c index 8a5f0b0bdf77..8a5f0b0bdf77 100644 --- a/drivers/crypto/marvell/cesa.c +++ b/drivers/crypto/marvell/cesa/cesa.c diff --git a/drivers/crypto/marvell/cesa.h b/drivers/crypto/marvell/cesa/cesa.h index f1ed3b85c0d2..e8632d5f343f 100644 --- a/drivers/crypto/marvell/cesa.h +++ b/drivers/crypto/marvell/cesa/cesa.h @@ -436,7 +436,7 @@ struct mv_cesa_dev { * @queue: fifo of the pending crypto requests * @load: engine load counter, useful for load balancing * @chain: list of the current tdma descriptors being processed - * by this engine. + * by this engine. * @complete_queue: fifo of the processed requests by the engine * * Structure storing CESA engine information. @@ -467,7 +467,7 @@ struct mv_cesa_engine { * @step: launch the crypto operation on the next chunk * @cleanup: cleanup the crypto request (release associated data) * @complete: complete the request, i.e copy result or context from sram when - * needed. + * needed. 
*/ struct mv_cesa_req_ops { int (*process)(struct crypto_async_request *req, u32 status); @@ -734,6 +734,7 @@ static inline struct mv_cesa_engine *mv_cesa_select_engine(int weight) for (i = 0; i < cesa_dev->caps->nengines; i++) { struct mv_cesa_engine *engine = cesa_dev->engines + i; u32 load = atomic_read(&engine->load); + if (load < min_load) { min_load = load; selected = engine; diff --git a/drivers/crypto/marvell/cipher.c b/drivers/crypto/marvell/cesa/cipher.c index c24f34a48cef..f133c2ccb5ae 100644 --- a/drivers/crypto/marvell/cipher.c +++ b/drivers/crypto/marvell/cesa/cipher.c @@ -106,8 +106,8 @@ static void mv_cesa_skcipher_std_step(struct skcipher_request *req) mv_cesa_set_int_mask(engine, CESA_SA_INT_ACCEL0_DONE); writel_relaxed(CESA_SA_CFG_PARA_DIS, engine->regs + CESA_SA_CFG); - BUG_ON(readl(engine->regs + CESA_SA_CMD) & - CESA_SA_CMD_EN_CESA_SA_ACCL0); + WARN_ON(readl(engine->regs + CESA_SA_CMD) & + CESA_SA_CMD_EN_CESA_SA_ACCL0); writel(CESA_SA_CMD_EN_CESA_SA_ACCL0, engine->regs + CESA_SA_CMD); } @@ -178,6 +178,7 @@ static inline void mv_cesa_skcipher_prepare(struct crypto_async_request *req, { struct skcipher_request *skreq = skcipher_request_cast(req); struct mv_cesa_skcipher_req *creq = skcipher_request_ctx(skreq); + creq->base.engine = engine; if (mv_cesa_req_get_type(&creq->base) == CESA_DMA_REQ) @@ -336,7 +337,8 @@ static int mv_cesa_skcipher_dma_req_init(struct skcipher_request *req, do { struct mv_cesa_op_ctx *op; - op = mv_cesa_dma_add_op(&basereq->chain, op_templ, skip_ctx, flags); + op = mv_cesa_dma_add_op(&basereq->chain, op_templ, skip_ctx, + flags); if (IS_ERR(op)) { ret = PTR_ERR(op); goto err_free_tdma; @@ -365,9 +367,10 @@ static int mv_cesa_skcipher_dma_req_init(struct skcipher_request *req, } while (mv_cesa_skcipher_req_iter_next_op(&iter)); /* Add output data for IV */ - ret = mv_cesa_dma_add_result_op(&basereq->chain, CESA_SA_CFG_SRAM_OFFSET, - CESA_SA_DATA_SRAM_OFFSET, - CESA_TDMA_SRC_IN_SRAM, flags); + ret = mv_cesa_dma_add_result_op(&basereq->chain, + CESA_SA_CFG_SRAM_OFFSET, + CESA_SA_DATA_SRAM_OFFSET, + CESA_TDMA_SRC_IN_SRAM, flags); if (ret) goto err_free_tdma; diff --git a/drivers/crypto/marvell/hash.c b/drivers/crypto/marvell/cesa/hash.c index a2b35fb0fb89..b971284332b6 100644 --- a/drivers/crypto/marvell/hash.c +++ b/drivers/crypto/marvell/cesa/hash.c @@ -141,9 +141,11 @@ static int mv_cesa_ahash_pad_req(struct mv_cesa_ahash_req *creq, u8 *buf) if (creq->algo_le) { __le64 bits = cpu_to_le64(creq->len << 3); + memcpy(buf + padlen, &bits, sizeof(bits)); } else { __be64 bits = cpu_to_be64(creq->len << 3); + memcpy(buf + padlen, &bits, sizeof(bits)); } @@ -168,7 +170,8 @@ static void mv_cesa_ahash_std_step(struct ahash_request *req) if (!sreq->offset) { digsize = crypto_ahash_digestsize(crypto_ahash_reqtfm(req)); for (i = 0; i < digsize / 4; i++) - writel_relaxed(creq->state[i], engine->regs + CESA_IVDIG(i)); + writel_relaxed(creq->state[i], + engine->regs + CESA_IVDIG(i)); } if (creq->cache_ptr) @@ -245,8 +248,8 @@ static void mv_cesa_ahash_std_step(struct ahash_request *req) mv_cesa_set_int_mask(engine, CESA_SA_INT_ACCEL0_DONE); writel_relaxed(CESA_SA_CFG_PARA_DIS, engine->regs + CESA_SA_CFG); - BUG_ON(readl(engine->regs + CESA_SA_CMD) & - CESA_SA_CMD_EN_CESA_SA_ACCL0); + WARN_ON(readl(engine->regs + CESA_SA_CMD) & + CESA_SA_CMD_EN_CESA_SA_ACCL0); writel(CESA_SA_CMD_EN_CESA_SA_ACCL0, engine->regs + CESA_SA_CMD); } @@ -329,11 +332,12 @@ static void mv_cesa_ahash_complete(struct crypto_async_request *req) digsize = 
crypto_ahash_digestsize(crypto_ahash_reqtfm(ahashreq)); if (mv_cesa_req_get_type(&creq->base) == CESA_DMA_REQ && - (creq->base.chain.last->flags & CESA_TDMA_TYPE_MSK) == CESA_TDMA_RESULT) { + (creq->base.chain.last->flags & CESA_TDMA_TYPE_MSK) == + CESA_TDMA_RESULT) { __le32 *data = NULL; /* - * Result is already in the correct endianess when the SA is + * Result is already in the correct endianness when the SA is * used */ data = creq->base.chain.last->op->ctx.hash.hash; @@ -347,9 +351,9 @@ static void mv_cesa_ahash_complete(struct crypto_async_request *req) CESA_IVDIG(i)); if (creq->last_req) { /* - * Hardware's MD5 digest is in little endian format, but - * SHA in big endian format - */ + * Hardware's MD5 digest is in little endian format, but + * SHA in big endian format + */ if (creq->algo_le) { __le32 *result = (void *)ahashreq->result; @@ -439,7 +443,8 @@ static bool mv_cesa_ahash_cache_req(struct ahash_request *req) struct mv_cesa_ahash_req *creq = ahash_request_ctx(req); bool cached = false; - if (creq->cache_ptr + req->nbytes < CESA_MAX_HASH_BLOCK_SIZE && !creq->last_req) { + if (creq->cache_ptr + req->nbytes < CESA_MAX_HASH_BLOCK_SIZE && + !creq->last_req) { cached = true; if (!req->nbytes) @@ -648,7 +653,8 @@ static int mv_cesa_ahash_dma_req_init(struct ahash_request *req) if (!mv_cesa_ahash_req_iter_next_op(&iter)) break; - op = mv_cesa_dma_add_frag(&basereq->chain, &creq->op_tmpl, + op = mv_cesa_dma_add_frag(&basereq->chain, + &creq->op_tmpl, frag_len, flags); if (IS_ERR(op)) { ret = PTR_ERR(op); @@ -920,7 +926,7 @@ struct ahash_alg mv_md5_alg = { .cra_ctxsize = sizeof(struct mv_cesa_hash_ctx), .cra_init = mv_cesa_ahash_cra_init, .cra_module = THIS_MODULE, - } + } } }; @@ -990,7 +996,7 @@ struct ahash_alg mv_sha1_alg = { .cra_ctxsize = sizeof(struct mv_cesa_hash_ctx), .cra_init = mv_cesa_ahash_cra_init, .cra_module = THIS_MODULE, - } + } } }; @@ -1063,7 +1069,7 @@ struct ahash_alg mv_sha256_alg = { .cra_ctxsize = sizeof(struct mv_cesa_hash_ctx), .cra_init = mv_cesa_ahash_cra_init, .cra_module = THIS_MODULE, - } + } } }; @@ -1297,7 +1303,7 @@ struct ahash_alg mv_ahmac_md5_alg = { .cra_ctxsize = sizeof(struct mv_cesa_hmac_ctx), .cra_init = mv_cesa_ahmac_cra_init, .cra_module = THIS_MODULE, - } + } } }; @@ -1367,7 +1373,7 @@ struct ahash_alg mv_ahmac_sha1_alg = { .cra_ctxsize = sizeof(struct mv_cesa_hmac_ctx), .cra_init = mv_cesa_ahmac_cra_init, .cra_module = THIS_MODULE, - } + } } }; @@ -1437,6 +1443,6 @@ struct ahash_alg mv_ahmac_sha256_alg = { .cra_ctxsize = sizeof(struct mv_cesa_hmac_ctx), .cra_init = mv_cesa_ahmac_cra_init, .cra_module = THIS_MODULE, - } + } } }; diff --git a/drivers/crypto/marvell/tdma.c b/drivers/crypto/marvell/cesa/tdma.c index 45939d53e8d6..b81ee276fe0e 100644 --- a/drivers/crypto/marvell/tdma.c +++ b/drivers/crypto/marvell/cesa/tdma.c @@ -50,8 +50,8 @@ void mv_cesa_dma_step(struct mv_cesa_req *dreq) engine->regs + CESA_SA_CFG); writel_relaxed(dreq->chain.first->cur_dma, engine->regs + CESA_TDMA_NEXT_ADDR); - BUG_ON(readl(engine->regs + CESA_SA_CMD) & - CESA_SA_CMD_EN_CESA_SA_ACCL0); + WARN_ON(readl(engine->regs + CESA_SA_CMD) & + CESA_SA_CMD_EN_CESA_SA_ACCL0); writel(CESA_SA_CMD_EN_CESA_SA_ACCL0, engine->regs + CESA_SA_CMD); } @@ -175,8 +175,10 @@ int mv_cesa_tdma_process(struct mv_cesa_engine *engine, u32 status) break; } - /* Save the last request in error to engine->req, so that the core - * knows which request was fautly */ + /* + * Save the last request in error to engine->req, so that the core + * knows which request was fautly + */ if (res) { 
spin_lock_bh(&engine->lock); engine->req = req; diff --git a/drivers/crypto/marvell/octeontx/Makefile b/drivers/crypto/marvell/octeontx/Makefile new file mode 100644 index 000000000000..5e956fe1a85b --- /dev/null +++ b/drivers/crypto/marvell/octeontx/Makefile @@ -0,0 +1,6 @@ +# SPDX-License-Identifier: GPL-2.0 +obj-$(CONFIG_CRYPTO_DEV_OCTEONTX_CPT) += octeontx-cpt.o octeontx-cptvf.o + +octeontx-cpt-objs := otx_cptpf_main.o otx_cptpf_mbox.o otx_cptpf_ucode.o +octeontx-cptvf-objs := otx_cptvf_main.o otx_cptvf_mbox.o otx_cptvf_reqmgr.o \ + otx_cptvf_algs.o diff --git a/drivers/crypto/marvell/octeontx/otx_cpt_common.h b/drivers/crypto/marvell/octeontx/otx_cpt_common.h new file mode 100644 index 000000000000..ca704a7a265f --- /dev/null +++ b/drivers/crypto/marvell/octeontx/otx_cpt_common.h @@ -0,0 +1,51 @@ +/* SPDX-License-Identifier: GPL-2.0 + * Marvell OcteonTX CPT driver + * + * Copyright (C) 2019 Marvell International Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef __OTX_CPT_COMMON_H +#define __OTX_CPT_COMMON_H + +#include <linux/types.h> +#include <linux/delay.h> +#include <linux/device.h> + +#define OTX_CPT_MAX_MBOX_DATA_STR_SIZE 64 + +enum otx_cptpf_type { + OTX_CPT_AE = 2, + OTX_CPT_SE = 3, + BAD_OTX_CPTPF_TYPE, +}; + +enum otx_cptvf_type { + OTX_CPT_AE_TYPES = 1, + OTX_CPT_SE_TYPES = 2, + BAD_OTX_CPTVF_TYPE, +}; + +/* VF-PF message opcodes */ +enum otx_cpt_mbox_opcode { + OTX_CPT_MSG_VF_UP = 1, + OTX_CPT_MSG_VF_DOWN, + OTX_CPT_MSG_READY, + OTX_CPT_MSG_QLEN, + OTX_CPT_MSG_QBIND_GRP, + OTX_CPT_MSG_VQ_PRIORITY, + OTX_CPT_MSG_PF_TYPE, + OTX_CPT_MSG_ACK, + OTX_CPT_MSG_NACK +}; + +/* OcteonTX CPT mailbox structure */ +struct otx_cpt_mbox { + u64 msg; /* Message type MBOX[0] */ + u64 data;/* Data MBOX[1] */ +}; + +#endif /* __OTX_CPT_COMMON_H */ diff --git a/drivers/crypto/marvell/octeontx/otx_cpt_hw_types.h b/drivers/crypto/marvell/octeontx/otx_cpt_hw_types.h new file mode 100644 index 000000000000..b8bdb9f134f3 --- /dev/null +++ b/drivers/crypto/marvell/octeontx/otx_cpt_hw_types.h @@ -0,0 +1,824 @@ +/* SPDX-License-Identifier: GPL-2.0 + * Marvell OcteonTX CPT driver + * + * Copyright (C) 2019 Marvell International Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +#ifndef __OTX_CPT_HW_TYPES_H +#define __OTX_CPT_HW_TYPES_H + +#include <linux/types.h> + +/* Device IDs */ +#define OTX_CPT_PCI_PF_DEVICE_ID 0xa040 +#define OTX_CPT_PCI_VF_DEVICE_ID 0xa041 + +#define OTX_CPT_PCI_PF_SUBSYS_ID 0xa340 +#define OTX_CPT_PCI_VF_SUBSYS_ID 0xa341 + +/* Configuration and status registers are in BAR0 on OcteonTX platform */ +#define OTX_CPT_PF_PCI_CFG_BAR 0 +#define OTX_CPT_VF_PCI_CFG_BAR 0 + +#define OTX_CPT_BAR_E_CPTX_VFX_BAR0_OFFSET(a, b) \ + (0x000020000000ll + 0x1000000000ll * (a) + 0x100000ll * (b)) +#define OTX_CPT_BAR_E_CPTX_VFX_BAR0_SIZE 0x400000 + +/* Mailbox interrupts offset */ +#define OTX_CPT_PF_MBOX_INT 3 +#define OTX_CPT_PF_INT_VEC_E_MBOXX(x, a) ((x) + (a)) +/* Number of MSIX supported in PF */ +#define OTX_CPT_PF_MSIX_VECTORS 4 +/* Maximum supported microcode groups */ +#define OTX_CPT_MAX_ENGINE_GROUPS 8 + +/* CPT instruction size in bytes */ +#define OTX_CPT_INST_SIZE 64 +/* CPT queue next chunk pointer size in bytes */ +#define OTX_CPT_NEXT_CHUNK_PTR_SIZE 8 + +/* OcteonTX CPT VF MSIX vectors and their offsets */ +#define OTX_CPT_VF_MSIX_VECTORS 2 +#define OTX_CPT_VF_INTR_MBOX_MASK BIT(0) +#define OTX_CPT_VF_INTR_DOVF_MASK BIT(1) +#define OTX_CPT_VF_INTR_IRDE_MASK BIT(2) +#define OTX_CPT_VF_INTR_NWRP_MASK BIT(3) +#define OTX_CPT_VF_INTR_SERR_MASK BIT(4) + +/* OcteonTX CPT PF registers */ +#define OTX_CPT_PF_CONSTANTS (0x0ll) +#define OTX_CPT_PF_RESET (0x100ll) +#define OTX_CPT_PF_DIAG (0x120ll) +#define OTX_CPT_PF_BIST_STATUS (0x160ll) +#define OTX_CPT_PF_ECC0_CTL (0x200ll) +#define OTX_CPT_PF_ECC0_FLIP (0x210ll) +#define OTX_CPT_PF_ECC0_INT (0x220ll) +#define OTX_CPT_PF_ECC0_INT_W1S (0x230ll) +#define OTX_CPT_PF_ECC0_ENA_W1S (0x240ll) +#define OTX_CPT_PF_ECC0_ENA_W1C (0x250ll) +#define OTX_CPT_PF_MBOX_INTX(b) (0x400ll | (u64)(b) << 3) +#define OTX_CPT_PF_MBOX_INT_W1SX(b) (0x420ll | (u64)(b) << 3) +#define OTX_CPT_PF_MBOX_ENA_W1CX(b) (0x440ll | (u64)(b) << 3) +#define OTX_CPT_PF_MBOX_ENA_W1SX(b) (0x460ll | (u64)(b) << 3) +#define OTX_CPT_PF_EXEC_INT (0x500ll) +#define OTX_CPT_PF_EXEC_INT_W1S (0x520ll) +#define OTX_CPT_PF_EXEC_ENA_W1C (0x540ll) +#define OTX_CPT_PF_EXEC_ENA_W1S (0x560ll) +#define OTX_CPT_PF_GX_EN(b) (0x600ll | (u64)(b) << 3) +#define OTX_CPT_PF_EXEC_INFO (0x700ll) +#define OTX_CPT_PF_EXEC_BUSY (0x800ll) +#define OTX_CPT_PF_EXEC_INFO0 (0x900ll) +#define OTX_CPT_PF_EXEC_INFO1 (0x910ll) +#define OTX_CPT_PF_INST_REQ_PC (0x10000ll) +#define OTX_CPT_PF_INST_LATENCY_PC (0x10020ll) +#define OTX_CPT_PF_RD_REQ_PC (0x10040ll) +#define OTX_CPT_PF_RD_LATENCY_PC (0x10060ll) +#define OTX_CPT_PF_RD_UC_PC (0x10080ll) +#define OTX_CPT_PF_ACTIVE_CYCLES_PC (0x10100ll) +#define OTX_CPT_PF_EXE_CTL (0x4000000ll) +#define OTX_CPT_PF_EXE_STATUS (0x4000008ll) +#define OTX_CPT_PF_EXE_CLK (0x4000010ll) +#define OTX_CPT_PF_EXE_DBG_CTL (0x4000018ll) +#define OTX_CPT_PF_EXE_DBG_DATA (0x4000020ll) +#define OTX_CPT_PF_EXE_BIST_STATUS (0x4000028ll) +#define OTX_CPT_PF_EXE_REQ_TIMER (0x4000030ll) +#define OTX_CPT_PF_EXE_MEM_CTL (0x4000038ll) +#define OTX_CPT_PF_EXE_PERF_CTL (0x4001000ll) +#define OTX_CPT_PF_EXE_DBG_CNTX(b) (0x4001100ll | (u64)(b) << 3) +#define OTX_CPT_PF_EXE_PERF_EVENT_CNT (0x4001180ll) +#define OTX_CPT_PF_EXE_EPCI_INBX_CNT(b) (0x4001200ll | (u64)(b) << 3) +#define OTX_CPT_PF_EXE_EPCI_OUTBX_CNT(b) (0x4001240ll | (u64)(b) << 3) +#define OTX_CPT_PF_ENGX_UCODE_BASE(b) (0x4002000ll | (u64)(b) << 3) +#define OTX_CPT_PF_QX_CTL(b) (0x8000000ll | (u64)(b) << 20) +#define OTX_CPT_PF_QX_GMCTL(b) (0x8000020ll | (u64)(b) << 20) +#define 
OTX_CPT_PF_QX_CTL2(b) (0x8000100ll | (u64)(b) << 20) +#define OTX_CPT_PF_VFX_MBOXX(b, c) (0x8001000ll | (u64)(b) << 20 | \ + (u64)(c) << 8) + +/* OcteonTX CPT VF registers */ +#define OTX_CPT_VQX_CTL(b) (0x100ll | (u64)(b) << 20) +#define OTX_CPT_VQX_SADDR(b) (0x200ll | (u64)(b) << 20) +#define OTX_CPT_VQX_DONE_WAIT(b) (0x400ll | (u64)(b) << 20) +#define OTX_CPT_VQX_INPROG(b) (0x410ll | (u64)(b) << 20) +#define OTX_CPT_VQX_DONE(b) (0x420ll | (u64)(b) << 20) +#define OTX_CPT_VQX_DONE_ACK(b) (0x440ll | (u64)(b) << 20) +#define OTX_CPT_VQX_DONE_INT_W1S(b) (0x460ll | (u64)(b) << 20) +#define OTX_CPT_VQX_DONE_INT_W1C(b) (0x468ll | (u64)(b) << 20) +#define OTX_CPT_VQX_DONE_ENA_W1S(b) (0x470ll | (u64)(b) << 20) +#define OTX_CPT_VQX_DONE_ENA_W1C(b) (0x478ll | (u64)(b) << 20) +#define OTX_CPT_VQX_MISC_INT(b) (0x500ll | (u64)(b) << 20) +#define OTX_CPT_VQX_MISC_INT_W1S(b) (0x508ll | (u64)(b) << 20) +#define OTX_CPT_VQX_MISC_ENA_W1S(b) (0x510ll | (u64)(b) << 20) +#define OTX_CPT_VQX_MISC_ENA_W1C(b) (0x518ll | (u64)(b) << 20) +#define OTX_CPT_VQX_DOORBELL(b) (0x600ll | (u64)(b) << 20) +#define OTX_CPT_VFX_PF_MBOXX(b, c) (0x1000ll | ((b) << 20) | ((c) << 3)) + +/* + * Enumeration otx_cpt_ucode_error_code_e + * + * Enumerates ucode errors + */ +enum otx_cpt_ucode_error_code_e { + CPT_NO_UCODE_ERROR = 0x00, + ERR_OPCODE_UNSUPPORTED = 0x01, + + /* Scatter gather */ + ERR_SCATTER_GATHER_WRITE_LENGTH = 0x02, + ERR_SCATTER_GATHER_LIST = 0x03, + ERR_SCATTER_GATHER_NOT_SUPPORTED = 0x04, + +}; + +/* + * Enumeration otx_cpt_comp_e + * + * CPT OcteonTX Completion Enumeration + * Enumerates the values of CPT_RES_S[COMPCODE]. + */ +enum otx_cpt_comp_e { + CPT_COMP_E_NOTDONE = 0x00, + CPT_COMP_E_GOOD = 0x01, + CPT_COMP_E_FAULT = 0x02, + CPT_COMP_E_SWERR = 0x03, + CPT_COMP_E_HWERR = 0x04, + CPT_COMP_E_LAST_ENTRY = 0x05 +}; + +/* + * Enumeration otx_cpt_vf_int_vec_e + * + * CPT OcteonTX VF MSI-X Vector Enumeration + * Enumerates the MSI-X interrupt vectors. + */ +enum otx_cpt_vf_int_vec_e { + CPT_VF_INT_VEC_E_MISC = 0x00, + CPT_VF_INT_VEC_E_DONE = 0x01 +}; + +/* + * Structure cpt_inst_s + * + * CPT Instruction Structure + * This structure specifies the instruction layout. Instructions are + * stored in memory as little-endian unless CPT()_PF_Q()_CTL[INST_BE] is set. + * cpt_inst_s_s + * Word 0 + * doneint:1 Done interrupt. + * 0 = No interrupts related to this instruction. + * 1 = When the instruction completes, CPT()_VQ()_DONE[DONE] will be + * incremented,and based on the rules described there an interrupt may + * occur. + * Word 1 + * res_addr [127: 64] Result IOVA. + * If nonzero, specifies where to write CPT_RES_S. + * If zero, no result structure will be written. + * Address must be 16-byte aligned. + * Bits <63:49> are ignored by hardware; software should use a + * sign-extended bit <48> for forward compatibility. + * Word 2 + * grp:10 [171:162] If [WQ_PTR] is nonzero, the SSO guest-group to use when + * CPT submits work SSO. + * For the SSO to not discard the add-work request, FPA_PF_MAP() must map + * [GRP] and CPT()_PF_Q()_GMCTL[GMID] as valid. + * tt:2 [161:160] If [WQ_PTR] is nonzero, the SSO tag type to use when CPT + * submits work to SSO + * tag:32 [159:128] If [WQ_PTR] is nonzero, the SSO tag to use when CPT + * submits work to SSO. + * Word 3 + * wq_ptr [255:192] If [WQ_PTR] is nonzero, it is a pointer to a + * work-queue entry that CPT submits work to SSO after all context, + * output data, and result write operations are visible to other + * CNXXXX units and the cores. Bits <2:0> must be zero. 
+ * Bits <63:49> are ignored by hardware; software should + * use a sign-extended bit <48> for forward compatibility. + * Internal: + * Bits <63:49>, <2:0> are ignored by hardware, treated as always 0x0. + * Word 4 + * ei0; [319:256] Engine instruction word 0. Passed to the AE/SE. + * Word 5 + * ei1; [383:320] Engine instruction word 1. Passed to the AE/SE. + * Word 6 + * ei2; [447:384] Engine instruction word 1. Passed to the AE/SE. + * Word 7 + * ei3; [511:448] Engine instruction word 1. Passed to the AE/SE. + * + */ +union otx_cpt_inst_s { + u64 u[8]; + + struct { +#if defined(__BIG_ENDIAN_BITFIELD) /* Word 0 - Big Endian */ + u64 reserved_17_63:47; + u64 doneint:1; + u64 reserved_0_15:16; +#else /* Word 0 - Little Endian */ + u64 reserved_0_15:16; + u64 doneint:1; + u64 reserved_17_63:47; +#endif /* Word 0 - End */ + u64 res_addr; +#if defined(__BIG_ENDIAN_BITFIELD) /* Word 2 - Big Endian */ + u64 reserved_172_191:20; + u64 grp:10; + u64 tt:2; + u64 tag:32; +#else /* Word 2 - Little Endian */ + u64 tag:32; + u64 tt:2; + u64 grp:10; + u64 reserved_172_191:20; +#endif /* Word 2 - End */ + u64 wq_ptr; + u64 ei0; + u64 ei1; + u64 ei2; + u64 ei3; + } s; +}; + +/* + * Structure cpt_res_s + * + * CPT Result Structure + * The CPT coprocessor writes the result structure after it completes a + * CPT_INST_S instruction. The result structure is exactly 16 bytes, and + * each instruction completion produces exactly one result structure. + * + * This structure is stored in memory as little-endian unless + * CPT()_PF_Q()_CTL[INST_BE] is set. + * cpt_res_s_s + * Word 0 + * doneint:1 [16:16] Done interrupt. This bit is copied from the + * corresponding instruction's CPT_INST_S[DONEINT]. + * compcode:8 [7:0] Indicates completion/error status of the CPT coprocessor + * for the associated instruction, as enumerated by CPT_COMP_E. + * Core software may write the memory location containing [COMPCODE] to + * 0x0 before ringing the doorbell, and then poll for completion by + * checking for a nonzero value. + * Once the core observes a nonzero [COMPCODE] value in this case,the CPT + * coprocessor will have also completed L2/DRAM write operations. + * Word 1 + * reserved + * + */ +union otx_cpt_res_s { + u64 u[2]; + struct { +#if defined(__BIG_ENDIAN_BITFIELD) /* Word 0 - Big Endian */ + u64 reserved_17_63:47; + u64 doneint:1; + u64 reserved_8_15:8; + u64 compcode:8; +#else /* Word 0 - Little Endian */ + u64 compcode:8; + u64 reserved_8_15:8; + u64 doneint:1; + u64 reserved_17_63:47; +#endif /* Word 0 - End */ + u64 reserved_64_127; + } s; +}; + +/* + * Register (NCB) otx_cpt#_pf_bist_status + * + * CPT PF Control Bist Status Register + * This register has the BIST status of memories. Each bit is the BIST result + * of an individual memory (per bit, 0 = pass and 1 = fail). + * otx_cptx_pf_bist_status_s + * Word0 + * bstatus [29:0](RO/H) BIST status. One bit per memory, enumerated by + * CPT_RAMS_E. + */ +union otx_cptx_pf_bist_status { + u64 u; + struct otx_cptx_pf_bist_status_s { +#if defined(__BIG_ENDIAN_BITFIELD) /* Word 0 - Big Endian */ + u64 reserved_30_63:34; + u64 bstatus:30; +#else /* Word 0 - Little Endian */ + u64 bstatus:30; + u64 reserved_30_63:34; +#endif /* Word 0 - End */ + } s; +}; + +/* + * Register (NCB) otx_cpt#_pf_constants + * + * CPT PF Constants Register + * This register contains implementation-related parameters of CPT in CNXXXX. + * otx_cptx_pf_constants_s + * Word 0 + * reserved_40_63:24 [63:40] Reserved. + * epcis:8 [39:32](RO) Number of EPCI busses. 
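The CPT_RES_S description above suggests a simple polling convention: clear the result word before ringing the doorbell, then wait for COMPCODE to become nonzero. A minimal standalone sketch (not part of this commit) of decoding word 0 with the bit positions documented above; the completion values follow otx_cpt_comp_e.

#include <stdint.h>

enum comp_e { COMP_NOTDONE = 0, COMP_GOOD = 1, COMP_FAULT = 2,
	      COMP_SWERR = 3, COMP_HWERR = 4 };		/* otx_cpt_comp_e values */

static inline unsigned int res_compcode(uint64_t word0)
{
	return (unsigned int)(word0 & 0xff);		/* CPT_RES_S[COMPCODE], bits [7:0] */
}

static inline unsigned int res_doneint(uint64_t word0)
{
	return (unsigned int)((word0 >> 16) & 0x1);	/* CPT_RES_S[DONEINT], bit 16 */
}

/* Returns 1 while the result is still pending, 0 on success, -1 on error. */
static int check_result(const volatile uint64_t *res)
{
	uint64_t w0 = res[0];

	if (res_compcode(w0) == COMP_NOTDONE)
		return 1;				/* keep polling */
	return res_compcode(w0) == COMP_GOOD ? 0 : -1;
}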
+ * grps:8 [31:24](RO) Number of engine groups implemented. + * ae:8 [23:16](RO/H) Number of AEs. In CNXXXX, for CPT0 returns 0x0, + * for CPT1 returns 0x18, or less if there are fuse-disables. + * se:8 [15:8](RO/H) Number of SEs. In CNXXXX, for CPT0 returns 0x30, + * or less if there are fuse-disables, for CPT1 returns 0x0. + * vq:8 [7:0](RO) Number of VQs. + */ +union otx_cptx_pf_constants { + u64 u; + struct otx_cptx_pf_constants_s { +#if defined(__BIG_ENDIAN_BITFIELD) /* Word 0 - Big Endian */ + u64 reserved_40_63:24; + u64 epcis:8; + u64 grps:8; + u64 ae:8; + u64 se:8; + u64 vq:8; +#else /* Word 0 - Little Endian */ + u64 vq:8; + u64 se:8; + u64 ae:8; + u64 grps:8; + u64 epcis:8; + u64 reserved_40_63:24; +#endif /* Word 0 - End */ + } s; +}; + +/* + * Register (NCB) otx_cpt#_pf_exe_bist_status + * + * CPT PF Engine Bist Status Register + * This register has the BIST status of each engine. Each bit is the + * BIST result of an individual engine (per bit, 0 = pass and 1 = fail). + * otx_cptx_pf_exe_bist_status_s + * Word0 + * reserved_48_63:16 [63:48] reserved + * bstatus:48 [47:0](RO/H) BIST status. One bit per engine. + * + */ +union otx_cptx_pf_exe_bist_status { + u64 u; + struct otx_cptx_pf_exe_bist_status_s { +#if defined(__BIG_ENDIAN_BITFIELD) /* Word 0 - Big Endian */ + u64 reserved_48_63:16; + u64 bstatus:48; +#else /* Word 0 - Little Endian */ + u64 bstatus:48; + u64 reserved_48_63:16; +#endif /* Word 0 - End */ + } s; +}; + +/* + * Register (NCB) otx_cpt#_pf_q#_ctl + * + * CPT Queue Control Register + * This register configures queues. This register should be changed only + * when quiescent (see CPT()_VQ()_INPROG[INFLIGHT]). + * otx_cptx_pf_qx_ctl_s + * Word0 + * reserved_60_63:4 [63:60] reserved. + * aura:12; [59:48](R/W) Guest-aura for returning this queue's + * instruction-chunk buffers to FPA. Only used when [INST_FREE] is set. + * For the FPA to not discard the request, FPA_PF_MAP() must map + * [AURA] and CPT()_PF_Q()_GMCTL[GMID] as valid. + * reserved_45_47:3 [47:45] reserved. + * size:13 [44:32](R/W) Command-buffer size, in number of 64-bit words per + * command buffer segment. Must be 8*n + 1, where n is the number of + * instructions per buffer segment. + * reserved_11_31:21 [31:11] Reserved. + * cont_err:1 [10:10](R/W) Continue on error. + * 0 = When CPT()_VQ()_MISC_INT[NWRP], CPT()_VQ()_MISC_INT[IRDE] or + * CPT()_VQ()_MISC_INT[DOVF] are set by hardware or software via + * CPT()_VQ()_MISC_INT_W1S, then CPT()_VQ()_CTL[ENA] is cleared. Due to + * pipelining, additional instructions may have been processed between the + * instruction causing the error and the next instruction in the disabled + * queue (the instruction at CPT()_VQ()_SADDR). + * 1 = Ignore errors and continue processing instructions. + * For diagnostic use only. + * inst_free:1 [9:9](R/W) Instruction FPA free. When set, when CPT reaches the + * end of an instruction chunk, that chunk will be freed to the FPA. + * inst_be:1 [8:8](R/W) Instruction big-endian control. When set, instructions, + * instruction next chunk pointers, and result structures are stored in + * big-endian format in memory. + * iqb_ldwb:1 [7:7](R/W) Instruction load don't write back. + * 0 = The hardware issues NCB transient load (LDT) towards the cache, + * which if the line hits and is is dirty will cause the line to be + * written back before being replaced. 
+ * 1 = The hardware issues NCB LDWB read-and-invalidate command towards + * the cache when fetching the last word of instructions; as a result the + * line will not be written back when replaced. This improves + * performance, but software must not read the instructions after they are + * posted to the hardware. Reads that do not consume the last word of a + * cache line always use LDI. + * reserved_4_6:3 [6:4] Reserved. + * grp:3; [3:1](R/W) Engine group. + * pri:1; [0:0](R/W) Queue priority. + * 1 = This queue has higher priority. Round-robin between higher + * priority queues. + * 0 = This queue has lower priority. Round-robin between lower + * priority queues. + */ +union otx_cptx_pf_qx_ctl { + u64 u; + struct otx_cptx_pf_qx_ctl_s { +#if defined(__BIG_ENDIAN_BITFIELD) /* Word 0 - Big Endian */ + u64 reserved_60_63:4; + u64 aura:12; + u64 reserved_45_47:3; + u64 size:13; + u64 reserved_11_31:21; + u64 cont_err:1; + u64 inst_free:1; + u64 inst_be:1; + u64 iqb_ldwb:1; + u64 reserved_4_6:3; + u64 grp:3; + u64 pri:1; +#else /* Word 0 - Little Endian */ + u64 pri:1; + u64 grp:3; + u64 reserved_4_6:3; + u64 iqb_ldwb:1; + u64 inst_be:1; + u64 inst_free:1; + u64 cont_err:1; + u64 reserved_11_31:21; + u64 size:13; + u64 reserved_45_47:3; + u64 aura:12; + u64 reserved_60_63:4; +#endif /* Word 0 - End */ + } s; +}; + +/* + * Register (NCB) otx_cpt#_vq#_saddr + * + * CPT Queue Starting Buffer Address Registers + * These registers set the instruction buffer starting address. + * otx_cptx_vqx_saddr_s + * Word0 + * reserved_49_63:15 [63:49] Reserved. + * ptr:43 [48:6](R/W/H) Instruction buffer IOVA <48:6> (64-byte aligned). + * When written, it is the initial buffer starting address; when read, + * it is the next read pointer to be requested from L2C. The PTR field + * is overwritten with the next pointer each time that the command buffer + * segment is exhausted. New commands will then be read from the newly + * specified command buffer pointer. + * reserved_0_5:6 [5:0] Reserved. + * + */ +union otx_cptx_vqx_saddr { + u64 u; + struct otx_cptx_vqx_saddr_s { +#if defined(__BIG_ENDIAN_BITFIELD) /* Word 0 - Big Endian */ + u64 reserved_49_63:15; + u64 ptr:43; + u64 reserved_0_5:6; +#else /* Word 0 - Little Endian */ + u64 reserved_0_5:6; + u64 ptr:43; + u64 reserved_49_63:15; +#endif /* Word 0 - End */ + } s; +}; + +/* + * Register (NCB) otx_cpt#_vq#_misc_ena_w1s + * + * CPT Queue Misc Interrupt Enable Set Register + * This register sets interrupt enable bits. + * otx_cptx_vqx_misc_ena_w1s_s + * Word0 + * reserved_5_63:59 [63:5] Reserved. + * swerr:1 [4:4](R/W1S/H) Reads or sets enable for + * CPT(0..1)_VQ(0..63)_MISC_INT[SWERR]. + * nwrp:1 [3:3](R/W1S/H) Reads or sets enable for + * CPT(0..1)_VQ(0..63)_MISC_INT[NWRP]. + * irde:1 [2:2](R/W1S/H) Reads or sets enable for + * CPT(0..1)_VQ(0..63)_MISC_INT[IRDE]. + * dovf:1 [1:1](R/W1S/H) Reads or sets enable for + * CPT(0..1)_VQ(0..63)_MISC_INT[DOVF]. + * mbox:1 [0:0](R/W1S/H) Reads or sets enable for + * CPT(0..1)_VQ(0..63)_MISC_INT[MBOX]. 
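The five MISC interrupt causes above occupy bits 0-4, in the same order, across the MISC_INT register and its W1S/ENA variants, which is also where the OTX_CPT_VF_INTR_*_MASK values near the top of this header come from. A small illustrative sketch of composing the all-causes enable value from those bit positions:

#include <stdint.h>

#define INTR_MBOX	(1ull << 0)	/* OTX_CPT_VF_INTR_MBOX_MASK */
#define INTR_DOVF	(1ull << 1)	/* OTX_CPT_VF_INTR_DOVF_MASK */
#define INTR_IRDE	(1ull << 2)	/* OTX_CPT_VF_INTR_IRDE_MASK */
#define INTR_NWRP	(1ull << 3)	/* OTX_CPT_VF_INTR_NWRP_MASK */
#define INTR_SERR	(1ull << 4)	/* OTX_CPT_VF_INTR_SERR_MASK (SWERR field) */

/* Value a VF would write to CPT()_VQ()_MISC_ENA_W1S to enable every cause. */
static inline uint64_t misc_enable_all(void)
{
	return INTR_MBOX | INTR_DOVF | INTR_IRDE | INTR_NWRP | INTR_SERR;
}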
+ * + */ +union otx_cptx_vqx_misc_ena_w1s { + u64 u; + struct otx_cptx_vqx_misc_ena_w1s_s { +#if defined(__BIG_ENDIAN_BITFIELD) /* Word 0 - Big Endian */ + u64 reserved_5_63:59; + u64 swerr:1; + u64 nwrp:1; + u64 irde:1; + u64 dovf:1; + u64 mbox:1; +#else /* Word 0 - Little Endian */ + u64 mbox:1; + u64 dovf:1; + u64 irde:1; + u64 nwrp:1; + u64 swerr:1; + u64 reserved_5_63:59; +#endif /* Word 0 - End */ + } s; +}; + +/* + * Register (NCB) otx_cpt#_vq#_doorbell + * + * CPT Queue Doorbell Registers + * Doorbells for the CPT instruction queues. + * otx_cptx_vqx_doorbell_s + * Word0 + * reserved_20_63:44 [63:20] Reserved. + * dbell_cnt:20 [19:0](R/W/H) Number of instruction queue 64-bit words to add + * to the CPT instruction doorbell count. Readback value is the the + * current number of pending doorbell requests. If counter overflows + * CPT()_VQ()_MISC_INT[DBELL_DOVF] is set. To reset the count back to + * zero, write one to clear CPT()_VQ()_MISC_INT_ENA_W1C[DBELL_DOVF], + * then write a value of 2^20 minus the read [DBELL_CNT], then write one + * to CPT()_VQ()_MISC_INT_W1C[DBELL_DOVF] and + * CPT()_VQ()_MISC_INT_ENA_W1S[DBELL_DOVF]. Must be a multiple of 8. + * All CPT instructions are 8 words and require a doorbell count of + * multiple of 8. + */ +union otx_cptx_vqx_doorbell { + u64 u; + struct otx_cptx_vqx_doorbell_s { +#if defined(__BIG_ENDIAN_BITFIELD) /* Word 0 - Big Endian */ + u64 reserved_20_63:44; + u64 dbell_cnt:20; +#else /* Word 0 - Little Endian */ + u64 dbell_cnt:20; + u64 reserved_20_63:44; +#endif /* Word 0 - End */ + } s; +}; + +/* + * Register (NCB) otx_cpt#_vq#_inprog + * + * CPT Queue In Progress Count Registers + * These registers contain the per-queue instruction in flight registers. + * otx_cptx_vqx_inprog_s + * Word0 + * reserved_8_63:56 [63:8] Reserved. + * inflight:8 [7:0](RO/H) Inflight count. Counts the number of instructions + * for the VF for which CPT is fetching, executing or responding to + * instructions. However this does not include any interrupts that are + * awaiting software handling (CPT()_VQ()_DONE[DONE] != 0x0). + * A queue may not be reconfigured until: + * 1. CPT()_VQ()_CTL[ENA] is cleared by software. + * 2. [INFLIGHT] is polled until equals to zero. + */ +union otx_cptx_vqx_inprog { + u64 u; + struct otx_cptx_vqx_inprog_s { +#if defined(__BIG_ENDIAN_BITFIELD) /* Word 0 - Big Endian */ + u64 reserved_8_63:56; + u64 inflight:8; +#else /* Word 0 - Little Endian */ + u64 inflight:8; + u64 reserved_8_63:56; +#endif /* Word 0 - End */ + } s; +}; + +/* + * Register (NCB) otx_cpt#_vq#_misc_int + * + * CPT Queue Misc Interrupt Register + * These registers contain the per-queue miscellaneous interrupts. + * otx_cptx_vqx_misc_int_s + * Word 0 + * reserved_5_63:59 [63:5] Reserved. + * swerr:1 [4:4](R/W1C/H) Software error from engines. + * nwrp:1 [3:3](R/W1C/H) NCB result write response error. + * irde:1 [2:2](R/W1C/H) Instruction NCB read response error. + * dovf:1 [1:1](R/W1C/H) Doorbell overflow. + * mbox:1 [0:0](R/W1C/H) PF to VF mailbox interrupt. Set when + * CPT()_VF()_PF_MBOX(0) is written. 
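Both the queue sizing and the doorbell above fall out of the 64-byte instruction format: one instruction is eight 64-bit words, CPT()_PF_Q()_CTL[SIZE] is 8*n + 1 words per buffer segment (the extra word being enough for the 8-byte next-chunk pointer), and the doorbell counts 64-bit words, so it is always written in multiples of eight. A small sketch of that arithmetic, using only constants from the top of this header:

#include <stdint.h>

#define INST_SIZE		64	/* OTX_CPT_INST_SIZE, bytes */
#define NEXT_CHUNK_PTR_SIZE	8	/* OTX_CPT_NEXT_CHUNK_PTR_SIZE, bytes */

/* 64-bit words to program into CPT()_PF_Q()_CTL[SIZE] for a segment holding
 * 'insts' instructions: 8 words per instruction plus one trailing word. */
static inline uint32_t qx_ctl_size(uint32_t insts)
{
	return insts * (INST_SIZE / 8) + NEXT_CHUNK_PTR_SIZE / 8;
}

/* The doorbell count is in 64-bit words and must be a multiple of 8, so
 * queueing 'insts' new instructions means writing insts * 8 to
 * CPT()_VQ()_DOORBELL[DBELL_CNT]. */
static inline uint64_t doorbell_words(uint32_t insts)
{
	return (uint64_t)insts * (INST_SIZE / 8);
}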
+ * + */ +union otx_cptx_vqx_misc_int { + u64 u; + struct otx_cptx_vqx_misc_int_s { +#if defined(__BIG_ENDIAN_BITFIELD) /* Word 0 - Big Endian */ + u64 reserved_5_63:59; + u64 swerr:1; + u64 nwrp:1; + u64 irde:1; + u64 dovf:1; + u64 mbox:1; +#else /* Word 0 - Little Endian */ + u64 mbox:1; + u64 dovf:1; + u64 irde:1; + u64 nwrp:1; + u64 swerr:1; + u64 reserved_5_63:59; +#endif /* Word 0 - End */ + } s; +}; + +/* + * Register (NCB) otx_cpt#_vq#_done_ack + * + * CPT Queue Done Count Ack Registers + * This register is written by software to acknowledge interrupts. + * otx_cptx_vqx_done_ack_s + * Word0 + * reserved_20_63:44 [63:20] Reserved. + * done_ack:20 [19:0](R/W/H) Number of decrements to CPT()_VQ()_DONE[DONE]. + * Reads CPT()_VQ()_DONE[DONE]. Written by software to acknowledge + * interrupts. If CPT()_VQ()_DONE[DONE] is still nonzero the interrupt + * will be re-sent if the conditions described in CPT()_VQ()_DONE[DONE] + * are satisfied. + * + */ +union otx_cptx_vqx_done_ack { + u64 u; + struct otx_cptx_vqx_done_ack_s { +#if defined(__BIG_ENDIAN_BITFIELD) /* Word 0 - Big Endian */ + u64 reserved_20_63:44; + u64 done_ack:20; +#else /* Word 0 - Little Endian */ + u64 done_ack:20; + u64 reserved_20_63:44; +#endif /* Word 0 - End */ + } s; +}; + +/* + * Register (NCB) otx_cpt#_vq#_done + * + * CPT Queue Done Count Registers + * These registers contain the per-queue instruction done count. + * cptx_vqx_done_s + * Word0 + * reserved_20_63:44 [63:20] Reserved. + * done:20 [19:0](R/W/H) Done count. When CPT_INST_S[DONEINT] set and that + * instruction completes, CPT()_VQ()_DONE[DONE] is incremented when the + * instruction finishes. Write to this field are for diagnostic use only; + * instead software writes CPT()_VQ()_DONE_ACK with the number of + * decrements for this field. + * Interrupts are sent as follows: + * * When CPT()_VQ()_DONE[DONE] = 0, then no results are pending, the + * interrupt coalescing timer is held to zero, and an interrupt is not + * sent. + * * When CPT()_VQ()_DONE[DONE] != 0, then the interrupt coalescing timer + * counts. If the counter is >= CPT()_VQ()_DONE_WAIT[TIME_WAIT]*1024, or + * CPT()_VQ()_DONE[DONE] >= CPT()_VQ()_DONE_WAIT[NUM_WAIT], i.e. enough + * time has passed or enough results have arrived, then the interrupt is + * sent. + * * When CPT()_VQ()_DONE_ACK is written (or CPT()_VQ()_DONE is written + * but this is not typical), the interrupt coalescing timer restarts. + * Note after decrementing this interrupt equation is recomputed, + * for example if CPT()_VQ()_DONE[DONE] >= CPT()_VQ()_DONE_WAIT[NUM_WAIT] + * and because the timer is zero, the interrupt will be resent immediately. + * (This covers the race case between software acknowledging an interrupt + * and a result returning.) + * * When CPT()_VQ()_DONE_ENA_W1S[DONE] = 0, interrupts are not sent, + * but the counting described above still occurs. + * Since CPT instructions complete out-of-order, if software is using + * completion interrupts the suggested scheme is to request a DONEINT on + * each request, and when an interrupt arrives perform a "greedy" scan for + * completions; even if a later command is acknowledged first this will + * not result in missing a completion. + * Software is responsible for making sure [DONE] does not overflow; + * for example by insuring there are not more than 2^20-1 instructions in + * flight that may request interrupts. 
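A minimal sketch of the acknowledgment scheme described above: on a done interrupt, read DONE, scan greedily for finished results (completions can land out of order), and write the count back to DONE_ACK so the coalescing timer and interrupt condition are re-evaluated. The read_done/write_done_ack/scan_completions helpers are hypothetical placeholders for the register accesses and the result walk, not functions from this driver.

#include <stdint.h>

/* Hypothetical stand-ins for readq/writeq on the VQ registers and for
 * walking this queue's CPT_RES_S structures. */
extern uint64_t read_done(unsigned int vq);		 /* CPT()_VQ()_DONE */
extern void write_done_ack(unsigned int vq, uint64_t n); /* CPT()_VQ()_DONE_ACK */
extern void scan_completions(unsigned int vq);		 /* greedy result scan */

static void handle_done_irq(unsigned int vq)
{
	uint64_t pending = read_done(vq) & 0xfffff;	/* DONE is 20 bits wide */

	if (!pending)
		return;
	/* Instructions complete out of order, so scan all outstanding
	 * results rather than just the oldest 'pending' ones. */
	scan_completions(vq);
	/* Acknowledging decrements DONE and restarts the coalescing timer;
	 * if more completions raced in, the interrupt fires again. */
	write_done_ack(vq, pending);
}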
+ * + */ +union otx_cptx_vqx_done { + u64 u; + struct otx_cptx_vqx_done_s { +#if defined(__BIG_ENDIAN_BITFIELD) /* Word 0 - Big Endian */ + u64 reserved_20_63:44; + u64 done:20; +#else /* Word 0 - Little Endian */ + u64 done:20; + u64 reserved_20_63:44; +#endif /* Word 0 - End */ + } s; +}; + +/* + * Register (NCB) otx_cpt#_vq#_done_wait + * + * CPT Queue Done Interrupt Coalescing Wait Registers + * Specifies the per queue interrupt coalescing settings. + * cptx_vqx_done_wait_s + * Word0 + * reserved_48_63:16 [63:48] Reserved. + * time_wait:16; [47:32](R/W) Time hold-off. When CPT()_VQ()_DONE[DONE] = 0 + * or CPT()_VQ()_DONE_ACK is written a timer is cleared. When the timer + * reaches [TIME_WAIT]*1024 then interrupt coalescing ends. + * see CPT()_VQ()_DONE[DONE]. If 0x0, time coalescing is disabled. + * reserved_20_31:12 [31:20] Reserved. + * num_wait:20 [19:0](R/W) Number of messages hold-off. + * When CPT()_VQ()_DONE[DONE] >= [NUM_WAIT] then interrupt coalescing ends + * see CPT()_VQ()_DONE[DONE]. If 0x0, same behavior as 0x1. + * + */ +union otx_cptx_vqx_done_wait { + u64 u; + struct otx_cptx_vqx_done_wait_s { +#if defined(__BIG_ENDIAN_BITFIELD) /* Word 0 - Big Endian */ + u64 reserved_48_63:16; + u64 time_wait:16; + u64 reserved_20_31:12; + u64 num_wait:20; +#else /* Word 0 - Little Endian */ + u64 num_wait:20; + u64 reserved_20_31:12; + u64 time_wait:16; + u64 reserved_48_63:16; +#endif /* Word 0 - End */ + } s; +}; + +/* + * Register (NCB) otx_cpt#_vq#_done_ena_w1s + * + * CPT Queue Done Interrupt Enable Set Registers + * Write 1 to these registers will enable the DONEINT interrupt for the queue. + * cptx_vqx_done_ena_w1s_s + * Word0 + * reserved_1_63:63 [63:1] Reserved. + * done:1 [0:0](R/W1S/H) Write 1 will enable DONEINT for this queue. + * Write 0 has no effect. Read will return the enable bit. + */ +union otx_cptx_vqx_done_ena_w1s { + u64 u; + struct otx_cptx_vqx_done_ena_w1s_s { +#if defined(__BIG_ENDIAN_BITFIELD) /* Word 0 - Big Endian */ + u64 reserved_1_63:63; + u64 done:1; +#else /* Word 0 - Little Endian */ + u64 done:1; + u64 reserved_1_63:63; +#endif /* Word 0 - End */ + } s; +}; + +/* + * Register (NCB) otx_cpt#_vq#_ctl + * + * CPT VF Queue Control Registers + * This register configures queues. This register should be changed (other than + * clearing [ENA]) only when quiescent (see CPT()_VQ()_INPROG[INFLIGHT]). + * cptx_vqx_ctl_s + * Word0 + * reserved_1_63:63 [63:1] Reserved. + * ena:1 [0:0](R/W/H) Enables the logical instruction queue. + * See also CPT()_PF_Q()_CTL[CONT_ERR] and CPT()_VQ()_INPROG[INFLIGHT]. + * 1 = Queue is enabled. + * 0 = Queue is disabled. + */ +union otx_cptx_vqx_ctl { + u64 u; + struct otx_cptx_vqx_ctl_s { +#if defined(__BIG_ENDIAN_BITFIELD) /* Word 0 - Big Endian */ + u64 reserved_1_63:63; + u64 ena:1; +#else /* Word 0 - Little Endian */ + u64 ena:1; + u64 reserved_1_63:63; +#endif /* Word 0 - End */ + } s; +}; + +/* + * Error Address/Error Codes + * + * In the event of a severe error, microcode writes an 8-byte Error Code + * value (ECODE) to host memory at the Rptr address specified by the host + * system (in the 64-byte request). + * + * Word0 + * [63:56](R) 8-bit completion code + * [55:48](R) Number of the core that reported the severe error + * [47:0] Lower 6 bytes of M-Inst word2. Used to assist in uniquely + * identifying which specific instruction caused the error. This assumes + * that each instruction has a unique result location (RPTR), at least + * for a given period of time. 
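A standalone sketch (illustrative only) of unpacking the severe-error word laid out above; the field boundaries are exactly the [63:56]/[55:48]/[47:0] split documented in the comment.

#include <stdint.h>
#include <stdio.h>

static void decode_ucode_error(uint64_t w0)
{
	unsigned int ccode  = (unsigned int)(w0 >> 56);		 /* [63:56] completion code */
	unsigned int coreid = (unsigned int)((w0 >> 48) & 0xff); /* [55:48] reporting core */
	uint64_t rptr6 = w0 & 0xffffffffffffull;		 /* [47:0] low 6 bytes of RPTR */

	printf("ucode error: code %u from core %u, rptr bits 0x%llx\n",
	       ccode, coreid, (unsigned long long)rptr6);
}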
+ */ +union otx_cpt_error_code { + u64 u; + struct otx_cpt_error_code_s { +#if defined(__BIG_ENDIAN_BITFIELD) /* Word 0 - Big Endian */ + uint64_t ccode:8; + uint64_t coreid:8; + uint64_t rptr6:48; +#else /* Word 0 - Little Endian */ + uint64_t rptr6:48; + uint64_t coreid:8; + uint64_t ccode:8; +#endif /* Word 0 - End */ + } s; +}; + +#endif /*__OTX_CPT_HW_TYPES_H */ diff --git a/drivers/crypto/marvell/octeontx/otx_cptpf.h b/drivers/crypto/marvell/octeontx/otx_cptpf.h new file mode 100644 index 000000000000..73cd0a9bc563 --- /dev/null +++ b/drivers/crypto/marvell/octeontx/otx_cptpf.h @@ -0,0 +1,34 @@ +/* SPDX-License-Identifier: GPL-2.0 + * Marvell OcteonTX CPT driver + * + * Copyright (C) 2019 Marvell International Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef __OTX_CPTPF_H +#define __OTX_CPTPF_H + +#include <linux/types.h> +#include <linux/device.h> +#include "otx_cptpf_ucode.h" + +/* + * OcteonTX CPT device structure + */ +struct otx_cpt_device { + void __iomem *reg_base; /* Register start address */ + struct pci_dev *pdev; /* Pci device handle */ + struct otx_cpt_eng_grps eng_grps;/* Engine groups information */ + struct list_head list; + u8 pf_type; /* PF type SE or AE */ + u8 max_vfs; /* Maximum number of VFs supported by the CPT */ + u8 vfs_enabled; /* Number of enabled VFs */ +}; + +void otx_cpt_mbox_intr_handler(struct otx_cpt_device *cpt, int mbx); +void otx_cpt_disable_all_cores(struct otx_cpt_device *cpt); + +#endif /* __OTX_CPTPF_H */ diff --git a/drivers/crypto/marvell/octeontx/otx_cptpf_main.c b/drivers/crypto/marvell/octeontx/otx_cptpf_main.c new file mode 100644 index 000000000000..200fb3303db0 --- /dev/null +++ b/drivers/crypto/marvell/octeontx/otx_cptpf_main.c @@ -0,0 +1,307 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Marvell OcteonTX CPT driver + * + * Copyright (C) 2019 Marvell International Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +#include "otx_cpt_common.h" +#include "otx_cptpf.h" + +#define DRV_NAME "octeontx-cpt" +#define DRV_VERSION "1.0" + +static void otx_cpt_disable_mbox_interrupts(struct otx_cpt_device *cpt) +{ + /* Disable mbox(0) interrupts for all VFs */ + writeq(~0ull, cpt->reg_base + OTX_CPT_PF_MBOX_ENA_W1CX(0)); +} + +static void otx_cpt_enable_mbox_interrupts(struct otx_cpt_device *cpt) +{ + /* Enable mbox(0) interrupts for all VFs */ + writeq(~0ull, cpt->reg_base + OTX_CPT_PF_MBOX_ENA_W1SX(0)); +} + +static irqreturn_t otx_cpt_mbx0_intr_handler(int __always_unused irq, + void *cpt) +{ + otx_cpt_mbox_intr_handler(cpt, 0); + + return IRQ_HANDLED; +} + +static void otx_cpt_reset(struct otx_cpt_device *cpt) +{ + writeq(1, cpt->reg_base + OTX_CPT_PF_RESET); +} + +static void otx_cpt_find_max_enabled_cores(struct otx_cpt_device *cpt) +{ + union otx_cptx_pf_constants pf_cnsts = {0}; + + pf_cnsts.u = readq(cpt->reg_base + OTX_CPT_PF_CONSTANTS); + cpt->eng_grps.avail.max_se_cnt = pf_cnsts.s.se; + cpt->eng_grps.avail.max_ae_cnt = pf_cnsts.s.ae; +} + +static u32 otx_cpt_check_bist_status(struct otx_cpt_device *cpt) +{ + union otx_cptx_pf_bist_status bist_sts = {0}; + + bist_sts.u = readq(cpt->reg_base + OTX_CPT_PF_BIST_STATUS); + return bist_sts.u; +} + +static u64 otx_cpt_check_exe_bist_status(struct otx_cpt_device *cpt) +{ + union otx_cptx_pf_exe_bist_status bist_sts = {0}; + + bist_sts.u = readq(cpt->reg_base + OTX_CPT_PF_EXE_BIST_STATUS); + return bist_sts.u; +} + +static int otx_cpt_device_init(struct otx_cpt_device *cpt) +{ + struct device *dev = &cpt->pdev->dev; + u16 sdevid; + u64 bist; + + /* Reset the PF when probed first */ + otx_cpt_reset(cpt); + mdelay(100); + + pci_read_config_word(cpt->pdev, PCI_SUBSYSTEM_ID, &sdevid); + + /* Check BIST status */ + bist = (u64)otx_cpt_check_bist_status(cpt); + if (bist) { + dev_err(dev, "RAM BIST failed with code 0x%llx", bist); + return -ENODEV; + } + + bist = otx_cpt_check_exe_bist_status(cpt); + if (bist) { + dev_err(dev, "Engine BIST failed with code 0x%llx", bist); + return -ENODEV; + } + + /* Get max enabled cores */ + otx_cpt_find_max_enabled_cores(cpt); + + if ((sdevid == OTX_CPT_PCI_PF_SUBSYS_ID) && + (cpt->eng_grps.avail.max_se_cnt == 0)) { + cpt->pf_type = OTX_CPT_AE; + } else if ((sdevid == OTX_CPT_PCI_PF_SUBSYS_ID) && + (cpt->eng_grps.avail.max_ae_cnt == 0)) { + cpt->pf_type = OTX_CPT_SE; + } + + /* Get max VQs/VFs supported by the device */ + cpt->max_vfs = pci_sriov_get_totalvfs(cpt->pdev); + + /* Disable all cores */ + otx_cpt_disable_all_cores(cpt); + + return 0; +} + +static int otx_cpt_register_interrupts(struct otx_cpt_device *cpt) +{ + struct device *dev = &cpt->pdev->dev; + u32 mbox_int_idx = OTX_CPT_PF_MBOX_INT; + u32 num_vec = OTX_CPT_PF_MSIX_VECTORS; + int ret; + + /* Enable MSI-X */ + ret = pci_alloc_irq_vectors(cpt->pdev, num_vec, num_vec, PCI_IRQ_MSIX); + if (ret < 0) { + dev_err(&cpt->pdev->dev, + "Request for #%d msix vectors failed\n", + num_vec); + return ret; + } + + /* Register mailbox interrupt handlers */ + ret = request_irq(pci_irq_vector(cpt->pdev, + OTX_CPT_PF_INT_VEC_E_MBOXX(mbox_int_idx, 0)), + otx_cpt_mbx0_intr_handler, 0, "CPT Mbox0", cpt); + if (ret) { + dev_err(dev, "Request irq failed\n"); + pci_free_irq_vectors(cpt->pdev); + return ret; + } + /* Enable mailbox interrupt */ + otx_cpt_enable_mbox_interrupts(cpt); + return 0; +} + +static void otx_cpt_unregister_interrupts(struct otx_cpt_device *cpt) +{ + u32 mbox_int_idx = OTX_CPT_PF_MBOX_INT; + + otx_cpt_disable_mbox_interrupts(cpt); + 
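The probe path above treats any nonzero BIST result as fatal and only logs the raw mask. For reference, each bit of the engine BIST mask covers one engine (0 = pass, 1 = fail, up to 48 engines, as documented with OTX_CPT_PF_EXE_BIST_STATUS earlier), so a failing mask can be unpacked with a sketch like this (illustrative only, not part of the driver):

#include <stdint.h>
#include <stdio.h>

static void report_failed_engines(uint64_t bstatus)
{
	int i;

	for (i = 0; i < 48; i++)		/* bstatus is 48 bits wide */
		if (bstatus & (1ull << i))
			printf("engine %d failed BIST\n", i);
}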
free_irq(pci_irq_vector(cpt->pdev, + OTX_CPT_PF_INT_VEC_E_MBOXX(mbox_int_idx, 0)), + cpt); + pci_free_irq_vectors(cpt->pdev); +} + + +static int otx_cpt_sriov_configure(struct pci_dev *pdev, int numvfs) +{ + struct otx_cpt_device *cpt = pci_get_drvdata(pdev); + int ret = 0; + + if (numvfs > cpt->max_vfs) + numvfs = cpt->max_vfs; + + if (numvfs > 0) { + ret = otx_cpt_try_create_default_eng_grps(cpt->pdev, + &cpt->eng_grps, + cpt->pf_type); + if (ret) + return ret; + + cpt->vfs_enabled = numvfs; + ret = pci_enable_sriov(pdev, numvfs); + if (ret) { + cpt->vfs_enabled = 0; + return ret; + } + otx_cpt_set_eng_grps_is_rdonly(&cpt->eng_grps, true); + try_module_get(THIS_MODULE); + ret = numvfs; + } else { + pci_disable_sriov(pdev); + otx_cpt_set_eng_grps_is_rdonly(&cpt->eng_grps, false); + module_put(THIS_MODULE); + cpt->vfs_enabled = 0; + } + dev_notice(&cpt->pdev->dev, "VFs enabled: %d\n", ret); + + return ret; +} + +static int otx_cpt_probe(struct pci_dev *pdev, + const struct pci_device_id __always_unused *ent) +{ + struct device *dev = &pdev->dev; + struct otx_cpt_device *cpt; + int err; + + cpt = devm_kzalloc(dev, sizeof(*cpt), GFP_KERNEL); + if (!cpt) + return -ENOMEM; + + pci_set_drvdata(pdev, cpt); + cpt->pdev = pdev; + + err = pci_enable_device(pdev); + if (err) { + dev_err(dev, "Failed to enable PCI device\n"); + goto err_clear_drvdata; + } + + err = pci_request_regions(pdev, DRV_NAME); + if (err) { + dev_err(dev, "PCI request regions failed 0x%x\n", err); + goto err_disable_device; + } + + err = pci_set_dma_mask(pdev, DMA_BIT_MASK(48)); + if (err) { + dev_err(dev, "Unable to get usable DMA configuration\n"); + goto err_release_regions; + } + + err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(48)); + if (err) { + dev_err(dev, "Unable to get 48-bit DMA for consistent allocations\n"); + goto err_release_regions; + } + + /* MAP PF's configuration registers */ + cpt->reg_base = pci_iomap(pdev, OTX_CPT_PF_PCI_CFG_BAR, 0); + if (!cpt->reg_base) { + dev_err(dev, "Cannot map config register space, aborting\n"); + err = -ENOMEM; + goto err_release_regions; + } + + /* CPT device HW initialization */ + err = otx_cpt_device_init(cpt); + if (err) + goto err_unmap_region; + + /* Register interrupts */ + err = otx_cpt_register_interrupts(cpt); + if (err) + goto err_unmap_region; + + /* Initialize engine groups */ + err = otx_cpt_init_eng_grps(pdev, &cpt->eng_grps, cpt->pf_type); + if (err) + goto err_unregister_interrupts; + + return 0; + +err_unregister_interrupts: + otx_cpt_unregister_interrupts(cpt); +err_unmap_region: + pci_iounmap(pdev, cpt->reg_base); +err_release_regions: + pci_release_regions(pdev); +err_disable_device: + pci_disable_device(pdev); +err_clear_drvdata: + pci_set_drvdata(pdev, NULL); + + return err; +} + +static void otx_cpt_remove(struct pci_dev *pdev) +{ + struct otx_cpt_device *cpt = pci_get_drvdata(pdev); + + if (!cpt) + return; + + /* Disable VFs */ + pci_disable_sriov(pdev); + /* Cleanup engine groups */ + otx_cpt_cleanup_eng_grps(pdev, &cpt->eng_grps); + /* Disable CPT PF interrupts */ + otx_cpt_unregister_interrupts(cpt); + /* Disengage SE and AE cores from all groups */ + otx_cpt_disable_all_cores(cpt); + pci_iounmap(pdev, cpt->reg_base); + pci_release_regions(pdev); + pci_disable_device(pdev); + pci_set_drvdata(pdev, NULL); +} + +/* Supported devices */ +static const struct pci_device_id otx_cpt_id_table[] = { + { PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, OTX_CPT_PCI_PF_DEVICE_ID) }, + { 0, } /* end of table */ +}; + +static struct pci_driver otx_cpt_pci_driver = { + .name 
= DRV_NAME, + .id_table = otx_cpt_id_table, + .probe = otx_cpt_probe, + .remove = otx_cpt_remove, + .sriov_configure = otx_cpt_sriov_configure +}; + +module_pci_driver(otx_cpt_pci_driver); + +MODULE_AUTHOR("Marvell International Ltd."); +MODULE_DESCRIPTION("Marvell OcteonTX CPT Physical Function Driver"); +MODULE_LICENSE("GPL v2"); +MODULE_VERSION(DRV_VERSION); +MODULE_DEVICE_TABLE(pci, otx_cpt_id_table); diff --git a/drivers/crypto/marvell/octeontx/otx_cptpf_mbox.c b/drivers/crypto/marvell/octeontx/otx_cptpf_mbox.c new file mode 100644 index 000000000000..a6774232e9a3 --- /dev/null +++ b/drivers/crypto/marvell/octeontx/otx_cptpf_mbox.c @@ -0,0 +1,253 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Marvell OcteonTX CPT driver + * + * Copyright (C) 2019 Marvell International Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include "otx_cpt_common.h" +#include "otx_cptpf.h" + +static char *get_mbox_opcode_str(int msg_opcode) +{ + char *str = "Unknown"; + + switch (msg_opcode) { + case OTX_CPT_MSG_VF_UP: + str = "UP"; + break; + + case OTX_CPT_MSG_VF_DOWN: + str = "DOWN"; + break; + + case OTX_CPT_MSG_READY: + str = "READY"; + break; + + case OTX_CPT_MSG_QLEN: + str = "QLEN"; + break; + + case OTX_CPT_MSG_QBIND_GRP: + str = "QBIND_GRP"; + break; + + case OTX_CPT_MSG_VQ_PRIORITY: + str = "VQ_PRIORITY"; + break; + + case OTX_CPT_MSG_PF_TYPE: + str = "PF_TYPE"; + break; + + case OTX_CPT_MSG_ACK: + str = "ACK"; + break; + + case OTX_CPT_MSG_NACK: + str = "NACK"; + break; + } + + return str; +} + +static void dump_mbox_msg(struct otx_cpt_mbox *mbox_msg, int vf_id) +{ + char raw_data_str[OTX_CPT_MAX_MBOX_DATA_STR_SIZE]; + + hex_dump_to_buffer(mbox_msg, sizeof(struct otx_cpt_mbox), 16, 8, + raw_data_str, OTX_CPT_MAX_MBOX_DATA_STR_SIZE, false); + if (vf_id >= 0) + pr_debug("MBOX opcode %s received from VF%d raw_data %s", + get_mbox_opcode_str(mbox_msg->msg), vf_id, + raw_data_str); + else + pr_debug("MBOX opcode %s received from PF raw_data %s", + get_mbox_opcode_str(mbox_msg->msg), raw_data_str); +} + +static void otx_cpt_send_msg_to_vf(struct otx_cpt_device *cpt, int vf, + struct otx_cpt_mbox *mbx) +{ + /* Writing mbox(0) causes interrupt */ + writeq(mbx->data, cpt->reg_base + OTX_CPT_PF_VFX_MBOXX(vf, 1)); + writeq(mbx->msg, cpt->reg_base + OTX_CPT_PF_VFX_MBOXX(vf, 0)); +} + +/* + * ACKs VF's mailbox message + * @vf: VF to which ACK to be sent + */ +static void otx_cpt_mbox_send_ack(struct otx_cpt_device *cpt, int vf, + struct otx_cpt_mbox *mbx) +{ + mbx->data = 0ull; + mbx->msg = OTX_CPT_MSG_ACK; + otx_cpt_send_msg_to_vf(cpt, vf, mbx); +} + +/* NACKs VF's mailbox message that PF is not able to complete the action */ +static void otx_cptpf_mbox_send_nack(struct otx_cpt_device *cpt, int vf, + struct otx_cpt_mbox *mbx) +{ + mbx->data = 0ull; + mbx->msg = OTX_CPT_MSG_NACK; + otx_cpt_send_msg_to_vf(cpt, vf, mbx); +} + +static void otx_cpt_clear_mbox_intr(struct otx_cpt_device *cpt, u32 vf) +{ + /* W1C for the VF */ + writeq(1ull << vf, cpt->reg_base + OTX_CPT_PF_MBOX_INTX(0)); +} + +/* + * Configure QLEN/Chunk sizes for VF + */ +static void otx_cpt_cfg_qlen_for_vf(struct otx_cpt_device *cpt, int vf, + u32 size) +{ + union otx_cptx_pf_qx_ctl pf_qx_ctl; + + pf_qx_ctl.u = readq(cpt->reg_base + OTX_CPT_PF_QX_CTL(vf)); + pf_qx_ctl.s.size = size; + pf_qx_ctl.s.cont_err = true; + writeq(pf_qx_ctl.u, cpt->reg_base + OTX_CPT_PF_QX_CTL(vf)); +} + +/* + * 
Configure VQ priority + */ +static void otx_cpt_cfg_vq_priority(struct otx_cpt_device *cpt, int vf, u32 pri) +{ + union otx_cptx_pf_qx_ctl pf_qx_ctl; + + pf_qx_ctl.u = readq(cpt->reg_base + OTX_CPT_PF_QX_CTL(vf)); + pf_qx_ctl.s.pri = pri; + writeq(pf_qx_ctl.u, cpt->reg_base + OTX_CPT_PF_QX_CTL(vf)); +} + +static int otx_cpt_bind_vq_to_grp(struct otx_cpt_device *cpt, u8 q, u8 grp) +{ + struct device *dev = &cpt->pdev->dev; + struct otx_cpt_eng_grp_info *eng_grp; + union otx_cptx_pf_qx_ctl pf_qx_ctl; + struct otx_cpt_ucode *ucode; + + if (q >= cpt->max_vfs) { + dev_err(dev, "Requested queue %d is > than maximum avail %d", + q, cpt->max_vfs); + return -EINVAL; + } + + if (grp >= OTX_CPT_MAX_ENGINE_GROUPS) { + dev_err(dev, "Requested group %d is > than maximum avail %d", + grp, OTX_CPT_MAX_ENGINE_GROUPS); + return -EINVAL; + } + + eng_grp = &cpt->eng_grps.grp[grp]; + if (!eng_grp->is_enabled) { + dev_err(dev, "Requested engine group %d is disabled", grp); + return -EINVAL; + } + + pf_qx_ctl.u = readq(cpt->reg_base + OTX_CPT_PF_QX_CTL(q)); + pf_qx_ctl.s.grp = grp; + writeq(pf_qx_ctl.u, cpt->reg_base + OTX_CPT_PF_QX_CTL(q)); + + if (eng_grp->mirror.is_ena) + ucode = &eng_grp->g->grp[eng_grp->mirror.idx].ucode[0]; + else + ucode = &eng_grp->ucode[0]; + + if (otx_cpt_uc_supports_eng_type(ucode, OTX_CPT_SE_TYPES)) + return OTX_CPT_SE_TYPES; + else if (otx_cpt_uc_supports_eng_type(ucode, OTX_CPT_AE_TYPES)) + return OTX_CPT_AE_TYPES; + else + return BAD_OTX_CPTVF_TYPE; +} + +/* Interrupt handler to handle mailbox messages from VFs */ +static void otx_cpt_handle_mbox_intr(struct otx_cpt_device *cpt, int vf) +{ + int vftype = 0; + struct otx_cpt_mbox mbx = {}; + struct device *dev = &cpt->pdev->dev; + /* + * MBOX[0] contains msg + * MBOX[1] contains data + */ + mbx.msg = readq(cpt->reg_base + OTX_CPT_PF_VFX_MBOXX(vf, 0)); + mbx.data = readq(cpt->reg_base + OTX_CPT_PF_VFX_MBOXX(vf, 1)); + + dump_mbox_msg(&mbx, vf); + + switch (mbx.msg) { + case OTX_CPT_MSG_VF_UP: + mbx.msg = OTX_CPT_MSG_VF_UP; + mbx.data = cpt->vfs_enabled; + otx_cpt_send_msg_to_vf(cpt, vf, &mbx); + break; + case OTX_CPT_MSG_READY: + mbx.msg = OTX_CPT_MSG_READY; + mbx.data = vf; + otx_cpt_send_msg_to_vf(cpt, vf, &mbx); + break; + case OTX_CPT_MSG_VF_DOWN: + /* First msg in VF teardown sequence */ + otx_cpt_mbox_send_ack(cpt, vf, &mbx); + break; + case OTX_CPT_MSG_QLEN: + otx_cpt_cfg_qlen_for_vf(cpt, vf, mbx.data); + otx_cpt_mbox_send_ack(cpt, vf, &mbx); + break; + case OTX_CPT_MSG_QBIND_GRP: + vftype = otx_cpt_bind_vq_to_grp(cpt, vf, (u8)mbx.data); + if ((vftype != OTX_CPT_AE_TYPES) && + (vftype != OTX_CPT_SE_TYPES)) { + dev_err(dev, "VF%d binding to eng group %llu failed", + vf, mbx.data); + otx_cptpf_mbox_send_nack(cpt, vf, &mbx); + } else { + mbx.msg = OTX_CPT_MSG_QBIND_GRP; + mbx.data = vftype; + otx_cpt_send_msg_to_vf(cpt, vf, &mbx); + } + break; + case OTX_CPT_MSG_PF_TYPE: + mbx.msg = OTX_CPT_MSG_PF_TYPE; + mbx.data = cpt->pf_type; + otx_cpt_send_msg_to_vf(cpt, vf, &mbx); + break; + case OTX_CPT_MSG_VQ_PRIORITY: + otx_cpt_cfg_vq_priority(cpt, vf, mbx.data); + otx_cpt_mbox_send_ack(cpt, vf, &mbx); + break; + default: + dev_err(&cpt->pdev->dev, "Invalid msg from VF%d, msg 0x%llx\n", + vf, mbx.msg); + break; + } +} + +void otx_cpt_mbox_intr_handler (struct otx_cpt_device *cpt, int mbx) +{ + u64 intr; + u8 vf; + + intr = readq(cpt->reg_base + OTX_CPT_PF_MBOX_INTX(0)); + pr_debug("PF interrupt mbox%d mask 0x%llx\n", mbx, intr); + for (vf = 0; vf < cpt->max_vfs; vf++) { + if (intr & (1ULL << vf)) { + otx_cpt_handle_mbox_intr(cpt, vf); + 
otx_cpt_clear_mbox_intr(cpt, vf); + } + } +} diff --git a/drivers/crypto/marvell/octeontx/otx_cptpf_ucode.c b/drivers/crypto/marvell/octeontx/otx_cptpf_ucode.c new file mode 100644 index 000000000000..d04baa319592 --- /dev/null +++ b/drivers/crypto/marvell/octeontx/otx_cptpf_ucode.c @@ -0,0 +1,1686 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Marvell OcteonTX CPT driver + * + * Copyright (C) 2019 Marvell International Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/ctype.h> +#include <linux/firmware.h> +#include "otx_cpt_common.h" +#include "otx_cptpf_ucode.h" +#include "otx_cptpf.h" + +#define CSR_DELAY 30 +/* Tar archive defines */ +#define TAR_MAGIC "ustar" +#define TAR_MAGIC_LEN 6 +#define TAR_BLOCK_LEN 512 +#define REGTYPE '0' +#define AREGTYPE '\0' + +/* tar header as defined in POSIX 1003.1-1990. */ +struct tar_hdr_t { + char name[100]; + char mode[8]; + char uid[8]; + char gid[8]; + char size[12]; + char mtime[12]; + char chksum[8]; + char typeflag; + char linkname[100]; + char magic[6]; + char version[2]; + char uname[32]; + char gname[32]; + char devmajor[8]; + char devminor[8]; + char prefix[155]; +}; + +struct tar_blk_t { + union { + struct tar_hdr_t hdr; + char block[TAR_BLOCK_LEN]; + }; +}; + +struct tar_arch_info_t { + struct list_head ucodes; + const struct firmware *fw; +}; + +static struct otx_cpt_bitmap get_cores_bmap(struct device *dev, + struct otx_cpt_eng_grp_info *eng_grp) +{ + struct otx_cpt_bitmap bmap = { {0} }; + bool found = false; + int i; + + if (eng_grp->g->engs_num > OTX_CPT_MAX_ENGINES) { + dev_err(dev, "unsupported number of engines %d on octeontx", + eng_grp->g->engs_num); + return bmap; + } + + for (i = 0; i < OTX_CPT_MAX_ETYPES_PER_GRP; i++) { + if (eng_grp->engs[i].type) { + bitmap_or(bmap.bits, bmap.bits, + eng_grp->engs[i].bmap, + eng_grp->g->engs_num); + bmap.size = eng_grp->g->engs_num; + found = true; + } + } + + if (!found) + dev_err(dev, "No engines reserved for engine group %d", + eng_grp->idx); + return bmap; +} + +static int is_eng_type(int val, int eng_type) +{ + return val & (1 << eng_type); +} + +static int dev_supports_eng_type(struct otx_cpt_eng_grps *eng_grps, + int eng_type) +{ + return is_eng_type(eng_grps->eng_types_supported, eng_type); +} + +static void set_ucode_filename(struct otx_cpt_ucode *ucode, + const char *filename) +{ + strlcpy(ucode->filename, filename, OTX_CPT_UCODE_NAME_LENGTH); +} + +static char *get_eng_type_str(int eng_type) +{ + char *str = "unknown"; + + switch (eng_type) { + case OTX_CPT_SE_TYPES: + str = "SE"; + break; + + case OTX_CPT_AE_TYPES: + str = "AE"; + break; + } + return str; +} + +static char *get_ucode_type_str(int ucode_type) +{ + char *str = "unknown"; + + switch (ucode_type) { + case (1 << OTX_CPT_SE_TYPES): + str = "SE"; + break; + + case (1 << OTX_CPT_AE_TYPES): + str = "AE"; + break; + } + return str; +} + +static int get_ucode_type(struct otx_cpt_ucode_hdr *ucode_hdr, int *ucode_type) +{ + char tmp_ver_str[OTX_CPT_UCODE_VER_STR_SZ]; + u32 i, val = 0; + u8 nn; + + strlcpy(tmp_ver_str, ucode_hdr->ver_str, OTX_CPT_UCODE_VER_STR_SZ); + for (i = 0; i < strlen(tmp_ver_str); i++) + tmp_ver_str[i] = tolower(tmp_ver_str[i]); + + nn = ucode_hdr->ver_num.nn; + if (strnstr(tmp_ver_str, "se-", OTX_CPT_UCODE_VER_STR_SZ) && + (nn == OTX_CPT_SE_UC_TYPE1 || nn == OTX_CPT_SE_UC_TYPE2 || + nn == OTX_CPT_SE_UC_TYPE3)) + val |= 1 << 
OTX_CPT_SE_TYPES; + if (strnstr(tmp_ver_str, "ae", OTX_CPT_UCODE_VER_STR_SZ) && + nn == OTX_CPT_AE_UC_TYPE) + val |= 1 << OTX_CPT_AE_TYPES; + + *ucode_type = val; + + if (!val) + return -EINVAL; + if (is_eng_type(val, OTX_CPT_AE_TYPES) && + is_eng_type(val, OTX_CPT_SE_TYPES)) + return -EINVAL; + return 0; +} + +static int is_mem_zero(const char *ptr, int size) +{ + int i; + + for (i = 0; i < size; i++) { + if (ptr[i]) + return 0; + } + return 1; +} + +static int cpt_set_ucode_base(struct otx_cpt_eng_grp_info *eng_grp, void *obj) +{ + struct otx_cpt_device *cpt = (struct otx_cpt_device *) obj; + dma_addr_t dma_addr; + struct otx_cpt_bitmap bmap; + int i; + + bmap = get_cores_bmap(&cpt->pdev->dev, eng_grp); + if (!bmap.size) + return -EINVAL; + + if (eng_grp->mirror.is_ena) + dma_addr = + eng_grp->g->grp[eng_grp->mirror.idx].ucode[0].align_dma; + else + dma_addr = eng_grp->ucode[0].align_dma; + + /* + * Set UCODE_BASE only for the cores which are not used, + * other cores should have already valid UCODE_BASE set + */ + for_each_set_bit(i, bmap.bits, bmap.size) + if (!eng_grp->g->eng_ref_cnt[i]) + writeq((u64) dma_addr, cpt->reg_base + + OTX_CPT_PF_ENGX_UCODE_BASE(i)); + return 0; +} + +static int cpt_detach_and_disable_cores(struct otx_cpt_eng_grp_info *eng_grp, + void *obj) +{ + struct otx_cpt_device *cpt = (struct otx_cpt_device *) obj; + struct otx_cpt_bitmap bmap = { {0} }; + int timeout = 10; + int i, busy; + u64 reg; + + bmap = get_cores_bmap(&cpt->pdev->dev, eng_grp); + if (!bmap.size) + return -EINVAL; + + /* Detach the cores from group */ + reg = readq(cpt->reg_base + OTX_CPT_PF_GX_EN(eng_grp->idx)); + for_each_set_bit(i, bmap.bits, bmap.size) { + if (reg & (1ull << i)) { + eng_grp->g->eng_ref_cnt[i]--; + reg &= ~(1ull << i); + } + } + writeq(reg, cpt->reg_base + OTX_CPT_PF_GX_EN(eng_grp->idx)); + + /* Wait for cores to become idle */ + do { + busy = 0; + usleep_range(10000, 20000); + if (timeout-- < 0) + return -EBUSY; + + reg = readq(cpt->reg_base + OTX_CPT_PF_EXEC_BUSY); + for_each_set_bit(i, bmap.bits, bmap.size) + if (reg & (1ull << i)) { + busy = 1; + break; + } + } while (busy); + + /* Disable the cores only if they are not used anymore */ + reg = readq(cpt->reg_base + OTX_CPT_PF_EXE_CTL); + for_each_set_bit(i, bmap.bits, bmap.size) + if (!eng_grp->g->eng_ref_cnt[i]) + reg &= ~(1ull << i); + writeq(reg, cpt->reg_base + OTX_CPT_PF_EXE_CTL); + + return 0; +} + +static int cpt_attach_and_enable_cores(struct otx_cpt_eng_grp_info *eng_grp, + void *obj) +{ + struct otx_cpt_device *cpt = (struct otx_cpt_device *) obj; + struct otx_cpt_bitmap bmap; + u64 reg; + int i; + + bmap = get_cores_bmap(&cpt->pdev->dev, eng_grp); + if (!bmap.size) + return -EINVAL; + + /* Attach the cores to the group */ + reg = readq(cpt->reg_base + OTX_CPT_PF_GX_EN(eng_grp->idx)); + for_each_set_bit(i, bmap.bits, bmap.size) { + if (!(reg & (1ull << i))) { + eng_grp->g->eng_ref_cnt[i]++; + reg |= 1ull << i; + } + } + writeq(reg, cpt->reg_base + OTX_CPT_PF_GX_EN(eng_grp->idx)); + + /* Enable the cores */ + reg = readq(cpt->reg_base + OTX_CPT_PF_EXE_CTL); + for_each_set_bit(i, bmap.bits, bmap.size) + reg |= 1ull << i; + writeq(reg, cpt->reg_base + OTX_CPT_PF_EXE_CTL); + + return 0; +} + +static int process_tar_file(struct device *dev, + struct tar_arch_info_t *tar_arch, char *filename, + const u8 *data, u32 size) +{ + struct tar_ucode_info_t *tar_info; + struct otx_cpt_ucode_hdr *ucode_hdr; + int ucode_type, ucode_size; + + /* + * If size is less than microcode header size then don't report + * an error because 
it might not be microcode file, just process + * next file from archive + */ + if (size < sizeof(struct otx_cpt_ucode_hdr)) + return 0; + + ucode_hdr = (struct otx_cpt_ucode_hdr *) data; + /* + * If microcode version can't be found don't report an error + * because it might not be microcode file, just process next file + */ + if (get_ucode_type(ucode_hdr, &ucode_type)) + return 0; + + ucode_size = ntohl(ucode_hdr->code_length) * 2; + if (!ucode_size || (size < round_up(ucode_size, 16) + + sizeof(struct otx_cpt_ucode_hdr) + OTX_CPT_UCODE_SIGN_LEN)) { + dev_err(dev, "Ucode %s invalid size", filename); + return -EINVAL; + } + + tar_info = kzalloc(sizeof(struct tar_ucode_info_t), GFP_KERNEL); + if (!tar_info) + return -ENOMEM; + + tar_info->ucode_ptr = data; + set_ucode_filename(&tar_info->ucode, filename); + memcpy(tar_info->ucode.ver_str, ucode_hdr->ver_str, + OTX_CPT_UCODE_VER_STR_SZ); + tar_info->ucode.ver_num = ucode_hdr->ver_num; + tar_info->ucode.type = ucode_type; + tar_info->ucode.size = ucode_size; + list_add_tail(&tar_info->list, &tar_arch->ucodes); + + return 0; +} + +static void release_tar_archive(struct tar_arch_info_t *tar_arch) +{ + struct tar_ucode_info_t *curr, *temp; + + if (!tar_arch) + return; + + list_for_each_entry_safe(curr, temp, &tar_arch->ucodes, list) { + list_del(&curr->list); + kfree(curr); + } + + if (tar_arch->fw) + release_firmware(tar_arch->fw); + kfree(tar_arch); +} + +static struct tar_ucode_info_t *get_uc_from_tar_archive( + struct tar_arch_info_t *tar_arch, + int ucode_type) +{ + struct tar_ucode_info_t *curr, *uc_found = NULL; + + list_for_each_entry(curr, &tar_arch->ucodes, list) { + if (!is_eng_type(curr->ucode.type, ucode_type)) + continue; + + if (!uc_found) { + uc_found = curr; + continue; + } + + switch (ucode_type) { + case OTX_CPT_AE_TYPES: + break; + + case OTX_CPT_SE_TYPES: + if (uc_found->ucode.ver_num.nn == OTX_CPT_SE_UC_TYPE2 || + (uc_found->ucode.ver_num.nn == OTX_CPT_SE_UC_TYPE3 + && curr->ucode.ver_num.nn == OTX_CPT_SE_UC_TYPE1)) + uc_found = curr; + break; + } + } + + return uc_found; +} + +static void print_tar_dbg_info(struct tar_arch_info_t *tar_arch, + char *tar_filename) +{ + struct tar_ucode_info_t *curr; + + pr_debug("Tar archive filename %s", tar_filename); + pr_debug("Tar archive pointer %p, size %ld", tar_arch->fw->data, + tar_arch->fw->size); + list_for_each_entry(curr, &tar_arch->ucodes, list) { + pr_debug("Ucode filename %s", curr->ucode.filename); + pr_debug("Ucode version string %s", curr->ucode.ver_str); + pr_debug("Ucode version %d.%d.%d.%d", + curr->ucode.ver_num.nn, curr->ucode.ver_num.xx, + curr->ucode.ver_num.yy, curr->ucode.ver_num.zz); + pr_debug("Ucode type (%d) %s", curr->ucode.type, + get_ucode_type_str(curr->ucode.type)); + pr_debug("Ucode size %d", curr->ucode.size); + pr_debug("Ucode ptr %p\n", curr->ucode_ptr); + } +} + +static struct tar_arch_info_t *load_tar_archive(struct device *dev, + char *tar_filename) +{ + struct tar_arch_info_t *tar_arch = NULL; + struct tar_blk_t *tar_blk; + unsigned int cur_size; + size_t tar_offs = 0; + size_t tar_size; + int ret; + + tar_arch = kzalloc(sizeof(struct tar_arch_info_t), GFP_KERNEL); + if (!tar_arch) + return NULL; + + INIT_LIST_HEAD(&tar_arch->ucodes); + + /* Load tar archive */ + ret = request_firmware(&tar_arch->fw, tar_filename, dev); + if (ret) + goto release_tar_arch; + + if (tar_arch->fw->size < TAR_BLOCK_LEN) { + dev_err(dev, "Invalid tar archive %s ", tar_filename); + goto release_tar_arch; + } + + tar_size = tar_arch->fw->size; + tar_blk = (struct 
tar_blk_t *) tar_arch->fw->data; + if (strncmp(tar_blk->hdr.magic, TAR_MAGIC, TAR_MAGIC_LEN - 1)) { + dev_err(dev, "Unsupported format of tar archive %s", + tar_filename); + goto release_tar_arch; + } + + while (1) { + /* Read current file size */ + ret = kstrtouint(tar_blk->hdr.size, 8, &cur_size); + if (ret) + goto release_tar_arch; + + if (tar_offs + cur_size > tar_size || + tar_offs + 2*TAR_BLOCK_LEN > tar_size) { + dev_err(dev, "Invalid tar archive %s ", tar_filename); + goto release_tar_arch; + } + + tar_offs += TAR_BLOCK_LEN; + if (tar_blk->hdr.typeflag == REGTYPE || + tar_blk->hdr.typeflag == AREGTYPE) { + ret = process_tar_file(dev, tar_arch, + tar_blk->hdr.name, + &tar_arch->fw->data[tar_offs], + cur_size); + if (ret) + goto release_tar_arch; + } + + tar_offs += (cur_size/TAR_BLOCK_LEN) * TAR_BLOCK_LEN; + if (cur_size % TAR_BLOCK_LEN) + tar_offs += TAR_BLOCK_LEN; + + /* Check for the end of the archive */ + if (tar_offs + 2*TAR_BLOCK_LEN > tar_size) { + dev_err(dev, "Invalid tar archive %s ", tar_filename); + goto release_tar_arch; + } + + if (is_mem_zero(&tar_arch->fw->data[tar_offs], + 2*TAR_BLOCK_LEN)) + break; + + /* Read next block from tar archive */ + tar_blk = (struct tar_blk_t *) &tar_arch->fw->data[tar_offs]; + } + + print_tar_dbg_info(tar_arch, tar_filename); + return tar_arch; +release_tar_arch: + release_tar_archive(tar_arch); + return NULL; +} + +static struct otx_cpt_engs_rsvd *find_engines_by_type( + struct otx_cpt_eng_grp_info *eng_grp, + int eng_type) +{ + int i; + + for (i = 0; i < OTX_CPT_MAX_ETYPES_PER_GRP; i++) { + if (!eng_grp->engs[i].type) + continue; + + if (eng_grp->engs[i].type == eng_type) + return &eng_grp->engs[i]; + } + return NULL; +} + +int otx_cpt_uc_supports_eng_type(struct otx_cpt_ucode *ucode, int eng_type) +{ + return is_eng_type(ucode->type, eng_type); +} +EXPORT_SYMBOL_GPL(otx_cpt_uc_supports_eng_type); + +int otx_cpt_eng_grp_has_eng_type(struct otx_cpt_eng_grp_info *eng_grp, + int eng_type) +{ + struct otx_cpt_engs_rsvd *engs; + + engs = find_engines_by_type(eng_grp, eng_type); + + return (engs != NULL ? 1 : 0); +} +EXPORT_SYMBOL_GPL(otx_cpt_eng_grp_has_eng_type); + +static void print_ucode_info(struct otx_cpt_eng_grp_info *eng_grp, + char *buf, int size) +{ + if (eng_grp->mirror.is_ena) { + scnprintf(buf, size, "%s (shared with engine_group%d)", + eng_grp->g->grp[eng_grp->mirror.idx].ucode[0].ver_str, + eng_grp->mirror.idx); + } else { + scnprintf(buf, size, "%s", eng_grp->ucode[0].ver_str); + } +} + +static void print_engs_info(struct otx_cpt_eng_grp_info *eng_grp, + char *buf, int size, int idx) +{ + struct otx_cpt_engs_rsvd *mirrored_engs = NULL; + struct otx_cpt_engs_rsvd *engs; + int len, i; + + buf[0] = '\0'; + for (i = 0; i < OTX_CPT_MAX_ETYPES_PER_GRP; i++) { + engs = &eng_grp->engs[i]; + if (!engs->type) + continue; + if (idx != -1 && idx != i) + continue; + + if (eng_grp->mirror.is_ena) + mirrored_engs = find_engines_by_type( + &eng_grp->g->grp[eng_grp->mirror.idx], + engs->type); + if (i > 0 && idx == -1) { + len = strlen(buf); + scnprintf(buf+len, size-len, ", "); + } + + len = strlen(buf); + scnprintf(buf+len, size-len, "%d %s ", mirrored_engs ? + engs->count + mirrored_engs->count : engs->count, + get_eng_type_str(engs->type)); + if (mirrored_engs) { + len = strlen(buf); + scnprintf(buf+len, size-len, + "(%d shared with engine_group%d) ", + engs->count <= 0 ? 
engs->count + + mirrored_engs->count : mirrored_engs->count, + eng_grp->mirror.idx); + } + } +} + +static void print_ucode_dbg_info(struct otx_cpt_ucode *ucode) +{ + pr_debug("Ucode info"); + pr_debug("Ucode version string %s", ucode->ver_str); + pr_debug("Ucode version %d.%d.%d.%d", ucode->ver_num.nn, + ucode->ver_num.xx, ucode->ver_num.yy, ucode->ver_num.zz); + pr_debug("Ucode type %s", get_ucode_type_str(ucode->type)); + pr_debug("Ucode size %d", ucode->size); + pr_debug("Ucode virt address %16.16llx", (u64)ucode->align_va); + pr_debug("Ucode phys address %16.16llx\n", ucode->align_dma); +} + +static void cpt_print_engines_mask(struct otx_cpt_eng_grp_info *eng_grp, + struct device *dev, char *buf, int size) +{ + struct otx_cpt_bitmap bmap; + u32 mask[2]; + + bmap = get_cores_bmap(dev, eng_grp); + if (!bmap.size) { + scnprintf(buf, size, "unknown"); + return; + } + bitmap_to_arr32(mask, bmap.bits, bmap.size); + scnprintf(buf, size, "%8.8x %8.8x", mask[1], mask[0]); +} + + +static void print_dbg_info(struct device *dev, + struct otx_cpt_eng_grps *eng_grps) +{ + char engs_info[2*OTX_CPT_UCODE_NAME_LENGTH]; + struct otx_cpt_eng_grp_info *mirrored_grp; + char engs_mask[OTX_CPT_UCODE_NAME_LENGTH]; + struct otx_cpt_eng_grp_info *grp; + struct otx_cpt_engs_rsvd *engs; + u32 mask[4]; + int i, j; + + pr_debug("Engine groups global info"); + pr_debug("max SE %d, max AE %d", + eng_grps->avail.max_se_cnt, eng_grps->avail.max_ae_cnt); + pr_debug("free SE %d", eng_grps->avail.se_cnt); + pr_debug("free AE %d", eng_grps->avail.ae_cnt); + + for (i = 0; i < OTX_CPT_MAX_ENGINE_GROUPS; i++) { + grp = &eng_grps->grp[i]; + pr_debug("engine_group%d, state %s", i, grp->is_enabled ? + "enabled" : "disabled"); + if (grp->is_enabled) { + mirrored_grp = &eng_grps->grp[grp->mirror.idx]; + pr_debug("Ucode0 filename %s, version %s", + grp->mirror.is_ena ? + mirrored_grp->ucode[0].filename : + grp->ucode[0].filename, + grp->mirror.is_ena ? 
+ mirrored_grp->ucode[0].ver_str : + grp->ucode[0].ver_str); + } + + for (j = 0; j < OTX_CPT_MAX_ETYPES_PER_GRP; j++) { + engs = &grp->engs[j]; + if (engs->type) { + print_engs_info(grp, engs_info, + 2*OTX_CPT_UCODE_NAME_LENGTH, j); + pr_debug("Slot%d: %s", j, engs_info); + bitmap_to_arr32(mask, engs->bmap, + eng_grps->engs_num); + pr_debug("Mask: %8.8x %8.8x %8.8x %8.8x", + mask[3], mask[2], mask[1], mask[0]); + } else + pr_debug("Slot%d not used", j); + } + if (grp->is_enabled) { + cpt_print_engines_mask(grp, dev, engs_mask, + OTX_CPT_UCODE_NAME_LENGTH); + pr_debug("Cmask: %s", engs_mask); + } + } +} + +static int update_engines_avail_count(struct device *dev, + struct otx_cpt_engs_available *avail, + struct otx_cpt_engs_rsvd *engs, int val) +{ + switch (engs->type) { + case OTX_CPT_SE_TYPES: + avail->se_cnt += val; + break; + + case OTX_CPT_AE_TYPES: + avail->ae_cnt += val; + break; + + default: + dev_err(dev, "Invalid engine type %d\n", engs->type); + return -EINVAL; + } + + return 0; +} + +static int update_engines_offset(struct device *dev, + struct otx_cpt_engs_available *avail, + struct otx_cpt_engs_rsvd *engs) +{ + switch (engs->type) { + case OTX_CPT_SE_TYPES: + engs->offset = 0; + break; + + case OTX_CPT_AE_TYPES: + engs->offset = avail->max_se_cnt; + break; + + default: + dev_err(dev, "Invalid engine type %d\n", engs->type); + return -EINVAL; + } + + return 0; +} + +static int release_engines(struct device *dev, struct otx_cpt_eng_grp_info *grp) +{ + int i, ret = 0; + + for (i = 0; i < OTX_CPT_MAX_ETYPES_PER_GRP; i++) { + if (!grp->engs[i].type) + continue; + + if (grp->engs[i].count > 0) { + ret = update_engines_avail_count(dev, &grp->g->avail, + &grp->engs[i], + grp->engs[i].count); + if (ret) + return ret; + } + + grp->engs[i].type = 0; + grp->engs[i].count = 0; + grp->engs[i].offset = 0; + grp->engs[i].ucode = NULL; + bitmap_zero(grp->engs[i].bmap, grp->g->engs_num); + } + + return 0; +} + +static int do_reserve_engines(struct device *dev, + struct otx_cpt_eng_grp_info *grp, + struct otx_cpt_engines *req_engs) +{ + struct otx_cpt_engs_rsvd *engs = NULL; + int i, ret; + + for (i = 0; i < OTX_CPT_MAX_ETYPES_PER_GRP; i++) { + if (!grp->engs[i].type) { + engs = &grp->engs[i]; + break; + } + } + + if (!engs) + return -ENOMEM; + + engs->type = req_engs->type; + engs->count = req_engs->count; + + ret = update_engines_offset(dev, &grp->g->avail, engs); + if (ret) + return ret; + + if (engs->count > 0) { + ret = update_engines_avail_count(dev, &grp->g->avail, engs, + -engs->count); + if (ret) + return ret; + } + + return 0; +} + +static int check_engines_availability(struct device *dev, + struct otx_cpt_eng_grp_info *grp, + struct otx_cpt_engines *req_eng) +{ + int avail_cnt = 0; + + switch (req_eng->type) { + case OTX_CPT_SE_TYPES: + avail_cnt = grp->g->avail.se_cnt; + break; + + case OTX_CPT_AE_TYPES: + avail_cnt = grp->g->avail.ae_cnt; + break; + + default: + dev_err(dev, "Invalid engine type %d\n", req_eng->type); + return -EINVAL; + } + + if (avail_cnt < req_eng->count) { + dev_err(dev, + "Error available %s engines %d < than requested %d", + get_eng_type_str(req_eng->type), + avail_cnt, req_eng->count); + return -EBUSY; + } + + return 0; +} + +static int reserve_engines(struct device *dev, struct otx_cpt_eng_grp_info *grp, + struct otx_cpt_engines *req_engs, int req_cnt) +{ + int i, ret; + + /* Validate if a number of requested engines is available */ + for (i = 0; i < req_cnt; i++) { + ret = check_engines_availability(dev, grp, &req_engs[i]); + if (ret) + return ret; + } + + 
/* Reserve requested engines for this engine group */ + for (i = 0; i < req_cnt; i++) { + ret = do_reserve_engines(dev, grp, &req_engs[i]); + if (ret) + return ret; + } + return 0; +} + +static ssize_t eng_grp_info_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + char ucode_info[2*OTX_CPT_UCODE_NAME_LENGTH]; + char engs_info[2*OTX_CPT_UCODE_NAME_LENGTH]; + char engs_mask[OTX_CPT_UCODE_NAME_LENGTH]; + struct otx_cpt_eng_grp_info *eng_grp; + int ret; + + eng_grp = container_of(attr, struct otx_cpt_eng_grp_info, info_attr); + mutex_lock(&eng_grp->g->lock); + + print_engs_info(eng_grp, engs_info, 2*OTX_CPT_UCODE_NAME_LENGTH, -1); + print_ucode_info(eng_grp, ucode_info, 2*OTX_CPT_UCODE_NAME_LENGTH); + cpt_print_engines_mask(eng_grp, dev, engs_mask, + OTX_CPT_UCODE_NAME_LENGTH); + ret = scnprintf(buf, PAGE_SIZE, + "Microcode : %s\nEngines: %s\nEngines mask: %s\n", + ucode_info, engs_info, engs_mask); + + mutex_unlock(&eng_grp->g->lock); + return ret; +} + +static int create_sysfs_eng_grps_info(struct device *dev, + struct otx_cpt_eng_grp_info *eng_grp) +{ + int ret; + + eng_grp->info_attr.show = eng_grp_info_show; + eng_grp->info_attr.store = NULL; + eng_grp->info_attr.attr.name = eng_grp->sysfs_info_name; + eng_grp->info_attr.attr.mode = 0440; + sysfs_attr_init(&eng_grp->info_attr.attr); + ret = device_create_file(dev, &eng_grp->info_attr); + if (ret) + return ret; + + return 0; +} + +static void ucode_unload(struct device *dev, struct otx_cpt_ucode *ucode) +{ + if (ucode->va) { + dma_free_coherent(dev, ucode->size + OTX_CPT_UCODE_ALIGNMENT, + ucode->va, ucode->dma); + ucode->va = NULL; + ucode->align_va = NULL; + ucode->dma = 0; + ucode->align_dma = 0; + ucode->size = 0; + } + + memset(&ucode->ver_str, 0, OTX_CPT_UCODE_VER_STR_SZ); + memset(&ucode->ver_num, 0, sizeof(struct otx_cpt_ucode_ver_num)); + set_ucode_filename(ucode, ""); + ucode->type = 0; +} + +static int copy_ucode_to_dma_mem(struct device *dev, + struct otx_cpt_ucode *ucode, + const u8 *ucode_data) +{ + u32 i; + + /* Allocate DMAable space */ + ucode->va = dma_alloc_coherent(dev, ucode->size + + OTX_CPT_UCODE_ALIGNMENT, + &ucode->dma, GFP_KERNEL); + if (!ucode->va) { + dev_err(dev, "Unable to allocate space for microcode"); + return -ENOMEM; + } + ucode->align_va = PTR_ALIGN(ucode->va, OTX_CPT_UCODE_ALIGNMENT); + ucode->align_dma = PTR_ALIGN(ucode->dma, OTX_CPT_UCODE_ALIGNMENT); + + memcpy((void *) ucode->align_va, (void *) ucode_data + + sizeof(struct otx_cpt_ucode_hdr), ucode->size); + + /* Byte swap 64-bit */ + for (i = 0; i < (ucode->size / 8); i++) + ((u64 *)ucode->align_va)[i] = + cpu_to_be64(((u64 *)ucode->align_va)[i]); + /* Ucode needs 16-bit swap */ + for (i = 0; i < (ucode->size / 2); i++) + ((u16 *)ucode->align_va)[i] = + cpu_to_be16(((u16 *)ucode->align_va)[i]); + return 0; +} + +static int ucode_load(struct device *dev, struct otx_cpt_ucode *ucode, + const char *ucode_filename) +{ + struct otx_cpt_ucode_hdr *ucode_hdr; + const struct firmware *fw; + int ret; + + set_ucode_filename(ucode, ucode_filename); + ret = request_firmware(&fw, ucode->filename, dev); + if (ret) + return ret; + + ucode_hdr = (struct otx_cpt_ucode_hdr *) fw->data; + memcpy(ucode->ver_str, ucode_hdr->ver_str, OTX_CPT_UCODE_VER_STR_SZ); + ucode->ver_num = ucode_hdr->ver_num; + ucode->size = ntohl(ucode_hdr->code_length) * 2; + if (!ucode->size || (fw->size < round_up(ucode->size, 16) + + sizeof(struct otx_cpt_ucode_hdr) + OTX_CPT_UCODE_SIGN_LEN)) { + dev_err(dev, "Ucode %s invalid size", ucode_filename); + ret = -EINVAL; 
+ goto release_fw; + } + + ret = get_ucode_type(ucode_hdr, &ucode->type); + if (ret) { + dev_err(dev, "Microcode %s unknown type 0x%x", ucode->filename, + ucode->type); + goto release_fw; + } + + ret = copy_ucode_to_dma_mem(dev, ucode, fw->data); + if (ret) + goto release_fw; + + print_ucode_dbg_info(ucode); +release_fw: + release_firmware(fw); + return ret; +} + +static int enable_eng_grp(struct otx_cpt_eng_grp_info *eng_grp, + void *obj) +{ + int ret; + + ret = cpt_set_ucode_base(eng_grp, obj); + if (ret) + return ret; + + ret = cpt_attach_and_enable_cores(eng_grp, obj); + return ret; +} + +static int disable_eng_grp(struct device *dev, + struct otx_cpt_eng_grp_info *eng_grp, + void *obj) +{ + int i, ret; + + ret = cpt_detach_and_disable_cores(eng_grp, obj); + if (ret) + return ret; + + /* Unload ucode used by this engine group */ + ucode_unload(dev, &eng_grp->ucode[0]); + + for (i = 0; i < OTX_CPT_MAX_ETYPES_PER_GRP; i++) { + if (!eng_grp->engs[i].type) + continue; + + eng_grp->engs[i].ucode = &eng_grp->ucode[0]; + } + + ret = cpt_set_ucode_base(eng_grp, obj); + + return ret; +} + +static void setup_eng_grp_mirroring(struct otx_cpt_eng_grp_info *dst_grp, + struct otx_cpt_eng_grp_info *src_grp) +{ + /* Setup fields for engine group which is mirrored */ + src_grp->mirror.is_ena = false; + src_grp->mirror.idx = 0; + src_grp->mirror.ref_count++; + + /* Setup fields for mirroring engine group */ + dst_grp->mirror.is_ena = true; + dst_grp->mirror.idx = src_grp->idx; + dst_grp->mirror.ref_count = 0; +} + +static void remove_eng_grp_mirroring(struct otx_cpt_eng_grp_info *dst_grp) +{ + struct otx_cpt_eng_grp_info *src_grp; + + if (!dst_grp->mirror.is_ena) + return; + + src_grp = &dst_grp->g->grp[dst_grp->mirror.idx]; + + src_grp->mirror.ref_count--; + dst_grp->mirror.is_ena = false; + dst_grp->mirror.idx = 0; + dst_grp->mirror.ref_count = 0; +} + +static void update_requested_engs(struct otx_cpt_eng_grp_info *mirrored_eng_grp, + struct otx_cpt_engines *engs, int engs_cnt) +{ + struct otx_cpt_engs_rsvd *mirrored_engs; + int i; + + for (i = 0; i < engs_cnt; i++) { + mirrored_engs = find_engines_by_type(mirrored_eng_grp, + engs[i].type); + if (!mirrored_engs) + continue; + + /* + * If mirrored group has this type of engines attached then + * there are 3 scenarios possible: + * 1) mirrored_engs.count == engs[i].count then all engines + * from mirrored engine group will be shared with this engine + * group + * 2) mirrored_engs.count > engs[i].count then only a subset of + * engines from mirrored engine group will be shared with this + * engine group + * 3) mirrored_engs.count < engs[i].count then all engines + * from mirrored engine group will be shared with this group + * and additional engines will be reserved for exclusively use + * by this engine group + */ + engs[i].count -= mirrored_engs->count; + } +} + +static struct otx_cpt_eng_grp_info *find_mirrored_eng_grp( + struct otx_cpt_eng_grp_info *grp) +{ + struct otx_cpt_eng_grps *eng_grps = grp->g; + int i; + + for (i = 0; i < OTX_CPT_MAX_ENGINE_GROUPS; i++) { + if (!eng_grps->grp[i].is_enabled) + continue; + if (eng_grps->grp[i].ucode[0].type) + continue; + if (grp->idx == i) + continue; + if (!strncasecmp(eng_grps->grp[i].ucode[0].ver_str, + grp->ucode[0].ver_str, + OTX_CPT_UCODE_VER_STR_SZ)) + return &eng_grps->grp[i]; + } + + return NULL; +} + +static struct otx_cpt_eng_grp_info *find_unused_eng_grp( + struct otx_cpt_eng_grps *eng_grps) +{ + int i; + + for (i = 0; i < OTX_CPT_MAX_ENGINE_GROUPS; i++) { + if (!eng_grps->grp[i].is_enabled) + 
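Aside on update_requested_engs() above: it subtracts the mirrored group's engine count from each requested count, so afterwards engs[i].count holds only the engines this group must reserve for itself; a zero or negative result means everything needed is borrowed from the mirrored group (the driver keeps the negative value so eng_grp_update_masks() can later trim the shared bitmap). A toy standalone sketch of that arithmetic with made-up counts, clamping at zero purely for display:

	#include <stdio.h>

	/* Toy "requested minus mirrored" accounting; numbers are illustrative. */
	static int exclusive_engines(int requested, int mirrored)
	{
		int remaining = requested - mirrored;

		return remaining > 0 ? remaining : 0;	/* engines reserved only for this group */
	}

	int main(void)
	{
		/* The three scenarios described in the comment above: */
		printf("%d\n", exclusive_engines(8, 8));	/* 1) all shared          -> 0 */
		printf("%d\n", exclusive_engines(4, 8));	/* 2) subset shared       -> 0 */
		printf("%d\n", exclusive_engines(12, 8));	/* 3) 8 shared + 4 extra  -> 4 */
		return 0;
	}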
return &eng_grps->grp[i]; + } + return NULL; +} + +static int eng_grp_update_masks(struct device *dev, + struct otx_cpt_eng_grp_info *eng_grp) +{ + struct otx_cpt_engs_rsvd *engs, *mirrored_engs; + struct otx_cpt_bitmap tmp_bmap = { {0} }; + int i, j, cnt, max_cnt; + int bit; + + for (i = 0; i < OTX_CPT_MAX_ETYPES_PER_GRP; i++) { + engs = &eng_grp->engs[i]; + if (!engs->type) + continue; + if (engs->count <= 0) + continue; + + switch (engs->type) { + case OTX_CPT_SE_TYPES: + max_cnt = eng_grp->g->avail.max_se_cnt; + break; + + case OTX_CPT_AE_TYPES: + max_cnt = eng_grp->g->avail.max_ae_cnt; + break; + + default: + dev_err(dev, "Invalid engine type %d", engs->type); + return -EINVAL; + } + + cnt = engs->count; + WARN_ON(engs->offset + max_cnt > OTX_CPT_MAX_ENGINES); + bitmap_zero(tmp_bmap.bits, eng_grp->g->engs_num); + for (j = engs->offset; j < engs->offset + max_cnt; j++) { + if (!eng_grp->g->eng_ref_cnt[j]) { + bitmap_set(tmp_bmap.bits, j, 1); + cnt--; + if (!cnt) + break; + } + } + + if (cnt) + return -ENOSPC; + + bitmap_copy(engs->bmap, tmp_bmap.bits, eng_grp->g->engs_num); + } + + if (!eng_grp->mirror.is_ena) + return 0; + + for (i = 0; i < OTX_CPT_MAX_ETYPES_PER_GRP; i++) { + engs = &eng_grp->engs[i]; + if (!engs->type) + continue; + + mirrored_engs = find_engines_by_type( + &eng_grp->g->grp[eng_grp->mirror.idx], + engs->type); + WARN_ON(!mirrored_engs && engs->count <= 0); + if (!mirrored_engs) + continue; + + bitmap_copy(tmp_bmap.bits, mirrored_engs->bmap, + eng_grp->g->engs_num); + if (engs->count < 0) { + bit = find_first_bit(mirrored_engs->bmap, + eng_grp->g->engs_num); + bitmap_clear(tmp_bmap.bits, bit, -engs->count); + } + bitmap_or(engs->bmap, engs->bmap, tmp_bmap.bits, + eng_grp->g->engs_num); + } + return 0; +} + +static int delete_engine_group(struct device *dev, + struct otx_cpt_eng_grp_info *eng_grp) +{ + int i, ret; + + if (!eng_grp->is_enabled) + return -EINVAL; + + if (eng_grp->mirror.ref_count) { + dev_err(dev, "Can't delete engine_group%d as it is used by:", + eng_grp->idx); + for (i = 0; i < OTX_CPT_MAX_ENGINE_GROUPS; i++) { + if (eng_grp->g->grp[i].mirror.is_ena && + eng_grp->g->grp[i].mirror.idx == eng_grp->idx) + dev_err(dev, "engine_group%d", i); + } + return -EINVAL; + } + + /* Removing engine group mirroring if enabled */ + remove_eng_grp_mirroring(eng_grp); + + /* Disable engine group */ + ret = disable_eng_grp(dev, eng_grp, eng_grp->g->obj); + if (ret) + return ret; + + /* Release all engines held by this engine group */ + ret = release_engines(dev, eng_grp); + if (ret) + return ret; + + device_remove_file(dev, &eng_grp->info_attr); + eng_grp->is_enabled = false; + + return 0; +} + +static int validate_1_ucode_scenario(struct device *dev, + struct otx_cpt_eng_grp_info *eng_grp, + struct otx_cpt_engines *engs, int engs_cnt) +{ + int i; + + /* Verify that ucode loaded supports requested engine types */ + for (i = 0; i < engs_cnt; i++) { + if (!otx_cpt_uc_supports_eng_type(&eng_grp->ucode[0], + engs[i].type)) { + dev_err(dev, + "Microcode %s does not support %s engines", + eng_grp->ucode[0].filename, + get_eng_type_str(engs[i].type)); + return -EINVAL; + } + } + return 0; +} + +static void update_ucode_ptrs(struct otx_cpt_eng_grp_info *eng_grp) +{ + struct otx_cpt_ucode *ucode; + + if (eng_grp->mirror.is_ena) + ucode = &eng_grp->g->grp[eng_grp->mirror.idx].ucode[0]; + else + ucode = &eng_grp->ucode[0]; + WARN_ON(!eng_grp->engs[0].type); + eng_grp->engs[0].ucode = ucode; +} + +static int create_engine_group(struct device *dev, + struct otx_cpt_eng_grps 
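Aside on eng_grp_update_masks() above: for each engine type it scans the slots reserved for that type, [offset, offset + max_cnt), sets a bit for every engine whose global reference count is zero until the requested count is satisfied, and fails with -ENOSPC otherwise. A condensed standalone sketch of that selection loop, using plain arrays instead of the driver's bitmap helpers and locking:

	#include <stdio.h>

	#define MAX_ENGINES 64

	/*
	 * Pick "count" unused engines from [offset, offset + max_cnt) and mark
	 * them in "mask". Returns 0 on success, -1 if not enough engines are free.
	 */
	static int pick_engines(const int ref_cnt[MAX_ENGINES], int offset,
				int max_cnt, int count,
				unsigned char mask[MAX_ENGINES])
	{
		for (int i = offset; i < offset + max_cnt && count > 0; i++) {
			if (!ref_cnt[i]) {
				mask[i] = 1;
				count--;
			}
		}
		return count ? -1 : 0;
	}

	int main(void)
	{
		int ref_cnt[MAX_ENGINES] = { [0] = 1, [1] = 1 };	/* engines 0 and 1 busy */
		unsigned char mask[MAX_ENGINES] = { 0 };

		if (!pick_engines(ref_cnt, 0, 24, 4, mask))	/* reserves engines 2..5 */
			for (int i = 0; i < MAX_ENGINES; i++)
				if (mask[i])
					printf("engine %d\n", i);
		return 0;
	}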
*eng_grps, + struct otx_cpt_engines *engs, int engs_cnt, + void *ucode_data[], int ucodes_cnt, + bool use_uc_from_tar_arch) +{ + struct otx_cpt_eng_grp_info *mirrored_eng_grp; + struct tar_ucode_info_t *tar_info; + struct otx_cpt_eng_grp_info *eng_grp; + int i, ret = 0; + + if (ucodes_cnt > OTX_CPT_MAX_ETYPES_PER_GRP) + return -EINVAL; + + /* Validate if requested engine types are supported by this device */ + for (i = 0; i < engs_cnt; i++) + if (!dev_supports_eng_type(eng_grps, engs[i].type)) { + dev_err(dev, "Device does not support %s engines", + get_eng_type_str(engs[i].type)); + return -EPERM; + } + + /* Find engine group which is not used */ + eng_grp = find_unused_eng_grp(eng_grps); + if (!eng_grp) { + dev_err(dev, "Error all engine groups are being used"); + return -ENOSPC; + } + + /* Load ucode */ + for (i = 0; i < ucodes_cnt; i++) { + if (use_uc_from_tar_arch) { + tar_info = (struct tar_ucode_info_t *) ucode_data[i]; + eng_grp->ucode[i] = tar_info->ucode; + ret = copy_ucode_to_dma_mem(dev, &eng_grp->ucode[i], + tar_info->ucode_ptr); + } else + ret = ucode_load(dev, &eng_grp->ucode[i], + (char *) ucode_data[i]); + if (ret) + goto err_ucode_unload; + } + + /* Validate scenario where 1 ucode is used */ + ret = validate_1_ucode_scenario(dev, eng_grp, engs, engs_cnt); + if (ret) + goto err_ucode_unload; + + /* Check if this group mirrors another existing engine group */ + mirrored_eng_grp = find_mirrored_eng_grp(eng_grp); + if (mirrored_eng_grp) { + /* Setup mirroring */ + setup_eng_grp_mirroring(eng_grp, mirrored_eng_grp); + + /* + * Update count of requested engines because some + * of them might be shared with mirrored group + */ + update_requested_engs(mirrored_eng_grp, engs, engs_cnt); + } + + /* Reserve engines */ + ret = reserve_engines(dev, eng_grp, engs, engs_cnt); + if (ret) + goto err_ucode_unload; + + /* Update ucode pointers used by engines */ + update_ucode_ptrs(eng_grp); + + /* Update engine masks used by this group */ + ret = eng_grp_update_masks(dev, eng_grp); + if (ret) + goto err_release_engs; + + /* Create sysfs entry for engine group info */ + ret = create_sysfs_eng_grps_info(dev, eng_grp); + if (ret) + goto err_release_engs; + + /* Enable engine group */ + ret = enable_eng_grp(eng_grp, eng_grps->obj); + if (ret) + goto err_release_engs; + + /* + * If this engine group mirrors another engine group + * then we need to unload ucode as we will use ucode + * from mirrored engine group + */ + if (eng_grp->mirror.is_ena) + ucode_unload(dev, &eng_grp->ucode[0]); + + eng_grp->is_enabled = true; + if (eng_grp->mirror.is_ena) + dev_info(dev, + "Engine_group%d: reuse microcode %s from group %d", + eng_grp->idx, mirrored_eng_grp->ucode[0].ver_str, + mirrored_eng_grp->idx); + else + dev_info(dev, "Engine_group%d: microcode loaded %s", + eng_grp->idx, eng_grp->ucode[0].ver_str); + + return 0; + +err_release_engs: + release_engines(dev, eng_grp); +err_ucode_unload: + ucode_unload(dev, &eng_grp->ucode[0]); + return ret; +} + +static ssize_t ucode_load_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct otx_cpt_engines engs[OTX_CPT_MAX_ETYPES_PER_GRP] = { {0} }; + char *ucode_filename[OTX_CPT_MAX_ETYPES_PER_GRP]; + char tmp_buf[OTX_CPT_UCODE_NAME_LENGTH] = { 0 }; + char *start, *val, *err_msg, *tmp; + struct otx_cpt_eng_grps *eng_grps; + int grp_idx = 0, ret = -EINVAL; + bool has_se, has_ie, has_ae; + int del_grp_idx = -1; + int ucode_idx = 0; + + if (strlen(buf) > OTX_CPT_UCODE_NAME_LENGTH) + return -EINVAL; + + eng_grps = 
container_of(attr, struct otx_cpt_eng_grps, ucode_load_attr); + err_msg = "Invalid engine group format"; + strlcpy(tmp_buf, buf, OTX_CPT_UCODE_NAME_LENGTH); + start = tmp_buf; + + has_se = has_ie = has_ae = false; + + for (;;) { + val = strsep(&start, ";"); + if (!val) + break; + val = strim(val); + if (!*val) + continue; + + if (!strncasecmp(val, "engine_group", 12)) { + if (del_grp_idx != -1) + goto err_print; + tmp = strim(strsep(&val, ":")); + if (!val) + goto err_print; + if (strlen(tmp) != 13) + goto err_print; + if (kstrtoint((tmp + 12), 10, &del_grp_idx)) + goto err_print; + val = strim(val); + if (strncasecmp(val, "null", 4)) + goto err_print; + if (strlen(val) != 4) + goto err_print; + } else if (!strncasecmp(val, "se", 2) && strchr(val, ':')) { + if (has_se || ucode_idx) + goto err_print; + tmp = strim(strsep(&val, ":")); + if (!val) + goto err_print; + if (strlen(tmp) != 2) + goto err_print; + if (kstrtoint(strim(val), 10, &engs[grp_idx].count)) + goto err_print; + engs[grp_idx++].type = OTX_CPT_SE_TYPES; + has_se = true; + } else if (!strncasecmp(val, "ae", 2) && strchr(val, ':')) { + if (has_ae || ucode_idx) + goto err_print; + tmp = strim(strsep(&val, ":")); + if (!val) + goto err_print; + if (strlen(tmp) != 2) + goto err_print; + if (kstrtoint(strim(val), 10, &engs[grp_idx].count)) + goto err_print; + engs[grp_idx++].type = OTX_CPT_AE_TYPES; + has_ae = true; + } else { + if (ucode_idx > 1) + goto err_print; + if (!strlen(val)) + goto err_print; + if (strnstr(val, " ", strlen(val))) + goto err_print; + ucode_filename[ucode_idx++] = val; + } + } + + /* Validate input parameters */ + if (del_grp_idx == -1) { + if (!(grp_idx && ucode_idx)) + goto err_print; + + if (ucode_idx > 1 && grp_idx < 2) + goto err_print; + + if (grp_idx > OTX_CPT_MAX_ETYPES_PER_GRP) { + err_msg = "Error max 2 engine types can be attached"; + goto err_print; + } + + } else { + if (del_grp_idx < 0 || + del_grp_idx >= OTX_CPT_MAX_ENGINE_GROUPS) { + dev_err(dev, "Invalid engine group index %d", + del_grp_idx); + ret = -EINVAL; + return ret; + } + + if (!eng_grps->grp[del_grp_idx].is_enabled) { + dev_err(dev, "Error engine_group%d is not configured", + del_grp_idx); + ret = -EINVAL; + return ret; + } + + if (grp_idx || ucode_idx) + goto err_print; + } + + mutex_lock(&eng_grps->lock); + + if (eng_grps->is_rdonly) { + dev_err(dev, "Disable VFs before modifying engine groups\n"); + ret = -EACCES; + goto err_unlock; + } + + if (del_grp_idx == -1) + /* create engine group */ + ret = create_engine_group(dev, eng_grps, engs, grp_idx, + (void **) ucode_filename, + ucode_idx, false); + else + /* delete engine group */ + ret = delete_engine_group(dev, &eng_grps->grp[del_grp_idx]); + if (ret) + goto err_unlock; + + print_dbg_info(dev, eng_grps); +err_unlock: + mutex_unlock(&eng_grps->lock); + return ret ? 
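Aside on the write-only ucode_load attribute parsed above: judging from the parser, it accepts semicolon-separated tokens, either "se:<count>" / "ae:<count>" engine requests plus a microcode file name (fetched via request_firmware()), or "engine_group<N>:null" to delete group N. The helper below is a hypothetical userspace sketch of driving that interface; the sysfs path and the firmware file name are illustrative only and not taken from this patch.

	#include <fcntl.h>
	#include <string.h>
	#include <sys/types.h>
	#include <unistd.h>

	/* Hypothetical path; the real attribute sits under the CPT PF device. */
	#define UCODE_LOAD "/sys/bus/pci/devices/0000:01:00.0/ucode_load"

	static int write_ucode_load(const char *cmd)
	{
		int fd = open(UCODE_LOAD, O_WRONLY);
		ssize_t n;

		if (fd < 0)
			return -1;
		n = write(fd, cmd, strlen(cmd));
		close(fd);
		return n < 0 ? -1 : 0;
	}

	int main(void)
	{
		/* Create an SE group with 48 engines from an illustrative ucode file. */
		write_ucode_load("se:48;example-se-ucode.out");
		/* Delete engine group 0 again. */
		write_ucode_load("engine_group0:null");
		return 0;
	}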
ret : count; +err_print: + dev_err(dev, "%s\n", err_msg); + + return ret; +} + +int otx_cpt_try_create_default_eng_grps(struct pci_dev *pdev, + struct otx_cpt_eng_grps *eng_grps, + int pf_type) +{ + struct tar_ucode_info_t *tar_info[OTX_CPT_MAX_ETYPES_PER_GRP] = { 0 }; + struct otx_cpt_engines engs[OTX_CPT_MAX_ETYPES_PER_GRP] = { {0} }; + struct tar_arch_info_t *tar_arch = NULL; + char *tar_filename; + int i, ret = 0; + + mutex_lock(&eng_grps->lock); + + /* + * We don't create engine group for kernel crypto if attempt to create + * it was already made (when user enabled VFs for the first time) + */ + if (eng_grps->is_first_try) + goto unlock_mutex; + eng_grps->is_first_try = true; + + /* We create group for kcrypto only if no groups are configured */ + for (i = 0; i < OTX_CPT_MAX_ENGINE_GROUPS; i++) + if (eng_grps->grp[i].is_enabled) + goto unlock_mutex; + + switch (pf_type) { + case OTX_CPT_AE: + case OTX_CPT_SE: + tar_filename = OTX_CPT_UCODE_TAR_FILE_NAME; + break; + + default: + dev_err(&pdev->dev, "Unknown PF type %d\n", pf_type); + ret = -EINVAL; + goto unlock_mutex; + } + + tar_arch = load_tar_archive(&pdev->dev, tar_filename); + if (!tar_arch) + goto unlock_mutex; + + /* + * If device supports SE engines and there is SE microcode in tar + * archive try to create engine group with SE engines for kernel + * crypto functionality (symmetric crypto) + */ + tar_info[0] = get_uc_from_tar_archive(tar_arch, OTX_CPT_SE_TYPES); + if (tar_info[0] && + dev_supports_eng_type(eng_grps, OTX_CPT_SE_TYPES)) { + + engs[0].type = OTX_CPT_SE_TYPES; + engs[0].count = eng_grps->avail.max_se_cnt; + + ret = create_engine_group(&pdev->dev, eng_grps, engs, 1, + (void **) tar_info, 1, true); + if (ret) + goto release_tar_arch; + } + /* + * If device supports AE engines and there is AE microcode in tar + * archive try to create engine group with AE engines for asymmetric + * crypto functionality. 
+ */ + tar_info[0] = get_uc_from_tar_archive(tar_arch, OTX_CPT_AE_TYPES); + if (tar_info[0] && + dev_supports_eng_type(eng_grps, OTX_CPT_AE_TYPES)) { + + engs[0].type = OTX_CPT_AE_TYPES; + engs[0].count = eng_grps->avail.max_ae_cnt; + + ret = create_engine_group(&pdev->dev, eng_grps, engs, 1, + (void **) tar_info, 1, true); + if (ret) + goto release_tar_arch; + } + + print_dbg_info(&pdev->dev, eng_grps); +release_tar_arch: + release_tar_archive(tar_arch); +unlock_mutex: + mutex_unlock(&eng_grps->lock); + return ret; +} + +void otx_cpt_set_eng_grps_is_rdonly(struct otx_cpt_eng_grps *eng_grps, + bool is_rdonly) +{ + mutex_lock(&eng_grps->lock); + + eng_grps->is_rdonly = is_rdonly; + + mutex_unlock(&eng_grps->lock); +} + +void otx_cpt_disable_all_cores(struct otx_cpt_device *cpt) +{ + int grp, timeout = 100; + u64 reg; + + /* Disengage the cores from groups */ + for (grp = 0; grp < OTX_CPT_MAX_ENGINE_GROUPS; grp++) { + writeq(0, cpt->reg_base + OTX_CPT_PF_GX_EN(grp)); + udelay(CSR_DELAY); + } + + reg = readq(cpt->reg_base + OTX_CPT_PF_EXEC_BUSY); + while (reg) { + udelay(CSR_DELAY); + reg = readq(cpt->reg_base + OTX_CPT_PF_EXEC_BUSY); + if (timeout--) { + dev_warn(&cpt->pdev->dev, "Cores still busy"); + break; + } + } + + /* Disable the cores */ + writeq(0, cpt->reg_base + OTX_CPT_PF_EXE_CTL); +} + +void otx_cpt_cleanup_eng_grps(struct pci_dev *pdev, + struct otx_cpt_eng_grps *eng_grps) +{ + struct otx_cpt_eng_grp_info *grp; + int i, j; + + mutex_lock(&eng_grps->lock); + if (eng_grps->is_ucode_load_created) { + device_remove_file(&pdev->dev, + &eng_grps->ucode_load_attr); + eng_grps->is_ucode_load_created = false; + } + + /* First delete all mirroring engine groups */ + for (i = 0; i < OTX_CPT_MAX_ENGINE_GROUPS; i++) + if (eng_grps->grp[i].mirror.is_ena) + delete_engine_group(&pdev->dev, &eng_grps->grp[i]); + + /* Delete remaining engine groups */ + for (i = 0; i < OTX_CPT_MAX_ENGINE_GROUPS; i++) + delete_engine_group(&pdev->dev, &eng_grps->grp[i]); + + /* Release memory */ + for (i = 0; i < OTX_CPT_MAX_ENGINE_GROUPS; i++) { + grp = &eng_grps->grp[i]; + for (j = 0; j < OTX_CPT_MAX_ETYPES_PER_GRP; j++) { + kfree(grp->engs[j].bmap); + grp->engs[j].bmap = NULL; + } + } + + mutex_unlock(&eng_grps->lock); +} + +int otx_cpt_init_eng_grps(struct pci_dev *pdev, + struct otx_cpt_eng_grps *eng_grps, int pf_type) +{ + struct otx_cpt_eng_grp_info *grp; + int i, j, ret = 0; + + mutex_init(&eng_grps->lock); + eng_grps->obj = pci_get_drvdata(pdev); + eng_grps->avail.se_cnt = eng_grps->avail.max_se_cnt; + eng_grps->avail.ae_cnt = eng_grps->avail.max_ae_cnt; + + eng_grps->engs_num = eng_grps->avail.max_se_cnt + + eng_grps->avail.max_ae_cnt; + if (eng_grps->engs_num > OTX_CPT_MAX_ENGINES) { + dev_err(&pdev->dev, + "Number of engines %d > than max supported %d", + eng_grps->engs_num, OTX_CPT_MAX_ENGINES); + ret = -EINVAL; + goto err; + } + + for (i = 0; i < OTX_CPT_MAX_ENGINE_GROUPS; i++) { + grp = &eng_grps->grp[i]; + grp->g = eng_grps; + grp->idx = i; + + snprintf(grp->sysfs_info_name, OTX_CPT_UCODE_NAME_LENGTH, + "engine_group%d", i); + for (j = 0; j < OTX_CPT_MAX_ETYPES_PER_GRP; j++) { + grp->engs[j].bmap = + kcalloc(BITS_TO_LONGS(eng_grps->engs_num), + sizeof(long), GFP_KERNEL); + if (!grp->engs[j].bmap) { + ret = -ENOMEM; + goto err; + } + } + } + + switch (pf_type) { + case OTX_CPT_SE: + /* OcteonTX 83XX SE CPT PF has only SE engines attached */ + eng_grps->eng_types_supported = 1 << OTX_CPT_SE_TYPES; + break; + + case OTX_CPT_AE: + /* OcteonTX 83XX AE CPT PF has only AE engines attached */ + 
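Aside on eng_types_supported, set just above and below for the SE and AE PF variants: it is a per-PF bitmask with one bit per engine type, and dev_supports_eng_type(), whose body is not part of this hunk, presumably reduces to a bit test against it. A minimal sketch of that presumed pattern, with stand-in type values rather than the real hardware-header constants:

	#include <stdbool.h>
	#include <stdio.h>

	/* Stand-in type IDs; the real values come from the CPT hardware headers. */
	enum { SE_TYPES = 1, AE_TYPES = 2 };

	/* Presumed shape of the dev_supports_eng_type() check: a simple bit test. */
	static bool supports_eng_type(int eng_types_supported, int type)
	{
		return eng_types_supported & (1 << type);
	}

	int main(void)
	{
		int se_pf = 1 << SE_TYPES;	/* SE PF advertises only SE engines */

		printf("SE on SE PF: %d\n", supports_eng_type(se_pf, SE_TYPES));	/* 1 */
		printf("AE on SE PF: %d\n", supports_eng_type(se_pf, AE_TYPES));	/* 0 */
		return 0;
	}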
eng_grps->eng_types_supported = 1 << OTX_CPT_AE_TYPES; + break; + + default: + dev_err(&pdev->dev, "Unknown PF type %d\n", pf_type); + ret = -EINVAL; + goto err; + } + + eng_grps->ucode_load_attr.show = NULL; + eng_grps->ucode_load_attr.store = ucode_load_store; + eng_grps->ucode_load_attr.attr.name = "ucode_load"; + eng_grps->ucode_load_attr.attr.mode = 0220; + sysfs_attr_init(&eng_grps->ucode_load_attr.attr); + ret = device_create_file(&pdev->dev, + &eng_grps->ucode_load_attr); + if (ret) + goto err; + eng_grps->is_ucode_load_created = true; + + print_dbg_info(&pdev->dev, eng_grps); + return ret; +err: + otx_cpt_cleanup_eng_grps(pdev, eng_grps); + return ret; +} diff --git a/drivers/crypto/marvell/octeontx/otx_cptpf_ucode.h b/drivers/crypto/marvell/octeontx/otx_cptpf_ucode.h new file mode 100644 index 000000000000..14f02b60d0c2 --- /dev/null +++ b/drivers/crypto/marvell/octeontx/otx_cptpf_ucode.h @@ -0,0 +1,180 @@ +/* SPDX-License-Identifier: GPL-2.0 + * Marvell OcteonTX CPT driver + * + * Copyright (C) 2019 Marvell International Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef __OTX_CPTPF_UCODE_H +#define __OTX_CPTPF_UCODE_H + +#include <linux/pci.h> +#include <linux/types.h> +#include <linux/module.h> +#include "otx_cpt_hw_types.h" + +/* CPT ucode name maximum length */ +#define OTX_CPT_UCODE_NAME_LENGTH 64 +/* + * On OcteonTX 83xx platform, only one type of engines is allowed to be + * attached to an engine group. + */ +#define OTX_CPT_MAX_ETYPES_PER_GRP 1 + +/* Default tar archive file names */ +#define OTX_CPT_UCODE_TAR_FILE_NAME "cpt8x-mc.tar" + +/* CPT ucode alignment */ +#define OTX_CPT_UCODE_ALIGNMENT 128 + +/* CPT ucode signature size */ +#define OTX_CPT_UCODE_SIGN_LEN 256 + +/* Microcode version string length */ +#define OTX_CPT_UCODE_VER_STR_SZ 44 + +/* Maximum number of supported engines/cores on OcteonTX 83XX platform */ +#define OTX_CPT_MAX_ENGINES 64 + +#define OTX_CPT_ENGS_BITMASK_LEN (OTX_CPT_MAX_ENGINES/(BITS_PER_BYTE * \ + sizeof(unsigned long))) + +/* Microcode types */ +enum otx_cpt_ucode_type { + OTX_CPT_AE_UC_TYPE = 1, /* AE-MAIN */ + OTX_CPT_SE_UC_TYPE1 = 20, /* SE-MAIN - combination of 21 and 22 */ + OTX_CPT_SE_UC_TYPE2 = 21, /* Fast Path IPSec + AirCrypto */ + OTX_CPT_SE_UC_TYPE3 = 22, /* + * Hash + HMAC + FlexiCrypto + RNG + Full + * Feature IPSec + AirCrypto + Kasumi + */ +}; + +struct otx_cpt_bitmap { + unsigned long bits[OTX_CPT_ENGS_BITMASK_LEN]; + int size; +}; + +struct otx_cpt_engines { + int type; + int count; +}; + +/* Microcode version number */ +struct otx_cpt_ucode_ver_num { + u8 nn; + u8 xx; + u8 yy; + u8 zz; +}; + +struct otx_cpt_ucode_hdr { + struct otx_cpt_ucode_ver_num ver_num; + u8 ver_str[OTX_CPT_UCODE_VER_STR_SZ]; + u32 code_length; + u32 padding[3]; +}; + +struct otx_cpt_ucode { + u8 ver_str[OTX_CPT_UCODE_VER_STR_SZ];/* + * ucode version in readable format + */ + struct otx_cpt_ucode_ver_num ver_num;/* ucode version number */ + char filename[OTX_CPT_UCODE_NAME_LENGTH]; /* ucode filename */ + dma_addr_t dma; /* phys address of ucode image */ + dma_addr_t align_dma; /* aligned phys address of ucode image */ + void *va; /* virt address of ucode image */ + void *align_va; /* aligned virt address of ucode image */ + u32 size; /* ucode image size */ + int type; /* ucode image type SE or AE */ +}; + +struct tar_ucode_info_t { + struct list_head list; + struct otx_cpt_ucode ucode;/* 
microcode information */ + const u8 *ucode_ptr; /* pointer to microcode in tar archive */ +}; + +/* Maximum and current number of engines available for all engine groups */ +struct otx_cpt_engs_available { + int max_se_cnt; + int max_ae_cnt; + int se_cnt; + int ae_cnt; +}; + +/* Engines reserved to an engine group */ +struct otx_cpt_engs_rsvd { + int type; /* engine type */ + int count; /* number of engines attached */ + int offset; /* constant offset of engine type in the bitmap */ + unsigned long *bmap; /* attached engines bitmap */ + struct otx_cpt_ucode *ucode; /* ucode used by these engines */ +}; + +struct otx_cpt_mirror_info { + int is_ena; /* + * is mirroring enabled, it is set only for engine + * group which mirrors another engine group + */ + int idx; /* + * index of engine group which is mirrored by this + * group, set only for engine group which mirrors + * another group + */ + int ref_count; /* + * number of times this engine group is mirrored by + * other groups, this is set only for engine group + * which is mirrored by other group(s) + */ +}; + +struct otx_cpt_eng_grp_info { + struct otx_cpt_eng_grps *g; /* pointer to engine_groups structure */ + struct device_attribute info_attr; /* group info entry attr */ + /* engines attached */ + struct otx_cpt_engs_rsvd engs[OTX_CPT_MAX_ETYPES_PER_GRP]; + /* Microcode information */ + struct otx_cpt_ucode ucode[OTX_CPT_MAX_ETYPES_PER_GRP]; + /* sysfs info entry name */ + char sysfs_info_name[OTX_CPT_UCODE_NAME_LENGTH]; + /* engine group mirroring information */ + struct otx_cpt_mirror_info mirror; + int idx; /* engine group index */ + bool is_enabled; /* + * is engine group enabled, engine group is enabled + * when it has engines attached and ucode loaded + */ +}; + +struct otx_cpt_eng_grps { + struct otx_cpt_eng_grp_info grp[OTX_CPT_MAX_ENGINE_GROUPS]; + struct device_attribute ucode_load_attr;/* ucode load attr */ + struct otx_cpt_engs_available avail; + struct mutex lock; + void *obj; + int engs_num; /* total number of engines supported */ + int eng_types_supported; /* engine types supported SE, AE */ + u8 eng_ref_cnt[OTX_CPT_MAX_ENGINES];/* engines reference count */ + bool is_ucode_load_created; /* is ucode_load sysfs entry created */ + bool is_first_try; /* is this first try to create kcrypto engine grp */ + bool is_rdonly; /* do engine groups configuration can be modified */ +}; + +int otx_cpt_init_eng_grps(struct pci_dev *pdev, + struct otx_cpt_eng_grps *eng_grps, int pf_type); +void otx_cpt_cleanup_eng_grps(struct pci_dev *pdev, + struct otx_cpt_eng_grps *eng_grps); +int otx_cpt_try_create_default_eng_grps(struct pci_dev *pdev, + struct otx_cpt_eng_grps *eng_grps, + int pf_type); +void otx_cpt_set_eng_grps_is_rdonly(struct otx_cpt_eng_grps *eng_grps, + bool is_rdonly); +int otx_cpt_uc_supports_eng_type(struct otx_cpt_ucode *ucode, int eng_type); +int otx_cpt_eng_grp_has_eng_type(struct otx_cpt_eng_grp_info *eng_grp, + int eng_type); + +#endif /* __OTX_CPTPF_UCODE_H */ diff --git a/drivers/crypto/marvell/octeontx/otx_cptvf.h b/drivers/crypto/marvell/octeontx/otx_cptvf.h new file mode 100644 index 000000000000..dd02f21659af --- /dev/null +++ b/drivers/crypto/marvell/octeontx/otx_cptvf.h @@ -0,0 +1,104 @@ +/* SPDX-License-Identifier: GPL-2.0 + * Marvell OcteonTX CPT driver + * + * Copyright (C) 2019 Marvell International Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +#ifndef __OTX_CPTVF_H +#define __OTX_CPTVF_H + +#include <linux/list.h> +#include <linux/interrupt.h> +#include <linux/device.h> +#include "otx_cpt_common.h" +#include "otx_cptvf_reqmgr.h" + +/* Flags to indicate the features supported */ +#define OTX_CPT_FLAG_DEVICE_READY BIT(1) +#define otx_cpt_device_ready(cpt) ((cpt)->flags & OTX_CPT_FLAG_DEVICE_READY) +/* Default command queue length */ +#define OTX_CPT_CMD_QLEN (4*2046) +#define OTX_CPT_CMD_QCHUNK_SIZE 1023 +#define OTX_CPT_NUM_QS_PER_VF 1 + +struct otx_cpt_cmd_chunk { + u8 *head; + dma_addr_t dma_addr; + u32 size; /* Chunk size, max OTX_CPT_INST_CHUNK_MAX_SIZE */ + struct list_head nextchunk; +}; + +struct otx_cpt_cmd_queue { + u32 idx; /* Command queue host write idx */ + u32 num_chunks; /* Number of command chunks */ + struct otx_cpt_cmd_chunk *qhead;/* + * Command queue head, instructions + * are inserted here + */ + struct otx_cpt_cmd_chunk *base; + struct list_head chead; +}; + +struct otx_cpt_cmd_qinfo { + u32 qchunksize; /* Command queue chunk size */ + struct otx_cpt_cmd_queue queue[OTX_CPT_NUM_QS_PER_VF]; +}; + +struct otx_cpt_pending_qinfo { + u32 num_queues; /* Number of queues supported */ + struct otx_cpt_pending_queue queue[OTX_CPT_NUM_QS_PER_VF]; +}; + +#define for_each_pending_queue(qinfo, q, i) \ + for (i = 0, q = &qinfo->queue[i]; i < qinfo->num_queues; i++, \ + q = &qinfo->queue[i]) + +struct otx_cptvf_wqe { + struct tasklet_struct twork; + struct otx_cptvf *cptvf; +}; + +struct otx_cptvf_wqe_info { + struct otx_cptvf_wqe vq_wqe[OTX_CPT_NUM_QS_PER_VF]; +}; + +struct otx_cptvf { + u16 flags; /* Flags to hold device status bits */ + u8 vfid; /* Device Index 0...OTX_CPT_MAX_VF_NUM */ + u8 num_vfs; /* Number of enabled VFs */ + u8 vftype; /* VF type of SE_TYPE(2) or AE_TYPE(1) */ + u8 vfgrp; /* VF group (0 - 8) */ + u8 node; /* Operating node: Bits (46:44) in BAR0 address */ + u8 priority; /* + * VF priority ring: 1-High proirity round + * robin ring;0-Low priority round robin ring; + */ + struct pci_dev *pdev; /* Pci device handle */ + void __iomem *reg_base; /* Register start address */ + void *wqe_info; /* BH worker info */ + /* MSI-X */ + cpumask_var_t affinity_mask[OTX_CPT_VF_MSIX_VECTORS]; + /* Command and Pending queues */ + u32 qsize; + u32 num_queues; + struct otx_cpt_cmd_qinfo cqinfo; /* Command queue information */ + struct otx_cpt_pending_qinfo pqinfo; /* Pending queue information */ + /* VF-PF mailbox communication */ + bool pf_acked; + bool pf_nacked; +}; + +int otx_cptvf_send_vf_up(struct otx_cptvf *cptvf); +int otx_cptvf_send_vf_down(struct otx_cptvf *cptvf); +int otx_cptvf_send_vf_to_grp_msg(struct otx_cptvf *cptvf, int group); +int otx_cptvf_send_vf_priority_msg(struct otx_cptvf *cptvf); +int otx_cptvf_send_vq_size_msg(struct otx_cptvf *cptvf); +int otx_cptvf_check_pf_ready(struct otx_cptvf *cptvf); +void otx_cptvf_handle_mbox_intr(struct otx_cptvf *cptvf); +void otx_cptvf_write_vq_doorbell(struct otx_cptvf *cptvf, u32 val); + +#endif /* __OTX_CPTVF_H */ diff --git a/drivers/crypto/marvell/octeontx/otx_cptvf_algs.c b/drivers/crypto/marvell/octeontx/otx_cptvf_algs.c new file mode 100644 index 000000000000..946fb62949b2 --- /dev/null +++ b/drivers/crypto/marvell/octeontx/otx_cptvf_algs.c @@ -0,0 +1,1744 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Marvell OcteonTX CPT driver + * + * Copyright (C) 2019 Marvell International Ltd. 
+ * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <crypto/aes.h> +#include <crypto/authenc.h> +#include <crypto/cryptd.h> +#include <crypto/des.h> +#include <crypto/internal/aead.h> +#include <crypto/sha.h> +#include <crypto/xts.h> +#include <crypto/scatterwalk.h> +#include <linux/rtnetlink.h> +#include <linux/sort.h> +#include <linux/module.h> +#include "otx_cptvf.h" +#include "otx_cptvf_algs.h" +#include "otx_cptvf_reqmgr.h" + +#define CPT_MAX_VF_NUM 64 +/* Size of salt in AES GCM mode */ +#define AES_GCM_SALT_SIZE 4 +/* Size of IV in AES GCM mode */ +#define AES_GCM_IV_SIZE 8 +/* Size of ICV (Integrity Check Value) in AES GCM mode */ +#define AES_GCM_ICV_SIZE 16 +/* Offset of IV in AES GCM mode */ +#define AES_GCM_IV_OFFSET 8 +#define CONTROL_WORD_LEN 8 +#define KEY2_OFFSET 48 +#define DMA_MODE_FLAG(dma_mode) \ + (((dma_mode) == OTX_CPT_DMA_GATHER_SCATTER) ? (1 << 7) : 0) + +/* Truncated SHA digest size */ +#define SHA1_TRUNC_DIGEST_SIZE 12 +#define SHA256_TRUNC_DIGEST_SIZE 16 +#define SHA384_TRUNC_DIGEST_SIZE 24 +#define SHA512_TRUNC_DIGEST_SIZE 32 + +static DEFINE_MUTEX(mutex); +static int is_crypto_registered; + +struct cpt_device_desc { + enum otx_cptpf_type pf_type; + struct pci_dev *dev; + int num_queues; +}; + +struct cpt_device_table { + atomic_t count; + struct cpt_device_desc desc[CPT_MAX_VF_NUM]; +}; + +static struct cpt_device_table se_devices = { + .count = ATOMIC_INIT(0) +}; + +static struct cpt_device_table ae_devices = { + .count = ATOMIC_INIT(0) +}; + +static inline int get_se_device(struct pci_dev **pdev, int *cpu_num) +{ + int count, ret = 0; + + count = atomic_read(&se_devices.count); + if (count < 1) + return -ENODEV; + + *cpu_num = get_cpu(); + + if (se_devices.desc[0].pf_type == OTX_CPT_SE) { + /* + * On OcteonTX platform there is one CPT instruction queue bound + * to each VF. We get maximum performance if one CPT queue + * is available for each cpu otherwise CPT queues need to be + * shared between cpus. 
+ */ + if (*cpu_num >= count) + *cpu_num %= count; + *pdev = se_devices.desc[*cpu_num].dev; + } else { + pr_err("Unknown PF type %d\n", se_devices.desc[0].pf_type); + ret = -EINVAL; + } + put_cpu(); + + return ret; +} + +static inline int validate_hmac_cipher_null(struct otx_cpt_req_info *cpt_req) +{ + struct otx_cpt_req_ctx *rctx; + struct aead_request *req; + struct crypto_aead *tfm; + + req = container_of(cpt_req->areq, struct aead_request, base); + tfm = crypto_aead_reqtfm(req); + rctx = aead_request_ctx(req); + if (memcmp(rctx->fctx.hmac.s.hmac_calc, + rctx->fctx.hmac.s.hmac_recv, + crypto_aead_authsize(tfm)) != 0) + return -EBADMSG; + + return 0; +} + +static void otx_cpt_aead_callback(int status, void *arg1, void *arg2) +{ + struct otx_cpt_info_buffer *cpt_info = arg2; + struct crypto_async_request *areq = arg1; + struct otx_cpt_req_info *cpt_req; + struct pci_dev *pdev; + + cpt_req = cpt_info->req; + if (!status) { + /* + * When selected cipher is NULL we need to manually + * verify whether calculated hmac value matches + * received hmac value + */ + if (cpt_req->req_type == OTX_CPT_AEAD_ENC_DEC_NULL_REQ && + !cpt_req->is_enc) + status = validate_hmac_cipher_null(cpt_req); + } + if (cpt_info) { + pdev = cpt_info->pdev; + do_request_cleanup(pdev, cpt_info); + } + if (areq) + areq->complete(areq, status); +} + +static void output_iv_copyback(struct crypto_async_request *areq) +{ + struct otx_cpt_req_info *req_info; + struct skcipher_request *sreq; + struct crypto_skcipher *stfm; + struct otx_cpt_req_ctx *rctx; + struct otx_cpt_enc_ctx *ctx; + u32 start, ivsize; + + sreq = container_of(areq, struct skcipher_request, base); + stfm = crypto_skcipher_reqtfm(sreq); + ctx = crypto_skcipher_ctx(stfm); + if (ctx->cipher_type == OTX_CPT_AES_CBC || + ctx->cipher_type == OTX_CPT_DES3_CBC) { + rctx = skcipher_request_ctx(sreq); + req_info = &rctx->cpt_req; + ivsize = crypto_skcipher_ivsize(stfm); + start = sreq->cryptlen - ivsize; + + if (req_info->is_enc) { + scatterwalk_map_and_copy(sreq->iv, sreq->dst, start, + ivsize, 0); + } else { + if (sreq->src != sreq->dst) { + scatterwalk_map_and_copy(sreq->iv, sreq->src, + start, ivsize, 0); + } else { + memcpy(sreq->iv, req_info->iv_out, ivsize); + kfree(req_info->iv_out); + } + } + } +} + +static void otx_cpt_skcipher_callback(int status, void *arg1, void *arg2) +{ + struct otx_cpt_info_buffer *cpt_info = arg2; + struct crypto_async_request *areq = arg1; + struct pci_dev *pdev; + + if (areq) { + if (!status) + output_iv_copyback(areq); + if (cpt_info) { + pdev = cpt_info->pdev; + do_request_cleanup(pdev, cpt_info); + } + areq->complete(areq, status); + } +} + +static inline void update_input_data(struct otx_cpt_req_info *req_info, + struct scatterlist *inp_sg, + u32 nbytes, u32 *argcnt) +{ + req_info->req.dlen += nbytes; + + while (nbytes) { + u32 len = min(nbytes, inp_sg->length); + u8 *ptr = sg_virt(inp_sg); + + req_info->in[*argcnt].vptr = (void *)ptr; + req_info->in[*argcnt].size = len; + nbytes -= len; + ++(*argcnt); + inp_sg = sg_next(inp_sg); + } +} + +static inline void update_output_data(struct otx_cpt_req_info *req_info, + struct scatterlist *outp_sg, + u32 offset, u32 nbytes, u32 *argcnt) +{ + req_info->rlen += nbytes; + + while (nbytes) { + u32 len = min(nbytes, outp_sg->length - offset); + u8 *ptr = sg_virt(outp_sg); + + req_info->out[*argcnt].vptr = (void *) (ptr + offset); + req_info->out[*argcnt].size = len; + nbytes -= len; + ++(*argcnt); + offset = 0; + outp_sg = sg_next(outp_sg); + } +} + +static inline u32 create_ctx_hdr(struct 
skcipher_request *req, u32 enc, + u32 *argcnt) +{ + struct crypto_skcipher *stfm = crypto_skcipher_reqtfm(req); + struct otx_cpt_req_ctx *rctx = skcipher_request_ctx(req); + struct otx_cpt_req_info *req_info = &rctx->cpt_req; + struct crypto_tfm *tfm = crypto_skcipher_tfm(stfm); + struct otx_cpt_enc_ctx *ctx = crypto_tfm_ctx(tfm); + struct otx_cpt_fc_ctx *fctx = &rctx->fctx; + int ivsize = crypto_skcipher_ivsize(stfm); + u32 start = req->cryptlen - ivsize; + u64 *ctrl_flags = NULL; + gfp_t flags; + + flags = (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ? + GFP_KERNEL : GFP_ATOMIC; + req_info->ctrl.s.dma_mode = OTX_CPT_DMA_GATHER_SCATTER; + req_info->ctrl.s.se_req = OTX_CPT_SE_CORE_REQ; + + req_info->req.opcode.s.major = OTX_CPT_MAJOR_OP_FC | + DMA_MODE_FLAG(OTX_CPT_DMA_GATHER_SCATTER); + if (enc) { + req_info->req.opcode.s.minor = 2; + } else { + req_info->req.opcode.s.minor = 3; + if ((ctx->cipher_type == OTX_CPT_AES_CBC || + ctx->cipher_type == OTX_CPT_DES3_CBC) && + req->src == req->dst) { + req_info->iv_out = kmalloc(ivsize, flags); + if (!req_info->iv_out) + return -ENOMEM; + + scatterwalk_map_and_copy(req_info->iv_out, req->src, + start, ivsize, 0); + } + } + /* Encryption data length */ + req_info->req.param1 = req->cryptlen; + /* Authentication data length */ + req_info->req.param2 = 0; + + fctx->enc.enc_ctrl.e.enc_cipher = ctx->cipher_type; + fctx->enc.enc_ctrl.e.aes_key = ctx->key_type; + fctx->enc.enc_ctrl.e.iv_source = OTX_CPT_FROM_CPTR; + + if (ctx->cipher_type == OTX_CPT_AES_XTS) + memcpy(fctx->enc.encr_key, ctx->enc_key, ctx->key_len * 2); + else + memcpy(fctx->enc.encr_key, ctx->enc_key, ctx->key_len); + + memcpy(fctx->enc.encr_iv, req->iv, crypto_skcipher_ivsize(stfm)); + + ctrl_flags = (u64 *)&fctx->enc.enc_ctrl.flags; + *ctrl_flags = cpu_to_be64(*ctrl_flags); + + /* + * Storing Packet Data Information in offset + * Control Word First 8 bytes + */ + req_info->in[*argcnt].vptr = (u8 *)&rctx->ctrl_word; + req_info->in[*argcnt].size = CONTROL_WORD_LEN; + req_info->req.dlen += CONTROL_WORD_LEN; + ++(*argcnt); + + req_info->in[*argcnt].vptr = (u8 *)fctx; + req_info->in[*argcnt].size = sizeof(struct otx_cpt_fc_ctx); + req_info->req.dlen += sizeof(struct otx_cpt_fc_ctx); + + ++(*argcnt); + + return 0; +} + +static inline u32 create_input_list(struct skcipher_request *req, u32 enc, + u32 enc_iv_len) +{ + struct otx_cpt_req_ctx *rctx = skcipher_request_ctx(req); + struct otx_cpt_req_info *req_info = &rctx->cpt_req; + u32 argcnt = 0; + int ret; + + ret = create_ctx_hdr(req, enc, &argcnt); + if (ret) + return ret; + + update_input_data(req_info, req->src, req->cryptlen, &argcnt); + req_info->incnt = argcnt; + + return 0; +} + +static inline void create_output_list(struct skcipher_request *req, + u32 enc_iv_len) +{ + struct otx_cpt_req_ctx *rctx = skcipher_request_ctx(req); + struct otx_cpt_req_info *req_info = &rctx->cpt_req; + u32 argcnt = 0; + + /* + * OUTPUT Buffer Processing + * AES encryption/decryption output would be + * received in the following format + * + * ------IV--------|------ENCRYPTED/DECRYPTED DATA-----| + * [ 16 Bytes/ [ Request Enc/Dec/ DATA Len AES CBC ] + */ + update_output_data(req_info, req->dst, 0, req->cryptlen, &argcnt); + req_info->outcnt = argcnt; +} + +static inline int cpt_enc_dec(struct skcipher_request *req, u32 enc) +{ + struct crypto_skcipher *stfm = crypto_skcipher_reqtfm(req); + struct otx_cpt_req_ctx *rctx = skcipher_request_ctx(req); + struct otx_cpt_req_info *req_info = &rctx->cpt_req; + u32 enc_iv_len = crypto_skcipher_ivsize(stfm); + struct 
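Aside on the iv_out handling above: for CBC modes the IV that must be handed back to the caller is the last ciphertext block, so create_ctx_hdr() snapshots that block before an in-place (src == dst) decryption overwrites it, and output_iv_copyback() returns it on completion. A standalone sketch of why the snapshot is needed, with a fake "decryption" that simply clobbers the buffer:

	#include <stdio.h>
	#include <string.h>

	#define BLK 16

	int main(void)
	{
		unsigned char buf[4 * BLK];	/* pretend ciphertext, 4 blocks */
		unsigned char iv_out[BLK];

		memset(buf, 0xab, sizeof(buf));

		/* Save the last ciphertext block: it is the next chaining IV. */
		memcpy(iv_out, buf + sizeof(buf) - BLK, BLK);

		/* In-place decryption overwrites the ciphertext with plaintext... */
		memset(buf, 0x00, sizeof(buf));

		/* ...so the IV can only be produced from the saved copy. */
		printf("iv_out[0] = 0x%02x\n", iv_out[0]);	/* 0xab, not 0x00 */
		return 0;
	}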
pci_dev *pdev; + int status, cpu_num; + + /* Validate that request doesn't exceed maximum CPT supported size */ + if (req->cryptlen > OTX_CPT_MAX_REQ_SIZE) + return -E2BIG; + + /* Clear control words */ + rctx->ctrl_word.flags = 0; + rctx->fctx.enc.enc_ctrl.flags = 0; + + status = create_input_list(req, enc, enc_iv_len); + if (status) + return status; + create_output_list(req, enc_iv_len); + + status = get_se_device(&pdev, &cpu_num); + if (status) + return status; + + req_info->callback = (void *)otx_cpt_skcipher_callback; + req_info->areq = &req->base; + req_info->req_type = OTX_CPT_ENC_DEC_REQ; + req_info->is_enc = enc; + req_info->is_trunc_hmac = false; + req_info->ctrl.s.grp = 0; + + /* + * We perform an asynchronous send and once + * the request is completed the driver would + * intimate through registered call back functions + */ + status = otx_cpt_do_request(pdev, req_info, cpu_num); + + return status; +} + +static int otx_cpt_skcipher_encrypt(struct skcipher_request *req) +{ + return cpt_enc_dec(req, true); +} + +static int otx_cpt_skcipher_decrypt(struct skcipher_request *req) +{ + return cpt_enc_dec(req, false); +} + +static int otx_cpt_skcipher_xts_setkey(struct crypto_skcipher *tfm, + const u8 *key, u32 keylen) +{ + struct otx_cpt_enc_ctx *ctx = crypto_skcipher_ctx(tfm); + const u8 *key2 = key + (keylen / 2); + const u8 *key1 = key; + int ret; + + ret = xts_check_key(crypto_skcipher_tfm(tfm), key, keylen); + if (ret) + return ret; + ctx->key_len = keylen; + memcpy(ctx->enc_key, key1, keylen / 2); + memcpy(ctx->enc_key + KEY2_OFFSET, key2, keylen / 2); + ctx->cipher_type = OTX_CPT_AES_XTS; + switch (ctx->key_len) { + case 2 * AES_KEYSIZE_128: + ctx->key_type = OTX_CPT_AES_128_BIT; + break; + case 2 * AES_KEYSIZE_256: + ctx->key_type = OTX_CPT_AES_256_BIT; + break; + default: + return -EINVAL; + } + + return 0; +} + +static int cpt_des_setkey(struct crypto_skcipher *tfm, const u8 *key, + u32 keylen, u8 cipher_type) +{ + struct otx_cpt_enc_ctx *ctx = crypto_skcipher_ctx(tfm); + + if (keylen != DES3_EDE_KEY_SIZE) + return -EINVAL; + + ctx->key_len = keylen; + ctx->cipher_type = cipher_type; + + memcpy(ctx->enc_key, key, keylen); + + return 0; +} + +static int cpt_aes_setkey(struct crypto_skcipher *tfm, const u8 *key, + u32 keylen, u8 cipher_type) +{ + struct otx_cpt_enc_ctx *ctx = crypto_skcipher_ctx(tfm); + + switch (keylen) { + case AES_KEYSIZE_128: + ctx->key_type = OTX_CPT_AES_128_BIT; + break; + case AES_KEYSIZE_192: + ctx->key_type = OTX_CPT_AES_192_BIT; + break; + case AES_KEYSIZE_256: + ctx->key_type = OTX_CPT_AES_256_BIT; + break; + default: + return -EINVAL; + } + ctx->key_len = keylen; + ctx->cipher_type = cipher_type; + + memcpy(ctx->enc_key, key, keylen); + + return 0; +} + +static int otx_cpt_skcipher_cbc_aes_setkey(struct crypto_skcipher *tfm, + const u8 *key, u32 keylen) +{ + return cpt_aes_setkey(tfm, key, keylen, OTX_CPT_AES_CBC); +} + +static int otx_cpt_skcipher_ecb_aes_setkey(struct crypto_skcipher *tfm, + const u8 *key, u32 keylen) +{ + return cpt_aes_setkey(tfm, key, keylen, OTX_CPT_AES_ECB); +} + +static int otx_cpt_skcipher_cfb_aes_setkey(struct crypto_skcipher *tfm, + const u8 *key, u32 keylen) +{ + return cpt_aes_setkey(tfm, key, keylen, OTX_CPT_AES_CFB); +} + +static int otx_cpt_skcipher_cbc_des3_setkey(struct crypto_skcipher *tfm, + const u8 *key, u32 keylen) +{ + return cpt_des_setkey(tfm, key, keylen, OTX_CPT_DES3_CBC); +} + +static int otx_cpt_skcipher_ecb_des3_setkey(struct crypto_skcipher *tfm, + const u8 *key, u32 keylen) +{ + return 
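Aside on otx_cpt_skcipher_xts_setkey() below: an XTS key is two equal halves, the data key followed by the tweak key, and the handler stores them at offset 0 and at KEY2_OFFSET (48) of enc_key after xts_check_key() has validated the key (even length and, in FIPS mode, distinct halves). A minimal sketch of that layout for a 2x256-bit key, reusing the same offset constant:

	#include <stdio.h>
	#include <string.h>

	#define KEY2_OFFSET 48	/* same constant as in the driver above */

	int main(void)
	{
		unsigned char xts_key[64];	/* 2 * AES-256: data key || tweak key */
		unsigned char enc_key[KEY2_OFFSET + 32];

		memset(xts_key, 0x11, 32);	/* data (cipher) key */
		memset(xts_key + 32, 0x22, 32);	/* tweak key         */

		/* Same split the setkey handler performs. */
		memcpy(enc_key, xts_key, 32);
		memcpy(enc_key + KEY2_OFFSET, xts_key + 32, 32);

		printf("key1 starts 0x%02x, key2 starts 0x%02x\n",
		       enc_key[0], enc_key[KEY2_OFFSET]);	/* 0x11, 0x22 */
		return 0;
	}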
cpt_des_setkey(tfm, key, keylen, OTX_CPT_DES3_ECB); +} + +static int otx_cpt_enc_dec_init(struct crypto_skcipher *tfm) +{ + struct otx_cpt_enc_ctx *ctx = crypto_skcipher_ctx(tfm); + + memset(ctx, 0, sizeof(*ctx)); + /* + * Additional memory for skcipher_request is + * allocated since the cryptd daemon uses + * this memory for request_ctx information + */ + crypto_skcipher_set_reqsize(tfm, sizeof(struct otx_cpt_req_ctx) + + sizeof(struct skcipher_request)); + + return 0; +} + +static int cpt_aead_init(struct crypto_aead *tfm, u8 cipher_type, u8 mac_type) +{ + struct otx_cpt_aead_ctx *ctx = crypto_aead_ctx(tfm); + + ctx->cipher_type = cipher_type; + ctx->mac_type = mac_type; + + /* + * When selected cipher is NULL we use HMAC opcode instead of + * FLEXICRYPTO opcode therefore we don't need to use HASH algorithms + * for calculating ipad and opad + */ + if (ctx->cipher_type != OTX_CPT_CIPHER_NULL) { + switch (ctx->mac_type) { + case OTX_CPT_SHA1: + ctx->hashalg = crypto_alloc_shash("sha1", 0, + CRYPTO_ALG_ASYNC); + if (IS_ERR(ctx->hashalg)) + return PTR_ERR(ctx->hashalg); + break; + + case OTX_CPT_SHA256: + ctx->hashalg = crypto_alloc_shash("sha256", 0, + CRYPTO_ALG_ASYNC); + if (IS_ERR(ctx->hashalg)) + return PTR_ERR(ctx->hashalg); + break; + + case OTX_CPT_SHA384: + ctx->hashalg = crypto_alloc_shash("sha384", 0, + CRYPTO_ALG_ASYNC); + if (IS_ERR(ctx->hashalg)) + return PTR_ERR(ctx->hashalg); + break; + + case OTX_CPT_SHA512: + ctx->hashalg = crypto_alloc_shash("sha512", 0, + CRYPTO_ALG_ASYNC); + if (IS_ERR(ctx->hashalg)) + return PTR_ERR(ctx->hashalg); + break; + } + } + + crypto_aead_set_reqsize(tfm, sizeof(struct otx_cpt_req_ctx)); + + return 0; +} + +static int otx_cpt_aead_cbc_aes_sha1_init(struct crypto_aead *tfm) +{ + return cpt_aead_init(tfm, OTX_CPT_AES_CBC, OTX_CPT_SHA1); +} + +static int otx_cpt_aead_cbc_aes_sha256_init(struct crypto_aead *tfm) +{ + return cpt_aead_init(tfm, OTX_CPT_AES_CBC, OTX_CPT_SHA256); +} + +static int otx_cpt_aead_cbc_aes_sha384_init(struct crypto_aead *tfm) +{ + return cpt_aead_init(tfm, OTX_CPT_AES_CBC, OTX_CPT_SHA384); +} + +static int otx_cpt_aead_cbc_aes_sha512_init(struct crypto_aead *tfm) +{ + return cpt_aead_init(tfm, OTX_CPT_AES_CBC, OTX_CPT_SHA512); +} + +static int otx_cpt_aead_ecb_null_sha1_init(struct crypto_aead *tfm) +{ + return cpt_aead_init(tfm, OTX_CPT_CIPHER_NULL, OTX_CPT_SHA1); +} + +static int otx_cpt_aead_ecb_null_sha256_init(struct crypto_aead *tfm) +{ + return cpt_aead_init(tfm, OTX_CPT_CIPHER_NULL, OTX_CPT_SHA256); +} + +static int otx_cpt_aead_ecb_null_sha384_init(struct crypto_aead *tfm) +{ + return cpt_aead_init(tfm, OTX_CPT_CIPHER_NULL, OTX_CPT_SHA384); +} + +static int otx_cpt_aead_ecb_null_sha512_init(struct crypto_aead *tfm) +{ + return cpt_aead_init(tfm, OTX_CPT_CIPHER_NULL, OTX_CPT_SHA512); +} + +static int otx_cpt_aead_gcm_aes_init(struct crypto_aead *tfm) +{ + return cpt_aead_init(tfm, OTX_CPT_AES_GCM, OTX_CPT_MAC_NULL); +} + +static void otx_cpt_aead_exit(struct crypto_aead *tfm) +{ + struct otx_cpt_aead_ctx *ctx = crypto_aead_ctx(tfm); + + kfree(ctx->ipad); + kfree(ctx->opad); + if (ctx->hashalg) + crypto_free_shash(ctx->hashalg); + kfree(ctx->sdesc); +} + +/* + * This is the Integrity Check Value validation (aka the authentication tag + * length) + */ +static int otx_cpt_aead_set_authsize(struct crypto_aead *tfm, + unsigned int authsize) +{ + struct otx_cpt_aead_ctx *ctx = crypto_aead_ctx(tfm); + + switch (ctx->mac_type) { + case OTX_CPT_SHA1: + if (authsize != SHA1_DIGEST_SIZE && + authsize != SHA1_TRUNC_DIGEST_SIZE) 
+ return -EINVAL; + + if (authsize == SHA1_TRUNC_DIGEST_SIZE) + ctx->is_trunc_hmac = true; + break; + + case OTX_CPT_SHA256: + if (authsize != SHA256_DIGEST_SIZE && + authsize != SHA256_TRUNC_DIGEST_SIZE) + return -EINVAL; + + if (authsize == SHA256_TRUNC_DIGEST_SIZE) + ctx->is_trunc_hmac = true; + break; + + case OTX_CPT_SHA384: + if (authsize != SHA384_DIGEST_SIZE && + authsize != SHA384_TRUNC_DIGEST_SIZE) + return -EINVAL; + + if (authsize == SHA384_TRUNC_DIGEST_SIZE) + ctx->is_trunc_hmac = true; + break; + + case OTX_CPT_SHA512: + if (authsize != SHA512_DIGEST_SIZE && + authsize != SHA512_TRUNC_DIGEST_SIZE) + return -EINVAL; + + if (authsize == SHA512_TRUNC_DIGEST_SIZE) + ctx->is_trunc_hmac = true; + break; + + case OTX_CPT_MAC_NULL: + if (ctx->cipher_type == OTX_CPT_AES_GCM) { + if (authsize != AES_GCM_ICV_SIZE) + return -EINVAL; + } else + return -EINVAL; + break; + + default: + return -EINVAL; + } + + tfm->authsize = authsize; + return 0; +} + +static struct otx_cpt_sdesc *alloc_sdesc(struct crypto_shash *alg) +{ + struct otx_cpt_sdesc *sdesc; + int size; + + size = sizeof(struct shash_desc) + crypto_shash_descsize(alg); + sdesc = kmalloc(size, GFP_KERNEL); + if (!sdesc) + return NULL; + + sdesc->shash.tfm = alg; + + return sdesc; +} + +static inline void swap_data32(void *buf, u32 len) +{ + u32 *store = (u32 *) buf; + int i = 0; + + for (i = 0 ; i < len/sizeof(u32); i++, store++) + *store = cpu_to_be32(*store); +} + +static inline void swap_data64(void *buf, u32 len) +{ + u64 *store = (u64 *) buf; + int i = 0; + + for (i = 0 ; i < len/sizeof(u64); i++, store++) + *store = cpu_to_be64(*store); +} + +static int copy_pad(u8 mac_type, u8 *out_pad, u8 *in_pad) +{ + struct sha512_state *sha512; + struct sha256_state *sha256; + struct sha1_state *sha1; + + switch (mac_type) { + case OTX_CPT_SHA1: + sha1 = (struct sha1_state *) in_pad; + swap_data32(sha1->state, SHA1_DIGEST_SIZE); + memcpy(out_pad, &sha1->state, SHA1_DIGEST_SIZE); + break; + + case OTX_CPT_SHA256: + sha256 = (struct sha256_state *) in_pad; + swap_data32(sha256->state, SHA256_DIGEST_SIZE); + memcpy(out_pad, &sha256->state, SHA256_DIGEST_SIZE); + break; + + case OTX_CPT_SHA384: + case OTX_CPT_SHA512: + sha512 = (struct sha512_state *) in_pad; + swap_data64(sha512->state, SHA512_DIGEST_SIZE); + memcpy(out_pad, &sha512->state, SHA512_DIGEST_SIZE); + break; + + default: + return -EINVAL; + } + + return 0; +} + +static int aead_hmac_init(struct crypto_aead *cipher) +{ + struct otx_cpt_aead_ctx *ctx = crypto_aead_ctx(cipher); + int state_size = crypto_shash_statesize(ctx->hashalg); + int ds = crypto_shash_digestsize(ctx->hashalg); + int bs = crypto_shash_blocksize(ctx->hashalg); + int authkeylen = ctx->auth_key_len; + u8 *ipad = NULL, *opad = NULL; + int ret = 0, icount = 0; + + ctx->sdesc = alloc_sdesc(ctx->hashalg); + if (!ctx->sdesc) + return -ENOMEM; + + ctx->ipad = kzalloc(bs, GFP_KERNEL); + if (!ctx->ipad) { + ret = -ENOMEM; + goto calc_fail; + } + + ctx->opad = kzalloc(bs, GFP_KERNEL); + if (!ctx->opad) { + ret = -ENOMEM; + goto calc_fail; + } + + ipad = kzalloc(state_size, GFP_KERNEL); + if (!ipad) { + ret = -ENOMEM; + goto calc_fail; + } + + opad = kzalloc(state_size, GFP_KERNEL); + if (!opad) { + ret = -ENOMEM; + goto calc_fail; + } + + if (authkeylen > bs) { + ret = crypto_shash_digest(&ctx->sdesc->shash, ctx->key, + authkeylen, ipad); + if (ret) + goto calc_fail; + + authkeylen = ds; + } else { + memcpy(ipad, ctx->key, authkeylen); + } + + memset(ipad + authkeylen, 0, bs - authkeylen); + memcpy(opad, ipad, bs); + + 
for (icount = 0; icount < bs; icount++) { + ipad[icount] ^= 0x36; + opad[icount] ^= 0x5c; + } + + /* + * Partial Hash calculated from the software + * algorithm is retrieved for IPAD & OPAD + */ + + /* IPAD Calculation */ + crypto_shash_init(&ctx->sdesc->shash); + crypto_shash_update(&ctx->sdesc->shash, ipad, bs); + crypto_shash_export(&ctx->sdesc->shash, ipad); + ret = copy_pad(ctx->mac_type, ctx->ipad, ipad); + if (ret) + goto calc_fail; + + /* OPAD Calculation */ + crypto_shash_init(&ctx->sdesc->shash); + crypto_shash_update(&ctx->sdesc->shash, opad, bs); + crypto_shash_export(&ctx->sdesc->shash, opad); + ret = copy_pad(ctx->mac_type, ctx->opad, opad); + if (ret) + goto calc_fail; + + kfree(ipad); + kfree(opad); + + return 0; + +calc_fail: + kfree(ctx->ipad); + ctx->ipad = NULL; + kfree(ctx->opad); + ctx->opad = NULL; + kfree(ipad); + kfree(opad); + kfree(ctx->sdesc); + ctx->sdesc = NULL; + + return ret; +} + +static int otx_cpt_aead_cbc_aes_sha_setkey(struct crypto_aead *cipher, + const unsigned char *key, + unsigned int keylen) +{ + struct otx_cpt_aead_ctx *ctx = crypto_aead_ctx(cipher); + struct crypto_authenc_key_param *param; + int enckeylen = 0, authkeylen = 0; + struct rtattr *rta = (void *)key; + int status = -EINVAL; + + if (!RTA_OK(rta, keylen)) + goto badkey; + + if (rta->rta_type != CRYPTO_AUTHENC_KEYA_PARAM) + goto badkey; + + if (RTA_PAYLOAD(rta) < sizeof(*param)) + goto badkey; + + param = RTA_DATA(rta); + enckeylen = be32_to_cpu(param->enckeylen); + key += RTA_ALIGN(rta->rta_len); + keylen -= RTA_ALIGN(rta->rta_len); + if (keylen < enckeylen) + goto badkey; + + if (keylen > OTX_CPT_MAX_KEY_SIZE) + goto badkey; + + authkeylen = keylen - enckeylen; + memcpy(ctx->key, key, keylen); + + switch (enckeylen) { + case AES_KEYSIZE_128: + ctx->key_type = OTX_CPT_AES_128_BIT; + break; + case AES_KEYSIZE_192: + ctx->key_type = OTX_CPT_AES_192_BIT; + break; + case AES_KEYSIZE_256: + ctx->key_type = OTX_CPT_AES_256_BIT; + break; + default: + /* Invalid key length */ + goto badkey; + } + + ctx->enc_key_len = enckeylen; + ctx->auth_key_len = authkeylen; + + status = aead_hmac_init(cipher); + if (status) + goto badkey; + + return 0; +badkey: + return status; +} + +static int otx_cpt_aead_ecb_null_sha_setkey(struct crypto_aead *cipher, + const unsigned char *key, + unsigned int keylen) +{ + struct otx_cpt_aead_ctx *ctx = crypto_aead_ctx(cipher); + struct crypto_authenc_key_param *param; + struct rtattr *rta = (void *)key; + int enckeylen = 0; + + if (!RTA_OK(rta, keylen)) + goto badkey; + + if (rta->rta_type != CRYPTO_AUTHENC_KEYA_PARAM) + goto badkey; + + if (RTA_PAYLOAD(rta) < sizeof(*param)) + goto badkey; + + param = RTA_DATA(rta); + enckeylen = be32_to_cpu(param->enckeylen); + key += RTA_ALIGN(rta->rta_len); + keylen -= RTA_ALIGN(rta->rta_len); + if (enckeylen != 0) + goto badkey; + + if (keylen > OTX_CPT_MAX_KEY_SIZE) + goto badkey; + + memcpy(ctx->key, key, keylen); + ctx->enc_key_len = enckeylen; + ctx->auth_key_len = keylen; + return 0; +badkey: + return -EINVAL; +} + +static int otx_cpt_aead_gcm_aes_setkey(struct crypto_aead *cipher, + const unsigned char *key, + unsigned int keylen) +{ + struct otx_cpt_aead_ctx *ctx = crypto_aead_ctx(cipher); + + /* + * For aes gcm we expect to get encryption key (16, 24, 32 bytes) + * and salt (4 bytes) + */ + switch (keylen) { + case AES_KEYSIZE_128 + AES_GCM_SALT_SIZE: + ctx->key_type = OTX_CPT_AES_128_BIT; + ctx->enc_key_len = AES_KEYSIZE_128; + break; + case AES_KEYSIZE_192 + AES_GCM_SALT_SIZE: + ctx->key_type = OTX_CPT_AES_192_BIT; + 
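Aside on aead_hmac_init() above: it follows RFC 2104, zero-padding the (possibly pre-hashed) authentication key to the hash block size, XORing with 0x36 to form ipad and 0x5c to form opad, hashing one block of each, and exporting the partial state that copy_pad() then places into the flexi-crypto context. A standalone sketch of just the pad construction (no hashing), assuming a 64-byte block size as used by SHA-1/SHA-256 and a key no longer than one block:

	#include <stdio.h>
	#include <string.h>

	#define BLOCK_SIZE 64	/* SHA-1/SHA-256 block size */

	static void build_pads(const unsigned char *key, size_t keylen,
			       unsigned char *ipad, unsigned char *opad)
	{
		/* Keys longer than a block would first be hashed down; not shown. */
		memset(ipad, 0, BLOCK_SIZE);
		memcpy(ipad, key, keylen);
		memcpy(opad, ipad, BLOCK_SIZE);

		for (int i = 0; i < BLOCK_SIZE; i++) {
			ipad[i] ^= 0x36;
			opad[i] ^= 0x5c;
		}
	}

	int main(void)
	{
		unsigned char key[] = "secret";
		unsigned char ipad[BLOCK_SIZE], opad[BLOCK_SIZE];

		build_pads(key, strlen((char *)key), ipad, opad);
		printf("ipad[0]=0x%02x opad[0]=0x%02x\n", ipad[0], opad[0]);
		/* 's' ^ 0x36 = 0x45, 's' ^ 0x5c = 0x2f */
		return 0;
	}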
ctx->enc_key_len = AES_KEYSIZE_192; + break; + case AES_KEYSIZE_256 + AES_GCM_SALT_SIZE: + ctx->key_type = OTX_CPT_AES_256_BIT; + ctx->enc_key_len = AES_KEYSIZE_256; + break; + default: + /* Invalid key and salt length */ + return -EINVAL; + } + + /* Store encryption key and salt */ + memcpy(ctx->key, key, keylen); + + return 0; +} + +static inline u32 create_aead_ctx_hdr(struct aead_request *req, u32 enc, + u32 *argcnt) +{ + struct otx_cpt_req_ctx *rctx = aead_request_ctx(req); + struct crypto_aead *tfm = crypto_aead_reqtfm(req); + struct otx_cpt_aead_ctx *ctx = crypto_aead_ctx(tfm); + struct otx_cpt_req_info *req_info = &rctx->cpt_req; + struct otx_cpt_fc_ctx *fctx = &rctx->fctx; + int mac_len = crypto_aead_authsize(tfm); + int ds; + + rctx->ctrl_word.e.enc_data_offset = req->assoclen; + + switch (ctx->cipher_type) { + case OTX_CPT_AES_CBC: + fctx->enc.enc_ctrl.e.iv_source = OTX_CPT_FROM_CPTR; + /* Copy encryption key to context */ + memcpy(fctx->enc.encr_key, ctx->key + ctx->auth_key_len, + ctx->enc_key_len); + /* Copy IV to context */ + memcpy(fctx->enc.encr_iv, req->iv, crypto_aead_ivsize(tfm)); + + ds = crypto_shash_digestsize(ctx->hashalg); + if (ctx->mac_type == OTX_CPT_SHA384) + ds = SHA512_DIGEST_SIZE; + if (ctx->ipad) + memcpy(fctx->hmac.e.ipad, ctx->ipad, ds); + if (ctx->opad) + memcpy(fctx->hmac.e.opad, ctx->opad, ds); + break; + + case OTX_CPT_AES_GCM: + fctx->enc.enc_ctrl.e.iv_source = OTX_CPT_FROM_DPTR; + /* Copy encryption key to context */ + memcpy(fctx->enc.encr_key, ctx->key, ctx->enc_key_len); + /* Copy salt to context */ + memcpy(fctx->enc.encr_iv, ctx->key + ctx->enc_key_len, + AES_GCM_SALT_SIZE); + + rctx->ctrl_word.e.iv_offset = req->assoclen - AES_GCM_IV_OFFSET; + break; + + default: + /* Unknown cipher type */ + return -EINVAL; + } + rctx->ctrl_word.flags = cpu_to_be64(rctx->ctrl_word.flags); + + req_info->ctrl.s.dma_mode = OTX_CPT_DMA_GATHER_SCATTER; + req_info->ctrl.s.se_req = OTX_CPT_SE_CORE_REQ; + req_info->req.opcode.s.major = OTX_CPT_MAJOR_OP_FC | + DMA_MODE_FLAG(OTX_CPT_DMA_GATHER_SCATTER); + if (enc) { + req_info->req.opcode.s.minor = 2; + req_info->req.param1 = req->cryptlen; + req_info->req.param2 = req->cryptlen + req->assoclen; + } else { + req_info->req.opcode.s.minor = 3; + req_info->req.param1 = req->cryptlen - mac_len; + req_info->req.param2 = req->cryptlen + req->assoclen - mac_len; + } + + fctx->enc.enc_ctrl.e.enc_cipher = ctx->cipher_type; + fctx->enc.enc_ctrl.e.aes_key = ctx->key_type; + fctx->enc.enc_ctrl.e.mac_type = ctx->mac_type; + fctx->enc.enc_ctrl.e.mac_len = mac_len; + fctx->enc.enc_ctrl.flags = cpu_to_be64(fctx->enc.enc_ctrl.flags); + + /* + * Storing Packet Data Information in offset + * Control Word First 8 bytes + */ + req_info->in[*argcnt].vptr = (u8 *)&rctx->ctrl_word; + req_info->in[*argcnt].size = CONTROL_WORD_LEN; + req_info->req.dlen += CONTROL_WORD_LEN; + ++(*argcnt); + + req_info->in[*argcnt].vptr = (u8 *)fctx; + req_info->in[*argcnt].size = sizeof(struct otx_cpt_fc_ctx); + req_info->req.dlen += sizeof(struct otx_cpt_fc_ctx); + ++(*argcnt); + + return 0; +} + +static inline u32 create_hmac_ctx_hdr(struct aead_request *req, u32 *argcnt, + u32 enc) +{ + struct otx_cpt_req_ctx *rctx = aead_request_ctx(req); + struct crypto_aead *tfm = crypto_aead_reqtfm(req); + struct otx_cpt_aead_ctx *ctx = crypto_aead_ctx(tfm); + struct otx_cpt_req_info *req_info = &rctx->cpt_req; + + req_info->ctrl.s.dma_mode = OTX_CPT_DMA_GATHER_SCATTER; + req_info->ctrl.s.se_req = OTX_CPT_SE_CORE_REQ; + req_info->req.opcode.s.major = OTX_CPT_MAJOR_OP_HMAC 
| + DMA_MODE_FLAG(OTX_CPT_DMA_GATHER_SCATTER); + req_info->is_trunc_hmac = ctx->is_trunc_hmac; + + req_info->req.opcode.s.minor = 0; + req_info->req.param1 = ctx->auth_key_len; + req_info->req.param2 = ctx->mac_type << 8; + + /* Add authentication key */ + req_info->in[*argcnt].vptr = ctx->key; + req_info->in[*argcnt].size = round_up(ctx->auth_key_len, 8); + req_info->req.dlen += round_up(ctx->auth_key_len, 8); + ++(*argcnt); + + return 0; +} + +static inline u32 create_aead_input_list(struct aead_request *req, u32 enc) +{ + struct otx_cpt_req_ctx *rctx = aead_request_ctx(req); + struct otx_cpt_req_info *req_info = &rctx->cpt_req; + u32 inputlen = req->cryptlen + req->assoclen; + u32 status, argcnt = 0; + + status = create_aead_ctx_hdr(req, enc, &argcnt); + if (status) + return status; + update_input_data(req_info, req->src, inputlen, &argcnt); + req_info->incnt = argcnt; + + return 0; +} + +static inline u32 create_aead_output_list(struct aead_request *req, u32 enc, + u32 mac_len) +{ + struct otx_cpt_req_ctx *rctx = aead_request_ctx(req); + struct otx_cpt_req_info *req_info = &rctx->cpt_req; + u32 argcnt = 0, outputlen = 0; + + if (enc) + outputlen = req->cryptlen + req->assoclen + mac_len; + else + outputlen = req->cryptlen + req->assoclen - mac_len; + + update_output_data(req_info, req->dst, 0, outputlen, &argcnt); + req_info->outcnt = argcnt; + + return 0; +} + +static inline u32 create_aead_null_input_list(struct aead_request *req, + u32 enc, u32 mac_len) +{ + struct otx_cpt_req_ctx *rctx = aead_request_ctx(req); + struct otx_cpt_req_info *req_info = &rctx->cpt_req; + u32 inputlen, argcnt = 0; + + if (enc) + inputlen = req->cryptlen + req->assoclen; + else + inputlen = req->cryptlen + req->assoclen - mac_len; + + create_hmac_ctx_hdr(req, &argcnt, enc); + update_input_data(req_info, req->src, inputlen, &argcnt); + req_info->incnt = argcnt; + + return 0; +} + +static inline u32 create_aead_null_output_list(struct aead_request *req, + u32 enc, u32 mac_len) +{ + struct otx_cpt_req_ctx *rctx = aead_request_ctx(req); + struct otx_cpt_req_info *req_info = &rctx->cpt_req; + struct scatterlist *dst; + u8 *ptr = NULL; + int argcnt = 0, status, offset; + u32 inputlen; + + if (enc) + inputlen = req->cryptlen + req->assoclen; + else + inputlen = req->cryptlen + req->assoclen - mac_len; + + /* + * If source and destination are different + * then copy payload to destination + */ + if (req->src != req->dst) { + + ptr = kmalloc(inputlen, (req_info->areq->flags & + CRYPTO_TFM_REQ_MAY_SLEEP) ? 
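Aside on the list builders above, covering the regular AEAD path: the source always supplies assoclen + cryptlen bytes, while the destination receives assoclen + cryptlen + authsize on encryption (tag appended) and assoclen + cryptlen - authsize on decryption (tag stripped after verification); the NULL-cipher HMAC path instead trims the tag from its input length on decrypt. A minimal sketch of that accounting with example numbers:

	#include <stdio.h>

	struct aead_lens {
		unsigned int in;	/* bytes gathered from the source        */
		unsigned int out;	/* bytes scattered to the destination    */
	};

	/* Same arithmetic as create_aead_input_list()/create_aead_output_list(). */
	static struct aead_lens compute_aead_lens(unsigned int assoclen,
						  unsigned int cryptlen,
						  unsigned int authsize, int enc)
	{
		struct aead_lens l;

		l.in = assoclen + cryptlen;
		l.out = enc ? assoclen + cryptlen + authsize
			    : assoclen + cryptlen - authsize;
		return l;
	}

	int main(void)
	{
		/* ESP-like example: 16 bytes of AAD, 64 bytes of payload, 16-byte tag. */
		struct aead_lens e = compute_aead_lens(16, 64, 16, 1);
		struct aead_lens d = compute_aead_lens(16, 64 + 16, 16, 0);

		printf("enc: in=%u out=%u\n", e.in, e.out);	/* in=80 out=96 */
		printf("dec: in=%u out=%u\n", d.in, d.out);	/* in=96 out=80 */
		return 0;
	}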
+ GFP_KERNEL : GFP_ATOMIC); + if (!ptr) { + status = -ENOMEM; + goto error; + } + + status = sg_copy_to_buffer(req->src, sg_nents(req->src), ptr, + inputlen); + if (status != inputlen) { + status = -EINVAL; + goto error; + } + status = sg_copy_from_buffer(req->dst, sg_nents(req->dst), ptr, + inputlen); + if (status != inputlen) { + status = -EINVAL; + goto error; + } + kfree(ptr); + } + + if (enc) { + /* + * In an encryption scenario hmac needs + * to be appended after payload + */ + dst = req->dst; + offset = inputlen; + while (offset >= dst->length) { + offset -= dst->length; + dst = sg_next(dst); + if (!dst) { + status = -ENOENT; + goto error; + } + } + + update_output_data(req_info, dst, offset, mac_len, &argcnt); + } else { + /* + * In a decryption scenario calculated hmac for received + * payload needs to be compare with hmac received + */ + status = sg_copy_buffer(req->src, sg_nents(req->src), + rctx->fctx.hmac.s.hmac_recv, mac_len, + inputlen, true); + if (status != mac_len) { + status = -EINVAL; + goto error; + } + + req_info->out[argcnt].vptr = rctx->fctx.hmac.s.hmac_calc; + req_info->out[argcnt].size = mac_len; + argcnt++; + } + + req_info->outcnt = argcnt; + return 0; +error: + kfree(ptr); + return status; +} + +static u32 cpt_aead_enc_dec(struct aead_request *req, u8 reg_type, u8 enc) +{ + struct otx_cpt_req_ctx *rctx = aead_request_ctx(req); + struct otx_cpt_req_info *req_info = &rctx->cpt_req; + struct crypto_aead *tfm = crypto_aead_reqtfm(req); + struct pci_dev *pdev; + u32 status, cpu_num; + + /* Clear control words */ + rctx->ctrl_word.flags = 0; + rctx->fctx.enc.enc_ctrl.flags = 0; + + req_info->callback = otx_cpt_aead_callback; + req_info->areq = &req->base; + req_info->req_type = reg_type; + req_info->is_enc = enc; + req_info->is_trunc_hmac = false; + + switch (reg_type) { + case OTX_CPT_AEAD_ENC_DEC_REQ: + status = create_aead_input_list(req, enc); + if (status) + return status; + status = create_aead_output_list(req, enc, + crypto_aead_authsize(tfm)); + if (status) + return status; + break; + + case OTX_CPT_AEAD_ENC_DEC_NULL_REQ: + status = create_aead_null_input_list(req, enc, + crypto_aead_authsize(tfm)); + if (status) + return status; + status = create_aead_null_output_list(req, enc, + crypto_aead_authsize(tfm)); + if (status) + return status; + break; + + default: + return -EINVAL; + } + + /* Validate that request doesn't exceed maximum CPT supported size */ + if (req_info->req.param1 > OTX_CPT_MAX_REQ_SIZE || + req_info->req.param2 > OTX_CPT_MAX_REQ_SIZE) + return -E2BIG; + + status = get_se_device(&pdev, &cpu_num); + if (status) + return status; + + req_info->ctrl.s.grp = 0; + + status = otx_cpt_do_request(pdev, req_info, cpu_num); + /* + * We perform an asynchronous send and once + * the request is completed the driver would + * intimate through registered call back functions + */ + return status; +} + +static int otx_cpt_aead_encrypt(struct aead_request *req) +{ + return cpt_aead_enc_dec(req, OTX_CPT_AEAD_ENC_DEC_REQ, true); +} + +static int otx_cpt_aead_decrypt(struct aead_request *req) +{ + return cpt_aead_enc_dec(req, OTX_CPT_AEAD_ENC_DEC_REQ, false); +} + +static int otx_cpt_aead_null_encrypt(struct aead_request *req) +{ + return cpt_aead_enc_dec(req, OTX_CPT_AEAD_ENC_DEC_NULL_REQ, true); +} + +static int otx_cpt_aead_null_decrypt(struct aead_request *req) +{ + return cpt_aead_enc_dec(req, OTX_CPT_AEAD_ENC_DEC_NULL_REQ, false); +} + +static struct skcipher_alg otx_cpt_skciphers[] = { { + .base.cra_name = "xts(aes)", + .base.cra_driver_name = 
"cpt_xts_aes", + .base.cra_flags = CRYPTO_ALG_ASYNC, + .base.cra_blocksize = AES_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct otx_cpt_enc_ctx), + .base.cra_alignmask = 7, + .base.cra_priority = 4001, + .base.cra_module = THIS_MODULE, + + .init = otx_cpt_enc_dec_init, + .ivsize = AES_BLOCK_SIZE, + .min_keysize = 2 * AES_MIN_KEY_SIZE, + .max_keysize = 2 * AES_MAX_KEY_SIZE, + .setkey = otx_cpt_skcipher_xts_setkey, + .encrypt = otx_cpt_skcipher_encrypt, + .decrypt = otx_cpt_skcipher_decrypt, +}, { + .base.cra_name = "cbc(aes)", + .base.cra_driver_name = "cpt_cbc_aes", + .base.cra_flags = CRYPTO_ALG_ASYNC, + .base.cra_blocksize = AES_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct otx_cpt_enc_ctx), + .base.cra_alignmask = 7, + .base.cra_priority = 4001, + .base.cra_module = THIS_MODULE, + + .init = otx_cpt_enc_dec_init, + .ivsize = AES_BLOCK_SIZE, + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .setkey = otx_cpt_skcipher_cbc_aes_setkey, + .encrypt = otx_cpt_skcipher_encrypt, + .decrypt = otx_cpt_skcipher_decrypt, +}, { + .base.cra_name = "ecb(aes)", + .base.cra_driver_name = "cpt_ecb_aes", + .base.cra_flags = CRYPTO_ALG_ASYNC, + .base.cra_blocksize = AES_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct otx_cpt_enc_ctx), + .base.cra_alignmask = 7, + .base.cra_priority = 4001, + .base.cra_module = THIS_MODULE, + + .init = otx_cpt_enc_dec_init, + .ivsize = 0, + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .setkey = otx_cpt_skcipher_ecb_aes_setkey, + .encrypt = otx_cpt_skcipher_encrypt, + .decrypt = otx_cpt_skcipher_decrypt, +}, { + .base.cra_name = "cfb(aes)", + .base.cra_driver_name = "cpt_cfb_aes", + .base.cra_flags = CRYPTO_ALG_ASYNC, + .base.cra_blocksize = AES_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct otx_cpt_enc_ctx), + .base.cra_alignmask = 7, + .base.cra_priority = 4001, + .base.cra_module = THIS_MODULE, + + .init = otx_cpt_enc_dec_init, + .ivsize = AES_BLOCK_SIZE, + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .setkey = otx_cpt_skcipher_cfb_aes_setkey, + .encrypt = otx_cpt_skcipher_encrypt, + .decrypt = otx_cpt_skcipher_decrypt, +}, { + .base.cra_name = "cbc(des3_ede)", + .base.cra_driver_name = "cpt_cbc_des3_ede", + .base.cra_flags = CRYPTO_ALG_ASYNC, + .base.cra_blocksize = DES3_EDE_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct otx_cpt_des3_ctx), + .base.cra_alignmask = 7, + .base.cra_priority = 4001, + .base.cra_module = THIS_MODULE, + + .init = otx_cpt_enc_dec_init, + .min_keysize = DES3_EDE_KEY_SIZE, + .max_keysize = DES3_EDE_KEY_SIZE, + .ivsize = DES_BLOCK_SIZE, + .setkey = otx_cpt_skcipher_cbc_des3_setkey, + .encrypt = otx_cpt_skcipher_encrypt, + .decrypt = otx_cpt_skcipher_decrypt, +}, { + .base.cra_name = "ecb(des3_ede)", + .base.cra_driver_name = "cpt_ecb_des3_ede", + .base.cra_flags = CRYPTO_ALG_ASYNC, + .base.cra_blocksize = DES3_EDE_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct otx_cpt_des3_ctx), + .base.cra_alignmask = 7, + .base.cra_priority = 4001, + .base.cra_module = THIS_MODULE, + + .init = otx_cpt_enc_dec_init, + .min_keysize = DES3_EDE_KEY_SIZE, + .max_keysize = DES3_EDE_KEY_SIZE, + .ivsize = 0, + .setkey = otx_cpt_skcipher_ecb_des3_setkey, + .encrypt = otx_cpt_skcipher_encrypt, + .decrypt = otx_cpt_skcipher_decrypt, +} }; + +static struct aead_alg otx_cpt_aeads[] = { { + .base = { + .cra_name = "authenc(hmac(sha1),cbc(aes))", + .cra_driver_name = "cpt_hmac_sha1_cbc_aes", + .cra_blocksize = AES_BLOCK_SIZE, + .cra_flags = CRYPTO_ALG_ASYNC, + .cra_ctxsize = sizeof(struct 
otx_cpt_aead_ctx), + .cra_priority = 4001, + .cra_alignmask = 0, + .cra_module = THIS_MODULE, + }, + .init = otx_cpt_aead_cbc_aes_sha1_init, + .exit = otx_cpt_aead_exit, + .setkey = otx_cpt_aead_cbc_aes_sha_setkey, + .setauthsize = otx_cpt_aead_set_authsize, + .encrypt = otx_cpt_aead_encrypt, + .decrypt = otx_cpt_aead_decrypt, + .ivsize = AES_BLOCK_SIZE, + .maxauthsize = SHA1_DIGEST_SIZE, +}, { + .base = { + .cra_name = "authenc(hmac(sha256),cbc(aes))", + .cra_driver_name = "cpt_hmac_sha256_cbc_aes", + .cra_blocksize = AES_BLOCK_SIZE, + .cra_flags = CRYPTO_ALG_ASYNC, + .cra_ctxsize = sizeof(struct otx_cpt_aead_ctx), + .cra_priority = 4001, + .cra_alignmask = 0, + .cra_module = THIS_MODULE, + }, + .init = otx_cpt_aead_cbc_aes_sha256_init, + .exit = otx_cpt_aead_exit, + .setkey = otx_cpt_aead_cbc_aes_sha_setkey, + .setauthsize = otx_cpt_aead_set_authsize, + .encrypt = otx_cpt_aead_encrypt, + .decrypt = otx_cpt_aead_decrypt, + .ivsize = AES_BLOCK_SIZE, + .maxauthsize = SHA256_DIGEST_SIZE, +}, { + .base = { + .cra_name = "authenc(hmac(sha384),cbc(aes))", + .cra_driver_name = "cpt_hmac_sha384_cbc_aes", + .cra_blocksize = AES_BLOCK_SIZE, + .cra_flags = CRYPTO_ALG_ASYNC, + .cra_ctxsize = sizeof(struct otx_cpt_aead_ctx), + .cra_priority = 4001, + .cra_alignmask = 0, + .cra_module = THIS_MODULE, + }, + .init = otx_cpt_aead_cbc_aes_sha384_init, + .exit = otx_cpt_aead_exit, + .setkey = otx_cpt_aead_cbc_aes_sha_setkey, + .setauthsize = otx_cpt_aead_set_authsize, + .encrypt = otx_cpt_aead_encrypt, + .decrypt = otx_cpt_aead_decrypt, + .ivsize = AES_BLOCK_SIZE, + .maxauthsize = SHA384_DIGEST_SIZE, +}, { + .base = { + .cra_name = "authenc(hmac(sha512),cbc(aes))", + .cra_driver_name = "cpt_hmac_sha512_cbc_aes", + .cra_blocksize = AES_BLOCK_SIZE, + .cra_flags = CRYPTO_ALG_ASYNC, + .cra_ctxsize = sizeof(struct otx_cpt_aead_ctx), + .cra_priority = 4001, + .cra_alignmask = 0, + .cra_module = THIS_MODULE, + }, + .init = otx_cpt_aead_cbc_aes_sha512_init, + .exit = otx_cpt_aead_exit, + .setkey = otx_cpt_aead_cbc_aes_sha_setkey, + .setauthsize = otx_cpt_aead_set_authsize, + .encrypt = otx_cpt_aead_encrypt, + .decrypt = otx_cpt_aead_decrypt, + .ivsize = AES_BLOCK_SIZE, + .maxauthsize = SHA512_DIGEST_SIZE, +}, { + .base = { + .cra_name = "authenc(hmac(sha1),ecb(cipher_null))", + .cra_driver_name = "cpt_hmac_sha1_ecb_null", + .cra_blocksize = 1, + .cra_flags = CRYPTO_ALG_ASYNC, + .cra_ctxsize = sizeof(struct otx_cpt_aead_ctx), + .cra_priority = 4001, + .cra_alignmask = 0, + .cra_module = THIS_MODULE, + }, + .init = otx_cpt_aead_ecb_null_sha1_init, + .exit = otx_cpt_aead_exit, + .setkey = otx_cpt_aead_ecb_null_sha_setkey, + .setauthsize = otx_cpt_aead_set_authsize, + .encrypt = otx_cpt_aead_null_encrypt, + .decrypt = otx_cpt_aead_null_decrypt, + .ivsize = 0, + .maxauthsize = SHA1_DIGEST_SIZE, +}, { + .base = { + .cra_name = "authenc(hmac(sha256),ecb(cipher_null))", + .cra_driver_name = "cpt_hmac_sha256_ecb_null", + .cra_blocksize = 1, + .cra_flags = CRYPTO_ALG_ASYNC, + .cra_ctxsize = sizeof(struct otx_cpt_aead_ctx), + .cra_priority = 4001, + .cra_alignmask = 0, + .cra_module = THIS_MODULE, + }, + .init = otx_cpt_aead_ecb_null_sha256_init, + .exit = otx_cpt_aead_exit, + .setkey = otx_cpt_aead_ecb_null_sha_setkey, + .setauthsize = otx_cpt_aead_set_authsize, + .encrypt = otx_cpt_aead_null_encrypt, + .decrypt = otx_cpt_aead_null_decrypt, + .ivsize = 0, + .maxauthsize = SHA256_DIGEST_SIZE, +}, { + .base = { + .cra_name = "authenc(hmac(sha384),ecb(cipher_null))", + .cra_driver_name = "cpt_hmac_sha384_ecb_null", + 
.cra_blocksize = 1, + .cra_flags = CRYPTO_ALG_ASYNC, + .cra_ctxsize = sizeof(struct otx_cpt_aead_ctx), + .cra_priority = 4001, + .cra_alignmask = 0, + .cra_module = THIS_MODULE, + }, + .init = otx_cpt_aead_ecb_null_sha384_init, + .exit = otx_cpt_aead_exit, + .setkey = otx_cpt_aead_ecb_null_sha_setkey, + .setauthsize = otx_cpt_aead_set_authsize, + .encrypt = otx_cpt_aead_null_encrypt, + .decrypt = otx_cpt_aead_null_decrypt, + .ivsize = 0, + .maxauthsize = SHA384_DIGEST_SIZE, +}, { + .base = { + .cra_name = "authenc(hmac(sha512),ecb(cipher_null))", + .cra_driver_name = "cpt_hmac_sha512_ecb_null", + .cra_blocksize = 1, + .cra_flags = CRYPTO_ALG_ASYNC, + .cra_ctxsize = sizeof(struct otx_cpt_aead_ctx), + .cra_priority = 4001, + .cra_alignmask = 0, + .cra_module = THIS_MODULE, + }, + .init = otx_cpt_aead_ecb_null_sha512_init, + .exit = otx_cpt_aead_exit, + .setkey = otx_cpt_aead_ecb_null_sha_setkey, + .setauthsize = otx_cpt_aead_set_authsize, + .encrypt = otx_cpt_aead_null_encrypt, + .decrypt = otx_cpt_aead_null_decrypt, + .ivsize = 0, + .maxauthsize = SHA512_DIGEST_SIZE, +}, { + .base = { + .cra_name = "rfc4106(gcm(aes))", + .cra_driver_name = "cpt_rfc4106_gcm_aes", + .cra_blocksize = 1, + .cra_flags = CRYPTO_ALG_ASYNC, + .cra_ctxsize = sizeof(struct otx_cpt_aead_ctx), + .cra_priority = 4001, + .cra_alignmask = 0, + .cra_module = THIS_MODULE, + }, + .init = otx_cpt_aead_gcm_aes_init, + .exit = otx_cpt_aead_exit, + .setkey = otx_cpt_aead_gcm_aes_setkey, + .setauthsize = otx_cpt_aead_set_authsize, + .encrypt = otx_cpt_aead_encrypt, + .decrypt = otx_cpt_aead_decrypt, + .ivsize = AES_GCM_IV_SIZE, + .maxauthsize = AES_GCM_ICV_SIZE, +} }; + +static inline int is_any_alg_used(void) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(otx_cpt_skciphers); i++) + if (refcount_read(&otx_cpt_skciphers[i].base.cra_refcnt) != 1) + return true; + for (i = 0; i < ARRAY_SIZE(otx_cpt_aeads); i++) + if (refcount_read(&otx_cpt_aeads[i].base.cra_refcnt) != 1) + return true; + return false; +} + +static inline int cpt_register_algs(void) +{ + int i, err = 0; + + if (!IS_ENABLED(CONFIG_DM_CRYPT)) { + for (i = 0; i < ARRAY_SIZE(otx_cpt_skciphers); i++) + otx_cpt_skciphers[i].base.cra_flags &= ~CRYPTO_ALG_DEAD; + + err = crypto_register_skciphers(otx_cpt_skciphers, + ARRAY_SIZE(otx_cpt_skciphers)); + if (err) + return err; + } + + for (i = 0; i < ARRAY_SIZE(otx_cpt_aeads); i++) + otx_cpt_aeads[i].base.cra_flags &= ~CRYPTO_ALG_DEAD; + + err = crypto_register_aeads(otx_cpt_aeads, ARRAY_SIZE(otx_cpt_aeads)); + if (err) { + crypto_unregister_skciphers(otx_cpt_skciphers, + ARRAY_SIZE(otx_cpt_skciphers)); + return err; + } + + return 0; +} + +static inline void cpt_unregister_algs(void) +{ + crypto_unregister_skciphers(otx_cpt_skciphers, + ARRAY_SIZE(otx_cpt_skciphers)); + crypto_unregister_aeads(otx_cpt_aeads, ARRAY_SIZE(otx_cpt_aeads)); +} + +static int compare_func(const void *lptr, const void *rptr) +{ + struct cpt_device_desc *ldesc = (struct cpt_device_desc *) lptr; + struct cpt_device_desc *rdesc = (struct cpt_device_desc *) rptr; + + if (ldesc->dev->devfn < rdesc->dev->devfn) + return -1; + if (ldesc->dev->devfn > rdesc->dev->devfn) + return 1; + return 0; +} + +static void swap_func(void *lptr, void *rptr, int size) +{ + struct cpt_device_desc *ldesc = (struct cpt_device_desc *) lptr; + struct cpt_device_desc *rdesc = (struct cpt_device_desc *) rptr; + struct cpt_device_desc desc; + + desc = *ldesc; + *ldesc = *rdesc; + *rdesc = desc; +} + +int otx_cpt_crypto_init(struct pci_dev *pdev, struct module *mod, + enum 
otx_cptpf_type pf_type, + enum otx_cptvf_type engine_type, + int num_queues, int num_devices) +{ + int ret = 0; + int count; + + mutex_lock(&mutex); + switch (engine_type) { + case OTX_CPT_SE_TYPES: + count = atomic_read(&se_devices.count); + if (count >= CPT_MAX_VF_NUM) { + dev_err(&pdev->dev, "No space to add a new device"); + ret = -ENOSPC; + goto err; + } + se_devices.desc[count].pf_type = pf_type; + se_devices.desc[count].num_queues = num_queues; + se_devices.desc[count++].dev = pdev; + atomic_inc(&se_devices.count); + + if (atomic_read(&se_devices.count) == num_devices && + is_crypto_registered == false) { + if (cpt_register_algs()) { + dev_err(&pdev->dev, + "Error in registering crypto algorithms\n"); + ret = -EINVAL; + goto err; + } + try_module_get(mod); + is_crypto_registered = true; + } + sort(se_devices.desc, count, sizeof(struct cpt_device_desc), + compare_func, swap_func); + break; + + case OTX_CPT_AE_TYPES: + count = atomic_read(&ae_devices.count); + if (count >= CPT_MAX_VF_NUM) { + dev_err(&pdev->dev, "No space to add a new device"); + ret = -ENOSPC; + goto err; + } + ae_devices.desc[count].pf_type = pf_type; + ae_devices.desc[count].num_queues = num_queues; + ae_devices.desc[count++].dev = pdev; + atomic_inc(&ae_devices.count); + sort(ae_devices.desc, count, sizeof(struct cpt_device_desc), + compare_func, swap_func); + break; + + default: + dev_err(&pdev->dev, "Unknown VF type %d\n", engine_type); + ret = BAD_OTX_CPTVF_TYPE; + } +err: + mutex_unlock(&mutex); + return ret; +} + +void otx_cpt_crypto_exit(struct pci_dev *pdev, struct module *mod, + enum otx_cptvf_type engine_type) +{ + struct cpt_device_table *dev_tbl; + bool dev_found = false; + int i, j, count; + + mutex_lock(&mutex); + + dev_tbl = (engine_type == OTX_CPT_AE_TYPES) ? &ae_devices : &se_devices; + count = atomic_read(&dev_tbl->count); + for (i = 0; i < count; i++) + if (pdev == dev_tbl->desc[i].dev) { + for (j = i; j < count-1; j++) + dev_tbl->desc[j] = dev_tbl->desc[j+1]; + dev_found = true; + break; + } + + if (!dev_found) { + dev_err(&pdev->dev, "%s device not found", __func__); + goto exit; + } + + if (engine_type != OTX_CPT_AE_TYPES) { + if (atomic_dec_and_test(&se_devices.count) && + !is_any_alg_used()) { + cpt_unregister_algs(); + module_put(mod); + is_crypto_registered = false; + } + } else + atomic_dec(&ae_devices.count); +exit: + mutex_unlock(&mutex); +} diff --git a/drivers/crypto/marvell/octeontx/otx_cptvf_algs.h b/drivers/crypto/marvell/octeontx/otx_cptvf_algs.h new file mode 100644 index 000000000000..67cc0025f5d5 --- /dev/null +++ b/drivers/crypto/marvell/octeontx/otx_cptvf_algs.h @@ -0,0 +1,188 @@ +/* SPDX-License-Identifier: GPL-2.0 + * Marvell OcteonTX CPT driver + * + * Copyright (C) 2019 Marvell International Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +#ifndef __OTX_CPT_ALGS_H +#define __OTX_CPT_ALGS_H + +#include <crypto/hash.h> +#include "otx_cpt_common.h" + +#define OTX_CPT_MAX_ENC_KEY_SIZE 32 +#define OTX_CPT_MAX_HASH_KEY_SIZE 64 +#define OTX_CPT_MAX_KEY_SIZE (OTX_CPT_MAX_ENC_KEY_SIZE + \ + OTX_CPT_MAX_HASH_KEY_SIZE) +enum otx_cpt_request_type { + OTX_CPT_ENC_DEC_REQ = 0x1, + OTX_CPT_AEAD_ENC_DEC_REQ = 0x2, + OTX_CPT_AEAD_ENC_DEC_NULL_REQ = 0x3, + OTX_CPT_PASSTHROUGH_REQ = 0x4 +}; + +enum otx_cpt_major_opcodes { + OTX_CPT_MAJOR_OP_MISC = 0x01, + OTX_CPT_MAJOR_OP_FC = 0x33, + OTX_CPT_MAJOR_OP_HMAC = 0x35, +}; + +enum otx_cpt_req_type { + OTX_CPT_AE_CORE_REQ, + OTX_CPT_SE_CORE_REQ +}; + +enum otx_cpt_cipher_type { + OTX_CPT_CIPHER_NULL = 0x0, + OTX_CPT_DES3_CBC = 0x1, + OTX_CPT_DES3_ECB = 0x2, + OTX_CPT_AES_CBC = 0x3, + OTX_CPT_AES_ECB = 0x4, + OTX_CPT_AES_CFB = 0x5, + OTX_CPT_AES_CTR = 0x6, + OTX_CPT_AES_GCM = 0x7, + OTX_CPT_AES_XTS = 0x8 +}; + +enum otx_cpt_mac_type { + OTX_CPT_MAC_NULL = 0x0, + OTX_CPT_MD5 = 0x1, + OTX_CPT_SHA1 = 0x2, + OTX_CPT_SHA224 = 0x3, + OTX_CPT_SHA256 = 0x4, + OTX_CPT_SHA384 = 0x5, + OTX_CPT_SHA512 = 0x6, + OTX_CPT_GMAC = 0x7 +}; + +enum otx_cpt_aes_key_len { + OTX_CPT_AES_128_BIT = 0x1, + OTX_CPT_AES_192_BIT = 0x2, + OTX_CPT_AES_256_BIT = 0x3 +}; + +union otx_cpt_encr_ctrl { + u64 flags; + struct { +#if defined(__BIG_ENDIAN_BITFIELD) + u64 enc_cipher:4; + u64 reserved1:1; + u64 aes_key:2; + u64 iv_source:1; + u64 mac_type:4; + u64 reserved2:3; + u64 auth_input_type:1; + u64 mac_len:8; + u64 reserved3:8; + u64 encr_offset:16; + u64 iv_offset:8; + u64 auth_offset:8; +#else + u64 auth_offset:8; + u64 iv_offset:8; + u64 encr_offset:16; + u64 reserved3:8; + u64 mac_len:8; + u64 auth_input_type:1; + u64 reserved2:3; + u64 mac_type:4; + u64 iv_source:1; + u64 aes_key:2; + u64 reserved1:1; + u64 enc_cipher:4; +#endif + } e; +}; + +struct otx_cpt_cipher { + const char *name; + u8 value; +}; + +struct otx_cpt_enc_context { + union otx_cpt_encr_ctrl enc_ctrl; + u8 encr_key[32]; + u8 encr_iv[16]; +}; + +union otx_cpt_fchmac_ctx { + struct { + u8 ipad[64]; + u8 opad[64]; + } e; + struct { + u8 hmac_calc[64]; /* HMAC calculated */ + u8 hmac_recv[64]; /* HMAC received */ + } s; +}; + +struct otx_cpt_fc_ctx { + struct otx_cpt_enc_context enc; + union otx_cpt_fchmac_ctx hmac; +}; + +struct otx_cpt_enc_ctx { + u32 key_len; + u8 enc_key[OTX_CPT_MAX_KEY_SIZE]; + u8 cipher_type; + u8 key_type; +}; + +struct otx_cpt_des3_ctx { + u32 key_len; + u8 des3_key[OTX_CPT_MAX_KEY_SIZE]; +}; + +union otx_cpt_offset_ctrl_word { + u64 flags; + struct { +#if defined(__BIG_ENDIAN_BITFIELD) + u64 reserved:32; + u64 enc_data_offset:16; + u64 iv_offset:8; + u64 auth_offset:8; +#else + u64 auth_offset:8; + u64 iv_offset:8; + u64 enc_data_offset:16; + u64 reserved:32; +#endif + } e; +}; + +struct otx_cpt_req_ctx { + struct otx_cpt_req_info cpt_req; + union otx_cpt_offset_ctrl_word ctrl_word; + struct otx_cpt_fc_ctx fctx; +}; + +struct otx_cpt_sdesc { + struct shash_desc shash; +}; + +struct otx_cpt_aead_ctx { + u8 key[OTX_CPT_MAX_KEY_SIZE]; + struct crypto_shash *hashalg; + struct otx_cpt_sdesc *sdesc; + u8 *ipad; + u8 *opad; + u32 enc_key_len; + u32 auth_key_len; + u8 cipher_type; + u8 mac_type; + u8 key_type; + u8 is_trunc_hmac; +}; +int otx_cpt_crypto_init(struct pci_dev *pdev, struct module *mod, + enum otx_cptpf_type pf_type, + enum otx_cptvf_type engine_type, + int num_queues, int num_devices); +void otx_cpt_crypto_exit(struct pci_dev *pdev, struct module *mod, + enum otx_cptvf_type engine_type); +void otx_cpt_callback(int status, 
void *arg, void *req); + +#endif /* __OTX_CPT_ALGS_H */ diff --git a/drivers/crypto/marvell/octeontx/otx_cptvf_main.c b/drivers/crypto/marvell/octeontx/otx_cptvf_main.c new file mode 100644 index 000000000000..a91860b5dc77 --- /dev/null +++ b/drivers/crypto/marvell/octeontx/otx_cptvf_main.c @@ -0,0 +1,985 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Marvell OcteonTX CPT driver + * + * Copyright (C) 2019 Marvell International Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/interrupt.h> +#include <linux/module.h> +#include "otx_cptvf.h" +#include "otx_cptvf_algs.h" +#include "otx_cptvf_reqmgr.h" + +#define DRV_NAME "octeontx-cptvf" +#define DRV_VERSION "1.0" + +static void vq_work_handler(unsigned long data) +{ + struct otx_cptvf_wqe_info *cwqe_info = + (struct otx_cptvf_wqe_info *) data; + + otx_cpt_post_process(&cwqe_info->vq_wqe[0]); +} + +static int init_worker_threads(struct otx_cptvf *cptvf) +{ + struct pci_dev *pdev = cptvf->pdev; + struct otx_cptvf_wqe_info *cwqe_info; + int i; + + cwqe_info = kzalloc(sizeof(*cwqe_info), GFP_KERNEL); + if (!cwqe_info) + return -ENOMEM; + + if (cptvf->num_queues) { + dev_dbg(&pdev->dev, "Creating VQ worker threads (%d)\n", + cptvf->num_queues); + } + + for (i = 0; i < cptvf->num_queues; i++) { + tasklet_init(&cwqe_info->vq_wqe[i].twork, vq_work_handler, + (u64)cwqe_info); + cwqe_info->vq_wqe[i].cptvf = cptvf; + } + cptvf->wqe_info = cwqe_info; + + return 0; +} + +static void cleanup_worker_threads(struct otx_cptvf *cptvf) +{ + struct pci_dev *pdev = cptvf->pdev; + struct otx_cptvf_wqe_info *cwqe_info; + int i; + + cwqe_info = (struct otx_cptvf_wqe_info *)cptvf->wqe_info; + if (!cwqe_info) + return; + + if (cptvf->num_queues) { + dev_dbg(&pdev->dev, "Cleaning VQ worker threads (%u)\n", + cptvf->num_queues); + } + + for (i = 0; i < cptvf->num_queues; i++) + tasklet_kill(&cwqe_info->vq_wqe[i].twork); + + kzfree(cwqe_info); + cptvf->wqe_info = NULL; +} + +static void free_pending_queues(struct otx_cpt_pending_qinfo *pqinfo) +{ + struct otx_cpt_pending_queue *queue; + int i; + + for_each_pending_queue(pqinfo, queue, i) { + if (!queue->head) + continue; + + /* free single queue */ + kzfree((queue->head)); + queue->front = 0; + queue->rear = 0; + queue->qlen = 0; + } + pqinfo->num_queues = 0; +} + +static int alloc_pending_queues(struct otx_cpt_pending_qinfo *pqinfo, u32 qlen, + u32 num_queues) +{ + struct otx_cpt_pending_queue *queue = NULL; + size_t size; + int ret; + u32 i; + + pqinfo->num_queues = num_queues; + size = (qlen * sizeof(struct otx_cpt_pending_entry)); + + for_each_pending_queue(pqinfo, queue, i) { + queue->head = kzalloc((size), GFP_KERNEL); + if (!queue->head) { + ret = -ENOMEM; + goto pending_qfail; + } + + queue->pending_count = 0; + queue->front = 0; + queue->rear = 0; + queue->qlen = qlen; + + /* init queue spin lock */ + spin_lock_init(&queue->lock); + } + return 0; + +pending_qfail: + free_pending_queues(pqinfo); + + return ret; +} + +static int init_pending_queues(struct otx_cptvf *cptvf, u32 qlen, + u32 num_queues) +{ + struct pci_dev *pdev = cptvf->pdev; + int ret; + + if (!num_queues) + return 0; + + ret = alloc_pending_queues(&cptvf->pqinfo, qlen, num_queues); + if (ret) { + dev_err(&pdev->dev, "Failed to setup pending queues (%u)\n", + num_queues); + return ret; + } + return 0; +} + +static void cleanup_pending_queues(struct otx_cptvf *cptvf) +{ + struct 
pci_dev *pdev = cptvf->pdev; + + if (!cptvf->num_queues) + return; + + dev_dbg(&pdev->dev, "Cleaning VQ pending queue (%u)\n", + cptvf->num_queues); + free_pending_queues(&cptvf->pqinfo); +} + +static void free_command_queues(struct otx_cptvf *cptvf, + struct otx_cpt_cmd_qinfo *cqinfo) +{ + struct otx_cpt_cmd_queue *queue = NULL; + struct otx_cpt_cmd_chunk *chunk = NULL; + struct pci_dev *pdev = cptvf->pdev; + int i; + + /* clean up for each queue */ + for (i = 0; i < cptvf->num_queues; i++) { + queue = &cqinfo->queue[i]; + + while (!list_empty(&cqinfo->queue[i].chead)) { + chunk = list_first_entry(&cqinfo->queue[i].chead, + struct otx_cpt_cmd_chunk, nextchunk); + + dma_free_coherent(&pdev->dev, chunk->size, + chunk->head, + chunk->dma_addr); + chunk->head = NULL; + chunk->dma_addr = 0; + list_del(&chunk->nextchunk); + kzfree(chunk); + } + queue->num_chunks = 0; + queue->idx = 0; + + } +} + +static int alloc_command_queues(struct otx_cptvf *cptvf, + struct otx_cpt_cmd_qinfo *cqinfo, + u32 qlen) +{ + struct otx_cpt_cmd_chunk *curr, *first, *last; + struct otx_cpt_cmd_queue *queue = NULL; + struct pci_dev *pdev = cptvf->pdev; + size_t q_size, c_size, rem_q_size; + u32 qcsize_bytes; + int i; + + + /* Qsize in dwords, needed for SADDR config, 1-next chunk pointer */ + cptvf->qsize = min(qlen, cqinfo->qchunksize) * + OTX_CPT_NEXT_CHUNK_PTR_SIZE + 1; + /* Qsize in bytes to create space for alignment */ + q_size = qlen * OTX_CPT_INST_SIZE; + + qcsize_bytes = cqinfo->qchunksize * OTX_CPT_INST_SIZE; + + /* per queue initialization */ + for (i = 0; i < cptvf->num_queues; i++) { + c_size = 0; + rem_q_size = q_size; + first = NULL; + last = NULL; + + queue = &cqinfo->queue[i]; + INIT_LIST_HEAD(&queue->chead); + do { + curr = kzalloc(sizeof(*curr), GFP_KERNEL); + if (!curr) + goto cmd_qfail; + + c_size = (rem_q_size > qcsize_bytes) ? 
qcsize_bytes : + rem_q_size; + curr->head = dma_alloc_coherent(&pdev->dev, + c_size + OTX_CPT_NEXT_CHUNK_PTR_SIZE, + &curr->dma_addr, GFP_KERNEL); + if (!curr->head) { + dev_err(&pdev->dev, + "Command Q (%d) chunk (%d) allocation failed\n", + i, queue->num_chunks); + goto free_curr; + } + curr->size = c_size; + + if (queue->num_chunks == 0) { + first = curr; + queue->base = first; + } + list_add_tail(&curr->nextchunk, + &cqinfo->queue[i].chead); + + queue->num_chunks++; + rem_q_size -= c_size; + if (last) + *((u64 *)(&last->head[last->size])) = + (u64)curr->dma_addr; + + last = curr; + } while (rem_q_size); + + /* + * Make the queue circular, tie back last chunk entry to head + */ + curr = first; + *((u64 *)(&last->head[last->size])) = (u64)curr->dma_addr; + queue->qhead = curr; + } + return 0; +free_curr: + kfree(curr); +cmd_qfail: + free_command_queues(cptvf, cqinfo); + return -ENOMEM; +} + +static int init_command_queues(struct otx_cptvf *cptvf, u32 qlen) +{ + struct pci_dev *pdev = cptvf->pdev; + int ret; + + /* setup command queues */ + ret = alloc_command_queues(cptvf, &cptvf->cqinfo, qlen); + if (ret) { + dev_err(&pdev->dev, "Failed to allocate command queues (%u)\n", + cptvf->num_queues); + return ret; + } + return ret; +} + +static void cleanup_command_queues(struct otx_cptvf *cptvf) +{ + struct pci_dev *pdev = cptvf->pdev; + + if (!cptvf->num_queues) + return; + + dev_dbg(&pdev->dev, "Cleaning VQ command queue (%u)\n", + cptvf->num_queues); + free_command_queues(cptvf, &cptvf->cqinfo); +} + +static void cptvf_sw_cleanup(struct otx_cptvf *cptvf) +{ + cleanup_worker_threads(cptvf); + cleanup_pending_queues(cptvf); + cleanup_command_queues(cptvf); +} + +static int cptvf_sw_init(struct otx_cptvf *cptvf, u32 qlen, u32 num_queues) +{ + struct pci_dev *pdev = cptvf->pdev; + u32 max_dev_queues = 0; + int ret; + + max_dev_queues = OTX_CPT_NUM_QS_PER_VF; + /* possible cpus */ + num_queues = min_t(u32, num_queues, max_dev_queues); + cptvf->num_queues = num_queues; + + ret = init_command_queues(cptvf, qlen); + if (ret) { + dev_err(&pdev->dev, "Failed to setup command queues (%u)\n", + num_queues); + return ret; + } + + ret = init_pending_queues(cptvf, qlen, num_queues); + if (ret) { + dev_err(&pdev->dev, "Failed to setup pending queues (%u)\n", + num_queues); + goto setup_pqfail; + } + + /* Create worker threads for BH processing */ + ret = init_worker_threads(cptvf); + if (ret) { + dev_err(&pdev->dev, "Failed to setup worker threads\n"); + goto init_work_fail; + } + return 0; + +init_work_fail: + cleanup_worker_threads(cptvf); + cleanup_pending_queues(cptvf); + +setup_pqfail: + cleanup_command_queues(cptvf); + + return ret; +} + +static void cptvf_free_irq_affinity(struct otx_cptvf *cptvf, int vec) +{ + irq_set_affinity_hint(pci_irq_vector(cptvf->pdev, vec), NULL); + free_cpumask_var(cptvf->affinity_mask[vec]); +} + +static void cptvf_write_vq_ctl(struct otx_cptvf *cptvf, bool val) +{ + union otx_cptx_vqx_ctl vqx_ctl; + + vqx_ctl.u = readq(cptvf->reg_base + OTX_CPT_VQX_CTL(0)); + vqx_ctl.s.ena = val; + writeq(vqx_ctl.u, cptvf->reg_base + OTX_CPT_VQX_CTL(0)); +} + +void otx_cptvf_write_vq_doorbell(struct otx_cptvf *cptvf, u32 val) +{ + union otx_cptx_vqx_doorbell vqx_dbell; + + vqx_dbell.u = readq(cptvf->reg_base + OTX_CPT_VQX_DOORBELL(0)); + vqx_dbell.s.dbell_cnt = val * 8; /* Num of Instructions * 8 words */ + writeq(vqx_dbell.u, cptvf->reg_base + OTX_CPT_VQX_DOORBELL(0)); +} + +static void cptvf_write_vq_inprog(struct otx_cptvf *cptvf, u8 val) +{ + union otx_cptx_vqx_inprog vqx_inprg; + + 
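/* Read-modify-write VQ0's INPROG register so only the inflight field is updated */ +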
vqx_inprg.u = readq(cptvf->reg_base + OTX_CPT_VQX_INPROG(0)); + vqx_inprg.s.inflight = val; + writeq(vqx_inprg.u, cptvf->reg_base + OTX_CPT_VQX_INPROG(0)); +} + +static void cptvf_write_vq_done_numwait(struct otx_cptvf *cptvf, u32 val) +{ + union otx_cptx_vqx_done_wait vqx_dwait; + + vqx_dwait.u = readq(cptvf->reg_base + OTX_CPT_VQX_DONE_WAIT(0)); + vqx_dwait.s.num_wait = val; + writeq(vqx_dwait.u, cptvf->reg_base + OTX_CPT_VQX_DONE_WAIT(0)); +} + +static u32 cptvf_read_vq_done_numwait(struct otx_cptvf *cptvf) +{ + union otx_cptx_vqx_done_wait vqx_dwait; + + vqx_dwait.u = readq(cptvf->reg_base + OTX_CPT_VQX_DONE_WAIT(0)); + return vqx_dwait.s.num_wait; +} + +static void cptvf_write_vq_done_timewait(struct otx_cptvf *cptvf, u16 time) +{ + union otx_cptx_vqx_done_wait vqx_dwait; + + vqx_dwait.u = readq(cptvf->reg_base + OTX_CPT_VQX_DONE_WAIT(0)); + vqx_dwait.s.time_wait = time; + writeq(vqx_dwait.u, cptvf->reg_base + OTX_CPT_VQX_DONE_WAIT(0)); +} + + +static u16 cptvf_read_vq_done_timewait(struct otx_cptvf *cptvf) +{ + union otx_cptx_vqx_done_wait vqx_dwait; + + vqx_dwait.u = readq(cptvf->reg_base + OTX_CPT_VQX_DONE_WAIT(0)); + return vqx_dwait.s.time_wait; +} + +static void cptvf_enable_swerr_interrupts(struct otx_cptvf *cptvf) +{ + union otx_cptx_vqx_misc_ena_w1s vqx_misc_ena; + + vqx_misc_ena.u = readq(cptvf->reg_base + OTX_CPT_VQX_MISC_ENA_W1S(0)); + /* Enable SWERR interrupts for the requested VF */ + vqx_misc_ena.s.swerr = 1; + writeq(vqx_misc_ena.u, cptvf->reg_base + OTX_CPT_VQX_MISC_ENA_W1S(0)); +} + +static void cptvf_enable_mbox_interrupts(struct otx_cptvf *cptvf) +{ + union otx_cptx_vqx_misc_ena_w1s vqx_misc_ena; + + vqx_misc_ena.u = readq(cptvf->reg_base + OTX_CPT_VQX_MISC_ENA_W1S(0)); + /* Enable MBOX interrupt for the requested VF */ + vqx_misc_ena.s.mbox = 1; + writeq(vqx_misc_ena.u, cptvf->reg_base + OTX_CPT_VQX_MISC_ENA_W1S(0)); +} + +static void cptvf_enable_done_interrupts(struct otx_cptvf *cptvf) +{ + union otx_cptx_vqx_done_ena_w1s vqx_done_ena; + + vqx_done_ena.u = readq(cptvf->reg_base + OTX_CPT_VQX_DONE_ENA_W1S(0)); + /* Enable DONE interrupt for the requested VF */ + vqx_done_ena.s.done = 1; + writeq(vqx_done_ena.u, cptvf->reg_base + OTX_CPT_VQX_DONE_ENA_W1S(0)); +} + +static void cptvf_clear_dovf_intr(struct otx_cptvf *cptvf) +{ + union otx_cptx_vqx_misc_int vqx_misc_int; + + vqx_misc_int.u = readq(cptvf->reg_base + OTX_CPT_VQX_MISC_INT(0)); + /* W1C for the VF */ + vqx_misc_int.s.dovf = 1; + writeq(vqx_misc_int.u, cptvf->reg_base + OTX_CPT_VQX_MISC_INT(0)); +} + +static void cptvf_clear_irde_intr(struct otx_cptvf *cptvf) +{ + union otx_cptx_vqx_misc_int vqx_misc_int; + + vqx_misc_int.u = readq(cptvf->reg_base + OTX_CPT_VQX_MISC_INT(0)); + /* W1C for the VF */ + vqx_misc_int.s.irde = 1; + writeq(vqx_misc_int.u, cptvf->reg_base + OTX_CPT_VQX_MISC_INT(0)); +} + +static void cptvf_clear_nwrp_intr(struct otx_cptvf *cptvf) +{ + union otx_cptx_vqx_misc_int vqx_misc_int; + + vqx_misc_int.u = readq(cptvf->reg_base + OTX_CPT_VQX_MISC_INT(0)); + /* W1C for the VF */ + vqx_misc_int.s.nwrp = 1; + writeq(vqx_misc_int.u, cptvf->reg_base + OTX_CPT_VQX_MISC_INT(0)); +} + +static void cptvf_clear_mbox_intr(struct otx_cptvf *cptvf) +{ + union otx_cptx_vqx_misc_int vqx_misc_int; + + vqx_misc_int.u = readq(cptvf->reg_base + OTX_CPT_VQX_MISC_INT(0)); + /* W1C for the VF */ + vqx_misc_int.s.mbox = 1; + writeq(vqx_misc_int.u, cptvf->reg_base + OTX_CPT_VQX_MISC_INT(0)); +} + +static void cptvf_clear_swerr_intr(struct otx_cptvf *cptvf) +{ + union otx_cptx_vqx_misc_int vqx_misc_int; + + 
vqx_misc_int.u = readq(cptvf->reg_base + OTX_CPT_VQX_MISC_INT(0)); + /* W1C for the VF */ + vqx_misc_int.s.swerr = 1; + writeq(vqx_misc_int.u, cptvf->reg_base + OTX_CPT_VQX_MISC_INT(0)); +} + +static u64 cptvf_read_vf_misc_intr_status(struct otx_cptvf *cptvf) +{ + return readq(cptvf->reg_base + OTX_CPT_VQX_MISC_INT(0)); +} + +static irqreturn_t cptvf_misc_intr_handler(int __always_unused irq, + void *arg) +{ + struct otx_cptvf *cptvf = arg; + struct pci_dev *pdev = cptvf->pdev; + u64 intr; + + intr = cptvf_read_vf_misc_intr_status(cptvf); + /* Check for MISC interrupt types */ + if (likely(intr & OTX_CPT_VF_INTR_MBOX_MASK)) { + dev_dbg(&pdev->dev, "Mailbox interrupt 0x%llx on CPT VF %d\n", + intr, cptvf->vfid); + otx_cptvf_handle_mbox_intr(cptvf); + cptvf_clear_mbox_intr(cptvf); + } else if (unlikely(intr & OTX_CPT_VF_INTR_DOVF_MASK)) { + cptvf_clear_dovf_intr(cptvf); + /* Clear doorbell count */ + otx_cptvf_write_vq_doorbell(cptvf, 0); + dev_err(&pdev->dev, + "Doorbell overflow error interrupt 0x%llx on CPT VF %d\n", + intr, cptvf->vfid); + } else if (unlikely(intr & OTX_CPT_VF_INTR_IRDE_MASK)) { + cptvf_clear_irde_intr(cptvf); + dev_err(&pdev->dev, + "Instruction NCB read error interrupt 0x%llx on CPT VF %d\n", + intr, cptvf->vfid); + } else if (unlikely(intr & OTX_CPT_VF_INTR_NWRP_MASK)) { + cptvf_clear_nwrp_intr(cptvf); + dev_err(&pdev->dev, + "NCB response write error interrupt 0x%llx on CPT VF %d\n", + intr, cptvf->vfid); + } else if (unlikely(intr & OTX_CPT_VF_INTR_SERR_MASK)) { + cptvf_clear_swerr_intr(cptvf); + dev_err(&pdev->dev, + "Software error interrupt 0x%llx on CPT VF %d\n", + intr, cptvf->vfid); + } else { + dev_err(&pdev->dev, "Unhandled interrupt in OTX_CPT VF %d\n", + cptvf->vfid); + } + + return IRQ_HANDLED; +} + +static inline struct otx_cptvf_wqe *get_cptvf_vq_wqe(struct otx_cptvf *cptvf, + int qno) +{ + struct otx_cptvf_wqe_info *nwqe_info; + + if (unlikely(qno >= cptvf->num_queues)) + return NULL; + nwqe_info = (struct otx_cptvf_wqe_info *)cptvf->wqe_info; + + return &nwqe_info->vq_wqe[qno]; +} + +static inline u32 cptvf_read_vq_done_count(struct otx_cptvf *cptvf) +{ + union otx_cptx_vqx_done vqx_done; + + vqx_done.u = readq(cptvf->reg_base + OTX_CPT_VQX_DONE(0)); + return vqx_done.s.done; +} + +static inline void cptvf_write_vq_done_ack(struct otx_cptvf *cptvf, + u32 ackcnt) +{ + union otx_cptx_vqx_done_ack vqx_dack_cnt; + + vqx_dack_cnt.u = readq(cptvf->reg_base + OTX_CPT_VQX_DONE_ACK(0)); + vqx_dack_cnt.s.done_ack = ackcnt; + writeq(vqx_dack_cnt.u, cptvf->reg_base + OTX_CPT_VQX_DONE_ACK(0)); +} + +static irqreturn_t cptvf_done_intr_handler(int __always_unused irq, + void *cptvf_dev) +{ + struct otx_cptvf *cptvf = (struct otx_cptvf *)cptvf_dev; + struct pci_dev *pdev = cptvf->pdev; + /* Read the number of completions */ + u32 intr = cptvf_read_vq_done_count(cptvf); + + if (intr) { + struct otx_cptvf_wqe *wqe; + + /* + * Acknowledge the number of scheduled completions for + * processing + */ + cptvf_write_vq_done_ack(cptvf, intr); + wqe = get_cptvf_vq_wqe(cptvf, 0); + if (unlikely(!wqe)) { + dev_err(&pdev->dev, "No work to schedule for VF (%d)", + cptvf->vfid); + return IRQ_NONE; + } + tasklet_hi_schedule(&wqe->twork); + } + + return IRQ_HANDLED; +} + +static void cptvf_set_irq_affinity(struct otx_cptvf *cptvf, int vec) +{ + struct pci_dev *pdev = cptvf->pdev; + int cpu; + + if (!zalloc_cpumask_var(&cptvf->affinity_mask[vec], + GFP_KERNEL)) { + dev_err(&pdev->dev, + "Allocation failed for affinity_mask for VF %d", + cptvf->vfid); + return; + } + + cpu = 
cptvf->vfid % num_online_cpus(); + cpumask_set_cpu(cpumask_local_spread(cpu, cptvf->node), + cptvf->affinity_mask[vec]); + irq_set_affinity_hint(pci_irq_vector(pdev, vec), + cptvf->affinity_mask[vec]); +} + +static void cptvf_write_vq_saddr(struct otx_cptvf *cptvf, u64 val) +{ + union otx_cptx_vqx_saddr vqx_saddr; + + vqx_saddr.u = val; + writeq(vqx_saddr.u, cptvf->reg_base + OTX_CPT_VQX_SADDR(0)); +} + +static void cptvf_device_init(struct otx_cptvf *cptvf) +{ + u64 base_addr = 0; + + /* Disable the VQ */ + cptvf_write_vq_ctl(cptvf, 0); + /* Reset the doorbell */ + otx_cptvf_write_vq_doorbell(cptvf, 0); + /* Clear inflight */ + cptvf_write_vq_inprog(cptvf, 0); + /* Write VQ SADDR */ + base_addr = (u64)(cptvf->cqinfo.queue[0].qhead->dma_addr); + cptvf_write_vq_saddr(cptvf, base_addr); + /* Configure timerhold / coalescence */ + cptvf_write_vq_done_timewait(cptvf, OTX_CPT_TIMER_HOLD); + cptvf_write_vq_done_numwait(cptvf, OTX_CPT_COUNT_HOLD); + /* Enable the VQ */ + cptvf_write_vq_ctl(cptvf, 1); + /* Flag the VF ready */ + cptvf->flags |= OTX_CPT_FLAG_DEVICE_READY; +} + +static ssize_t vf_type_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct otx_cptvf *cptvf = dev_get_drvdata(dev); + char *msg; + + switch (cptvf->vftype) { + case OTX_CPT_AE_TYPES: + msg = "AE"; + break; + + case OTX_CPT_SE_TYPES: + msg = "SE"; + break; + + default: + msg = "Invalid"; + } + + return scnprintf(buf, PAGE_SIZE, "%s\n", msg); +} + +static ssize_t vf_engine_group_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct otx_cptvf *cptvf = dev_get_drvdata(dev); + + return scnprintf(buf, PAGE_SIZE, "%d\n", cptvf->vfgrp); +} + +static ssize_t vf_engine_group_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct otx_cptvf *cptvf = dev_get_drvdata(dev); + int val, ret; + + ret = kstrtoint(buf, 10, &val); + if (ret) + return ret; + + if (val < 0) + return -EINVAL; + + if (val >= OTX_CPT_MAX_ENGINE_GROUPS) { + dev_err(dev, "Engine group >= than max available groups %d", + OTX_CPT_MAX_ENGINE_GROUPS); + return -EINVAL; + } + + ret = otx_cptvf_send_vf_to_grp_msg(cptvf, val); + if (ret) + return ret; + + return count; +} + +static ssize_t vf_coalesc_time_wait_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct otx_cptvf *cptvf = dev_get_drvdata(dev); + + return scnprintf(buf, PAGE_SIZE, "%d\n", + cptvf_read_vq_done_timewait(cptvf)); +} + +static ssize_t vf_coalesc_num_wait_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct otx_cptvf *cptvf = dev_get_drvdata(dev); + + return scnprintf(buf, PAGE_SIZE, "%d\n", + cptvf_read_vq_done_numwait(cptvf)); +} + +static ssize_t vf_coalesc_time_wait_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct otx_cptvf *cptvf = dev_get_drvdata(dev); + long val; + int ret; + + ret = kstrtol(buf, 10, &val); + if (ret != 0) + return ret; + + if (val < OTX_CPT_COALESC_MIN_TIME_WAIT || + val > OTX_CPT_COALESC_MAX_TIME_WAIT) + return -EINVAL; + + cptvf_write_vq_done_timewait(cptvf, val); + return count; +} + +static ssize_t vf_coalesc_num_wait_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct otx_cptvf *cptvf = dev_get_drvdata(dev); + long val; + int ret; + + ret = kstrtol(buf, 10, &val); + if (ret != 0) + return ret; + + if (val < OTX_CPT_COALESC_MIN_NUM_WAIT || + val > OTX_CPT_COALESC_MAX_NUM_WAIT) + return -EINVAL; + + 
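/* Value is within the allowed range, program the completion-count coalescing threshold */ +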
cptvf_write_vq_done_numwait(cptvf, val); + return count; +} + +static DEVICE_ATTR_RO(vf_type); +static DEVICE_ATTR_RW(vf_engine_group); +static DEVICE_ATTR_RW(vf_coalesc_time_wait); +static DEVICE_ATTR_RW(vf_coalesc_num_wait); + +static struct attribute *otx_cptvf_attrs[] = { + &dev_attr_vf_type.attr, + &dev_attr_vf_engine_group.attr, + &dev_attr_vf_coalesc_time_wait.attr, + &dev_attr_vf_coalesc_num_wait.attr, + NULL +}; + +static const struct attribute_group otx_cptvf_sysfs_group = { + .attrs = otx_cptvf_attrs, +}; + +static int otx_cptvf_probe(struct pci_dev *pdev, + const struct pci_device_id *ent) +{ + struct device *dev = &pdev->dev; + struct otx_cptvf *cptvf; + int err; + + cptvf = devm_kzalloc(dev, sizeof(*cptvf), GFP_KERNEL); + if (!cptvf) + return -ENOMEM; + + pci_set_drvdata(pdev, cptvf); + cptvf->pdev = pdev; + + err = pci_enable_device(pdev); + if (err) { + dev_err(dev, "Failed to enable PCI device\n"); + goto clear_drvdata; + } + err = pci_request_regions(pdev, DRV_NAME); + if (err) { + dev_err(dev, "PCI request regions failed 0x%x\n", err); + goto disable_device; + } + err = pci_set_dma_mask(pdev, DMA_BIT_MASK(48)); + if (err) { + dev_err(dev, "Unable to get usable DMA configuration\n"); + goto release_regions; + } + + err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(48)); + if (err) { + dev_err(dev, "Unable to get 48-bit DMA for consistent allocations\n"); + goto release_regions; + } + + /* MAP PF's configuration registers */ + cptvf->reg_base = pci_iomap(pdev, OTX_CPT_VF_PCI_CFG_BAR, 0); + if (!cptvf->reg_base) { + dev_err(dev, "Cannot map config register space, aborting\n"); + err = -ENOMEM; + goto release_regions; + } + + cptvf->node = dev_to_node(&pdev->dev); + err = pci_alloc_irq_vectors(pdev, OTX_CPT_VF_MSIX_VECTORS, + OTX_CPT_VF_MSIX_VECTORS, PCI_IRQ_MSIX); + if (err < 0) { + dev_err(dev, "Request for #%d msix vectors failed\n", + OTX_CPT_VF_MSIX_VECTORS); + goto unmap_region; + } + + err = request_irq(pci_irq_vector(pdev, CPT_VF_INT_VEC_E_MISC), + cptvf_misc_intr_handler, 0, "CPT VF misc intr", + cptvf); + if (err) { + dev_err(dev, "Failed to request misc irq"); + goto free_vectors; + } + + /* Enable mailbox interrupt */ + cptvf_enable_mbox_interrupts(cptvf); + cptvf_enable_swerr_interrupts(cptvf); + + /* Check cpt pf status, gets chip ID / device Id from PF if ready */ + err = otx_cptvf_check_pf_ready(cptvf); + if (err) + goto free_misc_irq; + + /* CPT VF software resources initialization */ + cptvf->cqinfo.qchunksize = OTX_CPT_CMD_QCHUNK_SIZE; + err = cptvf_sw_init(cptvf, OTX_CPT_CMD_QLEN, OTX_CPT_NUM_QS_PER_VF); + if (err) { + dev_err(dev, "cptvf_sw_init() failed"); + goto free_misc_irq; + } + /* Convey VQ LEN to PF */ + err = otx_cptvf_send_vq_size_msg(cptvf); + if (err) + goto sw_cleanup; + + /* CPT VF device initialization */ + cptvf_device_init(cptvf); + /* Send msg to PF to assign currnet Q to required group */ + err = otx_cptvf_send_vf_to_grp_msg(cptvf, cptvf->vfgrp); + if (err) + goto sw_cleanup; + + cptvf->priority = 1; + err = otx_cptvf_send_vf_priority_msg(cptvf); + if (err) + goto sw_cleanup; + + err = request_irq(pci_irq_vector(pdev, CPT_VF_INT_VEC_E_DONE), + cptvf_done_intr_handler, 0, "CPT VF done intr", + cptvf); + if (err) { + dev_err(dev, "Failed to request done irq\n"); + goto free_done_irq; + } + + /* Enable done interrupt */ + cptvf_enable_done_interrupts(cptvf); + + /* Set irq affinity masks */ + cptvf_set_irq_affinity(cptvf, CPT_VF_INT_VEC_E_MISC); + cptvf_set_irq_affinity(cptvf, CPT_VF_INT_VEC_E_DONE); + + err = 
otx_cptvf_send_vf_up(cptvf); + if (err) + goto free_irq_affinity; + + /* Initialize algorithms and set ops */ + err = otx_cpt_crypto_init(pdev, THIS_MODULE, + cptvf->vftype == OTX_CPT_SE_TYPES ? OTX_CPT_SE : OTX_CPT_AE, + cptvf->vftype, 1, cptvf->num_vfs); + if (err) { + dev_err(dev, "Failed to register crypto algs\n"); + goto free_irq_affinity; + } + + err = sysfs_create_group(&dev->kobj, &otx_cptvf_sysfs_group); + if (err) { + dev_err(dev, "Creating sysfs entries failed\n"); + goto crypto_exit; + } + + return 0; + +crypto_exit: + otx_cpt_crypto_exit(pdev, THIS_MODULE, cptvf->vftype); +free_irq_affinity: + cptvf_free_irq_affinity(cptvf, CPT_VF_INT_VEC_E_DONE); + cptvf_free_irq_affinity(cptvf, CPT_VF_INT_VEC_E_MISC); +free_done_irq: + free_irq(pci_irq_vector(pdev, CPT_VF_INT_VEC_E_DONE), cptvf); +sw_cleanup: + cptvf_sw_cleanup(cptvf); +free_misc_irq: + free_irq(pci_irq_vector(pdev, CPT_VF_INT_VEC_E_MISC), cptvf); +free_vectors: + pci_free_irq_vectors(cptvf->pdev); +unmap_region: + pci_iounmap(pdev, cptvf->reg_base); +release_regions: + pci_release_regions(pdev); +disable_device: + pci_disable_device(pdev); +clear_drvdata: + pci_set_drvdata(pdev, NULL); + + return err; +} + +static void otx_cptvf_remove(struct pci_dev *pdev) +{ + struct otx_cptvf *cptvf = pci_get_drvdata(pdev); + + if (!cptvf) { + dev_err(&pdev->dev, "Invalid CPT-VF device\n"); + return; + } + + /* Convey DOWN to PF */ + if (otx_cptvf_send_vf_down(cptvf)) { + dev_err(&pdev->dev, "PF not responding to DOWN msg"); + } else { + sysfs_remove_group(&pdev->dev.kobj, &otx_cptvf_sysfs_group); + otx_cpt_crypto_exit(pdev, THIS_MODULE, cptvf->vftype); + cptvf_free_irq_affinity(cptvf, CPT_VF_INT_VEC_E_DONE); + cptvf_free_irq_affinity(cptvf, CPT_VF_INT_VEC_E_MISC); + free_irq(pci_irq_vector(pdev, CPT_VF_INT_VEC_E_DONE), cptvf); + free_irq(pci_irq_vector(pdev, CPT_VF_INT_VEC_E_MISC), cptvf); + cptvf_sw_cleanup(cptvf); + pci_free_irq_vectors(cptvf->pdev); + pci_iounmap(pdev, cptvf->reg_base); + pci_release_regions(pdev); + pci_disable_device(pdev); + pci_set_drvdata(pdev, NULL); + } +} + +/* Supported devices */ +static const struct pci_device_id otx_cptvf_id_table[] = { + {PCI_VDEVICE(CAVIUM, OTX_CPT_PCI_VF_DEVICE_ID), 0}, + { 0, } /* end of table */ +}; + +static struct pci_driver otx_cptvf_pci_driver = { + .name = DRV_NAME, + .id_table = otx_cptvf_id_table, + .probe = otx_cptvf_probe, + .remove = otx_cptvf_remove, +}; + +module_pci_driver(otx_cptvf_pci_driver); + +MODULE_AUTHOR("Marvell International Ltd."); +MODULE_DESCRIPTION("Marvell OcteonTX CPT Virtual Function Driver"); +MODULE_LICENSE("GPL v2"); +MODULE_VERSION(DRV_VERSION); +MODULE_DEVICE_TABLE(pci, otx_cptvf_id_table); diff --git a/drivers/crypto/marvell/octeontx/otx_cptvf_mbox.c b/drivers/crypto/marvell/octeontx/otx_cptvf_mbox.c new file mode 100644 index 000000000000..5663787c7a62 --- /dev/null +++ b/drivers/crypto/marvell/octeontx/otx_cptvf_mbox.c @@ -0,0 +1,247 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Marvell OcteonTX CPT driver + * + * Copyright (C) 2019 Marvell International Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +#include <linux/delay.h> +#include "otx_cptvf.h" + +#define CPT_MBOX_MSG_TIMEOUT 2000 + +static char *get_mbox_opcode_str(int msg_opcode) +{ + char *str = "Unknown"; + + switch (msg_opcode) { + case OTX_CPT_MSG_VF_UP: + str = "UP"; + break; + + case OTX_CPT_MSG_VF_DOWN: + str = "DOWN"; + break; + + case OTX_CPT_MSG_READY: + str = "READY"; + break; + + case OTX_CPT_MSG_QLEN: + str = "QLEN"; + break; + + case OTX_CPT_MSG_QBIND_GRP: + str = "QBIND_GRP"; + break; + + case OTX_CPT_MSG_VQ_PRIORITY: + str = "VQ_PRIORITY"; + break; + + case OTX_CPT_MSG_PF_TYPE: + str = "PF_TYPE"; + break; + + case OTX_CPT_MSG_ACK: + str = "ACK"; + break; + + case OTX_CPT_MSG_NACK: + str = "NACK"; + break; + } + return str; +} + +static void dump_mbox_msg(struct otx_cpt_mbox *mbox_msg, int vf_id) +{ + char raw_data_str[OTX_CPT_MAX_MBOX_DATA_STR_SIZE]; + + hex_dump_to_buffer(mbox_msg, sizeof(struct otx_cpt_mbox), 16, 8, + raw_data_str, OTX_CPT_MAX_MBOX_DATA_STR_SIZE, false); + if (vf_id >= 0) + pr_debug("MBOX msg %s received from VF%d raw_data %s", + get_mbox_opcode_str(mbox_msg->msg), vf_id, + raw_data_str); + else + pr_debug("MBOX msg %s received from PF raw_data %s", + get_mbox_opcode_str(mbox_msg->msg), raw_data_str); +} + +static void cptvf_send_msg_to_pf(struct otx_cptvf *cptvf, + struct otx_cpt_mbox *mbx) +{ + /* Writing mbox(1) causes interrupt */ + writeq(mbx->msg, cptvf->reg_base + OTX_CPT_VFX_PF_MBOXX(0, 0)); + writeq(mbx->data, cptvf->reg_base + OTX_CPT_VFX_PF_MBOXX(0, 1)); +} + +/* Interrupt handler to handle mailbox messages from the PF */ +void otx_cptvf_handle_mbox_intr(struct otx_cptvf *cptvf) +{ + struct otx_cpt_mbox mbx = {}; + + /* + * MBOX[0] contains msg + * MBOX[1] contains data + */ + mbx.msg = readq(cptvf->reg_base + OTX_CPT_VFX_PF_MBOXX(0, 0)); + mbx.data = readq(cptvf->reg_base + OTX_CPT_VFX_PF_MBOXX(0, 1)); + + dump_mbox_msg(&mbx, -1); + + switch (mbx.msg) { + case OTX_CPT_MSG_VF_UP: + cptvf->pf_acked = true; + cptvf->num_vfs = mbx.data; + break; + case OTX_CPT_MSG_READY: + cptvf->pf_acked = true; + cptvf->vfid = mbx.data; + dev_dbg(&cptvf->pdev->dev, "Received VFID %d\n", cptvf->vfid); + break; + case OTX_CPT_MSG_QBIND_GRP: + cptvf->pf_acked = true; + cptvf->vftype = mbx.data; + dev_dbg(&cptvf->pdev->dev, "VF %d type %s group %d\n", + cptvf->vfid, + ((mbx.data == OTX_CPT_SE_TYPES) ? "SE" : "AE"), + cptvf->vfgrp); + break; + case OTX_CPT_MSG_ACK: + cptvf->pf_acked = true; + break; + case OTX_CPT_MSG_NACK: + cptvf->pf_nacked = true; + break; + default: + dev_err(&cptvf->pdev->dev, "Invalid msg from PF, msg 0x%llx\n", + mbx.msg); + break; + } +} + +static int cptvf_send_msg_to_pf_timeout(struct otx_cptvf *cptvf, + struct otx_cpt_mbox *mbx) +{ + int timeout = CPT_MBOX_MSG_TIMEOUT; + int sleep = 10; + + cptvf->pf_acked = false; + cptvf->pf_nacked = false; + cptvf_send_msg_to_pf(cptvf, mbx); + /* Wait for previous message to be acked, timeout 2sec */ + while (!cptvf->pf_acked) { + if (cptvf->pf_nacked) + return -EINVAL; + msleep(sleep); + if (cptvf->pf_acked) + break; + timeout -= sleep; + if (!timeout) { + dev_err(&cptvf->pdev->dev, + "PF didn't ack to mbox msg %llx from VF%u\n", + mbx->msg, cptvf->vfid); + return -EBUSY; + } + } + return 0; +} + +/* + * Checks if VF is able to communicate with PF + * and also gets the CPT number this VF is associated to. 
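+ * The PF's READY reply returns that number in the mailbox data word; the + * mbox interrupt handler stores it in cptvf->vfid. 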
+ */ +int otx_cptvf_check_pf_ready(struct otx_cptvf *cptvf) +{ + struct otx_cpt_mbox mbx = {}; + int ret; + + mbx.msg = OTX_CPT_MSG_READY; + ret = cptvf_send_msg_to_pf_timeout(cptvf, &mbx); + + return ret; +} + +/* + * Communicate VQs size to PF to program CPT(0)_PF_Q(0-15)_CTL of the VF. + * Must be ACKed. + */ +int otx_cptvf_send_vq_size_msg(struct otx_cptvf *cptvf) +{ + struct otx_cpt_mbox mbx = {}; + int ret; + + mbx.msg = OTX_CPT_MSG_QLEN; + mbx.data = cptvf->qsize; + ret = cptvf_send_msg_to_pf_timeout(cptvf, &mbx); + + return ret; +} + +/* + * Communicate VF group required to PF and get the VQ bound to that group + */ +int otx_cptvf_send_vf_to_grp_msg(struct otx_cptvf *cptvf, int group) +{ + struct otx_cpt_mbox mbx = {}; + int ret; + + mbx.msg = OTX_CPT_MSG_QBIND_GRP; + /* Convey group of the VF */ + mbx.data = group; + ret = cptvf_send_msg_to_pf_timeout(cptvf, &mbx); + if (ret) + return ret; + cptvf->vfgrp = group; + + return 0; +} + +/* + * Communicate the priority of this VF's VQ to PF + */ +int otx_cptvf_send_vf_priority_msg(struct otx_cptvf *cptvf) +{ + struct otx_cpt_mbox mbx = {}; + int ret; + + mbx.msg = OTX_CPT_MSG_VQ_PRIORITY; + /* Convey priority of the VF */ + mbx.data = cptvf->priority; + ret = cptvf_send_msg_to_pf_timeout(cptvf, &mbx); + + return ret; +} + +/* + * Communicate to PF that VF is UP and running + */ +int otx_cptvf_send_vf_up(struct otx_cptvf *cptvf) +{ + struct otx_cpt_mbox mbx = {}; + int ret; + + mbx.msg = OTX_CPT_MSG_VF_UP; + ret = cptvf_send_msg_to_pf_timeout(cptvf, &mbx); + + return ret; +} + +/* + * Communicate to PF that VF is DOWN + */ +int otx_cptvf_send_vf_down(struct otx_cptvf *cptvf) +{ + struct otx_cpt_mbox mbx = {}; + int ret; + + mbx.msg = OTX_CPT_MSG_VF_DOWN; + ret = cptvf_send_msg_to_pf_timeout(cptvf, &mbx); + + return ret; +} diff --git a/drivers/crypto/marvell/octeontx/otx_cptvf_reqmgr.c b/drivers/crypto/marvell/octeontx/otx_cptvf_reqmgr.c new file mode 100644 index 000000000000..df839b880354 --- /dev/null +++ b/drivers/crypto/marvell/octeontx/otx_cptvf_reqmgr.c @@ -0,0 +1,612 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Marvell OcteonTX CPT driver + * + * Copyright (C) 2019 Marvell International Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +#include "otx_cptvf.h" +#include "otx_cptvf_algs.h" + +/* Completion code size and initial value */ +#define COMPLETION_CODE_SIZE 8 +#define COMPLETION_CODE_INIT 0 + +/* SG list header size in bytes */ +#define SG_LIST_HDR_SIZE 8 + +/* Default timeout when waiting for free pending entry in us */ +#define CPT_PENTRY_TIMEOUT 1000 +#define CPT_PENTRY_STEP 50 + +/* Default threshold for stopping and resuming sender requests */ +#define CPT_IQ_STOP_MARGIN 128 +#define CPT_IQ_RESUME_MARGIN 512 + +#define CPT_DMA_ALIGN 128 + +void otx_cpt_dump_sg_list(struct pci_dev *pdev, struct otx_cpt_req_info *req) +{ + int i; + + pr_debug("Gather list size %d\n", req->incnt); + for (i = 0; i < req->incnt; i++) { + pr_debug("Buffer %d size %d, vptr 0x%p, dmaptr 0x%p\n", i, + req->in[i].size, req->in[i].vptr, + (void *) req->in[i].dma_addr); + pr_debug("Buffer hexdump (%d bytes)\n", + req->in[i].size); + print_hex_dump_debug("", DUMP_PREFIX_NONE, 16, 1, + req->in[i].vptr, req->in[i].size, false); + } + + pr_debug("Scatter list size %d\n", req->outcnt); + for (i = 0; i < req->outcnt; i++) { + pr_debug("Buffer %d size %d, vptr 0x%p, dmaptr 0x%p\n", i, + req->out[i].size, req->out[i].vptr, + (void *) req->out[i].dma_addr); + pr_debug("Buffer hexdump (%d bytes)\n", req->out[i].size); + print_hex_dump_debug("", DUMP_PREFIX_NONE, 16, 1, + req->out[i].vptr, req->out[i].size, false); + } +} + +static inline struct otx_cpt_pending_entry *get_free_pending_entry( + struct otx_cpt_pending_queue *q, + int qlen) +{ + struct otx_cpt_pending_entry *ent = NULL; + + ent = &q->head[q->rear]; + if (unlikely(ent->busy)) + return NULL; + + q->rear++; + if (unlikely(q->rear == qlen)) + q->rear = 0; + + return ent; +} + +static inline u32 modulo_inc(u32 index, u32 length, u32 inc) +{ + if (WARN_ON(inc > length)) + inc = length; + + index += inc; + if (unlikely(index >= length)) + index -= length; + + return index; +} + +static inline void free_pentry(struct otx_cpt_pending_entry *pentry) +{ + pentry->completion_addr = NULL; + pentry->info = NULL; + pentry->callback = NULL; + pentry->areq = NULL; + pentry->resume_sender = false; + pentry->busy = false; +} + +static inline int setup_sgio_components(struct pci_dev *pdev, + struct otx_cpt_buf_ptr *list, + int buf_count, u8 *buffer) +{ + struct otx_cpt_sglist_component *sg_ptr = NULL; + int ret = 0, i, j; + int components; + + if (unlikely(!list)) { + dev_err(&pdev->dev, "Input list pointer is NULL\n"); + return -EFAULT; + } + + for (i = 0; i < buf_count; i++) { + if (likely(list[i].vptr)) { + list[i].dma_addr = dma_map_single(&pdev->dev, + list[i].vptr, + list[i].size, + DMA_BIDIRECTIONAL); + if (unlikely(dma_mapping_error(&pdev->dev, + list[i].dma_addr))) { + dev_err(&pdev->dev, "Dma mapping failed\n"); + ret = -EIO; + goto sg_cleanup; + } + } + } + + components = buf_count / 4; + sg_ptr = (struct otx_cpt_sglist_component *)buffer; + for (i = 0; i < components; i++) { + sg_ptr->u.s.len0 = cpu_to_be16(list[i * 4 + 0].size); + sg_ptr->u.s.len1 = cpu_to_be16(list[i * 4 + 1].size); + sg_ptr->u.s.len2 = cpu_to_be16(list[i * 4 + 2].size); + sg_ptr->u.s.len3 = cpu_to_be16(list[i * 4 + 3].size); + sg_ptr->ptr0 = cpu_to_be64(list[i * 4 + 0].dma_addr); + sg_ptr->ptr1 = cpu_to_be64(list[i * 4 + 1].dma_addr); + sg_ptr->ptr2 = cpu_to_be64(list[i * 4 + 2].dma_addr); + sg_ptr->ptr3 = cpu_to_be64(list[i * 4 + 3].dma_addr); + sg_ptr++; + } + components = buf_count % 4; + + switch (components) { + case 3: + sg_ptr->u.s.len2 = cpu_to_be16(list[i * 4 + 2].size); + sg_ptr->ptr2 = cpu_to_be64(list[i 
* 4 + 2].dma_addr); + /* Fall through */ + case 2: + sg_ptr->u.s.len1 = cpu_to_be16(list[i * 4 + 1].size); + sg_ptr->ptr1 = cpu_to_be64(list[i * 4 + 1].dma_addr); + /* Fall through */ + case 1: + sg_ptr->u.s.len0 = cpu_to_be16(list[i * 4 + 0].size); + sg_ptr->ptr0 = cpu_to_be64(list[i * 4 + 0].dma_addr); + break; + default: + break; + } + return ret; + +sg_cleanup: + for (j = 0; j < i; j++) { + if (list[j].dma_addr) { + dma_unmap_single(&pdev->dev, list[i].dma_addr, + list[i].size, DMA_BIDIRECTIONAL); + } + + list[j].dma_addr = 0; + } + return ret; +} + +static inline int setup_sgio_list(struct pci_dev *pdev, + struct otx_cpt_info_buffer **pinfo, + struct otx_cpt_req_info *req, gfp_t gfp) +{ + u32 dlen, align_dlen, info_len, rlen; + struct otx_cpt_info_buffer *info; + u16 g_sz_bytes, s_sz_bytes; + int align = CPT_DMA_ALIGN; + u32 total_mem_len; + + if (unlikely(req->incnt > OTX_CPT_MAX_SG_IN_CNT || + req->outcnt > OTX_CPT_MAX_SG_OUT_CNT)) { + dev_err(&pdev->dev, "Error too many sg components\n"); + return -EINVAL; + } + + g_sz_bytes = ((req->incnt + 3) / 4) * + sizeof(struct otx_cpt_sglist_component); + s_sz_bytes = ((req->outcnt + 3) / 4) * + sizeof(struct otx_cpt_sglist_component); + + dlen = g_sz_bytes + s_sz_bytes + SG_LIST_HDR_SIZE; + align_dlen = ALIGN(dlen, align); + info_len = ALIGN(sizeof(*info), align); + rlen = ALIGN(sizeof(union otx_cpt_res_s), align); + total_mem_len = align_dlen + info_len + rlen + COMPLETION_CODE_SIZE; + + info = kzalloc(total_mem_len, gfp); + if (unlikely(!info)) { + dev_err(&pdev->dev, "Memory allocation failed\n"); + return -ENOMEM; + } + *pinfo = info; + info->dlen = dlen; + info->in_buffer = (u8 *)info + info_len; + + ((u16 *)info->in_buffer)[0] = req->outcnt; + ((u16 *)info->in_buffer)[1] = req->incnt; + ((u16 *)info->in_buffer)[2] = 0; + ((u16 *)info->in_buffer)[3] = 0; + *(u64 *)info->in_buffer = cpu_to_be64p((u64 *)info->in_buffer); + + /* Setup gather (input) components */ + if (setup_sgio_components(pdev, req->in, req->incnt, + &info->in_buffer[8])) { + dev_err(&pdev->dev, "Failed to setup gather list\n"); + return -EFAULT; + } + + if (setup_sgio_components(pdev, req->out, req->outcnt, + &info->in_buffer[8 + g_sz_bytes])) { + dev_err(&pdev->dev, "Failed to setup scatter list\n"); + return -EFAULT; + } + + info->dma_len = total_mem_len - info_len; + info->dptr_baddr = dma_map_single(&pdev->dev, (void *)info->in_buffer, + info->dma_len, DMA_BIDIRECTIONAL); + if (unlikely(dma_mapping_error(&pdev->dev, info->dptr_baddr))) { + dev_err(&pdev->dev, "DMA Mapping failed for cpt req\n"); + return -EIO; + } + /* + * Get buffer for union otx_cpt_res_s response + * structure and its physical address + */ + info->completion_addr = (u64 *)(info->in_buffer + align_dlen); + info->comp_baddr = info->dptr_baddr + align_dlen; + + /* Create and initialize RPTR */ + info->out_buffer = (u8 *)info->completion_addr + rlen; + info->rptr_baddr = info->comp_baddr + rlen; + + *((u64 *) info->out_buffer) = ~((u64) COMPLETION_CODE_INIT); + + return 0; +} + + +static void cpt_fill_inst(union otx_cpt_inst_s *inst, + struct otx_cpt_info_buffer *info, + struct otx_cpt_iq_cmd *cmd) +{ + inst->u[0] = 0x0; + inst->s.doneint = true; + inst->s.res_addr = (u64)info->comp_baddr; + inst->u[2] = 0x0; + inst->s.wq_ptr = 0; + inst->s.ei0 = cmd->cmd.u64; + inst->s.ei1 = cmd->dptr; + inst->s.ei2 = cmd->rptr; + inst->s.ei3 = cmd->cptr.u64; +} + +/* + * On OcteonTX platform the parameter db_count is used as a count for ringing + * door bell. 
The valid values for db_count are: + * 0 - 1 CPT instruction will be enqueued however CPT will not be informed + * 1 - 1 CPT instruction will be enqueued and CPT will be informed + */ +static void cpt_send_cmd(union otx_cpt_inst_s *cptinst, struct otx_cptvf *cptvf) +{ + struct otx_cpt_cmd_qinfo *qinfo = &cptvf->cqinfo; + struct otx_cpt_cmd_queue *queue; + struct otx_cpt_cmd_chunk *curr; + u8 *ent; + + queue = &qinfo->queue[0]; + /* + * cpt_send_cmd is currently called only from critical section + * therefore no locking is required for accessing instruction queue + */ + ent = &queue->qhead->head[queue->idx * OTX_CPT_INST_SIZE]; + memcpy(ent, (void *) cptinst, OTX_CPT_INST_SIZE); + + if (++queue->idx >= queue->qhead->size / 64) { + curr = queue->qhead; + + if (list_is_last(&curr->nextchunk, &queue->chead)) + queue->qhead = queue->base; + else + queue->qhead = list_next_entry(queue->qhead, nextchunk); + queue->idx = 0; + } + /* make sure all memory stores are done before ringing doorbell */ + smp_wmb(); + otx_cptvf_write_vq_doorbell(cptvf, 1); +} + +static int process_request(struct pci_dev *pdev, struct otx_cpt_req_info *req, + struct otx_cpt_pending_queue *pqueue, + struct otx_cptvf *cptvf) +{ + struct otx_cptvf_request *cpt_req = &req->req; + struct otx_cpt_pending_entry *pentry = NULL; + union otx_cpt_ctrl_info *ctrl = &req->ctrl; + struct otx_cpt_info_buffer *info = NULL; + union otx_cpt_res_s *result = NULL; + struct otx_cpt_iq_cmd iq_cmd; + union otx_cpt_inst_s cptinst; + int retry, ret = 0; + u8 resume_sender; + gfp_t gfp; + + gfp = (req->areq->flags & CRYPTO_TFM_REQ_MAY_SLEEP) ? GFP_KERNEL : + GFP_ATOMIC; + ret = setup_sgio_list(pdev, &info, req, gfp); + if (unlikely(ret)) { + dev_err(&pdev->dev, "Setting up SG list failed"); + goto request_cleanup; + } + cpt_req->dlen = info->dlen; + + result = (union otx_cpt_res_s *) info->completion_addr; + result->s.compcode = COMPLETION_CODE_INIT; + + spin_lock_bh(&pqueue->lock); + pentry = get_free_pending_entry(pqueue, pqueue->qlen); + retry = CPT_PENTRY_TIMEOUT / CPT_PENTRY_STEP; + while (unlikely(!pentry) && retry--) { + spin_unlock_bh(&pqueue->lock); + udelay(CPT_PENTRY_STEP); + spin_lock_bh(&pqueue->lock); + pentry = get_free_pending_entry(pqueue, pqueue->qlen); + } + + if (unlikely(!pentry)) { + ret = -ENOSPC; + spin_unlock_bh(&pqueue->lock); + goto request_cleanup; + } + + /* + * Check if we are close to filling in entire pending queue, + * if so then tell the sender to stop/sleep by returning -EBUSY + * We do it only for context which can sleep (GFP_KERNEL) + */ + if (gfp == GFP_KERNEL && + pqueue->pending_count > (pqueue->qlen - CPT_IQ_STOP_MARGIN)) { + pentry->resume_sender = true; + } else + pentry->resume_sender = false; + resume_sender = pentry->resume_sender; + pqueue->pending_count++; + + pentry->completion_addr = info->completion_addr; + pentry->info = info; + pentry->callback = req->callback; + pentry->areq = req->areq; + pentry->busy = true; + info->pentry = pentry; + info->time_in = jiffies; + info->req = req; + + /* Fill in the command */ + iq_cmd.cmd.u64 = 0; + iq_cmd.cmd.s.opcode = cpu_to_be16(cpt_req->opcode.flags); + iq_cmd.cmd.s.param1 = cpu_to_be16(cpt_req->param1); + iq_cmd.cmd.s.param2 = cpu_to_be16(cpt_req->param2); + iq_cmd.cmd.s.dlen = cpu_to_be16(cpt_req->dlen); + + /* 64-bit swap for microcode data reads, not needed for addresses*/ + iq_cmd.cmd.u64 = cpu_to_be64(iq_cmd.cmd.u64); + iq_cmd.dptr = info->dptr_baddr; + iq_cmd.rptr = info->rptr_baddr; + iq_cmd.cptr.u64 = 0; + iq_cmd.cptr.s.grp = ctrl->s.grp; + + /* 
Fill in the CPT_INST_S type command for HW interpretation */ + cpt_fill_inst(&cptinst, info, &iq_cmd); + + /* Print debug info if enabled */ + otx_cpt_dump_sg_list(pdev, req); + pr_debug("Cpt_inst_s hexdump (%d bytes)\n", OTX_CPT_INST_SIZE); + print_hex_dump_debug("", 0, 16, 1, &cptinst, OTX_CPT_INST_SIZE, false); + pr_debug("Dptr hexdump (%d bytes)\n", cpt_req->dlen); + print_hex_dump_debug("", 0, 16, 1, info->in_buffer, + cpt_req->dlen, false); + + /* Send CPT command */ + cpt_send_cmd(&cptinst, cptvf); + + /* + * We allocate and prepare pending queue entry in critical section + * together with submitting CPT instruction to CPT instruction queue + * to make sure that order of CPT requests is the same in both + * pending and instruction queues + */ + spin_unlock_bh(&pqueue->lock); + + ret = resume_sender ? -EBUSY : -EINPROGRESS; + return ret; + +request_cleanup: + do_request_cleanup(pdev, info); + return ret; +} + +int otx_cpt_do_request(struct pci_dev *pdev, struct otx_cpt_req_info *req, + int cpu_num) +{ + struct otx_cptvf *cptvf = pci_get_drvdata(pdev); + + if (!otx_cpt_device_ready(cptvf)) { + dev_err(&pdev->dev, "CPT Device is not ready"); + return -ENODEV; + } + + if ((cptvf->vftype == OTX_CPT_SE_TYPES) && (!req->ctrl.s.se_req)) { + dev_err(&pdev->dev, "CPTVF-%d of SE TYPE got AE request", + cptvf->vfid); + return -EINVAL; + } else if ((cptvf->vftype == OTX_CPT_AE_TYPES) && + (req->ctrl.s.se_req)) { + dev_err(&pdev->dev, "CPTVF-%d of AE TYPE got SE request", + cptvf->vfid); + return -EINVAL; + } + + return process_request(pdev, req, &cptvf->pqinfo.queue[0], cptvf); +} + +static int cpt_process_ccode(struct pci_dev *pdev, + union otx_cpt_res_s *cpt_status, + struct otx_cpt_info_buffer *cpt_info, + struct otx_cpt_req_info *req, u32 *res_code) +{ + u8 ccode = cpt_status->s.compcode; + union otx_cpt_error_code ecode; + + ecode.u = be64_to_cpu(*((u64 *) cpt_info->out_buffer)); + switch (ccode) { + case CPT_COMP_E_FAULT: + dev_err(&pdev->dev, + "Request failed with DMA fault\n"); + otx_cpt_dump_sg_list(pdev, req); + break; + + case CPT_COMP_E_SWERR: + dev_err(&pdev->dev, + "Request failed with software error code %d\n", + ecode.s.ccode); + otx_cpt_dump_sg_list(pdev, req); + break; + + case CPT_COMP_E_HWERR: + dev_err(&pdev->dev, + "Request failed with hardware error\n"); + otx_cpt_dump_sg_list(pdev, req); + break; + + case COMPLETION_CODE_INIT: + /* check for timeout */ + if (time_after_eq(jiffies, cpt_info->time_in + + OTX_CPT_COMMAND_TIMEOUT * HZ)) + dev_warn(&pdev->dev, "Request timed out 0x%p", req); + else if (cpt_info->extra_time < OTX_CPT_TIME_IN_RESET_COUNT) { + cpt_info->time_in = jiffies; + cpt_info->extra_time++; + } + return 1; + + case CPT_COMP_E_GOOD: + /* Check microcode completion code */ + if (ecode.s.ccode) { + /* + * If requested hmac is truncated and ucode returns + * s/g write length error then we report success + * because ucode writes as many bytes of calculated + * hmac as available in gather buffer and reports + * s/g write length error if number of bytes in gather + * buffer is less than full hmac size. 
+ */ + if (req->is_trunc_hmac && + ecode.s.ccode == ERR_SCATTER_GATHER_WRITE_LENGTH) { + *res_code = 0; + break; + } + + dev_err(&pdev->dev, + "Request failed with software error code 0x%x\n", + ecode.s.ccode); + otx_cpt_dump_sg_list(pdev, req); + break; + } + + /* Request has been processed with success */ + *res_code = 0; + break; + + default: + dev_err(&pdev->dev, "Request returned invalid status\n"); + break; + } + + return 0; +} + +static inline void process_pending_queue(struct pci_dev *pdev, + struct otx_cpt_pending_queue *pqueue) +{ + void (*callback)(int status, void *arg1, void *arg2); + struct otx_cpt_pending_entry *resume_pentry = NULL; + struct otx_cpt_pending_entry *pentry = NULL; + struct otx_cpt_info_buffer *cpt_info = NULL; + union otx_cpt_res_s *cpt_status = NULL; + struct otx_cpt_req_info *req = NULL; + struct crypto_async_request *areq; + u32 res_code, resume_index; + + while (1) { + spin_lock_bh(&pqueue->lock); + pentry = &pqueue->head[pqueue->front]; + + if (WARN_ON(!pentry)) { + spin_unlock_bh(&pqueue->lock); + break; + } + + res_code = -EINVAL; + if (unlikely(!pentry->busy)) { + spin_unlock_bh(&pqueue->lock); + break; + } + + if (unlikely(!pentry->callback)) { + dev_err(&pdev->dev, "Callback NULL\n"); + goto process_pentry; + } + + cpt_info = pentry->info; + if (unlikely(!cpt_info)) { + dev_err(&pdev->dev, "Pending entry post arg NULL\n"); + goto process_pentry; + } + + req = cpt_info->req; + if (unlikely(!req)) { + dev_err(&pdev->dev, "Request NULL\n"); + goto process_pentry; + } + + cpt_status = (union otx_cpt_res_s *) pentry->completion_addr; + if (unlikely(!cpt_status)) { + dev_err(&pdev->dev, "Completion address NULL\n"); + goto process_pentry; + } + + if (cpt_process_ccode(pdev, cpt_status, cpt_info, req, + &res_code)) { + spin_unlock_bh(&pqueue->lock); + return; + } + cpt_info->pdev = pdev; + +process_pentry: + /* + * Check if we should inform sending side to resume + * We do it CPT_IQ_RESUME_MARGIN elements in advance before + * pending queue becomes empty + */ + resume_index = modulo_inc(pqueue->front, pqueue->qlen, + CPT_IQ_RESUME_MARGIN); + resume_pentry = &pqueue->head[resume_index]; + if (resume_pentry && + resume_pentry->resume_sender) { + resume_pentry->resume_sender = false; + callback = resume_pentry->callback; + areq = resume_pentry->areq; + + if (callback) { + spin_unlock_bh(&pqueue->lock); + + /* + * EINPROGRESS is an indication for sending + * side that it can resume sending requests + */ + callback(-EINPROGRESS, areq, cpt_info); + spin_lock_bh(&pqueue->lock); + } + } + + callback = pentry->callback; + areq = pentry->areq; + free_pentry(pentry); + + pqueue->pending_count--; + pqueue->front = modulo_inc(pqueue->front, pqueue->qlen, 1); + spin_unlock_bh(&pqueue->lock); + + /* + * Call callback after current pending entry has been + * processed, we don't do it if the callback pointer is + * invalid. + */ + if (callback) + callback(res_code, areq, cpt_info); + } +} + +void otx_cpt_post_process(struct otx_cptvf_wqe *wqe) +{ + process_pending_queue(wqe->cptvf->pdev, &wqe->cptvf->pqinfo.queue[0]); +} diff --git a/drivers/crypto/marvell/octeontx/otx_cptvf_reqmgr.h b/drivers/crypto/marvell/octeontx/otx_cptvf_reqmgr.h new file mode 100644 index 000000000000..a4c9ff730b13 --- /dev/null +++ b/drivers/crypto/marvell/octeontx/otx_cptvf_reqmgr.h @@ -0,0 +1,227 @@ +/* SPDX-License-Identifier: GPL-2.0 + * Marvell OcteonTX CPT driver + * + * Copyright (C) 2019 Marvell International Ltd. 
+ * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef __OTX_CPTVF_REQUEST_MANAGER_H +#define __OTX_CPTVF_REQUEST_MANAGER_H + +#include <linux/types.h> +#include <linux/crypto.h> +#include <linux/pci.h> +#include "otx_cpt_hw_types.h" + +/* + * Maximum total number of SG buffers is 100, we divide it equally + * between input and output + */ +#define OTX_CPT_MAX_SG_IN_CNT 50 +#define OTX_CPT_MAX_SG_OUT_CNT 50 + +/* DMA mode direct or SG */ +#define OTX_CPT_DMA_DIRECT_DIRECT 0 +#define OTX_CPT_DMA_GATHER_SCATTER 1 + +/* Context source CPTR or DPTR */ +#define OTX_CPT_FROM_CPTR 0 +#define OTX_CPT_FROM_DPTR 1 + +/* CPT instruction queue alignment */ +#define OTX_CPT_INST_Q_ALIGNMENT 128 +#define OTX_CPT_MAX_REQ_SIZE 65535 + +/* Default command timeout in seconds */ +#define OTX_CPT_COMMAND_TIMEOUT 4 +#define OTX_CPT_TIMER_HOLD 0x03F +#define OTX_CPT_COUNT_HOLD 32 +#define OTX_CPT_TIME_IN_RESET_COUNT 5 + +/* Minimum and maximum values for interrupt coalescing */ +#define OTX_CPT_COALESC_MIN_TIME_WAIT 0x0 +#define OTX_CPT_COALESC_MAX_TIME_WAIT ((1<<16)-1) +#define OTX_CPT_COALESC_MIN_NUM_WAIT 0x0 +#define OTX_CPT_COALESC_MAX_NUM_WAIT ((1<<20)-1) + +union otx_cpt_opcode_info { + u16 flags; + struct { + u8 major; + u8 minor; + } s; +}; + +struct otx_cptvf_request { + u32 param1; + u32 param2; + u16 dlen; + union otx_cpt_opcode_info opcode; +}; + +struct otx_cpt_buf_ptr { + u8 *vptr; + dma_addr_t dma_addr; + u16 size; +}; + +union otx_cpt_ctrl_info { + u32 flags; + struct { +#if defined(__BIG_ENDIAN_BITFIELD) + u32 reserved0:26; + u32 grp:3; /* Group bits */ + u32 dma_mode:2; /* DMA mode */ + u32 se_req:1; /* To SE core */ +#else + u32 se_req:1; /* To SE core */ + u32 dma_mode:2; /* DMA mode */ + u32 grp:3; /* Group bits */ + u32 reserved0:26; +#endif + } s; +}; + +/* + * CPT_INST_S software command definitions + * Words EI (0-3) + */ +union otx_cpt_iq_cmd_word0 { + u64 u64; + struct { + u16 opcode; + u16 param1; + u16 param2; + u16 dlen; + } s; +}; + +union otx_cpt_iq_cmd_word3 { + u64 u64; + struct { +#if defined(__BIG_ENDIAN_BITFIELD) + u64 grp:3; + u64 cptr:61; +#else + u64 cptr:61; + u64 grp:3; +#endif + } s; +}; + +struct otx_cpt_iq_cmd { + union otx_cpt_iq_cmd_word0 cmd; + u64 dptr; + u64 rptr; + union otx_cpt_iq_cmd_word3 cptr; +}; + +struct otx_cpt_sglist_component { + union { + u64 len; + struct { + u16 len0; + u16 len1; + u16 len2; + u16 len3; + } s; + } u; + u64 ptr0; + u64 ptr1; + u64 ptr2; + u64 ptr3; +}; + +struct otx_cpt_pending_entry { + u64 *completion_addr; /* Completion address */ + struct otx_cpt_info_buffer *info; + /* Kernel async request callback */ + void (*callback)(int status, void *arg1, void *arg2); + struct crypto_async_request *areq; /* Async request callback arg */ + u8 resume_sender; /* Notify sender to resume sending requests */ + u8 busy; /* Entry status (free/busy) */ +}; + +struct otx_cpt_pending_queue { + struct otx_cpt_pending_entry *head; /* Head of the queue */ + u32 front; /* Process work from here */ + u32 rear; /* Append new work here */ + u32 pending_count; /* Pending requests count */ + u32 qlen; /* Queue length */ + spinlock_t lock; /* Queue lock */ +}; + +struct otx_cpt_req_info { + /* Kernel async request callback */ + void (*callback)(int status, void *arg1, void *arg2); + struct crypto_async_request *areq; /* Async request callback arg */ + struct otx_cptvf_request req;/* Request information (core 
specific) */ + union otx_cpt_ctrl_info ctrl;/* User control information */ + struct otx_cpt_buf_ptr in[OTX_CPT_MAX_SG_IN_CNT]; + struct otx_cpt_buf_ptr out[OTX_CPT_MAX_SG_OUT_CNT]; + u8 *iv_out; /* IV to send back */ + u16 rlen; /* Output length */ + u8 incnt; /* Number of input buffers */ + u8 outcnt; /* Number of output buffers */ + u8 req_type; /* Type of request */ + u8 is_enc; /* Is a request an encryption request */ + u8 is_trunc_hmac;/* Is truncated hmac used */ +}; + +struct otx_cpt_info_buffer { + struct otx_cpt_pending_entry *pentry; + struct otx_cpt_req_info *req; + struct pci_dev *pdev; + u64 *completion_addr; + u8 *out_buffer; + u8 *in_buffer; + dma_addr_t dptr_baddr; + dma_addr_t rptr_baddr; + dma_addr_t comp_baddr; + unsigned long time_in; + u32 dlen; + u32 dma_len; + u8 extra_time; +}; + +static inline void do_request_cleanup(struct pci_dev *pdev, + struct otx_cpt_info_buffer *info) +{ + struct otx_cpt_req_info *req; + int i; + + if (info->dptr_baddr) + dma_unmap_single(&pdev->dev, info->dptr_baddr, + info->dma_len, DMA_BIDIRECTIONAL); + + if (info->req) { + req = info->req; + for (i = 0; i < req->outcnt; i++) { + if (req->out[i].dma_addr) + dma_unmap_single(&pdev->dev, + req->out[i].dma_addr, + req->out[i].size, + DMA_BIDIRECTIONAL); + } + + for (i = 0; i < req->incnt; i++) { + if (req->in[i].dma_addr) + dma_unmap_single(&pdev->dev, + req->in[i].dma_addr, + req->in[i].size, + DMA_BIDIRECTIONAL); + } + } + kzfree(info); +} + +struct otx_cptvf_wqe; +void otx_cpt_dump_sg_list(struct pci_dev *pdev, struct otx_cpt_req_info *req); +void otx_cpt_post_process(struct otx_cptvf_wqe *wqe); +int otx_cpt_do_request(struct pci_dev *pdev, struct otx_cpt_req_info *req, + int cpu_num); + +#endif /* __OTX_CPTVF_REQUEST_MANAGER_H */ diff --git a/drivers/crypto/mediatek/mtk-sha.c b/drivers/crypto/mediatek/mtk-sha.c index 9e9f48bb7f85..bd6309e57ab8 100644 --- a/drivers/crypto/mediatek/mtk-sha.c +++ b/drivers/crypto/mediatek/mtk-sha.c @@ -107,7 +107,7 @@ struct mtk_sha_ctx { u8 id; u8 buf[SHA_BUF_SIZE] __aligned(sizeof(u32)); - struct mtk_sha_hmac_ctx base[0]; + struct mtk_sha_hmac_ctx base[]; }; struct mtk_sha_drv { diff --git a/drivers/crypto/mxs-dcp.c b/drivers/crypto/mxs-dcp.c index 435ac1c83df9..d84530293036 100644 --- a/drivers/crypto/mxs-dcp.c +++ b/drivers/crypto/mxs-dcp.c @@ -20,6 +20,7 @@ #include <crypto/sha.h> #include <crypto/internal/hash.h> #include <crypto/internal/skcipher.h> +#include <crypto/scatterwalk.h> #define DCP_MAX_CHANS 4 #define DCP_BUF_SZ PAGE_SIZE @@ -611,49 +612,46 @@ static int dcp_sha_req_to_buf(struct crypto_async_request *arq) struct dcp_async_ctx *actx = crypto_ahash_ctx(tfm); struct dcp_sha_req_ctx *rctx = ahash_request_ctx(req); struct hash_alg_common *halg = crypto_hash_alg_common(tfm); - const int nents = sg_nents(req->src); uint8_t *in_buf = sdcp->coh->sha_in_buf; uint8_t *out_buf = sdcp->coh->sha_out_buf; - uint8_t *src_buf; - struct scatterlist *src; - unsigned int i, len, clen; + unsigned int i, len, clen, oft = 0; int ret; int fin = rctx->fini; if (fin) rctx->fini = 0; - for_each_sg(req->src, src, nents, i) { - src_buf = sg_virt(src); - len = sg_dma_len(src); - - do { - if (actx->fill + len > DCP_BUF_SZ) - clen = DCP_BUF_SZ - actx->fill; - else - clen = len; - - memcpy(in_buf + actx->fill, src_buf, clen); - len -= clen; - src_buf += clen; - actx->fill += clen; + src = req->src; + len = req->nbytes; - /* - * If we filled the buffer and still have some - * more data, submit the buffer. 
- */ - if (len && actx->fill == DCP_BUF_SZ) { - ret = mxs_dcp_run_sha(req); - if (ret) - return ret; - actx->fill = 0; - rctx->init = 0; - } - } while (len); + while (len) { + if (actx->fill + len > DCP_BUF_SZ) + clen = DCP_BUF_SZ - actx->fill; + else + clen = len; + + scatterwalk_map_and_copy(in_buf + actx->fill, src, oft, clen, + 0); + + len -= clen; + oft += clen; + actx->fill += clen; + + /* + * If we filled the buffer and still have some + * more data, submit the buffer. + */ + if (len && actx->fill == DCP_BUF_SZ) { + ret = mxs_dcp_run_sha(req); + if (ret) + return ret; + actx->fill = 0; + rctx->init = 0; + } } if (fin) { diff --git a/drivers/crypto/nx/nx.h b/drivers/crypto/nx/nx.h index 91c54289124a..c6233173c612 100644 --- a/drivers/crypto/nx/nx.h +++ b/drivers/crypto/nx/nx.h @@ -37,7 +37,7 @@ struct max_sync_cop { u32 fc; u32 mode; u32 triplets; - struct msc_triplet trip[0]; + struct msc_triplet trip[]; } __packed; struct alg_props { diff --git a/drivers/crypto/omap-sham.c b/drivers/crypto/omap-sham.c index 4f915a4ef5b0..e4072cd38585 100644 --- a/drivers/crypto/omap-sham.c +++ b/drivers/crypto/omap-sham.c @@ -159,7 +159,7 @@ struct omap_sham_reqctx { int sg_len; unsigned int total; /* total request */ - u8 buffer[0] OMAP_ALIGNED; + u8 buffer[] OMAP_ALIGNED; }; struct omap_sham_hmac_ctx { @@ -176,7 +176,7 @@ struct omap_sham_ctx { /* fallback stuff */ struct crypto_shash *fallback; - struct omap_sham_hmac_ctx base[0]; + struct omap_sham_hmac_ctx base[]; }; #define OMAP_SHAM_QUEUE_LENGTH 10 diff --git a/drivers/crypto/qat/qat_common/qat_algs.c b/drivers/crypto/qat/qat_common/qat_algs.c index 833bb1d3a11b..e14d3dd291f0 100644 --- a/drivers/crypto/qat/qat_common/qat_algs.c +++ b/drivers/crypto/qat/qat_common/qat_algs.c @@ -97,7 +97,7 @@ struct qat_alg_cd { struct icp_qat_hw_cipher_algo_blk cipher; struct icp_qat_hw_auth_algo_blk hash; } qat_enc_cd; - struct qat_dec { /* Decrytp content desc */ + struct qat_dec { /* Decrypt content desc */ struct icp_qat_hw_auth_algo_blk hash; struct icp_qat_hw_cipher_algo_blk cipher; } qat_dec_cd; diff --git a/drivers/crypto/qat/qat_common/qat_crypto.c b/drivers/crypto/qat/qat_common/qat_crypto.c index 3852d31ce0a4..fb504cee0305 100644 --- a/drivers/crypto/qat/qat_common/qat_crypto.c +++ b/drivers/crypto/qat/qat_common/qat_crypto.c @@ -250,8 +250,7 @@ static int qat_crypto_create_instances(struct adf_accel_dev *accel_dev) char val[ADF_CFG_MAX_VAL_LEN_IN_BYTES]; INIT_LIST_HEAD(&accel_dev->crypto_list); - strlcpy(key, ADF_NUM_CY, sizeof(key)); - if (adf_cfg_get_param_value(accel_dev, SEC, key, val)) + if (adf_cfg_get_param_value(accel_dev, SEC, ADF_NUM_CY, val)) return -EFAULT; if (kstrtoul(val, 0, &num_inst)) diff --git a/drivers/crypto/qce/common.c b/drivers/crypto/qce/common.c index 629e7f34dc09..5006e74c40cd 100644 --- a/drivers/crypto/qce/common.c +++ b/drivers/crypto/qce/common.c @@ -15,8 +15,6 @@ #include "regs-v5.h" #include "sha.h" -#define QCE_SECTOR_SIZE 512 - static inline u32 qce_read(struct qce_device *qce, u32 offset) { return readl(qce->base + offset); diff --git a/drivers/crypto/qce/common.h b/drivers/crypto/qce/common.h index 282d4317470d..9f989cba0f1b 100644 --- a/drivers/crypto/qce/common.h +++ b/drivers/crypto/qce/common.h @@ -12,6 +12,9 @@ #include <crypto/hash.h> #include <crypto/internal/skcipher.h> +/* xts du size */ +#define QCE_SECTOR_SIZE 512 + /* key size in bytes */ #define QCE_SHA_HMAC_KEY_SIZE 64 #define QCE_MAX_CIPHER_KEY_SIZE AES_KEYSIZE_256 diff --git a/drivers/crypto/qce/dma.c b/drivers/crypto/qce/dma.c index 
7da893dc00e7..46db5bf366b4 100644 --- a/drivers/crypto/qce/dma.c +++ b/drivers/crypto/qce/dma.c @@ -48,9 +48,10 @@ void qce_dma_release(struct qce_dma_data *dma) struct scatterlist * qce_sgtable_add(struct sg_table *sgt, struct scatterlist *new_sgl, - int max_ents) + unsigned int max_len) { struct scatterlist *sg = sgt->sgl, *sg_last = NULL; + unsigned int new_len; while (sg) { if (!sg_page(sg)) @@ -61,13 +62,13 @@ qce_sgtable_add(struct sg_table *sgt, struct scatterlist *new_sgl, if (!sg) return ERR_PTR(-EINVAL); - while (new_sgl && sg && max_ents) { - sg_set_page(sg, sg_page(new_sgl), new_sgl->length, - new_sgl->offset); + while (new_sgl && sg && max_len) { + new_len = new_sgl->length > max_len ? max_len : new_sgl->length; + sg_set_page(sg, sg_page(new_sgl), new_len, new_sgl->offset); sg_last = sg; sg = sg_next(sg); new_sgl = sg_next(new_sgl); - max_ents--; + max_len -= new_len; } return sg_last; diff --git a/drivers/crypto/qce/dma.h b/drivers/crypto/qce/dma.h index ed25a0d9829e..786402169360 100644 --- a/drivers/crypto/qce/dma.h +++ b/drivers/crypto/qce/dma.h @@ -43,6 +43,6 @@ void qce_dma_issue_pending(struct qce_dma_data *dma); int qce_dma_terminate_all(struct qce_dma_data *dma); struct scatterlist * qce_sgtable_add(struct sg_table *sgt, struct scatterlist *sg_add, - int max_ents); + unsigned int max_len); #endif /* _DMA_H_ */ diff --git a/drivers/crypto/qce/skcipher.c b/drivers/crypto/qce/skcipher.c index 4217b745f124..9412433f3b21 100644 --- a/drivers/crypto/qce/skcipher.c +++ b/drivers/crypto/qce/skcipher.c @@ -5,6 +5,7 @@ #include <linux/device.h> #include <linux/interrupt.h> +#include <linux/moduleparam.h> #include <linux/types.h> #include <crypto/aes.h> #include <crypto/internal/des.h> @@ -12,6 +13,13 @@ #include "cipher.h" +static unsigned int aes_sw_max_len = CONFIG_CRYPTO_DEV_QCE_SW_MAX_LEN; +module_param(aes_sw_max_len, uint, 0644); +MODULE_PARM_DESC(aes_sw_max_len, + "Only use hardware for AES requests larger than this " + "[0=always use hardware; anything <16 breaks AES-GCM; default=" + __stringify(CONFIG_CRYPTO_DEV_QCE_SW_MAX_LEN)"]"); + static LIST_HEAD(skcipher_algs); static void qce_skcipher_done(void *data) @@ -97,13 +105,14 @@ qce_skcipher_async_req_handle(struct crypto_async_request *async_req) sg_init_one(&rctx->result_sg, qce->dma.result_buf, QCE_RESULT_BUF_SZ); - sg = qce_sgtable_add(&rctx->dst_tbl, req->dst, rctx->dst_nents - 1); + sg = qce_sgtable_add(&rctx->dst_tbl, req->dst, req->cryptlen); if (IS_ERR(sg)) { ret = PTR_ERR(sg); goto error_free; } - sg = qce_sgtable_add(&rctx->dst_tbl, &rctx->result_sg, 1); + sg = qce_sgtable_add(&rctx->dst_tbl, &rctx->result_sg, + QCE_RESULT_BUF_SZ); if (IS_ERR(sg)) { ret = PTR_ERR(sg); goto error_free; @@ -165,15 +174,10 @@ static int qce_skcipher_setkey(struct crypto_skcipher *ablk, const u8 *key, switch (IS_XTS(flags) ? keylen >> 1 : keylen) { case AES_KEYSIZE_128: case AES_KEYSIZE_256: + memcpy(ctx->enc_key, key, keylen); break; - default: - goto fallback; } - ctx->enc_keylen = keylen; - memcpy(ctx->enc_key, key, keylen); - return 0; -fallback: ret = crypto_sync_skcipher_setkey(ctx->fallback, key, keylen); if (!ret) ctx->enc_keylen = keylen; @@ -223,8 +227,14 @@ static int qce_skcipher_crypt(struct skcipher_request *req, int encrypt) rctx->flags |= encrypt ? QCE_ENCRYPT : QCE_DECRYPT; keylen = IS_XTS(rctx->flags) ? 
ctx->enc_keylen >> 1 : ctx->enc_keylen; - if (IS_AES(rctx->flags) && keylen != AES_KEYSIZE_128 && - keylen != AES_KEYSIZE_256) { + /* qce is hanging when AES-XTS request len > QCE_SECTOR_SIZE and + * is not a multiple of it; pass such requests to the fallback + */ + if (IS_AES(rctx->flags) && + (((keylen != AES_KEYSIZE_128 && keylen != AES_KEYSIZE_256) || + req->cryptlen <= aes_sw_max_len) || + (IS_XTS(rctx->flags) && req->cryptlen > QCE_SECTOR_SIZE && + req->cryptlen % QCE_SECTOR_SIZE))) { SYNC_SKCIPHER_REQUEST_ON_STACK(subreq, ctx->fallback); skcipher_request_set_sync_tfm(subreq, ctx->fallback); diff --git a/drivers/crypto/s5p-sss.c b/drivers/crypto/s5p-sss.c index d66e20a2f54c..2a16800d2579 100644 --- a/drivers/crypto/s5p-sss.c +++ b/drivers/crypto/s5p-sss.c @@ -369,7 +369,7 @@ struct s5p_hash_reqctx { bool error; u32 bufcnt; - u8 buffer[0]; + u8 buffer[]; }; /** diff --git a/drivers/crypto/xilinx/Makefile b/drivers/crypto/xilinx/Makefile new file mode 100644 index 000000000000..534e32daf76a --- /dev/null +++ b/drivers/crypto/xilinx/Makefile @@ -0,0 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only +obj-$(CONFIG_CRYPTO_DEV_ZYNQMP_AES) += zynqmp-aes-gcm.o diff --git a/drivers/crypto/xilinx/zynqmp-aes-gcm.c b/drivers/crypto/xilinx/zynqmp-aes-gcm.c new file mode 100644 index 000000000000..09f7f468eef8 --- /dev/null +++ b/drivers/crypto/xilinx/zynqmp-aes-gcm.c @@ -0,0 +1,457 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Xilinx ZynqMP AES Driver. + * Copyright (c) 2020 Xilinx Inc. + */ + +#include <crypto/aes.h> +#include <crypto/engine.h> +#include <crypto/gcm.h> +#include <crypto/internal/aead.h> +#include <crypto/scatterwalk.h> + +#include <linux/module.h> +#include <linux/of_device.h> +#include <linux/platform_device.h> + +#include <linux/firmware/xlnx-zynqmp.h> + +#define ZYNQMP_DMA_BIT_MASK 32U + +#define ZYNQMP_AES_KEY_SIZE AES_KEYSIZE_256 +#define ZYNQMP_AES_AUTH_SIZE 16U +#define ZYNQMP_KEY_SRC_SEL_KEY_LEN 1U +#define ZYNQMP_AES_BLK_SIZE 1U +#define ZYNQMP_AES_MIN_INPUT_BLK_SIZE 4U +#define ZYNQMP_AES_WORD_LEN 4U + +#define ZYNQMP_AES_GCM_TAG_MISMATCH_ERR 0x01 +#define ZYNQMP_AES_WRONG_KEY_SRC_ERR 0x13 +#define ZYNQMP_AES_PUF_NOT_PROGRAMMED 0xE300 + +enum zynqmp_aead_op { + ZYNQMP_AES_DECRYPT = 0, + ZYNQMP_AES_ENCRYPT +}; + +enum zynqmp_aead_keysrc { + ZYNQMP_AES_KUP_KEY = 0, + ZYNQMP_AES_DEV_KEY, + ZYNQMP_AES_PUF_KEY +}; + +struct zynqmp_aead_drv_ctx { + union { + struct aead_alg aead; + } alg; + struct device *dev; + struct crypto_engine *engine; + const struct zynqmp_eemi_ops *eemi_ops; +}; + +struct zynqmp_aead_hw_req { + u64 src; + u64 iv; + u64 key; + u64 dst; + u64 size; + u64 op; + u64 keysrc; +}; + +struct zynqmp_aead_tfm_ctx { + struct crypto_engine_ctx engine_ctx; + struct device *dev; + u8 key[ZYNQMP_AES_KEY_SIZE]; + u8 *iv; + u32 keylen; + u32 authsize; + enum zynqmp_aead_keysrc keysrc; + struct crypto_aead *fbk_cipher; +}; + +struct zynqmp_aead_req_ctx { + enum zynqmp_aead_op op; +}; + +static int zynqmp_aes_aead_cipher(struct aead_request *req) +{ + struct crypto_aead *aead = crypto_aead_reqtfm(req); + struct zynqmp_aead_tfm_ctx *tfm_ctx = crypto_aead_ctx(aead); + struct zynqmp_aead_req_ctx *rq_ctx = aead_request_ctx(req); + struct device *dev = tfm_ctx->dev; + struct aead_alg *alg = crypto_aead_alg(aead); + struct zynqmp_aead_drv_ctx *drv_ctx; + struct zynqmp_aead_hw_req *hwreq; + dma_addr_t dma_addr_data, dma_addr_hw_req; + unsigned int data_size; + unsigned int status; + size_t dma_size; + char *kbuf; + int err; + + drv_ctx = container_of(alg, struct 
zynqmp_aead_drv_ctx, alg.aead); + + if (!drv_ctx->eemi_ops->aes) + return -ENOTSUPP; + + if (tfm_ctx->keysrc == ZYNQMP_AES_KUP_KEY) + dma_size = req->cryptlen + ZYNQMP_AES_KEY_SIZE + + GCM_AES_IV_SIZE; + else + dma_size = req->cryptlen + GCM_AES_IV_SIZE; + + kbuf = dma_alloc_coherent(dev, dma_size, &dma_addr_data, GFP_KERNEL); + if (!kbuf) + return -ENOMEM; + + hwreq = dma_alloc_coherent(dev, sizeof(struct zynqmp_aead_hw_req), + &dma_addr_hw_req, GFP_KERNEL); + if (!hwreq) { + dma_free_coherent(dev, dma_size, kbuf, dma_addr_data); + return -ENOMEM; + } + + data_size = req->cryptlen; + scatterwalk_map_and_copy(kbuf, req->src, 0, req->cryptlen, 0); + memcpy(kbuf + data_size, req->iv, GCM_AES_IV_SIZE); + + hwreq->src = dma_addr_data; + hwreq->dst = dma_addr_data; + hwreq->iv = hwreq->src + data_size; + hwreq->keysrc = tfm_ctx->keysrc; + hwreq->op = rq_ctx->op; + + if (hwreq->op == ZYNQMP_AES_ENCRYPT) + hwreq->size = data_size; + else + hwreq->size = data_size - ZYNQMP_AES_AUTH_SIZE; + + if (hwreq->keysrc == ZYNQMP_AES_KUP_KEY) { + memcpy(kbuf + data_size + GCM_AES_IV_SIZE, + tfm_ctx->key, ZYNQMP_AES_KEY_SIZE); + + hwreq->key = hwreq->src + data_size + GCM_AES_IV_SIZE; + } else { + hwreq->key = 0; + } + + drv_ctx->eemi_ops->aes(dma_addr_hw_req, &status); + + if (status) { + switch (status) { + case ZYNQMP_AES_GCM_TAG_MISMATCH_ERR: + dev_err(dev, "ERROR: Gcm Tag mismatch\n"); + break; + case ZYNQMP_AES_WRONG_KEY_SRC_ERR: + dev_err(dev, "ERROR: Wrong KeySrc, enable secure mode\n"); + break; + case ZYNQMP_AES_PUF_NOT_PROGRAMMED: + dev_err(dev, "ERROR: PUF is not registered\n"); + break; + default: + dev_err(dev, "ERROR: Unknown error\n"); + break; + } + err = -status; + } else { + if (hwreq->op == ZYNQMP_AES_ENCRYPT) + data_size = data_size + ZYNQMP_AES_AUTH_SIZE; + else + data_size = data_size - ZYNQMP_AES_AUTH_SIZE; + + sg_copy_from_buffer(req->dst, sg_nents(req->dst), + kbuf, data_size); + err = 0; + } + + if (kbuf) { + memzero_explicit(kbuf, dma_size); + dma_free_coherent(dev, dma_size, kbuf, dma_addr_data); + } + if (hwreq) { + memzero_explicit(hwreq, sizeof(struct zynqmp_aead_hw_req)); + dma_free_coherent(dev, sizeof(struct zynqmp_aead_hw_req), + hwreq, dma_addr_hw_req); + } + return err; +} + +static int zynqmp_fallback_check(struct zynqmp_aead_tfm_ctx *tfm_ctx, + struct aead_request *req) +{ + int need_fallback = 0; + struct zynqmp_aead_req_ctx *rq_ctx = aead_request_ctx(req); + + if (tfm_ctx->authsize != ZYNQMP_AES_AUTH_SIZE) + need_fallback = 1; + + if (tfm_ctx->keysrc == ZYNQMP_AES_KUP_KEY && + tfm_ctx->keylen != ZYNQMP_AES_KEY_SIZE) { + need_fallback = 1; + } + if (req->assoclen != 0 || + req->cryptlen < ZYNQMP_AES_MIN_INPUT_BLK_SIZE) { + need_fallback = 1; + } + if ((req->cryptlen % ZYNQMP_AES_WORD_LEN) != 0) + need_fallback = 1; + + if (rq_ctx->op == ZYNQMP_AES_DECRYPT && + req->cryptlen <= ZYNQMP_AES_AUTH_SIZE) { + need_fallback = 1; + } + return need_fallback; +} + +static int zynqmp_handle_aes_req(struct crypto_engine *engine, + void *req) +{ + struct aead_request *areq = + container_of(req, struct aead_request, base); + struct crypto_aead *aead = crypto_aead_reqtfm(req); + struct zynqmp_aead_tfm_ctx *tfm_ctx = crypto_aead_ctx(aead); + struct zynqmp_aead_req_ctx *rq_ctx = aead_request_ctx(areq); + struct aead_request *subreq = aead_request_ctx(req); + int need_fallback; + int err; + + need_fallback = zynqmp_fallback_check(tfm_ctx, areq); + + if (need_fallback) { + aead_request_set_tfm(subreq, tfm_ctx->fbk_cipher); + + aead_request_set_callback(subreq, areq->base.flags, + NULL, 
NULL); + aead_request_set_crypt(subreq, areq->src, areq->dst, + areq->cryptlen, areq->iv); + aead_request_set_ad(subreq, areq->assoclen); + if (rq_ctx->op == ZYNQMP_AES_ENCRYPT) + err = crypto_aead_encrypt(subreq); + else + err = crypto_aead_decrypt(subreq); + } else { + err = zynqmp_aes_aead_cipher(areq); + } + + crypto_finalize_aead_request(engine, areq, err); + return 0; +} + +static int zynqmp_aes_aead_setkey(struct crypto_aead *aead, const u8 *key, + unsigned int keylen) +{ + struct crypto_tfm *tfm = crypto_aead_tfm(aead); + struct zynqmp_aead_tfm_ctx *tfm_ctx = + (struct zynqmp_aead_tfm_ctx *)crypto_tfm_ctx(tfm); + unsigned char keysrc; + + if (keylen == ZYNQMP_KEY_SRC_SEL_KEY_LEN) { + keysrc = *key; + if (keysrc == ZYNQMP_AES_KUP_KEY || + keysrc == ZYNQMP_AES_DEV_KEY || + keysrc == ZYNQMP_AES_PUF_KEY) { + tfm_ctx->keysrc = (enum zynqmp_aead_keysrc)keysrc; + } else { + tfm_ctx->keylen = keylen; + } + } else { + tfm_ctx->keylen = keylen; + if (keylen == ZYNQMP_AES_KEY_SIZE) { + tfm_ctx->keysrc = ZYNQMP_AES_KUP_KEY; + memcpy(tfm_ctx->key, key, keylen); + } + } + + tfm_ctx->fbk_cipher->base.crt_flags &= ~CRYPTO_TFM_REQ_MASK; + tfm_ctx->fbk_cipher->base.crt_flags |= (aead->base.crt_flags & + CRYPTO_TFM_REQ_MASK); + + return crypto_aead_setkey(tfm_ctx->fbk_cipher, key, keylen); +} + +static int zynqmp_aes_aead_setauthsize(struct crypto_aead *aead, + unsigned int authsize) +{ + struct crypto_tfm *tfm = crypto_aead_tfm(aead); + struct zynqmp_aead_tfm_ctx *tfm_ctx = + (struct zynqmp_aead_tfm_ctx *)crypto_tfm_ctx(tfm); + + tfm_ctx->authsize = authsize; + return crypto_aead_setauthsize(tfm_ctx->fbk_cipher, authsize); +} + +static int zynqmp_aes_aead_encrypt(struct aead_request *req) +{ + struct zynqmp_aead_drv_ctx *drv_ctx; + struct crypto_aead *aead = crypto_aead_reqtfm(req); + struct aead_alg *alg = crypto_aead_alg(aead); + struct zynqmp_aead_req_ctx *rq_ctx = aead_request_ctx(req); + + rq_ctx->op = ZYNQMP_AES_ENCRYPT; + drv_ctx = container_of(alg, struct zynqmp_aead_drv_ctx, alg.aead); + + return crypto_transfer_aead_request_to_engine(drv_ctx->engine, req); +} + +static int zynqmp_aes_aead_decrypt(struct aead_request *req) +{ + struct zynqmp_aead_drv_ctx *drv_ctx; + struct crypto_aead *aead = crypto_aead_reqtfm(req); + struct aead_alg *alg = crypto_aead_alg(aead); + struct zynqmp_aead_req_ctx *rq_ctx = aead_request_ctx(req); + + rq_ctx->op = ZYNQMP_AES_DECRYPT; + drv_ctx = container_of(alg, struct zynqmp_aead_drv_ctx, alg.aead); + + return crypto_transfer_aead_request_to_engine(drv_ctx->engine, req); +} + +static int zynqmp_aes_aead_init(struct crypto_aead *aead) +{ + struct crypto_tfm *tfm = crypto_aead_tfm(aead); + struct zynqmp_aead_tfm_ctx *tfm_ctx = + (struct zynqmp_aead_tfm_ctx *)crypto_tfm_ctx(tfm); + struct zynqmp_aead_drv_ctx *drv_ctx; + struct aead_alg *alg = crypto_aead_alg(aead); + + drv_ctx = container_of(alg, struct zynqmp_aead_drv_ctx, alg.aead); + tfm_ctx->dev = drv_ctx->dev; + + tfm_ctx->engine_ctx.op.do_one_request = zynqmp_handle_aes_req; + tfm_ctx->engine_ctx.op.prepare_request = NULL; + tfm_ctx->engine_ctx.op.unprepare_request = NULL; + + tfm_ctx->fbk_cipher = crypto_alloc_aead(drv_ctx->alg.aead.base.cra_name, + 0, + CRYPTO_ALG_NEED_FALLBACK); + + if (IS_ERR(tfm_ctx->fbk_cipher)) { + pr_err("%s() Error: failed to allocate fallback for %s\n", + __func__, drv_ctx->alg.aead.base.cra_name); + return PTR_ERR(tfm_ctx->fbk_cipher); + } + + crypto_aead_set_reqsize(aead, + max(sizeof(struct zynqmp_aead_req_ctx), + sizeof(struct aead_request) + + 
crypto_aead_reqsize(tfm_ctx->fbk_cipher))); + return 0; +} + +static void zynqmp_aes_aead_exit(struct crypto_aead *aead) +{ + struct crypto_tfm *tfm = crypto_aead_tfm(aead); + struct zynqmp_aead_tfm_ctx *tfm_ctx = + (struct zynqmp_aead_tfm_ctx *)crypto_tfm_ctx(tfm); + + if (tfm_ctx->fbk_cipher) { + crypto_free_aead(tfm_ctx->fbk_cipher); + tfm_ctx->fbk_cipher = NULL; + } + memzero_explicit(tfm_ctx, sizeof(struct zynqmp_aead_tfm_ctx)); +} + +static struct zynqmp_aead_drv_ctx aes_drv_ctx = { + .alg.aead = { + .setkey = zynqmp_aes_aead_setkey, + .setauthsize = zynqmp_aes_aead_setauthsize, + .encrypt = zynqmp_aes_aead_encrypt, + .decrypt = zynqmp_aes_aead_decrypt, + .init = zynqmp_aes_aead_init, + .exit = zynqmp_aes_aead_exit, + .ivsize = GCM_AES_IV_SIZE, + .maxauthsize = ZYNQMP_AES_AUTH_SIZE, + .base = { + .cra_name = "gcm(aes)", + .cra_driver_name = "xilinx-zynqmp-aes-gcm", + .cra_priority = 200, + .cra_flags = CRYPTO_ALG_TYPE_AEAD | + CRYPTO_ALG_ASYNC | + CRYPTO_ALG_KERN_DRIVER_ONLY | + CRYPTO_ALG_NEED_FALLBACK, + .cra_blocksize = ZYNQMP_AES_BLK_SIZE, + .cra_ctxsize = sizeof(struct zynqmp_aead_tfm_ctx), + .cra_module = THIS_MODULE, + } + } +}; + +static int zynqmp_aes_aead_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + int err; + + /* ZynqMP AES driver supports only one instance */ + if (!aes_drv_ctx.dev) + aes_drv_ctx.dev = dev; + else + return -ENODEV; + + aes_drv_ctx.eemi_ops = zynqmp_pm_get_eemi_ops(); + if (IS_ERR(aes_drv_ctx.eemi_ops)) { + dev_err(dev, "Failed to get ZynqMP EEMI interface\n"); + return PTR_ERR(aes_drv_ctx.eemi_ops); + } + + err = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(ZYNQMP_DMA_BIT_MASK)); + if (err < 0) { + dev_err(dev, "No usable DMA configuration\n"); + return err; + } + + aes_drv_ctx.engine = crypto_engine_alloc_init(dev, 1); + if (!aes_drv_ctx.engine) { + dev_err(dev, "Cannot alloc AES engine\n"); + err = -ENOMEM; + goto err_engine; + } + + err = crypto_engine_start(aes_drv_ctx.engine); + if (err) { + dev_err(dev, "Cannot start AES engine\n"); + goto err_engine; + } + + err = crypto_register_aead(&aes_drv_ctx.alg.aead); + if (err < 0) { + dev_err(dev, "Failed to register AEAD alg.\n"); + goto err_aead; + } + return 0; + +err_aead: + crypto_unregister_aead(&aes_drv_ctx.alg.aead); + +err_engine: + if (aes_drv_ctx.engine) + crypto_engine_exit(aes_drv_ctx.engine); + + return err; +} + +static int zynqmp_aes_aead_remove(struct platform_device *pdev) +{ + crypto_engine_exit(aes_drv_ctx.engine); + crypto_unregister_aead(&aes_drv_ctx.alg.aead); + + return 0; +} + +static const struct of_device_id zynqmp_aes_dt_ids[] = { + { .compatible = "xlnx,zynqmp-aes" }, + { /* sentinel */ } +}; +MODULE_DEVICE_TABLE(of, zynqmp_aes_dt_ids); + +static struct platform_driver zynqmp_aes_driver = { + .probe = zynqmp_aes_aead_probe, + .remove = zynqmp_aes_aead_remove, + .driver = { + .name = "zynqmp-aes", + .of_match_table = zynqmp_aes_dt_ids, + }, +}; + +module_platform_driver(zynqmp_aes_driver); +MODULE_LICENSE("GPL"); diff --git a/drivers/firmware/xilinx/zynqmp.c b/drivers/firmware/xilinx/zynqmp.c index f8b03dcfd2f7..41b65164a367 100644 --- a/drivers/firmware/xilinx/zynqmp.c +++ b/drivers/firmware/xilinx/zynqmp.c @@ -709,6 +709,30 @@ static int zynqmp_pm_set_requirement(const u32 node, const u32 capabilities, qos, ack, NULL); } +/** + * zynqmp_pm_aes - Access AES hardware to encrypt/decrypt the data using + * AES-GCM core. + * @address: Address of the AesParams structure. 
+ * @out: Returned output value + * + * Return: Returns status, either success or error code. + */ +static int zynqmp_pm_aes_engine(const u64 address, u32 *out) +{ + u32 ret_payload[PAYLOAD_ARG_CNT]; + int ret; + + if (!out) + return -EINVAL; + + ret = zynqmp_pm_invoke_fn(PM_SECURE_AES, upper_32_bits(address), + lower_32_bits(address), + 0, 0, ret_payload); + *out = ret_payload[1]; + + return ret; +} + static const struct zynqmp_eemi_ops eemi_ops = { .get_api_version = zynqmp_pm_get_api_version, .get_chipid = zynqmp_pm_get_chipid, @@ -732,6 +756,7 @@ static const struct zynqmp_eemi_ops eemi_ops = { .set_requirement = zynqmp_pm_set_requirement, .fpga_load = zynqmp_pm_fpga_load, .fpga_get_status = zynqmp_pm_fpga_get_status, + .aes = zynqmp_pm_aes_engine, }; /** diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig index 7f0d48f406e3..99e151475d8f 100644 --- a/drivers/misc/Kconfig +++ b/drivers/misc/Kconfig @@ -480,4 +480,5 @@ source "drivers/misc/cxl/Kconfig" source "drivers/misc/ocxl/Kconfig" source "drivers/misc/cardreader/Kconfig" source "drivers/misc/habanalabs/Kconfig" +source "drivers/misc/uacce/Kconfig" endmenu diff --git a/drivers/misc/Makefile b/drivers/misc/Makefile index c1860d35dc7e..9abf2923d831 100644 --- a/drivers/misc/Makefile +++ b/drivers/misc/Makefile @@ -56,4 +56,5 @@ obj-$(CONFIG_OCXL) += ocxl/ obj-y += cardreader/ obj-$(CONFIG_PVPANIC) += pvpanic.o obj-$(CONFIG_HABANA_AI) += habanalabs/ +obj-$(CONFIG_UACCE) += uacce/ obj-$(CONFIG_XILINX_SDFEC) += xilinx_sdfec.o diff --git a/drivers/misc/uacce/Kconfig b/drivers/misc/uacce/Kconfig new file mode 100644 index 000000000000..5e39b6083b0f --- /dev/null +++ b/drivers/misc/uacce/Kconfig @@ -0,0 +1,13 @@ +config UACCE + tristate "Accelerator Framework for User Land" + depends on IOMMU_API + help + UACCE provides interface for the user process to access the hardware + without interaction with the kernel space in data path. + + The user-space interface is described in + include/uapi/misc/uacce/uacce.h + + See Documentation/misc-devices/uacce.rst for more details. + + If you don't know what to do here, say N. 
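
The uacce data path described in the Kconfig help above works entirely from user space: a process opens the accelerator's character device, maps the queue-file regions, and starts the queue, after which it talks to the hardware through the mapped pages without further system calls. A minimal user-space sketch of that life cycle follows; it assumes the UAPI header installed from include/uapi/misc/uacce/uacce.h provides UACCE_CMD_START_Q, UACCE_CMD_PUT_Q and the UACCE_QFRT_MMIO/UACCE_QFRT_DUS region indices, and the "/dev/hisi_zip-0" node name and one-page mapping sizes are purely illustrative (real sizes come from the region_mmio_size/region_dus_size sysfs attributes implemented in uacce.c further down).

/* Illustrative only: the device node name and mapping sizes are made up;
 * the ioctl and region constants are assumed to come from the uacce UAPI
 * header referenced in the Kconfig help above.
 */
#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <unistd.h>
#include <misc/uacce/uacce.h>	/* UACCE_CMD_*, UACCE_QFRT_* (assumed) */

int main(void)
{
	long page = sysconf(_SC_PAGESIZE);
	void *mmio, *dus;
	int fd;

	fd = open("/dev/hisi_zip-0", O_RDWR);	/* hypothetical node name */
	if (fd < 0) {
		perror("open");
		return 1;
	}

	/*
	 * Each queue-file region is selected by passing its index as the
	 * mmap page offset, which is how uacce_fops_mmap() interprets
	 * vma->vm_pgoff. One page per region is used here for brevity.
	 */
	mmio = mmap(NULL, page, PROT_READ | PROT_WRITE, MAP_SHARED, fd,
		    (off_t)UACCE_QFRT_MMIO * page);
	dus = mmap(NULL, page, PROT_READ | PROT_WRITE, MAP_SHARED, fd,
		   (off_t)UACCE_QFRT_DUS * page);
	if (mmio == MAP_FAILED || dus == MAP_FAILED) {
		perror("mmap");
		close(fd);
		return 1;
	}

	/* Move the queue from UACCE_Q_INIT to UACCE_Q_STARTED. */
	if (ioctl(fd, UACCE_CMD_START_Q)) {
		perror("UACCE_CMD_START_Q");
		close(fd);
		return 1;
	}

	/* ... submit work through dus, ring doorbells through mmio ... */

	ioctl(fd, UACCE_CMD_PUT_Q);	/* optional: close() also puts the queue */
	close(fd);
	return 0;
}

Closing the descriptor runs uacce_fops_release(), which puts the queue and drops the mm binding, so the explicit UACCE_CMD_PUT_Q is only needed when the queue should be torn down while the file stays open.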
diff --git a/drivers/misc/uacce/Makefile b/drivers/misc/uacce/Makefile new file mode 100644 index 000000000000..5b4374e8b5f2 --- /dev/null +++ b/drivers/misc/uacce/Makefile @@ -0,0 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-or-later +obj-$(CONFIG_UACCE) += uacce.o diff --git a/drivers/misc/uacce/uacce.c b/drivers/misc/uacce/uacce.c new file mode 100644 index 000000000000..d39307f060bd --- /dev/null +++ b/drivers/misc/uacce/uacce.c @@ -0,0 +1,633 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +#include <linux/compat.h> +#include <linux/dma-mapping.h> +#include <linux/iommu.h> +#include <linux/module.h> +#include <linux/poll.h> +#include <linux/uacce.h> + +static struct class *uacce_class; +static dev_t uacce_devt; +static DEFINE_MUTEX(uacce_mutex); +static DEFINE_XARRAY_ALLOC(uacce_xa); + +static int uacce_start_queue(struct uacce_queue *q) +{ + int ret = 0; + + mutex_lock(&uacce_mutex); + + if (q->state != UACCE_Q_INIT) { + ret = -EINVAL; + goto out_with_lock; + } + + if (q->uacce->ops->start_queue) { + ret = q->uacce->ops->start_queue(q); + if (ret < 0) + goto out_with_lock; + } + + q->state = UACCE_Q_STARTED; + +out_with_lock: + mutex_unlock(&uacce_mutex); + + return ret; +} + +static int uacce_put_queue(struct uacce_queue *q) +{ + struct uacce_device *uacce = q->uacce; + + mutex_lock(&uacce_mutex); + + if (q->state == UACCE_Q_ZOMBIE) + goto out; + + if ((q->state == UACCE_Q_STARTED) && uacce->ops->stop_queue) + uacce->ops->stop_queue(q); + + if ((q->state == UACCE_Q_INIT || q->state == UACCE_Q_STARTED) && + uacce->ops->put_queue) + uacce->ops->put_queue(q); + + q->state = UACCE_Q_ZOMBIE; +out: + mutex_unlock(&uacce_mutex); + + return 0; +} + +static long uacce_fops_unl_ioctl(struct file *filep, + unsigned int cmd, unsigned long arg) +{ + struct uacce_queue *q = filep->private_data; + struct uacce_device *uacce = q->uacce; + + switch (cmd) { + case UACCE_CMD_START_Q: + return uacce_start_queue(q); + + case UACCE_CMD_PUT_Q: + return uacce_put_queue(q); + + default: + if (!uacce->ops->ioctl) + return -EINVAL; + + return uacce->ops->ioctl(q, cmd, arg); + } +} + +#ifdef CONFIG_COMPAT +static long uacce_fops_compat_ioctl(struct file *filep, + unsigned int cmd, unsigned long arg) +{ + arg = (unsigned long)compat_ptr(arg); + + return uacce_fops_unl_ioctl(filep, cmd, arg); +} +#endif + +static int uacce_sva_exit(struct device *dev, struct iommu_sva *handle, + void *data) +{ + struct uacce_mm *uacce_mm = data; + struct uacce_queue *q; + + /* + * No new queue can be added concurrently because no caller can have a + * reference to this mm. But there may be concurrent calls to + * uacce_mm_put(), so we need the lock. 
+ */ + mutex_lock(&uacce_mm->lock); + list_for_each_entry(q, &uacce_mm->queues, list) + uacce_put_queue(q); + uacce_mm->mm = NULL; + mutex_unlock(&uacce_mm->lock); + + return 0; +} + +static struct iommu_sva_ops uacce_sva_ops = { + .mm_exit = uacce_sva_exit, +}; + +static struct uacce_mm *uacce_mm_get(struct uacce_device *uacce, + struct uacce_queue *q, + struct mm_struct *mm) +{ + struct uacce_mm *uacce_mm = NULL; + struct iommu_sva *handle = NULL; + int ret; + + lockdep_assert_held(&uacce->mm_lock); + + list_for_each_entry(uacce_mm, &uacce->mm_list, list) { + if (uacce_mm->mm == mm) { + mutex_lock(&uacce_mm->lock); + list_add(&q->list, &uacce_mm->queues); + mutex_unlock(&uacce_mm->lock); + return uacce_mm; + } + } + + uacce_mm = kzalloc(sizeof(*uacce_mm), GFP_KERNEL); + if (!uacce_mm) + return NULL; + + if (uacce->flags & UACCE_DEV_SVA) { + /* + * Safe to pass an incomplete uacce_mm, since mm_exit cannot + * fire while we hold a reference to the mm. + */ + handle = iommu_sva_bind_device(uacce->parent, mm, uacce_mm); + if (IS_ERR(handle)) + goto err_free; + + ret = iommu_sva_set_ops(handle, &uacce_sva_ops); + if (ret) + goto err_unbind; + + uacce_mm->pasid = iommu_sva_get_pasid(handle); + if (uacce_mm->pasid == IOMMU_PASID_INVALID) + goto err_unbind; + } + + uacce_mm->mm = mm; + uacce_mm->handle = handle; + INIT_LIST_HEAD(&uacce_mm->queues); + mutex_init(&uacce_mm->lock); + list_add(&q->list, &uacce_mm->queues); + list_add(&uacce_mm->list, &uacce->mm_list); + + return uacce_mm; + +err_unbind: + if (handle) + iommu_sva_unbind_device(handle); +err_free: + kfree(uacce_mm); + return NULL; +} + +static void uacce_mm_put(struct uacce_queue *q) +{ + struct uacce_mm *uacce_mm = q->uacce_mm; + + lockdep_assert_held(&q->uacce->mm_lock); + + mutex_lock(&uacce_mm->lock); + list_del(&q->list); + mutex_unlock(&uacce_mm->lock); + + if (list_empty(&uacce_mm->queues)) { + if (uacce_mm->handle) + iommu_sva_unbind_device(uacce_mm->handle); + list_del(&uacce_mm->list); + kfree(uacce_mm); + } +} + +static int uacce_fops_open(struct inode *inode, struct file *filep) +{ + struct uacce_mm *uacce_mm = NULL; + struct uacce_device *uacce; + struct uacce_queue *q; + int ret = 0; + + uacce = xa_load(&uacce_xa, iminor(inode)); + if (!uacce) + return -ENODEV; + + q = kzalloc(sizeof(struct uacce_queue), GFP_KERNEL); + if (!q) + return -ENOMEM; + + mutex_lock(&uacce->mm_lock); + uacce_mm = uacce_mm_get(uacce, q, current->mm); + mutex_unlock(&uacce->mm_lock); + if (!uacce_mm) { + ret = -ENOMEM; + goto out_with_mem; + } + + q->uacce = uacce; + q->uacce_mm = uacce_mm; + + if (uacce->ops->get_queue) { + ret = uacce->ops->get_queue(uacce, uacce_mm->pasid, q); + if (ret < 0) + goto out_with_mm; + } + + init_waitqueue_head(&q->wait); + filep->private_data = q; + uacce->inode = inode; + q->state = UACCE_Q_INIT; + + return 0; + +out_with_mm: + mutex_lock(&uacce->mm_lock); + uacce_mm_put(q); + mutex_unlock(&uacce->mm_lock); +out_with_mem: + kfree(q); + return ret; +} + +static int uacce_fops_release(struct inode *inode, struct file *filep) +{ + struct uacce_queue *q = filep->private_data; + struct uacce_device *uacce = q->uacce; + + uacce_put_queue(q); + + mutex_lock(&uacce->mm_lock); + uacce_mm_put(q); + mutex_unlock(&uacce->mm_lock); + + kfree(q); + + return 0; +} + +static vm_fault_t uacce_vma_fault(struct vm_fault *vmf) +{ + if (vmf->flags & (FAULT_FLAG_MKWRITE | FAULT_FLAG_WRITE)) + return VM_FAULT_SIGBUS; + + return 0; +} + +static void uacce_vma_close(struct vm_area_struct *vma) +{ + struct uacce_queue *q = 
vma->vm_private_data; + struct uacce_qfile_region *qfr = NULL; + + if (vma->vm_pgoff < UACCE_MAX_REGION) + qfr = q->qfrs[vma->vm_pgoff]; + + kfree(qfr); +} + +static const struct vm_operations_struct uacce_vm_ops = { + .fault = uacce_vma_fault, + .close = uacce_vma_close, +}; + +static int uacce_fops_mmap(struct file *filep, struct vm_area_struct *vma) +{ + struct uacce_queue *q = filep->private_data; + struct uacce_device *uacce = q->uacce; + struct uacce_qfile_region *qfr; + enum uacce_qfrt type = UACCE_MAX_REGION; + int ret = 0; + + if (vma->vm_pgoff < UACCE_MAX_REGION) + type = vma->vm_pgoff; + else + return -EINVAL; + + qfr = kzalloc(sizeof(*qfr), GFP_KERNEL); + if (!qfr) + return -ENOMEM; + + vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND | VM_WIPEONFORK; + vma->vm_ops = &uacce_vm_ops; + vma->vm_private_data = q; + qfr->type = type; + + mutex_lock(&uacce_mutex); + + if (q->state != UACCE_Q_INIT && q->state != UACCE_Q_STARTED) { + ret = -EINVAL; + goto out_with_lock; + } + + if (q->qfrs[type]) { + ret = -EEXIST; + goto out_with_lock; + } + + switch (type) { + case UACCE_QFRT_MMIO: + if (!uacce->ops->mmap) { + ret = -EINVAL; + goto out_with_lock; + } + + ret = uacce->ops->mmap(q, vma, qfr); + if (ret) + goto out_with_lock; + + break; + + case UACCE_QFRT_DUS: + if (!uacce->ops->mmap) { + ret = -EINVAL; + goto out_with_lock; + } + + ret = uacce->ops->mmap(q, vma, qfr); + if (ret) + goto out_with_lock; + break; + + default: + ret = -EINVAL; + goto out_with_lock; + } + + q->qfrs[type] = qfr; + mutex_unlock(&uacce_mutex); + + return ret; + +out_with_lock: + mutex_unlock(&uacce_mutex); + kfree(qfr); + return ret; +} + +static __poll_t uacce_fops_poll(struct file *file, poll_table *wait) +{ + struct uacce_queue *q = file->private_data; + struct uacce_device *uacce = q->uacce; + + poll_wait(file, &q->wait, wait); + if (uacce->ops->is_q_updated && uacce->ops->is_q_updated(q)) + return EPOLLIN | EPOLLRDNORM; + + return 0; +} + +static const struct file_operations uacce_fops = { + .owner = THIS_MODULE, + .open = uacce_fops_open, + .release = uacce_fops_release, + .unlocked_ioctl = uacce_fops_unl_ioctl, +#ifdef CONFIG_COMPAT + .compat_ioctl = uacce_fops_compat_ioctl, +#endif + .mmap = uacce_fops_mmap, + .poll = uacce_fops_poll, +}; + +#define to_uacce_device(dev) container_of(dev, struct uacce_device, dev) + +static ssize_t api_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct uacce_device *uacce = to_uacce_device(dev); + + return sprintf(buf, "%s\n", uacce->api_ver); +} + +static ssize_t flags_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct uacce_device *uacce = to_uacce_device(dev); + + return sprintf(buf, "%u\n", uacce->flags); +} + +static ssize_t available_instances_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct uacce_device *uacce = to_uacce_device(dev); + + if (!uacce->ops->get_available_instances) + return -ENODEV; + + return sprintf(buf, "%d\n", + uacce->ops->get_available_instances(uacce)); +} + +static ssize_t algorithms_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct uacce_device *uacce = to_uacce_device(dev); + + return sprintf(buf, "%s\n", uacce->algs); +} + +static ssize_t region_mmio_size_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct uacce_device *uacce = to_uacce_device(dev); + + return sprintf(buf, "%lu\n", + uacce->qf_pg_num[UACCE_QFRT_MMIO] << PAGE_SHIFT); +} + +static ssize_t region_dus_size_show(struct device 
*dev, + struct device_attribute *attr, char *buf) +{ + struct uacce_device *uacce = to_uacce_device(dev); + + return sprintf(buf, "%lu\n", + uacce->qf_pg_num[UACCE_QFRT_DUS] << PAGE_SHIFT); +} + +static DEVICE_ATTR_RO(api); +static DEVICE_ATTR_RO(flags); +static DEVICE_ATTR_RO(available_instances); +static DEVICE_ATTR_RO(algorithms); +static DEVICE_ATTR_RO(region_mmio_size); +static DEVICE_ATTR_RO(region_dus_size); + +static struct attribute *uacce_dev_attrs[] = { + &dev_attr_api.attr, + &dev_attr_flags.attr, + &dev_attr_available_instances.attr, + &dev_attr_algorithms.attr, + &dev_attr_region_mmio_size.attr, + &dev_attr_region_dus_size.attr, + NULL, +}; + +static umode_t uacce_dev_is_visible(struct kobject *kobj, + struct attribute *attr, int n) +{ + struct device *dev = container_of(kobj, struct device, kobj); + struct uacce_device *uacce = to_uacce_device(dev); + + if (((attr == &dev_attr_region_mmio_size.attr) && + (!uacce->qf_pg_num[UACCE_QFRT_MMIO])) || + ((attr == &dev_attr_region_dus_size.attr) && + (!uacce->qf_pg_num[UACCE_QFRT_DUS]))) + return 0; + + return attr->mode; +} + +static struct attribute_group uacce_dev_group = { + .is_visible = uacce_dev_is_visible, + .attrs = uacce_dev_attrs, +}; + +__ATTRIBUTE_GROUPS(uacce_dev); + +static void uacce_release(struct device *dev) +{ + struct uacce_device *uacce = to_uacce_device(dev); + + kfree(uacce); +} + +/** + * uacce_alloc() - alloc an accelerator + * @parent: pointer of uacce parent device + * @interface: pointer of uacce_interface for register + * + * Returns uacce pointer if success and ERR_PTR if not + * Need check returned negotiated uacce->flags + */ +struct uacce_device *uacce_alloc(struct device *parent, + struct uacce_interface *interface) +{ + unsigned int flags = interface->flags; + struct uacce_device *uacce; + int ret; + + uacce = kzalloc(sizeof(struct uacce_device), GFP_KERNEL); + if (!uacce) + return ERR_PTR(-ENOMEM); + + if (flags & UACCE_DEV_SVA) { + ret = iommu_dev_enable_feature(parent, IOMMU_DEV_FEAT_SVA); + if (ret) + flags &= ~UACCE_DEV_SVA; + } + + uacce->parent = parent; + uacce->flags = flags; + uacce->ops = interface->ops; + + ret = xa_alloc(&uacce_xa, &uacce->dev_id, uacce, xa_limit_32b, + GFP_KERNEL); + if (ret < 0) + goto err_with_uacce; + + INIT_LIST_HEAD(&uacce->mm_list); + mutex_init(&uacce->mm_lock); + device_initialize(&uacce->dev); + uacce->dev.devt = MKDEV(MAJOR(uacce_devt), uacce->dev_id); + uacce->dev.class = uacce_class; + uacce->dev.groups = uacce_dev_groups; + uacce->dev.parent = uacce->parent; + uacce->dev.release = uacce_release; + dev_set_name(&uacce->dev, "%s-%d", interface->name, uacce->dev_id); + + return uacce; + +err_with_uacce: + if (flags & UACCE_DEV_SVA) + iommu_dev_disable_feature(uacce->parent, IOMMU_DEV_FEAT_SVA); + kfree(uacce); + return ERR_PTR(ret); +} +EXPORT_SYMBOL_GPL(uacce_alloc); + +/** + * uacce_register() - add the accelerator to cdev and export to user space + * @uacce: The initialized uacce device + * + * Return 0 if register succeeded, or an error. 
+ */ +int uacce_register(struct uacce_device *uacce) +{ + if (!uacce) + return -ENODEV; + + uacce->cdev = cdev_alloc(); + if (!uacce->cdev) + return -ENOMEM; + + uacce->cdev->ops = &uacce_fops; + uacce->cdev->owner = THIS_MODULE; + + return cdev_device_add(uacce->cdev, &uacce->dev); +} +EXPORT_SYMBOL_GPL(uacce_register); + +/** + * uacce_remove() - remove the accelerator + * @uacce: the accelerator to remove + */ +void uacce_remove(struct uacce_device *uacce) +{ + struct uacce_mm *uacce_mm; + struct uacce_queue *q; + + if (!uacce) + return; + /* + * unmap remaining mapping from user space, preventing user still + * access the mmaped area while parent device is already removed + */ + if (uacce->inode) + unmap_mapping_range(uacce->inode->i_mapping, 0, 0, 1); + + /* ensure no open queue remains */ + mutex_lock(&uacce->mm_lock); + list_for_each_entry(uacce_mm, &uacce->mm_list, list) { + /* + * We don't take the uacce_mm->lock here. Since we hold the + * device's mm_lock, no queue can be added to or removed from + * this uacce_mm. We may run concurrently with mm_exit, but + * uacce_put_queue() is serialized and iommu_sva_unbind_device() + * waits for the lock that mm_exit is holding. + */ + list_for_each_entry(q, &uacce_mm->queues, list) + uacce_put_queue(q); + + if (uacce->flags & UACCE_DEV_SVA) { + iommu_sva_unbind_device(uacce_mm->handle); + uacce_mm->handle = NULL; + } + } + mutex_unlock(&uacce->mm_lock); + + /* disable sva now since no opened queues */ + if (uacce->flags & UACCE_DEV_SVA) + iommu_dev_disable_feature(uacce->parent, IOMMU_DEV_FEAT_SVA); + + if (uacce->cdev) + cdev_device_del(uacce->cdev, &uacce->dev); + xa_erase(&uacce_xa, uacce->dev_id); + put_device(&uacce->dev); +} +EXPORT_SYMBOL_GPL(uacce_remove); + +static int __init uacce_init(void) +{ + int ret; + + uacce_class = class_create(THIS_MODULE, UACCE_NAME); + if (IS_ERR(uacce_class)) + return PTR_ERR(uacce_class); + + ret = alloc_chrdev_region(&uacce_devt, 0, MINORMASK, UACCE_NAME); + if (ret) + class_destroy(uacce_class); + + return ret; +} + +static __exit void uacce_exit(void) +{ + unregister_chrdev_region(uacce_devt, MINORMASK); + class_destroy(uacce_class); +} + +subsys_initcall(uacce_init); +module_exit(uacce_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Hisilicon Tech. Co., Ltd."); +MODULE_DESCRIPTION("Accelerator interface for Userland applications"); |
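
uacce_alloc(), uacce_register() and uacce_remove() are the whole interface a parent accelerator driver needs. The sketch below shows how a driver might plug into them; everything prefixed my_accel_ (and the platform-driver wrapper around it) is hypothetical, while the uacce_interface fields, the uacce_ops callbacks and the SVA flag handling follow the uacce core above, which checks each callback for NULL so only the hooks the hardware actually needs have to be supplied.

/*
 * Hypothetical parent-driver glue: names prefixed my_accel_ are invented
 * for illustration; only the uacce_* calls and the structure fields they
 * consume are taken from the uacce core above.
 */
#include <linux/err.h>
#include <linux/module.h>
#include <linux/platform_device.h>
#include <linux/uacce.h>

static int my_accel_start_queue(struct uacce_queue *q)
{
	/* Enable the hardware queue that ->get_queue() would have set up. */
	return 0;
}

static void my_accel_stop_queue(struct uacce_queue *q)
{
	/* Quiesce the hardware queue before it is released. */
}

static int my_accel_mmap(struct uacce_queue *q, struct vm_area_struct *vma,
			 struct uacce_qfile_region *qfr)
{
	/* Map the doorbell page (UACCE_QFRT_MMIO) or the shared data
	 * region (UACCE_QFRT_DUS) selected by qfr->type.
	 */
	return -EINVAL;
}

static const struct uacce_ops my_accel_uacce_ops = {
	.start_queue	= my_accel_start_queue,
	.stop_queue	= my_accel_stop_queue,
	.mmap		= my_accel_mmap,
};

static int my_accel_probe(struct platform_device *pdev)
{
	struct uacce_interface interface = {
		.name	= "my_accel",
		.flags	= UACCE_DEV_SVA,	/* request shared virtual addressing */
		.ops	= &my_accel_uacce_ops,
	};
	struct uacce_device *uacce;

	uacce = uacce_alloc(&pdev->dev, &interface);
	if (IS_ERR(uacce))
		return PTR_ERR(uacce);

	/*
	 * uacce_alloc() clears UACCE_DEV_SVA when the IOMMU cannot provide
	 * it, so the negotiated uacce->flags must be checked before the
	 * driver relies on per-process PASIDs.
	 */
	platform_set_drvdata(pdev, uacce);

	return uacce_register(uacce);	/* creates the my_accel-<id> char device */
}

static int my_accel_remove(struct platform_device *pdev)
{
	uacce_remove(platform_get_drvdata(pdev));
	return 0;
}

static struct platform_driver my_accel_driver = {
	.probe	= my_accel_probe,
	.remove	= my_accel_remove,
	.driver	= {
		.name = "my_accel",
	},
};
module_platform_driver(my_accel_driver);
MODULE_LICENSE("GPL");

uacce_remove() in the core already quiesces any open queues and unbinds SVA before the character device goes away, so the parent driver only has to call it ahead of tearing down its own hardware.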