diff options
Diffstat (limited to 'drivers/crypto')
112 files changed, 13417 insertions, 2749 deletions
diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig index c2767ed54dfe..2c887e4d005a 100644 --- a/drivers/crypto/Kconfig +++ b/drivers/crypto/Kconfig @@ -233,20 +233,6 @@ config CRYPTO_CRC32_S390 It is available with IBM z13 or later. -config CRYPTO_DEV_MARVELL_CESA - tristate "Marvell's Cryptographic Engine driver" - depends on PLAT_ORION || ARCH_MVEBU - select CRYPTO_LIB_AES - select CRYPTO_LIB_DES - select CRYPTO_SKCIPHER - select CRYPTO_HASH - select SRAM - help - This driver allows you to utilize the Cryptographic Engines and - Security Accelerator (CESA) which can be found on MVEBU and ORION - platforms. - This driver supports CPU offload through DMA transfers. - config CRYPTO_DEV_NIAGARA2 tristate "Niagara2 Stream Processing Unit driver" select CRYPTO_LIB_DES @@ -606,6 +592,7 @@ config CRYPTO_DEV_MXS_DCP source "drivers/crypto/qat/Kconfig" source "drivers/crypto/cavium/cpt/Kconfig" source "drivers/crypto/cavium/nitrox/Kconfig" +source "drivers/crypto/marvell/Kconfig" config CRYPTO_DEV_CAVIUM_ZIP tristate "Cavium ZIP driver" @@ -685,6 +672,29 @@ choice endchoice +config CRYPTO_DEV_QCE_SW_MAX_LEN + int "Default maximum request size to use software for AES" + depends on CRYPTO_DEV_QCE && CRYPTO_DEV_QCE_SKCIPHER + default 512 + help + This sets the default maximum request size to perform AES requests + using software instead of the crypto engine. It can be changed by + setting the aes_sw_max_len parameter. + + Small blocks are processed faster in software than hardware. + Considering the 256-bit ciphers, software is 2-3 times faster than + qce at 256-bytes, 30% faster at 512, and about even at 768-bytes. + With 128-bit keys, the break-even point would be around 1024-bytes. + + The default is set a little lower, to 512 bytes, to balance the + cost in CPU usage. The minimum recommended setting is 16-bytes + (1 AES block), since AES-GCM will fail if you set it lower. + Setting this to zero will send all requests to the hardware. + + Note that 192-bit keys are not supported by the hardware and are + always processed by the software fallback, and all DES requests + are done by the hardware. + config CRYPTO_DEV_QCOM_RNG tristate "Qualcomm Random Number Generator Driver" depends on ARCH_QCOM || COMPILE_TEST @@ -731,6 +741,18 @@ config CRYPTO_DEV_ROCKCHIP This driver interfaces with the hardware crypto accelerator. Supporting cbc/ecb chainmode, and aes/des/des3_ede cipher mode. +config CRYPTO_DEV_ZYNQMP_AES + tristate "Support for Xilinx ZynqMP AES hw accelerator" + depends on ZYNQMP_FIRMWARE || COMPILE_TEST + select CRYPTO_AES + select CRYPTO_ENGINE + select CRYPTO_AEAD + help + Xilinx ZynqMP has AES-GCM engine used for symmetric key + encryption and decryption. This driver interfaces with AES hw + accelerator. Select this if you want to use the ZynqMP module + for AES algorithms. + config CRYPTO_DEV_MEDIATEK tristate "MediaTek's EIP97 Cryptographic Engine driver" depends on (ARM && ARCH_MEDIATEK) || COMPILE_TEST diff --git a/drivers/crypto/Makefile b/drivers/crypto/Makefile index 40229d499476..944ed7226e37 100644 --- a/drivers/crypto/Makefile +++ b/drivers/crypto/Makefile @@ -18,7 +18,7 @@ obj-$(CONFIG_CRYPTO_DEV_GEODE) += geode-aes.o obj-$(CONFIG_CRYPTO_DEV_HIFN_795X) += hifn_795x.o obj-$(CONFIG_CRYPTO_DEV_IMGTEC_HASH) += img-hash.o obj-$(CONFIG_CRYPTO_DEV_IXP4XX) += ixp4xx_crypto.o -obj-$(CONFIG_CRYPTO_DEV_MARVELL_CESA) += marvell/ +obj-$(CONFIG_CRYPTO_DEV_MARVELL) += marvell/ obj-$(CONFIG_CRYPTO_DEV_MEDIATEK) += mediatek/ obj-$(CONFIG_CRYPTO_DEV_MXS_DCP) += mxs-dcp.o obj-$(CONFIG_CRYPTO_DEV_NIAGARA2) += n2_crypto.o @@ -47,5 +47,6 @@ obj-$(CONFIG_CRYPTO_DEV_VMX) += vmx/ obj-$(CONFIG_CRYPTO_DEV_BCM_SPU) += bcm/ obj-$(CONFIG_CRYPTO_DEV_SAFEXCEL) += inside-secure/ obj-$(CONFIG_CRYPTO_DEV_ARTPEC6) += axis/ +obj-$(CONFIG_CRYPTO_DEV_ZYNQMP_AES) += xilinx/ obj-y += hisilicon/ obj-$(CONFIG_CRYPTO_DEV_AMLOGIC_GXL) += amlogic/ diff --git a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-core.c b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-core.c index f72346a44e69..3e4e4bbda34c 100644 --- a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-core.c +++ b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-core.c @@ -565,10 +565,8 @@ static int sun8i_ce_probe(struct platform_device *pdev) /* Get Non Secure IRQ */ irq = platform_get_irq(pdev, 0); - if (irq < 0) { - dev_err(ce->dev, "Cannot get CryptoEngine Non-secure IRQ\n"); + if (irq < 0) return irq; - } ce->reset = devm_reset_control_get(&pdev->dev, NULL); if (IS_ERR(ce->reset)) { diff --git a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce.h b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce.h index 8f8404c84a4d..0e9eac397e1b 100644 --- a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce.h +++ b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce.h @@ -214,7 +214,7 @@ struct sun8i_cipher_tfm_ctx { * this template * @alg: one of sub struct must be used * @stat_req: number of request done on this template - * @stat_fb: total of all data len done on this template + * @stat_fb: number of request which has fallbacked */ struct sun8i_ce_alg_template { u32 type; diff --git a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss.h b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss.h index b5f855f3de10..29c44f279112 100644 --- a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss.h +++ b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss.h @@ -186,7 +186,7 @@ struct sun8i_cipher_tfm_ctx { * this template * @alg: one of sub struct must be used * @stat_req: number of request done on this template - * @stat_fb: total of all data len done on this template + * @stat_fb: number of request which has fallbacked */ struct sun8i_ss_alg_template { u32 type; diff --git a/drivers/crypto/atmel-i2c.c b/drivers/crypto/atmel-i2c.c index 1d3355913b40..e8e8281e027d 100644 --- a/drivers/crypto/atmel-i2c.c +++ b/drivers/crypto/atmel-i2c.c @@ -176,7 +176,8 @@ static int atmel_i2c_wakeup(struct i2c_client *client) * device is idle, asleep or during waking up. Don't check for error * when waking up the device. */ - i2c_master_send(client, i2c_priv->wake_token, i2c_priv->wake_token_sz); + i2c_transfer_buffer_flags(client, i2c_priv->wake_token, + i2c_priv->wake_token_sz, I2C_M_IGNORE_NAK); /* * Wait to wake the device. Typical execution times for ecdh and genkey diff --git a/drivers/crypto/bcm/util.c b/drivers/crypto/bcm/util.c index cd7504101acd..2b304fc78059 100644 --- a/drivers/crypto/bcm/util.c +++ b/drivers/crypto/bcm/util.c @@ -366,88 +366,88 @@ static ssize_t spu_debugfs_read(struct file *filp, char __user *ubuf, ipriv = filp->private_data; out_offset = 0; - out_offset += snprintf(buf + out_offset, out_count - out_offset, + out_offset += scnprintf(buf + out_offset, out_count - out_offset, "Number of SPUs.........%u\n", ipriv->spu.num_spu); - out_offset += snprintf(buf + out_offset, out_count - out_offset, + out_offset += scnprintf(buf + out_offset, out_count - out_offset, "Current sessions.......%u\n", atomic_read(&ipriv->session_count)); - out_offset += snprintf(buf + out_offset, out_count - out_offset, + out_offset += scnprintf(buf + out_offset, out_count - out_offset, "Session count..........%u\n", atomic_read(&ipriv->stream_count)); - out_offset += snprintf(buf + out_offset, out_count - out_offset, + out_offset += scnprintf(buf + out_offset, out_count - out_offset, "Cipher setkey..........%u\n", atomic_read(&ipriv->setkey_cnt[SPU_OP_CIPHER])); - out_offset += snprintf(buf + out_offset, out_count - out_offset, + out_offset += scnprintf(buf + out_offset, out_count - out_offset, "Cipher Ops.............%u\n", atomic_read(&ipriv->op_counts[SPU_OP_CIPHER])); for (alg = 0; alg < CIPHER_ALG_LAST; alg++) { for (mode = 0; mode < CIPHER_MODE_LAST; mode++) { op_cnt = atomic_read(&ipriv->cipher_cnt[alg][mode]); if (op_cnt) { - out_offset += snprintf(buf + out_offset, + out_offset += scnprintf(buf + out_offset, out_count - out_offset, " %-13s%11u\n", spu_alg_name(alg, mode), op_cnt); } } } - out_offset += snprintf(buf + out_offset, out_count - out_offset, + out_offset += scnprintf(buf + out_offset, out_count - out_offset, "Hash Ops...............%u\n", atomic_read(&ipriv->op_counts[SPU_OP_HASH])); for (alg = 0; alg < HASH_ALG_LAST; alg++) { op_cnt = atomic_read(&ipriv->hash_cnt[alg]); if (op_cnt) { - out_offset += snprintf(buf + out_offset, + out_offset += scnprintf(buf + out_offset, out_count - out_offset, " %-13s%11u\n", hash_alg_name[alg], op_cnt); } } - out_offset += snprintf(buf + out_offset, out_count - out_offset, + out_offset += scnprintf(buf + out_offset, out_count - out_offset, "HMAC setkey............%u\n", atomic_read(&ipriv->setkey_cnt[SPU_OP_HMAC])); - out_offset += snprintf(buf + out_offset, out_count - out_offset, + out_offset += scnprintf(buf + out_offset, out_count - out_offset, "HMAC Ops...............%u\n", atomic_read(&ipriv->op_counts[SPU_OP_HMAC])); for (alg = 0; alg < HASH_ALG_LAST; alg++) { op_cnt = atomic_read(&ipriv->hmac_cnt[alg]); if (op_cnt) { - out_offset += snprintf(buf + out_offset, + out_offset += scnprintf(buf + out_offset, out_count - out_offset, " %-13s%11u\n", hash_alg_name[alg], op_cnt); } } - out_offset += snprintf(buf + out_offset, out_count - out_offset, + out_offset += scnprintf(buf + out_offset, out_count - out_offset, "AEAD setkey............%u\n", atomic_read(&ipriv->setkey_cnt[SPU_OP_AEAD])); - out_offset += snprintf(buf + out_offset, out_count - out_offset, + out_offset += scnprintf(buf + out_offset, out_count - out_offset, "AEAD Ops...............%u\n", atomic_read(&ipriv->op_counts[SPU_OP_AEAD])); for (alg = 0; alg < AEAD_TYPE_LAST; alg++) { op_cnt = atomic_read(&ipriv->aead_cnt[alg]); if (op_cnt) { - out_offset += snprintf(buf + out_offset, + out_offset += scnprintf(buf + out_offset, out_count - out_offset, " %-13s%11u\n", aead_alg_name[alg], op_cnt); } } - out_offset += snprintf(buf + out_offset, out_count - out_offset, + out_offset += scnprintf(buf + out_offset, out_count - out_offset, "Bytes of req data......%llu\n", (u64)atomic64_read(&ipriv->bytes_out)); - out_offset += snprintf(buf + out_offset, out_count - out_offset, + out_offset += scnprintf(buf + out_offset, out_count - out_offset, "Bytes of resp data.....%llu\n", (u64)atomic64_read(&ipriv->bytes_in)); - out_offset += snprintf(buf + out_offset, out_count - out_offset, + out_offset += scnprintf(buf + out_offset, out_count - out_offset, "Mailbox full...........%u\n", atomic_read(&ipriv->mb_no_spc)); - out_offset += snprintf(buf + out_offset, out_count - out_offset, + out_offset += scnprintf(buf + out_offset, out_count - out_offset, "Mailbox send failures..%u\n", atomic_read(&ipriv->mb_send_fail)); - out_offset += snprintf(buf + out_offset, out_count - out_offset, + out_offset += scnprintf(buf + out_offset, out_count - out_offset, "Check ICV errors.......%u\n", atomic_read(&ipriv->bad_icv)); if (ipriv->spu.spu_type == SPU_TYPE_SPUM) @@ -455,7 +455,7 @@ static ssize_t spu_debugfs_read(struct file *filp, char __user *ubuf, spu_ofifo_ctrl = ioread32(ipriv->spu.reg_vbase[i] + SPU_OFIFO_CTRL); fifo_len = spu_ofifo_ctrl & SPU_FIFO_WATERMARK; - out_offset += snprintf(buf + out_offset, + out_offset += scnprintf(buf + out_offset, out_count - out_offset, "SPU %d output FIFO high water.....%u\n", i, fifo_len); diff --git a/drivers/crypto/caam/Kconfig b/drivers/crypto/caam/Kconfig index fac5b2e26610..a62f228be6da 100644 --- a/drivers/crypto/caam/Kconfig +++ b/drivers/crypto/caam/Kconfig @@ -13,6 +13,7 @@ config CRYPTO_DEV_FSL_CAAM depends on FSL_SOC || ARCH_MXC || ARCH_LAYERSCAPE select SOC_BUS select CRYPTO_DEV_FSL_CAAM_COMMON + imply FSL_MC_BUS help Enables the driver module for Freescale's Cryptographic Accelerator and Assurance Module (CAAM), also known as the SEC version 4 (SEC4). @@ -33,6 +34,7 @@ config CRYPTO_DEV_FSL_CAAM_DEBUG menuconfig CRYPTO_DEV_FSL_CAAM_JR tristate "Freescale CAAM Job Ring driver backend" + select CRYPTO_ENGINE default y help Enables the driver module for Job Rings which are part of diff --git a/drivers/crypto/caam/caamalg.c b/drivers/crypto/caam/caamalg.c index ef1a65f4fc92..b2f9882bc010 100644 --- a/drivers/crypto/caam/caamalg.c +++ b/drivers/crypto/caam/caamalg.c @@ -56,6 +56,7 @@ #include "sg_sw_sec4.h" #include "key_gen.h" #include "caamalg_desc.h" +#include <crypto/engine.h> /* * crypto alg @@ -101,6 +102,7 @@ struct caam_skcipher_alg { * per-session context */ struct caam_ctx { + struct crypto_engine_ctx enginectx; u32 sh_desc_enc[DESC_MAX_USED_LEN]; u32 sh_desc_dec[DESC_MAX_USED_LEN]; u8 key[CAAM_MAX_KEY_SIZE]; @@ -114,6 +116,14 @@ struct caam_ctx { unsigned int authsize; }; +struct caam_skcipher_req_ctx { + struct skcipher_edesc *edesc; +}; + +struct caam_aead_req_ctx { + struct aead_edesc *edesc; +}; + static int aead_null_set_sh_desc(struct crypto_aead *aead) { struct caam_ctx *ctx = crypto_aead_ctx(aead); @@ -858,6 +868,7 @@ static int xts_skcipher_setkey(struct crypto_skcipher *skcipher, const u8 *key, * @mapped_src_nents: number of segments in input h/w link table * @mapped_dst_nents: number of segments in output h/w link table * @sec4_sg_bytes: length of dma mapped sec4_sg space + * @bklog: stored to determine if the request needs backlog * @sec4_sg_dma: bus physical mapped address of h/w link table * @sec4_sg: pointer to h/w link table * @hw_desc: the h/w job descriptor followed by any referenced link tables @@ -868,6 +879,7 @@ struct aead_edesc { int mapped_src_nents; int mapped_dst_nents; int sec4_sg_bytes; + bool bklog; dma_addr_t sec4_sg_dma; struct sec4_sg_entry *sec4_sg; u32 hw_desc[]; @@ -881,6 +893,7 @@ struct aead_edesc { * @mapped_dst_nents: number of segments in output h/w link table * @iv_dma: dma address of iv for checking continuity and link table * @sec4_sg_bytes: length of dma mapped sec4_sg space + * @bklog: stored to determine if the request needs backlog * @sec4_sg_dma: bus physical mapped address of h/w link table * @sec4_sg: pointer to h/w link table * @hw_desc: the h/w job descriptor followed by any referenced link tables @@ -893,9 +906,10 @@ struct skcipher_edesc { int mapped_dst_nents; dma_addr_t iv_dma; int sec4_sg_bytes; + bool bklog; dma_addr_t sec4_sg_dma; struct sec4_sg_entry *sec4_sg; - u32 hw_desc[0]; + u32 hw_desc[]; }; static void caam_unmap(struct device *dev, struct scatterlist *src, @@ -941,37 +955,20 @@ static void skcipher_unmap(struct device *dev, struct skcipher_edesc *edesc, edesc->sec4_sg_dma, edesc->sec4_sg_bytes); } -static void aead_encrypt_done(struct device *jrdev, u32 *desc, u32 err, - void *context) -{ - struct aead_request *req = context; - struct aead_edesc *edesc; - int ecode = 0; - - dev_dbg(jrdev, "%s %d: err 0x%x\n", __func__, __LINE__, err); - - edesc = container_of(desc, struct aead_edesc, hw_desc[0]); - - if (err) - ecode = caam_jr_strstatus(jrdev, err); - - aead_unmap(jrdev, edesc, req); - - kfree(edesc); - - aead_request_complete(req, ecode); -} - -static void aead_decrypt_done(struct device *jrdev, u32 *desc, u32 err, - void *context) +static void aead_crypt_done(struct device *jrdev, u32 *desc, u32 err, + void *context) { struct aead_request *req = context; + struct caam_aead_req_ctx *rctx = aead_request_ctx(req); + struct caam_drv_private_jr *jrp = dev_get_drvdata(jrdev); struct aead_edesc *edesc; int ecode = 0; + bool has_bklog; dev_dbg(jrdev, "%s %d: err 0x%x\n", __func__, __LINE__, err); - edesc = container_of(desc, struct aead_edesc, hw_desc[0]); + edesc = rctx->edesc; + has_bklog = edesc->bklog; if (err) ecode = caam_jr_strstatus(jrdev, err); @@ -980,61 +977,32 @@ static void aead_decrypt_done(struct device *jrdev, u32 *desc, u32 err, kfree(edesc); - aead_request_complete(req, ecode); -} - -static void skcipher_encrypt_done(struct device *jrdev, u32 *desc, u32 err, - void *context) -{ - struct skcipher_request *req = context; - struct skcipher_edesc *edesc; - struct crypto_skcipher *skcipher = crypto_skcipher_reqtfm(req); - int ivsize = crypto_skcipher_ivsize(skcipher); - int ecode = 0; - - dev_dbg(jrdev, "%s %d: err 0x%x\n", __func__, __LINE__, err); - - edesc = container_of(desc, struct skcipher_edesc, hw_desc[0]); - - if (err) - ecode = caam_jr_strstatus(jrdev, err); - - skcipher_unmap(jrdev, edesc, req); - /* - * The crypto API expects us to set the IV (req->iv) to the last - * ciphertext block (CBC mode) or last counter (CTR mode). - * This is used e.g. by the CTS mode. + * If no backlog flag, the completion of the request is done + * by CAAM, not crypto engine. */ - if (ivsize && !ecode) { - memcpy(req->iv, (u8 *)edesc->sec4_sg + edesc->sec4_sg_bytes, - ivsize); - print_hex_dump_debug("dstiv @"__stringify(__LINE__)": ", - DUMP_PREFIX_ADDRESS, 16, 4, req->iv, - edesc->src_nents > 1 ? 100 : ivsize, 1); - } - - caam_dump_sg("dst @" __stringify(__LINE__)": ", - DUMP_PREFIX_ADDRESS, 16, 4, req->dst, - edesc->dst_nents > 1 ? 100 : req->cryptlen, 1); - - kfree(edesc); - - skcipher_request_complete(req, ecode); + if (!has_bklog) + aead_request_complete(req, ecode); + else + crypto_finalize_aead_request(jrp->engine, req, ecode); } -static void skcipher_decrypt_done(struct device *jrdev, u32 *desc, u32 err, - void *context) +static void skcipher_crypt_done(struct device *jrdev, u32 *desc, u32 err, + void *context) { struct skcipher_request *req = context; struct skcipher_edesc *edesc; + struct caam_skcipher_req_ctx *rctx = skcipher_request_ctx(req); struct crypto_skcipher *skcipher = crypto_skcipher_reqtfm(req); + struct caam_drv_private_jr *jrp = dev_get_drvdata(jrdev); int ivsize = crypto_skcipher_ivsize(skcipher); int ecode = 0; + bool has_bklog; dev_dbg(jrdev, "%s %d: err 0x%x\n", __func__, __LINE__, err); - edesc = container_of(desc, struct skcipher_edesc, hw_desc[0]); + edesc = rctx->edesc; + has_bklog = edesc->bklog; if (err) ecode = caam_jr_strstatus(jrdev, err); @@ -1060,7 +1028,14 @@ static void skcipher_decrypt_done(struct device *jrdev, u32 *desc, u32 err, kfree(edesc); - skcipher_request_complete(req, ecode); + /* + * If no backlog flag, the completion of the request is done + * by CAAM, not crypto engine. + */ + if (!has_bklog) + skcipher_request_complete(req, ecode); + else + crypto_finalize_skcipher_request(jrp->engine, req, ecode); } /* @@ -1306,6 +1281,7 @@ static struct aead_edesc *aead_edesc_alloc(struct aead_request *req, struct crypto_aead *aead = crypto_aead_reqtfm(req); struct caam_ctx *ctx = crypto_aead_ctx(aead); struct device *jrdev = ctx->jrdev; + struct caam_aead_req_ctx *rctx = aead_request_ctx(req); gfp_t flags = (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ? GFP_KERNEL : GFP_ATOMIC; int src_nents, mapped_src_nents, dst_nents = 0, mapped_dst_nents = 0; @@ -1406,6 +1382,9 @@ static struct aead_edesc *aead_edesc_alloc(struct aead_request *req, edesc->mapped_dst_nents = mapped_dst_nents; edesc->sec4_sg = (void *)edesc + sizeof(struct aead_edesc) + desc_bytes; + + rctx->edesc = edesc; + *all_contig_ptr = !(mapped_src_nents > 1); sec4_sg_index = 0; @@ -1436,41 +1415,34 @@ static struct aead_edesc *aead_edesc_alloc(struct aead_request *req, return edesc; } -static int gcm_encrypt(struct aead_request *req) +static int aead_enqueue_req(struct device *jrdev, struct aead_request *req) { - struct aead_edesc *edesc; - struct crypto_aead *aead = crypto_aead_reqtfm(req); - struct caam_ctx *ctx = crypto_aead_ctx(aead); - struct device *jrdev = ctx->jrdev; - bool all_contig; - u32 *desc; - int ret = 0; - - /* allocate extended descriptor */ - edesc = aead_edesc_alloc(req, GCM_DESC_JOB_IO_LEN, &all_contig, true); - if (IS_ERR(edesc)) - return PTR_ERR(edesc); - - /* Create and submit job descriptor */ - init_gcm_job(req, edesc, all_contig, true); + struct caam_drv_private_jr *jrpriv = dev_get_drvdata(jrdev); + struct caam_aead_req_ctx *rctx = aead_request_ctx(req); + struct aead_edesc *edesc = rctx->edesc; + u32 *desc = edesc->hw_desc; + int ret; - print_hex_dump_debug("aead jobdesc@"__stringify(__LINE__)": ", - DUMP_PREFIX_ADDRESS, 16, 4, edesc->hw_desc, - desc_bytes(edesc->hw_desc), 1); + /* + * Only the backlog request are sent to crypto-engine since the others + * can be handled by CAAM, if free, especially since JR has up to 1024 + * entries (more than the 10 entries from crypto-engine). + */ + if (req->base.flags & CRYPTO_TFM_REQ_MAY_BACKLOG) + ret = crypto_transfer_aead_request_to_engine(jrpriv->engine, + req); + else + ret = caam_jr_enqueue(jrdev, desc, aead_crypt_done, req); - desc = edesc->hw_desc; - ret = caam_jr_enqueue(jrdev, desc, aead_encrypt_done, req); - if (!ret) { - ret = -EINPROGRESS; - } else { + if ((ret != -EINPROGRESS) && (ret != -EBUSY)) { aead_unmap(jrdev, edesc, req); - kfree(edesc); + kfree(rctx->edesc); } return ret; } -static int chachapoly_encrypt(struct aead_request *req) +static inline int chachapoly_crypt(struct aead_request *req, bool encrypt) { struct aead_edesc *edesc; struct crypto_aead *aead = crypto_aead_reqtfm(req); @@ -1478,180 +1450,130 @@ static int chachapoly_encrypt(struct aead_request *req) struct device *jrdev = ctx->jrdev; bool all_contig; u32 *desc; - int ret; edesc = aead_edesc_alloc(req, CHACHAPOLY_DESC_JOB_IO_LEN, &all_contig, - true); + encrypt); if (IS_ERR(edesc)) return PTR_ERR(edesc); desc = edesc->hw_desc; - init_chachapoly_job(req, edesc, all_contig, true); + init_chachapoly_job(req, edesc, all_contig, encrypt); print_hex_dump_debug("chachapoly jobdesc@" __stringify(__LINE__)": ", DUMP_PREFIX_ADDRESS, 16, 4, desc, desc_bytes(desc), 1); - ret = caam_jr_enqueue(jrdev, desc, aead_encrypt_done, req); - if (!ret) { - ret = -EINPROGRESS; - } else { - aead_unmap(jrdev, edesc, req); - kfree(edesc); - } - - return ret; + return aead_enqueue_req(jrdev, req); } -static int chachapoly_decrypt(struct aead_request *req) +static int chachapoly_encrypt(struct aead_request *req) { - struct aead_edesc *edesc; - struct crypto_aead *aead = crypto_aead_reqtfm(req); - struct caam_ctx *ctx = crypto_aead_ctx(aead); - struct device *jrdev = ctx->jrdev; - bool all_contig; - u32 *desc; - int ret; - - edesc = aead_edesc_alloc(req, CHACHAPOLY_DESC_JOB_IO_LEN, &all_contig, - false); - if (IS_ERR(edesc)) - return PTR_ERR(edesc); - - desc = edesc->hw_desc; - - init_chachapoly_job(req, edesc, all_contig, false); - print_hex_dump_debug("chachapoly jobdesc@" __stringify(__LINE__)": ", - DUMP_PREFIX_ADDRESS, 16, 4, desc, desc_bytes(desc), - 1); - - ret = caam_jr_enqueue(jrdev, desc, aead_decrypt_done, req); - if (!ret) { - ret = -EINPROGRESS; - } else { - aead_unmap(jrdev, edesc, req); - kfree(edesc); - } - - return ret; + return chachapoly_crypt(req, true); } -static int ipsec_gcm_encrypt(struct aead_request *req) +static int chachapoly_decrypt(struct aead_request *req) { - return crypto_ipsec_check_assoclen(req->assoclen) ? : gcm_encrypt(req); + return chachapoly_crypt(req, false); } -static int aead_encrypt(struct aead_request *req) +static inline int aead_crypt(struct aead_request *req, bool encrypt) { struct aead_edesc *edesc; struct crypto_aead *aead = crypto_aead_reqtfm(req); struct caam_ctx *ctx = crypto_aead_ctx(aead); struct device *jrdev = ctx->jrdev; bool all_contig; - u32 *desc; - int ret = 0; /* allocate extended descriptor */ edesc = aead_edesc_alloc(req, AUTHENC_DESC_JOB_IO_LEN, - &all_contig, true); + &all_contig, encrypt); if (IS_ERR(edesc)) return PTR_ERR(edesc); /* Create and submit job descriptor */ - init_authenc_job(req, edesc, all_contig, true); + init_authenc_job(req, edesc, all_contig, encrypt); print_hex_dump_debug("aead jobdesc@"__stringify(__LINE__)": ", DUMP_PREFIX_ADDRESS, 16, 4, edesc->hw_desc, desc_bytes(edesc->hw_desc), 1); - desc = edesc->hw_desc; - ret = caam_jr_enqueue(jrdev, desc, aead_encrypt_done, req); - if (!ret) { - ret = -EINPROGRESS; - } else { - aead_unmap(jrdev, edesc, req); - kfree(edesc); - } + return aead_enqueue_req(jrdev, req); +} - return ret; +static int aead_encrypt(struct aead_request *req) +{ + return aead_crypt(req, true); } -static int gcm_decrypt(struct aead_request *req) +static int aead_decrypt(struct aead_request *req) { - struct aead_edesc *edesc; - struct crypto_aead *aead = crypto_aead_reqtfm(req); - struct caam_ctx *ctx = crypto_aead_ctx(aead); - struct device *jrdev = ctx->jrdev; - bool all_contig; - u32 *desc; - int ret = 0; + return aead_crypt(req, false); +} - /* allocate extended descriptor */ - edesc = aead_edesc_alloc(req, GCM_DESC_JOB_IO_LEN, &all_contig, false); - if (IS_ERR(edesc)) - return PTR_ERR(edesc); +static int aead_do_one_req(struct crypto_engine *engine, void *areq) +{ + struct aead_request *req = aead_request_cast(areq); + struct caam_ctx *ctx = crypto_aead_ctx(crypto_aead_reqtfm(req)); + struct caam_aead_req_ctx *rctx = aead_request_ctx(req); + u32 *desc = rctx->edesc->hw_desc; + int ret; - /* Create and submit job descriptor*/ - init_gcm_job(req, edesc, all_contig, false); + rctx->edesc->bklog = true; - print_hex_dump_debug("aead jobdesc@"__stringify(__LINE__)": ", - DUMP_PREFIX_ADDRESS, 16, 4, edesc->hw_desc, - desc_bytes(edesc->hw_desc), 1); + ret = caam_jr_enqueue(ctx->jrdev, desc, aead_crypt_done, req); - desc = edesc->hw_desc; - ret = caam_jr_enqueue(jrdev, desc, aead_decrypt_done, req); - if (!ret) { - ret = -EINPROGRESS; + if (ret != -EINPROGRESS) { + aead_unmap(ctx->jrdev, rctx->edesc, req); + kfree(rctx->edesc); } else { - aead_unmap(jrdev, edesc, req); - kfree(edesc); + ret = 0; } return ret; } -static int ipsec_gcm_decrypt(struct aead_request *req) -{ - return crypto_ipsec_check_assoclen(req->assoclen) ? : gcm_decrypt(req); -} - -static int aead_decrypt(struct aead_request *req) +static inline int gcm_crypt(struct aead_request *req, bool encrypt) { struct aead_edesc *edesc; struct crypto_aead *aead = crypto_aead_reqtfm(req); struct caam_ctx *ctx = crypto_aead_ctx(aead); struct device *jrdev = ctx->jrdev; bool all_contig; - u32 *desc; - int ret = 0; - - caam_dump_sg("dec src@" __stringify(__LINE__)": ", - DUMP_PREFIX_ADDRESS, 16, 4, req->src, - req->assoclen + req->cryptlen, 1); /* allocate extended descriptor */ - edesc = aead_edesc_alloc(req, AUTHENC_DESC_JOB_IO_LEN, - &all_contig, false); + edesc = aead_edesc_alloc(req, GCM_DESC_JOB_IO_LEN, &all_contig, + encrypt); if (IS_ERR(edesc)) return PTR_ERR(edesc); - /* Create and submit job descriptor*/ - init_authenc_job(req, edesc, all_contig, false); + /* Create and submit job descriptor */ + init_gcm_job(req, edesc, all_contig, encrypt); print_hex_dump_debug("aead jobdesc@"__stringify(__LINE__)": ", DUMP_PREFIX_ADDRESS, 16, 4, edesc->hw_desc, desc_bytes(edesc->hw_desc), 1); - desc = edesc->hw_desc; - ret = caam_jr_enqueue(jrdev, desc, aead_decrypt_done, req); - if (!ret) { - ret = -EINPROGRESS; - } else { - aead_unmap(jrdev, edesc, req); - kfree(edesc); - } + return aead_enqueue_req(jrdev, req); +} - return ret; +static int gcm_encrypt(struct aead_request *req) +{ + return gcm_crypt(req, true); +} + +static int gcm_decrypt(struct aead_request *req) +{ + return gcm_crypt(req, false); +} + +static int ipsec_gcm_encrypt(struct aead_request *req) +{ + return crypto_ipsec_check_assoclen(req->assoclen) ? : gcm_encrypt(req); +} + +static int ipsec_gcm_decrypt(struct aead_request *req) +{ + return crypto_ipsec_check_assoclen(req->assoclen) ? : gcm_decrypt(req); } /* @@ -1662,6 +1584,7 @@ static struct skcipher_edesc *skcipher_edesc_alloc(struct skcipher_request *req, { struct crypto_skcipher *skcipher = crypto_skcipher_reqtfm(req); struct caam_ctx *ctx = crypto_skcipher_ctx(skcipher); + struct caam_skcipher_req_ctx *rctx = skcipher_request_ctx(req); struct device *jrdev = ctx->jrdev; gfp_t flags = (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ? GFP_KERNEL : GFP_ATOMIC; @@ -1760,6 +1683,7 @@ static struct skcipher_edesc *skcipher_edesc_alloc(struct skcipher_request *req, edesc->sec4_sg_bytes = sec4_sg_bytes; edesc->sec4_sg = (struct sec4_sg_entry *)((u8 *)edesc->hw_desc + desc_bytes); + rctx->edesc = edesc; /* Make sure IV is located in a DMAable area */ if (ivsize) { @@ -1791,7 +1715,7 @@ static struct skcipher_edesc *skcipher_edesc_alloc(struct skcipher_request *req, if (ivsize || mapped_dst_nents > 1) sg_to_sec4_set_last(edesc->sec4_sg + dst_sg_idx + - mapped_dst_nents); + mapped_dst_nents - 1 + !!ivsize); if (sec4_sg_bytes) { edesc->sec4_sg_dma = dma_map_single(jrdev, edesc->sec4_sg, @@ -1815,49 +1739,35 @@ static struct skcipher_edesc *skcipher_edesc_alloc(struct skcipher_request *req, return edesc; } -static int skcipher_encrypt(struct skcipher_request *req) +static int skcipher_do_one_req(struct crypto_engine *engine, void *areq) { - struct skcipher_edesc *edesc; - struct crypto_skcipher *skcipher = crypto_skcipher_reqtfm(req); - struct caam_ctx *ctx = crypto_skcipher_ctx(skcipher); - struct device *jrdev = ctx->jrdev; - u32 *desc; - int ret = 0; - - if (!req->cryptlen) - return 0; - - /* allocate extended descriptor */ - edesc = skcipher_edesc_alloc(req, DESC_JOB_IO_LEN * CAAM_CMD_SZ); - if (IS_ERR(edesc)) - return PTR_ERR(edesc); - - /* Create and submit job descriptor*/ - init_skcipher_job(req, edesc, true); + struct skcipher_request *req = skcipher_request_cast(areq); + struct caam_ctx *ctx = crypto_skcipher_ctx(crypto_skcipher_reqtfm(req)); + struct caam_skcipher_req_ctx *rctx = skcipher_request_ctx(req); + u32 *desc = rctx->edesc->hw_desc; + int ret; - print_hex_dump_debug("skcipher jobdesc@" __stringify(__LINE__)": ", - DUMP_PREFIX_ADDRESS, 16, 4, edesc->hw_desc, - desc_bytes(edesc->hw_desc), 1); + rctx->edesc->bklog = true; - desc = edesc->hw_desc; - ret = caam_jr_enqueue(jrdev, desc, skcipher_encrypt_done, req); + ret = caam_jr_enqueue(ctx->jrdev, desc, skcipher_crypt_done, req); - if (!ret) { - ret = -EINPROGRESS; + if (ret != -EINPROGRESS) { + skcipher_unmap(ctx->jrdev, rctx->edesc, req); + kfree(rctx->edesc); } else { - skcipher_unmap(jrdev, edesc, req); - kfree(edesc); + ret = 0; } return ret; } -static int skcipher_decrypt(struct skcipher_request *req) +static inline int skcipher_crypt(struct skcipher_request *req, bool encrypt) { struct skcipher_edesc *edesc; struct crypto_skcipher *skcipher = crypto_skcipher_reqtfm(req); struct caam_ctx *ctx = crypto_skcipher_ctx(skcipher); struct device *jrdev = ctx->jrdev; + struct caam_drv_private_jr *jrpriv = dev_get_drvdata(jrdev); u32 *desc; int ret = 0; @@ -1870,17 +1780,25 @@ static int skcipher_decrypt(struct skcipher_request *req) return PTR_ERR(edesc); /* Create and submit job descriptor*/ - init_skcipher_job(req, edesc, false); - desc = edesc->hw_desc; + init_skcipher_job(req, edesc, encrypt); print_hex_dump_debug("skcipher jobdesc@" __stringify(__LINE__)": ", DUMP_PREFIX_ADDRESS, 16, 4, edesc->hw_desc, desc_bytes(edesc->hw_desc), 1); - ret = caam_jr_enqueue(jrdev, desc, skcipher_decrypt_done, req); - if (!ret) { - ret = -EINPROGRESS; - } else { + desc = edesc->hw_desc; + /* + * Only the backlog request are sent to crypto-engine since the others + * can be handled by CAAM, if free, especially since JR has up to 1024 + * entries (more than the 10 entries from crypto-engine). + */ + if (req->base.flags & CRYPTO_TFM_REQ_MAY_BACKLOG) + ret = crypto_transfer_skcipher_request_to_engine(jrpriv->engine, + req); + else + ret = caam_jr_enqueue(jrdev, desc, skcipher_crypt_done, req); + + if ((ret != -EINPROGRESS) && (ret != -EBUSY)) { skcipher_unmap(jrdev, edesc, req); kfree(edesc); } @@ -1888,6 +1806,16 @@ static int skcipher_decrypt(struct skcipher_request *req) return ret; } +static int skcipher_encrypt(struct skcipher_request *req) +{ + return skcipher_crypt(req, true); +} + +static int skcipher_decrypt(struct skcipher_request *req) +{ + return skcipher_crypt(req, false); +} + static struct caam_skcipher_alg driver_algs[] = { { .skcipher = { @@ -3391,6 +3319,8 @@ static int caam_init_common(struct caam_ctx *ctx, struct caam_alg_entry *caam, { dma_addr_t dma_addr; struct caam_drv_private *priv; + const size_t sh_desc_enc_offset = offsetof(struct caam_ctx, + sh_desc_enc); ctx->jrdev = caam_jr_alloc(); if (IS_ERR(ctx->jrdev)) { @@ -3406,7 +3336,8 @@ static int caam_init_common(struct caam_ctx *ctx, struct caam_alg_entry *caam, dma_addr = dma_map_single_attrs(ctx->jrdev, ctx->sh_desc_enc, offsetof(struct caam_ctx, - sh_desc_enc_dma), + sh_desc_enc_dma) - + sh_desc_enc_offset, ctx->dir, DMA_ATTR_SKIP_CPU_SYNC); if (dma_mapping_error(ctx->jrdev, dma_addr)) { dev_err(ctx->jrdev, "unable to map key, shared descriptors\n"); @@ -3416,8 +3347,10 @@ static int caam_init_common(struct caam_ctx *ctx, struct caam_alg_entry *caam, ctx->sh_desc_enc_dma = dma_addr; ctx->sh_desc_dec_dma = dma_addr + offsetof(struct caam_ctx, - sh_desc_dec); - ctx->key_dma = dma_addr + offsetof(struct caam_ctx, key); + sh_desc_dec) - + sh_desc_enc_offset; + ctx->key_dma = dma_addr + offsetof(struct caam_ctx, key) - + sh_desc_enc_offset; /* copy descriptor header template value */ ctx->cdata.algtype = OP_TYPE_CLASS1_ALG | caam->class1_alg_type; @@ -3431,6 +3364,11 @@ static int caam_cra_init(struct crypto_skcipher *tfm) struct skcipher_alg *alg = crypto_skcipher_alg(tfm); struct caam_skcipher_alg *caam_alg = container_of(alg, typeof(*caam_alg), skcipher); + struct caam_ctx *ctx = crypto_skcipher_ctx(tfm); + + crypto_skcipher_set_reqsize(tfm, sizeof(struct caam_skcipher_req_ctx)); + + ctx->enginectx.op.do_one_request = skcipher_do_one_req; return caam_init_common(crypto_skcipher_ctx(tfm), &caam_alg->caam, false); @@ -3443,13 +3381,18 @@ static int caam_aead_init(struct crypto_aead *tfm) container_of(alg, struct caam_aead_alg, aead); struct caam_ctx *ctx = crypto_aead_ctx(tfm); + crypto_aead_set_reqsize(tfm, sizeof(struct caam_aead_req_ctx)); + + ctx->enginectx.op.do_one_request = aead_do_one_req; + return caam_init_common(ctx, &caam_alg->caam, !caam_alg->caam.nodkp); } static void caam_exit_common(struct caam_ctx *ctx) { dma_unmap_single_attrs(ctx->jrdev, ctx->sh_desc_enc_dma, - offsetof(struct caam_ctx, sh_desc_enc_dma), + offsetof(struct caam_ctx, sh_desc_enc_dma) - + offsetof(struct caam_ctx, sh_desc_enc), ctx->dir, DMA_ATTR_SKIP_CPU_SYNC); caam_jr_free(ctx->jrdev); } diff --git a/drivers/crypto/caam/caamalg_desc.c b/drivers/crypto/caam/caamalg_desc.c index aa9ccca67045..d6c58184bb57 100644 --- a/drivers/crypto/caam/caamalg_desc.c +++ b/drivers/crypto/caam/caamalg_desc.c @@ -1379,6 +1379,9 @@ void cnstr_shdsc_skcipher_encap(u32 * const desc, struct alginfo *cdata, const u32 ctx1_iv_off) { u32 *key_jump_cmd; + u32 options = cdata->algtype | OP_ALG_AS_INIT | OP_ALG_ENCRYPT; + bool is_chacha20 = ((cdata->algtype & OP_ALG_ALGSEL_MASK) == + OP_ALG_ALGSEL_CHACHA20); init_sh_desc(desc, HDR_SHARE_SERIAL | HDR_SAVECTX); /* Skip if already shared */ @@ -1417,14 +1420,15 @@ void cnstr_shdsc_skcipher_encap(u32 * const desc, struct alginfo *cdata, LDST_OFFSET_SHIFT)); /* Load operation */ - append_operation(desc, cdata->algtype | OP_ALG_AS_INIT | - OP_ALG_ENCRYPT); + if (is_chacha20) + options |= OP_ALG_AS_FINALIZE; + append_operation(desc, options); /* Perform operation */ skcipher_append_src_dst(desc); /* Store IV */ - if (ivsize) + if (!is_chacha20 && ivsize) append_seq_store(desc, ivsize, LDST_SRCDST_BYTE_CONTEXT | LDST_CLASS_1_CCB | (ctx1_iv_off << LDST_OFFSET_SHIFT)); @@ -1451,6 +1455,8 @@ void cnstr_shdsc_skcipher_decap(u32 * const desc, struct alginfo *cdata, const u32 ctx1_iv_off) { u32 *key_jump_cmd; + bool is_chacha20 = ((cdata->algtype & OP_ALG_ALGSEL_MASK) == + OP_ALG_ALGSEL_CHACHA20); init_sh_desc(desc, HDR_SHARE_SERIAL | HDR_SAVECTX); /* Skip if already shared */ @@ -1499,7 +1505,7 @@ void cnstr_shdsc_skcipher_decap(u32 * const desc, struct alginfo *cdata, skcipher_append_src_dst(desc); /* Store IV */ - if (ivsize) + if (!is_chacha20 && ivsize) append_seq_store(desc, ivsize, LDST_SRCDST_BYTE_CONTEXT | LDST_CLASS_1_CCB | (ctx1_iv_off << LDST_OFFSET_SHIFT)); @@ -1518,7 +1524,13 @@ EXPORT_SYMBOL(cnstr_shdsc_skcipher_decap); */ void cnstr_shdsc_xts_skcipher_encap(u32 * const desc, struct alginfo *cdata) { - __be64 sector_size = cpu_to_be64(512); + /* + * Set sector size to a big value, practically disabling + * sector size segmentation in xts implementation. We cannot + * take full advantage of this HW feature with existing + * crypto API / dm-crypt SW architecture. + */ + __be64 sector_size = cpu_to_be64(BIT(15)); u32 *key_jump_cmd; init_sh_desc(desc, HDR_SHARE_SERIAL | HDR_SAVECTX); @@ -1571,7 +1583,13 @@ EXPORT_SYMBOL(cnstr_shdsc_xts_skcipher_encap); */ void cnstr_shdsc_xts_skcipher_decap(u32 * const desc, struct alginfo *cdata) { - __be64 sector_size = cpu_to_be64(512); + /* + * Set sector size to a big value, practically disabling + * sector size segmentation in xts implementation. We cannot + * take full advantage of this HW feature with existing + * crypto API / dm-crypt SW architecture. + */ + __be64 sector_size = cpu_to_be64(BIT(15)); u32 *key_jump_cmd; init_sh_desc(desc, HDR_SHARE_SERIAL | HDR_SAVECTX); diff --git a/drivers/crypto/caam/caamalg_qi.c b/drivers/crypto/caam/caamalg_qi.c index 4a29e0ef9d63..27e36bdf6163 100644 --- a/drivers/crypto/caam/caamalg_qi.c +++ b/drivers/crypto/caam/caamalg_qi.c @@ -783,7 +783,7 @@ struct aead_edesc { unsigned int assoclen; dma_addr_t assoclen_dma; struct caam_drv_req drv_req; - struct qm_sg_entry sgt[0]; + struct qm_sg_entry sgt[]; }; /* @@ -803,7 +803,7 @@ struct skcipher_edesc { int qm_sg_bytes; dma_addr_t qm_sg_dma; struct caam_drv_req drv_req; - struct qm_sg_entry sgt[0]; + struct qm_sg_entry sgt[]; }; static struct caam_drv_ctx *get_drv_ctx(struct caam_ctx *ctx, diff --git a/drivers/crypto/caam/caamalg_qi2.h b/drivers/crypto/caam/caamalg_qi2.h index 706736776b47..f29cb7bd7dd3 100644 --- a/drivers/crypto/caam/caamalg_qi2.h +++ b/drivers/crypto/caam/caamalg_qi2.h @@ -114,7 +114,7 @@ struct aead_edesc { dma_addr_t qm_sg_dma; unsigned int assoclen; dma_addr_t assoclen_dma; - struct dpaa2_sg_entry sgt[0]; + struct dpaa2_sg_entry sgt[]; }; /* @@ -132,7 +132,7 @@ struct skcipher_edesc { dma_addr_t iv_dma; int qm_sg_bytes; dma_addr_t qm_sg_dma; - struct dpaa2_sg_entry sgt[0]; + struct dpaa2_sg_entry sgt[]; }; /* @@ -146,7 +146,7 @@ struct ahash_edesc { dma_addr_t qm_sg_dma; int src_nents; int qm_sg_bytes; - struct dpaa2_sg_entry sgt[0]; + struct dpaa2_sg_entry sgt[]; }; /** diff --git a/drivers/crypto/caam/caamhash.c b/drivers/crypto/caam/caamhash.c index 8d9143407fc5..27ff4a3d037e 100644 --- a/drivers/crypto/caam/caamhash.c +++ b/drivers/crypto/caam/caamhash.c @@ -65,6 +65,7 @@ #include "sg_sw_sec4.h" #include "key_gen.h" #include "caamhash_desc.h" +#include <crypto/engine.h> #define CAAM_CRA_PRIORITY 3000 @@ -86,6 +87,7 @@ static struct list_head hash_list; /* ahash per-session context */ struct caam_hash_ctx { + struct crypto_engine_ctx enginectx; u32 sh_desc_update[DESC_HASH_MAX_USED_LEN] ____cacheline_aligned; u32 sh_desc_update_first[DESC_HASH_MAX_USED_LEN] ____cacheline_aligned; u32 sh_desc_fin[DESC_HASH_MAX_USED_LEN] ____cacheline_aligned; @@ -111,9 +113,12 @@ struct caam_hash_state { int buflen; int next_buflen; u8 caam_ctx[MAX_CTX_LEN] ____cacheline_aligned; - int (*update)(struct ahash_request *req); + int (*update)(struct ahash_request *req) ____cacheline_aligned; int (*final)(struct ahash_request *req); int (*finup)(struct ahash_request *req); + struct ahash_edesc *edesc; + void (*ahash_op_done)(struct device *jrdev, u32 *desc, u32 err, + void *context); }; struct caam_export_state { @@ -395,7 +400,7 @@ static int hash_digest_key(struct caam_hash_ctx *ctx, u32 *keylen, u8 *key, init_completion(&result.completion); ret = caam_jr_enqueue(jrdev, desc, split_key_done, &result); - if (!ret) { + if (ret == -EINPROGRESS) { /* in progress */ wait_for_completion(&result.completion); ret = result.err; @@ -521,6 +526,7 @@ static int acmac_setkey(struct crypto_ahash *ahash, const u8 *key, * @sec4_sg_dma: physical mapped address of h/w link table * @src_nents: number of segments in input scatterlist * @sec4_sg_bytes: length of dma mapped sec4_sg space + * @bklog: stored to determine if the request needs backlog * @hw_desc: the h/w job descriptor followed by any referenced link tables * @sec4_sg: h/w link table */ @@ -528,8 +534,9 @@ struct ahash_edesc { dma_addr_t sec4_sg_dma; int src_nents; int sec4_sg_bytes; + bool bklog; u32 hw_desc[DESC_JOB_IO_LEN_MAX / sizeof(u32)] ____cacheline_aligned; - struct sec4_sg_entry sec4_sg[0]; + struct sec4_sg_entry sec4_sg[]; }; static inline void ahash_unmap(struct device *dev, @@ -565,24 +572,28 @@ static inline void ahash_unmap_ctx(struct device *dev, ahash_unmap(dev, edesc, req, dst_len); } -static void ahash_done(struct device *jrdev, u32 *desc, u32 err, - void *context) +static inline void ahash_done_cpy(struct device *jrdev, u32 *desc, u32 err, + void *context, enum dma_data_direction dir) { struct ahash_request *req = context; + struct caam_drv_private_jr *jrp = dev_get_drvdata(jrdev); struct ahash_edesc *edesc; struct crypto_ahash *ahash = crypto_ahash_reqtfm(req); int digestsize = crypto_ahash_digestsize(ahash); struct caam_hash_state *state = ahash_request_ctx(req); struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash); int ecode = 0; + bool has_bklog; dev_dbg(jrdev, "%s %d: err 0x%x\n", __func__, __LINE__, err); - edesc = container_of(desc, struct ahash_edesc, hw_desc[0]); + edesc = state->edesc; + has_bklog = edesc->bklog; + if (err) ecode = caam_jr_strstatus(jrdev, err); - ahash_unmap_ctx(jrdev, edesc, req, digestsize, DMA_FROM_DEVICE); + ahash_unmap_ctx(jrdev, edesc, req, digestsize, dir); memcpy(req->result, state->caam_ctx, digestsize); kfree(edesc); @@ -590,95 +601,49 @@ static void ahash_done(struct device *jrdev, u32 *desc, u32 err, DUMP_PREFIX_ADDRESS, 16, 4, state->caam_ctx, ctx->ctx_len, 1); - req->base.complete(&req->base, ecode); + /* + * If no backlog flag, the completion of the request is done + * by CAAM, not crypto engine. + */ + if (!has_bklog) + req->base.complete(&req->base, ecode); + else + crypto_finalize_hash_request(jrp->engine, req, ecode); } -static void ahash_done_bi(struct device *jrdev, u32 *desc, u32 err, - void *context) +static void ahash_done(struct device *jrdev, u32 *desc, u32 err, + void *context) { - struct ahash_request *req = context; - struct ahash_edesc *edesc; - struct crypto_ahash *ahash = crypto_ahash_reqtfm(req); - struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash); - struct caam_hash_state *state = ahash_request_ctx(req); - int digestsize = crypto_ahash_digestsize(ahash); - int ecode = 0; - - dev_dbg(jrdev, "%s %d: err 0x%x\n", __func__, __LINE__, err); - - edesc = container_of(desc, struct ahash_edesc, hw_desc[0]); - if (err) - ecode = caam_jr_strstatus(jrdev, err); - - ahash_unmap_ctx(jrdev, edesc, req, ctx->ctx_len, DMA_BIDIRECTIONAL); - kfree(edesc); - - scatterwalk_map_and_copy(state->buf, req->src, - req->nbytes - state->next_buflen, - state->next_buflen, 0); - state->buflen = state->next_buflen; - - print_hex_dump_debug("buf@" __stringify(__LINE__)": ", - DUMP_PREFIX_ADDRESS, 16, 4, state->buf, - state->buflen, 1); - - print_hex_dump_debug("ctx@"__stringify(__LINE__)": ", - DUMP_PREFIX_ADDRESS, 16, 4, state->caam_ctx, - ctx->ctx_len, 1); - if (req->result) - print_hex_dump_debug("result@"__stringify(__LINE__)": ", - DUMP_PREFIX_ADDRESS, 16, 4, req->result, - digestsize, 1); - - req->base.complete(&req->base, ecode); + ahash_done_cpy(jrdev, desc, err, context, DMA_FROM_DEVICE); } static void ahash_done_ctx_src(struct device *jrdev, u32 *desc, u32 err, void *context) { - struct ahash_request *req = context; - struct ahash_edesc *edesc; - struct crypto_ahash *ahash = crypto_ahash_reqtfm(req); - int digestsize = crypto_ahash_digestsize(ahash); - struct caam_hash_state *state = ahash_request_ctx(req); - struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash); - int ecode = 0; - - dev_dbg(jrdev, "%s %d: err 0x%x\n", __func__, __LINE__, err); - - edesc = container_of(desc, struct ahash_edesc, hw_desc[0]); - if (err) - ecode = caam_jr_strstatus(jrdev, err); - - ahash_unmap_ctx(jrdev, edesc, req, digestsize, DMA_BIDIRECTIONAL); - memcpy(req->result, state->caam_ctx, digestsize); - kfree(edesc); - - print_hex_dump_debug("ctx@"__stringify(__LINE__)": ", - DUMP_PREFIX_ADDRESS, 16, 4, state->caam_ctx, - ctx->ctx_len, 1); - - req->base.complete(&req->base, ecode); + ahash_done_cpy(jrdev, desc, err, context, DMA_BIDIRECTIONAL); } -static void ahash_done_ctx_dst(struct device *jrdev, u32 *desc, u32 err, - void *context) +static inline void ahash_done_switch(struct device *jrdev, u32 *desc, u32 err, + void *context, enum dma_data_direction dir) { struct ahash_request *req = context; + struct caam_drv_private_jr *jrp = dev_get_drvdata(jrdev); struct ahash_edesc *edesc; struct crypto_ahash *ahash = crypto_ahash_reqtfm(req); struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash); struct caam_hash_state *state = ahash_request_ctx(req); int digestsize = crypto_ahash_digestsize(ahash); int ecode = 0; + bool has_bklog; dev_dbg(jrdev, "%s %d: err 0x%x\n", __func__, __LINE__, err); - edesc = container_of(desc, struct ahash_edesc, hw_desc[0]); + edesc = state->edesc; + has_bklog = edesc->bklog; if (err) ecode = caam_jr_strstatus(jrdev, err); - ahash_unmap_ctx(jrdev, edesc, req, ctx->ctx_len, DMA_FROM_DEVICE); + ahash_unmap_ctx(jrdev, edesc, req, ctx->ctx_len, dir); kfree(edesc); scatterwalk_map_and_copy(state->buf, req->src, @@ -698,18 +663,42 @@ static void ahash_done_ctx_dst(struct device *jrdev, u32 *desc, u32 err, DUMP_PREFIX_ADDRESS, 16, 4, req->result, digestsize, 1); - req->base.complete(&req->base, ecode); + /* + * If no backlog flag, the completion of the request is done + * by CAAM, not crypto engine. + */ + if (!has_bklog) + req->base.complete(&req->base, ecode); + else + crypto_finalize_hash_request(jrp->engine, req, ecode); + +} + +static void ahash_done_bi(struct device *jrdev, u32 *desc, u32 err, + void *context) +{ + ahash_done_switch(jrdev, desc, err, context, DMA_BIDIRECTIONAL); +} + +static void ahash_done_ctx_dst(struct device *jrdev, u32 *desc, u32 err, + void *context) +{ + ahash_done_switch(jrdev, desc, err, context, DMA_FROM_DEVICE); } /* * Allocate an enhanced descriptor, which contains the hardware descriptor * and space for hardware scatter table containing sg_num entries. */ -static struct ahash_edesc *ahash_edesc_alloc(struct caam_hash_ctx *ctx, +static struct ahash_edesc *ahash_edesc_alloc(struct ahash_request *req, int sg_num, u32 *sh_desc, - dma_addr_t sh_desc_dma, - gfp_t flags) + dma_addr_t sh_desc_dma) { + struct crypto_ahash *ahash = crypto_ahash_reqtfm(req); + struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash); + struct caam_hash_state *state = ahash_request_ctx(req); + gfp_t flags = (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ? + GFP_KERNEL : GFP_ATOMIC; struct ahash_edesc *edesc; unsigned int sg_size = sg_num * sizeof(struct sec4_sg_entry); @@ -719,6 +708,8 @@ static struct ahash_edesc *ahash_edesc_alloc(struct caam_hash_ctx *ctx, return NULL; } + state->edesc = edesc; + init_job_desc_shared(edesc->hw_desc, sh_desc_dma, desc_len(sh_desc), HDR_SHARE_DEFER | HDR_REVERSE); @@ -761,6 +752,62 @@ static int ahash_edesc_add_src(struct caam_hash_ctx *ctx, return 0; } +static int ahash_do_one_req(struct crypto_engine *engine, void *areq) +{ + struct ahash_request *req = ahash_request_cast(areq); + struct caam_hash_ctx *ctx = crypto_ahash_ctx(crypto_ahash_reqtfm(req)); + struct caam_hash_state *state = ahash_request_ctx(req); + struct device *jrdev = ctx->jrdev; + u32 *desc = state->edesc->hw_desc; + int ret; + + state->edesc->bklog = true; + + ret = caam_jr_enqueue(jrdev, desc, state->ahash_op_done, req); + + if (ret != -EINPROGRESS) { + ahash_unmap(jrdev, state->edesc, req, 0); + kfree(state->edesc); + } else { + ret = 0; + } + + return ret; +} + +static int ahash_enqueue_req(struct device *jrdev, + void (*cbk)(struct device *jrdev, u32 *desc, + u32 err, void *context), + struct ahash_request *req, + int dst_len, enum dma_data_direction dir) +{ + struct caam_drv_private_jr *jrpriv = dev_get_drvdata(jrdev); + struct caam_hash_state *state = ahash_request_ctx(req); + struct ahash_edesc *edesc = state->edesc; + u32 *desc = edesc->hw_desc; + int ret; + + state->ahash_op_done = cbk; + + /* + * Only the backlog request are sent to crypto-engine since the others + * can be handled by CAAM, if free, especially since JR has up to 1024 + * entries (more than the 10 entries from crypto-engine). + */ + if (req->base.flags & CRYPTO_TFM_REQ_MAY_BACKLOG) + ret = crypto_transfer_hash_request_to_engine(jrpriv->engine, + req); + else + ret = caam_jr_enqueue(jrdev, desc, cbk, req); + + if ((ret != -EINPROGRESS) && (ret != -EBUSY)) { + ahash_unmap_ctx(jrdev, edesc, req, dst_len, dir); + kfree(edesc); + } + + return ret; +} + /* submit update job descriptor */ static int ahash_update_ctx(struct ahash_request *req) { @@ -768,8 +815,6 @@ static int ahash_update_ctx(struct ahash_request *req) struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash); struct caam_hash_state *state = ahash_request_ctx(req); struct device *jrdev = ctx->jrdev; - gfp_t flags = (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ? - GFP_KERNEL : GFP_ATOMIC; u8 *buf = state->buf; int *buflen = &state->buflen; int *next_buflen = &state->next_buflen; @@ -823,8 +868,8 @@ static int ahash_update_ctx(struct ahash_request *req) * allocate space for base edesc and hw desc commands, * link tables */ - edesc = ahash_edesc_alloc(ctx, pad_nents, ctx->sh_desc_update, - ctx->sh_desc_update_dma, flags); + edesc = ahash_edesc_alloc(req, pad_nents, ctx->sh_desc_update, + ctx->sh_desc_update_dma); if (!edesc) { dma_unmap_sg(jrdev, req->src, src_nents, DMA_TO_DEVICE); return -ENOMEM; @@ -870,11 +915,8 @@ static int ahash_update_ctx(struct ahash_request *req) DUMP_PREFIX_ADDRESS, 16, 4, desc, desc_bytes(desc), 1); - ret = caam_jr_enqueue(jrdev, desc, ahash_done_bi, req); - if (ret) - goto unmap_ctx; - - ret = -EINPROGRESS; + ret = ahash_enqueue_req(jrdev, ahash_done_bi, req, + ctx->ctx_len, DMA_BIDIRECTIONAL); } else if (*next_buflen) { scatterwalk_map_and_copy(buf + *buflen, req->src, 0, req->nbytes, 0); @@ -898,8 +940,6 @@ static int ahash_final_ctx(struct ahash_request *req) struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash); struct caam_hash_state *state = ahash_request_ctx(req); struct device *jrdev = ctx->jrdev; - gfp_t flags = (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ? - GFP_KERNEL : GFP_ATOMIC; int buflen = state->buflen; u32 *desc; int sec4_sg_bytes; @@ -911,8 +951,8 @@ static int ahash_final_ctx(struct ahash_request *req) sizeof(struct sec4_sg_entry); /* allocate space for base edesc and hw desc commands, link tables */ - edesc = ahash_edesc_alloc(ctx, 4, ctx->sh_desc_fin, - ctx->sh_desc_fin_dma, flags); + edesc = ahash_edesc_alloc(req, 4, ctx->sh_desc_fin, + ctx->sh_desc_fin_dma); if (!edesc) return -ENOMEM; @@ -947,11 +987,8 @@ static int ahash_final_ctx(struct ahash_request *req) DUMP_PREFIX_ADDRESS, 16, 4, desc, desc_bytes(desc), 1); - ret = caam_jr_enqueue(jrdev, desc, ahash_done_ctx_src, req); - if (ret) - goto unmap_ctx; - - return -EINPROGRESS; + return ahash_enqueue_req(jrdev, ahash_done_ctx_src, req, + digestsize, DMA_BIDIRECTIONAL); unmap_ctx: ahash_unmap_ctx(jrdev, edesc, req, digestsize, DMA_BIDIRECTIONAL); kfree(edesc); @@ -964,8 +1001,6 @@ static int ahash_finup_ctx(struct ahash_request *req) struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash); struct caam_hash_state *state = ahash_request_ctx(req); struct device *jrdev = ctx->jrdev; - gfp_t flags = (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ? - GFP_KERNEL : GFP_ATOMIC; int buflen = state->buflen; u32 *desc; int sec4_sg_src_index; @@ -994,9 +1029,8 @@ static int ahash_finup_ctx(struct ahash_request *req) sec4_sg_src_index = 1 + (buflen ? 1 : 0); /* allocate space for base edesc and hw desc commands, link tables */ - edesc = ahash_edesc_alloc(ctx, sec4_sg_src_index + mapped_nents, - ctx->sh_desc_fin, ctx->sh_desc_fin_dma, - flags); + edesc = ahash_edesc_alloc(req, sec4_sg_src_index + mapped_nents, + ctx->sh_desc_fin, ctx->sh_desc_fin_dma); if (!edesc) { dma_unmap_sg(jrdev, req->src, src_nents, DMA_TO_DEVICE); return -ENOMEM; @@ -1027,11 +1061,8 @@ static int ahash_finup_ctx(struct ahash_request *req) DUMP_PREFIX_ADDRESS, 16, 4, desc, desc_bytes(desc), 1); - ret = caam_jr_enqueue(jrdev, desc, ahash_done_ctx_src, req); - if (ret) - goto unmap_ctx; - - return -EINPROGRESS; + return ahash_enqueue_req(jrdev, ahash_done_ctx_src, req, + digestsize, DMA_BIDIRECTIONAL); unmap_ctx: ahash_unmap_ctx(jrdev, edesc, req, digestsize, DMA_BIDIRECTIONAL); kfree(edesc); @@ -1044,8 +1075,6 @@ static int ahash_digest(struct ahash_request *req) struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash); struct caam_hash_state *state = ahash_request_ctx(req); struct device *jrdev = ctx->jrdev; - gfp_t flags = (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ? - GFP_KERNEL : GFP_ATOMIC; u32 *desc; int digestsize = crypto_ahash_digestsize(ahash); int src_nents, mapped_nents; @@ -1072,9 +1101,8 @@ static int ahash_digest(struct ahash_request *req) } /* allocate space for base edesc and hw desc commands, link tables */ - edesc = ahash_edesc_alloc(ctx, mapped_nents > 1 ? mapped_nents : 0, - ctx->sh_desc_digest, ctx->sh_desc_digest_dma, - flags); + edesc = ahash_edesc_alloc(req, mapped_nents > 1 ? mapped_nents : 0, + ctx->sh_desc_digest, ctx->sh_desc_digest_dma); if (!edesc) { dma_unmap_sg(jrdev, req->src, src_nents, DMA_TO_DEVICE); return -ENOMEM; @@ -1103,15 +1131,8 @@ static int ahash_digest(struct ahash_request *req) DUMP_PREFIX_ADDRESS, 16, 4, desc, desc_bytes(desc), 1); - ret = caam_jr_enqueue(jrdev, desc, ahash_done, req); - if (!ret) { - ret = -EINPROGRESS; - } else { - ahash_unmap_ctx(jrdev, edesc, req, digestsize, DMA_FROM_DEVICE); - kfree(edesc); - } - - return ret; + return ahash_enqueue_req(jrdev, ahash_done, req, digestsize, + DMA_FROM_DEVICE); } /* submit ahash final if it the first job descriptor */ @@ -1121,8 +1142,6 @@ static int ahash_final_no_ctx(struct ahash_request *req) struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash); struct caam_hash_state *state = ahash_request_ctx(req); struct device *jrdev = ctx->jrdev; - gfp_t flags = (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ? - GFP_KERNEL : GFP_ATOMIC; u8 *buf = state->buf; int buflen = state->buflen; u32 *desc; @@ -1131,8 +1150,8 @@ static int ahash_final_no_ctx(struct ahash_request *req) int ret; /* allocate space for base edesc and hw desc commands, link tables */ - edesc = ahash_edesc_alloc(ctx, 0, ctx->sh_desc_digest, - ctx->sh_desc_digest_dma, flags); + edesc = ahash_edesc_alloc(req, 0, ctx->sh_desc_digest, + ctx->sh_desc_digest_dma); if (!edesc) return -ENOMEM; @@ -1157,20 +1176,12 @@ static int ahash_final_no_ctx(struct ahash_request *req) DUMP_PREFIX_ADDRESS, 16, 4, desc, desc_bytes(desc), 1); - ret = caam_jr_enqueue(jrdev, desc, ahash_done, req); - if (!ret) { - ret = -EINPROGRESS; - } else { - ahash_unmap_ctx(jrdev, edesc, req, digestsize, DMA_FROM_DEVICE); - kfree(edesc); - } - - return ret; + return ahash_enqueue_req(jrdev, ahash_done, req, + digestsize, DMA_FROM_DEVICE); unmap: ahash_unmap(jrdev, edesc, req, digestsize); kfree(edesc); return -ENOMEM; - } /* submit ahash update if it the first job descriptor after update */ @@ -1180,8 +1191,6 @@ static int ahash_update_no_ctx(struct ahash_request *req) struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash); struct caam_hash_state *state = ahash_request_ctx(req); struct device *jrdev = ctx->jrdev; - gfp_t flags = (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ? - GFP_KERNEL : GFP_ATOMIC; u8 *buf = state->buf; int *buflen = &state->buflen; int *next_buflen = &state->next_buflen; @@ -1234,10 +1243,9 @@ static int ahash_update_no_ctx(struct ahash_request *req) * allocate space for base edesc and hw desc commands, * link tables */ - edesc = ahash_edesc_alloc(ctx, pad_nents, + edesc = ahash_edesc_alloc(req, pad_nents, ctx->sh_desc_update_first, - ctx->sh_desc_update_first_dma, - flags); + ctx->sh_desc_update_first_dma); if (!edesc) { dma_unmap_sg(jrdev, req->src, src_nents, DMA_TO_DEVICE); return -ENOMEM; @@ -1273,11 +1281,10 @@ static int ahash_update_no_ctx(struct ahash_request *req) DUMP_PREFIX_ADDRESS, 16, 4, desc, desc_bytes(desc), 1); - ret = caam_jr_enqueue(jrdev, desc, ahash_done_ctx_dst, req); - if (ret) - goto unmap_ctx; - - ret = -EINPROGRESS; + ret = ahash_enqueue_req(jrdev, ahash_done_ctx_dst, req, + ctx->ctx_len, DMA_TO_DEVICE); + if ((ret != -EINPROGRESS) && (ret != -EBUSY)) + return ret; state->update = ahash_update_ctx; state->finup = ahash_finup_ctx; state->final = ahash_final_ctx; @@ -1305,8 +1312,6 @@ static int ahash_finup_no_ctx(struct ahash_request *req) struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash); struct caam_hash_state *state = ahash_request_ctx(req); struct device *jrdev = ctx->jrdev; - gfp_t flags = (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ? - GFP_KERNEL : GFP_ATOMIC; int buflen = state->buflen; u32 *desc; int sec4_sg_bytes, sec4_sg_src_index, src_nents, mapped_nents; @@ -1336,9 +1341,8 @@ static int ahash_finup_no_ctx(struct ahash_request *req) sizeof(struct sec4_sg_entry); /* allocate space for base edesc and hw desc commands, link tables */ - edesc = ahash_edesc_alloc(ctx, sec4_sg_src_index + mapped_nents, - ctx->sh_desc_digest, ctx->sh_desc_digest_dma, - flags); + edesc = ahash_edesc_alloc(req, sec4_sg_src_index + mapped_nents, + ctx->sh_desc_digest, ctx->sh_desc_digest_dma); if (!edesc) { dma_unmap_sg(jrdev, req->src, src_nents, DMA_TO_DEVICE); return -ENOMEM; @@ -1368,15 +1372,8 @@ static int ahash_finup_no_ctx(struct ahash_request *req) DUMP_PREFIX_ADDRESS, 16, 4, desc, desc_bytes(desc), 1); - ret = caam_jr_enqueue(jrdev, desc, ahash_done, req); - if (!ret) { - ret = -EINPROGRESS; - } else { - ahash_unmap_ctx(jrdev, edesc, req, digestsize, DMA_FROM_DEVICE); - kfree(edesc); - } - - return ret; + return ahash_enqueue_req(jrdev, ahash_done, req, + digestsize, DMA_FROM_DEVICE); unmap: ahash_unmap(jrdev, edesc, req, digestsize); kfree(edesc); @@ -1391,8 +1388,6 @@ static int ahash_update_first(struct ahash_request *req) struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash); struct caam_hash_state *state = ahash_request_ctx(req); struct device *jrdev = ctx->jrdev; - gfp_t flags = (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ? - GFP_KERNEL : GFP_ATOMIC; u8 *buf = state->buf; int *buflen = &state->buflen; int *next_buflen = &state->next_buflen; @@ -1440,11 +1435,10 @@ static int ahash_update_first(struct ahash_request *req) * allocate space for base edesc and hw desc commands, * link tables */ - edesc = ahash_edesc_alloc(ctx, mapped_nents > 1 ? + edesc = ahash_edesc_alloc(req, mapped_nents > 1 ? mapped_nents : 0, ctx->sh_desc_update_first, - ctx->sh_desc_update_first_dma, - flags); + ctx->sh_desc_update_first_dma); if (!edesc) { dma_unmap_sg(jrdev, req->src, src_nents, DMA_TO_DEVICE); return -ENOMEM; @@ -1467,11 +1461,10 @@ static int ahash_update_first(struct ahash_request *req) DUMP_PREFIX_ADDRESS, 16, 4, desc, desc_bytes(desc), 1); - ret = caam_jr_enqueue(jrdev, desc, ahash_done_ctx_dst, req); - if (ret) - goto unmap_ctx; - - ret = -EINPROGRESS; + ret = ahash_enqueue_req(jrdev, ahash_done_ctx_dst, req, + ctx->ctx_len, DMA_TO_DEVICE); + if ((ret != -EINPROGRESS) && (ret != -EBUSY)) + return ret; state->update = ahash_update_ctx; state->finup = ahash_finup_ctx; state->final = ahash_final_ctx; @@ -1774,6 +1767,8 @@ static int caam_hash_cra_init(struct crypto_tfm *tfm) HASH_MSG_LEN + SHA256_DIGEST_SIZE, HASH_MSG_LEN + 64, HASH_MSG_LEN + SHA512_DIGEST_SIZE }; + const size_t sh_desc_update_offset = offsetof(struct caam_hash_ctx, + sh_desc_update); dma_addr_t dma_addr; struct caam_drv_private *priv; @@ -1826,7 +1821,8 @@ static int caam_hash_cra_init(struct crypto_tfm *tfm) } dma_addr = dma_map_single_attrs(ctx->jrdev, ctx->sh_desc_update, - offsetof(struct caam_hash_ctx, key), + offsetof(struct caam_hash_ctx, key) - + sh_desc_update_offset, ctx->dir, DMA_ATTR_SKIP_CPU_SYNC); if (dma_mapping_error(ctx->jrdev, dma_addr)) { dev_err(ctx->jrdev, "unable to map shared descriptors\n"); @@ -1844,11 +1840,16 @@ static int caam_hash_cra_init(struct crypto_tfm *tfm) ctx->sh_desc_update_dma = dma_addr; ctx->sh_desc_update_first_dma = dma_addr + offsetof(struct caam_hash_ctx, - sh_desc_update_first); + sh_desc_update_first) - + sh_desc_update_offset; ctx->sh_desc_fin_dma = dma_addr + offsetof(struct caam_hash_ctx, - sh_desc_fin); + sh_desc_fin) - + sh_desc_update_offset; ctx->sh_desc_digest_dma = dma_addr + offsetof(struct caam_hash_ctx, - sh_desc_digest); + sh_desc_digest) - + sh_desc_update_offset; + + ctx->enginectx.op.do_one_request = ahash_do_one_req; crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm), sizeof(struct caam_hash_state)); @@ -1865,7 +1866,8 @@ static void caam_hash_cra_exit(struct crypto_tfm *tfm) struct caam_hash_ctx *ctx = crypto_tfm_ctx(tfm); dma_unmap_single_attrs(ctx->jrdev, ctx->sh_desc_update_dma, - offsetof(struct caam_hash_ctx, key), + offsetof(struct caam_hash_ctx, key) - + offsetof(struct caam_hash_ctx, sh_desc_update), ctx->dir, DMA_ATTR_SKIP_CPU_SYNC); if (ctx->key_dir != DMA_NONE) dma_unmap_single_attrs(ctx->jrdev, ctx->adata.key_dma, diff --git a/drivers/crypto/caam/caampkc.c b/drivers/crypto/caam/caampkc.c index 6619c512ef1a..2e44d685618f 100644 --- a/drivers/crypto/caam/caampkc.c +++ b/drivers/crypto/caam/caampkc.c @@ -117,76 +117,73 @@ static void rsa_priv_f3_unmap(struct device *dev, struct rsa_edesc *edesc, static void rsa_pub_done(struct device *dev, u32 *desc, u32 err, void *context) { struct akcipher_request *req = context; + struct caam_rsa_req_ctx *req_ctx = akcipher_request_ctx(req); + struct caam_drv_private_jr *jrp = dev_get_drvdata(dev); struct rsa_edesc *edesc; int ecode = 0; + bool has_bklog; if (err) ecode = caam_jr_strstatus(dev, err); - edesc = container_of(desc, struct rsa_edesc, hw_desc[0]); + edesc = req_ctx->edesc; + has_bklog = edesc->bklog; rsa_pub_unmap(dev, edesc, req); rsa_io_unmap(dev, edesc, req); kfree(edesc); - akcipher_request_complete(req, ecode); -} - -static void rsa_priv_f1_done(struct device *dev, u32 *desc, u32 err, - void *context) -{ - struct akcipher_request *req = context; - struct rsa_edesc *edesc; - int ecode = 0; - - if (err) - ecode = caam_jr_strstatus(dev, err); - - edesc = container_of(desc, struct rsa_edesc, hw_desc[0]); - - rsa_priv_f1_unmap(dev, edesc, req); - rsa_io_unmap(dev, edesc, req); - kfree(edesc); - - akcipher_request_complete(req, ecode); -} - -static void rsa_priv_f2_done(struct device *dev, u32 *desc, u32 err, - void *context) -{ - struct akcipher_request *req = context; - struct rsa_edesc *edesc; - int ecode = 0; - - if (err) - ecode = caam_jr_strstatus(dev, err); - - edesc = container_of(desc, struct rsa_edesc, hw_desc[0]); - - rsa_priv_f2_unmap(dev, edesc, req); - rsa_io_unmap(dev, edesc, req); - kfree(edesc); - - akcipher_request_complete(req, ecode); + /* + * If no backlog flag, the completion of the request is done + * by CAAM, not crypto engine. + */ + if (!has_bklog) + akcipher_request_complete(req, ecode); + else + crypto_finalize_akcipher_request(jrp->engine, req, ecode); } -static void rsa_priv_f3_done(struct device *dev, u32 *desc, u32 err, - void *context) +static void rsa_priv_f_done(struct device *dev, u32 *desc, u32 err, + void *context) { struct akcipher_request *req = context; + struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req); + struct caam_drv_private_jr *jrp = dev_get_drvdata(dev); + struct caam_rsa_ctx *ctx = akcipher_tfm_ctx(tfm); + struct caam_rsa_key *key = &ctx->key; + struct caam_rsa_req_ctx *req_ctx = akcipher_request_ctx(req); struct rsa_edesc *edesc; int ecode = 0; + bool has_bklog; if (err) ecode = caam_jr_strstatus(dev, err); - edesc = container_of(desc, struct rsa_edesc, hw_desc[0]); + edesc = req_ctx->edesc; + has_bklog = edesc->bklog; + + switch (key->priv_form) { + case FORM1: + rsa_priv_f1_unmap(dev, edesc, req); + break; + case FORM2: + rsa_priv_f2_unmap(dev, edesc, req); + break; + case FORM3: + rsa_priv_f3_unmap(dev, edesc, req); + } - rsa_priv_f3_unmap(dev, edesc, req); rsa_io_unmap(dev, edesc, req); kfree(edesc); - akcipher_request_complete(req, ecode); + /* + * If no backlog flag, the completion of the request is done + * by CAAM, not crypto engine. + */ + if (!has_bklog) + akcipher_request_complete(req, ecode); + else + crypto_finalize_akcipher_request(jrp->engine, req, ecode); } /** @@ -334,6 +331,8 @@ static struct rsa_edesc *rsa_edesc_alloc(struct akcipher_request *req, edesc->src_nents = src_nents; edesc->dst_nents = dst_nents; + req_ctx->edesc = edesc; + if (!sec4_sg_bytes) return edesc; @@ -364,6 +363,33 @@ src_fail: return ERR_PTR(-ENOMEM); } +static int akcipher_do_one_req(struct crypto_engine *engine, void *areq) +{ + struct akcipher_request *req = container_of(areq, + struct akcipher_request, + base); + struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req); + struct caam_rsa_req_ctx *req_ctx = akcipher_request_ctx(req); + struct caam_rsa_ctx *ctx = akcipher_tfm_ctx(tfm); + struct device *jrdev = ctx->dev; + u32 *desc = req_ctx->edesc->hw_desc; + int ret; + + req_ctx->edesc->bklog = true; + + ret = caam_jr_enqueue(jrdev, desc, req_ctx->akcipher_op_done, req); + + if (ret != -EINPROGRESS) { + rsa_pub_unmap(jrdev, req_ctx->edesc, req); + rsa_io_unmap(jrdev, req_ctx->edesc, req); + kfree(req_ctx->edesc); + } else { + ret = 0; + } + + return ret; +} + static int set_rsa_pub_pdb(struct akcipher_request *req, struct rsa_edesc *edesc) { @@ -627,6 +653,53 @@ unmap_p: return -ENOMEM; } +static int akcipher_enqueue_req(struct device *jrdev, + void (*cbk)(struct device *jrdev, u32 *desc, + u32 err, void *context), + struct akcipher_request *req) +{ + struct caam_drv_private_jr *jrpriv = dev_get_drvdata(jrdev); + struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req); + struct caam_rsa_ctx *ctx = akcipher_tfm_ctx(tfm); + struct caam_rsa_key *key = &ctx->key; + struct caam_rsa_req_ctx *req_ctx = akcipher_request_ctx(req); + struct rsa_edesc *edesc = req_ctx->edesc; + u32 *desc = edesc->hw_desc; + int ret; + + req_ctx->akcipher_op_done = cbk; + /* + * Only the backlog request are sent to crypto-engine since the others + * can be handled by CAAM, if free, especially since JR has up to 1024 + * entries (more than the 10 entries from crypto-engine). + */ + if (req->base.flags & CRYPTO_TFM_REQ_MAY_BACKLOG) + ret = crypto_transfer_akcipher_request_to_engine(jrpriv->engine, + req); + else + ret = caam_jr_enqueue(jrdev, desc, cbk, req); + + if ((ret != -EINPROGRESS) && (ret != -EBUSY)) { + switch (key->priv_form) { + case FORM1: + rsa_priv_f1_unmap(jrdev, edesc, req); + break; + case FORM2: + rsa_priv_f2_unmap(jrdev, edesc, req); + break; + case FORM3: + rsa_priv_f3_unmap(jrdev, edesc, req); + break; + default: + rsa_pub_unmap(jrdev, edesc, req); + } + rsa_io_unmap(jrdev, edesc, req); + kfree(edesc); + } + + return ret; +} + static int caam_rsa_enc(struct akcipher_request *req) { struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req); @@ -658,11 +731,7 @@ static int caam_rsa_enc(struct akcipher_request *req) /* Initialize Job Descriptor */ init_rsa_pub_desc(edesc->hw_desc, &edesc->pdb.pub); - ret = caam_jr_enqueue(jrdev, edesc->hw_desc, rsa_pub_done, req); - if (!ret) - return -EINPROGRESS; - - rsa_pub_unmap(jrdev, edesc, req); + return akcipher_enqueue_req(jrdev, rsa_pub_done, req); init_fail: rsa_io_unmap(jrdev, edesc, req); @@ -691,11 +760,7 @@ static int caam_rsa_dec_priv_f1(struct akcipher_request *req) /* Initialize Job Descriptor */ init_rsa_priv_f1_desc(edesc->hw_desc, &edesc->pdb.priv_f1); - ret = caam_jr_enqueue(jrdev, edesc->hw_desc, rsa_priv_f1_done, req); - if (!ret) - return -EINPROGRESS; - - rsa_priv_f1_unmap(jrdev, edesc, req); + return akcipher_enqueue_req(jrdev, rsa_priv_f_done, req); init_fail: rsa_io_unmap(jrdev, edesc, req); @@ -724,11 +789,7 @@ static int caam_rsa_dec_priv_f2(struct akcipher_request *req) /* Initialize Job Descriptor */ init_rsa_priv_f2_desc(edesc->hw_desc, &edesc->pdb.priv_f2); - ret = caam_jr_enqueue(jrdev, edesc->hw_desc, rsa_priv_f2_done, req); - if (!ret) - return -EINPROGRESS; - - rsa_priv_f2_unmap(jrdev, edesc, req); + return akcipher_enqueue_req(jrdev, rsa_priv_f_done, req); init_fail: rsa_io_unmap(jrdev, edesc, req); @@ -757,11 +818,7 @@ static int caam_rsa_dec_priv_f3(struct akcipher_request *req) /* Initialize Job Descriptor */ init_rsa_priv_f3_desc(edesc->hw_desc, &edesc->pdb.priv_f3); - ret = caam_jr_enqueue(jrdev, edesc->hw_desc, rsa_priv_f3_done, req); - if (!ret) - return -EINPROGRESS; - - rsa_priv_f3_unmap(jrdev, edesc, req); + return akcipher_enqueue_req(jrdev, rsa_priv_f_done, req); init_fail: rsa_io_unmap(jrdev, edesc, req); @@ -1054,6 +1111,8 @@ static int caam_rsa_init_tfm(struct crypto_akcipher *tfm) return -ENOMEM; } + ctx->enginectx.op.do_one_request = akcipher_do_one_req; + return 0; } diff --git a/drivers/crypto/caam/caampkc.h b/drivers/crypto/caam/caampkc.h index c68fb4c03ee6..cc889a525e2f 100644 --- a/drivers/crypto/caam/caampkc.h +++ b/drivers/crypto/caam/caampkc.h @@ -12,6 +12,7 @@ #define _PKC_DESC_H_ #include "compat.h" #include "pdb.h" +#include <crypto/engine.h> /** * caam_priv_key_form - CAAM RSA private key representation @@ -87,11 +88,13 @@ struct caam_rsa_key { /** * caam_rsa_ctx - per session context. + * @enginectx : crypto engine context * @key : RSA key in DMA zone * @dev : device structure * @padding_dma : dma address of padding, for adding it to the input */ struct caam_rsa_ctx { + struct crypto_engine_ctx enginectx; struct caam_rsa_key key; struct device *dev; dma_addr_t padding_dma; @@ -103,11 +106,16 @@ struct caam_rsa_ctx { * @src : input scatterlist (stripped of leading zeros) * @fixup_src : input scatterlist (that might be stripped of leading zeros) * @fixup_src_len : length of the fixup_src input scatterlist + * @edesc : s/w-extended rsa descriptor + * @akcipher_op_done : callback used when operation is done */ struct caam_rsa_req_ctx { struct scatterlist src[2]; struct scatterlist *fixup_src; unsigned int fixup_src_len; + struct rsa_edesc *edesc; + void (*akcipher_op_done)(struct device *jrdev, u32 *desc, u32 err, + void *context); }; /** @@ -117,6 +125,7 @@ struct caam_rsa_req_ctx { * @mapped_src_nents: number of segments in input h/w link table * @mapped_dst_nents: number of segments in output h/w link table * @sec4_sg_bytes : length of h/w link table + * @bklog : stored to determine if the request needs backlog * @sec4_sg_dma : dma address of h/w link table * @sec4_sg : pointer to h/w link table * @pdb : specific RSA Protocol Data Block (PDB) @@ -128,6 +137,7 @@ struct rsa_edesc { int mapped_src_nents; int mapped_dst_nents; int sec4_sg_bytes; + bool bklog; dma_addr_t sec4_sg_dma; struct sec4_sg_entry *sec4_sg; union { diff --git a/drivers/crypto/caam/caamrng.c b/drivers/crypto/caam/caamrng.c index e8baacaabe07..77d048dfe5d0 100644 --- a/drivers/crypto/caam/caamrng.c +++ b/drivers/crypto/caam/caamrng.c @@ -7,35 +7,12 @@ * * Based on caamalg.c crypto API driver. * - * relationship between job descriptors to shared descriptors: - * - * --------------- -------------- - * | JobDesc #0 |-------------------->| ShareDesc | - * | *(buffer 0) | |------------->| (generate) | - * --------------- | | (move) | - * | | (store) | - * --------------- | -------------- - * | JobDesc #1 |------| - * | *(buffer 1) | - * --------------- - * - * A job desc looks like this: - * - * --------------------- - * | Header | - * | ShareDesc Pointer | - * | SEQ_OUT_PTR | - * | (output buffer) | - * --------------------- - * - * The SharedDesc never changes, and each job descriptor points to one of two - * buffers for each device, from which the data will be copied into the - * requested destination */ #include <linux/hw_random.h> #include <linux/completion.h> #include <linux/atomic.h> +#include <linux/kfifo.h> #include "compat.h" @@ -45,278 +22,205 @@ #include "jr.h" #include "error.h" +#define CAAM_RNG_MAX_FIFO_STORE_SIZE 16 + /* - * Maximum buffer size: maximum number of random, cache-aligned bytes that - * will be generated and moved to seq out ptr (extlen not allowed) + * Length of used descriptors, see caam_init_desc() */ -#define RN_BUF_SIZE (0xffff / L1_CACHE_BYTES * \ - L1_CACHE_BYTES) - -/* length of descriptors */ -#define DESC_JOB_O_LEN (CAAM_CMD_SZ * 2 + CAAM_PTR_SZ_MAX * 2) -#define DESC_RNG_LEN (3 * CAAM_CMD_SZ) - -/* Buffer, its dma address and lock */ -struct buf_data { - u8 buf[RN_BUF_SIZE] ____cacheline_aligned; - dma_addr_t addr; - struct completion filled; - u32 hw_desc[DESC_JOB_O_LEN]; -#define BUF_NOT_EMPTY 0 -#define BUF_EMPTY 1 -#define BUF_PENDING 2 /* Empty, but with job pending --don't submit another */ - atomic_t empty; -}; +#define CAAM_RNG_DESC_LEN (CAAM_CMD_SZ + \ + CAAM_CMD_SZ + \ + CAAM_CMD_SZ + CAAM_PTR_SZ_MAX) /* rng per-device context */ struct caam_rng_ctx { + struct hwrng rng; struct device *jrdev; - dma_addr_t sh_desc_dma; - u32 sh_desc[DESC_RNG_LEN]; - unsigned int cur_buf_idx; - int current_buf; - struct buf_data bufs[2]; + struct device *ctrldev; + void *desc_async; + void *desc_sync; + struct work_struct worker; + struct kfifo fifo; }; -static struct caam_rng_ctx *rng_ctx; - -/* - * Variable used to avoid double free of resources in case - * algorithm registration was unsuccessful - */ -static bool init_done; - -static inline void rng_unmap_buf(struct device *jrdev, struct buf_data *bd) -{ - if (bd->addr) - dma_unmap_single(jrdev, bd->addr, RN_BUF_SIZE, - DMA_FROM_DEVICE); -} +struct caam_rng_job_ctx { + struct completion *done; + int *err; +}; -static inline void rng_unmap_ctx(struct caam_rng_ctx *ctx) +static struct caam_rng_ctx *to_caam_rng_ctx(struct hwrng *r) { - struct device *jrdev = ctx->jrdev; - - if (ctx->sh_desc_dma) - dma_unmap_single(jrdev, ctx->sh_desc_dma, - desc_bytes(ctx->sh_desc), DMA_TO_DEVICE); - rng_unmap_buf(jrdev, &ctx->bufs[0]); - rng_unmap_buf(jrdev, &ctx->bufs[1]); + return (struct caam_rng_ctx *)r->priv; } -static void rng_done(struct device *jrdev, u32 *desc, u32 err, void *context) +static void caam_rng_done(struct device *jrdev, u32 *desc, u32 err, + void *context) { - struct buf_data *bd; - - bd = container_of(desc, struct buf_data, hw_desc[0]); + struct caam_rng_job_ctx *jctx = context; if (err) - caam_jr_strstatus(jrdev, err); + *jctx->err = caam_jr_strstatus(jrdev, err); - atomic_set(&bd->empty, BUF_NOT_EMPTY); - complete(&bd->filled); - - /* Buffer refilled, invalidate cache */ - dma_sync_single_for_cpu(jrdev, bd->addr, RN_BUF_SIZE, DMA_FROM_DEVICE); - - print_hex_dump_debug("rng refreshed buf@: ", DUMP_PREFIX_ADDRESS, 16, 4, - bd->buf, RN_BUF_SIZE, 1); + complete(jctx->done); } -static inline int submit_job(struct caam_rng_ctx *ctx, int to_current) +static u32 *caam_init_desc(u32 *desc, dma_addr_t dst_dma) { - struct buf_data *bd = &ctx->bufs[!(to_current ^ ctx->current_buf)]; - struct device *jrdev = ctx->jrdev; - u32 *desc = bd->hw_desc; - int err; - - dev_dbg(jrdev, "submitting job %d\n", !(to_current ^ ctx->current_buf)); - init_completion(&bd->filled); - err = caam_jr_enqueue(jrdev, desc, rng_done, ctx); - if (err) - complete(&bd->filled); /* don't wait on failed job*/ - else - atomic_inc(&bd->empty); /* note if pending */ - - return err; + init_job_desc(desc, 0); /* + 1 cmd_sz */ + /* Generate random bytes: + 1 cmd_sz */ + append_operation(desc, OP_ALG_ALGSEL_RNG | OP_TYPE_CLASS1_ALG | + OP_ALG_PR_ON); + /* Store bytes: + 1 cmd_sz + caam_ptr_sz */ + append_fifo_store(desc, dst_dma, + CAAM_RNG_MAX_FIFO_STORE_SIZE, FIFOST_TYPE_RNGSTORE); + + print_hex_dump_debug("rng job desc@: ", DUMP_PREFIX_ADDRESS, + 16, 4, desc, desc_bytes(desc), 1); + + return desc; } -static int caam_read(struct hwrng *rng, void *data, size_t max, bool wait) +static int caam_rng_read_one(struct device *jrdev, + void *dst, int len, + void *desc, + struct completion *done) { - struct caam_rng_ctx *ctx = rng_ctx; - struct buf_data *bd = &ctx->bufs[ctx->current_buf]; - int next_buf_idx, copied_idx; - int err; - - if (atomic_read(&bd->empty)) { - /* try to submit job if there wasn't one */ - if (atomic_read(&bd->empty) == BUF_EMPTY) { - err = submit_job(ctx, 1); - /* if can't submit job, can't even wait */ - if (err) - return 0; - } - /* no immediate data, so exit if not waiting */ - if (!wait) - return 0; - - /* waiting for pending job */ - if (atomic_read(&bd->empty)) - wait_for_completion(&bd->filled); + dma_addr_t dst_dma; + int err, ret = 0; + struct caam_rng_job_ctx jctx = { + .done = done, + .err = &ret, + }; + + len = CAAM_RNG_MAX_FIFO_STORE_SIZE; + + dst_dma = dma_map_single(jrdev, dst, len, DMA_FROM_DEVICE); + if (dma_mapping_error(jrdev, dst_dma)) { + dev_err(jrdev, "unable to map destination memory\n"); + return -ENOMEM; } - next_buf_idx = ctx->cur_buf_idx + max; - dev_dbg(ctx->jrdev, "%s: start reading at buffer %d, idx %d\n", - __func__, ctx->current_buf, ctx->cur_buf_idx); - - /* if enough data in current buffer */ - if (next_buf_idx < RN_BUF_SIZE) { - memcpy(data, bd->buf + ctx->cur_buf_idx, max); - ctx->cur_buf_idx = next_buf_idx; - return max; + init_completion(done); + err = caam_jr_enqueue(jrdev, + caam_init_desc(desc, dst_dma), + caam_rng_done, &jctx); + if (err == -EINPROGRESS) { + wait_for_completion(done); + err = 0; } - /* else, copy what's left... */ - copied_idx = RN_BUF_SIZE - ctx->cur_buf_idx; - memcpy(data, bd->buf + ctx->cur_buf_idx, copied_idx); - ctx->cur_buf_idx = 0; - atomic_set(&bd->empty, BUF_EMPTY); - - /* ...refill... */ - submit_job(ctx, 1); + dma_unmap_single(jrdev, dst_dma, len, DMA_FROM_DEVICE); - /* and use next buffer */ - ctx->current_buf = !ctx->current_buf; - dev_dbg(ctx->jrdev, "switched to buffer %d\n", ctx->current_buf); - - /* since there already is some data read, don't wait */ - return copied_idx + caam_read(rng, data + copied_idx, - max - copied_idx, false); + return err ?: (ret ?: len); } -static inline int rng_create_sh_desc(struct caam_rng_ctx *ctx) +static void caam_rng_fill_async(struct caam_rng_ctx *ctx) { - struct device *jrdev = ctx->jrdev; - u32 *desc = ctx->sh_desc; - - init_sh_desc(desc, HDR_SHARE_SERIAL); - - /* Generate random bytes */ - append_operation(desc, OP_ALG_ALGSEL_RNG | OP_TYPE_CLASS1_ALG); - - /* Store bytes */ - append_seq_fifo_store(desc, RN_BUF_SIZE, FIFOST_TYPE_RNGSTORE); + struct scatterlist sg[1]; + struct completion done; + int len, nents; + + sg_init_table(sg, ARRAY_SIZE(sg)); + nents = kfifo_dma_in_prepare(&ctx->fifo, sg, ARRAY_SIZE(sg), + CAAM_RNG_MAX_FIFO_STORE_SIZE); + if (!nents) + return; - ctx->sh_desc_dma = dma_map_single(jrdev, desc, desc_bytes(desc), - DMA_TO_DEVICE); - if (dma_mapping_error(jrdev, ctx->sh_desc_dma)) { - dev_err(jrdev, "unable to map shared descriptor\n"); - return -ENOMEM; - } + len = caam_rng_read_one(ctx->jrdev, sg_virt(&sg[0]), + sg[0].length, + ctx->desc_async, + &done); + if (len < 0) + return; - print_hex_dump_debug("rng shdesc@: ", DUMP_PREFIX_ADDRESS, 16, 4, - desc, desc_bytes(desc), 1); + kfifo_dma_in_finish(&ctx->fifo, len); +} - return 0; +static void caam_rng_worker(struct work_struct *work) +{ + struct caam_rng_ctx *ctx = container_of(work, struct caam_rng_ctx, + worker); + caam_rng_fill_async(ctx); } -static inline int rng_create_job_desc(struct caam_rng_ctx *ctx, int buf_id) +static int caam_read(struct hwrng *rng, void *dst, size_t max, bool wait) { - struct device *jrdev = ctx->jrdev; - struct buf_data *bd = &ctx->bufs[buf_id]; - u32 *desc = bd->hw_desc; - int sh_len = desc_len(ctx->sh_desc); + struct caam_rng_ctx *ctx = to_caam_rng_ctx(rng); + int out; - init_job_desc_shared(desc, ctx->sh_desc_dma, sh_len, HDR_SHARE_DEFER | - HDR_REVERSE); + if (wait) { + struct completion done; - bd->addr = dma_map_single(jrdev, bd->buf, RN_BUF_SIZE, DMA_FROM_DEVICE); - if (dma_mapping_error(jrdev, bd->addr)) { - dev_err(jrdev, "unable to map dst\n"); - return -ENOMEM; + return caam_rng_read_one(ctx->jrdev, dst, max, + ctx->desc_sync, &done); } - append_seq_out_ptr_intlen(desc, bd->addr, RN_BUF_SIZE, 0); - - print_hex_dump_debug("rng job desc@: ", DUMP_PREFIX_ADDRESS, 16, 4, - desc, desc_bytes(desc), 1); + out = kfifo_out(&ctx->fifo, dst, max); + if (kfifo_is_empty(&ctx->fifo)) + schedule_work(&ctx->worker); - return 0; + return out; } static void caam_cleanup(struct hwrng *rng) { - int i; - struct buf_data *bd; - - for (i = 0; i < 2; i++) { - bd = &rng_ctx->bufs[i]; - if (atomic_read(&bd->empty) == BUF_PENDING) - wait_for_completion(&bd->filled); - } + struct caam_rng_ctx *ctx = to_caam_rng_ctx(rng); - rng_unmap_ctx(rng_ctx); + flush_work(&ctx->worker); + caam_jr_free(ctx->jrdev); + kfifo_free(&ctx->fifo); } -static int caam_init_buf(struct caam_rng_ctx *ctx, int buf_id) +static int caam_init(struct hwrng *rng) { - struct buf_data *bd = &ctx->bufs[buf_id]; + struct caam_rng_ctx *ctx = to_caam_rng_ctx(rng); int err; - err = rng_create_job_desc(ctx, buf_id); - if (err) - return err; - - atomic_set(&bd->empty, BUF_EMPTY); - submit_job(ctx, buf_id == ctx->current_buf); - wait_for_completion(&bd->filled); + ctx->desc_sync = devm_kzalloc(ctx->ctrldev, CAAM_RNG_DESC_LEN, + GFP_DMA | GFP_KERNEL); + if (!ctx->desc_sync) + return -ENOMEM; - return 0; -} + ctx->desc_async = devm_kzalloc(ctx->ctrldev, CAAM_RNG_DESC_LEN, + GFP_DMA | GFP_KERNEL); + if (!ctx->desc_async) + return -ENOMEM; -static int caam_init_rng(struct caam_rng_ctx *ctx, struct device *jrdev) -{ - int err; + if (kfifo_alloc(&ctx->fifo, CAAM_RNG_MAX_FIFO_STORE_SIZE, + GFP_DMA | GFP_KERNEL)) + return -ENOMEM; - ctx->jrdev = jrdev; + INIT_WORK(&ctx->worker, caam_rng_worker); - err = rng_create_sh_desc(ctx); - if (err) + ctx->jrdev = caam_jr_alloc(); + err = PTR_ERR_OR_ZERO(ctx->jrdev); + if (err) { + kfifo_free(&ctx->fifo); + pr_err("Job Ring Device allocation for transform failed\n"); return err; + } - ctx->current_buf = 0; - ctx->cur_buf_idx = 0; + /* + * Fill async buffer to have early randomness data for + * hw_random + */ + caam_rng_fill_async(ctx); - err = caam_init_buf(ctx, 0); - if (err) - return err; - - return caam_init_buf(ctx, 1); + return 0; } -static struct hwrng caam_rng = { - .name = "rng-caam", - .cleanup = caam_cleanup, - .read = caam_read, -}; +int caam_rng_init(struct device *ctrldev); -void caam_rng_exit(void) +void caam_rng_exit(struct device *ctrldev) { - if (!init_done) - return; - - caam_jr_free(rng_ctx->jrdev); - hwrng_unregister(&caam_rng); - kfree(rng_ctx); + devres_release_group(ctrldev, caam_rng_init); } int caam_rng_init(struct device *ctrldev) { - struct device *dev; + struct caam_rng_ctx *ctx; u32 rng_inst; struct caam_drv_private *priv = dev_get_drvdata(ctrldev); - int err; - init_done = false; + int ret; /* Check for an instantiated RNG before registration */ if (priv->era < 10) @@ -328,31 +232,30 @@ int caam_rng_init(struct device *ctrldev) if (!rng_inst) return 0; - dev = caam_jr_alloc(); - if (IS_ERR(dev)) { - pr_err("Job Ring Device allocation for transform failed\n"); - return PTR_ERR(dev); - } - rng_ctx = kmalloc(sizeof(*rng_ctx), GFP_DMA | GFP_KERNEL); - if (!rng_ctx) { - err = -ENOMEM; - goto free_caam_alloc; - } - err = caam_init_rng(rng_ctx, dev); - if (err) - goto free_rng_ctx; + if (!devres_open_group(ctrldev, caam_rng_init, GFP_KERNEL)) + return -ENOMEM; - dev_info(dev, "registering rng-caam\n"); + ctx = devm_kzalloc(ctrldev, sizeof(*ctx), GFP_KERNEL); + if (!ctx) + return -ENOMEM; - err = hwrng_register(&caam_rng); - if (!err) { - init_done = true; - return err; + ctx->ctrldev = ctrldev; + + ctx->rng.name = "rng-caam"; + ctx->rng.init = caam_init; + ctx->rng.cleanup = caam_cleanup; + ctx->rng.read = caam_read; + ctx->rng.priv = (unsigned long)ctx; + ctx->rng.quality = 1024; + + dev_info(ctrldev, "registering rng-caam\n"); + + ret = devm_hwrng_register(ctrldev, &ctx->rng); + if (ret) { + caam_rng_exit(ctrldev); + return ret; } -free_rng_ctx: - kfree(rng_ctx); -free_caam_alloc: - caam_jr_free(dev); - return err; + devres_close_group(ctrldev, caam_rng_init); + return 0; } diff --git a/drivers/crypto/caam/ctrl.c b/drivers/crypto/caam/ctrl.c index 7139366da016..4fcdd262e581 100644 --- a/drivers/crypto/caam/ctrl.c +++ b/drivers/crypto/caam/ctrl.c @@ -10,6 +10,7 @@ #include <linux/of_address.h> #include <linux/of_irq.h> #include <linux/sys_soc.h> +#include <linux/fsl/mc.h> #include "compat.h" #include "regs.h" @@ -36,7 +37,8 @@ static void build_instantiation_desc(u32 *desc, int handle, int do_sk) init_job_desc(desc, 0); op_flags = OP_TYPE_CLASS1_ALG | OP_ALG_ALGSEL_RNG | - (handle << OP_ALG_AAI_SHIFT) | OP_ALG_AS_INIT; + (handle << OP_ALG_AAI_SHIFT) | OP_ALG_AS_INIT | + OP_ALG_PR_ON; /* INIT RNG in non-test mode */ append_operation(desc, op_flags); @@ -196,7 +198,7 @@ static int deinstantiate_rng(struct device *ctrldev, int state_handle_mask) u32 *desc, status; int sh_idx, ret = 0; - desc = kmalloc(CAAM_CMD_SZ * 3, GFP_KERNEL); + desc = kmalloc(CAAM_CMD_SZ * 3, GFP_KERNEL | GFP_DMA); if (!desc) return -ENOMEM; @@ -273,17 +275,30 @@ static int instantiate_rng(struct device *ctrldev, int state_handle_mask, int ret = 0, sh_idx; ctrl = (struct caam_ctrl __iomem *)ctrlpriv->ctrl; - desc = kmalloc(CAAM_CMD_SZ * 7, GFP_KERNEL); + desc = kmalloc(CAAM_CMD_SZ * 7, GFP_KERNEL | GFP_DMA); if (!desc) return -ENOMEM; for (sh_idx = 0; sh_idx < RNG4_MAX_HANDLES; sh_idx++) { + const u32 rdsta_if = RDSTA_IF0 << sh_idx; + const u32 rdsta_pr = RDSTA_PR0 << sh_idx; + const u32 rdsta_mask = rdsta_if | rdsta_pr; /* * If the corresponding bit is set, this state handle * was initialized by somebody else, so it's left alone. */ - if ((1 << sh_idx) & state_handle_mask) - continue; + if (rdsta_if & state_handle_mask) { + if (rdsta_pr & state_handle_mask) + continue; + + dev_info(ctrldev, + "RNG4 SH%d was previously instantiated without prediction resistance. Tearing it down\n", + sh_idx); + + ret = deinstantiate_rng(ctrldev, rdsta_if); + if (ret) + break; + } /* Create the descriptor for instantiating RNG State Handle */ build_instantiation_desc(desc, sh_idx, gen_sk); @@ -303,9 +318,9 @@ static int instantiate_rng(struct device *ctrldev, int state_handle_mask, if (ret) break; - rdsta_val = rd_reg32(&ctrl->r4tst[0].rdsta) & RDSTA_IFMASK; + rdsta_val = rd_reg32(&ctrl->r4tst[0].rdsta) & RDSTA_MASK; if ((status && status != JRSTA_SSRC_JUMP_HALT_CC) || - !(rdsta_val & (1 << sh_idx))) { + (rdsta_val & rdsta_mask) != rdsta_mask) { ret = -EAGAIN; break; } @@ -341,8 +356,12 @@ static void kick_trng(struct platform_device *pdev, int ent_delay) ctrl = (struct caam_ctrl __iomem *)ctrlpriv->ctrl; r4tst = &ctrl->r4tst[0]; - /* put RNG4 into program mode */ - clrsetbits_32(&r4tst->rtmctl, 0, RTMCTL_PRGM); + /* + * Setting both RTMCTL:PRGM and RTMCTL:TRNG_ACC causes TRNG to + * properly invalidate the entropy in the entropy register and + * force re-generation. + */ + clrsetbits_32(&r4tst->rtmctl, 0, RTMCTL_PRGM | RTMCTL_ACC); /* * Performance-wise, it does not make sense to @@ -372,7 +391,8 @@ start_rng: * select raw sampling in both entropy shifter * and statistical checker; ; put RNG4 into run mode */ - clrsetbits_32(&r4tst->rtmctl, RTMCTL_PRGM, RTMCTL_SAMP_MODE_RAW_ES_SC); + clrsetbits_32(&r4tst->rtmctl, RTMCTL_PRGM | RTMCTL_ACC, + RTMCTL_SAMP_MODE_RAW_ES_SC); } static int caam_get_era_from_hw(struct caam_ctrl __iomem *ctrl) @@ -559,6 +579,26 @@ static void caam_remove_debugfs(void *root) } #endif +#ifdef CONFIG_FSL_MC_BUS +static bool check_version(struct fsl_mc_version *mc_version, u32 major, + u32 minor, u32 revision) +{ + if (mc_version->major > major) + return true; + + if (mc_version->major == major) { + if (mc_version->minor > minor) + return true; + + if (mc_version->minor == minor && + mc_version->revision > revision) + return true; + } + + return false; +} +#endif + /* Probe routine for CAAM top (controller) level */ static int caam_probe(struct platform_device *pdev) { @@ -577,6 +617,7 @@ static int caam_probe(struct platform_device *pdev) u8 rng_vid; int pg_size; int BLOCK_OFFSET = 0; + bool pr_support = false; ctrlpriv = devm_kzalloc(&pdev->dev, sizeof(*ctrlpriv), GFP_KERNEL); if (!ctrlpriv) @@ -662,6 +703,21 @@ static int caam_probe(struct platform_device *pdev) /* Get the IRQ of the controller (for security violations only) */ ctrlpriv->secvio_irq = irq_of_parse_and_map(nprop, 0); + np = of_find_compatible_node(NULL, NULL, "fsl,qoriq-mc"); + ctrlpriv->mc_en = !!np; + of_node_put(np); + +#ifdef CONFIG_FSL_MC_BUS + if (ctrlpriv->mc_en) { + struct fsl_mc_version *mc_version; + + mc_version = fsl_mc_get_version(); + if (mc_version) + pr_support = check_version(mc_version, 10, 20, 0); + else + return -EPROBE_DEFER; + } +#endif /* * Enable DECO watchdogs and, if this is a PHYS_ADDR_T_64BIT kernel, @@ -669,10 +725,6 @@ static int caam_probe(struct platform_device *pdev) * In case of SoCs with Management Complex, MC f/w performs * the configuration. */ - np = of_find_compatible_node(NULL, NULL, "fsl,qoriq-mc"); - ctrlpriv->mc_en = !!np; - of_node_put(np); - if (!ctrlpriv->mc_en) clrsetbits_32(&ctrl->mcr, MCFGR_AWCACHE_MASK, MCFGR_AWCACHE_CACH | MCFGR_AWCACHE_BUFF | @@ -779,7 +831,7 @@ static int caam_probe(struct platform_device *pdev) * already instantiated, do RNG instantiation * In case of SoCs with Management Complex, RNG is managed by MC f/w. */ - if (!ctrlpriv->mc_en && rng_vid >= 4) { + if (!(ctrlpriv->mc_en && pr_support) && rng_vid >= 4) { ctrlpriv->rng4_sh_init = rd_reg32(&ctrl->r4tst[0].rdsta); /* @@ -789,11 +841,11 @@ static int caam_probe(struct platform_device *pdev) * to regenerate these keys before the next POR. */ gen_sk = ctrlpriv->rng4_sh_init & RDSTA_SKVN ? 0 : 1; - ctrlpriv->rng4_sh_init &= RDSTA_IFMASK; + ctrlpriv->rng4_sh_init &= RDSTA_MASK; do { int inst_handles = rd_reg32(&ctrl->r4tst[0].rdsta) & - RDSTA_IFMASK; + RDSTA_MASK; /* * If either SH were instantiated by somebody else * (e.g. u-boot) then it is assumed that the entropy @@ -833,7 +885,7 @@ static int caam_probe(struct platform_device *pdev) * Set handles init'ed by this module as the complement of the * already initialized ones */ - ctrlpriv->rng4_sh_init = ~ctrlpriv->rng4_sh_init & RDSTA_IFMASK; + ctrlpriv->rng4_sh_init = ~ctrlpriv->rng4_sh_init & RDSTA_MASK; /* Enable RDB bit so that RNG works faster */ clrsetbits_32(&ctrl->scfgr, 0, SCFGR_RDBENABLE); diff --git a/drivers/crypto/caam/desc.h b/drivers/crypto/caam/desc.h index 4b6854bf896a..e796d3cb9be8 100644 --- a/drivers/crypto/caam/desc.h +++ b/drivers/crypto/caam/desc.h @@ -1254,6 +1254,8 @@ #define OP_ALG_ICV_OFF (0 << OP_ALG_ICV_SHIFT) #define OP_ALG_ICV_ON (1 << OP_ALG_ICV_SHIFT) +#define OP_ALG_PR_ON BIT(1) + #define OP_ALG_DIR_SHIFT 0 #define OP_ALG_DIR_MASK 1 #define OP_ALG_DECRYPT 0 diff --git a/drivers/crypto/caam/intern.h b/drivers/crypto/caam/intern.h index c7c10c90464b..402d6a362e8c 100644 --- a/drivers/crypto/caam/intern.h +++ b/drivers/crypto/caam/intern.h @@ -11,6 +11,7 @@ #define INTERN_H #include "ctrl.h" +#include <crypto/engine.h> /* Currently comes from Kconfig param as a ^2 (driver-required) */ #define JOBR_DEPTH (1 << CONFIG_CRYPTO_DEV_FSL_CAAM_RINGSIZE) @@ -46,6 +47,7 @@ struct caam_drv_private_jr { struct caam_job_ring __iomem *rregs; /* JobR's register space */ struct tasklet_struct irqtask; int irq; /* One per queue */ + bool hwrng; /* Number of scatterlist crypt transforms active on the JobR */ atomic_t tfm_count ____cacheline_aligned; @@ -60,6 +62,7 @@ struct caam_drv_private_jr { int out_ring_read_index; /* Output index "tail" */ int tail; /* entinfo (s/w ring) tail index */ void *outring; /* Base of output ring, DMA-safe */ + struct crypto_engine *engine; }; /* @@ -161,7 +164,7 @@ static inline void caam_pkc_exit(void) #ifdef CONFIG_CRYPTO_DEV_FSL_CAAM_RNG_API int caam_rng_init(struct device *dev); -void caam_rng_exit(void); +void caam_rng_exit(struct device *dev); #else @@ -170,9 +173,7 @@ static inline int caam_rng_init(struct device *dev) return 0; } -static inline void caam_rng_exit(void) -{ -} +static inline void caam_rng_exit(struct device *dev) {} #endif /* CONFIG_CRYPTO_DEV_FSL_CAAM_RNG_API */ diff --git a/drivers/crypto/caam/jr.c b/drivers/crypto/caam/jr.c index fc97cde27059..4af22e7ceb4f 100644 --- a/drivers/crypto/caam/jr.c +++ b/drivers/crypto/caam/jr.c @@ -27,7 +27,8 @@ static struct jr_driver_data driver_data; static DEFINE_MUTEX(algs_lock); static unsigned int active_devs; -static void register_algs(struct device *dev) +static void register_algs(struct caam_drv_private_jr *jrpriv, + struct device *dev) { mutex_lock(&algs_lock); @@ -37,7 +38,7 @@ static void register_algs(struct device *dev) caam_algapi_init(dev); caam_algapi_hash_init(dev); caam_pkc_init(dev); - caam_rng_init(dev); + jrpriv->hwrng = !caam_rng_init(dev); caam_qi_algapi_init(dev); algs_unlock: @@ -53,7 +54,6 @@ static void unregister_algs(void) caam_qi_algapi_exit(); - caam_rng_exit(); caam_pkc_exit(); caam_algapi_hash_exit(); caam_algapi_exit(); @@ -62,6 +62,15 @@ algs_unlock: mutex_unlock(&algs_lock); } +static void caam_jr_crypto_engine_exit(void *data) +{ + struct device *jrdev = data; + struct caam_drv_private_jr *jrpriv = dev_get_drvdata(jrdev); + + /* Free the resources of crypto-engine */ + crypto_engine_exit(jrpriv->engine); +} + static int caam_reset_hw_jr(struct device *dev) { struct caam_drv_private_jr *jrp = dev_get_drvdata(dev); @@ -126,6 +135,9 @@ static int caam_jr_remove(struct platform_device *pdev) jrdev = &pdev->dev; jrpriv = dev_get_drvdata(jrdev); + if (jrpriv->hwrng) + caam_rng_exit(jrdev->parent); + /* * Return EBUSY if job ring already allocated. */ @@ -324,8 +336,8 @@ void caam_jr_free(struct device *rdev) EXPORT_SYMBOL(caam_jr_free); /** - * caam_jr_enqueue() - Enqueue a job descriptor head. Returns 0 if OK, - * -EBUSY if the queue is full, -EIO if it cannot map the caller's + * caam_jr_enqueue() - Enqueue a job descriptor head. Returns -EINPROGRESS + * if OK, -ENOSPC if the queue is full, -EIO if it cannot map the caller's * descriptor. * @dev: device of the job ring to be used. This device should have * been assigned prior by caam_jr_register(). @@ -377,7 +389,7 @@ int caam_jr_enqueue(struct device *dev, u32 *desc, CIRC_SPACE(head, tail, JOBR_DEPTH) <= 0) { spin_unlock_bh(&jrp->inplock); dma_unmap_single(dev, desc_dma, desc_size, DMA_TO_DEVICE); - return -EBUSY; + return -ENOSPC; } head_entry = &jrp->entinfo[head]; @@ -414,7 +426,7 @@ int caam_jr_enqueue(struct device *dev, u32 *desc, spin_unlock_bh(&jrp->inplock); - return 0; + return -EINPROGRESS; } EXPORT_SYMBOL(caam_jr_enqueue); @@ -505,7 +517,7 @@ static int caam_jr_probe(struct platform_device *pdev) int error; jrdev = &pdev->dev; - jrpriv = devm_kmalloc(jrdev, sizeof(*jrpriv), GFP_KERNEL); + jrpriv = devm_kzalloc(jrdev, sizeof(*jrpriv), GFP_KERNEL); if (!jrpriv) return -ENOMEM; @@ -538,6 +550,25 @@ static int caam_jr_probe(struct platform_device *pdev) return error; } + /* Initialize crypto engine */ + jrpriv->engine = crypto_engine_alloc_init(jrdev, false); + if (!jrpriv->engine) { + dev_err(jrdev, "Could not init crypto-engine\n"); + return -ENOMEM; + } + + error = devm_add_action_or_reset(jrdev, caam_jr_crypto_engine_exit, + jrdev); + if (error) + return error; + + /* Start crypto engine */ + error = crypto_engine_start(jrpriv->engine); + if (error) { + dev_err(jrdev, "Could not start crypto-engine\n"); + return error; + } + /* Identify the interrupt */ jrpriv->irq = irq_of_parse_and_map(nprop, 0); if (!jrpriv->irq) { @@ -562,7 +593,7 @@ static int caam_jr_probe(struct platform_device *pdev) atomic_set(&jrpriv->tfm_count, 0); - register_algs(jrdev->parent); + register_algs(jrpriv, jrdev->parent); return 0; } diff --git a/drivers/crypto/caam/key_gen.c b/drivers/crypto/caam/key_gen.c index 5a851ddc48fb..b0e8a4939b4f 100644 --- a/drivers/crypto/caam/key_gen.c +++ b/drivers/crypto/caam/key_gen.c @@ -108,7 +108,7 @@ int gen_split_key(struct device *jrdev, u8 *key_out, init_completion(&result.completion); ret = caam_jr_enqueue(jrdev, desc, split_key_done, &result); - if (!ret) { + if (ret == -EINPROGRESS) { /* in progress */ wait_for_completion(&result.completion); ret = result.err; diff --git a/drivers/crypto/caam/qi.c b/drivers/crypto/caam/qi.c index dacf2fa4aa8e..b390b935db6d 100644 --- a/drivers/crypto/caam/qi.c +++ b/drivers/crypto/caam/qi.c @@ -4,7 +4,7 @@ * Queue Interface backend functionality * * Copyright 2013-2016 Freescale Semiconductor, Inc. - * Copyright 2016-2017, 2019 NXP + * Copyright 2016-2017, 2019-2020 NXP */ #include <linux/cpumask.h> @@ -124,8 +124,10 @@ int caam_qi_enqueue(struct device *qidev, struct caam_drv_req *req) do { ret = qman_enqueue(req->drv_ctx->req_fq, &fd); - if (likely(!ret)) + if (likely(!ret)) { + refcount_inc(&req->drv_ctx->refcnt); return 0; + } if (ret != -EBUSY) break; @@ -148,11 +150,6 @@ static void caam_fq_ern_cb(struct qman_portal *qm, struct qman_fq *fq, fd = &msg->ern.fd; - if (qm_fd_get_format(fd) != qm_fd_compound) { - dev_err(qidev, "Non-compound FD from CAAM\n"); - return; - } - drv_req = caam_iova_to_virt(priv->domain, qm_fd_addr_get64(fd)); if (!drv_req) { dev_err(qidev, @@ -160,6 +157,13 @@ static void caam_fq_ern_cb(struct qman_portal *qm, struct qman_fq *fq, return; } + refcount_dec(&drv_req->drv_ctx->refcnt); + + if (qm_fd_get_format(fd) != qm_fd_compound) { + dev_err(qidev, "Non-compound FD from CAAM\n"); + return; + } + dma_unmap_single(drv_req->drv_ctx->qidev, qm_fd_addr(fd), sizeof(drv_req->fd_sgt), DMA_BIDIRECTIONAL); @@ -287,9 +291,10 @@ empty_fq: return ret; } -static int empty_caam_fq(struct qman_fq *fq) +static int empty_caam_fq(struct qman_fq *fq, struct caam_drv_ctx *drv_ctx) { int ret; + int retries = 10; struct qm_mcr_queryfq_np np; /* Wait till the older CAAM FQ get empty */ @@ -304,11 +309,18 @@ static int empty_caam_fq(struct qman_fq *fq) msleep(20); } while (1); - /* - * Give extra time for pending jobs from this FQ in holding tanks - * to get processed - */ - msleep(20); + /* Wait until pending jobs from this FQ are processed by CAAM */ + do { + if (refcount_read(&drv_ctx->refcnt) == 1) + break; + + msleep(20); + } while (--retries); + + if (!retries) + dev_warn_once(drv_ctx->qidev, "%d frames from FQID %u still pending in CAAM\n", + refcount_read(&drv_ctx->refcnt), fq->fqid); + return 0; } @@ -340,7 +352,7 @@ int caam_drv_ctx_update(struct caam_drv_ctx *drv_ctx, u32 *sh_desc) drv_ctx->req_fq = new_fq; /* Empty and remove the older FQ */ - ret = empty_caam_fq(old_fq); + ret = empty_caam_fq(old_fq, drv_ctx); if (ret) { dev_err(qidev, "Old CAAM FQ empty failed: %d\n", ret); @@ -453,6 +465,9 @@ struct caam_drv_ctx *caam_drv_ctx_init(struct device *qidev, return ERR_PTR(-ENOMEM); } + /* init reference counter used to track references to request FQ */ + refcount_set(&drv_ctx->refcnt, 1); + drv_ctx->qidev = qidev; return drv_ctx; } @@ -571,6 +586,16 @@ static enum qman_cb_dqrr_result caam_rsp_fq_dqrr_cb(struct qman_portal *p, return qman_cb_dqrr_stop; fd = &dqrr->fd; + + drv_req = caam_iova_to_virt(priv->domain, qm_fd_addr_get64(fd)); + if (unlikely(!drv_req)) { + dev_err(qidev, + "Can't find original request for caam response\n"); + return qman_cb_dqrr_consume; + } + + refcount_dec(&drv_req->drv_ctx->refcnt); + status = be32_to_cpu(fd->status); if (unlikely(status)) { u32 ssrc = status & JRSTA_SSRC_MASK; @@ -588,13 +613,6 @@ static enum qman_cb_dqrr_result caam_rsp_fq_dqrr_cb(struct qman_portal *p, return qman_cb_dqrr_consume; } - drv_req = caam_iova_to_virt(priv->domain, qm_fd_addr_get64(fd)); - if (unlikely(!drv_req)) { - dev_err(qidev, - "Can't find original request for caam response\n"); - return qman_cb_dqrr_consume; - } - dma_unmap_single(drv_req->drv_ctx->qidev, qm_fd_addr(fd), sizeof(drv_req->fd_sgt), DMA_BIDIRECTIONAL); diff --git a/drivers/crypto/caam/qi.h b/drivers/crypto/caam/qi.h index 848958951f68..5894f16f8fe3 100644 --- a/drivers/crypto/caam/qi.h +++ b/drivers/crypto/caam/qi.h @@ -3,7 +3,7 @@ * Public definitions for the CAAM/QI (Queue Interface) backend. * * Copyright 2013-2016 Freescale Semiconductor, Inc. - * Copyright 2016-2017 NXP + * Copyright 2016-2017, 2020 NXP */ #ifndef __QI_H__ @@ -52,6 +52,7 @@ enum optype { * @context_a: shared descriptor dma address * @req_fq: to-CAAM request frame queue * @rsp_fq: from-CAAM response frame queue + * @refcnt: reference counter incremented for each frame enqueued in to-CAAM FQ * @cpu: cpu on which to receive CAAM response * @op_type: operation type * @qidev: device pointer for CAAM/QI backend @@ -62,6 +63,7 @@ struct caam_drv_ctx { dma_addr_t context_a; struct qman_fq *req_fq; struct qman_fq *rsp_fq; + refcount_t refcnt; int cpu; enum optype op_type; struct device *qidev; diff --git a/drivers/crypto/caam/regs.h b/drivers/crypto/caam/regs.h index 05127b70527d..0f810bc13b2b 100644 --- a/drivers/crypto/caam/regs.h +++ b/drivers/crypto/caam/regs.h @@ -487,7 +487,8 @@ struct rngtst { /* RNG4 TRNG test registers */ struct rng4tst { -#define RTMCTL_PRGM 0x00010000 /* 1 -> program mode, 0 -> run mode */ +#define RTMCTL_ACC BIT(5) /* TRNG access mode */ +#define RTMCTL_PRGM BIT(16) /* 1 -> program mode, 0 -> run mode */ #define RTMCTL_SAMP_MODE_VON_NEUMANN_ES_SC 0 /* use von Neumann data in both entropy shifter and statistical checker */ @@ -523,9 +524,11 @@ struct rng4tst { u32 rsvd1[40]; #define RDSTA_SKVT 0x80000000 #define RDSTA_SKVN 0x40000000 +#define RDSTA_PR0 BIT(4) +#define RDSTA_PR1 BIT(5) #define RDSTA_IF0 0x00000001 #define RDSTA_IF1 0x00000002 -#define RDSTA_IFMASK (RDSTA_IF1 | RDSTA_IF0) +#define RDSTA_MASK (RDSTA_PR1 | RDSTA_PR0 | RDSTA_IF1 | RDSTA_IF0) u32 rdsta; u32 rsvd2[15]; }; diff --git a/drivers/crypto/cavium/nitrox/nitrox_main.c b/drivers/crypto/cavium/nitrox/nitrox_main.c index c4632d84c9a1..e91be9b8b083 100644 --- a/drivers/crypto/cavium/nitrox/nitrox_main.c +++ b/drivers/crypto/cavium/nitrox/nitrox_main.c @@ -71,7 +71,7 @@ struct ucode { char version[VERSION_LEN - 1]; __be32 code_size; u8 raz[12]; - u64 code[0]; + u64 code[]; }; /** diff --git a/drivers/crypto/ccp/psp-dev.c b/drivers/crypto/ccp/psp-dev.c index e95e7aa5dbf1..ae7b44599914 100644 --- a/drivers/crypto/ccp/psp-dev.c +++ b/drivers/crypto/ccp/psp-dev.c @@ -215,6 +215,9 @@ void psp_dev_destroy(struct sp_device *sp) tee_dev_destroy(psp); sp_free_psp_irq(sp, psp); + + if (sp->clear_psp_master_device) + sp->clear_psp_master_device(sp); } void psp_set_sev_irq_handler(struct psp_device *psp, psp_irq_handler_t handler, diff --git a/drivers/crypto/ccp/sev-dev.c b/drivers/crypto/ccp/sev-dev.c index e467860f797d..896f190b9a50 100644 --- a/drivers/crypto/ccp/sev-dev.c +++ b/drivers/crypto/ccp/sev-dev.c @@ -283,11 +283,11 @@ static int sev_get_platform_state(int *state, int *error) return rc; } -static int sev_ioctl_do_reset(struct sev_issue_cmd *argp) +static int sev_ioctl_do_reset(struct sev_issue_cmd *argp, bool writable) { int state, rc; - if (!capable(CAP_SYS_ADMIN)) + if (!writable) return -EPERM; /* @@ -331,12 +331,12 @@ static int sev_ioctl_do_platform_status(struct sev_issue_cmd *argp) return ret; } -static int sev_ioctl_do_pek_pdh_gen(int cmd, struct sev_issue_cmd *argp) +static int sev_ioctl_do_pek_pdh_gen(int cmd, struct sev_issue_cmd *argp, bool writable) { struct sev_device *sev = psp_master->sev_data; int rc; - if (!capable(CAP_SYS_ADMIN)) + if (!writable) return -EPERM; if (sev->state == SEV_STATE_UNINIT) { @@ -348,7 +348,7 @@ static int sev_ioctl_do_pek_pdh_gen(int cmd, struct sev_issue_cmd *argp) return __sev_do_cmd_locked(cmd, NULL, &argp->error); } -static int sev_ioctl_do_pek_csr(struct sev_issue_cmd *argp) +static int sev_ioctl_do_pek_csr(struct sev_issue_cmd *argp, bool writable) { struct sev_device *sev = psp_master->sev_data; struct sev_user_data_pek_csr input; @@ -356,7 +356,7 @@ static int sev_ioctl_do_pek_csr(struct sev_issue_cmd *argp) void *blob = NULL; int ret; - if (!capable(CAP_SYS_ADMIN)) + if (!writable) return -EPERM; if (copy_from_user(&input, (void __user *)argp->data, sizeof(input))) @@ -539,7 +539,7 @@ fw_err: return ret; } -static int sev_ioctl_do_pek_import(struct sev_issue_cmd *argp) +static int sev_ioctl_do_pek_import(struct sev_issue_cmd *argp, bool writable) { struct sev_device *sev = psp_master->sev_data; struct sev_user_data_pek_cert_import input; @@ -547,7 +547,7 @@ static int sev_ioctl_do_pek_import(struct sev_issue_cmd *argp) void *pek_blob, *oca_blob; int ret; - if (!capable(CAP_SYS_ADMIN)) + if (!writable) return -EPERM; if (copy_from_user(&input, (void __user *)argp->data, sizeof(input))) @@ -698,7 +698,7 @@ static int sev_ioctl_do_get_id(struct sev_issue_cmd *argp) return ret; } -static int sev_ioctl_do_pdh_export(struct sev_issue_cmd *argp) +static int sev_ioctl_do_pdh_export(struct sev_issue_cmd *argp, bool writable) { struct sev_device *sev = psp_master->sev_data; struct sev_user_data_pdh_cert_export input; @@ -708,7 +708,7 @@ static int sev_ioctl_do_pdh_export(struct sev_issue_cmd *argp) /* If platform is not in INIT state then transition it to INIT. */ if (sev->state != SEV_STATE_INIT) { - if (!capable(CAP_SYS_ADMIN)) + if (!writable) return -EPERM; ret = __sev_platform_init_locked(&argp->error); @@ -801,6 +801,7 @@ static long sev_ioctl(struct file *file, unsigned int ioctl, unsigned long arg) void __user *argp = (void __user *)arg; struct sev_issue_cmd input; int ret = -EFAULT; + bool writable = file->f_mode & FMODE_WRITE; if (!psp_master || !psp_master->sev_data) return -ENODEV; @@ -819,25 +820,25 @@ static long sev_ioctl(struct file *file, unsigned int ioctl, unsigned long arg) switch (input.cmd) { case SEV_FACTORY_RESET: - ret = sev_ioctl_do_reset(&input); + ret = sev_ioctl_do_reset(&input, writable); break; case SEV_PLATFORM_STATUS: ret = sev_ioctl_do_platform_status(&input); break; case SEV_PEK_GEN: - ret = sev_ioctl_do_pek_pdh_gen(SEV_CMD_PEK_GEN, &input); + ret = sev_ioctl_do_pek_pdh_gen(SEV_CMD_PEK_GEN, &input, writable); break; case SEV_PDH_GEN: - ret = sev_ioctl_do_pek_pdh_gen(SEV_CMD_PDH_GEN, &input); + ret = sev_ioctl_do_pek_pdh_gen(SEV_CMD_PDH_GEN, &input, writable); break; case SEV_PEK_CSR: - ret = sev_ioctl_do_pek_csr(&input); + ret = sev_ioctl_do_pek_csr(&input, writable); break; case SEV_PEK_CERT_IMPORT: - ret = sev_ioctl_do_pek_import(&input); + ret = sev_ioctl_do_pek_import(&input, writable); break; case SEV_PDH_CERT_EXPORT: - ret = sev_ioctl_do_pdh_export(&input); + ret = sev_ioctl_do_pdh_export(&input, writable); break; case SEV_GET_ID: pr_warn_once("SEV_GET_ID command is deprecated, use SEV_GET_ID2\n"); @@ -896,9 +897,9 @@ EXPORT_SYMBOL_GPL(sev_guest_df_flush); static void sev_exit(struct kref *ref) { - struct sev_misc_dev *misc_dev = container_of(ref, struct sev_misc_dev, refcount); - misc_deregister(&misc_dev->misc); + kfree(misc_dev); + misc_dev = NULL; } static int sev_misc_init(struct sev_device *sev) @@ -916,7 +917,7 @@ static int sev_misc_init(struct sev_device *sev) if (!misc_dev) { struct miscdevice *misc; - misc_dev = devm_kzalloc(dev, sizeof(*misc_dev), GFP_KERNEL); + misc_dev = kzalloc(sizeof(*misc_dev), GFP_KERNEL); if (!misc_dev) return -ENOMEM; diff --git a/drivers/crypto/ccp/sp-dev.h b/drivers/crypto/ccp/sp-dev.h index 423594608ad1..f913f1494af9 100644 --- a/drivers/crypto/ccp/sp-dev.h +++ b/drivers/crypto/ccp/sp-dev.h @@ -90,6 +90,7 @@ struct sp_device { /* get and set master device */ struct sp_device*(*get_psp_master_device)(void); void (*set_psp_master_device)(struct sp_device *); + void (*clear_psp_master_device)(struct sp_device *); bool irq_registered; bool use_tasklet; diff --git a/drivers/crypto/ccp/sp-pci.c b/drivers/crypto/ccp/sp-pci.c index 56c1f61c0f84..cb6cb47053f4 100644 --- a/drivers/crypto/ccp/sp-pci.c +++ b/drivers/crypto/ccp/sp-pci.c @@ -146,6 +146,14 @@ static struct sp_device *psp_get_master(void) return sp_dev_master; } +static void psp_clear_master(struct sp_device *sp) +{ + if (sp == sp_dev_master) { + sp_dev_master = NULL; + dev_dbg(sp->dev, "Cleared sp_dev_master\n"); + } +} + static int sp_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) { struct sp_device *sp; @@ -206,6 +214,7 @@ static int sp_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) pci_set_master(pdev); sp->set_psp_master_device = psp_set_master; sp->get_psp_master_device = psp_get_master; + sp->clear_psp_master_device = psp_clear_master; ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(48)); if (ret) { diff --git a/drivers/crypto/ccree/cc_aead.c b/drivers/crypto/ccree/cc_aead.c index 2fc0e0da790b..1cf51edbc4b9 100644 --- a/drivers/crypto/ccree/cc_aead.c +++ b/drivers/crypto/ccree/cc_aead.c @@ -6,8 +6,9 @@ #include <crypto/algapi.h> #include <crypto/internal/aead.h> #include <crypto/authenc.h> -#include <crypto/internal/des.h> +#include <crypto/gcm.h> #include <linux/rtnetlink.h> +#include <crypto/internal/des.h> #include "cc_driver.h" #include "cc_buffer_mgr.h" #include "cc_aead.h" @@ -26,7 +27,7 @@ #define MAX_NONCE_SIZE CTR_RFC3686_NONCE_SIZE struct cc_aead_handle { - cc_sram_addr_t sram_workspace_addr; + u32 sram_workspace_addr; struct list_head aead_list; }; @@ -60,11 +61,6 @@ struct cc_aead_ctx { enum drv_hash_mode auth_mode; }; -static inline bool valid_assoclen(struct aead_request *req) -{ - return ((req->assoclen == 16) || (req->assoclen == 20)); -} - static void cc_aead_exit(struct crypto_aead *tfm) { struct cc_aead_ctx *ctx = crypto_aead_ctx(tfm); @@ -417,7 +413,7 @@ static int cc_get_plain_hmac_key(struct crypto_aead *tfm, const u8 *authkey, dma_addr_t key_dma_addr = 0; struct cc_aead_ctx *ctx = crypto_aead_ctx(tfm); struct device *dev = drvdata_to_dev(ctx->drvdata); - u32 larval_addr = cc_larval_digest_addr(ctx->drvdata, ctx->auth_mode); + u32 larval_addr; struct cc_crypto_req cc_req = {}; unsigned int blocksize; unsigned int digestsize; @@ -448,8 +444,7 @@ static int cc_get_plain_hmac_key(struct crypto_aead *tfm, const u8 *authkey, if (!key) return -ENOMEM; - key_dma_addr = dma_map_single(dev, (void *)key, keylen, - DMA_TO_DEVICE); + key_dma_addr = dma_map_single(dev, key, keylen, DMA_TO_DEVICE); if (dma_mapping_error(dev, key_dma_addr)) { dev_err(dev, "Mapping key va=0x%p len=%u for DMA failed\n", key, keylen); @@ -460,6 +455,8 @@ static int cc_get_plain_hmac_key(struct crypto_aead *tfm, const u8 *authkey, /* Load hash initial state */ hw_desc_init(&desc[idx]); set_cipher_mode(&desc[idx], hashmode); + larval_addr = cc_larval_digest_addr(ctx->drvdata, + ctx->auth_mode); set_din_sram(&desc[idx], larval_addr, digestsize); set_flow_mode(&desc[idx], S_DIN_to_HASH); set_setup_mode(&desc[idx], SETUP_LOAD_STATE0); @@ -796,7 +793,7 @@ static void cc_proc_authen_desc(struct aead_request *areq, * assoc. + iv + data -compact in one table * if assoclen is ZERO only IV perform */ - cc_sram_addr_t mlli_addr = areq_ctx->assoc.sram_addr; + u32 mlli_addr = areq_ctx->assoc.sram_addr; u32 mlli_nents = areq_ctx->assoc.mlli_nents; if (areq_ctx->is_single_pass) { @@ -1170,7 +1167,7 @@ static void cc_mlli_to_sram(struct aead_request *req, req_ctx->data_buff_type == CC_DMA_BUF_MLLI || !req_ctx->is_single_pass) && req_ctx->mlli_params.mlli_len) { dev_dbg(dev, "Copy-to-sram: mlli_dma=%08x, mlli_size=%u\n", - (unsigned int)ctx->drvdata->mlli_sram_addr, + ctx->drvdata->mlli_sram_addr, req_ctx->mlli_params.mlli_len); /* Copy MLLI table host-to-sram */ hw_desc_init(&desc[*seq_size]); @@ -1222,7 +1219,7 @@ static void cc_hmac_authenc(struct aead_request *req, struct cc_hw_desc desc[], req_ctx->is_single_pass); if (req_ctx->is_single_pass) { - /** + /* * Single-pass flow */ cc_set_hmac_desc(req, desc, seq_size); @@ -1234,7 +1231,7 @@ static void cc_hmac_authenc(struct aead_request *req, struct cc_hw_desc desc[], return; } - /** + /* * Double-pass flow * Fallback for unsupported single-pass modes, * i.e. using assoc. data of non-word-multiple @@ -1275,7 +1272,7 @@ cc_xcbc_authenc(struct aead_request *req, struct cc_hw_desc desc[], req_ctx->is_single_pass); if (req_ctx->is_single_pass) { - /** + /* * Single-pass flow */ cc_set_xcbc_desc(req, desc, seq_size); @@ -1286,7 +1283,7 @@ cc_xcbc_authenc(struct aead_request *req, struct cc_hw_desc desc[], return; } - /** + /* * Double-pass flow * Fallback for unsupported single-pass modes, * i.e. using assoc. data of non-word-multiple @@ -1611,7 +1608,6 @@ static void cc_proc_rfc4309_ccm(struct aead_request *req) memcpy(areq_ctx->ctr_iv + CCM_BLOCK_IV_OFFSET, req->iv, CCM_BLOCK_IV_SIZE); req->iv = areq_ctx->ctr_iv; - areq_ctx->assoclen -= CCM_BLOCK_IV_SIZE; } static void cc_set_ghash_desc(struct aead_request *req, @@ -1799,12 +1795,6 @@ static int cc_gcm(struct aead_request *req, struct cc_hw_desc desc[], struct aead_req_ctx *req_ctx = aead_request_ctx(req); unsigned int cipher_flow_mode; - if (req_ctx->gen_ctx.op_type == DRV_CRYPTO_DIRECTION_DECRYPT) { - cipher_flow_mode = AES_and_HASH; - } else { /* Encrypt */ - cipher_flow_mode = AES_to_HASH_and_DOUT; - } - //in RFC4543 no data to encrypt. just copy data from src to dest. if (req_ctx->plaintext_authenticate_only) { cc_proc_cipher_desc(req, BYPASS, desc, seq_size); @@ -1816,6 +1806,12 @@ static int cc_gcm(struct aead_request *req, struct cc_hw_desc desc[], return 0; } + if (req_ctx->gen_ctx.op_type == DRV_CRYPTO_DIRECTION_DECRYPT) { + cipher_flow_mode = AES_and_HASH; + } else { /* Encrypt */ + cipher_flow_mode = AES_to_HASH_and_DOUT; + } + // for gcm and rfc4106. cc_set_ghash_desc(req, desc, seq_size); /* process(ghash) assoc data */ @@ -1870,8 +1866,7 @@ static int config_gcm_context(struct aead_request *req) */ __be64 temp64; - temp64 = cpu_to_be64((req_ctx->assoclen + - GCM_BLOCK_RFC4_IV_SIZE + cryptlen) * 8); + temp64 = cpu_to_be64((req_ctx->assoclen + cryptlen) * 8); memcpy(&req_ctx->gcm_len_block.len_a, &temp64, sizeof(temp64)); temp64 = 0; memcpy(&req_ctx->gcm_len_block.len_c, &temp64, 8); @@ -1891,7 +1886,6 @@ static void cc_proc_rfc4_gcm(struct aead_request *req) memcpy(areq_ctx->ctr_iv + GCM_BLOCK_RFC4_IV_OFFSET, req->iv, GCM_BLOCK_RFC4_IV_SIZE); req->iv = areq_ctx->ctr_iv; - areq_ctx->assoclen -= GCM_BLOCK_RFC4_IV_SIZE; } static int cc_proc_aead(struct aead_request *req, @@ -1921,8 +1915,8 @@ static int cc_proc_aead(struct aead_request *req, } /* Setup request structure */ - cc_req.user_cb = (void *)cc_aead_complete; - cc_req.user_arg = (void *)req; + cc_req.user_cb = cc_aead_complete; + cc_req.user_arg = req; /* Setup request context */ areq_ctx->gen_ctx.op_type = direct; @@ -1989,7 +1983,6 @@ static int cc_proc_aead(struct aead_request *req, /* Load MLLI tables to SRAM if necessary */ cc_mlli_to_sram(req, desc, &seq_len); - /*TODO: move seq len by reference */ switch (ctx->auth_mode) { case DRV_HASH_SHA1: case DRV_HASH_SHA256: @@ -2034,9 +2027,6 @@ static int cc_aead_encrypt(struct aead_request *req) /* No generated IV required */ areq_ctx->backup_iv = req->iv; areq_ctx->assoclen = req->assoclen; - areq_ctx->is_gcm4543 = false; - - areq_ctx->plaintext_authenticate_only = false; rc = cc_proc_aead(req, DRV_CRYPTO_DIRECTION_ENCRYPT); if (rc != -EINPROGRESS && rc != -EBUSY) @@ -2050,22 +2040,17 @@ static int cc_rfc4309_ccm_encrypt(struct aead_request *req) /* Very similar to cc_aead_encrypt() above. */ struct aead_req_ctx *areq_ctx = aead_request_ctx(req); - struct crypto_aead *tfm = crypto_aead_reqtfm(req); - struct cc_aead_ctx *ctx = crypto_aead_ctx(tfm); - struct device *dev = drvdata_to_dev(ctx->drvdata); - int rc = -EINVAL; + int rc; - if (!valid_assoclen(req)) { - dev_dbg(dev, "invalid Assoclen:%u\n", req->assoclen); + rc = crypto_ipsec_check_assoclen(req->assoclen); + if (rc) goto out; - } memset(areq_ctx, 0, sizeof(*areq_ctx)); /* No generated IV required */ areq_ctx->backup_iv = req->iv; - areq_ctx->assoclen = req->assoclen; - areq_ctx->is_gcm4543 = true; + areq_ctx->assoclen = req->assoclen - CCM_BLOCK_IV_SIZE; cc_proc_rfc4309_ccm(req); @@ -2086,9 +2071,6 @@ static int cc_aead_decrypt(struct aead_request *req) /* No generated IV required */ areq_ctx->backup_iv = req->iv; areq_ctx->assoclen = req->assoclen; - areq_ctx->is_gcm4543 = false; - - areq_ctx->plaintext_authenticate_only = false; rc = cc_proc_aead(req, DRV_CRYPTO_DIRECTION_DECRYPT); if (rc != -EINPROGRESS && rc != -EBUSY) @@ -2099,24 +2081,19 @@ static int cc_aead_decrypt(struct aead_request *req) static int cc_rfc4309_ccm_decrypt(struct aead_request *req) { - struct crypto_aead *tfm = crypto_aead_reqtfm(req); - struct cc_aead_ctx *ctx = crypto_aead_ctx(tfm); - struct device *dev = drvdata_to_dev(ctx->drvdata); struct aead_req_ctx *areq_ctx = aead_request_ctx(req); - int rc = -EINVAL; + int rc; - if (!valid_assoclen(req)) { - dev_dbg(dev, "invalid Assoclen:%u\n", req->assoclen); + rc = crypto_ipsec_check_assoclen(req->assoclen); + if (rc) goto out; - } memset(areq_ctx, 0, sizeof(*areq_ctx)); /* No generated IV required */ areq_ctx->backup_iv = req->iv; - areq_ctx->assoclen = req->assoclen; + areq_ctx->assoclen = req->assoclen - CCM_BLOCK_IV_SIZE; - areq_ctx->is_gcm4543 = true; cc_proc_rfc4309_ccm(req); rc = cc_proc_aead(req, DRV_CRYPTO_DIRECTION_DECRYPT); @@ -2216,28 +2193,20 @@ static int cc_rfc4543_gcm_setauthsize(struct crypto_aead *authenc, static int cc_rfc4106_gcm_encrypt(struct aead_request *req) { - /* Very similar to cc_aead_encrypt() above. */ - - struct crypto_aead *tfm = crypto_aead_reqtfm(req); - struct cc_aead_ctx *ctx = crypto_aead_ctx(tfm); - struct device *dev = drvdata_to_dev(ctx->drvdata); struct aead_req_ctx *areq_ctx = aead_request_ctx(req); - int rc = -EINVAL; + int rc; - if (!valid_assoclen(req)) { - dev_dbg(dev, "invalid Assoclen:%u\n", req->assoclen); + rc = crypto_ipsec_check_assoclen(req->assoclen); + if (rc) goto out; - } memset(areq_ctx, 0, sizeof(*areq_ctx)); /* No generated IV required */ areq_ctx->backup_iv = req->iv; - areq_ctx->assoclen = req->assoclen; - areq_ctx->plaintext_authenticate_only = false; + areq_ctx->assoclen = req->assoclen - GCM_BLOCK_RFC4_IV_SIZE; cc_proc_rfc4_gcm(req); - areq_ctx->is_gcm4543 = true; rc = cc_proc_aead(req, DRV_CRYPTO_DIRECTION_ENCRYPT); if (rc != -EINPROGRESS && rc != -EBUSY) @@ -2248,17 +2217,12 @@ out: static int cc_rfc4543_gcm_encrypt(struct aead_request *req) { - /* Very similar to cc_aead_encrypt() above. */ - struct crypto_aead *tfm = crypto_aead_reqtfm(req); - struct cc_aead_ctx *ctx = crypto_aead_ctx(tfm); - struct device *dev = drvdata_to_dev(ctx->drvdata); struct aead_req_ctx *areq_ctx = aead_request_ctx(req); - int rc = -EINVAL; + int rc; - if (!valid_assoclen(req)) { - dev_dbg(dev, "invalid Assoclen:%u\n", req->assoclen); + rc = crypto_ipsec_check_assoclen(req->assoclen); + if (rc) goto out; - } memset(areq_ctx, 0, sizeof(*areq_ctx)); @@ -2270,7 +2234,6 @@ static int cc_rfc4543_gcm_encrypt(struct aead_request *req) areq_ctx->assoclen = req->assoclen; cc_proc_rfc4_gcm(req); - areq_ctx->is_gcm4543 = true; rc = cc_proc_aead(req, DRV_CRYPTO_DIRECTION_ENCRYPT); if (rc != -EINPROGRESS && rc != -EBUSY) @@ -2281,28 +2244,20 @@ out: static int cc_rfc4106_gcm_decrypt(struct aead_request *req) { - /* Very similar to cc_aead_decrypt() above. */ - - struct crypto_aead *tfm = crypto_aead_reqtfm(req); - struct cc_aead_ctx *ctx = crypto_aead_ctx(tfm); - struct device *dev = drvdata_to_dev(ctx->drvdata); struct aead_req_ctx *areq_ctx = aead_request_ctx(req); - int rc = -EINVAL; + int rc; - if (!valid_assoclen(req)) { - dev_dbg(dev, "invalid Assoclen:%u\n", req->assoclen); + rc = crypto_ipsec_check_assoclen(req->assoclen); + if (rc) goto out; - } memset(areq_ctx, 0, sizeof(*areq_ctx)); /* No generated IV required */ areq_ctx->backup_iv = req->iv; - areq_ctx->assoclen = req->assoclen; - areq_ctx->plaintext_authenticate_only = false; + areq_ctx->assoclen = req->assoclen - GCM_BLOCK_RFC4_IV_SIZE; cc_proc_rfc4_gcm(req); - areq_ctx->is_gcm4543 = true; rc = cc_proc_aead(req, DRV_CRYPTO_DIRECTION_DECRYPT); if (rc != -EINPROGRESS && rc != -EBUSY) @@ -2313,17 +2268,12 @@ out: static int cc_rfc4543_gcm_decrypt(struct aead_request *req) { - /* Very similar to cc_aead_decrypt() above. */ - struct crypto_aead *tfm = crypto_aead_reqtfm(req); - struct cc_aead_ctx *ctx = crypto_aead_ctx(tfm); - struct device *dev = drvdata_to_dev(ctx->drvdata); struct aead_req_ctx *areq_ctx = aead_request_ctx(req); - int rc = -EINVAL; + int rc; - if (!valid_assoclen(req)) { - dev_dbg(dev, "invalid Assoclen:%u\n", req->assoclen); + rc = crypto_ipsec_check_assoclen(req->assoclen); + if (rc) goto out; - } memset(areq_ctx, 0, sizeof(*areq_ctx)); @@ -2335,7 +2285,6 @@ static int cc_rfc4543_gcm_decrypt(struct aead_request *req) areq_ctx->assoclen = req->assoclen; cc_proc_rfc4_gcm(req); - areq_ctx->is_gcm4543 = true; rc = cc_proc_aead(req, DRV_CRYPTO_DIRECTION_DECRYPT); if (rc != -EINPROGRESS && rc != -EBUSY) @@ -2614,7 +2563,7 @@ static struct cc_crypto_alg *cc_create_aead_alg(struct cc_alg_template *tmpl, struct cc_crypto_alg *t_alg; struct aead_alg *alg; - t_alg = kzalloc(sizeof(*t_alg), GFP_KERNEL); + t_alg = devm_kzalloc(dev, sizeof(*t_alg), GFP_KERNEL); if (!t_alg) return ERR_PTR(-ENOMEM); @@ -2628,6 +2577,7 @@ static struct cc_crypto_alg *cc_create_aead_alg(struct cc_alg_template *tmpl, alg->base.cra_ctxsize = sizeof(struct cc_aead_ctx); alg->base.cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_KERN_DRIVER_ONLY; + alg->base.cra_blocksize = tmpl->blocksize; alg->init = cc_aead_init; alg->exit = cc_aead_exit; @@ -2643,19 +2593,12 @@ static struct cc_crypto_alg *cc_create_aead_alg(struct cc_alg_template *tmpl, int cc_aead_free(struct cc_drvdata *drvdata) { struct cc_crypto_alg *t_alg, *n; - struct cc_aead_handle *aead_handle = - (struct cc_aead_handle *)drvdata->aead_handle; - - if (aead_handle) { - /* Remove registered algs */ - list_for_each_entry_safe(t_alg, n, &aead_handle->aead_list, - entry) { - crypto_unregister_aead(&t_alg->aead_alg); - list_del(&t_alg->entry); - kfree(t_alg); - } - kfree(aead_handle); - drvdata->aead_handle = NULL; + struct cc_aead_handle *aead_handle = drvdata->aead_handle; + + /* Remove registered algs */ + list_for_each_entry_safe(t_alg, n, &aead_handle->aead_list, entry) { + crypto_unregister_aead(&t_alg->aead_alg); + list_del(&t_alg->entry); } return 0; @@ -2669,7 +2612,7 @@ int cc_aead_alloc(struct cc_drvdata *drvdata) int alg; struct device *dev = drvdata_to_dev(drvdata); - aead_handle = kmalloc(sizeof(*aead_handle), GFP_KERNEL); + aead_handle = devm_kmalloc(dev, sizeof(*aead_handle), GFP_KERNEL); if (!aead_handle) { rc = -ENOMEM; goto fail0; @@ -2682,7 +2625,6 @@ int cc_aead_alloc(struct cc_drvdata *drvdata) MAX_HMAC_DIGEST_SIZE); if (aead_handle->sram_workspace_addr == NULL_SRAM_ADDR) { - dev_err(dev, "SRAM pool exhausted\n"); rc = -ENOMEM; goto fail1; } @@ -2705,18 +2647,16 @@ int cc_aead_alloc(struct cc_drvdata *drvdata) if (rc) { dev_err(dev, "%s alg registration failed\n", t_alg->aead_alg.base.cra_driver_name); - goto fail2; - } else { - list_add_tail(&t_alg->entry, &aead_handle->aead_list); - dev_dbg(dev, "Registered %s\n", - t_alg->aead_alg.base.cra_driver_name); + goto fail1; } + + list_add_tail(&t_alg->entry, &aead_handle->aead_list); + dev_dbg(dev, "Registered %s\n", + t_alg->aead_alg.base.cra_driver_name); } return 0; -fail2: - kfree(t_alg); fail1: cc_aead_free(drvdata); fail0: diff --git a/drivers/crypto/ccree/cc_aead.h b/drivers/crypto/ccree/cc_aead.h index f12169b57f9d..b69591550730 100644 --- a/drivers/crypto/ccree/cc_aead.h +++ b/drivers/crypto/ccree/cc_aead.h @@ -66,7 +66,7 @@ struct aead_req_ctx { /* used to prevent cache coherence problem */ u8 backup_mac[MAX_MAC_SIZE]; u8 *backup_iv; /* store orig iv */ - u32 assoclen; /* internal assoclen */ + u32 assoclen; /* size of AAD buffer to authenticate */ dma_addr_t mac_buf_dma_addr; /* internal ICV DMA buffer */ /* buffer for internal ccm configurations */ dma_addr_t ccm_iv0_dma_addr; @@ -79,7 +79,6 @@ struct aead_req_ctx { dma_addr_t gcm_iv_inc2_dma_addr; dma_addr_t hkey_dma_addr; /* Phys. address of hkey */ dma_addr_t gcm_block_len_dma_addr; /* Phys. address of gcm block len */ - bool is_gcm4543; u8 *icv_virt_addr; /* Virt. address of ICV */ struct async_gen_req_ctx gen_ctx; diff --git a/drivers/crypto/ccree/cc_buffer_mgr.c b/drivers/crypto/ccree/cc_buffer_mgr.c index a72586eccd81..b2bd093e7013 100644 --- a/drivers/crypto/ccree/cc_buffer_mgr.c +++ b/drivers/crypto/ccree/cc_buffer_mgr.c @@ -13,16 +13,6 @@ #include "cc_hash.h" #include "cc_aead.h" -enum dma_buffer_type { - DMA_NULL_TYPE = -1, - DMA_SGL_TYPE = 1, - DMA_BUFF_TYPE = 2, -}; - -struct buff_mgr_handle { - struct dma_pool *mlli_buffs_pool; -}; - union buffer_array_entry { struct scatterlist *sgl; dma_addr_t buffer_dma; @@ -34,7 +24,6 @@ struct buffer_array { unsigned int offset[MAX_NUM_OF_BUFFERS_IN_MLLI]; int nents[MAX_NUM_OF_BUFFERS_IN_MLLI]; int total_data_len[MAX_NUM_OF_BUFFERS_IN_MLLI]; - enum dma_buffer_type type[MAX_NUM_OF_BUFFERS_IN_MLLI]; bool is_last[MAX_NUM_OF_BUFFERS_IN_MLLI]; u32 *mlli_nents[MAX_NUM_OF_BUFFERS_IN_MLLI]; }; @@ -64,11 +53,7 @@ static void cc_copy_mac(struct device *dev, struct aead_request *req, enum cc_sg_cpy_direct dir) { struct aead_req_ctx *areq_ctx = aead_request_ctx(req); - struct crypto_aead *tfm = crypto_aead_reqtfm(req); - u32 skip = areq_ctx->assoclen + req->cryptlen; - - if (areq_ctx->is_gcm4543) - skip += crypto_aead_ivsize(tfm); + u32 skip = req->assoclen + req->cryptlen; cc_copy_sg_portion(dev, areq_ctx->backup_mac, req->src, (skip - areq_ctx->req_authsize), skip, dir); @@ -77,9 +62,13 @@ static void cc_copy_mac(struct device *dev, struct aead_request *req, /** * cc_get_sgl_nents() - Get scatterlist number of entries. * + * @dev: Device object * @sg_list: SG list * @nbytes: [IN] Total SGL data bytes. * @lbytes: [OUT] Returns the amount of bytes at the last entry + * + * Return: + * Number of entries in the scatterlist */ static unsigned int cc_get_sgl_nents(struct device *dev, struct scatterlist *sg_list, @@ -87,6 +76,8 @@ static unsigned int cc_get_sgl_nents(struct device *dev, { unsigned int nents = 0; + *lbytes = 0; + while (nbytes && sg_list) { nents++; /* get the number of bytes in the last entry */ @@ -95,6 +86,7 @@ static unsigned int cc_get_sgl_nents(struct device *dev, nbytes : sg_list->length; sg_list = sg_next(sg_list); } + dev_dbg(dev, "nents %d last bytes %d\n", nents, *lbytes); return nents; } @@ -103,11 +95,13 @@ static unsigned int cc_get_sgl_nents(struct device *dev, * cc_copy_sg_portion() - Copy scatter list data, * from to_skip to end, to dest and vice versa * - * @dest: - * @sg: - * @to_skip: - * @end: - * @direct: + * @dev: Device object + * @dest: Buffer to copy to/from + * @sg: SG list + * @to_skip: Number of bytes to skip before copying + * @end: Offset of last byte to copy + * @direct: Transfer direction (true == from SG list to buffer, false == from + * buffer to SG list) */ void cc_copy_sg_portion(struct device *dev, u8 *dest, struct scatterlist *sg, u32 to_skip, u32 end, enum cc_sg_cpy_direct direct) @@ -115,7 +109,7 @@ void cc_copy_sg_portion(struct device *dev, u8 *dest, struct scatterlist *sg, u32 nents; nents = sg_nents_for_len(sg, end); - sg_copy_buffer(sg, nents, (void *)dest, (end - to_skip + 1), to_skip, + sg_copy_buffer(sg, nents, dest, (end - to_skip + 1), to_skip, (direct == CC_SG_TO_BUF)); } @@ -204,21 +198,15 @@ static int cc_generate_mlli(struct device *dev, struct buffer_array *sg_data, goto build_mlli_exit; } /* Point to start of MLLI */ - mlli_p = (u32 *)mlli_params->mlli_virt_addr; + mlli_p = mlli_params->mlli_virt_addr; /* go over all SG's and link it to one MLLI table */ for (i = 0; i < sg_data->num_of_buffers; i++) { union buffer_array_entry *entry = &sg_data->entry[i]; u32 tot_len = sg_data->total_data_len[i]; u32 offset = sg_data->offset[i]; - if (sg_data->type[i] == DMA_SGL_TYPE) - rc = cc_render_sg_to_mlli(dev, entry->sgl, tot_len, - offset, &total_nents, - &mlli_p); - else /*DMA_BUFF_TYPE*/ - rc = cc_render_buff_to_mlli(dev, entry->buffer_dma, - tot_len, &total_nents, - &mlli_p); + rc = cc_render_sg_to_mlli(dev, entry->sgl, tot_len, offset, + &total_nents, &mlli_p); if (rc) return rc; @@ -244,27 +232,6 @@ build_mlli_exit: return rc; } -static void cc_add_buffer_entry(struct device *dev, - struct buffer_array *sgl_data, - dma_addr_t buffer_dma, unsigned int buffer_len, - bool is_last_entry, u32 *mlli_nents) -{ - unsigned int index = sgl_data->num_of_buffers; - - dev_dbg(dev, "index=%u single_buff=%pad buffer_len=0x%08X is_last=%d\n", - index, &buffer_dma, buffer_len, is_last_entry); - sgl_data->nents[index] = 1; - sgl_data->entry[index].buffer_dma = buffer_dma; - sgl_data->offset[index] = 0; - sgl_data->total_data_len[index] = buffer_len; - sgl_data->type[index] = DMA_BUFF_TYPE; - sgl_data->is_last[index] = is_last_entry; - sgl_data->mlli_nents[index] = mlli_nents; - if (sgl_data->mlli_nents[index]) - *sgl_data->mlli_nents[index] = 0; - sgl_data->num_of_buffers++; -} - static void cc_add_sg_entry(struct device *dev, struct buffer_array *sgl_data, unsigned int nents, struct scatterlist *sgl, unsigned int data_len, unsigned int data_offset, @@ -278,7 +245,6 @@ static void cc_add_sg_entry(struct device *dev, struct buffer_array *sgl_data, sgl_data->entry[index].sgl = sgl; sgl_data->offset[index] = data_offset; sgl_data->total_data_len[index] = data_len; - sgl_data->type[index] = DMA_SGL_TYPE; sgl_data->is_last[index] = is_last_table; sgl_data->mlli_nents[index] = mlli_nents; if (sgl_data->mlli_nents[index]) @@ -290,37 +256,25 @@ static int cc_map_sg(struct device *dev, struct scatterlist *sg, unsigned int nbytes, int direction, u32 *nents, u32 max_sg_nents, u32 *lbytes, u32 *mapped_nents) { - if (sg_is_last(sg)) { - /* One entry only case -set to DLLI */ - if (dma_map_sg(dev, sg, 1, direction) != 1) { - dev_err(dev, "dma_map_sg() single buffer failed\n"); - return -ENOMEM; - } - dev_dbg(dev, "Mapped sg: dma_address=%pad page=%p addr=%pK offset=%u length=%u\n", - &sg_dma_address(sg), sg_page(sg), sg_virt(sg), - sg->offset, sg->length); - *lbytes = nbytes; - *nents = 1; - *mapped_nents = 1; - } else { /*sg_is_last*/ - *nents = cc_get_sgl_nents(dev, sg, nbytes, lbytes); - if (*nents > max_sg_nents) { - *nents = 0; - dev_err(dev, "Too many fragments. current %d max %d\n", - *nents, max_sg_nents); - return -ENOMEM; - } - /* In case of mmu the number of mapped nents might - * be changed from the original sgl nents - */ - *mapped_nents = dma_map_sg(dev, sg, *nents, direction); - if (*mapped_nents == 0) { - *nents = 0; - dev_err(dev, "dma_map_sg() sg buffer failed\n"); - return -ENOMEM; - } + int ret = 0; + + *nents = cc_get_sgl_nents(dev, sg, nbytes, lbytes); + if (*nents > max_sg_nents) { + *nents = 0; + dev_err(dev, "Too many fragments. current %d max %d\n", + *nents, max_sg_nents); + return -ENOMEM; + } + + ret = dma_map_sg(dev, sg, *nents, direction); + if (dma_mapping_error(dev, ret)) { + *nents = 0; + dev_err(dev, "dma_map_sg() sg buffer failed %d\n", ret); + return -ENOMEM; } + *mapped_nents = ret; + return 0; } @@ -411,7 +365,6 @@ int cc_map_cipher_request(struct cc_drvdata *drvdata, void *ctx, { struct cipher_req_ctx *req_ctx = (struct cipher_req_ctx *)ctx; struct mlli_params *mlli_params = &req_ctx->mlli_params; - struct buff_mgr_handle *buff_mgr = drvdata->buff_mgr_handle; struct device *dev = drvdata_to_dev(drvdata); struct buffer_array sg_data; u32 dummy = 0; @@ -424,10 +377,9 @@ int cc_map_cipher_request(struct cc_drvdata *drvdata, void *ctx, /* Map IV buffer */ if (ivsize) { - dump_byte_array("iv", (u8 *)info, ivsize); + dump_byte_array("iv", info, ivsize); req_ctx->gen_ctx.iv_dma_addr = - dma_map_single(dev, (void *)info, - ivsize, DMA_BIDIRECTIONAL); + dma_map_single(dev, info, ivsize, DMA_BIDIRECTIONAL); if (dma_mapping_error(dev, req_ctx->gen_ctx.iv_dma_addr)) { dev_err(dev, "Mapping iv %u B at va=%pK for DMA failed\n", ivsize, info); @@ -476,7 +428,7 @@ int cc_map_cipher_request(struct cc_drvdata *drvdata, void *ctx, } if (req_ctx->dma_buf_type == CC_DMA_BUF_MLLI) { - mlli_params->curr_pool = buff_mgr->mlli_buffs_pool; + mlli_params->curr_pool = drvdata->mlli_buffs_pool; rc = cc_generate_mlli(dev, &sg_data, mlli_params, flags); if (rc) goto cipher_exit; @@ -555,11 +507,12 @@ void cc_unmap_aead_request(struct device *dev, struct aead_request *req) sg_virt(req->src), areq_ctx->src.nents, areq_ctx->assoc.nents, areq_ctx->assoclen, req->cryptlen); - dma_unmap_sg(dev, req->src, sg_nents(req->src), DMA_BIDIRECTIONAL); + dma_unmap_sg(dev, req->src, areq_ctx->src.mapped_nents, + DMA_BIDIRECTIONAL); if (req->src != req->dst) { dev_dbg(dev, "Unmapping dst sgl: req->dst=%pK\n", sg_virt(req->dst)); - dma_unmap_sg(dev, req->dst, sg_nents(req->dst), + dma_unmap_sg(dev, req->dst, areq_ctx->dst.mapped_nents, DMA_BIDIRECTIONAL); } if (drvdata->coherent && @@ -614,18 +567,6 @@ static int cc_aead_chain_iv(struct cc_drvdata *drvdata, dev_dbg(dev, "Mapped iv %u B at va=%pK to dma=%pad\n", hw_iv_size, req->iv, &areq_ctx->gen_ctx.iv_dma_addr); - // TODO: what about CTR?? ask Ron - if (do_chain && areq_ctx->plaintext_authenticate_only) { - struct crypto_aead *tfm = crypto_aead_reqtfm(req); - unsigned int iv_size_to_authenc = crypto_aead_ivsize(tfm); - unsigned int iv_ofs = GCM_BLOCK_RFC4_IV_OFFSET; - /* Chain to given list */ - cc_add_buffer_entry(dev, sg_data, - (areq_ctx->gen_ctx.iv_dma_addr + iv_ofs), - iv_size_to_authenc, is_last, - &areq_ctx->assoc.mlli_nents); - areq_ctx->assoc_buff_type = CC_DMA_BUF_MLLI; - } chain_iv_exit: return rc; @@ -639,13 +580,8 @@ static int cc_aead_chain_assoc(struct cc_drvdata *drvdata, struct aead_req_ctx *areq_ctx = aead_request_ctx(req); int rc = 0; int mapped_nents = 0; - struct crypto_aead *tfm = crypto_aead_reqtfm(req); - unsigned int size_of_assoc = areq_ctx->assoclen; struct device *dev = drvdata_to_dev(drvdata); - if (areq_ctx->is_gcm4543) - size_of_assoc += crypto_aead_ivsize(tfm); - if (!sg_data) { rc = -EINVAL; goto chain_assoc_exit; @@ -661,7 +597,7 @@ static int cc_aead_chain_assoc(struct cc_drvdata *drvdata, goto chain_assoc_exit; } - mapped_nents = sg_nents_for_len(req->src, size_of_assoc); + mapped_nents = sg_nents_for_len(req->src, areq_ctx->assoclen); if (mapped_nents < 0) return mapped_nents; @@ -854,16 +790,11 @@ static int cc_aead_chain_data(struct cc_drvdata *drvdata, u32 src_mapped_nents = 0, dst_mapped_nents = 0; u32 offset = 0; /* non-inplace mode */ - unsigned int size_for_map = areq_ctx->assoclen + req->cryptlen; - struct crypto_aead *tfm = crypto_aead_reqtfm(req); + unsigned int size_for_map = req->assoclen + req->cryptlen; u32 sg_index = 0; - bool is_gcm4543 = areq_ctx->is_gcm4543; - u32 size_to_skip = areq_ctx->assoclen; + u32 size_to_skip = req->assoclen; struct scatterlist *sgl; - if (is_gcm4543) - size_to_skip += crypto_aead_ivsize(tfm); - offset = size_to_skip; if (!sg_data) @@ -872,16 +803,13 @@ static int cc_aead_chain_data(struct cc_drvdata *drvdata, areq_ctx->src_sgl = req->src; areq_ctx->dst_sgl = req->dst; - if (is_gcm4543) - size_for_map += crypto_aead_ivsize(tfm); - size_for_map += (direct == DRV_CRYPTO_DIRECTION_ENCRYPT) ? authsize : 0; src_mapped_nents = cc_get_sgl_nents(dev, req->src, size_for_map, &src_last_bytes); sg_index = areq_ctx->src_sgl->length; //check where the data starts - while (sg_index <= size_to_skip) { + while (src_mapped_nents && (sg_index <= size_to_skip)) { src_mapped_nents--; offset -= areq_ctx->src_sgl->length; sgl = sg_next(areq_ctx->src_sgl); @@ -901,14 +829,15 @@ static int cc_aead_chain_data(struct cc_drvdata *drvdata, areq_ctx->src_offset = offset; if (req->src != req->dst) { - size_for_map = areq_ctx->assoclen + req->cryptlen; - size_for_map += (direct == DRV_CRYPTO_DIRECTION_ENCRYPT) ? - authsize : 0; - if (is_gcm4543) - size_for_map += crypto_aead_ivsize(tfm); + size_for_map = req->assoclen + req->cryptlen; + + if (direct == DRV_CRYPTO_DIRECTION_ENCRYPT) + size_for_map += authsize; + else + size_for_map -= authsize; rc = cc_map_sg(dev, req->dst, size_for_map, DMA_BIDIRECTIONAL, - &areq_ctx->dst.nents, + &areq_ctx->dst.mapped_nents, LLI_MAX_NUM_OF_DATA_ENTRIES, &dst_last_bytes, &dst_mapped_nents); if (rc) @@ -921,7 +850,7 @@ static int cc_aead_chain_data(struct cc_drvdata *drvdata, offset = size_to_skip; //check where the data starts - while (sg_index <= size_to_skip) { + while (dst_mapped_nents && sg_index <= size_to_skip) { dst_mapped_nents--; offset -= areq_ctx->dst_sgl->length; sgl = sg_next(areq_ctx->dst_sgl); @@ -1012,14 +941,11 @@ int cc_map_aead_request(struct cc_drvdata *drvdata, struct aead_request *req) struct device *dev = drvdata_to_dev(drvdata); struct buffer_array sg_data; unsigned int authsize = areq_ctx->req_authsize; - struct buff_mgr_handle *buff_mgr = drvdata->buff_mgr_handle; int rc = 0; - struct crypto_aead *tfm = crypto_aead_reqtfm(req); - bool is_gcm4543 = areq_ctx->is_gcm4543; dma_addr_t dma_addr; u32 mapped_nents = 0; u32 dummy = 0; /*used for the assoc data fragments */ - u32 size_to_map = 0; + u32 size_to_map; gfp_t flags = cc_gfp_flags(&req->base); mlli_params->curr_pool = NULL; @@ -1116,14 +1042,15 @@ int cc_map_aead_request(struct cc_drvdata *drvdata, struct aead_request *req) areq_ctx->gcm_iv_inc2_dma_addr = dma_addr; } - size_to_map = req->cryptlen + areq_ctx->assoclen; - if (areq_ctx->gen_ctx.op_type == DRV_CRYPTO_DIRECTION_ENCRYPT) + size_to_map = req->cryptlen + req->assoclen; + /* If we do in-place encryption, we also need the auth tag */ + if ((areq_ctx->gen_ctx.op_type == DRV_CRYPTO_DIRECTION_ENCRYPT) && + (req->src == req->dst)) { size_to_map += authsize; + } - if (is_gcm4543) - size_to_map += crypto_aead_ivsize(tfm); rc = cc_map_sg(dev, req->src, size_to_map, DMA_BIDIRECTIONAL, - &areq_ctx->src.nents, + &areq_ctx->src.mapped_nents, (LLI_MAX_NUM_OF_ASSOC_DATA_ENTRIES + LLI_MAX_NUM_OF_DATA_ENTRIES), &dummy, &mapped_nents); @@ -1183,7 +1110,7 @@ int cc_map_aead_request(struct cc_drvdata *drvdata, struct aead_request *req) */ if (areq_ctx->assoc_buff_type == CC_DMA_BUF_MLLI || areq_ctx->data_buff_type == CC_DMA_BUF_MLLI) { - mlli_params->curr_pool = buff_mgr->mlli_buffs_pool; + mlli_params->curr_pool = drvdata->mlli_buffs_pool; rc = cc_generate_mlli(dev, &sg_data, mlli_params, flags); if (rc) goto aead_map_failure; @@ -1211,7 +1138,6 @@ int cc_map_hash_request_final(struct cc_drvdata *drvdata, void *ctx, u32 *curr_buff_cnt = cc_hash_buf_cnt(areq_ctx); struct mlli_params *mlli_params = &areq_ctx->mlli_params; struct buffer_array sg_data; - struct buff_mgr_handle *buff_mgr = drvdata->buff_mgr_handle; int rc = 0; u32 dummy = 0; u32 mapped_nents = 0; @@ -1229,7 +1155,6 @@ int cc_map_hash_request_final(struct cc_drvdata *drvdata, void *ctx, return 0; } - /*TODO: copy data in case that buffer is enough for operation */ /* map the previous buffer */ if (*curr_buff_cnt) { rc = cc_set_hash_buf(dev, areq_ctx, curr_buff, *curr_buff_cnt, @@ -1258,7 +1183,7 @@ int cc_map_hash_request_final(struct cc_drvdata *drvdata, void *ctx, /*build mlli */ if (areq_ctx->data_dma_buf_type == CC_DMA_BUF_MLLI) { - mlli_params->curr_pool = buff_mgr->mlli_buffs_pool; + mlli_params->curr_pool = drvdata->mlli_buffs_pool; /* add the src data to the sg_data */ cc_add_sg_entry(dev, &sg_data, areq_ctx->in_nents, src, nbytes, 0, true, &areq_ctx->mlli_nents); @@ -1296,7 +1221,6 @@ int cc_map_hash_request_update(struct cc_drvdata *drvdata, void *ctx, unsigned int update_data_len; u32 total_in_len = nbytes + *curr_buff_cnt; struct buffer_array sg_data; - struct buff_mgr_handle *buff_mgr = drvdata->buff_mgr_handle; unsigned int swap_index = 0; int rc = 0; u32 dummy = 0; @@ -1371,7 +1295,7 @@ int cc_map_hash_request_update(struct cc_drvdata *drvdata, void *ctx, } if (areq_ctx->data_dma_buf_type == CC_DMA_BUF_MLLI) { - mlli_params->curr_pool = buff_mgr->mlli_buffs_pool; + mlli_params->curr_pool = drvdata->mlli_buffs_pool; /* add the src data to the sg_data */ cc_add_sg_entry(dev, &sg_data, areq_ctx->in_nents, src, (update_data_len - *curr_buff_cnt), 0, true, @@ -1438,39 +1362,22 @@ void cc_unmap_hash_request(struct device *dev, void *ctx, int cc_buffer_mgr_init(struct cc_drvdata *drvdata) { - struct buff_mgr_handle *buff_mgr_handle; struct device *dev = drvdata_to_dev(drvdata); - buff_mgr_handle = kmalloc(sizeof(*buff_mgr_handle), GFP_KERNEL); - if (!buff_mgr_handle) - return -ENOMEM; - - drvdata->buff_mgr_handle = buff_mgr_handle; - - buff_mgr_handle->mlli_buffs_pool = + drvdata->mlli_buffs_pool = dma_pool_create("dx_single_mlli_tables", dev, MAX_NUM_OF_TOTAL_MLLI_ENTRIES * LLI_ENTRY_BYTE_SIZE, MLLI_TABLE_MIN_ALIGNMENT, 0); - if (!buff_mgr_handle->mlli_buffs_pool) - goto error; + if (!drvdata->mlli_buffs_pool) + return -ENOMEM; return 0; - -error: - cc_buffer_mgr_fini(drvdata); - return -ENOMEM; } int cc_buffer_mgr_fini(struct cc_drvdata *drvdata) { - struct buff_mgr_handle *buff_mgr_handle = drvdata->buff_mgr_handle; - - if (buff_mgr_handle) { - dma_pool_destroy(buff_mgr_handle->mlli_buffs_pool); - kfree(drvdata->buff_mgr_handle); - drvdata->buff_mgr_handle = NULL; - } + dma_pool_destroy(drvdata->mlli_buffs_pool); return 0; } diff --git a/drivers/crypto/ccree/cc_buffer_mgr.h b/drivers/crypto/ccree/cc_buffer_mgr.h index af434872c6ff..653441b6542e 100644 --- a/drivers/crypto/ccree/cc_buffer_mgr.h +++ b/drivers/crypto/ccree/cc_buffer_mgr.h @@ -24,14 +24,15 @@ enum cc_sg_cpy_direct { }; struct cc_mlli { - cc_sram_addr_t sram_addr; + u32 sram_addr; + unsigned int mapped_nents; unsigned int nents; //sg nents unsigned int mlli_nents; //mlli nents might be different than the above }; struct mlli_params { struct dma_pool *curr_pool; - u8 *mlli_virt_addr; + void *mlli_virt_addr; dma_addr_t mlli_dma_addr; u32 mlli_len; }; diff --git a/drivers/crypto/ccree/cc_cipher.c b/drivers/crypto/ccree/cc_cipher.c index 7d6252d892d7..a84335328f37 100644 --- a/drivers/crypto/ccree/cc_cipher.c +++ b/drivers/crypto/ccree/cc_cipher.c @@ -20,10 +20,6 @@ #define template_skcipher template_u.skcipher -struct cc_cipher_handle { - struct list_head alg_list; -}; - struct cc_user_key_info { u8 *key; dma_addr_t key_dma_addr; @@ -184,7 +180,7 @@ static int cc_cipher_init(struct crypto_tfm *tfm) ctx_p->user.key); /* Map key buffer */ - ctx_p->user.key_dma_addr = dma_map_single(dev, (void *)ctx_p->user.key, + ctx_p->user.key_dma_addr = dma_map_single(dev, ctx_p->user.key, max_key_buf_size, DMA_TO_DEVICE); if (dma_mapping_error(dev, ctx_p->user.key_dma_addr)) { @@ -284,7 +280,7 @@ static int cc_cipher_sethkey(struct crypto_skcipher *sktfm, const u8 *key, dev_dbg(dev, "Setting HW key in context @%p for %s. keylen=%u\n", ctx_p, crypto_tfm_alg_name(tfm), keylen); - dump_byte_array("key", (u8 *)key, keylen); + dump_byte_array("key", key, keylen); /* STAT_PHASE_0: Init and sanity checks */ @@ -387,7 +383,7 @@ static int cc_cipher_setkey(struct crypto_skcipher *sktfm, const u8 *key, dev_dbg(dev, "Setting key in context @%p for %s. keylen=%u\n", ctx_p, crypto_tfm_alg_name(tfm), keylen); - dump_byte_array("key", (u8 *)key, keylen); + dump_byte_array("key", key, keylen); /* STAT_PHASE_0: Init and sanity checks */ @@ -533,14 +529,6 @@ static void cc_setup_state_desc(struct crypto_tfm *tfm, int flow_mode = ctx_p->flow_mode; int direction = req_ctx->gen_ctx.op_type; dma_addr_t iv_dma_addr = req_ctx->gen_ctx.iv_dma_addr; - unsigned int du_size = nbytes; - - struct cc_crypto_alg *cc_alg = - container_of(tfm->__crt_alg, struct cc_crypto_alg, - skcipher_alg.base); - - if (cc_alg->data_unit) - du_size = cc_alg->data_unit; switch (cipher_mode) { case DRV_CIPHER_ECB: @@ -753,7 +741,7 @@ static void cc_setup_mlli_desc(struct crypto_tfm *tfm, dev_dbg(dev, " bypass params addr %pad length 0x%X addr 0x%08X\n", &req_ctx->mlli_params.mlli_dma_addr, req_ctx->mlli_params.mlli_len, - (unsigned int)ctx_p->drvdata->mlli_sram_addr); + ctx_p->drvdata->mlli_sram_addr); hw_desc_init(&desc[*seq_size]); set_din_type(&desc[*seq_size], DMA_DLLI, req_ctx->mlli_params.mlli_dma_addr, @@ -801,16 +789,16 @@ static void cc_setup_flow_desc(struct crypto_tfm *tfm, req_ctx->in_mlli_nents, NS_BIT); if (req_ctx->out_nents == 0) { dev_dbg(dev, " din/dout params addr 0x%08X addr 0x%08X\n", - (unsigned int)ctx_p->drvdata->mlli_sram_addr, - (unsigned int)ctx_p->drvdata->mlli_sram_addr); + ctx_p->drvdata->mlli_sram_addr, + ctx_p->drvdata->mlli_sram_addr); set_dout_mlli(&desc[*seq_size], ctx_p->drvdata->mlli_sram_addr, req_ctx->in_mlli_nents, NS_BIT, (!last_desc ? 0 : 1)); } else { dev_dbg(dev, " din/dout params addr 0x%08X addr 0x%08X\n", - (unsigned int)ctx_p->drvdata->mlli_sram_addr, - (unsigned int)ctx_p->drvdata->mlli_sram_addr + + ctx_p->drvdata->mlli_sram_addr, + ctx_p->drvdata->mlli_sram_addr + (u32)LLI_ENTRY_BYTE_SIZE * req_ctx->in_nents); set_dout_mlli(&desc[*seq_size], (ctx_p->drvdata->mlli_sram_addr + @@ -871,7 +859,6 @@ static int cc_cipher_process(struct skcipher_request *req, /* STAT_PHASE_0: Init and sanity checks */ - /* TODO: check data length according to mode */ if (validate_data_size(ctx_p, nbytes)) { dev_dbg(dev, "Unsupported data size %d.\n", nbytes); rc = -EINVAL; @@ -893,8 +880,8 @@ static int cc_cipher_process(struct skcipher_request *req, } /* Setup request structure */ - cc_req.user_cb = (void *)cc_cipher_complete; - cc_req.user_arg = (void *)req; + cc_req.user_cb = cc_cipher_complete; + cc_req.user_arg = req; /* Setup CPP operation details */ if (ctx_p->key_type == CC_POLICY_PROTECTED_KEY) { @@ -1228,6 +1215,10 @@ static const struct cc_alg_template skcipher_algs[] = { .sec_func = true, }, { + /* See https://www.mail-archive.com/linux-crypto@vger.kernel.org/msg40576.html + * for the reason why this differs from the generic + * implementation. + */ .name = "xts(aes)", .driver_name = "xts-aes-ccree", .blocksize = 1, @@ -1423,7 +1414,7 @@ static const struct cc_alg_template skcipher_algs[] = { { .name = "ofb(aes)", .driver_name = "ofb-aes-ccree", - .blocksize = AES_BLOCK_SIZE, + .blocksize = 1, .template_skcipher = { .setkey = cc_cipher_setkey, .encrypt = cc_cipher_encrypt, @@ -1576,7 +1567,7 @@ static const struct cc_alg_template skcipher_algs[] = { { .name = "ctr(sm4)", .driver_name = "ctr-sm4-ccree", - .blocksize = SM4_BLOCK_SIZE, + .blocksize = 1, .template_skcipher = { .setkey = cc_cipher_setkey, .encrypt = cc_cipher_encrypt, @@ -1634,7 +1625,7 @@ static struct cc_crypto_alg *cc_create_alg(const struct cc_alg_template *tmpl, struct cc_crypto_alg *t_alg; struct skcipher_alg *alg; - t_alg = kzalloc(sizeof(*t_alg), GFP_KERNEL); + t_alg = devm_kzalloc(dev, sizeof(*t_alg), GFP_KERNEL); if (!t_alg) return ERR_PTR(-ENOMEM); @@ -1665,36 +1656,23 @@ static struct cc_crypto_alg *cc_create_alg(const struct cc_alg_template *tmpl, int cc_cipher_free(struct cc_drvdata *drvdata) { struct cc_crypto_alg *t_alg, *n; - struct cc_cipher_handle *cipher_handle = drvdata->cipher_handle; - - if (cipher_handle) { - /* Remove registered algs */ - list_for_each_entry_safe(t_alg, n, &cipher_handle->alg_list, - entry) { - crypto_unregister_skcipher(&t_alg->skcipher_alg); - list_del(&t_alg->entry); - kfree(t_alg); - } - kfree(cipher_handle); - drvdata->cipher_handle = NULL; + + /* Remove registered algs */ + list_for_each_entry_safe(t_alg, n, &drvdata->alg_list, entry) { + crypto_unregister_skcipher(&t_alg->skcipher_alg); + list_del(&t_alg->entry); } return 0; } int cc_cipher_alloc(struct cc_drvdata *drvdata) { - struct cc_cipher_handle *cipher_handle; struct cc_crypto_alg *t_alg; struct device *dev = drvdata_to_dev(drvdata); int rc = -ENOMEM; int alg; - cipher_handle = kmalloc(sizeof(*cipher_handle), GFP_KERNEL); - if (!cipher_handle) - return -ENOMEM; - - INIT_LIST_HEAD(&cipher_handle->alg_list); - drvdata->cipher_handle = cipher_handle; + INIT_LIST_HEAD(&drvdata->alg_list); /* Linux crypto */ dev_dbg(dev, "Number of algorithms = %zu\n", @@ -1723,14 +1701,12 @@ int cc_cipher_alloc(struct cc_drvdata *drvdata) if (rc) { dev_err(dev, "%s alg registration failed\n", t_alg->skcipher_alg.base.cra_driver_name); - kfree(t_alg); goto fail0; - } else { - list_add_tail(&t_alg->entry, - &cipher_handle->alg_list); - dev_dbg(dev, "Registered %s\n", - t_alg->skcipher_alg.base.cra_driver_name); } + + list_add_tail(&t_alg->entry, &drvdata->alg_list); + dev_dbg(dev, "Registered %s\n", + t_alg->skcipher_alg.base.cra_driver_name); } return 0; diff --git a/drivers/crypto/ccree/cc_debugfs.c b/drivers/crypto/ccree/cc_debugfs.c index 566999738698..c454afce7781 100644 --- a/drivers/crypto/ccree/cc_debugfs.c +++ b/drivers/crypto/ccree/cc_debugfs.c @@ -8,10 +8,6 @@ #include "cc_crypto_ctx.h" #include "cc_debugfs.h" -struct cc_debugfs_ctx { - struct dentry *dir; -}; - #define CC_DEBUG_REG(_X) { \ .name = __stringify(_X),\ .offset = CC_REG(_X) \ @@ -67,13 +63,8 @@ void __exit cc_debugfs_global_fini(void) int cc_debugfs_init(struct cc_drvdata *drvdata) { struct device *dev = drvdata_to_dev(drvdata); - struct cc_debugfs_ctx *ctx; struct debugfs_regset32 *regset, *verset; - ctx = devm_kzalloc(dev, sizeof(*ctx), GFP_KERNEL); - if (!ctx) - return -ENOMEM; - regset = devm_kzalloc(dev, sizeof(*regset), GFP_KERNEL); if (!regset) return -ENOMEM; @@ -81,16 +72,18 @@ int cc_debugfs_init(struct cc_drvdata *drvdata) regset->regs = debug_regs; regset->nregs = ARRAY_SIZE(debug_regs); regset->base = drvdata->cc_base; + regset->dev = dev; - ctx->dir = debugfs_create_dir(drvdata->plat_dev->name, cc_debugfs_dir); + drvdata->dir = debugfs_create_dir(drvdata->plat_dev->name, + cc_debugfs_dir); - debugfs_create_regset32("regs", 0400, ctx->dir, regset); - debugfs_create_bool("coherent", 0400, ctx->dir, &drvdata->coherent); + debugfs_create_regset32("regs", 0400, drvdata->dir, regset); + debugfs_create_bool("coherent", 0400, drvdata->dir, &drvdata->coherent); verset = devm_kzalloc(dev, sizeof(*verset), GFP_KERNEL); /* Failing here is not important enough to fail the module load */ if (!verset) - goto out; + return 0; if (drvdata->hw_rev <= CC_HW_REV_712) { ver_sig_regs[0].offset = drvdata->sig_offset; @@ -102,17 +95,13 @@ int cc_debugfs_init(struct cc_drvdata *drvdata) verset->nregs = ARRAY_SIZE(pid_cid_regs); } verset->base = drvdata->cc_base; + verset->dev = dev; - debugfs_create_regset32("version", 0400, ctx->dir, verset); - -out: - drvdata->debugfs = ctx; + debugfs_create_regset32("version", 0400, drvdata->dir, verset); return 0; } void cc_debugfs_fini(struct cc_drvdata *drvdata) { - struct cc_debugfs_ctx *ctx = (struct cc_debugfs_ctx *)drvdata->debugfs; - - debugfs_remove_recursive(ctx->dir); + debugfs_remove_recursive(drvdata->dir); } diff --git a/drivers/crypto/ccree/cc_driver.c b/drivers/crypto/ccree/cc_driver.c index 532bc95a8373..2d50991b9a17 100644 --- a/drivers/crypto/ccree/cc_driver.c +++ b/drivers/crypto/ccree/cc_driver.c @@ -14,6 +14,8 @@ #include <linux/of.h> #include <linux/clk.h> #include <linux/of_address.h> +#include <linux/of_device.h> +#include <linux/pm_runtime.h> #include "cc_driver.h" #include "cc_request_mgr.h" @@ -134,7 +136,7 @@ static irqreturn_t cc_isr(int irq, void *dev_id) /* STAT_OP_TYPE_GENERIC STAT_PHASE_0: Interrupt */ /* if driver suspended return, probably shared interrupt */ - if (cc_pm_is_dev_suspended(dev)) + if (pm_runtime_suspended(dev)) return IRQ_NONE; /* read the interrupt status */ @@ -269,7 +271,6 @@ static int init_cc_resources(struct platform_device *plat_dev) u32 val, hw_rev_pidr, sig_cidr; u64 dma_mask; const struct cc_hw_data *hw_rev; - const struct of_device_id *dev_id; struct clk *clk; int irq; int rc = 0; @@ -278,11 +279,7 @@ static int init_cc_resources(struct platform_device *plat_dev) if (!new_drvdata) return -ENOMEM; - dev_id = of_match_node(arm_ccree_dev_of_match, np); - if (!dev_id) - return -ENODEV; - - hw_rev = (struct cc_hw_data *)dev_id->data; + hw_rev = of_device_get_match_data(dev); new_drvdata->hw_rev_name = hw_rev->name; new_drvdata->hw_rev = hw_rev->rev; new_drvdata->std_bodies = hw_rev->std_bodies; @@ -302,22 +299,12 @@ static int init_cc_resources(struct platform_device *plat_dev) platform_set_drvdata(plat_dev, new_drvdata); new_drvdata->plat_dev = plat_dev; - clk = devm_clk_get(dev, NULL); - if (IS_ERR(clk)) - switch (PTR_ERR(clk)) { - /* Clock is optional so this might be fine */ - case -ENOENT: - break; - - /* Clock not available, let's try again soon */ - case -EPROBE_DEFER: - return -EPROBE_DEFER; - - default: - dev_err(dev, "Error getting clock: %ld\n", - PTR_ERR(clk)); - return PTR_ERR(clk); - } + clk = devm_clk_get_optional(dev, NULL); + if (IS_ERR(clk)) { + if (PTR_ERR(clk) != -EPROBE_DEFER) + dev_err(dev, "Error getting clock: %pe\n", clk); + return PTR_ERR(clk); + } new_drvdata->clk = clk; new_drvdata->coherent = of_dma_is_coherent(np); @@ -344,13 +331,13 @@ static int init_cc_resources(struct platform_device *plat_dev) init_completion(&new_drvdata->hw_queue_avail); - if (!plat_dev->dev.dma_mask) - plat_dev->dev.dma_mask = &plat_dev->dev.coherent_dma_mask; + if (!dev->dma_mask) + dev->dma_mask = &dev->coherent_dma_mask; dma_mask = DMA_BIT_MASK(DMA_BIT_MASK_LEN); while (dma_mask > 0x7fffffffUL) { - if (dma_supported(&plat_dev->dev, dma_mask)) { - rc = dma_set_coherent_mask(&plat_dev->dev, dma_mask); + if (dma_supported(dev, dma_mask)) { + rc = dma_set_coherent_mask(dev, dma_mask); if (!rc) break; } @@ -362,7 +349,7 @@ static int init_cc_resources(struct platform_device *plat_dev) return rc; } - rc = cc_clk_on(new_drvdata); + rc = clk_prepare_enable(new_drvdata->clk); if (rc) { dev_err(dev, "Failed to enable clock"); return rc; @@ -370,7 +357,17 @@ static int init_cc_resources(struct platform_device *plat_dev) new_drvdata->sec_disabled = cc_sec_disable; - /* wait for Crytpcell reset completion */ + pm_runtime_set_autosuspend_delay(dev, CC_SUSPEND_TIMEOUT); + pm_runtime_use_autosuspend(dev); + pm_runtime_set_active(dev); + pm_runtime_enable(dev); + rc = pm_runtime_get_sync(dev); + if (rc < 0) { + dev_err(dev, "pm_runtime_get_sync() failed: %d\n", rc); + goto post_pm_err; + } + + /* Wait for Cryptocell reset completion */ if (!cc_wait_for_reset_completion(new_drvdata)) { dev_err(dev, "Cryptocell reset not completed"); } @@ -382,7 +379,7 @@ static int init_cc_resources(struct platform_device *plat_dev) dev_err(dev, "Invalid CC signature: SIGNATURE=0x%08X != expected=0x%08X\n", val, hw_rev->sig); rc = -EINVAL; - goto post_clk_err; + goto post_pm_err; } sig_cidr = val; hw_rev_pidr = cc_ioread(new_drvdata, new_drvdata->ver_offset); @@ -393,7 +390,7 @@ static int init_cc_resources(struct platform_device *plat_dev) dev_err(dev, "Invalid CC PIDR: PIDR0124=0x%08X != expected=0x%08X\n", val, hw_rev->pidr_0124); rc = -EINVAL; - goto post_clk_err; + goto post_pm_err; } hw_rev_pidr = val; @@ -402,7 +399,7 @@ static int init_cc_resources(struct platform_device *plat_dev) dev_err(dev, "Invalid CC CIDR: CIDR0123=0x%08X != expected=0x%08X\n", val, hw_rev->cidr_0123); rc = -EINVAL; - goto post_clk_err; + goto post_pm_err; } sig_cidr = val; @@ -421,7 +418,7 @@ static int init_cc_resources(struct platform_device *plat_dev) default: dev_err(dev, "Unsupported engines configuration.\n"); rc = -EINVAL; - goto post_clk_err; + goto post_pm_err; } /* Check security disable state */ @@ -447,14 +444,14 @@ static int init_cc_resources(struct platform_device *plat_dev) new_drvdata); if (rc) { dev_err(dev, "Could not register to interrupt %d\n", irq); - goto post_clk_err; + goto post_pm_err; } dev_dbg(dev, "Registered to IRQ: %d\n", irq); rc = init_cc_regs(new_drvdata, true); if (rc) { dev_err(dev, "init_cc_regs failed\n"); - goto post_clk_err; + goto post_pm_err; } rc = cc_debugfs_init(new_drvdata); @@ -477,15 +474,14 @@ static int init_cc_resources(struct platform_device *plat_dev) new_drvdata->mlli_sram_addr = cc_sram_alloc(new_drvdata, MAX_MLLI_BUFF_SIZE); if (new_drvdata->mlli_sram_addr == NULL_SRAM_ADDR) { - dev_err(dev, "Failed to alloc MLLI Sram buffer\n"); rc = -ENOMEM; - goto post_sram_mgr_err; + goto post_fips_init_err; } rc = cc_req_mgr_init(new_drvdata); if (rc) { dev_err(dev, "cc_req_mgr_init failed\n"); - goto post_sram_mgr_err; + goto post_fips_init_err; } rc = cc_buffer_mgr_init(new_drvdata); @@ -494,12 +490,6 @@ static int init_cc_resources(struct platform_device *plat_dev) goto post_req_mgr_err; } - rc = cc_pm_init(new_drvdata); - if (rc) { - dev_err(dev, "cc_pm_init failed\n"); - goto post_buf_mgr_err; - } - /* Allocate crypto algs */ rc = cc_cipher_alloc(new_drvdata); if (rc) { @@ -520,15 +510,13 @@ static int init_cc_resources(struct platform_device *plat_dev) goto post_hash_err; } - /* All set, we can allow autosuspend */ - cc_pm_go(new_drvdata); - /* If we got here and FIPS mode is enabled * it means all FIPS test passed, so let TEE * know we're good. */ cc_set_ree_fips_status(new_drvdata, true); + pm_runtime_put(dev); return 0; post_hash_err: @@ -539,16 +527,17 @@ post_buf_mgr_err: cc_buffer_mgr_fini(new_drvdata); post_req_mgr_err: cc_req_mgr_fini(new_drvdata); -post_sram_mgr_err: - cc_sram_mgr_fini(new_drvdata); post_fips_init_err: cc_fips_fini(new_drvdata); post_debugfs_err: cc_debugfs_fini(new_drvdata); post_regs_err: fini_cc_regs(new_drvdata); -post_clk_err: - cc_clk_off(new_drvdata); +post_pm_err: + pm_runtime_put_noidle(dev); + pm_runtime_disable(dev); + pm_runtime_set_suspended(dev); + clk_disable_unprepare(new_drvdata->clk); return rc; } @@ -560,36 +549,22 @@ void fini_cc_regs(struct cc_drvdata *drvdata) static void cleanup_cc_resources(struct platform_device *plat_dev) { + struct device *dev = &plat_dev->dev; struct cc_drvdata *drvdata = (struct cc_drvdata *)platform_get_drvdata(plat_dev); cc_aead_free(drvdata); cc_hash_free(drvdata); cc_cipher_free(drvdata); - cc_pm_fini(drvdata); cc_buffer_mgr_fini(drvdata); cc_req_mgr_fini(drvdata); - cc_sram_mgr_fini(drvdata); cc_fips_fini(drvdata); cc_debugfs_fini(drvdata); fini_cc_regs(drvdata); - cc_clk_off(drvdata); -} - -int cc_clk_on(struct cc_drvdata *drvdata) -{ - struct clk *clk = drvdata->clk; - int rc; - - if (IS_ERR(clk)) - /* Not all devices have a clock associated with CCREE */ - return 0; - - rc = clk_prepare_enable(clk); - if (rc) - return rc; - - return 0; + pm_runtime_put_noidle(dev); + pm_runtime_disable(dev); + pm_runtime_set_suspended(dev); + clk_disable_unprepare(drvdata->clk); } unsigned int cc_get_default_hash_len(struct cc_drvdata *drvdata) @@ -600,17 +575,6 @@ unsigned int cc_get_default_hash_len(struct cc_drvdata *drvdata) return HASH_LEN_SIZE_630; } -void cc_clk_off(struct cc_drvdata *drvdata) -{ - struct clk *clk = drvdata->clk; - - if (IS_ERR(clk)) - /* Not all devices have a clock associated with CCREE */ - return; - - clk_disable_unprepare(clk); -} - static int ccree_probe(struct platform_device *plat_dev) { int rc; @@ -653,7 +617,6 @@ static struct platform_driver ccree_driver = { static int __init ccree_init(void) { - cc_hash_global_init(); cc_debugfs_global_init(); return platform_driver_register(&ccree_driver); diff --git a/drivers/crypto/ccree/cc_driver.h b/drivers/crypto/ccree/cc_driver.h index c227718ba992..d938886390d2 100644 --- a/drivers/crypto/ccree/cc_driver.h +++ b/drivers/crypto/ccree/cc_driver.h @@ -26,7 +26,6 @@ #include <linux/clk.h> #include <linux/platform_device.h> -/* Registers definitions from shared/hw/ree_include */ #include "cc_host_regs.h" #include "cc_crypto_ctx.h" #include "cc_hw_queue_defs.h" @@ -71,9 +70,7 @@ enum cc_std_body { #define CC_NVM_IS_IDLE_MASK BIT(CC_NVM_IS_IDLE_VALUE_BIT_SHIFT) -#define AXIM_MON_COMP_VALUE GENMASK(CC_AXIM_MON_COMP_VALUE_BIT_SIZE + \ - CC_AXIM_MON_COMP_VALUE_BIT_SHIFT, \ - CC_AXIM_MON_COMP_VALUE_BIT_SHIFT) +#define AXIM_MON_COMP_VALUE CC_GENMASK(CC_AXIM_MON_COMP_VALUE) #define CC_CPP_AES_ABORT_MASK ( \ BIT(CC_HOST_IMR_REE_OP_ABORTED_AES_0_MASK_BIT_SHIFT) | \ @@ -139,15 +136,15 @@ struct cc_drvdata { int irq; struct completion hw_queue_avail; /* wait for HW queue availability */ struct platform_device *plat_dev; - cc_sram_addr_t mlli_sram_addr; - void *buff_mgr_handle; - void *cipher_handle; + u32 mlli_sram_addr; + struct dma_pool *mlli_buffs_pool; + struct list_head alg_list; void *hash_handle; void *aead_handle; void *request_mgr_handle; void *fips_handle; - void *sram_mgr_handle; - void *debugfs; + u32 sram_free_offset; /* offset to non-allocated area in SRAM */ + struct dentry *dir; /* for debugfs */ struct clk *clk; bool coherent; char *hw_rev_name; @@ -158,7 +155,6 @@ struct cc_drvdata { int std_bodies; bool sec_disabled; u32 comp_mask; - bool pm_on; }; struct cc_crypto_alg { @@ -212,8 +208,6 @@ static inline void dump_byte_array(const char *name, const u8 *the_array, bool cc_wait_for_reset_completion(struct cc_drvdata *drvdata); int init_cc_regs(struct cc_drvdata *drvdata, bool is_probe); void fini_cc_regs(struct cc_drvdata *drvdata); -int cc_clk_on(struct cc_drvdata *drvdata); -void cc_clk_off(struct cc_drvdata *drvdata); unsigned int cc_get_default_hash_len(struct cc_drvdata *drvdata); static inline void cc_iowrite(struct cc_drvdata *drvdata, u32 reg, u32 val) diff --git a/drivers/crypto/ccree/cc_hash.c b/drivers/crypto/ccree/cc_hash.c index 912e5ce5079d..d5310783af15 100644 --- a/drivers/crypto/ccree/cc_hash.c +++ b/drivers/crypto/ccree/cc_hash.c @@ -20,8 +20,8 @@ #define CC_SM3_HASH_LEN_SIZE 8 struct cc_hash_handle { - cc_sram_addr_t digest_len_sram_addr; /* const value in SRAM*/ - cc_sram_addr_t larval_digest_sram_addr; /* const value in SRAM */ + u32 digest_len_sram_addr; /* const value in SRAM*/ + u32 larval_digest_sram_addr; /* const value in SRAM */ struct list_head hash_list; }; @@ -39,12 +39,19 @@ static const u32 cc_sha256_init[] = { SHA256_H3, SHA256_H2, SHA256_H1, SHA256_H0 }; static const u32 cc_digest_len_sha512_init[] = { 0x00000080, 0x00000000, 0x00000000, 0x00000000 }; -static u64 cc_sha384_init[] = { - SHA384_H7, SHA384_H6, SHA384_H5, SHA384_H4, - SHA384_H3, SHA384_H2, SHA384_H1, SHA384_H0 }; -static u64 cc_sha512_init[] = { - SHA512_H7, SHA512_H6, SHA512_H5, SHA512_H4, - SHA512_H3, SHA512_H2, SHA512_H1, SHA512_H0 }; + +/* + * Due to the way the HW works, every double word in the SHA384 and SHA512 + * larval hashes must be stored in hi/lo order + */ +#define hilo(x) upper_32_bits(x), lower_32_bits(x) +static const u32 cc_sha384_init[] = { + hilo(SHA384_H7), hilo(SHA384_H6), hilo(SHA384_H5), hilo(SHA384_H4), + hilo(SHA384_H3), hilo(SHA384_H2), hilo(SHA384_H1), hilo(SHA384_H0) }; +static const u32 cc_sha512_init[] = { + hilo(SHA512_H7), hilo(SHA512_H6), hilo(SHA512_H5), hilo(SHA512_H4), + hilo(SHA512_H3), hilo(SHA512_H2), hilo(SHA512_H1), hilo(SHA512_H0) }; + static const u32 cc_sm3_init[] = { SM3_IVH, SM3_IVG, SM3_IVF, SM3_IVE, SM3_IVD, SM3_IVC, SM3_IVB, SM3_IVA }; @@ -342,7 +349,6 @@ static int cc_fin_result(struct cc_hw_desc *desc, struct ahash_request *req, /* Get final MAC result */ hw_desc_init(&desc[idx]); set_hash_cipher_mode(&desc[idx], ctx->hw_mode, ctx->hash_mode); - /* TODO */ set_dout_dlli(&desc[idx], state->digest_result_dma_addr, digestsize, NS_BIT, 1); set_queue_last_ind(ctx->drvdata, &desc[idx]); @@ -422,8 +428,7 @@ static int cc_hash_digest(struct ahash_request *req) bool is_hmac = ctx->is_hmac; struct cc_crypto_req cc_req = {}; struct cc_hw_desc desc[CC_MAX_HASH_SEQ_LEN]; - cc_sram_addr_t larval_digest_addr = - cc_larval_digest_addr(ctx->drvdata, ctx->hash_mode); + u32 larval_digest_addr; int idx = 0; int rc = 0; gfp_t flags = cc_gfp_flags(&req->base); @@ -465,6 +470,8 @@ static int cc_hash_digest(struct ahash_request *req) set_din_type(&desc[idx], DMA_DLLI, state->digest_buff_dma_addr, ctx->inter_digestsize, NS_BIT); } else { + larval_digest_addr = cc_larval_digest_addr(ctx->drvdata, + ctx->hash_mode); set_din_sram(&desc[idx], larval_digest_addr, ctx->inter_digestsize); } @@ -726,7 +733,7 @@ static int cc_hash_setkey(struct crypto_ahash *ahash, const u8 *key, int digestsize = 0; int i, idx = 0, rc = 0; struct cc_hw_desc desc[CC_MAX_HASH_SEQ_LEN]; - cc_sram_addr_t larval_addr; + u32 larval_addr; struct device *dev; ctx = crypto_ahash_ctx(ahash); @@ -752,7 +759,7 @@ static int cc_hash_setkey(struct crypto_ahash *ahash, const u8 *key, return -ENOMEM; ctx->key_params.key_dma_addr = - dma_map_single(dev, (void *)ctx->key_params.key, keylen, + dma_map_single(dev, ctx->key_params.key, keylen, DMA_TO_DEVICE); if (dma_mapping_error(dev, ctx->key_params.key_dma_addr)) { dev_err(dev, "Mapping key va=0x%p len=%u for DMA failed\n", @@ -1067,8 +1074,8 @@ static int cc_alloc_ctx(struct cc_hash_ctx *ctx) ctx->key_params.keylen = 0; ctx->digest_buff_dma_addr = - dma_map_single(dev, (void *)ctx->digest_buff, - sizeof(ctx->digest_buff), DMA_BIDIRECTIONAL); + dma_map_single(dev, ctx->digest_buff, sizeof(ctx->digest_buff), + DMA_BIDIRECTIONAL); if (dma_mapping_error(dev, ctx->digest_buff_dma_addr)) { dev_err(dev, "Mapping digest len %zu B at va=%pK for DMA failed\n", sizeof(ctx->digest_buff), ctx->digest_buff); @@ -1079,7 +1086,7 @@ static int cc_alloc_ctx(struct cc_hash_ctx *ctx) &ctx->digest_buff_dma_addr); ctx->opad_tmp_keys_dma_addr = - dma_map_single(dev, (void *)ctx->opad_tmp_keys_buff, + dma_map_single(dev, ctx->opad_tmp_keys_buff, sizeof(ctx->opad_tmp_keys_buff), DMA_BIDIRECTIONAL); if (dma_mapping_error(dev, ctx->opad_tmp_keys_dma_addr)) { @@ -1196,8 +1203,8 @@ static int cc_mac_update(struct ahash_request *req) idx++; /* Setup request structure */ - cc_req.user_cb = (void *)cc_update_complete; - cc_req.user_arg = (void *)req; + cc_req.user_cb = cc_update_complete; + cc_req.user_arg = req; rc = cc_send_request(ctx->drvdata, &cc_req, desc, idx, &req->base); if (rc != -EINPROGRESS && rc != -EBUSY) { @@ -1254,8 +1261,8 @@ static int cc_mac_final(struct ahash_request *req) } /* Setup request structure */ - cc_req.user_cb = (void *)cc_hash_complete; - cc_req.user_arg = (void *)req; + cc_req.user_cb = cc_hash_complete; + cc_req.user_arg = req; if (state->xcbc_count && rem_cnt == 0) { /* Load key for ECB decryption */ @@ -1311,7 +1318,6 @@ static int cc_mac_final(struct ahash_request *req) /* Get final MAC result */ hw_desc_init(&desc[idx]); - /* TODO */ set_dout_dlli(&desc[idx], state->digest_result_dma_addr, digestsize, NS_BIT, 1); set_queue_last_ind(ctx->drvdata, &desc[idx]); @@ -1369,8 +1375,8 @@ static int cc_mac_finup(struct ahash_request *req) } /* Setup request structure */ - cc_req.user_cb = (void *)cc_hash_complete; - cc_req.user_arg = (void *)req; + cc_req.user_cb = cc_hash_complete; + cc_req.user_arg = req; if (ctx->hw_mode == DRV_CIPHER_XCBC_MAC) { key_len = CC_AES_128_BIT_KEY_SIZE; @@ -1393,7 +1399,6 @@ static int cc_mac_finup(struct ahash_request *req) /* Get final MAC result */ hw_desc_init(&desc[idx]); - /* TODO */ set_dout_dlli(&desc[idx], state->digest_result_dma_addr, digestsize, NS_BIT, 1); set_queue_last_ind(ctx->drvdata, &desc[idx]); @@ -1448,8 +1453,8 @@ static int cc_mac_digest(struct ahash_request *req) } /* Setup request structure */ - cc_req.user_cb = (void *)cc_digest_complete; - cc_req.user_arg = (void *)req; + cc_req.user_cb = cc_digest_complete; + cc_req.user_arg = req; if (ctx->hw_mode == DRV_CIPHER_XCBC_MAC) { key_len = CC_AES_128_BIT_KEY_SIZE; @@ -1820,7 +1825,7 @@ static struct cc_hash_alg *cc_alloc_hash_alg(struct cc_hash_template *template, struct crypto_alg *alg; struct ahash_alg *halg; - t_crypto_alg = kzalloc(sizeof(*t_crypto_alg), GFP_KERNEL); + t_crypto_alg = devm_kzalloc(dev, sizeof(*t_crypto_alg), GFP_KERNEL); if (!t_crypto_alg) return ERR_PTR(-ENOMEM); @@ -1857,104 +1862,85 @@ static struct cc_hash_alg *cc_alloc_hash_alg(struct cc_hash_template *template, return t_crypto_alg; } +static int cc_init_copy_sram(struct cc_drvdata *drvdata, const u32 *data, + unsigned int size, u32 *sram_buff_ofs) +{ + struct cc_hw_desc larval_seq[CC_DIGEST_SIZE_MAX / sizeof(u32)]; + unsigned int larval_seq_len = 0; + int rc; + + cc_set_sram_desc(data, *sram_buff_ofs, size / sizeof(*data), + larval_seq, &larval_seq_len); + rc = send_request_init(drvdata, larval_seq, larval_seq_len); + if (rc) + return rc; + + *sram_buff_ofs += size; + return 0; +} + int cc_init_hash_sram(struct cc_drvdata *drvdata) { struct cc_hash_handle *hash_handle = drvdata->hash_handle; - cc_sram_addr_t sram_buff_ofs = hash_handle->digest_len_sram_addr; - unsigned int larval_seq_len = 0; - struct cc_hw_desc larval_seq[CC_DIGEST_SIZE_MAX / sizeof(u32)]; + u32 sram_buff_ofs = hash_handle->digest_len_sram_addr; bool large_sha_supported = (drvdata->hw_rev >= CC_HW_REV_712); bool sm3_supported = (drvdata->hw_rev >= CC_HW_REV_713); int rc = 0; /* Copy-to-sram digest-len */ - cc_set_sram_desc(cc_digest_len_init, sram_buff_ofs, - ARRAY_SIZE(cc_digest_len_init), larval_seq, - &larval_seq_len); - rc = send_request_init(drvdata, larval_seq, larval_seq_len); + rc = cc_init_copy_sram(drvdata, cc_digest_len_init, + sizeof(cc_digest_len_init), &sram_buff_ofs); if (rc) goto init_digest_const_err; - sram_buff_ofs += sizeof(cc_digest_len_init); - larval_seq_len = 0; - if (large_sha_supported) { /* Copy-to-sram digest-len for sha384/512 */ - cc_set_sram_desc(cc_digest_len_sha512_init, sram_buff_ofs, - ARRAY_SIZE(cc_digest_len_sha512_init), - larval_seq, &larval_seq_len); - rc = send_request_init(drvdata, larval_seq, larval_seq_len); + rc = cc_init_copy_sram(drvdata, cc_digest_len_sha512_init, + sizeof(cc_digest_len_sha512_init), + &sram_buff_ofs); if (rc) goto init_digest_const_err; - - sram_buff_ofs += sizeof(cc_digest_len_sha512_init); - larval_seq_len = 0; } /* The initial digests offset */ hash_handle->larval_digest_sram_addr = sram_buff_ofs; /* Copy-to-sram initial SHA* digests */ - cc_set_sram_desc(cc_md5_init, sram_buff_ofs, ARRAY_SIZE(cc_md5_init), - larval_seq, &larval_seq_len); - rc = send_request_init(drvdata, larval_seq, larval_seq_len); + rc = cc_init_copy_sram(drvdata, cc_md5_init, sizeof(cc_md5_init), + &sram_buff_ofs); if (rc) goto init_digest_const_err; - sram_buff_ofs += sizeof(cc_md5_init); - larval_seq_len = 0; - cc_set_sram_desc(cc_sha1_init, sram_buff_ofs, - ARRAY_SIZE(cc_sha1_init), larval_seq, - &larval_seq_len); - rc = send_request_init(drvdata, larval_seq, larval_seq_len); + rc = cc_init_copy_sram(drvdata, cc_sha1_init, sizeof(cc_sha1_init), + &sram_buff_ofs); if (rc) goto init_digest_const_err; - sram_buff_ofs += sizeof(cc_sha1_init); - larval_seq_len = 0; - cc_set_sram_desc(cc_sha224_init, sram_buff_ofs, - ARRAY_SIZE(cc_sha224_init), larval_seq, - &larval_seq_len); - rc = send_request_init(drvdata, larval_seq, larval_seq_len); + rc = cc_init_copy_sram(drvdata, cc_sha224_init, sizeof(cc_sha224_init), + &sram_buff_ofs); if (rc) goto init_digest_const_err; - sram_buff_ofs += sizeof(cc_sha224_init); - larval_seq_len = 0; - cc_set_sram_desc(cc_sha256_init, sram_buff_ofs, - ARRAY_SIZE(cc_sha256_init), larval_seq, - &larval_seq_len); - rc = send_request_init(drvdata, larval_seq, larval_seq_len); + rc = cc_init_copy_sram(drvdata, cc_sha256_init, sizeof(cc_sha256_init), + &sram_buff_ofs); if (rc) goto init_digest_const_err; - sram_buff_ofs += sizeof(cc_sha256_init); - larval_seq_len = 0; if (sm3_supported) { - cc_set_sram_desc(cc_sm3_init, sram_buff_ofs, - ARRAY_SIZE(cc_sm3_init), larval_seq, - &larval_seq_len); - rc = send_request_init(drvdata, larval_seq, larval_seq_len); + rc = cc_init_copy_sram(drvdata, cc_sm3_init, + sizeof(cc_sm3_init), &sram_buff_ofs); if (rc) goto init_digest_const_err; - sram_buff_ofs += sizeof(cc_sm3_init); - larval_seq_len = 0; } if (large_sha_supported) { - cc_set_sram_desc((u32 *)cc_sha384_init, sram_buff_ofs, - (ARRAY_SIZE(cc_sha384_init) * 2), larval_seq, - &larval_seq_len); - rc = send_request_init(drvdata, larval_seq, larval_seq_len); + rc = cc_init_copy_sram(drvdata, cc_sha384_init, + sizeof(cc_sha384_init), &sram_buff_ofs); if (rc) goto init_digest_const_err; - sram_buff_ofs += sizeof(cc_sha384_init); - larval_seq_len = 0; - cc_set_sram_desc((u32 *)cc_sha512_init, sram_buff_ofs, - (ARRAY_SIZE(cc_sha512_init) * 2), larval_seq, - &larval_seq_len); - rc = send_request_init(drvdata, larval_seq, larval_seq_len); + rc = cc_init_copy_sram(drvdata, cc_sha512_init, + sizeof(cc_sha512_init), &sram_buff_ofs); if (rc) goto init_digest_const_err; } @@ -1963,38 +1949,16 @@ init_digest_const_err: return rc; } -static void __init cc_swap_dwords(u32 *buf, unsigned long size) -{ - int i; - u32 tmp; - - for (i = 0; i < size; i += 2) { - tmp = buf[i]; - buf[i] = buf[i + 1]; - buf[i + 1] = tmp; - } -} - -/* - * Due to the way the HW works we need to swap every - * double word in the SHA384 and SHA512 larval hashes - */ -void __init cc_hash_global_init(void) -{ - cc_swap_dwords((u32 *)&cc_sha384_init, (ARRAY_SIZE(cc_sha384_init) * 2)); - cc_swap_dwords((u32 *)&cc_sha512_init, (ARRAY_SIZE(cc_sha512_init) * 2)); -} - int cc_hash_alloc(struct cc_drvdata *drvdata) { struct cc_hash_handle *hash_handle; - cc_sram_addr_t sram_buff; + u32 sram_buff; u32 sram_size_to_alloc; struct device *dev = drvdata_to_dev(drvdata); int rc = 0; int alg; - hash_handle = kzalloc(sizeof(*hash_handle), GFP_KERNEL); + hash_handle = devm_kzalloc(dev, sizeof(*hash_handle), GFP_KERNEL); if (!hash_handle) return -ENOMEM; @@ -2016,7 +1980,6 @@ int cc_hash_alloc(struct cc_drvdata *drvdata) sram_buff = cc_sram_alloc(drvdata, sram_size_to_alloc); if (sram_buff == NULL_SRAM_ADDR) { - dev_err(dev, "SRAM pool exhausted\n"); rc = -ENOMEM; goto fail; } @@ -2056,12 +2019,10 @@ int cc_hash_alloc(struct cc_drvdata *drvdata) if (rc) { dev_err(dev, "%s alg registration failed\n", driver_hash[alg].driver_name); - kfree(t_alg); goto fail; - } else { - list_add_tail(&t_alg->entry, - &hash_handle->hash_list); } + + list_add_tail(&t_alg->entry, &hash_handle->hash_list); } if (hw_mode == DRV_CIPHER_XCBC_MAC || hw_mode == DRV_CIPHER_CMAC) @@ -2081,18 +2042,16 @@ int cc_hash_alloc(struct cc_drvdata *drvdata) if (rc) { dev_err(dev, "%s alg registration failed\n", driver_hash[alg].driver_name); - kfree(t_alg); goto fail; - } else { - list_add_tail(&t_alg->entry, &hash_handle->hash_list); } + + list_add_tail(&t_alg->entry, &hash_handle->hash_list); } return 0; fail: - kfree(drvdata->hash_handle); - drvdata->hash_handle = NULL; + cc_hash_free(drvdata); return rc; } @@ -2101,17 +2060,12 @@ int cc_hash_free(struct cc_drvdata *drvdata) struct cc_hash_alg *t_hash_alg, *hash_n; struct cc_hash_handle *hash_handle = drvdata->hash_handle; - if (hash_handle) { - list_for_each_entry_safe(t_hash_alg, hash_n, - &hash_handle->hash_list, entry) { - crypto_unregister_ahash(&t_hash_alg->ahash_alg); - list_del(&t_hash_alg->entry); - kfree(t_hash_alg); - } - - kfree(hash_handle); - drvdata->hash_handle = NULL; + list_for_each_entry_safe(t_hash_alg, hash_n, &hash_handle->hash_list, + entry) { + crypto_unregister_ahash(&t_hash_alg->ahash_alg); + list_del(&t_hash_alg->entry); } + return 0; } @@ -2272,22 +2226,23 @@ static const void *cc_larval_digest(struct device *dev, u32 mode) } } -/*! - * Gets the address of the initial digest in SRAM +/** + * cc_larval_digest_addr() - Get the address of the initial digest in SRAM * according to the given hash mode * - * \param drvdata - * \param mode The Hash mode. Supported modes: MD5/SHA1/SHA224/SHA256 + * @drvdata: Associated device driver context + * @mode: The Hash mode. Supported modes: MD5/SHA1/SHA224/SHA256 * - * \return u32 The address of the initial digest in SRAM + * Return: + * The address of the initial digest in SRAM */ -cc_sram_addr_t cc_larval_digest_addr(void *drvdata, u32 mode) +u32 cc_larval_digest_addr(void *drvdata, u32 mode) { struct cc_drvdata *_drvdata = (struct cc_drvdata *)drvdata; struct cc_hash_handle *hash_handle = _drvdata->hash_handle; struct device *dev = drvdata_to_dev(_drvdata); bool sm3_supported = (_drvdata->hw_rev >= CC_HW_REV_713); - cc_sram_addr_t addr; + u32 addr; switch (mode) { case DRV_HASH_NULL: @@ -2339,12 +2294,11 @@ cc_sram_addr_t cc_larval_digest_addr(void *drvdata, u32 mode) return hash_handle->larval_digest_sram_addr; } -cc_sram_addr_t -cc_digest_len_addr(void *drvdata, u32 mode) +u32 cc_digest_len_addr(void *drvdata, u32 mode) { struct cc_drvdata *_drvdata = (struct cc_drvdata *)drvdata; struct cc_hash_handle *hash_handle = _drvdata->hash_handle; - cc_sram_addr_t digest_len_addr = hash_handle->digest_len_sram_addr; + u32 digest_len_addr = hash_handle->digest_len_sram_addr; switch (mode) { case DRV_HASH_SHA1: diff --git a/drivers/crypto/ccree/cc_hash.h b/drivers/crypto/ccree/cc_hash.h index 0d6dc61484d7..3d0f2179e07e 100644 --- a/drivers/crypto/ccree/cc_hash.h +++ b/drivers/crypto/ccree/cc_hash.h @@ -80,30 +80,27 @@ int cc_hash_alloc(struct cc_drvdata *drvdata); int cc_init_hash_sram(struct cc_drvdata *drvdata); int cc_hash_free(struct cc_drvdata *drvdata); -/*! - * Gets the initial digest length +/** + * cc_digest_len_addr() - Gets the initial digest length * - * \param drvdata - * \param mode The Hash mode. Supported modes: - * MD5/SHA1/SHA224/SHA256/SHA384/SHA512 + * @drvdata: Associated device driver context + * @mode: The Hash mode. Supported modes: MD5/SHA1/SHA224/SHA256/SHA384/SHA512 * - * \return u32 returns the address of the initial digest length in SRAM + * Return: + * Returns the address of the initial digest length in SRAM */ -cc_sram_addr_t -cc_digest_len_addr(void *drvdata, u32 mode); +u32 cc_digest_len_addr(void *drvdata, u32 mode); -/*! - * Gets the address of the initial digest in SRAM +/** + * cc_larval_digest_addr() - Gets the address of the initial digest in SRAM * according to the given hash mode * - * \param drvdata - * \param mode The Hash mode. Supported modes: - * MD5/SHA1/SHA224/SHA256/SHA384/SHA512 + * @drvdata: Associated device driver context + * @mode: The Hash mode. Supported modes: MD5/SHA1/SHA224/SHA256/SHA384/SHA512 * - * \return u32 The address of the initial digest in SRAM + * Return: + * The address of the initial digest in SRAM */ -cc_sram_addr_t cc_larval_digest_addr(void *drvdata, u32 mode); - -void cc_hash_global_init(void); +u32 cc_larval_digest_addr(void *drvdata, u32 mode); #endif /*__CC_HASH_H__*/ diff --git a/drivers/crypto/ccree/cc_hw_queue_defs.h b/drivers/crypto/ccree/cc_hw_queue_defs.h index 9f4db9956e91..15df58c66911 100644 --- a/drivers/crypto/ccree/cc_hw_queue_defs.h +++ b/drivers/crypto/ccree/cc_hw_queue_defs.h @@ -17,46 +17,43 @@ /* Define max. available slots in HW queue */ #define HW_QUEUE_SLOTS_MAX 15 -#define CC_REG_LOW(word, name) \ - (CC_DSCRPTR_QUEUE_WORD ## word ## _ ## name ## _BIT_SHIFT) - -#define CC_REG_HIGH(word, name) \ - (CC_REG_LOW(word, name) + \ - CC_DSCRPTR_QUEUE_WORD ## word ## _ ## name ## _BIT_SIZE - 1) - -#define CC_GENMASK(word, name) \ - GENMASK(CC_REG_HIGH(word, name), CC_REG_LOW(word, name)) - -#define WORD0_VALUE CC_GENMASK(0, VALUE) -#define WORD0_CPP_CIPHER_MODE CC_GENMASK(0, CPP_CIPHER_MODE) -#define WORD1_DIN_CONST_VALUE CC_GENMASK(1, DIN_CONST_VALUE) -#define WORD1_DIN_DMA_MODE CC_GENMASK(1, DIN_DMA_MODE) -#define WORD1_DIN_SIZE CC_GENMASK(1, DIN_SIZE) -#define WORD1_NOT_LAST CC_GENMASK(1, NOT_LAST) -#define WORD1_NS_BIT CC_GENMASK(1, NS_BIT) -#define WORD1_LOCK_QUEUE CC_GENMASK(1, LOCK_QUEUE) -#define WORD2_VALUE CC_GENMASK(2, VALUE) -#define WORD3_DOUT_DMA_MODE CC_GENMASK(3, DOUT_DMA_MODE) -#define WORD3_DOUT_LAST_IND CC_GENMASK(3, DOUT_LAST_IND) -#define WORD3_DOUT_SIZE CC_GENMASK(3, DOUT_SIZE) -#define WORD3_HASH_XOR_BIT CC_GENMASK(3, HASH_XOR_BIT) -#define WORD3_NS_BIT CC_GENMASK(3, NS_BIT) -#define WORD3_QUEUE_LAST_IND CC_GENMASK(3, QUEUE_LAST_IND) -#define WORD4_ACK_NEEDED CC_GENMASK(4, ACK_NEEDED) -#define WORD4_AES_SEL_N_HASH CC_GENMASK(4, AES_SEL_N_HASH) -#define WORD4_AES_XOR_CRYPTO_KEY CC_GENMASK(4, AES_XOR_CRYPTO_KEY) -#define WORD4_BYTES_SWAP CC_GENMASK(4, BYTES_SWAP) -#define WORD4_CIPHER_CONF0 CC_GENMASK(4, CIPHER_CONF0) -#define WORD4_CIPHER_CONF1 CC_GENMASK(4, CIPHER_CONF1) -#define WORD4_CIPHER_CONF2 CC_GENMASK(4, CIPHER_CONF2) -#define WORD4_CIPHER_DO CC_GENMASK(4, CIPHER_DO) -#define WORD4_CIPHER_MODE CC_GENMASK(4, CIPHER_MODE) -#define WORD4_CMAC_SIZE0 CC_GENMASK(4, CMAC_SIZE0) -#define WORD4_DATA_FLOW_MODE CC_GENMASK(4, DATA_FLOW_MODE) -#define WORD4_KEY_SIZE CC_GENMASK(4, KEY_SIZE) -#define WORD4_SETUP_OPERATION CC_GENMASK(4, SETUP_OPERATION) -#define WORD5_DIN_ADDR_HIGH CC_GENMASK(5, DIN_ADDR_HIGH) -#define WORD5_DOUT_ADDR_HIGH CC_GENMASK(5, DOUT_ADDR_HIGH) +#define CC_REG_LOW(name) (name ## _BIT_SHIFT) +#define CC_REG_HIGH(name) (CC_REG_LOW(name) + name ## _BIT_SIZE - 1) +#define CC_GENMASK(name) GENMASK(CC_REG_HIGH(name), CC_REG_LOW(name)) + +#define CC_HWQ_GENMASK(word, field) \ + CC_GENMASK(CC_DSCRPTR_QUEUE_WORD ## word ## _ ## field) + +#define WORD0_VALUE CC_HWQ_GENMASK(0, VALUE) +#define WORD0_CPP_CIPHER_MODE CC_HWQ_GENMASK(0, CPP_CIPHER_MODE) +#define WORD1_DIN_CONST_VALUE CC_HWQ_GENMASK(1, DIN_CONST_VALUE) +#define WORD1_DIN_DMA_MODE CC_HWQ_GENMASK(1, DIN_DMA_MODE) +#define WORD1_DIN_SIZE CC_HWQ_GENMASK(1, DIN_SIZE) +#define WORD1_NOT_LAST CC_HWQ_GENMASK(1, NOT_LAST) +#define WORD1_NS_BIT CC_HWQ_GENMASK(1, NS_BIT) +#define WORD1_LOCK_QUEUE CC_HWQ_GENMASK(1, LOCK_QUEUE) +#define WORD2_VALUE CC_HWQ_GENMASK(2, VALUE) +#define WORD3_DOUT_DMA_MODE CC_HWQ_GENMASK(3, DOUT_DMA_MODE) +#define WORD3_DOUT_LAST_IND CC_HWQ_GENMASK(3, DOUT_LAST_IND) +#define WORD3_DOUT_SIZE CC_HWQ_GENMASK(3, DOUT_SIZE) +#define WORD3_HASH_XOR_BIT CC_HWQ_GENMASK(3, HASH_XOR_BIT) +#define WORD3_NS_BIT CC_HWQ_GENMASK(3, NS_BIT) +#define WORD3_QUEUE_LAST_IND CC_HWQ_GENMASK(3, QUEUE_LAST_IND) +#define WORD4_ACK_NEEDED CC_HWQ_GENMASK(4, ACK_NEEDED) +#define WORD4_AES_SEL_N_HASH CC_HWQ_GENMASK(4, AES_SEL_N_HASH) +#define WORD4_AES_XOR_CRYPTO_KEY CC_HWQ_GENMASK(4, AES_XOR_CRYPTO_KEY) +#define WORD4_BYTES_SWAP CC_HWQ_GENMASK(4, BYTES_SWAP) +#define WORD4_CIPHER_CONF0 CC_HWQ_GENMASK(4, CIPHER_CONF0) +#define WORD4_CIPHER_CONF1 CC_HWQ_GENMASK(4, CIPHER_CONF1) +#define WORD4_CIPHER_CONF2 CC_HWQ_GENMASK(4, CIPHER_CONF2) +#define WORD4_CIPHER_DO CC_HWQ_GENMASK(4, CIPHER_DO) +#define WORD4_CIPHER_MODE CC_HWQ_GENMASK(4, CIPHER_MODE) +#define WORD4_CMAC_SIZE0 CC_HWQ_GENMASK(4, CMAC_SIZE0) +#define WORD4_DATA_FLOW_MODE CC_HWQ_GENMASK(4, DATA_FLOW_MODE) +#define WORD4_KEY_SIZE CC_HWQ_GENMASK(4, KEY_SIZE) +#define WORD4_SETUP_OPERATION CC_HWQ_GENMASK(4, SETUP_OPERATION) +#define WORD5_DIN_ADDR_HIGH CC_HWQ_GENMASK(5, DIN_ADDR_HIGH) +#define WORD5_DOUT_ADDR_HIGH CC_HWQ_GENMASK(5, DOUT_ADDR_HIGH) /****************************************************************************** * TYPE DEFINITIONS @@ -207,31 +204,32 @@ enum cc_hash_cipher_pad { /* Descriptor packing macros */ /*****************************/ -/* - * Init a HW descriptor struct - * @pdesc: pointer HW descriptor struct +/** + * hw_desc_init() - Init a HW descriptor struct + * @pdesc: pointer to HW descriptor struct */ static inline void hw_desc_init(struct cc_hw_desc *pdesc) { memset(pdesc, 0, sizeof(struct cc_hw_desc)); } -/* - * Indicates the end of current HW descriptors flow and release the HW engines. +/** + * set_queue_last_ind_bit() - Indicate the end of current HW descriptors flow + * and release the HW engines. * - * @pdesc: pointer HW descriptor struct + * @pdesc: Pointer to HW descriptor struct */ static inline void set_queue_last_ind_bit(struct cc_hw_desc *pdesc) { pdesc->word[3] |= FIELD_PREP(WORD3_QUEUE_LAST_IND, 1); } -/* - * Set the DIN field of a HW descriptors +/** + * set_din_type() - Set the DIN field of a HW descriptor * - * @pdesc: pointer HW descriptor struct - * @dma_mode: dmaMode The DMA mode: NO_DMA, SRAM, DLLI, MLLI, CONSTANT - * @addr: dinAdr DIN address + * @pdesc: Pointer to HW descriptor struct + * @dma_mode: The DMA mode: NO_DMA, SRAM, DLLI, MLLI, CONSTANT + * @addr: DIN address * @size: Data size in bytes * @axi_sec: AXI secure bit */ @@ -239,20 +237,20 @@ static inline void set_din_type(struct cc_hw_desc *pdesc, enum cc_dma_mode dma_mode, dma_addr_t addr, u32 size, enum cc_axi_sec axi_sec) { - pdesc->word[0] = (u32)addr; + pdesc->word[0] = lower_32_bits(addr); #ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT - pdesc->word[5] |= FIELD_PREP(WORD5_DIN_ADDR_HIGH, ((u16)(addr >> 32))); + pdesc->word[5] |= FIELD_PREP(WORD5_DIN_ADDR_HIGH, upper_32_bits(addr)); #endif pdesc->word[1] |= FIELD_PREP(WORD1_DIN_DMA_MODE, dma_mode) | FIELD_PREP(WORD1_DIN_SIZE, size) | FIELD_PREP(WORD1_NS_BIT, axi_sec); } -/* - * Set the DIN field of a HW descriptors to NO DMA mode. +/** + * set_din_no_dma() - Set the DIN field of a HW descriptor to NO DMA mode. * Used for NOP descriptor, register patches and other special modes. * - * @pdesc: pointer HW descriptor struct + * @pdesc: Pointer to HW descriptor struct * @addr: DIN address * @size: Data size in bytes */ @@ -262,14 +260,11 @@ static inline void set_din_no_dma(struct cc_hw_desc *pdesc, u32 addr, u32 size) pdesc->word[1] |= FIELD_PREP(WORD1_DIN_SIZE, size); } -/* - * Setup the special CPP descriptor +/** + * set_cpp_crypto_key() - Setup the special CPP descriptor * - * @pdesc: pointer HW descriptor struct - * @alg: cipher used (AES / SM4) - * @mode: mode used (CTR or CBC) - * @slot: slot number - * @ksize: key size + * @pdesc: Pointer to HW descriptor struct + * @slot: Slot number */ static inline void set_cpp_crypto_key(struct cc_hw_desc *pdesc, u8 slot) { @@ -281,27 +276,26 @@ static inline void set_cpp_crypto_key(struct cc_hw_desc *pdesc, u8 slot) pdesc->word[4] |= FIELD_PREP(WORD4_SETUP_OPERATION, slot); } -/* - * Set the DIN field of a HW descriptors to SRAM mode. +/** + * set_din_sram() - Set the DIN field of a HW descriptor to SRAM mode. * Note: No need to check SRAM alignment since host requests do not use SRAM and - * adaptor will enforce alignment check. + * the adaptor will enforce alignment checks. * - * @pdesc: pointer HW descriptor struct + * @pdesc: Pointer to HW descriptor struct * @addr: DIN address - * @size Data size in bytes + * @size: Data size in bytes */ -static inline void set_din_sram(struct cc_hw_desc *pdesc, dma_addr_t addr, - u32 size) +static inline void set_din_sram(struct cc_hw_desc *pdesc, u32 addr, u32 size) { - pdesc->word[0] = (u32)addr; + pdesc->word[0] = addr; pdesc->word[1] |= FIELD_PREP(WORD1_DIN_SIZE, size) | FIELD_PREP(WORD1_DIN_DMA_MODE, DMA_SRAM); } -/* - * Set the DIN field of a HW descriptors to CONST mode +/** + * set_din_const() - Set the DIN field of a HW descriptor to CONST mode * - * @pdesc: pointer HW descriptor struct + * @pdesc: Pointer to HW descriptor struct * @val: DIN const value * @size: Data size in bytes */ @@ -313,20 +307,20 @@ static inline void set_din_const(struct cc_hw_desc *pdesc, u32 val, u32 size) FIELD_PREP(WORD1_DIN_SIZE, size); } -/* - * Set the DIN not last input data indicator +/** + * set_din_not_last_indication() - Set the DIN not last input data indicator * - * @pdesc: pointer HW descriptor struct + * @pdesc: Pointer to HW descriptor struct */ static inline void set_din_not_last_indication(struct cc_hw_desc *pdesc) { pdesc->word[1] |= FIELD_PREP(WORD1_NOT_LAST, 1); } -/* - * Set the DOUT field of a HW descriptors +/** + * set_dout_type() - Set the DOUT field of a HW descriptor * - * @pdesc: pointer HW descriptor struct + * @pdesc: Pointer to HW descriptor struct * @dma_mode: The DMA mode: NO_DMA, SRAM, DLLI, MLLI, CONSTANT * @addr: DOUT address * @size: Data size in bytes @@ -336,24 +330,24 @@ static inline void set_dout_type(struct cc_hw_desc *pdesc, enum cc_dma_mode dma_mode, dma_addr_t addr, u32 size, enum cc_axi_sec axi_sec) { - pdesc->word[2] = (u32)addr; + pdesc->word[2] = lower_32_bits(addr); #ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT - pdesc->word[5] |= FIELD_PREP(WORD5_DOUT_ADDR_HIGH, ((u16)(addr >> 32))); + pdesc->word[5] |= FIELD_PREP(WORD5_DOUT_ADDR_HIGH, upper_32_bits(addr)); #endif pdesc->word[3] |= FIELD_PREP(WORD3_DOUT_DMA_MODE, dma_mode) | FIELD_PREP(WORD3_DOUT_SIZE, size) | FIELD_PREP(WORD3_NS_BIT, axi_sec); } -/* - * Set the DOUT field of a HW descriptors to DLLI type +/** + * set_dout_dlli() - Set the DOUT field of a HW descriptor to DLLI type * The LAST INDICATION is provided by the user * - * @pdesc pointer HW descriptor struct + * @pdesc: Pointer to HW descriptor struct * @addr: DOUT address * @size: Data size in bytes - * @last_ind: The last indication bit * @axi_sec: AXI secure bit + * @last_ind: The last indication bit */ static inline void set_dout_dlli(struct cc_hw_desc *pdesc, dma_addr_t addr, u32 size, enum cc_axi_sec axi_sec, @@ -363,29 +357,28 @@ static inline void set_dout_dlli(struct cc_hw_desc *pdesc, dma_addr_t addr, pdesc->word[3] |= FIELD_PREP(WORD3_DOUT_LAST_IND, last_ind); } -/* - * Set the DOUT field of a HW descriptors to DLLI type +/** + * set_dout_mlli() - Set the DOUT field of a HW descriptor to MLLI type * The LAST INDICATION is provided by the user * - * @pdesc: pointer HW descriptor struct + * @pdesc: Pointer to HW descriptor struct * @addr: DOUT address * @size: Data size in bytes - * @last_ind: The last indication bit * @axi_sec: AXI secure bit + * @last_ind: The last indication bit */ -static inline void set_dout_mlli(struct cc_hw_desc *pdesc, dma_addr_t addr, - u32 size, enum cc_axi_sec axi_sec, - bool last_ind) +static inline void set_dout_mlli(struct cc_hw_desc *pdesc, u32 addr, u32 size, + enum cc_axi_sec axi_sec, bool last_ind) { set_dout_type(pdesc, DMA_MLLI, addr, size, axi_sec); pdesc->word[3] |= FIELD_PREP(WORD3_DOUT_LAST_IND, last_ind); } -/* - * Set the DOUT field of a HW descriptors to NO DMA mode. +/** + * set_dout_no_dma() - Set the DOUT field of a HW descriptor to NO DMA mode. * Used for NOP descriptor, register patches and other special modes. * - * @pdesc: pointer HW descriptor struct + * @pdesc: pointer to HW descriptor struct * @addr: DOUT address * @size: Data size in bytes * @write_enable: Enables a write operation to a register @@ -398,54 +391,55 @@ static inline void set_dout_no_dma(struct cc_hw_desc *pdesc, u32 addr, FIELD_PREP(WORD3_DOUT_LAST_IND, write_enable); } -/* - * Set the word for the XOR operation. +/** + * set_xor_val() - Set the word for the XOR operation. * - * @pdesc: pointer HW descriptor struct - * @val: xor data value + * @pdesc: Pointer to HW descriptor struct + * @val: XOR data value */ static inline void set_xor_val(struct cc_hw_desc *pdesc, u32 val) { pdesc->word[2] = val; } -/* - * Sets the XOR indicator bit in the descriptor +/** + * set_xor_active() - Set the XOR indicator bit in the descriptor * - * @pdesc: pointer HW descriptor struct + * @pdesc: Pointer to HW descriptor struct */ static inline void set_xor_active(struct cc_hw_desc *pdesc) { pdesc->word[3] |= FIELD_PREP(WORD3_HASH_XOR_BIT, 1); } -/* - * Select the AES engine instead of HASH engine when setting up combined mode - * with AES XCBC MAC +/** + * set_aes_not_hash_mode() - Select the AES engine instead of HASH engine when + * setting up combined mode with AES XCBC MAC * - * @pdesc: pointer HW descriptor struct + * @pdesc: Pointer to HW descriptor struct */ static inline void set_aes_not_hash_mode(struct cc_hw_desc *pdesc) { pdesc->word[4] |= FIELD_PREP(WORD4_AES_SEL_N_HASH, 1); } -/* - * Set aes xor crypto key, this in some secenrios select SM3 engine +/** + * set_aes_xor_crypto_key() - Set aes xor crypto key, which in some scenarios + * selects the SM3 engine * - * @pdesc: pointer HW descriptor struct + * @pdesc: Pointer to HW descriptor struct */ static inline void set_aes_xor_crypto_key(struct cc_hw_desc *pdesc) { pdesc->word[4] |= FIELD_PREP(WORD4_AES_XOR_CRYPTO_KEY, 1); } -/* - * Set the DOUT field of a HW descriptors to SRAM mode +/** + * set_dout_sram() - Set the DOUT field of a HW descriptor to SRAM mode * Note: No need to check SRAM alignment since host requests do not use SRAM and - * adaptor will enforce alignment check. + * the adaptor will enforce alignment checks. * - * @pdesc: pointer HW descriptor struct + * @pdesc: Pointer to HW descriptor struct * @addr: DOUT address * @size: Data size in bytes */ @@ -456,32 +450,34 @@ static inline void set_dout_sram(struct cc_hw_desc *pdesc, u32 addr, u32 size) FIELD_PREP(WORD3_DOUT_SIZE, size); } -/* - * Sets the data unit size for XEX mode in data_out_addr[15:0] +/** + * set_xex_data_unit_size() - Set the data unit size for XEX mode in + * data_out_addr[15:0] * - * @pdesc: pDesc pointer HW descriptor struct - * @size: data unit size for XEX mode + * @pdesc: Pointer to HW descriptor struct + * @size: Data unit size for XEX mode */ static inline void set_xex_data_unit_size(struct cc_hw_desc *pdesc, u32 size) { pdesc->word[2] = size; } -/* - * Set the number of rounds for Multi2 in data_out_addr[15:0] +/** + * set_multi2_num_rounds() - Set the number of rounds for Multi2 in + * data_out_addr[15:0] * - * @pdesc: pointer HW descriptor struct - * @num: number of rounds for Multi2 + * @pdesc: Pointer to HW descriptor struct + * @num: Number of rounds for Multi2 */ static inline void set_multi2_num_rounds(struct cc_hw_desc *pdesc, u32 num) { pdesc->word[2] = num; } -/* - * Set the flow mode. +/** + * set_flow_mode() - Set the flow mode. * - * @pdesc: pointer HW descriptor struct + * @pdesc: Pointer to HW descriptor struct * @mode: Any one of the modes defined in [CC7x-DESC] */ static inline void set_flow_mode(struct cc_hw_desc *pdesc, @@ -490,22 +486,22 @@ static inline void set_flow_mode(struct cc_hw_desc *pdesc, pdesc->word[4] |= FIELD_PREP(WORD4_DATA_FLOW_MODE, mode); } -/* - * Set the cipher mode. +/** + * set_cipher_mode() - Set the cipher mode. * - * @pdesc: pointer HW descriptor struct - * @mode: Any one of the modes defined in [CC7x-DESC] + * @pdesc: Pointer to HW descriptor struct + * @mode: Any one of the modes defined in [CC7x-DESC] */ static inline void set_cipher_mode(struct cc_hw_desc *pdesc, int mode) { pdesc->word[4] |= FIELD_PREP(WORD4_CIPHER_MODE, mode); } -/* - * Set the cipher mode for hash algorithms. +/** + * set_hash_cipher_mode() - Set the cipher mode for hash algorithms. * - * @pdesc: pointer HW descriptor struct - * @cipher_mode: Any one of the modes defined in [CC7x-DESC] + * @pdesc: Pointer to HW descriptor struct + * @cipher_mode: Any one of the modes defined in [CC7x-DESC] * @hash_mode: specifies which hash is being handled */ static inline void set_hash_cipher_mode(struct cc_hw_desc *pdesc, @@ -517,10 +513,10 @@ static inline void set_hash_cipher_mode(struct cc_hw_desc *pdesc, set_aes_xor_crypto_key(pdesc); } -/* - * Set the cipher configuration fields. +/** + * set_cipher_config0() - Set the cipher configuration fields. * - * @pdesc: pointer HW descriptor struct + * @pdesc: Pointer to HW descriptor struct * @mode: Any one of the modes defined in [CC7x-DESC] */ static inline void set_cipher_config0(struct cc_hw_desc *pdesc, int mode) @@ -528,11 +524,11 @@ static inline void set_cipher_config0(struct cc_hw_desc *pdesc, int mode) pdesc->word[4] |= FIELD_PREP(WORD4_CIPHER_CONF0, mode); } -/* - * Set the cipher configuration fields. +/** + * set_cipher_config1() - Set the cipher configuration fields. * - * @pdesc: pointer HW descriptor struct - * @config: Any one of the modes defined in [CC7x-DESC] + * @pdesc: Pointer to HW descriptor struct + * @config: Padding mode */ static inline void set_cipher_config1(struct cc_hw_desc *pdesc, enum cc_hash_conf_pad config) @@ -540,10 +536,10 @@ static inline void set_cipher_config1(struct cc_hw_desc *pdesc, pdesc->word[4] |= FIELD_PREP(WORD4_CIPHER_CONF1, config); } -/* - * Set HW key configuration fields. +/** + * set_hw_crypto_key() - Set HW key configuration fields. * - * @pdesc: pointer HW descriptor struct + * @pdesc: Pointer to HW descriptor struct * @hw_key: The HW key slot asdefined in enum cc_hw_crypto_key */ static inline void set_hw_crypto_key(struct cc_hw_desc *pdesc, @@ -555,64 +551,64 @@ static inline void set_hw_crypto_key(struct cc_hw_desc *pdesc, (hw_key >> HW_KEY_SHIFT_CIPHER_CFG2)); } -/* - * Set byte order of all setup-finalize descriptors. +/** + * set_bytes_swap() - Set byte order of all setup-finalize descriptors. * - * @pdesc: pointer HW descriptor struct - * @config: Any one of the modes defined in [CC7x-DESC] + * @pdesc: Pointer to HW descriptor struct + * @config: True to enable byte swapping */ static inline void set_bytes_swap(struct cc_hw_desc *pdesc, bool config) { pdesc->word[4] |= FIELD_PREP(WORD4_BYTES_SWAP, config); } -/* - * Set CMAC_SIZE0 mode. +/** + * set_cmac_size0_mode() - Set CMAC_SIZE0 mode. * - * @pdesc: pointer HW descriptor struct + * @pdesc: Pointer to HW descriptor struct */ static inline void set_cmac_size0_mode(struct cc_hw_desc *pdesc) { pdesc->word[4] |= FIELD_PREP(WORD4_CMAC_SIZE0, 1); } -/* - * Set key size descriptor field. +/** + * set_key_size() - Set key size descriptor field. * - * @pdesc: pointer HW descriptor struct - * @size: key size in bytes (NOT size code) + * @pdesc: Pointer to HW descriptor struct + * @size: Key size in bytes (NOT size code) */ static inline void set_key_size(struct cc_hw_desc *pdesc, u32 size) { pdesc->word[4] |= FIELD_PREP(WORD4_KEY_SIZE, size); } -/* - * Set AES key size. +/** + * set_key_size_aes() - Set AES key size. * - * @pdesc: pointer HW descriptor struct - * @size: key size in bytes (NOT size code) + * @pdesc: Pointer to HW descriptor struct + * @size: Key size in bytes (NOT size code) */ static inline void set_key_size_aes(struct cc_hw_desc *pdesc, u32 size) { set_key_size(pdesc, ((size >> 3) - 2)); } -/* - * Set DES key size. +/** + * set_key_size_des() - Set DES key size. * - * @pdesc: pointer HW descriptor struct - * @size: key size in bytes (NOT size code) + * @pdesc: Pointer to HW descriptor struct + * @size: Key size in bytes (NOT size code) */ static inline void set_key_size_des(struct cc_hw_desc *pdesc, u32 size) { set_key_size(pdesc, ((size >> 3) - 1)); } -/* - * Set the descriptor setup mode +/** + * set_setup_mode() - Set the descriptor setup mode * - * @pdesc: pointer HW descriptor struct + * @pdesc: Pointer to HW descriptor struct * @mode: Any one of the setup modes defined in [CC7x-DESC] */ static inline void set_setup_mode(struct cc_hw_desc *pdesc, @@ -621,10 +617,10 @@ static inline void set_setup_mode(struct cc_hw_desc *pdesc, pdesc->word[4] |= FIELD_PREP(WORD4_SETUP_OPERATION, mode); } -/* - * Set the descriptor cipher DO +/** + * set_cipher_do() - Set the descriptor cipher DO * - * @pdesc: pointer HW descriptor struct + * @pdesc: Pointer to HW descriptor struct * @config: Any one of the cipher do defined in [CC7x-DESC] */ static inline void set_cipher_do(struct cc_hw_desc *pdesc, diff --git a/drivers/crypto/ccree/cc_pm.c b/drivers/crypto/ccree/cc_pm.c index 24c368b866f6..d39e1664fc7e 100644 --- a/drivers/crypto/ccree/cc_pm.c +++ b/drivers/crypto/ccree/cc_pm.c @@ -15,29 +15,25 @@ #define POWER_DOWN_ENABLE 0x01 #define POWER_DOWN_DISABLE 0x00 -const struct dev_pm_ops ccree_pm = { - SET_RUNTIME_PM_OPS(cc_pm_suspend, cc_pm_resume, NULL) -}; - -int cc_pm_suspend(struct device *dev) +static int cc_pm_suspend(struct device *dev) { struct cc_drvdata *drvdata = dev_get_drvdata(dev); dev_dbg(dev, "set HOST_POWER_DOWN_EN\n"); fini_cc_regs(drvdata); cc_iowrite(drvdata, CC_REG(HOST_POWER_DOWN_EN), POWER_DOWN_ENABLE); - cc_clk_off(drvdata); + clk_disable_unprepare(drvdata->clk); return 0; } -int cc_pm_resume(struct device *dev) +static int cc_pm_resume(struct device *dev) { int rc; struct cc_drvdata *drvdata = dev_get_drvdata(dev); dev_dbg(dev, "unset HOST_POWER_DOWN_EN\n"); /* Enables the device source clk */ - rc = cc_clk_on(drvdata); + rc = clk_prepare_enable(drvdata->clk); if (rc) { dev_err(dev, "failed getting clock back on. We're toast.\n"); return rc; @@ -62,53 +58,19 @@ int cc_pm_resume(struct device *dev) return 0; } +const struct dev_pm_ops ccree_pm = { + SET_RUNTIME_PM_OPS(cc_pm_suspend, cc_pm_resume, NULL) +}; + int cc_pm_get(struct device *dev) { - int rc = 0; - struct cc_drvdata *drvdata = dev_get_drvdata(dev); - - if (drvdata->pm_on) - rc = pm_runtime_get_sync(dev); + int rc = pm_runtime_get_sync(dev); return (rc == 1 ? 0 : rc); } void cc_pm_put_suspend(struct device *dev) { - struct cc_drvdata *drvdata = dev_get_drvdata(dev); - - if (drvdata->pm_on) { - pm_runtime_mark_last_busy(dev); - pm_runtime_put_autosuspend(dev); - } -} - -bool cc_pm_is_dev_suspended(struct device *dev) -{ - /* check device state using runtime api */ - return pm_runtime_suspended(dev); -} - -int cc_pm_init(struct cc_drvdata *drvdata) -{ - struct device *dev = drvdata_to_dev(drvdata); - - /* must be before the enabling to avoid redundant suspending */ - pm_runtime_set_autosuspend_delay(dev, CC_SUSPEND_TIMEOUT); - pm_runtime_use_autosuspend(dev); - /* set us as active - note we won't do PM ops until cc_pm_go()! */ - return pm_runtime_set_active(dev); -} - -/* enable the PM module*/ -void cc_pm_go(struct cc_drvdata *drvdata) -{ - pm_runtime_enable(drvdata_to_dev(drvdata)); - drvdata->pm_on = true; -} - -void cc_pm_fini(struct cc_drvdata *drvdata) -{ - pm_runtime_disable(drvdata_to_dev(drvdata)); - drvdata->pm_on = false; + pm_runtime_mark_last_busy(dev); + pm_runtime_put_autosuspend(dev); } diff --git a/drivers/crypto/ccree/cc_pm.h b/drivers/crypto/ccree/cc_pm.h index 80a18e11cae4..50cac33de118 100644 --- a/drivers/crypto/ccree/cc_pm.h +++ b/drivers/crypto/ccree/cc_pm.h @@ -15,26 +15,11 @@ extern const struct dev_pm_ops ccree_pm; -int cc_pm_init(struct cc_drvdata *drvdata); -void cc_pm_go(struct cc_drvdata *drvdata); -void cc_pm_fini(struct cc_drvdata *drvdata); -int cc_pm_suspend(struct device *dev); -int cc_pm_resume(struct device *dev); int cc_pm_get(struct device *dev); void cc_pm_put_suspend(struct device *dev); -bool cc_pm_is_dev_suspended(struct device *dev); #else -static inline int cc_pm_init(struct cc_drvdata *drvdata) -{ - return 0; -} - -static inline void cc_pm_go(struct cc_drvdata *drvdata) {} - -static inline void cc_pm_fini(struct cc_drvdata *drvdata) {} - static inline int cc_pm_get(struct device *dev) { return 0; @@ -42,12 +27,6 @@ static inline int cc_pm_get(struct device *dev) static inline void cc_pm_put_suspend(struct device *dev) {} -static inline bool cc_pm_is_dev_suspended(struct device *dev) -{ - /* if PM not supported device is never suspend */ - return false; -} - #endif #endif /*__POWER_MGR_H__*/ diff --git a/drivers/crypto/ccree/cc_request_mgr.c b/drivers/crypto/ccree/cc_request_mgr.c index 9d61e6f12478..1d7649ecf44e 100644 --- a/drivers/crypto/ccree/cc_request_mgr.c +++ b/drivers/crypto/ccree/cc_request_mgr.c @@ -206,12 +206,13 @@ static void enqueue_seq(struct cc_drvdata *drvdata, struct cc_hw_desc seq[], } } -/*! - * Completion will take place if and only if user requested completion - * by cc_send_sync_request(). +/** + * request_mgr_complete() - Completion will take place if and only if user + * requested completion by cc_send_sync_request(). * - * \param dev - * \param dx_compl_h The completion event to signal + * @dev: Device pointer + * @dx_compl_h: The completion event to signal + * @dummy: unused error code */ static void request_mgr_complete(struct device *dev, void *dx_compl_h, int dummy) @@ -264,15 +265,15 @@ static int cc_queues_status(struct cc_drvdata *drvdata, return -ENOSPC; } -/*! - * Enqueue caller request to crypto hardware. +/** + * cc_do_send_request() - Enqueue caller request to crypto hardware. * Need to be called with HW lock held and PM running * - * \param drvdata - * \param cc_req The request to enqueue - * \param desc The crypto sequence - * \param len The crypto sequence length - * \param add_comp If "true": add an artificial dout DMA to mark completion + * @drvdata: Associated device driver context + * @cc_req: The request to enqueue + * @desc: The crypto sequence + * @len: The crypto sequence length + * @add_comp: If "true": add an artificial dout DMA to mark completion * */ static void cc_do_send_request(struct cc_drvdata *drvdata, @@ -295,7 +296,6 @@ static void cc_do_send_request(struct cc_drvdata *drvdata, req_mgr_h->req_queue[req_mgr_h->req_queue_head] = *cc_req; req_mgr_h->req_queue_head = (req_mgr_h->req_queue_head + 1) & (MAX_REQUEST_QUEUE_SIZE - 1); - /* TODO: Use circ_buf.h ? */ dev_dbg(dev, "Enqueue request head=%u\n", req_mgr_h->req_queue_head); @@ -377,7 +377,7 @@ static void cc_proc_backlog(struct cc_drvdata *drvdata) rc = cc_queues_status(drvdata, mgr, bli->len); if (rc) { /* - * There is still not room in the FIFO for + * There is still no room in the FIFO for * this request. Bail out. We'll return here * on the next completion irq. */ @@ -476,10 +476,6 @@ int cc_send_sync_request(struct cc_drvdata *drvdata, break; spin_unlock_bh(&mgr->hw_lock); - if (rc != -EAGAIN) { - cc_pm_put_suspend(dev); - return rc; - } wait_for_completion_interruptible(&drvdata->hw_queue_avail); reinit_completion(&drvdata->hw_queue_avail); } @@ -490,16 +486,18 @@ int cc_send_sync_request(struct cc_drvdata *drvdata, return 0; } -/*! - * Enqueue caller request to crypto hardware during init process. - * assume this function is not called in middle of a flow, +/** + * send_request_init() - Enqueue caller request to crypto hardware during init + * process. + * Assume this function is not called in the middle of a flow, * since we set QUEUE_LAST_IND flag in the last descriptor. * - * \param drvdata - * \param desc The crypto sequence - * \param len The crypto sequence length + * @drvdata: Associated device driver context + * @desc: The crypto sequence + * @len: The crypto sequence length * - * \return int Returns "0" upon success + * Return: + * Returns "0" upon success */ int send_request_init(struct cc_drvdata *drvdata, struct cc_hw_desc *desc, unsigned int len) diff --git a/drivers/crypto/ccree/cc_request_mgr.h b/drivers/crypto/ccree/cc_request_mgr.h index ff7746aaaf35..ae25ca843dce 100644 --- a/drivers/crypto/ccree/cc_request_mgr.h +++ b/drivers/crypto/ccree/cc_request_mgr.h @@ -12,18 +12,17 @@ int cc_req_mgr_init(struct cc_drvdata *drvdata); -/*! - * Enqueue caller request to crypto hardware. +/** + * cc_send_request() - Enqueue caller request to crypto hardware. * - * \param drvdata - * \param cc_req The request to enqueue - * \param desc The crypto sequence - * \param len The crypto sequence length - * \param is_dout If "true": completion is handled by the caller - * If "false": this function adds a dummy descriptor completion - * and waits upon completion signal. + * @drvdata: Associated device driver context + * @cc_req: The request to enqueue + * @desc: The crypto sequence + * @len: The crypto sequence length + * @req: Asynchronous crypto request * - * \return int Returns -EINPROGRESS or error + * Return: + * Returns -EINPROGRESS or error */ int cc_send_request(struct cc_drvdata *drvdata, struct cc_crypto_req *cc_req, struct cc_hw_desc *desc, unsigned int len, diff --git a/drivers/crypto/ccree/cc_sram_mgr.c b/drivers/crypto/ccree/cc_sram_mgr.c index 62c885e6e791..37a95856361f 100644 --- a/drivers/crypto/ccree/cc_sram_mgr.c +++ b/drivers/crypto/ccree/cc_sram_mgr.c @@ -5,88 +5,61 @@ #include "cc_sram_mgr.h" /** - * struct cc_sram_ctx -Internal RAM context manager - * @sram_free_offset: the offset to the non-allocated area - */ -struct cc_sram_ctx { - cc_sram_addr_t sram_free_offset; -}; - -/** - * cc_sram_mgr_fini() - Cleanup SRAM pool. - * - * @drvdata: Associated device driver context - */ -void cc_sram_mgr_fini(struct cc_drvdata *drvdata) -{ - /* Nothing needed */ -} - -/** * cc_sram_mgr_init() - Initializes SRAM pool. * The pool starts right at the beginning of SRAM. * Returns zero for success, negative value otherwise. * * @drvdata: Associated device driver context + * + * Return: + * 0 for success, negative error code for failure. */ int cc_sram_mgr_init(struct cc_drvdata *drvdata) { - struct cc_sram_ctx *ctx; - dma_addr_t start = 0; + u32 start = 0; struct device *dev = drvdata_to_dev(drvdata); if (drvdata->hw_rev < CC_HW_REV_712) { /* Pool starts after ROM bytes */ - start = (dma_addr_t)cc_ioread(drvdata, - CC_REG(HOST_SEP_SRAM_THRESHOLD)); - + start = cc_ioread(drvdata, CC_REG(HOST_SEP_SRAM_THRESHOLD)); if ((start & 0x3) != 0) { - dev_err(dev, "Invalid SRAM offset %pad\n", &start); + dev_err(dev, "Invalid SRAM offset 0x%x\n", start); return -EINVAL; } } - /* Allocate "this" context */ - ctx = devm_kzalloc(dev, sizeof(*ctx), GFP_KERNEL); - - if (!ctx) - return -ENOMEM; - - ctx->sram_free_offset = start; - drvdata->sram_mgr_handle = ctx; - + drvdata->sram_free_offset = start; return 0; } -/*! - * Allocated buffer from SRAM pool. - * Note: Caller is responsible to free the LAST allocated buffer. - * This function does not taking care of any fragmentation may occur - * by the order of calls to alloc/free. +/** + * cc_sram_alloc() - Allocate buffer from SRAM pool. + * + * @drvdata: Associated device driver context + * @size: The requested numer of bytes to allocate * - * \param drvdata - * \param size The requested bytes to allocate + * Return: + * Address offset in SRAM or NULL_SRAM_ADDR for failure. */ -cc_sram_addr_t cc_sram_alloc(struct cc_drvdata *drvdata, u32 size) +u32 cc_sram_alloc(struct cc_drvdata *drvdata, u32 size) { - struct cc_sram_ctx *smgr_ctx = drvdata->sram_mgr_handle; struct device *dev = drvdata_to_dev(drvdata); - cc_sram_addr_t p; + u32 p; if ((size & 0x3)) { dev_err(dev, "Requested buffer size (%u) is not multiple of 4", size); return NULL_SRAM_ADDR; } - if (size > (CC_CC_SRAM_SIZE - smgr_ctx->sram_free_offset)) { - dev_err(dev, "Not enough space to allocate %u B (at offset %llu)\n", - size, smgr_ctx->sram_free_offset); + if (size > (CC_CC_SRAM_SIZE - drvdata->sram_free_offset)) { + dev_err(dev, "Not enough space to allocate %u B (at offset %u)\n", + size, drvdata->sram_free_offset); return NULL_SRAM_ADDR; } - p = smgr_ctx->sram_free_offset; - smgr_ctx->sram_free_offset += size; - dev_dbg(dev, "Allocated %u B @ %u\n", size, (unsigned int)p); + p = drvdata->sram_free_offset; + drvdata->sram_free_offset += size; + dev_dbg(dev, "Allocated %u B @ %u\n", size, p); return p; } @@ -97,13 +70,12 @@ cc_sram_addr_t cc_sram_alloc(struct cc_drvdata *drvdata, u32 size) * * @src: A pointer to array of words to set as consts. * @dst: The target SRAM buffer to set into - * @nelements: The number of words in "src" array + * @nelement: The number of words in "src" array * @seq: A pointer to the given IN/OUT descriptor sequence * @seq_len: A pointer to the given IN/OUT sequence length */ -void cc_set_sram_desc(const u32 *src, cc_sram_addr_t dst, - unsigned int nelement, struct cc_hw_desc *seq, - unsigned int *seq_len) +void cc_set_sram_desc(const u32 *src, u32 dst, unsigned int nelement, + struct cc_hw_desc *seq, unsigned int *seq_len) { u32 i; unsigned int idx = *seq_len; diff --git a/drivers/crypto/ccree/cc_sram_mgr.h b/drivers/crypto/ccree/cc_sram_mgr.h index 1d14de9ee8c3..1c965ef83002 100644 --- a/drivers/crypto/ccree/cc_sram_mgr.h +++ b/drivers/crypto/ccree/cc_sram_mgr.h @@ -10,42 +10,30 @@ struct cc_drvdata; -/** - * Address (offset) within CC internal SRAM - */ - -typedef u64 cc_sram_addr_t; - -#define NULL_SRAM_ADDR ((cc_sram_addr_t)-1) +#define NULL_SRAM_ADDR ((u32)-1) -/*! - * Initializes SRAM pool. +/** + * cc_sram_mgr_init() - Initializes SRAM pool. * The first X bytes of SRAM are reserved for ROM usage, hence, pool * starts right after X bytes. * - * \param drvdata + * @drvdata: Associated device driver context * - * \return int Zero for success, negative value otherwise. + * Return: + * Zero for success, negative value otherwise. */ int cc_sram_mgr_init(struct cc_drvdata *drvdata); -/*! - * Uninits SRAM pool. +/** + * cc_sram_alloc() - Allocate buffer from SRAM pool. * - * \param drvdata - */ -void cc_sram_mgr_fini(struct cc_drvdata *drvdata); - -/*! - * Allocated buffer from SRAM pool. - * Note: Caller is responsible to free the LAST allocated buffer. - * This function does not taking care of any fragmentation may occur - * by the order of calls to alloc/free. + * @drvdata: Associated device driver context + * @size: The requested bytes to allocate * - * \param drvdata - * \param size The requested bytes to allocate + * Return: + * Address offset in SRAM or NULL_SRAM_ADDR for failure. */ -cc_sram_addr_t cc_sram_alloc(struct cc_drvdata *drvdata, u32 size); +u32 cc_sram_alloc(struct cc_drvdata *drvdata, u32 size); /** * cc_set_sram_desc() - Create const descriptors sequence to @@ -54,12 +42,11 @@ cc_sram_addr_t cc_sram_alloc(struct cc_drvdata *drvdata, u32 size); * * @src: A pointer to array of words to set as consts. * @dst: The target SRAM buffer to set into - * @nelements: The number of words in "src" array + * @nelement: The number of words in "src" array * @seq: A pointer to the given IN/OUT descriptor sequence * @seq_len: A pointer to the given IN/OUT sequence length */ -void cc_set_sram_desc(const u32 *src, cc_sram_addr_t dst, - unsigned int nelement, struct cc_hw_desc *seq, - unsigned int *seq_len); +void cc_set_sram_desc(const u32 *src, u32 dst, unsigned int nelement, + struct cc_hw_desc *seq, unsigned int *seq_len); #endif /*__CC_SRAM_MGR_H__*/ diff --git a/drivers/crypto/chelsio/Kconfig b/drivers/crypto/chelsio/Kconfig index f078b2686418..f2756836093f 100644 --- a/drivers/crypto/chelsio/Kconfig +++ b/drivers/crypto/chelsio/Kconfig @@ -42,3 +42,14 @@ config CRYPTO_DEV_CHELSIO_TLS To compile this driver as a module, choose M here: the module will be called chtls. + +config CHELSIO_TLS_DEVICE + bool "Chelsio Inline KTLS Offload" + depends on CHELSIO_T4 + depends on TLS_DEVICE + select CRYPTO_DEV_CHELSIO + default y + help + This flag enables support for kernel tls offload over Chelsio T6 + crypto accelerator. CONFIG_CHELSIO_TLS_DEVICE flag can be enabled + only if CONFIG_TLS and CONFIG_TLS_DEVICE flags are enabled. diff --git a/drivers/crypto/chelsio/Makefile b/drivers/crypto/chelsio/Makefile index a3c05e2f4562..0e9d035927e9 100644 --- a/drivers/crypto/chelsio/Makefile +++ b/drivers/crypto/chelsio/Makefile @@ -3,5 +3,8 @@ ccflags-y := -I $(srctree)/drivers/net/ethernet/chelsio/cxgb4 obj-$(CONFIG_CRYPTO_DEV_CHELSIO) += chcr.o chcr-objs := chcr_core.o chcr_algo.o +#ifdef CONFIG_CHELSIO_TLS_DEVICE +chcr-objs += chcr_ktls.o +#endif chcr-$(CONFIG_CHELSIO_IPSEC_INLINE) += chcr_ipsec.o obj-$(CONFIG_CRYPTO_DEV_CHELSIO_TLS) += chtls/ diff --git a/drivers/crypto/chelsio/chcr_algo.c b/drivers/crypto/chelsio/chcr_algo.c index b4b9b22125d1..c29b80dd30d8 100644 --- a/drivers/crypto/chelsio/chcr_algo.c +++ b/drivers/crypto/chelsio/chcr_algo.c @@ -715,6 +715,52 @@ static int chcr_cipher_fallback(struct crypto_sync_skcipher *cipher, return err; } + +static inline int get_qidxs(struct crypto_async_request *req, + unsigned int *txqidx, unsigned int *rxqidx) +{ + struct crypto_tfm *tfm = req->tfm; + int ret = 0; + + switch (tfm->__crt_alg->cra_flags & CRYPTO_ALG_TYPE_MASK) { + case CRYPTO_ALG_TYPE_AEAD: + { + struct aead_request *aead_req = + container_of(req, struct aead_request, base); + struct chcr_aead_reqctx *reqctx = aead_request_ctx(aead_req); + *txqidx = reqctx->txqidx; + *rxqidx = reqctx->rxqidx; + break; + } + case CRYPTO_ALG_TYPE_SKCIPHER: + { + struct skcipher_request *sk_req = + container_of(req, struct skcipher_request, base); + struct chcr_skcipher_req_ctx *reqctx = + skcipher_request_ctx(sk_req); + *txqidx = reqctx->txqidx; + *rxqidx = reqctx->rxqidx; + break; + } + case CRYPTO_ALG_TYPE_AHASH: + { + struct ahash_request *ahash_req = + container_of(req, struct ahash_request, base); + struct chcr_ahash_req_ctx *reqctx = + ahash_request_ctx(ahash_req); + *txqidx = reqctx->txqidx; + *rxqidx = reqctx->rxqidx; + break; + } + default: + ret = -EINVAL; + /* should never get here */ + BUG(); + break; + } + return ret; +} + static inline void create_wreq(struct chcr_context *ctx, struct chcr_wr *chcr_req, struct crypto_async_request *req, @@ -725,7 +771,15 @@ static inline void create_wreq(struct chcr_context *ctx, unsigned int lcb) { struct uld_ctx *u_ctx = ULD_CTX(ctx); - int qid = u_ctx->lldi.rxq_ids[ctx->rx_qidx]; + unsigned int tx_channel_id, rx_channel_id; + unsigned int txqidx = 0, rxqidx = 0; + unsigned int qid, fid; + + get_qidxs(req, &txqidx, &rxqidx); + qid = u_ctx->lldi.rxq_ids[rxqidx]; + fid = u_ctx->lldi.rxq_ids[0]; + tx_channel_id = txqidx / ctx->txq_perchan; + rx_channel_id = rxqidx / ctx->rxq_perchan; chcr_req->wreq.op_to_cctx_size = FILL_WR_OP_CCTX_SIZE; @@ -734,15 +788,12 @@ static inline void create_wreq(struct chcr_context *ctx, chcr_req->wreq.len16_pkd = htonl(FW_CRYPTO_LOOKASIDE_WR_LEN16_V(DIV_ROUND_UP(len16, 16))); chcr_req->wreq.cookie = cpu_to_be64((uintptr_t)req); - chcr_req->wreq.rx_chid_to_rx_q_id = - FILL_WR_RX_Q_ID(ctx->tx_chan_id, qid, - !!lcb, ctx->tx_qidx); + chcr_req->wreq.rx_chid_to_rx_q_id = FILL_WR_RX_Q_ID(rx_channel_id, qid, + !!lcb, txqidx); - chcr_req->ulptx.cmd_dest = FILL_ULPTX_CMD_DEST(ctx->tx_chan_id, - qid); + chcr_req->ulptx.cmd_dest = FILL_ULPTX_CMD_DEST(tx_channel_id, fid); chcr_req->ulptx.len = htonl((DIV_ROUND_UP(len16, 16) - - ((sizeof(chcr_req->wreq)) >> 4))); - + ((sizeof(chcr_req->wreq)) >> 4))); chcr_req->sc_imm.cmd_more = FILL_CMD_MORE(!imm); chcr_req->sc_imm.len = cpu_to_be32(sizeof(struct cpl_tx_sec_pdu) + sizeof(chcr_req->key_ctx) + sc_len); @@ -758,7 +809,8 @@ static inline void create_wreq(struct chcr_context *ctx, static struct sk_buff *create_cipher_wr(struct cipher_wr_param *wrparam) { struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(wrparam->req); - struct ablk_ctx *ablkctx = ABLK_CTX(c_ctx(tfm)); + struct chcr_context *ctx = c_ctx(tfm); + struct ablk_ctx *ablkctx = ABLK_CTX(ctx); struct sk_buff *skb = NULL; struct chcr_wr *chcr_req; struct cpl_rx_phys_dsgl *phys_cpl; @@ -771,7 +823,8 @@ static struct sk_buff *create_cipher_wr(struct cipher_wr_param *wrparam) unsigned int kctx_len; gfp_t flags = wrparam->req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP ? GFP_KERNEL : GFP_ATOMIC; - struct adapter *adap = padap(c_ctx(tfm)->dev); + struct adapter *adap = padap(ctx->dev); + unsigned int rx_channel_id = reqctx->rxqidx / ctx->rxq_perchan; nents = sg_nents_xlen(reqctx->dstsg, wrparam->bytes, CHCR_DST_SG_SIZE, reqctx->dst_ofst); @@ -791,7 +844,7 @@ static struct sk_buff *create_cipher_wr(struct cipher_wr_param *wrparam) } chcr_req = __skb_put_zero(skb, transhdr_len); chcr_req->sec_cpl.op_ivinsrtofst = - FILL_SEC_CPL_OP_IVINSR(c_ctx(tfm)->tx_chan_id, 2, 1); + FILL_SEC_CPL_OP_IVINSR(rx_channel_id, 2, 1); chcr_req->sec_cpl.pldlen = htonl(IV + wrparam->bytes); chcr_req->sec_cpl.aadstart_cipherstop_hi = @@ -1086,8 +1139,12 @@ static int chcr_final_cipher_iv(struct skcipher_request *req, if (subtype == CRYPTO_ALG_SUB_TYPE_CTR) ctr_add_iv(iv, req->iv, DIV_ROUND_UP(reqctx->processed, AES_BLOCK_SIZE)); - else if (subtype == CRYPTO_ALG_SUB_TYPE_XTS) - ret = chcr_update_tweak(req, iv, 1); + else if (subtype == CRYPTO_ALG_SUB_TYPE_XTS) { + if (!reqctx->partial_req) + memcpy(iv, reqctx->iv, AES_BLOCK_SIZE); + else + ret = chcr_update_tweak(req, iv, 1); + } else if (subtype == CRYPTO_ALG_SUB_TYPE_CBC) { /*Already updated for Decrypt*/ if (!reqctx->op) @@ -1102,12 +1159,13 @@ static int chcr_handle_cipher_resp(struct skcipher_request *req, unsigned char *input, int err) { struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct chcr_context *ctx = c_ctx(tfm); struct uld_ctx *u_ctx = ULD_CTX(c_ctx(tfm)); struct ablk_ctx *ablkctx = ABLK_CTX(c_ctx(tfm)); struct sk_buff *skb; struct cpl_fw6_pld *fw6_pld = (struct cpl_fw6_pld *)input; struct chcr_skcipher_req_ctx *reqctx = skcipher_request_ctx(req); - struct cipher_wr_param wrparam; + struct cipher_wr_param wrparam; struct chcr_dev *dev = c_ctx(tfm)->dev; int bytes; @@ -1152,7 +1210,7 @@ static int chcr_handle_cipher_resp(struct skcipher_request *req, if (get_cryptoalg_subtype(tfm) == CRYPTO_ALG_SUB_TYPE_CTR) bytes = adjust_ctr_overflow(reqctx->iv, bytes); - wrparam.qid = u_ctx->lldi.rxq_ids[c_ctx(tfm)->rx_qidx]; + wrparam.qid = u_ctx->lldi.rxq_ids[reqctx->rxqidx]; wrparam.req = req; wrparam.bytes = bytes; skb = create_cipher_wr(&wrparam); @@ -1162,14 +1220,24 @@ static int chcr_handle_cipher_resp(struct skcipher_request *req, goto unmap; } skb->dev = u_ctx->lldi.ports[0]; - set_wr_txq(skb, CPL_PRIORITY_DATA, c_ctx(tfm)->tx_qidx); + set_wr_txq(skb, CPL_PRIORITY_DATA, reqctx->txqidx); chcr_send_wr(skb); reqctx->last_req_len = bytes; reqctx->processed += bytes; + if (get_cryptoalg_subtype(tfm) == + CRYPTO_ALG_SUB_TYPE_CBC && req->base.flags == + CRYPTO_TFM_REQ_MAY_SLEEP ) { + complete(&ctx->cbc_aes_aio_done); + } return 0; unmap: chcr_cipher_dma_unmap(&ULD_CTX(c_ctx(tfm))->lldi.pdev->dev, req); complete: + if (get_cryptoalg_subtype(tfm) == + CRYPTO_ALG_SUB_TYPE_CBC && req->base.flags == + CRYPTO_TFM_REQ_MAY_SLEEP ) { + complete(&ctx->cbc_aes_aio_done); + } chcr_dec_wrcount(dev); req->base.complete(&req->base, err); return err; @@ -1188,6 +1256,7 @@ static int process_cipher(struct skcipher_request *req, int bytes, err = -EINVAL; reqctx->processed = 0; + reqctx->partial_req = 0; if (!req->iv) goto error; if ((ablkctx->enckey_len == 0) || (ivsize > AES_BLOCK_SIZE) || @@ -1278,6 +1347,7 @@ static int process_cipher(struct skcipher_request *req, } reqctx->processed = bytes; reqctx->last_req_len = bytes; + reqctx->partial_req = !!(req->cryptlen - reqctx->processed); return 0; unmap: @@ -1289,31 +1359,43 @@ error: static int chcr_aes_encrypt(struct skcipher_request *req) { struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct chcr_skcipher_req_ctx *reqctx = skcipher_request_ctx(req); struct chcr_dev *dev = c_ctx(tfm)->dev; struct sk_buff *skb = NULL; - int err, isfull = 0; + int err; struct uld_ctx *u_ctx = ULD_CTX(c_ctx(tfm)); + struct chcr_context *ctx = c_ctx(tfm); + unsigned int cpu; + + cpu = get_cpu(); + reqctx->txqidx = cpu % ctx->ntxq; + reqctx->rxqidx = cpu % ctx->nrxq; + put_cpu(); err = chcr_inc_wrcount(dev); if (err) return -ENXIO; if (unlikely(cxgb4_is_crypto_q_full(u_ctx->lldi.ports[0], - c_ctx(tfm)->tx_qidx))) { - isfull = 1; - if (!(req->base.flags & CRYPTO_TFM_REQ_MAY_BACKLOG)) { + reqctx->txqidx) && + (!(req->base.flags & CRYPTO_TFM_REQ_MAY_BACKLOG)))) { err = -ENOSPC; goto error; - } } - err = process_cipher(req, u_ctx->lldi.rxq_ids[c_ctx(tfm)->rx_qidx], + err = process_cipher(req, u_ctx->lldi.rxq_ids[reqctx->rxqidx], &skb, CHCR_ENCRYPT_OP); if (err || !skb) return err; skb->dev = u_ctx->lldi.ports[0]; - set_wr_txq(skb, CPL_PRIORITY_DATA, c_ctx(tfm)->tx_qidx); + set_wr_txq(skb, CPL_PRIORITY_DATA, reqctx->txqidx); chcr_send_wr(skb); - return isfull ? -EBUSY : -EINPROGRESS; + if (get_cryptoalg_subtype(tfm) == + CRYPTO_ALG_SUB_TYPE_CBC && req->base.flags == + CRYPTO_TFM_REQ_MAY_SLEEP ) { + reqctx->partial_req = 1; + wait_for_completion(&ctx->cbc_aes_aio_done); + } + return -EINPROGRESS; error: chcr_dec_wrcount(dev); return err; @@ -1322,44 +1404,45 @@ error: static int chcr_aes_decrypt(struct skcipher_request *req) { struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct chcr_skcipher_req_ctx *reqctx = skcipher_request_ctx(req); struct uld_ctx *u_ctx = ULD_CTX(c_ctx(tfm)); struct chcr_dev *dev = c_ctx(tfm)->dev; struct sk_buff *skb = NULL; - int err, isfull = 0; + int err; + struct chcr_context *ctx = c_ctx(tfm); + unsigned int cpu; + + cpu = get_cpu(); + reqctx->txqidx = cpu % ctx->ntxq; + reqctx->rxqidx = cpu % ctx->nrxq; + put_cpu(); err = chcr_inc_wrcount(dev); if (err) return -ENXIO; if (unlikely(cxgb4_is_crypto_q_full(u_ctx->lldi.ports[0], - c_ctx(tfm)->tx_qidx))) { - isfull = 1; - if (!(req->base.flags & CRYPTO_TFM_REQ_MAY_BACKLOG)) + reqctx->txqidx) && + (!(req->base.flags & CRYPTO_TFM_REQ_MAY_BACKLOG)))) return -ENOSPC; - } - - err = process_cipher(req, u_ctx->lldi.rxq_ids[c_ctx(tfm)->rx_qidx], + err = process_cipher(req, u_ctx->lldi.rxq_ids[reqctx->rxqidx], &skb, CHCR_DECRYPT_OP); if (err || !skb) return err; skb->dev = u_ctx->lldi.ports[0]; - set_wr_txq(skb, CPL_PRIORITY_DATA, c_ctx(tfm)->tx_qidx); + set_wr_txq(skb, CPL_PRIORITY_DATA, reqctx->txqidx); chcr_send_wr(skb); - return isfull ? -EBUSY : -EINPROGRESS; + return -EINPROGRESS; } - static int chcr_device_init(struct chcr_context *ctx) { struct uld_ctx *u_ctx = NULL; - unsigned int id; - int txq_perchan, txq_idx, ntxq; - int err = 0, rxq_perchan, rxq_idx; + int txq_perchan, ntxq; + int err = 0, rxq_perchan; - id = smp_processor_id(); if (!ctx->dev) { u_ctx = assign_chcr_device(); if (!u_ctx) { - err = -ENXIO; pr_err("chcr device assignment fails\n"); goto out; } @@ -1367,23 +1450,10 @@ static int chcr_device_init(struct chcr_context *ctx) ntxq = u_ctx->lldi.ntxq; rxq_perchan = u_ctx->lldi.nrxq / u_ctx->lldi.nchan; txq_perchan = ntxq / u_ctx->lldi.nchan; - spin_lock(&ctx->dev->lock_chcr_dev); - ctx->tx_chan_id = ctx->dev->tx_channel_id; - ctx->dev->tx_channel_id = - (ctx->dev->tx_channel_id + 1) % u_ctx->lldi.nchan; - spin_unlock(&ctx->dev->lock_chcr_dev); - rxq_idx = ctx->tx_chan_id * rxq_perchan; - rxq_idx += id % rxq_perchan; - txq_idx = ctx->tx_chan_id * txq_perchan; - txq_idx += id % txq_perchan; - ctx->rx_qidx = rxq_idx; - ctx->tx_qidx = txq_idx; - /* Channel Id used by SGE to forward packet to Host. - * Same value should be used in cpl_fw6_pld RSS_CH field - * by FW. Driver programs PCI channel ID to be used in fw - * at the time of queue allocation with value "pi->tx_chan" - */ - ctx->pci_chan_id = txq_idx / txq_perchan; + ctx->ntxq = ntxq; + ctx->nrxq = u_ctx->lldi.nrxq; + ctx->rxq_perchan = rxq_perchan; + ctx->txq_perchan = txq_perchan; } out: return err; @@ -1401,7 +1471,7 @@ static int chcr_init_tfm(struct crypto_skcipher *tfm) pr_err("failed to allocate fallback for %s\n", alg->base.cra_name); return PTR_ERR(ablkctx->sw_cipher); } - + init_completion(&ctx->cbc_aes_aio_done); crypto_skcipher_set_reqsize(tfm, sizeof(struct chcr_skcipher_req_ctx)); return chcr_device_init(ctx); @@ -1485,9 +1555,10 @@ static struct sk_buff *create_hash_wr(struct ahash_request *req, { struct chcr_ahash_req_ctx *req_ctx = ahash_request_ctx(req); struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); - struct hmac_ctx *hmacctx = HMAC_CTX(h_ctx(tfm)); + struct chcr_context *ctx = h_ctx(tfm); + struct hmac_ctx *hmacctx = HMAC_CTX(ctx); struct sk_buff *skb = NULL; - struct uld_ctx *u_ctx = ULD_CTX(h_ctx(tfm)); + struct uld_ctx *u_ctx = ULD_CTX(ctx); struct chcr_wr *chcr_req; struct ulptx_sgl *ulptx; unsigned int nents = 0, transhdr_len; @@ -1496,6 +1567,7 @@ static struct sk_buff *create_hash_wr(struct ahash_request *req, GFP_ATOMIC; struct adapter *adap = padap(h_ctx(tfm)->dev); int error = 0; + unsigned int rx_channel_id = req_ctx->rxqidx / ctx->rxq_perchan; transhdr_len = HASH_TRANSHDR_SIZE(param->kctx_len); req_ctx->hctx_wr.imm = (transhdr_len + param->bfr_len + @@ -1513,7 +1585,8 @@ static struct sk_buff *create_hash_wr(struct ahash_request *req, chcr_req = __skb_put_zero(skb, transhdr_len); chcr_req->sec_cpl.op_ivinsrtofst = - FILL_SEC_CPL_OP_IVINSR(h_ctx(tfm)->tx_chan_id, 2, 0); + FILL_SEC_CPL_OP_IVINSR(rx_channel_id, 2, 0); + chcr_req->sec_cpl.pldlen = htonl(param->bfr_len + param->sg_len); chcr_req->sec_cpl.aadstart_cipherstop_hi = @@ -1576,16 +1649,22 @@ static int chcr_ahash_update(struct ahash_request *req) { struct chcr_ahash_req_ctx *req_ctx = ahash_request_ctx(req); struct crypto_ahash *rtfm = crypto_ahash_reqtfm(req); - struct uld_ctx *u_ctx = NULL; + struct uld_ctx *u_ctx = ULD_CTX(h_ctx(rtfm)); + struct chcr_context *ctx = h_ctx(rtfm); struct chcr_dev *dev = h_ctx(rtfm)->dev; struct sk_buff *skb; u8 remainder = 0, bs; unsigned int nbytes = req->nbytes; struct hash_wr_param params; - int error, isfull = 0; + int error; + unsigned int cpu; + + cpu = get_cpu(); + req_ctx->txqidx = cpu % ctx->ntxq; + req_ctx->rxqidx = cpu % ctx->nrxq; + put_cpu(); bs = crypto_tfm_alg_blocksize(crypto_ahash_tfm(rtfm)); - u_ctx = ULD_CTX(h_ctx(rtfm)); if (nbytes + req_ctx->reqlen >= bs) { remainder = (nbytes + req_ctx->reqlen) % bs; @@ -1603,12 +1682,10 @@ static int chcr_ahash_update(struct ahash_request *req) * inflight count for dev guarantees that lldi and padap is valid */ if (unlikely(cxgb4_is_crypto_q_full(u_ctx->lldi.ports[0], - h_ctx(rtfm)->tx_qidx))) { - isfull = 1; - if (!(req->base.flags & CRYPTO_TFM_REQ_MAY_BACKLOG)) { + req_ctx->txqidx) && + (!(req->base.flags & CRYPTO_TFM_REQ_MAY_BACKLOG)))) { error = -ENOSPC; goto err; - } } chcr_init_hctx_per_wr(req_ctx); @@ -1650,10 +1727,9 @@ static int chcr_ahash_update(struct ahash_request *req) } req_ctx->reqlen = remainder; skb->dev = u_ctx->lldi.ports[0]; - set_wr_txq(skb, CPL_PRIORITY_DATA, h_ctx(rtfm)->tx_qidx); + set_wr_txq(skb, CPL_PRIORITY_DATA, req_ctx->txqidx); chcr_send_wr(skb); - - return isfull ? -EBUSY : -EINPROGRESS; + return -EINPROGRESS; unmap: chcr_hash_dma_unmap(&u_ctx->lldi.pdev->dev, req); err: @@ -1678,16 +1754,22 @@ static int chcr_ahash_final(struct ahash_request *req) struct chcr_dev *dev = h_ctx(rtfm)->dev; struct hash_wr_param params; struct sk_buff *skb; - struct uld_ctx *u_ctx = NULL; + struct uld_ctx *u_ctx = ULD_CTX(h_ctx(rtfm)); + struct chcr_context *ctx = h_ctx(rtfm); u8 bs = crypto_tfm_alg_blocksize(crypto_ahash_tfm(rtfm)); int error = -EINVAL; + unsigned int cpu; + + cpu = get_cpu(); + req_ctx->txqidx = cpu % ctx->ntxq; + req_ctx->rxqidx = cpu % ctx->nrxq; + put_cpu(); error = chcr_inc_wrcount(dev); if (error) return -ENXIO; chcr_init_hctx_per_wr(req_ctx); - u_ctx = ULD_CTX(h_ctx(rtfm)); if (is_hmac(crypto_ahash_tfm(rtfm))) params.opad_needed = 1; else @@ -1727,7 +1809,7 @@ static int chcr_ahash_final(struct ahash_request *req) } req_ctx->reqlen = 0; skb->dev = u_ctx->lldi.ports[0]; - set_wr_txq(skb, CPL_PRIORITY_DATA, h_ctx(rtfm)->tx_qidx); + set_wr_txq(skb, CPL_PRIORITY_DATA, req_ctx->txqidx); chcr_send_wr(skb); return -EINPROGRESS; err: @@ -1740,25 +1822,29 @@ static int chcr_ahash_finup(struct ahash_request *req) struct chcr_ahash_req_ctx *req_ctx = ahash_request_ctx(req); struct crypto_ahash *rtfm = crypto_ahash_reqtfm(req); struct chcr_dev *dev = h_ctx(rtfm)->dev; - struct uld_ctx *u_ctx = NULL; + struct uld_ctx *u_ctx = ULD_CTX(h_ctx(rtfm)); + struct chcr_context *ctx = h_ctx(rtfm); struct sk_buff *skb; struct hash_wr_param params; u8 bs; - int error, isfull = 0; + int error; + unsigned int cpu; + + cpu = get_cpu(); + req_ctx->txqidx = cpu % ctx->ntxq; + req_ctx->rxqidx = cpu % ctx->nrxq; + put_cpu(); bs = crypto_tfm_alg_blocksize(crypto_ahash_tfm(rtfm)); - u_ctx = ULD_CTX(h_ctx(rtfm)); error = chcr_inc_wrcount(dev); if (error) return -ENXIO; if (unlikely(cxgb4_is_crypto_q_full(u_ctx->lldi.ports[0], - h_ctx(rtfm)->tx_qidx))) { - isfull = 1; - if (!(req->base.flags & CRYPTO_TFM_REQ_MAY_BACKLOG)) { + req_ctx->txqidx) && + (!(req->base.flags & CRYPTO_TFM_REQ_MAY_BACKLOG)))) { error = -ENOSPC; goto err; - } } chcr_init_hctx_per_wr(req_ctx); error = chcr_hash_dma_map(&u_ctx->lldi.pdev->dev, req); @@ -1816,10 +1902,9 @@ static int chcr_ahash_finup(struct ahash_request *req) req_ctx->reqlen = 0; req_ctx->hctx_wr.processed += params.sg_len; skb->dev = u_ctx->lldi.ports[0]; - set_wr_txq(skb, CPL_PRIORITY_DATA, h_ctx(rtfm)->tx_qidx); + set_wr_txq(skb, CPL_PRIORITY_DATA, req_ctx->txqidx); chcr_send_wr(skb); - - return isfull ? -EBUSY : -EINPROGRESS; + return -EINPROGRESS; unmap: chcr_hash_dma_unmap(&u_ctx->lldi.pdev->dev, req); err: @@ -1832,11 +1917,18 @@ static int chcr_ahash_digest(struct ahash_request *req) struct chcr_ahash_req_ctx *req_ctx = ahash_request_ctx(req); struct crypto_ahash *rtfm = crypto_ahash_reqtfm(req); struct chcr_dev *dev = h_ctx(rtfm)->dev; - struct uld_ctx *u_ctx = NULL; + struct uld_ctx *u_ctx = ULD_CTX(h_ctx(rtfm)); + struct chcr_context *ctx = h_ctx(rtfm); struct sk_buff *skb; struct hash_wr_param params; u8 bs; - int error, isfull = 0; + int error; + unsigned int cpu; + + cpu = get_cpu(); + req_ctx->txqidx = cpu % ctx->ntxq; + req_ctx->rxqidx = cpu % ctx->nrxq; + put_cpu(); rtfm->init(req); bs = crypto_tfm_alg_blocksize(crypto_ahash_tfm(rtfm)); @@ -1844,14 +1936,11 @@ static int chcr_ahash_digest(struct ahash_request *req) if (error) return -ENXIO; - u_ctx = ULD_CTX(h_ctx(rtfm)); if (unlikely(cxgb4_is_crypto_q_full(u_ctx->lldi.ports[0], - h_ctx(rtfm)->tx_qidx))) { - isfull = 1; - if (!(req->base.flags & CRYPTO_TFM_REQ_MAY_BACKLOG)) { + req_ctx->txqidx) && + (!(req->base.flags & CRYPTO_TFM_REQ_MAY_BACKLOG)))) { error = -ENOSPC; goto err; - } } chcr_init_hctx_per_wr(req_ctx); @@ -1907,9 +1996,9 @@ static int chcr_ahash_digest(struct ahash_request *req) } req_ctx->hctx_wr.processed += params.sg_len; skb->dev = u_ctx->lldi.ports[0]; - set_wr_txq(skb, CPL_PRIORITY_DATA, h_ctx(rtfm)->tx_qidx); + set_wr_txq(skb, CPL_PRIORITY_DATA, req_ctx->txqidx); chcr_send_wr(skb); - return isfull ? -EBUSY : -EINPROGRESS; + return -EINPROGRESS; unmap: chcr_hash_dma_unmap(&u_ctx->lldi.pdev->dev, req); err: @@ -1922,14 +2011,20 @@ static int chcr_ahash_continue(struct ahash_request *req) struct chcr_ahash_req_ctx *reqctx = ahash_request_ctx(req); struct chcr_hctx_per_wr *hctx_wr = &reqctx->hctx_wr; struct crypto_ahash *rtfm = crypto_ahash_reqtfm(req); - struct uld_ctx *u_ctx = NULL; + struct chcr_context *ctx = h_ctx(rtfm); + struct uld_ctx *u_ctx = ULD_CTX(ctx); struct sk_buff *skb; struct hash_wr_param params; u8 bs; int error; + unsigned int cpu; + + cpu = get_cpu(); + reqctx->txqidx = cpu % ctx->ntxq; + reqctx->rxqidx = cpu % ctx->nrxq; + put_cpu(); bs = crypto_tfm_alg_blocksize(crypto_ahash_tfm(rtfm)); - u_ctx = ULD_CTX(h_ctx(rtfm)); get_alg_config(¶ms.alg_prm, crypto_ahash_digestsize(rtfm)); params.kctx_len = roundup(params.alg_prm.result_size, 16); if (is_hmac(crypto_ahash_tfm(rtfm))) { @@ -1969,7 +2064,7 @@ static int chcr_ahash_continue(struct ahash_request *req) } hctx_wr->processed += params.sg_len; skb->dev = u_ctx->lldi.ports[0]; - set_wr_txq(skb, CPL_PRIORITY_DATA, h_ctx(rtfm)->tx_qidx); + set_wr_txq(skb, CPL_PRIORITY_DATA, reqctx->txqidx); chcr_send_wr(skb); return 0; err: @@ -2315,7 +2410,8 @@ static struct sk_buff *create_authenc_wr(struct aead_request *req, int size) { struct crypto_aead *tfm = crypto_aead_reqtfm(req); - struct chcr_aead_ctx *aeadctx = AEAD_CTX(a_ctx(tfm)); + struct chcr_context *ctx = a_ctx(tfm); + struct chcr_aead_ctx *aeadctx = AEAD_CTX(ctx); struct chcr_authenc_ctx *actx = AUTHENC_CTX(aeadctx); struct chcr_aead_reqctx *reqctx = aead_request_ctx(req); struct sk_buff *skb = NULL; @@ -2331,7 +2427,8 @@ static struct sk_buff *create_authenc_wr(struct aead_request *req, int null = 0; gfp_t flags = req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP ? GFP_KERNEL : GFP_ATOMIC; - struct adapter *adap = padap(a_ctx(tfm)->dev); + struct adapter *adap = padap(ctx->dev); + unsigned int rx_channel_id = reqctx->rxqidx / ctx->rxq_perchan; if (req->cryptlen == 0) return NULL; @@ -2351,7 +2448,7 @@ static struct sk_buff *create_authenc_wr(struct aead_request *req, snents = sg_nents_xlen(req->src, req->assoclen + req->cryptlen, CHCR_SRC_SG_SIZE, 0); dst_size = get_space_for_phys_dsgl(dnents); - kctx_len = (ntohl(KEY_CONTEXT_CTX_LEN_V(aeadctx->key_ctx_hdr)) << 4) + kctx_len = (KEY_CONTEXT_CTX_LEN_G(ntohl(aeadctx->key_ctx_hdr)) << 4) - sizeof(chcr_req->key_ctx); transhdr_len = CIPHER_TRANSHDR_SIZE(kctx_len, dst_size); reqctx->imm = (transhdr_len + req->assoclen + req->cryptlen) < @@ -2383,7 +2480,7 @@ static struct sk_buff *create_authenc_wr(struct aead_request *req, * to the hardware spec */ chcr_req->sec_cpl.op_ivinsrtofst = - FILL_SEC_CPL_OP_IVINSR(a_ctx(tfm)->tx_chan_id, 2, 1); + FILL_SEC_CPL_OP_IVINSR(rx_channel_id, 2, 1); chcr_req->sec_cpl.pldlen = htonl(req->assoclen + IV + req->cryptlen); chcr_req->sec_cpl.aadstart_cipherstop_hi = FILL_SEC_CPL_CIPHERSTOP_HI( null ? 0 : 1 + IV, @@ -2471,8 +2568,9 @@ int chcr_aead_dma_map(struct device *dev, else reqctx->b0_dma = 0; if (req->src == req->dst) { - error = dma_map_sg(dev, req->src, sg_nents(req->src), - DMA_BIDIRECTIONAL); + error = dma_map_sg(dev, req->src, + sg_nents_for_len(req->src, dst_size), + DMA_BIDIRECTIONAL); if (!error) goto err; } else { @@ -2558,13 +2656,14 @@ void chcr_add_aead_dst_ent(struct aead_request *req, unsigned int authsize = crypto_aead_authsize(tfm); struct chcr_context *ctx = a_ctx(tfm); u32 temp; + unsigned int rx_channel_id = reqctx->rxqidx / ctx->rxq_perchan; dsgl_walk_init(&dsgl_walk, phys_cpl); dsgl_walk_add_page(&dsgl_walk, IV + reqctx->b0_len, reqctx->iv_dma); temp = req->assoclen + req->cryptlen + (reqctx->op ? -authsize : authsize); dsgl_walk_add_sg(&dsgl_walk, req->dst, temp, 0); - dsgl_walk_end(&dsgl_walk, qid, ctx->pci_chan_id); + dsgl_walk_end(&dsgl_walk, qid, rx_channel_id); } void chcr_add_cipher_src_ent(struct skcipher_request *req, @@ -2599,14 +2698,14 @@ void chcr_add_cipher_dst_ent(struct skcipher_request *req, struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(wrparam->req); struct chcr_context *ctx = c_ctx(tfm); struct dsgl_walk dsgl_walk; + unsigned int rx_channel_id = reqctx->rxqidx / ctx->rxq_perchan; dsgl_walk_init(&dsgl_walk, phys_cpl); dsgl_walk_add_sg(&dsgl_walk, reqctx->dstsg, wrparam->bytes, reqctx->dst_ofst); reqctx->dstsg = dsgl_walk.last_sg; reqctx->dst_ofst = dsgl_walk.last_sg_len; - - dsgl_walk_end(&dsgl_walk, qid, ctx->pci_chan_id); + dsgl_walk_end(&dsgl_walk, qid, rx_channel_id); } void chcr_add_hash_src_ent(struct ahash_request *req, @@ -2804,10 +2903,12 @@ static void fill_sec_cpl_for_aead(struct cpl_tx_sec_pdu *sec_cpl, unsigned short op_type) { struct crypto_aead *tfm = crypto_aead_reqtfm(req); - struct chcr_aead_ctx *aeadctx = AEAD_CTX(a_ctx(tfm)); + struct chcr_context *ctx = a_ctx(tfm); + struct chcr_aead_ctx *aeadctx = AEAD_CTX(ctx); + struct chcr_aead_reqctx *reqctx = aead_request_ctx(req); unsigned int cipher_mode = CHCR_SCMD_CIPHER_MODE_AES_CCM; unsigned int mac_mode = CHCR_SCMD_AUTH_MODE_CBCMAC; - unsigned int c_id = a_ctx(tfm)->tx_chan_id; + unsigned int rx_channel_id = reqctx->rxqidx / ctx->rxq_perchan; unsigned int ccm_xtra; unsigned char tag_offset = 0, auth_offset = 0; unsigned int assoclen; @@ -2828,9 +2929,7 @@ static void fill_sec_cpl_for_aead(struct cpl_tx_sec_pdu *sec_cpl, auth_offset = 0; } - - sec_cpl->op_ivinsrtofst = FILL_SEC_CPL_OP_IVINSR(c_id, - 2, 1); + sec_cpl->op_ivinsrtofst = FILL_SEC_CPL_OP_IVINSR(rx_channel_id, 2, 1); sec_cpl->pldlen = htonl(req->assoclen + IV + req->cryptlen + ccm_xtra); /* For CCM there wil be b0 always. So AAD start will be 1 always */ @@ -2973,7 +3072,8 @@ static struct sk_buff *create_gcm_wr(struct aead_request *req, int size) { struct crypto_aead *tfm = crypto_aead_reqtfm(req); - struct chcr_aead_ctx *aeadctx = AEAD_CTX(a_ctx(tfm)); + struct chcr_context *ctx = a_ctx(tfm); + struct chcr_aead_ctx *aeadctx = AEAD_CTX(ctx); struct chcr_aead_reqctx *reqctx = aead_request_ctx(req); struct sk_buff *skb = NULL; struct chcr_wr *chcr_req; @@ -2986,7 +3086,8 @@ static struct sk_buff *create_gcm_wr(struct aead_request *req, u8 *ivptr; gfp_t flags = req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP ? GFP_KERNEL : GFP_ATOMIC; - struct adapter *adap = padap(a_ctx(tfm)->dev); + struct adapter *adap = padap(ctx->dev); + unsigned int rx_channel_id = reqctx->rxqidx / ctx->rxq_perchan; if (get_aead_subtype(tfm) == CRYPTO_ALG_SUB_TYPE_AEAD_RFC4106) assoclen = req->assoclen - 8; @@ -3028,7 +3129,7 @@ static struct sk_buff *create_gcm_wr(struct aead_request *req, //Offset of tag from end temp = (reqctx->op == CHCR_ENCRYPT_OP) ? 0 : authsize; chcr_req->sec_cpl.op_ivinsrtofst = FILL_SEC_CPL_OP_IVINSR( - a_ctx(tfm)->tx_chan_id, 2, 1); + rx_channel_id, 2, 1); chcr_req->sec_cpl.pldlen = htonl(req->assoclen + IV + req->cryptlen); chcr_req->sec_cpl.aadstart_cipherstop_hi = FILL_SEC_CPL_CIPHERSTOP_HI( @@ -3576,9 +3677,9 @@ static int chcr_aead_op(struct aead_request *req, { struct crypto_aead *tfm = crypto_aead_reqtfm(req); struct chcr_aead_reqctx *reqctx = aead_request_ctx(req); - struct uld_ctx *u_ctx; + struct chcr_context *ctx = a_ctx(tfm); + struct uld_ctx *u_ctx = ULD_CTX(ctx); struct sk_buff *skb; - int isfull = 0; struct chcr_dev *cdev; cdev = a_ctx(tfm)->dev; @@ -3594,18 +3695,15 @@ static int chcr_aead_op(struct aead_request *req, return chcr_aead_fallback(req, reqctx->op); } - u_ctx = ULD_CTX(a_ctx(tfm)); if (cxgb4_is_crypto_q_full(u_ctx->lldi.ports[0], - a_ctx(tfm)->tx_qidx)) { - isfull = 1; - if (!(req->base.flags & CRYPTO_TFM_REQ_MAY_BACKLOG)) { + reqctx->txqidx) && + (!(req->base.flags & CRYPTO_TFM_REQ_MAY_BACKLOG))) { chcr_dec_wrcount(cdev); return -ENOSPC; - } } /* Form a WR from req */ - skb = create_wr_fn(req, u_ctx->lldi.rxq_ids[a_ctx(tfm)->rx_qidx], size); + skb = create_wr_fn(req, u_ctx->lldi.rxq_ids[reqctx->rxqidx], size); if (IS_ERR_OR_NULL(skb)) { chcr_dec_wrcount(cdev); @@ -3613,15 +3711,22 @@ static int chcr_aead_op(struct aead_request *req, } skb->dev = u_ctx->lldi.ports[0]; - set_wr_txq(skb, CPL_PRIORITY_DATA, a_ctx(tfm)->tx_qidx); + set_wr_txq(skb, CPL_PRIORITY_DATA, reqctx->txqidx); chcr_send_wr(skb); - return isfull ? -EBUSY : -EINPROGRESS; + return -EINPROGRESS; } static int chcr_aead_encrypt(struct aead_request *req) { struct crypto_aead *tfm = crypto_aead_reqtfm(req); struct chcr_aead_reqctx *reqctx = aead_request_ctx(req); + struct chcr_context *ctx = a_ctx(tfm); + unsigned int cpu; + + cpu = get_cpu(); + reqctx->txqidx = cpu % ctx->ntxq; + reqctx->rxqidx = cpu % ctx->nrxq; + put_cpu(); reqctx->verify = VERIFY_HW; reqctx->op = CHCR_ENCRYPT_OP; @@ -3643,9 +3748,16 @@ static int chcr_aead_encrypt(struct aead_request *req) static int chcr_aead_decrypt(struct aead_request *req) { struct crypto_aead *tfm = crypto_aead_reqtfm(req); - struct chcr_aead_ctx *aeadctx = AEAD_CTX(a_ctx(tfm)); + struct chcr_context *ctx = a_ctx(tfm); + struct chcr_aead_ctx *aeadctx = AEAD_CTX(ctx); struct chcr_aead_reqctx *reqctx = aead_request_ctx(req); int size; + unsigned int cpu; + + cpu = get_cpu(); + reqctx->txqidx = cpu % ctx->ntxq; + reqctx->rxqidx = cpu % ctx->nrxq; + put_cpu(); if (aeadctx->mayverify == VERIFY_SW) { size = crypto_aead_maxauthsize(tfm); diff --git a/drivers/crypto/chelsio/chcr_common.h b/drivers/crypto/chelsio/chcr_common.h new file mode 100644 index 000000000000..33f589cbfba1 --- /dev/null +++ b/drivers/crypto/chelsio/chcr_common.h @@ -0,0 +1,135 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* Copyright (C) 2020 Chelsio Communications. All rights reserved. */ + +#ifndef __CHCR_COMMON_H__ +#define __CHCR_COMMON_H__ + +#include "cxgb4.h" + +#define CHCR_MAX_SALT 4 +#define CHCR_KEYCTX_MAC_KEY_SIZE_128 0 +#define CHCR_KEYCTX_CIPHER_KEY_SIZE_128 0 +#define CHCR_SCMD_CIPHER_MODE_AES_GCM 2 +#define CHCR_SCMD_CIPHER_MODE_AES_CTR 3 +#define CHCR_CPL_TX_SEC_PDU_LEN_64BIT 2 +#define CHCR_SCMD_SEQ_NO_CTRL_64BIT 3 +#define CHCR_SCMD_PROTO_VERSION_TLS 0 +#define CHCR_SCMD_PROTO_VERSION_GENERIC 4 +#define CHCR_SCMD_AUTH_MODE_GHASH 4 +#define AES_BLOCK_LEN 16 + +enum chcr_state { + CHCR_INIT = 0, + CHCR_ATTACH, + CHCR_DETACH, +}; + +struct chcr_dev { + spinlock_t lock_chcr_dev; /* chcr dev structure lock */ + enum chcr_state state; + atomic_t inflight; + int wqretry; + struct delayed_work detach_work; + struct completion detach_comp; + unsigned char tx_channel_id; +}; + +struct uld_ctx { + struct list_head entry; + struct cxgb4_lld_info lldi; + struct chcr_dev dev; +}; + +struct ktls_key_ctx { + __be32 ctx_hdr; + u8 salt[CHCR_MAX_SALT]; + __be64 iv_to_auth; + unsigned char key[TLS_CIPHER_AES_GCM_128_KEY_SIZE + + TLS_CIPHER_AES_GCM_256_TAG_SIZE]; +}; + +/* Crypto key context */ +#define KEY_CONTEXT_CTX_LEN_S 24 +#define KEY_CONTEXT_CTX_LEN_V(x) ((x) << KEY_CONTEXT_CTX_LEN_S) + +#define KEY_CONTEXT_SALT_PRESENT_S 10 +#define KEY_CONTEXT_SALT_PRESENT_V(x) ((x) << KEY_CONTEXT_SALT_PRESENT_S) +#define KEY_CONTEXT_SALT_PRESENT_F KEY_CONTEXT_SALT_PRESENT_V(1U) + +#define KEY_CONTEXT_VALID_S 0 +#define KEY_CONTEXT_VALID_V(x) ((x) << KEY_CONTEXT_VALID_S) +#define KEY_CONTEXT_VALID_F KEY_CONTEXT_VALID_V(1U) + +#define KEY_CONTEXT_CK_SIZE_S 6 +#define KEY_CONTEXT_CK_SIZE_V(x) ((x) << KEY_CONTEXT_CK_SIZE_S) + +#define KEY_CONTEXT_MK_SIZE_S 2 +#define KEY_CONTEXT_MK_SIZE_V(x) ((x) << KEY_CONTEXT_MK_SIZE_S) + +#define KEY_CONTEXT_OPAD_PRESENT_S 11 +#define KEY_CONTEXT_OPAD_PRESENT_V(x) ((x) << KEY_CONTEXT_OPAD_PRESENT_S) +#define KEY_CONTEXT_OPAD_PRESENT_F KEY_CONTEXT_OPAD_PRESENT_V(1U) + +#define FILL_KEY_CTX_HDR(ck_size, mk_size, ctx_len) \ + htonl(KEY_CONTEXT_MK_SIZE_V(mk_size) | \ + KEY_CONTEXT_CK_SIZE_V(ck_size) | \ + KEY_CONTEXT_VALID_F | \ + KEY_CONTEXT_SALT_PRESENT_F | \ + KEY_CONTEXT_CTX_LEN_V((ctx_len))) + +struct uld_ctx *assign_chcr_device(void); + +static inline void *chcr_copy_to_txd(const void *src, const struct sge_txq *q, + void *pos, int length) +{ + int left = (void *)q->stat - pos; + u64 *p; + + if (likely(length <= left)) { + memcpy(pos, src, length); + pos += length; + } else { + memcpy(pos, src, left); + memcpy(q->desc, src + left, length - left); + pos = (void *)q->desc + (length - left); + } + /* 0-pad to multiple of 16 */ + p = PTR_ALIGN(pos, 8); + if ((uintptr_t)p & 8) { + *p = 0; + return p + 1; + } + return p; +} + +static inline unsigned int chcr_txq_avail(const struct sge_txq *q) +{ + return q->size - 1 - q->in_use; +} + +static inline void chcr_txq_advance(struct sge_txq *q, unsigned int n) +{ + q->in_use += n; + q->pidx += n; + if (q->pidx >= q->size) + q->pidx -= q->size; +} + +static inline void chcr_eth_txq_stop(struct sge_eth_txq *q) +{ + netif_tx_stop_queue(q->txq); + q->q.stops++; +} + +static inline unsigned int chcr_sgl_len(unsigned int n) +{ + n--; + return (3 * n) / 2 + (n & 1) + 2; +} + +static inline unsigned int chcr_flits_to_desc(unsigned int n) +{ + WARN_ON(n > SGE_MAX_WR_LEN / 8); + return DIV_ROUND_UP(n, 8); +} +#endif /* __CHCR_COMMON_H__ */ diff --git a/drivers/crypto/chelsio/chcr_core.c b/drivers/crypto/chelsio/chcr_core.c index e937605670ac..ffd4ec0c7374 100644 --- a/drivers/crypto/chelsio/chcr_core.c +++ b/drivers/crypto/chelsio/chcr_core.c @@ -28,13 +28,21 @@ static struct chcr_driver_data drv_data; -typedef int (*chcr_handler_func)(struct chcr_dev *dev, unsigned char *input); -static int cpl_fw6_pld_handler(struct chcr_dev *dev, unsigned char *input); +typedef int (*chcr_handler_func)(struct adapter *adap, unsigned char *input); +static int cpl_fw6_pld_handler(struct adapter *adap, unsigned char *input); static void *chcr_uld_add(const struct cxgb4_lld_info *lld); static int chcr_uld_state_change(void *handle, enum cxgb4_state state); +#ifdef CONFIG_CHELSIO_IPSEC_INLINE +static void update_netdev_features(void); +#endif /* CONFIG_CHELSIO_IPSEC_INLINE */ + static chcr_handler_func work_handlers[NUM_CPL_CMDS] = { [CPL_FW6_PLD] = cpl_fw6_pld_handler, +#ifdef CONFIG_CHELSIO_TLS_DEVICE + [CPL_ACT_OPEN_RPL] = chcr_ktls_cpl_act_open_rpl, + [CPL_SET_TCB_RPL] = chcr_ktls_cpl_set_tcb_rpl, +#endif }; static struct cxgb4_uld_info chcr_uld_info = { @@ -45,9 +53,9 @@ static struct cxgb4_uld_info chcr_uld_info = { .add = chcr_uld_add, .state_change = chcr_uld_state_change, .rx_handler = chcr_uld_rx_handler, -#ifdef CONFIG_CHELSIO_IPSEC_INLINE +#if defined(CONFIG_CHELSIO_IPSEC_INLINE) || defined(CONFIG_CHELSIO_TLS_DEVICE) .tx_handler = chcr_uld_tx_handler, -#endif /* CONFIG_CHELSIO_IPSEC_INLINE */ +#endif /* CONFIG_CHELSIO_IPSEC_INLINE || CONFIG_CHELSIO_TLS_DEVICE */ }; static void detach_work_fn(struct work_struct *work) @@ -125,8 +133,6 @@ static void chcr_dev_init(struct uld_ctx *u_ctx) atomic_set(&dev->inflight, 0); mutex_lock(&drv_data.drv_mutex); list_add_tail(&u_ctx->entry, &drv_data.inact_dev); - if (!drv_data.last_dev) - drv_data.last_dev = u_ctx; mutex_unlock(&drv_data.drv_mutex); } @@ -150,14 +156,13 @@ static int chcr_dev_move(struct uld_ctx *u_ctx) return 0; } -static int cpl_fw6_pld_handler(struct chcr_dev *dev, +static int cpl_fw6_pld_handler(struct adapter *adap, unsigned char *input) { struct crypto_async_request *req; struct cpl_fw6_pld *fw6_pld; u32 ack_err_status = 0; int error_status = 0; - struct adapter *adap = padap(dev); fw6_pld = (struct cpl_fw6_pld *)input; req = (struct crypto_async_request *)(uintptr_t)be64_to_cpu( @@ -190,6 +195,7 @@ static void *chcr_uld_add(const struct cxgb4_lld_info *lld) struct uld_ctx *u_ctx; /* Create the device and add it in the device list */ + pr_info_once("%s - version %s\n", DRV_DESC, DRV_VERSION); if (!(lld->ulp_crypto & ULP_CRYPTO_LOOKASIDE)) return ERR_PTR(-EOPNOTSUPP); @@ -201,10 +207,11 @@ static void *chcr_uld_add(const struct cxgb4_lld_info *lld) } u_ctx->lldi = *lld; chcr_dev_init(u_ctx); -#ifdef CONFIG_CHELSIO_IPSEC_INLINE - if (lld->crypto & ULP_CRYPTO_IPSEC_INLINE) - chcr_add_xfrmops(lld); -#endif /* CONFIG_CHELSIO_IPSEC_INLINE */ + +#ifdef CONFIG_CHELSIO_TLS_DEVICE + if (lld->ulp_crypto & ULP_CRYPTO_KTLS_INLINE) + chcr_enable_ktls(padap(&u_ctx->dev)); +#endif out: return u_ctx; } @@ -214,26 +221,37 @@ int chcr_uld_rx_handler(void *handle, const __be64 *rsp, { struct uld_ctx *u_ctx = (struct uld_ctx *)handle; struct chcr_dev *dev = &u_ctx->dev; + struct adapter *adap = padap(dev); const struct cpl_fw6_pld *rpl = (struct cpl_fw6_pld *)rsp; - if (rpl->opcode != CPL_FW6_PLD) { - pr_err("Unsupported opcode\n"); + if (!work_handlers[rpl->opcode]) { + pr_err("Unsupported opcode %d received\n", rpl->opcode); return 0; } if (!pgl) - work_handlers[rpl->opcode](dev, (unsigned char *)&rsp[1]); + work_handlers[rpl->opcode](adap, (unsigned char *)&rsp[1]); else - work_handlers[rpl->opcode](dev, pgl->va); + work_handlers[rpl->opcode](adap, pgl->va); return 0; } -#ifdef CONFIG_CHELSIO_IPSEC_INLINE +#if defined(CONFIG_CHELSIO_IPSEC_INLINE) || defined(CONFIG_CHELSIO_TLS_DEVICE) int chcr_uld_tx_handler(struct sk_buff *skb, struct net_device *dev) { + /* In case if skb's decrypted bit is set, it's nic tls packet, else it's + * ipsec packet. + */ +#ifdef CONFIG_CHELSIO_TLS_DEVICE + if (skb->decrypted) + return chcr_ktls_xmit(skb, dev); +#endif +#ifdef CONFIG_CHELSIO_IPSEC_INLINE return chcr_ipsec_xmit(skb, dev); +#endif + return 0; } -#endif /* CONFIG_CHELSIO_IPSEC_INLINE */ +#endif /* CONFIG_CHELSIO_IPSEC_INLINE || CONFIG_CHELSIO_TLS_DEVICE */ static void chcr_detach_device(struct uld_ctx *u_ctx) { @@ -270,6 +288,8 @@ static int chcr_uld_state_change(void *handle, enum cxgb4_state state) case CXGB4_STATE_DETACH: chcr_detach_device(u_ctx); + if (!atomic_read(&drv_data.dev_count)) + stop_crypto(); break; case CXGB4_STATE_START_RECOVERY: @@ -280,6 +300,24 @@ static int chcr_uld_state_change(void *handle, enum cxgb4_state state) return ret; } +#ifdef CONFIG_CHELSIO_IPSEC_INLINE +static void update_netdev_features(void) +{ + struct uld_ctx *u_ctx, *tmp; + + mutex_lock(&drv_data.drv_mutex); + list_for_each_entry_safe(u_ctx, tmp, &drv_data.inact_dev, entry) { + if (u_ctx->lldi.crypto & ULP_CRYPTO_IPSEC_INLINE) + chcr_add_xfrmops(&u_ctx->lldi); + } + list_for_each_entry_safe(u_ctx, tmp, &drv_data.act_dev, entry) { + if (u_ctx->lldi.crypto & ULP_CRYPTO_IPSEC_INLINE) + chcr_add_xfrmops(&u_ctx->lldi); + } + mutex_unlock(&drv_data.drv_mutex); +} +#endif /* CONFIG_CHELSIO_IPSEC_INLINE */ + static int __init chcr_crypto_init(void) { INIT_LIST_HEAD(&drv_data.act_dev); @@ -289,6 +327,12 @@ static int __init chcr_crypto_init(void) drv_data.last_dev = NULL; cxgb4_register_uld(CXGB4_ULD_CRYPTO, &chcr_uld_info); + #ifdef CONFIG_CHELSIO_IPSEC_INLINE + rtnl_lock(); + update_netdev_features(); + rtnl_unlock(); + #endif /* CONFIG_CHELSIO_IPSEC_INLINE */ + return 0; } @@ -304,12 +348,20 @@ static void __exit chcr_crypto_exit(void) list_for_each_entry_safe(u_ctx, tmp, &drv_data.act_dev, entry) { adap = padap(&u_ctx->dev); memset(&adap->chcr_stats, 0, sizeof(adap->chcr_stats)); +#ifdef CONFIG_CHELSIO_TLS_DEVICE + if (u_ctx->lldi.ulp_crypto & ULP_CRYPTO_KTLS_INLINE) + chcr_disable_ktls(adap); +#endif list_del(&u_ctx->entry); kfree(u_ctx); } list_for_each_entry_safe(u_ctx, tmp, &drv_data.inact_dev, entry) { adap = padap(&u_ctx->dev); memset(&adap->chcr_stats, 0, sizeof(adap->chcr_stats)); +#ifdef CONFIG_CHELSIO_TLS_DEVICE + if (u_ctx->lldi.ulp_crypto & ULP_CRYPTO_KTLS_INLINE) + chcr_disable_ktls(adap); +#endif list_del(&u_ctx->entry); kfree(u_ctx); } diff --git a/drivers/crypto/chelsio/chcr_core.h b/drivers/crypto/chelsio/chcr_core.h index ad874d548aa5..2c09672e00a4 100644 --- a/drivers/crypto/chelsio/chcr_core.h +++ b/drivers/crypto/chelsio/chcr_core.h @@ -43,7 +43,8 @@ #include "cxgb4_uld.h" #define DRV_MODULE_NAME "chcr" -#define DRV_VERSION "1.0.0.0" +#define DRV_VERSION "1.0.0.0-ko" +#define DRV_DESC "Chelsio T6 Crypto Co-processor Driver" #define MAX_PENDING_REQ_TO_HW 20 #define CHCR_TEST_RESPONSE_TIMEOUT 1000 @@ -67,7 +68,7 @@ struct _key_ctx { __be32 ctx_hdr; u8 salt[MAX_SALT]; __be64 iv_to_auth; - unsigned char key[0]; + unsigned char key[]; }; #define KEYCTX_TX_WR_IV_S 55 @@ -147,7 +148,6 @@ struct chcr_dev { int wqretry; struct delayed_work detach_work; struct completion detach_comp; - unsigned char tx_channel_id; }; struct uld_ctx { @@ -222,4 +222,11 @@ int chcr_handle_resp(struct crypto_async_request *req, unsigned char *input, int err); int chcr_ipsec_xmit(struct sk_buff *skb, struct net_device *dev); void chcr_add_xfrmops(const struct cxgb4_lld_info *lld); +#ifdef CONFIG_CHELSIO_TLS_DEVICE +void chcr_enable_ktls(struct adapter *adap); +void chcr_disable_ktls(struct adapter *adap); +int chcr_ktls_cpl_act_open_rpl(struct adapter *adap, unsigned char *input); +int chcr_ktls_cpl_set_tcb_rpl(struct adapter *adap, unsigned char *input); +int chcr_ktls_xmit(struct sk_buff *skb, struct net_device *dev); +#endif #endif /* __CHCR_CORE_H__ */ diff --git a/drivers/crypto/chelsio/chcr_crypto.h b/drivers/crypto/chelsio/chcr_crypto.h index 6db2df8c8a05..542bebae001f 100644 --- a/drivers/crypto/chelsio/chcr_crypto.h +++ b/drivers/crypto/chelsio/chcr_crypto.h @@ -187,6 +187,8 @@ struct chcr_aead_reqctx { unsigned int op; u16 imm; u16 verify; + u16 txqidx; + u16 rxqidx; u8 iv[CHCR_MAX_CRYPTO_IV_LEN + MAX_SCRATCH_PAD_SIZE]; u8 *scratch_pad; }; @@ -250,10 +252,11 @@ struct __crypto_ctx { struct chcr_context { struct chcr_dev *dev; - unsigned char tx_qidx; - unsigned char rx_qidx; - unsigned char tx_chan_id; - unsigned char pci_chan_id; + unsigned char rxq_perchan; + unsigned char txq_perchan; + unsigned int ntxq; + unsigned int nrxq; + struct completion cbc_aes_aio_done; struct __crypto_ctx crypto_ctx[0]; }; @@ -279,6 +282,8 @@ struct chcr_ahash_req_ctx { u8 *skbfr; /* SKB which is being sent to the hardware for processing */ u64 data_len; /* Data len till time */ + u16 txqidx; + u16 rxqidx; u8 reqlen; u8 partial_hash[CHCR_HASH_MAX_DIGEST_SIZE]; u8 bfr1[CHCR_HASH_MAX_BLOCK_SIZE_128]; @@ -290,12 +295,15 @@ struct chcr_skcipher_req_ctx { struct scatterlist *dstsg; unsigned int processed; unsigned int last_req_len; + unsigned int partial_req; struct scatterlist *srcsg; unsigned int src_ofst; unsigned int dst_ofst; unsigned int op; u16 imm; u8 iv[CHCR_MAX_CRYPTO_IV_LEN]; + u16 txqidx; + u16 rxqidx; }; struct chcr_alg_template { diff --git a/drivers/crypto/chelsio/chcr_ipsec.c b/drivers/crypto/chelsio/chcr_ipsec.c index 9da0f93a330b..9fd3b9d1ec2f 100644 --- a/drivers/crypto/chelsio/chcr_ipsec.c +++ b/drivers/crypto/chelsio/chcr_ipsec.c @@ -99,9 +99,7 @@ void chcr_add_xfrmops(const struct cxgb4_lld_info *lld) netdev->xfrmdev_ops = &chcr_xfrmdev_ops; netdev->hw_enc_features |= NETIF_F_HW_ESP; netdev->features |= NETIF_F_HW_ESP; - rtnl_lock(); netdev_change_features(netdev); - rtnl_unlock(); } } diff --git a/drivers/crypto/chelsio/chcr_ktls.c b/drivers/crypto/chelsio/chcr_ktls.c new file mode 100644 index 000000000000..43d9e2420110 --- /dev/null +++ b/drivers/crypto/chelsio/chcr_ktls.c @@ -0,0 +1,2028 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright (C) 2020 Chelsio Communications. All rights reserved. */ + +#ifdef CONFIG_CHELSIO_TLS_DEVICE +#include <linux/highmem.h> +#include "chcr_ktls.h" +#include "clip_tbl.h" + +static int chcr_init_tcb_fields(struct chcr_ktls_info *tx_info); +/* + * chcr_ktls_save_keys: calculate and save crypto keys. + * @tx_info - driver specific tls info. + * @crypto_info - tls crypto information. + * @direction - TX/RX direction. + * return - SUCCESS/FAILURE. + */ +static int chcr_ktls_save_keys(struct chcr_ktls_info *tx_info, + struct tls_crypto_info *crypto_info, + enum tls_offload_ctx_dir direction) +{ + int ck_size, key_ctx_size, mac_key_size, keylen, ghash_size, ret; + unsigned char ghash_h[TLS_CIPHER_AES_GCM_256_TAG_SIZE]; + struct tls12_crypto_info_aes_gcm_128 *info_128_gcm; + struct ktls_key_ctx *kctx = &tx_info->key_ctx; + struct crypto_cipher *cipher; + unsigned char *key, *salt; + + switch (crypto_info->cipher_type) { + case TLS_CIPHER_AES_GCM_128: + info_128_gcm = + (struct tls12_crypto_info_aes_gcm_128 *)crypto_info; + keylen = TLS_CIPHER_AES_GCM_128_KEY_SIZE; + ck_size = CHCR_KEYCTX_CIPHER_KEY_SIZE_128; + tx_info->salt_size = TLS_CIPHER_AES_GCM_128_SALT_SIZE; + mac_key_size = CHCR_KEYCTX_MAC_KEY_SIZE_128; + tx_info->iv_size = TLS_CIPHER_AES_GCM_128_IV_SIZE; + tx_info->iv = be64_to_cpu(*(__be64 *)info_128_gcm->iv); + + ghash_size = TLS_CIPHER_AES_GCM_128_TAG_SIZE; + key = info_128_gcm->key; + salt = info_128_gcm->salt; + tx_info->record_no = *(u64 *)info_128_gcm->rec_seq; + + /* The SCMD fields used when encrypting a full TLS + * record. Its a one time calculation till the + * connection exists. + */ + tx_info->scmd0_seqno_numivs = + SCMD_SEQ_NO_CTRL_V(CHCR_SCMD_SEQ_NO_CTRL_64BIT) | + SCMD_CIPH_AUTH_SEQ_CTRL_F | + SCMD_PROTO_VERSION_V(CHCR_SCMD_PROTO_VERSION_TLS) | + SCMD_CIPH_MODE_V(CHCR_SCMD_CIPHER_MODE_AES_GCM) | + SCMD_AUTH_MODE_V(CHCR_SCMD_AUTH_MODE_GHASH) | + SCMD_IV_SIZE_V(TLS_CIPHER_AES_GCM_128_IV_SIZE >> 1) | + SCMD_NUM_IVS_V(1); + + /* keys will be sent inline. */ + tx_info->scmd0_ivgen_hdrlen = SCMD_KEY_CTX_INLINE_F; + + /* The SCMD fields used when encrypting a partial TLS + * record (no trailer and possibly a truncated payload). + */ + tx_info->scmd0_short_seqno_numivs = + SCMD_CIPH_AUTH_SEQ_CTRL_F | + SCMD_PROTO_VERSION_V(CHCR_SCMD_PROTO_VERSION_GENERIC) | + SCMD_CIPH_MODE_V(CHCR_SCMD_CIPHER_MODE_AES_CTR) | + SCMD_IV_SIZE_V(AES_BLOCK_LEN >> 1); + + tx_info->scmd0_short_ivgen_hdrlen = + tx_info->scmd0_ivgen_hdrlen | SCMD_AADIVDROP_F; + + break; + + default: + pr_err("GCM: cipher type 0x%x not supported\n", + crypto_info->cipher_type); + ret = -EINVAL; + goto out; + } + + key_ctx_size = CHCR_KTLS_KEY_CTX_LEN + + roundup(keylen, 16) + ghash_size; + /* Calculate the H = CIPH(K, 0 repeated 16 times). + * It will go in key context + */ + cipher = crypto_alloc_cipher("aes", 0, 0); + if (IS_ERR(cipher)) { + ret = -ENOMEM; + goto out; + } + + ret = crypto_cipher_setkey(cipher, key, keylen); + if (ret) + goto out1; + + memset(ghash_h, 0, ghash_size); + crypto_cipher_encrypt_one(cipher, ghash_h, ghash_h); + + /* fill the Key context */ + if (direction == TLS_OFFLOAD_CTX_DIR_TX) { + kctx->ctx_hdr = FILL_KEY_CTX_HDR(ck_size, + mac_key_size, + key_ctx_size >> 4); + } else { + ret = -EINVAL; + goto out1; + } + + memcpy(kctx->salt, salt, tx_info->salt_size); + memcpy(kctx->key, key, keylen); + memcpy(kctx->key + keylen, ghash_h, ghash_size); + tx_info->key_ctx_len = key_ctx_size; + +out1: + crypto_free_cipher(cipher); +out: + return ret; +} + +static int chcr_ktls_update_connection_state(struct chcr_ktls_info *tx_info, + int new_state) +{ + /* This function can be called from both rx (interrupt context) and tx + * queue contexts. + */ + spin_lock_bh(&tx_info->lock); + switch (tx_info->connection_state) { + case KTLS_CONN_CLOSED: + tx_info->connection_state = new_state; + break; + + case KTLS_CONN_ACT_OPEN_REQ: + /* only go forward if state is greater than current state. */ + if (new_state <= tx_info->connection_state) + break; + /* update to the next state and also initialize TCB */ + tx_info->connection_state = new_state; + /* FALLTHRU */ + case KTLS_CONN_ACT_OPEN_RPL: + /* if we are stuck in this state, means tcb init might not + * received by HW, try sending it again. + */ + if (!chcr_init_tcb_fields(tx_info)) + tx_info->connection_state = KTLS_CONN_SET_TCB_REQ; + break; + + case KTLS_CONN_SET_TCB_REQ: + /* only go forward if state is greater than current state. */ + if (new_state <= tx_info->connection_state) + break; + /* update to the next state and check if l2t_state is valid */ + tx_info->connection_state = new_state; + /* FALLTHRU */ + case KTLS_CONN_SET_TCB_RPL: + /* Check if l2t state is valid, then move to ready state. */ + if (cxgb4_check_l2t_valid(tx_info->l2te)) { + tx_info->connection_state = KTLS_CONN_TX_READY; + atomic64_inc(&tx_info->adap->chcr_stats.ktls_tx_ctx); + } + break; + + case KTLS_CONN_TX_READY: + /* nothing to be done here */ + break; + + default: + pr_err("unknown KTLS connection state\n"); + break; + } + spin_unlock_bh(&tx_info->lock); + + return tx_info->connection_state; +} +/* + * chcr_ktls_act_open_req: creates TCB entry for ipv4 connection. + * @sk - tcp socket. + * @tx_info - driver specific tls info. + * @atid - connection active tid. + * return - send success/failure. + */ +static int chcr_ktls_act_open_req(struct sock *sk, + struct chcr_ktls_info *tx_info, + int atid) +{ + struct inet_sock *inet = inet_sk(sk); + struct cpl_t6_act_open_req *cpl6; + struct cpl_act_open_req *cpl; + struct sk_buff *skb; + unsigned int len; + int qid_atid; + u64 options; + + len = sizeof(*cpl6); + skb = alloc_skb(len, GFP_KERNEL); + if (unlikely(!skb)) + return -ENOMEM; + /* mark it a control pkt */ + set_wr_txq(skb, CPL_PRIORITY_CONTROL, tx_info->port_id); + + cpl6 = __skb_put_zero(skb, len); + cpl = (struct cpl_act_open_req *)cpl6; + INIT_TP_WR(cpl6, 0); + qid_atid = TID_QID_V(tx_info->rx_qid) | + TID_TID_V(atid); + OPCODE_TID(cpl) = htonl(MK_OPCODE_TID(CPL_ACT_OPEN_REQ, qid_atid)); + cpl->local_port = inet->inet_sport; + cpl->peer_port = inet->inet_dport; + cpl->local_ip = inet->inet_rcv_saddr; + cpl->peer_ip = inet->inet_daddr; + + /* fill first 64 bit option field. */ + options = TCAM_BYPASS_F | ULP_MODE_V(ULP_MODE_NONE) | NON_OFFLOAD_F | + SMAC_SEL_V(tx_info->smt_idx) | TX_CHAN_V(tx_info->tx_chan); + cpl->opt0 = cpu_to_be64(options); + + /* next 64 bit option field. */ + options = + TX_QUEUE_V(tx_info->adap->params.tp.tx_modq[tx_info->tx_chan]); + cpl->opt2 = htonl(options); + + return cxgb4_l2t_send(tx_info->netdev, skb, tx_info->l2te); +} + +/* + * chcr_ktls_act_open_req6: creates TCB entry for ipv6 connection. + * @sk - tcp socket. + * @tx_info - driver specific tls info. + * @atid - connection active tid. + * return - send success/failure. + */ +static int chcr_ktls_act_open_req6(struct sock *sk, + struct chcr_ktls_info *tx_info, + int atid) +{ + struct inet_sock *inet = inet_sk(sk); + struct cpl_t6_act_open_req6 *cpl6; + struct cpl_act_open_req6 *cpl; + struct sk_buff *skb; + unsigned int len; + int qid_atid; + u64 options; + + len = sizeof(*cpl6); + skb = alloc_skb(len, GFP_KERNEL); + if (unlikely(!skb)) + return -ENOMEM; + /* mark it a control pkt */ + set_wr_txq(skb, CPL_PRIORITY_CONTROL, tx_info->port_id); + + cpl6 = __skb_put_zero(skb, len); + cpl = (struct cpl_act_open_req6 *)cpl6; + INIT_TP_WR(cpl6, 0); + qid_atid = TID_QID_V(tx_info->rx_qid) | TID_TID_V(atid); + OPCODE_TID(cpl) = htonl(MK_OPCODE_TID(CPL_ACT_OPEN_REQ6, qid_atid)); + cpl->local_port = inet->inet_sport; + cpl->peer_port = inet->inet_dport; + cpl->local_ip_hi = *(__be64 *)&sk->sk_v6_rcv_saddr.in6_u.u6_addr8[0]; + cpl->local_ip_lo = *(__be64 *)&sk->sk_v6_rcv_saddr.in6_u.u6_addr8[8]; + cpl->peer_ip_hi = *(__be64 *)&sk->sk_v6_daddr.in6_u.u6_addr8[0]; + cpl->peer_ip_lo = *(__be64 *)&sk->sk_v6_daddr.in6_u.u6_addr8[8]; + + /* first 64 bit option field. */ + options = TCAM_BYPASS_F | ULP_MODE_V(ULP_MODE_NONE) | NON_OFFLOAD_F | + SMAC_SEL_V(tx_info->smt_idx) | TX_CHAN_V(tx_info->tx_chan); + cpl->opt0 = cpu_to_be64(options); + /* next 64 bit option field. */ + options = + TX_QUEUE_V(tx_info->adap->params.tp.tx_modq[tx_info->tx_chan]); + cpl->opt2 = htonl(options); + + return cxgb4_l2t_send(tx_info->netdev, skb, tx_info->l2te); +} + +/* + * chcr_setup_connection: create a TCB entry so that TP will form tcp packets. + * @sk - tcp socket. + * @tx_info - driver specific tls info. + * return: NET_TX_OK/NET_XMIT_DROP + */ +static int chcr_setup_connection(struct sock *sk, + struct chcr_ktls_info *tx_info) +{ + struct tid_info *t = &tx_info->adap->tids; + int atid, ret = 0; + + atid = cxgb4_alloc_atid(t, tx_info); + if (atid == -1) + return -EINVAL; + + tx_info->atid = atid; + tx_info->ip_family = sk->sk_family; + + if (sk->sk_family == AF_INET || + (sk->sk_family == AF_INET6 && !sk->sk_ipv6only && + ipv6_addr_type(&sk->sk_v6_daddr) == IPV6_ADDR_MAPPED)) { + tx_info->ip_family = AF_INET; + ret = chcr_ktls_act_open_req(sk, tx_info, atid); + } else { + tx_info->ip_family = AF_INET6; + ret = + cxgb4_clip_get(tx_info->netdev, + (const u32 *)&sk->sk_v6_rcv_saddr.in6_u.u6_addr8, + 1); + if (ret) + goto out; + ret = chcr_ktls_act_open_req6(sk, tx_info, atid); + } + + /* if return type is NET_XMIT_CN, msg will be sent but delayed, mark ret + * success, if any other return type clear atid and return that failure. + */ + if (ret) { + if (ret == NET_XMIT_CN) + ret = 0; + else + cxgb4_free_atid(t, atid); + goto out; + } + + /* update the connection state */ + chcr_ktls_update_connection_state(tx_info, KTLS_CONN_ACT_OPEN_REQ); +out: + return ret; +} + +/* + * chcr_set_tcb_field: update tcb fields. + * @tx_info - driver specific tls info. + * @word - TCB word. + * @mask - TCB word related mask. + * @val - TCB word related value. + * @no_reply - set 1 if not looking for TP response. + */ +static int chcr_set_tcb_field(struct chcr_ktls_info *tx_info, u16 word, + u64 mask, u64 val, int no_reply) +{ + struct cpl_set_tcb_field *req; + struct sk_buff *skb; + + skb = alloc_skb(sizeof(struct cpl_set_tcb_field), GFP_ATOMIC); + if (!skb) + return -ENOMEM; + + req = (struct cpl_set_tcb_field *)__skb_put_zero(skb, sizeof(*req)); + INIT_TP_WR_CPL(req, CPL_SET_TCB_FIELD, tx_info->tid); + req->reply_ctrl = htons(QUEUENO_V(tx_info->rx_qid) | + NO_REPLY_V(no_reply)); + req->word_cookie = htons(TCB_WORD_V(word)); + req->mask = cpu_to_be64(mask); + req->val = cpu_to_be64(val); + + set_wr_txq(skb, CPL_PRIORITY_CONTROL, tx_info->port_id); + return cxgb4_ofld_send(tx_info->netdev, skb); +} + +/* + * chcr_ktls_mark_tcb_close: mark tcb state to CLOSE + * @tx_info - driver specific tls info. + * return: NET_TX_OK/NET_XMIT_DROP. + */ +static int chcr_ktls_mark_tcb_close(struct chcr_ktls_info *tx_info) +{ + return chcr_set_tcb_field(tx_info, TCB_T_STATE_W, + TCB_T_STATE_V(TCB_T_STATE_M), + CHCR_TCB_STATE_CLOSED, 1); +} + +/* + * chcr_ktls_dev_del: call back for tls_dev_del. + * Remove the tid and l2t entry and close the connection. + * it per connection basis. + * @netdev - net device. + * @tls_cts - tls context. + * @direction - TX/RX crypto direction + */ +static void chcr_ktls_dev_del(struct net_device *netdev, + struct tls_context *tls_ctx, + enum tls_offload_ctx_dir direction) +{ + struct chcr_ktls_ofld_ctx_tx *tx_ctx = + chcr_get_ktls_tx_context(tls_ctx); + struct chcr_ktls_info *tx_info = tx_ctx->chcr_info; + struct sock *sk; + + if (!tx_info) + return; + sk = tx_info->sk; + + spin_lock(&tx_info->lock); + tx_info->connection_state = KTLS_CONN_CLOSED; + spin_unlock(&tx_info->lock); + + /* clear l2t entry */ + if (tx_info->l2te) + cxgb4_l2t_release(tx_info->l2te); + + /* clear clip entry */ + if (tx_info->ip_family == AF_INET6) + cxgb4_clip_release(netdev, + (const u32 *)&sk->sk_v6_daddr.in6_u.u6_addr8, + 1); + + /* clear tid */ + if (tx_info->tid != -1) { + /* clear tcb state and then release tid */ + chcr_ktls_mark_tcb_close(tx_info); + cxgb4_remove_tid(&tx_info->adap->tids, tx_info->tx_chan, + tx_info->tid, tx_info->ip_family); + } + + atomic64_inc(&tx_info->adap->chcr_stats.ktls_tx_connection_close); + kvfree(tx_info); + tx_ctx->chcr_info = NULL; +} + +/* + * chcr_ktls_dev_add: call back for tls_dev_add. + * Create a tcb entry for TP. Also add l2t entry for the connection. And + * generate keys & save those keys locally. + * @netdev - net device. + * @tls_cts - tls context. + * @direction - TX/RX crypto direction + * return: SUCCESS/FAILURE. + */ +static int chcr_ktls_dev_add(struct net_device *netdev, struct sock *sk, + enum tls_offload_ctx_dir direction, + struct tls_crypto_info *crypto_info, + u32 start_offload_tcp_sn) +{ + struct tls_context *tls_ctx = tls_get_ctx(sk); + struct chcr_ktls_ofld_ctx_tx *tx_ctx; + struct chcr_ktls_info *tx_info; + struct dst_entry *dst; + struct adapter *adap; + struct port_info *pi; + struct neighbour *n; + u8 daaddr[16]; + int ret = -1; + + tx_ctx = chcr_get_ktls_tx_context(tls_ctx); + + pi = netdev_priv(netdev); + adap = pi->adapter; + if (direction == TLS_OFFLOAD_CTX_DIR_RX) { + pr_err("not expecting for RX direction\n"); + ret = -EINVAL; + goto out; + } + if (tx_ctx->chcr_info) { + ret = -EINVAL; + goto out; + } + + tx_info = kvzalloc(sizeof(*tx_info), GFP_KERNEL); + if (!tx_info) { + ret = -ENOMEM; + goto out; + } + + spin_lock_init(&tx_info->lock); + + /* clear connection state */ + spin_lock(&tx_info->lock); + tx_info->connection_state = KTLS_CONN_CLOSED; + spin_unlock(&tx_info->lock); + + tx_info->sk = sk; + /* initialize tid and atid to -1, 0 is a also a valid id. */ + tx_info->tid = -1; + tx_info->atid = -1; + + tx_info->adap = adap; + tx_info->netdev = netdev; + tx_info->first_qset = pi->first_qset; + tx_info->tx_chan = pi->tx_chan; + tx_info->smt_idx = pi->smt_idx; + tx_info->port_id = pi->port_id; + + tx_info->rx_qid = chcr_get_first_rx_qid(adap); + if (unlikely(tx_info->rx_qid < 0)) + goto out2; + + tx_info->prev_seq = start_offload_tcp_sn; + tx_info->tcp_start_seq_number = start_offload_tcp_sn; + + /* save crypto keys */ + ret = chcr_ktls_save_keys(tx_info, crypto_info, direction); + if (ret < 0) + goto out2; + + /* get peer ip */ + if (sk->sk_family == AF_INET || + (sk->sk_family == AF_INET6 && !sk->sk_ipv6only && + ipv6_addr_type(&sk->sk_v6_daddr) == IPV6_ADDR_MAPPED)) { + memcpy(daaddr, &sk->sk_daddr, 4); + } else { + memcpy(daaddr, sk->sk_v6_daddr.in6_u.u6_addr8, 16); + } + + /* get the l2t index */ + dst = sk_dst_get(sk); + if (!dst) { + pr_err("DST entry not found\n"); + goto out2; + } + n = dst_neigh_lookup(dst, daaddr); + if (!n || !n->dev) { + pr_err("neighbour not found\n"); + dst_release(dst); + goto out2; + } + tx_info->l2te = cxgb4_l2t_get(adap->l2t, n, n->dev, 0); + + neigh_release(n); + dst_release(dst); + + if (!tx_info->l2te) { + pr_err("l2t entry not found\n"); + goto out2; + } + + tx_ctx->chcr_info = tx_info; + + /* create a filter and call cxgb4_l2t_send to send the packet out, which + * will take care of updating l2t entry in hw if not already done. + */ + ret = chcr_setup_connection(sk, tx_info); + if (ret) + goto out2; + + atomic64_inc(&adap->chcr_stats.ktls_tx_connection_open); + return 0; +out2: + kvfree(tx_info); +out: + atomic64_inc(&adap->chcr_stats.ktls_tx_connection_fail); + return ret; +} + +static const struct tlsdev_ops chcr_ktls_ops = { + .tls_dev_add = chcr_ktls_dev_add, + .tls_dev_del = chcr_ktls_dev_del, +}; + +/* + * chcr_enable_ktls: add NETIF_F_HW_TLS_TX flag in all the ports. + */ +void chcr_enable_ktls(struct adapter *adap) +{ + struct net_device *netdev; + int i; + + for_each_port(adap, i) { + netdev = adap->port[i]; + netdev->features |= NETIF_F_HW_TLS_TX; + netdev->hw_features |= NETIF_F_HW_TLS_TX; + netdev->tlsdev_ops = &chcr_ktls_ops; + } +} + +/* + * chcr_disable_ktls: remove NETIF_F_HW_TLS_TX flag from all the ports. + */ +void chcr_disable_ktls(struct adapter *adap) +{ + struct net_device *netdev; + int i; + + for_each_port(adap, i) { + netdev = adap->port[i]; + netdev->features &= ~NETIF_F_HW_TLS_TX; + netdev->hw_features &= ~NETIF_F_HW_TLS_TX; + netdev->tlsdev_ops = NULL; + } +} + +/* + * chcr_init_tcb_fields: Initialize tcb fields to handle TCP seq number + * handling. + * @tx_info - driver specific tls info. + * return: NET_TX_OK/NET_XMIT_DROP + */ +static int chcr_init_tcb_fields(struct chcr_ktls_info *tx_info) +{ + int ret = 0; + + /* set tcb in offload and bypass */ + ret = + chcr_set_tcb_field(tx_info, TCB_T_FLAGS_W, + TCB_T_FLAGS_V(TF_CORE_BYPASS_F | TF_NON_OFFLOAD_F), + TCB_T_FLAGS_V(TF_CORE_BYPASS_F), 1); + if (ret) + return ret; + /* reset snd_una and snd_next fields in tcb */ + ret = chcr_set_tcb_field(tx_info, TCB_SND_UNA_RAW_W, + TCB_SND_NXT_RAW_V(TCB_SND_NXT_RAW_M) | + TCB_SND_UNA_RAW_V(TCB_SND_UNA_RAW_M), + 0, 1); + if (ret) + return ret; + + /* reset send max */ + ret = chcr_set_tcb_field(tx_info, TCB_SND_MAX_RAW_W, + TCB_SND_MAX_RAW_V(TCB_SND_MAX_RAW_M), + 0, 1); + if (ret) + return ret; + + /* update l2t index and request for tp reply to confirm tcb is + * initialised to handle tx traffic. + */ + ret = chcr_set_tcb_field(tx_info, TCB_L2T_IX_W, + TCB_L2T_IX_V(TCB_L2T_IX_M), + TCB_L2T_IX_V(tx_info->l2te->idx), 0); + return ret; +} + +/* + * chcr_ktls_cpl_act_open_rpl: connection reply received from TP. + */ +int chcr_ktls_cpl_act_open_rpl(struct adapter *adap, unsigned char *input) +{ + const struct cpl_act_open_rpl *p = (void *)input; + struct chcr_ktls_info *tx_info = NULL; + unsigned int atid, tid, status; + struct tid_info *t; + + tid = GET_TID(p); + status = AOPEN_STATUS_G(ntohl(p->atid_status)); + atid = TID_TID_G(AOPEN_ATID_G(ntohl(p->atid_status))); + + t = &adap->tids; + tx_info = lookup_atid(t, atid); + + if (!tx_info || tx_info->atid != atid) { + pr_err("tx_info or atid is not correct\n"); + return -1; + } + + if (!status) { + tx_info->tid = tid; + cxgb4_insert_tid(t, tx_info, tx_info->tid, tx_info->ip_family); + + cxgb4_free_atid(t, atid); + tx_info->atid = -1; + /* update the connection state */ + chcr_ktls_update_connection_state(tx_info, + KTLS_CONN_ACT_OPEN_RPL); + } + return 0; +} + +/* + * chcr_ktls_cpl_set_tcb_rpl: TCB reply received from TP. + */ +int chcr_ktls_cpl_set_tcb_rpl(struct adapter *adap, unsigned char *input) +{ + const struct cpl_set_tcb_rpl *p = (void *)input; + struct chcr_ktls_info *tx_info = NULL; + struct tid_info *t; + u32 tid; + + tid = GET_TID(p); + + t = &adap->tids; + tx_info = lookup_tid(t, tid); + if (!tx_info || tx_info->tid != tid) { + pr_err("tx_info or atid is not correct\n"); + return -1; + } + /* update the connection state */ + chcr_ktls_update_connection_state(tx_info, KTLS_CONN_SET_TCB_RPL); + return 0; +} + +static void *__chcr_write_cpl_set_tcb_ulp(struct chcr_ktls_info *tx_info, + u32 tid, void *pos, u16 word, u64 mask, + u64 val, u32 reply) +{ + struct cpl_set_tcb_field_core *cpl; + struct ulptx_idata *idata; + struct ulp_txpkt *txpkt; + + /* ULP_TXPKT */ + txpkt = pos; + txpkt->cmd_dest = htonl(ULPTX_CMD_V(ULP_TX_PKT) | ULP_TXPKT_DEST_V(0)); + txpkt->len = htonl(DIV_ROUND_UP(CHCR_SET_TCB_FIELD_LEN, 16)); + + /* ULPTX_IDATA sub-command */ + idata = (struct ulptx_idata *)(txpkt + 1); + idata->cmd_more = htonl(ULPTX_CMD_V(ULP_TX_SC_IMM)); + idata->len = htonl(sizeof(*cpl)); + pos = idata + 1; + + cpl = pos; + /* CPL_SET_TCB_FIELD */ + OPCODE_TID(cpl) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, tid)); + cpl->reply_ctrl = htons(QUEUENO_V(tx_info->rx_qid) | + NO_REPLY_V(!reply)); + cpl->word_cookie = htons(TCB_WORD_V(word)); + cpl->mask = cpu_to_be64(mask); + cpl->val = cpu_to_be64(val); + + /* ULPTX_NOOP */ + idata = (struct ulptx_idata *)(cpl + 1); + idata->cmd_more = htonl(ULPTX_CMD_V(ULP_TX_SC_NOOP)); + idata->len = htonl(0); + pos = idata + 1; + + return pos; +} + + +/* + * chcr_write_cpl_set_tcb_ulp: update tcb values. + * TCB is responsible to create tcp headers, so all the related values + * should be correctly updated. + * @tx_info - driver specific tls info. + * @q - tx queue on which packet is going out. + * @tid - TCB identifier. + * @pos - current index where should we start writing. + * @word - TCB word. + * @mask - TCB word related mask. + * @val - TCB word related value. + * @reply - set 1 if looking for TP response. + * return - next position to write. + */ +static void *chcr_write_cpl_set_tcb_ulp(struct chcr_ktls_info *tx_info, + struct sge_eth_txq *q, u32 tid, + void *pos, u16 word, u64 mask, + u64 val, u32 reply) +{ + int left = (void *)q->q.stat - pos; + + if (unlikely(left < CHCR_SET_TCB_FIELD_LEN)) { + if (!left) { + pos = q->q.desc; + } else { + u8 buf[48] = {0}; + + __chcr_write_cpl_set_tcb_ulp(tx_info, tid, buf, word, + mask, val, reply); + + return chcr_copy_to_txd(buf, &q->q, pos, + CHCR_SET_TCB_FIELD_LEN); + } + } + + pos = __chcr_write_cpl_set_tcb_ulp(tx_info, tid, pos, word, + mask, val, reply); + + /* check again if we are at the end of the queue */ + if (left == CHCR_SET_TCB_FIELD_LEN) + pos = q->q.desc; + + return pos; +} + +/* + * chcr_ktls_xmit_tcb_cpls: update tcb entry so that TP will create the header + * with updated values like tcp seq, ack, window etc. + * @tx_info - driver specific tls info. + * @q - TX queue. + * @tcp_seq + * @tcp_ack + * @tcp_win + * return: NETDEV_TX_BUSY/NET_TX_OK. + */ +static int chcr_ktls_xmit_tcb_cpls(struct chcr_ktls_info *tx_info, + struct sge_eth_txq *q, u64 tcp_seq, + u64 tcp_ack, u64 tcp_win) +{ + bool first_wr = ((tx_info->prev_ack == 0) && (tx_info->prev_win == 0)); + u32 len, cpl = 0, ndesc, wr_len; + struct fw_ulptx_wr *wr; + int credits; + void *pos; + + wr_len = sizeof(*wr); + /* there can be max 4 cpls, check if we have enough credits */ + len = wr_len + 4 * roundup(CHCR_SET_TCB_FIELD_LEN, 16); + ndesc = DIV_ROUND_UP(len, 64); + + credits = chcr_txq_avail(&q->q) - ndesc; + if (unlikely(credits < 0)) { + chcr_eth_txq_stop(q); + return NETDEV_TX_BUSY; + } + + pos = &q->q.desc[q->q.pidx]; + /* make space for WR, we'll fill it later when we know all the cpls + * being sent out and have complete length. + */ + wr = pos; + pos += wr_len; + /* update tx_max if its a re-transmit or the first wr */ + if (first_wr || tcp_seq != tx_info->prev_seq) { + pos = chcr_write_cpl_set_tcb_ulp(tx_info, q, tx_info->tid, pos, + TCB_TX_MAX_W, + TCB_TX_MAX_V(TCB_TX_MAX_M), + TCB_TX_MAX_V(tcp_seq), 0); + cpl++; + } + /* reset snd una if it's a re-transmit pkt */ + if (tcp_seq != tx_info->prev_seq) { + /* reset snd_una */ + pos = chcr_write_cpl_set_tcb_ulp(tx_info, q, tx_info->tid, pos, + TCB_SND_UNA_RAW_W, + TCB_SND_UNA_RAW_V + (TCB_SND_UNA_RAW_M), + TCB_SND_UNA_RAW_V(0), 0); + atomic64_inc(&tx_info->adap->chcr_stats.ktls_tx_ooo); + cpl++; + } + /* update ack */ + if (first_wr || tx_info->prev_ack != tcp_ack) { + pos = chcr_write_cpl_set_tcb_ulp(tx_info, q, tx_info->tid, pos, + TCB_RCV_NXT_W, + TCB_RCV_NXT_V(TCB_RCV_NXT_M), + TCB_RCV_NXT_V(tcp_ack), 0); + tx_info->prev_ack = tcp_ack; + cpl++; + } + /* update receive window */ + if (first_wr || tx_info->prev_win != tcp_win) { + pos = chcr_write_cpl_set_tcb_ulp(tx_info, q, tx_info->tid, pos, + TCB_RCV_WND_W, + TCB_RCV_WND_V(TCB_RCV_WND_M), + TCB_RCV_WND_V(tcp_win), 0); + tx_info->prev_win = tcp_win; + cpl++; + } + + if (cpl) { + /* get the actual length */ + len = wr_len + cpl * roundup(CHCR_SET_TCB_FIELD_LEN, 16); + /* ULPTX wr */ + wr->op_to_compl = htonl(FW_WR_OP_V(FW_ULPTX_WR)); + wr->cookie = 0; + /* fill len in wr field */ + wr->flowid_len16 = htonl(FW_WR_LEN16_V(DIV_ROUND_UP(len, 16))); + + ndesc = DIV_ROUND_UP(len, 64); + chcr_txq_advance(&q->q, ndesc); + cxgb4_ring_tx_db(tx_info->adap, &q->q, ndesc); + } + return 0; +} + +/* + * chcr_ktls_skb_copy + * @nskb - new skb where the frags to be added. + * @skb - old skb from which frags will be copied. + */ +static void chcr_ktls_skb_copy(struct sk_buff *skb, struct sk_buff *nskb) +{ + int i; + + for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { + skb_shinfo(nskb)->frags[i] = skb_shinfo(skb)->frags[i]; + __skb_frag_ref(&skb_shinfo(nskb)->frags[i]); + } + + skb_shinfo(nskb)->nr_frags = skb_shinfo(skb)->nr_frags; + nskb->len += skb->data_len; + nskb->data_len = skb->data_len; + nskb->truesize += skb->data_len; +} + +/* + * chcr_ktls_get_tx_flits + * returns number of flits to be sent out, it includes key context length, WR + * size and skb fragments. + */ +static unsigned int +chcr_ktls_get_tx_flits(const struct sk_buff *skb, unsigned int key_ctx_len) +{ + return chcr_sgl_len(skb_shinfo(skb)->nr_frags) + + DIV_ROUND_UP(key_ctx_len + CHCR_KTLS_WR_SIZE, 8); +} + +/* + * chcr_ktls_check_tcp_options: To check if there is any TCP option availbale + * other than timestamp. + * @skb - skb contains partial record.. + * return: 1 / 0 + */ +static int +chcr_ktls_check_tcp_options(struct tcphdr *tcp) +{ + int cnt, opt, optlen; + u_char *cp; + + cp = (u_char *)(tcp + 1); + cnt = (tcp->doff << 2) - sizeof(struct tcphdr); + for (; cnt > 0; cnt -= optlen, cp += optlen) { + opt = cp[0]; + if (opt == TCPOPT_EOL) + break; + if (opt == TCPOPT_NOP) { + optlen = 1; + } else { + if (cnt < 2) + break; + optlen = cp[1]; + if (optlen < 2 || optlen > cnt) + break; + } + switch (opt) { + case TCPOPT_NOP: + break; + default: + return 1; + } + } + return 0; +} + +/* + * chcr_ktls_write_tcp_options : TP can't send out all the options, we need to + * send out separately. + * @tx_info - driver specific tls info. + * @skb - skb contains partial record.. + * @q - TX queue. + * @tx_chan - channel number. + * return: NETDEV_TX_OK/NETDEV_TX_BUSY. + */ +static int +chcr_ktls_write_tcp_options(struct chcr_ktls_info *tx_info, struct sk_buff *skb, + struct sge_eth_txq *q, uint32_t tx_chan) +{ + struct fw_eth_tx_pkt_wr *wr; + struct cpl_tx_pkt_core *cpl; + u32 ctrl, iplen, maclen; + struct ipv6hdr *ip6; + unsigned int ndesc; + struct tcphdr *tcp; + int len16, pktlen; + struct iphdr *ip; + int credits; + u8 buf[150]; + void *pos; + + iplen = skb_network_header_len(skb); + maclen = skb_mac_header_len(skb); + + /* packet length = eth hdr len + ip hdr len + tcp hdr len + * (including options). + */ + pktlen = skb->len - skb->data_len; + + ctrl = sizeof(*cpl) + pktlen; + len16 = DIV_ROUND_UP(sizeof(*wr) + ctrl, 16); + /* check how many descriptors needed */ + ndesc = DIV_ROUND_UP(len16, 4); + + credits = chcr_txq_avail(&q->q) - ndesc; + if (unlikely(credits < 0)) { + chcr_eth_txq_stop(q); + return NETDEV_TX_BUSY; + } + + pos = &q->q.desc[q->q.pidx]; + wr = pos; + + /* Firmware work request header */ + wr->op_immdlen = htonl(FW_WR_OP_V(FW_ETH_TX_PKT_WR) | + FW_WR_IMMDLEN_V(ctrl)); + + wr->equiq_to_len16 = htonl(FW_WR_LEN16_V(len16)); + wr->r3 = 0; + + cpl = (void *)(wr + 1); + + /* CPL header */ + cpl->ctrl0 = htonl(TXPKT_OPCODE_V(CPL_TX_PKT) | TXPKT_INTF_V(tx_chan) | + TXPKT_PF_V(tx_info->adap->pf)); + cpl->pack = 0; + cpl->len = htons(pktlen); + /* checksum offload */ + cpl->ctrl1 = 0; + + pos = cpl + 1; + + memcpy(buf, skb->data, pktlen); + if (tx_info->ip_family == AF_INET) { + /* we need to correct ip header len */ + ip = (struct iphdr *)(buf + maclen); + ip->tot_len = htons(pktlen - maclen); + } else { + ip6 = (struct ipv6hdr *)(buf + maclen); + ip6->payload_len = htons(pktlen - maclen - iplen); + } + /* now take care of the tcp header, if fin is not set then clear push + * bit as well, and if fin is set, it will be sent at the last so we + * need to update the tcp sequence number as per the last packet. + */ + tcp = (struct tcphdr *)(buf + maclen + iplen); + + if (!tcp->fin) + tcp->psh = 0; + else + tcp->seq = htonl(tx_info->prev_seq); + + chcr_copy_to_txd(buf, &q->q, pos, pktlen); + + chcr_txq_advance(&q->q, ndesc); + cxgb4_ring_tx_db(tx_info->adap, &q->q, ndesc); + return 0; +} + +/* chcr_ktls_skb_shift - Shifts request length paged data from skb to another. + * @tgt- buffer into which tail data gets added + * @skb- buffer from which the paged data comes from + * @shiftlen- shift up to this many bytes + */ +static int chcr_ktls_skb_shift(struct sk_buff *tgt, struct sk_buff *skb, + int shiftlen) +{ + skb_frag_t *fragfrom, *fragto; + int from, to, todo; + + WARN_ON(shiftlen > skb->data_len); + + todo = shiftlen; + from = 0; + to = 0; + fragfrom = &skb_shinfo(skb)->frags[from]; + + while ((todo > 0) && (from < skb_shinfo(skb)->nr_frags)) { + fragfrom = &skb_shinfo(skb)->frags[from]; + fragto = &skb_shinfo(tgt)->frags[to]; + + if (todo >= skb_frag_size(fragfrom)) { + *fragto = *fragfrom; + todo -= skb_frag_size(fragfrom); + from++; + to++; + + } else { + __skb_frag_ref(fragfrom); + skb_frag_page_copy(fragto, fragfrom); + skb_frag_off_copy(fragto, fragfrom); + skb_frag_size_set(fragto, todo); + + skb_frag_off_add(fragfrom, todo); + skb_frag_size_sub(fragfrom, todo); + todo = 0; + + to++; + break; + } + } + + /* Ready to "commit" this state change to tgt */ + skb_shinfo(tgt)->nr_frags = to; + + /* Reposition in the original skb */ + to = 0; + while (from < skb_shinfo(skb)->nr_frags) + skb_shinfo(skb)->frags[to++] = skb_shinfo(skb)->frags[from++]; + + skb_shinfo(skb)->nr_frags = to; + + WARN_ON(todo > 0 && !skb_shinfo(skb)->nr_frags); + + skb->len -= shiftlen; + skb->data_len -= shiftlen; + skb->truesize -= shiftlen; + tgt->len += shiftlen; + tgt->data_len += shiftlen; + tgt->truesize += shiftlen; + + return shiftlen; +} + +/* + * chcr_ktls_xmit_wr_complete: This sends out the complete record. If an skb + * received has partial end part of the record, send out the complete record, so + * that crypto block will be able to generate TAG/HASH. + * @skb - segment which has complete or partial end part. + * @tx_info - driver specific tls info. + * @q - TX queue. + * @tcp_seq + * @tcp_push - tcp push bit. + * @mss - segment size. + * return: NETDEV_TX_BUSY/NET_TX_OK. + */ +static int chcr_ktls_xmit_wr_complete(struct sk_buff *skb, + struct chcr_ktls_info *tx_info, + struct sge_eth_txq *q, u32 tcp_seq, + bool tcp_push, u32 mss) +{ + u32 len16, wr_mid = 0, flits = 0, ndesc, cipher_start; + struct adapter *adap = tx_info->adap; + int credits, left, last_desc; + struct tx_sw_desc *sgl_sdesc; + struct cpl_tx_data *tx_data; + struct cpl_tx_sec_pdu *cpl; + struct ulptx_idata *idata; + struct ulp_txpkt *ulptx; + struct fw_ulptx_wr *wr; + void *pos; + u64 *end; + + /* get the number of flits required */ + flits = chcr_ktls_get_tx_flits(skb, tx_info->key_ctx_len); + /* number of descriptors */ + ndesc = chcr_flits_to_desc(flits); + /* check if enough credits available */ + credits = chcr_txq_avail(&q->q) - ndesc; + if (unlikely(credits < 0)) { + chcr_eth_txq_stop(q); + return NETDEV_TX_BUSY; + } + + if (unlikely(credits < ETHTXQ_STOP_THRES)) { + /* Credits are below the threshold vaues, stop the queue after + * injecting the Work Request for this packet. + */ + chcr_eth_txq_stop(q); + wr_mid |= FW_WR_EQUEQ_F | FW_WR_EQUIQ_F; + } + + last_desc = q->q.pidx + ndesc - 1; + if (last_desc >= q->q.size) + last_desc -= q->q.size; + sgl_sdesc = &q->q.sdesc[last_desc]; + + if (unlikely(cxgb4_map_skb(adap->pdev_dev, skb, sgl_sdesc->addr) < 0)) { + memset(sgl_sdesc->addr, 0, sizeof(sgl_sdesc->addr)); + q->mapping_err++; + return NETDEV_TX_BUSY; + } + + pos = &q->q.desc[q->q.pidx]; + end = (u64 *)pos + flits; + /* FW_ULPTX_WR */ + wr = pos; + /* WR will need len16 */ + len16 = DIV_ROUND_UP(flits, 2); + wr->op_to_compl = htonl(FW_WR_OP_V(FW_ULPTX_WR)); + wr->flowid_len16 = htonl(wr_mid | FW_WR_LEN16_V(len16)); + wr->cookie = 0; + pos += sizeof(*wr); + /* ULP_TXPKT */ + ulptx = pos; + ulptx->cmd_dest = htonl(ULPTX_CMD_V(ULP_TX_PKT) | + ULP_TXPKT_CHANNELID_V(tx_info->port_id) | + ULP_TXPKT_FID_V(q->q.cntxt_id) | + ULP_TXPKT_RO_F); + ulptx->len = htonl(len16 - 1); + /* ULPTX_IDATA sub-command */ + idata = (struct ulptx_idata *)(ulptx + 1); + idata->cmd_more = htonl(ULPTX_CMD_V(ULP_TX_SC_IMM) | ULP_TX_SC_MORE_F); + /* idata length will include cpl_tx_sec_pdu + key context size + + * cpl_tx_data header. + */ + idata->len = htonl(sizeof(*cpl) + tx_info->key_ctx_len + + sizeof(*tx_data)); + /* SEC CPL */ + cpl = (struct cpl_tx_sec_pdu *)(idata + 1); + cpl->op_ivinsrtofst = + htonl(CPL_TX_SEC_PDU_OPCODE_V(CPL_TX_SEC_PDU) | + CPL_TX_SEC_PDU_CPLLEN_V(CHCR_CPL_TX_SEC_PDU_LEN_64BIT) | + CPL_TX_SEC_PDU_PLACEHOLDER_V(1) | + CPL_TX_SEC_PDU_IVINSRTOFST_V(TLS_HEADER_SIZE + 1)); + cpl->pldlen = htonl(skb->data_len); + + /* encryption should start after tls header size + iv size */ + cipher_start = TLS_HEADER_SIZE + tx_info->iv_size + 1; + + cpl->aadstart_cipherstop_hi = + htonl(CPL_TX_SEC_PDU_AADSTART_V(1) | + CPL_TX_SEC_PDU_AADSTOP_V(TLS_HEADER_SIZE) | + CPL_TX_SEC_PDU_CIPHERSTART_V(cipher_start)); + + /* authentication will also start after tls header + iv size */ + cpl->cipherstop_lo_authinsert = + htonl(CPL_TX_SEC_PDU_AUTHSTART_V(cipher_start) | + CPL_TX_SEC_PDU_AUTHSTOP_V(TLS_CIPHER_AES_GCM_128_TAG_SIZE) | + CPL_TX_SEC_PDU_AUTHINSERT_V(TLS_CIPHER_AES_GCM_128_TAG_SIZE)); + + /* These two flits are actually a CPL_TLS_TX_SCMD_FMT. */ + cpl->seqno_numivs = htonl(tx_info->scmd0_seqno_numivs); + cpl->ivgen_hdrlen = htonl(tx_info->scmd0_ivgen_hdrlen); + cpl->scmd1 = cpu_to_be64(tx_info->record_no); + + pos = cpl + 1; + /* check if space left to fill the keys */ + left = (void *)q->q.stat - pos; + if (!left) { + left = (void *)end - (void *)q->q.stat; + pos = q->q.desc; + end = pos + left; + } + + pos = chcr_copy_to_txd(&tx_info->key_ctx, &q->q, pos, + tx_info->key_ctx_len); + left = (void *)q->q.stat - pos; + + if (!left) { + left = (void *)end - (void *)q->q.stat; + pos = q->q.desc; + end = pos + left; + } + /* CPL_TX_DATA */ + tx_data = (void *)pos; + OPCODE_TID(tx_data) = htonl(MK_OPCODE_TID(CPL_TX_DATA, tx_info->tid)); + tx_data->len = htonl(TX_DATA_MSS_V(mss) | TX_LENGTH_V(skb->data_len)); + + tx_data->rsvd = htonl(tcp_seq); + + tx_data->flags = htonl(TX_BYPASS_F); + if (tcp_push) + tx_data->flags |= htonl(TX_PUSH_F | TX_SHOVE_F); + + /* check left again, it might go beyond queue limit */ + pos = tx_data + 1; + left = (void *)q->q.stat - pos; + + /* check the position again */ + if (!left) { + left = (void *)end - (void *)q->q.stat; + pos = q->q.desc; + end = pos + left; + } + + /* send the complete packet except the header */ + cxgb4_write_sgl(skb, &q->q, pos, end, skb->len - skb->data_len, + sgl_sdesc->addr); + sgl_sdesc->skb = skb; + + chcr_txq_advance(&q->q, ndesc); + cxgb4_ring_tx_db(adap, &q->q, ndesc); + atomic64_inc(&adap->chcr_stats.ktls_tx_send_records); + + return 0; +} + +/* + * chcr_ktls_xmit_wr_short: This is to send out partial records. If its + * a middle part of a record, fetch the prior data to make it 16 byte aligned + * and then only send it out. + * + * @skb - skb contains partial record.. + * @tx_info - driver specific tls info. + * @q - TX queue. + * @tcp_seq + * @tcp_push - tcp push bit. + * @mss - segment size. + * @tls_rec_offset - offset from start of the tls record. + * @perior_data - data before the current segment, required to make this record + * 16 byte aligned. + * @prior_data_len - prior_data length (less than 16) + * return: NETDEV_TX_BUSY/NET_TX_OK. + */ +static int chcr_ktls_xmit_wr_short(struct sk_buff *skb, + struct chcr_ktls_info *tx_info, + struct sge_eth_txq *q, + u32 tcp_seq, bool tcp_push, u32 mss, + u32 tls_rec_offset, u8 *prior_data, + u32 prior_data_len) +{ + struct adapter *adap = tx_info->adap; + u32 len16, wr_mid = 0, cipher_start; + unsigned int flits = 0, ndesc; + int credits, left, last_desc; + struct tx_sw_desc *sgl_sdesc; + struct cpl_tx_data *tx_data; + struct cpl_tx_sec_pdu *cpl; + struct ulptx_idata *idata; + struct ulp_txpkt *ulptx; + struct fw_ulptx_wr *wr; + __be64 iv_record; + void *pos; + u64 *end; + + /* get the number of flits required, it's a partial record so 2 flits + * (AES_BLOCK_SIZE) will be added. + */ + flits = chcr_ktls_get_tx_flits(skb, tx_info->key_ctx_len) + 2; + /* get the correct 8 byte IV of this record */ + iv_record = cpu_to_be64(tx_info->iv + tx_info->record_no); + /* If it's a middle record and not 16 byte aligned to run AES CTR, need + * to make it 16 byte aligned. So atleadt 2 extra flits of immediate + * data will be added. + */ + if (prior_data_len) + flits += 2; + /* number of descriptors */ + ndesc = chcr_flits_to_desc(flits); + /* check if enough credits available */ + credits = chcr_txq_avail(&q->q) - ndesc; + if (unlikely(credits < 0)) { + chcr_eth_txq_stop(q); + return NETDEV_TX_BUSY; + } + + if (unlikely(credits < ETHTXQ_STOP_THRES)) { + chcr_eth_txq_stop(q); + wr_mid |= FW_WR_EQUEQ_F | FW_WR_EQUIQ_F; + } + + last_desc = q->q.pidx + ndesc - 1; + if (last_desc >= q->q.size) + last_desc -= q->q.size; + sgl_sdesc = &q->q.sdesc[last_desc]; + + if (unlikely(cxgb4_map_skb(adap->pdev_dev, skb, sgl_sdesc->addr) < 0)) { + memset(sgl_sdesc->addr, 0, sizeof(sgl_sdesc->addr)); + q->mapping_err++; + return NETDEV_TX_BUSY; + } + + pos = &q->q.desc[q->q.pidx]; + end = (u64 *)pos + flits; + /* FW_ULPTX_WR */ + wr = pos; + /* WR will need len16 */ + len16 = DIV_ROUND_UP(flits, 2); + wr->op_to_compl = htonl(FW_WR_OP_V(FW_ULPTX_WR)); + wr->flowid_len16 = htonl(wr_mid | FW_WR_LEN16_V(len16)); + wr->cookie = 0; + pos += sizeof(*wr); + /* ULP_TXPKT */ + ulptx = pos; + ulptx->cmd_dest = htonl(ULPTX_CMD_V(ULP_TX_PKT) | + ULP_TXPKT_CHANNELID_V(tx_info->port_id) | + ULP_TXPKT_FID_V(q->q.cntxt_id) | + ULP_TXPKT_RO_F); + ulptx->len = htonl(len16 - 1); + /* ULPTX_IDATA sub-command */ + idata = (struct ulptx_idata *)(ulptx + 1); + idata->cmd_more = htonl(ULPTX_CMD_V(ULP_TX_SC_IMM) | ULP_TX_SC_MORE_F); + /* idata length will include cpl_tx_sec_pdu + key context size + + * cpl_tx_data header. + */ + idata->len = htonl(sizeof(*cpl) + tx_info->key_ctx_len + + sizeof(*tx_data) + AES_BLOCK_LEN + prior_data_len); + /* SEC CPL */ + cpl = (struct cpl_tx_sec_pdu *)(idata + 1); + /* cipher start will have tls header + iv size extra if its a header + * part of tls record. else only 16 byte IV will be added. + */ + cipher_start = + AES_BLOCK_LEN + 1 + + (!tls_rec_offset ? TLS_HEADER_SIZE + tx_info->iv_size : 0); + + cpl->op_ivinsrtofst = + htonl(CPL_TX_SEC_PDU_OPCODE_V(CPL_TX_SEC_PDU) | + CPL_TX_SEC_PDU_CPLLEN_V(CHCR_CPL_TX_SEC_PDU_LEN_64BIT) | + CPL_TX_SEC_PDU_IVINSRTOFST_V(1)); + cpl->pldlen = htonl(skb->data_len + AES_BLOCK_LEN + prior_data_len); + cpl->aadstart_cipherstop_hi = + htonl(CPL_TX_SEC_PDU_CIPHERSTART_V(cipher_start)); + cpl->cipherstop_lo_authinsert = 0; + /* These two flits are actually a CPL_TLS_TX_SCMD_FMT. */ + cpl->seqno_numivs = htonl(tx_info->scmd0_short_seqno_numivs); + cpl->ivgen_hdrlen = htonl(tx_info->scmd0_short_ivgen_hdrlen); + cpl->scmd1 = 0; + + pos = cpl + 1; + /* check if space left to fill the keys */ + left = (void *)q->q.stat - pos; + if (!left) { + left = (void *)end - (void *)q->q.stat; + pos = q->q.desc; + end = pos + left; + } + + pos = chcr_copy_to_txd(&tx_info->key_ctx, &q->q, pos, + tx_info->key_ctx_len); + left = (void *)q->q.stat - pos; + + if (!left) { + left = (void *)end - (void *)q->q.stat; + pos = q->q.desc; + end = pos + left; + } + /* CPL_TX_DATA */ + tx_data = (void *)pos; + OPCODE_TID(tx_data) = htonl(MK_OPCODE_TID(CPL_TX_DATA, tx_info->tid)); + tx_data->len = htonl(TX_DATA_MSS_V(mss) | + TX_LENGTH_V(skb->data_len + prior_data_len)); + tx_data->rsvd = htonl(tcp_seq); + tx_data->flags = htonl(TX_BYPASS_F); + if (tcp_push) + tx_data->flags |= htonl(TX_PUSH_F | TX_SHOVE_F); + + /* check left again, it might go beyond queue limit */ + pos = tx_data + 1; + left = (void *)q->q.stat - pos; + + /* check the position again */ + if (!left) { + left = (void *)end - (void *)q->q.stat; + pos = q->q.desc; + end = pos + left; + } + /* copy the 16 byte IV for AES-CTR, which includes 4 bytes of salt, 8 + * bytes of actual IV and 4 bytes of 16 byte-sequence. + */ + memcpy(pos, tx_info->key_ctx.salt, tx_info->salt_size); + memcpy(pos + tx_info->salt_size, &iv_record, tx_info->iv_size); + *(__be32 *)(pos + tx_info->salt_size + tx_info->iv_size) = + htonl(2 + (tls_rec_offset ? ((tls_rec_offset - + (TLS_HEADER_SIZE + tx_info->iv_size)) / AES_BLOCK_LEN) : 0)); + + pos += 16; + /* Prior_data_len will always be less than 16 bytes, fill the + * prio_data_len after AES_CTRL_BLOCK and clear the remaining length + * to 0. + */ + if (prior_data_len) + pos = chcr_copy_to_txd(prior_data, &q->q, pos, 16); + /* send the complete packet except the header */ + cxgb4_write_sgl(skb, &q->q, pos, end, skb->len - skb->data_len, + sgl_sdesc->addr); + sgl_sdesc->skb = skb; + + chcr_txq_advance(&q->q, ndesc); + cxgb4_ring_tx_db(adap, &q->q, ndesc); + + return 0; +} + +/* + * chcr_ktls_tx_plaintxt: This handler will take care of the records which has + * only plain text (only tls header and iv) + * @tx_info - driver specific tls info. + * @skb - skb contains partial record.. + * @tcp_seq + * @mss - segment size. + * @tcp_push - tcp push bit. + * @q - TX queue. + * @port_id : port number + * @perior_data - data before the current segment, required to make this record + * 16 byte aligned. + * @prior_data_len - prior_data length (less than 16) + * return: NETDEV_TX_BUSY/NET_TX_OK. + */ +static int chcr_ktls_tx_plaintxt(struct chcr_ktls_info *tx_info, + struct sk_buff *skb, u32 tcp_seq, u32 mss, + bool tcp_push, struct sge_eth_txq *q, + u32 port_id, u8 *prior_data, + u32 prior_data_len) +{ + int credits, left, len16, last_desc; + unsigned int flits = 0, ndesc; + struct tx_sw_desc *sgl_sdesc; + struct cpl_tx_data *tx_data; + struct ulptx_idata *idata; + struct ulp_txpkt *ulptx; + struct fw_ulptx_wr *wr; + u32 wr_mid = 0; + void *pos; + u64 *end; + + flits = DIV_ROUND_UP(CHCR_PLAIN_TX_DATA_LEN, 8); + flits += chcr_sgl_len(skb_shinfo(skb)->nr_frags); + if (prior_data_len) + flits += 2; + /* WR will need len16 */ + len16 = DIV_ROUND_UP(flits, 2); + /* check how many descriptors needed */ + ndesc = DIV_ROUND_UP(flits, 8); + + credits = chcr_txq_avail(&q->q) - ndesc; + if (unlikely(credits < 0)) { + chcr_eth_txq_stop(q); + return NETDEV_TX_BUSY; + } + + if (unlikely(credits < ETHTXQ_STOP_THRES)) { + chcr_eth_txq_stop(q); + wr_mid |= FW_WR_EQUEQ_F | FW_WR_EQUIQ_F; + } + + last_desc = q->q.pidx + ndesc - 1; + if (last_desc >= q->q.size) + last_desc -= q->q.size; + sgl_sdesc = &q->q.sdesc[last_desc]; + + if (unlikely(cxgb4_map_skb(tx_info->adap->pdev_dev, skb, + sgl_sdesc->addr) < 0)) { + memset(sgl_sdesc->addr, 0, sizeof(sgl_sdesc->addr)); + q->mapping_err++; + return NETDEV_TX_BUSY; + } + + pos = &q->q.desc[q->q.pidx]; + end = (u64 *)pos + flits; + /* FW_ULPTX_WR */ + wr = pos; + wr->op_to_compl = htonl(FW_WR_OP_V(FW_ULPTX_WR)); + wr->flowid_len16 = htonl(wr_mid | FW_WR_LEN16_V(len16)); + wr->cookie = 0; + pos += sizeof(*wr); + /* ULP_TXPKT */ + ulptx = (struct ulp_txpkt *)(wr + 1); + ulptx->cmd_dest = htonl(ULPTX_CMD_V(ULP_TX_PKT) | + ULP_TXPKT_DATAMODIFY_V(0) | + ULP_TXPKT_CHANNELID_V(tx_info->port_id) | + ULP_TXPKT_DEST_V(0) | + ULP_TXPKT_FID_V(q->q.cntxt_id) | ULP_TXPKT_RO_V(1)); + ulptx->len = htonl(len16 - 1); + /* ULPTX_IDATA sub-command */ + idata = (struct ulptx_idata *)(ulptx + 1); + idata->cmd_more = htonl(ULPTX_CMD_V(ULP_TX_SC_IMM) | ULP_TX_SC_MORE_F); + idata->len = htonl(sizeof(*tx_data) + prior_data_len); + /* CPL_TX_DATA */ + tx_data = (struct cpl_tx_data *)(idata + 1); + OPCODE_TID(tx_data) = htonl(MK_OPCODE_TID(CPL_TX_DATA, tx_info->tid)); + tx_data->len = htonl(TX_DATA_MSS_V(mss) | + TX_LENGTH_V(skb->data_len + prior_data_len)); + /* set tcp seq number */ + tx_data->rsvd = htonl(tcp_seq); + tx_data->flags = htonl(TX_BYPASS_F); + if (tcp_push) + tx_data->flags |= htonl(TX_PUSH_F | TX_SHOVE_F); + + pos = tx_data + 1; + /* apart from prior_data_len, we should set remaining part of 16 bytes + * to be zero. + */ + if (prior_data_len) + pos = chcr_copy_to_txd(prior_data, &q->q, pos, 16); + + /* check left again, it might go beyond queue limit */ + left = (void *)q->q.stat - pos; + + /* check the position again */ + if (!left) { + left = (void *)end - (void *)q->q.stat; + pos = q->q.desc; + end = pos + left; + } + /* send the complete packet including the header */ + cxgb4_write_sgl(skb, &q->q, pos, end, skb->len - skb->data_len, + sgl_sdesc->addr); + sgl_sdesc->skb = skb; + + chcr_txq_advance(&q->q, ndesc); + cxgb4_ring_tx_db(tx_info->adap, &q->q, ndesc); + return 0; +} + +/* + * chcr_ktls_copy_record_in_skb + * @nskb - new skb where the frags to be added. + * @record - specific record which has complete 16k record in frags. + */ +static void chcr_ktls_copy_record_in_skb(struct sk_buff *nskb, + struct tls_record_info *record) +{ + int i = 0; + + for (i = 0; i < record->num_frags; i++) { + skb_shinfo(nskb)->frags[i] = record->frags[i]; + /* increase the frag ref count */ + __skb_frag_ref(&skb_shinfo(nskb)->frags[i]); + } + + skb_shinfo(nskb)->nr_frags = record->num_frags; + nskb->data_len = record->len; + nskb->len += record->len; + nskb->truesize += record->len; +} + +/* + * chcr_ktls_update_snd_una: Reset the SEND_UNA. It will be done to avoid + * sending the same segment again. It will discard the segment which is before + * the current tx max. + * @tx_info - driver specific tls info. + * @q - TX queue. + * return: NET_TX_OK/NET_XMIT_DROP. + */ +static int chcr_ktls_update_snd_una(struct chcr_ktls_info *tx_info, + struct sge_eth_txq *q) +{ + struct fw_ulptx_wr *wr; + unsigned int ndesc; + int credits; + void *pos; + u32 len; + + len = sizeof(*wr) + roundup(CHCR_SET_TCB_FIELD_LEN, 16); + ndesc = DIV_ROUND_UP(len, 64); + + credits = chcr_txq_avail(&q->q) - ndesc; + if (unlikely(credits < 0)) { + chcr_eth_txq_stop(q); + return NETDEV_TX_BUSY; + } + + pos = &q->q.desc[q->q.pidx]; + + wr = pos; + /* ULPTX wr */ + wr->op_to_compl = htonl(FW_WR_OP_V(FW_ULPTX_WR)); + wr->cookie = 0; + /* fill len in wr field */ + wr->flowid_len16 = htonl(FW_WR_LEN16_V(DIV_ROUND_UP(len, 16))); + + pos += sizeof(*wr); + + pos = chcr_write_cpl_set_tcb_ulp(tx_info, q, tx_info->tid, pos, + TCB_SND_UNA_RAW_W, + TCB_SND_UNA_RAW_V(TCB_SND_UNA_RAW_M), + TCB_SND_UNA_RAW_V(0), 0); + + chcr_txq_advance(&q->q, ndesc); + cxgb4_ring_tx_db(tx_info->adap, &q->q, ndesc); + + return 0; +} + +/* + * chcr_end_part_handler: This handler will handle the record which + * is complete or if record's end part is received. T6 adapter has a issue that + * it can't send out TAG with partial record so if its an end part then we have + * to send TAG as well and for which we need to fetch the complete record and + * send it to crypto module. + * @tx_info - driver specific tls info. + * @skb - skb contains partial record. + * @record - complete record of 16K size. + * @tcp_seq + * @mss - segment size in which TP needs to chop a packet. + * @tcp_push_no_fin - tcp push if fin is not set. + * @q - TX queue. + * @tls_end_offset - offset from end of the record. + * @last wr : check if this is the last part of the skb going out. + * return: NETDEV_TX_OK/NETDEV_TX_BUSY. + */ +static int chcr_end_part_handler(struct chcr_ktls_info *tx_info, + struct sk_buff *skb, + struct tls_record_info *record, + u32 tcp_seq, int mss, bool tcp_push_no_fin, + struct sge_eth_txq *q, + u32 tls_end_offset, bool last_wr) +{ + struct sk_buff *nskb = NULL; + /* check if it is a complete record */ + if (tls_end_offset == record->len) { + nskb = skb; + atomic64_inc(&tx_info->adap->chcr_stats.ktls_tx_complete_pkts); + } else { + dev_kfree_skb_any(skb); + + nskb = alloc_skb(0, GFP_KERNEL); + if (!nskb) + return NETDEV_TX_BUSY; + /* copy complete record in skb */ + chcr_ktls_copy_record_in_skb(nskb, record); + /* packet is being sent from the beginning, update the tcp_seq + * accordingly. + */ + tcp_seq = tls_record_start_seq(record); + /* reset snd una, so the middle record won't send the already + * sent part. + */ + if (chcr_ktls_update_snd_una(tx_info, q)) + goto out; + atomic64_inc(&tx_info->adap->chcr_stats.ktls_tx_end_pkts); + } + + if (chcr_ktls_xmit_wr_complete(nskb, tx_info, q, tcp_seq, + (last_wr && tcp_push_no_fin), + mss)) { + goto out; + } + return 0; +out: + dev_kfree_skb_any(nskb); + return NETDEV_TX_BUSY; +} + +/* + * chcr_short_record_handler: This handler will take care of the records which + * doesn't have end part (1st part or the middle part(/s) of a record). In such + * cases, AES CTR will be used in place of AES GCM to send out partial packet. + * This partial record might be the first part of the record, or the middle + * part. In case of middle record we should fetch the prior data to make it 16 + * byte aligned. If it has a partial tls header or iv then get to the start of + * tls header. And if it has partial TAG, then remove the complete TAG and send + * only the payload. + * There is one more possibility that it gets a partial header, send that + * portion as a plaintext. + * @tx_info - driver specific tls info. + * @skb - skb contains partial record.. + * @record - complete record of 16K size. + * @tcp_seq + * @mss - segment size in which TP needs to chop a packet. + * @tcp_push_no_fin - tcp push if fin is not set. + * @q - TX queue. + * @tls_end_offset - offset from end of the record. + * return: NETDEV_TX_OK/NETDEV_TX_BUSY. + */ +static int chcr_short_record_handler(struct chcr_ktls_info *tx_info, + struct sk_buff *skb, + struct tls_record_info *record, + u32 tcp_seq, int mss, bool tcp_push_no_fin, + struct sge_eth_txq *q, u32 tls_end_offset) +{ + u32 tls_rec_offset = tcp_seq - tls_record_start_seq(record); + u8 prior_data[16] = {0}; + u32 prior_data_len = 0; + u32 data_len; + + /* check if the skb is ending in middle of tag/HASH, its a big + * trouble, send the packet before the HASH. + */ + int remaining_record = tls_end_offset - skb->data_len; + + if (remaining_record > 0 && + remaining_record < TLS_CIPHER_AES_GCM_128_TAG_SIZE) { + int trimmed_len = skb->data_len - + (TLS_CIPHER_AES_GCM_128_TAG_SIZE - remaining_record); + struct sk_buff *tmp_skb = NULL; + /* don't process the pkt if it is only a partial tag */ + if (skb->data_len < TLS_CIPHER_AES_GCM_128_TAG_SIZE) + goto out; + + WARN_ON(trimmed_len > skb->data_len); + + /* shift to those many bytes */ + tmp_skb = alloc_skb(0, GFP_KERNEL); + if (unlikely(!tmp_skb)) + goto out; + + chcr_ktls_skb_shift(tmp_skb, skb, trimmed_len); + /* free the last trimmed portion */ + dev_kfree_skb_any(skb); + skb = tmp_skb; + atomic64_inc(&tx_info->adap->chcr_stats.ktls_tx_trimmed_pkts); + } + data_len = skb->data_len; + /* check if the middle record's start point is 16 byte aligned. CTR + * needs 16 byte aligned start point to start encryption. + */ + if (tls_rec_offset) { + /* there is an offset from start, means its a middle record */ + int remaining = 0; + + if (tls_rec_offset < (TLS_HEADER_SIZE + tx_info->iv_size)) { + prior_data_len = tls_rec_offset; + tls_rec_offset = 0; + remaining = 0; + } else { + prior_data_len = + (tls_rec_offset - + (TLS_HEADER_SIZE + tx_info->iv_size)) + % AES_BLOCK_LEN; + remaining = tls_rec_offset - prior_data_len; + } + + /* if prior_data_len is not zero, means we need to fetch prior + * data to make this record 16 byte aligned, or we need to reach + * to start offset. + */ + if (prior_data_len) { + int i = 0; + u8 *data = NULL; + skb_frag_t *f; + u8 *vaddr; + int frag_size = 0, frag_delta = 0; + + while (remaining > 0) { + frag_size = skb_frag_size(&record->frags[i]); + if (remaining < frag_size) + break; + + remaining -= frag_size; + i++; + } + f = &record->frags[i]; + vaddr = kmap_atomic(skb_frag_page(f)); + + data = vaddr + skb_frag_off(f) + remaining; + frag_delta = skb_frag_size(f) - remaining; + + if (frag_delta >= prior_data_len) { + memcpy(prior_data, data, prior_data_len); + kunmap_atomic(vaddr); + } else { + memcpy(prior_data, data, frag_delta); + kunmap_atomic(vaddr); + /* get the next page */ + f = &record->frags[i + 1]; + vaddr = kmap_atomic(skb_frag_page(f)); + data = vaddr + skb_frag_off(f); + memcpy(prior_data + frag_delta, + data, (prior_data_len - frag_delta)); + kunmap_atomic(vaddr); + } + /* reset tcp_seq as per the prior_data_required len */ + tcp_seq -= prior_data_len; + /* include prio_data_len for further calculation. + */ + data_len += prior_data_len; + } + /* reset snd una, so the middle record won't send the already + * sent part. + */ + if (chcr_ktls_update_snd_una(tx_info, q)) + goto out; + atomic64_inc(&tx_info->adap->chcr_stats.ktls_tx_middle_pkts); + } else { + /* Else means, its a partial first part of the record. Check if + * its only the header, don't need to send for encryption then. + */ + if (data_len <= TLS_HEADER_SIZE + tx_info->iv_size) { + if (chcr_ktls_tx_plaintxt(tx_info, skb, tcp_seq, mss, + tcp_push_no_fin, q, + tx_info->port_id, + prior_data, + prior_data_len)) { + goto out; + } + return 0; + } + atomic64_inc(&tx_info->adap->chcr_stats.ktls_tx_start_pkts); + } + + if (chcr_ktls_xmit_wr_short(skb, tx_info, q, tcp_seq, tcp_push_no_fin, + mss, tls_rec_offset, prior_data, + prior_data_len)) { + goto out; + } + + return 0; +out: + dev_kfree_skb_any(skb); + return NETDEV_TX_BUSY; +} + +/* nic tls TX handler */ +int chcr_ktls_xmit(struct sk_buff *skb, struct net_device *dev) +{ + struct chcr_ktls_ofld_ctx_tx *tx_ctx; + struct tcphdr *th = tcp_hdr(skb); + int data_len, qidx, ret = 0, mss; + struct tls_record_info *record; + struct chcr_stats_debug *stats; + struct chcr_ktls_info *tx_info; + u32 tls_end_offset, tcp_seq; + struct tls_context *tls_ctx; + struct sk_buff *local_skb; + int new_connection_state; + struct sge_eth_txq *q; + struct adapter *adap; + unsigned long flags; + + tcp_seq = ntohl(th->seq); + + mss = skb_is_gso(skb) ? skb_shinfo(skb)->gso_size : skb->data_len; + + /* check if we haven't set it for ktls offload */ + if (!skb->sk || !tls_is_sk_tx_device_offloaded(skb->sk)) + goto out; + + tls_ctx = tls_get_ctx(skb->sk); + if (unlikely(tls_ctx->netdev != dev)) + goto out; + + tx_ctx = chcr_get_ktls_tx_context(tls_ctx); + tx_info = tx_ctx->chcr_info; + + if (unlikely(!tx_info)) + goto out; + + /* check the connection state, we don't need to pass new connection + * state, state machine will check and update the new state if it is + * stuck due to responses not received from HW. + * Start the tx handling only if state is KTLS_CONN_TX_READY. + */ + new_connection_state = chcr_ktls_update_connection_state(tx_info, 0); + if (new_connection_state != KTLS_CONN_TX_READY) + goto out; + + /* don't touch the original skb, make a new skb to extract each records + * and send them separately. + */ + local_skb = alloc_skb(0, GFP_KERNEL); + + if (unlikely(!local_skb)) + return NETDEV_TX_BUSY; + + adap = tx_info->adap; + stats = &adap->chcr_stats; + + qidx = skb->queue_mapping; + q = &adap->sge.ethtxq[qidx + tx_info->first_qset]; + cxgb4_reclaim_completed_tx(adap, &q->q, true); + /* if tcp options are set but finish is not send the options first */ + if (!th->fin && chcr_ktls_check_tcp_options(th)) { + ret = chcr_ktls_write_tcp_options(tx_info, skb, q, + tx_info->tx_chan); + if (ret) + return NETDEV_TX_BUSY; + } + /* update tcb */ + ret = chcr_ktls_xmit_tcb_cpls(tx_info, q, ntohl(th->seq), + ntohl(th->ack_seq), + ntohs(th->window)); + if (ret) { + dev_kfree_skb_any(local_skb); + return NETDEV_TX_BUSY; + } + + /* copy skb contents into local skb */ + chcr_ktls_skb_copy(skb, local_skb); + + /* go through the skb and send only one record at a time. */ + data_len = skb->data_len; + /* TCP segments can be in received either complete or partial. + * chcr_end_part_handler will handle cases if complete record or end + * part of the record is received. Incase of partial end part of record, + * we will send the complete record again. + */ + + do { + int i; + + cxgb4_reclaim_completed_tx(adap, &q->q, true); + /* lock taken */ + spin_lock_irqsave(&tx_ctx->base.lock, flags); + /* fetch the tls record */ + record = tls_get_record(&tx_ctx->base, tcp_seq, + &tx_info->record_no); + /* By the time packet reached to us, ACK is received, and record + * won't be found in that case, handle it gracefully. + */ + if (unlikely(!record)) { + spin_unlock_irqrestore(&tx_ctx->base.lock, flags); + atomic64_inc(&stats->ktls_tx_drop_no_sync_data); + goto out; + } + + if (unlikely(tls_record_is_start_marker(record))) { + spin_unlock_irqrestore(&tx_ctx->base.lock, flags); + atomic64_inc(&stats->ktls_tx_skip_no_sync_data); + goto out; + } + + /* increase page reference count of the record, so that there + * won't be any chance of page free in middle if in case stack + * receives ACK and try to delete the record. + */ + for (i = 0; i < record->num_frags; i++) + __skb_frag_ref(&record->frags[i]); + /* lock cleared */ + spin_unlock_irqrestore(&tx_ctx->base.lock, flags); + + tls_end_offset = record->end_seq - tcp_seq; + + pr_debug("seq 0x%x, end_seq 0x%x prev_seq 0x%x, datalen 0x%x\n", + tcp_seq, record->end_seq, tx_info->prev_seq, data_len); + /* if a tls record is finishing in this SKB */ + if (tls_end_offset <= data_len) { + struct sk_buff *nskb = NULL; + + if (tls_end_offset < data_len) { + nskb = alloc_skb(0, GFP_KERNEL); + if (unlikely(!nskb)) { + ret = -ENOMEM; + goto clear_ref; + } + + chcr_ktls_skb_shift(nskb, local_skb, + tls_end_offset); + } else { + /* its the only record in this skb, directly + * point it. + */ + nskb = local_skb; + } + ret = chcr_end_part_handler(tx_info, nskb, record, + tcp_seq, mss, + (!th->fin && th->psh), q, + tls_end_offset, + (nskb == local_skb)); + + if (ret && nskb != local_skb) + dev_kfree_skb_any(local_skb); + + data_len -= tls_end_offset; + /* tcp_seq increment is required to handle next record. + */ + tcp_seq += tls_end_offset; + } else { + ret = chcr_short_record_handler(tx_info, local_skb, + record, tcp_seq, mss, + (!th->fin && th->psh), + q, tls_end_offset); + data_len = 0; + } +clear_ref: + /* clear the frag ref count which increased locally before */ + for (i = 0; i < record->num_frags; i++) { + /* clear the frag ref count */ + __skb_frag_unref(&record->frags[i]); + } + /* if any failure, come out from the loop. */ + if (ret) + goto out; + /* length should never be less than 0 */ + WARN_ON(data_len < 0); + + } while (data_len > 0); + + tx_info->prev_seq = ntohl(th->seq) + skb->data_len; + + atomic64_inc(&stats->ktls_tx_encrypted_packets); + atomic64_add(skb->data_len, &stats->ktls_tx_encrypted_bytes); + + /* tcp finish is set, send a separate tcp msg including all the options + * as well. + */ + if (th->fin) + chcr_ktls_write_tcp_options(tx_info, skb, q, tx_info->tx_chan); + +out: + dev_kfree_skb_any(skb); + return NETDEV_TX_OK; +} +#endif /* CONFIG_CHELSIO_TLS_DEVICE */ diff --git a/drivers/crypto/chelsio/chcr_ktls.h b/drivers/crypto/chelsio/chcr_ktls.h new file mode 100644 index 000000000000..5a7ae2ca446e --- /dev/null +++ b/drivers/crypto/chelsio/chcr_ktls.h @@ -0,0 +1,98 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* Copyright (C) 2020 Chelsio Communications. All rights reserved. */ + +#ifndef __CHCR_KTLS_H__ +#define __CHCR_KTLS_H__ + +#ifdef CONFIG_CHELSIO_TLS_DEVICE +#include <net/tls.h> +#include "cxgb4.h" +#include "t4_msg.h" +#include "t4_tcb.h" +#include "l2t.h" +#include "chcr_common.h" +#include "cxgb4_uld.h" + +#define CHCR_TCB_STATE_CLOSED 0 +#define CHCR_KTLS_KEY_CTX_LEN 16 +#define CHCR_SET_TCB_FIELD_LEN sizeof(struct cpl_set_tcb_field) +#define CHCR_PLAIN_TX_DATA_LEN (sizeof(struct fw_ulptx_wr) +\ + sizeof(struct ulp_txpkt) +\ + sizeof(struct ulptx_idata) +\ + sizeof(struct cpl_tx_data)) + +#define CHCR_KTLS_WR_SIZE (CHCR_PLAIN_TX_DATA_LEN +\ + sizeof(struct cpl_tx_sec_pdu)) + +enum chcr_ktls_conn_state { + KTLS_CONN_CLOSED, + KTLS_CONN_ACT_OPEN_REQ, + KTLS_CONN_ACT_OPEN_RPL, + KTLS_CONN_SET_TCB_REQ, + KTLS_CONN_SET_TCB_RPL, + KTLS_CONN_TX_READY, +}; + +struct chcr_ktls_info { + struct sock *sk; + spinlock_t lock; /* state machine lock */ + struct ktls_key_ctx key_ctx; + struct adapter *adap; + struct l2t_entry *l2te; + struct net_device *netdev; + u64 iv; + u64 record_no; + int tid; + int atid; + int rx_qid; + u32 iv_size; + u32 prev_seq; + u32 prev_ack; + u32 salt_size; + u32 key_ctx_len; + u32 scmd0_seqno_numivs; + u32 scmd0_ivgen_hdrlen; + u32 tcp_start_seq_number; + u32 scmd0_short_seqno_numivs; + u32 scmd0_short_ivgen_hdrlen; + enum chcr_ktls_conn_state connection_state; + u16 prev_win; + u8 tx_chan; + u8 smt_idx; + u8 port_id; + u8 ip_family; + u8 first_qset; +}; + +struct chcr_ktls_ofld_ctx_tx { + struct tls_offload_context_tx base; + struct chcr_ktls_info *chcr_info; +}; + +static inline struct chcr_ktls_ofld_ctx_tx * +chcr_get_ktls_tx_context(struct tls_context *tls_ctx) +{ + BUILD_BUG_ON(sizeof(struct chcr_ktls_ofld_ctx_tx) > + TLS_OFFLOAD_CONTEXT_SIZE_TX); + return container_of(tls_offload_ctx_tx(tls_ctx), + struct chcr_ktls_ofld_ctx_tx, + base); +} + +static inline int chcr_get_first_rx_qid(struct adapter *adap) +{ + /* u_ctx is saved in adap, fetch it */ + struct uld_ctx *u_ctx = adap->uld[CXGB4_ULD_CRYPTO].handle; + + if (!u_ctx) + return -1; + return u_ctx->lldi.rxq_ids[0]; +} + +void chcr_enable_ktls(struct adapter *adap); +void chcr_disable_ktls(struct adapter *adap); +int chcr_ktls_cpl_act_open_rpl(struct adapter *adap, unsigned char *input); +int chcr_ktls_cpl_set_tcb_rpl(struct adapter *adap, unsigned char *input); +int chcr_ktls_xmit(struct sk_buff *skb, struct net_device *dev); +#endif /* CONFIG_CHELSIO_TLS_DEVICE */ +#endif /* __CHCR_KTLS_H__ */ diff --git a/drivers/crypto/chelsio/chtls/chtls_cm.c b/drivers/crypto/chelsio/chtls/chtls_cm.c index 9b2745ad9e38..d5720a859443 100644 --- a/drivers/crypto/chelsio/chtls/chtls_cm.c +++ b/drivers/crypto/chelsio/chtls/chtls_cm.c @@ -445,6 +445,7 @@ void chtls_destroy_sock(struct sock *sk) chtls_purge_write_queue(sk); free_tls_keyid(sk); kref_put(&csk->kref, chtls_sock_release); + csk->cdev = NULL; sk->sk_prot = &tcp_prot; sk->sk_prot->destroy(sk); } @@ -759,8 +760,10 @@ static void chtls_release_resources(struct sock *sk) csk->l2t_entry = NULL; } - cxgb4_remove_tid(tids, csk->port_id, tid, sk->sk_family); - sock_put(sk); + if (sk->sk_state != TCP_SYN_SENT) { + cxgb4_remove_tid(tids, csk->port_id, tid, sk->sk_family); + sock_put(sk); + } } static void chtls_conn_done(struct sock *sk) @@ -1716,6 +1719,9 @@ static void chtls_peer_close(struct sock *sk, struct sk_buff *skb) { struct chtls_sock *csk = rcu_dereference_sk_user_data(sk); + if (csk_flag_nochk(csk, CSK_ABORT_RPL_PENDING)) + goto out; + sk->sk_shutdown |= RCV_SHUTDOWN; sock_set_flag(sk, SOCK_DONE); @@ -1748,6 +1754,7 @@ static void chtls_peer_close(struct sock *sk, struct sk_buff *skb) else sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN); } +out: kfree_skb(skb); } @@ -1758,6 +1765,10 @@ static void chtls_close_con_rpl(struct sock *sk, struct sk_buff *skb) struct tcp_sock *tp; csk = rcu_dereference_sk_user_data(sk); + + if (csk_flag_nochk(csk, CSK_ABORT_RPL_PENDING)) + goto out; + tp = tcp_sk(sk); tp->snd_una = ntohl(rpl->snd_nxt) - 1; /* exclude FIN */ @@ -1787,6 +1798,7 @@ static void chtls_close_con_rpl(struct sock *sk, struct sk_buff *skb) default: pr_info("close_con_rpl in bad state %d\n", sk->sk_state); } +out: kfree_skb(skb); } @@ -1896,6 +1908,7 @@ static void chtls_send_abort_rpl(struct sock *sk, struct sk_buff *skb, } set_abort_rpl_wr(reply_skb, tid, status); + kfree_skb(skb); set_wr_txq(reply_skb, CPL_PRIORITY_DATA, queue); if (csk_conn_inline(csk)) { struct l2t_entry *e = csk->l2t_entry; @@ -1906,7 +1919,6 @@ static void chtls_send_abort_rpl(struct sock *sk, struct sk_buff *skb, } } cxgb4_ofld_send(cdev->lldi->ports[0], reply_skb); - kfree_skb(skb); } /* @@ -2008,7 +2020,8 @@ static void chtls_abort_req_rss(struct sock *sk, struct sk_buff *skb) chtls_conn_done(sk); } - chtls_send_abort_rpl(sk, skb, csk->cdev, rst_status, queue); + chtls_send_abort_rpl(sk, skb, BLOG_SKB_CB(skb)->cdev, + rst_status, queue); } static void chtls_abort_rpl_rss(struct sock *sk, struct sk_buff *skb) @@ -2042,6 +2055,7 @@ static int chtls_conn_cpl(struct chtls_dev *cdev, struct sk_buff *skb) struct cpl_peer_close *req = cplhdr(skb) + RSS_HDR; void (*fn)(struct sock *sk, struct sk_buff *skb); unsigned int hwtid = GET_TID(req); + struct chtls_sock *csk; struct sock *sk; u8 opcode; @@ -2051,6 +2065,8 @@ static int chtls_conn_cpl(struct chtls_dev *cdev, struct sk_buff *skb) if (!sk) goto rel_skb; + csk = sk->sk_user_data; + switch (opcode) { case CPL_PEER_CLOSE: fn = chtls_peer_close; @@ -2059,6 +2075,11 @@ static int chtls_conn_cpl(struct chtls_dev *cdev, struct sk_buff *skb) fn = chtls_close_con_rpl; break; case CPL_ABORT_REQ_RSS: + /* + * Save the offload device in the skb, we may process this + * message after the socket has closed. + */ + BLOG_SKB_CB(skb)->cdev = csk->cdev; fn = chtls_abort_req_rss; break; case CPL_ABORT_RPL_RSS: diff --git a/drivers/crypto/chelsio/chtls/chtls_io.c b/drivers/crypto/chelsio/chtls/chtls_io.c index 5cf9b021220b..e1401d9cc756 100644 --- a/drivers/crypto/chelsio/chtls/chtls_io.c +++ b/drivers/crypto/chelsio/chtls/chtls_io.c @@ -682,7 +682,7 @@ int chtls_push_frames(struct chtls_sock *csk, int comp) make_tx_data_wr(sk, skb, immdlen, len, credits_needed, completion); tp->snd_nxt += len; - tp->lsndtime = tcp_time_stamp(tp); + tp->lsndtime = tcp_jiffies32; if (completion) ULP_SKB_CB(skb)->flags &= ~ULPCB_FLAG_NEED_HDR; } else { @@ -902,14 +902,6 @@ static int chtls_skb_copy_to_page_nocache(struct sock *sk, return 0; } -/* Read TLS header to find content type and data length */ -static int tls_header_read(struct tls_hdr *thdr, struct iov_iter *from) -{ - if (copy_from_iter(thdr, sizeof(*thdr), from) != sizeof(*thdr)) - return -EFAULT; - return (__force int)cpu_to_be16(thdr->length); -} - static int csk_mem_free(struct chtls_dev *cdev, struct sock *sk) { return (cdev->max_host_sndbuf - sk->sk_wmem_queued); @@ -981,6 +973,37 @@ do_interrupted: goto do_rm_wq; } +static int chtls_proccess_cmsg(struct sock *sk, struct msghdr *msg, + unsigned char *record_type) +{ + struct cmsghdr *cmsg; + int rc = -EINVAL; + + for_each_cmsghdr(cmsg, msg) { + if (!CMSG_OK(msg, cmsg)) + return -EINVAL; + if (cmsg->cmsg_level != SOL_TLS) + continue; + + switch (cmsg->cmsg_type) { + case TLS_SET_RECORD_TYPE: + if (cmsg->cmsg_len < CMSG_LEN(sizeof(*record_type))) + return -EINVAL; + + if (msg->msg_flags & MSG_MORE) + return -EINVAL; + + *record_type = *(unsigned char *)CMSG_DATA(cmsg); + rc = 0; + break; + default: + return -EINVAL; + } + } + + return rc; +} + int chtls_sendmsg(struct sock *sk, struct msghdr *msg, size_t size) { struct chtls_sock *csk = rcu_dereference_sk_user_data(sk); @@ -1022,15 +1045,21 @@ int chtls_sendmsg(struct sock *sk, struct msghdr *msg, size_t size) goto wait_for_sndbuf; if (is_tls_tx(csk) && !csk->tlshws.txleft) { - struct tls_hdr hdr; + unsigned char record_type = TLS_RECORD_TYPE_DATA; - recordsz = tls_header_read(&hdr, &msg->msg_iter); - size -= TLS_HEADER_LENGTH; - copied += TLS_HEADER_LENGTH; + if (unlikely(msg->msg_controllen)) { + err = chtls_proccess_cmsg(sk, msg, + &record_type); + if (err) + goto out_err; + } + + recordsz = size; csk->tlshws.txleft = recordsz; - csk->tlshws.type = hdr.type; + csk->tlshws.type = record_type; + if (skb) - ULP_SKB_CB(skb)->ulp.tls.type = hdr.type; + ULP_SKB_CB(skb)->ulp.tls.type = record_type; } if (!skb || (ULP_SKB_CB(skb)->flags & ULPCB_FLAG_NO_APPEND) || @@ -1081,10 +1110,10 @@ new_buf: pg_size = page_size(page); if (off < pg_size && skb_can_coalesce(skb, i, page, off)) { - merge = 1; + merge = true; goto copy; } - merge = 0; + merge = false; if (i == (is_tls_tx(csk) ? (MAX_SKB_FRAGS - 1) : MAX_SKB_FRAGS)) goto new_buf; @@ -1399,6 +1428,8 @@ static int chtls_pt_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, { struct chtls_sock *csk = rcu_dereference_sk_user_data(sk); struct chtls_hws *hws = &csk->tlshws; + struct net_device *dev = csk->egress_dev; + struct adapter *adap = netdev2adap(dev); struct tcp_sock *tp = tcp_sk(sk); unsigned long avail; int buffers_freed; @@ -1521,6 +1552,22 @@ found_ok_skb: } } } + /* Set record type if not already done. For a non-data record, + * do not proceed if record type could not be copied. + */ + if (ULP_SKB_CB(skb)->flags & ULPCB_FLAG_TLS_HDR) { + struct tls_hdr *thdr = (struct tls_hdr *)skb->data; + int cerr = 0; + + cerr = put_cmsg(msg, SOL_TLS, TLS_GET_RECORD_TYPE, + sizeof(thdr->type), &thdr->type); + + if (cerr && thdr->type != TLS_RECORD_TYPE_DATA) + return -EIO; + /* don't send tls header, skip copy */ + goto skip_copy; + } + if (skb_copy_datagram_msg(skb, offset, msg, avail)) { if (!copied) { copied = -EFAULT; @@ -1540,6 +1587,7 @@ skip_copy: tp->copied_seq += skb->len; hws->rcvpld = skb->hdr_len; } else { + atomic_inc(&adap->chcr_stats.tls_pdu_rx); tp->copied_seq += hws->rcvpld; } chtls_free_skb(sk, skb); diff --git a/drivers/crypto/chelsio/chtls/chtls_main.c b/drivers/crypto/chelsio/chtls/chtls_main.c index a038de90b2ea..2110d0893bc7 100644 --- a/drivers/crypto/chelsio/chtls/chtls_main.c +++ b/drivers/crypto/chelsio/chtls/chtls_main.c @@ -174,9 +174,16 @@ static inline void chtls_dev_release(struct kref *kref) { struct tls_toe_device *dev; struct chtls_dev *cdev; + struct adapter *adap; dev = container_of(kref, struct tls_toe_device, kref); cdev = to_chtls_dev(dev); + + /* Reset tls rx/tx stats */ + adap = pci_get_drvdata(cdev->pdev); + atomic_set(&adap->chcr_stats.tls_pdu_tx, 0); + atomic_set(&adap->chcr_stats.tls_pdu_rx, 0); + chtls_free_uld(cdev); } @@ -229,8 +236,7 @@ static void *chtls_uld_add(const struct cxgb4_lld_info *info) struct chtls_dev *cdev; int i, j; - cdev = kzalloc(sizeof(*cdev) + info->nports * - (sizeof(struct net_device *)), GFP_KERNEL); + cdev = kzalloc(sizeof(*cdev), GFP_KERNEL); if (!cdev) goto out; diff --git a/drivers/crypto/hisilicon/Kconfig b/drivers/crypto/hisilicon/Kconfig index 8851161f722f..f09c6cf7823e 100644 --- a/drivers/crypto/hisilicon/Kconfig +++ b/drivers/crypto/hisilicon/Kconfig @@ -27,6 +27,7 @@ config CRYPTO_DEV_HISI_SEC2 select CRYPTO_SHA256 select CRYPTO_SHA512 depends on PCI && PCI_MSI + depends on UACCE || UACCE=n depends on ARM64 || (COMPILE_TEST && 64BIT) help Support for HiSilicon SEC Engine of version 2 in crypto subsystem. @@ -40,6 +41,7 @@ config CRYPTO_DEV_HISI_QM tristate depends on ARM64 || COMPILE_TEST depends on PCI && PCI_MSI + depends on UACCE || UACCE=n help HiSilicon accelerator engines use a common queue management interface. Specific engine driver may use this module. @@ -49,6 +51,7 @@ config CRYPTO_DEV_HISI_ZIP depends on PCI && PCI_MSI depends on ARM64 || (COMPILE_TEST && 64BIT) depends on !CPU_BIG_ENDIAN || COMPILE_TEST + depends on UACCE || UACCE=n select CRYPTO_DEV_HISI_QM help Support for HiSilicon ZIP Driver @@ -56,6 +59,7 @@ config CRYPTO_DEV_HISI_ZIP config CRYPTO_DEV_HISI_HPRE tristate "Support for HISI HPRE accelerator" depends on PCI && PCI_MSI + depends on UACCE || UACCE=n depends on ARM64 || (COMPILE_TEST && 64BIT) select CRYPTO_DEV_HISI_QM select CRYPTO_DH diff --git a/drivers/crypto/hisilicon/hpre/hpre.h b/drivers/crypto/hisilicon/hpre/hpre.h index ddf13ea9862a..03d512ec6336 100644 --- a/drivers/crypto/hisilicon/hpre/hpre.h +++ b/drivers/crypto/hisilicon/hpre/hpre.h @@ -46,7 +46,6 @@ struct hpre_debug { struct hpre { struct hisi_qm qm; - struct list_head list; struct hpre_debug debug; u32 num_vfs; unsigned long status; @@ -76,7 +75,7 @@ struct hpre_sqe { __le32 rsvd1[_HPRE_SQE_ALIGN_EXT]; }; -struct hpre *hpre_find_device(int node); +struct hisi_qp *hpre_create_qp(void); int hpre_algs_register(void); void hpre_algs_unregister(void); diff --git a/drivers/crypto/hisilicon/hpre/hpre_crypto.c b/drivers/crypto/hisilicon/hpre/hpre_crypto.c index 5d400d69e8e4..65425250b2e9 100644 --- a/drivers/crypto/hisilicon/hpre/hpre_crypto.c +++ b/drivers/crypto/hisilicon/hpre/hpre_crypto.c @@ -147,26 +147,18 @@ static void hpre_rm_req_from_ctx(struct hpre_asym_request *hpre_req) static struct hisi_qp *hpre_get_qp_and_start(void) { struct hisi_qp *qp; - struct hpre *hpre; int ret; - /* find the proper hpre device, which is near the current CPU core */ - hpre = hpre_find_device(cpu_to_node(smp_processor_id())); - if (!hpre) { - pr_err("Can not find proper hpre device!\n"); - return ERR_PTR(-ENODEV); - } - - qp = hisi_qm_create_qp(&hpre->qm, 0); - if (IS_ERR(qp)) { - pci_err(hpre->qm.pdev, "Can not create qp!\n"); + qp = hpre_create_qp(); + if (!qp) { + pr_err("Can not create hpre qp!\n"); return ERR_PTR(-ENODEV); } ret = hisi_qm_start_qp(qp, 0); if (ret < 0) { - hisi_qm_release_qp(qp); - pci_err(hpre->qm.pdev, "Can not start qp!\n"); + hisi_qm_free_qps(&qp, 1); + pci_err(qp->qm->pdev, "Can not start qp!\n"); return ERR_PTR(-EINVAL); } @@ -338,7 +330,7 @@ static void hpre_ctx_clear(struct hpre_ctx *ctx, bool is_clear_all) if (is_clear_all) { idr_destroy(&ctx->req_idr); kfree(ctx->req_list); - hisi_qm_release_qp(ctx->qp); + hisi_qm_free_qps(&ctx->qp, 1); } ctx->crt_g2_mode = false; diff --git a/drivers/crypto/hisilicon/hpre/hpre_main.c b/drivers/crypto/hisilicon/hpre/hpre_main.c index 401747de67a8..88be53bf4a38 100644 --- a/drivers/crypto/hisilicon/hpre/hpre_main.c +++ b/drivers/crypto/hisilicon/hpre/hpre_main.c @@ -82,8 +82,7 @@ #define HPRE_VIA_MSI_DSM 1 -static LIST_HEAD(hpre_list); -static DEFINE_MUTEX(hpre_list_lock); +static struct hisi_qm_list hpre_devices; static const char hpre_name[] = "hisi_hpre"; static struct dentry *hpre_debugfs_root; static const struct pci_device_id hpre_dev_ids[] = { @@ -196,43 +195,17 @@ static u32 hpre_pf_q_num = HPRE_PF_DEF_Q_NUM; module_param_cb(hpre_pf_q_num, &hpre_pf_q_num_ops, &hpre_pf_q_num, 0444); MODULE_PARM_DESC(hpre_pf_q_num, "Number of queues in PF of CS(1-1024)"); -static inline void hpre_add_to_list(struct hpre *hpre) +struct hisi_qp *hpre_create_qp(void) { - mutex_lock(&hpre_list_lock); - list_add_tail(&hpre->list, &hpre_list); - mutex_unlock(&hpre_list_lock); -} - -static inline void hpre_remove_from_list(struct hpre *hpre) -{ - mutex_lock(&hpre_list_lock); - list_del(&hpre->list); - mutex_unlock(&hpre_list_lock); -} + int node = cpu_to_node(smp_processor_id()); + struct hisi_qp *qp = NULL; + int ret; -struct hpre *hpre_find_device(int node) -{ - struct hpre *hpre, *ret = NULL; - int min_distance = INT_MAX; - struct device *dev; - int dev_node = 0; - - mutex_lock(&hpre_list_lock); - list_for_each_entry(hpre, &hpre_list, list) { - dev = &hpre->qm.pdev->dev; -#ifdef CONFIG_NUMA - dev_node = dev->numa_node; - if (dev_node < 0) - dev_node = 0; -#endif - if (node_distance(dev_node, node) < min_distance) { - ret = hpre; - min_distance = node_distance(dev_node, node); - } - } - mutex_unlock(&hpre_list_lock); + ret = hisi_qm_alloc_qps_node(&hpre_devices, 1, 0, node, &qp); + if (!ret) + return qp; - return ret; + return NULL; } static int hpre_cfg_by_dsm(struct hisi_qm *qm) @@ -349,18 +322,14 @@ static void hpre_cnt_regs_clear(struct hisi_qm *qm) hisi_qm_debug_regs_clear(qm); } -static void hpre_hw_error_disable(struct hpre *hpre) +static void hpre_hw_error_disable(struct hisi_qm *qm) { - struct hisi_qm *qm = &hpre->qm; - /* disable hpre hw error interrupts */ writel(HPRE_CORE_INT_DISABLE, qm->io_base + HPRE_INT_MASK); } -static void hpre_hw_error_enable(struct hpre *hpre) +static void hpre_hw_error_enable(struct hisi_qm *qm) { - struct hisi_qm *qm = &hpre->qm; - /* enable hpre hw error interrupts */ writel(HPRE_CORE_INT_ENABLE, qm->io_base + HPRE_INT_MASK); writel(HPRE_HAC_RAS_CE_ENABLE, qm->io_base + HPRE_RAS_CE_ENB); @@ -713,13 +682,39 @@ static int hpre_qm_pre_init(struct hisi_qm *qm, struct pci_dev *pdev) return 0; } -static void hpre_hw_err_init(struct hpre *hpre) +static void hpre_log_hw_error(struct hisi_qm *qm, u32 err_sts) { - hisi_qm_hw_error_init(&hpre->qm, QM_BASE_CE, QM_BASE_NFE, - 0, QM_DB_RANDOM_INVALID); - hpre_hw_error_enable(hpre); + const struct hpre_hw_error *err = hpre_hw_errors; + struct device *dev = &qm->pdev->dev; + + while (err->msg) { + if (err->int_msk & err_sts) + dev_warn(dev, "%s [error status=0x%x] found\n", + err->msg, err->int_msk); + err++; + } + + writel(err_sts, qm->io_base + HPRE_HAC_SOURCE_INT); +} + +static u32 hpre_get_hw_err_status(struct hisi_qm *qm) +{ + return readl(qm->io_base + HPRE_HAC_INT_STATUS); } +static const struct hisi_qm_err_ini hpre_err_ini = { + .hw_err_enable = hpre_hw_error_enable, + .hw_err_disable = hpre_hw_error_disable, + .get_dev_hw_err_status = hpre_get_hw_err_status, + .log_dev_hw_err = hpre_log_hw_error, + .err_info = { + .ce = QM_BASE_CE, + .nfe = QM_BASE_NFE | QM_ACC_DO_TASK_TIMEOUT, + .fe = 0, + .msi = QM_DB_RANDOM_INVALID, + } +}; + static int hpre_pf_probe_init(struct hpre *hpre) { struct hisi_qm *qm = &hpre->qm; @@ -731,7 +726,8 @@ static int hpre_pf_probe_init(struct hpre *hpre) if (ret) return ret; - hpre_hw_err_init(hpre); + qm->err_ini = &hpre_err_ini; + hisi_qm_dev_err_init(qm); return 0; } @@ -776,22 +772,21 @@ static int hpre_probe(struct pci_dev *pdev, const struct pci_device_id *id) if (ret) dev_warn(&pdev->dev, "init debugfs fail!\n"); - hpre_add_to_list(hpre); + hisi_qm_add_to_list(qm, &hpre_devices); ret = hpre_algs_register(); if (ret < 0) { - hpre_remove_from_list(hpre); pci_err(pdev, "fail to register algs to crypto!\n"); goto err_with_qm_start; } return 0; err_with_qm_start: + hisi_qm_del_from_list(qm, &hpre_devices); hisi_qm_stop(qm); err_with_err_init: - if (pdev->is_physfn) - hpre_hw_error_disable(hpre); + hisi_qm_dev_err_uninit(qm); err_with_qm_init: hisi_qm_uninit(qm); @@ -907,7 +902,7 @@ static void hpre_remove(struct pci_dev *pdev) int ret; hpre_algs_unregister(); - hpre_remove_from_list(hpre); + hisi_qm_del_from_list(qm, &hpre_devices); if (qm->fun_type == QM_HW_PF && hpre->num_vfs != 0) { ret = hpre_sriov_disable(pdev); if (ret) { @@ -922,69 +917,13 @@ static void hpre_remove(struct pci_dev *pdev) hpre_debugfs_exit(hpre); hisi_qm_stop(qm); - if (qm->fun_type == QM_HW_PF) - hpre_hw_error_disable(hpre); + hisi_qm_dev_err_uninit(qm); hisi_qm_uninit(qm); } -static void hpre_log_hw_error(struct hpre *hpre, u32 err_sts) -{ - const struct hpre_hw_error *err = hpre_hw_errors; - struct device *dev = &hpre->qm.pdev->dev; - - while (err->msg) { - if (err->int_msk & err_sts) - dev_warn(dev, "%s [error status=0x%x] found\n", - err->msg, err->int_msk); - err++; - } -} - -static pci_ers_result_t hpre_hw_error_handle(struct hpre *hpre) -{ - u32 err_sts; - - /* read err sts */ - err_sts = readl(hpre->qm.io_base + HPRE_HAC_INT_STATUS); - if (err_sts) { - hpre_log_hw_error(hpre, err_sts); - - /* clear error interrupts */ - writel(err_sts, hpre->qm.io_base + HPRE_HAC_SOURCE_INT); - return PCI_ERS_RESULT_NEED_RESET; - } - - return PCI_ERS_RESULT_RECOVERED; -} - -static pci_ers_result_t hpre_process_hw_error(struct pci_dev *pdev) -{ - struct hpre *hpre = pci_get_drvdata(pdev); - pci_ers_result_t qm_ret, hpre_ret; - - /* log qm error */ - qm_ret = hisi_qm_hw_error_handle(&hpre->qm); - - /* log hpre error */ - hpre_ret = hpre_hw_error_handle(hpre); - - return (qm_ret == PCI_ERS_RESULT_NEED_RESET || - hpre_ret == PCI_ERS_RESULT_NEED_RESET) ? - PCI_ERS_RESULT_NEED_RESET : PCI_ERS_RESULT_RECOVERED; -} - -static pci_ers_result_t hpre_error_detected(struct pci_dev *pdev, - pci_channel_state_t state) -{ - pci_info(pdev, "PCI error detected, state(=%d)!!\n", state); - if (state == pci_channel_io_perm_failure) - return PCI_ERS_RESULT_DISCONNECT; - - return hpre_process_hw_error(pdev); -} static const struct pci_error_handlers hpre_err_handler = { - .error_detected = hpre_error_detected, + .error_detected = hisi_qm_dev_err_detected, }; static struct pci_driver hpre_pci_driver = { @@ -1013,6 +952,7 @@ static int __init hpre_init(void) { int ret; + hisi_qm_init_list(&hpre_devices); hpre_register_debugfs(); ret = pci_register_driver(&hpre_pci_driver); diff --git a/drivers/crypto/hisilicon/qm.c b/drivers/crypto/hisilicon/qm.c index b57da5ef8b5b..f795fb557630 100644 --- a/drivers/crypto/hisilicon/qm.c +++ b/drivers/crypto/hisilicon/qm.c @@ -9,6 +9,9 @@ #include <linux/log2.h> #include <linux/seq_file.h> #include <linux/slab.h> +#include <linux/uacce.h> +#include <linux/uaccess.h> +#include <uapi/misc/uacce/hisi_qm.h> #include "qm.h" /* eq/aeq irq enable */ @@ -269,6 +272,12 @@ struct qm_doorbell { __le16 priority; }; +struct hisi_qm_resource { + struct hisi_qm *qm; + int distance; + struct list_head list; +}; + struct hisi_qm_hw_ops { int (*get_vft)(struct hisi_qm *qm, u32 *base, u32 *number); void (*qm_db)(struct hisi_qm *qm, u16 qn, @@ -277,6 +286,7 @@ struct hisi_qm_hw_ops { int (*debug_init)(struct hisi_qm *qm); void (*hw_error_init)(struct hisi_qm *qm, u32 ce, u32 nfe, u32 fe, u32 msi); + void (*hw_error_uninit)(struct hisi_qm *qm); pci_ers_result_t (*hw_error_handle)(struct hisi_qm *qm); }; @@ -465,9 +475,14 @@ static void qm_cq_head_update(struct hisi_qp *qp) static void qm_poll_qp(struct hisi_qp *qp, struct hisi_qm *qm) { - struct qm_cqe *cqe = qp->cqe + qp->qp_status.cq_head; + if (qp->event_cb) { + qp->event_cb(qp); + return; + } if (qp->req_cb) { + struct qm_cqe *cqe = qp->cqe + qp->qp_status.cq_head; + while (QM_CQE_PHASE(cqe) == qp->qp_status.cqc_phase) { dma_rmb(); qp->req_cb(qp, qp->sqe + qm->sqe_size * @@ -485,17 +500,9 @@ static void qm_poll_qp(struct hisi_qp *qp, struct hisi_qm *qm) } } -static void qm_qp_work_func(struct work_struct *work) +static void qm_work_process(struct work_struct *work) { - struct hisi_qp *qp; - - qp = container_of(work, struct hisi_qp, work); - qm_poll_qp(qp, qp->qm); -} - -static irqreturn_t qm_irq_handler(int irq, void *data) -{ - struct hisi_qm *qm = data; + struct hisi_qm *qm = container_of(work, struct hisi_qm, work); struct qm_eqe *eqe = qm->eqe + qm->status.eq_head; struct hisi_qp *qp; int eqe_num = 0; @@ -504,7 +511,7 @@ static irqreturn_t qm_irq_handler(int irq, void *data) eqe_num++; qp = qm_to_hisi_qp(qm, eqe); if (qp) - queue_work(qp->wq, &qp->work); + qm_poll_qp(qp, qm); if (qm->status.eq_head == QM_Q_DEPTH - 1) { qm->status.eqc_phase = !qm->status.eqc_phase; @@ -522,6 +529,17 @@ static irqreturn_t qm_irq_handler(int irq, void *data) } qm_db(qm, 0, QM_DOORBELL_CMD_EQ, qm->status.eq_head, 0); +} + +static irqreturn_t do_qm_irq(int irq, void *data) +{ + struct hisi_qm *qm = (struct hisi_qm *)data; + + /* the workqueue created by device driver of QM */ + if (qm->wq) + queue_work(qm->wq, &qm->work); + else + schedule_work(&qm->work); return IRQ_HANDLED; } @@ -531,7 +549,7 @@ static irqreturn_t qm_irq(int irq, void *data) struct hisi_qm *qm = data; if (readl(qm->io_base + QM_VF_EQ_INT_SOURCE)) - return qm_irq_handler(irq, data); + return do_qm_irq(irq, data); dev_err(&qm->pdev->dev, "invalid int source\n"); qm_db(qm, 0, QM_DOORBELL_CMD_EQ, qm->status.eq_head, 0); @@ -1011,43 +1029,45 @@ static void qm_hw_error_init_v2(struct hisi_qm *qm, u32 ce, u32 nfe, u32 fe, writel(irq_unmask, qm->io_base + QM_ABNORMAL_INT_MASK); } +static void qm_hw_error_uninit_v2(struct hisi_qm *qm) +{ + writel(QM_ABNORMAL_INT_MASK_VALUE, qm->io_base + QM_ABNORMAL_INT_MASK); +} + static void qm_log_hw_error(struct hisi_qm *qm, u32 error_status) { - const struct hisi_qm_hw_error *err = qm_hw_error; + const struct hisi_qm_hw_error *err; struct device *dev = &qm->pdev->dev; u32 reg_val, type, vf_num; + int i; - while (err->msg) { - if (err->int_msk & error_status) { - dev_err(dev, "%s [error status=0x%x] found\n", - err->msg, err->int_msk); - - if (error_status & QM_DB_TIMEOUT) { - reg_val = readl(qm->io_base + - QM_ABNORMAL_INF01); - type = (reg_val & QM_DB_TIMEOUT_TYPE) >> - QM_DB_TIMEOUT_TYPE_SHIFT; - vf_num = reg_val & QM_DB_TIMEOUT_VF; - dev_err(dev, "qm %s doorbell timeout in function %u\n", - qm_db_timeout[type], vf_num); - } - - if (error_status & QM_OF_FIFO_OF) { - reg_val = readl(qm->io_base + - QM_ABNORMAL_INF00); - type = (reg_val & QM_FIFO_OVERFLOW_TYPE) >> - QM_FIFO_OVERFLOW_TYPE_SHIFT; - vf_num = reg_val & QM_FIFO_OVERFLOW_VF; - - if (type < ARRAY_SIZE(qm_fifo_overflow)) - dev_err(dev, "qm %s fifo overflow in function %u\n", - qm_fifo_overflow[type], - vf_num); - else - dev_err(dev, "unknown error type\n"); - } + for (i = 0; i < ARRAY_SIZE(qm_hw_error); i++) { + err = &qm_hw_error[i]; + if (!(err->int_msk & error_status)) + continue; + + dev_err(dev, "%s [error status=0x%x] found\n", + err->msg, err->int_msk); + + if (err->int_msk & QM_DB_TIMEOUT) { + reg_val = readl(qm->io_base + QM_ABNORMAL_INF01); + type = (reg_val & QM_DB_TIMEOUT_TYPE) >> + QM_DB_TIMEOUT_TYPE_SHIFT; + vf_num = reg_val & QM_DB_TIMEOUT_VF; + dev_err(dev, "qm %s doorbell timeout in function %u\n", + qm_db_timeout[type], vf_num); + } else if (err->int_msk & QM_OF_FIFO_OF) { + reg_val = readl(qm->io_base + QM_ABNORMAL_INF00); + type = (reg_val & QM_FIFO_OVERFLOW_TYPE) >> + QM_FIFO_OVERFLOW_TYPE_SHIFT; + vf_num = reg_val & QM_FIFO_OVERFLOW_VF; + + if (type < ARRAY_SIZE(qm_fifo_overflow)) + dev_err(dev, "qm %s fifo overflow in function %u\n", + qm_fifo_overflow[type], vf_num); + else + dev_err(dev, "unknown error type\n"); } - err++; } } @@ -1082,6 +1102,7 @@ static const struct hisi_qm_hw_ops qm_hw_ops_v2 = { .qm_db = qm_db_v2, .get_irq_num = qm_get_irq_num_v2, .hw_error_init = qm_hw_error_init_v2, + .hw_error_uninit = qm_hw_error_uninit_v2, .hw_error_handle = qm_hw_error_handle_v2, }; @@ -1147,20 +1168,9 @@ struct hisi_qp *hisi_qm_create_qp(struct hisi_qm *qm, u8 alg_type) qp->qp_id = qp_id; qp->alg_type = alg_type; - INIT_WORK(&qp->work, qm_qp_work_func); - qp->wq = alloc_workqueue("hisi_qm", WQ_UNBOUND | WQ_HIGHPRI | - WQ_CPU_INTENSIVE | WQ_MEM_RECLAIM, 0); - if (!qp->wq) { - ret = -EFAULT; - goto err_free_qp_mem; - } return qp; -err_free_qp_mem: - if (qm->use_dma_api) - dma_free_coherent(dev, qp->qdma.size, qp->qdma.va, - qp->qdma.dma); err_clear_bit: write_lock(&qm->qps_lock); qm->qp_array[qp_id] = NULL; @@ -1269,7 +1279,7 @@ static int qm_qp_ctx_cfg(struct hisi_qp *qp, int qp_id, int pasid) * @qp: The qp we want to start to run. * @arg: Accelerator specific argument. * - * After this function, qp can receive request from user. Return qp_id if + * After this function, qp can receive request from user. Return 0 if * successful, Return -EBUSY if failed. */ int hisi_qm_start_qp(struct hisi_qp *qp, unsigned long arg) @@ -1314,7 +1324,7 @@ int hisi_qm_start_qp(struct hisi_qp *qp, unsigned long arg) dev_dbg(dev, "queue %d started\n", qp_id); - return qp_id; + return 0; } EXPORT_SYMBOL_GPL(hisi_qm_start_qp); @@ -1395,6 +1405,214 @@ static void hisi_qm_cache_wb(struct hisi_qm *qm) } } +static void qm_qp_event_notifier(struct hisi_qp *qp) +{ + wake_up_interruptible(&qp->uacce_q->wait); +} + +static int hisi_qm_get_available_instances(struct uacce_device *uacce) +{ + int i, ret; + struct hisi_qm *qm = uacce->priv; + + read_lock(&qm->qps_lock); + for (i = 0, ret = 0; i < qm->qp_num; i++) + if (!qm->qp_array[i]) + ret++; + read_unlock(&qm->qps_lock); + + return ret; +} + +static int hisi_qm_uacce_get_queue(struct uacce_device *uacce, + unsigned long arg, + struct uacce_queue *q) +{ + struct hisi_qm *qm = uacce->priv; + struct hisi_qp *qp; + u8 alg_type = 0; + + qp = hisi_qm_create_qp(qm, alg_type); + if (IS_ERR(qp)) + return PTR_ERR(qp); + + q->priv = qp; + q->uacce = uacce; + qp->uacce_q = q; + qp->event_cb = qm_qp_event_notifier; + qp->pasid = arg; + + return 0; +} + +static void hisi_qm_uacce_put_queue(struct uacce_queue *q) +{ + struct hisi_qp *qp = q->priv; + + hisi_qm_cache_wb(qp->qm); + hisi_qm_release_qp(qp); +} + +/* map sq/cq/doorbell to user space */ +static int hisi_qm_uacce_mmap(struct uacce_queue *q, + struct vm_area_struct *vma, + struct uacce_qfile_region *qfr) +{ + struct hisi_qp *qp = q->priv; + struct hisi_qm *qm = qp->qm; + size_t sz = vma->vm_end - vma->vm_start; + struct pci_dev *pdev = qm->pdev; + struct device *dev = &pdev->dev; + unsigned long vm_pgoff; + int ret; + + switch (qfr->type) { + case UACCE_QFRT_MMIO: + if (qm->ver == QM_HW_V2) { + if (sz > PAGE_SIZE * (QM_DOORBELL_PAGE_NR + + QM_DOORBELL_SQ_CQ_BASE_V2 / PAGE_SIZE)) + return -EINVAL; + } else { + if (sz > PAGE_SIZE * QM_DOORBELL_PAGE_NR) + return -EINVAL; + } + + vma->vm_flags |= VM_IO; + + return remap_pfn_range(vma, vma->vm_start, + qm->phys_base >> PAGE_SHIFT, + sz, pgprot_noncached(vma->vm_page_prot)); + case UACCE_QFRT_DUS: + if (sz != qp->qdma.size) + return -EINVAL; + + /* + * dma_mmap_coherent() requires vm_pgoff as 0 + * restore vm_pfoff to initial value for mmap() + */ + vm_pgoff = vma->vm_pgoff; + vma->vm_pgoff = 0; + ret = dma_mmap_coherent(dev, vma, qp->qdma.va, + qp->qdma.dma, sz); + vma->vm_pgoff = vm_pgoff; + return ret; + + default: + return -EINVAL; + } +} + +static int hisi_qm_uacce_start_queue(struct uacce_queue *q) +{ + struct hisi_qp *qp = q->priv; + + return hisi_qm_start_qp(qp, qp->pasid); +} + +static void hisi_qm_uacce_stop_queue(struct uacce_queue *q) +{ + hisi_qm_stop_qp(q->priv); +} + +static int qm_set_sqctype(struct uacce_queue *q, u16 type) +{ + struct hisi_qm *qm = q->uacce->priv; + struct hisi_qp *qp = q->priv; + + write_lock(&qm->qps_lock); + qp->alg_type = type; + write_unlock(&qm->qps_lock); + + return 0; +} + +static long hisi_qm_uacce_ioctl(struct uacce_queue *q, unsigned int cmd, + unsigned long arg) +{ + struct hisi_qp *qp = q->priv; + struct hisi_qp_ctx qp_ctx; + + if (cmd == UACCE_CMD_QM_SET_QP_CTX) { + if (copy_from_user(&qp_ctx, (void __user *)arg, + sizeof(struct hisi_qp_ctx))) + return -EFAULT; + + if (qp_ctx.qc_type != 0 && qp_ctx.qc_type != 1) + return -EINVAL; + + qm_set_sqctype(q, qp_ctx.qc_type); + qp_ctx.id = qp->qp_id; + + if (copy_to_user((void __user *)arg, &qp_ctx, + sizeof(struct hisi_qp_ctx))) + return -EFAULT; + } else { + return -EINVAL; + } + + return 0; +} + +static const struct uacce_ops uacce_qm_ops = { + .get_available_instances = hisi_qm_get_available_instances, + .get_queue = hisi_qm_uacce_get_queue, + .put_queue = hisi_qm_uacce_put_queue, + .start_queue = hisi_qm_uacce_start_queue, + .stop_queue = hisi_qm_uacce_stop_queue, + .mmap = hisi_qm_uacce_mmap, + .ioctl = hisi_qm_uacce_ioctl, +}; + +static int qm_alloc_uacce(struct hisi_qm *qm) +{ + struct pci_dev *pdev = qm->pdev; + struct uacce_device *uacce; + unsigned long mmio_page_nr; + unsigned long dus_page_nr; + struct uacce_interface interface = { + .flags = UACCE_DEV_SVA, + .ops = &uacce_qm_ops, + }; + + strncpy(interface.name, pdev->driver->name, sizeof(interface.name)); + + uacce = uacce_alloc(&pdev->dev, &interface); + if (IS_ERR(uacce)) + return PTR_ERR(uacce); + + if (uacce->flags & UACCE_DEV_SVA) { + qm->use_sva = true; + } else { + /* only consider sva case */ + uacce_remove(uacce); + qm->uacce = NULL; + return -EINVAL; + } + + uacce->is_vf = pdev->is_virtfn; + uacce->priv = qm; + uacce->algs = qm->algs; + + if (qm->ver == QM_HW_V1) { + mmio_page_nr = QM_DOORBELL_PAGE_NR; + uacce->api_ver = HISI_QM_API_VER_BASE; + } else { + mmio_page_nr = QM_DOORBELL_PAGE_NR + + QM_DOORBELL_SQ_CQ_BASE_V2 / PAGE_SIZE; + uacce->api_ver = HISI_QM_API_VER2_BASE; + } + + dus_page_nr = (PAGE_SIZE - 1 + qm->sqe_size * QM_Q_DEPTH + + sizeof(struct qm_cqe) * QM_Q_DEPTH) >> PAGE_SHIFT; + + uacce->qf_pg_num[UACCE_QFRT_MMIO] = mmio_page_nr; + uacce->qf_pg_num[UACCE_QFRT_DUS] = dus_page_nr; + + qm->uacce = uacce; + + return 0; +} + /** * hisi_qm_get_free_qp_num() - Get free number of qp in qm. * @qm: The qm which want to get free qp. @@ -1437,10 +1655,14 @@ int hisi_qm_init(struct hisi_qm *qm) return -EINVAL; } + ret = qm_alloc_uacce(qm); + if (ret < 0) + dev_warn(&pdev->dev, "fail to alloc uacce (%d)\n", ret); + ret = pci_enable_device_mem(pdev); if (ret < 0) { dev_err(&pdev->dev, "Failed to enable device mem!\n"); - return ret; + goto err_remove_uacce; } ret = pci_request_mem_regions(pdev, qm->dev_name); @@ -1449,8 +1671,9 @@ int hisi_qm_init(struct hisi_qm *qm) goto err_disable_pcidev; } - qm->io_base = ioremap(pci_resource_start(pdev, PCI_BAR_2), - pci_resource_len(qm->pdev, PCI_BAR_2)); + qm->phys_base = pci_resource_start(pdev, PCI_BAR_2); + qm->phys_size = pci_resource_len(qm->pdev, PCI_BAR_2); + qm->io_base = ioremap(qm->phys_base, qm->phys_size); if (!qm->io_base) { ret = -EIO; goto err_release_mem_regions; @@ -1479,6 +1702,7 @@ int hisi_qm_init(struct hisi_qm *qm) qm->qp_in_used = 0; mutex_init(&qm->mailbox_lock); rwlock_init(&qm->qps_lock); + INIT_WORK(&qm->work, qm_work_process); dev_dbg(dev, "init qm %s with %s\n", pdev->is_physfn ? "pf" : "vf", qm->use_dma_api ? "dma api" : "iommu api"); @@ -1493,6 +1717,9 @@ err_release_mem_regions: pci_release_mem_regions(pdev); err_disable_pcidev: pci_disable_device(pdev); +err_remove_uacce: + uacce_remove(qm->uacce); + qm->uacce = NULL; return ret; } @@ -1509,6 +1736,9 @@ void hisi_qm_uninit(struct hisi_qm *qm) struct pci_dev *pdev = qm->pdev; struct device *dev = &pdev->dev; + uacce_remove(qm->uacce); + qm->uacce = NULL; + if (qm->use_dma_api && qm->qdma.va) { hisi_qm_cache_wb(qm); dma_free_coherent(dev, qm->qdma.size, @@ -1856,43 +2086,30 @@ void hisi_qm_debug_regs_clear(struct hisi_qm *qm) } EXPORT_SYMBOL_GPL(hisi_qm_debug_regs_clear); -/** - * hisi_qm_hw_error_init() - Configure qm hardware error report method. - * @qm: The qm which we want to configure. - * @ce: Bit mask of correctable error configure. - * @nfe: Bit mask of non-fatal error configure. - * @fe: Bit mask of fatal error configure. - * @msi: Bit mask of error reported by message signal interrupt. - * - * Hardware errors of qm can be reported either by RAS interrupts which will - * be handled by UEFI and then PCIe AER or by device MSI. User can configure - * each error to use either of above two methods. For RAS interrupts, we can - * configure an error as one of correctable error, non-fatal error or - * fatal error. - * - * Bits indicating errors can be configured to ce, nfe, fe and msi to enable - * related report methods. Error report will be masked if related error bit - * does not configure. - */ -void hisi_qm_hw_error_init(struct hisi_qm *qm, u32 ce, u32 nfe, u32 fe, - u32 msi) +static void qm_hw_error_init(struct hisi_qm *qm) { + const struct hisi_qm_err_info *err_info = &qm->err_ini->err_info; + if (!qm->ops->hw_error_init) { dev_err(&qm->pdev->dev, "QM doesn't support hw error handling!\n"); return; } - qm->ops->hw_error_init(qm, ce, nfe, fe, msi); + qm->ops->hw_error_init(qm, err_info->ce, err_info->nfe, + err_info->fe, err_info->msi); } -EXPORT_SYMBOL_GPL(hisi_qm_hw_error_init); -/** - * hisi_qm_hw_error_handle() - Handle qm non-fatal hardware errors. - * @qm: The qm which has non-fatal hardware errors. - * - * Accelerators use this function to handle qm non-fatal hardware errors. - */ -pci_ers_result_t hisi_qm_hw_error_handle(struct hisi_qm *qm) +static void qm_hw_error_uninit(struct hisi_qm *qm) +{ + if (!qm->ops->hw_error_uninit) { + dev_err(&qm->pdev->dev, "Unexpected QM hw error uninit!\n"); + return; + } + + qm->ops->hw_error_uninit(qm); +} + +static pci_ers_result_t qm_hw_error_handle(struct hisi_qm *qm) { if (!qm->ops->hw_error_handle) { dev_err(&qm->pdev->dev, "QM doesn't support hw error report!\n"); @@ -1901,7 +2118,6 @@ pci_ers_result_t hisi_qm_hw_error_handle(struct hisi_qm *qm) return qm->ops->hw_error_handle(qm); } -EXPORT_SYMBOL_GPL(hisi_qm_hw_error_handle); /** * hisi_qm_get_hw_version() - Get hardware version of a qm. @@ -1922,6 +2138,229 @@ enum qm_hw_ver hisi_qm_get_hw_version(struct pci_dev *pdev) } EXPORT_SYMBOL_GPL(hisi_qm_get_hw_version); +/** + * hisi_qm_dev_err_init() - Initialize device error configuration. + * @qm: The qm for which we want to do error initialization. + * + * Initialize QM and device error related configuration. + */ +void hisi_qm_dev_err_init(struct hisi_qm *qm) +{ + if (qm->fun_type == QM_HW_VF) + return; + + qm_hw_error_init(qm); + + if (!qm->err_ini->hw_err_enable) { + dev_err(&qm->pdev->dev, "Device doesn't support hw error init!\n"); + return; + } + qm->err_ini->hw_err_enable(qm); +} +EXPORT_SYMBOL_GPL(hisi_qm_dev_err_init); + +/** + * hisi_qm_dev_err_uninit() - Uninitialize device error configuration. + * @qm: The qm for which we want to do error uninitialization. + * + * Uninitialize QM and device error related configuration. + */ +void hisi_qm_dev_err_uninit(struct hisi_qm *qm) +{ + if (qm->fun_type == QM_HW_VF) + return; + + qm_hw_error_uninit(qm); + + if (!qm->err_ini->hw_err_disable) { + dev_err(&qm->pdev->dev, "Unexpected device hw error uninit!\n"); + return; + } + qm->err_ini->hw_err_disable(qm); +} +EXPORT_SYMBOL_GPL(hisi_qm_dev_err_uninit); + +/** + * hisi_qm_free_qps() - free multiple queue pairs. + * @qps: The queue pairs need to be freed. + * @qp_num: The num of queue pairs. + */ +void hisi_qm_free_qps(struct hisi_qp **qps, int qp_num) +{ + int i; + + if (!qps || qp_num <= 0) + return; + + for (i = qp_num - 1; i >= 0; i--) + hisi_qm_release_qp(qps[i]); +} +EXPORT_SYMBOL_GPL(hisi_qm_free_qps); + +static void free_list(struct list_head *head) +{ + struct hisi_qm_resource *res, *tmp; + + list_for_each_entry_safe(res, tmp, head, list) { + list_del(&res->list); + kfree(res); + } +} + +static int hisi_qm_sort_devices(int node, struct list_head *head, + struct hisi_qm_list *qm_list) +{ + struct hisi_qm_resource *res, *tmp; + struct hisi_qm *qm; + struct list_head *n; + struct device *dev; + int dev_node = 0; + + list_for_each_entry(qm, &qm_list->list, list) { + dev = &qm->pdev->dev; + + if (IS_ENABLED(CONFIG_NUMA)) { + dev_node = dev_to_node(dev); + if (dev_node < 0) + dev_node = 0; + } + + res = kzalloc(sizeof(*res), GFP_KERNEL); + if (!res) + return -ENOMEM; + + res->qm = qm; + res->distance = node_distance(dev_node, node); + n = head; + list_for_each_entry(tmp, head, list) { + if (res->distance < tmp->distance) { + n = &tmp->list; + break; + } + } + list_add_tail(&res->list, n); + } + + return 0; +} + +/** + * hisi_qm_alloc_qps_node() - Create multiple queue pairs. + * @qm_list: The list of all available devices. + * @qp_num: The number of queue pairs need created. + * @alg_type: The algorithm type. + * @node: The numa node. + * @qps: The queue pairs need created. + * + * This function will sort all available device according to numa distance. + * Then try to create all queue pairs from one device, if all devices do + * not meet the requirements will return error. + */ +int hisi_qm_alloc_qps_node(struct hisi_qm_list *qm_list, int qp_num, + u8 alg_type, int node, struct hisi_qp **qps) +{ + struct hisi_qm_resource *tmp; + int ret = -ENODEV; + LIST_HEAD(head); + int i; + + if (!qps || !qm_list || qp_num <= 0) + return -EINVAL; + + mutex_lock(&qm_list->lock); + if (hisi_qm_sort_devices(node, &head, qm_list)) { + mutex_unlock(&qm_list->lock); + goto err; + } + + list_for_each_entry(tmp, &head, list) { + for (i = 0; i < qp_num; i++) { + qps[i] = hisi_qm_create_qp(tmp->qm, alg_type); + if (IS_ERR(qps[i])) { + hisi_qm_free_qps(qps, i); + break; + } + } + + if (i == qp_num) { + ret = 0; + break; + } + } + + mutex_unlock(&qm_list->lock); + if (ret) + pr_info("Failed to create qps, node[%d], alg[%d], qp[%d]!\n", + node, alg_type, qp_num); + +err: + free_list(&head); + return ret; +} +EXPORT_SYMBOL_GPL(hisi_qm_alloc_qps_node); + +static pci_ers_result_t qm_dev_err_handle(struct hisi_qm *qm) +{ + u32 err_sts; + + if (!qm->err_ini->get_dev_hw_err_status) { + dev_err(&qm->pdev->dev, "Device doesn't support get hw error status!\n"); + return PCI_ERS_RESULT_NONE; + } + + /* get device hardware error status */ + err_sts = qm->err_ini->get_dev_hw_err_status(qm); + if (err_sts) { + if (!qm->err_ini->log_dev_hw_err) { + dev_err(&qm->pdev->dev, "Device doesn't support log hw error!\n"); + return PCI_ERS_RESULT_NEED_RESET; + } + + qm->err_ini->log_dev_hw_err(qm, err_sts); + return PCI_ERS_RESULT_NEED_RESET; + } + + return PCI_ERS_RESULT_RECOVERED; +} + +static pci_ers_result_t qm_process_dev_error(struct pci_dev *pdev) +{ + struct hisi_qm *qm = pci_get_drvdata(pdev); + pci_ers_result_t qm_ret, dev_ret; + + /* log qm error */ + qm_ret = qm_hw_error_handle(qm); + + /* log device error */ + dev_ret = qm_dev_err_handle(qm); + + return (qm_ret == PCI_ERS_RESULT_NEED_RESET || + dev_ret == PCI_ERS_RESULT_NEED_RESET) ? + PCI_ERS_RESULT_NEED_RESET : PCI_ERS_RESULT_RECOVERED; +} + +/** + * hisi_qm_dev_err_detected() - Get device and qm error status then log it. + * @pdev: The PCI device which need report error. + * @state: The connectivity between CPU and device. + * + * We register this function into PCIe AER handlers, It will report device or + * qm hardware error status when error occur. + */ +pci_ers_result_t hisi_qm_dev_err_detected(struct pci_dev *pdev, + pci_channel_state_t state) +{ + if (pdev->is_virtfn) + return PCI_ERS_RESULT_NONE; + + pci_info(pdev, "PCI error detected, state(=%d)!!\n", state); + if (state == pci_channel_io_perm_failure) + return PCI_ERS_RESULT_DISCONNECT; + + return qm_process_dev_error(pdev); +} +EXPORT_SYMBOL_GPL(hisi_qm_dev_err_detected); + MODULE_LICENSE("GPL v2"); MODULE_AUTHOR("Zhou Wang <wangzhou1@hisilicon.com>"); MODULE_DESCRIPTION("HiSilicon Accelerator queue manager driver"); diff --git a/drivers/crypto/hisilicon/qm.h b/drivers/crypto/hisilicon/qm.h index 078b8f1f1b77..ec5b6f48db6c 100644 --- a/drivers/crypto/hisilicon/qm.h +++ b/drivers/crypto/hisilicon/qm.h @@ -77,6 +77,9 @@ #define HISI_ACC_SGL_SGE_NR_MAX 255 +/* page number for queue file region */ +#define QM_DOORBELL_PAGE_NR 1 + enum qp_state { QP_STOP, }; @@ -125,6 +128,28 @@ struct hisi_qm_status { unsigned long flags; }; +struct hisi_qm; + +struct hisi_qm_err_info { + u32 ce; + u32 nfe; + u32 fe; + u32 msi; +}; + +struct hisi_qm_err_ini { + void (*hw_err_enable)(struct hisi_qm *qm); + void (*hw_err_disable)(struct hisi_qm *qm); + u32 (*get_dev_hw_err_status)(struct hisi_qm *qm); + void (*log_dev_hw_err)(struct hisi_qm *qm, u32 err_sts); + struct hisi_qm_err_info err_info; +}; + +struct hisi_qm_list { + struct mutex lock; + struct list_head list; +}; + struct hisi_qm { enum qm_hw_ver ver; enum qm_fun_type fun_type; @@ -136,6 +161,7 @@ struct hisi_qm { u32 qp_num; u32 qp_in_used; u32 ctrl_qp_num; + struct list_head list; struct qm_dma qdma; struct qm_sqc *sqc; @@ -148,6 +174,7 @@ struct hisi_qm { dma_addr_t aeqe_dma; struct hisi_qm_status status; + const struct hisi_qm_err_ini *err_ini; rwlock_t qps_lock; unsigned long *qp_bitmap; @@ -162,7 +189,15 @@ struct hisi_qm { u32 error_mask; u32 msi_mask; + struct workqueue_struct *wq; + struct work_struct work; + + const char *algs; bool use_dma_api; + bool use_sva; + resource_size_t phys_base; + resource_size_t phys_size; + struct uacce_device *uacce; }; struct hisi_qp_status { @@ -192,12 +227,35 @@ struct hisi_qp { struct hisi_qp_ops *hw_ops; void *qp_ctx; void (*req_cb)(struct hisi_qp *qp, void *data); - struct work_struct work; - struct workqueue_struct *wq; + void (*event_cb)(struct hisi_qp *qp); struct hisi_qm *qm; + u16 pasid; + struct uacce_queue *uacce_q; }; +static inline void hisi_qm_init_list(struct hisi_qm_list *qm_list) +{ + INIT_LIST_HEAD(&qm_list->list); + mutex_init(&qm_list->lock); +} + +static inline void hisi_qm_add_to_list(struct hisi_qm *qm, + struct hisi_qm_list *qm_list) +{ + mutex_lock(&qm_list->lock); + list_add_tail(&qm->list, &qm_list->list); + mutex_unlock(&qm_list->lock); +} + +static inline void hisi_qm_del_from_list(struct hisi_qm *qm, + struct hisi_qm_list *qm_list) +{ + mutex_lock(&qm_list->lock); + list_del(&qm->list); + mutex_unlock(&qm_list->lock); +} + int hisi_qm_init(struct hisi_qm *qm); void hisi_qm_uninit(struct hisi_qm *qm); int hisi_qm_start(struct hisi_qm *qm); @@ -211,11 +269,12 @@ int hisi_qm_get_free_qp_num(struct hisi_qm *qm); int hisi_qm_get_vft(struct hisi_qm *qm, u32 *base, u32 *number); int hisi_qm_set_vft(struct hisi_qm *qm, u32 fun_num, u32 base, u32 number); int hisi_qm_debug_init(struct hisi_qm *qm); -void hisi_qm_hw_error_init(struct hisi_qm *qm, u32 ce, u32 nfe, u32 fe, - u32 msi); -pci_ers_result_t hisi_qm_hw_error_handle(struct hisi_qm *qm); enum qm_hw_ver hisi_qm_get_hw_version(struct pci_dev *pdev); void hisi_qm_debug_regs_clear(struct hisi_qm *qm); +void hisi_qm_dev_err_init(struct hisi_qm *qm); +void hisi_qm_dev_err_uninit(struct hisi_qm *qm); +pci_ers_result_t hisi_qm_dev_err_detected(struct pci_dev *pdev, + pci_channel_state_t state); struct hisi_acc_sgl_pool; struct hisi_acc_hw_sgl *hisi_acc_sg_buf_map_to_hw_sgl(struct device *dev, @@ -227,4 +286,7 @@ struct hisi_acc_sgl_pool *hisi_acc_create_sgl_pool(struct device *dev, u32 count, u32 sge_nr); void hisi_acc_free_sgl_pool(struct device *dev, struct hisi_acc_sgl_pool *pool); +int hisi_qm_alloc_qps_node(struct hisi_qm_list *qm_list, int qp_num, + u8 alg_type, int node, struct hisi_qp **qps); +void hisi_qm_free_qps(struct hisi_qp **qps, int qp_num); #endif diff --git a/drivers/crypto/hisilicon/sec2/sec.h b/drivers/crypto/hisilicon/sec2/sec.h index 13e2d8d7be94..3598fa17beb2 100644 --- a/drivers/crypto/hisilicon/sec2/sec.h +++ b/drivers/crypto/hisilicon/sec2/sec.h @@ -11,6 +11,8 @@ /* Algorithm resource per hardware SEC queue */ struct sec_alg_res { + u8 *pbuf; + dma_addr_t pbuf_dma; u8 *c_ivin; dma_addr_t c_ivin_dma; u8 *out_mac; @@ -23,6 +25,8 @@ struct sec_cipher_req { dma_addr_t c_in_dma; struct hisi_acc_hw_sgl *c_out; dma_addr_t c_out_dma; + u8 *c_ivin; + dma_addr_t c_ivin_dma; struct skcipher_request *sk_req; u32 c_len; bool encrypt; @@ -48,6 +52,7 @@ struct sec_req { /* Status of the SEC request */ bool fake_busy; + bool use_pbuf; }; /** @@ -114,6 +119,7 @@ struct sec_ctx { struct sec_qp_ctx *qp_ctx; struct sec_dev *sec; const struct sec_req_op *req_op; + struct hisi_qp **qps; /* Half queues for encipher, and half for decipher */ u32 hlf_q_num; @@ -128,6 +134,7 @@ struct sec_ctx { atomic_t dec_qcyclic; enum sec_alg_type alg_type; + bool pbuf_supported; struct sec_cipher_ctx c_ctx; struct sec_auth_ctx a_ctx; }; @@ -162,14 +169,15 @@ struct sec_debug { struct sec_dev { struct hisi_qm qm; - struct list_head list; struct sec_debug debug; u32 ctx_q_num; + bool iommu_used; u32 num_vfs; unsigned long status; }; -struct sec_dev *sec_find_device(int node); +void sec_destroy_qps(struct hisi_qp **qps, int qp_num); +struct hisi_qp **sec_create_qps(void); int sec_register_to_crypto(void); void sec_unregister_from_crypto(void); #endif diff --git a/drivers/crypto/hisilicon/sec2/sec_crypto.c b/drivers/crypto/hisilicon/sec2/sec_crypto.c index a2cfcc9ccd94..7f1c6a31b82f 100644 --- a/drivers/crypto/hisilicon/sec2/sec_crypto.c +++ b/drivers/crypto/hisilicon/sec2/sec_crypto.c @@ -46,7 +46,21 @@ #define SEC_CIPHER_AUTH 0xfe #define SEC_AUTH_CIPHER 0x1 #define SEC_MAX_MAC_LEN 64 +#define SEC_MAX_AAD_LEN 65535 #define SEC_TOTAL_MAC_SZ (SEC_MAX_MAC_LEN * QM_Q_DEPTH) + +#define SEC_PBUF_SZ 512 +#define SEC_PBUF_IV_OFFSET SEC_PBUF_SZ +#define SEC_PBUF_MAC_OFFSET (SEC_PBUF_SZ + SEC_IV_SIZE) +#define SEC_PBUF_PKG (SEC_PBUF_SZ + SEC_IV_SIZE + \ + SEC_MAX_MAC_LEN * 2) +#define SEC_PBUF_NUM (PAGE_SIZE / SEC_PBUF_PKG) +#define SEC_PBUF_PAGE_NUM (QM_Q_DEPTH / SEC_PBUF_NUM) +#define SEC_PBUF_LEFT_SZ (SEC_PBUF_PKG * (QM_Q_DEPTH - \ + SEC_PBUF_PAGE_NUM * SEC_PBUF_NUM)) +#define SEC_TOTAL_PBUF_SZ (PAGE_SIZE * SEC_PBUF_PAGE_NUM + \ + SEC_PBUF_LEFT_SZ) + #define SEC_SQE_LEN_RATE 4 #define SEC_SQE_CFLAG 2 #define SEC_SQE_AEAD_FLAG 3 @@ -110,12 +124,12 @@ static void sec_free_req_id(struct sec_req *req) mutex_unlock(&qp_ctx->req_lock); } -static int sec_aead_verify(struct sec_req *req, struct sec_qp_ctx *qp_ctx) +static int sec_aead_verify(struct sec_req *req) { struct aead_request *aead_req = req->aead_req.aead_req; struct crypto_aead *tfm = crypto_aead_reqtfm(aead_req); - u8 *mac_out = qp_ctx->res[req->req_id].out_mac; size_t authsize = crypto_aead_authsize(tfm); + u8 *mac_out = req->aead_req.out_mac; u8 *mac = mac_out + SEC_MAX_MAC_LEN; struct scatterlist *sgl = aead_req->src; size_t sz; @@ -163,7 +177,7 @@ static void sec_req_cb(struct hisi_qp *qp, void *resp) } if (ctx->alg_type == SEC_AEAD && !req->c_req.encrypt) - err = sec_aead_verify(req, qp_ctx); + err = sec_aead_verify(req); atomic64_inc(&ctx->sec->debug.dfx.recv_cnt); @@ -245,6 +259,50 @@ static void sec_free_mac_resource(struct device *dev, struct sec_alg_res *res) res->out_mac, res->out_mac_dma); } +static void sec_free_pbuf_resource(struct device *dev, struct sec_alg_res *res) +{ + if (res->pbuf) + dma_free_coherent(dev, SEC_TOTAL_PBUF_SZ, + res->pbuf, res->pbuf_dma); +} + +/* + * To improve performance, pbuffer is used for + * small packets (< 512Bytes) as IOMMU translation using. + */ +static int sec_alloc_pbuf_resource(struct device *dev, struct sec_alg_res *res) +{ + int pbuf_page_offset; + int i, j, k; + + res->pbuf = dma_alloc_coherent(dev, SEC_TOTAL_PBUF_SZ, + &res->pbuf_dma, GFP_KERNEL); + if (!res->pbuf) + return -ENOMEM; + + /* + * SEC_PBUF_PKG contains data pbuf, iv and + * out_mac : <SEC_PBUF|SEC_IV|SEC_MAC> + * Every PAGE contains six SEC_PBUF_PKG + * The sec_qp_ctx contains QM_Q_DEPTH numbers of SEC_PBUF_PKG + * So we need SEC_PBUF_PAGE_NUM numbers of PAGE + * for the SEC_TOTAL_PBUF_SZ + */ + for (i = 0; i <= SEC_PBUF_PAGE_NUM; i++) { + pbuf_page_offset = PAGE_SIZE * i; + for (j = 0; j < SEC_PBUF_NUM; j++) { + k = i * SEC_PBUF_NUM + j; + if (k == QM_Q_DEPTH) + break; + res[k].pbuf = res->pbuf + + j * SEC_PBUF_PKG + pbuf_page_offset; + res[k].pbuf_dma = res->pbuf_dma + + j * SEC_PBUF_PKG + pbuf_page_offset; + } + } + return 0; +} + static int sec_alg_resource_alloc(struct sec_ctx *ctx, struct sec_qp_ctx *qp_ctx) { @@ -259,11 +317,18 @@ static int sec_alg_resource_alloc(struct sec_ctx *ctx, if (ctx->alg_type == SEC_AEAD) { ret = sec_alloc_mac_resource(dev, res); if (ret) - goto get_fail; + goto alloc_fail; + } + if (ctx->pbuf_supported) { + ret = sec_alloc_pbuf_resource(dev, res); + if (ret) { + dev_err(dev, "fail to alloc pbuf dma resource!\n"); + goto alloc_fail; + } } return 0; -get_fail: +alloc_fail: sec_free_civ_resource(dev, res); return ret; @@ -276,6 +341,8 @@ static void sec_alg_resource_free(struct sec_ctx *ctx, sec_free_civ_resource(dev, qp_ctx->res); + if (ctx->pbuf_supported) + sec_free_pbuf_resource(dev, qp_ctx->res); if (ctx->alg_type == SEC_AEAD) sec_free_mac_resource(dev, qp_ctx->res); } @@ -288,11 +355,8 @@ static int sec_create_qp_ctx(struct hisi_qm *qm, struct sec_ctx *ctx, struct hisi_qp *qp; int ret = -ENOMEM; - qp = hisi_qm_create_qp(qm, alg_type); - if (IS_ERR(qp)) - return PTR_ERR(qp); - qp_ctx = &ctx->qp_ctx[qp_ctx_id]; + qp = ctx->qps[qp_ctx_id]; qp->req_type = 0; qp->qp_ctx = qp_ctx; qp->req_cb = sec_req_cb; @@ -335,7 +399,6 @@ err_free_c_in_pool: hisi_acc_free_sgl_pool(dev, qp_ctx->c_in_pool); err_destroy_idr: idr_destroy(&qp_ctx->req_idr); - hisi_qm_release_qp(qp); return ret; } @@ -352,7 +415,6 @@ static void sec_release_qp_ctx(struct sec_ctx *ctx, hisi_acc_free_sgl_pool(dev, qp_ctx->c_in_pool); idr_destroy(&qp_ctx->req_idr); - hisi_qm_release_qp(qp_ctx->qp); } static int sec_ctx_base_init(struct sec_ctx *ctx) @@ -360,14 +422,18 @@ static int sec_ctx_base_init(struct sec_ctx *ctx) struct sec_dev *sec; int i, ret; - sec = sec_find_device(cpu_to_node(smp_processor_id())); - if (!sec) { - pr_err("Can not find proper Hisilicon SEC device!\n"); + ctx->qps = sec_create_qps(); + if (!ctx->qps) { + pr_err("Can not create sec qps!\n"); return -ENODEV; } + + sec = container_of(ctx->qps[0]->qm, struct sec_dev, qm); ctx->sec = sec; ctx->hlf_q_num = sec->ctx_q_num >> 1; + ctx->pbuf_supported = ctx->sec->iommu_used; + /* Half of queue depth is taken as fake requests limit in the queue. */ ctx->fake_req_limit = QM_Q_DEPTH >> 1; ctx->qp_ctx = kcalloc(sec->ctx_q_num, sizeof(struct sec_qp_ctx), @@ -386,6 +452,7 @@ err_sec_release_qp_ctx: for (i = i - 1; i >= 0; i--) sec_release_qp_ctx(ctx, &ctx->qp_ctx[i]); + sec_destroy_qps(ctx->qps, sec->ctx_q_num); kfree(ctx->qp_ctx); return ret; } @@ -397,6 +464,7 @@ static void sec_ctx_base_uninit(struct sec_ctx *ctx) for (i = 0; i < ctx->sec->ctx_q_num; i++) sec_release_qp_ctx(ctx, &ctx->qp_ctx[i]); + sec_destroy_qps(ctx->qps, ctx->sec->ctx_q_num); kfree(ctx->qp_ctx); } @@ -447,7 +515,6 @@ static int sec_skcipher_init(struct crypto_skcipher *tfm) struct sec_ctx *ctx = crypto_skcipher_ctx(tfm); int ret; - ctx = crypto_skcipher_ctx(tfm); ctx->alg_type = SEC_SKCIPHER; crypto_skcipher_set_reqsize(tfm, sizeof(struct sec_req)); ctx->c_ctx.ivsize = crypto_skcipher_ivsize(tfm); @@ -591,11 +658,94 @@ GEN_SEC_SETKEY_FUNC(3des_cbc, SEC_CALG_3DES, SEC_CMODE_CBC) GEN_SEC_SETKEY_FUNC(sm4_xts, SEC_CALG_SM4, SEC_CMODE_XTS) GEN_SEC_SETKEY_FUNC(sm4_cbc, SEC_CALG_SM4, SEC_CMODE_CBC) -static int sec_cipher_map(struct device *dev, struct sec_req *req, +static int sec_cipher_pbuf_map(struct sec_ctx *ctx, struct sec_req *req, + struct scatterlist *src) +{ + struct aead_request *aead_req = req->aead_req.aead_req; + struct sec_cipher_req *c_req = &req->c_req; + struct sec_qp_ctx *qp_ctx = req->qp_ctx; + struct device *dev = SEC_CTX_DEV(ctx); + int copy_size, pbuf_length; + int req_id = req->req_id; + + if (ctx->alg_type == SEC_AEAD) + copy_size = aead_req->cryptlen + aead_req->assoclen; + else + copy_size = c_req->c_len; + + pbuf_length = sg_copy_to_buffer(src, sg_nents(src), + qp_ctx->res[req_id].pbuf, + copy_size); + + if (unlikely(pbuf_length != copy_size)) { + dev_err(dev, "copy src data to pbuf error!\n"); + return -EINVAL; + } + + c_req->c_in_dma = qp_ctx->res[req_id].pbuf_dma; + + if (!c_req->c_in_dma) { + dev_err(dev, "fail to set pbuffer address!\n"); + return -ENOMEM; + } + + c_req->c_out_dma = c_req->c_in_dma; + + return 0; +} + +static void sec_cipher_pbuf_unmap(struct sec_ctx *ctx, struct sec_req *req, + struct scatterlist *dst) +{ + struct aead_request *aead_req = req->aead_req.aead_req; + struct sec_cipher_req *c_req = &req->c_req; + struct sec_qp_ctx *qp_ctx = req->qp_ctx; + struct device *dev = SEC_CTX_DEV(ctx); + int copy_size, pbuf_length; + int req_id = req->req_id; + + if (ctx->alg_type == SEC_AEAD) + copy_size = c_req->c_len + aead_req->assoclen; + else + copy_size = c_req->c_len; + + pbuf_length = sg_copy_from_buffer(dst, sg_nents(dst), + qp_ctx->res[req_id].pbuf, + copy_size); + + if (unlikely(pbuf_length != copy_size)) + dev_err(dev, "copy pbuf data to dst error!\n"); + +} + +static int sec_cipher_map(struct sec_ctx *ctx, struct sec_req *req, struct scatterlist *src, struct scatterlist *dst) { struct sec_cipher_req *c_req = &req->c_req; + struct sec_aead_req *a_req = &req->aead_req; struct sec_qp_ctx *qp_ctx = req->qp_ctx; + struct sec_alg_res *res = &qp_ctx->res[req->req_id]; + struct device *dev = SEC_CTX_DEV(ctx); + int ret; + + if (req->use_pbuf) { + ret = sec_cipher_pbuf_map(ctx, req, src); + c_req->c_ivin = res->pbuf + SEC_PBUF_IV_OFFSET; + c_req->c_ivin_dma = res->pbuf_dma + SEC_PBUF_IV_OFFSET; + if (ctx->alg_type == SEC_AEAD) { + a_req->out_mac = res->pbuf + SEC_PBUF_MAC_OFFSET; + a_req->out_mac_dma = res->pbuf_dma + + SEC_PBUF_MAC_OFFSET; + } + + return ret; + } + c_req->c_ivin = res->c_ivin; + c_req->c_ivin_dma = res->c_ivin_dma; + if (ctx->alg_type == SEC_AEAD) { + a_req->out_mac = res->out_mac; + a_req->out_mac_dma = res->out_mac_dma; + } c_req->c_in = hisi_acc_sg_buf_map_to_hw_sgl(dev, src, qp_ctx->c_in_pool, @@ -626,29 +776,34 @@ static int sec_cipher_map(struct device *dev, struct sec_req *req, return 0; } -static void sec_cipher_unmap(struct device *dev, struct sec_cipher_req *req, +static void sec_cipher_unmap(struct sec_ctx *ctx, struct sec_req *req, struct scatterlist *src, struct scatterlist *dst) { - if (dst != src) - hisi_acc_sg_buf_unmap(dev, src, req->c_in); + struct sec_cipher_req *c_req = &req->c_req; + struct device *dev = SEC_CTX_DEV(ctx); + + if (req->use_pbuf) { + sec_cipher_pbuf_unmap(ctx, req, dst); + } else { + if (dst != src) + hisi_acc_sg_buf_unmap(dev, src, c_req->c_in); - hisi_acc_sg_buf_unmap(dev, dst, req->c_out); + hisi_acc_sg_buf_unmap(dev, dst, c_req->c_out); + } } static int sec_skcipher_sgl_map(struct sec_ctx *ctx, struct sec_req *req) { struct skcipher_request *sq = req->c_req.sk_req; - return sec_cipher_map(SEC_CTX_DEV(ctx), req, sq->src, sq->dst); + return sec_cipher_map(ctx, req, sq->src, sq->dst); } static void sec_skcipher_sgl_unmap(struct sec_ctx *ctx, struct sec_req *req) { - struct device *dev = SEC_CTX_DEV(ctx); - struct sec_cipher_req *c_req = &req->c_req; - struct skcipher_request *sk_req = c_req->sk_req; + struct skcipher_request *sq = req->c_req.sk_req; - sec_cipher_unmap(dev, c_req, sk_req->src, sk_req->dst); + sec_cipher_unmap(ctx, req, sq->src, sq->dst); } static int sec_aead_aes_set_key(struct sec_cipher_ctx *c_ctx, @@ -759,16 +914,14 @@ static int sec_aead_sgl_map(struct sec_ctx *ctx, struct sec_req *req) { struct aead_request *aq = req->aead_req.aead_req; - return sec_cipher_map(SEC_CTX_DEV(ctx), req, aq->src, aq->dst); + return sec_cipher_map(ctx, req, aq->src, aq->dst); } static void sec_aead_sgl_unmap(struct sec_ctx *ctx, struct sec_req *req) { - struct device *dev = SEC_CTX_DEV(ctx); - struct sec_cipher_req *cq = &req->c_req; struct aead_request *aq = req->aead_req.aead_req; - sec_cipher_unmap(dev, cq, aq->src, aq->dst); + sec_cipher_unmap(ctx, req, aq->src, aq->dst); } static int sec_request_transfer(struct sec_ctx *ctx, struct sec_req *req) @@ -801,9 +954,9 @@ static void sec_request_untransfer(struct sec_ctx *ctx, struct sec_req *req) static void sec_skcipher_copy_iv(struct sec_ctx *ctx, struct sec_req *req) { struct skcipher_request *sk_req = req->c_req.sk_req; - u8 *c_ivin = req->qp_ctx->res[req->req_id].c_ivin; + struct sec_cipher_req *c_req = &req->c_req; - memcpy(c_ivin, sk_req->iv, ctx->c_ctx.ivsize); + memcpy(c_req->c_ivin, sk_req->iv, ctx->c_ctx.ivsize); } static int sec_skcipher_bd_fill(struct sec_ctx *ctx, struct sec_req *req) @@ -818,8 +971,7 @@ static int sec_skcipher_bd_fill(struct sec_ctx *ctx, struct sec_req *req) memset(sec_sqe, 0, sizeof(struct sec_sqe)); sec_sqe->type2.c_key_addr = cpu_to_le64(c_ctx->c_key_dma); - sec_sqe->type2.c_ivin_addr = - cpu_to_le64(req->qp_ctx->res[req->req_id].c_ivin_dma); + sec_sqe->type2.c_ivin_addr = cpu_to_le64(c_req->c_ivin_dma); sec_sqe->type2.data_src_addr = cpu_to_le64(c_req->c_in_dma); sec_sqe->type2.data_dst_addr = cpu_to_le64(c_req->c_out_dma); @@ -836,7 +988,10 @@ static int sec_skcipher_bd_fill(struct sec_ctx *ctx, struct sec_req *req) cipher = SEC_CIPHER_DEC << SEC_CIPHER_OFFSET; sec_sqe->type_cipher_auth = bd_type | cipher; - sa_type = SEC_SGL << SEC_SRC_SGL_OFFSET; + if (req->use_pbuf) + sa_type = SEC_PBUF << SEC_SRC_SGL_OFFSET; + else + sa_type = SEC_SGL << SEC_SRC_SGL_OFFSET; scene = SEC_COMM_SCENE << SEC_SCENE_OFFSET; if (c_req->c_in_dma != c_req->c_out_dma) de = 0x1 << SEC_DE_OFFSET; @@ -844,7 +999,10 @@ static int sec_skcipher_bd_fill(struct sec_ctx *ctx, struct sec_req *req) sec_sqe->sds_sa_type = (de | scene | sa_type); /* Just set DST address type */ - da_type = SEC_SGL << SEC_DST_SGL_OFFSET; + if (req->use_pbuf) + da_type = SEC_PBUF << SEC_DST_SGL_OFFSET; + else + da_type = SEC_SGL << SEC_DST_SGL_OFFSET; sec_sqe->sdm_addr_type |= da_type; sec_sqe->type2.clen_ivhlen |= cpu_to_le32(c_req->c_len); @@ -904,9 +1062,9 @@ static void sec_skcipher_callback(struct sec_ctx *ctx, struct sec_req *req, static void sec_aead_copy_iv(struct sec_ctx *ctx, struct sec_req *req) { struct aead_request *aead_req = req->aead_req.aead_req; - u8 *c_ivin = req->qp_ctx->res[req->req_id].c_ivin; + struct sec_cipher_req *c_req = &req->c_req; - memcpy(c_ivin, aead_req->iv, ctx->c_ctx.ivsize); + memcpy(c_req->c_ivin, aead_req->iv, ctx->c_ctx.ivsize); } static void sec_auth_bd_fill_ex(struct sec_auth_ctx *ctx, int dir, @@ -939,8 +1097,7 @@ static void sec_auth_bd_fill_ex(struct sec_auth_ctx *ctx, int dir, sec_sqe->type2.cipher_src_offset = cpu_to_le16((u16)aq->assoclen); - sec_sqe->type2.mac_addr = - cpu_to_le64(req->qp_ctx->res[req->req_id].out_mac_dma); + sec_sqe->type2.mac_addr = cpu_to_le64(a_req->out_mac_dma); } static int sec_aead_bd_fill(struct sec_ctx *ctx, struct sec_req *req) @@ -964,6 +1121,7 @@ static void sec_aead_callback(struct sec_ctx *c, struct sec_req *req, int err) { struct aead_request *a_req = req->aead_req.aead_req; struct crypto_aead *tfm = crypto_aead_reqtfm(a_req); + struct sec_aead_req *aead_req = &req->aead_req; struct sec_cipher_req *c_req = &req->c_req; size_t authsize = crypto_aead_authsize(tfm); struct sec_qp_ctx *qp_ctx = req->qp_ctx; @@ -979,7 +1137,7 @@ static void sec_aead_callback(struct sec_ctx *c, struct sec_req *req, int err) struct scatterlist *sgl = a_req->dst; sz = sg_pcopy_from_buffer(sgl, sg_nents(sgl), - qp_ctx->res[req->req_id].out_mac, + aead_req->out_mac, authsize, a_req->cryptlen + a_req->assoclen); @@ -1031,6 +1189,7 @@ static int sec_request_init(struct sec_ctx *ctx, struct sec_req *req) static int sec_process(struct sec_ctx *ctx, struct sec_req *req) { + struct sec_cipher_req *c_req = &req->c_req; int ret; ret = sec_request_init(ctx, req); @@ -1057,12 +1216,10 @@ err_send_req: /* As failing, restore the IV from user */ if (ctx->c_ctx.c_mode == SEC_CMODE_CBC && !req->c_req.encrypt) { if (ctx->alg_type == SEC_SKCIPHER) - memcpy(req->c_req.sk_req->iv, - req->qp_ctx->res[req->req_id].c_ivin, + memcpy(req->c_req.sk_req->iv, c_req->c_ivin, ctx->c_ctx.ivsize); else - memcpy(req->aead_req.aead_req->iv, - req->qp_ctx->res[req->req_id].c_ivin, + memcpy(req->aead_req.aead_req->iv, c_req->c_ivin, ctx->c_ctx.ivsize); } @@ -1208,6 +1365,12 @@ static int sec_skcipher_param_check(struct sec_ctx *ctx, struct sec_req *sreq) return -EINVAL; } sreq->c_req.c_len = sk_req->cryptlen; + + if (ctx->pbuf_supported && sk_req->cryptlen <= SEC_PBUF_SZ) + sreq->use_pbuf = true; + else + sreq->use_pbuf = false; + if (c_alg == SEC_CALG_3DES) { if (unlikely(sk_req->cryptlen & (DES3_EDE_BLOCK_SIZE - 1))) { dev_err(dev, "skcipher 3des input length error!\n"); @@ -1321,11 +1484,18 @@ static int sec_aead_param_check(struct sec_ctx *ctx, struct sec_req *sreq) struct crypto_aead *tfm = crypto_aead_reqtfm(req); size_t authsize = crypto_aead_authsize(tfm); - if (unlikely(!req->src || !req->dst || !req->cryptlen)) { + if (unlikely(!req->src || !req->dst || !req->cryptlen || + req->assoclen > SEC_MAX_AAD_LEN)) { dev_err(SEC_CTX_DEV(ctx), "aead input param error!\n"); return -EINVAL; } + if (ctx->pbuf_supported && (req->cryptlen + req->assoclen) <= + SEC_PBUF_SZ) + sreq->use_pbuf = true; + else + sreq->use_pbuf = false; + /* Support AES only */ if (unlikely(c_alg != SEC_CALG_AES)) { dev_err(SEC_CTX_DEV(ctx), "aead crypto alg error!\n"); diff --git a/drivers/crypto/hisilicon/sec2/sec_main.c b/drivers/crypto/hisilicon/sec2/sec_main.c index 2bbaf1e2dae7..1f54ebe164b6 100644 --- a/drivers/crypto/hisilicon/sec2/sec_main.c +++ b/drivers/crypto/hisilicon/sec2/sec_main.c @@ -7,6 +7,7 @@ #include <linux/debugfs.h> #include <linux/init.h> #include <linux/io.h> +#include <linux/iommu.h> #include <linux/kernel.h> #include <linux/module.h> #include <linux/pci.h> @@ -89,8 +90,7 @@ struct sec_hw_error { static const char sec_name[] = "hisi_sec2"; static struct dentry *sec_debugfs_root; -static LIST_HEAD(sec_list); -static DEFINE_MUTEX(sec_list_lock); +static struct hisi_qm_list sec_devices; static const struct sec_hw_error sec_hw_errors[] = { {.int_msk = BIT(0), .msg = "sec_axi_rresp_err_rint"}, @@ -105,37 +105,6 @@ static const struct sec_hw_error sec_hw_errors[] = { { /* sentinel */ } }; -struct sec_dev *sec_find_device(int node) -{ -#define SEC_NUMA_MAX_DISTANCE 100 - int min_distance = SEC_NUMA_MAX_DISTANCE; - int dev_node = 0, free_qp_num = 0; - struct sec_dev *sec, *ret = NULL; - struct hisi_qm *qm; - struct device *dev; - - mutex_lock(&sec_list_lock); - list_for_each_entry(sec, &sec_list, list) { - qm = &sec->qm; - dev = &qm->pdev->dev; -#ifdef CONFIG_NUMA - dev_node = dev->numa_node; - if (dev_node < 0) - dev_node = 0; -#endif - if (node_distance(dev_node, node) < min_distance) { - free_qp_num = hisi_qm_get_free_qp_num(qm); - if (free_qp_num >= sec->ctx_q_num) { - ret = sec; - min_distance = node_distance(dev_node, node); - } - } - } - mutex_unlock(&sec_list_lock); - - return ret; -} - static const char * const sec_dbg_file_name[] = { [SEC_CURRENT_QM] = "current_qm", [SEC_CLEAR_ENABLE] = "clear_enable", @@ -238,6 +207,32 @@ static u32 ctx_q_num = SEC_CTX_Q_NUM_DEF; module_param_cb(ctx_q_num, &sec_ctx_q_num_ops, &ctx_q_num, 0444); MODULE_PARM_DESC(ctx_q_num, "Queue num in ctx (24 default, 2, 4, ..., 32)"); +void sec_destroy_qps(struct hisi_qp **qps, int qp_num) +{ + hisi_qm_free_qps(qps, qp_num); + kfree(qps); +} + +struct hisi_qp **sec_create_qps(void) +{ + int node = cpu_to_node(smp_processor_id()); + u32 ctx_num = ctx_q_num; + struct hisi_qp **qps; + int ret; + + qps = kcalloc(ctx_num, sizeof(struct hisi_qp *), GFP_KERNEL); + if (!qps) + return NULL; + + ret = hisi_qm_alloc_qps_node(&sec_devices, ctx_num, 0, node, qps); + if (!ret) + return qps; + + kfree(qps); + return NULL; +} + + static const struct pci_device_id sec_dev_ids[] = { { PCI_DEVICE(PCI_VENDOR_ID_HUAWEI, SEC_PF_PCI_DEVICE_ID) }, { PCI_DEVICE(PCI_VENDOR_ID_HUAWEI, SEC_VF_PCI_DEVICE_ID) }, @@ -245,20 +240,6 @@ static const struct pci_device_id sec_dev_ids[] = { }; MODULE_DEVICE_TABLE(pci, sec_dev_ids); -static inline void sec_add_to_list(struct sec_dev *sec) -{ - mutex_lock(&sec_list_lock); - list_add_tail(&sec->list, &sec_list); - mutex_unlock(&sec_list_lock); -} - -static inline void sec_remove_from_list(struct sec_dev *sec) -{ - mutex_lock(&sec_list_lock); - list_del(&sec->list); - mutex_unlock(&sec_list_lock); -} - static u8 sec_get_endian(struct sec_dev *sec) { struct hisi_qm *qm = &sec->qm; @@ -384,9 +365,8 @@ static void sec_debug_regs_clear(struct hisi_qm *qm) hisi_qm_debug_regs_clear(qm); } -static void sec_hw_error_enable(struct sec_dev *sec) +static void sec_hw_error_enable(struct hisi_qm *qm) { - struct hisi_qm *qm = &sec->qm; u32 val; if (qm->ver == QM_HW_V1) { @@ -414,9 +394,8 @@ static void sec_hw_error_enable(struct sec_dev *sec) writel(val, qm->io_base + SEC_CONTROL_REG); } -static void sec_hw_error_disable(struct sec_dev *sec) +static void sec_hw_error_disable(struct hisi_qm *qm) { - struct hisi_qm *qm = &sec->qm; u32 val; val = readl(qm->io_base + SEC_CONTROL_REG); @@ -435,27 +414,6 @@ static void sec_hw_error_disable(struct sec_dev *sec) writel(val, qm->io_base + SEC_CONTROL_REG); } -static void sec_hw_error_init(struct sec_dev *sec) -{ - if (sec->qm.fun_type == QM_HW_VF) - return; - - hisi_qm_hw_error_init(&sec->qm, QM_BASE_CE, - QM_BASE_NFE | QM_ACC_DO_TASK_TIMEOUT - | QM_ACC_WB_NOT_READY_TIMEOUT, 0, - QM_DB_RANDOM_INVALID); - sec_hw_error_enable(sec); -} - -static void sec_hw_error_uninit(struct sec_dev *sec) -{ - if (sec->qm.fun_type == QM_HW_VF) - return; - - sec_hw_error_disable(sec); - writel(GENMASK(12, 0), sec->qm.io_base + SEC_QM_ABNORMAL_INT_MASK); -} - static u32 sec_current_qm_read(struct sec_debug_file *file) { struct hisi_qm *qm = file->qm; @@ -695,6 +653,51 @@ static void sec_debugfs_exit(struct sec_dev *sec) debugfs_remove_recursive(sec->qm.debug.debug_root); } +static void sec_log_hw_error(struct hisi_qm *qm, u32 err_sts) +{ + const struct sec_hw_error *errs = sec_hw_errors; + struct device *dev = &qm->pdev->dev; + u32 err_val; + + while (errs->msg) { + if (errs->int_msk & err_sts) { + dev_err(dev, "%s [error status=0x%x] found\n", + errs->msg, errs->int_msk); + + if (SEC_CORE_INT_STATUS_M_ECC & errs->int_msk) { + err_val = readl(qm->io_base + + SEC_CORE_SRAM_ECC_ERR_INFO); + dev_err(dev, "multi ecc sram num=0x%x\n", + SEC_ECC_NUM(err_val)); + dev_err(dev, "multi ecc sram addr=0x%x\n", + SEC_ECC_ADDR(err_val)); + } + } + errs++; + } + + writel(err_sts, qm->io_base + SEC_CORE_INT_SOURCE); +} + +static u32 sec_get_hw_err_status(struct hisi_qm *qm) +{ + return readl(qm->io_base + SEC_CORE_INT_STATUS); +} + +static const struct hisi_qm_err_ini sec_err_ini = { + .hw_err_enable = sec_hw_error_enable, + .hw_err_disable = sec_hw_error_disable, + .get_dev_hw_err_status = sec_get_hw_err_status, + .log_dev_hw_err = sec_log_hw_error, + .err_info = { + .ce = QM_BASE_CE, + .nfe = QM_BASE_NFE | QM_ACC_DO_TASK_TIMEOUT | + QM_ACC_WB_NOT_READY_TIMEOUT, + .fe = 0, + .msi = QM_DB_RANDOM_INVALID, + } +}; + static int sec_pf_probe_init(struct sec_dev *sec) { struct hisi_qm *qm = &sec->qm; @@ -713,11 +716,13 @@ static int sec_pf_probe_init(struct sec_dev *sec) return -EINVAL; } + qm->err_ini = &sec_err_ini; + ret = sec_set_user_domain_and_cache(sec); if (ret) return ret; - sec_hw_error_init(sec); + hisi_qm_dev_err_init(qm); sec_debug_regs_clear(qm); return 0; @@ -750,12 +755,30 @@ static void sec_qm_uninit(struct hisi_qm *qm) static int sec_probe_init(struct hisi_qm *qm, struct sec_dev *sec) { + int ret; + + /* + * WQ_HIGHPRI: SEC request must be low delayed, + * so need a high priority workqueue. + * WQ_UNBOUND: SEC task is likely with long + * running CPU intensive workloads. + */ + qm->wq = alloc_workqueue("%s", WQ_HIGHPRI | + WQ_MEM_RECLAIM | WQ_UNBOUND, num_online_cpus(), + pci_name(qm->pdev)); + if (!qm->wq) { + pci_err(qm->pdev, "fail to alloc workqueue\n"); + return -ENOMEM; + } + if (qm->fun_type == QM_HW_PF) { qm->qp_base = SEC_PF_DEF_Q_BASE; qm->qp_num = pf_q_num; qm->debug.curr_qm_qp_num = pf_q_num; - return sec_pf_probe_init(sec); + ret = sec_pf_probe_init(sec); + if (ret) + goto err_probe_uninit; } else if (qm->fun_type == QM_HW_VF) { /* * have no way to get qm configure in VM in v1 hardware, @@ -768,18 +791,43 @@ static int sec_probe_init(struct hisi_qm *qm, struct sec_dev *sec) qm->qp_num = SEC_QUEUE_NUM_V1 - SEC_PF_DEF_Q_NUM; } else if (qm->ver == QM_HW_V2) { /* v2 starts to support get vft by mailbox */ - return hisi_qm_get_vft(qm, &qm->qp_base, &qm->qp_num); + ret = hisi_qm_get_vft(qm, &qm->qp_base, &qm->qp_num); + if (ret) + goto err_probe_uninit; } } else { - return -ENODEV; + ret = -ENODEV; + goto err_probe_uninit; } return 0; +err_probe_uninit: + destroy_workqueue(qm->wq); + return ret; } -static void sec_probe_uninit(struct sec_dev *sec) +static void sec_probe_uninit(struct hisi_qm *qm) { - sec_hw_error_uninit(sec); + hisi_qm_dev_err_uninit(qm); + + destroy_workqueue(qm->wq); +} + +static void sec_iommu_used_check(struct sec_dev *sec) +{ + struct iommu_domain *domain; + struct device *dev = &sec->qm.pdev->dev; + + domain = iommu_get_domain_for_dev(dev); + + /* Check if iommu is used */ + sec->iommu_used = false; + if (domain) { + if (domain->type & __IOMMU_DOMAIN_PAGING) + sec->iommu_used = true; + dev_info(dev, "SMMU Opened, the iommu type = %u\n", + domain->type); + } } static int sec_probe(struct pci_dev *pdev, const struct pci_device_id *id) @@ -795,6 +843,7 @@ static int sec_probe(struct pci_dev *pdev, const struct pci_device_id *id) pci_set_drvdata(pdev, sec); sec->ctx_q_num = ctx_q_num; + sec_iommu_used_check(sec); qm = &sec->qm; @@ -820,7 +869,7 @@ static int sec_probe(struct pci_dev *pdev, const struct pci_device_id *id) if (ret) pci_warn(pdev, "Failed to init debugfs!\n"); - sec_add_to_list(sec); + hisi_qm_add_to_list(qm, &sec_devices); ret = sec_register_to_crypto(); if (ret < 0) { @@ -831,12 +880,12 @@ static int sec_probe(struct pci_dev *pdev, const struct pci_device_id *id) return 0; err_remove_from_list: - sec_remove_from_list(sec); + hisi_qm_del_from_list(qm, &sec_devices); sec_debugfs_exit(sec); hisi_qm_stop(qm); err_probe_uninit: - sec_probe_uninit(sec); + sec_probe_uninit(qm); err_qm_uninit: sec_qm_uninit(qm); @@ -955,7 +1004,7 @@ static void sec_remove(struct pci_dev *pdev) sec_unregister_from_crypto(); - sec_remove_from_list(sec); + hisi_qm_del_from_list(qm, &sec_devices); if (qm->fun_type == QM_HW_PF && sec->num_vfs) (void)sec_sriov_disable(pdev); @@ -967,89 +1016,13 @@ static void sec_remove(struct pci_dev *pdev) if (qm->fun_type == QM_HW_PF) sec_debug_regs_clear(qm); - sec_probe_uninit(sec); + sec_probe_uninit(qm); sec_qm_uninit(qm); } -static void sec_log_hw_error(struct sec_dev *sec, u32 err_sts) -{ - const struct sec_hw_error *errs = sec_hw_errors; - struct device *dev = &sec->qm.pdev->dev; - u32 err_val; - - while (errs->msg) { - if (errs->int_msk & err_sts) { - dev_err(dev, "%s [error status=0x%x] found\n", - errs->msg, errs->int_msk); - - if (SEC_CORE_INT_STATUS_M_ECC & err_sts) { - err_val = readl(sec->qm.io_base + - SEC_CORE_SRAM_ECC_ERR_INFO); - dev_err(dev, "multi ecc sram num=0x%x\n", - SEC_ECC_NUM(err_val)); - dev_err(dev, "multi ecc sram addr=0x%x\n", - SEC_ECC_ADDR(err_val)); - } - } - errs++; - } -} - -static pci_ers_result_t sec_hw_error_handle(struct sec_dev *sec) -{ - u32 err_sts; - - /* read err sts */ - err_sts = readl(sec->qm.io_base + SEC_CORE_INT_STATUS); - if (err_sts) { - sec_log_hw_error(sec, err_sts); - - /* clear error interrupts */ - writel(err_sts, sec->qm.io_base + SEC_CORE_INT_SOURCE); - - return PCI_ERS_RESULT_NEED_RESET; - } - - return PCI_ERS_RESULT_RECOVERED; -} - -static pci_ers_result_t sec_process_hw_error(struct pci_dev *pdev) -{ - struct sec_dev *sec = pci_get_drvdata(pdev); - pci_ers_result_t qm_ret, sec_ret; - - if (!sec) { - pci_err(pdev, "Can't recover error during device init\n"); - return PCI_ERS_RESULT_NONE; - } - - /* log qm error */ - qm_ret = hisi_qm_hw_error_handle(&sec->qm); - - /* log sec error */ - sec_ret = sec_hw_error_handle(sec); - - return (qm_ret == PCI_ERS_RESULT_NEED_RESET || - sec_ret == PCI_ERS_RESULT_NEED_RESET) ? - PCI_ERS_RESULT_NEED_RESET : PCI_ERS_RESULT_RECOVERED; -} - -static pci_ers_result_t sec_error_detected(struct pci_dev *pdev, - pci_channel_state_t state) -{ - if (pdev->is_virtfn) - return PCI_ERS_RESULT_NONE; - - pci_info(pdev, "PCI error detected, state(=%d)!!\n", state); - if (state == pci_channel_io_perm_failure) - return PCI_ERS_RESULT_DISCONNECT; - - return sec_process_hw_error(pdev); -} - static const struct pci_error_handlers sec_err_handler = { - .error_detected = sec_error_detected, + .error_detected = hisi_qm_dev_err_detected, }; static struct pci_driver sec_pci_driver = { @@ -1078,6 +1051,7 @@ static int __init sec_init(void) { int ret; + hisi_qm_init_list(&sec_devices); sec_register_debugfs(); ret = pci_register_driver(&sec_pci_driver); diff --git a/drivers/crypto/hisilicon/zip/zip.h b/drivers/crypto/hisilicon/zip/zip.h index bc1db26598bb..82dc6f867171 100644 --- a/drivers/crypto/hisilicon/zip/zip.h +++ b/drivers/crypto/hisilicon/zip/zip.h @@ -68,7 +68,7 @@ struct hisi_zip_sqe { u32 rsvd1[4]; }; -struct hisi_zip *find_zip_device(int node); +int zip_create_qps(struct hisi_qp **qps, int ctx_num); int hisi_zip_register_to_crypto(void); void hisi_zip_unregister_from_crypto(void); #endif diff --git a/drivers/crypto/hisilicon/zip/zip_crypto.c b/drivers/crypto/hisilicon/zip/zip_crypto.c index 9815d5e3ccd0..369ec3220574 100644 --- a/drivers/crypto/hisilicon/zip/zip_crypto.c +++ b/drivers/crypto/hisilicon/zip/zip_crypto.c @@ -132,29 +132,25 @@ static void hisi_zip_fill_sqe(struct hisi_zip_sqe *sqe, u8 req_type, sqe->dest_addr_h = upper_32_bits(d_addr); } -static int hisi_zip_create_qp(struct hisi_qm *qm, struct hisi_zip_qp_ctx *ctx, - int alg_type, int req_type) +static int hisi_zip_start_qp(struct hisi_qp *qp, struct hisi_zip_qp_ctx *ctx, + int alg_type, int req_type) { - struct hisi_qp *qp; + struct device *dev = &qp->qm->pdev->dev; int ret; - qp = hisi_qm_create_qp(qm, alg_type); - if (IS_ERR(qp)) - return PTR_ERR(qp); - qp->req_type = req_type; + qp->alg_type = alg_type; qp->qp_ctx = ctx; - ctx->qp = qp; ret = hisi_qm_start_qp(qp, 0); - if (ret < 0) - goto err_release_qp; + if (ret < 0) { + dev_err(dev, "start qp failed!\n"); + return ret; + } - return 0; + ctx->qp = qp; -err_release_qp: - hisi_qm_release_qp(qp); - return ret; + return 0; } static void hisi_zip_release_qp(struct hisi_zip_qp_ctx *ctx) @@ -165,34 +161,34 @@ static void hisi_zip_release_qp(struct hisi_zip_qp_ctx *ctx) static int hisi_zip_ctx_init(struct hisi_zip_ctx *hisi_zip_ctx, u8 req_type) { + struct hisi_qp *qps[HZIP_CTX_Q_NUM] = { NULL }; struct hisi_zip *hisi_zip; - struct hisi_qm *qm; int ret, i, j; - /* find the proper zip device */ - hisi_zip = find_zip_device(cpu_to_node(smp_processor_id())); - if (!hisi_zip) { - pr_err("Failed to find a proper ZIP device!\n"); + ret = zip_create_qps(qps, HZIP_CTX_Q_NUM); + if (ret) { + pr_err("Can not create zip qps!\n"); return -ENODEV; } - qm = &hisi_zip->qm; + + hisi_zip = container_of(qps[0]->qm, struct hisi_zip, qm); for (i = 0; i < HZIP_CTX_Q_NUM; i++) { /* alg_type = 0 for compress, 1 for decompress in hw sqe */ - ret = hisi_zip_create_qp(qm, &hisi_zip_ctx->qp_ctx[i], i, - req_type); - if (ret) - goto err; + ret = hisi_zip_start_qp(qps[i], &hisi_zip_ctx->qp_ctx[i], i, + req_type); + if (ret) { + for (j = i - 1; j >= 0; j--) + hisi_qm_stop_qp(hisi_zip_ctx->qp_ctx[j].qp); + + hisi_qm_free_qps(qps, HZIP_CTX_Q_NUM); + return ret; + } hisi_zip_ctx->qp_ctx[i].zip_dev = hisi_zip; } return 0; -err: - for (j = i - 1; j >= 0; j--) - hisi_zip_release_qp(&hisi_zip_ctx->qp_ctx[j]); - - return ret; } static void hisi_zip_ctx_exit(struct hisi_zip_ctx *hisi_zip_ctx) diff --git a/drivers/crypto/hisilicon/zip/zip_main.c b/drivers/crypto/hisilicon/zip/zip_main.c index e1bab1a91333..fcc85d2dbd07 100644 --- a/drivers/crypto/hisilicon/zip/zip_main.c +++ b/drivers/crypto/hisilicon/zip/zip_main.c @@ -11,6 +11,7 @@ #include <linux/pci.h> #include <linux/seq_file.h> #include <linux/topology.h> +#include <linux/uacce.h> #include "zip.h" #define PCI_DEVICE_ID_ZIP_PF 0xa250 @@ -60,13 +61,17 @@ #define HZIP_CORE_DEBUG_DECOMP_5 0x309000 #define HZIP_CORE_INT_SOURCE 0x3010A0 -#define HZIP_CORE_INT_MASK 0x3010A4 +#define HZIP_CORE_INT_MASK_REG 0x3010A4 #define HZIP_CORE_INT_STATUS 0x3010AC #define HZIP_CORE_INT_STATUS_M_ECC BIT(1) #define HZIP_CORE_SRAM_ECC_ERR_INFO 0x301148 -#define SRAM_ECC_ERR_NUM_SHIFT 16 -#define SRAM_ECC_ERR_ADDR_SHIFT 24 -#define HZIP_CORE_INT_DISABLE 0x000007FF +#define HZIP_CORE_INT_RAS_CE_ENB 0x301160 +#define HZIP_CORE_INT_RAS_NFE_ENB 0x301164 +#define HZIP_CORE_INT_RAS_FE_ENB 0x301168 +#define HZIP_CORE_INT_RAS_NFE_ENABLE 0x7FE +#define HZIP_SRAM_ECC_ERR_NUM_SHIFT 16 +#define HZIP_SRAM_ECC_ERR_ADDR_SHIFT 24 +#define HZIP_CORE_INT_MASK_ALL GENMASK(10, 0) #define HZIP_COMP_CORE_NUM 2 #define HZIP_DECOMP_CORE_NUM 6 #define HZIP_CORE_NUM (HZIP_COMP_CORE_NUM + \ @@ -83,77 +88,7 @@ static const char hisi_zip_name[] = "hisi_zip"; static struct dentry *hzip_debugfs_root; -static LIST_HEAD(hisi_zip_list); -static DEFINE_MUTEX(hisi_zip_list_lock); - -struct hisi_zip_resource { - struct hisi_zip *hzip; - int distance; - struct list_head list; -}; - -static void free_list(struct list_head *head) -{ - struct hisi_zip_resource *res, *tmp; - - list_for_each_entry_safe(res, tmp, head, list) { - list_del(&res->list); - kfree(res); - } -} - -struct hisi_zip *find_zip_device(int node) -{ - struct hisi_zip_resource *res, *tmp; - struct hisi_zip *ret = NULL; - struct hisi_zip *hisi_zip; - struct list_head *n; - struct device *dev; - LIST_HEAD(head); - - mutex_lock(&hisi_zip_list_lock); - - if (IS_ENABLED(CONFIG_NUMA)) { - list_for_each_entry(hisi_zip, &hisi_zip_list, list) { - res = kzalloc(sizeof(*res), GFP_KERNEL); - if (!res) - goto err; - - dev = &hisi_zip->qm.pdev->dev; - res->hzip = hisi_zip; - res->distance = node_distance(dev_to_node(dev), node); - - n = &head; - list_for_each_entry(tmp, &head, list) { - if (res->distance < tmp->distance) { - n = &tmp->list; - break; - } - } - list_add_tail(&res->list, n); - } - - list_for_each_entry(tmp, &head, list) { - if (hisi_qm_get_free_qp_num(&tmp->hzip->qm)) { - ret = tmp->hzip; - break; - } - } - - free_list(&head); - } else { - ret = list_first_entry(&hisi_zip_list, struct hisi_zip, list); - } - - mutex_unlock(&hisi_zip_list_lock); - - return ret; - -err: - free_list(&head); - mutex_unlock(&hisi_zip_list_lock); - return NULL; -} +static struct hisi_qm_list zip_devices; struct hisi_zip_hw_error { u32 int_msk; @@ -297,9 +232,6 @@ static u32 pf_q_num = HZIP_PF_DEF_Q_NUM; module_param_cb(pf_q_num, &pf_q_num_ops, &pf_q_num, 0444); MODULE_PARM_DESC(pf_q_num, "Number of queues in PF(v1 1-4096, v2 1-1024)"); -static int uacce_mode; -module_param(uacce_mode, int, 0); - static u32 vfs_num; module_param(vfs_num, uint, 0444); MODULE_PARM_DESC(vfs_num, "Number of VFs to enable(1-63)"); @@ -311,18 +243,11 @@ static const struct pci_device_id hisi_zip_dev_ids[] = { }; MODULE_DEVICE_TABLE(pci, hisi_zip_dev_ids); -static inline void hisi_zip_add_to_list(struct hisi_zip *hisi_zip) +int zip_create_qps(struct hisi_qp **qps, int qp_num) { - mutex_lock(&hisi_zip_list_lock); - list_add_tail(&hisi_zip->list, &hisi_zip_list); - mutex_unlock(&hisi_zip_list_lock); -} + int node = cpu_to_node(smp_processor_id()); -static inline void hisi_zip_remove_from_list(struct hisi_zip *hisi_zip) -{ - mutex_lock(&hisi_zip_list_lock); - list_del(&hisi_zip->list); - mutex_unlock(&hisi_zip_list_lock); + return hisi_qm_alloc_qps_node(&zip_devices, qp_num, 0, node, qps); } static void hisi_zip_set_user_domain_and_cache(struct hisi_zip *hisi_zip) @@ -353,8 +278,14 @@ static void hisi_zip_set_user_domain_and_cache(struct hisi_zip *hisi_zip) writel(AXUSER_BASE, base + HZIP_BD_RUSER_32_63); writel(AXUSER_BASE, base + HZIP_SGL_RUSER_32_63); writel(AXUSER_BASE, base + HZIP_BD_WUSER_32_63); - writel(AXUSER_BASE, base + HZIP_DATA_RUSER_32_63); - writel(AXUSER_BASE, base + HZIP_DATA_WUSER_32_63); + + if (hisi_zip->qm.use_sva) { + writel(AXUSER_BASE | AXUSER_SSV, base + HZIP_DATA_RUSER_32_63); + writel(AXUSER_BASE | AXUSER_SSV, base + HZIP_DATA_WUSER_32_63); + } else { + writel(AXUSER_BASE, base + HZIP_DATA_RUSER_32_63); + writel(AXUSER_BASE, base + HZIP_DATA_WUSER_32_63); + } /* let's open all compression/decompression cores */ writel(DECOMP_CHECK_ENABLE | ALL_COMP_DECOMP_EN, @@ -366,27 +297,32 @@ static void hisi_zip_set_user_domain_and_cache(struct hisi_zip *hisi_zip) FIELD_PREP(CQC_CACHE_WB_THRD, 1), base + QM_CACHE_CTL); } -static void hisi_zip_hw_error_set_state(struct hisi_zip *hisi_zip, bool state) +static void hisi_zip_hw_error_enable(struct hisi_qm *qm) { - struct hisi_qm *qm = &hisi_zip->qm; - if (qm->ver == QM_HW_V1) { - writel(HZIP_CORE_INT_DISABLE, qm->io_base + HZIP_CORE_INT_MASK); + writel(HZIP_CORE_INT_MASK_ALL, + qm->io_base + HZIP_CORE_INT_MASK_REG); dev_info(&qm->pdev->dev, "Does not support hw error handle\n"); return; } - if (state) { - /* clear ZIP hw error source if having */ - writel(HZIP_CORE_INT_DISABLE, hisi_zip->qm.io_base + - HZIP_CORE_INT_SOURCE); - /* enable ZIP hw error interrupts */ - writel(0, hisi_zip->qm.io_base + HZIP_CORE_INT_MASK); - } else { - /* disable ZIP hw error interrupts */ - writel(HZIP_CORE_INT_DISABLE, - hisi_zip->qm.io_base + HZIP_CORE_INT_MASK); - } + /* clear ZIP hw error source if having */ + writel(HZIP_CORE_INT_MASK_ALL, qm->io_base + HZIP_CORE_INT_SOURCE); + + /* configure error type */ + writel(0x1, qm->io_base + HZIP_CORE_INT_RAS_CE_ENB); + writel(0x0, qm->io_base + HZIP_CORE_INT_RAS_FE_ENB); + writel(HZIP_CORE_INT_RAS_NFE_ENABLE, + qm->io_base + HZIP_CORE_INT_RAS_NFE_ENB); + + /* enable ZIP hw error interrupts */ + writel(0, qm->io_base + HZIP_CORE_INT_MASK_REG); +} + +static void hisi_zip_hw_error_disable(struct hisi_qm *qm) +{ + /* disable ZIP hw error interrupts */ + writel(HZIP_CORE_INT_MASK_ALL, qm->io_base + HZIP_CORE_INT_MASK_REG); } static inline struct hisi_qm *file_to_qm(struct ctrl_debug_file *file) @@ -638,14 +574,53 @@ static void hisi_zip_debugfs_exit(struct hisi_zip *hisi_zip) hisi_zip_debug_regs_clear(hisi_zip); } -static void hisi_zip_hw_error_init(struct hisi_zip *hisi_zip) +static void hisi_zip_log_hw_error(struct hisi_qm *qm, u32 err_sts) +{ + const struct hisi_zip_hw_error *err = zip_hw_error; + struct device *dev = &qm->pdev->dev; + u32 err_val; + + while (err->msg) { + if (err->int_msk & err_sts) { + dev_err(dev, "%s [error status=0x%x] found\n", + err->msg, err->int_msk); + + if (err->int_msk & HZIP_CORE_INT_STATUS_M_ECC) { + err_val = readl(qm->io_base + + HZIP_CORE_SRAM_ECC_ERR_INFO); + dev_err(dev, "hisi-zip multi ecc sram num=0x%x\n", + ((err_val >> + HZIP_SRAM_ECC_ERR_NUM_SHIFT) & 0xFF)); + dev_err(dev, "hisi-zip multi ecc sram addr=0x%x\n", + (err_val >> + HZIP_SRAM_ECC_ERR_ADDR_SHIFT)); + } + } + err++; + } + + writel(err_sts, qm->io_base + HZIP_CORE_INT_SOURCE); +} + +static u32 hisi_zip_get_hw_err_status(struct hisi_qm *qm) { - hisi_qm_hw_error_init(&hisi_zip->qm, QM_BASE_CE, - QM_BASE_NFE | QM_ACC_WB_NOT_READY_TIMEOUT, 0, - QM_DB_RANDOM_INVALID); - hisi_zip_hw_error_set_state(hisi_zip, true); + return readl(qm->io_base + HZIP_CORE_INT_STATUS); } +static const struct hisi_qm_err_ini hisi_zip_err_ini = { + .hw_err_enable = hisi_zip_hw_error_enable, + .hw_err_disable = hisi_zip_hw_error_disable, + .get_dev_hw_err_status = hisi_zip_get_hw_err_status, + .log_dev_hw_err = hisi_zip_log_hw_error, + .err_info = { + .ce = QM_BASE_CE, + .nfe = QM_BASE_NFE | + QM_ACC_WB_NOT_READY_TIMEOUT, + .fe = 0, + .msi = QM_DB_RANDOM_INVALID, + } +}; + static int hisi_zip_pf_probe_init(struct hisi_zip *hisi_zip) { struct hisi_qm *qm = &hisi_zip->qm; @@ -671,8 +646,10 @@ static int hisi_zip_pf_probe_init(struct hisi_zip *hisi_zip) return -EINVAL; } + qm->err_ini = &hisi_zip_err_ini; + hisi_zip_set_user_domain_and_cache(hisi_zip); - hisi_zip_hw_error_init(hisi_zip); + hisi_qm_dev_err_init(qm); hisi_zip_debug_regs_clear(hisi_zip); return 0; @@ -791,27 +768,15 @@ static int hisi_zip_probe(struct pci_dev *pdev, const struct pci_device_id *id) pci_set_drvdata(pdev, hisi_zip); qm = &hisi_zip->qm; + qm->use_dma_api = true; qm->pdev = pdev; qm->ver = rev_id; + qm->algs = "zlib\ngzip"; qm->sqe_size = HZIP_SQE_SIZE; qm->dev_name = hisi_zip_name; qm->fun_type = (pdev->device == PCI_DEVICE_ID_ZIP_PF) ? QM_HW_PF : QM_HW_VF; - switch (uacce_mode) { - case 0: - qm->use_dma_api = true; - break; - case 1: - qm->use_dma_api = false; - break; - case 2: - qm->use_dma_api = true; - break; - default: - return -EINVAL; - } - ret = hisi_qm_init(qm); if (ret) { dev_err(&pdev->dev, "Failed to init qm!\n"); @@ -849,7 +814,13 @@ static int hisi_zip_probe(struct pci_dev *pdev, const struct pci_device_id *id) if (ret) dev_err(&pdev->dev, "Failed to init debugfs (%d)!\n", ret); - hisi_zip_add_to_list(hisi_zip); + hisi_qm_add_to_list(qm, &zip_devices); + + if (qm->uacce) { + ret = uacce_register(qm->uacce); + if (ret) + goto err_qm_uninit; + } if (qm->fun_type == QM_HW_PF && vfs_num > 0) { ret = hisi_zip_sriov_enable(pdev, vfs_num); @@ -860,7 +831,7 @@ static int hisi_zip_probe(struct pci_dev *pdev, const struct pci_device_id *id) return 0; err_remove_from_list: - hisi_zip_remove_from_list(hisi_zip); + hisi_qm_del_from_list(qm, &zip_devices); hisi_zip_debugfs_exit(hisi_zip); hisi_qm_stop(qm); err_qm_uninit: @@ -887,92 +858,13 @@ static void hisi_zip_remove(struct pci_dev *pdev) hisi_zip_debugfs_exit(hisi_zip); hisi_qm_stop(qm); - if (qm->fun_type == QM_HW_PF) - hisi_zip_hw_error_set_state(hisi_zip, false); - + hisi_qm_dev_err_uninit(qm); hisi_qm_uninit(qm); - hisi_zip_remove_from_list(hisi_zip); -} - -static void hisi_zip_log_hw_error(struct hisi_zip *hisi_zip, u32 err_sts) -{ - const struct hisi_zip_hw_error *err = zip_hw_error; - struct device *dev = &hisi_zip->qm.pdev->dev; - u32 err_val; - - while (err->msg) { - if (err->int_msk & err_sts) { - dev_warn(dev, "%s [error status=0x%x] found\n", - err->msg, err->int_msk); - - if (HZIP_CORE_INT_STATUS_M_ECC & err->int_msk) { - err_val = readl(hisi_zip->qm.io_base + - HZIP_CORE_SRAM_ECC_ERR_INFO); - dev_warn(dev, "hisi-zip multi ecc sram num=0x%x\n", - ((err_val >> SRAM_ECC_ERR_NUM_SHIFT) & - 0xFF)); - dev_warn(dev, "hisi-zip multi ecc sram addr=0x%x\n", - (err_val >> SRAM_ECC_ERR_ADDR_SHIFT)); - } - } - err++; - } -} - -static pci_ers_result_t hisi_zip_hw_error_handle(struct hisi_zip *hisi_zip) -{ - u32 err_sts; - - /* read err sts */ - err_sts = readl(hisi_zip->qm.io_base + HZIP_CORE_INT_STATUS); - - if (err_sts) { - hisi_zip_log_hw_error(hisi_zip, err_sts); - /* clear error interrupts */ - writel(err_sts, hisi_zip->qm.io_base + HZIP_CORE_INT_SOURCE); - - return PCI_ERS_RESULT_NEED_RESET; - } - - return PCI_ERS_RESULT_RECOVERED; -} - -static pci_ers_result_t hisi_zip_process_hw_error(struct pci_dev *pdev) -{ - struct hisi_zip *hisi_zip = pci_get_drvdata(pdev); - struct device *dev = &pdev->dev; - pci_ers_result_t qm_ret, zip_ret; - - if (!hisi_zip) { - dev_err(dev, - "Can't recover ZIP-error occurred during device init\n"); - return PCI_ERS_RESULT_NONE; - } - - qm_ret = hisi_qm_hw_error_handle(&hisi_zip->qm); - - zip_ret = hisi_zip_hw_error_handle(hisi_zip); - - return (qm_ret == PCI_ERS_RESULT_NEED_RESET || - zip_ret == PCI_ERS_RESULT_NEED_RESET) ? - PCI_ERS_RESULT_NEED_RESET : PCI_ERS_RESULT_RECOVERED; -} - -static pci_ers_result_t hisi_zip_error_detected(struct pci_dev *pdev, - pci_channel_state_t state) -{ - if (pdev->is_virtfn) - return PCI_ERS_RESULT_NONE; - - dev_info(&pdev->dev, "PCI error detected, state(=%d)!!\n", state); - if (state == pci_channel_io_perm_failure) - return PCI_ERS_RESULT_DISCONNECT; - - return hisi_zip_process_hw_error(pdev); + hisi_qm_del_from_list(qm, &zip_devices); } static const struct pci_error_handlers hisi_zip_err_handler = { - .error_detected = hisi_zip_error_detected, + .error_detected = hisi_qm_dev_err_detected, }; static struct pci_driver hisi_zip_pci_driver = { @@ -1002,6 +894,7 @@ static int __init hisi_zip_init(void) { int ret; + hisi_qm_init_list(&zip_devices); hisi_zip_register_debugfs(); ret = pci_register_driver(&hisi_zip_pci_driver); @@ -1010,12 +903,10 @@ static int __init hisi_zip_init(void) goto err_pci; } - if (uacce_mode == 0 || uacce_mode == 2) { - ret = hisi_zip_register_to_crypto(); - if (ret < 0) { - pr_err("Failed to register driver to crypto.\n"); - goto err_crypto; - } + ret = hisi_zip_register_to_crypto(); + if (ret < 0) { + pr_err("Failed to register driver to crypto.\n"); + goto err_crypto; } return 0; @@ -1030,8 +921,7 @@ err_pci: static void __exit hisi_zip_exit(void) { - if (uacce_mode == 0 || uacce_mode == 2) - hisi_zip_unregister_from_crypto(); + hisi_zip_unregister_from_crypto(); pci_unregister_driver(&hisi_zip_pci_driver); hisi_zip_unregister_debugfs(); } diff --git a/drivers/crypto/img-hash.c b/drivers/crypto/img-hash.c index 25d5227f74a1..0e25fc3087f3 100644 --- a/drivers/crypto/img-hash.c +++ b/drivers/crypto/img-hash.c @@ -103,7 +103,7 @@ struct img_hash_request_ctx { struct ahash_request fallback_req; /* Zero length buffer must remain last member of struct */ - u8 buffer[0] __aligned(sizeof(u32)); + u8 buffer[] __aligned(sizeof(u32)); }; struct img_hash_ctx { diff --git a/drivers/crypto/marvell/Kconfig b/drivers/crypto/marvell/Kconfig new file mode 100644 index 000000000000..13063384f958 --- /dev/null +++ b/drivers/crypto/marvell/Kconfig @@ -0,0 +1,37 @@ +# +# Marvell crypto drivers configuration +# + +config CRYPTO_DEV_MARVELL + tristate + +config CRYPTO_DEV_MARVELL_CESA + tristate "Marvell's Cryptographic Engine driver" + depends on PLAT_ORION || ARCH_MVEBU + select CRYPTO_LIB_AES + select CRYPTO_LIB_DES + select CRYPTO_SKCIPHER + select CRYPTO_HASH + select SRAM + select CRYPTO_DEV_MARVELL + help + This driver allows you to utilize the Cryptographic Engines and + Security Accelerator (CESA) which can be found on MVEBU and ORION + platforms. + This driver supports CPU offload through DMA transfers. + +config CRYPTO_DEV_OCTEONTX_CPT + tristate "Support for Marvell OcteonTX CPT driver" + depends on ARCH_THUNDER || COMPILE_TEST + depends on PCI_MSI && 64BIT + depends on CRYPTO_LIB_AES + select CRYPTO_SKCIPHER + select CRYPTO_HASH + select CRYPTO_AEAD + select CRYPTO_DEV_MARVELL + help + This driver allows you to utilize the Marvell Cryptographic + Accelerator Unit(CPT) found in OcteonTX series of processors. + + To compile this driver as module, choose M here: + the modules will be called octeontx-cpt and octeontx-cptvf diff --git a/drivers/crypto/marvell/Makefile b/drivers/crypto/marvell/Makefile index b27cab65e696..6c6a1519b0f1 100644 --- a/drivers/crypto/marvell/Makefile +++ b/drivers/crypto/marvell/Makefile @@ -1,3 +1,4 @@ -# SPDX-License-Identifier: GPL-2.0-only -obj-$(CONFIG_CRYPTO_DEV_MARVELL_CESA) += marvell-cesa.o -marvell-cesa-objs := cesa.o cipher.o hash.o tdma.o +# SPDX-License-Identifier: GPL-2.0 + +obj-$(CONFIG_CRYPTO_DEV_MARVELL_CESA) += cesa/ +obj-$(CONFIG_CRYPTO_DEV_OCTEONTX_CPT) += octeontx/ diff --git a/drivers/crypto/marvell/cesa/Makefile b/drivers/crypto/marvell/cesa/Makefile new file mode 100644 index 000000000000..b27cab65e696 --- /dev/null +++ b/drivers/crypto/marvell/cesa/Makefile @@ -0,0 +1,3 @@ +# SPDX-License-Identifier: GPL-2.0-only +obj-$(CONFIG_CRYPTO_DEV_MARVELL_CESA) += marvell-cesa.o +marvell-cesa-objs := cesa.o cipher.o hash.o tdma.o diff --git a/drivers/crypto/marvell/cesa.c b/drivers/crypto/marvell/cesa/cesa.c index 8a5f0b0bdf77..8a5f0b0bdf77 100644 --- a/drivers/crypto/marvell/cesa.c +++ b/drivers/crypto/marvell/cesa/cesa.c diff --git a/drivers/crypto/marvell/cesa.h b/drivers/crypto/marvell/cesa/cesa.h index f1ed3b85c0d2..e8632d5f343f 100644 --- a/drivers/crypto/marvell/cesa.h +++ b/drivers/crypto/marvell/cesa/cesa.h @@ -436,7 +436,7 @@ struct mv_cesa_dev { * @queue: fifo of the pending crypto requests * @load: engine load counter, useful for load balancing * @chain: list of the current tdma descriptors being processed - * by this engine. + * by this engine. * @complete_queue: fifo of the processed requests by the engine * * Structure storing CESA engine information. @@ -467,7 +467,7 @@ struct mv_cesa_engine { * @step: launch the crypto operation on the next chunk * @cleanup: cleanup the crypto request (release associated data) * @complete: complete the request, i.e copy result or context from sram when - * needed. + * needed. */ struct mv_cesa_req_ops { int (*process)(struct crypto_async_request *req, u32 status); @@ -734,6 +734,7 @@ static inline struct mv_cesa_engine *mv_cesa_select_engine(int weight) for (i = 0; i < cesa_dev->caps->nengines; i++) { struct mv_cesa_engine *engine = cesa_dev->engines + i; u32 load = atomic_read(&engine->load); + if (load < min_load) { min_load = load; selected = engine; diff --git a/drivers/crypto/marvell/cipher.c b/drivers/crypto/marvell/cesa/cipher.c index c24f34a48cef..f133c2ccb5ae 100644 --- a/drivers/crypto/marvell/cipher.c +++ b/drivers/crypto/marvell/cesa/cipher.c @@ -106,8 +106,8 @@ static void mv_cesa_skcipher_std_step(struct skcipher_request *req) mv_cesa_set_int_mask(engine, CESA_SA_INT_ACCEL0_DONE); writel_relaxed(CESA_SA_CFG_PARA_DIS, engine->regs + CESA_SA_CFG); - BUG_ON(readl(engine->regs + CESA_SA_CMD) & - CESA_SA_CMD_EN_CESA_SA_ACCL0); + WARN_ON(readl(engine->regs + CESA_SA_CMD) & + CESA_SA_CMD_EN_CESA_SA_ACCL0); writel(CESA_SA_CMD_EN_CESA_SA_ACCL0, engine->regs + CESA_SA_CMD); } @@ -178,6 +178,7 @@ static inline void mv_cesa_skcipher_prepare(struct crypto_async_request *req, { struct skcipher_request *skreq = skcipher_request_cast(req); struct mv_cesa_skcipher_req *creq = skcipher_request_ctx(skreq); + creq->base.engine = engine; if (mv_cesa_req_get_type(&creq->base) == CESA_DMA_REQ) @@ -336,7 +337,8 @@ static int mv_cesa_skcipher_dma_req_init(struct skcipher_request *req, do { struct mv_cesa_op_ctx *op; - op = mv_cesa_dma_add_op(&basereq->chain, op_templ, skip_ctx, flags); + op = mv_cesa_dma_add_op(&basereq->chain, op_templ, skip_ctx, + flags); if (IS_ERR(op)) { ret = PTR_ERR(op); goto err_free_tdma; @@ -365,9 +367,10 @@ static int mv_cesa_skcipher_dma_req_init(struct skcipher_request *req, } while (mv_cesa_skcipher_req_iter_next_op(&iter)); /* Add output data for IV */ - ret = mv_cesa_dma_add_result_op(&basereq->chain, CESA_SA_CFG_SRAM_OFFSET, - CESA_SA_DATA_SRAM_OFFSET, - CESA_TDMA_SRC_IN_SRAM, flags); + ret = mv_cesa_dma_add_result_op(&basereq->chain, + CESA_SA_CFG_SRAM_OFFSET, + CESA_SA_DATA_SRAM_OFFSET, + CESA_TDMA_SRC_IN_SRAM, flags); if (ret) goto err_free_tdma; diff --git a/drivers/crypto/marvell/hash.c b/drivers/crypto/marvell/cesa/hash.c index a2b35fb0fb89..b971284332b6 100644 --- a/drivers/crypto/marvell/hash.c +++ b/drivers/crypto/marvell/cesa/hash.c @@ -141,9 +141,11 @@ static int mv_cesa_ahash_pad_req(struct mv_cesa_ahash_req *creq, u8 *buf) if (creq->algo_le) { __le64 bits = cpu_to_le64(creq->len << 3); + memcpy(buf + padlen, &bits, sizeof(bits)); } else { __be64 bits = cpu_to_be64(creq->len << 3); + memcpy(buf + padlen, &bits, sizeof(bits)); } @@ -168,7 +170,8 @@ static void mv_cesa_ahash_std_step(struct ahash_request *req) if (!sreq->offset) { digsize = crypto_ahash_digestsize(crypto_ahash_reqtfm(req)); for (i = 0; i < digsize / 4; i++) - writel_relaxed(creq->state[i], engine->regs + CESA_IVDIG(i)); + writel_relaxed(creq->state[i], + engine->regs + CESA_IVDIG(i)); } if (creq->cache_ptr) @@ -245,8 +248,8 @@ static void mv_cesa_ahash_std_step(struct ahash_request *req) mv_cesa_set_int_mask(engine, CESA_SA_INT_ACCEL0_DONE); writel_relaxed(CESA_SA_CFG_PARA_DIS, engine->regs + CESA_SA_CFG); - BUG_ON(readl(engine->regs + CESA_SA_CMD) & - CESA_SA_CMD_EN_CESA_SA_ACCL0); + WARN_ON(readl(engine->regs + CESA_SA_CMD) & + CESA_SA_CMD_EN_CESA_SA_ACCL0); writel(CESA_SA_CMD_EN_CESA_SA_ACCL0, engine->regs + CESA_SA_CMD); } @@ -329,11 +332,12 @@ static void mv_cesa_ahash_complete(struct crypto_async_request *req) digsize = crypto_ahash_digestsize(crypto_ahash_reqtfm(ahashreq)); if (mv_cesa_req_get_type(&creq->base) == CESA_DMA_REQ && - (creq->base.chain.last->flags & CESA_TDMA_TYPE_MSK) == CESA_TDMA_RESULT) { + (creq->base.chain.last->flags & CESA_TDMA_TYPE_MSK) == + CESA_TDMA_RESULT) { __le32 *data = NULL; /* - * Result is already in the correct endianess when the SA is + * Result is already in the correct endianness when the SA is * used */ data = creq->base.chain.last->op->ctx.hash.hash; @@ -347,9 +351,9 @@ static void mv_cesa_ahash_complete(struct crypto_async_request *req) CESA_IVDIG(i)); if (creq->last_req) { /* - * Hardware's MD5 digest is in little endian format, but - * SHA in big endian format - */ + * Hardware's MD5 digest is in little endian format, but + * SHA in big endian format + */ if (creq->algo_le) { __le32 *result = (void *)ahashreq->result; @@ -439,7 +443,8 @@ static bool mv_cesa_ahash_cache_req(struct ahash_request *req) struct mv_cesa_ahash_req *creq = ahash_request_ctx(req); bool cached = false; - if (creq->cache_ptr + req->nbytes < CESA_MAX_HASH_BLOCK_SIZE && !creq->last_req) { + if (creq->cache_ptr + req->nbytes < CESA_MAX_HASH_BLOCK_SIZE && + !creq->last_req) { cached = true; if (!req->nbytes) @@ -648,7 +653,8 @@ static int mv_cesa_ahash_dma_req_init(struct ahash_request *req) if (!mv_cesa_ahash_req_iter_next_op(&iter)) break; - op = mv_cesa_dma_add_frag(&basereq->chain, &creq->op_tmpl, + op = mv_cesa_dma_add_frag(&basereq->chain, + &creq->op_tmpl, frag_len, flags); if (IS_ERR(op)) { ret = PTR_ERR(op); @@ -920,7 +926,7 @@ struct ahash_alg mv_md5_alg = { .cra_ctxsize = sizeof(struct mv_cesa_hash_ctx), .cra_init = mv_cesa_ahash_cra_init, .cra_module = THIS_MODULE, - } + } } }; @@ -990,7 +996,7 @@ struct ahash_alg mv_sha1_alg = { .cra_ctxsize = sizeof(struct mv_cesa_hash_ctx), .cra_init = mv_cesa_ahash_cra_init, .cra_module = THIS_MODULE, - } + } } }; @@ -1063,7 +1069,7 @@ struct ahash_alg mv_sha256_alg = { .cra_ctxsize = sizeof(struct mv_cesa_hash_ctx), .cra_init = mv_cesa_ahash_cra_init, .cra_module = THIS_MODULE, - } + } } }; @@ -1297,7 +1303,7 @@ struct ahash_alg mv_ahmac_md5_alg = { .cra_ctxsize = sizeof(struct mv_cesa_hmac_ctx), .cra_init = mv_cesa_ahmac_cra_init, .cra_module = THIS_MODULE, - } + } } }; @@ -1367,7 +1373,7 @@ struct ahash_alg mv_ahmac_sha1_alg = { .cra_ctxsize = sizeof(struct mv_cesa_hmac_ctx), .cra_init = mv_cesa_ahmac_cra_init, .cra_module = THIS_MODULE, - } + } } }; @@ -1437,6 +1443,6 @@ struct ahash_alg mv_ahmac_sha256_alg = { .cra_ctxsize = sizeof(struct mv_cesa_hmac_ctx), .cra_init = mv_cesa_ahmac_cra_init, .cra_module = THIS_MODULE, - } + } } }; diff --git a/drivers/crypto/marvell/tdma.c b/drivers/crypto/marvell/cesa/tdma.c index 45939d53e8d6..b81ee276fe0e 100644 --- a/drivers/crypto/marvell/tdma.c +++ b/drivers/crypto/marvell/cesa/tdma.c @@ -50,8 +50,8 @@ void mv_cesa_dma_step(struct mv_cesa_req *dreq) engine->regs + CESA_SA_CFG); writel_relaxed(dreq->chain.first->cur_dma, engine->regs + CESA_TDMA_NEXT_ADDR); - BUG_ON(readl(engine->regs + CESA_SA_CMD) & - CESA_SA_CMD_EN_CESA_SA_ACCL0); + WARN_ON(readl(engine->regs + CESA_SA_CMD) & + CESA_SA_CMD_EN_CESA_SA_ACCL0); writel(CESA_SA_CMD_EN_CESA_SA_ACCL0, engine->regs + CESA_SA_CMD); } @@ -175,8 +175,10 @@ int mv_cesa_tdma_process(struct mv_cesa_engine *engine, u32 status) break; } - /* Save the last request in error to engine->req, so that the core - * knows which request was fautly */ + /* + * Save the last request in error to engine->req, so that the core + * knows which request was fautly + */ if (res) { spin_lock_bh(&engine->lock); engine->req = req; diff --git a/drivers/crypto/marvell/octeontx/Makefile b/drivers/crypto/marvell/octeontx/Makefile new file mode 100644 index 000000000000..5e956fe1a85b --- /dev/null +++ b/drivers/crypto/marvell/octeontx/Makefile @@ -0,0 +1,6 @@ +# SPDX-License-Identifier: GPL-2.0 +obj-$(CONFIG_CRYPTO_DEV_OCTEONTX_CPT) += octeontx-cpt.o octeontx-cptvf.o + +octeontx-cpt-objs := otx_cptpf_main.o otx_cptpf_mbox.o otx_cptpf_ucode.o +octeontx-cptvf-objs := otx_cptvf_main.o otx_cptvf_mbox.o otx_cptvf_reqmgr.o \ + otx_cptvf_algs.o diff --git a/drivers/crypto/marvell/octeontx/otx_cpt_common.h b/drivers/crypto/marvell/octeontx/otx_cpt_common.h new file mode 100644 index 000000000000..ca704a7a265f --- /dev/null +++ b/drivers/crypto/marvell/octeontx/otx_cpt_common.h @@ -0,0 +1,51 @@ +/* SPDX-License-Identifier: GPL-2.0 + * Marvell OcteonTX CPT driver + * + * Copyright (C) 2019 Marvell International Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef __OTX_CPT_COMMON_H +#define __OTX_CPT_COMMON_H + +#include <linux/types.h> +#include <linux/delay.h> +#include <linux/device.h> + +#define OTX_CPT_MAX_MBOX_DATA_STR_SIZE 64 + +enum otx_cptpf_type { + OTX_CPT_AE = 2, + OTX_CPT_SE = 3, + BAD_OTX_CPTPF_TYPE, +}; + +enum otx_cptvf_type { + OTX_CPT_AE_TYPES = 1, + OTX_CPT_SE_TYPES = 2, + BAD_OTX_CPTVF_TYPE, +}; + +/* VF-PF message opcodes */ +enum otx_cpt_mbox_opcode { + OTX_CPT_MSG_VF_UP = 1, + OTX_CPT_MSG_VF_DOWN, + OTX_CPT_MSG_READY, + OTX_CPT_MSG_QLEN, + OTX_CPT_MSG_QBIND_GRP, + OTX_CPT_MSG_VQ_PRIORITY, + OTX_CPT_MSG_PF_TYPE, + OTX_CPT_MSG_ACK, + OTX_CPT_MSG_NACK +}; + +/* OcteonTX CPT mailbox structure */ +struct otx_cpt_mbox { + u64 msg; /* Message type MBOX[0] */ + u64 data;/* Data MBOX[1] */ +}; + +#endif /* __OTX_CPT_COMMON_H */ diff --git a/drivers/crypto/marvell/octeontx/otx_cpt_hw_types.h b/drivers/crypto/marvell/octeontx/otx_cpt_hw_types.h new file mode 100644 index 000000000000..b8bdb9f134f3 --- /dev/null +++ b/drivers/crypto/marvell/octeontx/otx_cpt_hw_types.h @@ -0,0 +1,824 @@ +/* SPDX-License-Identifier: GPL-2.0 + * Marvell OcteonTX CPT driver + * + * Copyright (C) 2019 Marvell International Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef __OTX_CPT_HW_TYPES_H +#define __OTX_CPT_HW_TYPES_H + +#include <linux/types.h> + +/* Device IDs */ +#define OTX_CPT_PCI_PF_DEVICE_ID 0xa040 +#define OTX_CPT_PCI_VF_DEVICE_ID 0xa041 + +#define OTX_CPT_PCI_PF_SUBSYS_ID 0xa340 +#define OTX_CPT_PCI_VF_SUBSYS_ID 0xa341 + +/* Configuration and status registers are in BAR0 on OcteonTX platform */ +#define OTX_CPT_PF_PCI_CFG_BAR 0 +#define OTX_CPT_VF_PCI_CFG_BAR 0 + +#define OTX_CPT_BAR_E_CPTX_VFX_BAR0_OFFSET(a, b) \ + (0x000020000000ll + 0x1000000000ll * (a) + 0x100000ll * (b)) +#define OTX_CPT_BAR_E_CPTX_VFX_BAR0_SIZE 0x400000 + +/* Mailbox interrupts offset */ +#define OTX_CPT_PF_MBOX_INT 3 +#define OTX_CPT_PF_INT_VEC_E_MBOXX(x, a) ((x) + (a)) +/* Number of MSIX supported in PF */ +#define OTX_CPT_PF_MSIX_VECTORS 4 +/* Maximum supported microcode groups */ +#define OTX_CPT_MAX_ENGINE_GROUPS 8 + +/* CPT instruction size in bytes */ +#define OTX_CPT_INST_SIZE 64 +/* CPT queue next chunk pointer size in bytes */ +#define OTX_CPT_NEXT_CHUNK_PTR_SIZE 8 + +/* OcteonTX CPT VF MSIX vectors and their offsets */ +#define OTX_CPT_VF_MSIX_VECTORS 2 +#define OTX_CPT_VF_INTR_MBOX_MASK BIT(0) +#define OTX_CPT_VF_INTR_DOVF_MASK BIT(1) +#define OTX_CPT_VF_INTR_IRDE_MASK BIT(2) +#define OTX_CPT_VF_INTR_NWRP_MASK BIT(3) +#define OTX_CPT_VF_INTR_SERR_MASK BIT(4) + +/* OcteonTX CPT PF registers */ +#define OTX_CPT_PF_CONSTANTS (0x0ll) +#define OTX_CPT_PF_RESET (0x100ll) +#define OTX_CPT_PF_DIAG (0x120ll) +#define OTX_CPT_PF_BIST_STATUS (0x160ll) +#define OTX_CPT_PF_ECC0_CTL (0x200ll) +#define OTX_CPT_PF_ECC0_FLIP (0x210ll) +#define OTX_CPT_PF_ECC0_INT (0x220ll) +#define OTX_CPT_PF_ECC0_INT_W1S (0x230ll) +#define OTX_CPT_PF_ECC0_ENA_W1S (0x240ll) +#define OTX_CPT_PF_ECC0_ENA_W1C (0x250ll) +#define OTX_CPT_PF_MBOX_INTX(b) (0x400ll | (u64)(b) << 3) +#define OTX_CPT_PF_MBOX_INT_W1SX(b) (0x420ll | (u64)(b) << 3) +#define OTX_CPT_PF_MBOX_ENA_W1CX(b) (0x440ll | (u64)(b) << 3) +#define OTX_CPT_PF_MBOX_ENA_W1SX(b) (0x460ll | (u64)(b) << 3) +#define OTX_CPT_PF_EXEC_INT (0x500ll) +#define OTX_CPT_PF_EXEC_INT_W1S (0x520ll) +#define OTX_CPT_PF_EXEC_ENA_W1C (0x540ll) +#define OTX_CPT_PF_EXEC_ENA_W1S (0x560ll) +#define OTX_CPT_PF_GX_EN(b) (0x600ll | (u64)(b) << 3) +#define OTX_CPT_PF_EXEC_INFO (0x700ll) +#define OTX_CPT_PF_EXEC_BUSY (0x800ll) +#define OTX_CPT_PF_EXEC_INFO0 (0x900ll) +#define OTX_CPT_PF_EXEC_INFO1 (0x910ll) +#define OTX_CPT_PF_INST_REQ_PC (0x10000ll) +#define OTX_CPT_PF_INST_LATENCY_PC (0x10020ll) +#define OTX_CPT_PF_RD_REQ_PC (0x10040ll) +#define OTX_CPT_PF_RD_LATENCY_PC (0x10060ll) +#define OTX_CPT_PF_RD_UC_PC (0x10080ll) +#define OTX_CPT_PF_ACTIVE_CYCLES_PC (0x10100ll) +#define OTX_CPT_PF_EXE_CTL (0x4000000ll) +#define OTX_CPT_PF_EXE_STATUS (0x4000008ll) +#define OTX_CPT_PF_EXE_CLK (0x4000010ll) +#define OTX_CPT_PF_EXE_DBG_CTL (0x4000018ll) +#define OTX_CPT_PF_EXE_DBG_DATA (0x4000020ll) +#define OTX_CPT_PF_EXE_BIST_STATUS (0x4000028ll) +#define OTX_CPT_PF_EXE_REQ_TIMER (0x4000030ll) +#define OTX_CPT_PF_EXE_MEM_CTL (0x4000038ll) +#define OTX_CPT_PF_EXE_PERF_CTL (0x4001000ll) +#define OTX_CPT_PF_EXE_DBG_CNTX(b) (0x4001100ll | (u64)(b) << 3) +#define OTX_CPT_PF_EXE_PERF_EVENT_CNT (0x4001180ll) +#define OTX_CPT_PF_EXE_EPCI_INBX_CNT(b) (0x4001200ll | (u64)(b) << 3) +#define OTX_CPT_PF_EXE_EPCI_OUTBX_CNT(b) (0x4001240ll | (u64)(b) << 3) +#define OTX_CPT_PF_ENGX_UCODE_BASE(b) (0x4002000ll | (u64)(b) << 3) +#define OTX_CPT_PF_QX_CTL(b) (0x8000000ll | (u64)(b) << 20) +#define OTX_CPT_PF_QX_GMCTL(b) (0x8000020ll | (u64)(b) << 20) +#define OTX_CPT_PF_QX_CTL2(b) (0x8000100ll | (u64)(b) << 20) +#define OTX_CPT_PF_VFX_MBOXX(b, c) (0x8001000ll | (u64)(b) << 20 | \ + (u64)(c) << 8) + +/* OcteonTX CPT VF registers */ +#define OTX_CPT_VQX_CTL(b) (0x100ll | (u64)(b) << 20) +#define OTX_CPT_VQX_SADDR(b) (0x200ll | (u64)(b) << 20) +#define OTX_CPT_VQX_DONE_WAIT(b) (0x400ll | (u64)(b) << 20) +#define OTX_CPT_VQX_INPROG(b) (0x410ll | (u64)(b) << 20) +#define OTX_CPT_VQX_DONE(b) (0x420ll | (u64)(b) << 20) +#define OTX_CPT_VQX_DONE_ACK(b) (0x440ll | (u64)(b) << 20) +#define OTX_CPT_VQX_DONE_INT_W1S(b) (0x460ll | (u64)(b) << 20) +#define OTX_CPT_VQX_DONE_INT_W1C(b) (0x468ll | (u64)(b) << 20) +#define OTX_CPT_VQX_DONE_ENA_W1S(b) (0x470ll | (u64)(b) << 20) +#define OTX_CPT_VQX_DONE_ENA_W1C(b) (0x478ll | (u64)(b) << 20) +#define OTX_CPT_VQX_MISC_INT(b) (0x500ll | (u64)(b) << 20) +#define OTX_CPT_VQX_MISC_INT_W1S(b) (0x508ll | (u64)(b) << 20) +#define OTX_CPT_VQX_MISC_ENA_W1S(b) (0x510ll | (u64)(b) << 20) +#define OTX_CPT_VQX_MISC_ENA_W1C(b) (0x518ll | (u64)(b) << 20) +#define OTX_CPT_VQX_DOORBELL(b) (0x600ll | (u64)(b) << 20) +#define OTX_CPT_VFX_PF_MBOXX(b, c) (0x1000ll | ((b) << 20) | ((c) << 3)) + +/* + * Enumeration otx_cpt_ucode_error_code_e + * + * Enumerates ucode errors + */ +enum otx_cpt_ucode_error_code_e { + CPT_NO_UCODE_ERROR = 0x00, + ERR_OPCODE_UNSUPPORTED = 0x01, + + /* Scatter gather */ + ERR_SCATTER_GATHER_WRITE_LENGTH = 0x02, + ERR_SCATTER_GATHER_LIST = 0x03, + ERR_SCATTER_GATHER_NOT_SUPPORTED = 0x04, + +}; + +/* + * Enumeration otx_cpt_comp_e + * + * CPT OcteonTX Completion Enumeration + * Enumerates the values of CPT_RES_S[COMPCODE]. + */ +enum otx_cpt_comp_e { + CPT_COMP_E_NOTDONE = 0x00, + CPT_COMP_E_GOOD = 0x01, + CPT_COMP_E_FAULT = 0x02, + CPT_COMP_E_SWERR = 0x03, + CPT_COMP_E_HWERR = 0x04, + CPT_COMP_E_LAST_ENTRY = 0x05 +}; + +/* + * Enumeration otx_cpt_vf_int_vec_e + * + * CPT OcteonTX VF MSI-X Vector Enumeration + * Enumerates the MSI-X interrupt vectors. + */ +enum otx_cpt_vf_int_vec_e { + CPT_VF_INT_VEC_E_MISC = 0x00, + CPT_VF_INT_VEC_E_DONE = 0x01 +}; + +/* + * Structure cpt_inst_s + * + * CPT Instruction Structure + * This structure specifies the instruction layout. Instructions are + * stored in memory as little-endian unless CPT()_PF_Q()_CTL[INST_BE] is set. + * cpt_inst_s_s + * Word 0 + * doneint:1 Done interrupt. + * 0 = No interrupts related to this instruction. + * 1 = When the instruction completes, CPT()_VQ()_DONE[DONE] will be + * incremented,and based on the rules described there an interrupt may + * occur. + * Word 1 + * res_addr [127: 64] Result IOVA. + * If nonzero, specifies where to write CPT_RES_S. + * If zero, no result structure will be written. + * Address must be 16-byte aligned. + * Bits <63:49> are ignored by hardware; software should use a + * sign-extended bit <48> for forward compatibility. + * Word 2 + * grp:10 [171:162] If [WQ_PTR] is nonzero, the SSO guest-group to use when + * CPT submits work SSO. + * For the SSO to not discard the add-work request, FPA_PF_MAP() must map + * [GRP] and CPT()_PF_Q()_GMCTL[GMID] as valid. + * tt:2 [161:160] If [WQ_PTR] is nonzero, the SSO tag type to use when CPT + * submits work to SSO + * tag:32 [159:128] If [WQ_PTR] is nonzero, the SSO tag to use when CPT + * submits work to SSO. + * Word 3 + * wq_ptr [255:192] If [WQ_PTR] is nonzero, it is a pointer to a + * work-queue entry that CPT submits work to SSO after all context, + * output data, and result write operations are visible to other + * CNXXXX units and the cores. Bits <2:0> must be zero. + * Bits <63:49> are ignored by hardware; software should + * use a sign-extended bit <48> for forward compatibility. + * Internal: + * Bits <63:49>, <2:0> are ignored by hardware, treated as always 0x0. + * Word 4 + * ei0; [319:256] Engine instruction word 0. Passed to the AE/SE. + * Word 5 + * ei1; [383:320] Engine instruction word 1. Passed to the AE/SE. + * Word 6 + * ei2; [447:384] Engine instruction word 1. Passed to the AE/SE. + * Word 7 + * ei3; [511:448] Engine instruction word 1. Passed to the AE/SE. + * + */ +union otx_cpt_inst_s { + u64 u[8]; + + struct { +#if defined(__BIG_ENDIAN_BITFIELD) /* Word 0 - Big Endian */ + u64 reserved_17_63:47; + u64 doneint:1; + u64 reserved_0_15:16; +#else /* Word 0 - Little Endian */ + u64 reserved_0_15:16; + u64 doneint:1; + u64 reserved_17_63:47; +#endif /* Word 0 - End */ + u64 res_addr; +#if defined(__BIG_ENDIAN_BITFIELD) /* Word 2 - Big Endian */ + u64 reserved_172_191:20; + u64 grp:10; + u64 tt:2; + u64 tag:32; +#else /* Word 2 - Little Endian */ + u64 tag:32; + u64 tt:2; + u64 grp:10; + u64 reserved_172_191:20; +#endif /* Word 2 - End */ + u64 wq_ptr; + u64 ei0; + u64 ei1; + u64 ei2; + u64 ei3; + } s; +}; + +/* + * Structure cpt_res_s + * + * CPT Result Structure + * The CPT coprocessor writes the result structure after it completes a + * CPT_INST_S instruction. The result structure is exactly 16 bytes, and + * each instruction completion produces exactly one result structure. + * + * This structure is stored in memory as little-endian unless + * CPT()_PF_Q()_CTL[INST_BE] is set. + * cpt_res_s_s + * Word 0 + * doneint:1 [16:16] Done interrupt. This bit is copied from the + * corresponding instruction's CPT_INST_S[DONEINT]. + * compcode:8 [7:0] Indicates completion/error status of the CPT coprocessor + * for the associated instruction, as enumerated by CPT_COMP_E. + * Core software may write the memory location containing [COMPCODE] to + * 0x0 before ringing the doorbell, and then poll for completion by + * checking for a nonzero value. + * Once the core observes a nonzero [COMPCODE] value in this case,the CPT + * coprocessor will have also completed L2/DRAM write operations. + * Word 1 + * reserved + * + */ +union otx_cpt_res_s { + u64 u[2]; + struct { +#if defined(__BIG_ENDIAN_BITFIELD) /* Word 0 - Big Endian */ + u64 reserved_17_63:47; + u64 doneint:1; + u64 reserved_8_15:8; + u64 compcode:8; +#else /* Word 0 - Little Endian */ + u64 compcode:8; + u64 reserved_8_15:8; + u64 doneint:1; + u64 reserved_17_63:47; +#endif /* Word 0 - End */ + u64 reserved_64_127; + } s; +}; + +/* + * Register (NCB) otx_cpt#_pf_bist_status + * + * CPT PF Control Bist Status Register + * This register has the BIST status of memories. Each bit is the BIST result + * of an individual memory (per bit, 0 = pass and 1 = fail). + * otx_cptx_pf_bist_status_s + * Word0 + * bstatus [29:0](RO/H) BIST status. One bit per memory, enumerated by + * CPT_RAMS_E. + */ +union otx_cptx_pf_bist_status { + u64 u; + struct otx_cptx_pf_bist_status_s { +#if defined(__BIG_ENDIAN_BITFIELD) /* Word 0 - Big Endian */ + u64 reserved_30_63:34; + u64 bstatus:30; +#else /* Word 0 - Little Endian */ + u64 bstatus:30; + u64 reserved_30_63:34; +#endif /* Word 0 - End */ + } s; +}; + +/* + * Register (NCB) otx_cpt#_pf_constants + * + * CPT PF Constants Register + * This register contains implementation-related parameters of CPT in CNXXXX. + * otx_cptx_pf_constants_s + * Word 0 + * reserved_40_63:24 [63:40] Reserved. + * epcis:8 [39:32](RO) Number of EPCI busses. + * grps:8 [31:24](RO) Number of engine groups implemented. + * ae:8 [23:16](RO/H) Number of AEs. In CNXXXX, for CPT0 returns 0x0, + * for CPT1 returns 0x18, or less if there are fuse-disables. + * se:8 [15:8](RO/H) Number of SEs. In CNXXXX, for CPT0 returns 0x30, + * or less if there are fuse-disables, for CPT1 returns 0x0. + * vq:8 [7:0](RO) Number of VQs. + */ +union otx_cptx_pf_constants { + u64 u; + struct otx_cptx_pf_constants_s { +#if defined(__BIG_ENDIAN_BITFIELD) /* Word 0 - Big Endian */ + u64 reserved_40_63:24; + u64 epcis:8; + u64 grps:8; + u64 ae:8; + u64 se:8; + u64 vq:8; +#else /* Word 0 - Little Endian */ + u64 vq:8; + u64 se:8; + u64 ae:8; + u64 grps:8; + u64 epcis:8; + u64 reserved_40_63:24; +#endif /* Word 0 - End */ + } s; +}; + +/* + * Register (NCB) otx_cpt#_pf_exe_bist_status + * + * CPT PF Engine Bist Status Register + * This register has the BIST status of each engine. Each bit is the + * BIST result of an individual engine (per bit, 0 = pass and 1 = fail). + * otx_cptx_pf_exe_bist_status_s + * Word0 + * reserved_48_63:16 [63:48] reserved + * bstatus:48 [47:0](RO/H) BIST status. One bit per engine. + * + */ +union otx_cptx_pf_exe_bist_status { + u64 u; + struct otx_cptx_pf_exe_bist_status_s { +#if defined(__BIG_ENDIAN_BITFIELD) /* Word 0 - Big Endian */ + u64 reserved_48_63:16; + u64 bstatus:48; +#else /* Word 0 - Little Endian */ + u64 bstatus:48; + u64 reserved_48_63:16; +#endif /* Word 0 - End */ + } s; +}; + +/* + * Register (NCB) otx_cpt#_pf_q#_ctl + * + * CPT Queue Control Register + * This register configures queues. This register should be changed only + * when quiescent (see CPT()_VQ()_INPROG[INFLIGHT]). + * otx_cptx_pf_qx_ctl_s + * Word0 + * reserved_60_63:4 [63:60] reserved. + * aura:12; [59:48](R/W) Guest-aura for returning this queue's + * instruction-chunk buffers to FPA. Only used when [INST_FREE] is set. + * For the FPA to not discard the request, FPA_PF_MAP() must map + * [AURA] and CPT()_PF_Q()_GMCTL[GMID] as valid. + * reserved_45_47:3 [47:45] reserved. + * size:13 [44:32](R/W) Command-buffer size, in number of 64-bit words per + * command buffer segment. Must be 8*n + 1, where n is the number of + * instructions per buffer segment. + * reserved_11_31:21 [31:11] Reserved. + * cont_err:1 [10:10](R/W) Continue on error. + * 0 = When CPT()_VQ()_MISC_INT[NWRP], CPT()_VQ()_MISC_INT[IRDE] or + * CPT()_VQ()_MISC_INT[DOVF] are set by hardware or software via + * CPT()_VQ()_MISC_INT_W1S, then CPT()_VQ()_CTL[ENA] is cleared. Due to + * pipelining, additional instructions may have been processed between the + * instruction causing the error and the next instruction in the disabled + * queue (the instruction at CPT()_VQ()_SADDR). + * 1 = Ignore errors and continue processing instructions. + * For diagnostic use only. + * inst_free:1 [9:9](R/W) Instruction FPA free. When set, when CPT reaches the + * end of an instruction chunk, that chunk will be freed to the FPA. + * inst_be:1 [8:8](R/W) Instruction big-endian control. When set, instructions, + * instruction next chunk pointers, and result structures are stored in + * big-endian format in memory. + * iqb_ldwb:1 [7:7](R/W) Instruction load don't write back. + * 0 = The hardware issues NCB transient load (LDT) towards the cache, + * which if the line hits and is is dirty will cause the line to be + * written back before being replaced. + * 1 = The hardware issues NCB LDWB read-and-invalidate command towards + * the cache when fetching the last word of instructions; as a result the + * line will not be written back when replaced. This improves + * performance, but software must not read the instructions after they are + * posted to the hardware. Reads that do not consume the last word of a + * cache line always use LDI. + * reserved_4_6:3 [6:4] Reserved. + * grp:3; [3:1](R/W) Engine group. + * pri:1; [0:0](R/W) Queue priority. + * 1 = This queue has higher priority. Round-robin between higher + * priority queues. + * 0 = This queue has lower priority. Round-robin between lower + * priority queues. + */ +union otx_cptx_pf_qx_ctl { + u64 u; + struct otx_cptx_pf_qx_ctl_s { +#if defined(__BIG_ENDIAN_BITFIELD) /* Word 0 - Big Endian */ + u64 reserved_60_63:4; + u64 aura:12; + u64 reserved_45_47:3; + u64 size:13; + u64 reserved_11_31:21; + u64 cont_err:1; + u64 inst_free:1; + u64 inst_be:1; + u64 iqb_ldwb:1; + u64 reserved_4_6:3; + u64 grp:3; + u64 pri:1; +#else /* Word 0 - Little Endian */ + u64 pri:1; + u64 grp:3; + u64 reserved_4_6:3; + u64 iqb_ldwb:1; + u64 inst_be:1; + u64 inst_free:1; + u64 cont_err:1; + u64 reserved_11_31:21; + u64 size:13; + u64 reserved_45_47:3; + u64 aura:12; + u64 reserved_60_63:4; +#endif /* Word 0 - End */ + } s; +}; + +/* + * Register (NCB) otx_cpt#_vq#_saddr + * + * CPT Queue Starting Buffer Address Registers + * These registers set the instruction buffer starting address. + * otx_cptx_vqx_saddr_s + * Word0 + * reserved_49_63:15 [63:49] Reserved. + * ptr:43 [48:6](R/W/H) Instruction buffer IOVA <48:6> (64-byte aligned). + * When written, it is the initial buffer starting address; when read, + * it is the next read pointer to be requested from L2C. The PTR field + * is overwritten with the next pointer each time that the command buffer + * segment is exhausted. New commands will then be read from the newly + * specified command buffer pointer. + * reserved_0_5:6 [5:0] Reserved. + * + */ +union otx_cptx_vqx_saddr { + u64 u; + struct otx_cptx_vqx_saddr_s { +#if defined(__BIG_ENDIAN_BITFIELD) /* Word 0 - Big Endian */ + u64 reserved_49_63:15; + u64 ptr:43; + u64 reserved_0_5:6; +#else /* Word 0 - Little Endian */ + u64 reserved_0_5:6; + u64 ptr:43; + u64 reserved_49_63:15; +#endif /* Word 0 - End */ + } s; +}; + +/* + * Register (NCB) otx_cpt#_vq#_misc_ena_w1s + * + * CPT Queue Misc Interrupt Enable Set Register + * This register sets interrupt enable bits. + * otx_cptx_vqx_misc_ena_w1s_s + * Word0 + * reserved_5_63:59 [63:5] Reserved. + * swerr:1 [4:4](R/W1S/H) Reads or sets enable for + * CPT(0..1)_VQ(0..63)_MISC_INT[SWERR]. + * nwrp:1 [3:3](R/W1S/H) Reads or sets enable for + * CPT(0..1)_VQ(0..63)_MISC_INT[NWRP]. + * irde:1 [2:2](R/W1S/H) Reads or sets enable for + * CPT(0..1)_VQ(0..63)_MISC_INT[IRDE]. + * dovf:1 [1:1](R/W1S/H) Reads or sets enable for + * CPT(0..1)_VQ(0..63)_MISC_INT[DOVF]. + * mbox:1 [0:0](R/W1S/H) Reads or sets enable for + * CPT(0..1)_VQ(0..63)_MISC_INT[MBOX]. + * + */ +union otx_cptx_vqx_misc_ena_w1s { + u64 u; + struct otx_cptx_vqx_misc_ena_w1s_s { +#if defined(__BIG_ENDIAN_BITFIELD) /* Word 0 - Big Endian */ + u64 reserved_5_63:59; + u64 swerr:1; + u64 nwrp:1; + u64 irde:1; + u64 dovf:1; + u64 mbox:1; +#else /* Word 0 - Little Endian */ + u64 mbox:1; + u64 dovf:1; + u64 irde:1; + u64 nwrp:1; + u64 swerr:1; + u64 reserved_5_63:59; +#endif /* Word 0 - End */ + } s; +}; + +/* + * Register (NCB) otx_cpt#_vq#_doorbell + * + * CPT Queue Doorbell Registers + * Doorbells for the CPT instruction queues. + * otx_cptx_vqx_doorbell_s + * Word0 + * reserved_20_63:44 [63:20] Reserved. + * dbell_cnt:20 [19:0](R/W/H) Number of instruction queue 64-bit words to add + * to the CPT instruction doorbell count. Readback value is the the + * current number of pending doorbell requests. If counter overflows + * CPT()_VQ()_MISC_INT[DBELL_DOVF] is set. To reset the count back to + * zero, write one to clear CPT()_VQ()_MISC_INT_ENA_W1C[DBELL_DOVF], + * then write a value of 2^20 minus the read [DBELL_CNT], then write one + * to CPT()_VQ()_MISC_INT_W1C[DBELL_DOVF] and + * CPT()_VQ()_MISC_INT_ENA_W1S[DBELL_DOVF]. Must be a multiple of 8. + * All CPT instructions are 8 words and require a doorbell count of + * multiple of 8. + */ +union otx_cptx_vqx_doorbell { + u64 u; + struct otx_cptx_vqx_doorbell_s { +#if defined(__BIG_ENDIAN_BITFIELD) /* Word 0 - Big Endian */ + u64 reserved_20_63:44; + u64 dbell_cnt:20; +#else /* Word 0 - Little Endian */ + u64 dbell_cnt:20; + u64 reserved_20_63:44; +#endif /* Word 0 - End */ + } s; +}; + +/* + * Register (NCB) otx_cpt#_vq#_inprog + * + * CPT Queue In Progress Count Registers + * These registers contain the per-queue instruction in flight registers. + * otx_cptx_vqx_inprog_s + * Word0 + * reserved_8_63:56 [63:8] Reserved. + * inflight:8 [7:0](RO/H) Inflight count. Counts the number of instructions + * for the VF for which CPT is fetching, executing or responding to + * instructions. However this does not include any interrupts that are + * awaiting software handling (CPT()_VQ()_DONE[DONE] != 0x0). + * A queue may not be reconfigured until: + * 1. CPT()_VQ()_CTL[ENA] is cleared by software. + * 2. [INFLIGHT] is polled until equals to zero. + */ +union otx_cptx_vqx_inprog { + u64 u; + struct otx_cptx_vqx_inprog_s { +#if defined(__BIG_ENDIAN_BITFIELD) /* Word 0 - Big Endian */ + u64 reserved_8_63:56; + u64 inflight:8; +#else /* Word 0 - Little Endian */ + u64 inflight:8; + u64 reserved_8_63:56; +#endif /* Word 0 - End */ + } s; +}; + +/* + * Register (NCB) otx_cpt#_vq#_misc_int + * + * CPT Queue Misc Interrupt Register + * These registers contain the per-queue miscellaneous interrupts. + * otx_cptx_vqx_misc_int_s + * Word 0 + * reserved_5_63:59 [63:5] Reserved. + * swerr:1 [4:4](R/W1C/H) Software error from engines. + * nwrp:1 [3:3](R/W1C/H) NCB result write response error. + * irde:1 [2:2](R/W1C/H) Instruction NCB read response error. + * dovf:1 [1:1](R/W1C/H) Doorbell overflow. + * mbox:1 [0:0](R/W1C/H) PF to VF mailbox interrupt. Set when + * CPT()_VF()_PF_MBOX(0) is written. + * + */ +union otx_cptx_vqx_misc_int { + u64 u; + struct otx_cptx_vqx_misc_int_s { +#if defined(__BIG_ENDIAN_BITFIELD) /* Word 0 - Big Endian */ + u64 reserved_5_63:59; + u64 swerr:1; + u64 nwrp:1; + u64 irde:1; + u64 dovf:1; + u64 mbox:1; +#else /* Word 0 - Little Endian */ + u64 mbox:1; + u64 dovf:1; + u64 irde:1; + u64 nwrp:1; + u64 swerr:1; + u64 reserved_5_63:59; +#endif /* Word 0 - End */ + } s; +}; + +/* + * Register (NCB) otx_cpt#_vq#_done_ack + * + * CPT Queue Done Count Ack Registers + * This register is written by software to acknowledge interrupts. + * otx_cptx_vqx_done_ack_s + * Word0 + * reserved_20_63:44 [63:20] Reserved. + * done_ack:20 [19:0](R/W/H) Number of decrements to CPT()_VQ()_DONE[DONE]. + * Reads CPT()_VQ()_DONE[DONE]. Written by software to acknowledge + * interrupts. If CPT()_VQ()_DONE[DONE] is still nonzero the interrupt + * will be re-sent if the conditions described in CPT()_VQ()_DONE[DONE] + * are satisfied. + * + */ +union otx_cptx_vqx_done_ack { + u64 u; + struct otx_cptx_vqx_done_ack_s { +#if defined(__BIG_ENDIAN_BITFIELD) /* Word 0 - Big Endian */ + u64 reserved_20_63:44; + u64 done_ack:20; +#else /* Word 0 - Little Endian */ + u64 done_ack:20; + u64 reserved_20_63:44; +#endif /* Word 0 - End */ + } s; +}; + +/* + * Register (NCB) otx_cpt#_vq#_done + * + * CPT Queue Done Count Registers + * These registers contain the per-queue instruction done count. + * cptx_vqx_done_s + * Word0 + * reserved_20_63:44 [63:20] Reserved. + * done:20 [19:0](R/W/H) Done count. When CPT_INST_S[DONEINT] set and that + * instruction completes, CPT()_VQ()_DONE[DONE] is incremented when the + * instruction finishes. Write to this field are for diagnostic use only; + * instead software writes CPT()_VQ()_DONE_ACK with the number of + * decrements for this field. + * Interrupts are sent as follows: + * * When CPT()_VQ()_DONE[DONE] = 0, then no results are pending, the + * interrupt coalescing timer is held to zero, and an interrupt is not + * sent. + * * When CPT()_VQ()_DONE[DONE] != 0, then the interrupt coalescing timer + * counts. If the counter is >= CPT()_VQ()_DONE_WAIT[TIME_WAIT]*1024, or + * CPT()_VQ()_DONE[DONE] >= CPT()_VQ()_DONE_WAIT[NUM_WAIT], i.e. enough + * time has passed or enough results have arrived, then the interrupt is + * sent. + * * When CPT()_VQ()_DONE_ACK is written (or CPT()_VQ()_DONE is written + * but this is not typical), the interrupt coalescing timer restarts. + * Note after decrementing this interrupt equation is recomputed, + * for example if CPT()_VQ()_DONE[DONE] >= CPT()_VQ()_DONE_WAIT[NUM_WAIT] + * and because the timer is zero, the interrupt will be resent immediately. + * (This covers the race case between software acknowledging an interrupt + * and a result returning.) + * * When CPT()_VQ()_DONE_ENA_W1S[DONE] = 0, interrupts are not sent, + * but the counting described above still occurs. + * Since CPT instructions complete out-of-order, if software is using + * completion interrupts the suggested scheme is to request a DONEINT on + * each request, and when an interrupt arrives perform a "greedy" scan for + * completions; even if a later command is acknowledged first this will + * not result in missing a completion. + * Software is responsible for making sure [DONE] does not overflow; + * for example by insuring there are not more than 2^20-1 instructions in + * flight that may request interrupts. + * + */ +union otx_cptx_vqx_done { + u64 u; + struct otx_cptx_vqx_done_s { +#if defined(__BIG_ENDIAN_BITFIELD) /* Word 0 - Big Endian */ + u64 reserved_20_63:44; + u64 done:20; +#else /* Word 0 - Little Endian */ + u64 done:20; + u64 reserved_20_63:44; +#endif /* Word 0 - End */ + } s; +}; + +/* + * Register (NCB) otx_cpt#_vq#_done_wait + * + * CPT Queue Done Interrupt Coalescing Wait Registers + * Specifies the per queue interrupt coalescing settings. + * cptx_vqx_done_wait_s + * Word0 + * reserved_48_63:16 [63:48] Reserved. + * time_wait:16; [47:32](R/W) Time hold-off. When CPT()_VQ()_DONE[DONE] = 0 + * or CPT()_VQ()_DONE_ACK is written a timer is cleared. When the timer + * reaches [TIME_WAIT]*1024 then interrupt coalescing ends. + * see CPT()_VQ()_DONE[DONE]. If 0x0, time coalescing is disabled. + * reserved_20_31:12 [31:20] Reserved. + * num_wait:20 [19:0](R/W) Number of messages hold-off. + * When CPT()_VQ()_DONE[DONE] >= [NUM_WAIT] then interrupt coalescing ends + * see CPT()_VQ()_DONE[DONE]. If 0x0, same behavior as 0x1. + * + */ +union otx_cptx_vqx_done_wait { + u64 u; + struct otx_cptx_vqx_done_wait_s { +#if defined(__BIG_ENDIAN_BITFIELD) /* Word 0 - Big Endian */ + u64 reserved_48_63:16; + u64 time_wait:16; + u64 reserved_20_31:12; + u64 num_wait:20; +#else /* Word 0 - Little Endian */ + u64 num_wait:20; + u64 reserved_20_31:12; + u64 time_wait:16; + u64 reserved_48_63:16; +#endif /* Word 0 - End */ + } s; +}; + +/* + * Register (NCB) otx_cpt#_vq#_done_ena_w1s + * + * CPT Queue Done Interrupt Enable Set Registers + * Write 1 to these registers will enable the DONEINT interrupt for the queue. + * cptx_vqx_done_ena_w1s_s + * Word0 + * reserved_1_63:63 [63:1] Reserved. + * done:1 [0:0](R/W1S/H) Write 1 will enable DONEINT for this queue. + * Write 0 has no effect. Read will return the enable bit. + */ +union otx_cptx_vqx_done_ena_w1s { + u64 u; + struct otx_cptx_vqx_done_ena_w1s_s { +#if defined(__BIG_ENDIAN_BITFIELD) /* Word 0 - Big Endian */ + u64 reserved_1_63:63; + u64 done:1; +#else /* Word 0 - Little Endian */ + u64 done:1; + u64 reserved_1_63:63; +#endif /* Word 0 - End */ + } s; +}; + +/* + * Register (NCB) otx_cpt#_vq#_ctl + * + * CPT VF Queue Control Registers + * This register configures queues. This register should be changed (other than + * clearing [ENA]) only when quiescent (see CPT()_VQ()_INPROG[INFLIGHT]). + * cptx_vqx_ctl_s + * Word0 + * reserved_1_63:63 [63:1] Reserved. + * ena:1 [0:0](R/W/H) Enables the logical instruction queue. + * See also CPT()_PF_Q()_CTL[CONT_ERR] and CPT()_VQ()_INPROG[INFLIGHT]. + * 1 = Queue is enabled. + * 0 = Queue is disabled. + */ +union otx_cptx_vqx_ctl { + u64 u; + struct otx_cptx_vqx_ctl_s { +#if defined(__BIG_ENDIAN_BITFIELD) /* Word 0 - Big Endian */ + u64 reserved_1_63:63; + u64 ena:1; +#else /* Word 0 - Little Endian */ + u64 ena:1; + u64 reserved_1_63:63; +#endif /* Word 0 - End */ + } s; +}; + +/* + * Error Address/Error Codes + * + * In the event of a severe error, microcode writes an 8-byte Error Code + * value (ECODE) to host memory at the Rptr address specified by the host + * system (in the 64-byte request). + * + * Word0 + * [63:56](R) 8-bit completion code + * [55:48](R) Number of the core that reported the severe error + * [47:0] Lower 6 bytes of M-Inst word2. Used to assist in uniquely + * identifying which specific instruction caused the error. This assumes + * that each instruction has a unique result location (RPTR), at least + * for a given period of time. + */ +union otx_cpt_error_code { + u64 u; + struct otx_cpt_error_code_s { +#if defined(__BIG_ENDIAN_BITFIELD) /* Word 0 - Big Endian */ + uint64_t ccode:8; + uint64_t coreid:8; + uint64_t rptr6:48; +#else /* Word 0 - Little Endian */ + uint64_t rptr6:48; + uint64_t coreid:8; + uint64_t ccode:8; +#endif /* Word 0 - End */ + } s; +}; + +#endif /*__OTX_CPT_HW_TYPES_H */ diff --git a/drivers/crypto/marvell/octeontx/otx_cptpf.h b/drivers/crypto/marvell/octeontx/otx_cptpf.h new file mode 100644 index 000000000000..73cd0a9bc563 --- /dev/null +++ b/drivers/crypto/marvell/octeontx/otx_cptpf.h @@ -0,0 +1,34 @@ +/* SPDX-License-Identifier: GPL-2.0 + * Marvell OcteonTX CPT driver + * + * Copyright (C) 2019 Marvell International Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef __OTX_CPTPF_H +#define __OTX_CPTPF_H + +#include <linux/types.h> +#include <linux/device.h> +#include "otx_cptpf_ucode.h" + +/* + * OcteonTX CPT device structure + */ +struct otx_cpt_device { + void __iomem *reg_base; /* Register start address */ + struct pci_dev *pdev; /* Pci device handle */ + struct otx_cpt_eng_grps eng_grps;/* Engine groups information */ + struct list_head list; + u8 pf_type; /* PF type SE or AE */ + u8 max_vfs; /* Maximum number of VFs supported by the CPT */ + u8 vfs_enabled; /* Number of enabled VFs */ +}; + +void otx_cpt_mbox_intr_handler(struct otx_cpt_device *cpt, int mbx); +void otx_cpt_disable_all_cores(struct otx_cpt_device *cpt); + +#endif /* __OTX_CPTPF_H */ diff --git a/drivers/crypto/marvell/octeontx/otx_cptpf_main.c b/drivers/crypto/marvell/octeontx/otx_cptpf_main.c new file mode 100644 index 000000000000..200fb3303db0 --- /dev/null +++ b/drivers/crypto/marvell/octeontx/otx_cptpf_main.c @@ -0,0 +1,307 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Marvell OcteonTX CPT driver + * + * Copyright (C) 2019 Marvell International Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include "otx_cpt_common.h" +#include "otx_cptpf.h" + +#define DRV_NAME "octeontx-cpt" +#define DRV_VERSION "1.0" + +static void otx_cpt_disable_mbox_interrupts(struct otx_cpt_device *cpt) +{ + /* Disable mbox(0) interrupts for all VFs */ + writeq(~0ull, cpt->reg_base + OTX_CPT_PF_MBOX_ENA_W1CX(0)); +} + +static void otx_cpt_enable_mbox_interrupts(struct otx_cpt_device *cpt) +{ + /* Enable mbox(0) interrupts for all VFs */ + writeq(~0ull, cpt->reg_base + OTX_CPT_PF_MBOX_ENA_W1SX(0)); +} + +static irqreturn_t otx_cpt_mbx0_intr_handler(int __always_unused irq, + void *cpt) +{ + otx_cpt_mbox_intr_handler(cpt, 0); + + return IRQ_HANDLED; +} + +static void otx_cpt_reset(struct otx_cpt_device *cpt) +{ + writeq(1, cpt->reg_base + OTX_CPT_PF_RESET); +} + +static void otx_cpt_find_max_enabled_cores(struct otx_cpt_device *cpt) +{ + union otx_cptx_pf_constants pf_cnsts = {0}; + + pf_cnsts.u = readq(cpt->reg_base + OTX_CPT_PF_CONSTANTS); + cpt->eng_grps.avail.max_se_cnt = pf_cnsts.s.se; + cpt->eng_grps.avail.max_ae_cnt = pf_cnsts.s.ae; +} + +static u32 otx_cpt_check_bist_status(struct otx_cpt_device *cpt) +{ + union otx_cptx_pf_bist_status bist_sts = {0}; + + bist_sts.u = readq(cpt->reg_base + OTX_CPT_PF_BIST_STATUS); + return bist_sts.u; +} + +static u64 otx_cpt_check_exe_bist_status(struct otx_cpt_device *cpt) +{ + union otx_cptx_pf_exe_bist_status bist_sts = {0}; + + bist_sts.u = readq(cpt->reg_base + OTX_CPT_PF_EXE_BIST_STATUS); + return bist_sts.u; +} + +static int otx_cpt_device_init(struct otx_cpt_device *cpt) +{ + struct device *dev = &cpt->pdev->dev; + u16 sdevid; + u64 bist; + + /* Reset the PF when probed first */ + otx_cpt_reset(cpt); + mdelay(100); + + pci_read_config_word(cpt->pdev, PCI_SUBSYSTEM_ID, &sdevid); + + /* Check BIST status */ + bist = (u64)otx_cpt_check_bist_status(cpt); + if (bist) { + dev_err(dev, "RAM BIST failed with code 0x%llx", bist); + return -ENODEV; + } + + bist = otx_cpt_check_exe_bist_status(cpt); + if (bist) { + dev_err(dev, "Engine BIST failed with code 0x%llx", bist); + return -ENODEV; + } + + /* Get max enabled cores */ + otx_cpt_find_max_enabled_cores(cpt); + + if ((sdevid == OTX_CPT_PCI_PF_SUBSYS_ID) && + (cpt->eng_grps.avail.max_se_cnt == 0)) { + cpt->pf_type = OTX_CPT_AE; + } else if ((sdevid == OTX_CPT_PCI_PF_SUBSYS_ID) && + (cpt->eng_grps.avail.max_ae_cnt == 0)) { + cpt->pf_type = OTX_CPT_SE; + } + + /* Get max VQs/VFs supported by the device */ + cpt->max_vfs = pci_sriov_get_totalvfs(cpt->pdev); + + /* Disable all cores */ + otx_cpt_disable_all_cores(cpt); + + return 0; +} + +static int otx_cpt_register_interrupts(struct otx_cpt_device *cpt) +{ + struct device *dev = &cpt->pdev->dev; + u32 mbox_int_idx = OTX_CPT_PF_MBOX_INT; + u32 num_vec = OTX_CPT_PF_MSIX_VECTORS; + int ret; + + /* Enable MSI-X */ + ret = pci_alloc_irq_vectors(cpt->pdev, num_vec, num_vec, PCI_IRQ_MSIX); + if (ret < 0) { + dev_err(&cpt->pdev->dev, + "Request for #%d msix vectors failed\n", + num_vec); + return ret; + } + + /* Register mailbox interrupt handlers */ + ret = request_irq(pci_irq_vector(cpt->pdev, + OTX_CPT_PF_INT_VEC_E_MBOXX(mbox_int_idx, 0)), + otx_cpt_mbx0_intr_handler, 0, "CPT Mbox0", cpt); + if (ret) { + dev_err(dev, "Request irq failed\n"); + pci_free_irq_vectors(cpt->pdev); + return ret; + } + /* Enable mailbox interrupt */ + otx_cpt_enable_mbox_interrupts(cpt); + return 0; +} + +static void otx_cpt_unregister_interrupts(struct otx_cpt_device *cpt) +{ + u32 mbox_int_idx = OTX_CPT_PF_MBOX_INT; + + otx_cpt_disable_mbox_interrupts(cpt); + free_irq(pci_irq_vector(cpt->pdev, + OTX_CPT_PF_INT_VEC_E_MBOXX(mbox_int_idx, 0)), + cpt); + pci_free_irq_vectors(cpt->pdev); +} + + +static int otx_cpt_sriov_configure(struct pci_dev *pdev, int numvfs) +{ + struct otx_cpt_device *cpt = pci_get_drvdata(pdev); + int ret = 0; + + if (numvfs > cpt->max_vfs) + numvfs = cpt->max_vfs; + + if (numvfs > 0) { + ret = otx_cpt_try_create_default_eng_grps(cpt->pdev, + &cpt->eng_grps, + cpt->pf_type); + if (ret) + return ret; + + cpt->vfs_enabled = numvfs; + ret = pci_enable_sriov(pdev, numvfs); + if (ret) { + cpt->vfs_enabled = 0; + return ret; + } + otx_cpt_set_eng_grps_is_rdonly(&cpt->eng_grps, true); + try_module_get(THIS_MODULE); + ret = numvfs; + } else { + pci_disable_sriov(pdev); + otx_cpt_set_eng_grps_is_rdonly(&cpt->eng_grps, false); + module_put(THIS_MODULE); + cpt->vfs_enabled = 0; + } + dev_notice(&cpt->pdev->dev, "VFs enabled: %d\n", ret); + + return ret; +} + +static int otx_cpt_probe(struct pci_dev *pdev, + const struct pci_device_id __always_unused *ent) +{ + struct device *dev = &pdev->dev; + struct otx_cpt_device *cpt; + int err; + + cpt = devm_kzalloc(dev, sizeof(*cpt), GFP_KERNEL); + if (!cpt) + return -ENOMEM; + + pci_set_drvdata(pdev, cpt); + cpt->pdev = pdev; + + err = pci_enable_device(pdev); + if (err) { + dev_err(dev, "Failed to enable PCI device\n"); + goto err_clear_drvdata; + } + + err = pci_request_regions(pdev, DRV_NAME); + if (err) { + dev_err(dev, "PCI request regions failed 0x%x\n", err); + goto err_disable_device; + } + + err = pci_set_dma_mask(pdev, DMA_BIT_MASK(48)); + if (err) { + dev_err(dev, "Unable to get usable DMA configuration\n"); + goto err_release_regions; + } + + err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(48)); + if (err) { + dev_err(dev, "Unable to get 48-bit DMA for consistent allocations\n"); + goto err_release_regions; + } + + /* MAP PF's configuration registers */ + cpt->reg_base = pci_iomap(pdev, OTX_CPT_PF_PCI_CFG_BAR, 0); + if (!cpt->reg_base) { + dev_err(dev, "Cannot map config register space, aborting\n"); + err = -ENOMEM; + goto err_release_regions; + } + + /* CPT device HW initialization */ + err = otx_cpt_device_init(cpt); + if (err) + goto err_unmap_region; + + /* Register interrupts */ + err = otx_cpt_register_interrupts(cpt); + if (err) + goto err_unmap_region; + + /* Initialize engine groups */ + err = otx_cpt_init_eng_grps(pdev, &cpt->eng_grps, cpt->pf_type); + if (err) + goto err_unregister_interrupts; + + return 0; + +err_unregister_interrupts: + otx_cpt_unregister_interrupts(cpt); +err_unmap_region: + pci_iounmap(pdev, cpt->reg_base); +err_release_regions: + pci_release_regions(pdev); +err_disable_device: + pci_disable_device(pdev); +err_clear_drvdata: + pci_set_drvdata(pdev, NULL); + + return err; +} + +static void otx_cpt_remove(struct pci_dev *pdev) +{ + struct otx_cpt_device *cpt = pci_get_drvdata(pdev); + + if (!cpt) + return; + + /* Disable VFs */ + pci_disable_sriov(pdev); + /* Cleanup engine groups */ + otx_cpt_cleanup_eng_grps(pdev, &cpt->eng_grps); + /* Disable CPT PF interrupts */ + otx_cpt_unregister_interrupts(cpt); + /* Disengage SE and AE cores from all groups */ + otx_cpt_disable_all_cores(cpt); + pci_iounmap(pdev, cpt->reg_base); + pci_release_regions(pdev); + pci_disable_device(pdev); + pci_set_drvdata(pdev, NULL); +} + +/* Supported devices */ +static const struct pci_device_id otx_cpt_id_table[] = { + { PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, OTX_CPT_PCI_PF_DEVICE_ID) }, + { 0, } /* end of table */ +}; + +static struct pci_driver otx_cpt_pci_driver = { + .name = DRV_NAME, + .id_table = otx_cpt_id_table, + .probe = otx_cpt_probe, + .remove = otx_cpt_remove, + .sriov_configure = otx_cpt_sriov_configure +}; + +module_pci_driver(otx_cpt_pci_driver); + +MODULE_AUTHOR("Marvell International Ltd."); +MODULE_DESCRIPTION("Marvell OcteonTX CPT Physical Function Driver"); +MODULE_LICENSE("GPL v2"); +MODULE_VERSION(DRV_VERSION); +MODULE_DEVICE_TABLE(pci, otx_cpt_id_table); diff --git a/drivers/crypto/marvell/octeontx/otx_cptpf_mbox.c b/drivers/crypto/marvell/octeontx/otx_cptpf_mbox.c new file mode 100644 index 000000000000..a6774232e9a3 --- /dev/null +++ b/drivers/crypto/marvell/octeontx/otx_cptpf_mbox.c @@ -0,0 +1,253 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Marvell OcteonTX CPT driver + * + * Copyright (C) 2019 Marvell International Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include "otx_cpt_common.h" +#include "otx_cptpf.h" + +static char *get_mbox_opcode_str(int msg_opcode) +{ + char *str = "Unknown"; + + switch (msg_opcode) { + case OTX_CPT_MSG_VF_UP: + str = "UP"; + break; + + case OTX_CPT_MSG_VF_DOWN: + str = "DOWN"; + break; + + case OTX_CPT_MSG_READY: + str = "READY"; + break; + + case OTX_CPT_MSG_QLEN: + str = "QLEN"; + break; + + case OTX_CPT_MSG_QBIND_GRP: + str = "QBIND_GRP"; + break; + + case OTX_CPT_MSG_VQ_PRIORITY: + str = "VQ_PRIORITY"; + break; + + case OTX_CPT_MSG_PF_TYPE: + str = "PF_TYPE"; + break; + + case OTX_CPT_MSG_ACK: + str = "ACK"; + break; + + case OTX_CPT_MSG_NACK: + str = "NACK"; + break; + } + + return str; +} + +static void dump_mbox_msg(struct otx_cpt_mbox *mbox_msg, int vf_id) +{ + char raw_data_str[OTX_CPT_MAX_MBOX_DATA_STR_SIZE]; + + hex_dump_to_buffer(mbox_msg, sizeof(struct otx_cpt_mbox), 16, 8, + raw_data_str, OTX_CPT_MAX_MBOX_DATA_STR_SIZE, false); + if (vf_id >= 0) + pr_debug("MBOX opcode %s received from VF%d raw_data %s", + get_mbox_opcode_str(mbox_msg->msg), vf_id, + raw_data_str); + else + pr_debug("MBOX opcode %s received from PF raw_data %s", + get_mbox_opcode_str(mbox_msg->msg), raw_data_str); +} + +static void otx_cpt_send_msg_to_vf(struct otx_cpt_device *cpt, int vf, + struct otx_cpt_mbox *mbx) +{ + /* Writing mbox(0) causes interrupt */ + writeq(mbx->data, cpt->reg_base + OTX_CPT_PF_VFX_MBOXX(vf, 1)); + writeq(mbx->msg, cpt->reg_base + OTX_CPT_PF_VFX_MBOXX(vf, 0)); +} + +/* + * ACKs VF's mailbox message + * @vf: VF to which ACK to be sent + */ +static void otx_cpt_mbox_send_ack(struct otx_cpt_device *cpt, int vf, + struct otx_cpt_mbox *mbx) +{ + mbx->data = 0ull; + mbx->msg = OTX_CPT_MSG_ACK; + otx_cpt_send_msg_to_vf(cpt, vf, mbx); +} + +/* NACKs VF's mailbox message that PF is not able to complete the action */ +static void otx_cptpf_mbox_send_nack(struct otx_cpt_device *cpt, int vf, + struct otx_cpt_mbox *mbx) +{ + mbx->data = 0ull; + mbx->msg = OTX_CPT_MSG_NACK; + otx_cpt_send_msg_to_vf(cpt, vf, mbx); +} + +static void otx_cpt_clear_mbox_intr(struct otx_cpt_device *cpt, u32 vf) +{ + /* W1C for the VF */ + writeq(1ull << vf, cpt->reg_base + OTX_CPT_PF_MBOX_INTX(0)); +} + +/* + * Configure QLEN/Chunk sizes for VF + */ +static void otx_cpt_cfg_qlen_for_vf(struct otx_cpt_device *cpt, int vf, + u32 size) +{ + union otx_cptx_pf_qx_ctl pf_qx_ctl; + + pf_qx_ctl.u = readq(cpt->reg_base + OTX_CPT_PF_QX_CTL(vf)); + pf_qx_ctl.s.size = size; + pf_qx_ctl.s.cont_err = true; + writeq(pf_qx_ctl.u, cpt->reg_base + OTX_CPT_PF_QX_CTL(vf)); +} + +/* + * Configure VQ priority + */ +static void otx_cpt_cfg_vq_priority(struct otx_cpt_device *cpt, int vf, u32 pri) +{ + union otx_cptx_pf_qx_ctl pf_qx_ctl; + + pf_qx_ctl.u = readq(cpt->reg_base + OTX_CPT_PF_QX_CTL(vf)); + pf_qx_ctl.s.pri = pri; + writeq(pf_qx_ctl.u, cpt->reg_base + OTX_CPT_PF_QX_CTL(vf)); +} + +static int otx_cpt_bind_vq_to_grp(struct otx_cpt_device *cpt, u8 q, u8 grp) +{ + struct device *dev = &cpt->pdev->dev; + struct otx_cpt_eng_grp_info *eng_grp; + union otx_cptx_pf_qx_ctl pf_qx_ctl; + struct otx_cpt_ucode *ucode; + + if (q >= cpt->max_vfs) { + dev_err(dev, "Requested queue %d is > than maximum avail %d", + q, cpt->max_vfs); + return -EINVAL; + } + + if (grp >= OTX_CPT_MAX_ENGINE_GROUPS) { + dev_err(dev, "Requested group %d is > than maximum avail %d", + grp, OTX_CPT_MAX_ENGINE_GROUPS); + return -EINVAL; + } + + eng_grp = &cpt->eng_grps.grp[grp]; + if (!eng_grp->is_enabled) { + dev_err(dev, "Requested engine group %d is disabled", grp); + return -EINVAL; + } + + pf_qx_ctl.u = readq(cpt->reg_base + OTX_CPT_PF_QX_CTL(q)); + pf_qx_ctl.s.grp = grp; + writeq(pf_qx_ctl.u, cpt->reg_base + OTX_CPT_PF_QX_CTL(q)); + + if (eng_grp->mirror.is_ena) + ucode = &eng_grp->g->grp[eng_grp->mirror.idx].ucode[0]; + else + ucode = &eng_grp->ucode[0]; + + if (otx_cpt_uc_supports_eng_type(ucode, OTX_CPT_SE_TYPES)) + return OTX_CPT_SE_TYPES; + else if (otx_cpt_uc_supports_eng_type(ucode, OTX_CPT_AE_TYPES)) + return OTX_CPT_AE_TYPES; + else + return BAD_OTX_CPTVF_TYPE; +} + +/* Interrupt handler to handle mailbox messages from VFs */ +static void otx_cpt_handle_mbox_intr(struct otx_cpt_device *cpt, int vf) +{ + int vftype = 0; + struct otx_cpt_mbox mbx = {}; + struct device *dev = &cpt->pdev->dev; + /* + * MBOX[0] contains msg + * MBOX[1] contains data + */ + mbx.msg = readq(cpt->reg_base + OTX_CPT_PF_VFX_MBOXX(vf, 0)); + mbx.data = readq(cpt->reg_base + OTX_CPT_PF_VFX_MBOXX(vf, 1)); + + dump_mbox_msg(&mbx, vf); + + switch (mbx.msg) { + case OTX_CPT_MSG_VF_UP: + mbx.msg = OTX_CPT_MSG_VF_UP; + mbx.data = cpt->vfs_enabled; + otx_cpt_send_msg_to_vf(cpt, vf, &mbx); + break; + case OTX_CPT_MSG_READY: + mbx.msg = OTX_CPT_MSG_READY; + mbx.data = vf; + otx_cpt_send_msg_to_vf(cpt, vf, &mbx); + break; + case OTX_CPT_MSG_VF_DOWN: + /* First msg in VF teardown sequence */ + otx_cpt_mbox_send_ack(cpt, vf, &mbx); + break; + case OTX_CPT_MSG_QLEN: + otx_cpt_cfg_qlen_for_vf(cpt, vf, mbx.data); + otx_cpt_mbox_send_ack(cpt, vf, &mbx); + break; + case OTX_CPT_MSG_QBIND_GRP: + vftype = otx_cpt_bind_vq_to_grp(cpt, vf, (u8)mbx.data); + if ((vftype != OTX_CPT_AE_TYPES) && + (vftype != OTX_CPT_SE_TYPES)) { + dev_err(dev, "VF%d binding to eng group %llu failed", + vf, mbx.data); + otx_cptpf_mbox_send_nack(cpt, vf, &mbx); + } else { + mbx.msg = OTX_CPT_MSG_QBIND_GRP; + mbx.data = vftype; + otx_cpt_send_msg_to_vf(cpt, vf, &mbx); + } + break; + case OTX_CPT_MSG_PF_TYPE: + mbx.msg = OTX_CPT_MSG_PF_TYPE; + mbx.data = cpt->pf_type; + otx_cpt_send_msg_to_vf(cpt, vf, &mbx); + break; + case OTX_CPT_MSG_VQ_PRIORITY: + otx_cpt_cfg_vq_priority(cpt, vf, mbx.data); + otx_cpt_mbox_send_ack(cpt, vf, &mbx); + break; + default: + dev_err(&cpt->pdev->dev, "Invalid msg from VF%d, msg 0x%llx\n", + vf, mbx.msg); + break; + } +} + +void otx_cpt_mbox_intr_handler (struct otx_cpt_device *cpt, int mbx) +{ + u64 intr; + u8 vf; + + intr = readq(cpt->reg_base + OTX_CPT_PF_MBOX_INTX(0)); + pr_debug("PF interrupt mbox%d mask 0x%llx\n", mbx, intr); + for (vf = 0; vf < cpt->max_vfs; vf++) { + if (intr & (1ULL << vf)) { + otx_cpt_handle_mbox_intr(cpt, vf); + otx_cpt_clear_mbox_intr(cpt, vf); + } + } +} diff --git a/drivers/crypto/marvell/octeontx/otx_cptpf_ucode.c b/drivers/crypto/marvell/octeontx/otx_cptpf_ucode.c new file mode 100644 index 000000000000..d04baa319592 --- /dev/null +++ b/drivers/crypto/marvell/octeontx/otx_cptpf_ucode.c @@ -0,0 +1,1686 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Marvell OcteonTX CPT driver + * + * Copyright (C) 2019 Marvell International Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/ctype.h> +#include <linux/firmware.h> +#include "otx_cpt_common.h" +#include "otx_cptpf_ucode.h" +#include "otx_cptpf.h" + +#define CSR_DELAY 30 +/* Tar archive defines */ +#define TAR_MAGIC "ustar" +#define TAR_MAGIC_LEN 6 +#define TAR_BLOCK_LEN 512 +#define REGTYPE '0' +#define AREGTYPE '\0' + +/* tar header as defined in POSIX 1003.1-1990. */ +struct tar_hdr_t { + char name[100]; + char mode[8]; + char uid[8]; + char gid[8]; + char size[12]; + char mtime[12]; + char chksum[8]; + char typeflag; + char linkname[100]; + char magic[6]; + char version[2]; + char uname[32]; + char gname[32]; + char devmajor[8]; + char devminor[8]; + char prefix[155]; +}; + +struct tar_blk_t { + union { + struct tar_hdr_t hdr; + char block[TAR_BLOCK_LEN]; + }; +}; + +struct tar_arch_info_t { + struct list_head ucodes; + const struct firmware *fw; +}; + +static struct otx_cpt_bitmap get_cores_bmap(struct device *dev, + struct otx_cpt_eng_grp_info *eng_grp) +{ + struct otx_cpt_bitmap bmap = { {0} }; + bool found = false; + int i; + + if (eng_grp->g->engs_num > OTX_CPT_MAX_ENGINES) { + dev_err(dev, "unsupported number of engines %d on octeontx", + eng_grp->g->engs_num); + return bmap; + } + + for (i = 0; i < OTX_CPT_MAX_ETYPES_PER_GRP; i++) { + if (eng_grp->engs[i].type) { + bitmap_or(bmap.bits, bmap.bits, + eng_grp->engs[i].bmap, + eng_grp->g->engs_num); + bmap.size = eng_grp->g->engs_num; + found = true; + } + } + + if (!found) + dev_err(dev, "No engines reserved for engine group %d", + eng_grp->idx); + return bmap; +} + +static int is_eng_type(int val, int eng_type) +{ + return val & (1 << eng_type); +} + +static int dev_supports_eng_type(struct otx_cpt_eng_grps *eng_grps, + int eng_type) +{ + return is_eng_type(eng_grps->eng_types_supported, eng_type); +} + +static void set_ucode_filename(struct otx_cpt_ucode *ucode, + const char *filename) +{ + strlcpy(ucode->filename, filename, OTX_CPT_UCODE_NAME_LENGTH); +} + +static char *get_eng_type_str(int eng_type) +{ + char *str = "unknown"; + + switch (eng_type) { + case OTX_CPT_SE_TYPES: + str = "SE"; + break; + + case OTX_CPT_AE_TYPES: + str = "AE"; + break; + } + return str; +} + +static char *get_ucode_type_str(int ucode_type) +{ + char *str = "unknown"; + + switch (ucode_type) { + case (1 << OTX_CPT_SE_TYPES): + str = "SE"; + break; + + case (1 << OTX_CPT_AE_TYPES): + str = "AE"; + break; + } + return str; +} + +static int get_ucode_type(struct otx_cpt_ucode_hdr *ucode_hdr, int *ucode_type) +{ + char tmp_ver_str[OTX_CPT_UCODE_VER_STR_SZ]; + u32 i, val = 0; + u8 nn; + + strlcpy(tmp_ver_str, ucode_hdr->ver_str, OTX_CPT_UCODE_VER_STR_SZ); + for (i = 0; i < strlen(tmp_ver_str); i++) + tmp_ver_str[i] = tolower(tmp_ver_str[i]); + + nn = ucode_hdr->ver_num.nn; + if (strnstr(tmp_ver_str, "se-", OTX_CPT_UCODE_VER_STR_SZ) && + (nn == OTX_CPT_SE_UC_TYPE1 || nn == OTX_CPT_SE_UC_TYPE2 || + nn == OTX_CPT_SE_UC_TYPE3)) + val |= 1 << OTX_CPT_SE_TYPES; + if (strnstr(tmp_ver_str, "ae", OTX_CPT_UCODE_VER_STR_SZ) && + nn == OTX_CPT_AE_UC_TYPE) + val |= 1 << OTX_CPT_AE_TYPES; + + *ucode_type = val; + + if (!val) + return -EINVAL; + if (is_eng_type(val, OTX_CPT_AE_TYPES) && + is_eng_type(val, OTX_CPT_SE_TYPES)) + return -EINVAL; + return 0; +} + +static int is_mem_zero(const char *ptr, int size) +{ + int i; + + for (i = 0; i < size; i++) { + if (ptr[i]) + return 0; + } + return 1; +} + +static int cpt_set_ucode_base(struct otx_cpt_eng_grp_info *eng_grp, void *obj) +{ + struct otx_cpt_device *cpt = (struct otx_cpt_device *) obj; + dma_addr_t dma_addr; + struct otx_cpt_bitmap bmap; + int i; + + bmap = get_cores_bmap(&cpt->pdev->dev, eng_grp); + if (!bmap.size) + return -EINVAL; + + if (eng_grp->mirror.is_ena) + dma_addr = + eng_grp->g->grp[eng_grp->mirror.idx].ucode[0].align_dma; + else + dma_addr = eng_grp->ucode[0].align_dma; + + /* + * Set UCODE_BASE only for the cores which are not used, + * other cores should have already valid UCODE_BASE set + */ + for_each_set_bit(i, bmap.bits, bmap.size) + if (!eng_grp->g->eng_ref_cnt[i]) + writeq((u64) dma_addr, cpt->reg_base + + OTX_CPT_PF_ENGX_UCODE_BASE(i)); + return 0; +} + +static int cpt_detach_and_disable_cores(struct otx_cpt_eng_grp_info *eng_grp, + void *obj) +{ + struct otx_cpt_device *cpt = (struct otx_cpt_device *) obj; + struct otx_cpt_bitmap bmap = { {0} }; + int timeout = 10; + int i, busy; + u64 reg; + + bmap = get_cores_bmap(&cpt->pdev->dev, eng_grp); + if (!bmap.size) + return -EINVAL; + + /* Detach the cores from group */ + reg = readq(cpt->reg_base + OTX_CPT_PF_GX_EN(eng_grp->idx)); + for_each_set_bit(i, bmap.bits, bmap.size) { + if (reg & (1ull << i)) { + eng_grp->g->eng_ref_cnt[i]--; + reg &= ~(1ull << i); + } + } + writeq(reg, cpt->reg_base + OTX_CPT_PF_GX_EN(eng_grp->idx)); + + /* Wait for cores to become idle */ + do { + busy = 0; + usleep_range(10000, 20000); + if (timeout-- < 0) + return -EBUSY; + + reg = readq(cpt->reg_base + OTX_CPT_PF_EXEC_BUSY); + for_each_set_bit(i, bmap.bits, bmap.size) + if (reg & (1ull << i)) { + busy = 1; + break; + } + } while (busy); + + /* Disable the cores only if they are not used anymore */ + reg = readq(cpt->reg_base + OTX_CPT_PF_EXE_CTL); + for_each_set_bit(i, bmap.bits, bmap.size) + if (!eng_grp->g->eng_ref_cnt[i]) + reg &= ~(1ull << i); + writeq(reg, cpt->reg_base + OTX_CPT_PF_EXE_CTL); + + return 0; +} + +static int cpt_attach_and_enable_cores(struct otx_cpt_eng_grp_info *eng_grp, + void *obj) +{ + struct otx_cpt_device *cpt = (struct otx_cpt_device *) obj; + struct otx_cpt_bitmap bmap; + u64 reg; + int i; + + bmap = get_cores_bmap(&cpt->pdev->dev, eng_grp); + if (!bmap.size) + return -EINVAL; + + /* Attach the cores to the group */ + reg = readq(cpt->reg_base + OTX_CPT_PF_GX_EN(eng_grp->idx)); + for_each_set_bit(i, bmap.bits, bmap.size) { + if (!(reg & (1ull << i))) { + eng_grp->g->eng_ref_cnt[i]++; + reg |= 1ull << i; + } + } + writeq(reg, cpt->reg_base + OTX_CPT_PF_GX_EN(eng_grp->idx)); + + /* Enable the cores */ + reg = readq(cpt->reg_base + OTX_CPT_PF_EXE_CTL); + for_each_set_bit(i, bmap.bits, bmap.size) + reg |= 1ull << i; + writeq(reg, cpt->reg_base + OTX_CPT_PF_EXE_CTL); + + return 0; +} + +static int process_tar_file(struct device *dev, + struct tar_arch_info_t *tar_arch, char *filename, + const u8 *data, u32 size) +{ + struct tar_ucode_info_t *tar_info; + struct otx_cpt_ucode_hdr *ucode_hdr; + int ucode_type, ucode_size; + + /* + * If size is less than microcode header size then don't report + * an error because it might not be microcode file, just process + * next file from archive + */ + if (size < sizeof(struct otx_cpt_ucode_hdr)) + return 0; + + ucode_hdr = (struct otx_cpt_ucode_hdr *) data; + /* + * If microcode version can't be found don't report an error + * because it might not be microcode file, just process next file + */ + if (get_ucode_type(ucode_hdr, &ucode_type)) + return 0; + + ucode_size = ntohl(ucode_hdr->code_length) * 2; + if (!ucode_size || (size < round_up(ucode_size, 16) + + sizeof(struct otx_cpt_ucode_hdr) + OTX_CPT_UCODE_SIGN_LEN)) { + dev_err(dev, "Ucode %s invalid size", filename); + return -EINVAL; + } + + tar_info = kzalloc(sizeof(struct tar_ucode_info_t), GFP_KERNEL); + if (!tar_info) + return -ENOMEM; + + tar_info->ucode_ptr = data; + set_ucode_filename(&tar_info->ucode, filename); + memcpy(tar_info->ucode.ver_str, ucode_hdr->ver_str, + OTX_CPT_UCODE_VER_STR_SZ); + tar_info->ucode.ver_num = ucode_hdr->ver_num; + tar_info->ucode.type = ucode_type; + tar_info->ucode.size = ucode_size; + list_add_tail(&tar_info->list, &tar_arch->ucodes); + + return 0; +} + +static void release_tar_archive(struct tar_arch_info_t *tar_arch) +{ + struct tar_ucode_info_t *curr, *temp; + + if (!tar_arch) + return; + + list_for_each_entry_safe(curr, temp, &tar_arch->ucodes, list) { + list_del(&curr->list); + kfree(curr); + } + + if (tar_arch->fw) + release_firmware(tar_arch->fw); + kfree(tar_arch); +} + +static struct tar_ucode_info_t *get_uc_from_tar_archive( + struct tar_arch_info_t *tar_arch, + int ucode_type) +{ + struct tar_ucode_info_t *curr, *uc_found = NULL; + + list_for_each_entry(curr, &tar_arch->ucodes, list) { + if (!is_eng_type(curr->ucode.type, ucode_type)) + continue; + + if (!uc_found) { + uc_found = curr; + continue; + } + + switch (ucode_type) { + case OTX_CPT_AE_TYPES: + break; + + case OTX_CPT_SE_TYPES: + if (uc_found->ucode.ver_num.nn == OTX_CPT_SE_UC_TYPE2 || + (uc_found->ucode.ver_num.nn == OTX_CPT_SE_UC_TYPE3 + && curr->ucode.ver_num.nn == OTX_CPT_SE_UC_TYPE1)) + uc_found = curr; + break; + } + } + + return uc_found; +} + +static void print_tar_dbg_info(struct tar_arch_info_t *tar_arch, + char *tar_filename) +{ + struct tar_ucode_info_t *curr; + + pr_debug("Tar archive filename %s", tar_filename); + pr_debug("Tar archive pointer %p, size %ld", tar_arch->fw->data, + tar_arch->fw->size); + list_for_each_entry(curr, &tar_arch->ucodes, list) { + pr_debug("Ucode filename %s", curr->ucode.filename); + pr_debug("Ucode version string %s", curr->ucode.ver_str); + pr_debug("Ucode version %d.%d.%d.%d", + curr->ucode.ver_num.nn, curr->ucode.ver_num.xx, + curr->ucode.ver_num.yy, curr->ucode.ver_num.zz); + pr_debug("Ucode type (%d) %s", curr->ucode.type, + get_ucode_type_str(curr->ucode.type)); + pr_debug("Ucode size %d", curr->ucode.size); + pr_debug("Ucode ptr %p\n", curr->ucode_ptr); + } +} + +static struct tar_arch_info_t *load_tar_archive(struct device *dev, + char *tar_filename) +{ + struct tar_arch_info_t *tar_arch = NULL; + struct tar_blk_t *tar_blk; + unsigned int cur_size; + size_t tar_offs = 0; + size_t tar_size; + int ret; + + tar_arch = kzalloc(sizeof(struct tar_arch_info_t), GFP_KERNEL); + if (!tar_arch) + return NULL; + + INIT_LIST_HEAD(&tar_arch->ucodes); + + /* Load tar archive */ + ret = request_firmware(&tar_arch->fw, tar_filename, dev); + if (ret) + goto release_tar_arch; + + if (tar_arch->fw->size < TAR_BLOCK_LEN) { + dev_err(dev, "Invalid tar archive %s ", tar_filename); + goto release_tar_arch; + } + + tar_size = tar_arch->fw->size; + tar_blk = (struct tar_blk_t *) tar_arch->fw->data; + if (strncmp(tar_blk->hdr.magic, TAR_MAGIC, TAR_MAGIC_LEN - 1)) { + dev_err(dev, "Unsupported format of tar archive %s", + tar_filename); + goto release_tar_arch; + } + + while (1) { + /* Read current file size */ + ret = kstrtouint(tar_blk->hdr.size, 8, &cur_size); + if (ret) + goto release_tar_arch; + + if (tar_offs + cur_size > tar_size || + tar_offs + 2*TAR_BLOCK_LEN > tar_size) { + dev_err(dev, "Invalid tar archive %s ", tar_filename); + goto release_tar_arch; + } + + tar_offs += TAR_BLOCK_LEN; + if (tar_blk->hdr.typeflag == REGTYPE || + tar_blk->hdr.typeflag == AREGTYPE) { + ret = process_tar_file(dev, tar_arch, + tar_blk->hdr.name, + &tar_arch->fw->data[tar_offs], + cur_size); + if (ret) + goto release_tar_arch; + } + + tar_offs += (cur_size/TAR_BLOCK_LEN) * TAR_BLOCK_LEN; + if (cur_size % TAR_BLOCK_LEN) + tar_offs += TAR_BLOCK_LEN; + + /* Check for the end of the archive */ + if (tar_offs + 2*TAR_BLOCK_LEN > tar_size) { + dev_err(dev, "Invalid tar archive %s ", tar_filename); + goto release_tar_arch; + } + + if (is_mem_zero(&tar_arch->fw->data[tar_offs], + 2*TAR_BLOCK_LEN)) + break; + + /* Read next block from tar archive */ + tar_blk = (struct tar_blk_t *) &tar_arch->fw->data[tar_offs]; + } + + print_tar_dbg_info(tar_arch, tar_filename); + return tar_arch; +release_tar_arch: + release_tar_archive(tar_arch); + return NULL; +} + +static struct otx_cpt_engs_rsvd *find_engines_by_type( + struct otx_cpt_eng_grp_info *eng_grp, + int eng_type) +{ + int i; + + for (i = 0; i < OTX_CPT_MAX_ETYPES_PER_GRP; i++) { + if (!eng_grp->engs[i].type) + continue; + + if (eng_grp->engs[i].type == eng_type) + return &eng_grp->engs[i]; + } + return NULL; +} + +int otx_cpt_uc_supports_eng_type(struct otx_cpt_ucode *ucode, int eng_type) +{ + return is_eng_type(ucode->type, eng_type); +} +EXPORT_SYMBOL_GPL(otx_cpt_uc_supports_eng_type); + +int otx_cpt_eng_grp_has_eng_type(struct otx_cpt_eng_grp_info *eng_grp, + int eng_type) +{ + struct otx_cpt_engs_rsvd *engs; + + engs = find_engines_by_type(eng_grp, eng_type); + + return (engs != NULL ? 1 : 0); +} +EXPORT_SYMBOL_GPL(otx_cpt_eng_grp_has_eng_type); + +static void print_ucode_info(struct otx_cpt_eng_grp_info *eng_grp, + char *buf, int size) +{ + if (eng_grp->mirror.is_ena) { + scnprintf(buf, size, "%s (shared with engine_group%d)", + eng_grp->g->grp[eng_grp->mirror.idx].ucode[0].ver_str, + eng_grp->mirror.idx); + } else { + scnprintf(buf, size, "%s", eng_grp->ucode[0].ver_str); + } +} + +static void print_engs_info(struct otx_cpt_eng_grp_info *eng_grp, + char *buf, int size, int idx) +{ + struct otx_cpt_engs_rsvd *mirrored_engs = NULL; + struct otx_cpt_engs_rsvd *engs; + int len, i; + + buf[0] = '\0'; + for (i = 0; i < OTX_CPT_MAX_ETYPES_PER_GRP; i++) { + engs = &eng_grp->engs[i]; + if (!engs->type) + continue; + if (idx != -1 && idx != i) + continue; + + if (eng_grp->mirror.is_ena) + mirrored_engs = find_engines_by_type( + &eng_grp->g->grp[eng_grp->mirror.idx], + engs->type); + if (i > 0 && idx == -1) { + len = strlen(buf); + scnprintf(buf+len, size-len, ", "); + } + + len = strlen(buf); + scnprintf(buf+len, size-len, "%d %s ", mirrored_engs ? + engs->count + mirrored_engs->count : engs->count, + get_eng_type_str(engs->type)); + if (mirrored_engs) { + len = strlen(buf); + scnprintf(buf+len, size-len, + "(%d shared with engine_group%d) ", + engs->count <= 0 ? engs->count + + mirrored_engs->count : mirrored_engs->count, + eng_grp->mirror.idx); + } + } +} + +static void print_ucode_dbg_info(struct otx_cpt_ucode *ucode) +{ + pr_debug("Ucode info"); + pr_debug("Ucode version string %s", ucode->ver_str); + pr_debug("Ucode version %d.%d.%d.%d", ucode->ver_num.nn, + ucode->ver_num.xx, ucode->ver_num.yy, ucode->ver_num.zz); + pr_debug("Ucode type %s", get_ucode_type_str(ucode->type)); + pr_debug("Ucode size %d", ucode->size); + pr_debug("Ucode virt address %16.16llx", (u64)ucode->align_va); + pr_debug("Ucode phys address %16.16llx\n", ucode->align_dma); +} + +static void cpt_print_engines_mask(struct otx_cpt_eng_grp_info *eng_grp, + struct device *dev, char *buf, int size) +{ + struct otx_cpt_bitmap bmap; + u32 mask[2]; + + bmap = get_cores_bmap(dev, eng_grp); + if (!bmap.size) { + scnprintf(buf, size, "unknown"); + return; + } + bitmap_to_arr32(mask, bmap.bits, bmap.size); + scnprintf(buf, size, "%8.8x %8.8x", mask[1], mask[0]); +} + + +static void print_dbg_info(struct device *dev, + struct otx_cpt_eng_grps *eng_grps) +{ + char engs_info[2*OTX_CPT_UCODE_NAME_LENGTH]; + struct otx_cpt_eng_grp_info *mirrored_grp; + char engs_mask[OTX_CPT_UCODE_NAME_LENGTH]; + struct otx_cpt_eng_grp_info *grp; + struct otx_cpt_engs_rsvd *engs; + u32 mask[4]; + int i, j; + + pr_debug("Engine groups global info"); + pr_debug("max SE %d, max AE %d", + eng_grps->avail.max_se_cnt, eng_grps->avail.max_ae_cnt); + pr_debug("free SE %d", eng_grps->avail.se_cnt); + pr_debug("free AE %d", eng_grps->avail.ae_cnt); + + for (i = 0; i < OTX_CPT_MAX_ENGINE_GROUPS; i++) { + grp = &eng_grps->grp[i]; + pr_debug("engine_group%d, state %s", i, grp->is_enabled ? + "enabled" : "disabled"); + if (grp->is_enabled) { + mirrored_grp = &eng_grps->grp[grp->mirror.idx]; + pr_debug("Ucode0 filename %s, version %s", + grp->mirror.is_ena ? + mirrored_grp->ucode[0].filename : + grp->ucode[0].filename, + grp->mirror.is_ena ? + mirrored_grp->ucode[0].ver_str : + grp->ucode[0].ver_str); + } + + for (j = 0; j < OTX_CPT_MAX_ETYPES_PER_GRP; j++) { + engs = &grp->engs[j]; + if (engs->type) { + print_engs_info(grp, engs_info, + 2*OTX_CPT_UCODE_NAME_LENGTH, j); + pr_debug("Slot%d: %s", j, engs_info); + bitmap_to_arr32(mask, engs->bmap, + eng_grps->engs_num); + pr_debug("Mask: %8.8x %8.8x %8.8x %8.8x", + mask[3], mask[2], mask[1], mask[0]); + } else + pr_debug("Slot%d not used", j); + } + if (grp->is_enabled) { + cpt_print_engines_mask(grp, dev, engs_mask, + OTX_CPT_UCODE_NAME_LENGTH); + pr_debug("Cmask: %s", engs_mask); + } + } +} + +static int update_engines_avail_count(struct device *dev, + struct otx_cpt_engs_available *avail, + struct otx_cpt_engs_rsvd *engs, int val) +{ + switch (engs->type) { + case OTX_CPT_SE_TYPES: + avail->se_cnt += val; + break; + + case OTX_CPT_AE_TYPES: + avail->ae_cnt += val; + break; + + default: + dev_err(dev, "Invalid engine type %d\n", engs->type); + return -EINVAL; + } + + return 0; +} + +static int update_engines_offset(struct device *dev, + struct otx_cpt_engs_available *avail, + struct otx_cpt_engs_rsvd *engs) +{ + switch (engs->type) { + case OTX_CPT_SE_TYPES: + engs->offset = 0; + break; + + case OTX_CPT_AE_TYPES: + engs->offset = avail->max_se_cnt; + break; + + default: + dev_err(dev, "Invalid engine type %d\n", engs->type); + return -EINVAL; + } + + return 0; +} + +static int release_engines(struct device *dev, struct otx_cpt_eng_grp_info *grp) +{ + int i, ret = 0; + + for (i = 0; i < OTX_CPT_MAX_ETYPES_PER_GRP; i++) { + if (!grp->engs[i].type) + continue; + + if (grp->engs[i].count > 0) { + ret = update_engines_avail_count(dev, &grp->g->avail, + &grp->engs[i], + grp->engs[i].count); + if (ret) + return ret; + } + + grp->engs[i].type = 0; + grp->engs[i].count = 0; + grp->engs[i].offset = 0; + grp->engs[i].ucode = NULL; + bitmap_zero(grp->engs[i].bmap, grp->g->engs_num); + } + + return 0; +} + +static int do_reserve_engines(struct device *dev, + struct otx_cpt_eng_grp_info *grp, + struct otx_cpt_engines *req_engs) +{ + struct otx_cpt_engs_rsvd *engs = NULL; + int i, ret; + + for (i = 0; i < OTX_CPT_MAX_ETYPES_PER_GRP; i++) { + if (!grp->engs[i].type) { + engs = &grp->engs[i]; + break; + } + } + + if (!engs) + return -ENOMEM; + + engs->type = req_engs->type; + engs->count = req_engs->count; + + ret = update_engines_offset(dev, &grp->g->avail, engs); + if (ret) + return ret; + + if (engs->count > 0) { + ret = update_engines_avail_count(dev, &grp->g->avail, engs, + -engs->count); + if (ret) + return ret; + } + + return 0; +} + +static int check_engines_availability(struct device *dev, + struct otx_cpt_eng_grp_info *grp, + struct otx_cpt_engines *req_eng) +{ + int avail_cnt = 0; + + switch (req_eng->type) { + case OTX_CPT_SE_TYPES: + avail_cnt = grp->g->avail.se_cnt; + break; + + case OTX_CPT_AE_TYPES: + avail_cnt = grp->g->avail.ae_cnt; + break; + + default: + dev_err(dev, "Invalid engine type %d\n", req_eng->type); + return -EINVAL; + } + + if (avail_cnt < req_eng->count) { + dev_err(dev, + "Error available %s engines %d < than requested %d", + get_eng_type_str(req_eng->type), + avail_cnt, req_eng->count); + return -EBUSY; + } + + return 0; +} + +static int reserve_engines(struct device *dev, struct otx_cpt_eng_grp_info *grp, + struct otx_cpt_engines *req_engs, int req_cnt) +{ + int i, ret; + + /* Validate if a number of requested engines is available */ + for (i = 0; i < req_cnt; i++) { + ret = check_engines_availability(dev, grp, &req_engs[i]); + if (ret) + return ret; + } + + /* Reserve requested engines for this engine group */ + for (i = 0; i < req_cnt; i++) { + ret = do_reserve_engines(dev, grp, &req_engs[i]); + if (ret) + return ret; + } + return 0; +} + +static ssize_t eng_grp_info_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + char ucode_info[2*OTX_CPT_UCODE_NAME_LENGTH]; + char engs_info[2*OTX_CPT_UCODE_NAME_LENGTH]; + char engs_mask[OTX_CPT_UCODE_NAME_LENGTH]; + struct otx_cpt_eng_grp_info *eng_grp; + int ret; + + eng_grp = container_of(attr, struct otx_cpt_eng_grp_info, info_attr); + mutex_lock(&eng_grp->g->lock); + + print_engs_info(eng_grp, engs_info, 2*OTX_CPT_UCODE_NAME_LENGTH, -1); + print_ucode_info(eng_grp, ucode_info, 2*OTX_CPT_UCODE_NAME_LENGTH); + cpt_print_engines_mask(eng_grp, dev, engs_mask, + OTX_CPT_UCODE_NAME_LENGTH); + ret = scnprintf(buf, PAGE_SIZE, + "Microcode : %s\nEngines: %s\nEngines mask: %s\n", + ucode_info, engs_info, engs_mask); + + mutex_unlock(&eng_grp->g->lock); + return ret; +} + +static int create_sysfs_eng_grps_info(struct device *dev, + struct otx_cpt_eng_grp_info *eng_grp) +{ + int ret; + + eng_grp->info_attr.show = eng_grp_info_show; + eng_grp->info_attr.store = NULL; + eng_grp->info_attr.attr.name = eng_grp->sysfs_info_name; + eng_grp->info_attr.attr.mode = 0440; + sysfs_attr_init(&eng_grp->info_attr.attr); + ret = device_create_file(dev, &eng_grp->info_attr); + if (ret) + return ret; + + return 0; +} + +static void ucode_unload(struct device *dev, struct otx_cpt_ucode *ucode) +{ + if (ucode->va) { + dma_free_coherent(dev, ucode->size + OTX_CPT_UCODE_ALIGNMENT, + ucode->va, ucode->dma); + ucode->va = NULL; + ucode->align_va = NULL; + ucode->dma = 0; + ucode->align_dma = 0; + ucode->size = 0; + } + + memset(&ucode->ver_str, 0, OTX_CPT_UCODE_VER_STR_SZ); + memset(&ucode->ver_num, 0, sizeof(struct otx_cpt_ucode_ver_num)); + set_ucode_filename(ucode, ""); + ucode->type = 0; +} + +static int copy_ucode_to_dma_mem(struct device *dev, + struct otx_cpt_ucode *ucode, + const u8 *ucode_data) +{ + u32 i; + + /* Allocate DMAable space */ + ucode->va = dma_alloc_coherent(dev, ucode->size + + OTX_CPT_UCODE_ALIGNMENT, + &ucode->dma, GFP_KERNEL); + if (!ucode->va) { + dev_err(dev, "Unable to allocate space for microcode"); + return -ENOMEM; + } + ucode->align_va = PTR_ALIGN(ucode->va, OTX_CPT_UCODE_ALIGNMENT); + ucode->align_dma = PTR_ALIGN(ucode->dma, OTX_CPT_UCODE_ALIGNMENT); + + memcpy((void *) ucode->align_va, (void *) ucode_data + + sizeof(struct otx_cpt_ucode_hdr), ucode->size); + + /* Byte swap 64-bit */ + for (i = 0; i < (ucode->size / 8); i++) + ((u64 *)ucode->align_va)[i] = + cpu_to_be64(((u64 *)ucode->align_va)[i]); + /* Ucode needs 16-bit swap */ + for (i = 0; i < (ucode->size / 2); i++) + ((u16 *)ucode->align_va)[i] = + cpu_to_be16(((u16 *)ucode->align_va)[i]); + return 0; +} + +static int ucode_load(struct device *dev, struct otx_cpt_ucode *ucode, + const char *ucode_filename) +{ + struct otx_cpt_ucode_hdr *ucode_hdr; + const struct firmware *fw; + int ret; + + set_ucode_filename(ucode, ucode_filename); + ret = request_firmware(&fw, ucode->filename, dev); + if (ret) + return ret; + + ucode_hdr = (struct otx_cpt_ucode_hdr *) fw->data; + memcpy(ucode->ver_str, ucode_hdr->ver_str, OTX_CPT_UCODE_VER_STR_SZ); + ucode->ver_num = ucode_hdr->ver_num; + ucode->size = ntohl(ucode_hdr->code_length) * 2; + if (!ucode->size || (fw->size < round_up(ucode->size, 16) + + sizeof(struct otx_cpt_ucode_hdr) + OTX_CPT_UCODE_SIGN_LEN)) { + dev_err(dev, "Ucode %s invalid size", ucode_filename); + ret = -EINVAL; + goto release_fw; + } + + ret = get_ucode_type(ucode_hdr, &ucode->type); + if (ret) { + dev_err(dev, "Microcode %s unknown type 0x%x", ucode->filename, + ucode->type); + goto release_fw; + } + + ret = copy_ucode_to_dma_mem(dev, ucode, fw->data); + if (ret) + goto release_fw; + + print_ucode_dbg_info(ucode); +release_fw: + release_firmware(fw); + return ret; +} + +static int enable_eng_grp(struct otx_cpt_eng_grp_info *eng_grp, + void *obj) +{ + int ret; + + ret = cpt_set_ucode_base(eng_grp, obj); + if (ret) + return ret; + + ret = cpt_attach_and_enable_cores(eng_grp, obj); + return ret; +} + +static int disable_eng_grp(struct device *dev, + struct otx_cpt_eng_grp_info *eng_grp, + void *obj) +{ + int i, ret; + + ret = cpt_detach_and_disable_cores(eng_grp, obj); + if (ret) + return ret; + + /* Unload ucode used by this engine group */ + ucode_unload(dev, &eng_grp->ucode[0]); + + for (i = 0; i < OTX_CPT_MAX_ETYPES_PER_GRP; i++) { + if (!eng_grp->engs[i].type) + continue; + + eng_grp->engs[i].ucode = &eng_grp->ucode[0]; + } + + ret = cpt_set_ucode_base(eng_grp, obj); + + return ret; +} + +static void setup_eng_grp_mirroring(struct otx_cpt_eng_grp_info *dst_grp, + struct otx_cpt_eng_grp_info *src_grp) +{ + /* Setup fields for engine group which is mirrored */ + src_grp->mirror.is_ena = false; + src_grp->mirror.idx = 0; + src_grp->mirror.ref_count++; + + /* Setup fields for mirroring engine group */ + dst_grp->mirror.is_ena = true; + dst_grp->mirror.idx = src_grp->idx; + dst_grp->mirror.ref_count = 0; +} + +static void remove_eng_grp_mirroring(struct otx_cpt_eng_grp_info *dst_grp) +{ + struct otx_cpt_eng_grp_info *src_grp; + + if (!dst_grp->mirror.is_ena) + return; + + src_grp = &dst_grp->g->grp[dst_grp->mirror.idx]; + + src_grp->mirror.ref_count--; + dst_grp->mirror.is_ena = false; + dst_grp->mirror.idx = 0; + dst_grp->mirror.ref_count = 0; +} + +static void update_requested_engs(struct otx_cpt_eng_grp_info *mirrored_eng_grp, + struct otx_cpt_engines *engs, int engs_cnt) +{ + struct otx_cpt_engs_rsvd *mirrored_engs; + int i; + + for (i = 0; i < engs_cnt; i++) { + mirrored_engs = find_engines_by_type(mirrored_eng_grp, + engs[i].type); + if (!mirrored_engs) + continue; + + /* + * If mirrored group has this type of engines attached then + * there are 3 scenarios possible: + * 1) mirrored_engs.count == engs[i].count then all engines + * from mirrored engine group will be shared with this engine + * group + * 2) mirrored_engs.count > engs[i].count then only a subset of + * engines from mirrored engine group will be shared with this + * engine group + * 3) mirrored_engs.count < engs[i].count then all engines + * from mirrored engine group will be shared with this group + * and additional engines will be reserved for exclusively use + * by this engine group + */ + engs[i].count -= mirrored_engs->count; + } +} + +static struct otx_cpt_eng_grp_info *find_mirrored_eng_grp( + struct otx_cpt_eng_grp_info *grp) +{ + struct otx_cpt_eng_grps *eng_grps = grp->g; + int i; + + for (i = 0; i < OTX_CPT_MAX_ENGINE_GROUPS; i++) { + if (!eng_grps->grp[i].is_enabled) + continue; + if (eng_grps->grp[i].ucode[0].type) + continue; + if (grp->idx == i) + continue; + if (!strncasecmp(eng_grps->grp[i].ucode[0].ver_str, + grp->ucode[0].ver_str, + OTX_CPT_UCODE_VER_STR_SZ)) + return &eng_grps->grp[i]; + } + + return NULL; +} + +static struct otx_cpt_eng_grp_info *find_unused_eng_grp( + struct otx_cpt_eng_grps *eng_grps) +{ + int i; + + for (i = 0; i < OTX_CPT_MAX_ENGINE_GROUPS; i++) { + if (!eng_grps->grp[i].is_enabled) + return &eng_grps->grp[i]; + } + return NULL; +} + +static int eng_grp_update_masks(struct device *dev, + struct otx_cpt_eng_grp_info *eng_grp) +{ + struct otx_cpt_engs_rsvd *engs, *mirrored_engs; + struct otx_cpt_bitmap tmp_bmap = { {0} }; + int i, j, cnt, max_cnt; + int bit; + + for (i = 0; i < OTX_CPT_MAX_ETYPES_PER_GRP; i++) { + engs = &eng_grp->engs[i]; + if (!engs->type) + continue; + if (engs->count <= 0) + continue; + + switch (engs->type) { + case OTX_CPT_SE_TYPES: + max_cnt = eng_grp->g->avail.max_se_cnt; + break; + + case OTX_CPT_AE_TYPES: + max_cnt = eng_grp->g->avail.max_ae_cnt; + break; + + default: + dev_err(dev, "Invalid engine type %d", engs->type); + return -EINVAL; + } + + cnt = engs->count; + WARN_ON(engs->offset + max_cnt > OTX_CPT_MAX_ENGINES); + bitmap_zero(tmp_bmap.bits, eng_grp->g->engs_num); + for (j = engs->offset; j < engs->offset + max_cnt; j++) { + if (!eng_grp->g->eng_ref_cnt[j]) { + bitmap_set(tmp_bmap.bits, j, 1); + cnt--; + if (!cnt) + break; + } + } + + if (cnt) + return -ENOSPC; + + bitmap_copy(engs->bmap, tmp_bmap.bits, eng_grp->g->engs_num); + } + + if (!eng_grp->mirror.is_ena) + return 0; + + for (i = 0; i < OTX_CPT_MAX_ETYPES_PER_GRP; i++) { + engs = &eng_grp->engs[i]; + if (!engs->type) + continue; + + mirrored_engs = find_engines_by_type( + &eng_grp->g->grp[eng_grp->mirror.idx], + engs->type); + WARN_ON(!mirrored_engs && engs->count <= 0); + if (!mirrored_engs) + continue; + + bitmap_copy(tmp_bmap.bits, mirrored_engs->bmap, + eng_grp->g->engs_num); + if (engs->count < 0) { + bit = find_first_bit(mirrored_engs->bmap, + eng_grp->g->engs_num); + bitmap_clear(tmp_bmap.bits, bit, -engs->count); + } + bitmap_or(engs->bmap, engs->bmap, tmp_bmap.bits, + eng_grp->g->engs_num); + } + return 0; +} + +static int delete_engine_group(struct device *dev, + struct otx_cpt_eng_grp_info *eng_grp) +{ + int i, ret; + + if (!eng_grp->is_enabled) + return -EINVAL; + + if (eng_grp->mirror.ref_count) { + dev_err(dev, "Can't delete engine_group%d as it is used by:", + eng_grp->idx); + for (i = 0; i < OTX_CPT_MAX_ENGINE_GROUPS; i++) { + if (eng_grp->g->grp[i].mirror.is_ena && + eng_grp->g->grp[i].mirror.idx == eng_grp->idx) + dev_err(dev, "engine_group%d", i); + } + return -EINVAL; + } + + /* Removing engine group mirroring if enabled */ + remove_eng_grp_mirroring(eng_grp); + + /* Disable engine group */ + ret = disable_eng_grp(dev, eng_grp, eng_grp->g->obj); + if (ret) + return ret; + + /* Release all engines held by this engine group */ + ret = release_engines(dev, eng_grp); + if (ret) + return ret; + + device_remove_file(dev, &eng_grp->info_attr); + eng_grp->is_enabled = false; + + return 0; +} + +static int validate_1_ucode_scenario(struct device *dev, + struct otx_cpt_eng_grp_info *eng_grp, + struct otx_cpt_engines *engs, int engs_cnt) +{ + int i; + + /* Verify that ucode loaded supports requested engine types */ + for (i = 0; i < engs_cnt; i++) { + if (!otx_cpt_uc_supports_eng_type(&eng_grp->ucode[0], + engs[i].type)) { + dev_err(dev, + "Microcode %s does not support %s engines", + eng_grp->ucode[0].filename, + get_eng_type_str(engs[i].type)); + return -EINVAL; + } + } + return 0; +} + +static void update_ucode_ptrs(struct otx_cpt_eng_grp_info *eng_grp) +{ + struct otx_cpt_ucode *ucode; + + if (eng_grp->mirror.is_ena) + ucode = &eng_grp->g->grp[eng_grp->mirror.idx].ucode[0]; + else + ucode = &eng_grp->ucode[0]; + WARN_ON(!eng_grp->engs[0].type); + eng_grp->engs[0].ucode = ucode; +} + +static int create_engine_group(struct device *dev, + struct otx_cpt_eng_grps *eng_grps, + struct otx_cpt_engines *engs, int engs_cnt, + void *ucode_data[], int ucodes_cnt, + bool use_uc_from_tar_arch) +{ + struct otx_cpt_eng_grp_info *mirrored_eng_grp; + struct tar_ucode_info_t *tar_info; + struct otx_cpt_eng_grp_info *eng_grp; + int i, ret = 0; + + if (ucodes_cnt > OTX_CPT_MAX_ETYPES_PER_GRP) + return -EINVAL; + + /* Validate if requested engine types are supported by this device */ + for (i = 0; i < engs_cnt; i++) + if (!dev_supports_eng_type(eng_grps, engs[i].type)) { + dev_err(dev, "Device does not support %s engines", + get_eng_type_str(engs[i].type)); + return -EPERM; + } + + /* Find engine group which is not used */ + eng_grp = find_unused_eng_grp(eng_grps); + if (!eng_grp) { + dev_err(dev, "Error all engine groups are being used"); + return -ENOSPC; + } + + /* Load ucode */ + for (i = 0; i < ucodes_cnt; i++) { + if (use_uc_from_tar_arch) { + tar_info = (struct tar_ucode_info_t *) ucode_data[i]; + eng_grp->ucode[i] = tar_info->ucode; + ret = copy_ucode_to_dma_mem(dev, &eng_grp->ucode[i], + tar_info->ucode_ptr); + } else + ret = ucode_load(dev, &eng_grp->ucode[i], + (char *) ucode_data[i]); + if (ret) + goto err_ucode_unload; + } + + /* Validate scenario where 1 ucode is used */ + ret = validate_1_ucode_scenario(dev, eng_grp, engs, engs_cnt); + if (ret) + goto err_ucode_unload; + + /* Check if this group mirrors another existing engine group */ + mirrored_eng_grp = find_mirrored_eng_grp(eng_grp); + if (mirrored_eng_grp) { + /* Setup mirroring */ + setup_eng_grp_mirroring(eng_grp, mirrored_eng_grp); + + /* + * Update count of requested engines because some + * of them might be shared with mirrored group + */ + update_requested_engs(mirrored_eng_grp, engs, engs_cnt); + } + + /* Reserve engines */ + ret = reserve_engines(dev, eng_grp, engs, engs_cnt); + if (ret) + goto err_ucode_unload; + + /* Update ucode pointers used by engines */ + update_ucode_ptrs(eng_grp); + + /* Update engine masks used by this group */ + ret = eng_grp_update_masks(dev, eng_grp); + if (ret) + goto err_release_engs; + + /* Create sysfs entry for engine group info */ + ret = create_sysfs_eng_grps_info(dev, eng_grp); + if (ret) + goto err_release_engs; + + /* Enable engine group */ + ret = enable_eng_grp(eng_grp, eng_grps->obj); + if (ret) + goto err_release_engs; + + /* + * If this engine group mirrors another engine group + * then we need to unload ucode as we will use ucode + * from mirrored engine group + */ + if (eng_grp->mirror.is_ena) + ucode_unload(dev, &eng_grp->ucode[0]); + + eng_grp->is_enabled = true; + if (eng_grp->mirror.is_ena) + dev_info(dev, + "Engine_group%d: reuse microcode %s from group %d", + eng_grp->idx, mirrored_eng_grp->ucode[0].ver_str, + mirrored_eng_grp->idx); + else + dev_info(dev, "Engine_group%d: microcode loaded %s", + eng_grp->idx, eng_grp->ucode[0].ver_str); + + return 0; + +err_release_engs: + release_engines(dev, eng_grp); +err_ucode_unload: + ucode_unload(dev, &eng_grp->ucode[0]); + return ret; +} + +static ssize_t ucode_load_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct otx_cpt_engines engs[OTX_CPT_MAX_ETYPES_PER_GRP] = { {0} }; + char *ucode_filename[OTX_CPT_MAX_ETYPES_PER_GRP]; + char tmp_buf[OTX_CPT_UCODE_NAME_LENGTH] = { 0 }; + char *start, *val, *err_msg, *tmp; + struct otx_cpt_eng_grps *eng_grps; + int grp_idx = 0, ret = -EINVAL; + bool has_se, has_ie, has_ae; + int del_grp_idx = -1; + int ucode_idx = 0; + + if (strlen(buf) > OTX_CPT_UCODE_NAME_LENGTH) + return -EINVAL; + + eng_grps = container_of(attr, struct otx_cpt_eng_grps, ucode_load_attr); + err_msg = "Invalid engine group format"; + strlcpy(tmp_buf, buf, OTX_CPT_UCODE_NAME_LENGTH); + start = tmp_buf; + + has_se = has_ie = has_ae = false; + + for (;;) { + val = strsep(&start, ";"); + if (!val) + break; + val = strim(val); + if (!*val) + continue; + + if (!strncasecmp(val, "engine_group", 12)) { + if (del_grp_idx != -1) + goto err_print; + tmp = strim(strsep(&val, ":")); + if (!val) + goto err_print; + if (strlen(tmp) != 13) + goto err_print; + if (kstrtoint((tmp + 12), 10, &del_grp_idx)) + goto err_print; + val = strim(val); + if (strncasecmp(val, "null", 4)) + goto err_print; + if (strlen(val) != 4) + goto err_print; + } else if (!strncasecmp(val, "se", 2) && strchr(val, ':')) { + if (has_se || ucode_idx) + goto err_print; + tmp = strim(strsep(&val, ":")); + if (!val) + goto err_print; + if (strlen(tmp) != 2) + goto err_print; + if (kstrtoint(strim(val), 10, &engs[grp_idx].count)) + goto err_print; + engs[grp_idx++].type = OTX_CPT_SE_TYPES; + has_se = true; + } else if (!strncasecmp(val, "ae", 2) && strchr(val, ':')) { + if (has_ae || ucode_idx) + goto err_print; + tmp = strim(strsep(&val, ":")); + if (!val) + goto err_print; + if (strlen(tmp) != 2) + goto err_print; + if (kstrtoint(strim(val), 10, &engs[grp_idx].count)) + goto err_print; + engs[grp_idx++].type = OTX_CPT_AE_TYPES; + has_ae = true; + } else { + if (ucode_idx > 1) + goto err_print; + if (!strlen(val)) + goto err_print; + if (strnstr(val, " ", strlen(val))) + goto err_print; + ucode_filename[ucode_idx++] = val; + } + } + + /* Validate input parameters */ + if (del_grp_idx == -1) { + if (!(grp_idx && ucode_idx)) + goto err_print; + + if (ucode_idx > 1 && grp_idx < 2) + goto err_print; + + if (grp_idx > OTX_CPT_MAX_ETYPES_PER_GRP) { + err_msg = "Error max 2 engine types can be attached"; + goto err_print; + } + + } else { + if (del_grp_idx < 0 || + del_grp_idx >= OTX_CPT_MAX_ENGINE_GROUPS) { + dev_err(dev, "Invalid engine group index %d", + del_grp_idx); + ret = -EINVAL; + return ret; + } + + if (!eng_grps->grp[del_grp_idx].is_enabled) { + dev_err(dev, "Error engine_group%d is not configured", + del_grp_idx); + ret = -EINVAL; + return ret; + } + + if (grp_idx || ucode_idx) + goto err_print; + } + + mutex_lock(&eng_grps->lock); + + if (eng_grps->is_rdonly) { + dev_err(dev, "Disable VFs before modifying engine groups\n"); + ret = -EACCES; + goto err_unlock; + } + + if (del_grp_idx == -1) + /* create engine group */ + ret = create_engine_group(dev, eng_grps, engs, grp_idx, + (void **) ucode_filename, + ucode_idx, false); + else + /* delete engine group */ + ret = delete_engine_group(dev, &eng_grps->grp[del_grp_idx]); + if (ret) + goto err_unlock; + + print_dbg_info(dev, eng_grps); +err_unlock: + mutex_unlock(&eng_grps->lock); + return ret ? ret : count; +err_print: + dev_err(dev, "%s\n", err_msg); + + return ret; +} + +int otx_cpt_try_create_default_eng_grps(struct pci_dev *pdev, + struct otx_cpt_eng_grps *eng_grps, + int pf_type) +{ + struct tar_ucode_info_t *tar_info[OTX_CPT_MAX_ETYPES_PER_GRP] = { 0 }; + struct otx_cpt_engines engs[OTX_CPT_MAX_ETYPES_PER_GRP] = { {0} }; + struct tar_arch_info_t *tar_arch = NULL; + char *tar_filename; + int i, ret = 0; + + mutex_lock(&eng_grps->lock); + + /* + * We don't create engine group for kernel crypto if attempt to create + * it was already made (when user enabled VFs for the first time) + */ + if (eng_grps->is_first_try) + goto unlock_mutex; + eng_grps->is_first_try = true; + + /* We create group for kcrypto only if no groups are configured */ + for (i = 0; i < OTX_CPT_MAX_ENGINE_GROUPS; i++) + if (eng_grps->grp[i].is_enabled) + goto unlock_mutex; + + switch (pf_type) { + case OTX_CPT_AE: + case OTX_CPT_SE: + tar_filename = OTX_CPT_UCODE_TAR_FILE_NAME; + break; + + default: + dev_err(&pdev->dev, "Unknown PF type %d\n", pf_type); + ret = -EINVAL; + goto unlock_mutex; + } + + tar_arch = load_tar_archive(&pdev->dev, tar_filename); + if (!tar_arch) + goto unlock_mutex; + + /* + * If device supports SE engines and there is SE microcode in tar + * archive try to create engine group with SE engines for kernel + * crypto functionality (symmetric crypto) + */ + tar_info[0] = get_uc_from_tar_archive(tar_arch, OTX_CPT_SE_TYPES); + if (tar_info[0] && + dev_supports_eng_type(eng_grps, OTX_CPT_SE_TYPES)) { + + engs[0].type = OTX_CPT_SE_TYPES; + engs[0].count = eng_grps->avail.max_se_cnt; + + ret = create_engine_group(&pdev->dev, eng_grps, engs, 1, + (void **) tar_info, 1, true); + if (ret) + goto release_tar_arch; + } + /* + * If device supports AE engines and there is AE microcode in tar + * archive try to create engine group with AE engines for asymmetric + * crypto functionality. + */ + tar_info[0] = get_uc_from_tar_archive(tar_arch, OTX_CPT_AE_TYPES); + if (tar_info[0] && + dev_supports_eng_type(eng_grps, OTX_CPT_AE_TYPES)) { + + engs[0].type = OTX_CPT_AE_TYPES; + engs[0].count = eng_grps->avail.max_ae_cnt; + + ret = create_engine_group(&pdev->dev, eng_grps, engs, 1, + (void **) tar_info, 1, true); + if (ret) + goto release_tar_arch; + } + + print_dbg_info(&pdev->dev, eng_grps); +release_tar_arch: + release_tar_archive(tar_arch); +unlock_mutex: + mutex_unlock(&eng_grps->lock); + return ret; +} + +void otx_cpt_set_eng_grps_is_rdonly(struct otx_cpt_eng_grps *eng_grps, + bool is_rdonly) +{ + mutex_lock(&eng_grps->lock); + + eng_grps->is_rdonly = is_rdonly; + + mutex_unlock(&eng_grps->lock); +} + +void otx_cpt_disable_all_cores(struct otx_cpt_device *cpt) +{ + int grp, timeout = 100; + u64 reg; + + /* Disengage the cores from groups */ + for (grp = 0; grp < OTX_CPT_MAX_ENGINE_GROUPS; grp++) { + writeq(0, cpt->reg_base + OTX_CPT_PF_GX_EN(grp)); + udelay(CSR_DELAY); + } + + reg = readq(cpt->reg_base + OTX_CPT_PF_EXEC_BUSY); + while (reg) { + udelay(CSR_DELAY); + reg = readq(cpt->reg_base + OTX_CPT_PF_EXEC_BUSY); + if (timeout--) { + dev_warn(&cpt->pdev->dev, "Cores still busy"); + break; + } + } + + /* Disable the cores */ + writeq(0, cpt->reg_base + OTX_CPT_PF_EXE_CTL); +} + +void otx_cpt_cleanup_eng_grps(struct pci_dev *pdev, + struct otx_cpt_eng_grps *eng_grps) +{ + struct otx_cpt_eng_grp_info *grp; + int i, j; + + mutex_lock(&eng_grps->lock); + if (eng_grps->is_ucode_load_created) { + device_remove_file(&pdev->dev, + &eng_grps->ucode_load_attr); + eng_grps->is_ucode_load_created = false; + } + + /* First delete all mirroring engine groups */ + for (i = 0; i < OTX_CPT_MAX_ENGINE_GROUPS; i++) + if (eng_grps->grp[i].mirror.is_ena) + delete_engine_group(&pdev->dev, &eng_grps->grp[i]); + + /* Delete remaining engine groups */ + for (i = 0; i < OTX_CPT_MAX_ENGINE_GROUPS; i++) + delete_engine_group(&pdev->dev, &eng_grps->grp[i]); + + /* Release memory */ + for (i = 0; i < OTX_CPT_MAX_ENGINE_GROUPS; i++) { + grp = &eng_grps->grp[i]; + for (j = 0; j < OTX_CPT_MAX_ETYPES_PER_GRP; j++) { + kfree(grp->engs[j].bmap); + grp->engs[j].bmap = NULL; + } + } + + mutex_unlock(&eng_grps->lock); +} + +int otx_cpt_init_eng_grps(struct pci_dev *pdev, + struct otx_cpt_eng_grps *eng_grps, int pf_type) +{ + struct otx_cpt_eng_grp_info *grp; + int i, j, ret = 0; + + mutex_init(&eng_grps->lock); + eng_grps->obj = pci_get_drvdata(pdev); + eng_grps->avail.se_cnt = eng_grps->avail.max_se_cnt; + eng_grps->avail.ae_cnt = eng_grps->avail.max_ae_cnt; + + eng_grps->engs_num = eng_grps->avail.max_se_cnt + + eng_grps->avail.max_ae_cnt; + if (eng_grps->engs_num > OTX_CPT_MAX_ENGINES) { + dev_err(&pdev->dev, + "Number of engines %d > than max supported %d", + eng_grps->engs_num, OTX_CPT_MAX_ENGINES); + ret = -EINVAL; + goto err; + } + + for (i = 0; i < OTX_CPT_MAX_ENGINE_GROUPS; i++) { + grp = &eng_grps->grp[i]; + grp->g = eng_grps; + grp->idx = i; + + snprintf(grp->sysfs_info_name, OTX_CPT_UCODE_NAME_LENGTH, + "engine_group%d", i); + for (j = 0; j < OTX_CPT_MAX_ETYPES_PER_GRP; j++) { + grp->engs[j].bmap = + kcalloc(BITS_TO_LONGS(eng_grps->engs_num), + sizeof(long), GFP_KERNEL); + if (!grp->engs[j].bmap) { + ret = -ENOMEM; + goto err; + } + } + } + + switch (pf_type) { + case OTX_CPT_SE: + /* OcteonTX 83XX SE CPT PF has only SE engines attached */ + eng_grps->eng_types_supported = 1 << OTX_CPT_SE_TYPES; + break; + + case OTX_CPT_AE: + /* OcteonTX 83XX AE CPT PF has only AE engines attached */ + eng_grps->eng_types_supported = 1 << OTX_CPT_AE_TYPES; + break; + + default: + dev_err(&pdev->dev, "Unknown PF type %d\n", pf_type); + ret = -EINVAL; + goto err; + } + + eng_grps->ucode_load_attr.show = NULL; + eng_grps->ucode_load_attr.store = ucode_load_store; + eng_grps->ucode_load_attr.attr.name = "ucode_load"; + eng_grps->ucode_load_attr.attr.mode = 0220; + sysfs_attr_init(&eng_grps->ucode_load_attr.attr); + ret = device_create_file(&pdev->dev, + &eng_grps->ucode_load_attr); + if (ret) + goto err; + eng_grps->is_ucode_load_created = true; + + print_dbg_info(&pdev->dev, eng_grps); + return ret; +err: + otx_cpt_cleanup_eng_grps(pdev, eng_grps); + return ret; +} diff --git a/drivers/crypto/marvell/octeontx/otx_cptpf_ucode.h b/drivers/crypto/marvell/octeontx/otx_cptpf_ucode.h new file mode 100644 index 000000000000..14f02b60d0c2 --- /dev/null +++ b/drivers/crypto/marvell/octeontx/otx_cptpf_ucode.h @@ -0,0 +1,180 @@ +/* SPDX-License-Identifier: GPL-2.0 + * Marvell OcteonTX CPT driver + * + * Copyright (C) 2019 Marvell International Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef __OTX_CPTPF_UCODE_H +#define __OTX_CPTPF_UCODE_H + +#include <linux/pci.h> +#include <linux/types.h> +#include <linux/module.h> +#include "otx_cpt_hw_types.h" + +/* CPT ucode name maximum length */ +#define OTX_CPT_UCODE_NAME_LENGTH 64 +/* + * On OcteonTX 83xx platform, only one type of engines is allowed to be + * attached to an engine group. + */ +#define OTX_CPT_MAX_ETYPES_PER_GRP 1 + +/* Default tar archive file names */ +#define OTX_CPT_UCODE_TAR_FILE_NAME "cpt8x-mc.tar" + +/* CPT ucode alignment */ +#define OTX_CPT_UCODE_ALIGNMENT 128 + +/* CPT ucode signature size */ +#define OTX_CPT_UCODE_SIGN_LEN 256 + +/* Microcode version string length */ +#define OTX_CPT_UCODE_VER_STR_SZ 44 + +/* Maximum number of supported engines/cores on OcteonTX 83XX platform */ +#define OTX_CPT_MAX_ENGINES 64 + +#define OTX_CPT_ENGS_BITMASK_LEN (OTX_CPT_MAX_ENGINES/(BITS_PER_BYTE * \ + sizeof(unsigned long))) + +/* Microcode types */ +enum otx_cpt_ucode_type { + OTX_CPT_AE_UC_TYPE = 1, /* AE-MAIN */ + OTX_CPT_SE_UC_TYPE1 = 20, /* SE-MAIN - combination of 21 and 22 */ + OTX_CPT_SE_UC_TYPE2 = 21, /* Fast Path IPSec + AirCrypto */ + OTX_CPT_SE_UC_TYPE3 = 22, /* + * Hash + HMAC + FlexiCrypto + RNG + Full + * Feature IPSec + AirCrypto + Kasumi + */ +}; + +struct otx_cpt_bitmap { + unsigned long bits[OTX_CPT_ENGS_BITMASK_LEN]; + int size; +}; + +struct otx_cpt_engines { + int type; + int count; +}; + +/* Microcode version number */ +struct otx_cpt_ucode_ver_num { + u8 nn; + u8 xx; + u8 yy; + u8 zz; +}; + +struct otx_cpt_ucode_hdr { + struct otx_cpt_ucode_ver_num ver_num; + u8 ver_str[OTX_CPT_UCODE_VER_STR_SZ]; + u32 code_length; + u32 padding[3]; +}; + +struct otx_cpt_ucode { + u8 ver_str[OTX_CPT_UCODE_VER_STR_SZ];/* + * ucode version in readable format + */ + struct otx_cpt_ucode_ver_num ver_num;/* ucode version number */ + char filename[OTX_CPT_UCODE_NAME_LENGTH]; /* ucode filename */ + dma_addr_t dma; /* phys address of ucode image */ + dma_addr_t align_dma; /* aligned phys address of ucode image */ + void *va; /* virt address of ucode image */ + void *align_va; /* aligned virt address of ucode image */ + u32 size; /* ucode image size */ + int type; /* ucode image type SE or AE */ +}; + +struct tar_ucode_info_t { + struct list_head list; + struct otx_cpt_ucode ucode;/* microcode information */ + const u8 *ucode_ptr; /* pointer to microcode in tar archive */ +}; + +/* Maximum and current number of engines available for all engine groups */ +struct otx_cpt_engs_available { + int max_se_cnt; + int max_ae_cnt; + int se_cnt; + int ae_cnt; +}; + +/* Engines reserved to an engine group */ +struct otx_cpt_engs_rsvd { + int type; /* engine type */ + int count; /* number of engines attached */ + int offset; /* constant offset of engine type in the bitmap */ + unsigned long *bmap; /* attached engines bitmap */ + struct otx_cpt_ucode *ucode; /* ucode used by these engines */ +}; + +struct otx_cpt_mirror_info { + int is_ena; /* + * is mirroring enabled, it is set only for engine + * group which mirrors another engine group + */ + int idx; /* + * index of engine group which is mirrored by this + * group, set only for engine group which mirrors + * another group + */ + int ref_count; /* + * number of times this engine group is mirrored by + * other groups, this is set only for engine group + * which is mirrored by other group(s) + */ +}; + +struct otx_cpt_eng_grp_info { + struct otx_cpt_eng_grps *g; /* pointer to engine_groups structure */ + struct device_attribute info_attr; /* group info entry attr */ + /* engines attached */ + struct otx_cpt_engs_rsvd engs[OTX_CPT_MAX_ETYPES_PER_GRP]; + /* Microcode information */ + struct otx_cpt_ucode ucode[OTX_CPT_MAX_ETYPES_PER_GRP]; + /* sysfs info entry name */ + char sysfs_info_name[OTX_CPT_UCODE_NAME_LENGTH]; + /* engine group mirroring information */ + struct otx_cpt_mirror_info mirror; + int idx; /* engine group index */ + bool is_enabled; /* + * is engine group enabled, engine group is enabled + * when it has engines attached and ucode loaded + */ +}; + +struct otx_cpt_eng_grps { + struct otx_cpt_eng_grp_info grp[OTX_CPT_MAX_ENGINE_GROUPS]; + struct device_attribute ucode_load_attr;/* ucode load attr */ + struct otx_cpt_engs_available avail; + struct mutex lock; + void *obj; + int engs_num; /* total number of engines supported */ + int eng_types_supported; /* engine types supported SE, AE */ + u8 eng_ref_cnt[OTX_CPT_MAX_ENGINES];/* engines reference count */ + bool is_ucode_load_created; /* is ucode_load sysfs entry created */ + bool is_first_try; /* is this first try to create kcrypto engine grp */ + bool is_rdonly; /* do engine groups configuration can be modified */ +}; + +int otx_cpt_init_eng_grps(struct pci_dev *pdev, + struct otx_cpt_eng_grps *eng_grps, int pf_type); +void otx_cpt_cleanup_eng_grps(struct pci_dev *pdev, + struct otx_cpt_eng_grps *eng_grps); +int otx_cpt_try_create_default_eng_grps(struct pci_dev *pdev, + struct otx_cpt_eng_grps *eng_grps, + int pf_type); +void otx_cpt_set_eng_grps_is_rdonly(struct otx_cpt_eng_grps *eng_grps, + bool is_rdonly); +int otx_cpt_uc_supports_eng_type(struct otx_cpt_ucode *ucode, int eng_type); +int otx_cpt_eng_grp_has_eng_type(struct otx_cpt_eng_grp_info *eng_grp, + int eng_type); + +#endif /* __OTX_CPTPF_UCODE_H */ diff --git a/drivers/crypto/marvell/octeontx/otx_cptvf.h b/drivers/crypto/marvell/octeontx/otx_cptvf.h new file mode 100644 index 000000000000..dd02f21659af --- /dev/null +++ b/drivers/crypto/marvell/octeontx/otx_cptvf.h @@ -0,0 +1,104 @@ +/* SPDX-License-Identifier: GPL-2.0 + * Marvell OcteonTX CPT driver + * + * Copyright (C) 2019 Marvell International Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef __OTX_CPTVF_H +#define __OTX_CPTVF_H + +#include <linux/list.h> +#include <linux/interrupt.h> +#include <linux/device.h> +#include "otx_cpt_common.h" +#include "otx_cptvf_reqmgr.h" + +/* Flags to indicate the features supported */ +#define OTX_CPT_FLAG_DEVICE_READY BIT(1) +#define otx_cpt_device_ready(cpt) ((cpt)->flags & OTX_CPT_FLAG_DEVICE_READY) +/* Default command queue length */ +#define OTX_CPT_CMD_QLEN (4*2046) +#define OTX_CPT_CMD_QCHUNK_SIZE 1023 +#define OTX_CPT_NUM_QS_PER_VF 1 + +struct otx_cpt_cmd_chunk { + u8 *head; + dma_addr_t dma_addr; + u32 size; /* Chunk size, max OTX_CPT_INST_CHUNK_MAX_SIZE */ + struct list_head nextchunk; +}; + +struct otx_cpt_cmd_queue { + u32 idx; /* Command queue host write idx */ + u32 num_chunks; /* Number of command chunks */ + struct otx_cpt_cmd_chunk *qhead;/* + * Command queue head, instructions + * are inserted here + */ + struct otx_cpt_cmd_chunk *base; + struct list_head chead; +}; + +struct otx_cpt_cmd_qinfo { + u32 qchunksize; /* Command queue chunk size */ + struct otx_cpt_cmd_queue queue[OTX_CPT_NUM_QS_PER_VF]; +}; + +struct otx_cpt_pending_qinfo { + u32 num_queues; /* Number of queues supported */ + struct otx_cpt_pending_queue queue[OTX_CPT_NUM_QS_PER_VF]; +}; + +#define for_each_pending_queue(qinfo, q, i) \ + for (i = 0, q = &qinfo->queue[i]; i < qinfo->num_queues; i++, \ + q = &qinfo->queue[i]) + +struct otx_cptvf_wqe { + struct tasklet_struct twork; + struct otx_cptvf *cptvf; +}; + +struct otx_cptvf_wqe_info { + struct otx_cptvf_wqe vq_wqe[OTX_CPT_NUM_QS_PER_VF]; +}; + +struct otx_cptvf { + u16 flags; /* Flags to hold device status bits */ + u8 vfid; /* Device Index 0...OTX_CPT_MAX_VF_NUM */ + u8 num_vfs; /* Number of enabled VFs */ + u8 vftype; /* VF type of SE_TYPE(2) or AE_TYPE(1) */ + u8 vfgrp; /* VF group (0 - 8) */ + u8 node; /* Operating node: Bits (46:44) in BAR0 address */ + u8 priority; /* + * VF priority ring: 1-High proirity round + * robin ring;0-Low priority round robin ring; + */ + struct pci_dev *pdev; /* Pci device handle */ + void __iomem *reg_base; /* Register start address */ + void *wqe_info; /* BH worker info */ + /* MSI-X */ + cpumask_var_t affinity_mask[OTX_CPT_VF_MSIX_VECTORS]; + /* Command and Pending queues */ + u32 qsize; + u32 num_queues; + struct otx_cpt_cmd_qinfo cqinfo; /* Command queue information */ + struct otx_cpt_pending_qinfo pqinfo; /* Pending queue information */ + /* VF-PF mailbox communication */ + bool pf_acked; + bool pf_nacked; +}; + +int otx_cptvf_send_vf_up(struct otx_cptvf *cptvf); +int otx_cptvf_send_vf_down(struct otx_cptvf *cptvf); +int otx_cptvf_send_vf_to_grp_msg(struct otx_cptvf *cptvf, int group); +int otx_cptvf_send_vf_priority_msg(struct otx_cptvf *cptvf); +int otx_cptvf_send_vq_size_msg(struct otx_cptvf *cptvf); +int otx_cptvf_check_pf_ready(struct otx_cptvf *cptvf); +void otx_cptvf_handle_mbox_intr(struct otx_cptvf *cptvf); +void otx_cptvf_write_vq_doorbell(struct otx_cptvf *cptvf, u32 val); + +#endif /* __OTX_CPTVF_H */ diff --git a/drivers/crypto/marvell/octeontx/otx_cptvf_algs.c b/drivers/crypto/marvell/octeontx/otx_cptvf_algs.c new file mode 100644 index 000000000000..06202bcffb33 --- /dev/null +++ b/drivers/crypto/marvell/octeontx/otx_cptvf_algs.c @@ -0,0 +1,1746 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Marvell OcteonTX CPT driver + * + * Copyright (C) 2019 Marvell International Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <crypto/aes.h> +#include <crypto/authenc.h> +#include <crypto/cryptd.h> +#include <crypto/des.h> +#include <crypto/internal/aead.h> +#include <crypto/sha.h> +#include <crypto/xts.h> +#include <crypto/scatterwalk.h> +#include <linux/rtnetlink.h> +#include <linux/sort.h> +#include <linux/module.h> +#include "otx_cptvf.h" +#include "otx_cptvf_algs.h" +#include "otx_cptvf_reqmgr.h" + +#define CPT_MAX_VF_NUM 64 +/* Size of salt in AES GCM mode */ +#define AES_GCM_SALT_SIZE 4 +/* Size of IV in AES GCM mode */ +#define AES_GCM_IV_SIZE 8 +/* Size of ICV (Integrity Check Value) in AES GCM mode */ +#define AES_GCM_ICV_SIZE 16 +/* Offset of IV in AES GCM mode */ +#define AES_GCM_IV_OFFSET 8 +#define CONTROL_WORD_LEN 8 +#define KEY2_OFFSET 48 +#define DMA_MODE_FLAG(dma_mode) \ + (((dma_mode) == OTX_CPT_DMA_GATHER_SCATTER) ? (1 << 7) : 0) + +/* Truncated SHA digest size */ +#define SHA1_TRUNC_DIGEST_SIZE 12 +#define SHA256_TRUNC_DIGEST_SIZE 16 +#define SHA384_TRUNC_DIGEST_SIZE 24 +#define SHA512_TRUNC_DIGEST_SIZE 32 + +static DEFINE_MUTEX(mutex); +static int is_crypto_registered; + +struct cpt_device_desc { + enum otx_cptpf_type pf_type; + struct pci_dev *dev; + int num_queues; +}; + +struct cpt_device_table { + atomic_t count; + struct cpt_device_desc desc[CPT_MAX_VF_NUM]; +}; + +static struct cpt_device_table se_devices = { + .count = ATOMIC_INIT(0) +}; + +static struct cpt_device_table ae_devices = { + .count = ATOMIC_INIT(0) +}; + +static inline int get_se_device(struct pci_dev **pdev, int *cpu_num) +{ + int count, ret = 0; + + count = atomic_read(&se_devices.count); + if (count < 1) + return -ENODEV; + + *cpu_num = get_cpu(); + + if (se_devices.desc[0].pf_type == OTX_CPT_SE) { + /* + * On OcteonTX platform there is one CPT instruction queue bound + * to each VF. We get maximum performance if one CPT queue + * is available for each cpu otherwise CPT queues need to be + * shared between cpus. + */ + if (*cpu_num >= count) + *cpu_num %= count; + *pdev = se_devices.desc[*cpu_num].dev; + } else { + pr_err("Unknown PF type %d\n", se_devices.desc[0].pf_type); + ret = -EINVAL; + } + put_cpu(); + + return ret; +} + +static inline int validate_hmac_cipher_null(struct otx_cpt_req_info *cpt_req) +{ + struct otx_cpt_req_ctx *rctx; + struct aead_request *req; + struct crypto_aead *tfm; + + req = container_of(cpt_req->areq, struct aead_request, base); + tfm = crypto_aead_reqtfm(req); + rctx = aead_request_ctx(req); + if (memcmp(rctx->fctx.hmac.s.hmac_calc, + rctx->fctx.hmac.s.hmac_recv, + crypto_aead_authsize(tfm)) != 0) + return -EBADMSG; + + return 0; +} + +static void otx_cpt_aead_callback(int status, void *arg1, void *arg2) +{ + struct otx_cpt_info_buffer *cpt_info = arg2; + struct crypto_async_request *areq = arg1; + struct otx_cpt_req_info *cpt_req; + struct pci_dev *pdev; + + cpt_req = cpt_info->req; + if (!status) { + /* + * When selected cipher is NULL we need to manually + * verify whether calculated hmac value matches + * received hmac value + */ + if (cpt_req->req_type == OTX_CPT_AEAD_ENC_DEC_NULL_REQ && + !cpt_req->is_enc) + status = validate_hmac_cipher_null(cpt_req); + } + if (cpt_info) { + pdev = cpt_info->pdev; + do_request_cleanup(pdev, cpt_info); + } + if (areq) + areq->complete(areq, status); +} + +static void output_iv_copyback(struct crypto_async_request *areq) +{ + struct otx_cpt_req_info *req_info; + struct skcipher_request *sreq; + struct crypto_skcipher *stfm; + struct otx_cpt_req_ctx *rctx; + struct otx_cpt_enc_ctx *ctx; + u32 start, ivsize; + + sreq = container_of(areq, struct skcipher_request, base); + stfm = crypto_skcipher_reqtfm(sreq); + ctx = crypto_skcipher_ctx(stfm); + if (ctx->cipher_type == OTX_CPT_AES_CBC || + ctx->cipher_type == OTX_CPT_DES3_CBC) { + rctx = skcipher_request_ctx(sreq); + req_info = &rctx->cpt_req; + ivsize = crypto_skcipher_ivsize(stfm); + start = sreq->cryptlen - ivsize; + + if (req_info->is_enc) { + scatterwalk_map_and_copy(sreq->iv, sreq->dst, start, + ivsize, 0); + } else { + if (sreq->src != sreq->dst) { + scatterwalk_map_and_copy(sreq->iv, sreq->src, + start, ivsize, 0); + } else { + memcpy(sreq->iv, req_info->iv_out, ivsize); + kfree(req_info->iv_out); + } + } + } +} + +static void otx_cpt_skcipher_callback(int status, void *arg1, void *arg2) +{ + struct otx_cpt_info_buffer *cpt_info = arg2; + struct crypto_async_request *areq = arg1; + struct pci_dev *pdev; + + if (areq) { + if (!status) + output_iv_copyback(areq); + if (cpt_info) { + pdev = cpt_info->pdev; + do_request_cleanup(pdev, cpt_info); + } + areq->complete(areq, status); + } +} + +static inline void update_input_data(struct otx_cpt_req_info *req_info, + struct scatterlist *inp_sg, + u32 nbytes, u32 *argcnt) +{ + req_info->req.dlen += nbytes; + + while (nbytes) { + u32 len = min(nbytes, inp_sg->length); + u8 *ptr = sg_virt(inp_sg); + + req_info->in[*argcnt].vptr = (void *)ptr; + req_info->in[*argcnt].size = len; + nbytes -= len; + ++(*argcnt); + inp_sg = sg_next(inp_sg); + } +} + +static inline void update_output_data(struct otx_cpt_req_info *req_info, + struct scatterlist *outp_sg, + u32 offset, u32 nbytes, u32 *argcnt) +{ + req_info->rlen += nbytes; + + while (nbytes) { + u32 len = min(nbytes, outp_sg->length - offset); + u8 *ptr = sg_virt(outp_sg); + + req_info->out[*argcnt].vptr = (void *) (ptr + offset); + req_info->out[*argcnt].size = len; + nbytes -= len; + ++(*argcnt); + offset = 0; + outp_sg = sg_next(outp_sg); + } +} + +static inline u32 create_ctx_hdr(struct skcipher_request *req, u32 enc, + u32 *argcnt) +{ + struct crypto_skcipher *stfm = crypto_skcipher_reqtfm(req); + struct otx_cpt_req_ctx *rctx = skcipher_request_ctx(req); + struct otx_cpt_req_info *req_info = &rctx->cpt_req; + struct crypto_tfm *tfm = crypto_skcipher_tfm(stfm); + struct otx_cpt_enc_ctx *ctx = crypto_tfm_ctx(tfm); + struct otx_cpt_fc_ctx *fctx = &rctx->fctx; + int ivsize = crypto_skcipher_ivsize(stfm); + u32 start = req->cryptlen - ivsize; + u64 *ctrl_flags = NULL; + gfp_t flags; + + flags = (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ? + GFP_KERNEL : GFP_ATOMIC; + req_info->ctrl.s.dma_mode = OTX_CPT_DMA_GATHER_SCATTER; + req_info->ctrl.s.se_req = OTX_CPT_SE_CORE_REQ; + + req_info->req.opcode.s.major = OTX_CPT_MAJOR_OP_FC | + DMA_MODE_FLAG(OTX_CPT_DMA_GATHER_SCATTER); + if (enc) { + req_info->req.opcode.s.minor = 2; + } else { + req_info->req.opcode.s.minor = 3; + if ((ctx->cipher_type == OTX_CPT_AES_CBC || + ctx->cipher_type == OTX_CPT_DES3_CBC) && + req->src == req->dst) { + req_info->iv_out = kmalloc(ivsize, flags); + if (!req_info->iv_out) + return -ENOMEM; + + scatterwalk_map_and_copy(req_info->iv_out, req->src, + start, ivsize, 0); + } + } + /* Encryption data length */ + req_info->req.param1 = req->cryptlen; + /* Authentication data length */ + req_info->req.param2 = 0; + + fctx->enc.enc_ctrl.e.enc_cipher = ctx->cipher_type; + fctx->enc.enc_ctrl.e.aes_key = ctx->key_type; + fctx->enc.enc_ctrl.e.iv_source = OTX_CPT_FROM_CPTR; + + if (ctx->cipher_type == OTX_CPT_AES_XTS) + memcpy(fctx->enc.encr_key, ctx->enc_key, ctx->key_len * 2); + else + memcpy(fctx->enc.encr_key, ctx->enc_key, ctx->key_len); + + memcpy(fctx->enc.encr_iv, req->iv, crypto_skcipher_ivsize(stfm)); + + ctrl_flags = (u64 *)&fctx->enc.enc_ctrl.flags; + *ctrl_flags = cpu_to_be64(*ctrl_flags); + + /* + * Storing Packet Data Information in offset + * Control Word First 8 bytes + */ + req_info->in[*argcnt].vptr = (u8 *)&rctx->ctrl_word; + req_info->in[*argcnt].size = CONTROL_WORD_LEN; + req_info->req.dlen += CONTROL_WORD_LEN; + ++(*argcnt); + + req_info->in[*argcnt].vptr = (u8 *)fctx; + req_info->in[*argcnt].size = sizeof(struct otx_cpt_fc_ctx); + req_info->req.dlen += sizeof(struct otx_cpt_fc_ctx); + + ++(*argcnt); + + return 0; +} + +static inline u32 create_input_list(struct skcipher_request *req, u32 enc, + u32 enc_iv_len) +{ + struct otx_cpt_req_ctx *rctx = skcipher_request_ctx(req); + struct otx_cpt_req_info *req_info = &rctx->cpt_req; + u32 argcnt = 0; + int ret; + + ret = create_ctx_hdr(req, enc, &argcnt); + if (ret) + return ret; + + update_input_data(req_info, req->src, req->cryptlen, &argcnt); + req_info->incnt = argcnt; + + return 0; +} + +static inline void create_output_list(struct skcipher_request *req, + u32 enc_iv_len) +{ + struct otx_cpt_req_ctx *rctx = skcipher_request_ctx(req); + struct otx_cpt_req_info *req_info = &rctx->cpt_req; + u32 argcnt = 0; + + /* + * OUTPUT Buffer Processing + * AES encryption/decryption output would be + * received in the following format + * + * ------IV--------|------ENCRYPTED/DECRYPTED DATA-----| + * [ 16 Bytes/ [ Request Enc/Dec/ DATA Len AES CBC ] + */ + update_output_data(req_info, req->dst, 0, req->cryptlen, &argcnt); + req_info->outcnt = argcnt; +} + +static inline int cpt_enc_dec(struct skcipher_request *req, u32 enc) +{ + struct crypto_skcipher *stfm = crypto_skcipher_reqtfm(req); + struct otx_cpt_req_ctx *rctx = skcipher_request_ctx(req); + struct otx_cpt_req_info *req_info = &rctx->cpt_req; + u32 enc_iv_len = crypto_skcipher_ivsize(stfm); + struct pci_dev *pdev; + int status, cpu_num; + + /* Validate that request doesn't exceed maximum CPT supported size */ + if (req->cryptlen > OTX_CPT_MAX_REQ_SIZE) + return -E2BIG; + + /* Clear control words */ + rctx->ctrl_word.flags = 0; + rctx->fctx.enc.enc_ctrl.flags = 0; + + status = create_input_list(req, enc, enc_iv_len); + if (status) + return status; + create_output_list(req, enc_iv_len); + + status = get_se_device(&pdev, &cpu_num); + if (status) + return status; + + req_info->callback = (void *)otx_cpt_skcipher_callback; + req_info->areq = &req->base; + req_info->req_type = OTX_CPT_ENC_DEC_REQ; + req_info->is_enc = enc; + req_info->is_trunc_hmac = false; + req_info->ctrl.s.grp = 0; + + /* + * We perform an asynchronous send and once + * the request is completed the driver would + * intimate through registered call back functions + */ + status = otx_cpt_do_request(pdev, req_info, cpu_num); + + return status; +} + +static int otx_cpt_skcipher_encrypt(struct skcipher_request *req) +{ + return cpt_enc_dec(req, true); +} + +static int otx_cpt_skcipher_decrypt(struct skcipher_request *req) +{ + return cpt_enc_dec(req, false); +} + +static int otx_cpt_skcipher_xts_setkey(struct crypto_skcipher *tfm, + const u8 *key, u32 keylen) +{ + struct otx_cpt_enc_ctx *ctx = crypto_skcipher_ctx(tfm); + const u8 *key2 = key + (keylen / 2); + const u8 *key1 = key; + int ret; + + ret = xts_check_key(crypto_skcipher_tfm(tfm), key, keylen); + if (ret) + return ret; + ctx->key_len = keylen; + memcpy(ctx->enc_key, key1, keylen / 2); + memcpy(ctx->enc_key + KEY2_OFFSET, key2, keylen / 2); + ctx->cipher_type = OTX_CPT_AES_XTS; + switch (ctx->key_len) { + case 2 * AES_KEYSIZE_128: + ctx->key_type = OTX_CPT_AES_128_BIT; + break; + case 2 * AES_KEYSIZE_256: + ctx->key_type = OTX_CPT_AES_256_BIT; + break; + default: + return -EINVAL; + } + + return 0; +} + +static int cpt_des_setkey(struct crypto_skcipher *tfm, const u8 *key, + u32 keylen, u8 cipher_type) +{ + struct otx_cpt_enc_ctx *ctx = crypto_skcipher_ctx(tfm); + + if (keylen != DES3_EDE_KEY_SIZE) + return -EINVAL; + + ctx->key_len = keylen; + ctx->cipher_type = cipher_type; + + memcpy(ctx->enc_key, key, keylen); + + return 0; +} + +static int cpt_aes_setkey(struct crypto_skcipher *tfm, const u8 *key, + u32 keylen, u8 cipher_type) +{ + struct otx_cpt_enc_ctx *ctx = crypto_skcipher_ctx(tfm); + + switch (keylen) { + case AES_KEYSIZE_128: + ctx->key_type = OTX_CPT_AES_128_BIT; + break; + case AES_KEYSIZE_192: + ctx->key_type = OTX_CPT_AES_192_BIT; + break; + case AES_KEYSIZE_256: + ctx->key_type = OTX_CPT_AES_256_BIT; + break; + default: + return -EINVAL; + } + ctx->key_len = keylen; + ctx->cipher_type = cipher_type; + + memcpy(ctx->enc_key, key, keylen); + + return 0; +} + +static int otx_cpt_skcipher_cbc_aes_setkey(struct crypto_skcipher *tfm, + const u8 *key, u32 keylen) +{ + return cpt_aes_setkey(tfm, key, keylen, OTX_CPT_AES_CBC); +} + +static int otx_cpt_skcipher_ecb_aes_setkey(struct crypto_skcipher *tfm, + const u8 *key, u32 keylen) +{ + return cpt_aes_setkey(tfm, key, keylen, OTX_CPT_AES_ECB); +} + +static int otx_cpt_skcipher_cfb_aes_setkey(struct crypto_skcipher *tfm, + const u8 *key, u32 keylen) +{ + return cpt_aes_setkey(tfm, key, keylen, OTX_CPT_AES_CFB); +} + +static int otx_cpt_skcipher_cbc_des3_setkey(struct crypto_skcipher *tfm, + const u8 *key, u32 keylen) +{ + return cpt_des_setkey(tfm, key, keylen, OTX_CPT_DES3_CBC); +} + +static int otx_cpt_skcipher_ecb_des3_setkey(struct crypto_skcipher *tfm, + const u8 *key, u32 keylen) +{ + return cpt_des_setkey(tfm, key, keylen, OTX_CPT_DES3_ECB); +} + +static int otx_cpt_enc_dec_init(struct crypto_skcipher *tfm) +{ + struct otx_cpt_enc_ctx *ctx = crypto_skcipher_ctx(tfm); + + memset(ctx, 0, sizeof(*ctx)); + /* + * Additional memory for skcipher_request is + * allocated since the cryptd daemon uses + * this memory for request_ctx information + */ + crypto_skcipher_set_reqsize(tfm, sizeof(struct otx_cpt_req_ctx) + + sizeof(struct skcipher_request)); + + return 0; +} + +static int cpt_aead_init(struct crypto_aead *tfm, u8 cipher_type, u8 mac_type) +{ + struct otx_cpt_aead_ctx *ctx = crypto_aead_ctx(tfm); + + ctx->cipher_type = cipher_type; + ctx->mac_type = mac_type; + + /* + * When selected cipher is NULL we use HMAC opcode instead of + * FLEXICRYPTO opcode therefore we don't need to use HASH algorithms + * for calculating ipad and opad + */ + if (ctx->cipher_type != OTX_CPT_CIPHER_NULL) { + switch (ctx->mac_type) { + case OTX_CPT_SHA1: + ctx->hashalg = crypto_alloc_shash("sha1", 0, + CRYPTO_ALG_ASYNC); + if (IS_ERR(ctx->hashalg)) + return PTR_ERR(ctx->hashalg); + break; + + case OTX_CPT_SHA256: + ctx->hashalg = crypto_alloc_shash("sha256", 0, + CRYPTO_ALG_ASYNC); + if (IS_ERR(ctx->hashalg)) + return PTR_ERR(ctx->hashalg); + break; + + case OTX_CPT_SHA384: + ctx->hashalg = crypto_alloc_shash("sha384", 0, + CRYPTO_ALG_ASYNC); + if (IS_ERR(ctx->hashalg)) + return PTR_ERR(ctx->hashalg); + break; + + case OTX_CPT_SHA512: + ctx->hashalg = crypto_alloc_shash("sha512", 0, + CRYPTO_ALG_ASYNC); + if (IS_ERR(ctx->hashalg)) + return PTR_ERR(ctx->hashalg); + break; + } + } + + crypto_aead_set_reqsize(tfm, sizeof(struct otx_cpt_req_ctx)); + + return 0; +} + +static int otx_cpt_aead_cbc_aes_sha1_init(struct crypto_aead *tfm) +{ + return cpt_aead_init(tfm, OTX_CPT_AES_CBC, OTX_CPT_SHA1); +} + +static int otx_cpt_aead_cbc_aes_sha256_init(struct crypto_aead *tfm) +{ + return cpt_aead_init(tfm, OTX_CPT_AES_CBC, OTX_CPT_SHA256); +} + +static int otx_cpt_aead_cbc_aes_sha384_init(struct crypto_aead *tfm) +{ + return cpt_aead_init(tfm, OTX_CPT_AES_CBC, OTX_CPT_SHA384); +} + +static int otx_cpt_aead_cbc_aes_sha512_init(struct crypto_aead *tfm) +{ + return cpt_aead_init(tfm, OTX_CPT_AES_CBC, OTX_CPT_SHA512); +} + +static int otx_cpt_aead_ecb_null_sha1_init(struct crypto_aead *tfm) +{ + return cpt_aead_init(tfm, OTX_CPT_CIPHER_NULL, OTX_CPT_SHA1); +} + +static int otx_cpt_aead_ecb_null_sha256_init(struct crypto_aead *tfm) +{ + return cpt_aead_init(tfm, OTX_CPT_CIPHER_NULL, OTX_CPT_SHA256); +} + +static int otx_cpt_aead_ecb_null_sha384_init(struct crypto_aead *tfm) +{ + return cpt_aead_init(tfm, OTX_CPT_CIPHER_NULL, OTX_CPT_SHA384); +} + +static int otx_cpt_aead_ecb_null_sha512_init(struct crypto_aead *tfm) +{ + return cpt_aead_init(tfm, OTX_CPT_CIPHER_NULL, OTX_CPT_SHA512); +} + +static int otx_cpt_aead_gcm_aes_init(struct crypto_aead *tfm) +{ + return cpt_aead_init(tfm, OTX_CPT_AES_GCM, OTX_CPT_MAC_NULL); +} + +static void otx_cpt_aead_exit(struct crypto_aead *tfm) +{ + struct otx_cpt_aead_ctx *ctx = crypto_aead_ctx(tfm); + + kfree(ctx->ipad); + kfree(ctx->opad); + if (ctx->hashalg) + crypto_free_shash(ctx->hashalg); + kfree(ctx->sdesc); +} + +/* + * This is the Integrity Check Value validation (aka the authentication tag + * length) + */ +static int otx_cpt_aead_set_authsize(struct crypto_aead *tfm, + unsigned int authsize) +{ + struct otx_cpt_aead_ctx *ctx = crypto_aead_ctx(tfm); + + switch (ctx->mac_type) { + case OTX_CPT_SHA1: + if (authsize != SHA1_DIGEST_SIZE && + authsize != SHA1_TRUNC_DIGEST_SIZE) + return -EINVAL; + + if (authsize == SHA1_TRUNC_DIGEST_SIZE) + ctx->is_trunc_hmac = true; + break; + + case OTX_CPT_SHA256: + if (authsize != SHA256_DIGEST_SIZE && + authsize != SHA256_TRUNC_DIGEST_SIZE) + return -EINVAL; + + if (authsize == SHA256_TRUNC_DIGEST_SIZE) + ctx->is_trunc_hmac = true; + break; + + case OTX_CPT_SHA384: + if (authsize != SHA384_DIGEST_SIZE && + authsize != SHA384_TRUNC_DIGEST_SIZE) + return -EINVAL; + + if (authsize == SHA384_TRUNC_DIGEST_SIZE) + ctx->is_trunc_hmac = true; + break; + + case OTX_CPT_SHA512: + if (authsize != SHA512_DIGEST_SIZE && + authsize != SHA512_TRUNC_DIGEST_SIZE) + return -EINVAL; + + if (authsize == SHA512_TRUNC_DIGEST_SIZE) + ctx->is_trunc_hmac = true; + break; + + case OTX_CPT_MAC_NULL: + if (ctx->cipher_type == OTX_CPT_AES_GCM) { + if (authsize != AES_GCM_ICV_SIZE) + return -EINVAL; + } else + return -EINVAL; + break; + + default: + return -EINVAL; + } + + tfm->authsize = authsize; + return 0; +} + +static struct otx_cpt_sdesc *alloc_sdesc(struct crypto_shash *alg) +{ + struct otx_cpt_sdesc *sdesc; + int size; + + size = sizeof(struct shash_desc) + crypto_shash_descsize(alg); + sdesc = kmalloc(size, GFP_KERNEL); + if (!sdesc) + return NULL; + + sdesc->shash.tfm = alg; + + return sdesc; +} + +static inline void swap_data32(void *buf, u32 len) +{ + u32 *store = (u32 *) buf; + int i = 0; + + for (i = 0 ; i < len/sizeof(u32); i++, store++) + *store = cpu_to_be32(*store); +} + +static inline void swap_data64(void *buf, u32 len) +{ + u64 *store = (u64 *) buf; + int i = 0; + + for (i = 0 ; i < len/sizeof(u64); i++, store++) + *store = cpu_to_be64(*store); +} + +static int copy_pad(u8 mac_type, u8 *out_pad, u8 *in_pad) +{ + struct sha512_state *sha512; + struct sha256_state *sha256; + struct sha1_state *sha1; + + switch (mac_type) { + case OTX_CPT_SHA1: + sha1 = (struct sha1_state *) in_pad; + swap_data32(sha1->state, SHA1_DIGEST_SIZE); + memcpy(out_pad, &sha1->state, SHA1_DIGEST_SIZE); + break; + + case OTX_CPT_SHA256: + sha256 = (struct sha256_state *) in_pad; + swap_data32(sha256->state, SHA256_DIGEST_SIZE); + memcpy(out_pad, &sha256->state, SHA256_DIGEST_SIZE); + break; + + case OTX_CPT_SHA384: + case OTX_CPT_SHA512: + sha512 = (struct sha512_state *) in_pad; + swap_data64(sha512->state, SHA512_DIGEST_SIZE); + memcpy(out_pad, &sha512->state, SHA512_DIGEST_SIZE); + break; + + default: + return -EINVAL; + } + + return 0; +} + +static int aead_hmac_init(struct crypto_aead *cipher) +{ + struct otx_cpt_aead_ctx *ctx = crypto_aead_ctx(cipher); + int state_size = crypto_shash_statesize(ctx->hashalg); + int ds = crypto_shash_digestsize(ctx->hashalg); + int bs = crypto_shash_blocksize(ctx->hashalg); + int authkeylen = ctx->auth_key_len; + u8 *ipad = NULL, *opad = NULL; + int ret = 0, icount = 0; + + ctx->sdesc = alloc_sdesc(ctx->hashalg); + if (!ctx->sdesc) + return -ENOMEM; + + ctx->ipad = kzalloc(bs, GFP_KERNEL); + if (!ctx->ipad) { + ret = -ENOMEM; + goto calc_fail; + } + + ctx->opad = kzalloc(bs, GFP_KERNEL); + if (!ctx->opad) { + ret = -ENOMEM; + goto calc_fail; + } + + ipad = kzalloc(state_size, GFP_KERNEL); + if (!ipad) { + ret = -ENOMEM; + goto calc_fail; + } + + opad = kzalloc(state_size, GFP_KERNEL); + if (!opad) { + ret = -ENOMEM; + goto calc_fail; + } + + if (authkeylen > bs) { + ret = crypto_shash_digest(&ctx->sdesc->shash, ctx->key, + authkeylen, ipad); + if (ret) + goto calc_fail; + + authkeylen = ds; + } else { + memcpy(ipad, ctx->key, authkeylen); + } + + memset(ipad + authkeylen, 0, bs - authkeylen); + memcpy(opad, ipad, bs); + + for (icount = 0; icount < bs; icount++) { + ipad[icount] ^= 0x36; + opad[icount] ^= 0x5c; + } + + /* + * Partial Hash calculated from the software + * algorithm is retrieved for IPAD & OPAD + */ + + /* IPAD Calculation */ + crypto_shash_init(&ctx->sdesc->shash); + crypto_shash_update(&ctx->sdesc->shash, ipad, bs); + crypto_shash_export(&ctx->sdesc->shash, ipad); + ret = copy_pad(ctx->mac_type, ctx->ipad, ipad); + if (ret) + goto calc_fail; + + /* OPAD Calculation */ + crypto_shash_init(&ctx->sdesc->shash); + crypto_shash_update(&ctx->sdesc->shash, opad, bs); + crypto_shash_export(&ctx->sdesc->shash, opad); + ret = copy_pad(ctx->mac_type, ctx->opad, opad); + if (ret) + goto calc_fail; + + kfree(ipad); + kfree(opad); + + return 0; + +calc_fail: + kfree(ctx->ipad); + ctx->ipad = NULL; + kfree(ctx->opad); + ctx->opad = NULL; + kfree(ipad); + kfree(opad); + kfree(ctx->sdesc); + ctx->sdesc = NULL; + + return ret; +} + +static int otx_cpt_aead_cbc_aes_sha_setkey(struct crypto_aead *cipher, + const unsigned char *key, + unsigned int keylen) +{ + struct otx_cpt_aead_ctx *ctx = crypto_aead_ctx(cipher); + struct crypto_authenc_key_param *param; + int enckeylen = 0, authkeylen = 0; + struct rtattr *rta = (void *)key; + int status = -EINVAL; + + if (!RTA_OK(rta, keylen)) + goto badkey; + + if (rta->rta_type != CRYPTO_AUTHENC_KEYA_PARAM) + goto badkey; + + if (RTA_PAYLOAD(rta) < sizeof(*param)) + goto badkey; + + param = RTA_DATA(rta); + enckeylen = be32_to_cpu(param->enckeylen); + key += RTA_ALIGN(rta->rta_len); + keylen -= RTA_ALIGN(rta->rta_len); + if (keylen < enckeylen) + goto badkey; + + if (keylen > OTX_CPT_MAX_KEY_SIZE) + goto badkey; + + authkeylen = keylen - enckeylen; + memcpy(ctx->key, key, keylen); + + switch (enckeylen) { + case AES_KEYSIZE_128: + ctx->key_type = OTX_CPT_AES_128_BIT; + break; + case AES_KEYSIZE_192: + ctx->key_type = OTX_CPT_AES_192_BIT; + break; + case AES_KEYSIZE_256: + ctx->key_type = OTX_CPT_AES_256_BIT; + break; + default: + /* Invalid key length */ + goto badkey; + } + + ctx->enc_key_len = enckeylen; + ctx->auth_key_len = authkeylen; + + status = aead_hmac_init(cipher); + if (status) + goto badkey; + + return 0; +badkey: + return status; +} + +static int otx_cpt_aead_ecb_null_sha_setkey(struct crypto_aead *cipher, + const unsigned char *key, + unsigned int keylen) +{ + struct otx_cpt_aead_ctx *ctx = crypto_aead_ctx(cipher); + struct crypto_authenc_key_param *param; + struct rtattr *rta = (void *)key; + int enckeylen = 0; + + if (!RTA_OK(rta, keylen)) + goto badkey; + + if (rta->rta_type != CRYPTO_AUTHENC_KEYA_PARAM) + goto badkey; + + if (RTA_PAYLOAD(rta) < sizeof(*param)) + goto badkey; + + param = RTA_DATA(rta); + enckeylen = be32_to_cpu(param->enckeylen); + key += RTA_ALIGN(rta->rta_len); + keylen -= RTA_ALIGN(rta->rta_len); + if (enckeylen != 0) + goto badkey; + + if (keylen > OTX_CPT_MAX_KEY_SIZE) + goto badkey; + + memcpy(ctx->key, key, keylen); + ctx->enc_key_len = enckeylen; + ctx->auth_key_len = keylen; + return 0; +badkey: + return -EINVAL; +} + +static int otx_cpt_aead_gcm_aes_setkey(struct crypto_aead *cipher, + const unsigned char *key, + unsigned int keylen) +{ + struct otx_cpt_aead_ctx *ctx = crypto_aead_ctx(cipher); + + /* + * For aes gcm we expect to get encryption key (16, 24, 32 bytes) + * and salt (4 bytes) + */ + switch (keylen) { + case AES_KEYSIZE_128 + AES_GCM_SALT_SIZE: + ctx->key_type = OTX_CPT_AES_128_BIT; + ctx->enc_key_len = AES_KEYSIZE_128; + break; + case AES_KEYSIZE_192 + AES_GCM_SALT_SIZE: + ctx->key_type = OTX_CPT_AES_192_BIT; + ctx->enc_key_len = AES_KEYSIZE_192; + break; + case AES_KEYSIZE_256 + AES_GCM_SALT_SIZE: + ctx->key_type = OTX_CPT_AES_256_BIT; + ctx->enc_key_len = AES_KEYSIZE_256; + break; + default: + /* Invalid key and salt length */ + return -EINVAL; + } + + /* Store encryption key and salt */ + memcpy(ctx->key, key, keylen); + + return 0; +} + +static inline u32 create_aead_ctx_hdr(struct aead_request *req, u32 enc, + u32 *argcnt) +{ + struct otx_cpt_req_ctx *rctx = aead_request_ctx(req); + struct crypto_aead *tfm = crypto_aead_reqtfm(req); + struct otx_cpt_aead_ctx *ctx = crypto_aead_ctx(tfm); + struct otx_cpt_req_info *req_info = &rctx->cpt_req; + struct otx_cpt_fc_ctx *fctx = &rctx->fctx; + int mac_len = crypto_aead_authsize(tfm); + int ds; + + rctx->ctrl_word.e.enc_data_offset = req->assoclen; + + switch (ctx->cipher_type) { + case OTX_CPT_AES_CBC: + fctx->enc.enc_ctrl.e.iv_source = OTX_CPT_FROM_CPTR; + /* Copy encryption key to context */ + memcpy(fctx->enc.encr_key, ctx->key + ctx->auth_key_len, + ctx->enc_key_len); + /* Copy IV to context */ + memcpy(fctx->enc.encr_iv, req->iv, crypto_aead_ivsize(tfm)); + + ds = crypto_shash_digestsize(ctx->hashalg); + if (ctx->mac_type == OTX_CPT_SHA384) + ds = SHA512_DIGEST_SIZE; + if (ctx->ipad) + memcpy(fctx->hmac.e.ipad, ctx->ipad, ds); + if (ctx->opad) + memcpy(fctx->hmac.e.opad, ctx->opad, ds); + break; + + case OTX_CPT_AES_GCM: + fctx->enc.enc_ctrl.e.iv_source = OTX_CPT_FROM_DPTR; + /* Copy encryption key to context */ + memcpy(fctx->enc.encr_key, ctx->key, ctx->enc_key_len); + /* Copy salt to context */ + memcpy(fctx->enc.encr_iv, ctx->key + ctx->enc_key_len, + AES_GCM_SALT_SIZE); + + rctx->ctrl_word.e.iv_offset = req->assoclen - AES_GCM_IV_OFFSET; + break; + + default: + /* Unknown cipher type */ + return -EINVAL; + } + rctx->ctrl_word.flags = cpu_to_be64(rctx->ctrl_word.flags); + + req_info->ctrl.s.dma_mode = OTX_CPT_DMA_GATHER_SCATTER; + req_info->ctrl.s.se_req = OTX_CPT_SE_CORE_REQ; + req_info->req.opcode.s.major = OTX_CPT_MAJOR_OP_FC | + DMA_MODE_FLAG(OTX_CPT_DMA_GATHER_SCATTER); + if (enc) { + req_info->req.opcode.s.minor = 2; + req_info->req.param1 = req->cryptlen; + req_info->req.param2 = req->cryptlen + req->assoclen; + } else { + req_info->req.opcode.s.minor = 3; + req_info->req.param1 = req->cryptlen - mac_len; + req_info->req.param2 = req->cryptlen + req->assoclen - mac_len; + } + + fctx->enc.enc_ctrl.e.enc_cipher = ctx->cipher_type; + fctx->enc.enc_ctrl.e.aes_key = ctx->key_type; + fctx->enc.enc_ctrl.e.mac_type = ctx->mac_type; + fctx->enc.enc_ctrl.e.mac_len = mac_len; + fctx->enc.enc_ctrl.flags = cpu_to_be64(fctx->enc.enc_ctrl.flags); + + /* + * Storing Packet Data Information in offset + * Control Word First 8 bytes + */ + req_info->in[*argcnt].vptr = (u8 *)&rctx->ctrl_word; + req_info->in[*argcnt].size = CONTROL_WORD_LEN; + req_info->req.dlen += CONTROL_WORD_LEN; + ++(*argcnt); + + req_info->in[*argcnt].vptr = (u8 *)fctx; + req_info->in[*argcnt].size = sizeof(struct otx_cpt_fc_ctx); + req_info->req.dlen += sizeof(struct otx_cpt_fc_ctx); + ++(*argcnt); + + return 0; +} + +static inline u32 create_hmac_ctx_hdr(struct aead_request *req, u32 *argcnt, + u32 enc) +{ + struct otx_cpt_req_ctx *rctx = aead_request_ctx(req); + struct crypto_aead *tfm = crypto_aead_reqtfm(req); + struct otx_cpt_aead_ctx *ctx = crypto_aead_ctx(tfm); + struct otx_cpt_req_info *req_info = &rctx->cpt_req; + + req_info->ctrl.s.dma_mode = OTX_CPT_DMA_GATHER_SCATTER; + req_info->ctrl.s.se_req = OTX_CPT_SE_CORE_REQ; + req_info->req.opcode.s.major = OTX_CPT_MAJOR_OP_HMAC | + DMA_MODE_FLAG(OTX_CPT_DMA_GATHER_SCATTER); + req_info->is_trunc_hmac = ctx->is_trunc_hmac; + + req_info->req.opcode.s.minor = 0; + req_info->req.param1 = ctx->auth_key_len; + req_info->req.param2 = ctx->mac_type << 8; + + /* Add authentication key */ + req_info->in[*argcnt].vptr = ctx->key; + req_info->in[*argcnt].size = round_up(ctx->auth_key_len, 8); + req_info->req.dlen += round_up(ctx->auth_key_len, 8); + ++(*argcnt); + + return 0; +} + +static inline u32 create_aead_input_list(struct aead_request *req, u32 enc) +{ + struct otx_cpt_req_ctx *rctx = aead_request_ctx(req); + struct otx_cpt_req_info *req_info = &rctx->cpt_req; + u32 inputlen = req->cryptlen + req->assoclen; + u32 status, argcnt = 0; + + status = create_aead_ctx_hdr(req, enc, &argcnt); + if (status) + return status; + update_input_data(req_info, req->src, inputlen, &argcnt); + req_info->incnt = argcnt; + + return 0; +} + +static inline u32 create_aead_output_list(struct aead_request *req, u32 enc, + u32 mac_len) +{ + struct otx_cpt_req_ctx *rctx = aead_request_ctx(req); + struct otx_cpt_req_info *req_info = &rctx->cpt_req; + u32 argcnt = 0, outputlen = 0; + + if (enc) + outputlen = req->cryptlen + req->assoclen + mac_len; + else + outputlen = req->cryptlen + req->assoclen - mac_len; + + update_output_data(req_info, req->dst, 0, outputlen, &argcnt); + req_info->outcnt = argcnt; + + return 0; +} + +static inline u32 create_aead_null_input_list(struct aead_request *req, + u32 enc, u32 mac_len) +{ + struct otx_cpt_req_ctx *rctx = aead_request_ctx(req); + struct otx_cpt_req_info *req_info = &rctx->cpt_req; + u32 inputlen, argcnt = 0; + + if (enc) + inputlen = req->cryptlen + req->assoclen; + else + inputlen = req->cryptlen + req->assoclen - mac_len; + + create_hmac_ctx_hdr(req, &argcnt, enc); + update_input_data(req_info, req->src, inputlen, &argcnt); + req_info->incnt = argcnt; + + return 0; +} + +static inline u32 create_aead_null_output_list(struct aead_request *req, + u32 enc, u32 mac_len) +{ + struct otx_cpt_req_ctx *rctx = aead_request_ctx(req); + struct otx_cpt_req_info *req_info = &rctx->cpt_req; + struct scatterlist *dst; + u8 *ptr = NULL; + int argcnt = 0, status, offset; + u32 inputlen; + + if (enc) + inputlen = req->cryptlen + req->assoclen; + else + inputlen = req->cryptlen + req->assoclen - mac_len; + + /* + * If source and destination are different + * then copy payload to destination + */ + if (req->src != req->dst) { + + ptr = kmalloc(inputlen, (req_info->areq->flags & + CRYPTO_TFM_REQ_MAY_SLEEP) ? + GFP_KERNEL : GFP_ATOMIC); + if (!ptr) { + status = -ENOMEM; + goto error; + } + + status = sg_copy_to_buffer(req->src, sg_nents(req->src), ptr, + inputlen); + if (status != inputlen) { + status = -EINVAL; + goto error_free; + } + status = sg_copy_from_buffer(req->dst, sg_nents(req->dst), ptr, + inputlen); + if (status != inputlen) { + status = -EINVAL; + goto error_free; + } + kfree(ptr); + } + + if (enc) { + /* + * In an encryption scenario hmac needs + * to be appended after payload + */ + dst = req->dst; + offset = inputlen; + while (offset >= dst->length) { + offset -= dst->length; + dst = sg_next(dst); + if (!dst) { + status = -ENOENT; + goto error; + } + } + + update_output_data(req_info, dst, offset, mac_len, &argcnt); + } else { + /* + * In a decryption scenario calculated hmac for received + * payload needs to be compare with hmac received + */ + status = sg_copy_buffer(req->src, sg_nents(req->src), + rctx->fctx.hmac.s.hmac_recv, mac_len, + inputlen, true); + if (status != mac_len) { + status = -EINVAL; + goto error; + } + + req_info->out[argcnt].vptr = rctx->fctx.hmac.s.hmac_calc; + req_info->out[argcnt].size = mac_len; + argcnt++; + } + + req_info->outcnt = argcnt; + return 0; + +error_free: + kfree(ptr); +error: + return status; +} + +static u32 cpt_aead_enc_dec(struct aead_request *req, u8 reg_type, u8 enc) +{ + struct otx_cpt_req_ctx *rctx = aead_request_ctx(req); + struct otx_cpt_req_info *req_info = &rctx->cpt_req; + struct crypto_aead *tfm = crypto_aead_reqtfm(req); + struct pci_dev *pdev; + u32 status, cpu_num; + + /* Clear control words */ + rctx->ctrl_word.flags = 0; + rctx->fctx.enc.enc_ctrl.flags = 0; + + req_info->callback = otx_cpt_aead_callback; + req_info->areq = &req->base; + req_info->req_type = reg_type; + req_info->is_enc = enc; + req_info->is_trunc_hmac = false; + + switch (reg_type) { + case OTX_CPT_AEAD_ENC_DEC_REQ: + status = create_aead_input_list(req, enc); + if (status) + return status; + status = create_aead_output_list(req, enc, + crypto_aead_authsize(tfm)); + if (status) + return status; + break; + + case OTX_CPT_AEAD_ENC_DEC_NULL_REQ: + status = create_aead_null_input_list(req, enc, + crypto_aead_authsize(tfm)); + if (status) + return status; + status = create_aead_null_output_list(req, enc, + crypto_aead_authsize(tfm)); + if (status) + return status; + break; + + default: + return -EINVAL; + } + + /* Validate that request doesn't exceed maximum CPT supported size */ + if (req_info->req.param1 > OTX_CPT_MAX_REQ_SIZE || + req_info->req.param2 > OTX_CPT_MAX_REQ_SIZE) + return -E2BIG; + + status = get_se_device(&pdev, &cpu_num); + if (status) + return status; + + req_info->ctrl.s.grp = 0; + + status = otx_cpt_do_request(pdev, req_info, cpu_num); + /* + * We perform an asynchronous send and once + * the request is completed the driver would + * intimate through registered call back functions + */ + return status; +} + +static int otx_cpt_aead_encrypt(struct aead_request *req) +{ + return cpt_aead_enc_dec(req, OTX_CPT_AEAD_ENC_DEC_REQ, true); +} + +static int otx_cpt_aead_decrypt(struct aead_request *req) +{ + return cpt_aead_enc_dec(req, OTX_CPT_AEAD_ENC_DEC_REQ, false); +} + +static int otx_cpt_aead_null_encrypt(struct aead_request *req) +{ + return cpt_aead_enc_dec(req, OTX_CPT_AEAD_ENC_DEC_NULL_REQ, true); +} + +static int otx_cpt_aead_null_decrypt(struct aead_request *req) +{ + return cpt_aead_enc_dec(req, OTX_CPT_AEAD_ENC_DEC_NULL_REQ, false); +} + +static struct skcipher_alg otx_cpt_skciphers[] = { { + .base.cra_name = "xts(aes)", + .base.cra_driver_name = "cpt_xts_aes", + .base.cra_flags = CRYPTO_ALG_ASYNC, + .base.cra_blocksize = AES_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct otx_cpt_enc_ctx), + .base.cra_alignmask = 7, + .base.cra_priority = 4001, + .base.cra_module = THIS_MODULE, + + .init = otx_cpt_enc_dec_init, + .ivsize = AES_BLOCK_SIZE, + .min_keysize = 2 * AES_MIN_KEY_SIZE, + .max_keysize = 2 * AES_MAX_KEY_SIZE, + .setkey = otx_cpt_skcipher_xts_setkey, + .encrypt = otx_cpt_skcipher_encrypt, + .decrypt = otx_cpt_skcipher_decrypt, +}, { + .base.cra_name = "cbc(aes)", + .base.cra_driver_name = "cpt_cbc_aes", + .base.cra_flags = CRYPTO_ALG_ASYNC, + .base.cra_blocksize = AES_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct otx_cpt_enc_ctx), + .base.cra_alignmask = 7, + .base.cra_priority = 4001, + .base.cra_module = THIS_MODULE, + + .init = otx_cpt_enc_dec_init, + .ivsize = AES_BLOCK_SIZE, + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .setkey = otx_cpt_skcipher_cbc_aes_setkey, + .encrypt = otx_cpt_skcipher_encrypt, + .decrypt = otx_cpt_skcipher_decrypt, +}, { + .base.cra_name = "ecb(aes)", + .base.cra_driver_name = "cpt_ecb_aes", + .base.cra_flags = CRYPTO_ALG_ASYNC, + .base.cra_blocksize = AES_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct otx_cpt_enc_ctx), + .base.cra_alignmask = 7, + .base.cra_priority = 4001, + .base.cra_module = THIS_MODULE, + + .init = otx_cpt_enc_dec_init, + .ivsize = 0, + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .setkey = otx_cpt_skcipher_ecb_aes_setkey, + .encrypt = otx_cpt_skcipher_encrypt, + .decrypt = otx_cpt_skcipher_decrypt, +}, { + .base.cra_name = "cfb(aes)", + .base.cra_driver_name = "cpt_cfb_aes", + .base.cra_flags = CRYPTO_ALG_ASYNC, + .base.cra_blocksize = AES_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct otx_cpt_enc_ctx), + .base.cra_alignmask = 7, + .base.cra_priority = 4001, + .base.cra_module = THIS_MODULE, + + .init = otx_cpt_enc_dec_init, + .ivsize = AES_BLOCK_SIZE, + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .setkey = otx_cpt_skcipher_cfb_aes_setkey, + .encrypt = otx_cpt_skcipher_encrypt, + .decrypt = otx_cpt_skcipher_decrypt, +}, { + .base.cra_name = "cbc(des3_ede)", + .base.cra_driver_name = "cpt_cbc_des3_ede", + .base.cra_flags = CRYPTO_ALG_ASYNC, + .base.cra_blocksize = DES3_EDE_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct otx_cpt_des3_ctx), + .base.cra_alignmask = 7, + .base.cra_priority = 4001, + .base.cra_module = THIS_MODULE, + + .init = otx_cpt_enc_dec_init, + .min_keysize = DES3_EDE_KEY_SIZE, + .max_keysize = DES3_EDE_KEY_SIZE, + .ivsize = DES_BLOCK_SIZE, + .setkey = otx_cpt_skcipher_cbc_des3_setkey, + .encrypt = otx_cpt_skcipher_encrypt, + .decrypt = otx_cpt_skcipher_decrypt, +}, { + .base.cra_name = "ecb(des3_ede)", + .base.cra_driver_name = "cpt_ecb_des3_ede", + .base.cra_flags = CRYPTO_ALG_ASYNC, + .base.cra_blocksize = DES3_EDE_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct otx_cpt_des3_ctx), + .base.cra_alignmask = 7, + .base.cra_priority = 4001, + .base.cra_module = THIS_MODULE, + + .init = otx_cpt_enc_dec_init, + .min_keysize = DES3_EDE_KEY_SIZE, + .max_keysize = DES3_EDE_KEY_SIZE, + .ivsize = 0, + .setkey = otx_cpt_skcipher_ecb_des3_setkey, + .encrypt = otx_cpt_skcipher_encrypt, + .decrypt = otx_cpt_skcipher_decrypt, +} }; + +static struct aead_alg otx_cpt_aeads[] = { { + .base = { + .cra_name = "authenc(hmac(sha1),cbc(aes))", + .cra_driver_name = "cpt_hmac_sha1_cbc_aes", + .cra_blocksize = AES_BLOCK_SIZE, + .cra_flags = CRYPTO_ALG_ASYNC, + .cra_ctxsize = sizeof(struct otx_cpt_aead_ctx), + .cra_priority = 4001, + .cra_alignmask = 0, + .cra_module = THIS_MODULE, + }, + .init = otx_cpt_aead_cbc_aes_sha1_init, + .exit = otx_cpt_aead_exit, + .setkey = otx_cpt_aead_cbc_aes_sha_setkey, + .setauthsize = otx_cpt_aead_set_authsize, + .encrypt = otx_cpt_aead_encrypt, + .decrypt = otx_cpt_aead_decrypt, + .ivsize = AES_BLOCK_SIZE, + .maxauthsize = SHA1_DIGEST_SIZE, +}, { + .base = { + .cra_name = "authenc(hmac(sha256),cbc(aes))", + .cra_driver_name = "cpt_hmac_sha256_cbc_aes", + .cra_blocksize = AES_BLOCK_SIZE, + .cra_flags = CRYPTO_ALG_ASYNC, + .cra_ctxsize = sizeof(struct otx_cpt_aead_ctx), + .cra_priority = 4001, + .cra_alignmask = 0, + .cra_module = THIS_MODULE, + }, + .init = otx_cpt_aead_cbc_aes_sha256_init, + .exit = otx_cpt_aead_exit, + .setkey = otx_cpt_aead_cbc_aes_sha_setkey, + .setauthsize = otx_cpt_aead_set_authsize, + .encrypt = otx_cpt_aead_encrypt, + .decrypt = otx_cpt_aead_decrypt, + .ivsize = AES_BLOCK_SIZE, + .maxauthsize = SHA256_DIGEST_SIZE, +}, { + .base = { + .cra_name = "authenc(hmac(sha384),cbc(aes))", + .cra_driver_name = "cpt_hmac_sha384_cbc_aes", + .cra_blocksize = AES_BLOCK_SIZE, + .cra_flags = CRYPTO_ALG_ASYNC, + .cra_ctxsize = sizeof(struct otx_cpt_aead_ctx), + .cra_priority = 4001, + .cra_alignmask = 0, + .cra_module = THIS_MODULE, + }, + .init = otx_cpt_aead_cbc_aes_sha384_init, + .exit = otx_cpt_aead_exit, + .setkey = otx_cpt_aead_cbc_aes_sha_setkey, + .setauthsize = otx_cpt_aead_set_authsize, + .encrypt = otx_cpt_aead_encrypt, + .decrypt = otx_cpt_aead_decrypt, + .ivsize = AES_BLOCK_SIZE, + .maxauthsize = SHA384_DIGEST_SIZE, +}, { + .base = { + .cra_name = "authenc(hmac(sha512),cbc(aes))", + .cra_driver_name = "cpt_hmac_sha512_cbc_aes", + .cra_blocksize = AES_BLOCK_SIZE, + .cra_flags = CRYPTO_ALG_ASYNC, + .cra_ctxsize = sizeof(struct otx_cpt_aead_ctx), + .cra_priority = 4001, + .cra_alignmask = 0, + .cra_module = THIS_MODULE, + }, + .init = otx_cpt_aead_cbc_aes_sha512_init, + .exit = otx_cpt_aead_exit, + .setkey = otx_cpt_aead_cbc_aes_sha_setkey, + .setauthsize = otx_cpt_aead_set_authsize, + .encrypt = otx_cpt_aead_encrypt, + .decrypt = otx_cpt_aead_decrypt, + .ivsize = AES_BLOCK_SIZE, + .maxauthsize = SHA512_DIGEST_SIZE, +}, { + .base = { + .cra_name = "authenc(hmac(sha1),ecb(cipher_null))", + .cra_driver_name = "cpt_hmac_sha1_ecb_null", + .cra_blocksize = 1, + .cra_flags = CRYPTO_ALG_ASYNC, + .cra_ctxsize = sizeof(struct otx_cpt_aead_ctx), + .cra_priority = 4001, + .cra_alignmask = 0, + .cra_module = THIS_MODULE, + }, + .init = otx_cpt_aead_ecb_null_sha1_init, + .exit = otx_cpt_aead_exit, + .setkey = otx_cpt_aead_ecb_null_sha_setkey, + .setauthsize = otx_cpt_aead_set_authsize, + .encrypt = otx_cpt_aead_null_encrypt, + .decrypt = otx_cpt_aead_null_decrypt, + .ivsize = 0, + .maxauthsize = SHA1_DIGEST_SIZE, +}, { + .base = { + .cra_name = "authenc(hmac(sha256),ecb(cipher_null))", + .cra_driver_name = "cpt_hmac_sha256_ecb_null", + .cra_blocksize = 1, + .cra_flags = CRYPTO_ALG_ASYNC, + .cra_ctxsize = sizeof(struct otx_cpt_aead_ctx), + .cra_priority = 4001, + .cra_alignmask = 0, + .cra_module = THIS_MODULE, + }, + .init = otx_cpt_aead_ecb_null_sha256_init, + .exit = otx_cpt_aead_exit, + .setkey = otx_cpt_aead_ecb_null_sha_setkey, + .setauthsize = otx_cpt_aead_set_authsize, + .encrypt = otx_cpt_aead_null_encrypt, + .decrypt = otx_cpt_aead_null_decrypt, + .ivsize = 0, + .maxauthsize = SHA256_DIGEST_SIZE, +}, { + .base = { + .cra_name = "authenc(hmac(sha384),ecb(cipher_null))", + .cra_driver_name = "cpt_hmac_sha384_ecb_null", + .cra_blocksize = 1, + .cra_flags = CRYPTO_ALG_ASYNC, + .cra_ctxsize = sizeof(struct otx_cpt_aead_ctx), + .cra_priority = 4001, + .cra_alignmask = 0, + .cra_module = THIS_MODULE, + }, + .init = otx_cpt_aead_ecb_null_sha384_init, + .exit = otx_cpt_aead_exit, + .setkey = otx_cpt_aead_ecb_null_sha_setkey, + .setauthsize = otx_cpt_aead_set_authsize, + .encrypt = otx_cpt_aead_null_encrypt, + .decrypt = otx_cpt_aead_null_decrypt, + .ivsize = 0, + .maxauthsize = SHA384_DIGEST_SIZE, +}, { + .base = { + .cra_name = "authenc(hmac(sha512),ecb(cipher_null))", + .cra_driver_name = "cpt_hmac_sha512_ecb_null", + .cra_blocksize = 1, + .cra_flags = CRYPTO_ALG_ASYNC, + .cra_ctxsize = sizeof(struct otx_cpt_aead_ctx), + .cra_priority = 4001, + .cra_alignmask = 0, + .cra_module = THIS_MODULE, + }, + .init = otx_cpt_aead_ecb_null_sha512_init, + .exit = otx_cpt_aead_exit, + .setkey = otx_cpt_aead_ecb_null_sha_setkey, + .setauthsize = otx_cpt_aead_set_authsize, + .encrypt = otx_cpt_aead_null_encrypt, + .decrypt = otx_cpt_aead_null_decrypt, + .ivsize = 0, + .maxauthsize = SHA512_DIGEST_SIZE, +}, { + .base = { + .cra_name = "rfc4106(gcm(aes))", + .cra_driver_name = "cpt_rfc4106_gcm_aes", + .cra_blocksize = 1, + .cra_flags = CRYPTO_ALG_ASYNC, + .cra_ctxsize = sizeof(struct otx_cpt_aead_ctx), + .cra_priority = 4001, + .cra_alignmask = 0, + .cra_module = THIS_MODULE, + }, + .init = otx_cpt_aead_gcm_aes_init, + .exit = otx_cpt_aead_exit, + .setkey = otx_cpt_aead_gcm_aes_setkey, + .setauthsize = otx_cpt_aead_set_authsize, + .encrypt = otx_cpt_aead_encrypt, + .decrypt = otx_cpt_aead_decrypt, + .ivsize = AES_GCM_IV_SIZE, + .maxauthsize = AES_GCM_ICV_SIZE, +} }; + +static inline int is_any_alg_used(void) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(otx_cpt_skciphers); i++) + if (refcount_read(&otx_cpt_skciphers[i].base.cra_refcnt) != 1) + return true; + for (i = 0; i < ARRAY_SIZE(otx_cpt_aeads); i++) + if (refcount_read(&otx_cpt_aeads[i].base.cra_refcnt) != 1) + return true; + return false; +} + +static inline int cpt_register_algs(void) +{ + int i, err = 0; + + if (!IS_ENABLED(CONFIG_DM_CRYPT)) { + for (i = 0; i < ARRAY_SIZE(otx_cpt_skciphers); i++) + otx_cpt_skciphers[i].base.cra_flags &= ~CRYPTO_ALG_DEAD; + + err = crypto_register_skciphers(otx_cpt_skciphers, + ARRAY_SIZE(otx_cpt_skciphers)); + if (err) + return err; + } + + for (i = 0; i < ARRAY_SIZE(otx_cpt_aeads); i++) + otx_cpt_aeads[i].base.cra_flags &= ~CRYPTO_ALG_DEAD; + + err = crypto_register_aeads(otx_cpt_aeads, ARRAY_SIZE(otx_cpt_aeads)); + if (err) { + crypto_unregister_skciphers(otx_cpt_skciphers, + ARRAY_SIZE(otx_cpt_skciphers)); + return err; + } + + return 0; +} + +static inline void cpt_unregister_algs(void) +{ + crypto_unregister_skciphers(otx_cpt_skciphers, + ARRAY_SIZE(otx_cpt_skciphers)); + crypto_unregister_aeads(otx_cpt_aeads, ARRAY_SIZE(otx_cpt_aeads)); +} + +static int compare_func(const void *lptr, const void *rptr) +{ + struct cpt_device_desc *ldesc = (struct cpt_device_desc *) lptr; + struct cpt_device_desc *rdesc = (struct cpt_device_desc *) rptr; + + if (ldesc->dev->devfn < rdesc->dev->devfn) + return -1; + if (ldesc->dev->devfn > rdesc->dev->devfn) + return 1; + return 0; +} + +static void swap_func(void *lptr, void *rptr, int size) +{ + struct cpt_device_desc *ldesc = (struct cpt_device_desc *) lptr; + struct cpt_device_desc *rdesc = (struct cpt_device_desc *) rptr; + struct cpt_device_desc desc; + + desc = *ldesc; + *ldesc = *rdesc; + *rdesc = desc; +} + +int otx_cpt_crypto_init(struct pci_dev *pdev, struct module *mod, + enum otx_cptpf_type pf_type, + enum otx_cptvf_type engine_type, + int num_queues, int num_devices) +{ + int ret = 0; + int count; + + mutex_lock(&mutex); + switch (engine_type) { + case OTX_CPT_SE_TYPES: + count = atomic_read(&se_devices.count); + if (count >= CPT_MAX_VF_NUM) { + dev_err(&pdev->dev, "No space to add a new device"); + ret = -ENOSPC; + goto err; + } + se_devices.desc[count].pf_type = pf_type; + se_devices.desc[count].num_queues = num_queues; + se_devices.desc[count++].dev = pdev; + atomic_inc(&se_devices.count); + + if (atomic_read(&se_devices.count) == num_devices && + is_crypto_registered == false) { + if (cpt_register_algs()) { + dev_err(&pdev->dev, + "Error in registering crypto algorithms\n"); + ret = -EINVAL; + goto err; + } + try_module_get(mod); + is_crypto_registered = true; + } + sort(se_devices.desc, count, sizeof(struct cpt_device_desc), + compare_func, swap_func); + break; + + case OTX_CPT_AE_TYPES: + count = atomic_read(&ae_devices.count); + if (count >= CPT_MAX_VF_NUM) { + dev_err(&pdev->dev, "No space to a add new device"); + ret = -ENOSPC; + goto err; + } + ae_devices.desc[count].pf_type = pf_type; + ae_devices.desc[count].num_queues = num_queues; + ae_devices.desc[count++].dev = pdev; + atomic_inc(&ae_devices.count); + sort(ae_devices.desc, count, sizeof(struct cpt_device_desc), + compare_func, swap_func); + break; + + default: + dev_err(&pdev->dev, "Unknown VF type %d\n", engine_type); + ret = BAD_OTX_CPTVF_TYPE; + } +err: + mutex_unlock(&mutex); + return ret; +} + +void otx_cpt_crypto_exit(struct pci_dev *pdev, struct module *mod, + enum otx_cptvf_type engine_type) +{ + struct cpt_device_table *dev_tbl; + bool dev_found = false; + int i, j, count; + + mutex_lock(&mutex); + + dev_tbl = (engine_type == OTX_CPT_AE_TYPES) ? &ae_devices : &se_devices; + count = atomic_read(&dev_tbl->count); + for (i = 0; i < count; i++) + if (pdev == dev_tbl->desc[i].dev) { + for (j = i; j < count-1; j++) + dev_tbl->desc[j] = dev_tbl->desc[j+1]; + dev_found = true; + break; + } + + if (!dev_found) { + dev_err(&pdev->dev, "%s device not found", __func__); + goto exit; + } + + if (engine_type != OTX_CPT_AE_TYPES) { + if (atomic_dec_and_test(&se_devices.count) && + !is_any_alg_used()) { + cpt_unregister_algs(); + module_put(mod); + is_crypto_registered = false; + } + } else + atomic_dec(&ae_devices.count); +exit: + mutex_unlock(&mutex); +} diff --git a/drivers/crypto/marvell/octeontx/otx_cptvf_algs.h b/drivers/crypto/marvell/octeontx/otx_cptvf_algs.h new file mode 100644 index 000000000000..67cc0025f5d5 --- /dev/null +++ b/drivers/crypto/marvell/octeontx/otx_cptvf_algs.h @@ -0,0 +1,188 @@ +/* SPDX-License-Identifier: GPL-2.0 + * Marvell OcteonTX CPT driver + * + * Copyright (C) 2019 Marvell International Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef __OTX_CPT_ALGS_H +#define __OTX_CPT_ALGS_H + +#include <crypto/hash.h> +#include "otx_cpt_common.h" + +#define OTX_CPT_MAX_ENC_KEY_SIZE 32 +#define OTX_CPT_MAX_HASH_KEY_SIZE 64 +#define OTX_CPT_MAX_KEY_SIZE (OTX_CPT_MAX_ENC_KEY_SIZE + \ + OTX_CPT_MAX_HASH_KEY_SIZE) +enum otx_cpt_request_type { + OTX_CPT_ENC_DEC_REQ = 0x1, + OTX_CPT_AEAD_ENC_DEC_REQ = 0x2, + OTX_CPT_AEAD_ENC_DEC_NULL_REQ = 0x3, + OTX_CPT_PASSTHROUGH_REQ = 0x4 +}; + +enum otx_cpt_major_opcodes { + OTX_CPT_MAJOR_OP_MISC = 0x01, + OTX_CPT_MAJOR_OP_FC = 0x33, + OTX_CPT_MAJOR_OP_HMAC = 0x35, +}; + +enum otx_cpt_req_type { + OTX_CPT_AE_CORE_REQ, + OTX_CPT_SE_CORE_REQ +}; + +enum otx_cpt_cipher_type { + OTX_CPT_CIPHER_NULL = 0x0, + OTX_CPT_DES3_CBC = 0x1, + OTX_CPT_DES3_ECB = 0x2, + OTX_CPT_AES_CBC = 0x3, + OTX_CPT_AES_ECB = 0x4, + OTX_CPT_AES_CFB = 0x5, + OTX_CPT_AES_CTR = 0x6, + OTX_CPT_AES_GCM = 0x7, + OTX_CPT_AES_XTS = 0x8 +}; + +enum otx_cpt_mac_type { + OTX_CPT_MAC_NULL = 0x0, + OTX_CPT_MD5 = 0x1, + OTX_CPT_SHA1 = 0x2, + OTX_CPT_SHA224 = 0x3, + OTX_CPT_SHA256 = 0x4, + OTX_CPT_SHA384 = 0x5, + OTX_CPT_SHA512 = 0x6, + OTX_CPT_GMAC = 0x7 +}; + +enum otx_cpt_aes_key_len { + OTX_CPT_AES_128_BIT = 0x1, + OTX_CPT_AES_192_BIT = 0x2, + OTX_CPT_AES_256_BIT = 0x3 +}; + +union otx_cpt_encr_ctrl { + u64 flags; + struct { +#if defined(__BIG_ENDIAN_BITFIELD) + u64 enc_cipher:4; + u64 reserved1:1; + u64 aes_key:2; + u64 iv_source:1; + u64 mac_type:4; + u64 reserved2:3; + u64 auth_input_type:1; + u64 mac_len:8; + u64 reserved3:8; + u64 encr_offset:16; + u64 iv_offset:8; + u64 auth_offset:8; +#else + u64 auth_offset:8; + u64 iv_offset:8; + u64 encr_offset:16; + u64 reserved3:8; + u64 mac_len:8; + u64 auth_input_type:1; + u64 reserved2:3; + u64 mac_type:4; + u64 iv_source:1; + u64 aes_key:2; + u64 reserved1:1; + u64 enc_cipher:4; +#endif + } e; +}; + +struct otx_cpt_cipher { + const char *name; + u8 value; +}; + +struct otx_cpt_enc_context { + union otx_cpt_encr_ctrl enc_ctrl; + u8 encr_key[32]; + u8 encr_iv[16]; +}; + +union otx_cpt_fchmac_ctx { + struct { + u8 ipad[64]; + u8 opad[64]; + } e; + struct { + u8 hmac_calc[64]; /* HMAC calculated */ + u8 hmac_recv[64]; /* HMAC received */ + } s; +}; + +struct otx_cpt_fc_ctx { + struct otx_cpt_enc_context enc; + union otx_cpt_fchmac_ctx hmac; +}; + +struct otx_cpt_enc_ctx { + u32 key_len; + u8 enc_key[OTX_CPT_MAX_KEY_SIZE]; + u8 cipher_type; + u8 key_type; +}; + +struct otx_cpt_des3_ctx { + u32 key_len; + u8 des3_key[OTX_CPT_MAX_KEY_SIZE]; +}; + +union otx_cpt_offset_ctrl_word { + u64 flags; + struct { +#if defined(__BIG_ENDIAN_BITFIELD) + u64 reserved:32; + u64 enc_data_offset:16; + u64 iv_offset:8; + u64 auth_offset:8; +#else + u64 auth_offset:8; + u64 iv_offset:8; + u64 enc_data_offset:16; + u64 reserved:32; +#endif + } e; +}; + +struct otx_cpt_req_ctx { + struct otx_cpt_req_info cpt_req; + union otx_cpt_offset_ctrl_word ctrl_word; + struct otx_cpt_fc_ctx fctx; +}; + +struct otx_cpt_sdesc { + struct shash_desc shash; +}; + +struct otx_cpt_aead_ctx { + u8 key[OTX_CPT_MAX_KEY_SIZE]; + struct crypto_shash *hashalg; + struct otx_cpt_sdesc *sdesc; + u8 *ipad; + u8 *opad; + u32 enc_key_len; + u32 auth_key_len; + u8 cipher_type; + u8 mac_type; + u8 key_type; + u8 is_trunc_hmac; +}; +int otx_cpt_crypto_init(struct pci_dev *pdev, struct module *mod, + enum otx_cptpf_type pf_type, + enum otx_cptvf_type engine_type, + int num_queues, int num_devices); +void otx_cpt_crypto_exit(struct pci_dev *pdev, struct module *mod, + enum otx_cptvf_type engine_type); +void otx_cpt_callback(int status, void *arg, void *req); + +#endif /* __OTX_CPT_ALGS_H */ diff --git a/drivers/crypto/marvell/octeontx/otx_cptvf_main.c b/drivers/crypto/marvell/octeontx/otx_cptvf_main.c new file mode 100644 index 000000000000..a91860b5dc77 --- /dev/null +++ b/drivers/crypto/marvell/octeontx/otx_cptvf_main.c @@ -0,0 +1,985 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Marvell OcteonTX CPT driver + * + * Copyright (C) 2019 Marvell International Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/interrupt.h> +#include <linux/module.h> +#include "otx_cptvf.h" +#include "otx_cptvf_algs.h" +#include "otx_cptvf_reqmgr.h" + +#define DRV_NAME "octeontx-cptvf" +#define DRV_VERSION "1.0" + +static void vq_work_handler(unsigned long data) +{ + struct otx_cptvf_wqe_info *cwqe_info = + (struct otx_cptvf_wqe_info *) data; + + otx_cpt_post_process(&cwqe_info->vq_wqe[0]); +} + +static int init_worker_threads(struct otx_cptvf *cptvf) +{ + struct pci_dev *pdev = cptvf->pdev; + struct otx_cptvf_wqe_info *cwqe_info; + int i; + + cwqe_info = kzalloc(sizeof(*cwqe_info), GFP_KERNEL); + if (!cwqe_info) + return -ENOMEM; + + if (cptvf->num_queues) { + dev_dbg(&pdev->dev, "Creating VQ worker threads (%d)\n", + cptvf->num_queues); + } + + for (i = 0; i < cptvf->num_queues; i++) { + tasklet_init(&cwqe_info->vq_wqe[i].twork, vq_work_handler, + (u64)cwqe_info); + cwqe_info->vq_wqe[i].cptvf = cptvf; + } + cptvf->wqe_info = cwqe_info; + + return 0; +} + +static void cleanup_worker_threads(struct otx_cptvf *cptvf) +{ + struct pci_dev *pdev = cptvf->pdev; + struct otx_cptvf_wqe_info *cwqe_info; + int i; + + cwqe_info = (struct otx_cptvf_wqe_info *)cptvf->wqe_info; + if (!cwqe_info) + return; + + if (cptvf->num_queues) { + dev_dbg(&pdev->dev, "Cleaning VQ worker threads (%u)\n", + cptvf->num_queues); + } + + for (i = 0; i < cptvf->num_queues; i++) + tasklet_kill(&cwqe_info->vq_wqe[i].twork); + + kzfree(cwqe_info); + cptvf->wqe_info = NULL; +} + +static void free_pending_queues(struct otx_cpt_pending_qinfo *pqinfo) +{ + struct otx_cpt_pending_queue *queue; + int i; + + for_each_pending_queue(pqinfo, queue, i) { + if (!queue->head) + continue; + + /* free single queue */ + kzfree((queue->head)); + queue->front = 0; + queue->rear = 0; + queue->qlen = 0; + } + pqinfo->num_queues = 0; +} + +static int alloc_pending_queues(struct otx_cpt_pending_qinfo *pqinfo, u32 qlen, + u32 num_queues) +{ + struct otx_cpt_pending_queue *queue = NULL; + size_t size; + int ret; + u32 i; + + pqinfo->num_queues = num_queues; + size = (qlen * sizeof(struct otx_cpt_pending_entry)); + + for_each_pending_queue(pqinfo, queue, i) { + queue->head = kzalloc((size), GFP_KERNEL); + if (!queue->head) { + ret = -ENOMEM; + goto pending_qfail; + } + + queue->pending_count = 0; + queue->front = 0; + queue->rear = 0; + queue->qlen = qlen; + + /* init queue spin lock */ + spin_lock_init(&queue->lock); + } + return 0; + +pending_qfail: + free_pending_queues(pqinfo); + + return ret; +} + +static int init_pending_queues(struct otx_cptvf *cptvf, u32 qlen, + u32 num_queues) +{ + struct pci_dev *pdev = cptvf->pdev; + int ret; + + if (!num_queues) + return 0; + + ret = alloc_pending_queues(&cptvf->pqinfo, qlen, num_queues); + if (ret) { + dev_err(&pdev->dev, "Failed to setup pending queues (%u)\n", + num_queues); + return ret; + } + return 0; +} + +static void cleanup_pending_queues(struct otx_cptvf *cptvf) +{ + struct pci_dev *pdev = cptvf->pdev; + + if (!cptvf->num_queues) + return; + + dev_dbg(&pdev->dev, "Cleaning VQ pending queue (%u)\n", + cptvf->num_queues); + free_pending_queues(&cptvf->pqinfo); +} + +static void free_command_queues(struct otx_cptvf *cptvf, + struct otx_cpt_cmd_qinfo *cqinfo) +{ + struct otx_cpt_cmd_queue *queue = NULL; + struct otx_cpt_cmd_chunk *chunk = NULL; + struct pci_dev *pdev = cptvf->pdev; + int i; + + /* clean up for each queue */ + for (i = 0; i < cptvf->num_queues; i++) { + queue = &cqinfo->queue[i]; + + while (!list_empty(&cqinfo->queue[i].chead)) { + chunk = list_first_entry(&cqinfo->queue[i].chead, + struct otx_cpt_cmd_chunk, nextchunk); + + dma_free_coherent(&pdev->dev, chunk->size, + chunk->head, + chunk->dma_addr); + chunk->head = NULL; + chunk->dma_addr = 0; + list_del(&chunk->nextchunk); + kzfree(chunk); + } + queue->num_chunks = 0; + queue->idx = 0; + + } +} + +static int alloc_command_queues(struct otx_cptvf *cptvf, + struct otx_cpt_cmd_qinfo *cqinfo, + u32 qlen) +{ + struct otx_cpt_cmd_chunk *curr, *first, *last; + struct otx_cpt_cmd_queue *queue = NULL; + struct pci_dev *pdev = cptvf->pdev; + size_t q_size, c_size, rem_q_size; + u32 qcsize_bytes; + int i; + + + /* Qsize in dwords, needed for SADDR config, 1-next chunk pointer */ + cptvf->qsize = min(qlen, cqinfo->qchunksize) * + OTX_CPT_NEXT_CHUNK_PTR_SIZE + 1; + /* Qsize in bytes to create space for alignment */ + q_size = qlen * OTX_CPT_INST_SIZE; + + qcsize_bytes = cqinfo->qchunksize * OTX_CPT_INST_SIZE; + + /* per queue initialization */ + for (i = 0; i < cptvf->num_queues; i++) { + c_size = 0; + rem_q_size = q_size; + first = NULL; + last = NULL; + + queue = &cqinfo->queue[i]; + INIT_LIST_HEAD(&queue->chead); + do { + curr = kzalloc(sizeof(*curr), GFP_KERNEL); + if (!curr) + goto cmd_qfail; + + c_size = (rem_q_size > qcsize_bytes) ? qcsize_bytes : + rem_q_size; + curr->head = dma_alloc_coherent(&pdev->dev, + c_size + OTX_CPT_NEXT_CHUNK_PTR_SIZE, + &curr->dma_addr, GFP_KERNEL); + if (!curr->head) { + dev_err(&pdev->dev, + "Command Q (%d) chunk (%d) allocation failed\n", + i, queue->num_chunks); + goto free_curr; + } + curr->size = c_size; + + if (queue->num_chunks == 0) { + first = curr; + queue->base = first; + } + list_add_tail(&curr->nextchunk, + &cqinfo->queue[i].chead); + + queue->num_chunks++; + rem_q_size -= c_size; + if (last) + *((u64 *)(&last->head[last->size])) = + (u64)curr->dma_addr; + + last = curr; + } while (rem_q_size); + + /* + * Make the queue circular, tie back last chunk entry to head + */ + curr = first; + *((u64 *)(&last->head[last->size])) = (u64)curr->dma_addr; + queue->qhead = curr; + } + return 0; +free_curr: + kfree(curr); +cmd_qfail: + free_command_queues(cptvf, cqinfo); + return -ENOMEM; +} + +static int init_command_queues(struct otx_cptvf *cptvf, u32 qlen) +{ + struct pci_dev *pdev = cptvf->pdev; + int ret; + + /* setup command queues */ + ret = alloc_command_queues(cptvf, &cptvf->cqinfo, qlen); + if (ret) { + dev_err(&pdev->dev, "Failed to allocate command queues (%u)\n", + cptvf->num_queues); + return ret; + } + return ret; +} + +static void cleanup_command_queues(struct otx_cptvf *cptvf) +{ + struct pci_dev *pdev = cptvf->pdev; + + if (!cptvf->num_queues) + return; + + dev_dbg(&pdev->dev, "Cleaning VQ command queue (%u)\n", + cptvf->num_queues); + free_command_queues(cptvf, &cptvf->cqinfo); +} + +static void cptvf_sw_cleanup(struct otx_cptvf *cptvf) +{ + cleanup_worker_threads(cptvf); + cleanup_pending_queues(cptvf); + cleanup_command_queues(cptvf); +} + +static int cptvf_sw_init(struct otx_cptvf *cptvf, u32 qlen, u32 num_queues) +{ + struct pci_dev *pdev = cptvf->pdev; + u32 max_dev_queues = 0; + int ret; + + max_dev_queues = OTX_CPT_NUM_QS_PER_VF; + /* possible cpus */ + num_queues = min_t(u32, num_queues, max_dev_queues); + cptvf->num_queues = num_queues; + + ret = init_command_queues(cptvf, qlen); + if (ret) { + dev_err(&pdev->dev, "Failed to setup command queues (%u)\n", + num_queues); + return ret; + } + + ret = init_pending_queues(cptvf, qlen, num_queues); + if (ret) { + dev_err(&pdev->dev, "Failed to setup pending queues (%u)\n", + num_queues); + goto setup_pqfail; + } + + /* Create worker threads for BH processing */ + ret = init_worker_threads(cptvf); + if (ret) { + dev_err(&pdev->dev, "Failed to setup worker threads\n"); + goto init_work_fail; + } + return 0; + +init_work_fail: + cleanup_worker_threads(cptvf); + cleanup_pending_queues(cptvf); + +setup_pqfail: + cleanup_command_queues(cptvf); + + return ret; +} + +static void cptvf_free_irq_affinity(struct otx_cptvf *cptvf, int vec) +{ + irq_set_affinity_hint(pci_irq_vector(cptvf->pdev, vec), NULL); + free_cpumask_var(cptvf->affinity_mask[vec]); +} + +static void cptvf_write_vq_ctl(struct otx_cptvf *cptvf, bool val) +{ + union otx_cptx_vqx_ctl vqx_ctl; + + vqx_ctl.u = readq(cptvf->reg_base + OTX_CPT_VQX_CTL(0)); + vqx_ctl.s.ena = val; + writeq(vqx_ctl.u, cptvf->reg_base + OTX_CPT_VQX_CTL(0)); +} + +void otx_cptvf_write_vq_doorbell(struct otx_cptvf *cptvf, u32 val) +{ + union otx_cptx_vqx_doorbell vqx_dbell; + + vqx_dbell.u = readq(cptvf->reg_base + OTX_CPT_VQX_DOORBELL(0)); + vqx_dbell.s.dbell_cnt = val * 8; /* Num of Instructions * 8 words */ + writeq(vqx_dbell.u, cptvf->reg_base + OTX_CPT_VQX_DOORBELL(0)); +} + +static void cptvf_write_vq_inprog(struct otx_cptvf *cptvf, u8 val) +{ + union otx_cptx_vqx_inprog vqx_inprg; + + vqx_inprg.u = readq(cptvf->reg_base + OTX_CPT_VQX_INPROG(0)); + vqx_inprg.s.inflight = val; + writeq(vqx_inprg.u, cptvf->reg_base + OTX_CPT_VQX_INPROG(0)); +} + +static void cptvf_write_vq_done_numwait(struct otx_cptvf *cptvf, u32 val) +{ + union otx_cptx_vqx_done_wait vqx_dwait; + + vqx_dwait.u = readq(cptvf->reg_base + OTX_CPT_VQX_DONE_WAIT(0)); + vqx_dwait.s.num_wait = val; + writeq(vqx_dwait.u, cptvf->reg_base + OTX_CPT_VQX_DONE_WAIT(0)); +} + +static u32 cptvf_read_vq_done_numwait(struct otx_cptvf *cptvf) +{ + union otx_cptx_vqx_done_wait vqx_dwait; + + vqx_dwait.u = readq(cptvf->reg_base + OTX_CPT_VQX_DONE_WAIT(0)); + return vqx_dwait.s.num_wait; +} + +static void cptvf_write_vq_done_timewait(struct otx_cptvf *cptvf, u16 time) +{ + union otx_cptx_vqx_done_wait vqx_dwait; + + vqx_dwait.u = readq(cptvf->reg_base + OTX_CPT_VQX_DONE_WAIT(0)); + vqx_dwait.s.time_wait = time; + writeq(vqx_dwait.u, cptvf->reg_base + OTX_CPT_VQX_DONE_WAIT(0)); +} + + +static u16 cptvf_read_vq_done_timewait(struct otx_cptvf *cptvf) +{ + union otx_cptx_vqx_done_wait vqx_dwait; + + vqx_dwait.u = readq(cptvf->reg_base + OTX_CPT_VQX_DONE_WAIT(0)); + return vqx_dwait.s.time_wait; +} + +static void cptvf_enable_swerr_interrupts(struct otx_cptvf *cptvf) +{ + union otx_cptx_vqx_misc_ena_w1s vqx_misc_ena; + + vqx_misc_ena.u = readq(cptvf->reg_base + OTX_CPT_VQX_MISC_ENA_W1S(0)); + /* Enable SWERR interrupts for the requested VF */ + vqx_misc_ena.s.swerr = 1; + writeq(vqx_misc_ena.u, cptvf->reg_base + OTX_CPT_VQX_MISC_ENA_W1S(0)); +} + +static void cptvf_enable_mbox_interrupts(struct otx_cptvf *cptvf) +{ + union otx_cptx_vqx_misc_ena_w1s vqx_misc_ena; + + vqx_misc_ena.u = readq(cptvf->reg_base + OTX_CPT_VQX_MISC_ENA_W1S(0)); + /* Enable MBOX interrupt for the requested VF */ + vqx_misc_ena.s.mbox = 1; + writeq(vqx_misc_ena.u, cptvf->reg_base + OTX_CPT_VQX_MISC_ENA_W1S(0)); +} + +static void cptvf_enable_done_interrupts(struct otx_cptvf *cptvf) +{ + union otx_cptx_vqx_done_ena_w1s vqx_done_ena; + + vqx_done_ena.u = readq(cptvf->reg_base + OTX_CPT_VQX_DONE_ENA_W1S(0)); + /* Enable DONE interrupt for the requested VF */ + vqx_done_ena.s.done = 1; + writeq(vqx_done_ena.u, cptvf->reg_base + OTX_CPT_VQX_DONE_ENA_W1S(0)); +} + +static void cptvf_clear_dovf_intr(struct otx_cptvf *cptvf) +{ + union otx_cptx_vqx_misc_int vqx_misc_int; + + vqx_misc_int.u = readq(cptvf->reg_base + OTX_CPT_VQX_MISC_INT(0)); + /* W1C for the VF */ + vqx_misc_int.s.dovf = 1; + writeq(vqx_misc_int.u, cptvf->reg_base + OTX_CPT_VQX_MISC_INT(0)); +} + +static void cptvf_clear_irde_intr(struct otx_cptvf *cptvf) +{ + union otx_cptx_vqx_misc_int vqx_misc_int; + + vqx_misc_int.u = readq(cptvf->reg_base + OTX_CPT_VQX_MISC_INT(0)); + /* W1C for the VF */ + vqx_misc_int.s.irde = 1; + writeq(vqx_misc_int.u, cptvf->reg_base + OTX_CPT_VQX_MISC_INT(0)); +} + +static void cptvf_clear_nwrp_intr(struct otx_cptvf *cptvf) +{ + union otx_cptx_vqx_misc_int vqx_misc_int; + + vqx_misc_int.u = readq(cptvf->reg_base + OTX_CPT_VQX_MISC_INT(0)); + /* W1C for the VF */ + vqx_misc_int.s.nwrp = 1; + writeq(vqx_misc_int.u, cptvf->reg_base + OTX_CPT_VQX_MISC_INT(0)); +} + +static void cptvf_clear_mbox_intr(struct otx_cptvf *cptvf) +{ + union otx_cptx_vqx_misc_int vqx_misc_int; + + vqx_misc_int.u = readq(cptvf->reg_base + OTX_CPT_VQX_MISC_INT(0)); + /* W1C for the VF */ + vqx_misc_int.s.mbox = 1; + writeq(vqx_misc_int.u, cptvf->reg_base + OTX_CPT_VQX_MISC_INT(0)); +} + +static void cptvf_clear_swerr_intr(struct otx_cptvf *cptvf) +{ + union otx_cptx_vqx_misc_int vqx_misc_int; + + vqx_misc_int.u = readq(cptvf->reg_base + OTX_CPT_VQX_MISC_INT(0)); + /* W1C for the VF */ + vqx_misc_int.s.swerr = 1; + writeq(vqx_misc_int.u, cptvf->reg_base + OTX_CPT_VQX_MISC_INT(0)); +} + +static u64 cptvf_read_vf_misc_intr_status(struct otx_cptvf *cptvf) +{ + return readq(cptvf->reg_base + OTX_CPT_VQX_MISC_INT(0)); +} + +static irqreturn_t cptvf_misc_intr_handler(int __always_unused irq, + void *arg) +{ + struct otx_cptvf *cptvf = arg; + struct pci_dev *pdev = cptvf->pdev; + u64 intr; + + intr = cptvf_read_vf_misc_intr_status(cptvf); + /* Check for MISC interrupt types */ + if (likely(intr & OTX_CPT_VF_INTR_MBOX_MASK)) { + dev_dbg(&pdev->dev, "Mailbox interrupt 0x%llx on CPT VF %d\n", + intr, cptvf->vfid); + otx_cptvf_handle_mbox_intr(cptvf); + cptvf_clear_mbox_intr(cptvf); + } else if (unlikely(intr & OTX_CPT_VF_INTR_DOVF_MASK)) { + cptvf_clear_dovf_intr(cptvf); + /* Clear doorbell count */ + otx_cptvf_write_vq_doorbell(cptvf, 0); + dev_err(&pdev->dev, + "Doorbell overflow error interrupt 0x%llx on CPT VF %d\n", + intr, cptvf->vfid); + } else if (unlikely(intr & OTX_CPT_VF_INTR_IRDE_MASK)) { + cptvf_clear_irde_intr(cptvf); + dev_err(&pdev->dev, + "Instruction NCB read error interrupt 0x%llx on CPT VF %d\n", + intr, cptvf->vfid); + } else if (unlikely(intr & OTX_CPT_VF_INTR_NWRP_MASK)) { + cptvf_clear_nwrp_intr(cptvf); + dev_err(&pdev->dev, + "NCB response write error interrupt 0x%llx on CPT VF %d\n", + intr, cptvf->vfid); + } else if (unlikely(intr & OTX_CPT_VF_INTR_SERR_MASK)) { + cptvf_clear_swerr_intr(cptvf); + dev_err(&pdev->dev, + "Software error interrupt 0x%llx on CPT VF %d\n", + intr, cptvf->vfid); + } else { + dev_err(&pdev->dev, "Unhandled interrupt in OTX_CPT VF %d\n", + cptvf->vfid); + } + + return IRQ_HANDLED; +} + +static inline struct otx_cptvf_wqe *get_cptvf_vq_wqe(struct otx_cptvf *cptvf, + int qno) +{ + struct otx_cptvf_wqe_info *nwqe_info; + + if (unlikely(qno >= cptvf->num_queues)) + return NULL; + nwqe_info = (struct otx_cptvf_wqe_info *)cptvf->wqe_info; + + return &nwqe_info->vq_wqe[qno]; +} + +static inline u32 cptvf_read_vq_done_count(struct otx_cptvf *cptvf) +{ + union otx_cptx_vqx_done vqx_done; + + vqx_done.u = readq(cptvf->reg_base + OTX_CPT_VQX_DONE(0)); + return vqx_done.s.done; +} + +static inline void cptvf_write_vq_done_ack(struct otx_cptvf *cptvf, + u32 ackcnt) +{ + union otx_cptx_vqx_done_ack vqx_dack_cnt; + + vqx_dack_cnt.u = readq(cptvf->reg_base + OTX_CPT_VQX_DONE_ACK(0)); + vqx_dack_cnt.s.done_ack = ackcnt; + writeq(vqx_dack_cnt.u, cptvf->reg_base + OTX_CPT_VQX_DONE_ACK(0)); +} + +static irqreturn_t cptvf_done_intr_handler(int __always_unused irq, + void *cptvf_dev) +{ + struct otx_cptvf *cptvf = (struct otx_cptvf *)cptvf_dev; + struct pci_dev *pdev = cptvf->pdev; + /* Read the number of completions */ + u32 intr = cptvf_read_vq_done_count(cptvf); + + if (intr) { + struct otx_cptvf_wqe *wqe; + + /* + * Acknowledge the number of scheduled completions for + * processing + */ + cptvf_write_vq_done_ack(cptvf, intr); + wqe = get_cptvf_vq_wqe(cptvf, 0); + if (unlikely(!wqe)) { + dev_err(&pdev->dev, "No work to schedule for VF (%d)", + cptvf->vfid); + return IRQ_NONE; + } + tasklet_hi_schedule(&wqe->twork); + } + + return IRQ_HANDLED; +} + +static void cptvf_set_irq_affinity(struct otx_cptvf *cptvf, int vec) +{ + struct pci_dev *pdev = cptvf->pdev; + int cpu; + + if (!zalloc_cpumask_var(&cptvf->affinity_mask[vec], + GFP_KERNEL)) { + dev_err(&pdev->dev, + "Allocation failed for affinity_mask for VF %d", + cptvf->vfid); + return; + } + + cpu = cptvf->vfid % num_online_cpus(); + cpumask_set_cpu(cpumask_local_spread(cpu, cptvf->node), + cptvf->affinity_mask[vec]); + irq_set_affinity_hint(pci_irq_vector(pdev, vec), + cptvf->affinity_mask[vec]); +} + +static void cptvf_write_vq_saddr(struct otx_cptvf *cptvf, u64 val) +{ + union otx_cptx_vqx_saddr vqx_saddr; + + vqx_saddr.u = val; + writeq(vqx_saddr.u, cptvf->reg_base + OTX_CPT_VQX_SADDR(0)); +} + +static void cptvf_device_init(struct otx_cptvf *cptvf) +{ + u64 base_addr = 0; + + /* Disable the VQ */ + cptvf_write_vq_ctl(cptvf, 0); + /* Reset the doorbell */ + otx_cptvf_write_vq_doorbell(cptvf, 0); + /* Clear inflight */ + cptvf_write_vq_inprog(cptvf, 0); + /* Write VQ SADDR */ + base_addr = (u64)(cptvf->cqinfo.queue[0].qhead->dma_addr); + cptvf_write_vq_saddr(cptvf, base_addr); + /* Configure timerhold / coalescence */ + cptvf_write_vq_done_timewait(cptvf, OTX_CPT_TIMER_HOLD); + cptvf_write_vq_done_numwait(cptvf, OTX_CPT_COUNT_HOLD); + /* Enable the VQ */ + cptvf_write_vq_ctl(cptvf, 1); + /* Flag the VF ready */ + cptvf->flags |= OTX_CPT_FLAG_DEVICE_READY; +} + +static ssize_t vf_type_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct otx_cptvf *cptvf = dev_get_drvdata(dev); + char *msg; + + switch (cptvf->vftype) { + case OTX_CPT_AE_TYPES: + msg = "AE"; + break; + + case OTX_CPT_SE_TYPES: + msg = "SE"; + break; + + default: + msg = "Invalid"; + } + + return scnprintf(buf, PAGE_SIZE, "%s\n", msg); +} + +static ssize_t vf_engine_group_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct otx_cptvf *cptvf = dev_get_drvdata(dev); + + return scnprintf(buf, PAGE_SIZE, "%d\n", cptvf->vfgrp); +} + +static ssize_t vf_engine_group_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct otx_cptvf *cptvf = dev_get_drvdata(dev); + int val, ret; + + ret = kstrtoint(buf, 10, &val); + if (ret) + return ret; + + if (val < 0) + return -EINVAL; + + if (val >= OTX_CPT_MAX_ENGINE_GROUPS) { + dev_err(dev, "Engine group >= than max available groups %d", + OTX_CPT_MAX_ENGINE_GROUPS); + return -EINVAL; + } + + ret = otx_cptvf_send_vf_to_grp_msg(cptvf, val); + if (ret) + return ret; + + return count; +} + +static ssize_t vf_coalesc_time_wait_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct otx_cptvf *cptvf = dev_get_drvdata(dev); + + return scnprintf(buf, PAGE_SIZE, "%d\n", + cptvf_read_vq_done_timewait(cptvf)); +} + +static ssize_t vf_coalesc_num_wait_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct otx_cptvf *cptvf = dev_get_drvdata(dev); + + return scnprintf(buf, PAGE_SIZE, "%d\n", + cptvf_read_vq_done_numwait(cptvf)); +} + +static ssize_t vf_coalesc_time_wait_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct otx_cptvf *cptvf = dev_get_drvdata(dev); + long val; + int ret; + + ret = kstrtol(buf, 10, &val); + if (ret != 0) + return ret; + + if (val < OTX_CPT_COALESC_MIN_TIME_WAIT || + val > OTX_CPT_COALESC_MAX_TIME_WAIT) + return -EINVAL; + + cptvf_write_vq_done_timewait(cptvf, val); + return count; +} + +static ssize_t vf_coalesc_num_wait_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct otx_cptvf *cptvf = dev_get_drvdata(dev); + long val; + int ret; + + ret = kstrtol(buf, 10, &val); + if (ret != 0) + return ret; + + if (val < OTX_CPT_COALESC_MIN_NUM_WAIT || + val > OTX_CPT_COALESC_MAX_NUM_WAIT) + return -EINVAL; + + cptvf_write_vq_done_numwait(cptvf, val); + return count; +} + +static DEVICE_ATTR_RO(vf_type); +static DEVICE_ATTR_RW(vf_engine_group); +static DEVICE_ATTR_RW(vf_coalesc_time_wait); +static DEVICE_ATTR_RW(vf_coalesc_num_wait); + +static struct attribute *otx_cptvf_attrs[] = { + &dev_attr_vf_type.attr, + &dev_attr_vf_engine_group.attr, + &dev_attr_vf_coalesc_time_wait.attr, + &dev_attr_vf_coalesc_num_wait.attr, + NULL +}; + +static const struct attribute_group otx_cptvf_sysfs_group = { + .attrs = otx_cptvf_attrs, +}; + +static int otx_cptvf_probe(struct pci_dev *pdev, + const struct pci_device_id *ent) +{ + struct device *dev = &pdev->dev; + struct otx_cptvf *cptvf; + int err; + + cptvf = devm_kzalloc(dev, sizeof(*cptvf), GFP_KERNEL); + if (!cptvf) + return -ENOMEM; + + pci_set_drvdata(pdev, cptvf); + cptvf->pdev = pdev; + + err = pci_enable_device(pdev); + if (err) { + dev_err(dev, "Failed to enable PCI device\n"); + goto clear_drvdata; + } + err = pci_request_regions(pdev, DRV_NAME); + if (err) { + dev_err(dev, "PCI request regions failed 0x%x\n", err); + goto disable_device; + } + err = pci_set_dma_mask(pdev, DMA_BIT_MASK(48)); + if (err) { + dev_err(dev, "Unable to get usable DMA configuration\n"); + goto release_regions; + } + + err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(48)); + if (err) { + dev_err(dev, "Unable to get 48-bit DMA for consistent allocations\n"); + goto release_regions; + } + + /* MAP PF's configuration registers */ + cptvf->reg_base = pci_iomap(pdev, OTX_CPT_VF_PCI_CFG_BAR, 0); + if (!cptvf->reg_base) { + dev_err(dev, "Cannot map config register space, aborting\n"); + err = -ENOMEM; + goto release_regions; + } + + cptvf->node = dev_to_node(&pdev->dev); + err = pci_alloc_irq_vectors(pdev, OTX_CPT_VF_MSIX_VECTORS, + OTX_CPT_VF_MSIX_VECTORS, PCI_IRQ_MSIX); + if (err < 0) { + dev_err(dev, "Request for #%d msix vectors failed\n", + OTX_CPT_VF_MSIX_VECTORS); + goto unmap_region; + } + + err = request_irq(pci_irq_vector(pdev, CPT_VF_INT_VEC_E_MISC), + cptvf_misc_intr_handler, 0, "CPT VF misc intr", + cptvf); + if (err) { + dev_err(dev, "Failed to request misc irq"); + goto free_vectors; + } + + /* Enable mailbox interrupt */ + cptvf_enable_mbox_interrupts(cptvf); + cptvf_enable_swerr_interrupts(cptvf); + + /* Check cpt pf status, gets chip ID / device Id from PF if ready */ + err = otx_cptvf_check_pf_ready(cptvf); + if (err) + goto free_misc_irq; + + /* CPT VF software resources initialization */ + cptvf->cqinfo.qchunksize = OTX_CPT_CMD_QCHUNK_SIZE; + err = cptvf_sw_init(cptvf, OTX_CPT_CMD_QLEN, OTX_CPT_NUM_QS_PER_VF); + if (err) { + dev_err(dev, "cptvf_sw_init() failed"); + goto free_misc_irq; + } + /* Convey VQ LEN to PF */ + err = otx_cptvf_send_vq_size_msg(cptvf); + if (err) + goto sw_cleanup; + + /* CPT VF device initialization */ + cptvf_device_init(cptvf); + /* Send msg to PF to assign currnet Q to required group */ + err = otx_cptvf_send_vf_to_grp_msg(cptvf, cptvf->vfgrp); + if (err) + goto sw_cleanup; + + cptvf->priority = 1; + err = otx_cptvf_send_vf_priority_msg(cptvf); + if (err) + goto sw_cleanup; + + err = request_irq(pci_irq_vector(pdev, CPT_VF_INT_VEC_E_DONE), + cptvf_done_intr_handler, 0, "CPT VF done intr", + cptvf); + if (err) { + dev_err(dev, "Failed to request done irq\n"); + goto free_done_irq; + } + + /* Enable done interrupt */ + cptvf_enable_done_interrupts(cptvf); + + /* Set irq affinity masks */ + cptvf_set_irq_affinity(cptvf, CPT_VF_INT_VEC_E_MISC); + cptvf_set_irq_affinity(cptvf, CPT_VF_INT_VEC_E_DONE); + + err = otx_cptvf_send_vf_up(cptvf); + if (err) + goto free_irq_affinity; + + /* Initialize algorithms and set ops */ + err = otx_cpt_crypto_init(pdev, THIS_MODULE, + cptvf->vftype == OTX_CPT_SE_TYPES ? OTX_CPT_SE : OTX_CPT_AE, + cptvf->vftype, 1, cptvf->num_vfs); + if (err) { + dev_err(dev, "Failed to register crypto algs\n"); + goto free_irq_affinity; + } + + err = sysfs_create_group(&dev->kobj, &otx_cptvf_sysfs_group); + if (err) { + dev_err(dev, "Creating sysfs entries failed\n"); + goto crypto_exit; + } + + return 0; + +crypto_exit: + otx_cpt_crypto_exit(pdev, THIS_MODULE, cptvf->vftype); +free_irq_affinity: + cptvf_free_irq_affinity(cptvf, CPT_VF_INT_VEC_E_DONE); + cptvf_free_irq_affinity(cptvf, CPT_VF_INT_VEC_E_MISC); +free_done_irq: + free_irq(pci_irq_vector(pdev, CPT_VF_INT_VEC_E_DONE), cptvf); +sw_cleanup: + cptvf_sw_cleanup(cptvf); +free_misc_irq: + free_irq(pci_irq_vector(pdev, CPT_VF_INT_VEC_E_MISC), cptvf); +free_vectors: + pci_free_irq_vectors(cptvf->pdev); +unmap_region: + pci_iounmap(pdev, cptvf->reg_base); +release_regions: + pci_release_regions(pdev); +disable_device: + pci_disable_device(pdev); +clear_drvdata: + pci_set_drvdata(pdev, NULL); + + return err; +} + +static void otx_cptvf_remove(struct pci_dev *pdev) +{ + struct otx_cptvf *cptvf = pci_get_drvdata(pdev); + + if (!cptvf) { + dev_err(&pdev->dev, "Invalid CPT-VF device\n"); + return; + } + + /* Convey DOWN to PF */ + if (otx_cptvf_send_vf_down(cptvf)) { + dev_err(&pdev->dev, "PF not responding to DOWN msg"); + } else { + sysfs_remove_group(&pdev->dev.kobj, &otx_cptvf_sysfs_group); + otx_cpt_crypto_exit(pdev, THIS_MODULE, cptvf->vftype); + cptvf_free_irq_affinity(cptvf, CPT_VF_INT_VEC_E_DONE); + cptvf_free_irq_affinity(cptvf, CPT_VF_INT_VEC_E_MISC); + free_irq(pci_irq_vector(pdev, CPT_VF_INT_VEC_E_DONE), cptvf); + free_irq(pci_irq_vector(pdev, CPT_VF_INT_VEC_E_MISC), cptvf); + cptvf_sw_cleanup(cptvf); + pci_free_irq_vectors(cptvf->pdev); + pci_iounmap(pdev, cptvf->reg_base); + pci_release_regions(pdev); + pci_disable_device(pdev); + pci_set_drvdata(pdev, NULL); + } +} + +/* Supported devices */ +static const struct pci_device_id otx_cptvf_id_table[] = { + {PCI_VDEVICE(CAVIUM, OTX_CPT_PCI_VF_DEVICE_ID), 0}, + { 0, } /* end of table */ +}; + +static struct pci_driver otx_cptvf_pci_driver = { + .name = DRV_NAME, + .id_table = otx_cptvf_id_table, + .probe = otx_cptvf_probe, + .remove = otx_cptvf_remove, +}; + +module_pci_driver(otx_cptvf_pci_driver); + +MODULE_AUTHOR("Marvell International Ltd."); +MODULE_DESCRIPTION("Marvell OcteonTX CPT Virtual Function Driver"); +MODULE_LICENSE("GPL v2"); +MODULE_VERSION(DRV_VERSION); +MODULE_DEVICE_TABLE(pci, otx_cptvf_id_table); diff --git a/drivers/crypto/marvell/octeontx/otx_cptvf_mbox.c b/drivers/crypto/marvell/octeontx/otx_cptvf_mbox.c new file mode 100644 index 000000000000..5663787c7a62 --- /dev/null +++ b/drivers/crypto/marvell/octeontx/otx_cptvf_mbox.c @@ -0,0 +1,247 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Marvell OcteonTX CPT driver + * + * Copyright (C) 2019 Marvell International Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/delay.h> +#include "otx_cptvf.h" + +#define CPT_MBOX_MSG_TIMEOUT 2000 + +static char *get_mbox_opcode_str(int msg_opcode) +{ + char *str = "Unknown"; + + switch (msg_opcode) { + case OTX_CPT_MSG_VF_UP: + str = "UP"; + break; + + case OTX_CPT_MSG_VF_DOWN: + str = "DOWN"; + break; + + case OTX_CPT_MSG_READY: + str = "READY"; + break; + + case OTX_CPT_MSG_QLEN: + str = "QLEN"; + break; + + case OTX_CPT_MSG_QBIND_GRP: + str = "QBIND_GRP"; + break; + + case OTX_CPT_MSG_VQ_PRIORITY: + str = "VQ_PRIORITY"; + break; + + case OTX_CPT_MSG_PF_TYPE: + str = "PF_TYPE"; + break; + + case OTX_CPT_MSG_ACK: + str = "ACK"; + break; + + case OTX_CPT_MSG_NACK: + str = "NACK"; + break; + } + return str; +} + +static void dump_mbox_msg(struct otx_cpt_mbox *mbox_msg, int vf_id) +{ + char raw_data_str[OTX_CPT_MAX_MBOX_DATA_STR_SIZE]; + + hex_dump_to_buffer(mbox_msg, sizeof(struct otx_cpt_mbox), 16, 8, + raw_data_str, OTX_CPT_MAX_MBOX_DATA_STR_SIZE, false); + if (vf_id >= 0) + pr_debug("MBOX msg %s received from VF%d raw_data %s", + get_mbox_opcode_str(mbox_msg->msg), vf_id, + raw_data_str); + else + pr_debug("MBOX msg %s received from PF raw_data %s", + get_mbox_opcode_str(mbox_msg->msg), raw_data_str); +} + +static void cptvf_send_msg_to_pf(struct otx_cptvf *cptvf, + struct otx_cpt_mbox *mbx) +{ + /* Writing mbox(1) causes interrupt */ + writeq(mbx->msg, cptvf->reg_base + OTX_CPT_VFX_PF_MBOXX(0, 0)); + writeq(mbx->data, cptvf->reg_base + OTX_CPT_VFX_PF_MBOXX(0, 1)); +} + +/* Interrupt handler to handle mailbox messages from VFs */ +void otx_cptvf_handle_mbox_intr(struct otx_cptvf *cptvf) +{ + struct otx_cpt_mbox mbx = {}; + + /* + * MBOX[0] contains msg + * MBOX[1] contains data + */ + mbx.msg = readq(cptvf->reg_base + OTX_CPT_VFX_PF_MBOXX(0, 0)); + mbx.data = readq(cptvf->reg_base + OTX_CPT_VFX_PF_MBOXX(0, 1)); + + dump_mbox_msg(&mbx, -1); + + switch (mbx.msg) { + case OTX_CPT_MSG_VF_UP: + cptvf->pf_acked = true; + cptvf->num_vfs = mbx.data; + break; + case OTX_CPT_MSG_READY: + cptvf->pf_acked = true; + cptvf->vfid = mbx.data; + dev_dbg(&cptvf->pdev->dev, "Received VFID %d\n", cptvf->vfid); + break; + case OTX_CPT_MSG_QBIND_GRP: + cptvf->pf_acked = true; + cptvf->vftype = mbx.data; + dev_dbg(&cptvf->pdev->dev, "VF %d type %s group %d\n", + cptvf->vfid, + ((mbx.data == OTX_CPT_SE_TYPES) ? "SE" : "AE"), + cptvf->vfgrp); + break; + case OTX_CPT_MSG_ACK: + cptvf->pf_acked = true; + break; + case OTX_CPT_MSG_NACK: + cptvf->pf_nacked = true; + break; + default: + dev_err(&cptvf->pdev->dev, "Invalid msg from PF, msg 0x%llx\n", + mbx.msg); + break; + } +} + +static int cptvf_send_msg_to_pf_timeout(struct otx_cptvf *cptvf, + struct otx_cpt_mbox *mbx) +{ + int timeout = CPT_MBOX_MSG_TIMEOUT; + int sleep = 10; + + cptvf->pf_acked = false; + cptvf->pf_nacked = false; + cptvf_send_msg_to_pf(cptvf, mbx); + /* Wait for previous message to be acked, timeout 2sec */ + while (!cptvf->pf_acked) { + if (cptvf->pf_nacked) + return -EINVAL; + msleep(sleep); + if (cptvf->pf_acked) + break; + timeout -= sleep; + if (!timeout) { + dev_err(&cptvf->pdev->dev, + "PF didn't ack to mbox msg %llx from VF%u\n", + mbx->msg, cptvf->vfid); + return -EBUSY; + } + } + return 0; +} + +/* + * Checks if VF is able to comminicate with PF + * and also gets the CPT number this VF is associated to. + */ +int otx_cptvf_check_pf_ready(struct otx_cptvf *cptvf) +{ + struct otx_cpt_mbox mbx = {}; + int ret; + + mbx.msg = OTX_CPT_MSG_READY; + ret = cptvf_send_msg_to_pf_timeout(cptvf, &mbx); + + return ret; +} + +/* + * Communicate VQs size to PF to program CPT(0)_PF_Q(0-15)_CTL of the VF. + * Must be ACKed. + */ +int otx_cptvf_send_vq_size_msg(struct otx_cptvf *cptvf) +{ + struct otx_cpt_mbox mbx = {}; + int ret; + + mbx.msg = OTX_CPT_MSG_QLEN; + mbx.data = cptvf->qsize; + ret = cptvf_send_msg_to_pf_timeout(cptvf, &mbx); + + return ret; +} + +/* + * Communicate VF group required to PF and get the VQ binded to that group + */ +int otx_cptvf_send_vf_to_grp_msg(struct otx_cptvf *cptvf, int group) +{ + struct otx_cpt_mbox mbx = {}; + int ret; + + mbx.msg = OTX_CPT_MSG_QBIND_GRP; + /* Convey group of the VF */ + mbx.data = group; + ret = cptvf_send_msg_to_pf_timeout(cptvf, &mbx); + if (ret) + return ret; + cptvf->vfgrp = group; + + return 0; +} + +/* + * Communicate VF group required to PF and get the VQ binded to that group + */ +int otx_cptvf_send_vf_priority_msg(struct otx_cptvf *cptvf) +{ + struct otx_cpt_mbox mbx = {}; + int ret; + + mbx.msg = OTX_CPT_MSG_VQ_PRIORITY; + /* Convey group of the VF */ + mbx.data = cptvf->priority; + ret = cptvf_send_msg_to_pf_timeout(cptvf, &mbx); + + return ret; +} + +/* + * Communicate to PF that VF is UP and running + */ +int otx_cptvf_send_vf_up(struct otx_cptvf *cptvf) +{ + struct otx_cpt_mbox mbx = {}; + int ret; + + mbx.msg = OTX_CPT_MSG_VF_UP; + ret = cptvf_send_msg_to_pf_timeout(cptvf, &mbx); + + return ret; +} + +/* + * Communicate to PF that VF is DOWN and running + */ +int otx_cptvf_send_vf_down(struct otx_cptvf *cptvf) +{ + struct otx_cpt_mbox mbx = {}; + int ret; + + mbx.msg = OTX_CPT_MSG_VF_DOWN; + ret = cptvf_send_msg_to_pf_timeout(cptvf, &mbx); + + return ret; +} diff --git a/drivers/crypto/marvell/octeontx/otx_cptvf_reqmgr.c b/drivers/crypto/marvell/octeontx/otx_cptvf_reqmgr.c new file mode 100644 index 000000000000..df839b880354 --- /dev/null +++ b/drivers/crypto/marvell/octeontx/otx_cptvf_reqmgr.c @@ -0,0 +1,612 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Marvell OcteonTX CPT driver + * + * Copyright (C) 2019 Marvell International Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include "otx_cptvf.h" +#include "otx_cptvf_algs.h" + +/* Completion code size and initial value */ +#define COMPLETION_CODE_SIZE 8 +#define COMPLETION_CODE_INIT 0 + +/* SG list header size in bytes */ +#define SG_LIST_HDR_SIZE 8 + +/* Default timeout when waiting for free pending entry in us */ +#define CPT_PENTRY_TIMEOUT 1000 +#define CPT_PENTRY_STEP 50 + +/* Default threshold for stopping and resuming sender requests */ +#define CPT_IQ_STOP_MARGIN 128 +#define CPT_IQ_RESUME_MARGIN 512 + +#define CPT_DMA_ALIGN 128 + +void otx_cpt_dump_sg_list(struct pci_dev *pdev, struct otx_cpt_req_info *req) +{ + int i; + + pr_debug("Gather list size %d\n", req->incnt); + for (i = 0; i < req->incnt; i++) { + pr_debug("Buffer %d size %d, vptr 0x%p, dmaptr 0x%p\n", i, + req->in[i].size, req->in[i].vptr, + (void *) req->in[i].dma_addr); + pr_debug("Buffer hexdump (%d bytes)\n", + req->in[i].size); + print_hex_dump_debug("", DUMP_PREFIX_NONE, 16, 1, + req->in[i].vptr, req->in[i].size, false); + } + + pr_debug("Scatter list size %d\n", req->outcnt); + for (i = 0; i < req->outcnt; i++) { + pr_debug("Buffer %d size %d, vptr 0x%p, dmaptr 0x%p\n", i, + req->out[i].size, req->out[i].vptr, + (void *) req->out[i].dma_addr); + pr_debug("Buffer hexdump (%d bytes)\n", req->out[i].size); + print_hex_dump_debug("", DUMP_PREFIX_NONE, 16, 1, + req->out[i].vptr, req->out[i].size, false); + } +} + +static inline struct otx_cpt_pending_entry *get_free_pending_entry( + struct otx_cpt_pending_queue *q, + int qlen) +{ + struct otx_cpt_pending_entry *ent = NULL; + + ent = &q->head[q->rear]; + if (unlikely(ent->busy)) + return NULL; + + q->rear++; + if (unlikely(q->rear == qlen)) + q->rear = 0; + + return ent; +} + +static inline u32 modulo_inc(u32 index, u32 length, u32 inc) +{ + if (WARN_ON(inc > length)) + inc = length; + + index += inc; + if (unlikely(index >= length)) + index -= length; + + return index; +} + +static inline void free_pentry(struct otx_cpt_pending_entry *pentry) +{ + pentry->completion_addr = NULL; + pentry->info = NULL; + pentry->callback = NULL; + pentry->areq = NULL; + pentry->resume_sender = false; + pentry->busy = false; +} + +static inline int setup_sgio_components(struct pci_dev *pdev, + struct otx_cpt_buf_ptr *list, + int buf_count, u8 *buffer) +{ + struct otx_cpt_sglist_component *sg_ptr = NULL; + int ret = 0, i, j; + int components; + + if (unlikely(!list)) { + dev_err(&pdev->dev, "Input list pointer is NULL\n"); + return -EFAULT; + } + + for (i = 0; i < buf_count; i++) { + if (likely(list[i].vptr)) { + list[i].dma_addr = dma_map_single(&pdev->dev, + list[i].vptr, + list[i].size, + DMA_BIDIRECTIONAL); + if (unlikely(dma_mapping_error(&pdev->dev, + list[i].dma_addr))) { + dev_err(&pdev->dev, "Dma mapping failed\n"); + ret = -EIO; + goto sg_cleanup; + } + } + } + + components = buf_count / 4; + sg_ptr = (struct otx_cpt_sglist_component *)buffer; + for (i = 0; i < components; i++) { + sg_ptr->u.s.len0 = cpu_to_be16(list[i * 4 + 0].size); + sg_ptr->u.s.len1 = cpu_to_be16(list[i * 4 + 1].size); + sg_ptr->u.s.len2 = cpu_to_be16(list[i * 4 + 2].size); + sg_ptr->u.s.len3 = cpu_to_be16(list[i * 4 + 3].size); + sg_ptr->ptr0 = cpu_to_be64(list[i * 4 + 0].dma_addr); + sg_ptr->ptr1 = cpu_to_be64(list[i * 4 + 1].dma_addr); + sg_ptr->ptr2 = cpu_to_be64(list[i * 4 + 2].dma_addr); + sg_ptr->ptr3 = cpu_to_be64(list[i * 4 + 3].dma_addr); + sg_ptr++; + } + components = buf_count % 4; + + switch (components) { + case 3: + sg_ptr->u.s.len2 = cpu_to_be16(list[i * 4 + 2].size); + sg_ptr->ptr2 = cpu_to_be64(list[i * 4 + 2].dma_addr); + /* Fall through */ + case 2: + sg_ptr->u.s.len1 = cpu_to_be16(list[i * 4 + 1].size); + sg_ptr->ptr1 = cpu_to_be64(list[i * 4 + 1].dma_addr); + /* Fall through */ + case 1: + sg_ptr->u.s.len0 = cpu_to_be16(list[i * 4 + 0].size); + sg_ptr->ptr0 = cpu_to_be64(list[i * 4 + 0].dma_addr); + break; + default: + break; + } + return ret; + +sg_cleanup: + for (j = 0; j < i; j++) { + if (list[j].dma_addr) { + dma_unmap_single(&pdev->dev, list[i].dma_addr, + list[i].size, DMA_BIDIRECTIONAL); + } + + list[j].dma_addr = 0; + } + return ret; +} + +static inline int setup_sgio_list(struct pci_dev *pdev, + struct otx_cpt_info_buffer **pinfo, + struct otx_cpt_req_info *req, gfp_t gfp) +{ + u32 dlen, align_dlen, info_len, rlen; + struct otx_cpt_info_buffer *info; + u16 g_sz_bytes, s_sz_bytes; + int align = CPT_DMA_ALIGN; + u32 total_mem_len; + + if (unlikely(req->incnt > OTX_CPT_MAX_SG_IN_CNT || + req->outcnt > OTX_CPT_MAX_SG_OUT_CNT)) { + dev_err(&pdev->dev, "Error too many sg components\n"); + return -EINVAL; + } + + g_sz_bytes = ((req->incnt + 3) / 4) * + sizeof(struct otx_cpt_sglist_component); + s_sz_bytes = ((req->outcnt + 3) / 4) * + sizeof(struct otx_cpt_sglist_component); + + dlen = g_sz_bytes + s_sz_bytes + SG_LIST_HDR_SIZE; + align_dlen = ALIGN(dlen, align); + info_len = ALIGN(sizeof(*info), align); + rlen = ALIGN(sizeof(union otx_cpt_res_s), align); + total_mem_len = align_dlen + info_len + rlen + COMPLETION_CODE_SIZE; + + info = kzalloc(total_mem_len, gfp); + if (unlikely(!info)) { + dev_err(&pdev->dev, "Memory allocation failed\n"); + return -ENOMEM; + } + *pinfo = info; + info->dlen = dlen; + info->in_buffer = (u8 *)info + info_len; + + ((u16 *)info->in_buffer)[0] = req->outcnt; + ((u16 *)info->in_buffer)[1] = req->incnt; + ((u16 *)info->in_buffer)[2] = 0; + ((u16 *)info->in_buffer)[3] = 0; + *(u64 *)info->in_buffer = cpu_to_be64p((u64 *)info->in_buffer); + + /* Setup gather (input) components */ + if (setup_sgio_components(pdev, req->in, req->incnt, + &info->in_buffer[8])) { + dev_err(&pdev->dev, "Failed to setup gather list\n"); + return -EFAULT; + } + + if (setup_sgio_components(pdev, req->out, req->outcnt, + &info->in_buffer[8 + g_sz_bytes])) { + dev_err(&pdev->dev, "Failed to setup scatter list\n"); + return -EFAULT; + } + + info->dma_len = total_mem_len - info_len; + info->dptr_baddr = dma_map_single(&pdev->dev, (void *)info->in_buffer, + info->dma_len, DMA_BIDIRECTIONAL); + if (unlikely(dma_mapping_error(&pdev->dev, info->dptr_baddr))) { + dev_err(&pdev->dev, "DMA Mapping failed for cpt req\n"); + return -EIO; + } + /* + * Get buffer for union otx_cpt_res_s response + * structure and its physical address + */ + info->completion_addr = (u64 *)(info->in_buffer + align_dlen); + info->comp_baddr = info->dptr_baddr + align_dlen; + + /* Create and initialize RPTR */ + info->out_buffer = (u8 *)info->completion_addr + rlen; + info->rptr_baddr = info->comp_baddr + rlen; + + *((u64 *) info->out_buffer) = ~((u64) COMPLETION_CODE_INIT); + + return 0; +} + + +static void cpt_fill_inst(union otx_cpt_inst_s *inst, + struct otx_cpt_info_buffer *info, + struct otx_cpt_iq_cmd *cmd) +{ + inst->u[0] = 0x0; + inst->s.doneint = true; + inst->s.res_addr = (u64)info->comp_baddr; + inst->u[2] = 0x0; + inst->s.wq_ptr = 0; + inst->s.ei0 = cmd->cmd.u64; + inst->s.ei1 = cmd->dptr; + inst->s.ei2 = cmd->rptr; + inst->s.ei3 = cmd->cptr.u64; +} + +/* + * On OcteonTX platform the parameter db_count is used as a count for ringing + * door bell. The valid values for db_count are: + * 0 - 1 CPT instruction will be enqueued however CPT will not be informed + * 1 - 1 CPT instruction will be enqueued and CPT will be informed + */ +static void cpt_send_cmd(union otx_cpt_inst_s *cptinst, struct otx_cptvf *cptvf) +{ + struct otx_cpt_cmd_qinfo *qinfo = &cptvf->cqinfo; + struct otx_cpt_cmd_queue *queue; + struct otx_cpt_cmd_chunk *curr; + u8 *ent; + + queue = &qinfo->queue[0]; + /* + * cpt_send_cmd is currently called only from critical section + * therefore no locking is required for accessing instruction queue + */ + ent = &queue->qhead->head[queue->idx * OTX_CPT_INST_SIZE]; + memcpy(ent, (void *) cptinst, OTX_CPT_INST_SIZE); + + if (++queue->idx >= queue->qhead->size / 64) { + curr = queue->qhead; + + if (list_is_last(&curr->nextchunk, &queue->chead)) + queue->qhead = queue->base; + else + queue->qhead = list_next_entry(queue->qhead, nextchunk); + queue->idx = 0; + } + /* make sure all memory stores are done before ringing doorbell */ + smp_wmb(); + otx_cptvf_write_vq_doorbell(cptvf, 1); +} + +static int process_request(struct pci_dev *pdev, struct otx_cpt_req_info *req, + struct otx_cpt_pending_queue *pqueue, + struct otx_cptvf *cptvf) +{ + struct otx_cptvf_request *cpt_req = &req->req; + struct otx_cpt_pending_entry *pentry = NULL; + union otx_cpt_ctrl_info *ctrl = &req->ctrl; + struct otx_cpt_info_buffer *info = NULL; + union otx_cpt_res_s *result = NULL; + struct otx_cpt_iq_cmd iq_cmd; + union otx_cpt_inst_s cptinst; + int retry, ret = 0; + u8 resume_sender; + gfp_t gfp; + + gfp = (req->areq->flags & CRYPTO_TFM_REQ_MAY_SLEEP) ? GFP_KERNEL : + GFP_ATOMIC; + ret = setup_sgio_list(pdev, &info, req, gfp); + if (unlikely(ret)) { + dev_err(&pdev->dev, "Setting up SG list failed"); + goto request_cleanup; + } + cpt_req->dlen = info->dlen; + + result = (union otx_cpt_res_s *) info->completion_addr; + result->s.compcode = COMPLETION_CODE_INIT; + + spin_lock_bh(&pqueue->lock); + pentry = get_free_pending_entry(pqueue, pqueue->qlen); + retry = CPT_PENTRY_TIMEOUT / CPT_PENTRY_STEP; + while (unlikely(!pentry) && retry--) { + spin_unlock_bh(&pqueue->lock); + udelay(CPT_PENTRY_STEP); + spin_lock_bh(&pqueue->lock); + pentry = get_free_pending_entry(pqueue, pqueue->qlen); + } + + if (unlikely(!pentry)) { + ret = -ENOSPC; + spin_unlock_bh(&pqueue->lock); + goto request_cleanup; + } + + /* + * Check if we are close to filling in entire pending queue, + * if so then tell the sender to stop/sleep by returning -EBUSY + * We do it only for context which can sleep (GFP_KERNEL) + */ + if (gfp == GFP_KERNEL && + pqueue->pending_count > (pqueue->qlen - CPT_IQ_STOP_MARGIN)) { + pentry->resume_sender = true; + } else + pentry->resume_sender = false; + resume_sender = pentry->resume_sender; + pqueue->pending_count++; + + pentry->completion_addr = info->completion_addr; + pentry->info = info; + pentry->callback = req->callback; + pentry->areq = req->areq; + pentry->busy = true; + info->pentry = pentry; + info->time_in = jiffies; + info->req = req; + + /* Fill in the command */ + iq_cmd.cmd.u64 = 0; + iq_cmd.cmd.s.opcode = cpu_to_be16(cpt_req->opcode.flags); + iq_cmd.cmd.s.param1 = cpu_to_be16(cpt_req->param1); + iq_cmd.cmd.s.param2 = cpu_to_be16(cpt_req->param2); + iq_cmd.cmd.s.dlen = cpu_to_be16(cpt_req->dlen); + + /* 64-bit swap for microcode data reads, not needed for addresses*/ + iq_cmd.cmd.u64 = cpu_to_be64(iq_cmd.cmd.u64); + iq_cmd.dptr = info->dptr_baddr; + iq_cmd.rptr = info->rptr_baddr; + iq_cmd.cptr.u64 = 0; + iq_cmd.cptr.s.grp = ctrl->s.grp; + + /* Fill in the CPT_INST_S type command for HW interpretation */ + cpt_fill_inst(&cptinst, info, &iq_cmd); + + /* Print debug info if enabled */ + otx_cpt_dump_sg_list(pdev, req); + pr_debug("Cpt_inst_s hexdump (%d bytes)\n", OTX_CPT_INST_SIZE); + print_hex_dump_debug("", 0, 16, 1, &cptinst, OTX_CPT_INST_SIZE, false); + pr_debug("Dptr hexdump (%d bytes)\n", cpt_req->dlen); + print_hex_dump_debug("", 0, 16, 1, info->in_buffer, + cpt_req->dlen, false); + + /* Send CPT command */ + cpt_send_cmd(&cptinst, cptvf); + + /* + * We allocate and prepare pending queue entry in critical section + * together with submitting CPT instruction to CPT instruction queue + * to make sure that order of CPT requests is the same in both + * pending and instruction queues + */ + spin_unlock_bh(&pqueue->lock); + + ret = resume_sender ? -EBUSY : -EINPROGRESS; + return ret; + +request_cleanup: + do_request_cleanup(pdev, info); + return ret; +} + +int otx_cpt_do_request(struct pci_dev *pdev, struct otx_cpt_req_info *req, + int cpu_num) +{ + struct otx_cptvf *cptvf = pci_get_drvdata(pdev); + + if (!otx_cpt_device_ready(cptvf)) { + dev_err(&pdev->dev, "CPT Device is not ready"); + return -ENODEV; + } + + if ((cptvf->vftype == OTX_CPT_SE_TYPES) && (!req->ctrl.s.se_req)) { + dev_err(&pdev->dev, "CPTVF-%d of SE TYPE got AE request", + cptvf->vfid); + return -EINVAL; + } else if ((cptvf->vftype == OTX_CPT_AE_TYPES) && + (req->ctrl.s.se_req)) { + dev_err(&pdev->dev, "CPTVF-%d of AE TYPE got SE request", + cptvf->vfid); + return -EINVAL; + } + + return process_request(pdev, req, &cptvf->pqinfo.queue[0], cptvf); +} + +static int cpt_process_ccode(struct pci_dev *pdev, + union otx_cpt_res_s *cpt_status, + struct otx_cpt_info_buffer *cpt_info, + struct otx_cpt_req_info *req, u32 *res_code) +{ + u8 ccode = cpt_status->s.compcode; + union otx_cpt_error_code ecode; + + ecode.u = be64_to_cpu(*((u64 *) cpt_info->out_buffer)); + switch (ccode) { + case CPT_COMP_E_FAULT: + dev_err(&pdev->dev, + "Request failed with DMA fault\n"); + otx_cpt_dump_sg_list(pdev, req); + break; + + case CPT_COMP_E_SWERR: + dev_err(&pdev->dev, + "Request failed with software error code %d\n", + ecode.s.ccode); + otx_cpt_dump_sg_list(pdev, req); + break; + + case CPT_COMP_E_HWERR: + dev_err(&pdev->dev, + "Request failed with hardware error\n"); + otx_cpt_dump_sg_list(pdev, req); + break; + + case COMPLETION_CODE_INIT: + /* check for timeout */ + if (time_after_eq(jiffies, cpt_info->time_in + + OTX_CPT_COMMAND_TIMEOUT * HZ)) + dev_warn(&pdev->dev, "Request timed out 0x%p", req); + else if (cpt_info->extra_time < OTX_CPT_TIME_IN_RESET_COUNT) { + cpt_info->time_in = jiffies; + cpt_info->extra_time++; + } + return 1; + + case CPT_COMP_E_GOOD: + /* Check microcode completion code */ + if (ecode.s.ccode) { + /* + * If requested hmac is truncated and ucode returns + * s/g write length error then we report success + * because ucode writes as many bytes of calculated + * hmac as available in gather buffer and reports + * s/g write length error if number of bytes in gather + * buffer is less than full hmac size. + */ + if (req->is_trunc_hmac && + ecode.s.ccode == ERR_SCATTER_GATHER_WRITE_LENGTH) { + *res_code = 0; + break; + } + + dev_err(&pdev->dev, + "Request failed with software error code 0x%x\n", + ecode.s.ccode); + otx_cpt_dump_sg_list(pdev, req); + break; + } + + /* Request has been processed with success */ + *res_code = 0; + break; + + default: + dev_err(&pdev->dev, "Request returned invalid status\n"); + break; + } + + return 0; +} + +static inline void process_pending_queue(struct pci_dev *pdev, + struct otx_cpt_pending_queue *pqueue) +{ + void (*callback)(int status, void *arg1, void *arg2); + struct otx_cpt_pending_entry *resume_pentry = NULL; + struct otx_cpt_pending_entry *pentry = NULL; + struct otx_cpt_info_buffer *cpt_info = NULL; + union otx_cpt_res_s *cpt_status = NULL; + struct otx_cpt_req_info *req = NULL; + struct crypto_async_request *areq; + u32 res_code, resume_index; + + while (1) { + spin_lock_bh(&pqueue->lock); + pentry = &pqueue->head[pqueue->front]; + + if (WARN_ON(!pentry)) { + spin_unlock_bh(&pqueue->lock); + break; + } + + res_code = -EINVAL; + if (unlikely(!pentry->busy)) { + spin_unlock_bh(&pqueue->lock); + break; + } + + if (unlikely(!pentry->callback)) { + dev_err(&pdev->dev, "Callback NULL\n"); + goto process_pentry; + } + + cpt_info = pentry->info; + if (unlikely(!cpt_info)) { + dev_err(&pdev->dev, "Pending entry post arg NULL\n"); + goto process_pentry; + } + + req = cpt_info->req; + if (unlikely(!req)) { + dev_err(&pdev->dev, "Request NULL\n"); + goto process_pentry; + } + + cpt_status = (union otx_cpt_res_s *) pentry->completion_addr; + if (unlikely(!cpt_status)) { + dev_err(&pdev->dev, "Completion address NULL\n"); + goto process_pentry; + } + + if (cpt_process_ccode(pdev, cpt_status, cpt_info, req, + &res_code)) { + spin_unlock_bh(&pqueue->lock); + return; + } + cpt_info->pdev = pdev; + +process_pentry: + /* + * Check if we should inform sending side to resume + * We do it CPT_IQ_RESUME_MARGIN elements in advance before + * pending queue becomes empty + */ + resume_index = modulo_inc(pqueue->front, pqueue->qlen, + CPT_IQ_RESUME_MARGIN); + resume_pentry = &pqueue->head[resume_index]; + if (resume_pentry && + resume_pentry->resume_sender) { + resume_pentry->resume_sender = false; + callback = resume_pentry->callback; + areq = resume_pentry->areq; + + if (callback) { + spin_unlock_bh(&pqueue->lock); + + /* + * EINPROGRESS is an indication for sending + * side that it can resume sending requests + */ + callback(-EINPROGRESS, areq, cpt_info); + spin_lock_bh(&pqueue->lock); + } + } + + callback = pentry->callback; + areq = pentry->areq; + free_pentry(pentry); + + pqueue->pending_count--; + pqueue->front = modulo_inc(pqueue->front, pqueue->qlen, 1); + spin_unlock_bh(&pqueue->lock); + + /* + * Call callback after current pending entry has been + * processed, we don't do it if the callback pointer is + * invalid. + */ + if (callback) + callback(res_code, areq, cpt_info); + } +} + +void otx_cpt_post_process(struct otx_cptvf_wqe *wqe) +{ + process_pending_queue(wqe->cptvf->pdev, &wqe->cptvf->pqinfo.queue[0]); +} diff --git a/drivers/crypto/marvell/octeontx/otx_cptvf_reqmgr.h b/drivers/crypto/marvell/octeontx/otx_cptvf_reqmgr.h new file mode 100644 index 000000000000..a4c9ff730b13 --- /dev/null +++ b/drivers/crypto/marvell/octeontx/otx_cptvf_reqmgr.h @@ -0,0 +1,227 @@ +/* SPDX-License-Identifier: GPL-2.0 + * Marvell OcteonTX CPT driver + * + * Copyright (C) 2019 Marvell International Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef __OTX_CPTVF_REQUEST_MANAGER_H +#define __OTX_CPTVF_REQUEST_MANAGER_H + +#include <linux/types.h> +#include <linux/crypto.h> +#include <linux/pci.h> +#include "otx_cpt_hw_types.h" + +/* + * Maximum total number of SG buffers is 100, we divide it equally + * between input and output + */ +#define OTX_CPT_MAX_SG_IN_CNT 50 +#define OTX_CPT_MAX_SG_OUT_CNT 50 + +/* DMA mode direct or SG */ +#define OTX_CPT_DMA_DIRECT_DIRECT 0 +#define OTX_CPT_DMA_GATHER_SCATTER 1 + +/* Context source CPTR or DPTR */ +#define OTX_CPT_FROM_CPTR 0 +#define OTX_CPT_FROM_DPTR 1 + +/* CPT instruction queue alignment */ +#define OTX_CPT_INST_Q_ALIGNMENT 128 +#define OTX_CPT_MAX_REQ_SIZE 65535 + +/* Default command timeout in seconds */ +#define OTX_CPT_COMMAND_TIMEOUT 4 +#define OTX_CPT_TIMER_HOLD 0x03F +#define OTX_CPT_COUNT_HOLD 32 +#define OTX_CPT_TIME_IN_RESET_COUNT 5 + +/* Minimum and maximum values for interrupt coalescing */ +#define OTX_CPT_COALESC_MIN_TIME_WAIT 0x0 +#define OTX_CPT_COALESC_MAX_TIME_WAIT ((1<<16)-1) +#define OTX_CPT_COALESC_MIN_NUM_WAIT 0x0 +#define OTX_CPT_COALESC_MAX_NUM_WAIT ((1<<20)-1) + +union otx_cpt_opcode_info { + u16 flags; + struct { + u8 major; + u8 minor; + } s; +}; + +struct otx_cptvf_request { + u32 param1; + u32 param2; + u16 dlen; + union otx_cpt_opcode_info opcode; +}; + +struct otx_cpt_buf_ptr { + u8 *vptr; + dma_addr_t dma_addr; + u16 size; +}; + +union otx_cpt_ctrl_info { + u32 flags; + struct { +#if defined(__BIG_ENDIAN_BITFIELD) + u32 reserved0:26; + u32 grp:3; /* Group bits */ + u32 dma_mode:2; /* DMA mode */ + u32 se_req:1; /* To SE core */ +#else + u32 se_req:1; /* To SE core */ + u32 dma_mode:2; /* DMA mode */ + u32 grp:3; /* Group bits */ + u32 reserved0:26; +#endif + } s; +}; + +/* + * CPT_INST_S software command definitions + * Words EI (0-3) + */ +union otx_cpt_iq_cmd_word0 { + u64 u64; + struct { + u16 opcode; + u16 param1; + u16 param2; + u16 dlen; + } s; +}; + +union otx_cpt_iq_cmd_word3 { + u64 u64; + struct { +#if defined(__BIG_ENDIAN_BITFIELD) + u64 grp:3; + u64 cptr:61; +#else + u64 cptr:61; + u64 grp:3; +#endif + } s; +}; + +struct otx_cpt_iq_cmd { + union otx_cpt_iq_cmd_word0 cmd; + u64 dptr; + u64 rptr; + union otx_cpt_iq_cmd_word3 cptr; +}; + +struct otx_cpt_sglist_component { + union { + u64 len; + struct { + u16 len0; + u16 len1; + u16 len2; + u16 len3; + } s; + } u; + u64 ptr0; + u64 ptr1; + u64 ptr2; + u64 ptr3; +}; + +struct otx_cpt_pending_entry { + u64 *completion_addr; /* Completion address */ + struct otx_cpt_info_buffer *info; + /* Kernel async request callback */ + void (*callback)(int status, void *arg1, void *arg2); + struct crypto_async_request *areq; /* Async request callback arg */ + u8 resume_sender; /* Notify sender to resume sending requests */ + u8 busy; /* Entry status (free/busy) */ +}; + +struct otx_cpt_pending_queue { + struct otx_cpt_pending_entry *head; /* Head of the queue */ + u32 front; /* Process work from here */ + u32 rear; /* Append new work here */ + u32 pending_count; /* Pending requests count */ + u32 qlen; /* Queue length */ + spinlock_t lock; /* Queue lock */ +}; + +struct otx_cpt_req_info { + /* Kernel async request callback */ + void (*callback)(int status, void *arg1, void *arg2); + struct crypto_async_request *areq; /* Async request callback arg */ + struct otx_cptvf_request req;/* Request information (core specific) */ + union otx_cpt_ctrl_info ctrl;/* User control information */ + struct otx_cpt_buf_ptr in[OTX_CPT_MAX_SG_IN_CNT]; + struct otx_cpt_buf_ptr out[OTX_CPT_MAX_SG_OUT_CNT]; + u8 *iv_out; /* IV to send back */ + u16 rlen; /* Output length */ + u8 incnt; /* Number of input buffers */ + u8 outcnt; /* Number of output buffers */ + u8 req_type; /* Type of request */ + u8 is_enc; /* Is a request an encryption request */ + u8 is_trunc_hmac;/* Is truncated hmac used */ +}; + +struct otx_cpt_info_buffer { + struct otx_cpt_pending_entry *pentry; + struct otx_cpt_req_info *req; + struct pci_dev *pdev; + u64 *completion_addr; + u8 *out_buffer; + u8 *in_buffer; + dma_addr_t dptr_baddr; + dma_addr_t rptr_baddr; + dma_addr_t comp_baddr; + unsigned long time_in; + u32 dlen; + u32 dma_len; + u8 extra_time; +}; + +static inline void do_request_cleanup(struct pci_dev *pdev, + struct otx_cpt_info_buffer *info) +{ + struct otx_cpt_req_info *req; + int i; + + if (info->dptr_baddr) + dma_unmap_single(&pdev->dev, info->dptr_baddr, + info->dma_len, DMA_BIDIRECTIONAL); + + if (info->req) { + req = info->req; + for (i = 0; i < req->outcnt; i++) { + if (req->out[i].dma_addr) + dma_unmap_single(&pdev->dev, + req->out[i].dma_addr, + req->out[i].size, + DMA_BIDIRECTIONAL); + } + + for (i = 0; i < req->incnt; i++) { + if (req->in[i].dma_addr) + dma_unmap_single(&pdev->dev, + req->in[i].dma_addr, + req->in[i].size, + DMA_BIDIRECTIONAL); + } + } + kzfree(info); +} + +struct otx_cptvf_wqe; +void otx_cpt_dump_sg_list(struct pci_dev *pdev, struct otx_cpt_req_info *req); +void otx_cpt_post_process(struct otx_cptvf_wqe *wqe); +int otx_cpt_do_request(struct pci_dev *pdev, struct otx_cpt_req_info *req, + int cpu_num); + +#endif /* __OTX_CPTVF_REQUEST_MANAGER_H */ diff --git a/drivers/crypto/mediatek/mtk-sha.c b/drivers/crypto/mediatek/mtk-sha.c index 9e9f48bb7f85..bd6309e57ab8 100644 --- a/drivers/crypto/mediatek/mtk-sha.c +++ b/drivers/crypto/mediatek/mtk-sha.c @@ -107,7 +107,7 @@ struct mtk_sha_ctx { u8 id; u8 buf[SHA_BUF_SIZE] __aligned(sizeof(u32)); - struct mtk_sha_hmac_ctx base[0]; + struct mtk_sha_hmac_ctx base[]; }; struct mtk_sha_drv { diff --git a/drivers/crypto/mxs-dcp.c b/drivers/crypto/mxs-dcp.c index 435ac1c83df9..d84530293036 100644 --- a/drivers/crypto/mxs-dcp.c +++ b/drivers/crypto/mxs-dcp.c @@ -20,6 +20,7 @@ #include <crypto/sha.h> #include <crypto/internal/hash.h> #include <crypto/internal/skcipher.h> +#include <crypto/scatterwalk.h> #define DCP_MAX_CHANS 4 #define DCP_BUF_SZ PAGE_SIZE @@ -611,49 +612,46 @@ static int dcp_sha_req_to_buf(struct crypto_async_request *arq) struct dcp_async_ctx *actx = crypto_ahash_ctx(tfm); struct dcp_sha_req_ctx *rctx = ahash_request_ctx(req); struct hash_alg_common *halg = crypto_hash_alg_common(tfm); - const int nents = sg_nents(req->src); uint8_t *in_buf = sdcp->coh->sha_in_buf; uint8_t *out_buf = sdcp->coh->sha_out_buf; - uint8_t *src_buf; - struct scatterlist *src; - unsigned int i, len, clen; + unsigned int i, len, clen, oft = 0; int ret; int fin = rctx->fini; if (fin) rctx->fini = 0; - for_each_sg(req->src, src, nents, i) { - src_buf = sg_virt(src); - len = sg_dma_len(src); - - do { - if (actx->fill + len > DCP_BUF_SZ) - clen = DCP_BUF_SZ - actx->fill; - else - clen = len; - - memcpy(in_buf + actx->fill, src_buf, clen); - len -= clen; - src_buf += clen; - actx->fill += clen; + src = req->src; + len = req->nbytes; - /* - * If we filled the buffer and still have some - * more data, submit the buffer. - */ - if (len && actx->fill == DCP_BUF_SZ) { - ret = mxs_dcp_run_sha(req); - if (ret) - return ret; - actx->fill = 0; - rctx->init = 0; - } - } while (len); + while (len) { + if (actx->fill + len > DCP_BUF_SZ) + clen = DCP_BUF_SZ - actx->fill; + else + clen = len; + + scatterwalk_map_and_copy(in_buf + actx->fill, src, oft, clen, + 0); + + len -= clen; + oft += clen; + actx->fill += clen; + + /* + * If we filled the buffer and still have some + * more data, submit the buffer. + */ + if (len && actx->fill == DCP_BUF_SZ) { + ret = mxs_dcp_run_sha(req); + if (ret) + return ret; + actx->fill = 0; + rctx->init = 0; + } } if (fin) { diff --git a/drivers/crypto/nx/nx.h b/drivers/crypto/nx/nx.h index 91c54289124a..c6233173c612 100644 --- a/drivers/crypto/nx/nx.h +++ b/drivers/crypto/nx/nx.h @@ -37,7 +37,7 @@ struct max_sync_cop { u32 fc; u32 mode; u32 triplets; - struct msc_triplet trip[0]; + struct msc_triplet trip[]; } __packed; struct alg_props { diff --git a/drivers/crypto/omap-sham.c b/drivers/crypto/omap-sham.c index 4f915a4ef5b0..e4072cd38585 100644 --- a/drivers/crypto/omap-sham.c +++ b/drivers/crypto/omap-sham.c @@ -159,7 +159,7 @@ struct omap_sham_reqctx { int sg_len; unsigned int total; /* total request */ - u8 buffer[0] OMAP_ALIGNED; + u8 buffer[] OMAP_ALIGNED; }; struct omap_sham_hmac_ctx { @@ -176,7 +176,7 @@ struct omap_sham_ctx { /* fallback stuff */ struct crypto_shash *fallback; - struct omap_sham_hmac_ctx base[0]; + struct omap_sham_hmac_ctx base[]; }; #define OMAP_SHAM_QUEUE_LENGTH 10 diff --git a/drivers/crypto/padlock-aes.c b/drivers/crypto/padlock-aes.c index 594d6b1695d5..62c6fe88b212 100644 --- a/drivers/crypto/padlock-aes.c +++ b/drivers/crypto/padlock-aes.c @@ -474,7 +474,7 @@ static struct skcipher_alg cbc_aes_alg = { }; static const struct x86_cpu_id padlock_cpu_id[] = { - X86_FEATURE_MATCH(X86_FEATURE_XCRYPT), + X86_MATCH_FEATURE(X86_FEATURE_XCRYPT, NULL), {} }; MODULE_DEVICE_TABLE(x86cpu, padlock_cpu_id); diff --git a/drivers/crypto/padlock-sha.c b/drivers/crypto/padlock-sha.c index c826abe79e79..a697a4a3f2d0 100644 --- a/drivers/crypto/padlock-sha.c +++ b/drivers/crypto/padlock-sha.c @@ -490,7 +490,7 @@ static struct shash_alg sha256_alg_nano = { }; static const struct x86_cpu_id padlock_sha_ids[] = { - X86_FEATURE_MATCH(X86_FEATURE_PHE), + X86_MATCH_FEATURE(X86_FEATURE_PHE, NULL), {} }; MODULE_DEVICE_TABLE(x86cpu, padlock_sha_ids); diff --git a/drivers/crypto/qat/qat_common/qat_algs.c b/drivers/crypto/qat/qat_common/qat_algs.c index 833bb1d3a11b..e14d3dd291f0 100644 --- a/drivers/crypto/qat/qat_common/qat_algs.c +++ b/drivers/crypto/qat/qat_common/qat_algs.c @@ -97,7 +97,7 @@ struct qat_alg_cd { struct icp_qat_hw_cipher_algo_blk cipher; struct icp_qat_hw_auth_algo_blk hash; } qat_enc_cd; - struct qat_dec { /* Decrytp content desc */ + struct qat_dec { /* Decrypt content desc */ struct icp_qat_hw_auth_algo_blk hash; struct icp_qat_hw_cipher_algo_blk cipher; } qat_dec_cd; diff --git a/drivers/crypto/qat/qat_common/qat_crypto.c b/drivers/crypto/qat/qat_common/qat_crypto.c index 3852d31ce0a4..fb504cee0305 100644 --- a/drivers/crypto/qat/qat_common/qat_crypto.c +++ b/drivers/crypto/qat/qat_common/qat_crypto.c @@ -250,8 +250,7 @@ static int qat_crypto_create_instances(struct adf_accel_dev *accel_dev) char val[ADF_CFG_MAX_VAL_LEN_IN_BYTES]; INIT_LIST_HEAD(&accel_dev->crypto_list); - strlcpy(key, ADF_NUM_CY, sizeof(key)); - if (adf_cfg_get_param_value(accel_dev, SEC, key, val)) + if (adf_cfg_get_param_value(accel_dev, SEC, ADF_NUM_CY, val)) return -EFAULT; if (kstrtoul(val, 0, &num_inst)) diff --git a/drivers/crypto/qce/common.c b/drivers/crypto/qce/common.c index 629e7f34dc09..5006e74c40cd 100644 --- a/drivers/crypto/qce/common.c +++ b/drivers/crypto/qce/common.c @@ -15,8 +15,6 @@ #include "regs-v5.h" #include "sha.h" -#define QCE_SECTOR_SIZE 512 - static inline u32 qce_read(struct qce_device *qce, u32 offset) { return readl(qce->base + offset); diff --git a/drivers/crypto/qce/common.h b/drivers/crypto/qce/common.h index 282d4317470d..9f989cba0f1b 100644 --- a/drivers/crypto/qce/common.h +++ b/drivers/crypto/qce/common.h @@ -12,6 +12,9 @@ #include <crypto/hash.h> #include <crypto/internal/skcipher.h> +/* xts du size */ +#define QCE_SECTOR_SIZE 512 + /* key size in bytes */ #define QCE_SHA_HMAC_KEY_SIZE 64 #define QCE_MAX_CIPHER_KEY_SIZE AES_KEYSIZE_256 diff --git a/drivers/crypto/qce/dma.c b/drivers/crypto/qce/dma.c index 7da893dc00e7..46db5bf366b4 100644 --- a/drivers/crypto/qce/dma.c +++ b/drivers/crypto/qce/dma.c @@ -48,9 +48,10 @@ void qce_dma_release(struct qce_dma_data *dma) struct scatterlist * qce_sgtable_add(struct sg_table *sgt, struct scatterlist *new_sgl, - int max_ents) + unsigned int max_len) { struct scatterlist *sg = sgt->sgl, *sg_last = NULL; + unsigned int new_len; while (sg) { if (!sg_page(sg)) @@ -61,13 +62,13 @@ qce_sgtable_add(struct sg_table *sgt, struct scatterlist *new_sgl, if (!sg) return ERR_PTR(-EINVAL); - while (new_sgl && sg && max_ents) { - sg_set_page(sg, sg_page(new_sgl), new_sgl->length, - new_sgl->offset); + while (new_sgl && sg && max_len) { + new_len = new_sgl->length > max_len ? max_len : new_sgl->length; + sg_set_page(sg, sg_page(new_sgl), new_len, new_sgl->offset); sg_last = sg; sg = sg_next(sg); new_sgl = sg_next(new_sgl); - max_ents--; + max_len -= new_len; } return sg_last; diff --git a/drivers/crypto/qce/dma.h b/drivers/crypto/qce/dma.h index ed25a0d9829e..786402169360 100644 --- a/drivers/crypto/qce/dma.h +++ b/drivers/crypto/qce/dma.h @@ -43,6 +43,6 @@ void qce_dma_issue_pending(struct qce_dma_data *dma); int qce_dma_terminate_all(struct qce_dma_data *dma); struct scatterlist * qce_sgtable_add(struct sg_table *sgt, struct scatterlist *sg_add, - int max_ents); + unsigned int max_len); #endif /* _DMA_H_ */ diff --git a/drivers/crypto/qce/skcipher.c b/drivers/crypto/qce/skcipher.c index 4217b745f124..9412433f3b21 100644 --- a/drivers/crypto/qce/skcipher.c +++ b/drivers/crypto/qce/skcipher.c @@ -5,6 +5,7 @@ #include <linux/device.h> #include <linux/interrupt.h> +#include <linux/moduleparam.h> #include <linux/types.h> #include <crypto/aes.h> #include <crypto/internal/des.h> @@ -12,6 +13,13 @@ #include "cipher.h" +static unsigned int aes_sw_max_len = CONFIG_CRYPTO_DEV_QCE_SW_MAX_LEN; +module_param(aes_sw_max_len, uint, 0644); +MODULE_PARM_DESC(aes_sw_max_len, + "Only use hardware for AES requests larger than this " + "[0=always use hardware; anything <16 breaks AES-GCM; default=" + __stringify(CONFIG_CRYPTO_DEV_QCE_SW_MAX_LEN)"]"); + static LIST_HEAD(skcipher_algs); static void qce_skcipher_done(void *data) @@ -97,13 +105,14 @@ qce_skcipher_async_req_handle(struct crypto_async_request *async_req) sg_init_one(&rctx->result_sg, qce->dma.result_buf, QCE_RESULT_BUF_SZ); - sg = qce_sgtable_add(&rctx->dst_tbl, req->dst, rctx->dst_nents - 1); + sg = qce_sgtable_add(&rctx->dst_tbl, req->dst, req->cryptlen); if (IS_ERR(sg)) { ret = PTR_ERR(sg); goto error_free; } - sg = qce_sgtable_add(&rctx->dst_tbl, &rctx->result_sg, 1); + sg = qce_sgtable_add(&rctx->dst_tbl, &rctx->result_sg, + QCE_RESULT_BUF_SZ); if (IS_ERR(sg)) { ret = PTR_ERR(sg); goto error_free; @@ -165,15 +174,10 @@ static int qce_skcipher_setkey(struct crypto_skcipher *ablk, const u8 *key, switch (IS_XTS(flags) ? keylen >> 1 : keylen) { case AES_KEYSIZE_128: case AES_KEYSIZE_256: + memcpy(ctx->enc_key, key, keylen); break; - default: - goto fallback; } - ctx->enc_keylen = keylen; - memcpy(ctx->enc_key, key, keylen); - return 0; -fallback: ret = crypto_sync_skcipher_setkey(ctx->fallback, key, keylen); if (!ret) ctx->enc_keylen = keylen; @@ -223,8 +227,14 @@ static int qce_skcipher_crypt(struct skcipher_request *req, int encrypt) rctx->flags |= encrypt ? QCE_ENCRYPT : QCE_DECRYPT; keylen = IS_XTS(rctx->flags) ? ctx->enc_keylen >> 1 : ctx->enc_keylen; - if (IS_AES(rctx->flags) && keylen != AES_KEYSIZE_128 && - keylen != AES_KEYSIZE_256) { + /* qce is hanging when AES-XTS request len > QCE_SECTOR_SIZE and + * is not a multiple of it; pass such requests to the fallback + */ + if (IS_AES(rctx->flags) && + (((keylen != AES_KEYSIZE_128 && keylen != AES_KEYSIZE_256) || + req->cryptlen <= aes_sw_max_len) || + (IS_XTS(rctx->flags) && req->cryptlen > QCE_SECTOR_SIZE && + req->cryptlen % QCE_SECTOR_SIZE))) { SYNC_SKCIPHER_REQUEST_ON_STACK(subreq, ctx->fallback); skcipher_request_set_sync_tfm(subreq, ctx->fallback); diff --git a/drivers/crypto/s5p-sss.c b/drivers/crypto/s5p-sss.c index d66e20a2f54c..2a16800d2579 100644 --- a/drivers/crypto/s5p-sss.c +++ b/drivers/crypto/s5p-sss.c @@ -369,7 +369,7 @@ struct s5p_hash_reqctx { bool error; u32 bufcnt; - u8 buffer[0]; + u8 buffer[]; }; /** diff --git a/drivers/crypto/vmx/.gitignore b/drivers/crypto/vmx/.gitignore index af4a7ce4738d..7aa71d83f739 100644 --- a/drivers/crypto/vmx/.gitignore +++ b/drivers/crypto/vmx/.gitignore @@ -1,2 +1,3 @@ +# SPDX-License-Identifier: GPL-2.0-only aesp8-ppc.S ghashp8-ppc.S diff --git a/drivers/crypto/xilinx/Makefile b/drivers/crypto/xilinx/Makefile new file mode 100644 index 000000000000..534e32daf76a --- /dev/null +++ b/drivers/crypto/xilinx/Makefile @@ -0,0 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only +obj-$(CONFIG_CRYPTO_DEV_ZYNQMP_AES) += zynqmp-aes-gcm.o diff --git a/drivers/crypto/xilinx/zynqmp-aes-gcm.c b/drivers/crypto/xilinx/zynqmp-aes-gcm.c new file mode 100644 index 000000000000..09f7f468eef8 --- /dev/null +++ b/drivers/crypto/xilinx/zynqmp-aes-gcm.c @@ -0,0 +1,457 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Xilinx ZynqMP AES Driver. + * Copyright (c) 2020 Xilinx Inc. + */ + +#include <crypto/aes.h> +#include <crypto/engine.h> +#include <crypto/gcm.h> +#include <crypto/internal/aead.h> +#include <crypto/scatterwalk.h> + +#include <linux/module.h> +#include <linux/of_device.h> +#include <linux/platform_device.h> + +#include <linux/firmware/xlnx-zynqmp.h> + +#define ZYNQMP_DMA_BIT_MASK 32U + +#define ZYNQMP_AES_KEY_SIZE AES_KEYSIZE_256 +#define ZYNQMP_AES_AUTH_SIZE 16U +#define ZYNQMP_KEY_SRC_SEL_KEY_LEN 1U +#define ZYNQMP_AES_BLK_SIZE 1U +#define ZYNQMP_AES_MIN_INPUT_BLK_SIZE 4U +#define ZYNQMP_AES_WORD_LEN 4U + +#define ZYNQMP_AES_GCM_TAG_MISMATCH_ERR 0x01 +#define ZYNQMP_AES_WRONG_KEY_SRC_ERR 0x13 +#define ZYNQMP_AES_PUF_NOT_PROGRAMMED 0xE300 + +enum zynqmp_aead_op { + ZYNQMP_AES_DECRYPT = 0, + ZYNQMP_AES_ENCRYPT +}; + +enum zynqmp_aead_keysrc { + ZYNQMP_AES_KUP_KEY = 0, + ZYNQMP_AES_DEV_KEY, + ZYNQMP_AES_PUF_KEY +}; + +struct zynqmp_aead_drv_ctx { + union { + struct aead_alg aead; + } alg; + struct device *dev; + struct crypto_engine *engine; + const struct zynqmp_eemi_ops *eemi_ops; +}; + +struct zynqmp_aead_hw_req { + u64 src; + u64 iv; + u64 key; + u64 dst; + u64 size; + u64 op; + u64 keysrc; +}; + +struct zynqmp_aead_tfm_ctx { + struct crypto_engine_ctx engine_ctx; + struct device *dev; + u8 key[ZYNQMP_AES_KEY_SIZE]; + u8 *iv; + u32 keylen; + u32 authsize; + enum zynqmp_aead_keysrc keysrc; + struct crypto_aead *fbk_cipher; +}; + +struct zynqmp_aead_req_ctx { + enum zynqmp_aead_op op; +}; + +static int zynqmp_aes_aead_cipher(struct aead_request *req) +{ + struct crypto_aead *aead = crypto_aead_reqtfm(req); + struct zynqmp_aead_tfm_ctx *tfm_ctx = crypto_aead_ctx(aead); + struct zynqmp_aead_req_ctx *rq_ctx = aead_request_ctx(req); + struct device *dev = tfm_ctx->dev; + struct aead_alg *alg = crypto_aead_alg(aead); + struct zynqmp_aead_drv_ctx *drv_ctx; + struct zynqmp_aead_hw_req *hwreq; + dma_addr_t dma_addr_data, dma_addr_hw_req; + unsigned int data_size; + unsigned int status; + size_t dma_size; + char *kbuf; + int err; + + drv_ctx = container_of(alg, struct zynqmp_aead_drv_ctx, alg.aead); + + if (!drv_ctx->eemi_ops->aes) + return -ENOTSUPP; + + if (tfm_ctx->keysrc == ZYNQMP_AES_KUP_KEY) + dma_size = req->cryptlen + ZYNQMP_AES_KEY_SIZE + + GCM_AES_IV_SIZE; + else + dma_size = req->cryptlen + GCM_AES_IV_SIZE; + + kbuf = dma_alloc_coherent(dev, dma_size, &dma_addr_data, GFP_KERNEL); + if (!kbuf) + return -ENOMEM; + + hwreq = dma_alloc_coherent(dev, sizeof(struct zynqmp_aead_hw_req), + &dma_addr_hw_req, GFP_KERNEL); + if (!hwreq) { + dma_free_coherent(dev, dma_size, kbuf, dma_addr_data); + return -ENOMEM; + } + + data_size = req->cryptlen; + scatterwalk_map_and_copy(kbuf, req->src, 0, req->cryptlen, 0); + memcpy(kbuf + data_size, req->iv, GCM_AES_IV_SIZE); + + hwreq->src = dma_addr_data; + hwreq->dst = dma_addr_data; + hwreq->iv = hwreq->src + data_size; + hwreq->keysrc = tfm_ctx->keysrc; + hwreq->op = rq_ctx->op; + + if (hwreq->op == ZYNQMP_AES_ENCRYPT) + hwreq->size = data_size; + else + hwreq->size = data_size - ZYNQMP_AES_AUTH_SIZE; + + if (hwreq->keysrc == ZYNQMP_AES_KUP_KEY) { + memcpy(kbuf + data_size + GCM_AES_IV_SIZE, + tfm_ctx->key, ZYNQMP_AES_KEY_SIZE); + + hwreq->key = hwreq->src + data_size + GCM_AES_IV_SIZE; + } else { + hwreq->key = 0; + } + + drv_ctx->eemi_ops->aes(dma_addr_hw_req, &status); + + if (status) { + switch (status) { + case ZYNQMP_AES_GCM_TAG_MISMATCH_ERR: + dev_err(dev, "ERROR: Gcm Tag mismatch\n"); + break; + case ZYNQMP_AES_WRONG_KEY_SRC_ERR: + dev_err(dev, "ERROR: Wrong KeySrc, enable secure mode\n"); + break; + case ZYNQMP_AES_PUF_NOT_PROGRAMMED: + dev_err(dev, "ERROR: PUF is not registered\n"); + break; + default: + dev_err(dev, "ERROR: Unknown error\n"); + break; + } + err = -status; + } else { + if (hwreq->op == ZYNQMP_AES_ENCRYPT) + data_size = data_size + ZYNQMP_AES_AUTH_SIZE; + else + data_size = data_size - ZYNQMP_AES_AUTH_SIZE; + + sg_copy_from_buffer(req->dst, sg_nents(req->dst), + kbuf, data_size); + err = 0; + } + + if (kbuf) { + memzero_explicit(kbuf, dma_size); + dma_free_coherent(dev, dma_size, kbuf, dma_addr_data); + } + if (hwreq) { + memzero_explicit(hwreq, sizeof(struct zynqmp_aead_hw_req)); + dma_free_coherent(dev, sizeof(struct zynqmp_aead_hw_req), + hwreq, dma_addr_hw_req); + } + return err; +} + +static int zynqmp_fallback_check(struct zynqmp_aead_tfm_ctx *tfm_ctx, + struct aead_request *req) +{ + int need_fallback = 0; + struct zynqmp_aead_req_ctx *rq_ctx = aead_request_ctx(req); + + if (tfm_ctx->authsize != ZYNQMP_AES_AUTH_SIZE) + need_fallback = 1; + + if (tfm_ctx->keysrc == ZYNQMP_AES_KUP_KEY && + tfm_ctx->keylen != ZYNQMP_AES_KEY_SIZE) { + need_fallback = 1; + } + if (req->assoclen != 0 || + req->cryptlen < ZYNQMP_AES_MIN_INPUT_BLK_SIZE) { + need_fallback = 1; + } + if ((req->cryptlen % ZYNQMP_AES_WORD_LEN) != 0) + need_fallback = 1; + + if (rq_ctx->op == ZYNQMP_AES_DECRYPT && + req->cryptlen <= ZYNQMP_AES_AUTH_SIZE) { + need_fallback = 1; + } + return need_fallback; +} + +static int zynqmp_handle_aes_req(struct crypto_engine *engine, + void *req) +{ + struct aead_request *areq = + container_of(req, struct aead_request, base); + struct crypto_aead *aead = crypto_aead_reqtfm(req); + struct zynqmp_aead_tfm_ctx *tfm_ctx = crypto_aead_ctx(aead); + struct zynqmp_aead_req_ctx *rq_ctx = aead_request_ctx(areq); + struct aead_request *subreq = aead_request_ctx(req); + int need_fallback; + int err; + + need_fallback = zynqmp_fallback_check(tfm_ctx, areq); + + if (need_fallback) { + aead_request_set_tfm(subreq, tfm_ctx->fbk_cipher); + + aead_request_set_callback(subreq, areq->base.flags, + NULL, NULL); + aead_request_set_crypt(subreq, areq->src, areq->dst, + areq->cryptlen, areq->iv); + aead_request_set_ad(subreq, areq->assoclen); + if (rq_ctx->op == ZYNQMP_AES_ENCRYPT) + err = crypto_aead_encrypt(subreq); + else + err = crypto_aead_decrypt(subreq); + } else { + err = zynqmp_aes_aead_cipher(areq); + } + + crypto_finalize_aead_request(engine, areq, err); + return 0; +} + +static int zynqmp_aes_aead_setkey(struct crypto_aead *aead, const u8 *key, + unsigned int keylen) +{ + struct crypto_tfm *tfm = crypto_aead_tfm(aead); + struct zynqmp_aead_tfm_ctx *tfm_ctx = + (struct zynqmp_aead_tfm_ctx *)crypto_tfm_ctx(tfm); + unsigned char keysrc; + + if (keylen == ZYNQMP_KEY_SRC_SEL_KEY_LEN) { + keysrc = *key; + if (keysrc == ZYNQMP_AES_KUP_KEY || + keysrc == ZYNQMP_AES_DEV_KEY || + keysrc == ZYNQMP_AES_PUF_KEY) { + tfm_ctx->keysrc = (enum zynqmp_aead_keysrc)keysrc; + } else { + tfm_ctx->keylen = keylen; + } + } else { + tfm_ctx->keylen = keylen; + if (keylen == ZYNQMP_AES_KEY_SIZE) { + tfm_ctx->keysrc = ZYNQMP_AES_KUP_KEY; + memcpy(tfm_ctx->key, key, keylen); + } + } + + tfm_ctx->fbk_cipher->base.crt_flags &= ~CRYPTO_TFM_REQ_MASK; + tfm_ctx->fbk_cipher->base.crt_flags |= (aead->base.crt_flags & + CRYPTO_TFM_REQ_MASK); + + return crypto_aead_setkey(tfm_ctx->fbk_cipher, key, keylen); +} + +static int zynqmp_aes_aead_setauthsize(struct crypto_aead *aead, + unsigned int authsize) +{ + struct crypto_tfm *tfm = crypto_aead_tfm(aead); + struct zynqmp_aead_tfm_ctx *tfm_ctx = + (struct zynqmp_aead_tfm_ctx *)crypto_tfm_ctx(tfm); + + tfm_ctx->authsize = authsize; + return crypto_aead_setauthsize(tfm_ctx->fbk_cipher, authsize); +} + +static int zynqmp_aes_aead_encrypt(struct aead_request *req) +{ + struct zynqmp_aead_drv_ctx *drv_ctx; + struct crypto_aead *aead = crypto_aead_reqtfm(req); + struct aead_alg *alg = crypto_aead_alg(aead); + struct zynqmp_aead_req_ctx *rq_ctx = aead_request_ctx(req); + + rq_ctx->op = ZYNQMP_AES_ENCRYPT; + drv_ctx = container_of(alg, struct zynqmp_aead_drv_ctx, alg.aead); + + return crypto_transfer_aead_request_to_engine(drv_ctx->engine, req); +} + +static int zynqmp_aes_aead_decrypt(struct aead_request *req) +{ + struct zynqmp_aead_drv_ctx *drv_ctx; + struct crypto_aead *aead = crypto_aead_reqtfm(req); + struct aead_alg *alg = crypto_aead_alg(aead); + struct zynqmp_aead_req_ctx *rq_ctx = aead_request_ctx(req); + + rq_ctx->op = ZYNQMP_AES_DECRYPT; + drv_ctx = container_of(alg, struct zynqmp_aead_drv_ctx, alg.aead); + + return crypto_transfer_aead_request_to_engine(drv_ctx->engine, req); +} + +static int zynqmp_aes_aead_init(struct crypto_aead *aead) +{ + struct crypto_tfm *tfm = crypto_aead_tfm(aead); + struct zynqmp_aead_tfm_ctx *tfm_ctx = + (struct zynqmp_aead_tfm_ctx *)crypto_tfm_ctx(tfm); + struct zynqmp_aead_drv_ctx *drv_ctx; + struct aead_alg *alg = crypto_aead_alg(aead); + + drv_ctx = container_of(alg, struct zynqmp_aead_drv_ctx, alg.aead); + tfm_ctx->dev = drv_ctx->dev; + + tfm_ctx->engine_ctx.op.do_one_request = zynqmp_handle_aes_req; + tfm_ctx->engine_ctx.op.prepare_request = NULL; + tfm_ctx->engine_ctx.op.unprepare_request = NULL; + + tfm_ctx->fbk_cipher = crypto_alloc_aead(drv_ctx->alg.aead.base.cra_name, + 0, + CRYPTO_ALG_NEED_FALLBACK); + + if (IS_ERR(tfm_ctx->fbk_cipher)) { + pr_err("%s() Error: failed to allocate fallback for %s\n", + __func__, drv_ctx->alg.aead.base.cra_name); + return PTR_ERR(tfm_ctx->fbk_cipher); + } + + crypto_aead_set_reqsize(aead, + max(sizeof(struct zynqmp_aead_req_ctx), + sizeof(struct aead_request) + + crypto_aead_reqsize(tfm_ctx->fbk_cipher))); + return 0; +} + +static void zynqmp_aes_aead_exit(struct crypto_aead *aead) +{ + struct crypto_tfm *tfm = crypto_aead_tfm(aead); + struct zynqmp_aead_tfm_ctx *tfm_ctx = + (struct zynqmp_aead_tfm_ctx *)crypto_tfm_ctx(tfm); + + if (tfm_ctx->fbk_cipher) { + crypto_free_aead(tfm_ctx->fbk_cipher); + tfm_ctx->fbk_cipher = NULL; + } + memzero_explicit(tfm_ctx, sizeof(struct zynqmp_aead_tfm_ctx)); +} + +static struct zynqmp_aead_drv_ctx aes_drv_ctx = { + .alg.aead = { + .setkey = zynqmp_aes_aead_setkey, + .setauthsize = zynqmp_aes_aead_setauthsize, + .encrypt = zynqmp_aes_aead_encrypt, + .decrypt = zynqmp_aes_aead_decrypt, + .init = zynqmp_aes_aead_init, + .exit = zynqmp_aes_aead_exit, + .ivsize = GCM_AES_IV_SIZE, + .maxauthsize = ZYNQMP_AES_AUTH_SIZE, + .base = { + .cra_name = "gcm(aes)", + .cra_driver_name = "xilinx-zynqmp-aes-gcm", + .cra_priority = 200, + .cra_flags = CRYPTO_ALG_TYPE_AEAD | + CRYPTO_ALG_ASYNC | + CRYPTO_ALG_KERN_DRIVER_ONLY | + CRYPTO_ALG_NEED_FALLBACK, + .cra_blocksize = ZYNQMP_AES_BLK_SIZE, + .cra_ctxsize = sizeof(struct zynqmp_aead_tfm_ctx), + .cra_module = THIS_MODULE, + } + } +}; + +static int zynqmp_aes_aead_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + int err; + + /* ZynqMP AES driver supports only one instance */ + if (!aes_drv_ctx.dev) + aes_drv_ctx.dev = dev; + else + return -ENODEV; + + aes_drv_ctx.eemi_ops = zynqmp_pm_get_eemi_ops(); + if (IS_ERR(aes_drv_ctx.eemi_ops)) { + dev_err(dev, "Failed to get ZynqMP EEMI interface\n"); + return PTR_ERR(aes_drv_ctx.eemi_ops); + } + + err = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(ZYNQMP_DMA_BIT_MASK)); + if (err < 0) { + dev_err(dev, "No usable DMA configuration\n"); + return err; + } + + aes_drv_ctx.engine = crypto_engine_alloc_init(dev, 1); + if (!aes_drv_ctx.engine) { + dev_err(dev, "Cannot alloc AES engine\n"); + err = -ENOMEM; + goto err_engine; + } + + err = crypto_engine_start(aes_drv_ctx.engine); + if (err) { + dev_err(dev, "Cannot start AES engine\n"); + goto err_engine; + } + + err = crypto_register_aead(&aes_drv_ctx.alg.aead); + if (err < 0) { + dev_err(dev, "Failed to register AEAD alg.\n"); + goto err_aead; + } + return 0; + +err_aead: + crypto_unregister_aead(&aes_drv_ctx.alg.aead); + +err_engine: + if (aes_drv_ctx.engine) + crypto_engine_exit(aes_drv_ctx.engine); + + return err; +} + +static int zynqmp_aes_aead_remove(struct platform_device *pdev) +{ + crypto_engine_exit(aes_drv_ctx.engine); + crypto_unregister_aead(&aes_drv_ctx.alg.aead); + + return 0; +} + +static const struct of_device_id zynqmp_aes_dt_ids[] = { + { .compatible = "xlnx,zynqmp-aes" }, + { /* sentinel */ } +}; +MODULE_DEVICE_TABLE(of, zynqmp_aes_dt_ids); + +static struct platform_driver zynqmp_aes_driver = { + .probe = zynqmp_aes_aead_probe, + .remove = zynqmp_aes_aead_remove, + .driver = { + .name = "zynqmp-aes", + .of_match_table = zynqmp_aes_dt_ids, + }, +}; + +module_platform_driver(zynqmp_aes_driver); +MODULE_LICENSE("GPL"); |