From df6313d707e575a679ada3313358289af24454c0 Mon Sep 17 00:00:00 2001 From: Sean Anderson Date: Thu, 1 Jul 2021 14:56:37 -0400 Subject: crypto: mxs-dcp - Check for DMA mapping errors After calling dma_map_single(), we must also call dma_mapping_error(). This fixes the following warning when compiling with CONFIG_DMA_API_DEBUG: [ 311.241478] WARNING: CPU: 0 PID: 428 at kernel/dma/debug.c:1027 check_unmap+0x79c/0x96c [ 311.249547] DMA-API: mxs-dcp 2280000.crypto: device driver failed to check map error[device address=0x00000000860cb080] [size=32 bytes] [mapped as single] Signed-off-by: Sean Anderson Reviewed-by: Richard Weinberger Signed-off-by: Herbert Xu --- drivers/crypto/mxs-dcp.c | 45 +++++++++++++++++++++++++++++++++++---------- 1 file changed, 35 insertions(+), 10 deletions(-) diff --git a/drivers/crypto/mxs-dcp.c b/drivers/crypto/mxs-dcp.c index d6a7784d2988..f397cc5bf102 100644 --- a/drivers/crypto/mxs-dcp.c +++ b/drivers/crypto/mxs-dcp.c @@ -170,15 +170,19 @@ static struct dcp *global_sdcp; static int mxs_dcp_start_dma(struct dcp_async_ctx *actx) { + int dma_err; struct dcp *sdcp = global_sdcp; const int chan = actx->chan; uint32_t stat; unsigned long ret; struct dcp_dma_desc *desc = &sdcp->coh->desc[actx->chan]; - dma_addr_t desc_phys = dma_map_single(sdcp->dev, desc, sizeof(*desc), DMA_TO_DEVICE); + dma_err = dma_mapping_error(sdcp->dev, desc_phys); + if (dma_err) + return dma_err; + reinit_completion(&sdcp->completion[chan]); /* Clear status register. */ @@ -216,18 +220,29 @@ static int mxs_dcp_start_dma(struct dcp_async_ctx *actx) static int mxs_dcp_run_aes(struct dcp_async_ctx *actx, struct skcipher_request *req, int init) { + dma_addr_t key_phys, src_phys, dst_phys; struct dcp *sdcp = global_sdcp; struct dcp_dma_desc *desc = &sdcp->coh->desc[actx->chan]; struct dcp_aes_req_ctx *rctx = skcipher_request_ctx(req); int ret; - dma_addr_t key_phys = dma_map_single(sdcp->dev, sdcp->coh->aes_key, - 2 * AES_KEYSIZE_128, - DMA_TO_DEVICE); - dma_addr_t src_phys = dma_map_single(sdcp->dev, sdcp->coh->aes_in_buf, - DCP_BUF_SZ, DMA_TO_DEVICE); - dma_addr_t dst_phys = dma_map_single(sdcp->dev, sdcp->coh->aes_out_buf, - DCP_BUF_SZ, DMA_FROM_DEVICE); + key_phys = dma_map_single(sdcp->dev, sdcp->coh->aes_key, + 2 * AES_KEYSIZE_128, DMA_TO_DEVICE); + ret = dma_mapping_error(sdcp->dev, key_phys); + if (ret) + return ret; + + src_phys = dma_map_single(sdcp->dev, sdcp->coh->aes_in_buf, + DCP_BUF_SZ, DMA_TO_DEVICE); + ret = dma_mapping_error(sdcp->dev, src_phys); + if (ret) + goto err_src; + + dst_phys = dma_map_single(sdcp->dev, sdcp->coh->aes_out_buf, + DCP_BUF_SZ, DMA_FROM_DEVICE); + ret = dma_mapping_error(sdcp->dev, dst_phys); + if (ret) + goto err_dst; if (actx->fill % AES_BLOCK_SIZE) { dev_err(sdcp->dev, "Invalid block size!\n"); @@ -265,10 +280,12 @@ static int mxs_dcp_run_aes(struct dcp_async_ctx *actx, ret = mxs_dcp_start_dma(actx); aes_done_run: + dma_unmap_single(sdcp->dev, dst_phys, DCP_BUF_SZ, DMA_FROM_DEVICE); +err_dst: + dma_unmap_single(sdcp->dev, src_phys, DCP_BUF_SZ, DMA_TO_DEVICE); +err_src: dma_unmap_single(sdcp->dev, key_phys, 2 * AES_KEYSIZE_128, DMA_TO_DEVICE); - dma_unmap_single(sdcp->dev, src_phys, DCP_BUF_SZ, DMA_TO_DEVICE); - dma_unmap_single(sdcp->dev, dst_phys, DCP_BUF_SZ, DMA_FROM_DEVICE); return ret; } @@ -557,6 +574,10 @@ static int mxs_dcp_run_sha(struct ahash_request *req) dma_addr_t buf_phys = dma_map_single(sdcp->dev, sdcp->coh->sha_in_buf, DCP_BUF_SZ, DMA_TO_DEVICE); + ret = dma_mapping_error(sdcp->dev, buf_phys); + if (ret) + return ret; + /* Fill in the DMA descriptor. */ desc->control0 = MXS_DCP_CONTROL0_DECR_SEMAPHORE | MXS_DCP_CONTROL0_INTERRUPT | @@ -589,6 +610,10 @@ static int mxs_dcp_run_sha(struct ahash_request *req) if (rctx->fini) { digest_phys = dma_map_single(sdcp->dev, sdcp->coh->sha_out_buf, DCP_SHA_PAY_SZ, DMA_FROM_DEVICE); + ret = dma_mapping_error(sdcp->dev, digest_phys); + if (ret) + goto done_run; + desc->control0 |= MXS_DCP_CONTROL0_HASH_TERM; desc->payload = digest_phys; } -- cgit v1.2.3 From 2e6d793e1bf07fe5e20cfbbdcec9e1af7e5097eb Mon Sep 17 00:00:00 2001 From: Sean Anderson Date: Thu, 1 Jul 2021 14:56:38 -0400 Subject: crypto: mxs-dcp - Use sg_mapping_iter to copy data This uses the sg_pcopy_from_buffer to copy data, instead of doing it ourselves. In addition to reducing code size, this fixes the following oops resulting from failing to kmap the page: [ 68.896381] Unable to handle kernel NULL pointer dereference at virtual address 00000ab8 [ 68.904539] pgd = 3561adb3 [ 68.907475] [00000ab8] *pgd=00000000 [ 68.911153] Internal error: Oops: 805 [#1] ARM [ 68.915618] Modules linked in: cfg80211 rfkill des_generic libdes arc4 libarc4 cbc ecb algif_skcipher sha256_generic libsha256 sha1_generic hmac aes_generic libaes cmac sha512_generic md5 md4 algif_hash af_alg i2c_imx i2c_core ci_hdrc_imx ci_hdrc mxs_dcp ulpi roles udc_core imx_sdma usbmisc_imx usb_common firmware_class virt_dma phy_mxs_usb nf_tables nfnetlink ip_tables x_tables ipv6 autofs4 [ 68.950741] CPU: 0 PID: 139 Comm: mxs_dcp_chan/ae Not tainted 5.10.34 #296 [ 68.958501] Hardware name: Freescale i.MX6 Ultralite (Device Tree) [ 68.964710] PC is at memcpy+0xa8/0x330 [ 68.968479] LR is at 0xd7b2bc9d [ 68.971638] pc : [] lr : [] psr: 000f0013 [ 68.977920] sp : c2cbbee4 ip : 00000010 fp : 00000010 [ 68.983159] r10: 00000000 r9 : c3283a40 r8 : 1a5a6f08 [ 68.988402] r7 : 4bfe0ecc r6 : 76d8a220 r5 : c32f9050 r4 : 00000001 [ 68.994945] r3 : 00000ab8 r2 : fffffff0 r1 : c32f9050 r0 : 00000ab8 [ 69.001492] Flags: nzcv IRQs on FIQs on Mode SVC_32 ISA ARM Segment none [ 69.008646] Control: 10c53c7d Table: 83664059 DAC: 00000051 [ 69.014414] Process mxs_dcp_chan/ae (pid: 139, stack limit = 0x667b57ab) [ 69.021133] Stack: (0xc2cbbee4 to 0xc2cbc000) [ 69.025519] bee0: c32f9050 c3235408 00000010 00000010 00000ab8 00000001 bf10406c [ 69.033720] bf00: 00000000 00000000 00000010 00000000 c32355d0 832fb080 00000000 c13de2fc [ 69.041921] bf20: c3628010 00000010 c33d5780 00000ab8 bf1067e8 00000002 c21e5010 c2cba000 [ 69.050125] bf40: c32f8040 00000000 bf106a40 c32f9040 c3283a80 00000001 bf105240 c3234040 [ 69.058327] bf60: ffffe000 c3204100 c2c69800 c2cba000 00000000 bf103b84 00000000 c2eddc54 [ 69.066530] bf80: c3204144 c0140d1c c2cba000 c2c69800 c0140be8 00000000 00000000 00000000 [ 69.074730] bfa0: 00000000 00000000 00000000 c0100114 00000000 00000000 00000000 00000000 [ 69.082932] bfc0: 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 [ 69.091131] bfe0: 00000000 00000000 00000000 00000000 00000013 00000000 00000000 00000000 [ 69.099364] [] (memcpy) from [] (dcp_chan_thread_aes+0x4e8/0x840 [mxs_dcp]) [ 69.108117] [] (dcp_chan_thread_aes [mxs_dcp]) from [] (kthread+0x134/0x160) [ 69.116941] [] (kthread) from [] (ret_from_fork+0x14/0x20) [ 69.124178] Exception stack(0xc2cbbfb0 to 0xc2cbbff8) [ 69.129250] bfa0: 00000000 00000000 00000000 00000000 [ 69.137450] bfc0: 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 [ 69.145648] bfe0: 00000000 00000000 00000000 00000000 00000013 00000000 [ 69.152289] Code: e320f000 e4803004 e4804004 e4805004 (e4806004) Signed-off-by: Sean Anderson Signed-off-by: Herbert Xu --- drivers/crypto/mxs-dcp.c | 36 +++++++++--------------------------- 1 file changed, 9 insertions(+), 27 deletions(-) diff --git a/drivers/crypto/mxs-dcp.c b/drivers/crypto/mxs-dcp.c index f397cc5bf102..d19e5ffb5104 100644 --- a/drivers/crypto/mxs-dcp.c +++ b/drivers/crypto/mxs-dcp.c @@ -300,21 +300,20 @@ static int mxs_dcp_aes_block_crypt(struct crypto_async_request *arq) struct scatterlist *dst = req->dst; struct scatterlist *src = req->src; - const int nents = sg_nents(req->src); + int dst_nents = sg_nents(dst); const int out_off = DCP_BUF_SZ; uint8_t *in_buf = sdcp->coh->aes_in_buf; uint8_t *out_buf = sdcp->coh->aes_out_buf; - uint8_t *out_tmp, *src_buf, *dst_buf = NULL; uint32_t dst_off = 0; + uint8_t *src_buf = NULL; uint32_t last_out_len = 0; uint8_t *key = sdcp->coh->aes_key; int ret = 0; - int split = 0; - unsigned int i, len, clen, rem = 0, tlen = 0; + unsigned int i, len, clen, tlen = 0; int init = 0; bool limit_hit = false; @@ -332,7 +331,7 @@ static int mxs_dcp_aes_block_crypt(struct crypto_async_request *arq) memset(key + AES_KEYSIZE_128, 0, AES_KEYSIZE_128); } - for_each_sg(req->src, src, nents, i) { + for_each_sg(req->src, src, sg_nents(src), i) { src_buf = sg_virt(src); len = sg_dma_len(src); tlen += len; @@ -357,34 +356,17 @@ static int mxs_dcp_aes_block_crypt(struct crypto_async_request *arq) * submit the buffer. */ if (actx->fill == out_off || sg_is_last(src) || - limit_hit) { + limit_hit) { ret = mxs_dcp_run_aes(actx, req, init); if (ret) return ret; init = 0; - out_tmp = out_buf; + sg_pcopy_from_buffer(dst, dst_nents, out_buf, + actx->fill, dst_off); + dst_off += actx->fill; last_out_len = actx->fill; - while (dst && actx->fill) { - if (!split) { - dst_buf = sg_virt(dst); - dst_off = 0; - } - rem = min(sg_dma_len(dst) - dst_off, - actx->fill); - - memcpy(dst_buf + dst_off, out_tmp, rem); - out_tmp += rem; - dst_off += rem; - actx->fill -= rem; - - if (dst_off == sg_dma_len(dst)) { - dst = sg_next(dst); - split = 0; - } else { - split = 1; - } - } + actx->fill = 0; } } while (len); -- cgit v1.2.3 From cb5f09e8148440d3581403c0415bc9678a64488d Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sun, 11 Jul 2021 15:31:43 -0700 Subject: crypto: arm/curve25519 - rename 'mod_init' & 'mod_exit' functions to be module-specific Rename module_init & module_exit functions that are named "mod_init" and "mod_exit" so that they are unique in both the System.map file and in initcall_debug output instead of showing up as almost anonymous "mod_init". This is helpful for debugging and in determining how long certain module_init calls take to execute. Signed-off-by: Randy Dunlap Cc: Jason A. Donenfeld Cc: linux-arm-kernel@lists.infradead.org Cc: patches@armlinux.org.uk Acked-by: Russell King (Oracle) Signed-off-by: Herbert Xu --- arch/arm/crypto/curve25519-glue.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/arm/crypto/curve25519-glue.c b/arch/arm/crypto/curve25519-glue.c index 31eb75b6002f..9bdafd57888c 100644 --- a/arch/arm/crypto/curve25519-glue.c +++ b/arch/arm/crypto/curve25519-glue.c @@ -112,7 +112,7 @@ static struct kpp_alg curve25519_alg = { .max_size = curve25519_max_size, }; -static int __init mod_init(void) +static int __init arm_curve25519_init(void) { if (elf_hwcap & HWCAP_NEON) { static_branch_enable(&have_neon); @@ -122,14 +122,14 @@ static int __init mod_init(void) return 0; } -static void __exit mod_exit(void) +static void __exit arm_curve25519_exit(void) { if (IS_REACHABLE(CONFIG_CRYPTO_KPP) && elf_hwcap & HWCAP_NEON) crypto_unregister_kpp(&curve25519_alg); } -module_init(mod_init); -module_exit(mod_exit); +module_init(arm_curve25519_init); +module_exit(arm_curve25519_exit); MODULE_ALIAS_CRYPTO("curve25519"); MODULE_ALIAS_CRYPTO("curve25519-neon"); -- cgit v1.2.3 From f0d9ff8c8efb90ae335d41c2fc0d92724af55a63 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sun, 11 Jul 2021 15:31:44 -0700 Subject: hwrng: rename 'mod_init' & 'mod_exit' functions to be module-specific Rename module_init & module_exit functions that are named "mod_init" and "mod_exit" so that they are unique in both the System.map file and in initcall_debug output instead of showing up as almost anonymous "mod_init". This is helpful for debugging and in determining how long certain module_init calls take to execute. Signed-off-by: Randy Dunlap Cc: Arnd Bergmann Cc: Greg Kroah-Hartman Cc: Andres Salomon Cc: linux-geode@lists.infradead.org Cc: Matt Mackall Cc: Herbert Xu Cc: "David S. Miller" Cc: linux-crypto@vger.kernel.org Cc: Jason A. Donenfeld Signed-off-by: Herbert Xu --- drivers/char/hw_random/amd-rng.c | 8 ++++---- drivers/char/hw_random/geode-rng.c | 8 ++++---- drivers/char/hw_random/intel-rng.c | 8 ++++---- drivers/char/hw_random/via-rng.c | 8 ++++---- 4 files changed, 16 insertions(+), 16 deletions(-) diff --git a/drivers/char/hw_random/amd-rng.c b/drivers/char/hw_random/amd-rng.c index d8d4ef5214a1..c22d4184bb61 100644 --- a/drivers/char/hw_random/amd-rng.c +++ b/drivers/char/hw_random/amd-rng.c @@ -124,7 +124,7 @@ static struct hwrng amd_rng = { .read = amd_rng_read, }; -static int __init mod_init(void) +static int __init amd_rng_mod_init(void) { int err; struct pci_dev *pdev = NULL; @@ -188,7 +188,7 @@ out: return err; } -static void __exit mod_exit(void) +static void __exit amd_rng_mod_exit(void) { struct amd768_priv *priv; @@ -203,8 +203,8 @@ static void __exit mod_exit(void) kfree(priv); } -module_init(mod_init); -module_exit(mod_exit); +module_init(amd_rng_mod_init); +module_exit(amd_rng_mod_exit); MODULE_AUTHOR("The Linux Kernel team"); MODULE_DESCRIPTION("H/W RNG driver for AMD chipsets"); diff --git a/drivers/char/hw_random/geode-rng.c b/drivers/char/hw_random/geode-rng.c index e1d421a36a13..138ce434f86b 100644 --- a/drivers/char/hw_random/geode-rng.c +++ b/drivers/char/hw_random/geode-rng.c @@ -83,7 +83,7 @@ static struct hwrng geode_rng = { }; -static int __init mod_init(void) +static int __init geode_rng_init(void) { int err = -ENODEV; struct pci_dev *pdev = NULL; @@ -124,7 +124,7 @@ err_unmap: goto out; } -static void __exit mod_exit(void) +static void __exit geode_rng_exit(void) { void __iomem *mem = (void __iomem *)geode_rng.priv; @@ -132,8 +132,8 @@ static void __exit mod_exit(void) iounmap(mem); } -module_init(mod_init); -module_exit(mod_exit); +module_init(geode_rng_init); +module_exit(geode_rng_exit); MODULE_DESCRIPTION("H/W RNG driver for AMD Geode LX CPUs"); MODULE_LICENSE("GPL"); diff --git a/drivers/char/hw_random/intel-rng.c b/drivers/char/hw_random/intel-rng.c index d740b8814bf3..7b171cb3b825 100644 --- a/drivers/char/hw_random/intel-rng.c +++ b/drivers/char/hw_random/intel-rng.c @@ -325,7 +325,7 @@ PFX "RNG, try using the 'no_fwh_detect' option.\n"; } -static int __init mod_init(void) +static int __init intel_rng_mod_init(void) { int err = -ENODEV; int i; @@ -403,7 +403,7 @@ out: } -static void __exit mod_exit(void) +static void __exit intel_rng_mod_exit(void) { void __iomem *mem = (void __iomem *)intel_rng.priv; @@ -411,8 +411,8 @@ static void __exit mod_exit(void) iounmap(mem); } -module_init(mod_init); -module_exit(mod_exit); +module_init(intel_rng_mod_init); +module_exit(intel_rng_mod_exit); MODULE_DESCRIPTION("H/W RNG driver for Intel chipsets"); MODULE_LICENSE("GPL"); diff --git a/drivers/char/hw_random/via-rng.c b/drivers/char/hw_random/via-rng.c index 39943bc3651a..7444cc146e86 100644 --- a/drivers/char/hw_random/via-rng.c +++ b/drivers/char/hw_random/via-rng.c @@ -192,7 +192,7 @@ static struct hwrng via_rng = { }; -static int __init mod_init(void) +static int __init via_rng_mod_init(void) { int err; @@ -209,13 +209,13 @@ static int __init mod_init(void) out: return err; } -module_init(mod_init); +module_init(via_rng_mod_init); -static void __exit mod_exit(void) +static void __exit via_rng_mod_exit(void) { hwrng_unregister(&via_rng); } -module_exit(mod_exit); +module_exit(via_rng_mod_exit); static struct x86_cpu_id __maybe_unused via_rng_cpu_id[] = { X86_MATCH_FEATURE(X86_FEATURE_XSTORE, NULL), -- cgit v1.2.3 From f03a3cab26c1b7f628a3be6d33ae1b483829b630 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sun, 11 Jul 2021 15:31:45 -0700 Subject: crypto: lib - rename 'mod_init' & 'mod_exit' functions to be module-specific Rename module_init & module_exit functions that are named "mod_init" and "mod_exit" so that they are unique in both the System.map file and in initcall_debug output instead of showing up as almost anonymous "mod_init". This is helpful for debugging and in determining how long certain module_init calls take to execute. Signed-off-by: Randy Dunlap Cc: Herbert Xu Cc: "David S. Miller" Cc: linux-crypto@vger.kernel.org Cc: Jason A. Donenfeld Signed-off-by: Herbert Xu --- lib/crypto/blake2s.c | 8 ++++---- lib/crypto/chacha20poly1305.c | 8 ++++---- lib/crypto/curve25519.c | 8 ++++---- 3 files changed, 12 insertions(+), 12 deletions(-) diff --git a/lib/crypto/blake2s.c b/lib/crypto/blake2s.c index c64ac8bfb6a9..4055aa593ec4 100644 --- a/lib/crypto/blake2s.c +++ b/lib/crypto/blake2s.c @@ -73,7 +73,7 @@ void blake2s256_hmac(u8 *out, const u8 *in, const u8 *key, const size_t inlen, } EXPORT_SYMBOL(blake2s256_hmac); -static int __init mod_init(void) +static int __init blake2s_mod_init(void) { if (!IS_ENABLED(CONFIG_CRYPTO_MANAGER_DISABLE_TESTS) && WARN_ON(!blake2s_selftest())) @@ -81,12 +81,12 @@ static int __init mod_init(void) return 0; } -static void __exit mod_exit(void) +static void __exit blake2s_mod_exit(void) { } -module_init(mod_init); -module_exit(mod_exit); +module_init(blake2s_mod_init); +module_exit(blake2s_mod_exit); MODULE_LICENSE("GPL v2"); MODULE_DESCRIPTION("BLAKE2s hash function"); MODULE_AUTHOR("Jason A. Donenfeld "); diff --git a/lib/crypto/chacha20poly1305.c b/lib/crypto/chacha20poly1305.c index c2fcdb98cc02..fa6a9440fc95 100644 --- a/lib/crypto/chacha20poly1305.c +++ b/lib/crypto/chacha20poly1305.c @@ -354,7 +354,7 @@ bool chacha20poly1305_decrypt_sg_inplace(struct scatterlist *src, size_t src_len } EXPORT_SYMBOL(chacha20poly1305_decrypt_sg_inplace); -static int __init mod_init(void) +static int __init chacha20poly1305_init(void) { if (!IS_ENABLED(CONFIG_CRYPTO_MANAGER_DISABLE_TESTS) && WARN_ON(!chacha20poly1305_selftest())) @@ -362,12 +362,12 @@ static int __init mod_init(void) return 0; } -static void __exit mod_exit(void) +static void __exit chacha20poly1305_exit(void) { } -module_init(mod_init); -module_exit(mod_exit); +module_init(chacha20poly1305_init); +module_exit(chacha20poly1305_exit); MODULE_LICENSE("GPL v2"); MODULE_DESCRIPTION("ChaCha20Poly1305 AEAD construction"); MODULE_AUTHOR("Jason A. Donenfeld "); diff --git a/lib/crypto/curve25519.c b/lib/crypto/curve25519.c index fb29739e8c29..064b352c6907 100644 --- a/lib/crypto/curve25519.c +++ b/lib/crypto/curve25519.c @@ -13,7 +13,7 @@ #include #include -static int __init mod_init(void) +static int __init curve25519_init(void) { if (!IS_ENABLED(CONFIG_CRYPTO_MANAGER_DISABLE_TESTS) && WARN_ON(!curve25519_selftest())) @@ -21,12 +21,12 @@ static int __init mod_init(void) return 0; } -static void __exit mod_exit(void) +static void __exit curve25519_exit(void) { } -module_init(mod_init); -module_exit(mod_exit); +module_init(curve25519_init); +module_exit(curve25519_exit); MODULE_LICENSE("GPL v2"); MODULE_DESCRIPTION("Curve25519 scalar multiplication"); -- cgit v1.2.3 From 821720b9f34ec54106ebf012a712ba73bbcf47c2 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 16 Jul 2021 18:54:03 +0200 Subject: crypto: x86/aes-ni - add missing error checks in XTS code The updated XTS code fails to check the return code of skcipher_walk_virt, which may lead to skcipher_walk_abort() or skcipher_walk_done() being called while the walk argument is in an inconsistent state. So check the return value after each such call, and bail on errors. Fixes: 2481104fe98d ("crypto: x86/aes-ni-xts - rewrite and drop indirections via glue helper") Reported-by: Dave Hansen Reported-by: syzbot Signed-off-by: Ard Biesheuvel Reviewed-by: Eric Biggers Signed-off-by: Herbert Xu --- arch/x86/crypto/aesni-intel_glue.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c index 2144e54a6c89..388643ca2177 100644 --- a/arch/x86/crypto/aesni-intel_glue.c +++ b/arch/x86/crypto/aesni-intel_glue.c @@ -849,6 +849,8 @@ static int xts_crypt(struct skcipher_request *req, bool encrypt) return -EINVAL; err = skcipher_walk_virt(&walk, req, false); + if (err) + return err; if (unlikely(tail > 0 && walk.nbytes < walk.total)) { int blocks = DIV_ROUND_UP(req->cryptlen, AES_BLOCK_SIZE) - 2; @@ -862,7 +864,10 @@ static int xts_crypt(struct skcipher_request *req, bool encrypt) skcipher_request_set_crypt(&subreq, req->src, req->dst, blocks * AES_BLOCK_SIZE, req->iv); req = &subreq; + err = skcipher_walk_virt(&walk, req, false); + if (err) + return err; } else { tail = 0; } -- cgit v1.2.3 From 2b31277af577b1b2da62c3ad7d3315b422869102 Mon Sep 17 00:00:00 2001 From: Tianjia Zhang Date: Tue, 20 Jul 2021 11:46:39 +0800 Subject: crypto: sm4 - create SM4 library based on sm4 generic code Take the existing small footprint and mostly time invariant C code and turn it into a SM4 library that can be used for non-performance critical, casual use of SM4, and as a fallback for, e.g., SIMD code that needs a secondary path that can be taken in contexts where the SIMD unit is off limits. Secondly, some codes have been optimized, such as unrolling small times loop, removing unnecessary memory shifts, exporting sbox, fk, ck arrays, and basic encryption and decryption functions. Signed-off-by: Tianjia Zhang Signed-off-by: Herbert Xu --- crypto/Kconfig | 1 + crypto/sm4_generic.c | 153 ++------------------------------------------ include/crypto/sm4.h | 24 ++++++- lib/crypto/Kconfig | 3 + lib/crypto/Makefile | 3 + lib/crypto/sm4.c | 176 +++++++++++++++++++++++++++++++++++++++++++++++++++ 6 files changed, 208 insertions(+), 152 deletions(-) create mode 100644 lib/crypto/sm4.c diff --git a/crypto/Kconfig b/crypto/Kconfig index 64b772c5d1c9..5bf86f5d59db 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -1547,6 +1547,7 @@ config CRYPTO_SERPENT_AVX2_X86_64 config CRYPTO_SM4 tristate "SM4 cipher algorithm" select CRYPTO_ALGAPI + select CRYPTO_LIB_SM4 help SM4 cipher algorithms (OSCCA GB/T 32907-2016). diff --git a/crypto/sm4_generic.c b/crypto/sm4_generic.c index 016dbc595705..d19d01f852a9 100644 --- a/crypto/sm4_generic.c +++ b/crypto/sm4_generic.c @@ -16,139 +16,13 @@ #include #include -static const u32 fk[4] = { - 0xa3b1bac6, 0x56aa3350, 0x677d9197, 0xb27022dc -}; - -static const u8 sbox[256] = { - 0xd6, 0x90, 0xe9, 0xfe, 0xcc, 0xe1, 0x3d, 0xb7, - 0x16, 0xb6, 0x14, 0xc2, 0x28, 0xfb, 0x2c, 0x05, - 0x2b, 0x67, 0x9a, 0x76, 0x2a, 0xbe, 0x04, 0xc3, - 0xaa, 0x44, 0x13, 0x26, 0x49, 0x86, 0x06, 0x99, - 0x9c, 0x42, 0x50, 0xf4, 0x91, 0xef, 0x98, 0x7a, - 0x33, 0x54, 0x0b, 0x43, 0xed, 0xcf, 0xac, 0x62, - 0xe4, 0xb3, 0x1c, 0xa9, 0xc9, 0x08, 0xe8, 0x95, - 0x80, 0xdf, 0x94, 0xfa, 0x75, 0x8f, 0x3f, 0xa6, - 0x47, 0x07, 0xa7, 0xfc, 0xf3, 0x73, 0x17, 0xba, - 0x83, 0x59, 0x3c, 0x19, 0xe6, 0x85, 0x4f, 0xa8, - 0x68, 0x6b, 0x81, 0xb2, 0x71, 0x64, 0xda, 0x8b, - 0xf8, 0xeb, 0x0f, 0x4b, 0x70, 0x56, 0x9d, 0x35, - 0x1e, 0x24, 0x0e, 0x5e, 0x63, 0x58, 0xd1, 0xa2, - 0x25, 0x22, 0x7c, 0x3b, 0x01, 0x21, 0x78, 0x87, - 0xd4, 0x00, 0x46, 0x57, 0x9f, 0xd3, 0x27, 0x52, - 0x4c, 0x36, 0x02, 0xe7, 0xa0, 0xc4, 0xc8, 0x9e, - 0xea, 0xbf, 0x8a, 0xd2, 0x40, 0xc7, 0x38, 0xb5, - 0xa3, 0xf7, 0xf2, 0xce, 0xf9, 0x61, 0x15, 0xa1, - 0xe0, 0xae, 0x5d, 0xa4, 0x9b, 0x34, 0x1a, 0x55, - 0xad, 0x93, 0x32, 0x30, 0xf5, 0x8c, 0xb1, 0xe3, - 0x1d, 0xf6, 0xe2, 0x2e, 0x82, 0x66, 0xca, 0x60, - 0xc0, 0x29, 0x23, 0xab, 0x0d, 0x53, 0x4e, 0x6f, - 0xd5, 0xdb, 0x37, 0x45, 0xde, 0xfd, 0x8e, 0x2f, - 0x03, 0xff, 0x6a, 0x72, 0x6d, 0x6c, 0x5b, 0x51, - 0x8d, 0x1b, 0xaf, 0x92, 0xbb, 0xdd, 0xbc, 0x7f, - 0x11, 0xd9, 0x5c, 0x41, 0x1f, 0x10, 0x5a, 0xd8, - 0x0a, 0xc1, 0x31, 0x88, 0xa5, 0xcd, 0x7b, 0xbd, - 0x2d, 0x74, 0xd0, 0x12, 0xb8, 0xe5, 0xb4, 0xb0, - 0x89, 0x69, 0x97, 0x4a, 0x0c, 0x96, 0x77, 0x7e, - 0x65, 0xb9, 0xf1, 0x09, 0xc5, 0x6e, 0xc6, 0x84, - 0x18, 0xf0, 0x7d, 0xec, 0x3a, 0xdc, 0x4d, 0x20, - 0x79, 0xee, 0x5f, 0x3e, 0xd7, 0xcb, 0x39, 0x48 -}; - -static const u32 ck[] = { - 0x00070e15, 0x1c232a31, 0x383f464d, 0x545b6269, - 0x70777e85, 0x8c939aa1, 0xa8afb6bd, 0xc4cbd2d9, - 0xe0e7eef5, 0xfc030a11, 0x181f262d, 0x343b4249, - 0x50575e65, 0x6c737a81, 0x888f969d, 0xa4abb2b9, - 0xc0c7ced5, 0xdce3eaf1, 0xf8ff060d, 0x141b2229, - 0x30373e45, 0x4c535a61, 0x686f767d, 0x848b9299, - 0xa0a7aeb5, 0xbcc3cad1, 0xd8dfe6ed, 0xf4fb0209, - 0x10171e25, 0x2c333a41, 0x484f565d, 0x646b7279 -}; - -static u32 sm4_t_non_lin_sub(u32 x) -{ - int i; - u8 *b = (u8 *)&x; - - for (i = 0; i < 4; ++i) - b[i] = sbox[b[i]]; - - return x; -} - -static u32 sm4_key_lin_sub(u32 x) -{ - return x ^ rol32(x, 13) ^ rol32(x, 23); - -} - -static u32 sm4_enc_lin_sub(u32 x) -{ - return x ^ rol32(x, 2) ^ rol32(x, 10) ^ rol32(x, 18) ^ rol32(x, 24); -} - -static u32 sm4_key_sub(u32 x) -{ - return sm4_key_lin_sub(sm4_t_non_lin_sub(x)); -} - -static u32 sm4_enc_sub(u32 x) -{ - return sm4_enc_lin_sub(sm4_t_non_lin_sub(x)); -} - -static u32 sm4_round(const u32 *x, const u32 rk) -{ - return x[0] ^ sm4_enc_sub(x[1] ^ x[2] ^ x[3] ^ rk); -} - - -/** - * crypto_sm4_expand_key - Expands the SM4 key as described in GB/T 32907-2016 - * @ctx: The location where the computed key will be stored. - * @in_key: The supplied key. - * @key_len: The length of the supplied key. - * - * Returns 0 on success. The function fails only if an invalid key size (or - * pointer) is supplied. - */ -int crypto_sm4_expand_key(struct crypto_sm4_ctx *ctx, const u8 *in_key, - unsigned int key_len) -{ - u32 rk[4], t; - const u32 *key = (u32 *)in_key; - int i; - - if (key_len != SM4_KEY_SIZE) - return -EINVAL; - - for (i = 0; i < 4; ++i) - rk[i] = get_unaligned_be32(&key[i]) ^ fk[i]; - - for (i = 0; i < 32; ++i) { - t = rk[0] ^ sm4_key_sub(rk[1] ^ rk[2] ^ rk[3] ^ ck[i]); - ctx->rkey_enc[i] = t; - rk[0] = rk[1]; - rk[1] = rk[2]; - rk[2] = rk[3]; - rk[3] = t; - } - - for (i = 0; i < 32; ++i) - ctx->rkey_dec[i] = ctx->rkey_enc[31 - i]; - - return 0; -} -EXPORT_SYMBOL_GPL(crypto_sm4_expand_key); - /** * crypto_sm4_set_key - Set the SM4 key. * @tfm: The %crypto_tfm that is used in the context. * @in_key: The input key. * @key_len: The size of the key. * - * This function uses crypto_sm4_expand_key() to expand the key. + * This function uses sm4_expandkey() to expand the key. * &crypto_sm4_ctx _must_ be the private data embedded in @tfm which is * retrieved with crypto_tfm_ctx(). * @@ -159,36 +33,17 @@ int crypto_sm4_set_key(struct crypto_tfm *tfm, const u8 *in_key, { struct crypto_sm4_ctx *ctx = crypto_tfm_ctx(tfm); - return crypto_sm4_expand_key(ctx, in_key, key_len); + return sm4_expandkey(ctx, in_key, key_len); } EXPORT_SYMBOL_GPL(crypto_sm4_set_key); -static void sm4_do_crypt(const u32 *rk, u32 *out, const u32 *in) -{ - u32 x[4], i, t; - - for (i = 0; i < 4; ++i) - x[i] = get_unaligned_be32(&in[i]); - - for (i = 0; i < 32; ++i) { - t = sm4_round(x, rk[i]); - x[0] = x[1]; - x[1] = x[2]; - x[2] = x[3]; - x[3] = t; - } - - for (i = 0; i < 4; ++i) - put_unaligned_be32(x[3 - i], &out[i]); -} - /* encrypt a block of text */ void crypto_sm4_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) { const struct crypto_sm4_ctx *ctx = crypto_tfm_ctx(tfm); - sm4_do_crypt(ctx->rkey_enc, (u32 *)out, (u32 *)in); + sm4_crypt_block(ctx->rkey_enc, out, in); } EXPORT_SYMBOL_GPL(crypto_sm4_encrypt); @@ -198,7 +53,7 @@ void crypto_sm4_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) { const struct crypto_sm4_ctx *ctx = crypto_tfm_ctx(tfm); - sm4_do_crypt(ctx->rkey_dec, (u32 *)out, (u32 *)in); + sm4_crypt_block(ctx->rkey_dec, out, in); } EXPORT_SYMBOL_GPL(crypto_sm4_decrypt); diff --git a/include/crypto/sm4.h b/include/crypto/sm4.h index 7afd730d16ff..06322325f862 100644 --- a/include/crypto/sm4.h +++ b/include/crypto/sm4.h @@ -3,6 +3,7 @@ /* * Common values for the SM4 algorithm * Copyright (C) 2018 ARM Limited or its affiliates. + * Copyright (c) 2021 Tianjia Zhang */ #ifndef _CRYPTO_SM4_H @@ -20,11 +21,28 @@ struct crypto_sm4_ctx { u32 rkey_dec[SM4_RKEY_WORDS]; }; -int crypto_sm4_set_key(struct crypto_tfm *tfm, const u8 *in_key, - unsigned int key_len); -int crypto_sm4_expand_key(struct crypto_sm4_ctx *ctx, const u8 *in_key, +/** + * sm4_expandkey - Expands the SM4 key as described in GB/T 32907-2016 + * @ctx: The location where the computed key will be stored. + * @in_key: The supplied key. + * @key_len: The length of the supplied key. + * + * Returns 0 on success. The function fails only if an invalid key size (or + * pointer) is supplied. + */ +int sm4_expandkey(struct crypto_sm4_ctx *ctx, const u8 *in_key, unsigned int key_len); +/** + * sm4_crypt_block - Encrypt or decrypt a single SM4 block + * @rk: The rkey_enc for encrypt or rkey_dec for decrypt + * @out: Buffer to store output data + * @in: Buffer containing the input data + */ +void sm4_crypt_block(const u32 *rk, u8 *out, const u8 *in); + +int crypto_sm4_set_key(struct crypto_tfm *tfm, const u8 *in_key, + unsigned int key_len); void crypto_sm4_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in); void crypto_sm4_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in); diff --git a/lib/crypto/Kconfig b/lib/crypto/Kconfig index 14c032de276e..545ccbddf6a1 100644 --- a/lib/crypto/Kconfig +++ b/lib/crypto/Kconfig @@ -128,3 +128,6 @@ config CRYPTO_LIB_CHACHA20POLY1305 config CRYPTO_LIB_SHA256 tristate + +config CRYPTO_LIB_SM4 + tristate diff --git a/lib/crypto/Makefile b/lib/crypto/Makefile index 3a435629d9ce..73205ed269ba 100644 --- a/lib/crypto/Makefile +++ b/lib/crypto/Makefile @@ -38,6 +38,9 @@ libpoly1305-y += poly1305.o obj-$(CONFIG_CRYPTO_LIB_SHA256) += libsha256.o libsha256-y := sha256.o +obj-$(CONFIG_CRYPTO_LIB_SM4) += libsm4.o +libsm4-y := sm4.o + ifneq ($(CONFIG_CRYPTO_MANAGER_DISABLE_TESTS),y) libblake2s-y += blake2s-selftest.o libchacha20poly1305-y += chacha20poly1305-selftest.o diff --git a/lib/crypto/sm4.c b/lib/crypto/sm4.c new file mode 100644 index 000000000000..5fbf8c741a2f --- /dev/null +++ b/lib/crypto/sm4.c @@ -0,0 +1,176 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * SM4, as specified in + * https://tools.ietf.org/id/draft-ribose-cfrg-sm4-10.html + * + * Copyright (C) 2018 ARM Limited or its affiliates. + * Copyright (c) 2021 Tianjia Zhang + */ + +#include +#include +#include + +static const u32 fk[4] = { + 0xa3b1bac6, 0x56aa3350, 0x677d9197, 0xb27022dc +}; + +static const u32 __cacheline_aligned ck[32] = { + 0x00070e15, 0x1c232a31, 0x383f464d, 0x545b6269, + 0x70777e85, 0x8c939aa1, 0xa8afb6bd, 0xc4cbd2d9, + 0xe0e7eef5, 0xfc030a11, 0x181f262d, 0x343b4249, + 0x50575e65, 0x6c737a81, 0x888f969d, 0xa4abb2b9, + 0xc0c7ced5, 0xdce3eaf1, 0xf8ff060d, 0x141b2229, + 0x30373e45, 0x4c535a61, 0x686f767d, 0x848b9299, + 0xa0a7aeb5, 0xbcc3cad1, 0xd8dfe6ed, 0xf4fb0209, + 0x10171e25, 0x2c333a41, 0x484f565d, 0x646b7279 +}; + +static const u8 __cacheline_aligned sbox[256] = { + 0xd6, 0x90, 0xe9, 0xfe, 0xcc, 0xe1, 0x3d, 0xb7, + 0x16, 0xb6, 0x14, 0xc2, 0x28, 0xfb, 0x2c, 0x05, + 0x2b, 0x67, 0x9a, 0x76, 0x2a, 0xbe, 0x04, 0xc3, + 0xaa, 0x44, 0x13, 0x26, 0x49, 0x86, 0x06, 0x99, + 0x9c, 0x42, 0x50, 0xf4, 0x91, 0xef, 0x98, 0x7a, + 0x33, 0x54, 0x0b, 0x43, 0xed, 0xcf, 0xac, 0x62, + 0xe4, 0xb3, 0x1c, 0xa9, 0xc9, 0x08, 0xe8, 0x95, + 0x80, 0xdf, 0x94, 0xfa, 0x75, 0x8f, 0x3f, 0xa6, + 0x47, 0x07, 0xa7, 0xfc, 0xf3, 0x73, 0x17, 0xba, + 0x83, 0x59, 0x3c, 0x19, 0xe6, 0x85, 0x4f, 0xa8, + 0x68, 0x6b, 0x81, 0xb2, 0x71, 0x64, 0xda, 0x8b, + 0xf8, 0xeb, 0x0f, 0x4b, 0x70, 0x56, 0x9d, 0x35, + 0x1e, 0x24, 0x0e, 0x5e, 0x63, 0x58, 0xd1, 0xa2, + 0x25, 0x22, 0x7c, 0x3b, 0x01, 0x21, 0x78, 0x87, + 0xd4, 0x00, 0x46, 0x57, 0x9f, 0xd3, 0x27, 0x52, + 0x4c, 0x36, 0x02, 0xe7, 0xa0, 0xc4, 0xc8, 0x9e, + 0xea, 0xbf, 0x8a, 0xd2, 0x40, 0xc7, 0x38, 0xb5, + 0xa3, 0xf7, 0xf2, 0xce, 0xf9, 0x61, 0x15, 0xa1, + 0xe0, 0xae, 0x5d, 0xa4, 0x9b, 0x34, 0x1a, 0x55, + 0xad, 0x93, 0x32, 0x30, 0xf5, 0x8c, 0xb1, 0xe3, + 0x1d, 0xf6, 0xe2, 0x2e, 0x82, 0x66, 0xca, 0x60, + 0xc0, 0x29, 0x23, 0xab, 0x0d, 0x53, 0x4e, 0x6f, + 0xd5, 0xdb, 0x37, 0x45, 0xde, 0xfd, 0x8e, 0x2f, + 0x03, 0xff, 0x6a, 0x72, 0x6d, 0x6c, 0x5b, 0x51, + 0x8d, 0x1b, 0xaf, 0x92, 0xbb, 0xdd, 0xbc, 0x7f, + 0x11, 0xd9, 0x5c, 0x41, 0x1f, 0x10, 0x5a, 0xd8, + 0x0a, 0xc1, 0x31, 0x88, 0xa5, 0xcd, 0x7b, 0xbd, + 0x2d, 0x74, 0xd0, 0x12, 0xb8, 0xe5, 0xb4, 0xb0, + 0x89, 0x69, 0x97, 0x4a, 0x0c, 0x96, 0x77, 0x7e, + 0x65, 0xb9, 0xf1, 0x09, 0xc5, 0x6e, 0xc6, 0x84, + 0x18, 0xf0, 0x7d, 0xec, 0x3a, 0xdc, 0x4d, 0x20, + 0x79, 0xee, 0x5f, 0x3e, 0xd7, 0xcb, 0x39, 0x48 +}; + +static inline u32 sm4_t_non_lin_sub(u32 x) +{ + u32 out; + + out = (u32)sbox[x & 0xff]; + out |= (u32)sbox[(x >> 8) & 0xff] << 8; + out |= (u32)sbox[(x >> 16) & 0xff] << 16; + out |= (u32)sbox[(x >> 24) & 0xff] << 24; + + return out; +} + +static inline u32 sm4_key_lin_sub(u32 x) +{ + return x ^ rol32(x, 13) ^ rol32(x, 23); +} + +static inline u32 sm4_enc_lin_sub(u32 x) +{ + return x ^ rol32(x, 2) ^ rol32(x, 10) ^ rol32(x, 18) ^ rol32(x, 24); +} + +static inline u32 sm4_key_sub(u32 x) +{ + return sm4_key_lin_sub(sm4_t_non_lin_sub(x)); +} + +static inline u32 sm4_enc_sub(u32 x) +{ + return sm4_enc_lin_sub(sm4_t_non_lin_sub(x)); +} + +static inline u32 sm4_round(u32 x0, u32 x1, u32 x2, u32 x3, u32 rk) +{ + return x0 ^ sm4_enc_sub(x1 ^ x2 ^ x3 ^ rk); +} + + +/** + * sm4_expandkey - Expands the SM4 key as described in GB/T 32907-2016 + * @ctx: The location where the computed key will be stored. + * @in_key: The supplied key. + * @key_len: The length of the supplied key. + * + * Returns 0 on success. The function fails only if an invalid key size (or + * pointer) is supplied. + */ +int sm4_expandkey(struct crypto_sm4_ctx *ctx, const u8 *in_key, + unsigned int key_len) +{ + u32 rk[4]; + const u32 *key = (u32 *)in_key; + int i; + + if (key_len != SM4_KEY_SIZE) + return -EINVAL; + + rk[0] = get_unaligned_be32(&key[0]) ^ fk[0]; + rk[1] = get_unaligned_be32(&key[1]) ^ fk[1]; + rk[2] = get_unaligned_be32(&key[2]) ^ fk[2]; + rk[3] = get_unaligned_be32(&key[3]) ^ fk[3]; + + for (i = 0; i < 32; i += 4) { + rk[0] ^= sm4_key_sub(rk[1] ^ rk[2] ^ rk[3] ^ ck[i + 0]); + rk[1] ^= sm4_key_sub(rk[2] ^ rk[3] ^ rk[0] ^ ck[i + 1]); + rk[2] ^= sm4_key_sub(rk[3] ^ rk[0] ^ rk[1] ^ ck[i + 2]); + rk[3] ^= sm4_key_sub(rk[0] ^ rk[1] ^ rk[2] ^ ck[i + 3]); + + ctx->rkey_enc[i + 0] = rk[0]; + ctx->rkey_enc[i + 1] = rk[1]; + ctx->rkey_enc[i + 2] = rk[2]; + ctx->rkey_enc[i + 3] = rk[3]; + ctx->rkey_dec[31 - 0 - i] = rk[0]; + ctx->rkey_dec[31 - 1 - i] = rk[1]; + ctx->rkey_dec[31 - 2 - i] = rk[2]; + ctx->rkey_dec[31 - 3 - i] = rk[3]; + } + + return 0; +} +EXPORT_SYMBOL_GPL(sm4_expandkey); + +/** + * sm4_crypt_block - Encrypt or decrypt a single SM4 block + * @rk: The rkey_enc for encrypt or rkey_dec for decrypt + * @out: Buffer to store output data + * @in: Buffer containing the input data + */ +void sm4_crypt_block(const u32 *rk, u8 *out, const u8 *in) +{ + u32 x[4], i; + + x[0] = get_unaligned_be32(in + 0 * 4); + x[1] = get_unaligned_be32(in + 1 * 4); + x[2] = get_unaligned_be32(in + 2 * 4); + x[3] = get_unaligned_be32(in + 3 * 4); + + for (i = 0; i < 32; i += 4) { + x[0] = sm4_round(x[0], x[1], x[2], x[3], rk[i + 0]); + x[1] = sm4_round(x[1], x[2], x[3], x[0], rk[i + 1]); + x[2] = sm4_round(x[2], x[3], x[0], x[1], rk[i + 2]); + x[3] = sm4_round(x[3], x[0], x[1], x[2], rk[i + 3]); + } + + put_unaligned_be32(x[3 - 0], out + 0 * 4); + put_unaligned_be32(x[3 - 1], out + 1 * 4); + put_unaligned_be32(x[3 - 2], out + 2 * 4); + put_unaligned_be32(x[3 - 3], out + 3 * 4); +} +EXPORT_SYMBOL_GPL(sm4_crypt_block); + +MODULE_DESCRIPTION("Generic SM4 library"); +MODULE_LICENSE("GPL v2"); -- cgit v1.2.3 From c59de48e125c6d49a8abd165e388ca57bfe37b17 Mon Sep 17 00:00:00 2001 From: Tianjia Zhang Date: Tue, 20 Jul 2021 11:46:40 +0800 Subject: crypto: arm64/sm4-ce - Make dependent on sm4 library instead of sm4-generic SM4 library is abstracted from sm4-generic algorithm, sm4-ce can depend on the SM4 library instead of sm4-generic, and some functions in sm4-generic do not need to be exported. Signed-off-by: Tianjia Zhang Signed-off-by: Herbert Xu --- arch/arm64/crypto/Kconfig | 2 +- arch/arm64/crypto/sm4-ce-glue.c | 20 ++++++++++++++------ crypto/sm4_generic.c | 27 ++++++++++++--------------- include/crypto/sm4.h | 9 ++------- lib/crypto/sm4.c | 2 +- 5 files changed, 30 insertions(+), 30 deletions(-) diff --git a/arch/arm64/crypto/Kconfig b/arch/arm64/crypto/Kconfig index b8eb0453123d..55f19450091b 100644 --- a/arch/arm64/crypto/Kconfig +++ b/arch/arm64/crypto/Kconfig @@ -51,7 +51,7 @@ config CRYPTO_SM4_ARM64_CE tristate "SM4 symmetric cipher (ARMv8.2 Crypto Extensions)" depends on KERNEL_MODE_NEON select CRYPTO_ALGAPI - select CRYPTO_SM4 + select CRYPTO_LIB_SM4 config CRYPTO_GHASH_ARM64_CE tristate "GHASH/AES-GCM using ARMv8 Crypto Extensions" diff --git a/arch/arm64/crypto/sm4-ce-glue.c b/arch/arm64/crypto/sm4-ce-glue.c index 2754c875d39c..9c93cfc4841b 100644 --- a/arch/arm64/crypto/sm4-ce-glue.c +++ b/arch/arm64/crypto/sm4-ce-glue.c @@ -17,12 +17,20 @@ MODULE_LICENSE("GPL v2"); asmlinkage void sm4_ce_do_crypt(const u32 *rk, void *out, const void *in); +static int sm4_ce_setkey(struct crypto_tfm *tfm, const u8 *key, + unsigned int key_len) +{ + struct sm4_ctx *ctx = crypto_tfm_ctx(tfm); + + return sm4_expandkey(ctx, key, key_len); +} + static void sm4_ce_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) { - const struct crypto_sm4_ctx *ctx = crypto_tfm_ctx(tfm); + const struct sm4_ctx *ctx = crypto_tfm_ctx(tfm); if (!crypto_simd_usable()) { - crypto_sm4_encrypt(tfm, out, in); + sm4_crypt_block(ctx->rkey_enc, out, in); } else { kernel_neon_begin(); sm4_ce_do_crypt(ctx->rkey_enc, out, in); @@ -32,10 +40,10 @@ static void sm4_ce_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) static void sm4_ce_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) { - const struct crypto_sm4_ctx *ctx = crypto_tfm_ctx(tfm); + const struct sm4_ctx *ctx = crypto_tfm_ctx(tfm); if (!crypto_simd_usable()) { - crypto_sm4_decrypt(tfm, out, in); + sm4_crypt_block(ctx->rkey_dec, out, in); } else { kernel_neon_begin(); sm4_ce_do_crypt(ctx->rkey_dec, out, in); @@ -49,12 +57,12 @@ static struct crypto_alg sm4_ce_alg = { .cra_priority = 200, .cra_flags = CRYPTO_ALG_TYPE_CIPHER, .cra_blocksize = SM4_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct crypto_sm4_ctx), + .cra_ctxsize = sizeof(struct sm4_ctx), .cra_module = THIS_MODULE, .cra_u.cipher = { .cia_min_keysize = SM4_KEY_SIZE, .cia_max_keysize = SM4_KEY_SIZE, - .cia_setkey = crypto_sm4_set_key, + .cia_setkey = sm4_ce_setkey, .cia_encrypt = sm4_ce_encrypt, .cia_decrypt = sm4_ce_decrypt } diff --git a/crypto/sm4_generic.c b/crypto/sm4_generic.c index d19d01f852a9..4a6480a27fee 100644 --- a/crypto/sm4_generic.c +++ b/crypto/sm4_generic.c @@ -17,45 +17,42 @@ #include /** - * crypto_sm4_set_key - Set the SM4 key. + * sm4_setkey - Set the SM4 key. * @tfm: The %crypto_tfm that is used in the context. * @in_key: The input key. * @key_len: The size of the key. * * This function uses sm4_expandkey() to expand the key. - * &crypto_sm4_ctx _must_ be the private data embedded in @tfm which is + * &sm4_ctx _must_ be the private data embedded in @tfm which is * retrieved with crypto_tfm_ctx(). * * Return: 0 on success; -EINVAL on failure (only happens for bad key lengths) */ -int crypto_sm4_set_key(struct crypto_tfm *tfm, const u8 *in_key, +static int sm4_setkey(struct crypto_tfm *tfm, const u8 *in_key, unsigned int key_len) { - struct crypto_sm4_ctx *ctx = crypto_tfm_ctx(tfm); + struct sm4_ctx *ctx = crypto_tfm_ctx(tfm); return sm4_expandkey(ctx, in_key, key_len); } -EXPORT_SYMBOL_GPL(crypto_sm4_set_key); /* encrypt a block of text */ -void crypto_sm4_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) +static void sm4_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) { - const struct crypto_sm4_ctx *ctx = crypto_tfm_ctx(tfm); + const struct sm4_ctx *ctx = crypto_tfm_ctx(tfm); sm4_crypt_block(ctx->rkey_enc, out, in); } -EXPORT_SYMBOL_GPL(crypto_sm4_encrypt); /* decrypt a block of text */ -void crypto_sm4_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) +static void sm4_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) { - const struct crypto_sm4_ctx *ctx = crypto_tfm_ctx(tfm); + const struct sm4_ctx *ctx = crypto_tfm_ctx(tfm); sm4_crypt_block(ctx->rkey_dec, out, in); } -EXPORT_SYMBOL_GPL(crypto_sm4_decrypt); static struct crypto_alg sm4_alg = { .cra_name = "sm4", @@ -63,15 +60,15 @@ static struct crypto_alg sm4_alg = { .cra_priority = 100, .cra_flags = CRYPTO_ALG_TYPE_CIPHER, .cra_blocksize = SM4_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct crypto_sm4_ctx), + .cra_ctxsize = sizeof(struct sm4_ctx), .cra_module = THIS_MODULE, .cra_u = { .cipher = { .cia_min_keysize = SM4_KEY_SIZE, .cia_max_keysize = SM4_KEY_SIZE, - .cia_setkey = crypto_sm4_set_key, - .cia_encrypt = crypto_sm4_encrypt, - .cia_decrypt = crypto_sm4_decrypt + .cia_setkey = sm4_setkey, + .cia_encrypt = sm4_encrypt, + .cia_decrypt = sm4_decrypt } } }; diff --git a/include/crypto/sm4.h b/include/crypto/sm4.h index 06322325f862..709f286e7b25 100644 --- a/include/crypto/sm4.h +++ b/include/crypto/sm4.h @@ -16,7 +16,7 @@ #define SM4_BLOCK_SIZE 16 #define SM4_RKEY_WORDS 32 -struct crypto_sm4_ctx { +struct sm4_ctx { u32 rkey_enc[SM4_RKEY_WORDS]; u32 rkey_dec[SM4_RKEY_WORDS]; }; @@ -30,7 +30,7 @@ struct crypto_sm4_ctx { * Returns 0 on success. The function fails only if an invalid key size (or * pointer) is supplied. */ -int sm4_expandkey(struct crypto_sm4_ctx *ctx, const u8 *in_key, +int sm4_expandkey(struct sm4_ctx *ctx, const u8 *in_key, unsigned int key_len); /** @@ -41,9 +41,4 @@ int sm4_expandkey(struct crypto_sm4_ctx *ctx, const u8 *in_key, */ void sm4_crypt_block(const u32 *rk, u8 *out, const u8 *in); -int crypto_sm4_set_key(struct crypto_tfm *tfm, const u8 *in_key, - unsigned int key_len); -void crypto_sm4_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in); -void crypto_sm4_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in); - #endif diff --git a/lib/crypto/sm4.c b/lib/crypto/sm4.c index 5fbf8c741a2f..633b59fed9db 100644 --- a/lib/crypto/sm4.c +++ b/lib/crypto/sm4.c @@ -108,7 +108,7 @@ static inline u32 sm4_round(u32 x0, u32 x1, u32 x2, u32 x3, u32 rk) * Returns 0 on success. The function fails only if an invalid key size (or * pointer) is supplied. */ -int sm4_expandkey(struct crypto_sm4_ctx *ctx, const u8 *in_key, +int sm4_expandkey(struct sm4_ctx *ctx, const u8 *in_key, unsigned int key_len) { u32 rk[4]; -- cgit v1.2.3 From a7ee22ee1445c7fdb00ab80116bb9710ca86a860 Mon Sep 17 00:00:00 2001 From: Tianjia Zhang Date: Tue, 20 Jul 2021 11:46:41 +0800 Subject: crypto: x86/sm4 - add AES-NI/AVX/x86_64 implementation This patch adds AES-NI/AVX/x86_64 assembler implementation of SM4 block cipher. Through two affine transforms, we can use the AES S-Box to simulate the SM4 S-Box to achieve the effect of instruction acceleration. The main algorithm implementation comes from SM4 AES-NI work by libgcrypt and Markku-Juhani O. Saarinen at: https://github.com/mjosaarinen/sm4ni This optimization supports the four modes of SM4, ECB, CBC, CFB, and CTR. Since CBC and CFB do not support multiple block parallel encryption, the optimization effect is not obvious. Benchmark on Intel Xeon Cascadelake, the data comes from the 218 mode and 518 mode of tcrypt. The abscissas are blocks of different lengths. The data is tabulated and the unit is Mb/s: sm4-generic | 16 64 128 256 1024 1420 4096 ECB enc | 40.99 46.50 48.05 48.41 49.20 49.25 49.28 ECB dec | 41.07 46.99 48.15 48.67 49.20 49.25 49.29 CBC enc | 37.71 45.28 46.77 47.60 48.32 48.37 48.40 CBC dec | 36.48 44.82 46.43 47.45 48.23 48.30 48.36 CFB enc | 37.94 44.84 46.12 46.94 47.57 47.46 47.68 CFB dec | 37.50 42.84 43.74 44.37 44.85 44.80 44.96 CTR enc | 39.20 45.63 46.75 47.49 48.09 47.85 48.08 CTR dec | 39.64 45.70 46.72 47.47 47.98 47.88 48.06 sm4-aesni-avx ECB enc | 33.75 134.47 221.64 243.43 264.05 251.58 258.13 ECB dec | 34.02 134.92 223.11 245.14 264.12 251.04 258.33 CBC enc | 38.85 46.18 47.67 48.34 49.00 48.96 49.14 CBC dec | 33.54 131.29 223.88 245.27 265.50 252.41 263.78 CFB enc | 38.70 46.10 47.58 48.29 49.01 48.94 49.19 CFB dec | 32.79 128.40 223.23 244.87 265.77 253.31 262.79 CTR enc | 32.58 122.23 220.29 241.16 259.57 248.32 256.69 CTR dec | 32.81 122.47 218.99 241.54 258.42 248.58 256.61 Signed-off-by: Tianjia Zhang Signed-off-by: Herbert Xu --- arch/x86/crypto/Makefile | 3 + arch/x86/crypto/sm4-aesni-avx-asm_64.S | 589 +++++++++++++++++++++++++++++++++ arch/x86/crypto/sm4_aesni_avx_glue.c | 459 +++++++++++++++++++++++++ crypto/Kconfig | 21 ++ 4 files changed, 1072 insertions(+) create mode 100644 arch/x86/crypto/sm4-aesni-avx-asm_64.S create mode 100644 arch/x86/crypto/sm4_aesni_avx_glue.c diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile index d0959e7b809f..08f95d4e1e7c 100644 --- a/arch/x86/crypto/Makefile +++ b/arch/x86/crypto/Makefile @@ -88,6 +88,9 @@ nhpoly1305-avx2-y := nh-avx2-x86_64.o nhpoly1305-avx2-glue.o obj-$(CONFIG_CRYPTO_CURVE25519_X86) += curve25519-x86_64.o +obj-$(CONFIG_CRYPTO_SM4_AESNI_AVX_X86_64) += sm4-aesni-avx-x86_64.o +sm4-aesni-avx-x86_64-y := sm4-aesni-avx-asm_64.o sm4_aesni_avx_glue.o + quiet_cmd_perlasm = PERLASM $@ cmd_perlasm = $(PERL) $< > $@ $(obj)/%.S: $(src)/%.pl FORCE diff --git a/arch/x86/crypto/sm4-aesni-avx-asm_64.S b/arch/x86/crypto/sm4-aesni-avx-asm_64.S new file mode 100644 index 000000000000..fa2c3f50aecb --- /dev/null +++ b/arch/x86/crypto/sm4-aesni-avx-asm_64.S @@ -0,0 +1,589 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * SM4 Cipher Algorithm, AES-NI/AVX optimized. + * as specified in + * https://tools.ietf.org/id/draft-ribose-cfrg-sm4-10.html + * + * Copyright (C) 2018 Markku-Juhani O. Saarinen + * Copyright (C) 2020 Jussi Kivilinna + * Copyright (c) 2021 Tianjia Zhang + */ + +/* Based on SM4 AES-NI work by libgcrypt and Markku-Juhani O. Saarinen at: + * https://github.com/mjosaarinen/sm4ni + */ + +#include +#include + +#define rRIP (%rip) + +#define RX0 %xmm0 +#define RX1 %xmm1 +#define MASK_4BIT %xmm2 +#define RTMP0 %xmm3 +#define RTMP1 %xmm4 +#define RTMP2 %xmm5 +#define RTMP3 %xmm6 +#define RTMP4 %xmm7 + +#define RA0 %xmm8 +#define RA1 %xmm9 +#define RA2 %xmm10 +#define RA3 %xmm11 + +#define RB0 %xmm12 +#define RB1 %xmm13 +#define RB2 %xmm14 +#define RB3 %xmm15 + +#define RNOT %xmm0 +#define RBSWAP %xmm1 + + +/* Transpose four 32-bit words between 128-bit vectors. */ +#define transpose_4x4(x0, x1, x2, x3, t1, t2) \ + vpunpckhdq x1, x0, t2; \ + vpunpckldq x1, x0, x0; \ + \ + vpunpckldq x3, x2, t1; \ + vpunpckhdq x3, x2, x2; \ + \ + vpunpckhqdq t1, x0, x1; \ + vpunpcklqdq t1, x0, x0; \ + \ + vpunpckhqdq x2, t2, x3; \ + vpunpcklqdq x2, t2, x2; + +/* pre-SubByte transform. */ +#define transform_pre(x, lo_t, hi_t, mask4bit, tmp0) \ + vpand x, mask4bit, tmp0; \ + vpandn x, mask4bit, x; \ + vpsrld $4, x, x; \ + \ + vpshufb tmp0, lo_t, tmp0; \ + vpshufb x, hi_t, x; \ + vpxor tmp0, x, x; + +/* post-SubByte transform. Note: x has been XOR'ed with mask4bit by + * 'vaeslastenc' instruction. + */ +#define transform_post(x, lo_t, hi_t, mask4bit, tmp0) \ + vpandn mask4bit, x, tmp0; \ + vpsrld $4, x, x; \ + vpand x, mask4bit, x; \ + \ + vpshufb tmp0, lo_t, tmp0; \ + vpshufb x, hi_t, x; \ + vpxor tmp0, x, x; + + +.section .rodata.cst164, "aM", @progbits, 164 +.align 16 + +/* + * Following four affine transform look-up tables are from work by + * Markku-Juhani O. Saarinen, at https://github.com/mjosaarinen/sm4ni + * + * These allow exposing SM4 S-Box from AES SubByte. + */ + +/* pre-SubByte affine transform, from SM4 field to AES field. */ +.Lpre_tf_lo_s: + .quad 0x9197E2E474720701, 0xC7C1B4B222245157 +.Lpre_tf_hi_s: + .quad 0xE240AB09EB49A200, 0xF052B91BF95BB012 + +/* post-SubByte affine transform, from AES field to SM4 field. */ +.Lpost_tf_lo_s: + .quad 0x5B67F2CEA19D0834, 0xEDD14478172BBE82 +.Lpost_tf_hi_s: + .quad 0xAE7201DD73AFDC00, 0x11CDBE62CC1063BF + +/* For isolating SubBytes from AESENCLAST, inverse shift row */ +.Linv_shift_row: + .byte 0x00, 0x0d, 0x0a, 0x07, 0x04, 0x01, 0x0e, 0x0b + .byte 0x08, 0x05, 0x02, 0x0f, 0x0c, 0x09, 0x06, 0x03 + +/* Inverse shift row + Rotate left by 8 bits on 32-bit words with vpshufb */ +.Linv_shift_row_rol_8: + .byte 0x07, 0x00, 0x0d, 0x0a, 0x0b, 0x04, 0x01, 0x0e + .byte 0x0f, 0x08, 0x05, 0x02, 0x03, 0x0c, 0x09, 0x06 + +/* Inverse shift row + Rotate left by 16 bits on 32-bit words with vpshufb */ +.Linv_shift_row_rol_16: + .byte 0x0a, 0x07, 0x00, 0x0d, 0x0e, 0x0b, 0x04, 0x01 + .byte 0x02, 0x0f, 0x08, 0x05, 0x06, 0x03, 0x0c, 0x09 + +/* Inverse shift row + Rotate left by 24 bits on 32-bit words with vpshufb */ +.Linv_shift_row_rol_24: + .byte 0x0d, 0x0a, 0x07, 0x00, 0x01, 0x0e, 0x0b, 0x04 + .byte 0x05, 0x02, 0x0f, 0x08, 0x09, 0x06, 0x03, 0x0c + +/* For CTR-mode IV byteswap */ +.Lbswap128_mask: + .byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 + +/* For input word byte-swap */ +.Lbswap32_mask: + .byte 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 + +.align 4 +/* 4-bit mask */ +.L0f0f0f0f: + .long 0x0f0f0f0f + + +.text +.align 16 + +/* + * void sm4_aesni_avx_crypt4(const u32 *rk, u8 *dst, + * const u8 *src, int nblocks) + */ +.align 8 +SYM_FUNC_START(sm4_aesni_avx_crypt4) + /* input: + * %rdi: round key array, CTX + * %rsi: dst (1..4 blocks) + * %rdx: src (1..4 blocks) + * %rcx: num blocks (1..4) + */ + FRAME_BEGIN + + vmovdqu 0*16(%rdx), RA0; + vmovdqa RA0, RA1; + vmovdqa RA0, RA2; + vmovdqa RA0, RA3; + cmpq $2, %rcx; + jb .Lblk4_load_input_done; + vmovdqu 1*16(%rdx), RA1; + je .Lblk4_load_input_done; + vmovdqu 2*16(%rdx), RA2; + cmpq $3, %rcx; + je .Lblk4_load_input_done; + vmovdqu 3*16(%rdx), RA3; + +.Lblk4_load_input_done: + + vmovdqa .Lbswap32_mask rRIP, RTMP2; + vpshufb RTMP2, RA0, RA0; + vpshufb RTMP2, RA1, RA1; + vpshufb RTMP2, RA2, RA2; + vpshufb RTMP2, RA3, RA3; + + vbroadcastss .L0f0f0f0f rRIP, MASK_4BIT; + vmovdqa .Lpre_tf_lo_s rRIP, RTMP4; + vmovdqa .Lpre_tf_hi_s rRIP, RB0; + vmovdqa .Lpost_tf_lo_s rRIP, RB1; + vmovdqa .Lpost_tf_hi_s rRIP, RB2; + vmovdqa .Linv_shift_row rRIP, RB3; + vmovdqa .Linv_shift_row_rol_8 rRIP, RTMP2; + vmovdqa .Linv_shift_row_rol_16 rRIP, RTMP3; + transpose_4x4(RA0, RA1, RA2, RA3, RTMP0, RTMP1); + +#define ROUND(round, s0, s1, s2, s3) \ + vbroadcastss (4*(round))(%rdi), RX0; \ + vpxor s1, RX0, RX0; \ + vpxor s2, RX0, RX0; \ + vpxor s3, RX0, RX0; /* s1 ^ s2 ^ s3 ^ rk */ \ + \ + /* sbox, non-linear part */ \ + transform_pre(RX0, RTMP4, RB0, MASK_4BIT, RTMP0); \ + vaesenclast MASK_4BIT, RX0, RX0; \ + transform_post(RX0, RB1, RB2, MASK_4BIT, RTMP0); \ + \ + /* linear part */ \ + vpshufb RB3, RX0, RTMP0; \ + vpxor RTMP0, s0, s0; /* s0 ^ x */ \ + vpshufb RTMP2, RX0, RTMP1; \ + vpxor RTMP1, RTMP0, RTMP0; /* x ^ rol(x,8) */ \ + vpshufb RTMP3, RX0, RTMP1; \ + vpxor RTMP1, RTMP0, RTMP0; /* x ^ rol(x,8) ^ rol(x,16) */ \ + vpshufb .Linv_shift_row_rol_24 rRIP, RX0, RTMP1; \ + vpxor RTMP1, s0, s0; /* s0 ^ x ^ rol(x,24) */ \ + vpslld $2, RTMP0, RTMP1; \ + vpsrld $30, RTMP0, RTMP0; \ + vpxor RTMP0, s0, s0; \ + /* s0 ^ x ^ rol(x,2) ^ rol(x,10) ^ rol(x,18) ^ rol(x,24) */ \ + vpxor RTMP1, s0, s0; + + leaq (32*4)(%rdi), %rax; +.align 16 +.Lroundloop_blk4: + ROUND(0, RA0, RA1, RA2, RA3); + ROUND(1, RA1, RA2, RA3, RA0); + ROUND(2, RA2, RA3, RA0, RA1); + ROUND(3, RA3, RA0, RA1, RA2); + leaq (4*4)(%rdi), %rdi; + cmpq %rax, %rdi; + jne .Lroundloop_blk4; + +#undef ROUND + + vmovdqa .Lbswap128_mask rRIP, RTMP2; + + transpose_4x4(RA0, RA1, RA2, RA3, RTMP0, RTMP1); + vpshufb RTMP2, RA0, RA0; + vpshufb RTMP2, RA1, RA1; + vpshufb RTMP2, RA2, RA2; + vpshufb RTMP2, RA3, RA3; + + vmovdqu RA0, 0*16(%rsi); + cmpq $2, %rcx; + jb .Lblk4_store_output_done; + vmovdqu RA1, 1*16(%rsi); + je .Lblk4_store_output_done; + vmovdqu RA2, 2*16(%rsi); + cmpq $3, %rcx; + je .Lblk4_store_output_done; + vmovdqu RA3, 3*16(%rsi); + +.Lblk4_store_output_done: + vzeroall; + FRAME_END + ret; +SYM_FUNC_END(sm4_aesni_avx_crypt4) + +.align 8 +SYM_FUNC_START_LOCAL(__sm4_crypt_blk8) + /* input: + * %rdi: round key array, CTX + * RA0, RA1, RA2, RA3, RB0, RB1, RB2, RB3: eight parallel + * plaintext blocks + * output: + * RA0, RA1, RA2, RA3, RB0, RB1, RB2, RB3: eight parallel + * ciphertext blocks + */ + FRAME_BEGIN + + vmovdqa .Lbswap32_mask rRIP, RTMP2; + vpshufb RTMP2, RA0, RA0; + vpshufb RTMP2, RA1, RA1; + vpshufb RTMP2, RA2, RA2; + vpshufb RTMP2, RA3, RA3; + vpshufb RTMP2, RB0, RB0; + vpshufb RTMP2, RB1, RB1; + vpshufb RTMP2, RB2, RB2; + vpshufb RTMP2, RB3, RB3; + + vbroadcastss .L0f0f0f0f rRIP, MASK_4BIT; + transpose_4x4(RA0, RA1, RA2, RA3, RTMP0, RTMP1); + transpose_4x4(RB0, RB1, RB2, RB3, RTMP0, RTMP1); + +#define ROUND(round, s0, s1, s2, s3, r0, r1, r2, r3) \ + vbroadcastss (4*(round))(%rdi), RX0; \ + vmovdqa .Lpre_tf_lo_s rRIP, RTMP4; \ + vmovdqa .Lpre_tf_hi_s rRIP, RTMP1; \ + vmovdqa RX0, RX1; \ + vpxor s1, RX0, RX0; \ + vpxor s2, RX0, RX0; \ + vpxor s3, RX0, RX0; /* s1 ^ s2 ^ s3 ^ rk */ \ + vmovdqa .Lpost_tf_lo_s rRIP, RTMP2; \ + vmovdqa .Lpost_tf_hi_s rRIP, RTMP3; \ + vpxor r1, RX1, RX1; \ + vpxor r2, RX1, RX1; \ + vpxor r3, RX1, RX1; /* r1 ^ r2 ^ r3 ^ rk */ \ + \ + /* sbox, non-linear part */ \ + transform_pre(RX0, RTMP4, RTMP1, MASK_4BIT, RTMP0); \ + transform_pre(RX1, RTMP4, RTMP1, MASK_4BIT, RTMP0); \ + vmovdqa .Linv_shift_row rRIP, RTMP4; \ + vaesenclast MASK_4BIT, RX0, RX0; \ + vaesenclast MASK_4BIT, RX1, RX1; \ + transform_post(RX0, RTMP2, RTMP3, MASK_4BIT, RTMP0); \ + transform_post(RX1, RTMP2, RTMP3, MASK_4BIT, RTMP0); \ + \ + /* linear part */ \ + vpshufb RTMP4, RX0, RTMP0; \ + vpxor RTMP0, s0, s0; /* s0 ^ x */ \ + vpshufb RTMP4, RX1, RTMP2; \ + vmovdqa .Linv_shift_row_rol_8 rRIP, RTMP4; \ + vpxor RTMP2, r0, r0; /* r0 ^ x */ \ + vpshufb RTMP4, RX0, RTMP1; \ + vpxor RTMP1, RTMP0, RTMP0; /* x ^ rol(x,8) */ \ + vpshufb RTMP4, RX1, RTMP3; \ + vmovdqa .Linv_shift_row_rol_16 rRIP, RTMP4; \ + vpxor RTMP3, RTMP2, RTMP2; /* x ^ rol(x,8) */ \ + vpshufb RTMP4, RX0, RTMP1; \ + vpxor RTMP1, RTMP0, RTMP0; /* x ^ rol(x,8) ^ rol(x,16) */ \ + vpshufb RTMP4, RX1, RTMP3; \ + vmovdqa .Linv_shift_row_rol_24 rRIP, RTMP4; \ + vpxor RTMP3, RTMP2, RTMP2; /* x ^ rol(x,8) ^ rol(x,16) */ \ + vpshufb RTMP4, RX0, RTMP1; \ + vpxor RTMP1, s0, s0; /* s0 ^ x ^ rol(x,24) */ \ + /* s0 ^ x ^ rol(x,2) ^ rol(x,10) ^ rol(x,18) ^ rol(x,24) */ \ + vpslld $2, RTMP0, RTMP1; \ + vpsrld $30, RTMP0, RTMP0; \ + vpxor RTMP0, s0, s0; \ + vpxor RTMP1, s0, s0; \ + vpshufb RTMP4, RX1, RTMP3; \ + vpxor RTMP3, r0, r0; /* r0 ^ x ^ rol(x,24) */ \ + /* r0 ^ x ^ rol(x,2) ^ rol(x,10) ^ rol(x,18) ^ rol(x,24) */ \ + vpslld $2, RTMP2, RTMP3; \ + vpsrld $30, RTMP2, RTMP2; \ + vpxor RTMP2, r0, r0; \ + vpxor RTMP3, r0, r0; + + leaq (32*4)(%rdi), %rax; +.align 16 +.Lroundloop_blk8: + ROUND(0, RA0, RA1, RA2, RA3, RB0, RB1, RB2, RB3); + ROUND(1, RA1, RA2, RA3, RA0, RB1, RB2, RB3, RB0); + ROUND(2, RA2, RA3, RA0, RA1, RB2, RB3, RB0, RB1); + ROUND(3, RA3, RA0, RA1, RA2, RB3, RB0, RB1, RB2); + leaq (4*4)(%rdi), %rdi; + cmpq %rax, %rdi; + jne .Lroundloop_blk8; + +#undef ROUND + + vmovdqa .Lbswap128_mask rRIP, RTMP2; + + transpose_4x4(RA0, RA1, RA2, RA3, RTMP0, RTMP1); + transpose_4x4(RB0, RB1, RB2, RB3, RTMP0, RTMP1); + vpshufb RTMP2, RA0, RA0; + vpshufb RTMP2, RA1, RA1; + vpshufb RTMP2, RA2, RA2; + vpshufb RTMP2, RA3, RA3; + vpshufb RTMP2, RB0, RB0; + vpshufb RTMP2, RB1, RB1; + vpshufb RTMP2, RB2, RB2; + vpshufb RTMP2, RB3, RB3; + + FRAME_END + ret; +SYM_FUNC_END(__sm4_crypt_blk8) + +/* + * void sm4_aesni_avx_crypt8(const u32 *rk, u8 *dst, + * const u8 *src, int nblocks) + */ +.align 8 +SYM_FUNC_START(sm4_aesni_avx_crypt8) + /* input: + * %rdi: round key array, CTX + * %rsi: dst (1..8 blocks) + * %rdx: src (1..8 blocks) + * %rcx: num blocks (1..8) + */ + FRAME_BEGIN + + cmpq $5, %rcx; + jb sm4_aesni_avx_crypt4; + vmovdqu (0 * 16)(%rdx), RA0; + vmovdqu (1 * 16)(%rdx), RA1; + vmovdqu (2 * 16)(%rdx), RA2; + vmovdqu (3 * 16)(%rdx), RA3; + vmovdqu (4 * 16)(%rdx), RB0; + vmovdqa RB0, RB1; + vmovdqa RB0, RB2; + vmovdqa RB0, RB3; + je .Lblk8_load_input_done; + vmovdqu (5 * 16)(%rdx), RB1; + cmpq $7, %rcx; + jb .Lblk8_load_input_done; + vmovdqu (6 * 16)(%rdx), RB2; + je .Lblk8_load_input_done; + vmovdqu (7 * 16)(%rdx), RB3; + +.Lblk8_load_input_done: + call __sm4_crypt_blk8; + + cmpq $6, %rcx; + vmovdqu RA0, (0 * 16)(%rsi); + vmovdqu RA1, (1 * 16)(%rsi); + vmovdqu RA2, (2 * 16)(%rsi); + vmovdqu RA3, (3 * 16)(%rsi); + vmovdqu RB0, (4 * 16)(%rsi); + jb .Lblk8_store_output_done; + vmovdqu RB1, (5 * 16)(%rsi); + je .Lblk8_store_output_done; + vmovdqu RB2, (6 * 16)(%rsi); + cmpq $7, %rcx; + je .Lblk8_store_output_done; + vmovdqu RB3, (7 * 16)(%rsi); + +.Lblk8_store_output_done: + vzeroall; + FRAME_END + ret; +SYM_FUNC_END(sm4_aesni_avx_crypt8) + +/* + * void sm4_aesni_avx_ctr_enc_blk8(const u32 *rk, u8 *dst, + * const u8 *src, u8 *iv) + */ +.align 8 +SYM_FUNC_START(sm4_aesni_avx_ctr_enc_blk8) + /* input: + * %rdi: round key array, CTX + * %rsi: dst (8 blocks) + * %rdx: src (8 blocks) + * %rcx: iv (big endian, 128bit) + */ + FRAME_BEGIN + + /* load IV and byteswap */ + vmovdqu (%rcx), RA0; + + vmovdqa .Lbswap128_mask rRIP, RBSWAP; + vpshufb RBSWAP, RA0, RTMP0; /* be => le */ + + vpcmpeqd RNOT, RNOT, RNOT; + vpsrldq $8, RNOT, RNOT; /* low: -1, high: 0 */ + +#define inc_le128(x, minus_one, tmp) \ + vpcmpeqq minus_one, x, tmp; \ + vpsubq minus_one, x, x; \ + vpslldq $8, tmp, tmp; \ + vpsubq tmp, x, x; + + /* construct IVs */ + inc_le128(RTMP0, RNOT, RTMP2); /* +1 */ + vpshufb RBSWAP, RTMP0, RA1; + inc_le128(RTMP0, RNOT, RTMP2); /* +2 */ + vpshufb RBSWAP, RTMP0, RA2; + inc_le128(RTMP0, RNOT, RTMP2); /* +3 */ + vpshufb RBSWAP, RTMP0, RA3; + inc_le128(RTMP0, RNOT, RTMP2); /* +4 */ + vpshufb RBSWAP, RTMP0, RB0; + inc_le128(RTMP0, RNOT, RTMP2); /* +5 */ + vpshufb RBSWAP, RTMP0, RB1; + inc_le128(RTMP0, RNOT, RTMP2); /* +6 */ + vpshufb RBSWAP, RTMP0, RB2; + inc_le128(RTMP0, RNOT, RTMP2); /* +7 */ + vpshufb RBSWAP, RTMP0, RB3; + inc_le128(RTMP0, RNOT, RTMP2); /* +8 */ + vpshufb RBSWAP, RTMP0, RTMP1; + + /* store new IV */ + vmovdqu RTMP1, (%rcx); + + call __sm4_crypt_blk8; + + vpxor (0 * 16)(%rdx), RA0, RA0; + vpxor (1 * 16)(%rdx), RA1, RA1; + vpxor (2 * 16)(%rdx), RA2, RA2; + vpxor (3 * 16)(%rdx), RA3, RA3; + vpxor (4 * 16)(%rdx), RB0, RB0; + vpxor (5 * 16)(%rdx), RB1, RB1; + vpxor (6 * 16)(%rdx), RB2, RB2; + vpxor (7 * 16)(%rdx), RB3, RB3; + + vmovdqu RA0, (0 * 16)(%rsi); + vmovdqu RA1, (1 * 16)(%rsi); + vmovdqu RA2, (2 * 16)(%rsi); + vmovdqu RA3, (3 * 16)(%rsi); + vmovdqu RB0, (4 * 16)(%rsi); + vmovdqu RB1, (5 * 16)(%rsi); + vmovdqu RB2, (6 * 16)(%rsi); + vmovdqu RB3, (7 * 16)(%rsi); + + vzeroall; + FRAME_END + ret; +SYM_FUNC_END(sm4_aesni_avx_ctr_enc_blk8) + +/* + * void sm4_aesni_avx_cbc_dec_blk8(const u32 *rk, u8 *dst, + * const u8 *src, u8 *iv) + */ +.align 8 +SYM_FUNC_START(sm4_aesni_avx_cbc_dec_blk8) + /* input: + * %rdi: round key array, CTX + * %rsi: dst (8 blocks) + * %rdx: src (8 blocks) + * %rcx: iv + */ + FRAME_BEGIN + + vmovdqu (0 * 16)(%rdx), RA0; + vmovdqu (1 * 16)(%rdx), RA1; + vmovdqu (2 * 16)(%rdx), RA2; + vmovdqu (3 * 16)(%rdx), RA3; + vmovdqu (4 * 16)(%rdx), RB0; + vmovdqu (5 * 16)(%rdx), RB1; + vmovdqu (6 * 16)(%rdx), RB2; + vmovdqu (7 * 16)(%rdx), RB3; + + call __sm4_crypt_blk8; + + vmovdqu (7 * 16)(%rdx), RNOT; + vpxor (%rcx), RA0, RA0; + vpxor (0 * 16)(%rdx), RA1, RA1; + vpxor (1 * 16)(%rdx), RA2, RA2; + vpxor (2 * 16)(%rdx), RA3, RA3; + vpxor (3 * 16)(%rdx), RB0, RB0; + vpxor (4 * 16)(%rdx), RB1, RB1; + vpxor (5 * 16)(%rdx), RB2, RB2; + vpxor (6 * 16)(%rdx), RB3, RB3; + vmovdqu RNOT, (%rcx); /* store new IV */ + + vmovdqu RA0, (0 * 16)(%rsi); + vmovdqu RA1, (1 * 16)(%rsi); + vmovdqu RA2, (2 * 16)(%rsi); + vmovdqu RA3, (3 * 16)(%rsi); + vmovdqu RB0, (4 * 16)(%rsi); + vmovdqu RB1, (5 * 16)(%rsi); + vmovdqu RB2, (6 * 16)(%rsi); + vmovdqu RB3, (7 * 16)(%rsi); + + vzeroall; + FRAME_END + ret; +SYM_FUNC_END(sm4_aesni_avx_cbc_dec_blk8) + +/* + * void sm4_aesni_avx_cfb_dec_blk8(const u32 *rk, u8 *dst, + * const u8 *src, u8 *iv) + */ +.align 8 +SYM_FUNC_START(sm4_aesni_avx_cfb_dec_blk8) + /* input: + * %rdi: round key array, CTX + * %rsi: dst (8 blocks) + * %rdx: src (8 blocks) + * %rcx: iv + */ + FRAME_BEGIN + + /* Load input */ + vmovdqu (%rcx), RA0; + vmovdqu 0 * 16(%rdx), RA1; + vmovdqu 1 * 16(%rdx), RA2; + vmovdqu 2 * 16(%rdx), RA3; + vmovdqu 3 * 16(%rdx), RB0; + vmovdqu 4 * 16(%rdx), RB1; + vmovdqu 5 * 16(%rdx), RB2; + vmovdqu 6 * 16(%rdx), RB3; + + /* Update IV */ + vmovdqu 7 * 16(%rdx), RNOT; + vmovdqu RNOT, (%rcx); + + call __sm4_crypt_blk8; + + vpxor (0 * 16)(%rdx), RA0, RA0; + vpxor (1 * 16)(%rdx), RA1, RA1; + vpxor (2 * 16)(%rdx), RA2, RA2; + vpxor (3 * 16)(%rdx), RA3, RA3; + vpxor (4 * 16)(%rdx), RB0, RB0; + vpxor (5 * 16)(%rdx), RB1, RB1; + vpxor (6 * 16)(%rdx), RB2, RB2; + vpxor (7 * 16)(%rdx), RB3, RB3; + + vmovdqu RA0, (0 * 16)(%rsi); + vmovdqu RA1, (1 * 16)(%rsi); + vmovdqu RA2, (2 * 16)(%rsi); + vmovdqu RA3, (3 * 16)(%rsi); + vmovdqu RB0, (4 * 16)(%rsi); + vmovdqu RB1, (5 * 16)(%rsi); + vmovdqu RB2, (6 * 16)(%rsi); + vmovdqu RB3, (7 * 16)(%rsi); + + vzeroall; + FRAME_END + ret; +SYM_FUNC_END(sm4_aesni_avx_cfb_dec_blk8) diff --git a/arch/x86/crypto/sm4_aesni_avx_glue.c b/arch/x86/crypto/sm4_aesni_avx_glue.c new file mode 100644 index 000000000000..c1f5728efd1d --- /dev/null +++ b/arch/x86/crypto/sm4_aesni_avx_glue.c @@ -0,0 +1,459 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * SM4 Cipher Algorithm, AES-NI/AVX optimized. + * as specified in + * https://tools.ietf.org/id/draft-ribose-cfrg-sm4-10.html + * + * Copyright (c) 2021, Alibaba Group. + * Copyright (c) 2021 Tianjia Zhang + */ + +#include +#include +#include +#include +#include +#include +#include + +#define SM4_CRYPT8_BLOCK_SIZE (SM4_BLOCK_SIZE * 8) + +asmlinkage void sm4_aesni_avx_crypt4(const u32 *rk, u8 *dst, + const u8 *src, int nblocks); +asmlinkage void sm4_aesni_avx_crypt8(const u32 *rk, u8 *dst, + const u8 *src, int nblocks); +asmlinkage void sm4_aesni_avx_ctr_enc_blk8(const u32 *rk, u8 *dst, + const u8 *src, u8 *iv); +asmlinkage void sm4_aesni_avx_cbc_dec_blk8(const u32 *rk, u8 *dst, + const u8 *src, u8 *iv); +asmlinkage void sm4_aesni_avx_cfb_dec_blk8(const u32 *rk, u8 *dst, + const u8 *src, u8 *iv); + +static int sm4_skcipher_setkey(struct crypto_skcipher *tfm, const u8 *key, + unsigned int key_len) +{ + struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm); + + return sm4_expandkey(ctx, key, key_len); +} + +static int ecb_do_crypt(struct skcipher_request *req, const u32 *rkey) +{ + struct skcipher_walk walk; + unsigned int nbytes; + int err; + + err = skcipher_walk_virt(&walk, req, false); + + while ((nbytes = walk.nbytes) > 0) { + const u8 *src = walk.src.virt.addr; + u8 *dst = walk.dst.virt.addr; + + kernel_fpu_begin(); + while (nbytes >= SM4_CRYPT8_BLOCK_SIZE) { + sm4_aesni_avx_crypt8(rkey, dst, src, 8); + dst += SM4_CRYPT8_BLOCK_SIZE; + src += SM4_CRYPT8_BLOCK_SIZE; + nbytes -= SM4_CRYPT8_BLOCK_SIZE; + } + while (nbytes >= SM4_BLOCK_SIZE) { + unsigned int nblocks = min(nbytes >> 4, 4u); + sm4_aesni_avx_crypt4(rkey, dst, src, nblocks); + dst += nblocks * SM4_BLOCK_SIZE; + src += nblocks * SM4_BLOCK_SIZE; + nbytes -= nblocks * SM4_BLOCK_SIZE; + } + kernel_fpu_end(); + + err = skcipher_walk_done(&walk, nbytes); + } + + return err; +} + +static int ecb_encrypt(struct skcipher_request *req) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm); + + return ecb_do_crypt(req, ctx->rkey_enc); +} + +static int ecb_decrypt(struct skcipher_request *req) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm); + + return ecb_do_crypt(req, ctx->rkey_dec); +} + +static int cbc_encrypt(struct skcipher_request *req) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm); + struct skcipher_walk walk; + unsigned int nbytes; + int err; + + err = skcipher_walk_virt(&walk, req, false); + + while ((nbytes = walk.nbytes) > 0) { + const u8 *iv = walk.iv; + const u8 *src = walk.src.virt.addr; + u8 *dst = walk.dst.virt.addr; + + while (nbytes >= SM4_BLOCK_SIZE) { + crypto_xor_cpy(dst, src, iv, SM4_BLOCK_SIZE); + sm4_crypt_block(ctx->rkey_enc, dst, dst); + iv = dst; + src += SM4_BLOCK_SIZE; + dst += SM4_BLOCK_SIZE; + nbytes -= SM4_BLOCK_SIZE; + } + if (iv != walk.iv) + memcpy(walk.iv, iv, SM4_BLOCK_SIZE); + + err = skcipher_walk_done(&walk, nbytes); + } + + return err; +} + +static int cbc_decrypt(struct skcipher_request *req) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm); + struct skcipher_walk walk; + unsigned int nbytes; + int err; + + err = skcipher_walk_virt(&walk, req, false); + + while ((nbytes = walk.nbytes) > 0) { + const u8 *src = walk.src.virt.addr; + u8 *dst = walk.dst.virt.addr; + + kernel_fpu_begin(); + + while (nbytes >= SM4_CRYPT8_BLOCK_SIZE) { + sm4_aesni_avx_cbc_dec_blk8(ctx->rkey_dec, dst, + src, walk.iv); + dst += SM4_CRYPT8_BLOCK_SIZE; + src += SM4_CRYPT8_BLOCK_SIZE; + nbytes -= SM4_CRYPT8_BLOCK_SIZE; + } + + if (nbytes >= SM4_BLOCK_SIZE) { + u8 keystream[SM4_BLOCK_SIZE * 8]; + u8 iv[SM4_BLOCK_SIZE]; + unsigned int nblocks = min(nbytes >> 4, 8u); + int i; + + sm4_aesni_avx_crypt8(ctx->rkey_dec, keystream, + src, nblocks); + + src += ((int)nblocks - 2) * SM4_BLOCK_SIZE; + dst += (nblocks - 1) * SM4_BLOCK_SIZE; + memcpy(iv, src + SM4_BLOCK_SIZE, SM4_BLOCK_SIZE); + + for (i = nblocks - 1; i > 0; i--) { + crypto_xor_cpy(dst, src, + &keystream[i * SM4_BLOCK_SIZE], + SM4_BLOCK_SIZE); + src -= SM4_BLOCK_SIZE; + dst -= SM4_BLOCK_SIZE; + } + crypto_xor_cpy(dst, walk.iv, keystream, SM4_BLOCK_SIZE); + memcpy(walk.iv, iv, SM4_BLOCK_SIZE); + nbytes -= nblocks * SM4_BLOCK_SIZE; + } + + kernel_fpu_end(); + err = skcipher_walk_done(&walk, nbytes); + } + + return err; +} + +static int cfb_encrypt(struct skcipher_request *req) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm); + struct skcipher_walk walk; + unsigned int nbytes; + int err; + + err = skcipher_walk_virt(&walk, req, false); + + while ((nbytes = walk.nbytes) > 0) { + u8 keystream[SM4_BLOCK_SIZE]; + const u8 *iv = walk.iv; + const u8 *src = walk.src.virt.addr; + u8 *dst = walk.dst.virt.addr; + + while (nbytes >= SM4_BLOCK_SIZE) { + sm4_crypt_block(ctx->rkey_enc, keystream, iv); + crypto_xor_cpy(dst, src, keystream, SM4_BLOCK_SIZE); + iv = dst; + src += SM4_BLOCK_SIZE; + dst += SM4_BLOCK_SIZE; + nbytes -= SM4_BLOCK_SIZE; + } + if (iv != walk.iv) + memcpy(walk.iv, iv, SM4_BLOCK_SIZE); + + /* tail */ + if (walk.nbytes == walk.total && nbytes > 0) { + sm4_crypt_block(ctx->rkey_enc, keystream, walk.iv); + crypto_xor_cpy(dst, src, keystream, nbytes); + nbytes = 0; + } + + err = skcipher_walk_done(&walk, nbytes); + } + + return err; +} + +static int cfb_decrypt(struct skcipher_request *req) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm); + struct skcipher_walk walk; + unsigned int nbytes; + int err; + + err = skcipher_walk_virt(&walk, req, false); + + while ((nbytes = walk.nbytes) > 0) { + const u8 *src = walk.src.virt.addr; + u8 *dst = walk.dst.virt.addr; + + kernel_fpu_begin(); + + while (nbytes >= SM4_CRYPT8_BLOCK_SIZE) { + sm4_aesni_avx_cfb_dec_blk8(ctx->rkey_enc, dst, + src, walk.iv); + dst += SM4_CRYPT8_BLOCK_SIZE; + src += SM4_CRYPT8_BLOCK_SIZE; + nbytes -= SM4_CRYPT8_BLOCK_SIZE; + } + + if (nbytes >= SM4_BLOCK_SIZE) { + u8 keystream[SM4_BLOCK_SIZE * 8]; + unsigned int nblocks = min(nbytes >> 4, 8u); + + memcpy(keystream, walk.iv, SM4_BLOCK_SIZE); + if (nblocks > 1) + memcpy(&keystream[SM4_BLOCK_SIZE], src, + (nblocks - 1) * SM4_BLOCK_SIZE); + memcpy(walk.iv, src + (nblocks - 1) * SM4_BLOCK_SIZE, + SM4_BLOCK_SIZE); + + sm4_aesni_avx_crypt8(ctx->rkey_enc, keystream, + keystream, nblocks); + + crypto_xor_cpy(dst, src, keystream, + nblocks * SM4_BLOCK_SIZE); + dst += nblocks * SM4_BLOCK_SIZE; + src += nblocks * SM4_BLOCK_SIZE; + nbytes -= nblocks * SM4_BLOCK_SIZE; + } + + kernel_fpu_end(); + + /* tail */ + if (walk.nbytes == walk.total && nbytes > 0) { + u8 keystream[SM4_BLOCK_SIZE]; + + sm4_crypt_block(ctx->rkey_enc, keystream, walk.iv); + crypto_xor_cpy(dst, src, keystream, nbytes); + nbytes = 0; + } + + err = skcipher_walk_done(&walk, nbytes); + } + + return err; +} + +static int ctr_crypt(struct skcipher_request *req) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm); + struct skcipher_walk walk; + unsigned int nbytes; + int err; + + err = skcipher_walk_virt(&walk, req, false); + + while ((nbytes = walk.nbytes) > 0) { + const u8 *src = walk.src.virt.addr; + u8 *dst = walk.dst.virt.addr; + + kernel_fpu_begin(); + + while (nbytes >= SM4_CRYPT8_BLOCK_SIZE) { + sm4_aesni_avx_ctr_enc_blk8(ctx->rkey_enc, dst, + src, walk.iv); + dst += SM4_CRYPT8_BLOCK_SIZE; + src += SM4_CRYPT8_BLOCK_SIZE; + nbytes -= SM4_CRYPT8_BLOCK_SIZE; + } + + if (nbytes >= SM4_BLOCK_SIZE) { + u8 keystream[SM4_BLOCK_SIZE * 8]; + unsigned int nblocks = min(nbytes >> 4, 8u); + int i; + + for (i = 0; i < nblocks; i++) { + memcpy(&keystream[i * SM4_BLOCK_SIZE], + walk.iv, SM4_BLOCK_SIZE); + crypto_inc(walk.iv, SM4_BLOCK_SIZE); + } + sm4_aesni_avx_crypt8(ctx->rkey_enc, keystream, + keystream, nblocks); + + crypto_xor_cpy(dst, src, keystream, + nblocks * SM4_BLOCK_SIZE); + dst += nblocks * SM4_BLOCK_SIZE; + src += nblocks * SM4_BLOCK_SIZE; + nbytes -= nblocks * SM4_BLOCK_SIZE; + } + + kernel_fpu_end(); + + /* tail */ + if (walk.nbytes == walk.total && nbytes > 0) { + u8 keystream[SM4_BLOCK_SIZE]; + + memcpy(keystream, walk.iv, SM4_BLOCK_SIZE); + crypto_inc(walk.iv, SM4_BLOCK_SIZE); + + sm4_crypt_block(ctx->rkey_enc, keystream, keystream); + + crypto_xor_cpy(dst, src, keystream, nbytes); + dst += nbytes; + src += nbytes; + nbytes = 0; + } + + err = skcipher_walk_done(&walk, nbytes); + } + + return err; +} + +static struct skcipher_alg sm4_aesni_avx_skciphers[] = { + { + .base = { + .cra_name = "__ecb(sm4)", + .cra_driver_name = "__ecb-sm4-aesni-avx", + .cra_priority = 400, + .cra_flags = CRYPTO_ALG_INTERNAL, + .cra_blocksize = SM4_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct sm4_ctx), + .cra_module = THIS_MODULE, + }, + .min_keysize = SM4_KEY_SIZE, + .max_keysize = SM4_KEY_SIZE, + .walksize = 8 * SM4_BLOCK_SIZE, + .setkey = sm4_skcipher_setkey, + .encrypt = ecb_encrypt, + .decrypt = ecb_decrypt, + }, { + .base = { + .cra_name = "__cbc(sm4)", + .cra_driver_name = "__cbc-sm4-aesni-avx", + .cra_priority = 400, + .cra_flags = CRYPTO_ALG_INTERNAL, + .cra_blocksize = SM4_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct sm4_ctx), + .cra_module = THIS_MODULE, + }, + .min_keysize = SM4_KEY_SIZE, + .max_keysize = SM4_KEY_SIZE, + .ivsize = SM4_BLOCK_SIZE, + .walksize = 8 * SM4_BLOCK_SIZE, + .setkey = sm4_skcipher_setkey, + .encrypt = cbc_encrypt, + .decrypt = cbc_decrypt, + }, { + .base = { + .cra_name = "__cfb(sm4)", + .cra_driver_name = "__cfb-sm4-aesni-avx", + .cra_priority = 400, + .cra_flags = CRYPTO_ALG_INTERNAL, + .cra_blocksize = 1, + .cra_ctxsize = sizeof(struct sm4_ctx), + .cra_module = THIS_MODULE, + }, + .min_keysize = SM4_KEY_SIZE, + .max_keysize = SM4_KEY_SIZE, + .ivsize = SM4_BLOCK_SIZE, + .chunksize = SM4_BLOCK_SIZE, + .walksize = 8 * SM4_BLOCK_SIZE, + .setkey = sm4_skcipher_setkey, + .encrypt = cfb_encrypt, + .decrypt = cfb_decrypt, + }, { + .base = { + .cra_name = "__ctr(sm4)", + .cra_driver_name = "__ctr-sm4-aesni-avx", + .cra_priority = 400, + .cra_flags = CRYPTO_ALG_INTERNAL, + .cra_blocksize = 1, + .cra_ctxsize = sizeof(struct sm4_ctx), + .cra_module = THIS_MODULE, + }, + .min_keysize = SM4_KEY_SIZE, + .max_keysize = SM4_KEY_SIZE, + .ivsize = SM4_BLOCK_SIZE, + .chunksize = SM4_BLOCK_SIZE, + .walksize = 8 * SM4_BLOCK_SIZE, + .setkey = sm4_skcipher_setkey, + .encrypt = ctr_crypt, + .decrypt = ctr_crypt, + } +}; + +static struct simd_skcipher_alg * +simd_sm4_aesni_avx_skciphers[ARRAY_SIZE(sm4_aesni_avx_skciphers)]; + +static int __init sm4_init(void) +{ + const char *feature_name; + + if (!boot_cpu_has(X86_FEATURE_AVX) || + !boot_cpu_has(X86_FEATURE_AES) || + !boot_cpu_has(X86_FEATURE_OSXSAVE)) { + pr_info("AVX or AES-NI instructions are not detected.\n"); + return -ENODEV; + } + + if (!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, + &feature_name)) { + pr_info("CPU feature '%s' is not supported.\n", feature_name); + return -ENODEV; + } + + return simd_register_skciphers_compat(sm4_aesni_avx_skciphers, + ARRAY_SIZE(sm4_aesni_avx_skciphers), + simd_sm4_aesni_avx_skciphers); +} + +static void __exit sm4_exit(void) +{ + simd_unregister_skciphers(sm4_aesni_avx_skciphers, + ARRAY_SIZE(sm4_aesni_avx_skciphers), + simd_sm4_aesni_avx_skciphers); +} + +module_init(sm4_init); +module_exit(sm4_exit); + +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("Tianjia Zhang "); +MODULE_DESCRIPTION("SM4 Cipher Algorithm, AES-NI/AVX optimized"); +MODULE_ALIAS_CRYPTO("sm4"); +MODULE_ALIAS_CRYPTO("sm4-aesni-avx"); diff --git a/crypto/Kconfig b/crypto/Kconfig index 5bf86f5d59db..8a4010d91124 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -1570,6 +1570,27 @@ config CRYPTO_SM4 If unsure, say N. +config CRYPTO_SM4_AESNI_AVX_X86_64 + tristate "SM4 cipher algorithm (x86_64/AES-NI/AVX)" + depends on X86 && 64BIT + select CRYPTO_SKCIPHER + select CRYPTO_SIMD + select CRYPTO_ALGAPI + select CRYPTO_LIB_SM4 + help + SM4 cipher algorithms (OSCCA GB/T 32907-2016) (x86_64/AES-NI/AVX). + + SM4 (GBT.32907-2016) is a cryptographic standard issued by the + Organization of State Commercial Administration of China (OSCCA) + as an authorized cryptographic algorithms for the use within China. + + This is SM4 optimized implementation using AES-NI/AVX/x86_64 + instruction set for block cipher. Through two affine transforms, + we can use the AES S-Box to simulate the SM4 S-Box to achieve the + effect of instruction acceleration. + + If unsure, say N. + config CRYPTO_TEA tristate "TEA, XTEA and XETA cipher algorithms" depends on CRYPTO_USER_API_ENABLE_OBSOLETE -- cgit v1.2.3 From a7fc80bb22eb0f13791ee4f70484e88316cc2a24 Mon Sep 17 00:00:00 2001 From: Tianjia Zhang Date: Tue, 20 Jul 2021 11:46:42 +0800 Subject: crypto: tcrypt - add the asynchronous speed test for SM4 tcrypt supports testing of SM4 cipher algorithms that use avx instruction set acceleration. The implementation of sm4 instruction set acceleration supports up to 8 blocks in parallel encryption and decryption, which is 128 bytes. Therefore, the 128-byte block size is also added to block_sizes. Signed-off-by: Tianjia Zhang Signed-off-by: Herbert Xu --- crypto/tcrypt.c | 26 +++++++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/crypto/tcrypt.c b/crypto/tcrypt.c index f8d06da78e4f..d73a42fdaa9b 100644 --- a/crypto/tcrypt.c +++ b/crypto/tcrypt.c @@ -77,7 +77,7 @@ static const char *check[] = { NULL }; -static const int block_sizes[] = { 16, 64, 256, 1024, 1420, 4096, 0 }; +static const int block_sizes[] = { 16, 64, 128, 256, 1024, 1420, 4096, 0 }; static const int aead_sizes[] = { 16, 64, 256, 512, 1024, 1420, 4096, 8192, 0 }; #define XBUFSIZE 8 @@ -2031,6 +2031,7 @@ static int do_test(const char *alg, u32 type, u32 mask, int m, u32 num_mb) case 191: ret += tcrypt_test("ecb(sm4)"); ret += tcrypt_test("cbc(sm4)"); + ret += tcrypt_test("cfb(sm4)"); ret += tcrypt_test("ctr(sm4)"); break; case 200: @@ -2289,6 +2290,10 @@ static int do_test(const char *alg, u32 type, u32 mask, int m, u32 num_mb) speed_template_16); test_cipher_speed("cbc(sm4)", DECRYPT, sec, NULL, 0, speed_template_16); + test_cipher_speed("cfb(sm4)", ENCRYPT, sec, NULL, 0, + speed_template_16); + test_cipher_speed("cfb(sm4)", DECRYPT, sec, NULL, 0, + speed_template_16); test_cipher_speed("ctr(sm4)", ENCRYPT, sec, NULL, 0, speed_template_16); test_cipher_speed("ctr(sm4)", DECRYPT, sec, NULL, 0, @@ -2757,6 +2762,25 @@ static int do_test(const char *alg, u32 type, u32 mask, int m, u32 num_mb) speed_template_8_32); break; + case 518: + test_acipher_speed("ecb(sm4)", ENCRYPT, sec, NULL, 0, + speed_template_16); + test_acipher_speed("ecb(sm4)", DECRYPT, sec, NULL, 0, + speed_template_16); + test_acipher_speed("cbc(sm4)", ENCRYPT, sec, NULL, 0, + speed_template_16); + test_acipher_speed("cbc(sm4)", DECRYPT, sec, NULL, 0, + speed_template_16); + test_acipher_speed("cfb(sm4)", ENCRYPT, sec, NULL, 0, + speed_template_16); + test_acipher_speed("cfb(sm4)", DECRYPT, sec, NULL, 0, + speed_template_16); + test_acipher_speed("ctr(sm4)", ENCRYPT, sec, NULL, 0, + speed_template_16); + test_acipher_speed("ctr(sm4)", DECRYPT, sec, NULL, 0, + speed_template_16); + break; + case 600: test_mb_skcipher_speed("ecb(aes)", ENCRYPT, sec, NULL, 0, speed_template_16_24_32, num_mb); -- cgit v1.2.3 From 632a761abb29db2c541782b9fe1f1e8d6f833896 Mon Sep 17 00:00:00 2001 From: Tudor Ambarus Date: Tue, 20 Jul 2021 11:55:27 +0300 Subject: crypto: atmel-tdes - Clarify how tdes dev gets allocated to the tfm The tdes dev gets allocated to the tfm at alg->init time, there's no need to overwrite the pointer to tdes_dd afterwards. There's a single IP per SoC anyway, the first entry from the atmel_tdes.dev_list is chosen without counting for tfms for example, in case one thinks of an even distribution of tfms across the TDES IPs: there's only one. At alg->init time the ctx->dd should already be NULL, there's no need to check its value before requesting for a tdes dev. Signed-off-by: Tudor Ambarus Signed-off-by: Herbert Xu --- drivers/crypto/atmel-tdes.c | 28 +++++++++------------------- 1 file changed, 9 insertions(+), 19 deletions(-) diff --git a/drivers/crypto/atmel-tdes.c b/drivers/crypto/atmel-tdes.c index 6f01c51e3c37..dda70dbe0838 100644 --- a/drivers/crypto/atmel-tdes.c +++ b/drivers/crypto/atmel-tdes.c @@ -196,23 +196,15 @@ static void atmel_tdes_write_n(struct atmel_tdes_dev *dd, u32 offset, atmel_tdes_write(dd, offset, *value); } -static struct atmel_tdes_dev *atmel_tdes_find_dev(struct atmel_tdes_ctx *ctx) +static struct atmel_tdes_dev *atmel_tdes_dev_alloc(void) { - struct atmel_tdes_dev *tdes_dd = NULL; - struct atmel_tdes_dev *tmp; + struct atmel_tdes_dev *tdes_dd; spin_lock_bh(&atmel_tdes.lock); - if (!ctx->dd) { - list_for_each_entry(tmp, &atmel_tdes.dev_list, list) { - tdes_dd = tmp; - break; - } - ctx->dd = tdes_dd; - } else { - tdes_dd = ctx->dd; - } + /* One TDES IP per SoC. */ + tdes_dd = list_first_entry_or_null(&atmel_tdes.dev_list, + struct atmel_tdes_dev, list); spin_unlock_bh(&atmel_tdes.lock); - return tdes_dd; } @@ -646,7 +638,6 @@ static int atmel_tdes_handle_queue(struct atmel_tdes_dev *dd, rctx->mode &= TDES_FLAGS_MODE_MASK; dd->flags = (dd->flags & ~TDES_FLAGS_MODE_MASK) | rctx->mode; dd->ctx = ctx; - ctx->dd = dd; err = atmel_tdes_write_ctrl(dd); if (!err) @@ -897,14 +888,13 @@ static int atmel_tdes_ofb_decrypt(struct skcipher_request *req) static int atmel_tdes_init_tfm(struct crypto_skcipher *tfm) { struct atmel_tdes_ctx *ctx = crypto_skcipher_ctx(tfm); - struct atmel_tdes_dev *dd; - - crypto_skcipher_set_reqsize(tfm, sizeof(struct atmel_tdes_reqctx)); - dd = atmel_tdes_find_dev(ctx); - if (!dd) + ctx->dd = atmel_tdes_dev_alloc(); + if (!ctx->dd) return -ENODEV; + crypto_skcipher_set_reqsize(tfm, sizeof(struct atmel_tdes_reqctx)); + return 0; } -- cgit v1.2.3 From 817b804ca36747f0c9db667d7d8aa9fdd55335c7 Mon Sep 17 00:00:00 2001 From: Tudor Ambarus Date: Tue, 20 Jul 2021 11:55:28 +0300 Subject: crypto: atmel-tdes - Handle error messages Downgrade all runtime error messages to dev_dbg so that we don't pollute the console. All probe error messages are kept with dev_err. Get rid of pr_err and use dev_dbg instead, so that we know from which device the error comes. dma_mapping_error() return code was overwritten, use the error code that the function returns. Signed-off-by: Tudor Ambarus Signed-off-by: Herbert Xu --- drivers/crypto/atmel-tdes.c | 33 ++++++++++++++++----------------- 1 file changed, 16 insertions(+), 17 deletions(-) diff --git a/drivers/crypto/atmel-tdes.c b/drivers/crypto/atmel-tdes.c index dda70dbe0838..abbf1b7a75ab 100644 --- a/drivers/crypto/atmel-tdes.c +++ b/drivers/crypto/atmel-tdes.c @@ -312,7 +312,7 @@ static int atmel_tdes_crypt_pdc_stop(struct atmel_tdes_dev *dd) dd->buf_out, dd->buflen, dd->dma_size, 1); if (count != dd->dma_size) { err = -EINVAL; - pr_err("not all data converted: %zu\n", count); + dev_dbg(dd->dev, "not all data converted: %zu\n", count); } } @@ -329,24 +329,24 @@ static int atmel_tdes_buff_init(struct atmel_tdes_dev *dd) dd->buflen &= ~(DES_BLOCK_SIZE - 1); if (!dd->buf_in || !dd->buf_out) { - dev_err(dd->dev, "unable to alloc pages.\n"); + dev_dbg(dd->dev, "unable to alloc pages.\n"); goto err_alloc; } /* MAP here */ dd->dma_addr_in = dma_map_single(dd->dev, dd->buf_in, dd->buflen, DMA_TO_DEVICE); - if (dma_mapping_error(dd->dev, dd->dma_addr_in)) { - dev_err(dd->dev, "dma %zd bytes error\n", dd->buflen); - err = -EINVAL; + err = dma_mapping_error(dd->dev, dd->dma_addr_in); + if (err) { + dev_dbg(dd->dev, "dma %zd bytes error\n", dd->buflen); goto err_map_in; } dd->dma_addr_out = dma_map_single(dd->dev, dd->buf_out, dd->buflen, DMA_FROM_DEVICE); - if (dma_mapping_error(dd->dev, dd->dma_addr_out)) { - dev_err(dd->dev, "dma %zd bytes error\n", dd->buflen); - err = -EINVAL; + err = dma_mapping_error(dd->dev, dd->dma_addr_out); + if (err) { + dev_dbg(dd->dev, "dma %zd bytes error\n", dd->buflen); goto err_map_out; } @@ -359,8 +359,6 @@ err_map_in: err_alloc: free_page((unsigned long)dd->buf_out); free_page((unsigned long)dd->buf_in); - if (err) - pr_err("error: %d\n", err); return err; } @@ -512,14 +510,14 @@ static int atmel_tdes_crypt_start(struct atmel_tdes_dev *dd) err = dma_map_sg(dd->dev, dd->in_sg, 1, DMA_TO_DEVICE); if (!err) { - dev_err(dd->dev, "dma_map_sg() error\n"); + dev_dbg(dd->dev, "dma_map_sg() error\n"); return -EINVAL; } err = dma_map_sg(dd->dev, dd->out_sg, 1, DMA_FROM_DEVICE); if (!err) { - dev_err(dd->dev, "dma_map_sg() error\n"); + dev_dbg(dd->dev, "dma_map_sg() error\n"); dma_unmap_sg(dd->dev, dd->in_sg, 1, DMA_TO_DEVICE); return -EINVAL; @@ -670,7 +668,7 @@ static int atmel_tdes_crypt_dma_stop(struct atmel_tdes_dev *dd) dd->buf_out, dd->buflen, dd->dma_size, 1); if (count != dd->dma_size) { err = -EINVAL; - pr_err("not all data converted: %zu\n", count); + dev_dbg(dd->dev, "not all data converted: %zu\n", count); } } } @@ -682,11 +680,12 @@ static int atmel_tdes_crypt(struct skcipher_request *req, unsigned long mode) struct crypto_skcipher *skcipher = crypto_skcipher_reqtfm(req); struct atmel_tdes_ctx *ctx = crypto_skcipher_ctx(skcipher); struct atmel_tdes_reqctx *rctx = skcipher_request_ctx(req); + struct device *dev = ctx->dd->dev; switch (mode & TDES_FLAGS_OPMODE_MASK) { case TDES_FLAGS_CFB8: if (!IS_ALIGNED(req->cryptlen, CFB8_BLOCK_SIZE)) { - pr_err("request size is not exact amount of CFB8 blocks\n"); + dev_dbg(dev, "request size is not exact amount of CFB8 blocks\n"); return -EINVAL; } ctx->block_size = CFB8_BLOCK_SIZE; @@ -694,7 +693,7 @@ static int atmel_tdes_crypt(struct skcipher_request *req, unsigned long mode) case TDES_FLAGS_CFB16: if (!IS_ALIGNED(req->cryptlen, CFB16_BLOCK_SIZE)) { - pr_err("request size is not exact amount of CFB16 blocks\n"); + dev_dbg(dev, "request size is not exact amount of CFB16 blocks\n"); return -EINVAL; } ctx->block_size = CFB16_BLOCK_SIZE; @@ -702,7 +701,7 @@ static int atmel_tdes_crypt(struct skcipher_request *req, unsigned long mode) case TDES_FLAGS_CFB32: if (!IS_ALIGNED(req->cryptlen, CFB32_BLOCK_SIZE)) { - pr_err("request size is not exact amount of CFB32 blocks\n"); + dev_dbg(dev, "request size is not exact amount of CFB32 blocks\n"); return -EINVAL; } ctx->block_size = CFB32_BLOCK_SIZE; @@ -710,7 +709,7 @@ static int atmel_tdes_crypt(struct skcipher_request *req, unsigned long mode) default: if (!IS_ALIGNED(req->cryptlen, DES_BLOCK_SIZE)) { - pr_err("request size is not exact amount of DES blocks\n"); + dev_dbg(dev, "request size is not exact amount of DES blocks\n"); return -EINVAL; } ctx->block_size = DES_BLOCK_SIZE; -- cgit v1.2.3 From 534b32a8be27dc0eb54dccc302b3c9a6f0fe88a2 Mon Sep 17 00:00:00 2001 From: Tudor Ambarus Date: Tue, 20 Jul 2021 11:55:29 +0300 Subject: crypto: atmel-aes - Add blocksize constraint for ECB and CBC modes NIST 800-38A requires for the ECB and CBC modes that the total number of bits in the plaintext to be a multiple of the block cipher. Signed-off-by: Tudor Ambarus Signed-off-by: Herbert Xu --- drivers/crypto/atmel-aes.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/crypto/atmel-aes.c b/drivers/crypto/atmel-aes.c index b1d286004295..9c6d80d1d7a0 100644 --- a/drivers/crypto/atmel-aes.c +++ b/drivers/crypto/atmel-aes.c @@ -1089,6 +1089,11 @@ static int atmel_aes_crypt(struct skcipher_request *req, unsigned long mode) struct atmel_aes_base_ctx *ctx = crypto_skcipher_ctx(skcipher); struct atmel_aes_reqctx *rctx; struct atmel_aes_dev *dd; + u32 opmode = mode & AES_FLAGS_OPMODE_MASK; + + if ((opmode == AES_FLAGS_ECB || opmode == AES_FLAGS_CBC) && + !IS_ALIGNED(req->cryptlen, crypto_skcipher_blocksize(skcipher))) + return -EINVAL; switch (mode & AES_FLAGS_OPMODE_MASK) { case AES_FLAGS_CFB8: @@ -1120,7 +1125,7 @@ static int atmel_aes_crypt(struct skcipher_request *req, unsigned long mode) rctx = skcipher_request_ctx(req); rctx->mode = mode; - if ((mode & AES_FLAGS_OPMODE_MASK) != AES_FLAGS_ECB && + if (opmode != AES_FLAGS_ECB && !(mode & AES_FLAGS_ENCRYPT) && req->src == req->dst) { unsigned int ivsize = crypto_skcipher_ivsize(skcipher); -- cgit v1.2.3 From 26d769ae90907e6f4724b800ba16f11681079f23 Mon Sep 17 00:00:00 2001 From: Tudor Ambarus Date: Tue, 20 Jul 2021 11:55:30 +0300 Subject: crypto: atmel-aes - Add XTS input length constraint Input length smaller than block size does not make sense for XTS. Signed-off-by: Tudor Ambarus Signed-off-by: Herbert Xu --- drivers/crypto/atmel-aes.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/crypto/atmel-aes.c b/drivers/crypto/atmel-aes.c index 9c6d80d1d7a0..4e9515e8dd25 100644 --- a/drivers/crypto/atmel-aes.c +++ b/drivers/crypto/atmel-aes.c @@ -1091,6 +1091,9 @@ static int atmel_aes_crypt(struct skcipher_request *req, unsigned long mode) struct atmel_aes_dev *dd; u32 opmode = mode & AES_FLAGS_OPMODE_MASK; + if (opmode == AES_FLAGS_XTS && req->cryptlen < XTS_BLOCK_SIZE) + return -EINVAL; + if ((opmode == AES_FLAGS_ECB || opmode == AES_FLAGS_CBC) && !IS_ALIGNED(req->cryptlen, crypto_skcipher_blocksize(skcipher))) return -EINVAL; -- cgit v1.2.3 From 0d0433599d84bf7db8caa8fb76915dc0ff818150 Mon Sep 17 00:00:00 2001 From: Tudor Ambarus Date: Tue, 20 Jul 2021 11:55:31 +0300 Subject: crypto: atmel-aes - Add NIST 800-38A's zero length cryptlen constraint NIST 800-38A requires for the ECB, CBC, CFB, OFB and CTR modes that the plaintext and ciphertext to have a positive integer length. Signed-off-by: Tudor Ambarus Signed-off-by: Herbert Xu --- drivers/crypto/atmel-aes.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/crypto/atmel-aes.c b/drivers/crypto/atmel-aes.c index 4e9515e8dd25..8ea873bf6b86 100644 --- a/drivers/crypto/atmel-aes.c +++ b/drivers/crypto/atmel-aes.c @@ -1094,6 +1094,13 @@ static int atmel_aes_crypt(struct skcipher_request *req, unsigned long mode) if (opmode == AES_FLAGS_XTS && req->cryptlen < XTS_BLOCK_SIZE) return -EINVAL; + /* + * ECB, CBC, CFB, OFB or CTR mode require the plaintext and ciphertext + * to have a positve integer length. + */ + if (!req->cryptlen && opmode != AES_FLAGS_XTS) + return 0; + if ((opmode == AES_FLAGS_ECB || opmode == AES_FLAGS_CBC) && !IS_ALIGNED(req->cryptlen, crypto_skcipher_blocksize(skcipher))) return -EINVAL; -- cgit v1.2.3 From 031f5e00150895232e658f67e66382b6c867ba13 Mon Sep 17 00:00:00 2001 From: Tudor Ambarus Date: Tue, 20 Jul 2021 11:55:32 +0300 Subject: crypto: atmel-tdes - Add FIPS81's zero length cryptlen constraint FIPS81 requires for the ECB, CBC, CFB, and OFB modes that the plaintext and ciphertext to have a positive integer length. Add this constraint and just return 0 for a zero length cryptlen. Signed-off-by: Tudor Ambarus Signed-off-by: Herbert Xu --- drivers/crypto/atmel-tdes.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/crypto/atmel-tdes.c b/drivers/crypto/atmel-tdes.c index abbf1b7a75ab..8380e0ab149a 100644 --- a/drivers/crypto/atmel-tdes.c +++ b/drivers/crypto/atmel-tdes.c @@ -682,6 +682,9 @@ static int atmel_tdes_crypt(struct skcipher_request *req, unsigned long mode) struct atmel_tdes_reqctx *rctx = skcipher_request_ctx(req); struct device *dev = ctx->dd->dev; + if (!req->cryptlen) + return 0; + switch (mode & TDES_FLAGS_OPMODE_MASK) { case TDES_FLAGS_CFB8: if (!IS_ALIGNED(req->cryptlen, CFB8_BLOCK_SIZE)) { -- cgit v1.2.3 From 76d579f251a2d8501f4504b07355c8738b72c2bd Mon Sep 17 00:00:00 2001 From: Tudor Ambarus Date: Tue, 20 Jul 2021 11:55:33 +0300 Subject: crypto: atmel - Set OFB's blocksize to 1 Set cra_blocksize to 1 to indicate OFB is a stream cipher. Signed-off-by: Tudor Ambarus Signed-off-by: Herbert Xu --- drivers/crypto/atmel-aes.c | 2 +- drivers/crypto/atmel-tdes.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/crypto/atmel-aes.c b/drivers/crypto/atmel-aes.c index 8ea873bf6b86..9ec007b4f8fc 100644 --- a/drivers/crypto/atmel-aes.c +++ b/drivers/crypto/atmel-aes.c @@ -1305,7 +1305,7 @@ static struct skcipher_alg aes_algs[] = { { .base.cra_name = "ofb(aes)", .base.cra_driver_name = "atmel-ofb-aes", - .base.cra_blocksize = AES_BLOCK_SIZE, + .base.cra_blocksize = 1, .base.cra_ctxsize = sizeof(struct atmel_aes_ctx), .init = atmel_aes_init_tfm, diff --git a/drivers/crypto/atmel-tdes.c b/drivers/crypto/atmel-tdes.c index 8380e0ab149a..e30786ec9f2d 100644 --- a/drivers/crypto/atmel-tdes.c +++ b/drivers/crypto/atmel-tdes.c @@ -991,7 +991,7 @@ static struct skcipher_alg tdes_algs[] = { { .base.cra_name = "ofb(des)", .base.cra_driver_name = "atmel-ofb-des", - .base.cra_blocksize = DES_BLOCK_SIZE, + .base.cra_blocksize = 1, .base.cra_alignmask = 0x7, .min_keysize = DES_KEY_SIZE, -- cgit v1.2.3 From bf2db8e74249e691cbd1df7fca01810d8f48532b Mon Sep 17 00:00:00 2001 From: Tudor Ambarus Date: Tue, 20 Jul 2021 11:55:34 +0300 Subject: crypto: atmel-aes - Add fallback to XTS software implementation XTS is supported just for input lengths with data units of 128-bit blocks. Add a fallback to software implementation when the last block is shorter than 128 bits. Signed-off-by: Tudor Ambarus Signed-off-by: Herbert Xu --- drivers/crypto/atmel-aes.c | 55 ++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 51 insertions(+), 4 deletions(-) diff --git a/drivers/crypto/atmel-aes.c b/drivers/crypto/atmel-aes.c index 9ec007b4f8fc..e74fcaac551e 100644 --- a/drivers/crypto/atmel-aes.c +++ b/drivers/crypto/atmel-aes.c @@ -143,6 +143,7 @@ struct atmel_aes_xts_ctx { struct atmel_aes_base_ctx base; u32 key2[AES_KEYSIZE_256 / sizeof(u32)]; + struct crypto_skcipher *fallback_tfm; }; #if IS_ENABLED(CONFIG_CRYPTO_DEV_ATMEL_AUTHENC) @@ -155,6 +156,7 @@ struct atmel_aes_authenc_ctx { struct atmel_aes_reqctx { unsigned long mode; u8 lastc[AES_BLOCK_SIZE]; + struct skcipher_request fallback_req; }; #if IS_ENABLED(CONFIG_CRYPTO_DEV_ATMEL_AUTHENC) @@ -1083,6 +1085,22 @@ static int atmel_aes_ctr_start(struct atmel_aes_dev *dd) return atmel_aes_ctr_transfer(dd); } +static int atmel_aes_xts_fallback(struct skcipher_request *req, bool enc) +{ + struct atmel_aes_reqctx *rctx = skcipher_request_ctx(req); + struct atmel_aes_xts_ctx *ctx = crypto_skcipher_ctx( + crypto_skcipher_reqtfm(req)); + + skcipher_request_set_tfm(&rctx->fallback_req, ctx->fallback_tfm); + skcipher_request_set_callback(&rctx->fallback_req, req->base.flags, + req->base.complete, req->base.data); + skcipher_request_set_crypt(&rctx->fallback_req, req->src, req->dst, + req->cryptlen, req->iv); + + return enc ? crypto_skcipher_encrypt(&rctx->fallback_req) : + crypto_skcipher_decrypt(&rctx->fallback_req); +} + static int atmel_aes_crypt(struct skcipher_request *req, unsigned long mode) { struct crypto_skcipher *skcipher = crypto_skcipher_reqtfm(req); @@ -1091,8 +1109,14 @@ static int atmel_aes_crypt(struct skcipher_request *req, unsigned long mode) struct atmel_aes_dev *dd; u32 opmode = mode & AES_FLAGS_OPMODE_MASK; - if (opmode == AES_FLAGS_XTS && req->cryptlen < XTS_BLOCK_SIZE) - return -EINVAL; + if (opmode == AES_FLAGS_XTS) { + if (req->cryptlen < XTS_BLOCK_SIZE) + return -EINVAL; + + if (!IS_ALIGNED(req->cryptlen, XTS_BLOCK_SIZE)) + return atmel_aes_xts_fallback(req, + mode & AES_FLAGS_ENCRYPT); + } /* * ECB, CBC, CFB, OFB or CTR mode require the plaintext and ciphertext @@ -1864,6 +1888,13 @@ static int atmel_aes_xts_setkey(struct crypto_skcipher *tfm, const u8 *key, if (err) return err; + crypto_skcipher_clear_flags(ctx->fallback_tfm, CRYPTO_TFM_REQ_MASK); + crypto_skcipher_set_flags(ctx->fallback_tfm, tfm->base.crt_flags & + CRYPTO_TFM_REQ_MASK); + err = crypto_skcipher_setkey(ctx->fallback_tfm, key, keylen); + if (err) + return err; + memcpy(ctx->base.key, key, keylen/2); memcpy(ctx->key2, key + keylen/2, keylen/2); ctx->base.keylen = keylen/2; @@ -1884,18 +1915,33 @@ static int atmel_aes_xts_decrypt(struct skcipher_request *req) static int atmel_aes_xts_init_tfm(struct crypto_skcipher *tfm) { struct atmel_aes_xts_ctx *ctx = crypto_skcipher_ctx(tfm); + const char *tfm_name = crypto_tfm_alg_name(&tfm->base); - crypto_skcipher_set_reqsize(tfm, sizeof(struct atmel_aes_reqctx)); + ctx->fallback_tfm = crypto_alloc_skcipher(tfm_name, 0, + CRYPTO_ALG_NEED_FALLBACK); + if (IS_ERR(ctx->fallback_tfm)) + return PTR_ERR(ctx->fallback_tfm); + + crypto_skcipher_set_reqsize(tfm, sizeof(struct atmel_aes_reqctx) + + crypto_skcipher_reqsize(ctx->fallback_tfm)); ctx->base.start = atmel_aes_xts_start; return 0; } +static void atmel_aes_xts_exit_tfm(struct crypto_skcipher *tfm) +{ + struct atmel_aes_xts_ctx *ctx = crypto_skcipher_ctx(tfm); + + crypto_free_skcipher(ctx->fallback_tfm); +} + static struct skcipher_alg aes_xts_alg = { .base.cra_name = "xts(aes)", .base.cra_driver_name = "atmel-xts-aes", .base.cra_blocksize = AES_BLOCK_SIZE, .base.cra_ctxsize = sizeof(struct atmel_aes_xts_ctx), + .base.cra_flags = CRYPTO_ALG_NEED_FALLBACK, .min_keysize = 2 * AES_MIN_KEY_SIZE, .max_keysize = 2 * AES_MAX_KEY_SIZE, @@ -1904,6 +1950,7 @@ static struct skcipher_alg aes_xts_alg = { .encrypt = atmel_aes_xts_encrypt, .decrypt = atmel_aes_xts_decrypt, .init = atmel_aes_xts_init_tfm, + .exit = atmel_aes_xts_exit_tfm, }; #if IS_ENABLED(CONFIG_CRYPTO_DEV_ATMEL_AUTHENC) @@ -2373,7 +2420,7 @@ static void atmel_aes_unregister_algs(struct atmel_aes_dev *dd) static void atmel_aes_crypto_alg_init(struct crypto_alg *alg) { - alg->cra_flags = CRYPTO_ALG_ASYNC; + alg->cra_flags |= CRYPTO_ALG_ASYNC; alg->cra_alignmask = 0xf; alg->cra_priority = ATMEL_AES_PRIORITY; alg->cra_module = THIS_MODULE; -- cgit v1.2.3 From ec2088b66f7a143eb14063f598d0bb3f64654c38 Mon Sep 17 00:00:00 2001 From: Tudor Ambarus Date: Tue, 20 Jul 2021 11:55:35 +0300 Subject: crypto: atmel-aes - Allocate aes dev at tfm init time Allocate the atmel_aes_dev data at tfm init time, and not for each crypt request. There's a single AES IP per SoC, clarify that in the code. Signed-off-by: Tudor Ambarus Signed-off-by: Herbert Xu --- drivers/crypto/atmel-aes.c | 76 ++++++++++++++++++++++++++-------------------- 1 file changed, 43 insertions(+), 33 deletions(-) diff --git a/drivers/crypto/atmel-aes.c b/drivers/crypto/atmel-aes.c index e74fcaac551e..d0f387674d32 100644 --- a/drivers/crypto/atmel-aes.c +++ b/drivers/crypto/atmel-aes.c @@ -420,24 +420,15 @@ static inline size_t atmel_aes_padlen(size_t len, size_t block_size) return len ? block_size - len : 0; } -static struct atmel_aes_dev *atmel_aes_find_dev(struct atmel_aes_base_ctx *ctx) +static struct atmel_aes_dev *atmel_aes_dev_alloc(struct atmel_aes_base_ctx *ctx) { - struct atmel_aes_dev *aes_dd = NULL; - struct atmel_aes_dev *tmp; + struct atmel_aes_dev *aes_dd; spin_lock_bh(&atmel_aes.lock); - if (!ctx->dd) { - list_for_each_entry(tmp, &atmel_aes.dev_list, list) { - aes_dd = tmp; - break; - } - ctx->dd = aes_dd; - } else { - aes_dd = ctx->dd; - } - + /* One AES IP per SoC. */ + aes_dd = list_first_entry_or_null(&atmel_aes.dev_list, + struct atmel_aes_dev, list); spin_unlock_bh(&atmel_aes.lock); - return aes_dd; } @@ -969,7 +960,6 @@ static int atmel_aes_handle_queue(struct atmel_aes_dev *dd, ctx = crypto_tfm_ctx(areq->tfm); dd->areq = areq; - dd->ctx = ctx; start_async = (areq != new_areq); dd->is_async = start_async; @@ -1106,7 +1096,6 @@ static int atmel_aes_crypt(struct skcipher_request *req, unsigned long mode) struct crypto_skcipher *skcipher = crypto_skcipher_reqtfm(req); struct atmel_aes_base_ctx *ctx = crypto_skcipher_ctx(skcipher); struct atmel_aes_reqctx *rctx; - struct atmel_aes_dev *dd; u32 opmode = mode & AES_FLAGS_OPMODE_MASK; if (opmode == AES_FLAGS_XTS) { @@ -1152,10 +1141,6 @@ static int atmel_aes_crypt(struct skcipher_request *req, unsigned long mode) } ctx->is_aead = false; - dd = atmel_aes_find_dev(ctx); - if (!dd) - return -ENODEV; - rctx = skcipher_request_ctx(req); rctx->mode = mode; @@ -1169,7 +1154,7 @@ static int atmel_aes_crypt(struct skcipher_request *req, unsigned long mode) ivsize, 0); } - return atmel_aes_handle_queue(dd, &req->base); + return atmel_aes_handle_queue(ctx->dd, &req->base); } static int atmel_aes_setkey(struct crypto_skcipher *tfm, const u8 *key, @@ -1281,8 +1266,15 @@ static int atmel_aes_ctr_decrypt(struct skcipher_request *req) static int atmel_aes_init_tfm(struct crypto_skcipher *tfm) { struct atmel_aes_ctx *ctx = crypto_skcipher_ctx(tfm); + struct atmel_aes_dev *dd; + + dd = atmel_aes_dev_alloc(&ctx->base); + if (!dd) + return -ENODEV; crypto_skcipher_set_reqsize(tfm, sizeof(struct atmel_aes_reqctx)); + ctx->base.dd = dd; + ctx->base.dd->ctx = &ctx->base; ctx->base.start = atmel_aes_start; return 0; @@ -1291,8 +1283,15 @@ static int atmel_aes_init_tfm(struct crypto_skcipher *tfm) static int atmel_aes_ctr_init_tfm(struct crypto_skcipher *tfm) { struct atmel_aes_ctx *ctx = crypto_skcipher_ctx(tfm); + struct atmel_aes_dev *dd; + + dd = atmel_aes_dev_alloc(&ctx->base); + if (!dd) + return -ENODEV; crypto_skcipher_set_reqsize(tfm, sizeof(struct atmel_aes_reqctx)); + ctx->base.dd = dd; + ctx->base.dd->ctx = &ctx->base; ctx->base.start = atmel_aes_ctr_start; return 0; @@ -1730,20 +1729,15 @@ static int atmel_aes_gcm_crypt(struct aead_request *req, { struct atmel_aes_base_ctx *ctx; struct atmel_aes_reqctx *rctx; - struct atmel_aes_dev *dd; ctx = crypto_aead_ctx(crypto_aead_reqtfm(req)); ctx->block_size = AES_BLOCK_SIZE; ctx->is_aead = true; - dd = atmel_aes_find_dev(ctx); - if (!dd) - return -ENODEV; - rctx = aead_request_ctx(req); rctx->mode = AES_FLAGS_GCM | mode; - return atmel_aes_handle_queue(dd, &req->base); + return atmel_aes_handle_queue(ctx->dd, &req->base); } static int atmel_aes_gcm_setkey(struct crypto_aead *tfm, const u8 *key, @@ -1781,8 +1775,15 @@ static int atmel_aes_gcm_decrypt(struct aead_request *req) static int atmel_aes_gcm_init(struct crypto_aead *tfm) { struct atmel_aes_gcm_ctx *ctx = crypto_aead_ctx(tfm); + struct atmel_aes_dev *dd; + + dd = atmel_aes_dev_alloc(&ctx->base); + if (!dd) + return -ENODEV; crypto_aead_set_reqsize(tfm, sizeof(struct atmel_aes_reqctx)); + ctx->base.dd = dd; + ctx->base.dd->ctx = &ctx->base; ctx->base.start = atmel_aes_gcm_start; return 0; @@ -1915,8 +1916,13 @@ static int atmel_aes_xts_decrypt(struct skcipher_request *req) static int atmel_aes_xts_init_tfm(struct crypto_skcipher *tfm) { struct atmel_aes_xts_ctx *ctx = crypto_skcipher_ctx(tfm); + struct atmel_aes_dev *dd; const char *tfm_name = crypto_tfm_alg_name(&tfm->base); + dd = atmel_aes_dev_alloc(&ctx->base); + if (!dd) + return -ENODEV; + ctx->fallback_tfm = crypto_alloc_skcipher(tfm_name, 0, CRYPTO_ALG_NEED_FALLBACK); if (IS_ERR(ctx->fallback_tfm)) @@ -1924,6 +1930,8 @@ static int atmel_aes_xts_init_tfm(struct crypto_skcipher *tfm) crypto_skcipher_set_reqsize(tfm, sizeof(struct atmel_aes_reqctx) + crypto_skcipher_reqsize(ctx->fallback_tfm)); + ctx->base.dd = dd; + ctx->base.dd->ctx = &ctx->base; ctx->base.start = atmel_aes_xts_start; return 0; @@ -2137,6 +2145,11 @@ static int atmel_aes_authenc_init_tfm(struct crypto_aead *tfm, { struct atmel_aes_authenc_ctx *ctx = crypto_aead_ctx(tfm); unsigned int auth_reqsize = atmel_sha_authenc_get_reqsize(); + struct atmel_aes_dev *dd; + + dd = atmel_aes_dev_alloc(&ctx->base); + if (!dd) + return -ENODEV; ctx->auth = atmel_sha_authenc_spawn(auth_mode); if (IS_ERR(ctx->auth)) @@ -2144,6 +2157,8 @@ static int atmel_aes_authenc_init_tfm(struct crypto_aead *tfm, crypto_aead_set_reqsize(tfm, (sizeof(struct atmel_aes_authenc_reqctx) + auth_reqsize)); + ctx->base.dd = dd; + ctx->base.dd->ctx = &ctx->base; ctx->base.start = atmel_aes_authenc_start; return 0; @@ -2189,7 +2204,6 @@ static int atmel_aes_authenc_crypt(struct aead_request *req, struct atmel_aes_base_ctx *ctx = crypto_aead_ctx(tfm); u32 authsize = crypto_aead_authsize(tfm); bool enc = (mode & AES_FLAGS_ENCRYPT); - struct atmel_aes_dev *dd; /* Compute text length. */ if (!enc && req->cryptlen < authsize) @@ -2208,11 +2222,7 @@ static int atmel_aes_authenc_crypt(struct aead_request *req, ctx->block_size = AES_BLOCK_SIZE; ctx->is_aead = true; - dd = atmel_aes_find_dev(ctx); - if (!dd) - return -ENODEV; - - return atmel_aes_handle_queue(dd, &req->base); + return atmel_aes_handle_queue(ctx->dd, &req->base); } static int atmel_aes_authenc_cbc_aes_encrypt(struct aead_request *req) -- cgit v1.2.3 From 192b722f3866d3fb45b9e6a6ecd02ff09f2aefbe Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Tue, 20 Jul 2021 21:01:04 +0800 Subject: crypto: sun8i-ss - Use kfree_sensitive The kfree_sensitive is a kernel API to clear sensitive information that should not be leaked to other future users of the same memory objects and free the memory. Its function is the same as the combination of memzero_explicit and kfree. Thus, we can replace the combination APIs with the single kfree_sensitive API. Signed-off-by: Jason Wang Signed-off-by: Herbert Xu --- drivers/crypto/allwinner/sun8i-ss/sun8i-ss-prng.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-prng.c b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-prng.c index 3191527928e4..246a6782674c 100644 --- a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-prng.c +++ b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-prng.c @@ -20,8 +20,7 @@ int sun8i_ss_prng_seed(struct crypto_rng *tfm, const u8 *seed, struct sun8i_ss_rng_tfm_ctx *ctx = crypto_rng_ctx(tfm); if (ctx->seed && ctx->slen != slen) { - memzero_explicit(ctx->seed, ctx->slen); - kfree(ctx->seed); + kfree_sensitive(ctx->seed); ctx->slen = 0; ctx->seed = NULL; } @@ -48,8 +47,7 @@ void sun8i_ss_prng_exit(struct crypto_tfm *tfm) { struct sun8i_ss_rng_tfm_ctx *ctx = crypto_tfm_ctx(tfm); - memzero_explicit(ctx->seed, ctx->slen); - kfree(ctx->seed); + kfree_sensitive(ctx->seed); ctx->seed = NULL; ctx->slen = 0; } @@ -167,9 +165,8 @@ err_iv: /* Update seed */ memcpy(ctx->seed, d + dlen, ctx->slen); } - memzero_explicit(d, todo); err_free: - kfree(d); + kfree_sensitive(d); return err; } -- cgit v1.2.3 From d5ee8e750c9449e9849a09ce6fb6b8adeaa66adc Mon Sep 17 00:00:00 2001 From: Xiyu Yang Date: Tue, 20 Jul 2021 11:05:11 -0400 Subject: padata: Convert from atomic_t to refcount_t on parallel_data->refcnt refcount_t type and corresponding API can protect refcounters from accidental underflow and overflow and further use-after-free situations. Signed-off-by: Xiyu Yang Signed-off-by: Xin Tan Acked-by: Daniel Jordan Signed-off-by: Herbert Xu --- include/linux/padata.h | 3 ++- kernel/padata.c | 8 ++++---- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/include/linux/padata.h b/include/linux/padata.h index a433f13fc4bf..495b16b6b4d7 100644 --- a/include/linux/padata.h +++ b/include/linux/padata.h @@ -12,6 +12,7 @@ #ifndef PADATA_H #define PADATA_H +#include #include #include #include @@ -96,7 +97,7 @@ struct parallel_data { struct padata_shell *ps; struct padata_list __percpu *reorder_list; struct padata_serial_queue __percpu *squeue; - atomic_t refcnt; + refcount_t refcnt; unsigned int seq_nr; unsigned int processed; int cpu; diff --git a/kernel/padata.c b/kernel/padata.c index d4d3ba6e1728..378c36080781 100644 --- a/kernel/padata.c +++ b/kernel/padata.c @@ -211,7 +211,7 @@ int padata_do_parallel(struct padata_shell *ps, if ((pinst->flags & PADATA_RESET)) goto out; - atomic_inc(&pd->refcnt); + refcount_inc(&pd->refcnt); padata->pd = pd; padata->cb_cpu = *cb_cpu; @@ -383,7 +383,7 @@ static void padata_serial_worker(struct work_struct *serial_work) } local_bh_enable(); - if (atomic_sub_and_test(cnt, &pd->refcnt)) + if (refcount_sub_and_test(cnt, &pd->refcnt)) padata_free_pd(pd); } @@ -593,7 +593,7 @@ static struct parallel_data *padata_alloc_pd(struct padata_shell *ps) padata_init_reorder_list(pd); padata_init_squeues(pd); pd->seq_nr = -1; - atomic_set(&pd->refcnt, 1); + refcount_set(&pd->refcnt, 1); spin_lock_init(&pd->lock); pd->cpu = cpumask_first(pd->cpumask.pcpu); INIT_WORK(&pd->reorder_work, invoke_padata_reorder); @@ -667,7 +667,7 @@ static int padata_replace(struct padata_instance *pinst) synchronize_rcu(); list_for_each_entry_continue_reverse(ps, &pinst->pslist, list) - if (atomic_dec_and_test(&ps->opd->refcnt)) + if (refcount_dec_and_test(&ps->opd->refcnt)) padata_free_pd(ps->opd); pinst->flags &= ~PADATA_RESET; -- cgit v1.2.3 From 0469dede0eeeefe12a9a2fd76078f4a266513457 Mon Sep 17 00:00:00 2001 From: Mian Yousaf Kaukab Date: Wed, 21 Jul 2021 10:39:05 +0200 Subject: crypto: ecc - handle unaligned input buffer in ecc_swap_digits MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ecdsa_set_pub_key() makes an u64 pointer at 1 byte offset of the key. This results in an unaligned u64 pointer. This pointer is passed to ecc_swap_digits() which assumes natural alignment. This causes a kernel crash on an armv7 platform: [ 0.409022] Unhandled fault: alignment exception (0x001) at 0xc2a0a6a9 ... [ 0.416982] PC is at ecdsa_set_pub_key+0xdc/0x120 ... [ 0.491492] Backtrace: [ 0.492059] [] (ecdsa_set_pub_key) from [] (test_akcipher_one+0xf4/0x6c0) Handle unaligned input buffer in ecc_swap_digits() by replacing be64_to_cpu() to get_unaligned_be64(). Change type of in pointer to void to reflect it doesn’t necessarily need to be aligned. Fixes: 4e6602916bc6 ("crypto: ecdsa - Add support for ECDSA signature verification") Reported-by: Guillaume Gardet Suggested-by: Takashi Iwai Signed-off-by: Mian Yousaf Kaukab Tested-by: Stefan Berger Signed-off-by: Herbert Xu --- crypto/ecc.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/crypto/ecc.h b/crypto/ecc.h index a006132646a4..1350e8eb6ac2 100644 --- a/crypto/ecc.h +++ b/crypto/ecc.h @@ -27,6 +27,7 @@ #define _CRYPTO_ECC_H #include +#include /* One digit is u64 qword. */ #define ECC_CURVE_NIST_P192_DIGITS 3 @@ -46,13 +47,13 @@ * @out: Output array * @ndigits: Number of digits to copy */ -static inline void ecc_swap_digits(const u64 *in, u64 *out, unsigned int ndigits) +static inline void ecc_swap_digits(const void *in, u64 *out, unsigned int ndigits) { const __be64 *src = (__force __be64 *)in; int i; for (i = 0; i < ndigits; i++) - out[i] = be64_to_cpu(src[ndigits - 1 - i]); + out[i] = get_unaligned_be64(&src[ndigits - 1 - i]); } /** -- cgit v1.2.3 From 089015d36127f2f620c7a1dca28449f676654850 Mon Sep 17 00:00:00 2001 From: Salah Triki Date: Thu, 22 Jul 2021 18:08:27 +0100 Subject: crypto: atmel-aes - use swap() Use swap() instead of implementing it in order to make code more clean. Signed-off-by: Salah Triki Reviewed-by: Tudor Ambarus Signed-off-by: Herbert Xu --- drivers/crypto/atmel-aes.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/drivers/crypto/atmel-aes.c b/drivers/crypto/atmel-aes.c index d0f387674d32..9391ccc03382 100644 --- a/drivers/crypto/atmel-aes.c +++ b/drivers/crypto/atmel-aes.c @@ -1859,12 +1859,8 @@ static int atmel_aes_xts_process_data(struct atmel_aes_dev *dd) * the order of the ciphered tweak bytes need to be reversed before * writing them into the ODATARx registers. */ - for (i = 0; i < AES_BLOCK_SIZE/2; ++i) { - u8 tmp = tweak_bytes[AES_BLOCK_SIZE - 1 - i]; - - tweak_bytes[AES_BLOCK_SIZE - 1 - i] = tweak_bytes[i]; - tweak_bytes[i] = tmp; - } + for (i = 0; i < AES_BLOCK_SIZE/2; ++i) + swap(tweak_bytes[i], tweak_bytes[AES_BLOCK_SIZE - 1 - i]); /* Process the data. */ atmel_aes_write_ctrl(dd, use_dma, NULL); -- cgit v1.2.3 From fe28140b3393b0ba1eb95cc109f974a7e58b26fd Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Tue, 27 Jul 2021 13:23:34 +0300 Subject: crypto: omap-sham - clear dma flags only after omap_sham_update_dma_stop() We should not clear FLAGS_DMA_ACTIVE before omap_sham_update_dma_stop() is done calling dma_unmap_sg(). We already clear FLAGS_DMA_ACTIVE at the end of omap_sham_update_dma_stop(). The early clearing of FLAGS_DMA_ACTIVE is not causing issues as we do not need to defer anything based on FLAGS_DMA_ACTIVE currently. So this can be applied as clean-up. Cc: Lokesh Vutla Cc: Tero Kristo Signed-off-by: Tony Lindgren Signed-off-by: Herbert Xu --- drivers/crypto/omap-sham.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/crypto/omap-sham.c b/drivers/crypto/omap-sham.c index dd53ad9987b0..a47ac60a4ee1 100644 --- a/drivers/crypto/omap-sham.c +++ b/drivers/crypto/omap-sham.c @@ -1736,7 +1736,7 @@ static void omap_sham_done_task(unsigned long data) if (test_and_clear_bit(FLAGS_OUTPUT_READY, &dd->flags)) goto finish; } else if (test_bit(FLAGS_DMA_READY, &dd->flags)) { - if (test_and_clear_bit(FLAGS_DMA_ACTIVE, &dd->flags)) { + if (test_bit(FLAGS_DMA_ACTIVE, &dd->flags)) { omap_sham_update_dma_stop(dd); if (dd->err) { err = dd->err; -- cgit v1.2.3 From 6a1ec89f2c56da9c2bd0afedb48268dde086d729 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Tue, 27 Jul 2021 13:23:35 +0300 Subject: crypto: omap-sham - initialize req only after omap_sham_hw_init() Let's only initialize dd->req after omap_sham_hw_init() in case of errors. Looks like leaving dd->req initialized on omap_sham_hw_init() errors is is not causing issues though as we return on errors. So this patch can be applied as clean-up. Cc: Lokesh Vutla Cc: Tero Kristo Signed-off-by: Tony Lindgren Signed-off-by: Herbert Xu --- drivers/crypto/omap-sham.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/crypto/omap-sham.c b/drivers/crypto/omap-sham.c index a47ac60a4ee1..c4411ad5788d 100644 --- a/drivers/crypto/omap-sham.c +++ b/drivers/crypto/omap-sham.c @@ -1093,12 +1093,12 @@ static int omap_sham_hash_one_req(struct crypto_engine *engine, void *areq) dev_dbg(dd->dev, "hash-one: op: %u, total: %u, digcnt: %zd, final: %d", ctx->op, ctx->total, ctx->digcnt, final); - dd->req = req; - err = omap_sham_hw_init(dd); if (err) return err; + dd->req = req; + if (ctx->digcnt) dd->pdata->copy_hash(req, 0); -- cgit v1.2.3 From f83fc1a0ee322d6577eca2bb6a3275245de7fccc Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Tue, 27 Jul 2021 13:23:36 +0300 Subject: crypto: omap-sham - add missing pm_runtime_dontuse_autosuspend() We should pair the usage of pm_runtime_use_autosuspend() with pm_runtime_dont_use_autosuspend(). Cc: Lokesh Vutla Cc: Tero Kristo Signed-off-by: Tony Lindgren Signed-off-by: Herbert Xu --- drivers/crypto/omap-sham.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/crypto/omap-sham.c b/drivers/crypto/omap-sham.c index c4411ad5788d..b825f64f42f6 100644 --- a/drivers/crypto/omap-sham.c +++ b/drivers/crypto/omap-sham.c @@ -2198,6 +2198,7 @@ err_engine: list_del(&dd->list); spin_unlock(&sham.lock); err_pm: + pm_runtime_dont_use_autosuspend(dev); pm_runtime_disable(dev); if (!dd->polling_mode) dma_release_channel(dd->dma_lch); @@ -2225,6 +2226,7 @@ static int omap_sham_remove(struct platform_device *pdev) dd->pdata->algs_info[i].registered--; } tasklet_kill(&dd->done_task); + pm_runtime_dont_use_autosuspend(&pdev->dev); pm_runtime_disable(&pdev->dev); if (!dd->polling_mode) -- cgit v1.2.3 From f23f2186a4d0c9ff681bcf00e02575c0712f8fb5 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Tue, 27 Jul 2021 13:23:37 +0300 Subject: crypto: omap-sham - drop old hw_init and unused FLAGS_INIT FLAGS_INIT is now unused and we can just use standard runtime PM functions instead. Cc: Lokesh Vutla Cc: Tero Kristo Signed-off-by: Tony Lindgren Signed-off-by: Herbert Xu --- drivers/crypto/omap-sham.c | 26 +++++--------------------- 1 file changed, 5 insertions(+), 21 deletions(-) diff --git a/drivers/crypto/omap-sham.c b/drivers/crypto/omap-sham.c index b825f64f42f6..7221169a7af0 100644 --- a/drivers/crypto/omap-sham.c +++ b/drivers/crypto/omap-sham.c @@ -105,7 +105,6 @@ #define FLAGS_FINAL 1 #define FLAGS_DMA_ACTIVE 2 #define FLAGS_OUTPUT_READY 3 -#define FLAGS_INIT 4 #define FLAGS_CPU 5 #define FLAGS_DMA_READY 6 #define FLAGS_AUTO_XOR 7 @@ -368,24 +367,6 @@ static void omap_sham_copy_ready_hash(struct ahash_request *req) hash[i] = le32_to_cpup((__le32 *)in + i); } -static int omap_sham_hw_init(struct omap_sham_dev *dd) -{ - int err; - - err = pm_runtime_resume_and_get(dd->dev); - if (err < 0) { - dev_err(dd->dev, "failed to get sync: %d\n", err); - return err; - } - - if (!test_bit(FLAGS_INIT, &dd->flags)) { - set_bit(FLAGS_INIT, &dd->flags); - dd->err = 0; - } - - return 0; -} - static void omap_sham_write_ctrl_omap2(struct omap_sham_dev *dd, size_t length, int final, int dma) { @@ -1093,10 +1074,13 @@ static int omap_sham_hash_one_req(struct crypto_engine *engine, void *areq) dev_dbg(dd->dev, "hash-one: op: %u, total: %u, digcnt: %zd, final: %d", ctx->op, ctx->total, ctx->digcnt, final); - err = omap_sham_hw_init(dd); - if (err) + err = pm_runtime_resume_and_get(dd->dev); + if (err < 0) { + dev_err(dd->dev, "failed to get sync: %d\n", err); return err; + } + dd->err = 0; dd->req = req; if (ctx->digcnt) -- cgit v1.2.3 From 70c68d163986985a41fb86979d379212e11f007f Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Tue, 27 Jul 2021 13:23:38 +0300 Subject: crypto: omap-sham - drop suspend and resume functions Let's get rid of the suspend and resume calls to runtime PM as these calls do not idle the hardware. The runtime suspend has been disabled for system suspend since commit 88d26136a256 ("PM: Prevent runtime suspend during system resume"). Instead of runtime PM, the system suspend and resume functions should call driver internal shared functions to idle the hardware as needed. Cc: Lokesh Vutla Cc: Tero Kristo Signed-off-by: Tony Lindgren Signed-off-by: Herbert Xu --- drivers/crypto/omap-sham.c | 21 --------------------- 1 file changed, 21 deletions(-) diff --git a/drivers/crypto/omap-sham.c b/drivers/crypto/omap-sham.c index 7221169a7af0..04c1b7b4d18a 100644 --- a/drivers/crypto/omap-sham.c +++ b/drivers/crypto/omap-sham.c @@ -2221,32 +2221,11 @@ static int omap_sham_remove(struct platform_device *pdev) return 0; } -#ifdef CONFIG_PM_SLEEP -static int omap_sham_suspend(struct device *dev) -{ - pm_runtime_put_sync(dev); - return 0; -} - -static int omap_sham_resume(struct device *dev) -{ - int err = pm_runtime_resume_and_get(dev); - if (err < 0) { - dev_err(dev, "failed to get sync: %d\n", err); - return err; - } - return 0; -} -#endif - -static SIMPLE_DEV_PM_OPS(omap_sham_pm_ops, omap_sham_suspend, omap_sham_resume); - static struct platform_driver omap_sham_driver = { .probe = omap_sham_probe, .remove = omap_sham_remove, .driver = { .name = "omap-sham", - .pm = &omap_sham_pm_ops, .of_match_table = omap_sham_of_match, }, }; -- cgit v1.2.3 From 1dd0d7fe4b7a50f3a9580e994060e8a8a1c3263e Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Tue, 27 Jul 2021 13:23:39 +0300 Subject: crypto: omap-sham - drop pm_runtime_irqsafe() usage Commit b0a3d8986a76 ("crypto: omap-sham - Use pm_runtime_irq_safe()") added the use of pm_runtime_irq_safe() as pm_runtime_get_sync() was called from a tasklet. We now use the crypto engine queue instead of a custom queue since commit 33c3d434d91 ("crypto: omap-sham - convert to use crypto engine"). We want to drop the use of pm_runtime_irq_safe() in general as it takes a permanent usage count on the parent device causing issues for power management. Based on testing with CONFIG_DEBUG_ATOMIC_SLEEP=y, modprobe omap-sham, followed by modprobe tcrypt sec=1 mode=423, I have not been able to reproduce the scheduling while atomic issue seen earlier with current kernels and we can just drop the call to pm_runtime_irq_safe(). Cc: Lokesh Vutla Cc: Tero Kristo Signed-off-by: Tony Lindgren Signed-off-by: Herbert Xu --- drivers/crypto/omap-sham.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/crypto/omap-sham.c b/drivers/crypto/omap-sham.c index 04c1b7b4d18a..735abd8f68d6 100644 --- a/drivers/crypto/omap-sham.c +++ b/drivers/crypto/omap-sham.c @@ -2113,7 +2113,6 @@ static int omap_sham_probe(struct platform_device *pdev) dd->fallback_sz = OMAP_SHA_DMA_THRESHOLD; pm_runtime_enable(dev); - pm_runtime_irq_safe(dev); err = pm_runtime_get_sync(dev); if (err < 0) { -- cgit v1.2.3 From 5441a07a127f106c9936e4f9fa1a8a93e3f31828 Mon Sep 17 00:00:00 2001 From: Brijesh Singh Date: Wed, 28 Jul 2021 10:15:21 -0500 Subject: crypto: ccp - shutdown SEV firmware on kexec The commit 97f9ac3db6612 ("crypto: ccp - Add support for SEV-ES to the PSP driver") added support to allocate Trusted Memory Region (TMR) used during the SEV-ES firmware initialization. The TMR gets locked during the firmware initialization and unlocked during the shutdown. While the TMR is locked, access to it is disallowed. Currently, the CCP driver does not shutdown the firmware during the kexec reboot, leaving the TMR memory locked. Register a callback to shutdown the SEV firmware on the kexec boot. Fixes: 97f9ac3db6612 ("crypto: ccp - Add support for SEV-ES to the PSP driver") Reported-by: Lucas Nussbaum Tested-by: Lucas Nussbaum Cc: Cc: Tom Lendacky Cc: Joerg Roedel Cc: Herbert Xu Cc: David Rientjes Signed-off-by: Brijesh Singh Acked-by: Tom Lendacky Signed-off-by: Herbert Xu --- drivers/crypto/ccp/sev-dev.c | 49 +++++++++++++++++++++----------------------- drivers/crypto/ccp/sp-pci.c | 12 +++++++++++ 2 files changed, 35 insertions(+), 26 deletions(-) diff --git a/drivers/crypto/ccp/sev-dev.c b/drivers/crypto/ccp/sev-dev.c index 91808402e0bf..2ecb0e1f65d8 100644 --- a/drivers/crypto/ccp/sev-dev.c +++ b/drivers/crypto/ccp/sev-dev.c @@ -300,6 +300,9 @@ static int __sev_platform_shutdown_locked(int *error) struct sev_device *sev = psp_master->sev_data; int ret; + if (sev->state == SEV_STATE_UNINIT) + return 0; + ret = __sev_do_cmd_locked(SEV_CMD_SHUTDOWN, NULL, error); if (ret) return ret; @@ -1019,6 +1022,20 @@ e_err: return ret; } +static void sev_firmware_shutdown(struct sev_device *sev) +{ + sev_platform_shutdown(NULL); + + if (sev_es_tmr) { + /* The TMR area was encrypted, flush it from the cache */ + wbinvd_on_all_cpus(); + + free_pages((unsigned long)sev_es_tmr, + get_order(SEV_ES_TMR_SIZE)); + sev_es_tmr = NULL; + } +} + void sev_dev_destroy(struct psp_device *psp) { struct sev_device *sev = psp->sev_data; @@ -1026,6 +1043,8 @@ void sev_dev_destroy(struct psp_device *psp) if (!sev) return; + sev_firmware_shutdown(sev); + if (sev->misc) kref_put(&misc_dev->refcount, sev_exit); @@ -1056,21 +1075,6 @@ void sev_pci_init(void) if (sev_get_api_version()) goto err; - /* - * If platform is not in UNINIT state then firmware upgrade and/or - * platform INIT command will fail. These command require UNINIT state. - * - * In a normal boot we should never run into case where the firmware - * is not in UNINIT state on boot. But in case of kexec boot, a reboot - * may not go through a typical shutdown sequence and may leave the - * firmware in INIT or WORKING state. - */ - - if (sev->state != SEV_STATE_UNINIT) { - sev_platform_shutdown(NULL); - sev->state = SEV_STATE_UNINIT; - } - if (sev_version_greater_or_equal(0, 15) && sev_update_firmware(sev->dev) == 0) sev_get_api_version(); @@ -1115,17 +1119,10 @@ err: void sev_pci_exit(void) { - if (!psp_master->sev_data) - return; - - sev_platform_shutdown(NULL); + struct sev_device *sev = psp_master->sev_data; - if (sev_es_tmr) { - /* The TMR area was encrypted, flush it from the cache */ - wbinvd_on_all_cpus(); + if (!sev) + return; - free_pages((unsigned long)sev_es_tmr, - get_order(SEV_ES_TMR_SIZE)); - sev_es_tmr = NULL; - } + sev_firmware_shutdown(sev); } diff --git a/drivers/crypto/ccp/sp-pci.c b/drivers/crypto/ccp/sp-pci.c index 6fb6ba35f89d..9bcc1884c06a 100644 --- a/drivers/crypto/ccp/sp-pci.c +++ b/drivers/crypto/ccp/sp-pci.c @@ -241,6 +241,17 @@ e_err: return ret; } +static void sp_pci_shutdown(struct pci_dev *pdev) +{ + struct device *dev = &pdev->dev; + struct sp_device *sp = dev_get_drvdata(dev); + + if (!sp) + return; + + sp_destroy(sp); +} + static void sp_pci_remove(struct pci_dev *pdev) { struct device *dev = &pdev->dev; @@ -371,6 +382,7 @@ static struct pci_driver sp_pci_driver = { .id_table = sp_pci_table, .probe = sp_pci_probe, .remove = sp_pci_remove, + .shutdown = sp_pci_shutdown, .driver.pm = &sp_pci_pm_ops, }; -- cgit v1.2.3 From b83c2d92be719bf7b1d24ac02aaa53db3991ea58 Mon Sep 17 00:00:00 2001 From: Andre Przywara Date: Sat, 31 Jul 2021 21:48:44 +0100 Subject: firmware: smccc: Register smccc_trng platform device At the moment we probe for the Random Number Generator SMCCC service, and use that in the core code (arch_get_random). However the hardware entropy can also be useful to access from userland, and be it to assess its quality. Register a platform device when the SMCCC TRNG service is detected, to allow a hw_random driver to hook onto this. The function registering the device is deliberately made in a way which allows expansion, so other services that could be exposed via a platform device (or some other interface), can be added here easily. Signed-off-by: Andre Przywara Reviewed-by: Ard Biesheuvel Reviewed-by: Mark Brown Signed-off-by: Herbert Xu --- drivers/firmware/smccc/smccc.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/drivers/firmware/smccc/smccc.c b/drivers/firmware/smccc/smccc.c index 9f937b125ab0..60ccf3e90d7d 100644 --- a/drivers/firmware/smccc/smccc.c +++ b/drivers/firmware/smccc/smccc.c @@ -9,6 +9,7 @@ #include #include #include +#include #include static u32 smccc_version = ARM_SMCCC_VERSION_1_0; @@ -42,3 +43,19 @@ u32 arm_smccc_get_version(void) return smccc_version; } EXPORT_SYMBOL_GPL(arm_smccc_get_version); + +static int __init smccc_devices_init(void) +{ + struct platform_device *pdev; + + if (smccc_trng_available) { + pdev = platform_device_register_simple("smccc_trng", -1, + NULL, 0); + if (IS_ERR(pdev)) + pr_err("smccc_trng: could not register device: %ld\n", + PTR_ERR(pdev)); + } + + return 0; +} +device_initcall(smccc_devices_init); -- cgit v1.2.3 From 0888d04b47a165ae8c429c6fe11b3c43f5017f31 Mon Sep 17 00:00:00 2001 From: Andre Przywara Date: Sat, 31 Jul 2021 21:48:45 +0100 Subject: hwrng: Add Arm SMCCC TRNG based driver The "Arm True Random Number Generator Firmware Interface"[1] provides an SMCCC based interface to a true hardware random number generator. So far we are using that in arch_get_random_seed(), but it might be useful to expose the entropy through the /dev/hwrng device as well. This allows to assess the quality of the implementation, by using "rngtest" from the rng-tools package, for example. Add a simple platform driver implementing the hw_random interface. The corresponding platform device is created by the SMCCC core code, we just match it here by name and provide a module alias. Since the firmware takes care about serialisation, this can happily coexist with the arch_get_random_seed() bits. [1] https://developer.arm.com/documentation/den0098/latest/ Signed-off-by: Andre Przywara Reviewed-by: Ard Biesheuvel Reviewed-by: Mark Brown Signed-off-by: Herbert Xu --- drivers/char/hw_random/Kconfig | 14 ++++ drivers/char/hw_random/Makefile | 1 + drivers/char/hw_random/arm_smccc_trng.c | 123 ++++++++++++++++++++++++++++++++ 3 files changed, 138 insertions(+) create mode 100644 drivers/char/hw_random/arm_smccc_trng.c diff --git a/drivers/char/hw_random/Kconfig b/drivers/char/hw_random/Kconfig index 3f166c8a4099..239eca4d6805 100644 --- a/drivers/char/hw_random/Kconfig +++ b/drivers/char/hw_random/Kconfig @@ -524,6 +524,20 @@ config HW_RANDOM_XIPHERA To compile this driver as a module, choose M here: the module will be called xiphera-trng. +config HW_RANDOM_ARM_SMCCC_TRNG + tristate "Arm SMCCC TRNG firmware interface support" + depends on HAVE_ARM_SMCCC_DISCOVERY + default HW_RANDOM + help + Say 'Y' to enable the True Random Number Generator driver using + the Arm SMCCC TRNG firmware interface. This reads entropy from + higher exception levels (firmware, hypervisor). Uses SMCCC for + communicating with the firmware: + https://developer.arm.com/documentation/den0098/latest/ + + To compile this driver as a module, choose M here: the + module will be called arm_smccc_trng. + endif # HW_RANDOM config UML_RANDOM diff --git a/drivers/char/hw_random/Makefile b/drivers/char/hw_random/Makefile index 8933fada74f2..a5a1c765a394 100644 --- a/drivers/char/hw_random/Makefile +++ b/drivers/char/hw_random/Makefile @@ -45,3 +45,4 @@ obj-$(CONFIG_HW_RANDOM_OPTEE) += optee-rng.o obj-$(CONFIG_HW_RANDOM_NPCM) += npcm-rng.o obj-$(CONFIG_HW_RANDOM_CCTRNG) += cctrng.o obj-$(CONFIG_HW_RANDOM_XIPHERA) += xiphera-trng.o +obj-$(CONFIG_HW_RANDOM_ARM_SMCCC_TRNG) += arm_smccc_trng.o diff --git a/drivers/char/hw_random/arm_smccc_trng.c b/drivers/char/hw_random/arm_smccc_trng.c new file mode 100644 index 000000000000..b24ac39a903b --- /dev/null +++ b/drivers/char/hw_random/arm_smccc_trng.c @@ -0,0 +1,123 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Randomness driver for the ARM SMCCC TRNG Firmware Interface + * https://developer.arm.com/documentation/den0098/latest/ + * + * Copyright (C) 2020 Arm Ltd. + * + * The ARM TRNG firmware interface specifies a protocol to read entropy + * from a higher exception level, to abstract from any machine specific + * implemenations and allow easier use in hypervisors. + * + * The firmware interface is realised using the SMCCC specification. + */ + +#include +#include +#include +#include +#include +#include + +#ifdef CONFIG_ARM64 +#define ARM_SMCCC_TRNG_RND ARM_SMCCC_TRNG_RND64 +#define MAX_BITS_PER_CALL (3 * 64UL) +#else +#define ARM_SMCCC_TRNG_RND ARM_SMCCC_TRNG_RND32 +#define MAX_BITS_PER_CALL (3 * 32UL) +#endif + +/* We don't want to allow the firmware to stall us forever. */ +#define SMCCC_TRNG_MAX_TRIES 20 + +#define SMCCC_RET_TRNG_INVALID_PARAMETER -2 +#define SMCCC_RET_TRNG_NO_ENTROPY -3 + +static int copy_from_registers(char *buf, struct arm_smccc_res *res, + size_t bytes) +{ + unsigned int chunk, copied; + + if (bytes == 0) + return 0; + + chunk = min(bytes, sizeof(long)); + memcpy(buf, &res->a3, chunk); + copied = chunk; + if (copied >= bytes) + return copied; + + chunk = min((bytes - copied), sizeof(long)); + memcpy(&buf[copied], &res->a2, chunk); + copied += chunk; + if (copied >= bytes) + return copied; + + chunk = min((bytes - copied), sizeof(long)); + memcpy(&buf[copied], &res->a1, chunk); + + return copied + chunk; +} + +static int smccc_trng_read(struct hwrng *rng, void *data, size_t max, bool wait) +{ + struct arm_smccc_res res; + u8 *buf = data; + unsigned int copied = 0; + int tries = 0; + + while (copied < max) { + size_t bits = min_t(size_t, (max - copied) * BITS_PER_BYTE, + MAX_BITS_PER_CALL); + + arm_smccc_1_1_invoke(ARM_SMCCC_TRNG_RND, bits, &res); + if ((int)res.a0 < 0) + return (int)res.a0; + + switch ((int)res.a0) { + case SMCCC_RET_SUCCESS: + copied += copy_from_registers(buf + copied, &res, + bits / BITS_PER_BYTE); + tries = 0; + break; + case SMCCC_RET_TRNG_NO_ENTROPY: + if (!wait) + return copied; + tries++; + if (tries >= SMCCC_TRNG_MAX_TRIES) + return copied; + cond_resched(); + break; + } + } + + return copied; +} + +static int smccc_trng_probe(struct platform_device *pdev) +{ + struct hwrng *trng; + + trng = devm_kzalloc(&pdev->dev, sizeof(*trng), GFP_KERNEL); + if (!trng) + return -ENOMEM; + + trng->name = "smccc_trng"; + trng->read = smccc_trng_read; + + platform_set_drvdata(pdev, trng); + + return devm_hwrng_register(&pdev->dev, trng); +} + +static struct platform_driver smccc_trng_driver = { + .driver = { + .name = "smccc_trng", + }, + .probe = smccc_trng_probe, +}; +module_platform_driver(smccc_trng_driver); + +MODULE_ALIAS("platform:smccc_trng"); +MODULE_AUTHOR("Andre Przywara"); +MODULE_LICENSE("GPL"); -- cgit v1.2.3 From c391714c04971f5f68e3685bd7da940c9b90036d Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Tue, 3 Aug 2021 20:55:25 +0800 Subject: crypto: sun8i-ce - use kfree_sensitive to clear and free sensitive data The kfree_sensitive is a kernel API to clear sensitive information that should not be leaked to other future users of the same memory objects and free the memory. Its function is the same as the combination of memzero_explicit and kfree. Thus, we can replace the combination APIs with the single kfree_sensitive API. Signed-off-by: Jason Wang Signed-off-by: Herbert Xu --- drivers/crypto/allwinner/sun8i-ce/sun8i-ce-prng.c | 9 +++------ drivers/crypto/allwinner/sun8i-ce/sun8i-ce-trng.c | 3 +-- 2 files changed, 4 insertions(+), 8 deletions(-) diff --git a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-prng.c b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-prng.c index cd1baee424a1..b3a9bbfb8831 100644 --- a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-prng.c +++ b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-prng.c @@ -26,8 +26,7 @@ void sun8i_ce_prng_exit(struct crypto_tfm *tfm) { struct sun8i_ce_rng_tfm_ctx *ctx = crypto_tfm_ctx(tfm); - memzero_explicit(ctx->seed, ctx->slen); - kfree(ctx->seed); + kfree_sensitive(ctx->seed); ctx->seed = NULL; ctx->slen = 0; } @@ -38,8 +37,7 @@ int sun8i_ce_prng_seed(struct crypto_rng *tfm, const u8 *seed, struct sun8i_ce_rng_tfm_ctx *ctx = crypto_rng_ctx(tfm); if (ctx->seed && ctx->slen != slen) { - memzero_explicit(ctx->seed, ctx->slen); - kfree(ctx->seed); + kfree_sensitive(ctx->seed); ctx->slen = 0; ctx->seed = NULL; } @@ -157,9 +155,8 @@ err_dst: memcpy(dst, d, dlen); memcpy(ctx->seed, d + dlen, ctx->slen); } - memzero_explicit(d, todo); err_iv: - kfree(d); + kfree_sensitive(d); err_mem: return err; } diff --git a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-trng.c b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-trng.c index 5b7af4498bd5..19cd2e52f89d 100644 --- a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-trng.c +++ b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-trng.c @@ -95,9 +95,8 @@ err_pm: memcpy(data, d, max); err = max; } - memzero_explicit(d, todo); err_dst: - kfree(d); + kfree_sensitive(d); return err; } -- cgit v1.2.3 From d01a9f7009c3812a8955b7ae5798470cd6ab3590 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Tue, 3 Aug 2021 16:15:55 +0200 Subject: crypto: virtio - Replace deprecated CPU-hotplug functions. The functions get_online_cpus() and put_online_cpus() have been deprecated during the CPU hotplug rework. They map directly to cpus_read_lock() and cpus_read_unlock(). Replace deprecated CPU-hotplug functions with the official version. The behavior remains unchanged. Cc: Gonglei Cc: "Michael S. Tsirkin" Cc: Jason Wang Cc: Herbert Xu Cc: "David S. Miller" Cc: virtualization@lists.linux-foundation.org Cc: linux-crypto@vger.kernel.org Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Herbert Xu --- drivers/crypto/virtio/virtio_crypto_core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/crypto/virtio/virtio_crypto_core.c b/drivers/crypto/virtio/virtio_crypto_core.c index 080955a1dd9c..e2375d992308 100644 --- a/drivers/crypto/virtio/virtio_crypto_core.c +++ b/drivers/crypto/virtio/virtio_crypto_core.c @@ -187,9 +187,9 @@ static int virtcrypto_init_vqs(struct virtio_crypto *vi) if (ret) goto err_free; - get_online_cpus(); + cpus_read_lock(); virtcrypto_set_affinity(vi); - put_online_cpus(); + cpus_read_unlock(); return 0; -- cgit v1.2.3 From 80771c8228029daff4b3402e00883cde06e07d46 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Tue, 3 Aug 2021 16:16:10 +0200 Subject: padata: Replace deprecated CPU-hotplug functions. The functions get_online_cpus() and put_online_cpus() have been deprecated during the CPU hotplug rework. They map directly to cpus_read_lock() and cpus_read_unlock(). Replace deprecated CPU-hotplug functions with the official version. The behavior remains unchanged. Cc: Steffen Klassert Cc: Daniel Jordan Cc: linux-crypto@vger.kernel.org Signed-off-by: Sebastian Andrzej Siewior Acked-by: Daniel Jordan Signed-off-by: Herbert Xu --- kernel/padata.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/kernel/padata.c b/kernel/padata.c index 378c36080781..3258ab89c2a3 100644 --- a/kernel/padata.c +++ b/kernel/padata.c @@ -733,7 +733,7 @@ int padata_set_cpumask(struct padata_instance *pinst, int cpumask_type, struct cpumask *serial_mask, *parallel_mask; int err = -EINVAL; - get_online_cpus(); + cpus_read_lock(); mutex_lock(&pinst->lock); switch (cpumask_type) { @@ -753,7 +753,7 @@ int padata_set_cpumask(struct padata_instance *pinst, int cpumask_type, out: mutex_unlock(&pinst->lock); - put_online_cpus(); + cpus_read_unlock(); return err; } @@ -992,7 +992,7 @@ struct padata_instance *padata_alloc(const char *name) if (!pinst->parallel_wq) goto err_free_inst; - get_online_cpus(); + cpus_read_lock(); pinst->serial_wq = alloc_workqueue("%s_serial", WQ_MEM_RECLAIM | WQ_CPU_INTENSIVE, 1, name); @@ -1026,7 +1026,7 @@ struct padata_instance *padata_alloc(const char *name) &pinst->cpu_dead_node); #endif - put_online_cpus(); + cpus_read_unlock(); return pinst; @@ -1036,7 +1036,7 @@ err_free_masks: err_free_serial_wq: destroy_workqueue(pinst->serial_wq); err_put_cpus: - put_online_cpus(); + cpus_read_unlock(); destroy_workqueue(pinst->parallel_wq); err_free_inst: kfree(pinst); @@ -1074,9 +1074,9 @@ struct padata_shell *padata_alloc_shell(struct padata_instance *pinst) ps->pinst = pinst; - get_online_cpus(); + cpus_read_lock(); pd = padata_alloc_pd(ps); - put_online_cpus(); + cpus_read_unlock(); if (!pd) goto out_free_ps; -- cgit v1.2.3 From b6f756726e4dfe75be1883f6a0202dcecdc801ab Mon Sep 17 00:00:00 2001 From: Hongbo Li Date: Thu, 5 Aug 2021 16:53:32 +0800 Subject: lib/mpi: use kcalloc in mpi_resize We should set the additional space to 0 in mpi_resize(). So use kcalloc() instead of kmalloc_array(). In lib/mpi/ec.c: /**************** * Resize the array of A to NLIMBS. the additional space is cleared * (set to 0) [done by m_realloc()] */ int mpi_resize(MPI a, unsigned nlimbs) Like the comment of kernel's mpi_resize() said, the additional space need to be set to 0, but when a->d is not NULL, it does not set. The kernel's mpi lib is from libgcrypt, the mpi resize in libgcrypt is _gcry_mpi_resize() which set the additional space to 0. This bug may cause mpi api which use mpi_resize() get wrong result under the condition of using the additional space without initiation. If this condition is not met, the bug would not be triggered. Currently in kernel, rsa, sm2 and dh use mpi lib, and they works well, so the bug is not triggered in these cases. add_points_edwards() use the additional space directly, so it will get a wrong result. Fixes: cdec9cb5167a ("crypto: GnuPG based MPI lib - source files (part 1)") Signed-off-by: Hongbo Li Signed-off-by: Herbert Xu --- lib/mpi/mpiutil.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/mpi/mpiutil.c b/lib/mpi/mpiutil.c index 9a75ca3f7edf..bc81419f400c 100644 --- a/lib/mpi/mpiutil.c +++ b/lib/mpi/mpiutil.c @@ -148,7 +148,7 @@ int mpi_resize(MPI a, unsigned nlimbs) return 0; /* no need to do it */ if (a->d) { - p = kmalloc_array(nlimbs, sizeof(mpi_limb_t), GFP_KERNEL); + p = kcalloc(nlimbs, sizeof(mpi_limb_t), GFP_KERNEL); if (!p) return -ENOMEM; memcpy(p, a->d, a->alloced * sizeof(mpi_limb_t)); -- cgit v1.2.3 From ed5fa39fa8a62fc55c1c4d53b71f3f4f08a90d22 Mon Sep 17 00:00:00 2001 From: Weili Qian Date: Sat, 7 Aug 2021 14:29:09 +0800 Subject: crypto: hisilicon - enable zip device clock gating Kunpeng930 zip device supports dynamic clock gating. When executing tasks, the algorithm core is opened, and when idle, the algorithm core is closed. This patch enables zip dynamic clock gating by writing hardware registers. Signed-off-by: Weili Qian Signed-off-by: Herbert Xu --- drivers/crypto/hisilicon/zip/zip_main.c | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/drivers/crypto/hisilicon/zip/zip_main.c b/drivers/crypto/hisilicon/zip/zip_main.c index f8482ceebf2a..d1ca474ea8e3 100644 --- a/drivers/crypto/hisilicon/zip/zip_main.c +++ b/drivers/crypto/hisilicon/zip/zip_main.c @@ -107,6 +107,14 @@ #define HZIP_DELAY_1_US 1 #define HZIP_POLL_TIMEOUT_US 1000 +/* clock gating */ +#define HZIP_PEH_CFG_AUTO_GATE 0x3011A8 +#define HZIP_PEH_CFG_AUTO_GATE_EN BIT(0) +#define HZIP_CORE_GATED_EN GENMASK(15, 8) +#define HZIP_CORE_GATED_OOO_EN BIT(29) +#define HZIP_CLOCK_GATED_EN (HZIP_CORE_GATED_EN | \ + HZIP_CORE_GATED_OOO_EN) + static const char hisi_zip_name[] = "hisi_zip"; static struct dentry *hzip_debugfs_root; @@ -312,6 +320,22 @@ static void hisi_zip_close_sva_prefetch(struct hisi_qm *qm) pci_err(qm->pdev, "failed to close sva prefetch\n"); } +static void hisi_zip_enable_clock_gate(struct hisi_qm *qm) +{ + u32 val; + + if (qm->ver < QM_HW_V3) + return; + + val = readl(qm->io_base + HZIP_CLOCK_GATE_CTRL); + val |= HZIP_CLOCK_GATED_EN; + writel(val, qm->io_base + HZIP_CLOCK_GATE_CTRL); + + val = readl(qm->io_base + HZIP_PEH_CFG_AUTO_GATE); + val |= HZIP_PEH_CFG_AUTO_GATE_EN; + writel(val, qm->io_base + HZIP_PEH_CFG_AUTO_GATE); +} + static int hisi_zip_set_user_domain_and_cache(struct hisi_qm *qm) { void __iomem *base = qm->io_base; @@ -359,6 +383,8 @@ static int hisi_zip_set_user_domain_and_cache(struct hisi_qm *qm) CQC_CACHE_WB_ENABLE | FIELD_PREP(SQC_CACHE_WB_THRD, 1) | FIELD_PREP(CQC_CACHE_WB_THRD, 1), base + QM_CACHE_CTL); + hisi_zip_enable_clock_gate(qm); + return 0; } -- cgit v1.2.3 From 3d845d497b23547150fe7f9b3261ead9f4295686 Mon Sep 17 00:00:00 2001 From: Weili Qian Date: Sat, 7 Aug 2021 14:29:10 +0800 Subject: crypto: hisilicon - enable sec device clock gating Kunpeng930 sec device supports dynamic clock gating. When doing tasks, the algorithm core is opened, and when idle, the algorithm core is closed. This patch enables sec dynamic clock gating by writing hardware registers. Signed-off-by: Weili Qian Signed-off-by: Herbert Xu --- drivers/crypto/hisilicon/sec2/sec_main.c | 46 ++++++++++++++++++++++++++++---- 1 file changed, 41 insertions(+), 5 deletions(-) diff --git a/drivers/crypto/hisilicon/sec2/sec_main.c b/drivers/crypto/hisilicon/sec2/sec_main.c index 490db7bccf61..db4dbcf0492a 100644 --- a/drivers/crypto/hisilicon/sec2/sec_main.c +++ b/drivers/crypto/hisilicon/sec2/sec_main.c @@ -57,10 +57,16 @@ #define SEC_MEM_START_INIT_REG 0x301100 #define SEC_MEM_INIT_DONE_REG 0x301104 +/* clock gating */ #define SEC_CONTROL_REG 0x301200 -#define SEC_TRNG_EN_SHIFT 8 +#define SEC_DYNAMIC_GATE_REG 0x30121c +#define SEC_CORE_AUTO_GATE 0x30212c +#define SEC_DYNAMIC_GATE_EN 0x7bff +#define SEC_CORE_AUTO_GATE_EN GENMASK(3, 0) #define SEC_CLK_GATE_ENABLE BIT(3) #define SEC_CLK_GATE_DISABLE (~BIT(3)) + +#define SEC_TRNG_EN_SHIFT 8 #define SEC_AXI_SHUTDOWN_ENABLE BIT(12) #define SEC_AXI_SHUTDOWN_DISABLE 0xFFFFEFFF @@ -378,15 +384,43 @@ static void sec_close_sva_prefetch(struct hisi_qm *qm) pci_err(qm->pdev, "failed to close sva prefetch\n"); } +static void sec_enable_clock_gate(struct hisi_qm *qm) +{ + u32 val; + + if (qm->ver < QM_HW_V3) + return; + + val = readl_relaxed(qm->io_base + SEC_CONTROL_REG); + val |= SEC_CLK_GATE_ENABLE; + writel_relaxed(val, qm->io_base + SEC_CONTROL_REG); + + val = readl(qm->io_base + SEC_DYNAMIC_GATE_REG); + val |= SEC_DYNAMIC_GATE_EN; + writel(val, qm->io_base + SEC_DYNAMIC_GATE_REG); + + val = readl(qm->io_base + SEC_CORE_AUTO_GATE); + val |= SEC_CORE_AUTO_GATE_EN; + writel(val, qm->io_base + SEC_CORE_AUTO_GATE); +} + +static void sec_disable_clock_gate(struct hisi_qm *qm) +{ + u32 val; + + /* Kunpeng920 needs to close clock gating */ + val = readl_relaxed(qm->io_base + SEC_CONTROL_REG); + val &= SEC_CLK_GATE_DISABLE; + writel_relaxed(val, qm->io_base + SEC_CONTROL_REG); +} + static int sec_engine_init(struct hisi_qm *qm) { int ret; u32 reg; - /* disable clock gate control */ - reg = readl_relaxed(qm->io_base + SEC_CONTROL_REG); - reg &= SEC_CLK_GATE_DISABLE; - writel_relaxed(reg, qm->io_base + SEC_CONTROL_REG); + /* disable clock gate control before mem init */ + sec_disable_clock_gate(qm); writel_relaxed(0x1, qm->io_base + SEC_MEM_START_INIT_REG); @@ -433,6 +467,8 @@ static int sec_engine_init(struct hisi_qm *qm) reg |= sec_get_endian(qm); writel_relaxed(reg, qm->io_base + SEC_CONTROL_REG); + sec_enable_clock_gate(qm); + return 0; } -- cgit v1.2.3 From ea5202dff79ce23e1a9fee3e1b2f09e28b77ba3a Mon Sep 17 00:00:00 2001 From: Weili Qian Date: Sat, 7 Aug 2021 14:29:11 +0800 Subject: crypto: hisilicon - enable hpre device clock gating Kunpeng930 hpre device supports dynamic clock gating. When doing tasks, the algorithm core is opened, and when idle, the algorithm core is closed. This patch enables hpre dynamic clock gating by writing hardware registers. Signed-off-by: Weili Qian Signed-off-by: Herbert Xu --- drivers/crypto/hisilicon/hpre/hpre_main.c | 63 +++++++++++++++++++++++++++++++ 1 file changed, 63 insertions(+) diff --git a/drivers/crypto/hisilicon/hpre/hpre_main.c b/drivers/crypto/hisilicon/hpre/hpre_main.c index 8b0640fb04be..6a5de3073de9 100644 --- a/drivers/crypto/hisilicon/hpre/hpre_main.c +++ b/drivers/crypto/hisilicon/hpre/hpre_main.c @@ -81,6 +81,16 @@ #define HPRE_PREFETCH_DISABLE BIT(30) #define HPRE_SVA_DISABLE_READY (BIT(4) | BIT(8)) +/* clock gate */ +#define HPRE_CLKGATE_CTL 0x301a10 +#define HPRE_PEH_CFG_AUTO_GATE 0x301a2c +#define HPRE_CLUSTER_DYN_CTL 0x302010 +#define HPRE_CORE_SHB_CFG 0x302088 +#define HPRE_CLKGATE_CTL_EN BIT(0) +#define HPRE_PEH_CFG_AUTO_GATE_EN BIT(0) +#define HPRE_CLUSTER_DYN_CTL_EN BIT(0) +#define HPRE_CORE_GATE_EN (BIT(30) | BIT(31)) + #define HPRE_AM_OOO_SHUTDOWN_ENB 0x301044 #define HPRE_AM_OOO_SHUTDOWN_ENABLE BIT(0) #define HPRE_WR_MSI_PORT BIT(2) @@ -417,12 +427,63 @@ static void hpre_close_sva_prefetch(struct hisi_qm *qm) pci_err(qm->pdev, "failed to close sva prefetch\n"); } +static void hpre_enable_clock_gate(struct hisi_qm *qm) +{ + u32 val; + + if (qm->ver < QM_HW_V3) + return; + + val = readl(qm->io_base + HPRE_CLKGATE_CTL); + val |= HPRE_CLKGATE_CTL_EN; + writel(val, qm->io_base + HPRE_CLKGATE_CTL); + + val = readl(qm->io_base + HPRE_PEH_CFG_AUTO_GATE); + val |= HPRE_PEH_CFG_AUTO_GATE_EN; + writel(val, qm->io_base + HPRE_PEH_CFG_AUTO_GATE); + + val = readl(qm->io_base + HPRE_CLUSTER_DYN_CTL); + val |= HPRE_CLUSTER_DYN_CTL_EN; + writel(val, qm->io_base + HPRE_CLUSTER_DYN_CTL); + + val = readl_relaxed(qm->io_base + HPRE_CORE_SHB_CFG); + val |= HPRE_CORE_GATE_EN; + writel(val, qm->io_base + HPRE_CORE_SHB_CFG); +} + +static void hpre_disable_clock_gate(struct hisi_qm *qm) +{ + u32 val; + + if (qm->ver < QM_HW_V3) + return; + + val = readl(qm->io_base + HPRE_CLKGATE_CTL); + val &= ~HPRE_CLKGATE_CTL_EN; + writel(val, qm->io_base + HPRE_CLKGATE_CTL); + + val = readl(qm->io_base + HPRE_PEH_CFG_AUTO_GATE); + val &= ~HPRE_PEH_CFG_AUTO_GATE_EN; + writel(val, qm->io_base + HPRE_PEH_CFG_AUTO_GATE); + + val = readl(qm->io_base + HPRE_CLUSTER_DYN_CTL); + val &= ~HPRE_CLUSTER_DYN_CTL_EN; + writel(val, qm->io_base + HPRE_CLUSTER_DYN_CTL); + + val = readl_relaxed(qm->io_base + HPRE_CORE_SHB_CFG); + val &= ~HPRE_CORE_GATE_EN; + writel(val, qm->io_base + HPRE_CORE_SHB_CFG); +} + static int hpre_set_user_domain_and_cache(struct hisi_qm *qm) { struct device *dev = &qm->pdev->dev; u32 val; int ret; + /* disabel dynamic clock gate before sram init */ + hpre_disable_clock_gate(qm); + writel(HPRE_QM_USR_CFG_MASK, qm->io_base + QM_ARUSER_M_CFG_ENABLE); writel(HPRE_QM_USR_CFG_MASK, qm->io_base + QM_AWUSER_M_CFG_ENABLE); writel_relaxed(HPRE_QM_AXI_CFG_MASK, qm->io_base + QM_AXI_M_CFG); @@ -473,6 +534,8 @@ static int hpre_set_user_domain_and_cache(struct hisi_qm *qm) /* Config data buffer pasid needed by Kunpeng 920 */ hpre_config_pasid(qm); + hpre_enable_clock_gate(qm); + return ret; } -- cgit v1.2.3 From 9491923e4a68d696f7d0817a02829ed238783716 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sun, 8 Aug 2021 13:52:33 -0700 Subject: crypto: wp512 - correct a non-kernel-doc comment Don't use "/**" to begin a comment that is not kernel-doc notation. crypto/wp512.c:779: warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst * The core Whirlpool transform. Signed-off-by: Randy Dunlap Cc: Herbert Xu Cc: "David S. Miller" Cc: linux-crypto@vger.kernel.org Signed-off-by: Herbert Xu --- crypto/wp512.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crypto/wp512.c b/crypto/wp512.c index bf79fbb2340f..5e820afa3c78 100644 --- a/crypto/wp512.c +++ b/crypto/wp512.c @@ -775,7 +775,7 @@ static const u64 rc[WHIRLPOOL_ROUNDS] = { 0xca2dbf07ad5a8333ULL, }; -/** +/* * The core Whirlpool transform. */ -- cgit v1.2.3 From ffe3ee8bb68aa6c49832c6c101ab0bb1cb635624 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Wed, 11 Aug 2021 02:05:55 +0200 Subject: crypto: omap - Avoid redundant copy when using truncated sg list omap_crypto_cleanup() currently copies data from sg to orig if either copy flag is set. However OMAP_CRYPTO_SG_COPIED means that sg refers to the same pages as orig, truncated to len bytes. There is no need to copy in this case. Only copy data if the OMAP_CRYPTO_DATA_COPIED flag is set. Fixes: 74ed87e7e7f7 ("crypto: omap - add base support library for common ...") Signed-off-by: Ben Hutchings Signed-off-by: Herbert Xu --- drivers/crypto/omap-crypto.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/crypto/omap-crypto.c b/drivers/crypto/omap-crypto.c index 31bdb1d76d11..a4cc6bf146ec 100644 --- a/drivers/crypto/omap-crypto.c +++ b/drivers/crypto/omap-crypto.c @@ -210,7 +210,7 @@ void omap_crypto_cleanup(struct scatterlist *sg, struct scatterlist *orig, buf = sg_virt(sg); pages = get_order(len); - if (orig && (flags & OMAP_CRYPTO_COPY_MASK)) + if (orig && (flags & OMAP_CRYPTO_DATA_COPIED)) omap_crypto_copy_data(sg, orig, offset, len); if (flags & OMAP_CRYPTO_DATA_COPIED) -- cgit v1.2.3 From fe4d55773b879c785ae61da9b1c2160f0110f67e Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Wed, 11 Aug 2021 02:06:09 +0200 Subject: crypto: omap - Fix inconsistent locking of device lists lockdep complains that in omap-aes, the list_lock is taken both with softirqs enabled at probe time, and also in softirq context, which could lead to a deadlock: ================================ WARNING: inconsistent lock state 5.14.0-rc1-00035-gc836005b01c5-dirty #69 Not tainted -------------------------------- inconsistent {SOFTIRQ-ON-W} -> {IN-SOFTIRQ-W} usage. ksoftirqd/0/7 [HC0[0]:SC1[3]:HE1:SE0] takes: bf00e014 (list_lock){+.?.}-{2:2}, at: omap_aes_find_dev+0x18/0x54 [omap_aes_driver] {SOFTIRQ-ON-W} state was registered at: _raw_spin_lock+0x40/0x50 omap_aes_probe+0x1d4/0x664 [omap_aes_driver] platform_probe+0x58/0xb8 really_probe+0xbc/0x314 __driver_probe_device+0x80/0xe4 driver_probe_device+0x30/0xc8 __driver_attach+0x70/0xf4 bus_for_each_dev+0x70/0xb4 bus_add_driver+0xf0/0x1d4 driver_register+0x74/0x108 do_one_initcall+0x84/0x2e4 do_init_module+0x5c/0x240 load_module+0x221c/0x2584 sys_finit_module+0xb0/0xec ret_fast_syscall+0x0/0x2c 0xbed90b30 irq event stamp: 111800 hardirqs last enabled at (111800): [] __kmalloc+0x484/0x5ec hardirqs last disabled at (111799): [] __kmalloc+0x490/0x5ec softirqs last enabled at (111776): [] __do_softirq+0x2b8/0x4d0 softirqs last disabled at (111781): [] run_ksoftirqd+0x34/0x50 other info that might help us debug this: Possible unsafe locking scenario: CPU0 ---- lock(list_lock); lock(list_lock); *** DEADLOCK *** 2 locks held by ksoftirqd/0/7: #0: c0f5e8c8 (rcu_read_lock){....}-{1:2}, at: netif_receive_skb+0x6c/0x260 #1: c0f5e8c8 (rcu_read_lock){....}-{1:2}, at: ip_local_deliver_finish+0x2c/0xdc stack backtrace: CPU: 0 PID: 7 Comm: ksoftirqd/0 Not tainted 5.14.0-rc1-00035-gc836005b01c5-dirty #69 Hardware name: Generic AM43 (Flattened Device Tree) [] (unwind_backtrace) from [] (show_stack+0x10/0x14) [] (show_stack) from [] (mark_lock.part.17+0x5bc/0xd04) [] (mark_lock.part.17) from [] (__lock_acquire+0x960/0x2fa4) [] (__lock_acquire) from [] (lock_acquire+0x10c/0x358) [] (lock_acquire) from [] (_raw_spin_lock_bh+0x44/0x58) [] (_raw_spin_lock_bh) from [] (omap_aes_find_dev+0x18/0x54 [omap_aes_driver]) [] (omap_aes_find_dev [omap_aes_driver]) from [] (omap_aes_crypt+0x94/0xd4 [omap_aes_driver]) [] (omap_aes_crypt [omap_aes_driver]) from [] (esp_input+0x1b0/0x2c8) [] (esp_input) from [] (xfrm_input+0x410/0x1290) [] (xfrm_input) from [] (xfrm4_esp_rcv+0x54/0x11c) [] (xfrm4_esp_rcv) from [] (ip_protocol_deliver_rcu+0x48/0x3bc) [] (ip_protocol_deliver_rcu) from [] (ip_local_deliver_finish+0x9c/0xdc) [] (ip_local_deliver_finish) from [] (ip_local_deliver+0x148/0x1b0) [] (ip_local_deliver) from [] (ip_rcv+0x11c/0x180) [] (ip_rcv) from [] (__netif_receive_skb_one_core+0x54/0x74) [] (__netif_receive_skb_one_core) from [] (netif_receive_skb+0xa8/0x260) [] (netif_receive_skb) from [] (cpsw_rx_handler+0x224/0x2fc) [] (cpsw_rx_handler) from [] (__cpdma_chan_process+0xf4/0x188) [] (__cpdma_chan_process) from [] (cpdma_chan_process+0x3c/0x5c) [] (cpdma_chan_process) from [] (cpsw_rx_mq_poll+0x44/0x98) [] (cpsw_rx_mq_poll) from [] (__napi_poll+0x28/0x268) [] (__napi_poll) from [] (net_rx_action+0xcc/0x204) [] (net_rx_action) from [] (__do_softirq+0x140/0x4d0) [] (__do_softirq) from [] (run_ksoftirqd+0x34/0x50) [] (run_ksoftirqd) from [] (smpboot_thread_fn+0xf4/0x1d8) [] (smpboot_thread_fn) from [] (kthread+0x14c/0x174) [] (kthread) from [] (ret_from_fork+0x14/0x38) ... The omap-des and omap-sham drivers appear to have a similar issue. Fix this by using spin_{,un}lock_bh() around device list access in all the probe and remove functions. Signed-off-by: Ben Hutchings Signed-off-by: Herbert Xu --- drivers/crypto/omap-aes.c | 8 ++++---- drivers/crypto/omap-des.c | 8 ++++---- drivers/crypto/omap-sham.c | 12 ++++++------ 3 files changed, 14 insertions(+), 14 deletions(-) diff --git a/drivers/crypto/omap-aes.c b/drivers/crypto/omap-aes.c index 0dd4c6b157de..9b968ac4ee7b 100644 --- a/drivers/crypto/omap-aes.c +++ b/drivers/crypto/omap-aes.c @@ -1175,9 +1175,9 @@ static int omap_aes_probe(struct platform_device *pdev) spin_lock_init(&dd->lock); INIT_LIST_HEAD(&dd->list); - spin_lock(&list_lock); + spin_lock_bh(&list_lock); list_add_tail(&dd->list, &dev_list); - spin_unlock(&list_lock); + spin_unlock_bh(&list_lock); /* Initialize crypto engine */ dd->engine = crypto_engine_alloc_init(dev, 1); @@ -1264,9 +1264,9 @@ static int omap_aes_remove(struct platform_device *pdev) if (!dd) return -ENODEV; - spin_lock(&list_lock); + spin_lock_bh(&list_lock); list_del(&dd->list); - spin_unlock(&list_lock); + spin_unlock_bh(&list_lock); for (i = dd->pdata->algs_info_size - 1; i >= 0; i--) for (j = dd->pdata->algs_info[i].registered - 1; j >= 0; j--) { diff --git a/drivers/crypto/omap-des.c b/drivers/crypto/omap-des.c index bc8631363d72..be77656864e3 100644 --- a/drivers/crypto/omap-des.c +++ b/drivers/crypto/omap-des.c @@ -1033,9 +1033,9 @@ static int omap_des_probe(struct platform_device *pdev) INIT_LIST_HEAD(&dd->list); - spin_lock(&list_lock); + spin_lock_bh(&list_lock); list_add_tail(&dd->list, &dev_list); - spin_unlock(&list_lock); + spin_unlock_bh(&list_lock); /* Initialize des crypto engine */ dd->engine = crypto_engine_alloc_init(dev, 1); @@ -1094,9 +1094,9 @@ static int omap_des_remove(struct platform_device *pdev) if (!dd) return -ENODEV; - spin_lock(&list_lock); + spin_lock_bh(&list_lock); list_del(&dd->list); - spin_unlock(&list_lock); + spin_unlock_bh(&list_lock); for (i = dd->pdata->algs_info_size - 1; i >= 0; i--) for (j = dd->pdata->algs_info[i].registered - 1; j >= 0; j--) diff --git a/drivers/crypto/omap-sham.c b/drivers/crypto/omap-sham.c index 735abd8f68d6..f6bf53c00b61 100644 --- a/drivers/crypto/omap-sham.c +++ b/drivers/crypto/omap-sham.c @@ -2127,9 +2127,9 @@ static int omap_sham_probe(struct platform_device *pdev) (rev & dd->pdata->major_mask) >> dd->pdata->major_shift, (rev & dd->pdata->minor_mask) >> dd->pdata->minor_shift); - spin_lock(&sham.lock); + spin_lock_bh(&sham.lock); list_add_tail(&dd->list, &sham.dev_list); - spin_unlock(&sham.lock); + spin_unlock_bh(&sham.lock); dd->engine = crypto_engine_alloc_init(dev, 1); if (!dd->engine) { @@ -2177,9 +2177,9 @@ err_algs: err_engine_start: crypto_engine_exit(dd->engine); err_engine: - spin_lock(&sham.lock); + spin_lock_bh(&sham.lock); list_del(&dd->list); - spin_unlock(&sham.lock); + spin_unlock_bh(&sham.lock); err_pm: pm_runtime_dont_use_autosuspend(dev); pm_runtime_disable(dev); @@ -2199,9 +2199,9 @@ static int omap_sham_remove(struct platform_device *pdev) dd = platform_get_drvdata(pdev); if (!dd) return -ENODEV; - spin_lock(&sham.lock); + spin_lock_bh(&sham.lock); list_del(&dd->list); - spin_unlock(&sham.lock); + spin_unlock_bh(&sham.lock); for (i = dd->pdata->algs_info_size - 1; i >= 0; i--) for (j = dd->pdata->algs_info[i].registered - 1; j >= 0; j--) { crypto_unregister_ahash( -- cgit v1.2.3 From 6e422ccea4a67929e277f619f75995115511e206 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Thu, 12 Aug 2021 09:18:13 +0100 Subject: crypto: qat - simplify code and axe the use of a deprecated API The wrappers in include/linux/pci-dma-compat.h should go away. Replace 'pci_set_dma_mask/pci_set_consistent_dma_mask' by an equivalent and less verbose 'dma_set_mask_and_coherent()' call. Signed-off-by: Christophe JAILLET Reviewed-by: Andy Shevchenko Co-developed-by: Giovanni Cabiddu Signed-off-by: Giovanni Cabiddu Signed-off-by: Herbert Xu --- drivers/crypto/qat/qat_4xxx/adf_drv.c | 14 ++++---------- drivers/crypto/qat/qat_c3xxx/adf_drv.c | 15 ++++----------- drivers/crypto/qat/qat_c3xxxvf/adf_drv.c | 15 ++++----------- drivers/crypto/qat/qat_c62x/adf_drv.c | 15 ++++----------- drivers/crypto/qat/qat_c62xvf/adf_drv.c | 15 ++++----------- drivers/crypto/qat/qat_dh895xcc/adf_drv.c | 15 ++++----------- drivers/crypto/qat/qat_dh895xccvf/adf_drv.c | 15 ++++----------- 7 files changed, 28 insertions(+), 76 deletions(-) diff --git a/drivers/crypto/qat/qat_4xxx/adf_drv.c b/drivers/crypto/qat/qat_4xxx/adf_drv.c index a8805c815d16..359fb7989dfb 100644 --- a/drivers/crypto/qat/qat_4xxx/adf_drv.c +++ b/drivers/crypto/qat/qat_4xxx/adf_drv.c @@ -221,16 +221,10 @@ static int adf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) } /* Set DMA identifier */ - if (pci_set_dma_mask(pdev, DMA_BIT_MASK(64))) { - if ((pci_set_dma_mask(pdev, DMA_BIT_MASK(32)))) { - dev_err(&pdev->dev, "No usable DMA configuration.\n"); - ret = -EFAULT; - goto out_err; - } else { - pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32)); - } - } else { - pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64)); + ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); + if (ret) { + dev_err(&pdev->dev, "No usable DMA configuration.\n"); + goto out_err; } /* Get accelerator capabilities mask */ diff --git a/drivers/crypto/qat/qat_c3xxx/adf_drv.c b/drivers/crypto/qat/qat_c3xxx/adf_drv.c index 7fb3343ae8b0..b561851cf955 100644 --- a/drivers/crypto/qat/qat_c3xxx/adf_drv.c +++ b/drivers/crypto/qat/qat_c3xxx/adf_drv.c @@ -159,17 +159,10 @@ static int adf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) } /* set dma identifier */ - if (pci_set_dma_mask(pdev, DMA_BIT_MASK(64))) { - if ((pci_set_dma_mask(pdev, DMA_BIT_MASK(32)))) { - dev_err(&pdev->dev, "No usable DMA configuration\n"); - ret = -EFAULT; - goto out_err_disable; - } else { - pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32)); - } - - } else { - pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64)); + ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); + if (ret) { + dev_err(&pdev->dev, "No usable DMA configuration\n"); + goto out_err_disable; } if (pci_request_regions(pdev, ADF_C3XXX_DEVICE_NAME)) { diff --git a/drivers/crypto/qat/qat_c3xxxvf/adf_drv.c b/drivers/crypto/qat/qat_c3xxxvf/adf_drv.c index 067ca5e17d38..5095ee30c6a0 100644 --- a/drivers/crypto/qat/qat_c3xxxvf/adf_drv.c +++ b/drivers/crypto/qat/qat_c3xxxvf/adf_drv.c @@ -141,17 +141,10 @@ static int adf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) } /* set dma identifier */ - if (pci_set_dma_mask(pdev, DMA_BIT_MASK(64))) { - if ((pci_set_dma_mask(pdev, DMA_BIT_MASK(32)))) { - dev_err(&pdev->dev, "No usable DMA configuration\n"); - ret = -EFAULT; - goto out_err_disable; - } else { - pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32)); - } - - } else { - pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64)); + ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); + if (ret) { + dev_err(&pdev->dev, "No usable DMA configuration\n"); + goto out_err_disable; } if (pci_request_regions(pdev, ADF_C3XXXVF_DEVICE_NAME)) { diff --git a/drivers/crypto/qat/qat_c62x/adf_drv.c b/drivers/crypto/qat/qat_c62x/adf_drv.c index 1f5de442e1e6..f3ac4cda75e9 100644 --- a/drivers/crypto/qat/qat_c62x/adf_drv.c +++ b/drivers/crypto/qat/qat_c62x/adf_drv.c @@ -159,17 +159,10 @@ static int adf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) } /* set dma identifier */ - if (pci_set_dma_mask(pdev, DMA_BIT_MASK(64))) { - if ((pci_set_dma_mask(pdev, DMA_BIT_MASK(32)))) { - dev_err(&pdev->dev, "No usable DMA configuration\n"); - ret = -EFAULT; - goto out_err_disable; - } else { - pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32)); - } - - } else { - pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64)); + ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); + if (ret) { + dev_err(&pdev->dev, "No usable DMA configuration\n"); + goto out_err_disable; } if (pci_request_regions(pdev, ADF_C62X_DEVICE_NAME)) { diff --git a/drivers/crypto/qat/qat_c62xvf/adf_drv.c b/drivers/crypto/qat/qat_c62xvf/adf_drv.c index 51ea88c0b17d..7bd23438bcee 100644 --- a/drivers/crypto/qat/qat_c62xvf/adf_drv.c +++ b/drivers/crypto/qat/qat_c62xvf/adf_drv.c @@ -141,17 +141,10 @@ static int adf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) } /* set dma identifier */ - if (pci_set_dma_mask(pdev, DMA_BIT_MASK(64))) { - if ((pci_set_dma_mask(pdev, DMA_BIT_MASK(32)))) { - dev_err(&pdev->dev, "No usable DMA configuration\n"); - ret = -EFAULT; - goto out_err_disable; - } else { - pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32)); - } - - } else { - pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64)); + ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); + if (ret) { + dev_err(&pdev->dev, "No usable DMA configuration\n"); + goto out_err_disable; } if (pci_request_regions(pdev, ADF_C62XVF_DEVICE_NAME)) { diff --git a/drivers/crypto/qat/qat_dh895xcc/adf_drv.c b/drivers/crypto/qat/qat_dh895xcc/adf_drv.c index a9ec4357144c..cc300a2662d5 100644 --- a/drivers/crypto/qat/qat_dh895xcc/adf_drv.c +++ b/drivers/crypto/qat/qat_dh895xcc/adf_drv.c @@ -159,17 +159,10 @@ static int adf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) } /* set dma identifier */ - if (pci_set_dma_mask(pdev, DMA_BIT_MASK(64))) { - if ((pci_set_dma_mask(pdev, DMA_BIT_MASK(32)))) { - dev_err(&pdev->dev, "No usable DMA configuration\n"); - ret = -EFAULT; - goto out_err_disable; - } else { - pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32)); - } - - } else { - pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64)); + ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); + if (ret) { + dev_err(&pdev->dev, "No usable DMA configuration\n"); + goto out_err_disable; } if (pci_request_regions(pdev, ADF_DH895XCC_DEVICE_NAME)) { diff --git a/drivers/crypto/qat/qat_dh895xccvf/adf_drv.c b/drivers/crypto/qat/qat_dh895xccvf/adf_drv.c index 29999da716cc..da9c7434628c 100644 --- a/drivers/crypto/qat/qat_dh895xccvf/adf_drv.c +++ b/drivers/crypto/qat/qat_dh895xccvf/adf_drv.c @@ -141,17 +141,10 @@ static int adf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) } /* set dma identifier */ - if (pci_set_dma_mask(pdev, DMA_BIT_MASK(64))) { - if ((pci_set_dma_mask(pdev, DMA_BIT_MASK(32)))) { - dev_err(&pdev->dev, "No usable DMA configuration\n"); - ret = -EFAULT; - goto out_err_disable; - } else { - pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32)); - } - - } else { - pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64)); + ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); + if (ret) { + dev_err(&pdev->dev, "No usable DMA configuration\n"); + goto out_err_disable; } if (pci_request_regions(pdev, ADF_DH895XCCVF_DEVICE_NAME)) { -- cgit v1.2.3 From ae1f5043e2595bf29d348f2b4633fca5e930d3e3 Mon Sep 17 00:00:00 2001 From: Giovanni Cabiddu Date: Thu, 12 Aug 2021 09:18:14 +0100 Subject: crypto: qat - set DMA mask to 48 bits for Gen2 Change the DMA mask from 64 to 48 for Gen2 devices as they cannot handle addresses greater than 48 bits. Signed-off-by: Giovanni Cabiddu Signed-off-by: Herbert Xu --- drivers/crypto/qat/qat_c3xxx/adf_drv.c | 2 +- drivers/crypto/qat/qat_c3xxxvf/adf_drv.c | 2 +- drivers/crypto/qat/qat_c62x/adf_drv.c | 2 +- drivers/crypto/qat/qat_c62xvf/adf_drv.c | 2 +- drivers/crypto/qat/qat_dh895xcc/adf_drv.c | 2 +- drivers/crypto/qat/qat_dh895xccvf/adf_drv.c | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/crypto/qat/qat_c3xxx/adf_drv.c b/drivers/crypto/qat/qat_c3xxx/adf_drv.c index b561851cf955..2df26643dbc9 100644 --- a/drivers/crypto/qat/qat_c3xxx/adf_drv.c +++ b/drivers/crypto/qat/qat_c3xxx/adf_drv.c @@ -159,7 +159,7 @@ static int adf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) } /* set dma identifier */ - ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); + ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(48)); if (ret) { dev_err(&pdev->dev, "No usable DMA configuration\n"); goto out_err_disable; diff --git a/drivers/crypto/qat/qat_c3xxxvf/adf_drv.c b/drivers/crypto/qat/qat_c3xxxvf/adf_drv.c index 5095ee30c6a0..7ef5a5185d29 100644 --- a/drivers/crypto/qat/qat_c3xxxvf/adf_drv.c +++ b/drivers/crypto/qat/qat_c3xxxvf/adf_drv.c @@ -141,7 +141,7 @@ static int adf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) } /* set dma identifier */ - ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); + ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(48)); if (ret) { dev_err(&pdev->dev, "No usable DMA configuration\n"); goto out_err_disable; diff --git a/drivers/crypto/qat/qat_c62x/adf_drv.c b/drivers/crypto/qat/qat_c62x/adf_drv.c index f3ac4cda75e9..efdba841d720 100644 --- a/drivers/crypto/qat/qat_c62x/adf_drv.c +++ b/drivers/crypto/qat/qat_c62x/adf_drv.c @@ -159,7 +159,7 @@ static int adf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) } /* set dma identifier */ - ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); + ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(48)); if (ret) { dev_err(&pdev->dev, "No usable DMA configuration\n"); goto out_err_disable; diff --git a/drivers/crypto/qat/qat_c62xvf/adf_drv.c b/drivers/crypto/qat/qat_c62xvf/adf_drv.c index 7bd23438bcee..c91beedd267c 100644 --- a/drivers/crypto/qat/qat_c62xvf/adf_drv.c +++ b/drivers/crypto/qat/qat_c62xvf/adf_drv.c @@ -141,7 +141,7 @@ static int adf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) } /* set dma identifier */ - ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); + ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(48)); if (ret) { dev_err(&pdev->dev, "No usable DMA configuration\n"); goto out_err_disable; diff --git a/drivers/crypto/qat/qat_dh895xcc/adf_drv.c b/drivers/crypto/qat/qat_dh895xcc/adf_drv.c index cc300a2662d5..e1c167507157 100644 --- a/drivers/crypto/qat/qat_dh895xcc/adf_drv.c +++ b/drivers/crypto/qat/qat_dh895xcc/adf_drv.c @@ -159,7 +159,7 @@ static int adf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) } /* set dma identifier */ - ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); + ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(48)); if (ret) { dev_err(&pdev->dev, "No usable DMA configuration\n"); goto out_err_disable; diff --git a/drivers/crypto/qat/qat_dh895xccvf/adf_drv.c b/drivers/crypto/qat/qat_dh895xccvf/adf_drv.c index da9c7434628c..d332b68795f2 100644 --- a/drivers/crypto/qat/qat_dh895xccvf/adf_drv.c +++ b/drivers/crypto/qat/qat_dh895xccvf/adf_drv.c @@ -141,7 +141,7 @@ static int adf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) } /* set dma identifier */ - ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); + ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(48)); if (ret) { dev_err(&pdev->dev, "No usable DMA configuration\n"); goto out_err_disable; -- cgit v1.2.3 From 3660f25186aff60fb7d2f5aba784dddf400aec99 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Thu, 12 Aug 2021 09:18:15 +0100 Subject: crypto: qat - disable AER if an error occurs in probe functions If an error occurs after a 'adf_enable_aer()' call, it must be undone by a corresponding 'adf_disable_aer()' call, as already done in the remove function. Signed-off-by: Christophe JAILLET Reviewed-by: Andy Shevchenko Signed-off-by: Giovanni Cabiddu Signed-off-by: Herbert Xu --- drivers/crypto/qat/qat_c3xxx/adf_drv.c | 6 ++++-- drivers/crypto/qat/qat_c62x/adf_drv.c | 6 ++++-- drivers/crypto/qat/qat_dh895xcc/adf_drv.c | 6 ++++-- 3 files changed, 12 insertions(+), 6 deletions(-) diff --git a/drivers/crypto/qat/qat_c3xxx/adf_drv.c b/drivers/crypto/qat/qat_c3xxx/adf_drv.c index 2df26643dbc9..cc6e75dc60de 100644 --- a/drivers/crypto/qat/qat_c3xxx/adf_drv.c +++ b/drivers/crypto/qat/qat_c3xxx/adf_drv.c @@ -201,12 +201,12 @@ static int adf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) if (pci_save_state(pdev)) { dev_err(&pdev->dev, "Failed to save pci state\n"); ret = -ENOMEM; - goto out_err_free_reg; + goto out_err_disable_aer; } ret = qat_crypto_dev_config(accel_dev); if (ret) - goto out_err_free_reg; + goto out_err_disable_aer; ret = adf_dev_init(accel_dev); if (ret) @@ -222,6 +222,8 @@ out_err_dev_stop: adf_dev_stop(accel_dev); out_err_dev_shutdown: adf_dev_shutdown(accel_dev); +out_err_disable_aer: + adf_disable_aer(accel_dev); out_err_free_reg: pci_release_regions(accel_pci_dev->pci_dev); out_err_disable: diff --git a/drivers/crypto/qat/qat_c62x/adf_drv.c b/drivers/crypto/qat/qat_c62x/adf_drv.c index efdba841d720..bf251dfe74b3 100644 --- a/drivers/crypto/qat/qat_c62x/adf_drv.c +++ b/drivers/crypto/qat/qat_c62x/adf_drv.c @@ -201,12 +201,12 @@ static int adf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) if (pci_save_state(pdev)) { dev_err(&pdev->dev, "Failed to save pci state\n"); ret = -ENOMEM; - goto out_err_free_reg; + goto out_err_disable_aer; } ret = qat_crypto_dev_config(accel_dev); if (ret) - goto out_err_free_reg; + goto out_err_disable_aer; ret = adf_dev_init(accel_dev); if (ret) @@ -222,6 +222,8 @@ out_err_dev_stop: adf_dev_stop(accel_dev); out_err_dev_shutdown: adf_dev_shutdown(accel_dev); +out_err_disable_aer: + adf_disable_aer(accel_dev); out_err_free_reg: pci_release_regions(accel_pci_dev->pci_dev); out_err_disable: diff --git a/drivers/crypto/qat/qat_dh895xcc/adf_drv.c b/drivers/crypto/qat/qat_dh895xcc/adf_drv.c index e1c167507157..3976a81bd99b 100644 --- a/drivers/crypto/qat/qat_dh895xcc/adf_drv.c +++ b/drivers/crypto/qat/qat_dh895xcc/adf_drv.c @@ -201,12 +201,12 @@ static int adf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) if (pci_save_state(pdev)) { dev_err(&pdev->dev, "Failed to save pci state\n"); ret = -ENOMEM; - goto out_err_free_reg; + goto out_err_disable_aer; } ret = qat_crypto_dev_config(accel_dev); if (ret) - goto out_err_free_reg; + goto out_err_disable_aer; ret = adf_dev_init(accel_dev); if (ret) @@ -222,6 +222,8 @@ out_err_dev_stop: adf_dev_stop(accel_dev); out_err_dev_shutdown: adf_dev_shutdown(accel_dev); +out_err_disable_aer: + adf_disable_aer(accel_dev); out_err_free_reg: pci_release_regions(accel_pci_dev->pci_dev); out_err_disable: -- cgit v1.2.3 From c02b51b3edb0c5c110301884a638a3360236440f Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Thu, 12 Aug 2021 09:18:16 +0100 Subject: crypto: qat - fix a typo in a comment s/Enable/Disable/ when describing 'adf_disable_aer()' Signed-off-by: Christophe JAILLET Reviewed-by: Andy Shevchenko Signed-off-by: Giovanni Cabiddu Signed-off-by: Herbert Xu --- drivers/crypto/qat/qat_common/adf_aer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/crypto/qat/qat_common/adf_aer.c b/drivers/crypto/qat/qat_common/adf_aer.c index d2ae293d0df6..ed3e40bc56eb 100644 --- a/drivers/crypto/qat/qat_common/adf_aer.c +++ b/drivers/crypto/qat/qat_common/adf_aer.c @@ -194,7 +194,7 @@ int adf_enable_aer(struct adf_accel_dev *accel_dev) EXPORT_SYMBOL_GPL(adf_enable_aer); /** - * adf_disable_aer() - Enable Advance Error Reporting for acceleration device + * adf_disable_aer() - Disable Advance Error Reporting for acceleration device * @accel_dev: Pointer to acceleration device. * * Function disables PCI Advance Error Reporting for the -- cgit v1.2.3 From 462354d986b6a89c6449b85f17aaacf44e455216 Mon Sep 17 00:00:00 2001 From: Giovanni Cabiddu Date: Thu, 12 Aug 2021 21:21:10 +0100 Subject: crypto: qat - use proper type for vf_mask Replace vf_mask type with unsigned long to avoid a stack-out-of-bound. This is to fix the following warning reported by KASAN the first time adf_msix_isr_ae() gets called. [ 692.091987] BUG: KASAN: stack-out-of-bounds in find_first_bit+0x28/0x50 [ 692.092017] Read of size 8 at addr ffff88afdf789e60 by task swapper/32/0 [ 692.092076] Call Trace: [ 692.092089] [ 692.092101] dump_stack+0x9c/0xcf [ 692.092132] print_address_description.constprop.0+0x18/0x130 [ 692.092164] ? find_first_bit+0x28/0x50 [ 692.092185] kasan_report.cold+0x7f/0x111 [ 692.092213] ? static_obj+0x10/0x80 [ 692.092234] ? find_first_bit+0x28/0x50 [ 692.092262] find_first_bit+0x28/0x50 [ 692.092288] adf_msix_isr_ae+0x16e/0x230 [intel_qat] Fixes: ed8ccaef52fa ("crypto: qat - Add support for SRIOV") Signed-off-by: Giovanni Cabiddu Reviewed-by: Marco Chiappero Reviewed-by: Fiona Trahe Signed-off-by: Herbert Xu --- drivers/crypto/qat/qat_common/adf_isr.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/crypto/qat/qat_common/adf_isr.c b/drivers/crypto/qat/qat_common/adf_isr.c index e3ad5587be49..daab02011717 100644 --- a/drivers/crypto/qat/qat_common/adf_isr.c +++ b/drivers/crypto/qat/qat_common/adf_isr.c @@ -15,6 +15,8 @@ #include "adf_transport_access_macros.h" #include "adf_transport_internal.h" +#define ADF_MAX_NUM_VFS 32 + static int adf_enable_msix(struct adf_accel_dev *accel_dev) { struct adf_accel_pci *pci_dev_info = &accel_dev->accel_pci_dev; @@ -72,7 +74,7 @@ static irqreturn_t adf_msix_isr_ae(int irq, void *dev_ptr) struct adf_bar *pmisc = &GET_BARS(accel_dev)[hw_data->get_misc_bar_id(hw_data)]; void __iomem *pmisc_bar_addr = pmisc->virt_addr; - u32 vf_mask; + unsigned long vf_mask; /* Get the interrupt sources triggered by VFs */ vf_mask = ((ADF_CSR_RD(pmisc_bar_addr, ADF_ERRSOU5) & @@ -93,8 +95,7 @@ static irqreturn_t adf_msix_isr_ae(int irq, void *dev_ptr) * unless the VF is malicious and is attempting to * flood the host OS with VF2PF interrupts. */ - for_each_set_bit(i, (const unsigned long *)&vf_mask, - (sizeof(vf_mask) * BITS_PER_BYTE)) { + for_each_set_bit(i, &vf_mask, ADF_MAX_NUM_VFS) { vf_info = accel_dev->pf.vf_info + i; if (!__ratelimit(&vf_info->vf2pf_ratelimit)) { -- cgit v1.2.3 From 462584ca17b4ca85721475cf2744d3229d4006cd Mon Sep 17 00:00:00 2001 From: Marco Chiappero Date: Thu, 12 Aug 2021 21:21:11 +0100 Subject: crypto: qat - remove empty sriov_configure() Remove the empty implementation of sriov_configure() and set the sriov_configure member of the pci_driver structure to NULL. This way, if a user tries to enable VFs on a device, when kernel and driver are built with CONFIG_PCI_IOV=n, the kernel reports an error message saying that the driver does not support SRIOV configuration via sysfs. Signed-off-by: Marco Chiappero Co-developed-by: Giovanni Cabiddu Signed-off-by: Giovanni Cabiddu Reviewed-by: Fiona Trahe Signed-off-by: Herbert Xu --- drivers/crypto/qat/qat_common/adf_common_drv.h | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/crypto/qat/qat_common/adf_common_drv.h b/drivers/crypto/qat/qat_common/adf_common_drv.h index c61476553728..c7deca7f0607 100644 --- a/drivers/crypto/qat/qat_common/adf_common_drv.h +++ b/drivers/crypto/qat/qat_common/adf_common_drv.h @@ -205,10 +205,7 @@ void adf_exit_pf_wq(void); int adf_init_vf_wq(void); void adf_exit_vf_wq(void); #else -static inline int adf_sriov_configure(struct pci_dev *pdev, int numvfs) -{ - return 0; -} +#define adf_sriov_configure NULL static inline void adf_disable_sriov(struct adf_accel_dev *accel_dev) { -- cgit v1.2.3 From a48afd6c7a4ee908f0e3c5691bd1a8e74f8e5d16 Mon Sep 17 00:00:00 2001 From: Marco Chiappero Date: Thu, 12 Aug 2021 21:21:12 +0100 Subject: crypto: qat - enable interrupts only after ISR allocation Enable device interrupts after the setup of the interrupt handlers. Signed-off-by: Marco Chiappero Co-developed-by: Giovanni Cabiddu Signed-off-by: Giovanni Cabiddu Reviewed-by: Fiona Trahe Signed-off-by: Herbert Xu --- drivers/crypto/qat/qat_common/adf_init.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/crypto/qat/qat_common/adf_init.c b/drivers/crypto/qat/qat_common/adf_init.c index 744c40351428..14e9f3b22c60 100644 --- a/drivers/crypto/qat/qat_common/adf_init.c +++ b/drivers/crypto/qat/qat_common/adf_init.c @@ -88,8 +88,6 @@ int adf_dev_init(struct adf_accel_dev *accel_dev) return -EFAULT; } - hw_data->enable_ints(accel_dev); - if (adf_ae_init(accel_dev)) { dev_err(&GET_DEV(accel_dev), "Failed to initialise Acceleration Engine\n"); @@ -110,6 +108,8 @@ int adf_dev_init(struct adf_accel_dev *accel_dev) } set_bit(ADF_STATUS_IRQ_ALLOCATED, &accel_dev->status); + hw_data->enable_ints(accel_dev); + /* * Subservice initialisation is divided into two stages: init and start. * This is to facilitate any ordering dependencies between services -- cgit v1.2.3 From 5147f0906d50a9d26f2b8698cd06b5680e9867ff Mon Sep 17 00:00:00 2001 From: Giovanni Cabiddu Date: Thu, 12 Aug 2021 21:21:13 +0100 Subject: crypto: qat - do not ignore errors from enable_vf2pf_comms() The function adf_dev_init() ignores the error code reported by enable_vf2pf_comms(). If the latter fails, e.g. the VF is not compatible with the pf, then the load of the VF driver progresses. This patch changes adf_dev_init() so that the error code from enable_vf2pf_comms() is returned to the caller. Signed-off-by: Giovanni Cabiddu Reviewed-by: Marco Chiappero Reviewed-by: Fiona Trahe Signed-off-by: Herbert Xu --- drivers/crypto/qat/qat_common/adf_init.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/crypto/qat/qat_common/adf_init.c b/drivers/crypto/qat/qat_common/adf_init.c index 14e9f3b22c60..346dcb8bcca5 100644 --- a/drivers/crypto/qat/qat_common/adf_init.c +++ b/drivers/crypto/qat/qat_common/adf_init.c @@ -61,6 +61,7 @@ int adf_dev_init(struct adf_accel_dev *accel_dev) struct service_hndl *service; struct list_head *list_itr; struct adf_hw_device_data *hw_data = accel_dev->hw_device; + int ret; if (!hw_data) { dev_err(&GET_DEV(accel_dev), @@ -127,9 +128,9 @@ int adf_dev_init(struct adf_accel_dev *accel_dev) } hw_data->enable_error_correction(accel_dev); - hw_data->enable_vf2pf_comms(accel_dev); + ret = hw_data->enable_vf2pf_comms(accel_dev); - return 0; + return ret; } EXPORT_SYMBOL_GPL(adf_dev_init); -- cgit v1.2.3 From 0a73c762e1eee33a5e5dc0e3488f1b7cd17249b3 Mon Sep 17 00:00:00 2001 From: Giovanni Cabiddu Date: Thu, 12 Aug 2021 21:21:14 +0100 Subject: crypto: qat - handle both source of interrupt in VF ISR The top half of the VF drivers handled only a source at the time. If an interrupt for PF2VF and bundle occurred at the same time, the ISR scheduled only the bottom half for PF2VF. This patch fixes the VF top half so that if both sources of interrupt trigger at the same time, both bottom halves are scheduled. This patch is based on earlier work done by Conor McLoughlin. Signed-off-by: Giovanni Cabiddu Reviewed-by: Marco Chiappero Reviewed-by: Fiona Trahe Signed-off-by: Herbert Xu --- drivers/crypto/qat/qat_common/adf_vf_isr.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/crypto/qat/qat_common/adf_vf_isr.c b/drivers/crypto/qat/qat_common/adf_vf_isr.c index 888388acb6bd..3e4f64d248f9 100644 --- a/drivers/crypto/qat/qat_common/adf_vf_isr.c +++ b/drivers/crypto/qat/qat_common/adf_vf_isr.c @@ -160,6 +160,7 @@ static irqreturn_t adf_isr(int irq, void *privdata) struct adf_bar *pmisc = &GET_BARS(accel_dev)[hw_data->get_misc_bar_id(hw_data)]; void __iomem *pmisc_bar_addr = pmisc->virt_addr; + bool handled = false; u32 v_int; /* Read VF INT source CSR to determine the source of VF interrupt */ @@ -172,7 +173,7 @@ static irqreturn_t adf_isr(int irq, void *privdata) /* Schedule tasklet to handle interrupt BH */ tasklet_hi_schedule(&accel_dev->vf.pf2vf_bh_tasklet); - return IRQ_HANDLED; + handled = true; } /* Check bundle interrupt */ @@ -184,10 +185,10 @@ static irqreturn_t adf_isr(int irq, void *privdata) csr_ops->write_csr_int_flag_and_col(bank->csr_addr, bank->bank_number, 0); tasklet_hi_schedule(&bank->resp_handler); - return IRQ_HANDLED; + handled = true; } - return IRQ_NONE; + return handled ? IRQ_HANDLED : IRQ_NONE; } static int adf_request_msi_irq(struct adf_accel_dev *accel_dev) -- cgit v1.2.3 From 7eadcfd633d8ef0082b194693c5057c9652fe243 Mon Sep 17 00:00:00 2001 From: Giovanni Cabiddu Date: Thu, 12 Aug 2021 21:21:15 +0100 Subject: crypto: qat - prevent spurious MSI interrupt in VF QAT GEN2 devices suffer from a defect where the MSI interrupt can be sent multiple times. If the second (spurious) interrupt is handled before the bottom half handler runs, then the extra interrupt is effectively ignored because the bottom half is only scheduled once. However, if the top half runs again after the bottom half runs, this will appear as a spurious PF to VF interrupt. This can be avoided by checking the interrupt mask register in addition to the interrupt source register in the interrupt handler. This patch is based on earlier work done by Conor McLoughlin. Signed-off-by: Giovanni Cabiddu Co-developed-by: Marco Chiappero Signed-off-by: Marco Chiappero Reviewed-by: Fiona Trahe Signed-off-by: Herbert Xu --- drivers/crypto/qat/qat_common/adf_vf_isr.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/crypto/qat/qat_common/adf_vf_isr.c b/drivers/crypto/qat/qat_common/adf_vf_isr.c index 3e4f64d248f9..4359ca633ea9 100644 --- a/drivers/crypto/qat/qat_common/adf_vf_isr.c +++ b/drivers/crypto/qat/qat_common/adf_vf_isr.c @@ -18,6 +18,7 @@ #include "adf_pf2vf_msg.h" #define ADF_VINTSOU_OFFSET 0x204 +#define ADF_VINTMSK_OFFSET 0x208 #define ADF_VINTSOU_BUN BIT(0) #define ADF_VINTSOU_PF2VF BIT(1) @@ -161,11 +162,20 @@ static irqreturn_t adf_isr(int irq, void *privdata) &GET_BARS(accel_dev)[hw_data->get_misc_bar_id(hw_data)]; void __iomem *pmisc_bar_addr = pmisc->virt_addr; bool handled = false; - u32 v_int; + u32 v_int, v_mask; /* Read VF INT source CSR to determine the source of VF interrupt */ v_int = ADF_CSR_RD(pmisc_bar_addr, ADF_VINTSOU_OFFSET); + /* Read VF INT mask CSR to determine which sources are masked */ + v_mask = ADF_CSR_RD(pmisc_bar_addr, ADF_VINTMSK_OFFSET); + + /* + * Recompute v_int ignoring sources that are masked. This is to + * avoid rescheduling the tasklet for interrupts already handled + */ + v_int &= ~v_mask; + /* Check for PF2VF interrupt */ if (v_int & ADF_VINTSOU_PF2VF) { /* Disable PF to VF interrupt */ -- cgit v1.2.3 From 3213488db01e7ddc389be2390cc154d926e714a2 Mon Sep 17 00:00:00 2001 From: Marco Chiappero Date: Thu, 12 Aug 2021 21:21:16 +0100 Subject: crypto: qat - prevent spurious MSI interrupt in PF There is a chance that the PFVF handler, adf_vf2pf_req_hndl(), runs twice for the same request when multiple interrupts come simultaneously from different VFs. Since the source VF is identified by a positional bit set in the ERRSOU registers and that is not cleared until the bottom half completes, new top halves from other VFs may reschedule a second bottom half for previous interrupts. This patch solves the problem in the ISR handler by not considering sources with already disabled interrupts (and processing pending), as set in the ERRMSK registers. Also, move some definitions where actually needed. Signed-off-by: Marco Chiappero Co-developed-by: Giovanni Cabiddu Signed-off-by: Giovanni Cabiddu Reviewed-by: Fiona Trahe Signed-off-by: Herbert Xu --- drivers/crypto/qat/qat_common/adf_accel_devices.h | 2 -- drivers/crypto/qat/qat_common/adf_isr.c | 25 ++++++++++++++++++----- 2 files changed, 20 insertions(+), 7 deletions(-) diff --git a/drivers/crypto/qat/qat_common/adf_accel_devices.h b/drivers/crypto/qat/qat_common/adf_accel_devices.h index ac435b44f1d2..2ee11b4763cd 100644 --- a/drivers/crypto/qat/qat_common/adf_accel_devices.h +++ b/drivers/crypto/qat/qat_common/adf_accel_devices.h @@ -18,8 +18,6 @@ #define ADF_4XXX_DEVICE_NAME "4xxx" #define ADF_4XXX_PCI_DEVICE_ID 0x4940 #define ADF_4XXXIOV_PCI_DEVICE_ID 0x4941 -#define ADF_ERRSOU3 (0x3A000 + 0x0C) -#define ADF_ERRSOU5 (0x3A000 + 0xD8) #define ADF_DEVICE_FUSECTL_OFFSET 0x40 #define ADF_DEVICE_LEGFUSE_OFFSET 0x4C #define ADF_DEVICE_FUSECTL_MASK 0x80000000 diff --git a/drivers/crypto/qat/qat_common/adf_isr.c b/drivers/crypto/qat/qat_common/adf_isr.c index daab02011717..a25b7845c9d3 100644 --- a/drivers/crypto/qat/qat_common/adf_isr.c +++ b/drivers/crypto/qat/qat_common/adf_isr.c @@ -16,6 +16,12 @@ #include "adf_transport_internal.h" #define ADF_MAX_NUM_VFS 32 +#define ADF_ERRSOU3 (0x3A000 + 0x0C) +#define ADF_ERRSOU5 (0x3A000 + 0xD8) +#define ADF_ERRMSK3 (0x3A000 + 0x1C) +#define ADF_ERRMSK5 (0x3A000 + 0xDC) +#define ADF_ERR_REG_VF2PF_L(vf_src) (((vf_src) & 0x01FFFE00) >> 9) +#define ADF_ERR_REG_VF2PF_U(vf_src) (((vf_src) & 0x0000FFFF) << 16) static int adf_enable_msix(struct adf_accel_dev *accel_dev) { @@ -73,14 +79,23 @@ static irqreturn_t adf_msix_isr_ae(int irq, void *dev_ptr) struct adf_hw_device_data *hw_data = accel_dev->hw_device; struct adf_bar *pmisc = &GET_BARS(accel_dev)[hw_data->get_misc_bar_id(hw_data)]; - void __iomem *pmisc_bar_addr = pmisc->virt_addr; + void __iomem *pmisc_addr = pmisc->virt_addr; + u32 errsou3, errsou5, errmsk3, errmsk5; unsigned long vf_mask; /* Get the interrupt sources triggered by VFs */ - vf_mask = ((ADF_CSR_RD(pmisc_bar_addr, ADF_ERRSOU5) & - 0x0000FFFF) << 16) | - ((ADF_CSR_RD(pmisc_bar_addr, ADF_ERRSOU3) & - 0x01FFFE00) >> 9); + errsou3 = ADF_CSR_RD(pmisc_addr, ADF_ERRSOU3); + errsou5 = ADF_CSR_RD(pmisc_addr, ADF_ERRSOU5); + vf_mask = ADF_ERR_REG_VF2PF_L(errsou3); + vf_mask |= ADF_ERR_REG_VF2PF_U(errsou5); + + /* To avoid adding duplicate entries to work queue, clear + * vf_int_mask_sets bits that are already masked in ERRMSK register. + */ + errmsk3 = ADF_CSR_RD(pmisc_addr, ADF_ERRMSK3); + errmsk5 = ADF_CSR_RD(pmisc_addr, ADF_ERRMSK5); + vf_mask &= ~ADF_ERR_REG_VF2PF_L(errmsk3); + vf_mask &= ~ADF_ERR_REG_VF2PF_U(errmsk5); if (vf_mask) { struct adf_accel_vf_info *vf_info; -- cgit v1.2.3 From 506a16642901e7ab6fd9a2629eee60c7cc844769 Mon Sep 17 00:00:00 2001 From: Marco Chiappero Date: Thu, 12 Aug 2021 21:21:17 +0100 Subject: crypto: qat - rename compatibility version definition Rename ADF_PFVF_COMPATIBILITY_VERSION in ADF_PFVF_COMPAT_THIS_VERSION since it is used to indicate the current version of the PFVF protocol. Signed-off-by: Marco Chiappero Co-developed-by: Giovanni Cabiddu Signed-off-by: Giovanni Cabiddu Reviewed-by: Fiona Trahe Signed-off-by: Herbert Xu --- drivers/crypto/qat/qat_4xxx/adf_4xxx_hw_data.c | 2 +- drivers/crypto/qat/qat_c3xxx/adf_c3xxx_hw_data.c | 2 +- drivers/crypto/qat/qat_c3xxxvf/adf_c3xxxvf_hw_data.c | 2 +- drivers/crypto/qat/qat_c62x/adf_c62x_hw_data.c | 2 +- drivers/crypto/qat/qat_c62xvf/adf_c62xvf_hw_data.c | 2 +- drivers/crypto/qat/qat_common/adf_pf2vf_msg.c | 18 +++++++++--------- drivers/crypto/qat/qat_common/adf_pf2vf_msg.h | 2 +- drivers/crypto/qat/qat_dh895xcc/adf_dh895xcc_hw_data.c | 2 +- .../crypto/qat/qat_dh895xccvf/adf_dh895xccvf_hw_data.c | 2 +- 9 files changed, 17 insertions(+), 17 deletions(-) diff --git a/drivers/crypto/qat/qat_4xxx/adf_4xxx_hw_data.c b/drivers/crypto/qat/qat_4xxx/adf_4xxx_hw_data.c index 3524ddd48930..a72142413caa 100644 --- a/drivers/crypto/qat/qat_4xxx/adf_4xxx_hw_data.c +++ b/drivers/crypto/qat/qat_4xxx/adf_4xxx_hw_data.c @@ -218,7 +218,7 @@ void adf_init_hw_data_4xxx(struct adf_hw_device_data *hw_data) hw_data->enable_ints = adf_enable_ints; hw_data->enable_vf2pf_comms = adf_pf_enable_vf2pf_comms; hw_data->reset_device = adf_reset_flr; - hw_data->min_iov_compat_ver = ADF_PFVF_COMPATIBILITY_VERSION; + hw_data->min_iov_compat_ver = ADF_PFVF_COMPAT_THIS_VERSION; hw_data->admin_ae_mask = ADF_4XXX_ADMIN_AE_MASK; hw_data->uof_get_num_objs = uof_get_num_objs; hw_data->uof_get_name = uof_get_name; diff --git a/drivers/crypto/qat/qat_c3xxx/adf_c3xxx_hw_data.c b/drivers/crypto/qat/qat_c3xxx/adf_c3xxx_hw_data.c index 1dd64af22bea..1c7f6a6f6f2d 100644 --- a/drivers/crypto/qat/qat_c3xxx/adf_c3xxx_hw_data.c +++ b/drivers/crypto/qat/qat_c3xxx/adf_c3xxx_hw_data.c @@ -211,7 +211,7 @@ void adf_init_hw_data_c3xxx(struct adf_hw_device_data *hw_data) hw_data->enable_ints = adf_enable_ints; hw_data->enable_vf2pf_comms = adf_pf_enable_vf2pf_comms; hw_data->reset_device = adf_reset_flr; - hw_data->min_iov_compat_ver = ADF_PFVF_COMPATIBILITY_VERSION; + hw_data->min_iov_compat_ver = ADF_PFVF_COMPAT_THIS_VERSION; hw_data->set_ssm_wdtimer = adf_gen2_set_ssm_wdtimer; adf_gen2_init_hw_csr_ops(&hw_data->csr_ops); } diff --git a/drivers/crypto/qat/qat_c3xxxvf/adf_c3xxxvf_hw_data.c b/drivers/crypto/qat/qat_c3xxxvf/adf_c3xxxvf_hw_data.c index 15f6b9bdfb22..476a4bf3de56 100644 --- a/drivers/crypto/qat/qat_c3xxxvf/adf_c3xxxvf_hw_data.c +++ b/drivers/crypto/qat/qat_c3xxxvf/adf_c3xxxvf_hw_data.c @@ -96,7 +96,7 @@ void adf_init_hw_data_c3xxxiov(struct adf_hw_device_data *hw_data) hw_data->get_sku = get_sku; hw_data->enable_ints = adf_vf_void_noop; hw_data->enable_vf2pf_comms = adf_enable_vf2pf_comms; - hw_data->min_iov_compat_ver = ADF_PFVF_COMPATIBILITY_VERSION; + hw_data->min_iov_compat_ver = ADF_PFVF_COMPAT_THIS_VERSION; hw_data->dev_class->instances++; adf_devmgr_update_class_index(hw_data); adf_gen2_init_hw_csr_ops(&hw_data->csr_ops); diff --git a/drivers/crypto/qat/qat_c62x/adf_c62x_hw_data.c b/drivers/crypto/qat/qat_c62x/adf_c62x_hw_data.c index 30337390513c..a202f912820c 100644 --- a/drivers/crypto/qat/qat_c62x/adf_c62x_hw_data.c +++ b/drivers/crypto/qat/qat_c62x/adf_c62x_hw_data.c @@ -213,7 +213,7 @@ void adf_init_hw_data_c62x(struct adf_hw_device_data *hw_data) hw_data->enable_ints = adf_enable_ints; hw_data->enable_vf2pf_comms = adf_pf_enable_vf2pf_comms; hw_data->reset_device = adf_reset_flr; - hw_data->min_iov_compat_ver = ADF_PFVF_COMPATIBILITY_VERSION; + hw_data->min_iov_compat_ver = ADF_PFVF_COMPAT_THIS_VERSION; hw_data->set_ssm_wdtimer = adf_gen2_set_ssm_wdtimer; adf_gen2_init_hw_csr_ops(&hw_data->csr_ops); } diff --git a/drivers/crypto/qat/qat_c62xvf/adf_c62xvf_hw_data.c b/drivers/crypto/qat/qat_c62xvf/adf_c62xvf_hw_data.c index d231583428c9..0c867208eb90 100644 --- a/drivers/crypto/qat/qat_c62xvf/adf_c62xvf_hw_data.c +++ b/drivers/crypto/qat/qat_c62xvf/adf_c62xvf_hw_data.c @@ -96,7 +96,7 @@ void adf_init_hw_data_c62xiov(struct adf_hw_device_data *hw_data) hw_data->get_sku = get_sku; hw_data->enable_ints = adf_vf_void_noop; hw_data->enable_vf2pf_comms = adf_enable_vf2pf_comms; - hw_data->min_iov_compat_ver = ADF_PFVF_COMPATIBILITY_VERSION; + hw_data->min_iov_compat_ver = ADF_PFVF_COMPAT_THIS_VERSION; hw_data->dev_class->instances++; adf_devmgr_update_class_index(hw_data); adf_gen2_init_hw_csr_ops(&hw_data->csr_ops); diff --git a/drivers/crypto/qat/qat_common/adf_pf2vf_msg.c b/drivers/crypto/qat/qat_common/adf_pf2vf_msg.c index a1b77bd7a894..e29f5f1dc806 100644 --- a/drivers/crypto/qat/qat_common/adf_pf2vf_msg.c +++ b/drivers/crypto/qat/qat_common/adf_pf2vf_msg.c @@ -216,7 +216,7 @@ void adf_vf2pf_req_hndl(struct adf_accel_vf_info *vf_info) resp = (ADF_PF2VF_MSGORIGIN_SYSTEM | (ADF_PF2VF_MSGTYPE_VERSION_RESP << ADF_PF2VF_MSGTYPE_SHIFT) | - (ADF_PFVF_COMPATIBILITY_VERSION << + (ADF_PFVF_COMPAT_THIS_VERSION << ADF_PF2VF_VERSION_RESP_VERS_SHIFT)); dev_dbg(&GET_DEV(accel_dev), @@ -226,19 +226,19 @@ void adf_vf2pf_req_hndl(struct adf_accel_vf_info *vf_info) if (vf_compat_ver < hw_data->min_iov_compat_ver) { dev_err(&GET_DEV(accel_dev), "VF (vers %d) incompatible with PF (vers %d)\n", - vf_compat_ver, ADF_PFVF_COMPATIBILITY_VERSION); + vf_compat_ver, ADF_PFVF_COMPAT_THIS_VERSION); resp |= ADF_PF2VF_VF_INCOMPATIBLE << ADF_PF2VF_VERSION_RESP_RESULT_SHIFT; - } else if (vf_compat_ver > ADF_PFVF_COMPATIBILITY_VERSION) { + } else if (vf_compat_ver > ADF_PFVF_COMPAT_THIS_VERSION) { dev_err(&GET_DEV(accel_dev), "VF (vers %d) compat with PF (vers %d) unkn.\n", - vf_compat_ver, ADF_PFVF_COMPATIBILITY_VERSION); + vf_compat_ver, ADF_PFVF_COMPAT_THIS_VERSION); resp |= ADF_PF2VF_VF_COMPAT_UNKNOWN << ADF_PF2VF_VERSION_RESP_RESULT_SHIFT; } else { dev_dbg(&GET_DEV(accel_dev), "VF (vers %d) compatible with PF (vers %d)\n", - vf_compat_ver, ADF_PFVF_COMPATIBILITY_VERSION); + vf_compat_ver, ADF_PFVF_COMPAT_THIS_VERSION); resp |= ADF_PF2VF_VF_COMPATIBLE << ADF_PF2VF_VERSION_RESP_RESULT_SHIFT; } @@ -251,7 +251,7 @@ void adf_vf2pf_req_hndl(struct adf_accel_vf_info *vf_info) resp = (ADF_PF2VF_MSGORIGIN_SYSTEM | (ADF_PF2VF_MSGTYPE_VERSION_RESP << ADF_PF2VF_MSGTYPE_SHIFT) | - (ADF_PFVF_COMPATIBILITY_VERSION << + (ADF_PFVF_COMPAT_THIS_VERSION << ADF_PF2VF_VERSION_RESP_VERS_SHIFT)); resp |= ADF_PF2VF_VF_COMPATIBLE << ADF_PF2VF_VERSION_RESP_RESULT_SHIFT; @@ -313,8 +313,8 @@ static int adf_vf2pf_request_version(struct adf_accel_dev *accel_dev) msg = ADF_VF2PF_MSGORIGIN_SYSTEM; msg |= ADF_VF2PF_MSGTYPE_COMPAT_VER_REQ << ADF_VF2PF_MSGTYPE_SHIFT; - msg |= ADF_PFVF_COMPATIBILITY_VERSION << ADF_VF2PF_COMPAT_VER_REQ_SHIFT; - BUILD_BUG_ON(ADF_PFVF_COMPATIBILITY_VERSION > 255); + msg |= ADF_PFVF_COMPAT_THIS_VERSION << ADF_VF2PF_COMPAT_VER_REQ_SHIFT; + BUILD_BUG_ON(ADF_PFVF_COMPAT_THIS_VERSION > 255); /* Send request from VF to PF */ ret = adf_iov_putmsg(accel_dev, msg, 0); @@ -345,7 +345,7 @@ static int adf_vf2pf_request_version(struct adf_accel_dev *accel_dev) dev_err(&GET_DEV(accel_dev), "PF (vers %d) and VF (vers %d) are not compatible\n", accel_dev->vf.pf_version, - ADF_PFVF_COMPATIBILITY_VERSION); + ADF_PFVF_COMPAT_THIS_VERSION); return -EINVAL; default: dev_err(&GET_DEV(accel_dev), diff --git a/drivers/crypto/qat/qat_common/adf_pf2vf_msg.h b/drivers/crypto/qat/qat_common/adf_pf2vf_msg.h index 0690c031bfce..ffd43aa50b57 100644 --- a/drivers/crypto/qat/qat_common/adf_pf2vf_msg.h +++ b/drivers/crypto/qat/qat_common/adf_pf2vf_msg.h @@ -52,7 +52,7 @@ * IN_USE_BY pattern as part of a collision control scheme (see adf_iov_putmsg). */ -#define ADF_PFVF_COMPATIBILITY_VERSION 0x1 /* PF<->VF compat */ +#define ADF_PFVF_COMPAT_THIS_VERSION 0x1 /* PF<->VF compat */ /* PF->VF messages */ #define ADF_PF2VF_INT BIT(0) diff --git a/drivers/crypto/qat/qat_dh895xcc/adf_dh895xcc_hw_data.c b/drivers/crypto/qat/qat_dh895xcc/adf_dh895xcc_hw_data.c index 7dd7cd6c3ef8..dced2426edc1 100644 --- a/drivers/crypto/qat/qat_dh895xcc/adf_dh895xcc_hw_data.c +++ b/drivers/crypto/qat/qat_dh895xcc/adf_dh895xcc_hw_data.c @@ -232,7 +232,7 @@ void adf_init_hw_data_dh895xcc(struct adf_hw_device_data *hw_data) hw_data->enable_ints = adf_enable_ints; hw_data->enable_vf2pf_comms = adf_pf_enable_vf2pf_comms; hw_data->reset_device = adf_reset_sbr; - hw_data->min_iov_compat_ver = ADF_PFVF_COMPATIBILITY_VERSION; + hw_data->min_iov_compat_ver = ADF_PFVF_COMPAT_THIS_VERSION; adf_gen2_init_hw_csr_ops(&hw_data->csr_ops); } diff --git a/drivers/crypto/qat/qat_dh895xccvf/adf_dh895xccvf_hw_data.c b/drivers/crypto/qat/qat_dh895xccvf/adf_dh895xccvf_hw_data.c index f14fb82ed6df..ac233a39a530 100644 --- a/drivers/crypto/qat/qat_dh895xccvf/adf_dh895xccvf_hw_data.c +++ b/drivers/crypto/qat/qat_dh895xccvf/adf_dh895xccvf_hw_data.c @@ -96,7 +96,7 @@ void adf_init_hw_data_dh895xcciov(struct adf_hw_device_data *hw_data) hw_data->get_sku = get_sku; hw_data->enable_ints = adf_vf_void_noop; hw_data->enable_vf2pf_comms = adf_enable_vf2pf_comms; - hw_data->min_iov_compat_ver = ADF_PFVF_COMPATIBILITY_VERSION; + hw_data->min_iov_compat_ver = ADF_PFVF_COMPAT_THIS_VERSION; hw_data->dev_class->instances++; adf_devmgr_update_class_index(hw_data); adf_gen2_init_hw_csr_ops(&hw_data->csr_ops); -- cgit v1.2.3 From e6eefd12dd779c098961a698ad98be1e0cd25635 Mon Sep 17 00:00:00 2001 From: Svyatoslav Pankratov Date: Thu, 12 Aug 2021 21:21:18 +0100 Subject: crypto: qat - remove intermediate tasklet for vf2pf The PF driver uses the tasklet vf2pf_bh_tasklet to schedule a workqueue to handle the vf2vf protocol (pf2vf_resp_wq). Since the tasklet is only used to schedule the workqueue, this patch removes it and schedules the pf2vf_resp_wq workqueue directly for the top half. Signed-off-by: Svyatoslav Pankratov Co-developed-by: Giovanni Cabiddu Signed-off-by: Giovanni Cabiddu Reviewed-by: Marco Chiappero Reviewed-by: Fiona Trahe Signed-off-by: Herbert Xu --- drivers/crypto/qat/qat_common/adf_accel_devices.h | 1 - drivers/crypto/qat/qat_common/adf_common_drv.h | 1 + drivers/crypto/qat/qat_common/adf_isr.c | 8 +++----- drivers/crypto/qat/qat_common/adf_sriov.c | 8 +------- 4 files changed, 5 insertions(+), 13 deletions(-) diff --git a/drivers/crypto/qat/qat_common/adf_accel_devices.h b/drivers/crypto/qat/qat_common/adf_accel_devices.h index 2ee11b4763cd..180c7dba3ff2 100644 --- a/drivers/crypto/qat/qat_common/adf_accel_devices.h +++ b/drivers/crypto/qat/qat_common/adf_accel_devices.h @@ -225,7 +225,6 @@ struct adf_fw_loader_data { struct adf_accel_vf_info { struct adf_accel_dev *accel_dev; - struct tasklet_struct vf2pf_bh_tasklet; struct mutex pf2vf_lock; /* protect CSR access for PF2VF messages */ struct ratelimit_state vf2pf_ratelimit; u32 vf_nr; diff --git a/drivers/crypto/qat/qat_common/adf_common_drv.h b/drivers/crypto/qat/qat_common/adf_common_drv.h index c7deca7f0607..bd76b8a86b86 100644 --- a/drivers/crypto/qat/qat_common/adf_common_drv.h +++ b/drivers/crypto/qat/qat_common/adf_common_drv.h @@ -197,6 +197,7 @@ void adf_enable_vf2pf_interrupts(struct adf_accel_dev *accel_dev, u32 vf_mask); void adf_enable_pf2vf_interrupts(struct adf_accel_dev *accel_dev); void adf_disable_pf2vf_interrupts(struct adf_accel_dev *accel_dev); +void adf_schedule_vf2pf_handler(struct adf_accel_vf_info *vf_info); int adf_vf2pf_init(struct adf_accel_dev *accel_dev); void adf_vf2pf_shutdown(struct adf_accel_dev *accel_dev); diff --git a/drivers/crypto/qat/qat_common/adf_isr.c b/drivers/crypto/qat/qat_common/adf_isr.c index a25b7845c9d3..2302e43dfaf4 100644 --- a/drivers/crypto/qat/qat_common/adf_isr.c +++ b/drivers/crypto/qat/qat_common/adf_isr.c @@ -106,9 +106,8 @@ static irqreturn_t adf_msix_isr_ae(int irq, void *dev_ptr) adf_disable_vf2pf_interrupts(accel_dev, vf_mask); /* - * Schedule tasklets to handle VF2PF interrupt BHs - * unless the VF is malicious and is attempting to - * flood the host OS with VF2PF interrupts. + * Handle VF2PF interrupt unless the VF is malicious and + * is attempting to flood the host OS with VF2PF interrupts. */ for_each_set_bit(i, &vf_mask, ADF_MAX_NUM_VFS) { vf_info = accel_dev->pf.vf_info + i; @@ -120,8 +119,7 @@ static irqreturn_t adf_msix_isr_ae(int irq, void *dev_ptr) continue; } - /* Tasklet will re-enable ints from this VF */ - tasklet_hi_schedule(&vf_info->vf2pf_bh_tasklet); + adf_schedule_vf2pf_handler(vf_info); irq_handled = true; } diff --git a/drivers/crypto/qat/qat_common/adf_sriov.c b/drivers/crypto/qat/qat_common/adf_sriov.c index 8c822c2861c2..90ec057f9183 100644 --- a/drivers/crypto/qat/qat_common/adf_sriov.c +++ b/drivers/crypto/qat/qat_common/adf_sriov.c @@ -24,9 +24,8 @@ static void adf_iov_send_resp(struct work_struct *work) kfree(pf2vf_resp); } -static void adf_vf2pf_bh_handler(void *data) +void adf_schedule_vf2pf_handler(struct adf_accel_vf_info *vf_info) { - struct adf_accel_vf_info *vf_info = (struct adf_accel_vf_info *)data; struct adf_pf2vf_resp *pf2vf_resp; pf2vf_resp = kzalloc(sizeof(*pf2vf_resp), GFP_ATOMIC); @@ -52,9 +51,6 @@ static int adf_enable_sriov(struct adf_accel_dev *accel_dev) vf_info->accel_dev = accel_dev; vf_info->vf_nr = i; - tasklet_init(&vf_info->vf2pf_bh_tasklet, - (void *)adf_vf2pf_bh_handler, - (unsigned long)vf_info); mutex_init(&vf_info->pf2vf_lock); ratelimit_state_init(&vf_info->vf2pf_ratelimit, DEFAULT_RATELIMIT_INTERVAL, @@ -110,8 +106,6 @@ void adf_disable_sriov(struct adf_accel_dev *accel_dev) hw_data->configure_iov_threads(accel_dev, false); for (i = 0, vf = accel_dev->pf.vf_info; i < totalvfs; i++, vf++) { - tasklet_disable(&vf->vf2pf_bh_tasklet); - tasklet_kill(&vf->vf2pf_bh_tasklet); mutex_destroy(&vf->pf2vf_lock); } -- cgit v1.2.3 From 3d655732b0199562267a05c7ff69ecdd11632939 Mon Sep 17 00:00:00 2001 From: Marco Chiappero Date: Thu, 12 Aug 2021 21:21:19 +0100 Subject: crypto: qat - fix reuse of completion variable Use reinit_completion() to set to a clean state a completion variable, used to coordinate the VF to PF request-response flow, before every new VF request. Signed-off-by: Marco Chiappero Co-developed-by: Giovanni Cabiddu Signed-off-by: Giovanni Cabiddu Reviewed-by: Fiona Trahe Signed-off-by: Herbert Xu --- drivers/crypto/qat/qat_common/adf_pf2vf_msg.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/crypto/qat/qat_common/adf_pf2vf_msg.c b/drivers/crypto/qat/qat_common/adf_pf2vf_msg.c index e29f5f1dc806..d42461cb611f 100644 --- a/drivers/crypto/qat/qat_common/adf_pf2vf_msg.c +++ b/drivers/crypto/qat/qat_common/adf_pf2vf_msg.c @@ -316,6 +316,8 @@ static int adf_vf2pf_request_version(struct adf_accel_dev *accel_dev) msg |= ADF_PFVF_COMPAT_THIS_VERSION << ADF_VF2PF_COMPAT_VER_REQ_SHIFT; BUILD_BUG_ON(ADF_PFVF_COMPAT_THIS_VERSION > 255); + reinit_completion(&accel_dev->vf.iov_msg_completion); + /* Send request from VF to PF */ ret = adf_iov_putmsg(accel_dev, msg, 0); if (ret) { -- cgit v1.2.3 From 9800678f05a8431532e8f87c79b3cba6ec8ab8f5 Mon Sep 17 00:00:00 2001 From: Marco Chiappero Date: Thu, 12 Aug 2021 21:21:20 +0100 Subject: crypto: qat - move pf2vf interrupt [en|dis]able to adf_vf_isr.c Interrupt code to enable interrupts from PF does not belong to the protocol code, so move it to the interrupt handling specific file for better code organization. Signed-off-by: Marco Chiappero Co-developed-by: Giovanni Cabiddu Signed-off-by: Giovanni Cabiddu Reviewed-by: Fiona Trahe Signed-off-by: Herbert Xu --- drivers/crypto/qat/qat_common/adf_pf2vf_msg.c | 20 -------------------- drivers/crypto/qat/qat_common/adf_vf_isr.c | 20 ++++++++++++++++++++ 2 files changed, 20 insertions(+), 20 deletions(-) diff --git a/drivers/crypto/qat/qat_common/adf_pf2vf_msg.c b/drivers/crypto/qat/qat_common/adf_pf2vf_msg.c index d42461cb611f..0a927ed91b19 100644 --- a/drivers/crypto/qat/qat_common/adf_pf2vf_msg.c +++ b/drivers/crypto/qat/qat_common/adf_pf2vf_msg.c @@ -11,26 +11,6 @@ #define ADF_DH895XCC_ERRMSK5 (ADF_DH895XCC_EP_OFFSET + 0xDC) #define ADF_DH895XCC_ERRMSK5_VF2PF_U_MASK(vf_mask) (vf_mask >> 16) -void adf_enable_pf2vf_interrupts(struct adf_accel_dev *accel_dev) -{ - struct adf_accel_pci *pci_info = &accel_dev->accel_pci_dev; - struct adf_hw_device_data *hw_data = accel_dev->hw_device; - void __iomem *pmisc_bar_addr = - pci_info->pci_bars[hw_data->get_misc_bar_id(hw_data)].virt_addr; - - ADF_CSR_WR(pmisc_bar_addr, hw_data->get_vintmsk_offset(0), 0x0); -} - -void adf_disable_pf2vf_interrupts(struct adf_accel_dev *accel_dev) -{ - struct adf_accel_pci *pci_info = &accel_dev->accel_pci_dev; - struct adf_hw_device_data *hw_data = accel_dev->hw_device; - void __iomem *pmisc_bar_addr = - pci_info->pci_bars[hw_data->get_misc_bar_id(hw_data)].virt_addr; - - ADF_CSR_WR(pmisc_bar_addr, hw_data->get_vintmsk_offset(0), 0x2); -} - void adf_enable_vf2pf_interrupts(struct adf_accel_dev *accel_dev, u32 vf_mask) { diff --git a/drivers/crypto/qat/qat_common/adf_vf_isr.c b/drivers/crypto/qat/qat_common/adf_vf_isr.c index 4359ca633ea9..aa44e8638fa8 100644 --- a/drivers/crypto/qat/qat_common/adf_vf_isr.c +++ b/drivers/crypto/qat/qat_common/adf_vf_isr.c @@ -29,6 +29,26 @@ struct adf_vf_stop_data { struct work_struct work; }; +void adf_enable_pf2vf_interrupts(struct adf_accel_dev *accel_dev) +{ + struct adf_accel_pci *pci_info = &accel_dev->accel_pci_dev; + struct adf_hw_device_data *hw_data = accel_dev->hw_device; + void __iomem *pmisc_bar_addr = + pci_info->pci_bars[hw_data->get_misc_bar_id(hw_data)].virt_addr; + + ADF_CSR_WR(pmisc_bar_addr, hw_data->get_vintmsk_offset(0), 0x0); +} + +void adf_disable_pf2vf_interrupts(struct adf_accel_dev *accel_dev) +{ + struct adf_accel_pci *pci_info = &accel_dev->accel_pci_dev; + struct adf_hw_device_data *hw_data = accel_dev->hw_device; + void __iomem *pmisc_bar_addr = + pci_info->pci_bars[hw_data->get_misc_bar_id(hw_data)].virt_addr; + + ADF_CSR_WR(pmisc_bar_addr, hw_data->get_vintmsk_offset(0), 0x2); +} + static int adf_enable_msi(struct adf_accel_dev *accel_dev) { struct adf_accel_pci *pci_dev_info = &accel_dev->accel_pci_dev; -- cgit v1.2.3 From 07df385e645ed3b7fd3ac2d5c4c3e825b54bb7e4 Mon Sep 17 00:00:00 2001 From: Kanchana Velusamy Date: Thu, 12 Aug 2021 21:21:21 +0100 Subject: crypto: qat - protect interrupt mask CSRs with a spinlock In the PF interrupt handler, the interrupt is disabled for a set of VFs by writing to the interrupt source mask register, ERRMSK. The interrupt is re-enabled in the bottom half handler by writing to the same CSR. This is done through the functions enable_vf2pf_interrupts() and disable_vf2pf_interrupts() which perform a read-modify-write operation on the ERRMSK registers to mask and unmask the source of interrupt. There can be a race condition where the top half handler for one VF interrupt runs just as the bottom half for another VF is about to re-enable the interrupt. Depending on whether the top or bottom half updates the CSR first, this would result either in a spurious interrupt or in the interrupt not being re-enabled. This patch protects the access of ERRMSK with a spinlock. The functions adf_enable_vf2pf_interrupts() and adf_disable_vf2pf_interrupts() have been changed to acquire a spin lock before accessing and modifying the ERRMSK registers. These functions use spin_lock_irqsave() to disable IRQs and avoid potential deadlocks. In addition, the function adf_disable_vf2pf_interrupts_irq() has been added. This uses spin_lock() and it is meant to be used in the top half only. Signed-off-by: Kanchana Velusamy Co-developed-by: Marco Chiappero Signed-off-by: Marco Chiappero Co-developed-by: Giovanni Cabiddu Signed-off-by: Giovanni Cabiddu Reviewed-by: Fiona Trahe Signed-off-by: Herbert Xu --- drivers/crypto/qat/qat_c3xxx/adf_c3xxx_hw_data.c | 2 ++ drivers/crypto/qat/qat_c62x/adf_c62x_hw_data.c | 2 ++ drivers/crypto/qat/qat_common/adf_accel_devices.h | 2 ++ drivers/crypto/qat/qat_common/adf_common_drv.h | 2 ++ drivers/crypto/qat/qat_common/adf_isr.c | 2 +- drivers/crypto/qat/qat_common/adf_pf2vf_msg.c | 33 ++++++++++++++++++++-- .../crypto/qat/qat_dh895xcc/adf_dh895xcc_hw_data.c | 2 ++ 7 files changed, 41 insertions(+), 4 deletions(-) diff --git a/drivers/crypto/qat/qat_c3xxx/adf_c3xxx_hw_data.c b/drivers/crypto/qat/qat_c3xxx/adf_c3xxx_hw_data.c index 1c7f6a6f6f2d..912e84ecb9a3 100644 --- a/drivers/crypto/qat/qat_c3xxx/adf_c3xxx_hw_data.c +++ b/drivers/crypto/qat/qat_c3xxx/adf_c3xxx_hw_data.c @@ -161,6 +161,8 @@ static void adf_enable_ints(struct adf_accel_dev *accel_dev) static int adf_pf_enable_vf2pf_comms(struct adf_accel_dev *accel_dev) { + spin_lock_init(&accel_dev->pf.vf2pf_ints_lock); + return 0; } diff --git a/drivers/crypto/qat/qat_c62x/adf_c62x_hw_data.c b/drivers/crypto/qat/qat_c62x/adf_c62x_hw_data.c index a202f912820c..069b5d6857e8 100644 --- a/drivers/crypto/qat/qat_c62x/adf_c62x_hw_data.c +++ b/drivers/crypto/qat/qat_c62x/adf_c62x_hw_data.c @@ -163,6 +163,8 @@ static void adf_enable_ints(struct adf_accel_dev *accel_dev) static int adf_pf_enable_vf2pf_comms(struct adf_accel_dev *accel_dev) { + spin_lock_init(&accel_dev->pf.vf2pf_ints_lock); + return 0; } diff --git a/drivers/crypto/qat/qat_common/adf_accel_devices.h b/drivers/crypto/qat/qat_common/adf_accel_devices.h index 180c7dba3ff2..8250cf856e07 100644 --- a/drivers/crypto/qat/qat_common/adf_accel_devices.h +++ b/drivers/crypto/qat/qat_common/adf_accel_devices.h @@ -246,6 +246,8 @@ struct adf_accel_dev { struct adf_accel_pci accel_pci_dev; union { struct { + /* protects VF2PF interrupts access */ + spinlock_t vf2pf_ints_lock; /* vf_info is non-zero when SR-IOV is init'ed */ struct adf_accel_vf_info *vf_info; } pf; diff --git a/drivers/crypto/qat/qat_common/adf_common_drv.h b/drivers/crypto/qat/qat_common/adf_common_drv.h index bd76b8a86b86..3f6277347278 100644 --- a/drivers/crypto/qat/qat_common/adf_common_drv.h +++ b/drivers/crypto/qat/qat_common/adf_common_drv.h @@ -193,6 +193,8 @@ int adf_sriov_configure(struct pci_dev *pdev, int numvfs); void adf_disable_sriov(struct adf_accel_dev *accel_dev); void adf_disable_vf2pf_interrupts(struct adf_accel_dev *accel_dev, u32 vf_mask); +void adf_disable_vf2pf_interrupts_irq(struct adf_accel_dev *accel_dev, + u32 vf_mask); void adf_enable_vf2pf_interrupts(struct adf_accel_dev *accel_dev, u32 vf_mask); void adf_enable_pf2vf_interrupts(struct adf_accel_dev *accel_dev); diff --git a/drivers/crypto/qat/qat_common/adf_isr.c b/drivers/crypto/qat/qat_common/adf_isr.c index 2302e43dfaf4..c678d5c531aa 100644 --- a/drivers/crypto/qat/qat_common/adf_isr.c +++ b/drivers/crypto/qat/qat_common/adf_isr.c @@ -103,7 +103,7 @@ static irqreturn_t adf_msix_isr_ae(int irq, void *dev_ptr) int i; /* Disable VF2PF interrupts for VFs with pending ints */ - adf_disable_vf2pf_interrupts(accel_dev, vf_mask); + adf_disable_vf2pf_interrupts_irq(accel_dev, vf_mask); /* * Handle VF2PF interrupt unless the VF is malicious and diff --git a/drivers/crypto/qat/qat_common/adf_pf2vf_msg.c b/drivers/crypto/qat/qat_common/adf_pf2vf_msg.c index 0a927ed91b19..1b0df4a5b8b7 100644 --- a/drivers/crypto/qat/qat_common/adf_pf2vf_msg.c +++ b/drivers/crypto/qat/qat_common/adf_pf2vf_msg.c @@ -11,8 +11,8 @@ #define ADF_DH895XCC_ERRMSK5 (ADF_DH895XCC_EP_OFFSET + 0xDC) #define ADF_DH895XCC_ERRMSK5_VF2PF_U_MASK(vf_mask) (vf_mask >> 16) -void adf_enable_vf2pf_interrupts(struct adf_accel_dev *accel_dev, - u32 vf_mask) +static void __adf_enable_vf2pf_interrupts(struct adf_accel_dev *accel_dev, + u32 vf_mask) { struct adf_hw_device_data *hw_data = accel_dev->hw_device; struct adf_bar *pmisc = @@ -35,7 +35,17 @@ void adf_enable_vf2pf_interrupts(struct adf_accel_dev *accel_dev, } } -void adf_disable_vf2pf_interrupts(struct adf_accel_dev *accel_dev, u32 vf_mask) +void adf_enable_vf2pf_interrupts(struct adf_accel_dev *accel_dev, u32 vf_mask) +{ + unsigned long flags; + + spin_lock_irqsave(&accel_dev->pf.vf2pf_ints_lock, flags); + __adf_enable_vf2pf_interrupts(accel_dev, vf_mask); + spin_unlock_irqrestore(&accel_dev->pf.vf2pf_ints_lock, flags); +} + +static void __adf_disable_vf2pf_interrupts(struct adf_accel_dev *accel_dev, + u32 vf_mask) { struct adf_hw_device_data *hw_data = accel_dev->hw_device; struct adf_bar *pmisc = @@ -58,6 +68,22 @@ void adf_disable_vf2pf_interrupts(struct adf_accel_dev *accel_dev, u32 vf_mask) } } +void adf_disable_vf2pf_interrupts(struct adf_accel_dev *accel_dev, u32 vf_mask) +{ + unsigned long flags; + + spin_lock_irqsave(&accel_dev->pf.vf2pf_ints_lock, flags); + __adf_disable_vf2pf_interrupts(accel_dev, vf_mask); + spin_unlock_irqrestore(&accel_dev->pf.vf2pf_ints_lock, flags); +} + +void adf_disable_vf2pf_interrupts_irq(struct adf_accel_dev *accel_dev, u32 vf_mask) +{ + spin_lock(&accel_dev->pf.vf2pf_ints_lock); + __adf_disable_vf2pf_interrupts(accel_dev, vf_mask); + spin_unlock(&accel_dev->pf.vf2pf_ints_lock); +} + static int __adf_iov_putmsg(struct adf_accel_dev *accel_dev, u32 msg, u8 vf_nr) { struct adf_accel_pci *pci_info = &accel_dev->accel_pci_dev; @@ -264,6 +290,7 @@ void adf_vf2pf_req_hndl(struct adf_accel_vf_info *vf_info) /* re-enable interrupt on PF from this VF */ adf_enable_vf2pf_interrupts(accel_dev, (1 << vf_nr)); + return; err: dev_dbg(&GET_DEV(accel_dev), "Unknown message from VF%d (0x%x);\n", diff --git a/drivers/crypto/qat/qat_dh895xcc/adf_dh895xcc_hw_data.c b/drivers/crypto/qat/qat_dh895xcc/adf_dh895xcc_hw_data.c index dced2426edc1..07e7ba5c057d 100644 --- a/drivers/crypto/qat/qat_dh895xcc/adf_dh895xcc_hw_data.c +++ b/drivers/crypto/qat/qat_dh895xcc/adf_dh895xcc_hw_data.c @@ -182,6 +182,8 @@ static void adf_enable_ints(struct adf_accel_dev *accel_dev) static int adf_pf_enable_vf2pf_comms(struct adf_accel_dev *accel_dev) { + spin_lock_init(&accel_dev->pf.vf2pf_ints_lock); + return 0; } -- cgit v1.2.3 From b90c1c4d3fa8cd90f4e8245b13564380fd0bfad1 Mon Sep 17 00:00:00 2001 From: Marco Chiappero Date: Thu, 12 Aug 2021 21:21:22 +0100 Subject: crypto: qat - fix naming for init/shutdown VF to PF notifications At start and shutdown, VFs notify the PF about their state. These notifications are carried out through a message exchange using the PFVF protocol. Function names lead to believe they do perform init or shutdown logic. This is to fix the naming to better reflect their purpose. Signed-off-by: Marco Chiappero Co-developed-by: Giovanni Cabiddu Signed-off-by: Giovanni Cabiddu Reviewed-by: Fiona Trahe Signed-off-by: Herbert Xu --- drivers/crypto/qat/qat_c3xxxvf/adf_c3xxxvf_hw_data.c | 4 ++-- drivers/crypto/qat/qat_c62xvf/adf_c62xvf_hw_data.c | 4 ++-- drivers/crypto/qat/qat_common/adf_common_drv.h | 8 ++++---- drivers/crypto/qat/qat_common/adf_vf2pf_msg.c | 12 ++++++------ drivers/crypto/qat/qat_dh895xccvf/adf_dh895xccvf_hw_data.c | 4 ++-- 5 files changed, 16 insertions(+), 16 deletions(-) diff --git a/drivers/crypto/qat/qat_c3xxxvf/adf_c3xxxvf_hw_data.c b/drivers/crypto/qat/qat_c3xxxvf/adf_c3xxxvf_hw_data.c index 476a4bf3de56..da0790f26574 100644 --- a/drivers/crypto/qat/qat_c3xxxvf/adf_c3xxxvf_hw_data.c +++ b/drivers/crypto/qat/qat_c3xxxvf/adf_c3xxxvf_hw_data.c @@ -81,10 +81,10 @@ void adf_init_hw_data_c3xxxiov(struct adf_hw_device_data *hw_data) hw_data->enable_error_correction = adf_vf_void_noop; hw_data->init_admin_comms = adf_vf_int_noop; hw_data->exit_admin_comms = adf_vf_void_noop; - hw_data->send_admin_init = adf_vf2pf_init; + hw_data->send_admin_init = adf_vf2pf_notify_init; hw_data->init_arb = adf_vf_int_noop; hw_data->exit_arb = adf_vf_void_noop; - hw_data->disable_iov = adf_vf2pf_shutdown; + hw_data->disable_iov = adf_vf2pf_notify_shutdown; hw_data->get_accel_mask = get_accel_mask; hw_data->get_ae_mask = get_ae_mask; hw_data->get_num_accels = get_num_accels; diff --git a/drivers/crypto/qat/qat_c62xvf/adf_c62xvf_hw_data.c b/drivers/crypto/qat/qat_c62xvf/adf_c62xvf_hw_data.c index 0c867208eb90..e8ed5970b2f2 100644 --- a/drivers/crypto/qat/qat_c62xvf/adf_c62xvf_hw_data.c +++ b/drivers/crypto/qat/qat_c62xvf/adf_c62xvf_hw_data.c @@ -81,10 +81,10 @@ void adf_init_hw_data_c62xiov(struct adf_hw_device_data *hw_data) hw_data->enable_error_correction = adf_vf_void_noop; hw_data->init_admin_comms = adf_vf_int_noop; hw_data->exit_admin_comms = adf_vf_void_noop; - hw_data->send_admin_init = adf_vf2pf_init; + hw_data->send_admin_init = adf_vf2pf_notify_init; hw_data->init_arb = adf_vf_int_noop; hw_data->exit_arb = adf_vf_void_noop; - hw_data->disable_iov = adf_vf2pf_shutdown; + hw_data->disable_iov = adf_vf2pf_notify_shutdown; hw_data->get_accel_mask = get_accel_mask; hw_data->get_ae_mask = get_ae_mask; hw_data->get_num_accels = get_num_accels; diff --git a/drivers/crypto/qat/qat_common/adf_common_drv.h b/drivers/crypto/qat/qat_common/adf_common_drv.h index 3f6277347278..4b18843f9845 100644 --- a/drivers/crypto/qat/qat_common/adf_common_drv.h +++ b/drivers/crypto/qat/qat_common/adf_common_drv.h @@ -201,8 +201,8 @@ void adf_enable_pf2vf_interrupts(struct adf_accel_dev *accel_dev); void adf_disable_pf2vf_interrupts(struct adf_accel_dev *accel_dev); void adf_schedule_vf2pf_handler(struct adf_accel_vf_info *vf_info); -int adf_vf2pf_init(struct adf_accel_dev *accel_dev); -void adf_vf2pf_shutdown(struct adf_accel_dev *accel_dev); +int adf_vf2pf_notify_init(struct adf_accel_dev *accel_dev); +void adf_vf2pf_notify_shutdown(struct adf_accel_dev *accel_dev); int adf_init_pf_wq(void); void adf_exit_pf_wq(void); int adf_init_vf_wq(void); @@ -222,12 +222,12 @@ static inline void adf_disable_pf2vf_interrupts(struct adf_accel_dev *accel_dev) { } -static inline int adf_vf2pf_init(struct adf_accel_dev *accel_dev) +static inline int adf_vf2pf_notify_init(struct adf_accel_dev *accel_dev) { return 0; } -static inline void adf_vf2pf_shutdown(struct adf_accel_dev *accel_dev) +static inline void adf_vf2pf_notify_shutdown(struct adf_accel_dev *accel_dev) { } diff --git a/drivers/crypto/qat/qat_common/adf_vf2pf_msg.c b/drivers/crypto/qat/qat_common/adf_vf2pf_msg.c index e85bd62d134a..3e25fac051b2 100644 --- a/drivers/crypto/qat/qat_common/adf_vf2pf_msg.c +++ b/drivers/crypto/qat/qat_common/adf_vf2pf_msg.c @@ -5,14 +5,14 @@ #include "adf_pf2vf_msg.h" /** - * adf_vf2pf_init() - send init msg to PF + * adf_vf2pf_notify_init() - send init msg to PF * @accel_dev: Pointer to acceleration VF device. * * Function sends an init message from the VF to a PF * * Return: 0 on success, error code otherwise. */ -int adf_vf2pf_init(struct adf_accel_dev *accel_dev) +int adf_vf2pf_notify_init(struct adf_accel_dev *accel_dev) { u32 msg = (ADF_VF2PF_MSGORIGIN_SYSTEM | (ADF_VF2PF_MSGTYPE_INIT << ADF_VF2PF_MSGTYPE_SHIFT)); @@ -25,17 +25,17 @@ int adf_vf2pf_init(struct adf_accel_dev *accel_dev) set_bit(ADF_STATUS_PF_RUNNING, &accel_dev->status); return 0; } -EXPORT_SYMBOL_GPL(adf_vf2pf_init); +EXPORT_SYMBOL_GPL(adf_vf2pf_notify_init); /** - * adf_vf2pf_shutdown() - send shutdown msg to PF + * adf_vf2pf_notify_shutdown() - send shutdown msg to PF * @accel_dev: Pointer to acceleration VF device. * * Function sends a shutdown message from the VF to a PF * * Return: void */ -void adf_vf2pf_shutdown(struct adf_accel_dev *accel_dev) +void adf_vf2pf_notify_shutdown(struct adf_accel_dev *accel_dev) { u32 msg = (ADF_VF2PF_MSGORIGIN_SYSTEM | (ADF_VF2PF_MSGTYPE_SHUTDOWN << ADF_VF2PF_MSGTYPE_SHIFT)); @@ -45,4 +45,4 @@ void adf_vf2pf_shutdown(struct adf_accel_dev *accel_dev) dev_err(&GET_DEV(accel_dev), "Failed to send Shutdown event to PF\n"); } -EXPORT_SYMBOL_GPL(adf_vf2pf_shutdown); +EXPORT_SYMBOL_GPL(adf_vf2pf_notify_shutdown); diff --git a/drivers/crypto/qat/qat_dh895xccvf/adf_dh895xccvf_hw_data.c b/drivers/crypto/qat/qat_dh895xccvf/adf_dh895xccvf_hw_data.c index ac233a39a530..643a91f29e40 100644 --- a/drivers/crypto/qat/qat_dh895xccvf/adf_dh895xccvf_hw_data.c +++ b/drivers/crypto/qat/qat_dh895xccvf/adf_dh895xccvf_hw_data.c @@ -81,10 +81,10 @@ void adf_init_hw_data_dh895xcciov(struct adf_hw_device_data *hw_data) hw_data->enable_error_correction = adf_vf_void_noop; hw_data->init_admin_comms = adf_vf_int_noop; hw_data->exit_admin_comms = adf_vf_void_noop; - hw_data->send_admin_init = adf_vf2pf_init; + hw_data->send_admin_init = adf_vf2pf_notify_init; hw_data->init_arb = adf_vf_int_noop; hw_data->exit_arb = adf_vf_void_noop; - hw_data->disable_iov = adf_vf2pf_shutdown; + hw_data->disable_iov = adf_vf2pf_notify_shutdown; hw_data->get_accel_mask = get_accel_mask; hw_data->get_ae_mask = get_ae_mask; hw_data->get_num_accels = get_num_accels; -- cgit v1.2.3 From 0b7b6c195845ebc3a9f74bea0db47006e9944995 Mon Sep 17 00:00:00 2001 From: Giovanni Cabiddu Date: Thu, 12 Aug 2021 21:21:23 +0100 Subject: crypto: qat - move IO virtualization functions Move IOV functions at the end of hw_data so that PFVF functions related functions are group together. Signed-off-by: Giovanni Cabiddu Reviewed-by: Fiona Trahe Signed-off-by: Herbert Xu --- drivers/crypto/qat/qat_4xxx/adf_4xxx_hw_data.c | 6 +++--- drivers/crypto/qat/qat_c3xxx/adf_c3xxx_hw_data.c | 9 +++++---- drivers/crypto/qat/qat_c62x/adf_c62x_hw_data.c | 10 +++++----- drivers/crypto/qat/qat_dh895xcc/adf_dh895xcc_hw_data.c | 7 ++++--- 4 files changed, 17 insertions(+), 15 deletions(-) diff --git a/drivers/crypto/qat/qat_4xxx/adf_4xxx_hw_data.c b/drivers/crypto/qat/qat_4xxx/adf_4xxx_hw_data.c index a72142413caa..82e67d679513 100644 --- a/drivers/crypto/qat/qat_4xxx/adf_4xxx_hw_data.c +++ b/drivers/crypto/qat/qat_4xxx/adf_4xxx_hw_data.c @@ -210,21 +210,21 @@ void adf_init_hw_data_4xxx(struct adf_hw_device_data *hw_data) hw_data->fw_mmp_name = ADF_4XXX_MMP; hw_data->init_admin_comms = adf_init_admin_comms; hw_data->exit_admin_comms = adf_exit_admin_comms; - hw_data->disable_iov = adf_disable_sriov; hw_data->send_admin_init = adf_send_admin_init; hw_data->init_arb = adf_init_arb; hw_data->exit_arb = adf_exit_arb; hw_data->get_arb_mapping = adf_get_arbiter_mapping; hw_data->enable_ints = adf_enable_ints; - hw_data->enable_vf2pf_comms = adf_pf_enable_vf2pf_comms; hw_data->reset_device = adf_reset_flr; - hw_data->min_iov_compat_ver = ADF_PFVF_COMPAT_THIS_VERSION; hw_data->admin_ae_mask = ADF_4XXX_ADMIN_AE_MASK; hw_data->uof_get_num_objs = uof_get_num_objs; hw_data->uof_get_name = uof_get_name; hw_data->uof_get_ae_mask = uof_get_ae_mask; hw_data->set_msix_rttable = set_msix_default_rttable; hw_data->set_ssm_wdtimer = adf_gen4_set_ssm_wdtimer; + hw_data->enable_vf2pf_comms = adf_pf_enable_vf2pf_comms; + hw_data->disable_iov = adf_disable_sriov; + hw_data->min_iov_compat_ver = ADF_PFVF_COMPAT_THIS_VERSION; adf_gen4_init_hw_csr_ops(&hw_data->csr_ops); } diff --git a/drivers/crypto/qat/qat_c3xxx/adf_c3xxx_hw_data.c b/drivers/crypto/qat/qat_c3xxx/adf_c3xxx_hw_data.c index 912e84ecb9a3..45f3905f7f35 100644 --- a/drivers/crypto/qat/qat_c3xxx/adf_c3xxx_hw_data.c +++ b/drivers/crypto/qat/qat_c3xxx/adf_c3xxx_hw_data.c @@ -195,7 +195,6 @@ void adf_init_hw_data_c3xxx(struct adf_hw_device_data *hw_data) hw_data->get_sram_bar_id = get_sram_bar_id; hw_data->get_etr_bar_id = get_etr_bar_id; hw_data->get_misc_bar_id = get_misc_bar_id; - hw_data->get_pf2vf_offset = get_pf2vf_offset; hw_data->get_vintmsk_offset = get_vintmsk_offset; hw_data->get_admin_info = adf_gen2_get_admin_info; hw_data->get_arb_info = adf_gen2_get_arb_info; @@ -205,16 +204,18 @@ void adf_init_hw_data_c3xxx(struct adf_hw_device_data *hw_data) hw_data->init_admin_comms = adf_init_admin_comms; hw_data->exit_admin_comms = adf_exit_admin_comms; hw_data->configure_iov_threads = configure_iov_threads; - hw_data->disable_iov = adf_disable_sriov; hw_data->send_admin_init = adf_send_admin_init; hw_data->init_arb = adf_init_arb; hw_data->exit_arb = adf_exit_arb; hw_data->get_arb_mapping = adf_get_arbiter_mapping; hw_data->enable_ints = adf_enable_ints; - hw_data->enable_vf2pf_comms = adf_pf_enable_vf2pf_comms; hw_data->reset_device = adf_reset_flr; - hw_data->min_iov_compat_ver = ADF_PFVF_COMPAT_THIS_VERSION; hw_data->set_ssm_wdtimer = adf_gen2_set_ssm_wdtimer; + hw_data->get_pf2vf_offset = get_pf2vf_offset; + hw_data->enable_vf2pf_comms = adf_pf_enable_vf2pf_comms; + hw_data->disable_iov = adf_disable_sriov; + hw_data->min_iov_compat_ver = ADF_PFVF_COMPAT_THIS_VERSION; + adf_gen2_init_hw_csr_ops(&hw_data->csr_ops); } diff --git a/drivers/crypto/qat/qat_c62x/adf_c62x_hw_data.c b/drivers/crypto/qat/qat_c62x/adf_c62x_hw_data.c index 069b5d6857e8..dbce08b753c7 100644 --- a/drivers/crypto/qat/qat_c62x/adf_c62x_hw_data.c +++ b/drivers/crypto/qat/qat_c62x/adf_c62x_hw_data.c @@ -112,7 +112,6 @@ static u32 get_pf2vf_offset(u32 i) { return ADF_C62X_PF2VF_OFFSET(i); } - static u32 get_vintmsk_offset(u32 i) { return ADF_C62X_VINTMSK_OFFSET(i); @@ -197,7 +196,6 @@ void adf_init_hw_data_c62x(struct adf_hw_device_data *hw_data) hw_data->get_sram_bar_id = get_sram_bar_id; hw_data->get_etr_bar_id = get_etr_bar_id; hw_data->get_misc_bar_id = get_misc_bar_id; - hw_data->get_pf2vf_offset = get_pf2vf_offset; hw_data->get_vintmsk_offset = get_vintmsk_offset; hw_data->get_admin_info = adf_gen2_get_admin_info; hw_data->get_arb_info = adf_gen2_get_arb_info; @@ -207,16 +205,18 @@ void adf_init_hw_data_c62x(struct adf_hw_device_data *hw_data) hw_data->init_admin_comms = adf_init_admin_comms; hw_data->exit_admin_comms = adf_exit_admin_comms; hw_data->configure_iov_threads = configure_iov_threads; - hw_data->disable_iov = adf_disable_sriov; hw_data->send_admin_init = adf_send_admin_init; hw_data->init_arb = adf_init_arb; hw_data->exit_arb = adf_exit_arb; hw_data->get_arb_mapping = adf_get_arbiter_mapping; hw_data->enable_ints = adf_enable_ints; - hw_data->enable_vf2pf_comms = adf_pf_enable_vf2pf_comms; hw_data->reset_device = adf_reset_flr; - hw_data->min_iov_compat_ver = ADF_PFVF_COMPAT_THIS_VERSION; hw_data->set_ssm_wdtimer = adf_gen2_set_ssm_wdtimer; + hw_data->get_pf2vf_offset = get_pf2vf_offset; + hw_data->enable_vf2pf_comms = adf_pf_enable_vf2pf_comms; + hw_data->disable_iov = adf_disable_sriov; + hw_data->min_iov_compat_ver = ADF_PFVF_COMPAT_THIS_VERSION; + adf_gen2_init_hw_csr_ops(&hw_data->csr_ops); } diff --git a/drivers/crypto/qat/qat_dh895xcc/adf_dh895xcc_hw_data.c b/drivers/crypto/qat/qat_dh895xcc/adf_dh895xcc_hw_data.c index 07e7ba5c057d..6f5dab2a63a7 100644 --- a/drivers/crypto/qat/qat_dh895xcc/adf_dh895xcc_hw_data.c +++ b/drivers/crypto/qat/qat_dh895xcc/adf_dh895xcc_hw_data.c @@ -215,7 +215,6 @@ void adf_init_hw_data_dh895xcc(struct adf_hw_device_data *hw_data) hw_data->get_num_aes = get_num_aes; hw_data->get_etr_bar_id = get_etr_bar_id; hw_data->get_misc_bar_id = get_misc_bar_id; - hw_data->get_pf2vf_offset = get_pf2vf_offset; hw_data->get_vintmsk_offset = get_vintmsk_offset; hw_data->get_admin_info = adf_gen2_get_admin_info; hw_data->get_arb_info = adf_gen2_get_arb_info; @@ -226,15 +225,17 @@ void adf_init_hw_data_dh895xcc(struct adf_hw_device_data *hw_data) hw_data->init_admin_comms = adf_init_admin_comms; hw_data->exit_admin_comms = adf_exit_admin_comms; hw_data->configure_iov_threads = configure_iov_threads; - hw_data->disable_iov = adf_disable_sriov; hw_data->send_admin_init = adf_send_admin_init; hw_data->init_arb = adf_init_arb; hw_data->exit_arb = adf_exit_arb; hw_data->get_arb_mapping = adf_get_arbiter_mapping; hw_data->enable_ints = adf_enable_ints; - hw_data->enable_vf2pf_comms = adf_pf_enable_vf2pf_comms; hw_data->reset_device = adf_reset_sbr; + hw_data->get_pf2vf_offset = get_pf2vf_offset; + hw_data->enable_vf2pf_comms = adf_pf_enable_vf2pf_comms; + hw_data->disable_iov = adf_disable_sriov; hw_data->min_iov_compat_ver = ADF_PFVF_COMPAT_THIS_VERSION; + adf_gen2_init_hw_csr_ops(&hw_data->csr_ops); } -- cgit v1.2.3 From 7c258f501ee09c1388a1ecef232db7b648d46aed Mon Sep 17 00:00:00 2001 From: Marco Chiappero Date: Thu, 12 Aug 2021 21:21:24 +0100 Subject: crypto: qat - complete all the init steps before service notification Make sure all the steps in the initialization sequence are complete before any completion event notification. Signed-off-by: Marco Chiappero Co-developed-by: Giovanni Cabiddu Signed-off-by: Giovanni Cabiddu Reviewed-by: Fiona Trahe Signed-off-by: Herbert Xu --- drivers/crypto/qat/qat_common/adf_init.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/crypto/qat/qat_common/adf_init.c b/drivers/crypto/qat/qat_common/adf_init.c index 346dcb8bcca5..52ce1fde9e93 100644 --- a/drivers/crypto/qat/qat_common/adf_init.c +++ b/drivers/crypto/qat/qat_common/adf_init.c @@ -110,6 +110,11 @@ int adf_dev_init(struct adf_accel_dev *accel_dev) set_bit(ADF_STATUS_IRQ_ALLOCATED, &accel_dev->status); hw_data->enable_ints(accel_dev); + hw_data->enable_error_correction(accel_dev); + + ret = hw_data->enable_vf2pf_comms(accel_dev); + if (ret) + return ret; /* * Subservice initialisation is divided into two stages: init and start. @@ -127,10 +132,7 @@ int adf_dev_init(struct adf_accel_dev *accel_dev) set_bit(accel_dev->accel_id, service->init_status); } - hw_data->enable_error_correction(accel_dev); - ret = hw_data->enable_vf2pf_comms(accel_dev); - - return ret; + return 0; } EXPORT_SYMBOL_GPL(adf_dev_init); -- cgit v1.2.3 From 9ffd49dfba6d5142189ceccf217ac315f8aac884 Mon Sep 17 00:00:00 2001 From: Marco Chiappero Date: Thu, 12 Aug 2021 21:21:25 +0100 Subject: crypto: qat - fix naming of PF/VF enable functions Currently all the functions related to the activation of the PFVF protocol, both on PF and VF, include the direction specific "vf2pf" name. Replace the existing naming schema with: - a direction agnostic naming, that applies to both PF and VF, for the function pointer ("pfvf") - a direction specific naming schema for the implementations ("pf2vf" or "vf2pf") In particular this patch renames: - adf_pf_enable_vf2pf_comms() in adf_enable_pf2vf_comms() - enable_vf2pf_comms() in enable_pfvf_comms() Signed-off-by: Marco Chiappero Co-developed-by: Giovanni Cabiddu Signed-off-by: Giovanni Cabiddu Reviewed-by: Fiona Trahe Signed-off-by: Herbert Xu --- drivers/crypto/qat/qat_4xxx/adf_4xxx_hw_data.c | 4 ++-- drivers/crypto/qat/qat_c3xxx/adf_c3xxx_hw_data.c | 4 ++-- drivers/crypto/qat/qat_c3xxxvf/adf_c3xxxvf_hw_data.c | 2 +- drivers/crypto/qat/qat_c62x/adf_c62x_hw_data.c | 4 ++-- drivers/crypto/qat/qat_c62xvf/adf_c62xvf_hw_data.c | 2 +- drivers/crypto/qat/qat_common/adf_accel_devices.h | 2 +- drivers/crypto/qat/qat_common/adf_init.c | 2 +- drivers/crypto/qat/qat_dh895xcc/adf_dh895xcc_hw_data.c | 4 ++-- drivers/crypto/qat/qat_dh895xccvf/adf_dh895xccvf_hw_data.c | 2 +- 9 files changed, 13 insertions(+), 13 deletions(-) diff --git a/drivers/crypto/qat/qat_4xxx/adf_4xxx_hw_data.c b/drivers/crypto/qat/qat_4xxx/adf_4xxx_hw_data.c index 82e67d679513..33d8e50dcbda 100644 --- a/drivers/crypto/qat/qat_4xxx/adf_4xxx_hw_data.c +++ b/drivers/crypto/qat/qat_4xxx/adf_4xxx_hw_data.c @@ -161,7 +161,7 @@ static void adf_enable_ints(struct adf_accel_dev *accel_dev) ADF_CSR_WR(addr, ADF_4XXX_SMIAPF_MASK_OFFSET, 0); } -static int adf_pf_enable_vf2pf_comms(struct adf_accel_dev *accel_dev) +static int adf_enable_pf2vf_comms(struct adf_accel_dev *accel_dev) { return 0; } @@ -222,7 +222,7 @@ void adf_init_hw_data_4xxx(struct adf_hw_device_data *hw_data) hw_data->uof_get_ae_mask = uof_get_ae_mask; hw_data->set_msix_rttable = set_msix_default_rttable; hw_data->set_ssm_wdtimer = adf_gen4_set_ssm_wdtimer; - hw_data->enable_vf2pf_comms = adf_pf_enable_vf2pf_comms; + hw_data->enable_pfvf_comms = adf_enable_pf2vf_comms; hw_data->disable_iov = adf_disable_sriov; hw_data->min_iov_compat_ver = ADF_PFVF_COMPAT_THIS_VERSION; diff --git a/drivers/crypto/qat/qat_c3xxx/adf_c3xxx_hw_data.c b/drivers/crypto/qat/qat_c3xxx/adf_c3xxx_hw_data.c index 45f3905f7f35..569d4a9fd8cf 100644 --- a/drivers/crypto/qat/qat_c3xxx/adf_c3xxx_hw_data.c +++ b/drivers/crypto/qat/qat_c3xxx/adf_c3xxx_hw_data.c @@ -159,7 +159,7 @@ static void adf_enable_ints(struct adf_accel_dev *accel_dev) ADF_C3XXX_SMIA1_MASK); } -static int adf_pf_enable_vf2pf_comms(struct adf_accel_dev *accel_dev) +static int adf_enable_pf2vf_comms(struct adf_accel_dev *accel_dev) { spin_lock_init(&accel_dev->pf.vf2pf_ints_lock); @@ -212,7 +212,7 @@ void adf_init_hw_data_c3xxx(struct adf_hw_device_data *hw_data) hw_data->reset_device = adf_reset_flr; hw_data->set_ssm_wdtimer = adf_gen2_set_ssm_wdtimer; hw_data->get_pf2vf_offset = get_pf2vf_offset; - hw_data->enable_vf2pf_comms = adf_pf_enable_vf2pf_comms; + hw_data->enable_pfvf_comms = adf_enable_pf2vf_comms; hw_data->disable_iov = adf_disable_sriov; hw_data->min_iov_compat_ver = ADF_PFVF_COMPAT_THIS_VERSION; diff --git a/drivers/crypto/qat/qat_c3xxxvf/adf_c3xxxvf_hw_data.c b/drivers/crypto/qat/qat_c3xxxvf/adf_c3xxxvf_hw_data.c index da0790f26574..3f840560d702 100644 --- a/drivers/crypto/qat/qat_c3xxxvf/adf_c3xxxvf_hw_data.c +++ b/drivers/crypto/qat/qat_c3xxxvf/adf_c3xxxvf_hw_data.c @@ -95,7 +95,7 @@ void adf_init_hw_data_c3xxxiov(struct adf_hw_device_data *hw_data) hw_data->get_vintmsk_offset = get_vintmsk_offset; hw_data->get_sku = get_sku; hw_data->enable_ints = adf_vf_void_noop; - hw_data->enable_vf2pf_comms = adf_enable_vf2pf_comms; + hw_data->enable_pfvf_comms = adf_enable_vf2pf_comms; hw_data->min_iov_compat_ver = ADF_PFVF_COMPAT_THIS_VERSION; hw_data->dev_class->instances++; adf_devmgr_update_class_index(hw_data); diff --git a/drivers/crypto/qat/qat_c62x/adf_c62x_hw_data.c b/drivers/crypto/qat/qat_c62x/adf_c62x_hw_data.c index dbce08b753c7..6660001d4297 100644 --- a/drivers/crypto/qat/qat_c62x/adf_c62x_hw_data.c +++ b/drivers/crypto/qat/qat_c62x/adf_c62x_hw_data.c @@ -160,7 +160,7 @@ static void adf_enable_ints(struct adf_accel_dev *accel_dev) ADF_C62X_SMIA1_MASK); } -static int adf_pf_enable_vf2pf_comms(struct adf_accel_dev *accel_dev) +static int adf_enable_pf2vf_comms(struct adf_accel_dev *accel_dev) { spin_lock_init(&accel_dev->pf.vf2pf_ints_lock); @@ -213,7 +213,7 @@ void adf_init_hw_data_c62x(struct adf_hw_device_data *hw_data) hw_data->reset_device = adf_reset_flr; hw_data->set_ssm_wdtimer = adf_gen2_set_ssm_wdtimer; hw_data->get_pf2vf_offset = get_pf2vf_offset; - hw_data->enable_vf2pf_comms = adf_pf_enable_vf2pf_comms; + hw_data->enable_pfvf_comms = adf_enable_pf2vf_comms; hw_data->disable_iov = adf_disable_sriov; hw_data->min_iov_compat_ver = ADF_PFVF_COMPAT_THIS_VERSION; diff --git a/drivers/crypto/qat/qat_c62xvf/adf_c62xvf_hw_data.c b/drivers/crypto/qat/qat_c62xvf/adf_c62xvf_hw_data.c index e8ed5970b2f2..67fd41662f20 100644 --- a/drivers/crypto/qat/qat_c62xvf/adf_c62xvf_hw_data.c +++ b/drivers/crypto/qat/qat_c62xvf/adf_c62xvf_hw_data.c @@ -95,7 +95,7 @@ void adf_init_hw_data_c62xiov(struct adf_hw_device_data *hw_data) hw_data->get_vintmsk_offset = get_vintmsk_offset; hw_data->get_sku = get_sku; hw_data->enable_ints = adf_vf_void_noop; - hw_data->enable_vf2pf_comms = adf_enable_vf2pf_comms; + hw_data->enable_pfvf_comms = adf_enable_vf2pf_comms; hw_data->min_iov_compat_ver = ADF_PFVF_COMPAT_THIS_VERSION; hw_data->dev_class->instances++; adf_devmgr_update_class_index(hw_data); diff --git a/drivers/crypto/qat/qat_common/adf_accel_devices.h b/drivers/crypto/qat/qat_common/adf_accel_devices.h index 8250cf856e07..c00b16be8b0d 100644 --- a/drivers/crypto/qat/qat_common/adf_accel_devices.h +++ b/drivers/crypto/qat/qat_common/adf_accel_devices.h @@ -172,7 +172,7 @@ struct adf_hw_device_data { bool enable); void (*enable_ints)(struct adf_accel_dev *accel_dev); void (*set_ssm_wdtimer)(struct adf_accel_dev *accel_dev); - int (*enable_vf2pf_comms)(struct adf_accel_dev *accel_dev); + int (*enable_pfvf_comms)(struct adf_accel_dev *accel_dev); void (*reset_device)(struct adf_accel_dev *accel_dev); void (*set_msix_rttable)(struct adf_accel_dev *accel_dev); char *(*uof_get_name)(u32 obj_num); diff --git a/drivers/crypto/qat/qat_common/adf_init.c b/drivers/crypto/qat/qat_common/adf_init.c index 52ce1fde9e93..60bc7b991d35 100644 --- a/drivers/crypto/qat/qat_common/adf_init.c +++ b/drivers/crypto/qat/qat_common/adf_init.c @@ -112,7 +112,7 @@ int adf_dev_init(struct adf_accel_dev *accel_dev) hw_data->enable_ints(accel_dev); hw_data->enable_error_correction(accel_dev); - ret = hw_data->enable_vf2pf_comms(accel_dev); + ret = hw_data->enable_pfvf_comms(accel_dev); if (ret) return ret; diff --git a/drivers/crypto/qat/qat_dh895xcc/adf_dh895xcc_hw_data.c b/drivers/crypto/qat/qat_dh895xcc/adf_dh895xcc_hw_data.c index 6f5dab2a63a7..94fa65bac7e7 100644 --- a/drivers/crypto/qat/qat_dh895xcc/adf_dh895xcc_hw_data.c +++ b/drivers/crypto/qat/qat_dh895xcc/adf_dh895xcc_hw_data.c @@ -180,7 +180,7 @@ static void adf_enable_ints(struct adf_accel_dev *accel_dev) ADF_DH895XCC_SMIA1_MASK); } -static int adf_pf_enable_vf2pf_comms(struct adf_accel_dev *accel_dev) +static int adf_enable_pf2vf_comms(struct adf_accel_dev *accel_dev) { spin_lock_init(&accel_dev->pf.vf2pf_ints_lock); @@ -232,7 +232,7 @@ void adf_init_hw_data_dh895xcc(struct adf_hw_device_data *hw_data) hw_data->enable_ints = adf_enable_ints; hw_data->reset_device = adf_reset_sbr; hw_data->get_pf2vf_offset = get_pf2vf_offset; - hw_data->enable_vf2pf_comms = adf_pf_enable_vf2pf_comms; + hw_data->enable_pfvf_comms = adf_enable_pf2vf_comms; hw_data->disable_iov = adf_disable_sriov; hw_data->min_iov_compat_ver = ADF_PFVF_COMPAT_THIS_VERSION; diff --git a/drivers/crypto/qat/qat_dh895xccvf/adf_dh895xccvf_hw_data.c b/drivers/crypto/qat/qat_dh895xccvf/adf_dh895xccvf_hw_data.c index 643a91f29e40..5f4e264016c9 100644 --- a/drivers/crypto/qat/qat_dh895xccvf/adf_dh895xccvf_hw_data.c +++ b/drivers/crypto/qat/qat_dh895xccvf/adf_dh895xccvf_hw_data.c @@ -95,7 +95,7 @@ void adf_init_hw_data_dh895xcciov(struct adf_hw_device_data *hw_data) hw_data->get_vintmsk_offset = get_vintmsk_offset; hw_data->get_sku = get_sku; hw_data->enable_ints = adf_vf_void_noop; - hw_data->enable_vf2pf_comms = adf_enable_vf2pf_comms; + hw_data->enable_pfvf_comms = adf_enable_vf2pf_comms; hw_data->min_iov_compat_ver = ADF_PFVF_COMPAT_THIS_VERSION; hw_data->dev_class->instances++; adf_devmgr_update_class_index(hw_data); -- cgit v1.2.3 From e6dac5ea6f8e68c1d4fda7ffb6903c33161b3b4e Mon Sep 17 00:00:00 2001 From: Marco Chiappero Date: Thu, 12 Aug 2021 21:21:26 +0100 Subject: crypto: qat - remove the unnecessary get_vintmsk_offset() All QAT GEN2 devices share the same register offset for masking interrupts, so they don't need any complex device specific infrastructure. Remove this function in favor of a constant in order to simplify the code. Also, future generations may require a more complex device specific handling, making the current approach obsolete anyway. Signed-off-by: Marco Chiappero Co-developed-by: Giovanni Cabiddu Signed-off-by: Giovanni Cabiddu Reviewed-by: Fiona Trahe Signed-off-by: Herbert Xu --- drivers/crypto/qat/qat_c3xxx/adf_c3xxx_hw_data.c | 6 ------ drivers/crypto/qat/qat_c3xxx/adf_c3xxx_hw_data.h | 1 - drivers/crypto/qat/qat_c3xxxvf/adf_c3xxxvf_hw_data.c | 6 ------ drivers/crypto/qat/qat_c3xxxvf/adf_c3xxxvf_hw_data.h | 1 - drivers/crypto/qat/qat_c62x/adf_c62x_hw_data.c | 5 ----- drivers/crypto/qat/qat_c62x/adf_c62x_hw_data.h | 1 - drivers/crypto/qat/qat_c62xvf/adf_c62xvf_hw_data.c | 6 ------ drivers/crypto/qat/qat_c62xvf/adf_c62xvf_hw_data.h | 1 - drivers/crypto/qat/qat_common/adf_accel_devices.h | 1 - drivers/crypto/qat/qat_common/adf_vf_isr.c | 4 ++-- drivers/crypto/qat/qat_dh895xcc/adf_dh895xcc_hw_data.c | 6 ------ drivers/crypto/qat/qat_dh895xcc/adf_dh895xcc_hw_data.h | 1 - drivers/crypto/qat/qat_dh895xccvf/adf_dh895xccvf_hw_data.c | 6 ------ drivers/crypto/qat/qat_dh895xccvf/adf_dh895xccvf_hw_data.h | 1 - 14 files changed, 2 insertions(+), 44 deletions(-) diff --git a/drivers/crypto/qat/qat_c3xxx/adf_c3xxx_hw_data.c b/drivers/crypto/qat/qat_c3xxx/adf_c3xxx_hw_data.c index 569d4a9fd8cf..3027c01bc89e 100644 --- a/drivers/crypto/qat/qat_c3xxx/adf_c3xxx_hw_data.c +++ b/drivers/crypto/qat/qat_c3xxx/adf_c3xxx_hw_data.c @@ -111,11 +111,6 @@ static u32 get_pf2vf_offset(u32 i) return ADF_C3XXX_PF2VF_OFFSET(i); } -static u32 get_vintmsk_offset(u32 i) -{ - return ADF_C3XXX_VINTMSK_OFFSET(i); -} - static void adf_enable_error_correction(struct adf_accel_dev *accel_dev) { struct adf_hw_device_data *hw_device = accel_dev->hw_device; @@ -195,7 +190,6 @@ void adf_init_hw_data_c3xxx(struct adf_hw_device_data *hw_data) hw_data->get_sram_bar_id = get_sram_bar_id; hw_data->get_etr_bar_id = get_etr_bar_id; hw_data->get_misc_bar_id = get_misc_bar_id; - hw_data->get_vintmsk_offset = get_vintmsk_offset; hw_data->get_admin_info = adf_gen2_get_admin_info; hw_data->get_arb_info = adf_gen2_get_arb_info; hw_data->get_sku = get_sku; diff --git a/drivers/crypto/qat/qat_c3xxx/adf_c3xxx_hw_data.h b/drivers/crypto/qat/qat_c3xxx/adf_c3xxx_hw_data.h index fece8e38025a..86ee02a86789 100644 --- a/drivers/crypto/qat/qat_c3xxx/adf_c3xxx_hw_data.h +++ b/drivers/crypto/qat/qat_c3xxx/adf_c3xxx_hw_data.h @@ -29,7 +29,6 @@ #define ADF_C3XXX_ERRSSMSH_EN BIT(3) #define ADF_C3XXX_PF2VF_OFFSET(i) (0x3A000 + 0x280 + ((i) * 0x04)) -#define ADF_C3XXX_VINTMSK_OFFSET(i) (0x3A000 + 0x200 + ((i) * 0x04)) /* AE to function mapping */ #define ADF_C3XXX_AE2FUNC_MAP_GRP_A_NUM_REGS 48 diff --git a/drivers/crypto/qat/qat_c3xxxvf/adf_c3xxxvf_hw_data.c b/drivers/crypto/qat/qat_c3xxxvf/adf_c3xxxvf_hw_data.c index 3f840560d702..3e69b520e82f 100644 --- a/drivers/crypto/qat/qat_c3xxxvf/adf_c3xxxvf_hw_data.c +++ b/drivers/crypto/qat/qat_c3xxxvf/adf_c3xxxvf_hw_data.c @@ -52,11 +52,6 @@ static u32 get_pf2vf_offset(u32 i) return ADF_C3XXXIOV_PF2VF_OFFSET; } -static u32 get_vintmsk_offset(u32 i) -{ - return ADF_C3XXXIOV_VINTMSK_OFFSET; -} - static int adf_vf_int_noop(struct adf_accel_dev *accel_dev) { return 0; @@ -92,7 +87,6 @@ void adf_init_hw_data_c3xxxiov(struct adf_hw_device_data *hw_data) hw_data->get_etr_bar_id = get_etr_bar_id; hw_data->get_misc_bar_id = get_misc_bar_id; hw_data->get_pf2vf_offset = get_pf2vf_offset; - hw_data->get_vintmsk_offset = get_vintmsk_offset; hw_data->get_sku = get_sku; hw_data->enable_ints = adf_vf_void_noop; hw_data->enable_pfvf_comms = adf_enable_vf2pf_comms; diff --git a/drivers/crypto/qat/qat_c3xxxvf/adf_c3xxxvf_hw_data.h b/drivers/crypto/qat/qat_c3xxxvf/adf_c3xxxvf_hw_data.h index 7945a9cd1c60..f5de4ce66014 100644 --- a/drivers/crypto/qat/qat_c3xxxvf/adf_c3xxxvf_hw_data.h +++ b/drivers/crypto/qat/qat_c3xxxvf/adf_c3xxxvf_hw_data.h @@ -13,7 +13,6 @@ #define ADF_C3XXXIOV_ETR_BAR 0 #define ADF_C3XXXIOV_ETR_MAX_BANKS 1 #define ADF_C3XXXIOV_PF2VF_OFFSET 0x200 -#define ADF_C3XXXIOV_VINTMSK_OFFSET 0x208 void adf_init_hw_data_c3xxxiov(struct adf_hw_device_data *hw_data); void adf_clean_hw_data_c3xxxiov(struct adf_hw_device_data *hw_data); diff --git a/drivers/crypto/qat/qat_c62x/adf_c62x_hw_data.c b/drivers/crypto/qat/qat_c62x/adf_c62x_hw_data.c index 6660001d4297..b023c80873bb 100644 --- a/drivers/crypto/qat/qat_c62x/adf_c62x_hw_data.c +++ b/drivers/crypto/qat/qat_c62x/adf_c62x_hw_data.c @@ -112,10 +112,6 @@ static u32 get_pf2vf_offset(u32 i) { return ADF_C62X_PF2VF_OFFSET(i); } -static u32 get_vintmsk_offset(u32 i) -{ - return ADF_C62X_VINTMSK_OFFSET(i); -} static void adf_enable_error_correction(struct adf_accel_dev *accel_dev) { @@ -196,7 +192,6 @@ void adf_init_hw_data_c62x(struct adf_hw_device_data *hw_data) hw_data->get_sram_bar_id = get_sram_bar_id; hw_data->get_etr_bar_id = get_etr_bar_id; hw_data->get_misc_bar_id = get_misc_bar_id; - hw_data->get_vintmsk_offset = get_vintmsk_offset; hw_data->get_admin_info = adf_gen2_get_admin_info; hw_data->get_arb_info = adf_gen2_get_arb_info; hw_data->get_sku = get_sku; diff --git a/drivers/crypto/qat/qat_c62x/adf_c62x_hw_data.h b/drivers/crypto/qat/qat_c62x/adf_c62x_hw_data.h index 53d3cb577f5b..e6664bd20c91 100644 --- a/drivers/crypto/qat/qat_c62x/adf_c62x_hw_data.h +++ b/drivers/crypto/qat/qat_c62x/adf_c62x_hw_data.h @@ -30,7 +30,6 @@ #define ADF_C62X_ERRSSMSH_EN BIT(3) #define ADF_C62X_PF2VF_OFFSET(i) (0x3A000 + 0x280 + ((i) * 0x04)) -#define ADF_C62X_VINTMSK_OFFSET(i) (0x3A000 + 0x200 + ((i) * 0x04)) /* AE to function mapping */ #define ADF_C62X_AE2FUNC_MAP_GRP_A_NUM_REGS 80 diff --git a/drivers/crypto/qat/qat_c62xvf/adf_c62xvf_hw_data.c b/drivers/crypto/qat/qat_c62xvf/adf_c62xvf_hw_data.c index 67fd41662f20..3bee3e467363 100644 --- a/drivers/crypto/qat/qat_c62xvf/adf_c62xvf_hw_data.c +++ b/drivers/crypto/qat/qat_c62xvf/adf_c62xvf_hw_data.c @@ -52,11 +52,6 @@ static u32 get_pf2vf_offset(u32 i) return ADF_C62XIOV_PF2VF_OFFSET; } -static u32 get_vintmsk_offset(u32 i) -{ - return ADF_C62XIOV_VINTMSK_OFFSET; -} - static int adf_vf_int_noop(struct adf_accel_dev *accel_dev) { return 0; @@ -92,7 +87,6 @@ void adf_init_hw_data_c62xiov(struct adf_hw_device_data *hw_data) hw_data->get_etr_bar_id = get_etr_bar_id; hw_data->get_misc_bar_id = get_misc_bar_id; hw_data->get_pf2vf_offset = get_pf2vf_offset; - hw_data->get_vintmsk_offset = get_vintmsk_offset; hw_data->get_sku = get_sku; hw_data->enable_ints = adf_vf_void_noop; hw_data->enable_pfvf_comms = adf_enable_vf2pf_comms; diff --git a/drivers/crypto/qat/qat_c62xvf/adf_c62xvf_hw_data.h b/drivers/crypto/qat/qat_c62xvf/adf_c62xvf_hw_data.h index a6c04cf7a43c..794778c48678 100644 --- a/drivers/crypto/qat/qat_c62xvf/adf_c62xvf_hw_data.h +++ b/drivers/crypto/qat/qat_c62xvf/adf_c62xvf_hw_data.h @@ -13,7 +13,6 @@ #define ADF_C62XIOV_ETR_BAR 0 #define ADF_C62XIOV_ETR_MAX_BANKS 1 #define ADF_C62XIOV_PF2VF_OFFSET 0x200 -#define ADF_C62XIOV_VINTMSK_OFFSET 0x208 void adf_init_hw_data_c62xiov(struct adf_hw_device_data *hw_data); void adf_clean_hw_data_c62xiov(struct adf_hw_device_data *hw_data); diff --git a/drivers/crypto/qat/qat_common/adf_accel_devices.h b/drivers/crypto/qat/qat_common/adf_accel_devices.h index c00b16be8b0d..38c0af6d4e43 100644 --- a/drivers/crypto/qat/qat_common/adf_accel_devices.h +++ b/drivers/crypto/qat/qat_common/adf_accel_devices.h @@ -154,7 +154,6 @@ struct adf_hw_device_data { u32 (*get_num_aes)(struct adf_hw_device_data *self); u32 (*get_num_accels)(struct adf_hw_device_data *self); u32 (*get_pf2vf_offset)(u32 i); - u32 (*get_vintmsk_offset)(u32 i); void (*get_arb_info)(struct arb_info *arb_csrs_info); void (*get_admin_info)(struct admin_info *admin_csrs_info); enum dev_sku_info (*get_sku)(struct adf_hw_device_data *self); diff --git a/drivers/crypto/qat/qat_common/adf_vf_isr.c b/drivers/crypto/qat/qat_common/adf_vf_isr.c index aa44e8638fa8..078f33d583e8 100644 --- a/drivers/crypto/qat/qat_common/adf_vf_isr.c +++ b/drivers/crypto/qat/qat_common/adf_vf_isr.c @@ -36,7 +36,7 @@ void adf_enable_pf2vf_interrupts(struct adf_accel_dev *accel_dev) void __iomem *pmisc_bar_addr = pci_info->pci_bars[hw_data->get_misc_bar_id(hw_data)].virt_addr; - ADF_CSR_WR(pmisc_bar_addr, hw_data->get_vintmsk_offset(0), 0x0); + ADF_CSR_WR(pmisc_bar_addr, ADF_VINTMSK_OFFSET, 0x0); } void adf_disable_pf2vf_interrupts(struct adf_accel_dev *accel_dev) @@ -46,7 +46,7 @@ void adf_disable_pf2vf_interrupts(struct adf_accel_dev *accel_dev) void __iomem *pmisc_bar_addr = pci_info->pci_bars[hw_data->get_misc_bar_id(hw_data)].virt_addr; - ADF_CSR_WR(pmisc_bar_addr, hw_data->get_vintmsk_offset(0), 0x2); + ADF_CSR_WR(pmisc_bar_addr, ADF_VINTMSK_OFFSET, 0x2); } static int adf_enable_msi(struct adf_accel_dev *accel_dev) diff --git a/drivers/crypto/qat/qat_dh895xcc/adf_dh895xcc_hw_data.c b/drivers/crypto/qat/qat_dh895xcc/adf_dh895xcc_hw_data.c index 94fa65bac7e7..0a9ce365a544 100644 --- a/drivers/crypto/qat/qat_dh895xcc/adf_dh895xcc_hw_data.c +++ b/drivers/crypto/qat/qat_dh895xcc/adf_dh895xcc_hw_data.c @@ -131,11 +131,6 @@ static u32 get_pf2vf_offset(u32 i) return ADF_DH895XCC_PF2VF_OFFSET(i); } -static u32 get_vintmsk_offset(u32 i) -{ - return ADF_DH895XCC_VINTMSK_OFFSET(i); -} - static void adf_enable_error_correction(struct adf_accel_dev *accel_dev) { struct adf_hw_device_data *hw_device = accel_dev->hw_device; @@ -215,7 +210,6 @@ void adf_init_hw_data_dh895xcc(struct adf_hw_device_data *hw_data) hw_data->get_num_aes = get_num_aes; hw_data->get_etr_bar_id = get_etr_bar_id; hw_data->get_misc_bar_id = get_misc_bar_id; - hw_data->get_vintmsk_offset = get_vintmsk_offset; hw_data->get_admin_info = adf_gen2_get_admin_info; hw_data->get_arb_info = adf_gen2_get_arb_info; hw_data->get_sram_bar_id = get_sram_bar_id; diff --git a/drivers/crypto/qat/qat_dh895xcc/adf_dh895xcc_hw_data.h b/drivers/crypto/qat/qat_dh895xcc/adf_dh895xcc_hw_data.h index 4d613923d155..f99319cd4543 100644 --- a/drivers/crypto/qat/qat_dh895xcc/adf_dh895xcc_hw_data.h +++ b/drivers/crypto/qat/qat_dh895xcc/adf_dh895xcc_hw_data.h @@ -35,7 +35,6 @@ #define ADF_DH895XCC_ERRSSMSH_EN BIT(3) #define ADF_DH895XCC_PF2VF_OFFSET(i) (0x3A000 + 0x280 + ((i) * 0x04)) -#define ADF_DH895XCC_VINTMSK_OFFSET(i) (0x3A000 + 0x200 + ((i) * 0x04)) /* AE to function mapping */ #define ADF_DH895XCC_AE2FUNC_MAP_GRP_A_NUM_REGS 96 diff --git a/drivers/crypto/qat/qat_dh895xccvf/adf_dh895xccvf_hw_data.c b/drivers/crypto/qat/qat_dh895xccvf/adf_dh895xccvf_hw_data.c index 5f4e264016c9..7c6ed6bc8abf 100644 --- a/drivers/crypto/qat/qat_dh895xccvf/adf_dh895xccvf_hw_data.c +++ b/drivers/crypto/qat/qat_dh895xccvf/adf_dh895xccvf_hw_data.c @@ -52,11 +52,6 @@ static u32 get_pf2vf_offset(u32 i) return ADF_DH895XCCIOV_PF2VF_OFFSET; } -static u32 get_vintmsk_offset(u32 i) -{ - return ADF_DH895XCCIOV_VINTMSK_OFFSET; -} - static int adf_vf_int_noop(struct adf_accel_dev *accel_dev) { return 0; @@ -92,7 +87,6 @@ void adf_init_hw_data_dh895xcciov(struct adf_hw_device_data *hw_data) hw_data->get_etr_bar_id = get_etr_bar_id; hw_data->get_misc_bar_id = get_misc_bar_id; hw_data->get_pf2vf_offset = get_pf2vf_offset; - hw_data->get_vintmsk_offset = get_vintmsk_offset; hw_data->get_sku = get_sku; hw_data->enable_ints = adf_vf_void_noop; hw_data->enable_pfvf_comms = adf_enable_vf2pf_comms; diff --git a/drivers/crypto/qat/qat_dh895xccvf/adf_dh895xccvf_hw_data.h b/drivers/crypto/qat/qat_dh895xccvf/adf_dh895xccvf_hw_data.h index 2bfcc67f8f39..306ebb71a408 100644 --- a/drivers/crypto/qat/qat_dh895xccvf/adf_dh895xccvf_hw_data.h +++ b/drivers/crypto/qat/qat_dh895xccvf/adf_dh895xccvf_hw_data.h @@ -13,7 +13,6 @@ #define ADF_DH895XCCIOV_ETR_BAR 0 #define ADF_DH895XCCIOV_ETR_MAX_BANKS 1 #define ADF_DH895XCCIOV_PF2VF_OFFSET 0x200 -#define ADF_DH895XCCIOV_VINTMSK_OFFSET 0x208 void adf_init_hw_data_dh895xcciov(struct adf_hw_device_data *hw_data); void adf_clean_hw_data_dh895xcciov(struct adf_hw_device_data *hw_data); -- cgit v1.2.3 From 8af4a436e665201872348aa2ba6e7033d4c17823 Mon Sep 17 00:00:00 2001 From: Ahsan Atta Date: Thu, 12 Aug 2021 21:21:27 +0100 Subject: crypto: qat - flush vf workqueue at driver removal There is a race condition during shutdown in adf_disable_sriov() where both the PF and the VF drivers are loaded on the host system. The PF notifies a VF with a "RESTARTING" message due to which the VF starts an asynchronous worker to stop and shutdown itself. At the same time the PF calls pci_disable_sriov() which invokes the remove() routine on the VF device driver triggering the shutdown flow again. This change fixes the problem by ensuring that the VF flushes the worker that performs stop()/shutdown() before these two functions are called in the remove(). To make sure that no additional PV/VF messages are processed by the VF, interrupts are disabled before flushing the workqueue. Signed-off-by: Ahsan Atta Co-developed-by: Giovanni Cabiddu Signed-off-by: Giovanni Cabiddu Reviewed-by: Fiona Trahe Signed-off-by: Herbert Xu --- drivers/crypto/qat/qat_c3xxxvf/adf_drv.c | 1 + drivers/crypto/qat/qat_c62xvf/adf_drv.c | 1 + drivers/crypto/qat/qat_common/adf_common_drv.h | 5 +++++ drivers/crypto/qat/qat_common/adf_vf_isr.c | 25 +++++++++++++++++++++++++ drivers/crypto/qat/qat_dh895xccvf/adf_drv.c | 1 + 5 files changed, 33 insertions(+) diff --git a/drivers/crypto/qat/qat_c3xxxvf/adf_drv.c b/drivers/crypto/qat/qat_c3xxxvf/adf_drv.c index 7ef5a5185d29..1df1b868978d 100644 --- a/drivers/crypto/qat/qat_c3xxxvf/adf_drv.c +++ b/drivers/crypto/qat/qat_c3xxxvf/adf_drv.c @@ -211,6 +211,7 @@ static void adf_remove(struct pci_dev *pdev) pr_err("QAT: Driver removal failed\n"); return; } + adf_flush_vf_wq(accel_dev); adf_dev_stop(accel_dev); adf_dev_shutdown(accel_dev); adf_cleanup_accel(accel_dev); diff --git a/drivers/crypto/qat/qat_c62xvf/adf_drv.c b/drivers/crypto/qat/qat_c62xvf/adf_drv.c index c91beedd267c..8103bd81d617 100644 --- a/drivers/crypto/qat/qat_c62xvf/adf_drv.c +++ b/drivers/crypto/qat/qat_c62xvf/adf_drv.c @@ -211,6 +211,7 @@ static void adf_remove(struct pci_dev *pdev) pr_err("QAT: Driver removal failed\n"); return; } + adf_flush_vf_wq(accel_dev); adf_dev_stop(accel_dev); adf_dev_shutdown(accel_dev); adf_cleanup_accel(accel_dev); diff --git a/drivers/crypto/qat/qat_common/adf_common_drv.h b/drivers/crypto/qat/qat_common/adf_common_drv.h index 4b18843f9845..4261749fae8d 100644 --- a/drivers/crypto/qat/qat_common/adf_common_drv.h +++ b/drivers/crypto/qat/qat_common/adf_common_drv.h @@ -207,6 +207,7 @@ int adf_init_pf_wq(void); void adf_exit_pf_wq(void); int adf_init_vf_wq(void); void adf_exit_vf_wq(void); +void adf_flush_vf_wq(struct adf_accel_dev *accel_dev); #else #define adf_sriov_configure NULL @@ -249,5 +250,9 @@ static inline void adf_exit_vf_wq(void) { } +static inline void adf_flush_vf_wq(struct adf_accel_dev *accel_dev) +{ +} + #endif #endif diff --git a/drivers/crypto/qat/qat_common/adf_vf_isr.c b/drivers/crypto/qat/qat_common/adf_vf_isr.c index 078f33d583e8..7828a6573f3e 100644 --- a/drivers/crypto/qat/qat_common/adf_vf_isr.c +++ b/drivers/crypto/qat/qat_common/adf_vf_isr.c @@ -48,6 +48,7 @@ void adf_disable_pf2vf_interrupts(struct adf_accel_dev *accel_dev) ADF_CSR_WR(pmisc_bar_addr, ADF_VINTMSK_OFFSET, 0x2); } +EXPORT_SYMBOL_GPL(adf_disable_pf2vf_interrupts); static int adf_enable_msi(struct adf_accel_dev *accel_dev) { @@ -316,6 +317,30 @@ err_out: } EXPORT_SYMBOL_GPL(adf_vf_isr_resource_alloc); +/** + * adf_flush_vf_wq() - Flush workqueue for VF + * @accel_dev: Pointer to acceleration device. + * + * Function disables the PF/VF interrupts on the VF so that no new messages + * are received and flushes the workqueue 'adf_vf_stop_wq'. + * + * Return: void. + */ +void adf_flush_vf_wq(struct adf_accel_dev *accel_dev) +{ + adf_disable_pf2vf_interrupts(accel_dev); + + flush_workqueue(adf_vf_stop_wq); +} +EXPORT_SYMBOL_GPL(adf_flush_vf_wq); + +/** + * adf_init_vf_wq() - Init workqueue for VF + * + * Function init workqueue 'adf_vf_stop_wq' for VF. + * + * Return: 0 on success, error code otherwise. + */ int __init adf_init_vf_wq(void) { adf_vf_stop_wq = alloc_workqueue("adf_vf_stop_wq", WQ_MEM_RECLAIM, 0); diff --git a/drivers/crypto/qat/qat_dh895xccvf/adf_drv.c b/drivers/crypto/qat/qat_dh895xccvf/adf_drv.c index d332b68795f2..99d90f3ea2b7 100644 --- a/drivers/crypto/qat/qat_dh895xccvf/adf_drv.c +++ b/drivers/crypto/qat/qat_dh895xccvf/adf_drv.c @@ -211,6 +211,7 @@ static void adf_remove(struct pci_dev *pdev) pr_err("QAT: Driver removal failed\n"); return; } + adf_flush_vf_wq(accel_dev); adf_dev_stop(accel_dev); adf_dev_shutdown(accel_dev); adf_cleanup_accel(accel_dev); -- cgit v1.2.3 From 645ae0af1840199086c33e4f841892ebee73f615 Mon Sep 17 00:00:00 2001 From: Giovanni Cabiddu Date: Thu, 12 Aug 2021 21:21:28 +0100 Subject: crypto: qat - do not export adf_iov_putmsg() The function adf_iov_putmsg() is only used inside the intel_qat module therefore should not be exported. Remove EXPORT_SYMBOL for the function adf_iov_putmsg(). Signed-off-by: Giovanni Cabiddu Reviewed-by: Fiona Trahe Signed-off-by: Herbert Xu --- drivers/crypto/qat/qat_common/adf_pf2vf_msg.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/crypto/qat/qat_common/adf_pf2vf_msg.c b/drivers/crypto/qat/qat_common/adf_pf2vf_msg.c index 1b0df4a5b8b7..2670995b097f 100644 --- a/drivers/crypto/qat/qat_common/adf_pf2vf_msg.c +++ b/drivers/crypto/qat/qat_common/adf_pf2vf_msg.c @@ -192,7 +192,6 @@ int adf_iov_putmsg(struct adf_accel_dev *accel_dev, u32 msg, u8 vf_nr) return ret; } -EXPORT_SYMBOL_GPL(adf_iov_putmsg); void adf_vf2pf_req_hndl(struct adf_accel_vf_info *vf_info) { -- cgit v1.2.3 From 598cf42554749a7c59205879fd6e06ef6d021d94 Mon Sep 17 00:00:00 2001 From: Giovanni Cabiddu Date: Thu, 12 Aug 2021 21:21:29 +0100 Subject: crypto: qat - store vf.compatible flag If the VF is newer than the PF, it decides whether it is compatible or not. In case it is compatible, store that information in the vf.compatible flag in the accel_dev structure. Signed-off-by: Giovanni Cabiddu Suggested-by: Fiona Trahe Signed-off-by: Herbert Xu --- drivers/crypto/qat/qat_common/adf_pf2vf_msg.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/crypto/qat/qat_common/adf_pf2vf_msg.c b/drivers/crypto/qat/qat_common/adf_pf2vf_msg.c index 2670995b097f..976b9ab7617c 100644 --- a/drivers/crypto/qat/qat_common/adf_pf2vf_msg.c +++ b/drivers/crypto/qat/qat_common/adf_pf2vf_msg.c @@ -346,8 +346,10 @@ static int adf_vf2pf_request_version(struct adf_accel_dev *accel_dev) break; case ADF_PF2VF_VF_COMPAT_UNKNOWN: /* VF is newer than PF and decides whether it is compatible */ - if (accel_dev->vf.pf_version >= hw_data->min_iov_compat_ver) + if (accel_dev->vf.pf_version >= hw_data->min_iov_compat_ver) { + accel_dev->vf.compatible = ADF_PF2VF_VF_COMPATIBLE; break; + } fallthrough; case ADF_PF2VF_VF_INCOMPATIBLE: dev_err(&GET_DEV(accel_dev), -- cgit v1.2.3 From 90367a027a22c3a9ca8b8bac15df34d9e859fc11 Mon Sep 17 00:00:00 2001 From: Kai Ye Date: Fri, 13 Aug 2021 15:41:01 +0800 Subject: crypto: hisilicon/sec - fix the abnormal exiting process Because the algs registration process has added a judgment. So need to add the judgment for the abnormal exiting process. Signed-off-by: Kai Ye Signed-off-by: Herbert Xu --- drivers/crypto/hisilicon/sec2/sec_main.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/crypto/hisilicon/sec2/sec_main.c b/drivers/crypto/hisilicon/sec2/sec_main.c index db4dbcf0492a..45a1ddd31dc3 100644 --- a/drivers/crypto/hisilicon/sec2/sec_main.c +++ b/drivers/crypto/hisilicon/sec2/sec_main.c @@ -1020,7 +1020,8 @@ static int sec_probe(struct pci_dev *pdev, const struct pci_device_id *id) return 0; err_alg_unregister: - hisi_qm_alg_unregister(qm, &sec_devices); + if (qm->qp_num >= ctx_q_num) + hisi_qm_alg_unregister(qm, &sec_devices); err_qm_stop: sec_debugfs_exit(qm); hisi_qm_stop(qm, QM_NORMAL); -- cgit v1.2.3 From a52626106d6f7edf3d106c065e13a0313cfeb82f Mon Sep 17 00:00:00 2001 From: Kai Ye Date: Fri, 13 Aug 2021 15:41:02 +0800 Subject: crypto: hisilicon/sec - modify the hardware endian configuration When the endian configuration of the hardware is abnormal, it will cause the SEC engine is faulty that reports empty message. And it will affect the normal function of the hardware. Currently the soft configuration method can't restore the faulty device. The endian needs to be configured according to the system properties. So fix it. Signed-off-by: Kai Ye Signed-off-by: Herbert Xu --- drivers/crypto/hisilicon/sec2/sec.h | 5 ----- drivers/crypto/hisilicon/sec2/sec_main.c | 31 +++++++++---------------------- 2 files changed, 9 insertions(+), 27 deletions(-) diff --git a/drivers/crypto/hisilicon/sec2/sec.h b/drivers/crypto/hisilicon/sec2/sec.h index 018415b9840a..d97cf02b1df7 100644 --- a/drivers/crypto/hisilicon/sec2/sec.h +++ b/drivers/crypto/hisilicon/sec2/sec.h @@ -157,11 +157,6 @@ struct sec_ctx { struct device *dev; }; -enum sec_endian { - SEC_LE = 0, - SEC_32BE, - SEC_64BE -}; enum sec_debug_file_index { SEC_CLEAR_ENABLE, diff --git a/drivers/crypto/hisilicon/sec2/sec_main.c b/drivers/crypto/hisilicon/sec2/sec_main.c index 45a1ddd31dc3..2250c81d6158 100644 --- a/drivers/crypto/hisilicon/sec2/sec_main.c +++ b/drivers/crypto/hisilicon/sec2/sec_main.c @@ -318,31 +318,20 @@ static const struct pci_device_id sec_dev_ids[] = { }; MODULE_DEVICE_TABLE(pci, sec_dev_ids); -static u8 sec_get_endian(struct hisi_qm *qm) +static void sec_set_endian(struct hisi_qm *qm) { u32 reg; - /* - * As for VF, it is a wrong way to get endian setting by - * reading a register of the engine - */ - if (qm->pdev->is_virtfn) { - dev_err_ratelimited(&qm->pdev->dev, - "cannot access a register in VF!\n"); - return SEC_LE; - } reg = readl_relaxed(qm->io_base + SEC_CONTROL_REG); - /* BD little endian mode */ - if (!(reg & BIT(0))) - return SEC_LE; + reg &= ~(BIT(1) | BIT(0)); + if (!IS_ENABLED(CONFIG_64BIT)) + reg |= BIT(1); - /* BD 32-bits big endian mode */ - else if (!(reg & BIT(1))) - return SEC_32BE; - /* BD 64-bits big endian mode */ - else - return SEC_64BE; + if (!IS_ENABLED(CONFIG_CPU_LITTLE_ENDIAN)) + reg |= BIT(0); + + writel_relaxed(reg, qm->io_base + SEC_CONTROL_REG); } static void sec_open_sva_prefetch(struct hisi_qm *qm) @@ -463,9 +452,7 @@ static int sec_engine_init(struct hisi_qm *qm) qm->io_base + SEC_BD_ERR_CHK_EN_REG3); /* config endian */ - reg = readl_relaxed(qm->io_base + SEC_CONTROL_REG); - reg |= sec_get_endian(qm); - writel_relaxed(reg, qm->io_base + SEC_CONTROL_REG); + sec_set_endian(qm); sec_enable_clock_gate(qm); -- cgit v1.2.3 From 7b3d52683b3a47c0ba1dfd6b5994a3a795b06972 Mon Sep 17 00:00:00 2001 From: Tianjia Zhang Date: Fri, 13 Aug 2021 15:55:06 +0800 Subject: crypto: tcrypt - Fix missing return value check There are several places where the return value check of crypto_aead_setkey and crypto_aead_setauthsize were lost. It is necessary to add these checks. At the same time, move the crypto_aead_setauthsize() call out of the loop, and only need to call it once after load transform. Fixee: 53f52d7aecb4 ("crypto: tcrypt - Added speed tests for AEAD crypto alogrithms in tcrypt test suite") Signed-off-by: Tianjia Zhang Reviewed-by: Vitaly Chikunov Signed-off-by: Herbert Xu --- crypto/tcrypt.c | 29 +++++++++++++++++++---------- 1 file changed, 19 insertions(+), 10 deletions(-) diff --git a/crypto/tcrypt.c b/crypto/tcrypt.c index d73a42fdaa9b..170102e92f7d 100644 --- a/crypto/tcrypt.c +++ b/crypto/tcrypt.c @@ -290,6 +290,11 @@ static void test_mb_aead_speed(const char *algo, int enc, int secs, } ret = crypto_aead_setauthsize(tfm, authsize); + if (ret) { + pr_err("alg: aead: Failed to setauthsize for %s: %d\n", algo, + ret); + goto out_free_tfm; + } for (i = 0; i < num_mb; ++i) if (testmgr_alloc_buf(data[i].xbuf)) { @@ -315,7 +320,7 @@ static void test_mb_aead_speed(const char *algo, int enc, int secs, for (i = 0; i < num_mb; ++i) { data[i].req = aead_request_alloc(tfm, GFP_KERNEL); if (!data[i].req) { - pr_err("alg: skcipher: Failed to allocate request for %s\n", + pr_err("alg: aead: Failed to allocate request for %s\n", algo); while (i--) aead_request_free(data[i].req); @@ -567,13 +572,19 @@ static void test_aead_speed(const char *algo, int enc, unsigned int secs, sgout = &sg[9]; tfm = crypto_alloc_aead(algo, 0, 0); - if (IS_ERR(tfm)) { pr_err("alg: aead: Failed to load transform for %s: %ld\n", algo, PTR_ERR(tfm)); goto out_notfm; } + ret = crypto_aead_setauthsize(tfm, authsize); + if (ret) { + pr_err("alg: aead: Failed to setauthsize for %s: %d\n", algo, + ret); + goto out_noreq; + } + crypto_init_wait(&wait); printk(KERN_INFO "\ntesting speed of %s (%s) %s\n", algo, get_driver_name(crypto_aead, tfm), e); @@ -611,8 +622,13 @@ static void test_aead_speed(const char *algo, int enc, unsigned int secs, break; } } + ret = crypto_aead_setkey(tfm, key, *keysize); - ret = crypto_aead_setauthsize(tfm, authsize); + if (ret) { + pr_err("setkey() failed flags=%x: %d\n", + crypto_aead_get_flags(tfm), ret); + goto out; + } iv_len = crypto_aead_ivsize(tfm); if (iv_len) @@ -622,15 +638,8 @@ static void test_aead_speed(const char *algo, int enc, unsigned int secs, printk(KERN_INFO "test %u (%d bit key, %d byte blocks): ", i, *keysize * 8, bs); - memset(tvmem[0], 0xff, PAGE_SIZE); - if (ret) { - pr_err("setkey() failed flags=%x\n", - crypto_aead_get_flags(tfm)); - goto out; - } - sg_init_aead(sg, xbuf, bs + (enc ? 0 : authsize), assoc, aad_size); -- cgit v1.2.3 From 68039d605f7bb34ea6dbd4e099bf98599d52b0ac Mon Sep 17 00:00:00 2001 From: Tianjia Zhang Date: Fri, 13 Aug 2021 15:55:07 +0800 Subject: crypto: testmgr - Add GCM/CCM mode test of SM4 algorithm The GCM/CCM mode of the SM4 algorithm is defined in the rfc 8998 specification, and the test case data also comes from rfc 8998. Signed-off-by: Tianjia Zhang Signed-off-by: Herbert Xu --- crypto/testmgr.c | 29 +++++++++++ crypto/testmgr.h | 148 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 177 insertions(+) diff --git a/crypto/testmgr.c b/crypto/testmgr.c index c978e41f11a1..70f69f0910c9 100644 --- a/crypto/testmgr.c +++ b/crypto/testmgr.c @@ -4450,6 +4450,12 @@ static const struct alg_test_desc alg_test_descs[] = { .suite = { .hash = __VECS(aes_cbcmac_tv_template) } + }, { + .alg = "cbcmac(sm4)", + .test = alg_test_hash, + .suite = { + .hash = __VECS(sm4_cbcmac_tv_template) + } }, { .alg = "ccm(aes)", .generic_driver = "ccm_base(ctr(aes-generic),cbcmac(aes-generic))", @@ -4461,6 +4467,16 @@ static const struct alg_test_desc alg_test_descs[] = { .einval_allowed = 1, } } + }, { + .alg = "ccm(sm4)", + .generic_driver = "ccm_base(ctr(sm4-generic),cbcmac(sm4-generic))", + .test = alg_test_aead, + .suite = { + .aead = { + ____VECS(sm4_ccm_tv_template), + .einval_allowed = 1, + } + } }, { .alg = "cfb(aes)", .test = alg_test_skcipher, @@ -4494,6 +4510,12 @@ static const struct alg_test_desc alg_test_descs[] = { .suite = { .hash = __VECS(des3_ede_cmac64_tv_template) } + }, { + .alg = "cmac(sm4)", + .test = alg_test_hash, + .suite = { + .hash = __VECS(sm4_cmac128_tv_template) + } }, { .alg = "compress_null", .test = alg_test_null, @@ -4967,6 +4989,13 @@ static const struct alg_test_desc alg_test_descs[] = { .suite = { .aead = __VECS(aes_gcm_tv_template) } + }, { + .alg = "gcm(sm4)", + .generic_driver = "gcm_base(ctr(sm4-generic),ghash-generic)", + .test = alg_test_aead, + .suite = { + .aead = __VECS(sm4_gcm_tv_template) + } }, { .alg = "ghash", .test = alg_test_hash, diff --git a/crypto/testmgr.h b/crypto/testmgr.h index 3ed6ab34ab51..e6fca34b5b25 100644 --- a/crypto/testmgr.h +++ b/crypto/testmgr.h @@ -13328,6 +13328,154 @@ static const struct cipher_testvec sm4_cfb_tv_template[] = { } }; +static const struct aead_testvec sm4_gcm_tv_template[] = { + { /* From https://datatracker.ietf.org/doc/html/rfc8998#appendix-A.1 */ + .key = "\x01\x23\x45\x67\x89\xAB\xCD\xEF" + "\xFE\xDC\xBA\x98\x76\x54\x32\x10", + .klen = 16, + .iv = "\x00\x00\x12\x34\x56\x78\x00\x00" + "\x00\x00\xAB\xCD", + .ptext = "\xAA\xAA\xAA\xAA\xAA\xAA\xAA\xAA" + "\xBB\xBB\xBB\xBB\xBB\xBB\xBB\xBB" + "\xCC\xCC\xCC\xCC\xCC\xCC\xCC\xCC" + "\xDD\xDD\xDD\xDD\xDD\xDD\xDD\xDD" + "\xEE\xEE\xEE\xEE\xEE\xEE\xEE\xEE" + "\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF" + "\xEE\xEE\xEE\xEE\xEE\xEE\xEE\xEE" + "\xAA\xAA\xAA\xAA\xAA\xAA\xAA\xAA", + .plen = 64, + .assoc = "\xFE\xED\xFA\xCE\xDE\xAD\xBE\xEF" + "\xFE\xED\xFA\xCE\xDE\xAD\xBE\xEF" + "\xAB\xAD\xDA\xD2", + .alen = 20, + .ctext = "\x17\xF3\x99\xF0\x8C\x67\xD5\xEE" + "\x19\xD0\xDC\x99\x69\xC4\xBB\x7D" + "\x5F\xD4\x6F\xD3\x75\x64\x89\x06" + "\x91\x57\xB2\x82\xBB\x20\x07\x35" + "\xD8\x27\x10\xCA\x5C\x22\xF0\xCC" + "\xFA\x7C\xBF\x93\xD4\x96\xAC\x15" + "\xA5\x68\x34\xCB\xCF\x98\xC3\x97" + "\xB4\x02\x4A\x26\x91\x23\x3B\x8D" + "\x83\xDE\x35\x41\xE4\xC2\xB5\x81" + "\x77\xE0\x65\xA9\xBF\x7B\x62\xEC", + .clen = 80, + } +}; + +static const struct aead_testvec sm4_ccm_tv_template[] = { + { /* From https://datatracker.ietf.org/doc/html/rfc8998#appendix-A.2 */ + .key = "\x01\x23\x45\x67\x89\xAB\xCD\xEF" + "\xFE\xDC\xBA\x98\x76\x54\x32\x10", + .klen = 16, + .iv = "\x02\x00\x00\x12\x34\x56\x78\x00" + "\x00\x00\x00\xAB\xCD\x00\x00\x00", + .ptext = "\xAA\xAA\xAA\xAA\xAA\xAA\xAA\xAA" + "\xBB\xBB\xBB\xBB\xBB\xBB\xBB\xBB" + "\xCC\xCC\xCC\xCC\xCC\xCC\xCC\xCC" + "\xDD\xDD\xDD\xDD\xDD\xDD\xDD\xDD" + "\xEE\xEE\xEE\xEE\xEE\xEE\xEE\xEE" + "\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF" + "\xEE\xEE\xEE\xEE\xEE\xEE\xEE\xEE" + "\xAA\xAA\xAA\xAA\xAA\xAA\xAA\xAA", + .plen = 64, + .assoc = "\xFE\xED\xFA\xCE\xDE\xAD\xBE\xEF" + "\xFE\xED\xFA\xCE\xDE\xAD\xBE\xEF" + "\xAB\xAD\xDA\xD2", + .alen = 20, + .ctext = "\x48\xAF\x93\x50\x1F\xA6\x2A\xDB" + "\xCD\x41\x4C\xCE\x60\x34\xD8\x95" + "\xDD\xA1\xBF\x8F\x13\x2F\x04\x20" + "\x98\x66\x15\x72\xE7\x48\x30\x94" + "\xFD\x12\xE5\x18\xCE\x06\x2C\x98" + "\xAC\xEE\x28\xD9\x5D\xF4\x41\x6B" + "\xED\x31\xA2\xF0\x44\x76\xC1\x8B" + "\xB4\x0C\x84\xA7\x4B\x97\xDC\x5B" + "\x16\x84\x2D\x4F\xA1\x86\xF5\x6A" + "\xB3\x32\x56\x97\x1F\xA1\x10\xF4", + .clen = 80, + } +}; + +static const struct hash_testvec sm4_cbcmac_tv_template[] = { + { + .key = "\xff\xee\xdd\xcc\xbb\xaa\x99\x88" + "\x77\x66\x55\x44\x33\x22\x11\x00", + .plaintext = "\x01\x23\x45\x67\x89\xab\xcd\xef" + "\xfe\xdc\xba\x98\x76\x54\x32\x10", + .digest = "\x97\xb4\x75\x8f\x84\x92\x3d\x3f" + "\x86\x81\x0e\x0e\xea\x14\x6d\x73", + .psize = 16, + .ksize = 16, + }, { + .key = "\x01\x23\x45\x67\x89\xab\xcd\xef" + "\xfe\xdc\xBA\x98\x76\x54\x32\x10", + .plaintext = "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xbb\xbb\xbb\xbb\xbb\xbb\xbb\xbb" + "\xcc\xcc\xcc\xcc\xcc\xcc\xcc\xcc" + "\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd" + "\xee", + .digest = "\xc7\xdb\x17\x71\xa1\x5c\x0d\x22" + "\xa3\x39\x3a\x31\x88\x91\x49\xa1", + .psize = 33, + .ksize = 16, + }, { + .key = "\x01\x23\x45\x67\x89\xab\xcd\xef" + "\xfe\xdc\xBA\x98\x76\x54\x32\x10", + .plaintext = "\xfb\xd1\xbe\x92\x7e\x50\x3f\x16" + "\xf9\xdd\xbe\x91\x73\x53\x37\x1a" + "\xfe\xdd\xba\x97\x7e\x53\x3c\x1c" + "\xfe\xd7\xbf\x9c\x75\x5f\x3e\x11" + "\xf0\xd8\xbc\x96\x73\x5c\x34\x11" + "\xf5\xdb\xb1\x99\x7a\x5a\x32\x1f" + "\xf6\xdf\xb4\x95\x7f\x5f\x3b\x17" + "\xfd\xdb\xb1\x9b\x76\x5c\x37", + .digest = "\x9b\x07\x88\x7f\xd5\x95\x23\x12" + "\x64\x0a\x66\x7f\x4e\x25\xca\xd0", + .psize = 63, + .ksize = 16, + } +}; + +static const struct hash_testvec sm4_cmac128_tv_template[] = { + { + .key = "\xff\xee\xdd\xcc\xbb\xaa\x99\x88" + "\x77\x66\x55\x44\x33\x22\x11\x00", + .plaintext = "\x01\x23\x45\x67\x89\xab\xcd\xef" + "\xfe\xdc\xba\x98\x76\x54\x32\x10", + .digest = "\x00\xd4\x63\xb4\x9a\xf3\x52\xe2" + "\x74\xa9\x00\x55\x13\x54\x2a\xd1", + .psize = 16, + .ksize = 16, + }, { + .key = "\x01\x23\x45\x67\x89\xab\xcd\xef" + "\xfe\xdc\xBA\x98\x76\x54\x32\x10", + .plaintext = "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xbb\xbb\xbb\xbb\xbb\xbb\xbb\xbb" + "\xcc\xcc\xcc\xcc\xcc\xcc\xcc\xcc" + "\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd" + "\xee", + .digest = "\x8a\x8a\xe9\xc0\xc8\x97\x0e\x85" + "\x21\x57\x02\x10\x1a\xbf\x9c\xc6", + .psize = 33, + .ksize = 16, + }, { + .key = "\x01\x23\x45\x67\x89\xab\xcd\xef" + "\xfe\xdc\xBA\x98\x76\x54\x32\x10", + .plaintext = "\xfb\xd1\xbe\x92\x7e\x50\x3f\x16" + "\xf9\xdd\xbe\x91\x73\x53\x37\x1a" + "\xfe\xdd\xba\x97\x7e\x53\x3c\x1c" + "\xfe\xd7\xbf\x9c\x75\x5f\x3e\x11" + "\xf0\xd8\xbc\x96\x73\x5c\x34\x11" + "\xf5\xdb\xb1\x99\x7a\x5a\x32\x1f" + "\xf6\xdf\xb4\x95\x7f\x5f\x3b\x17" + "\xfd\xdb\xb1\x9b\x76\x5c\x37", + .digest = "\x5f\x14\xc9\xa9\x20\xb2\xb4\xf0" + "\x76\xe0\xd8\xd6\xdc\x4f\xe1\xbc", + .psize = 63, + .ksize = 16, + } +}; + /* Cast6 test vectors from RFC 2612 */ static const struct cipher_testvec cast6_tv_template[] = { { -- cgit v1.2.3 From 357a753f5ec7ccdec196fa825d906c3acc4bd57c Mon Sep 17 00:00:00 2001 From: Tianjia Zhang Date: Fri, 13 Aug 2021 15:55:08 +0800 Subject: crypto: tcrypt - add GCM/CCM mode test for SM4 algorithm tcrypt supports GCM/CCM mode, CMAC, CBCMAC, and speed test of SM4 algorithm. Signed-off-by: Tianjia Zhang Signed-off-by: Herbert Xu --- crypto/tcrypt.c | 45 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) diff --git a/crypto/tcrypt.c b/crypto/tcrypt.c index 170102e92f7d..82b0400985a5 100644 --- a/crypto/tcrypt.c +++ b/crypto/tcrypt.c @@ -1916,6 +1916,14 @@ static int do_test(const char *alg, u32 type, u32 mask, int m, u32 num_mb) ret += tcrypt_test("streebog512"); break; + case 55: + ret += tcrypt_test("gcm(sm4)"); + break; + + case 56: + ret += tcrypt_test("ccm(sm4)"); + break; + case 100: ret += tcrypt_test("hmac(md5)"); break; @@ -2007,6 +2015,15 @@ static int do_test(const char *alg, u32 type, u32 mask, int m, u32 num_mb) case 157: ret += tcrypt_test("authenc(hmac(sha1),ecb(cipher_null))"); break; + + case 158: + ret += tcrypt_test("cbcmac(sm4)"); + break; + + case 159: + ret += tcrypt_test("cmac(sm4)"); + break; + case 181: ret += tcrypt_test("authenc(hmac(sha1),cbc(des))"); break; @@ -2336,6 +2353,34 @@ static int do_test(const char *alg, u32 type, u32 mask, int m, u32 num_mb) NULL, 0, 16, 8, speed_template_16); break; + case 222: + test_aead_speed("gcm(sm4)", ENCRYPT, sec, + NULL, 0, 16, 8, speed_template_16); + test_aead_speed("gcm(sm4)", DECRYPT, sec, + NULL, 0, 16, 8, speed_template_16); + break; + + case 223: + test_aead_speed("rfc4309(ccm(sm4))", ENCRYPT, sec, + NULL, 0, 16, 16, aead_speed_template_19); + test_aead_speed("rfc4309(ccm(sm4))", DECRYPT, sec, + NULL, 0, 16, 16, aead_speed_template_19); + break; + + case 224: + test_mb_aead_speed("gcm(sm4)", ENCRYPT, sec, NULL, 0, 16, 8, + speed_template_16, num_mb); + test_mb_aead_speed("gcm(sm4)", DECRYPT, sec, NULL, 0, 16, 8, + speed_template_16, num_mb); + break; + + case 225: + test_mb_aead_speed("rfc4309(ccm(sm4))", ENCRYPT, sec, NULL, 0, + 16, 16, aead_speed_template_19, num_mb); + test_mb_aead_speed("rfc4309(ccm(sm4))", DECRYPT, sec, NULL, 0, + 16, 16, aead_speed_template_19, num_mb); + break; + case 300: if (alg) { test_hash_speed(alg, sec, generic_hash_speed_template); -- cgit v1.2.3 From 1295292d65b729fc8b234fcdf884d79ff5a63ca1 Mon Sep 17 00:00:00 2001 From: Weili Qian Date: Fri, 13 Aug 2021 17:50:05 +0800 Subject: crypto: hisilicon - using 'debugfs_create_file' instead of 'debugfs_create_regset32' The accelerator devices support runtime PM, when device is in suspended, an exception will occur if reading registers. Therefore, this patch uses 'debugfs_create_file' instead of 'debugfs_create_regset32' to create debugfs file, and then the driver can get the device status before reading the register. Signed-off-by: Weili Qian Signed-off-by: Herbert Xu --- drivers/crypto/hisilicon/hpre/hpre_main.c | 25 +++++++++++-- drivers/crypto/hisilicon/qm.c | 58 +++++++++++++++++++------------ drivers/crypto/hisilicon/qm.h | 2 ++ drivers/crypto/hisilicon/sec2/sec_main.c | 11 +++++- drivers/crypto/hisilicon/zip/zip_main.c | 12 ++++++- 5 files changed, 81 insertions(+), 27 deletions(-) diff --git a/drivers/crypto/hisilicon/hpre/hpre_main.c b/drivers/crypto/hisilicon/hpre/hpre_main.c index 6a5de3073de9..b216238c1bb3 100644 --- a/drivers/crypto/hisilicon/hpre/hpre_main.c +++ b/drivers/crypto/hisilicon/hpre/hpre_main.c @@ -763,6 +763,24 @@ static int hpre_debugfs_atomic64_set(void *data, u64 val) DEFINE_DEBUGFS_ATTRIBUTE(hpre_atomic64_ops, hpre_debugfs_atomic64_get, hpre_debugfs_atomic64_set, "%llu\n"); +static int hpre_com_regs_show(struct seq_file *s, void *unused) +{ + hisi_qm_regs_dump(s, s->private); + + return 0; +} + +DEFINE_SHOW_ATTRIBUTE(hpre_com_regs); + +static int hpre_cluster_regs_show(struct seq_file *s, void *unused) +{ + hisi_qm_regs_dump(s, s->private); + + return 0; +} + +DEFINE_SHOW_ATTRIBUTE(hpre_cluster_regs); + static int hpre_create_debugfs_file(struct hisi_qm *qm, struct dentry *dir, enum hpre_ctrl_dbgfs_file type, int indx) { @@ -801,7 +819,9 @@ static int hpre_pf_comm_regs_debugfs_init(struct hisi_qm *qm) regset->nregs = ARRAY_SIZE(hpre_com_dfx_regs); regset->base = qm->io_base; - debugfs_create_regset32("regs", 0444, qm->debug.debug_root, regset); + debugfs_create_file("regs", 0444, qm->debug.debug_root, + regset, &hpre_com_regs_fops); + return 0; } @@ -828,7 +848,8 @@ static int hpre_cluster_debugfs_init(struct hisi_qm *qm) regset->nregs = ARRAY_SIZE(hpre_cluster_dfx_regs); regset->base = qm->io_base + hpre_cluster_offsets[i]; - debugfs_create_regset32("regs", 0444, tmp_d, regset); + debugfs_create_file("regs", 0444, tmp_d, regset, + &hpre_cluster_regs_fops); ret = hpre_create_debugfs_file(qm, tmp_d, HPRE_CLUSTER_CTRL, i + HPRE_CLUSTER_CTRL); if (ret) diff --git a/drivers/crypto/hisilicon/qm.c b/drivers/crypto/hisilicon/qm.c index 1d67f94a1d56..e417cd05f612 100644 --- a/drivers/crypto/hisilicon/qm.c +++ b/drivers/crypto/hisilicon/qm.c @@ -4,7 +4,6 @@ #include #include #include -#include #include #include #include @@ -1337,13 +1336,8 @@ static const struct file_operations qm_debug_fops = { .write = qm_debug_write, }; -struct qm_dfx_registers { - char *reg_name; - u64 reg_offset; -}; - #define CNT_CYC_REGS_NUM 10 -static struct qm_dfx_registers qm_dfx_regs[] = { +static const struct debugfs_reg32 qm_dfx_regs[] = { /* XXX_CNT are reading clear register */ {"QM_ECC_1BIT_CNT ", 0x104000ull}, {"QM_ECC_MBIT_CNT ", 0x104008ull}, @@ -1369,31 +1363,49 @@ static struct qm_dfx_registers qm_dfx_regs[] = { {"QM_DFX_FF_ST5 ", 0x1040dcull}, {"QM_DFX_FF_ST6 ", 0x1040e0ull}, {"QM_IN_IDLE_ST ", 0x1040e4ull}, - { NULL, 0} }; -static struct qm_dfx_registers qm_vf_dfx_regs[] = { +static const struct debugfs_reg32 qm_vf_dfx_regs[] = { {"QM_DFX_FUNS_ACTIVE_ST ", 0x200ull}, - { NULL, 0} }; -static int qm_regs_show(struct seq_file *s, void *unused) +/** + * hisi_qm_regs_dump() - Dump registers's value. + * @s: debugfs file handle. + * @regset: accelerator registers information. + * + * Dump accelerator registers. + */ +void hisi_qm_regs_dump(struct seq_file *s, struct debugfs_regset32 *regset) { - struct hisi_qm *qm = s->private; - struct qm_dfx_registers *regs; + const struct debugfs_reg32 *regs = regset->regs; + int regs_len = regset->nregs; u32 val; + int i; - if (qm->fun_type == QM_HW_PF) - regs = qm_dfx_regs; - else - regs = qm_vf_dfx_regs; + for (i = 0; i < regs_len; i++) { + val = readl(regset->base + regs[i].offset); + seq_printf(s, "%s= 0x%08x\n", regs[i].name, val); + } +} +EXPORT_SYMBOL_GPL(hisi_qm_regs_dump); - while (regs->reg_name) { - val = readl(qm->io_base + regs->reg_offset); - seq_printf(s, "%s= 0x%08x\n", regs->reg_name, val); - regs++; +static int qm_regs_show(struct seq_file *s, void *unused) +{ + struct hisi_qm *qm = s->private; + struct debugfs_regset32 regset; + + if (qm->fun_type == QM_HW_PF) { + regset.regs = qm_dfx_regs; + regset.nregs = ARRAY_SIZE(qm_dfx_regs); + } else { + regset.regs = qm_vf_dfx_regs; + regset.nregs = ARRAY_SIZE(qm_vf_dfx_regs); } + regset.base = qm->io_base; + hisi_qm_regs_dump(s, ®set); + return 0; } @@ -4245,7 +4257,7 @@ EXPORT_SYMBOL_GPL(hisi_qm_debug_init); */ void hisi_qm_debug_regs_clear(struct hisi_qm *qm) { - struct qm_dfx_registers *regs; + const struct debugfs_reg32 *regs; int i; /* clear current_qm */ @@ -4264,7 +4276,7 @@ void hisi_qm_debug_regs_clear(struct hisi_qm *qm) regs = qm_dfx_regs; for (i = 0; i < CNT_CYC_REGS_NUM; i++) { - readl(qm->io_base + regs->reg_offset); + readl(qm->io_base + regs->offset); regs++; } diff --git a/drivers/crypto/hisilicon/qm.h b/drivers/crypto/hisilicon/qm.h index 035eaf8c442d..0e5df1c8b3f5 100644 --- a/drivers/crypto/hisilicon/qm.h +++ b/drivers/crypto/hisilicon/qm.h @@ -4,6 +4,7 @@ #define HISI_ACC_QM_H #include +#include #include #include #include @@ -430,4 +431,5 @@ void hisi_qm_dev_shutdown(struct pci_dev *pdev); void hisi_qm_wait_task_finish(struct hisi_qm *qm, struct hisi_qm_list *qm_list); int hisi_qm_alg_register(struct hisi_qm *qm, struct hisi_qm_list *qm_list); void hisi_qm_alg_unregister(struct hisi_qm *qm, struct hisi_qm_list *qm_list); +void hisi_qm_regs_dump(struct seq_file *s, struct debugfs_regset32 *regset); #endif diff --git a/drivers/crypto/hisilicon/sec2/sec_main.c b/drivers/crypto/hisilicon/sec2/sec_main.c index 2250c81d6158..55ef3ae70fdb 100644 --- a/drivers/crypto/hisilicon/sec2/sec_main.c +++ b/drivers/crypto/hisilicon/sec2/sec_main.c @@ -676,6 +676,15 @@ static int sec_debugfs_atomic64_set(void *data, u64 val) DEFINE_DEBUGFS_ATTRIBUTE(sec_atomic64_ops, sec_debugfs_atomic64_get, sec_debugfs_atomic64_set, "%lld\n"); +static int sec_regs_show(struct seq_file *s, void *unused) +{ + hisi_qm_regs_dump(s, s->private); + + return 0; +} + +DEFINE_SHOW_ATTRIBUTE(sec_regs); + static int sec_core_debug_init(struct hisi_qm *qm) { struct sec_dev *sec = container_of(qm, struct sec_dev, qm); @@ -696,7 +705,7 @@ static int sec_core_debug_init(struct hisi_qm *qm) regset->base = qm->io_base; if (qm->pdev->device == SEC_PF_PCI_DEVICE_ID) - debugfs_create_regset32("regs", 0444, tmp_d, regset); + debugfs_create_file("regs", 0444, tmp_d, regset, &sec_regs_fops); for (i = 0; i < ARRAY_SIZE(sec_dfx_labels); i++) { atomic64_t *data = (atomic64_t *)((uintptr_t)dfx + diff --git a/drivers/crypto/hisilicon/zip/zip_main.c b/drivers/crypto/hisilicon/zip/zip_main.c index d1ca474ea8e3..4438188adbb7 100644 --- a/drivers/crypto/hisilicon/zip/zip_main.c +++ b/drivers/crypto/hisilicon/zip/zip_main.c @@ -564,6 +564,15 @@ static int zip_debugfs_atomic64_get(void *data, u64 *val) DEFINE_DEBUGFS_ATTRIBUTE(zip_atomic64_ops, zip_debugfs_atomic64_get, zip_debugfs_atomic64_set, "%llu\n"); +static int hisi_zip_regs_show(struct seq_file *s, void *unused) +{ + hisi_qm_regs_dump(s, s->private); + + return 0; +} + +DEFINE_SHOW_ATTRIBUTE(hisi_zip_regs); + static int hisi_zip_core_debug_init(struct hisi_qm *qm) { struct device *dev = &qm->pdev->dev; @@ -588,7 +597,8 @@ static int hisi_zip_core_debug_init(struct hisi_qm *qm) regset->base = qm->io_base + core_offsets[i]; tmp_d = debugfs_create_dir(buf, qm->debug.debug_root); - debugfs_create_regset32("regs", 0444, tmp_d, regset); + debugfs_create_file("regs", 0444, tmp_d, regset, + &hisi_zip_regs_fops); } return 0; -- cgit v1.2.3 From d7ea53395b723b1a87b9c0afb3301cc33fbe35e6 Mon Sep 17 00:00:00 2001 From: Weili Qian Date: Fri, 13 Aug 2021 17:50:06 +0800 Subject: crypto: hisilicon - add runtime PM ops Accelerator devices support runtime PM to reduce power consumption. This patch adds the runtime PM suspend/resume callbacks to the accelerator devices. Signed-off-by: Weili Qian Signed-off-by: Herbert Xu --- drivers/crypto/hisilicon/qm.c | 118 ++++++++++++++++++++++++++++++++++++++++++ drivers/crypto/hisilicon/qm.h | 2 + 2 files changed, 120 insertions(+) diff --git a/drivers/crypto/hisilicon/qm.c b/drivers/crypto/hisilicon/qm.c index e417cd05f612..dbe162a0bd02 100644 --- a/drivers/crypto/hisilicon/qm.c +++ b/drivers/crypto/hisilicon/qm.c @@ -5692,6 +5692,124 @@ err_pci_init: } EXPORT_SYMBOL_GPL(hisi_qm_init); + +static int qm_prepare_for_suspend(struct hisi_qm *qm) +{ + struct pci_dev *pdev = qm->pdev; + int ret; + u32 val; + + ret = qm->ops->set_msi(qm, false); + if (ret) { + pci_err(pdev, "failed to disable MSI before suspending!\n"); + return ret; + } + + /* shutdown OOO register */ + writel(ACC_MASTER_GLOBAL_CTRL_SHUTDOWN, + qm->io_base + ACC_MASTER_GLOBAL_CTRL); + + ret = readl_relaxed_poll_timeout(qm->io_base + ACC_MASTER_TRANS_RETURN, + val, + (val == ACC_MASTER_TRANS_RETURN_RW), + POLL_PERIOD, POLL_TIMEOUT); + if (ret) { + pci_emerg(pdev, "Bus lock! Please reset system.\n"); + return ret; + } + + ret = qm_set_pf_mse(qm, false); + if (ret) + pci_err(pdev, "failed to disable MSE before suspending!\n"); + + return ret; +} + +static int qm_rebuild_for_resume(struct hisi_qm *qm) +{ + struct pci_dev *pdev = qm->pdev; + int ret; + + ret = qm_set_pf_mse(qm, true); + if (ret) { + pci_err(pdev, "failed to enable MSE after resuming!\n"); + return ret; + } + + ret = qm->ops->set_msi(qm, true); + if (ret) { + pci_err(pdev, "failed to enable MSI after resuming!\n"); + return ret; + } + + ret = qm_dev_hw_init(qm); + if (ret) { + pci_err(pdev, "failed to init device after resuming\n"); + return ret; + } + + qm_cmd_init(qm); + hisi_qm_dev_err_init(qm); + + return 0; +} + +/** + * hisi_qm_suspend() - Runtime suspend of given device. + * @dev: device to suspend. + * + * Function that suspend the device. + */ +int hisi_qm_suspend(struct device *dev) +{ + struct pci_dev *pdev = to_pci_dev(dev); + struct hisi_qm *qm = pci_get_drvdata(pdev); + int ret; + + pci_info(pdev, "entering suspended state\n"); + + ret = hisi_qm_stop(qm, QM_NORMAL); + if (ret) { + pci_err(pdev, "failed to stop qm(%d)\n", ret); + return ret; + } + + ret = qm_prepare_for_suspend(qm); + if (ret) + pci_err(pdev, "failed to prepare suspended(%d)\n", ret); + + return ret; +} +EXPORT_SYMBOL_GPL(hisi_qm_suspend); + +/** + * hisi_qm_resume() - Runtime resume of given device. + * @dev: device to resume. + * + * Function that resume the device. + */ +int hisi_qm_resume(struct device *dev) +{ + struct pci_dev *pdev = to_pci_dev(dev); + struct hisi_qm *qm = pci_get_drvdata(pdev); + int ret; + + pci_info(pdev, "resuming from suspend state\n"); + + ret = qm_rebuild_for_resume(qm); + if (ret) { + pci_err(pdev, "failed to rebuild resume(%d)\n", ret); + return ret; + } + + ret = hisi_qm_start(qm); + if (ret) + pci_err(pdev, "failed to start qm(%d)\n", ret); + + return 0; +} +EXPORT_SYMBOL_GPL(hisi_qm_resume); + MODULE_LICENSE("GPL v2"); MODULE_AUTHOR("Zhou Wang "); MODULE_DESCRIPTION("HiSilicon Accelerator queue manager driver"); diff --git a/drivers/crypto/hisilicon/qm.h b/drivers/crypto/hisilicon/qm.h index 0e5df1c8b3f5..59e16464d03d 100644 --- a/drivers/crypto/hisilicon/qm.h +++ b/drivers/crypto/hisilicon/qm.h @@ -431,5 +431,7 @@ void hisi_qm_dev_shutdown(struct pci_dev *pdev); void hisi_qm_wait_task_finish(struct hisi_qm *qm, struct hisi_qm_list *qm_list); int hisi_qm_alg_register(struct hisi_qm *qm, struct hisi_qm_list *qm_list); void hisi_qm_alg_unregister(struct hisi_qm *qm, struct hisi_qm_list *qm_list); +int hisi_qm_resume(struct device *dev); +int hisi_qm_suspend(struct device *dev); void hisi_qm_regs_dump(struct seq_file *s, struct debugfs_regset32 *regset); #endif -- cgit v1.2.3 From 607c191b371d72952c11dc209e583303a4515f14 Mon Sep 17 00:00:00 2001 From: Weili Qian Date: Fri, 13 Aug 2021 17:50:07 +0800 Subject: crypto: hisilicon - support runtime PM for accelerator device Add runtime PM support for Kunpeng930 accelerator device. Signed-off-by: Weili Qian Signed-off-by: Herbert Xu --- drivers/crypto/hisilicon/hpre/hpre_main.c | 35 ++++- drivers/crypto/hisilicon/qm.c | 218 +++++++++++++++++++++++++++--- drivers/crypto/hisilicon/qm.h | 4 + drivers/crypto/hisilicon/sec2/sec_main.c | 36 ++++- drivers/crypto/hisilicon/zip/zip_main.c | 34 ++++- 5 files changed, 297 insertions(+), 30 deletions(-) diff --git a/drivers/crypto/hisilicon/hpre/hpre_main.c b/drivers/crypto/hisilicon/hpre/hpre_main.c index b216238c1bb3..65a641396c07 100644 --- a/drivers/crypto/hisilicon/hpre/hpre_main.c +++ b/drivers/crypto/hisilicon/hpre/hpre_main.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include "hpre.h" @@ -658,10 +659,15 @@ static ssize_t hpre_ctrl_debug_read(struct file *filp, char __user *buf, size_t count, loff_t *pos) { struct hpre_debugfs_file *file = filp->private_data; + struct hisi_qm *qm = hpre_file_to_qm(file); char tbuf[HPRE_DBGFS_VAL_MAX_LEN]; u32 val; int ret; + ret = hisi_qm_get_dfx_access(qm); + if (ret) + return ret; + spin_lock_irq(&file->lock); switch (file->type) { case HPRE_CLEAR_ENABLE: @@ -671,18 +677,25 @@ static ssize_t hpre_ctrl_debug_read(struct file *filp, char __user *buf, val = hpre_cluster_inqry_read(file); break; default: - spin_unlock_irq(&file->lock); - return -EINVAL; + goto err_input; } spin_unlock_irq(&file->lock); + + hisi_qm_put_dfx_access(qm); ret = snprintf(tbuf, HPRE_DBGFS_VAL_MAX_LEN, "%u\n", val); return simple_read_from_buffer(buf, count, pos, tbuf, ret); + +err_input: + spin_unlock_irq(&file->lock); + hisi_qm_put_dfx_access(qm); + return -EINVAL; } static ssize_t hpre_ctrl_debug_write(struct file *filp, const char __user *buf, size_t count, loff_t *pos) { struct hpre_debugfs_file *file = filp->private_data; + struct hisi_qm *qm = hpre_file_to_qm(file); char tbuf[HPRE_DBGFS_VAL_MAX_LEN]; unsigned long val; int len, ret; @@ -702,6 +715,10 @@ static ssize_t hpre_ctrl_debug_write(struct file *filp, const char __user *buf, if (kstrtoul(tbuf, 0, &val)) return -EFAULT; + ret = hisi_qm_get_dfx_access(qm); + if (ret) + return ret; + spin_lock_irq(&file->lock); switch (file->type) { case HPRE_CLEAR_ENABLE: @@ -718,12 +735,12 @@ static ssize_t hpre_ctrl_debug_write(struct file *filp, const char __user *buf, ret = -EINVAL; goto err_input; } - spin_unlock_irq(&file->lock); - return count; + ret = count; err_input: spin_unlock_irq(&file->lock); + hisi_qm_put_dfx_access(qm); return ret; } @@ -818,6 +835,7 @@ static int hpre_pf_comm_regs_debugfs_init(struct hisi_qm *qm) regset->regs = hpre_com_dfx_regs; regset->nregs = ARRAY_SIZE(hpre_com_dfx_regs); regset->base = qm->io_base; + regset->dev = dev; debugfs_create_file("regs", 0444, qm->debug.debug_root, regset, &hpre_com_regs_fops); @@ -847,6 +865,7 @@ static int hpre_cluster_debugfs_init(struct hisi_qm *qm) regset->regs = hpre_cluster_dfx_regs; regset->nregs = ARRAY_SIZE(hpre_cluster_dfx_regs); regset->base = qm->io_base + hpre_cluster_offsets[i]; + regset->dev = dev; debugfs_create_file("regs", 0444, tmp_d, regset, &hpre_cluster_regs_fops); @@ -1101,6 +1120,8 @@ static int hpre_probe(struct pci_dev *pdev, const struct pci_device_id *id) goto err_with_alg_register; } + hisi_qm_pm_init(qm); + return 0; err_with_alg_register: @@ -1124,6 +1145,7 @@ static void hpre_remove(struct pci_dev *pdev) struct hisi_qm *qm = pci_get_drvdata(pdev); int ret; + hisi_qm_pm_uninit(qm); hisi_qm_wait_task_finish(qm, &hpre_devices); hisi_qm_alg_unregister(qm, &hpre_devices); if (qm->fun_type == QM_HW_PF && qm->vfs_num) { @@ -1146,6 +1168,10 @@ static void hpre_remove(struct pci_dev *pdev) hisi_qm_uninit(qm); } +static const struct dev_pm_ops hpre_pm_ops = { + SET_RUNTIME_PM_OPS(hisi_qm_suspend, hisi_qm_resume, NULL) +}; + static const struct pci_error_handlers hpre_err_handler = { .error_detected = hisi_qm_dev_err_detected, .slot_reset = hisi_qm_dev_slot_reset, @@ -1162,6 +1188,7 @@ static struct pci_driver hpre_pci_driver = { hisi_qm_sriov_configure : NULL, .err_handler = &hpre_err_handler, .shutdown = hisi_qm_dev_shutdown, + .driver.pm = &hpre_pm_ops, }; static void hpre_register_debugfs(void) diff --git a/drivers/crypto/hisilicon/qm.c b/drivers/crypto/hisilicon/qm.c index dbe162a0bd02..74740035380d 100644 --- a/drivers/crypto/hisilicon/qm.c +++ b/drivers/crypto/hisilicon/qm.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -269,6 +270,8 @@ #define QM_QOS_MAX_CIR_S 11 #define QM_QOS_VAL_MAX_LEN 32 +#define QM_AUTOSUSPEND_DELAY 3000 + #define QM_MK_CQC_DW3_V1(hop_num, pg_sz, buf_sz, cqe_sz) \ (((hop_num) << QM_CQ_HOP_NUM_SHIFT) | \ ((pg_sz) << QM_CQ_PAGE_SIZE_SHIFT) | \ @@ -733,6 +736,34 @@ static u32 qm_get_irq_num_v3(struct hisi_qm *qm) return QM_IRQ_NUM_VF_V3; } +static int qm_pm_get_sync(struct hisi_qm *qm) +{ + struct device *dev = &qm->pdev->dev; + int ret; + + if (qm->fun_type == QM_HW_VF || qm->ver < QM_HW_V3) + return 0; + + ret = pm_runtime_resume_and_get(dev); + if (ret < 0) { + dev_err(dev, "failed to get_sync(%d).\n", ret); + return ret; + } + + return 0; +} + +static void qm_pm_put_sync(struct hisi_qm *qm) +{ + struct device *dev = &qm->pdev->dev; + + if (qm->fun_type == QM_HW_VF || qm->ver < QM_HW_V3) + return; + + pm_runtime_mark_last_busy(dev); + pm_runtime_put_autosuspend(dev); +} + static struct hisi_qp *qm_to_hisi_qp(struct hisi_qm *qm, struct qm_eqe *eqe) { u16 cqn = le32_to_cpu(eqe->dw0) & QM_EQE_CQN_MASK; @@ -1258,10 +1289,15 @@ static ssize_t qm_debug_read(struct file *filp, char __user *buf, { struct debugfs_file *file = filp->private_data; enum qm_debug_file index = file->index; + struct hisi_qm *qm = file_to_qm(file); char tbuf[QM_DBG_TMP_BUF_LEN]; u32 val; int ret; + ret = hisi_qm_get_dfx_access(qm); + if (ret) + return ret; + mutex_lock(&file->lock); switch (index) { case CURRENT_QM: @@ -1274,13 +1310,18 @@ static ssize_t qm_debug_read(struct file *filp, char __user *buf, val = clear_enable_read(file); break; default: - mutex_unlock(&file->lock); - return -EINVAL; + goto err_input; } mutex_unlock(&file->lock); + hisi_qm_put_dfx_access(qm); ret = scnprintf(tbuf, QM_DBG_TMP_BUF_LEN, "%u\n", val); return simple_read_from_buffer(buf, count, pos, tbuf, ret); + +err_input: + mutex_unlock(&file->lock); + hisi_qm_put_dfx_access(qm); + return -EINVAL; } static ssize_t qm_debug_write(struct file *filp, const char __user *buf, @@ -1288,6 +1329,7 @@ static ssize_t qm_debug_write(struct file *filp, const char __user *buf, { struct debugfs_file *file = filp->private_data; enum qm_debug_file index = file->index; + struct hisi_qm *qm = file_to_qm(file); unsigned long val; char tbuf[QM_DBG_TMP_BUF_LEN]; int len, ret; @@ -1307,6 +1349,10 @@ static ssize_t qm_debug_write(struct file *filp, const char __user *buf, if (kstrtoul(tbuf, 0, &val)) return -EFAULT; + ret = hisi_qm_get_dfx_access(qm); + if (ret) + return ret; + mutex_lock(&file->lock); switch (index) { case CURRENT_QM: @@ -1323,6 +1369,8 @@ static ssize_t qm_debug_write(struct file *filp, const char __user *buf, } mutex_unlock(&file->lock); + hisi_qm_put_dfx_access(qm); + if (ret) return ret; @@ -1378,15 +1426,23 @@ static const struct debugfs_reg32 qm_vf_dfx_regs[] = { */ void hisi_qm_regs_dump(struct seq_file *s, struct debugfs_regset32 *regset) { + struct pci_dev *pdev = to_pci_dev(regset->dev); + struct hisi_qm *qm = pci_get_drvdata(pdev); const struct debugfs_reg32 *regs = regset->regs; int regs_len = regset->nregs; + int i, ret; u32 val; - int i; + + ret = hisi_qm_get_dfx_access(qm); + if (ret) + return; for (i = 0; i < regs_len; i++) { val = readl(regset->base + regs[i].offset); seq_printf(s, "%s= 0x%08x\n", regs[i].name, val); } + + hisi_qm_put_dfx_access(qm); } EXPORT_SYMBOL_GPL(hisi_qm_regs_dump); @@ -1404,6 +1460,8 @@ static int qm_regs_show(struct seq_file *s, void *unused) } regset.base = qm->io_base; + regset.dev = &qm->pdev->dev; + hisi_qm_regs_dump(s, ®set); return 0; @@ -1835,16 +1893,24 @@ static ssize_t qm_cmd_write(struct file *filp, const char __user *buffer, if (*pos) return 0; + ret = hisi_qm_get_dfx_access(qm); + if (ret) + return ret; + /* Judge if the instance is being reset. */ if (unlikely(atomic_read(&qm->status.flags) == QM_STOP)) return 0; - if (count > QM_DBG_WRITE_LEN) - return -ENOSPC; + if (count > QM_DBG_WRITE_LEN) { + ret = -ENOSPC; + goto put_dfx_access; + } cmd_buf = memdup_user_nul(buffer, count); - if (IS_ERR(cmd_buf)) - return PTR_ERR(cmd_buf); + if (IS_ERR(cmd_buf)) { + ret = PTR_ERR(cmd_buf); + goto put_dfx_access; + } cmd_buf_tmp = strchr(cmd_buf, '\n'); if (cmd_buf_tmp) { @@ -1855,12 +1921,16 @@ static ssize_t qm_cmd_write(struct file *filp, const char __user *buffer, ret = qm_cmd_write_dump(qm, cmd_buf); if (ret) { kfree(cmd_buf); - return ret; + goto put_dfx_access; } kfree(cmd_buf); - return count; + ret = count; + +put_dfx_access: + hisi_qm_put_dfx_access(qm); + return ret; } static const struct file_operations qm_cmd_fops = { @@ -2457,11 +2527,19 @@ static struct hisi_qp *qm_create_qp_nolock(struct hisi_qm *qm, u8 alg_type) struct hisi_qp *hisi_qm_create_qp(struct hisi_qm *qm, u8 alg_type) { struct hisi_qp *qp; + int ret; + + ret = qm_pm_get_sync(qm); + if (ret) + return ERR_PTR(ret); down_write(&qm->qps_lock); qp = qm_create_qp_nolock(qm, alg_type); up_write(&qm->qps_lock); + if (IS_ERR(qp)) + qm_pm_put_sync(qm); + return qp; } EXPORT_SYMBOL_GPL(hisi_qm_create_qp); @@ -2487,6 +2565,8 @@ void hisi_qm_release_qp(struct hisi_qp *qp) idr_remove(&qm->qp_idr, qp->qp_id); up_write(&qm->qps_lock); + + qm_pm_put_sync(qm); } EXPORT_SYMBOL_GPL(hisi_qm_release_qp); @@ -4069,10 +4149,15 @@ static ssize_t qm_algqos_read(struct file *filp, char __user *buf, u32 qos_val, ir; int ret; + ret = hisi_qm_get_dfx_access(qm); + if (ret) + return ret; + /* Mailbox and reset cannot be operated at the same time */ if (test_and_set_bit(QM_RESETTING, &qm->misc_ctl)) { pci_err(qm->pdev, "dev resetting, read alg qos failed!\n"); - return -EAGAIN; + ret = -EAGAIN; + goto err_put_dfx_access; } if (qm->fun_type == QM_HW_PF) { @@ -4091,6 +4176,8 @@ static ssize_t qm_algqos_read(struct file *filp, char __user *buf, err_get_status: clear_bit(QM_RESETTING, &qm->misc_ctl); +err_put_dfx_access: + hisi_qm_put_dfx_access(qm); return ret; } @@ -4171,15 +4258,23 @@ static ssize_t qm_algqos_write(struct file *filp, const char __user *buf, fun_index = device * 8 + function; + ret = qm_pm_get_sync(qm); + if (ret) { + ret = -EINVAL; + goto err_get_status; + } + ret = qm_func_shaper_enable(qm, fun_index, val); if (ret) { pci_err(qm->pdev, "failed to enable function shaper!\n"); ret = -EINVAL; - goto err_get_status; + goto err_put_sync; } - ret = count; + ret = count; +err_put_sync: + qm_pm_put_sync(qm); err_get_status: clear_bit(QM_RESETTING, &qm->misc_ctl); return ret; @@ -4299,19 +4394,23 @@ int hisi_qm_sriov_enable(struct pci_dev *pdev, int max_vfs) struct hisi_qm *qm = pci_get_drvdata(pdev); int pre_existing_vfs, num_vfs, total_vfs, ret; + ret = qm_pm_get_sync(qm); + if (ret) + return ret; + total_vfs = pci_sriov_get_totalvfs(pdev); pre_existing_vfs = pci_num_vf(pdev); if (pre_existing_vfs) { pci_err(pdev, "%d VFs already enabled. Please disable pre-enabled VFs!\n", pre_existing_vfs); - return 0; + goto err_put_sync; } num_vfs = min_t(int, max_vfs, total_vfs); ret = qm_vf_q_assign(qm, num_vfs); if (ret) { pci_err(pdev, "Can't assign queues for VF!\n"); - return ret; + goto err_put_sync; } qm->vfs_num = num_vfs; @@ -4320,12 +4419,16 @@ int hisi_qm_sriov_enable(struct pci_dev *pdev, int max_vfs) if (ret) { pci_err(pdev, "Can't enable VF!\n"); qm_clear_vft_config(qm); - return ret; + goto err_put_sync; } pci_info(pdev, "VF enabled, vfs_num(=%d)!\n", num_vfs); return num_vfs; + +err_put_sync: + qm_pm_put_sync(qm); + return ret; } EXPORT_SYMBOL_GPL(hisi_qm_sriov_enable); @@ -4340,6 +4443,7 @@ int hisi_qm_sriov_disable(struct pci_dev *pdev, bool is_frozen) { struct hisi_qm *qm = pci_get_drvdata(pdev); int total_vfs = pci_sriov_get_totalvfs(qm->pdev); + int ret; if (pci_vfs_assigned(pdev)) { pci_err(pdev, "Failed to disable VFs as VFs are assigned!\n"); @@ -4355,8 +4459,13 @@ int hisi_qm_sriov_disable(struct pci_dev *pdev, bool is_frozen) pci_disable_sriov(pdev); /* clear vf function shaper configure array */ memset(qm->factor + 1, 0, sizeof(struct qm_shaper_factor) * total_vfs); + ret = qm_clear_vft_config(qm); + if (ret) + return ret; + + qm_pm_put_sync(qm); - return qm_clear_vft_config(qm); + return 0; } EXPORT_SYMBOL_GPL(hisi_qm_sriov_disable); @@ -5176,11 +5285,18 @@ static void hisi_qm_controller_reset(struct work_struct *rst_work) struct hisi_qm *qm = container_of(rst_work, struct hisi_qm, rst_work); int ret; + ret = qm_pm_get_sync(qm); + if (ret) { + clear_bit(QM_RST_SCHED, &qm->misc_ctl); + return; + } + /* reset pcie device controller */ ret = qm_controller_reset(qm); if (ret) dev_err(&qm->pdev->dev, "controller reset failed (%d)\n", ret); + qm_pm_put_sync(qm); } static void qm_pf_reset_vf_prepare(struct hisi_qm *qm, @@ -5692,6 +5808,76 @@ err_pci_init: } EXPORT_SYMBOL_GPL(hisi_qm_init); +/** + * hisi_qm_get_dfx_access() - Try to get dfx access. + * @qm: pointer to accelerator device. + * + * Try to get dfx access, then user can get message. + * + * If device is in suspended, return failure, otherwise + * bump up the runtime PM usage counter. + */ +int hisi_qm_get_dfx_access(struct hisi_qm *qm) +{ + struct device *dev = &qm->pdev->dev; + + if (pm_runtime_suspended(dev)) { + dev_info(dev, "can not read/write - device in suspended.\n"); + return -EAGAIN; + } + + return qm_pm_get_sync(qm); +} +EXPORT_SYMBOL_GPL(hisi_qm_get_dfx_access); + +/** + * hisi_qm_put_dfx_access() - Put dfx access. + * @qm: pointer to accelerator device. + * + * Put dfx access, drop runtime PM usage counter. + */ +void hisi_qm_put_dfx_access(struct hisi_qm *qm) +{ + qm_pm_put_sync(qm); +} +EXPORT_SYMBOL_GPL(hisi_qm_put_dfx_access); + +/** + * hisi_qm_pm_init() - Initialize qm runtime PM. + * @qm: pointer to accelerator device. + * + * Function that initialize qm runtime PM. + */ +void hisi_qm_pm_init(struct hisi_qm *qm) +{ + struct device *dev = &qm->pdev->dev; + + if (qm->fun_type == QM_HW_VF || qm->ver < QM_HW_V3) + return; + + pm_runtime_set_autosuspend_delay(dev, QM_AUTOSUSPEND_DELAY); + pm_runtime_use_autosuspend(dev); + pm_runtime_put_noidle(dev); +} +EXPORT_SYMBOL_GPL(hisi_qm_pm_init); + +/** + * hisi_qm_pm_uninit() - Uninitialize qm runtime PM. + * @qm: pointer to accelerator device. + * + * Function that uninitialize qm runtime PM. + */ +void hisi_qm_pm_uninit(struct hisi_qm *qm) +{ + struct device *dev = &qm->pdev->dev; + + if (qm->fun_type == QM_HW_VF || qm->ver < QM_HW_V3) + return; + + pm_runtime_get_noresume(dev); + pm_runtime_dont_use_autosuspend(dev); +} +EXPORT_SYMBOL_GPL(hisi_qm_pm_uninit); static int qm_prepare_for_suspend(struct hisi_qm *qm) { diff --git a/drivers/crypto/hisilicon/qm.h b/drivers/crypto/hisilicon/qm.h index 59e16464d03d..3068093229a5 100644 --- a/drivers/crypto/hisilicon/qm.h +++ b/drivers/crypto/hisilicon/qm.h @@ -433,5 +433,9 @@ int hisi_qm_alg_register(struct hisi_qm *qm, struct hisi_qm_list *qm_list); void hisi_qm_alg_unregister(struct hisi_qm *qm, struct hisi_qm_list *qm_list); int hisi_qm_resume(struct device *dev); int hisi_qm_suspend(struct device *dev); +void hisi_qm_pm_uninit(struct hisi_qm *qm); +void hisi_qm_pm_init(struct hisi_qm *qm); +int hisi_qm_get_dfx_access(struct hisi_qm *qm); +void hisi_qm_put_dfx_access(struct hisi_qm *qm); void hisi_qm_regs_dump(struct seq_file *s, struct debugfs_regset32 *regset); #endif diff --git a/drivers/crypto/hisilicon/sec2/sec_main.c b/drivers/crypto/hisilicon/sec2/sec_main.c index 55ef3ae70fdb..0ad1ebe4c7ef 100644 --- a/drivers/crypto/hisilicon/sec2/sec_main.c +++ b/drivers/crypto/hisilicon/sec2/sec_main.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -584,9 +585,14 @@ static ssize_t sec_debug_read(struct file *filp, char __user *buf, { struct sec_debug_file *file = filp->private_data; char tbuf[SEC_DBGFS_VAL_MAX_LEN]; + struct hisi_qm *qm = file->qm; u32 val; int ret; + ret = hisi_qm_get_dfx_access(qm); + if (ret) + return ret; + spin_lock_irq(&file->lock); switch (file->index) { @@ -594,14 +600,19 @@ static ssize_t sec_debug_read(struct file *filp, char __user *buf, val = sec_clear_enable_read(file); break; default: - spin_unlock_irq(&file->lock); - return -EINVAL; + goto err_input; } spin_unlock_irq(&file->lock); - ret = snprintf(tbuf, SEC_DBGFS_VAL_MAX_LEN, "%u\n", val); + hisi_qm_put_dfx_access(qm); + ret = snprintf(tbuf, SEC_DBGFS_VAL_MAX_LEN, "%u\n", val); return simple_read_from_buffer(buf, count, pos, tbuf, ret); + +err_input: + spin_unlock_irq(&file->lock); + hisi_qm_put_dfx_access(qm); + return -EINVAL; } static ssize_t sec_debug_write(struct file *filp, const char __user *buf, @@ -609,6 +620,7 @@ static ssize_t sec_debug_write(struct file *filp, const char __user *buf, { struct sec_debug_file *file = filp->private_data; char tbuf[SEC_DBGFS_VAL_MAX_LEN]; + struct hisi_qm *qm = file->qm; unsigned long val; int len, ret; @@ -627,6 +639,10 @@ static ssize_t sec_debug_write(struct file *filp, const char __user *buf, if (kstrtoul(tbuf, 0, &val)) return -EFAULT; + ret = hisi_qm_get_dfx_access(qm); + if (ret) + return ret; + spin_lock_irq(&file->lock); switch (file->index) { @@ -640,12 +656,11 @@ static ssize_t sec_debug_write(struct file *filp, const char __user *buf, goto err_input; } - spin_unlock_irq(&file->lock); - - return count; + ret = count; err_input: spin_unlock_irq(&file->lock); + hisi_qm_put_dfx_access(qm); return ret; } @@ -703,6 +718,7 @@ static int sec_core_debug_init(struct hisi_qm *qm) regset->regs = sec_dfx_regs; regset->nregs = ARRAY_SIZE(sec_dfx_regs); regset->base = qm->io_base; + regset->dev = dev; if (qm->pdev->device == SEC_PF_PCI_DEVICE_ID) debugfs_create_file("regs", 0444, tmp_d, regset, &sec_regs_fops); @@ -1013,6 +1029,8 @@ static int sec_probe(struct pci_dev *pdev, const struct pci_device_id *id) goto err_alg_unregister; } + hisi_qm_pm_init(qm); + return 0; err_alg_unregister: @@ -1032,6 +1050,7 @@ static void sec_remove(struct pci_dev *pdev) { struct hisi_qm *qm = pci_get_drvdata(pdev); + hisi_qm_pm_uninit(qm); hisi_qm_wait_task_finish(qm, &sec_devices); if (qm->qp_num >= ctx_q_num) hisi_qm_alg_unregister(qm, &sec_devices); @@ -1051,6 +1070,10 @@ static void sec_remove(struct pci_dev *pdev) sec_qm_uninit(qm); } +static const struct dev_pm_ops sec_pm_ops = { + SET_RUNTIME_PM_OPS(hisi_qm_suspend, hisi_qm_resume, NULL) +}; + static const struct pci_error_handlers sec_err_handler = { .error_detected = hisi_qm_dev_err_detected, .slot_reset = hisi_qm_dev_slot_reset, @@ -1066,6 +1089,7 @@ static struct pci_driver sec_pci_driver = { .err_handler = &sec_err_handler, .sriov_configure = hisi_qm_sriov_configure, .shutdown = hisi_qm_dev_shutdown, + .driver.pm = &sec_pm_ops, }; static void sec_register_debugfs(void) diff --git a/drivers/crypto/hisilicon/zip/zip_main.c b/drivers/crypto/hisilicon/zip/zip_main.c index 4438188adbb7..b0b424c9fba1 100644 --- a/drivers/crypto/hisilicon/zip/zip_main.c +++ b/drivers/crypto/hisilicon/zip/zip_main.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -476,22 +477,33 @@ static ssize_t hisi_zip_ctrl_debug_read(struct file *filp, char __user *buf, size_t count, loff_t *pos) { struct ctrl_debug_file *file = filp->private_data; + struct hisi_qm *qm = file_to_qm(file); char tbuf[HZIP_BUF_SIZE]; u32 val; int ret; + ret = hisi_qm_get_dfx_access(qm); + if (ret) + return ret; + spin_lock_irq(&file->lock); switch (file->index) { case HZIP_CLEAR_ENABLE: val = clear_enable_read(file); break; default: - spin_unlock_irq(&file->lock); - return -EINVAL; + goto err_input; } spin_unlock_irq(&file->lock); + + hisi_qm_put_dfx_access(qm); ret = scnprintf(tbuf, sizeof(tbuf), "%u\n", val); return simple_read_from_buffer(buf, count, pos, tbuf, ret); + +err_input: + spin_unlock_irq(&file->lock); + hisi_qm_put_dfx_access(qm); + return -EINVAL; } static ssize_t hisi_zip_ctrl_debug_write(struct file *filp, @@ -499,6 +511,7 @@ static ssize_t hisi_zip_ctrl_debug_write(struct file *filp, size_t count, loff_t *pos) { struct ctrl_debug_file *file = filp->private_data; + struct hisi_qm *qm = file_to_qm(file); char tbuf[HZIP_BUF_SIZE]; unsigned long val; int len, ret; @@ -517,6 +530,10 @@ static ssize_t hisi_zip_ctrl_debug_write(struct file *filp, if (kstrtoul(tbuf, 0, &val)) return -EFAULT; + ret = hisi_qm_get_dfx_access(qm); + if (ret) + return ret; + spin_lock_irq(&file->lock); switch (file->index) { case HZIP_CLEAR_ENABLE: @@ -528,12 +545,12 @@ static ssize_t hisi_zip_ctrl_debug_write(struct file *filp, ret = -EINVAL; goto err_input; } - spin_unlock_irq(&file->lock); - return count; + ret = count; err_input: spin_unlock_irq(&file->lock); + hisi_qm_put_dfx_access(qm); return ret; } @@ -595,6 +612,7 @@ static int hisi_zip_core_debug_init(struct hisi_qm *qm) regset->regs = hzip_dfx_regs; regset->nregs = ARRAY_SIZE(hzip_dfx_regs); regset->base = qm->io_base + core_offsets[i]; + regset->dev = dev; tmp_d = debugfs_create_dir(buf, qm->debug.debug_root); debugfs_create_file("regs", 0444, tmp_d, regset, @@ -934,6 +952,8 @@ static int hisi_zip_probe(struct pci_dev *pdev, const struct pci_device_id *id) goto err_qm_alg_unregister; } + hisi_qm_pm_init(qm); + return 0; err_qm_alg_unregister: @@ -956,6 +976,7 @@ static void hisi_zip_remove(struct pci_dev *pdev) { struct hisi_qm *qm = pci_get_drvdata(pdev); + hisi_qm_pm_uninit(qm); hisi_qm_wait_task_finish(qm, &zip_devices); hisi_qm_alg_unregister(qm, &zip_devices); @@ -968,6 +989,10 @@ static void hisi_zip_remove(struct pci_dev *pdev) hisi_zip_qm_uninit(qm); } +static const struct dev_pm_ops hisi_zip_pm_ops = { + SET_RUNTIME_PM_OPS(hisi_qm_suspend, hisi_qm_resume, NULL) +}; + static const struct pci_error_handlers hisi_zip_err_handler = { .error_detected = hisi_qm_dev_err_detected, .slot_reset = hisi_qm_dev_slot_reset, @@ -984,6 +1009,7 @@ static struct pci_driver hisi_zip_pci_driver = { hisi_qm_sriov_configure : NULL, .err_handler = &hisi_zip_err_handler, .shutdown = hisi_qm_dev_shutdown, + .driver.pm = &hisi_zip_pm_ops, }; static void hisi_zip_register_debugfs(void) -- cgit v1.2.3 From 74f5edbffcd37162084b6883e059bb6bb686151d Mon Sep 17 00:00:00 2001 From: Weili Qian Date: Fri, 13 Aug 2021 17:50:08 +0800 Subject: crypto: hisilicon - change parameter passing of debugfs function To avoid repeatedly obtaining 'qm' from 'filp', parameter passing of debugfs function directly use 'qm' instead of 'filp'. Signed-off-by: Weili Qian Signed-off-by: Herbert Xu --- drivers/crypto/hisilicon/qm.c | 34 +++++++++++--------------------- drivers/crypto/hisilicon/sec2/sec_main.c | 11 ++++------- drivers/crypto/hisilicon/zip/zip_main.c | 11 ++++------- 3 files changed, 20 insertions(+), 36 deletions(-) diff --git a/drivers/crypto/hisilicon/qm.c b/drivers/crypto/hisilicon/qm.c index 74740035380d..e29ff971ad79 100644 --- a/drivers/crypto/hisilicon/qm.c +++ b/drivers/crypto/hisilicon/qm.c @@ -1203,16 +1203,13 @@ static struct hisi_qm *file_to_qm(struct debugfs_file *file) return container_of(debug, struct hisi_qm, debug); } -static u32 current_q_read(struct debugfs_file *file) +static u32 current_q_read(struct hisi_qm *qm) { - struct hisi_qm *qm = file_to_qm(file); - return readl(qm->io_base + QM_DFX_SQE_CNT_VF_SQN) >> QM_DFX_QN_SHIFT; } -static int current_q_write(struct debugfs_file *file, u32 val) +static int current_q_write(struct hisi_qm *qm, u32 val) { - struct hisi_qm *qm = file_to_qm(file); u32 tmp; if (val >= qm->debug.curr_qm_qp_num) @@ -1229,18 +1226,14 @@ static int current_q_write(struct debugfs_file *file, u32 val) return 0; } -static u32 clear_enable_read(struct debugfs_file *file) +static u32 clear_enable_read(struct hisi_qm *qm) { - struct hisi_qm *qm = file_to_qm(file); - return readl(qm->io_base + QM_DFX_CNT_CLR_CE); } /* rd_clr_ctrl 1 enable read clear, otherwise 0 disable it */ -static int clear_enable_write(struct debugfs_file *file, u32 rd_clr_ctrl) +static int clear_enable_write(struct hisi_qm *qm, u32 rd_clr_ctrl) { - struct hisi_qm *qm = file_to_qm(file); - if (rd_clr_ctrl > 1) return -EINVAL; @@ -1249,16 +1242,13 @@ static int clear_enable_write(struct debugfs_file *file, u32 rd_clr_ctrl) return 0; } -static u32 current_qm_read(struct debugfs_file *file) +static u32 current_qm_read(struct hisi_qm *qm) { - struct hisi_qm *qm = file_to_qm(file); - return readl(qm->io_base + QM_DFX_MB_CNT_VF); } -static int current_qm_write(struct debugfs_file *file, u32 val) +static int current_qm_write(struct hisi_qm *qm, u32 val) { - struct hisi_qm *qm = file_to_qm(file); u32 tmp; if (val > qm->vfs_num) @@ -1301,13 +1291,13 @@ static ssize_t qm_debug_read(struct file *filp, char __user *buf, mutex_lock(&file->lock); switch (index) { case CURRENT_QM: - val = current_qm_read(file); + val = current_qm_read(qm); break; case CURRENT_Q: - val = current_q_read(file); + val = current_q_read(qm); break; case CLEAR_ENABLE: - val = clear_enable_read(file); + val = clear_enable_read(qm); break; default: goto err_input; @@ -1356,13 +1346,13 @@ static ssize_t qm_debug_write(struct file *filp, const char __user *buf, mutex_lock(&file->lock); switch (index) { case CURRENT_QM: - ret = current_qm_write(file, val); + ret = current_qm_write(qm, val); break; case CURRENT_Q: - ret = current_q_write(file, val); + ret = current_q_write(qm, val); break; case CLEAR_ENABLE: - ret = clear_enable_write(file, val); + ret = clear_enable_write(qm, val); break; default: ret = -EINVAL; diff --git a/drivers/crypto/hisilicon/sec2/sec_main.c b/drivers/crypto/hisilicon/sec2/sec_main.c index 0ad1ebe4c7ef..90551bf38b52 100644 --- a/drivers/crypto/hisilicon/sec2/sec_main.c +++ b/drivers/crypto/hisilicon/sec2/sec_main.c @@ -557,17 +557,14 @@ static void sec_hw_error_disable(struct hisi_qm *qm) writel(SEC_RAS_DISABLE, qm->io_base + SEC_RAS_NFE_REG); } -static u32 sec_clear_enable_read(struct sec_debug_file *file) +static u32 sec_clear_enable_read(struct hisi_qm *qm) { - struct hisi_qm *qm = file->qm; - return readl(qm->io_base + SEC_CTRL_CNT_CLR_CE) & SEC_CTRL_CNT_CLR_CE_BIT; } -static int sec_clear_enable_write(struct sec_debug_file *file, u32 val) +static int sec_clear_enable_write(struct hisi_qm *qm, u32 val) { - struct hisi_qm *qm = file->qm; u32 tmp; if (val != 1 && val) @@ -597,7 +594,7 @@ static ssize_t sec_debug_read(struct file *filp, char __user *buf, switch (file->index) { case SEC_CLEAR_ENABLE: - val = sec_clear_enable_read(file); + val = sec_clear_enable_read(qm); break; default: goto err_input; @@ -647,7 +644,7 @@ static ssize_t sec_debug_write(struct file *filp, const char __user *buf, switch (file->index) { case SEC_CLEAR_ENABLE: - ret = sec_clear_enable_write(file, val); + ret = sec_clear_enable_write(qm, val); if (ret) goto err_input; break; diff --git a/drivers/crypto/hisilicon/zip/zip_main.c b/drivers/crypto/hisilicon/zip/zip_main.c index b0b424c9fba1..7148201ce76e 100644 --- a/drivers/crypto/hisilicon/zip/zip_main.c +++ b/drivers/crypto/hisilicon/zip/zip_main.c @@ -450,17 +450,14 @@ static inline struct hisi_qm *file_to_qm(struct ctrl_debug_file *file) return &hisi_zip->qm; } -static u32 clear_enable_read(struct ctrl_debug_file *file) +static u32 clear_enable_read(struct hisi_qm *qm) { - struct hisi_qm *qm = file_to_qm(file); - return readl(qm->io_base + HZIP_SOFT_CTRL_CNT_CLR_CE) & HZIP_SOFT_CTRL_CNT_CLR_CE_BIT; } -static int clear_enable_write(struct ctrl_debug_file *file, u32 val) +static int clear_enable_write(struct hisi_qm *qm, u32 val) { - struct hisi_qm *qm = file_to_qm(file); u32 tmp; if (val != 1 && val != 0) @@ -489,7 +486,7 @@ static ssize_t hisi_zip_ctrl_debug_read(struct file *filp, char __user *buf, spin_lock_irq(&file->lock); switch (file->index) { case HZIP_CLEAR_ENABLE: - val = clear_enable_read(file); + val = clear_enable_read(qm); break; default: goto err_input; @@ -537,7 +534,7 @@ static ssize_t hisi_zip_ctrl_debug_write(struct file *filp, spin_lock_irq(&file->lock); switch (file->index) { case HZIP_CLEAR_ENABLE: - ret = clear_enable_write(file, val); + ret = clear_enable_write(qm, val); if (ret) goto err_input; break; -- cgit v1.2.3 From 3e1d2c52b2045ba7f90966b02daeb6c438432570 Mon Sep 17 00:00:00 2001 From: Weili Qian Date: Fri, 13 Aug 2021 17:50:09 +0800 Subject: crypto: hisilicon - check _PS0 and _PR0 method To support runtime PM, use the function 'pci_set_power_state' to change the power state. Therefore, method _PS0 or _PR0 needs to be filled by platform. So check whether the method is supported, if not, print a prompt information. Signed-off-by: Weili Qian Signed-off-by: Herbert Xu --- drivers/crypto/hisilicon/qm.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/crypto/hisilicon/qm.c b/drivers/crypto/hisilicon/qm.c index e29ff971ad79..369562d34d66 100644 --- a/drivers/crypto/hisilicon/qm.c +++ b/drivers/crypto/hisilicon/qm.c @@ -3282,6 +3282,10 @@ static void hisi_qm_pre_init(struct hisi_qm *qm) init_rwsem(&qm->qps_lock); qm->qp_in_used = 0; qm->misc_ctl = false; + if (qm->fun_type == QM_HW_PF && qm->ver > QM_HW_V2) { + if (!acpi_device_power_manageable(ACPI_COMPANION(&pdev->dev))) + dev_info(&pdev->dev, "_PS0 and _PR0 are not defined"); + } } static void qm_cmd_uninit(struct hisi_qm *qm) -- cgit v1.2.3 From abfc7fad63940b8dfdfd25da6f0fa813d9561645 Mon Sep 17 00:00:00 2001 From: Changbin Du Date: Sat, 14 Aug 2021 09:11:14 +0800 Subject: crypto: skcipher - in_irq() cleanup Replace the obsolete and ambiguos macro in_irq() with new macro in_hardirq(). Signed-off-by: Changbin Du Signed-off-by: Herbert Xu --- crypto/skcipher.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crypto/skcipher.c b/crypto/skcipher.c index a15376245416..418211180cee 100644 --- a/crypto/skcipher.c +++ b/crypto/skcipher.c @@ -431,7 +431,7 @@ static int skcipher_copy_iv(struct skcipher_walk *walk) static int skcipher_walk_first(struct skcipher_walk *walk) { - if (WARN_ON_ONCE(in_irq())) + if (WARN_ON_ONCE(in_hardirq())) return -EDEADLK; walk->buffer = NULL; -- cgit v1.2.3 From ff1469a21df5a2e981dd2f78e96e412fecb3ba59 Mon Sep 17 00:00:00 2001 From: Lukas Bulwahn Date: Mon, 16 Aug 2021 14:44:33 +0200 Subject: crypto: rmd320 - remove rmd320 in Makefile Commit 93f64202926f ("crypto: rmd320 - remove RIPE-MD 320 hash algorithm") removes the Kconfig and code, but misses to adjust the Makefile. Hence, ./scripts/checkkconfigsymbols.py warns: CRYPTO_RMD320 Referencing files: crypto/Makefile Remove the missing piece of this code removal. Fixes: 93f64202926f ("crypto: rmd320 - remove RIPE-MD 320 hash algorithm") Signed-off-by: Lukas Bulwahn Acked-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- crypto/Makefile | 1 - 1 file changed, 1 deletion(-) diff --git a/crypto/Makefile b/crypto/Makefile index 10526d4559b8..c633f15a0481 100644 --- a/crypto/Makefile +++ b/crypto/Makefile @@ -74,7 +74,6 @@ obj-$(CONFIG_CRYPTO_NULL2) += crypto_null.o obj-$(CONFIG_CRYPTO_MD4) += md4.o obj-$(CONFIG_CRYPTO_MD5) += md5.o obj-$(CONFIG_CRYPTO_RMD160) += rmd160.o -obj-$(CONFIG_CRYPTO_RMD320) += rmd320.o obj-$(CONFIG_CRYPTO_SHA1) += sha1_generic.o obj-$(CONFIG_CRYPTO_SHA256) += sha256_generic.o obj-$(CONFIG_CRYPTO_SHA512) += sha512_generic.o -- cgit v1.2.3 From de79d9aae493a29d02926f396a4fd1a1309436fc Mon Sep 17 00:00:00 2001 From: Tianjia Zhang Date: Wed, 18 Aug 2021 11:31:16 +0800 Subject: crypto: x86/sm4 - export reusable AESNI/AVX functions Export the reusable functions in the SM4 AESNI/AVX implementation, mainly public functions, which are used to develop the SM4 AESNI/AVX2 implementation, and eliminate unnecessary duplication of code. At the same time, in order to make the public function universal, minor fixes was added. Signed-off-by: Tianjia Zhang Signed-off-by: Herbert Xu --- arch/x86/crypto/sm4-avx.h | 24 ++++++++++ arch/x86/crypto/sm4_aesni_avx_glue.c | 92 +++++++++++++++++++++++------------- 2 files changed, 84 insertions(+), 32 deletions(-) create mode 100644 arch/x86/crypto/sm4-avx.h diff --git a/arch/x86/crypto/sm4-avx.h b/arch/x86/crypto/sm4-avx.h new file mode 100644 index 000000000000..1bceab7516aa --- /dev/null +++ b/arch/x86/crypto/sm4-avx.h @@ -0,0 +1,24 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +#ifndef ASM_X86_SM4_AVX_H +#define ASM_X86_SM4_AVX_H + +#include +#include + +typedef void (*sm4_crypt_func)(const u32 *rk, u8 *dst, const u8 *src, u8 *iv); + +int sm4_avx_ecb_encrypt(struct skcipher_request *req); +int sm4_avx_ecb_decrypt(struct skcipher_request *req); + +int sm4_cbc_encrypt(struct skcipher_request *req); +int sm4_avx_cbc_decrypt(struct skcipher_request *req, + unsigned int bsize, sm4_crypt_func func); + +int sm4_cfb_encrypt(struct skcipher_request *req); +int sm4_avx_cfb_decrypt(struct skcipher_request *req, + unsigned int bsize, sm4_crypt_func func); + +int sm4_avx_ctr_crypt(struct skcipher_request *req, + unsigned int bsize, sm4_crypt_func func); + +#endif diff --git a/arch/x86/crypto/sm4_aesni_avx_glue.c b/arch/x86/crypto/sm4_aesni_avx_glue.c index c1f5728efd1d..7800f77d68ad 100644 --- a/arch/x86/crypto/sm4_aesni_avx_glue.c +++ b/arch/x86/crypto/sm4_aesni_avx_glue.c @@ -15,6 +15,7 @@ #include #include #include +#include "sm4-avx.h" #define SM4_CRYPT8_BLOCK_SIZE (SM4_BLOCK_SIZE * 8) @@ -71,23 +72,25 @@ static int ecb_do_crypt(struct skcipher_request *req, const u32 *rkey) return err; } -static int ecb_encrypt(struct skcipher_request *req) +int sm4_avx_ecb_encrypt(struct skcipher_request *req) { struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm); return ecb_do_crypt(req, ctx->rkey_enc); } +EXPORT_SYMBOL_GPL(sm4_avx_ecb_encrypt); -static int ecb_decrypt(struct skcipher_request *req) +int sm4_avx_ecb_decrypt(struct skcipher_request *req) { struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm); return ecb_do_crypt(req, ctx->rkey_dec); } +EXPORT_SYMBOL_GPL(sm4_avx_ecb_decrypt); -static int cbc_encrypt(struct skcipher_request *req) +int sm4_cbc_encrypt(struct skcipher_request *req) { struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm); @@ -118,8 +121,10 @@ static int cbc_encrypt(struct skcipher_request *req) return err; } +EXPORT_SYMBOL_GPL(sm4_cbc_encrypt); -static int cbc_decrypt(struct skcipher_request *req) +int sm4_avx_cbc_decrypt(struct skcipher_request *req, + unsigned int bsize, sm4_crypt_func func) { struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm); @@ -135,15 +140,14 @@ static int cbc_decrypt(struct skcipher_request *req) kernel_fpu_begin(); - while (nbytes >= SM4_CRYPT8_BLOCK_SIZE) { - sm4_aesni_avx_cbc_dec_blk8(ctx->rkey_dec, dst, - src, walk.iv); - dst += SM4_CRYPT8_BLOCK_SIZE; - src += SM4_CRYPT8_BLOCK_SIZE; - nbytes -= SM4_CRYPT8_BLOCK_SIZE; + while (nbytes >= bsize) { + func(ctx->rkey_dec, dst, src, walk.iv); + dst += bsize; + src += bsize; + nbytes -= bsize; } - if (nbytes >= SM4_BLOCK_SIZE) { + while (nbytes >= SM4_BLOCK_SIZE) { u8 keystream[SM4_BLOCK_SIZE * 8]; u8 iv[SM4_BLOCK_SIZE]; unsigned int nblocks = min(nbytes >> 4, 8u); @@ -165,6 +169,8 @@ static int cbc_decrypt(struct skcipher_request *req) } crypto_xor_cpy(dst, walk.iv, keystream, SM4_BLOCK_SIZE); memcpy(walk.iv, iv, SM4_BLOCK_SIZE); + dst += nblocks * SM4_BLOCK_SIZE; + src += (nblocks + 1) * SM4_BLOCK_SIZE; nbytes -= nblocks * SM4_BLOCK_SIZE; } @@ -174,8 +180,15 @@ static int cbc_decrypt(struct skcipher_request *req) return err; } +EXPORT_SYMBOL_GPL(sm4_avx_cbc_decrypt); + +static int cbc_decrypt(struct skcipher_request *req) +{ + return sm4_avx_cbc_decrypt(req, SM4_CRYPT8_BLOCK_SIZE, + sm4_aesni_avx_cbc_dec_blk8); +} -static int cfb_encrypt(struct skcipher_request *req) +int sm4_cfb_encrypt(struct skcipher_request *req) { struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm); @@ -214,8 +227,10 @@ static int cfb_encrypt(struct skcipher_request *req) return err; } +EXPORT_SYMBOL_GPL(sm4_cfb_encrypt); -static int cfb_decrypt(struct skcipher_request *req) +int sm4_avx_cfb_decrypt(struct skcipher_request *req, + unsigned int bsize, sm4_crypt_func func) { struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm); @@ -231,15 +246,14 @@ static int cfb_decrypt(struct skcipher_request *req) kernel_fpu_begin(); - while (nbytes >= SM4_CRYPT8_BLOCK_SIZE) { - sm4_aesni_avx_cfb_dec_blk8(ctx->rkey_enc, dst, - src, walk.iv); - dst += SM4_CRYPT8_BLOCK_SIZE; - src += SM4_CRYPT8_BLOCK_SIZE; - nbytes -= SM4_CRYPT8_BLOCK_SIZE; + while (nbytes >= bsize) { + func(ctx->rkey_enc, dst, src, walk.iv); + dst += bsize; + src += bsize; + nbytes -= bsize; } - if (nbytes >= SM4_BLOCK_SIZE) { + while (nbytes >= SM4_BLOCK_SIZE) { u8 keystream[SM4_BLOCK_SIZE * 8]; unsigned int nblocks = min(nbytes >> 4, 8u); @@ -276,8 +290,16 @@ static int cfb_decrypt(struct skcipher_request *req) return err; } +EXPORT_SYMBOL_GPL(sm4_avx_cfb_decrypt); -static int ctr_crypt(struct skcipher_request *req) +static int cfb_decrypt(struct skcipher_request *req) +{ + return sm4_avx_cfb_decrypt(req, SM4_CRYPT8_BLOCK_SIZE, + sm4_aesni_avx_cfb_dec_blk8); +} + +int sm4_avx_ctr_crypt(struct skcipher_request *req, + unsigned int bsize, sm4_crypt_func func) { struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm); @@ -293,15 +315,14 @@ static int ctr_crypt(struct skcipher_request *req) kernel_fpu_begin(); - while (nbytes >= SM4_CRYPT8_BLOCK_SIZE) { - sm4_aesni_avx_ctr_enc_blk8(ctx->rkey_enc, dst, - src, walk.iv); - dst += SM4_CRYPT8_BLOCK_SIZE; - src += SM4_CRYPT8_BLOCK_SIZE; - nbytes -= SM4_CRYPT8_BLOCK_SIZE; + while (nbytes >= bsize) { + func(ctx->rkey_enc, dst, src, walk.iv); + dst += bsize; + src += bsize; + nbytes -= bsize; } - if (nbytes >= SM4_BLOCK_SIZE) { + while (nbytes >= SM4_BLOCK_SIZE) { u8 keystream[SM4_BLOCK_SIZE * 8]; unsigned int nblocks = min(nbytes >> 4, 8u); int i; @@ -343,6 +364,13 @@ static int ctr_crypt(struct skcipher_request *req) return err; } +EXPORT_SYMBOL_GPL(sm4_avx_ctr_crypt); + +static int ctr_crypt(struct skcipher_request *req) +{ + return sm4_avx_ctr_crypt(req, SM4_CRYPT8_BLOCK_SIZE, + sm4_aesni_avx_ctr_enc_blk8); +} static struct skcipher_alg sm4_aesni_avx_skciphers[] = { { @@ -359,8 +387,8 @@ static struct skcipher_alg sm4_aesni_avx_skciphers[] = { .max_keysize = SM4_KEY_SIZE, .walksize = 8 * SM4_BLOCK_SIZE, .setkey = sm4_skcipher_setkey, - .encrypt = ecb_encrypt, - .decrypt = ecb_decrypt, + .encrypt = sm4_avx_ecb_encrypt, + .decrypt = sm4_avx_ecb_decrypt, }, { .base = { .cra_name = "__cbc(sm4)", @@ -376,7 +404,7 @@ static struct skcipher_alg sm4_aesni_avx_skciphers[] = { .ivsize = SM4_BLOCK_SIZE, .walksize = 8 * SM4_BLOCK_SIZE, .setkey = sm4_skcipher_setkey, - .encrypt = cbc_encrypt, + .encrypt = sm4_cbc_encrypt, .decrypt = cbc_decrypt, }, { .base = { @@ -394,7 +422,7 @@ static struct skcipher_alg sm4_aesni_avx_skciphers[] = { .chunksize = SM4_BLOCK_SIZE, .walksize = 8 * SM4_BLOCK_SIZE, .setkey = sm4_skcipher_setkey, - .encrypt = cfb_encrypt, + .encrypt = sm4_cfb_encrypt, .decrypt = cfb_decrypt, }, { .base = { -- cgit v1.2.3 From 5b2efa2bb865eb784e06987c7ce98c3c835b495b Mon Sep 17 00:00:00 2001 From: Tianjia Zhang Date: Wed, 18 Aug 2021 11:31:17 +0800 Subject: crypto: x86/sm4 - add AES-NI/AVX2/x86_64 implementation Like the implementation of AESNI/AVX, this patch adds an accelerated implementation of AESNI/AVX2. In terms of code implementation, by reusing AESNI/AVX mode-related codes, the amount of code is greatly reduced. From the benchmark data, it can be seen that when the block size is 1024, compared to AVX acceleration, the performance achieved by AVX2 has increased by about 70%, it is also 7.7 times of the pure software implementation of sm4-generic. The main algorithm implementation comes from SM4 AES-NI work by libgcrypt and Markku-Juhani O. Saarinen at: https://github.com/mjosaarinen/sm4ni This optimization supports the four modes of SM4, ECB, CBC, CFB, and CTR. Since CBC and CFB do not support multiple block parallel encryption, the optimization effect is not obvious. Benchmark on Intel i5-6200U 2.30GHz, performance data of three implementation methods, pure software sm4-generic, aesni/avx acceleration, and aesni/avx2 acceleration, the data comes from the 218 mode and 518 mode of tcrypt. The abscissas are blocks of different lengths. The data is tabulated and the unit is Mb/s: block-size | 16 64 128 256 1024 1420 4096 sm4-generic ECB enc | 60.94 70.41 72.27 73.02 73.87 73.58 73.59 ECB dec | 61.87 70.53 72.15 73.09 73.89 73.92 73.86 CBC enc | 56.71 66.31 68.05 69.84 70.02 70.12 70.24 CBC dec | 54.54 65.91 68.22 69.51 70.63 70.79 70.82 CFB enc | 57.21 67.24 69.10 70.25 70.73 70.52 71.42 CFB dec | 57.22 64.74 66.31 67.24 67.40 67.64 67.58 CTR enc | 59.47 68.64 69.91 71.02 71.86 71.61 71.95 CTR dec | 59.94 68.77 69.95 71.00 71.84 71.55 71.95 sm4-aesni-avx ECB enc | 44.95 177.35 292.06 316.98 339.48 322.27 330.59 ECB dec | 45.28 178.66 292.31 317.52 339.59 322.52 331.16 CBC enc | 57.75 67.68 69.72 70.60 71.48 71.63 71.74 CBC dec | 44.32 176.83 284.32 307.24 328.61 312.61 325.82 CFB enc | 57.81 67.64 69.63 70.55 71.40 71.35 71.70 CFB dec | 43.14 167.78 282.03 307.20 328.35 318.24 325.95 CTR enc | 42.35 163.32 279.11 302.93 320.86 310.56 317.93 CTR dec | 42.39 162.81 278.49 302.37 321.11 310.33 318.37 sm4-aesni-avx2 ECB enc | 45.19 177.41 292.42 316.12 339.90 322.53 330.54 ECB dec | 44.83 178.90 291.45 317.31 339.85 322.55 331.07 CBC enc | 57.66 67.62 69.73 70.55 71.58 71.66 71.77 CBC dec | 44.34 176.86 286.10 501.68 559.58 483.87 527.46 CFB enc | 57.43 67.60 69.61 70.52 71.43 71.28 71.65 CFB dec | 43.12 167.75 268.09 499.33 558.35 490.36 524.73 CTR enc | 42.42 163.39 256.17 493.95 552.45 481.58 517.19 CTR dec | 42.49 163.11 256.36 493.34 552.62 481.49 516.83 Signed-off-by: Tianjia Zhang Signed-off-by: Herbert Xu --- arch/x86/crypto/Makefile | 3 + arch/x86/crypto/sm4-aesni-avx2-asm_64.S | 497 ++++++++++++++++++++++++++++++++ arch/x86/crypto/sm4_aesni_avx2_glue.c | 169 +++++++++++ crypto/Kconfig | 22 ++ 4 files changed, 691 insertions(+) create mode 100644 arch/x86/crypto/sm4-aesni-avx2-asm_64.S create mode 100644 arch/x86/crypto/sm4_aesni_avx2_glue.c diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile index 08f95d4e1e7c..f307c93fc90a 100644 --- a/arch/x86/crypto/Makefile +++ b/arch/x86/crypto/Makefile @@ -91,6 +91,9 @@ obj-$(CONFIG_CRYPTO_CURVE25519_X86) += curve25519-x86_64.o obj-$(CONFIG_CRYPTO_SM4_AESNI_AVX_X86_64) += sm4-aesni-avx-x86_64.o sm4-aesni-avx-x86_64-y := sm4-aesni-avx-asm_64.o sm4_aesni_avx_glue.o +obj-$(CONFIG_CRYPTO_SM4_AESNI_AVX2_X86_64) += sm4-aesni-avx2-x86_64.o +sm4-aesni-avx2-x86_64-y := sm4-aesni-avx2-asm_64.o sm4_aesni_avx2_glue.o + quiet_cmd_perlasm = PERLASM $@ cmd_perlasm = $(PERL) $< > $@ $(obj)/%.S: $(src)/%.pl FORCE diff --git a/arch/x86/crypto/sm4-aesni-avx2-asm_64.S b/arch/x86/crypto/sm4-aesni-avx2-asm_64.S new file mode 100644 index 000000000000..d2ffd7f76ee2 --- /dev/null +++ b/arch/x86/crypto/sm4-aesni-avx2-asm_64.S @@ -0,0 +1,497 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * SM4 Cipher Algorithm, AES-NI/AVX2 optimized. + * as specified in + * https://tools.ietf.org/id/draft-ribose-cfrg-sm4-10.html + * + * Copyright (C) 2018 Markku-Juhani O. Saarinen + * Copyright (C) 2020 Jussi Kivilinna + * Copyright (c) 2021 Tianjia Zhang + */ + +/* Based on SM4 AES-NI work by libgcrypt and Markku-Juhani O. Saarinen at: + * https://github.com/mjosaarinen/sm4ni + */ + +#include +#include + +#define rRIP (%rip) + +/* vector registers */ +#define RX0 %ymm0 +#define RX1 %ymm1 +#define MASK_4BIT %ymm2 +#define RTMP0 %ymm3 +#define RTMP1 %ymm4 +#define RTMP2 %ymm5 +#define RTMP3 %ymm6 +#define RTMP4 %ymm7 + +#define RA0 %ymm8 +#define RA1 %ymm9 +#define RA2 %ymm10 +#define RA3 %ymm11 + +#define RB0 %ymm12 +#define RB1 %ymm13 +#define RB2 %ymm14 +#define RB3 %ymm15 + +#define RNOT %ymm0 +#define RBSWAP %ymm1 + +#define RX0x %xmm0 +#define RX1x %xmm1 +#define MASK_4BITx %xmm2 + +#define RNOTx %xmm0 +#define RBSWAPx %xmm1 + +#define RTMP0x %xmm3 +#define RTMP1x %xmm4 +#define RTMP2x %xmm5 +#define RTMP3x %xmm6 +#define RTMP4x %xmm7 + + +/* helper macros */ + +/* Transpose four 32-bit words between 128-bit vector lanes. */ +#define transpose_4x4(x0, x1, x2, x3, t1, t2) \ + vpunpckhdq x1, x0, t2; \ + vpunpckldq x1, x0, x0; \ + \ + vpunpckldq x3, x2, t1; \ + vpunpckhdq x3, x2, x2; \ + \ + vpunpckhqdq t1, x0, x1; \ + vpunpcklqdq t1, x0, x0; \ + \ + vpunpckhqdq x2, t2, x3; \ + vpunpcklqdq x2, t2, x2; + +/* post-SubByte transform. */ +#define transform_pre(x, lo_t, hi_t, mask4bit, tmp0) \ + vpand x, mask4bit, tmp0; \ + vpandn x, mask4bit, x; \ + vpsrld $4, x, x; \ + \ + vpshufb tmp0, lo_t, tmp0; \ + vpshufb x, hi_t, x; \ + vpxor tmp0, x, x; + +/* post-SubByte transform. Note: x has been XOR'ed with mask4bit by + * 'vaeslastenc' instruction. */ +#define transform_post(x, lo_t, hi_t, mask4bit, tmp0) \ + vpandn mask4bit, x, tmp0; \ + vpsrld $4, x, x; \ + vpand x, mask4bit, x; \ + \ + vpshufb tmp0, lo_t, tmp0; \ + vpshufb x, hi_t, x; \ + vpxor tmp0, x, x; + + +.section .rodata.cst164, "aM", @progbits, 164 +.align 16 + +/* + * Following four affine transform look-up tables are from work by + * Markku-Juhani O. Saarinen, at https://github.com/mjosaarinen/sm4ni + * + * These allow exposing SM4 S-Box from AES SubByte. + */ + +/* pre-SubByte affine transform, from SM4 field to AES field. */ +.Lpre_tf_lo_s: + .quad 0x9197E2E474720701, 0xC7C1B4B222245157 +.Lpre_tf_hi_s: + .quad 0xE240AB09EB49A200, 0xF052B91BF95BB012 + +/* post-SubByte affine transform, from AES field to SM4 field. */ +.Lpost_tf_lo_s: + .quad 0x5B67F2CEA19D0834, 0xEDD14478172BBE82 +.Lpost_tf_hi_s: + .quad 0xAE7201DD73AFDC00, 0x11CDBE62CC1063BF + +/* For isolating SubBytes from AESENCLAST, inverse shift row */ +.Linv_shift_row: + .byte 0x00, 0x0d, 0x0a, 0x07, 0x04, 0x01, 0x0e, 0x0b + .byte 0x08, 0x05, 0x02, 0x0f, 0x0c, 0x09, 0x06, 0x03 + +/* Inverse shift row + Rotate left by 8 bits on 32-bit words with vpshufb */ +.Linv_shift_row_rol_8: + .byte 0x07, 0x00, 0x0d, 0x0a, 0x0b, 0x04, 0x01, 0x0e + .byte 0x0f, 0x08, 0x05, 0x02, 0x03, 0x0c, 0x09, 0x06 + +/* Inverse shift row + Rotate left by 16 bits on 32-bit words with vpshufb */ +.Linv_shift_row_rol_16: + .byte 0x0a, 0x07, 0x00, 0x0d, 0x0e, 0x0b, 0x04, 0x01 + .byte 0x02, 0x0f, 0x08, 0x05, 0x06, 0x03, 0x0c, 0x09 + +/* Inverse shift row + Rotate left by 24 bits on 32-bit words with vpshufb */ +.Linv_shift_row_rol_24: + .byte 0x0d, 0x0a, 0x07, 0x00, 0x01, 0x0e, 0x0b, 0x04 + .byte 0x05, 0x02, 0x0f, 0x08, 0x09, 0x06, 0x03, 0x0c + +/* For CTR-mode IV byteswap */ +.Lbswap128_mask: + .byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 + +/* For input word byte-swap */ +.Lbswap32_mask: + .byte 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 + +.align 4 +/* 4-bit mask */ +.L0f0f0f0f: + .long 0x0f0f0f0f + +.text +.align 16 + +.align 8 +SYM_FUNC_START_LOCAL(__sm4_crypt_blk16) + /* input: + * %rdi: round key array, CTX + * RA0, RA1, RA2, RA3, RB0, RB1, RB2, RB3: sixteen parallel + * plaintext blocks + * output: + * RA0, RA1, RA2, RA3, RB0, RB1, RB2, RB3: sixteen parallel + * ciphertext blocks + */ + FRAME_BEGIN + + vbroadcasti128 .Lbswap32_mask rRIP, RTMP2; + vpshufb RTMP2, RA0, RA0; + vpshufb RTMP2, RA1, RA1; + vpshufb RTMP2, RA2, RA2; + vpshufb RTMP2, RA3, RA3; + vpshufb RTMP2, RB0, RB0; + vpshufb RTMP2, RB1, RB1; + vpshufb RTMP2, RB2, RB2; + vpshufb RTMP2, RB3, RB3; + + vpbroadcastd .L0f0f0f0f rRIP, MASK_4BIT; + transpose_4x4(RA0, RA1, RA2, RA3, RTMP0, RTMP1); + transpose_4x4(RB0, RB1, RB2, RB3, RTMP0, RTMP1); + +#define ROUND(round, s0, s1, s2, s3, r0, r1, r2, r3) \ + vpbroadcastd (4*(round))(%rdi), RX0; \ + vbroadcasti128 .Lpre_tf_lo_s rRIP, RTMP4; \ + vbroadcasti128 .Lpre_tf_hi_s rRIP, RTMP1; \ + vmovdqa RX0, RX1; \ + vpxor s1, RX0, RX0; \ + vpxor s2, RX0, RX0; \ + vpxor s3, RX0, RX0; /* s1 ^ s2 ^ s3 ^ rk */ \ + vbroadcasti128 .Lpost_tf_lo_s rRIP, RTMP2; \ + vbroadcasti128 .Lpost_tf_hi_s rRIP, RTMP3; \ + vpxor r1, RX1, RX1; \ + vpxor r2, RX1, RX1; \ + vpxor r3, RX1, RX1; /* r1 ^ r2 ^ r3 ^ rk */ \ + \ + /* sbox, non-linear part */ \ + transform_pre(RX0, RTMP4, RTMP1, MASK_4BIT, RTMP0); \ + transform_pre(RX1, RTMP4, RTMP1, MASK_4BIT, RTMP0); \ + vextracti128 $1, RX0, RTMP4x; \ + vextracti128 $1, RX1, RTMP0x; \ + vaesenclast MASK_4BITx, RX0x, RX0x; \ + vaesenclast MASK_4BITx, RTMP4x, RTMP4x; \ + vaesenclast MASK_4BITx, RX1x, RX1x; \ + vaesenclast MASK_4BITx, RTMP0x, RTMP0x; \ + vinserti128 $1, RTMP4x, RX0, RX0; \ + vbroadcasti128 .Linv_shift_row rRIP, RTMP4; \ + vinserti128 $1, RTMP0x, RX1, RX1; \ + transform_post(RX0, RTMP2, RTMP3, MASK_4BIT, RTMP0); \ + transform_post(RX1, RTMP2, RTMP3, MASK_4BIT, RTMP0); \ + \ + /* linear part */ \ + vpshufb RTMP4, RX0, RTMP0; \ + vpxor RTMP0, s0, s0; /* s0 ^ x */ \ + vpshufb RTMP4, RX1, RTMP2; \ + vbroadcasti128 .Linv_shift_row_rol_8 rRIP, RTMP4; \ + vpxor RTMP2, r0, r0; /* r0 ^ x */ \ + vpshufb RTMP4, RX0, RTMP1; \ + vpxor RTMP1, RTMP0, RTMP0; /* x ^ rol(x,8) */ \ + vpshufb RTMP4, RX1, RTMP3; \ + vbroadcasti128 .Linv_shift_row_rol_16 rRIP, RTMP4; \ + vpxor RTMP3, RTMP2, RTMP2; /* x ^ rol(x,8) */ \ + vpshufb RTMP4, RX0, RTMP1; \ + vpxor RTMP1, RTMP0, RTMP0; /* x ^ rol(x,8) ^ rol(x,16) */ \ + vpshufb RTMP4, RX1, RTMP3; \ + vbroadcasti128 .Linv_shift_row_rol_24 rRIP, RTMP4; \ + vpxor RTMP3, RTMP2, RTMP2; /* x ^ rol(x,8) ^ rol(x,16) */ \ + vpshufb RTMP4, RX0, RTMP1; \ + vpxor RTMP1, s0, s0; /* s0 ^ x ^ rol(x,24) */ \ + vpslld $2, RTMP0, RTMP1; \ + vpsrld $30, RTMP0, RTMP0; \ + vpxor RTMP0, s0, s0; \ + /* s0 ^ x ^ rol(x,2) ^ rol(x,10) ^ rol(x,18) ^ rol(x,24) */ \ + vpxor RTMP1, s0, s0; \ + vpshufb RTMP4, RX1, RTMP3; \ + vpxor RTMP3, r0, r0; /* r0 ^ x ^ rol(x,24) */ \ + vpslld $2, RTMP2, RTMP3; \ + vpsrld $30, RTMP2, RTMP2; \ + vpxor RTMP2, r0, r0; \ + /* r0 ^ x ^ rol(x,2) ^ rol(x,10) ^ rol(x,18) ^ rol(x,24) */ \ + vpxor RTMP3, r0, r0; + + leaq (32*4)(%rdi), %rax; +.align 16 +.Lroundloop_blk8: + ROUND(0, RA0, RA1, RA2, RA3, RB0, RB1, RB2, RB3); + ROUND(1, RA1, RA2, RA3, RA0, RB1, RB2, RB3, RB0); + ROUND(2, RA2, RA3, RA0, RA1, RB2, RB3, RB0, RB1); + ROUND(3, RA3, RA0, RA1, RA2, RB3, RB0, RB1, RB2); + leaq (4*4)(%rdi), %rdi; + cmpq %rax, %rdi; + jne .Lroundloop_blk8; + +#undef ROUND + + vbroadcasti128 .Lbswap128_mask rRIP, RTMP2; + + transpose_4x4(RA0, RA1, RA2, RA3, RTMP0, RTMP1); + transpose_4x4(RB0, RB1, RB2, RB3, RTMP0, RTMP1); + vpshufb RTMP2, RA0, RA0; + vpshufb RTMP2, RA1, RA1; + vpshufb RTMP2, RA2, RA2; + vpshufb RTMP2, RA3, RA3; + vpshufb RTMP2, RB0, RB0; + vpshufb RTMP2, RB1, RB1; + vpshufb RTMP2, RB2, RB2; + vpshufb RTMP2, RB3, RB3; + + FRAME_END + ret; +SYM_FUNC_END(__sm4_crypt_blk16) + +#define inc_le128(x, minus_one, tmp) \ + vpcmpeqq minus_one, x, tmp; \ + vpsubq minus_one, x, x; \ + vpslldq $8, tmp, tmp; \ + vpsubq tmp, x, x; + +/* + * void sm4_aesni_avx2_ctr_enc_blk16(const u32 *rk, u8 *dst, + * const u8 *src, u8 *iv) + */ +.align 8 +SYM_FUNC_START(sm4_aesni_avx2_ctr_enc_blk16) + /* input: + * %rdi: round key array, CTX + * %rsi: dst (16 blocks) + * %rdx: src (16 blocks) + * %rcx: iv (big endian, 128bit) + */ + FRAME_BEGIN + + movq 8(%rcx), %rax; + bswapq %rax; + + vzeroupper; + + vbroadcasti128 .Lbswap128_mask rRIP, RTMP3; + vpcmpeqd RNOT, RNOT, RNOT; + vpsrldq $8, RNOT, RNOT; /* ab: -1:0 ; cd: -1:0 */ + vpaddq RNOT, RNOT, RTMP2; /* ab: -2:0 ; cd: -2:0 */ + + /* load IV and byteswap */ + vmovdqu (%rcx), RTMP4x; + vpshufb RTMP3x, RTMP4x, RTMP4x; + vmovdqa RTMP4x, RTMP0x; + inc_le128(RTMP4x, RNOTx, RTMP1x); + vinserti128 $1, RTMP4x, RTMP0, RTMP0; + vpshufb RTMP3, RTMP0, RA0; /* +1 ; +0 */ + + /* check need for handling 64-bit overflow and carry */ + cmpq $(0xffffffffffffffff - 16), %rax; + ja .Lhandle_ctr_carry; + + /* construct IVs */ + vpsubq RTMP2, RTMP0, RTMP0; /* +3 ; +2 */ + vpshufb RTMP3, RTMP0, RA1; + vpsubq RTMP2, RTMP0, RTMP0; /* +5 ; +4 */ + vpshufb RTMP3, RTMP0, RA2; + vpsubq RTMP2, RTMP0, RTMP0; /* +7 ; +6 */ + vpshufb RTMP3, RTMP0, RA3; + vpsubq RTMP2, RTMP0, RTMP0; /* +9 ; +8 */ + vpshufb RTMP3, RTMP0, RB0; + vpsubq RTMP2, RTMP0, RTMP0; /* +11 ; +10 */ + vpshufb RTMP3, RTMP0, RB1; + vpsubq RTMP2, RTMP0, RTMP0; /* +13 ; +12 */ + vpshufb RTMP3, RTMP0, RB2; + vpsubq RTMP2, RTMP0, RTMP0; /* +15 ; +14 */ + vpshufb RTMP3, RTMP0, RB3; + vpsubq RTMP2, RTMP0, RTMP0; /* +16 */ + vpshufb RTMP3x, RTMP0x, RTMP0x; + + jmp .Lctr_carry_done; + +.Lhandle_ctr_carry: + /* construct IVs */ + inc_le128(RTMP0, RNOT, RTMP1); + inc_le128(RTMP0, RNOT, RTMP1); + vpshufb RTMP3, RTMP0, RA1; /* +3 ; +2 */ + inc_le128(RTMP0, RNOT, RTMP1); + inc_le128(RTMP0, RNOT, RTMP1); + vpshufb RTMP3, RTMP0, RA2; /* +5 ; +4 */ + inc_le128(RTMP0, RNOT, RTMP1); + inc_le128(RTMP0, RNOT, RTMP1); + vpshufb RTMP3, RTMP0, RA3; /* +7 ; +6 */ + inc_le128(RTMP0, RNOT, RTMP1); + inc_le128(RTMP0, RNOT, RTMP1); + vpshufb RTMP3, RTMP0, RB0; /* +9 ; +8 */ + inc_le128(RTMP0, RNOT, RTMP1); + inc_le128(RTMP0, RNOT, RTMP1); + vpshufb RTMP3, RTMP0, RB1; /* +11 ; +10 */ + inc_le128(RTMP0, RNOT, RTMP1); + inc_le128(RTMP0, RNOT, RTMP1); + vpshufb RTMP3, RTMP0, RB2; /* +13 ; +12 */ + inc_le128(RTMP0, RNOT, RTMP1); + inc_le128(RTMP0, RNOT, RTMP1); + vpshufb RTMP3, RTMP0, RB3; /* +15 ; +14 */ + inc_le128(RTMP0, RNOT, RTMP1); + vextracti128 $1, RTMP0, RTMP0x; + vpshufb RTMP3x, RTMP0x, RTMP0x; /* +16 */ + +.align 4 +.Lctr_carry_done: + /* store new IV */ + vmovdqu RTMP0x, (%rcx); + + call __sm4_crypt_blk16; + + vpxor (0 * 32)(%rdx), RA0, RA0; + vpxor (1 * 32)(%rdx), RA1, RA1; + vpxor (2 * 32)(%rdx), RA2, RA2; + vpxor (3 * 32)(%rdx), RA3, RA3; + vpxor (4 * 32)(%rdx), RB0, RB0; + vpxor (5 * 32)(%rdx), RB1, RB1; + vpxor (6 * 32)(%rdx), RB2, RB2; + vpxor (7 * 32)(%rdx), RB3, RB3; + + vmovdqu RA0, (0 * 32)(%rsi); + vmovdqu RA1, (1 * 32)(%rsi); + vmovdqu RA2, (2 * 32)(%rsi); + vmovdqu RA3, (3 * 32)(%rsi); + vmovdqu RB0, (4 * 32)(%rsi); + vmovdqu RB1, (5 * 32)(%rsi); + vmovdqu RB2, (6 * 32)(%rsi); + vmovdqu RB3, (7 * 32)(%rsi); + + vzeroall; + FRAME_END + ret; +SYM_FUNC_END(sm4_aesni_avx2_ctr_enc_blk16) + +/* + * void sm4_aesni_avx2_cbc_dec_blk16(const u32 *rk, u8 *dst, + * const u8 *src, u8 *iv) + */ +.align 8 +SYM_FUNC_START(sm4_aesni_avx2_cbc_dec_blk16) + /* input: + * %rdi: round key array, CTX + * %rsi: dst (16 blocks) + * %rdx: src (16 blocks) + * %rcx: iv + */ + FRAME_BEGIN + + vzeroupper; + + vmovdqu (0 * 32)(%rdx), RA0; + vmovdqu (1 * 32)(%rdx), RA1; + vmovdqu (2 * 32)(%rdx), RA2; + vmovdqu (3 * 32)(%rdx), RA3; + vmovdqu (4 * 32)(%rdx), RB0; + vmovdqu (5 * 32)(%rdx), RB1; + vmovdqu (6 * 32)(%rdx), RB2; + vmovdqu (7 * 32)(%rdx), RB3; + + call __sm4_crypt_blk16; + + vmovdqu (%rcx), RNOTx; + vinserti128 $1, (%rdx), RNOT, RNOT; + vpxor RNOT, RA0, RA0; + vpxor (0 * 32 + 16)(%rdx), RA1, RA1; + vpxor (1 * 32 + 16)(%rdx), RA2, RA2; + vpxor (2 * 32 + 16)(%rdx), RA3, RA3; + vpxor (3 * 32 + 16)(%rdx), RB0, RB0; + vpxor (4 * 32 + 16)(%rdx), RB1, RB1; + vpxor (5 * 32 + 16)(%rdx), RB2, RB2; + vpxor (6 * 32 + 16)(%rdx), RB3, RB3; + vmovdqu (7 * 32 + 16)(%rdx), RNOTx; + vmovdqu RNOTx, (%rcx); /* store new IV */ + + vmovdqu RA0, (0 * 32)(%rsi); + vmovdqu RA1, (1 * 32)(%rsi); + vmovdqu RA2, (2 * 32)(%rsi); + vmovdqu RA3, (3 * 32)(%rsi); + vmovdqu RB0, (4 * 32)(%rsi); + vmovdqu RB1, (5 * 32)(%rsi); + vmovdqu RB2, (6 * 32)(%rsi); + vmovdqu RB3, (7 * 32)(%rsi); + + vzeroall; + FRAME_END + ret; +SYM_FUNC_END(sm4_aesni_avx2_cbc_dec_blk16) + +/* + * void sm4_aesni_avx2_cfb_dec_blk16(const u32 *rk, u8 *dst, + * const u8 *src, u8 *iv) + */ +.align 8 +SYM_FUNC_START(sm4_aesni_avx2_cfb_dec_blk16) + /* input: + * %rdi: round key array, CTX + * %rsi: dst (16 blocks) + * %rdx: src (16 blocks) + * %rcx: iv + */ + FRAME_BEGIN + + vzeroupper; + + /* Load input */ + vmovdqu (%rcx), RNOTx; + vinserti128 $1, (%rdx), RNOT, RA0; + vmovdqu (0 * 32 + 16)(%rdx), RA1; + vmovdqu (1 * 32 + 16)(%rdx), RA2; + vmovdqu (2 * 32 + 16)(%rdx), RA3; + vmovdqu (3 * 32 + 16)(%rdx), RB0; + vmovdqu (4 * 32 + 16)(%rdx), RB1; + vmovdqu (5 * 32 + 16)(%rdx), RB2; + vmovdqu (6 * 32 + 16)(%rdx), RB3; + + /* Update IV */ + vmovdqu (7 * 32 + 16)(%rdx), RNOTx; + vmovdqu RNOTx, (%rcx); + + call __sm4_crypt_blk16; + + vpxor (0 * 32)(%rdx), RA0, RA0; + vpxor (1 * 32)(%rdx), RA1, RA1; + vpxor (2 * 32)(%rdx), RA2, RA2; + vpxor (3 * 32)(%rdx), RA3, RA3; + vpxor (4 * 32)(%rdx), RB0, RB0; + vpxor (5 * 32)(%rdx), RB1, RB1; + vpxor (6 * 32)(%rdx), RB2, RB2; + vpxor (7 * 32)(%rdx), RB3, RB3; + + vmovdqu RA0, (0 * 32)(%rsi); + vmovdqu RA1, (1 * 32)(%rsi); + vmovdqu RA2, (2 * 32)(%rsi); + vmovdqu RA3, (3 * 32)(%rsi); + vmovdqu RB0, (4 * 32)(%rsi); + vmovdqu RB1, (5 * 32)(%rsi); + vmovdqu RB2, (6 * 32)(%rsi); + vmovdqu RB3, (7 * 32)(%rsi); + + vzeroall; + FRAME_END + ret; +SYM_FUNC_END(sm4_aesni_avx2_cfb_dec_blk16) diff --git a/arch/x86/crypto/sm4_aesni_avx2_glue.c b/arch/x86/crypto/sm4_aesni_avx2_glue.c new file mode 100644 index 000000000000..84bc718f49a3 --- /dev/null +++ b/arch/x86/crypto/sm4_aesni_avx2_glue.c @@ -0,0 +1,169 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * SM4 Cipher Algorithm, AES-NI/AVX2 optimized. + * as specified in + * https://tools.ietf.org/id/draft-ribose-cfrg-sm4-10.html + * + * Copyright (c) 2021, Alibaba Group. + * Copyright (c) 2021 Tianjia Zhang + */ + +#include +#include +#include +#include +#include +#include +#include +#include "sm4-avx.h" + +#define SM4_CRYPT16_BLOCK_SIZE (SM4_BLOCK_SIZE * 16) + +asmlinkage void sm4_aesni_avx2_ctr_enc_blk16(const u32 *rk, u8 *dst, + const u8 *src, u8 *iv); +asmlinkage void sm4_aesni_avx2_cbc_dec_blk16(const u32 *rk, u8 *dst, + const u8 *src, u8 *iv); +asmlinkage void sm4_aesni_avx2_cfb_dec_blk16(const u32 *rk, u8 *dst, + const u8 *src, u8 *iv); + +static int sm4_skcipher_setkey(struct crypto_skcipher *tfm, const u8 *key, + unsigned int key_len) +{ + struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm); + + return sm4_expandkey(ctx, key, key_len); +} + +static int cbc_decrypt(struct skcipher_request *req) +{ + return sm4_avx_cbc_decrypt(req, SM4_CRYPT16_BLOCK_SIZE, + sm4_aesni_avx2_cbc_dec_blk16); +} + + +static int cfb_decrypt(struct skcipher_request *req) +{ + return sm4_avx_cfb_decrypt(req, SM4_CRYPT16_BLOCK_SIZE, + sm4_aesni_avx2_cfb_dec_blk16); +} + +static int ctr_crypt(struct skcipher_request *req) +{ + return sm4_avx_ctr_crypt(req, SM4_CRYPT16_BLOCK_SIZE, + sm4_aesni_avx2_ctr_enc_blk16); +} + +static struct skcipher_alg sm4_aesni_avx2_skciphers[] = { + { + .base = { + .cra_name = "__ecb(sm4)", + .cra_driver_name = "__ecb-sm4-aesni-avx2", + .cra_priority = 500, + .cra_flags = CRYPTO_ALG_INTERNAL, + .cra_blocksize = SM4_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct sm4_ctx), + .cra_module = THIS_MODULE, + }, + .min_keysize = SM4_KEY_SIZE, + .max_keysize = SM4_KEY_SIZE, + .walksize = 16 * SM4_BLOCK_SIZE, + .setkey = sm4_skcipher_setkey, + .encrypt = sm4_avx_ecb_encrypt, + .decrypt = sm4_avx_ecb_decrypt, + }, { + .base = { + .cra_name = "__cbc(sm4)", + .cra_driver_name = "__cbc-sm4-aesni-avx2", + .cra_priority = 500, + .cra_flags = CRYPTO_ALG_INTERNAL, + .cra_blocksize = SM4_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct sm4_ctx), + .cra_module = THIS_MODULE, + }, + .min_keysize = SM4_KEY_SIZE, + .max_keysize = SM4_KEY_SIZE, + .ivsize = SM4_BLOCK_SIZE, + .walksize = 16 * SM4_BLOCK_SIZE, + .setkey = sm4_skcipher_setkey, + .encrypt = sm4_cbc_encrypt, + .decrypt = cbc_decrypt, + }, { + .base = { + .cra_name = "__cfb(sm4)", + .cra_driver_name = "__cfb-sm4-aesni-avx2", + .cra_priority = 500, + .cra_flags = CRYPTO_ALG_INTERNAL, + .cra_blocksize = 1, + .cra_ctxsize = sizeof(struct sm4_ctx), + .cra_module = THIS_MODULE, + }, + .min_keysize = SM4_KEY_SIZE, + .max_keysize = SM4_KEY_SIZE, + .ivsize = SM4_BLOCK_SIZE, + .chunksize = SM4_BLOCK_SIZE, + .walksize = 16 * SM4_BLOCK_SIZE, + .setkey = sm4_skcipher_setkey, + .encrypt = sm4_cfb_encrypt, + .decrypt = cfb_decrypt, + }, { + .base = { + .cra_name = "__ctr(sm4)", + .cra_driver_name = "__ctr-sm4-aesni-avx2", + .cra_priority = 500, + .cra_flags = CRYPTO_ALG_INTERNAL, + .cra_blocksize = 1, + .cra_ctxsize = sizeof(struct sm4_ctx), + .cra_module = THIS_MODULE, + }, + .min_keysize = SM4_KEY_SIZE, + .max_keysize = SM4_KEY_SIZE, + .ivsize = SM4_BLOCK_SIZE, + .chunksize = SM4_BLOCK_SIZE, + .walksize = 16 * SM4_BLOCK_SIZE, + .setkey = sm4_skcipher_setkey, + .encrypt = ctr_crypt, + .decrypt = ctr_crypt, + } +}; + +static struct simd_skcipher_alg * +simd_sm4_aesni_avx2_skciphers[ARRAY_SIZE(sm4_aesni_avx2_skciphers)]; + +static int __init sm4_init(void) +{ + const char *feature_name; + + if (!boot_cpu_has(X86_FEATURE_AVX) || + !boot_cpu_has(X86_FEATURE_AVX2) || + !boot_cpu_has(X86_FEATURE_AES) || + !boot_cpu_has(X86_FEATURE_OSXSAVE)) { + pr_info("AVX2 or AES-NI instructions are not detected.\n"); + return -ENODEV; + } + + if (!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, + &feature_name)) { + pr_info("CPU feature '%s' is not supported.\n", feature_name); + return -ENODEV; + } + + return simd_register_skciphers_compat(sm4_aesni_avx2_skciphers, + ARRAY_SIZE(sm4_aesni_avx2_skciphers), + simd_sm4_aesni_avx2_skciphers); +} + +static void __exit sm4_exit(void) +{ + simd_unregister_skciphers(sm4_aesni_avx2_skciphers, + ARRAY_SIZE(sm4_aesni_avx2_skciphers), + simd_sm4_aesni_avx2_skciphers); +} + +module_init(sm4_init); +module_exit(sm4_exit); + +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("Tianjia Zhang "); +MODULE_DESCRIPTION("SM4 Cipher Algorithm, AES-NI/AVX2 optimized"); +MODULE_ALIAS_CRYPTO("sm4"); +MODULE_ALIAS_CRYPTO("sm4-aesni-avx2"); diff --git a/crypto/Kconfig b/crypto/Kconfig index 8a4010d91124..536df4b6b825 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -1591,6 +1591,28 @@ config CRYPTO_SM4_AESNI_AVX_X86_64 If unsure, say N. +config CRYPTO_SM4_AESNI_AVX2_X86_64 + tristate "SM4 cipher algorithm (x86_64/AES-NI/AVX2)" + depends on X86 && 64BIT + select CRYPTO_SKCIPHER + select CRYPTO_SIMD + select CRYPTO_ALGAPI + select CRYPTO_LIB_SM4 + select CRYPTO_SM4_AESNI_AVX_X86_64 + help + SM4 cipher algorithms (OSCCA GB/T 32907-2016) (x86_64/AES-NI/AVX2). + + SM4 (GBT.32907-2016) is a cryptographic standard issued by the + Organization of State Commercial Administration of China (OSCCA) + as an authorized cryptographic algorithms for the use within China. + + This is SM4 optimized implementation using AES-NI/AVX2/x86_64 + instruction set for block cipher. Through two affine transforms, + we can use the AES S-Box to simulate the SM4 S-Box to achieve the + effect of instruction acceleration. + + If unsure, say N. + config CRYPTO_TEA tristate "TEA, XTEA and XETA cipher algorithms" depends on CRYPTO_USER_API_ENABLE_OBSOLETE -- cgit v1.2.3 From 3438de03e98ad305129bdd033cb63e46debd67f4 Mon Sep 17 00:00:00 2001 From: John Allen Date: Fri, 20 Aug 2021 14:56:21 +0000 Subject: crypto: ccp - Add support for new CCP/PSP device ID Add a new CCP/PSP PCI device ID and corresponding entry in the dev_vdata struct. Signed-off-by: John Allen Signed-off-by: Herbert Xu --- drivers/crypto/ccp/sp-pci.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/crypto/ccp/sp-pci.c b/drivers/crypto/ccp/sp-pci.c index 9bcc1884c06a..88c672ad27e4 100644 --- a/drivers/crypto/ccp/sp-pci.c +++ b/drivers/crypto/ccp/sp-pci.c @@ -360,6 +360,12 @@ static const struct sp_dev_vdata dev_vdata[] = { #endif #ifdef CONFIG_CRYPTO_DEV_SP_PSP .psp_vdata = &pspv3, +#endif + }, + { /* 5 */ + .bar = 2, +#ifdef CONFIG_CRYPTO_DEV_SP_PSP + .psp_vdata = &pspv2, #endif }, }; @@ -370,6 +376,7 @@ static const struct pci_device_id sp_pci_table[] = { { PCI_VDEVICE(AMD, 0x1486), (kernel_ulong_t)&dev_vdata[3] }, { PCI_VDEVICE(AMD, 0x15DF), (kernel_ulong_t)&dev_vdata[4] }, { PCI_VDEVICE(AMD, 0x1649), (kernel_ulong_t)&dev_vdata[4] }, + { PCI_VDEVICE(AMD, 0x14CA), (kernel_ulong_t)&dev_vdata[5] }, /* Last entry must be zero */ { 0, } }; -- cgit v1.2.3 From cedcf527d59bcca5f87f52ea34a157bbc6e7a3a8 Mon Sep 17 00:00:00 2001 From: Cai Huoqing Date: Sun, 22 Aug 2021 10:27:34 +0800 Subject: padata: Remove repeated verbose license text remove it because SPDX-License-Identifier is already used Signed-off-by: Cai Huoqing Acked-by: Daniel Jordan Signed-off-by: Herbert Xu --- kernel/padata.c | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/kernel/padata.c b/kernel/padata.c index 3258ab89c2a3..18d3a5c699d8 100644 --- a/kernel/padata.c +++ b/kernel/padata.c @@ -9,19 +9,6 @@ * * Copyright (c) 2020 Oracle and/or its affiliates. * Author: Daniel Jordan - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., - * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. */ #include -- cgit v1.2.3 From 72ff2bf04db2a48840df93a461b7115900f46c05 Mon Sep 17 00:00:00 2001 From: Shreyansh Chouhan Date: Sun, 22 Aug 2021 09:15:14 +0530 Subject: crypto: aesni - xts_crypt() return if walk.nbytes is 0 xts_crypt() code doesn't call kernel_fpu_end() after calling kernel_fpu_begin() if walk.nbytes is 0. The correct behavior should be not calling kernel_fpu_begin() if walk.nbytes is 0. Reported-by: syzbot+20191dc583eff8602d2d@syzkaller.appspotmail.com Signed-off-by: Shreyansh Chouhan Signed-off-by: Herbert Xu --- arch/x86/crypto/aesni-intel_glue.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c index 388643ca2177..0fc961bef299 100644 --- a/arch/x86/crypto/aesni-intel_glue.c +++ b/arch/x86/crypto/aesni-intel_glue.c @@ -849,7 +849,7 @@ static int xts_crypt(struct skcipher_request *req, bool encrypt) return -EINVAL; err = skcipher_walk_virt(&walk, req, false); - if (err) + if (!walk.nbytes) return err; if (unlikely(tail > 0 && walk.nbytes < walk.total)) { -- cgit v1.2.3 From 6ae51ffe5e768d9e25a7f4298e2e7a058472bcc3 Mon Sep 17 00:00:00 2001 From: Lukas Bulwahn Date: Sun, 22 Aug 2021 12:31:07 +0200 Subject: crypto: sha512 - remove imaginary and mystifying clearing of variables The function sha512_transform() assigns all local variables to 0 before returning to its caller with the intent to erase sensitive data. However, make clang-analyzer warns that all these assignments are dead stores, and as commit 7a4295f6c9d5 ("crypto: lib/sha256 - Don't clear temporary variables") already points out for sha256_transform(): The assignments to clear a through h and t1/t2 are optimized out by the compiler because they are unused after the assignments. Clearing individual scalar variables is unlikely to be useful, as they may have been assigned to registers, and even if stack spilling was required, there may be compiler-generated temporaries that are impossible to clear in any case. This applies here again as well. Drop meaningless clearing of local variables and avoid this way that the code suggests that data is erased, which simply does not happen. Signed-off-by: Lukas Bulwahn Reviewed-by: Nick Desaulniers Signed-off-by: Herbert Xu --- crypto/sha512_generic.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/crypto/sha512_generic.c b/crypto/sha512_generic.c index c72d72ad828e..be70e76d6d86 100644 --- a/crypto/sha512_generic.c +++ b/crypto/sha512_generic.c @@ -143,9 +143,6 @@ sha512_transform(u64 *state, const u8 *input) state[0] += a; state[1] += b; state[2] += c; state[3] += d; state[4] += e; state[5] += f; state[6] += g; state[7] += h; - - /* erase our data */ - a = b = c = d = e = f = g = h = t1 = t2 = 0; } static void sha512_generic_block_fn(struct sha512_state *sst, u8 const *src, -- cgit v1.2.3