diff options
Diffstat (limited to 'drivers/crypto')
57 files changed, 4686 insertions, 275 deletions
diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig index 2fb0fdfc87df..800bf41718e1 100644 --- a/drivers/crypto/Kconfig +++ b/drivers/crypto/Kconfig @@ -391,7 +391,7 @@ config CRYPTO_DEV_ATMEL_SHA config CRYPTO_DEV_CCP bool "Support for AMD Cryptographic Coprocessor" - depends on (X86 && PCI) || ARM64 + depends on ((X86 && PCI) || (ARM64 && (OF_ADDRESS || ACPI))) && HAS_IOMEM default n help The AMD Cryptographic Coprocessor provides hardware support @@ -436,4 +436,26 @@ config CRYPTO_DEV_QCE hardware. To compile this driver as a module, choose M here. The module will be called qcrypto. +config CRYPTO_DEV_VMX + bool "Support for VMX cryptographic acceleration instructions" + depends on PPC64 + default n + help + Support for VMX cryptographic acceleration instructions. + +source "drivers/crypto/vmx/Kconfig" + +config CRYPTO_DEV_IMGTEC_HASH + depends on MIPS || COMPILE_TEST + tristate "Imagination Technologies hardware hash accelerator" + select CRYPTO_ALGAPI + select CRYPTO_MD5 + select CRYPTO_SHA1 + select CRYPTO_SHA256 + select CRYPTO_HASH + help + This driver interfaces with the Imagination Technologies + hardware hash accelerator. Supporting MD5/SHA1/SHA224/SHA256 + hashing algorithms. + endif # CRYPTO_HW diff --git a/drivers/crypto/Makefile b/drivers/crypto/Makefile index 3924f93d5774..fb84be7e6be5 100644 --- a/drivers/crypto/Makefile +++ b/drivers/crypto/Makefile @@ -6,6 +6,7 @@ obj-$(CONFIG_CRYPTO_DEV_CCP) += ccp/ obj-$(CONFIG_CRYPTO_DEV_FSL_CAAM) += caam/ obj-$(CONFIG_CRYPTO_DEV_GEODE) += geode-aes.o obj-$(CONFIG_CRYPTO_DEV_HIFN_795X) += hifn_795x.o +obj-$(CONFIG_CRYPTO_DEV_IMGTEC_HASH) += img-hash.o obj-$(CONFIG_CRYPTO_DEV_IXP4XX) += ixp4xx_crypto.o obj-$(CONFIG_CRYPTO_DEV_MV_CESA) += mv_cesa.o obj-$(CONFIG_CRYPTO_DEV_MXS_DCP) += mxs-dcp.o @@ -25,3 +26,4 @@ obj-$(CONFIG_CRYPTO_DEV_TALITOS) += talitos.o obj-$(CONFIG_CRYPTO_DEV_UX500) += ux500/ obj-$(CONFIG_CRYPTO_DEV_QAT) += qat/ obj-$(CONFIG_CRYPTO_DEV_QCE) += qce/ +obj-$(CONFIG_CRYPTO_DEV_VMX) += vmx/ diff --git a/drivers/crypto/amcc/crypto4xx_core.c b/drivers/crypto/amcc/crypto4xx_core.c index d02b77150070..3b28e8c3de28 100644 --- a/drivers/crypto/amcc/crypto4xx_core.c +++ b/drivers/crypto/amcc/crypto4xx_core.c @@ -1155,7 +1155,7 @@ struct crypto4xx_alg_common crypto4xx_alg[] = { /** * Module Initialization Routine */ -static int __init crypto4xx_probe(struct platform_device *ofdev) +static int crypto4xx_probe(struct platform_device *ofdev) { int rc; struct resource res; @@ -1263,7 +1263,7 @@ err_alloc_dev: return rc; } -static int __exit crypto4xx_remove(struct platform_device *ofdev) +static int crypto4xx_remove(struct platform_device *ofdev) { struct device *dev = &ofdev->dev; struct crypto4xx_core_device *core_dev = dev_get_drvdata(dev); @@ -1291,7 +1291,7 @@ static struct platform_driver crypto4xx_driver = { .of_match_table = crypto4xx_match, }, .probe = crypto4xx_probe, - .remove = __exit_p(crypto4xx_remove), + .remove = crypto4xx_remove, }; module_platform_driver(crypto4xx_driver); diff --git a/drivers/crypto/atmel-aes.c b/drivers/crypto/atmel-aes.c index 6597aac9905d..0f9a9dc06a83 100644 --- a/drivers/crypto/atmel-aes.c +++ b/drivers/crypto/atmel-aes.c @@ -315,10 +315,10 @@ static int atmel_aes_crypt_dma(struct atmel_aes_dev *dd, dd->dma_size = length; - if (!(dd->flags & AES_FLAGS_FAST)) { - dma_sync_single_for_device(dd->dev, dma_addr_in, length, - DMA_TO_DEVICE); - } + dma_sync_single_for_device(dd->dev, dma_addr_in, length, + DMA_TO_DEVICE); + dma_sync_single_for_device(dd->dev, dma_addr_out, length, + DMA_FROM_DEVICE); if (dd->flags & AES_FLAGS_CFB8) { dd->dma_lch_in.dma_conf.dst_addr_width = @@ -391,6 +391,11 @@ static int atmel_aes_crypt_cpu_start(struct atmel_aes_dev *dd) { dd->flags &= ~AES_FLAGS_DMA; + dma_sync_single_for_cpu(dd->dev, dd->dma_addr_in, + dd->dma_size, DMA_TO_DEVICE); + dma_sync_single_for_cpu(dd->dev, dd->dma_addr_out, + dd->dma_size, DMA_FROM_DEVICE); + /* use cache buffers */ dd->nb_in_sg = atmel_aes_sg_length(dd->req, dd->in_sg); if (!dd->nb_in_sg) @@ -459,6 +464,9 @@ static int atmel_aes_crypt_dma_start(struct atmel_aes_dev *dd) dd->flags |= AES_FLAGS_FAST; } else { + dma_sync_single_for_cpu(dd->dev, dd->dma_addr_in, + dd->dma_size, DMA_TO_DEVICE); + /* use cache buffers */ count = atmel_aes_sg_copy(&dd->in_sg, &dd->in_offset, dd->buf_in, dd->buflen, dd->total, 0); @@ -619,7 +627,7 @@ static int atmel_aes_crypt_dma_stop(struct atmel_aes_dev *dd) dma_unmap_sg(dd->dev, dd->out_sg, 1, DMA_FROM_DEVICE); dma_unmap_sg(dd->dev, dd->in_sg, 1, DMA_TO_DEVICE); } else { - dma_sync_single_for_device(dd->dev, dd->dma_addr_out, + dma_sync_single_for_cpu(dd->dev, dd->dma_addr_out, dd->dma_size, DMA_FROM_DEVICE); /* copy data */ @@ -1246,6 +1254,11 @@ static void atmel_aes_get_cap(struct atmel_aes_dev *dd) /* keep only major version number */ switch (dd->hw_version & 0xff0) { + case 0x200: + dd->caps.has_dualbuff = 1; + dd->caps.has_cfb64 = 1; + dd->caps.max_burst_size = 4; + break; case 0x130: dd->caps.has_dualbuff = 1; dd->caps.has_cfb64 = 1; @@ -1336,6 +1349,7 @@ static int atmel_aes_probe(struct platform_device *pdev) platform_set_drvdata(pdev, aes_dd); INIT_LIST_HEAD(&aes_dd->list); + spin_lock_init(&aes_dd->lock); tasklet_init(&aes_dd->done_task, atmel_aes_done_task, (unsigned long)aes_dd); @@ -1374,7 +1388,7 @@ static int atmel_aes_probe(struct platform_device *pdev) /* Initializing the clock */ aes_dd->iclk = clk_get(&pdev->dev, "aes_clk"); if (IS_ERR(aes_dd->iclk)) { - dev_err(dev, "clock intialization failed.\n"); + dev_err(dev, "clock initialization failed.\n"); err = PTR_ERR(aes_dd->iclk); goto clk_err; } diff --git a/drivers/crypto/atmel-sha.c b/drivers/crypto/atmel-sha.c index 34db04addc18..5b35433c5399 100644 --- a/drivers/crypto/atmel-sha.c +++ b/drivers/crypto/atmel-sha.c @@ -163,8 +163,20 @@ static size_t atmel_sha_append_sg(struct atmel_sha_reqctx *ctx) count = min(ctx->sg->length - ctx->offset, ctx->total); count = min(count, ctx->buflen - ctx->bufcnt); - if (count <= 0) - break; + if (count <= 0) { + /* + * Check if count <= 0 because the buffer is full or + * because the sg length is 0. In the latest case, + * check if there is another sg in the list, a 0 length + * sg doesn't necessarily mean the end of the sg list. + */ + if ((ctx->sg->length == 0) && !sg_is_last(ctx->sg)) { + ctx->sg = sg_next(ctx->sg); + continue; + } else { + break; + } + } scatterwalk_map_and_copy(ctx->buffer + ctx->bufcnt, ctx->sg, ctx->offset, count, 0); @@ -420,14 +432,8 @@ static int atmel_sha_xmit_dma(struct atmel_sha_dev *dd, dma_addr_t dma_addr1, dev_dbg(dd->dev, "xmit_dma: digcnt: 0x%llx 0x%llx, length: %d, final: %d\n", ctx->digcnt[1], ctx->digcnt[0], length1, final); - if (ctx->flags & (SHA_FLAGS_SHA1 | SHA_FLAGS_SHA224 | - SHA_FLAGS_SHA256)) { - dd->dma_lch_in.dma_conf.src_maxburst = 16; - dd->dma_lch_in.dma_conf.dst_maxburst = 16; - } else { - dd->dma_lch_in.dma_conf.src_maxburst = 32; - dd->dma_lch_in.dma_conf.dst_maxburst = 32; - } + dd->dma_lch_in.dma_conf.src_maxburst = 16; + dd->dma_lch_in.dma_conf.dst_maxburst = 16; dmaengine_slave_config(dd->dma_lch_in.chan, &dd->dma_lch_in.dma_conf); @@ -529,7 +535,7 @@ static int atmel_sha_update_dma_slow(struct atmel_sha_dev *dd) if (final) atmel_sha_fill_padding(ctx, 0); - if (final || (ctx->bufcnt == ctx->buflen && ctx->total)) { + if (final || (ctx->bufcnt == ctx->buflen)) { count = ctx->bufcnt; ctx->bufcnt = 0; return atmel_sha_xmit_dma_map(dd, ctx, count, final); @@ -1266,6 +1272,12 @@ static void atmel_sha_get_cap(struct atmel_sha_dev *dd) /* keep only major version number */ switch (dd->hw_version & 0xff0) { + case 0x420: + dd->caps.has_dma = 1; + dd->caps.has_dualbuff = 1; + dd->caps.has_sha224 = 1; + dd->caps.has_sha_384_512 = 1; + break; case 0x410: dd->caps.has_dma = 1; dd->caps.has_dualbuff = 1; @@ -1349,6 +1361,7 @@ static int atmel_sha_probe(struct platform_device *pdev) platform_set_drvdata(pdev, sha_dd); INIT_LIST_HEAD(&sha_dd->list); + spin_lock_init(&sha_dd->lock); tasklet_init(&sha_dd->done_task, atmel_sha_done_task, (unsigned long)sha_dd); @@ -1385,7 +1398,7 @@ static int atmel_sha_probe(struct platform_device *pdev) /* Initializing the clock */ sha_dd->iclk = clk_get(&pdev->dev, "sha_clk"); if (IS_ERR(sha_dd->iclk)) { - dev_err(dev, "clock intialization failed.\n"); + dev_err(dev, "clock initialization failed.\n"); err = PTR_ERR(sha_dd->iclk); goto clk_err; } diff --git a/drivers/crypto/atmel-tdes.c b/drivers/crypto/atmel-tdes.c index 258772d9b22f..ca2999709eb4 100644 --- a/drivers/crypto/atmel-tdes.c +++ b/drivers/crypto/atmel-tdes.c @@ -1370,6 +1370,7 @@ static int atmel_tdes_probe(struct platform_device *pdev) platform_set_drvdata(pdev, tdes_dd); INIT_LIST_HEAD(&tdes_dd->list); + spin_lock_init(&tdes_dd->lock); tasklet_init(&tdes_dd->done_task, atmel_tdes_done_task, (unsigned long)tdes_dd); @@ -1408,7 +1409,7 @@ static int atmel_tdes_probe(struct platform_device *pdev) /* Initializing the clock */ tdes_dd->iclk = clk_get(&pdev->dev, "tdes_clk"); if (IS_ERR(tdes_dd->iclk)) { - dev_err(dev, "clock intialization failed.\n"); + dev_err(dev, "clock initialization failed.\n"); err = PTR_ERR(tdes_dd->iclk); goto clk_err; } diff --git a/drivers/crypto/caam/caamhash.c b/drivers/crypto/caam/caamhash.c index f347ab7eea95..ba0532efd3ae 100644 --- a/drivers/crypto/caam/caamhash.c +++ b/drivers/crypto/caam/caamhash.c @@ -1172,6 +1172,7 @@ static int ahash_final_no_ctx(struct ahash_request *req) return -ENOMEM; } + edesc->sec4_sg_bytes = 0; sh_len = desc_len(sh_desc); desc = edesc->hw_desc; init_job_desc_shared(desc, ptr, sh_len, HDR_SHARE_DEFER | HDR_REVERSE); diff --git a/drivers/crypto/caam/caamrng.c b/drivers/crypto/caam/caamrng.c index ae31e555793c..26a544b505f1 100644 --- a/drivers/crypto/caam/caamrng.c +++ b/drivers/crypto/caam/caamrng.c @@ -52,7 +52,7 @@ /* length of descriptors */ #define DESC_JOB_O_LEN (CAAM_CMD_SZ * 2 + CAAM_PTR_SZ * 2) -#define DESC_RNG_LEN (10 * CAAM_CMD_SZ) +#define DESC_RNG_LEN (4 * CAAM_CMD_SZ) /* Buffer, its dma address and lock */ struct buf_data { @@ -90,8 +90,8 @@ static inline void rng_unmap_ctx(struct caam_rng_ctx *ctx) struct device *jrdev = ctx->jrdev; if (ctx->sh_desc_dma) - dma_unmap_single(jrdev, ctx->sh_desc_dma, DESC_RNG_LEN, - DMA_TO_DEVICE); + dma_unmap_single(jrdev, ctx->sh_desc_dma, + desc_bytes(ctx->sh_desc), DMA_TO_DEVICE); rng_unmap_buf(jrdev, &ctx->bufs[0]); rng_unmap_buf(jrdev, &ctx->bufs[1]); } diff --git a/drivers/crypto/ccp/Makefile b/drivers/crypto/ccp/Makefile index 7f592d8d07bb..55a1f3951578 100644 --- a/drivers/crypto/ccp/Makefile +++ b/drivers/crypto/ccp/Makefile @@ -1,11 +1,6 @@ obj-$(CONFIG_CRYPTO_DEV_CCP_DD) += ccp.o -ccp-objs := ccp-dev.o ccp-ops.o -ifdef CONFIG_X86 -ccp-objs += ccp-pci.o -endif -ifdef CONFIG_ARM64 -ccp-objs += ccp-platform.o -endif +ccp-objs := ccp-dev.o ccp-ops.o ccp-platform.o +ccp-$(CONFIG_PCI) += ccp-pci.o obj-$(CONFIG_CRYPTO_DEV_CCP_CRYPTO) += ccp-crypto.o ccp-crypto-objs := ccp-crypto-main.o \ diff --git a/drivers/crypto/ccp/ccp-crypto-aes-cmac.c b/drivers/crypto/ccp/ccp-crypto-aes-cmac.c index 8e162ad82085..ea7e8446956a 100644 --- a/drivers/crypto/ccp/ccp-crypto-aes-cmac.c +++ b/drivers/crypto/ccp/ccp-crypto-aes-cmac.c @@ -23,7 +23,6 @@ #include "ccp-crypto.h" - static int ccp_aes_cmac_complete(struct crypto_async_request *async_req, int ret) { @@ -38,11 +37,13 @@ static int ccp_aes_cmac_complete(struct crypto_async_request *async_req, if (rctx->hash_rem) { /* Save remaining data to buffer */ unsigned int offset = rctx->nbytes - rctx->hash_rem; + scatterwalk_map_and_copy(rctx->buf, rctx->src, offset, rctx->hash_rem, 0); rctx->buf_count = rctx->hash_rem; - } else + } else { rctx->buf_count = 0; + } /* Update result area if supplied */ if (req->result) @@ -202,7 +203,7 @@ static int ccp_aes_cmac_digest(struct ahash_request *req) } static int ccp_aes_cmac_setkey(struct crypto_ahash *tfm, const u8 *key, - unsigned int key_len) + unsigned int key_len) { struct ccp_ctx *ctx = crypto_tfm_ctx(crypto_ahash_tfm(tfm)); struct ccp_crypto_ahash_alg *alg = @@ -292,7 +293,8 @@ static int ccp_aes_cmac_cra_init(struct crypto_tfm *tfm) crypto_ahash_set_reqsize(ahash, sizeof(struct ccp_aes_cmac_req_ctx)); cipher_tfm = crypto_alloc_cipher("aes", 0, - CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK); + CRYPTO_ALG_ASYNC | + CRYPTO_ALG_NEED_FALLBACK); if (IS_ERR(cipher_tfm)) { pr_warn("could not load aes cipher driver\n"); return PTR_ERR(cipher_tfm); @@ -354,7 +356,7 @@ int ccp_register_aes_cmac_algs(struct list_head *head) ret = crypto_register_ahash(alg); if (ret) { pr_err("%s ahash algorithm registration error (%d)\n", - base->cra_name, ret); + base->cra_name, ret); kfree(ccp_alg); return ret; } diff --git a/drivers/crypto/ccp/ccp-crypto-aes-xts.c b/drivers/crypto/ccp/ccp-crypto-aes-xts.c index 0cc5594b7de3..52c7395cb8d8 100644 --- a/drivers/crypto/ccp/ccp-crypto-aes-xts.c +++ b/drivers/crypto/ccp/ccp-crypto-aes-xts.c @@ -21,7 +21,6 @@ #include "ccp-crypto.h" - struct ccp_aes_xts_def { const char *name; const char *drv_name; @@ -216,7 +215,6 @@ static void ccp_aes_xts_cra_exit(struct crypto_tfm *tfm) ctx->u.aes.tfm_ablkcipher = NULL; } - static int ccp_register_aes_xts_alg(struct list_head *head, const struct ccp_aes_xts_def *def) { @@ -255,7 +253,7 @@ static int ccp_register_aes_xts_alg(struct list_head *head, ret = crypto_register_alg(alg); if (ret) { pr_err("%s ablkcipher algorithm registration error (%d)\n", - alg->cra_name, ret); + alg->cra_name, ret); kfree(ccp_alg); return ret; } diff --git a/drivers/crypto/ccp/ccp-crypto-aes.c b/drivers/crypto/ccp/ccp-crypto-aes.c index e46490db0f63..7984f910884d 100644 --- a/drivers/crypto/ccp/ccp-crypto-aes.c +++ b/drivers/crypto/ccp/ccp-crypto-aes.c @@ -22,7 +22,6 @@ #include "ccp-crypto.h" - static int ccp_aes_complete(struct crypto_async_request *async_req, int ret) { struct ablkcipher_request *req = ablkcipher_request_cast(async_req); @@ -345,7 +344,7 @@ static int ccp_register_aes_alg(struct list_head *head, ret = crypto_register_alg(alg); if (ret) { pr_err("%s ablkcipher algorithm registration error (%d)\n", - alg->cra_name, ret); + alg->cra_name, ret); kfree(ccp_alg); return ret; } diff --git a/drivers/crypto/ccp/ccp-crypto-main.c b/drivers/crypto/ccp/ccp-crypto-main.c index 4d4e016d755b..bdec01ec608f 100644 --- a/drivers/crypto/ccp/ccp-crypto-main.c +++ b/drivers/crypto/ccp/ccp-crypto-main.c @@ -33,7 +33,6 @@ static unsigned int sha_disable; module_param(sha_disable, uint, 0444); MODULE_PARM_DESC(sha_disable, "Disable use of SHA - any non-zero value"); - /* List heads for the supported algorithms */ static LIST_HEAD(hash_algs); static LIST_HEAD(cipher_algs); @@ -48,6 +47,7 @@ struct ccp_crypto_queue { struct list_head *backlog; unsigned int cmd_count; }; + #define CCP_CRYPTO_MAX_QLEN 100 static struct ccp_crypto_queue req_queue; @@ -77,7 +77,6 @@ struct ccp_crypto_cpu { int err; }; - static inline bool ccp_crypto_success(int err) { if (err && (err != -EINPROGRESS) && (err != -EBUSY)) @@ -143,7 +142,7 @@ static void ccp_crypto_complete(void *data, int err) int ret; if (err == -EINPROGRESS) { - /* Only propogate the -EINPROGRESS if necessary */ + /* Only propagate the -EINPROGRESS if necessary */ if (crypto_cmd->ret == -EBUSY) { crypto_cmd->ret = -EINPROGRESS; req->complete(req, -EINPROGRESS); diff --git a/drivers/crypto/ccp/ccp-crypto-sha.c b/drivers/crypto/ccp/ccp-crypto-sha.c index 96531571f7cf..507b34e0cc19 100644 --- a/drivers/crypto/ccp/ccp-crypto-sha.c +++ b/drivers/crypto/ccp/ccp-crypto-sha.c @@ -23,7 +23,6 @@ #include "ccp-crypto.h" - static int ccp_sha_complete(struct crypto_async_request *async_req, int ret) { struct ahash_request *req = ahash_request_cast(async_req); @@ -37,11 +36,13 @@ static int ccp_sha_complete(struct crypto_async_request *async_req, int ret) if (rctx->hash_rem) { /* Save remaining data to buffer */ unsigned int offset = rctx->nbytes - rctx->hash_rem; + scatterwalk_map_and_copy(rctx->buf, rctx->src, offset, rctx->hash_rem, 0); rctx->buf_count = rctx->hash_rem; - } else + } else { rctx->buf_count = 0; + } /* Update result area if supplied */ if (req->result) @@ -227,8 +228,9 @@ static int ccp_sha_setkey(struct crypto_ahash *tfm, const u8 *key, } key_len = digest_size; - } else + } else { memcpy(ctx->u.sha.key, key, key_len); + } for (i = 0; i < block_size; i++) { ctx->u.sha.ipad[i] = ctx->u.sha.key[i] ^ 0x36; @@ -355,7 +357,7 @@ static int ccp_register_hmac_alg(struct list_head *head, ret = crypto_register_ahash(alg); if (ret) { pr_err("%s ahash algorithm registration error (%d)\n", - base->cra_name, ret); + base->cra_name, ret); kfree(ccp_alg); return ret; } @@ -410,7 +412,7 @@ static int ccp_register_sha_alg(struct list_head *head, ret = crypto_register_ahash(alg); if (ret) { pr_err("%s ahash algorithm registration error (%d)\n", - base->cra_name, ret); + base->cra_name, ret); kfree(ccp_alg); return ret; } diff --git a/drivers/crypto/ccp/ccp-crypto.h b/drivers/crypto/ccp/ccp-crypto.h index 9aa4ae184f7f..76a96f0f44c6 100644 --- a/drivers/crypto/ccp/ccp-crypto.h +++ b/drivers/crypto/ccp/ccp-crypto.h @@ -13,7 +13,6 @@ #ifndef __CCP_CRYPTO_H__ #define __CCP_CRYPTO_H__ - #include <linux/list.h> #include <linux/wait.h> #include <linux/pci.h> @@ -25,7 +24,6 @@ #include <crypto/hash.h> #include <crypto/sha.h> - #define CCP_CRA_PRIORITY 300 struct ccp_crypto_ablkcipher_alg { @@ -68,7 +66,6 @@ static inline struct ccp_crypto_ahash_alg * return container_of(ahash_alg, struct ccp_crypto_ahash_alg, alg); } - /***** AES related defines *****/ struct ccp_aes_ctx { /* Fallback cipher for XTS with unsupported unit sizes */ diff --git a/drivers/crypto/ccp/ccp-dev.c b/drivers/crypto/ccp/ccp-dev.c index ca29c120b85f..861bacc1bb94 100644 --- a/drivers/crypto/ccp/ccp-dev.c +++ b/drivers/crypto/ccp/ccp-dev.c @@ -37,7 +37,6 @@ struct ccp_tasklet_data { struct ccp_cmd *cmd; }; - static struct ccp_device *ccp_dev; static inline struct ccp_device *ccp_get_device(void) { @@ -296,11 +295,9 @@ struct ccp_device *ccp_alloc_struct(struct device *dev) { struct ccp_device *ccp; - ccp = kzalloc(sizeof(*ccp), GFP_KERNEL); - if (ccp == NULL) { - dev_err(dev, "unable to allocate device struct\n"); + ccp = devm_kzalloc(dev, sizeof(*ccp), GFP_KERNEL); + if (!ccp) return NULL; - } ccp->dev = dev; INIT_LIST_HEAD(&ccp->cmd); diff --git a/drivers/crypto/ccp/ccp-dev.h b/drivers/crypto/ccp/ccp-dev.h index 62ff35a6b9ec..6ff89031fb96 100644 --- a/drivers/crypto/ccp/ccp-dev.h +++ b/drivers/crypto/ccp/ccp-dev.h @@ -21,7 +21,7 @@ #include <linux/wait.h> #include <linux/dmapool.h> #include <linux/hw_random.h> - +#include <linux/bitops.h> #define MAX_DMAPOOL_NAME_LEN 32 @@ -33,7 +33,6 @@ #define CACHE_NONE 0x00 #define CACHE_WB_NO_ALLOC 0xb7 - /****** Register Mappings ******/ #define Q_MASK_REG 0x000 #define TRNG_OUT_REG 0x00c @@ -54,8 +53,8 @@ #define CMD_Q_CACHE_BASE 0x228 #define CMD_Q_CACHE_INC 0x20 -#define CMD_Q_ERROR(__qs) ((__qs) & 0x0000003f); -#define CMD_Q_DEPTH(__qs) (((__qs) >> 12) & 0x0000000f); +#define CMD_Q_ERROR(__qs) ((__qs) & 0x0000003f) +#define CMD_Q_DEPTH(__qs) (((__qs) >> 12) & 0x0000000f) /****** REQ0 Related Values ******/ #define REQ0_WAIT_FOR_WRITE 0x00000004 @@ -103,7 +102,6 @@ /****** REQ6 Related Values ******/ #define REQ6_MEMTYPE_SHIFT 16 - /****** Key Storage Block ******/ #define KSB_START 77 #define KSB_END 127 @@ -114,7 +112,7 @@ #define CCP_JOBID_MASK 0x0000003f #define CCP_DMAPOOL_MAX_SIZE 64 -#define CCP_DMAPOOL_ALIGN (1 << 5) +#define CCP_DMAPOOL_ALIGN BIT(5) #define CCP_REVERSE_BUF_SIZE 64 @@ -142,7 +140,6 @@ #define CCP_ECC_RESULT_OFFSET 60 #define CCP_ECC_RESULT_SUCCESS 0x0001 - struct ccp_device; struct ccp_cmd; @@ -261,7 +258,6 @@ struct ccp_device { unsigned int axcache; }; - int ccp_pci_init(void); void ccp_pci_exit(void); diff --git a/drivers/crypto/ccp/ccp-ops.c b/drivers/crypto/ccp/ccp-ops.c index 8729364261d7..71f2e3c89424 100644 --- a/drivers/crypto/ccp/ccp-ops.c +++ b/drivers/crypto/ccp/ccp-ops.c @@ -27,7 +27,6 @@ #include "ccp-dev.h" - enum ccp_memtype { CCP_MEMTYPE_SYSTEM = 0, CCP_MEMTYPE_KSB, @@ -515,7 +514,6 @@ static int ccp_init_sg_workarea(struct ccp_sg_workarea *wa, struct device *dev, if (!wa->dma_count) return -ENOMEM; - return 0; } @@ -763,8 +761,9 @@ static void ccp_prepare_data(struct ccp_data *src, struct ccp_data *dst, sg_dst_len = sg_dma_len(dst->sg_wa.sg) - dst->sg_wa.sg_used; sg_dst_len = min_t(u64, src->sg_wa.bytes_left, sg_dst_len); op_len = min(sg_src_len, sg_dst_len); - } else + } else { op_len = sg_src_len; + } /* The data operation length will be at least block_size in length * or the smaller of available sg room remaining for the source or @@ -1131,9 +1130,9 @@ static int ccp_run_aes_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd) if (ret) goto e_ctx; - if (in_place) + if (in_place) { dst = src; - else { + } else { ret = ccp_init_data(&dst, cmd_q, aes->dst, aes->src_len, AES_BLOCK_SIZE, DMA_FROM_DEVICE); if (ret) @@ -1304,9 +1303,9 @@ static int ccp_run_xts_aes_cmd(struct ccp_cmd_queue *cmd_q, if (ret) goto e_ctx; - if (in_place) + if (in_place) { dst = src; - else { + } else { ret = ccp_init_data(&dst, cmd_q, xts->dst, xts->src_len, unit_size, DMA_FROM_DEVICE); if (ret) @@ -1451,8 +1450,9 @@ static int ccp_run_sha_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd) goto e_ctx; } memcpy(ctx.address, init, CCP_SHA_CTXSIZE); - } else + } else { ccp_set_dm_area(&ctx, 0, sha->ctx, 0, sha->ctx_len); + } ret = ccp_copy_to_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx, CCP_PASSTHRU_BYTESWAP_256BIT); @@ -1732,9 +1732,9 @@ static int ccp_run_passthru_cmd(struct ccp_cmd_queue *cmd_q, if (ret) goto e_mask; - if (in_place) + if (in_place) { dst = src; - else { + } else { ret = ccp_init_data(&dst, cmd_q, pt->dst, pt->src_len, CCP_PASSTHRU_MASKSIZE, DMA_FROM_DEVICE); if (ret) @@ -1974,7 +1974,7 @@ static int ccp_run_ecc_pm_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd) src.address += CCP_ECC_OPERAND_SIZE; /* Set the first point Z coordianate to 1 */ - *(src.address) = 0x01; + *src.address = 0x01; src.address += CCP_ECC_OPERAND_SIZE; if (ecc->function == CCP_ECC_FUNCTION_PADD_384BIT) { @@ -1989,7 +1989,7 @@ static int ccp_run_ecc_pm_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd) src.address += CCP_ECC_OPERAND_SIZE; /* Set the second point Z coordianate to 1 */ - *(src.address) = 0x01; + *src.address = 0x01; src.address += CCP_ECC_OPERAND_SIZE; } else { /* Copy the Domain "a" parameter */ diff --git a/drivers/crypto/ccp/ccp-pci.c b/drivers/crypto/ccp/ccp-pci.c index 7f89c946adfe..af190d4795a8 100644 --- a/drivers/crypto/ccp/ccp-pci.c +++ b/drivers/crypto/ccp/ccp-pci.c @@ -174,11 +174,10 @@ static int ccp_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) if (!ccp) goto e_err; - ccp_pci = kzalloc(sizeof(*ccp_pci), GFP_KERNEL); - if (!ccp_pci) { - ret = -ENOMEM; - goto e_free1; - } + ccp_pci = devm_kzalloc(dev, sizeof(*ccp_pci), GFP_KERNEL); + if (!ccp_pci) + goto e_err; + ccp->dev_specific = ccp_pci; ccp->get_irq = ccp_get_irqs; ccp->free_irq = ccp_free_irqs; @@ -186,7 +185,7 @@ static int ccp_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) ret = pci_request_regions(pdev, "ccp"); if (ret) { dev_err(dev, "pci_request_regions failed (%d)\n", ret); - goto e_free2; + goto e_err; } ret = pci_enable_device(pdev); @@ -204,7 +203,7 @@ static int ccp_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) ret = -EIO; ccp->io_map = pci_iomap(pdev, bar, 0); - if (ccp->io_map == NULL) { + if (!ccp->io_map) { dev_err(dev, "pci_iomap failed\n"); goto e_device; } @@ -239,12 +238,6 @@ e_device: e_regions: pci_release_regions(pdev); -e_free2: - kfree(ccp_pci); - -e_free1: - kfree(ccp); - e_err: dev_notice(dev, "initialization failed\n"); return ret; @@ -266,8 +259,6 @@ static void ccp_pci_remove(struct pci_dev *pdev) pci_release_regions(pdev); - kfree(ccp); - dev_notice(dev, "disabled\n"); } diff --git a/drivers/crypto/ccp/ccp-platform.c b/drivers/crypto/ccp/ccp-platform.c index 8c50bad25f7e..b1c20b2b5647 100644 --- a/drivers/crypto/ccp/ccp-platform.c +++ b/drivers/crypto/ccp/ccp-platform.c @@ -23,9 +23,15 @@ #include <linux/delay.h> #include <linux/ccp.h> #include <linux/of.h> +#include <linux/of_address.h> +#include <linux/acpi.h> #include "ccp-dev.h" +struct ccp_platform { + int use_acpi; + int coherent; +}; static int ccp_get_irq(struct ccp_device *ccp) { @@ -84,10 +90,64 @@ static struct resource *ccp_find_mmio_area(struct ccp_device *ccp) return NULL; } +#ifdef CONFIG_ACPI +static int ccp_acpi_support(struct ccp_device *ccp) +{ + struct ccp_platform *ccp_platform = ccp->dev_specific; + struct acpi_device *adev = ACPI_COMPANION(ccp->dev); + acpi_handle handle; + acpi_status status; + unsigned long long data; + int cca; + + /* Retrieve the device cache coherency value */ + handle = adev->handle; + do { + status = acpi_evaluate_integer(handle, "_CCA", NULL, &data); + if (!ACPI_FAILURE(status)) { + cca = data; + break; + } + } while (!ACPI_FAILURE(status)); + + if (ACPI_FAILURE(status)) { + dev_err(ccp->dev, "error obtaining acpi coherency value\n"); + return -EINVAL; + } + + ccp_platform->coherent = !!cca; + + return 0; +} +#else /* CONFIG_ACPI */ +static int ccp_acpi_support(struct ccp_device *ccp) +{ + return -EINVAL; +} +#endif + +#ifdef CONFIG_OF +static int ccp_of_support(struct ccp_device *ccp) +{ + struct ccp_platform *ccp_platform = ccp->dev_specific; + + ccp_platform->coherent = of_dma_is_coherent(ccp->dev->of_node); + + return 0; +} +#else +static int ccp_of_support(struct ccp_device *ccp) +{ + return -EINVAL; +} +#endif + static int ccp_platform_probe(struct platform_device *pdev) { struct ccp_device *ccp; + struct ccp_platform *ccp_platform; struct device *dev = &pdev->dev; + struct acpi_device *adev = ACPI_COMPANION(dev); struct resource *ior; int ret; @@ -96,24 +156,40 @@ static int ccp_platform_probe(struct platform_device *pdev) if (!ccp) goto e_err; - ccp->dev_specific = NULL; + ccp_platform = devm_kzalloc(dev, sizeof(*ccp_platform), GFP_KERNEL); + if (!ccp_platform) + goto e_err; + + ccp->dev_specific = ccp_platform; ccp->get_irq = ccp_get_irqs; ccp->free_irq = ccp_free_irqs; + ccp_platform->use_acpi = (!adev || acpi_disabled) ? 0 : 1; + ior = ccp_find_mmio_area(ccp); ccp->io_map = devm_ioremap_resource(dev, ior); if (IS_ERR(ccp->io_map)) { ret = PTR_ERR(ccp->io_map); - goto e_free; + goto e_err; } ccp->io_regs = ccp->io_map; if (!dev->dma_mask) dev->dma_mask = &dev->coherent_dma_mask; - *(dev->dma_mask) = DMA_BIT_MASK(48); - dev->coherent_dma_mask = DMA_BIT_MASK(48); + ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(48)); + if (ret) { + dev_err(dev, "dma_set_mask_and_coherent failed (%d)\n", ret); + goto e_err; + } + + if (ccp_platform->use_acpi) + ret = ccp_acpi_support(ccp); + else + ret = ccp_of_support(ccp); + if (ret) + goto e_err; - if (of_property_read_bool(dev->of_node, "dma-coherent")) + if (ccp_platform->coherent) ccp->axcache = CACHE_WB_NO_ALLOC; else ccp->axcache = CACHE_NONE; @@ -122,15 +198,12 @@ static int ccp_platform_probe(struct platform_device *pdev) ret = ccp_init(ccp); if (ret) - goto e_free; + goto e_err; dev_notice(dev, "enabled\n"); return 0; -e_free: - kfree(ccp); - e_err: dev_notice(dev, "initialization failed\n"); return ret; @@ -143,8 +216,6 @@ static int ccp_platform_remove(struct platform_device *pdev) ccp_destroy(ccp); - kfree(ccp); - dev_notice(dev, "disabled\n"); return 0; @@ -200,15 +271,29 @@ static int ccp_platform_resume(struct platform_device *pdev) } #endif -static const struct of_device_id ccp_platform_ids[] = { +#ifdef CONFIG_ACPI +static const struct acpi_device_id ccp_acpi_match[] = { + { "AMDI0C00", 0 }, + { }, +}; +#endif + +#ifdef CONFIG_OF +static const struct of_device_id ccp_of_match[] = { { .compatible = "amd,ccp-seattle-v1a" }, { }, }; +#endif static struct platform_driver ccp_platform_driver = { .driver = { .name = "AMD Cryptographic Coprocessor", - .of_match_table = ccp_platform_ids, +#ifdef CONFIG_ACPI + .acpi_match_table = ccp_acpi_match, +#endif +#ifdef CONFIG_OF + .of_match_table = ccp_of_match, +#endif }, .probe = ccp_platform_probe, .remove = ccp_platform_remove, diff --git a/drivers/crypto/img-hash.c b/drivers/crypto/img-hash.c new file mode 100644 index 000000000000..ad47d0d61098 --- /dev/null +++ b/drivers/crypto/img-hash.c @@ -0,0 +1,1029 @@ +/* + * Copyright (c) 2014 Imagination Technologies + * Authors: Will Thomas, James Hartley + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation. + * + * Interface structure taken from omap-sham driver + */ + +#include <linux/clk.h> +#include <linux/dmaengine.h> +#include <linux/interrupt.h> +#include <linux/io.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/of_device.h> +#include <linux/platform_device.h> +#include <linux/scatterlist.h> + +#include <crypto/internal/hash.h> +#include <crypto/md5.h> +#include <crypto/sha.h> + +#define CR_RESET 0 +#define CR_RESET_SET 1 +#define CR_RESET_UNSET 0 + +#define CR_MESSAGE_LENGTH_H 0x4 +#define CR_MESSAGE_LENGTH_L 0x8 + +#define CR_CONTROL 0xc +#define CR_CONTROL_BYTE_ORDER_3210 0 +#define CR_CONTROL_BYTE_ORDER_0123 1 +#define CR_CONTROL_BYTE_ORDER_2310 2 +#define CR_CONTROL_BYTE_ORDER_1032 3 +#define CR_CONTROL_BYTE_ORDER_SHIFT 8 +#define CR_CONTROL_ALGO_MD5 0 +#define CR_CONTROL_ALGO_SHA1 1 +#define CR_CONTROL_ALGO_SHA224 2 +#define CR_CONTROL_ALGO_SHA256 3 + +#define CR_INTSTAT 0x10 +#define CR_INTENAB 0x14 +#define CR_INTCLEAR 0x18 +#define CR_INT_RESULTS_AVAILABLE BIT(0) +#define CR_INT_NEW_RESULTS_SET BIT(1) +#define CR_INT_RESULT_READ_ERR BIT(2) +#define CR_INT_MESSAGE_WRITE_ERROR BIT(3) +#define CR_INT_STATUS BIT(8) + +#define CR_RESULT_QUEUE 0x1c +#define CR_RSD0 0x40 +#define CR_CORE_REV 0x50 +#define CR_CORE_DES1 0x60 +#define CR_CORE_DES2 0x70 + +#define DRIVER_FLAGS_BUSY BIT(0) +#define DRIVER_FLAGS_FINAL BIT(1) +#define DRIVER_FLAGS_DMA_ACTIVE BIT(2) +#define DRIVER_FLAGS_OUTPUT_READY BIT(3) +#define DRIVER_FLAGS_INIT BIT(4) +#define DRIVER_FLAGS_CPU BIT(5) +#define DRIVER_FLAGS_DMA_READY BIT(6) +#define DRIVER_FLAGS_ERROR BIT(7) +#define DRIVER_FLAGS_SG BIT(8) +#define DRIVER_FLAGS_SHA1 BIT(18) +#define DRIVER_FLAGS_SHA224 BIT(19) +#define DRIVER_FLAGS_SHA256 BIT(20) +#define DRIVER_FLAGS_MD5 BIT(21) + +#define IMG_HASH_QUEUE_LENGTH 20 +#define IMG_HASH_DMA_THRESHOLD 64 + +#ifdef __LITTLE_ENDIAN +#define IMG_HASH_BYTE_ORDER CR_CONTROL_BYTE_ORDER_3210 +#else +#define IMG_HASH_BYTE_ORDER CR_CONTROL_BYTE_ORDER_0123 +#endif + +struct img_hash_dev; + +struct img_hash_request_ctx { + struct img_hash_dev *hdev; + u8 digest[SHA256_DIGEST_SIZE] __aligned(sizeof(u32)); + unsigned long flags; + size_t digsize; + + dma_addr_t dma_addr; + size_t dma_ct; + + /* sg root */ + struct scatterlist *sgfirst; + /* walk state */ + struct scatterlist *sg; + size_t nents; + size_t offset; + unsigned int total; + size_t sent; + + unsigned long op; + + size_t bufcnt; + u8 buffer[0] __aligned(sizeof(u32)); + struct ahash_request fallback_req; +}; + +struct img_hash_ctx { + struct img_hash_dev *hdev; + unsigned long flags; + struct crypto_ahash *fallback; +}; + +struct img_hash_dev { + struct list_head list; + struct device *dev; + struct clk *hash_clk; + struct clk *sys_clk; + void __iomem *io_base; + + phys_addr_t bus_addr; + void __iomem *cpu_addr; + + spinlock_t lock; + int err; + struct tasklet_struct done_task; + struct tasklet_struct dma_task; + + unsigned long flags; + struct crypto_queue queue; + struct ahash_request *req; + + struct dma_chan *dma_lch; +}; + +struct img_hash_drv { + struct list_head dev_list; + spinlock_t lock; +}; + +static struct img_hash_drv img_hash = { + .dev_list = LIST_HEAD_INIT(img_hash.dev_list), + .lock = __SPIN_LOCK_UNLOCKED(img_hash.lock), +}; + +static inline u32 img_hash_read(struct img_hash_dev *hdev, u32 offset) +{ + return readl_relaxed(hdev->io_base + offset); +} + +static inline void img_hash_write(struct img_hash_dev *hdev, + u32 offset, u32 value) +{ + writel_relaxed(value, hdev->io_base + offset); +} + +static inline u32 img_hash_read_result_queue(struct img_hash_dev *hdev) +{ + return be32_to_cpu(img_hash_read(hdev, CR_RESULT_QUEUE)); +} + +static void img_hash_start(struct img_hash_dev *hdev, bool dma) +{ + struct img_hash_request_ctx *ctx = ahash_request_ctx(hdev->req); + u32 cr = IMG_HASH_BYTE_ORDER << CR_CONTROL_BYTE_ORDER_SHIFT; + + if (ctx->flags & DRIVER_FLAGS_MD5) + cr |= CR_CONTROL_ALGO_MD5; + else if (ctx->flags & DRIVER_FLAGS_SHA1) + cr |= CR_CONTROL_ALGO_SHA1; + else if (ctx->flags & DRIVER_FLAGS_SHA224) + cr |= CR_CONTROL_ALGO_SHA224; + else if (ctx->flags & DRIVER_FLAGS_SHA256) + cr |= CR_CONTROL_ALGO_SHA256; + dev_dbg(hdev->dev, "Starting hash process\n"); + img_hash_write(hdev, CR_CONTROL, cr); + + /* + * The hardware block requires two cycles between writing the control + * register and writing the first word of data in non DMA mode, to + * ensure the first data write is not grouped in burst with the control + * register write a read is issued to 'flush' the bus. + */ + if (!dma) + img_hash_read(hdev, CR_CONTROL); +} + +static int img_hash_xmit_cpu(struct img_hash_dev *hdev, const u8 *buf, + size_t length, int final) +{ + u32 count, len32; + const u32 *buffer = (const u32 *)buf; + + dev_dbg(hdev->dev, "xmit_cpu: length: %zu bytes\n", length); + + if (final) + hdev->flags |= DRIVER_FLAGS_FINAL; + + len32 = DIV_ROUND_UP(length, sizeof(u32)); + + for (count = 0; count < len32; count++) + writel_relaxed(buffer[count], hdev->cpu_addr); + + return -EINPROGRESS; +} + +static void img_hash_dma_callback(void *data) +{ + struct img_hash_dev *hdev = (struct img_hash_dev *)data; + struct img_hash_request_ctx *ctx = ahash_request_ctx(hdev->req); + + if (ctx->bufcnt) { + img_hash_xmit_cpu(hdev, ctx->buffer, ctx->bufcnt, 0); + ctx->bufcnt = 0; + } + if (ctx->sg) + tasklet_schedule(&hdev->dma_task); +} + +static int img_hash_xmit_dma(struct img_hash_dev *hdev, struct scatterlist *sg) +{ + struct dma_async_tx_descriptor *desc; + struct img_hash_request_ctx *ctx = ahash_request_ctx(hdev->req); + + ctx->dma_ct = dma_map_sg(hdev->dev, sg, 1, DMA_MEM_TO_DEV); + if (ctx->dma_ct == 0) { + dev_err(hdev->dev, "Invalid DMA sg\n"); + hdev->err = -EINVAL; + return -EINVAL; + } + + desc = dmaengine_prep_slave_sg(hdev->dma_lch, + sg, + ctx->dma_ct, + DMA_MEM_TO_DEV, + DMA_PREP_INTERRUPT | DMA_CTRL_ACK); + if (!desc) { + dev_err(hdev->dev, "Null DMA descriptor\n"); + hdev->err = -EINVAL; + dma_unmap_sg(hdev->dev, sg, 1, DMA_MEM_TO_DEV); + return -EINVAL; + } + desc->callback = img_hash_dma_callback; + desc->callback_param = hdev; + dmaengine_submit(desc); + dma_async_issue_pending(hdev->dma_lch); + + return 0; +} + +static int img_hash_write_via_cpu(struct img_hash_dev *hdev) +{ + struct img_hash_request_ctx *ctx = ahash_request_ctx(hdev->req); + + ctx->bufcnt = sg_copy_to_buffer(hdev->req->src, sg_nents(ctx->sg), + ctx->buffer, hdev->req->nbytes); + + ctx->total = hdev->req->nbytes; + ctx->bufcnt = 0; + + hdev->flags |= (DRIVER_FLAGS_CPU | DRIVER_FLAGS_FINAL); + + img_hash_start(hdev, false); + + return img_hash_xmit_cpu(hdev, ctx->buffer, ctx->total, 1); +} + +static int img_hash_finish(struct ahash_request *req) +{ + struct img_hash_request_ctx *ctx = ahash_request_ctx(req); + + if (!req->result) + return -EINVAL; + + memcpy(req->result, ctx->digest, ctx->digsize); + + return 0; +} + +static void img_hash_copy_hash(struct ahash_request *req) +{ + struct img_hash_request_ctx *ctx = ahash_request_ctx(req); + u32 *hash = (u32 *)ctx->digest; + int i; + + for (i = (ctx->digsize / sizeof(u32)) - 1; i >= 0; i--) + hash[i] = img_hash_read_result_queue(ctx->hdev); +} + +static void img_hash_finish_req(struct ahash_request *req, int err) +{ + struct img_hash_request_ctx *ctx = ahash_request_ctx(req); + struct img_hash_dev *hdev = ctx->hdev; + + if (!err) { + img_hash_copy_hash(req); + if (DRIVER_FLAGS_FINAL & hdev->flags) + err = img_hash_finish(req); + } else { + dev_warn(hdev->dev, "Hash failed with error %d\n", err); + ctx->flags |= DRIVER_FLAGS_ERROR; + } + + hdev->flags &= ~(DRIVER_FLAGS_DMA_READY | DRIVER_FLAGS_OUTPUT_READY | + DRIVER_FLAGS_CPU | DRIVER_FLAGS_BUSY | DRIVER_FLAGS_FINAL); + + if (req->base.complete) + req->base.complete(&req->base, err); +} + +static int img_hash_write_via_dma(struct img_hash_dev *hdev) +{ + struct img_hash_request_ctx *ctx = ahash_request_ctx(hdev->req); + + img_hash_start(hdev, true); + + dev_dbg(hdev->dev, "xmit dma size: %d\n", ctx->total); + + if (!ctx->total) + hdev->flags |= DRIVER_FLAGS_FINAL; + + hdev->flags |= DRIVER_FLAGS_DMA_ACTIVE | DRIVER_FLAGS_FINAL; + + tasklet_schedule(&hdev->dma_task); + + return -EINPROGRESS; +} + +static int img_hash_dma_init(struct img_hash_dev *hdev) +{ + struct dma_slave_config dma_conf; + int err = -EINVAL; + + hdev->dma_lch = dma_request_slave_channel(hdev->dev, "tx"); + if (!hdev->dma_lch) { + dev_err(hdev->dev, "Couldn't aquire a slave DMA channel.\n"); + return -EBUSY; + } + dma_conf.direction = DMA_MEM_TO_DEV; + dma_conf.dst_addr = hdev->bus_addr; + dma_conf.dst_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES; + dma_conf.dst_maxburst = 16; + dma_conf.device_fc = false; + + err = dmaengine_slave_config(hdev->dma_lch, &dma_conf); + if (err) { + dev_err(hdev->dev, "Couldn't configure DMA slave.\n"); + dma_release_channel(hdev->dma_lch); + return err; + } + + return 0; +} + +static void img_hash_dma_task(unsigned long d) +{ + struct img_hash_dev *hdev = (struct img_hash_dev *)d; + struct img_hash_request_ctx *ctx = ahash_request_ctx(hdev->req); + u8 *addr; + size_t nbytes, bleft, wsend, len, tbc; + struct scatterlist tsg; + + if (!ctx->sg) + return; + + addr = sg_virt(ctx->sg); + nbytes = ctx->sg->length - ctx->offset; + + /* + * The hash accelerator does not support a data valid mask. This means + * that if each dma (i.e. per page) is not a multiple of 4 bytes, the + * padding bytes in the last word written by that dma would erroneously + * be included in the hash. To avoid this we round down the transfer, + * and add the excess to the start of the next dma. It does not matter + * that the final dma may not be a multiple of 4 bytes as the hashing + * block is programmed to accept the correct number of bytes. + */ + + bleft = nbytes % 4; + wsend = (nbytes / 4); + + if (wsend) { + sg_init_one(&tsg, addr + ctx->offset, wsend * 4); + if (img_hash_xmit_dma(hdev, &tsg)) { + dev_err(hdev->dev, "DMA failed, falling back to CPU"); + ctx->flags |= DRIVER_FLAGS_CPU; + hdev->err = 0; + img_hash_xmit_cpu(hdev, addr + ctx->offset, + wsend * 4, 0); + ctx->sent += wsend * 4; + wsend = 0; + } else { + ctx->sent += wsend * 4; + } + } + + if (bleft) { + ctx->bufcnt = sg_pcopy_to_buffer(ctx->sgfirst, ctx->nents, + ctx->buffer, bleft, ctx->sent); + tbc = 0; + ctx->sg = sg_next(ctx->sg); + while (ctx->sg && (ctx->bufcnt < 4)) { + len = ctx->sg->length; + if (likely(len > (4 - ctx->bufcnt))) + len = 4 - ctx->bufcnt; + tbc = sg_pcopy_to_buffer(ctx->sgfirst, ctx->nents, + ctx->buffer + ctx->bufcnt, len, + ctx->sent + ctx->bufcnt); + ctx->bufcnt += tbc; + if (tbc >= ctx->sg->length) { + ctx->sg = sg_next(ctx->sg); + tbc = 0; + } + } + + ctx->sent += ctx->bufcnt; + ctx->offset = tbc; + + if (!wsend) + img_hash_dma_callback(hdev); + } else { + ctx->offset = 0; + ctx->sg = sg_next(ctx->sg); + } +} + +static int img_hash_write_via_dma_stop(struct img_hash_dev *hdev) +{ + struct img_hash_request_ctx *ctx = ahash_request_ctx(hdev->req); + + if (ctx->flags & DRIVER_FLAGS_SG) + dma_unmap_sg(hdev->dev, ctx->sg, ctx->dma_ct, DMA_TO_DEVICE); + + return 0; +} + +static int img_hash_process_data(struct img_hash_dev *hdev) +{ + struct ahash_request *req = hdev->req; + struct img_hash_request_ctx *ctx = ahash_request_ctx(req); + int err = 0; + + ctx->bufcnt = 0; + + if (req->nbytes >= IMG_HASH_DMA_THRESHOLD) { + dev_dbg(hdev->dev, "process data request(%d bytes) using DMA\n", + req->nbytes); + err = img_hash_write_via_dma(hdev); + } else { + dev_dbg(hdev->dev, "process data request(%d bytes) using CPU\n", + req->nbytes); + err = img_hash_write_via_cpu(hdev); + } + return err; +} + +static int img_hash_hw_init(struct img_hash_dev *hdev) +{ + unsigned long long nbits; + u32 u, l; + + img_hash_write(hdev, CR_RESET, CR_RESET_SET); + img_hash_write(hdev, CR_RESET, CR_RESET_UNSET); + img_hash_write(hdev, CR_INTENAB, CR_INT_NEW_RESULTS_SET); + + nbits = (u64)hdev->req->nbytes << 3; + u = nbits >> 32; + l = nbits; + img_hash_write(hdev, CR_MESSAGE_LENGTH_H, u); + img_hash_write(hdev, CR_MESSAGE_LENGTH_L, l); + + if (!(DRIVER_FLAGS_INIT & hdev->flags)) { + hdev->flags |= DRIVER_FLAGS_INIT; + hdev->err = 0; + } + dev_dbg(hdev->dev, "hw initialized, nbits: %llx\n", nbits); + return 0; +} + +static int img_hash_init(struct ahash_request *req) +{ + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct img_hash_request_ctx *rctx = ahash_request_ctx(req); + struct img_hash_ctx *ctx = crypto_ahash_ctx(tfm); + + ahash_request_set_tfm(&rctx->fallback_req, ctx->fallback); + rctx->fallback_req.base.flags = req->base.flags + & CRYPTO_TFM_REQ_MAY_SLEEP; + + return crypto_ahash_init(&rctx->fallback_req); +} + +static int img_hash_handle_queue(struct img_hash_dev *hdev, + struct ahash_request *req) +{ + struct crypto_async_request *async_req, *backlog; + struct img_hash_request_ctx *ctx; + unsigned long flags; + int err = 0, res = 0; + + spin_lock_irqsave(&hdev->lock, flags); + + if (req) + res = ahash_enqueue_request(&hdev->queue, req); + + if (DRIVER_FLAGS_BUSY & hdev->flags) { + spin_unlock_irqrestore(&hdev->lock, flags); + return res; + } + + backlog = crypto_get_backlog(&hdev->queue); + async_req = crypto_dequeue_request(&hdev->queue); + if (async_req) + hdev->flags |= DRIVER_FLAGS_BUSY; + + spin_unlock_irqrestore(&hdev->lock, flags); + + if (!async_req) + return res; + + if (backlog) + backlog->complete(backlog, -EINPROGRESS); + + req = ahash_request_cast(async_req); + hdev->req = req; + + ctx = ahash_request_ctx(req); + + dev_info(hdev->dev, "processing req, op: %lu, bytes: %d\n", + ctx->op, req->nbytes); + + err = img_hash_hw_init(hdev); + + if (!err) + err = img_hash_process_data(hdev); + + if (err != -EINPROGRESS) { + /* done_task will not finish so do it here */ + img_hash_finish_req(req, err); + } + return res; +} + +static int img_hash_update(struct ahash_request *req) +{ + struct img_hash_request_ctx *rctx = ahash_request_ctx(req); + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct img_hash_ctx *ctx = crypto_ahash_ctx(tfm); + + ahash_request_set_tfm(&rctx->fallback_req, ctx->fallback); + rctx->fallback_req.base.flags = req->base.flags + & CRYPTO_TFM_REQ_MAY_SLEEP; + rctx->fallback_req.nbytes = req->nbytes; + rctx->fallback_req.src = req->src; + + return crypto_ahash_update(&rctx->fallback_req); +} + +static int img_hash_final(struct ahash_request *req) +{ + struct img_hash_request_ctx *rctx = ahash_request_ctx(req); + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct img_hash_ctx *ctx = crypto_ahash_ctx(tfm); + + ahash_request_set_tfm(&rctx->fallback_req, ctx->fallback); + rctx->fallback_req.base.flags = req->base.flags + & CRYPTO_TFM_REQ_MAY_SLEEP; + rctx->fallback_req.result = req->result; + + return crypto_ahash_final(&rctx->fallback_req); +} + +static int img_hash_finup(struct ahash_request *req) +{ + struct img_hash_request_ctx *rctx = ahash_request_ctx(req); + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct img_hash_ctx *ctx = crypto_ahash_ctx(tfm); + + ahash_request_set_tfm(&rctx->fallback_req, ctx->fallback); + rctx->fallback_req.base.flags = req->base.flags + & CRYPTO_TFM_REQ_MAY_SLEEP; + rctx->fallback_req.nbytes = req->nbytes; + rctx->fallback_req.src = req->src; + rctx->fallback_req.result = req->result; + + return crypto_ahash_finup(&rctx->fallback_req); +} + +static int img_hash_digest(struct ahash_request *req) +{ + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct img_hash_ctx *tctx = crypto_ahash_ctx(tfm); + struct img_hash_request_ctx *ctx = ahash_request_ctx(req); + struct img_hash_dev *hdev = NULL; + struct img_hash_dev *tmp; + int err; + + spin_lock(&img_hash.lock); + if (!tctx->hdev) { + list_for_each_entry(tmp, &img_hash.dev_list, list) { + hdev = tmp; + break; + } + tctx->hdev = hdev; + + } else { + hdev = tctx->hdev; + } + + spin_unlock(&img_hash.lock); + ctx->hdev = hdev; + ctx->flags = 0; + ctx->digsize = crypto_ahash_digestsize(tfm); + + switch (ctx->digsize) { + case SHA1_DIGEST_SIZE: + ctx->flags |= DRIVER_FLAGS_SHA1; + break; + case SHA256_DIGEST_SIZE: + ctx->flags |= DRIVER_FLAGS_SHA256; + break; + case SHA224_DIGEST_SIZE: + ctx->flags |= DRIVER_FLAGS_SHA224; + break; + case MD5_DIGEST_SIZE: + ctx->flags |= DRIVER_FLAGS_MD5; + break; + default: + return -EINVAL; + } + + ctx->bufcnt = 0; + ctx->offset = 0; + ctx->sent = 0; + ctx->total = req->nbytes; + ctx->sg = req->src; + ctx->sgfirst = req->src; + ctx->nents = sg_nents(ctx->sg); + + err = img_hash_handle_queue(tctx->hdev, req); + + return err; +} + +static int img_hash_cra_init(struct crypto_tfm *tfm) +{ + struct img_hash_ctx *ctx = crypto_tfm_ctx(tfm); + const char *alg_name = crypto_tfm_alg_name(tfm); + int err = -ENOMEM; + + ctx->fallback = crypto_alloc_ahash(alg_name, 0, + CRYPTO_ALG_NEED_FALLBACK); + if (IS_ERR(ctx->fallback)) { + pr_err("img_hash: Could not load fallback driver.\n"); + err = PTR_ERR(ctx->fallback); + goto err; + } + crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm), + sizeof(struct img_hash_request_ctx) + + IMG_HASH_DMA_THRESHOLD); + + return 0; + +err: + return err; +} + +static void img_hash_cra_exit(struct crypto_tfm *tfm) +{ + struct img_hash_ctx *tctx = crypto_tfm_ctx(tfm); + + crypto_free_ahash(tctx->fallback); +} + +static irqreturn_t img_irq_handler(int irq, void *dev_id) +{ + struct img_hash_dev *hdev = dev_id; + u32 reg; + + reg = img_hash_read(hdev, CR_INTSTAT); + img_hash_write(hdev, CR_INTCLEAR, reg); + + if (reg & CR_INT_NEW_RESULTS_SET) { + dev_dbg(hdev->dev, "IRQ CR_INT_NEW_RESULTS_SET\n"); + if (DRIVER_FLAGS_BUSY & hdev->flags) { + hdev->flags |= DRIVER_FLAGS_OUTPUT_READY; + if (!(DRIVER_FLAGS_CPU & hdev->flags)) + hdev->flags |= DRIVER_FLAGS_DMA_READY; + tasklet_schedule(&hdev->done_task); + } else { + dev_warn(hdev->dev, + "HASH interrupt when no active requests.\n"); + } + } else if (reg & CR_INT_RESULTS_AVAILABLE) { + dev_warn(hdev->dev, + "IRQ triggered before the hash had completed\n"); + } else if (reg & CR_INT_RESULT_READ_ERR) { + dev_warn(hdev->dev, + "Attempt to read from an empty result queue\n"); + } else if (reg & CR_INT_MESSAGE_WRITE_ERROR) { + dev_warn(hdev->dev, + "Data written before the hardware was configured\n"); + } + return IRQ_HANDLED; +} + +static struct ahash_alg img_algs[] = { + { + .init = img_hash_init, + .update = img_hash_update, + .final = img_hash_final, + .finup = img_hash_finup, + .digest = img_hash_digest, + .halg = { + .digestsize = MD5_DIGEST_SIZE, + .base = { + .cra_name = "md5", + .cra_driver_name = "img-md5", + .cra_priority = 300, + .cra_flags = + CRYPTO_ALG_ASYNC | + CRYPTO_ALG_NEED_FALLBACK, + .cra_blocksize = MD5_HMAC_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct img_hash_ctx), + .cra_init = img_hash_cra_init, + .cra_exit = img_hash_cra_exit, + .cra_module = THIS_MODULE, + } + } + }, + { + .init = img_hash_init, + .update = img_hash_update, + .final = img_hash_final, + .finup = img_hash_finup, + .digest = img_hash_digest, + .halg = { + .digestsize = SHA1_DIGEST_SIZE, + .base = { + .cra_name = "sha1", + .cra_driver_name = "img-sha1", + .cra_priority = 300, + .cra_flags = + CRYPTO_ALG_ASYNC | + CRYPTO_ALG_NEED_FALLBACK, + .cra_blocksize = SHA1_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct img_hash_ctx), + .cra_init = img_hash_cra_init, + .cra_exit = img_hash_cra_exit, + .cra_module = THIS_MODULE, + } + } + }, + { + .init = img_hash_init, + .update = img_hash_update, + .final = img_hash_final, + .finup = img_hash_finup, + .digest = img_hash_digest, + .halg = { + .digestsize = SHA224_DIGEST_SIZE, + .base = { + .cra_name = "sha224", + .cra_driver_name = "img-sha224", + .cra_priority = 300, + .cra_flags = + CRYPTO_ALG_ASYNC | + CRYPTO_ALG_NEED_FALLBACK, + .cra_blocksize = SHA224_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct img_hash_ctx), + .cra_init = img_hash_cra_init, + .cra_exit = img_hash_cra_exit, + .cra_module = THIS_MODULE, + } + } + }, + { + .init = img_hash_init, + .update = img_hash_update, + .final = img_hash_final, + .finup = img_hash_finup, + .digest = img_hash_digest, + .halg = { + .digestsize = SHA256_DIGEST_SIZE, + .base = { + .cra_name = "sha256", + .cra_driver_name = "img-sha256", + .cra_priority = 300, + .cra_flags = + CRYPTO_ALG_ASYNC | + CRYPTO_ALG_NEED_FALLBACK, + .cra_blocksize = SHA256_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct img_hash_ctx), + .cra_init = img_hash_cra_init, + .cra_exit = img_hash_cra_exit, + .cra_module = THIS_MODULE, + } + } + } +}; + +static int img_register_algs(struct img_hash_dev *hdev) +{ + int i, err; + + for (i = 0; i < ARRAY_SIZE(img_algs); i++) { + err = crypto_register_ahash(&img_algs[i]); + if (err) + goto err_reg; + } + return 0; + +err_reg: + for (; i--; ) + crypto_unregister_ahash(&img_algs[i]); + + return err; +} + +static int img_unregister_algs(struct img_hash_dev *hdev) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(img_algs); i++) + crypto_unregister_ahash(&img_algs[i]); + return 0; +} + +static void img_hash_done_task(unsigned long data) +{ + struct img_hash_dev *hdev = (struct img_hash_dev *)data; + int err = 0; + + if (hdev->err == -EINVAL) { + err = hdev->err; + goto finish; + } + + if (!(DRIVER_FLAGS_BUSY & hdev->flags)) { + img_hash_handle_queue(hdev, NULL); + return; + } + + if (DRIVER_FLAGS_CPU & hdev->flags) { + if (DRIVER_FLAGS_OUTPUT_READY & hdev->flags) { + hdev->flags &= ~DRIVER_FLAGS_OUTPUT_READY; + goto finish; + } + } else if (DRIVER_FLAGS_DMA_READY & hdev->flags) { + if (DRIVER_FLAGS_DMA_ACTIVE & hdev->flags) { + hdev->flags &= ~DRIVER_FLAGS_DMA_ACTIVE; + img_hash_write_via_dma_stop(hdev); + if (hdev->err) { + err = hdev->err; + goto finish; + } + } + if (DRIVER_FLAGS_OUTPUT_READY & hdev->flags) { + hdev->flags &= ~(DRIVER_FLAGS_DMA_READY | + DRIVER_FLAGS_OUTPUT_READY); + goto finish; + } + } + return; + +finish: + img_hash_finish_req(hdev->req, err); +} + +static const struct of_device_id img_hash_match[] = { + { .compatible = "img,hash-accelerator" }, + {} +}; +MODULE_DEVICE_TABLE(of, img_hash_match); + +static int img_hash_probe(struct platform_device *pdev) +{ + struct img_hash_dev *hdev; + struct device *dev = &pdev->dev; + struct resource *hash_res; + int irq; + int err; + + hdev = devm_kzalloc(dev, sizeof(*hdev), GFP_KERNEL); + if (hdev == NULL) + return -ENOMEM; + + spin_lock_init(&hdev->lock); + + hdev->dev = dev; + + platform_set_drvdata(pdev, hdev); + + INIT_LIST_HEAD(&hdev->list); + + tasklet_init(&hdev->done_task, img_hash_done_task, (unsigned long)hdev); + tasklet_init(&hdev->dma_task, img_hash_dma_task, (unsigned long)hdev); + + crypto_init_queue(&hdev->queue, IMG_HASH_QUEUE_LENGTH); + + /* Register bank */ + hash_res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + + hdev->io_base = devm_ioremap_resource(dev, hash_res); + if (IS_ERR(hdev->io_base)) { + err = PTR_ERR(hdev->io_base); + dev_err(dev, "can't ioremap, returned %d\n", err); + + goto res_err; + } + + /* Write port (DMA or CPU) */ + hash_res = platform_get_resource(pdev, IORESOURCE_MEM, 1); + hdev->cpu_addr = devm_ioremap_resource(dev, hash_res); + if (IS_ERR(hdev->cpu_addr)) { + dev_err(dev, "can't ioremap write port\n"); + err = PTR_ERR(hdev->cpu_addr); + goto res_err; + } + hdev->bus_addr = hash_res->start; + + irq = platform_get_irq(pdev, 0); + if (irq < 0) { + dev_err(dev, "no IRQ resource info\n"); + err = irq; + goto res_err; + } + + err = devm_request_irq(dev, irq, img_irq_handler, 0, + dev_name(dev), hdev); + if (err) { + dev_err(dev, "unable to request irq\n"); + goto res_err; + } + dev_dbg(dev, "using IRQ channel %d\n", irq); + + hdev->hash_clk = devm_clk_get(&pdev->dev, "hash"); + if (IS_ERR(hdev->hash_clk)) { + dev_err(dev, "clock initialization failed.\n"); + err = PTR_ERR(hdev->hash_clk); + goto res_err; + } + + hdev->sys_clk = devm_clk_get(&pdev->dev, "sys"); + if (IS_ERR(hdev->sys_clk)) { + dev_err(dev, "clock initialization failed.\n"); + err = PTR_ERR(hdev->sys_clk); + goto res_err; + } + + err = clk_prepare_enable(hdev->hash_clk); + if (err) + goto res_err; + + err = clk_prepare_enable(hdev->sys_clk); + if (err) + goto clk_err; + + err = img_hash_dma_init(hdev); + if (err) + goto dma_err; + + dev_dbg(dev, "using %s for DMA transfers\n", + dma_chan_name(hdev->dma_lch)); + + spin_lock(&img_hash.lock); + list_add_tail(&hdev->list, &img_hash.dev_list); + spin_unlock(&img_hash.lock); + + err = img_register_algs(hdev); + if (err) + goto err_algs; + dev_dbg(dev, "Img MD5/SHA1/SHA224/SHA256 Hardware accelerator initialized\n"); + + return 0; + +err_algs: + spin_lock(&img_hash.lock); + list_del(&hdev->list); + spin_unlock(&img_hash.lock); + dma_release_channel(hdev->dma_lch); +dma_err: + clk_disable_unprepare(hdev->sys_clk); +clk_err: + clk_disable_unprepare(hdev->hash_clk); +res_err: + tasklet_kill(&hdev->done_task); + tasklet_kill(&hdev->dma_task); + + return err; +} + +static int img_hash_remove(struct platform_device *pdev) +{ + static struct img_hash_dev *hdev; + + hdev = platform_get_drvdata(pdev); + spin_lock(&img_hash.lock); + list_del(&hdev->list); + spin_unlock(&img_hash.lock); + + img_unregister_algs(hdev); + + tasklet_kill(&hdev->done_task); + tasklet_kill(&hdev->dma_task); + + dma_release_channel(hdev->dma_lch); + + clk_disable_unprepare(hdev->hash_clk); + clk_disable_unprepare(hdev->sys_clk); + + return 0; +} + +static struct platform_driver img_hash_driver = { + .probe = img_hash_probe, + .remove = img_hash_remove, + .driver = { + .name = "img-hash-accelerator", + .of_match_table = of_match_ptr(img_hash_match), + } +}; +module_platform_driver(img_hash_driver); + +MODULE_LICENSE("GPL v2"); +MODULE_DESCRIPTION("Imgtec SHA1/224/256 & MD5 hw accelerator driver"); +MODULE_AUTHOR("Will Thomas."); +MODULE_AUTHOR("James Hartley <james.hartley@imgtec.com>"); diff --git a/drivers/crypto/mxs-dcp.c b/drivers/crypto/mxs-dcp.c index 829d6394fb33..59ed54e464a9 100644 --- a/drivers/crypto/mxs-dcp.c +++ b/drivers/crypto/mxs-dcp.c @@ -153,7 +153,7 @@ static int mxs_dcp_start_dma(struct dcp_async_ctx *actx) struct dcp *sdcp = global_sdcp; const int chan = actx->chan; uint32_t stat; - int ret; + unsigned long ret; struct dcp_dma_desc *desc = &sdcp->coh->desc[actx->chan]; dma_addr_t desc_phys = dma_map_single(sdcp->dev, desc, sizeof(*desc), diff --git a/drivers/crypto/omap-aes.c b/drivers/crypto/omap-aes.c index 42f95a4326b0..9a28b7e07c71 100644 --- a/drivers/crypto/omap-aes.c +++ b/drivers/crypto/omap-aes.c @@ -554,15 +554,23 @@ static int omap_aes_crypt_dma_stop(struct omap_aes_dev *dd) return err; } -static int omap_aes_check_aligned(struct scatterlist *sg) +static int omap_aes_check_aligned(struct scatterlist *sg, int total) { + int len = 0; + while (sg) { if (!IS_ALIGNED(sg->offset, 4)) return -1; if (!IS_ALIGNED(sg->length, AES_BLOCK_SIZE)) return -1; + + len += sg->length; sg = sg_next(sg); } + + if (len != total) + return -1; + return 0; } @@ -633,8 +641,8 @@ static int omap_aes_handle_queue(struct omap_aes_dev *dd, dd->in_sg = req->src; dd->out_sg = req->dst; - if (omap_aes_check_aligned(dd->in_sg) || - omap_aes_check_aligned(dd->out_sg)) { + if (omap_aes_check_aligned(dd->in_sg, dd->total) || + omap_aes_check_aligned(dd->out_sg, dd->total)) { if (omap_aes_copy_sgs(dd)) pr_err("Failed to copy SGs for unaligned cases\n"); dd->sgs_copied = 1; diff --git a/drivers/crypto/omap-sham.c b/drivers/crypto/omap-sham.c index 3c76696ee578..4d63e0d4da9a 100644 --- a/drivers/crypto/omap-sham.c +++ b/drivers/crypto/omap-sham.c @@ -640,6 +640,7 @@ static size_t omap_sham_append_sg(struct omap_sham_reqctx *ctx) while (ctx->sg) { vaddr = kmap_atomic(sg_page(ctx->sg)); + vaddr += ctx->sg->offset; count = omap_sham_append_buffer(ctx, vaddr + ctx->offset, @@ -1945,6 +1946,7 @@ static int omap_sham_probe(struct platform_device *pdev) dd->flags |= dd->pdata->flags; pm_runtime_enable(dev); + pm_runtime_irq_safe(dev); pm_runtime_get_sync(dev); rev = omap_sham_read(dd, SHA_REG_REV(dd)); pm_runtime_put_sync(&pdev->dev); diff --git a/drivers/crypto/qat/qat_common/adf_accel_devices.h b/drivers/crypto/qat/qat_common/adf_accel_devices.h index 19c0efa29ab3..f22ce7169fa5 100644 --- a/drivers/crypto/qat/qat_common/adf_accel_devices.h +++ b/drivers/crypto/qat/qat_common/adf_accel_devices.h @@ -52,7 +52,6 @@ #include <linux/io.h> #include "adf_cfg_common.h" -#define PCI_VENDOR_ID_INTEL 0x8086 #define ADF_DH895XCC_DEVICE_NAME "dh895xcc" #define ADF_DH895XCC_PCI_DEVICE_ID 0x435 #define ADF_PCI_MAX_BARS 3 diff --git a/drivers/crypto/qat/qat_common/adf_accel_engine.c b/drivers/crypto/qat/qat_common/adf_accel_engine.c index c77453b900a3..7f8b66c915ed 100644 --- a/drivers/crypto/qat/qat_common/adf_accel_engine.c +++ b/drivers/crypto/qat/qat_common/adf_accel_engine.c @@ -60,36 +60,40 @@ int adf_ae_fw_load(struct adf_accel_dev *accel_dev) if (request_firmware(&loader_data->uof_fw, hw_device->fw_name, &accel_dev->accel_pci_dev.pci_dev->dev)) { - pr_err("QAT: Failed to load firmware %s\n", hw_device->fw_name); + dev_err(&GET_DEV(accel_dev), "Failed to load firmware %s\n", + hw_device->fw_name); return -EFAULT; } uof_size = loader_data->uof_fw->size; uof_addr = (void *)loader_data->uof_fw->data; if (qat_uclo_map_uof_obj(loader_data->fw_loader, uof_addr, uof_size)) { - pr_err("QAT: Failed to map UOF\n"); + dev_err(&GET_DEV(accel_dev), "Failed to map UOF\n"); goto out_err; } if (qat_uclo_wr_all_uimage(loader_data->fw_loader)) { - pr_err("QAT: Failed to map UOF\n"); + dev_err(&GET_DEV(accel_dev), "Failed to map UOF\n"); goto out_err; } return 0; out_err: - release_firmware(loader_data->uof_fw); + adf_ae_fw_release(accel_dev); return -EFAULT; } -int adf_ae_fw_release(struct adf_accel_dev *accel_dev) +void adf_ae_fw_release(struct adf_accel_dev *accel_dev) { struct adf_fw_loader_data *loader_data = accel_dev->fw_loader; - release_firmware(loader_data->uof_fw); qat_uclo_del_uof_obj(loader_data->fw_loader); qat_hal_deinit(loader_data->fw_loader); + + if (loader_data->uof_fw) + release_firmware(loader_data->uof_fw); + + loader_data->uof_fw = NULL; loader_data->fw_loader = NULL; - return 0; } int adf_ae_start(struct adf_accel_dev *accel_dev) @@ -104,8 +108,9 @@ int adf_ae_start(struct adf_accel_dev *accel_dev) ae_ctr++; } } - pr_info("QAT: qat_dev%d started %d acceleration engines\n", - accel_dev->accel_id, ae_ctr); + dev_info(&GET_DEV(accel_dev), + "qat_dev%d started %d acceleration engines\n", + accel_dev->accel_id, ae_ctr); return 0; } @@ -121,8 +126,9 @@ int adf_ae_stop(struct adf_accel_dev *accel_dev) ae_ctr++; } } - pr_info("QAT: qat_dev%d stopped %d acceleration engines\n", - accel_dev->accel_id, ae_ctr); + dev_info(&GET_DEV(accel_dev), + "qat_dev%d stopped %d acceleration engines\n", + accel_dev->accel_id, ae_ctr); return 0; } @@ -147,12 +153,12 @@ int adf_ae_init(struct adf_accel_dev *accel_dev) accel_dev->fw_loader = loader_data; if (qat_hal_init(accel_dev)) { - pr_err("QAT: Failed to init the AEs\n"); + dev_err(&GET_DEV(accel_dev), "Failed to init the AEs\n"); kfree(loader_data); return -EFAULT; } if (adf_ae_reset(accel_dev, 0)) { - pr_err("QAT: Failed to reset the AEs\n"); + dev_err(&GET_DEV(accel_dev), "Failed to reset the AEs\n"); qat_hal_deinit(loader_data->fw_loader); kfree(loader_data); return -EFAULT; @@ -162,6 +168,9 @@ int adf_ae_init(struct adf_accel_dev *accel_dev) int adf_ae_shutdown(struct adf_accel_dev *accel_dev) { + struct adf_fw_loader_data *loader_data = accel_dev->fw_loader; + + qat_hal_deinit(loader_data->fw_loader); kfree(accel_dev->fw_loader); accel_dev->fw_loader = NULL; return 0; diff --git a/drivers/crypto/qat/qat_common/adf_aer.c b/drivers/crypto/qat/qat_common/adf_aer.c index fa1fef824de2..2dbc733b8ab2 100644 --- a/drivers/crypto/qat/qat_common/adf_aer.c +++ b/drivers/crypto/qat/qat_common/adf_aer.c @@ -60,14 +60,14 @@ static pci_ers_result_t adf_error_detected(struct pci_dev *pdev, { struct adf_accel_dev *accel_dev = adf_devmgr_pci_to_accel_dev(pdev); - pr_info("QAT: Acceleration driver hardware error detected.\n"); + dev_info(&pdev->dev, "Acceleration driver hardware error detected.\n"); if (!accel_dev) { - pr_err("QAT: Can't find acceleration device\n"); + dev_err(&pdev->dev, "Can't find acceleration device\n"); return PCI_ERS_RESULT_DISCONNECT; } if (state == pci_channel_io_perm_failure) { - pr_err("QAT: Can't recover from device error\n"); + dev_err(&pdev->dev, "Can't recover from device error\n"); return PCI_ERS_RESULT_DISCONNECT; } @@ -88,10 +88,12 @@ static void adf_dev_restore(struct adf_accel_dev *accel_dev) struct pci_dev *parent = pdev->bus->self; uint16_t bridge_ctl = 0; - pr_info("QAT: Resetting device qat_dev%d\n", accel_dev->accel_id); + dev_info(&GET_DEV(accel_dev), "Resetting device qat_dev%d\n", + accel_dev->accel_id); if (!pci_wait_for_pending_transaction(pdev)) - pr_info("QAT: Transaction still in progress. Proceeding\n"); + dev_info(&GET_DEV(accel_dev), + "Transaction still in progress. Proceeding\n"); pci_read_config_word(parent, PCI_BRIDGE_CONTROL, &bridge_ctl); bridge_ctl |= PCI_BRIDGE_CTL_BUS_RESET; @@ -158,7 +160,8 @@ static int adf_dev_aer_schedule_reset(struct adf_accel_dev *accel_dev, unsigned long timeout = wait_for_completion_timeout( &reset_data->compl, wait_jiffies); if (!timeout) { - pr_err("QAT: Reset device timeout expired\n"); + dev_err(&GET_DEV(accel_dev), + "Reset device timeout expired\n"); ret = -EFAULT; } kfree(reset_data); @@ -184,8 +187,8 @@ static pci_ers_result_t adf_slot_reset(struct pci_dev *pdev) static void adf_resume(struct pci_dev *pdev) { - pr_info("QAT: Acceleration driver reset completed\n"); - pr_info("QAT: Device is up and runnig\n"); + dev_info(&pdev->dev, "Acceleration driver reset completed\n"); + dev_info(&pdev->dev, "Device is up and runnig\n"); } static struct pci_error_handlers adf_err_handler = { @@ -236,7 +239,7 @@ EXPORT_SYMBOL_GPL(adf_disable_aer); int adf_init_aer(void) { device_reset_wq = create_workqueue("qat_device_reset_wq"); - return (device_reset_wq == NULL) ? -EFAULT : 0; + return !device_reset_wq ? -EFAULT : 0; } void adf_exit_aer(void) diff --git a/drivers/crypto/qat/qat_common/adf_cfg.c b/drivers/crypto/qat/qat_common/adf_cfg.c index de16da9070a5..ab65bc274561 100644 --- a/drivers/crypto/qat/qat_common/adf_cfg.c +++ b/drivers/crypto/qat/qat_common/adf_cfg.c @@ -142,7 +142,8 @@ int adf_cfg_dev_add(struct adf_accel_dev *accel_dev) dev_cfg_data, &qat_dev_cfg_fops); if (!dev_cfg_data->debug) { - pr_err("QAT: Failed to create qat cfg debugfs entry.\n"); + dev_err(&GET_DEV(accel_dev), + "Failed to create qat cfg debugfs entry.\n"); kfree(dev_cfg_data); accel_dev->cfg = NULL; return -EFAULT; @@ -305,7 +306,7 @@ int adf_cfg_add_key_value_param(struct adf_accel_dev *accel_dev, snprintf(key_val->val, ADF_CFG_MAX_VAL_LEN_IN_BYTES, "0x%lx", (unsigned long)val); } else { - pr_err("QAT: Unknown type given.\n"); + dev_err(&GET_DEV(accel_dev), "Unknown type given.\n"); kfree(key_val); return -1; } diff --git a/drivers/crypto/qat/qat_common/adf_cfg_strings.h b/drivers/crypto/qat/qat_common/adf_cfg_strings.h index c7ac758ebc90..13575111382c 100644 --- a/drivers/crypto/qat/qat_common/adf_cfg_strings.h +++ b/drivers/crypto/qat/qat_common/adf_cfg_strings.h @@ -59,7 +59,7 @@ #define ADF_RING_SYM_TX "RingSymTx" #define ADF_RING_RND_TX "RingNrbgTx" #define ADF_RING_ASYM_RX "RingAsymRx" -#define ADF_RING_SYM_RX "RinSymRx" +#define ADF_RING_SYM_RX "RingSymRx" #define ADF_RING_RND_RX "RingNrbgRx" #define ADF_RING_DC_TX "RingTx" #define ADF_RING_DC_RX "RingRx" @@ -69,15 +69,15 @@ #define ADF_DC "Dc" #define ADF_ETRMGR_COALESCING_ENABLED "InterruptCoalescingEnabled" #define ADF_ETRMGR_COALESCING_ENABLED_FORMAT \ - ADF_ETRMGR_BANK"%d"ADF_ETRMGR_COALESCING_ENABLED + ADF_ETRMGR_BANK "%d" ADF_ETRMGR_COALESCING_ENABLED #define ADF_ETRMGR_COALESCE_TIMER "InterruptCoalescingTimerNs" #define ADF_ETRMGR_COALESCE_TIMER_FORMAT \ - ADF_ETRMGR_BANK"%d"ADF_ETRMGR_COALESCE_TIMER + ADF_ETRMGR_BANK "%d" ADF_ETRMGR_COALESCE_TIMER #define ADF_ETRMGR_COALESCING_MSG_ENABLED "InterruptCoalescingNumResponses" #define ADF_ETRMGR_COALESCING_MSG_ENABLED_FORMAT \ - ADF_ETRMGR_BANK"%d"ADF_ETRMGR_COALESCING_MSG_ENABLED + ADF_ETRMGR_BANK "%d" ADF_ETRMGR_COALESCING_MSG_ENABLED #define ADF_ETRMGR_CORE_AFFINITY "CoreAffinity" #define ADF_ETRMGR_CORE_AFFINITY_FORMAT \ - ADF_ETRMGR_BANK"%d"ADF_ETRMGR_CORE_AFFINITY + ADF_ETRMGR_BANK "%d" ADF_ETRMGR_CORE_AFFINITY #define ADF_ACCEL_STR "Accelerator%d" #endif diff --git a/drivers/crypto/qat/qat_common/adf_common_drv.h b/drivers/crypto/qat/qat_common/adf_common_drv.h index a62e485c8786..0666ee6a3360 100644 --- a/drivers/crypto/qat/qat_common/adf_common_drv.h +++ b/drivers/crypto/qat/qat_common/adf_common_drv.h @@ -115,7 +115,7 @@ int adf_dev_restarted_notify(struct adf_accel_dev *accel_dev); int adf_ae_init(struct adf_accel_dev *accel_dev); int adf_ae_shutdown(struct adf_accel_dev *accel_dev); int adf_ae_fw_load(struct adf_accel_dev *accel_dev); -int adf_ae_fw_release(struct adf_accel_dev *accel_dev); +void adf_ae_fw_release(struct adf_accel_dev *accel_dev); int adf_ae_start(struct adf_accel_dev *accel_dev); int adf_ae_stop(struct adf_accel_dev *accel_dev); diff --git a/drivers/crypto/qat/qat_common/adf_ctl_drv.c b/drivers/crypto/qat/qat_common/adf_ctl_drv.c index 74207a6f0516..cb5f066e93a6 100644 --- a/drivers/crypto/qat/qat_common/adf_ctl_drv.c +++ b/drivers/crypto/qat/qat_common/adf_ctl_drv.c @@ -77,14 +77,14 @@ struct adf_ctl_drv_info { struct class *drv_class; }; -static struct adf_ctl_drv_info adt_ctl_drv; +static struct adf_ctl_drv_info adf_ctl_drv; static void adf_chr_drv_destroy(void) { - device_destroy(adt_ctl_drv.drv_class, MKDEV(adt_ctl_drv.major, 0)); - cdev_del(&adt_ctl_drv.drv_cdev); - class_destroy(adt_ctl_drv.drv_class); - unregister_chrdev_region(MKDEV(adt_ctl_drv.major, 0), 1); + device_destroy(adf_ctl_drv.drv_class, MKDEV(adf_ctl_drv.major, 0)); + cdev_del(&adf_ctl_drv.drv_cdev); + class_destroy(adf_ctl_drv.drv_class); + unregister_chrdev_region(MKDEV(adf_ctl_drv.major, 0), 1); } static int adf_chr_drv_create(void) @@ -97,20 +97,20 @@ static int adf_chr_drv_create(void) return -EFAULT; } - adt_ctl_drv.drv_class = class_create(THIS_MODULE, DEVICE_NAME); - if (IS_ERR(adt_ctl_drv.drv_class)) { + adf_ctl_drv.drv_class = class_create(THIS_MODULE, DEVICE_NAME); + if (IS_ERR(adf_ctl_drv.drv_class)) { pr_err("QAT: class_create failed for adf_ctl\n"); goto err_chrdev_unreg; } - adt_ctl_drv.major = MAJOR(dev_id); - cdev_init(&adt_ctl_drv.drv_cdev, &adf_ctl_ops); - if (cdev_add(&adt_ctl_drv.drv_cdev, dev_id, 1)) { + adf_ctl_drv.major = MAJOR(dev_id); + cdev_init(&adf_ctl_drv.drv_cdev, &adf_ctl_ops); + if (cdev_add(&adf_ctl_drv.drv_cdev, dev_id, 1)) { pr_err("QAT: cdev add failed\n"); goto err_class_destr; } - drv_device = device_create(adt_ctl_drv.drv_class, NULL, - MKDEV(adt_ctl_drv.major, 0), + drv_device = device_create(adf_ctl_drv.drv_class, NULL, + MKDEV(adf_ctl_drv.major, 0), NULL, DEVICE_NAME); if (IS_ERR(drv_device)) { pr_err("QAT: failed to create device\n"); @@ -118,9 +118,9 @@ static int adf_chr_drv_create(void) } return 0; err_cdev_del: - cdev_del(&adt_ctl_drv.drv_cdev); + cdev_del(&adf_ctl_drv.drv_cdev); err_class_destr: - class_destroy(adt_ctl_drv.drv_class); + class_destroy(adf_ctl_drv.drv_class); err_chrdev_unreg: unregister_chrdev_region(dev_id, 1); return -EFAULT; @@ -159,14 +159,16 @@ static int adf_add_key_value_data(struct adf_accel_dev *accel_dev, if (adf_cfg_add_key_value_param(accel_dev, section, key_val->key, (void *)val, key_val->type)) { - pr_err("QAT: failed to add keyvalue.\n"); + dev_err(&GET_DEV(accel_dev), + "failed to add hex keyvalue.\n"); return -EFAULT; } } else { if (adf_cfg_add_key_value_param(accel_dev, section, key_val->key, key_val->val, key_val->type)) { - pr_err("QAT: failed to add keyvalue.\n"); + dev_err(&GET_DEV(accel_dev), + "failed to add keyvalue.\n"); return -EFAULT; } } @@ -185,12 +187,14 @@ static int adf_copy_key_value_data(struct adf_accel_dev *accel_dev, while (section_head) { if (copy_from_user(§ion, (void __user *)section_head, sizeof(*section_head))) { - pr_err("QAT: failed to copy section info\n"); + dev_err(&GET_DEV(accel_dev), + "failed to copy section info\n"); goto out_err; } if (adf_cfg_section_add(accel_dev, section.name)) { - pr_err("QAT: failed to add section.\n"); + dev_err(&GET_DEV(accel_dev), + "failed to add section.\n"); goto out_err; } @@ -199,7 +203,8 @@ static int adf_copy_key_value_data(struct adf_accel_dev *accel_dev, while (params_head) { if (copy_from_user(&key_val, (void __user *)params_head, sizeof(key_val))) { - pr_err("QAT: Failed to copy keyvalue.\n"); + dev_err(&GET_DEV(accel_dev), + "Failed to copy keyvalue.\n"); goto out_err; } if (adf_add_key_value_data(accel_dev, section.name, @@ -258,8 +263,9 @@ static int adf_ctl_is_device_in_use(int id) if (id == dev->accel_id || id == ADF_CFG_ALL_DEVICES) { if (adf_devmgr_in_reset(dev) || adf_dev_in_use(dev)) { - pr_info("QAT: device qat_dev%d is busy\n", - dev->accel_id); + dev_info(&GET_DEV(dev), + "device qat_dev%d is busy\n", + dev->accel_id); return -EBUSY; } } @@ -280,7 +286,8 @@ static int adf_ctl_stop_devices(uint32_t id) continue; if (adf_dev_stop(accel_dev)) { - pr_err("QAT: Failed to stop qat_dev%d\n", id); + dev_err(&GET_DEV(accel_dev), + "Failed to stop qat_dev%d\n", id); ret = -EFAULT; } else { adf_dev_shutdown(accel_dev); @@ -343,17 +350,20 @@ static int adf_ctl_ioctl_dev_start(struct file *fp, unsigned int cmd, } if (!adf_dev_started(accel_dev)) { - pr_info("QAT: Starting acceleration device qat_dev%d.\n", - ctl_data->device_id); + dev_info(&GET_DEV(accel_dev), + "Starting acceleration device qat_dev%d.\n", + ctl_data->device_id); ret = adf_dev_init(accel_dev); if (!ret) ret = adf_dev_start(accel_dev); } else { - pr_info("QAT: Acceleration device qat_dev%d already started.\n", - ctl_data->device_id); + dev_info(&GET_DEV(accel_dev), + "Acceleration device qat_dev%d already started.\n", + ctl_data->device_id); } if (ret) { - pr_err("QAT: Failed to start qat_dev%d\n", ctl_data->device_id); + dev_err(&GET_DEV(accel_dev), "Failed to start qat_dev%d\n", + ctl_data->device_id); adf_dev_stop(accel_dev); adf_dev_shutdown(accel_dev); } @@ -408,7 +418,7 @@ static int adf_ctl_ioctl_get_status(struct file *fp, unsigned int cmd, if (copy_to_user((void __user *)arg, &dev_info, sizeof(struct adf_dev_status_info))) { - pr_err("QAT: failed to copy status.\n"); + dev_err(&GET_DEV(accel_dev), "failed to copy status.\n"); return -EFAULT; } return 0; diff --git a/drivers/crypto/qat/qat_common/adf_dev_mgr.c b/drivers/crypto/qat/qat_common/adf_dev_mgr.c index 4a0a829d4500..3f0ff9e7d840 100644 --- a/drivers/crypto/qat/qat_common/adf_dev_mgr.c +++ b/drivers/crypto/qat/qat_common/adf_dev_mgr.c @@ -67,7 +67,8 @@ int adf_devmgr_add_dev(struct adf_accel_dev *accel_dev) struct list_head *itr; if (num_devices == ADF_MAX_DEVICES) { - pr_err("QAT: Only support up to %d devices\n", ADF_MAX_DEVICES); + dev_err(&GET_DEV(accel_dev), "Only support up to %d devices\n", + ADF_MAX_DEVICES); return -EFAULT; } diff --git a/drivers/crypto/qat/qat_common/adf_init.c b/drivers/crypto/qat/qat_common/adf_init.c index 8f0ca498ab87..245f43237a2d 100644 --- a/drivers/crypto/qat/qat_common/adf_init.c +++ b/drivers/crypto/qat/qat_common/adf_init.c @@ -124,12 +124,12 @@ int adf_dev_init(struct adf_accel_dev *accel_dev) if (!hw_data) { dev_err(&GET_DEV(accel_dev), - "QAT: Failed to init device - hw_data not set\n"); + "Failed to init device - hw_data not set\n"); return -EFAULT; } if (!test_bit(ADF_STATUS_CONFIGURED, &accel_dev->status)) { - pr_info("QAT: Device not configured\n"); + dev_err(&GET_DEV(accel_dev), "Device not configured\n"); return -EFAULT; } @@ -151,20 +151,21 @@ int adf_dev_init(struct adf_accel_dev *accel_dev) hw_data->enable_ints(accel_dev); if (adf_ae_init(accel_dev)) { - pr_err("QAT: Failed to initialise Acceleration Engine\n"); + dev_err(&GET_DEV(accel_dev), + "Failed to initialise Acceleration Engine\n"); return -EFAULT; } set_bit(ADF_STATUS_AE_INITIALISED, &accel_dev->status); if (adf_ae_fw_load(accel_dev)) { - pr_err("QAT: Failed to load acceleration FW\n"); - adf_ae_fw_release(accel_dev); + dev_err(&GET_DEV(accel_dev), + "Failed to load acceleration FW\n"); return -EFAULT; } set_bit(ADF_STATUS_AE_UCODE_LOADED, &accel_dev->status); if (hw_data->alloc_irq(accel_dev)) { - pr_err("QAT: Failed to allocate interrupts\n"); + dev_err(&GET_DEV(accel_dev), "Failed to allocate interrupts\n"); return -EFAULT; } set_bit(ADF_STATUS_IRQ_ALLOCATED, &accel_dev->status); @@ -179,8 +180,9 @@ int adf_dev_init(struct adf_accel_dev *accel_dev) if (!service->admin) continue; if (service->event_hld(accel_dev, ADF_EVENT_INIT)) { - pr_err("QAT: Failed to initialise service %s\n", - service->name); + dev_err(&GET_DEV(accel_dev), + "Failed to initialise service %s\n", + service->name); return -EFAULT; } set_bit(accel_dev->accel_id, &service->init_status); @@ -190,8 +192,9 @@ int adf_dev_init(struct adf_accel_dev *accel_dev) if (service->admin) continue; if (service->event_hld(accel_dev, ADF_EVENT_INIT)) { - pr_err("QAT: Failed to initialise service %s\n", - service->name); + dev_err(&GET_DEV(accel_dev), + "Failed to initialise service %s\n", + service->name); return -EFAULT; } set_bit(accel_dev->accel_id, &service->init_status); @@ -221,7 +224,7 @@ int adf_dev_start(struct adf_accel_dev *accel_dev) set_bit(ADF_STATUS_STARTING, &accel_dev->status); if (adf_ae_start(accel_dev)) { - pr_err("QAT: AE Start Failed\n"); + dev_err(&GET_DEV(accel_dev), "AE Start Failed\n"); return -EFAULT; } set_bit(ADF_STATUS_AE_STARTED, &accel_dev->status); @@ -231,8 +234,9 @@ int adf_dev_start(struct adf_accel_dev *accel_dev) if (!service->admin) continue; if (service->event_hld(accel_dev, ADF_EVENT_START)) { - pr_err("QAT: Failed to start service %s\n", - service->name); + dev_err(&GET_DEV(accel_dev), + "Failed to start service %s\n", + service->name); return -EFAULT; } set_bit(accel_dev->accel_id, &service->start_status); @@ -242,8 +246,9 @@ int adf_dev_start(struct adf_accel_dev *accel_dev) if (service->admin) continue; if (service->event_hld(accel_dev, ADF_EVENT_START)) { - pr_err("QAT: Failed to start service %s\n", - service->name); + dev_err(&GET_DEV(accel_dev), + "Failed to start service %s\n", + service->name); return -EFAULT; } set_bit(accel_dev->accel_id, &service->start_status); @@ -253,7 +258,8 @@ int adf_dev_start(struct adf_accel_dev *accel_dev) set_bit(ADF_STATUS_STARTED, &accel_dev->status); if (qat_algs_register()) { - pr_err("QAT: Failed to register crypto algs\n"); + dev_err(&GET_DEV(accel_dev), + "Failed to register crypto algs\n"); set_bit(ADF_STATUS_STARTING, &accel_dev->status); clear_bit(ADF_STATUS_STARTED, &accel_dev->status); return -EFAULT; @@ -287,7 +293,8 @@ int adf_dev_stop(struct adf_accel_dev *accel_dev) clear_bit(ADF_STATUS_STARTED, &accel_dev->status); if (qat_algs_unregister()) - pr_err("QAT: Failed to unregister crypto algs\n"); + dev_err(&GET_DEV(accel_dev), + "Failed to unregister crypto algs\n"); list_for_each(list_itr, &service_table) { service = list_entry(list_itr, struct service_hndl, list); @@ -310,8 +317,9 @@ int adf_dev_stop(struct adf_accel_dev *accel_dev) if (!test_bit(accel_dev->accel_id, &service->start_status)) continue; if (service->event_hld(accel_dev, ADF_EVENT_STOP)) - pr_err("QAT: Failed to shutdown service %s\n", - service->name); + dev_err(&GET_DEV(accel_dev), + "Failed to shutdown service %s\n", + service->name); else clear_bit(accel_dev->accel_id, &service->start_status); } @@ -321,7 +329,7 @@ int adf_dev_stop(struct adf_accel_dev *accel_dev) if (test_bit(ADF_STATUS_AE_STARTED, &accel_dev->status)) { if (adf_ae_stop(accel_dev)) - pr_err("QAT: failed to stop AE\n"); + dev_err(&GET_DEV(accel_dev), "failed to stop AE\n"); else clear_bit(ADF_STATUS_AE_STARTED, &accel_dev->status); } @@ -350,16 +358,14 @@ void adf_dev_shutdown(struct adf_accel_dev *accel_dev) } if (test_bit(ADF_STATUS_AE_UCODE_LOADED, &accel_dev->status)) { - if (adf_ae_fw_release(accel_dev)) - pr_err("QAT: Failed to release the ucode\n"); - else - clear_bit(ADF_STATUS_AE_UCODE_LOADED, - &accel_dev->status); + adf_ae_fw_release(accel_dev); + clear_bit(ADF_STATUS_AE_UCODE_LOADED, &accel_dev->status); } if (test_bit(ADF_STATUS_AE_INITIALISED, &accel_dev->status)) { if (adf_ae_shutdown(accel_dev)) - pr_err("QAT: Failed to shutdown Accel Engine\n"); + dev_err(&GET_DEV(accel_dev), + "Failed to shutdown Accel Engine\n"); else clear_bit(ADF_STATUS_AE_INITIALISED, &accel_dev->status); @@ -372,8 +378,9 @@ void adf_dev_shutdown(struct adf_accel_dev *accel_dev) if (!test_bit(accel_dev->accel_id, &service->init_status)) continue; if (service->event_hld(accel_dev, ADF_EVENT_SHUTDOWN)) - pr_err("QAT: Failed to shutdown service %s\n", - service->name); + dev_err(&GET_DEV(accel_dev), + "Failed to shutdown service %s\n", + service->name); else clear_bit(accel_dev->accel_id, &service->init_status); } @@ -384,8 +391,9 @@ void adf_dev_shutdown(struct adf_accel_dev *accel_dev) if (!test_bit(accel_dev->accel_id, &service->init_status)) continue; if (service->event_hld(accel_dev, ADF_EVENT_SHUTDOWN)) - pr_err("QAT: Failed to shutdown service %s\n", - service->name); + dev_err(&GET_DEV(accel_dev), + "Failed to shutdown service %s\n", + service->name); else clear_bit(accel_dev->accel_id, &service->init_status); } @@ -419,16 +427,18 @@ int adf_dev_restarting_notify(struct adf_accel_dev *accel_dev) if (service->admin) continue; if (service->event_hld(accel_dev, ADF_EVENT_RESTARTING)) - pr_err("QAT: Failed to restart service %s.\n", - service->name); + dev_err(&GET_DEV(accel_dev), + "Failed to restart service %s.\n", + service->name); } list_for_each(list_itr, &service_table) { service = list_entry(list_itr, struct service_hndl, list); if (!service->admin) continue; if (service->event_hld(accel_dev, ADF_EVENT_RESTARTING)) - pr_err("QAT: Failed to restart service %s.\n", - service->name); + dev_err(&GET_DEV(accel_dev), + "Failed to restart service %s.\n", + service->name); } return 0; } @@ -443,16 +453,18 @@ int adf_dev_restarted_notify(struct adf_accel_dev *accel_dev) if (service->admin) continue; if (service->event_hld(accel_dev, ADF_EVENT_RESTARTED)) - pr_err("QAT: Failed to restart service %s.\n", - service->name); + dev_err(&GET_DEV(accel_dev), + "Failed to restart service %s.\n", + service->name); } list_for_each(list_itr, &service_table) { service = list_entry(list_itr, struct service_hndl, list); if (!service->admin) continue; if (service->event_hld(accel_dev, ADF_EVENT_RESTARTED)) - pr_err("QAT: Failed to restart service %s.\n", - service->name); + dev_err(&GET_DEV(accel_dev), + "Failed to restart service %s.\n", + service->name); } return 0; } diff --git a/drivers/crypto/qat/qat_common/adf_transport.c b/drivers/crypto/qat/qat_common/adf_transport.c index 7dd54aaee9fa..ccec327489da 100644 --- a/drivers/crypto/qat/qat_common/adf_transport.c +++ b/drivers/crypto/qat/qat_common/adf_transport.c @@ -195,7 +195,7 @@ static int adf_init_ring(struct adf_etr_ring_data *ring) memset(ring->base_addr, 0x7F, ring_size_bytes); /* The base_addr has to be aligned to the size of the buffer */ if (adf_check_ring_alignment(ring->dma_addr, ring_size_bytes)) { - pr_err("QAT: Ring address not aligned\n"); + dev_err(&GET_DEV(accel_dev), "Ring address not aligned\n"); dma_free_coherent(&GET_DEV(accel_dev), ring_size_bytes, ring->base_addr, ring->dma_addr); return -EFAULT; @@ -242,32 +242,33 @@ int adf_create_ring(struct adf_accel_dev *accel_dev, const char *section, int ret; if (bank_num >= GET_MAX_BANKS(accel_dev)) { - pr_err("QAT: Invalid bank number\n"); + dev_err(&GET_DEV(accel_dev), "Invalid bank number\n"); return -EFAULT; } if (msg_size > ADF_MSG_SIZE_TO_BYTES(ADF_MAX_MSG_SIZE)) { - pr_err("QAT: Invalid msg size\n"); + dev_err(&GET_DEV(accel_dev), "Invalid msg size\n"); return -EFAULT; } if (ADF_MAX_INFLIGHTS(adf_verify_ring_size(msg_size, num_msgs), ADF_BYTES_TO_MSG_SIZE(msg_size)) < 2) { - pr_err("QAT: Invalid ring size for given msg size\n"); + dev_err(&GET_DEV(accel_dev), + "Invalid ring size for given msg size\n"); return -EFAULT; } if (adf_cfg_get_param_value(accel_dev, section, ring_name, val)) { - pr_err("QAT: Section %s, no such entry : %s\n", - section, ring_name); + dev_err(&GET_DEV(accel_dev), "Section %s, no such entry : %s\n", + section, ring_name); return -EFAULT; } if (kstrtouint(val, 10, &ring_num)) { - pr_err("QAT: Can't get ring number\n"); + dev_err(&GET_DEV(accel_dev), "Can't get ring number\n"); return -EFAULT; } bank = &transport_data->banks[bank_num]; if (adf_reserve_ring(bank, ring_num)) { - pr_err("QAT: Ring %d, %s already exists.\n", - ring_num, ring_name); + dev_err(&GET_DEV(accel_dev), "Ring %d, %s already exists.\n", + ring_num, ring_name); return -EFAULT; } ring = &bank->rings[ring_num]; @@ -287,7 +288,8 @@ int adf_create_ring(struct adf_accel_dev *accel_dev, const char *section, accel_dev->hw_device->hw_arb_ring_enable(ring); if (adf_ring_debugfs_add(ring, ring_name)) { - pr_err("QAT: Couldn't add ring debugfs entry\n"); + dev_err(&GET_DEV(accel_dev), + "Couldn't add ring debugfs entry\n"); ret = -EFAULT; goto err; } @@ -428,7 +430,8 @@ static int adf_init_bank(struct adf_accel_dev *accel_dev, goto err; } else { if (i < hw_data->tx_rx_gap) { - pr_err("QAT: Invalid tx rings mask config\n"); + dev_err(&GET_DEV(accel_dev), + "Invalid tx rings mask config\n"); goto err; } tx_ring = &bank->rings[i - hw_data->tx_rx_gap]; @@ -436,7 +439,8 @@ static int adf_init_bank(struct adf_accel_dev *accel_dev, } } if (adf_bank_debugfs_add(bank)) { - pr_err("QAT: Failed to add bank debugfs entry\n"); + dev_err(&GET_DEV(accel_dev), + "Failed to add bank debugfs entry\n"); goto err; } @@ -492,7 +496,8 @@ int adf_init_etr_data(struct adf_accel_dev *accel_dev) etr_data->debug = debugfs_create_dir("transport", accel_dev->debugfs_dir); if (!etr_data->debug) { - pr_err("QAT: Unable to create transport debugfs entry\n"); + dev_err(&GET_DEV(accel_dev), + "Unable to create transport debugfs entry\n"); ret = -ENOENT; goto err_bank_debug; } diff --git a/drivers/crypto/qat/qat_common/adf_transport_debug.c b/drivers/crypto/qat/qat_common/adf_transport_debug.c index 6b6974553514..e41986967294 100644 --- a/drivers/crypto/qat/qat_common/adf_transport_debug.c +++ b/drivers/crypto/qat/qat_common/adf_transport_debug.c @@ -100,6 +100,8 @@ static int adf_ring_show(struct seq_file *sfile, void *v) empty = READ_CSR_E_STAT(csr, bank->bank_number); seq_puts(sfile, "------- Ring configuration -------\n"); + seq_printf(sfile, "ring name: %s\n", + ring->ring_debug->ring_name); seq_printf(sfile, "ring num %d, bank num %d\n", ring->ring_number, ring->bank->bank_number); seq_printf(sfile, "head %x, tail %x, empty: %d\n", diff --git a/drivers/crypto/qat/qat_common/icp_qat_hw.h b/drivers/crypto/qat/qat_common/icp_qat_hw.h index 68f191b653b0..121d5e6e46ca 100644 --- a/drivers/crypto/qat/qat_common/icp_qat_hw.h +++ b/drivers/crypto/qat/qat_common/icp_qat_hw.h @@ -145,7 +145,7 @@ struct icp_qat_hw_auth_setup { }; #define QAT_HW_DEFAULT_ALIGNMENT 8 -#define QAT_HW_ROUND_UP(val, n) (((val) + ((n)-1)) & (~(n-1))) +#define QAT_HW_ROUND_UP(val, n) (((val) + ((n) - 1)) & (~(n - 1))) #define ICP_QAT_HW_NULL_STATE1_SZ 32 #define ICP_QAT_HW_MD5_STATE1_SZ 16 #define ICP_QAT_HW_SHA1_STATE1_SZ 20 diff --git a/drivers/crypto/qat/qat_common/qat_crypto.c b/drivers/crypto/qat/qat_common/qat_crypto.c index 828f2a686aab..3bd705ca5973 100644 --- a/drivers/crypto/qat/qat_common/qat_crypto.c +++ b/drivers/crypto/qat/qat_common/qat_crypto.c @@ -110,13 +110,13 @@ struct qat_crypto_instance *qat_crypto_get_instance_node(int node) list_for_each(itr, adf_devmgr_get_head()) { accel_dev = list_entry(itr, struct adf_accel_dev, list); if ((node == dev_to_node(&GET_DEV(accel_dev)) || - dev_to_node(&GET_DEV(accel_dev)) < 0) - && adf_dev_started(accel_dev)) + dev_to_node(&GET_DEV(accel_dev)) < 0) && + adf_dev_started(accel_dev)) break; accel_dev = NULL; } if (!accel_dev) { - pr_err("QAT: Could not find device on node %d\n", node); + pr_err("QAT: Could not find a device on node %d\n", node); accel_dev = adf_devmgr_get_first(); } if (!accel_dev || !adf_dev_started(accel_dev)) @@ -137,7 +137,8 @@ struct qat_crypto_instance *qat_crypto_get_instance_node(int node) if (atomic_add_return(1, &inst_best->refctr) == 1) { if (adf_dev_get(accel_dev)) { atomic_dec(&inst_best->refctr); - pr_err("QAT: Could increment dev refctr\n"); + dev_err(&GET_DEV(accel_dev), + "Could not increment dev refctr\n"); return NULL; } } diff --git a/drivers/crypto/qat/qat_common/qat_hal.c b/drivers/crypto/qat/qat_common/qat_hal.c index b818c19713bf..274ff7e9de6e 100644 --- a/drivers/crypto/qat/qat_common/qat_hal.c +++ b/drivers/crypto/qat/qat_common/qat_hal.c @@ -434,8 +434,8 @@ static void qat_hal_reset_timestamp(struct icp_qat_fw_loader_handle *handle) SET_GLB_CSR(handle, MISC_CONTROL, misc_ctl | MC_TIMESTAMP_ENABLE); } -#define ESRAM_AUTO_TINIT (1<<2) -#define ESRAM_AUTO_TINIT_DONE (1<<3) +#define ESRAM_AUTO_TINIT BIT(2) +#define ESRAM_AUTO_TINIT_DONE BIT(3) #define ESRAM_AUTO_INIT_USED_CYCLES (1640) #define ESRAM_AUTO_INIT_CSR_OFFSET 0xC1C static int qat_hal_init_esram(struct icp_qat_fw_loader_handle *handle) @@ -718,7 +718,7 @@ int qat_hal_init(struct adf_accel_dev *accel_dev) handle->hal_handle->ae_max_num = max_en_ae_id + 1; /* take all AEs out of reset */ if (qat_hal_clr_reset(handle)) { - pr_err("QAT: qat_hal_clr_reset error\n"); + dev_err(&GET_DEV(accel_dev), "qat_hal_clr_reset error\n"); goto out_err; } if (qat_hal_clear_gpr(handle)) diff --git a/drivers/crypto/qat/qat_dh895xcc/adf_admin.c b/drivers/crypto/qat/qat_dh895xcc/adf_admin.c index 53c491b59f07..e4666065c399 100644 --- a/drivers/crypto/qat/qat_dh895xcc/adf_admin.c +++ b/drivers/crypto/qat/qat_dh895xcc/adf_admin.c @@ -93,7 +93,8 @@ int adf_put_admin_msg_sync(struct adf_accel_dev *accel_dev, memcpy(out, admin->virt_addr + offset + ADF_ADMINMSG_LEN, ADF_ADMINMSG_LEN); else - pr_err("QAT: Failed to send admin msg to accelerator\n"); + dev_err(&GET_DEV(accel_dev), + "Failed to send admin msg to accelerator\n"); mutex_unlock(&admin->lock); return received ? 0 : -EFAULT; diff --git a/drivers/crypto/qat/qat_dh895xcc/adf_dh895xcc_hw_data.c b/drivers/crypto/qat/qat_dh895xcc/adf_dh895xcc_hw_data.c index 6a735d5c0e37..b1386922d7a2 100644 --- a/drivers/crypto/qat/qat_dh895xcc/adf_dh895xcc_hw_data.c +++ b/drivers/crypto/qat/qat_dh895xcc/adf_dh895xcc_hw_data.c @@ -150,7 +150,8 @@ void adf_get_arbiter_mapping(struct adf_accel_dev *accel_dev, *arb_map_config = thrd_to_arb_map_sku6; break; default: - pr_err("QAT: The configuration doesn't match any SKU"); + dev_err(&GET_DEV(accel_dev), + "The configuration doesn't match any SKU"); *arb_map_config = NULL; } } diff --git a/drivers/crypto/qat/qat_dh895xcc/adf_dh895xcc_hw_data.h b/drivers/crypto/qat/qat_dh895xcc/adf_dh895xcc_hw_data.h index 01e0be21e93a..25269a9f24a2 100644 --- a/drivers/crypto/qat/qat_dh895xcc/adf_dh895xcc_hw_data.h +++ b/drivers/crypto/qat/qat_dh895xcc/adf_dh895xcc_hw_data.h @@ -73,11 +73,11 @@ /* Error detection and correction */ #define ADF_DH895XCC_AE_CTX_ENABLES(i) (i * 0x1000 + 0x20818) #define ADF_DH895XCC_AE_MISC_CONTROL(i) (i * 0x1000 + 0x20960) -#define ADF_DH895XCC_ENABLE_AE_ECC_ERR (1 << 28) -#define ADF_DH895XCC_ENABLE_AE_ECC_PARITY_CORR (1 << 24 | 1 << 12) +#define ADF_DH895XCC_ENABLE_AE_ECC_ERR BIT(28) +#define ADF_DH895XCC_ENABLE_AE_ECC_PARITY_CORR (BIT(24) | BIT(12)) #define ADF_DH895XCC_UERRSSMSH(i) (i * 0x4000 + 0x18) #define ADF_DH895XCC_CERRSSMSH(i) (i * 0x4000 + 0x10) -#define ADF_DH895XCC_ERRSSMSH_EN (1 << 3) +#define ADF_DH895XCC_ERRSSMSH_EN BIT(3) /* Admin Messages Registers */ #define ADF_DH895XCC_ADMINMSGUR_OFFSET (0x3A000 + 0x574) diff --git a/drivers/crypto/qat/qat_dh895xcc/adf_drv.c b/drivers/crypto/qat/qat_dh895xcc/adf_drv.c index 8ffdb95c9804..9decea2779c6 100644 --- a/drivers/crypto/qat/qat_dh895xcc/adf_drv.c +++ b/drivers/crypto/qat/qat_dh895xcc/adf_drv.c @@ -236,7 +236,7 @@ static int adf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) } accel_dev = kzalloc_node(sizeof(*accel_dev), GFP_KERNEL, - dev_to_node(&pdev->dev)); + dev_to_node(&pdev->dev)); if (!accel_dev) return -ENOMEM; @@ -379,7 +379,7 @@ out_err: return ret; } -static void __exit adf_remove(struct pci_dev *pdev) +static void adf_remove(struct pci_dev *pdev) { struct adf_accel_dev *accel_dev = adf_devmgr_pci_to_accel_dev(pdev); diff --git a/drivers/crypto/qat/qat_dh895xcc/adf_isr.c b/drivers/crypto/qat/qat_dh895xcc/adf_isr.c index fe8f89697ad8..0d03c109c2d3 100644 --- a/drivers/crypto/qat/qat_dh895xcc/adf_isr.c +++ b/drivers/crypto/qat/qat_dh895xcc/adf_isr.c @@ -73,7 +73,7 @@ static int adf_enable_msix(struct adf_accel_dev *accel_dev) if (pci_enable_msix_exact(pci_dev_info->pci_dev, pci_dev_info->msix_entries.entries, msix_num_entries)) { - pr_err("QAT: Failed to enable MSIX IRQ\n"); + dev_err(&GET_DEV(accel_dev), "Failed to enable MSIX IRQ\n"); return -EFAULT; } return 0; @@ -97,7 +97,8 @@ static irqreturn_t adf_msix_isr_ae(int irq, void *dev_ptr) { struct adf_accel_dev *accel_dev = dev_ptr; - pr_info("QAT: qat_dev%d spurious AE interrupt\n", accel_dev->accel_id); + dev_info(&GET_DEV(accel_dev), "qat_dev%d spurious AE interrupt\n", + accel_dev->accel_id); return IRQ_HANDLED; } @@ -121,8 +122,9 @@ static int adf_request_irqs(struct adf_accel_dev *accel_dev) ret = request_irq(msixe[i].vector, adf_msix_isr_bundle, 0, name, bank); if (ret) { - pr_err("QAT: failed to enable irq %d for %s\n", - msixe[i].vector, name); + dev_err(&GET_DEV(accel_dev), + "failed to enable irq %d for %s\n", + msixe[i].vector, name); return ret; } @@ -136,8 +138,9 @@ static int adf_request_irqs(struct adf_accel_dev *accel_dev) "qat%d-ae-cluster", accel_dev->accel_id); ret = request_irq(msixe[i].vector, adf_msix_isr_ae, 0, name, accel_dev); if (ret) { - pr_err("QAT: failed to enable irq %d, for %s\n", - msixe[i].vector, name); + dev_err(&GET_DEV(accel_dev), + "failed to enable irq %d, for %s\n", + msixe[i].vector, name); return ret; } return ret; diff --git a/drivers/crypto/sahara.c b/drivers/crypto/sahara.c index 290a7f0a681f..6be377f6b9e7 100644 --- a/drivers/crypto/sahara.c +++ b/drivers/crypto/sahara.c @@ -479,6 +479,7 @@ static int sahara_hw_descriptor_create(struct sahara_dev *dev) struct scatterlist *sg; int ret; int i, j; + int idx = 0; /* Copy new key if necessary */ if (ctx->flags & FLAGS_NEW_KEY) { @@ -486,17 +487,20 @@ static int sahara_hw_descriptor_create(struct sahara_dev *dev) ctx->flags &= ~FLAGS_NEW_KEY; if (dev->flags & FLAGS_CBC) { - dev->hw_desc[0]->len1 = AES_BLOCK_SIZE; - dev->hw_desc[0]->p1 = dev->iv_phys_base; + dev->hw_desc[idx]->len1 = AES_BLOCK_SIZE; + dev->hw_desc[idx]->p1 = dev->iv_phys_base; } else { - dev->hw_desc[0]->len1 = 0; - dev->hw_desc[0]->p1 = 0; + dev->hw_desc[idx]->len1 = 0; + dev->hw_desc[idx]->p1 = 0; } - dev->hw_desc[0]->len2 = ctx->keylen; - dev->hw_desc[0]->p2 = dev->key_phys_base; - dev->hw_desc[0]->next = dev->hw_phys_desc[1]; + dev->hw_desc[idx]->len2 = ctx->keylen; + dev->hw_desc[idx]->p2 = dev->key_phys_base; + dev->hw_desc[idx]->next = dev->hw_phys_desc[1]; + + dev->hw_desc[idx]->hdr = sahara_aes_key_hdr(dev); + + idx++; } - dev->hw_desc[0]->hdr = sahara_aes_key_hdr(dev); dev->nb_in_sg = sahara_sg_length(dev->in_sg, dev->total); dev->nb_out_sg = sahara_sg_length(dev->out_sg, dev->total); @@ -520,7 +524,7 @@ static int sahara_hw_descriptor_create(struct sahara_dev *dev) } /* Create input links */ - dev->hw_desc[1]->p1 = dev->hw_phys_link[0]; + dev->hw_desc[idx]->p1 = dev->hw_phys_link[0]; sg = dev->in_sg; for (i = 0; i < dev->nb_in_sg; i++) { dev->hw_link[i]->len = sg->length; @@ -534,7 +538,7 @@ static int sahara_hw_descriptor_create(struct sahara_dev *dev) } /* Create output links */ - dev->hw_desc[1]->p2 = dev->hw_phys_link[i]; + dev->hw_desc[idx]->p2 = dev->hw_phys_link[i]; sg = dev->out_sg; for (j = i; j < dev->nb_out_sg + i; j++) { dev->hw_link[j]->len = sg->length; @@ -548,10 +552,10 @@ static int sahara_hw_descriptor_create(struct sahara_dev *dev) } /* Fill remaining fields of hw_desc[1] */ - dev->hw_desc[1]->hdr = sahara_aes_data_link_hdr(dev); - dev->hw_desc[1]->len1 = dev->total; - dev->hw_desc[1]->len2 = dev->total; - dev->hw_desc[1]->next = 0; + dev->hw_desc[idx]->hdr = sahara_aes_data_link_hdr(dev); + dev->hw_desc[idx]->len1 = dev->total; + dev->hw_desc[idx]->len2 = dev->total; + dev->hw_desc[idx]->next = 0; sahara_dump_descriptors(dev); sahara_dump_links(dev); @@ -576,6 +580,7 @@ static int sahara_aes_process(struct ablkcipher_request *req) struct sahara_ctx *ctx; struct sahara_aes_reqctx *rctx; int ret; + unsigned long timeout; /* Request is ready to be dispatched by the device */ dev_dbg(dev->device, @@ -601,10 +606,12 @@ static int sahara_aes_process(struct ablkcipher_request *req) reinit_completion(&dev->dma_completion); ret = sahara_hw_descriptor_create(dev); + if (ret) + return -EINVAL; - ret = wait_for_completion_timeout(&dev->dma_completion, + timeout = wait_for_completion_timeout(&dev->dma_completion, msecs_to_jiffies(SAHARA_TIMEOUT_MS)); - if (!ret) { + if (!timeout) { dev_err(dev->device, "AES timeout\n"); return -ETIMEDOUT; } @@ -1044,7 +1051,8 @@ static int sahara_sha_process(struct ahash_request *req) { struct sahara_dev *dev = dev_ptr; struct sahara_sha_reqctx *rctx = ahash_request_ctx(req); - int ret = -EINPROGRESS; + int ret; + unsigned long timeout; ret = sahara_sha_prepare_request(req); if (!ret) @@ -1070,9 +1078,9 @@ static int sahara_sha_process(struct ahash_request *req) sahara_write(dev, dev->hw_phys_desc[0], SAHARA_REG_DAR); - ret = wait_for_completion_timeout(&dev->dma_completion, + timeout = wait_for_completion_timeout(&dev->dma_completion, msecs_to_jiffies(SAHARA_TIMEOUT_MS)); - if (!ret) { + if (!timeout) { dev_err(dev->device, "SHA timeout\n"); return -ETIMEDOUT; } @@ -1092,15 +1100,20 @@ static int sahara_queue_manage(void *data) { struct sahara_dev *dev = (struct sahara_dev *)data; struct crypto_async_request *async_req; + struct crypto_async_request *backlog; int ret = 0; do { __set_current_state(TASK_INTERRUPTIBLE); mutex_lock(&dev->queue_mutex); + backlog = crypto_get_backlog(&dev->queue); async_req = crypto_dequeue_request(&dev->queue); mutex_unlock(&dev->queue_mutex); + if (backlog) + backlog->complete(backlog, -EINPROGRESS); + if (async_req) { if (crypto_tfm_alg_type(async_req->tfm) == CRYPTO_ALG_TYPE_AHASH) { diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c index ebbae8d3ce0d..857414afa29a 100644 --- a/drivers/crypto/talitos.c +++ b/drivers/crypto/talitos.c @@ -637,8 +637,6 @@ static void talitos_unregister_rng(struct device *dev) #define TALITOS_MAX_KEY_SIZE 96 #define TALITOS_MAX_IV_LENGTH 16 /* max of AES_BLOCK_SIZE, DES3_EDE_BLOCK_SIZE */ -#define MD5_BLOCK_SIZE 64 - struct talitos_ctx { struct device *dev; int ch; @@ -2195,7 +2193,7 @@ static struct talitos_alg_template driver_algs[] = { .halg.base = { .cra_name = "md5", .cra_driver_name = "md5-talitos", - .cra_blocksize = MD5_BLOCK_SIZE, + .cra_blocksize = MD5_HMAC_BLOCK_SIZE, .cra_flags = CRYPTO_ALG_TYPE_AHASH | CRYPTO_ALG_ASYNC, } @@ -2285,7 +2283,7 @@ static struct talitos_alg_template driver_algs[] = { .halg.base = { .cra_name = "hmac(md5)", .cra_driver_name = "hmac-md5-talitos", - .cra_blocksize = MD5_BLOCK_SIZE, + .cra_blocksize = MD5_HMAC_BLOCK_SIZE, .cra_flags = CRYPTO_ALG_TYPE_AHASH | CRYPTO_ALG_ASYNC, } @@ -2706,20 +2704,16 @@ static int talitos_probe(struct platform_device *ofdev) goto err_out; } + priv->fifo_len = roundup_pow_of_two(priv->chfifo_len); + for (i = 0; i < priv->num_channels; i++) { priv->chan[i].reg = priv->reg + TALITOS_CH_STRIDE * (i + 1); if (!priv->irq[1] || !(i & 1)) priv->chan[i].reg += TALITOS_CH_BASE_OFFSET; - } - for (i = 0; i < priv->num_channels; i++) { spin_lock_init(&priv->chan[i].head_lock); spin_lock_init(&priv->chan[i].tail_lock); - } - priv->fifo_len = roundup_pow_of_two(priv->chfifo_len); - - for (i = 0; i < priv->num_channels; i++) { priv->chan[i].fifo = kzalloc(sizeof(struct talitos_request) * priv->fifo_len, GFP_KERNEL); if (!priv->chan[i].fifo) { @@ -2727,11 +2721,10 @@ static int talitos_probe(struct platform_device *ofdev) err = -ENOMEM; goto err_out; } - } - for (i = 0; i < priv->num_channels; i++) atomic_set(&priv->chan[i].submit_count, -(priv->chfifo_len - 1)); + } dma_set_mask(dev, DMA_BIT_MASK(36)); diff --git a/drivers/crypto/ux500/hash/hash_core.c b/drivers/crypto/ux500/hash/hash_core.c index 187a8fd7eee7..5f5f360628fc 100644 --- a/drivers/crypto/ux500/hash/hash_core.c +++ b/drivers/crypto/ux500/hash/hash_core.c @@ -184,7 +184,7 @@ static int hash_set_dma_transfer(struct hash_ctx *ctx, struct scatterlist *sg, direction, DMA_CTRL_ACK | DMA_PREP_INTERRUPT); if (!desc) { dev_err(ctx->device->dev, - "%s: device_prep_slave_sg() failed!\n", __func__); + "%s: dmaengine_prep_slave_sg() failed!\n", __func__); return -EFAULT; } diff --git a/drivers/crypto/vmx/Kconfig b/drivers/crypto/vmx/Kconfig new file mode 100644 index 000000000000..771babf16aa0 --- /dev/null +++ b/drivers/crypto/vmx/Kconfig @@ -0,0 +1,8 @@ +config CRYPTO_DEV_VMX_ENCRYPT + tristate "Encryption acceleration support on P8 CPU" + depends on PPC64 && CRYPTO_DEV_VMX + default y + help + Support for VMX cryptographic acceleration instructions on Power8 CPU. + This module supports acceleration for AES and GHASH in hardware. If you + choose 'M' here, this module will be called vmx-crypto. diff --git a/drivers/crypto/vmx/Makefile b/drivers/crypto/vmx/Makefile new file mode 100644 index 000000000000..c699c6e6c82e --- /dev/null +++ b/drivers/crypto/vmx/Makefile @@ -0,0 +1,19 @@ +obj-$(CONFIG_CRYPTO_DEV_VMX_ENCRYPT) += vmx-crypto.o +vmx-crypto-objs := vmx.o aesp8-ppc.o ghashp8-ppc.o aes.o aes_cbc.o aes_ctr.o ghash.o + +ifeq ($(CONFIG_CPU_LITTLE_ENDIAN),y) +TARGET := linux-ppc64le +else +TARGET := linux-pcc64 +endif + +quiet_cmd_perl = PERL $@ + cmd_perl = $(PERL) $(<) $(TARGET) > $(@) + +$(src)/aesp8-ppc.S: $(src)/aesp8-ppc.pl + $(call cmd,perl) + +$(src)/ghashp8-ppc.S: $(src)/ghashp8-ppc.pl + $(call cmd,perl) + +.PRECIOUS: $(obj)/aesp8-ppc.S $(obj)/ghashp8-ppc.S diff --git a/drivers/crypto/vmx/aes.c b/drivers/crypto/vmx/aes.c new file mode 100644 index 000000000000..ab300ea19434 --- /dev/null +++ b/drivers/crypto/vmx/aes.c @@ -0,0 +1,139 @@ +/** + * AES routines supporting VMX instructions on the Power 8 + * + * Copyright (C) 2015 International Business Machines Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 only. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * + * Author: Marcelo Henrique Cerri <mhcerri@br.ibm.com> + */ + +#include <linux/types.h> +#include <linux/err.h> +#include <linux/crypto.h> +#include <linux/delay.h> +#include <linux/hardirq.h> +#include <asm/switch_to.h> +#include <crypto/aes.h> + +#include "aesp8-ppc.h" + +struct p8_aes_ctx { + struct crypto_cipher *fallback; + struct aes_key enc_key; + struct aes_key dec_key; +}; + +static int p8_aes_init(struct crypto_tfm *tfm) +{ + const char *alg; + struct crypto_cipher *fallback; + struct p8_aes_ctx *ctx = crypto_tfm_ctx(tfm); + + if (!(alg = crypto_tfm_alg_name(tfm))) { + printk(KERN_ERR "Failed to get algorithm name.\n"); + return -ENOENT; + } + + fallback = crypto_alloc_cipher(alg, 0 ,CRYPTO_ALG_NEED_FALLBACK); + if (IS_ERR(fallback)) { + printk(KERN_ERR "Failed to allocate transformation for '%s': %ld\n", + alg, PTR_ERR(fallback)); + return PTR_ERR(fallback); + } + printk(KERN_INFO "Using '%s' as fallback implementation.\n", + crypto_tfm_alg_driver_name((struct crypto_tfm *) fallback)); + + crypto_cipher_set_flags(fallback, + crypto_cipher_get_flags((struct crypto_cipher *) tfm)); + ctx->fallback = fallback; + + return 0; +} + +static void p8_aes_exit(struct crypto_tfm *tfm) +{ + struct p8_aes_ctx *ctx = crypto_tfm_ctx(tfm); + + if (ctx->fallback) { + crypto_free_cipher(ctx->fallback); + ctx->fallback = NULL; + } +} + +static int p8_aes_setkey(struct crypto_tfm *tfm, const u8 *key, + unsigned int keylen) +{ + int ret; + struct p8_aes_ctx *ctx = crypto_tfm_ctx(tfm); + + pagefault_disable(); + enable_kernel_altivec(); + ret = aes_p8_set_encrypt_key(key, keylen * 8, &ctx->enc_key); + ret += aes_p8_set_decrypt_key(key, keylen * 8, &ctx->dec_key); + pagefault_enable(); + + ret += crypto_cipher_setkey(ctx->fallback, key, keylen); + return ret; +} + +static void p8_aes_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) +{ + struct p8_aes_ctx *ctx = crypto_tfm_ctx(tfm); + + if (in_interrupt()) { + crypto_cipher_encrypt_one(ctx->fallback, dst, src); + } else { + pagefault_disable(); + enable_kernel_altivec(); + aes_p8_encrypt(src, dst, &ctx->enc_key); + pagefault_enable(); + } +} + +static void p8_aes_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) +{ + struct p8_aes_ctx *ctx = crypto_tfm_ctx(tfm); + + if (in_interrupt()) { + crypto_cipher_decrypt_one(ctx->fallback, dst, src); + } else { + pagefault_disable(); + enable_kernel_altivec(); + aes_p8_decrypt(src, dst, &ctx->dec_key); + pagefault_enable(); + } +} + +struct crypto_alg p8_aes_alg = { + .cra_name = "aes", + .cra_driver_name = "p8_aes", + .cra_module = THIS_MODULE, + .cra_priority = 1000, + .cra_type = NULL, + .cra_flags = CRYPTO_ALG_TYPE_CIPHER | CRYPTO_ALG_NEED_FALLBACK, + .cra_alignmask = 0, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct p8_aes_ctx), + .cra_init = p8_aes_init, + .cra_exit = p8_aes_exit, + .cra_cipher = { + .cia_min_keysize = AES_MIN_KEY_SIZE, + .cia_max_keysize = AES_MAX_KEY_SIZE, + .cia_setkey = p8_aes_setkey, + .cia_encrypt = p8_aes_encrypt, + .cia_decrypt = p8_aes_decrypt, + }, +}; + diff --git a/drivers/crypto/vmx/aes_cbc.c b/drivers/crypto/vmx/aes_cbc.c new file mode 100644 index 000000000000..1a559b7dddb5 --- /dev/null +++ b/drivers/crypto/vmx/aes_cbc.c @@ -0,0 +1,184 @@ +/** + * AES CBC routines supporting VMX instructions on the Power 8 + * + * Copyright (C) 2015 International Business Machines Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 only. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * + * Author: Marcelo Henrique Cerri <mhcerri@br.ibm.com> + */ + +#include <linux/types.h> +#include <linux/err.h> +#include <linux/crypto.h> +#include <linux/delay.h> +#include <linux/hardirq.h> +#include <asm/switch_to.h> +#include <crypto/aes.h> +#include <crypto/scatterwalk.h> + +#include "aesp8-ppc.h" + +struct p8_aes_cbc_ctx { + struct crypto_blkcipher *fallback; + struct aes_key enc_key; + struct aes_key dec_key; +}; + +static int p8_aes_cbc_init(struct crypto_tfm *tfm) +{ + const char *alg; + struct crypto_blkcipher *fallback; + struct p8_aes_cbc_ctx *ctx = crypto_tfm_ctx(tfm); + + if (!(alg = crypto_tfm_alg_name(tfm))) { + printk(KERN_ERR "Failed to get algorithm name.\n"); + return -ENOENT; + } + + fallback = crypto_alloc_blkcipher(alg, 0 ,CRYPTO_ALG_NEED_FALLBACK); + if (IS_ERR(fallback)) { + printk(KERN_ERR "Failed to allocate transformation for '%s': %ld\n", + alg, PTR_ERR(fallback)); + return PTR_ERR(fallback); + } + printk(KERN_INFO "Using '%s' as fallback implementation.\n", + crypto_tfm_alg_driver_name((struct crypto_tfm *) fallback)); + + crypto_blkcipher_set_flags(fallback, + crypto_blkcipher_get_flags((struct crypto_blkcipher *) tfm)); + ctx->fallback = fallback; + + return 0; +} + +static void p8_aes_cbc_exit(struct crypto_tfm *tfm) +{ + struct p8_aes_cbc_ctx *ctx = crypto_tfm_ctx(tfm); + + if (ctx->fallback) { + crypto_free_blkcipher(ctx->fallback); + ctx->fallback = NULL; + } +} + +static int p8_aes_cbc_setkey(struct crypto_tfm *tfm, const u8 *key, + unsigned int keylen) +{ + int ret; + struct p8_aes_cbc_ctx *ctx = crypto_tfm_ctx(tfm); + + pagefault_disable(); + enable_kernel_altivec(); + ret = aes_p8_set_encrypt_key(key, keylen * 8, &ctx->enc_key); + ret += aes_p8_set_decrypt_key(key, keylen * 8, &ctx->dec_key); + pagefault_enable(); + + ret += crypto_blkcipher_setkey(ctx->fallback, key, keylen); + return ret; +} + +static int p8_aes_cbc_encrypt(struct blkcipher_desc *desc, + struct scatterlist *dst, struct scatterlist *src, + unsigned int nbytes) +{ + int ret; + struct blkcipher_walk walk; + struct p8_aes_cbc_ctx *ctx = crypto_tfm_ctx( + crypto_blkcipher_tfm(desc->tfm)); + struct blkcipher_desc fallback_desc = { + .tfm = ctx->fallback, + .info = desc->info, + .flags = desc->flags + }; + + if (in_interrupt()) { + ret = crypto_blkcipher_encrypt(&fallback_desc, dst, src, nbytes); + } else { + pagefault_disable(); + enable_kernel_altivec(); + + blkcipher_walk_init(&walk, dst, src, nbytes); + ret = blkcipher_walk_virt(desc, &walk); + while ((nbytes = walk.nbytes)) { + aes_p8_cbc_encrypt(walk.src.virt.addr, walk.dst.virt.addr, + nbytes & AES_BLOCK_MASK, &ctx->enc_key, walk.iv, 1); + nbytes &= AES_BLOCK_SIZE - 1; + ret = blkcipher_walk_done(desc, &walk, nbytes); + } + + pagefault_enable(); + } + + return ret; +} + +static int p8_aes_cbc_decrypt(struct blkcipher_desc *desc, + struct scatterlist *dst, struct scatterlist *src, + unsigned int nbytes) +{ + int ret; + struct blkcipher_walk walk; + struct p8_aes_cbc_ctx *ctx = crypto_tfm_ctx( + crypto_blkcipher_tfm(desc->tfm)); + struct blkcipher_desc fallback_desc = { + .tfm = ctx->fallback, + .info = desc->info, + .flags = desc->flags + }; + + if (in_interrupt()) { + ret = crypto_blkcipher_decrypt(&fallback_desc, dst, src, nbytes); + } else { + pagefault_disable(); + enable_kernel_altivec(); + + blkcipher_walk_init(&walk, dst, src, nbytes); + ret = blkcipher_walk_virt(desc, &walk); + while ((nbytes = walk.nbytes)) { + aes_p8_cbc_encrypt(walk.src.virt.addr, walk.dst.virt.addr, + nbytes & AES_BLOCK_MASK, &ctx->dec_key, walk.iv, 0); + nbytes &= AES_BLOCK_SIZE - 1; + ret = blkcipher_walk_done(desc, &walk, nbytes); + } + + pagefault_enable(); + } + + return ret; +} + + +struct crypto_alg p8_aes_cbc_alg = { + .cra_name = "cbc(aes)", + .cra_driver_name = "p8_aes_cbc", + .cra_module = THIS_MODULE, + .cra_priority = 1000, + .cra_type = &crypto_blkcipher_type, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | CRYPTO_ALG_NEED_FALLBACK, + .cra_alignmask = 0, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct p8_aes_cbc_ctx), + .cra_init = p8_aes_cbc_init, + .cra_exit = p8_aes_cbc_exit, + .cra_blkcipher = { + .ivsize = 0, + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .setkey = p8_aes_cbc_setkey, + .encrypt = p8_aes_cbc_encrypt, + .decrypt = p8_aes_cbc_decrypt, + }, +}; + diff --git a/drivers/crypto/vmx/aes_ctr.c b/drivers/crypto/vmx/aes_ctr.c new file mode 100644 index 000000000000..96dbee4bf4a6 --- /dev/null +++ b/drivers/crypto/vmx/aes_ctr.c @@ -0,0 +1,167 @@ +/** + * AES CTR routines supporting VMX instructions on the Power 8 + * + * Copyright (C) 2015 International Business Machines Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 only. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * + * Author: Marcelo Henrique Cerri <mhcerri@br.ibm.com> + */ + +#include <linux/types.h> +#include <linux/err.h> +#include <linux/crypto.h> +#include <linux/delay.h> +#include <linux/hardirq.h> +#include <asm/switch_to.h> +#include <crypto/aes.h> +#include <crypto/scatterwalk.h> +#include "aesp8-ppc.h" + +struct p8_aes_ctr_ctx { + struct crypto_blkcipher *fallback; + struct aes_key enc_key; +}; + +static int p8_aes_ctr_init(struct crypto_tfm *tfm) +{ + const char *alg; + struct crypto_blkcipher *fallback; + struct p8_aes_ctr_ctx *ctx = crypto_tfm_ctx(tfm); + + if (!(alg = crypto_tfm_alg_name(tfm))) { + printk(KERN_ERR "Failed to get algorithm name.\n"); + return -ENOENT; + } + + fallback = crypto_alloc_blkcipher(alg, 0 ,CRYPTO_ALG_NEED_FALLBACK); + if (IS_ERR(fallback)) { + printk(KERN_ERR "Failed to allocate transformation for '%s': %ld\n", + alg, PTR_ERR(fallback)); + return PTR_ERR(fallback); + } + printk(KERN_INFO "Using '%s' as fallback implementation.\n", + crypto_tfm_alg_driver_name((struct crypto_tfm *) fallback)); + + crypto_blkcipher_set_flags(fallback, + crypto_blkcipher_get_flags((struct crypto_blkcipher *) tfm)); + ctx->fallback = fallback; + + return 0; +} + +static void p8_aes_ctr_exit(struct crypto_tfm *tfm) +{ + struct p8_aes_ctr_ctx *ctx = crypto_tfm_ctx(tfm); + + if (ctx->fallback) { + crypto_free_blkcipher(ctx->fallback); + ctx->fallback = NULL; + } +} + +static int p8_aes_ctr_setkey(struct crypto_tfm *tfm, const u8 *key, + unsigned int keylen) +{ + int ret; + struct p8_aes_ctr_ctx *ctx = crypto_tfm_ctx(tfm); + + pagefault_disable(); + enable_kernel_altivec(); + ret = aes_p8_set_encrypt_key(key, keylen * 8, &ctx->enc_key); + pagefault_enable(); + + ret += crypto_blkcipher_setkey(ctx->fallback, key, keylen); + return ret; +} + +static void p8_aes_ctr_final(struct p8_aes_ctr_ctx *ctx, + struct blkcipher_walk *walk) +{ + u8 *ctrblk = walk->iv; + u8 keystream[AES_BLOCK_SIZE]; + u8 *src = walk->src.virt.addr; + u8 *dst = walk->dst.virt.addr; + unsigned int nbytes = walk->nbytes; + + pagefault_disable(); + enable_kernel_altivec(); + aes_p8_encrypt(ctrblk, keystream, &ctx->enc_key); + pagefault_enable(); + + crypto_xor(keystream, src, nbytes); + memcpy(dst, keystream, nbytes); + crypto_inc(ctrblk, AES_BLOCK_SIZE); +} + +static int p8_aes_ctr_crypt(struct blkcipher_desc *desc, + struct scatterlist *dst, struct scatterlist *src, + unsigned int nbytes) +{ + int ret; + struct blkcipher_walk walk; + struct p8_aes_ctr_ctx *ctx = crypto_tfm_ctx( + crypto_blkcipher_tfm(desc->tfm)); + struct blkcipher_desc fallback_desc = { + .tfm = ctx->fallback, + .info = desc->info, + .flags = desc->flags + }; + + if (in_interrupt()) { + ret = crypto_blkcipher_encrypt(&fallback_desc, dst, src, nbytes); + } else { + blkcipher_walk_init(&walk, dst, src, nbytes); + ret = blkcipher_walk_virt_block(desc, &walk, AES_BLOCK_SIZE); + while ((nbytes = walk.nbytes) >= AES_BLOCK_SIZE) { + pagefault_disable(); + enable_kernel_altivec(); + aes_p8_ctr32_encrypt_blocks(walk.src.virt.addr, walk.dst.virt.addr, + (nbytes & AES_BLOCK_MASK)/AES_BLOCK_SIZE, &ctx->enc_key, walk.iv); + pagefault_enable(); + + crypto_inc(walk.iv, AES_BLOCK_SIZE); + nbytes &= AES_BLOCK_SIZE - 1; + ret = blkcipher_walk_done(desc, &walk, nbytes); + } + if (walk.nbytes) { + p8_aes_ctr_final(ctx, &walk); + ret = blkcipher_walk_done(desc, &walk, 0); + } + } + + return ret; +} + +struct crypto_alg p8_aes_ctr_alg = { + .cra_name = "ctr(aes)", + .cra_driver_name = "p8_aes_ctr", + .cra_module = THIS_MODULE, + .cra_priority = 1000, + .cra_type = &crypto_blkcipher_type, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | CRYPTO_ALG_NEED_FALLBACK, + .cra_alignmask = 0, + .cra_blocksize = 1, + .cra_ctxsize = sizeof(struct p8_aes_ctr_ctx), + .cra_init = p8_aes_ctr_init, + .cra_exit = p8_aes_ctr_exit, + .cra_blkcipher = { + .ivsize = 0, + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .setkey = p8_aes_ctr_setkey, + .encrypt = p8_aes_ctr_crypt, + .decrypt = p8_aes_ctr_crypt, + }, +}; diff --git a/drivers/crypto/vmx/aesp8-ppc.h b/drivers/crypto/vmx/aesp8-ppc.h new file mode 100644 index 000000000000..e963945a83e1 --- /dev/null +++ b/drivers/crypto/vmx/aesp8-ppc.h @@ -0,0 +1,20 @@ +#include <linux/types.h> +#include <crypto/aes.h> + +#define AES_BLOCK_MASK (~(AES_BLOCK_SIZE-1)) + +struct aes_key { + u8 key[AES_MAX_KEYLENGTH]; + int rounds; +}; + +int aes_p8_set_encrypt_key(const u8 *userKey, const int bits, + struct aes_key *key); +int aes_p8_set_decrypt_key(const u8 *userKey, const int bits, + struct aes_key *key); +void aes_p8_encrypt(const u8 *in, u8 *out, const struct aes_key *key); +void aes_p8_decrypt(const u8 *in, u8 *out,const struct aes_key *key); +void aes_p8_cbc_encrypt(const u8 *in, u8 *out, size_t len, + const struct aes_key *key, u8 *iv, const int enc); +void aes_p8_ctr32_encrypt_blocks(const u8 *in, u8 *out, + size_t len, const struct aes_key *key, const u8 *iv); diff --git a/drivers/crypto/vmx/aesp8-ppc.pl b/drivers/crypto/vmx/aesp8-ppc.pl new file mode 100644 index 000000000000..6c5c20c6108e --- /dev/null +++ b/drivers/crypto/vmx/aesp8-ppc.pl @@ -0,0 +1,1930 @@ +#!/usr/bin/env perl +# +# ==================================================================== +# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL +# project. The module is, however, dual licensed under OpenSSL and +# CRYPTOGAMS licenses depending on where you obtain it. For further +# details see http://www.openssl.org/~appro/cryptogams/. +# ==================================================================== +# +# This module implements support for AES instructions as per PowerISA +# specification version 2.07, first implemented by POWER8 processor. +# The module is endian-agnostic in sense that it supports both big- +# and little-endian cases. Data alignment in parallelizable modes is +# handled with VSX loads and stores, which implies MSR.VSX flag being +# set. It should also be noted that ISA specification doesn't prohibit +# alignment exceptions for these instructions on page boundaries. +# Initially alignment was handled in pure AltiVec/VMX way [when data +# is aligned programmatically, which in turn guarantees exception- +# free execution], but it turned to hamper performance when vcipher +# instructions are interleaved. It's reckoned that eventual +# misalignment penalties at page boundaries are in average lower +# than additional overhead in pure AltiVec approach. + +$flavour = shift; + +if ($flavour =~ /64/) { + $SIZE_T =8; + $LRSAVE =2*$SIZE_T; + $STU ="stdu"; + $POP ="ld"; + $PUSH ="std"; + $UCMP ="cmpld"; + $SHL ="sldi"; +} elsif ($flavour =~ /32/) { + $SIZE_T =4; + $LRSAVE =$SIZE_T; + $STU ="stwu"; + $POP ="lwz"; + $PUSH ="stw"; + $UCMP ="cmplw"; + $SHL ="slwi"; +} else { die "nonsense $flavour"; } + +$LITTLE_ENDIAN = ($flavour=~/le$/) ? $SIZE_T : 0; + +$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; +( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or +( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or +die "can't locate ppc-xlate.pl"; + +open STDOUT,"| $^X $xlate $flavour ".shift || die "can't call $xlate: $!"; + +$FRAME=8*$SIZE_T; +$prefix="aes_p8"; + +$sp="r1"; +$vrsave="r12"; + +######################################################################### +{{{ # Key setup procedures # +my ($inp,$bits,$out,$ptr,$cnt,$rounds)=map("r$_",(3..8)); +my ($zero,$in0,$in1,$key,$rcon,$mask,$tmp)=map("v$_",(0..6)); +my ($stage,$outperm,$outmask,$outhead,$outtail)=map("v$_",(7..11)); + +$code.=<<___; +.machine "any" + +.text + +.align 7 +rcon: +.long 0x01000000, 0x01000000, 0x01000000, 0x01000000 ?rev +.long 0x1b000000, 0x1b000000, 0x1b000000, 0x1b000000 ?rev +.long 0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c ?rev +.long 0,0,0,0 ?asis +Lconsts: + mflr r0 + bcl 20,31,\$+4 + mflr $ptr #vvvvv "distance between . and rcon + addi $ptr,$ptr,-0x48 + mtlr r0 + blr + .long 0 + .byte 0,12,0x14,0,0,0,0,0 +.asciz "AES for PowerISA 2.07, CRYPTOGAMS by <appro\@openssl.org>" + +.globl .${prefix}_set_encrypt_key +Lset_encrypt_key: + mflr r11 + $PUSH r11,$LRSAVE($sp) + + li $ptr,-1 + ${UCMP}i $inp,0 + beq- Lenc_key_abort # if ($inp==0) return -1; + ${UCMP}i $out,0 + beq- Lenc_key_abort # if ($out==0) return -1; + li $ptr,-2 + cmpwi $bits,128 + blt- Lenc_key_abort + cmpwi $bits,256 + bgt- Lenc_key_abort + andi. r0,$bits,0x3f + bne- Lenc_key_abort + + lis r0,0xfff0 + mfspr $vrsave,256 + mtspr 256,r0 + + bl Lconsts + mtlr r11 + + neg r9,$inp + lvx $in0,0,$inp + addi $inp,$inp,15 # 15 is not typo + lvsr $key,0,r9 # borrow $key + li r8,0x20 + cmpwi $bits,192 + lvx $in1,0,$inp + le?vspltisb $mask,0x0f # borrow $mask + lvx $rcon,0,$ptr + le?vxor $key,$key,$mask # adjust for byte swap + lvx $mask,r8,$ptr + addi $ptr,$ptr,0x10 + vperm $in0,$in0,$in1,$key # align [and byte swap in LE] + li $cnt,8 + vxor $zero,$zero,$zero + mtctr $cnt + + ?lvsr $outperm,0,$out + vspltisb $outmask,-1 + lvx $outhead,0,$out + ?vperm $outmask,$zero,$outmask,$outperm + + blt Loop128 + addi $inp,$inp,8 + beq L192 + addi $inp,$inp,8 + b L256 + +.align 4 +Loop128: + vperm $key,$in0,$in0,$mask # rotate-n-splat + vsldoi $tmp,$zero,$in0,12 # >>32 + vperm $outtail,$in0,$in0,$outperm # rotate + vsel $stage,$outhead,$outtail,$outmask + vmr $outhead,$outtail + vcipherlast $key,$key,$rcon + stvx $stage,0,$out + addi $out,$out,16 + + vxor $in0,$in0,$tmp + vsldoi $tmp,$zero,$tmp,12 # >>32 + vxor $in0,$in0,$tmp + vsldoi $tmp,$zero,$tmp,12 # >>32 + vxor $in0,$in0,$tmp + vadduwm $rcon,$rcon,$rcon + vxor $in0,$in0,$key + bdnz Loop128 + + lvx $rcon,0,$ptr # last two round keys + + vperm $key,$in0,$in0,$mask # rotate-n-splat + vsldoi $tmp,$zero,$in0,12 # >>32 + vperm $outtail,$in0,$in0,$outperm # rotate + vsel $stage,$outhead,$outtail,$outmask + vmr $outhead,$outtail + vcipherlast $key,$key,$rcon + stvx $stage,0,$out + addi $out,$out,16 + + vxor $in0,$in0,$tmp + vsldoi $tmp,$zero,$tmp,12 # >>32 + vxor $in0,$in0,$tmp + vsldoi $tmp,$zero,$tmp,12 # >>32 + vxor $in0,$in0,$tmp + vadduwm $rcon,$rcon,$rcon + vxor $in0,$in0,$key + + vperm $key,$in0,$in0,$mask # rotate-n-splat + vsldoi $tmp,$zero,$in0,12 # >>32 + vperm $outtail,$in0,$in0,$outperm # rotate + vsel $stage,$outhead,$outtail,$outmask + vmr $outhead,$outtail + vcipherlast $key,$key,$rcon + stvx $stage,0,$out + addi $out,$out,16 + + vxor $in0,$in0,$tmp + vsldoi $tmp,$zero,$tmp,12 # >>32 + vxor $in0,$in0,$tmp + vsldoi $tmp,$zero,$tmp,12 # >>32 + vxor $in0,$in0,$tmp + vxor $in0,$in0,$key + vperm $outtail,$in0,$in0,$outperm # rotate + vsel $stage,$outhead,$outtail,$outmask + vmr $outhead,$outtail + stvx $stage,0,$out + + addi $inp,$out,15 # 15 is not typo + addi $out,$out,0x50 + + li $rounds,10 + b Ldone + +.align 4 +L192: + lvx $tmp,0,$inp + li $cnt,4 + vperm $outtail,$in0,$in0,$outperm # rotate + vsel $stage,$outhead,$outtail,$outmask + vmr $outhead,$outtail + stvx $stage,0,$out + addi $out,$out,16 + vperm $in1,$in1,$tmp,$key # align [and byte swap in LE] + vspltisb $key,8 # borrow $key + mtctr $cnt + vsububm $mask,$mask,$key # adjust the mask + +Loop192: + vperm $key,$in1,$in1,$mask # roate-n-splat + vsldoi $tmp,$zero,$in0,12 # >>32 + vcipherlast $key,$key,$rcon + + vxor $in0,$in0,$tmp + vsldoi $tmp,$zero,$tmp,12 # >>32 + vxor $in0,$in0,$tmp + vsldoi $tmp,$zero,$tmp,12 # >>32 + vxor $in0,$in0,$tmp + + vsldoi $stage,$zero,$in1,8 + vspltw $tmp,$in0,3 + vxor $tmp,$tmp,$in1 + vsldoi $in1,$zero,$in1,12 # >>32 + vadduwm $rcon,$rcon,$rcon + vxor $in1,$in1,$tmp + vxor $in0,$in0,$key + vxor $in1,$in1,$key + vsldoi $stage,$stage,$in0,8 + + vperm $key,$in1,$in1,$mask # rotate-n-splat + vsldoi $tmp,$zero,$in0,12 # >>32 + vperm $outtail,$stage,$stage,$outperm # rotate + vsel $stage,$outhead,$outtail,$outmask + vmr $outhead,$outtail + vcipherlast $key,$key,$rcon + stvx $stage,0,$out + addi $out,$out,16 + + vsldoi $stage,$in0,$in1,8 + vxor $in0,$in0,$tmp + vsldoi $tmp,$zero,$tmp,12 # >>32 + vperm $outtail,$stage,$stage,$outperm # rotate + vsel $stage,$outhead,$outtail,$outmask + vmr $outhead,$outtail + vxor $in0,$in0,$tmp + vsldoi $tmp,$zero,$tmp,12 # >>32 + vxor $in0,$in0,$tmp + stvx $stage,0,$out + addi $out,$out,16 + + vspltw $tmp,$in0,3 + vxor $tmp,$tmp,$in1 + vsldoi $in1,$zero,$in1,12 # >>32 + vadduwm $rcon,$rcon,$rcon + vxor $in1,$in1,$tmp + vxor $in0,$in0,$key + vxor $in1,$in1,$key + vperm $outtail,$in0,$in0,$outperm # rotate + vsel $stage,$outhead,$outtail,$outmask + vmr $outhead,$outtail + stvx $stage,0,$out + addi $inp,$out,15 # 15 is not typo + addi $out,$out,16 + bdnz Loop192 + + li $rounds,12 + addi $out,$out,0x20 + b Ldone + +.align 4 +L256: + lvx $tmp,0,$inp + li $cnt,7 + li $rounds,14 + vperm $outtail,$in0,$in0,$outperm # rotate + vsel $stage,$outhead,$outtail,$outmask + vmr $outhead,$outtail + stvx $stage,0,$out + addi $out,$out,16 + vperm $in1,$in1,$tmp,$key # align [and byte swap in LE] + mtctr $cnt + +Loop256: + vperm $key,$in1,$in1,$mask # rotate-n-splat + vsldoi $tmp,$zero,$in0,12 # >>32 + vperm $outtail,$in1,$in1,$outperm # rotate + vsel $stage,$outhead,$outtail,$outmask + vmr $outhead,$outtail + vcipherlast $key,$key,$rcon + stvx $stage,0,$out + addi $out,$out,16 + + vxor $in0,$in0,$tmp + vsldoi $tmp,$zero,$tmp,12 # >>32 + vxor $in0,$in0,$tmp + vsldoi $tmp,$zero,$tmp,12 # >>32 + vxor $in0,$in0,$tmp + vadduwm $rcon,$rcon,$rcon + vxor $in0,$in0,$key + vperm $outtail,$in0,$in0,$outperm # rotate + vsel $stage,$outhead,$outtail,$outmask + vmr $outhead,$outtail + stvx $stage,0,$out + addi $inp,$out,15 # 15 is not typo + addi $out,$out,16 + bdz Ldone + + vspltw $key,$in0,3 # just splat + vsldoi $tmp,$zero,$in1,12 # >>32 + vsbox $key,$key + + vxor $in1,$in1,$tmp + vsldoi $tmp,$zero,$tmp,12 # >>32 + vxor $in1,$in1,$tmp + vsldoi $tmp,$zero,$tmp,12 # >>32 + vxor $in1,$in1,$tmp + + vxor $in1,$in1,$key + b Loop256 + +.align 4 +Ldone: + lvx $in1,0,$inp # redundant in aligned case + vsel $in1,$outhead,$in1,$outmask + stvx $in1,0,$inp + li $ptr,0 + mtspr 256,$vrsave + stw $rounds,0($out) + +Lenc_key_abort: + mr r3,$ptr + blr + .long 0 + .byte 0,12,0x14,1,0,0,3,0 + .long 0 +.size .${prefix}_set_encrypt_key,.-.${prefix}_set_encrypt_key + +.globl .${prefix}_set_decrypt_key + $STU $sp,-$FRAME($sp) + mflr r10 + $PUSH r10,$FRAME+$LRSAVE($sp) + bl Lset_encrypt_key + mtlr r10 + + cmpwi r3,0 + bne- Ldec_key_abort + + slwi $cnt,$rounds,4 + subi $inp,$out,240 # first round key + srwi $rounds,$rounds,1 + add $out,$inp,$cnt # last round key + mtctr $rounds + +Ldeckey: + lwz r0, 0($inp) + lwz r6, 4($inp) + lwz r7, 8($inp) + lwz r8, 12($inp) + addi $inp,$inp,16 + lwz r9, 0($out) + lwz r10,4($out) + lwz r11,8($out) + lwz r12,12($out) + stw r0, 0($out) + stw r6, 4($out) + stw r7, 8($out) + stw r8, 12($out) + subi $out,$out,16 + stw r9, -16($inp) + stw r10,-12($inp) + stw r11,-8($inp) + stw r12,-4($inp) + bdnz Ldeckey + + xor r3,r3,r3 # return value +Ldec_key_abort: + addi $sp,$sp,$FRAME + blr + .long 0 + .byte 0,12,4,1,0x80,0,3,0 + .long 0 +.size .${prefix}_set_decrypt_key,.-.${prefix}_set_decrypt_key +___ +}}} +######################################################################### +{{{ # Single block en- and decrypt procedures # +sub gen_block () { +my $dir = shift; +my $n = $dir eq "de" ? "n" : ""; +my ($inp,$out,$key,$rounds,$idx)=map("r$_",(3..7)); + +$code.=<<___; +.globl .${prefix}_${dir}crypt + lwz $rounds,240($key) + lis r0,0xfc00 + mfspr $vrsave,256 + li $idx,15 # 15 is not typo + mtspr 256,r0 + + lvx v0,0,$inp + neg r11,$out + lvx v1,$idx,$inp + lvsl v2,0,$inp # inpperm + le?vspltisb v4,0x0f + ?lvsl v3,0,r11 # outperm + le?vxor v2,v2,v4 + li $idx,16 + vperm v0,v0,v1,v2 # align [and byte swap in LE] + lvx v1,0,$key + ?lvsl v5,0,$key # keyperm + srwi $rounds,$rounds,1 + lvx v2,$idx,$key + addi $idx,$idx,16 + subi $rounds,$rounds,1 + ?vperm v1,v1,v2,v5 # align round key + + vxor v0,v0,v1 + lvx v1,$idx,$key + addi $idx,$idx,16 + mtctr $rounds + +Loop_${dir}c: + ?vperm v2,v2,v1,v5 + v${n}cipher v0,v0,v2 + lvx v2,$idx,$key + addi $idx,$idx,16 + ?vperm v1,v1,v2,v5 + v${n}cipher v0,v0,v1 + lvx v1,$idx,$key + addi $idx,$idx,16 + bdnz Loop_${dir}c + + ?vperm v2,v2,v1,v5 + v${n}cipher v0,v0,v2 + lvx v2,$idx,$key + ?vperm v1,v1,v2,v5 + v${n}cipherlast v0,v0,v1 + + vspltisb v2,-1 + vxor v1,v1,v1 + li $idx,15 # 15 is not typo + ?vperm v2,v1,v2,v3 # outmask + le?vxor v3,v3,v4 + lvx v1,0,$out # outhead + vperm v0,v0,v0,v3 # rotate [and byte swap in LE] + vsel v1,v1,v0,v2 + lvx v4,$idx,$out + stvx v1,0,$out + vsel v0,v0,v4,v2 + stvx v0,$idx,$out + + mtspr 256,$vrsave + blr + .long 0 + .byte 0,12,0x14,0,0,0,3,0 + .long 0 +.size .${prefix}_${dir}crypt,.-.${prefix}_${dir}crypt +___ +} +&gen_block("en"); +&gen_block("de"); +}}} +######################################################################### +{{{ # CBC en- and decrypt procedures # +my ($inp,$out,$len,$key,$ivp,$enc,$rounds,$idx)=map("r$_",(3..10)); +my ($rndkey0,$rndkey1,$inout,$tmp)= map("v$_",(0..3)); +my ($ivec,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm)= + map("v$_",(4..10)); +$code.=<<___; +.globl .${prefix}_cbc_encrypt + ${UCMP}i $len,16 + bltlr- + + cmpwi $enc,0 # test direction + lis r0,0xffe0 + mfspr $vrsave,256 + mtspr 256,r0 + + li $idx,15 + vxor $rndkey0,$rndkey0,$rndkey0 + le?vspltisb $tmp,0x0f + + lvx $ivec,0,$ivp # load [unaligned] iv + lvsl $inpperm,0,$ivp + lvx $inptail,$idx,$ivp + le?vxor $inpperm,$inpperm,$tmp + vperm $ivec,$ivec,$inptail,$inpperm + + neg r11,$inp + ?lvsl $keyperm,0,$key # prepare for unaligned key + lwz $rounds,240($key) + + lvsr $inpperm,0,r11 # prepare for unaligned load + lvx $inptail,0,$inp + addi $inp,$inp,15 # 15 is not typo + le?vxor $inpperm,$inpperm,$tmp + + ?lvsr $outperm,0,$out # prepare for unaligned store + vspltisb $outmask,-1 + lvx $outhead,0,$out + ?vperm $outmask,$rndkey0,$outmask,$outperm + le?vxor $outperm,$outperm,$tmp + + srwi $rounds,$rounds,1 + li $idx,16 + subi $rounds,$rounds,1 + beq Lcbc_dec + +Lcbc_enc: + vmr $inout,$inptail + lvx $inptail,0,$inp + addi $inp,$inp,16 + mtctr $rounds + subi $len,$len,16 # len-=16 + + lvx $rndkey0,0,$key + vperm $inout,$inout,$inptail,$inpperm + lvx $rndkey1,$idx,$key + addi $idx,$idx,16 + ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm + vxor $inout,$inout,$rndkey0 + lvx $rndkey0,$idx,$key + addi $idx,$idx,16 + vxor $inout,$inout,$ivec + +Loop_cbc_enc: + ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm + vcipher $inout,$inout,$rndkey1 + lvx $rndkey1,$idx,$key + addi $idx,$idx,16 + ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm + vcipher $inout,$inout,$rndkey0 + lvx $rndkey0,$idx,$key + addi $idx,$idx,16 + bdnz Loop_cbc_enc + + ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm + vcipher $inout,$inout,$rndkey1 + lvx $rndkey1,$idx,$key + li $idx,16 + ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm + vcipherlast $ivec,$inout,$rndkey0 + ${UCMP}i $len,16 + + vperm $tmp,$ivec,$ivec,$outperm + vsel $inout,$outhead,$tmp,$outmask + vmr $outhead,$tmp + stvx $inout,0,$out + addi $out,$out,16 + bge Lcbc_enc + + b Lcbc_done + +.align 4 +Lcbc_dec: + ${UCMP}i $len,128 + bge _aesp8_cbc_decrypt8x + vmr $tmp,$inptail + lvx $inptail,0,$inp + addi $inp,$inp,16 + mtctr $rounds + subi $len,$len,16 # len-=16 + + lvx $rndkey0,0,$key + vperm $tmp,$tmp,$inptail,$inpperm + lvx $rndkey1,$idx,$key + addi $idx,$idx,16 + ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm + vxor $inout,$tmp,$rndkey0 + lvx $rndkey0,$idx,$key + addi $idx,$idx,16 + +Loop_cbc_dec: + ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm + vncipher $inout,$inout,$rndkey1 + lvx $rndkey1,$idx,$key + addi $idx,$idx,16 + ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm + vncipher $inout,$inout,$rndkey0 + lvx $rndkey0,$idx,$key + addi $idx,$idx,16 + bdnz Loop_cbc_dec + + ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm + vncipher $inout,$inout,$rndkey1 + lvx $rndkey1,$idx,$key + li $idx,16 + ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm + vncipherlast $inout,$inout,$rndkey0 + ${UCMP}i $len,16 + + vxor $inout,$inout,$ivec + vmr $ivec,$tmp + vperm $tmp,$inout,$inout,$outperm + vsel $inout,$outhead,$tmp,$outmask + vmr $outhead,$tmp + stvx $inout,0,$out + addi $out,$out,16 + bge Lcbc_dec + +Lcbc_done: + addi $out,$out,-1 + lvx $inout,0,$out # redundant in aligned case + vsel $inout,$outhead,$inout,$outmask + stvx $inout,0,$out + + neg $enc,$ivp # write [unaligned] iv + li $idx,15 # 15 is not typo + vxor $rndkey0,$rndkey0,$rndkey0 + vspltisb $outmask,-1 + le?vspltisb $tmp,0x0f + ?lvsl $outperm,0,$enc + ?vperm $outmask,$rndkey0,$outmask,$outperm + le?vxor $outperm,$outperm,$tmp + lvx $outhead,0,$ivp + vperm $ivec,$ivec,$ivec,$outperm + vsel $inout,$outhead,$ivec,$outmask + lvx $inptail,$idx,$ivp + stvx $inout,0,$ivp + vsel $inout,$ivec,$inptail,$outmask + stvx $inout,$idx,$ivp + + mtspr 256,$vrsave + blr + .long 0 + .byte 0,12,0x14,0,0,0,6,0 + .long 0 +___ +######################################################################### +{{ # Optimized CBC decrypt procedure # +my $key_="r11"; +my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31)); +my ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7 )=map("v$_",(0..3,10..13)); +my ($out0,$out1,$out2,$out3,$out4,$out5,$out6,$out7)=map("v$_",(14..21)); +my $rndkey0="v23"; # v24-v25 rotating buffer for first found keys + # v26-v31 last 6 round keys +my ($tmp,$keyperm)=($in3,$in4); # aliases with "caller", redundant assignment + +$code.=<<___; +.align 5 +_aesp8_cbc_decrypt8x: + $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp) + li r10,`$FRAME+8*16+15` + li r11,`$FRAME+8*16+31` + stvx v20,r10,$sp # ABI says so + addi r10,r10,32 + stvx v21,r11,$sp + addi r11,r11,32 + stvx v22,r10,$sp + addi r10,r10,32 + stvx v23,r11,$sp + addi r11,r11,32 + stvx v24,r10,$sp + addi r10,r10,32 + stvx v25,r11,$sp + addi r11,r11,32 + stvx v26,r10,$sp + addi r10,r10,32 + stvx v27,r11,$sp + addi r11,r11,32 + stvx v28,r10,$sp + addi r10,r10,32 + stvx v29,r11,$sp + addi r11,r11,32 + stvx v30,r10,$sp + stvx v31,r11,$sp + li r0,-1 + stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave + li $x10,0x10 + $PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp) + li $x20,0x20 + $PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp) + li $x30,0x30 + $PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp) + li $x40,0x40 + $PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp) + li $x50,0x50 + $PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp) + li $x60,0x60 + $PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp) + li $x70,0x70 + mtspr 256,r0 + + subi $rounds,$rounds,3 # -4 in total + subi $len,$len,128 # bias + + lvx $rndkey0,$x00,$key # load key schedule + lvx v30,$x10,$key + addi $key,$key,0x20 + lvx v31,$x00,$key + ?vperm $rndkey0,$rndkey0,v30,$keyperm + addi $key_,$sp,$FRAME+15 + mtctr $rounds + +Load_cbc_dec_key: + ?vperm v24,v30,v31,$keyperm + lvx v30,$x10,$key + addi $key,$key,0x20 + stvx v24,$x00,$key_ # off-load round[1] + ?vperm v25,v31,v30,$keyperm + lvx v31,$x00,$key + stvx v25,$x10,$key_ # off-load round[2] + addi $key_,$key_,0x20 + bdnz Load_cbc_dec_key + + lvx v26,$x10,$key + ?vperm v24,v30,v31,$keyperm + lvx v27,$x20,$key + stvx v24,$x00,$key_ # off-load round[3] + ?vperm v25,v31,v26,$keyperm + lvx v28,$x30,$key + stvx v25,$x10,$key_ # off-load round[4] + addi $key_,$sp,$FRAME+15 # rewind $key_ + ?vperm v26,v26,v27,$keyperm + lvx v29,$x40,$key + ?vperm v27,v27,v28,$keyperm + lvx v30,$x50,$key + ?vperm v28,v28,v29,$keyperm + lvx v31,$x60,$key + ?vperm v29,v29,v30,$keyperm + lvx $out0,$x70,$key # borrow $out0 + ?vperm v30,v30,v31,$keyperm + lvx v24,$x00,$key_ # pre-load round[1] + ?vperm v31,v31,$out0,$keyperm + lvx v25,$x10,$key_ # pre-load round[2] + + #lvx $inptail,0,$inp # "caller" already did this + #addi $inp,$inp,15 # 15 is not typo + subi $inp,$inp,15 # undo "caller" + + le?li $idx,8 + lvx_u $in0,$x00,$inp # load first 8 "words" + le?lvsl $inpperm,0,$idx + le?vspltisb $tmp,0x0f + lvx_u $in1,$x10,$inp + le?vxor $inpperm,$inpperm,$tmp # transform for lvx_u/stvx_u + lvx_u $in2,$x20,$inp + le?vperm $in0,$in0,$in0,$inpperm + lvx_u $in3,$x30,$inp + le?vperm $in1,$in1,$in1,$inpperm + lvx_u $in4,$x40,$inp + le?vperm $in2,$in2,$in2,$inpperm + vxor $out0,$in0,$rndkey0 + lvx_u $in5,$x50,$inp + le?vperm $in3,$in3,$in3,$inpperm + vxor $out1,$in1,$rndkey0 + lvx_u $in6,$x60,$inp + le?vperm $in4,$in4,$in4,$inpperm + vxor $out2,$in2,$rndkey0 + lvx_u $in7,$x70,$inp + addi $inp,$inp,0x80 + le?vperm $in5,$in5,$in5,$inpperm + vxor $out3,$in3,$rndkey0 + le?vperm $in6,$in6,$in6,$inpperm + vxor $out4,$in4,$rndkey0 + le?vperm $in7,$in7,$in7,$inpperm + vxor $out5,$in5,$rndkey0 + vxor $out6,$in6,$rndkey0 + vxor $out7,$in7,$rndkey0 + + mtctr $rounds + b Loop_cbc_dec8x +.align 5 +Loop_cbc_dec8x: + vncipher $out0,$out0,v24 + vncipher $out1,$out1,v24 + vncipher $out2,$out2,v24 + vncipher $out3,$out3,v24 + vncipher $out4,$out4,v24 + vncipher $out5,$out5,v24 + vncipher $out6,$out6,v24 + vncipher $out7,$out7,v24 + lvx v24,$x20,$key_ # round[3] + addi $key_,$key_,0x20 + + vncipher $out0,$out0,v25 + vncipher $out1,$out1,v25 + vncipher $out2,$out2,v25 + vncipher $out3,$out3,v25 + vncipher $out4,$out4,v25 + vncipher $out5,$out5,v25 + vncipher $out6,$out6,v25 + vncipher $out7,$out7,v25 + lvx v25,$x10,$key_ # round[4] + bdnz Loop_cbc_dec8x + + subic $len,$len,128 # $len-=128 + vncipher $out0,$out0,v24 + vncipher $out1,$out1,v24 + vncipher $out2,$out2,v24 + vncipher $out3,$out3,v24 + vncipher $out4,$out4,v24 + vncipher $out5,$out5,v24 + vncipher $out6,$out6,v24 + vncipher $out7,$out7,v24 + + subfe. r0,r0,r0 # borrow?-1:0 + vncipher $out0,$out0,v25 + vncipher $out1,$out1,v25 + vncipher $out2,$out2,v25 + vncipher $out3,$out3,v25 + vncipher $out4,$out4,v25 + vncipher $out5,$out5,v25 + vncipher $out6,$out6,v25 + vncipher $out7,$out7,v25 + + and r0,r0,$len + vncipher $out0,$out0,v26 + vncipher $out1,$out1,v26 + vncipher $out2,$out2,v26 + vncipher $out3,$out3,v26 + vncipher $out4,$out4,v26 + vncipher $out5,$out5,v26 + vncipher $out6,$out6,v26 + vncipher $out7,$out7,v26 + + add $inp,$inp,r0 # $inp is adjusted in such + # way that at exit from the + # loop inX-in7 are loaded + # with last "words" + vncipher $out0,$out0,v27 + vncipher $out1,$out1,v27 + vncipher $out2,$out2,v27 + vncipher $out3,$out3,v27 + vncipher $out4,$out4,v27 + vncipher $out5,$out5,v27 + vncipher $out6,$out6,v27 + vncipher $out7,$out7,v27 + + addi $key_,$sp,$FRAME+15 # rewind $key_ + vncipher $out0,$out0,v28 + vncipher $out1,$out1,v28 + vncipher $out2,$out2,v28 + vncipher $out3,$out3,v28 + vncipher $out4,$out4,v28 + vncipher $out5,$out5,v28 + vncipher $out6,$out6,v28 + vncipher $out7,$out7,v28 + lvx v24,$x00,$key_ # re-pre-load round[1] + + vncipher $out0,$out0,v29 + vncipher $out1,$out1,v29 + vncipher $out2,$out2,v29 + vncipher $out3,$out3,v29 + vncipher $out4,$out4,v29 + vncipher $out5,$out5,v29 + vncipher $out6,$out6,v29 + vncipher $out7,$out7,v29 + lvx v25,$x10,$key_ # re-pre-load round[2] + + vncipher $out0,$out0,v30 + vxor $ivec,$ivec,v31 # xor with last round key + vncipher $out1,$out1,v30 + vxor $in0,$in0,v31 + vncipher $out2,$out2,v30 + vxor $in1,$in1,v31 + vncipher $out3,$out3,v30 + vxor $in2,$in2,v31 + vncipher $out4,$out4,v30 + vxor $in3,$in3,v31 + vncipher $out5,$out5,v30 + vxor $in4,$in4,v31 + vncipher $out6,$out6,v30 + vxor $in5,$in5,v31 + vncipher $out7,$out7,v30 + vxor $in6,$in6,v31 + + vncipherlast $out0,$out0,$ivec + vncipherlast $out1,$out1,$in0 + lvx_u $in0,$x00,$inp # load next input block + vncipherlast $out2,$out2,$in1 + lvx_u $in1,$x10,$inp + vncipherlast $out3,$out3,$in2 + le?vperm $in0,$in0,$in0,$inpperm + lvx_u $in2,$x20,$inp + vncipherlast $out4,$out4,$in3 + le?vperm $in1,$in1,$in1,$inpperm + lvx_u $in3,$x30,$inp + vncipherlast $out5,$out5,$in4 + le?vperm $in2,$in2,$in2,$inpperm + lvx_u $in4,$x40,$inp + vncipherlast $out6,$out6,$in5 + le?vperm $in3,$in3,$in3,$inpperm + lvx_u $in5,$x50,$inp + vncipherlast $out7,$out7,$in6 + le?vperm $in4,$in4,$in4,$inpperm + lvx_u $in6,$x60,$inp + vmr $ivec,$in7 + le?vperm $in5,$in5,$in5,$inpperm + lvx_u $in7,$x70,$inp + addi $inp,$inp,0x80 + + le?vperm $out0,$out0,$out0,$inpperm + le?vperm $out1,$out1,$out1,$inpperm + stvx_u $out0,$x00,$out + le?vperm $in6,$in6,$in6,$inpperm + vxor $out0,$in0,$rndkey0 + le?vperm $out2,$out2,$out2,$inpperm + stvx_u $out1,$x10,$out + le?vperm $in7,$in7,$in7,$inpperm + vxor $out1,$in1,$rndkey0 + le?vperm $out3,$out3,$out3,$inpperm + stvx_u $out2,$x20,$out + vxor $out2,$in2,$rndkey0 + le?vperm $out4,$out4,$out4,$inpperm + stvx_u $out3,$x30,$out + vxor $out3,$in3,$rndkey0 + le?vperm $out5,$out5,$out5,$inpperm + stvx_u $out4,$x40,$out + vxor $out4,$in4,$rndkey0 + le?vperm $out6,$out6,$out6,$inpperm + stvx_u $out5,$x50,$out + vxor $out5,$in5,$rndkey0 + le?vperm $out7,$out7,$out7,$inpperm + stvx_u $out6,$x60,$out + vxor $out6,$in6,$rndkey0 + stvx_u $out7,$x70,$out + addi $out,$out,0x80 + vxor $out7,$in7,$rndkey0 + + mtctr $rounds + beq Loop_cbc_dec8x # did $len-=128 borrow? + + addic. $len,$len,128 + beq Lcbc_dec8x_done + nop + nop + +Loop_cbc_dec8x_tail: # up to 7 "words" tail... + vncipher $out1,$out1,v24 + vncipher $out2,$out2,v24 + vncipher $out3,$out3,v24 + vncipher $out4,$out4,v24 + vncipher $out5,$out5,v24 + vncipher $out6,$out6,v24 + vncipher $out7,$out7,v24 + lvx v24,$x20,$key_ # round[3] + addi $key_,$key_,0x20 + + vncipher $out1,$out1,v25 + vncipher $out2,$out2,v25 + vncipher $out3,$out3,v25 + vncipher $out4,$out4,v25 + vncipher $out5,$out5,v25 + vncipher $out6,$out6,v25 + vncipher $out7,$out7,v25 + lvx v25,$x10,$key_ # round[4] + bdnz Loop_cbc_dec8x_tail + + vncipher $out1,$out1,v24 + vncipher $out2,$out2,v24 + vncipher $out3,$out3,v24 + vncipher $out4,$out4,v24 + vncipher $out5,$out5,v24 + vncipher $out6,$out6,v24 + vncipher $out7,$out7,v24 + + vncipher $out1,$out1,v25 + vncipher $out2,$out2,v25 + vncipher $out3,$out3,v25 + vncipher $out4,$out4,v25 + vncipher $out5,$out5,v25 + vncipher $out6,$out6,v25 + vncipher $out7,$out7,v25 + + vncipher $out1,$out1,v26 + vncipher $out2,$out2,v26 + vncipher $out3,$out3,v26 + vncipher $out4,$out4,v26 + vncipher $out5,$out5,v26 + vncipher $out6,$out6,v26 + vncipher $out7,$out7,v26 + + vncipher $out1,$out1,v27 + vncipher $out2,$out2,v27 + vncipher $out3,$out3,v27 + vncipher $out4,$out4,v27 + vncipher $out5,$out5,v27 + vncipher $out6,$out6,v27 + vncipher $out7,$out7,v27 + + vncipher $out1,$out1,v28 + vncipher $out2,$out2,v28 + vncipher $out3,$out3,v28 + vncipher $out4,$out4,v28 + vncipher $out5,$out5,v28 + vncipher $out6,$out6,v28 + vncipher $out7,$out7,v28 + + vncipher $out1,$out1,v29 + vncipher $out2,$out2,v29 + vncipher $out3,$out3,v29 + vncipher $out4,$out4,v29 + vncipher $out5,$out5,v29 + vncipher $out6,$out6,v29 + vncipher $out7,$out7,v29 + + vncipher $out1,$out1,v30 + vxor $ivec,$ivec,v31 # last round key + vncipher $out2,$out2,v30 + vxor $in1,$in1,v31 + vncipher $out3,$out3,v30 + vxor $in2,$in2,v31 + vncipher $out4,$out4,v30 + vxor $in3,$in3,v31 + vncipher $out5,$out5,v30 + vxor $in4,$in4,v31 + vncipher $out6,$out6,v30 + vxor $in5,$in5,v31 + vncipher $out7,$out7,v30 + vxor $in6,$in6,v31 + + cmplwi $len,32 # switch($len) + blt Lcbc_dec8x_one + nop + beq Lcbc_dec8x_two + cmplwi $len,64 + blt Lcbc_dec8x_three + nop + beq Lcbc_dec8x_four + cmplwi $len,96 + blt Lcbc_dec8x_five + nop + beq Lcbc_dec8x_six + +Lcbc_dec8x_seven: + vncipherlast $out1,$out1,$ivec + vncipherlast $out2,$out2,$in1 + vncipherlast $out3,$out3,$in2 + vncipherlast $out4,$out4,$in3 + vncipherlast $out5,$out5,$in4 + vncipherlast $out6,$out6,$in5 + vncipherlast $out7,$out7,$in6 + vmr $ivec,$in7 + + le?vperm $out1,$out1,$out1,$inpperm + le?vperm $out2,$out2,$out2,$inpperm + stvx_u $out1,$x00,$out + le?vperm $out3,$out3,$out3,$inpperm + stvx_u $out2,$x10,$out + le?vperm $out4,$out4,$out4,$inpperm + stvx_u $out3,$x20,$out + le?vperm $out5,$out5,$out5,$inpperm + stvx_u $out4,$x30,$out + le?vperm $out6,$out6,$out6,$inpperm + stvx_u $out5,$x40,$out + le?vperm $out7,$out7,$out7,$inpperm + stvx_u $out6,$x50,$out + stvx_u $out7,$x60,$out + addi $out,$out,0x70 + b Lcbc_dec8x_done + +.align 5 +Lcbc_dec8x_six: + vncipherlast $out2,$out2,$ivec + vncipherlast $out3,$out3,$in2 + vncipherlast $out4,$out4,$in3 + vncipherlast $out5,$out5,$in4 + vncipherlast $out6,$out6,$in5 + vncipherlast $out7,$out7,$in6 + vmr $ivec,$in7 + + le?vperm $out2,$out2,$out2,$inpperm + le?vperm $out3,$out3,$out3,$inpperm + stvx_u $out2,$x00,$out + le?vperm $out4,$out4,$out4,$inpperm + stvx_u $out3,$x10,$out + le?vperm $out5,$out5,$out5,$inpperm + stvx_u $out4,$x20,$out + le?vperm $out6,$out6,$out6,$inpperm + stvx_u $out5,$x30,$out + le?vperm $out7,$out7,$out7,$inpperm + stvx_u $out6,$x40,$out + stvx_u $out7,$x50,$out + addi $out,$out,0x60 + b Lcbc_dec8x_done + +.align 5 +Lcbc_dec8x_five: + vncipherlast $out3,$out3,$ivec + vncipherlast $out4,$out4,$in3 + vncipherlast $out5,$out5,$in4 + vncipherlast $out6,$out6,$in5 + vncipherlast $out7,$out7,$in6 + vmr $ivec,$in7 + + le?vperm $out3,$out3,$out3,$inpperm + le?vperm $out4,$out4,$out4,$inpperm + stvx_u $out3,$x00,$out + le?vperm $out5,$out5,$out5,$inpperm + stvx_u $out4,$x10,$out + le?vperm $out6,$out6,$out6,$inpperm + stvx_u $out5,$x20,$out + le?vperm $out7,$out7,$out7,$inpperm + stvx_u $out6,$x30,$out + stvx_u $out7,$x40,$out + addi $out,$out,0x50 + b Lcbc_dec8x_done + +.align 5 +Lcbc_dec8x_four: + vncipherlast $out4,$out4,$ivec + vncipherlast $out5,$out5,$in4 + vncipherlast $out6,$out6,$in5 + vncipherlast $out7,$out7,$in6 + vmr $ivec,$in7 + + le?vperm $out4,$out4,$out4,$inpperm + le?vperm $out5,$out5,$out5,$inpperm + stvx_u $out4,$x00,$out + le?vperm $out6,$out6,$out6,$inpperm + stvx_u $out5,$x10,$out + le?vperm $out7,$out7,$out7,$inpperm + stvx_u $out6,$x20,$out + stvx_u $out7,$x30,$out + addi $out,$out,0x40 + b Lcbc_dec8x_done + +.align 5 +Lcbc_dec8x_three: + vncipherlast $out5,$out5,$ivec + vncipherlast $out6,$out6,$in5 + vncipherlast $out7,$out7,$in6 + vmr $ivec,$in7 + + le?vperm $out5,$out5,$out5,$inpperm + le?vperm $out6,$out6,$out6,$inpperm + stvx_u $out5,$x00,$out + le?vperm $out7,$out7,$out7,$inpperm + stvx_u $out6,$x10,$out + stvx_u $out7,$x20,$out + addi $out,$out,0x30 + b Lcbc_dec8x_done + +.align 5 +Lcbc_dec8x_two: + vncipherlast $out6,$out6,$ivec + vncipherlast $out7,$out7,$in6 + vmr $ivec,$in7 + + le?vperm $out6,$out6,$out6,$inpperm + le?vperm $out7,$out7,$out7,$inpperm + stvx_u $out6,$x00,$out + stvx_u $out7,$x10,$out + addi $out,$out,0x20 + b Lcbc_dec8x_done + +.align 5 +Lcbc_dec8x_one: + vncipherlast $out7,$out7,$ivec + vmr $ivec,$in7 + + le?vperm $out7,$out7,$out7,$inpperm + stvx_u $out7,0,$out + addi $out,$out,0x10 + +Lcbc_dec8x_done: + le?vperm $ivec,$ivec,$ivec,$inpperm + stvx_u $ivec,0,$ivp # write [unaligned] iv + + li r10,`$FRAME+15` + li r11,`$FRAME+31` + stvx $inpperm,r10,$sp # wipe copies of round keys + addi r10,r10,32 + stvx $inpperm,r11,$sp + addi r11,r11,32 + stvx $inpperm,r10,$sp + addi r10,r10,32 + stvx $inpperm,r11,$sp + addi r11,r11,32 + stvx $inpperm,r10,$sp + addi r10,r10,32 + stvx $inpperm,r11,$sp + addi r11,r11,32 + stvx $inpperm,r10,$sp + addi r10,r10,32 + stvx $inpperm,r11,$sp + addi r11,r11,32 + + mtspr 256,$vrsave + lvx v20,r10,$sp # ABI says so + addi r10,r10,32 + lvx v21,r11,$sp + addi r11,r11,32 + lvx v22,r10,$sp + addi r10,r10,32 + lvx v23,r11,$sp + addi r11,r11,32 + lvx v24,r10,$sp + addi r10,r10,32 + lvx v25,r11,$sp + addi r11,r11,32 + lvx v26,r10,$sp + addi r10,r10,32 + lvx v27,r11,$sp + addi r11,r11,32 + lvx v28,r10,$sp + addi r10,r10,32 + lvx v29,r11,$sp + addi r11,r11,32 + lvx v30,r10,$sp + lvx v31,r11,$sp + $POP r26,`$FRAME+21*16+0*$SIZE_T`($sp) + $POP r27,`$FRAME+21*16+1*$SIZE_T`($sp) + $POP r28,`$FRAME+21*16+2*$SIZE_T`($sp) + $POP r29,`$FRAME+21*16+3*$SIZE_T`($sp) + $POP r30,`$FRAME+21*16+4*$SIZE_T`($sp) + $POP r31,`$FRAME+21*16+5*$SIZE_T`($sp) + addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T` + blr + .long 0 + .byte 0,12,0x14,0,0x80,6,6,0 + .long 0 +.size .${prefix}_cbc_encrypt,.-.${prefix}_cbc_encrypt +___ +}} }}} + +######################################################################### +{{{ # CTR procedure[s] # +my ($inp,$out,$len,$key,$ivp,$x10,$rounds,$idx)=map("r$_",(3..10)); +my ($rndkey0,$rndkey1,$inout,$tmp)= map("v$_",(0..3)); +my ($ivec,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm,$one)= + map("v$_",(4..11)); +my $dat=$tmp; + +$code.=<<___; +.globl .${prefix}_ctr32_encrypt_blocks + ${UCMP}i $len,1 + bltlr- + + lis r0,0xfff0 + mfspr $vrsave,256 + mtspr 256,r0 + + li $idx,15 + vxor $rndkey0,$rndkey0,$rndkey0 + le?vspltisb $tmp,0x0f + + lvx $ivec,0,$ivp # load [unaligned] iv + lvsl $inpperm,0,$ivp + lvx $inptail,$idx,$ivp + vspltisb $one,1 + le?vxor $inpperm,$inpperm,$tmp + vperm $ivec,$ivec,$inptail,$inpperm + vsldoi $one,$rndkey0,$one,1 + + neg r11,$inp + ?lvsl $keyperm,0,$key # prepare for unaligned key + lwz $rounds,240($key) + + lvsr $inpperm,0,r11 # prepare for unaligned load + lvx $inptail,0,$inp + addi $inp,$inp,15 # 15 is not typo + le?vxor $inpperm,$inpperm,$tmp + + srwi $rounds,$rounds,1 + li $idx,16 + subi $rounds,$rounds,1 + + ${UCMP}i $len,8 + bge _aesp8_ctr32_encrypt8x + + ?lvsr $outperm,0,$out # prepare for unaligned store + vspltisb $outmask,-1 + lvx $outhead,0,$out + ?vperm $outmask,$rndkey0,$outmask,$outperm + le?vxor $outperm,$outperm,$tmp + + lvx $rndkey0,0,$key + mtctr $rounds + lvx $rndkey1,$idx,$key + addi $idx,$idx,16 + ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm + vxor $inout,$ivec,$rndkey0 + lvx $rndkey0,$idx,$key + addi $idx,$idx,16 + b Loop_ctr32_enc + +.align 5 +Loop_ctr32_enc: + ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm + vcipher $inout,$inout,$rndkey1 + lvx $rndkey1,$idx,$key + addi $idx,$idx,16 + ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm + vcipher $inout,$inout,$rndkey0 + lvx $rndkey0,$idx,$key + addi $idx,$idx,16 + bdnz Loop_ctr32_enc + + vadduwm $ivec,$ivec,$one + vmr $dat,$inptail + lvx $inptail,0,$inp + addi $inp,$inp,16 + subic. $len,$len,1 # blocks-- + + ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm + vcipher $inout,$inout,$rndkey1 + lvx $rndkey1,$idx,$key + vperm $dat,$dat,$inptail,$inpperm + li $idx,16 + ?vperm $rndkey1,$rndkey0,$rndkey1,$keyperm + lvx $rndkey0,0,$key + vxor $dat,$dat,$rndkey1 # last round key + vcipherlast $inout,$inout,$dat + + lvx $rndkey1,$idx,$key + addi $idx,$idx,16 + vperm $inout,$inout,$inout,$outperm + vsel $dat,$outhead,$inout,$outmask + mtctr $rounds + ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm + vmr $outhead,$inout + vxor $inout,$ivec,$rndkey0 + lvx $rndkey0,$idx,$key + addi $idx,$idx,16 + stvx $dat,0,$out + addi $out,$out,16 + bne Loop_ctr32_enc + + addi $out,$out,-1 + lvx $inout,0,$out # redundant in aligned case + vsel $inout,$outhead,$inout,$outmask + stvx $inout,0,$out + + mtspr 256,$vrsave + blr + .long 0 + .byte 0,12,0x14,0,0,0,6,0 + .long 0 +___ +######################################################################### +{{ # Optimized CTR procedure # +my $key_="r11"; +my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31)); +my ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7 )=map("v$_",(0..3,10,12..14)); +my ($out0,$out1,$out2,$out3,$out4,$out5,$out6,$out7)=map("v$_",(15..22)); +my $rndkey0="v23"; # v24-v25 rotating buffer for first found keys + # v26-v31 last 6 round keys +my ($tmp,$keyperm)=($in3,$in4); # aliases with "caller", redundant assignment +my ($two,$three,$four)=($outhead,$outperm,$outmask); + +$code.=<<___; +.align 5 +_aesp8_ctr32_encrypt8x: + $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp) + li r10,`$FRAME+8*16+15` + li r11,`$FRAME+8*16+31` + stvx v20,r10,$sp # ABI says so + addi r10,r10,32 + stvx v21,r11,$sp + addi r11,r11,32 + stvx v22,r10,$sp + addi r10,r10,32 + stvx v23,r11,$sp + addi r11,r11,32 + stvx v24,r10,$sp + addi r10,r10,32 + stvx v25,r11,$sp + addi r11,r11,32 + stvx v26,r10,$sp + addi r10,r10,32 + stvx v27,r11,$sp + addi r11,r11,32 + stvx v28,r10,$sp + addi r10,r10,32 + stvx v29,r11,$sp + addi r11,r11,32 + stvx v30,r10,$sp + stvx v31,r11,$sp + li r0,-1 + stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave + li $x10,0x10 + $PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp) + li $x20,0x20 + $PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp) + li $x30,0x30 + $PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp) + li $x40,0x40 + $PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp) + li $x50,0x50 + $PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp) + li $x60,0x60 + $PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp) + li $x70,0x70 + mtspr 256,r0 + + subi $rounds,$rounds,3 # -4 in total + + lvx $rndkey0,$x00,$key # load key schedule + lvx v30,$x10,$key + addi $key,$key,0x20 + lvx v31,$x00,$key + ?vperm $rndkey0,$rndkey0,v30,$keyperm + addi $key_,$sp,$FRAME+15 + mtctr $rounds + +Load_ctr32_enc_key: + ?vperm v24,v30,v31,$keyperm + lvx v30,$x10,$key + addi $key,$key,0x20 + stvx v24,$x00,$key_ # off-load round[1] + ?vperm v25,v31,v30,$keyperm + lvx v31,$x00,$key + stvx v25,$x10,$key_ # off-load round[2] + addi $key_,$key_,0x20 + bdnz Load_ctr32_enc_key + + lvx v26,$x10,$key + ?vperm v24,v30,v31,$keyperm + lvx v27,$x20,$key + stvx v24,$x00,$key_ # off-load round[3] + ?vperm v25,v31,v26,$keyperm + lvx v28,$x30,$key + stvx v25,$x10,$key_ # off-load round[4] + addi $key_,$sp,$FRAME+15 # rewind $key_ + ?vperm v26,v26,v27,$keyperm + lvx v29,$x40,$key + ?vperm v27,v27,v28,$keyperm + lvx v30,$x50,$key + ?vperm v28,v28,v29,$keyperm + lvx v31,$x60,$key + ?vperm v29,v29,v30,$keyperm + lvx $out0,$x70,$key # borrow $out0 + ?vperm v30,v30,v31,$keyperm + lvx v24,$x00,$key_ # pre-load round[1] + ?vperm v31,v31,$out0,$keyperm + lvx v25,$x10,$key_ # pre-load round[2] + + vadduwm $two,$one,$one + subi $inp,$inp,15 # undo "caller" + $SHL $len,$len,4 + + vadduwm $out1,$ivec,$one # counter values ... + vadduwm $out2,$ivec,$two + vxor $out0,$ivec,$rndkey0 # ... xored with rndkey[0] + le?li $idx,8 + vadduwm $out3,$out1,$two + vxor $out1,$out1,$rndkey0 + le?lvsl $inpperm,0,$idx + vadduwm $out4,$out2,$two + vxor $out2,$out2,$rndkey0 + le?vspltisb $tmp,0x0f + vadduwm $out5,$out3,$two + vxor $out3,$out3,$rndkey0 + le?vxor $inpperm,$inpperm,$tmp # transform for lvx_u/stvx_u + vadduwm $out6,$out4,$two + vxor $out4,$out4,$rndkey0 + vadduwm $out7,$out5,$two + vxor $out5,$out5,$rndkey0 + vadduwm $ivec,$out6,$two # next counter value + vxor $out6,$out6,$rndkey0 + vxor $out7,$out7,$rndkey0 + + mtctr $rounds + b Loop_ctr32_enc8x +.align 5 +Loop_ctr32_enc8x: + vcipher $out0,$out0,v24 + vcipher $out1,$out1,v24 + vcipher $out2,$out2,v24 + vcipher $out3,$out3,v24 + vcipher $out4,$out4,v24 + vcipher $out5,$out5,v24 + vcipher $out6,$out6,v24 + vcipher $out7,$out7,v24 +Loop_ctr32_enc8x_middle: + lvx v24,$x20,$key_ # round[3] + addi $key_,$key_,0x20 + + vcipher $out0,$out0,v25 + vcipher $out1,$out1,v25 + vcipher $out2,$out2,v25 + vcipher $out3,$out3,v25 + vcipher $out4,$out4,v25 + vcipher $out5,$out5,v25 + vcipher $out6,$out6,v25 + vcipher $out7,$out7,v25 + lvx v25,$x10,$key_ # round[4] + bdnz Loop_ctr32_enc8x + + subic r11,$len,256 # $len-256, borrow $key_ + vcipher $out0,$out0,v24 + vcipher $out1,$out1,v24 + vcipher $out2,$out2,v24 + vcipher $out3,$out3,v24 + vcipher $out4,$out4,v24 + vcipher $out5,$out5,v24 + vcipher $out6,$out6,v24 + vcipher $out7,$out7,v24 + + subfe r0,r0,r0 # borrow?-1:0 + vcipher $out0,$out0,v25 + vcipher $out1,$out1,v25 + vcipher $out2,$out2,v25 + vcipher $out3,$out3,v25 + vcipher $out4,$out4,v25 + vcipher $out5,$out5,v25 + vcipher $out6,$out6,v25 + vcipher $out7,$out7,v25 + + and r0,r0,r11 + addi $key_,$sp,$FRAME+15 # rewind $key_ + vcipher $out0,$out0,v26 + vcipher $out1,$out1,v26 + vcipher $out2,$out2,v26 + vcipher $out3,$out3,v26 + vcipher $out4,$out4,v26 + vcipher $out5,$out5,v26 + vcipher $out6,$out6,v26 + vcipher $out7,$out7,v26 + lvx v24,$x00,$key_ # re-pre-load round[1] + + subic $len,$len,129 # $len-=129 + vcipher $out0,$out0,v27 + addi $len,$len,1 # $len-=128 really + vcipher $out1,$out1,v27 + vcipher $out2,$out2,v27 + vcipher $out3,$out3,v27 + vcipher $out4,$out4,v27 + vcipher $out5,$out5,v27 + vcipher $out6,$out6,v27 + vcipher $out7,$out7,v27 + lvx v25,$x10,$key_ # re-pre-load round[2] + + vcipher $out0,$out0,v28 + lvx_u $in0,$x00,$inp # load input + vcipher $out1,$out1,v28 + lvx_u $in1,$x10,$inp + vcipher $out2,$out2,v28 + lvx_u $in2,$x20,$inp + vcipher $out3,$out3,v28 + lvx_u $in3,$x30,$inp + vcipher $out4,$out4,v28 + lvx_u $in4,$x40,$inp + vcipher $out5,$out5,v28 + lvx_u $in5,$x50,$inp + vcipher $out6,$out6,v28 + lvx_u $in6,$x60,$inp + vcipher $out7,$out7,v28 + lvx_u $in7,$x70,$inp + addi $inp,$inp,0x80 + + vcipher $out0,$out0,v29 + le?vperm $in0,$in0,$in0,$inpperm + vcipher $out1,$out1,v29 + le?vperm $in1,$in1,$in1,$inpperm + vcipher $out2,$out2,v29 + le?vperm $in2,$in2,$in2,$inpperm + vcipher $out3,$out3,v29 + le?vperm $in3,$in3,$in3,$inpperm + vcipher $out4,$out4,v29 + le?vperm $in4,$in4,$in4,$inpperm + vcipher $out5,$out5,v29 + le?vperm $in5,$in5,$in5,$inpperm + vcipher $out6,$out6,v29 + le?vperm $in6,$in6,$in6,$inpperm + vcipher $out7,$out7,v29 + le?vperm $in7,$in7,$in7,$inpperm + + add $inp,$inp,r0 # $inp is adjusted in such + # way that at exit from the + # loop inX-in7 are loaded + # with last "words" + subfe. r0,r0,r0 # borrow?-1:0 + vcipher $out0,$out0,v30 + vxor $in0,$in0,v31 # xor with last round key + vcipher $out1,$out1,v30 + vxor $in1,$in1,v31 + vcipher $out2,$out2,v30 + vxor $in2,$in2,v31 + vcipher $out3,$out3,v30 + vxor $in3,$in3,v31 + vcipher $out4,$out4,v30 + vxor $in4,$in4,v31 + vcipher $out5,$out5,v30 + vxor $in5,$in5,v31 + vcipher $out6,$out6,v30 + vxor $in6,$in6,v31 + vcipher $out7,$out7,v30 + vxor $in7,$in7,v31 + + bne Lctr32_enc8x_break # did $len-129 borrow? + + vcipherlast $in0,$out0,$in0 + vcipherlast $in1,$out1,$in1 + vadduwm $out1,$ivec,$one # counter values ... + vcipherlast $in2,$out2,$in2 + vadduwm $out2,$ivec,$two + vxor $out0,$ivec,$rndkey0 # ... xored with rndkey[0] + vcipherlast $in3,$out3,$in3 + vadduwm $out3,$out1,$two + vxor $out1,$out1,$rndkey0 + vcipherlast $in4,$out4,$in4 + vadduwm $out4,$out2,$two + vxor $out2,$out2,$rndkey0 + vcipherlast $in5,$out5,$in5 + vadduwm $out5,$out3,$two + vxor $out3,$out3,$rndkey0 + vcipherlast $in6,$out6,$in6 + vadduwm $out6,$out4,$two + vxor $out4,$out4,$rndkey0 + vcipherlast $in7,$out7,$in7 + vadduwm $out7,$out5,$two + vxor $out5,$out5,$rndkey0 + le?vperm $in0,$in0,$in0,$inpperm + vadduwm $ivec,$out6,$two # next counter value + vxor $out6,$out6,$rndkey0 + le?vperm $in1,$in1,$in1,$inpperm + vxor $out7,$out7,$rndkey0 + mtctr $rounds + + vcipher $out0,$out0,v24 + stvx_u $in0,$x00,$out + le?vperm $in2,$in2,$in2,$inpperm + vcipher $out1,$out1,v24 + stvx_u $in1,$x10,$out + le?vperm $in3,$in3,$in3,$inpperm + vcipher $out2,$out2,v24 + stvx_u $in2,$x20,$out + le?vperm $in4,$in4,$in4,$inpperm + vcipher $out3,$out3,v24 + stvx_u $in3,$x30,$out + le?vperm $in5,$in5,$in5,$inpperm + vcipher $out4,$out4,v24 + stvx_u $in4,$x40,$out + le?vperm $in6,$in6,$in6,$inpperm + vcipher $out5,$out5,v24 + stvx_u $in5,$x50,$out + le?vperm $in7,$in7,$in7,$inpperm + vcipher $out6,$out6,v24 + stvx_u $in6,$x60,$out + vcipher $out7,$out7,v24 + stvx_u $in7,$x70,$out + addi $out,$out,0x80 + + b Loop_ctr32_enc8x_middle + +.align 5 +Lctr32_enc8x_break: + cmpwi $len,-0x60 + blt Lctr32_enc8x_one + nop + beq Lctr32_enc8x_two + cmpwi $len,-0x40 + blt Lctr32_enc8x_three + nop + beq Lctr32_enc8x_four + cmpwi $len,-0x20 + blt Lctr32_enc8x_five + nop + beq Lctr32_enc8x_six + cmpwi $len,0x00 + blt Lctr32_enc8x_seven + +Lctr32_enc8x_eight: + vcipherlast $out0,$out0,$in0 + vcipherlast $out1,$out1,$in1 + vcipherlast $out2,$out2,$in2 + vcipherlast $out3,$out3,$in3 + vcipherlast $out4,$out4,$in4 + vcipherlast $out5,$out5,$in5 + vcipherlast $out6,$out6,$in6 + vcipherlast $out7,$out7,$in7 + + le?vperm $out0,$out0,$out0,$inpperm + le?vperm $out1,$out1,$out1,$inpperm + stvx_u $out0,$x00,$out + le?vperm $out2,$out2,$out2,$inpperm + stvx_u $out1,$x10,$out + le?vperm $out3,$out3,$out3,$inpperm + stvx_u $out2,$x20,$out + le?vperm $out4,$out4,$out4,$inpperm + stvx_u $out3,$x30,$out + le?vperm $out5,$out5,$out5,$inpperm + stvx_u $out4,$x40,$out + le?vperm $out6,$out6,$out6,$inpperm + stvx_u $out5,$x50,$out + le?vperm $out7,$out7,$out7,$inpperm + stvx_u $out6,$x60,$out + stvx_u $out7,$x70,$out + addi $out,$out,0x80 + b Lctr32_enc8x_done + +.align 5 +Lctr32_enc8x_seven: + vcipherlast $out0,$out0,$in1 + vcipherlast $out1,$out1,$in2 + vcipherlast $out2,$out2,$in3 + vcipherlast $out3,$out3,$in4 + vcipherlast $out4,$out4,$in5 + vcipherlast $out5,$out5,$in6 + vcipherlast $out6,$out6,$in7 + + le?vperm $out0,$out0,$out0,$inpperm + le?vperm $out1,$out1,$out1,$inpperm + stvx_u $out0,$x00,$out + le?vperm $out2,$out2,$out2,$inpperm + stvx_u $out1,$x10,$out + le?vperm $out3,$out3,$out3,$inpperm + stvx_u $out2,$x20,$out + le?vperm $out4,$out4,$out4,$inpperm + stvx_u $out3,$x30,$out + le?vperm $out5,$out5,$out5,$inpperm + stvx_u $out4,$x40,$out + le?vperm $out6,$out6,$out6,$inpperm + stvx_u $out5,$x50,$out + stvx_u $out6,$x60,$out + addi $out,$out,0x70 + b Lctr32_enc8x_done + +.align 5 +Lctr32_enc8x_six: + vcipherlast $out0,$out0,$in2 + vcipherlast $out1,$out1,$in3 + vcipherlast $out2,$out2,$in4 + vcipherlast $out3,$out3,$in5 + vcipherlast $out4,$out4,$in6 + vcipherlast $out5,$out5,$in7 + + le?vperm $out0,$out0,$out0,$inpperm + le?vperm $out1,$out1,$out1,$inpperm + stvx_u $out0,$x00,$out + le?vperm $out2,$out2,$out2,$inpperm + stvx_u $out1,$x10,$out + le?vperm $out3,$out3,$out3,$inpperm + stvx_u $out2,$x20,$out + le?vperm $out4,$out4,$out4,$inpperm + stvx_u $out3,$x30,$out + le?vperm $out5,$out5,$out5,$inpperm + stvx_u $out4,$x40,$out + stvx_u $out5,$x50,$out + addi $out,$out,0x60 + b Lctr32_enc8x_done + +.align 5 +Lctr32_enc8x_five: + vcipherlast $out0,$out0,$in3 + vcipherlast $out1,$out1,$in4 + vcipherlast $out2,$out2,$in5 + vcipherlast $out3,$out3,$in6 + vcipherlast $out4,$out4,$in7 + + le?vperm $out0,$out0,$out0,$inpperm + le?vperm $out1,$out1,$out1,$inpperm + stvx_u $out0,$x00,$out + le?vperm $out2,$out2,$out2,$inpperm + stvx_u $out1,$x10,$out + le?vperm $out3,$out3,$out3,$inpperm + stvx_u $out2,$x20,$out + le?vperm $out4,$out4,$out4,$inpperm + stvx_u $out3,$x30,$out + stvx_u $out4,$x40,$out + addi $out,$out,0x50 + b Lctr32_enc8x_done + +.align 5 +Lctr32_enc8x_four: + vcipherlast $out0,$out0,$in4 + vcipherlast $out1,$out1,$in5 + vcipherlast $out2,$out2,$in6 + vcipherlast $out3,$out3,$in7 + + le?vperm $out0,$out0,$out0,$inpperm + le?vperm $out1,$out1,$out1,$inpperm + stvx_u $out0,$x00,$out + le?vperm $out2,$out2,$out2,$inpperm + stvx_u $out1,$x10,$out + le?vperm $out3,$out3,$out3,$inpperm + stvx_u $out2,$x20,$out + stvx_u $out3,$x30,$out + addi $out,$out,0x40 + b Lctr32_enc8x_done + +.align 5 +Lctr32_enc8x_three: + vcipherlast $out0,$out0,$in5 + vcipherlast $out1,$out1,$in6 + vcipherlast $out2,$out2,$in7 + + le?vperm $out0,$out0,$out0,$inpperm + le?vperm $out1,$out1,$out1,$inpperm + stvx_u $out0,$x00,$out + le?vperm $out2,$out2,$out2,$inpperm + stvx_u $out1,$x10,$out + stvx_u $out2,$x20,$out + addi $out,$out,0x30 + b Lcbc_dec8x_done + +.align 5 +Lctr32_enc8x_two: + vcipherlast $out0,$out0,$in6 + vcipherlast $out1,$out1,$in7 + + le?vperm $out0,$out0,$out0,$inpperm + le?vperm $out1,$out1,$out1,$inpperm + stvx_u $out0,$x00,$out + stvx_u $out1,$x10,$out + addi $out,$out,0x20 + b Lcbc_dec8x_done + +.align 5 +Lctr32_enc8x_one: + vcipherlast $out0,$out0,$in7 + + le?vperm $out0,$out0,$out0,$inpperm + stvx_u $out0,0,$out + addi $out,$out,0x10 + +Lctr32_enc8x_done: + li r10,`$FRAME+15` + li r11,`$FRAME+31` + stvx $inpperm,r10,$sp # wipe copies of round keys + addi r10,r10,32 + stvx $inpperm,r11,$sp + addi r11,r11,32 + stvx $inpperm,r10,$sp + addi r10,r10,32 + stvx $inpperm,r11,$sp + addi r11,r11,32 + stvx $inpperm,r10,$sp + addi r10,r10,32 + stvx $inpperm,r11,$sp + addi r11,r11,32 + stvx $inpperm,r10,$sp + addi r10,r10,32 + stvx $inpperm,r11,$sp + addi r11,r11,32 + + mtspr 256,$vrsave + lvx v20,r10,$sp # ABI says so + addi r10,r10,32 + lvx v21,r11,$sp + addi r11,r11,32 + lvx v22,r10,$sp + addi r10,r10,32 + lvx v23,r11,$sp + addi r11,r11,32 + lvx v24,r10,$sp + addi r10,r10,32 + lvx v25,r11,$sp + addi r11,r11,32 + lvx v26,r10,$sp + addi r10,r10,32 + lvx v27,r11,$sp + addi r11,r11,32 + lvx v28,r10,$sp + addi r10,r10,32 + lvx v29,r11,$sp + addi r11,r11,32 + lvx v30,r10,$sp + lvx v31,r11,$sp + $POP r26,`$FRAME+21*16+0*$SIZE_T`($sp) + $POP r27,`$FRAME+21*16+1*$SIZE_T`($sp) + $POP r28,`$FRAME+21*16+2*$SIZE_T`($sp) + $POP r29,`$FRAME+21*16+3*$SIZE_T`($sp) + $POP r30,`$FRAME+21*16+4*$SIZE_T`($sp) + $POP r31,`$FRAME+21*16+5*$SIZE_T`($sp) + addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T` + blr + .long 0 + .byte 0,12,0x14,0,0x80,6,6,0 + .long 0 +.size .${prefix}_ctr32_encrypt_blocks,.-.${prefix}_ctr32_encrypt_blocks +___ +}} }}} + +my $consts=1; +foreach(split("\n",$code)) { + s/\`([^\`]*)\`/eval($1)/geo; + + # constants table endian-specific conversion + if ($consts && m/\.(long|byte)\s+(.+)\s+(\?[a-z]*)$/o) { + my $conv=$3; + my @bytes=(); + + # convert to endian-agnostic format + if ($1 eq "long") { + foreach (split(/,\s*/,$2)) { + my $l = /^0/?oct:int; + push @bytes,($l>>24)&0xff,($l>>16)&0xff,($l>>8)&0xff,$l&0xff; + } + } else { + @bytes = map(/^0/?oct:int,split(/,\s*/,$2)); + } + + # little-endian conversion + if ($flavour =~ /le$/o) { + SWITCH: for($conv) { + /\?inv/ && do { @bytes=map($_^0xf,@bytes); last; }; + /\?rev/ && do { @bytes=reverse(@bytes); last; }; + } + } + + #emit + print ".byte\t",join(',',map (sprintf("0x%02x",$_),@bytes)),"\n"; + next; + } + $consts=0 if (m/Lconsts:/o); # end of table + + # instructions prefixed with '?' are endian-specific and need + # to be adjusted accordingly... + if ($flavour =~ /le$/o) { # little-endian + s/le\?//o or + s/be\?/#be#/o or + s/\?lvsr/lvsl/o or + s/\?lvsl/lvsr/o or + s/\?(vperm\s+v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+)/$1$3$2$4/o or + s/\?(vsldoi\s+v[0-9]+,\s*)(v[0-9]+,)\s*(v[0-9]+,\s*)([0-9]+)/$1$3$2 16-$4/o or + s/\?(vspltw\s+v[0-9]+,\s*)(v[0-9]+,)\s*([0-9])/$1$2 3-$3/o; + } else { # big-endian + s/le\?/#le#/o or + s/be\?//o or + s/\?([a-z]+)/$1/o; + } + + print $_,"\n"; +} + +close STDOUT; diff --git a/drivers/crypto/vmx/ghash.c b/drivers/crypto/vmx/ghash.c new file mode 100644 index 000000000000..d0ffe277af5c --- /dev/null +++ b/drivers/crypto/vmx/ghash.c @@ -0,0 +1,214 @@ +/** + * GHASH routines supporting VMX instructions on the Power 8 + * + * Copyright (C) 2015 International Business Machines Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 only. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * + * Author: Marcelo Henrique Cerri <mhcerri@br.ibm.com> + */ + +#include <linux/types.h> +#include <linux/err.h> +#include <linux/crypto.h> +#include <linux/delay.h> +#include <linux/hardirq.h> +#include <asm/switch_to.h> +#include <crypto/aes.h> +#include <crypto/scatterwalk.h> +#include <crypto/internal/hash.h> +#include <crypto/b128ops.h> + +#define IN_INTERRUPT in_interrupt() + +#define GHASH_BLOCK_SIZE (16) +#define GHASH_DIGEST_SIZE (16) +#define GHASH_KEY_LEN (16) + +void gcm_init_p8(u128 htable[16], const u64 Xi[2]); +void gcm_gmult_p8(u64 Xi[2], const u128 htable[16]); +void gcm_ghash_p8(u64 Xi[2], const u128 htable[16], + const u8 *in,size_t len); + +struct p8_ghash_ctx { + u128 htable[16]; + struct crypto_shash *fallback; +}; + +struct p8_ghash_desc_ctx { + u64 shash[2]; + u8 buffer[GHASH_DIGEST_SIZE]; + int bytes; + struct shash_desc fallback_desc; +}; + +static int p8_ghash_init_tfm(struct crypto_tfm *tfm) +{ + const char *alg; + struct crypto_shash *fallback; + struct crypto_shash *shash_tfm = __crypto_shash_cast(tfm); + struct p8_ghash_ctx *ctx = crypto_tfm_ctx(tfm); + + if (!(alg = crypto_tfm_alg_name(tfm))) { + printk(KERN_ERR "Failed to get algorithm name.\n"); + return -ENOENT; + } + + fallback = crypto_alloc_shash(alg, 0 ,CRYPTO_ALG_NEED_FALLBACK); + if (IS_ERR(fallback)) { + printk(KERN_ERR "Failed to allocate transformation for '%s': %ld\n", + alg, PTR_ERR(fallback)); + return PTR_ERR(fallback); + } + printk(KERN_INFO "Using '%s' as fallback implementation.\n", + crypto_tfm_alg_driver_name(crypto_shash_tfm(fallback))); + + crypto_shash_set_flags(fallback, + crypto_shash_get_flags((struct crypto_shash *) tfm)); + ctx->fallback = fallback; + + shash_tfm->descsize = sizeof(struct p8_ghash_desc_ctx) + + crypto_shash_descsize(fallback); + + return 0; +} + +static void p8_ghash_exit_tfm(struct crypto_tfm *tfm) +{ + struct p8_ghash_ctx *ctx = crypto_tfm_ctx(tfm); + + if (ctx->fallback) { + crypto_free_shash(ctx->fallback); + ctx->fallback = NULL; + } +} + +static int p8_ghash_init(struct shash_desc *desc) +{ + struct p8_ghash_ctx *ctx = crypto_tfm_ctx(crypto_shash_tfm(desc->tfm)); + struct p8_ghash_desc_ctx *dctx = shash_desc_ctx(desc); + + dctx->bytes = 0; + memset(dctx->shash, 0, GHASH_DIGEST_SIZE); + dctx->fallback_desc.tfm = ctx->fallback; + dctx->fallback_desc.flags = desc->flags; + return crypto_shash_init(&dctx->fallback_desc); +} + +static int p8_ghash_setkey(struct crypto_shash *tfm, const u8 *key, + unsigned int keylen) +{ + struct p8_ghash_ctx *ctx = crypto_tfm_ctx(crypto_shash_tfm(tfm)); + + if (keylen != GHASH_KEY_LEN) + return -EINVAL; + + pagefault_disable(); + enable_kernel_altivec(); + enable_kernel_fp(); + gcm_init_p8(ctx->htable, (const u64 *) key); + pagefault_enable(); + return crypto_shash_setkey(ctx->fallback, key, keylen); +} + +static int p8_ghash_update(struct shash_desc *desc, + const u8 *src, unsigned int srclen) +{ + unsigned int len; + struct p8_ghash_ctx *ctx = crypto_tfm_ctx(crypto_shash_tfm(desc->tfm)); + struct p8_ghash_desc_ctx *dctx = shash_desc_ctx(desc); + + if (IN_INTERRUPT) { + return crypto_shash_update(&dctx->fallback_desc, src, srclen); + } else { + if (dctx->bytes) { + if (dctx->bytes + srclen < GHASH_DIGEST_SIZE) { + memcpy(dctx->buffer + dctx->bytes, src, srclen); + dctx->bytes += srclen; + return 0; + } + memcpy(dctx->buffer + dctx->bytes, src, + GHASH_DIGEST_SIZE - dctx->bytes); + pagefault_disable(); + enable_kernel_altivec(); + enable_kernel_fp(); + gcm_ghash_p8(dctx->shash, ctx->htable, dctx->buffer, + GHASH_DIGEST_SIZE); + pagefault_enable(); + src += GHASH_DIGEST_SIZE - dctx->bytes; + srclen -= GHASH_DIGEST_SIZE - dctx->bytes; + dctx->bytes = 0; + } + len = srclen & ~(GHASH_DIGEST_SIZE - 1); + if (len) { + pagefault_disable(); + enable_kernel_altivec(); + enable_kernel_fp(); + gcm_ghash_p8(dctx->shash, ctx->htable, src, len); + pagefault_enable(); + src += len; + srclen -= len; + } + if (srclen) { + memcpy(dctx->buffer, src, srclen); + dctx->bytes = srclen; + } + return 0; + } +} + +static int p8_ghash_final(struct shash_desc *desc, u8 *out) +{ + int i; + struct p8_ghash_ctx *ctx = crypto_tfm_ctx(crypto_shash_tfm(desc->tfm)); + struct p8_ghash_desc_ctx *dctx = shash_desc_ctx(desc); + + if (IN_INTERRUPT) { + return crypto_shash_final(&dctx->fallback_desc, out); + } else { + if (dctx->bytes) { + for (i = dctx->bytes; i < GHASH_DIGEST_SIZE; i++) + dctx->buffer[i] = 0; + pagefault_disable(); + enable_kernel_altivec(); + enable_kernel_fp(); + gcm_ghash_p8(dctx->shash, ctx->htable, dctx->buffer, + GHASH_DIGEST_SIZE); + pagefault_enable(); + dctx->bytes = 0; + } + memcpy(out, dctx->shash, GHASH_DIGEST_SIZE); + return 0; + } +} + +struct shash_alg p8_ghash_alg = { + .digestsize = GHASH_DIGEST_SIZE, + .init = p8_ghash_init, + .update = p8_ghash_update, + .final = p8_ghash_final, + .setkey = p8_ghash_setkey, + .descsize = sizeof(struct p8_ghash_desc_ctx), + .base = { + .cra_name = "ghash", + .cra_driver_name = "p8_ghash", + .cra_priority = 1000, + .cra_flags = CRYPTO_ALG_TYPE_SHASH | CRYPTO_ALG_NEED_FALLBACK, + .cra_blocksize = GHASH_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct p8_ghash_ctx), + .cra_module = THIS_MODULE, + .cra_init = p8_ghash_init_tfm, + .cra_exit = p8_ghash_exit_tfm, + }, +}; diff --git a/drivers/crypto/vmx/ghashp8-ppc.pl b/drivers/crypto/vmx/ghashp8-ppc.pl new file mode 100644 index 000000000000..0a6f899839dd --- /dev/null +++ b/drivers/crypto/vmx/ghashp8-ppc.pl @@ -0,0 +1,228 @@ +#!/usr/bin/env perl +# +# ==================================================================== +# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL +# project. The module is, however, dual licensed under OpenSSL and +# CRYPTOGAMS licenses depending on where you obtain it. For further +# details see http://www.openssl.org/~appro/cryptogams/. +# ==================================================================== +# +# GHASH for for PowerISA v2.07. +# +# July 2014 +# +# Accurate performance measurements are problematic, because it's +# always virtualized setup with possibly throttled processor. +# Relative comparison is therefore more informative. This initial +# version is ~2.1x slower than hardware-assisted AES-128-CTR, ~12x +# faster than "4-bit" integer-only compiler-generated 64-bit code. +# "Initial version" means that there is room for futher improvement. + +$flavour=shift; +$output =shift; + +if ($flavour =~ /64/) { + $SIZE_T=8; + $LRSAVE=2*$SIZE_T; + $STU="stdu"; + $POP="ld"; + $PUSH="std"; +} elsif ($flavour =~ /32/) { + $SIZE_T=4; + $LRSAVE=$SIZE_T; + $STU="stwu"; + $POP="lwz"; + $PUSH="stw"; +} else { die "nonsense $flavour"; } + +$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; +( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or +( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or +die "can't locate ppc-xlate.pl"; + +open STDOUT,"| $^X $xlate $flavour $output" || die "can't call $xlate: $!"; + +my ($Xip,$Htbl,$inp,$len)=map("r$_",(3..6)); # argument block + +my ($Xl,$Xm,$Xh,$IN)=map("v$_",(0..3)); +my ($zero,$t0,$t1,$t2,$xC2,$H,$Hh,$Hl,$lemask)=map("v$_",(4..12)); +my $vrsave="r12"; + +$code=<<___; +.machine "any" + +.text + +.globl .gcm_init_p8 + lis r0,0xfff0 + li r8,0x10 + mfspr $vrsave,256 + li r9,0x20 + mtspr 256,r0 + li r10,0x30 + lvx_u $H,0,r4 # load H + + vspltisb $xC2,-16 # 0xf0 + vspltisb $t0,1 # one + vaddubm $xC2,$xC2,$xC2 # 0xe0 + vxor $zero,$zero,$zero + vor $xC2,$xC2,$t0 # 0xe1 + vsldoi $xC2,$xC2,$zero,15 # 0xe1... + vsldoi $t1,$zero,$t0,1 # ...1 + vaddubm $xC2,$xC2,$xC2 # 0xc2... + vspltisb $t2,7 + vor $xC2,$xC2,$t1 # 0xc2....01 + vspltb $t1,$H,0 # most significant byte + vsl $H,$H,$t0 # H<<=1 + vsrab $t1,$t1,$t2 # broadcast carry bit + vand $t1,$t1,$xC2 + vxor $H,$H,$t1 # twisted H + + vsldoi $H,$H,$H,8 # twist even more ... + vsldoi $xC2,$zero,$xC2,8 # 0xc2.0 + vsldoi $Hl,$zero,$H,8 # ... and split + vsldoi $Hh,$H,$zero,8 + + stvx_u $xC2,0,r3 # save pre-computed table + stvx_u $Hl,r8,r3 + stvx_u $H, r9,r3 + stvx_u $Hh,r10,r3 + + mtspr 256,$vrsave + blr + .long 0 + .byte 0,12,0x14,0,0,0,2,0 + .long 0 +.size .gcm_init_p8,.-.gcm_init_p8 + +.globl .gcm_gmult_p8 + lis r0,0xfff8 + li r8,0x10 + mfspr $vrsave,256 + li r9,0x20 + mtspr 256,r0 + li r10,0x30 + lvx_u $IN,0,$Xip # load Xi + + lvx_u $Hl,r8,$Htbl # load pre-computed table + le?lvsl $lemask,r0,r0 + lvx_u $H, r9,$Htbl + le?vspltisb $t0,0x07 + lvx_u $Hh,r10,$Htbl + le?vxor $lemask,$lemask,$t0 + lvx_u $xC2,0,$Htbl + le?vperm $IN,$IN,$IN,$lemask + vxor $zero,$zero,$zero + + vpmsumd $Xl,$IN,$Hl # H.lo·Xi.lo + vpmsumd $Xm,$IN,$H # H.hi·Xi.lo+H.lo·Xi.hi + vpmsumd $Xh,$IN,$Hh # H.hi·Xi.hi + + vpmsumd $t2,$Xl,$xC2 # 1st phase + + vsldoi $t0,$Xm,$zero,8 + vsldoi $t1,$zero,$Xm,8 + vxor $Xl,$Xl,$t0 + vxor $Xh,$Xh,$t1 + + vsldoi $Xl,$Xl,$Xl,8 + vxor $Xl,$Xl,$t2 + + vsldoi $t1,$Xl,$Xl,8 # 2nd phase + vpmsumd $Xl,$Xl,$xC2 + vxor $t1,$t1,$Xh + vxor $Xl,$Xl,$t1 + + le?vperm $Xl,$Xl,$Xl,$lemask + stvx_u $Xl,0,$Xip # write out Xi + + mtspr 256,$vrsave + blr + .long 0 + .byte 0,12,0x14,0,0,0,2,0 + .long 0 +.size .gcm_gmult_p8,.-.gcm_gmult_p8 + +.globl .gcm_ghash_p8 + lis r0,0xfff8 + li r8,0x10 + mfspr $vrsave,256 + li r9,0x20 + mtspr 256,r0 + li r10,0x30 + lvx_u $Xl,0,$Xip # load Xi + + lvx_u $Hl,r8,$Htbl # load pre-computed table + le?lvsl $lemask,r0,r0 + lvx_u $H, r9,$Htbl + le?vspltisb $t0,0x07 + lvx_u $Hh,r10,$Htbl + le?vxor $lemask,$lemask,$t0 + lvx_u $xC2,0,$Htbl + le?vperm $Xl,$Xl,$Xl,$lemask + vxor $zero,$zero,$zero + + lvx_u $IN,0,$inp + addi $inp,$inp,16 + subi $len,$len,16 + le?vperm $IN,$IN,$IN,$lemask + vxor $IN,$IN,$Xl + b Loop + +.align 5 +Loop: + subic $len,$len,16 + vpmsumd $Xl,$IN,$Hl # H.lo·Xi.lo + subfe. r0,r0,r0 # borrow?-1:0 + vpmsumd $Xm,$IN,$H # H.hi·Xi.lo+H.lo·Xi.hi + and r0,r0,$len + vpmsumd $Xh,$IN,$Hh # H.hi·Xi.hi + add $inp,$inp,r0 + + vpmsumd $t2,$Xl,$xC2 # 1st phase + + vsldoi $t0,$Xm,$zero,8 + vsldoi $t1,$zero,$Xm,8 + vxor $Xl,$Xl,$t0 + vxor $Xh,$Xh,$t1 + + vsldoi $Xl,$Xl,$Xl,8 + vxor $Xl,$Xl,$t2 + lvx_u $IN,0,$inp + addi $inp,$inp,16 + + vsldoi $t1,$Xl,$Xl,8 # 2nd phase + vpmsumd $Xl,$Xl,$xC2 + le?vperm $IN,$IN,$IN,$lemask + vxor $t1,$t1,$Xh + vxor $IN,$IN,$t1 + vxor $IN,$IN,$Xl + beq Loop # did $len-=16 borrow? + + vxor $Xl,$Xl,$t1 + le?vperm $Xl,$Xl,$Xl,$lemask + stvx_u $Xl,0,$Xip # write out Xi + + mtspr 256,$vrsave + blr + .long 0 + .byte 0,12,0x14,0,0,0,4,0 + .long 0 +.size .gcm_ghash_p8,.-.gcm_ghash_p8 + +.asciz "GHASH for PowerISA 2.07, CRYPTOGAMS by <appro\@openssl.org>" +.align 2 +___ + +foreach (split("\n",$code)) { + if ($flavour =~ /le$/o) { # little-endian + s/le\?//o or + s/be\?/#be#/o; + } else { + s/le\?/#le#/o or + s/be\?//o; + } + print $_,"\n"; +} + +close STDOUT; # enforce flush diff --git a/drivers/crypto/vmx/ppc-xlate.pl b/drivers/crypto/vmx/ppc-xlate.pl new file mode 100644 index 000000000000..a59188494af8 --- /dev/null +++ b/drivers/crypto/vmx/ppc-xlate.pl @@ -0,0 +1,207 @@ +#!/usr/bin/env perl + +# PowerPC assembler distiller by <appro>. + +my $flavour = shift; +my $output = shift; +open STDOUT,">$output" || die "can't open $output: $!"; + +my %GLOBALS; +my $dotinlocallabels=($flavour=~/linux/)?1:0; + +################################################################ +# directives which need special treatment on different platforms +################################################################ +my $globl = sub { + my $junk = shift; + my $name = shift; + my $global = \$GLOBALS{$name}; + my $ret; + + $name =~ s|^[\.\_]||; + + SWITCH: for ($flavour) { + /aix/ && do { $name = ".$name"; + last; + }; + /osx/ && do { $name = "_$name"; + last; + }; + /linux/ + && do { $ret = "_GLOBAL($name)"; + last; + }; + } + + $ret = ".globl $name\nalign 5\n$name:" if (!$ret); + $$global = $name; + $ret; +}; +my $text = sub { + my $ret = ($flavour =~ /aix/) ? ".csect\t.text[PR],7" : ".text"; + $ret = ".abiversion 2\n".$ret if ($flavour =~ /linux.*64le/); + $ret; +}; +my $machine = sub { + my $junk = shift; + my $arch = shift; + if ($flavour =~ /osx/) + { $arch =~ s/\"//g; + $arch = ($flavour=~/64/) ? "ppc970-64" : "ppc970" if ($arch eq "any"); + } + ".machine $arch"; +}; +my $size = sub { + if ($flavour =~ /linux/) + { shift; + my $name = shift; $name =~ s|^[\.\_]||; + my $ret = ".size $name,.-".($flavour=~/64$/?".":"").$name; + $ret .= "\n.size .$name,.-.$name" if ($flavour=~/64$/); + $ret; + } + else + { ""; } +}; +my $asciz = sub { + shift; + my $line = join(",",@_); + if ($line =~ /^"(.*)"$/) + { ".byte " . join(",",unpack("C*",$1),0) . "\n.align 2"; } + else + { ""; } +}; +my $quad = sub { + shift; + my @ret; + my ($hi,$lo); + for (@_) { + if (/^0x([0-9a-f]*?)([0-9a-f]{1,8})$/io) + { $hi=$1?"0x$1":"0"; $lo="0x$2"; } + elsif (/^([0-9]+)$/o) + { $hi=$1>>32; $lo=$1&0xffffffff; } # error-prone with 32-bit perl + else + { $hi=undef; $lo=$_; } + + if (defined($hi)) + { push(@ret,$flavour=~/le$/o?".long\t$lo,$hi":".long\t$hi,$lo"); } + else + { push(@ret,".quad $lo"); } + } + join("\n",@ret); +}; + +################################################################ +# simplified mnemonics not handled by at least one assembler +################################################################ +my $cmplw = sub { + my $f = shift; + my $cr = 0; $cr = shift if ($#_>1); + # Some out-of-date 32-bit GNU assembler just can't handle cmplw... + ($flavour =~ /linux.*32/) ? + " .long ".sprintf "0x%x",31<<26|$cr<<23|$_[0]<<16|$_[1]<<11|64 : + " cmplw ".join(',',$cr,@_); +}; +my $bdnz = sub { + my $f = shift; + my $bo = $f=~/[\+\-]/ ? 16+9 : 16; # optional "to be taken" hint + " bc $bo,0,".shift; +} if ($flavour!~/linux/); +my $bltlr = sub { + my $f = shift; + my $bo = $f=~/\-/ ? 12+2 : 12; # optional "not to be taken" hint + ($flavour =~ /linux/) ? # GNU as doesn't allow most recent hints + " .long ".sprintf "0x%x",19<<26|$bo<<21|16<<1 : + " bclr $bo,0"; +}; +my $bnelr = sub { + my $f = shift; + my $bo = $f=~/\-/ ? 4+2 : 4; # optional "not to be taken" hint + ($flavour =~ /linux/) ? # GNU as doesn't allow most recent hints + " .long ".sprintf "0x%x",19<<26|$bo<<21|2<<16|16<<1 : + " bclr $bo,2"; +}; +my $beqlr = sub { + my $f = shift; + my $bo = $f=~/-/ ? 12+2 : 12; # optional "not to be taken" hint + ($flavour =~ /linux/) ? # GNU as doesn't allow most recent hints + " .long ".sprintf "0x%X",19<<26|$bo<<21|2<<16|16<<1 : + " bclr $bo,2"; +}; +# GNU assembler can't handle extrdi rA,rS,16,48, or when sum of last two +# arguments is 64, with "operand out of range" error. +my $extrdi = sub { + my ($f,$ra,$rs,$n,$b) = @_; + $b = ($b+$n)&63; $n = 64-$n; + " rldicl $ra,$rs,$b,$n"; +}; +my $vmr = sub { + my ($f,$vx,$vy) = @_; + " vor $vx,$vy,$vy"; +}; + +# PowerISA 2.06 stuff +sub vsxmem_op { + my ($f, $vrt, $ra, $rb, $op) = @_; + " .long ".sprintf "0x%X",(31<<26)|($vrt<<21)|($ra<<16)|($rb<<11)|($op*2+1); +} +# made-up unaligned memory reference AltiVec/VMX instructions +my $lvx_u = sub { vsxmem_op(@_, 844); }; # lxvd2x +my $stvx_u = sub { vsxmem_op(@_, 972); }; # stxvd2x +my $lvdx_u = sub { vsxmem_op(@_, 588); }; # lxsdx +my $stvdx_u = sub { vsxmem_op(@_, 716); }; # stxsdx +my $lvx_4w = sub { vsxmem_op(@_, 780); }; # lxvw4x +my $stvx_4w = sub { vsxmem_op(@_, 908); }; # stxvw4x + +# PowerISA 2.07 stuff +sub vcrypto_op { + my ($f, $vrt, $vra, $vrb, $op) = @_; + " .long ".sprintf "0x%X",(4<<26)|($vrt<<21)|($vra<<16)|($vrb<<11)|$op; +} +my $vcipher = sub { vcrypto_op(@_, 1288); }; +my $vcipherlast = sub { vcrypto_op(@_, 1289); }; +my $vncipher = sub { vcrypto_op(@_, 1352); }; +my $vncipherlast= sub { vcrypto_op(@_, 1353); }; +my $vsbox = sub { vcrypto_op(@_, 0, 1480); }; +my $vshasigmad = sub { my ($st,$six)=splice(@_,-2); vcrypto_op(@_, $st<<4|$six, 1730); }; +my $vshasigmaw = sub { my ($st,$six)=splice(@_,-2); vcrypto_op(@_, $st<<4|$six, 1666); }; +my $vpmsumb = sub { vcrypto_op(@_, 1032); }; +my $vpmsumd = sub { vcrypto_op(@_, 1224); }; +my $vpmsubh = sub { vcrypto_op(@_, 1096); }; +my $vpmsumw = sub { vcrypto_op(@_, 1160); }; +my $vaddudm = sub { vcrypto_op(@_, 192); }; + +my $mtsle = sub { + my ($f, $arg) = @_; + " .long ".sprintf "0x%X",(31<<26)|($arg<<21)|(147*2); +}; + +print "#include <asm/ppc_asm.h>\n" if $flavour =~ /linux/; + +while($line=<>) { + + $line =~ s|[#!;].*$||; # get rid of asm-style comments... + $line =~ s|/\*.*\*/||; # ... and C-style comments... + $line =~ s|^\s+||; # ... and skip white spaces in beginning... + $line =~ s|\s+$||; # ... and at the end + + { + $line =~ s|\b\.L(\w+)|L$1|g; # common denominator for Locallabel + $line =~ s|\bL(\w+)|\.L$1|g if ($dotinlocallabels); + } + + { + $line =~ s|^\s*(\.?)(\w+)([\.\+\-]?)\s*||; + my $c = $1; $c = "\t" if ($c eq ""); + my $mnemonic = $2; + my $f = $3; + my $opcode = eval("\$$mnemonic"); + $line =~ s/\b(c?[rf]|v|vs)([0-9]+)\b/$2/g if ($c ne "." and $flavour !~ /osx/); + if (ref($opcode) eq 'CODE') { $line = &$opcode($f,split(',',$line)); } + elsif ($mnemonic) { $line = $c.$mnemonic.$f."\t".$line; } + } + + print $line if ($line); + print "\n"; +} + +close STDOUT; diff --git a/drivers/crypto/vmx/vmx.c b/drivers/crypto/vmx/vmx.c new file mode 100644 index 000000000000..44d8d5cfe40d --- /dev/null +++ b/drivers/crypto/vmx/vmx.c @@ -0,0 +1,88 @@ +/** + * Routines supporting VMX instructions on the Power 8 + * + * Copyright (C) 2015 International Business Machines Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 only. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * + * Author: Marcelo Henrique Cerri <mhcerri@br.ibm.com> + */ + +#include <linux/module.h> +#include <linux/moduleparam.h> +#include <linux/types.h> +#include <linux/err.h> +#include <linux/crypto.h> +#include <asm/cputable.h> +#include <crypto/internal/hash.h> + +extern struct shash_alg p8_ghash_alg; +extern struct crypto_alg p8_aes_alg; +extern struct crypto_alg p8_aes_cbc_alg; +extern struct crypto_alg p8_aes_ctr_alg; +static struct crypto_alg *algs[] = { + &p8_aes_alg, + &p8_aes_cbc_alg, + &p8_aes_ctr_alg, + NULL, +}; + +int __init p8_init(void) +{ + int ret = 0; + struct crypto_alg **alg_it; + + if (!(cur_cpu_spec->cpu_user_features2 & PPC_FEATURE2_VEC_CRYPTO)) + return -ENODEV; + + for (alg_it = algs; *alg_it; alg_it++) { + ret = crypto_register_alg(*alg_it); + printk(KERN_INFO "crypto_register_alg '%s' = %d\n", + (*alg_it)->cra_name, ret); + if (ret) { + for (alg_it--; alg_it >= algs; alg_it--) + crypto_unregister_alg(*alg_it); + break; + } + } + if (ret) + return ret; + + ret = crypto_register_shash(&p8_ghash_alg); + if (ret) { + for (alg_it = algs; *alg_it; alg_it++) + crypto_unregister_alg(*alg_it); + } + return ret; +} + +void __exit p8_exit(void) +{ + struct crypto_alg **alg_it; + + for (alg_it = algs; *alg_it; alg_it++) { + printk(KERN_INFO "Removing '%s'\n", (*alg_it)->cra_name); + crypto_unregister_alg(*alg_it); + } + crypto_unregister_shash(&p8_ghash_alg); +} + +module_init(p8_init); +module_exit(p8_exit); + +MODULE_AUTHOR("Marcelo Cerri<mhcerri@br.ibm.com>"); +MODULE_DESCRIPTION("IBM VMX cryptogaphic acceleration instructions support on Power 8"); +MODULE_LICENSE("GPL"); +MODULE_VERSION("1.0.0"); + |