diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2016-10-11 00:04:16 +0300 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2016-10-11 00:04:16 +0300 |
commit | 30066ce675d3af350bc5a53858991c0b518dda00 (patch) | |
tree | 75db2274cd0887b11b4e297771287f0fb4c14b81 /drivers/crypto/ccp | |
parent | 6763afe4b9f39142bda2a92d69e62fe85f67251c (diff) | |
parent | c3afafa47898e34eb49828ec4ac92bcdc81c8f0c (diff) | |
download | linux-30066ce675d3af350bc5a53858991c0b518dda00.tar.xz |
Merge branch 'linus' of git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6
Pull crypto updates from Herbert Xu:
"Here is the crypto update for 4.9:
API:
- The crypto engine code now supports hashes.
Algorithms:
- Allow keys >= 2048 bits in FIPS mode for RSA.
Drivers:
- Memory overwrite fix for vmx ghash.
- Add support for building ARM sha1-neon in Thumb2 mode.
- Reenable ARM ghash-ce code by adding import/export.
- Reenable img-hash by adding import/export.
- Add support for multiple cores in omap-aes.
- Add little-endian support for sha1-powerpc.
- Add Cavium HWRNG driver for ThunderX SoC"
* 'linus' of git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6: (137 commits)
crypto: caam - treat SGT address pointer as u64
crypto: ccp - Make syslog errors human-readable
crypto: ccp - clean up data structure
crypto: vmx - Ensure ghash-generic is enabled
crypto: testmgr - add guard to dst buffer for ahash_export
crypto: caam - Unmap region obtained by of_iomap
crypto: sha1-powerpc - little-endian support
crypto: gcm - Fix IV buffer size in crypto_gcm_setkey
crypto: vmx - Fix memory corruption caused by p8_ghash
crypto: ghash-generic - move common definitions to a new header file
crypto: caam - fix sg dump
hwrng: omap - Only fail if pm_runtime_get_sync returns < 0
crypto: omap-sham - shrink the internal buffer size
crypto: omap-sham - add support for export/import
crypto: omap-sham - convert driver logic to use sgs for data xmit
crypto: omap-sham - change the DMA threshold value to a define
crypto: omap-sham - add support functions for sg based data handling
crypto: omap-sham - rename sgl to sgl_tmp for deprecation
crypto: omap-sham - align algorithms on word offset
crypto: omap-sham - add context export/import stubs
...
Diffstat (limited to 'drivers/crypto/ccp')
-rw-r--r-- | drivers/crypto/ccp/Makefile | 1 | ||||
-rw-r--r-- | drivers/crypto/ccp/ccp-crypto-sha.c | 18 | ||||
-rw-r--r-- | drivers/crypto/ccp/ccp-dev-v3.c | 182 | ||||
-rw-r--r-- | drivers/crypto/ccp/ccp-dev-v5.c | 1017 | ||||
-rw-r--r-- | drivers/crypto/ccp/ccp-dev.c | 113 | ||||
-rw-r--r-- | drivers/crypto/ccp/ccp-dev.h | 312 | ||||
-rw-r--r-- | drivers/crypto/ccp/ccp-dmaengine.c | 11 | ||||
-rw-r--r-- | drivers/crypto/ccp/ccp-ops.c | 576 | ||||
-rw-r--r-- | drivers/crypto/ccp/ccp-pci.c | 23 |
9 files changed, 1823 insertions, 430 deletions
diff --git a/drivers/crypto/ccp/Makefile b/drivers/crypto/ccp/Makefile index ee4d2741b3ab..346ceb8f17bd 100644 --- a/drivers/crypto/ccp/Makefile +++ b/drivers/crypto/ccp/Makefile @@ -2,6 +2,7 @@ obj-$(CONFIG_CRYPTO_DEV_CCP_DD) += ccp.o ccp-objs := ccp-dev.o \ ccp-ops.o \ ccp-dev-v3.o \ + ccp-dev-v5.o \ ccp-platform.o \ ccp-dmaengine.o ccp-$(CONFIG_PCI) += ccp-pci.o diff --git a/drivers/crypto/ccp/ccp-crypto-sha.c b/drivers/crypto/ccp/ccp-crypto-sha.c index 8f36af62fe95..84a652be4274 100644 --- a/drivers/crypto/ccp/ccp-crypto-sha.c +++ b/drivers/crypto/ccp/ccp-crypto-sha.c @@ -4,6 +4,7 @@ * Copyright (C) 2013,2016 Advanced Micro Devices, Inc. * * Author: Tom Lendacky <thomas.lendacky@amd.com> + * Author: Gary R Hook <gary.hook@amd.com> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -134,7 +135,22 @@ static int ccp_do_sha_update(struct ahash_request *req, unsigned int nbytes, rctx->cmd.engine = CCP_ENGINE_SHA; rctx->cmd.u.sha.type = rctx->type; rctx->cmd.u.sha.ctx = &rctx->ctx_sg; - rctx->cmd.u.sha.ctx_len = sizeof(rctx->ctx); + + switch (rctx->type) { + case CCP_SHA_TYPE_1: + rctx->cmd.u.sha.ctx_len = SHA1_DIGEST_SIZE; + break; + case CCP_SHA_TYPE_224: + rctx->cmd.u.sha.ctx_len = SHA224_DIGEST_SIZE; + break; + case CCP_SHA_TYPE_256: + rctx->cmd.u.sha.ctx_len = SHA256_DIGEST_SIZE; + break; + default: + /* Should never get here */ + break; + } + rctx->cmd.u.sha.src = sg; rctx->cmd.u.sha.src_len = rctx->hash_cnt; rctx->cmd.u.sha.opad = ctx->u.sha.key_len ? diff --git a/drivers/crypto/ccp/ccp-dev-v3.c b/drivers/crypto/ccp/ccp-dev-v3.c index d7a710347967..8d2dbacc6161 100644 --- a/drivers/crypto/ccp/ccp-dev-v3.c +++ b/drivers/crypto/ccp/ccp-dev-v3.c @@ -4,6 +4,7 @@ * Copyright (C) 2013,2016 Advanced Micro Devices, Inc. * * Author: Tom Lendacky <thomas.lendacky@amd.com> + * Author: Gary R Hook <gary.hook@amd.com> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -19,6 +20,61 @@ #include "ccp-dev.h" +static u32 ccp_alloc_ksb(struct ccp_cmd_queue *cmd_q, unsigned int count) +{ + int start; + struct ccp_device *ccp = cmd_q->ccp; + + for (;;) { + mutex_lock(&ccp->sb_mutex); + + start = (u32)bitmap_find_next_zero_area(ccp->sb, + ccp->sb_count, + ccp->sb_start, + count, 0); + if (start <= ccp->sb_count) { + bitmap_set(ccp->sb, start, count); + + mutex_unlock(&ccp->sb_mutex); + break; + } + + ccp->sb_avail = 0; + + mutex_unlock(&ccp->sb_mutex); + + /* Wait for KSB entries to become available */ + if (wait_event_interruptible(ccp->sb_queue, ccp->sb_avail)) + return 0; + } + + return KSB_START + start; +} + +static void ccp_free_ksb(struct ccp_cmd_queue *cmd_q, unsigned int start, + unsigned int count) +{ + struct ccp_device *ccp = cmd_q->ccp; + + if (!start) + return; + + mutex_lock(&ccp->sb_mutex); + + bitmap_clear(ccp->sb, start - KSB_START, count); + + ccp->sb_avail = 1; + + mutex_unlock(&ccp->sb_mutex); + + wake_up_interruptible_all(&ccp->sb_queue); +} + +static unsigned int ccp_get_free_slots(struct ccp_cmd_queue *cmd_q) +{ + return CMD_Q_DEPTH(ioread32(cmd_q->reg_status)); +} + static int ccp_do_cmd(struct ccp_op *op, u32 *cr, unsigned int cr_count) { struct ccp_cmd_queue *cmd_q = op->cmd_q; @@ -68,6 +124,9 @@ static int ccp_do_cmd(struct ccp_op *op, u32 *cr, unsigned int cr_count) /* On error delete all related jobs from the queue */ cmd = (cmd_q->id << DEL_Q_ID_SHIFT) | op->jobid; + if (cmd_q->cmd_error) + ccp_log_error(cmd_q->ccp, + cmd_q->cmd_error); iowrite32(cmd, ccp->io_regs + DEL_CMD_Q_JOB); @@ -99,10 +158,10 @@ static int ccp_perform_aes(struct ccp_op *op) | (op->u.aes.type << REQ1_AES_TYPE_SHIFT) | (op->u.aes.mode << REQ1_AES_MODE_SHIFT) | (op->u.aes.action << REQ1_AES_ACTION_SHIFT) - | (op->ksb_key << REQ1_KEY_KSB_SHIFT); + | (op->sb_key << REQ1_KEY_KSB_SHIFT); cr[1] = op->src.u.dma.length - 1; cr[2] = ccp_addr_lo(&op->src.u.dma); - cr[3] = (op->ksb_ctx << REQ4_KSB_SHIFT) + cr[3] = (op->sb_ctx << REQ4_KSB_SHIFT) | (CCP_MEMTYPE_SYSTEM << REQ4_MEMTYPE_SHIFT) | ccp_addr_hi(&op->src.u.dma); cr[4] = ccp_addr_lo(&op->dst.u.dma); @@ -129,10 +188,10 @@ static int ccp_perform_xts_aes(struct ccp_op *op) cr[0] = (CCP_ENGINE_XTS_AES_128 << REQ1_ENGINE_SHIFT) | (op->u.xts.action << REQ1_AES_ACTION_SHIFT) | (op->u.xts.unit_size << REQ1_XTS_AES_SIZE_SHIFT) - | (op->ksb_key << REQ1_KEY_KSB_SHIFT); + | (op->sb_key << REQ1_KEY_KSB_SHIFT); cr[1] = op->src.u.dma.length - 1; cr[2] = ccp_addr_lo(&op->src.u.dma); - cr[3] = (op->ksb_ctx << REQ4_KSB_SHIFT) + cr[3] = (op->sb_ctx << REQ4_KSB_SHIFT) | (CCP_MEMTYPE_SYSTEM << REQ4_MEMTYPE_SHIFT) | ccp_addr_hi(&op->src.u.dma); cr[4] = ccp_addr_lo(&op->dst.u.dma); @@ -158,7 +217,7 @@ static int ccp_perform_sha(struct ccp_op *op) | REQ1_INIT; cr[1] = op->src.u.dma.length - 1; cr[2] = ccp_addr_lo(&op->src.u.dma); - cr[3] = (op->ksb_ctx << REQ4_KSB_SHIFT) + cr[3] = (op->sb_ctx << REQ4_KSB_SHIFT) | (CCP_MEMTYPE_SYSTEM << REQ4_MEMTYPE_SHIFT) | ccp_addr_hi(&op->src.u.dma); @@ -181,11 +240,11 @@ static int ccp_perform_rsa(struct ccp_op *op) /* Fill out the register contents for REQ1 through REQ6 */ cr[0] = (CCP_ENGINE_RSA << REQ1_ENGINE_SHIFT) | (op->u.rsa.mod_size << REQ1_RSA_MOD_SIZE_SHIFT) - | (op->ksb_key << REQ1_KEY_KSB_SHIFT) + | (op->sb_key << REQ1_KEY_KSB_SHIFT) | REQ1_EOM; cr[1] = op->u.rsa.input_len - 1; cr[2] = ccp_addr_lo(&op->src.u.dma); - cr[3] = (op->ksb_ctx << REQ4_KSB_SHIFT) + cr[3] = (op->sb_ctx << REQ4_KSB_SHIFT) | (CCP_MEMTYPE_SYSTEM << REQ4_MEMTYPE_SHIFT) | ccp_addr_hi(&op->src.u.dma); cr[4] = ccp_addr_lo(&op->dst.u.dma); @@ -215,10 +274,10 @@ static int ccp_perform_passthru(struct ccp_op *op) | ccp_addr_hi(&op->src.u.dma); if (op->u.passthru.bit_mod != CCP_PASSTHRU_BITWISE_NOOP) - cr[3] |= (op->ksb_key << REQ4_KSB_SHIFT); + cr[3] |= (op->sb_key << REQ4_KSB_SHIFT); } else { - cr[2] = op->src.u.ksb * CCP_KSB_BYTES; - cr[3] = (CCP_MEMTYPE_KSB << REQ4_MEMTYPE_SHIFT); + cr[2] = op->src.u.sb * CCP_SB_BYTES; + cr[3] = (CCP_MEMTYPE_SB << REQ4_MEMTYPE_SHIFT); } if (op->dst.type == CCP_MEMTYPE_SYSTEM) { @@ -226,8 +285,8 @@ static int ccp_perform_passthru(struct ccp_op *op) cr[5] = (CCP_MEMTYPE_SYSTEM << REQ6_MEMTYPE_SHIFT) | ccp_addr_hi(&op->dst.u.dma); } else { - cr[4] = op->dst.u.ksb * CCP_KSB_BYTES; - cr[5] = (CCP_MEMTYPE_KSB << REQ6_MEMTYPE_SHIFT); + cr[4] = op->dst.u.sb * CCP_SB_BYTES; + cr[5] = (CCP_MEMTYPE_SB << REQ6_MEMTYPE_SHIFT); } if (op->eom) @@ -256,35 +315,6 @@ static int ccp_perform_ecc(struct ccp_op *op) return ccp_do_cmd(op, cr, ARRAY_SIZE(cr)); } -static int ccp_trng_read(struct hwrng *rng, void *data, size_t max, bool wait) -{ - struct ccp_device *ccp = container_of(rng, struct ccp_device, hwrng); - u32 trng_value; - int len = min_t(int, sizeof(trng_value), max); - - /* - * Locking is provided by the caller so we can update device - * hwrng-related fields safely - */ - trng_value = ioread32(ccp->io_regs + TRNG_OUT_REG); - if (!trng_value) { - /* Zero is returned if not data is available or if a - * bad-entropy error is present. Assume an error if - * we exceed TRNG_RETRIES reads of zero. - */ - if (ccp->hwrng_retries++ > TRNG_RETRIES) - return -EIO; - - return 0; - } - - /* Reset the counter and save the rng value */ - ccp->hwrng_retries = 0; - memcpy(data, &trng_value, len); - - return len; -} - static int ccp_init(struct ccp_device *ccp) { struct device *dev = ccp->dev; @@ -321,9 +351,9 @@ static int ccp_init(struct ccp_device *ccp) cmd_q->dma_pool = dma_pool; /* Reserve 2 KSB regions for the queue */ - cmd_q->ksb_key = KSB_START + ccp->ksb_start++; - cmd_q->ksb_ctx = KSB_START + ccp->ksb_start++; - ccp->ksb_count -= 2; + cmd_q->sb_key = KSB_START + ccp->sb_start++; + cmd_q->sb_ctx = KSB_START + ccp->sb_start++; + ccp->sb_count -= 2; /* Preset some register values and masks that are queue * number dependent @@ -335,7 +365,7 @@ static int ccp_init(struct ccp_device *ccp) cmd_q->int_ok = 1 << (i * 2); cmd_q->int_err = 1 << ((i * 2) + 1); - cmd_q->free_slots = CMD_Q_DEPTH(ioread32(cmd_q->reg_status)); + cmd_q->free_slots = ccp_get_free_slots(cmd_q); init_waitqueue_head(&cmd_q->int_queue); @@ -375,9 +405,10 @@ static int ccp_init(struct ccp_device *ccp) } /* Initialize the queues used to wait for KSB space and suspend */ - init_waitqueue_head(&ccp->ksb_queue); + init_waitqueue_head(&ccp->sb_queue); init_waitqueue_head(&ccp->suspend_queue); + dev_dbg(dev, "Starting threads...\n"); /* Create a kthread for each queue */ for (i = 0; i < ccp->cmd_q_count; i++) { struct task_struct *kthread; @@ -397,29 +428,26 @@ static int ccp_init(struct ccp_device *ccp) wake_up_process(kthread); } - /* Register the RNG */ - ccp->hwrng.name = ccp->rngname; - ccp->hwrng.read = ccp_trng_read; - ret = hwrng_register(&ccp->hwrng); - if (ret) { - dev_err(dev, "error registering hwrng (%d)\n", ret); + dev_dbg(dev, "Enabling interrupts...\n"); + /* Enable interrupts */ + iowrite32(qim, ccp->io_regs + IRQ_MASK_REG); + + dev_dbg(dev, "Registering device...\n"); + ccp_add_device(ccp); + + ret = ccp_register_rng(ccp); + if (ret) goto e_kthread; - } /* Register the DMA engine support */ ret = ccp_dmaengine_register(ccp); if (ret) goto e_hwrng; - ccp_add_device(ccp); - - /* Enable interrupts */ - iowrite32(qim, ccp->io_regs + IRQ_MASK_REG); - return 0; e_hwrng: - hwrng_unregister(&ccp->hwrng); + ccp_unregister_rng(ccp); e_kthread: for (i = 0; i < ccp->cmd_q_count; i++) @@ -441,19 +469,14 @@ static void ccp_destroy(struct ccp_device *ccp) struct ccp_cmd *cmd; unsigned int qim, i; - /* Remove this device from the list of available units first */ - ccp_del_device(ccp); - /* Unregister the DMA engine */ ccp_dmaengine_unregister(ccp); /* Unregister the RNG */ - hwrng_unregister(&ccp->hwrng); + ccp_unregister_rng(ccp); - /* Stop the queue kthreads */ - for (i = 0; i < ccp->cmd_q_count; i++) - if (ccp->cmd_q[i].kthread) - kthread_stop(ccp->cmd_q[i].kthread); + /* Remove this device from the list of available units */ + ccp_del_device(ccp); /* Build queue interrupt mask (two interrupt masks per queue) */ qim = 0; @@ -472,6 +495,11 @@ static void ccp_destroy(struct ccp_device *ccp) } iowrite32(qim, ccp->io_regs + IRQ_STATUS_REG); + /* Stop the queue kthreads */ + for (i = 0; i < ccp->cmd_q_count; i++) + if (ccp->cmd_q[i].kthread) + kthread_stop(ccp->cmd_q[i].kthread); + ccp->free_irq(ccp); for (i = 0; i < ccp->cmd_q_count; i++) @@ -527,18 +555,24 @@ static irqreturn_t ccp_irq_handler(int irq, void *data) } static const struct ccp_actions ccp3_actions = { - .perform_aes = ccp_perform_aes, - .perform_xts_aes = ccp_perform_xts_aes, - .perform_sha = ccp_perform_sha, - .perform_rsa = ccp_perform_rsa, - .perform_passthru = ccp_perform_passthru, - .perform_ecc = ccp_perform_ecc, + .aes = ccp_perform_aes, + .xts_aes = ccp_perform_xts_aes, + .sha = ccp_perform_sha, + .rsa = ccp_perform_rsa, + .passthru = ccp_perform_passthru, + .ecc = ccp_perform_ecc, + .sballoc = ccp_alloc_ksb, + .sbfree = ccp_free_ksb, .init = ccp_init, .destroy = ccp_destroy, + .get_free_slots = ccp_get_free_slots, .irqhandler = ccp_irq_handler, }; -struct ccp_vdata ccpv3 = { +const struct ccp_vdata ccpv3 = { .version = CCP_VERSION(3, 0), + .setup = NULL, .perform = &ccp3_actions, + .bar = 2, + .offset = 0x20000, }; diff --git a/drivers/crypto/ccp/ccp-dev-v5.c b/drivers/crypto/ccp/ccp-dev-v5.c new file mode 100644 index 000000000000..faf3cb3ddce2 --- /dev/null +++ b/drivers/crypto/ccp/ccp-dev-v5.c @@ -0,0 +1,1017 @@ +/* + * AMD Cryptographic Coprocessor (CCP) driver + * + * Copyright (C) 2016 Advanced Micro Devices, Inc. + * + * Author: Gary R Hook <gary.hook@amd.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/pci.h> +#include <linux/kthread.h> +#include <linux/dma-mapping.h> +#include <linux/interrupt.h> +#include <linux/compiler.h> +#include <linux/ccp.h> + +#include "ccp-dev.h" + +static u32 ccp_lsb_alloc(struct ccp_cmd_queue *cmd_q, unsigned int count) +{ + struct ccp_device *ccp; + int start; + + /* First look at the map for the queue */ + if (cmd_q->lsb >= 0) { + start = (u32)bitmap_find_next_zero_area(cmd_q->lsbmap, + LSB_SIZE, + 0, count, 0); + if (start < LSB_SIZE) { + bitmap_set(cmd_q->lsbmap, start, count); + return start + cmd_q->lsb * LSB_SIZE; + } + } + + /* No joy; try to get an entry from the shared blocks */ + ccp = cmd_q->ccp; + for (;;) { + mutex_lock(&ccp->sb_mutex); + + start = (u32)bitmap_find_next_zero_area(ccp->lsbmap, + MAX_LSB_CNT * LSB_SIZE, + 0, + count, 0); + if (start <= MAX_LSB_CNT * LSB_SIZE) { + bitmap_set(ccp->lsbmap, start, count); + + mutex_unlock(&ccp->sb_mutex); + return start * LSB_ITEM_SIZE; + } + + ccp->sb_avail = 0; + + mutex_unlock(&ccp->sb_mutex); + + /* Wait for KSB entries to become available */ + if (wait_event_interruptible(ccp->sb_queue, ccp->sb_avail)) + return 0; + } +} + +static void ccp_lsb_free(struct ccp_cmd_queue *cmd_q, unsigned int start, + unsigned int count) +{ + int lsbno = start / LSB_SIZE; + + if (!start) + return; + + if (cmd_q->lsb == lsbno) { + /* An entry from the private LSB */ + bitmap_clear(cmd_q->lsbmap, start % LSB_SIZE, count); + } else { + /* From the shared LSBs */ + struct ccp_device *ccp = cmd_q->ccp; + + mutex_lock(&ccp->sb_mutex); + bitmap_clear(ccp->lsbmap, start, count); + ccp->sb_avail = 1; + mutex_unlock(&ccp->sb_mutex); + wake_up_interruptible_all(&ccp->sb_queue); + } +} + +/* CCP version 5: Union to define the function field (cmd_reg1/dword0) */ +union ccp_function { + struct { + u16 size:7; + u16 encrypt:1; + u16 mode:5; + u16 type:2; + } aes; + struct { + u16 size:7; + u16 encrypt:1; + u16 rsvd:5; + u16 type:2; + } aes_xts; + struct { + u16 rsvd1:10; + u16 type:4; + u16 rsvd2:1; + } sha; + struct { + u16 mode:3; + u16 size:12; + } rsa; + struct { + u16 byteswap:2; + u16 bitwise:3; + u16 reflect:2; + u16 rsvd:8; + } pt; + struct { + u16 rsvd:13; + } zlib; + struct { + u16 size:10; + u16 type:2; + u16 mode:3; + } ecc; + u16 raw; +}; + +#define CCP_AES_SIZE(p) ((p)->aes.size) +#define CCP_AES_ENCRYPT(p) ((p)->aes.encrypt) +#define CCP_AES_MODE(p) ((p)->aes.mode) +#define CCP_AES_TYPE(p) ((p)->aes.type) +#define CCP_XTS_SIZE(p) ((p)->aes_xts.size) +#define CCP_XTS_ENCRYPT(p) ((p)->aes_xts.encrypt) +#define CCP_SHA_TYPE(p) ((p)->sha.type) +#define CCP_RSA_SIZE(p) ((p)->rsa.size) +#define CCP_PT_BYTESWAP(p) ((p)->pt.byteswap) +#define CCP_PT_BITWISE(p) ((p)->pt.bitwise) +#define CCP_ECC_MODE(p) ((p)->ecc.mode) +#define CCP_ECC_AFFINE(p) ((p)->ecc.one) + +/* Word 0 */ +#define CCP5_CMD_DW0(p) ((p)->dw0) +#define CCP5_CMD_SOC(p) (CCP5_CMD_DW0(p).soc) +#define CCP5_CMD_IOC(p) (CCP5_CMD_DW0(p).ioc) +#define CCP5_CMD_INIT(p) (CCP5_CMD_DW0(p).init) +#define CCP5_CMD_EOM(p) (CCP5_CMD_DW0(p).eom) +#define CCP5_CMD_FUNCTION(p) (CCP5_CMD_DW0(p).function) +#define CCP5_CMD_ENGINE(p) (CCP5_CMD_DW0(p).engine) +#define CCP5_CMD_PROT(p) (CCP5_CMD_DW0(p).prot) + +/* Word 1 */ +#define CCP5_CMD_DW1(p) ((p)->length) +#define CCP5_CMD_LEN(p) (CCP5_CMD_DW1(p)) + +/* Word 2 */ +#define CCP5_CMD_DW2(p) ((p)->src_lo) +#define CCP5_CMD_SRC_LO(p) (CCP5_CMD_DW2(p)) + +/* Word 3 */ +#define CCP5_CMD_DW3(p) ((p)->dw3) +#define CCP5_CMD_SRC_MEM(p) ((p)->dw3.src_mem) +#define CCP5_CMD_SRC_HI(p) ((p)->dw3.src_hi) +#define CCP5_CMD_LSB_ID(p) ((p)->dw3.lsb_cxt_id) +#define CCP5_CMD_FIX_SRC(p) ((p)->dw3.fixed) + +/* Words 4/5 */ +#define CCP5_CMD_DW4(p) ((p)->dw4) +#define CCP5_CMD_DST_LO(p) (CCP5_CMD_DW4(p).dst_lo) +#define CCP5_CMD_DW5(p) ((p)->dw5.fields.dst_hi) +#define CCP5_CMD_DST_HI(p) (CCP5_CMD_DW5(p)) +#define CCP5_CMD_DST_MEM(p) ((p)->dw5.fields.dst_mem) +#define CCP5_CMD_FIX_DST(p) ((p)->dw5.fields.fixed) +#define CCP5_CMD_SHA_LO(p) ((p)->dw4.sha_len_lo) +#define CCP5_CMD_SHA_HI(p) ((p)->dw5.sha_len_hi) + +/* Word 6/7 */ +#define CCP5_CMD_DW6(p) ((p)->key_lo) +#define CCP5_CMD_KEY_LO(p) (CCP5_CMD_DW6(p)) +#define CCP5_CMD_DW7(p) ((p)->dw7) +#define CCP5_CMD_KEY_HI(p) ((p)->dw7.key_hi) +#define CCP5_CMD_KEY_MEM(p) ((p)->dw7.key_mem) + +static inline u32 low_address(unsigned long addr) +{ + return (u64)addr & 0x0ffffffff; +} + +static inline u32 high_address(unsigned long addr) +{ + return ((u64)addr >> 32) & 0x00000ffff; +} + +static unsigned int ccp5_get_free_slots(struct ccp_cmd_queue *cmd_q) +{ + unsigned int head_idx, n; + u32 head_lo, queue_start; + + queue_start = low_address(cmd_q->qdma_tail); + head_lo = ioread32(cmd_q->reg_head_lo); + head_idx = (head_lo - queue_start) / sizeof(struct ccp5_desc); + + n = head_idx + COMMANDS_PER_QUEUE - cmd_q->qidx - 1; + + return n % COMMANDS_PER_QUEUE; /* Always one unused spot */ +} + +static int ccp5_do_cmd(struct ccp5_desc *desc, + struct ccp_cmd_queue *cmd_q) +{ + u32 *mP; + __le32 *dP; + u32 tail; + int i; + int ret = 0; + + if (CCP5_CMD_SOC(desc)) { + CCP5_CMD_IOC(desc) = 1; + CCP5_CMD_SOC(desc) = 0; + } + mutex_lock(&cmd_q->q_mutex); + + mP = (u32 *) &cmd_q->qbase[cmd_q->qidx]; + dP = (__le32 *) desc; + for (i = 0; i < 8; i++) + mP[i] = cpu_to_le32(dP[i]); /* handle endianness */ + + cmd_q->qidx = (cmd_q->qidx + 1) % COMMANDS_PER_QUEUE; + + /* The data used by this command must be flushed to memory */ + wmb(); + + /* Write the new tail address back to the queue register */ + tail = low_address(cmd_q->qdma_tail + cmd_q->qidx * Q_DESC_SIZE); + iowrite32(tail, cmd_q->reg_tail_lo); + + /* Turn the queue back on using our cached control register */ + iowrite32(cmd_q->qcontrol | CMD5_Q_RUN, cmd_q->reg_control); + mutex_unlock(&cmd_q->q_mutex); + + if (CCP5_CMD_IOC(desc)) { + /* Wait for the job to complete */ + ret = wait_event_interruptible(cmd_q->int_queue, + cmd_q->int_rcvd); + if (ret || cmd_q->cmd_error) { + if (cmd_q->cmd_error) + ccp_log_error(cmd_q->ccp, + cmd_q->cmd_error); + /* A version 5 device doesn't use Job IDs... */ + if (!ret) + ret = -EIO; + } + cmd_q->int_rcvd = 0; + } + + return 0; +} + +static int ccp5_perform_aes(struct ccp_op *op) +{ + struct ccp5_desc desc; + union ccp_function function; + u32 key_addr = op->sb_key * LSB_ITEM_SIZE; + + /* Zero out all the fields of the command desc */ + memset(&desc, 0, Q_DESC_SIZE); + + CCP5_CMD_ENGINE(&desc) = CCP_ENGINE_AES; + + CCP5_CMD_SOC(&desc) = op->soc; + CCP5_CMD_IOC(&desc) = 1; + CCP5_CMD_INIT(&desc) = op->init; + CCP5_CMD_EOM(&desc) = op->eom; + CCP5_CMD_PROT(&desc) = 0; + + function.raw = 0; + CCP_AES_ENCRYPT(&function) = op->u.aes.action; + CCP_AES_MODE(&function) = op->u.aes.mode; + CCP_AES_TYPE(&function) = op->u.aes.type; + if (op->u.aes.mode == CCP_AES_MODE_CFB) + CCP_AES_SIZE(&function) = 0x7f; + + CCP5_CMD_FUNCTION(&desc) = function.raw; + + CCP5_CMD_LEN(&desc) = op->src.u.dma.length; + + CCP5_CMD_SRC_LO(&desc) = ccp_addr_lo(&op->src.u.dma); + CCP5_CMD_SRC_HI(&desc) = ccp_addr_hi(&op->src.u.dma); + CCP5_CMD_SRC_MEM(&desc) = CCP_MEMTYPE_SYSTEM; + + CCP5_CMD_DST_LO(&desc) = ccp_addr_lo(&op->dst.u.dma); + CCP5_CMD_DST_HI(&desc) = ccp_addr_hi(&op->dst.u.dma); + CCP5_CMD_DST_MEM(&desc) = CCP_MEMTYPE_SYSTEM; + + CCP5_CMD_KEY_LO(&desc) = lower_32_bits(key_addr); + CCP5_CMD_KEY_HI(&desc) = 0; + CCP5_CMD_KEY_MEM(&desc) = CCP_MEMTYPE_SB; + CCP5_CMD_LSB_ID(&desc) = op->sb_ctx; + + return ccp5_do_cmd(&desc, op->cmd_q); +} + +static int ccp5_perform_xts_aes(struct ccp_op *op) +{ + struct ccp5_desc desc; + union ccp_function function; + u32 key_addr = op->sb_key * LSB_ITEM_SIZE; + + /* Zero out all the fields of the command desc */ + memset(&desc, 0, Q_DESC_SIZE); + + CCP5_CMD_ENGINE(&desc) = CCP_ENGINE_XTS_AES_128; + + CCP5_CMD_SOC(&desc) = op->soc; + CCP5_CMD_IOC(&desc) = 1; + CCP5_CMD_INIT(&desc) = op->init; + CCP5_CMD_EOM(&desc) = op->eom; + CCP5_CMD_PROT(&desc) = 0; + + function.raw = 0; + CCP_XTS_ENCRYPT(&function) = op->u.xts.action; + CCP_XTS_SIZE(&function) = op->u.xts.unit_size; + CCP5_CMD_FUNCTION(&desc) = function.raw; + + CCP5_CMD_LEN(&desc) = op->src.u.dma.length; + + CCP5_CMD_SRC_LO(&desc) = ccp_addr_lo(&op->src.u.dma); + CCP5_CMD_SRC_HI(&desc) = ccp_addr_hi(&op->src.u.dma); + CCP5_CMD_SRC_MEM(&desc) = CCP_MEMTYPE_SYSTEM; + + CCP5_CMD_DST_LO(&desc) = ccp_addr_lo(&op->dst.u.dma); + CCP5_CMD_DST_HI(&desc) = ccp_addr_hi(&op->dst.u.dma); + CCP5_CMD_DST_MEM(&desc) = CCP_MEMTYPE_SYSTEM; + + CCP5_CMD_KEY_LO(&desc) = lower_32_bits(key_addr); + CCP5_CMD_KEY_HI(&desc) = 0; + CCP5_CMD_KEY_MEM(&desc) = CCP_MEMTYPE_SB; + CCP5_CMD_LSB_ID(&desc) = op->sb_ctx; + + return ccp5_do_cmd(&desc, op->cmd_q); +} + +static int ccp5_perform_sha(struct ccp_op *op) +{ + struct ccp5_desc desc; + union ccp_function function; + + /* Zero out all the fields of the command desc */ + memset(&desc, 0, Q_DESC_SIZE); + + CCP5_CMD_ENGINE(&desc) = CCP_ENGINE_SHA; + + CCP5_CMD_SOC(&desc) = op->soc; + CCP5_CMD_IOC(&desc) = 1; + CCP5_CMD_INIT(&desc) = 1; + CCP5_CMD_EOM(&desc) = op->eom; + CCP5_CMD_PROT(&desc) = 0; + + function.raw = 0; + CCP_SHA_TYPE(&function) = op->u.sha.type; + CCP5_CMD_FUNCTION(&desc) = function.raw; + + CCP5_CMD_LEN(&desc) = op->src.u.dma.length; + + CCP5_CMD_SRC_LO(&desc) = ccp_addr_lo(&op->src.u.dma); + CCP5_CMD_SRC_HI(&desc) = ccp_addr_hi(&op->src.u.dma); + CCP5_CMD_SRC_MEM(&desc) = CCP_MEMTYPE_SYSTEM; + + CCP5_CMD_LSB_ID(&desc) = op->sb_ctx; + + if (op->eom) { + CCP5_CMD_SHA_LO(&desc) = lower_32_bits(op->u.sha.msg_bits); + CCP5_CMD_SHA_HI(&desc) = upper_32_bits(op->u.sha.msg_bits); + } else { + CCP5_CMD_SHA_LO(&desc) = 0; + CCP5_CMD_SHA_HI(&desc) = 0; + } + + return ccp5_do_cmd(&desc, op->cmd_q); +} + +static int ccp5_perform_rsa(struct ccp_op *op) +{ + struct ccp5_desc desc; + union ccp_function function; + + /* Zero out all the fields of the command desc */ + memset(&desc, 0, Q_DESC_SIZE); + + CCP5_CMD_ENGINE(&desc) = CCP_ENGINE_RSA; + + CCP5_CMD_SOC(&desc) = op->soc; + CCP5_CMD_IOC(&desc) = 1; + CCP5_CMD_INIT(&desc) = 0; + CCP5_CMD_EOM(&desc) = 1; + CCP5_CMD_PROT(&desc) = 0; + + function.raw = 0; + CCP_RSA_SIZE(&function) = op->u.rsa.mod_size; + CCP5_CMD_FUNCTION(&desc) = function.raw; + + CCP5_CMD_LEN(&desc) = op->u.rsa.input_len; + + /* Source is from external memory */ + CCP5_CMD_SRC_LO(&desc) = ccp_addr_lo(&op->src.u.dma); + CCP5_CMD_SRC_HI(&desc) = ccp_addr_hi(&op->src.u.dma); + CCP5_CMD_SRC_MEM(&desc) = CCP_MEMTYPE_SYSTEM; + + /* Destination is in external memory */ + CCP5_CMD_DST_LO(&desc) = ccp_addr_lo(&op->dst.u.dma); + CCP5_CMD_DST_HI(&desc) = ccp_addr_hi(&op->dst.u.dma); + CCP5_CMD_DST_MEM(&desc) = CCP_MEMTYPE_SYSTEM; + + /* Key (Exponent) is in external memory */ + CCP5_CMD_KEY_LO(&desc) = ccp_addr_lo(&op->exp.u.dma); + CCP5_CMD_KEY_HI(&desc) = ccp_addr_hi(&op->exp.u.dma); + CCP5_CMD_KEY_MEM(&desc) = CCP_MEMTYPE_SYSTEM; + + return ccp5_do_cmd(&desc, op->cmd_q); +} + +static int ccp5_perform_passthru(struct ccp_op *op) +{ + struct ccp5_desc desc; + union ccp_function function; + struct ccp_dma_info *saddr = &op->src.u.dma; + struct ccp_dma_info *daddr = &op->dst.u.dma; + + memset(&desc, 0, Q_DESC_SIZE); + + CCP5_CMD_ENGINE(&desc) = CCP_ENGINE_PASSTHRU; + + CCP5_CMD_SOC(&desc) = 0; + CCP5_CMD_IOC(&desc) = 1; + CCP5_CMD_INIT(&desc) = 0; + CCP5_CMD_EOM(&desc) = op->eom; + CCP5_CMD_PROT(&desc) = 0; + + function.raw = 0; + CCP_PT_BYTESWAP(&function) = op->u.passthru.byte_swap; + CCP_PT_BITWISE(&function) = op->u.passthru.bit_mod; + CCP5_CMD_FUNCTION(&desc) = function.raw; + + /* Length of source data is always 256 bytes */ + if (op->src.type == CCP_MEMTYPE_SYSTEM) + CCP5_CMD_LEN(&desc) = saddr->length; + else + CCP5_CMD_LEN(&desc) = daddr->length; + + if (op->src.type == CCP_MEMTYPE_SYSTEM) { + CCP5_CMD_SRC_LO(&desc) = ccp_addr_lo(&op->src.u.dma); + CCP5_CMD_SRC_HI(&desc) = ccp_addr_hi(&op->src.u.dma); + CCP5_CMD_SRC_MEM(&desc) = CCP_MEMTYPE_SYSTEM; + + if (op->u.passthru.bit_mod != CCP_PASSTHRU_BITWISE_NOOP) + CCP5_CMD_LSB_ID(&desc) = op->sb_key; + } else { + u32 key_addr = op->src.u.sb * CCP_SB_BYTES; + + CCP5_CMD_SRC_LO(&desc) = lower_32_bits(key_addr); + CCP5_CMD_SRC_HI(&desc) = 0; + CCP5_CMD_SRC_MEM(&desc) = CCP_MEMTYPE_SB; + } + + if (op->dst.type == CCP_MEMTYPE_SYSTEM) { + CCP5_CMD_DST_LO(&desc) = ccp_addr_lo(&op->dst.u.dma); + CCP5_CMD_DST_HI(&desc) = ccp_addr_hi(&op->dst.u.dma); + CCP5_CMD_DST_MEM(&desc) = CCP_MEMTYPE_SYSTEM; + } else { + u32 key_addr = op->dst.u.sb * CCP_SB_BYTES; + + CCP5_CMD_DST_LO(&desc) = lower_32_bits(key_addr); + CCP5_CMD_DST_HI(&desc) = 0; + CCP5_CMD_DST_MEM(&desc) = CCP_MEMTYPE_SB; + } + + return ccp5_do_cmd(&desc, op->cmd_q); +} + +static int ccp5_perform_ecc(struct ccp_op *op) +{ + struct ccp5_desc desc; + union ccp_function function; + + /* Zero out all the fields of the command desc */ + memset(&desc, 0, Q_DESC_SIZE); + + CCP5_CMD_ENGINE(&desc) = CCP_ENGINE_ECC; + + CCP5_CMD_SOC(&desc) = 0; + CCP5_CMD_IOC(&desc) = 1; + CCP5_CMD_INIT(&desc) = 0; + CCP5_CMD_EOM(&desc) = 1; + CCP5_CMD_PROT(&desc) = 0; + + function.raw = 0; + function.ecc.mode = op->u.ecc.function; + CCP5_CMD_FUNCTION(&desc) = function.raw; + + CCP5_CMD_LEN(&desc) = op->src.u.dma.length; + + CCP5_CMD_SRC_LO(&desc) = ccp_addr_lo(&op->src.u.dma); + CCP5_CMD_SRC_HI(&desc) = ccp_addr_hi(&op->src.u.dma); + CCP5_CMD_SRC_MEM(&desc) = CCP_MEMTYPE_SYSTEM; + + CCP5_CMD_DST_LO(&desc) = ccp_addr_lo(&op->dst.u.dma); + CCP5_CMD_DST_HI(&desc) = ccp_addr_hi(&op->dst.u.dma); + CCP5_CMD_DST_MEM(&desc) = CCP_MEMTYPE_SYSTEM; + + return ccp5_do_cmd(&desc, op->cmd_q); +} + +static int ccp_find_lsb_regions(struct ccp_cmd_queue *cmd_q, u64 status) +{ + int q_mask = 1 << cmd_q->id; + int queues = 0; + int j; + + /* Build a bit mask to know which LSBs this queue has access to. + * Don't bother with segment 0 as it has special privileges. + */ + for (j = 1; j < MAX_LSB_CNT; j++) { + if (status & q_mask) + bitmap_set(cmd_q->lsbmask, j, 1); + status >>= LSB_REGION_WIDTH; + } + queues = bitmap_weight(cmd_q->lsbmask, MAX_LSB_CNT); + dev_info(cmd_q->ccp->dev, "Queue %d can access %d LSB regions\n", + cmd_q->id, queues); + + return queues ? 0 : -EINVAL; +} + + +static int ccp_find_and_assign_lsb_to_q(struct ccp_device *ccp, + int lsb_cnt, int n_lsbs, + unsigned long *lsb_pub) +{ + DECLARE_BITMAP(qlsb, MAX_LSB_CNT); + int bitno; + int qlsb_wgt; + int i; + + /* For each queue: + * If the count of potential LSBs available to a queue matches the + * ordinal given to us in lsb_cnt: + * Copy the mask of possible LSBs for this queue into "qlsb"; + * For each bit in qlsb, see if the corresponding bit in the + * aggregation mask is set; if so, we have a match. + * If we have a match, clear the bit in the aggregation to + * mark it as no longer available. + * If there is no match, clear the bit in qlsb and keep looking. + */ + for (i = 0; i < ccp->cmd_q_count; i++) { + struct ccp_cmd_queue *cmd_q = &ccp->cmd_q[i]; + + qlsb_wgt = bitmap_weight(cmd_q->lsbmask, MAX_LSB_CNT); + + if (qlsb_wgt == lsb_cnt) { + bitmap_copy(qlsb, cmd_q->lsbmask, MAX_LSB_CNT); + + bitno = find_first_bit(qlsb, MAX_LSB_CNT); + while (bitno < MAX_LSB_CNT) { + if (test_bit(bitno, lsb_pub)) { + /* We found an available LSB + * that this queue can access + */ + cmd_q->lsb = bitno; + bitmap_clear(lsb_pub, bitno, 1); + dev_info(ccp->dev, + "Queue %d gets LSB %d\n", + i, bitno); + break; + } + bitmap_clear(qlsb, bitno, 1); + bitno = find_first_bit(qlsb, MAX_LSB_CNT); + } + if (bitno >= MAX_LSB_CNT) + return -EINVAL; + n_lsbs--; + } + } + return n_lsbs; +} + +/* For each queue, from the most- to least-constrained: + * find an LSB that can be assigned to the queue. If there are N queues that + * can only use M LSBs, where N > M, fail; otherwise, every queue will get a + * dedicated LSB. Remaining LSB regions become a shared resource. + * If we have fewer LSBs than queues, all LSB regions become shared resources. + */ +static int ccp_assign_lsbs(struct ccp_device *ccp) +{ + DECLARE_BITMAP(lsb_pub, MAX_LSB_CNT); + DECLARE_BITMAP(qlsb, MAX_LSB_CNT); + int n_lsbs = 0; + int bitno; + int i, lsb_cnt; + int rc = 0; + + bitmap_zero(lsb_pub, MAX_LSB_CNT); + + /* Create an aggregate bitmap to get a total count of available LSBs */ + for (i = 0; i < ccp->cmd_q_count; i++) + bitmap_or(lsb_pub, + lsb_pub, ccp->cmd_q[i].lsbmask, + MAX_LSB_CNT); + + n_lsbs = bitmap_weight(lsb_pub, MAX_LSB_CNT); + + if (n_lsbs >= ccp->cmd_q_count) { + /* We have enough LSBS to give every queue a private LSB. + * Brute force search to start with the queues that are more + * constrained in LSB choice. When an LSB is privately + * assigned, it is removed from the public mask. + * This is an ugly N squared algorithm with some optimization. + */ + for (lsb_cnt = 1; + n_lsbs && (lsb_cnt <= MAX_LSB_CNT); + lsb_cnt++) { + rc = ccp_find_and_assign_lsb_to_q(ccp, lsb_cnt, n_lsbs, + lsb_pub); + if (rc < 0) + return -EINVAL; + n_lsbs = rc; + } + } + + rc = 0; + /* What's left of the LSBs, according to the public mask, now become + * shared. Any zero bits in the lsb_pub mask represent an LSB region + * that can't be used as a shared resource, so mark the LSB slots for + * them as "in use". + */ + bitmap_copy(qlsb, lsb_pub, MAX_LSB_CNT); + + bitno = find_first_zero_bit(qlsb, MAX_LSB_CNT); + while (bitno < MAX_LSB_CNT) { + bitmap_set(ccp->lsbmap, bitno * LSB_SIZE, LSB_SIZE); + bitmap_set(qlsb, bitno, 1); + bitno = find_first_zero_bit(qlsb, MAX_LSB_CNT); + } + + return rc; +} + +static int ccp5_init(struct ccp_device *ccp) +{ + struct device *dev = ccp->dev; + struct ccp_cmd_queue *cmd_q; + struct dma_pool *dma_pool; + char dma_pool_name[MAX_DMAPOOL_NAME_LEN]; + unsigned int qmr, qim, i; + u64 status; + u32 status_lo, status_hi; + int ret; + + /* Find available queues */ + qim = 0; + qmr = ioread32(ccp->io_regs + Q_MASK_REG); + for (i = 0; i < MAX_HW_QUEUES; i++) { + + if (!(qmr & (1 << i))) + continue; + + /* Allocate a dma pool for this queue */ + snprintf(dma_pool_name, sizeof(dma_pool_name), "%s_q%d", + ccp->name, i); + dma_pool = dma_pool_create(dma_pool_name, dev, + CCP_DMAPOOL_MAX_SIZE, + CCP_DMAPOOL_ALIGN, 0); + if (!dma_pool) { + dev_err(dev, "unable to allocate dma pool\n"); + ret = -ENOMEM; + } + + cmd_q = &ccp->cmd_q[ccp->cmd_q_count]; + ccp->cmd_q_count++; + + cmd_q->ccp = ccp; + cmd_q->id = i; + cmd_q->dma_pool = dma_pool; + mutex_init(&cmd_q->q_mutex); + + /* Page alignment satisfies our needs for N <= 128 */ + BUILD_BUG_ON(COMMANDS_PER_QUEUE > 128); + cmd_q->qsize = Q_SIZE(Q_DESC_SIZE); + cmd_q->qbase = dma_zalloc_coherent(dev, cmd_q->qsize, + &cmd_q->qbase_dma, + GFP_KERNEL); + if (!cmd_q->qbase) { + dev_err(dev, "unable to allocate command queue\n"); + ret = -ENOMEM; + goto e_pool; + } + + cmd_q->qidx = 0; + /* Preset some register values and masks that are queue + * number dependent + */ + cmd_q->reg_control = ccp->io_regs + + CMD5_Q_STATUS_INCR * (i + 1); + cmd_q->reg_tail_lo = cmd_q->reg_control + CMD5_Q_TAIL_LO_BASE; + cmd_q->reg_head_lo = cmd_q->reg_control + CMD5_Q_HEAD_LO_BASE; + cmd_q->reg_int_enable = cmd_q->reg_control + + CMD5_Q_INT_ENABLE_BASE; + cmd_q->reg_interrupt_status = cmd_q->reg_control + + CMD5_Q_INTERRUPT_STATUS_BASE; + cmd_q->reg_status = cmd_q->reg_control + CMD5_Q_STATUS_BASE; + cmd_q->reg_int_status = cmd_q->reg_control + + CMD5_Q_INT_STATUS_BASE; + cmd_q->reg_dma_status = cmd_q->reg_control + + CMD5_Q_DMA_STATUS_BASE; + cmd_q->reg_dma_read_status = cmd_q->reg_control + + CMD5_Q_DMA_READ_STATUS_BASE; + cmd_q->reg_dma_write_status = cmd_q->reg_control + + CMD5_Q_DMA_WRITE_STATUS_BASE; + + init_waitqueue_head(&cmd_q->int_queue); + + dev_dbg(dev, "queue #%u available\n", i); + } + if (ccp->cmd_q_count == 0) { + dev_notice(dev, "no command queues available\n"); + ret = -EIO; + goto e_pool; + } + dev_notice(dev, "%u command queues available\n", ccp->cmd_q_count); + + /* Turn off the queues and disable interrupts until ready */ + for (i = 0; i < ccp->cmd_q_count; i++) { + cmd_q = &ccp->cmd_q[i]; + + cmd_q->qcontrol = 0; /* Start with nothing */ + iowrite32(cmd_q->qcontrol, cmd_q->reg_control); + + /* Disable the interrupts */ + iowrite32(0x00, cmd_q->reg_int_enable); + ioread32(cmd_q->reg_int_status); + ioread32(cmd_q->reg_status); + + /* Clear the interrupts */ + iowrite32(ALL_INTERRUPTS, cmd_q->reg_interrupt_status); + } + + dev_dbg(dev, "Requesting an IRQ...\n"); + /* Request an irq */ + ret = ccp->get_irq(ccp); + if (ret) { + dev_err(dev, "unable to allocate an IRQ\n"); + goto e_pool; + } + + /* Initialize the queue used to suspend */ + init_waitqueue_head(&ccp->suspend_queue); + + dev_dbg(dev, "Loading LSB map...\n"); + /* Copy the private LSB mask to the public registers */ + status_lo = ioread32(ccp->io_regs + LSB_PRIVATE_MASK_LO_OFFSET); + status_hi = ioread32(ccp->io_regs + LSB_PRIVATE_MASK_HI_OFFSET); + iowrite32(status_lo, ccp->io_regs + LSB_PUBLIC_MASK_LO_OFFSET); + iowrite32(status_hi, ccp->io_regs + LSB_PUBLIC_MASK_HI_OFFSET); + status = ((u64)status_hi<<30) | (u64)status_lo; + + dev_dbg(dev, "Configuring virtual queues...\n"); + /* Configure size of each virtual queue accessible to host */ + for (i = 0; i < ccp->cmd_q_count; i++) { + u32 dma_addr_lo; + u32 dma_addr_hi; + + cmd_q = &ccp->cmd_q[i]; + + cmd_q->qcontrol &= ~(CMD5_Q_SIZE << CMD5_Q_SHIFT); + cmd_q->qcontrol |= QUEUE_SIZE_VAL << CMD5_Q_SHIFT; + + cmd_q->qdma_tail = cmd_q->qbase_dma; + dma_addr_lo = low_address(cmd_q->qdma_tail); + iowrite32((u32)dma_addr_lo, cmd_q->reg_tail_lo); + iowrite32((u32)dma_addr_lo, cmd_q->reg_head_lo); + + dma_addr_hi = high_address(cmd_q->qdma_tail); + cmd_q->qcontrol |= (dma_addr_hi << 16); + iowrite32(cmd_q->qcontrol, cmd_q->reg_control); + + /* Find the LSB regions accessible to the queue */ + ccp_find_lsb_regions(cmd_q, status); + cmd_q->lsb = -1; /* Unassigned value */ + } + + dev_dbg(dev, "Assigning LSBs...\n"); + ret = ccp_assign_lsbs(ccp); + if (ret) { + dev_err(dev, "Unable to assign LSBs (%d)\n", ret); + goto e_irq; + } + + /* Optimization: pre-allocate LSB slots for each queue */ + for (i = 0; i < ccp->cmd_q_count; i++) { + ccp->cmd_q[i].sb_key = ccp_lsb_alloc(&ccp->cmd_q[i], 2); + ccp->cmd_q[i].sb_ctx = ccp_lsb_alloc(&ccp->cmd_q[i], 2); + } + + dev_dbg(dev, "Starting threads...\n"); + /* Create a kthread for each queue */ + for (i = 0; i < ccp->cmd_q_count; i++) { + struct task_struct *kthread; + + cmd_q = &ccp->cmd_q[i]; + + kthread = kthread_create(ccp_cmd_queue_thread, cmd_q, + "%s-q%u", ccp->name, cmd_q->id); + if (IS_ERR(kthread)) { + dev_err(dev, "error creating queue thread (%ld)\n", + PTR_ERR(kthread)); + ret = PTR_ERR(kthread); + goto e_kthread; + } + + cmd_q->kthread = kthread; + wake_up_process(kthread); + } + + dev_dbg(dev, "Enabling interrupts...\n"); + /* Enable interrupts */ + for (i = 0; i < ccp->cmd_q_count; i++) { + cmd_q = &ccp->cmd_q[i]; + iowrite32(ALL_INTERRUPTS, cmd_q->reg_int_enable); + } + + dev_dbg(dev, "Registering device...\n"); + /* Put this on the unit list to make it available */ + ccp_add_device(ccp); + + ret = ccp_register_rng(ccp); + if (ret) + goto e_kthread; + + /* Register the DMA engine support */ + ret = ccp_dmaengine_register(ccp); + if (ret) + goto e_hwrng; + + return 0; + +e_hwrng: + ccp_unregister_rng(ccp); + +e_kthread: + for (i = 0; i < ccp->cmd_q_count; i++) + if (ccp->cmd_q[i].kthread) + kthread_stop(ccp->cmd_q[i].kthread); + +e_irq: + ccp->free_irq(ccp); + +e_pool: + for (i = 0; i < ccp->cmd_q_count; i++) + dma_pool_destroy(ccp->cmd_q[i].dma_pool); + + return ret; +} + +static void ccp5_destroy(struct ccp_device *ccp) +{ + struct device *dev = ccp->dev; + struct ccp_cmd_queue *cmd_q; + struct ccp_cmd *cmd; + unsigned int i; + + /* Unregister the DMA engine */ + ccp_dmaengine_unregister(ccp); + + /* Unregister the RNG */ + ccp_unregister_rng(ccp); + + /* Remove this device from the list of available units first */ + ccp_del_device(ccp); + + /* Disable and clear interrupts */ + for (i = 0; i < ccp->cmd_q_count; i++) { + cmd_q = &ccp->cmd_q[i]; + + /* Turn off the run bit */ + iowrite32(cmd_q->qcontrol & ~CMD5_Q_RUN, cmd_q->reg_control); + + /* Disable the interrupts */ + iowrite32(ALL_INTERRUPTS, cmd_q->reg_interrupt_status); + + /* Clear the interrupt status */ + iowrite32(0x00, cmd_q->reg_int_enable); + ioread32(cmd_q->reg_int_status); + ioread32(cmd_q->reg_status); + } + + /* Stop the queue kthreads */ + for (i = 0; i < ccp->cmd_q_count; i++) + if (ccp->cmd_q[i].kthread) + kthread_stop(ccp->cmd_q[i].kthread); + + ccp->free_irq(ccp); + + for (i = 0; i < ccp->cmd_q_count; i++) { + cmd_q = &ccp->cmd_q[i]; + dma_free_coherent(dev, cmd_q->qsize, cmd_q->qbase, + cmd_q->qbase_dma); + } + + /* Flush the cmd and backlog queue */ + while (!list_empty(&ccp->cmd)) { + /* Invoke the callback directly with an error code */ + cmd = list_first_entry(&ccp->cmd, struct ccp_cmd, entry); + list_del(&cmd->entry); + cmd->callback(cmd->data, -ENODEV); + } + while (!list_empty(&ccp->backlog)) { + /* Invoke the callback directly with an error code */ + cmd = list_first_entry(&ccp->backlog, struct ccp_cmd, entry); + list_del(&cmd->entry); + cmd->callback(cmd->data, -ENODEV); + } +} + +static irqreturn_t ccp5_irq_handler(int irq, void *data) +{ + struct device *dev = data; + struct ccp_device *ccp = dev_get_drvdata(dev); + u32 status; + unsigned int i; + + for (i = 0; i < ccp->cmd_q_count; i++) { + struct ccp_cmd_queue *cmd_q = &ccp->cmd_q[i]; + + status = ioread32(cmd_q->reg_interrupt_status); + + if (status) { + cmd_q->int_status = status; + cmd_q->q_status = ioread32(cmd_q->reg_status); + cmd_q->q_int_status = ioread32(cmd_q->reg_int_status); + + /* On error, only save the first error value */ + if ((status & INT_ERROR) && !cmd_q->cmd_error) + cmd_q->cmd_error = CMD_Q_ERROR(cmd_q->q_status); + + cmd_q->int_rcvd = 1; + + /* Acknowledge the interrupt and wake the kthread */ + iowrite32(ALL_INTERRUPTS, cmd_q->reg_interrupt_status); + wake_up_interruptible(&cmd_q->int_queue); + } + } + + return IRQ_HANDLED; +} + +static void ccp5_config(struct ccp_device *ccp) +{ + /* Public side */ + iowrite32(0x00001249, ccp->io_regs + CMD5_REQID_CONFIG_OFFSET); +} + +static void ccp5other_config(struct ccp_device *ccp) +{ + int i; + u32 rnd; + + /* We own all of the queues on the NTB CCP */ + + iowrite32(0x00012D57, ccp->io_regs + CMD5_TRNG_CTL_OFFSET); + iowrite32(0x00000003, ccp->io_regs + CMD5_CONFIG_0_OFFSET); + for (i = 0; i < 12; i++) { + rnd = ioread32(ccp->io_regs + TRNG_OUT_REG); + iowrite32(rnd, ccp->io_regs + CMD5_AES_MASK_OFFSET); + } + + iowrite32(0x0000001F, ccp->io_regs + CMD5_QUEUE_MASK_OFFSET); + iowrite32(0x00005B6D, ccp->io_regs + CMD5_QUEUE_PRIO_OFFSET); + iowrite32(0x00000000, ccp->io_regs + CMD5_CMD_TIMEOUT_OFFSET); + + iowrite32(0x3FFFFFFF, ccp->io_regs + LSB_PRIVATE_MASK_LO_OFFSET); + iowrite32(0x000003FF, ccp->io_regs + LSB_PRIVATE_MASK_HI_OFFSET); + + iowrite32(0x00108823, ccp->io_regs + CMD5_CLK_GATE_CTL_OFFSET); + + ccp5_config(ccp); +} + +/* Version 5 adds some function, but is essentially the same as v5 */ +static const struct ccp_actions ccp5_actions = { + .aes = ccp5_perform_aes, + .xts_aes = ccp5_perform_xts_aes, + .sha = ccp5_perform_sha, + .rsa = ccp5_perform_rsa, + .passthru = ccp5_perform_passthru, + .ecc = ccp5_perform_ecc, + .sballoc = ccp_lsb_alloc, + .sbfree = ccp_lsb_free, + .init = ccp5_init, + .destroy = ccp5_destroy, + .get_free_slots = ccp5_get_free_slots, + .irqhandler = ccp5_irq_handler, +}; + +const struct ccp_vdata ccpv5a = { + .version = CCP_VERSION(5, 0), + .setup = ccp5_config, + .perform = &ccp5_actions, + .bar = 2, + .offset = 0x0, +}; + +const struct ccp_vdata ccpv5b = { + .version = CCP_VERSION(5, 0), + .setup = ccp5other_config, + .perform = &ccp5_actions, + .bar = 2, + .offset = 0x0, +}; diff --git a/drivers/crypto/ccp/ccp-dev.c b/drivers/crypto/ccp/ccp-dev.c index 87b9f2bfa623..cafa633aae10 100644 --- a/drivers/crypto/ccp/ccp-dev.c +++ b/drivers/crypto/ccp/ccp-dev.c @@ -4,6 +4,7 @@ * Copyright (C) 2013,2016 Advanced Micro Devices, Inc. * * Author: Tom Lendacky <thomas.lendacky@amd.com> + * Author: Gary R Hook <gary.hook@amd.com> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -39,6 +40,59 @@ struct ccp_tasklet_data { struct ccp_cmd *cmd; }; +/* Human-readable error strings */ +char *ccp_error_codes[] = { + "", + "ERR 01: ILLEGAL_ENGINE", + "ERR 02: ILLEGAL_KEY_ID", + "ERR 03: ILLEGAL_FUNCTION_TYPE", + "ERR 04: ILLEGAL_FUNCTION_MODE", + "ERR 05: ILLEGAL_FUNCTION_ENCRYPT", + "ERR 06: ILLEGAL_FUNCTION_SIZE", + "ERR 07: Zlib_MISSING_INIT_EOM", + "ERR 08: ILLEGAL_FUNCTION_RSVD", + "ERR 09: ILLEGAL_BUFFER_LENGTH", + "ERR 10: VLSB_FAULT", + "ERR 11: ILLEGAL_MEM_ADDR", + "ERR 12: ILLEGAL_MEM_SEL", + "ERR 13: ILLEGAL_CONTEXT_ID", + "ERR 14: ILLEGAL_KEY_ADDR", + "ERR 15: 0xF Reserved", + "ERR 16: Zlib_ILLEGAL_MULTI_QUEUE", + "ERR 17: Zlib_ILLEGAL_JOBID_CHANGE", + "ERR 18: CMD_TIMEOUT", + "ERR 19: IDMA0_AXI_SLVERR", + "ERR 20: IDMA0_AXI_DECERR", + "ERR 21: 0x15 Reserved", + "ERR 22: IDMA1_AXI_SLAVE_FAULT", + "ERR 23: IDMA1_AIXI_DECERR", + "ERR 24: 0x18 Reserved", + "ERR 25: ZLIBVHB_AXI_SLVERR", + "ERR 26: ZLIBVHB_AXI_DECERR", + "ERR 27: 0x1B Reserved", + "ERR 27: ZLIB_UNEXPECTED_EOM", + "ERR 27: ZLIB_EXTRA_DATA", + "ERR 30: ZLIB_BTYPE", + "ERR 31: ZLIB_UNDEFINED_SYMBOL", + "ERR 32: ZLIB_UNDEFINED_DISTANCE_S", + "ERR 33: ZLIB_CODE_LENGTH_SYMBOL", + "ERR 34: ZLIB _VHB_ILLEGAL_FETCH", + "ERR 35: ZLIB_UNCOMPRESSED_LEN", + "ERR 36: ZLIB_LIMIT_REACHED", + "ERR 37: ZLIB_CHECKSUM_MISMATCH0", + "ERR 38: ODMA0_AXI_SLVERR", + "ERR 39: ODMA0_AXI_DECERR", + "ERR 40: 0x28 Reserved", + "ERR 41: ODMA1_AXI_SLVERR", + "ERR 42: ODMA1_AXI_DECERR", + "ERR 43: LSB_PARITY_ERR", +}; + +void ccp_log_error(struct ccp_device *d, int e) +{ + dev_err(d->dev, "CCP error: %s (0x%x)\n", ccp_error_codes[e], e); +} + /* List of CCPs, CCP count, read-write access lock, and access functions * * Lock structure: get ccp_unit_lock for reading whenever we need to @@ -58,7 +112,7 @@ static struct ccp_device *ccp_rr; /* Ever-increasing value to produce unique unit numbers */ static atomic_t ccp_unit_ordinal; -unsigned int ccp_increment_unit_ordinal(void) +static unsigned int ccp_increment_unit_ordinal(void) { return atomic_inc_return(&ccp_unit_ordinal); } @@ -118,6 +172,29 @@ void ccp_del_device(struct ccp_device *ccp) write_unlock_irqrestore(&ccp_unit_lock, flags); } + + +int ccp_register_rng(struct ccp_device *ccp) +{ + int ret = 0; + + dev_dbg(ccp->dev, "Registering RNG...\n"); + /* Register an RNG */ + ccp->hwrng.name = ccp->rngname; + ccp->hwrng.read = ccp_trng_read; + ret = hwrng_register(&ccp->hwrng); + if (ret) + dev_err(ccp->dev, "error registering hwrng (%d)\n", ret); + + return ret; +} + +void ccp_unregister_rng(struct ccp_device *ccp) +{ + if (ccp->hwrng.name) + hwrng_unregister(&ccp->hwrng); +} + static struct ccp_device *ccp_get_device(void) { unsigned long flags; @@ -397,9 +474,9 @@ struct ccp_device *ccp_alloc_struct(struct device *dev) spin_lock_init(&ccp->cmd_lock); mutex_init(&ccp->req_mutex); - mutex_init(&ccp->ksb_mutex); - ccp->ksb_count = KSB_COUNT; - ccp->ksb_start = 0; + mutex_init(&ccp->sb_mutex); + ccp->sb_count = KSB_COUNT; + ccp->sb_start = 0; ccp->ord = ccp_increment_unit_ordinal(); snprintf(ccp->name, MAX_CCP_NAME_LEN, "ccp-%u", ccp->ord); @@ -408,6 +485,34 @@ struct ccp_device *ccp_alloc_struct(struct device *dev) return ccp; } +int ccp_trng_read(struct hwrng *rng, void *data, size_t max, bool wait) +{ + struct ccp_device *ccp = container_of(rng, struct ccp_device, hwrng); + u32 trng_value; + int len = min_t(int, sizeof(trng_value), max); + + /* Locking is provided by the caller so we can update device + * hwrng-related fields safely + */ + trng_value = ioread32(ccp->io_regs + TRNG_OUT_REG); + if (!trng_value) { + /* Zero is returned if not data is available or if a + * bad-entropy error is present. Assume an error if + * we exceed TRNG_RETRIES reads of zero. + */ + if (ccp->hwrng_retries++ > TRNG_RETRIES) + return -EIO; + + return 0; + } + + /* Reset the counter and save the rng value */ + ccp->hwrng_retries = 0; + memcpy(data, &trng_value, len); + + return len; +} + #ifdef CONFIG_PM bool ccp_queues_suspended(struct ccp_device *ccp) { diff --git a/drivers/crypto/ccp/ccp-dev.h b/drivers/crypto/ccp/ccp-dev.h index bd41ffceff82..da5f4a678083 100644 --- a/drivers/crypto/ccp/ccp-dev.h +++ b/drivers/crypto/ccp/ccp-dev.h @@ -4,6 +4,7 @@ * Copyright (C) 2013,2016 Advanced Micro Devices, Inc. * * Author: Tom Lendacky <thomas.lendacky@amd.com> + * Author: Gary R Hook <gary.hook@amd.com> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -60,7 +61,69 @@ #define CMD_Q_ERROR(__qs) ((__qs) & 0x0000003f) #define CMD_Q_DEPTH(__qs) (((__qs) >> 12) & 0x0000000f) -/****** REQ0 Related Values ******/ +/* ------------------------ CCP Version 5 Specifics ------------------------ */ +#define CMD5_QUEUE_MASK_OFFSET 0x00 +#define CMD5_QUEUE_PRIO_OFFSET 0x04 +#define CMD5_REQID_CONFIG_OFFSET 0x08 +#define CMD5_CMD_TIMEOUT_OFFSET 0x10 +#define LSB_PUBLIC_MASK_LO_OFFSET 0x18 +#define LSB_PUBLIC_MASK_HI_OFFSET 0x1C +#define LSB_PRIVATE_MASK_LO_OFFSET 0x20 +#define LSB_PRIVATE_MASK_HI_OFFSET 0x24 + +#define CMD5_Q_CONTROL_BASE 0x0000 +#define CMD5_Q_TAIL_LO_BASE 0x0004 +#define CMD5_Q_HEAD_LO_BASE 0x0008 +#define CMD5_Q_INT_ENABLE_BASE 0x000C +#define CMD5_Q_INTERRUPT_STATUS_BASE 0x0010 + +#define CMD5_Q_STATUS_BASE 0x0100 +#define CMD5_Q_INT_STATUS_BASE 0x0104 +#define CMD5_Q_DMA_STATUS_BASE 0x0108 +#define CMD5_Q_DMA_READ_STATUS_BASE 0x010C +#define CMD5_Q_DMA_WRITE_STATUS_BASE 0x0110 +#define CMD5_Q_ABORT_BASE 0x0114 +#define CMD5_Q_AX_CACHE_BASE 0x0118 + +#define CMD5_CONFIG_0_OFFSET 0x6000 +#define CMD5_TRNG_CTL_OFFSET 0x6008 +#define CMD5_AES_MASK_OFFSET 0x6010 +#define CMD5_CLK_GATE_CTL_OFFSET 0x603C + +/* Address offset between two virtual queue registers */ +#define CMD5_Q_STATUS_INCR 0x1000 + +/* Bit masks */ +#define CMD5_Q_RUN 0x1 +#define CMD5_Q_HALT 0x2 +#define CMD5_Q_MEM_LOCATION 0x4 +#define CMD5_Q_SIZE 0x1F +#define CMD5_Q_SHIFT 3 +#define COMMANDS_PER_QUEUE 16 +#define QUEUE_SIZE_VAL ((ffs(COMMANDS_PER_QUEUE) - 2) & \ + CMD5_Q_SIZE) +#define Q_PTR_MASK (2 << (QUEUE_SIZE_VAL + 5) - 1) +#define Q_DESC_SIZE sizeof(struct ccp5_desc) +#define Q_SIZE(n) (COMMANDS_PER_QUEUE*(n)) + +#define INT_COMPLETION 0x1 +#define INT_ERROR 0x2 +#define INT_QUEUE_STOPPED 0x4 +#define ALL_INTERRUPTS (INT_COMPLETION| \ + INT_ERROR| \ + INT_QUEUE_STOPPED) + +#define LSB_REGION_WIDTH 5 +#define MAX_LSB_CNT 8 + +#define LSB_SIZE 16 +#define LSB_ITEM_SIZE 32 +#define PLSB_MAP_SIZE (LSB_SIZE) +#define SLSB_MAP_SIZE (MAX_LSB_CNT * LSB_SIZE) + +#define LSB_ENTRY_NUMBER(LSB_ADDR) (LSB_ADDR / LSB_ITEM_SIZE) + +/* ------------------------ CCP Version 3 Specifics ------------------------ */ #define REQ0_WAIT_FOR_WRITE 0x00000004 #define REQ0_INT_ON_COMPLETE 0x00000002 #define REQ0_STOP_ON_COMPLETE 0x00000001 @@ -110,29 +173,30 @@ #define KSB_START 77 #define KSB_END 127 #define KSB_COUNT (KSB_END - KSB_START + 1) -#define CCP_KSB_BITS 256 -#define CCP_KSB_BYTES 32 +#define CCP_SB_BITS 256 #define CCP_JOBID_MASK 0x0000003f +/* ------------------------ General CCP Defines ------------------------ */ + #define CCP_DMAPOOL_MAX_SIZE 64 #define CCP_DMAPOOL_ALIGN BIT(5) #define CCP_REVERSE_BUF_SIZE 64 -#define CCP_AES_KEY_KSB_COUNT 1 -#define CCP_AES_CTX_KSB_COUNT 1 +#define CCP_AES_KEY_SB_COUNT 1 +#define CCP_AES_CTX_SB_COUNT 1 -#define CCP_XTS_AES_KEY_KSB_COUNT 1 -#define CCP_XTS_AES_CTX_KSB_COUNT 1 +#define CCP_XTS_AES_KEY_SB_COUNT 1 +#define CCP_XTS_AES_CTX_SB_COUNT 1 -#define CCP_SHA_KSB_COUNT 1 +#define CCP_SHA_SB_COUNT 1 #define CCP_RSA_MAX_WIDTH 4096 #define CCP_PASSTHRU_BLOCKSIZE 256 #define CCP_PASSTHRU_MASKSIZE 32 -#define CCP_PASSTHRU_KSB_COUNT 1 +#define CCP_PASSTHRU_SB_COUNT 1 #define CCP_ECC_MODULUS_BYTES 48 /* 384-bits */ #define CCP_ECC_MAX_OPERANDS 6 @@ -144,31 +208,12 @@ #define CCP_ECC_RESULT_OFFSET 60 #define CCP_ECC_RESULT_SUCCESS 0x0001 -struct ccp_op; - -/* Structure for computation functions that are device-specific */ -struct ccp_actions { - int (*perform_aes)(struct ccp_op *); - int (*perform_xts_aes)(struct ccp_op *); - int (*perform_sha)(struct ccp_op *); - int (*perform_rsa)(struct ccp_op *); - int (*perform_passthru)(struct ccp_op *); - int (*perform_ecc)(struct ccp_op *); - int (*init)(struct ccp_device *); - void (*destroy)(struct ccp_device *); - irqreturn_t (*irqhandler)(int, void *); -}; - -/* Structure to hold CCP version-specific values */ -struct ccp_vdata { - unsigned int version; - const struct ccp_actions *perform; -}; - -extern struct ccp_vdata ccpv3; +#define CCP_SB_BYTES 32 +struct ccp_op; struct ccp_device; struct ccp_cmd; +struct ccp_fns; struct ccp_dma_cmd { struct list_head entry; @@ -212,9 +257,29 @@ struct ccp_cmd_queue { /* Queue dma pool */ struct dma_pool *dma_pool; - /* Queue reserved KSB regions */ - u32 ksb_key; - u32 ksb_ctx; + /* Queue base address (not neccessarily aligned)*/ + struct ccp5_desc *qbase; + + /* Aligned queue start address (per requirement) */ + struct mutex q_mutex ____cacheline_aligned; + unsigned int qidx; + + /* Version 5 has different requirements for queue memory */ + unsigned int qsize; + dma_addr_t qbase_dma; + dma_addr_t qdma_tail; + + /* Per-queue reserved storage block(s) */ + u32 sb_key; + u32 sb_ctx; + + /* Bitmap of LSBs that can be accessed by this queue */ + DECLARE_BITMAP(lsbmask, MAX_LSB_CNT); + /* Private LSB that is assigned to this queue, or -1 if none. + * Bitmap for my private LSB, unused otherwise + */ + unsigned int lsb; + DECLARE_BITMAP(lsbmap, PLSB_MAP_SIZE); /* Queue processing thread */ struct task_struct *kthread; @@ -229,8 +294,17 @@ struct ccp_cmd_queue { u32 int_err; /* Register addresses for queue */ + void __iomem *reg_control; + void __iomem *reg_tail_lo; + void __iomem *reg_head_lo; + void __iomem *reg_int_enable; + void __iomem *reg_interrupt_status; void __iomem *reg_status; void __iomem *reg_int_status; + void __iomem *reg_dma_status; + void __iomem *reg_dma_read_status; + void __iomem *reg_dma_write_status; + u32 qcontrol; /* Cached control register */ /* Status values from job */ u32 int_status; @@ -253,16 +327,14 @@ struct ccp_device { struct device *dev; - /* - * Bus specific device information + /* Bus specific device information */ void *dev_specific; int (*get_irq)(struct ccp_device *ccp); void (*free_irq)(struct ccp_device *ccp); unsigned int irq; - /* - * I/O area used for device communication. The register mapping + /* I/O area used for device communication. The register mapping * starts at an offset into the mapped bar. * The CMD_REQx registers and the Delete_Cmd_Queue_Job register * need to be protected while a command queue thread is accessing @@ -272,8 +344,7 @@ struct ccp_device { void __iomem *io_map; void __iomem *io_regs; - /* - * Master lists that all cmds are queued on. Because there can be + /* Master lists that all cmds are queued on. Because there can be * more than one CCP command queue that can process a cmd a separate * backlog list is neeeded so that the backlog completion call * completes before the cmd is available for execution. @@ -283,47 +354,54 @@ struct ccp_device { struct list_head cmd; struct list_head backlog; - /* - * The command queues. These represent the queues available on the + /* The command queues. These represent the queues available on the * CCP that are available for processing cmds */ struct ccp_cmd_queue cmd_q[MAX_HW_QUEUES]; unsigned int cmd_q_count; - /* - * Support for the CCP True RNG + /* Support for the CCP True RNG */ struct hwrng hwrng; unsigned int hwrng_retries; - /* - * Support for the CCP DMA capabilities + /* Support for the CCP DMA capabilities */ struct dma_device dma_dev; struct ccp_dma_chan *ccp_dma_chan; struct kmem_cache *dma_cmd_cache; struct kmem_cache *dma_desc_cache; - /* - * A counter used to generate job-ids for cmds submitted to the CCP + /* A counter used to generate job-ids for cmds submitted to the CCP */ atomic_t current_id ____cacheline_aligned; - /* - * The CCP uses key storage blocks (KSB) to maintain context for certain - * operations. To prevent multiple cmds from using the same KSB range - * a command queue reserves a KSB range for the duration of the cmd. - * Each queue, will however, reserve 2 KSB blocks for operations that - * only require single KSB entries (eg. AES context/iv and key) in order - * to avoid allocation contention. This will reserve at most 10 KSB - * entries, leaving 40 KSB entries available for dynamic allocation. + /* The v3 CCP uses key storage blocks (SB) to maintain context for + * certain operations. To prevent multiple cmds from using the same + * SB range a command queue reserves an SB range for the duration of + * the cmd. Each queue, will however, reserve 2 SB blocks for + * operations that only require single SB entries (eg. AES context/iv + * and key) in order to avoid allocation contention. This will reserve + * at most 10 SB entries, leaving 40 SB entries available for dynamic + * allocation. + * + * The v5 CCP Local Storage Block (LSB) is broken up into 8 + * memrory ranges, each of which can be enabled for access by one + * or more queues. Device initialization takes this into account, + * and attempts to assign one region for exclusive use by each + * available queue; the rest are then aggregated as "public" use. + * If there are fewer regions than queues, all regions are shared + * amongst all queues. */ - struct mutex ksb_mutex ____cacheline_aligned; - DECLARE_BITMAP(ksb, KSB_COUNT); - wait_queue_head_t ksb_queue; - unsigned int ksb_avail; - unsigned int ksb_count; - u32 ksb_start; + struct mutex sb_mutex ____cacheline_aligned; + DECLARE_BITMAP(sb, KSB_COUNT); + wait_queue_head_t sb_queue; + unsigned int sb_avail; + unsigned int sb_count; + u32 sb_start; + + /* Bitmap of shared LSBs, if any */ + DECLARE_BITMAP(lsbmap, SLSB_MAP_SIZE); /* Suspend support */ unsigned int suspending; @@ -335,10 +413,11 @@ struct ccp_device { enum ccp_memtype { CCP_MEMTYPE_SYSTEM = 0, - CCP_MEMTYPE_KSB, + CCP_MEMTYPE_SB, CCP_MEMTYPE_LOCAL, CCP_MEMTYPE__LAST, }; +#define CCP_MEMTYPE_LSB CCP_MEMTYPE_KSB struct ccp_dma_info { dma_addr_t address; @@ -379,7 +458,7 @@ struct ccp_mem { enum ccp_memtype type; union { struct ccp_dma_info dma; - u32 ksb; + u32 sb; } u; }; @@ -419,13 +498,14 @@ struct ccp_op { u32 jobid; u32 ioc; u32 soc; - u32 ksb_key; - u32 ksb_ctx; + u32 sb_key; + u32 sb_ctx; u32 init; u32 eom; struct ccp_mem src; struct ccp_mem dst; + struct ccp_mem exp; union { struct ccp_aes_op aes; @@ -435,6 +515,7 @@ struct ccp_op { struct ccp_passthru_op passthru; struct ccp_ecc_op ecc; } u; + struct ccp_mem key; }; static inline u32 ccp_addr_lo(struct ccp_dma_info *info) @@ -447,6 +528,70 @@ static inline u32 ccp_addr_hi(struct ccp_dma_info *info) return upper_32_bits(info->address + info->offset) & 0x0000ffff; } +/** + * descriptor for version 5 CPP commands + * 8 32-bit words: + * word 0: function; engine; control bits + * word 1: length of source data + * word 2: low 32 bits of source pointer + * word 3: upper 16 bits of source pointer; source memory type + * word 4: low 32 bits of destination pointer + * word 5: upper 16 bits of destination pointer; destination memory type + * word 6: low 32 bits of key pointer + * word 7: upper 16 bits of key pointer; key memory type + */ +struct dword0 { + __le32 soc:1; + __le32 ioc:1; + __le32 rsvd1:1; + __le32 init:1; + __le32 eom:1; /* AES/SHA only */ + __le32 function:15; + __le32 engine:4; + __le32 prot:1; + __le32 rsvd2:7; +}; + +struct dword3 { + __le32 src_hi:16; + __le32 src_mem:2; + __le32 lsb_cxt_id:8; + __le32 rsvd1:5; + __le32 fixed:1; +}; + +union dword4 { + __le32 dst_lo; /* NON-SHA */ + __le32 sha_len_lo; /* SHA */ +}; + +union dword5 { + struct { + __le32 dst_hi:16; + __le32 dst_mem:2; + __le32 rsvd1:13; + __le32 fixed:1; + } fields; + __le32 sha_len_hi; +}; + +struct dword7 { + __le32 key_hi:16; + __le32 key_mem:2; + __le32 rsvd1:14; +}; + +struct ccp5_desc { + struct dword0 dw0; + __le32 length; + __le32 src_lo; + struct dword3 dw3; + union dword4 dw4; + union dword5 dw5; + __le32 key_lo; + struct dword7 dw7; +}; + int ccp_pci_init(void); void ccp_pci_exit(void); @@ -456,13 +601,48 @@ void ccp_platform_exit(void); void ccp_add_device(struct ccp_device *ccp); void ccp_del_device(struct ccp_device *ccp); +extern void ccp_log_error(struct ccp_device *, int); + struct ccp_device *ccp_alloc_struct(struct device *dev); bool ccp_queues_suspended(struct ccp_device *ccp); int ccp_cmd_queue_thread(void *data); +int ccp_trng_read(struct hwrng *rng, void *data, size_t max, bool wait); int ccp_run_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd); +int ccp_register_rng(struct ccp_device *ccp); +void ccp_unregister_rng(struct ccp_device *ccp); int ccp_dmaengine_register(struct ccp_device *ccp); void ccp_dmaengine_unregister(struct ccp_device *ccp); +/* Structure for computation functions that are device-specific */ +struct ccp_actions { + int (*aes)(struct ccp_op *); + int (*xts_aes)(struct ccp_op *); + int (*sha)(struct ccp_op *); + int (*rsa)(struct ccp_op *); + int (*passthru)(struct ccp_op *); + int (*ecc)(struct ccp_op *); + u32 (*sballoc)(struct ccp_cmd_queue *, unsigned int); + void (*sbfree)(struct ccp_cmd_queue *, unsigned int, + unsigned int); + unsigned int (*get_free_slots)(struct ccp_cmd_queue *); + int (*init)(struct ccp_device *); + void (*destroy)(struct ccp_device *); + irqreturn_t (*irqhandler)(int, void *); +}; + +/* Structure to hold CCP version-specific values */ +struct ccp_vdata { + const unsigned int version; + void (*setup)(struct ccp_device *); + const struct ccp_actions *perform; + const unsigned int bar; + const unsigned int offset; +}; + +extern const struct ccp_vdata ccpv3; +extern const struct ccp_vdata ccpv5a; +extern const struct ccp_vdata ccpv5b; + #endif diff --git a/drivers/crypto/ccp/ccp-dmaengine.c b/drivers/crypto/ccp/ccp-dmaengine.c index 94f77b0f9ae7..6553912804f7 100644 --- a/drivers/crypto/ccp/ccp-dmaengine.c +++ b/drivers/crypto/ccp/ccp-dmaengine.c @@ -299,12 +299,10 @@ static struct ccp_dma_desc *ccp_alloc_dma_desc(struct ccp_dma_chan *chan, { struct ccp_dma_desc *desc; - desc = kmem_cache_alloc(chan->ccp->dma_desc_cache, GFP_NOWAIT); + desc = kmem_cache_zalloc(chan->ccp->dma_desc_cache, GFP_NOWAIT); if (!desc) return NULL; - memset(desc, 0, sizeof(*desc)); - dma_async_tx_descriptor_init(&desc->tx_desc, &chan->dma_chan); desc->tx_desc.flags = flags; desc->tx_desc.tx_submit = ccp_tx_submit; @@ -650,8 +648,11 @@ int ccp_dmaengine_register(struct ccp_device *ccp) dma_desc_cache_name = devm_kasprintf(ccp->dev, GFP_KERNEL, "%s-dmaengine-desc-cache", ccp->name); - if (!dma_cmd_cache_name) - return -ENOMEM; + if (!dma_desc_cache_name) { + ret = -ENOMEM; + goto err_cache; + } + ccp->dma_desc_cache = kmem_cache_create(dma_desc_cache_name, sizeof(struct ccp_dma_desc), sizeof(void *), diff --git a/drivers/crypto/ccp/ccp-ops.c b/drivers/crypto/ccp/ccp-ops.c index ffa2891035ac..50fae4442801 100644 --- a/drivers/crypto/ccp/ccp-ops.c +++ b/drivers/crypto/ccp/ccp-ops.c @@ -4,6 +4,7 @@ * Copyright (C) 2013,2016 Advanced Micro Devices, Inc. * * Author: Tom Lendacky <thomas.lendacky@amd.com> + * Author: Gary R Hook <gary.hook@amd.com> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -20,72 +21,28 @@ #include "ccp-dev.h" /* SHA initial context values */ -static const __be32 ccp_sha1_init[CCP_SHA_CTXSIZE / sizeof(__be32)] = { +static const __be32 ccp_sha1_init[SHA1_DIGEST_SIZE / sizeof(__be32)] = { cpu_to_be32(SHA1_H0), cpu_to_be32(SHA1_H1), cpu_to_be32(SHA1_H2), cpu_to_be32(SHA1_H3), - cpu_to_be32(SHA1_H4), 0, 0, 0, + cpu_to_be32(SHA1_H4), }; -static const __be32 ccp_sha224_init[CCP_SHA_CTXSIZE / sizeof(__be32)] = { +static const __be32 ccp_sha224_init[SHA256_DIGEST_SIZE / sizeof(__be32)] = { cpu_to_be32(SHA224_H0), cpu_to_be32(SHA224_H1), cpu_to_be32(SHA224_H2), cpu_to_be32(SHA224_H3), cpu_to_be32(SHA224_H4), cpu_to_be32(SHA224_H5), cpu_to_be32(SHA224_H6), cpu_to_be32(SHA224_H7), }; -static const __be32 ccp_sha256_init[CCP_SHA_CTXSIZE / sizeof(__be32)] = { +static const __be32 ccp_sha256_init[SHA256_DIGEST_SIZE / sizeof(__be32)] = { cpu_to_be32(SHA256_H0), cpu_to_be32(SHA256_H1), cpu_to_be32(SHA256_H2), cpu_to_be32(SHA256_H3), cpu_to_be32(SHA256_H4), cpu_to_be32(SHA256_H5), cpu_to_be32(SHA256_H6), cpu_to_be32(SHA256_H7), }; -static u32 ccp_alloc_ksb(struct ccp_device *ccp, unsigned int count) -{ - int start; - - for (;;) { - mutex_lock(&ccp->ksb_mutex); - - start = (u32)bitmap_find_next_zero_area(ccp->ksb, - ccp->ksb_count, - ccp->ksb_start, - count, 0); - if (start <= ccp->ksb_count) { - bitmap_set(ccp->ksb, start, count); - - mutex_unlock(&ccp->ksb_mutex); - break; - } - - ccp->ksb_avail = 0; - - mutex_unlock(&ccp->ksb_mutex); - - /* Wait for KSB entries to become available */ - if (wait_event_interruptible(ccp->ksb_queue, ccp->ksb_avail)) - return 0; - } - - return KSB_START + start; -} - -static void ccp_free_ksb(struct ccp_device *ccp, unsigned int start, - unsigned int count) -{ - if (!start) - return; - - mutex_lock(&ccp->ksb_mutex); - - bitmap_clear(ccp->ksb, start - KSB_START, count); - - ccp->ksb_avail = 1; - - mutex_unlock(&ccp->ksb_mutex); - - wake_up_interruptible_all(&ccp->ksb_queue); -} +#define CCP_NEW_JOBID(ccp) ((ccp->vdata->version == CCP_VERSION(3, 0)) ? \ + ccp_gen_jobid(ccp) : 0) static u32 ccp_gen_jobid(struct ccp_device *ccp) { @@ -231,7 +188,7 @@ static int ccp_reverse_set_dm_area(struct ccp_dm_workarea *wa, unsigned int len, unsigned int se_len, bool sign_extend) { - unsigned int nbytes, sg_offset, dm_offset, ksb_len, i; + unsigned int nbytes, sg_offset, dm_offset, sb_len, i; u8 buffer[CCP_REVERSE_BUF_SIZE]; if (WARN_ON(se_len > sizeof(buffer))) @@ -241,21 +198,21 @@ static int ccp_reverse_set_dm_area(struct ccp_dm_workarea *wa, dm_offset = 0; nbytes = len; while (nbytes) { - ksb_len = min_t(unsigned int, nbytes, se_len); - sg_offset -= ksb_len; + sb_len = min_t(unsigned int, nbytes, se_len); + sg_offset -= sb_len; - scatterwalk_map_and_copy(buffer, sg, sg_offset, ksb_len, 0); - for (i = 0; i < ksb_len; i++) - wa->address[dm_offset + i] = buffer[ksb_len - i - 1]; + scatterwalk_map_and_copy(buffer, sg, sg_offset, sb_len, 0); + for (i = 0; i < sb_len; i++) + wa->address[dm_offset + i] = buffer[sb_len - i - 1]; - dm_offset += ksb_len; - nbytes -= ksb_len; + dm_offset += sb_len; + nbytes -= sb_len; - if ((ksb_len != se_len) && sign_extend) { + if ((sb_len != se_len) && sign_extend) { /* Must sign-extend to nearest sign-extend length */ if (wa->address[dm_offset - 1] & 0x80) memset(wa->address + dm_offset, 0xff, - se_len - ksb_len); + se_len - sb_len); } } @@ -266,22 +223,22 @@ static void ccp_reverse_get_dm_area(struct ccp_dm_workarea *wa, struct scatterlist *sg, unsigned int len) { - unsigned int nbytes, sg_offset, dm_offset, ksb_len, i; + unsigned int nbytes, sg_offset, dm_offset, sb_len, i; u8 buffer[CCP_REVERSE_BUF_SIZE]; sg_offset = 0; dm_offset = len; nbytes = len; while (nbytes) { - ksb_len = min_t(unsigned int, nbytes, sizeof(buffer)); - dm_offset -= ksb_len; + sb_len = min_t(unsigned int, nbytes, sizeof(buffer)); + dm_offset -= sb_len; - for (i = 0; i < ksb_len; i++) - buffer[ksb_len - i - 1] = wa->address[dm_offset + i]; - scatterwalk_map_and_copy(buffer, sg, sg_offset, ksb_len, 1); + for (i = 0; i < sb_len; i++) + buffer[sb_len - i - 1] = wa->address[dm_offset + i]; + scatterwalk_map_and_copy(buffer, sg, sg_offset, sb_len, 1); - sg_offset += ksb_len; - nbytes -= ksb_len; + sg_offset += sb_len; + nbytes -= sb_len; } } @@ -449,9 +406,9 @@ static void ccp_process_data(struct ccp_data *src, struct ccp_data *dst, } } -static int ccp_copy_to_from_ksb(struct ccp_cmd_queue *cmd_q, - struct ccp_dm_workarea *wa, u32 jobid, u32 ksb, - u32 byte_swap, bool from) +static int ccp_copy_to_from_sb(struct ccp_cmd_queue *cmd_q, + struct ccp_dm_workarea *wa, u32 jobid, u32 sb, + u32 byte_swap, bool from) { struct ccp_op op; @@ -463,8 +420,8 @@ static int ccp_copy_to_from_ksb(struct ccp_cmd_queue *cmd_q, if (from) { op.soc = 1; - op.src.type = CCP_MEMTYPE_KSB; - op.src.u.ksb = ksb; + op.src.type = CCP_MEMTYPE_SB; + op.src.u.sb = sb; op.dst.type = CCP_MEMTYPE_SYSTEM; op.dst.u.dma.address = wa->dma.address; op.dst.u.dma.length = wa->length; @@ -472,27 +429,27 @@ static int ccp_copy_to_from_ksb(struct ccp_cmd_queue *cmd_q, op.src.type = CCP_MEMTYPE_SYSTEM; op.src.u.dma.address = wa->dma.address; op.src.u.dma.length = wa->length; - op.dst.type = CCP_MEMTYPE_KSB; - op.dst.u.ksb = ksb; + op.dst.type = CCP_MEMTYPE_SB; + op.dst.u.sb = sb; } op.u.passthru.byte_swap = byte_swap; - return cmd_q->ccp->vdata->perform->perform_passthru(&op); + return cmd_q->ccp->vdata->perform->passthru(&op); } -static int ccp_copy_to_ksb(struct ccp_cmd_queue *cmd_q, - struct ccp_dm_workarea *wa, u32 jobid, u32 ksb, - u32 byte_swap) +static int ccp_copy_to_sb(struct ccp_cmd_queue *cmd_q, + struct ccp_dm_workarea *wa, u32 jobid, u32 sb, + u32 byte_swap) { - return ccp_copy_to_from_ksb(cmd_q, wa, jobid, ksb, byte_swap, false); + return ccp_copy_to_from_sb(cmd_q, wa, jobid, sb, byte_swap, false); } -static int ccp_copy_from_ksb(struct ccp_cmd_queue *cmd_q, - struct ccp_dm_workarea *wa, u32 jobid, u32 ksb, - u32 byte_swap) +static int ccp_copy_from_sb(struct ccp_cmd_queue *cmd_q, + struct ccp_dm_workarea *wa, u32 jobid, u32 sb, + u32 byte_swap) { - return ccp_copy_to_from_ksb(cmd_q, wa, jobid, ksb, byte_swap, true); + return ccp_copy_to_from_sb(cmd_q, wa, jobid, sb, byte_swap, true); } static int ccp_run_aes_cmac_cmd(struct ccp_cmd_queue *cmd_q, @@ -527,54 +484,54 @@ static int ccp_run_aes_cmac_cmd(struct ccp_cmd_queue *cmd_q, return -EINVAL; } - BUILD_BUG_ON(CCP_AES_KEY_KSB_COUNT != 1); - BUILD_BUG_ON(CCP_AES_CTX_KSB_COUNT != 1); + BUILD_BUG_ON(CCP_AES_KEY_SB_COUNT != 1); + BUILD_BUG_ON(CCP_AES_CTX_SB_COUNT != 1); ret = -EIO; memset(&op, 0, sizeof(op)); op.cmd_q = cmd_q; - op.jobid = ccp_gen_jobid(cmd_q->ccp); - op.ksb_key = cmd_q->ksb_key; - op.ksb_ctx = cmd_q->ksb_ctx; + op.jobid = CCP_NEW_JOBID(cmd_q->ccp); + op.sb_key = cmd_q->sb_key; + op.sb_ctx = cmd_q->sb_ctx; op.init = 1; op.u.aes.type = aes->type; op.u.aes.mode = aes->mode; op.u.aes.action = aes->action; - /* All supported key sizes fit in a single (32-byte) KSB entry + /* All supported key sizes fit in a single (32-byte) SB entry * and must be in little endian format. Use the 256-bit byte * swap passthru option to convert from big endian to little * endian. */ ret = ccp_init_dm_workarea(&key, cmd_q, - CCP_AES_KEY_KSB_COUNT * CCP_KSB_BYTES, + CCP_AES_KEY_SB_COUNT * CCP_SB_BYTES, DMA_TO_DEVICE); if (ret) return ret; - dm_offset = CCP_KSB_BYTES - aes->key_len; + dm_offset = CCP_SB_BYTES - aes->key_len; ccp_set_dm_area(&key, dm_offset, aes->key, 0, aes->key_len); - ret = ccp_copy_to_ksb(cmd_q, &key, op.jobid, op.ksb_key, - CCP_PASSTHRU_BYTESWAP_256BIT); + ret = ccp_copy_to_sb(cmd_q, &key, op.jobid, op.sb_key, + CCP_PASSTHRU_BYTESWAP_256BIT); if (ret) { cmd->engine_error = cmd_q->cmd_error; goto e_key; } - /* The AES context fits in a single (32-byte) KSB entry and + /* The AES context fits in a single (32-byte) SB entry and * must be in little endian format. Use the 256-bit byte swap * passthru option to convert from big endian to little endian. */ ret = ccp_init_dm_workarea(&ctx, cmd_q, - CCP_AES_CTX_KSB_COUNT * CCP_KSB_BYTES, + CCP_AES_CTX_SB_COUNT * CCP_SB_BYTES, DMA_BIDIRECTIONAL); if (ret) goto e_key; - dm_offset = CCP_KSB_BYTES - AES_BLOCK_SIZE; + dm_offset = CCP_SB_BYTES - AES_BLOCK_SIZE; ccp_set_dm_area(&ctx, dm_offset, aes->iv, 0, aes->iv_len); - ret = ccp_copy_to_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx, - CCP_PASSTHRU_BYTESWAP_256BIT); + ret = ccp_copy_to_sb(cmd_q, &ctx, op.jobid, op.sb_ctx, + CCP_PASSTHRU_BYTESWAP_256BIT); if (ret) { cmd->engine_error = cmd_q->cmd_error; goto e_ctx; @@ -592,9 +549,9 @@ static int ccp_run_aes_cmac_cmd(struct ccp_cmd_queue *cmd_q, op.eom = 1; /* Push the K1/K2 key to the CCP now */ - ret = ccp_copy_from_ksb(cmd_q, &ctx, op.jobid, - op.ksb_ctx, - CCP_PASSTHRU_BYTESWAP_256BIT); + ret = ccp_copy_from_sb(cmd_q, &ctx, op.jobid, + op.sb_ctx, + CCP_PASSTHRU_BYTESWAP_256BIT); if (ret) { cmd->engine_error = cmd_q->cmd_error; goto e_src; @@ -602,15 +559,15 @@ static int ccp_run_aes_cmac_cmd(struct ccp_cmd_queue *cmd_q, ccp_set_dm_area(&ctx, 0, aes->cmac_key, 0, aes->cmac_key_len); - ret = ccp_copy_to_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx, - CCP_PASSTHRU_BYTESWAP_256BIT); + ret = ccp_copy_to_sb(cmd_q, &ctx, op.jobid, op.sb_ctx, + CCP_PASSTHRU_BYTESWAP_256BIT); if (ret) { cmd->engine_error = cmd_q->cmd_error; goto e_src; } } - ret = cmd_q->ccp->vdata->perform->perform_aes(&op); + ret = cmd_q->ccp->vdata->perform->aes(&op); if (ret) { cmd->engine_error = cmd_q->cmd_error; goto e_src; @@ -622,15 +579,15 @@ static int ccp_run_aes_cmac_cmd(struct ccp_cmd_queue *cmd_q, /* Retrieve the AES context - convert from LE to BE using * 32-byte (256-bit) byteswapping */ - ret = ccp_copy_from_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx, - CCP_PASSTHRU_BYTESWAP_256BIT); + ret = ccp_copy_from_sb(cmd_q, &ctx, op.jobid, op.sb_ctx, + CCP_PASSTHRU_BYTESWAP_256BIT); if (ret) { cmd->engine_error = cmd_q->cmd_error; goto e_src; } /* ...but we only need AES_BLOCK_SIZE bytes */ - dm_offset = CCP_KSB_BYTES - AES_BLOCK_SIZE; + dm_offset = CCP_SB_BYTES - AES_BLOCK_SIZE; ccp_get_dm_area(&ctx, dm_offset, aes->iv, 0, aes->iv_len); e_src: @@ -680,56 +637,56 @@ static int ccp_run_aes_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd) return -EINVAL; } - BUILD_BUG_ON(CCP_AES_KEY_KSB_COUNT != 1); - BUILD_BUG_ON(CCP_AES_CTX_KSB_COUNT != 1); + BUILD_BUG_ON(CCP_AES_KEY_SB_COUNT != 1); + BUILD_BUG_ON(CCP_AES_CTX_SB_COUNT != 1); ret = -EIO; memset(&op, 0, sizeof(op)); op.cmd_q = cmd_q; - op.jobid = ccp_gen_jobid(cmd_q->ccp); - op.ksb_key = cmd_q->ksb_key; - op.ksb_ctx = cmd_q->ksb_ctx; + op.jobid = CCP_NEW_JOBID(cmd_q->ccp); + op.sb_key = cmd_q->sb_key; + op.sb_ctx = cmd_q->sb_ctx; op.init = (aes->mode == CCP_AES_MODE_ECB) ? 0 : 1; op.u.aes.type = aes->type; op.u.aes.mode = aes->mode; op.u.aes.action = aes->action; - /* All supported key sizes fit in a single (32-byte) KSB entry + /* All supported key sizes fit in a single (32-byte) SB entry * and must be in little endian format. Use the 256-bit byte * swap passthru option to convert from big endian to little * endian. */ ret = ccp_init_dm_workarea(&key, cmd_q, - CCP_AES_KEY_KSB_COUNT * CCP_KSB_BYTES, + CCP_AES_KEY_SB_COUNT * CCP_SB_BYTES, DMA_TO_DEVICE); if (ret) return ret; - dm_offset = CCP_KSB_BYTES - aes->key_len; + dm_offset = CCP_SB_BYTES - aes->key_len; ccp_set_dm_area(&key, dm_offset, aes->key, 0, aes->key_len); - ret = ccp_copy_to_ksb(cmd_q, &key, op.jobid, op.ksb_key, - CCP_PASSTHRU_BYTESWAP_256BIT); + ret = ccp_copy_to_sb(cmd_q, &key, op.jobid, op.sb_key, + CCP_PASSTHRU_BYTESWAP_256BIT); if (ret) { cmd->engine_error = cmd_q->cmd_error; goto e_key; } - /* The AES context fits in a single (32-byte) KSB entry and + /* The AES context fits in a single (32-byte) SB entry and * must be in little endian format. Use the 256-bit byte swap * passthru option to convert from big endian to little endian. */ ret = ccp_init_dm_workarea(&ctx, cmd_q, - CCP_AES_CTX_KSB_COUNT * CCP_KSB_BYTES, + CCP_AES_CTX_SB_COUNT * CCP_SB_BYTES, DMA_BIDIRECTIONAL); if (ret) goto e_key; if (aes->mode != CCP_AES_MODE_ECB) { - /* Load the AES context - conver to LE */ - dm_offset = CCP_KSB_BYTES - AES_BLOCK_SIZE; + /* Load the AES context - convert to LE */ + dm_offset = CCP_SB_BYTES - AES_BLOCK_SIZE; ccp_set_dm_area(&ctx, dm_offset, aes->iv, 0, aes->iv_len); - ret = ccp_copy_to_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx, - CCP_PASSTHRU_BYTESWAP_256BIT); + ret = ccp_copy_to_sb(cmd_q, &ctx, op.jobid, op.sb_ctx, + CCP_PASSTHRU_BYTESWAP_256BIT); if (ret) { cmd->engine_error = cmd_q->cmd_error; goto e_ctx; @@ -772,7 +729,7 @@ static int ccp_run_aes_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd) op.soc = 1; } - ret = cmd_q->ccp->vdata->perform->perform_aes(&op); + ret = cmd_q->ccp->vdata->perform->aes(&op); if (ret) { cmd->engine_error = cmd_q->cmd_error; goto e_dst; @@ -785,15 +742,15 @@ static int ccp_run_aes_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd) /* Retrieve the AES context - convert from LE to BE using * 32-byte (256-bit) byteswapping */ - ret = ccp_copy_from_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx, - CCP_PASSTHRU_BYTESWAP_256BIT); + ret = ccp_copy_from_sb(cmd_q, &ctx, op.jobid, op.sb_ctx, + CCP_PASSTHRU_BYTESWAP_256BIT); if (ret) { cmd->engine_error = cmd_q->cmd_error; goto e_dst; } /* ...but we only need AES_BLOCK_SIZE bytes */ - dm_offset = CCP_KSB_BYTES - AES_BLOCK_SIZE; + dm_offset = CCP_SB_BYTES - AES_BLOCK_SIZE; ccp_get_dm_area(&ctx, dm_offset, aes->iv, 0, aes->iv_len); } @@ -857,53 +814,53 @@ static int ccp_run_xts_aes_cmd(struct ccp_cmd_queue *cmd_q, if (!xts->key || !xts->iv || !xts->src || !xts->dst) return -EINVAL; - BUILD_BUG_ON(CCP_XTS_AES_KEY_KSB_COUNT != 1); - BUILD_BUG_ON(CCP_XTS_AES_CTX_KSB_COUNT != 1); + BUILD_BUG_ON(CCP_XTS_AES_KEY_SB_COUNT != 1); + BUILD_BUG_ON(CCP_XTS_AES_CTX_SB_COUNT != 1); ret = -EIO; memset(&op, 0, sizeof(op)); op.cmd_q = cmd_q; - op.jobid = ccp_gen_jobid(cmd_q->ccp); - op.ksb_key = cmd_q->ksb_key; - op.ksb_ctx = cmd_q->ksb_ctx; + op.jobid = CCP_NEW_JOBID(cmd_q->ccp); + op.sb_key = cmd_q->sb_key; + op.sb_ctx = cmd_q->sb_ctx; op.init = 1; op.u.xts.action = xts->action; op.u.xts.unit_size = xts->unit_size; - /* All supported key sizes fit in a single (32-byte) KSB entry + /* All supported key sizes fit in a single (32-byte) SB entry * and must be in little endian format. Use the 256-bit byte * swap passthru option to convert from big endian to little * endian. */ ret = ccp_init_dm_workarea(&key, cmd_q, - CCP_XTS_AES_KEY_KSB_COUNT * CCP_KSB_BYTES, + CCP_XTS_AES_KEY_SB_COUNT * CCP_SB_BYTES, DMA_TO_DEVICE); if (ret) return ret; - dm_offset = CCP_KSB_BYTES - AES_KEYSIZE_128; + dm_offset = CCP_SB_BYTES - AES_KEYSIZE_128; ccp_set_dm_area(&key, dm_offset, xts->key, 0, xts->key_len); ccp_set_dm_area(&key, 0, xts->key, dm_offset, xts->key_len); - ret = ccp_copy_to_ksb(cmd_q, &key, op.jobid, op.ksb_key, - CCP_PASSTHRU_BYTESWAP_256BIT); + ret = ccp_copy_to_sb(cmd_q, &key, op.jobid, op.sb_key, + CCP_PASSTHRU_BYTESWAP_256BIT); if (ret) { cmd->engine_error = cmd_q->cmd_error; goto e_key; } - /* The AES context fits in a single (32-byte) KSB entry and + /* The AES context fits in a single (32-byte) SB entry and * for XTS is already in little endian format so no byte swapping * is needed. */ ret = ccp_init_dm_workarea(&ctx, cmd_q, - CCP_XTS_AES_CTX_KSB_COUNT * CCP_KSB_BYTES, + CCP_XTS_AES_CTX_SB_COUNT * CCP_SB_BYTES, DMA_BIDIRECTIONAL); if (ret) goto e_key; ccp_set_dm_area(&ctx, 0, xts->iv, 0, xts->iv_len); - ret = ccp_copy_to_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx, - CCP_PASSTHRU_BYTESWAP_NOOP); + ret = ccp_copy_to_sb(cmd_q, &ctx, op.jobid, op.sb_ctx, + CCP_PASSTHRU_BYTESWAP_NOOP); if (ret) { cmd->engine_error = cmd_q->cmd_error; goto e_ctx; @@ -937,7 +894,7 @@ static int ccp_run_xts_aes_cmd(struct ccp_cmd_queue *cmd_q, if (!src.sg_wa.bytes_left) op.eom = 1; - ret = cmd_q->ccp->vdata->perform->perform_xts_aes(&op); + ret = cmd_q->ccp->vdata->perform->xts_aes(&op); if (ret) { cmd->engine_error = cmd_q->cmd_error; goto e_dst; @@ -949,15 +906,15 @@ static int ccp_run_xts_aes_cmd(struct ccp_cmd_queue *cmd_q, /* Retrieve the AES context - convert from LE to BE using * 32-byte (256-bit) byteswapping */ - ret = ccp_copy_from_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx, - CCP_PASSTHRU_BYTESWAP_256BIT); + ret = ccp_copy_from_sb(cmd_q, &ctx, op.jobid, op.sb_ctx, + CCP_PASSTHRU_BYTESWAP_256BIT); if (ret) { cmd->engine_error = cmd_q->cmd_error; goto e_dst; } /* ...but we only need AES_BLOCK_SIZE bytes */ - dm_offset = CCP_KSB_BYTES - AES_BLOCK_SIZE; + dm_offset = CCP_SB_BYTES - AES_BLOCK_SIZE; ccp_get_dm_area(&ctx, dm_offset, xts->iv, 0, xts->iv_len); e_dst: @@ -982,163 +939,227 @@ static int ccp_run_sha_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd) struct ccp_dm_workarea ctx; struct ccp_data src; struct ccp_op op; + unsigned int ioffset, ooffset; + unsigned int digest_size; + int sb_count; + const void *init; + u64 block_size; + int ctx_size; int ret; - if (sha->ctx_len != CCP_SHA_CTXSIZE) + switch (sha->type) { + case CCP_SHA_TYPE_1: + if (sha->ctx_len < SHA1_DIGEST_SIZE) + return -EINVAL; + block_size = SHA1_BLOCK_SIZE; + break; + case CCP_SHA_TYPE_224: + if (sha->ctx_len < SHA224_DIGEST_SIZE) + return -EINVAL; + block_size = SHA224_BLOCK_SIZE; + break; + case CCP_SHA_TYPE_256: + if (sha->ctx_len < SHA256_DIGEST_SIZE) + return -EINVAL; + block_size = SHA256_BLOCK_SIZE; + break; + default: return -EINVAL; + } if (!sha->ctx) return -EINVAL; - if (!sha->final && (sha->src_len & (CCP_SHA_BLOCKSIZE - 1))) + if (!sha->final && (sha->src_len & (block_size - 1))) return -EINVAL; - if (!sha->src_len) { - const u8 *sha_zero; + /* The version 3 device can't handle zero-length input */ + if (cmd_q->ccp->vdata->version == CCP_VERSION(3, 0)) { - /* Not final, just return */ - if (!sha->final) - return 0; + if (!sha->src_len) { + unsigned int digest_len; + const u8 *sha_zero; - /* CCP can't do a zero length sha operation so the caller - * must buffer the data. - */ - if (sha->msg_bits) - return -EINVAL; + /* Not final, just return */ + if (!sha->final) + return 0; - /* The CCP cannot perform zero-length sha operations so the - * caller is required to buffer data for the final operation. - * However, a sha operation for a message with a total length - * of zero is valid so known values are required to supply - * the result. - */ - switch (sha->type) { - case CCP_SHA_TYPE_1: - sha_zero = sha1_zero_message_hash; - break; - case CCP_SHA_TYPE_224: - sha_zero = sha224_zero_message_hash; - break; - case CCP_SHA_TYPE_256: - sha_zero = sha256_zero_message_hash; - break; - default: - return -EINVAL; - } + /* CCP can't do a zero length sha operation so the + * caller must buffer the data. + */ + if (sha->msg_bits) + return -EINVAL; + + /* The CCP cannot perform zero-length sha operations + * so the caller is required to buffer data for the + * final operation. However, a sha operation for a + * message with a total length of zero is valid so + * known values are required to supply the result. + */ + switch (sha->type) { + case CCP_SHA_TYPE_1: + sha_zero = sha1_zero_message_hash; + digest_len = SHA1_DIGEST_SIZE; + break; + case CCP_SHA_TYPE_224: + sha_zero = sha224_zero_message_hash; + digest_len = SHA224_DIGEST_SIZE; + break; + case CCP_SHA_TYPE_256: + sha_zero = sha256_zero_message_hash; + digest_len = SHA256_DIGEST_SIZE; + break; + default: + return -EINVAL; + } - scatterwalk_map_and_copy((void *)sha_zero, sha->ctx, 0, - sha->ctx_len, 1); + scatterwalk_map_and_copy((void *)sha_zero, sha->ctx, 0, + digest_len, 1); - return 0; + return 0; + } } - if (!sha->src) - return -EINVAL; + /* Set variables used throughout */ + switch (sha->type) { + case CCP_SHA_TYPE_1: + digest_size = SHA1_DIGEST_SIZE; + init = (void *) ccp_sha1_init; + ctx_size = SHA1_DIGEST_SIZE; + sb_count = 1; + if (cmd_q->ccp->vdata->version != CCP_VERSION(3, 0)) + ooffset = ioffset = CCP_SB_BYTES - SHA1_DIGEST_SIZE; + else + ooffset = ioffset = 0; + break; + case CCP_SHA_TYPE_224: + digest_size = SHA224_DIGEST_SIZE; + init = (void *) ccp_sha224_init; + ctx_size = SHA256_DIGEST_SIZE; + sb_count = 1; + ioffset = 0; + if (cmd_q->ccp->vdata->version != CCP_VERSION(3, 0)) + ooffset = CCP_SB_BYTES - SHA224_DIGEST_SIZE; + else + ooffset = 0; + break; + case CCP_SHA_TYPE_256: + digest_size = SHA256_DIGEST_SIZE; + init = (void *) ccp_sha256_init; + ctx_size = SHA256_DIGEST_SIZE; + sb_count = 1; + ooffset = ioffset = 0; + break; + default: + ret = -EINVAL; + goto e_data; + } - BUILD_BUG_ON(CCP_SHA_KSB_COUNT != 1); + /* For zero-length plaintext the src pointer is ignored; + * otherwise both parts must be valid + */ + if (sha->src_len && !sha->src) + return -EINVAL; memset(&op, 0, sizeof(op)); op.cmd_q = cmd_q; - op.jobid = ccp_gen_jobid(cmd_q->ccp); - op.ksb_ctx = cmd_q->ksb_ctx; + op.jobid = CCP_NEW_JOBID(cmd_q->ccp); + op.sb_ctx = cmd_q->sb_ctx; /* Pre-allocated */ op.u.sha.type = sha->type; op.u.sha.msg_bits = sha->msg_bits; - /* The SHA context fits in a single (32-byte) KSB entry and - * must be in little endian format. Use the 256-bit byte swap - * passthru option to convert from big endian to little endian. - */ - ret = ccp_init_dm_workarea(&ctx, cmd_q, - CCP_SHA_KSB_COUNT * CCP_KSB_BYTES, + ret = ccp_init_dm_workarea(&ctx, cmd_q, sb_count * CCP_SB_BYTES, DMA_BIDIRECTIONAL); if (ret) return ret; - if (sha->first) { - const __be32 *init; - switch (sha->type) { case CCP_SHA_TYPE_1: - init = ccp_sha1_init; - break; case CCP_SHA_TYPE_224: - init = ccp_sha224_init; - break; case CCP_SHA_TYPE_256: - init = ccp_sha256_init; + memcpy(ctx.address + ioffset, init, ctx_size); break; default: ret = -EINVAL; goto e_ctx; } - memcpy(ctx.address, init, CCP_SHA_CTXSIZE); } else { - ccp_set_dm_area(&ctx, 0, sha->ctx, 0, sha->ctx_len); + /* Restore the context */ + ccp_set_dm_area(&ctx, 0, sha->ctx, 0, + sb_count * CCP_SB_BYTES); } - ret = ccp_copy_to_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx, - CCP_PASSTHRU_BYTESWAP_256BIT); + ret = ccp_copy_to_sb(cmd_q, &ctx, op.jobid, op.sb_ctx, + CCP_PASSTHRU_BYTESWAP_256BIT); if (ret) { cmd->engine_error = cmd_q->cmd_error; goto e_ctx; } - /* Send data to the CCP SHA engine */ - ret = ccp_init_data(&src, cmd_q, sha->src, sha->src_len, - CCP_SHA_BLOCKSIZE, DMA_TO_DEVICE); - if (ret) - goto e_ctx; + if (sha->src) { + /* Send data to the CCP SHA engine; block_size is set above */ + ret = ccp_init_data(&src, cmd_q, sha->src, sha->src_len, + block_size, DMA_TO_DEVICE); + if (ret) + goto e_ctx; - while (src.sg_wa.bytes_left) { - ccp_prepare_data(&src, NULL, &op, CCP_SHA_BLOCKSIZE, false); - if (sha->final && !src.sg_wa.bytes_left) - op.eom = 1; + while (src.sg_wa.bytes_left) { + ccp_prepare_data(&src, NULL, &op, block_size, false); + if (sha->final && !src.sg_wa.bytes_left) + op.eom = 1; + + ret = cmd_q->ccp->vdata->perform->sha(&op); + if (ret) { + cmd->engine_error = cmd_q->cmd_error; + goto e_data; + } - ret = cmd_q->ccp->vdata->perform->perform_sha(&op); + ccp_process_data(&src, NULL, &op); + } + } else { + op.eom = 1; + ret = cmd_q->ccp->vdata->perform->sha(&op); if (ret) { cmd->engine_error = cmd_q->cmd_error; goto e_data; } - - ccp_process_data(&src, NULL, &op); } /* Retrieve the SHA context - convert from LE to BE using * 32-byte (256-bit) byteswapping to BE */ - ret = ccp_copy_from_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx, - CCP_PASSTHRU_BYTESWAP_256BIT); + ret = ccp_copy_from_sb(cmd_q, &ctx, op.jobid, op.sb_ctx, + CCP_PASSTHRU_BYTESWAP_256BIT); if (ret) { cmd->engine_error = cmd_q->cmd_error; goto e_data; } - ccp_get_dm_area(&ctx, 0, sha->ctx, 0, sha->ctx_len); - - if (sha->final && sha->opad) { - /* HMAC operation, recursively perform final SHA */ - struct ccp_cmd hmac_cmd; - struct scatterlist sg; - u64 block_size, digest_size; - u8 *hmac_buf; - + if (sha->final) { + /* Finishing up, so get the digest */ switch (sha->type) { case CCP_SHA_TYPE_1: - block_size = SHA1_BLOCK_SIZE; - digest_size = SHA1_DIGEST_SIZE; - break; case CCP_SHA_TYPE_224: - block_size = SHA224_BLOCK_SIZE; - digest_size = SHA224_DIGEST_SIZE; - break; case CCP_SHA_TYPE_256: - block_size = SHA256_BLOCK_SIZE; - digest_size = SHA256_DIGEST_SIZE; + ccp_get_dm_area(&ctx, ooffset, + sha->ctx, 0, + digest_size); break; default: ret = -EINVAL; - goto e_data; + goto e_ctx; } + } else { + /* Stash the context */ + ccp_get_dm_area(&ctx, 0, sha->ctx, 0, + sb_count * CCP_SB_BYTES); + } + + if (sha->final && sha->opad) { + /* HMAC operation, recursively perform final SHA */ + struct ccp_cmd hmac_cmd; + struct scatterlist sg; + u8 *hmac_buf; if (sha->opad_len != block_size) { ret = -EINVAL; @@ -1153,7 +1174,18 @@ static int ccp_run_sha_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd) sg_init_one(&sg, hmac_buf, block_size + digest_size); scatterwalk_map_and_copy(hmac_buf, sha->opad, 0, block_size, 0); - memcpy(hmac_buf + block_size, ctx.address, digest_size); + switch (sha->type) { + case CCP_SHA_TYPE_1: + case CCP_SHA_TYPE_224: + case CCP_SHA_TYPE_256: + memcpy(hmac_buf + block_size, + ctx.address + ooffset, + digest_size); + break; + default: + ret = -EINVAL; + goto e_ctx; + } memset(&hmac_cmd, 0, sizeof(hmac_cmd)); hmac_cmd.engine = CCP_ENGINE_SHA; @@ -1176,7 +1208,8 @@ static int ccp_run_sha_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd) } e_data: - ccp_free_data(&src, cmd_q); + if (sha->src) + ccp_free_data(&src, cmd_q); e_ctx: ccp_dm_free(&ctx); @@ -1190,7 +1223,7 @@ static int ccp_run_rsa_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd) struct ccp_dm_workarea exp, src; struct ccp_data dst; struct ccp_op op; - unsigned int ksb_count, i_len, o_len; + unsigned int sb_count, i_len, o_len; int ret; if (rsa->key_size > CCP_RSA_MAX_WIDTH) @@ -1208,16 +1241,17 @@ static int ccp_run_rsa_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd) o_len = ((rsa->key_size + 255) / 256) * 32; i_len = o_len * 2; - ksb_count = o_len / CCP_KSB_BYTES; + sb_count = o_len / CCP_SB_BYTES; memset(&op, 0, sizeof(op)); op.cmd_q = cmd_q; op.jobid = ccp_gen_jobid(cmd_q->ccp); - op.ksb_key = ccp_alloc_ksb(cmd_q->ccp, ksb_count); - if (!op.ksb_key) + op.sb_key = cmd_q->ccp->vdata->perform->sballoc(cmd_q, sb_count); + + if (!op.sb_key) return -EIO; - /* The RSA exponent may span multiple (32-byte) KSB entries and must + /* The RSA exponent may span multiple (32-byte) SB entries and must * be in little endian format. Reverse copy each 32-byte chunk * of the exponent (En chunk to E0 chunk, E(n-1) chunk to E1 chunk) * and each byte within that chunk and do not perform any byte swap @@ -1225,14 +1259,14 @@ static int ccp_run_rsa_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd) */ ret = ccp_init_dm_workarea(&exp, cmd_q, o_len, DMA_TO_DEVICE); if (ret) - goto e_ksb; + goto e_sb; ret = ccp_reverse_set_dm_area(&exp, rsa->exp, rsa->exp_len, - CCP_KSB_BYTES, false); + CCP_SB_BYTES, false); if (ret) goto e_exp; - ret = ccp_copy_to_ksb(cmd_q, &exp, op.jobid, op.ksb_key, - CCP_PASSTHRU_BYTESWAP_NOOP); + ret = ccp_copy_to_sb(cmd_q, &exp, op.jobid, op.sb_key, + CCP_PASSTHRU_BYTESWAP_NOOP); if (ret) { cmd->engine_error = cmd_q->cmd_error; goto e_exp; @@ -1247,12 +1281,12 @@ static int ccp_run_rsa_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd) goto e_exp; ret = ccp_reverse_set_dm_area(&src, rsa->mod, rsa->mod_len, - CCP_KSB_BYTES, false); + CCP_SB_BYTES, false); if (ret) goto e_src; src.address += o_len; /* Adjust the address for the copy operation */ ret = ccp_reverse_set_dm_area(&src, rsa->src, rsa->src_len, - CCP_KSB_BYTES, false); + CCP_SB_BYTES, false); if (ret) goto e_src; src.address -= o_len; /* Reset the address to original value */ @@ -1274,7 +1308,7 @@ static int ccp_run_rsa_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd) op.u.rsa.mod_size = rsa->key_size; op.u.rsa.input_len = i_len; - ret = cmd_q->ccp->vdata->perform->perform_rsa(&op); + ret = cmd_q->ccp->vdata->perform->rsa(&op); if (ret) { cmd->engine_error = cmd_q->cmd_error; goto e_dst; @@ -1291,8 +1325,8 @@ e_src: e_exp: ccp_dm_free(&exp); -e_ksb: - ccp_free_ksb(cmd_q->ccp, op.ksb_key, ksb_count); +e_sb: + cmd_q->ccp->vdata->perform->sbfree(cmd_q, op.sb_key, sb_count); return ret; } @@ -1306,7 +1340,7 @@ static int ccp_run_passthru_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_op op; bool in_place = false; unsigned int i; - int ret; + int ret = 0; if (!pt->final && (pt->src_len & (CCP_PASSTHRU_BLOCKSIZE - 1))) return -EINVAL; @@ -1321,26 +1355,26 @@ static int ccp_run_passthru_cmd(struct ccp_cmd_queue *cmd_q, return -EINVAL; } - BUILD_BUG_ON(CCP_PASSTHRU_KSB_COUNT != 1); + BUILD_BUG_ON(CCP_PASSTHRU_SB_COUNT != 1); memset(&op, 0, sizeof(op)); op.cmd_q = cmd_q; - op.jobid = ccp_gen_jobid(cmd_q->ccp); + op.jobid = CCP_NEW_JOBID(cmd_q->ccp); if (pt->bit_mod != CCP_PASSTHRU_BITWISE_NOOP) { /* Load the mask */ - op.ksb_key = cmd_q->ksb_key; + op.sb_key = cmd_q->sb_key; ret = ccp_init_dm_workarea(&mask, cmd_q, - CCP_PASSTHRU_KSB_COUNT * - CCP_KSB_BYTES, + CCP_PASSTHRU_SB_COUNT * + CCP_SB_BYTES, DMA_TO_DEVICE); if (ret) return ret; ccp_set_dm_area(&mask, 0, pt->mask, 0, pt->mask_len); - ret = ccp_copy_to_ksb(cmd_q, &mask, op.jobid, op.ksb_key, - CCP_PASSTHRU_BYTESWAP_NOOP); + ret = ccp_copy_to_sb(cmd_q, &mask, op.jobid, op.sb_key, + CCP_PASSTHRU_BYTESWAP_NOOP); if (ret) { cmd->engine_error = cmd_q->cmd_error; goto e_mask; @@ -1399,7 +1433,7 @@ static int ccp_run_passthru_cmd(struct ccp_cmd_queue *cmd_q, op.dst.u.dma.offset = dst.sg_wa.sg_used; op.dst.u.dma.length = op.src.u.dma.length; - ret = cmd_q->ccp->vdata->perform->perform_passthru(&op); + ret = cmd_q->ccp->vdata->perform->passthru(&op); if (ret) { cmd->engine_error = cmd_q->cmd_error; goto e_dst; @@ -1448,7 +1482,7 @@ static int ccp_run_passthru_nomap_cmd(struct ccp_cmd_queue *cmd_q, return -EINVAL; } - BUILD_BUG_ON(CCP_PASSTHRU_KSB_COUNT != 1); + BUILD_BUG_ON(CCP_PASSTHRU_SB_COUNT != 1); memset(&op, 0, sizeof(op)); op.cmd_q = cmd_q; @@ -1456,13 +1490,13 @@ static int ccp_run_passthru_nomap_cmd(struct ccp_cmd_queue *cmd_q, if (pt->bit_mod != CCP_PASSTHRU_BITWISE_NOOP) { /* Load the mask */ - op.ksb_key = cmd_q->ksb_key; + op.sb_key = cmd_q->sb_key; mask.length = pt->mask_len; mask.dma.address = pt->mask; mask.dma.length = pt->mask_len; - ret = ccp_copy_to_ksb(cmd_q, &mask, op.jobid, op.ksb_key, + ret = ccp_copy_to_sb(cmd_q, &mask, op.jobid, op.sb_key, CCP_PASSTHRU_BYTESWAP_NOOP); if (ret) { cmd->engine_error = cmd_q->cmd_error; @@ -1484,7 +1518,7 @@ static int ccp_run_passthru_nomap_cmd(struct ccp_cmd_queue *cmd_q, op.dst.u.dma.offset = 0; op.dst.u.dma.length = pt->src_len; - ret = cmd_q->ccp->vdata->perform->perform_passthru(&op); + ret = cmd_q->ccp->vdata->perform->passthru(&op); if (ret) cmd->engine_error = cmd_q->cmd_error; @@ -1514,7 +1548,7 @@ static int ccp_run_ecc_mm_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd) memset(&op, 0, sizeof(op)); op.cmd_q = cmd_q; - op.jobid = ccp_gen_jobid(cmd_q->ccp); + op.jobid = CCP_NEW_JOBID(cmd_q->ccp); /* Concatenate the modulus and the operands. Both the modulus and * the operands must be in little endian format. Since the input @@ -1575,7 +1609,7 @@ static int ccp_run_ecc_mm_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd) op.u.ecc.function = cmd->u.ecc.function; - ret = cmd_q->ccp->vdata->perform->perform_ecc(&op); + ret = cmd_q->ccp->vdata->perform->ecc(&op); if (ret) { cmd->engine_error = cmd_q->cmd_error; goto e_dst; @@ -1639,7 +1673,7 @@ static int ccp_run_ecc_pm_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd) memset(&op, 0, sizeof(op)); op.cmd_q = cmd_q; - op.jobid = ccp_gen_jobid(cmd_q->ccp); + op.jobid = CCP_NEW_JOBID(cmd_q->ccp); /* Concatenate the modulus and the operands. Both the modulus and * the operands must be in little endian format. Since the input @@ -1677,7 +1711,7 @@ static int ccp_run_ecc_pm_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd) goto e_src; src.address += CCP_ECC_OPERAND_SIZE; - /* Set the first point Z coordianate to 1 */ + /* Set the first point Z coordinate to 1 */ *src.address = 0x01; src.address += CCP_ECC_OPERAND_SIZE; @@ -1696,7 +1730,7 @@ static int ccp_run_ecc_pm_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd) goto e_src; src.address += CCP_ECC_OPERAND_SIZE; - /* Set the second point Z coordianate to 1 */ + /* Set the second point Z coordinate to 1 */ *src.address = 0x01; src.address += CCP_ECC_OPERAND_SIZE; } else { @@ -1739,7 +1773,7 @@ static int ccp_run_ecc_pm_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd) op.u.ecc.function = cmd->u.ecc.function; - ret = cmd_q->ccp->vdata->perform->perform_ecc(&op); + ret = cmd_q->ccp->vdata->perform->ecc(&op); if (ret) { cmd->engine_error = cmd_q->cmd_error; goto e_dst; @@ -1810,7 +1844,7 @@ int ccp_run_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd) cmd->engine_error = 0; cmd_q->cmd_error = 0; cmd_q->int_rcvd = 0; - cmd_q->free_slots = CMD_Q_DEPTH(ioread32(cmd_q->reg_status)); + cmd_q->free_slots = cmd_q->ccp->vdata->perform->get_free_slots(cmd_q); switch (cmd->engine) { case CCP_ENGINE_AES: diff --git a/drivers/crypto/ccp/ccp-pci.c b/drivers/crypto/ccp/ccp-pci.c index 0bf262e36b6b..28a9996c1085 100644 --- a/drivers/crypto/ccp/ccp-pci.c +++ b/drivers/crypto/ccp/ccp-pci.c @@ -4,6 +4,7 @@ * Copyright (C) 2013,2016 Advanced Micro Devices, Inc. * * Author: Tom Lendacky <thomas.lendacky@amd.com> + * Author: Gary R Hook <gary.hook@amd.com> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -25,9 +26,6 @@ #include "ccp-dev.h" -#define IO_BAR 2 -#define IO_OFFSET 0x20000 - #define MSIX_VECTORS 2 struct ccp_msix { @@ -143,10 +141,11 @@ static void ccp_free_irqs(struct ccp_device *ccp) free_irq(ccp_pci->msix[ccp_pci->msix_count].vector, dev); pci_disable_msix(pdev); - } else { + } else if (ccp->irq) { free_irq(ccp->irq, dev); pci_disable_msi(pdev); } + ccp->irq = 0; } static int ccp_find_mmio_area(struct ccp_device *ccp) @@ -156,10 +155,11 @@ static int ccp_find_mmio_area(struct ccp_device *ccp) resource_size_t io_len; unsigned long io_flags; - io_flags = pci_resource_flags(pdev, IO_BAR); - io_len = pci_resource_len(pdev, IO_BAR); - if ((io_flags & IORESOURCE_MEM) && (io_len >= (IO_OFFSET + 0x800))) - return IO_BAR; + io_flags = pci_resource_flags(pdev, ccp->vdata->bar); + io_len = pci_resource_len(pdev, ccp->vdata->bar); + if ((io_flags & IORESOURCE_MEM) && + (io_len >= (ccp->vdata->offset + 0x800))) + return ccp->vdata->bar; return -EIO; } @@ -216,7 +216,7 @@ static int ccp_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) dev_err(dev, "pci_iomap failed\n"); goto e_device; } - ccp->io_regs = ccp->io_map + IO_OFFSET; + ccp->io_regs = ccp->io_map + ccp->vdata->offset; ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(48)); if (ret) { @@ -230,6 +230,9 @@ static int ccp_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) dev_set_drvdata(dev, ccp); + if (ccp->vdata->setup) + ccp->vdata->setup(ccp); + ret = ccp->vdata->perform->init(ccp); if (ret) goto e_iomap; @@ -322,6 +325,8 @@ static int ccp_pci_resume(struct pci_dev *pdev) static const struct pci_device_id ccp_pci_table[] = { { PCI_VDEVICE(AMD, 0x1537), (kernel_ulong_t)&ccpv3 }, + { PCI_VDEVICE(AMD, 0x1456), (kernel_ulong_t)&ccpv5a }, + { PCI_VDEVICE(AMD, 0x1468), (kernel_ulong_t)&ccpv5b }, /* Last entry must be zero */ { 0, } }; |