Diffstat (limited to 'drivers/crypto/marvell')
-rw-r--r--   drivers/crypto/marvell/cesa/cesa.c                     |   2
-rw-r--r--   drivers/crypto/marvell/cesa/cesa.h                     |   9
-rw-r--r--   drivers/crypto/marvell/cesa/cipher.c                   |   7
-rw-r--r--   drivers/crypto/marvell/cesa/hash.c                     |   7
-rw-r--r--   drivers/crypto/marvell/cesa/tdma.c                     |  53
-rw-r--r--   drivers/crypto/marvell/octeontx2/otx2_cpt_reqmgr.h     | 125
-rw-r--r--   drivers/crypto/marvell/octeontx2/otx2_cptpf_ucode.c    |  51
-rw-r--r--   drivers/crypto/marvell/octeontx2/otx2_cptvf_reqmgr.c   |   7
8 files changed, 183 insertions, 78 deletions
diff --git a/drivers/crypto/marvell/cesa/cesa.c b/drivers/crypto/marvell/cesa/cesa.c
index 5fd31ba715c2..24273cb082ba 100644
--- a/drivers/crypto/marvell/cesa/cesa.c
+++ b/drivers/crypto/marvell/cesa/cesa.c
@@ -94,7 +94,7 @@ static int mv_cesa_std_process(struct mv_cesa_engine *engine, u32 status)
 
 static int mv_cesa_int_process(struct mv_cesa_engine *engine, u32 status)
 {
-	if (engine->chain.first && engine->chain.last)
+	if (engine->chain_hw.first && engine->chain_hw.last)
 		return mv_cesa_tdma_process(engine, status);
 
 	return mv_cesa_std_process(engine, status);
diff --git a/drivers/crypto/marvell/cesa/cesa.h b/drivers/crypto/marvell/cesa/cesa.h
index d215a6bed6bc..50ca1039fdaa 100644
--- a/drivers/crypto/marvell/cesa/cesa.h
+++ b/drivers/crypto/marvell/cesa/cesa.h
@@ -440,8 +440,10 @@ struct mv_cesa_dev {
  *		SRAM
  * @queue:	fifo of the pending crypto requests
  * @load:	engine load counter, useful for load balancing
- * @chain:	list of the current tdma descriptors being processed
- *		by this engine.
+ * @chain_hw:	list of the current tdma descriptors being processed
+ *		by the hardware.
+ * @chain_sw:	list of the current tdma descriptors that will be
+ *		submitted to the hardware.
  * @complete_queue: fifo of the processed requests by the engine
  *
  * Structure storing CESA engine information.
@@ -463,7 +465,8 @@ struct mv_cesa_engine {
 	struct gen_pool *pool;
 	struct crypto_queue queue;
 	atomic_t load;
-	struct mv_cesa_tdma_chain chain;
+	struct mv_cesa_tdma_chain chain_hw;
+	struct mv_cesa_tdma_chain chain_sw;
 	struct list_head complete_queue;
 	int irq;
 };
diff --git a/drivers/crypto/marvell/cesa/cipher.c b/drivers/crypto/marvell/cesa/cipher.c
index 0f37dfd42d85..eabed9d977df 100644
--- a/drivers/crypto/marvell/cesa/cipher.c
+++ b/drivers/crypto/marvell/cesa/cipher.c
@@ -75,9 +75,12 @@ mv_cesa_skcipher_dma_cleanup(struct skcipher_request *req)
 static inline void mv_cesa_skcipher_cleanup(struct skcipher_request *req)
 {
 	struct mv_cesa_skcipher_req *creq = skcipher_request_ctx(req);
+	struct mv_cesa_engine *engine = creq->base.engine;
 
 	if (mv_cesa_req_get_type(&creq->base) == CESA_DMA_REQ)
 		mv_cesa_skcipher_dma_cleanup(req);
+
+	atomic_sub(req->cryptlen, &engine->load);
 }
 
 static void mv_cesa_skcipher_std_step(struct skcipher_request *req)
@@ -212,7 +215,6 @@ mv_cesa_skcipher_complete(struct crypto_async_request *req)
 	struct mv_cesa_engine *engine = creq->base.engine;
 	unsigned int ivsize;
 
-	atomic_sub(skreq->cryptlen, &engine->load);
 	ivsize = crypto_skcipher_ivsize(crypto_skcipher_reqtfm(skreq));
 
 	if (mv_cesa_req_get_type(&creq->base) == CESA_DMA_REQ) {
@@ -459,6 +461,9 @@ static int mv_cesa_skcipher_queue_req(struct skcipher_request *req,
 	struct mv_cesa_skcipher_req *creq = skcipher_request_ctx(req);
 	struct mv_cesa_engine *engine;
 
+	if (!req->cryptlen)
+		return 0;
+
 	ret = mv_cesa_skcipher_req_init(req, tmpl);
 	if (ret)
 		return ret;
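Note: the cipher.c hunks above move the engine load accounting from the completion callback into the cleanup path and skip zero-length requests entirely. Below is a minimal userspace sketch of that accounting pattern, assuming a hypothetical pick_engine() helper; it is not the driver's mv_cesa_select_engine(), only an illustration of adding the request length at queue time and subtracting it again at cleanup.

/*
 * Minimal userspace sketch of per-engine load accounting: the request
 * length is added to the chosen engine's load counter when the request
 * is queued and subtracted in the cleanup path. pick_engine() is a
 * hypothetical stand-in for the driver's engine selection.
 */
#include <stdatomic.h>
#include <stdio.h>

#define NUM_ENGINES 2

struct engine {
	atomic_int load;
};

static struct engine engines[NUM_ENGINES];

static struct engine *pick_engine(int cryptlen)
{
	struct engine *best = &engines[0];
	int i;

	/* Pick the least-loaded engine. */
	for (i = 1; i < NUM_ENGINES; i++)
		if (atomic_load(&engines[i].load) < atomic_load(&best->load))
			best = &engines[i];

	/* Account for the request as soon as it is queued. */
	atomic_fetch_add(&best->load, cryptlen);
	return best;
}

static void cleanup_request(struct engine *e, int cryptlen)
{
	/* Undo the accounting once the request is fully cleaned up. */
	atomic_fetch_sub(&e->load, cryptlen);
}

int main(void)
{
	struct engine *e = pick_engine(4096);

	printf("engine load after queue: %d\n", atomic_load(&e->load));
	cleanup_request(e, 4096);
	printf("engine load after cleanup: %d\n", atomic_load(&e->load));
	return 0;
}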
diff --git a/drivers/crypto/marvell/cesa/hash.c b/drivers/crypto/marvell/cesa/hash.c
index f150861ceaf6..e339ce7ad533 100644
--- a/drivers/crypto/marvell/cesa/hash.c
+++ b/drivers/crypto/marvell/cesa/hash.c
@@ -110,9 +110,12 @@ static inline void mv_cesa_ahash_dma_cleanup(struct ahash_request *req)
 static inline void mv_cesa_ahash_cleanup(struct ahash_request *req)
 {
 	struct mv_cesa_ahash_req *creq = ahash_request_ctx(req);
+	struct mv_cesa_engine *engine = creq->base.engine;
 
 	if (mv_cesa_req_get_type(&creq->base) == CESA_DMA_REQ)
 		mv_cesa_ahash_dma_cleanup(req);
+
+	atomic_sub(req->nbytes, &engine->load);
 }
 
 static void mv_cesa_ahash_last_cleanup(struct ahash_request *req)
@@ -395,8 +398,6 @@ static void mv_cesa_ahash_complete(struct crypto_async_request *req)
 			}
 		}
 	}
-
-	atomic_sub(ahashreq->nbytes, &engine->load);
 }
 
 static void mv_cesa_ahash_prepare(struct crypto_async_request *req,
@@ -663,7 +664,7 @@ static int mv_cesa_ahash_dma_req_init(struct ahash_request *req)
 	if (ret)
 		goto err_free_tdma;
 
-	if (iter.src.sg) {
+	if (iter.base.len > iter.src.op_offset) {
 		/*
 		 * Add all the new data, inserting an operation block and
 		 * launch command between each full SRAM block-worth of
diff --git a/drivers/crypto/marvell/cesa/tdma.c b/drivers/crypto/marvell/cesa/tdma.c
index 388a06e180d6..243305354420 100644
--- a/drivers/crypto/marvell/cesa/tdma.c
+++ b/drivers/crypto/marvell/cesa/tdma.c
@@ -38,6 +38,15 @@ void mv_cesa_dma_step(struct mv_cesa_req *dreq)
 {
 	struct mv_cesa_engine *engine = dreq->engine;
 
+	spin_lock_bh(&engine->lock);
+	if (engine->chain_sw.first == dreq->chain.first) {
+		engine->chain_sw.first = NULL;
+		engine->chain_sw.last = NULL;
+	}
+	engine->chain_hw.first = dreq->chain.first;
+	engine->chain_hw.last = dreq->chain.last;
+	spin_unlock_bh(&engine->lock);
+
 	writel_relaxed(0, engine->regs + CESA_SA_CFG);
 	mv_cesa_set_int_mask(engine, CESA_SA_INT_ACC0_IDMA_DONE);
@@ -96,25 +105,27 @@ void mv_cesa_dma_prepare(struct mv_cesa_req *dreq,
 void mv_cesa_tdma_chain(struct mv_cesa_engine *engine,
 			struct mv_cesa_req *dreq)
 {
-	if (engine->chain.first == NULL && engine->chain.last == NULL) {
-		engine->chain.first = dreq->chain.first;
-		engine->chain.last = dreq->chain.last;
-	} else {
-		struct mv_cesa_tdma_desc *last;
+	struct mv_cesa_tdma_desc *last = engine->chain_sw.last;
 
-		last = engine->chain.last;
+	/*
+	 * Break the DMA chain if the request being queued needs the IV
+	 * regs to be set before lauching the request.
+	 */
+	if (!last || dreq->chain.first->flags & CESA_TDMA_SET_STATE)
+		engine->chain_sw.first = dreq->chain.first;
+	else {
 		last->next = dreq->chain.first;
-		engine->chain.last = dreq->chain.last;
-
-		/*
-		 * Break the DMA chain if the CESA_TDMA_BREAK_CHAIN is set on
-		 * the last element of the current chain, or if the request
-		 * being queued needs the IV regs to be set before lauching
-		 * the request.
-		 */
-		if (!(last->flags & CESA_TDMA_BREAK_CHAIN) &&
-		    !(dreq->chain.first->flags & CESA_TDMA_SET_STATE))
-			last->next_dma = cpu_to_le32(dreq->chain.first->cur_dma);
+		last->next_dma = cpu_to_le32(dreq->chain.first->cur_dma);
+	}
+	last = dreq->chain.last;
+	engine->chain_sw.last = last;
+	/*
+	 * Break the DMA chain if the CESA_TDMA_BREAK_CHAIN is set on
+	 * the last element of the current chain.
+	 */
+	if (last->flags & CESA_TDMA_BREAK_CHAIN) {
+		engine->chain_sw.first = NULL;
+		engine->chain_sw.last = NULL;
 	}
 }
@@ -127,7 +138,7 @@ int mv_cesa_tdma_process(struct mv_cesa_engine *engine, u32 status)
 
 	tdma_cur = readl(engine->regs + CESA_TDMA_CUR);
 
-	for (tdma = engine->chain.first; tdma; tdma = next) {
+	for (tdma = engine->chain_hw.first; tdma; tdma = next) {
 		spin_lock_bh(&engine->lock);
 		next = tdma->next;
 		spin_unlock_bh(&engine->lock);
@@ -149,12 +160,12 @@ int mv_cesa_tdma_process(struct mv_cesa_engine *engine, u32 status)
 						 &backlog);
 
 			/* Re-chaining to the next request */
-			engine->chain.first = tdma->next;
+			engine->chain_hw.first = tdma->next;
 			tdma->next = NULL;
 
 			/* If this is the last request, clear the chain */
-			if (engine->chain.first == NULL)
-				engine->chain.last = NULL;
+			if (engine->chain_hw.first == NULL)
+				engine->chain_hw.last = NULL;
 			spin_unlock_bh(&engine->lock);
 
 			ctx = crypto_tfm_ctx(req->tfm);
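Note: the tdma.c rework above splits the single descriptor chain into a software-side list (chain_sw) and the list actually owned by the DMA engine (chain_hw). The sketch below is a simplified userspace model of that handoff, using stand-in structures rather than the driver's real mv_cesa_tdma_desc handling: chaining queues descriptors on the sw list, and stepping a request hands its chain to the hw list and drops it from the sw side.

/*
 * Simplified model of the chain_sw/chain_hw split: queued descriptors
 * live on a "sw" chain until they are handed to the "hw" chain.
 */
#include <stddef.h>
#include <stdio.h>

struct desc {
	struct desc *next;
	int id;
};

struct chain {
	struct desc *first;
	struct desc *last;
};

static struct chain chain_sw, chain_hw;

/* Queue a request's descriptor chain behind whatever software has chained. */
static void chain_queue(struct desc *first, struct desc *last)
{
	if (!chain_sw.first)
		chain_sw.first = first;
	else
		chain_sw.last->next = first;
	chain_sw.last = last;
}

/* Hand a request's chain to the "hardware" and drop it from the sw list. */
static void step(struct desc *first, struct desc *last)
{
	if (chain_sw.first == first) {
		chain_sw.first = NULL;
		chain_sw.last = NULL;
	}
	chain_hw.first = first;
	chain_hw.last = last;
}

int main(void)
{
	struct desc a = { .id = 1 }, b = { .id = 2 };

	chain_queue(&a, &a);
	chain_queue(&b, &b);
	step(&a, &a);	/* a (and anything chained behind it) goes to hw */
	printf("hw head %d, sw chain cleared: %s\n",
	       chain_hw.first->id, chain_sw.first ? "no" : "yes");
	return 0;
}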
diff --git a/drivers/crypto/marvell/octeontx2/otx2_cpt_reqmgr.h b/drivers/crypto/marvell/octeontx2/otx2_cpt_reqmgr.h
index e27e849b01df..90a031421aac 100644
--- a/drivers/crypto/marvell/octeontx2/otx2_cpt_reqmgr.h
+++ b/drivers/crypto/marvell/octeontx2/otx2_cpt_reqmgr.h
@@ -34,6 +34,9 @@
 #define SG_COMP_2		2
 #define SG_COMP_1		1
 
+#define OTX2_CPT_DPTR_RPTR_ALIGN	8
+#define OTX2_CPT_RES_ADDR_ALIGN		32
+
 union otx2_cpt_opcode {
 	u16 flags;
 	struct {
@@ -347,22 +350,48 @@ static inline struct otx2_cpt_inst_info *
 cn10k_sgv2_info_create(struct pci_dev *pdev, struct otx2_cpt_req_info *req,
 		       gfp_t gfp)
 {
-	u32 dlen = 0, g_len, sg_len, info_len;
-	int align = OTX2_CPT_DMA_MINALIGN;
+	u32 dlen = 0, g_len, s_len, sg_len, info_len;
 	struct otx2_cpt_inst_info *info;
-	u16 g_sz_bytes, s_sz_bytes;
 	u32 total_mem_len;
 	int i;
 
-	g_sz_bytes = ((req->in_cnt + 2) / 3) *
-		      sizeof(struct cn10kb_cpt_sglist_component);
-	s_sz_bytes = ((req->out_cnt + 2) / 3) *
-		      sizeof(struct cn10kb_cpt_sglist_component);
+	/* Allocate memory to meet below alignment requirement:
+	 *  ------------------------------------
+	 * |    struct otx2_cpt_inst_info      |
+	 * |    (No alignment required)        |
+	 * |    -------------------------------|
+	 * |    | padding for ARCH_DMA_MINALIGN|
+	 * |    | alignment                    |
+	 * |------------------------------------|
+	 * |    SG List Gather/Input memory    |
+	 * |    Length = multiple of 32Bytes   |
+	 * |    Alignment = 8Byte              |
+	 * |------------------------------------|
+	 * |    SG List Scatter/Output memory  |
+	 * |    Length = multiple of 32Bytes   |
+	 * |    Alignment = 8Byte              |
+	 * |    -------------------------------|
+	 * |    | padding for 32B alignment    |
+	 * |------------------------------------|
+	 * |   Result response memory          |
+	 * |   Alignment = 32Byte              |
+	 *  ------------------------------------
+	 */
+
+	info_len = sizeof(*info);
+
+	g_len = ((req->in_cnt + 2) / 3) *
+		sizeof(struct cn10kb_cpt_sglist_component);
+	s_len = ((req->out_cnt + 2) / 3) *
+		sizeof(struct cn10kb_cpt_sglist_component);
+	sg_len = g_len + s_len;
 
-	g_len = ALIGN(g_sz_bytes, align);
-	sg_len = ALIGN(g_len + s_sz_bytes, align);
-	info_len = ALIGN(sizeof(*info), align);
-	total_mem_len = sg_len + info_len + sizeof(union otx2_cpt_res_s);
+	/* Allocate extra memory for SG and response address alignment */
+	total_mem_len = ALIGN(info_len, OTX2_CPT_DPTR_RPTR_ALIGN);
+	total_mem_len += (ARCH_DMA_MINALIGN - 1) &
+			 ~(OTX2_CPT_DPTR_RPTR_ALIGN - 1);
+	total_mem_len += ALIGN(sg_len, OTX2_CPT_RES_ADDR_ALIGN);
+	total_mem_len += sizeof(union otx2_cpt_res_s);
 
 	info = kzalloc(total_mem_len, gfp);
 	if (unlikely(!info))
@@ -372,7 +401,8 @@ cn10k_sgv2_info_create(struct pci_dev *pdev, struct otx2_cpt_req_info *req,
 		dlen += req->in[i].size;
 
 	info->dlen = dlen;
-	info->in_buffer = (u8 *)info + info_len;
+	info->in_buffer = PTR_ALIGN((u8 *)info + info_len, ARCH_DMA_MINALIGN);
+	info->out_buffer = info->in_buffer + g_len;
 	info->gthr_sz = req->in_cnt;
 	info->sctr_sz = req->out_cnt;
@@ -384,7 +414,7 @@ cn10k_sgv2_info_create(struct pci_dev *pdev, struct otx2_cpt_req_info *req,
 	}
 
 	if (sgv2io_components_setup(pdev, req->out, req->out_cnt,
-				    &info->in_buffer[g_len])) {
+				    info->out_buffer)) {
 		dev_err(&pdev->dev, "Failed to setup scatter list\n");
 		goto destroy_info;
 	}
@@ -401,8 +431,10 @@ cn10k_sgv2_info_create(struct pci_dev *pdev, struct otx2_cpt_req_info *req,
 	 * Get buffer for union otx2_cpt_res_s response
 	 * structure and its physical address
 	 */
-	info->completion_addr = info->in_buffer + sg_len;
-	info->comp_baddr = info->dptr_baddr + sg_len;
+	info->completion_addr = PTR_ALIGN((info->in_buffer + sg_len),
+					  OTX2_CPT_RES_ADDR_ALIGN);
+	info->comp_baddr = ALIGN((info->dptr_baddr + sg_len),
+				 OTX2_CPT_RES_ADDR_ALIGN);
 
 	return info;
@@ -417,10 +449,9 @@ static inline struct otx2_cpt_inst_info *
 otx2_sg_info_create(struct pci_dev *pdev, struct otx2_cpt_req_info *req,
 		    gfp_t gfp)
 {
-	int align = OTX2_CPT_DMA_MINALIGN;
 	struct otx2_cpt_inst_info *info;
-	u32 dlen, align_dlen, info_len;
-	u16 g_sz_bytes, s_sz_bytes;
+	u32 dlen, info_len;
+	u16 g_len, s_len;
 	u32 total_mem_len;
 
 	if (unlikely(req->in_cnt > OTX2_CPT_MAX_SG_IN_CNT ||
@@ -429,22 +460,54 @@ otx2_sg_info_create(struct pci_dev *pdev, struct otx2_cpt_req_info *req,
 		return NULL;
 	}
 
-	g_sz_bytes = ((req->in_cnt + 3) / 4) *
-		      sizeof(struct otx2_cpt_sglist_component);
-	s_sz_bytes = ((req->out_cnt + 3) / 4) *
-		      sizeof(struct otx2_cpt_sglist_component);
+	/* Allocate memory to meet below alignment requirement:
+	 *  ------------------------------------
+	 * |    struct otx2_cpt_inst_info      |
+	 * |    (No alignment required)        |
+	 * |    -------------------------------|
+	 * |    | padding for ARCH_DMA_MINALIGN|
+	 * |    | alignment                    |
+	 * |------------------------------------|
+	 * |    SG List Header of 8 Byte       |
+	 * |------------------------------------|
+	 * |    SG List Gather/Input memory    |
+	 * |    Length = multiple of 32Bytes   |
+	 * |    Alignment = 8Byte              |
+	 * |------------------------------------|
+	 * |    SG List Scatter/Output memory  |
+	 * |    Length = multiple of 32Bytes   |
+	 * |    Alignment = 8Byte              |
+	 * |    -------------------------------|
+	 * |    | padding for 32B alignment    |
+	 * |------------------------------------|
+	 * |   Result response memory          |
+	 * |   Alignment = 32Byte              |
+	 *  ------------------------------------
+	 */
+
+	info_len = sizeof(*info);
+
+	g_len = ((req->in_cnt + 3) / 4) *
+		sizeof(struct otx2_cpt_sglist_component);
+	s_len = ((req->out_cnt + 3) / 4) *
+		sizeof(struct otx2_cpt_sglist_component);
+
+	dlen = g_len + s_len + SG_LIST_HDR_SIZE;
 
-	dlen = g_sz_bytes + s_sz_bytes + SG_LIST_HDR_SIZE;
-	align_dlen = ALIGN(dlen, align);
-	info_len = ALIGN(sizeof(*info), align);
-	total_mem_len = align_dlen + info_len + sizeof(union otx2_cpt_res_s);
+	/* Allocate extra memory for SG and response address alignment */
+	total_mem_len = ALIGN(info_len, OTX2_CPT_DPTR_RPTR_ALIGN);
+	total_mem_len += (ARCH_DMA_MINALIGN - 1) &
+			 ~(OTX2_CPT_DPTR_RPTR_ALIGN - 1);
+	total_mem_len += ALIGN(dlen, OTX2_CPT_RES_ADDR_ALIGN);
+	total_mem_len += sizeof(union otx2_cpt_res_s);
 
 	info = kzalloc(total_mem_len, gfp);
 	if (unlikely(!info))
 		return NULL;
 
 	info->dlen = dlen;
-	info->in_buffer = (u8 *)info + info_len;
+	info->in_buffer = PTR_ALIGN((u8 *)info + info_len, ARCH_DMA_MINALIGN);
+	info->out_buffer = info->in_buffer + SG_LIST_HDR_SIZE + g_len;
 
 	((u16 *)info->in_buffer)[0] = req->out_cnt;
 	((u16 *)info->in_buffer)[1] = req->in_cnt;
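Note: the new layout in otx2_cpt_reqmgr.h packs the info struct, the 8-byte-aligned SG lists, and a 32-byte-aligned result area into a single kzalloc() and pre-pads total_mem_len for the worst case. Below is a standalone sketch of that arithmetic; ALIGN()/PTR_ALIGN() are re-implemented here for illustration, ARCH_DMA_MINALIGN is assumed to be 128 (as on typical arm64 configs), and the sizes are placeholders rather than the real structures.

/*
 * Standalone sketch of the buffer layout arithmetic: info struct, padding
 * up to ARCH_DMA_MINALIGN, 8-byte-aligned SG lists, padding up to a
 * 32-byte-aligned result area.
 */
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

#define ALIGN(x, a)	(((x) + (a) - 1) & ~((uintptr_t)(a) - 1))
#define PTR_ALIGN(p, a)	((void *)ALIGN((uintptr_t)(p), (a)))

#define ARCH_DMA_MINALIGN		128	/* assumption for the example */
#define OTX2_CPT_DPTR_RPTR_ALIGN	8
#define OTX2_CPT_RES_ADDR_ALIGN		32

int main(void)
{
	size_t info_len = 256;	/* stand-in for sizeof(struct otx2_cpt_inst_info) */
	size_t sg_len = 96;	/* stand-in for gather + scatter list bytes */
	size_t res_len = 32;	/* stand-in for sizeof(union otx2_cpt_res_s) */
	size_t total;

	/*
	 * Worst-case extra bytes so in_buffer can be bumped up to
	 * ARCH_DMA_MINALIGN: the offset info_len is first rounded to 8,
	 * so at most (ARCH_DMA_MINALIGN - 1) & ~7 bytes of padding appear.
	 */
	total = ALIGN(info_len, OTX2_CPT_DPTR_RPTR_ALIGN);
	total += (ARCH_DMA_MINALIGN - 1) & ~(OTX2_CPT_DPTR_RPTR_ALIGN - 1);
	total += ALIGN(sg_len, OTX2_CPT_RES_ADDR_ALIGN);
	total += res_len;

	printf("total allocation: %zu bytes\n", total);

	/* Place the regions inside a (hypothetical) allocation at 'base'. */
	unsigned char buf[1024];
	unsigned char *base = buf;
	unsigned char *in_buffer = PTR_ALIGN(base + info_len, ARCH_DMA_MINALIGN);
	unsigned char *completion = PTR_ALIGN(in_buffer + sg_len,
					      OTX2_CPT_RES_ADDR_ALIGN);

	printf("in_buffer offset %td, result offset %td\n",
	       in_buffer - base, completion - base);
	return 0;
}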
@@ -460,7 +523,7 @@ otx2_sg_info_create(struct pci_dev *pdev, struct otx2_cpt_req_info *req,
 	}
 
 	if (setup_sgio_components(pdev, req->out, req->out_cnt,
-				  &info->in_buffer[8 + g_sz_bytes])) {
+				  info->out_buffer)) {
 		dev_err(&pdev->dev, "Failed to setup scatter list\n");
 		goto destroy_info;
 	}
@@ -476,8 +539,10 @@ otx2_sg_info_create(struct pci_dev *pdev, struct otx2_cpt_req_info *req,
 	 * Get buffer for union otx2_cpt_res_s response
 	 * structure and its physical address
 	 */
-	info->completion_addr = info->in_buffer + align_dlen;
-	info->comp_baddr = info->dptr_baddr + align_dlen;
+	info->completion_addr = PTR_ALIGN((info->in_buffer + dlen),
+					  OTX2_CPT_RES_ADDR_ALIGN);
+	info->comp_baddr = ALIGN((info->dptr_baddr + dlen),
+				 OTX2_CPT_RES_ADDR_ALIGN);
 
 	return info;
diff --git a/drivers/crypto/marvell/octeontx2/otx2_cptpf_ucode.c b/drivers/crypto/marvell/octeontx2/otx2_cptpf_ucode.c
index 5c9484646172..1493a373baf7 100644
--- a/drivers/crypto/marvell/octeontx2/otx2_cptpf_ucode.c
+++ b/drivers/crypto/marvell/octeontx2/otx2_cptpf_ucode.c
@@ -1490,11 +1490,13 @@ int otx2_cpt_discover_eng_capabilities(struct otx2_cptpf_dev *cptpf)
 	union otx2_cpt_opcode opcode;
 	union otx2_cpt_res_s *result;
 	union otx2_cpt_inst_s inst;
+	dma_addr_t result_baddr;
 	dma_addr_t rptr_baddr;
 	struct pci_dev *pdev;
-	u32 len, compl_rlen;
+	int timeout = 10000;
+	void *base, *rptr;
 	int ret, etype;
-	void *rptr;
+	u32 len;
 
 	/*
 	 * We don't get capabilities if it was already done
@@ -1519,22 +1521,28 @@ int otx2_cpt_discover_eng_capabilities(struct otx2_cptpf_dev *cptpf)
 	if (ret)
 		goto delete_grps;
 
-	compl_rlen = ALIGN(sizeof(union otx2_cpt_res_s), OTX2_CPT_DMA_MINALIGN);
-	len = compl_rlen + LOADFVC_RLEN;
+	/* Allocate extra memory for "rptr" and "result" pointer alignment */
+	len = LOADFVC_RLEN + ARCH_DMA_MINALIGN +
+	      sizeof(union otx2_cpt_res_s) + OTX2_CPT_RES_ADDR_ALIGN;
 
-	result = kzalloc(len, GFP_KERNEL);
-	if (!result) {
+	base = kzalloc(len, GFP_KERNEL);
+	if (!base) {
 		ret = -ENOMEM;
 		goto lf_cleanup;
 	}
-	rptr_baddr = dma_map_single(&pdev->dev, (void *)result, len,
-				    DMA_BIDIRECTIONAL);
+
+	rptr = PTR_ALIGN(base, ARCH_DMA_MINALIGN);
+	rptr_baddr = dma_map_single(&pdev->dev, rptr, len, DMA_BIDIRECTIONAL);
 	if (dma_mapping_error(&pdev->dev, rptr_baddr)) {
 		dev_err(&pdev->dev, "DMA mapping failed\n");
 		ret = -EFAULT;
-		goto free_result;
+		goto free_rptr;
 	}
-	rptr = (u8 *)result + compl_rlen;
+
+	result = (union otx2_cpt_res_s *)PTR_ALIGN(rptr + LOADFVC_RLEN,
+						   OTX2_CPT_RES_ADDR_ALIGN);
+	result_baddr = ALIGN(rptr_baddr + LOADFVC_RLEN,
+			     OTX2_CPT_RES_ADDR_ALIGN);
 
 	/* Fill in the command */
 	opcode.s.major = LOADFVC_MAJOR_OP;
@@ -1546,27 +1554,38 @@ int otx2_cpt_discover_eng_capabilities(struct otx2_cptpf_dev *cptpf)
 	/* 64-bit swap for microcode data reads, not needed for addresses */
 	cpu_to_be64s(&iq_cmd.cmd.u);
 	iq_cmd.dptr = 0;
-	iq_cmd.rptr = rptr_baddr + compl_rlen;
+	iq_cmd.rptr = rptr_baddr;
 	iq_cmd.cptr.u = 0;
 
 	for (etype = 1; etype < OTX2_CPT_MAX_ENG_TYPES; etype++) {
 		result->s.compcode = OTX2_CPT_COMPLETION_CODE_INIT;
 		iq_cmd.cptr.s.grp = otx2_cpt_get_eng_grp(&cptpf->eng_grps,
 							 etype);
-		otx2_cpt_fill_inst(&inst, &iq_cmd, rptr_baddr);
+		otx2_cpt_fill_inst(&inst, &iq_cmd, result_baddr);
 		lfs->ops->send_cmd(&inst, 1, &cptpf->lfs.lf[0]);
 
+		timeout = 10000;
 		while (lfs->ops->cpt_get_compcode(result) ==
-		       OTX2_CPT_COMPLETION_CODE_INIT)
+		       OTX2_CPT_COMPLETION_CODE_INIT) {
 			cpu_relax();
+			udelay(1);
+			timeout--;
+			if (!timeout) {
+				ret = -ENODEV;
+				cptpf->is_eng_caps_discovered = false;
+				dev_warn(&pdev->dev, "Timeout on CPT load_fvc completion poll\n");
+				goto error_no_response;
+			}
+		}
 
 		cptpf->eng_caps[etype].u = be64_to_cpup(rptr);
 	}
-	dma_unmap_single(&pdev->dev, rptr_baddr, len, DMA_BIDIRECTIONAL);
 	cptpf->is_eng_caps_discovered = true;
 
-free_result:
-	kfree(result);
+error_no_response:
+	dma_unmap_single(&pdev->dev, rptr_baddr, len, DMA_BIDIRECTIONAL);
+free_rptr:
+	kfree(base);
 
 lf_cleanup:
 	otx2_cptlf_shutdown(lfs);
 delete_grps:
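Note: the otx2_cptpf_ucode.c change above bounds the completion poll so a missing response no longer hangs the capability-discovery path. Below is a small userspace sketch of that bounded-poll pattern, with a stub read_compcode() standing in for the driver's lfs->ops->cpt_get_compcode() hook and usleep() standing in for udelay(); since the stub never completes, the helper gives up and returns an error instead of spinning forever.

/*
 * Userspace sketch of a bounded completion poll: wait roughly 1 us per
 * iteration and bail out with an error after a fixed number of tries.
 */
#include <stdio.h>
#include <unistd.h>

#define COMPLETION_CODE_INIT	0

static volatile int fake_compcode = COMPLETION_CODE_INIT;

static int read_compcode(void)
{
	return fake_compcode;
}

static int wait_for_completion(void)
{
	int timeout = 10000;	/* mirrors the driver's retry count */

	while (read_compcode() == COMPLETION_CODE_INIT) {
		usleep(1);		/* stands in for udelay(1) */
		if (!--timeout)
			return -1;	/* give up instead of hanging */
	}
	return 0;
}

int main(void)
{
	/* With the stub never completing, the poll returns an error. */
	printf("wait_for_completion() = %d\n", wait_for_completion());
	return 0;
}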
diff --git a/drivers/crypto/marvell/octeontx2/otx2_cptvf_reqmgr.c b/drivers/crypto/marvell/octeontx2/otx2_cptvf_reqmgr.c
index 5387c68f3c9d..426244107037 100644
--- a/drivers/crypto/marvell/octeontx2/otx2_cptvf_reqmgr.c
+++ b/drivers/crypto/marvell/octeontx2/otx2_cptvf_reqmgr.c
@@ -264,9 +264,10 @@ static int cpt_process_ccode(struct otx2_cptlfs_info *lfs,
 			break;
 		}
 
-		dev_err(&pdev->dev,
-			"Request failed with software error code 0x%x\n",
-			cpt_status->s.uc_compcode);
+		pr_debug("Request failed with software error code 0x%x: algo = %s driver = %s\n",
+			 cpt_status->s.uc_compcode,
+			 info->req->areq->tfm->__crt_alg->cra_name,
+			 info->req->areq->tfm->__crt_alg->cra_driver_name);
 		otx2_cpt_dump_sg_list(pdev, info->req);
 		break;
 	}