diff options
Diffstat (limited to 'drivers/crypto')
67 files changed, 926 insertions, 5688 deletions
diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig index 0991f026cb07..3d02702456a5 100644 --- a/drivers/crypto/Kconfig +++ b/drivers/crypto/Kconfig @@ -611,13 +611,13 @@ config CRYPTO_DEV_QCOM_RNG To compile this driver as a module, choose M here. The module will be called qcom-rng. If unsure, say N. -config CRYPTO_DEV_VMX - bool "Support for VMX cryptographic acceleration instructions" - depends on PPC64 && VSX - help - Support for VMX cryptographic acceleration instructions. - -source "drivers/crypto/vmx/Kconfig" +#config CRYPTO_DEV_VMX +# bool "Support for VMX cryptographic acceleration instructions" +# depends on PPC64 && VSX +# help +# Support for VMX cryptographic acceleration instructions. +# +#source "drivers/crypto/vmx/Kconfig" config CRYPTO_DEV_IMGTEC_HASH tristate "Imagination Technologies hardware hash accelerator" diff --git a/drivers/crypto/Makefile b/drivers/crypto/Makefile index d859d6a5f3a4..95331bc6456b 100644 --- a/drivers/crypto/Makefile +++ b/drivers/crypto/Makefile @@ -42,7 +42,7 @@ obj-$(CONFIG_CRYPTO_DEV_SL3516) += gemini/ obj-y += stm32/ obj-$(CONFIG_CRYPTO_DEV_TALITOS) += talitos.o obj-$(CONFIG_CRYPTO_DEV_VIRTIO) += virtio/ -obj-$(CONFIG_CRYPTO_DEV_VMX) += vmx/ +#obj-$(CONFIG_CRYPTO_DEV_VMX) += vmx/ obj-$(CONFIG_CRYPTO_DEV_BCM_SPU) += bcm/ obj-$(CONFIG_CRYPTO_DEV_SAFEXCEL) += inside-secure/ obj-$(CONFIG_CRYPTO_DEV_ARTPEC6) += axis/ diff --git a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-hash.c b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-hash.c index d358334e5981..ee2a28c906ed 100644 --- a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-hash.c +++ b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-hash.c @@ -362,7 +362,7 @@ int sun8i_ce_hash_run(struct crypto_engine *engine, void *breq) digestsize = SHA512_DIGEST_SIZE; /* the padding could be up to two block. */ - buf = kzalloc(bs * 2, GFP_KERNEL | GFP_DMA); + buf = kcalloc(2, bs, GFP_KERNEL | GFP_DMA); if (!buf) { err = -ENOMEM; goto theend; diff --git a/drivers/crypto/ccp/platform-access.c b/drivers/crypto/ccp/platform-access.c index 94367bc49e35..1b8ed3389733 100644 --- a/drivers/crypto/ccp/platform-access.c +++ b/drivers/crypto/ccp/platform-access.c @@ -118,9 +118,16 @@ int psp_send_platform_access_msg(enum psp_platform_access_msg msg, goto unlock; } - /* Store the status in request header for caller to investigate */ + /* + * Read status from PSP. If status is non-zero, it indicates an error + * occurred during "processing" of the command. + * If status is zero, it indicates the command was "processed" + * successfully, but the result of the command is in the payload. + * Return both cases to the caller as -EIO to investigate. + */ cmd_reg = ioread32(cmd); - req->header.status = FIELD_GET(PSP_CMDRESP_STS, cmd_reg); + if (FIELD_GET(PSP_CMDRESP_STS, cmd_reg)) + req->header.status = FIELD_GET(PSP_CMDRESP_STS, cmd_reg); if (req->header.status) { ret = -EIO; goto unlock; diff --git a/drivers/crypto/ccp/psp-dev.c b/drivers/crypto/ccp/psp-dev.c index 124a2e0c8999..56bf832c2947 100644 --- a/drivers/crypto/ccp/psp-dev.c +++ b/drivers/crypto/ccp/psp-dev.c @@ -156,11 +156,14 @@ static unsigned int psp_get_capability(struct psp_device *psp) } psp->capability = val; - /* Detect if TSME and SME are both enabled */ + /* Detect TSME and/or SME status */ if (PSP_CAPABILITY(psp, PSP_SECURITY_REPORTING) && - psp->capability & (PSP_SECURITY_TSME_STATUS << PSP_CAPABILITY_PSP_SECURITY_OFFSET) && - cc_platform_has(CC_ATTR_HOST_MEM_ENCRYPT)) - dev_notice(psp->dev, "psp: Both TSME and SME are active, SME is unnecessary when TSME is active.\n"); + psp->capability & (PSP_SECURITY_TSME_STATUS << PSP_CAPABILITY_PSP_SECURITY_OFFSET)) { + if (cc_platform_has(CC_ATTR_HOST_MEM_ENCRYPT)) + dev_notice(psp->dev, "psp: Both TSME and SME are active, SME is unnecessary when TSME is active.\n"); + else + dev_notice(psp->dev, "psp: TSME enabled\n"); + } return 0; } diff --git a/drivers/crypto/hisilicon/debugfs.c b/drivers/crypto/hisilicon/debugfs.c index 80ed4b2d209c..cd67fa348ca7 100644 --- a/drivers/crypto/hisilicon/debugfs.c +++ b/drivers/crypto/hisilicon/debugfs.c @@ -24,6 +24,8 @@ #define QM_DFX_QN_SHIFT 16 #define QM_DFX_CNT_CLR_CE 0x100118 #define QM_DBG_WRITE_LEN 1024 +#define QM_IN_IDLE_ST_REG 0x1040e4 +#define QM_IN_IDLE_STATE 0x1 static const char * const qm_debug_file_name[] = { [CURRENT_QM] = "current_qm", @@ -81,6 +83,30 @@ static const struct debugfs_reg32 qm_dfx_regs[] = { {"QM_DFX_FF_ST5 ", 0x1040dc}, {"QM_DFX_FF_ST6 ", 0x1040e0}, {"QM_IN_IDLE_ST ", 0x1040e4}, + {"QM_CACHE_CTL ", 0x100050}, + {"QM_TIMEOUT_CFG ", 0x100070}, + {"QM_DB_TIMEOUT_CFG ", 0x100074}, + {"QM_FLR_PENDING_TIME_CFG ", 0x100078}, + {"QM_ARUSR_MCFG1 ", 0x100088}, + {"QM_AWUSR_MCFG1 ", 0x100098}, + {"QM_AXI_M_CFG_ENABLE ", 0x1000B0}, + {"QM_RAS_CE_THRESHOLD ", 0x1000F8}, + {"QM_AXI_TIMEOUT_CTRL ", 0x100120}, + {"QM_AXI_TIMEOUT_STATUS ", 0x100124}, + {"QM_CQE_AGGR_TIMEOUT_CTRL ", 0x100144}, + {"ACC_RAS_MSI_INT_SEL ", 0x1040fc}, + {"QM_CQE_OUT ", 0x104100}, + {"QM_EQE_OUT ", 0x104104}, + {"QM_AEQE_OUT ", 0x104108}, + {"QM_DB_INFO0 ", 0x104180}, + {"QM_DB_INFO1 ", 0x104184}, + {"QM_AM_CTRL_GLOBAL ", 0x300000}, + {"QM_AM_CURR_PORT_STS ", 0x300100}, + {"QM_AM_CURR_TRANS_RETURN ", 0x300150}, + {"QM_AM_CURR_RD_MAX_TXID ", 0x300154}, + {"QM_AM_CURR_WR_MAX_TXID ", 0x300158}, + {"QM_AM_ALARM_RRESP ", 0x300180}, + {"QM_AM_ALARM_BRESP ", 0x300184}, }; static const struct debugfs_reg32 qm_vf_dfx_regs[] = { @@ -1001,6 +1027,30 @@ static int qm_diff_regs_show(struct seq_file *s, void *unused) } DEFINE_SHOW_ATTRIBUTE(qm_diff_regs); +static int qm_state_show(struct seq_file *s, void *unused) +{ + struct hisi_qm *qm = s->private; + u32 val; + int ret; + + /* If device is in suspended, directly return the idle state. */ + ret = hisi_qm_get_dfx_access(qm); + if (!ret) { + val = readl(qm->io_base + QM_IN_IDLE_ST_REG); + hisi_qm_put_dfx_access(qm); + } else if (ret == -EAGAIN) { + val = QM_IN_IDLE_STATE; + } else { + return ret; + } + + seq_printf(s, "%u\n", val); + + return 0; +} + +DEFINE_SHOW_ATTRIBUTE(qm_state); + static ssize_t qm_status_read(struct file *filp, char __user *buffer, size_t count, loff_t *pos) { @@ -1062,6 +1112,7 @@ DEFINE_DEBUGFS_ATTRIBUTE(qm_atomic64_ops, qm_debugfs_atomic64_get, void hisi_qm_debug_init(struct hisi_qm *qm) { struct dfx_diff_registers *qm_regs = qm->debug.qm_diff_regs; + struct qm_dev_dfx *dev_dfx = &qm->debug.dev_dfx; struct qm_dfx *dfx = &qm->debug.dfx; struct dentry *qm_d; void *data; @@ -1072,6 +1123,9 @@ void hisi_qm_debug_init(struct hisi_qm *qm) /* only show this in PF */ if (qm->fun_type == QM_HW_PF) { + debugfs_create_file("qm_state", 0444, qm->debug.qm_d, + qm, &qm_state_fops); + qm_create_debugfs_file(qm, qm->debug.debug_root, CURRENT_QM); for (i = CURRENT_Q; i < DEBUG_FILE_NUM; i++) qm_create_debugfs_file(qm, qm->debug.qm_d, i); @@ -1087,6 +1141,10 @@ void hisi_qm_debug_init(struct hisi_qm *qm) debugfs_create_file("status", 0444, qm->debug.qm_d, qm, &qm_status_fops); + + debugfs_create_u32("dev_state", 0444, qm->debug.qm_d, &dev_dfx->dev_state); + debugfs_create_u32("dev_timeout", 0644, qm->debug.qm_d, &dev_dfx->dev_timeout); + for (i = 0; i < ARRAY_SIZE(qm_dfx_files); i++) { data = (atomic64_t *)((uintptr_t)dfx + qm_dfx_files[i].offset); debugfs_create_file(qm_dfx_files[i].name, diff --git a/drivers/crypto/hisilicon/hpre/hpre_main.c b/drivers/crypto/hisilicon/hpre/hpre_main.c index 3255b2a070c7..d93aa6630a57 100644 --- a/drivers/crypto/hisilicon/hpre/hpre_main.c +++ b/drivers/crypto/hisilicon/hpre/hpre_main.c @@ -440,7 +440,7 @@ MODULE_PARM_DESC(vfs_num, "Number of VFs to enable(1-63), 0(default)"); struct hisi_qp *hpre_create_qp(u8 type) { - int node = cpu_to_node(smp_processor_id()); + int node = cpu_to_node(raw_smp_processor_id()); struct hisi_qp *qp = NULL; int ret; diff --git a/drivers/crypto/hisilicon/qm.c b/drivers/crypto/hisilicon/qm.c index 4b20b94e6371..92f0a1d9b4a6 100644 --- a/drivers/crypto/hisilicon/qm.c +++ b/drivers/crypto/hisilicon/qm.c @@ -236,6 +236,12 @@ #define QM_DEV_ALG_MAX_LEN 256 + /* abnormal status value for stopping queue */ +#define QM_STOP_QUEUE_FAIL 1 +#define QM_DUMP_SQC_FAIL 3 +#define QM_DUMP_CQC_FAIL 4 +#define QM_FINISH_WAIT 5 + #define QM_MK_CQC_DW3_V1(hop_num, pg_sz, buf_sz, cqe_sz) \ (((hop_num) << QM_CQ_HOP_NUM_SHIFT) | \ ((pg_sz) << QM_CQ_PAGE_SIZE_SHIFT) | \ @@ -312,6 +318,7 @@ static const struct hisi_qm_cap_info qm_cap_info_comm[] = { {QM_SUPPORT_DB_ISOLATION, 0x30, 0, BIT(0), 0x0, 0x0, 0x0}, {QM_SUPPORT_FUNC_QOS, 0x3100, 0, BIT(8), 0x0, 0x0, 0x1}, {QM_SUPPORT_STOP_QP, 0x3100, 0, BIT(9), 0x0, 0x0, 0x1}, + {QM_SUPPORT_STOP_FUNC, 0x3100, 0, BIT(10), 0x0, 0x0, 0x1}, {QM_SUPPORT_MB_COMMAND, 0x3100, 0, BIT(11), 0x0, 0x0, 0x1}, {QM_SUPPORT_SVA_PREFETCH, 0x3100, 0, BIT(14), 0x0, 0x0, 0x1}, }; @@ -1674,6 +1681,11 @@ unlock: return ret; } +static int qm_drain_qm(struct hisi_qm *qm) +{ + return hisi_qm_mb(qm, QM_MB_CMD_FLUSH_QM, 0, 0, 0); +} + static int qm_stop_qp(struct hisi_qp *qp) { return hisi_qm_mb(qp->qm, QM_MB_CMD_STOP_QP, 0, qp->qp_id, 0); @@ -2031,43 +2043,25 @@ static void qp_stop_fail_cb(struct hisi_qp *qp) } } -/** - * qm_drain_qp() - Drain a qp. - * @qp: The qp we want to drain. - * - * Determine whether the queue is cleared by judging the tail pointers of - * sq and cq. - */ -static int qm_drain_qp(struct hisi_qp *qp) +static int qm_wait_qp_empty(struct hisi_qm *qm, u32 *state, u32 qp_id) { - struct hisi_qm *qm = qp->qm; struct device *dev = &qm->pdev->dev; struct qm_sqc sqc; struct qm_cqc cqc; int ret, i = 0; - /* No need to judge if master OOO is blocked. */ - if (qm_check_dev_error(qm)) - return 0; - - /* Kunpeng930 supports drain qp by device */ - if (test_bit(QM_SUPPORT_STOP_QP, &qm->caps)) { - ret = qm_stop_qp(qp); - if (ret) - dev_err(dev, "Failed to stop qp(%u)!\n", qp->qp_id); - return ret; - } - while (++i) { - ret = qm_set_and_get_xqc(qm, QM_MB_CMD_SQC, &sqc, qp->qp_id, 1); + ret = qm_set_and_get_xqc(qm, QM_MB_CMD_SQC, &sqc, qp_id, 1); if (ret) { dev_err_ratelimited(dev, "Failed to dump sqc!\n"); + *state = QM_DUMP_SQC_FAIL; return ret; } - ret = qm_set_and_get_xqc(qm, QM_MB_CMD_CQC, &cqc, qp->qp_id, 1); + ret = qm_set_and_get_xqc(qm, QM_MB_CMD_CQC, &cqc, qp_id, 1); if (ret) { dev_err_ratelimited(dev, "Failed to dump cqc!\n"); + *state = QM_DUMP_CQC_FAIL; return ret; } @@ -2076,8 +2070,9 @@ static int qm_drain_qp(struct hisi_qp *qp) break; if (i == MAX_WAIT_COUNTS) { - dev_err(dev, "Fail to empty queue %u!\n", qp->qp_id); - return -EBUSY; + dev_err(dev, "Fail to empty queue %u!\n", qp_id); + *state = QM_STOP_QUEUE_FAIL; + return -ETIMEDOUT; } usleep_range(WAIT_PERIOD_US_MIN, WAIT_PERIOD_US_MAX); @@ -2086,9 +2081,53 @@ static int qm_drain_qp(struct hisi_qp *qp) return 0; } -static int qm_stop_qp_nolock(struct hisi_qp *qp) +/** + * qm_drain_qp() - Drain a qp. + * @qp: The qp we want to drain. + * + * If the device does not support stopping queue by sending mailbox, + * determine whether the queue is cleared by judging the tail pointers of + * sq and cq. + */ +static int qm_drain_qp(struct hisi_qp *qp) +{ + struct hisi_qm *qm = qp->qm; + struct hisi_qm *pf_qm = pci_get_drvdata(pci_physfn(qm->pdev)); + u32 state = 0; + int ret; + + /* No need to judge if master OOO is blocked. */ + if (qm_check_dev_error(pf_qm)) + return 0; + + /* HW V3 supports drain qp by device */ + if (test_bit(QM_SUPPORT_STOP_QP, &qm->caps)) { + ret = qm_stop_qp(qp); + if (ret) { + dev_err(&qm->pdev->dev, "Failed to stop qp!\n"); + state = QM_STOP_QUEUE_FAIL; + goto set_dev_state; + } + return ret; + } + + ret = qm_wait_qp_empty(qm, &state, qp->qp_id); + if (ret) + goto set_dev_state; + + return 0; + +set_dev_state: + if (qm->debug.dev_dfx.dev_timeout) + qm->debug.dev_dfx.dev_state = state; + + return ret; +} + +static void qm_stop_qp_nolock(struct hisi_qp *qp) { - struct device *dev = &qp->qm->pdev->dev; + struct hisi_qm *qm = qp->qm; + struct device *dev = &qm->pdev->dev; int ret; /* @@ -2099,39 +2138,36 @@ static int qm_stop_qp_nolock(struct hisi_qp *qp) */ if (atomic_read(&qp->qp_status.flags) != QP_START) { qp->is_resetting = false; - return 0; + return; } atomic_set(&qp->qp_status.flags, QP_STOP); - ret = qm_drain_qp(qp); - if (ret) - dev_err(dev, "Failed to drain out data for stopping!\n"); + /* V3 supports direct stop function when FLR prepare */ + if (qm->ver < QM_HW_V3 || qm->status.stop_reason == QM_NORMAL) { + ret = qm_drain_qp(qp); + if (ret) + dev_err(dev, "Failed to drain out data for stopping qp(%u)!\n", qp->qp_id); + } - flush_workqueue(qp->qm->wq); + flush_workqueue(qm->wq); if (unlikely(qp->is_resetting && atomic_read(&qp->qp_status.used))) qp_stop_fail_cb(qp); dev_dbg(dev, "stop queue %u!", qp->qp_id); - - return 0; } /** * hisi_qm_stop_qp() - Stop a qp in qm. * @qp: The qp we want to stop. * - * This function is reverse of hisi_qm_start_qp. Return 0 if successful. + * This function is reverse of hisi_qm_start_qp. */ -int hisi_qm_stop_qp(struct hisi_qp *qp) +void hisi_qm_stop_qp(struct hisi_qp *qp) { - int ret; - down_write(&qp->qm->qps_lock); - ret = qm_stop_qp_nolock(qp); + qm_stop_qp_nolock(qp); up_write(&qp->qm->qps_lock); - - return ret; } EXPORT_SYMBOL_GPL(hisi_qm_stop_qp); @@ -2309,7 +2345,31 @@ static int hisi_qm_uacce_start_queue(struct uacce_queue *q) static void hisi_qm_uacce_stop_queue(struct uacce_queue *q) { - hisi_qm_stop_qp(q->priv); + struct hisi_qp *qp = q->priv; + struct hisi_qm *qm = qp->qm; + struct qm_dev_dfx *dev_dfx = &qm->debug.dev_dfx; + u32 i = 0; + + hisi_qm_stop_qp(qp); + + if (!dev_dfx->dev_timeout || !dev_dfx->dev_state) + return; + + /* + * After the queue fails to be stopped, + * wait for a period of time before releasing the queue. + */ + while (++i) { + msleep(WAIT_PERIOD); + + /* Since dev_timeout maybe modified, check i >= dev_timeout */ + if (i >= dev_dfx->dev_timeout) { + dev_err(&qm->pdev->dev, "Stop q %u timeout, state %u\n", + qp->qp_id, dev_dfx->dev_state); + dev_dfx->dev_state = QM_FINISH_WAIT; + break; + } + } } static int hisi_qm_is_q_updated(struct uacce_queue *q) @@ -3054,25 +3114,18 @@ static int qm_restart(struct hisi_qm *qm) } /* Stop started qps in reset flow */ -static int qm_stop_started_qp(struct hisi_qm *qm) +static void qm_stop_started_qp(struct hisi_qm *qm) { - struct device *dev = &qm->pdev->dev; struct hisi_qp *qp; - int i, ret; + int i; for (i = 0; i < qm->qp_num; i++) { qp = &qm->qp_array[i]; - if (qp && atomic_read(&qp->qp_status.flags) == QP_START) { + if (atomic_read(&qp->qp_status.flags) == QP_START) { qp->is_resetting = true; - ret = qm_stop_qp_nolock(qp); - if (ret < 0) { - dev_err(dev, "Failed to stop qp%d!\n", i); - return ret; - } + qm_stop_qp_nolock(qp); } } - - return 0; } /** @@ -3112,21 +3165,31 @@ int hisi_qm_stop(struct hisi_qm *qm, enum qm_stop_reason r) down_write(&qm->qps_lock); - qm->status.stop_reason = r; if (atomic_read(&qm->status.flags) == QM_STOP) goto err_unlock; /* Stop all the request sending at first. */ atomic_set(&qm->status.flags, QM_STOP); + qm->status.stop_reason = r; - if (qm->status.stop_reason == QM_SOFT_RESET || - qm->status.stop_reason == QM_DOWN) { + if (qm->status.stop_reason != QM_NORMAL) { hisi_qm_set_hw_reset(qm, QM_RESET_STOP_TX_OFFSET); - ret = qm_stop_started_qp(qm); - if (ret < 0) { - dev_err(dev, "Failed to stop started qp!\n"); - goto err_unlock; + /* + * When performing soft reset, the hardware will no longer + * do tasks, and the tasks in the device will be flushed + * out directly since the master ooo is closed. + */ + if (test_bit(QM_SUPPORT_STOP_FUNC, &qm->caps) && + r != QM_SOFT_RESET) { + ret = qm_drain_qm(qm); + if (ret) { + dev_err(dev, "failed to drain qm!\n"); + goto err_unlock; + } } + + qm_stop_started_qp(qm); + hisi_qm_set_hw_reset(qm, QM_RESET_STOP_RX_OFFSET); } @@ -3141,6 +3204,7 @@ int hisi_qm_stop(struct hisi_qm *qm, enum qm_stop_reason r) } qm_clear_queues(qm); + qm->status.stop_reason = QM_NORMAL; err_unlock: up_write(&qm->qps_lock); diff --git a/drivers/crypto/hisilicon/sec2/sec_crypto.c b/drivers/crypto/hisilicon/sec2/sec_crypto.c index f028dcfd0ead..93a972fcbf63 100644 --- a/drivers/crypto/hisilicon/sec2/sec_crypto.c +++ b/drivers/crypto/hisilicon/sec2/sec_crypto.c @@ -118,7 +118,7 @@ struct sec_aead { }; /* Get an en/de-cipher queue cyclically to balance load over queues of TFM */ -static inline int sec_alloc_queue_id(struct sec_ctx *ctx, struct sec_req *req) +static inline u32 sec_alloc_queue_id(struct sec_ctx *ctx, struct sec_req *req) { if (req->c_req.encrypt) return (u32)atomic_inc_return(&ctx->enc_qcyclic) % @@ -485,8 +485,7 @@ static void sec_alg_resource_free(struct sec_ctx *ctx, sec_free_mac_resource(dev, qp_ctx->res); } -static int sec_alloc_qp_ctx_resource(struct hisi_qm *qm, struct sec_ctx *ctx, - struct sec_qp_ctx *qp_ctx) +static int sec_alloc_qp_ctx_resource(struct sec_ctx *ctx, struct sec_qp_ctx *qp_ctx) { u16 q_depth = qp_ctx->qp->sq_depth; struct device *dev = ctx->dev; @@ -541,8 +540,7 @@ static void sec_free_qp_ctx_resource(struct sec_ctx *ctx, struct sec_qp_ctx *qp_ kfree(qp_ctx->req_list); } -static int sec_create_qp_ctx(struct hisi_qm *qm, struct sec_ctx *ctx, - int qp_ctx_id, int alg_type) +static int sec_create_qp_ctx(struct sec_ctx *ctx, int qp_ctx_id) { struct sec_qp_ctx *qp_ctx; struct hisi_qp *qp; @@ -561,7 +559,7 @@ static int sec_create_qp_ctx(struct hisi_qm *qm, struct sec_ctx *ctx, idr_init(&qp_ctx->req_idr); INIT_LIST_HEAD(&qp_ctx->backlog); - ret = sec_alloc_qp_ctx_resource(qm, ctx, qp_ctx); + ret = sec_alloc_qp_ctx_resource(ctx, qp_ctx); if (ret) goto err_destroy_idr; @@ -614,7 +612,7 @@ static int sec_ctx_base_init(struct sec_ctx *ctx) } for (i = 0; i < sec->ctx_q_num; i++) { - ret = sec_create_qp_ctx(&sec->qm, ctx, i, 0); + ret = sec_create_qp_ctx(ctx, i); if (ret) goto err_sec_release_qp_ctx; } @@ -750,9 +748,7 @@ static void sec_skcipher_uninit(struct crypto_skcipher *tfm) sec_ctx_base_uninit(ctx); } -static int sec_skcipher_3des_setkey(struct crypto_skcipher *tfm, const u8 *key, - const u32 keylen, - const enum sec_cmode c_mode) +static int sec_skcipher_3des_setkey(struct crypto_skcipher *tfm, const u8 *key, const u32 keylen) { struct sec_ctx *ctx = crypto_skcipher_ctx(tfm); struct sec_cipher_ctx *c_ctx = &ctx->c_ctx; @@ -843,7 +839,7 @@ static int sec_skcipher_setkey(struct crypto_skcipher *tfm, const u8 *key, switch (c_alg) { case SEC_CALG_3DES: - ret = sec_skcipher_3des_setkey(tfm, key, keylen, c_mode); + ret = sec_skcipher_3des_setkey(tfm, key, keylen); break; case SEC_CALG_AES: case SEC_CALG_SM4: @@ -1371,7 +1367,7 @@ static int sec_skcipher_bd_fill_v3(struct sec_ctx *ctx, struct sec_req *req) sec_sqe3->bd_param = cpu_to_le32(bd_param); sec_sqe3->c_len_ivin |= cpu_to_le32(c_req->c_len); - sec_sqe3->tag = cpu_to_le64(req); + sec_sqe3->tag = cpu_to_le64((unsigned long)req); return 0; } @@ -2145,8 +2141,8 @@ static int sec_skcipher_decrypt(struct skcipher_request *sk_req) return sec_skcipher_crypto(sk_req, false); } -#define SEC_SKCIPHER_GEN_ALG(sec_cra_name, sec_set_key, sec_min_key_size, \ - sec_max_key_size, ctx_init, ctx_exit, blk_size, iv_size)\ +#define SEC_SKCIPHER_ALG(sec_cra_name, sec_set_key, \ + sec_min_key_size, sec_max_key_size, blk_size, iv_size)\ {\ .base = {\ .cra_name = sec_cra_name,\ @@ -2158,8 +2154,8 @@ static int sec_skcipher_decrypt(struct skcipher_request *sk_req) .cra_ctxsize = sizeof(struct sec_ctx),\ .cra_module = THIS_MODULE,\ },\ - .init = ctx_init,\ - .exit = ctx_exit,\ + .init = sec_skcipher_ctx_init,\ + .exit = sec_skcipher_ctx_exit,\ .setkey = sec_set_key,\ .decrypt = sec_skcipher_decrypt,\ .encrypt = sec_skcipher_encrypt,\ @@ -2168,11 +2164,6 @@ static int sec_skcipher_decrypt(struct skcipher_request *sk_req) .ivsize = iv_size,\ } -#define SEC_SKCIPHER_ALG(name, key_func, min_key_size, \ - max_key_size, blk_size, iv_size) \ - SEC_SKCIPHER_GEN_ALG(name, key_func, min_key_size, max_key_size, \ - sec_skcipher_ctx_init, sec_skcipher_ctx_exit, blk_size, iv_size) - static struct sec_skcipher sec_skciphers[] = { { .alg_msk = BIT(0), diff --git a/drivers/crypto/hisilicon/sec2/sec_main.c b/drivers/crypto/hisilicon/sec2/sec_main.c index 7bb99381bbdf..c290d8937b19 100644 --- a/drivers/crypto/hisilicon/sec2/sec_main.c +++ b/drivers/crypto/hisilicon/sec2/sec_main.c @@ -282,6 +282,11 @@ static const struct debugfs_reg32 sec_dfx_regs[] = { {"SEC_BD_SAA6 ", 0x301C38}, {"SEC_BD_SAA7 ", 0x301C3C}, {"SEC_BD_SAA8 ", 0x301C40}, + {"SEC_RAS_CE_ENABLE ", 0x301050}, + {"SEC_RAS_FE_ENABLE ", 0x301054}, + {"SEC_RAS_NFE_ENABLE ", 0x301058}, + {"SEC_REQ_TRNG_TIME_TH ", 0x30112C}, + {"SEC_CHANNEL_RNG_REQ_THLD ", 0x302110}, }; /* define the SEC's dfx regs region and region length */ @@ -374,7 +379,7 @@ void sec_destroy_qps(struct hisi_qp **qps, int qp_num) struct hisi_qp **sec_create_qps(void) { - int node = cpu_to_node(smp_processor_id()); + int node = cpu_to_node(raw_smp_processor_id()); u32 ctx_num = ctx_q_num; struct hisi_qp **qps; int ret; diff --git a/drivers/crypto/hisilicon/zip/zip_crypto.c b/drivers/crypto/hisilicon/zip/zip_crypto.c index c650c741a18d..94e2d66b04b6 100644 --- a/drivers/crypto/hisilicon/zip/zip_crypto.c +++ b/drivers/crypto/hisilicon/zip/zip_crypto.c @@ -591,6 +591,7 @@ static struct acomp_alg hisi_zip_acomp_deflate = { .base = { .cra_name = "deflate", .cra_driver_name = "hisi-deflate-acomp", + .cra_flags = CRYPTO_ALG_ASYNC, .cra_module = THIS_MODULE, .cra_priority = HZIP_ALG_PRIORITY, .cra_ctxsize = sizeof(struct hisi_zip_ctx), diff --git a/drivers/crypto/hisilicon/zip/zip_main.c b/drivers/crypto/hisilicon/zip/zip_main.c index 479ba8a1d6b5..c065fd867161 100644 --- a/drivers/crypto/hisilicon/zip/zip_main.c +++ b/drivers/crypto/hisilicon/zip/zip_main.c @@ -454,7 +454,7 @@ MODULE_DEVICE_TABLE(pci, hisi_zip_dev_ids); int zip_create_qps(struct hisi_qp **qps, int qp_num, int node) { if (node == NUMA_NO_NODE) - node = cpu_to_node(smp_processor_id()); + node = cpu_to_node(raw_smp_processor_id()); return hisi_qm_alloc_qps_node(&zip_devices, qp_num, 0, node, qps); } diff --git a/drivers/crypto/intel/iaa/iaa_crypto.h b/drivers/crypto/intel/iaa/iaa_crypto.h index 014420f7beb0..2524091a5f70 100644 --- a/drivers/crypto/intel/iaa/iaa_crypto.h +++ b/drivers/crypto/intel/iaa/iaa_crypto.h @@ -59,10 +59,8 @@ struct iaa_device_compression_mode { const char *name; struct aecs_comp_table_record *aecs_comp_table; - struct aecs_decomp_table_record *aecs_decomp_table; dma_addr_t aecs_comp_table_dma_addr; - dma_addr_t aecs_decomp_table_dma_addr; }; /* Representation of IAA device with wqs, populated by probe */ @@ -107,23 +105,6 @@ struct aecs_comp_table_record { u32 reserved_padding[2]; } __packed; -/* AECS for decompress */ -struct aecs_decomp_table_record { - u32 crc; - u32 xor_checksum; - u32 low_filter_param; - u32 high_filter_param; - u32 output_mod_idx; - u32 drop_init_decomp_out_bytes; - u32 reserved[36]; - u32 output_accum_data[2]; - u32 out_bits_valid; - u32 bit_off_indexing; - u32 input_accum_data[64]; - u8 size_qw[32]; - u32 decomp_state[1220]; -} __packed; - int iaa_aecs_init_fixed(void); void iaa_aecs_cleanup_fixed(void); @@ -136,9 +117,6 @@ struct iaa_compression_mode { int ll_table_size; u32 *d_table; int d_table_size; - u32 *header_table; - int header_table_size; - u16 gen_decomp_table_flags; iaa_dev_comp_init_fn_t init; iaa_dev_comp_free_fn_t free; }; @@ -148,9 +126,6 @@ int add_iaa_compression_mode(const char *name, int ll_table_size, const u32 *d_table, int d_table_size, - const u8 *header_table, - int header_table_size, - u16 gen_decomp_table_flags, iaa_dev_comp_init_fn_t init, iaa_dev_comp_free_fn_t free); diff --git a/drivers/crypto/intel/iaa/iaa_crypto_comp_fixed.c b/drivers/crypto/intel/iaa/iaa_crypto_comp_fixed.c index 45cf5d74f0fb..19d9a333ac49 100644 --- a/drivers/crypto/intel/iaa/iaa_crypto_comp_fixed.c +++ b/drivers/crypto/intel/iaa/iaa_crypto_comp_fixed.c @@ -78,7 +78,6 @@ int iaa_aecs_init_fixed(void) sizeof(fixed_ll_sym), fixed_d_sym, sizeof(fixed_d_sym), - NULL, 0, 0, init_fixed_mode, NULL); if (!ret) pr_debug("IAA fixed compression mode initialized\n"); diff --git a/drivers/crypto/intel/iaa/iaa_crypto_main.c b/drivers/crypto/intel/iaa/iaa_crypto_main.c index dfd3baf0a8d8..1cd304de5388 100644 --- a/drivers/crypto/intel/iaa/iaa_crypto_main.c +++ b/drivers/crypto/intel/iaa/iaa_crypto_main.c @@ -258,16 +258,14 @@ static void free_iaa_compression_mode(struct iaa_compression_mode *mode) kfree(mode->name); kfree(mode->ll_table); kfree(mode->d_table); - kfree(mode->header_table); kfree(mode); } /* - * IAA Compression modes are defined by an ll_table, a d_table, and an - * optional header_table. These tables are typically generated and - * captured using statistics collected from running actual - * compress/decompress workloads. + * IAA Compression modes are defined by an ll_table and a d_table. + * These tables are typically generated and captured using statistics + * collected from running actual compress/decompress workloads. * * A module or other kernel code can add and remove compression modes * with a given name using the exported @add_iaa_compression_mode() @@ -315,9 +313,6 @@ EXPORT_SYMBOL_GPL(remove_iaa_compression_mode); * @ll_table_size: The ll table size in bytes * @d_table: The d table * @d_table_size: The d table size in bytes - * @header_table: Optional header table - * @header_table_size: Optional header table size in bytes - * @gen_decomp_table_flags: Otional flags used to generate the decomp table * @init: Optional callback function to init the compression mode data * @free: Optional callback function to free the compression mode data * @@ -330,9 +325,6 @@ int add_iaa_compression_mode(const char *name, int ll_table_size, const u32 *d_table, int d_table_size, - const u8 *header_table, - int header_table_size, - u16 gen_decomp_table_flags, iaa_dev_comp_init_fn_t init, iaa_dev_comp_free_fn_t free) { @@ -370,16 +362,6 @@ int add_iaa_compression_mode(const char *name, mode->d_table_size = d_table_size; } - if (header_table) { - mode->header_table = kzalloc(header_table_size, GFP_KERNEL); - if (!mode->header_table) - goto free; - memcpy(mode->header_table, header_table, header_table_size); - mode->header_table_size = header_table_size; - } - - mode->gen_decomp_table_flags = gen_decomp_table_flags; - mode->init = init; mode->free = free; @@ -420,10 +402,6 @@ static void free_device_compression_mode(struct iaa_device *iaa_device, if (device_mode->aecs_comp_table) dma_free_coherent(dev, size, device_mode->aecs_comp_table, device_mode->aecs_comp_table_dma_addr); - if (device_mode->aecs_decomp_table) - dma_free_coherent(dev, size, device_mode->aecs_decomp_table, - device_mode->aecs_decomp_table_dma_addr); - kfree(device_mode); } @@ -440,73 +418,6 @@ static int check_completion(struct device *dev, bool compress, bool only_once); -static int decompress_header(struct iaa_device_compression_mode *device_mode, - struct iaa_compression_mode *mode, - struct idxd_wq *wq) -{ - dma_addr_t src_addr, src2_addr; - struct idxd_desc *idxd_desc; - struct iax_hw_desc *desc; - struct device *dev; - int ret = 0; - - idxd_desc = idxd_alloc_desc(wq, IDXD_OP_BLOCK); - if (IS_ERR(idxd_desc)) - return PTR_ERR(idxd_desc); - - desc = idxd_desc->iax_hw; - - dev = &wq->idxd->pdev->dev; - - src_addr = dma_map_single(dev, (void *)mode->header_table, - mode->header_table_size, DMA_TO_DEVICE); - dev_dbg(dev, "%s: mode->name %s, src_addr %llx, dev %p, src %p, slen %d\n", - __func__, mode->name, src_addr, dev, - mode->header_table, mode->header_table_size); - if (unlikely(dma_mapping_error(dev, src_addr))) { - dev_dbg(dev, "dma_map_single err, exiting\n"); - ret = -ENOMEM; - return ret; - } - - desc->flags = IAX_AECS_GEN_FLAG; - desc->opcode = IAX_OPCODE_DECOMPRESS; - - desc->src1_addr = (u64)src_addr; - desc->src1_size = mode->header_table_size; - - src2_addr = device_mode->aecs_decomp_table_dma_addr; - desc->src2_addr = (u64)src2_addr; - desc->src2_size = 1088; - dev_dbg(dev, "%s: mode->name %s, src2_addr %llx, dev %p, src2_size %d\n", - __func__, mode->name, desc->src2_addr, dev, desc->src2_size); - desc->max_dst_size = 0; // suppressed output - - desc->decompr_flags = mode->gen_decomp_table_flags; - - desc->priv = 0; - - desc->completion_addr = idxd_desc->compl_dma; - - ret = idxd_submit_desc(wq, idxd_desc); - if (ret) { - pr_err("%s: submit_desc failed ret=0x%x\n", __func__, ret); - goto out; - } - - ret = check_completion(dev, idxd_desc->iax_completion, false, false); - if (ret) - dev_dbg(dev, "%s: mode->name %s check_completion failed ret=%d\n", - __func__, mode->name, ret); - else - dev_dbg(dev, "%s: mode->name %s succeeded\n", __func__, - mode->name); -out: - dma_unmap_single(dev, src_addr, 1088, DMA_TO_DEVICE); - - return ret; -} - static int init_device_compression_mode(struct iaa_device *iaa_device, struct iaa_compression_mode *mode, int idx, struct idxd_wq *wq) @@ -529,24 +440,11 @@ static int init_device_compression_mode(struct iaa_device *iaa_device, if (!device_mode->aecs_comp_table) goto free; - device_mode->aecs_decomp_table = dma_alloc_coherent(dev, size, - &device_mode->aecs_decomp_table_dma_addr, GFP_KERNEL); - if (!device_mode->aecs_decomp_table) - goto free; - /* Add Huffman table to aecs */ memset(device_mode->aecs_comp_table, 0, sizeof(*device_mode->aecs_comp_table)); memcpy(device_mode->aecs_comp_table->ll_sym, mode->ll_table, mode->ll_table_size); memcpy(device_mode->aecs_comp_table->d_sym, mode->d_table, mode->d_table_size); - if (mode->header_table) { - ret = decompress_header(device_mode, mode, wq); - if (ret) { - pr_debug("iaa header decompression failed: ret=%d\n", ret); - goto free; - } - } - if (mode->init) { ret = mode->init(device_mode); if (ret) @@ -1324,7 +1222,7 @@ static int iaa_compress(struct crypto_tfm *tfm, struct acomp_req *req, *compression_crc = idxd_desc->iax_completion->crc; - if (!ctx->async_mode) + if (!ctx->async_mode || disable_async) idxd_free_desc(wq, idxd_desc); out: return ret; @@ -1570,7 +1468,7 @@ static int iaa_decompress(struct crypto_tfm *tfm, struct acomp_req *req, *dlen = req->dlen; - if (!ctx->async_mode) + if (!ctx->async_mode || disable_async) idxd_free_desc(wq, idxd_desc); /* Update stats */ @@ -1596,6 +1494,7 @@ static int iaa_comp_acompress(struct acomp_req *req) u32 compression_crc; struct idxd_wq *wq; struct device *dev; + u64 start_time_ns; int order = -1; compression_ctx = crypto_tfm_ctx(tfm); @@ -1669,8 +1568,10 @@ static int iaa_comp_acompress(struct acomp_req *req) " req->dlen %d, sg_dma_len(sg) %d\n", dst_addr, nr_sgs, req->dst, req->dlen, sg_dma_len(req->dst)); + start_time_ns = iaa_get_ts(); ret = iaa_compress(tfm, req, wq, src_addr, req->slen, dst_addr, &req->dlen, &compression_crc, disable_async); + update_max_comp_delay_ns(start_time_ns); if (ret == -EINPROGRESS) return ret; @@ -1717,6 +1618,7 @@ static int iaa_comp_adecompress_alloc_dest(struct acomp_req *req) struct iaa_wq *iaa_wq; struct device *dev; struct idxd_wq *wq; + u64 start_time_ns; int order = -1; cpu = get_cpu(); @@ -1773,8 +1675,10 @@ alloc_dest: dev_dbg(dev, "dma_map_sg, dst_addr %llx, nr_sgs %d, req->dst %p," " req->dlen %d, sg_dma_len(sg) %d\n", dst_addr, nr_sgs, req->dst, req->dlen, sg_dma_len(req->dst)); + start_time_ns = iaa_get_ts(); ret = iaa_decompress(tfm, req, wq, src_addr, req->slen, dst_addr, &req->dlen, true); + update_max_decomp_delay_ns(start_time_ns); if (ret == -EOVERFLOW) { dma_unmap_sg(dev, req->dst, sg_nents(req->dst), DMA_FROM_DEVICE); req->dlen *= 2; @@ -1805,6 +1709,7 @@ static int iaa_comp_adecompress(struct acomp_req *req) int nr_sgs, cpu, ret = 0; struct iaa_wq *iaa_wq; struct device *dev; + u64 start_time_ns; struct idxd_wq *wq; if (!iaa_crypto_enabled) { @@ -1864,8 +1769,10 @@ static int iaa_comp_adecompress(struct acomp_req *req) " req->dlen %d, sg_dma_len(sg) %d\n", dst_addr, nr_sgs, req->dst, req->dlen, sg_dma_len(req->dst)); + start_time_ns = iaa_get_ts(); ret = iaa_decompress(tfm, req, wq, src_addr, req->slen, dst_addr, &req->dlen, false); + update_max_decomp_delay_ns(start_time_ns); if (ret == -EINPROGRESS) return ret; @@ -1916,6 +1823,7 @@ static struct acomp_alg iaa_acomp_fixed_deflate = { .base = { .cra_name = "deflate", .cra_driver_name = "deflate-iaa", + .cra_flags = CRYPTO_ALG_ASYNC, .cra_ctxsize = sizeof(struct iaa_compression_ctx), .cra_module = THIS_MODULE, .cra_priority = IAA_ALG_PRIORITY, diff --git a/drivers/crypto/intel/iaa/iaa_crypto_stats.c b/drivers/crypto/intel/iaa/iaa_crypto_stats.c index 2e3b7b73af20..c9f83af4b307 100644 --- a/drivers/crypto/intel/iaa/iaa_crypto_stats.c +++ b/drivers/crypto/intel/iaa/iaa_crypto_stats.c @@ -22,8 +22,6 @@ static u64 total_decomp_calls; static u64 total_sw_decomp_calls; static u64 max_comp_delay_ns; static u64 max_decomp_delay_ns; -static u64 max_acomp_delay_ns; -static u64 max_adecomp_delay_ns; static u64 total_comp_bytes_out; static u64 total_decomp_bytes_in; static u64 total_completion_einval_errors; @@ -92,26 +90,6 @@ void update_max_decomp_delay_ns(u64 start_time_ns) max_decomp_delay_ns = time_diff; } -void update_max_acomp_delay_ns(u64 start_time_ns) -{ - u64 time_diff; - - time_diff = ktime_get_ns() - start_time_ns; - - if (time_diff > max_acomp_delay_ns) - max_acomp_delay_ns = time_diff; -} - -void update_max_adecomp_delay_ns(u64 start_time_ns) -{ - u64 time_diff; - - time_diff = ktime_get_ns() - start_time_ns; - - if (time_diff > max_adecomp_delay_ns) - max_adecomp_delay_ns = time_diff; -} - void update_wq_comp_calls(struct idxd_wq *idxd_wq) { struct iaa_wq *wq = idxd_wq_get_private(idxd_wq); @@ -151,8 +129,6 @@ static void reset_iaa_crypto_stats(void) total_sw_decomp_calls = 0; max_comp_delay_ns = 0; max_decomp_delay_ns = 0; - max_acomp_delay_ns = 0; - max_adecomp_delay_ns = 0; total_comp_bytes_out = 0; total_decomp_bytes_in = 0; total_completion_einval_errors = 0; @@ -275,17 +251,11 @@ int __init iaa_crypto_debugfs_init(void) return -ENODEV; iaa_crypto_debugfs_root = debugfs_create_dir("iaa_crypto", NULL); - if (!iaa_crypto_debugfs_root) - return -ENOMEM; debugfs_create_u64("max_comp_delay_ns", 0644, iaa_crypto_debugfs_root, &max_comp_delay_ns); debugfs_create_u64("max_decomp_delay_ns", 0644, iaa_crypto_debugfs_root, &max_decomp_delay_ns); - debugfs_create_u64("max_acomp_delay_ns", 0644, - iaa_crypto_debugfs_root, &max_comp_delay_ns); - debugfs_create_u64("max_adecomp_delay_ns", 0644, - iaa_crypto_debugfs_root, &max_decomp_delay_ns); debugfs_create_u64("total_comp_calls", 0644, iaa_crypto_debugfs_root, &total_comp_calls); debugfs_create_u64("total_decomp_calls", 0644, diff --git a/drivers/crypto/intel/iaa/iaa_crypto_stats.h b/drivers/crypto/intel/iaa/iaa_crypto_stats.h index c10b87b86fa4..c916ca83f070 100644 --- a/drivers/crypto/intel/iaa/iaa_crypto_stats.h +++ b/drivers/crypto/intel/iaa/iaa_crypto_stats.h @@ -15,8 +15,6 @@ void update_total_sw_decomp_calls(void); void update_total_decomp_bytes_in(int n); void update_max_comp_delay_ns(u64 start_time_ns); void update_max_decomp_delay_ns(u64 start_time_ns); -void update_max_acomp_delay_ns(u64 start_time_ns); -void update_max_adecomp_delay_ns(u64 start_time_ns); void update_completion_einval_errs(void); void update_completion_timeout_errs(void); void update_completion_comp_buf_overflow_errs(void); @@ -26,6 +24,8 @@ void update_wq_comp_bytes(struct idxd_wq *idxd_wq, int n); void update_wq_decomp_calls(struct idxd_wq *idxd_wq); void update_wq_decomp_bytes(struct idxd_wq *idxd_wq, int n); +static inline u64 iaa_get_ts(void) { return ktime_get_ns(); } + #else static inline int iaa_crypto_debugfs_init(void) { return 0; } static inline void iaa_crypto_debugfs_cleanup(void) {} @@ -37,8 +37,6 @@ static inline void update_total_sw_decomp_calls(void) {} static inline void update_total_decomp_bytes_in(int n) {} static inline void update_max_comp_delay_ns(u64 start_time_ns) {} static inline void update_max_decomp_delay_ns(u64 start_time_ns) {} -static inline void update_max_acomp_delay_ns(u64 start_time_ns) {} -static inline void update_max_adecomp_delay_ns(u64 start_time_ns) {} static inline void update_completion_einval_errs(void) {} static inline void update_completion_timeout_errs(void) {} static inline void update_completion_comp_buf_overflow_errs(void) {} @@ -48,6 +46,8 @@ static inline void update_wq_comp_bytes(struct idxd_wq *idxd_wq, int n) {} static inline void update_wq_decomp_calls(struct idxd_wq *idxd_wq) {} static inline void update_wq_decomp_bytes(struct idxd_wq *idxd_wq, int n) {} +static inline u64 iaa_get_ts(void) { return 0; } + #endif // CONFIG_CRYPTO_DEV_IAA_CRYPTO_STATS #endif diff --git a/drivers/crypto/intel/qat/Kconfig b/drivers/crypto/intel/qat/Kconfig index c120f6715a09..02fb8abe4e6e 100644 --- a/drivers/crypto/intel/qat/Kconfig +++ b/drivers/crypto/intel/qat/Kconfig @@ -106,3 +106,17 @@ config CRYPTO_DEV_QAT_C62XVF To compile this as a module, choose M here: the module will be called qat_c62xvf. + +config CRYPTO_DEV_QAT_ERROR_INJECTION + bool "Support for Intel(R) QAT Devices Heartbeat Error Injection" + depends on CRYPTO_DEV_QAT + depends on DEBUG_FS + help + Enables a mechanism that allows to inject a heartbeat error on + Intel(R) QuickAssist devices for testing purposes. + + This is intended for developer use only. + If unsure, say N. + + This functionality is available via debugfs entry of the Intel(R) + QuickAssist device diff --git a/drivers/crypto/intel/qat/qat_420xx/adf_420xx_hw_data.c b/drivers/crypto/intel/qat/qat_420xx/adf_420xx_hw_data.c index a87d29ae724f..1102c47f8293 100644 --- a/drivers/crypto/intel/qat/qat_420xx/adf_420xx_hw_data.c +++ b/drivers/crypto/intel/qat/qat_420xx/adf_420xx_hw_data.c @@ -361,53 +361,6 @@ static u32 get_ena_thd_mask(struct adf_accel_dev *accel_dev, u32 obj_num) } } -static u16 get_ring_to_svc_map(struct adf_accel_dev *accel_dev) -{ - enum adf_cfg_service_type rps[RP_GROUP_COUNT] = { }; - const struct adf_fw_config *fw_config; - u16 ring_to_svc_map; - int i, j; - - fw_config = get_fw_config(accel_dev); - if (!fw_config) - return 0; - - for (i = 0; i < RP_GROUP_COUNT; i++) { - switch (fw_config[i].ae_mask) { - case ADF_AE_GROUP_0: - j = RP_GROUP_0; - break; - case ADF_AE_GROUP_1: - j = RP_GROUP_1; - break; - default: - return 0; - } - - switch (fw_config[i].obj) { - case ADF_FW_SYM_OBJ: - rps[j] = SYM; - break; - case ADF_FW_ASYM_OBJ: - rps[j] = ASYM; - break; - case ADF_FW_DC_OBJ: - rps[j] = COMP; - break; - default: - rps[j] = 0; - break; - } - } - - ring_to_svc_map = rps[RP_GROUP_0] << ADF_CFG_SERV_RING_PAIR_0_SHIFT | - rps[RP_GROUP_1] << ADF_CFG_SERV_RING_PAIR_1_SHIFT | - rps[RP_GROUP_0] << ADF_CFG_SERV_RING_PAIR_2_SHIFT | - rps[RP_GROUP_1] << ADF_CFG_SERV_RING_PAIR_3_SHIFT; - - return ring_to_svc_map; -} - static const char *uof_get_name(struct adf_accel_dev *accel_dev, u32 obj_num, const char * const fw_objs[], int num_objs) { @@ -433,6 +386,20 @@ static const char *uof_get_name_420xx(struct adf_accel_dev *accel_dev, u32 obj_n return uof_get_name(accel_dev, obj_num, adf_420xx_fw_objs, num_fw_objs); } +static int uof_get_obj_type(struct adf_accel_dev *accel_dev, u32 obj_num) +{ + const struct adf_fw_config *fw_config; + + if (obj_num >= uof_get_num_objs(accel_dev)) + return -EINVAL; + + fw_config = get_fw_config(accel_dev); + if (!fw_config) + return -EINVAL; + + return fw_config[obj_num].obj; +} + static u32 uof_get_ae_mask(struct adf_accel_dev *accel_dev, u32 obj_num) { const struct adf_fw_config *fw_config; @@ -496,12 +463,13 @@ void adf_init_hw_data_420xx(struct adf_hw_device_data *hw_data, u32 dev_id) hw_data->fw_mmp_name = ADF_420XX_MMP; hw_data->uof_get_name = uof_get_name_420xx; hw_data->uof_get_num_objs = uof_get_num_objs; + hw_data->uof_get_obj_type = uof_get_obj_type; hw_data->uof_get_ae_mask = uof_get_ae_mask; hw_data->get_rp_group = get_rp_group; hw_data->get_ena_thd_mask = get_ena_thd_mask; hw_data->set_msix_rttable = adf_gen4_set_msix_default_rttable; hw_data->set_ssm_wdtimer = adf_gen4_set_ssm_wdtimer; - hw_data->get_ring_to_svc_map = get_ring_to_svc_map; + hw_data->get_ring_to_svc_map = adf_gen4_get_ring_to_svc_map; hw_data->disable_iov = adf_disable_sriov; hw_data->ring_pair_reset = adf_gen4_ring_pair_reset; hw_data->enable_pm = adf_gen4_enable_pm; diff --git a/drivers/crypto/intel/qat/qat_4xxx/adf_4xxx_hw_data.c b/drivers/crypto/intel/qat/qat_4xxx/adf_4xxx_hw_data.c index 94a0ebb03d8c..927506cf271d 100644 --- a/drivers/crypto/intel/qat/qat_4xxx/adf_4xxx_hw_data.c +++ b/drivers/crypto/intel/qat/qat_4xxx/adf_4xxx_hw_data.c @@ -320,53 +320,6 @@ static u32 get_ena_thd_mask_401xx(struct adf_accel_dev *accel_dev, u32 obj_num) } } -static u16 get_ring_to_svc_map(struct adf_accel_dev *accel_dev) -{ - enum adf_cfg_service_type rps[RP_GROUP_COUNT]; - const struct adf_fw_config *fw_config; - u16 ring_to_svc_map; - int i, j; - - fw_config = get_fw_config(accel_dev); - if (!fw_config) - return 0; - - for (i = 0; i < RP_GROUP_COUNT; i++) { - switch (fw_config[i].ae_mask) { - case ADF_AE_GROUP_0: - j = RP_GROUP_0; - break; - case ADF_AE_GROUP_1: - j = RP_GROUP_1; - break; - default: - return 0; - } - - switch (fw_config[i].obj) { - case ADF_FW_SYM_OBJ: - rps[j] = SYM; - break; - case ADF_FW_ASYM_OBJ: - rps[j] = ASYM; - break; - case ADF_FW_DC_OBJ: - rps[j] = COMP; - break; - default: - rps[j] = 0; - break; - } - } - - ring_to_svc_map = rps[RP_GROUP_0] << ADF_CFG_SERV_RING_PAIR_0_SHIFT | - rps[RP_GROUP_1] << ADF_CFG_SERV_RING_PAIR_1_SHIFT | - rps[RP_GROUP_0] << ADF_CFG_SERV_RING_PAIR_2_SHIFT | - rps[RP_GROUP_1] << ADF_CFG_SERV_RING_PAIR_3_SHIFT; - - return ring_to_svc_map; -} - static const char *uof_get_name(struct adf_accel_dev *accel_dev, u32 obj_num, const char * const fw_objs[], int num_objs) { @@ -399,6 +352,20 @@ static const char *uof_get_name_402xx(struct adf_accel_dev *accel_dev, u32 obj_n return uof_get_name(accel_dev, obj_num, adf_402xx_fw_objs, num_fw_objs); } +static int uof_get_obj_type(struct adf_accel_dev *accel_dev, u32 obj_num) +{ + const struct adf_fw_config *fw_config; + + if (obj_num >= uof_get_num_objs(accel_dev)) + return -EINVAL; + + fw_config = get_fw_config(accel_dev); + if (!fw_config) + return -EINVAL; + + return fw_config[obj_num].obj; +} + static u32 uof_get_ae_mask(struct adf_accel_dev *accel_dev, u32 obj_num) { const struct adf_fw_config *fw_config; @@ -479,11 +446,12 @@ void adf_init_hw_data_4xxx(struct adf_hw_device_data *hw_data, u32 dev_id) break; } hw_data->uof_get_num_objs = uof_get_num_objs; + hw_data->uof_get_obj_type = uof_get_obj_type; hw_data->uof_get_ae_mask = uof_get_ae_mask; hw_data->get_rp_group = get_rp_group; hw_data->set_msix_rttable = adf_gen4_set_msix_default_rttable; hw_data->set_ssm_wdtimer = adf_gen4_set_ssm_wdtimer; - hw_data->get_ring_to_svc_map = get_ring_to_svc_map; + hw_data->get_ring_to_svc_map = adf_gen4_get_ring_to_svc_map; hw_data->disable_iov = adf_disable_sriov; hw_data->ring_pair_reset = adf_gen4_ring_pair_reset; hw_data->enable_pm = adf_gen4_enable_pm; diff --git a/drivers/crypto/intel/qat/qat_common/Makefile b/drivers/crypto/intel/qat/qat_common/Makefile index 6908727bff3b..5915cde8a7aa 100644 --- a/drivers/crypto/intel/qat/qat_common/Makefile +++ b/drivers/crypto/intel/qat/qat_common/Makefile @@ -53,3 +53,5 @@ intel_qat-$(CONFIG_PCI_IOV) += adf_sriov.o adf_vf_isr.o adf_pfvf_utils.o \ adf_pfvf_pf_msg.o adf_pfvf_pf_proto.o \ adf_pfvf_vf_msg.o adf_pfvf_vf_proto.o \ adf_gen2_pfvf.o adf_gen4_pfvf.o + +intel_qat-$(CONFIG_CRYPTO_DEV_QAT_ERROR_INJECTION) += adf_heartbeat_inject.o diff --git a/drivers/crypto/intel/qat/qat_common/adf_accel_devices.h b/drivers/crypto/intel/qat/qat_common/adf_accel_devices.h index a16c7e6edc65..08658c3a01e9 100644 --- a/drivers/crypto/intel/qat/qat_common/adf_accel_devices.h +++ b/drivers/crypto/intel/qat/qat_common/adf_accel_devices.h @@ -248,6 +248,7 @@ struct adf_hw_device_data { void (*set_msix_rttable)(struct adf_accel_dev *accel_dev); const char *(*uof_get_name)(struct adf_accel_dev *accel_dev, u32 obj_num); u32 (*uof_get_num_objs)(struct adf_accel_dev *accel_dev); + int (*uof_get_obj_type)(struct adf_accel_dev *accel_dev, u32 obj_num); u32 (*uof_get_ae_mask)(struct adf_accel_dev *accel_dev, u32 obj_num); int (*get_rp_group)(struct adf_accel_dev *accel_dev, u32 ae_mask); u32 (*get_ena_thd_mask)(struct adf_accel_dev *accel_dev, u32 obj_num); @@ -332,6 +333,7 @@ struct adf_accel_vf_info { struct ratelimit_state vf2pf_ratelimit; u32 vf_nr; bool init; + bool restarting; u8 vf_compat_ver; }; @@ -401,6 +403,7 @@ struct adf_accel_dev { struct adf_error_counters ras_errors; struct mutex state_lock; /* protect state of the device */ bool is_vf; + bool autoreset_on_error; u32 accel_id; }; #endif diff --git a/drivers/crypto/intel/qat/qat_common/adf_aer.c b/drivers/crypto/intel/qat/qat_common/adf_aer.c index a39e70bd4b21..9da2278bd5b7 100644 --- a/drivers/crypto/intel/qat/qat_common/adf_aer.c +++ b/drivers/crypto/intel/qat/qat_common/adf_aer.c @@ -7,8 +7,15 @@ #include <linux/delay.h> #include "adf_accel_devices.h" #include "adf_common_drv.h" +#include "adf_pfvf_pf_msg.h" + +struct adf_fatal_error_data { + struct adf_accel_dev *accel_dev; + struct work_struct work; +}; static struct workqueue_struct *device_reset_wq; +static struct workqueue_struct *device_sriov_wq; static pci_ers_result_t adf_error_detected(struct pci_dev *pdev, pci_channel_state_t state) @@ -26,6 +33,19 @@ static pci_ers_result_t adf_error_detected(struct pci_dev *pdev, return PCI_ERS_RESULT_DISCONNECT; } + set_bit(ADF_STATUS_RESTARTING, &accel_dev->status); + if (accel_dev->hw_device->exit_arb) { + dev_dbg(&pdev->dev, "Disabling arbitration\n"); + accel_dev->hw_device->exit_arb(accel_dev); + } + adf_error_notifier(accel_dev); + adf_pf2vf_notify_fatal_error(accel_dev); + adf_dev_restarting_notify(accel_dev); + adf_pf2vf_notify_restarting(accel_dev); + adf_pf2vf_wait_for_restarting_complete(accel_dev); + pci_clear_master(pdev); + adf_dev_down(accel_dev, false); + return PCI_ERS_RESULT_NEED_RESET; } @@ -37,6 +57,13 @@ struct adf_reset_dev_data { struct work_struct reset_work; }; +/* sriov dev data */ +struct adf_sriov_dev_data { + struct adf_accel_dev *accel_dev; + struct completion compl; + struct work_struct sriov_work; +}; + void adf_reset_sbr(struct adf_accel_dev *accel_dev) { struct pci_dev *pdev = accel_to_pci_dev(accel_dev); @@ -82,29 +109,57 @@ void adf_dev_restore(struct adf_accel_dev *accel_dev) } } +static void adf_device_sriov_worker(struct work_struct *work) +{ + struct adf_sriov_dev_data *sriov_data = + container_of(work, struct adf_sriov_dev_data, sriov_work); + + adf_reenable_sriov(sriov_data->accel_dev); + complete(&sriov_data->compl); +} + static void adf_device_reset_worker(struct work_struct *work) { struct adf_reset_dev_data *reset_data = container_of(work, struct adf_reset_dev_data, reset_work); struct adf_accel_dev *accel_dev = reset_data->accel_dev; + unsigned long wait_jiffies = msecs_to_jiffies(10000); + struct adf_sriov_dev_data sriov_data; adf_dev_restarting_notify(accel_dev); if (adf_dev_restart(accel_dev)) { /* The device hanged and we can't restart it so stop here */ dev_err(&GET_DEV(accel_dev), "Restart device failed\n"); - if (reset_data->mode == ADF_DEV_RESET_ASYNC) + if (reset_data->mode == ADF_DEV_RESET_ASYNC || + completion_done(&reset_data->compl)) kfree(reset_data); WARN(1, "QAT: device restart failed. Device is unusable\n"); return; } + + sriov_data.accel_dev = accel_dev; + init_completion(&sriov_data.compl); + INIT_WORK(&sriov_data.sriov_work, adf_device_sriov_worker); + queue_work(device_sriov_wq, &sriov_data.sriov_work); + if (wait_for_completion_timeout(&sriov_data.compl, wait_jiffies)) + adf_pf2vf_notify_restarted(accel_dev); + adf_dev_restarted_notify(accel_dev); clear_bit(ADF_STATUS_RESTARTING, &accel_dev->status); - /* The dev is back alive. Notify the caller if in sync mode */ - if (reset_data->mode == ADF_DEV_RESET_SYNC) - complete(&reset_data->compl); - else + /* + * The dev is back alive. Notify the caller if in sync mode + * + * If device restart will take a more time than expected, + * the schedule_reset() function can timeout and exit. This can be + * detected by calling the completion_done() function. In this case + * the reset_data structure needs to be freed here. + */ + if (reset_data->mode == ADF_DEV_RESET_ASYNC || + completion_done(&reset_data->compl)) kfree(reset_data); + else + complete(&reset_data->compl); } static int adf_dev_aer_schedule_reset(struct adf_accel_dev *accel_dev, @@ -137,8 +192,9 @@ static int adf_dev_aer_schedule_reset(struct adf_accel_dev *accel_dev, dev_err(&GET_DEV(accel_dev), "Reset device timeout expired\n"); ret = -EFAULT; + } else { + kfree(reset_data); } - kfree(reset_data); return ret; } return 0; @@ -147,14 +203,25 @@ static int adf_dev_aer_schedule_reset(struct adf_accel_dev *accel_dev, static pci_ers_result_t adf_slot_reset(struct pci_dev *pdev) { struct adf_accel_dev *accel_dev = adf_devmgr_pci_to_accel_dev(pdev); + int res = 0; if (!accel_dev) { pr_err("QAT: Can't find acceleration device\n"); return PCI_ERS_RESULT_DISCONNECT; } - if (adf_dev_aer_schedule_reset(accel_dev, ADF_DEV_RESET_SYNC)) + + if (!pdev->is_busmaster) + pci_set_master(pdev); + pci_restore_state(pdev); + pci_save_state(pdev); + res = adf_dev_up(accel_dev, false); + if (res && res != -EALREADY) return PCI_ERS_RESULT_DISCONNECT; + adf_reenable_sriov(accel_dev); + adf_pf2vf_notify_restarted(accel_dev); + adf_dev_restarted_notify(accel_dev); + clear_bit(ADF_STATUS_RESTARTING, &accel_dev->status); return PCI_ERS_RESULT_RECOVERED; } @@ -171,11 +238,62 @@ const struct pci_error_handlers adf_err_handler = { }; EXPORT_SYMBOL_GPL(adf_err_handler); +int adf_dev_autoreset(struct adf_accel_dev *accel_dev) +{ + if (accel_dev->autoreset_on_error) + return adf_dev_aer_schedule_reset(accel_dev, ADF_DEV_RESET_ASYNC); + + return 0; +} + +static void adf_notify_fatal_error_worker(struct work_struct *work) +{ + struct adf_fatal_error_data *wq_data = + container_of(work, struct adf_fatal_error_data, work); + struct adf_accel_dev *accel_dev = wq_data->accel_dev; + struct adf_hw_device_data *hw_device = accel_dev->hw_device; + + adf_error_notifier(accel_dev); + + if (!accel_dev->is_vf) { + /* Disable arbitration to stop processing of new requests */ + if (accel_dev->autoreset_on_error && hw_device->exit_arb) + hw_device->exit_arb(accel_dev); + if (accel_dev->pf.vf_info) + adf_pf2vf_notify_fatal_error(accel_dev); + adf_dev_autoreset(accel_dev); + } + + kfree(wq_data); +} + +int adf_notify_fatal_error(struct adf_accel_dev *accel_dev) +{ + struct adf_fatal_error_data *wq_data; + + wq_data = kzalloc(sizeof(*wq_data), GFP_ATOMIC); + if (!wq_data) + return -ENOMEM; + + wq_data->accel_dev = accel_dev; + INIT_WORK(&wq_data->work, adf_notify_fatal_error_worker); + adf_misc_wq_queue_work(&wq_data->work); + + return 0; +} + int adf_init_aer(void) { device_reset_wq = alloc_workqueue("qat_device_reset_wq", WQ_MEM_RECLAIM, 0); - return !device_reset_wq ? -EFAULT : 0; + if (!device_reset_wq) + return -EFAULT; + + device_sriov_wq = alloc_workqueue("qat_device_sriov_wq", 0, 0); + if (!device_sriov_wq) + return -EFAULT; + + return 0; } void adf_exit_aer(void) @@ -183,4 +301,8 @@ void adf_exit_aer(void) if (device_reset_wq) destroy_workqueue(device_reset_wq); device_reset_wq = NULL; + + if (device_sriov_wq) + destroy_workqueue(device_sriov_wq); + device_sriov_wq = NULL; } diff --git a/drivers/crypto/intel/qat/qat_common/adf_cfg_strings.h b/drivers/crypto/intel/qat/qat_common/adf_cfg_strings.h index 322b76903a73..e015ad6cace2 100644 --- a/drivers/crypto/intel/qat/qat_common/adf_cfg_strings.h +++ b/drivers/crypto/intel/qat/qat_common/adf_cfg_strings.h @@ -49,5 +49,6 @@ ADF_ETRMGR_BANK "%d" ADF_ETRMGR_CORE_AFFINITY #define ADF_ACCEL_STR "Accelerator%d" #define ADF_HEARTBEAT_TIMER "HeartbeatTimer" +#define ADF_SRIOV_ENABLED "SriovEnabled" #endif diff --git a/drivers/crypto/intel/qat/qat_common/adf_clock.c b/drivers/crypto/intel/qat/qat_common/adf_clock.c index 01e0a389e462..cf89f57de2a7 100644 --- a/drivers/crypto/intel/qat/qat_common/adf_clock.c +++ b/drivers/crypto/intel/qat/qat_common/adf_clock.c @@ -83,6 +83,9 @@ static int measure_clock(struct adf_accel_dev *accel_dev, u32 *frequency) } delta_us = timespec_to_us(&ts3) - timespec_to_us(&ts1); + if (!delta_us) + return -EINVAL; + temp = (timestamp2 - timestamp1) * ME_CLK_DIVIDER * 10; temp = DIV_ROUND_CLOSEST_ULL(temp, delta_us); /* diff --git a/drivers/crypto/intel/qat/qat_common/adf_cnv_dbgfs.c b/drivers/crypto/intel/qat/qat_common/adf_cnv_dbgfs.c index 07119c487da0..627953a72d47 100644 --- a/drivers/crypto/intel/qat/qat_common/adf_cnv_dbgfs.c +++ b/drivers/crypto/intel/qat/qat_common/adf_cnv_dbgfs.c @@ -16,7 +16,6 @@ #define CNV_ERR_INFO_MASK GENMASK(11, 0) #define CNV_ERR_TYPE_MASK GENMASK(15, 12) -#define CNV_SLICE_ERR_MASK GENMASK(7, 0) #define CNV_SLICE_ERR_SIGN_BIT_INDEX 7 #define CNV_DELTA_ERR_SIGN_BIT_INDEX 11 diff --git a/drivers/crypto/intel/qat/qat_common/adf_common_drv.h b/drivers/crypto/intel/qat/qat_common/adf_common_drv.h index f06188033a93..57328249c89e 100644 --- a/drivers/crypto/intel/qat/qat_common/adf_common_drv.h +++ b/drivers/crypto/intel/qat/qat_common/adf_common_drv.h @@ -40,6 +40,7 @@ enum adf_event { ADF_EVENT_SHUTDOWN, ADF_EVENT_RESTARTING, ADF_EVENT_RESTARTED, + ADF_EVENT_FATAL_ERROR, }; struct service_hndl { @@ -60,6 +61,8 @@ int adf_dev_restart(struct adf_accel_dev *accel_dev); void adf_devmgr_update_class_index(struct adf_hw_device_data *hw_data); void adf_clean_vf_map(bool); +int adf_notify_fatal_error(struct adf_accel_dev *accel_dev); +void adf_error_notifier(struct adf_accel_dev *accel_dev); int adf_devmgr_add_dev(struct adf_accel_dev *accel_dev, struct adf_accel_dev *pf); void adf_devmgr_rm_dev(struct adf_accel_dev *accel_dev, @@ -84,12 +87,14 @@ int adf_ae_stop(struct adf_accel_dev *accel_dev); extern const struct pci_error_handlers adf_err_handler; void adf_reset_sbr(struct adf_accel_dev *accel_dev); void adf_reset_flr(struct adf_accel_dev *accel_dev); +int adf_dev_autoreset(struct adf_accel_dev *accel_dev); void adf_dev_restore(struct adf_accel_dev *accel_dev); int adf_init_aer(void); void adf_exit_aer(void); int adf_init_arb(struct adf_accel_dev *accel_dev); void adf_exit_arb(struct adf_accel_dev *accel_dev); void adf_update_ring_arb(struct adf_etr_ring_data *ring); +int adf_disable_arb_thd(struct adf_accel_dev *accel_dev, u32 ae, u32 thr); int adf_dev_get(struct adf_accel_dev *accel_dev); void adf_dev_put(struct adf_accel_dev *accel_dev); @@ -188,6 +193,7 @@ bool adf_misc_wq_queue_delayed_work(struct delayed_work *work, #if defined(CONFIG_PCI_IOV) int adf_sriov_configure(struct pci_dev *pdev, int numvfs); void adf_disable_sriov(struct adf_accel_dev *accel_dev); +void adf_reenable_sriov(struct adf_accel_dev *accel_dev); void adf_enable_vf2pf_interrupts(struct adf_accel_dev *accel_dev, u32 vf_mask); void adf_disable_all_vf2pf_interrupts(struct adf_accel_dev *accel_dev); bool adf_recv_and_handle_pf2vf_msg(struct adf_accel_dev *accel_dev); @@ -208,6 +214,10 @@ static inline void adf_disable_sriov(struct adf_accel_dev *accel_dev) { } +static inline void adf_reenable_sriov(struct adf_accel_dev *accel_dev) +{ +} + static inline int adf_init_pf_wq(void) { return 0; diff --git a/drivers/crypto/intel/qat/qat_common/adf_dev_mgr.c b/drivers/crypto/intel/qat/qat_common/adf_dev_mgr.c index 86ee36feefad..f07b748795f7 100644 --- a/drivers/crypto/intel/qat/qat_common/adf_dev_mgr.c +++ b/drivers/crypto/intel/qat/qat_common/adf_dev_mgr.c @@ -60,10 +60,10 @@ static int adf_get_vf_real_id(u32 fake) /** * adf_clean_vf_map() - Cleans VF id mapings - * - * Function cleans internal ids for virtual functions. * @vf: flag indicating whether mappings is cleaned * for vfs only or for vfs and pfs + * + * Function cleans internal ids for virtual functions. */ void adf_clean_vf_map(bool vf) { diff --git a/drivers/crypto/intel/qat/qat_common/adf_gen4_hw_data.c b/drivers/crypto/intel/qat/qat_common/adf_gen4_hw_data.c index 9985683056d5..d28e1921940a 100644 --- a/drivers/crypto/intel/qat/qat_common/adf_gen4_hw_data.c +++ b/drivers/crypto/intel/qat/qat_common/adf_gen4_hw_data.c @@ -4,6 +4,7 @@ #include "adf_accel_devices.h" #include "adf_cfg_services.h" #include "adf_common_drv.h" +#include "adf_fw_config.h" #include "adf_gen4_hw_data.h" #include "adf_gen4_pm.h" @@ -398,6 +399,9 @@ int adf_gen4_init_thd2arb_map(struct adf_accel_dev *accel_dev) ADF_GEN4_ADMIN_ACCELENGINES; if (srv_id == SVC_DCC) { + if (ae_cnt > ICP_QAT_HW_AE_DELIMITER) + return -EINVAL; + memcpy(thd2arb_map, thrd_to_arb_map_dcc, array_size(sizeof(*thd2arb_map), ae_cnt)); return 0; @@ -430,3 +434,58 @@ int adf_gen4_init_thd2arb_map(struct adf_accel_dev *accel_dev) return 0; } EXPORT_SYMBOL_GPL(adf_gen4_init_thd2arb_map); + +u16 adf_gen4_get_ring_to_svc_map(struct adf_accel_dev *accel_dev) +{ + struct adf_hw_device_data *hw_data = GET_HW_DATA(accel_dev); + enum adf_cfg_service_type rps[RP_GROUP_COUNT] = { }; + unsigned int ae_mask, start_id, worker_obj_cnt, i; + u16 ring_to_svc_map; + int rp_group; + + if (!hw_data->get_rp_group || !hw_data->uof_get_ae_mask || + !hw_data->uof_get_obj_type || !hw_data->uof_get_num_objs) + return 0; + + /* If dcc, all rings handle compression requests */ + if (adf_get_service_enabled(accel_dev) == SVC_DCC) { + for (i = 0; i < RP_GROUP_COUNT; i++) + rps[i] = COMP; + goto set_mask; + } + + worker_obj_cnt = hw_data->uof_get_num_objs(accel_dev) - + ADF_GEN4_ADMIN_ACCELENGINES; + start_id = worker_obj_cnt - RP_GROUP_COUNT; + + for (i = start_id; i < worker_obj_cnt; i++) { + ae_mask = hw_data->uof_get_ae_mask(accel_dev, i); + rp_group = hw_data->get_rp_group(accel_dev, ae_mask); + if (rp_group >= RP_GROUP_COUNT || rp_group < RP_GROUP_0) + return 0; + + switch (hw_data->uof_get_obj_type(accel_dev, i)) { + case ADF_FW_SYM_OBJ: + rps[rp_group] = SYM; + break; + case ADF_FW_ASYM_OBJ: + rps[rp_group] = ASYM; + break; + case ADF_FW_DC_OBJ: + rps[rp_group] = COMP; + break; + default: + rps[rp_group] = 0; + break; + } + } + +set_mask: + ring_to_svc_map = rps[RP_GROUP_0] << ADF_CFG_SERV_RING_PAIR_0_SHIFT | + rps[RP_GROUP_1] << ADF_CFG_SERV_RING_PAIR_1_SHIFT | + rps[RP_GROUP_0] << ADF_CFG_SERV_RING_PAIR_2_SHIFT | + rps[RP_GROUP_1] << ADF_CFG_SERV_RING_PAIR_3_SHIFT; + + return ring_to_svc_map; +} +EXPORT_SYMBOL_GPL(adf_gen4_get_ring_to_svc_map); diff --git a/drivers/crypto/intel/qat/qat_common/adf_gen4_hw_data.h b/drivers/crypto/intel/qat/qat_common/adf_gen4_hw_data.h index 7d8a774cadc8..c6e80df5a85a 100644 --- a/drivers/crypto/intel/qat/qat_common/adf_gen4_hw_data.h +++ b/drivers/crypto/intel/qat/qat_common/adf_gen4_hw_data.h @@ -235,5 +235,6 @@ int adf_gen4_ring_pair_reset(struct adf_accel_dev *accel_dev, u32 bank_number); void adf_gen4_set_msix_default_rttable(struct adf_accel_dev *accel_dev); void adf_gen4_set_ssm_wdtimer(struct adf_accel_dev *accel_dev); int adf_gen4_init_thd2arb_map(struct adf_accel_dev *accel_dev); +u16 adf_gen4_get_ring_to_svc_map(struct adf_accel_dev *accel_dev); #endif diff --git a/drivers/crypto/intel/qat/qat_common/adf_gen4_ras.c b/drivers/crypto/intel/qat/qat_common/adf_gen4_ras.c index 048c24607939..2dd3772bf58a 100644 --- a/drivers/crypto/intel/qat/qat_common/adf_gen4_ras.c +++ b/drivers/crypto/intel/qat/qat_common/adf_gen4_ras.c @@ -1007,8 +1007,7 @@ static bool adf_handle_spppar_err(struct adf_accel_dev *accel_dev, static bool adf_handle_ssmcpppar_err(struct adf_accel_dev *accel_dev, void __iomem *csr, u32 iastatssm) { - u32 reg = ADF_CSR_RD(csr, ADF_GEN4_SSMCPPERR); - u32 bits_num = BITS_PER_REG(reg); + u32 reg, bits_num = BITS_PER_REG(reg); bool reset_required = false; unsigned long errs_bits; u32 bit_iterator; @@ -1106,8 +1105,7 @@ static bool adf_handle_rf_parr_err(struct adf_accel_dev *accel_dev, static bool adf_handle_ser_err_ssmsh(struct adf_accel_dev *accel_dev, void __iomem *csr, u32 iastatssm) { - u32 reg = ADF_CSR_RD(csr, ADF_GEN4_SER_ERR_SSMSH); - u32 bits_num = BITS_PER_REG(reg); + u32 reg, bits_num = BITS_PER_REG(reg); bool reset_required = false; unsigned long errs_bits; u32 bit_iterator; diff --git a/drivers/crypto/intel/qat/qat_common/adf_heartbeat.c b/drivers/crypto/intel/qat/qat_common/adf_heartbeat.c index 13f48d2f6da8..b19aa1ef8eee 100644 --- a/drivers/crypto/intel/qat/qat_common/adf_heartbeat.c +++ b/drivers/crypto/intel/qat/qat_common/adf_heartbeat.c @@ -23,12 +23,6 @@ #define ADF_HB_EMPTY_SIG 0xA5A5A5A5 -/* Heartbeat counter pair */ -struct hb_cnt_pair { - __u16 resp_heartbeat_cnt; - __u16 req_heartbeat_cnt; -}; - static int adf_hb_check_polling_freq(struct adf_accel_dev *accel_dev) { u64 curr_time = adf_clock_get_current_time(); @@ -211,6 +205,19 @@ static int adf_hb_get_status(struct adf_accel_dev *accel_dev) return ret; } +static void adf_heartbeat_reset(struct adf_accel_dev *accel_dev) +{ + u64 curr_time = adf_clock_get_current_time(); + u64 time_since_reset = curr_time - accel_dev->heartbeat->last_hb_reset_time; + + if (time_since_reset < ADF_CFG_HB_RESET_MS) + return; + + accel_dev->heartbeat->last_hb_reset_time = curr_time; + if (adf_notify_fatal_error(accel_dev)) + dev_err(&GET_DEV(accel_dev), "Failed to notify fatal error\n"); +} + void adf_heartbeat_status(struct adf_accel_dev *accel_dev, enum adf_device_heartbeat_status *hb_status) { @@ -235,6 +242,7 @@ void adf_heartbeat_status(struct adf_accel_dev *accel_dev, "Heartbeat ERROR: QAT is not responding.\n"); *hb_status = HB_DEV_UNRESPONSIVE; hb->hb_failed_counter++; + adf_heartbeat_reset(accel_dev); return; } diff --git a/drivers/crypto/intel/qat/qat_common/adf_heartbeat.h b/drivers/crypto/intel/qat/qat_common/adf_heartbeat.h index b22e3cb29798..16fdfb48b196 100644 --- a/drivers/crypto/intel/qat/qat_common/adf_heartbeat.h +++ b/drivers/crypto/intel/qat/qat_common/adf_heartbeat.h @@ -13,17 +13,26 @@ struct dentry; #define ADF_CFG_HB_TIMER_DEFAULT_MS 500 #define ADF_CFG_HB_COUNT_THRESHOLD 3 +#define ADF_CFG_HB_RESET_MS 5000 + enum adf_device_heartbeat_status { HB_DEV_UNRESPONSIVE = 0, HB_DEV_ALIVE, HB_DEV_UNSUPPORTED, }; +/* Heartbeat counter pair */ +struct hb_cnt_pair { + __u16 resp_heartbeat_cnt; + __u16 req_heartbeat_cnt; +}; + struct adf_heartbeat { unsigned int hb_sent_counter; unsigned int hb_failed_counter; unsigned int hb_timer; u64 last_hb_check_time; + u64 last_hb_reset_time; bool ctrs_cnt_checked; struct hb_dma_addr { dma_addr_t phy_addr; @@ -35,6 +44,9 @@ struct adf_heartbeat { struct dentry *cfg; struct dentry *sent; struct dentry *failed; +#ifdef CONFIG_CRYPTO_DEV_QAT_ERROR_INJECTION + struct dentry *inject_error; +#endif } dbgfs; }; @@ -51,6 +63,15 @@ void adf_heartbeat_status(struct adf_accel_dev *accel_dev, enum adf_device_heartbeat_status *hb_status); void adf_heartbeat_check_ctrs(struct adf_accel_dev *accel_dev); +#ifdef CONFIG_CRYPTO_DEV_QAT_ERROR_INJECTION +int adf_heartbeat_inject_error(struct adf_accel_dev *accel_dev); +#else +static inline int adf_heartbeat_inject_error(struct adf_accel_dev *accel_dev) +{ + return -EPERM; +} +#endif + #else static inline int adf_heartbeat_init(struct adf_accel_dev *accel_dev) { diff --git a/drivers/crypto/intel/qat/qat_common/adf_heartbeat_dbgfs.c b/drivers/crypto/intel/qat/qat_common/adf_heartbeat_dbgfs.c index 2661af6a2ef6..cccdff24b48d 100644 --- a/drivers/crypto/intel/qat/qat_common/adf_heartbeat_dbgfs.c +++ b/drivers/crypto/intel/qat/qat_common/adf_heartbeat_dbgfs.c @@ -155,6 +155,44 @@ static const struct file_operations adf_hb_cfg_fops = { .write = adf_hb_cfg_write, }; +static ssize_t adf_hb_error_inject_write(struct file *file, + const char __user *user_buf, + size_t count, loff_t *ppos) +{ + struct adf_accel_dev *accel_dev = file->private_data; + char buf[3]; + int ret; + + /* last byte left as string termination */ + if (*ppos != 0 || count != 2) + return -EINVAL; + + if (copy_from_user(buf, user_buf, count)) + return -EFAULT; + buf[count] = '\0'; + + if (buf[0] != '1') + return -EINVAL; + + ret = adf_heartbeat_inject_error(accel_dev); + if (ret) { + dev_err(&GET_DEV(accel_dev), + "Heartbeat error injection failed with status %d\n", + ret); + return ret; + } + + dev_info(&GET_DEV(accel_dev), "Heartbeat error injection enabled\n"); + + return count; +} + +static const struct file_operations adf_hb_error_inject_fops = { + .owner = THIS_MODULE, + .open = simple_open, + .write = adf_hb_error_inject_write, +}; + void adf_heartbeat_dbgfs_add(struct adf_accel_dev *accel_dev) { struct adf_heartbeat *hb = accel_dev->heartbeat; @@ -171,6 +209,17 @@ void adf_heartbeat_dbgfs_add(struct adf_accel_dev *accel_dev) &hb->hb_failed_counter, &adf_hb_stats_fops); hb->dbgfs.cfg = debugfs_create_file("config", 0600, hb->dbgfs.base_dir, accel_dev, &adf_hb_cfg_fops); + + if (IS_ENABLED(CONFIG_CRYPTO_DEV_QAT_ERROR_INJECTION)) { + struct dentry *inject_error __maybe_unused; + + inject_error = debugfs_create_file("inject_error", 0200, + hb->dbgfs.base_dir, accel_dev, + &adf_hb_error_inject_fops); +#ifdef CONFIG_CRYPTO_DEV_QAT_ERROR_INJECTION + hb->dbgfs.inject_error = inject_error; +#endif + } } EXPORT_SYMBOL_GPL(adf_heartbeat_dbgfs_add); @@ -189,6 +238,10 @@ void adf_heartbeat_dbgfs_rm(struct adf_accel_dev *accel_dev) hb->dbgfs.failed = NULL; debugfs_remove(hb->dbgfs.cfg); hb->dbgfs.cfg = NULL; +#ifdef CONFIG_CRYPTO_DEV_QAT_ERROR_INJECTION + debugfs_remove(hb->dbgfs.inject_error); + hb->dbgfs.inject_error = NULL; +#endif debugfs_remove(hb->dbgfs.base_dir); hb->dbgfs.base_dir = NULL; } diff --git a/drivers/crypto/intel/qat/qat_common/adf_heartbeat_inject.c b/drivers/crypto/intel/qat/qat_common/adf_heartbeat_inject.c new file mode 100644 index 000000000000..a3b474bdef6c --- /dev/null +++ b/drivers/crypto/intel/qat/qat_common/adf_heartbeat_inject.c @@ -0,0 +1,76 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright(c) 2023 Intel Corporation */ +#include <linux/random.h> + +#include "adf_admin.h" +#include "adf_common_drv.h" +#include "adf_heartbeat.h" + +#define MAX_HB_TICKS 0xFFFFFFFF + +static int adf_hb_set_timer_to_max(struct adf_accel_dev *accel_dev) +{ + struct adf_hw_device_data *hw_data = accel_dev->hw_device; + + accel_dev->heartbeat->hb_timer = 0; + + if (hw_data->stop_timer) + hw_data->stop_timer(accel_dev); + + return adf_send_admin_hb_timer(accel_dev, MAX_HB_TICKS); +} + +static void adf_set_hb_counters_fail(struct adf_accel_dev *accel_dev, u32 ae, + u32 thr) +{ + struct hb_cnt_pair *stats = accel_dev->heartbeat->dma.virt_addr; + struct adf_hw_device_data *hw_device = accel_dev->hw_device; + const size_t max_aes = hw_device->get_num_aes(hw_device); + const size_t hb_ctrs = hw_device->num_hb_ctrs; + size_t thr_id = ae * hb_ctrs + thr; + u16 num_rsp = stats[thr_id].resp_heartbeat_cnt; + + /* + * Inject live.req != live.rsp and live.rsp == last.rsp + * to trigger the heartbeat error detection + */ + stats[thr_id].req_heartbeat_cnt++; + stats += (max_aes * hb_ctrs); + stats[thr_id].resp_heartbeat_cnt = num_rsp; +} + +int adf_heartbeat_inject_error(struct adf_accel_dev *accel_dev) +{ + struct adf_hw_device_data *hw_device = accel_dev->hw_device; + const size_t max_aes = hw_device->get_num_aes(hw_device); + const size_t hb_ctrs = hw_device->num_hb_ctrs; + u32 rand, rand_ae, rand_thr; + unsigned long ae_mask; + int ret; + + ae_mask = hw_device->ae_mask; + + do { + /* Ensure we have a valid ae */ + get_random_bytes(&rand, sizeof(rand)); + rand_ae = rand % max_aes; + } while (!test_bit(rand_ae, &ae_mask)); + + get_random_bytes(&rand, sizeof(rand)); + rand_thr = rand % hb_ctrs; + + /* Increase the heartbeat timer to prevent FW updating HB counters */ + ret = adf_hb_set_timer_to_max(accel_dev); + if (ret) + return ret; + + /* Configure worker threads to stop processing any packet */ + ret = adf_disable_arb_thd(accel_dev, rand_ae, rand_thr); + if (ret) + return ret; + + /* Change HB counters memory to simulate a hang */ + adf_set_hb_counters_fail(accel_dev, rand_ae, rand_thr); + + return 0; +} diff --git a/drivers/crypto/intel/qat/qat_common/adf_hw_arbiter.c b/drivers/crypto/intel/qat/qat_common/adf_hw_arbiter.c index da6956699246..65bd26b25abc 100644 --- a/drivers/crypto/intel/qat/qat_common/adf_hw_arbiter.c +++ b/drivers/crypto/intel/qat/qat_common/adf_hw_arbiter.c @@ -103,3 +103,28 @@ void adf_exit_arb(struct adf_accel_dev *accel_dev) csr_ops->write_csr_ring_srv_arb_en(csr, i, 0); } EXPORT_SYMBOL_GPL(adf_exit_arb); + +int adf_disable_arb_thd(struct adf_accel_dev *accel_dev, u32 ae, u32 thr) +{ + void __iomem *csr = accel_dev->transport->banks[0].csr_addr; + struct adf_hw_device_data *hw_data = accel_dev->hw_device; + const u32 *thd_2_arb_cfg; + struct arb_info info; + u32 ae_thr_map; + + if (ADF_AE_STRAND0_THREAD == thr || ADF_AE_STRAND1_THREAD == thr) + thr = ADF_AE_ADMIN_THREAD; + + hw_data->get_arb_info(&info); + thd_2_arb_cfg = hw_data->get_arb_mapping(accel_dev); + if (!thd_2_arb_cfg) + return -EFAULT; + + /* Disable scheduling for this particular AE and thread */ + ae_thr_map = *(thd_2_arb_cfg + ae); + ae_thr_map &= ~(GENMASK(3, 0) << (thr * BIT(2))); + + WRITE_CSR_ARB_WT2SAM(csr, info.arb_offset, info.wt2sam_offset, ae, + ae_thr_map); + return 0; +} diff --git a/drivers/crypto/intel/qat/qat_common/adf_init.c b/drivers/crypto/intel/qat/qat_common/adf_init.c index f43ae9111553..74f0818c0703 100644 --- a/drivers/crypto/intel/qat/qat_common/adf_init.c +++ b/drivers/crypto/intel/qat/qat_common/adf_init.c @@ -433,6 +433,18 @@ int adf_dev_restarted_notify(struct adf_accel_dev *accel_dev) return 0; } +void adf_error_notifier(struct adf_accel_dev *accel_dev) +{ + struct service_hndl *service; + + list_for_each_entry(service, &service_table, list) { + if (service->event_hld(accel_dev, ADF_EVENT_FATAL_ERROR)) + dev_err(&GET_DEV(accel_dev), + "Failed to send error event to %s.\n", + service->name); + } +} + static int adf_dev_shutdown_cache_cfg(struct adf_accel_dev *accel_dev) { char services[ADF_CFG_MAX_VAL_LEN_IN_BYTES] = {0}; diff --git a/drivers/crypto/intel/qat/qat_common/adf_isr.c b/drivers/crypto/intel/qat/qat_common/adf_isr.c index 3557a0d6dea2..cae1aee5479a 100644 --- a/drivers/crypto/intel/qat/qat_common/adf_isr.c +++ b/drivers/crypto/intel/qat/qat_common/adf_isr.c @@ -139,8 +139,13 @@ static bool adf_handle_ras_int(struct adf_accel_dev *accel_dev) if (ras_ops->handle_interrupt && ras_ops->handle_interrupt(accel_dev, &reset_required)) { - if (reset_required) + if (reset_required) { dev_err(&GET_DEV(accel_dev), "Fatal error, reset required\n"); + if (adf_notify_fatal_error(accel_dev)) + dev_err(&GET_DEV(accel_dev), + "Failed to notify fatal error\n"); + } + return true; } @@ -272,7 +277,7 @@ static int adf_isr_alloc_msix_vectors_data(struct adf_accel_dev *accel_dev) if (!accel_dev->pf.vf_info) msix_num_entries += hw_data->num_banks; - irqs = kzalloc_node(msix_num_entries * sizeof(*irqs), + irqs = kcalloc_node(msix_num_entries, sizeof(*irqs), GFP_KERNEL, dev_to_node(&GET_DEV(accel_dev))); if (!irqs) return -ENOMEM; @@ -375,8 +380,6 @@ EXPORT_SYMBOL_GPL(adf_isr_resource_alloc); /** * adf_init_misc_wq() - Init misc workqueue * - * Function init workqueue 'qat_misc_wq' for general purpose. - * * Return: 0 on success, error code otherwise. */ int __init adf_init_misc_wq(void) diff --git a/drivers/crypto/intel/qat/qat_common/adf_pfvf_msg.h b/drivers/crypto/intel/qat/qat_common/adf_pfvf_msg.h index 204a42438992..d1b3ef9cadac 100644 --- a/drivers/crypto/intel/qat/qat_common/adf_pfvf_msg.h +++ b/drivers/crypto/intel/qat/qat_common/adf_pfvf_msg.h @@ -99,6 +99,8 @@ enum pf2vf_msgtype { ADF_PF2VF_MSGTYPE_RESTARTING = 0x01, ADF_PF2VF_MSGTYPE_VERSION_RESP = 0x02, ADF_PF2VF_MSGTYPE_BLKMSG_RESP = 0x03, + ADF_PF2VF_MSGTYPE_FATAL_ERROR = 0x04, + ADF_PF2VF_MSGTYPE_RESTARTED = 0x05, /* Values from 0x10 are Gen4 specific, message type is only 4 bits in Gen2 devices. */ ADF_PF2VF_MSGTYPE_RP_RESET_RESP = 0x10, }; @@ -112,6 +114,7 @@ enum vf2pf_msgtype { ADF_VF2PF_MSGTYPE_LARGE_BLOCK_REQ = 0x07, ADF_VF2PF_MSGTYPE_MEDIUM_BLOCK_REQ = 0x08, ADF_VF2PF_MSGTYPE_SMALL_BLOCK_REQ = 0x09, + ADF_VF2PF_MSGTYPE_RESTARTING_COMPLETE = 0x0a, /* Values from 0x10 are Gen4 specific, message type is only 4 bits in Gen2 devices. */ ADF_VF2PF_MSGTYPE_RP_RESET = 0x10, }; @@ -124,8 +127,10 @@ enum pfvf_compatibility_version { ADF_PFVF_COMPAT_FAST_ACK = 0x03, /* Ring to service mapping support for non-standard mappings */ ADF_PFVF_COMPAT_RING_TO_SVC_MAP = 0x04, + /* Fallback compat */ + ADF_PFVF_COMPAT_FALLBACK = 0x05, /* Reference to the latest version */ - ADF_PFVF_COMPAT_THIS_VERSION = 0x04, + ADF_PFVF_COMPAT_THIS_VERSION = 0x05, }; /* PF->VF Version Response */ diff --git a/drivers/crypto/intel/qat/qat_common/adf_pfvf_pf_msg.c b/drivers/crypto/intel/qat/qat_common/adf_pfvf_pf_msg.c index 14c069f0d71a..0e31f4b41844 100644 --- a/drivers/crypto/intel/qat/qat_common/adf_pfvf_pf_msg.c +++ b/drivers/crypto/intel/qat/qat_common/adf_pfvf_pf_msg.c @@ -1,21 +1,83 @@ // SPDX-License-Identifier: (BSD-3-Clause OR GPL-2.0-only) /* Copyright(c) 2015 - 2021 Intel Corporation */ +#include <linux/delay.h> #include <linux/pci.h> #include "adf_accel_devices.h" #include "adf_pfvf_msg.h" #include "adf_pfvf_pf_msg.h" #include "adf_pfvf_pf_proto.h" +#define ADF_PF_WAIT_RESTARTING_COMPLETE_DELAY 100 +#define ADF_VF_SHUTDOWN_RETRY 100 + void adf_pf2vf_notify_restarting(struct adf_accel_dev *accel_dev) { struct adf_accel_vf_info *vf; struct pfvf_message msg = { .type = ADF_PF2VF_MSGTYPE_RESTARTING }; int i, num_vfs = pci_num_vf(accel_to_pci_dev(accel_dev)); + dev_dbg(&GET_DEV(accel_dev), "pf2vf notify restarting\n"); for (i = 0, vf = accel_dev->pf.vf_info; i < num_vfs; i++, vf++) { - if (vf->init && adf_send_pf2vf_msg(accel_dev, i, msg)) + vf->restarting = false; + if (!vf->init) + continue; + if (adf_send_pf2vf_msg(accel_dev, i, msg)) dev_err(&GET_DEV(accel_dev), "Failed to send restarting msg to VF%d\n", i); + else if (vf->vf_compat_ver >= ADF_PFVF_COMPAT_FALLBACK) + vf->restarting = true; + } +} + +void adf_pf2vf_wait_for_restarting_complete(struct adf_accel_dev *accel_dev) +{ + int num_vfs = pci_num_vf(accel_to_pci_dev(accel_dev)); + int i, retries = ADF_VF_SHUTDOWN_RETRY; + struct adf_accel_vf_info *vf; + bool vf_running; + + dev_dbg(&GET_DEV(accel_dev), "pf2vf wait for restarting complete\n"); + do { + vf_running = false; + for (i = 0, vf = accel_dev->pf.vf_info; i < num_vfs; i++, vf++) + if (vf->restarting) + vf_running = true; + if (!vf_running) + break; + msleep(ADF_PF_WAIT_RESTARTING_COMPLETE_DELAY); + } while (--retries); + + if (vf_running) + dev_warn(&GET_DEV(accel_dev), "Some VFs are still running\n"); +} + +void adf_pf2vf_notify_restarted(struct adf_accel_dev *accel_dev) +{ + struct pfvf_message msg = { .type = ADF_PF2VF_MSGTYPE_RESTARTED }; + int i, num_vfs = pci_num_vf(accel_to_pci_dev(accel_dev)); + struct adf_accel_vf_info *vf; + + dev_dbg(&GET_DEV(accel_dev), "pf2vf notify restarted\n"); + for (i = 0, vf = accel_dev->pf.vf_info; i < num_vfs; i++, vf++) { + if (vf->init && vf->vf_compat_ver >= ADF_PFVF_COMPAT_FALLBACK && + adf_send_pf2vf_msg(accel_dev, i, msg)) + dev_err(&GET_DEV(accel_dev), + "Failed to send restarted msg to VF%d\n", i); + } +} + +void adf_pf2vf_notify_fatal_error(struct adf_accel_dev *accel_dev) +{ + struct pfvf_message msg = { .type = ADF_PF2VF_MSGTYPE_FATAL_ERROR }; + int i, num_vfs = pci_num_vf(accel_to_pci_dev(accel_dev)); + struct adf_accel_vf_info *vf; + + dev_dbg(&GET_DEV(accel_dev), "pf2vf notify fatal error\n"); + for (i = 0, vf = accel_dev->pf.vf_info; i < num_vfs; i++, vf++) { + if (vf->init && vf->vf_compat_ver >= ADF_PFVF_COMPAT_FALLBACK && + adf_send_pf2vf_msg(accel_dev, i, msg)) + dev_err(&GET_DEV(accel_dev), + "Failed to send fatal error msg to VF%d\n", i); } } diff --git a/drivers/crypto/intel/qat/qat_common/adf_pfvf_pf_msg.h b/drivers/crypto/intel/qat/qat_common/adf_pfvf_pf_msg.h index e8982d1ac896..f203d88c919c 100644 --- a/drivers/crypto/intel/qat/qat_common/adf_pfvf_pf_msg.h +++ b/drivers/crypto/intel/qat/qat_common/adf_pfvf_pf_msg.h @@ -5,7 +5,28 @@ #include "adf_accel_devices.h" +#if defined(CONFIG_PCI_IOV) void adf_pf2vf_notify_restarting(struct adf_accel_dev *accel_dev); +void adf_pf2vf_wait_for_restarting_complete(struct adf_accel_dev *accel_dev); +void adf_pf2vf_notify_restarted(struct adf_accel_dev *accel_dev); +void adf_pf2vf_notify_fatal_error(struct adf_accel_dev *accel_dev); +#else +static inline void adf_pf2vf_notify_restarting(struct adf_accel_dev *accel_dev) +{ +} + +static inline void adf_pf2vf_wait_for_restarting_complete(struct adf_accel_dev *accel_dev) +{ +} + +static inline void adf_pf2vf_notify_restarted(struct adf_accel_dev *accel_dev) +{ +} + +static inline void adf_pf2vf_notify_fatal_error(struct adf_accel_dev *accel_dev) +{ +} +#endif typedef int (*adf_pf2vf_blkmsg_provider)(struct adf_accel_dev *accel_dev, u8 *buffer, u8 compat); diff --git a/drivers/crypto/intel/qat/qat_common/adf_pfvf_pf_proto.c b/drivers/crypto/intel/qat/qat_common/adf_pfvf_pf_proto.c index 388e58bcbcaf..9ab93fbfefde 100644 --- a/drivers/crypto/intel/qat/qat_common/adf_pfvf_pf_proto.c +++ b/drivers/crypto/intel/qat/qat_common/adf_pfvf_pf_proto.c @@ -291,6 +291,14 @@ static int adf_handle_vf2pf_msg(struct adf_accel_dev *accel_dev, u8 vf_nr, vf_info->init = false; } break; + case ADF_VF2PF_MSGTYPE_RESTARTING_COMPLETE: + { + dev_dbg(&GET_DEV(accel_dev), + "Restarting Complete received from VF%d\n", vf_nr); + vf_info->restarting = false; + vf_info->init = false; + } + break; case ADF_VF2PF_MSGTYPE_LARGE_BLOCK_REQ: case ADF_VF2PF_MSGTYPE_MEDIUM_BLOCK_REQ: case ADF_VF2PF_MSGTYPE_SMALL_BLOCK_REQ: diff --git a/drivers/crypto/intel/qat/qat_common/adf_pfvf_vf_proto.c b/drivers/crypto/intel/qat/qat_common/adf_pfvf_vf_proto.c index 1015155b6374..dc284a089c88 100644 --- a/drivers/crypto/intel/qat/qat_common/adf_pfvf_vf_proto.c +++ b/drivers/crypto/intel/qat/qat_common/adf_pfvf_vf_proto.c @@ -308,6 +308,12 @@ static bool adf_handle_pf2vf_msg(struct adf_accel_dev *accel_dev, adf_pf2vf_handle_pf_restarting(accel_dev); return false; + case ADF_PF2VF_MSGTYPE_RESTARTED: + dev_dbg(&GET_DEV(accel_dev), "Restarted message received from PF\n"); + return true; + case ADF_PF2VF_MSGTYPE_FATAL_ERROR: + dev_err(&GET_DEV(accel_dev), "Fatal error received from PF\n"); + return true; case ADF_PF2VF_MSGTYPE_VERSION_RESP: case ADF_PF2VF_MSGTYPE_BLKMSG_RESP: case ADF_PF2VF_MSGTYPE_RP_RESET_RESP: diff --git a/drivers/crypto/intel/qat/qat_common/adf_rl.c b/drivers/crypto/intel/qat/qat_common/adf_rl.c index de1b214dba1f..d4f2db3c53d8 100644 --- a/drivers/crypto/intel/qat/qat_common/adf_rl.c +++ b/drivers/crypto/intel/qat/qat_common/adf_rl.c @@ -788,6 +788,24 @@ static void clear_sla(struct adf_rl *rl_data, struct rl_sla *sla) sla_type_arr[node_id] = NULL; } +static void free_all_sla(struct adf_accel_dev *accel_dev) +{ + struct adf_rl *rl_data = accel_dev->rate_limiting; + int sla_id; + + mutex_lock(&rl_data->rl_lock); + + for (sla_id = 0; sla_id < RL_NODES_CNT_MAX; sla_id++) { + if (!rl_data->sla[sla_id]) + continue; + + kfree(rl_data->sla[sla_id]); + rl_data->sla[sla_id] = NULL; + } + + mutex_unlock(&rl_data->rl_lock); +} + /** * add_update_sla() - handles the creation and the update of an SLA * @accel_dev: pointer to acceleration device structure @@ -1155,7 +1173,7 @@ void adf_rl_stop(struct adf_accel_dev *accel_dev) return; adf_sysfs_rl_rm(accel_dev); - adf_rl_remove_sla_all(accel_dev, true); + free_all_sla(accel_dev); } void adf_rl_exit(struct adf_accel_dev *accel_dev) diff --git a/drivers/crypto/intel/qat/qat_common/adf_sriov.c b/drivers/crypto/intel/qat/qat_common/adf_sriov.c index f44025bb6f99..87a70c00c41e 100644 --- a/drivers/crypto/intel/qat/qat_common/adf_sriov.c +++ b/drivers/crypto/intel/qat/qat_common/adf_sriov.c @@ -60,7 +60,6 @@ static int adf_enable_sriov(struct adf_accel_dev *accel_dev) /* This ptr will be populated when VFs will be created */ vf_info->accel_dev = accel_dev; vf_info->vf_nr = i; - vf_info->vf_compat_ver = 0; mutex_init(&vf_info->pf2vf_lock); ratelimit_state_init(&vf_info->vf2pf_ratelimit, @@ -84,6 +83,32 @@ static int adf_enable_sriov(struct adf_accel_dev *accel_dev) return pci_enable_sriov(pdev, totalvfs); } +void adf_reenable_sriov(struct adf_accel_dev *accel_dev) +{ + struct pci_dev *pdev = accel_to_pci_dev(accel_dev); + char cfg[ADF_CFG_MAX_VAL_LEN_IN_BYTES] = {0}; + unsigned long val = 0; + + if (adf_cfg_get_param_value(accel_dev, ADF_GENERAL_SEC, + ADF_SRIOV_ENABLED, cfg)) + return; + + if (!accel_dev->pf.vf_info) + return; + + if (adf_cfg_add_key_value_param(accel_dev, ADF_KERNEL_SEC, ADF_NUM_CY, + &val, ADF_DEC)) + return; + + if (adf_cfg_add_key_value_param(accel_dev, ADF_KERNEL_SEC, ADF_NUM_DC, + &val, ADF_DEC)) + return; + + set_bit(ADF_STATUS_CONFIGURED, &accel_dev->status); + dev_dbg(&pdev->dev, "Re-enabling SRIOV\n"); + adf_enable_sriov(accel_dev); +} + /** * adf_disable_sriov() - Disable SRIOV for the device * @accel_dev: Pointer to accel device. @@ -103,6 +128,7 @@ void adf_disable_sriov(struct adf_accel_dev *accel_dev) return; adf_pf2vf_notify_restarting(accel_dev); + adf_pf2vf_wait_for_restarting_complete(accel_dev); pci_disable_sriov(accel_to_pci_dev(accel_dev)); /* Disable VF to PF interrupts */ @@ -115,8 +141,10 @@ void adf_disable_sriov(struct adf_accel_dev *accel_dev) for (i = 0, vf = accel_dev->pf.vf_info; i < totalvfs; i++, vf++) mutex_destroy(&vf->pf2vf_lock); - kfree(accel_dev->pf.vf_info); - accel_dev->pf.vf_info = NULL; + if (!test_bit(ADF_STATUS_RESTARTING, &accel_dev->status)) { + kfree(accel_dev->pf.vf_info); + accel_dev->pf.vf_info = NULL; + } } EXPORT_SYMBOL_GPL(adf_disable_sriov); @@ -194,6 +222,10 @@ int adf_sriov_configure(struct pci_dev *pdev, int numvfs) if (ret) return ret; + val = 1; + adf_cfg_add_key_value_param(accel_dev, ADF_GENERAL_SEC, ADF_SRIOV_ENABLED, + &val, ADF_DEC); + return numvfs; } EXPORT_SYMBOL_GPL(adf_sriov_configure); diff --git a/drivers/crypto/intel/qat/qat_common/adf_sysfs.c b/drivers/crypto/intel/qat/qat_common/adf_sysfs.c index d450dad32c9e..4e7f70d4049d 100644 --- a/drivers/crypto/intel/qat/qat_common/adf_sysfs.c +++ b/drivers/crypto/intel/qat/qat_common/adf_sysfs.c @@ -204,6 +204,42 @@ static ssize_t pm_idle_enabled_store(struct device *dev, struct device_attribute } static DEVICE_ATTR_RW(pm_idle_enabled); +static ssize_t auto_reset_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + char *auto_reset; + struct adf_accel_dev *accel_dev; + + accel_dev = adf_devmgr_pci_to_accel_dev(to_pci_dev(dev)); + if (!accel_dev) + return -EINVAL; + + auto_reset = accel_dev->autoreset_on_error ? "on" : "off"; + + return sysfs_emit(buf, "%s\n", auto_reset); +} + +static ssize_t auto_reset_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + struct adf_accel_dev *accel_dev; + bool enabled = false; + int ret; + + ret = kstrtobool(buf, &enabled); + if (ret) + return ret; + + accel_dev = adf_devmgr_pci_to_accel_dev(to_pci_dev(dev)); + if (!accel_dev) + return -EINVAL; + + accel_dev->autoreset_on_error = enabled; + + return count; +} +static DEVICE_ATTR_RW(auto_reset); + static DEVICE_ATTR_RW(state); static DEVICE_ATTR_RW(cfg_services); @@ -291,6 +327,7 @@ static struct attribute *qat_attrs[] = { &dev_attr_pm_idle_enabled.attr, &dev_attr_rp2srv.attr, &dev_attr_num_rps.attr, + &dev_attr_auto_reset.attr, NULL, }; diff --git a/drivers/crypto/intel/qat/qat_common/adf_vf_isr.c b/drivers/crypto/intel/qat/qat_common/adf_vf_isr.c index b05c3957a160..cdbb2d687b1b 100644 --- a/drivers/crypto/intel/qat/qat_common/adf_vf_isr.c +++ b/drivers/crypto/intel/qat/qat_common/adf_vf_isr.c @@ -293,8 +293,6 @@ EXPORT_SYMBOL_GPL(adf_flush_vf_wq); /** * adf_init_vf_wq() - Init workqueue for VF * - * Function init workqueue 'adf_vf_stop_wq' for VF. - * * Return: 0 on success, error code otherwise. */ int __init adf_init_vf_wq(void) diff --git a/drivers/crypto/intel/qat/qat_common/qat_comp_algs.c b/drivers/crypto/intel/qat/qat_common/qat_comp_algs.c index bf8c0ee62917..2ba4aa22e092 100644 --- a/drivers/crypto/intel/qat/qat_common/qat_comp_algs.c +++ b/drivers/crypto/intel/qat/qat_common/qat_comp_algs.c @@ -13,15 +13,6 @@ #include "qat_compression.h" #include "qat_algs_send.h" -#define QAT_RFC_1950_HDR_SIZE 2 -#define QAT_RFC_1950_FOOTER_SIZE 4 -#define QAT_RFC_1950_CM_DEFLATE 8 -#define QAT_RFC_1950_CM_DEFLATE_CINFO_32K 7 -#define QAT_RFC_1950_CM_MASK 0x0f -#define QAT_RFC_1950_CM_OFFSET 4 -#define QAT_RFC_1950_DICT_MASK 0x20 -#define QAT_RFC_1950_COMP_HDR 0x785e - static DEFINE_MUTEX(algs_lock); static unsigned int active_devs; diff --git a/drivers/crypto/intel/qat/qat_common/qat_crypto.c b/drivers/crypto/intel/qat/qat_common/qat_crypto.c index 40c8e74d1cf9..101c6ea41673 100644 --- a/drivers/crypto/intel/qat/qat_common/qat_crypto.c +++ b/drivers/crypto/intel/qat/qat_common/qat_crypto.c @@ -105,8 +105,8 @@ struct qat_crypto_instance *qat_crypto_get_instance_node(int node) } /** - * qat_crypto_vf_dev_config() - * create dev config required to create crypto inst. + * qat_crypto_vf_dev_config() - create dev config required to create + * crypto inst. * * @accel_dev: Pointer to acceleration device. * diff --git a/drivers/crypto/n2_core.c b/drivers/crypto/n2_core.c index 7a3083debc2b..59d472cb11e7 100644 --- a/drivers/crypto/n2_core.c +++ b/drivers/crypto/n2_core.c @@ -41,7 +41,7 @@ static const char version[] = DRV_MODULE_NAME ".c:v" DRV_MODULE_VERSION " (" DRV_MODULE_RELDATE ")\n"; -MODULE_AUTHOR("David S. Miller (davem@davemloft.net)"); +MODULE_AUTHOR("David S. Miller <davem@davemloft.net>"); MODULE_DESCRIPTION("Niagara2 Crypto driver"); MODULE_LICENSE("GPL"); MODULE_VERSION(DRV_MODULE_VERSION); diff --git a/drivers/crypto/rockchip/rk3288_crypto.c b/drivers/crypto/rockchip/rk3288_crypto.c index 70edf40bc523..f74b3c81ba6d 100644 --- a/drivers/crypto/rockchip/rk3288_crypto.c +++ b/drivers/crypto/rockchip/rk3288_crypto.c @@ -371,6 +371,11 @@ static int rk_crypto_probe(struct platform_device *pdev) } crypto_info->engine = crypto_engine_alloc_init(&pdev->dev, true); + if (!crypto_info->engine) { + err = -ENOMEM; + goto err_crypto; + } + crypto_engine_start(crypto_info->engine); init_completion(&crypto_info->complete); diff --git a/drivers/crypto/virtio/virtio_crypto_akcipher_algs.c b/drivers/crypto/virtio/virtio_crypto_akcipher_algs.c index de53eddf6796..cb92b7fa99c6 100644 --- a/drivers/crypto/virtio/virtio_crypto_akcipher_algs.c +++ b/drivers/crypto/virtio/virtio_crypto_akcipher_algs.c @@ -225,11 +225,11 @@ static int __virtio_crypto_akcipher_do_req(struct virtio_crypto_akcipher_request struct virtio_crypto *vcrypto = ctx->vcrypto; struct virtio_crypto_op_data_req *req_data = vc_req->req_data; struct scatterlist *sgs[4], outhdr_sg, inhdr_sg, srcdata_sg, dstdata_sg; - void *src_buf = NULL, *dst_buf = NULL; + void *src_buf, *dst_buf = NULL; unsigned int num_out = 0, num_in = 0; int node = dev_to_node(&vcrypto->vdev->dev); unsigned long flags; - int ret = -ENOMEM; + int ret; bool verify = vc_akcipher_req->opcode == VIRTIO_CRYPTO_AKCIPHER_VERIFY; unsigned int src_len = verify ? req->src_len + req->dst_len : req->src_len; @@ -240,7 +240,7 @@ static int __virtio_crypto_akcipher_do_req(struct virtio_crypto_akcipher_request /* src data */ src_buf = kcalloc_node(src_len, 1, GFP_KERNEL, node); if (!src_buf) - goto err; + return -ENOMEM; if (verify) { /* for verify operation, both src and dst data work as OUT direction */ @@ -255,7 +255,7 @@ static int __virtio_crypto_akcipher_do_req(struct virtio_crypto_akcipher_request /* dst data */ dst_buf = kcalloc_node(req->dst_len, 1, GFP_KERNEL, node); if (!dst_buf) - goto err; + goto free_src; sg_init_one(&dstdata_sg, dst_buf, req->dst_len); sgs[num_out + num_in++] = &dstdata_sg; @@ -278,9 +278,9 @@ static int __virtio_crypto_akcipher_do_req(struct virtio_crypto_akcipher_request return 0; err: - kfree(src_buf); kfree(dst_buf); - +free_src: + kfree(src_buf); return -ENOMEM; } diff --git a/drivers/crypto/virtio/virtio_crypto_core.c b/drivers/crypto/virtio/virtio_crypto_core.c index b909c6a2bf1c..6a67d70e7f1c 100644 --- a/drivers/crypto/virtio/virtio_crypto_core.c +++ b/drivers/crypto/virtio/virtio_crypto_core.c @@ -42,8 +42,6 @@ static void virtcrypto_ctrlq_callback(struct virtqueue *vq) virtio_crypto_ctrlq_callback(vc_ctrl_req); spin_lock_irqsave(&vcrypto->ctrl_lock, flags); } - if (unlikely(virtqueue_is_broken(vq))) - break; } while (!virtqueue_enable_cb(vq)); spin_unlock_irqrestore(&vcrypto->ctrl_lock, flags); } diff --git a/drivers/crypto/vmx/.gitignore b/drivers/crypto/vmx/.gitignore deleted file mode 100644 index 7aa71d83f739..000000000000 --- a/drivers/crypto/vmx/.gitignore +++ /dev/null @@ -1,3 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0-only -aesp8-ppc.S -ghashp8-ppc.S diff --git a/drivers/crypto/vmx/Kconfig b/drivers/crypto/vmx/Kconfig deleted file mode 100644 index b2c28b87f14b..000000000000 --- a/drivers/crypto/vmx/Kconfig +++ /dev/null @@ -1,14 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0-only -config CRYPTO_DEV_VMX_ENCRYPT - tristate "Encryption acceleration support on P8 CPU" - depends on CRYPTO_DEV_VMX - select CRYPTO_AES - select CRYPTO_CBC - select CRYPTO_CTR - select CRYPTO_GHASH - select CRYPTO_XTS - default m - help - Support for VMX cryptographic acceleration instructions on Power8 CPU. - This module supports acceleration for AES and GHASH in hardware. If you - choose 'M' here, this module will be called vmx-crypto. diff --git a/drivers/crypto/vmx/Makefile b/drivers/crypto/vmx/Makefile deleted file mode 100644 index 7257b8c44626..000000000000 --- a/drivers/crypto/vmx/Makefile +++ /dev/null @@ -1,23 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0 -obj-$(CONFIG_CRYPTO_DEV_VMX_ENCRYPT) += vmx-crypto.o -vmx-crypto-objs := vmx.o aesp8-ppc.o ghashp8-ppc.o aes.o aes_cbc.o aes_ctr.o aes_xts.o ghash.o - -ifeq ($(CONFIG_CPU_LITTLE_ENDIAN),y) -override flavour := linux-ppc64le -else -ifdef CONFIG_PPC64_ELF_ABI_V2 -override flavour := linux-ppc64-elfv2 -else -override flavour := linux-ppc64 -endif -endif - -quiet_cmd_perl = PERL $@ - cmd_perl = $(PERL) $< $(flavour) > $@ - -targets += aesp8-ppc.S ghashp8-ppc.S - -$(obj)/aesp8-ppc.S $(obj)/ghashp8-ppc.S: $(obj)/%.S: $(src)/%.pl FORCE - $(call if_changed,perl) - -OBJECT_FILES_NON_STANDARD_aesp8-ppc.o := y diff --git a/drivers/crypto/vmx/aes.c b/drivers/crypto/vmx/aes.c deleted file mode 100644 index ec06189fbf99..000000000000 --- a/drivers/crypto/vmx/aes.c +++ /dev/null @@ -1,134 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * AES routines supporting VMX instructions on the Power 8 - * - * Copyright (C) 2015 International Business Machines Inc. - * - * Author: Marcelo Henrique Cerri <mhcerri@br.ibm.com> - */ - -#include <linux/types.h> -#include <linux/err.h> -#include <linux/crypto.h> -#include <linux/delay.h> -#include <asm/simd.h> -#include <asm/switch_to.h> -#include <crypto/aes.h> -#include <crypto/internal/cipher.h> -#include <crypto/internal/simd.h> - -#include "aesp8-ppc.h" - -struct p8_aes_ctx { - struct crypto_cipher *fallback; - struct aes_key enc_key; - struct aes_key dec_key; -}; - -static int p8_aes_init(struct crypto_tfm *tfm) -{ - const char *alg = crypto_tfm_alg_name(tfm); - struct crypto_cipher *fallback; - struct p8_aes_ctx *ctx = crypto_tfm_ctx(tfm); - - fallback = crypto_alloc_cipher(alg, 0, CRYPTO_ALG_NEED_FALLBACK); - if (IS_ERR(fallback)) { - printk(KERN_ERR - "Failed to allocate transformation for '%s': %ld\n", - alg, PTR_ERR(fallback)); - return PTR_ERR(fallback); - } - - crypto_cipher_set_flags(fallback, - crypto_cipher_get_flags((struct - crypto_cipher *) - tfm)); - ctx->fallback = fallback; - - return 0; -} - -static void p8_aes_exit(struct crypto_tfm *tfm) -{ - struct p8_aes_ctx *ctx = crypto_tfm_ctx(tfm); - - if (ctx->fallback) { - crypto_free_cipher(ctx->fallback); - ctx->fallback = NULL; - } -} - -static int p8_aes_setkey(struct crypto_tfm *tfm, const u8 *key, - unsigned int keylen) -{ - int ret; - struct p8_aes_ctx *ctx = crypto_tfm_ctx(tfm); - - preempt_disable(); - pagefault_disable(); - enable_kernel_vsx(); - ret = aes_p8_set_encrypt_key(key, keylen * 8, &ctx->enc_key); - ret |= aes_p8_set_decrypt_key(key, keylen * 8, &ctx->dec_key); - disable_kernel_vsx(); - pagefault_enable(); - preempt_enable(); - - ret |= crypto_cipher_setkey(ctx->fallback, key, keylen); - - return ret ? -EINVAL : 0; -} - -static void p8_aes_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) -{ - struct p8_aes_ctx *ctx = crypto_tfm_ctx(tfm); - - if (!crypto_simd_usable()) { - crypto_cipher_encrypt_one(ctx->fallback, dst, src); - } else { - preempt_disable(); - pagefault_disable(); - enable_kernel_vsx(); - aes_p8_encrypt(src, dst, &ctx->enc_key); - disable_kernel_vsx(); - pagefault_enable(); - preempt_enable(); - } -} - -static void p8_aes_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) -{ - struct p8_aes_ctx *ctx = crypto_tfm_ctx(tfm); - - if (!crypto_simd_usable()) { - crypto_cipher_decrypt_one(ctx->fallback, dst, src); - } else { - preempt_disable(); - pagefault_disable(); - enable_kernel_vsx(); - aes_p8_decrypt(src, dst, &ctx->dec_key); - disable_kernel_vsx(); - pagefault_enable(); - preempt_enable(); - } -} - -struct crypto_alg p8_aes_alg = { - .cra_name = "aes", - .cra_driver_name = "p8_aes", - .cra_module = THIS_MODULE, - .cra_priority = 1000, - .cra_type = NULL, - .cra_flags = CRYPTO_ALG_TYPE_CIPHER | CRYPTO_ALG_NEED_FALLBACK, - .cra_alignmask = 0, - .cra_blocksize = AES_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct p8_aes_ctx), - .cra_init = p8_aes_init, - .cra_exit = p8_aes_exit, - .cra_cipher = { - .cia_min_keysize = AES_MIN_KEY_SIZE, - .cia_max_keysize = AES_MAX_KEY_SIZE, - .cia_setkey = p8_aes_setkey, - .cia_encrypt = p8_aes_encrypt, - .cia_decrypt = p8_aes_decrypt, - }, -}; diff --git a/drivers/crypto/vmx/aes_cbc.c b/drivers/crypto/vmx/aes_cbc.c deleted file mode 100644 index ed0debc7acb5..000000000000 --- a/drivers/crypto/vmx/aes_cbc.c +++ /dev/null @@ -1,133 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * AES CBC routines supporting VMX instructions on the Power 8 - * - * Copyright (C) 2015 International Business Machines Inc. - * - * Author: Marcelo Henrique Cerri <mhcerri@br.ibm.com> - */ - -#include <asm/simd.h> -#include <asm/switch_to.h> -#include <crypto/aes.h> -#include <crypto/internal/simd.h> -#include <crypto/internal/skcipher.h> - -#include "aesp8-ppc.h" - -struct p8_aes_cbc_ctx { - struct crypto_skcipher *fallback; - struct aes_key enc_key; - struct aes_key dec_key; -}; - -static int p8_aes_cbc_init(struct crypto_skcipher *tfm) -{ - struct p8_aes_cbc_ctx *ctx = crypto_skcipher_ctx(tfm); - struct crypto_skcipher *fallback; - - fallback = crypto_alloc_skcipher("cbc(aes)", 0, - CRYPTO_ALG_NEED_FALLBACK | - CRYPTO_ALG_ASYNC); - if (IS_ERR(fallback)) { - pr_err("Failed to allocate cbc(aes) fallback: %ld\n", - PTR_ERR(fallback)); - return PTR_ERR(fallback); - } - - crypto_skcipher_set_reqsize(tfm, sizeof(struct skcipher_request) + - crypto_skcipher_reqsize(fallback)); - ctx->fallback = fallback; - return 0; -} - -static void p8_aes_cbc_exit(struct crypto_skcipher *tfm) -{ - struct p8_aes_cbc_ctx *ctx = crypto_skcipher_ctx(tfm); - - crypto_free_skcipher(ctx->fallback); -} - -static int p8_aes_cbc_setkey(struct crypto_skcipher *tfm, const u8 *key, - unsigned int keylen) -{ - struct p8_aes_cbc_ctx *ctx = crypto_skcipher_ctx(tfm); - int ret; - - preempt_disable(); - pagefault_disable(); - enable_kernel_vsx(); - ret = aes_p8_set_encrypt_key(key, keylen * 8, &ctx->enc_key); - ret |= aes_p8_set_decrypt_key(key, keylen * 8, &ctx->dec_key); - disable_kernel_vsx(); - pagefault_enable(); - preempt_enable(); - - ret |= crypto_skcipher_setkey(ctx->fallback, key, keylen); - - return ret ? -EINVAL : 0; -} - -static int p8_aes_cbc_crypt(struct skcipher_request *req, int enc) -{ - struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); - const struct p8_aes_cbc_ctx *ctx = crypto_skcipher_ctx(tfm); - struct skcipher_walk walk; - unsigned int nbytes; - int ret; - - if (!crypto_simd_usable()) { - struct skcipher_request *subreq = skcipher_request_ctx(req); - - *subreq = *req; - skcipher_request_set_tfm(subreq, ctx->fallback); - return enc ? crypto_skcipher_encrypt(subreq) : - crypto_skcipher_decrypt(subreq); - } - - ret = skcipher_walk_virt(&walk, req, false); - while ((nbytes = walk.nbytes) != 0) { - preempt_disable(); - pagefault_disable(); - enable_kernel_vsx(); - aes_p8_cbc_encrypt(walk.src.virt.addr, - walk.dst.virt.addr, - round_down(nbytes, AES_BLOCK_SIZE), - enc ? &ctx->enc_key : &ctx->dec_key, - walk.iv, enc); - disable_kernel_vsx(); - pagefault_enable(); - preempt_enable(); - - ret = skcipher_walk_done(&walk, nbytes % AES_BLOCK_SIZE); - } - return ret; -} - -static int p8_aes_cbc_encrypt(struct skcipher_request *req) -{ - return p8_aes_cbc_crypt(req, 1); -} - -static int p8_aes_cbc_decrypt(struct skcipher_request *req) -{ - return p8_aes_cbc_crypt(req, 0); -} - -struct skcipher_alg p8_aes_cbc_alg = { - .base.cra_name = "cbc(aes)", - .base.cra_driver_name = "p8_aes_cbc", - .base.cra_module = THIS_MODULE, - .base.cra_priority = 2000, - .base.cra_flags = CRYPTO_ALG_NEED_FALLBACK, - .base.cra_blocksize = AES_BLOCK_SIZE, - .base.cra_ctxsize = sizeof(struct p8_aes_cbc_ctx), - .setkey = p8_aes_cbc_setkey, - .encrypt = p8_aes_cbc_encrypt, - .decrypt = p8_aes_cbc_decrypt, - .init = p8_aes_cbc_init, - .exit = p8_aes_cbc_exit, - .min_keysize = AES_MIN_KEY_SIZE, - .max_keysize = AES_MAX_KEY_SIZE, - .ivsize = AES_BLOCK_SIZE, -}; diff --git a/drivers/crypto/vmx/aes_ctr.c b/drivers/crypto/vmx/aes_ctr.c deleted file mode 100644 index 9a3da8cd62f3..000000000000 --- a/drivers/crypto/vmx/aes_ctr.c +++ /dev/null @@ -1,149 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * AES CTR routines supporting VMX instructions on the Power 8 - * - * Copyright (C) 2015 International Business Machines Inc. - * - * Author: Marcelo Henrique Cerri <mhcerri@br.ibm.com> - */ - -#include <asm/simd.h> -#include <asm/switch_to.h> -#include <crypto/aes.h> -#include <crypto/internal/simd.h> -#include <crypto/internal/skcipher.h> - -#include "aesp8-ppc.h" - -struct p8_aes_ctr_ctx { - struct crypto_skcipher *fallback; - struct aes_key enc_key; -}; - -static int p8_aes_ctr_init(struct crypto_skcipher *tfm) -{ - struct p8_aes_ctr_ctx *ctx = crypto_skcipher_ctx(tfm); - struct crypto_skcipher *fallback; - - fallback = crypto_alloc_skcipher("ctr(aes)", 0, - CRYPTO_ALG_NEED_FALLBACK | - CRYPTO_ALG_ASYNC); - if (IS_ERR(fallback)) { - pr_err("Failed to allocate ctr(aes) fallback: %ld\n", - PTR_ERR(fallback)); - return PTR_ERR(fallback); - } - - crypto_skcipher_set_reqsize(tfm, sizeof(struct skcipher_request) + - crypto_skcipher_reqsize(fallback)); - ctx->fallback = fallback; - return 0; -} - -static void p8_aes_ctr_exit(struct crypto_skcipher *tfm) -{ - struct p8_aes_ctr_ctx *ctx = crypto_skcipher_ctx(tfm); - - crypto_free_skcipher(ctx->fallback); -} - -static int p8_aes_ctr_setkey(struct crypto_skcipher *tfm, const u8 *key, - unsigned int keylen) -{ - struct p8_aes_ctr_ctx *ctx = crypto_skcipher_ctx(tfm); - int ret; - - preempt_disable(); - pagefault_disable(); - enable_kernel_vsx(); - ret = aes_p8_set_encrypt_key(key, keylen * 8, &ctx->enc_key); - disable_kernel_vsx(); - pagefault_enable(); - preempt_enable(); - - ret |= crypto_skcipher_setkey(ctx->fallback, key, keylen); - - return ret ? -EINVAL : 0; -} - -static void p8_aes_ctr_final(const struct p8_aes_ctr_ctx *ctx, - struct skcipher_walk *walk) -{ - u8 *ctrblk = walk->iv; - u8 keystream[AES_BLOCK_SIZE]; - u8 *src = walk->src.virt.addr; - u8 *dst = walk->dst.virt.addr; - unsigned int nbytes = walk->nbytes; - - preempt_disable(); - pagefault_disable(); - enable_kernel_vsx(); - aes_p8_encrypt(ctrblk, keystream, &ctx->enc_key); - disable_kernel_vsx(); - pagefault_enable(); - preempt_enable(); - - crypto_xor_cpy(dst, keystream, src, nbytes); - crypto_inc(ctrblk, AES_BLOCK_SIZE); -} - -static int p8_aes_ctr_crypt(struct skcipher_request *req) -{ - struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); - const struct p8_aes_ctr_ctx *ctx = crypto_skcipher_ctx(tfm); - struct skcipher_walk walk; - unsigned int nbytes; - int ret; - - if (!crypto_simd_usable()) { - struct skcipher_request *subreq = skcipher_request_ctx(req); - - *subreq = *req; - skcipher_request_set_tfm(subreq, ctx->fallback); - return crypto_skcipher_encrypt(subreq); - } - - ret = skcipher_walk_virt(&walk, req, false); - while ((nbytes = walk.nbytes) >= AES_BLOCK_SIZE) { - preempt_disable(); - pagefault_disable(); - enable_kernel_vsx(); - aes_p8_ctr32_encrypt_blocks(walk.src.virt.addr, - walk.dst.virt.addr, - nbytes / AES_BLOCK_SIZE, - &ctx->enc_key, walk.iv); - disable_kernel_vsx(); - pagefault_enable(); - preempt_enable(); - - do { - crypto_inc(walk.iv, AES_BLOCK_SIZE); - } while ((nbytes -= AES_BLOCK_SIZE) >= AES_BLOCK_SIZE); - - ret = skcipher_walk_done(&walk, nbytes); - } - if (nbytes) { - p8_aes_ctr_final(ctx, &walk); - ret = skcipher_walk_done(&walk, 0); - } - return ret; -} - -struct skcipher_alg p8_aes_ctr_alg = { - .base.cra_name = "ctr(aes)", - .base.cra_driver_name = "p8_aes_ctr", - .base.cra_module = THIS_MODULE, - .base.cra_priority = 2000, - .base.cra_flags = CRYPTO_ALG_NEED_FALLBACK, - .base.cra_blocksize = 1, - .base.cra_ctxsize = sizeof(struct p8_aes_ctr_ctx), - .setkey = p8_aes_ctr_setkey, - .encrypt = p8_aes_ctr_crypt, - .decrypt = p8_aes_ctr_crypt, - .init = p8_aes_ctr_init, - .exit = p8_aes_ctr_exit, - .min_keysize = AES_MIN_KEY_SIZE, - .max_keysize = AES_MAX_KEY_SIZE, - .ivsize = AES_BLOCK_SIZE, - .chunksize = AES_BLOCK_SIZE, -}; diff --git a/drivers/crypto/vmx/aes_xts.c b/drivers/crypto/vmx/aes_xts.c deleted file mode 100644 index dabbccb41550..000000000000 --- a/drivers/crypto/vmx/aes_xts.c +++ /dev/null @@ -1,162 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * AES XTS routines supporting VMX In-core instructions on Power 8 - * - * Copyright (C) 2015 International Business Machines Inc. - * - * Author: Leonidas S. Barbosa <leosilva@linux.vnet.ibm.com> - */ - -#include <asm/simd.h> -#include <asm/switch_to.h> -#include <crypto/aes.h> -#include <crypto/internal/simd.h> -#include <crypto/internal/skcipher.h> -#include <crypto/xts.h> - -#include "aesp8-ppc.h" - -struct p8_aes_xts_ctx { - struct crypto_skcipher *fallback; - struct aes_key enc_key; - struct aes_key dec_key; - struct aes_key tweak_key; -}; - -static int p8_aes_xts_init(struct crypto_skcipher *tfm) -{ - struct p8_aes_xts_ctx *ctx = crypto_skcipher_ctx(tfm); - struct crypto_skcipher *fallback; - - fallback = crypto_alloc_skcipher("xts(aes)", 0, - CRYPTO_ALG_NEED_FALLBACK | - CRYPTO_ALG_ASYNC); - if (IS_ERR(fallback)) { - pr_err("Failed to allocate xts(aes) fallback: %ld\n", - PTR_ERR(fallback)); - return PTR_ERR(fallback); - } - - crypto_skcipher_set_reqsize(tfm, sizeof(struct skcipher_request) + - crypto_skcipher_reqsize(fallback)); - ctx->fallback = fallback; - return 0; -} - -static void p8_aes_xts_exit(struct crypto_skcipher *tfm) -{ - struct p8_aes_xts_ctx *ctx = crypto_skcipher_ctx(tfm); - - crypto_free_skcipher(ctx->fallback); -} - -static int p8_aes_xts_setkey(struct crypto_skcipher *tfm, const u8 *key, - unsigned int keylen) -{ - struct p8_aes_xts_ctx *ctx = crypto_skcipher_ctx(tfm); - int ret; - - ret = xts_verify_key(tfm, key, keylen); - if (ret) - return ret; - - preempt_disable(); - pagefault_disable(); - enable_kernel_vsx(); - ret = aes_p8_set_encrypt_key(key + keylen/2, (keylen/2) * 8, &ctx->tweak_key); - ret |= aes_p8_set_encrypt_key(key, (keylen/2) * 8, &ctx->enc_key); - ret |= aes_p8_set_decrypt_key(key, (keylen/2) * 8, &ctx->dec_key); - disable_kernel_vsx(); - pagefault_enable(); - preempt_enable(); - - ret |= crypto_skcipher_setkey(ctx->fallback, key, keylen); - - return ret ? -EINVAL : 0; -} - -static int p8_aes_xts_crypt(struct skcipher_request *req, int enc) -{ - struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); - const struct p8_aes_xts_ctx *ctx = crypto_skcipher_ctx(tfm); - struct skcipher_walk walk; - unsigned int nbytes; - u8 tweak[AES_BLOCK_SIZE]; - int ret; - - if (req->cryptlen < AES_BLOCK_SIZE) - return -EINVAL; - - if (!crypto_simd_usable() || (req->cryptlen % XTS_BLOCK_SIZE) != 0) { - struct skcipher_request *subreq = skcipher_request_ctx(req); - - *subreq = *req; - skcipher_request_set_tfm(subreq, ctx->fallback); - return enc ? crypto_skcipher_encrypt(subreq) : - crypto_skcipher_decrypt(subreq); - } - - ret = skcipher_walk_virt(&walk, req, false); - if (ret) - return ret; - - preempt_disable(); - pagefault_disable(); - enable_kernel_vsx(); - - aes_p8_encrypt(walk.iv, tweak, &ctx->tweak_key); - - disable_kernel_vsx(); - pagefault_enable(); - preempt_enable(); - - while ((nbytes = walk.nbytes) != 0) { - preempt_disable(); - pagefault_disable(); - enable_kernel_vsx(); - if (enc) - aes_p8_xts_encrypt(walk.src.virt.addr, - walk.dst.virt.addr, - round_down(nbytes, AES_BLOCK_SIZE), - &ctx->enc_key, NULL, tweak); - else - aes_p8_xts_decrypt(walk.src.virt.addr, - walk.dst.virt.addr, - round_down(nbytes, AES_BLOCK_SIZE), - &ctx->dec_key, NULL, tweak); - disable_kernel_vsx(); - pagefault_enable(); - preempt_enable(); - - ret = skcipher_walk_done(&walk, nbytes % AES_BLOCK_SIZE); - } - return ret; -} - -static int p8_aes_xts_encrypt(struct skcipher_request *req) -{ - return p8_aes_xts_crypt(req, 1); -} - -static int p8_aes_xts_decrypt(struct skcipher_request *req) -{ - return p8_aes_xts_crypt(req, 0); -} - -struct skcipher_alg p8_aes_xts_alg = { - .base.cra_name = "xts(aes)", - .base.cra_driver_name = "p8_aes_xts", - .base.cra_module = THIS_MODULE, - .base.cra_priority = 2000, - .base.cra_flags = CRYPTO_ALG_NEED_FALLBACK, - .base.cra_blocksize = AES_BLOCK_SIZE, - .base.cra_ctxsize = sizeof(struct p8_aes_xts_ctx), - .setkey = p8_aes_xts_setkey, - .encrypt = p8_aes_xts_encrypt, - .decrypt = p8_aes_xts_decrypt, - .init = p8_aes_xts_init, - .exit = p8_aes_xts_exit, - .min_keysize = 2 * AES_MIN_KEY_SIZE, - .max_keysize = 2 * AES_MAX_KEY_SIZE, - .ivsize = AES_BLOCK_SIZE, -}; diff --git a/drivers/crypto/vmx/aesp8-ppc.h b/drivers/crypto/vmx/aesp8-ppc.h deleted file mode 100644 index 5764d4438388..000000000000 --- a/drivers/crypto/vmx/aesp8-ppc.h +++ /dev/null @@ -1,30 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#include <linux/types.h> -#include <crypto/aes.h> - -struct aes_key { - u8 key[AES_MAX_KEYLENGTH]; - int rounds; -}; - -extern struct shash_alg p8_ghash_alg; -extern struct crypto_alg p8_aes_alg; -extern struct skcipher_alg p8_aes_cbc_alg; -extern struct skcipher_alg p8_aes_ctr_alg; -extern struct skcipher_alg p8_aes_xts_alg; - -int aes_p8_set_encrypt_key(const u8 *userKey, const int bits, - struct aes_key *key); -int aes_p8_set_decrypt_key(const u8 *userKey, const int bits, - struct aes_key *key); -void aes_p8_encrypt(const u8 *in, u8 *out, const struct aes_key *key); -void aes_p8_decrypt(const u8 *in, u8 *out, const struct aes_key *key); -void aes_p8_cbc_encrypt(const u8 *in, u8 *out, size_t len, - const struct aes_key *key, u8 *iv, const int enc); -void aes_p8_ctr32_encrypt_blocks(const u8 *in, u8 *out, - size_t len, const struct aes_key *key, - const u8 *iv); -void aes_p8_xts_encrypt(const u8 *in, u8 *out, size_t len, - const struct aes_key *key1, const struct aes_key *key2, u8 *iv); -void aes_p8_xts_decrypt(const u8 *in, u8 *out, size_t len, - const struct aes_key *key1, const struct aes_key *key2, u8 *iv); diff --git a/drivers/crypto/vmx/aesp8-ppc.pl b/drivers/crypto/vmx/aesp8-ppc.pl deleted file mode 100644 index f729589d792e..000000000000 --- a/drivers/crypto/vmx/aesp8-ppc.pl +++ /dev/null @@ -1,3889 +0,0 @@ -#! /usr/bin/env perl -# SPDX-License-Identifier: GPL-2.0 - -# This code is taken from CRYPTOGAMs[1] and is included here using the option -# in the license to distribute the code under the GPL. Therefore this program -# is free software; you can redistribute it and/or modify it under the terms of -# the GNU General Public License version 2 as published by the Free Software -# Foundation. -# -# [1] https://www.openssl.org/~appro/cryptogams/ - -# Copyright (c) 2006-2017, CRYPTOGAMS by <appro@openssl.org> -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions -# are met: -# -# * Redistributions of source code must retain copyright notices, -# this list of conditions and the following disclaimer. -# -# * Redistributions in binary form must reproduce the above -# copyright notice, this list of conditions and the following -# disclaimer in the documentation and/or other materials -# provided with the distribution. -# -# * Neither the name of the CRYPTOGAMS nor the names of its -# copyright holder and contributors may be used to endorse or -# promote products derived from this software without specific -# prior written permission. -# -# ALTERNATIVELY, provided that this notice is retained in full, this -# product may be distributed under the terms of the GNU General Public -# License (GPL), in which case the provisions of the GPL apply INSTEAD OF -# those given above. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS -# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -# ==================================================================== -# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL -# project. The module is, however, dual licensed under OpenSSL and -# CRYPTOGAMS licenses depending on where you obtain it. For further -# details see https://www.openssl.org/~appro/cryptogams/. -# ==================================================================== -# -# This module implements support for AES instructions as per PowerISA -# specification version 2.07, first implemented by POWER8 processor. -# The module is endian-agnostic in sense that it supports both big- -# and little-endian cases. Data alignment in parallelizable modes is -# handled with VSX loads and stores, which implies MSR.VSX flag being -# set. It should also be noted that ISA specification doesn't prohibit -# alignment exceptions for these instructions on page boundaries. -# Initially alignment was handled in pure AltiVec/VMX way [when data -# is aligned programmatically, which in turn guarantees exception- -# free execution], but it turned to hamper performance when vcipher -# instructions are interleaved. It's reckoned that eventual -# misalignment penalties at page boundaries are in average lower -# than additional overhead in pure AltiVec approach. -# -# May 2016 -# -# Add XTS subroutine, 9x on little- and 12x improvement on big-endian -# systems were measured. -# -###################################################################### -# Current large-block performance in cycles per byte processed with -# 128-bit key (less is better). -# -# CBC en-/decrypt CTR XTS -# POWER8[le] 3.96/0.72 0.74 1.1 -# POWER8[be] 3.75/0.65 0.66 1.0 - -$flavour = shift; - -if ($flavour =~ /64/) { - $SIZE_T =8; - $LRSAVE =2*$SIZE_T; - $STU ="stdu"; - $POP ="ld"; - $PUSH ="std"; - $UCMP ="cmpld"; - $SHL ="sldi"; -} elsif ($flavour =~ /32/) { - $SIZE_T =4; - $LRSAVE =$SIZE_T; - $STU ="stwu"; - $POP ="lwz"; - $PUSH ="stw"; - $UCMP ="cmplw"; - $SHL ="slwi"; -} else { die "nonsense $flavour"; } - -$LITTLE_ENDIAN = ($flavour=~/le$/) ? $SIZE_T : 0; - -$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; -( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or -( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or -die "can't locate ppc-xlate.pl"; - -open STDOUT,"| $^X $xlate $flavour ".shift || die "can't call $xlate: $!"; - -$FRAME=8*$SIZE_T; -$prefix="aes_p8"; - -$sp="r1"; -$vrsave="r12"; - -######################################################################### -{{{ # Key setup procedures # -my ($inp,$bits,$out,$ptr,$cnt,$rounds)=map("r$_",(3..8)); -my ($zero,$in0,$in1,$key,$rcon,$mask,$tmp)=map("v$_",(0..6)); -my ($stage,$outperm,$outmask,$outhead,$outtail)=map("v$_",(7..11)); - -$code.=<<___; -.machine "any" - -.text - -.align 7 -rcon: -.long 0x01000000, 0x01000000, 0x01000000, 0x01000000 ?rev -.long 0x1b000000, 0x1b000000, 0x1b000000, 0x1b000000 ?rev -.long 0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c ?rev -.long 0,0,0,0 ?asis -.long 0x0f102132, 0x43546576, 0x8798a9ba, 0xcbdcedfe -Lconsts: - mflr r0 - bcl 20,31,\$+4 - mflr $ptr #vvvvv "distance between . and rcon - addi $ptr,$ptr,-0x58 - mtlr r0 - blr - .long 0 - .byte 0,12,0x14,0,0,0,0,0 -.asciz "AES for PowerISA 2.07, CRYPTOGAMS by <appro\@openssl.org>" - -.globl .${prefix}_set_encrypt_key -Lset_encrypt_key: - mflr r11 - $PUSH r11,$LRSAVE($sp) - - li $ptr,-1 - ${UCMP}i $inp,0 - beq- Lenc_key_abort # if ($inp==0) return -1; - ${UCMP}i $out,0 - beq- Lenc_key_abort # if ($out==0) return -1; - li $ptr,-2 - cmpwi $bits,128 - blt- Lenc_key_abort - cmpwi $bits,256 - bgt- Lenc_key_abort - andi. r0,$bits,0x3f - bne- Lenc_key_abort - - lis r0,0xfff0 - mfspr $vrsave,256 - mtspr 256,r0 - - bl Lconsts - mtlr r11 - - neg r9,$inp - lvx $in0,0,$inp - addi $inp,$inp,15 # 15 is not typo - lvsr $key,0,r9 # borrow $key - li r8,0x20 - cmpwi $bits,192 - lvx $in1,0,$inp - le?vspltisb $mask,0x0f # borrow $mask - lvx $rcon,0,$ptr - le?vxor $key,$key,$mask # adjust for byte swap - lvx $mask,r8,$ptr - addi $ptr,$ptr,0x10 - vperm $in0,$in0,$in1,$key # align [and byte swap in LE] - li $cnt,8 - vxor $zero,$zero,$zero - mtctr $cnt - - ?lvsr $outperm,0,$out - vspltisb $outmask,-1 - lvx $outhead,0,$out - ?vperm $outmask,$zero,$outmask,$outperm - - blt Loop128 - addi $inp,$inp,8 - beq L192 - addi $inp,$inp,8 - b L256 - -.align 4 -Loop128: - vperm $key,$in0,$in0,$mask # rotate-n-splat - vsldoi $tmp,$zero,$in0,12 # >>32 - vperm $outtail,$in0,$in0,$outperm # rotate - vsel $stage,$outhead,$outtail,$outmask - vmr $outhead,$outtail - vcipherlast $key,$key,$rcon - stvx $stage,0,$out - addi $out,$out,16 - - vxor $in0,$in0,$tmp - vsldoi $tmp,$zero,$tmp,12 # >>32 - vxor $in0,$in0,$tmp - vsldoi $tmp,$zero,$tmp,12 # >>32 - vxor $in0,$in0,$tmp - vadduwm $rcon,$rcon,$rcon - vxor $in0,$in0,$key - bdnz Loop128 - - lvx $rcon,0,$ptr # last two round keys - - vperm $key,$in0,$in0,$mask # rotate-n-splat - vsldoi $tmp,$zero,$in0,12 # >>32 - vperm $outtail,$in0,$in0,$outperm # rotate - vsel $stage,$outhead,$outtail,$outmask - vmr $outhead,$outtail - vcipherlast $key,$key,$rcon - stvx $stage,0,$out - addi $out,$out,16 - - vxor $in0,$in0,$tmp - vsldoi $tmp,$zero,$tmp,12 # >>32 - vxor $in0,$in0,$tmp - vsldoi $tmp,$zero,$tmp,12 # >>32 - vxor $in0,$in0,$tmp - vadduwm $rcon,$rcon,$rcon - vxor $in0,$in0,$key - - vperm $key,$in0,$in0,$mask # rotate-n-splat - vsldoi $tmp,$zero,$in0,12 # >>32 - vperm $outtail,$in0,$in0,$outperm # rotate - vsel $stage,$outhead,$outtail,$outmask - vmr $outhead,$outtail - vcipherlast $key,$key,$rcon - stvx $stage,0,$out - addi $out,$out,16 - - vxor $in0,$in0,$tmp - vsldoi $tmp,$zero,$tmp,12 # >>32 - vxor $in0,$in0,$tmp - vsldoi $tmp,$zero,$tmp,12 # >>32 - vxor $in0,$in0,$tmp - vxor $in0,$in0,$key - vperm $outtail,$in0,$in0,$outperm # rotate - vsel $stage,$outhead,$outtail,$outmask - vmr $outhead,$outtail - stvx $stage,0,$out - - addi $inp,$out,15 # 15 is not typo - addi $out,$out,0x50 - - li $rounds,10 - b Ldone - -.align 4 -L192: - lvx $tmp,0,$inp - li $cnt,4 - vperm $outtail,$in0,$in0,$outperm # rotate - vsel $stage,$outhead,$outtail,$outmask - vmr $outhead,$outtail - stvx $stage,0,$out - addi $out,$out,16 - vperm $in1,$in1,$tmp,$key # align [and byte swap in LE] - vspltisb $key,8 # borrow $key - mtctr $cnt - vsububm $mask,$mask,$key # adjust the mask - -Loop192: - vperm $key,$in1,$in1,$mask # roate-n-splat - vsldoi $tmp,$zero,$in0,12 # >>32 - vcipherlast $key,$key,$rcon - - vxor $in0,$in0,$tmp - vsldoi $tmp,$zero,$tmp,12 # >>32 - vxor $in0,$in0,$tmp - vsldoi $tmp,$zero,$tmp,12 # >>32 - vxor $in0,$in0,$tmp - - vsldoi $stage,$zero,$in1,8 - vspltw $tmp,$in0,3 - vxor $tmp,$tmp,$in1 - vsldoi $in1,$zero,$in1,12 # >>32 - vadduwm $rcon,$rcon,$rcon - vxor $in1,$in1,$tmp - vxor $in0,$in0,$key - vxor $in1,$in1,$key - vsldoi $stage,$stage,$in0,8 - - vperm $key,$in1,$in1,$mask # rotate-n-splat - vsldoi $tmp,$zero,$in0,12 # >>32 - vperm $outtail,$stage,$stage,$outperm # rotate - vsel $stage,$outhead,$outtail,$outmask - vmr $outhead,$outtail - vcipherlast $key,$key,$rcon - stvx $stage,0,$out - addi $out,$out,16 - - vsldoi $stage,$in0,$in1,8 - vxor $in0,$in0,$tmp - vsldoi $tmp,$zero,$tmp,12 # >>32 - vperm $outtail,$stage,$stage,$outperm # rotate - vsel $stage,$outhead,$outtail,$outmask - vmr $outhead,$outtail - vxor $in0,$in0,$tmp - vsldoi $tmp,$zero,$tmp,12 # >>32 - vxor $in0,$in0,$tmp - stvx $stage,0,$out - addi $out,$out,16 - - vspltw $tmp,$in0,3 - vxor $tmp,$tmp,$in1 - vsldoi $in1,$zero,$in1,12 # >>32 - vadduwm $rcon,$rcon,$rcon - vxor $in1,$in1,$tmp - vxor $in0,$in0,$key - vxor $in1,$in1,$key - vperm $outtail,$in0,$in0,$outperm # rotate - vsel $stage,$outhead,$outtail,$outmask - vmr $outhead,$outtail - stvx $stage,0,$out - addi $inp,$out,15 # 15 is not typo - addi $out,$out,16 - bdnz Loop192 - - li $rounds,12 - addi $out,$out,0x20 - b Ldone - -.align 4 -L256: - lvx $tmp,0,$inp - li $cnt,7 - li $rounds,14 - vperm $outtail,$in0,$in0,$outperm # rotate - vsel $stage,$outhead,$outtail,$outmask - vmr $outhead,$outtail - stvx $stage,0,$out - addi $out,$out,16 - vperm $in1,$in1,$tmp,$key # align [and byte swap in LE] - mtctr $cnt - -Loop256: - vperm $key,$in1,$in1,$mask # rotate-n-splat - vsldoi $tmp,$zero,$in0,12 # >>32 - vperm $outtail,$in1,$in1,$outperm # rotate - vsel $stage,$outhead,$outtail,$outmask - vmr $outhead,$outtail - vcipherlast $key,$key,$rcon - stvx $stage,0,$out - addi $out,$out,16 - - vxor $in0,$in0,$tmp - vsldoi $tmp,$zero,$tmp,12 # >>32 - vxor $in0,$in0,$tmp - vsldoi $tmp,$zero,$tmp,12 # >>32 - vxor $in0,$in0,$tmp - vadduwm $rcon,$rcon,$rcon - vxor $in0,$in0,$key - vperm $outtail,$in0,$in0,$outperm # rotate - vsel $stage,$outhead,$outtail,$outmask - vmr $outhead,$outtail - stvx $stage,0,$out - addi $inp,$out,15 # 15 is not typo - addi $out,$out,16 - bdz Ldone - - vspltw $key,$in0,3 # just splat - vsldoi $tmp,$zero,$in1,12 # >>32 - vsbox $key,$key - - vxor $in1,$in1,$tmp - vsldoi $tmp,$zero,$tmp,12 # >>32 - vxor $in1,$in1,$tmp - vsldoi $tmp,$zero,$tmp,12 # >>32 - vxor $in1,$in1,$tmp - - vxor $in1,$in1,$key - b Loop256 - -.align 4 -Ldone: - lvx $in1,0,$inp # redundant in aligned case - vsel $in1,$outhead,$in1,$outmask - stvx $in1,0,$inp - li $ptr,0 - mtspr 256,$vrsave - stw $rounds,0($out) - -Lenc_key_abort: - mr r3,$ptr - blr - .long 0 - .byte 0,12,0x14,1,0,0,3,0 - .long 0 -.size .${prefix}_set_encrypt_key,.-.${prefix}_set_encrypt_key - -.globl .${prefix}_set_decrypt_key - $STU $sp,-$FRAME($sp) - mflr r10 - $PUSH r10,$FRAME+$LRSAVE($sp) - bl Lset_encrypt_key - mtlr r10 - - cmpwi r3,0 - bne- Ldec_key_abort - - slwi $cnt,$rounds,4 - subi $inp,$out,240 # first round key - srwi $rounds,$rounds,1 - add $out,$inp,$cnt # last round key - mtctr $rounds - -Ldeckey: - lwz r0, 0($inp) - lwz r6, 4($inp) - lwz r7, 8($inp) - lwz r8, 12($inp) - addi $inp,$inp,16 - lwz r9, 0($out) - lwz r10,4($out) - lwz r11,8($out) - lwz r12,12($out) - stw r0, 0($out) - stw r6, 4($out) - stw r7, 8($out) - stw r8, 12($out) - subi $out,$out,16 - stw r9, -16($inp) - stw r10,-12($inp) - stw r11,-8($inp) - stw r12,-4($inp) - bdnz Ldeckey - - xor r3,r3,r3 # return value -Ldec_key_abort: - addi $sp,$sp,$FRAME - blr - .long 0 - .byte 0,12,4,1,0x80,0,3,0 - .long 0 -.size .${prefix}_set_decrypt_key,.-.${prefix}_set_decrypt_key -___ -}}} -######################################################################### -{{{ # Single block en- and decrypt procedures # -sub gen_block () { -my $dir = shift; -my $n = $dir eq "de" ? "n" : ""; -my ($inp,$out,$key,$rounds,$idx)=map("r$_",(3..7)); - -$code.=<<___; -.globl .${prefix}_${dir}crypt - lwz $rounds,240($key) - lis r0,0xfc00 - mfspr $vrsave,256 - li $idx,15 # 15 is not typo - mtspr 256,r0 - - lvx v0,0,$inp - neg r11,$out - lvx v1,$idx,$inp - lvsl v2,0,$inp # inpperm - le?vspltisb v4,0x0f - ?lvsl v3,0,r11 # outperm - le?vxor v2,v2,v4 - li $idx,16 - vperm v0,v0,v1,v2 # align [and byte swap in LE] - lvx v1,0,$key - ?lvsl v5,0,$key # keyperm - srwi $rounds,$rounds,1 - lvx v2,$idx,$key - addi $idx,$idx,16 - subi $rounds,$rounds,1 - ?vperm v1,v1,v2,v5 # align round key - - vxor v0,v0,v1 - lvx v1,$idx,$key - addi $idx,$idx,16 - mtctr $rounds - -Loop_${dir}c: - ?vperm v2,v2,v1,v5 - v${n}cipher v0,v0,v2 - lvx v2,$idx,$key - addi $idx,$idx,16 - ?vperm v1,v1,v2,v5 - v${n}cipher v0,v0,v1 - lvx v1,$idx,$key - addi $idx,$idx,16 - bdnz Loop_${dir}c - - ?vperm v2,v2,v1,v5 - v${n}cipher v0,v0,v2 - lvx v2,$idx,$key - ?vperm v1,v1,v2,v5 - v${n}cipherlast v0,v0,v1 - - vspltisb v2,-1 - vxor v1,v1,v1 - li $idx,15 # 15 is not typo - ?vperm v2,v1,v2,v3 # outmask - le?vxor v3,v3,v4 - lvx v1,0,$out # outhead - vperm v0,v0,v0,v3 # rotate [and byte swap in LE] - vsel v1,v1,v0,v2 - lvx v4,$idx,$out - stvx v1,0,$out - vsel v0,v0,v4,v2 - stvx v0,$idx,$out - - mtspr 256,$vrsave - blr - .long 0 - .byte 0,12,0x14,0,0,0,3,0 - .long 0 -.size .${prefix}_${dir}crypt,.-.${prefix}_${dir}crypt -___ -} -&gen_block("en"); -&gen_block("de"); -}}} -######################################################################### -{{{ # CBC en- and decrypt procedures # -my ($inp,$out,$len,$key,$ivp,$enc,$rounds,$idx)=map("r$_",(3..10)); -my ($rndkey0,$rndkey1,$inout,$tmp)= map("v$_",(0..3)); -my ($ivec,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm)= - map("v$_",(4..10)); -$code.=<<___; -.globl .${prefix}_cbc_encrypt - ${UCMP}i $len,16 - bltlr- - - cmpwi $enc,0 # test direction - lis r0,0xffe0 - mfspr $vrsave,256 - mtspr 256,r0 - - li $idx,15 - vxor $rndkey0,$rndkey0,$rndkey0 - le?vspltisb $tmp,0x0f - - lvx $ivec,0,$ivp # load [unaligned] iv - lvsl $inpperm,0,$ivp - lvx $inptail,$idx,$ivp - le?vxor $inpperm,$inpperm,$tmp - vperm $ivec,$ivec,$inptail,$inpperm - - neg r11,$inp - ?lvsl $keyperm,0,$key # prepare for unaligned key - lwz $rounds,240($key) - - lvsr $inpperm,0,r11 # prepare for unaligned load - lvx $inptail,0,$inp - addi $inp,$inp,15 # 15 is not typo - le?vxor $inpperm,$inpperm,$tmp - - ?lvsr $outperm,0,$out # prepare for unaligned store - vspltisb $outmask,-1 - lvx $outhead,0,$out - ?vperm $outmask,$rndkey0,$outmask,$outperm - le?vxor $outperm,$outperm,$tmp - - srwi $rounds,$rounds,1 - li $idx,16 - subi $rounds,$rounds,1 - beq Lcbc_dec - -Lcbc_enc: - vmr $inout,$inptail - lvx $inptail,0,$inp - addi $inp,$inp,16 - mtctr $rounds - subi $len,$len,16 # len-=16 - - lvx $rndkey0,0,$key - vperm $inout,$inout,$inptail,$inpperm - lvx $rndkey1,$idx,$key - addi $idx,$idx,16 - ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm - vxor $inout,$inout,$rndkey0 - lvx $rndkey0,$idx,$key - addi $idx,$idx,16 - vxor $inout,$inout,$ivec - -Loop_cbc_enc: - ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm - vcipher $inout,$inout,$rndkey1 - lvx $rndkey1,$idx,$key - addi $idx,$idx,16 - ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm - vcipher $inout,$inout,$rndkey0 - lvx $rndkey0,$idx,$key - addi $idx,$idx,16 - bdnz Loop_cbc_enc - - ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm - vcipher $inout,$inout,$rndkey1 - lvx $rndkey1,$idx,$key - li $idx,16 - ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm - vcipherlast $ivec,$inout,$rndkey0 - ${UCMP}i $len,16 - - vperm $tmp,$ivec,$ivec,$outperm - vsel $inout,$outhead,$tmp,$outmask - vmr $outhead,$tmp - stvx $inout,0,$out - addi $out,$out,16 - bge Lcbc_enc - - b Lcbc_done - -.align 4 -Lcbc_dec: - ${UCMP}i $len,128 - bge _aesp8_cbc_decrypt8x - vmr $tmp,$inptail - lvx $inptail,0,$inp - addi $inp,$inp,16 - mtctr $rounds - subi $len,$len,16 # len-=16 - - lvx $rndkey0,0,$key - vperm $tmp,$tmp,$inptail,$inpperm - lvx $rndkey1,$idx,$key - addi $idx,$idx,16 - ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm - vxor $inout,$tmp,$rndkey0 - lvx $rndkey0,$idx,$key - addi $idx,$idx,16 - -Loop_cbc_dec: - ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm - vncipher $inout,$inout,$rndkey1 - lvx $rndkey1,$idx,$key - addi $idx,$idx,16 - ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm - vncipher $inout,$inout,$rndkey0 - lvx $rndkey0,$idx,$key - addi $idx,$idx,16 - bdnz Loop_cbc_dec - - ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm - vncipher $inout,$inout,$rndkey1 - lvx $rndkey1,$idx,$key - li $idx,16 - ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm - vncipherlast $inout,$inout,$rndkey0 - ${UCMP}i $len,16 - - vxor $inout,$inout,$ivec - vmr $ivec,$tmp - vperm $tmp,$inout,$inout,$outperm - vsel $inout,$outhead,$tmp,$outmask - vmr $outhead,$tmp - stvx $inout,0,$out - addi $out,$out,16 - bge Lcbc_dec - -Lcbc_done: - addi $out,$out,-1 - lvx $inout,0,$out # redundant in aligned case - vsel $inout,$outhead,$inout,$outmask - stvx $inout,0,$out - - neg $enc,$ivp # write [unaligned] iv - li $idx,15 # 15 is not typo - vxor $rndkey0,$rndkey0,$rndkey0 - vspltisb $outmask,-1 - le?vspltisb $tmp,0x0f - ?lvsl $outperm,0,$enc - ?vperm $outmask,$rndkey0,$outmask,$outperm - le?vxor $outperm,$outperm,$tmp - lvx $outhead,0,$ivp - vperm $ivec,$ivec,$ivec,$outperm - vsel $inout,$outhead,$ivec,$outmask - lvx $inptail,$idx,$ivp - stvx $inout,0,$ivp - vsel $inout,$ivec,$inptail,$outmask - stvx $inout,$idx,$ivp - - mtspr 256,$vrsave - blr - .long 0 - .byte 0,12,0x14,0,0,0,6,0 - .long 0 -___ -######################################################################### -{{ # Optimized CBC decrypt procedure # -my $key_="r11"; -my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31)); -my ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7 )=map("v$_",(0..3,10..13)); -my ($out0,$out1,$out2,$out3,$out4,$out5,$out6,$out7)=map("v$_",(14..21)); -my $rndkey0="v23"; # v24-v25 rotating buffer for first found keys - # v26-v31 last 6 round keys -my ($tmp,$keyperm)=($in3,$in4); # aliases with "caller", redundant assignment - -$code.=<<___; -.align 5 -_aesp8_cbc_decrypt8x: - $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp) - li r10,`$FRAME+8*16+15` - li r11,`$FRAME+8*16+31` - stvx v20,r10,$sp # ABI says so - addi r10,r10,32 - stvx v21,r11,$sp - addi r11,r11,32 - stvx v22,r10,$sp - addi r10,r10,32 - stvx v23,r11,$sp - addi r11,r11,32 - stvx v24,r10,$sp - addi r10,r10,32 - stvx v25,r11,$sp - addi r11,r11,32 - stvx v26,r10,$sp - addi r10,r10,32 - stvx v27,r11,$sp - addi r11,r11,32 - stvx v28,r10,$sp - addi r10,r10,32 - stvx v29,r11,$sp - addi r11,r11,32 - stvx v30,r10,$sp - stvx v31,r11,$sp - li r0,-1 - stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave - li $x10,0x10 - $PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp) - li $x20,0x20 - $PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp) - li $x30,0x30 - $PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp) - li $x40,0x40 - $PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp) - li $x50,0x50 - $PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp) - li $x60,0x60 - $PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp) - li $x70,0x70 - mtspr 256,r0 - - subi $rounds,$rounds,3 # -4 in total - subi $len,$len,128 # bias - - lvx $rndkey0,$x00,$key # load key schedule - lvx v30,$x10,$key - addi $key,$key,0x20 - lvx v31,$x00,$key - ?vperm $rndkey0,$rndkey0,v30,$keyperm - addi $key_,$sp,$FRAME+15 - mtctr $rounds - -Load_cbc_dec_key: - ?vperm v24,v30,v31,$keyperm - lvx v30,$x10,$key - addi $key,$key,0x20 - stvx v24,$x00,$key_ # off-load round[1] - ?vperm v25,v31,v30,$keyperm - lvx v31,$x00,$key - stvx v25,$x10,$key_ # off-load round[2] - addi $key_,$key_,0x20 - bdnz Load_cbc_dec_key - - lvx v26,$x10,$key - ?vperm v24,v30,v31,$keyperm - lvx v27,$x20,$key - stvx v24,$x00,$key_ # off-load round[3] - ?vperm v25,v31,v26,$keyperm - lvx v28,$x30,$key - stvx v25,$x10,$key_ # off-load round[4] - addi $key_,$sp,$FRAME+15 # rewind $key_ - ?vperm v26,v26,v27,$keyperm - lvx v29,$x40,$key - ?vperm v27,v27,v28,$keyperm - lvx v30,$x50,$key - ?vperm v28,v28,v29,$keyperm - lvx v31,$x60,$key - ?vperm v29,v29,v30,$keyperm - lvx $out0,$x70,$key # borrow $out0 - ?vperm v30,v30,v31,$keyperm - lvx v24,$x00,$key_ # pre-load round[1] - ?vperm v31,v31,$out0,$keyperm - lvx v25,$x10,$key_ # pre-load round[2] - - #lvx $inptail,0,$inp # "caller" already did this - #addi $inp,$inp,15 # 15 is not typo - subi $inp,$inp,15 # undo "caller" - - le?li $idx,8 - lvx_u $in0,$x00,$inp # load first 8 "words" - le?lvsl $inpperm,0,$idx - le?vspltisb $tmp,0x0f - lvx_u $in1,$x10,$inp - le?vxor $inpperm,$inpperm,$tmp # transform for lvx_u/stvx_u - lvx_u $in2,$x20,$inp - le?vperm $in0,$in0,$in0,$inpperm - lvx_u $in3,$x30,$inp - le?vperm $in1,$in1,$in1,$inpperm - lvx_u $in4,$x40,$inp - le?vperm $in2,$in2,$in2,$inpperm - vxor $out0,$in0,$rndkey0 - lvx_u $in5,$x50,$inp - le?vperm $in3,$in3,$in3,$inpperm - vxor $out1,$in1,$rndkey0 - lvx_u $in6,$x60,$inp - le?vperm $in4,$in4,$in4,$inpperm - vxor $out2,$in2,$rndkey0 - lvx_u $in7,$x70,$inp - addi $inp,$inp,0x80 - le?vperm $in5,$in5,$in5,$inpperm - vxor $out3,$in3,$rndkey0 - le?vperm $in6,$in6,$in6,$inpperm - vxor $out4,$in4,$rndkey0 - le?vperm $in7,$in7,$in7,$inpperm - vxor $out5,$in5,$rndkey0 - vxor $out6,$in6,$rndkey0 - vxor $out7,$in7,$rndkey0 - - mtctr $rounds - b Loop_cbc_dec8x -.align 5 -Loop_cbc_dec8x: - vncipher $out0,$out0,v24 - vncipher $out1,$out1,v24 - vncipher $out2,$out2,v24 - vncipher $out3,$out3,v24 - vncipher $out4,$out4,v24 - vncipher $out5,$out5,v24 - vncipher $out6,$out6,v24 - vncipher $out7,$out7,v24 - lvx v24,$x20,$key_ # round[3] - addi $key_,$key_,0x20 - - vncipher $out0,$out0,v25 - vncipher $out1,$out1,v25 - vncipher $out2,$out2,v25 - vncipher $out3,$out3,v25 - vncipher $out4,$out4,v25 - vncipher $out5,$out5,v25 - vncipher $out6,$out6,v25 - vncipher $out7,$out7,v25 - lvx v25,$x10,$key_ # round[4] - bdnz Loop_cbc_dec8x - - subic $len,$len,128 # $len-=128 - vncipher $out0,$out0,v24 - vncipher $out1,$out1,v24 - vncipher $out2,$out2,v24 - vncipher $out3,$out3,v24 - vncipher $out4,$out4,v24 - vncipher $out5,$out5,v24 - vncipher $out6,$out6,v24 - vncipher $out7,$out7,v24 - - subfe. r0,r0,r0 # borrow?-1:0 - vncipher $out0,$out0,v25 - vncipher $out1,$out1,v25 - vncipher $out2,$out2,v25 - vncipher $out3,$out3,v25 - vncipher $out4,$out4,v25 - vncipher $out5,$out5,v25 - vncipher $out6,$out6,v25 - vncipher $out7,$out7,v25 - - and r0,r0,$len - vncipher $out0,$out0,v26 - vncipher $out1,$out1,v26 - vncipher $out2,$out2,v26 - vncipher $out3,$out3,v26 - vncipher $out4,$out4,v26 - vncipher $out5,$out5,v26 - vncipher $out6,$out6,v26 - vncipher $out7,$out7,v26 - - add $inp,$inp,r0 # $inp is adjusted in such - # way that at exit from the - # loop inX-in7 are loaded - # with last "words" - vncipher $out0,$out0,v27 - vncipher $out1,$out1,v27 - vncipher $out2,$out2,v27 - vncipher $out3,$out3,v27 - vncipher $out4,$out4,v27 - vncipher $out5,$out5,v27 - vncipher $out6,$out6,v27 - vncipher $out7,$out7,v27 - - addi $key_,$sp,$FRAME+15 # rewind $key_ - vncipher $out0,$out0,v28 - vncipher $out1,$out1,v28 - vncipher $out2,$out2,v28 - vncipher $out3,$out3,v28 - vncipher $out4,$out4,v28 - vncipher $out5,$out5,v28 - vncipher $out6,$out6,v28 - vncipher $out7,$out7,v28 - lvx v24,$x00,$key_ # re-pre-load round[1] - - vncipher $out0,$out0,v29 - vncipher $out1,$out1,v29 - vncipher $out2,$out2,v29 - vncipher $out3,$out3,v29 - vncipher $out4,$out4,v29 - vncipher $out5,$out5,v29 - vncipher $out6,$out6,v29 - vncipher $out7,$out7,v29 - lvx v25,$x10,$key_ # re-pre-load round[2] - - vncipher $out0,$out0,v30 - vxor $ivec,$ivec,v31 # xor with last round key - vncipher $out1,$out1,v30 - vxor $in0,$in0,v31 - vncipher $out2,$out2,v30 - vxor $in1,$in1,v31 - vncipher $out3,$out3,v30 - vxor $in2,$in2,v31 - vncipher $out4,$out4,v30 - vxor $in3,$in3,v31 - vncipher $out5,$out5,v30 - vxor $in4,$in4,v31 - vncipher $out6,$out6,v30 - vxor $in5,$in5,v31 - vncipher $out7,$out7,v30 - vxor $in6,$in6,v31 - - vncipherlast $out0,$out0,$ivec - vncipherlast $out1,$out1,$in0 - lvx_u $in0,$x00,$inp # load next input block - vncipherlast $out2,$out2,$in1 - lvx_u $in1,$x10,$inp - vncipherlast $out3,$out3,$in2 - le?vperm $in0,$in0,$in0,$inpperm - lvx_u $in2,$x20,$inp - vncipherlast $out4,$out4,$in3 - le?vperm $in1,$in1,$in1,$inpperm - lvx_u $in3,$x30,$inp - vncipherlast $out5,$out5,$in4 - le?vperm $in2,$in2,$in2,$inpperm - lvx_u $in4,$x40,$inp - vncipherlast $out6,$out6,$in5 - le?vperm $in3,$in3,$in3,$inpperm - lvx_u $in5,$x50,$inp - vncipherlast $out7,$out7,$in6 - le?vperm $in4,$in4,$in4,$inpperm - lvx_u $in6,$x60,$inp - vmr $ivec,$in7 - le?vperm $in5,$in5,$in5,$inpperm - lvx_u $in7,$x70,$inp - addi $inp,$inp,0x80 - - le?vperm $out0,$out0,$out0,$inpperm - le?vperm $out1,$out1,$out1,$inpperm - stvx_u $out0,$x00,$out - le?vperm $in6,$in6,$in6,$inpperm - vxor $out0,$in0,$rndkey0 - le?vperm $out2,$out2,$out2,$inpperm - stvx_u $out1,$x10,$out - le?vperm $in7,$in7,$in7,$inpperm - vxor $out1,$in1,$rndkey0 - le?vperm $out3,$out3,$out3,$inpperm - stvx_u $out2,$x20,$out - vxor $out2,$in2,$rndkey0 - le?vperm $out4,$out4,$out4,$inpperm - stvx_u $out3,$x30,$out - vxor $out3,$in3,$rndkey0 - le?vperm $out5,$out5,$out5,$inpperm - stvx_u $out4,$x40,$out - vxor $out4,$in4,$rndkey0 - le?vperm $out6,$out6,$out6,$inpperm - stvx_u $out5,$x50,$out - vxor $out5,$in5,$rndkey0 - le?vperm $out7,$out7,$out7,$inpperm - stvx_u $out6,$x60,$out - vxor $out6,$in6,$rndkey0 - stvx_u $out7,$x70,$out - addi $out,$out,0x80 - vxor $out7,$in7,$rndkey0 - - mtctr $rounds - beq Loop_cbc_dec8x # did $len-=128 borrow? - - addic. $len,$len,128 - beq Lcbc_dec8x_done - nop - nop - -Loop_cbc_dec8x_tail: # up to 7 "words" tail... - vncipher $out1,$out1,v24 - vncipher $out2,$out2,v24 - vncipher $out3,$out3,v24 - vncipher $out4,$out4,v24 - vncipher $out5,$out5,v24 - vncipher $out6,$out6,v24 - vncipher $out7,$out7,v24 - lvx v24,$x20,$key_ # round[3] - addi $key_,$key_,0x20 - - vncipher $out1,$out1,v25 - vncipher $out2,$out2,v25 - vncipher $out3,$out3,v25 - vncipher $out4,$out4,v25 - vncipher $out5,$out5,v25 - vncipher $out6,$out6,v25 - vncipher $out7,$out7,v25 - lvx v25,$x10,$key_ # round[4] - bdnz Loop_cbc_dec8x_tail - - vncipher $out1,$out1,v24 - vncipher $out2,$out2,v24 - vncipher $out3,$out3,v24 - vncipher $out4,$out4,v24 - vncipher $out5,$out5,v24 - vncipher $out6,$out6,v24 - vncipher $out7,$out7,v24 - - vncipher $out1,$out1,v25 - vncipher $out2,$out2,v25 - vncipher $out3,$out3,v25 - vncipher $out4,$out4,v25 - vncipher $out5,$out5,v25 - vncipher $out6,$out6,v25 - vncipher $out7,$out7,v25 - - vncipher $out1,$out1,v26 - vncipher $out2,$out2,v26 - vncipher $out3,$out3,v26 - vncipher $out4,$out4,v26 - vncipher $out5,$out5,v26 - vncipher $out6,$out6,v26 - vncipher $out7,$out7,v26 - - vncipher $out1,$out1,v27 - vncipher $out2,$out2,v27 - vncipher $out3,$out3,v27 - vncipher $out4,$out4,v27 - vncipher $out5,$out5,v27 - vncipher $out6,$out6,v27 - vncipher $out7,$out7,v27 - - vncipher $out1,$out1,v28 - vncipher $out2,$out2,v28 - vncipher $out3,$out3,v28 - vncipher $out4,$out4,v28 - vncipher $out5,$out5,v28 - vncipher $out6,$out6,v28 - vncipher $out7,$out7,v28 - - vncipher $out1,$out1,v29 - vncipher $out2,$out2,v29 - vncipher $out3,$out3,v29 - vncipher $out4,$out4,v29 - vncipher $out5,$out5,v29 - vncipher $out6,$out6,v29 - vncipher $out7,$out7,v29 - - vncipher $out1,$out1,v30 - vxor $ivec,$ivec,v31 # last round key - vncipher $out2,$out2,v30 - vxor $in1,$in1,v31 - vncipher $out3,$out3,v30 - vxor $in2,$in2,v31 - vncipher $out4,$out4,v30 - vxor $in3,$in3,v31 - vncipher $out5,$out5,v30 - vxor $in4,$in4,v31 - vncipher $out6,$out6,v30 - vxor $in5,$in5,v31 - vncipher $out7,$out7,v30 - vxor $in6,$in6,v31 - - cmplwi $len,32 # switch($len) - blt Lcbc_dec8x_one - nop - beq Lcbc_dec8x_two - cmplwi $len,64 - blt Lcbc_dec8x_three - nop - beq Lcbc_dec8x_four - cmplwi $len,96 - blt Lcbc_dec8x_five - nop - beq Lcbc_dec8x_six - -Lcbc_dec8x_seven: - vncipherlast $out1,$out1,$ivec - vncipherlast $out2,$out2,$in1 - vncipherlast $out3,$out3,$in2 - vncipherlast $out4,$out4,$in3 - vncipherlast $out5,$out5,$in4 - vncipherlast $out6,$out6,$in5 - vncipherlast $out7,$out7,$in6 - vmr $ivec,$in7 - - le?vperm $out1,$out1,$out1,$inpperm - le?vperm $out2,$out2,$out2,$inpperm - stvx_u $out1,$x00,$out - le?vperm $out3,$out3,$out3,$inpperm - stvx_u $out2,$x10,$out - le?vperm $out4,$out4,$out4,$inpperm - stvx_u $out3,$x20,$out - le?vperm $out5,$out5,$out5,$inpperm - stvx_u $out4,$x30,$out - le?vperm $out6,$out6,$out6,$inpperm - stvx_u $out5,$x40,$out - le?vperm $out7,$out7,$out7,$inpperm - stvx_u $out6,$x50,$out - stvx_u $out7,$x60,$out - addi $out,$out,0x70 - b Lcbc_dec8x_done - -.align 5 -Lcbc_dec8x_six: - vncipherlast $out2,$out2,$ivec - vncipherlast $out3,$out3,$in2 - vncipherlast $out4,$out4,$in3 - vncipherlast $out5,$out5,$in4 - vncipherlast $out6,$out6,$in5 - vncipherlast $out7,$out7,$in6 - vmr $ivec,$in7 - - le?vperm $out2,$out2,$out2,$inpperm - le?vperm $out3,$out3,$out3,$inpperm - stvx_u $out2,$x00,$out - le?vperm $out4,$out4,$out4,$inpperm - stvx_u $out3,$x10,$out - le?vperm $out5,$out5,$out5,$inpperm - stvx_u $out4,$x20,$out - le?vperm $out6,$out6,$out6,$inpperm - stvx_u $out5,$x30,$out - le?vperm $out7,$out7,$out7,$inpperm - stvx_u $out6,$x40,$out - stvx_u $out7,$x50,$out - addi $out,$out,0x60 - b Lcbc_dec8x_done - -.align 5 -Lcbc_dec8x_five: - vncipherlast $out3,$out3,$ivec - vncipherlast $out4,$out4,$in3 - vncipherlast $out5,$out5,$in4 - vncipherlast $out6,$out6,$in5 - vncipherlast $out7,$out7,$in6 - vmr $ivec,$in7 - - le?vperm $out3,$out3,$out3,$inpperm - le?vperm $out4,$out4,$out4,$inpperm - stvx_u $out3,$x00,$out - le?vperm $out5,$out5,$out5,$inpperm - stvx_u $out4,$x10,$out - le?vperm $out6,$out6,$out6,$inpperm - stvx_u $out5,$x20,$out - le?vperm $out7,$out7,$out7,$inpperm - stvx_u $out6,$x30,$out - stvx_u $out7,$x40,$out - addi $out,$out,0x50 - b Lcbc_dec8x_done - -.align 5 -Lcbc_dec8x_four: - vncipherlast $out4,$out4,$ivec - vncipherlast $out5,$out5,$in4 - vncipherlast $out6,$out6,$in5 - vncipherlast $out7,$out7,$in6 - vmr $ivec,$in7 - - le?vperm $out4,$out4,$out4,$inpperm - le?vperm $out5,$out5,$out5,$inpperm - stvx_u $out4,$x00,$out - le?vperm $out6,$out6,$out6,$inpperm - stvx_u $out5,$x10,$out - le?vperm $out7,$out7,$out7,$inpperm - stvx_u $out6,$x20,$out - stvx_u $out7,$x30,$out - addi $out,$out,0x40 - b Lcbc_dec8x_done - -.align 5 -Lcbc_dec8x_three: - vncipherlast $out5,$out5,$ivec - vncipherlast $out6,$out6,$in5 - vncipherlast $out7,$out7,$in6 - vmr $ivec,$in7 - - le?vperm $out5,$out5,$out5,$inpperm - le?vperm $out6,$out6,$out6,$inpperm - stvx_u $out5,$x00,$out - le?vperm $out7,$out7,$out7,$inpperm - stvx_u $out6,$x10,$out - stvx_u $out7,$x20,$out - addi $out,$out,0x30 - b Lcbc_dec8x_done - -.align 5 -Lcbc_dec8x_two: - vncipherlast $out6,$out6,$ivec - vncipherlast $out7,$out7,$in6 - vmr $ivec,$in7 - - le?vperm $out6,$out6,$out6,$inpperm - le?vperm $out7,$out7,$out7,$inpperm - stvx_u $out6,$x00,$out - stvx_u $out7,$x10,$out - addi $out,$out,0x20 - b Lcbc_dec8x_done - -.align 5 -Lcbc_dec8x_one: - vncipherlast $out7,$out7,$ivec - vmr $ivec,$in7 - - le?vperm $out7,$out7,$out7,$inpperm - stvx_u $out7,0,$out - addi $out,$out,0x10 - -Lcbc_dec8x_done: - le?vperm $ivec,$ivec,$ivec,$inpperm - stvx_u $ivec,0,$ivp # write [unaligned] iv - - li r10,`$FRAME+15` - li r11,`$FRAME+31` - stvx $inpperm,r10,$sp # wipe copies of round keys - addi r10,r10,32 - stvx $inpperm,r11,$sp - addi r11,r11,32 - stvx $inpperm,r10,$sp - addi r10,r10,32 - stvx $inpperm,r11,$sp - addi r11,r11,32 - stvx $inpperm,r10,$sp - addi r10,r10,32 - stvx $inpperm,r11,$sp - addi r11,r11,32 - stvx $inpperm,r10,$sp - addi r10,r10,32 - stvx $inpperm,r11,$sp - addi r11,r11,32 - - mtspr 256,$vrsave - lvx v20,r10,$sp # ABI says so - addi r10,r10,32 - lvx v21,r11,$sp - addi r11,r11,32 - lvx v22,r10,$sp - addi r10,r10,32 - lvx v23,r11,$sp - addi r11,r11,32 - lvx v24,r10,$sp - addi r10,r10,32 - lvx v25,r11,$sp - addi r11,r11,32 - lvx v26,r10,$sp - addi r10,r10,32 - lvx v27,r11,$sp - addi r11,r11,32 - lvx v28,r10,$sp - addi r10,r10,32 - lvx v29,r11,$sp - addi r11,r11,32 - lvx v30,r10,$sp - lvx v31,r11,$sp - $POP r26,`$FRAME+21*16+0*$SIZE_T`($sp) - $POP r27,`$FRAME+21*16+1*$SIZE_T`($sp) - $POP r28,`$FRAME+21*16+2*$SIZE_T`($sp) - $POP r29,`$FRAME+21*16+3*$SIZE_T`($sp) - $POP r30,`$FRAME+21*16+4*$SIZE_T`($sp) - $POP r31,`$FRAME+21*16+5*$SIZE_T`($sp) - addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T` - blr - .long 0 - .byte 0,12,0x14,0,0x80,6,6,0 - .long 0 -.size .${prefix}_cbc_encrypt,.-.${prefix}_cbc_encrypt -___ -}} }}} - -######################################################################### -{{{ # CTR procedure[s] # - -####################### WARNING: Here be dragons! ####################### -# -# This code is written as 'ctr32', based on a 32-bit counter used -# upstream. The kernel does *not* use a 32-bit counter. The kernel uses -# a 128-bit counter. -# -# This leads to subtle changes from the upstream code: the counter -# is incremented with vaddu_q_m rather than vaddu_w_m. This occurs in -# both the bulk (8 blocks at a time) path, and in the individual block -# path. Be aware of this when doing updates. -# -# See: -# 1d4aa0b4c181 ("crypto: vmx - Fixing AES-CTR counter bug") -# 009b30ac7444 ("crypto: vmx - CTR: always increment IV as quadword") -# https://github.com/openssl/openssl/pull/8942 -# -######################################################################### -my ($inp,$out,$len,$key,$ivp,$x10,$rounds,$idx)=map("r$_",(3..10)); -my ($rndkey0,$rndkey1,$inout,$tmp)= map("v$_",(0..3)); -my ($ivec,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm,$one)= - map("v$_",(4..11)); -my $dat=$tmp; - -$code.=<<___; -.globl .${prefix}_ctr32_encrypt_blocks - ${UCMP}i $len,1 - bltlr- - - lis r0,0xfff0 - mfspr $vrsave,256 - mtspr 256,r0 - - li $idx,15 - vxor $rndkey0,$rndkey0,$rndkey0 - le?vspltisb $tmp,0x0f - - lvx $ivec,0,$ivp # load [unaligned] iv - lvsl $inpperm,0,$ivp - lvx $inptail,$idx,$ivp - vspltisb $one,1 - le?vxor $inpperm,$inpperm,$tmp - vperm $ivec,$ivec,$inptail,$inpperm - vsldoi $one,$rndkey0,$one,1 - - neg r11,$inp - ?lvsl $keyperm,0,$key # prepare for unaligned key - lwz $rounds,240($key) - - lvsr $inpperm,0,r11 # prepare for unaligned load - lvx $inptail,0,$inp - addi $inp,$inp,15 # 15 is not typo - le?vxor $inpperm,$inpperm,$tmp - - srwi $rounds,$rounds,1 - li $idx,16 - subi $rounds,$rounds,1 - - ${UCMP}i $len,8 - bge _aesp8_ctr32_encrypt8x - - ?lvsr $outperm,0,$out # prepare for unaligned store - vspltisb $outmask,-1 - lvx $outhead,0,$out - ?vperm $outmask,$rndkey0,$outmask,$outperm - le?vxor $outperm,$outperm,$tmp - - lvx $rndkey0,0,$key - mtctr $rounds - lvx $rndkey1,$idx,$key - addi $idx,$idx,16 - ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm - vxor $inout,$ivec,$rndkey0 - lvx $rndkey0,$idx,$key - addi $idx,$idx,16 - b Loop_ctr32_enc - -.align 5 -Loop_ctr32_enc: - ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm - vcipher $inout,$inout,$rndkey1 - lvx $rndkey1,$idx,$key - addi $idx,$idx,16 - ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm - vcipher $inout,$inout,$rndkey0 - lvx $rndkey0,$idx,$key - addi $idx,$idx,16 - bdnz Loop_ctr32_enc - - vadduqm $ivec,$ivec,$one # Kernel change for 128-bit - vmr $dat,$inptail - lvx $inptail,0,$inp - addi $inp,$inp,16 - subic. $len,$len,1 # blocks-- - - ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm - vcipher $inout,$inout,$rndkey1 - lvx $rndkey1,$idx,$key - vperm $dat,$dat,$inptail,$inpperm - li $idx,16 - ?vperm $rndkey1,$rndkey0,$rndkey1,$keyperm - lvx $rndkey0,0,$key - vxor $dat,$dat,$rndkey1 # last round key - vcipherlast $inout,$inout,$dat - - lvx $rndkey1,$idx,$key - addi $idx,$idx,16 - vperm $inout,$inout,$inout,$outperm - vsel $dat,$outhead,$inout,$outmask - mtctr $rounds - ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm - vmr $outhead,$inout - vxor $inout,$ivec,$rndkey0 - lvx $rndkey0,$idx,$key - addi $idx,$idx,16 - stvx $dat,0,$out - addi $out,$out,16 - bne Loop_ctr32_enc - - addi $out,$out,-1 - lvx $inout,0,$out # redundant in aligned case - vsel $inout,$outhead,$inout,$outmask - stvx $inout,0,$out - - mtspr 256,$vrsave - blr - .long 0 - .byte 0,12,0x14,0,0,0,6,0 - .long 0 -___ -######################################################################### -{{ # Optimized CTR procedure # -my $key_="r11"; -my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31)); -my ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7 )=map("v$_",(0..3,10,12..14)); -my ($out0,$out1,$out2,$out3,$out4,$out5,$out6,$out7)=map("v$_",(15..22)); -my $rndkey0="v23"; # v24-v25 rotating buffer for first found keys - # v26-v31 last 6 round keys -my ($tmp,$keyperm)=($in3,$in4); # aliases with "caller", redundant assignment -my ($two,$three,$four)=($outhead,$outperm,$outmask); - -$code.=<<___; -.align 5 -_aesp8_ctr32_encrypt8x: - $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp) - li r10,`$FRAME+8*16+15` - li r11,`$FRAME+8*16+31` - stvx v20,r10,$sp # ABI says so - addi r10,r10,32 - stvx v21,r11,$sp - addi r11,r11,32 - stvx v22,r10,$sp - addi r10,r10,32 - stvx v23,r11,$sp - addi r11,r11,32 - stvx v24,r10,$sp - addi r10,r10,32 - stvx v25,r11,$sp - addi r11,r11,32 - stvx v26,r10,$sp - addi r10,r10,32 - stvx v27,r11,$sp - addi r11,r11,32 - stvx v28,r10,$sp - addi r10,r10,32 - stvx v29,r11,$sp - addi r11,r11,32 - stvx v30,r10,$sp - stvx v31,r11,$sp - li r0,-1 - stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave - li $x10,0x10 - $PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp) - li $x20,0x20 - $PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp) - li $x30,0x30 - $PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp) - li $x40,0x40 - $PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp) - li $x50,0x50 - $PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp) - li $x60,0x60 - $PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp) - li $x70,0x70 - mtspr 256,r0 - - subi $rounds,$rounds,3 # -4 in total - - lvx $rndkey0,$x00,$key # load key schedule - lvx v30,$x10,$key - addi $key,$key,0x20 - lvx v31,$x00,$key - ?vperm $rndkey0,$rndkey0,v30,$keyperm - addi $key_,$sp,$FRAME+15 - mtctr $rounds - -Load_ctr32_enc_key: - ?vperm v24,v30,v31,$keyperm - lvx v30,$x10,$key - addi $key,$key,0x20 - stvx v24,$x00,$key_ # off-load round[1] - ?vperm v25,v31,v30,$keyperm - lvx v31,$x00,$key - stvx v25,$x10,$key_ # off-load round[2] - addi $key_,$key_,0x20 - bdnz Load_ctr32_enc_key - - lvx v26,$x10,$key - ?vperm v24,v30,v31,$keyperm - lvx v27,$x20,$key - stvx v24,$x00,$key_ # off-load round[3] - ?vperm v25,v31,v26,$keyperm - lvx v28,$x30,$key - stvx v25,$x10,$key_ # off-load round[4] - addi $key_,$sp,$FRAME+15 # rewind $key_ - ?vperm v26,v26,v27,$keyperm - lvx v29,$x40,$key - ?vperm v27,v27,v28,$keyperm - lvx v30,$x50,$key - ?vperm v28,v28,v29,$keyperm - lvx v31,$x60,$key - ?vperm v29,v29,v30,$keyperm - lvx $out0,$x70,$key # borrow $out0 - ?vperm v30,v30,v31,$keyperm - lvx v24,$x00,$key_ # pre-load round[1] - ?vperm v31,v31,$out0,$keyperm - lvx v25,$x10,$key_ # pre-load round[2] - - vadduqm $two,$one,$one - subi $inp,$inp,15 # undo "caller" - $SHL $len,$len,4 - - vadduqm $out1,$ivec,$one # counter values ... - vadduqm $out2,$ivec,$two # (do all ctr adds as 128-bit) - vxor $out0,$ivec,$rndkey0 # ... xored with rndkey[0] - le?li $idx,8 - vadduqm $out3,$out1,$two - vxor $out1,$out1,$rndkey0 - le?lvsl $inpperm,0,$idx - vadduqm $out4,$out2,$two - vxor $out2,$out2,$rndkey0 - le?vspltisb $tmp,0x0f - vadduqm $out5,$out3,$two - vxor $out3,$out3,$rndkey0 - le?vxor $inpperm,$inpperm,$tmp # transform for lvx_u/stvx_u - vadduqm $out6,$out4,$two - vxor $out4,$out4,$rndkey0 - vadduqm $out7,$out5,$two - vxor $out5,$out5,$rndkey0 - vadduqm $ivec,$out6,$two # next counter value - vxor $out6,$out6,$rndkey0 - vxor $out7,$out7,$rndkey0 - - mtctr $rounds - b Loop_ctr32_enc8x -.align 5 -Loop_ctr32_enc8x: - vcipher $out0,$out0,v24 - vcipher $out1,$out1,v24 - vcipher $out2,$out2,v24 - vcipher $out3,$out3,v24 - vcipher $out4,$out4,v24 - vcipher $out5,$out5,v24 - vcipher $out6,$out6,v24 - vcipher $out7,$out7,v24 -Loop_ctr32_enc8x_middle: - lvx v24,$x20,$key_ # round[3] - addi $key_,$key_,0x20 - - vcipher $out0,$out0,v25 - vcipher $out1,$out1,v25 - vcipher $out2,$out2,v25 - vcipher $out3,$out3,v25 - vcipher $out4,$out4,v25 - vcipher $out5,$out5,v25 - vcipher $out6,$out6,v25 - vcipher $out7,$out7,v25 - lvx v25,$x10,$key_ # round[4] - bdnz Loop_ctr32_enc8x - - subic r11,$len,256 # $len-256, borrow $key_ - vcipher $out0,$out0,v24 - vcipher $out1,$out1,v24 - vcipher $out2,$out2,v24 - vcipher $out3,$out3,v24 - vcipher $out4,$out4,v24 - vcipher $out5,$out5,v24 - vcipher $out6,$out6,v24 - vcipher $out7,$out7,v24 - - subfe r0,r0,r0 # borrow?-1:0 - vcipher $out0,$out0,v25 - vcipher $out1,$out1,v25 - vcipher $out2,$out2,v25 - vcipher $out3,$out3,v25 - vcipher $out4,$out4,v25 - vcipher $out5,$out5,v25 - vcipher $out6,$out6,v25 - vcipher $out7,$out7,v25 - - and r0,r0,r11 - addi $key_,$sp,$FRAME+15 # rewind $key_ - vcipher $out0,$out0,v26 - vcipher $out1,$out1,v26 - vcipher $out2,$out2,v26 - vcipher $out3,$out3,v26 - vcipher $out4,$out4,v26 - vcipher $out5,$out5,v26 - vcipher $out6,$out6,v26 - vcipher $out7,$out7,v26 - lvx v24,$x00,$key_ # re-pre-load round[1] - - subic $len,$len,129 # $len-=129 - vcipher $out0,$out0,v27 - addi $len,$len,1 # $len-=128 really - vcipher $out1,$out1,v27 - vcipher $out2,$out2,v27 - vcipher $out3,$out3,v27 - vcipher $out4,$out4,v27 - vcipher $out5,$out5,v27 - vcipher $out6,$out6,v27 - vcipher $out7,$out7,v27 - lvx v25,$x10,$key_ # re-pre-load round[2] - - vcipher $out0,$out0,v28 - lvx_u $in0,$x00,$inp # load input - vcipher $out1,$out1,v28 - lvx_u $in1,$x10,$inp - vcipher $out2,$out2,v28 - lvx_u $in2,$x20,$inp - vcipher $out3,$out3,v28 - lvx_u $in3,$x30,$inp - vcipher $out4,$out4,v28 - lvx_u $in4,$x40,$inp - vcipher $out5,$out5,v28 - lvx_u $in5,$x50,$inp - vcipher $out6,$out6,v28 - lvx_u $in6,$x60,$inp - vcipher $out7,$out7,v28 - lvx_u $in7,$x70,$inp - addi $inp,$inp,0x80 - - vcipher $out0,$out0,v29 - le?vperm $in0,$in0,$in0,$inpperm - vcipher $out1,$out1,v29 - le?vperm $in1,$in1,$in1,$inpperm - vcipher $out2,$out2,v29 - le?vperm $in2,$in2,$in2,$inpperm - vcipher $out3,$out3,v29 - le?vperm $in3,$in3,$in3,$inpperm - vcipher $out4,$out4,v29 - le?vperm $in4,$in4,$in4,$inpperm - vcipher $out5,$out5,v29 - le?vperm $in5,$in5,$in5,$inpperm - vcipher $out6,$out6,v29 - le?vperm $in6,$in6,$in6,$inpperm - vcipher $out7,$out7,v29 - le?vperm $in7,$in7,$in7,$inpperm - - add $inp,$inp,r0 # $inp is adjusted in such - # way that at exit from the - # loop inX-in7 are loaded - # with last "words" - subfe. r0,r0,r0 # borrow?-1:0 - vcipher $out0,$out0,v30 - vxor $in0,$in0,v31 # xor with last round key - vcipher $out1,$out1,v30 - vxor $in1,$in1,v31 - vcipher $out2,$out2,v30 - vxor $in2,$in2,v31 - vcipher $out3,$out3,v30 - vxor $in3,$in3,v31 - vcipher $out4,$out4,v30 - vxor $in4,$in4,v31 - vcipher $out5,$out5,v30 - vxor $in5,$in5,v31 - vcipher $out6,$out6,v30 - vxor $in6,$in6,v31 - vcipher $out7,$out7,v30 - vxor $in7,$in7,v31 - - bne Lctr32_enc8x_break # did $len-129 borrow? - - vcipherlast $in0,$out0,$in0 - vcipherlast $in1,$out1,$in1 - vadduqm $out1,$ivec,$one # counter values ... - vcipherlast $in2,$out2,$in2 - vadduqm $out2,$ivec,$two - vxor $out0,$ivec,$rndkey0 # ... xored with rndkey[0] - vcipherlast $in3,$out3,$in3 - vadduqm $out3,$out1,$two - vxor $out1,$out1,$rndkey0 - vcipherlast $in4,$out4,$in4 - vadduqm $out4,$out2,$two - vxor $out2,$out2,$rndkey0 - vcipherlast $in5,$out5,$in5 - vadduqm $out5,$out3,$two - vxor $out3,$out3,$rndkey0 - vcipherlast $in6,$out6,$in6 - vadduqm $out6,$out4,$two - vxor $out4,$out4,$rndkey0 - vcipherlast $in7,$out7,$in7 - vadduqm $out7,$out5,$two - vxor $out5,$out5,$rndkey0 - le?vperm $in0,$in0,$in0,$inpperm - vadduqm $ivec,$out6,$two # next counter value - vxor $out6,$out6,$rndkey0 - le?vperm $in1,$in1,$in1,$inpperm - vxor $out7,$out7,$rndkey0 - mtctr $rounds - - vcipher $out0,$out0,v24 - stvx_u $in0,$x00,$out - le?vperm $in2,$in2,$in2,$inpperm - vcipher $out1,$out1,v24 - stvx_u $in1,$x10,$out - le?vperm $in3,$in3,$in3,$inpperm - vcipher $out2,$out2,v24 - stvx_u $in2,$x20,$out - le?vperm $in4,$in4,$in4,$inpperm - vcipher $out3,$out3,v24 - stvx_u $in3,$x30,$out - le?vperm $in5,$in5,$in5,$inpperm - vcipher $out4,$out4,v24 - stvx_u $in4,$x40,$out - le?vperm $in6,$in6,$in6,$inpperm - vcipher $out5,$out5,v24 - stvx_u $in5,$x50,$out - le?vperm $in7,$in7,$in7,$inpperm - vcipher $out6,$out6,v24 - stvx_u $in6,$x60,$out - vcipher $out7,$out7,v24 - stvx_u $in7,$x70,$out - addi $out,$out,0x80 - - b Loop_ctr32_enc8x_middle - -.align 5 -Lctr32_enc8x_break: - cmpwi $len,-0x60 - blt Lctr32_enc8x_one - nop - beq Lctr32_enc8x_two - cmpwi $len,-0x40 - blt Lctr32_enc8x_three - nop - beq Lctr32_enc8x_four - cmpwi $len,-0x20 - blt Lctr32_enc8x_five - nop - beq Lctr32_enc8x_six - cmpwi $len,0x00 - blt Lctr32_enc8x_seven - -Lctr32_enc8x_eight: - vcipherlast $out0,$out0,$in0 - vcipherlast $out1,$out1,$in1 - vcipherlast $out2,$out2,$in2 - vcipherlast $out3,$out3,$in3 - vcipherlast $out4,$out4,$in4 - vcipherlast $out5,$out5,$in5 - vcipherlast $out6,$out6,$in6 - vcipherlast $out7,$out7,$in7 - - le?vperm $out0,$out0,$out0,$inpperm - le?vperm $out1,$out1,$out1,$inpperm - stvx_u $out0,$x00,$out - le?vperm $out2,$out2,$out2,$inpperm - stvx_u $out1,$x10,$out - le?vperm $out3,$out3,$out3,$inpperm - stvx_u $out2,$x20,$out - le?vperm $out4,$out4,$out4,$inpperm - stvx_u $out3,$x30,$out - le?vperm $out5,$out5,$out5,$inpperm - stvx_u $out4,$x40,$out - le?vperm $out6,$out6,$out6,$inpperm - stvx_u $out5,$x50,$out - le?vperm $out7,$out7,$out7,$inpperm - stvx_u $out6,$x60,$out - stvx_u $out7,$x70,$out - addi $out,$out,0x80 - b Lctr32_enc8x_done - -.align 5 -Lctr32_enc8x_seven: - vcipherlast $out0,$out0,$in1 - vcipherlast $out1,$out1,$in2 - vcipherlast $out2,$out2,$in3 - vcipherlast $out3,$out3,$in4 - vcipherlast $out4,$out4,$in5 - vcipherlast $out5,$out5,$in6 - vcipherlast $out6,$out6,$in7 - - le?vperm $out0,$out0,$out0,$inpperm - le?vperm $out1,$out1,$out1,$inpperm - stvx_u $out0,$x00,$out - le?vperm $out2,$out2,$out2,$inpperm - stvx_u $out1,$x10,$out - le?vperm $out3,$out3,$out3,$inpperm - stvx_u $out2,$x20,$out - le?vperm $out4,$out4,$out4,$inpperm - stvx_u $out3,$x30,$out - le?vperm $out5,$out5,$out5,$inpperm - stvx_u $out4,$x40,$out - le?vperm $out6,$out6,$out6,$inpperm - stvx_u $out5,$x50,$out - stvx_u $out6,$x60,$out - addi $out,$out,0x70 - b Lctr32_enc8x_done - -.align 5 -Lctr32_enc8x_six: - vcipherlast $out0,$out0,$in2 - vcipherlast $out1,$out1,$in3 - vcipherlast $out2,$out2,$in4 - vcipherlast $out3,$out3,$in5 - vcipherlast $out4,$out4,$in6 - vcipherlast $out5,$out5,$in7 - - le?vperm $out0,$out0,$out0,$inpperm - le?vperm $out1,$out1,$out1,$inpperm - stvx_u $out0,$x00,$out - le?vperm $out2,$out2,$out2,$inpperm - stvx_u $out1,$x10,$out - le?vperm $out3,$out3,$out3,$inpperm - stvx_u $out2,$x20,$out - le?vperm $out4,$out4,$out4,$inpperm - stvx_u $out3,$x30,$out - le?vperm $out5,$out5,$out5,$inpperm - stvx_u $out4,$x40,$out - stvx_u $out5,$x50,$out - addi $out,$out,0x60 - b Lctr32_enc8x_done - -.align 5 -Lctr32_enc8x_five: - vcipherlast $out0,$out0,$in3 - vcipherlast $out1,$out1,$in4 - vcipherlast $out2,$out2,$in5 - vcipherlast $out3,$out3,$in6 - vcipherlast $out4,$out4,$in7 - - le?vperm $out0,$out0,$out0,$inpperm - le?vperm $out1,$out1,$out1,$inpperm - stvx_u $out0,$x00,$out - le?vperm $out2,$out2,$out2,$inpperm - stvx_u $out1,$x10,$out - le?vperm $out3,$out3,$out3,$inpperm - stvx_u $out2,$x20,$out - le?vperm $out4,$out4,$out4,$inpperm - stvx_u $out3,$x30,$out - stvx_u $out4,$x40,$out - addi $out,$out,0x50 - b Lctr32_enc8x_done - -.align 5 -Lctr32_enc8x_four: - vcipherlast $out0,$out0,$in4 - vcipherlast $out1,$out1,$in5 - vcipherlast $out2,$out2,$in6 - vcipherlast $out3,$out3,$in7 - - le?vperm $out0,$out0,$out0,$inpperm - le?vperm $out1,$out1,$out1,$inpperm - stvx_u $out0,$x00,$out - le?vperm $out2,$out2,$out2,$inpperm - stvx_u $out1,$x10,$out - le?vperm $out3,$out3,$out3,$inpperm - stvx_u $out2,$x20,$out - stvx_u $out3,$x30,$out - addi $out,$out,0x40 - b Lctr32_enc8x_done - -.align 5 -Lctr32_enc8x_three: - vcipherlast $out0,$out0,$in5 - vcipherlast $out1,$out1,$in6 - vcipherlast $out2,$out2,$in7 - - le?vperm $out0,$out0,$out0,$inpperm - le?vperm $out1,$out1,$out1,$inpperm - stvx_u $out0,$x00,$out - le?vperm $out2,$out2,$out2,$inpperm - stvx_u $out1,$x10,$out - stvx_u $out2,$x20,$out - addi $out,$out,0x30 - b Lctr32_enc8x_done - -.align 5 -Lctr32_enc8x_two: - vcipherlast $out0,$out0,$in6 - vcipherlast $out1,$out1,$in7 - - le?vperm $out0,$out0,$out0,$inpperm - le?vperm $out1,$out1,$out1,$inpperm - stvx_u $out0,$x00,$out - stvx_u $out1,$x10,$out - addi $out,$out,0x20 - b Lctr32_enc8x_done - -.align 5 -Lctr32_enc8x_one: - vcipherlast $out0,$out0,$in7 - - le?vperm $out0,$out0,$out0,$inpperm - stvx_u $out0,0,$out - addi $out,$out,0x10 - -Lctr32_enc8x_done: - li r10,`$FRAME+15` - li r11,`$FRAME+31` - stvx $inpperm,r10,$sp # wipe copies of round keys - addi r10,r10,32 - stvx $inpperm,r11,$sp - addi r11,r11,32 - stvx $inpperm,r10,$sp - addi r10,r10,32 - stvx $inpperm,r11,$sp - addi r11,r11,32 - stvx $inpperm,r10,$sp - addi r10,r10,32 - stvx $inpperm,r11,$sp - addi r11,r11,32 - stvx $inpperm,r10,$sp - addi r10,r10,32 - stvx $inpperm,r11,$sp - addi r11,r11,32 - - mtspr 256,$vrsave - lvx v20,r10,$sp # ABI says so - addi r10,r10,32 - lvx v21,r11,$sp - addi r11,r11,32 - lvx v22,r10,$sp - addi r10,r10,32 - lvx v23,r11,$sp - addi r11,r11,32 - lvx v24,r10,$sp - addi r10,r10,32 - lvx v25,r11,$sp - addi r11,r11,32 - lvx v26,r10,$sp - addi r10,r10,32 - lvx v27,r11,$sp - addi r11,r11,32 - lvx v28,r10,$sp - addi r10,r10,32 - lvx v29,r11,$sp - addi r11,r11,32 - lvx v30,r10,$sp - lvx v31,r11,$sp - $POP r26,`$FRAME+21*16+0*$SIZE_T`($sp) - $POP r27,`$FRAME+21*16+1*$SIZE_T`($sp) - $POP r28,`$FRAME+21*16+2*$SIZE_T`($sp) - $POP r29,`$FRAME+21*16+3*$SIZE_T`($sp) - $POP r30,`$FRAME+21*16+4*$SIZE_T`($sp) - $POP r31,`$FRAME+21*16+5*$SIZE_T`($sp) - addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T` - blr - .long 0 - .byte 0,12,0x14,0,0x80,6,6,0 - .long 0 -.size .${prefix}_ctr32_encrypt_blocks,.-.${prefix}_ctr32_encrypt_blocks -___ -}} }}} - -######################################################################### -{{{ # XTS procedures # -# int aes_p8_xts_[en|de]crypt(const char *inp, char *out, size_t len, # -# const AES_KEY *key1, const AES_KEY *key2, # -# [const] unsigned char iv[16]); # -# If $key2 is NULL, then a "tweak chaining" mode is engaged, in which # -# input tweak value is assumed to be encrypted already, and last tweak # -# value, one suitable for consecutive call on same chunk of data, is # -# written back to original buffer. In addition, in "tweak chaining" # -# mode only complete input blocks are processed. # - -my ($inp,$out,$len,$key1,$key2,$ivp,$rounds,$idx) = map("r$_",(3..10)); -my ($rndkey0,$rndkey1,$inout) = map("v$_",(0..2)); -my ($output,$inptail,$inpperm,$leperm,$keyperm) = map("v$_",(3..7)); -my ($tweak,$seven,$eighty7,$tmp,$tweak1) = map("v$_",(8..12)); -my $taillen = $key2; - - ($inp,$idx) = ($idx,$inp); # reassign - -$code.=<<___; -.globl .${prefix}_xts_encrypt - mr $inp,r3 # reassign - li r3,-1 - ${UCMP}i $len,16 - bltlr- - - lis r0,0xfff0 - mfspr r12,256 # save vrsave - li r11,0 - mtspr 256,r0 - - vspltisb $seven,0x07 # 0x070707..07 - le?lvsl $leperm,r11,r11 - le?vspltisb $tmp,0x0f - le?vxor $leperm,$leperm,$seven - - li $idx,15 - lvx $tweak,0,$ivp # load [unaligned] iv - lvsl $inpperm,0,$ivp - lvx $inptail,$idx,$ivp - le?vxor $inpperm,$inpperm,$tmp - vperm $tweak,$tweak,$inptail,$inpperm - - neg r11,$inp - lvsr $inpperm,0,r11 # prepare for unaligned load - lvx $inout,0,$inp - addi $inp,$inp,15 # 15 is not typo - le?vxor $inpperm,$inpperm,$tmp - - ${UCMP}i $key2,0 # key2==NULL? - beq Lxts_enc_no_key2 - - ?lvsl $keyperm,0,$key2 # prepare for unaligned key - lwz $rounds,240($key2) - srwi $rounds,$rounds,1 - subi $rounds,$rounds,1 - li $idx,16 - - lvx $rndkey0,0,$key2 - lvx $rndkey1,$idx,$key2 - addi $idx,$idx,16 - ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm - vxor $tweak,$tweak,$rndkey0 - lvx $rndkey0,$idx,$key2 - addi $idx,$idx,16 - mtctr $rounds - -Ltweak_xts_enc: - ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm - vcipher $tweak,$tweak,$rndkey1 - lvx $rndkey1,$idx,$key2 - addi $idx,$idx,16 - ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm - vcipher $tweak,$tweak,$rndkey0 - lvx $rndkey0,$idx,$key2 - addi $idx,$idx,16 - bdnz Ltweak_xts_enc - - ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm - vcipher $tweak,$tweak,$rndkey1 - lvx $rndkey1,$idx,$key2 - ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm - vcipherlast $tweak,$tweak,$rndkey0 - - li $ivp,0 # don't chain the tweak - b Lxts_enc - -Lxts_enc_no_key2: - li $idx,-16 - and $len,$len,$idx # in "tweak chaining" - # mode only complete - # blocks are processed -Lxts_enc: - lvx $inptail,0,$inp - addi $inp,$inp,16 - - ?lvsl $keyperm,0,$key1 # prepare for unaligned key - lwz $rounds,240($key1) - srwi $rounds,$rounds,1 - subi $rounds,$rounds,1 - li $idx,16 - - vslb $eighty7,$seven,$seven # 0x808080..80 - vor $eighty7,$eighty7,$seven # 0x878787..87 - vspltisb $tmp,1 # 0x010101..01 - vsldoi $eighty7,$eighty7,$tmp,15 # 0x870101..01 - - ${UCMP}i $len,96 - bge _aesp8_xts_encrypt6x - - andi. $taillen,$len,15 - subic r0,$len,32 - subi $taillen,$taillen,16 - subfe r0,r0,r0 - and r0,r0,$taillen - add $inp,$inp,r0 - - lvx $rndkey0,0,$key1 - lvx $rndkey1,$idx,$key1 - addi $idx,$idx,16 - vperm $inout,$inout,$inptail,$inpperm - ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm - vxor $inout,$inout,$tweak - vxor $inout,$inout,$rndkey0 - lvx $rndkey0,$idx,$key1 - addi $idx,$idx,16 - mtctr $rounds - b Loop_xts_enc - -.align 5 -Loop_xts_enc: - ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm - vcipher $inout,$inout,$rndkey1 - lvx $rndkey1,$idx,$key1 - addi $idx,$idx,16 - ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm - vcipher $inout,$inout,$rndkey0 - lvx $rndkey0,$idx,$key1 - addi $idx,$idx,16 - bdnz Loop_xts_enc - - ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm - vcipher $inout,$inout,$rndkey1 - lvx $rndkey1,$idx,$key1 - li $idx,16 - ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm - vxor $rndkey0,$rndkey0,$tweak - vcipherlast $output,$inout,$rndkey0 - - le?vperm $tmp,$output,$output,$leperm - be?nop - le?stvx_u $tmp,0,$out - be?stvx_u $output,0,$out - addi $out,$out,16 - - subic. $len,$len,16 - beq Lxts_enc_done - - vmr $inout,$inptail - lvx $inptail,0,$inp - addi $inp,$inp,16 - lvx $rndkey0,0,$key1 - lvx $rndkey1,$idx,$key1 - addi $idx,$idx,16 - - subic r0,$len,32 - subfe r0,r0,r0 - and r0,r0,$taillen - add $inp,$inp,r0 - - vsrab $tmp,$tweak,$seven # next tweak value - vaddubm $tweak,$tweak,$tweak - vsldoi $tmp,$tmp,$tmp,15 - vand $tmp,$tmp,$eighty7 - vxor $tweak,$tweak,$tmp - - vperm $inout,$inout,$inptail,$inpperm - ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm - vxor $inout,$inout,$tweak - vxor $output,$output,$rndkey0 # just in case $len<16 - vxor $inout,$inout,$rndkey0 - lvx $rndkey0,$idx,$key1 - addi $idx,$idx,16 - - mtctr $rounds - ${UCMP}i $len,16 - bge Loop_xts_enc - - vxor $output,$output,$tweak - lvsr $inpperm,0,$len # $inpperm is no longer needed - vxor $inptail,$inptail,$inptail # $inptail is no longer needed - vspltisb $tmp,-1 - vperm $inptail,$inptail,$tmp,$inpperm - vsel $inout,$inout,$output,$inptail - - subi r11,$out,17 - subi $out,$out,16 - mtctr $len - li $len,16 -Loop_xts_enc_steal: - lbzu r0,1(r11) - stb r0,16(r11) - bdnz Loop_xts_enc_steal - - mtctr $rounds - b Loop_xts_enc # one more time... - -Lxts_enc_done: - ${UCMP}i $ivp,0 - beq Lxts_enc_ret - - vsrab $tmp,$tweak,$seven # next tweak value - vaddubm $tweak,$tweak,$tweak - vsldoi $tmp,$tmp,$tmp,15 - vand $tmp,$tmp,$eighty7 - vxor $tweak,$tweak,$tmp - - le?vperm $tweak,$tweak,$tweak,$leperm - stvx_u $tweak,0,$ivp - -Lxts_enc_ret: - mtspr 256,r12 # restore vrsave - li r3,0 - blr - .long 0 - .byte 0,12,0x04,0,0x80,6,6,0 - .long 0 -.size .${prefix}_xts_encrypt,.-.${prefix}_xts_encrypt - -.globl .${prefix}_xts_decrypt - mr $inp,r3 # reassign - li r3,-1 - ${UCMP}i $len,16 - bltlr- - - lis r0,0xfff8 - mfspr r12,256 # save vrsave - li r11,0 - mtspr 256,r0 - - andi. r0,$len,15 - neg r0,r0 - andi. r0,r0,16 - sub $len,$len,r0 - - vspltisb $seven,0x07 # 0x070707..07 - le?lvsl $leperm,r11,r11 - le?vspltisb $tmp,0x0f - le?vxor $leperm,$leperm,$seven - - li $idx,15 - lvx $tweak,0,$ivp # load [unaligned] iv - lvsl $inpperm,0,$ivp - lvx $inptail,$idx,$ivp - le?vxor $inpperm,$inpperm,$tmp - vperm $tweak,$tweak,$inptail,$inpperm - - neg r11,$inp - lvsr $inpperm,0,r11 # prepare for unaligned load - lvx $inout,0,$inp - addi $inp,$inp,15 # 15 is not typo - le?vxor $inpperm,$inpperm,$tmp - - ${UCMP}i $key2,0 # key2==NULL? - beq Lxts_dec_no_key2 - - ?lvsl $keyperm,0,$key2 # prepare for unaligned key - lwz $rounds,240($key2) - srwi $rounds,$rounds,1 - subi $rounds,$rounds,1 - li $idx,16 - - lvx $rndkey0,0,$key2 - lvx $rndkey1,$idx,$key2 - addi $idx,$idx,16 - ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm - vxor $tweak,$tweak,$rndkey0 - lvx $rndkey0,$idx,$key2 - addi $idx,$idx,16 - mtctr $rounds - -Ltweak_xts_dec: - ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm - vcipher $tweak,$tweak,$rndkey1 - lvx $rndkey1,$idx,$key2 - addi $idx,$idx,16 - ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm - vcipher $tweak,$tweak,$rndkey0 - lvx $rndkey0,$idx,$key2 - addi $idx,$idx,16 - bdnz Ltweak_xts_dec - - ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm - vcipher $tweak,$tweak,$rndkey1 - lvx $rndkey1,$idx,$key2 - ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm - vcipherlast $tweak,$tweak,$rndkey0 - - li $ivp,0 # don't chain the tweak - b Lxts_dec - -Lxts_dec_no_key2: - neg $idx,$len - andi. $idx,$idx,15 - add $len,$len,$idx # in "tweak chaining" - # mode only complete - # blocks are processed -Lxts_dec: - lvx $inptail,0,$inp - addi $inp,$inp,16 - - ?lvsl $keyperm,0,$key1 # prepare for unaligned key - lwz $rounds,240($key1) - srwi $rounds,$rounds,1 - subi $rounds,$rounds,1 - li $idx,16 - - vslb $eighty7,$seven,$seven # 0x808080..80 - vor $eighty7,$eighty7,$seven # 0x878787..87 - vspltisb $tmp,1 # 0x010101..01 - vsldoi $eighty7,$eighty7,$tmp,15 # 0x870101..01 - - ${UCMP}i $len,96 - bge _aesp8_xts_decrypt6x - - lvx $rndkey0,0,$key1 - lvx $rndkey1,$idx,$key1 - addi $idx,$idx,16 - vperm $inout,$inout,$inptail,$inpperm - ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm - vxor $inout,$inout,$tweak - vxor $inout,$inout,$rndkey0 - lvx $rndkey0,$idx,$key1 - addi $idx,$idx,16 - mtctr $rounds - - ${UCMP}i $len,16 - blt Ltail_xts_dec - be?b Loop_xts_dec - -.align 5 -Loop_xts_dec: - ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm - vncipher $inout,$inout,$rndkey1 - lvx $rndkey1,$idx,$key1 - addi $idx,$idx,16 - ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm - vncipher $inout,$inout,$rndkey0 - lvx $rndkey0,$idx,$key1 - addi $idx,$idx,16 - bdnz Loop_xts_dec - - ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm - vncipher $inout,$inout,$rndkey1 - lvx $rndkey1,$idx,$key1 - li $idx,16 - ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm - vxor $rndkey0,$rndkey0,$tweak - vncipherlast $output,$inout,$rndkey0 - - le?vperm $tmp,$output,$output,$leperm - be?nop - le?stvx_u $tmp,0,$out - be?stvx_u $output,0,$out - addi $out,$out,16 - - subic. $len,$len,16 - beq Lxts_dec_done - - vmr $inout,$inptail - lvx $inptail,0,$inp - addi $inp,$inp,16 - lvx $rndkey0,0,$key1 - lvx $rndkey1,$idx,$key1 - addi $idx,$idx,16 - - vsrab $tmp,$tweak,$seven # next tweak value - vaddubm $tweak,$tweak,$tweak - vsldoi $tmp,$tmp,$tmp,15 - vand $tmp,$tmp,$eighty7 - vxor $tweak,$tweak,$tmp - - vperm $inout,$inout,$inptail,$inpperm - ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm - vxor $inout,$inout,$tweak - vxor $inout,$inout,$rndkey0 - lvx $rndkey0,$idx,$key1 - addi $idx,$idx,16 - - mtctr $rounds - ${UCMP}i $len,16 - bge Loop_xts_dec - -Ltail_xts_dec: - vsrab $tmp,$tweak,$seven # next tweak value - vaddubm $tweak1,$tweak,$tweak - vsldoi $tmp,$tmp,$tmp,15 - vand $tmp,$tmp,$eighty7 - vxor $tweak1,$tweak1,$tmp - - subi $inp,$inp,16 - add $inp,$inp,$len - - vxor $inout,$inout,$tweak # :-( - vxor $inout,$inout,$tweak1 # :-) - -Loop_xts_dec_short: - ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm - vncipher $inout,$inout,$rndkey1 - lvx $rndkey1,$idx,$key1 - addi $idx,$idx,16 - ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm - vncipher $inout,$inout,$rndkey0 - lvx $rndkey0,$idx,$key1 - addi $idx,$idx,16 - bdnz Loop_xts_dec_short - - ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm - vncipher $inout,$inout,$rndkey1 - lvx $rndkey1,$idx,$key1 - li $idx,16 - ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm - vxor $rndkey0,$rndkey0,$tweak1 - vncipherlast $output,$inout,$rndkey0 - - le?vperm $tmp,$output,$output,$leperm - be?nop - le?stvx_u $tmp,0,$out - be?stvx_u $output,0,$out - - vmr $inout,$inptail - lvx $inptail,0,$inp - #addi $inp,$inp,16 - lvx $rndkey0,0,$key1 - lvx $rndkey1,$idx,$key1 - addi $idx,$idx,16 - vperm $inout,$inout,$inptail,$inpperm - ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm - - lvsr $inpperm,0,$len # $inpperm is no longer needed - vxor $inptail,$inptail,$inptail # $inptail is no longer needed - vspltisb $tmp,-1 - vperm $inptail,$inptail,$tmp,$inpperm - vsel $inout,$inout,$output,$inptail - - vxor $rndkey0,$rndkey0,$tweak - vxor $inout,$inout,$rndkey0 - lvx $rndkey0,$idx,$key1 - addi $idx,$idx,16 - - subi r11,$out,1 - mtctr $len - li $len,16 -Loop_xts_dec_steal: - lbzu r0,1(r11) - stb r0,16(r11) - bdnz Loop_xts_dec_steal - - mtctr $rounds - b Loop_xts_dec # one more time... - -Lxts_dec_done: - ${UCMP}i $ivp,0 - beq Lxts_dec_ret - - vsrab $tmp,$tweak,$seven # next tweak value - vaddubm $tweak,$tweak,$tweak - vsldoi $tmp,$tmp,$tmp,15 - vand $tmp,$tmp,$eighty7 - vxor $tweak,$tweak,$tmp - - le?vperm $tweak,$tweak,$tweak,$leperm - stvx_u $tweak,0,$ivp - -Lxts_dec_ret: - mtspr 256,r12 # restore vrsave - li r3,0 - blr - .long 0 - .byte 0,12,0x04,0,0x80,6,6,0 - .long 0 -.size .${prefix}_xts_decrypt,.-.${prefix}_xts_decrypt -___ -######################################################################### -{{ # Optimized XTS procedures # -my $key_=$key2; -my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,3,26..31)); - $x00=0 if ($flavour =~ /osx/); -my ($in0, $in1, $in2, $in3, $in4, $in5 )=map("v$_",(0..5)); -my ($out0, $out1, $out2, $out3, $out4, $out5)=map("v$_",(7,12..16)); -my ($twk0, $twk1, $twk2, $twk3, $twk4, $twk5)=map("v$_",(17..22)); -my $rndkey0="v23"; # v24-v25 rotating buffer for first found keys - # v26-v31 last 6 round keys -my ($keyperm)=($out0); # aliases with "caller", redundant assignment -my $taillen=$x70; - -$code.=<<___; -.align 5 -_aesp8_xts_encrypt6x: - $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp) - mflr r11 - li r7,`$FRAME+8*16+15` - li r3,`$FRAME+8*16+31` - $PUSH r11,`$FRAME+21*16+6*$SIZE_T+$LRSAVE`($sp) - stvx v20,r7,$sp # ABI says so - addi r7,r7,32 - stvx v21,r3,$sp - addi r3,r3,32 - stvx v22,r7,$sp - addi r7,r7,32 - stvx v23,r3,$sp - addi r3,r3,32 - stvx v24,r7,$sp - addi r7,r7,32 - stvx v25,r3,$sp - addi r3,r3,32 - stvx v26,r7,$sp - addi r7,r7,32 - stvx v27,r3,$sp - addi r3,r3,32 - stvx v28,r7,$sp - addi r7,r7,32 - stvx v29,r3,$sp - addi r3,r3,32 - stvx v30,r7,$sp - stvx v31,r3,$sp - li r0,-1 - stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave - li $x10,0x10 - $PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp) - li $x20,0x20 - $PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp) - li $x30,0x30 - $PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp) - li $x40,0x40 - $PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp) - li $x50,0x50 - $PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp) - li $x60,0x60 - $PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp) - li $x70,0x70 - mtspr 256,r0 - - xxlor 2, 32+$eighty7, 32+$eighty7 - vsldoi $eighty7,$tmp,$eighty7,1 # 0x010101..87 - xxlor 1, 32+$eighty7, 32+$eighty7 - - # Load XOR Lconsts. - mr $x70, r6 - bl Lconsts - lxvw4x 0, $x40, r6 # load XOR contents - mr r6, $x70 - li $x70,0x70 - - subi $rounds,$rounds,3 # -4 in total - - lvx $rndkey0,$x00,$key1 # load key schedule - lvx v30,$x10,$key1 - addi $key1,$key1,0x20 - lvx v31,$x00,$key1 - ?vperm $rndkey0,$rndkey0,v30,$keyperm - addi $key_,$sp,$FRAME+15 - mtctr $rounds - -Load_xts_enc_key: - ?vperm v24,v30,v31,$keyperm - lvx v30,$x10,$key1 - addi $key1,$key1,0x20 - stvx v24,$x00,$key_ # off-load round[1] - ?vperm v25,v31,v30,$keyperm - lvx v31,$x00,$key1 - stvx v25,$x10,$key_ # off-load round[2] - addi $key_,$key_,0x20 - bdnz Load_xts_enc_key - - lvx v26,$x10,$key1 - ?vperm v24,v30,v31,$keyperm - lvx v27,$x20,$key1 - stvx v24,$x00,$key_ # off-load round[3] - ?vperm v25,v31,v26,$keyperm - lvx v28,$x30,$key1 - stvx v25,$x10,$key_ # off-load round[4] - addi $key_,$sp,$FRAME+15 # rewind $key_ - ?vperm v26,v26,v27,$keyperm - lvx v29,$x40,$key1 - ?vperm v27,v27,v28,$keyperm - lvx v30,$x50,$key1 - ?vperm v28,v28,v29,$keyperm - lvx v31,$x60,$key1 - ?vperm v29,v29,v30,$keyperm - lvx $twk5,$x70,$key1 # borrow $twk5 - ?vperm v30,v30,v31,$keyperm - lvx v24,$x00,$key_ # pre-load round[1] - ?vperm v31,v31,$twk5,$keyperm - lvx v25,$x10,$key_ # pre-load round[2] - - # Switch to use the following codes with 0x010101..87 to generate tweak. - # eighty7 = 0x010101..87 - # vsrab tmp, tweak, seven # next tweak value, right shift 7 bits - # vand tmp, tmp, eighty7 # last byte with carry - # vaddubm tweak, tweak, tweak # left shift 1 bit (x2) - # xxlor vsx, 0, 0 - # vpermxor tweak, tweak, tmp, vsx - - vperm $in0,$inout,$inptail,$inpperm - subi $inp,$inp,31 # undo "caller" - vxor $twk0,$tweak,$rndkey0 - vsrab $tmp,$tweak,$seven # next tweak value - vaddubm $tweak,$tweak,$tweak - vand $tmp,$tmp,$eighty7 - vxor $out0,$in0,$twk0 - xxlor 32+$in1, 0, 0 - vpermxor $tweak, $tweak, $tmp, $in1 - - lvx_u $in1,$x10,$inp - vxor $twk1,$tweak,$rndkey0 - vsrab $tmp,$tweak,$seven # next tweak value - vaddubm $tweak,$tweak,$tweak - le?vperm $in1,$in1,$in1,$leperm - vand $tmp,$tmp,$eighty7 - vxor $out1,$in1,$twk1 - xxlor 32+$in2, 0, 0 - vpermxor $tweak, $tweak, $tmp, $in2 - - lvx_u $in2,$x20,$inp - andi. $taillen,$len,15 - vxor $twk2,$tweak,$rndkey0 - vsrab $tmp,$tweak,$seven # next tweak value - vaddubm $tweak,$tweak,$tweak - le?vperm $in2,$in2,$in2,$leperm - vand $tmp,$tmp,$eighty7 - vxor $out2,$in2,$twk2 - xxlor 32+$in3, 0, 0 - vpermxor $tweak, $tweak, $tmp, $in3 - - lvx_u $in3,$x30,$inp - sub $len,$len,$taillen - vxor $twk3,$tweak,$rndkey0 - vsrab $tmp,$tweak,$seven # next tweak value - vaddubm $tweak,$tweak,$tweak - le?vperm $in3,$in3,$in3,$leperm - vand $tmp,$tmp,$eighty7 - vxor $out3,$in3,$twk3 - xxlor 32+$in4, 0, 0 - vpermxor $tweak, $tweak, $tmp, $in4 - - lvx_u $in4,$x40,$inp - subi $len,$len,0x60 - vxor $twk4,$tweak,$rndkey0 - vsrab $tmp,$tweak,$seven # next tweak value - vaddubm $tweak,$tweak,$tweak - le?vperm $in4,$in4,$in4,$leperm - vand $tmp,$tmp,$eighty7 - vxor $out4,$in4,$twk4 - xxlor 32+$in5, 0, 0 - vpermxor $tweak, $tweak, $tmp, $in5 - - lvx_u $in5,$x50,$inp - addi $inp,$inp,0x60 - vxor $twk5,$tweak,$rndkey0 - vsrab $tmp,$tweak,$seven # next tweak value - vaddubm $tweak,$tweak,$tweak - le?vperm $in5,$in5,$in5,$leperm - vand $tmp,$tmp,$eighty7 - vxor $out5,$in5,$twk5 - xxlor 32+$in0, 0, 0 - vpermxor $tweak, $tweak, $tmp, $in0 - - vxor v31,v31,$rndkey0 - mtctr $rounds - b Loop_xts_enc6x - -.align 5 -Loop_xts_enc6x: - vcipher $out0,$out0,v24 - vcipher $out1,$out1,v24 - vcipher $out2,$out2,v24 - vcipher $out3,$out3,v24 - vcipher $out4,$out4,v24 - vcipher $out5,$out5,v24 - lvx v24,$x20,$key_ # round[3] - addi $key_,$key_,0x20 - - vcipher $out0,$out0,v25 - vcipher $out1,$out1,v25 - vcipher $out2,$out2,v25 - vcipher $out3,$out3,v25 - vcipher $out4,$out4,v25 - vcipher $out5,$out5,v25 - lvx v25,$x10,$key_ # round[4] - bdnz Loop_xts_enc6x - - xxlor 32+$eighty7, 1, 1 # 0x010101..87 - - subic $len,$len,96 # $len-=96 - vxor $in0,$twk0,v31 # xor with last round key - vcipher $out0,$out0,v24 - vcipher $out1,$out1,v24 - vsrab $tmp,$tweak,$seven # next tweak value - vxor $twk0,$tweak,$rndkey0 - vaddubm $tweak,$tweak,$tweak - vcipher $out2,$out2,v24 - vcipher $out3,$out3,v24 - vcipher $out4,$out4,v24 - vcipher $out5,$out5,v24 - - subfe. r0,r0,r0 # borrow?-1:0 - vand $tmp,$tmp,$eighty7 - vcipher $out0,$out0,v25 - vcipher $out1,$out1,v25 - xxlor 32+$in1, 0, 0 - vpermxor $tweak, $tweak, $tmp, $in1 - vcipher $out2,$out2,v25 - vcipher $out3,$out3,v25 - vxor $in1,$twk1,v31 - vsrab $tmp,$tweak,$seven # next tweak value - vxor $twk1,$tweak,$rndkey0 - vcipher $out4,$out4,v25 - vcipher $out5,$out5,v25 - - and r0,r0,$len - vaddubm $tweak,$tweak,$tweak - vcipher $out0,$out0,v26 - vcipher $out1,$out1,v26 - vand $tmp,$tmp,$eighty7 - vcipher $out2,$out2,v26 - vcipher $out3,$out3,v26 - xxlor 32+$in2, 0, 0 - vpermxor $tweak, $tweak, $tmp, $in2 - vcipher $out4,$out4,v26 - vcipher $out5,$out5,v26 - - add $inp,$inp,r0 # $inp is adjusted in such - # way that at exit from the - # loop inX-in5 are loaded - # with last "words" - vxor $in2,$twk2,v31 - vsrab $tmp,$tweak,$seven # next tweak value - vxor $twk2,$tweak,$rndkey0 - vaddubm $tweak,$tweak,$tweak - vcipher $out0,$out0,v27 - vcipher $out1,$out1,v27 - vcipher $out2,$out2,v27 - vcipher $out3,$out3,v27 - vand $tmp,$tmp,$eighty7 - vcipher $out4,$out4,v27 - vcipher $out5,$out5,v27 - - addi $key_,$sp,$FRAME+15 # rewind $key_ - xxlor 32+$in3, 0, 0 - vpermxor $tweak, $tweak, $tmp, $in3 - vcipher $out0,$out0,v28 - vcipher $out1,$out1,v28 - vxor $in3,$twk3,v31 - vsrab $tmp,$tweak,$seven # next tweak value - vxor $twk3,$tweak,$rndkey0 - vcipher $out2,$out2,v28 - vcipher $out3,$out3,v28 - vaddubm $tweak,$tweak,$tweak - vcipher $out4,$out4,v28 - vcipher $out5,$out5,v28 - lvx v24,$x00,$key_ # re-pre-load round[1] - vand $tmp,$tmp,$eighty7 - - vcipher $out0,$out0,v29 - vcipher $out1,$out1,v29 - xxlor 32+$in4, 0, 0 - vpermxor $tweak, $tweak, $tmp, $in4 - vcipher $out2,$out2,v29 - vcipher $out3,$out3,v29 - vxor $in4,$twk4,v31 - vsrab $tmp,$tweak,$seven # next tweak value - vxor $twk4,$tweak,$rndkey0 - vcipher $out4,$out4,v29 - vcipher $out5,$out5,v29 - lvx v25,$x10,$key_ # re-pre-load round[2] - vaddubm $tweak,$tweak,$tweak - - vcipher $out0,$out0,v30 - vcipher $out1,$out1,v30 - vand $tmp,$tmp,$eighty7 - vcipher $out2,$out2,v30 - vcipher $out3,$out3,v30 - xxlor 32+$in5, 0, 0 - vpermxor $tweak, $tweak, $tmp, $in5 - vcipher $out4,$out4,v30 - vcipher $out5,$out5,v30 - vxor $in5,$twk5,v31 - vsrab $tmp,$tweak,$seven # next tweak value - vxor $twk5,$tweak,$rndkey0 - - vcipherlast $out0,$out0,$in0 - lvx_u $in0,$x00,$inp # load next input block - vaddubm $tweak,$tweak,$tweak - vcipherlast $out1,$out1,$in1 - lvx_u $in1,$x10,$inp - vcipherlast $out2,$out2,$in2 - le?vperm $in0,$in0,$in0,$leperm - lvx_u $in2,$x20,$inp - vand $tmp,$tmp,$eighty7 - vcipherlast $out3,$out3,$in3 - le?vperm $in1,$in1,$in1,$leperm - lvx_u $in3,$x30,$inp - vcipherlast $out4,$out4,$in4 - le?vperm $in2,$in2,$in2,$leperm - lvx_u $in4,$x40,$inp - xxlor 10, 32+$in0, 32+$in0 - xxlor 32+$in0, 0, 0 - vpermxor $tweak, $tweak, $tmp, $in0 - xxlor 32+$in0, 10, 10 - vcipherlast $tmp,$out5,$in5 # last block might be needed - # in stealing mode - le?vperm $in3,$in3,$in3,$leperm - lvx_u $in5,$x50,$inp - addi $inp,$inp,0x60 - le?vperm $in4,$in4,$in4,$leperm - le?vperm $in5,$in5,$in5,$leperm - - le?vperm $out0,$out0,$out0,$leperm - le?vperm $out1,$out1,$out1,$leperm - stvx_u $out0,$x00,$out # store output - vxor $out0,$in0,$twk0 - le?vperm $out2,$out2,$out2,$leperm - stvx_u $out1,$x10,$out - vxor $out1,$in1,$twk1 - le?vperm $out3,$out3,$out3,$leperm - stvx_u $out2,$x20,$out - vxor $out2,$in2,$twk2 - le?vperm $out4,$out4,$out4,$leperm - stvx_u $out3,$x30,$out - vxor $out3,$in3,$twk3 - le?vperm $out5,$tmp,$tmp,$leperm - stvx_u $out4,$x40,$out - vxor $out4,$in4,$twk4 - le?stvx_u $out5,$x50,$out - be?stvx_u $tmp, $x50,$out - vxor $out5,$in5,$twk5 - addi $out,$out,0x60 - - mtctr $rounds - beq Loop_xts_enc6x # did $len-=96 borrow? - - xxlor 32+$eighty7, 2, 2 # 0x010101..87 - - addic. $len,$len,0x60 - beq Lxts_enc6x_zero - cmpwi $len,0x20 - blt Lxts_enc6x_one - nop - beq Lxts_enc6x_two - cmpwi $len,0x40 - blt Lxts_enc6x_three - nop - beq Lxts_enc6x_four - -Lxts_enc6x_five: - vxor $out0,$in1,$twk0 - vxor $out1,$in2,$twk1 - vxor $out2,$in3,$twk2 - vxor $out3,$in4,$twk3 - vxor $out4,$in5,$twk4 - - bl _aesp8_xts_enc5x - - le?vperm $out0,$out0,$out0,$leperm - vmr $twk0,$twk5 # unused tweak - le?vperm $out1,$out1,$out1,$leperm - stvx_u $out0,$x00,$out # store output - le?vperm $out2,$out2,$out2,$leperm - stvx_u $out1,$x10,$out - le?vperm $out3,$out3,$out3,$leperm - stvx_u $out2,$x20,$out - vxor $tmp,$out4,$twk5 # last block prep for stealing - le?vperm $out4,$out4,$out4,$leperm - stvx_u $out3,$x30,$out - stvx_u $out4,$x40,$out - addi $out,$out,0x50 - bne Lxts_enc6x_steal - b Lxts_enc6x_done - -.align 4 -Lxts_enc6x_four: - vxor $out0,$in2,$twk0 - vxor $out1,$in3,$twk1 - vxor $out2,$in4,$twk2 - vxor $out3,$in5,$twk3 - vxor $out4,$out4,$out4 - - bl _aesp8_xts_enc5x - - le?vperm $out0,$out0,$out0,$leperm - vmr $twk0,$twk4 # unused tweak - le?vperm $out1,$out1,$out1,$leperm - stvx_u $out0,$x00,$out # store output - le?vperm $out2,$out2,$out2,$leperm - stvx_u $out1,$x10,$out - vxor $tmp,$out3,$twk4 # last block prep for stealing - le?vperm $out3,$out3,$out3,$leperm - stvx_u $out2,$x20,$out - stvx_u $out3,$x30,$out - addi $out,$out,0x40 - bne Lxts_enc6x_steal - b Lxts_enc6x_done - -.align 4 -Lxts_enc6x_three: - vxor $out0,$in3,$twk0 - vxor $out1,$in4,$twk1 - vxor $out2,$in5,$twk2 - vxor $out3,$out3,$out3 - vxor $out4,$out4,$out4 - - bl _aesp8_xts_enc5x - - le?vperm $out0,$out0,$out0,$leperm - vmr $twk0,$twk3 # unused tweak - le?vperm $out1,$out1,$out1,$leperm - stvx_u $out0,$x00,$out # store output - vxor $tmp,$out2,$twk3 # last block prep for stealing - le?vperm $out2,$out2,$out2,$leperm - stvx_u $out1,$x10,$out - stvx_u $out2,$x20,$out - addi $out,$out,0x30 - bne Lxts_enc6x_steal - b Lxts_enc6x_done - -.align 4 -Lxts_enc6x_two: - vxor $out0,$in4,$twk0 - vxor $out1,$in5,$twk1 - vxor $out2,$out2,$out2 - vxor $out3,$out3,$out3 - vxor $out4,$out4,$out4 - - bl _aesp8_xts_enc5x - - le?vperm $out0,$out0,$out0,$leperm - vmr $twk0,$twk2 # unused tweak - vxor $tmp,$out1,$twk2 # last block prep for stealing - le?vperm $out1,$out1,$out1,$leperm - stvx_u $out0,$x00,$out # store output - stvx_u $out1,$x10,$out - addi $out,$out,0x20 - bne Lxts_enc6x_steal - b Lxts_enc6x_done - -.align 4 -Lxts_enc6x_one: - vxor $out0,$in5,$twk0 - nop -Loop_xts_enc1x: - vcipher $out0,$out0,v24 - lvx v24,$x20,$key_ # round[3] - addi $key_,$key_,0x20 - - vcipher $out0,$out0,v25 - lvx v25,$x10,$key_ # round[4] - bdnz Loop_xts_enc1x - - add $inp,$inp,$taillen - cmpwi $taillen,0 - vcipher $out0,$out0,v24 - - subi $inp,$inp,16 - vcipher $out0,$out0,v25 - - lvsr $inpperm,0,$taillen - vcipher $out0,$out0,v26 - - lvx_u $in0,0,$inp - vcipher $out0,$out0,v27 - - addi $key_,$sp,$FRAME+15 # rewind $key_ - vcipher $out0,$out0,v28 - lvx v24,$x00,$key_ # re-pre-load round[1] - - vcipher $out0,$out0,v29 - lvx v25,$x10,$key_ # re-pre-load round[2] - vxor $twk0,$twk0,v31 - - le?vperm $in0,$in0,$in0,$leperm - vcipher $out0,$out0,v30 - - vperm $in0,$in0,$in0,$inpperm - vcipherlast $out0,$out0,$twk0 - - vmr $twk0,$twk1 # unused tweak - vxor $tmp,$out0,$twk1 # last block prep for stealing - le?vperm $out0,$out0,$out0,$leperm - stvx_u $out0,$x00,$out # store output - addi $out,$out,0x10 - bne Lxts_enc6x_steal - b Lxts_enc6x_done - -.align 4 -Lxts_enc6x_zero: - cmpwi $taillen,0 - beq Lxts_enc6x_done - - add $inp,$inp,$taillen - subi $inp,$inp,16 - lvx_u $in0,0,$inp - lvsr $inpperm,0,$taillen # $in5 is no more - le?vperm $in0,$in0,$in0,$leperm - vperm $in0,$in0,$in0,$inpperm - vxor $tmp,$tmp,$twk0 -Lxts_enc6x_steal: - vxor $in0,$in0,$twk0 - vxor $out0,$out0,$out0 - vspltisb $out1,-1 - vperm $out0,$out0,$out1,$inpperm - vsel $out0,$in0,$tmp,$out0 # $tmp is last block, remember? - - subi r30,$out,17 - subi $out,$out,16 - mtctr $taillen -Loop_xts_enc6x_steal: - lbzu r0,1(r30) - stb r0,16(r30) - bdnz Loop_xts_enc6x_steal - - li $taillen,0 - mtctr $rounds - b Loop_xts_enc1x # one more time... - -.align 4 -Lxts_enc6x_done: - ${UCMP}i $ivp,0 - beq Lxts_enc6x_ret - - vxor $tweak,$twk0,$rndkey0 - le?vperm $tweak,$tweak,$tweak,$leperm - stvx_u $tweak,0,$ivp - -Lxts_enc6x_ret: - mtlr r11 - li r10,`$FRAME+15` - li r11,`$FRAME+31` - stvx $seven,r10,$sp # wipe copies of round keys - addi r10,r10,32 - stvx $seven,r11,$sp - addi r11,r11,32 - stvx $seven,r10,$sp - addi r10,r10,32 - stvx $seven,r11,$sp - addi r11,r11,32 - stvx $seven,r10,$sp - addi r10,r10,32 - stvx $seven,r11,$sp - addi r11,r11,32 - stvx $seven,r10,$sp - addi r10,r10,32 - stvx $seven,r11,$sp - addi r11,r11,32 - - mtspr 256,$vrsave - lvx v20,r10,$sp # ABI says so - addi r10,r10,32 - lvx v21,r11,$sp - addi r11,r11,32 - lvx v22,r10,$sp - addi r10,r10,32 - lvx v23,r11,$sp - addi r11,r11,32 - lvx v24,r10,$sp - addi r10,r10,32 - lvx v25,r11,$sp - addi r11,r11,32 - lvx v26,r10,$sp - addi r10,r10,32 - lvx v27,r11,$sp - addi r11,r11,32 - lvx v28,r10,$sp - addi r10,r10,32 - lvx v29,r11,$sp - addi r11,r11,32 - lvx v30,r10,$sp - lvx v31,r11,$sp - $POP r26,`$FRAME+21*16+0*$SIZE_T`($sp) - $POP r27,`$FRAME+21*16+1*$SIZE_T`($sp) - $POP r28,`$FRAME+21*16+2*$SIZE_T`($sp) - $POP r29,`$FRAME+21*16+3*$SIZE_T`($sp) - $POP r30,`$FRAME+21*16+4*$SIZE_T`($sp) - $POP r31,`$FRAME+21*16+5*$SIZE_T`($sp) - addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T` - blr - .long 0 - .byte 0,12,0x04,1,0x80,6,6,0 - .long 0 - -.align 5 -_aesp8_xts_enc5x: - vcipher $out0,$out0,v24 - vcipher $out1,$out1,v24 - vcipher $out2,$out2,v24 - vcipher $out3,$out3,v24 - vcipher $out4,$out4,v24 - lvx v24,$x20,$key_ # round[3] - addi $key_,$key_,0x20 - - vcipher $out0,$out0,v25 - vcipher $out1,$out1,v25 - vcipher $out2,$out2,v25 - vcipher $out3,$out3,v25 - vcipher $out4,$out4,v25 - lvx v25,$x10,$key_ # round[4] - bdnz _aesp8_xts_enc5x - - add $inp,$inp,$taillen - cmpwi $taillen,0 - vcipher $out0,$out0,v24 - vcipher $out1,$out1,v24 - vcipher $out2,$out2,v24 - vcipher $out3,$out3,v24 - vcipher $out4,$out4,v24 - - subi $inp,$inp,16 - vcipher $out0,$out0,v25 - vcipher $out1,$out1,v25 - vcipher $out2,$out2,v25 - vcipher $out3,$out3,v25 - vcipher $out4,$out4,v25 - vxor $twk0,$twk0,v31 - - vcipher $out0,$out0,v26 - lvsr $inpperm,r0,$taillen # $in5 is no more - vcipher $out1,$out1,v26 - vcipher $out2,$out2,v26 - vcipher $out3,$out3,v26 - vcipher $out4,$out4,v26 - vxor $in1,$twk1,v31 - - vcipher $out0,$out0,v27 - lvx_u $in0,0,$inp - vcipher $out1,$out1,v27 - vcipher $out2,$out2,v27 - vcipher $out3,$out3,v27 - vcipher $out4,$out4,v27 - vxor $in2,$twk2,v31 - - addi $key_,$sp,$FRAME+15 # rewind $key_ - vcipher $out0,$out0,v28 - vcipher $out1,$out1,v28 - vcipher $out2,$out2,v28 - vcipher $out3,$out3,v28 - vcipher $out4,$out4,v28 - lvx v24,$x00,$key_ # re-pre-load round[1] - vxor $in3,$twk3,v31 - - vcipher $out0,$out0,v29 - le?vperm $in0,$in0,$in0,$leperm - vcipher $out1,$out1,v29 - vcipher $out2,$out2,v29 - vcipher $out3,$out3,v29 - vcipher $out4,$out4,v29 - lvx v25,$x10,$key_ # re-pre-load round[2] - vxor $in4,$twk4,v31 - - vcipher $out0,$out0,v30 - vperm $in0,$in0,$in0,$inpperm - vcipher $out1,$out1,v30 - vcipher $out2,$out2,v30 - vcipher $out3,$out3,v30 - vcipher $out4,$out4,v30 - - vcipherlast $out0,$out0,$twk0 - vcipherlast $out1,$out1,$in1 - vcipherlast $out2,$out2,$in2 - vcipherlast $out3,$out3,$in3 - vcipherlast $out4,$out4,$in4 - blr - .long 0 - .byte 0,12,0x14,0,0,0,0,0 - -.align 5 -_aesp8_xts_decrypt6x: - $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp) - mflr r11 - li r7,`$FRAME+8*16+15` - li r3,`$FRAME+8*16+31` - $PUSH r11,`$FRAME+21*16+6*$SIZE_T+$LRSAVE`($sp) - stvx v20,r7,$sp # ABI says so - addi r7,r7,32 - stvx v21,r3,$sp - addi r3,r3,32 - stvx v22,r7,$sp - addi r7,r7,32 - stvx v23,r3,$sp - addi r3,r3,32 - stvx v24,r7,$sp - addi r7,r7,32 - stvx v25,r3,$sp - addi r3,r3,32 - stvx v26,r7,$sp - addi r7,r7,32 - stvx v27,r3,$sp - addi r3,r3,32 - stvx v28,r7,$sp - addi r7,r7,32 - stvx v29,r3,$sp - addi r3,r3,32 - stvx v30,r7,$sp - stvx v31,r3,$sp - li r0,-1 - stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave - li $x10,0x10 - $PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp) - li $x20,0x20 - $PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp) - li $x30,0x30 - $PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp) - li $x40,0x40 - $PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp) - li $x50,0x50 - $PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp) - li $x60,0x60 - $PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp) - li $x70,0x70 - mtspr 256,r0 - - xxlor 2, 32+$eighty7, 32+$eighty7 - vsldoi $eighty7,$tmp,$eighty7,1 # 0x010101..87 - xxlor 1, 32+$eighty7, 32+$eighty7 - - # Load XOR Lconsts. - mr $x70, r6 - bl Lconsts - lxvw4x 0, $x40, r6 # load XOR contents - mr r6, $x70 - li $x70,0x70 - - subi $rounds,$rounds,3 # -4 in total - - lvx $rndkey0,$x00,$key1 # load key schedule - lvx v30,$x10,$key1 - addi $key1,$key1,0x20 - lvx v31,$x00,$key1 - ?vperm $rndkey0,$rndkey0,v30,$keyperm - addi $key_,$sp,$FRAME+15 - mtctr $rounds - -Load_xts_dec_key: - ?vperm v24,v30,v31,$keyperm - lvx v30,$x10,$key1 - addi $key1,$key1,0x20 - stvx v24,$x00,$key_ # off-load round[1] - ?vperm v25,v31,v30,$keyperm - lvx v31,$x00,$key1 - stvx v25,$x10,$key_ # off-load round[2] - addi $key_,$key_,0x20 - bdnz Load_xts_dec_key - - lvx v26,$x10,$key1 - ?vperm v24,v30,v31,$keyperm - lvx v27,$x20,$key1 - stvx v24,$x00,$key_ # off-load round[3] - ?vperm v25,v31,v26,$keyperm - lvx v28,$x30,$key1 - stvx v25,$x10,$key_ # off-load round[4] - addi $key_,$sp,$FRAME+15 # rewind $key_ - ?vperm v26,v26,v27,$keyperm - lvx v29,$x40,$key1 - ?vperm v27,v27,v28,$keyperm - lvx v30,$x50,$key1 - ?vperm v28,v28,v29,$keyperm - lvx v31,$x60,$key1 - ?vperm v29,v29,v30,$keyperm - lvx $twk5,$x70,$key1 # borrow $twk5 - ?vperm v30,v30,v31,$keyperm - lvx v24,$x00,$key_ # pre-load round[1] - ?vperm v31,v31,$twk5,$keyperm - lvx v25,$x10,$key_ # pre-load round[2] - - vperm $in0,$inout,$inptail,$inpperm - subi $inp,$inp,31 # undo "caller" - vxor $twk0,$tweak,$rndkey0 - vsrab $tmp,$tweak,$seven # next tweak value - vaddubm $tweak,$tweak,$tweak - vand $tmp,$tmp,$eighty7 - vxor $out0,$in0,$twk0 - xxlor 32+$in1, 0, 0 - vpermxor $tweak, $tweak, $tmp, $in1 - - lvx_u $in1,$x10,$inp - vxor $twk1,$tweak,$rndkey0 - vsrab $tmp,$tweak,$seven # next tweak value - vaddubm $tweak,$tweak,$tweak - le?vperm $in1,$in1,$in1,$leperm - vand $tmp,$tmp,$eighty7 - vxor $out1,$in1,$twk1 - xxlor 32+$in2, 0, 0 - vpermxor $tweak, $tweak, $tmp, $in2 - - lvx_u $in2,$x20,$inp - andi. $taillen,$len,15 - vxor $twk2,$tweak,$rndkey0 - vsrab $tmp,$tweak,$seven # next tweak value - vaddubm $tweak,$tweak,$tweak - le?vperm $in2,$in2,$in2,$leperm - vand $tmp,$tmp,$eighty7 - vxor $out2,$in2,$twk2 - xxlor 32+$in3, 0, 0 - vpermxor $tweak, $tweak, $tmp, $in3 - - lvx_u $in3,$x30,$inp - sub $len,$len,$taillen - vxor $twk3,$tweak,$rndkey0 - vsrab $tmp,$tweak,$seven # next tweak value - vaddubm $tweak,$tweak,$tweak - le?vperm $in3,$in3,$in3,$leperm - vand $tmp,$tmp,$eighty7 - vxor $out3,$in3,$twk3 - xxlor 32+$in4, 0, 0 - vpermxor $tweak, $tweak, $tmp, $in4 - - lvx_u $in4,$x40,$inp - subi $len,$len,0x60 - vxor $twk4,$tweak,$rndkey0 - vsrab $tmp,$tweak,$seven # next tweak value - vaddubm $tweak,$tweak,$tweak - le?vperm $in4,$in4,$in4,$leperm - vand $tmp,$tmp,$eighty7 - vxor $out4,$in4,$twk4 - xxlor 32+$in5, 0, 0 - vpermxor $tweak, $tweak, $tmp, $in5 - - lvx_u $in5,$x50,$inp - addi $inp,$inp,0x60 - vxor $twk5,$tweak,$rndkey0 - vsrab $tmp,$tweak,$seven # next tweak value - vaddubm $tweak,$tweak,$tweak - le?vperm $in5,$in5,$in5,$leperm - vand $tmp,$tmp,$eighty7 - vxor $out5,$in5,$twk5 - xxlor 32+$in0, 0, 0 - vpermxor $tweak, $tweak, $tmp, $in0 - - vxor v31,v31,$rndkey0 - mtctr $rounds - b Loop_xts_dec6x - -.align 5 -Loop_xts_dec6x: - vncipher $out0,$out0,v24 - vncipher $out1,$out1,v24 - vncipher $out2,$out2,v24 - vncipher $out3,$out3,v24 - vncipher $out4,$out4,v24 - vncipher $out5,$out5,v24 - lvx v24,$x20,$key_ # round[3] - addi $key_,$key_,0x20 - - vncipher $out0,$out0,v25 - vncipher $out1,$out1,v25 - vncipher $out2,$out2,v25 - vncipher $out3,$out3,v25 - vncipher $out4,$out4,v25 - vncipher $out5,$out5,v25 - lvx v25,$x10,$key_ # round[4] - bdnz Loop_xts_dec6x - - xxlor 32+$eighty7, 1, 1 # 0x010101..87 - - subic $len,$len,96 # $len-=96 - vxor $in0,$twk0,v31 # xor with last round key - vncipher $out0,$out0,v24 - vncipher $out1,$out1,v24 - vsrab $tmp,$tweak,$seven # next tweak value - vxor $twk0,$tweak,$rndkey0 - vaddubm $tweak,$tweak,$tweak - vncipher $out2,$out2,v24 - vncipher $out3,$out3,v24 - vncipher $out4,$out4,v24 - vncipher $out5,$out5,v24 - - subfe. r0,r0,r0 # borrow?-1:0 - vand $tmp,$tmp,$eighty7 - vncipher $out0,$out0,v25 - vncipher $out1,$out1,v25 - xxlor 32+$in1, 0, 0 - vpermxor $tweak, $tweak, $tmp, $in1 - vncipher $out2,$out2,v25 - vncipher $out3,$out3,v25 - vxor $in1,$twk1,v31 - vsrab $tmp,$tweak,$seven # next tweak value - vxor $twk1,$tweak,$rndkey0 - vncipher $out4,$out4,v25 - vncipher $out5,$out5,v25 - - and r0,r0,$len - vaddubm $tweak,$tweak,$tweak - vncipher $out0,$out0,v26 - vncipher $out1,$out1,v26 - vand $tmp,$tmp,$eighty7 - vncipher $out2,$out2,v26 - vncipher $out3,$out3,v26 - xxlor 32+$in2, 0, 0 - vpermxor $tweak, $tweak, $tmp, $in2 - vncipher $out4,$out4,v26 - vncipher $out5,$out5,v26 - - add $inp,$inp,r0 # $inp is adjusted in such - # way that at exit from the - # loop inX-in5 are loaded - # with last "words" - vxor $in2,$twk2,v31 - vsrab $tmp,$tweak,$seven # next tweak value - vxor $twk2,$tweak,$rndkey0 - vaddubm $tweak,$tweak,$tweak - vncipher $out0,$out0,v27 - vncipher $out1,$out1,v27 - vncipher $out2,$out2,v27 - vncipher $out3,$out3,v27 - vand $tmp,$tmp,$eighty7 - vncipher $out4,$out4,v27 - vncipher $out5,$out5,v27 - - addi $key_,$sp,$FRAME+15 # rewind $key_ - xxlor 32+$in3, 0, 0 - vpermxor $tweak, $tweak, $tmp, $in3 - vncipher $out0,$out0,v28 - vncipher $out1,$out1,v28 - vxor $in3,$twk3,v31 - vsrab $tmp,$tweak,$seven # next tweak value - vxor $twk3,$tweak,$rndkey0 - vncipher $out2,$out2,v28 - vncipher $out3,$out3,v28 - vaddubm $tweak,$tweak,$tweak - vncipher $out4,$out4,v28 - vncipher $out5,$out5,v28 - lvx v24,$x00,$key_ # re-pre-load round[1] - vand $tmp,$tmp,$eighty7 - - vncipher $out0,$out0,v29 - vncipher $out1,$out1,v29 - xxlor 32+$in4, 0, 0 - vpermxor $tweak, $tweak, $tmp, $in4 - vncipher $out2,$out2,v29 - vncipher $out3,$out3,v29 - vxor $in4,$twk4,v31 - vsrab $tmp,$tweak,$seven # next tweak value - vxor $twk4,$tweak,$rndkey0 - vncipher $out4,$out4,v29 - vncipher $out5,$out5,v29 - lvx v25,$x10,$key_ # re-pre-load round[2] - vaddubm $tweak,$tweak,$tweak - - vncipher $out0,$out0,v30 - vncipher $out1,$out1,v30 - vand $tmp,$tmp,$eighty7 - vncipher $out2,$out2,v30 - vncipher $out3,$out3,v30 - xxlor 32+$in5, 0, 0 - vpermxor $tweak, $tweak, $tmp, $in5 - vncipher $out4,$out4,v30 - vncipher $out5,$out5,v30 - vxor $in5,$twk5,v31 - vsrab $tmp,$tweak,$seven # next tweak value - vxor $twk5,$tweak,$rndkey0 - - vncipherlast $out0,$out0,$in0 - lvx_u $in0,$x00,$inp # load next input block - vaddubm $tweak,$tweak,$tweak - vncipherlast $out1,$out1,$in1 - lvx_u $in1,$x10,$inp - vncipherlast $out2,$out2,$in2 - le?vperm $in0,$in0,$in0,$leperm - lvx_u $in2,$x20,$inp - vand $tmp,$tmp,$eighty7 - vncipherlast $out3,$out3,$in3 - le?vperm $in1,$in1,$in1,$leperm - lvx_u $in3,$x30,$inp - vncipherlast $out4,$out4,$in4 - le?vperm $in2,$in2,$in2,$leperm - lvx_u $in4,$x40,$inp - xxlor 10, 32+$in0, 32+$in0 - xxlor 32+$in0, 0, 0 - vpermxor $tweak, $tweak, $tmp, $in0 - xxlor 32+$in0, 10, 10 - vncipherlast $out5,$out5,$in5 - le?vperm $in3,$in3,$in3,$leperm - lvx_u $in5,$x50,$inp - addi $inp,$inp,0x60 - le?vperm $in4,$in4,$in4,$leperm - le?vperm $in5,$in5,$in5,$leperm - - le?vperm $out0,$out0,$out0,$leperm - le?vperm $out1,$out1,$out1,$leperm - stvx_u $out0,$x00,$out # store output - vxor $out0,$in0,$twk0 - le?vperm $out2,$out2,$out2,$leperm - stvx_u $out1,$x10,$out - vxor $out1,$in1,$twk1 - le?vperm $out3,$out3,$out3,$leperm - stvx_u $out2,$x20,$out - vxor $out2,$in2,$twk2 - le?vperm $out4,$out4,$out4,$leperm - stvx_u $out3,$x30,$out - vxor $out3,$in3,$twk3 - le?vperm $out5,$out5,$out5,$leperm - stvx_u $out4,$x40,$out - vxor $out4,$in4,$twk4 - stvx_u $out5,$x50,$out - vxor $out5,$in5,$twk5 - addi $out,$out,0x60 - - mtctr $rounds - beq Loop_xts_dec6x # did $len-=96 borrow? - - xxlor 32+$eighty7, 2, 2 # 0x010101..87 - - addic. $len,$len,0x60 - beq Lxts_dec6x_zero - cmpwi $len,0x20 - blt Lxts_dec6x_one - nop - beq Lxts_dec6x_two - cmpwi $len,0x40 - blt Lxts_dec6x_three - nop - beq Lxts_dec6x_four - -Lxts_dec6x_five: - vxor $out0,$in1,$twk0 - vxor $out1,$in2,$twk1 - vxor $out2,$in3,$twk2 - vxor $out3,$in4,$twk3 - vxor $out4,$in5,$twk4 - - bl _aesp8_xts_dec5x - - le?vperm $out0,$out0,$out0,$leperm - vmr $twk0,$twk5 # unused tweak - vxor $twk1,$tweak,$rndkey0 - le?vperm $out1,$out1,$out1,$leperm - stvx_u $out0,$x00,$out # store output - vxor $out0,$in0,$twk1 - le?vperm $out2,$out2,$out2,$leperm - stvx_u $out1,$x10,$out - le?vperm $out3,$out3,$out3,$leperm - stvx_u $out2,$x20,$out - le?vperm $out4,$out4,$out4,$leperm - stvx_u $out3,$x30,$out - stvx_u $out4,$x40,$out - addi $out,$out,0x50 - bne Lxts_dec6x_steal - b Lxts_dec6x_done - -.align 4 -Lxts_dec6x_four: - vxor $out0,$in2,$twk0 - vxor $out1,$in3,$twk1 - vxor $out2,$in4,$twk2 - vxor $out3,$in5,$twk3 - vxor $out4,$out4,$out4 - - bl _aesp8_xts_dec5x - - le?vperm $out0,$out0,$out0,$leperm - vmr $twk0,$twk4 # unused tweak - vmr $twk1,$twk5 - le?vperm $out1,$out1,$out1,$leperm - stvx_u $out0,$x00,$out # store output - vxor $out0,$in0,$twk5 - le?vperm $out2,$out2,$out2,$leperm - stvx_u $out1,$x10,$out - le?vperm $out3,$out3,$out3,$leperm - stvx_u $out2,$x20,$out - stvx_u $out3,$x30,$out - addi $out,$out,0x40 - bne Lxts_dec6x_steal - b Lxts_dec6x_done - -.align 4 -Lxts_dec6x_three: - vxor $out0,$in3,$twk0 - vxor $out1,$in4,$twk1 - vxor $out2,$in5,$twk2 - vxor $out3,$out3,$out3 - vxor $out4,$out4,$out4 - - bl _aesp8_xts_dec5x - - le?vperm $out0,$out0,$out0,$leperm - vmr $twk0,$twk3 # unused tweak - vmr $twk1,$twk4 - le?vperm $out1,$out1,$out1,$leperm - stvx_u $out0,$x00,$out # store output - vxor $out0,$in0,$twk4 - le?vperm $out2,$out2,$out2,$leperm - stvx_u $out1,$x10,$out - stvx_u $out2,$x20,$out - addi $out,$out,0x30 - bne Lxts_dec6x_steal - b Lxts_dec6x_done - -.align 4 -Lxts_dec6x_two: - vxor $out0,$in4,$twk0 - vxor $out1,$in5,$twk1 - vxor $out2,$out2,$out2 - vxor $out3,$out3,$out3 - vxor $out4,$out4,$out4 - - bl _aesp8_xts_dec5x - - le?vperm $out0,$out0,$out0,$leperm - vmr $twk0,$twk2 # unused tweak - vmr $twk1,$twk3 - le?vperm $out1,$out1,$out1,$leperm - stvx_u $out0,$x00,$out # store output - vxor $out0,$in0,$twk3 - stvx_u $out1,$x10,$out - addi $out,$out,0x20 - bne Lxts_dec6x_steal - b Lxts_dec6x_done - -.align 4 -Lxts_dec6x_one: - vxor $out0,$in5,$twk0 - nop -Loop_xts_dec1x: - vncipher $out0,$out0,v24 - lvx v24,$x20,$key_ # round[3] - addi $key_,$key_,0x20 - - vncipher $out0,$out0,v25 - lvx v25,$x10,$key_ # round[4] - bdnz Loop_xts_dec1x - - subi r0,$taillen,1 - vncipher $out0,$out0,v24 - - andi. r0,r0,16 - cmpwi $taillen,0 - vncipher $out0,$out0,v25 - - sub $inp,$inp,r0 - vncipher $out0,$out0,v26 - - lvx_u $in0,0,$inp - vncipher $out0,$out0,v27 - - addi $key_,$sp,$FRAME+15 # rewind $key_ - vncipher $out0,$out0,v28 - lvx v24,$x00,$key_ # re-pre-load round[1] - - vncipher $out0,$out0,v29 - lvx v25,$x10,$key_ # re-pre-load round[2] - vxor $twk0,$twk0,v31 - - le?vperm $in0,$in0,$in0,$leperm - vncipher $out0,$out0,v30 - - mtctr $rounds - vncipherlast $out0,$out0,$twk0 - - vmr $twk0,$twk1 # unused tweak - vmr $twk1,$twk2 - le?vperm $out0,$out0,$out0,$leperm - stvx_u $out0,$x00,$out # store output - addi $out,$out,0x10 - vxor $out0,$in0,$twk2 - bne Lxts_dec6x_steal - b Lxts_dec6x_done - -.align 4 -Lxts_dec6x_zero: - cmpwi $taillen,0 - beq Lxts_dec6x_done - - lvx_u $in0,0,$inp - le?vperm $in0,$in0,$in0,$leperm - vxor $out0,$in0,$twk1 -Lxts_dec6x_steal: - vncipher $out0,$out0,v24 - lvx v24,$x20,$key_ # round[3] - addi $key_,$key_,0x20 - - vncipher $out0,$out0,v25 - lvx v25,$x10,$key_ # round[4] - bdnz Lxts_dec6x_steal - - add $inp,$inp,$taillen - vncipher $out0,$out0,v24 - - cmpwi $taillen,0 - vncipher $out0,$out0,v25 - - lvx_u $in0,0,$inp - vncipher $out0,$out0,v26 - - lvsr $inpperm,0,$taillen # $in5 is no more - vncipher $out0,$out0,v27 - - addi $key_,$sp,$FRAME+15 # rewind $key_ - vncipher $out0,$out0,v28 - lvx v24,$x00,$key_ # re-pre-load round[1] - - vncipher $out0,$out0,v29 - lvx v25,$x10,$key_ # re-pre-load round[2] - vxor $twk1,$twk1,v31 - - le?vperm $in0,$in0,$in0,$leperm - vncipher $out0,$out0,v30 - - vperm $in0,$in0,$in0,$inpperm - vncipherlast $tmp,$out0,$twk1 - - le?vperm $out0,$tmp,$tmp,$leperm - le?stvx_u $out0,0,$out - be?stvx_u $tmp,0,$out - - vxor $out0,$out0,$out0 - vspltisb $out1,-1 - vperm $out0,$out0,$out1,$inpperm - vsel $out0,$in0,$tmp,$out0 - vxor $out0,$out0,$twk0 - - subi r30,$out,1 - mtctr $taillen -Loop_xts_dec6x_steal: - lbzu r0,1(r30) - stb r0,16(r30) - bdnz Loop_xts_dec6x_steal - - li $taillen,0 - mtctr $rounds - b Loop_xts_dec1x # one more time... - -.align 4 -Lxts_dec6x_done: - ${UCMP}i $ivp,0 - beq Lxts_dec6x_ret - - vxor $tweak,$twk0,$rndkey0 - le?vperm $tweak,$tweak,$tweak,$leperm - stvx_u $tweak,0,$ivp - -Lxts_dec6x_ret: - mtlr r11 - li r10,`$FRAME+15` - li r11,`$FRAME+31` - stvx $seven,r10,$sp # wipe copies of round keys - addi r10,r10,32 - stvx $seven,r11,$sp - addi r11,r11,32 - stvx $seven,r10,$sp - addi r10,r10,32 - stvx $seven,r11,$sp - addi r11,r11,32 - stvx $seven,r10,$sp - addi r10,r10,32 - stvx $seven,r11,$sp - addi r11,r11,32 - stvx $seven,r10,$sp - addi r10,r10,32 - stvx $seven,r11,$sp - addi r11,r11,32 - - mtspr 256,$vrsave - lvx v20,r10,$sp # ABI says so - addi r10,r10,32 - lvx v21,r11,$sp - addi r11,r11,32 - lvx v22,r10,$sp - addi r10,r10,32 - lvx v23,r11,$sp - addi r11,r11,32 - lvx v24,r10,$sp - addi r10,r10,32 - lvx v25,r11,$sp - addi r11,r11,32 - lvx v26,r10,$sp - addi r10,r10,32 - lvx v27,r11,$sp - addi r11,r11,32 - lvx v28,r10,$sp - addi r10,r10,32 - lvx v29,r11,$sp - addi r11,r11,32 - lvx v30,r10,$sp - lvx v31,r11,$sp - $POP r26,`$FRAME+21*16+0*$SIZE_T`($sp) - $POP r27,`$FRAME+21*16+1*$SIZE_T`($sp) - $POP r28,`$FRAME+21*16+2*$SIZE_T`($sp) - $POP r29,`$FRAME+21*16+3*$SIZE_T`($sp) - $POP r30,`$FRAME+21*16+4*$SIZE_T`($sp) - $POP r31,`$FRAME+21*16+5*$SIZE_T`($sp) - addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T` - blr - .long 0 - .byte 0,12,0x04,1,0x80,6,6,0 - .long 0 - -.align 5 -_aesp8_xts_dec5x: - vncipher $out0,$out0,v24 - vncipher $out1,$out1,v24 - vncipher $out2,$out2,v24 - vncipher $out3,$out3,v24 - vncipher $out4,$out4,v24 - lvx v24,$x20,$key_ # round[3] - addi $key_,$key_,0x20 - - vncipher $out0,$out0,v25 - vncipher $out1,$out1,v25 - vncipher $out2,$out2,v25 - vncipher $out3,$out3,v25 - vncipher $out4,$out4,v25 - lvx v25,$x10,$key_ # round[4] - bdnz _aesp8_xts_dec5x - - subi r0,$taillen,1 - vncipher $out0,$out0,v24 - vncipher $out1,$out1,v24 - vncipher $out2,$out2,v24 - vncipher $out3,$out3,v24 - vncipher $out4,$out4,v24 - - andi. r0,r0,16 - cmpwi $taillen,0 - vncipher $out0,$out0,v25 - vncipher $out1,$out1,v25 - vncipher $out2,$out2,v25 - vncipher $out3,$out3,v25 - vncipher $out4,$out4,v25 - vxor $twk0,$twk0,v31 - - sub $inp,$inp,r0 - vncipher $out0,$out0,v26 - vncipher $out1,$out1,v26 - vncipher $out2,$out2,v26 - vncipher $out3,$out3,v26 - vncipher $out4,$out4,v26 - vxor $in1,$twk1,v31 - - vncipher $out0,$out0,v27 - lvx_u $in0,0,$inp - vncipher $out1,$out1,v27 - vncipher $out2,$out2,v27 - vncipher $out3,$out3,v27 - vncipher $out4,$out4,v27 - vxor $in2,$twk2,v31 - - addi $key_,$sp,$FRAME+15 # rewind $key_ - vncipher $out0,$out0,v28 - vncipher $out1,$out1,v28 - vncipher $out2,$out2,v28 - vncipher $out3,$out3,v28 - vncipher $out4,$out4,v28 - lvx v24,$x00,$key_ # re-pre-load round[1] - vxor $in3,$twk3,v31 - - vncipher $out0,$out0,v29 - le?vperm $in0,$in0,$in0,$leperm - vncipher $out1,$out1,v29 - vncipher $out2,$out2,v29 - vncipher $out3,$out3,v29 - vncipher $out4,$out4,v29 - lvx v25,$x10,$key_ # re-pre-load round[2] - vxor $in4,$twk4,v31 - - vncipher $out0,$out0,v30 - vncipher $out1,$out1,v30 - vncipher $out2,$out2,v30 - vncipher $out3,$out3,v30 - vncipher $out4,$out4,v30 - - vncipherlast $out0,$out0,$twk0 - vncipherlast $out1,$out1,$in1 - vncipherlast $out2,$out2,$in2 - vncipherlast $out3,$out3,$in3 - vncipherlast $out4,$out4,$in4 - mtctr $rounds - blr - .long 0 - .byte 0,12,0x14,0,0,0,0,0 -___ -}} }}} - -my $consts=1; -foreach(split("\n",$code)) { - s/\`([^\`]*)\`/eval($1)/geo; - - # constants table endian-specific conversion - if ($consts && m/\.(long|byte)\s+(.+)\s+(\?[a-z]*)$/o) { - my $conv=$3; - my @bytes=(); - - # convert to endian-agnostic format - if ($1 eq "long") { - foreach (split(/,\s*/,$2)) { - my $l = /^0/?oct:int; - push @bytes,($l>>24)&0xff,($l>>16)&0xff,($l>>8)&0xff,$l&0xff; - } - } else { - @bytes = map(/^0/?oct:int,split(/,\s*/,$2)); - } - - # little-endian conversion - if ($flavour =~ /le$/o) { - SWITCH: for($conv) { - /\?inv/ && do { @bytes=map($_^0xf,@bytes); last; }; - /\?rev/ && do { @bytes=reverse(@bytes); last; }; - } - } - - #emit - print ".byte\t",join(',',map (sprintf("0x%02x",$_),@bytes)),"\n"; - next; - } - $consts=0 if (m/Lconsts:/o); # end of table - - # instructions prefixed with '?' are endian-specific and need - # to be adjusted accordingly... - if ($flavour =~ /le$/o) { # little-endian - s/le\?//o or - s/be\?/#be#/o or - s/\?lvsr/lvsl/o or - s/\?lvsl/lvsr/o or - s/\?(vperm\s+v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+)/$1$3$2$4/o or - s/\?(vsldoi\s+v[0-9]+,\s*)(v[0-9]+,)\s*(v[0-9]+,\s*)([0-9]+)/$1$3$2 16-$4/o or - s/\?(vspltw\s+v[0-9]+,\s*)(v[0-9]+,)\s*([0-9])/$1$2 3-$3/o; - } else { # big-endian - s/le\?/#le#/o or - s/be\?//o or - s/\?([a-z]+)/$1/o; - } - - print $_,"\n"; -} - -close STDOUT; diff --git a/drivers/crypto/vmx/ghash.c b/drivers/crypto/vmx/ghash.c deleted file mode 100644 index 77eca20bc7ac..000000000000 --- a/drivers/crypto/vmx/ghash.c +++ /dev/null @@ -1,185 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GHASH routines supporting VMX instructions on the Power 8 - * - * Copyright (C) 2015, 2019 International Business Machines Inc. - * - * Author: Marcelo Henrique Cerri <mhcerri@br.ibm.com> - * - * Extended by Daniel Axtens <dja@axtens.net> to replace the fallback - * mechanism. The new approach is based on arm64 code, which is: - * Copyright (C) 2014 - 2018 Linaro Ltd. <ard.biesheuvel@linaro.org> - */ - -#include <linux/types.h> -#include <linux/err.h> -#include <linux/crypto.h> -#include <linux/delay.h> -#include <asm/simd.h> -#include <asm/switch_to.h> -#include <crypto/aes.h> -#include <crypto/ghash.h> -#include <crypto/scatterwalk.h> -#include <crypto/internal/hash.h> -#include <crypto/internal/simd.h> -#include <crypto/b128ops.h> -#include "aesp8-ppc.h" - -void gcm_init_p8(u128 htable[16], const u64 Xi[2]); -void gcm_gmult_p8(u64 Xi[2], const u128 htable[16]); -void gcm_ghash_p8(u64 Xi[2], const u128 htable[16], - const u8 *in, size_t len); - -struct p8_ghash_ctx { - /* key used by vector asm */ - u128 htable[16]; - /* key used by software fallback */ - be128 key; -}; - -struct p8_ghash_desc_ctx { - u64 shash[2]; - u8 buffer[GHASH_DIGEST_SIZE]; - int bytes; -}; - -static int p8_ghash_init(struct shash_desc *desc) -{ - struct p8_ghash_desc_ctx *dctx = shash_desc_ctx(desc); - - dctx->bytes = 0; - memset(dctx->shash, 0, GHASH_DIGEST_SIZE); - return 0; -} - -static int p8_ghash_setkey(struct crypto_shash *tfm, const u8 *key, - unsigned int keylen) -{ - struct p8_ghash_ctx *ctx = crypto_tfm_ctx(crypto_shash_tfm(tfm)); - - if (keylen != GHASH_BLOCK_SIZE) - return -EINVAL; - - preempt_disable(); - pagefault_disable(); - enable_kernel_vsx(); - gcm_init_p8(ctx->htable, (const u64 *) key); - disable_kernel_vsx(); - pagefault_enable(); - preempt_enable(); - - memcpy(&ctx->key, key, GHASH_BLOCK_SIZE); - - return 0; -} - -static inline void __ghash_block(struct p8_ghash_ctx *ctx, - struct p8_ghash_desc_ctx *dctx) -{ - if (crypto_simd_usable()) { - preempt_disable(); - pagefault_disable(); - enable_kernel_vsx(); - gcm_ghash_p8(dctx->shash, ctx->htable, - dctx->buffer, GHASH_DIGEST_SIZE); - disable_kernel_vsx(); - pagefault_enable(); - preempt_enable(); - } else { - crypto_xor((u8 *)dctx->shash, dctx->buffer, GHASH_BLOCK_SIZE); - gf128mul_lle((be128 *)dctx->shash, &ctx->key); - } -} - -static inline void __ghash_blocks(struct p8_ghash_ctx *ctx, - struct p8_ghash_desc_ctx *dctx, - const u8 *src, unsigned int srclen) -{ - if (crypto_simd_usable()) { - preempt_disable(); - pagefault_disable(); - enable_kernel_vsx(); - gcm_ghash_p8(dctx->shash, ctx->htable, - src, srclen); - disable_kernel_vsx(); - pagefault_enable(); - preempt_enable(); - } else { - while (srclen >= GHASH_BLOCK_SIZE) { - crypto_xor((u8 *)dctx->shash, src, GHASH_BLOCK_SIZE); - gf128mul_lle((be128 *)dctx->shash, &ctx->key); - srclen -= GHASH_BLOCK_SIZE; - src += GHASH_BLOCK_SIZE; - } - } -} - -static int p8_ghash_update(struct shash_desc *desc, - const u8 *src, unsigned int srclen) -{ - unsigned int len; - struct p8_ghash_ctx *ctx = crypto_tfm_ctx(crypto_shash_tfm(desc->tfm)); - struct p8_ghash_desc_ctx *dctx = shash_desc_ctx(desc); - - if (dctx->bytes) { - if (dctx->bytes + srclen < GHASH_DIGEST_SIZE) { - memcpy(dctx->buffer + dctx->bytes, src, - srclen); - dctx->bytes += srclen; - return 0; - } - memcpy(dctx->buffer + dctx->bytes, src, - GHASH_DIGEST_SIZE - dctx->bytes); - - __ghash_block(ctx, dctx); - - src += GHASH_DIGEST_SIZE - dctx->bytes; - srclen -= GHASH_DIGEST_SIZE - dctx->bytes; - dctx->bytes = 0; - } - len = srclen & ~(GHASH_DIGEST_SIZE - 1); - if (len) { - __ghash_blocks(ctx, dctx, src, len); - src += len; - srclen -= len; - } - if (srclen) { - memcpy(dctx->buffer, src, srclen); - dctx->bytes = srclen; - } - return 0; -} - -static int p8_ghash_final(struct shash_desc *desc, u8 *out) -{ - int i; - struct p8_ghash_ctx *ctx = crypto_tfm_ctx(crypto_shash_tfm(desc->tfm)); - struct p8_ghash_desc_ctx *dctx = shash_desc_ctx(desc); - - if (dctx->bytes) { - for (i = dctx->bytes; i < GHASH_DIGEST_SIZE; i++) - dctx->buffer[i] = 0; - __ghash_block(ctx, dctx); - dctx->bytes = 0; - } - memcpy(out, dctx->shash, GHASH_DIGEST_SIZE); - return 0; -} - -struct shash_alg p8_ghash_alg = { - .digestsize = GHASH_DIGEST_SIZE, - .init = p8_ghash_init, - .update = p8_ghash_update, - .final = p8_ghash_final, - .setkey = p8_ghash_setkey, - .descsize = sizeof(struct p8_ghash_desc_ctx) - + sizeof(struct ghash_desc_ctx), - .base = { - .cra_name = "ghash", - .cra_driver_name = "p8_ghash", - .cra_priority = 1000, - .cra_blocksize = GHASH_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct p8_ghash_ctx), - .cra_module = THIS_MODULE, - }, -}; diff --git a/drivers/crypto/vmx/ghashp8-ppc.pl b/drivers/crypto/vmx/ghashp8-ppc.pl deleted file mode 100644 index 041e633c214f..000000000000 --- a/drivers/crypto/vmx/ghashp8-ppc.pl +++ /dev/null @@ -1,243 +0,0 @@ -#!/usr/bin/env perl -# SPDX-License-Identifier: GPL-2.0 - -# This code is taken from the OpenSSL project but the author (Andy Polyakov) -# has relicensed it under the GPLv2. Therefore this program is free software; -# you can redistribute it and/or modify it under the terms of the GNU General -# Public License version 2 as published by the Free Software Foundation. -# -# The original headers, including the original license headers, are -# included below for completeness. - -# ==================================================================== -# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL -# project. The module is, however, dual licensed under OpenSSL and -# CRYPTOGAMS licenses depending on where you obtain it. For further -# details see https://www.openssl.org/~appro/cryptogams/. -# ==================================================================== -# -# GHASH for PowerISA v2.07. -# -# July 2014 -# -# Accurate performance measurements are problematic, because it's -# always virtualized setup with possibly throttled processor. -# Relative comparison is therefore more informative. This initial -# version is ~2.1x slower than hardware-assisted AES-128-CTR, ~12x -# faster than "4-bit" integer-only compiler-generated 64-bit code. -# "Initial version" means that there is room for futher improvement. - -$flavour=shift; -$output =shift; - -if ($flavour =~ /64/) { - $SIZE_T=8; - $LRSAVE=2*$SIZE_T; - $STU="stdu"; - $POP="ld"; - $PUSH="std"; -} elsif ($flavour =~ /32/) { - $SIZE_T=4; - $LRSAVE=$SIZE_T; - $STU="stwu"; - $POP="lwz"; - $PUSH="stw"; -} else { die "nonsense $flavour"; } - -$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; -( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or -( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or -die "can't locate ppc-xlate.pl"; - -open STDOUT,"| $^X $xlate $flavour $output" || die "can't call $xlate: $!"; - -my ($Xip,$Htbl,$inp,$len)=map("r$_",(3..6)); # argument block - -my ($Xl,$Xm,$Xh,$IN)=map("v$_",(0..3)); -my ($zero,$t0,$t1,$t2,$xC2,$H,$Hh,$Hl,$lemask)=map("v$_",(4..12)); -my $vrsave="r12"; - -$code=<<___; -.machine "any" - -.text - -.globl .gcm_init_p8 - lis r0,0xfff0 - li r8,0x10 - mfspr $vrsave,256 - li r9,0x20 - mtspr 256,r0 - li r10,0x30 - lvx_u $H,0,r4 # load H - le?xor r7,r7,r7 - le?addi r7,r7,0x8 # need a vperm start with 08 - le?lvsr 5,0,r7 - le?vspltisb 6,0x0f - le?vxor 5,5,6 # set a b-endian mask - le?vperm $H,$H,$H,5 - - vspltisb $xC2,-16 # 0xf0 - vspltisb $t0,1 # one - vaddubm $xC2,$xC2,$xC2 # 0xe0 - vxor $zero,$zero,$zero - vor $xC2,$xC2,$t0 # 0xe1 - vsldoi $xC2,$xC2,$zero,15 # 0xe1... - vsldoi $t1,$zero,$t0,1 # ...1 - vaddubm $xC2,$xC2,$xC2 # 0xc2... - vspltisb $t2,7 - vor $xC2,$xC2,$t1 # 0xc2....01 - vspltb $t1,$H,0 # most significant byte - vsl $H,$H,$t0 # H<<=1 - vsrab $t1,$t1,$t2 # broadcast carry bit - vand $t1,$t1,$xC2 - vxor $H,$H,$t1 # twisted H - - vsldoi $H,$H,$H,8 # twist even more ... - vsldoi $xC2,$zero,$xC2,8 # 0xc2.0 - vsldoi $Hl,$zero,$H,8 # ... and split - vsldoi $Hh,$H,$zero,8 - - stvx_u $xC2,0,r3 # save pre-computed table - stvx_u $Hl,r8,r3 - stvx_u $H, r9,r3 - stvx_u $Hh,r10,r3 - - mtspr 256,$vrsave - blr - .long 0 - .byte 0,12,0x14,0,0,0,2,0 - .long 0 -.size .gcm_init_p8,.-.gcm_init_p8 - -.globl .gcm_gmult_p8 - lis r0,0xfff8 - li r8,0x10 - mfspr $vrsave,256 - li r9,0x20 - mtspr 256,r0 - li r10,0x30 - lvx_u $IN,0,$Xip # load Xi - - lvx_u $Hl,r8,$Htbl # load pre-computed table - le?lvsl $lemask,r0,r0 - lvx_u $H, r9,$Htbl - le?vspltisb $t0,0x07 - lvx_u $Hh,r10,$Htbl - le?vxor $lemask,$lemask,$t0 - lvx_u $xC2,0,$Htbl - le?vperm $IN,$IN,$IN,$lemask - vxor $zero,$zero,$zero - - vpmsumd $Xl,$IN,$Hl # H.lo·Xi.lo - vpmsumd $Xm,$IN,$H # H.hi·Xi.lo+H.lo·Xi.hi - vpmsumd $Xh,$IN,$Hh # H.hi·Xi.hi - - vpmsumd $t2,$Xl,$xC2 # 1st phase - - vsldoi $t0,$Xm,$zero,8 - vsldoi $t1,$zero,$Xm,8 - vxor $Xl,$Xl,$t0 - vxor $Xh,$Xh,$t1 - - vsldoi $Xl,$Xl,$Xl,8 - vxor $Xl,$Xl,$t2 - - vsldoi $t1,$Xl,$Xl,8 # 2nd phase - vpmsumd $Xl,$Xl,$xC2 - vxor $t1,$t1,$Xh - vxor $Xl,$Xl,$t1 - - le?vperm $Xl,$Xl,$Xl,$lemask - stvx_u $Xl,0,$Xip # write out Xi - - mtspr 256,$vrsave - blr - .long 0 - .byte 0,12,0x14,0,0,0,2,0 - .long 0 -.size .gcm_gmult_p8,.-.gcm_gmult_p8 - -.globl .gcm_ghash_p8 - lis r0,0xfff8 - li r8,0x10 - mfspr $vrsave,256 - li r9,0x20 - mtspr 256,r0 - li r10,0x30 - lvx_u $Xl,0,$Xip # load Xi - - lvx_u $Hl,r8,$Htbl # load pre-computed table - le?lvsl $lemask,r0,r0 - lvx_u $H, r9,$Htbl - le?vspltisb $t0,0x07 - lvx_u $Hh,r10,$Htbl - le?vxor $lemask,$lemask,$t0 - lvx_u $xC2,0,$Htbl - le?vperm $Xl,$Xl,$Xl,$lemask - vxor $zero,$zero,$zero - - lvx_u $IN,0,$inp - addi $inp,$inp,16 - subi $len,$len,16 - le?vperm $IN,$IN,$IN,$lemask - vxor $IN,$IN,$Xl - b Loop - -.align 5 -Loop: - subic $len,$len,16 - vpmsumd $Xl,$IN,$Hl # H.lo·Xi.lo - subfe. r0,r0,r0 # borrow?-1:0 - vpmsumd $Xm,$IN,$H # H.hi·Xi.lo+H.lo·Xi.hi - and r0,r0,$len - vpmsumd $Xh,$IN,$Hh # H.hi·Xi.hi - add $inp,$inp,r0 - - vpmsumd $t2,$Xl,$xC2 # 1st phase - - vsldoi $t0,$Xm,$zero,8 - vsldoi $t1,$zero,$Xm,8 - vxor $Xl,$Xl,$t0 - vxor $Xh,$Xh,$t1 - - vsldoi $Xl,$Xl,$Xl,8 - vxor $Xl,$Xl,$t2 - lvx_u $IN,0,$inp - addi $inp,$inp,16 - - vsldoi $t1,$Xl,$Xl,8 # 2nd phase - vpmsumd $Xl,$Xl,$xC2 - le?vperm $IN,$IN,$IN,$lemask - vxor $t1,$t1,$Xh - vxor $IN,$IN,$t1 - vxor $IN,$IN,$Xl - beq Loop # did $len-=16 borrow? - - vxor $Xl,$Xl,$t1 - le?vperm $Xl,$Xl,$Xl,$lemask - stvx_u $Xl,0,$Xip # write out Xi - - mtspr 256,$vrsave - blr - .long 0 - .byte 0,12,0x14,0,0,0,4,0 - .long 0 -.size .gcm_ghash_p8,.-.gcm_ghash_p8 - -.asciz "GHASH for PowerISA 2.07, CRYPTOGAMS by <appro\@openssl.org>" -.align 2 -___ - -foreach (split("\n",$code)) { - if ($flavour =~ /le$/o) { # little-endian - s/le\?//o or - s/be\?/#be#/o; - } else { - s/le\?/#le#/o or - s/be\?//o; - } - print $_,"\n"; -} - -close STDOUT; # enforce flush diff --git a/drivers/crypto/vmx/ppc-xlate.pl b/drivers/crypto/vmx/ppc-xlate.pl deleted file mode 100644 index b583898c11ae..000000000000 --- a/drivers/crypto/vmx/ppc-xlate.pl +++ /dev/null @@ -1,231 +0,0 @@ -#!/usr/bin/env perl -# SPDX-License-Identifier: GPL-2.0 - -# PowerPC assembler distiller by <appro>. - -my $flavour = shift; -my $output = shift; -open STDOUT,">$output" || die "can't open $output: $!"; - -my %GLOBALS; -my $dotinlocallabels=($flavour=~/linux/)?1:0; -my $elfv2abi=(($flavour =~ /linux-ppc64le/) or ($flavour =~ /linux-ppc64-elfv2/))?1:0; -my $dotfunctions=($elfv2abi=~1)?0:1; - -################################################################ -# directives which need special treatment on different platforms -################################################################ -my $globl = sub { - my $junk = shift; - my $name = shift; - my $global = \$GLOBALS{$name}; - my $ret; - - $name =~ s|^[\.\_]||; - - SWITCH: for ($flavour) { - /aix/ && do { $name = ".$name"; - last; - }; - /osx/ && do { $name = "_$name"; - last; - }; - /linux/ - && do { $ret = "_GLOBAL($name)"; - last; - }; - } - - $ret = ".globl $name\nalign 5\n$name:" if (!$ret); - $$global = $name; - $ret; -}; -my $text = sub { - my $ret = ($flavour =~ /aix/) ? ".csect\t.text[PR],7" : ".text"; - $ret = ".abiversion 2\n".$ret if ($elfv2abi); - $ret; -}; -my $machine = sub { - my $junk = shift; - my $arch = shift; - if ($flavour =~ /osx/) - { $arch =~ s/\"//g; - $arch = ($flavour=~/64/) ? "ppc970-64" : "ppc970" if ($arch eq "any"); - } - ".machine $arch"; -}; -my $size = sub { - if ($flavour =~ /linux/) - { shift; - my $name = shift; $name =~ s|^[\.\_]||; - my $ret = ".size $name,.-".($dotfunctions?".":"").$name; - $ret .= "\n.size .$name,.-.$name" if ($dotfunctions); - $ret; - } - else - { ""; } -}; -my $asciz = sub { - shift; - my $line = join(",",@_); - if ($line =~ /^"(.*)"$/) - { ".byte " . join(",",unpack("C*",$1),0) . "\n.align 2"; } - else - { ""; } -}; -my $quad = sub { - shift; - my @ret; - my ($hi,$lo); - for (@_) { - if (/^0x([0-9a-f]*?)([0-9a-f]{1,8})$/io) - { $hi=$1?"0x$1":"0"; $lo="0x$2"; } - elsif (/^([0-9]+)$/o) - { $hi=$1>>32; $lo=$1&0xffffffff; } # error-prone with 32-bit perl - else - { $hi=undef; $lo=$_; } - - if (defined($hi)) - { push(@ret,$flavour=~/le$/o?".long\t$lo,$hi":".long\t$hi,$lo"); } - else - { push(@ret,".quad $lo"); } - } - join("\n",@ret); -}; - -################################################################ -# simplified mnemonics not handled by at least one assembler -################################################################ -my $cmplw = sub { - my $f = shift; - my $cr = 0; $cr = shift if ($#_>1); - # Some out-of-date 32-bit GNU assembler just can't handle cmplw... - ($flavour =~ /linux.*32/) ? - " .long ".sprintf "0x%x",31<<26|$cr<<23|$_[0]<<16|$_[1]<<11|64 : - " cmplw ".join(',',$cr,@_); -}; -my $bdnz = sub { - my $f = shift; - my $bo = $f=~/[\+\-]/ ? 16+9 : 16; # optional "to be taken" hint - " bc $bo,0,".shift; -} if ($flavour!~/linux/); -my $bltlr = sub { - my $f = shift; - my $bo = $f=~/\-/ ? 12+2 : 12; # optional "not to be taken" hint - ($flavour =~ /linux/) ? # GNU as doesn't allow most recent hints - " .long ".sprintf "0x%x",19<<26|$bo<<21|16<<1 : - " bclr $bo,0"; -}; -my $bnelr = sub { - my $f = shift; - my $bo = $f=~/\-/ ? 4+2 : 4; # optional "not to be taken" hint - ($flavour =~ /linux/) ? # GNU as doesn't allow most recent hints - " .long ".sprintf "0x%x",19<<26|$bo<<21|2<<16|16<<1 : - " bclr $bo,2"; -}; -my $beqlr = sub { - my $f = shift; - my $bo = $f=~/-/ ? 12+2 : 12; # optional "not to be taken" hint - ($flavour =~ /linux/) ? # GNU as doesn't allow most recent hints - " .long ".sprintf "0x%X",19<<26|$bo<<21|2<<16|16<<1 : - " bclr $bo,2"; -}; -# GNU assembler can't handle extrdi rA,rS,16,48, or when sum of last two -# arguments is 64, with "operand out of range" error. -my $extrdi = sub { - my ($f,$ra,$rs,$n,$b) = @_; - $b = ($b+$n)&63; $n = 64-$n; - " rldicl $ra,$rs,$b,$n"; -}; -my $vmr = sub { - my ($f,$vx,$vy) = @_; - " vor $vx,$vy,$vy"; -}; - -# Some ABIs specify vrsave, special-purpose register #256, as reserved -# for system use. -my $no_vrsave = ($elfv2abi); -my $mtspr = sub { - my ($f,$idx,$ra) = @_; - if ($idx == 256 && $no_vrsave) { - " or $ra,$ra,$ra"; - } else { - " mtspr $idx,$ra"; - } -}; -my $mfspr = sub { - my ($f,$rd,$idx) = @_; - if ($idx == 256 && $no_vrsave) { - " li $rd,-1"; - } else { - " mfspr $rd,$idx"; - } -}; - -# PowerISA 2.06 stuff -sub vsxmem_op { - my ($f, $vrt, $ra, $rb, $op) = @_; - " .long ".sprintf "0x%X",(31<<26)|($vrt<<21)|($ra<<16)|($rb<<11)|($op*2+1); -} -# made-up unaligned memory reference AltiVec/VMX instructions -my $lvx_u = sub { vsxmem_op(@_, 844); }; # lxvd2x -my $stvx_u = sub { vsxmem_op(@_, 972); }; # stxvd2x -my $lvdx_u = sub { vsxmem_op(@_, 588); }; # lxsdx -my $stvdx_u = sub { vsxmem_op(@_, 716); }; # stxsdx -my $lvx_4w = sub { vsxmem_op(@_, 780); }; # lxvw4x -my $stvx_4w = sub { vsxmem_op(@_, 908); }; # stxvw4x - -# PowerISA 2.07 stuff -sub vcrypto_op { - my ($f, $vrt, $vra, $vrb, $op) = @_; - " .long ".sprintf "0x%X",(4<<26)|($vrt<<21)|($vra<<16)|($vrb<<11)|$op; -} -my $vcipher = sub { vcrypto_op(@_, 1288); }; -my $vcipherlast = sub { vcrypto_op(@_, 1289); }; -my $vncipher = sub { vcrypto_op(@_, 1352); }; -my $vncipherlast= sub { vcrypto_op(@_, 1353); }; -my $vsbox = sub { vcrypto_op(@_, 0, 1480); }; -my $vshasigmad = sub { my ($st,$six)=splice(@_,-2); vcrypto_op(@_, $st<<4|$six, 1730); }; -my $vshasigmaw = sub { my ($st,$six)=splice(@_,-2); vcrypto_op(@_, $st<<4|$six, 1666); }; -my $vpmsumb = sub { vcrypto_op(@_, 1032); }; -my $vpmsumd = sub { vcrypto_op(@_, 1224); }; -my $vpmsubh = sub { vcrypto_op(@_, 1096); }; -my $vpmsumw = sub { vcrypto_op(@_, 1160); }; -my $vaddudm = sub { vcrypto_op(@_, 192); }; -my $vadduqm = sub { vcrypto_op(@_, 256); }; - -my $mtsle = sub { - my ($f, $arg) = @_; - " .long ".sprintf "0x%X",(31<<26)|($arg<<21)|(147*2); -}; - -print "#include <asm/ppc_asm.h>\n" if $flavour =~ /linux/; - -while($line=<>) { - - $line =~ s|[#!;].*$||; # get rid of asm-style comments... - $line =~ s|/\*.*\*/||; # ... and C-style comments... - $line =~ s|^\s+||; # ... and skip white spaces in beginning... - $line =~ s|\s+$||; # ... and at the end - - { - $line =~ s|\b\.L(\w+)|L$1|g; # common denominator for Locallabel - $line =~ s|\bL(\w+)|\.L$1|g if ($dotinlocallabels); - } - - { - $line =~ s|^\s*(\.?)(\w+)([\.\+\-]?)\s*||; - my $c = $1; $c = "\t" if ($c eq ""); - my $mnemonic = $2; - my $f = $3; - my $opcode = eval("\$$mnemonic"); - $line =~ s/\b(c?[rf]|v|vs)([0-9]+)\b/$2/g if ($c ne "." and $flavour !~ /osx/); - if (ref($opcode) eq 'CODE') { $line = &$opcode($f,split(',',$line)); } - elsif ($mnemonic) { $line = $c.$mnemonic.$f."\t".$line; } - } - - print $line if ($line); - print "\n"; -} - -close STDOUT; diff --git a/drivers/crypto/vmx/vmx.c b/drivers/crypto/vmx/vmx.c deleted file mode 100644 index 7eb713cc87c8..000000000000 --- a/drivers/crypto/vmx/vmx.c +++ /dev/null @@ -1,77 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Routines supporting VMX instructions on the Power 8 - * - * Copyright (C) 2015 International Business Machines Inc. - * - * Author: Marcelo Henrique Cerri <mhcerri@br.ibm.com> - */ - -#include <linux/module.h> -#include <linux/moduleparam.h> -#include <linux/types.h> -#include <linux/err.h> -#include <linux/cpufeature.h> -#include <linux/crypto.h> -#include <asm/cputable.h> -#include <crypto/internal/hash.h> -#include <crypto/internal/skcipher.h> - -#include "aesp8-ppc.h" - -static int __init p8_init(void) -{ - int ret; - - ret = crypto_register_shash(&p8_ghash_alg); - if (ret) - goto err; - - ret = crypto_register_alg(&p8_aes_alg); - if (ret) - goto err_unregister_ghash; - - ret = crypto_register_skcipher(&p8_aes_cbc_alg); - if (ret) - goto err_unregister_aes; - - ret = crypto_register_skcipher(&p8_aes_ctr_alg); - if (ret) - goto err_unregister_aes_cbc; - - ret = crypto_register_skcipher(&p8_aes_xts_alg); - if (ret) - goto err_unregister_aes_ctr; - - return 0; - -err_unregister_aes_ctr: - crypto_unregister_skcipher(&p8_aes_ctr_alg); -err_unregister_aes_cbc: - crypto_unregister_skcipher(&p8_aes_cbc_alg); -err_unregister_aes: - crypto_unregister_alg(&p8_aes_alg); -err_unregister_ghash: - crypto_unregister_shash(&p8_ghash_alg); -err: - return ret; -} - -static void __exit p8_exit(void) -{ - crypto_unregister_skcipher(&p8_aes_xts_alg); - crypto_unregister_skcipher(&p8_aes_ctr_alg); - crypto_unregister_skcipher(&p8_aes_cbc_alg); - crypto_unregister_alg(&p8_aes_alg); - crypto_unregister_shash(&p8_ghash_alg); -} - -module_cpu_feature_match(PPC_MODULE_FEATURE_VEC_CRYPTO, p8_init); -module_exit(p8_exit); - -MODULE_AUTHOR("Marcelo Cerri<mhcerri@br.ibm.com>"); -MODULE_DESCRIPTION("IBM VMX cryptographic acceleration instructions " - "support on Power 8"); -MODULE_LICENSE("GPL"); -MODULE_VERSION("1.0.0"); -MODULE_IMPORT_NS(CRYPTO_INTERNAL); diff --git a/drivers/crypto/xilinx/zynqmp-aes-gcm.c b/drivers/crypto/xilinx/zynqmp-aes-gcm.c index 3c205324b22b..e61405718840 100644 --- a/drivers/crypto/xilinx/zynqmp-aes-gcm.c +++ b/drivers/crypto/xilinx/zynqmp-aes-gcm.c @@ -231,7 +231,10 @@ static int zynqmp_handle_aes_req(struct crypto_engine *engine, err = zynqmp_aes_aead_cipher(areq); } + local_bh_disable(); crypto_finalize_aead_request(engine, areq, err); + local_bh_enable(); + return 0; } |