From 08eb67d23e5172a5d1e60f1f0acccee569fe10ba Mon Sep 17 00:00:00 2001 From: Chenghai Huang Date: Thu, 18 Dec 2025 21:44:43 +0800 Subject: crypto: hisilicon/sec - move backlog management to qp and store sqe in qp for callback When multiple tfm use a same qp, the backlog data should be managed centrally by the qp, rather than in the qp_ctx of each req. Additionally, since SEC_BD_TYPE1 and SEC_BD_TYPE2 cannot use the tag of the sqe to carry the virtual address of the req, the sent sqe is stored in the qp. This allows the callback function to get the req address. To handle the differences between hardware types, the callback functions are split into two separate implementations. Fixes: f0ae287c5045 ("crypto: hisilicon/sec2 - implement full backlog mode for sec") Signed-off-by: Chenghai Huang Signed-off-by: Weili Qian Signed-off-by: Herbert Xu --- include/linux/hisi_acc_qm.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/hisi_acc_qm.h b/include/linux/hisi_acc_qm.h index ca1ec437a3ca..9485896d5dc0 100644 --- a/include/linux/hisi_acc_qm.h +++ b/include/linux/hisi_acc_qm.h @@ -447,6 +447,11 @@ struct hisi_qp_ops { int (*fill_sqe)(void *sqe, void *q_parm, void *d_parm); }; +struct instance_backlog { + struct list_head list; + spinlock_t lock; +}; + struct hisi_qp { u32 qp_id; u16 sq_depth; @@ -471,6 +476,9 @@ struct hisi_qp { bool is_in_kernel; u16 pasid; struct uacce_queue *uacce_q; + + struct instance_backlog backlog; + const void **msg; }; static inline int vfs_num_set(const char *val, const struct kernel_param *kp) -- cgit v1.2.3 From 21452eaa06edb5f6038720e643aed0bbfffad9c3 Mon Sep 17 00:00:00 2001 From: Chenghai Huang Date: Thu, 18 Dec 2025 21:44:45 +0800 Subject: crypto: hisilicon/qm - enhance the configuration of req_type in queue attributes Originally, when a queue was requested, it could only be configured with the default algorithm type of 0. Now, when multiple tfms use the same queue, the queue must be selected based on its attributes to meet the requirements of tfm tasks. So the algorithm type attribute of queue need to be distinguished. Just like a queue used for compression in ZIP cannot be used for decompression tasks. Fixes: 3f1ec97aacf1 ("crypto: hisilicon/qm - Put device finding logic into QM") Signed-off-by: Chenghai Huang Signed-off-by: Weili Qian Signed-off-by: Herbert Xu --- drivers/crypto/hisilicon/hpre/hpre_main.c | 2 +- drivers/crypto/hisilicon/qm.c | 8 ++++---- drivers/crypto/hisilicon/sec2/sec_crypto.c | 1 - drivers/crypto/hisilicon/sec2/sec_main.c | 21 ++++++++++++++++----- drivers/crypto/hisilicon/zip/zip.h | 2 +- drivers/crypto/hisilicon/zip/zip_crypto.c | 13 +++++++++---- drivers/crypto/hisilicon/zip/zip_main.c | 4 ++-- include/linux/hisi_acc_qm.h | 3 +-- 8 files changed, 34 insertions(+), 20 deletions(-) (limited to 'include/linux') diff --git a/drivers/crypto/hisilicon/hpre/hpre_main.c b/drivers/crypto/hisilicon/hpre/hpre_main.c index b94fecd765ee..884d5d0afaf4 100644 --- a/drivers/crypto/hisilicon/hpre/hpre_main.c +++ b/drivers/crypto/hisilicon/hpre/hpre_main.c @@ -465,7 +465,7 @@ struct hisi_qp *hpre_create_qp(u8 type) * type: 0 - RSA/DH. algorithm supported in V2, * 1 - ECC algorithm in V3. */ - ret = hisi_qm_alloc_qps_node(&hpre_devices, 1, type, node, &qp); + ret = hisi_qm_alloc_qps_node(&hpre_devices, 1, &type, node, &qp); if (!ret) return qp; diff --git a/drivers/crypto/hisilicon/qm.c b/drivers/crypto/hisilicon/qm.c index 71071ac559d4..5c80ca04a8d4 100644 --- a/drivers/crypto/hisilicon/qm.c +++ b/drivers/crypto/hisilicon/qm.c @@ -3620,7 +3620,7 @@ static int hisi_qm_sort_devices(int node, struct list_head *head, * not meet the requirements will return error. */ int hisi_qm_alloc_qps_node(struct hisi_qm_list *qm_list, int qp_num, - u8 alg_type, int node, struct hisi_qp **qps) + u8 *alg_type, int node, struct hisi_qp **qps) { struct hisi_qm_resource *tmp; int ret = -ENODEV; @@ -3638,7 +3638,7 @@ int hisi_qm_alloc_qps_node(struct hisi_qm_list *qm_list, int qp_num, list_for_each_entry(tmp, &head, list) { for (i = 0; i < qp_num; i++) { - qps[i] = hisi_qm_create_qp(tmp->qm, alg_type); + qps[i] = hisi_qm_create_qp(tmp->qm, alg_type[i]); if (IS_ERR(qps[i])) { hisi_qm_free_qps(qps, i); break; @@ -3653,8 +3653,8 @@ int hisi_qm_alloc_qps_node(struct hisi_qm_list *qm_list, int qp_num, mutex_unlock(&qm_list->lock); if (ret) - pr_info("Failed to create qps, node[%d], alg[%u], qp[%d]!\n", - node, alg_type, qp_num); + pr_info("Failed to create qps, node[%d], qp[%d]!\n", + node, qp_num); err: free_list(&head); diff --git a/drivers/crypto/hisilicon/sec2/sec_crypto.c b/drivers/crypto/hisilicon/sec2/sec_crypto.c index 4e41235116e1..364bd69c6088 100644 --- a/drivers/crypto/hisilicon/sec2/sec_crypto.c +++ b/drivers/crypto/hisilicon/sec2/sec_crypto.c @@ -626,7 +626,6 @@ static int sec_create_qp_ctx(struct sec_ctx *ctx, int qp_ctx_id) qp_ctx = &ctx->qp_ctx[qp_ctx_id]; qp = ctx->qps[qp_ctx_id]; - qp->req_type = 0; qp->qp_ctx = qp_ctx; qp_ctx->qp = qp; qp_ctx->ctx = ctx; diff --git a/drivers/crypto/hisilicon/sec2/sec_main.c b/drivers/crypto/hisilicon/sec2/sec_main.c index 5eb2d6820742..7dd125f5f511 100644 --- a/drivers/crypto/hisilicon/sec2/sec_main.c +++ b/drivers/crypto/hisilicon/sec2/sec_main.c @@ -417,18 +417,29 @@ struct hisi_qp **sec_create_qps(void) int node = cpu_to_node(raw_smp_processor_id()); u32 ctx_num = ctx_q_num; struct hisi_qp **qps; + u8 *type; int ret; qps = kcalloc(ctx_num, sizeof(struct hisi_qp *), GFP_KERNEL); if (!qps) return NULL; - ret = hisi_qm_alloc_qps_node(&sec_devices, ctx_num, 0, node, qps); - if (!ret) - return qps; + /* The type of SEC is all 0, so just allocated by kcalloc */ + type = kcalloc(ctx_num, sizeof(u8), GFP_KERNEL); + if (!type) { + kfree(qps); + return NULL; + } - kfree(qps); - return NULL; + ret = hisi_qm_alloc_qps_node(&sec_devices, ctx_num, type, node, qps); + if (ret) { + kfree(type); + kfree(qps); + return NULL; + } + + kfree(type); + return qps; } u64 sec_get_alg_bitmap(struct hisi_qm *qm, u32 high, u32 low) diff --git a/drivers/crypto/hisilicon/zip/zip.h b/drivers/crypto/hisilicon/zip/zip.h index 9fb2a9c01132..b83f228281ab 100644 --- a/drivers/crypto/hisilicon/zip/zip.h +++ b/drivers/crypto/hisilicon/zip/zip.h @@ -99,7 +99,7 @@ enum zip_cap_table_type { ZIP_CORE5_BITMAP, }; -int zip_create_qps(struct hisi_qp **qps, int qp_num, int node); +int zip_create_qps(struct hisi_qp **qps, int qp_num, int node, u8 *alg_type); int hisi_zip_register_to_crypto(struct hisi_qm *qm); void hisi_zip_unregister_from_crypto(struct hisi_qm *qm); bool hisi_zip_alg_support(struct hisi_qm *qm, u32 alg); diff --git a/drivers/crypto/hisilicon/zip/zip_crypto.c b/drivers/crypto/hisilicon/zip/zip_crypto.c index b4a656e0177d..8250a33ba586 100644 --- a/drivers/crypto/hisilicon/zip/zip_crypto.c +++ b/drivers/crypto/hisilicon/zip/zip_crypto.c @@ -66,6 +66,7 @@ struct hisi_zip_qp_ctx { struct hisi_acc_sgl_pool *sgl_pool; struct hisi_zip *zip_dev; struct hisi_zip_ctx *ctx; + u8 req_type; }; struct hisi_zip_sqe_ops { @@ -245,7 +246,7 @@ static int hisi_zip_do_work(struct hisi_zip_qp_ctx *qp_ctx, goto err_unmap_input; } - hisi_zip_fill_sqe(qp_ctx->ctx, &zip_sqe, qp->req_type, req); + hisi_zip_fill_sqe(qp_ctx->ctx, &zip_sqe, qp_ctx->req_type, req); /* send command to start a task */ atomic64_inc(&dfx->send_cnt); @@ -360,7 +361,6 @@ static int hisi_zip_start_qp(struct hisi_qp *qp, struct hisi_zip_qp_ctx *qp_ctx, struct device *dev = &qp->qm->pdev->dev; int ret; - qp->req_type = req_type; qp->alg_type = alg_type; qp->qp_ctx = qp_ctx; @@ -397,10 +397,15 @@ static int hisi_zip_ctx_init(struct hisi_zip_ctx *hisi_zip_ctx, u8 req_type, int { struct hisi_qp *qps[HZIP_CTX_Q_NUM] = { NULL }; struct hisi_zip_qp_ctx *qp_ctx; + u8 alg_type[HZIP_CTX_Q_NUM]; struct hisi_zip *hisi_zip; int ret, i, j; - ret = zip_create_qps(qps, HZIP_CTX_Q_NUM, node); + /* alg_type = 0 for compress, 1 for decompress in hw sqe */ + for (i = 0; i < HZIP_CTX_Q_NUM; i++) + alg_type[i] = i; + + ret = zip_create_qps(qps, HZIP_CTX_Q_NUM, node, alg_type); if (ret) { pr_err("failed to create zip qps (%d)!\n", ret); return -ENODEV; @@ -409,7 +414,6 @@ static int hisi_zip_ctx_init(struct hisi_zip_ctx *hisi_zip_ctx, u8 req_type, int hisi_zip = container_of(qps[0]->qm, struct hisi_zip, qm); for (i = 0; i < HZIP_CTX_Q_NUM; i++) { - /* alg_type = 0 for compress, 1 for decompress in hw sqe */ qp_ctx = &hisi_zip_ctx->qp_ctx[i]; qp_ctx->ctx = hisi_zip_ctx; ret = hisi_zip_start_qp(qps[i], qp_ctx, i, req_type); @@ -422,6 +426,7 @@ static int hisi_zip_ctx_init(struct hisi_zip_ctx *hisi_zip_ctx, u8 req_type, int } qp_ctx->zip_dev = hisi_zip; + qp_ctx->req_type = req_type; } hisi_zip_ctx->ops = &hisi_zip_ops; diff --git a/drivers/crypto/hisilicon/zip/zip_main.c b/drivers/crypto/hisilicon/zip/zip_main.c index 4fcbe6bada06..85b26ef17548 100644 --- a/drivers/crypto/hisilicon/zip/zip_main.c +++ b/drivers/crypto/hisilicon/zip/zip_main.c @@ -446,12 +446,12 @@ static const struct pci_device_id hisi_zip_dev_ids[] = { }; MODULE_DEVICE_TABLE(pci, hisi_zip_dev_ids); -int zip_create_qps(struct hisi_qp **qps, int qp_num, int node) +int zip_create_qps(struct hisi_qp **qps, int qp_num, int node, u8 *alg_type) { if (node == NUMA_NO_NODE) node = cpu_to_node(raw_smp_processor_id()); - return hisi_qm_alloc_qps_node(&zip_devices, qp_num, 0, node, qps); + return hisi_qm_alloc_qps_node(&zip_devices, qp_num, alg_type, node, qps); } bool hisi_zip_alg_support(struct hisi_qm *qm, u32 alg) diff --git a/include/linux/hisi_acc_qm.h b/include/linux/hisi_acc_qm.h index 9485896d5dc0..dd4323633d81 100644 --- a/include/linux/hisi_acc_qm.h +++ b/include/linux/hisi_acc_qm.h @@ -457,7 +457,6 @@ struct hisi_qp { u16 sq_depth; u16 cq_depth; u8 alg_type; - u8 req_type; struct qm_dma qdma; void *sqe; @@ -583,7 +582,7 @@ struct hisi_acc_sgl_pool *hisi_acc_create_sgl_pool(struct device *dev, void hisi_acc_free_sgl_pool(struct device *dev, struct hisi_acc_sgl_pool *pool); int hisi_qm_alloc_qps_node(struct hisi_qm_list *qm_list, int qp_num, - u8 alg_type, int node, struct hisi_qp **qps); + u8 *alg_type, int node, struct hisi_qp **qps); void hisi_qm_free_qps(struct hisi_qp **qps, int qp_num); void hisi_qm_dev_shutdown(struct pci_dev *pdev); void hisi_qm_wait_task_finish(struct hisi_qm *qm, struct hisi_qm_list *qm_list); -- cgit v1.2.3 From 8cd9b608ee8dea78cac3f373bd5e3b3de2755d46 Mon Sep 17 00:00:00 2001 From: Chenghai Huang Date: Thu, 18 Dec 2025 21:44:46 +0800 Subject: crypto: hisilicon/qm - centralize the sending locks of each module into qm When a single queue used by multiple tfms, the protection of shared resources by individual module driver programs is no longer sufficient. The hisi_qp_send needs to be ensured by the lock in qp. Fixes: 5fdb4b345cfb ("crypto: hisilicon - add a lock for the qp send operation") Signed-off-by: Chenghai Huang Signed-off-by: Weili Qian Signed-off-by: Herbert Xu --- drivers/crypto/hisilicon/hpre/hpre_crypto.c | 4 ---- drivers/crypto/hisilicon/qm.c | 16 ++++++++++++---- drivers/crypto/hisilicon/zip/zip_crypto.c | 3 --- include/linux/hisi_acc_qm.h | 1 + 4 files changed, 13 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/drivers/crypto/hisilicon/hpre/hpre_crypto.c b/drivers/crypto/hisilicon/hpre/hpre_crypto.c index 4197281c8dff..220022ae7afb 100644 --- a/drivers/crypto/hisilicon/hpre/hpre_crypto.c +++ b/drivers/crypto/hisilicon/hpre/hpre_crypto.c @@ -109,7 +109,6 @@ struct hpre_ctx { struct hisi_qp *qp; struct device *dev; struct hpre *hpre; - spinlock_t req_lock; unsigned int key_sz; bool crt_g2_mode; union { @@ -410,7 +409,6 @@ static int hpre_ctx_init(struct hpre_ctx *ctx, u8 type) qp->qp_ctx = ctx; qp->req_cb = hpre_alg_cb; - spin_lock_init(&ctx->req_lock); ctx->qp = qp; ctx->dev = &qp->qm->pdev->dev; hpre = container_of(ctx->qp->qm, struct hpre, qm); @@ -478,9 +476,7 @@ static int hpre_send(struct hpre_ctx *ctx, struct hpre_sqe *msg) do { atomic64_inc(&dfx[HPRE_SEND_CNT].value); - spin_lock_bh(&ctx->req_lock); ret = hisi_qp_send(ctx->qp, msg); - spin_unlock_bh(&ctx->req_lock); if (ret != -EBUSY) break; atomic64_inc(&dfx[HPRE_SEND_BUSY_CNT].value); diff --git a/drivers/crypto/hisilicon/qm.c b/drivers/crypto/hisilicon/qm.c index 5c80ca04a8d4..0f5e39884e4a 100644 --- a/drivers/crypto/hisilicon/qm.c +++ b/drivers/crypto/hisilicon/qm.c @@ -2369,26 +2369,33 @@ EXPORT_SYMBOL_GPL(hisi_qm_stop_qp); int hisi_qp_send(struct hisi_qp *qp, const void *msg) { struct hisi_qp_status *qp_status = &qp->qp_status; - u16 sq_tail = qp_status->sq_tail; - u16 sq_tail_next = (sq_tail + 1) % qp->sq_depth; - void *sqe = qm_get_avail_sqe(qp); + u16 sq_tail, sq_tail_next; + void *sqe; + spin_lock_bh(&qp->qp_lock); if (unlikely(atomic_read(&qp->qp_status.flags) == QP_STOP || atomic_read(&qp->qm->status.flags) == QM_STOP || qp->is_resetting)) { + spin_unlock_bh(&qp->qp_lock); dev_info_ratelimited(&qp->qm->pdev->dev, "QP is stopped or resetting\n"); return -EAGAIN; } - if (!sqe) + sqe = qm_get_avail_sqe(qp); + if (!sqe) { + spin_unlock_bh(&qp->qp_lock); return -EBUSY; + } + sq_tail = qp_status->sq_tail; + sq_tail_next = (sq_tail + 1) % qp->sq_depth; memcpy(sqe, msg, qp->qm->sqe_size); qp->msg[sq_tail] = msg; qm_db(qp->qm, qp->qp_id, QM_DOORBELL_CMD_SQ, sq_tail_next, 0); atomic_inc(&qp->qp_status.used); qp_status->sq_tail = sq_tail_next; + spin_unlock_bh(&qp->qp_lock); return 0; } @@ -2968,6 +2975,7 @@ static int hisi_qp_memory_init(struct hisi_qm *qm, size_t dma_size, int id, qp->qm = qm; qp->qp_id = id; + spin_lock_init(&qp->qp_lock); spin_lock_init(&qp->backlog.lock); INIT_LIST_HEAD(&qp->backlog.list); diff --git a/drivers/crypto/hisilicon/zip/zip_crypto.c b/drivers/crypto/hisilicon/zip/zip_crypto.c index 8250a33ba586..2f9035c016f3 100644 --- a/drivers/crypto/hisilicon/zip/zip_crypto.c +++ b/drivers/crypto/hisilicon/zip/zip_crypto.c @@ -217,7 +217,6 @@ static int hisi_zip_do_work(struct hisi_zip_qp_ctx *qp_ctx, { struct hisi_acc_sgl_pool *pool = qp_ctx->sgl_pool; struct hisi_zip_dfx *dfx = &qp_ctx->zip_dev->dfx; - struct hisi_zip_req_q *req_q = &qp_ctx->req_q; struct acomp_req *a_req = req->req; struct hisi_qp *qp = qp_ctx->qp; struct device *dev = &qp->qm->pdev->dev; @@ -250,9 +249,7 @@ static int hisi_zip_do_work(struct hisi_zip_qp_ctx *qp_ctx, /* send command to start a task */ atomic64_inc(&dfx->send_cnt); - spin_lock_bh(&req_q->req_lock); ret = hisi_qp_send(qp, &zip_sqe); - spin_unlock_bh(&req_q->req_lock); if (unlikely(ret < 0)) { atomic64_inc(&dfx->send_busy_cnt); ret = -EAGAIN; diff --git a/include/linux/hisi_acc_qm.h b/include/linux/hisi_acc_qm.h index dd4323633d81..ef4d3a79bcb7 100644 --- a/include/linux/hisi_acc_qm.h +++ b/include/linux/hisi_acc_qm.h @@ -476,6 +476,7 @@ struct hisi_qp { u16 pasid; struct uacce_queue *uacce_q; + spinlock_t qp_lock; struct instance_backlog backlog; const void **msg; }; -- cgit v1.2.3 From 72f3bbebff15e87171271d643ee2672fb8e92031 Mon Sep 17 00:00:00 2001 From: Chenghai Huang Date: Thu, 18 Dec 2025 21:44:47 +0800 Subject: crypto: hisilicon - consolidate qp creation and start in hisi_qm_alloc_qps_node Consolidate the creation and start of qp into the function hisi_qm_alloc_qps_node. This change eliminates the need for each module to perform these steps in two separate phases (creation and start). Signed-off-by: Chenghai Huang Signed-off-by: Weili Qian Signed-off-by: Herbert Xu --- drivers/crypto/hisilicon/hpre/hpre_crypto.c | 40 ++--------------- drivers/crypto/hisilicon/qm.c | 70 +++++++++++++++++++++++------ drivers/crypto/hisilicon/sec2/sec_crypto.c | 8 ---- drivers/crypto/hisilicon/zip/zip_crypto.c | 43 +++--------------- include/linux/hisi_acc_qm.h | 1 - 5 files changed, 66 insertions(+), 96 deletions(-) (limited to 'include/linux') diff --git a/drivers/crypto/hisilicon/hpre/hpre_crypto.c b/drivers/crypto/hisilicon/hpre/hpre_crypto.c index 220022ae7afb..f410e610eaba 100644 --- a/drivers/crypto/hisilicon/hpre/hpre_crypto.c +++ b/drivers/crypto/hisilicon/hpre/hpre_crypto.c @@ -156,27 +156,6 @@ static void hpre_dfx_add_req_time(struct hpre_asym_request *hpre_req) ktime_get_ts64(&hpre_req->req_time); } -static struct hisi_qp *hpre_get_qp_and_start(u8 type) -{ - struct hisi_qp *qp; - int ret; - - qp = hpre_create_qp(type); - if (!qp) { - pr_err("Can not create hpre qp!\n"); - return ERR_PTR(-ENODEV); - } - - ret = hisi_qm_start_qp(qp, 0); - if (ret < 0) { - hisi_qm_free_qps(&qp, 1); - pci_err(qp->qm->pdev, "Can not start qp!\n"); - return ERR_PTR(-EINVAL); - } - - return qp; -} - static int hpre_get_data_dma_addr(struct hpre_asym_request *hpre_req, struct scatterlist *data, unsigned int len, int is_src, dma_addr_t *tmp) @@ -316,9 +295,8 @@ static int hpre_alg_res_post_hf(struct hpre_ctx *ctx, struct hpre_sqe *sqe, static void hpre_ctx_clear(struct hpre_ctx *ctx, bool is_clear_all) { - if (is_clear_all) { + if (is_clear_all) hisi_qm_free_qps(&ctx->qp, 1); - } ctx->crt_g2_mode = false; ctx->key_sz = 0; @@ -403,11 +381,10 @@ static int hpre_ctx_init(struct hpre_ctx *ctx, u8 type) struct hisi_qp *qp; struct hpre *hpre; - qp = hpre_get_qp_and_start(type); - if (IS_ERR(qp)) - return PTR_ERR(qp); + qp = hpre_create_qp(type); + if (!qp) + return -ENODEV; - qp->qp_ctx = ctx; qp->req_cb = hpre_alg_cb; ctx->qp = qp; ctx->dev = &qp->qm->pdev->dev; @@ -597,9 +574,6 @@ static void hpre_dh_clear_ctx(struct hpre_ctx *ctx, bool is_clear_all) struct device *dev = ctx->dev; unsigned int sz = ctx->key_sz; - if (is_clear_all) - hisi_qm_stop_qp(ctx->qp); - if (ctx->dh.g) { dma_free_coherent(dev, sz, ctx->dh.g, ctx->dh.dma_g); ctx->dh.g = NULL; @@ -940,9 +914,6 @@ static void hpre_rsa_clear_ctx(struct hpre_ctx *ctx, bool is_clear_all) unsigned int half_key_sz = ctx->key_sz >> 1; struct device *dev = ctx->dev; - if (is_clear_all) - hisi_qm_stop_qp(ctx->qp); - if (ctx->rsa.pubkey) { dma_free_coherent(dev, ctx->key_sz << 1, ctx->rsa.pubkey, ctx->rsa.dma_pubkey); @@ -1112,9 +1083,6 @@ static void hpre_ecc_clear_ctx(struct hpre_ctx *ctx, bool is_clear_all) unsigned int sz = ctx->key_sz; unsigned int shift = sz << 1; - if (is_clear_all) - hisi_qm_stop_qp(ctx->qp); - if (ctx->ecdh.p) { /* ecdh: p->a->k->b */ memzero_explicit(ctx->ecdh.p + shift, sz); diff --git a/drivers/crypto/hisilicon/qm.c b/drivers/crypto/hisilicon/qm.c index 0f5e39884e4a..b8e59f99f700 100644 --- a/drivers/crypto/hisilicon/qm.c +++ b/drivers/crypto/hisilicon/qm.c @@ -3553,6 +3553,14 @@ void hisi_qm_dev_err_uninit(struct hisi_qm *qm) } EXPORT_SYMBOL_GPL(hisi_qm_dev_err_uninit); +static void qm_release_qp_nolock(struct hisi_qp *qp) +{ + struct hisi_qm *qm = qp->qm; + + qm->qp_in_used--; + idr_remove(&qm->qp_idr, qp->qp_id); +} + /** * hisi_qm_free_qps() - free multiple queue pairs. * @qps: The queue pairs need to be freed. @@ -3565,8 +3573,15 @@ void hisi_qm_free_qps(struct hisi_qp **qps, int qp_num) if (!qps || qp_num <= 0) return; - for (i = qp_num - 1; i >= 0; i--) - hisi_qm_release_qp(qps[i]); + down_write(&qps[0]->qm->qps_lock); + + for (i = qp_num - 1; i >= 0; i--) { + qm_stop_qp_nolock(qps[i]); + qm_release_qp_nolock(qps[i]); + } + + up_write(&qps[0]->qm->qps_lock); + qm_pm_put_sync(qps[0]->qm); } EXPORT_SYMBOL_GPL(hisi_qm_free_qps); @@ -3580,6 +3595,43 @@ static void free_list(struct list_head *head) } } +static int qm_get_and_start_qp(struct hisi_qm *qm, int qp_num, struct hisi_qp **qps, u8 *alg_type) +{ + int i, ret; + + ret = qm_pm_get_sync(qm); + if (ret) + return ret; + + down_write(&qm->qps_lock); + for (i = 0; i < qp_num; i++) { + qps[i] = qm_create_qp_nolock(qm, alg_type[i]); + if (IS_ERR(qps[i])) { + ret = -ENODEV; + goto stop_and_free; + } + + ret = qm_start_qp_nolock(qps[i], 0); + if (ret) { + qm_release_qp_nolock(qps[i]); + goto stop_and_free; + } + } + up_write(&qm->qps_lock); + + return 0; + +stop_and_free: + for (i--; i >= 0; i--) { + qm_stop_qp_nolock(qps[i]); + qm_release_qp_nolock(qps[i]); + } + up_write(&qm->qps_lock); + qm_pm_put_sync(qm); + + return ret; +} + static int hisi_qm_sort_devices(int node, struct list_head *head, struct hisi_qm_list *qm_list) { @@ -3633,7 +3685,6 @@ int hisi_qm_alloc_qps_node(struct hisi_qm_list *qm_list, int qp_num, struct hisi_qm_resource *tmp; int ret = -ENODEV; LIST_HEAD(head); - int i; if (!qps || !qm_list || qp_num <= 0) return -EINVAL; @@ -3645,18 +3696,9 @@ int hisi_qm_alloc_qps_node(struct hisi_qm_list *qm_list, int qp_num, } list_for_each_entry(tmp, &head, list) { - for (i = 0; i < qp_num; i++) { - qps[i] = hisi_qm_create_qp(tmp->qm, alg_type[i]); - if (IS_ERR(qps[i])) { - hisi_qm_free_qps(qps, i); - break; - } - } - - if (i == qp_num) { - ret = 0; + ret = qm_get_and_start_qp(tmp->qm, qp_num, qps, alg_type); + if (!ret) break; - } } mutex_unlock(&qm_list->lock); diff --git a/drivers/crypto/hisilicon/sec2/sec_crypto.c b/drivers/crypto/hisilicon/sec2/sec_crypto.c index 364bd69c6088..d09d081f42dc 100644 --- a/drivers/crypto/hisilicon/sec2/sec_crypto.c +++ b/drivers/crypto/hisilicon/sec2/sec_crypto.c @@ -626,7 +626,6 @@ static int sec_create_qp_ctx(struct sec_ctx *ctx, int qp_ctx_id) qp_ctx = &ctx->qp_ctx[qp_ctx_id]; qp = ctx->qps[qp_ctx_id]; - qp->qp_ctx = qp_ctx; qp_ctx->qp = qp; qp_ctx->ctx = ctx; @@ -644,14 +643,8 @@ static int sec_create_qp_ctx(struct sec_ctx *ctx, int qp_ctx_id) if (ret) goto err_destroy_idr; - ret = hisi_qm_start_qp(qp, 0); - if (ret < 0) - goto err_resource_free; - return 0; -err_resource_free: - sec_free_qp_ctx_resource(ctx, qp_ctx); err_destroy_idr: idr_destroy(&qp_ctx->req_idr); return ret; @@ -660,7 +653,6 @@ err_destroy_idr: static void sec_release_qp_ctx(struct sec_ctx *ctx, struct sec_qp_ctx *qp_ctx) { - hisi_qm_stop_qp(qp_ctx->qp); sec_free_qp_ctx_resource(ctx, qp_ctx); idr_destroy(&qp_ctx->req_idr); } diff --git a/drivers/crypto/hisilicon/zip/zip_crypto.c b/drivers/crypto/hisilicon/zip/zip_crypto.c index 2f9035c016f3..108ab667131d 100644 --- a/drivers/crypto/hisilicon/zip/zip_crypto.c +++ b/drivers/crypto/hisilicon/zip/zip_crypto.c @@ -352,32 +352,6 @@ static int hisi_zip_adecompress(struct acomp_req *acomp_req) return ret; } -static int hisi_zip_start_qp(struct hisi_qp *qp, struct hisi_zip_qp_ctx *qp_ctx, - int alg_type, int req_type) -{ - struct device *dev = &qp->qm->pdev->dev; - int ret; - - qp->alg_type = alg_type; - qp->qp_ctx = qp_ctx; - - ret = hisi_qm_start_qp(qp, 0); - if (ret < 0) { - dev_err(dev, "failed to start qp (%d)!\n", ret); - return ret; - } - - qp_ctx->qp = qp; - - return 0; -} - -static void hisi_zip_release_qp(struct hisi_zip_qp_ctx *qp_ctx) -{ - hisi_qm_stop_qp(qp_ctx->qp); - hisi_qm_free_qps(&qp_ctx->qp, 1); -} - static const struct hisi_zip_sqe_ops hisi_zip_ops = { .sqe_type = 0x3, .fill_addr = hisi_zip_fill_addr, @@ -396,7 +370,7 @@ static int hisi_zip_ctx_init(struct hisi_zip_ctx *hisi_zip_ctx, u8 req_type, int struct hisi_zip_qp_ctx *qp_ctx; u8 alg_type[HZIP_CTX_Q_NUM]; struct hisi_zip *hisi_zip; - int ret, i, j; + int ret, i; /* alg_type = 0 for compress, 1 for decompress in hw sqe */ for (i = 0; i < HZIP_CTX_Q_NUM; i++) @@ -413,17 +387,9 @@ static int hisi_zip_ctx_init(struct hisi_zip_ctx *hisi_zip_ctx, u8 req_type, int for (i = 0; i < HZIP_CTX_Q_NUM; i++) { qp_ctx = &hisi_zip_ctx->qp_ctx[i]; qp_ctx->ctx = hisi_zip_ctx; - ret = hisi_zip_start_qp(qps[i], qp_ctx, i, req_type); - if (ret) { - for (j = i - 1; j >= 0; j--) - hisi_qm_stop_qp(hisi_zip_ctx->qp_ctx[j].qp); - - hisi_qm_free_qps(qps, HZIP_CTX_Q_NUM); - return ret; - } - qp_ctx->zip_dev = hisi_zip; qp_ctx->req_type = req_type; + qp_ctx->qp = qps[i]; } hisi_zip_ctx->ops = &hisi_zip_ops; @@ -433,10 +399,13 @@ static int hisi_zip_ctx_init(struct hisi_zip_ctx *hisi_zip_ctx, u8 req_type, int static void hisi_zip_ctx_exit(struct hisi_zip_ctx *hisi_zip_ctx) { + struct hisi_qp *qps[HZIP_CTX_Q_NUM] = { NULL }; int i; for (i = 0; i < HZIP_CTX_Q_NUM; i++) - hisi_zip_release_qp(&hisi_zip_ctx->qp_ctx[i]); + qps[i] = hisi_zip_ctx->qp_ctx[i].qp; + + hisi_qm_free_qps(qps, HZIP_CTX_Q_NUM); } static int hisi_zip_create_req_q(struct hisi_zip_ctx *ctx) diff --git a/include/linux/hisi_acc_qm.h b/include/linux/hisi_acc_qm.h index ef4d3a79bcb7..59f985804958 100644 --- a/include/linux/hisi_acc_qm.h +++ b/include/linux/hisi_acc_qm.h @@ -466,7 +466,6 @@ struct hisi_qp { struct hisi_qp_status qp_status; struct hisi_qp_ops *hw_ops; - void *qp_ctx; void (*req_cb)(struct hisi_qp *qp, void *data); void (*event_cb)(struct hisi_qp *qp); -- cgit v1.2.3 From 4705489742fdc294fe3de7bd9855432fb3cfe3cd Mon Sep 17 00:00:00 2001 From: Chenghai Huang Date: Thu, 18 Dec 2025 21:44:48 +0800 Subject: crypto: hisilicon/qm - add reference counting to queues for tfm kernel reuse Add reference counting to queues. When all queues are occupied, tfm will reuse queues with the same algorithm type that have already been allocated in the kernel. The corresponding queue will be released when the reference count reaches 1. Reviewed-by: Longfang Liu Signed-off-by: Chenghai Huang Signed-off-by: Weili Qian Signed-off-by: Herbert Xu --- drivers/crypto/hisilicon/qm.c | 65 +++++++++++++++++++++++++++++++++---------- include/linux/hisi_acc_qm.h | 1 + 2 files changed, 52 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/drivers/crypto/hisilicon/qm.c b/drivers/crypto/hisilicon/qm.c index b8e59f99f700..1f1714ea5af7 100644 --- a/drivers/crypto/hisilicon/qm.c +++ b/drivers/crypto/hisilicon/qm.c @@ -2002,7 +2002,38 @@ static void hisi_qm_unset_hw_reset(struct hisi_qp *qp) *addr = 0; } -static struct hisi_qp *qm_create_qp_nolock(struct hisi_qm *qm, u8 alg_type) +static struct hisi_qp *find_shareable_qp(struct hisi_qm *qm, u8 alg_type, bool is_in_kernel) +{ + struct device *dev = &qm->pdev->dev; + struct hisi_qp *share_qp = NULL; + struct hisi_qp *qp; + u32 ref_count = ~0; + int i; + + if (!is_in_kernel) + goto queues_busy; + + for (i = 0; i < qm->qp_num; i++) { + qp = &qm->qp_array[i]; + if (qp->is_in_kernel && qp->alg_type == alg_type && qp->ref_count < ref_count) { + ref_count = qp->ref_count; + share_qp = qp; + } + } + + if (share_qp) { + share_qp->ref_count++; + return share_qp; + } + +queues_busy: + dev_info_ratelimited(dev, "All %u queues of QM are busy and no shareable queue\n", + qm->qp_num); + atomic64_inc(&qm->debug.dfx.create_qp_err_cnt); + return ERR_PTR(-EBUSY); +} + +static struct hisi_qp *qm_create_qp_nolock(struct hisi_qm *qm, u8 alg_type, bool is_in_kernel) { struct device *dev = &qm->pdev->dev; struct hisi_qp *qp; @@ -2013,12 +2044,9 @@ static struct hisi_qp *qm_create_qp_nolock(struct hisi_qm *qm, u8 alg_type) return ERR_PTR(-EPERM); } - if (qm->qp_in_used == qm->qp_num) { - dev_info_ratelimited(dev, "All %u queues of QM are busy!\n", - qm->qp_num); - atomic64_inc(&qm->debug.dfx.create_qp_err_cnt); - return ERR_PTR(-EBUSY); - } + /* Try to find a shareable queue when all queues are busy */ + if (qm->qp_in_used == qm->qp_num) + return find_shareable_qp(qm, alg_type, is_in_kernel); qp_id = idr_alloc_cyclic(&qm->qp_idr, NULL, 0, qm->qp_num, GFP_ATOMIC); if (qp_id < 0) { @@ -2034,10 +2062,10 @@ static struct hisi_qp *qm_create_qp_nolock(struct hisi_qm *qm, u8 alg_type) qp->event_cb = NULL; qp->req_cb = NULL; - qp->qp_id = qp_id; qp->alg_type = alg_type; - qp->is_in_kernel = true; + qp->is_in_kernel = is_in_kernel; qm->qp_in_used++; + qp->ref_count = 1; return qp; } @@ -2059,7 +2087,7 @@ static struct hisi_qp *hisi_qm_create_qp(struct hisi_qm *qm, u8 alg_type) return ERR_PTR(ret); down_write(&qm->qps_lock); - qp = qm_create_qp_nolock(qm, alg_type); + qp = qm_create_qp_nolock(qm, alg_type, false); up_write(&qm->qps_lock); if (IS_ERR(qp)) @@ -2458,7 +2486,6 @@ static int hisi_qm_uacce_get_queue(struct uacce_device *uacce, qp->uacce_q = q; qp->event_cb = qm_qp_event_notifier; qp->pasid = arg; - qp->is_in_kernel = false; return 0; } @@ -3557,6 +3584,9 @@ static void qm_release_qp_nolock(struct hisi_qp *qp) { struct hisi_qm *qm = qp->qm; + if (--qp->ref_count) + return; + qm->qp_in_used--; idr_remove(&qm->qp_idr, qp->qp_id); } @@ -3576,7 +3606,9 @@ void hisi_qm_free_qps(struct hisi_qp **qps, int qp_num) down_write(&qps[0]->qm->qps_lock); for (i = qp_num - 1; i >= 0; i--) { - qm_stop_qp_nolock(qps[i]); + if (qps[i]->ref_count == 1) + qm_stop_qp_nolock(qps[i]); + qm_release_qp_nolock(qps[i]); } @@ -3605,12 +3637,15 @@ static int qm_get_and_start_qp(struct hisi_qm *qm, int qp_num, struct hisi_qp ** down_write(&qm->qps_lock); for (i = 0; i < qp_num; i++) { - qps[i] = qm_create_qp_nolock(qm, alg_type[i]); + qps[i] = qm_create_qp_nolock(qm, alg_type[i], true); if (IS_ERR(qps[i])) { ret = -ENODEV; goto stop_and_free; } + if (qps[i]->ref_count != 1) + continue; + ret = qm_start_qp_nolock(qps[i], 0); if (ret) { qm_release_qp_nolock(qps[i]); @@ -3623,7 +3658,9 @@ static int qm_get_and_start_qp(struct hisi_qm *qm, int qp_num, struct hisi_qp ** stop_and_free: for (i--; i >= 0; i--) { - qm_stop_qp_nolock(qps[i]); + if (qps[i]->ref_count == 1) + qm_stop_qp_nolock(qps[i]); + qm_release_qp_nolock(qps[i]); } up_write(&qm->qps_lock); diff --git a/include/linux/hisi_acc_qm.h b/include/linux/hisi_acc_qm.h index 59f985804958..f517b9db8392 100644 --- a/include/linux/hisi_acc_qm.h +++ b/include/linux/hisi_acc_qm.h @@ -475,6 +475,7 @@ struct hisi_qp { u16 pasid; struct uacce_queue *uacce_q; + u32 ref_count; spinlock_t qp_lock; struct instance_backlog backlog; const void **msg; -- cgit v1.2.3 From c15291e2278ea930ca47a034780f002bee1e1b4f Mon Sep 17 00:00:00 2001 From: Harsh Jain Date: Sat, 20 Dec 2025 21:28:52 +0530 Subject: firmware: zynqmp: Move crypto API's to separate file For better maintainability move crypto related API's to new zynqmp-crypto.c file. Signed-off-by: Harsh Jain Signed-off-by: Herbert Xu --- drivers/firmware/xilinx/Makefile | 2 +- drivers/firmware/xilinx/zynqmp-crypto.c | 59 +++++++++++++++++++++++++++++ drivers/firmware/xilinx/zynqmp.c | 49 ------------------------ include/linux/firmware/xlnx-zynqmp-crypto.h | 28 ++++++++++++++ include/linux/firmware/xlnx-zynqmp.h | 14 +------ 5 files changed, 89 insertions(+), 63 deletions(-) create mode 100644 drivers/firmware/xilinx/zynqmp-crypto.c create mode 100644 include/linux/firmware/xlnx-zynqmp-crypto.h (limited to 'include/linux') diff --git a/drivers/firmware/xilinx/Makefile b/drivers/firmware/xilinx/Makefile index 70f8f02f14a3..8db0e66b6b7e 100644 --- a/drivers/firmware/xilinx/Makefile +++ b/drivers/firmware/xilinx/Makefile @@ -1,5 +1,5 @@ # SPDX-License-Identifier: GPL-2.0 # Makefile for Xilinx firmwares -obj-$(CONFIG_ZYNQMP_FIRMWARE) += zynqmp.o zynqmp-ufs.o +obj-$(CONFIG_ZYNQMP_FIRMWARE) += zynqmp.o zynqmp-ufs.o zynqmp-crypto.o obj-$(CONFIG_ZYNQMP_FIRMWARE_DEBUG) += zynqmp-debug.o diff --git a/drivers/firmware/xilinx/zynqmp-crypto.c b/drivers/firmware/xilinx/zynqmp-crypto.c new file mode 100644 index 000000000000..ea9cac6a1052 --- /dev/null +++ b/drivers/firmware/xilinx/zynqmp-crypto.c @@ -0,0 +1,59 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Firmware layer for XilSecure APIs. + * + * Copyright (C) 2014-2022 Xilinx, Inc. + * Copyright (C) 2022-2025 Advanced Micro Devices, Inc. + */ + +#include +#include + +/** + * zynqmp_pm_aes_engine - Access AES hardware to encrypt/decrypt the data using + * AES-GCM core. + * @address: Address of the AesParams structure. + * @out: Returned output value + * + * Return: Returns status, either success or error code. + */ +int zynqmp_pm_aes_engine(const u64 address, u32 *out) +{ + u32 ret_payload[PAYLOAD_ARG_CNT]; + int ret; + + if (!out) + return -EINVAL; + + ret = zynqmp_pm_invoke_fn(PM_SECURE_AES, ret_payload, 2, upper_32_bits(address), + lower_32_bits(address)); + *out = ret_payload[1]; + + return ret; +} +EXPORT_SYMBOL_GPL(zynqmp_pm_aes_engine); + +/** + * zynqmp_pm_sha_hash - Access the SHA engine to calculate the hash + * @address: Address of the data/ Address of output buffer where + * hash should be stored. + * @size: Size of the data. + * @flags: + * BIT(0) - for initializing csudma driver and SHA3(Here address + * and size inputs can be NULL). + * BIT(1) - to call Sha3_Update API which can be called multiple + * times when data is not contiguous. + * BIT(2) - to get final hash of the whole updated data. + * Hash will be overwritten at provided address with + * 48 bytes. + * + * Return: Returns status, either success or error code. + */ +int zynqmp_pm_sha_hash(const u64 address, const u32 size, const u32 flags) +{ + u32 lower_addr = lower_32_bits(address); + u32 upper_addr = upper_32_bits(address); + + return zynqmp_pm_invoke_fn(PM_SECURE_SHA, NULL, 4, upper_addr, lower_addr, size, flags); +} +EXPORT_SYMBOL_GPL(zynqmp_pm_sha_hash); diff --git a/drivers/firmware/xilinx/zynqmp.c b/drivers/firmware/xilinx/zynqmp.c index ad811f40e059..f15db1a818dc 100644 --- a/drivers/firmware/xilinx/zynqmp.c +++ b/drivers/firmware/xilinx/zynqmp.c @@ -1521,30 +1521,6 @@ int zynqmp_pm_load_pdi(const u32 src, const u64 address) } EXPORT_SYMBOL_GPL(zynqmp_pm_load_pdi); -/** - * zynqmp_pm_aes_engine - Access AES hardware to encrypt/decrypt the data using - * AES-GCM core. - * @address: Address of the AesParams structure. - * @out: Returned output value - * - * Return: Returns status, either success or error code. - */ -int zynqmp_pm_aes_engine(const u64 address, u32 *out) -{ - u32 ret_payload[PAYLOAD_ARG_CNT]; - int ret; - - if (!out) - return -EINVAL; - - ret = zynqmp_pm_invoke_fn(PM_SECURE_AES, ret_payload, 2, upper_32_bits(address), - lower_32_bits(address)); - *out = ret_payload[1]; - - return ret; -} -EXPORT_SYMBOL_GPL(zynqmp_pm_aes_engine); - /** * zynqmp_pm_efuse_access - Provides access to efuse memory. * @address: Address of the efuse params structure @@ -1569,31 +1545,6 @@ int zynqmp_pm_efuse_access(const u64 address, u32 *out) } EXPORT_SYMBOL_GPL(zynqmp_pm_efuse_access); -/** - * zynqmp_pm_sha_hash - Access the SHA engine to calculate the hash - * @address: Address of the data/ Address of output buffer where - * hash should be stored. - * @size: Size of the data. - * @flags: - * BIT(0) - for initializing csudma driver and SHA3(Here address - * and size inputs can be NULL). - * BIT(1) - to call Sha3_Update API which can be called multiple - * times when data is not contiguous. - * BIT(2) - to get final hash of the whole updated data. - * Hash will be overwritten at provided address with - * 48 bytes. - * - * Return: Returns status, either success or error code. - */ -int zynqmp_pm_sha_hash(const u64 address, const u32 size, const u32 flags) -{ - u32 lower_addr = lower_32_bits(address); - u32 upper_addr = upper_32_bits(address); - - return zynqmp_pm_invoke_fn(PM_SECURE_SHA, NULL, 4, upper_addr, lower_addr, size, flags); -} -EXPORT_SYMBOL_GPL(zynqmp_pm_sha_hash); - /** * zynqmp_pm_register_notifier() - PM API for register a subsystem * to be notified about specific diff --git a/include/linux/firmware/xlnx-zynqmp-crypto.h b/include/linux/firmware/xlnx-zynqmp-crypto.h new file mode 100644 index 000000000000..f9eb523ba6a0 --- /dev/null +++ b/include/linux/firmware/xlnx-zynqmp-crypto.h @@ -0,0 +1,28 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Firmware layer for XilSECURE APIs. + * + * Copyright (C) 2014-2022 Xilinx, Inc. + * Copyright (C) 2022-2025 Advanced Micro Devices, Inc. + */ + +#ifndef __FIRMWARE_XLNX_ZYNQMP_CRYPTO_H__ +#define __FIRMWARE_XLNX_ZYNQMP_CRYPTO_H__ + +#if IS_REACHABLE(CONFIG_ZYNQMP_FIRMWARE) +int zynqmp_pm_aes_engine(const u64 address, u32 *out); +int zynqmp_pm_sha_hash(const u64 address, const u32 size, const u32 flags); +#else +static inline int zynqmp_pm_aes_engine(const u64 address, u32 *out) +{ + return -ENODEV; +} + +static inline int zynqmp_pm_sha_hash(const u64 address, const u32 size, + const u32 flags) +{ + return -ENODEV; +} +#endif + +#endif /* __FIRMWARE_XLNX_ZYNQMP_CRYPTO_H__ */ diff --git a/include/linux/firmware/xlnx-zynqmp.h b/include/linux/firmware/xlnx-zynqmp.h index 15fdbd089bbf..d70dcd462b44 100644 --- a/include/linux/firmware/xlnx-zynqmp.h +++ b/include/linux/firmware/xlnx-zynqmp.h @@ -17,6 +17,7 @@ #include #include +#include #define ZYNQMP_PM_VERSION_MAJOR 1 #define ZYNQMP_PM_VERSION_MINOR 0 @@ -589,9 +590,7 @@ int zynqmp_pm_release_node(const u32 node); int zynqmp_pm_set_requirement(const u32 node, const u32 capabilities, const u32 qos, const enum zynqmp_pm_request_ack ack); -int zynqmp_pm_aes_engine(const u64 address, u32 *out); int zynqmp_pm_efuse_access(const u64 address, u32 *out); -int zynqmp_pm_sha_hash(const u64 address, const u32 size, const u32 flags); int zynqmp_pm_fpga_load(const u64 address, const u32 size, const u32 flags); int zynqmp_pm_fpga_get_status(u32 *value); int zynqmp_pm_fpga_get_config_status(u32 *value); @@ -772,22 +771,11 @@ static inline int zynqmp_pm_set_requirement(const u32 node, return -ENODEV; } -static inline int zynqmp_pm_aes_engine(const u64 address, u32 *out) -{ - return -ENODEV; -} - static inline int zynqmp_pm_efuse_access(const u64 address, u32 *out) { return -ENODEV; } -static inline int zynqmp_pm_sha_hash(const u64 address, const u32 size, - const u32 flags) -{ - return -ENODEV; -} - static inline int zynqmp_pm_fpga_load(const u64 address, const u32 size, const u32 flags) { -- cgit v1.2.3 From 465d7831261965248a7a1484bd186066d29427f0 Mon Sep 17 00:00:00 2001 From: Harsh Jain Date: Sat, 20 Dec 2025 21:28:54 +0530 Subject: firmware: zynqmp: Add helper API to self discovery the device Add API to get SoC version and family info. Signed-off-by: Harsh Jain Signed-off-by: Herbert Xu --- drivers/firmware/xilinx/zynqmp-crypto.c | 31 +++++++++++++++++++++++++++++ include/linux/firmware/xlnx-zynqmp-crypto.h | 19 ++++++++++++++++++ 2 files changed, 50 insertions(+) (limited to 'include/linux') diff --git a/drivers/firmware/xilinx/zynqmp-crypto.c b/drivers/firmware/xilinx/zynqmp-crypto.c index ea9cac6a1052..6d17cb8b27b3 100644 --- a/drivers/firmware/xilinx/zynqmp-crypto.c +++ b/drivers/firmware/xilinx/zynqmp-crypto.c @@ -57,3 +57,34 @@ int zynqmp_pm_sha_hash(const u64 address, const u32 size, const u32 flags) return zynqmp_pm_invoke_fn(PM_SECURE_SHA, NULL, 4, upper_addr, lower_addr, size, flags); } EXPORT_SYMBOL_GPL(zynqmp_pm_sha_hash); + +/** + * xlnx_get_crypto_dev_data() - Get crypto dev data of platform + * @feature_map: List of available feature map of all platform + * + * Return: Returns crypto dev data, either address crypto dev or ERR PTR + */ +void *xlnx_get_crypto_dev_data(struct xlnx_feature *feature_map) +{ + struct xlnx_feature *feature; + u32 pm_family_code; + int ret; + + /* Get the Family code and sub family code of platform */ + ret = zynqmp_pm_get_family_info(&pm_family_code); + if (ret < 0) + return ERR_PTR(ret); + + feature = feature_map; + for (; feature->family; feature++) { + if (feature->family == pm_family_code) { + ret = zynqmp_pm_feature(feature->feature_id); + if (ret < 0) + return ERR_PTR(ret); + + return feature->data; + } + } + return ERR_PTR(-ENODEV); +} +EXPORT_SYMBOL_GPL(xlnx_get_crypto_dev_data); diff --git a/include/linux/firmware/xlnx-zynqmp-crypto.h b/include/linux/firmware/xlnx-zynqmp-crypto.h index f9eb523ba6a0..cb08f412e931 100644 --- a/include/linux/firmware/xlnx-zynqmp-crypto.h +++ b/include/linux/firmware/xlnx-zynqmp-crypto.h @@ -9,9 +9,23 @@ #ifndef __FIRMWARE_XLNX_ZYNQMP_CRYPTO_H__ #define __FIRMWARE_XLNX_ZYNQMP_CRYPTO_H__ +/** + * struct xlnx_feature - Feature data + * @family: Family code of platform + * @subfamily: Subfamily code of platform + * @feature_id: Feature id of module + * @data: Collection of all supported platform data + */ +struct xlnx_feature { + u32 family; + u32 feature_id; + void *data; +}; + #if IS_REACHABLE(CONFIG_ZYNQMP_FIRMWARE) int zynqmp_pm_aes_engine(const u64 address, u32 *out); int zynqmp_pm_sha_hash(const u64 address, const u32 size, const u32 flags); +void *xlnx_get_crypto_dev_data(struct xlnx_feature *feature_map); #else static inline int zynqmp_pm_aes_engine(const u64 address, u32 *out) { @@ -23,6 +37,11 @@ static inline int zynqmp_pm_sha_hash(const u64 address, const u32 size, { return -ENODEV; } + +static inline void *xlnx_get_crypto_dev_data(struct xlnx_feature *feature_map) +{ + return ERR_PTR(-ENODEV); +} #endif #endif /* __FIRMWARE_XLNX_ZYNQMP_CRYPTO_H__ */ -- cgit v1.2.3 From e9f6870bb753b11b325f7d7839b4be6956d448d9 Mon Sep 17 00:00:00 2001 From: Harsh Jain Date: Sat, 20 Dec 2025 21:29:03 +0530 Subject: firmware: xilinx: Add firmware API's to support aes-gcm in Versal device Add aes-gcm crypto API's for AMD/Xilinx Versal device. Signed-off-by: Harsh Jain Signed-off-by: Herbert Xu --- drivers/firmware/xilinx/zynqmp-crypto.c | 150 +++++++++++++++++++++++++++- include/linux/firmware/xlnx-zynqmp-crypto.h | 76 +++++++++++++- 2 files changed, 223 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/drivers/firmware/xilinx/zynqmp-crypto.c b/drivers/firmware/xilinx/zynqmp-crypto.c index 6d17cb8b27b3..f06f1e2f67b8 100644 --- a/drivers/firmware/xilinx/zynqmp-crypto.c +++ b/drivers/firmware/xilinx/zynqmp-crypto.c @@ -60,7 +60,7 @@ EXPORT_SYMBOL_GPL(zynqmp_pm_sha_hash); /** * xlnx_get_crypto_dev_data() - Get crypto dev data of platform - * @feature_map: List of available feature map of all platform + * @feature_map: List of available feature map of all platform * * Return: Returns crypto dev data, either address crypto dev or ERR PTR */ @@ -88,3 +88,151 @@ void *xlnx_get_crypto_dev_data(struct xlnx_feature *feature_map) return ERR_PTR(-ENODEV); } EXPORT_SYMBOL_GPL(xlnx_get_crypto_dev_data); + +/** + * versal_pm_aes_key_write - Write AES key registers + * @keylen: Size of the input key to be written + * @keysrc: Key Source to be selected to which provided + * key should be updated + * @keyaddr: Address of a buffer which should contain the key + * to be written + * + * This function provides support to write AES volatile user keys. + * + * Return: Returns status, either success or error+reason + */ +int versal_pm_aes_key_write(const u32 keylen, + const u32 keysrc, const u64 keyaddr) +{ + return zynqmp_pm_invoke_fn(XSECURE_API_AES_WRITE_KEY, NULL, 4, + keylen, keysrc, + lower_32_bits(keyaddr), + upper_32_bits(keyaddr)); +} +EXPORT_SYMBOL_GPL(versal_pm_aes_key_write); + +/** + * versal_pm_aes_key_zero - Zeroise AES User key registers + * @keysrc: Key Source to be selected to which provided + * key should be updated + * + * This function provides support to zeroise AES volatile user keys. + * + * Return: Returns status, either success or error+reason + */ +int versal_pm_aes_key_zero(const u32 keysrc) +{ + return zynqmp_pm_invoke_fn(XSECURE_API_AES_KEY_ZERO, NULL, 1, keysrc); +} +EXPORT_SYMBOL_GPL(versal_pm_aes_key_zero); + +/** + * versal_pm_aes_op_init - Init AES operation + * @hw_req: AES op init structure address + * + * This function provides support to init AES operation. + * + * Return: Returns status, either success or error+reason + */ +int versal_pm_aes_op_init(const u64 hw_req) +{ + return zynqmp_pm_invoke_fn(XSECURE_API_AES_OP_INIT, NULL, 2, + lower_32_bits(hw_req), + upper_32_bits(hw_req)); +} +EXPORT_SYMBOL_GPL(versal_pm_aes_op_init); + +/** + * versal_pm_aes_update_aad - AES update aad + * @aad_addr: AES aad address + * @aad_len: AES aad data length + * + * This function provides support to update AAD data. + * + * Return: Returns status, either success or error+reason + */ +int versal_pm_aes_update_aad(const u64 aad_addr, const u32 aad_len) +{ + return zynqmp_pm_invoke_fn(XSECURE_API_AES_UPDATE_AAD, NULL, 3, + lower_32_bits(aad_addr), + upper_32_bits(aad_addr), + aad_len); +} +EXPORT_SYMBOL_GPL(versal_pm_aes_update_aad); + +/** + * versal_pm_aes_enc_update - Access AES hardware to encrypt the data using + * AES-GCM core. + * @in_params: Address of the AesParams structure + * @in_addr: Address of input buffer + * + * Return: Returns status, either success or error code. + */ +int versal_pm_aes_enc_update(const u64 in_params, const u64 in_addr) +{ + return zynqmp_pm_invoke_fn(XSECURE_API_AES_ENCRYPT_UPDATE, NULL, 4, + lower_32_bits(in_params), + upper_32_bits(in_params), + lower_32_bits(in_addr), + upper_32_bits(in_addr)); +} +EXPORT_SYMBOL_GPL(versal_pm_aes_enc_update); + +/** + * versal_pm_aes_enc_final - Access AES hardware to store the GCM tag + * @gcm_addr: Address of the gcm tag + * + * Return: Returns status, either success or error code. + */ +int versal_pm_aes_enc_final(const u64 gcm_addr) +{ + return zynqmp_pm_invoke_fn(XSECURE_API_AES_ENCRYPT_FINAL, NULL, 2, + lower_32_bits(gcm_addr), + upper_32_bits(gcm_addr)); +} +EXPORT_SYMBOL_GPL(versal_pm_aes_enc_final); + +/** + * versal_pm_aes_dec_update - Access AES hardware to decrypt the data using + * AES-GCM core. + * @in_params: Address of the AesParams structure + * @in_addr: Address of input buffer + * + * Return: Returns status, either success or error code. + */ +int versal_pm_aes_dec_update(const u64 in_params, const u64 in_addr) +{ + return zynqmp_pm_invoke_fn(XSECURE_API_AES_DECRYPT_UPDATE, NULL, 4, + lower_32_bits(in_params), + upper_32_bits(in_params), + lower_32_bits(in_addr), + upper_32_bits(in_addr)); +} +EXPORT_SYMBOL_GPL(versal_pm_aes_dec_update); + +/** + * versal_pm_aes_dec_final - Access AES hardware to get the GCM tag + * @gcm_addr: Address of the gcm tag + * + * Return: Returns status, either success or error code. + */ +int versal_pm_aes_dec_final(const u64 gcm_addr) +{ + return zynqmp_pm_invoke_fn(XSECURE_API_AES_DECRYPT_FINAL, NULL, 2, + lower_32_bits(gcm_addr), + upper_32_bits(gcm_addr)); +} +EXPORT_SYMBOL_GPL(versal_pm_aes_dec_final); + +/** + * versal_pm_aes_init - Init AES block + * + * This function initialise AES block. + * + * Return: Returns status, either success or error+reason + */ +int versal_pm_aes_init(void) +{ + return zynqmp_pm_invoke_fn(XSECURE_API_AES_INIT, NULL, 0); +} +EXPORT_SYMBOL_GPL(versal_pm_aes_init); diff --git a/include/linux/firmware/xlnx-zynqmp-crypto.h b/include/linux/firmware/xlnx-zynqmp-crypto.h index cb08f412e931..56595ab37c43 100644 --- a/include/linux/firmware/xlnx-zynqmp-crypto.h +++ b/include/linux/firmware/xlnx-zynqmp-crypto.h @@ -2,8 +2,8 @@ /* * Firmware layer for XilSECURE APIs. * - * Copyright (C) 2014-2022 Xilinx, Inc. - * Copyright (C) 2022-2025 Advanced Micro Devices, Inc. + * Copyright (C) 2014-2022 Xilinx, Inc. + * Copyright (C) 2022-2025 Advanced Micro Devices, Inc. */ #ifndef __FIRMWARE_XLNX_ZYNQMP_CRYPTO_H__ @@ -22,10 +22,32 @@ struct xlnx_feature { void *data; }; +/* xilSecure API commands module id + api id */ +#define XSECURE_API_AES_INIT 0x509 +#define XSECURE_API_AES_OP_INIT 0x50a +#define XSECURE_API_AES_UPDATE_AAD 0x50b +#define XSECURE_API_AES_ENCRYPT_UPDATE 0x50c +#define XSECURE_API_AES_ENCRYPT_FINAL 0x50d +#define XSECURE_API_AES_DECRYPT_UPDATE 0x50e +#define XSECURE_API_AES_DECRYPT_FINAL 0x50f +#define XSECURE_API_AES_KEY_ZERO 0x510 +#define XSECURE_API_AES_WRITE_KEY 0x511 + #if IS_REACHABLE(CONFIG_ZYNQMP_FIRMWARE) int zynqmp_pm_aes_engine(const u64 address, u32 *out); int zynqmp_pm_sha_hash(const u64 address, const u32 size, const u32 flags); void *xlnx_get_crypto_dev_data(struct xlnx_feature *feature_map); +int versal_pm_aes_key_write(const u32 keylen, + const u32 keysrc, const u64 keyaddr); +int versal_pm_aes_key_zero(const u32 keysrc); +int versal_pm_aes_op_init(const u64 hw_req); +int versal_pm_aes_update_aad(const u64 aad_addr, const u32 aad_len); +int versal_pm_aes_enc_update(const u64 in_params, const u64 in_addr); +int versal_pm_aes_dec_update(const u64 in_params, const u64 in_addr); +int versal_pm_aes_dec_final(const u64 gcm_addr); +int versal_pm_aes_enc_final(const u64 gcm_addr); +int versal_pm_aes_init(void); + #else static inline int zynqmp_pm_aes_engine(const u64 address, u32 *out) { @@ -42,6 +64,56 @@ static inline void *xlnx_get_crypto_dev_data(struct xlnx_feature *feature_map) { return ERR_PTR(-ENODEV); } + +static inline int versal_pm_aes_key_write(const u32 keylen, + const u32 keysrc, const u64 keyaddr) +{ + return -ENODEV; +} + +static inline int versal_pm_aes_key_zero(const u32 keysrc) +{ + return -ENODEV; +} + +static inline int versal_pm_aes_op_init(const u64 hw_req) +{ + return -ENODEV; +} + +static inline int versal_pm_aes_update_aad(const u64 aad_addr, + const u32 aad_len) +{ + return -ENODEV; +} + +static inline int versal_pm_aes_enc_update(const u64 in_params, + const u64 in_addr) +{ + return -ENODEV; +} + +static inline int versal_pm_aes_dec_update(const u64 in_params, + const u64 in_addr) +{ + return -ENODEV; +} + +static inline int versal_pm_aes_enc_final(const u64 gcm_addr) +{ + return -ENODEV; +} + +static inline int versal_pm_aes_dec_final(const u64 gcm_addr) +{ + return -ENODEV; +} + +static inline int versal_pm_aes_init(void) +{ + return -ENODEV; +} + #endif #endif /* __FIRMWARE_XLNX_ZYNQMP_CRYPTO_H__ */ -- cgit v1.2.3 From 3296992ffc4362fff6e8fb979090b4638f50e066 Mon Sep 17 00:00:00 2001 From: Weili Qian Date: Sat, 17 Jan 2026 18:18:05 +0800 Subject: crypto: hisilicon/qm - obtain the mailbox configuration at one time The malibox needs to be triggered by a 128bit atomic operation. The reason is that the PF and VFs of the device share the mmio memory of the mailbox, and the mutex cannot lock mailbox operations in different functions, especially when passing through VFs to virtual machines. Currently, the write operation to the mailbox is already a 128-bit atomic write. The read operation also needs to be modified to a 128-bit atomic read. Since there is no general 128-bit IO memory access API in the current ARM64 architecture, and the stp and ldp instructions do not guarantee atomic access to device memory, they cannot be extracted as a general API. Therefore, the 128-bit atomic read and write operations need to be implemented in the driver. Signed-off-by: Weili Qian Signed-off-by: Chenghai Huang Signed-off-by: Herbert Xu --- drivers/crypto/hisilicon/qm.c | 133 +++++++++++++++++++++++++++--------------- include/linux/hisi_acc_qm.h | 1 + 2 files changed, 87 insertions(+), 47 deletions(-) (limited to 'include/linux') diff --git a/drivers/crypto/hisilicon/qm.c b/drivers/crypto/hisilicon/qm.c index 2bb94e8c6a07..94f96b6b38f5 100644 --- a/drivers/crypto/hisilicon/qm.c +++ b/drivers/crypto/hisilicon/qm.c @@ -31,6 +31,7 @@ /* mailbox */ #define QM_MB_PING_ALL_VFS 0xffff #define QM_MB_STATUS_MASK GENMASK(12, 9) +#define QM_MB_BUSY_MASK BIT(13) /* sqc shift */ #define QM_SQ_HOP_NUM_SHIFT 0 @@ -582,16 +583,30 @@ static void qm_mb_pre_init(struct qm_mailbox *mailbox, u8 cmd, mailbox->rsvd = 0; } -/* return 0 mailbox ready, -ETIMEDOUT hardware timeout */ -int hisi_qm_wait_mb_ready(struct hisi_qm *qm) +/* + * The mailbox is 128 bits and requires a single read/write operation. + * Since there is no general 128-bit IO memory access API in the current + * ARM64 architecture, this needs to be implemented in the driver. + */ +static struct qm_mailbox qm_mb_read(struct hisi_qm *qm) { - u32 val; + struct qm_mailbox mailbox = {0}; + +#if IS_ENABLED(CONFIG_ARM64) + const void __iomem *fun_base = qm->io_base + QM_MB_CMD_SEND_BASE; + unsigned long tmp0, tmp1; - return readl_relaxed_poll_timeout(qm->io_base + QM_MB_CMD_SEND_BASE, - val, !((val >> QM_MB_BUSY_SHIFT) & - 0x1), POLL_PERIOD, POLL_TIMEOUT); + asm volatile("ldp %0, %1, %3\n" + "stp %0, %1, %2\n" + : "=&r" (tmp0), + "=&r" (tmp1), + "+Q" (mailbox) + : "Q" (*((char __iomem *)fun_base)) + : "memory"); +#endif + + return mailbox; } -EXPORT_SYMBOL_GPL(hisi_qm_wait_mb_ready); /* 128 bit should be written to hardware at one time to trigger a mailbox */ static void qm_mb_write(struct hisi_qm *qm, const void *src) @@ -614,35 +629,61 @@ static void qm_mb_write(struct hisi_qm *qm, const void *src) #endif } -static int qm_mb_nolock(struct hisi_qm *qm, struct qm_mailbox *mailbox) +int hisi_qm_wait_mb_ready(struct hisi_qm *qm) { + struct qm_mailbox mailbox = {0}; int ret; - u32 val; - if (unlikely(hisi_qm_wait_mb_ready(qm))) { + ret = read_poll_timeout(qm_mb_read, mailbox, + !(le16_to_cpu(mailbox.w0) & QM_MB_BUSY_MASK), + POLL_PERIOD, POLL_TIMEOUT, + true, qm); + if (ret) dev_err(&qm->pdev->dev, "QM mailbox is busy to start!\n"); - ret = -EBUSY; - goto mb_busy; - } - qm_mb_write(qm, mailbox); + return ret; +} +EXPORT_SYMBOL_GPL(hisi_qm_wait_mb_ready); + +static int qm_wait_mb_finish(struct hisi_qm *qm, struct qm_mailbox *mailbox) +{ + struct device *dev = &qm->pdev->dev; + int ret; - if (unlikely(hisi_qm_wait_mb_ready(qm))) { - dev_err(&qm->pdev->dev, "QM mailbox operation timeout!\n"); - ret = -ETIMEDOUT; - goto mb_busy; + ret = read_poll_timeout(qm_mb_read, *mailbox, + !(le16_to_cpu(mailbox->w0) & QM_MB_BUSY_MASK), + POLL_PERIOD, POLL_TIMEOUT, + true, qm); + if (ret) { + dev_err(dev, "QM mailbox operation timeout!\n"); + return ret; } - val = readl(qm->io_base + QM_MB_CMD_SEND_BASE); - if (val & QM_MB_STATUS_MASK) { - dev_err(&qm->pdev->dev, "QM mailbox operation failed!\n"); - ret = -EIO; - goto mb_busy; + if (le16_to_cpu(mailbox->w0) & QM_MB_STATUS_MASK) { + dev_err(dev, "QM mailbox operation failed!\n"); + return -EIO; } return 0; +} + +static int qm_mb_nolock(struct hisi_qm *qm, struct qm_mailbox *mailbox) +{ + int ret; + + ret = hisi_qm_wait_mb_ready(qm); + if (ret) + goto mb_err_cnt_increase; + + qm_mb_write(qm, mailbox); + + ret = qm_wait_mb_finish(qm, mailbox); + if (ret) + goto mb_err_cnt_increase; + + return 0; -mb_busy: +mb_err_cnt_increase: atomic64_inc(&qm->debug.dfx.mb_err_cnt); return ret; } @@ -663,6 +704,25 @@ int hisi_qm_mb(struct hisi_qm *qm, u8 cmd, dma_addr_t dma_addr, u16 queue, } EXPORT_SYMBOL_GPL(hisi_qm_mb); +int hisi_qm_mb_read(struct hisi_qm *qm, u64 *base, u8 cmd, u16 queue) +{ + struct qm_mailbox mailbox; + int ret; + + qm_mb_pre_init(&mailbox, cmd, 0, queue, 1); + mutex_lock(&qm->mailbox_lock); + ret = qm_mb_nolock(qm, &mailbox); + mutex_unlock(&qm->mailbox_lock); + if (ret) + return ret; + + *base = le32_to_cpu(mailbox.base_l) | + ((u64)le32_to_cpu(mailbox.base_h) << 32); + + return 0; +} +EXPORT_SYMBOL_GPL(hisi_qm_mb_read); + /* op 0: set xqc information to hardware, 1: get xqc information from hardware. */ int qm_set_and_get_xqc(struct hisi_qm *qm, u8 cmd, void *xqc, u32 qp_id, bool op) { @@ -1379,12 +1439,10 @@ static int qm_get_vft_v2(struct hisi_qm *qm, u32 *base, u32 *number) u64 sqc_vft; int ret; - ret = hisi_qm_mb(qm, QM_MB_CMD_SQC_VFT_V2, 0, 0, 1); + ret = hisi_qm_mb_read(qm, &sqc_vft, QM_MB_CMD_SQC_VFT_V2, 0); if (ret) return ret; - sqc_vft = readl(qm->io_base + QM_MB_CMD_DATA_ADDR_L) | - ((u64)readl(qm->io_base + QM_MB_CMD_DATA_ADDR_H) << 32); *base = QM_SQC_VFT_BASE_MASK_V2 & (sqc_vft >> QM_SQC_VFT_BASE_SHIFT_V2); *number = (QM_SQC_VFT_NUM_MASK_V2 & (sqc_vft >> QM_SQC_VFT_NUM_SHIFT_V2)) + 1; @@ -1524,25 +1582,6 @@ static enum acc_err_result qm_hw_error_handle_v2(struct hisi_qm *qm) return ACC_ERR_RECOVERED; } -static int qm_get_mb_cmd(struct hisi_qm *qm, u64 *msg, u16 fun_num) -{ - struct qm_mailbox mailbox; - int ret; - - qm_mb_pre_init(&mailbox, QM_MB_CMD_DST, 0, fun_num, 0); - mutex_lock(&qm->mailbox_lock); - ret = qm_mb_nolock(qm, &mailbox); - if (ret) - goto err_unlock; - - *msg = readl(qm->io_base + QM_MB_CMD_DATA_ADDR_L) | - ((u64)readl(qm->io_base + QM_MB_CMD_DATA_ADDR_H) << 32); - -err_unlock: - mutex_unlock(&qm->mailbox_lock); - return ret; -} - static void qm_clear_cmd_interrupt(struct hisi_qm *qm, u64 vf_mask) { u32 val; @@ -1871,7 +1910,7 @@ static int qm_get_ifc_v3(struct hisi_qm *qm, enum qm_ifc_cmd *cmd, u32 *data, u3 u64 msg; int ret; - ret = qm_get_mb_cmd(qm, &msg, fun_num); + ret = hisi_qm_mb_read(qm, &msg, QM_MB_CMD_DST, fun_num); if (ret) return ret; diff --git a/include/linux/hisi_acc_qm.h b/include/linux/hisi_acc_qm.h index f517b9db8392..51a6dc2b97e9 100644 --- a/include/linux/hisi_acc_qm.h +++ b/include/linux/hisi_acc_qm.h @@ -571,6 +571,7 @@ void hisi_qm_reset_done(struct pci_dev *pdev); int hisi_qm_wait_mb_ready(struct hisi_qm *qm); int hisi_qm_mb(struct hisi_qm *qm, u8 cmd, dma_addr_t dma_addr, u16 queue, bool op); +int hisi_qm_mb_read(struct hisi_qm *qm, u64 *base, u8 cmd, u16 queue); struct hisi_acc_sgl_pool; struct hisi_acc_hw_sgl *hisi_acc_sg_buf_map_to_hw_sgl(struct device *dev, -- cgit v1.2.3 From cc2f39d6ac48e6e3cb2d6240bc0d6df839dd0828 Mon Sep 17 00:00:00 2001 From: Lianjie Wang Date: Fri, 30 Jan 2026 06:50:16 +0900 Subject: hwrng: core - use RCU and work_struct to fix race condition Currently, hwrng_fill is not cleared until the hwrng_fillfn() thread exits. Since hwrng_unregister() reads hwrng_fill outside the rng_mutex lock, a concurrent hwrng_unregister() may call kthread_stop() again on the same task. Additionally, if hwrng_unregister() is called immediately after hwrng_register(), the stopped thread may have never been executed. Thus, hwrng_fill remains dirty even after hwrng_unregister() returns. In this case, subsequent calls to hwrng_register() will fail to start new threads, and hwrng_unregister() will call kthread_stop() on the same freed task. In both cases, a use-after-free occurs: refcount_t: addition on 0; use-after-free. WARNING: ... at lib/refcount.c:25 refcount_warn_saturate+0xec/0x1c0 Call Trace: kthread_stop+0x181/0x360 hwrng_unregister+0x288/0x380 virtrng_remove+0xe3/0x200 This patch fixes the race by protecting the global hwrng_fill pointer inside the rng_mutex lock, so that hwrng_fillfn() thread is stopped only once, and calls to kthread_run() and kthread_stop() are serialized with the lock held. To avoid deadlock in hwrng_fillfn() while being stopped with the lock held, we convert current_rng to RCU, so that get_current_rng() can read current_rng without holding the lock. To remove the lock from put_rng(), we also delay the actual cleanup into a work_struct. Since get_current_rng() no longer returns ERR_PTR values, the IS_ERR() checks are removed from its callers. With hwrng_fill protected by the rng_mutex lock, hwrng_fillfn() can no longer clear hwrng_fill itself. Therefore, if hwrng_fillfn() returns directly after current_rng is dropped, kthread_stop() would be called on a freed task_struct later. To fix this, hwrng_fillfn() calls schedule() now to keep the task alive until being stopped. The kthread_stop() call is also moved from hwrng_unregister() to drop_current_rng(), ensuring kthread_stop() is called on all possible paths where current_rng becomes NULL, so that the thread would not wait forever. Fixes: be4000bc4644 ("hwrng: create filler thread") Suggested-by: Herbert Xu Signed-off-by: Lianjie Wang Signed-off-by: Herbert Xu --- drivers/char/hw_random/core.c | 168 ++++++++++++++++++++++++++---------------- include/linux/hw_random.h | 2 + 2 files changed, 107 insertions(+), 63 deletions(-) (limited to 'include/linux') diff --git a/drivers/char/hw_random/core.c b/drivers/char/hw_random/core.c index 96d7fe41b373..aba92d777f72 100644 --- a/drivers/char/hw_random/core.c +++ b/drivers/char/hw_random/core.c @@ -20,23 +20,25 @@ #include #include #include +#include #include #include #include #include #include +#include #define RNG_MODULE_NAME "hw_random" #define RNG_BUFFER_SIZE (SMP_CACHE_BYTES < 32 ? 32 : SMP_CACHE_BYTES) -static struct hwrng *current_rng; +static struct hwrng __rcu *current_rng; /* the current rng has been explicitly chosen by user via sysfs */ static int cur_rng_set_by_user; static struct task_struct *hwrng_fill; /* list of registered rngs */ static LIST_HEAD(rng_list); -/* Protects rng_list and current_rng */ +/* Protects rng_list, hwrng_fill and updating on current_rng */ static DEFINE_MUTEX(rng_mutex); /* Protects rng read functions, data_avail, rng_buffer and rng_fillbuf */ static DEFINE_MUTEX(reading_mutex); @@ -64,18 +66,39 @@ static size_t rng_buffer_size(void) return RNG_BUFFER_SIZE; } -static inline void cleanup_rng(struct kref *kref) +static void cleanup_rng_work(struct work_struct *work) { - struct hwrng *rng = container_of(kref, struct hwrng, ref); + struct hwrng *rng = container_of(work, struct hwrng, cleanup_work); + + /* + * Hold rng_mutex here so we serialize in case they set_current_rng + * on rng again immediately. + */ + mutex_lock(&rng_mutex); + + /* Skip if rng has been reinitialized. */ + if (kref_read(&rng->ref)) { + mutex_unlock(&rng_mutex); + return; + } if (rng->cleanup) rng->cleanup(rng); complete(&rng->cleanup_done); + mutex_unlock(&rng_mutex); +} + +static inline void cleanup_rng(struct kref *kref) +{ + struct hwrng *rng = container_of(kref, struct hwrng, ref); + + schedule_work(&rng->cleanup_work); } static int set_current_rng(struct hwrng *rng) { + struct hwrng *old_rng; int err; BUG_ON(!mutex_is_locked(&rng_mutex)); @@ -84,8 +107,14 @@ static int set_current_rng(struct hwrng *rng) if (err) return err; - drop_current_rng(); - current_rng = rng; + old_rng = rcu_dereference_protected(current_rng, + lockdep_is_held(&rng_mutex)); + rcu_assign_pointer(current_rng, rng); + + if (old_rng) { + synchronize_rcu(); + kref_put(&old_rng->ref, cleanup_rng); + } /* if necessary, start hwrng thread */ if (!hwrng_fill) { @@ -101,47 +130,56 @@ static int set_current_rng(struct hwrng *rng) static void drop_current_rng(void) { - BUG_ON(!mutex_is_locked(&rng_mutex)); - if (!current_rng) + struct hwrng *rng; + + rng = rcu_dereference_protected(current_rng, + lockdep_is_held(&rng_mutex)); + if (!rng) return; + RCU_INIT_POINTER(current_rng, NULL); + synchronize_rcu(); + + if (hwrng_fill) { + kthread_stop(hwrng_fill); + hwrng_fill = NULL; + } + /* decrease last reference for triggering the cleanup */ - kref_put(¤t_rng->ref, cleanup_rng); - current_rng = NULL; + kref_put(&rng->ref, cleanup_rng); } -/* Returns ERR_PTR(), NULL or refcounted hwrng */ +/* Returns NULL or refcounted hwrng */ static struct hwrng *get_current_rng_nolock(void) { - if (current_rng) - kref_get(¤t_rng->ref); + struct hwrng *rng; + + rng = rcu_dereference_protected(current_rng, + lockdep_is_held(&rng_mutex)); + if (rng) + kref_get(&rng->ref); - return current_rng; + return rng; } static struct hwrng *get_current_rng(void) { struct hwrng *rng; - if (mutex_lock_interruptible(&rng_mutex)) - return ERR_PTR(-ERESTARTSYS); + rcu_read_lock(); + rng = rcu_dereference(current_rng); + if (rng) + kref_get(&rng->ref); - rng = get_current_rng_nolock(); + rcu_read_unlock(); - mutex_unlock(&rng_mutex); return rng; } static void put_rng(struct hwrng *rng) { - /* - * Hold rng_mutex here so we serialize in case they set_current_rng - * on rng again immediately. - */ - mutex_lock(&rng_mutex); if (rng) kref_put(&rng->ref, cleanup_rng); - mutex_unlock(&rng_mutex); } static int hwrng_init(struct hwrng *rng) @@ -213,10 +251,6 @@ static ssize_t rng_dev_read(struct file *filp, char __user *buf, while (size) { rng = get_current_rng(); - if (IS_ERR(rng)) { - err = PTR_ERR(rng); - goto out; - } if (!rng) { err = -ENODEV; goto out; @@ -303,7 +337,7 @@ static struct miscdevice rng_miscdev = { static int enable_best_rng(void) { - struct hwrng *rng, *new_rng = NULL; + struct hwrng *rng, *cur_rng, *new_rng = NULL; int ret = -ENODEV; BUG_ON(!mutex_is_locked(&rng_mutex)); @@ -321,7 +355,9 @@ static int enable_best_rng(void) new_rng = rng; } - ret = ((new_rng == current_rng) ? 0 : set_current_rng(new_rng)); + cur_rng = rcu_dereference_protected(current_rng, + lockdep_is_held(&rng_mutex)); + ret = ((new_rng == cur_rng) ? 0 : set_current_rng(new_rng)); if (!ret) cur_rng_set_by_user = 0; @@ -371,8 +407,6 @@ static ssize_t rng_current_show(struct device *dev, struct hwrng *rng; rng = get_current_rng(); - if (IS_ERR(rng)) - return PTR_ERR(rng); ret = sysfs_emit(buf, "%s\n", rng ? rng->name : "none"); put_rng(rng); @@ -416,8 +450,6 @@ static ssize_t rng_quality_show(struct device *dev, struct hwrng *rng; rng = get_current_rng(); - if (IS_ERR(rng)) - return PTR_ERR(rng); if (!rng) /* no need to put_rng */ return -ENODEV; @@ -432,6 +464,7 @@ static ssize_t rng_quality_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t len) { + struct hwrng *rng; u16 quality; int ret = -EINVAL; @@ -448,12 +481,13 @@ static ssize_t rng_quality_store(struct device *dev, goto out; } - if (!current_rng) { + rng = rcu_dereference_protected(current_rng, lockdep_is_held(&rng_mutex)); + if (!rng) { ret = -ENODEV; goto out; } - current_rng->quality = quality; + rng->quality = quality; current_quality = quality; /* obsolete */ /* the best available RNG may have changed */ @@ -489,8 +523,20 @@ static int hwrng_fillfn(void *unused) struct hwrng *rng; rng = get_current_rng(); - if (IS_ERR(rng) || !rng) + if (!rng) { + /* + * Keep the task_struct alive until kthread_stop() + * is called to avoid UAF in drop_current_rng(). + */ + while (!kthread_should_stop()) { + set_current_state(TASK_INTERRUPTIBLE); + if (!kthread_should_stop()) + schedule(); + } + set_current_state(TASK_RUNNING); break; + } + mutex_lock(&reading_mutex); rc = rng_get_data(rng, rng_fillbuf, rng_buffer_size(), 1); @@ -518,14 +564,13 @@ static int hwrng_fillfn(void *unused) add_hwgenerator_randomness((void *)rng_fillbuf, rc, entropy >> 10, true); } - hwrng_fill = NULL; return 0; } int hwrng_register(struct hwrng *rng) { int err = -EINVAL; - struct hwrng *tmp; + struct hwrng *cur_rng, *tmp; if (!rng->name || (!rng->data_read && !rng->read)) goto out; @@ -540,6 +585,7 @@ int hwrng_register(struct hwrng *rng) } list_add_tail(&rng->list, &rng_list); + INIT_WORK(&rng->cleanup_work, cleanup_rng_work); init_completion(&rng->cleanup_done); complete(&rng->cleanup_done); init_completion(&rng->dying); @@ -547,16 +593,19 @@ int hwrng_register(struct hwrng *rng) /* Adjust quality field to always have a proper value */ rng->quality = min3(default_quality, 1024, rng->quality ?: 1024); - if (!cur_rng_set_by_user && - (!current_rng || rng->quality > current_rng->quality)) { - /* - * Set new rng as current as the new rng source - * provides better entropy quality and was not - * chosen by userspace. - */ - err = set_current_rng(rng); - if (err) - goto out_unlock; + if (!cur_rng_set_by_user) { + cur_rng = rcu_dereference_protected(current_rng, + lockdep_is_held(&rng_mutex)); + if (!cur_rng || rng->quality > cur_rng->quality) { + /* + * Set new rng as current as the new rng source + * provides better entropy quality and was not + * chosen by userspace. + */ + err = set_current_rng(rng); + if (err) + goto out_unlock; + } } mutex_unlock(&rng_mutex); return 0; @@ -569,14 +618,17 @@ EXPORT_SYMBOL_GPL(hwrng_register); void hwrng_unregister(struct hwrng *rng) { - struct hwrng *new_rng; + struct hwrng *cur_rng; int err; mutex_lock(&rng_mutex); list_del(&rng->list); complete_all(&rng->dying); - if (current_rng == rng) { + + cur_rng = rcu_dereference_protected(current_rng, + lockdep_is_held(&rng_mutex)); + if (cur_rng == rng) { err = enable_best_rng(); if (err) { drop_current_rng(); @@ -584,17 +636,7 @@ void hwrng_unregister(struct hwrng *rng) } } - new_rng = get_current_rng_nolock(); - if (list_empty(&rng_list)) { - mutex_unlock(&rng_mutex); - if (hwrng_fill) - kthread_stop(hwrng_fill); - } else - mutex_unlock(&rng_mutex); - - if (new_rng) - put_rng(new_rng); - + mutex_unlock(&rng_mutex); wait_for_completion(&rng->cleanup_done); } EXPORT_SYMBOL_GPL(hwrng_unregister); @@ -682,7 +724,7 @@ static int __init hwrng_modinit(void) static void __exit hwrng_modexit(void) { mutex_lock(&rng_mutex); - BUG_ON(current_rng); + WARN_ON(rcu_access_pointer(current_rng)); kfree(rng_buffer); kfree(rng_fillbuf); mutex_unlock(&rng_mutex); diff --git a/include/linux/hw_random.h b/include/linux/hw_random.h index b424555753b1..b77bc55a4cf3 100644 --- a/include/linux/hw_random.h +++ b/include/linux/hw_random.h @@ -15,6 +15,7 @@ #include #include #include +#include /** * struct hwrng - Hardware Random Number Generator driver @@ -48,6 +49,7 @@ struct hwrng { /* internal. */ struct list_head list; struct kref ref; + struct work_struct cleanup_work; struct completion cleanup_done; struct completion dying; }; -- cgit v1.2.3