diff options
| author | Maxime Ripard <mripard@kernel.org> | 2026-02-23 12:09:45 +0300 |
|---|---|---|
| committer | Maxime Ripard <mripard@kernel.org> | 2026-02-23 12:09:45 +0300 |
| commit | c17ee635fd3a482b2ad2bf5e269755c2eae5f25e (patch) | |
| tree | e3f147462d8a9fd0cf2312c8cd3c5a94da15c3e4 /drivers/accel | |
| parent | 803ec1faf7c1823e6e3b1f2aaa81be18528c9436 (diff) | |
| parent | 6de23f81a5e08be8fbf5e8d7e9febc72a5b5f27f (diff) | |
| download | linux-c17ee635fd3a482b2ad2bf5e269755c2eae5f25e.tar.xz | |
Merge drm/drm-fixes into drm-misc-fixes
7.0-rc1 was just released, let's merge it to kick the new release cycle.
Signed-off-by: Maxime Ripard <mripard@kernel.org>
Diffstat (limited to 'drivers/accel')
76 files changed, 541 insertions, 623 deletions
diff --git a/drivers/accel/amdxdna/Makefile b/drivers/accel/amdxdna/Makefile index 6344aaf523fa..3fa0e74fd8f5 100644 --- a/drivers/accel/amdxdna/Makefile +++ b/drivers/accel/amdxdna/Makefile @@ -18,7 +18,6 @@ amdxdna-y := \ amdxdna_sysfs.o \ amdxdna_ubuf.o \ npu1_regs.o \ - npu2_regs.o \ npu4_regs.o \ npu5_regs.o \ npu6_regs.o diff --git a/drivers/accel/amdxdna/aie2_ctx.c b/drivers/accel/amdxdna/aie2_ctx.c index 42d876a427c5..4503c7c77a3e 100644 --- a/drivers/accel/amdxdna/aie2_ctx.c +++ b/drivers/accel/amdxdna/aie2_ctx.c @@ -47,17 +47,6 @@ static void aie2_job_put(struct amdxdna_sched_job *job) kref_put(&job->refcnt, aie2_job_release); } -static void aie2_hwctx_status_shift_stop(struct amdxdna_hwctx *hwctx) -{ - hwctx->old_status = hwctx->status; - hwctx->status = HWCTX_STAT_STOP; -} - -static void aie2_hwctx_status_restore(struct amdxdna_hwctx *hwctx) -{ - hwctx->status = hwctx->old_status; -} - /* The bad_job is used in aie2_sched_job_timedout, otherwise, set it to NULL */ static void aie2_hwctx_stop(struct amdxdna_dev *xdna, struct amdxdna_hwctx *hwctx, struct drm_sched_job *bad_job) @@ -84,11 +73,6 @@ static int aie2_hwctx_restart(struct amdxdna_dev *xdna, struct amdxdna_hwctx *hw goto out; } - if (hwctx->status != HWCTX_STAT_READY) { - XDNA_DBG(xdna, "hwctx is not ready, status %d", hwctx->status); - goto out; - } - ret = aie2_config_cu(hwctx, NULL); if (ret) { XDNA_ERR(xdna, "Config cu failed, ret %d", ret); @@ -140,7 +124,6 @@ static int aie2_hwctx_suspend_cb(struct amdxdna_hwctx *hwctx, void *arg) aie2_hwctx_wait_for_idle(hwctx); aie2_hwctx_stop(xdna, hwctx, NULL); - aie2_hwctx_status_shift_stop(hwctx); return 0; } @@ -162,7 +145,6 @@ static int aie2_hwctx_resume_cb(struct amdxdna_hwctx *hwctx, void *arg) { struct amdxdna_dev *xdna = hwctx->client->xdna; - aie2_hwctx_status_restore(hwctx); return aie2_hwctx_restart(xdna, hwctx); } @@ -292,7 +274,7 @@ aie2_sched_cmdlist_resp_handler(void *handle, void __iomem *data, size_t size) ret = -EINVAL; goto out; } - amdxdna_cmd_set_state(cmd_abo, fail_cmd_status); + amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_ERROR); if (amdxdna_cmd_get_op(cmd_abo) == ERT_CMD_CHAIN) { struct amdxdna_cmd_chain *cc = amdxdna_cmd_get_payload(cmd_abo, NULL); @@ -315,12 +297,19 @@ aie2_sched_job_run(struct drm_sched_job *sched_job) struct dma_fence *fence; int ret; + if (!hwctx->priv->mbox_chann) + return NULL; + if (!mmget_not_zero(job->mm)) return ERR_PTR(-ESRCH); kref_get(&job->refcnt); fence = dma_fence_get(job->fence); + ret = amdxdna_pm_resume_get(hwctx->client->xdna); + if (ret) + goto out; + if (job->drv_cmd) { switch (job->drv_cmd->opcode) { case SYNC_DEBUG_BO: @@ -347,6 +336,7 @@ aie2_sched_job_run(struct drm_sched_job *sched_job) out: if (ret) { + amdxdna_pm_suspend_put(hwctx->client->xdna); dma_fence_put(job->fence); aie2_job_put(job); mmput(job->mm); @@ -468,7 +458,13 @@ static int aie2_alloc_resource(struct amdxdna_hwctx *hwctx) struct alloc_requests *xrs_req; int ret; - xrs_req = kzalloc(sizeof(*xrs_req), GFP_KERNEL); + if (AIE2_FEATURE_ON(xdna->dev_handle, AIE2_TEMPORAL_ONLY)) { + hwctx->num_unused_col = xdna->dev_handle->total_col - hwctx->num_col; + hwctx->num_col = xdna->dev_handle->total_col; + return aie2_create_context(xdna->dev_handle, hwctx); + } + + xrs_req = kzalloc_obj(*xrs_req); if (!xrs_req) return -ENOMEM; @@ -499,9 +495,15 @@ static void aie2_release_resource(struct amdxdna_hwctx *hwctx) struct amdxdna_dev *xdna = hwctx->client->xdna; int ret; - ret = xrs_release_resource(xdna->xrs_hdl, (uintptr_t)hwctx); - if (ret) - XDNA_ERR(xdna, "Release AIE resource failed, ret %d", ret); + if (AIE2_FEATURE_ON(xdna->dev_handle, AIE2_TEMPORAL_ONLY)) { + ret = aie2_destroy_context(xdna->dev_handle, hwctx); + if (ret) + XDNA_ERR(xdna, "Destroy temporal only context failed, ret %d", ret); + } else { + ret = xrs_release_resource(xdna->xrs_hdl, (uintptr_t)hwctx); + if (ret) + XDNA_ERR(xdna, "Release AIE resource failed, ret %d", ret); + } } static int aie2_ctx_syncobj_create(struct amdxdna_hwctx *hwctx) @@ -557,7 +559,7 @@ int aie2_hwctx_init(struct amdxdna_hwctx *hwctx) struct amdxdna_gem_obj *heap; int i, ret; - priv = kzalloc(sizeof(*hwctx->priv), GFP_KERNEL); + priv = kzalloc_obj(*hwctx->priv); if (!priv) return -ENOMEM; hwctx->priv = priv; @@ -651,7 +653,6 @@ int aie2_hwctx_init(struct amdxdna_hwctx *hwctx) } amdxdna_pm_suspend_put(xdna); - hwctx->status = HWCTX_STAT_INIT; init_waitqueue_head(&priv->job_free_wq); XDNA_DBG(xdna, "hwctx %s init completed", hwctx->name); @@ -693,7 +694,9 @@ void aie2_hwctx_fini(struct amdxdna_hwctx *hwctx) aie2_hwctx_wait_for_idle(hwctx); /* Request fw to destroy hwctx and cancel the rest pending requests */ + drm_sched_stop(&hwctx->priv->sched, NULL); aie2_release_resource(hwctx); + drm_sched_start(&hwctx->priv->sched, 0); mutex_unlock(&xdna->dev_lock); drm_sched_entity_destroy(&hwctx->priv->entity); @@ -737,7 +740,7 @@ static int aie2_hwctx_cu_config(struct amdxdna_hwctx *hwctx, void *buf, u32 size if (XDNA_MBZ_DBG(xdna, config->pad, sizeof(config->pad))) return -EINVAL; - if (hwctx->status != HWCTX_STAT_INIT) { + if (hwctx->cus) { XDNA_ERR(xdna, "Not support re-config CU"); return -EINVAL; } @@ -768,7 +771,6 @@ static int aie2_hwctx_cu_config(struct amdxdna_hwctx *hwctx, void *buf, u32 size } wmb(); /* To avoid locking in command submit when check status */ - hwctx->status = HWCTX_STAT_READY; return 0; @@ -991,15 +993,11 @@ int aie2_cmd_submit(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job, goto free_chain; } - ret = amdxdna_pm_resume_get(xdna); - if (ret) - goto cleanup_job; - retry: ret = drm_gem_lock_reservations(job->bos, job->bo_cnt, &acquire_ctx); if (ret) { XDNA_WARN(xdna, "Failed to lock BOs, ret %d", ret); - goto suspend_put; + goto cleanup_job; } for (i = 0; i < job->bo_cnt; i++) { @@ -1007,7 +1005,7 @@ retry: if (ret) { XDNA_WARN(xdna, "Failed to reserve fences %d", ret); drm_gem_unlock_reservations(job->bos, job->bo_cnt, &acquire_ctx); - goto suspend_put; + goto cleanup_job; } } @@ -1022,12 +1020,12 @@ retry: msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT); } else if (time_after(jiffies, timeout)) { ret = -ETIME; - goto suspend_put; + goto cleanup_job; } ret = aie2_populate_range(abo); if (ret) - goto suspend_put; + goto cleanup_job; goto retry; } } @@ -1053,8 +1051,6 @@ retry: return 0; -suspend_put: - amdxdna_pm_suspend_put(xdna); cleanup_job: drm_sched_job_cleanup(&job->base); free_chain: diff --git a/drivers/accel/amdxdna/aie2_error.c b/drivers/accel/amdxdna/aie2_error.c index d452008ec4f4..9f0ea1d9c05f 100644 --- a/drivers/accel/amdxdna/aie2_error.c +++ b/drivers/accel/amdxdna/aie2_error.c @@ -338,8 +338,7 @@ void aie2_error_async_events_free(struct amdxdna_dev_hdl *ndev) destroy_workqueue(events->wq); mutex_lock(&xdna->dev_lock); - dma_free_noncoherent(xdna->ddev.dev, events->size, events->buf, - events->addr, DMA_FROM_DEVICE); + aie2_free_msg_buffer(ndev, events->size, events->buf, events->addr); kfree(events); } @@ -351,12 +350,12 @@ int aie2_error_async_events_alloc(struct amdxdna_dev_hdl *ndev) struct async_events *events; int i, ret; - events = kzalloc(struct_size(events, event, total_col), GFP_KERNEL); + events = kzalloc_flex(*events, event, total_col); if (!events) return -ENOMEM; - events->buf = dma_alloc_noncoherent(xdna->ddev.dev, total_size, &events->addr, - DMA_FROM_DEVICE, GFP_KERNEL); + events->buf = aie2_alloc_msg_buffer(ndev, &total_size, &events->addr); + if (!events->buf) { ret = -ENOMEM; goto free_events; @@ -396,8 +395,7 @@ int aie2_error_async_events_alloc(struct amdxdna_dev_hdl *ndev) free_wq: destroy_workqueue(events->wq); free_buf: - dma_free_noncoherent(xdna->ddev.dev, events->size, events->buf, - events->addr, DMA_FROM_DEVICE); + aie2_free_msg_buffer(ndev, events->size, events->buf, events->addr); free_events: kfree(events); return ret; diff --git a/drivers/accel/amdxdna/aie2_message.c b/drivers/accel/amdxdna/aie2_message.c index d493bb1c3360..7d7dcfeaf794 100644 --- a/drivers/accel/amdxdna/aie2_message.c +++ b/drivers/accel/amdxdna/aie2_message.c @@ -39,7 +39,6 @@ static int aie2_send_mgmt_msg_wait(struct amdxdna_dev_hdl *ndev, if (!ndev->mgmt_chann) return -ENODEV; - drm_WARN_ON(&xdna->ddev, xdna->rpm_on && !mutex_is_locked(&xdna->dev_lock)); ret = xdna_send_msg_wait(xdna, ndev->mgmt_chann, msg); if (ret == -ETIME) { xdna_mailbox_stop_channel(ndev->mgmt_chann); @@ -56,11 +55,34 @@ static int aie2_send_mgmt_msg_wait(struct amdxdna_dev_hdl *ndev, return ret; } +void *aie2_alloc_msg_buffer(struct amdxdna_dev_hdl *ndev, u32 *size, + dma_addr_t *dma_addr) +{ + struct amdxdna_dev *xdna = ndev->xdna; + int order; + + *size = max(*size, SZ_8K); + order = get_order(*size); + if (order > MAX_PAGE_ORDER) + return NULL; + *size = PAGE_SIZE << order; + + return dma_alloc_noncoherent(xdna->ddev.dev, *size, dma_addr, + DMA_FROM_DEVICE, GFP_KERNEL); +} + int aie2_suspend_fw(struct amdxdna_dev_hdl *ndev) { DECLARE_AIE2_MSG(suspend, MSG_OP_SUSPEND); + int ret; - return aie2_send_mgmt_msg_wait(ndev, &msg); + ret = aie2_send_mgmt_msg_wait(ndev, &msg); + if (ret) { + XDNA_ERR(ndev->xdna, "Failed to suspend fw, ret %d", ret); + return ret; + } + + return aie2_psp_waitmode_poll(ndev->psp_hdl); } int aie2_resume_fw(struct amdxdna_dev_hdl *ndev) @@ -186,6 +208,40 @@ int aie2_query_firmware_version(struct amdxdna_dev_hdl *ndev, return 0; } +static int aie2_destroy_context_req(struct amdxdna_dev_hdl *ndev, u32 id) +{ + DECLARE_AIE2_MSG(destroy_ctx, MSG_OP_DESTROY_CONTEXT); + struct amdxdna_dev *xdna = ndev->xdna; + int ret; + + req.context_id = id; + ret = aie2_send_mgmt_msg_wait(ndev, &msg); + if (ret) + XDNA_WARN(xdna, "Destroy context failed, ret %d", ret); + + return ret; +} + +static u32 aie2_get_context_priority(struct amdxdna_dev_hdl *ndev, + struct amdxdna_hwctx *hwctx) +{ + if (!AIE2_FEATURE_ON(ndev, AIE2_PREEMPT)) + return PRIORITY_HIGH; + + switch (hwctx->qos.priority) { + case AMDXDNA_QOS_REALTIME_PRIORITY: + return PRIORITY_REALTIME; + case AMDXDNA_QOS_HIGH_PRIORITY: + return PRIORITY_HIGH; + case AMDXDNA_QOS_NORMAL_PRIORITY: + return PRIORITY_NORMAL; + case AMDXDNA_QOS_LOW_PRIORITY: + return PRIORITY_LOW; + default: + return PRIORITY_HIGH; + } +} + int aie2_create_context(struct amdxdna_dev_hdl *ndev, struct amdxdna_hwctx *hwctx) { DECLARE_AIE2_MSG(create_ctx, MSG_OP_CREATE_CONTEXT); @@ -199,22 +255,24 @@ int aie2_create_context(struct amdxdna_dev_hdl *ndev, struct amdxdna_hwctx *hwct req.aie_type = 1; req.start_col = hwctx->start_col; req.num_col = hwctx->num_col; + req.num_unused_col = hwctx->num_unused_col; req.num_cq_pairs_requested = 1; req.pasid = hwctx->client->pasid; - req.context_priority = 2; + req.context_priority = aie2_get_context_priority(ndev, hwctx); ret = aie2_send_mgmt_msg_wait(ndev, &msg); if (ret) return ret; hwctx->fw_ctx_id = resp.context_id; - WARN_ONCE(hwctx->fw_ctx_id == -1, "Unexpected context id"); + if (WARN_ON_ONCE(hwctx->fw_ctx_id == -1)) + return -EINVAL; if (ndev->force_preempt_enabled) { ret = aie2_runtime_cfg(ndev, AIE2_RT_CFG_FORCE_PREEMPT, &hwctx->fw_ctx_id); if (ret) { XDNA_ERR(xdna, "failed to enable force preempt %d", ret); - return ret; + goto del_ctx_req; } } @@ -231,51 +289,39 @@ int aie2_create_context(struct amdxdna_dev_hdl *ndev, struct amdxdna_hwctx *hwct ret = pci_irq_vector(to_pci_dev(xdna->ddev.dev), resp.msix_id); if (ret == -EINVAL) { - XDNA_ERR(xdna, "not able to create channel"); - goto out_destroy_context; + XDNA_ERR(xdna, "Alloc IRQ failed %d", ret); + goto del_ctx_req; } intr_reg = i2x.mb_head_ptr_reg + 4; hwctx->priv->mbox_chann = xdna_mailbox_create_channel(ndev->mbox, &x2i, &i2x, intr_reg, ret); if (!hwctx->priv->mbox_chann) { - XDNA_ERR(xdna, "not able to create channel"); + XDNA_ERR(xdna, "Not able to create channel"); ret = -EINVAL; - goto out_destroy_context; + goto del_ctx_req; } ndev->hwctx_num++; - XDNA_DBG(xdna, "%s mailbox channel irq: %d, msix_id: %d", - hwctx->name, ret, resp.msix_id); - XDNA_DBG(xdna, "%s created fw ctx %d pasid %d", hwctx->name, - hwctx->fw_ctx_id, hwctx->client->pasid); + XDNA_DBG(xdna, "Mailbox channel irq: %d, msix_id: %d", ret, resp.msix_id); + XDNA_DBG(xdna, "Created fw ctx %d pasid %d", hwctx->fw_ctx_id, hwctx->client->pasid); return 0; -out_destroy_context: - aie2_destroy_context(ndev, hwctx); +del_ctx_req: + aie2_destroy_context_req(ndev, hwctx->fw_ctx_id); return ret; } int aie2_destroy_context(struct amdxdna_dev_hdl *ndev, struct amdxdna_hwctx *hwctx) { - DECLARE_AIE2_MSG(destroy_ctx, MSG_OP_DESTROY_CONTEXT); struct amdxdna_dev *xdna = ndev->xdna; int ret; - if (hwctx->fw_ctx_id == -1) - return 0; - xdna_mailbox_stop_channel(hwctx->priv->mbox_chann); - - req.context_id = hwctx->fw_ctx_id; - ret = aie2_send_mgmt_msg_wait(ndev, &msg); - if (ret) - XDNA_WARN(xdna, "%s destroy context failed, ret %d", hwctx->name, ret); - + ret = aie2_destroy_context_req(ndev, hwctx->fw_ctx_id); xdna_mailbox_destroy_channel(hwctx->priv->mbox_chann); - XDNA_DBG(xdna, "%s destroyed fw ctx %d", hwctx->name, - hwctx->fw_ctx_id); + XDNA_DBG(xdna, "Destroyed fw ctx %d", hwctx->fw_ctx_id); hwctx->priv->mbox_chann = NULL; hwctx->fw_ctx_id = -1; ndev->hwctx_num--; @@ -316,14 +362,13 @@ int aie2_query_status(struct amdxdna_dev_hdl *ndev, char __user *buf, { DECLARE_AIE2_MSG(aie_column_info, MSG_OP_QUERY_COL_STATUS); struct amdxdna_dev *xdna = ndev->xdna; + u32 buf_sz = size, aie_bitmap = 0; struct amdxdna_client *client; dma_addr_t dma_addr; - u32 aie_bitmap = 0; u8 *buff_addr; int ret; - buff_addr = dma_alloc_noncoherent(xdna->ddev.dev, size, &dma_addr, - DMA_FROM_DEVICE, GFP_KERNEL); + buff_addr = aie2_alloc_msg_buffer(ndev, &buf_sz, &dma_addr); if (!buff_addr) return -ENOMEM; @@ -333,7 +378,7 @@ int aie2_query_status(struct amdxdna_dev_hdl *ndev, char __user *buf, *cols_filled = 0; req.dump_buff_addr = dma_addr; - req.dump_buff_size = size; + req.dump_buff_size = buf_sz; req.num_cols = hweight32(aie_bitmap); req.aie_bitmap = aie_bitmap; @@ -361,7 +406,7 @@ int aie2_query_status(struct amdxdna_dev_hdl *ndev, char __user *buf, *cols_filled = aie_bitmap; fail: - dma_free_noncoherent(xdna->ddev.dev, size, buff_addr, dma_addr, DMA_FROM_DEVICE); + aie2_free_msg_buffer(ndev, buf_sz, buff_addr, dma_addr); return ret; } @@ -372,19 +417,19 @@ int aie2_query_telemetry(struct amdxdna_dev_hdl *ndev, DECLARE_AIE2_MSG(get_telemetry, MSG_OP_GET_TELEMETRY); struct amdxdna_dev *xdna = ndev->xdna; dma_addr_t dma_addr; + u32 buf_sz = size; u8 *addr; int ret; if (header->type >= MAX_TELEMETRY_TYPE) return -EINVAL; - addr = dma_alloc_noncoherent(xdna->ddev.dev, size, &dma_addr, - DMA_FROM_DEVICE, GFP_KERNEL); + addr = aie2_alloc_msg_buffer(ndev, &buf_sz, &dma_addr); if (!addr) return -ENOMEM; req.buf_addr = dma_addr; - req.buf_size = size; + req.buf_size = buf_sz; req.type = header->type; drm_clflush_virt_range(addr, size); /* device can access */ @@ -410,7 +455,7 @@ int aie2_query_telemetry(struct amdxdna_dev_hdl *ndev, header->minor = resp.minor; free_buf: - dma_free_noncoherent(xdna->ddev.dev, size, addr, dma_addr, DMA_FROM_DEVICE); + aie2_free_msg_buffer(ndev, buf_sz, addr, dma_addr); return ret; } @@ -448,6 +493,9 @@ int aie2_config_cu(struct amdxdna_hwctx *hwctx, if (!chann) return -ENODEV; + if (!hwctx->cus) + return 0; + if (hwctx->cus->num_cus > MAX_NUM_CUS) { XDNA_DBG(xdna, "Exceed maximum CU %d", MAX_NUM_CUS); return -EINVAL; @@ -646,6 +694,7 @@ aie2_cmdlist_fill_npu_cf(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t *siz u32 cmd_len; void *cmd; + memset(npu_slot, 0, sizeof(*npu_slot)); cmd = amdxdna_cmd_get_payload(cmd_bo, &cmd_len); if (*size < sizeof(*npu_slot) + cmd_len) return -EINVAL; @@ -654,7 +703,6 @@ aie2_cmdlist_fill_npu_cf(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t *siz if (npu_slot->cu_idx == INVALID_CU_IDX) return -EINVAL; - memset(npu_slot, 0, sizeof(*npu_slot)); npu_slot->type = EXEC_NPU_TYPE_NON_ELF; npu_slot->arg_cnt = cmd_len / sizeof(u32); memcpy(npu_slot->args, cmd, cmd_len); @@ -671,6 +719,7 @@ aie2_cmdlist_fill_npu_dpu(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t *si u32 cmd_len; u32 arg_sz; + memset(npu_slot, 0, sizeof(*npu_slot)); sn = amdxdna_cmd_get_payload(cmd_bo, &cmd_len); arg_sz = cmd_len - sizeof(*sn); if (cmd_len < sizeof(*sn) || arg_sz > MAX_NPU_ARGS_SIZE) @@ -683,7 +732,6 @@ aie2_cmdlist_fill_npu_dpu(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t *si if (npu_slot->cu_idx == INVALID_CU_IDX) return -EINVAL; - memset(npu_slot, 0, sizeof(*npu_slot)); npu_slot->type = EXEC_NPU_TYPE_PARTIAL_ELF; npu_slot->inst_buf_addr = sn->buffer; npu_slot->inst_size = sn->buffer_size; @@ -703,6 +751,7 @@ aie2_cmdlist_fill_npu_preempt(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t u32 cmd_len; u32 arg_sz; + memset(npu_slot, 0, sizeof(*npu_slot)); pd = amdxdna_cmd_get_payload(cmd_bo, &cmd_len); arg_sz = cmd_len - sizeof(*pd); if (cmd_len < sizeof(*pd) || arg_sz > MAX_NPU_ARGS_SIZE) @@ -715,7 +764,6 @@ aie2_cmdlist_fill_npu_preempt(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t if (npu_slot->cu_idx == INVALID_CU_IDX) return -EINVAL; - memset(npu_slot, 0, sizeof(*npu_slot)); npu_slot->type = EXEC_NPU_TYPE_PREEMPT; npu_slot->inst_buf_addr = pd->inst_buf; npu_slot->save_buf_addr = pd->save_buf; @@ -739,6 +787,7 @@ aie2_cmdlist_fill_npu_elf(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t *si u32 cmd_len; u32 arg_sz; + memset(npu_slot, 0, sizeof(*npu_slot)); pd = amdxdna_cmd_get_payload(cmd_bo, &cmd_len); arg_sz = cmd_len - sizeof(*pd); if (cmd_len < sizeof(*pd) || arg_sz > MAX_NPU_ARGS_SIZE) @@ -747,7 +796,6 @@ aie2_cmdlist_fill_npu_elf(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t *si if (*size < sizeof(*npu_slot) + arg_sz) return -EINVAL; - memset(npu_slot, 0, sizeof(*npu_slot)); npu_slot->type = EXEC_NPU_TYPE_ELF; npu_slot->inst_buf_addr = pd->inst_buf; npu_slot->save_buf_addr = pd->save_buf; diff --git a/drivers/accel/amdxdna/aie2_msg_priv.h b/drivers/accel/amdxdna/aie2_msg_priv.h index 1c957a6298d3..728ef56f7f0a 100644 --- a/drivers/accel/amdxdna/aie2_msg_priv.h +++ b/drivers/accel/amdxdna/aie2_msg_priv.h @@ -108,11 +108,17 @@ struct cq_pair { struct cq_info i2x_q; }; +#define PRIORITY_REALTIME 1 +#define PRIORITY_HIGH 2 +#define PRIORITY_NORMAL 3 +#define PRIORITY_LOW 4 + struct create_ctx_req { __u32 aie_type; __u8 start_col; __u8 num_col; - __u16 reserved; + __u8 num_unused_col; + __u8 reserved; __u8 num_cq_pairs_requested; __u8 reserved1; __u16 pasid; diff --git a/drivers/accel/amdxdna/aie2_pci.c b/drivers/accel/amdxdna/aie2_pci.c index 8141d8e51636..2a51b2658bfc 100644 --- a/drivers/accel/amdxdna/aie2_pci.c +++ b/drivers/accel/amdxdna/aie2_pci.c @@ -57,41 +57,23 @@ struct mgmt_mbox_chann_info { static int aie2_check_protocol(struct amdxdna_dev_hdl *ndev, u32 fw_major, u32 fw_minor) { const struct aie2_fw_feature_tbl *feature; - struct amdxdna_dev *xdna = ndev->xdna; - - /* - * The driver supported mailbox behavior is defined by - * ndev->priv->protocol_major and protocol_minor. - * - * When protocol_major and fw_major are different, it means driver - * and firmware are incompatible. - */ - if (ndev->priv->protocol_major != fw_major) { - XDNA_ERR(xdna, "Incompatible firmware protocol major %d minor %d", - fw_major, fw_minor); - return -EINVAL; - } + bool found = false; - /* - * When protocol_minor is greater then fw_minor, that means driver - * relies on operation the installed firmware does not support. - */ - if (ndev->priv->protocol_minor > fw_minor) { - XDNA_ERR(xdna, "Firmware minor version smaller than supported"); - return -EINVAL; - } - - for (feature = ndev->priv->fw_feature_tbl; feature && feature->min_minor; - feature++) { + for (feature = ndev->priv->fw_feature_tbl; feature->major; feature++) { + if (feature->major != fw_major) + continue; if (fw_minor < feature->min_minor) continue; if (feature->max_minor > 0 && fw_minor > feature->max_minor) continue; - set_bit(feature->feature, &ndev->feature_mask); + ndev->feature_mask |= feature->features; + + /* firmware version matches one of the driver support entry */ + found = true; } - return 0; + return found ? 0 : -EOPNOTSUPP; } static void aie2_dump_chann_info_debug(struct amdxdna_dev_hdl *ndev) @@ -322,7 +304,7 @@ static int aie2_xrs_set_dft_dpm_level(struct drm_device *ddev, u32 dpm_level) if (ndev->pw_mode != POWER_MODE_DEFAULT || ndev->dpm_level == dpm_level) return 0; - return ndev->priv->hw_ops.set_dpm(ndev, dpm_level); + return aie2_pm_set_dpm(ndev, dpm_level); } static struct xrs_action_ops aie2_xrs_actions = { @@ -671,7 +653,7 @@ static int aie2_get_aie_metadata(struct amdxdna_client *client, int ret = 0; ndev = xdna->dev_handle; - meta = kzalloc(sizeof(*meta), GFP_KERNEL); + meta = kzalloc_obj(*meta); if (!meta) return -ENOMEM; @@ -766,7 +748,7 @@ static int aie2_get_clock_metadata(struct amdxdna_client *client, int ret = 0; ndev = xdna->dev_handle; - clock = kzalloc(sizeof(*clock), GFP_KERNEL); + clock = kzalloc_obj(*clock); if (!clock) return -ENOMEM; @@ -793,7 +775,7 @@ static int aie2_hwctx_status_cb(struct amdxdna_hwctx *hwctx, void *arg) if (!array_args->num_element) return -EINVAL; - tmp = kzalloc(sizeof(*tmp), GFP_KERNEL); + tmp = kzalloc_obj(*tmp); if (!tmp) return -ENOMEM; diff --git a/drivers/accel/amdxdna/aie2_pci.h b/drivers/accel/amdxdna/aie2_pci.h index a5f9c42155d1..b20a3661078c 100644 --- a/drivers/accel/amdxdna/aie2_pci.h +++ b/drivers/accel/amdxdna/aie2_pci.h @@ -70,6 +70,7 @@ enum psp_reg_idx { PSP_INTR_REG = PSP_NUM_IN_REGS, PSP_STATUS_REG, PSP_RESP_REG, + PSP_PWAITMODE_REG, PSP_MAX_REGS /* Keep this at the end */ }; @@ -231,11 +232,13 @@ struct aie2_hw_ops { enum aie2_fw_feature { AIE2_NPU_COMMAND, AIE2_PREEMPT, + AIE2_TEMPORAL_ONLY, AIE2_FEATURE_MAX }; struct aie2_fw_feature_tbl { - enum aie2_fw_feature feature; + u64 features; + u32 major; u32 max_minor; u32 min_minor; }; @@ -244,8 +247,6 @@ struct aie2_fw_feature_tbl { struct amdxdna_dev_priv { const char *fw_path; - u64 protocol_major; - u64 protocol_minor; const struct rt_config *rt_config; const struct dpm_clk_freq *dpm_clk_tbl; const struct aie2_fw_feature_tbl *fw_feature_tbl; @@ -285,11 +286,13 @@ int npu4_set_dpm(struct amdxdna_dev_hdl *ndev, u32 dpm_level); /* aie2_pm.c */ int aie2_pm_init(struct amdxdna_dev_hdl *ndev); int aie2_pm_set_mode(struct amdxdna_dev_hdl *ndev, enum amdxdna_power_mode_type target); +int aie2_pm_set_dpm(struct amdxdna_dev_hdl *ndev, u32 dpm_level); /* aie2_psp.c */ struct psp_device *aie2m_psp_create(struct drm_device *ddev, struct psp_config *conf); int aie2_psp_start(struct psp_device *psp); void aie2_psp_stop(struct psp_device *psp); +int aie2_psp_waitmode_poll(struct psp_device *psp); /* aie2_error.c */ int aie2_error_async_events_alloc(struct amdxdna_dev_hdl *ndev); @@ -332,6 +335,11 @@ int aie2_sync_bo(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job, int (*notify_cb)(void *, void __iomem *, size_t)); int aie2_config_debug_bo(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job, int (*notify_cb)(void *, void __iomem *, size_t)); +void *aie2_alloc_msg_buffer(struct amdxdna_dev_hdl *ndev, u32 *size, + dma_addr_t *dma_addr); +#define aie2_free_msg_buffer(ndev, size, buff_addr, dma_addr) \ + dma_free_noncoherent((ndev)->xdna->ddev.dev, size, buff_addr, \ + dma_addr, DMA_FROM_DEVICE) /* aie2_hwctx.c */ int aie2_hwctx_init(struct amdxdna_hwctx *hwctx); diff --git a/drivers/accel/amdxdna/aie2_pm.c b/drivers/accel/amdxdna/aie2_pm.c index 426c38fce848..579b8be13b18 100644 --- a/drivers/accel/amdxdna/aie2_pm.c +++ b/drivers/accel/amdxdna/aie2_pm.c @@ -10,6 +10,7 @@ #include "aie2_pci.h" #include "amdxdna_pci_drv.h" +#include "amdxdna_pm.h" #define AIE2_CLK_GATING_ENABLE 1 #define AIE2_CLK_GATING_DISABLE 0 @@ -26,6 +27,22 @@ static int aie2_pm_set_clk_gating(struct amdxdna_dev_hdl *ndev, u32 val) return 0; } +int aie2_pm_set_dpm(struct amdxdna_dev_hdl *ndev, u32 dpm_level) +{ + int ret; + + ret = amdxdna_pm_resume_get(ndev->xdna); + if (ret) + return ret; + + ret = ndev->priv->hw_ops.set_dpm(ndev, dpm_level); + if (!ret) + ndev->dpm_level = dpm_level; + amdxdna_pm_suspend_put(ndev->xdna); + + return ret; +} + int aie2_pm_init(struct amdxdna_dev_hdl *ndev) { int ret; @@ -50,6 +67,7 @@ int aie2_pm_init(struct amdxdna_dev_hdl *ndev) ret = ndev->priv->hw_ops.set_dpm(ndev, ndev->max_dpm_level); if (ret) return ret; + ndev->dpm_level = ndev->max_dpm_level; ret = aie2_pm_set_clk_gating(ndev, AIE2_CLK_GATING_ENABLE); if (ret) @@ -94,7 +112,7 @@ int aie2_pm_set_mode(struct amdxdna_dev_hdl *ndev, enum amdxdna_power_mode_type return -EOPNOTSUPP; } - ret = ndev->priv->hw_ops.set_dpm(ndev, dpm_level); + ret = aie2_pm_set_dpm(ndev, dpm_level); if (ret) return ret; diff --git a/drivers/accel/amdxdna/aie2_psp.c b/drivers/accel/amdxdna/aie2_psp.c index f28a060a8810..3a7130577e3e 100644 --- a/drivers/accel/amdxdna/aie2_psp.c +++ b/drivers/accel/amdxdna/aie2_psp.c @@ -76,6 +76,21 @@ static int psp_exec(struct psp_device *psp, u32 *reg_vals) return 0; } +int aie2_psp_waitmode_poll(struct psp_device *psp) +{ + struct amdxdna_dev *xdna = to_xdna_dev(psp->ddev); + u32 mode_reg; + int ret; + + ret = readx_poll_timeout(readl, PSP_REG(psp, PSP_PWAITMODE_REG), mode_reg, + (mode_reg & 0x1) == 1, + PSP_POLL_INTERVAL, PSP_POLL_TIMEOUT); + if (ret) + XDNA_ERR(xdna, "fw waitmode reg error, ret %d", ret); + + return ret; +} + void aie2_psp_stop(struct psp_device *psp) { u32 reg_vals[PSP_NUM_IN_REGS] = { PSP_RELEASE_TMR, }; diff --git a/drivers/accel/amdxdna/aie2_smu.c b/drivers/accel/amdxdna/aie2_smu.c index bd94ee96c2bc..d8c31924e501 100644 --- a/drivers/accel/amdxdna/aie2_smu.c +++ b/drivers/accel/amdxdna/aie2_smu.c @@ -11,7 +11,6 @@ #include "aie2_pci.h" #include "amdxdna_pci_drv.h" -#include "amdxdna_pm.h" #define SMU_RESULT_OK 1 @@ -67,16 +66,12 @@ int npu1_set_dpm(struct amdxdna_dev_hdl *ndev, u32 dpm_level) u32 freq; int ret; - ret = amdxdna_pm_resume_get(ndev->xdna); - if (ret) - return ret; - ret = aie2_smu_exec(ndev, AIE2_SMU_SET_MPNPUCLK_FREQ, ndev->priv->dpm_clk_tbl[dpm_level].npuclk, &freq); if (ret) { XDNA_ERR(ndev->xdna, "Set npu clock to %d failed, ret %d\n", ndev->priv->dpm_clk_tbl[dpm_level].npuclk, ret); - goto suspend_put; + return ret; } ndev->npuclk_freq = freq; @@ -85,12 +80,10 @@ int npu1_set_dpm(struct amdxdna_dev_hdl *ndev, u32 dpm_level) if (ret) { XDNA_ERR(ndev->xdna, "Set h clock to %d failed, ret %d\n", ndev->priv->dpm_clk_tbl[dpm_level].hclk, ret); - goto suspend_put; + return ret; } - amdxdna_pm_suspend_put(ndev->xdna); ndev->hclk_freq = freq; - ndev->dpm_level = dpm_level; ndev->max_tops = 2 * ndev->total_col; ndev->curr_tops = ndev->max_tops * freq / 1028; @@ -98,38 +91,28 @@ int npu1_set_dpm(struct amdxdna_dev_hdl *ndev, u32 dpm_level) ndev->npuclk_freq, ndev->hclk_freq); return 0; - -suspend_put: - amdxdna_pm_suspend_put(ndev->xdna); - return ret; } int npu4_set_dpm(struct amdxdna_dev_hdl *ndev, u32 dpm_level) { int ret; - ret = amdxdna_pm_resume_get(ndev->xdna); - if (ret) - return ret; - ret = aie2_smu_exec(ndev, AIE2_SMU_SET_HARD_DPMLEVEL, dpm_level, NULL); if (ret) { XDNA_ERR(ndev->xdna, "Set hard dpm level %d failed, ret %d ", dpm_level, ret); - goto suspend_put; + return ret; } ret = aie2_smu_exec(ndev, AIE2_SMU_SET_SOFT_DPMLEVEL, dpm_level, NULL); if (ret) { XDNA_ERR(ndev->xdna, "Set soft dpm level %d failed, ret %d", dpm_level, ret); - goto suspend_put; + return ret; } - amdxdna_pm_suspend_put(ndev->xdna); ndev->npuclk_freq = ndev->priv->dpm_clk_tbl[dpm_level].npuclk; ndev->hclk_freq = ndev->priv->dpm_clk_tbl[dpm_level].hclk; - ndev->dpm_level = dpm_level; ndev->max_tops = NPU4_DPM_TOPS(ndev, ndev->max_dpm_level); ndev->curr_tops = NPU4_DPM_TOPS(ndev, dpm_level); @@ -137,10 +120,6 @@ int npu4_set_dpm(struct amdxdna_dev_hdl *ndev, u32 dpm_level) ndev->npuclk_freq, ndev->hclk_freq); return 0; - -suspend_put: - amdxdna_pm_suspend_put(ndev->xdna); - return ret; } int aie2_smu_init(struct amdxdna_dev_hdl *ndev) diff --git a/drivers/accel/amdxdna/aie2_solver.c b/drivers/accel/amdxdna/aie2_solver.c index 2013d1f13aae..3611e3268d79 100644 --- a/drivers/accel/amdxdna/aie2_solver.c +++ b/drivers/accel/amdxdna/aie2_solver.c @@ -197,7 +197,7 @@ static int get_free_partition(struct solver_state *xrs, if (i == snode->cols_len) return -ENODEV; - pt_node = kzalloc(sizeof(*pt_node), GFP_KERNEL); + pt_node = kzalloc_obj(*pt_node); if (!pt_node) return -ENOMEM; @@ -266,7 +266,7 @@ static struct solver_node *create_solver_node(struct solver_state *xrs, struct solver_node *node; int ret; - node = kzalloc(struct_size(node, start_cols, cdop->cols_len), GFP_KERNEL); + node = kzalloc_flex(*node, start_cols, cdop->cols_len); if (!node) return ERR_PTR(-ENOMEM); diff --git a/drivers/accel/amdxdna/amdxdna_ctx.c b/drivers/accel/amdxdna/amdxdna_ctx.c index d17aef89a0ad..59fa3800b9d3 100644 --- a/drivers/accel/amdxdna/amdxdna_ctx.c +++ b/drivers/accel/amdxdna/amdxdna_ctx.c @@ -50,7 +50,7 @@ static struct dma_fence *amdxdna_fence_create(struct amdxdna_hwctx *hwctx) { struct amdxdna_fence *fence; - fence = kzalloc(sizeof(*fence), GFP_KERNEL); + fence = kzalloc_obj(*fence); if (!fence) return NULL; @@ -161,7 +161,7 @@ int amdxdna_drm_create_hwctx_ioctl(struct drm_device *dev, void *data, struct dr if (args->ext || args->ext_flags) return -EINVAL; - hwctx = kzalloc(sizeof(*hwctx), GFP_KERNEL); + hwctx = kzalloc_obj(*hwctx); if (!hwctx) return -ENOMEM; @@ -436,7 +436,7 @@ int amdxdna_cmd_submit(struct amdxdna_client *client, int ret, idx; XDNA_DBG(xdna, "Command BO hdl %d, Arg BO count %d", cmd_bo_hdl, arg_bo_cnt); - job = kzalloc(struct_size(job, bos, arg_bo_cnt), GFP_KERNEL); + job = kzalloc_flex(*job, bos, arg_bo_cnt); if (!job) return -ENOMEM; diff --git a/drivers/accel/amdxdna/amdxdna_ctx.h b/drivers/accel/amdxdna/amdxdna_ctx.h index b6151244d64f..16c85f08f03c 100644 --- a/drivers/accel/amdxdna/amdxdna_ctx.h +++ b/drivers/accel/amdxdna/amdxdna_ctx.h @@ -98,11 +98,7 @@ struct amdxdna_hwctx { u32 *col_list; u32 start_col; u32 num_col; -#define HWCTX_STAT_INIT 0 -#define HWCTX_STAT_READY 1 -#define HWCTX_STAT_STOP 2 - u32 status; - u32 old_status; + u32 num_unused_col; struct amdxdna_qos_info qos; struct amdxdna_hwctx_param_config_cu *cus; diff --git a/drivers/accel/amdxdna/amdxdna_gem.c b/drivers/accel/amdxdna/amdxdna_gem.c index dfa916eeb2d9..8c290ddd3251 100644 --- a/drivers/accel/amdxdna/amdxdna_gem.c +++ b/drivers/accel/amdxdna/amdxdna_gem.c @@ -205,13 +205,12 @@ static int amdxdna_hmm_register(struct amdxdna_gem_obj *abo, if (!xdna->dev_info->ops->hmm_invalidate) return 0; - mapp = kzalloc(sizeof(*mapp), GFP_KERNEL); + mapp = kzalloc_obj(*mapp); if (!mapp) return -ENOMEM; nr_pages = (PAGE_ALIGN(addr + len) - (addr & PAGE_MASK)) >> PAGE_SHIFT; - mapp->range.hmm_pfns = kvcalloc(nr_pages, sizeof(*mapp->range.hmm_pfns), - GFP_KERNEL); + mapp->range.hmm_pfns = kvzalloc_objs(*mapp->range.hmm_pfns, nr_pages); if (!mapp->range.hmm_pfns) { ret = -ENOMEM; goto free_map; @@ -499,7 +498,7 @@ amdxdna_gem_create_obj(struct drm_device *dev, size_t size) { struct amdxdna_gem_obj *abo; - abo = kzalloc(sizeof(*abo), GFP_KERNEL); + abo = kzalloc_obj(*abo); if (!abo) return ERR_PTR(-ENOMEM); diff --git a/drivers/accel/amdxdna/amdxdna_mailbox.c b/drivers/accel/amdxdna/amdxdna_mailbox.c index 858df97cd3fb..235a94047530 100644 --- a/drivers/accel/amdxdna/amdxdna_mailbox.c +++ b/drivers/accel/amdxdna/amdxdna_mailbox.c @@ -112,22 +112,6 @@ static u32 mailbox_reg_read(struct mailbox_channel *mb_chann, u32 mbox_reg) return readl(ringbuf_addr); } -static int mailbox_reg_read_non_zero(struct mailbox_channel *mb_chann, u32 mbox_reg, u32 *val) -{ - struct xdna_mailbox_res *mb_res = &mb_chann->mb->res; - void __iomem *ringbuf_addr = mb_res->mbox_base + mbox_reg; - int ret, value; - - /* Poll till value is not zero */ - ret = readx_poll_timeout(readl, ringbuf_addr, value, - value, 1 /* us */, 100); - if (ret < 0) - return ret; - - *val = value; - return 0; -} - static inline void mailbox_set_headptr(struct mailbox_channel *mb_chann, u32 headptr_val) { @@ -207,26 +191,34 @@ mailbox_send_msg(struct mailbox_channel *mb_chann, struct mailbox_msg *mb_msg) u32 head, tail; u32 start_addr; u32 tmp_tail; + int ret; head = mailbox_get_headptr(mb_chann, CHAN_RES_X2I); tail = mb_chann->x2i_tail; - ringbuf_size = mailbox_get_ringbuf_size(mb_chann, CHAN_RES_X2I); + ringbuf_size = mailbox_get_ringbuf_size(mb_chann, CHAN_RES_X2I) - sizeof(u32); start_addr = mb_chann->res[CHAN_RES_X2I].rb_start_addr; tmp_tail = tail + mb_msg->pkg_size; - if (tail < head && tmp_tail >= head) - goto no_space; - if (tail >= head && (tmp_tail > ringbuf_size - sizeof(u32) && - mb_msg->pkg_size >= head)) - goto no_space; - - if (tail >= head && tmp_tail > ringbuf_size - sizeof(u32)) { +check_again: + if (tail >= head && tmp_tail > ringbuf_size) { write_addr = mb_chann->mb->res.ringbuf_base + start_addr + tail; writel(TOMBSTONE, write_addr); /* tombstone is set. Write from the start of the ringbuf */ tail = 0; + tmp_tail = tail + mb_msg->pkg_size; + } + + if (tail < head && tmp_tail >= head) { + ret = read_poll_timeout(mailbox_get_headptr, head, + tmp_tail < head || tail >= head, + 1, 100, false, mb_chann, CHAN_RES_X2I); + if (ret) + return ret; + + if (tail >= head) + goto check_again; } write_addr = mb_chann->mb->res.ringbuf_base + start_addr + tail; @@ -238,9 +230,6 @@ mailbox_send_msg(struct mailbox_channel *mb_chann, struct mailbox_msg *mb_msg) mb_msg->pkg.header.id); return 0; - -no_space: - return -ENOSPC; } static int @@ -286,8 +275,7 @@ static int mailbox_get_msg(struct mailbox_channel *mb_chann) u32 start_addr; int ret; - if (mailbox_reg_read_non_zero(mb_chann, mb_chann->res[CHAN_RES_I2X].mb_tail_ptr_reg, &tail)) - return -EINVAL; + tail = mailbox_get_tailptr(mb_chann, CHAN_RES_I2X); head = mb_chann->i2x_head; ringbuf_size = mailbox_get_ringbuf_size(mb_chann, CHAN_RES_I2X); start_addr = mb_chann->res[CHAN_RES_I2X].rb_start_addr; @@ -487,7 +475,7 @@ xdna_mailbox_create_channel(struct mailbox *mb, return NULL; } - mb_chann = kzalloc(sizeof(*mb_chann), GFP_KERNEL); + mb_chann = kzalloc_obj(*mb_chann); if (!mb_chann) return NULL; diff --git a/drivers/accel/amdxdna/amdxdna_pci_drv.c b/drivers/accel/amdxdna/amdxdna_pci_drv.c index 1973ab67721b..4ada45d06fcf 100644 --- a/drivers/accel/amdxdna/amdxdna_pci_drv.c +++ b/drivers/accel/amdxdna/amdxdna_pci_drv.c @@ -51,7 +51,6 @@ MODULE_DEVICE_TABLE(pci, pci_ids); static const struct amdxdna_device_id amdxdna_ids[] = { { 0x1502, 0x0, &dev_npu1_info }, - { 0x17f0, 0x0, &dev_npu2_info }, { 0x17f0, 0x10, &dev_npu4_info }, { 0x17f0, 0x11, &dev_npu5_info }, { 0x17f0, 0x20, &dev_npu6_info }, @@ -64,7 +63,7 @@ static int amdxdna_drm_open(struct drm_device *ddev, struct drm_file *filp) struct amdxdna_client *client; int ret; - client = kzalloc(sizeof(*client), GFP_KERNEL); + client = kzalloc_obj(*client); if (!client) return -ENOMEM; @@ -83,6 +82,8 @@ static int amdxdna_drm_open(struct drm_device *ddev, struct drm_file *filp) ret = -ENODEV; goto unbind_sva; } + client->mm = current->mm; + mmgrab(client->mm); init_srcu_struct(&client->hwctx_srcu); xa_init_flags(&client->hwctx_xa, XA_FLAGS_ALLOC); mutex_init(&client->mm_lock); @@ -105,43 +106,39 @@ failed: return ret; } -static void amdxdna_drm_close(struct drm_device *ddev, struct drm_file *filp) +static void amdxdna_client_cleanup(struct amdxdna_client *client) { - struct amdxdna_client *client = filp->driver_priv; - struct amdxdna_dev *xdna = to_xdna_dev(ddev); - - XDNA_DBG(xdna, "closing pid %d", client->pid); - + list_del(&client->node); + amdxdna_hwctx_remove_all(client); xa_destroy(&client->hwctx_xa); cleanup_srcu_struct(&client->hwctx_srcu); mutex_destroy(&client->mm_lock); + if (client->dev_heap) drm_gem_object_put(to_gobj(client->dev_heap)); iommu_sva_unbind_device(client->sva); + mmdrop(client->mm); - XDNA_DBG(xdna, "pid %d closed", client->pid); kfree(client); } -static int amdxdna_flush(struct file *f, fl_owner_t id) +static void amdxdna_drm_close(struct drm_device *ddev, struct drm_file *filp) { - struct drm_file *filp = f->private_data; struct amdxdna_client *client = filp->driver_priv; - struct amdxdna_dev *xdna = client->xdna; + struct amdxdna_dev *xdna = to_xdna_dev(ddev); int idx; - XDNA_DBG(xdna, "PID %d flushing...", client->pid); + XDNA_DBG(xdna, "closing pid %d", client->pid); + if (!drm_dev_enter(&xdna->ddev, &idx)) - return 0; + return; mutex_lock(&xdna->dev_lock); - list_del_init(&client->node); - amdxdna_hwctx_remove_all(client); + amdxdna_client_cleanup(client); mutex_unlock(&xdna->dev_lock); drm_dev_exit(idx); - return 0; } static int amdxdna_drm_get_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) @@ -217,7 +214,6 @@ static const struct file_operations amdxdna_fops = { .owner = THIS_MODULE, .open = accel_open, .release = drm_release, - .flush = amdxdna_flush, .unlocked_ioctl = drm_ioctl, .compat_ioctl = drm_compat_ioctl, .poll = drm_poll, @@ -282,7 +278,7 @@ static int amdxdna_probe(struct pci_dev *pdev, const struct pci_device_id *id) fs_reclaim_release(GFP_KERNEL); } - xdna->notifier_wq = alloc_ordered_workqueue("notifier_wq", 0); + xdna->notifier_wq = alloc_ordered_workqueue("notifier_wq", WQ_MEM_RECLAIM); if (!xdna->notifier_wq) return -ENOMEM; @@ -333,8 +329,7 @@ static void amdxdna_remove(struct pci_dev *pdev) client = list_first_entry_or_null(&xdna->client_list, struct amdxdna_client, node); while (client) { - list_del_init(&client->node); - amdxdna_hwctx_remove_all(client); + amdxdna_client_cleanup(client); client = list_first_entry_or_null(&xdna->client_list, struct amdxdna_client, node); diff --git a/drivers/accel/amdxdna/amdxdna_pci_drv.h b/drivers/accel/amdxdna/amdxdna_pci_drv.h index c99477f5e454..f08406b8fdf9 100644 --- a/drivers/accel/amdxdna/amdxdna_pci_drv.h +++ b/drivers/accel/amdxdna/amdxdna_pci_drv.h @@ -101,7 +101,6 @@ struct amdxdna_dev { struct amdxdna_fw_ver fw_ver; struct rw_semaphore notifier_lock; /* for mmu notifier*/ struct workqueue_struct *notifier_wq; - bool rpm_on; }; /* @@ -131,6 +130,7 @@ struct amdxdna_client { struct iommu_sva *sva; int pasid; + struct mm_struct *mm; }; #define amdxdna_for_each_hwctx(client, hwctx_id, entry) \ @@ -138,7 +138,6 @@ struct amdxdna_client { /* Add device info below */ extern const struct amdxdna_dev_info dev_npu1_info; -extern const struct amdxdna_dev_info dev_npu2_info; extern const struct amdxdna_dev_info dev_npu4_info; extern const struct amdxdna_dev_info dev_npu5_info; extern const struct amdxdna_dev_info dev_npu6_info; diff --git a/drivers/accel/amdxdna/amdxdna_pm.c b/drivers/accel/amdxdna/amdxdna_pm.c index fa38e65d617c..d024d480521c 100644 --- a/drivers/accel/amdxdna/amdxdna_pm.c +++ b/drivers/accel/amdxdna/amdxdna_pm.c @@ -15,14 +15,9 @@ int amdxdna_pm_suspend(struct device *dev) { struct amdxdna_dev *xdna = to_xdna_dev(dev_get_drvdata(dev)); int ret = -EOPNOTSUPP; - bool rpm; - if (xdna->dev_info->ops->suspend) { - rpm = xdna->rpm_on; - xdna->rpm_on = false; + if (xdna->dev_info->ops->suspend) ret = xdna->dev_info->ops->suspend(xdna); - xdna->rpm_on = rpm; - } XDNA_DBG(xdna, "Suspend done ret %d", ret); return ret; @@ -32,14 +27,9 @@ int amdxdna_pm_resume(struct device *dev) { struct amdxdna_dev *xdna = to_xdna_dev(dev_get_drvdata(dev)); int ret = -EOPNOTSUPP; - bool rpm; - if (xdna->dev_info->ops->resume) { - rpm = xdna->rpm_on; - xdna->rpm_on = false; + if (xdna->dev_info->ops->resume) ret = xdna->dev_info->ops->resume(xdna); - xdna->rpm_on = rpm; - } XDNA_DBG(xdna, "Resume done ret %d", ret); return ret; @@ -50,9 +40,6 @@ int amdxdna_pm_resume_get(struct amdxdna_dev *xdna) struct device *dev = xdna->ddev.dev; int ret; - if (!xdna->rpm_on) - return 0; - ret = pm_runtime_resume_and_get(dev); if (ret) { XDNA_ERR(xdna, "Resume failed: %d", ret); @@ -66,9 +53,6 @@ void amdxdna_pm_suspend_put(struct amdxdna_dev *xdna) { struct device *dev = xdna->ddev.dev; - if (!xdna->rpm_on) - return; - pm_runtime_put_autosuspend(dev); } @@ -81,14 +65,12 @@ void amdxdna_pm_init(struct amdxdna_dev *xdna) pm_runtime_use_autosuspend(dev); pm_runtime_allow(dev); pm_runtime_put_autosuspend(dev); - xdna->rpm_on = true; } void amdxdna_pm_fini(struct amdxdna_dev *xdna) { struct device *dev = xdna->ddev.dev; - xdna->rpm_on = false; pm_runtime_get_noresume(dev); pm_runtime_forbid(dev); } diff --git a/drivers/accel/amdxdna/amdxdna_ubuf.c b/drivers/accel/amdxdna/amdxdna_ubuf.c index 077b2261cf2a..b509f10b155c 100644 --- a/drivers/accel/amdxdna/amdxdna_ubuf.c +++ b/drivers/accel/amdxdna/amdxdna_ubuf.c @@ -27,22 +27,28 @@ static struct sg_table *amdxdna_ubuf_map(struct dma_buf_attachment *attach, struct sg_table *sg; int ret; - sg = kzalloc(sizeof(*sg), GFP_KERNEL); + sg = kzalloc_obj(*sg); if (!sg) return ERR_PTR(-ENOMEM); ret = sg_alloc_table_from_pages(sg, ubuf->pages, ubuf->nr_pages, 0, ubuf->nr_pages << PAGE_SHIFT, GFP_KERNEL); if (ret) - return ERR_PTR(ret); + goto err_free_sg; if (ubuf->flags & AMDXDNA_UBUF_FLAG_MAP_DMA) { ret = dma_map_sgtable(attach->dev, sg, direction, 0); if (ret) - return ERR_PTR(ret); + goto err_free_table; } return sg; + +err_free_table: + sg_free_table(sg); +err_free_sg: + kfree(sg); + return ERR_PTR(ret); } static void amdxdna_ubuf_unmap(struct dma_buf_attachment *attach, @@ -141,7 +147,7 @@ struct dma_buf *amdxdna_get_ubuf(struct drm_device *dev, if (!can_do_mlock()) return ERR_PTR(-EPERM); - ubuf = kzalloc(sizeof(*ubuf), GFP_KERNEL); + ubuf = kzalloc_obj(*ubuf); if (!ubuf) return ERR_PTR(-ENOMEM); @@ -149,7 +155,7 @@ struct dma_buf *amdxdna_get_ubuf(struct drm_device *dev, ubuf->mm = current->mm; mmgrab(ubuf->mm); - va_ent = kvcalloc(num_entries, sizeof(*va_ent), GFP_KERNEL); + va_ent = kvzalloc_objs(*va_ent, num_entries); if (!va_ent) { ret = -ENOMEM; goto free_ubuf; @@ -183,7 +189,7 @@ struct dma_buf *amdxdna_get_ubuf(struct drm_device *dev, goto sub_pin_cnt; } - ubuf->pages = kvmalloc_array(ubuf->nr_pages, sizeof(*ubuf->pages), GFP_KERNEL); + ubuf->pages = kvmalloc_objs(*ubuf->pages, ubuf->nr_pages); if (!ubuf->pages) { ret = -ENOMEM; goto sub_pin_cnt; diff --git a/drivers/accel/amdxdna/npu1_regs.c b/drivers/accel/amdxdna/npu1_regs.c index ec407f3b48fc..6f36a27b5a02 100644 --- a/drivers/accel/amdxdna/npu1_regs.c +++ b/drivers/accel/amdxdna/npu1_regs.c @@ -6,6 +6,7 @@ #include <drm/amdxdna_accel.h> #include <drm/drm_device.h> #include <drm/gpu_scheduler.h> +#include <linux/bits.h> #include <linux/sizes.h> #include "aie2_pci.h" @@ -13,6 +14,7 @@ #include "amdxdna_pci_drv.h" /* Address definition from NPU1 docs */ +#define MPNPU_PWAITMODE 0x3010034 #define MPNPU_PUB_SEC_INTR 0x3010090 #define MPNPU_PUB_PWRMGMT_INTR 0x3010094 #define MPNPU_PUB_SCRATCH2 0x30100A0 @@ -64,14 +66,13 @@ const struct dpm_clk_freq npu1_dpm_clk_table[] = { }; static const struct aie2_fw_feature_tbl npu1_fw_feature_table[] = { - { .feature = AIE2_NPU_COMMAND, .min_minor = 8 }, + { .major = 5, .min_minor = 7 }, + { .features = BIT_U64(AIE2_NPU_COMMAND), .min_minor = 8 }, { 0 } }; static const struct amdxdna_dev_priv npu1_dev_priv = { .fw_path = "amdnpu/1502_00/npu.sbin", - .protocol_major = 0x5, - .protocol_minor = 0x7, .rt_config = npu1_default_rt_cfg, .dpm_clk_tbl = npu1_dpm_clk_table, .fw_feature_tbl = npu1_fw_feature_table, @@ -92,6 +93,7 @@ static const struct amdxdna_dev_priv npu1_dev_priv = { DEFINE_BAR_OFFSET(PSP_INTR_REG, NPU1_PSP, MPNPU_PUB_SEC_INTR), DEFINE_BAR_OFFSET(PSP_STATUS_REG, NPU1_PSP, MPNPU_PUB_SCRATCH2), DEFINE_BAR_OFFSET(PSP_RESP_REG, NPU1_PSP, MPNPU_PUB_SCRATCH3), + DEFINE_BAR_OFFSET(PSP_PWAITMODE_REG, NPU1_PSP, MPNPU_PWAITMODE), }, .smu_regs_off = { DEFINE_BAR_OFFSET(SMU_CMD_REG, NPU1_SMU, MPNPU_PUB_SCRATCH5), diff --git a/drivers/accel/amdxdna/npu2_regs.c b/drivers/accel/amdxdna/npu2_regs.c deleted file mode 100644 index 86f87d0d1354..000000000000 --- a/drivers/accel/amdxdna/npu2_regs.c +++ /dev/null @@ -1,115 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Copyright (C) 2023-2024, Advanced Micro Devices, Inc. - */ - -#include <drm/amdxdna_accel.h> -#include <drm/drm_device.h> -#include <drm/gpu_scheduler.h> -#include <linux/sizes.h> - -#include "aie2_pci.h" -#include "amdxdna_mailbox.h" -#include "amdxdna_pci_drv.h" - -/* NPU Public Registers on MpNPUAxiXbar (refer to Diag npu_registers.h) */ -#define MPNPU_PUB_SEC_INTR 0x3010060 -#define MPNPU_PUB_PWRMGMT_INTR 0x3010064 -#define MPNPU_PUB_SCRATCH0 0x301006C -#define MPNPU_PUB_SCRATCH1 0x3010070 -#define MPNPU_PUB_SCRATCH2 0x3010074 -#define MPNPU_PUB_SCRATCH3 0x3010078 -#define MPNPU_PUB_SCRATCH4 0x301007C -#define MPNPU_PUB_SCRATCH5 0x3010080 -#define MPNPU_PUB_SCRATCH6 0x3010084 -#define MPNPU_PUB_SCRATCH7 0x3010088 -#define MPNPU_PUB_SCRATCH8 0x301008C -#define MPNPU_PUB_SCRATCH9 0x3010090 -#define MPNPU_PUB_SCRATCH10 0x3010094 -#define MPNPU_PUB_SCRATCH11 0x3010098 -#define MPNPU_PUB_SCRATCH12 0x301009C -#define MPNPU_PUB_SCRATCH13 0x30100A0 -#define MPNPU_PUB_SCRATCH14 0x30100A4 -#define MPNPU_PUB_SCRATCH15 0x30100A8 -#define MP0_C2PMSG_73 0x3810A24 -#define MP0_C2PMSG_123 0x3810AEC - -#define MP1_C2PMSG_0 0x3B10900 -#define MP1_C2PMSG_60 0x3B109F0 -#define MP1_C2PMSG_61 0x3B109F4 - -#define MPNPU_SRAM_X2I_MAILBOX_0 0x3600000 -#define MPNPU_SRAM_X2I_MAILBOX_15 0x361E000 -#define MPNPU_SRAM_X2I_MAILBOX_31 0x363E000 -#define MPNPU_SRAM_I2X_MAILBOX_31 0x363F000 - -#define MMNPU_APERTURE0_BASE 0x3000000 -#define MMNPU_APERTURE1_BASE 0x3600000 -#define MMNPU_APERTURE3_BASE 0x3810000 -#define MMNPU_APERTURE4_BASE 0x3B10000 - -/* PCIe BAR Index for NPU2 */ -#define NPU2_REG_BAR_INDEX 0 -#define NPU2_MBOX_BAR_INDEX 0 -#define NPU2_PSP_BAR_INDEX 4 -#define NPU2_SMU_BAR_INDEX 5 -#define NPU2_SRAM_BAR_INDEX 2 -/* Associated BARs and Apertures */ -#define NPU2_REG_BAR_BASE MMNPU_APERTURE0_BASE -#define NPU2_MBOX_BAR_BASE MMNPU_APERTURE0_BASE -#define NPU2_PSP_BAR_BASE MMNPU_APERTURE3_BASE -#define NPU2_SMU_BAR_BASE MMNPU_APERTURE4_BASE -#define NPU2_SRAM_BAR_BASE MMNPU_APERTURE1_BASE - -static const struct amdxdna_dev_priv npu2_dev_priv = { - .fw_path = "amdnpu/17f0_00/npu.sbin", - .protocol_major = 0x6, - .protocol_minor = 0x6, - .rt_config = npu4_default_rt_cfg, - .dpm_clk_tbl = npu4_dpm_clk_table, - .fw_feature_tbl = npu4_fw_feature_table, - .col_align = COL_ALIGN_NATURE, - .mbox_dev_addr = NPU2_MBOX_BAR_BASE, - .mbox_size = 0, /* Use BAR size */ - .sram_dev_addr = NPU2_SRAM_BAR_BASE, - .hwctx_limit = 16, - .sram_offs = { - DEFINE_BAR_OFFSET(MBOX_CHANN_OFF, NPU2_SRAM, MPNPU_SRAM_X2I_MAILBOX_0), - DEFINE_BAR_OFFSET(FW_ALIVE_OFF, NPU2_SRAM, MPNPU_SRAM_X2I_MAILBOX_15), - }, - .psp_regs_off = { - DEFINE_BAR_OFFSET(PSP_CMD_REG, NPU2_PSP, MP0_C2PMSG_123), - DEFINE_BAR_OFFSET(PSP_ARG0_REG, NPU2_REG, MPNPU_PUB_SCRATCH3), - DEFINE_BAR_OFFSET(PSP_ARG1_REG, NPU2_REG, MPNPU_PUB_SCRATCH4), - DEFINE_BAR_OFFSET(PSP_ARG2_REG, NPU2_REG, MPNPU_PUB_SCRATCH9), - DEFINE_BAR_OFFSET(PSP_INTR_REG, NPU2_PSP, MP0_C2PMSG_73), - DEFINE_BAR_OFFSET(PSP_STATUS_REG, NPU2_PSP, MP0_C2PMSG_123), - DEFINE_BAR_OFFSET(PSP_RESP_REG, NPU2_REG, MPNPU_PUB_SCRATCH3), - }, - .smu_regs_off = { - DEFINE_BAR_OFFSET(SMU_CMD_REG, NPU2_SMU, MP1_C2PMSG_0), - DEFINE_BAR_OFFSET(SMU_ARG_REG, NPU2_SMU, MP1_C2PMSG_60), - DEFINE_BAR_OFFSET(SMU_INTR_REG, NPU2_SMU, MMNPU_APERTURE4_BASE), - DEFINE_BAR_OFFSET(SMU_RESP_REG, NPU2_SMU, MP1_C2PMSG_61), - DEFINE_BAR_OFFSET(SMU_OUT_REG, NPU2_SMU, MP1_C2PMSG_60), - }, - .hw_ops = { - .set_dpm = npu4_set_dpm, - }, -}; - -const struct amdxdna_dev_info dev_npu2_info = { - .reg_bar = NPU2_REG_BAR_INDEX, - .mbox_bar = NPU2_MBOX_BAR_INDEX, - .sram_bar = NPU2_SRAM_BAR_INDEX, - .psp_bar = NPU2_PSP_BAR_INDEX, - .smu_bar = NPU2_SMU_BAR_INDEX, - .first_col = 0, - .dev_mem_buf_shift = 15, /* 32 KiB aligned */ - .dev_mem_base = AIE2_DEVM_BASE, - .dev_mem_size = AIE2_DEVM_SIZE, - .vbnv = "RyzenAI-npu2", - .device_type = AMDXDNA_DEV_TYPE_KMQ, - .dev_priv = &npu2_dev_priv, - .ops = &aie2_ops, /* NPU2 can share NPU1's callback */ -}; diff --git a/drivers/accel/amdxdna/npu4_regs.c b/drivers/accel/amdxdna/npu4_regs.c index 986a5f28ba24..a8d6f76dde5f 100644 --- a/drivers/accel/amdxdna/npu4_regs.c +++ b/drivers/accel/amdxdna/npu4_regs.c @@ -6,6 +6,7 @@ #include <drm/amdxdna_accel.h> #include <drm/drm_device.h> #include <drm/gpu_scheduler.h> +#include <linux/bits.h> #include <linux/sizes.h> #include "aie2_pci.h" @@ -13,6 +14,7 @@ #include "amdxdna_pci_drv.h" /* NPU Public Registers on MpNPUAxiXbar (refer to Diag npu_registers.h) */ +#define MPNPU_PWAITMODE 0x301003C #define MPNPU_PUB_SEC_INTR 0x3010060 #define MPNPU_PUB_PWRMGMT_INTR 0x3010064 #define MPNPU_PUB_SCRATCH0 0x301006C @@ -87,15 +89,16 @@ const struct dpm_clk_freq npu4_dpm_clk_table[] = { }; const struct aie2_fw_feature_tbl npu4_fw_feature_table[] = { - { .feature = AIE2_NPU_COMMAND, .min_minor = 15 }, - { .feature = AIE2_PREEMPT, .min_minor = 12 }, + { .major = 6, .min_minor = 12 }, + { .features = BIT_U64(AIE2_NPU_COMMAND), .major = 6, .min_minor = 15 }, + { .features = BIT_U64(AIE2_PREEMPT), .major = 6, .min_minor = 12 }, + { .features = BIT_U64(AIE2_TEMPORAL_ONLY), .major = 6, .min_minor = 12 }, + { .features = GENMASK_ULL(AIE2_TEMPORAL_ONLY, AIE2_NPU_COMMAND), .major = 7 }, { 0 } }; static const struct amdxdna_dev_priv npu4_dev_priv = { .fw_path = "amdnpu/17f0_10/npu.sbin", - .protocol_major = 0x6, - .protocol_minor = 12, .rt_config = npu4_default_rt_cfg, .dpm_clk_tbl = npu4_dpm_clk_table, .fw_feature_tbl = npu4_fw_feature_table, @@ -116,6 +119,7 @@ static const struct amdxdna_dev_priv npu4_dev_priv = { DEFINE_BAR_OFFSET(PSP_INTR_REG, NPU4_PSP, MP0_C2PMSG_73), DEFINE_BAR_OFFSET(PSP_STATUS_REG, NPU4_PSP, MP0_C2PMSG_123), DEFINE_BAR_OFFSET(PSP_RESP_REG, NPU4_REG, MPNPU_PUB_SCRATCH3), + DEFINE_BAR_OFFSET(PSP_PWAITMODE_REG, NPU4_REG, MPNPU_PWAITMODE), }, .smu_regs_off = { DEFINE_BAR_OFFSET(SMU_CMD_REG, NPU4_SMU, MP1_C2PMSG_0), diff --git a/drivers/accel/amdxdna/npu5_regs.c b/drivers/accel/amdxdna/npu5_regs.c index 75ad97f0b937..c0a35cfd886c 100644 --- a/drivers/accel/amdxdna/npu5_regs.c +++ b/drivers/accel/amdxdna/npu5_regs.c @@ -13,6 +13,7 @@ #include "amdxdna_pci_drv.h" /* NPU Public Registers on MpNPUAxiXbar (refer to Diag npu_registers.h) */ +#define MPNPU_PWAITMODE 0x301003C #define MPNPU_PUB_SEC_INTR 0x3010060 #define MPNPU_PUB_PWRMGMT_INTR 0x3010064 #define MPNPU_PUB_SCRATCH0 0x301006C @@ -63,8 +64,6 @@ static const struct amdxdna_dev_priv npu5_dev_priv = { .fw_path = "amdnpu/17f0_11/npu.sbin", - .protocol_major = 0x6, - .protocol_minor = 12, .rt_config = npu4_default_rt_cfg, .dpm_clk_tbl = npu4_dpm_clk_table, .fw_feature_tbl = npu4_fw_feature_table, @@ -85,6 +84,7 @@ static const struct amdxdna_dev_priv npu5_dev_priv = { DEFINE_BAR_OFFSET(PSP_INTR_REG, NPU5_PSP, MP0_C2PMSG_73), DEFINE_BAR_OFFSET(PSP_STATUS_REG, NPU5_PSP, MP0_C2PMSG_123), DEFINE_BAR_OFFSET(PSP_RESP_REG, NPU5_REG, MPNPU_PUB_SCRATCH3), + DEFINE_BAR_OFFSET(PSP_PWAITMODE_REG, NPU5_REG, MPNPU_PWAITMODE), }, .smu_regs_off = { DEFINE_BAR_OFFSET(SMU_CMD_REG, NPU5_SMU, MP1_C2PMSG_0), diff --git a/drivers/accel/amdxdna/npu6_regs.c b/drivers/accel/amdxdna/npu6_regs.c index 758dc013fe13..1fb07df99186 100644 --- a/drivers/accel/amdxdna/npu6_regs.c +++ b/drivers/accel/amdxdna/npu6_regs.c @@ -13,6 +13,7 @@ #include "amdxdna_pci_drv.h" /* NPU Public Registers on MpNPUAxiXbar (refer to Diag npu_registers.h) */ +#define MPNPU_PWAITMODE 0x301003C #define MPNPU_PUB_SEC_INTR 0x3010060 #define MPNPU_PUB_PWRMGMT_INTR 0x3010064 #define MPNPU_PUB_SCRATCH0 0x301006C @@ -63,8 +64,6 @@ static const struct amdxdna_dev_priv npu6_dev_priv = { .fw_path = "amdnpu/17f0_10/npu.sbin", - .protocol_major = 0x6, - .protocol_minor = 12, .rt_config = npu4_default_rt_cfg, .dpm_clk_tbl = npu4_dpm_clk_table, .fw_feature_tbl = npu4_fw_feature_table, @@ -85,6 +84,7 @@ static const struct amdxdna_dev_priv npu6_dev_priv = { DEFINE_BAR_OFFSET(PSP_INTR_REG, NPU6_PSP, MP0_C2PMSG_73), DEFINE_BAR_OFFSET(PSP_STATUS_REG, NPU6_PSP, MP0_C2PMSG_123), DEFINE_BAR_OFFSET(PSP_RESP_REG, NPU6_REG, MPNPU_PUB_SCRATCH3), + DEFINE_BAR_OFFSET(PSP_PWAITMODE_REG, NPU6_REG, MPNPU_PWAITMODE), }, .smu_regs_off = { DEFINE_BAR_OFFSET(SMU_CMD_REG, NPU6_SMU, MP1_C2PMSG_0), diff --git a/drivers/accel/ethosu/ethosu_drv.c b/drivers/accel/ethosu/ethosu_drv.c index e05a69bf5574..9992193d7338 100644 --- a/drivers/accel/ethosu/ethosu_drv.c +++ b/drivers/accel/ethosu/ethosu_drv.c @@ -144,7 +144,7 @@ static int ethosu_open(struct drm_device *ddev, struct drm_file *file) if (!try_module_get(THIS_MODULE)) return -EINVAL; - struct ethosu_file_priv __free(kfree) *priv = kzalloc(sizeof(*priv), GFP_KERNEL); + struct ethosu_file_priv __free(kfree) *priv = kzalloc_obj(*priv); if (!priv) { ret = -ENOMEM; goto err_put_mod; diff --git a/drivers/accel/ethosu/ethosu_gem.c b/drivers/accel/ethosu/ethosu_gem.c index 7b073116314b..668c71d5ff45 100644 --- a/drivers/accel/ethosu/ethosu_gem.c +++ b/drivers/accel/ethosu/ethosu_gem.c @@ -50,7 +50,7 @@ struct drm_gem_object *ethosu_gem_create_object(struct drm_device *ddev, size_t { struct ethosu_gem_object *obj; - obj = kzalloc(sizeof(*obj), GFP_KERNEL); + obj = kzalloc_obj(*obj); if (!obj) return ERR_PTR(-ENOMEM); @@ -352,7 +352,7 @@ static int ethosu_gem_cmdstream_copy_and_validate(struct drm_device *ddev, struct ethosu_gem_object *bo, u32 size) { - struct ethosu_validated_cmdstream_info __free(kfree) *info = kzalloc(sizeof(*info), GFP_KERNEL); + struct ethosu_validated_cmdstream_info __free(kfree) *info = kzalloc_obj(*info); struct ethosu_device *edev = to_ethosu_device(ddev); u32 *bocmds = bo->base.vaddr; struct cmd_state st; diff --git a/drivers/accel/ethosu/ethosu_job.c b/drivers/accel/ethosu/ethosu_job.c index 26e7a2f64d71..8598a3634340 100644 --- a/drivers/accel/ethosu/ethosu_job.c +++ b/drivers/accel/ethosu/ethosu_job.c @@ -375,7 +375,7 @@ static int ethosu_ioctl_submit_job(struct drm_device *dev, struct drm_file *file if (edev->npu_info.sram_size < job->sram_size) return -EINVAL; - ejob = kzalloc(sizeof(*ejob), GFP_KERNEL); + ejob = kzalloc_obj(*ejob); if (!ejob) return -ENOMEM; @@ -384,7 +384,7 @@ static int ethosu_ioctl_submit_job(struct drm_device *dev, struct drm_file *file ejob->dev = edev; ejob->sram_size = job->sram_size; - ejob->done_fence = kzalloc(sizeof(*ejob->done_fence), GFP_KERNEL); + ejob->done_fence = kzalloc_obj(*ejob->done_fence); if (!ejob->done_fence) { ret = -ENOMEM; goto out_cleanup_job; @@ -476,7 +476,7 @@ int ethosu_ioctl_submit(struct drm_device *dev, void *data, struct drm_file *fil } struct drm_ethosu_job __free(kvfree) *jobs = - kvmalloc_array(args->job_count, sizeof(*jobs), GFP_KERNEL); + kvmalloc_objs(*jobs, args->job_count); if (!jobs) return -ENOMEM; diff --git a/drivers/accel/habanalabs/common/command_buffer.c b/drivers/accel/habanalabs/common/command_buffer.c index 0f0d295116e7..e929db8bc023 100644 --- a/drivers/accel/habanalabs/common/command_buffer.c +++ b/drivers/accel/habanalabs/common/command_buffer.c @@ -116,10 +116,10 @@ static struct hl_cb *hl_cb_alloc(struct hl_device *hdev, u32 cb_size, * and must use GFP_ATOMIC for all memory allocations. */ if (ctx_id == HL_KERNEL_ASID_ID && !hdev->disabled) - cb = kzalloc(sizeof(*cb), GFP_ATOMIC); + cb = kzalloc_obj(*cb, GFP_ATOMIC); if (!cb) - cb = kzalloc(sizeof(*cb), GFP_KERNEL); + cb = kzalloc_obj(*cb); if (!cb) return NULL; diff --git a/drivers/accel/habanalabs/common/command_submission.c b/drivers/accel/habanalabs/common/command_submission.c index dee487724918..ba4257bda77b 100644 --- a/drivers/accel/habanalabs/common/command_submission.c +++ b/drivers/accel/habanalabs/common/command_submission.c @@ -907,9 +907,9 @@ static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx, cntr = &hdev->aggregated_cs_counters; - cs = kzalloc(sizeof(*cs), GFP_ATOMIC); + cs = kzalloc_obj(*cs, GFP_ATOMIC); if (!cs) - cs = kzalloc(sizeof(*cs), GFP_KERNEL); + cs = kzalloc_obj(*cs); if (!cs) { atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt); @@ -936,9 +936,9 @@ static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx, kref_init(&cs->refcount); spin_lock_init(&cs->job_lock); - cs_cmpl = kzalloc(sizeof(*cs_cmpl), GFP_ATOMIC); + cs_cmpl = kzalloc_obj(*cs_cmpl, GFP_ATOMIC); if (!cs_cmpl) - cs_cmpl = kzalloc(sizeof(*cs_cmpl), GFP_KERNEL); + cs_cmpl = kzalloc_obj(*cs_cmpl); if (!cs_cmpl) { atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt); @@ -1302,9 +1302,9 @@ struct hl_cs_job *hl_cs_allocate_job(struct hl_device *hdev, { struct hl_cs_job *job; - job = kzalloc(sizeof(*job), GFP_ATOMIC); + job = kzalloc_obj(*job, GFP_ATOMIC); if (!job) - job = kzalloc(sizeof(*job), GFP_KERNEL); + job = kzalloc_obj(*job); if (!job) return NULL; @@ -1420,11 +1420,9 @@ static int hl_cs_copy_chunk_array(struct hl_device *hdev, return -EINVAL; } - *cs_chunk_array = kmalloc_array(num_chunks, sizeof(**cs_chunk_array), - GFP_ATOMIC); + *cs_chunk_array = kmalloc_objs(**cs_chunk_array, num_chunks, GFP_ATOMIC); if (!*cs_chunk_array) - *cs_chunk_array = kmalloc_array(num_chunks, - sizeof(**cs_chunk_array), GFP_KERNEL); + *cs_chunk_array = kmalloc_objs(**cs_chunk_array, num_chunks); if (!*cs_chunk_array) { atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt); atomic64_inc(&hdev->aggregated_cs_counters.out_of_mem_drop_cnt); @@ -2040,7 +2038,7 @@ static int cs_ioctl_reserve_signals(struct hl_fpriv *hpriv, prop = &hdev->kernel_queues[q_idx].sync_stream_prop; - handle = kzalloc(sizeof(*handle), GFP_KERNEL); + handle = kzalloc_obj(*handle); if (!handle) { rc = -ENOMEM; goto out; @@ -3053,7 +3051,7 @@ static int hl_multi_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data) } /* allocate array for the fences */ - fence_arr = kmalloc_array(seq_arr_len, sizeof(struct hl_fence *), GFP_KERNEL); + fence_arr = kmalloc_objs(struct hl_fence *, seq_arr_len); if (!fence_arr) { rc = -ENOMEM; goto free_seq_arr; @@ -3412,7 +3410,7 @@ static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx, goto put_cq_cb; } - pend = kzalloc(sizeof(*pend), GFP_KERNEL); + pend = kzalloc_obj(*pend); if (!pend) { rc = -ENOMEM; goto put_cq_cb; @@ -3521,7 +3519,7 @@ static int _hl_interrupt_wait_ioctl_user_addr(struct hl_device *hdev, struct hl_ hl_ctx_get(ctx); - pend = kzalloc(sizeof(*pend), GFP_KERNEL); + pend = kzalloc_obj(*pend); if (!pend) { hl_ctx_put(ctx); return -ENOMEM; diff --git a/drivers/accel/habanalabs/common/context.c b/drivers/accel/habanalabs/common/context.c index 9f212b17611a..69c68b187bb7 100644 --- a/drivers/accel/habanalabs/common/context.c +++ b/drivers/accel/habanalabs/common/context.c @@ -155,7 +155,7 @@ int hl_ctx_create(struct hl_device *hdev, struct hl_fpriv *hpriv) struct hl_ctx *ctx; int rc; - ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); + ctx = kzalloc_obj(*ctx); if (!ctx) { rc = -ENOMEM; goto out_err; @@ -209,9 +209,8 @@ int hl_ctx_init(struct hl_device *hdev, struct hl_ctx *ctx, bool is_kernel_ctx) spin_lock_init(&ctx->cs_lock); atomic_set(&ctx->thread_ctx_switch_token, 1); ctx->thread_ctx_switch_wait_token = 0; - ctx->cs_pending = kcalloc(hdev->asic_prop.max_pending_cs, - sizeof(struct hl_fence *), - GFP_KERNEL); + ctx->cs_pending = kzalloc_objs(struct hl_fence *, + hdev->asic_prop.max_pending_cs); if (!ctx->cs_pending) return -ENOMEM; diff --git a/drivers/accel/habanalabs/common/debugfs.c b/drivers/accel/habanalabs/common/debugfs.c index 5f0820b19ccb..1d5e29fc6463 100644 --- a/drivers/accel/habanalabs/common/debugfs.c +++ b/drivers/accel/habanalabs/common/debugfs.c @@ -2052,7 +2052,7 @@ int hl_debugfs_device_init(struct hl_device *hdev) int count = ARRAY_SIZE(hl_debugfs_list); dev_entry->hdev = hdev; - dev_entry->entry_arr = kmalloc_array(count, sizeof(struct hl_debugfs_entry), GFP_KERNEL); + dev_entry->entry_arr = kmalloc_objs(struct hl_debugfs_entry, count); if (!dev_entry->entry_arr) return -ENOMEM; diff --git a/drivers/accel/habanalabs/common/decoder.c b/drivers/accel/habanalabs/common/decoder.c index c03a6da45d00..e4802f30c08a 100644 --- a/drivers/accel/habanalabs/common/decoder.c +++ b/drivers/accel/habanalabs/common/decoder.c @@ -98,7 +98,7 @@ int hl_dec_init(struct hl_device *hdev) if (!prop->max_dec) return 0; - hdev->dec = kcalloc(prop->max_dec, sizeof(struct hl_dec), GFP_KERNEL); + hdev->dec = kzalloc_objs(struct hl_dec, prop->max_dec); if (!hdev->dec) return -ENOMEM; diff --git a/drivers/accel/habanalabs/common/device.c b/drivers/accel/habanalabs/common/device.c index 999c92d7036e..09b27bac3a31 100644 --- a/drivers/accel/habanalabs/common/device.c +++ b/drivers/accel/habanalabs/common/device.c @@ -722,7 +722,7 @@ static int device_init_cdev(struct hl_device *hdev, const struct class *class, cdev_init(cdev, fops); cdev->owner = THIS_MODULE; - *dev = kzalloc(sizeof(**dev), GFP_KERNEL); + *dev = kzalloc_obj(**dev); if (!*dev) return -ENOMEM; @@ -892,9 +892,8 @@ static int device_early_init(struct hl_device *hdev) goto early_fini; if (hdev->asic_prop.completion_queues_count) { - hdev->cq_wq = kcalloc(hdev->asic_prop.completion_queues_count, - sizeof(struct workqueue_struct *), - GFP_KERNEL); + hdev->cq_wq = kzalloc_objs(struct workqueue_struct *, + hdev->asic_prop.completion_queues_count); if (!hdev->cq_wq) { rc = -ENOMEM; goto asid_fini; @@ -945,7 +944,7 @@ static int device_early_init(struct hl_device *hdev) goto free_ts_free_wq; } - hdev->hl_chip_info = kzalloc(sizeof(struct hwmon_chip_info), GFP_KERNEL); + hdev->hl_chip_info = kzalloc_obj(struct hwmon_chip_info); if (!hdev->hl_chip_info) { rc = -ENOMEM; goto free_prefetch_wq; @@ -1851,8 +1850,7 @@ kill_processes: } /* Allocate the kernel context */ - hdev->kernel_ctx = kzalloc(sizeof(*hdev->kernel_ctx), - GFP_KERNEL); + hdev->kernel_ctx = kzalloc_obj(*hdev->kernel_ctx); if (!hdev->kernel_ctx) { rc = -ENOMEM; hl_mmu_fini(hdev); @@ -2159,8 +2157,8 @@ int hl_device_init(struct hl_device *hdev) hdev->asic_prop.user_interrupt_count; if (user_interrupt_cnt) { - hdev->user_interrupt = kcalloc(user_interrupt_cnt, sizeof(*hdev->user_interrupt), - GFP_KERNEL); + hdev->user_interrupt = kzalloc_objs(*hdev->user_interrupt, + user_interrupt_cnt); if (!hdev->user_interrupt) { rc = -ENOMEM; goto early_fini; @@ -2226,9 +2224,8 @@ int hl_device_init(struct hl_device *hdev) * passed as arguments to request_irq */ if (cq_cnt) { - hdev->completion_queue = kcalloc(cq_cnt, - sizeof(*hdev->completion_queue), - GFP_KERNEL); + hdev->completion_queue = kzalloc_objs(*hdev->completion_queue, + cq_cnt); if (!hdev->completion_queue) { dev_err(hdev->dev, @@ -2249,8 +2246,8 @@ int hl_device_init(struct hl_device *hdev) hdev->completion_queue[i].cq_idx = i; } - hdev->shadow_cs_queue = kcalloc(hdev->asic_prop.max_pending_cs, - sizeof(struct hl_cs *), GFP_KERNEL); + hdev->shadow_cs_queue = kzalloc_objs(struct hl_cs *, + hdev->asic_prop.max_pending_cs); if (!hdev->shadow_cs_queue) { rc = -ENOMEM; goto cq_fini; @@ -2275,7 +2272,7 @@ int hl_device_init(struct hl_device *hdev) } /* Allocate the kernel context */ - hdev->kernel_ctx = kzalloc(sizeof(*hdev->kernel_ctx), GFP_KERNEL); + hdev->kernel_ctx = kzalloc_obj(*hdev->kernel_ctx); if (!hdev->kernel_ctx) { rc = -ENOMEM; goto mmu_fini; diff --git a/drivers/accel/habanalabs/common/firmware_if.c b/drivers/accel/habanalabs/common/firmware_if.c index eeb6b2a80fc7..c8f1e252241d 100644 --- a/drivers/accel/habanalabs/common/firmware_if.c +++ b/drivers/accel/habanalabs/common/firmware_if.c @@ -2681,7 +2681,7 @@ static int hl_fw_dynamic_send_msg(struct hl_device *hdev, struct lkd_msg_comms *msg; int rc; - msg = kzalloc(sizeof(*msg), GFP_KERNEL); + msg = kzalloc_obj(*msg); if (!msg) return -ENOMEM; diff --git a/drivers/accel/habanalabs/common/habanalabs_drv.c b/drivers/accel/habanalabs/common/habanalabs_drv.c index 0035748f3228..483e1ad9fc41 100644 --- a/drivers/accel/habanalabs/common/habanalabs_drv.c +++ b/drivers/accel/habanalabs/common/habanalabs_drv.c @@ -181,7 +181,7 @@ int hl_device_open(struct drm_device *ddev, struct drm_file *file_priv) struct hl_fpriv *hpriv; int rc; - hpriv = kzalloc(sizeof(*hpriv), GFP_KERNEL); + hpriv = kzalloc_obj(*hpriv); if (!hpriv) return -ENOMEM; @@ -291,7 +291,7 @@ int hl_device_open_ctrl(struct inode *inode, struct file *filp) return -ENXIO; } - hpriv = kzalloc(sizeof(*hpriv), GFP_KERNEL); + hpriv = kzalloc_obj(*hpriv); if (!hpriv) return -ENOMEM; diff --git a/drivers/accel/habanalabs/common/habanalabs_ioctl.c b/drivers/accel/habanalabs/common/habanalabs_ioctl.c index fdfdabc85e54..fef542afe035 100644 --- a/drivers/accel/habanalabs/common/habanalabs_ioctl.c +++ b/drivers/accel/habanalabs/common/habanalabs_ioctl.c @@ -201,7 +201,7 @@ static int debug_coresight(struct hl_device *hdev, struct hl_ctx *ctx, struct hl void *input = NULL, *output = NULL; int rc; - params = kzalloc(sizeof(*params), GFP_KERNEL); + params = kzalloc_obj(*params); if (!params) return -ENOMEM; @@ -682,11 +682,11 @@ static int sec_attest_info(struct hl_fpriv *hpriv, struct hl_info_args *args) if ((!max_size) || (!out)) return -EINVAL; - sec_attest_info = kmalloc(sizeof(*sec_attest_info), GFP_KERNEL); + sec_attest_info = kmalloc_obj(*sec_attest_info); if (!sec_attest_info) return -ENOMEM; - info = kzalloc(sizeof(*info), GFP_KERNEL); + info = kzalloc_obj(*info); if (!info) { rc = -ENOMEM; goto free_sec_attest_info; @@ -731,11 +731,11 @@ static int dev_info_signed(struct hl_fpriv *hpriv, struct hl_info_args *args) if ((!max_size) || (!out)) return -EINVAL; - dev_info_signed = kzalloc(sizeof(*dev_info_signed), GFP_KERNEL); + dev_info_signed = kzalloc_obj(*dev_info_signed); if (!dev_info_signed) return -ENOMEM; - info = kzalloc(sizeof(*info), GFP_KERNEL); + info = kzalloc_obj(*info); if (!info) { rc = -ENOMEM; goto free_dev_info_signed; diff --git a/drivers/accel/habanalabs/common/hldio.c b/drivers/accel/habanalabs/common/hldio.c index 083ae5610875..c33c817a962a 100644 --- a/drivers/accel/habanalabs/common/hldio.c +++ b/drivers/accel/habanalabs/common/hldio.c @@ -308,7 +308,7 @@ int hl_dio_ssd2hl(struct hl_device *hdev, struct hl_ctx *ctx, int fd, dev_dbg(hdev->dev, "SSD2HL fd=%d va=%#llx len=%#lx\n", fd, device_va, len_bytes); - io = kzalloc(sizeof(*io), GFP_KERNEL); + io = kzalloc_obj(*io); if (!io) { rc = -ENOMEM; goto out; diff --git a/drivers/accel/habanalabs/common/hw_queue.c b/drivers/accel/habanalabs/common/hw_queue.c index 3d04a7507cce..4232f374dfc3 100644 --- a/drivers/accel/habanalabs/common/hw_queue.c +++ b/drivers/accel/habanalabs/common/hw_queue.c @@ -837,7 +837,7 @@ static int ext_and_cpu_queue_init(struct hl_device *hdev, struct hl_hw_queue *q, q->kernel_address = p; - q->shadow_queue = kmalloc_array(HL_QUEUE_LENGTH, sizeof(struct hl_cs_job *), GFP_KERNEL); + q->shadow_queue = kmalloc_objs(struct hl_cs_job *, HL_QUEUE_LENGTH); if (!q->shadow_queue) { dev_err(hdev->dev, "Failed to allocate shadow queue for H/W queue %d\n", @@ -1082,8 +1082,8 @@ int hl_hw_queues_create(struct hl_device *hdev) struct hl_hw_queue *q; int i, rc, q_ready_cnt; - hdev->kernel_queues = kcalloc(asic->max_queues, - sizeof(*hdev->kernel_queues), GFP_KERNEL); + hdev->kernel_queues = kzalloc_objs(*hdev->kernel_queues, + asic->max_queues); if (!hdev->kernel_queues) { dev_err(hdev->dev, "Not enough memory for H/W queues\n"); diff --git a/drivers/accel/habanalabs/common/hwmon.c b/drivers/accel/habanalabs/common/hwmon.c index 52d1e6bf10dc..768604d2392d 100644 --- a/drivers/accel/habanalabs/common/hwmon.c +++ b/drivers/accel/habanalabs/common/hwmon.c @@ -195,15 +195,15 @@ int hl_build_hwmon_channel_info(struct hl_device *hdev, struct cpucp_sensor *sen curr_arr[sensors_by_type_next_index[type]++] = flags; } - channels_info = kcalloc(num_active_sensor_types + 1, sizeof(struct hwmon_channel_info *), - GFP_KERNEL); + channels_info = kzalloc_objs(struct hwmon_channel_info *, + num_active_sensor_types + 1); if (!channels_info) { rc = -ENOMEM; goto channels_info_array_err; } for (i = 0 ; i < num_active_sensor_types ; i++) { - channels_info[i] = kzalloc(sizeof(*channels_info[i]), GFP_KERNEL); + channels_info[i] = kzalloc_obj(*channels_info[i]); if (!channels_info[i]) { rc = -ENOMEM; goto channel_info_err; diff --git a/drivers/accel/habanalabs/common/irq.c b/drivers/accel/habanalabs/common/irq.c index 7c9f2f6a2870..023dd3f1c82c 100644 --- a/drivers/accel/habanalabs/common/irq.c +++ b/drivers/accel/habanalabs/common/irq.c @@ -267,7 +267,7 @@ static int handle_registration_node(struct hl_device *hdev, struct hl_user_pendi if (!(*free_list)) { /* Alloc/Init the timestamp registration free objects list */ - *free_list = kmalloc(sizeof(struct list_head), GFP_ATOMIC); + *free_list = kmalloc_obj(struct list_head, GFP_ATOMIC); if (!(*free_list)) return -ENOMEM; @@ -283,14 +283,16 @@ static int handle_registration_node(struct hl_device *hdev, struct hl_user_pendi intr->interrupt_id); if (!(*dynamic_alloc_list)) { - *dynamic_alloc_list = kmalloc(sizeof(struct list_head), GFP_ATOMIC); + *dynamic_alloc_list = kmalloc_obj(struct list_head, + GFP_ATOMIC); if (!(*dynamic_alloc_list)) return -ENOMEM; INIT_LIST_HEAD(*dynamic_alloc_list); } - free_node = kmalloc(sizeof(struct timestamp_reg_free_node), GFP_ATOMIC); + free_node = kmalloc_obj(struct timestamp_reg_free_node, + GFP_ATOMIC); if (!free_node) return -ENOMEM; @@ -344,7 +346,7 @@ static void handle_user_interrupt_ts_list(struct hl_device *hdev, struct hl_user * and move nodes hanged on the free list back to the interrupt ts list * we always alloc the job of the WQ at the beginning. */ - job = kmalloc(sizeof(*job), GFP_ATOMIC); + job = kmalloc_obj(*job, GFP_ATOMIC); if (!job) return; @@ -542,7 +544,7 @@ irqreturn_t hl_irq_handler_eq(int irq, void *arg) goto skip_irq; } - handle_eqe_work = kmalloc(sizeof(*handle_eqe_work), GFP_ATOMIC); + handle_eqe_work = kmalloc_obj(*handle_eqe_work, GFP_ATOMIC); if (handle_eqe_work) { INIT_WORK(&handle_eqe_work->eq_work, irq_handle_eqe); handle_eqe_work->hdev = hdev; diff --git a/drivers/accel/habanalabs/common/memory.c b/drivers/accel/habanalabs/common/memory.c index 633db4bff46f..361cff577381 100644 --- a/drivers/accel/habanalabs/common/memory.c +++ b/drivers/accel/habanalabs/common/memory.c @@ -125,7 +125,7 @@ static int alloc_device_memory(struct hl_ctx *ctx, struct hl_mem_in *args, } } - phys_pg_pack = kzalloc(sizeof(*phys_pg_pack), GFP_KERNEL); + phys_pg_pack = kzalloc_obj(*phys_pg_pack); if (!phys_pg_pack) { rc = -ENOMEM; goto pages_pack_err; @@ -228,7 +228,7 @@ static int dma_map_host_va(struct hl_device *hdev, u64 addr, u64 size, struct hl_userptr *userptr; int rc; - userptr = kzalloc(sizeof(*userptr), GFP_KERNEL); + userptr = kzalloc_obj(*userptr); if (!userptr) { rc = -ENOMEM; goto userptr_err; @@ -501,7 +501,7 @@ static int add_va_block_locked(struct hl_device *hdev, res = va_block; } - va_block = kmalloc(sizeof(*va_block), GFP_KERNEL); + va_block = kmalloc_obj(*va_block); if (!va_block) return -ENOMEM; @@ -850,7 +850,7 @@ static int init_phys_pg_pack_from_userptr(struct hl_ctx *ctx, dma_addr_t dma_addr; int rc, i, j; - phys_pg_pack = kzalloc(sizeof(*phys_pg_pack), GFP_KERNEL); + phys_pg_pack = kzalloc_obj(*phys_pg_pack); if (!phys_pg_pack) return -ENOMEM; @@ -1152,7 +1152,7 @@ static int map_device_va(struct hl_ctx *ctx, struct hl_mem_in *args, u64 *device goto shared_err; } - hnode = kzalloc(sizeof(*hnode), GFP_KERNEL); + hnode = kzalloc_obj(*hnode); if (!hnode) { rc = -ENOMEM; goto hnode_err; @@ -1482,7 +1482,7 @@ int hl_hw_block_mmap(struct hl_fpriv *hpriv, struct vm_area_struct *vma) return -EINVAL; } - lnode = kzalloc(sizeof(*lnode), GFP_KERNEL); + lnode = kzalloc_obj(*lnode); if (!lnode) return -ENOMEM; @@ -1553,7 +1553,7 @@ static struct sg_table *alloc_sgt_from_device_pages(struct hl_device *hdev, u64 return ERR_PTR(-EINVAL); } - sgt = kzalloc(sizeof(*sgt), GFP_KERNEL); + sgt = kzalloc_obj(*sgt); if (!sgt) return ERR_PTR(-ENOMEM); @@ -2046,7 +2046,7 @@ static int export_dmabuf_from_addr(struct hl_ctx *ctx, u64 addr, u64 size, u64 o return -EINVAL; } - hl_dmabuf = kzalloc(sizeof(*hl_dmabuf), GFP_KERNEL); + hl_dmabuf = kzalloc_obj(*hl_dmabuf); if (!hl_dmabuf) return -ENOMEM; @@ -2116,7 +2116,7 @@ static int hl_ts_alloc_buf(struct hl_mmap_mem_buf *buf, gfp_t gfp, void *args) num_elements = *(u32 *)args; - ts_buff = kzalloc(sizeof(*ts_buff), gfp); + ts_buff = kzalloc_obj(*ts_buff, gfp); if (!ts_buff) return -ENOMEM; @@ -2323,7 +2323,7 @@ static int get_user_memory(struct hl_device *hdev, u64 addr, u64 size, return -EFAULT; } - userptr->pages = kvmalloc_array(npages, sizeof(struct page *), GFP_KERNEL); + userptr->pages = kvmalloc_objs(struct page *, npages); if (!userptr->pages) return -ENOMEM; @@ -2395,7 +2395,7 @@ int hl_pin_host_memory(struct hl_device *hdev, u64 addr, u64 size, } userptr->pid = current->pid; - userptr->sgt = kzalloc(sizeof(*userptr->sgt), GFP_KERNEL); + userptr->sgt = kzalloc_obj(*userptr->sgt); if (!userptr->sgt) return -ENOMEM; @@ -2611,7 +2611,7 @@ static int vm_ctx_init_with_ranges(struct hl_ctx *ctx, for (i = 0 ; i < HL_VA_RANGE_TYPE_MAX ; i++) { ctx->va_range[i] = - kzalloc(sizeof(struct hl_va_range), GFP_KERNEL); + kzalloc_obj(struct hl_va_range); if (!ctx->va_range[i]) { rc = -ENOMEM; goto free_va_range; diff --git a/drivers/accel/habanalabs/common/memory_mgr.c b/drivers/accel/habanalabs/common/memory_mgr.c index 4401beb99e42..9fdd34acf389 100644 --- a/drivers/accel/habanalabs/common/memory_mgr.c +++ b/drivers/accel/habanalabs/common/memory_mgr.c @@ -152,7 +152,7 @@ hl_mmap_mem_buf_alloc(struct hl_mem_mgr *mmg, struct hl_mmap_mem_buf *buf; int rc; - buf = kzalloc(sizeof(*buf), gfp); + buf = kzalloc_obj(*buf, gfp); if (!buf) return NULL; diff --git a/drivers/accel/habanalabs/common/mmu/mmu.c b/drivers/accel/habanalabs/common/mmu/mmu.c index 79823facce7f..6c7c4ff8a8a9 100644 --- a/drivers/accel/habanalabs/common/mmu/mmu.c +++ b/drivers/accel/habanalabs/common/mmu/mmu.c @@ -697,7 +697,7 @@ int hl_mmu_prefetch_cache_range(struct hl_ctx *ctx, u32 flags, u32 asid, u64 va, { struct hl_prefetch_work *handle_prefetch_work; - handle_prefetch_work = kmalloc(sizeof(*handle_prefetch_work), GFP_KERNEL); + handle_prefetch_work = kmalloc_obj(*handle_prefetch_work); if (!handle_prefetch_work) return -ENOMEM; @@ -843,7 +843,7 @@ int hl_mmu_hr_init(struct hl_device *hdev, struct hl_mmu_hr_priv *hr_priv, u32 h return -ENOMEM; } - hr_priv->mmu_asid_hop0 = kvcalloc(prop->max_asid, sizeof(struct pgt_info), GFP_KERNEL); + hr_priv->mmu_asid_hop0 = kvzalloc_objs(struct pgt_info, prop->max_asid); if (ZERO_OR_NULL_PTR(hr_priv->mmu_asid_hop0)) { dev_err(hdev->dev, "Failed to allocate hr-mmu hop0 table\n"); rc = -ENOMEM; @@ -1071,7 +1071,7 @@ struct pgt_info *hl_mmu_hr_alloc_hop(struct hl_ctx *ctx, struct hl_mmu_hr_priv * void *virt_addr; int i, retry = 1; - pgt_info = kmalloc(sizeof(*pgt_info), GFP_KERNEL); + pgt_info = kmalloc_obj(*pgt_info); if (!pgt_info) return NULL; @@ -1325,7 +1325,7 @@ u64 hl_mmu_dr_alloc_hop(struct hl_ctx *ctx) struct pgt_info *pgt_info; u64 phys_addr, shadow_addr; - pgt_info = kmalloc(sizeof(*pgt_info), GFP_KERNEL); + pgt_info = kmalloc_obj(*pgt_info); if (!pgt_info) return ULLONG_MAX; diff --git a/drivers/accel/habanalabs/common/security.c b/drivers/accel/habanalabs/common/security.c index 5402a3cd0491..abe5ed0d405a 100644 --- a/drivers/accel/habanalabs/common/security.c +++ b/drivers/accel/habanalabs/common/security.c @@ -312,9 +312,7 @@ int hl_init_pb_with_mask(struct hl_device *hdev, u32 num_dcores, int i, j; struct hl_block_glbl_sec *glbl_sec; - glbl_sec = kcalloc(blocks_array_size, - sizeof(struct hl_block_glbl_sec), - GFP_KERNEL); + glbl_sec = kzalloc_objs(struct hl_block_glbl_sec, blocks_array_size); if (!glbl_sec) return -ENOMEM; @@ -393,9 +391,7 @@ int hl_init_pb_ranges_with_mask(struct hl_device *hdev, u32 num_dcores, int i, j, rc = 0; struct hl_block_glbl_sec *glbl_sec; - glbl_sec = kcalloc(blocks_array_size, - sizeof(struct hl_block_glbl_sec), - GFP_KERNEL); + glbl_sec = kzalloc_objs(struct hl_block_glbl_sec, blocks_array_size); if (!glbl_sec) return -ENOMEM; @@ -476,9 +472,7 @@ int hl_init_pb_single_dcore(struct hl_device *hdev, u32 dcore_offset, int i, rc = 0; struct hl_block_glbl_sec *glbl_sec; - glbl_sec = kcalloc(blocks_array_size, - sizeof(struct hl_block_glbl_sec), - GFP_KERNEL); + glbl_sec = kzalloc_objs(struct hl_block_glbl_sec, blocks_array_size); if (!glbl_sec) return -ENOMEM; @@ -524,9 +518,7 @@ int hl_init_pb_ranges_single_dcore(struct hl_device *hdev, u32 dcore_offset, int i; struct hl_block_glbl_sec *glbl_sec; - glbl_sec = kcalloc(blocks_array_size, - sizeof(struct hl_block_glbl_sec), - GFP_KERNEL); + glbl_sec = kzalloc_objs(struct hl_block_glbl_sec, blocks_array_size); if (!glbl_sec) return -ENOMEM; diff --git a/drivers/accel/habanalabs/common/state_dump.c b/drivers/accel/habanalabs/common/state_dump.c index 3a9931f24259..1c70a363a328 100644 --- a/drivers/accel/habanalabs/common/state_dump.c +++ b/drivers/accel/habanalabs/common/state_dump.c @@ -400,7 +400,7 @@ static int hl_state_dump_print_syncs(struct hl_device *hdev, u32 index; int rc = 0; - map = kzalloc(sizeof(*map), GFP_KERNEL); + map = kzalloc_obj(*map); if (!map) return -ENOMEM; diff --git a/drivers/accel/habanalabs/gaudi/gaudi.c b/drivers/accel/habanalabs/gaudi/gaudi.c index 34771d75da9d..5ddb38aaff7a 100644 --- a/drivers/accel/habanalabs/gaudi/gaudi.c +++ b/drivers/accel/habanalabs/gaudi/gaudi.c @@ -539,9 +539,8 @@ static int gaudi_set_fixed_properties(struct hl_device *hdev) int i; prop->max_queues = GAUDI_QUEUE_ID_SIZE; - prop->hw_queues_props = kcalloc(prop->max_queues, - sizeof(struct hw_queue_properties), - GFP_KERNEL); + prop->hw_queues_props = kzalloc_objs(struct hw_queue_properties, + prop->max_queues); if (!prop->hw_queues_props) return -ENOMEM; @@ -1853,7 +1852,7 @@ static int gaudi_sw_init(struct hl_device *hdev) int rc; /* Allocate device structure */ - gaudi = kzalloc(sizeof(*gaudi), GFP_KERNEL); + gaudi = kzalloc_obj(*gaudi); if (!gaudi) return -ENOMEM; @@ -4906,7 +4905,7 @@ static int gaudi_pin_memory_before_cs(struct hl_device *hdev, parser->job_userptr_list, &userptr)) goto already_pinned; - userptr = kzalloc(sizeof(*userptr), GFP_KERNEL); + userptr = kzalloc_obj(*userptr); if (!userptr) return -ENOMEM; @@ -8843,7 +8842,7 @@ static int gaudi_add_sync_to_engine_map_entry( reg_value -= lower_32_bits(CFG_BASE); /* create a new hash entry */ - entry = kzalloc(sizeof(*entry), GFP_KERNEL); + entry = kzalloc_obj(*entry); if (!entry) return -ENOMEM; entry->engine_type = engine_type; diff --git a/drivers/accel/habanalabs/gaudi2/gaudi2.c b/drivers/accel/habanalabs/gaudi2/gaudi2.c index b8c0689dba64..5d9c6f0698b7 100644 --- a/drivers/accel/habanalabs/gaudi2/gaudi2.c +++ b/drivers/accel/habanalabs/gaudi2/gaudi2.c @@ -2762,8 +2762,8 @@ static int gaudi2_set_fixed_properties(struct hl_device *hdev) int i, rc; prop->max_queues = GAUDI2_QUEUE_ID_SIZE; - prop->hw_queues_props = kcalloc(prop->max_queues, sizeof(struct hw_queue_properties), - GFP_KERNEL); + prop->hw_queues_props = kzalloc_objs(struct hw_queue_properties, + prop->max_queues); if (!prop->hw_queues_props) return -ENOMEM; @@ -3943,8 +3943,8 @@ static int gaudi2_special_blocks_config(struct hl_device *hdev) /* Configure Special blocks */ prop->glbl_err_max_cause_num = GAUDI2_GLBL_ERR_MAX_CAUSE_NUM; prop->num_of_special_blocks = ARRAY_SIZE(gaudi2_special_blocks); - prop->special_blocks = kmalloc_array(prop->num_of_special_blocks, - sizeof(*prop->special_blocks), GFP_KERNEL); + prop->special_blocks = kmalloc_objs(*prop->special_blocks, + prop->num_of_special_blocks); if (!prop->special_blocks) return -ENOMEM; @@ -3958,8 +3958,8 @@ static int gaudi2_special_blocks_config(struct hl_device *hdev) if (ARRAY_SIZE(gaudi2_iterator_skip_block_types)) { prop->skip_special_blocks_cfg.block_types = - kmalloc_array(ARRAY_SIZE(gaudi2_iterator_skip_block_types), - sizeof(gaudi2_iterator_skip_block_types[0]), GFP_KERNEL); + kmalloc_objs(gaudi2_iterator_skip_block_types[0], + ARRAY_SIZE(gaudi2_iterator_skip_block_types)); if (!prop->skip_special_blocks_cfg.block_types) { rc = -ENOMEM; goto free_special_blocks; @@ -3974,8 +3974,8 @@ static int gaudi2_special_blocks_config(struct hl_device *hdev) if (ARRAY_SIZE(gaudi2_iterator_skip_block_ranges)) { prop->skip_special_blocks_cfg.block_ranges = - kmalloc_array(ARRAY_SIZE(gaudi2_iterator_skip_block_ranges), - sizeof(gaudi2_iterator_skip_block_ranges[0]), GFP_KERNEL); + kmalloc_objs(gaudi2_iterator_skip_block_ranges[0], + ARRAY_SIZE(gaudi2_iterator_skip_block_ranges)); if (!prop->skip_special_blocks_cfg.block_ranges) { rc = -ENOMEM; goto free_skip_special_blocks_types; @@ -4054,7 +4054,7 @@ static int gaudi2_sw_init(struct hl_device *hdev) int i, rc; /* Allocate device structure */ - gaudi2 = kzalloc(sizeof(*gaudi2), GFP_KERNEL); + gaudi2 = kzalloc_obj(*gaudi2); if (!gaudi2) return -ENOMEM; diff --git a/drivers/accel/habanalabs/gaudi2/gaudi2_security.c b/drivers/accel/habanalabs/gaudi2/gaudi2_security.c index 307ccb912ccd..ea3a0e57c836 100644 --- a/drivers/accel/habanalabs/gaudi2/gaudi2_security.c +++ b/drivers/accel/habanalabs/gaudi2/gaudi2_security.c @@ -2620,7 +2620,7 @@ static int gaudi2_init_pb_tpc(struct hl_device *hdev) block_array_size = ARRAY_SIZE(gaudi2_pb_dcr0_tpc0); - glbl_sec = kcalloc(block_array_size, sizeof(struct hl_block_glbl_sec), GFP_KERNEL); + glbl_sec = kzalloc_objs(struct hl_block_glbl_sec, block_array_size); if (!glbl_sec) return -ENOMEM; diff --git a/drivers/accel/habanalabs/goya/goya.c b/drivers/accel/habanalabs/goya/goya.c index 84768e306269..bf12702e0a69 100644 --- a/drivers/accel/habanalabs/goya/goya.c +++ b/drivers/accel/habanalabs/goya/goya.c @@ -363,9 +363,8 @@ int goya_set_fixed_properties(struct hl_device *hdev) int i; prop->max_queues = GOYA_QUEUE_ID_SIZE; - prop->hw_queues_props = kcalloc(prop->max_queues, - sizeof(struct hw_queue_properties), - GFP_KERNEL); + prop->hw_queues_props = kzalloc_objs(struct hw_queue_properties, + prop->max_queues); if (!prop->hw_queues_props) return -ENOMEM; @@ -970,7 +969,7 @@ static int goya_sw_init(struct hl_device *hdev) int rc; /* Allocate device structure */ - goya = kzalloc(sizeof(*goya), GFP_KERNEL); + goya = kzalloc_obj(*goya); if (!goya) return -ENOMEM; @@ -1031,7 +1030,7 @@ static int goya_sw_init(struct hl_device *hdev) hdev->asic_funcs->set_pci_memory_regions(hdev); - goya->goya_work = kmalloc(sizeof(struct goya_work_freq), GFP_KERNEL); + goya->goya_work = kmalloc_obj(struct goya_work_freq); if (!goya->goya_work) { rc = -ENOMEM; goto free_cpu_accessible_dma_pool; @@ -3336,7 +3335,7 @@ static int goya_pin_memory_before_cs(struct hl_device *hdev, parser->job_userptr_list, &userptr)) goto already_pinned; - userptr = kzalloc(sizeof(*userptr), GFP_KERNEL); + userptr = kzalloc_obj(*userptr); if (!userptr) return -ENOMEM; diff --git a/drivers/accel/ivpu/ivpu_debugfs.c b/drivers/accel/ivpu/ivpu_debugfs.c index 3bd85ee6c26b..a09f54fc4302 100644 --- a/drivers/accel/ivpu/ivpu_debugfs.c +++ b/drivers/accel/ivpu/ivpu_debugfs.c @@ -20,6 +20,7 @@ #include "ivpu_hw.h" #include "ivpu_jsm_msg.h" #include "ivpu_pm.h" +#include "vpu_boot_api.h" static inline struct ivpu_device *seq_to_ivpu(struct seq_file *s) { @@ -96,7 +97,8 @@ static int last_bootmode_show(struct seq_file *s, void *v) { struct ivpu_device *vdev = seq_to_ivpu(s); - seq_printf(s, "%s\n", (vdev->pm->is_warmboot) ? "warmboot" : "coldboot"); + seq_printf(s, "%s\n", (vdev->fw->last_boot_mode == VPU_BOOT_TYPE_WARMBOOT) ? + "warm boot" : "cold boot"); return 0; } diff --git a/drivers/accel/ivpu/ivpu_drv.c b/drivers/accel/ivpu/ivpu_drv.c index 3d6fccdefdd6..5900a40c7a78 100644 --- a/drivers/accel/ivpu/ivpu_drv.c +++ b/drivers/accel/ivpu/ivpu_drv.c @@ -237,7 +237,7 @@ static int ivpu_open(struct drm_device *dev, struct drm_file *file) if (!drm_dev_enter(dev, &idx)) return -ENODEV; - file_priv = kzalloc(sizeof(*file_priv), GFP_KERNEL); + file_priv = kzalloc_obj(*file_priv); if (!file_priv) { ret = -ENOMEM; goto err_dev_exit; @@ -384,6 +384,7 @@ int ivpu_boot(struct ivpu_device *vdev) drm_WARN_ON(&vdev->drm, !xa_empty(&vdev->submitted_jobs_xa)); ivpu_fw_boot_params_setup(vdev, ivpu_bo_vaddr(vdev->fw->mem_bp)); + vdev->fw->last_boot_mode = vdev->fw->next_boot_mode; ret = ivpu_hw_boot_fw(vdev); if (ret) { @@ -396,13 +397,12 @@ int ivpu_boot(struct ivpu_device *vdev) ivpu_err(vdev, "Failed to boot the firmware: %d\n", ret); goto err_diagnose_failure; } - ivpu_hw_irq_clear(vdev); enable_irq(vdev->irq); ivpu_hw_irq_enable(vdev); ivpu_ipc_enable(vdev); - if (ivpu_fw_is_cold_boot(vdev)) { + if (!ivpu_fw_is_warm_boot(vdev)) { ret = ivpu_pm_dct_init(vdev); if (ret) goto err_disable_ipc; diff --git a/drivers/accel/ivpu/ivpu_fw.c b/drivers/accel/ivpu/ivpu_fw.c index 48386d2cddbb..107f8ad31050 100644 --- a/drivers/accel/ivpu/ivpu_fw.c +++ b/drivers/accel/ivpu/ivpu_fw.c @@ -300,9 +300,7 @@ static int ivpu_fw_parse(struct ivpu_device *vdev) fw->image_load_offset = image_load_addr - runtime_addr; fw->image_size = image_size; fw->shave_nn_size = PAGE_ALIGN(fw_hdr->shave_nn_fw_size); - fw->cold_boot_entry_point = fw_hdr->entry_point; - fw->entry_point = fw->cold_boot_entry_point; fw->trace_level = min_t(u32, ivpu_fw_log_level, IVPU_FW_LOG_FATAL); fw->trace_destination_mask = VPU_TRACE_DESTINATION_VERBOSE_TRACING; @@ -338,7 +336,7 @@ static int ivpu_fw_parse(struct ivpu_device *vdev) fw->image_load_offset, fw->image_size); ivpu_dbg(vdev, FW_BOOT, "Read-only section: address 0x%llx, size %u\n", fw->read_only_addr, fw->read_only_size); - ivpu_dbg(vdev, FW_BOOT, "FW entry point: 0x%llx\n", fw->entry_point); + ivpu_dbg(vdev, FW_BOOT, "FW cold boot entry point: 0x%llx\n", fw->cold_boot_entry_point); ivpu_dbg(vdev, FW_BOOT, "SHAVE NN size: %u\n", fw->shave_nn_size); return 0; @@ -616,6 +614,7 @@ static void ivpu_fw_boot_params_print(struct ivpu_device *vdev, struct vpu_boot_ boot_params->power_profile); ivpu_dbg(vdev, FW_BOOT, "boot_params.vpu_uses_ecc_mca_signal = 0x%x\n", boot_params->vpu_uses_ecc_mca_signal); + ivpu_dbg(vdev, FW_BOOT, "boot_params.boot_type = 0x%x\n", boot_params->boot_type); } void ivpu_fw_boot_params_setup(struct ivpu_device *vdev, struct vpu_boot_params *boot_params) @@ -623,7 +622,7 @@ void ivpu_fw_boot_params_setup(struct ivpu_device *vdev, struct vpu_boot_params struct ivpu_bo *ipc_mem_rx = vdev->ipc->mem_rx; /* In case of warm boot only update variable params */ - if (!ivpu_fw_is_cold_boot(vdev)) { + if (ivpu_fw_is_warm_boot(vdev)) { boot_params->d0i3_residency_time_us = ktime_us_delta(ktime_get_boottime(), vdev->hw->d0i3_entry_host_ts); boot_params->d0i3_entry_vpu_ts = vdev->hw->d0i3_entry_vpu_ts; @@ -635,16 +634,16 @@ void ivpu_fw_boot_params_setup(struct ivpu_device *vdev, struct vpu_boot_params boot_params->d0i3_entry_vpu_ts); ivpu_dbg(vdev, FW_BOOT, "boot_params.system_time_us = %llu\n", boot_params->system_time_us); + ivpu_dbg(vdev, FW_BOOT, "boot_params.boot_type = 0x%x\n", boot_params->boot_type); boot_params->save_restore_ret_address = 0; - vdev->pm->is_warmboot = true; + boot_params->boot_type = VPU_BOOT_TYPE_WARMBOOT; wmb(); /* Flush WC buffers after writing save_restore_ret_address */ return; } memset(boot_params, 0, sizeof(*boot_params)); - vdev->pm->is_warmboot = false; - + boot_params->boot_type = VPU_BOOT_TYPE_COLDBOOT; boot_params->magic = VPU_BOOT_PARAMS_MAGIC; boot_params->vpu_id = to_pci_dev(vdev->drm.dev)->bus->number; diff --git a/drivers/accel/ivpu/ivpu_fw.h b/drivers/accel/ivpu/ivpu_fw.h index 00945892b55e..d3c410912c9c 100644 --- a/drivers/accel/ivpu/ivpu_fw.h +++ b/drivers/accel/ivpu/ivpu_fw.h @@ -6,6 +6,7 @@ #ifndef __IVPU_FW_H__ #define __IVPU_FW_H__ +#include "vpu_boot_api.h" #include "vpu_jsm_api.h" #define FW_VERSION_HEADER_SIZE SZ_4K @@ -34,8 +35,10 @@ struct ivpu_fw_info { u64 image_load_offset; u32 image_size; u32 shave_nn_size; - u64 entry_point; /* Cold or warm boot entry point for next boot */ + u64 warm_boot_entry_point; u64 cold_boot_entry_point; + u8 last_boot_mode; + u8 next_boot_mode; u32 trace_level; u32 trace_destination_mask; u64 trace_hw_component_mask; @@ -54,9 +57,9 @@ void ivpu_fw_fini(struct ivpu_device *vdev); void ivpu_fw_load(struct ivpu_device *vdev); void ivpu_fw_boot_params_setup(struct ivpu_device *vdev, struct vpu_boot_params *boot_params); -static inline bool ivpu_fw_is_cold_boot(struct ivpu_device *vdev) +static inline bool ivpu_fw_is_warm_boot(struct ivpu_device *vdev) { - return vdev->fw->entry_point == vdev->fw->cold_boot_entry_point; + return vdev->fw->next_boot_mode == VPU_BOOT_TYPE_WARMBOOT; } static inline u32 ivpu_fw_preempt_buf_size(struct ivpu_device *vdev) diff --git a/drivers/accel/ivpu/ivpu_gem.c b/drivers/accel/ivpu/ivpu_gem.c index ece68f570b7e..98b9ce26962b 100644 --- a/drivers/accel/ivpu/ivpu_gem.c +++ b/drivers/accel/ivpu/ivpu_gem.c @@ -95,7 +95,7 @@ int __must_check ivpu_bo_bind(struct ivpu_bo *bo) if (!bo->mmu_mapped) { drm_WARN_ON(&vdev->drm, !bo->ctx); - ret = ivpu_mmu_context_map_sgt(vdev, bo->ctx, bo->vpu_addr, sgt, + ret = ivpu_mmu_context_map_sgt(vdev, bo->ctx, bo->vpu_addr, sgt, ivpu_bo_size(bo), ivpu_bo_is_snooped(bo), ivpu_bo_is_read_only(bo)); if (ret) { ivpu_err(vdev, "Failed to map BO in MMU: %d\n", ret); diff --git a/drivers/accel/ivpu/ivpu_gem_userptr.c b/drivers/accel/ivpu/ivpu_gem_userptr.c index 25ba606164c0..7cbf3a4cdc73 100644 --- a/drivers/accel/ivpu/ivpu_gem_userptr.c +++ b/drivers/accel/ivpu/ivpu_gem_userptr.c @@ -77,7 +77,7 @@ ivpu_create_userptr_dmabuf(struct ivpu_device *vdev, void __user *user_ptr, if (!(flags & DRM_IVPU_BO_READ_ONLY)) gup_flags |= FOLL_WRITE; - pages = kvmalloc_array(nr_pages, sizeof(*pages), GFP_KERNEL); + pages = kvmalloc_objs(*pages, nr_pages); if (!pages) return ERR_PTR(-ENOMEM); @@ -94,7 +94,7 @@ ivpu_create_userptr_dmabuf(struct ivpu_device *vdev, void __user *user_ptr, goto unpin_pages; } - sgt = kmalloc(sizeof(*sgt), GFP_KERNEL); + sgt = kmalloc_obj(*sgt); if (!sgt) { ret = -ENOMEM; goto unpin_pages; diff --git a/drivers/accel/ivpu/ivpu_hw_40xx_reg.h b/drivers/accel/ivpu/ivpu_hw_40xx_reg.h index fc0ee8d637f9..421242acb184 100644 --- a/drivers/accel/ivpu/ivpu_hw_40xx_reg.h +++ b/drivers/accel/ivpu/ivpu_hw_40xx_reg.h @@ -121,6 +121,12 @@ #define VPU_50XX_HOST_SS_AON_PWR_ISLAND_STATUS_DLY 0x0003006cu #define VPU_50XX_HOST_SS_AON_PWR_ISLAND_STATUS_DLY_STATUS_DLY_MASK GENMASK(7, 0) +#define VPU_40XX_HOST_SS_AON_RETENTION0 0x0003000cu +#define VPU_40XX_HOST_SS_AON_RETENTION1 0x00030010u +#define VPU_40XX_HOST_SS_AON_RETENTION2 0x00030014u +#define VPU_40XX_HOST_SS_AON_RETENTION3 0x00030018u +#define VPU_40XX_HOST_SS_AON_RETENTION4 0x0003001cu + #define VPU_40XX_HOST_SS_AON_IDLE_GEN 0x00030200u #define VPU_40XX_HOST_SS_AON_IDLE_GEN_EN_MASK BIT_MASK(0) #define VPU_40XX_HOST_SS_AON_IDLE_GEN_HW_PG_EN_MASK BIT_MASK(1) diff --git a/drivers/accel/ivpu/ivpu_hw_ip.c b/drivers/accel/ivpu/ivpu_hw_ip.c index 06aa1e7dc50b..959984c54341 100644 --- a/drivers/accel/ivpu/ivpu_hw_ip.c +++ b/drivers/accel/ivpu/ivpu_hw_ip.c @@ -5,6 +5,7 @@ #include "ivpu_drv.h" #include "ivpu_fw.h" +#include "ivpu_gem.h" #include "ivpu_hw.h" #include "ivpu_hw_37xx_reg.h" #include "ivpu_hw_40xx_reg.h" @@ -816,6 +817,14 @@ void ivpu_hw_ip_tbu_mmu_enable(struct ivpu_device *vdev) return ivpu_hw_ip_tbu_mmu_enable_40xx(vdev); } +static inline u64 get_entry_point_addr(struct ivpu_device *vdev) +{ + if (ivpu_fw_is_warm_boot(vdev)) + return vdev->fw->warm_boot_entry_point; + else + return vdev->fw->cold_boot_entry_point; +} + static int soc_cpu_boot_37xx(struct ivpu_device *vdev) { u32 val; @@ -832,15 +841,12 @@ static int soc_cpu_boot_37xx(struct ivpu_device *vdev) val = REG_CLR_FLD(VPU_37XX_CPU_SS_MSSCPU_CPR_LEON_RT_VEC, IRQI_RESUME0, val); REGV_WR32(VPU_37XX_CPU_SS_MSSCPU_CPR_LEON_RT_VEC, val); - val = vdev->fw->entry_point >> 9; + val = get_entry_point_addr(vdev) >> 9; REGV_WR32(VPU_37XX_HOST_SS_LOADING_ADDRESS_LO, val); val = REG_SET_FLD(VPU_37XX_HOST_SS_LOADING_ADDRESS_LO, DONE, val); REGV_WR32(VPU_37XX_HOST_SS_LOADING_ADDRESS_LO, val); - ivpu_dbg(vdev, PM, "Booting firmware, mode: %s\n", - vdev->fw->entry_point == vdev->fw->cold_boot_entry_point ? "cold boot" : "resume"); - return 0; } @@ -894,46 +900,68 @@ static int soc_cpu_drive_40xx(struct ivpu_device *vdev, bool enable) return ret; } -static int soc_cpu_enable(struct ivpu_device *vdev) +static void soc_cpu_set_entry_point_40xx(struct ivpu_device *vdev, u64 entry_point) { - if (ivpu_hw_ip_gen(vdev) >= IVPU_HW_IP_60XX) - return 0; + u64 val64; + u32 val; + + val64 = entry_point; + val64 <<= ffs(VPU_40XX_HOST_SS_VERIFICATION_ADDRESS_LO_IMAGE_LOCATION_MASK) - 1; + REGV_WR64(VPU_40XX_HOST_SS_VERIFICATION_ADDRESS_LO, val64); - return soc_cpu_drive_40xx(vdev, true); + val = REGV_RD32(VPU_40XX_HOST_SS_VERIFICATION_ADDRESS_LO); + val = REG_SET_FLD(VPU_40XX_HOST_SS_VERIFICATION_ADDRESS_LO, DONE, val); + REGV_WR32(VPU_40XX_HOST_SS_VERIFICATION_ADDRESS_LO, val); } static int soc_cpu_boot_40xx(struct ivpu_device *vdev) { int ret; - u32 val; - u64 val64; - ret = soc_cpu_enable(vdev); + ret = soc_cpu_drive_40xx(vdev, true); if (ret) { ivpu_err(vdev, "Failed to enable SOC CPU: %d\n", ret); return ret; } - val64 = vdev->fw->entry_point; - val64 <<= ffs(VPU_40XX_HOST_SS_VERIFICATION_ADDRESS_LO_IMAGE_LOCATION_MASK) - 1; - REGV_WR64(VPU_40XX_HOST_SS_VERIFICATION_ADDRESS_LO, val64); + soc_cpu_set_entry_point_40xx(vdev, get_entry_point_addr(vdev)); - val = REGV_RD32(VPU_40XX_HOST_SS_VERIFICATION_ADDRESS_LO); - val = REG_SET_FLD(VPU_40XX_HOST_SS_VERIFICATION_ADDRESS_LO, DONE, val); - REGV_WR32(VPU_40XX_HOST_SS_VERIFICATION_ADDRESS_LO, val); + return 0; +} - ivpu_dbg(vdev, PM, "Booting firmware, mode: %s\n", - ivpu_fw_is_cold_boot(vdev) ? "cold boot" : "resume"); +static int soc_cpu_boot_60xx(struct ivpu_device *vdev) +{ + REGV_WR64(VPU_40XX_HOST_SS_AON_RETENTION1, vdev->fw->mem_bp->vpu_addr); + soc_cpu_set_entry_point_40xx(vdev, vdev->fw->cold_boot_entry_point); return 0; } int ivpu_hw_ip_soc_cpu_boot(struct ivpu_device *vdev) { - if (ivpu_hw_ip_gen(vdev) == IVPU_HW_IP_37XX) - return soc_cpu_boot_37xx(vdev); - else - return soc_cpu_boot_40xx(vdev); + int ret; + + switch (ivpu_hw_ip_gen(vdev)) { + case IVPU_HW_IP_37XX: + ret = soc_cpu_boot_37xx(vdev); + break; + + case IVPU_HW_IP_40XX: + case IVPU_HW_IP_50XX: + ret = soc_cpu_boot_40xx(vdev); + break; + + default: + ret = soc_cpu_boot_60xx(vdev); + } + + if (ret) + return ret; + + ivpu_dbg(vdev, PM, "Booting firmware, mode: %s\n", + ivpu_fw_is_warm_boot(vdev) ? "warm boot" : "cold boot"); + + return 0; } static void wdt_disable_37xx(struct ivpu_device *vdev) diff --git a/drivers/accel/ivpu/ivpu_hw_ip.h b/drivers/accel/ivpu/ivpu_hw_ip.h index 5b1b391aa577..dbbcdd10a5f8 100644 --- a/drivers/accel/ivpu/ivpu_hw_ip.h +++ b/drivers/accel/ivpu/ivpu_hw_ip.h @@ -29,7 +29,6 @@ u32 ivpu_hw_ip_ipc_rx_addr_get(struct ivpu_device *vdev); void ivpu_hw_ip_ipc_tx_set(struct ivpu_device *vdev, u32 vpu_addr); void ivpu_hw_ip_irq_enable(struct ivpu_device *vdev); void ivpu_hw_ip_irq_disable(struct ivpu_device *vdev); -void ivpu_hw_ip_diagnose_failure(struct ivpu_device *vdev); void ivpu_hw_ip_fabric_req_override_enable_50xx(struct ivpu_device *vdev); void ivpu_hw_ip_fabric_req_override_disable_50xx(struct ivpu_device *vdev); diff --git a/drivers/accel/ivpu/ivpu_ipc.c b/drivers/accel/ivpu/ivpu_ipc.c index 1f13bf95b2b3..f47df092bb0d 100644 --- a/drivers/accel/ivpu/ivpu_ipc.c +++ b/drivers/accel/ivpu/ivpu_ipc.c @@ -142,7 +142,7 @@ ivpu_ipc_rx_msg_add(struct ivpu_device *vdev, struct ivpu_ipc_consumer *cons, lockdep_assert_held(&ipc->cons_lock); - rx_msg = kzalloc(sizeof(*rx_msg), GFP_ATOMIC); + rx_msg = kzalloc_obj(*rx_msg, GFP_ATOMIC); if (!rx_msg) { ivpu_ipc_rx_mark_free(vdev, ipc_hdr, jsm_msg); return; diff --git a/drivers/accel/ivpu/ivpu_job.c b/drivers/accel/ivpu/ivpu_job.c index 4f8564e2878a..fe02b7bd465b 100644 --- a/drivers/accel/ivpu/ivpu_job.c +++ b/drivers/accel/ivpu/ivpu_job.c @@ -98,7 +98,7 @@ static struct ivpu_cmdq *ivpu_cmdq_alloc(struct ivpu_file_priv *file_priv) struct ivpu_device *vdev = file_priv->vdev; struct ivpu_cmdq *cmdq; - cmdq = kzalloc(sizeof(*cmdq), GFP_KERNEL); + cmdq = kzalloc_obj(*cmdq); if (!cmdq) return NULL; @@ -491,7 +491,7 @@ static struct dma_fence *ivpu_fence_create(struct ivpu_device *vdev) { struct ivpu_fence *fence; - fence = kzalloc(sizeof(*fence), GFP_KERNEL); + fence = kzalloc_obj(*fence); if (!fence) return NULL; @@ -525,7 +525,7 @@ ivpu_job_create(struct ivpu_file_priv *file_priv, u32 engine_idx, u32 bo_count) struct ivpu_device *vdev = file_priv->vdev; struct ivpu_job *job; - job = kzalloc(struct_size(job, bos, bo_count), GFP_KERNEL); + job = kzalloc_flex(*job, bos, bo_count); if (!job) return NULL; diff --git a/drivers/accel/ivpu/ivpu_mmu_context.c b/drivers/accel/ivpu/ivpu_mmu_context.c index 87ad593ef47d..c4014c83e727 100644 --- a/drivers/accel/ivpu/ivpu_mmu_context.c +++ b/drivers/accel/ivpu/ivpu_mmu_context.c @@ -429,11 +429,12 @@ static void ivpu_mmu_context_unmap_pages(struct ivpu_mmu_context *ctx, u64 vpu_a } int -ivpu_mmu_context_map_sgt(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx, - u64 vpu_addr, struct sg_table *sgt, bool llc_coherent, bool read_only) +ivpu_mmu_context_map_sgt(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx, u64 vpu_addr, + struct sg_table *sgt, size_t bo_size, bool llc_coherent, bool read_only) { size_t start_vpu_addr = vpu_addr; struct scatterlist *sg; + size_t sgt_size = 0; int ret; u64 prot; u64 i; @@ -462,12 +463,25 @@ ivpu_mmu_context_map_sgt(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx, ivpu_dbg(vdev, MMU_MAP, "Map ctx: %u dma_addr: 0x%llx vpu_addr: 0x%llx size: %lu\n", ctx->id, dma_addr, vpu_addr, size); + if (sgt_size + size > bo_size) { + ivpu_err(vdev, "Scatter-gather table size exceeds buffer object size\n"); + ret = -EINVAL; + goto err_unmap_pages; + } + ret = ivpu_mmu_context_map_pages(vdev, ctx, vpu_addr, dma_addr, size, prot); if (ret) { ivpu_err(vdev, "Failed to map context pages\n"); goto err_unmap_pages; } vpu_addr += size; + sgt_size += size; + } + + if (sgt_size < bo_size) { + ivpu_err(vdev, "Scatter-gather table size too small to cover buffer object size\n"); + ret = -EINVAL; + goto err_unmap_pages; } if (!ctx->is_cd_valid) { @@ -493,7 +507,7 @@ ivpu_mmu_context_map_sgt(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx, return 0; err_unmap_pages: - ivpu_mmu_context_unmap_pages(ctx, start_vpu_addr, vpu_addr - start_vpu_addr); + ivpu_mmu_context_unmap_pages(ctx, start_vpu_addr, sgt_size); mutex_unlock(&ctx->lock); return ret; } diff --git a/drivers/accel/ivpu/ivpu_mmu_context.h b/drivers/accel/ivpu/ivpu_mmu_context.h index 663a11a9db11..cc02e7bab04e 100644 --- a/drivers/accel/ivpu/ivpu_mmu_context.h +++ b/drivers/accel/ivpu/ivpu_mmu_context.h @@ -41,8 +41,9 @@ int ivpu_mmu_context_insert_node(struct ivpu_mmu_context *ctx, const struct ivpu u64 size, struct drm_mm_node *node); void ivpu_mmu_context_remove_node(struct ivpu_mmu_context *ctx, struct drm_mm_node *node); -int ivpu_mmu_context_map_sgt(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx, - u64 vpu_addr, struct sg_table *sgt, bool llc_coherent, bool read_only); +int +ivpu_mmu_context_map_sgt(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx, u64 vpu_addr, + struct sg_table *sgt, size_t bo_size, bool llc_coherent, bool read_only); void ivpu_mmu_context_unmap_sgt(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx, u64 vpu_addr, struct sg_table *sgt); int ivpu_mmu_context_set_pages_ro(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx, diff --git a/drivers/accel/ivpu/ivpu_ms.c b/drivers/accel/ivpu/ivpu_ms.c index 1d9c1cb17924..be43851f5f32 100644 --- a/drivers/accel/ivpu/ivpu_ms.c +++ b/drivers/accel/ivpu/ivpu_ms.c @@ -59,7 +59,7 @@ int ivpu_ms_start_ioctl(struct drm_device *dev, void *data, struct drm_file *fil goto unlock; } - ms = kzalloc(sizeof(*ms), GFP_KERNEL); + ms = kzalloc_obj(*ms); if (!ms) { ret = -ENOMEM; goto unlock; diff --git a/drivers/accel/ivpu/ivpu_pm.c b/drivers/accel/ivpu/ivpu_pm.c index 480c075d87f6..d20144a21e09 100644 --- a/drivers/accel/ivpu/ivpu_pm.c +++ b/drivers/accel/ivpu/ivpu_pm.c @@ -47,8 +47,10 @@ static void ivpu_pm_prepare_cold_boot(struct ivpu_device *vdev) ivpu_ipc_reset(vdev); ivpu_fw_log_reset(vdev); ivpu_fw_load(vdev); - fw->entry_point = fw->cold_boot_entry_point; fw->last_heartbeat = 0; + + ivpu_dbg(vdev, FW_BOOT, "Cold boot entry point 0x%llx", vdev->fw->cold_boot_entry_point); + fw->next_boot_mode = VPU_BOOT_TYPE_COLDBOOT; } static void ivpu_pm_prepare_warm_boot(struct ivpu_device *vdev) @@ -56,13 +58,14 @@ static void ivpu_pm_prepare_warm_boot(struct ivpu_device *vdev) struct ivpu_fw_info *fw = vdev->fw; struct vpu_boot_params *bp = ivpu_bo_vaddr(fw->mem_bp); - if (!bp->save_restore_ret_address) { + fw->warm_boot_entry_point = bp->save_restore_ret_address; + if (!fw->warm_boot_entry_point) { ivpu_pm_prepare_cold_boot(vdev); return; } - ivpu_dbg(vdev, FW_BOOT, "Save/restore entry point %llx", bp->save_restore_ret_address); - fw->entry_point = bp->save_restore_ret_address; + ivpu_dbg(vdev, FW_BOOT, "Warm boot entry point 0x%llx", fw->warm_boot_entry_point); + fw->next_boot_mode = VPU_BOOT_TYPE_WARMBOOT; } static int ivpu_suspend(struct ivpu_device *vdev) @@ -110,7 +113,7 @@ err_power_down: ivpu_hw_power_down(vdev); pci_set_power_state(to_pci_dev(vdev->drm.dev), PCI_D3hot); - if (!ivpu_fw_is_cold_boot(vdev)) { + if (ivpu_fw_is_warm_boot(vdev)) { ivpu_pm_prepare_cold_boot(vdev); goto retry; } else { diff --git a/drivers/accel/ivpu/ivpu_pm.h b/drivers/accel/ivpu/ivpu_pm.h index a2aa7a27f32e..00f2a01e3df6 100644 --- a/drivers/accel/ivpu/ivpu_pm.h +++ b/drivers/accel/ivpu/ivpu_pm.h @@ -18,7 +18,6 @@ struct ivpu_pm_info { struct rw_semaphore reset_lock; atomic_t reset_counter; atomic_t reset_pending; - bool is_warmboot; u8 dct_active_percent; }; diff --git a/drivers/accel/qaic/mhi_controller.c b/drivers/accel/qaic/mhi_controller.c index 13a14c6c6168..4d787f77ce41 100644 --- a/drivers/accel/qaic/mhi_controller.c +++ b/drivers/accel/qaic/mhi_controller.c @@ -39,7 +39,6 @@ static const struct mhi_channel_config aic100_channels[] = { .lpm_notify = false, .offload_channel = false, .doorbell_mode_switch = false, - .auto_queue = false, .wake_capable = false, }, { @@ -55,7 +54,6 @@ static const struct mhi_channel_config aic100_channels[] = { .lpm_notify = false, .offload_channel = false, .doorbell_mode_switch = false, - .auto_queue = false, .wake_capable = false, }, { @@ -71,7 +69,6 @@ static const struct mhi_channel_config aic100_channels[] = { .lpm_notify = false, .offload_channel = false, .doorbell_mode_switch = false, - .auto_queue = false, .wake_capable = false, }, { @@ -87,7 +84,6 @@ static const struct mhi_channel_config aic100_channels[] = { .lpm_notify = false, .offload_channel = false, .doorbell_mode_switch = false, - .auto_queue = false, .wake_capable = false, }, { @@ -103,7 +99,6 @@ static const struct mhi_channel_config aic100_channels[] = { .lpm_notify = false, .offload_channel = false, .doorbell_mode_switch = false, - .auto_queue = false, .wake_capable = false, }, { @@ -119,7 +114,6 @@ static const struct mhi_channel_config aic100_channels[] = { .lpm_notify = false, .offload_channel = false, .doorbell_mode_switch = false, - .auto_queue = false, .wake_capable = false, }, { @@ -135,7 +129,6 @@ static const struct mhi_channel_config aic100_channels[] = { .lpm_notify = false, .offload_channel = false, .doorbell_mode_switch = false, - .auto_queue = false, .wake_capable = false, }, { @@ -151,7 +144,6 @@ static const struct mhi_channel_config aic100_channels[] = { .lpm_notify = false, .offload_channel = false, .doorbell_mode_switch = false, - .auto_queue = false, .wake_capable = false, }, { @@ -167,7 +159,6 @@ static const struct mhi_channel_config aic100_channels[] = { .lpm_notify = false, .offload_channel = false, .doorbell_mode_switch = false, - .auto_queue = false, .wake_capable = false, }, { @@ -183,7 +174,6 @@ static const struct mhi_channel_config aic100_channels[] = { .lpm_notify = false, .offload_channel = false, .doorbell_mode_switch = false, - .auto_queue = false, .wake_capable = false, }, { @@ -199,7 +189,6 @@ static const struct mhi_channel_config aic100_channels[] = { .lpm_notify = false, .offload_channel = false, .doorbell_mode_switch = false, - .auto_queue = false, .wake_capable = false, }, { @@ -215,7 +204,6 @@ static const struct mhi_channel_config aic100_channels[] = { .lpm_notify = false, .offload_channel = false, .doorbell_mode_switch = false, - .auto_queue = false, .wake_capable = false, }, { @@ -231,7 +219,6 @@ static const struct mhi_channel_config aic100_channels[] = { .lpm_notify = false, .offload_channel = false, .doorbell_mode_switch = false, - .auto_queue = false, .wake_capable = false, }, { @@ -247,7 +234,6 @@ static const struct mhi_channel_config aic100_channels[] = { .lpm_notify = false, .offload_channel = false, .doorbell_mode_switch = false, - .auto_queue = false, .wake_capable = false, }, { @@ -263,7 +249,6 @@ static const struct mhi_channel_config aic100_channels[] = { .lpm_notify = false, .offload_channel = false, .doorbell_mode_switch = false, - .auto_queue = false, .wake_capable = false, }, { @@ -279,7 +264,6 @@ static const struct mhi_channel_config aic100_channels[] = { .lpm_notify = false, .offload_channel = false, .doorbell_mode_switch = false, - .auto_queue = false, .wake_capable = false, }, { @@ -295,7 +279,6 @@ static const struct mhi_channel_config aic100_channels[] = { .lpm_notify = false, .offload_channel = false, .doorbell_mode_switch = false, - .auto_queue = false, .wake_capable = false, }, { @@ -311,7 +294,6 @@ static const struct mhi_channel_config aic100_channels[] = { .lpm_notify = false, .offload_channel = false, .doorbell_mode_switch = false, - .auto_queue = false, .wake_capable = false, }, { @@ -327,7 +309,6 @@ static const struct mhi_channel_config aic100_channels[] = { .lpm_notify = false, .offload_channel = false, .doorbell_mode_switch = false, - .auto_queue = false, .wake_capable = false, }, { @@ -343,7 +324,6 @@ static const struct mhi_channel_config aic100_channels[] = { .lpm_notify = false, .offload_channel = false, .doorbell_mode_switch = false, - .auto_queue = false, .wake_capable = false, }, { @@ -359,7 +339,6 @@ static const struct mhi_channel_config aic100_channels[] = { .lpm_notify = false, .offload_channel = false, .doorbell_mode_switch = false, - .auto_queue = false, .wake_capable = false, }, { @@ -375,7 +354,6 @@ static const struct mhi_channel_config aic100_channels[] = { .lpm_notify = false, .offload_channel = false, .doorbell_mode_switch = false, - .auto_queue = false, .wake_capable = false, }, { @@ -391,7 +369,6 @@ static const struct mhi_channel_config aic100_channels[] = { .lpm_notify = false, .offload_channel = false, .doorbell_mode_switch = false, - .auto_queue = false, .wake_capable = false, }, { @@ -407,7 +384,6 @@ static const struct mhi_channel_config aic100_channels[] = { .lpm_notify = false, .offload_channel = false, .doorbell_mode_switch = false, - .auto_queue = false, .wake_capable = false, }, { @@ -423,7 +399,6 @@ static const struct mhi_channel_config aic100_channels[] = { .lpm_notify = false, .offload_channel = false, .doorbell_mode_switch = false, - .auto_queue = false, .wake_capable = false, }, { @@ -439,7 +414,6 @@ static const struct mhi_channel_config aic100_channels[] = { .lpm_notify = false, .offload_channel = false, .doorbell_mode_switch = false, - .auto_queue = true, .wake_capable = false, }, }; @@ -458,7 +432,6 @@ static const struct mhi_channel_config aic200_channels[] = { .lpm_notify = false, .offload_channel = false, .doorbell_mode_switch = false, - .auto_queue = false, .wake_capable = false, }, { @@ -474,7 +447,6 @@ static const struct mhi_channel_config aic200_channels[] = { .lpm_notify = false, .offload_channel = false, .doorbell_mode_switch = false, - .auto_queue = false, .wake_capable = false, }, { @@ -490,7 +462,6 @@ static const struct mhi_channel_config aic200_channels[] = { .lpm_notify = false, .offload_channel = false, .doorbell_mode_switch = false, - .auto_queue = false, .wake_capable = false, }, { @@ -506,7 +477,6 @@ static const struct mhi_channel_config aic200_channels[] = { .lpm_notify = false, .offload_channel = false, .doorbell_mode_switch = false, - .auto_queue = false, .wake_capable = false, }, { @@ -522,7 +492,6 @@ static const struct mhi_channel_config aic200_channels[] = { .lpm_notify = false, .offload_channel = false, .doorbell_mode_switch = false, - .auto_queue = false, .wake_capable = false, }, { @@ -538,7 +507,6 @@ static const struct mhi_channel_config aic200_channels[] = { .lpm_notify = false, .offload_channel = false, .doorbell_mode_switch = false, - .auto_queue = false, .wake_capable = false, }, { @@ -554,7 +522,6 @@ static const struct mhi_channel_config aic200_channels[] = { .lpm_notify = false, .offload_channel = false, .doorbell_mode_switch = false, - .auto_queue = false, .wake_capable = false, }, { @@ -570,7 +537,6 @@ static const struct mhi_channel_config aic200_channels[] = { .lpm_notify = false, .offload_channel = false, .doorbell_mode_switch = false, - .auto_queue = false, .wake_capable = false, }, { @@ -586,7 +552,6 @@ static const struct mhi_channel_config aic200_channels[] = { .lpm_notify = false, .offload_channel = false, .doorbell_mode_switch = false, - .auto_queue = false, .wake_capable = false, }, { @@ -602,7 +567,6 @@ static const struct mhi_channel_config aic200_channels[] = { .lpm_notify = false, .offload_channel = false, .doorbell_mode_switch = false, - .auto_queue = false, .wake_capable = false, }, { @@ -618,7 +582,6 @@ static const struct mhi_channel_config aic200_channels[] = { .lpm_notify = false, .offload_channel = false, .doorbell_mode_switch = false, - .auto_queue = false, .wake_capable = false, }, { @@ -634,7 +597,6 @@ static const struct mhi_channel_config aic200_channels[] = { .lpm_notify = false, .offload_channel = false, .doorbell_mode_switch = false, - .auto_queue = false, .wake_capable = false, }, { @@ -650,7 +612,6 @@ static const struct mhi_channel_config aic200_channels[] = { .lpm_notify = false, .offload_channel = false, .doorbell_mode_switch = false, - .auto_queue = false, .wake_capable = false, }, { @@ -666,7 +627,6 @@ static const struct mhi_channel_config aic200_channels[] = { .lpm_notify = false, .offload_channel = false, .doorbell_mode_switch = false, - .auto_queue = false, .wake_capable = false, }, { @@ -682,7 +642,6 @@ static const struct mhi_channel_config aic200_channels[] = { .lpm_notify = false, .offload_channel = false, .doorbell_mode_switch = false, - .auto_queue = false, .wake_capable = false, }, { @@ -698,7 +657,6 @@ static const struct mhi_channel_config aic200_channels[] = { .lpm_notify = false, .offload_channel = false, .doorbell_mode_switch = false, - .auto_queue = false, .wake_capable = false, }, { @@ -714,7 +672,6 @@ static const struct mhi_channel_config aic200_channels[] = { .lpm_notify = false, .offload_channel = false, .doorbell_mode_switch = false, - .auto_queue = false, .wake_capable = false, }, { @@ -730,7 +687,6 @@ static const struct mhi_channel_config aic200_channels[] = { .lpm_notify = false, .offload_channel = false, .doorbell_mode_switch = false, - .auto_queue = true, .wake_capable = false, }, }; diff --git a/drivers/accel/qaic/qaic_control.c b/drivers/accel/qaic/qaic_control.c index 428d8f65bff3..f698d5dfd326 100644 --- a/drivers/accel/qaic/qaic_control.c +++ b/drivers/accel/qaic/qaic_control.c @@ -423,7 +423,8 @@ static int find_and_map_user_pages(struct qaic_device *qdev, nr_pages = need_pages; while (1) { - page_list = kmalloc_array(nr_pages, sizeof(*page_list), GFP_KERNEL | __GFP_NOWARN); + page_list = kmalloc_objs(*page_list, nr_pages, + GFP_KERNEL | __GFP_NOWARN); if (!page_list) { nr_pages = nr_pages / 2; if (!nr_pages) @@ -442,7 +443,7 @@ static int find_and_map_user_pages(struct qaic_device *qdev, goto put_pages; } - sgt = kmalloc(sizeof(*sgt), GFP_KERNEL); + sgt = kmalloc_obj(*sgt); if (!sgt) { ret = -ENOMEM; goto put_pages; @@ -581,7 +582,7 @@ static int encode_dma(struct qaic_device *qdev, void *trans, struct wrapper_list QAIC_MANAGE_WIRE_MSG_LENGTH) return -ENOMEM; - xfer = kmalloc(sizeof(*xfer), GFP_KERNEL); + xfer = kmalloc_obj(*xfer); if (!xfer) return -ENOMEM; @@ -1165,7 +1166,7 @@ static struct wrapper_list *alloc_wrapper_list(void) { struct wrapper_list *wrappers; - wrappers = kmalloc(sizeof(*wrappers), GFP_KERNEL); + wrappers = kmalloc_obj(*wrappers); if (!wrappers) return NULL; INIT_LIST_HEAD(&wrappers->list); @@ -1457,7 +1458,7 @@ void qaic_mhi_dl_xfer_cb(struct mhi_device *mhi_dev, struct mhi_result *mhi_resu return; } - resp = kmalloc(sizeof(*resp), GFP_ATOMIC); + resp = kmalloc_obj(*resp, GFP_ATOMIC); if (!resp) { kfree(msg); return; diff --git a/drivers/accel/qaic/qaic_data.c b/drivers/accel/qaic/qaic_data.c index 60cb4d65d48e..95300c2f7d8a 100644 --- a/drivers/accel/qaic/qaic_data.c +++ b/drivers/accel/qaic/qaic_data.c @@ -213,7 +213,7 @@ static int clone_range_of_sgt_for_slice(struct qaic_device *qdev, struct sg_tabl goto out; } - sgt = kzalloc(sizeof(*sgt), GFP_KERNEL); + sgt = kzalloc_obj(*sgt); if (!sgt) { ret = -ENOMEM; goto out; @@ -399,13 +399,13 @@ static int qaic_map_one_slice(struct qaic_device *qdev, struct qaic_bo *bo, if (ret) goto out; - slice = kmalloc(sizeof(*slice), GFP_KERNEL); + slice = kmalloc_obj(*slice); if (!slice) { ret = -ENOMEM; goto free_sgt; } - slice->reqs = kvcalloc(sgt->nents, sizeof(*slice->reqs), GFP_KERNEL); + slice->reqs = kvzalloc_objs(*slice->reqs, sgt->nents); if (!slice->reqs) { ret = -ENOMEM; goto free_slice; @@ -507,7 +507,7 @@ static int create_sgt(struct qaic_device *qdev, struct sg_table **sgt_out, u64 s i++; } - sgt = kmalloc(sizeof(*sgt), GFP_KERNEL); + sgt = kmalloc_obj(*sgt); if (!sgt) { ret = -ENOMEM; goto free_partial_alloc; @@ -653,7 +653,7 @@ static struct sg_table *qaic_get_sg_table(struct drm_gem_object *obj) sgt_in = bo->sgt; - sgt = kmalloc(sizeof(*sgt), GFP_KERNEL); + sgt = kmalloc_obj(*sgt); if (!sgt) return ERR_PTR(-ENOMEM); @@ -697,7 +697,7 @@ static struct qaic_bo *qaic_alloc_init_bo(void) { struct qaic_bo *bo; - bo = kzalloc(sizeof(*bo), GFP_KERNEL); + bo = kzalloc_obj(*bo); if (!bo) return ERR_PTR(-ENOMEM); diff --git a/drivers/accel/qaic/qaic_drv.c b/drivers/accel/qaic/qaic_drv.c index 4c70bd949d53..63fb8c7b4abc 100644 --- a/drivers/accel/qaic/qaic_drv.c +++ b/drivers/accel/qaic/qaic_drv.c @@ -152,7 +152,7 @@ static int qaic_open(struct drm_device *dev, struct drm_file *file) goto dev_unlock; } - usr = kmalloc(sizeof(*usr), GFP_KERNEL); + usr = kmalloc_obj(*usr); if (!usr) { ret = -ENOMEM; goto dev_unlock; diff --git a/drivers/accel/qaic/qaic_ras.c b/drivers/accel/qaic/qaic_ras.c index f1d52a710136..cc0b75461e1a 100644 --- a/drivers/accel/qaic/qaic_ras.c +++ b/drivers/accel/qaic/qaic_ras.c @@ -556,7 +556,7 @@ static int qaic_ras_mhi_probe(struct mhi_device *mhi_dev, const struct mhi_devic if (ret) return ret; - resp = kzalloc(sizeof(*resp), GFP_KERNEL); + resp = kzalloc_obj(*resp); if (!resp) { mhi_unprepare_from_transfer(mhi_dev); return -ENOMEM; diff --git a/drivers/accel/qaic/qaic_ssr.c b/drivers/accel/qaic/qaic_ssr.c index 9b662d690371..a5bb6078824b 100644 --- a/drivers/accel/qaic/qaic_ssr.c +++ b/drivers/accel/qaic/qaic_ssr.c @@ -260,7 +260,7 @@ static int send_xfer_done(struct qaic_device *qdev, void *resp, u32 dbc_id) struct ssr_debug_transfer_done *xfer_done; int ret; - xfer_done = kmalloc(sizeof(*xfer_done), GFP_KERNEL); + xfer_done = kmalloc_obj(*xfer_done); if (!xfer_done) { ret = -ENOMEM; goto out; @@ -450,14 +450,14 @@ static struct ssr_dump_info *alloc_dump_info(struct qaic_device *qdev, } /* Allocate SSR crashdump book keeping structure */ - dump_info = kzalloc(sizeof(*dump_info), GFP_KERNEL); + dump_info = kzalloc_obj(*dump_info); if (!dump_info) { ret = -ENOMEM; goto out; } /* Buffer used to send MEMORY READ request to device via MHI */ - dump_info->read_buf_req = kzalloc(sizeof(*dump_info->read_buf_req), GFP_KERNEL); + dump_info->read_buf_req = kzalloc_obj(*dump_info->read_buf_req); if (!dump_info->read_buf_req) { ret = -ENOMEM; goto free_dump_info; @@ -490,7 +490,7 @@ static int dbg_xfer_info_rsp(struct qaic_device *qdev, struct dma_bridge_chan *d struct ssr_crashdump *ssr_crash = NULL; int ret = 0, ret2; - debug_rsp = kmalloc(sizeof(*debug_rsp), GFP_KERNEL); + debug_rsp = kmalloc_obj(*debug_rsp); if (!debug_rsp) return -ENOMEM; @@ -640,7 +640,7 @@ static void ssr_worker(struct work_struct *work) break; } - event_rsp = kmalloc(sizeof(*event_rsp), GFP_KERNEL); + event_rsp = kmalloc_obj(*event_rsp); if (!event_rsp) break; diff --git a/drivers/accel/qaic/qaic_timesync.c b/drivers/accel/qaic/qaic_timesync.c index 8af2475f4f36..939462b9958d 100644 --- a/drivers/accel/qaic/qaic_timesync.c +++ b/drivers/accel/qaic/qaic_timesync.c @@ -185,7 +185,7 @@ static int qaic_timesync_probe(struct mhi_device *mhi_dev, const struct mhi_devi struct timer_list *timer; int ret; - mqtsdev = kzalloc(sizeof(*mqtsdev), GFP_KERNEL); + mqtsdev = kzalloc_obj(*mqtsdev); if (!mqtsdev) { ret = -ENOMEM; goto out; @@ -196,7 +196,7 @@ static int qaic_timesync_probe(struct mhi_device *mhi_dev, const struct mhi_devi mqtsdev->qdev = qdev; mqtsdev->dev = &qdev->pdev->dev; - mqtsdev->sync_msg = kzalloc(sizeof(*mqtsdev->sync_msg), GFP_KERNEL); + mqtsdev->sync_msg = kzalloc_obj(*mqtsdev->sync_msg); if (!mqtsdev->sync_msg) { ret = -ENOMEM; goto free_mqts_dev; @@ -275,7 +275,7 @@ static void qaic_boot_timesync_worker(struct work_struct *work) switch (data.hdr.msg_type) { case QAIC_TS_CMD_TO_HOST: - req = kzalloc(sizeof(*req), GFP_KERNEL); + req = kzalloc_obj(*req); if (!req) break; @@ -304,7 +304,7 @@ static int qaic_boot_timesync_queue_resp(struct mhi_device *mhi_dev, struct qaic struct qts_resp *resp; int ret; - resp = kzalloc(sizeof(*resp), GFP_KERNEL); + resp = kzalloc_obj(*resp); if (!resp) return -ENOMEM; diff --git a/drivers/accel/rocket/rocket_core.c b/drivers/accel/rocket/rocket_core.c index abe7719c1db4..b3b2fa9ba645 100644 --- a/drivers/accel/rocket/rocket_core.c +++ b/drivers/accel/rocket/rocket_core.c @@ -59,8 +59,11 @@ int rocket_core_init(struct rocket_core *core) core->iommu_group = iommu_group_get(dev); err = rocket_job_init(core); - if (err) + if (err) { + iommu_group_put(core->iommu_group); + core->iommu_group = NULL; return err; + } pm_runtime_use_autosuspend(dev); @@ -76,7 +79,7 @@ int rocket_core_init(struct rocket_core *core) err = pm_runtime_resume_and_get(dev); if (err) { - rocket_job_fini(core); + rocket_core_fini(core); return err; } diff --git a/drivers/accel/rocket/rocket_drv.c b/drivers/accel/rocket/rocket_drv.c index 5c0b63f0a8f0..8bbbce594883 100644 --- a/drivers/accel/rocket/rocket_drv.c +++ b/drivers/accel/rocket/rocket_drv.c @@ -13,6 +13,7 @@ #include <linux/platform_device.h> #include <linux/pm_runtime.h> +#include "rocket_device.h" #include "rocket_drv.h" #include "rocket_gem.h" #include "rocket_job.h" @@ -37,7 +38,7 @@ rocket_iommu_domain_destroy(struct kref *kref) static struct rocket_iommu_domain* rocket_iommu_domain_create(struct device *dev) { - struct rocket_iommu_domain *domain = kmalloc(sizeof(*domain), GFP_KERNEL); + struct rocket_iommu_domain *domain = kmalloc_obj(*domain); void *err; if (!domain) @@ -78,7 +79,7 @@ rocket_open(struct drm_device *dev, struct drm_file *file) if (!try_module_get(THIS_MODULE)) return -EINVAL; - rocket_priv = kzalloc(sizeof(*rocket_priv), GFP_KERNEL); + rocket_priv = kzalloc_obj(*rocket_priv); if (!rocket_priv) { ret = -ENOMEM; goto err_put_mod; @@ -158,6 +159,8 @@ static const struct drm_driver rocket_drm_driver = { static int rocket_probe(struct platform_device *pdev) { + int ret; + if (rdev == NULL) { /* First core probing, initialize DRM device. */ rdev = rocket_device_init(drm_dev, &rocket_drm_driver); @@ -177,20 +180,31 @@ static int rocket_probe(struct platform_device *pdev) rdev->num_cores++; - return rocket_core_init(&rdev->cores[core]); + ret = rocket_core_init(&rdev->cores[core]); + if (ret) { + rdev->num_cores--; + + if (rdev->num_cores == 0) { + rocket_device_fini(rdev); + rdev = NULL; + } + } + + return ret; } +static int find_core_for_dev(struct device *dev); + static void rocket_remove(struct platform_device *pdev) { struct device *dev = &pdev->dev; + int core = find_core_for_dev(dev); - for (unsigned int core = 0; core < rdev->num_cores; core++) { - if (rdev->cores[core].dev == dev) { - rocket_core_fini(&rdev->cores[core]); - rdev->num_cores--; - break; - } - } + if (core < 0) + return; + + rocket_core_fini(&rdev->cores[core]); + rdev->num_cores--; if (rdev->num_cores == 0) { /* Last core removed, deinitialize DRM device. */ diff --git a/drivers/accel/rocket/rocket_gem.c b/drivers/accel/rocket/rocket_gem.c index 624c4ecf5a34..b6a385d2edfc 100644 --- a/drivers/accel/rocket/rocket_gem.c +++ b/drivers/accel/rocket/rocket_gem.c @@ -48,7 +48,7 @@ struct drm_gem_object *rocket_gem_create_object(struct drm_device *dev, size_t s { struct rocket_gem_object *obj; - obj = kzalloc(sizeof(*obj), GFP_KERNEL); + obj = kzalloc_obj(*obj); if (!obj) return ERR_PTR(-ENOMEM); diff --git a/drivers/accel/rocket/rocket_job.c b/drivers/accel/rocket/rocket_job.c index acd606160dc9..ac51bff39833 100644 --- a/drivers/accel/rocket/rocket_job.c +++ b/drivers/accel/rocket/rocket_job.c @@ -45,7 +45,7 @@ static struct dma_fence *rocket_fence_create(struct rocket_core *core) { struct dma_fence *fence; - fence = kzalloc(sizeof(*fence), GFP_KERNEL); + fence = kzalloc_obj(*fence); if (!fence) return ERR_PTR(-ENOMEM); @@ -71,7 +71,7 @@ rocket_copy_tasks(struct drm_device *dev, if (!rjob->task_count) return 0; - rjob->tasks = kvmalloc_array(job->task_count, sizeof(*rjob->tasks), GFP_KERNEL); + rjob->tasks = kvmalloc_objs(*rjob->tasks, job->task_count); if (!rjob->tasks) { drm_dbg(dev, "Failed to allocate task array\n"); return -ENOMEM; @@ -496,9 +496,8 @@ void rocket_job_fini(struct rocket_core *core) int rocket_job_open(struct rocket_file_priv *rocket_priv) { struct rocket_device *rdev = rocket_priv->rdev; - struct drm_gpu_scheduler **scheds = kmalloc_array(rdev->num_cores, - sizeof(*scheds), - GFP_KERNEL); + struct drm_gpu_scheduler **scheds = kmalloc_objs(*scheds, + rdev->num_cores); unsigned int core; int ret; @@ -543,7 +542,7 @@ static int rocket_ioctl_submit_job(struct drm_device *dev, struct drm_file *file if (job->task_count == 0) return -EINVAL; - rjob = kzalloc(sizeof(*rjob), GFP_KERNEL); + rjob = kzalloc_obj(*rjob); if (!rjob) return -ENOMEM; @@ -610,7 +609,7 @@ int rocket_ioctl_submit(struct drm_device *dev, void *data, struct drm_file *fil return -EINVAL; } - jobs = kvmalloc_array(args->job_count, sizeof(*jobs), GFP_KERNEL); + jobs = kvmalloc_objs(*jobs, args->job_count); if (!jobs) { drm_dbg(dev, "Failed to allocate incoming job array\n"); return -ENOMEM; |
