Diffstat (limited to 'drivers/accel')
-rw-r--r--  drivers/accel/Kconfig | 1
-rw-r--r--  drivers/accel/Makefile | 1
-rw-r--r--  drivers/accel/amdxdna/Kconfig | 18
-rw-r--r--  drivers/accel/amdxdna/Makefile | 23
-rw-r--r--  drivers/accel/amdxdna/TODO | 3
-rw-r--r--  drivers/accel/amdxdna/aie2_ctx.c | 919
-rw-r--r--  drivers/accel/amdxdna/aie2_error.c | 358
-rw-r--r--  drivers/accel/amdxdna/aie2_message.c | 776
-rw-r--r--  drivers/accel/amdxdna/aie2_msg_priv.h | 368
-rw-r--r--  drivers/accel/amdxdna/aie2_pci.c | 928
-rw-r--r--  drivers/accel/amdxdna/aie2_pci.h | 297
-rw-r--r--  drivers/accel/amdxdna/aie2_pm.c | 108
-rw-r--r--  drivers/accel/amdxdna/aie2_psp.c | 146
-rw-r--r--  drivers/accel/amdxdna/aie2_smu.c | 136
-rw-r--r--  drivers/accel/amdxdna/aie2_solver.c | 380
-rw-r--r--  drivers/accel/amdxdna/aie2_solver.h | 155
-rw-r--r--  drivers/accel/amdxdna/amdxdna_ctx.c | 552
-rw-r--r--  drivers/accel/amdxdna/amdxdna_ctx.h | 165
-rw-r--r--  drivers/accel/amdxdna/amdxdna_gem.c | 622
-rw-r--r--  drivers/accel/amdxdna/amdxdna_gem.h | 65
-rw-r--r--  drivers/accel/amdxdna/amdxdna_mailbox.c | 571
-rw-r--r--  drivers/accel/amdxdna/amdxdna_mailbox.h | 124
-rw-r--r--  drivers/accel/amdxdna/amdxdna_mailbox_helper.c | 61
-rw-r--r--  drivers/accel/amdxdna/amdxdna_mailbox_helper.h | 42
-rw-r--r--  drivers/accel/amdxdna/amdxdna_pci_drv.c | 434
-rw-r--r--  drivers/accel/amdxdna/amdxdna_pci_drv.h | 147
-rw-r--r--  drivers/accel/amdxdna/amdxdna_sysfs.c | 67
-rw-r--r--  drivers/accel/amdxdna/npu1_regs.c | 114
-rw-r--r--  drivers/accel/amdxdna/npu2_regs.c | 113
-rw-r--r--  drivers/accel/amdxdna/npu4_regs.c | 134
-rw-r--r--  drivers/accel/amdxdna/npu5_regs.c | 113
-rw-r--r--  drivers/accel/amdxdna/npu6_regs.c | 114
-rw-r--r--  drivers/accel/habanalabs/common/command_submission.c | 2
-rw-r--r--  drivers/accel/habanalabs/common/context.c | 3
-rw-r--r--  drivers/accel/habanalabs/common/debugfs.c | 2
-rw-r--r--  drivers/accel/habanalabs/common/device.c | 4
-rw-r--r--  drivers/accel/habanalabs/common/habanalabs_drv.c | 6
-rw-r--r--  drivers/accel/habanalabs/common/habanalabs_ioctl.c | 11
-rw-r--r--  drivers/accel/habanalabs/common/sysfs.c | 10
-rw-r--r--  drivers/accel/ivpu/ivpu_debugfs.c | 93
-rw-r--r--  drivers/accel/ivpu/ivpu_drv.c | 102
-rw-r--r--  drivers/accel/ivpu/ivpu_drv.h | 14
-rw-r--r--  drivers/accel/ivpu/ivpu_fw.c | 38
-rw-r--r--  drivers/accel/ivpu/ivpu_gem.c | 132
-rw-r--r--  drivers/accel/ivpu/ivpu_gem.h | 3
-rw-r--r--  drivers/accel/ivpu/ivpu_hw.c | 112
-rw-r--r--  drivers/accel/ivpu/ivpu_hw.h | 28
-rw-r--r--  drivers/accel/ivpu/ivpu_hw_btrs.c | 144
-rw-r--r--  drivers/accel/ivpu/ivpu_hw_btrs.h | 10
-rw-r--r--  drivers/accel/ivpu/ivpu_hw_btrs_lnl_reg.h | 1
-rw-r--r--  drivers/accel/ivpu/ivpu_hw_ip.c | 4
-rw-r--r--  drivers/accel/ivpu/ivpu_hw_reg_io.h | 64
-rw-r--r--  drivers/accel/ivpu/ivpu_ipc.c | 10
-rw-r--r--  drivers/accel/ivpu/ivpu_ipc.h | 2
-rw-r--r--  drivers/accel/ivpu/ivpu_job.c | 554
-rw-r--r--  drivers/accel/ivpu/ivpu_job.h | 8
-rw-r--r--  drivers/accel/ivpu/ivpu_jsm_msg.c | 38
-rw-r--r--  drivers/accel/ivpu/ivpu_mmu.c | 121
-rw-r--r--  drivers/accel/ivpu/ivpu_mmu.h | 2
-rw-r--r--  drivers/accel/ivpu/ivpu_mmu_context.c | 13
-rw-r--r--  drivers/accel/ivpu/ivpu_mmu_context.h | 2
-rw-r--r--  drivers/accel/ivpu/ivpu_ms.c | 24
-rw-r--r--  drivers/accel/ivpu/ivpu_pm.c | 28
-rw-r--r--  drivers/accel/ivpu/ivpu_pm.h | 2
-rw-r--r--  drivers/accel/ivpu/ivpu_sysfs.c | 84
-rw-r--r--  drivers/accel/ivpu/vpu_boot_api.h | 13
-rw-r--r--  drivers/accel/ivpu/vpu_jsm_api.h | 53
-rw-r--r--  drivers/accel/qaic/mhi_controller.c | 360
-rw-r--r--  drivers/accel/qaic/mhi_controller.h | 2
-rw-r--r--  drivers/accel/qaic/qaic.h | 14
-rw-r--r--  drivers/accel/qaic/qaic_data.c | 9
-rw-r--r--  drivers/accel/qaic/qaic_drv.c | 98
-rw-r--r--  drivers/accel/qaic/qaic_timesync.c | 4
-rw-r--r--  drivers/accel/qaic/sahara.c | 46
74 files changed, 9703 insertions, 622 deletions
diff --git a/drivers/accel/Kconfig b/drivers/accel/Kconfig
index 64065fb8922b..5b9490367a39 100644
--- a/drivers/accel/Kconfig
+++ b/drivers/accel/Kconfig
@@ -24,6 +24,7 @@ menuconfig DRM_ACCEL
different device files, called accel/accel* (in /dev, sysfs
and debugfs).
+source "drivers/accel/amdxdna/Kconfig"
source "drivers/accel/habanalabs/Kconfig"
source "drivers/accel/ivpu/Kconfig"
source "drivers/accel/qaic/Kconfig"
diff --git a/drivers/accel/Makefile b/drivers/accel/Makefile
index ab3df932937f..a301fb6089d4 100644
--- a/drivers/accel/Makefile
+++ b/drivers/accel/Makefile
@@ -1,5 +1,6 @@
# SPDX-License-Identifier: GPL-2.0-only
+obj-$(CONFIG_DRM_ACCEL_AMDXDNA) += amdxdna/
obj-$(CONFIG_DRM_ACCEL_HABANALABS) += habanalabs/
obj-$(CONFIG_DRM_ACCEL_IVPU) += ivpu/
obj-$(CONFIG_DRM_ACCEL_QAIC) += qaic/
diff --git a/drivers/accel/amdxdna/Kconfig b/drivers/accel/amdxdna/Kconfig
new file mode 100644
index 000000000000..f39d7a87296c
--- /dev/null
+++ b/drivers/accel/amdxdna/Kconfig
@@ -0,0 +1,18 @@
+# SPDX-License-Identifier: GPL-2.0-only
+
+config DRM_ACCEL_AMDXDNA
+ tristate "AMD AI Engine"
+ depends on AMD_IOMMU
+ depends on DRM_ACCEL
+ depends on PCI && HAS_IOMEM
+ depends on X86_64
+ select DRM_SCHED
+ select DRM_GEM_SHMEM_HELPER
+ select FW_LOADER
+ select HMM_MIRROR
+ help
+ Choose this option to enable support for NPU integrated into AMD
+ client CPUs like AMD Ryzen AI 300 Series. AMD NPU can be used to
+ accelerate machine learning applications.
+
+ If "M" is selected, the driver module will be amdxdna.
diff --git a/drivers/accel/amdxdna/Makefile b/drivers/accel/amdxdna/Makefile
new file mode 100644
index 000000000000..0e9adf6890a0
--- /dev/null
+++ b/drivers/accel/amdxdna/Makefile
@@ -0,0 +1,23 @@
+# SPDX-License-Identifier: GPL-2.0-only
+
+amdxdna-y := \
+ aie2_ctx.o \
+ aie2_error.o \
+ aie2_message.o \
+ aie2_pci.o \
+ aie2_pm.o \
+ aie2_psp.o \
+ aie2_smu.o \
+ aie2_solver.o \
+ amdxdna_ctx.o \
+ amdxdna_gem.o \
+ amdxdna_mailbox.o \
+ amdxdna_mailbox_helper.o \
+ amdxdna_pci_drv.o \
+ amdxdna_sysfs.o \
+ npu1_regs.o \
+ npu2_regs.o \
+ npu4_regs.o \
+ npu5_regs.o \
+ npu6_regs.o
+obj-$(CONFIG_DRM_ACCEL_AMDXDNA) = amdxdna.o
diff --git a/drivers/accel/amdxdna/TODO b/drivers/accel/amdxdna/TODO
new file mode 100644
index 000000000000..5119bccd1917
--- /dev/null
+++ b/drivers/accel/amdxdna/TODO
@@ -0,0 +1,3 @@
+- Add import and export BO support
+- Add debugfs support
+- Add debug BO support
diff --git a/drivers/accel/amdxdna/aie2_ctx.c b/drivers/accel/amdxdna/aie2_ctx.c
new file mode 100644
index 000000000000..00d215ac866e
--- /dev/null
+++ b/drivers/accel/amdxdna/aie2_ctx.c
@@ -0,0 +1,919 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2024, Advanced Micro Devices, Inc.
+ */
+
+#include <drm/amdxdna_accel.h>
+#include <drm/drm_device.h>
+#include <drm/drm_gem.h>
+#include <drm/drm_gem_shmem_helper.h>
+#include <drm/drm_print.h>
+#include <drm/drm_syncobj.h>
+#include <linux/hmm.h>
+#include <linux/types.h>
+#include <linux/xarray.h>
+#include <trace/events/amdxdna.h>
+
+#include "aie2_msg_priv.h"
+#include "aie2_pci.h"
+#include "aie2_solver.h"
+#include "amdxdna_ctx.h"
+#include "amdxdna_gem.h"
+#include "amdxdna_mailbox.h"
+#include "amdxdna_pci_drv.h"
+
+static bool force_cmdlist;
+module_param(force_cmdlist, bool, 0600);
+MODULE_PARM_DESC(force_cmdlist, "Force use command list (Default false)");
+
+#define HWCTX_MAX_TIMEOUT 60000 /* milliseconds */
+
+static void aie2_job_release(struct kref *ref)
+{
+ struct amdxdna_sched_job *job;
+
+ job = container_of(ref, struct amdxdna_sched_job, refcnt);
+ amdxdna_sched_job_cleanup(job);
+ atomic64_inc(&job->hwctx->job_free_cnt);
+ wake_up(&job->hwctx->priv->job_free_wq);
+ if (job->out_fence)
+ dma_fence_put(job->out_fence);
+ kfree(job);
+}
+
+static void aie2_job_put(struct amdxdna_sched_job *job)
+{
+ kref_put(&job->refcnt, aie2_job_release);
+}
+
+/* bad_job is only passed by aie2_sched_job_timedout(); all other callers set it to NULL */
+static void aie2_hwctx_stop(struct amdxdna_dev *xdna, struct amdxdna_hwctx *hwctx,
+ struct drm_sched_job *bad_job)
+{
+ drm_sched_stop(&hwctx->priv->sched, bad_job);
+ aie2_destroy_context(xdna->dev_handle, hwctx);
+}
+
+static int aie2_hwctx_restart(struct amdxdna_dev *xdna, struct amdxdna_hwctx *hwctx)
+{
+ struct amdxdna_gem_obj *heap = hwctx->priv->heap;
+ int ret;
+
+ ret = aie2_create_context(xdna->dev_handle, hwctx);
+ if (ret) {
+ XDNA_ERR(xdna, "Create hwctx failed, ret %d", ret);
+ goto out;
+ }
+
+ ret = aie2_map_host_buf(xdna->dev_handle, hwctx->fw_ctx_id,
+ heap->mem.userptr, heap->mem.size);
+ if (ret) {
+ XDNA_ERR(xdna, "Map host buf failed, ret %d", ret);
+ goto out;
+ }
+
+ if (hwctx->status != HWCTX_STAT_READY) {
+ XDNA_DBG(xdna, "hwctx is not ready, status %d", hwctx->status);
+ goto out;
+ }
+
+ ret = aie2_config_cu(hwctx);
+ if (ret) {
+ XDNA_ERR(xdna, "Config cu failed, ret %d", ret);
+ goto out;
+ }
+
+out:
+ drm_sched_start(&hwctx->priv->sched, 0);
+ XDNA_DBG(xdna, "%s restarted, ret %d", hwctx->name, ret);
+ return ret;
+}
+
+void aie2_restart_ctx(struct amdxdna_client *client)
+{
+ struct amdxdna_dev *xdna = client->xdna;
+ struct amdxdna_hwctx *hwctx;
+ unsigned long hwctx_id;
+
+ drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock));
+ mutex_lock(&client->hwctx_lock);
+ amdxdna_for_each_hwctx(client, hwctx_id, hwctx) {
+ if (hwctx->status != HWCTX_STAT_STOP)
+ continue;
+
+ hwctx->status = hwctx->old_status;
+ XDNA_DBG(xdna, "Resetting %s", hwctx->name);
+ aie2_hwctx_restart(xdna, hwctx);
+ }
+ mutex_unlock(&client->hwctx_lock);
+}
+
+static struct dma_fence *aie2_cmd_get_out_fence(struct amdxdna_hwctx *hwctx, u64 seq)
+{
+ struct dma_fence *fence, *out_fence = NULL;
+ int ret;
+
+ fence = drm_syncobj_fence_get(hwctx->priv->syncobj);
+ if (!fence)
+ return NULL;
+
+ ret = dma_fence_chain_find_seqno(&fence, seq);
+ if (ret)
+ goto out;
+
+ out_fence = dma_fence_get(dma_fence_chain_contained(fence));
+
+out:
+ dma_fence_put(fence);
+ return out_fence;
+}
+
+static void aie2_hwctx_wait_for_idle(struct amdxdna_hwctx *hwctx)
+{
+ struct dma_fence *fence;
+
+ fence = aie2_cmd_get_out_fence(hwctx, hwctx->priv->seq - 1);
+ if (!fence)
+ return;
+
+ /* Wait up to 2 seconds for fw to finish all pending requests */
+ dma_fence_wait_timeout(fence, false, msecs_to_jiffies(2000));
+ dma_fence_put(fence);
+}
+
+void aie2_hwctx_suspend(struct amdxdna_hwctx *hwctx)
+{
+ struct amdxdna_dev *xdna = hwctx->client->xdna;
+
+ /*
+ * Command timeout is unlikely. But if it happens, it doesn't
+ * break the system. aie2_hwctx_stop() will destroy mailbox
+ * and abort all commands.
+ */
+ drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock));
+ aie2_hwctx_wait_for_idle(hwctx);
+ aie2_hwctx_stop(xdna, hwctx, NULL);
+ hwctx->old_status = hwctx->status;
+ hwctx->status = HWCTX_STAT_STOP;
+}
+
+void aie2_hwctx_resume(struct amdxdna_hwctx *hwctx)
+{
+ struct amdxdna_dev *xdna = hwctx->client->xdna;
+
+ /*
+ * The resume path cannot guarantee that the mailbox channel can be
+ * regenerated. If that happens, submitting a message to this
+ * mailbox channel will return an error.
+ */
+ drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock));
+ hwctx->status = hwctx->old_status;
+ aie2_hwctx_restart(xdna, hwctx);
+}
+
+static void
+aie2_sched_notify(struct amdxdna_sched_job *job)
+{
+ struct dma_fence *fence = job->fence;
+
+ trace_xdna_job(&job->base, job->hwctx->name, "signaled fence", job->seq);
+ job->hwctx->priv->completed++;
+ dma_fence_signal(fence);
+
+ up(&job->hwctx->priv->job_sem);
+ job->job_done = true;
+ dma_fence_put(fence);
+ mmput_async(job->mm);
+ aie2_job_put(job);
+}
+
+static int
+aie2_sched_resp_handler(void *handle, void __iomem *data, size_t size)
+{
+ struct amdxdna_sched_job *job = handle;
+ struct amdxdna_gem_obj *cmd_abo;
+ u32 ret = 0;
+ u32 status;
+
+ cmd_abo = job->cmd_bo;
+
+ if (unlikely(!data))
+ goto out;
+
+ if (unlikely(size != sizeof(u32))) {
+ amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_ABORT);
+ ret = -EINVAL;
+ goto out;
+ }
+
+ status = readl(data);
+ XDNA_DBG(job->hwctx->client->xdna, "Resp status 0x%x", status);
+ if (status == AIE2_STATUS_SUCCESS)
+ amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_COMPLETED);
+ else
+ amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_ERROR);
+
+out:
+ aie2_sched_notify(job);
+ return ret;
+}
+
+static int
+aie2_sched_nocmd_resp_handler(void *handle, void __iomem *data, size_t size)
+{
+ struct amdxdna_sched_job *job = handle;
+ u32 ret = 0;
+ u32 status;
+
+ if (unlikely(!data))
+ goto out;
+
+ if (unlikely(size != sizeof(u32))) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ status = readl(data);
+ XDNA_DBG(job->hwctx->client->xdna, "Resp status 0x%x", status);
+
+out:
+ aie2_sched_notify(job);
+ return ret;
+}
+
+static int
+aie2_sched_cmdlist_resp_handler(void *handle, void __iomem *data, size_t size)
+{
+ struct amdxdna_sched_job *job = handle;
+ struct amdxdna_gem_obj *cmd_abo;
+ struct amdxdna_dev *xdna;
+ u32 fail_cmd_status;
+ u32 fail_cmd_idx;
+ u32 cmd_status;
+ u32 ret = 0;
+
+ cmd_abo = job->cmd_bo;
+ if (unlikely(!data) || unlikely(size != sizeof(u32) * 3)) {
+ amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_ABORT);
+ ret = -EINVAL;
+ goto out;
+ }
+
+ cmd_status = readl(data + offsetof(struct cmd_chain_resp, status));
+ xdna = job->hwctx->client->xdna;
+ XDNA_DBG(xdna, "Status 0x%x", cmd_status);
+ if (cmd_status == AIE2_STATUS_SUCCESS) {
+ amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_COMPLETED);
+ goto out;
+ }
+
+ /* Slow path to handle error, read from ringbuf on BAR */
+ fail_cmd_idx = readl(data + offsetof(struct cmd_chain_resp, fail_cmd_idx));
+ fail_cmd_status = readl(data + offsetof(struct cmd_chain_resp, fail_cmd_status));
+ XDNA_DBG(xdna, "Failed cmd idx %d, status 0x%x",
+ fail_cmd_idx, fail_cmd_status);
+
+ if (fail_cmd_status == AIE2_STATUS_SUCCESS) {
+ amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_ABORT);
+ ret = -EINVAL;
+ goto out;
+ }
+ amdxdna_cmd_set_state(cmd_abo, fail_cmd_status);
+
+ if (amdxdna_cmd_get_op(cmd_abo) == ERT_CMD_CHAIN) {
+ struct amdxdna_cmd_chain *cc = amdxdna_cmd_get_payload(cmd_abo, NULL);
+
+ cc->error_index = fail_cmd_idx;
+ if (cc->error_index >= cc->command_count)
+ cc->error_index = 0;
+ }
+out:
+ aie2_sched_notify(job);
+ return ret;
+}
+
+static struct dma_fence *
+aie2_sched_job_run(struct drm_sched_job *sched_job)
+{
+ struct amdxdna_sched_job *job = drm_job_to_xdna_job(sched_job);
+ struct amdxdna_gem_obj *cmd_abo = job->cmd_bo;
+ struct amdxdna_hwctx *hwctx = job->hwctx;
+ struct dma_fence *fence;
+ int ret;
+
+ if (!mmget_not_zero(job->mm))
+ return ERR_PTR(-ESRCH);
+
+ kref_get(&job->refcnt);
+ fence = dma_fence_get(job->fence);
+
+ if (unlikely(!cmd_abo)) {
+ ret = aie2_sync_bo(hwctx, job, aie2_sched_nocmd_resp_handler);
+ goto out;
+ }
+
+ amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_NEW);
+
+ if (amdxdna_cmd_get_op(cmd_abo) == ERT_CMD_CHAIN)
+ ret = aie2_cmdlist_multi_execbuf(hwctx, job, aie2_sched_cmdlist_resp_handler);
+ else if (force_cmdlist)
+ ret = aie2_cmdlist_single_execbuf(hwctx, job, aie2_sched_cmdlist_resp_handler);
+ else
+ ret = aie2_execbuf(hwctx, job, aie2_sched_resp_handler);
+
+out:
+ if (ret) {
+ dma_fence_put(job->fence);
+ aie2_job_put(job);
+ mmput(job->mm);
+ fence = ERR_PTR(ret);
+ }
+ trace_xdna_job(sched_job, hwctx->name, "sent to device", job->seq);
+
+ return fence;
+}
+
+static void aie2_sched_job_free(struct drm_sched_job *sched_job)
+{
+ struct amdxdna_sched_job *job = drm_job_to_xdna_job(sched_job);
+ struct amdxdna_hwctx *hwctx = job->hwctx;
+
+ trace_xdna_job(sched_job, hwctx->name, "job free", job->seq);
+ if (!job->job_done)
+ up(&hwctx->priv->job_sem);
+
+ drm_sched_job_cleanup(sched_job);
+ aie2_job_put(job);
+}
+
+static enum drm_gpu_sched_stat
+aie2_sched_job_timedout(struct drm_sched_job *sched_job)
+{
+ struct amdxdna_sched_job *job = drm_job_to_xdna_job(sched_job);
+ struct amdxdna_hwctx *hwctx = job->hwctx;
+ struct amdxdna_dev *xdna;
+
+ xdna = hwctx->client->xdna;
+ trace_xdna_job(sched_job, hwctx->name, "job timedout", job->seq);
+ mutex_lock(&xdna->dev_lock);
+ aie2_hwctx_stop(xdna, hwctx, sched_job);
+
+ aie2_hwctx_restart(xdna, hwctx);
+ mutex_unlock(&xdna->dev_lock);
+
+ return DRM_GPU_SCHED_STAT_NOMINAL;
+}
+
+static const struct drm_sched_backend_ops sched_ops = {
+ .run_job = aie2_sched_job_run,
+ .free_job = aie2_sched_job_free,
+ .timedout_job = aie2_sched_job_timedout,
+};
+
+static int aie2_hwctx_col_list(struct amdxdna_hwctx *hwctx)
+{
+ struct amdxdna_dev *xdna = hwctx->client->xdna;
+ struct amdxdna_dev_hdl *ndev;
+ int start, end, first, last;
+ u32 width = 1, entries = 0;
+ int i;
+
+ if (!hwctx->num_tiles) {
+ XDNA_ERR(xdna, "Number of tiles is zero");
+ return -EINVAL;
+ }
+
+ ndev = xdna->dev_handle;
+ if (unlikely(!ndev->metadata.core.row_count)) {
+ XDNA_WARN(xdna, "Core tile row count is zero");
+ return -EINVAL;
+ }
+
+ hwctx->num_col = hwctx->num_tiles / ndev->metadata.core.row_count;
+ if (!hwctx->num_col || hwctx->num_col > ndev->total_col) {
+ XDNA_ERR(xdna, "Invalid num_col %d", hwctx->num_col);
+ return -EINVAL;
+ }
+
+ if (ndev->priv->col_align == COL_ALIGN_NATURE)
+ width = hwctx->num_col;
+
+ /*
+ * In the range [start, end], find the columns that are multiples of width.
+ * 'first' is the first column,
+ * 'last' is the last column,
+ * 'entries' is the total number of columns.
+ */
+ start = xdna->dev_info->first_col;
+ end = ndev->total_col - hwctx->num_col;
+ if (start > 0 && end == 0) {
+ XDNA_DBG(xdna, "Force start from col 0");
+ start = 0;
+ }
+ first = start + (width - start % width) % width;
+ last = end - end % width;
+ if (last >= first)
+ entries = (last - first) / width + 1;
+ XDNA_DBG(xdna, "start %d end %d first %d last %d",
+ start, end, first, last);
+
+ if (unlikely(!entries)) {
+ XDNA_ERR(xdna, "Start %d end %d width %d",
+ start, end, width);
+ return -EINVAL;
+ }
+
+ hwctx->col_list = kmalloc_array(entries, sizeof(*hwctx->col_list), GFP_KERNEL);
+ if (!hwctx->col_list)
+ return -ENOMEM;
+
+ hwctx->col_list_len = entries;
+ hwctx->col_list[0] = first;
+ for (i = 1; i < entries; i++)
+ hwctx->col_list[i] = hwctx->col_list[i - 1] + width;
+
+ print_hex_dump_debug("col_list: ", DUMP_PREFIX_OFFSET, 16, 4, hwctx->col_list,
+ entries * sizeof(*hwctx->col_list), false);
+ return 0;
+}
+
+static int aie2_alloc_resource(struct amdxdna_hwctx *hwctx)
+{
+ struct amdxdna_dev *xdna = hwctx->client->xdna;
+ struct alloc_requests *xrs_req;
+ int ret;
+
+ xrs_req = kzalloc(sizeof(*xrs_req), GFP_KERNEL);
+ if (!xrs_req)
+ return -ENOMEM;
+
+ xrs_req->cdo.start_cols = hwctx->col_list;
+ xrs_req->cdo.cols_len = hwctx->col_list_len;
+ xrs_req->cdo.ncols = hwctx->num_col;
+ xrs_req->cdo.qos_cap.opc = hwctx->max_opc;
+
+ xrs_req->rqos.gops = hwctx->qos.gops;
+ xrs_req->rqos.fps = hwctx->qos.fps;
+ xrs_req->rqos.dma_bw = hwctx->qos.dma_bandwidth;
+ xrs_req->rqos.latency = hwctx->qos.latency;
+ xrs_req->rqos.exec_time = hwctx->qos.frame_exec_time;
+ xrs_req->rqos.priority = hwctx->qos.priority;
+
+ xrs_req->rid = (uintptr_t)hwctx;
+
+ ret = xrs_allocate_resource(xdna->xrs_hdl, xrs_req, hwctx);
+ if (ret)
+ XDNA_ERR(xdna, "Allocate AIE resource failed, ret %d", ret);
+
+ kfree(xrs_req);
+ return ret;
+}
+
+static void aie2_release_resource(struct amdxdna_hwctx *hwctx)
+{
+ struct amdxdna_dev *xdna = hwctx->client->xdna;
+ int ret;
+
+ ret = xrs_release_resource(xdna->xrs_hdl, (uintptr_t)hwctx);
+ if (ret)
+ XDNA_ERR(xdna, "Release AIE resource failed, ret %d", ret);
+}
+
+static int aie2_ctx_syncobj_create(struct amdxdna_hwctx *hwctx)
+{
+ struct amdxdna_dev *xdna = hwctx->client->xdna;
+ struct drm_file *filp = hwctx->client->filp;
+ struct drm_syncobj *syncobj;
+ u32 hdl;
+ int ret;
+
+ hwctx->syncobj_hdl = AMDXDNA_INVALID_FENCE_HANDLE;
+
+ ret = drm_syncobj_create(&syncobj, 0, NULL);
+ if (ret) {
+ XDNA_ERR(xdna, "Create ctx syncobj failed, ret %d", ret);
+ return ret;
+ }
+ ret = drm_syncobj_get_handle(filp, syncobj, &hdl);
+ if (ret) {
+ drm_syncobj_put(syncobj);
+ XDNA_ERR(xdna, "Create ctx syncobj handle failed, ret %d", ret);
+ return ret;
+ }
+ hwctx->priv->syncobj = syncobj;
+ hwctx->syncobj_hdl = hdl;
+
+ return 0;
+}
+
+static void aie2_ctx_syncobj_destroy(struct amdxdna_hwctx *hwctx)
+{
+ /*
+ * The syncobj_hdl is owned by user space and will be cleaned up
+ * separately.
+ */
+ drm_syncobj_put(hwctx->priv->syncobj);
+}
+
+int aie2_hwctx_init(struct amdxdna_hwctx *hwctx)
+{
+ struct amdxdna_client *client = hwctx->client;
+ struct amdxdna_dev *xdna = client->xdna;
+ const struct drm_sched_init_args args = {
+ .ops = &sched_ops,
+ .num_rqs = DRM_SCHED_PRIORITY_COUNT,
+ .credit_limit = HWCTX_MAX_CMDS,
+ .timeout = msecs_to_jiffies(HWCTX_MAX_TIMEOUT),
+ .name = hwctx->name,
+ .dev = xdna->ddev.dev,
+ };
+ struct drm_gpu_scheduler *sched;
+ struct amdxdna_hwctx_priv *priv;
+ struct amdxdna_gem_obj *heap;
+ struct amdxdna_dev_hdl *ndev;
+ int i, ret;
+
+ priv = kzalloc(sizeof(*hwctx->priv), GFP_KERNEL);
+ if (!priv)
+ return -ENOMEM;
+ hwctx->priv = priv;
+
+ mutex_lock(&client->mm_lock);
+ heap = client->dev_heap;
+ if (!heap) {
+ XDNA_ERR(xdna, "The client dev heap object not exist");
+ mutex_unlock(&client->mm_lock);
+ ret = -ENOENT;
+ goto free_priv;
+ }
+ drm_gem_object_get(to_gobj(heap));
+ mutex_unlock(&client->mm_lock);
+ priv->heap = heap;
+ sema_init(&priv->job_sem, HWCTX_MAX_CMDS);
+
+ ret = amdxdna_gem_pin(heap);
+ if (ret) {
+ XDNA_ERR(xdna, "Dev heap pin failed, ret %d", ret);
+ goto put_heap;
+ }
+
+ for (i = 0; i < ARRAY_SIZE(priv->cmd_buf); i++) {
+ struct amdxdna_gem_obj *abo;
+ struct amdxdna_drm_create_bo args = {
+ .flags = 0,
+ .type = AMDXDNA_BO_DEV,
+ .vaddr = 0,
+ .size = MAX_CHAIN_CMDBUF_SIZE,
+ };
+
+ abo = amdxdna_drm_alloc_dev_bo(&xdna->ddev, &args, client->filp, true);
+ if (IS_ERR(abo)) {
+ ret = PTR_ERR(abo);
+ goto free_cmd_bufs;
+ }
+
+ XDNA_DBG(xdna, "Command buf %d addr 0x%llx size 0x%lx",
+ i, abo->mem.dev_addr, abo->mem.size);
+ priv->cmd_buf[i] = abo;
+ }
+
+ sched = &priv->sched;
+ mutex_init(&priv->io_lock);
+
+ fs_reclaim_acquire(GFP_KERNEL);
+ might_lock(&priv->io_lock);
+ fs_reclaim_release(GFP_KERNEL);
+
+ ret = drm_sched_init(sched, &args);
+ if (ret) {
+ XDNA_ERR(xdna, "Failed to init DRM scheduler. ret %d", ret);
+ goto free_cmd_bufs;
+ }
+
+ ret = drm_sched_entity_init(&priv->entity, DRM_SCHED_PRIORITY_NORMAL,
+ &sched, 1, NULL);
+ if (ret) {
+ XDNA_ERR(xdna, "Failed to initial sched entiry. ret %d", ret);
+ goto free_sched;
+ }
+
+ ret = aie2_hwctx_col_list(hwctx);
+ if (ret) {
+ XDNA_ERR(xdna, "Create col list failed, ret %d", ret);
+ goto free_entity;
+ }
+
+ ret = aie2_alloc_resource(hwctx);
+ if (ret) {
+ XDNA_ERR(xdna, "Alloc hw resource failed, ret %d", ret);
+ goto free_col_list;
+ }
+
+ ret = aie2_map_host_buf(xdna->dev_handle, hwctx->fw_ctx_id,
+ heap->mem.userptr, heap->mem.size);
+ if (ret) {
+ XDNA_ERR(xdna, "Map host buffer failed, ret %d", ret);
+ goto release_resource;
+ }
+
+ ret = aie2_ctx_syncobj_create(hwctx);
+ if (ret) {
+ XDNA_ERR(xdna, "Create syncobj failed, ret %d", ret);
+ goto release_resource;
+ }
+
+ hwctx->status = HWCTX_STAT_INIT;
+ ndev = xdna->dev_handle;
+ ndev->hwctx_num++;
+ init_waitqueue_head(&priv->job_free_wq);
+
+ XDNA_DBG(xdna, "hwctx %s init completed", hwctx->name);
+
+ return 0;
+
+release_resource:
+ aie2_release_resource(hwctx);
+free_col_list:
+ kfree(hwctx->col_list);
+free_entity:
+ drm_sched_entity_destroy(&priv->entity);
+free_sched:
+ drm_sched_fini(&priv->sched);
+free_cmd_bufs:
+ for (i = 0; i < ARRAY_SIZE(priv->cmd_buf); i++) {
+ if (!priv->cmd_buf[i])
+ continue;
+ drm_gem_object_put(to_gobj(priv->cmd_buf[i]));
+ }
+ amdxdna_gem_unpin(heap);
+put_heap:
+ drm_gem_object_put(to_gobj(heap));
+free_priv:
+ kfree(priv);
+ return ret;
+}
+
+void aie2_hwctx_fini(struct amdxdna_hwctx *hwctx)
+{
+ struct amdxdna_dev_hdl *ndev;
+ struct amdxdna_dev *xdna;
+ int idx;
+
+ xdna = hwctx->client->xdna;
+ ndev = xdna->dev_handle;
+ ndev->hwctx_num--;
+
+ XDNA_DBG(xdna, "%s sequence number %lld", hwctx->name, hwctx->priv->seq);
+ drm_sched_entity_destroy(&hwctx->priv->entity);
+
+ aie2_hwctx_wait_for_idle(hwctx);
+
+ /* Request fw to destroy hwctx and cancel the remaining pending requests */
+ aie2_release_resource(hwctx);
+
+ /* Wait for all submitted jobs to be completed or canceled */
+ wait_event(hwctx->priv->job_free_wq,
+ atomic64_read(&hwctx->job_submit_cnt) ==
+ atomic64_read(&hwctx->job_free_cnt));
+
+ drm_sched_fini(&hwctx->priv->sched);
+ aie2_ctx_syncobj_destroy(hwctx);
+
+ for (idx = 0; idx < ARRAY_SIZE(hwctx->priv->cmd_buf); idx++)
+ drm_gem_object_put(to_gobj(hwctx->priv->cmd_buf[idx]));
+ amdxdna_gem_unpin(hwctx->priv->heap);
+ drm_gem_object_put(to_gobj(hwctx->priv->heap));
+
+ mutex_destroy(&hwctx->priv->io_lock);
+ kfree(hwctx->col_list);
+ kfree(hwctx->priv);
+ kfree(hwctx->cus);
+}
+
+static int aie2_hwctx_cu_config(struct amdxdna_hwctx *hwctx, void *buf, u32 size)
+{
+ struct amdxdna_hwctx_param_config_cu *config = buf;
+ struct amdxdna_dev *xdna = hwctx->client->xdna;
+ u32 total_size;
+ int ret;
+
+ XDNA_DBG(xdna, "Config %d CU to %s", config->num_cus, hwctx->name);
+ if (XDNA_MBZ_DBG(xdna, config->pad, sizeof(config->pad)))
+ return -EINVAL;
+
+ if (hwctx->status != HWCTX_STAT_INIT) {
+ XDNA_ERR(xdna, "Not support re-config CU");
+ return -EINVAL;
+ }
+
+ if (!config->num_cus) {
+ XDNA_ERR(xdna, "Number of CU is zero");
+ return -EINVAL;
+ }
+
+ total_size = struct_size(config, cu_configs, config->num_cus);
+ if (total_size > size) {
+ XDNA_ERR(xdna, "CU config larger than size");
+ return -EINVAL;
+ }
+
+ hwctx->cus = kmemdup(config, total_size, GFP_KERNEL);
+ if (!hwctx->cus)
+ return -ENOMEM;
+
+ ret = aie2_config_cu(hwctx);
+ if (ret) {
+ XDNA_ERR(xdna, "Config CU to firmware failed, ret %d", ret);
+ goto free_cus;
+ }
+
+ wmb(); /* To avoid locking in command submit when checking status */
+ hwctx->status = HWCTX_STAT_READY;
+
+ return 0;
+
+free_cus:
+ kfree(hwctx->cus);
+ hwctx->cus = NULL;
+ return ret;
+}
+
+int aie2_hwctx_config(struct amdxdna_hwctx *hwctx, u32 type, u64 value, void *buf, u32 size)
+{
+ struct amdxdna_dev *xdna = hwctx->client->xdna;
+
+ drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock));
+ switch (type) {
+ case DRM_AMDXDNA_HWCTX_CONFIG_CU:
+ return aie2_hwctx_cu_config(hwctx, buf, size);
+ case DRM_AMDXDNA_HWCTX_ASSIGN_DBG_BUF:
+ case DRM_AMDXDNA_HWCTX_REMOVE_DBG_BUF:
+ return -EOPNOTSUPP;
+ default:
+ XDNA_DBG(xdna, "Not supported type %d", type);
+ return -EOPNOTSUPP;
+ }
+}
+
+static int aie2_populate_range(struct amdxdna_gem_obj *abo)
+{
+ struct amdxdna_dev *xdna = to_xdna_dev(to_gobj(abo)->dev);
+ struct mm_struct *mm = abo->mem.notifier.mm;
+ struct hmm_range range = { 0 };
+ unsigned long timeout;
+ int ret;
+
+ XDNA_INFO_ONCE(xdna, "populate memory range %llx size %lx",
+ abo->mem.userptr, abo->mem.size);
+ range.notifier = &abo->mem.notifier;
+ range.start = abo->mem.userptr;
+ range.end = abo->mem.userptr + abo->mem.size;
+ range.hmm_pfns = abo->mem.pfns;
+ range.default_flags = HMM_PFN_REQ_FAULT;
+
+ if (!mmget_not_zero(mm))
+ return -EFAULT;
+
+ timeout = jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT);
+again:
+ range.notifier_seq = mmu_interval_read_begin(&abo->mem.notifier);
+ mmap_read_lock(mm);
+ ret = hmm_range_fault(&range);
+ mmap_read_unlock(mm);
+ if (ret) {
+ if (time_after(jiffies, timeout)) {
+ ret = -ETIME;
+ goto put_mm;
+ }
+
+ if (ret == -EBUSY)
+ goto again;
+
+ goto put_mm;
+ }
+
+ down_read(&xdna->notifier_lock);
+ if (mmu_interval_read_retry(&abo->mem.notifier, range.notifier_seq)) {
+ up_read(&xdna->notifier_lock);
+ goto again;
+ }
+ abo->mem.map_invalid = false;
+ up_read(&xdna->notifier_lock);
+
+put_mm:
+ mmput(mm);
+ return ret;
+}
+
+int aie2_cmd_submit(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job, u64 *seq)
+{
+ struct amdxdna_dev *xdna = hwctx->client->xdna;
+ struct ww_acquire_ctx acquire_ctx;
+ struct dma_fence_chain *chain;
+ struct amdxdna_gem_obj *abo;
+ unsigned long timeout = 0;
+ int ret, i;
+
+ ret = down_interruptible(&hwctx->priv->job_sem);
+ if (ret) {
+ XDNA_ERR(xdna, "Grab job sem failed, ret %d", ret);
+ return ret;
+ }
+
+ chain = dma_fence_chain_alloc();
+ if (!chain) {
+ XDNA_ERR(xdna, "Alloc fence chain failed");
+ ret = -ENOMEM;
+ goto up_sem;
+ }
+
+ ret = drm_sched_job_init(&job->base, &hwctx->priv->entity, 1, hwctx);
+ if (ret) {
+ XDNA_ERR(xdna, "DRM job init failed, ret %d", ret);
+ goto free_chain;
+ }
+
+retry:
+ ret = drm_gem_lock_reservations(job->bos, job->bo_cnt, &acquire_ctx);
+ if (ret) {
+ XDNA_WARN(xdna, "Failed to lock BOs, ret %d", ret);
+ goto cleanup_job;
+ }
+
+ for (i = 0; i < job->bo_cnt; i++) {
+ ret = dma_resv_reserve_fences(job->bos[i]->resv, 1);
+ if (ret) {
+ XDNA_WARN(xdna, "Failed to reserve fences %d", ret);
+ drm_gem_unlock_reservations(job->bos, job->bo_cnt, &acquire_ctx);
+ goto cleanup_job;
+ }
+ }
+
+ down_read(&xdna->notifier_lock);
+ for (i = 0; i < job->bo_cnt; i++) {
+ abo = to_xdna_obj(job->bos[i]);
+ if (abo->mem.map_invalid) {
+ up_read(&xdna->notifier_lock);
+ drm_gem_unlock_reservations(job->bos, job->bo_cnt, &acquire_ctx);
+ if (!timeout) {
+ timeout = jiffies +
+ msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT);
+ } else if (time_after(jiffies, timeout)) {
+ ret = -ETIME;
+ goto cleanup_job;
+ }
+
+ ret = aie2_populate_range(abo);
+ if (ret)
+ goto cleanup_job;
+ goto retry;
+ }
+ }
+
+ mutex_lock(&hwctx->priv->io_lock);
+ drm_sched_job_arm(&job->base);
+ job->out_fence = dma_fence_get(&job->base.s_fence->finished);
+ for (i = 0; i < job->bo_cnt; i++)
+ dma_resv_add_fence(job->bos[i]->resv, job->out_fence, DMA_RESV_USAGE_WRITE);
+ job->seq = hwctx->priv->seq++;
+ kref_get(&job->refcnt);
+ drm_sched_entity_push_job(&job->base);
+
+ *seq = job->seq;
+ drm_syncobj_add_point(hwctx->priv->syncobj, chain, job->out_fence, *seq);
+ mutex_unlock(&hwctx->priv->io_lock);
+
+ up_read(&xdna->notifier_lock);
+ drm_gem_unlock_reservations(job->bos, job->bo_cnt, &acquire_ctx);
+
+ aie2_job_put(job);
+ atomic64_inc(&hwctx->job_submit_cnt);
+
+ return 0;
+
+cleanup_job:
+ drm_sched_job_cleanup(&job->base);
+free_chain:
+ dma_fence_chain_free(chain);
+up_sem:
+ up(&hwctx->priv->job_sem);
+ job->job_done = true;
+ return ret;
+}
+
+void aie2_hmm_invalidate(struct amdxdna_gem_obj *abo,
+ unsigned long cur_seq)
+{
+ struct amdxdna_dev *xdna = to_xdna_dev(to_gobj(abo)->dev);
+ struct drm_gem_object *gobj = to_gobj(abo);
+ long ret;
+
+ down_write(&xdna->notifier_lock);
+ abo->mem.map_invalid = true;
+ mmu_interval_set_seq(&abo->mem.notifier, cur_seq);
+ up_write(&xdna->notifier_lock);
+ ret = dma_resv_wait_timeout(gobj->resv, DMA_RESV_USAGE_BOOKKEEP,
+ true, MAX_SCHEDULE_TIMEOUT);
+ if (!ret || ret == -ERESTARTSYS)
+ XDNA_ERR(xdna, "Failed to wait for bo, ret %ld", ret);
+}
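The column-list selection in aie2_hwctx_col_list() above compresses its alignment arithmetic into three expressions. As a purely illustrative, hypothetical stand-alone sketch (not part of the patch), the same math reads:

    /*
     * Within [start, end], pick start columns that are multiples of 'width'
     * and count how many such columns exist, mirroring aie2_hwctx_col_list().
     */
    static int col_list_entries(int start, int end, int width, int *first, int *last)
    {
            *first = start + (width - start % width) % width; /* round start up */
            *last = end - end % width;                        /* round end down */
            return (*last >= *first) ? (*last - *first) / width + 1 : 0;
    }

    /* Example: start = 1, end = 4, width = 4 -> first = 4, last = 4, one entry. */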
diff --git a/drivers/accel/amdxdna/aie2_error.c b/drivers/accel/amdxdna/aie2_error.c
new file mode 100644
index 000000000000..5ee905632a39
--- /dev/null
+++ b/drivers/accel/amdxdna/aie2_error.c
@@ -0,0 +1,358 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2023-2024, Advanced Micro Devices, Inc.
+ */
+
+#include <drm/drm_cache.h>
+#include <drm/drm_device.h>
+#include <drm/drm_print.h>
+#include <drm/gpu_scheduler.h>
+#include <linux/dma-mapping.h>
+#include <linux/kthread.h>
+#include <linux/kernel.h>
+
+#include "aie2_msg_priv.h"
+#include "aie2_pci.h"
+#include "amdxdna_mailbox.h"
+#include "amdxdna_pci_drv.h"
+
+struct async_event {
+ struct amdxdna_dev_hdl *ndev;
+ struct async_event_msg_resp resp;
+ struct workqueue_struct *wq;
+ struct work_struct work;
+ u8 *buf;
+ dma_addr_t addr;
+ u32 size;
+};
+
+struct async_events {
+ struct workqueue_struct *wq;
+ u8 *buf;
+ dma_addr_t addr;
+ u32 size;
+ u32 event_cnt;
+ struct async_event event[] __counted_by(event_cnt);
+};
+
+/*
+ * The enum, struct and lookup tables below are ported from the XAIE util header file.
+ *
+ * This data is defined by the AIE device and is used to decode error messages
+ * from the device.
+ */
+
+enum aie_module_type {
+ AIE_MEM_MOD = 0,
+ AIE_CORE_MOD,
+ AIE_PL_MOD,
+};
+
+enum aie_error_category {
+ AIE_ERROR_SATURATION = 0,
+ AIE_ERROR_FP,
+ AIE_ERROR_STREAM,
+ AIE_ERROR_ACCESS,
+ AIE_ERROR_BUS,
+ AIE_ERROR_INSTRUCTION,
+ AIE_ERROR_ECC,
+ AIE_ERROR_LOCK,
+ AIE_ERROR_DMA,
+ AIE_ERROR_MEM_PARITY,
+ /* Unknown is not from XAIE, added as a catch-all category */
+ AIE_ERROR_UNKNOWN,
+};
+
+/* Don't pack, unless XAIE side changed */
+struct aie_error {
+ __u8 row;
+ __u8 col;
+ __u32 mod_type;
+ __u8 event_id;
+};
+
+struct aie_err_info {
+ u32 err_cnt;
+ u32 ret_code;
+ u32 rsvd;
+ struct aie_error payload[] __counted_by(err_cnt);
+};
+
+struct aie_event_category {
+ u8 event_id;
+ enum aie_error_category category;
+};
+
+#define EVENT_CATEGORY(id, cat) { id, cat }
+static const struct aie_event_category aie_ml_mem_event_cat[] = {
+ EVENT_CATEGORY(88U, AIE_ERROR_ECC),
+ EVENT_CATEGORY(90U, AIE_ERROR_ECC),
+ EVENT_CATEGORY(91U, AIE_ERROR_MEM_PARITY),
+ EVENT_CATEGORY(92U, AIE_ERROR_MEM_PARITY),
+ EVENT_CATEGORY(93U, AIE_ERROR_MEM_PARITY),
+ EVENT_CATEGORY(94U, AIE_ERROR_MEM_PARITY),
+ EVENT_CATEGORY(95U, AIE_ERROR_MEM_PARITY),
+ EVENT_CATEGORY(96U, AIE_ERROR_MEM_PARITY),
+ EVENT_CATEGORY(97U, AIE_ERROR_DMA),
+ EVENT_CATEGORY(98U, AIE_ERROR_DMA),
+ EVENT_CATEGORY(99U, AIE_ERROR_DMA),
+ EVENT_CATEGORY(100U, AIE_ERROR_DMA),
+ EVENT_CATEGORY(101U, AIE_ERROR_LOCK),
+};
+
+static const struct aie_event_category aie_ml_core_event_cat[] = {
+ EVENT_CATEGORY(55U, AIE_ERROR_ACCESS),
+ EVENT_CATEGORY(56U, AIE_ERROR_STREAM),
+ EVENT_CATEGORY(57U, AIE_ERROR_STREAM),
+ EVENT_CATEGORY(58U, AIE_ERROR_BUS),
+ EVENT_CATEGORY(59U, AIE_ERROR_INSTRUCTION),
+ EVENT_CATEGORY(60U, AIE_ERROR_ACCESS),
+ EVENT_CATEGORY(62U, AIE_ERROR_ECC),
+ EVENT_CATEGORY(64U, AIE_ERROR_ECC),
+ EVENT_CATEGORY(65U, AIE_ERROR_ACCESS),
+ EVENT_CATEGORY(66U, AIE_ERROR_ACCESS),
+ EVENT_CATEGORY(67U, AIE_ERROR_LOCK),
+ EVENT_CATEGORY(70U, AIE_ERROR_INSTRUCTION),
+ EVENT_CATEGORY(71U, AIE_ERROR_STREAM),
+ EVENT_CATEGORY(72U, AIE_ERROR_BUS),
+};
+
+static const struct aie_event_category aie_ml_mem_tile_event_cat[] = {
+ EVENT_CATEGORY(130U, AIE_ERROR_ECC),
+ EVENT_CATEGORY(132U, AIE_ERROR_ECC),
+ EVENT_CATEGORY(133U, AIE_ERROR_DMA),
+ EVENT_CATEGORY(134U, AIE_ERROR_DMA),
+ EVENT_CATEGORY(135U, AIE_ERROR_STREAM),
+ EVENT_CATEGORY(136U, AIE_ERROR_STREAM),
+ EVENT_CATEGORY(137U, AIE_ERROR_STREAM),
+ EVENT_CATEGORY(138U, AIE_ERROR_BUS),
+ EVENT_CATEGORY(139U, AIE_ERROR_LOCK),
+};
+
+static const struct aie_event_category aie_ml_shim_tile_event_cat[] = {
+ EVENT_CATEGORY(64U, AIE_ERROR_BUS),
+ EVENT_CATEGORY(65U, AIE_ERROR_STREAM),
+ EVENT_CATEGORY(66U, AIE_ERROR_STREAM),
+ EVENT_CATEGORY(67U, AIE_ERROR_BUS),
+ EVENT_CATEGORY(68U, AIE_ERROR_BUS),
+ EVENT_CATEGORY(69U, AIE_ERROR_BUS),
+ EVENT_CATEGORY(70U, AIE_ERROR_BUS),
+ EVENT_CATEGORY(71U, AIE_ERROR_BUS),
+ EVENT_CATEGORY(72U, AIE_ERROR_DMA),
+ EVENT_CATEGORY(73U, AIE_ERROR_DMA),
+ EVENT_CATEGORY(74U, AIE_ERROR_LOCK),
+};
+
+static enum aie_error_category
+aie_get_error_category(u8 row, u8 event_id, enum aie_module_type mod_type)
+{
+ const struct aie_event_category *lut;
+ int num_entry;
+ int i;
+
+ switch (mod_type) {
+ case AIE_PL_MOD:
+ lut = aie_ml_shim_tile_event_cat;
+ num_entry = ARRAY_SIZE(aie_ml_shim_tile_event_cat);
+ break;
+ case AIE_CORE_MOD:
+ lut = aie_ml_core_event_cat;
+ num_entry = ARRAY_SIZE(aie_ml_core_event_cat);
+ break;
+ case AIE_MEM_MOD:
+ if (row == 1) {
+ lut = aie_ml_mem_tile_event_cat;
+ num_entry = ARRAY_SIZE(aie_ml_mem_tile_event_cat);
+ } else {
+ lut = aie_ml_mem_event_cat;
+ num_entry = ARRAY_SIZE(aie_ml_mem_event_cat);
+ }
+ break;
+ default:
+ return AIE_ERROR_UNKNOWN;
+ }
+
+ for (i = 0; i < num_entry; i++) {
+ if (event_id != lut[i].event_id)
+ continue;
+
+ return lut[i].category;
+ }
+
+ return AIE_ERROR_UNKNOWN;
+}
+
+static u32 aie2_error_backtrack(struct amdxdna_dev_hdl *ndev, void *err_info, u32 num_err)
+{
+ struct aie_error *errs = err_info;
+ u32 err_col = 0; /* assume that AIE has fewer than 32 columns */
+ int i;
+
+ /* Get err column bitmap */
+ for (i = 0; i < num_err; i++) {
+ struct aie_error *err = &errs[i];
+ enum aie_error_category cat;
+
+ cat = aie_get_error_category(err->row, err->event_id, err->mod_type);
+ XDNA_ERR(ndev->xdna, "Row: %d, Col: %d, module %d, event ID %d, category %d",
+ err->row, err->col, err->mod_type,
+ err->event_id, cat);
+
+ if (err->col >= 32) {
+ XDNA_WARN(ndev->xdna, "Invalid column number");
+ break;
+ }
+
+ err_col |= (1 << err->col);
+ }
+
+ return err_col;
+}
+
+static int aie2_error_async_cb(void *handle, void __iomem *data, size_t size)
+{
+ struct async_event *e = handle;
+
+ if (data) {
+ e->resp.type = readl(data + offsetof(struct async_event_msg_resp, type));
+ wmb(); /* Update status last, so that no lock is needed here */
+ e->resp.status = readl(data + offsetof(struct async_event_msg_resp, status));
+ }
+ queue_work(e->wq, &e->work);
+ return 0;
+}
+
+static int aie2_error_event_send(struct async_event *e)
+{
+ drm_clflush_virt_range(e->buf, e->size); /* device can access */
+ return aie2_register_asyn_event_msg(e->ndev, e->addr, e->size, e,
+ aie2_error_async_cb);
+}
+
+static void aie2_error_worker(struct work_struct *err_work)
+{
+ struct aie_err_info *info;
+ struct amdxdna_dev *xdna;
+ struct async_event *e;
+ u32 max_err;
+ u32 err_col;
+
+ e = container_of(err_work, struct async_event, work);
+
+ xdna = e->ndev->xdna;
+
+ if (e->resp.status == MAX_AIE2_STATUS_CODE)
+ return;
+
+ e->resp.status = MAX_AIE2_STATUS_CODE;
+
+ print_hex_dump_debug("AIE error: ", DUMP_PREFIX_OFFSET, 16, 4,
+ e->buf, 0x100, false);
+
+ info = (struct aie_err_info *)e->buf;
+ XDNA_DBG(xdna, "Error count %d return code %d", info->err_cnt, info->ret_code);
+
+ max_err = (e->size - sizeof(*info)) / sizeof(struct aie_error);
+ if (unlikely(info->err_cnt > max_err)) {
+ WARN_ONCE(1, "Error count too large %d\n", info->err_cnt);
+ return;
+ }
+ err_col = aie2_error_backtrack(e->ndev, info->payload, info->err_cnt);
+ if (!err_col) {
+ XDNA_WARN(xdna, "Did not get error column");
+ return;
+ }
+
+ mutex_lock(&xdna->dev_lock);
+ /* Re-send this event to firmware */
+ if (aie2_error_event_send(e))
+ XDNA_WARN(xdna, "Unable to register async event");
+ mutex_unlock(&xdna->dev_lock);
+}
+
+int aie2_error_async_events_send(struct amdxdna_dev_hdl *ndev)
+{
+ struct amdxdna_dev *xdna = ndev->xdna;
+ struct async_event *e;
+ int i, ret;
+
+ drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock));
+ for (i = 0; i < ndev->async_events->event_cnt; i++) {
+ e = &ndev->async_events->event[i];
+ ret = aie2_error_event_send(e);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
+void aie2_error_async_events_free(struct amdxdna_dev_hdl *ndev)
+{
+ struct amdxdna_dev *xdna = ndev->xdna;
+ struct async_events *events;
+
+ events = ndev->async_events;
+
+ mutex_unlock(&xdna->dev_lock);
+ destroy_workqueue(events->wq);
+ mutex_lock(&xdna->dev_lock);
+
+ dma_free_noncoherent(xdna->ddev.dev, events->size, events->buf,
+ events->addr, DMA_FROM_DEVICE);
+ kfree(events);
+}
+
+int aie2_error_async_events_alloc(struct amdxdna_dev_hdl *ndev)
+{
+ struct amdxdna_dev *xdna = ndev->xdna;
+ u32 total_col = ndev->total_col;
+ u32 total_size = ASYNC_BUF_SIZE * total_col;
+ struct async_events *events;
+ int i, ret;
+
+ events = kzalloc(struct_size(events, event, total_col), GFP_KERNEL);
+ if (!events)
+ return -ENOMEM;
+
+ events->buf = dma_alloc_noncoherent(xdna->ddev.dev, total_size, &events->addr,
+ DMA_FROM_DEVICE, GFP_KERNEL);
+ if (!events->buf) {
+ ret = -ENOMEM;
+ goto free_events;
+ }
+ events->size = total_size;
+ events->event_cnt = total_col;
+
+ events->wq = alloc_ordered_workqueue("async_wq", 0);
+ if (!events->wq) {
+ ret = -ENOMEM;
+ goto free_buf;
+ }
+
+ for (i = 0; i < events->event_cnt; i++) {
+ struct async_event *e = &events->event[i];
+ u32 offset = i * ASYNC_BUF_SIZE;
+
+ e->ndev = ndev;
+ e->wq = events->wq;
+ e->buf = &events->buf[offset];
+ e->addr = events->addr + offset;
+ e->size = ASYNC_BUF_SIZE;
+ e->resp.status = MAX_AIE2_STATUS_CODE;
+ INIT_WORK(&e->work, aie2_error_worker);
+ }
+
+ ndev->async_events = events;
+
+ XDNA_DBG(xdna, "Async event count %d, buf total size 0x%x",
+ events->event_cnt, events->size);
+ return 0;
+
+free_buf:
+ dma_free_noncoherent(xdna->ddev.dev, events->size, events->buf,
+ events->addr, DMA_FROM_DEVICE);
+free_events:
+ kfree(events);
+ return ret;
+}
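To make the lookup tables above concrete, here is a small, hypothetical usage sketch (not part of the patch) showing how one decoded error record feeds the column bitmap, following the same steps as aie2_error_backtrack():

    struct aie_error err = { .row = 2, .col = 5, .mod_type = AIE_MEM_MOD, .event_id = 97 };
    enum aie_error_category cat;
    u32 err_col = 0;

    /* row != 1 and AIE_MEM_MOD select aie_ml_mem_event_cat; event 97 -> AIE_ERROR_DMA */
    cat = aie_get_error_category(err.row, err.event_id, err.mod_type);

    if (err.col < 32)
            err_col |= 1 << err.col; /* column 5 is flagged, as in aie2_error_backtrack() */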
diff --git a/drivers/accel/amdxdna/aie2_message.c b/drivers/accel/amdxdna/aie2_message.c
new file mode 100644
index 000000000000..82412eec9a4b
--- /dev/null
+++ b/drivers/accel/amdxdna/aie2_message.c
@@ -0,0 +1,776 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2023-2024, Advanced Micro Devices, Inc.
+ */
+
+#include <drm/amdxdna_accel.h>
+#include <drm/drm_cache.h>
+#include <drm/drm_device.h>
+#include <drm/drm_gem.h>
+#include <drm/drm_gem_shmem_helper.h>
+#include <drm/drm_print.h>
+#include <drm/gpu_scheduler.h>
+#include <linux/bitfield.h>
+#include <linux/errno.h>
+#include <linux/pci.h>
+#include <linux/types.h>
+#include <linux/xarray.h>
+
+#include "aie2_msg_priv.h"
+#include "aie2_pci.h"
+#include "amdxdna_ctx.h"
+#include "amdxdna_gem.h"
+#include "amdxdna_mailbox.h"
+#include "amdxdna_mailbox_helper.h"
+#include "amdxdna_pci_drv.h"
+
+#define DECLARE_AIE2_MSG(name, op) \
+ DECLARE_XDNA_MSG_COMMON(name, op, MAX_AIE2_STATUS_CODE)
+
+static int aie2_send_mgmt_msg_wait(struct amdxdna_dev_hdl *ndev,
+ struct xdna_mailbox_msg *msg)
+{
+ struct amdxdna_dev *xdna = ndev->xdna;
+ struct xdna_notify *hdl = msg->handle;
+ int ret;
+
+ if (!ndev->mgmt_chann)
+ return -ENODEV;
+
+ drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock));
+ ret = xdna_send_msg_wait(xdna, ndev->mgmt_chann, msg);
+ if (ret == -ETIME) {
+ xdna_mailbox_stop_channel(ndev->mgmt_chann);
+ xdna_mailbox_destroy_channel(ndev->mgmt_chann);
+ ndev->mgmt_chann = NULL;
+ }
+
+ if (!ret && *hdl->data != AIE2_STATUS_SUCCESS) {
+ XDNA_ERR(xdna, "command opcode 0x%x failed, status 0x%x",
+ msg->opcode, *hdl->data);
+ ret = -EINVAL;
+ }
+
+ return ret;
+}
+
+int aie2_suspend_fw(struct amdxdna_dev_hdl *ndev)
+{
+ DECLARE_AIE2_MSG(suspend, MSG_OP_SUSPEND);
+
+ return aie2_send_mgmt_msg_wait(ndev, &msg);
+}
+
+int aie2_resume_fw(struct amdxdna_dev_hdl *ndev)
+{
+ DECLARE_AIE2_MSG(suspend, MSG_OP_RESUME);
+
+ return aie2_send_mgmt_msg_wait(ndev, &msg);
+}
+
+int aie2_set_runtime_cfg(struct amdxdna_dev_hdl *ndev, u32 type, u64 value)
+{
+ DECLARE_AIE2_MSG(set_runtime_cfg, MSG_OP_SET_RUNTIME_CONFIG);
+ int ret;
+
+ req.type = type;
+ req.value = value;
+
+ ret = aie2_send_mgmt_msg_wait(ndev, &msg);
+ if (ret) {
+ XDNA_ERR(ndev->xdna, "Failed to set runtime config, ret %d", ret);
+ return ret;
+ }
+
+ return 0;
+}
+
+int aie2_get_runtime_cfg(struct amdxdna_dev_hdl *ndev, u32 type, u64 *value)
+{
+ DECLARE_AIE2_MSG(get_runtime_cfg, MSG_OP_GET_RUNTIME_CONFIG);
+ int ret;
+
+ req.type = type;
+ ret = aie2_send_mgmt_msg_wait(ndev, &msg);
+ if (ret) {
+ XDNA_ERR(ndev->xdna, "Failed to get runtime config, ret %d", ret);
+ return ret;
+ }
+
+ *value = resp.value;
+ return 0;
+}
+
+int aie2_assign_mgmt_pasid(struct amdxdna_dev_hdl *ndev, u16 pasid)
+{
+ DECLARE_AIE2_MSG(assign_mgmt_pasid, MSG_OP_ASSIGN_MGMT_PASID);
+
+ req.pasid = pasid;
+
+ return aie2_send_mgmt_msg_wait(ndev, &msg);
+}
+
+int aie2_query_aie_version(struct amdxdna_dev_hdl *ndev, struct aie_version *version)
+{
+ DECLARE_AIE2_MSG(aie_version_info, MSG_OP_QUERY_AIE_VERSION);
+ struct amdxdna_dev *xdna = ndev->xdna;
+ int ret;
+
+ ret = aie2_send_mgmt_msg_wait(ndev, &msg);
+ if (ret)
+ return ret;
+
+ XDNA_DBG(xdna, "Query AIE version - major: %u minor: %u completed",
+ resp.major, resp.minor);
+
+ version->major = resp.major;
+ version->minor = resp.minor;
+
+ return 0;
+}
+
+int aie2_query_aie_metadata(struct amdxdna_dev_hdl *ndev, struct aie_metadata *metadata)
+{
+ DECLARE_AIE2_MSG(aie_tile_info, MSG_OP_QUERY_AIE_TILE_INFO);
+ int ret;
+
+ ret = aie2_send_mgmt_msg_wait(ndev, &msg);
+ if (ret)
+ return ret;
+
+ metadata->size = resp.info.size;
+ metadata->cols = resp.info.cols;
+ metadata->rows = resp.info.rows;
+
+ metadata->version.major = resp.info.major;
+ metadata->version.minor = resp.info.minor;
+
+ metadata->core.row_count = resp.info.core_rows;
+ metadata->core.row_start = resp.info.core_row_start;
+ metadata->core.dma_channel_count = resp.info.core_dma_channels;
+ metadata->core.lock_count = resp.info.core_locks;
+ metadata->core.event_reg_count = resp.info.core_events;
+
+ metadata->mem.row_count = resp.info.mem_rows;
+ metadata->mem.row_start = resp.info.mem_row_start;
+ metadata->mem.dma_channel_count = resp.info.mem_dma_channels;
+ metadata->mem.lock_count = resp.info.mem_locks;
+ metadata->mem.event_reg_count = resp.info.mem_events;
+
+ metadata->shim.row_count = resp.info.shim_rows;
+ metadata->shim.row_start = resp.info.shim_row_start;
+ metadata->shim.dma_channel_count = resp.info.shim_dma_channels;
+ metadata->shim.lock_count = resp.info.shim_locks;
+ metadata->shim.event_reg_count = resp.info.shim_events;
+
+ return 0;
+}
+
+int aie2_query_firmware_version(struct amdxdna_dev_hdl *ndev,
+ struct amdxdna_fw_ver *fw_ver)
+{
+ DECLARE_AIE2_MSG(firmware_version, MSG_OP_GET_FIRMWARE_VERSION);
+ int ret;
+
+ ret = aie2_send_mgmt_msg_wait(ndev, &msg);
+ if (ret)
+ return ret;
+
+ fw_ver->major = resp.major;
+ fw_ver->minor = resp.minor;
+ fw_ver->sub = resp.sub;
+ fw_ver->build = resp.build;
+
+ return 0;
+}
+
+int aie2_create_context(struct amdxdna_dev_hdl *ndev, struct amdxdna_hwctx *hwctx)
+{
+ DECLARE_AIE2_MSG(create_ctx, MSG_OP_CREATE_CONTEXT);
+ struct amdxdna_dev *xdna = ndev->xdna;
+ struct xdna_mailbox_chann_res x2i;
+ struct xdna_mailbox_chann_res i2x;
+ struct cq_pair *cq_pair;
+ u32 intr_reg;
+ int ret;
+
+ req.aie_type = 1;
+ req.start_col = hwctx->start_col;
+ req.num_col = hwctx->num_col;
+ req.num_cq_pairs_requested = 1;
+ req.pasid = hwctx->client->pasid;
+ req.context_priority = 2;
+
+ ret = aie2_send_mgmt_msg_wait(ndev, &msg);
+ if (ret)
+ return ret;
+
+ hwctx->fw_ctx_id = resp.context_id;
+ WARN_ONCE(hwctx->fw_ctx_id == -1, "Unexpected context id");
+
+ cq_pair = &resp.cq_pair[0];
+ x2i.mb_head_ptr_reg = AIE2_MBOX_OFF(ndev, cq_pair->x2i_q.head_addr);
+ x2i.mb_tail_ptr_reg = AIE2_MBOX_OFF(ndev, cq_pair->x2i_q.tail_addr);
+ x2i.rb_start_addr = AIE2_SRAM_OFF(ndev, cq_pair->x2i_q.buf_addr);
+ x2i.rb_size = cq_pair->x2i_q.buf_size;
+
+ i2x.mb_head_ptr_reg = AIE2_MBOX_OFF(ndev, cq_pair->i2x_q.head_addr);
+ i2x.mb_tail_ptr_reg = AIE2_MBOX_OFF(ndev, cq_pair->i2x_q.tail_addr);
+ i2x.rb_start_addr = AIE2_SRAM_OFF(ndev, cq_pair->i2x_q.buf_addr);
+ i2x.rb_size = cq_pair->i2x_q.buf_size;
+
+ ret = pci_irq_vector(to_pci_dev(xdna->ddev.dev), resp.msix_id);
+ if (ret == -EINVAL) {
+ XDNA_ERR(xdna, "not able to create channel");
+ goto out_destroy_context;
+ }
+
+ intr_reg = i2x.mb_head_ptr_reg + 4;
+ hwctx->priv->mbox_chann = xdna_mailbox_create_channel(ndev->mbox, &x2i, &i2x,
+ intr_reg, ret);
+ if (!hwctx->priv->mbox_chann) {
+ XDNA_ERR(xdna, "not able to create channel");
+ ret = -EINVAL;
+ goto out_destroy_context;
+ }
+
+ XDNA_DBG(xdna, "%s mailbox channel irq: %d, msix_id: %d",
+ hwctx->name, ret, resp.msix_id);
+ XDNA_DBG(xdna, "%s created fw ctx %d pasid %d", hwctx->name,
+ hwctx->fw_ctx_id, hwctx->client->pasid);
+
+ return 0;
+
+out_destroy_context:
+ aie2_destroy_context(ndev, hwctx);
+ return ret;
+}
+
+int aie2_destroy_context(struct amdxdna_dev_hdl *ndev, struct amdxdna_hwctx *hwctx)
+{
+ DECLARE_AIE2_MSG(destroy_ctx, MSG_OP_DESTROY_CONTEXT);
+ struct amdxdna_dev *xdna = ndev->xdna;
+ int ret;
+
+ if (hwctx->fw_ctx_id == -1)
+ return 0;
+
+ xdna_mailbox_stop_channel(hwctx->priv->mbox_chann);
+
+ req.context_id = hwctx->fw_ctx_id;
+ ret = aie2_send_mgmt_msg_wait(ndev, &msg);
+ if (ret)
+ XDNA_WARN(xdna, "%s destroy context failed, ret %d", hwctx->name, ret);
+
+ xdna_mailbox_destroy_channel(hwctx->priv->mbox_chann);
+ XDNA_DBG(xdna, "%s destroyed fw ctx %d", hwctx->name,
+ hwctx->fw_ctx_id);
+ hwctx->priv->mbox_chann = NULL;
+ hwctx->fw_ctx_id = -1;
+
+ return ret;
+}
+
+int aie2_map_host_buf(struct amdxdna_dev_hdl *ndev, u32 context_id, u64 addr, u64 size)
+{
+ DECLARE_AIE2_MSG(map_host_buffer, MSG_OP_MAP_HOST_BUFFER);
+ struct amdxdna_dev *xdna = ndev->xdna;
+ int ret;
+
+ req.context_id = context_id;
+ req.buf_addr = addr;
+ req.buf_size = size;
+ ret = aie2_send_mgmt_msg_wait(ndev, &msg);
+ if (ret)
+ return ret;
+
+ XDNA_DBG(xdna, "fw ctx %d map host buf addr 0x%llx size 0x%llx",
+ context_id, addr, size);
+
+ return 0;
+}
+
+int aie2_query_status(struct amdxdna_dev_hdl *ndev, char __user *buf,
+ u32 size, u32 *cols_filled)
+{
+ DECLARE_AIE2_MSG(aie_column_info, MSG_OP_QUERY_COL_STATUS);
+ struct amdxdna_dev *xdna = ndev->xdna;
+ struct amdxdna_client *client;
+ struct amdxdna_hwctx *hwctx;
+ unsigned long hwctx_id;
+ dma_addr_t dma_addr;
+ u32 aie_bitmap = 0;
+ u8 *buff_addr;
+ int ret, idx;
+
+ buff_addr = dma_alloc_noncoherent(xdna->ddev.dev, size, &dma_addr,
+ DMA_FROM_DEVICE, GFP_KERNEL);
+ if (!buff_addr)
+ return -ENOMEM;
+
+ /* Go through each hardware context and mark the AIE columns that are active */
+ list_for_each_entry(client, &xdna->client_list, node) {
+ idx = srcu_read_lock(&client->hwctx_srcu);
+ amdxdna_for_each_hwctx(client, hwctx_id, hwctx)
+ aie_bitmap |= amdxdna_hwctx_col_map(hwctx);
+ srcu_read_unlock(&client->hwctx_srcu, idx);
+ }
+
+ *cols_filled = 0;
+ req.dump_buff_addr = dma_addr;
+ req.dump_buff_size = size;
+ req.num_cols = hweight32(aie_bitmap);
+ req.aie_bitmap = aie_bitmap;
+
+ drm_clflush_virt_range(buff_addr, size); /* device can access */
+ ret = aie2_send_mgmt_msg_wait(ndev, &msg);
+ if (ret) {
+ XDNA_ERR(xdna, "Error during NPU query, status %d", ret);
+ goto fail;
+ }
+
+ if (resp.status != AIE2_STATUS_SUCCESS) {
+ XDNA_ERR(xdna, "Query NPU status failed, status 0x%x", resp.status);
+ ret = -EINVAL;
+ goto fail;
+ }
+ XDNA_DBG(xdna, "Query NPU status completed");
+
+ if (size < resp.size) {
+ ret = -EINVAL;
+ XDNA_ERR(xdna, "Bad buffer size. Available: %u. Needs: %u", size, resp.size);
+ goto fail;
+ }
+
+ if (copy_to_user(buf, buff_addr, resp.size)) {
+ ret = -EFAULT;
+ XDNA_ERR(xdna, "Failed to copy NPU status to user space");
+ goto fail;
+ }
+
+ *cols_filled = aie_bitmap;
+
+fail:
+ dma_free_noncoherent(xdna->ddev.dev, size, buff_addr, dma_addr, DMA_FROM_DEVICE);
+ return ret;
+}
+
+int aie2_register_asyn_event_msg(struct amdxdna_dev_hdl *ndev, dma_addr_t addr, u32 size,
+ void *handle, int (*cb)(void*, void __iomem *, size_t))
+{
+ struct async_event_msg_req req = { 0 };
+ struct xdna_mailbox_msg msg = {
+ .send_data = (u8 *)&req,
+ .send_size = sizeof(req),
+ .handle = handle,
+ .opcode = MSG_OP_REGISTER_ASYNC_EVENT_MSG,
+ .notify_cb = cb,
+ };
+
+ req.buf_addr = addr;
+ req.buf_size = size;
+
+ XDNA_DBG(ndev->xdna, "Register addr 0x%llx size 0x%x", addr, size);
+ return xdna_mailbox_send_msg(ndev->mgmt_chann, &msg, TX_TIMEOUT);
+}
+
+int aie2_config_cu(struct amdxdna_hwctx *hwctx)
+{
+ struct mailbox_channel *chann = hwctx->priv->mbox_chann;
+ struct amdxdna_dev *xdna = hwctx->client->xdna;
+ u32 shift = xdna->dev_info->dev_mem_buf_shift;
+ DECLARE_AIE2_MSG(config_cu, MSG_OP_CONFIG_CU);
+ struct drm_gem_object *gobj;
+ struct amdxdna_gem_obj *abo;
+ int ret, i;
+
+ if (!chann)
+ return -ENODEV;
+
+ if (hwctx->cus->num_cus > MAX_NUM_CUS) {
+ XDNA_DBG(xdna, "Exceed maximum CU %d", MAX_NUM_CUS);
+ return -EINVAL;
+ }
+
+ for (i = 0; i < hwctx->cus->num_cus; i++) {
+ struct amdxdna_cu_config *cu = &hwctx->cus->cu_configs[i];
+
+ if (XDNA_MBZ_DBG(xdna, cu->pad, sizeof(cu->pad)))
+ return -EINVAL;
+
+ gobj = drm_gem_object_lookup(hwctx->client->filp, cu->cu_bo);
+ if (!gobj) {
+ XDNA_ERR(xdna, "Lookup GEM object failed");
+ return -EINVAL;
+ }
+ abo = to_xdna_obj(gobj);
+
+ if (abo->type != AMDXDNA_BO_DEV) {
+ drm_gem_object_put(gobj);
+ XDNA_ERR(xdna, "Invalid BO type");
+ return -EINVAL;
+ }
+
+ req.cfgs[i] = FIELD_PREP(AIE2_MSG_CFG_CU_PDI_ADDR,
+ abo->mem.dev_addr >> shift);
+ req.cfgs[i] |= FIELD_PREP(AIE2_MSG_CFG_CU_FUNC, cu->cu_func);
+ XDNA_DBG(xdna, "CU %d full addr 0x%llx, cfg 0x%x", i,
+ abo->mem.dev_addr, req.cfgs[i]);
+ drm_gem_object_put(gobj);
+ }
+ req.num_cus = hwctx->cus->num_cus;
+
+ ret = xdna_send_msg_wait(xdna, chann, &msg);
+ if (ret == -ETIME)
+ aie2_destroy_context(xdna->dev_handle, hwctx);
+
+ if (resp.status == AIE2_STATUS_SUCCESS) {
+ XDNA_DBG(xdna, "Configure %d CUs, ret %d", req.num_cus, ret);
+ return 0;
+ }
+
+ XDNA_ERR(xdna, "Command opcode 0x%x failed, status 0x%x ret %d",
+ msg.opcode, resp.status, ret);
+ return ret;
+}
+
+int aie2_execbuf(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job,
+ int (*notify_cb)(void *, void __iomem *, size_t))
+{
+ struct mailbox_channel *chann = hwctx->priv->mbox_chann;
+ struct amdxdna_dev *xdna = hwctx->client->xdna;
+ struct amdxdna_gem_obj *cmd_abo = job->cmd_bo;
+ union {
+ struct execute_buffer_req ebuf;
+ struct exec_dpu_req dpu;
+ } req;
+ struct xdna_mailbox_msg msg;
+ u32 payload_len;
+ void *payload;
+ int cu_idx;
+ int ret;
+ u32 op;
+
+ if (!chann)
+ return -ENODEV;
+
+ payload = amdxdna_cmd_get_payload(cmd_abo, &payload_len);
+ if (!payload) {
+ XDNA_ERR(xdna, "Invalid command, cannot get payload");
+ return -EINVAL;
+ }
+
+ cu_idx = amdxdna_cmd_get_cu_idx(cmd_abo);
+ if (cu_idx < 0) {
+ XDNA_DBG(xdna, "Invalid cu idx");
+ return -EINVAL;
+ }
+
+ op = amdxdna_cmd_get_op(cmd_abo);
+ switch (op) {
+ case ERT_START_CU:
+ if (unlikely(payload_len > sizeof(req.ebuf.payload)))
+ XDNA_DBG(xdna, "Invalid ebuf payload len: %d", payload_len);
+ req.ebuf.cu_idx = cu_idx;
+ memcpy(req.ebuf.payload, payload, sizeof(req.ebuf.payload));
+ msg.send_size = sizeof(req.ebuf);
+ msg.opcode = MSG_OP_EXECUTE_BUFFER_CF;
+ break;
+ case ERT_START_NPU: {
+ struct amdxdna_cmd_start_npu *sn = payload;
+
+ if (unlikely(payload_len - sizeof(*sn) > sizeof(req.dpu.payload)))
+ XDNA_DBG(xdna, "Invalid dpu payload len: %d", payload_len);
+ req.dpu.inst_buf_addr = sn->buffer;
+ req.dpu.inst_size = sn->buffer_size;
+ req.dpu.inst_prop_cnt = sn->prop_count;
+ req.dpu.cu_idx = cu_idx;
+ memcpy(req.dpu.payload, sn->prop_args, sizeof(req.dpu.payload));
+ msg.send_size = sizeof(req.dpu);
+ msg.opcode = MSG_OP_EXEC_DPU;
+ break;
+ }
+ default:
+ XDNA_DBG(xdna, "Invalid ERT cmd op code: %d", op);
+ return -EINVAL;
+ }
+ msg.handle = job;
+ msg.notify_cb = notify_cb;
+ msg.send_data = (u8 *)&req;
+ print_hex_dump_debug("cmd: ", DUMP_PREFIX_OFFSET, 16, 4, &req,
+ 0x40, false);
+
+ ret = xdna_mailbox_send_msg(chann, &msg, TX_TIMEOUT);
+ if (ret) {
+ XDNA_ERR(xdna, "Send message failed");
+ return ret;
+ }
+
+ return 0;
+}
+
+static int
+aie2_cmdlist_fill_one_slot_cf(void *cmd_buf, u32 offset,
+ struct amdxdna_gem_obj *abo, u32 *size)
+{
+ struct cmd_chain_slot_execbuf_cf *buf = cmd_buf + offset;
+ int cu_idx = amdxdna_cmd_get_cu_idx(abo);
+ u32 payload_len;
+ void *payload;
+
+ if (cu_idx < 0)
+ return -EINVAL;
+
+ payload = amdxdna_cmd_get_payload(abo, &payload_len);
+ if (!payload)
+ return -EINVAL;
+
+ if (!slot_has_space(*buf, offset, payload_len))
+ return -ENOSPC;
+
+ buf->cu_idx = cu_idx;
+ buf->arg_cnt = payload_len / sizeof(u32);
+ memcpy(buf->args, payload, payload_len);
+ /* Set the accurate buf size to hint firmware to do only the necessary copy */
+ *size = sizeof(*buf) + payload_len;
+ return 0;
+}
+
+static int
+aie2_cmdlist_fill_one_slot_dpu(void *cmd_buf, u32 offset,
+ struct amdxdna_gem_obj *abo, u32 *size)
+{
+ struct cmd_chain_slot_dpu *buf = cmd_buf + offset;
+ int cu_idx = amdxdna_cmd_get_cu_idx(abo);
+ struct amdxdna_cmd_start_npu *sn;
+ u32 payload_len;
+ void *payload;
+ u32 arg_sz;
+
+ if (cu_idx < 0)
+ return -EINVAL;
+
+ payload = amdxdna_cmd_get_payload(abo, &payload_len);
+ if (!payload)
+ return -EINVAL;
+ sn = payload;
+ arg_sz = payload_len - sizeof(*sn);
+ if (payload_len < sizeof(*sn) || arg_sz > MAX_DPU_ARGS_SIZE)
+ return -EINVAL;
+
+ if (!slot_has_space(*buf, offset, arg_sz))
+ return -ENOSPC;
+
+ buf->inst_buf_addr = sn->buffer;
+ buf->inst_size = sn->buffer_size;
+ buf->inst_prop_cnt = sn->prop_count;
+ buf->cu_idx = cu_idx;
+ buf->arg_cnt = arg_sz / sizeof(u32);
+ memcpy(buf->args, sn->prop_args, arg_sz);
+
+ /* Set the accurate buf size to hint firmware to do only the necessary copy */
+ *size = sizeof(*buf) + arg_sz;
+ return 0;
+}
+
+static int
+aie2_cmdlist_fill_one_slot(u32 op, struct amdxdna_gem_obj *cmdbuf_abo, u32 offset,
+ struct amdxdna_gem_obj *abo, u32 *size)
+{
+ u32 this_op = amdxdna_cmd_get_op(abo);
+ void *cmd_buf = cmdbuf_abo->mem.kva;
+ int ret;
+
+ if (this_op != op) {
+ ret = -EINVAL;
+ goto done;
+ }
+
+ switch (op) {
+ case ERT_START_CU:
+ ret = aie2_cmdlist_fill_one_slot_cf(cmd_buf, offset, abo, size);
+ break;
+ case ERT_START_NPU:
+ ret = aie2_cmdlist_fill_one_slot_dpu(cmd_buf, offset, abo, size);
+ break;
+ default:
+ ret = -EOPNOTSUPP;
+ }
+
+done:
+ if (ret) {
+ XDNA_ERR(abo->client->xdna, "Can't fill slot for cmd op %d ret %d",
+ op, ret);
+ }
+ return ret;
+}
+
+static inline struct amdxdna_gem_obj *
+aie2_cmdlist_get_cmd_buf(struct amdxdna_sched_job *job)
+{
+ int idx = get_job_idx(job->seq);
+
+ return job->hwctx->priv->cmd_buf[idx];
+}
+
+static void
+aie2_cmdlist_prepare_request(struct cmd_chain_req *req,
+ struct amdxdna_gem_obj *cmdbuf_abo, u32 size, u32 cnt)
+{
+ req->buf_addr = cmdbuf_abo->mem.dev_addr;
+ req->buf_size = size;
+ req->count = cnt;
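+ /* Flush CPU caches so the device sees the latest command buffer contents */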
+ drm_clflush_virt_range(cmdbuf_abo->mem.kva, size);
+ XDNA_DBG(cmdbuf_abo->client->xdna, "Command buf addr 0x%llx size 0x%x count %d",
+ req->buf_addr, size, cnt);
+}
+
+static inline u32
+aie2_cmd_op_to_msg_op(u32 op)
+{
+ switch (op) {
+ case ERT_START_CU:
+ return MSG_OP_CHAIN_EXEC_BUFFER_CF;
+ case ERT_START_NPU:
+ return MSG_OP_CHAIN_EXEC_DPU;
+ default:
+ return MSG_OP_MAX_OPCODE;
+ }
+}
+
+int aie2_cmdlist_multi_execbuf(struct amdxdna_hwctx *hwctx,
+ struct amdxdna_sched_job *job,
+ int (*notify_cb)(void *, void __iomem *, size_t))
+{
+ struct amdxdna_gem_obj *cmdbuf_abo = aie2_cmdlist_get_cmd_buf(job);
+ struct mailbox_channel *chann = hwctx->priv->mbox_chann;
+ struct amdxdna_client *client = hwctx->client;
+ struct amdxdna_gem_obj *cmd_abo = job->cmd_bo;
+ struct amdxdna_cmd_chain *payload;
+ struct xdna_mailbox_msg msg;
+ struct cmd_chain_req req;
+ u32 payload_len;
+ u32 offset = 0;
+ u32 size;
+ int ret;
+ u32 op;
+ u32 i;
+
+ op = amdxdna_cmd_get_op(cmd_abo);
+ payload = amdxdna_cmd_get_payload(cmd_abo, &payload_len);
+ if (op != ERT_CMD_CHAIN || !payload ||
+ payload_len < struct_size(payload, data, payload->command_count))
+ return -EINVAL;
+
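+ /* Fill one command buffer slot for each sub-command in the chain */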
+ for (i = 0; i < payload->command_count; i++) {
+ u32 boh = (u32)(payload->data[i]);
+ struct amdxdna_gem_obj *abo;
+
+ abo = amdxdna_gem_get_obj(client, boh, AMDXDNA_BO_CMD);
+ if (!abo) {
+ XDNA_ERR(client->xdna, "Failed to find cmd BO %d", boh);
+ return -ENOENT;
+ }
+
+ /* All sub-commands should have the same op; use the first one. */
+ if (i == 0)
+ op = amdxdna_cmd_get_op(abo);
+
+ ret = aie2_cmdlist_fill_one_slot(op, cmdbuf_abo, offset, abo, &size);
+ amdxdna_gem_put_obj(abo);
+ if (ret)
+ return -EINVAL;
+
+ offset += size;
+ }
+
+ /* The offset is the accumulated total size of the cmd buffer */
+ aie2_cmdlist_prepare_request(&req, cmdbuf_abo, offset, payload->command_count);
+
+ msg.opcode = aie2_cmd_op_to_msg_op(op);
+ if (msg.opcode == MSG_OP_MAX_OPCODE)
+ return -EOPNOTSUPP;
+ msg.handle = job;
+ msg.notify_cb = notify_cb;
+ msg.send_data = (u8 *)&req;
+ msg.send_size = sizeof(req);
+ ret = xdna_mailbox_send_msg(chann, &msg, TX_TIMEOUT);
+ if (ret) {
+ XDNA_ERR(hwctx->client->xdna, "Send message failed");
+ return ret;
+ }
+
+ return 0;
+}
+
+int aie2_cmdlist_single_execbuf(struct amdxdna_hwctx *hwctx,
+ struct amdxdna_sched_job *job,
+ int (*notify_cb)(void *, void __iomem *, size_t))
+{
+ struct amdxdna_gem_obj *cmdbuf_abo = aie2_cmdlist_get_cmd_buf(job);
+ struct mailbox_channel *chann = hwctx->priv->mbox_chann;
+ struct amdxdna_gem_obj *cmd_abo = job->cmd_bo;
+ struct xdna_mailbox_msg msg;
+ struct cmd_chain_req req;
+ u32 size;
+ int ret;
+ u32 op;
+
+ op = amdxdna_cmd_get_op(cmd_abo);
+ ret = aie2_cmdlist_fill_one_slot(op, cmdbuf_abo, 0, cmd_abo, &size);
+ if (ret)
+ return ret;
+
+ aie2_cmdlist_prepare_request(&req, cmdbuf_abo, size, 1);
+
+ msg.opcode = aie2_cmd_op_to_msg_op(op);
+ if (msg.opcode == MSG_OP_MAX_OPCODE)
+ return -EOPNOTSUPP;
+ msg.handle = job;
+ msg.notify_cb = notify_cb;
+ msg.send_data = (u8 *)&req;
+ msg.send_size = sizeof(req);
+ ret = xdna_mailbox_send_msg(chann, &msg, TX_TIMEOUT);
+ if (ret) {
+ XDNA_ERR(hwctx->client->xdna, "Send message failed");
+ return ret;
+ }
+
+ return 0;
+}
+
+int aie2_sync_bo(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job,
+ int (*notify_cb)(void *, void __iomem *, size_t))
+{
+ struct mailbox_channel *chann = hwctx->priv->mbox_chann;
+ struct amdxdna_gem_obj *abo = to_xdna_obj(job->bos[0]);
+ struct amdxdna_dev *xdna = hwctx->client->xdna;
+ struct xdna_mailbox_msg msg;
+ struct sync_bo_req req;
+ int ret = 0;
+
+ req.src_addr = 0;
+ req.dst_addr = abo->mem.dev_addr - hwctx->client->dev_heap->mem.dev_addr;
+ req.size = abo->mem.size;
+
+ /* Device to Host */
+ req.type = FIELD_PREP(AIE2_MSG_SYNC_BO_SRC_TYPE, SYNC_BO_DEV_MEM) |
+ FIELD_PREP(AIE2_MSG_SYNC_BO_DST_TYPE, SYNC_BO_HOST_MEM);
+
+ XDNA_DBG(xdna, "sync %d bytes src(0x%llx) to dst(0x%llx) completed",
+ req.size, req.src_addr, req.dst_addr);
+
+ msg.handle = job;
+ msg.notify_cb = notify_cb;
+ msg.send_data = (u8 *)&req;
+ msg.send_size = sizeof(req);
+ msg.opcode = MSG_OP_SYNC_BO;
+
+ ret = xdna_mailbox_send_msg(chann, &msg, TX_TIMEOUT);
+ if (ret) {
+ XDNA_ERR(xdna, "Send message failed");
+ return ret;
+ }
+
+ return 0;
+}
diff --git a/drivers/accel/amdxdna/aie2_msg_priv.h b/drivers/accel/amdxdna/aie2_msg_priv.h
new file mode 100644
index 000000000000..6df9065b13f6
--- /dev/null
+++ b/drivers/accel/amdxdna/aie2_msg_priv.h
@@ -0,0 +1,368 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2022-2024, Advanced Micro Devices, Inc.
+ */
+
+#ifndef _AIE2_MSG_PRIV_H_
+#define _AIE2_MSG_PRIV_H_
+
+enum aie2_msg_opcode {
+ MSG_OP_CREATE_CONTEXT = 0x2,
+ MSG_OP_DESTROY_CONTEXT = 0x3,
+ MSG_OP_SYNC_BO = 0x7,
+ MSG_OP_EXECUTE_BUFFER_CF = 0xC,
+ MSG_OP_QUERY_COL_STATUS = 0xD,
+ MSG_OP_QUERY_AIE_TILE_INFO = 0xE,
+ MSG_OP_QUERY_AIE_VERSION = 0xF,
+ MSG_OP_EXEC_DPU = 0x10,
+ MSG_OP_CONFIG_CU = 0x11,
+ MSG_OP_CHAIN_EXEC_BUFFER_CF = 0x12,
+ MSG_OP_CHAIN_EXEC_DPU = 0x13,
+ MSG_OP_MAX_XRT_OPCODE,
+ MSG_OP_SUSPEND = 0x101,
+ MSG_OP_RESUME = 0x102,
+ MSG_OP_ASSIGN_MGMT_PASID = 0x103,
+ MSG_OP_INVOKE_SELF_TEST = 0x104,
+ MSG_OP_MAP_HOST_BUFFER = 0x106,
+ MSG_OP_GET_FIRMWARE_VERSION = 0x108,
+ MSG_OP_SET_RUNTIME_CONFIG = 0x10A,
+ MSG_OP_GET_RUNTIME_CONFIG = 0x10B,
+ MSG_OP_REGISTER_ASYNC_EVENT_MSG = 0x10C,
+ MSG_OP_MAX_DRV_OPCODE,
+ MSG_OP_GET_PROTOCOL_VERSION = 0x301,
+ MSG_OP_MAX_OPCODE
+};
+
+enum aie2_msg_status {
+ AIE2_STATUS_SUCCESS = 0x0,
+ /* AIE Error codes */
+ AIE2_STATUS_AIE_SATURATION_ERROR = 0x1000001,
+ AIE2_STATUS_AIE_FP_ERROR = 0x1000002,
+ AIE2_STATUS_AIE_STREAM_ERROR = 0x1000003,
+ AIE2_STATUS_AIE_ACCESS_ERROR = 0x1000004,
+ AIE2_STATUS_AIE_BUS_ERROR = 0x1000005,
+ AIE2_STATUS_AIE_INSTRUCTION_ERROR = 0x1000006,
+ AIE2_STATUS_AIE_ECC_ERROR = 0x1000007,
+ AIE2_STATUS_AIE_LOCK_ERROR = 0x1000008,
+ AIE2_STATUS_AIE_DMA_ERROR = 0x1000009,
+ AIE2_STATUS_AIE_MEM_PARITY_ERROR = 0x100000a,
+ AIE2_STATUS_AIE_PWR_CFG_ERROR = 0x100000b,
+ AIE2_STATUS_AIE_BACKTRACK_ERROR = 0x100000c,
+ AIE2_STATUS_MAX_AIE_STATUS_CODE,
+ /* MGMT ERT Error codes */
+ AIE2_STATUS_MGMT_ERT_SELF_TEST_FAILURE = 0x2000001,
+ AIE2_STATUS_MGMT_ERT_HASH_MISMATCH,
+ AIE2_STATUS_MGMT_ERT_NOAVAIL,
+ AIE2_STATUS_MGMT_ERT_INVALID_PARAM,
+ AIE2_STATUS_MGMT_ERT_ENTER_SUSPEND_FAILURE,
+ AIE2_STATUS_MGMT_ERT_BUSY,
+ AIE2_STATUS_MGMT_ERT_APPLICATION_ACTIVE,
+ MAX_MGMT_ERT_STATUS_CODE,
+ /* APP ERT Error codes */
+ AIE2_STATUS_APP_ERT_FIRST_ERROR = 0x3000001,
+ AIE2_STATUS_APP_INVALID_INSTR,
+ AIE2_STATUS_APP_LOAD_PDI_FAIL,
+ MAX_APP_ERT_STATUS_CODE,
+ /* NPU RTOS Error Codes */
+ AIE2_STATUS_INVALID_INPUT_BUFFER = 0x4000001,
+ AIE2_STATUS_INVALID_COMMAND,
+ AIE2_STATUS_INVALID_PARAM,
+ AIE2_STATUS_INVALID_OPERATION = 0x4000006,
+ AIE2_STATUS_ASYNC_EVENT_MSGS_FULL,
+ AIE2_STATUS_MAX_RTOS_STATUS_CODE,
+ MAX_AIE2_STATUS_CODE
+};
+
+struct assign_mgmt_pasid_req {
+ __u16 pasid;
+ __u16 reserved;
+} __packed;
+
+struct assign_mgmt_pasid_resp {
+ enum aie2_msg_status status;
+} __packed;
+
+struct map_host_buffer_req {
+ __u32 context_id;
+ __u64 buf_addr;
+ __u64 buf_size;
+} __packed;
+
+struct map_host_buffer_resp {
+ enum aie2_msg_status status;
+} __packed;
+
+#define MAX_CQ_PAIRS 2
+struct cq_info {
+ __u32 head_addr;
+ __u32 tail_addr;
+ __u32 buf_addr;
+ __u32 buf_size;
+};
+
+struct cq_pair {
+ struct cq_info x2i_q;
+ struct cq_info i2x_q;
+};
+
+struct create_ctx_req {
+ __u32 aie_type;
+ __u8 start_col;
+ __u8 num_col;
+ __u16 reserved;
+ __u8 num_cq_pairs_requested;
+ __u8 reserved1;
+ __u16 pasid;
+ __u32 pad[2];
+ __u32 sec_comm_target_type;
+ __u32 context_priority;
+} __packed;
+
+struct create_ctx_resp {
+ enum aie2_msg_status status;
+ __u32 context_id;
+ __u16 msix_id;
+ __u8 num_cq_pairs_allocated;
+ __u8 reserved;
+ struct cq_pair cq_pair[MAX_CQ_PAIRS];
+} __packed;
+
+struct destroy_ctx_req {
+ __u32 context_id;
+} __packed;
+
+struct destroy_ctx_resp {
+ enum aie2_msg_status status;
+} __packed;
+
+struct execute_buffer_req {
+ __u32 cu_idx;
+ __u32 payload[19];
+} __packed;
+
+struct exec_dpu_req {
+ __u64 inst_buf_addr;
+ __u32 inst_size;
+ __u32 inst_prop_cnt;
+ __u32 cu_idx;
+ __u32 payload[35];
+} __packed;
+
+struct execute_buffer_resp {
+ enum aie2_msg_status status;
+} __packed;
+
+struct aie_tile_info {
+ __u32 size;
+ __u16 major;
+ __u16 minor;
+ __u16 cols;
+ __u16 rows;
+ __u16 core_rows;
+ __u16 mem_rows;
+ __u16 shim_rows;
+ __u16 core_row_start;
+ __u16 mem_row_start;
+ __u16 shim_row_start;
+ __u16 core_dma_channels;
+ __u16 mem_dma_channels;
+ __u16 shim_dma_channels;
+ __u16 core_locks;
+ __u16 mem_locks;
+ __u16 shim_locks;
+ __u16 core_events;
+ __u16 mem_events;
+ __u16 shim_events;
+ __u16 reserved;
+};
+
+struct aie_tile_info_req {
+ __u32 reserved;
+} __packed;
+
+struct aie_tile_info_resp {
+ enum aie2_msg_status status;
+ struct aie_tile_info info;
+} __packed;
+
+struct aie_version_info_req {
+ __u32 reserved;
+} __packed;
+
+struct aie_version_info_resp {
+ enum aie2_msg_status status;
+ __u16 major;
+ __u16 minor;
+} __packed;
+
+struct aie_column_info_req {
+ __u64 dump_buff_addr;
+ __u32 dump_buff_size;
+ __u32 num_cols;
+ __u32 aie_bitmap;
+} __packed;
+
+struct aie_column_info_resp {
+ enum aie2_msg_status status;
+ __u32 size;
+} __packed;
+
+struct suspend_req {
+ __u32 place_holder;
+} __packed;
+
+struct suspend_resp {
+ enum aie2_msg_status status;
+} __packed;
+
+struct resume_req {
+ __u32 place_holder;
+} __packed;
+
+struct resume_resp {
+ enum aie2_msg_status status;
+} __packed;
+
+struct check_header_hash_req {
+ __u64 hash_high;
+ __u64 hash_low;
+} __packed;
+
+struct check_header_hash_resp {
+ enum aie2_msg_status status;
+} __packed;
+
+struct query_error_req {
+ __u64 buf_addr;
+ __u32 buf_size;
+ __u32 next_row;
+ __u32 next_column;
+ __u32 next_module;
+} __packed;
+
+struct query_error_resp {
+ enum aie2_msg_status status;
+ __u32 num_err;
+ __u32 has_next_err;
+ __u32 next_row;
+ __u32 next_column;
+ __u32 next_module;
+} __packed;
+
+struct protocol_version_req {
+ __u32 reserved;
+} __packed;
+
+struct protocol_version_resp {
+ enum aie2_msg_status status;
+ __u32 major;
+ __u32 minor;
+} __packed;
+
+struct firmware_version_req {
+ __u32 reserved;
+} __packed;
+
+struct firmware_version_resp {
+ enum aie2_msg_status status;
+ __u32 major;
+ __u32 minor;
+ __u32 sub;
+ __u32 build;
+} __packed;
+
+#define MAX_NUM_CUS 32
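+/* Each CU config word packs a PDI address and a function index, see the masks below */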
+#define AIE2_MSG_CFG_CU_PDI_ADDR GENMASK(16, 0)
+#define AIE2_MSG_CFG_CU_FUNC GENMASK(24, 17)
+struct config_cu_req {
+ __u32 num_cus;
+ __u32 cfgs[MAX_NUM_CUS];
+} __packed;
+
+struct config_cu_resp {
+ enum aie2_msg_status status;
+} __packed;
+
+struct set_runtime_cfg_req {
+ __u32 type;
+ __u64 value;
+} __packed;
+
+struct set_runtime_cfg_resp {
+ enum aie2_msg_status status;
+} __packed;
+
+struct get_runtime_cfg_req {
+ __u32 type;
+} __packed;
+
+struct get_runtime_cfg_resp {
+ enum aie2_msg_status status;
+ __u64 value;
+} __packed;
+
+enum async_event_type {
+ ASYNC_EVENT_TYPE_AIE_ERROR,
+ ASYNC_EVENT_TYPE_EXCEPTION,
+ MAX_ASYNC_EVENT_TYPE
+};
+
+#define ASYNC_BUF_SIZE SZ_8K
+struct async_event_msg_req {
+ __u64 buf_addr;
+ __u32 buf_size;
+} __packed;
+
+struct async_event_msg_resp {
+ enum aie2_msg_status status;
+ enum async_event_type type;
+} __packed;
+
+#define MAX_CHAIN_CMDBUF_SIZE SZ_4K
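+/*
+ * Check that a slot header plus its payload still fits in the chain
+ * command buffer when placed at the given offset.
+ */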
+#define slot_has_space(slot, offset, payload_size) \
+ (MAX_CHAIN_CMDBUF_SIZE >= (offset) + (payload_size) + \
+ sizeof(typeof(slot)))
+
+struct cmd_chain_slot_execbuf_cf {
+ __u32 cu_idx;
+ __u32 arg_cnt;
+ __u32 args[] __counted_by(arg_cnt);
+};
+
+struct cmd_chain_slot_dpu {
+ __u64 inst_buf_addr;
+ __u32 inst_size;
+ __u32 inst_prop_cnt;
+ __u32 cu_idx;
+ __u32 arg_cnt;
+#define MAX_DPU_ARGS_SIZE (34 * sizeof(__u32))
+ __u32 args[] __counted_by(arg_cnt);
+};
+
+struct cmd_chain_req {
+ __u64 buf_addr;
+ __u32 buf_size;
+ __u32 count;
+} __packed;
+
+struct cmd_chain_resp {
+ enum aie2_msg_status status;
+ __u32 fail_cmd_idx;
+ enum aie2_msg_status fail_cmd_status;
+} __packed;
+
+#define AIE2_MSG_SYNC_BO_SRC_TYPE GENMASK(3, 0)
+#define AIE2_MSG_SYNC_BO_DST_TYPE GENMASK(7, 4)
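+/* The source and destination memory types are packed into the low byte of @type */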
+struct sync_bo_req {
+ __u64 src_addr;
+ __u64 dst_addr;
+ __u32 size;
+#define SYNC_BO_DEV_MEM 0
+#define SYNC_BO_HOST_MEM 2
+ __u32 type;
+} __packed;
+
+struct sync_bo_resp {
+ enum aie2_msg_status status;
+} __packed;
+#endif /* _AIE2_MSG_PRIV_H_ */
diff --git a/drivers/accel/amdxdna/aie2_pci.c b/drivers/accel/amdxdna/aie2_pci.c
new file mode 100644
index 000000000000..5a058e565b01
--- /dev/null
+++ b/drivers/accel/amdxdna/aie2_pci.c
@@ -0,0 +1,928 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2023-2024, Advanced Micro Devices, Inc.
+ */
+
+#include <drm/amdxdna_accel.h>
+#include <drm/drm_device.h>
+#include <drm/drm_drv.h>
+#include <drm/drm_gem_shmem_helper.h>
+#include <drm/drm_managed.h>
+#include <drm/drm_print.h>
+#include <drm/gpu_scheduler.h>
+#include <linux/errno.h>
+#include <linux/firmware.h>
+#include <linux/iommu.h>
+#include <linux/iopoll.h>
+#include <linux/pci.h>
+#include <linux/xarray.h>
+
+#include "aie2_msg_priv.h"
+#include "aie2_pci.h"
+#include "aie2_solver.h"
+#include "amdxdna_ctx.h"
+#include "amdxdna_gem.h"
+#include "amdxdna_mailbox.h"
+#include "amdxdna_pci_drv.h"
+
+static int aie2_max_col = XRS_MAX_COL;
+module_param(aie2_max_col, uint, 0600);
+MODULE_PARM_DESC(aie2_max_col, "Maximum number of columns that can be used");
+
+/*
+ * The management mailbox channel is allocated by firmware.
+ * The related register and ring buffer information is in the SRAM BAR.
+ * This struct is the register layout.
+ */
+#define MGMT_MBOX_MAGIC 0x55504e5f /* _NPU */
+struct mgmt_mbox_chann_info {
+ __u32 x2i_tail;
+ __u32 x2i_head;
+ __u32 x2i_buf;
+ __u32 x2i_buf_sz;
+ __u32 i2x_tail;
+ __u32 i2x_head;
+ __u32 i2x_buf;
+ __u32 i2x_buf_sz;
+ __u32 magic;
+ __u32 msi_id;
+ __u32 prot_major;
+ __u32 prot_minor;
+ __u32 rsvd[4];
+};
+
+static int aie2_check_protocol(struct amdxdna_dev_hdl *ndev, u32 fw_major, u32 fw_minor)
+{
+ struct amdxdna_dev *xdna = ndev->xdna;
+
+ /*
+ * The driver supported mailbox behavior is defined by
+ * ndev->priv->protocol_major and protocol_minor.
+ *
+ * When protocol_major and fw_major are different, it means driver
+ * and firmware are incompatible.
+ */
+ if (ndev->priv->protocol_major != fw_major) {
+ XDNA_ERR(xdna, "Incompatible firmware protocol major %d minor %d",
+ fw_major, fw_minor);
+ return -EINVAL;
+ }
+
+ /*
+ * When protocol_minor is greater than fw_minor, the driver relies on
+ * operations that the installed firmware does not support.
+ */
+ if (ndev->priv->protocol_minor > fw_minor) {
+ XDNA_ERR(xdna, "Firmware minor version smaller than supported");
+ return -EINVAL;
+ }
+ return 0;
+}
+
+static void aie2_dump_chann_info_debug(struct amdxdna_dev_hdl *ndev)
+{
+ struct amdxdna_dev *xdna = ndev->xdna;
+
+ XDNA_DBG(xdna, "i2x tail 0x%x", ndev->mgmt_i2x.mb_tail_ptr_reg);
+ XDNA_DBG(xdna, "i2x head 0x%x", ndev->mgmt_i2x.mb_head_ptr_reg);
+ XDNA_DBG(xdna, "i2x ringbuf 0x%x", ndev->mgmt_i2x.rb_start_addr);
+ XDNA_DBG(xdna, "i2x rsize 0x%x", ndev->mgmt_i2x.rb_size);
+ XDNA_DBG(xdna, "x2i tail 0x%x", ndev->mgmt_x2i.mb_tail_ptr_reg);
+ XDNA_DBG(xdna, "x2i head 0x%x", ndev->mgmt_x2i.mb_head_ptr_reg);
+ XDNA_DBG(xdna, "x2i ringbuf 0x%x", ndev->mgmt_x2i.rb_start_addr);
+ XDNA_DBG(xdna, "x2i rsize 0x%x", ndev->mgmt_x2i.rb_size);
+ XDNA_DBG(xdna, "x2i chann index 0x%x", ndev->mgmt_chan_idx);
+ XDNA_DBG(xdna, "mailbox protocol major 0x%x", ndev->mgmt_prot_major);
+ XDNA_DBG(xdna, "mailbox protocol minor 0x%x", ndev->mgmt_prot_minor);
+}
+
+static int aie2_get_mgmt_chann_info(struct amdxdna_dev_hdl *ndev)
+{
+ struct mgmt_mbox_chann_info info_regs;
+ struct xdna_mailbox_chann_res *i2x;
+ struct xdna_mailbox_chann_res *x2i;
+ u32 addr, off;
+ u32 *reg;
+ int ret;
+ int i;
+
+ /*
+ * Once the firmware is alive, it writes the management channel
+ * information into the SRAM BAR and stores the address of that
+ * information at the FW_ALIVE_OFF offset in the SRAM BAR.
+ *
+ * Reading a non-zero value from FW_ALIVE_OFF implies that the
+ * firmware is alive.
+ */
+ ret = readx_poll_timeout(readl, SRAM_GET_ADDR(ndev, FW_ALIVE_OFF),
+ addr, addr, AIE2_INTERVAL, AIE2_TIMEOUT);
+ if (ret || !addr)
+ return -ETIME;
+
+ off = AIE2_SRAM_OFF(ndev, addr);
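+ /* Copy the channel info structure out of SRAM one 32-bit word at a time */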
+ reg = (u32 *)&info_regs;
+ for (i = 0; i < sizeof(info_regs) / sizeof(u32); i++)
+ reg[i] = readl(ndev->sram_base + off + i * sizeof(u32));
+
+ if (info_regs.magic != MGMT_MBOX_MAGIC) {
+ XDNA_ERR(ndev->xdna, "Invalid mbox magic 0x%x", info_regs.magic);
+ ret = -EINVAL;
+ goto done;
+ }
+
+ i2x = &ndev->mgmt_i2x;
+ x2i = &ndev->mgmt_x2i;
+
+ i2x->mb_head_ptr_reg = AIE2_MBOX_OFF(ndev, info_regs.i2x_head);
+ i2x->mb_tail_ptr_reg = AIE2_MBOX_OFF(ndev, info_regs.i2x_tail);
+ i2x->rb_start_addr = AIE2_SRAM_OFF(ndev, info_regs.i2x_buf);
+ i2x->rb_size = info_regs.i2x_buf_sz;
+
+ x2i->mb_head_ptr_reg = AIE2_MBOX_OFF(ndev, info_regs.x2i_head);
+ x2i->mb_tail_ptr_reg = AIE2_MBOX_OFF(ndev, info_regs.x2i_tail);
+ x2i->rb_start_addr = AIE2_SRAM_OFF(ndev, info_regs.x2i_buf);
+ x2i->rb_size = info_regs.x2i_buf_sz;
+
+ ndev->mgmt_chan_idx = info_regs.msi_id;
+ ndev->mgmt_prot_major = info_regs.prot_major;
+ ndev->mgmt_prot_minor = info_regs.prot_minor;
+
+ ret = aie2_check_protocol(ndev, ndev->mgmt_prot_major, ndev->mgmt_prot_minor);
+
+done:
+ aie2_dump_chann_info_debug(ndev);
+
+ /* Must clear address at FW_ALIVE_OFF */
+ writel(0, SRAM_GET_ADDR(ndev, FW_ALIVE_OFF));
+
+ return ret;
+}
+
+int aie2_runtime_cfg(struct amdxdna_dev_hdl *ndev,
+ enum rt_config_category category, u32 *val)
+{
+ const struct rt_config *cfg;
+ u32 value;
+ int ret;
+
+ for (cfg = ndev->priv->rt_config; cfg->type; cfg++) {
+ if (cfg->category != category)
+ continue;
+
+ value = val ? *val : cfg->value;
+ ret = aie2_set_runtime_cfg(ndev, cfg->type, value);
+ if (ret) {
+ XDNA_ERR(ndev->xdna, "Set type %d value %d failed",
+ cfg->type, value);
+ return ret;
+ }
+ }
+
+ return 0;
+}
+
+static int aie2_xdna_reset(struct amdxdna_dev_hdl *ndev)
+{
+ int ret;
+
+ ret = aie2_suspend_fw(ndev);
+ if (ret) {
+ XDNA_ERR(ndev->xdna, "Suspend firmware failed");
+ return ret;
+ }
+
+ ret = aie2_resume_fw(ndev);
+ if (ret) {
+ XDNA_ERR(ndev->xdna, "Resume firmware failed");
+ return ret;
+ }
+
+ return 0;
+}
+
+static int aie2_mgmt_fw_init(struct amdxdna_dev_hdl *ndev)
+{
+ int ret;
+
+ ret = aie2_runtime_cfg(ndev, AIE2_RT_CFG_INIT, NULL);
+ if (ret) {
+ XDNA_ERR(ndev->xdna, "Runtime config failed");
+ return ret;
+ }
+
+ ret = aie2_assign_mgmt_pasid(ndev, 0);
+ if (ret) {
+ XDNA_ERR(ndev->xdna, "Can not assign PASID");
+ return ret;
+ }
+
+ ret = aie2_xdna_reset(ndev);
+ if (ret) {
+ XDNA_ERR(ndev->xdna, "Reset firmware failed");
+ return ret;
+ }
+
+ if (!ndev->async_events)
+ return 0;
+
+ ret = aie2_error_async_events_send(ndev);
+ if (ret) {
+ XDNA_ERR(ndev->xdna, "Send async events failed");
+ return ret;
+ }
+
+ return 0;
+}
+
+static int aie2_mgmt_fw_query(struct amdxdna_dev_hdl *ndev)
+{
+ int ret;
+
+ ret = aie2_query_firmware_version(ndev, &ndev->xdna->fw_ver);
+ if (ret) {
+ XDNA_ERR(ndev->xdna, "query firmware version failed");
+ return ret;
+ }
+
+ ret = aie2_query_aie_version(ndev, &ndev->version);
+ if (ret) {
+ XDNA_ERR(ndev->xdna, "Query AIE version failed");
+ return ret;
+ }
+
+ ret = aie2_query_aie_metadata(ndev, &ndev->metadata);
+ if (ret) {
+ XDNA_ERR(ndev->xdna, "Query AIE metadata failed");
+ return ret;
+ }
+
+ return 0;
+}
+
+static void aie2_mgmt_fw_fini(struct amdxdna_dev_hdl *ndev)
+{
+ if (aie2_suspend_fw(ndev))
+ XDNA_ERR(ndev->xdna, "Suspend_fw failed");
+ XDNA_DBG(ndev->xdna, "Firmware suspended");
+}
+
+static int aie2_xrs_load(void *cb_arg, struct xrs_action_load *action)
+{
+ struct amdxdna_hwctx *hwctx = cb_arg;
+ struct amdxdna_dev *xdna;
+ int ret;
+
+ xdna = hwctx->client->xdna;
+
+ hwctx->start_col = action->part.start_col;
+ hwctx->num_col = action->part.ncols;
+ ret = aie2_create_context(xdna->dev_handle, hwctx);
+ if (ret)
+ XDNA_ERR(xdna, "create context failed, ret %d", ret);
+
+ return ret;
+}
+
+static int aie2_xrs_unload(void *cb_arg)
+{
+ struct amdxdna_hwctx *hwctx = cb_arg;
+ struct amdxdna_dev *xdna;
+ int ret;
+
+ xdna = hwctx->client->xdna;
+
+ ret = aie2_destroy_context(xdna->dev_handle, hwctx);
+ if (ret)
+ XDNA_ERR(xdna, "destroy context failed, ret %d", ret);
+
+ return ret;
+}
+
+static int aie2_xrs_set_dft_dpm_level(struct drm_device *ddev, u32 dpm_level)
+{
+ struct amdxdna_dev *xdna = to_xdna_dev(ddev);
+ struct amdxdna_dev_hdl *ndev;
+
+ drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock));
+
+ ndev = xdna->dev_handle;
+ ndev->dft_dpm_level = dpm_level;
+ if (ndev->pw_mode != POWER_MODE_DEFAULT || ndev->dpm_level == dpm_level)
+ return 0;
+
+ return ndev->priv->hw_ops.set_dpm(ndev, dpm_level);
+}
+
+static struct xrs_action_ops aie2_xrs_actions = {
+ .load = aie2_xrs_load,
+ .unload = aie2_xrs_unload,
+ .set_dft_dpm_level = aie2_xrs_set_dft_dpm_level,
+};
+
+static void aie2_hw_stop(struct amdxdna_dev *xdna)
+{
+ struct pci_dev *pdev = to_pci_dev(xdna->ddev.dev);
+ struct amdxdna_dev_hdl *ndev = xdna->dev_handle;
+
+ if (ndev->dev_status <= AIE2_DEV_INIT) {
+ XDNA_ERR(xdna, "device is already stopped");
+ return;
+ }
+
+ aie2_mgmt_fw_fini(ndev);
+ xdna_mailbox_stop_channel(ndev->mgmt_chann);
+ xdna_mailbox_destroy_channel(ndev->mgmt_chann);
+ ndev->mgmt_chann = NULL;
+ drmm_kfree(&xdna->ddev, ndev->mbox);
+ ndev->mbox = NULL;
+ aie2_psp_stop(ndev->psp_hdl);
+ aie2_smu_fini(ndev);
+ pci_disable_device(pdev);
+
+ ndev->dev_status = AIE2_DEV_INIT;
+}
+
+static int aie2_hw_start(struct amdxdna_dev *xdna)
+{
+ struct pci_dev *pdev = to_pci_dev(xdna->ddev.dev);
+ struct amdxdna_dev_hdl *ndev = xdna->dev_handle;
+ struct xdna_mailbox_res mbox_res;
+ u32 xdna_mailbox_intr_reg;
+ int mgmt_mb_irq, ret;
+
+ if (ndev->dev_status >= AIE2_DEV_START) {
+ XDNA_INFO(xdna, "device is already started");
+ return 0;
+ }
+
+ ret = pci_enable_device(pdev);
+ if (ret) {
+ XDNA_ERR(xdna, "failed to enable device, ret %d", ret);
+ return ret;
+ }
+ pci_set_master(pdev);
+
+ ret = aie2_smu_init(ndev);
+ if (ret) {
+ XDNA_ERR(xdna, "failed to init smu, ret %d", ret);
+ goto disable_dev;
+ }
+
+ ret = aie2_psp_start(ndev->psp_hdl);
+ if (ret) {
+ XDNA_ERR(xdna, "failed to start psp, ret %d", ret);
+ goto fini_smu;
+ }
+
+ ret = aie2_get_mgmt_chann_info(ndev);
+ if (ret) {
+ XDNA_ERR(xdna, "firmware is not alive");
+ goto stop_psp;
+ }
+
+ mbox_res.ringbuf_base = ndev->sram_base;
+ mbox_res.ringbuf_size = pci_resource_len(pdev, xdna->dev_info->sram_bar);
+ mbox_res.mbox_base = ndev->mbox_base;
+ mbox_res.mbox_size = MBOX_SIZE(ndev);
+ mbox_res.name = "xdna_mailbox";
+ ndev->mbox = xdnam_mailbox_create(&xdna->ddev, &mbox_res);
+ if (!ndev->mbox) {
+ XDNA_ERR(xdna, "failed to create mailbox device");
+ ret = -ENODEV;
+ goto stop_psp;
+ }
+
+ mgmt_mb_irq = pci_irq_vector(pdev, ndev->mgmt_chan_idx);
+ if (mgmt_mb_irq < 0) {
+ ret = mgmt_mb_irq;
+ XDNA_ERR(xdna, "failed to alloc irq vector, ret %d", ret);
+ goto stop_psp;
+ }
+
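+ /* The interrupt register is 4 bytes after the i2x head pointer register */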
+ xdna_mailbox_intr_reg = ndev->mgmt_i2x.mb_head_ptr_reg + 4;
+ ndev->mgmt_chann = xdna_mailbox_create_channel(ndev->mbox,
+ &ndev->mgmt_x2i,
+ &ndev->mgmt_i2x,
+ xdna_mailbox_intr_reg,
+ mgmt_mb_irq);
+ if (!ndev->mgmt_chann) {
+ XDNA_ERR(xdna, "failed to create management mailbox channel");
+ ret = -EINVAL;
+ goto stop_psp;
+ }
+
+ ret = aie2_pm_init(ndev);
+ if (ret) {
+ XDNA_ERR(xdna, "failed to init pm, ret %d", ret);
+ goto destroy_mgmt_chann;
+ }
+
+ ret = aie2_mgmt_fw_init(ndev);
+ if (ret) {
+ XDNA_ERR(xdna, "initial mgmt firmware failed, ret %d", ret);
+ goto destroy_mgmt_chann;
+ }
+
+ ndev->dev_status = AIE2_DEV_START;
+
+ return 0;
+
+destroy_mgmt_chann:
+ xdna_mailbox_stop_channel(ndev->mgmt_chann);
+ xdna_mailbox_destroy_channel(ndev->mgmt_chann);
+stop_psp:
+ aie2_psp_stop(ndev->psp_hdl);
+fini_smu:
+ aie2_smu_fini(ndev);
+disable_dev:
+ pci_disable_device(pdev);
+
+ return ret;
+}
+
+static int aie2_init(struct amdxdna_dev *xdna)
+{
+ struct pci_dev *pdev = to_pci_dev(xdna->ddev.dev);
+ void __iomem *tbl[PCI_NUM_RESOURCES] = {0};
+ struct init_config xrs_cfg = { 0 };
+ struct amdxdna_dev_hdl *ndev;
+ struct psp_config psp_conf;
+ const struct firmware *fw;
+ unsigned long bars = 0;
+ int i, nvec, ret;
+
+ ndev = drmm_kzalloc(&xdna->ddev, sizeof(*ndev), GFP_KERNEL);
+ if (!ndev)
+ return -ENOMEM;
+
+ ndev->priv = xdna->dev_info->dev_priv;
+ ndev->xdna = xdna;
+
+ ret = request_firmware(&fw, ndev->priv->fw_path, &pdev->dev);
+ if (ret) {
+ XDNA_ERR(xdna, "failed to request_firmware %s, ret %d",
+ ndev->priv->fw_path, ret);
+ return ret;
+ }
+
+ ret = pcim_enable_device(pdev);
+ if (ret) {
+ XDNA_ERR(xdna, "pcim enable device failed, ret %d", ret);
+ goto release_fw;
+ }
+
+ for (i = 0; i < PSP_MAX_REGS; i++)
+ set_bit(PSP_REG_BAR(ndev, i), &bars);
+
+ set_bit(xdna->dev_info->sram_bar, &bars);
+ set_bit(xdna->dev_info->smu_bar, &bars);
+ set_bit(xdna->dev_info->mbox_bar, &bars);
+
+ for (i = 0; i < PCI_NUM_RESOURCES; i++) {
+ if (!test_bit(i, &bars))
+ continue;
+ tbl[i] = pcim_iomap(pdev, i, 0);
+ if (!tbl[i]) {
+ XDNA_ERR(xdna, "map bar %d failed", i);
+ ret = -ENOMEM;
+ goto release_fw;
+ }
+ }
+
+ ndev->sram_base = tbl[xdna->dev_info->sram_bar];
+ ndev->smu_base = tbl[xdna->dev_info->smu_bar];
+ ndev->mbox_base = tbl[xdna->dev_info->mbox_bar];
+
+ ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
+ if (ret) {
+ XDNA_ERR(xdna, "Failed to set DMA mask: %d", ret);
+ goto release_fw;
+ }
+
+ nvec = pci_msix_vec_count(pdev);
+ if (nvec <= 0) {
+ XDNA_ERR(xdna, "does not get number of interrupt vector");
+ ret = -EINVAL;
+ goto release_fw;
+ }
+
+ ret = pci_alloc_irq_vectors(pdev, nvec, nvec, PCI_IRQ_MSIX);
+ if (ret < 0) {
+ XDNA_ERR(xdna, "failed to alloc irq vectors, ret %d", ret);
+ goto release_fw;
+ }
+
+ ret = iommu_dev_enable_feature(&pdev->dev, IOMMU_DEV_FEAT_SVA);
+ if (ret) {
+ XDNA_ERR(xdna, "Enable PASID failed, ret %d", ret);
+ goto free_irq;
+ }
+
+ psp_conf.fw_size = fw->size;
+ psp_conf.fw_buf = fw->data;
+ for (i = 0; i < PSP_MAX_REGS; i++)
+ psp_conf.psp_regs[i] = tbl[PSP_REG_BAR(ndev, i)] + PSP_REG_OFF(ndev, i);
+ ndev->psp_hdl = aie2m_psp_create(&xdna->ddev, &psp_conf);
+ if (!ndev->psp_hdl) {
+ XDNA_ERR(xdna, "failed to create psp");
+ ret = -ENOMEM;
+ goto disable_sva;
+ }
+ xdna->dev_handle = ndev;
+
+ ret = aie2_hw_start(xdna);
+ if (ret) {
+ XDNA_ERR(xdna, "start npu failed, ret %d", ret);
+ goto disable_sva;
+ }
+
+ ret = aie2_mgmt_fw_query(ndev);
+ if (ret) {
+ XDNA_ERR(xdna, "Query firmware failed, ret %d", ret);
+ goto stop_hw;
+ }
+ ndev->total_col = min(aie2_max_col, ndev->metadata.cols);
+
+ xrs_cfg.clk_list.num_levels = ndev->max_dpm_level + 1;
+ for (i = 0; i < xrs_cfg.clk_list.num_levels; i++)
+ xrs_cfg.clk_list.cu_clk_list[i] = ndev->priv->dpm_clk_tbl[i].hclk;
+ xrs_cfg.sys_eff_factor = 1;
+ xrs_cfg.ddev = &xdna->ddev;
+ xrs_cfg.actions = &aie2_xrs_actions;
+ xrs_cfg.total_col = ndev->total_col;
+
+ xdna->xrs_hdl = xrsm_init(&xrs_cfg);
+ if (!xdna->xrs_hdl) {
+ XDNA_ERR(xdna, "Initialize resolver failed");
+ ret = -EINVAL;
+ goto stop_hw;
+ }
+
+ ret = aie2_error_async_events_alloc(ndev);
+ if (ret) {
+ XDNA_ERR(xdna, "Allocate async events failed, ret %d", ret);
+ goto stop_hw;
+ }
+
+ ret = aie2_error_async_events_send(ndev);
+ if (ret) {
+ XDNA_ERR(xdna, "Send async events failed, ret %d", ret);
+ goto async_event_free;
+ }
+
+ /* Issue a command to make sure firmware handled async events */
+ ret = aie2_query_firmware_version(ndev, &ndev->xdna->fw_ver);
+ if (ret) {
+ XDNA_ERR(xdna, "Re-query firmware version failed");
+ goto async_event_free;
+ }
+
+ release_firmware(fw);
+ return 0;
+
+async_event_free:
+ aie2_error_async_events_free(ndev);
+stop_hw:
+ aie2_hw_stop(xdna);
+disable_sva:
+ iommu_dev_disable_feature(&pdev->dev, IOMMU_DEV_FEAT_SVA);
+free_irq:
+ pci_free_irq_vectors(pdev);
+release_fw:
+ release_firmware(fw);
+
+ return ret;
+}
+
+static void aie2_fini(struct amdxdna_dev *xdna)
+{
+ struct pci_dev *pdev = to_pci_dev(xdna->ddev.dev);
+ struct amdxdna_dev_hdl *ndev = xdna->dev_handle;
+
+ aie2_hw_stop(xdna);
+ aie2_error_async_events_free(ndev);
+ iommu_dev_disable_feature(&pdev->dev, IOMMU_DEV_FEAT_SVA);
+ pci_free_irq_vectors(pdev);
+}
+
+static int aie2_get_aie_status(struct amdxdna_client *client,
+ struct amdxdna_drm_get_info *args)
+{
+ struct amdxdna_drm_query_aie_status status;
+ struct amdxdna_dev *xdna = client->xdna;
+ struct amdxdna_dev_hdl *ndev;
+ int ret;
+
+ ndev = xdna->dev_handle;
+ if (copy_from_user(&status, u64_to_user_ptr(args->buffer), sizeof(status))) {
+ XDNA_ERR(xdna, "Failed to copy AIE request into kernel");
+ return -EFAULT;
+ }
+
+ if (ndev->metadata.cols * ndev->metadata.size < status.buffer_size) {
+ XDNA_ERR(xdna, "Invalid buffer size. Given Size: %u. Need Size: %u.",
+ status.buffer_size, ndev->metadata.cols * ndev->metadata.size);
+ return -EINVAL;
+ }
+
+ ret = aie2_query_status(ndev, u64_to_user_ptr(status.buffer),
+ status.buffer_size, &status.cols_filled);
+ if (ret) {
+ XDNA_ERR(xdna, "Failed to get AIE status info. Ret: %d", ret);
+ return ret;
+ }
+
+ if (copy_to_user(u64_to_user_ptr(args->buffer), &status, sizeof(status))) {
+ XDNA_ERR(xdna, "Failed to copy AIE request info to user space");
+ return -EFAULT;
+ }
+
+ return 0;
+}
+
+static int aie2_get_aie_metadata(struct amdxdna_client *client,
+ struct amdxdna_drm_get_info *args)
+{
+ struct amdxdna_drm_query_aie_metadata *meta;
+ struct amdxdna_dev *xdna = client->xdna;
+ struct amdxdna_dev_hdl *ndev;
+ int ret = 0;
+
+ ndev = xdna->dev_handle;
+ meta = kzalloc(sizeof(*meta), GFP_KERNEL);
+ if (!meta)
+ return -ENOMEM;
+
+ meta->col_size = ndev->metadata.size;
+ meta->cols = ndev->metadata.cols;
+ meta->rows = ndev->metadata.rows;
+
+ meta->version.major = ndev->metadata.version.major;
+ meta->version.minor = ndev->metadata.version.minor;
+
+ meta->core.row_count = ndev->metadata.core.row_count;
+ meta->core.row_start = ndev->metadata.core.row_start;
+ meta->core.dma_channel_count = ndev->metadata.core.dma_channel_count;
+ meta->core.lock_count = ndev->metadata.core.lock_count;
+ meta->core.event_reg_count = ndev->metadata.core.event_reg_count;
+
+ meta->mem.row_count = ndev->metadata.mem.row_count;
+ meta->mem.row_start = ndev->metadata.mem.row_start;
+ meta->mem.dma_channel_count = ndev->metadata.mem.dma_channel_count;
+ meta->mem.lock_count = ndev->metadata.mem.lock_count;
+ meta->mem.event_reg_count = ndev->metadata.mem.event_reg_count;
+
+ meta->shim.row_count = ndev->metadata.shim.row_count;
+ meta->shim.row_start = ndev->metadata.shim.row_start;
+ meta->shim.dma_channel_count = ndev->metadata.shim.dma_channel_count;
+ meta->shim.lock_count = ndev->metadata.shim.lock_count;
+ meta->shim.event_reg_count = ndev->metadata.shim.event_reg_count;
+
+ if (copy_to_user(u64_to_user_ptr(args->buffer), meta, sizeof(*meta)))
+ ret = -EFAULT;
+
+ kfree(meta);
+ return ret;
+}
+
+static int aie2_get_aie_version(struct amdxdna_client *client,
+ struct amdxdna_drm_get_info *args)
+{
+ struct amdxdna_drm_query_aie_version version;
+ struct amdxdna_dev *xdna = client->xdna;
+ struct amdxdna_dev_hdl *ndev;
+
+ ndev = xdna->dev_handle;
+ version.major = ndev->version.major;
+ version.minor = ndev->version.minor;
+
+ if (copy_to_user(u64_to_user_ptr(args->buffer), &version, sizeof(version)))
+ return -EFAULT;
+
+ return 0;
+}
+
+static int aie2_get_firmware_version(struct amdxdna_client *client,
+ struct amdxdna_drm_get_info *args)
+{
+ struct amdxdna_drm_query_firmware_version version;
+ struct amdxdna_dev *xdna = client->xdna;
+
+ version.major = xdna->fw_ver.major;
+ version.minor = xdna->fw_ver.minor;
+ version.patch = xdna->fw_ver.sub;
+ version.build = xdna->fw_ver.build;
+
+ if (copy_to_user(u64_to_user_ptr(args->buffer), &version, sizeof(version)))
+ return -EFAULT;
+
+ return 0;
+}
+
+static int aie2_get_power_mode(struct amdxdna_client *client,
+ struct amdxdna_drm_get_info *args)
+{
+ struct amdxdna_drm_get_power_mode mode = {};
+ struct amdxdna_dev *xdna = client->xdna;
+ struct amdxdna_dev_hdl *ndev;
+
+ ndev = xdna->dev_handle;
+ mode.power_mode = ndev->pw_mode;
+
+ if (copy_to_user(u64_to_user_ptr(args->buffer), &mode, sizeof(mode)))
+ return -EFAULT;
+
+ return 0;
+}
+
+static int aie2_get_clock_metadata(struct amdxdna_client *client,
+ struct amdxdna_drm_get_info *args)
+{
+ struct amdxdna_drm_query_clock_metadata *clock;
+ struct amdxdna_dev *xdna = client->xdna;
+ struct amdxdna_dev_hdl *ndev;
+ int ret = 0;
+
+ ndev = xdna->dev_handle;
+ clock = kzalloc(sizeof(*clock), GFP_KERNEL);
+ if (!clock)
+ return -ENOMEM;
+
+ snprintf(clock->mp_npu_clock.name, sizeof(clock->mp_npu_clock.name),
+ "MP-NPU Clock");
+ clock->mp_npu_clock.freq_mhz = ndev->npuclk_freq;
+ snprintf(clock->h_clock.name, sizeof(clock->h_clock.name), "H Clock");
+ clock->h_clock.freq_mhz = ndev->hclk_freq;
+
+ if (copy_to_user(u64_to_user_ptr(args->buffer), clock, sizeof(*clock)))
+ ret = -EFAULT;
+
+ kfree(clock);
+ return ret;
+}
+
+static int aie2_get_hwctx_status(struct amdxdna_client *client,
+ struct amdxdna_drm_get_info *args)
+{
+ struct amdxdna_drm_query_hwctx __user *buf;
+ struct amdxdna_dev *xdna = client->xdna;
+ struct amdxdna_drm_query_hwctx *tmp;
+ struct amdxdna_client *tmp_client;
+ struct amdxdna_hwctx *hwctx;
+ unsigned long hwctx_id;
+ bool overflow = false;
+ u32 req_bytes = 0;
+ u32 hw_i = 0;
+ int ret = 0;
+ int idx;
+
+ drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock));
+
+ tmp = kzalloc(sizeof(*tmp), GFP_KERNEL);
+ if (!tmp)
+ return -ENOMEM;
+
+ buf = u64_to_user_ptr(args->buffer);
+ list_for_each_entry(tmp_client, &xdna->client_list, node) {
+ idx = srcu_read_lock(&tmp_client->hwctx_srcu);
+ amdxdna_for_each_hwctx(tmp_client, hwctx_id, hwctx) {
+ req_bytes += sizeof(*tmp);
+ if (args->buffer_size < req_bytes) {
+ /* Continue iterating to get the required size */
+ overflow = true;
+ continue;
+ }
+
+ memset(tmp, 0, sizeof(*tmp));
+ tmp->pid = tmp_client->pid;
+ tmp->context_id = hwctx->id;
+ tmp->start_col = hwctx->start_col;
+ tmp->num_col = hwctx->num_col;
+ tmp->command_submissions = hwctx->priv->seq;
+ tmp->command_completions = hwctx->priv->completed;
+
+ if (copy_to_user(&buf[hw_i], tmp, sizeof(*tmp))) {
+ ret = -EFAULT;
+ srcu_read_unlock(&tmp_client->hwctx_srcu, idx);
+ goto out;
+ }
+ hw_i++;
+ }
+ srcu_read_unlock(&tmp_client->hwctx_srcu, idx);
+ }
+
+ if (overflow) {
+ XDNA_ERR(xdna, "Invalid buffer size. Given: %u Need: %u.",
+ args->buffer_size, req_bytes);
+ ret = -EINVAL;
+ }
+
+out:
+ kfree(tmp);
+ args->buffer_size = req_bytes;
+ return ret;
+}
+
+static int aie2_get_info(struct amdxdna_client *client, struct amdxdna_drm_get_info *args)
+{
+ struct amdxdna_dev *xdna = client->xdna;
+ int ret, idx;
+
+ if (!drm_dev_enter(&xdna->ddev, &idx))
+ return -ENODEV;
+
+ switch (args->param) {
+ case DRM_AMDXDNA_QUERY_AIE_STATUS:
+ ret = aie2_get_aie_status(client, args);
+ break;
+ case DRM_AMDXDNA_QUERY_AIE_METADATA:
+ ret = aie2_get_aie_metadata(client, args);
+ break;
+ case DRM_AMDXDNA_QUERY_AIE_VERSION:
+ ret = aie2_get_aie_version(client, args);
+ break;
+ case DRM_AMDXDNA_QUERY_CLOCK_METADATA:
+ ret = aie2_get_clock_metadata(client, args);
+ break;
+ case DRM_AMDXDNA_QUERY_HW_CONTEXTS:
+ ret = aie2_get_hwctx_status(client, args);
+ break;
+ case DRM_AMDXDNA_QUERY_FIRMWARE_VERSION:
+ ret = aie2_get_firmware_version(client, args);
+ break;
+ case DRM_AMDXDNA_GET_POWER_MODE:
+ ret = aie2_get_power_mode(client, args);
+ break;
+ default:
+ XDNA_ERR(xdna, "Not supported request parameter %u", args->param);
+ ret = -EOPNOTSUPP;
+ }
+ XDNA_DBG(xdna, "Got param %d", args->param);
+
+ drm_dev_exit(idx);
+ return ret;
+}
+
+static int aie2_set_power_mode(struct amdxdna_client *client,
+ struct amdxdna_drm_set_state *args)
+{
+ struct amdxdna_drm_set_power_mode power_state;
+ enum amdxdna_power_mode_type power_mode;
+ struct amdxdna_dev *xdna = client->xdna;
+
+ if (copy_from_user(&power_state, u64_to_user_ptr(args->buffer),
+ sizeof(power_state))) {
+ XDNA_ERR(xdna, "Failed to copy power mode request into kernel");
+ return -EFAULT;
+ }
+
+ if (XDNA_MBZ_DBG(xdna, power_state.pad, sizeof(power_state.pad)))
+ return -EINVAL;
+
+ power_mode = power_state.power_mode;
+ if (power_mode > POWER_MODE_TURBO) {
+ XDNA_ERR(xdna, "Invalid power mode %d", power_mode);
+ return -EINVAL;
+ }
+
+ return aie2_pm_set_mode(xdna->dev_handle, power_mode);
+}
+
+static int aie2_set_state(struct amdxdna_client *client,
+ struct amdxdna_drm_set_state *args)
+{
+ struct amdxdna_dev *xdna = client->xdna;
+ int ret, idx;
+
+ if (!drm_dev_enter(&xdna->ddev, &idx))
+ return -ENODEV;
+
+ switch (args->param) {
+ case DRM_AMDXDNA_SET_POWER_MODE:
+ ret = aie2_set_power_mode(client, args);
+ break;
+ default:
+ XDNA_ERR(xdna, "Not supported request parameter %u", args->param);
+ ret = -EOPNOTSUPP;
+ break;
+ }
+
+ drm_dev_exit(idx);
+ return ret;
+}
+
+const struct amdxdna_dev_ops aie2_ops = {
+ .init = aie2_init,
+ .fini = aie2_fini,
+ .resume = aie2_hw_start,
+ .suspend = aie2_hw_stop,
+ .get_aie_info = aie2_get_info,
+ .set_aie_state = aie2_set_state,
+ .hwctx_init = aie2_hwctx_init,
+ .hwctx_fini = aie2_hwctx_fini,
+ .hwctx_config = aie2_hwctx_config,
+ .cmd_submit = aie2_cmd_submit,
+ .hmm_invalidate = aie2_hmm_invalidate,
+ .hwctx_suspend = aie2_hwctx_suspend,
+ .hwctx_resume = aie2_hwctx_resume,
+};
diff --git a/drivers/accel/amdxdna/aie2_pci.h b/drivers/accel/amdxdna/aie2_pci.h
new file mode 100644
index 000000000000..385914840eaa
--- /dev/null
+++ b/drivers/accel/amdxdna/aie2_pci.h
@@ -0,0 +1,297 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2023-2024, Advanced Micro Devices, Inc.
+ */
+
+#ifndef _AIE2_PCI_H_
+#define _AIE2_PCI_H_
+
+#include <drm/amdxdna_accel.h>
+#include <linux/semaphore.h>
+
+#include "amdxdna_mailbox.h"
+
+#define AIE2_INTERVAL 20000 /* us */
+#define AIE2_TIMEOUT 1000000 /* us */
+
+/* Firmware determines device memory base address and size */
+#define AIE2_DEVM_BASE 0x4000000
+#define AIE2_DEVM_SIZE SZ_64M
+
+#define NDEV2PDEV(ndev) (to_pci_dev((ndev)->xdna->ddev.dev))
+
+#define AIE2_SRAM_OFF(ndev, addr) ((addr) - (ndev)->priv->sram_dev_addr)
+#define AIE2_MBOX_OFF(ndev, addr) ((addr) - (ndev)->priv->mbox_dev_addr)
+
+#define PSP_REG_BAR(ndev, idx) ((ndev)->priv->psp_regs_off[(idx)].bar_idx)
+#define PSP_REG_OFF(ndev, idx) ((ndev)->priv->psp_regs_off[(idx)].offset)
+#define SRAM_REG_OFF(ndev, idx) ((ndev)->priv->sram_offs[(idx)].offset)
+
+#define SMU_REG(ndev, idx) \
+({ \
+ typeof(ndev) _ndev = ndev; \
+ ((_ndev)->smu_base + (_ndev)->priv->smu_regs_off[(idx)].offset); \
+})
+#define SRAM_GET_ADDR(ndev, idx) \
+({ \
+ typeof(ndev) _ndev = ndev; \
+ ((_ndev)->sram_base + SRAM_REG_OFF((_ndev), (idx))); \
+})
+
+#define CHAN_SLOT_SZ SZ_8K
+#define MBOX_SIZE(ndev) \
+({ \
+ typeof(ndev) _ndev = (ndev); \
+ ((_ndev)->priv->mbox_size) ? (_ndev)->priv->mbox_size : \
+ pci_resource_len(NDEV2PDEV(_ndev), (_ndev)->xdna->dev_info->mbox_bar); \
+})
+
+enum aie2_smu_reg_idx {
+ SMU_CMD_REG = 0,
+ SMU_ARG_REG,
+ SMU_INTR_REG,
+ SMU_RESP_REG,
+ SMU_OUT_REG,
+ SMU_MAX_REGS /* Keep this at the end */
+};
+
+enum aie2_sram_reg_idx {
+ MBOX_CHANN_OFF = 0,
+ FW_ALIVE_OFF,
+ SRAM_MAX_INDEX /* Keep this at the end */
+};
+
+enum psp_reg_idx {
+ PSP_CMD_REG = 0,
+ PSP_ARG0_REG,
+ PSP_ARG1_REG,
+ PSP_ARG2_REG,
+ PSP_NUM_IN_REGS, /* number of input registers */
+ PSP_INTR_REG = PSP_NUM_IN_REGS,
+ PSP_STATUS_REG,
+ PSP_RESP_REG,
+ PSP_MAX_REGS /* Keep this at the end */
+};
+
+struct amdxdna_client;
+struct amdxdna_fw_ver;
+struct amdxdna_hwctx;
+struct amdxdna_sched_job;
+
+struct psp_config {
+ const void *fw_buf;
+ u32 fw_size;
+ void __iomem *psp_regs[PSP_MAX_REGS];
+};
+
+struct aie_version {
+ u16 major;
+ u16 minor;
+};
+
+struct aie_tile_metadata {
+ u16 row_count;
+ u16 row_start;
+ u16 dma_channel_count;
+ u16 lock_count;
+ u16 event_reg_count;
+};
+
+struct aie_metadata {
+ u32 size;
+ u16 cols;
+ u16 rows;
+ struct aie_version version;
+ struct aie_tile_metadata core;
+ struct aie_tile_metadata mem;
+ struct aie_tile_metadata shim;
+};
+
+enum rt_config_category {
+ AIE2_RT_CFG_INIT,
+ AIE2_RT_CFG_CLK_GATING,
+};
+
+struct rt_config {
+ u32 type;
+ u32 value;
+ u32 category;
+};
+
+struct dpm_clk_freq {
+ u32 npuclk;
+ u32 hclk;
+};
+
+/*
+ * Define the maximum number of pending commands in a hardware context.
+ * Must be a power of 2!
+ */
+#define HWCTX_MAX_CMDS 4
+#define get_job_idx(seq) ((seq) & (HWCTX_MAX_CMDS - 1))
+struct amdxdna_hwctx_priv {
+ struct amdxdna_gem_obj *heap;
+ void *mbox_chann;
+
+ struct drm_gpu_scheduler sched;
+ struct drm_sched_entity entity;
+
+ struct mutex io_lock; /* protect seq and cmd order */
+ struct wait_queue_head job_free_wq;
+ u32 num_pending;
+ u64 seq;
+ struct semaphore job_sem;
+ bool job_done;
+
+ /* Completed job counter */
+ u64 completed;
+
+ struct amdxdna_gem_obj *cmd_buf[HWCTX_MAX_CMDS];
+ struct drm_syncobj *syncobj;
+};
+
+enum aie2_dev_status {
+ AIE2_DEV_UNINIT,
+ AIE2_DEV_INIT,
+ AIE2_DEV_START,
+};
+
+struct amdxdna_dev_hdl {
+ struct amdxdna_dev *xdna;
+ const struct amdxdna_dev_priv *priv;
+ void __iomem *sram_base;
+ void __iomem *smu_base;
+ void __iomem *mbox_base;
+ struct psp_device *psp_hdl;
+
+ struct xdna_mailbox_chann_res mgmt_x2i;
+ struct xdna_mailbox_chann_res mgmt_i2x;
+ u32 mgmt_chan_idx;
+ u32 mgmt_prot_major;
+ u32 mgmt_prot_minor;
+
+ u32 total_col;
+ struct aie_version version;
+ struct aie_metadata metadata;
+
+ /* Power management and clock */
+ enum amdxdna_power_mode_type pw_mode;
+ u32 dpm_level;
+ u32 dft_dpm_level;
+ u32 max_dpm_level;
+ u32 clk_gating;
+ u32 npuclk_freq;
+ u32 hclk_freq;
+
+ /* Mailbox and the management channel */
+ struct mailbox *mbox;
+ struct mailbox_channel *mgmt_chann;
+ struct async_events *async_events;
+
+ enum aie2_dev_status dev_status;
+ u32 hwctx_num;
+};
+
+#define DEFINE_BAR_OFFSET(reg_name, bar, reg_addr) \
+ [reg_name] = {bar##_BAR_INDEX, (reg_addr) - bar##_BAR_BASE}
+
+struct aie2_bar_off_pair {
+ int bar_idx;
+ u32 offset;
+};
+
+struct aie2_hw_ops {
+ int (*set_dpm)(struct amdxdna_dev_hdl *ndev, u32 dpm_level);
+};
+
+struct amdxdna_dev_priv {
+ const char *fw_path;
+ u64 protocol_major;
+ u64 protocol_minor;
+ const struct rt_config *rt_config;
+ const struct dpm_clk_freq *dpm_clk_tbl;
+
+#define COL_ALIGN_NONE 0
+#define COL_ALIGN_NATURE 1
+ u32 col_align;
+ u32 mbox_dev_addr;
+ /* If mbox_size is 0, use BAR size. See MBOX_SIZE macro */
+ u32 mbox_size;
+ u32 sram_dev_addr;
+ struct aie2_bar_off_pair sram_offs[SRAM_MAX_INDEX];
+ struct aie2_bar_off_pair psp_regs_off[PSP_MAX_REGS];
+ struct aie2_bar_off_pair smu_regs_off[SMU_MAX_REGS];
+ struct aie2_hw_ops hw_ops;
+};
+
+extern const struct amdxdna_dev_ops aie2_ops;
+
+int aie2_runtime_cfg(struct amdxdna_dev_hdl *ndev,
+ enum rt_config_category category, u32 *val);
+
+/* aie2 npu hw config */
+extern const struct dpm_clk_freq npu1_dpm_clk_table[];
+extern const struct dpm_clk_freq npu4_dpm_clk_table[];
+extern const struct rt_config npu1_default_rt_cfg[];
+extern const struct rt_config npu4_default_rt_cfg[];
+
+/* aie2_smu.c */
+int aie2_smu_init(struct amdxdna_dev_hdl *ndev);
+void aie2_smu_fini(struct amdxdna_dev_hdl *ndev);
+int npu1_set_dpm(struct amdxdna_dev_hdl *ndev, u32 dpm_level);
+int npu4_set_dpm(struct amdxdna_dev_hdl *ndev, u32 dpm_level);
+
+/* aie2_pm.c */
+int aie2_pm_init(struct amdxdna_dev_hdl *ndev);
+int aie2_pm_set_mode(struct amdxdna_dev_hdl *ndev, enum amdxdna_power_mode_type target);
+
+/* aie2_psp.c */
+struct psp_device *aie2m_psp_create(struct drm_device *ddev, struct psp_config *conf);
+int aie2_psp_start(struct psp_device *psp);
+void aie2_psp_stop(struct psp_device *psp);
+
+/* aie2_error.c */
+int aie2_error_async_events_alloc(struct amdxdna_dev_hdl *ndev);
+void aie2_error_async_events_free(struct amdxdna_dev_hdl *ndev);
+int aie2_error_async_events_send(struct amdxdna_dev_hdl *ndev);
+int aie2_error_async_msg_thread(void *data);
+
+/* aie2_message.c */
+int aie2_suspend_fw(struct amdxdna_dev_hdl *ndev);
+int aie2_resume_fw(struct amdxdna_dev_hdl *ndev);
+int aie2_set_runtime_cfg(struct amdxdna_dev_hdl *ndev, u32 type, u64 value);
+int aie2_get_runtime_cfg(struct amdxdna_dev_hdl *ndev, u32 type, u64 *value);
+int aie2_assign_mgmt_pasid(struct amdxdna_dev_hdl *ndev, u16 pasid);
+int aie2_query_aie_version(struct amdxdna_dev_hdl *ndev, struct aie_version *version);
+int aie2_query_aie_metadata(struct amdxdna_dev_hdl *ndev, struct aie_metadata *metadata);
+int aie2_query_firmware_version(struct amdxdna_dev_hdl *ndev,
+ struct amdxdna_fw_ver *fw_ver);
+int aie2_create_context(struct amdxdna_dev_hdl *ndev, struct amdxdna_hwctx *hwctx);
+int aie2_destroy_context(struct amdxdna_dev_hdl *ndev, struct amdxdna_hwctx *hwctx);
+int aie2_map_host_buf(struct amdxdna_dev_hdl *ndev, u32 context_id, u64 addr, u64 size);
+int aie2_query_status(struct amdxdna_dev_hdl *ndev, char __user *buf, u32 size, u32 *cols_filled);
+int aie2_register_asyn_event_msg(struct amdxdna_dev_hdl *ndev, dma_addr_t addr, u32 size,
+ void *handle, int (*cb)(void*, void __iomem *, size_t));
+int aie2_config_cu(struct amdxdna_hwctx *hwctx);
+int aie2_execbuf(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job,
+ int (*notify_cb)(void *, void __iomem *, size_t));
+int aie2_cmdlist_single_execbuf(struct amdxdna_hwctx *hwctx,
+ struct amdxdna_sched_job *job,
+ int (*notify_cb)(void *, void __iomem *, size_t));
+int aie2_cmdlist_multi_execbuf(struct amdxdna_hwctx *hwctx,
+ struct amdxdna_sched_job *job,
+ int (*notify_cb)(void *, void __iomem *, size_t));
+int aie2_sync_bo(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job,
+ int (*notify_cb)(void *, void __iomem *, size_t));
+
+/* aie2_hwctx.c */
+int aie2_hwctx_init(struct amdxdna_hwctx *hwctx);
+void aie2_hwctx_fini(struct amdxdna_hwctx *hwctx);
+int aie2_hwctx_config(struct amdxdna_hwctx *hwctx, u32 type, u64 value, void *buf, u32 size);
+void aie2_hwctx_suspend(struct amdxdna_hwctx *hwctx);
+void aie2_hwctx_resume(struct amdxdna_hwctx *hwctx);
+int aie2_cmd_submit(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job, u64 *seq);
+void aie2_hmm_invalidate(struct amdxdna_gem_obj *abo, unsigned long cur_seq);
+void aie2_restart_ctx(struct amdxdna_client *client);
+
+#endif /* _AIE2_PCI_H_ */
diff --git a/drivers/accel/amdxdna/aie2_pm.c b/drivers/accel/amdxdna/aie2_pm.c
new file mode 100644
index 000000000000..426c38fce848
--- /dev/null
+++ b/drivers/accel/amdxdna/aie2_pm.c
@@ -0,0 +1,108 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2024, Advanced Micro Devices, Inc.
+ */
+
+#include <drm/amdxdna_accel.h>
+#include <drm/drm_device.h>
+#include <drm/drm_print.h>
+#include <drm/gpu_scheduler.h>
+
+#include "aie2_pci.h"
+#include "amdxdna_pci_drv.h"
+
+#define AIE2_CLK_GATING_ENABLE 1
+#define AIE2_CLK_GATING_DISABLE 0
+
+static int aie2_pm_set_clk_gating(struct amdxdna_dev_hdl *ndev, u32 val)
+{
+ int ret;
+
+ ret = aie2_runtime_cfg(ndev, AIE2_RT_CFG_CLK_GATING, &val);
+ if (ret)
+ return ret;
+
+ ndev->clk_gating = val;
+ return 0;
+}
+
+int aie2_pm_init(struct amdxdna_dev_hdl *ndev)
+{
+ int ret;
+
+ if (ndev->dev_status != AIE2_DEV_UNINIT) {
+ /* Resume device */
+ ret = ndev->priv->hw_ops.set_dpm(ndev, ndev->dpm_level);
+ if (ret)
+ return ret;
+
+ ret = aie2_pm_set_clk_gating(ndev, ndev->clk_gating);
+ if (ret)
+ return ret;
+
+ return 0;
+ }
+
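+ /* The DPM clock table ends with a zero hclk entry; count entries to find the max level */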
+ while (ndev->priv->dpm_clk_tbl[ndev->max_dpm_level].hclk)
+ ndev->max_dpm_level++;
+ ndev->max_dpm_level--;
+
+ ret = ndev->priv->hw_ops.set_dpm(ndev, ndev->max_dpm_level);
+ if (ret)
+ return ret;
+
+ ret = aie2_pm_set_clk_gating(ndev, AIE2_CLK_GATING_ENABLE);
+ if (ret)
+ return ret;
+
+ ndev->pw_mode = POWER_MODE_DEFAULT;
+ ndev->dft_dpm_level = ndev->max_dpm_level;
+
+ return 0;
+}
+
+int aie2_pm_set_mode(struct amdxdna_dev_hdl *ndev, enum amdxdna_power_mode_type target)
+{
+ struct amdxdna_dev *xdna = ndev->xdna;
+ u32 clk_gating, dpm_level;
+ int ret;
+
+ drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock));
+
+ if (ndev->pw_mode == target)
+ return 0;
+
+ switch (target) {
+ case POWER_MODE_TURBO:
+ if (ndev->hwctx_num) {
+ XDNA_ERR(xdna, "Can not set turbo when there is active hwctx");
+ return -EINVAL;
+ }
+
+ clk_gating = AIE2_CLK_GATING_DISABLE;
+ dpm_level = ndev->max_dpm_level;
+ break;
+ case POWER_MODE_HIGH:
+ clk_gating = AIE2_CLK_GATING_ENABLE;
+ dpm_level = ndev->max_dpm_level;
+ break;
+ case POWER_MODE_DEFAULT:
+ clk_gating = AIE2_CLK_GATING_ENABLE;
+ dpm_level = ndev->dft_dpm_level;
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ ret = ndev->priv->hw_ops.set_dpm(ndev, dpm_level);
+ if (ret)
+ return ret;
+
+ ret = aie2_pm_set_clk_gating(ndev, clk_gating);
+ if (ret)
+ return ret;
+
+ ndev->pw_mode = target;
+
+ return 0;
+}
diff --git a/drivers/accel/amdxdna/aie2_psp.c b/drivers/accel/amdxdna/aie2_psp.c
new file mode 100644
index 000000000000..f28a060a8810
--- /dev/null
+++ b/drivers/accel/amdxdna/aie2_psp.c
@@ -0,0 +1,146 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2022-2024, Advanced Micro Devices, Inc.
+ */
+
+#include <drm/drm_device.h>
+#include <drm/drm_gem_shmem_helper.h>
+#include <drm/drm_managed.h>
+#include <drm/drm_print.h>
+#include <drm/gpu_scheduler.h>
+#include <linux/bitfield.h>
+#include <linux/iopoll.h>
+
+#include "aie2_pci.h"
+#include "amdxdna_mailbox.h"
+#include "amdxdna_pci_drv.h"
+
+#define PSP_STATUS_READY BIT(31)
+
+/* PSP commands */
+#define PSP_VALIDATE 1
+#define PSP_START 2
+#define PSP_RELEASE_TMR 3
+
+/* PSP special arguments */
+#define PSP_START_COPY_FW 1
+
+/* PSP response error code */
+#define PSP_ERROR_CANCEL 0xFFFF0002
+#define PSP_ERROR_BAD_STATE 0xFFFF0007
+
+#define PSP_FW_ALIGN 0x10000
+#define PSP_POLL_INTERVAL 20000 /* us */
+#define PSP_POLL_TIMEOUT 1000000 /* us */
+
+#define PSP_REG(p, reg) ((p)->psp_regs[reg])
+
+struct psp_device {
+ struct drm_device *ddev;
+ struct psp_config conf;
+ u32 fw_buf_sz;
+ u64 fw_paddr;
+ void *fw_buffer;
+ void __iomem *psp_regs[PSP_MAX_REGS];
+};
+
+static int psp_exec(struct psp_device *psp, u32 *reg_vals)
+{
+ u32 resp_code;
+ int ret, i;
+ u32 ready;
+
+ /* Write command and argument registers */
+ for (i = 0; i < PSP_NUM_IN_REGS; i++)
+ writel(reg_vals[i], PSP_REG(psp, i));
+
+ /* clear and set PSP INTR register to kick off */
+ writel(0, PSP_REG(psp, PSP_INTR_REG));
+ writel(1, PSP_REG(psp, PSP_INTR_REG));
+
+ /* PSP should be busy. Wait for ready, so we know the task is done. */
+ ret = readx_poll_timeout(readl, PSP_REG(psp, PSP_STATUS_REG), ready,
+ FIELD_GET(PSP_STATUS_READY, ready),
+ PSP_POLL_INTERVAL, PSP_POLL_TIMEOUT);
+ if (ret) {
+ drm_err(psp->ddev, "PSP is not ready, ret 0x%x", ret);
+ return ret;
+ }
+
+ resp_code = readl(PSP_REG(psp, PSP_RESP_REG));
+ if (resp_code) {
+ drm_err(psp->ddev, "fw return error 0x%x", resp_code);
+ return -EIO;
+ }
+
+ return 0;
+}
+
+void aie2_psp_stop(struct psp_device *psp)
+{
+ u32 reg_vals[PSP_NUM_IN_REGS] = { PSP_RELEASE_TMR, };
+ int ret;
+
+ ret = psp_exec(psp, reg_vals);
+ if (ret)
+ drm_err(psp->ddev, "release tmr failed, ret %d", ret);
+}
+
+int aie2_psp_start(struct psp_device *psp)
+{
+ u32 reg_vals[PSP_NUM_IN_REGS];
+ int ret;
+
+ reg_vals[0] = PSP_VALIDATE;
+ reg_vals[1] = lower_32_bits(psp->fw_paddr);
+ reg_vals[2] = upper_32_bits(psp->fw_paddr);
+ reg_vals[3] = psp->fw_buf_sz;
+
+ ret = psp_exec(psp, reg_vals);
+ if (ret) {
+ drm_err(psp->ddev, "failed to validate fw, ret %d", ret);
+ return ret;
+ }
+
+ memset(reg_vals, 0, sizeof(reg_vals));
+ reg_vals[0] = PSP_START;
+ reg_vals[1] = PSP_START_COPY_FW;
+ ret = psp_exec(psp, reg_vals);
+ if (ret) {
+ drm_err(psp->ddev, "failed to start fw, ret %d", ret);
+ return ret;
+ }
+
+ return 0;
+}
+
+struct psp_device *aie2m_psp_create(struct drm_device *ddev, struct psp_config *conf)
+{
+ struct psp_device *psp;
+ u64 offset;
+
+ psp = drmm_kzalloc(ddev, sizeof(*psp), GFP_KERNEL);
+ if (!psp)
+ return NULL;
+
+ psp->ddev = ddev;
+ memcpy(psp->psp_regs, conf->psp_regs, sizeof(psp->psp_regs));
+
+ psp->fw_buf_sz = ALIGN(conf->fw_size, PSP_FW_ALIGN);
+ psp->fw_buffer = drmm_kmalloc(ddev, psp->fw_buf_sz + PSP_FW_ALIGN, GFP_KERNEL);
+ if (!psp->fw_buffer) {
+ drm_err(ddev, "no memory for fw buffer");
+ return NULL;
+ }
+
+ /*
+ * The AMD Platform Security Processor (PSP) requires a host physical
+ * address to load the NPU firmware.
+ */
+ psp->fw_paddr = virt_to_phys(psp->fw_buffer);
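+ /* Align the physical address up to PSP_FW_ALIGN and copy the image at the same offset */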
+ offset = ALIGN(psp->fw_paddr, PSP_FW_ALIGN) - psp->fw_paddr;
+ psp->fw_paddr += offset;
+ memcpy(psp->fw_buffer + offset, conf->fw_buf, conf->fw_size);
+
+ return psp;
+}
diff --git a/drivers/accel/amdxdna/aie2_smu.c b/drivers/accel/amdxdna/aie2_smu.c
new file mode 100644
index 000000000000..d303701b0ded
--- /dev/null
+++ b/drivers/accel/amdxdna/aie2_smu.c
@@ -0,0 +1,136 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2022-2024, Advanced Micro Devices, Inc.
+ */
+
+#include <drm/drm_device.h>
+#include <drm/drm_gem_shmem_helper.h>
+#include <drm/drm_print.h>
+#include <drm/gpu_scheduler.h>
+#include <linux/iopoll.h>
+
+#include "aie2_pci.h"
+#include "amdxdna_pci_drv.h"
+
+#define SMU_RESULT_OK 1
+
+/* SMU commands */
+#define AIE2_SMU_POWER_ON 0x3
+#define AIE2_SMU_POWER_OFF 0x4
+#define AIE2_SMU_SET_MPNPUCLK_FREQ 0x5
+#define AIE2_SMU_SET_HCLK_FREQ 0x6
+#define AIE2_SMU_SET_SOFT_DPMLEVEL 0x7
+#define AIE2_SMU_SET_HARD_DPMLEVEL 0x8
+
+static int aie2_smu_exec(struct amdxdna_dev_hdl *ndev, u32 reg_cmd,
+ u32 reg_arg, u32 *out)
+{
+ u32 resp;
+ int ret;
+
+ writel(0, SMU_REG(ndev, SMU_RESP_REG));
+ writel(reg_arg, SMU_REG(ndev, SMU_ARG_REG));
+ writel(reg_cmd, SMU_REG(ndev, SMU_CMD_REG));
+
+ /* Clear and set SMU_INTR_REG to kick off */
+ writel(0, SMU_REG(ndev, SMU_INTR_REG));
+ writel(1, SMU_REG(ndev, SMU_INTR_REG));
+
+ ret = readx_poll_timeout(readl, SMU_REG(ndev, SMU_RESP_REG), resp,
+ resp, AIE2_INTERVAL, AIE2_TIMEOUT);
+ if (ret) {
+ XDNA_ERR(ndev->xdna, "smu cmd %d timed out", reg_cmd);
+ return ret;
+ }
+
+ if (out)
+ *out = readl(SMU_REG(ndev, SMU_OUT_REG));
+
+ if (resp != SMU_RESULT_OK) {
+ XDNA_ERR(ndev->xdna, "smu cmd %d failed, 0x%x", reg_cmd, resp);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+int npu1_set_dpm(struct amdxdna_dev_hdl *ndev, u32 dpm_level)
+{
+ u32 freq;
+ int ret;
+
+ ret = aie2_smu_exec(ndev, AIE2_SMU_SET_MPNPUCLK_FREQ,
+ ndev->priv->dpm_clk_tbl[dpm_level].npuclk, &freq);
+ if (ret) {
+ XDNA_ERR(ndev->xdna, "Set npu clock to %d failed, ret %d\n",
+ ndev->priv->dpm_clk_tbl[dpm_level].npuclk, ret);
+ return ret;
+ }
+ ndev->npuclk_freq = freq;
+
+ ret = aie2_smu_exec(ndev, AIE2_SMU_SET_HCLK_FREQ,
+ ndev->priv->dpm_clk_tbl[dpm_level].hclk, &freq);
+ if (ret) {
+ XDNA_ERR(ndev->xdna, "Set h clock to %d failed, ret %d\n",
+ ndev->priv->dpm_clk_tbl[dpm_level].hclk, ret);
+ return ret;
+ }
+ ndev->hclk_freq = freq;
+ ndev->dpm_level = dpm_level;
+
+ XDNA_DBG(ndev->xdna, "MP-NPU clock %d, H clock %d\n",
+ ndev->npuclk_freq, ndev->hclk_freq);
+
+ return 0;
+}
+
+int npu4_set_dpm(struct amdxdna_dev_hdl *ndev, u32 dpm_level)
+{
+ int ret;
+
+ ret = aie2_smu_exec(ndev, AIE2_SMU_SET_HARD_DPMLEVEL, dpm_level, NULL);
+ if (ret) {
+ XDNA_ERR(ndev->xdna, "Set hard dpm level %d failed, ret %d ",
+ dpm_level, ret);
+ return ret;
+ }
+
+ ret = aie2_smu_exec(ndev, AIE2_SMU_SET_SOFT_DPMLEVEL, dpm_level, NULL);
+ if (ret) {
+ XDNA_ERR(ndev->xdna, "Set soft dpm level %d failed, ret %d",
+ dpm_level, ret);
+ return ret;
+ }
+
+ ndev->npuclk_freq = ndev->priv->dpm_clk_tbl[dpm_level].npuclk;
+ ndev->hclk_freq = ndev->priv->dpm_clk_tbl[dpm_level].hclk;
+ ndev->dpm_level = dpm_level;
+
+ XDNA_DBG(ndev->xdna, "MP-NPU clock %d, H clock %d\n",
+ ndev->npuclk_freq, ndev->hclk_freq);
+
+ return 0;
+}
+
+int aie2_smu_init(struct amdxdna_dev_hdl *ndev)
+{
+ int ret;
+
+ ret = aie2_smu_exec(ndev, AIE2_SMU_POWER_ON, 0, NULL);
+ if (ret) {
+ XDNA_ERR(ndev->xdna, "Power on failed, ret %d", ret);
+ return ret;
+ }
+
+ return 0;
+}
+
+void aie2_smu_fini(struct amdxdna_dev_hdl *ndev)
+{
+ int ret;
+
+ ndev->priv->hw_ops.set_dpm(ndev, 0);
+ ret = aie2_smu_exec(ndev, AIE2_SMU_POWER_OFF, 0, NULL);
+ if (ret)
+ XDNA_ERR(ndev->xdna, "Power off failed, ret %d", ret);
+}
diff --git a/drivers/accel/amdxdna/aie2_solver.c b/drivers/accel/amdxdna/aie2_solver.c
new file mode 100644
index 000000000000..2013d1f13aae
--- /dev/null
+++ b/drivers/accel/amdxdna/aie2_solver.c
@@ -0,0 +1,380 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2022-2024, Advanced Micro Devices, Inc.
+ */
+
+#include <drm/drm_device.h>
+#include <drm/drm_managed.h>
+#include <drm/drm_print.h>
+#include <linux/bitops.h>
+#include <linux/bitmap.h>
+#include <linux/slab.h>
+
+#include "aie2_solver.h"
+
+struct partition_node {
+ struct list_head list;
+ u32 nshared; /* # shared requests */
+ u32 start_col; /* start column */
+ u32 ncols; /* # columns */
+ bool exclusive; /* can not be shared if set */
+};
+
+struct solver_node {
+ struct list_head list;
+ u64 rid; /* Request ID from consumer */
+
+ struct partition_node *pt_node;
+ void *cb_arg;
+ u32 dpm_level;
+ u32 cols_len;
+ u32 start_cols[] __counted_by(cols_len);
+};
+
+struct solver_rgroup {
+ u32 rgid;
+ u32 nnode;
+ u32 npartition_node;
+
+ DECLARE_BITMAP(resbit, XRS_MAX_COL);
+ struct list_head node_list;
+ struct list_head pt_node_list;
+};
+
+struct solver_state {
+ struct solver_rgroup rgp;
+ struct init_config cfg;
+ struct xrs_action_ops *actions;
+};
+
+static u32 calculate_gops(struct aie_qos *rqos)
+{
+ u32 service_rate = 0;
+
+ if (rqos->latency)
+ service_rate = (1000 / rqos->latency);
+
+ if (rqos->fps > service_rate)
+ return rqos->fps * rqos->gops;
+
+ return service_rate * rqos->gops;
+}
+
+/*
+ * qos_meet() - Check whether the QoS request can be met.
+ */
+static int qos_meet(struct solver_state *xrs, struct aie_qos *rqos, u32 cgops)
+{
+ u32 request_gops = calculate_gops(rqos) * xrs->cfg.sys_eff_factor;
+
+ if (request_gops <= cgops)
+ return 0;
+
+ return -EINVAL;
+}
+
+/*
+ * sanity_check() - Do a basic sanity check on the allocation request.
+ */
+static int sanity_check(struct solver_state *xrs, struct alloc_requests *req)
+{
+ struct cdo_parts *cdop = &req->cdo;
+ struct aie_qos *rqos = &req->rqos;
+ u32 cu_clk_freq;
+
+ if (cdop->ncols > xrs->cfg.total_col)
+ return -EINVAL;
+
+ /*
+ * Check that at least one CDO group can meet the
+ * GOPs requirement.
+ */
+ cu_clk_freq = xrs->cfg.clk_list.cu_clk_list[xrs->cfg.clk_list.num_levels - 1];
+
+ if (qos_meet(xrs, rqos, cdop->qos_cap.opc * cu_clk_freq / 1000))
+ return -EINVAL;
+
+ return 0;
+}
+
+static bool is_valid_qos_dpm_params(struct aie_qos *rqos)
+{
+ /*
+ * gops is retrieved from the xmodel, so it is always set.
+ * fps and latency are the configurable parameters from the application.
+ */
+ if (rqos->gops > 0 && (rqos->fps > 0 || rqos->latency > 0))
+ return true;
+
+ return false;
+}
+
+static int set_dpm_level(struct solver_state *xrs, struct alloc_requests *req, u32 *dpm_level)
+{
+ struct solver_rgroup *rgp = &xrs->rgp;
+ struct cdo_parts *cdop = &req->cdo;
+ struct aie_qos *rqos = &req->rqos;
+ u32 freq, max_dpm_level, level;
+ struct solver_node *node;
+
+ max_dpm_level = xrs->cfg.clk_list.num_levels - 1;
+ /* If no QoS parameters are passed, set it to the max DPM level */
+ if (!is_valid_qos_dpm_params(rqos)) {
+ level = max_dpm_level;
+ goto set_dpm;
+ }
+
+ /* Find one CDO group that meets the GOPs requirement. */
+ for (level = 0; level < max_dpm_level; level++) {
+ freq = xrs->cfg.clk_list.cu_clk_list[level];
+ if (!qos_meet(xrs, rqos, cdop->qos_cap.opc * freq / 1000))
+ break;
+ }
+
+ /* Set the DPM level that fits all the sessions */
+ list_for_each_entry(node, &rgp->node_list, list) {
+ if (node->dpm_level > level)
+ level = node->dpm_level;
+ }
+
+set_dpm:
+ *dpm_level = level;
+ return xrs->cfg.actions->set_dft_dpm_level(xrs->cfg.ddev, level);
+}
+
+static struct solver_node *rg_search_node(struct solver_rgroup *rgp, u64 rid)
+{
+ struct solver_node *node;
+
+ list_for_each_entry(node, &rgp->node_list, list) {
+ if (node->rid == rid)
+ return node;
+ }
+
+ return NULL;
+}
+
+static void remove_partition_node(struct solver_rgroup *rgp,
+ struct partition_node *pt_node)
+{
+ pt_node->nshared--;
+ if (pt_node->nshared > 0)
+ return;
+
+ list_del(&pt_node->list);
+ rgp->npartition_node--;
+
+ bitmap_clear(rgp->resbit, pt_node->start_col, pt_node->ncols);
+ kfree(pt_node);
+}
+
+static void remove_solver_node(struct solver_rgroup *rgp,
+ struct solver_node *node)
+{
+ list_del(&node->list);
+ rgp->nnode--;
+
+ if (node->pt_node)
+ remove_partition_node(rgp, node->pt_node);
+
+ kfree(node);
+}
+
+static int get_free_partition(struct solver_state *xrs,
+ struct solver_node *snode,
+ struct alloc_requests *req)
+{
+ struct partition_node *pt_node;
+ u32 ncols = req->cdo.ncols;
+ u32 col, i;
+
+ for (i = 0; i < snode->cols_len; i++) {
+ col = snode->start_cols[i];
+ if (find_next_bit(xrs->rgp.resbit, XRS_MAX_COL, col) >= col + ncols)
+ break;
+ }
+
+ if (i == snode->cols_len)
+ return -ENODEV;
+
+ pt_node = kzalloc(sizeof(*pt_node), GFP_KERNEL);
+ if (!pt_node)
+ return -ENOMEM;
+
+ pt_node->nshared = 1;
+ pt_node->start_col = col;
+ pt_node->ncols = ncols;
+
+ /*
+ * Always set exclusive to false for now.
+ */
+ pt_node->exclusive = false;
+
+ list_add_tail(&pt_node->list, &xrs->rgp.pt_node_list);
+ xrs->rgp.npartition_node++;
+ bitmap_set(xrs->rgp.resbit, pt_node->start_col, pt_node->ncols);
+
+ snode->pt_node = pt_node;
+
+ return 0;
+}
+
+static int allocate_partition(struct solver_state *xrs,
+ struct solver_node *snode,
+ struct alloc_requests *req)
+{
+ struct partition_node *pt_node, *rpt_node = NULL;
+ int idx, ret;
+
+ ret = get_free_partition(xrs, snode, req);
+ if (!ret)
+ return ret;
+
+ /* try to get a share-able partition */
+ list_for_each_entry(pt_node, &xrs->rgp.pt_node_list, list) {
+ if (pt_node->exclusive)
+ continue;
+
+ if (rpt_node && pt_node->nshared >= rpt_node->nshared)
+ continue;
+
+ for (idx = 0; idx < snode->cols_len; idx++) {
+ if (snode->start_cols[idx] != pt_node->start_col)
+ continue;
+
+ if (req->cdo.ncols != pt_node->ncols)
+ continue;
+
+ rpt_node = pt_node;
+ break;
+ }
+ }
+
+ if (!rpt_node)
+ return -ENODEV;
+
+ rpt_node->nshared++;
+ snode->pt_node = rpt_node;
+
+ return 0;
+}
+
+static struct solver_node *create_solver_node(struct solver_state *xrs,
+ struct alloc_requests *req)
+{
+ struct cdo_parts *cdop = &req->cdo;
+ struct solver_node *node;
+ int ret;
+
+ node = kzalloc(struct_size(node, start_cols, cdop->cols_len), GFP_KERNEL);
+ if (!node)
+ return ERR_PTR(-ENOMEM);
+
+ node->rid = req->rid;
+ node->cols_len = cdop->cols_len;
+ memcpy(node->start_cols, cdop->start_cols, cdop->cols_len * sizeof(u32));
+
+ ret = allocate_partition(xrs, node, req);
+ if (ret)
+ goto free_node;
+
+ list_add_tail(&node->list, &xrs->rgp.node_list);
+ xrs->rgp.nnode++;
+ return node;
+
+free_node:
+ kfree(node);
+ return ERR_PTR(ret);
+}
+
+static void fill_load_action(struct solver_state *xrs,
+ struct solver_node *snode,
+ struct xrs_action_load *action)
+{
+ action->rid = snode->rid;
+ action->part.start_col = snode->pt_node->start_col;
+ action->part.ncols = snode->pt_node->ncols;
+}
+
+int xrs_allocate_resource(void *hdl, struct alloc_requests *req, void *cb_arg)
+{
+ struct xrs_action_load load_act;
+ struct solver_node *snode;
+ struct solver_state *xrs;
+ u32 dpm_level;
+ int ret;
+
+ xrs = (struct solver_state *)hdl;
+
+ ret = sanity_check(xrs, req);
+ if (ret) {
+ drm_err(xrs->cfg.ddev, "invalid request");
+ return ret;
+ }
+
+ if (rg_search_node(&xrs->rgp, req->rid)) {
+ drm_err(xrs->cfg.ddev, "rid %lld is in-use", req->rid);
+ return -EEXIST;
+ }
+
+ snode = create_solver_node(xrs, req);
+ if (IS_ERR(snode))
+ return PTR_ERR(snode);
+
+ fill_load_action(xrs, snode, &load_act);
+ ret = xrs->cfg.actions->load(cb_arg, &load_act);
+ if (ret)
+ goto free_node;
+
+ ret = set_dpm_level(xrs, req, &dpm_level);
+ if (ret)
+ goto free_node;
+
+ snode->dpm_level = dpm_level;
+ snode->cb_arg = cb_arg;
+
+ drm_dbg(xrs->cfg.ddev, "start col %d ncols %d\n",
+ snode->pt_node->start_col, snode->pt_node->ncols);
+
+ return 0;
+
+free_node:
+ remove_solver_node(&xrs->rgp, snode);
+
+ return ret;
+}
+
+int xrs_release_resource(void *hdl, u64 rid)
+{
+ struct solver_state *xrs = hdl;
+ struct solver_node *node;
+
+ node = rg_search_node(&xrs->rgp, rid);
+ if (!node) {
+ drm_err(xrs->cfg.ddev, "node not exist");
+ return -ENODEV;
+ }
+
+ xrs->cfg.actions->unload(node->cb_arg);
+ remove_solver_node(&xrs->rgp, node);
+
+ return 0;
+}
+
+void *xrsm_init(struct init_config *cfg)
+{
+ struct solver_rgroup *rgp;
+ struct solver_state *xrs;
+
+ xrs = drmm_kzalloc(cfg->ddev, sizeof(*xrs), GFP_KERNEL);
+ if (!xrs)
+ return NULL;
+
+ memcpy(&xrs->cfg, cfg, sizeof(*cfg));
+
+ rgp = &xrs->rgp;
+ INIT_LIST_HEAD(&rgp->node_list);
+ INIT_LIST_HEAD(&rgp->pt_node_list);
+
+ return xrs;
+}
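
The free-column test in get_free_partition() relies on a property of
find_next_bit(): if no bit is set in [col, col + ncols), the first set bit at
or after col is at index col + ncols or beyond. A small sketch of that
invariant using the same kernel bitmap helpers; the example_* helper names are
made up for illustration.

/* True when no column in [col, col + ncols) is already reserved in resbit */
static bool example_cols_free(const unsigned long *resbit, u32 col, u32 ncols)
{
	return find_next_bit(resbit, XRS_MAX_COL, col) >= col + ncols;
}

/* Mirrors what get_free_partition() and remove_partition_node() do */
static void example_reserve_cols(unsigned long *resbit, u32 col, u32 ncols)
{
	bitmap_set(resbit, col, ncols);
}
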
diff --git a/drivers/accel/amdxdna/aie2_solver.h b/drivers/accel/amdxdna/aie2_solver.h
new file mode 100644
index 000000000000..a2e3c52229e9
--- /dev/null
+++ b/drivers/accel/amdxdna/aie2_solver.h
@@ -0,0 +1,155 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2023-2024, Advanced Micro Devices, Inc.
+ */
+
+#ifndef _AIE2_SOLVER_H
+#define _AIE2_SOLVER_H
+
+#define XRS_MAX_COL 128
+
+/*
+ * Structure used to describe a partition. A partition is a column-based
+ * allocation unit described by its start column and number of columns.
+ */
+struct aie_part {
+ u32 start_col;
+ u32 ncols;
+};
+
+/*
+ * The QoS capabilities of a given AIE partition.
+ */
+struct aie_qos_cap {
+ u32 opc; /* operations per cycle */
+ u32 dma_bw; /* DMA bandwidth */
+};
+
+/*
+ * QoS requirement of a resource allocation.
+ */
+struct aie_qos {
+ u32 gops; /* Giga operations */
+ u32 fps; /* Frames per second */
+ u32 dma_bw; /* DMA bandwidth */
+ u32 latency; /* Frame response latency */
+ u32 exec_time; /* Frame execution time */
+ u32 priority; /* Request priority */
+};
+
+/*
+ * Structure used to describe a relocatable CDO (Configuration Data Object).
+ */
+struct cdo_parts {
+ u32 *start_cols; /* Start column array */
+ u32 cols_len; /* Length of start column array */
+ u32 ncols; /* # of columns */
+ struct aie_qos_cap qos_cap; /* CDO QoS capabilities */
+};
+
+/*
+ * Structure used to describe a request to allocate.
+ */
+struct alloc_requests {
+ u64 rid;
+ struct cdo_parts cdo;
+ struct aie_qos rqos; /* Requested QoS */
+};
+
+/*
+ * Load callback argument
+ */
+struct xrs_action_load {
+ u32 rid;
+ struct aie_part part;
+};
+
+/*
+ * Define the available power levels
+ *
+ * POWER_LEVEL_MIN:
+ * Lowest power level. Usually set when all actions are unloaded.
+ *
+ * POWER_LEVEL_n:
+ * Power levels 0 - n; each level is a step increase in system frequencies
+ */
+enum power_level {
+ POWER_LEVEL_MIN = 0x0,
+ POWER_LEVEL_0 = 0x1,
+ POWER_LEVEL_1 = 0x2,
+ POWER_LEVEL_2 = 0x3,
+ POWER_LEVEL_3 = 0x4,
+ POWER_LEVEL_4 = 0x5,
+ POWER_LEVEL_5 = 0x6,
+ POWER_LEVEL_6 = 0x7,
+ POWER_LEVEL_7 = 0x8,
+ POWER_LEVEL_NUM,
+};
+
+/*
+ * Structure used to describe the frequency table.
+ * The resource solver chooses the frequency from the table
+ * to meet the QoS requirements.
+ */
+struct clk_list_info {
+ u32 num_levels; /* available power levels */
+ u32 cu_clk_list[POWER_LEVEL_NUM]; /* available AIE clock frequencies in MHz */
+};
+
+struct xrs_action_ops {
+ int (*load)(void *cb_arg, struct xrs_action_load *action);
+ int (*unload)(void *cb_arg);
+ int (*set_dft_dpm_level)(struct drm_device *ddev, u32 level);
+};
+
+/*
+ * Structure used to describe information for the solver during initialization.
+ */
+struct init_config {
+ u32 total_col;
+ u32 sys_eff_factor; /* system efficiency factor */
+ u32 latency_adj; /* latency adjustment in ms */
+ struct clk_list_info clk_list; /* List of frequencies available in system */
+ struct drm_device *ddev;
+ struct xrs_action_ops *actions;
+};
+
+/*
+ * xrsm_init() - Register the resource solver. A resource solver client needs
+ * to call this function to register itself.
+ *
+ * @cfg: The system metrics for resource solver to use
+ *
+ * Return: A resource solver handle
+ *
+ * Note: We should only create one handle per AIE array to be managed.
+ */
+void *xrsm_init(struct init_config *cfg);
+
+/*
+ * xrs_allocate_resource() - Request to allocate resources for a given context
+ * and partition metadata. (See struct part_meta)
+ *
+ * @hdl: Resource solver handle obtained from xrsm_init()
+ * @req: Input to the resource solver including the request id
+ * and partition metadata.
+ * @cb_arg: callback argument pointer
+ *
+ * Return: 0 when successful, or a standard error number on failure.
+ *
+ * Note:
+ * There is no locking inside the resource solver, so it is
+ * the caller's responsibility to lock down XCLBINs and grab the
+ * necessary locks.
+ */
+int xrs_allocate_resource(void *hdl, struct alloc_requests *req, void *cb_arg);
+
+/*
+ * xrs_release_resource() - Request to free resources for a given context.
+ *
+ * @hdl: Resource solver handle obtained from xrsm_init()
+ * @rid: The Request ID to identify the requesting context
+ */
+int xrs_release_resource(void *hdl, u64 rid);
+#endif /* _AIE2_SOLVER_H */
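
A hedged sketch of how a client of this header might register with the solver
and wire up the callbacks it declares. The stub bodies, the example_* names
and the sys_eff_factor value are assumptions for illustration only; the real
wiring lives in the aie2 device layer.

static int example_load(void *cb_arg, struct xrs_action_load *action)
{
	/* Program the partition [start_col, start_col + ncols) for this rid */
	return 0;
}

static int example_unload(void *cb_arg)
{
	return 0;
}

static int example_set_dft_dpm_level(struct drm_device *ddev, u32 level)
{
	return 0;
}

static struct xrs_action_ops example_ops = {
	.load = example_load,
	.unload = example_unload,
	.set_dft_dpm_level = example_set_dft_dpm_level,
};

static void *example_register_solver(struct drm_device *ddev, u32 total_col)
{
	struct init_config cfg = {
		.total_col = total_col,
		.sys_eff_factor = 1,
		.ddev = ddev,
		.actions = &example_ops,
		/* clk_list would be filled in from the device's DPM table */
	};

	return xrsm_init(&cfg);
}
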
diff --git a/drivers/accel/amdxdna/amdxdna_ctx.c b/drivers/accel/amdxdna/amdxdna_ctx.c
new file mode 100644
index 000000000000..43442b9e273b
--- /dev/null
+++ b/drivers/accel/amdxdna/amdxdna_ctx.c
@@ -0,0 +1,552 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2022-2024, Advanced Micro Devices, Inc.
+ */
+
+#include <drm/amdxdna_accel.h>
+#include <drm/drm_device.h>
+#include <drm/drm_drv.h>
+#include <drm/drm_file.h>
+#include <drm/drm_gem.h>
+#include <drm/drm_gem_shmem_helper.h>
+#include <drm/drm_print.h>
+#include <drm/gpu_scheduler.h>
+#include <linux/xarray.h>
+#include <trace/events/amdxdna.h>
+
+#include "amdxdna_ctx.h"
+#include "amdxdna_gem.h"
+#include "amdxdna_pci_drv.h"
+
+#define MAX_HWCTX_ID 255
+#define MAX_ARG_COUNT 4095
+
+struct amdxdna_fence {
+ struct dma_fence base;
+ spinlock_t lock; /* for base */
+ struct amdxdna_hwctx *hwctx;
+};
+
+static const char *amdxdna_fence_get_driver_name(struct dma_fence *fence)
+{
+ return KBUILD_MODNAME;
+}
+
+static const char *amdxdna_fence_get_timeline_name(struct dma_fence *fence)
+{
+ struct amdxdna_fence *xdna_fence;
+
+ xdna_fence = container_of(fence, struct amdxdna_fence, base);
+
+ return xdna_fence->hwctx->name;
+}
+
+static const struct dma_fence_ops fence_ops = {
+ .get_driver_name = amdxdna_fence_get_driver_name,
+ .get_timeline_name = amdxdna_fence_get_timeline_name,
+};
+
+static struct dma_fence *amdxdna_fence_create(struct amdxdna_hwctx *hwctx)
+{
+ struct amdxdna_fence *fence;
+
+ fence = kzalloc(sizeof(*fence), GFP_KERNEL);
+ if (!fence)
+ return NULL;
+
+ fence->hwctx = hwctx;
+ spin_lock_init(&fence->lock);
+ dma_fence_init(&fence->base, &fence_ops, &fence->lock, hwctx->id, 0);
+ return &fence->base;
+}
+
+void amdxdna_hwctx_suspend(struct amdxdna_client *client)
+{
+ struct amdxdna_dev *xdna = client->xdna;
+ struct amdxdna_hwctx *hwctx;
+ unsigned long hwctx_id;
+
+ drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock));
+ mutex_lock(&client->hwctx_lock);
+ amdxdna_for_each_hwctx(client, hwctx_id, hwctx)
+ xdna->dev_info->ops->hwctx_suspend(hwctx);
+ mutex_unlock(&client->hwctx_lock);
+}
+
+void amdxdna_hwctx_resume(struct amdxdna_client *client)
+{
+ struct amdxdna_dev *xdna = client->xdna;
+ struct amdxdna_hwctx *hwctx;
+ unsigned long hwctx_id;
+
+ drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock));
+ mutex_lock(&client->hwctx_lock);
+ amdxdna_for_each_hwctx(client, hwctx_id, hwctx)
+ xdna->dev_info->ops->hwctx_resume(hwctx);
+ mutex_unlock(&client->hwctx_lock);
+}
+
+static void amdxdna_hwctx_destroy_rcu(struct amdxdna_hwctx *hwctx,
+ struct srcu_struct *ss)
+{
+ struct amdxdna_dev *xdna = hwctx->client->xdna;
+
+ synchronize_srcu(ss);
+
+ /* At this point, the user is not able to submit new commands */
+ mutex_lock(&xdna->dev_lock);
+ xdna->dev_info->ops->hwctx_fini(hwctx);
+ mutex_unlock(&xdna->dev_lock);
+
+ kfree(hwctx->name);
+ kfree(hwctx);
+}
+
+void *amdxdna_cmd_get_payload(struct amdxdna_gem_obj *abo, u32 *size)
+{
+ struct amdxdna_cmd *cmd = abo->mem.kva;
+ u32 num_masks, count;
+
+ if (amdxdna_cmd_get_op(abo) == ERT_CMD_CHAIN)
+ num_masks = 0;
+ else
+ num_masks = 1 + FIELD_GET(AMDXDNA_CMD_EXTRA_CU_MASK, cmd->header);
+
+ if (size) {
+ count = FIELD_GET(AMDXDNA_CMD_COUNT, cmd->header);
+ if (unlikely(count <= num_masks)) {
+ *size = 0;
+ return NULL;
+ }
+ *size = (count - num_masks) * sizeof(u32);
+ }
+ return &cmd->data[num_masks];
+}
+
+int amdxdna_cmd_get_cu_idx(struct amdxdna_gem_obj *abo)
+{
+ struct amdxdna_cmd *cmd = abo->mem.kva;
+ u32 num_masks, i;
+ u32 *cu_mask;
+
+ if (amdxdna_cmd_get_op(abo) == ERT_CMD_CHAIN)
+ return -1;
+
+ num_masks = 1 + FIELD_GET(AMDXDNA_CMD_EXTRA_CU_MASK, cmd->header);
+ cu_mask = cmd->data;
+ for (i = 0; i < num_masks; i++) {
+ if (cu_mask[i])
+ return ffs(cu_mask[i]) - 1;
+ }
+
+ return -1;
+}
+
+/*
+ * This should be called in close() and remove(). DO NOT call it in other syscalls.
+ * It guarantees that the hwctx and its resources are released even if the user
+ * never calls amdxdna_drm_destroy_hwctx_ioctl.
+ */
+void amdxdna_hwctx_remove_all(struct amdxdna_client *client)
+{
+ struct amdxdna_hwctx *hwctx;
+ unsigned long hwctx_id;
+
+ mutex_lock(&client->hwctx_lock);
+ amdxdna_for_each_hwctx(client, hwctx_id, hwctx) {
+ XDNA_DBG(client->xdna, "PID %d close HW context %d",
+ client->pid, hwctx->id);
+ xa_erase(&client->hwctx_xa, hwctx->id);
+ mutex_unlock(&client->hwctx_lock);
+ amdxdna_hwctx_destroy_rcu(hwctx, &client->hwctx_srcu);
+ mutex_lock(&client->hwctx_lock);
+ }
+ mutex_unlock(&client->hwctx_lock);
+}
+
+int amdxdna_drm_create_hwctx_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
+{
+ struct amdxdna_client *client = filp->driver_priv;
+ struct amdxdna_drm_create_hwctx *args = data;
+ struct amdxdna_dev *xdna = to_xdna_dev(dev);
+ struct amdxdna_hwctx *hwctx;
+ int ret, idx;
+
+ if (args->ext || args->ext_flags)
+ return -EINVAL;
+
+ if (!drm_dev_enter(dev, &idx))
+ return -ENODEV;
+
+ hwctx = kzalloc(sizeof(*hwctx), GFP_KERNEL);
+ if (!hwctx) {
+ ret = -ENOMEM;
+ goto exit;
+ }
+
+ if (copy_from_user(&hwctx->qos, u64_to_user_ptr(args->qos_p), sizeof(hwctx->qos))) {
+ XDNA_ERR(xdna, "Access QoS info failed");
+ ret = -EFAULT;
+ goto free_hwctx;
+ }
+
+ hwctx->client = client;
+ hwctx->fw_ctx_id = -1;
+ hwctx->num_tiles = args->num_tiles;
+ hwctx->mem_size = args->mem_size;
+ hwctx->max_opc = args->max_opc;
+ ret = xa_alloc_cyclic(&client->hwctx_xa, &hwctx->id, hwctx,
+ XA_LIMIT(AMDXDNA_INVALID_CTX_HANDLE + 1, MAX_HWCTX_ID),
+ &client->next_hwctxid, GFP_KERNEL);
+ if (ret < 0) {
+ XDNA_ERR(xdna, "Allocate hwctx ID failed, ret %d", ret);
+ goto free_hwctx;
+ }
+
+ hwctx->name = kasprintf(GFP_KERNEL, "hwctx.%d.%d", client->pid, hwctx->id);
+ if (!hwctx->name) {
+ ret = -ENOMEM;
+ goto rm_id;
+ }
+
+ mutex_lock(&xdna->dev_lock);
+ ret = xdna->dev_info->ops->hwctx_init(hwctx);
+ if (ret) {
+ mutex_unlock(&xdna->dev_lock);
+ XDNA_ERR(xdna, "Init hwctx failed, ret %d", ret);
+ goto free_name;
+ }
+ args->handle = hwctx->id;
+ args->syncobj_handle = hwctx->syncobj_hdl;
+ mutex_unlock(&xdna->dev_lock);
+
+ atomic64_set(&hwctx->job_submit_cnt, 0);
+ atomic64_set(&hwctx->job_free_cnt, 0);
+ XDNA_DBG(xdna, "PID %d create HW context %d, ret %d", client->pid, args->handle, ret);
+ drm_dev_exit(idx);
+ return 0;
+
+free_name:
+ kfree(hwctx->name);
+rm_id:
+ xa_erase(&client->hwctx_xa, hwctx->id);
+free_hwctx:
+ kfree(hwctx);
+exit:
+ drm_dev_exit(idx);
+ return ret;
+}
+
+int amdxdna_drm_destroy_hwctx_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
+{
+ struct amdxdna_client *client = filp->driver_priv;
+ struct amdxdna_drm_destroy_hwctx *args = data;
+ struct amdxdna_dev *xdna = to_xdna_dev(dev);
+ struct amdxdna_hwctx *hwctx;
+ int ret = 0, idx;
+
+ if (XDNA_MBZ_DBG(xdna, &args->pad, sizeof(args->pad)))
+ return -EINVAL;
+
+ if (!drm_dev_enter(dev, &idx))
+ return -ENODEV;
+
+ hwctx = xa_erase(&client->hwctx_xa, args->handle);
+ if (!hwctx) {
+ ret = -EINVAL;
+ XDNA_DBG(xdna, "PID %d HW context %d not exist",
+ client->pid, args->handle);
+ goto out;
+ }
+
+ /*
+ * The pushed jobs are handled by the DRM scheduler during destroy.
+ * SRCU is used to synchronize with the exec command ioctls.
+ */
+ amdxdna_hwctx_destroy_rcu(hwctx, &client->hwctx_srcu);
+
+ XDNA_DBG(xdna, "PID %d destroyed HW context %d", client->pid, args->handle);
+out:
+ drm_dev_exit(idx);
+ return ret;
+}
+
+int amdxdna_drm_config_hwctx_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
+{
+ struct amdxdna_client *client = filp->driver_priv;
+ struct amdxdna_drm_config_hwctx *args = data;
+ struct amdxdna_dev *xdna = to_xdna_dev(dev);
+ struct amdxdna_hwctx *hwctx;
+ int ret, idx;
+ u32 buf_size;
+ void *buf;
+ u64 val;
+
+ if (XDNA_MBZ_DBG(xdna, &args->pad, sizeof(args->pad)))
+ return -EINVAL;
+
+ if (!xdna->dev_info->ops->hwctx_config)
+ return -EOPNOTSUPP;
+
+ val = args->param_val;
+ buf_size = args->param_val_size;
+
+ switch (args->param_type) {
+ case DRM_AMDXDNA_HWCTX_CONFIG_CU:
+ /* For these types, param_val is a pointer */
+ if (buf_size > PAGE_SIZE) {
+ XDNA_ERR(xdna, "Config CU param buffer too large");
+ return -E2BIG;
+ }
+
+ /* Hwctx needs to keep buf */
+ buf = kzalloc(PAGE_SIZE, GFP_KERNEL);
+ if (!buf)
+ return -ENOMEM;
+
+ if (copy_from_user(buf, u64_to_user_ptr(val), buf_size)) {
+ kfree(buf);
+ return -EFAULT;
+ }
+
+ break;
+ case DRM_AMDXDNA_HWCTX_ASSIGN_DBG_BUF:
+ case DRM_AMDXDNA_HWCTX_REMOVE_DBG_BUF:
+ /* For these types, param_val is a plain value */
+ buf = NULL;
+ buf_size = 0;
+ break;
+ default:
+ XDNA_DBG(xdna, "Unknown HW context config type %d", args->param_type);
+ return -EINVAL;
+ }
+
+ mutex_lock(&xdna->dev_lock);
+ idx = srcu_read_lock(&client->hwctx_srcu);
+ hwctx = xa_load(&client->hwctx_xa, args->handle);
+ if (!hwctx) {
+ XDNA_DBG(xdna, "PID %d failed to get hwctx %d", client->pid, args->handle);
+ ret = -EINVAL;
+ goto unlock_srcu;
+ }
+
+ ret = xdna->dev_info->ops->hwctx_config(hwctx, args->param_type, val, buf, buf_size);
+
+unlock_srcu:
+ srcu_read_unlock(&client->hwctx_srcu, idx);
+ mutex_unlock(&xdna->dev_lock);
+ kfree(buf);
+ return ret;
+}
+
+static void
+amdxdna_arg_bos_put(struct amdxdna_sched_job *job)
+{
+ int i;
+
+ for (i = 0; i < job->bo_cnt; i++) {
+ if (!job->bos[i])
+ break;
+ drm_gem_object_put(job->bos[i]);
+ }
+}
+
+static int
+amdxdna_arg_bos_lookup(struct amdxdna_client *client,
+ struct amdxdna_sched_job *job,
+ u32 *bo_hdls, u32 bo_cnt)
+{
+ struct drm_gem_object *gobj;
+ int i, ret;
+
+ job->bo_cnt = bo_cnt;
+ for (i = 0; i < job->bo_cnt; i++) {
+ struct amdxdna_gem_obj *abo;
+
+ gobj = drm_gem_object_lookup(client->filp, bo_hdls[i]);
+ if (!gobj) {
+ ret = -ENOENT;
+ goto put_shmem_bo;
+ }
+ abo = to_xdna_obj(gobj);
+
+ mutex_lock(&abo->lock);
+ if (abo->pinned) {
+ mutex_unlock(&abo->lock);
+ job->bos[i] = gobj;
+ continue;
+ }
+
+ ret = amdxdna_gem_pin_nolock(abo);
+ if (ret) {
+ mutex_unlock(&abo->lock);
+ drm_gem_object_put(gobj);
+ goto put_shmem_bo;
+ }
+ abo->pinned = true;
+ mutex_unlock(&abo->lock);
+
+ job->bos[i] = gobj;
+ }
+
+ return 0;
+
+put_shmem_bo:
+ amdxdna_arg_bos_put(job);
+ return ret;
+}
+
+void amdxdna_sched_job_cleanup(struct amdxdna_sched_job *job)
+{
+ trace_amdxdna_debug_point(job->hwctx->name, job->seq, "job release");
+ amdxdna_arg_bos_put(job);
+ amdxdna_gem_put_obj(job->cmd_bo);
+}
+
+int amdxdna_cmd_submit(struct amdxdna_client *client,
+ u32 cmd_bo_hdl, u32 *arg_bo_hdls, u32 arg_bo_cnt,
+ u32 hwctx_hdl, u64 *seq)
+{
+ struct amdxdna_dev *xdna = client->xdna;
+ struct amdxdna_sched_job *job;
+ struct amdxdna_hwctx *hwctx;
+ int ret, idx;
+
+ XDNA_DBG(xdna, "Command BO hdl %d, Arg BO count %d", cmd_bo_hdl, arg_bo_cnt);
+ job = kzalloc(struct_size(job, bos, arg_bo_cnt), GFP_KERNEL);
+ if (!job)
+ return -ENOMEM;
+
+ if (cmd_bo_hdl != AMDXDNA_INVALID_BO_HANDLE) {
+ job->cmd_bo = amdxdna_gem_get_obj(client, cmd_bo_hdl, AMDXDNA_BO_CMD);
+ if (!job->cmd_bo) {
+ XDNA_ERR(xdna, "Failed to get cmd bo from %d", cmd_bo_hdl);
+ ret = -EINVAL;
+ goto free_job;
+ }
+ } else {
+ job->cmd_bo = NULL;
+ }
+
+ ret = amdxdna_arg_bos_lookup(client, job, arg_bo_hdls, arg_bo_cnt);
+ if (ret) {
+ XDNA_ERR(xdna, "Argument BOs lookup failed, ret %d", ret);
+ goto cmd_put;
+ }
+
+ idx = srcu_read_lock(&client->hwctx_srcu);
+ hwctx = xa_load(&client->hwctx_xa, hwctx_hdl);
+ if (!hwctx) {
+ XDNA_DBG(xdna, "PID %d failed to get hwctx %d",
+ client->pid, hwctx_hdl);
+ ret = -EINVAL;
+ goto unlock_srcu;
+ }
+
+ if (hwctx->status != HWCTX_STAT_READY) {
+ XDNA_ERR(xdna, "HW Context is not ready");
+ ret = -EINVAL;
+ goto unlock_srcu;
+ }
+
+ job->hwctx = hwctx;
+ job->mm = current->mm;
+
+ job->fence = amdxdna_fence_create(hwctx);
+ if (!job->fence) {
+ XDNA_ERR(xdna, "Failed to create fence");
+ ret = -ENOMEM;
+ goto unlock_srcu;
+ }
+ kref_init(&job->refcnt);
+
+ ret = xdna->dev_info->ops->cmd_submit(hwctx, job, seq);
+ if (ret)
+ goto put_fence;
+
+ /*
+ * amdxdna_hwctx_destroy_rcu() releases the hwctx and its associated
+ * resources after synchronize_srcu(). The submitted jobs are handled
+ * by the queue, for example the DRM scheduler, in the device layer,
+ * so SRCU can be unlocked here.
+ */
+ srcu_read_unlock(&client->hwctx_srcu, idx);
+ trace_amdxdna_debug_point(hwctx->name, *seq, "job pushed");
+
+ return 0;
+
+put_fence:
+ dma_fence_put(job->fence);
+unlock_srcu:
+ srcu_read_unlock(&client->hwctx_srcu, idx);
+ amdxdna_arg_bos_put(job);
+cmd_put:
+ amdxdna_gem_put_obj(job->cmd_bo);
+free_job:
+ kfree(job);
+ return ret;
+}
+
+/*
+ * The submit command ioctl submits a command to firmware. One firmware command
+ * may contain multiple command BOs which are processed as a whole.
+ * The returned command sequence number can be used with the wait command ioctl.
+ */
+static int amdxdna_drm_submit_execbuf(struct amdxdna_client *client,
+ struct amdxdna_drm_exec_cmd *args)
+{
+ struct amdxdna_dev *xdna = client->xdna;
+ u32 *arg_bo_hdls;
+ u32 cmd_bo_hdl;
+ int ret;
+
+ if (!args->arg_count || args->arg_count > MAX_ARG_COUNT) {
+ XDNA_ERR(xdna, "Invalid arg bo count %d", args->arg_count);
+ return -EINVAL;
+ }
+
+ /* Only support single command for now. */
+ if (args->cmd_count != 1) {
+ XDNA_ERR(xdna, "Invalid cmd bo count %d", args->cmd_count);
+ return -EINVAL;
+ }
+
+ cmd_bo_hdl = (u32)args->cmd_handles;
+ arg_bo_hdls = kcalloc(args->arg_count, sizeof(u32), GFP_KERNEL);
+ if (!arg_bo_hdls)
+ return -ENOMEM;
+ ret = copy_from_user(arg_bo_hdls, u64_to_user_ptr(args->args),
+ args->arg_count * sizeof(u32));
+ if (ret) {
+ ret = -EFAULT;
+ goto free_cmd_bo_hdls;
+ }
+
+ ret = amdxdna_cmd_submit(client, cmd_bo_hdl, arg_bo_hdls,
+ args->arg_count, args->hwctx, &args->seq);
+ if (ret)
+ XDNA_DBG(xdna, "Submit cmds failed, ret %d", ret);
+
+free_cmd_bo_hdls:
+ kfree(arg_bo_hdls);
+ if (!ret)
+ XDNA_DBG(xdna, "Pushed cmd %lld to scheduler", args->seq);
+ return ret;
+}
+
+int amdxdna_drm_submit_cmd_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
+{
+ struct amdxdna_client *client = filp->driver_priv;
+ struct amdxdna_drm_exec_cmd *args = data;
+
+ if (args->ext || args->ext_flags)
+ return -EINVAL;
+
+ switch (args->type) {
+ case AMDXDNA_CMD_SUBMIT_EXEC_BUF:
+ return amdxdna_drm_submit_execbuf(client, args);
+ }
+
+ XDNA_ERR(client->xdna, "Invalid command type %d", args->type);
+ return -EINVAL;
+}
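
The create/destroy and submit ioctls above protect hwctx lookups with SRCU
rather than a lock: submitters read under srcu_read_lock(), while destroy
erases the xarray entry first and only frees after synchronize_srcu(). A
condensed sketch of that pattern; the example_* names and the generic void
pointer are illustrative, not driver code.

/* Reader side (submit path): the object stays valid for the read section */
static int example_reader(struct srcu_struct *ss, struct xarray *xa, u32 id)
{
	void *obj;
	int idx, ret = -EINVAL;

	idx = srcu_read_lock(ss);
	obj = xa_load(xa, id);
	if (obj)
		ret = 0;	/* safe to use obj until srcu_read_unlock() */
	srcu_read_unlock(ss, idx);

	return ret;
}

/* Writer side (destroy path): unpublish first, then wait for readers */
static void example_destroy(struct srcu_struct *ss, struct xarray *xa, u32 id)
{
	void *obj = xa_erase(xa, id);

	if (!obj)
		return;

	synchronize_srcu(ss);	/* no reader can still hold obj after this */
	kfree(obj);
}
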
diff --git a/drivers/accel/amdxdna/amdxdna_ctx.h b/drivers/accel/amdxdna/amdxdna_ctx.h
new file mode 100644
index 000000000000..f0a4a8586d85
--- /dev/null
+++ b/drivers/accel/amdxdna/amdxdna_ctx.h
@@ -0,0 +1,165 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2022-2024, Advanced Micro Devices, Inc.
+ */
+
+#ifndef _AMDXDNA_CTX_H_
+#define _AMDXDNA_CTX_H_
+
+#include <linux/bitfield.h>
+
+#include "amdxdna_gem.h"
+
+struct amdxdna_hwctx_priv;
+
+enum ert_cmd_opcode {
+ ERT_START_CU = 0,
+ ERT_CMD_CHAIN = 19,
+ ERT_START_NPU = 20,
+};
+
+enum ert_cmd_state {
+ ERT_CMD_STATE_INVALID,
+ ERT_CMD_STATE_NEW,
+ ERT_CMD_STATE_QUEUED,
+ ERT_CMD_STATE_RUNNING,
+ ERT_CMD_STATE_COMPLETED,
+ ERT_CMD_STATE_ERROR,
+ ERT_CMD_STATE_ABORT,
+ ERT_CMD_STATE_SUBMITTED,
+ ERT_CMD_STATE_TIMEOUT,
+ ERT_CMD_STATE_NORESPONSE,
+};
+
+/*
+ * Interpretation of the beginning of data payload for ERT_START_NPU in
+ * amdxdna_cmd. The rest of the payload in amdxdna_cmd is regular kernel args.
+ */
+struct amdxdna_cmd_start_npu {
+ u64 buffer; /* instruction buffer address */
+ u32 buffer_size; /* size of buffer in bytes */
+ u32 prop_count; /* properties count */
+ u32 prop_args[]; /* properties and regular kernel arguments */
+};
+
+/*
+ * Interpretation of the beginning of data payload for ERT_CMD_CHAIN in
+ * amdxdna_cmd. The rest of the payload in amdxdna_cmd is cmd BO handles.
+ */
+struct amdxdna_cmd_chain {
+ u32 command_count;
+ u32 submit_index;
+ u32 error_index;
+ u32 reserved[3];
+ u64 data[] __counted_by(command_count);
+};
+
+/* Exec buffer command header format */
+#define AMDXDNA_CMD_STATE GENMASK(3, 0)
+#define AMDXDNA_CMD_EXTRA_CU_MASK GENMASK(11, 10)
+#define AMDXDNA_CMD_COUNT GENMASK(22, 12)
+#define AMDXDNA_CMD_OPCODE GENMASK(27, 23)
+struct amdxdna_cmd {
+ u32 header;
+ u32 data[];
+};
+
+struct amdxdna_hwctx {
+ struct amdxdna_client *client;
+ struct amdxdna_hwctx_priv *priv;
+ char *name;
+
+ u32 id;
+ u32 max_opc;
+ u32 num_tiles;
+ u32 mem_size;
+ u32 fw_ctx_id;
+ u32 col_list_len;
+ u32 *col_list;
+ u32 start_col;
+ u32 num_col;
+#define HWCTX_STAT_INIT 0
+#define HWCTX_STAT_READY 1
+#define HWCTX_STAT_STOP 2
+ u32 status;
+ u32 old_status;
+
+ struct amdxdna_qos_info qos;
+ struct amdxdna_hwctx_param_config_cu *cus;
+ u32 syncobj_hdl;
+
+ atomic64_t job_submit_cnt;
+ atomic64_t job_free_cnt ____cacheline_aligned_in_smp;
+};
+
+#define drm_job_to_xdna_job(j) \
+ container_of(j, struct amdxdna_sched_job, base)
+
+struct amdxdna_sched_job {
+ struct drm_sched_job base;
+ struct kref refcnt;
+ struct amdxdna_hwctx *hwctx;
+ struct mm_struct *mm;
+ /* The fence to notify the DRM scheduler that the job is done by hardware */
+ struct dma_fence *fence;
+ /* user can wait on this fence */
+ struct dma_fence *out_fence;
+ bool job_done;
+ u64 seq;
+ struct amdxdna_gem_obj *cmd_bo;
+ size_t bo_cnt;
+ struct drm_gem_object *bos[] __counted_by(bo_cnt);
+};
+
+static inline u32
+amdxdna_cmd_get_op(struct amdxdna_gem_obj *abo)
+{
+ struct amdxdna_cmd *cmd = abo->mem.kva;
+
+ return FIELD_GET(AMDXDNA_CMD_OPCODE, cmd->header);
+}
+
+static inline void
+amdxdna_cmd_set_state(struct amdxdna_gem_obj *abo, enum ert_cmd_state s)
+{
+ struct amdxdna_cmd *cmd = abo->mem.kva;
+
+ cmd->header &= ~AMDXDNA_CMD_STATE;
+ cmd->header |= FIELD_PREP(AMDXDNA_CMD_STATE, s);
+}
+
+static inline enum ert_cmd_state
+amdxdna_cmd_get_state(struct amdxdna_gem_obj *abo)
+{
+ struct amdxdna_cmd *cmd = abo->mem.kva;
+
+ return FIELD_GET(AMDXDNA_CMD_STATE, cmd->header);
+}
+
+void *amdxdna_cmd_get_payload(struct amdxdna_gem_obj *abo, u32 *size);
+int amdxdna_cmd_get_cu_idx(struct amdxdna_gem_obj *abo);
+
+static inline u32 amdxdna_hwctx_col_map(struct amdxdna_hwctx *hwctx)
+{
+ return GENMASK(hwctx->start_col + hwctx->num_col - 1,
+ hwctx->start_col);
+}
+
+void amdxdna_sched_job_cleanup(struct amdxdna_sched_job *job);
+void amdxdna_hwctx_remove_all(struct amdxdna_client *client);
+void amdxdna_hwctx_suspend(struct amdxdna_client *client);
+void amdxdna_hwctx_resume(struct amdxdna_client *client);
+
+int amdxdna_cmd_submit(struct amdxdna_client *client,
+ u32 cmd_bo_hdls, u32 *arg_bo_hdls, u32 arg_bo_cnt,
+ u32 hwctx_hdl, u64 *seq);
+
+int amdxdna_cmd_wait(struct amdxdna_client *client, u32 hwctx_hdl,
+ u64 seq, u32 timeout);
+
+int amdxdna_drm_create_hwctx_ioctl(struct drm_device *dev, void *data, struct drm_file *filp);
+int amdxdna_drm_config_hwctx_ioctl(struct drm_device *dev, void *data, struct drm_file *filp);
+int amdxdna_drm_destroy_hwctx_ioctl(struct drm_device *dev, void *data, struct drm_file *filp);
+int amdxdna_drm_submit_cmd_ioctl(struct drm_device *dev, void *data, struct drm_file *filp);
+
+#endif /* _AMDXDNA_CTX_H_ */
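
The exec-buffer header above packs the state, the extra CU mask count, the
payload word count and the opcode into a single u32 via the GENMASK fields. A
small sketch of building such a header with FIELD_PREP(); the count value and
the helper name are arbitrary examples, not part of the UAPI.

static u32 example_build_cmd_header(enum ert_cmd_opcode op, u32 count)
{
	/* count covers the CU mask words plus the payload words */
	return FIELD_PREP(AMDXDNA_CMD_STATE, ERT_CMD_STATE_NEW) |
	       FIELD_PREP(AMDXDNA_CMD_COUNT, count) |
	       FIELD_PREP(AMDXDNA_CMD_OPCODE, op);
}
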
diff --git a/drivers/accel/amdxdna/amdxdna_gem.c b/drivers/accel/amdxdna/amdxdna_gem.c
new file mode 100644
index 000000000000..606433d73236
--- /dev/null
+++ b/drivers/accel/amdxdna/amdxdna_gem.c
@@ -0,0 +1,622 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2024, Advanced Micro Devices, Inc.
+ */
+
+#include <drm/amdxdna_accel.h>
+#include <drm/drm_cache.h>
+#include <drm/drm_device.h>
+#include <drm/drm_gem.h>
+#include <drm/drm_gem_shmem_helper.h>
+#include <drm/gpu_scheduler.h>
+#include <linux/iosys-map.h>
+#include <linux/vmalloc.h>
+
+#include "amdxdna_ctx.h"
+#include "amdxdna_gem.h"
+#include "amdxdna_pci_drv.h"
+
+#define XDNA_MAX_CMD_BO_SIZE SZ_32K
+
+static int
+amdxdna_gem_insert_node_locked(struct amdxdna_gem_obj *abo, bool use_vmap)
+{
+ struct amdxdna_client *client = abo->client;
+ struct amdxdna_dev *xdna = client->xdna;
+ struct amdxdna_mem *mem = &abo->mem;
+ u64 offset;
+ u32 align;
+ int ret;
+
+ align = 1 << max(PAGE_SHIFT, xdna->dev_info->dev_mem_buf_shift);
+ ret = drm_mm_insert_node_generic(&abo->dev_heap->mm, &abo->mm_node,
+ mem->size, align,
+ 0, DRM_MM_INSERT_BEST);
+ if (ret) {
+ XDNA_ERR(xdna, "Failed to alloc dev bo memory, ret %d", ret);
+ return ret;
+ }
+
+ mem->dev_addr = abo->mm_node.start;
+ offset = mem->dev_addr - abo->dev_heap->mem.dev_addr;
+ mem->userptr = abo->dev_heap->mem.userptr + offset;
+ mem->pages = &abo->dev_heap->base.pages[offset >> PAGE_SHIFT];
+ mem->nr_pages = mem->size >> PAGE_SHIFT;
+
+ if (use_vmap) {
+ mem->kva = vmap(mem->pages, mem->nr_pages, VM_MAP, PAGE_KERNEL);
+ if (!mem->kva) {
+ XDNA_ERR(xdna, "Failed to vmap");
+ drm_mm_remove_node(&abo->mm_node);
+ return -EFAULT;
+ }
+ }
+
+ return 0;
+}
+
+static void amdxdna_gem_obj_free(struct drm_gem_object *gobj)
+{
+ struct amdxdna_dev *xdna = to_xdna_dev(gobj->dev);
+ struct amdxdna_gem_obj *abo = to_xdna_obj(gobj);
+ struct iosys_map map = IOSYS_MAP_INIT_VADDR(abo->mem.kva);
+
+ XDNA_DBG(xdna, "BO type %d xdna_addr 0x%llx", abo->type, abo->mem.dev_addr);
+ if (abo->pinned)
+ amdxdna_gem_unpin(abo);
+
+ if (abo->type == AMDXDNA_BO_DEV) {
+ mutex_lock(&abo->client->mm_lock);
+ drm_mm_remove_node(&abo->mm_node);
+ mutex_unlock(&abo->client->mm_lock);
+
+ vunmap(abo->mem.kva);
+ drm_gem_object_put(to_gobj(abo->dev_heap));
+ drm_gem_object_release(gobj);
+ mutex_destroy(&abo->lock);
+ kfree(abo);
+ return;
+ }
+
+ if (abo->type == AMDXDNA_BO_DEV_HEAP)
+ drm_mm_takedown(&abo->mm);
+
+ drm_gem_vunmap_unlocked(gobj, &map);
+ mutex_destroy(&abo->lock);
+ drm_gem_shmem_free(&abo->base);
+}
+
+static const struct drm_gem_object_funcs amdxdna_gem_dev_obj_funcs = {
+ .free = amdxdna_gem_obj_free,
+};
+
+static bool amdxdna_hmm_invalidate(struct mmu_interval_notifier *mni,
+ const struct mmu_notifier_range *range,
+ unsigned long cur_seq)
+{
+ struct amdxdna_gem_obj *abo = container_of(mni, struct amdxdna_gem_obj,
+ mem.notifier);
+ struct amdxdna_dev *xdna = to_xdna_dev(to_gobj(abo)->dev);
+
+ XDNA_DBG(xdna, "Invalid range 0x%llx, 0x%lx, type %d",
+ abo->mem.userptr, abo->mem.size, abo->type);
+
+ if (!mmu_notifier_range_blockable(range))
+ return false;
+
+ xdna->dev_info->ops->hmm_invalidate(abo, cur_seq);
+
+ return true;
+}
+
+static const struct mmu_interval_notifier_ops amdxdna_hmm_ops = {
+ .invalidate = amdxdna_hmm_invalidate,
+};
+
+static void amdxdna_hmm_unregister(struct amdxdna_gem_obj *abo)
+{
+ struct amdxdna_dev *xdna = to_xdna_dev(to_gobj(abo)->dev);
+
+ if (!xdna->dev_info->ops->hmm_invalidate)
+ return;
+
+ mmu_interval_notifier_remove(&abo->mem.notifier);
+ kvfree(abo->mem.pfns);
+ abo->mem.pfns = NULL;
+}
+
+static int amdxdna_hmm_register(struct amdxdna_gem_obj *abo, unsigned long addr,
+ size_t len)
+{
+ struct amdxdna_dev *xdna = to_xdna_dev(to_gobj(abo)->dev);
+ u32 nr_pages;
+ int ret;
+
+ if (!xdna->dev_info->ops->hmm_invalidate)
+ return 0;
+
+ if (abo->mem.pfns)
+ return -EEXIST;
+
+ nr_pages = (PAGE_ALIGN(addr + len) - (addr & PAGE_MASK)) >> PAGE_SHIFT;
+ abo->mem.pfns = kvcalloc(nr_pages, sizeof(*abo->mem.pfns),
+ GFP_KERNEL);
+ if (!abo->mem.pfns)
+ return -ENOMEM;
+
+ ret = mmu_interval_notifier_insert_locked(&abo->mem.notifier,
+ current->mm,
+ addr,
+ len,
+ &amdxdna_hmm_ops);
+ if (ret) {
+ XDNA_ERR(xdna, "Insert mmu notifier failed, ret %d", ret);
+ kvfree(abo->mem.pfns);
+ }
+ abo->mem.userptr = addr;
+
+ return ret;
+}
+
+static int amdxdna_gem_obj_mmap(struct drm_gem_object *gobj,
+ struct vm_area_struct *vma)
+{
+ struct amdxdna_gem_obj *abo = to_xdna_obj(gobj);
+ unsigned long num_pages;
+ int ret;
+
+ ret = amdxdna_hmm_register(abo, vma->vm_start, gobj->size);
+ if (ret)
+ return ret;
+
+ ret = drm_gem_shmem_mmap(&abo->base, vma);
+ if (ret)
+ goto hmm_unreg;
+
+ num_pages = gobj->size >> PAGE_SHIFT;
+ /* Try to insert the pages */
+ vm_flags_mod(vma, VM_MIXEDMAP, VM_PFNMAP);
+ ret = vm_insert_pages(vma, vma->vm_start, abo->base.pages, &num_pages);
+ if (ret)
+ XDNA_ERR(abo->client->xdna, "Failed insert pages, ret %d", ret);
+
+ return 0;
+
+hmm_unreg:
+ amdxdna_hmm_unregister(abo);
+ return ret;
+}
+
+static vm_fault_t amdxdna_gem_vm_fault(struct vm_fault *vmf)
+{
+ return drm_gem_shmem_vm_ops.fault(vmf);
+}
+
+static void amdxdna_gem_vm_open(struct vm_area_struct *vma)
+{
+ drm_gem_shmem_vm_ops.open(vma);
+}
+
+static void amdxdna_gem_vm_close(struct vm_area_struct *vma)
+{
+ struct drm_gem_object *gobj = vma->vm_private_data;
+
+ amdxdna_hmm_unregister(to_xdna_obj(gobj));
+ drm_gem_shmem_vm_ops.close(vma);
+}
+
+static const struct vm_operations_struct amdxdna_gem_vm_ops = {
+ .fault = amdxdna_gem_vm_fault,
+ .open = amdxdna_gem_vm_open,
+ .close = amdxdna_gem_vm_close,
+};
+
+static const struct drm_gem_object_funcs amdxdna_gem_shmem_funcs = {
+ .free = amdxdna_gem_obj_free,
+ .print_info = drm_gem_shmem_object_print_info,
+ .pin = drm_gem_shmem_object_pin,
+ .unpin = drm_gem_shmem_object_unpin,
+ .get_sg_table = drm_gem_shmem_object_get_sg_table,
+ .vmap = drm_gem_shmem_object_vmap,
+ .vunmap = drm_gem_shmem_object_vunmap,
+ .mmap = amdxdna_gem_obj_mmap,
+ .vm_ops = &amdxdna_gem_vm_ops,
+};
+
+static struct amdxdna_gem_obj *
+amdxdna_gem_create_obj(struct drm_device *dev, size_t size)
+{
+ struct amdxdna_gem_obj *abo;
+
+ abo = kzalloc(sizeof(*abo), GFP_KERNEL);
+ if (!abo)
+ return ERR_PTR(-ENOMEM);
+
+ abo->pinned = false;
+ abo->assigned_hwctx = AMDXDNA_INVALID_CTX_HANDLE;
+ mutex_init(&abo->lock);
+
+ abo->mem.userptr = AMDXDNA_INVALID_ADDR;
+ abo->mem.dev_addr = AMDXDNA_INVALID_ADDR;
+ abo->mem.size = size;
+
+ return abo;
+}
+
+/* For drm_driver->gem_create_object callback */
+struct drm_gem_object *
+amdxdna_gem_create_object_cb(struct drm_device *dev, size_t size)
+{
+ struct amdxdna_gem_obj *abo;
+
+ abo = amdxdna_gem_create_obj(dev, size);
+ if (IS_ERR(abo))
+ return ERR_CAST(abo);
+
+ to_gobj(abo)->funcs = &amdxdna_gem_shmem_funcs;
+
+ return to_gobj(abo);
+}
+
+static struct amdxdna_gem_obj *
+amdxdna_drm_alloc_shmem(struct drm_device *dev,
+ struct amdxdna_drm_create_bo *args,
+ struct drm_file *filp)
+{
+ struct amdxdna_client *client = filp->driver_priv;
+ struct drm_gem_shmem_object *shmem;
+ struct amdxdna_gem_obj *abo;
+
+ shmem = drm_gem_shmem_create(dev, args->size);
+ if (IS_ERR(shmem))
+ return ERR_CAST(shmem);
+
+ shmem->map_wc = false;
+
+ abo = to_xdna_obj(&shmem->base);
+ abo->client = client;
+ abo->type = AMDXDNA_BO_SHMEM;
+
+ return abo;
+}
+
+static struct amdxdna_gem_obj *
+amdxdna_drm_create_dev_heap(struct drm_device *dev,
+ struct amdxdna_drm_create_bo *args,
+ struct drm_file *filp)
+{
+ struct amdxdna_client *client = filp->driver_priv;
+ struct amdxdna_dev *xdna = to_xdna_dev(dev);
+ struct drm_gem_shmem_object *shmem;
+ struct amdxdna_gem_obj *abo;
+ int ret;
+
+ if (args->size > xdna->dev_info->dev_mem_size) {
+ XDNA_DBG(xdna, "Invalid dev heap size 0x%llx, limit 0x%lx",
+ args->size, xdna->dev_info->dev_mem_size);
+ return ERR_PTR(-EINVAL);
+ }
+
+ mutex_lock(&client->mm_lock);
+ if (client->dev_heap) {
+ XDNA_DBG(client->xdna, "dev heap is already created");
+ ret = -EBUSY;
+ goto mm_unlock;
+ }
+
+ shmem = drm_gem_shmem_create(dev, args->size);
+ if (IS_ERR(shmem)) {
+ ret = PTR_ERR(shmem);
+ goto mm_unlock;
+ }
+
+ shmem->map_wc = false;
+ abo = to_xdna_obj(&shmem->base);
+
+ abo->type = AMDXDNA_BO_DEV_HEAP;
+ abo->client = client;
+ abo->mem.dev_addr = client->xdna->dev_info->dev_mem_base;
+ drm_mm_init(&abo->mm, abo->mem.dev_addr, abo->mem.size);
+
+ client->dev_heap = abo;
+ drm_gem_object_get(to_gobj(abo));
+ mutex_unlock(&client->mm_lock);
+
+ return abo;
+
+mm_unlock:
+ mutex_unlock(&client->mm_lock);
+ return ERR_PTR(ret);
+}
+
+struct amdxdna_gem_obj *
+amdxdna_drm_alloc_dev_bo(struct drm_device *dev,
+ struct amdxdna_drm_create_bo *args,
+ struct drm_file *filp, bool use_vmap)
+{
+ struct amdxdna_client *client = filp->driver_priv;
+ struct amdxdna_dev *xdna = to_xdna_dev(dev);
+ size_t aligned_sz = PAGE_ALIGN(args->size);
+ struct amdxdna_gem_obj *abo, *heap;
+ int ret;
+
+ mutex_lock(&client->mm_lock);
+ heap = client->dev_heap;
+ if (!heap) {
+ ret = -EINVAL;
+ goto mm_unlock;
+ }
+
+ if (heap->mem.userptr == AMDXDNA_INVALID_ADDR) {
+ XDNA_ERR(xdna, "Invalid dev heap userptr");
+ ret = -EINVAL;
+ goto mm_unlock;
+ }
+
+ if (args->size > heap->mem.size) {
+ XDNA_ERR(xdna, "Invalid dev bo size 0x%llx, limit 0x%lx",
+ args->size, heap->mem.size);
+ ret = -EINVAL;
+ goto mm_unlock;
+ }
+
+ abo = amdxdna_gem_create_obj(&xdna->ddev, aligned_sz);
+ if (IS_ERR(abo)) {
+ ret = PTR_ERR(abo);
+ goto mm_unlock;
+ }
+ to_gobj(abo)->funcs = &amdxdna_gem_dev_obj_funcs;
+ abo->type = AMDXDNA_BO_DEV;
+ abo->client = client;
+ abo->dev_heap = heap;
+ ret = amdxdna_gem_insert_node_locked(abo, use_vmap);
+ if (ret) {
+ XDNA_ERR(xdna, "Failed to alloc dev bo memory, ret %d", ret);
+ goto mm_unlock;
+ }
+
+ drm_gem_object_get(to_gobj(heap));
+ drm_gem_private_object_init(&xdna->ddev, to_gobj(abo), aligned_sz);
+
+ mutex_unlock(&client->mm_lock);
+ return abo;
+
+mm_unlock:
+ mutex_unlock(&client->mm_lock);
+ return ERR_PTR(ret);
+}
+
+static struct amdxdna_gem_obj *
+amdxdna_drm_create_cmd_bo(struct drm_device *dev,
+ struct amdxdna_drm_create_bo *args,
+ struct drm_file *filp)
+{
+ struct amdxdna_dev *xdna = to_xdna_dev(dev);
+ struct drm_gem_shmem_object *shmem;
+ struct amdxdna_gem_obj *abo;
+ struct iosys_map map;
+ int ret;
+
+ if (args->size > XDNA_MAX_CMD_BO_SIZE) {
+ XDNA_ERR(xdna, "Command bo size 0x%llx too large", args->size);
+ return ERR_PTR(-EINVAL);
+ }
+
+ if (args->size < sizeof(struct amdxdna_cmd)) {
+ XDNA_DBG(xdna, "Command BO size 0x%llx too small", args->size);
+ return ERR_PTR(-EINVAL);
+ }
+
+ shmem = drm_gem_shmem_create(dev, args->size);
+ if (IS_ERR(shmem))
+ return ERR_CAST(shmem);
+
+ shmem->map_wc = false;
+ abo = to_xdna_obj(&shmem->base);
+
+ abo->type = AMDXDNA_BO_CMD;
+ abo->client = filp->driver_priv;
+
+ ret = drm_gem_vmap_unlocked(to_gobj(abo), &map);
+ if (ret) {
+ XDNA_ERR(xdna, "Vmap cmd bo failed, ret %d", ret);
+ goto release_obj;
+ }
+ abo->mem.kva = map.vaddr;
+
+ return abo;
+
+release_obj:
+ drm_gem_shmem_free(shmem);
+ return ERR_PTR(ret);
+}
+
+int amdxdna_drm_create_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
+{
+ struct amdxdna_dev *xdna = to_xdna_dev(dev);
+ struct amdxdna_drm_create_bo *args = data;
+ struct amdxdna_gem_obj *abo;
+ int ret;
+
+ if (args->flags || args->vaddr || !args->size)
+ return -EINVAL;
+
+ XDNA_DBG(xdna, "BO arg type %d vaddr 0x%llx size 0x%llx flags 0x%llx",
+ args->type, args->vaddr, args->size, args->flags);
+ switch (args->type) {
+ case AMDXDNA_BO_SHMEM:
+ abo = amdxdna_drm_alloc_shmem(dev, args, filp);
+ break;
+ case AMDXDNA_BO_DEV_HEAP:
+ abo = amdxdna_drm_create_dev_heap(dev, args, filp);
+ break;
+ case AMDXDNA_BO_DEV:
+ abo = amdxdna_drm_alloc_dev_bo(dev, args, filp, false);
+ break;
+ case AMDXDNA_BO_CMD:
+ abo = amdxdna_drm_create_cmd_bo(dev, args, filp);
+ break;
+ default:
+ return -EINVAL;
+ }
+ if (IS_ERR(abo))
+ return PTR_ERR(abo);
+
+ /* ready to publish object to userspace */
+ ret = drm_gem_handle_create(filp, to_gobj(abo), &args->handle);
+ if (ret) {
+ XDNA_ERR(xdna, "Create handle failed");
+ goto put_obj;
+ }
+
+ XDNA_DBG(xdna, "BO hdl %d type %d userptr 0x%llx xdna_addr 0x%llx size 0x%lx",
+ args->handle, args->type, abo->mem.userptr,
+ abo->mem.dev_addr, abo->mem.size);
+put_obj:
+ /* Drop the object reference; the handle holds it now. */
+ drm_gem_object_put(to_gobj(abo));
+ return ret;
+}
+
+int amdxdna_gem_pin_nolock(struct amdxdna_gem_obj *abo)
+{
+ struct amdxdna_dev *xdna = to_xdna_dev(to_gobj(abo)->dev);
+ int ret;
+
+ switch (abo->type) {
+ case AMDXDNA_BO_SHMEM:
+ case AMDXDNA_BO_DEV_HEAP:
+ ret = drm_gem_shmem_pin(&abo->base);
+ break;
+ case AMDXDNA_BO_DEV:
+ ret = drm_gem_shmem_pin(&abo->dev_heap->base);
+ break;
+ default:
+ ret = -EOPNOTSUPP;
+ }
+
+ XDNA_DBG(xdna, "BO type %d ret %d", abo->type, ret);
+ return ret;
+}
+
+int amdxdna_gem_pin(struct amdxdna_gem_obj *abo)
+{
+ int ret;
+
+ if (abo->type == AMDXDNA_BO_DEV)
+ abo = abo->dev_heap;
+
+ mutex_lock(&abo->lock);
+ ret = amdxdna_gem_pin_nolock(abo);
+ mutex_unlock(&abo->lock);
+
+ return ret;
+}
+
+void amdxdna_gem_unpin(struct amdxdna_gem_obj *abo)
+{
+ if (abo->type == AMDXDNA_BO_DEV)
+ abo = abo->dev_heap;
+
+ mutex_lock(&abo->lock);
+ drm_gem_shmem_unpin(&abo->base);
+ mutex_unlock(&abo->lock);
+}
+
+struct amdxdna_gem_obj *amdxdna_gem_get_obj(struct amdxdna_client *client,
+ u32 bo_hdl, u8 bo_type)
+{
+ struct amdxdna_dev *xdna = client->xdna;
+ struct amdxdna_gem_obj *abo;
+ struct drm_gem_object *gobj;
+
+ gobj = drm_gem_object_lookup(client->filp, bo_hdl);
+ if (!gobj) {
+ XDNA_DBG(xdna, "Can not find bo %d", bo_hdl);
+ return NULL;
+ }
+
+ abo = to_xdna_obj(gobj);
+ if (bo_type == AMDXDNA_BO_INVALID || abo->type == bo_type)
+ return abo;
+
+ drm_gem_object_put(gobj);
+ return NULL;
+}
+
+int amdxdna_drm_get_bo_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
+{
+ struct amdxdna_drm_get_bo_info *args = data;
+ struct amdxdna_dev *xdna = to_xdna_dev(dev);
+ struct amdxdna_gem_obj *abo;
+ struct drm_gem_object *gobj;
+ int ret = 0;
+
+ if (args->ext || args->ext_flags || args->pad)
+ return -EINVAL;
+
+ gobj = drm_gem_object_lookup(filp, args->handle);
+ if (!gobj) {
+ XDNA_DBG(xdna, "Lookup GEM object %d failed", args->handle);
+ return -ENOENT;
+ }
+
+ abo = to_xdna_obj(gobj);
+ args->vaddr = abo->mem.userptr;
+ args->xdna_addr = abo->mem.dev_addr;
+
+ if (abo->type != AMDXDNA_BO_DEV)
+ args->map_offset = drm_vma_node_offset_addr(&gobj->vma_node);
+ else
+ args->map_offset = AMDXDNA_INVALID_ADDR;
+
+ XDNA_DBG(xdna, "BO hdl %d map_offset 0x%llx vaddr 0x%llx xdna_addr 0x%llx",
+ args->handle, args->map_offset, args->vaddr, args->xdna_addr);
+
+ drm_gem_object_put(gobj);
+ return ret;
+}
+
+/*
+ * The sync bo ioctl is used to make sure the CPU cache is in sync with memory.
+ * This is required because the NPU is not a cache-coherent device. CPU cache
+ * flushing/invalidation is expensive, so it is best handled outside of the
+ * command submission path. This ioctl allows explicit cache
+ * flushing/invalidation outside of the critical path.
+ */
+int amdxdna_drm_sync_bo_ioctl(struct drm_device *dev,
+ void *data, struct drm_file *filp)
+{
+ struct amdxdna_dev *xdna = to_xdna_dev(dev);
+ struct amdxdna_drm_sync_bo *args = data;
+ struct amdxdna_gem_obj *abo;
+ struct drm_gem_object *gobj;
+ int ret;
+
+ gobj = drm_gem_object_lookup(filp, args->handle);
+ if (!gobj) {
+ XDNA_ERR(xdna, "Lookup GEM object failed");
+ return -ENOENT;
+ }
+ abo = to_xdna_obj(gobj);
+
+ ret = amdxdna_gem_pin(abo);
+ if (ret) {
+ XDNA_ERR(xdna, "Pin BO %d failed, ret %d", args->handle, ret);
+ goto put_obj;
+ }
+
+ if (abo->type == AMDXDNA_BO_DEV)
+ drm_clflush_pages(abo->mem.pages, abo->mem.nr_pages);
+ else
+ drm_clflush_pages(abo->base.pages, gobj->size >> PAGE_SHIFT);
+
+ amdxdna_gem_unpin(abo);
+
+ XDNA_DBG(xdna, "Sync bo %d offset 0x%llx, size 0x%llx\n",
+ args->handle, args->offset, args->size);
+
+put_obj:
+ drm_gem_object_put(gobj);
+ return ret;
+}
diff --git a/drivers/accel/amdxdna/amdxdna_gem.h b/drivers/accel/amdxdna/amdxdna_gem.h
new file mode 100644
index 000000000000..8ccc0375dd9d
--- /dev/null
+++ b/drivers/accel/amdxdna/amdxdna_gem.h
@@ -0,0 +1,65 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2024, Advanced Micro Devices, Inc.
+ */
+
+#ifndef _AMDXDNA_GEM_H_
+#define _AMDXDNA_GEM_H_
+
+struct amdxdna_mem {
+ u64 userptr;
+ void *kva;
+ u64 dev_addr;
+ size_t size;
+ struct page **pages;
+ u32 nr_pages;
+ struct mmu_interval_notifier notifier;
+ unsigned long *pfns;
+ bool map_invalid;
+};
+
+struct amdxdna_gem_obj {
+ struct drm_gem_shmem_object base;
+ struct amdxdna_client *client;
+ u8 type;
+ bool pinned;
+ struct mutex lock; /* Protects: pinned */
+ struct amdxdna_mem mem;
+
+ /* The members below are only initialized when needed */
+ struct drm_mm mm; /* For AMDXDNA_BO_DEV_HEAP */
+ struct amdxdna_gem_obj *dev_heap; /* For AMDXDNA_BO_DEV */
+ struct drm_mm_node mm_node; /* For AMDXDNA_BO_DEV */
+ u32 assigned_hwctx;
+};
+
+#define to_gobj(obj) (&(obj)->base.base)
+
+static inline struct amdxdna_gem_obj *to_xdna_obj(struct drm_gem_object *gobj)
+{
+ return container_of(gobj, struct amdxdna_gem_obj, base.base);
+}
+
+struct amdxdna_gem_obj *amdxdna_gem_get_obj(struct amdxdna_client *client,
+ u32 bo_hdl, u8 bo_type);
+static inline void amdxdna_gem_put_obj(struct amdxdna_gem_obj *abo)
+{
+ drm_gem_object_put(to_gobj(abo));
+}
+
+struct drm_gem_object *
+amdxdna_gem_create_object_cb(struct drm_device *dev, size_t size);
+struct amdxdna_gem_obj *
+amdxdna_drm_alloc_dev_bo(struct drm_device *dev,
+ struct amdxdna_drm_create_bo *args,
+ struct drm_file *filp, bool use_vmap);
+
+int amdxdna_gem_pin_nolock(struct amdxdna_gem_obj *abo);
+int amdxdna_gem_pin(struct amdxdna_gem_obj *abo);
+void amdxdna_gem_unpin(struct amdxdna_gem_obj *abo);
+
+int amdxdna_drm_create_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *filp);
+int amdxdna_drm_get_bo_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp);
+int amdxdna_drm_sync_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *filp);
+
+#endif /* _AMDXDNA_GEM_H_ */
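
A short sketch of how the lookup and pin helpers declared above are meant to
be combined; the example_* name is hypothetical. Note that for AMDXDNA_BO_DEV
objects, pin and unpin operate on the backing heap object.

static int example_touch_bo(struct amdxdna_client *client, u32 hdl)
{
	struct amdxdna_gem_obj *abo;
	int ret;

	abo = amdxdna_gem_get_obj(client, hdl, AMDXDNA_BO_INVALID);
	if (!abo)
		return -ENOENT;

	ret = amdxdna_gem_pin(abo);	/* pins the backing heap for dev BOs */
	if (!ret)
		amdxdna_gem_unpin(abo);

	amdxdna_gem_put_obj(abo);	/* drops the lookup reference */
	return ret;
}
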
diff --git a/drivers/accel/amdxdna/amdxdna_mailbox.c b/drivers/accel/amdxdna/amdxdna_mailbox.c
new file mode 100644
index 000000000000..da1ac89bb78f
--- /dev/null
+++ b/drivers/accel/amdxdna/amdxdna_mailbox.c
@@ -0,0 +1,571 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2022-2024, Advanced Micro Devices, Inc.
+ */
+
+#include <drm/drm_device.h>
+#include <drm/drm_managed.h>
+#include <linux/bitfield.h>
+#include <linux/interrupt.h>
+#include <linux/iopoll.h>
+#include <linux/slab.h>
+#include <linux/xarray.h>
+
+#define CREATE_TRACE_POINTS
+#include <trace/events/amdxdna.h>
+
+#include "amdxdna_mailbox.h"
+
+#define MB_ERR(chann, fmt, args...) \
+({ \
+ typeof(chann) _chann = chann; \
+ dev_err((_chann)->mb->dev, "xdna_mailbox.%d: "fmt, \
+ (_chann)->msix_irq, ##args); \
+})
+#define MB_DBG(chann, fmt, args...) \
+({ \
+ typeof(chann) _chann = chann; \
+ dev_dbg((_chann)->mb->dev, "xdna_mailbox.%d: "fmt, \
+ (_chann)->msix_irq, ##args); \
+})
+#define MB_WARN_ONCE(chann, fmt, args...) \
+({ \
+ typeof(chann) _chann = chann; \
+ dev_warn_once((_chann)->mb->dev, "xdna_mailbox.%d: "fmt, \
+ (_chann)->msix_irq, ##args); \
+})
+
+#define MAGIC_VAL 0x1D000000U
+#define MAGIC_VAL_MASK 0xFF000000
+#define MAX_MSG_ID_ENTRIES 256
+#define MSG_RX_TIMER 200 /* milliseconds */
+#define MAILBOX_NAME "xdna_mailbox"
+
+enum channel_res_type {
+ CHAN_RES_X2I,
+ CHAN_RES_I2X,
+ CHAN_RES_NUM
+};
+
+struct mailbox {
+ struct device *dev;
+ struct xdna_mailbox_res res;
+};
+
+struct mailbox_channel {
+ struct mailbox *mb;
+ struct xdna_mailbox_chann_res res[CHAN_RES_NUM];
+ int msix_irq;
+ u32 iohub_int_addr;
+ struct xarray chan_xa;
+ u32 next_msgid;
+ u32 x2i_tail;
+
+ /* Received msg related fields */
+ struct workqueue_struct *work_q;
+ struct work_struct rx_work;
+ u32 i2x_head;
+ bool bad_state;
+};
+
+#define MSG_BODY_SZ GENMASK(10, 0)
+#define MSG_PROTO_VER GENMASK(23, 16)
+struct xdna_msg_header {
+ __u32 total_size;
+ __u32 sz_ver;
+ __u32 id;
+ __u32 opcode;
+} __packed;
+
+static_assert(sizeof(struct xdna_msg_header) == 16);
+
+struct mailbox_pkg {
+ struct xdna_msg_header header;
+ __u32 payload[];
+};
+
+/* The protocol version. */
+#define MSG_PROTOCOL_VERSION 0x1
+/* The tombstone value. */
+#define TOMBSTONE 0xDEADFACE
+
+struct mailbox_msg {
+ void *handle;
+ int (*notify_cb)(void *handle, void __iomem *data, size_t size);
+ size_t pkg_size; /* package size in bytes */
+ struct mailbox_pkg pkg;
+};
+
+static void mailbox_reg_write(struct mailbox_channel *mb_chann, u32 mbox_reg, u32 data)
+{
+ struct xdna_mailbox_res *mb_res = &mb_chann->mb->res;
+ void __iomem *ringbuf_addr = mb_res->mbox_base + mbox_reg;
+
+ writel(data, ringbuf_addr);
+}
+
+static u32 mailbox_reg_read(struct mailbox_channel *mb_chann, u32 mbox_reg)
+{
+ struct xdna_mailbox_res *mb_res = &mb_chann->mb->res;
+ void __iomem *ringbuf_addr = mb_res->mbox_base + mbox_reg;
+
+ return readl(ringbuf_addr);
+}
+
+static int mailbox_reg_read_non_zero(struct mailbox_channel *mb_chann, u32 mbox_reg, u32 *val)
+{
+ struct xdna_mailbox_res *mb_res = &mb_chann->mb->res;
+ void __iomem *ringbuf_addr = mb_res->mbox_base + mbox_reg;
+ int ret, value;
+
+ /* Poll until the value is non-zero */
+ ret = readx_poll_timeout(readl, ringbuf_addr, value,
+ value, 1 /* us */, 100);
+ if (ret < 0)
+ return ret;
+
+ *val = value;
+ return 0;
+}
+
+static inline void
+mailbox_set_headptr(struct mailbox_channel *mb_chann, u32 headptr_val)
+{
+ mailbox_reg_write(mb_chann, mb_chann->res[CHAN_RES_I2X].mb_head_ptr_reg, headptr_val);
+ mb_chann->i2x_head = headptr_val;
+}
+
+static inline void
+mailbox_set_tailptr(struct mailbox_channel *mb_chann, u32 tailptr_val)
+{
+ mailbox_reg_write(mb_chann, mb_chann->res[CHAN_RES_X2I].mb_tail_ptr_reg, tailptr_val);
+ mb_chann->x2i_tail = tailptr_val;
+}
+
+static inline u32
+mailbox_get_headptr(struct mailbox_channel *mb_chann, enum channel_res_type type)
+{
+ return mailbox_reg_read(mb_chann, mb_chann->res[type].mb_head_ptr_reg);
+}
+
+static inline u32
+mailbox_get_tailptr(struct mailbox_channel *mb_chann, enum channel_res_type type)
+{
+ return mailbox_reg_read(mb_chann, mb_chann->res[type].mb_tail_ptr_reg);
+}
+
+static inline u32
+mailbox_get_ringbuf_size(struct mailbox_channel *mb_chann, enum channel_res_type type)
+{
+ return mb_chann->res[type].rb_size;
+}
+
+static inline int mailbox_validate_msgid(int msg_id)
+{
+ return (msg_id & MAGIC_VAL_MASK) == MAGIC_VAL;
+}
+
+static int mailbox_acquire_msgid(struct mailbox_channel *mb_chann, struct mailbox_msg *mb_msg)
+{
+ u32 msg_id;
+ int ret;
+
+ ret = xa_alloc_cyclic_irq(&mb_chann->chan_xa, &msg_id, mb_msg,
+ XA_LIMIT(0, MAX_MSG_ID_ENTRIES - 1),
+ &mb_chann->next_msgid, GFP_NOWAIT);
+ if (ret < 0)
+ return ret;
+
+ /*
+ * Add MAGIC_VAL to the higher bits.
+ */
+ msg_id |= MAGIC_VAL;
+ return msg_id;
+}
+
+static void mailbox_release_msgid(struct mailbox_channel *mb_chann, int msg_id)
+{
+ msg_id &= ~MAGIC_VAL_MASK;
+ xa_erase_irq(&mb_chann->chan_xa, msg_id);
+}
+
+static void mailbox_release_msg(struct mailbox_channel *mb_chann,
+ struct mailbox_msg *mb_msg)
+{
+ MB_DBG(mb_chann, "msg_id 0x%x msg opcode 0x%x",
+ mb_msg->pkg.header.id, mb_msg->pkg.header.opcode);
+ mb_msg->notify_cb(mb_msg->handle, NULL, 0);
+ kfree(mb_msg);
+}
+
+static int
+mailbox_send_msg(struct mailbox_channel *mb_chann, struct mailbox_msg *mb_msg)
+{
+ void __iomem *write_addr;
+ u32 ringbuf_size;
+ u32 head, tail;
+ u32 start_addr;
+ u32 tmp_tail;
+
+ head = mailbox_get_headptr(mb_chann, CHAN_RES_X2I);
+ tail = mb_chann->x2i_tail;
+ ringbuf_size = mailbox_get_ringbuf_size(mb_chann, CHAN_RES_X2I);
+ start_addr = mb_chann->res[CHAN_RES_X2I].rb_start_addr;
+ tmp_tail = tail + mb_msg->pkg_size;
+
+ if (tail < head && tmp_tail >= head)
+ goto no_space;
+
+ if (tail >= head && (tmp_tail > ringbuf_size - sizeof(u32) &&
+ mb_msg->pkg_size >= head))
+ goto no_space;
+
+ if (tail >= head && tmp_tail > ringbuf_size - sizeof(u32)) {
+ write_addr = mb_chann->mb->res.ringbuf_base + start_addr + tail;
+ writel(TOMBSTONE, write_addr);
+
+ /* tombstone is set. Write from the start of the ringbuf */
+ tail = 0;
+ }
+
+ write_addr = mb_chann->mb->res.ringbuf_base + start_addr + tail;
+ memcpy_toio(write_addr, &mb_msg->pkg, mb_msg->pkg_size);
+ mailbox_set_tailptr(mb_chann, tail + mb_msg->pkg_size);
+
+ trace_mbox_set_tail(MAILBOX_NAME, mb_chann->msix_irq,
+ mb_msg->pkg.header.opcode,
+ mb_msg->pkg.header.id);
+
+ return 0;
+
+no_space:
+ return -ENOSPC;
+}
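
A note on the wrap rule implemented above, as a comment-only sketch; it
paraphrases mailbox_send_msg() and the matching consumer logic in
mailbox_get_msg() below.

/*
 * Producer side: if the packet does not fit between tail and the end of
 * the ring (keeping one u32 free for the tombstone word), write TOMBSTONE
 * at tail and restart at offset 0, but only when the region from 0 up to
 * head has enough room; otherwise return -ENOSPC.
 *
 * Consumer side: a size word equal to TOMBSTONE means "wrap the head
 * pointer back to 0 and read again from the start of the ring".
 */
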
+
+static int
+mailbox_get_resp(struct mailbox_channel *mb_chann, struct xdna_msg_header *header,
+ void __iomem *data)
+{
+ struct mailbox_msg *mb_msg;
+ int msg_id;
+ int ret;
+
+ msg_id = header->id;
+ if (!mailbox_validate_msgid(msg_id)) {
+ MB_ERR(mb_chann, "Bad message ID 0x%x", msg_id);
+ return -EINVAL;
+ }
+
+ msg_id &= ~MAGIC_VAL_MASK;
+ mb_msg = xa_erase_irq(&mb_chann->chan_xa, msg_id);
+ if (!mb_msg) {
+ MB_ERR(mb_chann, "Cannot find msg 0x%x", msg_id);
+ return -EINVAL;
+ }
+
+ MB_DBG(mb_chann, "opcode 0x%x size %d id 0x%x",
+ header->opcode, header->total_size, header->id);
+ ret = mb_msg->notify_cb(mb_msg->handle, data, header->total_size);
+ if (unlikely(ret))
+ MB_ERR(mb_chann, "Message callback ret %d", ret);
+
+ kfree(mb_msg);
+ return ret;
+}
+
+static int mailbox_get_msg(struct mailbox_channel *mb_chann)
+{
+ struct xdna_msg_header header;
+ void __iomem *read_addr;
+ u32 msg_size, rest;
+ u32 ringbuf_size;
+ u32 head, tail;
+ u32 start_addr;
+ int ret;
+
+ if (mailbox_reg_read_non_zero(mb_chann, mb_chann->res[CHAN_RES_I2X].mb_tail_ptr_reg, &tail))
+ return -EINVAL;
+ head = mb_chann->i2x_head;
+ ringbuf_size = mailbox_get_ringbuf_size(mb_chann, CHAN_RES_I2X);
+ start_addr = mb_chann->res[CHAN_RES_I2X].rb_start_addr;
+
+ if (unlikely(tail > ringbuf_size || !IS_ALIGNED(tail, 4))) {
+ MB_WARN_ONCE(mb_chann, "Invalid tail 0x%x", tail);
+ return -EINVAL;
+ }
+
+ /* ringbuf empty */
+ if (head == tail)
+ return -ENOENT;
+
+ if (head == ringbuf_size)
+ head = 0;
+
+ /* Peek size of the message or TOMBSTONE */
+ read_addr = mb_chann->mb->res.ringbuf_base + start_addr + head;
+ header.total_size = readl(read_addr);
+ /* size is TOMBSTONE; restart reading from offset 0 */
+ if (header.total_size == TOMBSTONE) {
+ if (head < tail) {
+ MB_WARN_ONCE(mb_chann, "Tombstone, head 0x%x tail 0x%x",
+ head, tail);
+ return -EINVAL;
+ }
+ mailbox_set_headptr(mb_chann, 0);
+ return 0;
+ }
+
+ if (unlikely(!header.total_size || !IS_ALIGNED(header.total_size, 4))) {
+ MB_WARN_ONCE(mb_chann, "Invalid total size 0x%x", header.total_size);
+ return -EINVAL;
+ }
+ msg_size = sizeof(header) + header.total_size;
+
+ if (msg_size > ringbuf_size - head || msg_size > tail - head) {
+ MB_WARN_ONCE(mb_chann, "Invalid message size %d, tail %d, head %d",
+ msg_size, tail, head);
+ return -EINVAL;
+ }
+
+ rest = sizeof(header) - sizeof(u32);
+ read_addr += sizeof(u32);
+ memcpy_fromio((u32 *)&header + 1, read_addr, rest);
+ read_addr += rest;
+
+ ret = mailbox_get_resp(mb_chann, &header, read_addr);
+
+ mailbox_set_headptr(mb_chann, head + msg_size);
+ /* After updating head, it can equal ringbuf_size. This is expected. */
+ trace_mbox_set_head(MAILBOX_NAME, mb_chann->msix_irq,
+ header.opcode, header.id);
+
+ return ret;
+}
+
+static irqreturn_t mailbox_irq_handler(int irq, void *p)
+{
+ struct mailbox_channel *mb_chann = p;
+
+ trace_mbox_irq_handle(MAILBOX_NAME, irq);
+ /* Schedule a rx_work to call the callback functions */
+ queue_work(mb_chann->work_q, &mb_chann->rx_work);
+
+ return IRQ_HANDLED;
+}
+
+static void mailbox_rx_worker(struct work_struct *rx_work)
+{
+ struct mailbox_channel *mb_chann;
+ int ret;
+
+ mb_chann = container_of(rx_work, struct mailbox_channel, rx_work);
+
+ if (READ_ONCE(mb_chann->bad_state)) {
+ MB_ERR(mb_chann, "Channel in bad state, work aborted");
+ return;
+ }
+
+again:
+ mailbox_reg_write(mb_chann, mb_chann->iohub_int_addr, 0);
+
+ while (1) {
+ /*
+ * If the return value is 0, keep consuming messages until there
+ * are no more messages or an error happens.
+ */
+ ret = mailbox_get_msg(mb_chann);
+ if (ret == -ENOENT)
+ break;
+
+ /* Any other error means the device doesn't look good; disable the irq. */
+ if (unlikely(ret)) {
+ MB_ERR(mb_chann, "Unexpected ret %d, disable irq", ret);
+ WRITE_ONCE(mb_chann->bad_state, true);
+ return;
+ }
+ }
+
+ /*
+ * The hardware will not generate an interrupt if the firmware creates
+ * a new response right after the driver clears the interrupt register.
+ * Check the interrupt register to make sure there is no new response
+ * before exiting.
+ */
+ if (mailbox_reg_read(mb_chann, mb_chann->iohub_int_addr))
+ goto again;
+}
+
+int xdna_mailbox_send_msg(struct mailbox_channel *mb_chann,
+ const struct xdna_mailbox_msg *msg, u64 tx_timeout)
+{
+ struct xdna_msg_header *header;
+ struct mailbox_msg *mb_msg;
+ size_t pkg_size;
+ int ret;
+
+ pkg_size = sizeof(*header) + msg->send_size;
+ if (pkg_size > mailbox_get_ringbuf_size(mb_chann, CHAN_RES_X2I)) {
+ MB_ERR(mb_chann, "Message size larger than ringbuf size");
+ return -EINVAL;
+ }
+
+ if (unlikely(!IS_ALIGNED(msg->send_size, 4))) {
+ MB_ERR(mb_chann, "Message must be 4 bytes align");
+ return -EINVAL;
+ }
+
+ /* The first word in the payload can NOT be TOMBSTONE */
+ if (unlikely(((u32 *)msg->send_data)[0] == TOMBSTONE)) {
+ MB_ERR(mb_chann, "Tomb stone in data");
+ return -EINVAL;
+ }
+
+ if (READ_ONCE(mb_chann->bad_state)) {
+ MB_ERR(mb_chann, "Channel in bad state");
+ return -EPIPE;
+ }
+
+ mb_msg = kzalloc(sizeof(*mb_msg) + pkg_size, GFP_KERNEL);
+ if (!mb_msg)
+ return -ENOMEM;
+
+ mb_msg->handle = msg->handle;
+ mb_msg->notify_cb = msg->notify_cb;
+ mb_msg->pkg_size = pkg_size;
+
+ header = &mb_msg->pkg.header;
+ /*
+ * Hardware uses total_size and size to split huge messages.
+ * We do not support that here, so the two values are the same.
+ */
+ header->total_size = msg->send_size;
+ header->sz_ver = FIELD_PREP(MSG_BODY_SZ, msg->send_size) |
+ FIELD_PREP(MSG_PROTO_VER, MSG_PROTOCOL_VERSION);
+ header->opcode = msg->opcode;
+ memcpy(mb_msg->pkg.payload, msg->send_data, msg->send_size);
+
+ ret = mailbox_acquire_msgid(mb_chann, mb_msg);
+ if (unlikely(ret < 0)) {
+ MB_ERR(mb_chann, "mailbox_acquire_msgid failed");
+ goto msg_id_failed;
+ }
+ header->id = ret;
+
+ MB_DBG(mb_chann, "opcode 0x%x size %d id 0x%x",
+ header->opcode, header->total_size, header->id);
+
+ ret = mailbox_send_msg(mb_chann, mb_msg);
+ if (ret) {
+ MB_DBG(mb_chann, "Error in mailbox send msg, ret %d", ret);
+ goto release_id;
+ }
+
+ return 0;
+
+release_id:
+ mailbox_release_msgid(mb_chann, header->id);
+msg_id_failed:
+ kfree(mb_msg);
+ return ret;
+}
+
+struct mailbox_channel *
+xdna_mailbox_create_channel(struct mailbox *mb,
+ const struct xdna_mailbox_chann_res *x2i,
+ const struct xdna_mailbox_chann_res *i2x,
+ u32 iohub_int_addr,
+ int mb_irq)
+{
+ struct mailbox_channel *mb_chann;
+ int ret;
+
+ if (!is_power_of_2(x2i->rb_size) || !is_power_of_2(i2x->rb_size)) {
+ pr_err("Ring buf size must be power of 2");
+ return NULL;
+ }
+
+ mb_chann = kzalloc(sizeof(*mb_chann), GFP_KERNEL);
+ if (!mb_chann)
+ return NULL;
+
+ mb_chann->mb = mb;
+ mb_chann->msix_irq = mb_irq;
+ mb_chann->iohub_int_addr = iohub_int_addr;
+ memcpy(&mb_chann->res[CHAN_RES_X2I], x2i, sizeof(*x2i));
+ memcpy(&mb_chann->res[CHAN_RES_I2X], i2x, sizeof(*i2x));
+
+ xa_init_flags(&mb_chann->chan_xa, XA_FLAGS_ALLOC | XA_FLAGS_LOCK_IRQ);
+ mb_chann->x2i_tail = mailbox_get_tailptr(mb_chann, CHAN_RES_X2I);
+ mb_chann->i2x_head = mailbox_get_headptr(mb_chann, CHAN_RES_I2X);
+
+ INIT_WORK(&mb_chann->rx_work, mailbox_rx_worker);
+ mb_chann->work_q = create_singlethread_workqueue(MAILBOX_NAME);
+ if (!mb_chann->work_q) {
+ MB_ERR(mb_chann, "Create workqueue failed");
+ goto free_and_out;
+ }
+
+ /* Everything looks good. Time to enable the irq handler */
+ ret = request_irq(mb_irq, mailbox_irq_handler, 0, MAILBOX_NAME, mb_chann);
+ if (ret) {
+ MB_ERR(mb_chann, "Failed to request irq %d ret %d", mb_irq, ret);
+ goto destroy_wq;
+ }
+
+ mb_chann->bad_state = false;
+
+ MB_DBG(mb_chann, "Mailbox channel created (irq: %d)", mb_chann->msix_irq);
+ return mb_chann;
+
+destroy_wq:
+ destroy_workqueue(mb_chann->work_q);
+free_and_out:
+ kfree(mb_chann);
+ return NULL;
+}
+
+int xdna_mailbox_destroy_channel(struct mailbox_channel *mb_chann)
+{
+ struct mailbox_msg *mb_msg;
+ unsigned long msg_id;
+
+ MB_DBG(mb_chann, "IRQ disabled and RX work cancelled");
+ free_irq(mb_chann->msix_irq, mb_chann);
+ destroy_workqueue(mb_chann->work_q);
+ /* We can clean up and release resources */
+
+ xa_for_each(&mb_chann->chan_xa, msg_id, mb_msg)
+ mailbox_release_msg(mb_chann, mb_msg);
+
+ xa_destroy(&mb_chann->chan_xa);
+
+ MB_DBG(mb_chann, "Mailbox channel destroyed, irq: %d", mb_chann->msix_irq);
+ kfree(mb_chann);
+ return 0;
+}
+
+void xdna_mailbox_stop_channel(struct mailbox_channel *mb_chann)
+{
+ /* Disable the irq and wait. This might sleep. */
+ disable_irq(mb_chann->msix_irq);
+
+ /* Cancel RX work and wait for it to finish */
+ cancel_work_sync(&mb_chann->rx_work);
+ MB_DBG(mb_chann, "IRQ disabled and RX work cancelled");
+}
+
+struct mailbox *xdnam_mailbox_create(struct drm_device *ddev,
+ const struct xdna_mailbox_res *res)
+{
+ struct mailbox *mb;
+
+ mb = drmm_kzalloc(ddev, sizeof(*mb), GFP_KERNEL);
+ if (!mb)
+ return NULL;
+ mb->dev = ddev->dev;
+
+ /* mailbox and ring buf base and size information */
+ memcpy(&mb->res, res, sizeof(*res));
+
+ return mb;
+}
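
For reference, the sketch below is a minimal, self-contained model of the x2i ring-buffer space check and wrap handling that mailbox_send_msg() performs above. The helper name, the standalone types and the TOMBSTONE value are illustrative only; the real driver writes the tombstone and the packet to the device ring buffer through writel()/memcpy_toio().

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

#define TOMBSTONE 0xDEADFACE	/* illustrative value, not the driver's constant */

/*
 * Simplified model of the x2i space check: the host owns the tail, the
 * firmware owns the head, and a TOMBSTONE word at the current tail marks
 * an early wrap when the packet would not fit before the end of the ring.
 */
static bool x2i_has_space(uint32_t head, uint32_t tail, uint32_t rb_size,
			  uint32_t pkg_size, uint32_t *write_off, bool *wrap)
{
	uint32_t tmp_tail = tail + pkg_size;

	*wrap = false;

	/* Tail is behind head: the packet must not catch up with the head */
	if (tail < head && tmp_tail >= head)
		return false;

	/* Packet spills past the end and there is no room at offset 0 either */
	if (tail >= head && tmp_tail > rb_size - sizeof(uint32_t) && pkg_size >= head)
		return false;

	/* Packet fits only after wrapping: write TOMBSTONE at tail, restart at 0 */
	if (tail >= head && tmp_tail > rb_size - sizeof(uint32_t)) {
		*wrap = true;
		*write_off = 0;
		return true;
	}

	*write_off = tail;
	return true;
}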
diff --git a/drivers/accel/amdxdna/amdxdna_mailbox.h b/drivers/accel/amdxdna/amdxdna_mailbox.h
new file mode 100644
index 000000000000..ea367f2fb738
--- /dev/null
+++ b/drivers/accel/amdxdna/amdxdna_mailbox.h
@@ -0,0 +1,124 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2022-2024, Advanced Micro Devices, Inc.
+ */
+
+#ifndef _AIE2_MAILBOX_H_
+#define _AIE2_MAILBOX_H_
+
+struct mailbox;
+struct mailbox_channel;
+
+/*
+ * xdna_mailbox_msg - message struct
+ *
+ * @opcode: opcode for firmware
+ * @handle: handle used for the notify callback
+ * @notify_cb: callback function to notify the sender when there is a response
+ * @send_data: pointer to the data to send
+ * @send_size: size of the data to send
+ *
+ * The mailbox will split the sending data into multiple firmware messages if
+ * the size of the data is too big. This is transparent to the sender; the
+ * sender will receive a single notification.
+ */
+struct xdna_mailbox_msg {
+ u32 opcode;
+ void *handle;
+ int (*notify_cb)(void *handle, void __iomem *data, size_t size);
+ u8 *send_data;
+ size_t send_size;
+};
+
+/*
+ * xdna_mailbox_res - mailbox hardware resource
+ *
+ * @ringbuf_base: ring buffer base address
+ * @ringbuf_size: ring buffer size
+ * @mbox_base: mailbox base address
+ * @mbox_size: mailbox size
+ * @name: mailbox name
+ */
+struct xdna_mailbox_res {
+ void __iomem *ringbuf_base;
+ size_t ringbuf_size;
+ void __iomem *mbox_base;
+ size_t mbox_size;
+ const char *name;
+};
+
+/*
+ * xdna_mailbox_chann_res - resources
+ *
+ * @rb_start_addr: ring buffer start address
+ * @rb_size: ring buffer size
+ * @mb_head_ptr_reg: mailbox head pointer register
+ * @mb_tail_ptr_reg: mailbox tail pointer register
+ */
+struct xdna_mailbox_chann_res {
+ u32 rb_start_addr;
+ u32 rb_size;
+ u32 mb_head_ptr_reg;
+ u32 mb_tail_ptr_reg;
+};
+
+/*
+ * xdnam_mailbox_create() -- create and initialize the mailbox subsystem
+ *
+ * @ddev: DRM device pointer
+ * @res: SRAM and mailbox resources
+ *
+ * Return: A mailbox subsystem handle on success, or NULL on failure.
+ */
+struct mailbox *xdnam_mailbox_create(struct drm_device *ddev,
+ const struct xdna_mailbox_res *res);
+
+/*
+ * xdna_mailbox_create_channel() -- Create a mailbox channel instance
+ *
+ * @mailbox: the handle returned from xdnam_mailbox_create()
+ * @x2i: host to firmware mailbox resources
+ * @i2x: firmware to host mailbox resources
+ * @xdna_mailbox_intr_reg: register addr of MSI-X interrupt
+ * @mb_irq: Linux IRQ number associated with mailbox MSI-X interrupt vector index
+ *
+ * Return: A mailbox channel handle on success, or NULL on failure.
+ */
+struct mailbox_channel *
+xdna_mailbox_create_channel(struct mailbox *mailbox,
+ const struct xdna_mailbox_chann_res *x2i,
+ const struct xdna_mailbox_chann_res *i2x,
+ u32 xdna_mailbox_intr_reg,
+ int mb_irq);
+
+/*
+ * xdna_mailbox_destroy_channel() -- destroy mailbox channel
+ *
+ * @mailbox_chann: the handle returned from xdna_mailbox_create_channel()
+ *
+ * Return: 0 on success, or an error code on failure.
+ */
+int xdna_mailbox_destroy_channel(struct mailbox_channel *mailbox_chann);
+
+/*
+ * xdna_mailbox_stop_channel() -- stop mailbox channel
+ *
+ * @mailbox_chann: the handle returned from xdna_mailbox_create_channel()
+ */
+void xdna_mailbox_stop_channel(struct mailbox_channel *mailbox_chann);
+
+/*
+ * xdna_mailbox_send_msg() -- Send a message
+ *
+ * @mailbox_chann: Mailbox channel handle
+ * @msg: message struct for message information
+ * @tx_timeout: timeout in milliseconds for sending the message
+ *
+ * Return: 0 on success, or an error code on failure.
+ */
+int xdna_mailbox_send_msg(struct mailbox_channel *mailbox_chann,
+ const struct xdna_mailbox_msg *msg, u64 tx_timeout);
+
+#endif /* _AIE2_MAILBOX_H_ */
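
A hypothetical caller of the channel API declared above might look like the sketch below. The register offsets, ring-buffer layout, opcode and payload are invented for illustration; real values come from the per-NPU register files and the aie2 message headers, and the completion-based callback is only one possible way to consume the response. It assumes <linux/completion.h> and this header are included.

static int example_notify_cb(void *handle, void __iomem *data, size_t size)
{
	struct completion *done = handle;

	/* data is NULL when the channel is torn down before a response */
	complete(done);
	return 0;
}

static int example_send(struct mailbox *mb, int irq)
{
	/* Hypothetical channel resources; rb_size must be a power of 2 */
	struct xdna_mailbox_chann_res x2i = {
		.rb_start_addr = 0x0, .rb_size = 0x400,
		.mb_head_ptr_reg = 0x100, .mb_tail_ptr_reg = 0x104,
	};
	struct xdna_mailbox_chann_res i2x = {
		.rb_start_addr = 0x400, .rb_size = 0x400,
		.mb_head_ptr_reg = 0x108, .mb_tail_ptr_reg = 0x10c,
	};
	DECLARE_COMPLETION_ONSTACK(done);
	u32 payload[2] = { 0x1, 0x2 };	/* 4-byte aligned, first word != TOMBSTONE */
	struct xdna_mailbox_msg msg = {
		.opcode = 0x42,		/* hypothetical opcode */
		.handle = &done,
		.notify_cb = example_notify_cb,
		.send_data = (u8 *)payload,
		.send_size = sizeof(payload),
	};
	struct mailbox_channel *chann;
	int ret;

	chann = xdna_mailbox_create_channel(mb, &x2i, &i2x, 0x200 /* iohub intr reg */, irq);
	if (!chann)
		return -ENODEV;

	ret = xdna_mailbox_send_msg(chann, &msg, 2000 /* ms */);
	if (!ret)
		wait_for_completion(&done);

	xdna_mailbox_stop_channel(chann);	/* disable irq and flush rx work */
	xdna_mailbox_destroy_channel(chann);	/* release pending msgs and free */
	return ret;
}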
diff --git a/drivers/accel/amdxdna/amdxdna_mailbox_helper.c b/drivers/accel/amdxdna/amdxdna_mailbox_helper.c
new file mode 100644
index 000000000000..6d0c24513476
--- /dev/null
+++ b/drivers/accel/amdxdna/amdxdna_mailbox_helper.c
@@ -0,0 +1,61 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2024, Advanced Micro Devices, Inc.
+ */
+
+#include <drm/amdxdna_accel.h>
+#include <drm/drm_device.h>
+#include <drm/drm_print.h>
+#include <drm/drm_gem.h>
+#include <drm/drm_gem_shmem_helper.h>
+#include <drm/gpu_scheduler.h>
+#include <linux/completion.h>
+
+#include "amdxdna_gem.h"
+#include "amdxdna_mailbox.h"
+#include "amdxdna_mailbox_helper.h"
+#include "amdxdna_pci_drv.h"
+
+int xdna_msg_cb(void *handle, void __iomem *data, size_t size)
+{
+ struct xdna_notify *cb_arg = handle;
+ int ret;
+
+ if (unlikely(!data))
+ goto out;
+
+ if (unlikely(cb_arg->size != size)) {
+ cb_arg->error = -EINVAL;
+ goto out;
+ }
+
+ memcpy_fromio(cb_arg->data, data, cb_arg->size);
+ print_hex_dump_debug("resp data: ", DUMP_PREFIX_OFFSET,
+ 16, 4, cb_arg->data, cb_arg->size, true);
+out:
+ ret = cb_arg->error;
+ complete(&cb_arg->comp);
+ return ret;
+}
+
+int xdna_send_msg_wait(struct amdxdna_dev *xdna, struct mailbox_channel *chann,
+ struct xdna_mailbox_msg *msg)
+{
+ struct xdna_notify *hdl = msg->handle;
+ int ret;
+
+ ret = xdna_mailbox_send_msg(chann, msg, TX_TIMEOUT);
+ if (ret) {
+ XDNA_ERR(xdna, "Send message failed, ret %d", ret);
+ return ret;
+ }
+
+ ret = wait_for_completion_timeout(&hdl->comp,
+ msecs_to_jiffies(RX_TIMEOUT));
+ if (!ret) {
+ XDNA_ERR(xdna, "Wait for completion timeout");
+ return -ETIME;
+ }
+
+ return hdl->error;
+}
diff --git a/drivers/accel/amdxdna/amdxdna_mailbox_helper.h b/drivers/accel/amdxdna/amdxdna_mailbox_helper.h
new file mode 100644
index 000000000000..710ff8873d61
--- /dev/null
+++ b/drivers/accel/amdxdna/amdxdna_mailbox_helper.h
@@ -0,0 +1,42 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2023-2024, Advanced Micro Devices, Inc.
+ */
+
+#ifndef _AMDXDNA_MAILBOX_HELPER_H
+#define _AMDXDNA_MAILBOX_HELPER_H
+
+#define TX_TIMEOUT 2000 /* milliseconds */
+#define RX_TIMEOUT 5000 /* milliseconds */
+
+struct amdxdna_dev;
+
+struct xdna_notify {
+ struct completion comp;
+ u32 *data;
+ size_t size;
+ int error;
+};
+
+#define DECLARE_XDNA_MSG_COMMON(name, op, status) \
+ struct name##_req req = { 0 }; \
+ struct name##_resp resp = { status }; \
+ struct xdna_notify hdl = { \
+ .error = 0, \
+ .data = (u32 *)&resp, \
+ .size = sizeof(resp), \
+ .comp = COMPLETION_INITIALIZER_ONSTACK(hdl.comp), \
+ }; \
+ struct xdna_mailbox_msg msg = { \
+ .send_data = (u8 *)&req, \
+ .send_size = sizeof(req), \
+ .handle = &hdl, \
+ .opcode = op, \
+ .notify_cb = xdna_msg_cb, \
+ }
+
+int xdna_msg_cb(void *handle, void __iomem *data, size_t size);
+int xdna_send_msg_wait(struct amdxdna_dev *xdna, struct mailbox_channel *chann,
+ struct xdna_mailbox_msg *msg);
+
+#endif /* _AMDXDNA_MAILBOX_HELPER_H */
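
As an illustration of how DECLARE_XDNA_MSG_COMMON() and xdna_send_msg_wait() above fit together, here is a hypothetical synchronous command. The suspend_fw request/response structs and the MSG_OP_SUSPEND_FW opcode are invented names for this sketch; the real message layouts and opcodes live in aie2_msg_priv.h.

/* Hypothetical message definitions, for illustration only */
#define MSG_OP_SUSPEND_FW	0x1
struct suspend_fw_req { u32 reserved; };
struct suspend_fw_resp { u32 status; };

static int example_suspend_fw(struct amdxdna_dev *xdna, struct mailbox_channel *chann)
{
	/* Declares req, resp, hdl and msg on the stack and wires them together */
	DECLARE_XDNA_MSG_COMMON(suspend_fw, MSG_OP_SUSPEND_FW, -1);
	int ret;

	/* Sends msg, waits up to RX_TIMEOUT ms; resp then holds the reply */
	ret = xdna_send_msg_wait(xdna, chann, &msg);
	if (ret)
		return ret;

	return resp.status ? -EIO : 0;
}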
diff --git a/drivers/accel/amdxdna/amdxdna_pci_drv.c b/drivers/accel/amdxdna/amdxdna_pci_drv.c
new file mode 100644
index 000000000000..f5b8497cf5ad
--- /dev/null
+++ b/drivers/accel/amdxdna/amdxdna_pci_drv.c
@@ -0,0 +1,434 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2022-2024, Advanced Micro Devices, Inc.
+ */
+
+#include <drm/amdxdna_accel.h>
+#include <drm/drm_accel.h>
+#include <drm/drm_drv.h>
+#include <drm/drm_gem.h>
+#include <drm/drm_gem_shmem_helper.h>
+#include <drm/drm_ioctl.h>
+#include <drm/drm_managed.h>
+#include <drm/gpu_scheduler.h>
+#include <linux/iommu.h>
+#include <linux/pci.h>
+#include <linux/pm_runtime.h>
+
+#include "amdxdna_ctx.h"
+#include "amdxdna_gem.h"
+#include "amdxdna_pci_drv.h"
+
+#define AMDXDNA_AUTOSUSPEND_DELAY 5000 /* milliseconds */
+
+MODULE_FIRMWARE("amdnpu/1502_00/npu.sbin");
+MODULE_FIRMWARE("amdnpu/17f0_10/npu.sbin");
+MODULE_FIRMWARE("amdnpu/17f0_11/npu.sbin");
+MODULE_FIRMWARE("amdnpu/17f0_20/npu.sbin");
+
+/*
+ * Bind the driver based on the (vendor_id, device_id) pair and later use the
+ * (device_id, rev_id) pair as a key to select the device. Devices with the
+ * same device_id have a very similar interface to the host driver.
+ */
+static const struct pci_device_id pci_ids[] = {
+ { PCI_DEVICE(PCI_VENDOR_ID_AMD, 0x1502) },
+ { PCI_DEVICE(PCI_VENDOR_ID_AMD, 0x17f0) },
+ {0}
+};
+
+MODULE_DEVICE_TABLE(pci, pci_ids);
+
+static const struct amdxdna_device_id amdxdna_ids[] = {
+ { 0x1502, 0x0, &dev_npu1_info },
+ { 0x17f0, 0x0, &dev_npu2_info },
+ { 0x17f0, 0x10, &dev_npu4_info },
+ { 0x17f0, 0x11, &dev_npu5_info },
+ { 0x17f0, 0x20, &dev_npu6_info },
+ {0}
+};
+
+static int amdxdna_drm_open(struct drm_device *ddev, struct drm_file *filp)
+{
+ struct amdxdna_dev *xdna = to_xdna_dev(ddev);
+ struct amdxdna_client *client;
+ int ret;
+
+ ret = pm_runtime_resume_and_get(ddev->dev);
+ if (ret) {
+ XDNA_ERR(xdna, "Failed to get rpm, ret %d", ret);
+ return ret;
+ }
+
+ client = kzalloc(sizeof(*client), GFP_KERNEL);
+ if (!client) {
+ ret = -ENOMEM;
+ goto put_rpm;
+ }
+
+ client->pid = pid_nr(rcu_access_pointer(filp->pid));
+ client->xdna = xdna;
+
+ client->sva = iommu_sva_bind_device(xdna->ddev.dev, current->mm);
+ if (IS_ERR(client->sva)) {
+ ret = PTR_ERR(client->sva);
+ XDNA_ERR(xdna, "SVA bind device failed, ret %d", ret);
+ goto failed;
+ }
+ client->pasid = iommu_sva_get_pasid(client->sva);
+ if (client->pasid == IOMMU_PASID_INVALID) {
+ XDNA_ERR(xdna, "SVA get pasid failed");
+ ret = -ENODEV;
+ goto unbind_sva;
+ }
+ mutex_init(&client->hwctx_lock);
+ init_srcu_struct(&client->hwctx_srcu);
+ xa_init_flags(&client->hwctx_xa, XA_FLAGS_ALLOC);
+ mutex_init(&client->mm_lock);
+
+ mutex_lock(&xdna->dev_lock);
+ list_add_tail(&client->node, &xdna->client_list);
+ mutex_unlock(&xdna->dev_lock);
+
+ filp->driver_priv = client;
+ client->filp = filp;
+
+ XDNA_DBG(xdna, "pid %d opened", client->pid);
+ return 0;
+
+unbind_sva:
+ iommu_sva_unbind_device(client->sva);
+failed:
+ kfree(client);
+put_rpm:
+ pm_runtime_mark_last_busy(ddev->dev);
+ pm_runtime_put_autosuspend(ddev->dev);
+
+ return ret;
+}
+
+static void amdxdna_drm_close(struct drm_device *ddev, struct drm_file *filp)
+{
+ struct amdxdna_client *client = filp->driver_priv;
+ struct amdxdna_dev *xdna = to_xdna_dev(ddev);
+
+ XDNA_DBG(xdna, "closing pid %d", client->pid);
+
+ xa_destroy(&client->hwctx_xa);
+ cleanup_srcu_struct(&client->hwctx_srcu);
+ mutex_destroy(&client->hwctx_lock);
+ mutex_destroy(&client->mm_lock);
+ if (client->dev_heap)
+ drm_gem_object_put(to_gobj(client->dev_heap));
+
+ iommu_sva_unbind_device(client->sva);
+
+ XDNA_DBG(xdna, "pid %d closed", client->pid);
+ kfree(client);
+ pm_runtime_mark_last_busy(ddev->dev);
+ pm_runtime_put_autosuspend(ddev->dev);
+}
+
+static int amdxdna_flush(struct file *f, fl_owner_t id)
+{
+ struct drm_file *filp = f->private_data;
+ struct amdxdna_client *client = filp->driver_priv;
+ struct amdxdna_dev *xdna = client->xdna;
+ int idx;
+
+ XDNA_DBG(xdna, "PID %d flushing...", client->pid);
+ if (!drm_dev_enter(&xdna->ddev, &idx))
+ return 0;
+
+ mutex_lock(&xdna->dev_lock);
+ list_del_init(&client->node);
+ mutex_unlock(&xdna->dev_lock);
+ amdxdna_hwctx_remove_all(client);
+
+ drm_dev_exit(idx);
+ return 0;
+}
+
+static int amdxdna_drm_get_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
+{
+ struct amdxdna_client *client = filp->driver_priv;
+ struct amdxdna_dev *xdna = to_xdna_dev(dev);
+ struct amdxdna_drm_get_info *args = data;
+ int ret;
+
+ if (!xdna->dev_info->ops->get_aie_info)
+ return -EOPNOTSUPP;
+
+ XDNA_DBG(xdna, "Request parameter %u", args->param);
+ mutex_lock(&xdna->dev_lock);
+ ret = xdna->dev_info->ops->get_aie_info(client, args);
+ mutex_unlock(&xdna->dev_lock);
+ return ret;
+}
+
+static int amdxdna_drm_set_state_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
+{
+ struct amdxdna_client *client = filp->driver_priv;
+ struct amdxdna_dev *xdna = to_xdna_dev(dev);
+ struct amdxdna_drm_set_state *args = data;
+ int ret;
+
+ if (!xdna->dev_info->ops->set_aie_state)
+ return -EOPNOTSUPP;
+
+ XDNA_DBG(xdna, "Request parameter %u", args->param);
+ mutex_lock(&xdna->dev_lock);
+ ret = xdna->dev_info->ops->set_aie_state(client, args);
+ mutex_unlock(&xdna->dev_lock);
+
+ return ret;
+}
+
+static const struct drm_ioctl_desc amdxdna_drm_ioctls[] = {
+ /* Context */
+ DRM_IOCTL_DEF_DRV(AMDXDNA_CREATE_HWCTX, amdxdna_drm_create_hwctx_ioctl, 0),
+ DRM_IOCTL_DEF_DRV(AMDXDNA_DESTROY_HWCTX, amdxdna_drm_destroy_hwctx_ioctl, 0),
+ DRM_IOCTL_DEF_DRV(AMDXDNA_CONFIG_HWCTX, amdxdna_drm_config_hwctx_ioctl, 0),
+ /* BO */
+ DRM_IOCTL_DEF_DRV(AMDXDNA_CREATE_BO, amdxdna_drm_create_bo_ioctl, 0),
+ DRM_IOCTL_DEF_DRV(AMDXDNA_GET_BO_INFO, amdxdna_drm_get_bo_info_ioctl, 0),
+ DRM_IOCTL_DEF_DRV(AMDXDNA_SYNC_BO, amdxdna_drm_sync_bo_ioctl, 0),
+ /* Execution */
+ DRM_IOCTL_DEF_DRV(AMDXDNA_EXEC_CMD, amdxdna_drm_submit_cmd_ioctl, 0),
+ /* AIE hardware */
+ DRM_IOCTL_DEF_DRV(AMDXDNA_GET_INFO, amdxdna_drm_get_info_ioctl, 0),
+ DRM_IOCTL_DEF_DRV(AMDXDNA_SET_STATE, amdxdna_drm_set_state_ioctl, DRM_ROOT_ONLY),
+};
+
+static const struct file_operations amdxdna_fops = {
+ .owner = THIS_MODULE,
+ .open = accel_open,
+ .release = drm_release,
+ .flush = amdxdna_flush,
+ .unlocked_ioctl = drm_ioctl,
+ .compat_ioctl = drm_compat_ioctl,
+ .poll = drm_poll,
+ .read = drm_read,
+ .llseek = noop_llseek,
+ .mmap = drm_gem_mmap,
+ .fop_flags = FOP_UNSIGNED_OFFSET,
+};
+
+const struct drm_driver amdxdna_drm_drv = {
+ .driver_features = DRIVER_GEM | DRIVER_COMPUTE_ACCEL |
+ DRIVER_SYNCOBJ | DRIVER_SYNCOBJ_TIMELINE,
+ .fops = &amdxdna_fops,
+ .name = "amdxdna_accel_driver",
+ .desc = "AMD XDNA DRM implementation",
+ .open = amdxdna_drm_open,
+ .postclose = amdxdna_drm_close,
+ .ioctls = amdxdna_drm_ioctls,
+ .num_ioctls = ARRAY_SIZE(amdxdna_drm_ioctls),
+
+ .gem_create_object = amdxdna_gem_create_object_cb,
+};
+
+static const struct amdxdna_dev_info *
+amdxdna_get_dev_info(struct pci_dev *pdev)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(amdxdna_ids); i++) {
+ if (pdev->device == amdxdna_ids[i].device &&
+ pdev->revision == amdxdna_ids[i].revision)
+ return amdxdna_ids[i].dev_info;
+ }
+ return NULL;
+}
+
+static int amdxdna_probe(struct pci_dev *pdev, const struct pci_device_id *id)
+{
+ struct device *dev = &pdev->dev;
+ struct amdxdna_dev *xdna;
+ int ret;
+
+ xdna = devm_drm_dev_alloc(dev, &amdxdna_drm_drv, typeof(*xdna), ddev);
+ if (IS_ERR(xdna))
+ return PTR_ERR(xdna);
+
+ xdna->dev_info = amdxdna_get_dev_info(pdev);
+ if (!xdna->dev_info)
+ return -ENODEV;
+
+ drmm_mutex_init(&xdna->ddev, &xdna->dev_lock);
+ init_rwsem(&xdna->notifier_lock);
+ INIT_LIST_HEAD(&xdna->client_list);
+ pci_set_drvdata(pdev, xdna);
+
+ if (IS_ENABLED(CONFIG_LOCKDEP)) {
+ fs_reclaim_acquire(GFP_KERNEL);
+ might_lock(&xdna->notifier_lock);
+ fs_reclaim_release(GFP_KERNEL);
+ }
+
+ mutex_lock(&xdna->dev_lock);
+ ret = xdna->dev_info->ops->init(xdna);
+ mutex_unlock(&xdna->dev_lock);
+ if (ret) {
+ XDNA_ERR(xdna, "Hardware init failed, ret %d", ret);
+ return ret;
+ }
+
+ ret = amdxdna_sysfs_init(xdna);
+ if (ret) {
+ XDNA_ERR(xdna, "Create amdxdna attrs failed: %d", ret);
+ goto failed_dev_fini;
+ }
+
+ pm_runtime_set_autosuspend_delay(dev, AMDXDNA_AUTOSUSPEND_DELAY);
+ pm_runtime_use_autosuspend(dev);
+ pm_runtime_allow(dev);
+
+ ret = drm_dev_register(&xdna->ddev, 0);
+ if (ret) {
+ XDNA_ERR(xdna, "DRM register failed, ret %d", ret);
+ pm_runtime_forbid(dev);
+ goto failed_sysfs_fini;
+ }
+
+ pm_runtime_mark_last_busy(dev);
+ pm_runtime_put_autosuspend(dev);
+ return 0;
+
+failed_sysfs_fini:
+ amdxdna_sysfs_fini(xdna);
+failed_dev_fini:
+ mutex_lock(&xdna->dev_lock);
+ xdna->dev_info->ops->fini(xdna);
+ mutex_unlock(&xdna->dev_lock);
+ return ret;
+}
+
+static void amdxdna_remove(struct pci_dev *pdev)
+{
+ struct amdxdna_dev *xdna = pci_get_drvdata(pdev);
+ struct device *dev = &pdev->dev;
+ struct amdxdna_client *client;
+
+ pm_runtime_get_noresume(dev);
+ pm_runtime_forbid(dev);
+
+ drm_dev_unplug(&xdna->ddev);
+ amdxdna_sysfs_fini(xdna);
+
+ mutex_lock(&xdna->dev_lock);
+ client = list_first_entry_or_null(&xdna->client_list,
+ struct amdxdna_client, node);
+ while (client) {
+ list_del_init(&client->node);
+ mutex_unlock(&xdna->dev_lock);
+
+ amdxdna_hwctx_remove_all(client);
+
+ mutex_lock(&xdna->dev_lock);
+ client = list_first_entry_or_null(&xdna->client_list,
+ struct amdxdna_client, node);
+ }
+
+ xdna->dev_info->ops->fini(xdna);
+ mutex_unlock(&xdna->dev_lock);
+}
+
+static int amdxdna_dev_suspend_nolock(struct amdxdna_dev *xdna)
+{
+ if (xdna->dev_info->ops->suspend)
+ xdna->dev_info->ops->suspend(xdna);
+
+ return 0;
+}
+
+static int amdxdna_dev_resume_nolock(struct amdxdna_dev *xdna)
+{
+ if (xdna->dev_info->ops->resume)
+ return xdna->dev_info->ops->resume(xdna);
+
+ return 0;
+}
+
+static int amdxdna_pmops_suspend(struct device *dev)
+{
+ struct amdxdna_dev *xdna = pci_get_drvdata(to_pci_dev(dev));
+ struct amdxdna_client *client;
+
+ mutex_lock(&xdna->dev_lock);
+ list_for_each_entry(client, &xdna->client_list, node)
+ amdxdna_hwctx_suspend(client);
+
+ amdxdna_dev_suspend_nolock(xdna);
+ mutex_unlock(&xdna->dev_lock);
+
+ return 0;
+}
+
+static int amdxdna_pmops_resume(struct device *dev)
+{
+ struct amdxdna_dev *xdna = pci_get_drvdata(to_pci_dev(dev));
+ struct amdxdna_client *client;
+ int ret;
+
+ XDNA_INFO(xdna, "firmware resuming...");
+ mutex_lock(&xdna->dev_lock);
+ ret = amdxdna_dev_resume_nolock(xdna);
+ if (ret) {
+ XDNA_ERR(xdna, "resume NPU firmware failed");
+ mutex_unlock(&xdna->dev_lock);
+ return ret;
+ }
+
+ XDNA_INFO(xdna, "hardware context resuming...");
+ list_for_each_entry(client, &xdna->client_list, node)
+ amdxdna_hwctx_resume(client);
+ mutex_unlock(&xdna->dev_lock);
+
+ return 0;
+}
+
+static int amdxdna_rpmops_suspend(struct device *dev)
+{
+ struct amdxdna_dev *xdna = pci_get_drvdata(to_pci_dev(dev));
+ int ret;
+
+ mutex_lock(&xdna->dev_lock);
+ ret = amdxdna_dev_suspend_nolock(xdna);
+ mutex_unlock(&xdna->dev_lock);
+
+ XDNA_DBG(xdna, "Runtime suspend done ret: %d", ret);
+ return ret;
+}
+
+static int amdxdna_rpmops_resume(struct device *dev)
+{
+ struct amdxdna_dev *xdna = pci_get_drvdata(to_pci_dev(dev));
+ int ret;
+
+ mutex_lock(&xdna->dev_lock);
+ ret = amdxdna_dev_resume_nolock(xdna);
+ mutex_unlock(&xdna->dev_lock);
+
+ XDNA_DBG(xdna, "Runtime resume done ret: %d", ret);
+ return ret;
+}
+
+static const struct dev_pm_ops amdxdna_pm_ops = {
+ SYSTEM_SLEEP_PM_OPS(amdxdna_pmops_suspend, amdxdna_pmops_resume)
+ RUNTIME_PM_OPS(amdxdna_rpmops_suspend, amdxdna_rpmops_resume, NULL)
+};
+
+static struct pci_driver amdxdna_pci_driver = {
+ .name = KBUILD_MODNAME,
+ .id_table = pci_ids,
+ .probe = amdxdna_probe,
+ .remove = amdxdna_remove,
+ .driver.pm = &amdxdna_pm_ops,
+};
+
+module_pci_driver(amdxdna_pci_driver);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("XRT Team <runtimeca39d@amd.com>");
+MODULE_DESCRIPTION("amdxdna driver");
diff --git a/drivers/accel/amdxdna/amdxdna_pci_drv.h b/drivers/accel/amdxdna/amdxdna_pci_drv.h
new file mode 100644
index 000000000000..37848a8d8031
--- /dev/null
+++ b/drivers/accel/amdxdna/amdxdna_pci_drv.h
@@ -0,0 +1,147 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2022-2024, Advanced Micro Devices, Inc.
+ */
+
+#ifndef _AMDXDNA_PCI_DRV_H_
+#define _AMDXDNA_PCI_DRV_H_
+
+#include <linux/xarray.h>
+
+#define XDNA_INFO(xdna, fmt, args...) drm_info(&(xdna)->ddev, fmt, ##args)
+#define XDNA_WARN(xdna, fmt, args...) drm_warn(&(xdna)->ddev, "%s: "fmt, __func__, ##args)
+#define XDNA_ERR(xdna, fmt, args...) drm_err(&(xdna)->ddev, "%s: "fmt, __func__, ##args)
+#define XDNA_DBG(xdna, fmt, args...) drm_dbg(&(xdna)->ddev, fmt, ##args)
+#define XDNA_INFO_ONCE(xdna, fmt, args...) drm_info_once(&(xdna)->ddev, fmt, ##args)
+
+#define XDNA_MBZ_DBG(xdna, ptr, sz) \
+ ({ \
+ int __i; \
+ int __ret = 0; \
+ u8 *__ptr = (u8 *)(ptr); \
+ for (__i = 0; __i < (sz); __i++) { \
+ if (__ptr[__i]) { \
+ XDNA_DBG(xdna, "MBZ check failed"); \
+ __ret = -EINVAL; \
+ break; \
+ } \
+ } \
+ __ret; \
+ })
+
+#define to_xdna_dev(drm_dev) \
+ ((struct amdxdna_dev *)container_of(drm_dev, struct amdxdna_dev, ddev))
+
+extern const struct drm_driver amdxdna_drm_drv;
+
+struct amdxdna_client;
+struct amdxdna_dev;
+struct amdxdna_drm_get_info;
+struct amdxdna_drm_set_state;
+struct amdxdna_gem_obj;
+struct amdxdna_hwctx;
+struct amdxdna_sched_job;
+
+/*
+ * struct amdxdna_dev_ops - Device hardware operation callbacks
+ */
+struct amdxdna_dev_ops {
+ int (*init)(struct amdxdna_dev *xdna);
+ void (*fini)(struct amdxdna_dev *xdna);
+ int (*resume)(struct amdxdna_dev *xdna);
+ void (*suspend)(struct amdxdna_dev *xdna);
+ int (*hwctx_init)(struct amdxdna_hwctx *hwctx);
+ void (*hwctx_fini)(struct amdxdna_hwctx *hwctx);
+ int (*hwctx_config)(struct amdxdna_hwctx *hwctx, u32 type, u64 value, void *buf, u32 size);
+ void (*hmm_invalidate)(struct amdxdna_gem_obj *abo, unsigned long cur_seq);
+ void (*hwctx_suspend)(struct amdxdna_hwctx *hwctx);
+ void (*hwctx_resume)(struct amdxdna_hwctx *hwctx);
+ int (*cmd_submit)(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job, u64 *seq);
+ int (*get_aie_info)(struct amdxdna_client *client, struct amdxdna_drm_get_info *args);
+ int (*set_aie_state)(struct amdxdna_client *client, struct amdxdna_drm_set_state *args);
+};
+
+/*
+ * struct amdxdna_dev_info - Device hardware information
+ * Records static device information, such as the reg, mbox, PSP and SMU BAR indexes
+ */
+struct amdxdna_dev_info {
+ int reg_bar;
+ int mbox_bar;
+ int sram_bar;
+ int psp_bar;
+ int smu_bar;
+ int device_type;
+ int first_col;
+ u32 dev_mem_buf_shift;
+ u64 dev_mem_base;
+ size_t dev_mem_size;
+ char *vbnv;
+ const struct amdxdna_dev_priv *dev_priv;
+ const struct amdxdna_dev_ops *ops;
+};
+
+struct amdxdna_fw_ver {
+ u32 major;
+ u32 minor;
+ u32 sub;
+ u32 build;
+};
+
+struct amdxdna_dev {
+ struct drm_device ddev;
+ struct amdxdna_dev_hdl *dev_handle;
+ const struct amdxdna_dev_info *dev_info;
+ void *xrs_hdl;
+
+ struct mutex dev_lock; /* per device lock */
+ struct list_head client_list;
+ struct amdxdna_fw_ver fw_ver;
+ struct rw_semaphore notifier_lock; /* for mmu notifier */
+};
+
+/*
+ * struct amdxdna_device_id - PCI device info
+ */
+struct amdxdna_device_id {
+ unsigned short device;
+ u8 revision;
+ const struct amdxdna_dev_info *dev_info;
+};
+
+/*
+ * struct amdxdna_client - amdxdna client
+ * A per-fd data structure for managing contexts and other per-process state.
+ */
+struct amdxdna_client {
+ struct list_head node;
+ pid_t pid;
+ struct mutex hwctx_lock; /* protect hwctx */
+ /* do NOT wait on this srcu when hwctx_lock is held */
+ struct srcu_struct hwctx_srcu;
+ struct xarray hwctx_xa;
+ u32 next_hwctxid;
+ struct amdxdna_dev *xdna;
+ struct drm_file *filp;
+
+ struct mutex mm_lock; /* protect memory related */
+ struct amdxdna_gem_obj *dev_heap;
+
+ struct iommu_sva *sva;
+ int pasid;
+};
+
+#define amdxdna_for_each_hwctx(client, hwctx_id, entry) \
+ xa_for_each(&(client)->hwctx_xa, hwctx_id, entry)
+
+/* Add device info below */
+extern const struct amdxdna_dev_info dev_npu1_info;
+extern const struct amdxdna_dev_info dev_npu2_info;
+extern const struct amdxdna_dev_info dev_npu4_info;
+extern const struct amdxdna_dev_info dev_npu5_info;
+extern const struct amdxdna_dev_info dev_npu6_info;
+
+int amdxdna_sysfs_init(struct amdxdna_dev *xdna);
+void amdxdna_sysfs_fini(struct amdxdna_dev *xdna);
+
+#endif /* _AMDXDNA_PCI_DRV_H_ */
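
A reader walking a client's hardware contexts with the amdxdna_for_each_hwctx() helper above would presumably hold the hwctx_srcu read lock, per the comment on struct amdxdna_client; the sketch below assumes that locking rule and is illustrative only.

static void example_log_hwctx_ids(struct amdxdna_client *client)
{
	struct amdxdna_hwctx *hwctx;
	unsigned long hwctx_id;
	int idx;

	/* srcu read section keeps each entry alive across the loop body */
	idx = srcu_read_lock(&client->hwctx_srcu);
	amdxdna_for_each_hwctx(client, hwctx_id, hwctx)
		XDNA_DBG(client->xdna, "client pid %d has hwctx %lu", client->pid, hwctx_id);
	srcu_read_unlock(&client->hwctx_srcu, idx);
}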
diff --git a/drivers/accel/amdxdna/amdxdna_sysfs.c b/drivers/accel/amdxdna/amdxdna_sysfs.c
new file mode 100644
index 000000000000..f27e4ee960a0
--- /dev/null
+++ b/drivers/accel/amdxdna/amdxdna_sysfs.c
@@ -0,0 +1,67 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2023-2024, Advanced Micro Devices, Inc.
+ */
+
+#include <drm/amdxdna_accel.h>
+#include <drm/drm_device.h>
+#include <drm/drm_gem_shmem_helper.h>
+#include <drm/drm_print.h>
+#include <drm/gpu_scheduler.h>
+#include <linux/types.h>
+
+#include "amdxdna_gem.h"
+#include "amdxdna_pci_drv.h"
+
+static ssize_t vbnv_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+ struct amdxdna_dev *xdna = dev_get_drvdata(dev);
+
+ return sprintf(buf, "%s\n", xdna->dev_info->vbnv);
+}
+static DEVICE_ATTR_RO(vbnv);
+
+static ssize_t device_type_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+ struct amdxdna_dev *xdna = dev_get_drvdata(dev);
+
+ return sprintf(buf, "%d\n", xdna->dev_info->device_type);
+}
+static DEVICE_ATTR_RO(device_type);
+
+static ssize_t fw_version_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+ struct amdxdna_dev *xdna = dev_get_drvdata(dev);
+
+ return sprintf(buf, "%d.%d.%d.%d\n", xdna->fw_ver.major,
+ xdna->fw_ver.minor, xdna->fw_ver.sub,
+ xdna->fw_ver.build);
+}
+static DEVICE_ATTR_RO(fw_version);
+
+static struct attribute *amdxdna_attrs[] = {
+ &dev_attr_device_type.attr,
+ &dev_attr_vbnv.attr,
+ &dev_attr_fw_version.attr,
+ NULL,
+};
+
+static struct attribute_group amdxdna_attr_group = {
+ .attrs = amdxdna_attrs,
+};
+
+int amdxdna_sysfs_init(struct amdxdna_dev *xdna)
+{
+ int ret;
+
+ ret = sysfs_create_group(&xdna->ddev.dev->kobj, &amdxdna_attr_group);
+ if (ret)
+ XDNA_ERR(xdna, "Create attr group failed");
+
+ return ret;
+}
+
+void amdxdna_sysfs_fini(struct amdxdna_dev *xdna)
+{
+ sysfs_remove_group(&xdna->ddev.dev->kobj, &amdxdna_attr_group);
+}
diff --git a/drivers/accel/amdxdna/npu1_regs.c b/drivers/accel/amdxdna/npu1_regs.c
new file mode 100644
index 000000000000..e4f6dac7d00f
--- /dev/null
+++ b/drivers/accel/amdxdna/npu1_regs.c
@@ -0,0 +1,114 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2023-2024, Advanced Micro Devices, Inc.
+ */
+
+#include <drm/amdxdna_accel.h>
+#include <drm/drm_device.h>
+#include <drm/gpu_scheduler.h>
+#include <linux/sizes.h>
+
+#include "aie2_pci.h"
+#include "amdxdna_mailbox.h"
+#include "amdxdna_pci_drv.h"
+
+/* Address definition from NPU1 docs */
+#define MPNPU_PUB_SEC_INTR 0x3010090
+#define MPNPU_PUB_PWRMGMT_INTR 0x3010094
+#define MPNPU_PUB_SCRATCH2 0x30100A0
+#define MPNPU_PUB_SCRATCH3 0x30100A4
+#define MPNPU_PUB_SCRATCH4 0x30100A8
+#define MPNPU_PUB_SCRATCH5 0x30100AC
+#define MPNPU_PUB_SCRATCH6 0x30100B0
+#define MPNPU_PUB_SCRATCH7 0x30100B4
+#define MPNPU_PUB_SCRATCH9 0x30100BC
+
+#define MPNPU_SRAM_X2I_MAILBOX_0 0x30A0000
+#define MPNPU_SRAM_X2I_MAILBOX_1 0x30A2000
+#define MPNPU_SRAM_I2X_MAILBOX_15 0x30BF000
+
+#define MPNPU_APERTURE0_BASE 0x3000000
+#define MPNPU_APERTURE1_BASE 0x3080000
+#define MPNPU_APERTURE2_BASE 0x30C0000
+
+/* PCIe BAR Index for NPU1 */
+#define NPU1_REG_BAR_INDEX 0
+#define NPU1_MBOX_BAR_INDEX 4
+#define NPU1_PSP_BAR_INDEX 0
+#define NPU1_SMU_BAR_INDEX 0
+#define NPU1_SRAM_BAR_INDEX 2
+/* Associated BARs and Apertures */
+#define NPU1_REG_BAR_BASE MPNPU_APERTURE0_BASE
+#define NPU1_MBOX_BAR_BASE MPNPU_APERTURE2_BASE
+#define NPU1_PSP_BAR_BASE MPNPU_APERTURE0_BASE
+#define NPU1_SMU_BAR_BASE MPNPU_APERTURE0_BASE
+#define NPU1_SRAM_BAR_BASE MPNPU_APERTURE1_BASE
+
+const struct rt_config npu1_default_rt_cfg[] = {
+ { 2, 1, AIE2_RT_CFG_INIT }, /* PDI APP LOAD MODE */
+ { 1, 1, AIE2_RT_CFG_CLK_GATING }, /* Clock gating on */
+ { 0 },
+};
+
+const struct dpm_clk_freq npu1_dpm_clk_table[] = {
+ {400, 800},
+ {600, 1024},
+ {600, 1024},
+ {600, 1024},
+ {600, 1024},
+ {720, 1309},
+ {720, 1309},
+ {847, 1600},
+ { 0 }
+};
+
+static const struct amdxdna_dev_priv npu1_dev_priv = {
+ .fw_path = "amdnpu/1502_00/npu.sbin",
+ .protocol_major = 0x5,
+ .protocol_minor = 0x7,
+ .rt_config = npu1_default_rt_cfg,
+ .dpm_clk_tbl = npu1_dpm_clk_table,
+ .col_align = COL_ALIGN_NONE,
+ .mbox_dev_addr = NPU1_MBOX_BAR_BASE,
+ .mbox_size = 0, /* Use BAR size */
+ .sram_dev_addr = NPU1_SRAM_BAR_BASE,
+ .sram_offs = {
+ DEFINE_BAR_OFFSET(MBOX_CHANN_OFF, NPU1_SRAM, MPNPU_SRAM_X2I_MAILBOX_0),
+ DEFINE_BAR_OFFSET(FW_ALIVE_OFF, NPU1_SRAM, MPNPU_SRAM_I2X_MAILBOX_15),
+ },
+ .psp_regs_off = {
+ DEFINE_BAR_OFFSET(PSP_CMD_REG, NPU1_PSP, MPNPU_PUB_SCRATCH2),
+ DEFINE_BAR_OFFSET(PSP_ARG0_REG, NPU1_PSP, MPNPU_PUB_SCRATCH3),
+ DEFINE_BAR_OFFSET(PSP_ARG1_REG, NPU1_PSP, MPNPU_PUB_SCRATCH4),
+ DEFINE_BAR_OFFSET(PSP_ARG2_REG, NPU1_PSP, MPNPU_PUB_SCRATCH9),
+ DEFINE_BAR_OFFSET(PSP_INTR_REG, NPU1_PSP, MPNPU_PUB_SEC_INTR),
+ DEFINE_BAR_OFFSET(PSP_STATUS_REG, NPU1_PSP, MPNPU_PUB_SCRATCH2),
+ DEFINE_BAR_OFFSET(PSP_RESP_REG, NPU1_PSP, MPNPU_PUB_SCRATCH3),
+ },
+ .smu_regs_off = {
+ DEFINE_BAR_OFFSET(SMU_CMD_REG, NPU1_SMU, MPNPU_PUB_SCRATCH5),
+ DEFINE_BAR_OFFSET(SMU_ARG_REG, NPU1_SMU, MPNPU_PUB_SCRATCH7),
+ DEFINE_BAR_OFFSET(SMU_INTR_REG, NPU1_SMU, MPNPU_PUB_PWRMGMT_INTR),
+ DEFINE_BAR_OFFSET(SMU_RESP_REG, NPU1_SMU, MPNPU_PUB_SCRATCH6),
+ DEFINE_BAR_OFFSET(SMU_OUT_REG, NPU1_SMU, MPNPU_PUB_SCRATCH7),
+ },
+ .hw_ops = {
+ .set_dpm = npu1_set_dpm,
+ },
+};
+
+const struct amdxdna_dev_info dev_npu1_info = {
+ .reg_bar = NPU1_REG_BAR_INDEX,
+ .mbox_bar = NPU1_MBOX_BAR_INDEX,
+ .sram_bar = NPU1_SRAM_BAR_INDEX,
+ .psp_bar = NPU1_PSP_BAR_INDEX,
+ .smu_bar = NPU1_SMU_BAR_INDEX,
+ .first_col = 1,
+ .dev_mem_buf_shift = 15, /* 32 KiB aligned */
+ .dev_mem_base = AIE2_DEVM_BASE,
+ .dev_mem_size = AIE2_DEVM_SIZE,
+ .vbnv = "RyzenAI-npu1",
+ .device_type = AMDXDNA_DEV_TYPE_KMQ,
+ .dev_priv = &npu1_dev_priv,
+ .ops = &aie2_ops,
+};
diff --git a/drivers/accel/amdxdna/npu2_regs.c b/drivers/accel/amdxdna/npu2_regs.c
new file mode 100644
index 000000000000..a081cac75ee0
--- /dev/null
+++ b/drivers/accel/amdxdna/npu2_regs.c
@@ -0,0 +1,113 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2023-2024, Advanced Micro Devices, Inc.
+ */
+
+#include <drm/amdxdna_accel.h>
+#include <drm/drm_device.h>
+#include <drm/gpu_scheduler.h>
+#include <linux/sizes.h>
+
+#include "aie2_pci.h"
+#include "amdxdna_mailbox.h"
+#include "amdxdna_pci_drv.h"
+
+/* NPU Public Registers on MpNPUAxiXbar (refer to Diag npu_registers.h) */
+#define MPNPU_PUB_SEC_INTR 0x3010060
+#define MPNPU_PUB_PWRMGMT_INTR 0x3010064
+#define MPNPU_PUB_SCRATCH0 0x301006C
+#define MPNPU_PUB_SCRATCH1 0x3010070
+#define MPNPU_PUB_SCRATCH2 0x3010074
+#define MPNPU_PUB_SCRATCH3 0x3010078
+#define MPNPU_PUB_SCRATCH4 0x301007C
+#define MPNPU_PUB_SCRATCH5 0x3010080
+#define MPNPU_PUB_SCRATCH6 0x3010084
+#define MPNPU_PUB_SCRATCH7 0x3010088
+#define MPNPU_PUB_SCRATCH8 0x301008C
+#define MPNPU_PUB_SCRATCH9 0x3010090
+#define MPNPU_PUB_SCRATCH10 0x3010094
+#define MPNPU_PUB_SCRATCH11 0x3010098
+#define MPNPU_PUB_SCRATCH12 0x301009C
+#define MPNPU_PUB_SCRATCH13 0x30100A0
+#define MPNPU_PUB_SCRATCH14 0x30100A4
+#define MPNPU_PUB_SCRATCH15 0x30100A8
+#define MP0_C2PMSG_73 0x3810A24
+#define MP0_C2PMSG_123 0x3810AEC
+
+#define MP1_C2PMSG_0 0x3B10900
+#define MP1_C2PMSG_60 0x3B109F0
+#define MP1_C2PMSG_61 0x3B109F4
+
+#define MPNPU_SRAM_X2I_MAILBOX_0 0x3600000
+#define MPNPU_SRAM_X2I_MAILBOX_15 0x361E000
+#define MPNPU_SRAM_X2I_MAILBOX_31 0x363E000
+#define MPNPU_SRAM_I2X_MAILBOX_31 0x363F000
+
+#define MMNPU_APERTURE0_BASE 0x3000000
+#define MMNPU_APERTURE1_BASE 0x3600000
+#define MMNPU_APERTURE3_BASE 0x3810000
+#define MMNPU_APERTURE4_BASE 0x3B10000
+
+/* PCIe BAR Index for NPU2 */
+#define NPU2_REG_BAR_INDEX 0
+#define NPU2_MBOX_BAR_INDEX 0
+#define NPU2_PSP_BAR_INDEX 4
+#define NPU2_SMU_BAR_INDEX 5
+#define NPU2_SRAM_BAR_INDEX 2
+/* Associated BARs and Apertures */
+#define NPU2_REG_BAR_BASE MMNPU_APERTURE0_BASE
+#define NPU2_MBOX_BAR_BASE MMNPU_APERTURE0_BASE
+#define NPU2_PSP_BAR_BASE MMNPU_APERTURE3_BASE
+#define NPU2_SMU_BAR_BASE MMNPU_APERTURE4_BASE
+#define NPU2_SRAM_BAR_BASE MMNPU_APERTURE1_BASE
+
+static const struct amdxdna_dev_priv npu2_dev_priv = {
+ .fw_path = "amdnpu/17f0_00/npu.sbin",
+ .protocol_major = 0x6,
+ .protocol_minor = 0x6,
+ .rt_config = npu4_default_rt_cfg,
+ .dpm_clk_tbl = npu4_dpm_clk_table,
+ .col_align = COL_ALIGN_NATURE,
+ .mbox_dev_addr = NPU2_MBOX_BAR_BASE,
+ .mbox_size = 0, /* Use BAR size */
+ .sram_dev_addr = NPU2_SRAM_BAR_BASE,
+ .sram_offs = {
+ DEFINE_BAR_OFFSET(MBOX_CHANN_OFF, NPU2_SRAM, MPNPU_SRAM_X2I_MAILBOX_0),
+ DEFINE_BAR_OFFSET(FW_ALIVE_OFF, NPU2_SRAM, MPNPU_SRAM_X2I_MAILBOX_15),
+ },
+ .psp_regs_off = {
+ DEFINE_BAR_OFFSET(PSP_CMD_REG, NPU2_PSP, MP0_C2PMSG_123),
+ DEFINE_BAR_OFFSET(PSP_ARG0_REG, NPU2_REG, MPNPU_PUB_SCRATCH3),
+ DEFINE_BAR_OFFSET(PSP_ARG1_REG, NPU2_REG, MPNPU_PUB_SCRATCH4),
+ DEFINE_BAR_OFFSET(PSP_ARG2_REG, NPU2_REG, MPNPU_PUB_SCRATCH9),
+ DEFINE_BAR_OFFSET(PSP_INTR_REG, NPU2_PSP, MP0_C2PMSG_73),
+ DEFINE_BAR_OFFSET(PSP_STATUS_REG, NPU2_PSP, MP0_C2PMSG_123),
+ DEFINE_BAR_OFFSET(PSP_RESP_REG, NPU2_REG, MPNPU_PUB_SCRATCH3),
+ },
+ .smu_regs_off = {
+ DEFINE_BAR_OFFSET(SMU_CMD_REG, NPU2_SMU, MP1_C2PMSG_0),
+ DEFINE_BAR_OFFSET(SMU_ARG_REG, NPU2_SMU, MP1_C2PMSG_60),
+ DEFINE_BAR_OFFSET(SMU_INTR_REG, NPU2_SMU, MMNPU_APERTURE4_BASE),
+ DEFINE_BAR_OFFSET(SMU_RESP_REG, NPU2_SMU, MP1_C2PMSG_61),
+ DEFINE_BAR_OFFSET(SMU_OUT_REG, NPU2_SMU, MP1_C2PMSG_60),
+ },
+ .hw_ops = {
+ .set_dpm = npu4_set_dpm,
+ },
+};
+
+const struct amdxdna_dev_info dev_npu2_info = {
+ .reg_bar = NPU2_REG_BAR_INDEX,
+ .mbox_bar = NPU2_MBOX_BAR_INDEX,
+ .sram_bar = NPU2_SRAM_BAR_INDEX,
+ .psp_bar = NPU2_PSP_BAR_INDEX,
+ .smu_bar = NPU2_SMU_BAR_INDEX,
+ .first_col = 0,
+ .dev_mem_buf_shift = 15, /* 32 KiB aligned */
+ .dev_mem_base = AIE2_DEVM_BASE,
+ .dev_mem_size = AIE2_DEVM_SIZE,
+ .vbnv = "RyzenAI-npu2",
+ .device_type = AMDXDNA_DEV_TYPE_KMQ,
+ .dev_priv = &npu2_dev_priv,
+ .ops = &aie2_ops, /* NPU2 can share NPU1's callback */
+};
diff --git a/drivers/accel/amdxdna/npu4_regs.c b/drivers/accel/amdxdna/npu4_regs.c
new file mode 100644
index 000000000000..9f2e33182ec6
--- /dev/null
+++ b/drivers/accel/amdxdna/npu4_regs.c
@@ -0,0 +1,134 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2023-2024, Advanced Micro Devices, Inc.
+ */
+
+#include <drm/amdxdna_accel.h>
+#include <drm/drm_device.h>
+#include <drm/gpu_scheduler.h>
+#include <linux/sizes.h>
+
+#include "aie2_pci.h"
+#include "amdxdna_mailbox.h"
+#include "amdxdna_pci_drv.h"
+
+/* NPU Public Registers on MpNPUAxiXbar (refer to Diag npu_registers.h) */
+#define MPNPU_PUB_SEC_INTR 0x3010060
+#define MPNPU_PUB_PWRMGMT_INTR 0x3010064
+#define MPNPU_PUB_SCRATCH0 0x301006C
+#define MPNPU_PUB_SCRATCH1 0x3010070
+#define MPNPU_PUB_SCRATCH2 0x3010074
+#define MPNPU_PUB_SCRATCH3 0x3010078
+#define MPNPU_PUB_SCRATCH4 0x301007C
+#define MPNPU_PUB_SCRATCH5 0x3010080
+#define MPNPU_PUB_SCRATCH6 0x3010084
+#define MPNPU_PUB_SCRATCH7 0x3010088
+#define MPNPU_PUB_SCRATCH8 0x301008C
+#define MPNPU_PUB_SCRATCH9 0x3010090
+#define MPNPU_PUB_SCRATCH10 0x3010094
+#define MPNPU_PUB_SCRATCH11 0x3010098
+#define MPNPU_PUB_SCRATCH12 0x301009C
+#define MPNPU_PUB_SCRATCH13 0x30100A0
+#define MPNPU_PUB_SCRATCH14 0x30100A4
+#define MPNPU_PUB_SCRATCH15 0x30100A8
+#define MP0_C2PMSG_73 0x3810A24
+#define MP0_C2PMSG_123 0x3810AEC
+
+#define MP1_C2PMSG_0 0x3B10900
+#define MP1_C2PMSG_60 0x3B109F0
+#define MP1_C2PMSG_61 0x3B109F4
+
+#define MPNPU_SRAM_X2I_MAILBOX_0 0x3600000
+#define MPNPU_SRAM_X2I_MAILBOX_15 0x361E000
+#define MPNPU_SRAM_X2I_MAILBOX_31 0x363E000
+#define MPNPU_SRAM_I2X_MAILBOX_31 0x363F000
+
+#define MMNPU_APERTURE0_BASE 0x3000000
+#define MMNPU_APERTURE1_BASE 0x3600000
+#define MMNPU_APERTURE3_BASE 0x3810000
+#define MMNPU_APERTURE4_BASE 0x3B10000
+
+/* PCIe BAR Index for NPU4 */
+#define NPU4_REG_BAR_INDEX 0
+#define NPU4_MBOX_BAR_INDEX 0
+#define NPU4_PSP_BAR_INDEX 4
+#define NPU4_SMU_BAR_INDEX 5
+#define NPU4_SRAM_BAR_INDEX 2
+/* Associated BARs and Apertures */
+#define NPU4_REG_BAR_BASE MMNPU_APERTURE0_BASE
+#define NPU4_MBOX_BAR_BASE MMNPU_APERTURE0_BASE
+#define NPU4_PSP_BAR_BASE MMNPU_APERTURE3_BASE
+#define NPU4_SMU_BAR_BASE MMNPU_APERTURE4_BASE
+#define NPU4_SRAM_BAR_BASE MMNPU_APERTURE1_BASE
+
+const struct rt_config npu4_default_rt_cfg[] = {
+ { 5, 1, AIE2_RT_CFG_INIT }, /* PDI APP LOAD MODE */
+ { 1, 1, AIE2_RT_CFG_CLK_GATING }, /* Clock gating on */
+ { 2, 1, AIE2_RT_CFG_CLK_GATING }, /* Clock gating on */
+ { 3, 1, AIE2_RT_CFG_CLK_GATING }, /* Clock gating on */
+ { 4, 1, AIE2_RT_CFG_CLK_GATING }, /* Clock gating on */
+ { 0 },
+};
+
+const struct dpm_clk_freq npu4_dpm_clk_table[] = {
+ {396, 792},
+ {600, 1056},
+ {792, 1152},
+ {975, 1267},
+ {975, 1267},
+ {1056, 1408},
+ {1152, 1584},
+ {1267, 1800},
+ { 0 }
+};
+
+static const struct amdxdna_dev_priv npu4_dev_priv = {
+ .fw_path = "amdnpu/17f0_10/npu.sbin",
+ .protocol_major = 0x6,
+ .protocol_minor = 12,
+ .rt_config = npu4_default_rt_cfg,
+ .dpm_clk_tbl = npu4_dpm_clk_table,
+ .col_align = COL_ALIGN_NATURE,
+ .mbox_dev_addr = NPU4_MBOX_BAR_BASE,
+ .mbox_size = 0, /* Use BAR size */
+ .sram_dev_addr = NPU4_SRAM_BAR_BASE,
+ .sram_offs = {
+ DEFINE_BAR_OFFSET(MBOX_CHANN_OFF, NPU4_SRAM, MPNPU_SRAM_X2I_MAILBOX_0),
+ DEFINE_BAR_OFFSET(FW_ALIVE_OFF, NPU4_SRAM, MPNPU_SRAM_X2I_MAILBOX_15),
+ },
+ .psp_regs_off = {
+ DEFINE_BAR_OFFSET(PSP_CMD_REG, NPU4_PSP, MP0_C2PMSG_123),
+ DEFINE_BAR_OFFSET(PSP_ARG0_REG, NPU4_REG, MPNPU_PUB_SCRATCH3),
+ DEFINE_BAR_OFFSET(PSP_ARG1_REG, NPU4_REG, MPNPU_PUB_SCRATCH4),
+ DEFINE_BAR_OFFSET(PSP_ARG2_REG, NPU4_REG, MPNPU_PUB_SCRATCH9),
+ DEFINE_BAR_OFFSET(PSP_INTR_REG, NPU4_PSP, MP0_C2PMSG_73),
+ DEFINE_BAR_OFFSET(PSP_STATUS_REG, NPU4_PSP, MP0_C2PMSG_123),
+ DEFINE_BAR_OFFSET(PSP_RESP_REG, NPU4_REG, MPNPU_PUB_SCRATCH3),
+ },
+ .smu_regs_off = {
+ DEFINE_BAR_OFFSET(SMU_CMD_REG, NPU4_SMU, MP1_C2PMSG_0),
+ DEFINE_BAR_OFFSET(SMU_ARG_REG, NPU4_SMU, MP1_C2PMSG_60),
+ DEFINE_BAR_OFFSET(SMU_INTR_REG, NPU4_SMU, MMNPU_APERTURE4_BASE),
+ DEFINE_BAR_OFFSET(SMU_RESP_REG, NPU4_SMU, MP1_C2PMSG_61),
+ DEFINE_BAR_OFFSET(SMU_OUT_REG, NPU4_SMU, MP1_C2PMSG_60),
+ },
+ .hw_ops = {
+ .set_dpm = npu4_set_dpm,
+ },
+};
+
+const struct amdxdna_dev_info dev_npu4_info = {
+ .reg_bar = NPU4_REG_BAR_INDEX,
+ .mbox_bar = NPU4_MBOX_BAR_INDEX,
+ .sram_bar = NPU4_SRAM_BAR_INDEX,
+ .psp_bar = NPU4_PSP_BAR_INDEX,
+ .smu_bar = NPU4_SMU_BAR_INDEX,
+ .first_col = 0,
+ .dev_mem_buf_shift = 15, /* 32 KiB aligned */
+ .dev_mem_base = AIE2_DEVM_BASE,
+ .dev_mem_size = AIE2_DEVM_SIZE,
+ .vbnv = "RyzenAI-npu4",
+ .device_type = AMDXDNA_DEV_TYPE_KMQ,
+ .dev_priv = &npu4_dev_priv,
+ .ops = &aie2_ops, /* NPU4 can share NPU1's callback */
+};
diff --git a/drivers/accel/amdxdna/npu5_regs.c b/drivers/accel/amdxdna/npu5_regs.c
new file mode 100644
index 000000000000..5f1cf83461c4
--- /dev/null
+++ b/drivers/accel/amdxdna/npu5_regs.c
@@ -0,0 +1,113 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2024, Advanced Micro Devices, Inc.
+ */
+
+#include <drm/amdxdna_accel.h>
+#include <drm/drm_device.h>
+#include <drm/gpu_scheduler.h>
+#include <linux/sizes.h>
+
+#include "aie2_pci.h"
+#include "amdxdna_mailbox.h"
+#include "amdxdna_pci_drv.h"
+
+/* NPU Public Registers on MpNPUAxiXbar (refer to Diag npu_registers.h) */
+#define MPNPU_PUB_SEC_INTR 0x3010060
+#define MPNPU_PUB_PWRMGMT_INTR 0x3010064
+#define MPNPU_PUB_SCRATCH0 0x301006C
+#define MPNPU_PUB_SCRATCH1 0x3010070
+#define MPNPU_PUB_SCRATCH2 0x3010074
+#define MPNPU_PUB_SCRATCH3 0x3010078
+#define MPNPU_PUB_SCRATCH4 0x301007C
+#define MPNPU_PUB_SCRATCH5 0x3010080
+#define MPNPU_PUB_SCRATCH6 0x3010084
+#define MPNPU_PUB_SCRATCH7 0x3010088
+#define MPNPU_PUB_SCRATCH8 0x301008C
+#define MPNPU_PUB_SCRATCH9 0x3010090
+#define MPNPU_PUB_SCRATCH10 0x3010094
+#define MPNPU_PUB_SCRATCH11 0x3010098
+#define MPNPU_PUB_SCRATCH12 0x301009C
+#define MPNPU_PUB_SCRATCH13 0x30100A0
+#define MPNPU_PUB_SCRATCH14 0x30100A4
+#define MPNPU_PUB_SCRATCH15 0x30100A8
+#define MP0_C2PMSG_73 0x3810A24
+#define MP0_C2PMSG_123 0x3810AEC
+
+#define MP1_C2PMSG_0 0x3B10900
+#define MP1_C2PMSG_60 0x3B109F0
+#define MP1_C2PMSG_61 0x3B109F4
+
+#define MPNPU_SRAM_X2I_MAILBOX_0 0x3600000
+#define MPNPU_SRAM_X2I_MAILBOX_15 0x361E000
+#define MPNPU_SRAM_X2I_MAILBOX_31 0x363E000
+#define MPNPU_SRAM_I2X_MAILBOX_31 0x363F000
+
+#define MMNPU_APERTURE0_BASE 0x3000000
+#define MMNPU_APERTURE1_BASE 0x3600000
+#define MMNPU_APERTURE3_BASE 0x3810000
+#define MMNPU_APERTURE4_BASE 0x3B10000
+
+/* PCIe BAR Index for NPU5 */
+#define NPU5_REG_BAR_INDEX 0
+#define NPU5_MBOX_BAR_INDEX 0
+#define NPU5_PSP_BAR_INDEX 4
+#define NPU5_SMU_BAR_INDEX 5
+#define NPU5_SRAM_BAR_INDEX 2
+/* Associated BARs and Apertures */
+#define NPU5_REG_BAR_BASE MMNPU_APERTURE0_BASE
+#define NPU5_MBOX_BAR_BASE MMNPU_APERTURE0_BASE
+#define NPU5_PSP_BAR_BASE MMNPU_APERTURE3_BASE
+#define NPU5_SMU_BAR_BASE MMNPU_APERTURE4_BASE
+#define NPU5_SRAM_BAR_BASE MMNPU_APERTURE1_BASE
+
+static const struct amdxdna_dev_priv npu5_dev_priv = {
+ .fw_path = "amdnpu/17f0_11/npu.sbin",
+ .protocol_major = 0x6,
+ .protocol_minor = 12,
+ .rt_config = npu4_default_rt_cfg,
+ .dpm_clk_tbl = npu4_dpm_clk_table,
+ .col_align = COL_ALIGN_NATURE,
+ .mbox_dev_addr = NPU5_MBOX_BAR_BASE,
+ .mbox_size = 0, /* Use BAR size */
+ .sram_dev_addr = NPU5_SRAM_BAR_BASE,
+ .sram_offs = {
+ DEFINE_BAR_OFFSET(MBOX_CHANN_OFF, NPU5_SRAM, MPNPU_SRAM_X2I_MAILBOX_0),
+ DEFINE_BAR_OFFSET(FW_ALIVE_OFF, NPU5_SRAM, MPNPU_SRAM_X2I_MAILBOX_15),
+ },
+ .psp_regs_off = {
+ DEFINE_BAR_OFFSET(PSP_CMD_REG, NPU5_PSP, MP0_C2PMSG_123),
+ DEFINE_BAR_OFFSET(PSP_ARG0_REG, NPU5_REG, MPNPU_PUB_SCRATCH3),
+ DEFINE_BAR_OFFSET(PSP_ARG1_REG, NPU5_REG, MPNPU_PUB_SCRATCH4),
+ DEFINE_BAR_OFFSET(PSP_ARG2_REG, NPU5_REG, MPNPU_PUB_SCRATCH9),
+ DEFINE_BAR_OFFSET(PSP_INTR_REG, NPU5_PSP, MP0_C2PMSG_73),
+ DEFINE_BAR_OFFSET(PSP_STATUS_REG, NPU5_PSP, MP0_C2PMSG_123),
+ DEFINE_BAR_OFFSET(PSP_RESP_REG, NPU5_REG, MPNPU_PUB_SCRATCH3),
+ },
+ .smu_regs_off = {
+ DEFINE_BAR_OFFSET(SMU_CMD_REG, NPU5_SMU, MP1_C2PMSG_0),
+ DEFINE_BAR_OFFSET(SMU_ARG_REG, NPU5_SMU, MP1_C2PMSG_60),
+ DEFINE_BAR_OFFSET(SMU_INTR_REG, NPU5_SMU, MMNPU_APERTURE4_BASE),
+ DEFINE_BAR_OFFSET(SMU_RESP_REG, NPU5_SMU, MP1_C2PMSG_61),
+ DEFINE_BAR_OFFSET(SMU_OUT_REG, NPU5_SMU, MP1_C2PMSG_60),
+ },
+ .hw_ops = {
+ .set_dpm = npu4_set_dpm,
+ },
+};
+
+const struct amdxdna_dev_info dev_npu5_info = {
+ .reg_bar = NPU5_REG_BAR_INDEX,
+ .mbox_bar = NPU5_MBOX_BAR_INDEX,
+ .sram_bar = NPU5_SRAM_BAR_INDEX,
+ .psp_bar = NPU5_PSP_BAR_INDEX,
+ .smu_bar = NPU5_SMU_BAR_INDEX,
+ .first_col = 0,
+ .dev_mem_buf_shift = 15, /* 32 KiB aligned */
+ .dev_mem_base = AIE2_DEVM_BASE,
+ .dev_mem_size = AIE2_DEVM_SIZE,
+ .vbnv = "RyzenAI-npu5",
+ .device_type = AMDXDNA_DEV_TYPE_KMQ,
+ .dev_priv = &npu5_dev_priv,
+ .ops = &aie2_ops,
+};
diff --git a/drivers/accel/amdxdna/npu6_regs.c b/drivers/accel/amdxdna/npu6_regs.c
new file mode 100644
index 000000000000..94a7005685a7
--- /dev/null
+++ b/drivers/accel/amdxdna/npu6_regs.c
@@ -0,0 +1,114 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2024, Advanced Micro Devices, Inc.
+ */
+
+#include <drm/amdxdna_accel.h>
+#include <drm/drm_device.h>
+#include <drm/gpu_scheduler.h>
+#include <linux/sizes.h>
+
+#include "aie2_pci.h"
+#include "amdxdna_mailbox.h"
+#include "amdxdna_pci_drv.h"
+
+/* NPU Public Registers on MpNPUAxiXbar (refer to Diag npu_registers.h) */
+#define MPNPU_PUB_SEC_INTR 0x3010060
+#define MPNPU_PUB_PWRMGMT_INTR 0x3010064
+#define MPNPU_PUB_SCRATCH0 0x301006C
+#define MPNPU_PUB_SCRATCH1 0x3010070
+#define MPNPU_PUB_SCRATCH2 0x3010074
+#define MPNPU_PUB_SCRATCH3 0x3010078
+#define MPNPU_PUB_SCRATCH4 0x301007C
+#define MPNPU_PUB_SCRATCH5 0x3010080
+#define MPNPU_PUB_SCRATCH6 0x3010084
+#define MPNPU_PUB_SCRATCH7 0x3010088
+#define MPNPU_PUB_SCRATCH8 0x301008C
+#define MPNPU_PUB_SCRATCH9 0x3010090
+#define MPNPU_PUB_SCRATCH10 0x3010094
+#define MPNPU_PUB_SCRATCH11 0x3010098
+#define MPNPU_PUB_SCRATCH12 0x301009C
+#define MPNPU_PUB_SCRATCH13 0x30100A0
+#define MPNPU_PUB_SCRATCH14 0x30100A4
+#define MPNPU_PUB_SCRATCH15 0x30100A8
+#define MP0_C2PMSG_73 0x3810A24
+#define MP0_C2PMSG_123 0x3810AEC
+
+#define MP1_C2PMSG_0 0x3B10900
+#define MP1_C2PMSG_60 0x3B109F0
+#define MP1_C2PMSG_61 0x3B109F4
+
+#define MPNPU_SRAM_X2I_MAILBOX_0 0x3600000
+#define MPNPU_SRAM_X2I_MAILBOX_15 0x361E000
+#define MPNPU_SRAM_X2I_MAILBOX_31 0x363E000
+#define MPNPU_SRAM_I2X_MAILBOX_31 0x363F000
+
+#define MMNPU_APERTURE0_BASE 0x3000000
+#define MMNPU_APERTURE1_BASE 0x3600000
+#define MMNPU_APERTURE3_BASE 0x3810000
+#define MMNPU_APERTURE4_BASE 0x3B10000
+
+/* PCIe BAR Index for NPU6 */
+#define NPU6_REG_BAR_INDEX 0
+#define NPU6_MBOX_BAR_INDEX 0
+#define NPU6_PSP_BAR_INDEX 4
+#define NPU6_SMU_BAR_INDEX 5
+#define NPU6_SRAM_BAR_INDEX 2
+/* Associated BARs and Apertures */
+#define NPU6_REG_BAR_BASE MMNPU_APERTURE0_BASE
+#define NPU6_MBOX_BAR_BASE MMNPU_APERTURE0_BASE
+#define NPU6_PSP_BAR_BASE MMNPU_APERTURE3_BASE
+#define NPU6_SMU_BAR_BASE MMNPU_APERTURE4_BASE
+#define NPU6_SRAM_BAR_BASE MMNPU_APERTURE1_BASE
+
+static const struct amdxdna_dev_priv npu6_dev_priv = {
+ .fw_path = "amdnpu/17f0_10/npu.sbin",
+ .protocol_major = 0x6,
+ .protocol_minor = 12,
+ .rt_config = npu4_default_rt_cfg,
+ .dpm_clk_tbl = npu4_dpm_clk_table,
+ .col_align = COL_ALIGN_NATURE,
+ .mbox_dev_addr = NPU6_MBOX_BAR_BASE,
+ .mbox_size = 0, /* Use BAR size */
+ .sram_dev_addr = NPU6_SRAM_BAR_BASE,
+ .sram_offs = {
+ DEFINE_BAR_OFFSET(MBOX_CHANN_OFF, NPU6_SRAM, MPNPU_SRAM_X2I_MAILBOX_0),
+ DEFINE_BAR_OFFSET(FW_ALIVE_OFF, NPU6_SRAM, MPNPU_SRAM_X2I_MAILBOX_15),
+ },
+ .psp_regs_off = {
+ DEFINE_BAR_OFFSET(PSP_CMD_REG, NPU6_PSP, MP0_C2PMSG_123),
+ DEFINE_BAR_OFFSET(PSP_ARG0_REG, NPU6_REG, MPNPU_PUB_SCRATCH3),
+ DEFINE_BAR_OFFSET(PSP_ARG1_REG, NPU6_REG, MPNPU_PUB_SCRATCH4),
+ DEFINE_BAR_OFFSET(PSP_ARG2_REG, NPU6_REG, MPNPU_PUB_SCRATCH9),
+ DEFINE_BAR_OFFSET(PSP_INTR_REG, NPU6_PSP, MP0_C2PMSG_73),
+ DEFINE_BAR_OFFSET(PSP_STATUS_REG, NPU6_PSP, MP0_C2PMSG_123),
+ DEFINE_BAR_OFFSET(PSP_RESP_REG, NPU6_REG, MPNPU_PUB_SCRATCH3),
+ },
+ .smu_regs_off = {
+ DEFINE_BAR_OFFSET(SMU_CMD_REG, NPU6_SMU, MP1_C2PMSG_0),
+ DEFINE_BAR_OFFSET(SMU_ARG_REG, NPU6_SMU, MP1_C2PMSG_60),
+ DEFINE_BAR_OFFSET(SMU_INTR_REG, NPU6_SMU, MMNPU_APERTURE4_BASE),
+ DEFINE_BAR_OFFSET(SMU_RESP_REG, NPU6_SMU, MP1_C2PMSG_61),
+ DEFINE_BAR_OFFSET(SMU_OUT_REG, NPU6_SMU, MP1_C2PMSG_60),
+ },
+ .hw_ops = {
+ .set_dpm = npu4_set_dpm,
+ },
+
+};
+
+const struct amdxdna_dev_info dev_npu6_info = {
+ .reg_bar = NPU6_REG_BAR_INDEX,
+ .mbox_bar = NPU6_MBOX_BAR_INDEX,
+ .sram_bar = NPU6_SRAM_BAR_INDEX,
+ .psp_bar = NPU6_PSP_BAR_INDEX,
+ .smu_bar = NPU6_SMU_BAR_INDEX,
+ .first_col = 0,
+ .dev_mem_buf_shift = 15, /* 32 KiB aligned */
+ .dev_mem_base = AIE2_DEVM_BASE,
+ .dev_mem_size = AIE2_DEVM_SIZE,
+ .vbnv = "RyzenAI-npu6",
+ .device_type = AMDXDNA_DEV_TYPE_KMQ,
+ .dev_priv = &npu6_dev_priv,
+ .ops = &aie2_ops,
+};
diff --git a/drivers/accel/habanalabs/common/command_submission.c b/drivers/accel/habanalabs/common/command_submission.c
index 59823e3c3bf7..dee487724918 100644
--- a/drivers/accel/habanalabs/common/command_submission.c
+++ b/drivers/accel/habanalabs/common/command_submission.c
@@ -2586,7 +2586,7 @@ int hl_cs_ioctl(struct drm_device *ddev, void *data, struct drm_file *file_priv)
cs_seq = args->in.seq;
timeout = flags & HL_CS_FLAGS_CUSTOM_TIMEOUT
- ? msecs_to_jiffies(args->in.timeout * 1000)
+ ? secs_to_jiffies(args->in.timeout)
: hpriv->hdev->timeout_jiffies;
switch (cs_type) {
diff --git a/drivers/accel/habanalabs/common/context.c b/drivers/accel/habanalabs/common/context.c
index b83141f58319..9f212b17611a 100644
--- a/drivers/accel/habanalabs/common/context.c
+++ b/drivers/accel/habanalabs/common/context.c
@@ -199,7 +199,6 @@ out_err:
int hl_ctx_init(struct hl_device *hdev, struct hl_ctx *ctx, bool is_kernel_ctx)
{
- char task_comm[TASK_COMM_LEN];
int rc = 0, i;
ctx->hdev = hdev;
@@ -272,7 +271,7 @@ int hl_ctx_init(struct hl_device *hdev, struct hl_ctx *ctx, bool is_kernel_ctx)
mutex_init(&ctx->ts_reg_lock);
dev_dbg(hdev->dev, "create user context, comm=\"%s\", asid=%u\n",
- get_task_comm(task_comm, current), ctx->asid);
+ current->comm, ctx->asid);
}
return 0;
diff --git a/drivers/accel/habanalabs/common/debugfs.c b/drivers/accel/habanalabs/common/debugfs.c
index ca7677293a55..4b391807e5f2 100644
--- a/drivers/accel/habanalabs/common/debugfs.c
+++ b/drivers/accel/habanalabs/common/debugfs.c
@@ -1403,7 +1403,7 @@ static ssize_t hl_timeout_locked_write(struct file *f, const char __user *buf,
return rc;
if (value)
- hdev->timeout_jiffies = msecs_to_jiffies(value * 1000);
+ hdev->timeout_jiffies = secs_to_jiffies(value);
else
hdev->timeout_jiffies = MAX_SCHEDULE_TIMEOUT;
diff --git a/drivers/accel/habanalabs/common/device.c b/drivers/accel/habanalabs/common/device.c
index e0cf3b4343bb..68eebed3b050 100644
--- a/drivers/accel/habanalabs/common/device.c
+++ b/drivers/accel/habanalabs/common/device.c
@@ -817,7 +817,7 @@ static void device_hard_reset_pending(struct work_struct *work)
}
queue_delayed_work(hdev->reset_wq, &device_reset_work->reset_work,
- msecs_to_jiffies(HL_PENDING_RESET_PER_SEC * 1000));
+ secs_to_jiffies(HL_PENDING_RESET_PER_SEC));
}
}
@@ -2091,7 +2091,7 @@ int hl_device_cond_reset(struct hl_device *hdev, u32 flags, u64 event_mask)
dev_dbg(hdev->dev, "Device is going to be hard-reset in %u sec unless being released\n",
hdev->device_release_watchdog_timeout_sec);
schedule_delayed_work(&hdev->device_release_watchdog_work.reset_work,
- msecs_to_jiffies(hdev->device_release_watchdog_timeout_sec * 1000));
+ secs_to_jiffies(hdev->device_release_watchdog_timeout_sec));
hdev->reset_info.watchdog_active = 1;
out:
spin_unlock(&hdev->reset_info.lock);
diff --git a/drivers/accel/habanalabs/common/habanalabs_drv.c b/drivers/accel/habanalabs/common/habanalabs_drv.c
index 708dfd10f39c..0035748f3228 100644
--- a/drivers/accel/habanalabs/common/habanalabs_drv.c
+++ b/drivers/accel/habanalabs/common/habanalabs_drv.c
@@ -101,7 +101,6 @@ static const struct drm_driver hl_driver = {
.major = LINUX_VERSION_MAJOR,
.minor = LINUX_VERSION_PATCHLEVEL,
.patchlevel = LINUX_VERSION_SUBLEVEL,
- .date = "20190505",
.fops = &hl_fops,
.open = hl_device_open,
@@ -362,8 +361,7 @@ static void fixup_device_params_per_asic(struct hl_device *hdev, int timeout)
* a different default timeout for Gaudi
*/
if (timeout == HL_DEFAULT_TIMEOUT_LOCKED)
- hdev->timeout_jiffies = msecs_to_jiffies(GAUDI_DEFAULT_TIMEOUT_LOCKED *
- MSEC_PER_SEC);
+ hdev->timeout_jiffies = secs_to_jiffies(GAUDI_DEFAULT_TIMEOUT_LOCKED);
hdev->reset_upon_device_release = 0;
break;
@@ -388,7 +386,7 @@ static int fixup_device_params(struct hl_device *hdev)
hdev->fw_comms_poll_interval_usec = HL_FW_STATUS_POLL_INTERVAL_USEC;
if (tmp_timeout)
- hdev->timeout_jiffies = msecs_to_jiffies(tmp_timeout * MSEC_PER_SEC);
+ hdev->timeout_jiffies = secs_to_jiffies(tmp_timeout);
else
hdev->timeout_jiffies = MAX_SCHEDULE_TIMEOUT;
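The habanalabs hunks above (command_submission.c, debugfs.c, device.c and habanalabs_drv.c) all apply the same conversion: second-granularity timeouts that used to be computed as msecs_to_jiffies(x * 1000) now go through the dedicated secs_to_jiffies() helper. A minimal sketch of the idiom, assuming a hypothetical my_dev structure with a timeout_jiffies field:

#include <linux/jiffies.h>
#include <linux/sched.h>

struct my_dev {
	unsigned long timeout_jiffies;
};

/* Hypothetical helper: store a user-supplied timeout given in seconds. */
static void my_dev_set_timeout(struct my_dev *mdev, u32 timeout_sec)
{
	if (timeout_sec)
		mdev->timeout_jiffies = secs_to_jiffies(timeout_sec); /* was msecs_to_jiffies(timeout_sec * 1000) */
	else
		mdev->timeout_jiffies = MAX_SCHEDULE_TIMEOUT; /* 0 means wait forever */
}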
diff --git a/drivers/accel/habanalabs/common/habanalabs_ioctl.c b/drivers/accel/habanalabs/common/habanalabs_ioctl.c
index 1dd6e23172ca..8729a0c57d78 100644
--- a/drivers/accel/habanalabs/common/habanalabs_ioctl.c
+++ b/drivers/accel/habanalabs/common/habanalabs_ioctl.c
@@ -1279,13 +1279,10 @@ static long _hl_ioctl(struct hl_fpriv *hpriv, unsigned int cmd, unsigned long ar
retcode = -EFAULT;
out_err:
- if (retcode) {
- char task_comm[TASK_COMM_LEN];
-
+ if (retcode)
dev_dbg_ratelimited(dev,
"error in ioctl: pid=%d, comm=\"%s\", cmd=%#010x, nr=%#04x\n",
- task_pid_nr(current), get_task_comm(task_comm, current), cmd, nr);
- }
+ task_pid_nr(current), current->comm, cmd, nr);
if (kdata != stack_kdata)
kfree(kdata);
@@ -1308,11 +1305,9 @@ long hl_ioctl_control(struct file *filep, unsigned int cmd, unsigned long arg)
if (nr == _IOC_NR(DRM_IOCTL_HL_INFO)) {
ioctl = &hl_ioctls_control[nr - HL_COMMAND_START];
} else {
- char task_comm[TASK_COMM_LEN];
-
dev_dbg_ratelimited(hdev->dev_ctrl,
"invalid ioctl: pid=%d, comm=\"%s\", cmd=%#010x, nr=%#04x\n",
- task_pid_nr(current), get_task_comm(task_comm, current), cmd, nr);
+ task_pid_nr(current), current->comm, cmd, nr);
return -ENOTTY;
}
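Both get_task_comm() removals above (context.c and habanalabs_ioctl.c) rely on a task reading its own comm field, which needs no task_lock() and therefore no on-stack TASK_COMM_LEN buffer. A hedged sketch of the simplified logging pattern:

#include <linux/sched.h>
#include <linux/printk.h>

/* Illustrative only: log the calling task's pid and name. */
static void log_caller(void)
{
	pr_debug("request from pid=%d comm=\"%s\"\n",
		 task_pid_nr(current), current->comm);
}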
diff --git a/drivers/accel/habanalabs/common/sysfs.c b/drivers/accel/habanalabs/common/sysfs.c
index e9f8ccc0bbf9..9d58efa2ff38 100644
--- a/drivers/accel/habanalabs/common/sysfs.c
+++ b/drivers/accel/habanalabs/common/sysfs.c
@@ -368,7 +368,7 @@ out:
}
static ssize_t eeprom_read_handler(struct file *filp, struct kobject *kobj,
- struct bin_attribute *attr, char *buf, loff_t offset,
+ const struct bin_attribute *attr, char *buf, loff_t offset,
size_t max_size)
{
struct device *dev = kobj_to_dev(kobj);
@@ -443,10 +443,10 @@ static DEVICE_ATTR_RO(security_enabled);
static DEVICE_ATTR_RO(module_id);
static DEVICE_ATTR_RO(parent_device);
-static struct bin_attribute bin_attr_eeprom = {
+static const struct bin_attribute bin_attr_eeprom = {
.attr = {.name = "eeprom", .mode = (0444)},
.size = PAGE_SIZE,
- .read = eeprom_read_handler
+ .read_new = eeprom_read_handler
};
static struct attribute *hl_dev_attrs[] = {
@@ -472,14 +472,14 @@ static struct attribute *hl_dev_attrs[] = {
NULL,
};
-static struct bin_attribute *hl_dev_bin_attrs[] = {
+static const struct bin_attribute *const hl_dev_bin_attrs[] = {
&bin_attr_eeprom,
NULL
};
static struct attribute_group hl_dev_attr_group = {
.attrs = hl_dev_attrs,
- .bin_attrs = hl_dev_bin_attrs,
+ .bin_attrs_new = hl_dev_bin_attrs,
};
static struct attribute_group hl_dev_clks_attr_group;
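The sysfs.c hunk moves the habanalabs eeprom attribute to the const binary-attribute callbacks (.read_new and .bin_attrs_new) introduced for the sysfs constification work. The same shape reduced to a standalone sketch; the blob names and the 256-byte size are made up for illustration:

#include <linux/device.h>
#include <linux/sysfs.h>

static ssize_t blob_read(struct file *filp, struct kobject *kobj,
			 const struct bin_attribute *attr, char *buf,
			 loff_t off, size_t count)
{
	return 0; /* fill buf from device storage here */
}

static const struct bin_attribute bin_attr_blob = {
	.attr = { .name = "blob", .mode = 0444 },
	.size = 256, /* size of the exposed blob */
	.read_new = blob_read,
};

static const struct bin_attribute *const my_bin_attrs[] = {
	&bin_attr_blob,
	NULL
};

static const struct attribute_group my_attr_group = {
	.bin_attrs_new = my_bin_attrs,
};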
diff --git a/drivers/accel/ivpu/ivpu_debugfs.c b/drivers/accel/ivpu/ivpu_debugfs.c
index 8180b95ed69d..cd24ccd20ba6 100644
--- a/drivers/accel/ivpu/ivpu_debugfs.c
+++ b/drivers/accel/ivpu/ivpu_debugfs.c
@@ -4,6 +4,7 @@
*/
#include <linux/debugfs.h>
+#include <linux/fault-inject.h>
#include <drm/drm_debugfs.h>
#include <drm/drm_file.h>
@@ -331,7 +332,7 @@ ivpu_force_recovery_fn(struct file *file, const char __user *user_buf, size_t si
return -EINVAL;
ret = ivpu_rpm_get(vdev);
- if (ret)
+ if (ret < 0)
return ret;
ivpu_pm_trigger_recovery(vdev, "debugfs");
@@ -382,7 +383,7 @@ static int dct_active_set(void *data, u64 active_percent)
return -EINVAL;
ret = ivpu_rpm_get(vdev);
- if (ret)
+ if (ret < 0)
return ret;
if (active_percent)
@@ -397,6 +398,88 @@ static int dct_active_set(void *data, u64 active_percent)
DEFINE_DEBUGFS_ATTRIBUTE(ivpu_dct_fops, dct_active_get, dct_active_set, "%llu\n");
+static int priority_bands_show(struct seq_file *s, void *v)
+{
+ struct ivpu_device *vdev = s->private;
+ struct ivpu_hw_info *hw = vdev->hw;
+
+ for (int band = VPU_JOB_SCHEDULING_PRIORITY_BAND_IDLE;
+ band < VPU_JOB_SCHEDULING_PRIORITY_BAND_COUNT; band++) {
+ switch (band) {
+ case VPU_JOB_SCHEDULING_PRIORITY_BAND_IDLE:
+ seq_puts(s, "Idle: ");
+ break;
+
+ case VPU_JOB_SCHEDULING_PRIORITY_BAND_NORMAL:
+ seq_puts(s, "Normal: ");
+ break;
+
+ case VPU_JOB_SCHEDULING_PRIORITY_BAND_FOCUS:
+ seq_puts(s, "Focus: ");
+ break;
+
+ case VPU_JOB_SCHEDULING_PRIORITY_BAND_REALTIME:
+ seq_puts(s, "Realtime: ");
+ break;
+ }
+
+ seq_printf(s, "grace_period %9u process_grace_period %9u process_quantum %9u\n",
+ hw->hws.grace_period[band], hw->hws.process_grace_period[band],
+ hw->hws.process_quantum[band]);
+ }
+
+ return 0;
+}
+
+static int priority_bands_fops_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, priority_bands_show, inode->i_private);
+}
+
+static ssize_t
+priority_bands_fops_write(struct file *file, const char __user *user_buf, size_t size, loff_t *pos)
+{
+ struct seq_file *s = file->private_data;
+ struct ivpu_device *vdev = s->private;
+ char buf[64];
+ u32 grace_period;
+ u32 process_grace_period;
+ u32 process_quantum;
+ u32 band;
+ int ret;
+
+ if (size >= sizeof(buf))
+ return -EINVAL;
+
+ ret = simple_write_to_buffer(buf, sizeof(buf) - 1, pos, user_buf, size);
+ if (ret < 0)
+ return ret;
+
+ buf[ret] = '\0';
+ ret = sscanf(buf, "%u %u %u %u", &band, &grace_period, &process_grace_period,
+ &process_quantum);
+ if (ret != 4)
+ return -EINVAL;
+
+ if (band >= VPU_JOB_SCHEDULING_PRIORITY_BAND_COUNT)
+ return -EINVAL;
+
+ vdev->hw->hws.grace_period[band] = grace_period;
+ vdev->hw->hws.process_grace_period[band] = process_grace_period;
+ vdev->hw->hws.process_quantum[band] = process_quantum;
+
+ return size;
+}
+
+static const struct file_operations ivpu_hws_priority_bands_fops = {
+ .owner = THIS_MODULE,
+ .open = priority_bands_fops_open,
+ .write = priority_bands_fops_write,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
void ivpu_debugfs_init(struct ivpu_device *vdev)
{
struct dentry *debugfs_root = vdev->drm.debugfs_root;
@@ -419,6 +502,8 @@ void ivpu_debugfs_init(struct ivpu_device *vdev)
&fw_trace_hw_comp_mask_fops);
debugfs_create_file("fw_trace_level", 0200, debugfs_root, vdev,
&fw_trace_level_fops);
+ debugfs_create_file("hws_priority_bands", 0200, debugfs_root, vdev,
+ &ivpu_hws_priority_bands_fops);
debugfs_create_file("reset_engine", 0200, debugfs_root, vdev,
&ivpu_reset_engine_fops);
@@ -430,4 +515,8 @@ void ivpu_debugfs_init(struct ivpu_device *vdev)
debugfs_root, vdev, &fw_profiling_freq_fops);
debugfs_create_file("dct", 0644, debugfs_root, vdev, &ivpu_dct_fops);
}
+
+#ifdef CONFIG_FAULT_INJECTION
+ fault_create_debugfs_attr("fail_hw", debugfs_root, &ivpu_hw_failure);
+#endif
}
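The new hws_priority_bands debugfs file takes four space-separated integers per write, a band index followed by grace_period, process_grace_period and process_quantum, and dumps the whole table on read. A userspace sketch, assuming the default debugfs mount point, that band 1 maps to the Normal band, and an illustrative accel minor directory name:

#include <stdio.h>

int main(void)
{
	/* Path is illustrative; the directory name depends on the accel minor. */
	FILE *f = fopen("/sys/kernel/debug/accel/0/hws_priority_bands", "w");

	if (!f)
		return 1;

	/* band 1 (Normal): grace_period process_grace_period process_quantum */
	fprintf(f, "1 50000 50000 300000\n");
	fclose(f);
	return 0;
}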
diff --git a/drivers/accel/ivpu/ivpu_drv.c b/drivers/accel/ivpu/ivpu_drv.c
index 0c4a82271c26..eff1d3ca075f 100644
--- a/drivers/accel/ivpu/ivpu_drv.c
+++ b/drivers/accel/ivpu/ivpu_drv.c
@@ -1,12 +1,13 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
- * Copyright (C) 2020-2024 Intel Corporation
+ * Copyright (C) 2020-2025 Intel Corporation
*/
#include <linux/firmware.h>
#include <linux/module.h>
#include <linux/pci.h>
#include <linux/pm_runtime.h>
+#include <linux/workqueue.h>
#include <generated/utsrelease.h>
#include <drm/drm_accel.h>
@@ -36,8 +37,6 @@
#define DRIVER_VERSION_STR "1.0.0 " UTS_RELEASE
#endif
-static struct lock_class_key submitted_jobs_xa_lock_class_key;
-
int ivpu_dbg_mask;
module_param_named(dbg_mask, ivpu_dbg_mask, int, 0644);
MODULE_PARM_DESC(dbg_mask, "Driver debug mask. See IVPU_DBG_* macros.");
@@ -128,20 +127,18 @@ void ivpu_file_priv_put(struct ivpu_file_priv **link)
kref_put(&file_priv->ref, file_priv_release);
}
-static int ivpu_get_capabilities(struct ivpu_device *vdev, struct drm_ivpu_param *args)
+bool ivpu_is_capable(struct ivpu_device *vdev, u32 capability)
{
- switch (args->index) {
+ switch (capability) {
case DRM_IVPU_CAP_METRIC_STREAMER:
- args->value = 1;
- break;
+ return true;
case DRM_IVPU_CAP_DMA_MEMORY_RANGE:
- args->value = 1;
- break;
+ return true;
+ case DRM_IVPU_CAP_MANAGE_CMDQ:
+ return vdev->fw->sched_mode == VPU_SCHEDULING_MODE_HW;
default:
- return -EINVAL;
+ return false;
}
-
- return 0;
}
static int ivpu_get_param_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
@@ -167,7 +164,7 @@ static int ivpu_get_param_ioctl(struct drm_device *dev, void *data, struct drm_f
args->value = vdev->platform;
break;
case DRM_IVPU_PARAM_CORE_CLOCK_RATE:
- args->value = ivpu_hw_ratio_to_freq(vdev, vdev->hw->pll.max_ratio);
+ args->value = ivpu_hw_dpu_max_freq_get(vdev);
break;
case DRM_IVPU_PARAM_NUM_CONTEXTS:
args->value = ivpu_get_context_count(vdev);
@@ -201,7 +198,7 @@ static int ivpu_get_param_ioctl(struct drm_device *dev, void *data, struct drm_f
args->value = vdev->hw->sku;
break;
case DRM_IVPU_PARAM_CAPABILITIES:
- ret = ivpu_get_capabilities(vdev, args);
+ args->value = ivpu_is_capable(vdev, args->index);
break;
default:
ret = -EINVAL;
@@ -310,6 +307,9 @@ static const struct drm_ioctl_desc ivpu_drm_ioctls[] = {
DRM_IOCTL_DEF_DRV(IVPU_METRIC_STREAMER_GET_DATA, ivpu_ms_get_data_ioctl, 0),
DRM_IOCTL_DEF_DRV(IVPU_METRIC_STREAMER_STOP, ivpu_ms_stop_ioctl, 0),
DRM_IOCTL_DEF_DRV(IVPU_METRIC_STREAMER_GET_INFO, ivpu_ms_get_info_ioctl, 0),
+ DRM_IOCTL_DEF_DRV(IVPU_CMDQ_CREATE, ivpu_cmdq_create_ioctl, 0),
+ DRM_IOCTL_DEF_DRV(IVPU_CMDQ_DESTROY, ivpu_cmdq_destroy_ioctl, 0),
+ DRM_IOCTL_DEF_DRV(IVPU_CMDQ_SUBMIT, ivpu_cmdq_submit_ioctl, 0),
};
static int ivpu_wait_for_ready(struct ivpu_device *vdev)
@@ -421,6 +421,9 @@ void ivpu_prepare_for_reset(struct ivpu_device *vdev)
{
ivpu_hw_irq_disable(vdev);
disable_irq(vdev->irq);
+ flush_work(&vdev->irq_ipc_work);
+ flush_work(&vdev->irq_dct_work);
+ flush_work(&vdev->context_abort_work);
ivpu_ipc_disable(vdev);
ivpu_mmu_disable(vdev);
}
@@ -453,7 +456,7 @@ static const struct drm_driver driver = {
.postclose = ivpu_postclose,
.gem_create_object = ivpu_gem_create_object,
- .gem_prime_import_sg_table = drm_gem_shmem_prime_import_sg_table,
+ .gem_prime_import = ivpu_gem_prime_import,
.ioctls = ivpu_drm_ioctls,
.num_ioctls = ARRAY_SIZE(ivpu_drm_ioctls),
@@ -462,65 +465,9 @@ static const struct drm_driver driver = {
.name = DRIVER_NAME,
.desc = DRIVER_DESC,
-#ifdef DRIVER_DATE
- .date = DRIVER_DATE,
- .major = DRIVER_MAJOR,
- .minor = DRIVER_MINOR,
- .patchlevel = DRIVER_PATCHLEVEL,
-#else
- .date = UTS_RELEASE,
.major = 1,
-#endif
};
-static void ivpu_context_abort_invalid(struct ivpu_device *vdev)
-{
- struct ivpu_file_priv *file_priv;
- unsigned long ctx_id;
-
- mutex_lock(&vdev->context_list_lock);
-
- xa_for_each(&vdev->context_xa, ctx_id, file_priv) {
- if (!file_priv->has_mmu_faults || file_priv->aborted)
- continue;
-
- mutex_lock(&file_priv->lock);
- ivpu_context_abort_locked(file_priv);
- file_priv->aborted = true;
- mutex_unlock(&file_priv->lock);
- }
-
- mutex_unlock(&vdev->context_list_lock);
-}
-
-static irqreturn_t ivpu_irq_thread_handler(int irq, void *arg)
-{
- struct ivpu_device *vdev = arg;
- u8 irq_src;
-
- if (kfifo_is_empty(&vdev->hw->irq.fifo))
- return IRQ_NONE;
-
- while (kfifo_get(&vdev->hw->irq.fifo, &irq_src)) {
- switch (irq_src) {
- case IVPU_HW_IRQ_SRC_IPC:
- ivpu_ipc_irq_thread_handler(vdev);
- break;
- case IVPU_HW_IRQ_SRC_MMU_EVTQ:
- ivpu_context_abort_invalid(vdev);
- break;
- case IVPU_HW_IRQ_SRC_DCT:
- ivpu_pm_dct_irq_thread_handler(vdev);
- break;
- default:
- ivpu_err_ratelimited(vdev, "Unknown IRQ source: %u\n", irq_src);
- break;
- }
- }
-
- return IRQ_HANDLED;
-}
-
static int ivpu_irq_init(struct ivpu_device *vdev)
{
struct pci_dev *pdev = to_pci_dev(vdev->drm.dev);
@@ -532,12 +479,16 @@ static int ivpu_irq_init(struct ivpu_device *vdev)
return ret;
}
+ INIT_WORK(&vdev->irq_ipc_work, ivpu_ipc_irq_work_fn);
+ INIT_WORK(&vdev->irq_dct_work, ivpu_pm_irq_dct_work_fn);
+ INIT_WORK(&vdev->context_abort_work, ivpu_context_abort_work_fn);
+
ivpu_irq_handlers_init(vdev);
vdev->irq = pci_irq_vector(pdev, 0);
- ret = devm_request_threaded_irq(vdev->drm.dev, vdev->irq, ivpu_hw_irq_handler,
- ivpu_irq_thread_handler, IRQF_NO_AUTOEN, DRIVER_NAME, vdev);
+ ret = devm_request_irq(vdev->drm.dev, vdev->irq, ivpu_hw_irq_handler,
+ IRQF_NO_AUTOEN, DRIVER_NAME, vdev);
if (ret)
ivpu_err(vdev, "Failed to request an IRQ %d\n", ret);
@@ -625,7 +576,6 @@ static int ivpu_dev_init(struct ivpu_device *vdev)
xa_init_flags(&vdev->context_xa, XA_FLAGS_ALLOC | XA_FLAGS_LOCK_IRQ);
xa_init_flags(&vdev->submitted_jobs_xa, XA_FLAGS_ALLOC1);
xa_init_flags(&vdev->db_xa, XA_FLAGS_ALLOC1);
- lockdep_set_class(&vdev->submitted_jobs_xa.xa_lock, &submitted_jobs_xa_lock_class_key);
INIT_LIST_HEAD(&vdev->bo_list);
vdev->db_limit.min = IVPU_MIN_DB;
@@ -635,6 +585,10 @@ static int ivpu_dev_init(struct ivpu_device *vdev)
if (ret)
goto err_xa_destroy;
+ ret = drmm_mutex_init(&vdev->drm, &vdev->submitted_jobs_lock);
+ if (ret)
+ goto err_xa_destroy;
+
ret = drmm_mutex_init(&vdev->drm, &vdev->bo_list_lock);
if (ret)
goto err_xa_destroy;
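ivpu_drv.c retires the threaded IRQ handler and the per-source kfifo: the hard IRQ handler only acknowledges hardware state, and the bottom halves become regular work items that ivpu_prepare_for_reset() can flush deterministically. A minimal sketch of that split with hypothetical names, assuming the device structure embeds the work item:

#include <linux/interrupt.h>
#include <linux/workqueue.h>

struct my_dev {
	struct work_struct ipc_work;
};

static void my_ipc_work_fn(struct work_struct *work)
{
	struct my_dev *mdev = container_of(work, struct my_dev, ipc_work);

	/* Process queued messages in sleepable context. */
	(void)mdev;
}

static irqreturn_t my_irq_handler(int irq, void *arg)
{
	struct my_dev *mdev = arg;

	/* Read and clear hardware status here, then defer the heavy lifting. */
	queue_work(system_wq, &mdev->ipc_work);
	return IRQ_HANDLED;
}

/*
 * At probe time: INIT_WORK(&mdev->ipc_work, my_ipc_work_fn), then
 * devm_request_irq(dev, irq, my_irq_handler, IRQF_NO_AUTOEN, "my_dev", mdev).
 * Before a reset: disable_irq(irq) followed by flush_work(&mdev->ipc_work).
 */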
diff --git a/drivers/accel/ivpu/ivpu_drv.h b/drivers/accel/ivpu/ivpu_drv.h
index 3fdff3f6cffd..92753effb1c9 100644
--- a/drivers/accel/ivpu/ivpu_drv.h
+++ b/drivers/accel/ivpu/ivpu_drv.h
@@ -58,6 +58,7 @@
#define IVPU_PLATFORM_SILICON 0
#define IVPU_PLATFORM_SIMICS 2
#define IVPU_PLATFORM_FPGA 3
+#define IVPU_PLATFORM_HSLE 4
#define IVPU_PLATFORM_INVALID 8
#define IVPU_SCHED_MODE_AUTO -1
@@ -110,6 +111,7 @@ struct ivpu_wa_table {
bool disable_clock_relinquish;
bool disable_d0i3_msg;
bool wp0_during_power_up;
+ bool disable_d0i2;
};
struct ivpu_hw_info;
@@ -142,9 +144,14 @@ struct ivpu_device {
struct xa_limit db_limit;
u32 db_next;
+ struct work_struct irq_ipc_work;
+ struct work_struct irq_dct_work;
+ struct work_struct context_abort_work;
+
struct mutex bo_list_lock; /* Protects bo_list */
struct list_head bo_list;
+ struct mutex submitted_jobs_lock; /* Protects submitted_jobs */
struct xarray submitted_jobs_xa;
struct ivpu_ipc_consumer job_done_consumer;
@@ -200,6 +207,9 @@ extern bool ivpu_force_snoop;
#define IVPU_TEST_MODE_MIP_DISABLE BIT(6)
#define IVPU_TEST_MODE_DISABLE_TIMEOUTS BIT(8)
#define IVPU_TEST_MODE_TURBO BIT(9)
+#define IVPU_TEST_MODE_CLK_RELINQ_DISABLE BIT(10)
+#define IVPU_TEST_MODE_CLK_RELINQ_ENABLE BIT(11)
+#define IVPU_TEST_MODE_D0I2_DISABLE BIT(12)
extern int ivpu_test_mode;
struct ivpu_file_priv *ivpu_file_priv_get(struct ivpu_file_priv *file_priv);
@@ -208,6 +218,7 @@ void ivpu_file_priv_put(struct ivpu_file_priv **link);
int ivpu_boot(struct ivpu_device *vdev);
int ivpu_shutdown(struct ivpu_device *vdev);
void ivpu_prepare_for_reset(struct ivpu_device *vdev);
+bool ivpu_is_capable(struct ivpu_device *vdev, u32 capability);
static inline u8 ivpu_revision(struct ivpu_device *vdev)
{
@@ -282,7 +293,8 @@ static inline bool ivpu_is_simics(struct ivpu_device *vdev)
static inline bool ivpu_is_fpga(struct ivpu_device *vdev)
{
- return ivpu_get_platform(vdev) == IVPU_PLATFORM_FPGA;
+ return ivpu_get_platform(vdev) == IVPU_PLATFORM_FPGA ||
+ ivpu_get_platform(vdev) == IVPU_PLATFORM_HSLE;
}
static inline bool ivpu_is_force_snoop_enabled(struct ivpu_device *vdev)
diff --git a/drivers/accel/ivpu/ivpu_fw.c b/drivers/accel/ivpu/ivpu_fw.c
index 6037ec0b3096..9db741695401 100644
--- a/drivers/accel/ivpu/ivpu_fw.c
+++ b/drivers/accel/ivpu/ivpu_fw.c
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
- * Copyright (C) 2020-2024 Intel Corporation
+ * Copyright (C) 2020-2025 Intel Corporation
*/
#include <linux/firmware.h>
@@ -55,18 +55,18 @@ static struct {
int gen;
const char *name;
} fw_names[] = {
- { IVPU_HW_IP_37XX, "vpu_37xx.bin" },
+ { IVPU_HW_IP_37XX, "intel/vpu/vpu_37xx_v1.bin" },
{ IVPU_HW_IP_37XX, "intel/vpu/vpu_37xx_v0.0.bin" },
- { IVPU_HW_IP_40XX, "vpu_40xx.bin" },
+ { IVPU_HW_IP_40XX, "intel/vpu/vpu_40xx_v1.bin" },
{ IVPU_HW_IP_40XX, "intel/vpu/vpu_40xx_v0.0.bin" },
- { IVPU_HW_IP_50XX, "vpu_50xx.bin" },
+ { IVPU_HW_IP_50XX, "intel/vpu/vpu_50xx_v1.bin" },
{ IVPU_HW_IP_50XX, "intel/vpu/vpu_50xx_v0.0.bin" },
};
/* Production fw_names from the table above */
-MODULE_FIRMWARE("intel/vpu/vpu_37xx_v0.0.bin");
-MODULE_FIRMWARE("intel/vpu/vpu_40xx_v0.0.bin");
-MODULE_FIRMWARE("intel/vpu/vpu_50xx_v0.0.bin");
+MODULE_FIRMWARE("intel/vpu/vpu_37xx_v1.bin");
+MODULE_FIRMWARE("intel/vpu/vpu_40xx_v1.bin");
+MODULE_FIRMWARE("intel/vpu/vpu_50xx_v1.bin");
static int ivpu_fw_request(struct ivpu_device *vdev)
{
@@ -145,7 +145,10 @@ ivpu_fw_sched_mode_select(struct ivpu_device *vdev, const struct vpu_firmware_he
if (ivpu_sched_mode != IVPU_SCHED_MODE_AUTO)
return ivpu_sched_mode;
- return VPU_SCHEDULING_MODE_OS;
+ if (IVPU_FW_CHECK_API_VER_LT(vdev, fw_hdr, JSM, 3, 24))
+ return VPU_SCHEDULING_MODE_OS;
+
+ return VPU_SCHEDULING_MODE_HW;
}
static int ivpu_fw_parse(struct ivpu_device *vdev)
@@ -230,10 +233,20 @@ static int ivpu_fw_parse(struct ivpu_device *vdev)
fw->dvfs_mode = 0;
fw->sched_mode = ivpu_fw_sched_mode_select(vdev, fw_hdr);
- fw->primary_preempt_buf_size = fw_hdr->preemption_buffer_1_size;
- fw->secondary_preempt_buf_size = fw_hdr->preemption_buffer_2_size;
ivpu_info(vdev, "Scheduler mode: %s\n", fw->sched_mode ? "HW" : "OS");
+ if (fw_hdr->preemption_buffer_1_max_size)
+ fw->primary_preempt_buf_size = fw_hdr->preemption_buffer_1_max_size;
+ else
+ fw->primary_preempt_buf_size = fw_hdr->preemption_buffer_1_size;
+
+ if (fw_hdr->preemption_buffer_2_max_size)
+ fw->secondary_preempt_buf_size = fw_hdr->preemption_buffer_2_max_size;
+ else
+ fw->secondary_preempt_buf_size = fw_hdr->preemption_buffer_2_size;
+ ivpu_dbg(vdev, FW_BOOT, "Preemption buffer sizes: primary %u, secondary %u\n",
+ fw->primary_preempt_buf_size, fw->secondary_preempt_buf_size);
+
if (fw_hdr->ro_section_start_address && !is_within_range(fw_hdr->ro_section_start_address,
fw_hdr->ro_section_size,
fw_hdr->image_load_address,
@@ -531,6 +544,8 @@ static void ivpu_fw_boot_params_print(struct ivpu_device *vdev, struct vpu_boot_
boot_params->d0i3_entry_vpu_ts);
ivpu_dbg(vdev, FW_BOOT, "boot_params.system_time_us = %llu\n",
boot_params->system_time_us);
+ ivpu_dbg(vdev, FW_BOOT, "boot_params.power_profile = 0x%x\n",
+ boot_params->power_profile);
}
void ivpu_fw_boot_params_setup(struct ivpu_device *vdev, struct vpu_boot_params *boot_params)
@@ -561,7 +576,6 @@ void ivpu_fw_boot_params_setup(struct ivpu_device *vdev, struct vpu_boot_params
boot_params->magic = VPU_BOOT_PARAMS_MAGIC;
boot_params->vpu_id = to_pci_dev(vdev->drm.dev)->bus->number;
- boot_params->frequency = ivpu_hw_pll_freq_get(vdev);
/*
* This param is a debug firmware feature. It switches default clock
@@ -631,6 +645,8 @@ void ivpu_fw_boot_params_setup(struct ivpu_device *vdev, struct vpu_boot_params
boot_params->d0i3_delayed_entry = 1;
boot_params->d0i3_residency_time_us = 0;
boot_params->d0i3_entry_vpu_ts = 0;
+ if (IVPU_WA(disable_d0i2))
+ boot_params->power_profile |= BIT(1);
boot_params->system_time_us = ktime_to_us(ktime_get_real());
wmb(); /* Flush WC buffers after writing bootparams */
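ivpu_fw.c promotes the versioned intel/vpu/*_v1.bin paths to the preferred firmware names while keeping the older entries in fw_names[] as fallbacks; the driver simply tries each candidate for the detected IP generation until one loads. The generic fallback-request idiom, sketched with hypothetical names:

#include <linux/device.h>
#include <linux/errno.h>
#include <linux/firmware.h>
#include <linux/kernel.h>

static const char * const my_fw_names[] = {
	"vendor/dev/fw_v1.bin",		/* preferred, versioned name */
	"vendor/dev/fw_v0.0.bin",	/* legacy fallback */
};

/* Sketch: try each candidate in order and return the first one that loads. */
static int my_fw_request(struct device *dev, const struct firmware **fw)
{
	int i, ret = -ENOENT;

	for (i = 0; i < ARRAY_SIZE(my_fw_names); i++) {
		ret = firmware_request_nowarn(fw, my_fw_names[i], dev);
		if (!ret)
			return 0;
	}
	return ret;
}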
diff --git a/drivers/accel/ivpu/ivpu_gem.c b/drivers/accel/ivpu/ivpu_gem.c
index 16178054e629..248bfebeaa22 100644
--- a/drivers/accel/ivpu/ivpu_gem.c
+++ b/drivers/accel/ivpu/ivpu_gem.c
@@ -20,17 +20,29 @@
#include "ivpu_mmu.h"
#include "ivpu_mmu_context.h"
+MODULE_IMPORT_NS("DMA_BUF");
+
static const struct drm_gem_object_funcs ivpu_gem_funcs;
static inline void ivpu_dbg_bo(struct ivpu_device *vdev, struct ivpu_bo *bo, const char *action)
{
ivpu_dbg(vdev, BO,
"%6s: bo %8p vpu_addr %9llx size %8zu ctx %d has_pages %d dma_mapped %d mmu_mapped %d wc %d imported %d\n",
- action, bo, bo->vpu_addr, ivpu_bo_size(bo), bo->ctx ? bo->ctx->id : 0,
+ action, bo, bo->vpu_addr, ivpu_bo_size(bo), bo->ctx_id,
(bool)bo->base.pages, (bool)bo->base.sgt, bo->mmu_mapped, bo->base.map_wc,
(bool)bo->base.base.import_attach);
}
+static inline int ivpu_bo_lock(struct ivpu_bo *bo)
+{
+ return dma_resv_lock(bo->base.base.resv, NULL);
+}
+
+static inline void ivpu_bo_unlock(struct ivpu_bo *bo)
+{
+ dma_resv_unlock(bo->base.base.resv);
+}
+
/*
* ivpu_bo_pin() - pin the backing physical pages and map them to VPU.
*
@@ -41,22 +53,22 @@ static inline void ivpu_dbg_bo(struct ivpu_device *vdev, struct ivpu_bo *bo, con
int __must_check ivpu_bo_pin(struct ivpu_bo *bo)
{
struct ivpu_device *vdev = ivpu_bo_to_vdev(bo);
+ struct sg_table *sgt;
int ret = 0;
- mutex_lock(&bo->lock);
-
ivpu_dbg_bo(vdev, bo, "pin");
- drm_WARN_ON(&vdev->drm, !bo->ctx);
- if (!bo->mmu_mapped) {
- struct sg_table *sgt = drm_gem_shmem_get_pages_sgt(&bo->base);
+ sgt = drm_gem_shmem_get_pages_sgt(&bo->base);
+ if (IS_ERR(sgt)) {
+ ret = PTR_ERR(sgt);
+ ivpu_err(vdev, "Failed to map BO in IOMMU: %d\n", ret);
+ return ret;
+ }
- if (IS_ERR(sgt)) {
- ret = PTR_ERR(sgt);
- ivpu_err(vdev, "Failed to map BO in IOMMU: %d\n", ret);
- goto unlock;
- }
+ ivpu_bo_lock(bo);
+ if (!bo->mmu_mapped) {
+ drm_WARN_ON(&vdev->drm, !bo->ctx);
ret = ivpu_mmu_context_map_sgt(vdev, bo->ctx, bo->vpu_addr, sgt,
ivpu_bo_is_snooped(bo));
if (ret) {
@@ -67,7 +79,7 @@ int __must_check ivpu_bo_pin(struct ivpu_bo *bo)
}
unlock:
- mutex_unlock(&bo->lock);
+ ivpu_bo_unlock(bo);
return ret;
}
@@ -82,7 +94,7 @@ ivpu_bo_alloc_vpu_addr(struct ivpu_bo *bo, struct ivpu_mmu_context *ctx,
if (!drm_dev_enter(&vdev->drm, &idx))
return -ENODEV;
- mutex_lock(&bo->lock);
+ ivpu_bo_lock(bo);
ret = ivpu_mmu_context_insert_node(ctx, range, ivpu_bo_size(bo), &bo->mm_node);
if (!ret) {
@@ -92,9 +104,7 @@ ivpu_bo_alloc_vpu_addr(struct ivpu_bo *bo, struct ivpu_mmu_context *ctx,
ivpu_err(vdev, "Failed to add BO to context %u: %d\n", ctx->id, ret);
}
- ivpu_dbg_bo(vdev, bo, "alloc");
-
- mutex_unlock(&bo->lock);
+ ivpu_bo_unlock(bo);
drm_dev_exit(idx);
@@ -105,7 +115,7 @@ static void ivpu_bo_unbind_locked(struct ivpu_bo *bo)
{
struct ivpu_device *vdev = ivpu_bo_to_vdev(bo);
- lockdep_assert(lockdep_is_held(&bo->lock) || !kref_read(&bo->base.base.refcount));
+ lockdep_assert(dma_resv_held(bo->base.base.resv) || !kref_read(&bo->base.base.refcount));
if (bo->mmu_mapped) {
drm_WARN_ON(&vdev->drm, !bo->ctx);
@@ -123,14 +133,12 @@ static void ivpu_bo_unbind_locked(struct ivpu_bo *bo)
if (bo->base.base.import_attach)
return;
- dma_resv_lock(bo->base.base.resv, NULL);
if (bo->base.sgt) {
dma_unmap_sgtable(vdev->drm.dev, bo->base.sgt, DMA_BIDIRECTIONAL, 0);
sg_free_table(bo->base.sgt);
kfree(bo->base.sgt);
bo->base.sgt = NULL;
}
- dma_resv_unlock(bo->base.base.resv);
}
void ivpu_bo_unbind_all_bos_from_context(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx)
@@ -142,12 +150,12 @@ void ivpu_bo_unbind_all_bos_from_context(struct ivpu_device *vdev, struct ivpu_m
mutex_lock(&vdev->bo_list_lock);
list_for_each_entry(bo, &vdev->bo_list, bo_list_node) {
- mutex_lock(&bo->lock);
+ ivpu_bo_lock(bo);
if (bo->ctx == ctx) {
ivpu_dbg_bo(vdev, bo, "unbind");
ivpu_bo_unbind_locked(bo);
}
- mutex_unlock(&bo->lock);
+ ivpu_bo_unlock(bo);
}
mutex_unlock(&vdev->bo_list_lock);
}
@@ -167,12 +175,52 @@ struct drm_gem_object *ivpu_gem_create_object(struct drm_device *dev, size_t siz
bo->base.pages_mark_dirty_on_put = true; /* VPU can dirty a BO anytime */
INIT_LIST_HEAD(&bo->bo_list_node);
- mutex_init(&bo->lock);
return &bo->base.base;
}
-static struct ivpu_bo *ivpu_bo_alloc(struct ivpu_device *vdev, u64 size, u32 flags)
+struct drm_gem_object *ivpu_gem_prime_import(struct drm_device *dev,
+ struct dma_buf *dma_buf)
+{
+ struct device *attach_dev = dev->dev;
+ struct dma_buf_attachment *attach;
+ struct sg_table *sgt;
+ struct drm_gem_object *obj;
+ int ret;
+
+ attach = dma_buf_attach(dma_buf, attach_dev);
+ if (IS_ERR(attach))
+ return ERR_CAST(attach);
+
+ get_dma_buf(dma_buf);
+
+ sgt = dma_buf_map_attachment_unlocked(attach, DMA_BIDIRECTIONAL);
+ if (IS_ERR(sgt)) {
+ ret = PTR_ERR(sgt);
+ goto fail_detach;
+ }
+
+ obj = drm_gem_shmem_prime_import_sg_table(dev, attach, sgt);
+ if (IS_ERR(obj)) {
+ ret = PTR_ERR(obj);
+ goto fail_unmap;
+ }
+
+ obj->import_attach = attach;
+ obj->resv = dma_buf->resv;
+
+ return obj;
+
+fail_unmap:
+ dma_buf_unmap_attachment_unlocked(attach, sgt, DMA_BIDIRECTIONAL);
+fail_detach:
+ dma_buf_detach(dma_buf, attach);
+ dma_buf_put(dma_buf);
+
+ return ERR_PTR(ret);
+}
+
+static struct ivpu_bo *ivpu_bo_alloc(struct ivpu_device *vdev, u64 size, u32 flags, u32 ctx_id)
{
struct drm_gem_shmem_object *shmem;
struct ivpu_bo *bo;
@@ -190,6 +238,7 @@ static struct ivpu_bo *ivpu_bo_alloc(struct ivpu_device *vdev, u64 size, u32 fla
return ERR_CAST(shmem);
bo = to_ivpu_bo(&shmem->base);
+ bo->ctx_id = ctx_id;
bo->base.map_wc = flags & DRM_IVPU_BO_WC;
bo->flags = flags;
@@ -197,6 +246,8 @@ static struct ivpu_bo *ivpu_bo_alloc(struct ivpu_device *vdev, u64 size, u32 fla
list_add_tail(&bo->bo_list_node, &vdev->bo_list);
mutex_unlock(&vdev->bo_list_lock);
+ ivpu_dbg_bo(vdev, bo, "alloc");
+
return bo;
}
@@ -234,10 +285,14 @@ static void ivpu_gem_bo_free(struct drm_gem_object *obj)
list_del(&bo->bo_list_node);
mutex_unlock(&vdev->bo_list_lock);
- drm_WARN_ON(&vdev->drm, !dma_resv_test_signaled(obj->resv, DMA_RESV_USAGE_READ));
+ drm_WARN_ON(&vdev->drm, !drm_gem_is_imported(&bo->base.base) &&
+ !dma_resv_test_signaled(obj->resv, DMA_RESV_USAGE_READ));
+ drm_WARN_ON(&vdev->drm, ivpu_bo_size(bo) == 0);
+ drm_WARN_ON(&vdev->drm, bo->base.vaddr);
ivpu_bo_unbind_locked(bo);
- mutex_destroy(&bo->lock);
+ drm_WARN_ON(&vdev->drm, bo->mmu_mapped);
+ drm_WARN_ON(&vdev->drm, bo->ctx);
drm_WARN_ON(obj->dev, bo->base.pages_use_count > 1);
drm_gem_shmem_free(&bo->base);
@@ -271,7 +326,7 @@ int ivpu_bo_create_ioctl(struct drm_device *dev, void *data, struct drm_file *fi
if (size == 0)
return -EINVAL;
- bo = ivpu_bo_alloc(vdev, size, args->flags);
+ bo = ivpu_bo_alloc(vdev, size, args->flags, file_priv->ctx.id);
if (IS_ERR(bo)) {
ivpu_err(vdev, "Failed to allocate BO: %pe (ctx %u size %llu flags 0x%x)",
bo, file_priv->ctx.id, args->size, args->flags);
@@ -279,7 +334,10 @@ int ivpu_bo_create_ioctl(struct drm_device *dev, void *data, struct drm_file *fi
}
ret = drm_gem_handle_create(file, &bo->base.base, &args->handle);
- if (!ret)
+ if (ret)
+ ivpu_err(vdev, "Failed to create handle for BO: %pe (ctx %u size %llu flags 0x%x)",
+ bo, file_priv->ctx.id, args->size, args->flags);
+ else
args->vpu_addr = bo->vpu_addr;
drm_gem_object_put(&bo->base.base);
@@ -302,7 +360,7 @@ ivpu_bo_create(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx,
drm_WARN_ON(&vdev->drm, !PAGE_ALIGNED(range->end));
drm_WARN_ON(&vdev->drm, !PAGE_ALIGNED(size));
- bo = ivpu_bo_alloc(vdev, size, flags);
+ bo = ivpu_bo_alloc(vdev, size, flags, IVPU_GLOBAL_CONTEXT_MMU_SSID);
if (IS_ERR(bo)) {
ivpu_err(vdev, "Failed to allocate BO: %pe (vpu_addr 0x%llx size %llu flags 0x%x)",
bo, range->start, size, flags);
@@ -318,9 +376,9 @@ ivpu_bo_create(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx,
goto err_put;
if (flags & DRM_IVPU_BO_MAPPABLE) {
- dma_resv_lock(bo->base.base.resv, NULL);
+ ivpu_bo_lock(bo);
ret = drm_gem_shmem_vmap(&bo->base, &map);
- dma_resv_unlock(bo->base.base.resv);
+ ivpu_bo_unlock(bo);
if (ret)
goto err_put;
@@ -343,9 +401,9 @@ void ivpu_bo_free(struct ivpu_bo *bo)
struct iosys_map map = IOSYS_MAP_INIT_VADDR(bo->base.vaddr);
if (bo->flags & DRM_IVPU_BO_MAPPABLE) {
- dma_resv_lock(bo->base.base.resv, NULL);
+ ivpu_bo_lock(bo);
drm_gem_shmem_vunmap(&bo->base, &map);
- dma_resv_unlock(bo->base.base.resv);
+ ivpu_bo_unlock(bo);
}
drm_gem_object_put(&bo->base.base);
@@ -364,12 +422,12 @@ int ivpu_bo_info_ioctl(struct drm_device *dev, void *data, struct drm_file *file
bo = to_ivpu_bo(obj);
- mutex_lock(&bo->lock);
+ ivpu_bo_lock(bo);
args->flags = bo->flags;
args->mmap_offset = drm_vma_node_offset_addr(&obj->vma_node);
args->vpu_addr = bo->vpu_addr;
args->size = obj->size;
- mutex_unlock(&bo->lock);
+ ivpu_bo_unlock(bo);
drm_gem_object_put(obj);
return ret;
@@ -406,10 +464,10 @@ int ivpu_bo_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file
static void ivpu_bo_print_info(struct ivpu_bo *bo, struct drm_printer *p)
{
- mutex_lock(&bo->lock);
+ ivpu_bo_lock(bo);
drm_printf(p, "%-9p %-3u 0x%-12llx %-10lu 0x%-8x %-4u",
- bo, bo->ctx ? bo->ctx->id : 0, bo->vpu_addr, bo->base.base.size,
+ bo, bo->ctx_id, bo->vpu_addr, bo->base.base.size,
bo->flags, kref_read(&bo->base.base.refcount));
if (bo->base.pages)
@@ -423,7 +481,7 @@ static void ivpu_bo_print_info(struct ivpu_bo *bo, struct drm_printer *p)
drm_printf(p, "\n");
- mutex_unlock(&bo->lock);
+ ivpu_bo_unlock(bo);
}
void ivpu_bo_list(struct drm_device *dev, struct drm_printer *p)
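ivpu_gem.c drops the driver-private bo->lock mutex and serializes buffer state on the GEM object's reservation lock instead, which also covers imported dma-bufs because those share the exporter's resv. The core pattern, sketched for a generic GEM object and a hypothetical my_bo_update() helper:

#include <linux/dma-resv.h>
#include <drm/drm_gem.h>

static int my_bo_update(struct drm_gem_object *obj)
{
	int ret;

	/* With a NULL ww acquire context this behaves like a plain mutex lock. */
	ret = dma_resv_lock(obj->resv, NULL);
	if (ret)
		return ret;

	/* Touch the fields that used to be guarded by a private mutex. */

	dma_resv_unlock(obj->resv);
	return 0;
}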
diff --git a/drivers/accel/ivpu/ivpu_gem.h b/drivers/accel/ivpu/ivpu_gem.h
index d975000abd78..aa8ff14f7aae 100644
--- a/drivers/accel/ivpu/ivpu_gem.h
+++ b/drivers/accel/ivpu/ivpu_gem.h
@@ -17,10 +17,10 @@ struct ivpu_bo {
struct list_head bo_list_node;
struct drm_mm_node mm_node;
- struct mutex lock; /* Protects: ctx, mmu_mapped, vpu_addr */
u64 vpu_addr;
u32 flags;
u32 job_status; /* Valid only for command buffer */
+ u32 ctx_id;
bool mmu_mapped;
};
@@ -28,6 +28,7 @@ int ivpu_bo_pin(struct ivpu_bo *bo);
void ivpu_bo_unbind_all_bos_from_context(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx);
struct drm_gem_object *ivpu_gem_create_object(struct drm_device *dev, size_t size);
+struct drm_gem_object *ivpu_gem_prime_import(struct drm_device *dev, struct dma_buf *dma_buf);
struct ivpu_bo *ivpu_bo_create(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx,
struct ivpu_addr_range *range, u64 size, u32 flags);
struct ivpu_bo *ivpu_bo_create_global(struct ivpu_device *vdev, u64 size, u32 flags);
diff --git a/drivers/accel/ivpu/ivpu_hw.c b/drivers/accel/ivpu/ivpu_hw.c
index 4e1054f3466e..633160470c93 100644
--- a/drivers/accel/ivpu/ivpu_hw.c
+++ b/drivers/accel/ivpu/ivpu_hw.c
@@ -9,6 +9,16 @@
#include "ivpu_hw_ip.h"
#include <linux/dmi.h>
+#include <linux/fault-inject.h>
+#include <linux/pm_runtime.h>
+
+#ifdef CONFIG_FAULT_INJECTION
+DECLARE_FAULT_ATTR(ivpu_hw_failure);
+
+static char *ivpu_fail_hw;
+module_param_named_unsafe(fail_hw, ivpu_fail_hw, charp, 0444);
+MODULE_PARM_DESC(fail_hw, "<interval>,<probability>,<space>,<times>");
+#endif
static char *platform_to_str(u32 platform)
{
@@ -19,43 +29,36 @@ static char *platform_to_str(u32 platform)
return "SIMICS";
case IVPU_PLATFORM_FPGA:
return "FPGA";
+ case IVPU_PLATFORM_HSLE:
+ return "HSLE";
default:
return "Invalid platform";
}
}
-static const struct dmi_system_id dmi_platform_simulation[] = {
- {
- .ident = "Intel Simics",
- .matches = {
- DMI_MATCH(DMI_BOARD_NAME, "lnlrvp"),
- DMI_MATCH(DMI_BOARD_VERSION, "1.0"),
- DMI_MATCH(DMI_BOARD_SERIAL, "123456789"),
- },
- },
- {
- .ident = "Intel Simics",
- .matches = {
- DMI_MATCH(DMI_BOARD_NAME, "Simics"),
- },
- },
- { }
-};
-
static void platform_init(struct ivpu_device *vdev)
{
- if (dmi_check_system(dmi_platform_simulation))
- vdev->platform = IVPU_PLATFORM_SIMICS;
- else
- vdev->platform = IVPU_PLATFORM_SILICON;
+ int platform = ivpu_hw_btrs_platform_read(vdev);
- ivpu_dbg(vdev, MISC, "Platform type: %s (%d)\n",
- platform_to_str(vdev->platform), vdev->platform);
+ ivpu_dbg(vdev, MISC, "Platform type: %s (%d)\n", platform_to_str(platform), platform);
+
+ switch (platform) {
+ case IVPU_PLATFORM_SILICON:
+ case IVPU_PLATFORM_SIMICS:
+ case IVPU_PLATFORM_FPGA:
+ case IVPU_PLATFORM_HSLE:
+ vdev->platform = platform;
+ break;
+
+ default:
+ ivpu_err(vdev, "Invalid platform type: %d\n", platform);
+ break;
+ }
}
static void wa_init(struct ivpu_device *vdev)
{
- vdev->wa.punit_disabled = ivpu_is_fpga(vdev);
+ vdev->wa.punit_disabled = false;
vdev->wa.clear_runtime_mem = false;
if (ivpu_hw_btrs_gen(vdev) == IVPU_HW_BTRS_MTL)
@@ -65,14 +68,24 @@ static void wa_init(struct ivpu_device *vdev)
ivpu_revision(vdev) < IVPU_HW_IP_REV_LNL_B0)
vdev->wa.disable_clock_relinquish = true;
+ if (ivpu_test_mode & IVPU_TEST_MODE_CLK_RELINQ_ENABLE)
+ vdev->wa.disable_clock_relinquish = false;
+
+ if (ivpu_test_mode & IVPU_TEST_MODE_CLK_RELINQ_DISABLE)
+ vdev->wa.disable_clock_relinquish = true;
+
if (ivpu_hw_ip_gen(vdev) == IVPU_HW_IP_37XX)
vdev->wa.wp0_during_power_up = true;
+ if (ivpu_test_mode & IVPU_TEST_MODE_D0I2_DISABLE)
+ vdev->wa.disable_d0i2 = true;
+
IVPU_PRINT_WA(punit_disabled);
IVPU_PRINT_WA(clear_runtime_mem);
IVPU_PRINT_WA(interrupt_clear_with_0);
IVPU_PRINT_WA(disable_clock_relinquish);
IVPU_PRINT_WA(wp0_during_power_up);
+ IVPU_PRINT_WA(disable_d0i2);
}
static void timeouts_init(struct ivpu_device *vdev)
@@ -84,12 +97,12 @@ static void timeouts_init(struct ivpu_device *vdev)
vdev->timeout.autosuspend = -1;
vdev->timeout.d0i3_entry_msg = -1;
} else if (ivpu_is_fpga(vdev)) {
- vdev->timeout.boot = 100000;
- vdev->timeout.jsm = 50000;
- vdev->timeout.tdr = 2000000;
+ vdev->timeout.boot = 50;
+ vdev->timeout.jsm = 15000;
+ vdev->timeout.tdr = 30000;
vdev->timeout.autosuspend = -1;
vdev->timeout.d0i3_entry_msg = 500;
- vdev->timeout.state_dump_msg = 10;
+ vdev->timeout.state_dump_msg = 10000;
} else if (ivpu_is_simics(vdev)) {
vdev->timeout.boot = 50;
vdev->timeout.jsm = 500;
@@ -106,10 +119,30 @@ static void timeouts_init(struct ivpu_device *vdev)
else
vdev->timeout.autosuspend = 100;
vdev->timeout.d0i3_entry_msg = 5;
- vdev->timeout.state_dump_msg = 10;
+ vdev->timeout.state_dump_msg = 100;
}
}
+static void priority_bands_init(struct ivpu_device *vdev)
+{
+ /* Idle */
+ vdev->hw->hws.grace_period[VPU_JOB_SCHEDULING_PRIORITY_BAND_IDLE] = 0;
+ vdev->hw->hws.process_grace_period[VPU_JOB_SCHEDULING_PRIORITY_BAND_IDLE] = 50000;
+ vdev->hw->hws.process_quantum[VPU_JOB_SCHEDULING_PRIORITY_BAND_IDLE] = 160000;
+ /* Normal */
+ vdev->hw->hws.grace_period[VPU_JOB_SCHEDULING_PRIORITY_BAND_NORMAL] = 50000;
+ vdev->hw->hws.process_grace_period[VPU_JOB_SCHEDULING_PRIORITY_BAND_NORMAL] = 50000;
+ vdev->hw->hws.process_quantum[VPU_JOB_SCHEDULING_PRIORITY_BAND_NORMAL] = 300000;
+ /* Focus */
+ vdev->hw->hws.grace_period[VPU_JOB_SCHEDULING_PRIORITY_BAND_FOCUS] = 50000;
+ vdev->hw->hws.process_grace_period[VPU_JOB_SCHEDULING_PRIORITY_BAND_FOCUS] = 50000;
+ vdev->hw->hws.process_quantum[VPU_JOB_SCHEDULING_PRIORITY_BAND_FOCUS] = 200000;
+ /* Realtime */
+ vdev->hw->hws.grace_period[VPU_JOB_SCHEDULING_PRIORITY_BAND_REALTIME] = 0;
+ vdev->hw->hws.process_grace_period[VPU_JOB_SCHEDULING_PRIORITY_BAND_REALTIME] = 50000;
+ vdev->hw->hws.process_quantum[VPU_JOB_SCHEDULING_PRIORITY_BAND_REALTIME] = 200000;
+}
+
static void memory_ranges_init(struct ivpu_device *vdev)
{
if (ivpu_hw_ip_gen(vdev) == IVPU_HW_IP_37XX) {
@@ -248,12 +281,18 @@ int ivpu_hw_init(struct ivpu_device *vdev)
{
ivpu_hw_btrs_info_init(vdev);
ivpu_hw_btrs_freq_ratios_init(vdev);
+ priority_bands_init(vdev);
memory_ranges_init(vdev);
platform_init(vdev);
wa_init(vdev);
timeouts_init(vdev);
atomic_set(&vdev->hw->firewall_irq_counter, 0);
+#ifdef CONFIG_FAULT_INJECTION
+ if (ivpu_fail_hw)
+ setup_fault_attr(&ivpu_hw_failure, ivpu_fail_hw);
+#endif
+
return 0;
}
@@ -285,8 +324,6 @@ void ivpu_hw_profiling_freq_drive(struct ivpu_device *vdev, bool enable)
void ivpu_irq_handlers_init(struct ivpu_device *vdev)
{
- INIT_KFIFO(vdev->hw->irq.fifo);
-
if (ivpu_hw_ip_gen(vdev) == IVPU_HW_IP_37XX)
vdev->hw->irq.ip_irq_handler = ivpu_hw_ip_irq_handler_37xx;
else
@@ -300,7 +337,6 @@ void ivpu_irq_handlers_init(struct ivpu_device *vdev)
void ivpu_hw_irq_enable(struct ivpu_device *vdev)
{
- kfifo_reset(&vdev->hw->irq.fifo);
ivpu_hw_ip_irq_enable(vdev);
ivpu_hw_btrs_irq_enable(vdev);
}
@@ -327,9 +363,9 @@ irqreturn_t ivpu_hw_irq_handler(int irq, void *ptr)
/* Re-enable global interrupts to re-trigger MSI for pending interrupts */
ivpu_hw_btrs_global_int_enable(vdev);
- if (!kfifo_is_empty(&vdev->hw->irq.fifo))
- return IRQ_WAKE_THREAD;
- if (ip_handled || btrs_handled)
- return IRQ_HANDLED;
- return IRQ_NONE;
+ if (!ip_handled && !btrs_handled)
+ return IRQ_NONE;
+
+ pm_runtime_mark_last_busy(vdev->drm.dev);
+ return IRQ_HANDLED;
}
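ivpu_hw.c wires up the standard fault-injection framework: a fault_attr object, an unsafe module parameter carrying the "<interval>,<probability>,<space>,<times>" string, and should_fail() checks at the register-poll layer (plus a debugfs knob added in ivpu_debugfs.c). The generic shape of that pattern, with hypothetical names and detached from this driver:

#include <linux/errno.h>
#include <linux/fault-inject.h>
#include <linux/module.h>

#ifdef CONFIG_FAULT_INJECTION
static DECLARE_FAULT_ATTR(my_failure);

static char *my_fail_param;
module_param_named_unsafe(fail_op, my_fail_param, charp, 0444);
MODULE_PARM_DESC(fail_op, "<interval>,<probability>,<space>,<times>");
#endif

static int my_do_op(void)
{
#ifdef CONFIG_FAULT_INJECTION
	if (should_fail(&my_failure, 1))
		return -ETIMEDOUT; /* injected failure */
#endif
	return 0; /* real work would go here */
}

/*
 * At init (hypothetical): if (my_fail_param) setup_fault_attr(&my_failure, my_fail_param);
 * optionally expose runtime knobs via fault_create_debugfs_attr("fail_op", parent, &my_failure).
 */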
diff --git a/drivers/accel/ivpu/ivpu_hw.h b/drivers/accel/ivpu/ivpu_hw.h
index fc4dbfc980c8..d79668fe1609 100644
--- a/drivers/accel/ivpu/ivpu_hw.h
+++ b/drivers/accel/ivpu/ivpu_hw.h
@@ -1,23 +1,15 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
- * Copyright (C) 2020-2024 Intel Corporation
+ * Copyright (C) 2020-2025 Intel Corporation
*/
#ifndef __IVPU_HW_H__
#define __IVPU_HW_H__
-#include <linux/kfifo.h>
-
#include "ivpu_drv.h"
#include "ivpu_hw_btrs.h"
#include "ivpu_hw_ip.h"
-#define IVPU_HW_IRQ_FIFO_LENGTH 1024
-
-#define IVPU_HW_IRQ_SRC_IPC 1
-#define IVPU_HW_IRQ_SRC_MMU_EVTQ 2
-#define IVPU_HW_IRQ_SRC_DCT 3
-
struct ivpu_addr_range {
resource_size_t start;
resource_size_t end;
@@ -27,7 +19,6 @@ struct ivpu_hw_info {
struct {
bool (*btrs_irq_handler)(struct ivpu_device *vdev, int irq);
bool (*ip_irq_handler)(struct ivpu_device *vdev, int irq);
- DECLARE_KFIFO(fifo, u8, IVPU_HW_IRQ_FIFO_LENGTH);
} irq;
struct {
struct ivpu_addr_range global;
@@ -45,6 +36,11 @@ struct ivpu_hw_info {
u8 pn_ratio;
u32 profiling_freq;
} pll;
+ struct {
+ u32 grace_period[VPU_HWS_NUM_PRIORITY_BANDS];
+ u32 process_quantum[VPU_HWS_NUM_PRIORITY_BANDS];
+ u32 process_grace_period[VPU_HWS_NUM_PRIORITY_BANDS];
+ } hws;
u32 tile_fuse;
u32 sku;
u16 config;
@@ -86,19 +82,19 @@ static inline u64 ivpu_hw_range_size(const struct ivpu_addr_range *range)
return range->end - range->start;
}
-static inline u32 ivpu_hw_ratio_to_freq(struct ivpu_device *vdev, u32 ratio)
+static inline u32 ivpu_hw_dpu_max_freq_get(struct ivpu_device *vdev)
{
- return ivpu_hw_btrs_ratio_to_freq(vdev, ratio);
+ return ivpu_hw_btrs_dpu_max_freq_get(vdev);
}
-static inline void ivpu_hw_irq_clear(struct ivpu_device *vdev)
+static inline u32 ivpu_hw_dpu_freq_get(struct ivpu_device *vdev)
{
- ivpu_hw_ip_irq_clear(vdev);
+ return ivpu_hw_btrs_dpu_freq_get(vdev);
}
-static inline u32 ivpu_hw_pll_freq_get(struct ivpu_device *vdev)
+static inline void ivpu_hw_irq_clear(struct ivpu_device *vdev)
{
- return ivpu_hw_btrs_pll_freq_get(vdev);
+ ivpu_hw_ip_irq_clear(vdev);
}
static inline u32 ivpu_hw_profiling_freq_get(struct ivpu_device *vdev)
diff --git a/drivers/accel/ivpu/ivpu_hw_btrs.c b/drivers/accel/ivpu/ivpu_hw_btrs.c
index 3212c99f3682..b236c7234daa 100644
--- a/drivers/accel/ivpu/ivpu_hw_btrs.c
+++ b/drivers/accel/ivpu/ivpu_hw_btrs.c
@@ -1,8 +1,10 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
- * Copyright (C) 2020-2024 Intel Corporation
+ * Copyright (C) 2020-2025 Intel Corporation
*/
+#include <linux/units.h>
+
#include "ivpu_drv.h"
#include "ivpu_hw.h"
#include "ivpu_hw_btrs.h"
@@ -28,17 +30,13 @@
#define BTRS_LNL_ALL_IRQ_MASK ((u32)-1)
-#define BTRS_MTL_WP_CONFIG_1_TILE_5_3_RATIO WP_CONFIG(MTL_CONFIG_1_TILE, MTL_PLL_RATIO_5_3)
-#define BTRS_MTL_WP_CONFIG_1_TILE_4_3_RATIO WP_CONFIG(MTL_CONFIG_1_TILE, MTL_PLL_RATIO_4_3)
-#define BTRS_MTL_WP_CONFIG_2_TILE_5_3_RATIO WP_CONFIG(MTL_CONFIG_2_TILE, MTL_PLL_RATIO_5_3)
-#define BTRS_MTL_WP_CONFIG_2_TILE_4_3_RATIO WP_CONFIG(MTL_CONFIG_2_TILE, MTL_PLL_RATIO_4_3)
-#define BTRS_MTL_WP_CONFIG_0_TILE_PLL_OFF WP_CONFIG(0, 0)
#define PLL_CDYN_DEFAULT 0x80
#define PLL_EPP_DEFAULT 0x80
#define PLL_CONFIG_DEFAULT 0x0
-#define PLL_SIMULATION_FREQ 10000000
-#define PLL_REF_CLK_FREQ 50000000
+#define PLL_REF_CLK_FREQ 50000000ull
+#define PLL_RATIO_TO_FREQ(x) ((x) * PLL_REF_CLK_FREQ)
+
#define PLL_TIMEOUT_US (1500 * USEC_PER_MSEC)
#define IDLE_TIMEOUT_US (5 * USEC_PER_MSEC)
#define TIMEOUT_US (150 * USEC_PER_MSEC)
@@ -62,6 +60,8 @@
#define DCT_ENABLE 0x1
#define DCT_DISABLE 0x0
+static u32 pll_ratio_to_dpu_freq(struct ivpu_device *vdev, u32 ratio);
+
int ivpu_hw_btrs_irqs_clear_with_0_mtl(struct ivpu_device *vdev)
{
REGB_WR32(VPU_HW_BTRS_MTL_INTERRUPT_STAT, BTRS_MTL_ALL_IRQ_MASK);
@@ -156,7 +156,7 @@ static int info_init_mtl(struct ivpu_device *vdev)
hw->tile_fuse = BTRS_MTL_TILE_FUSE_ENABLE_BOTH;
hw->sku = BTRS_MTL_TILE_SKU_BOTH;
- hw->config = BTRS_MTL_WP_CONFIG_2_TILE_4_3_RATIO;
+ hw->config = WP_CONFIG(MTL_CONFIG_2_TILE, MTL_PLL_RATIO_4_3);
return 0;
}
@@ -334,8 +334,8 @@ int ivpu_hw_btrs_wp_drive(struct ivpu_device *vdev, bool enable)
prepare_wp_request(vdev, &wp, enable);
- ivpu_dbg(vdev, PM, "PLL workpoint request: %u Hz, config: 0x%x, epp: 0x%x, cdyn: 0x%x\n",
- PLL_RATIO_TO_FREQ(wp.target), wp.cfg, wp.epp, wp.cdyn);
+ ivpu_dbg(vdev, PM, "PLL workpoint request: %lu MHz, config: 0x%x, epp: 0x%x, cdyn: 0x%x\n",
+ pll_ratio_to_dpu_freq(vdev, wp.target) / HZ_PER_MHZ, wp.cfg, wp.epp, wp.cdyn);
ret = wp_request_send(vdev, &wp);
if (ret) {
@@ -573,6 +573,47 @@ int ivpu_hw_btrs_wait_for_idle(struct ivpu_device *vdev)
return REGB_POLL_FLD(VPU_HW_BTRS_LNL_VPU_STATUS, IDLE, 0x1, IDLE_TIMEOUT_US);
}
+static u32 pll_config_get_mtl(struct ivpu_device *vdev)
+{
+ return REGB_RD32(VPU_HW_BTRS_MTL_CURRENT_PLL);
+}
+
+static u32 pll_config_get_lnl(struct ivpu_device *vdev)
+{
+ return REGB_RD32(VPU_HW_BTRS_LNL_PLL_FREQ);
+}
+
+static u32 pll_ratio_to_dpu_freq_mtl(u16 ratio)
+{
+ return (PLL_RATIO_TO_FREQ(ratio) * 2) / 3;
+}
+
+static u32 pll_ratio_to_dpu_freq_lnl(u16 ratio)
+{
+ return PLL_RATIO_TO_FREQ(ratio) / 2;
+}
+
+static u32 pll_ratio_to_dpu_freq(struct ivpu_device *vdev, u32 ratio)
+{
+ if (ivpu_hw_btrs_gen(vdev) == IVPU_HW_BTRS_MTL)
+ return pll_ratio_to_dpu_freq_mtl(ratio);
+ else
+ return pll_ratio_to_dpu_freq_lnl(ratio);
+}
+
+u32 ivpu_hw_btrs_dpu_max_freq_get(struct ivpu_device *vdev)
+{
+ return pll_ratio_to_dpu_freq(vdev, vdev->hw->pll.max_ratio);
+}
+
+u32 ivpu_hw_btrs_dpu_freq_get(struct ivpu_device *vdev)
+{
+ if (ivpu_hw_btrs_gen(vdev) == IVPU_HW_BTRS_MTL)
+ return pll_ratio_to_dpu_freq_mtl(pll_config_get_mtl(vdev));
+ else
+ return pll_ratio_to_dpu_freq_lnl(pll_config_get_lnl(vdev));
+}
+
/* Handler for IRQs from Buttress core (irqB) */
bool ivpu_hw_btrs_irq_handler_mtl(struct ivpu_device *vdev, int irq)
{
@@ -582,9 +623,12 @@ bool ivpu_hw_btrs_irq_handler_mtl(struct ivpu_device *vdev, int irq)
if (!status)
return false;
- if (REG_TEST_FLD(VPU_HW_BTRS_MTL_INTERRUPT_STAT, FREQ_CHANGE, status))
- ivpu_dbg(vdev, IRQ, "FREQ_CHANGE irq: %08x",
- REGB_RD32(VPU_HW_BTRS_MTL_CURRENT_PLL));
+ if (REG_TEST_FLD(VPU_HW_BTRS_MTL_INTERRUPT_STAT, FREQ_CHANGE, status)) {
+ u32 pll = pll_config_get_mtl(vdev);
+
+ ivpu_dbg(vdev, IRQ, "FREQ_CHANGE irq, wp %08x, %lu MHz",
+ pll, pll_ratio_to_dpu_freq_mtl(pll) / HZ_PER_MHZ);
+ }
if (REG_TEST_FLD(VPU_HW_BTRS_MTL_INTERRUPT_STAT, ATS_ERR, status)) {
ivpu_err(vdev, "ATS_ERR irq 0x%016llx", REGB_RD64(VPU_HW_BTRS_MTL_ATS_ERR_LOG_0));
@@ -630,12 +674,15 @@ bool ivpu_hw_btrs_irq_handler_lnl(struct ivpu_device *vdev, int irq)
if (REG_TEST_FLD(VPU_HW_BTRS_LNL_INTERRUPT_STAT, SURV_ERR, status)) {
ivpu_dbg(vdev, IRQ, "Survivability IRQ\n");
- if (!kfifo_put(&vdev->hw->irq.fifo, IVPU_HW_IRQ_SRC_DCT))
- ivpu_err_ratelimited(vdev, "IRQ FIFO full\n");
+ queue_work(system_wq, &vdev->irq_dct_work);
}
- if (REG_TEST_FLD(VPU_HW_BTRS_LNL_INTERRUPT_STAT, FREQ_CHANGE, status))
- ivpu_dbg(vdev, IRQ, "FREQ_CHANGE irq: %08x", REGB_RD32(VPU_HW_BTRS_LNL_PLL_FREQ));
+ if (REG_TEST_FLD(VPU_HW_BTRS_LNL_INTERRUPT_STAT, FREQ_CHANGE, status)) {
+ u32 pll = pll_config_get_lnl(vdev);
+
+ ivpu_dbg(vdev, IRQ, "FREQ_CHANGE irq, wp %08x, %lu MHz",
+ pll, pll_ratio_to_dpu_freq_lnl(pll) / HZ_PER_MHZ);
+ }
if (REG_TEST_FLD(VPU_HW_BTRS_LNL_INTERRUPT_STAT, ATS_ERR, status)) {
ivpu_err(vdev, "ATS_ERR LOG1 0x%08x ATS_ERR_LOG2 0x%08x\n",
@@ -718,60 +765,6 @@ void ivpu_hw_btrs_dct_set_status(struct ivpu_device *vdev, bool enable, u32 acti
REGB_WR32(VPU_HW_BTRS_LNL_PCODE_MAILBOX_STATUS, val);
}
-static u32 pll_ratio_to_freq_mtl(u32 ratio, u32 config)
-{
- u32 pll_clock = PLL_REF_CLK_FREQ * ratio;
- u32 cpu_clock;
-
- if ((config & 0xff) == MTL_PLL_RATIO_4_3)
- cpu_clock = pll_clock * 2 / 4;
- else
- cpu_clock = pll_clock * 2 / 5;
-
- return cpu_clock;
-}
-
-u32 ivpu_hw_btrs_ratio_to_freq(struct ivpu_device *vdev, u32 ratio)
-{
- struct ivpu_hw_info *hw = vdev->hw;
-
- if (ivpu_hw_btrs_gen(vdev) == IVPU_HW_BTRS_MTL)
- return pll_ratio_to_freq_mtl(ratio, hw->config);
- else
- return PLL_RATIO_TO_FREQ(ratio);
-}
-
-static u32 pll_freq_get_mtl(struct ivpu_device *vdev)
-{
- u32 pll_curr_ratio;
-
- pll_curr_ratio = REGB_RD32(VPU_HW_BTRS_MTL_CURRENT_PLL);
- pll_curr_ratio &= VPU_HW_BTRS_MTL_CURRENT_PLL_RATIO_MASK;
-
- if (!ivpu_is_silicon(vdev))
- return PLL_SIMULATION_FREQ;
-
- return pll_ratio_to_freq_mtl(pll_curr_ratio, vdev->hw->config);
-}
-
-static u32 pll_freq_get_lnl(struct ivpu_device *vdev)
-{
- u32 pll_curr_ratio;
-
- pll_curr_ratio = REGB_RD32(VPU_HW_BTRS_LNL_PLL_FREQ);
- pll_curr_ratio &= VPU_HW_BTRS_LNL_PLL_FREQ_RATIO_MASK;
-
- return PLL_RATIO_TO_FREQ(pll_curr_ratio);
-}
-
-u32 ivpu_hw_btrs_pll_freq_get(struct ivpu_device *vdev)
-{
- if (ivpu_hw_btrs_gen(vdev) == IVPU_HW_BTRS_MTL)
- return pll_freq_get_mtl(vdev);
- else
- return pll_freq_get_lnl(vdev);
-}
-
u32 ivpu_hw_btrs_telemetry_offset_get(struct ivpu_device *vdev)
{
if (ivpu_hw_btrs_gen(vdev) == IVPU_HW_BTRS_MTL)
@@ -888,3 +881,10 @@ void ivpu_hw_btrs_diagnose_failure(struct ivpu_device *vdev)
else
return diagnose_failure_lnl(vdev);
}
+
+int ivpu_hw_btrs_platform_read(struct ivpu_device *vdev)
+{
+ u32 reg = REGB_RD32(VPU_HW_BTRS_LNL_VPU_STATUS);
+
+ return REG_GET_FLD(VPU_HW_BTRS_LNL_VPU_STATUS, PLATFORM, reg);
+}
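The new pll_ratio_to_dpu_freq helpers scale the workpoint ratio by the 50 MHz reference clock and then apply a generation-specific divider: times 2/3 on the MTL buttress and divide by 2 on LNL. For a ratio of 32 the PLL clock is 1.6 GHz, which works out to roughly 1066 MHz DPU frequency on MTL and 800 MHz on LNL. A self-contained sketch of the arithmetic (userspace, for illustration only):

#include <stdint.h>
#include <stdio.h>

#define PLL_REF_CLK_FREQ	50000000ull	/* 50 MHz reference clock */
#define PLL_RATIO_TO_FREQ(x)	((x) * PLL_REF_CLK_FREQ)
#define HZ_PER_MHZ		1000000ull

int main(void)
{
	uint16_t ratio = 32; /* example workpoint ratio */

	printf("MTL DPU freq: %llu MHz\n",
	       PLL_RATIO_TO_FREQ(ratio) * 2 / 3 / HZ_PER_MHZ); /* ~1066 MHz */
	printf("LNL DPU freq: %llu MHz\n",
	       PLL_RATIO_TO_FREQ(ratio) / 2 / HZ_PER_MHZ); /* 800 MHz */
	return 0;
}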
diff --git a/drivers/accel/ivpu/ivpu_hw_btrs.h b/drivers/accel/ivpu/ivpu_hw_btrs.h
index 04f14f50fed6..d2d82651976d 100644
--- a/drivers/accel/ivpu/ivpu_hw_btrs.h
+++ b/drivers/accel/ivpu/ivpu_hw_btrs.h
@@ -1,6 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
- * Copyright (C) 2020-2024 Intel Corporation
+ * Copyright (C) 2020-2025 Intel Corporation
*/
#ifndef __IVPU_HW_BTRS_H__
@@ -13,9 +13,8 @@
#define PLL_PROFILING_FREQ_DEFAULT 38400000
#define PLL_PROFILING_FREQ_HIGH 400000000
-#define PLL_RATIO_TO_FREQ(x) ((x) * PLL_REF_CLK_FREQ)
-#define DCT_DEFAULT_ACTIVE_PERCENT 15u
+#define DCT_DEFAULT_ACTIVE_PERCENT 30u
#define DCT_PERIOD_US 35300u
int ivpu_hw_btrs_info_init(struct ivpu_device *vdev);
@@ -32,12 +31,12 @@ int ivpu_hw_btrs_ip_reset(struct ivpu_device *vdev);
void ivpu_hw_btrs_profiling_freq_reg_set_lnl(struct ivpu_device *vdev);
void ivpu_hw_btrs_ats_print_lnl(struct ivpu_device *vdev);
void ivpu_hw_btrs_clock_relinquish_disable_lnl(struct ivpu_device *vdev);
+u32 ivpu_hw_btrs_dpu_max_freq_get(struct ivpu_device *vdev);
+u32 ivpu_hw_btrs_dpu_freq_get(struct ivpu_device *vdev);
bool ivpu_hw_btrs_irq_handler_mtl(struct ivpu_device *vdev, int irq);
bool ivpu_hw_btrs_irq_handler_lnl(struct ivpu_device *vdev, int irq);
int ivpu_hw_btrs_dct_get_request(struct ivpu_device *vdev, bool *enable);
void ivpu_hw_btrs_dct_set_status(struct ivpu_device *vdev, bool enable, u32 dct_percent);
-u32 ivpu_hw_btrs_pll_freq_get(struct ivpu_device *vdev);
-u32 ivpu_hw_btrs_ratio_to_freq(struct ivpu_device *vdev, u32 ratio);
u32 ivpu_hw_btrs_telemetry_offset_get(struct ivpu_device *vdev);
u32 ivpu_hw_btrs_telemetry_size_get(struct ivpu_device *vdev);
u32 ivpu_hw_btrs_telemetry_enable_get(struct ivpu_device *vdev);
@@ -46,5 +45,6 @@ void ivpu_hw_btrs_global_int_disable(struct ivpu_device *vdev);
void ivpu_hw_btrs_irq_enable(struct ivpu_device *vdev);
void ivpu_hw_btrs_irq_disable(struct ivpu_device *vdev);
void ivpu_hw_btrs_diagnose_failure(struct ivpu_device *vdev);
+int ivpu_hw_btrs_platform_read(struct ivpu_device *vdev);
#endif /* __IVPU_HW_BTRS_H__ */
diff --git a/drivers/accel/ivpu/ivpu_hw_btrs_lnl_reg.h b/drivers/accel/ivpu/ivpu_hw_btrs_lnl_reg.h
index fc51f3098f97..fff2ef2cada6 100644
--- a/drivers/accel/ivpu/ivpu_hw_btrs_lnl_reg.h
+++ b/drivers/accel/ivpu/ivpu_hw_btrs_lnl_reg.h
@@ -86,6 +86,7 @@
#define VPU_HW_BTRS_LNL_VPU_STATUS_POWER_RESOURCE_OWN_ACK_MASK BIT_MASK(7)
#define VPU_HW_BTRS_LNL_VPU_STATUS_PERF_CLK_MASK BIT_MASK(11)
#define VPU_HW_BTRS_LNL_VPU_STATUS_DISABLE_CLK_RELINQUISH_MASK BIT_MASK(12)
+#define VPU_HW_BTRS_LNL_VPU_STATUS_PLATFORM_MASK GENMASK(31, 29)
#define VPU_HW_BTRS_LNL_IP_RESET 0x00000160u
#define VPU_HW_BTRS_LNL_IP_RESET_TRIGGER_MASK BIT_MASK(0)
diff --git a/drivers/accel/ivpu/ivpu_hw_ip.c b/drivers/accel/ivpu/ivpu_hw_ip.c
index 029dd065614b..823f6a57dc54 100644
--- a/drivers/accel/ivpu/ivpu_hw_ip.c
+++ b/drivers/accel/ivpu/ivpu_hw_ip.c
@@ -968,14 +968,14 @@ void ivpu_hw_ip_wdt_disable(struct ivpu_device *vdev)
static u32 ipc_rx_count_get_37xx(struct ivpu_device *vdev)
{
- u32 count = REGV_RD32_SILENT(VPU_37XX_HOST_SS_TIM_IPC_FIFO_STAT);
+ u32 count = readl(vdev->regv + VPU_37XX_HOST_SS_TIM_IPC_FIFO_STAT);
return REG_GET_FLD(VPU_37XX_HOST_SS_TIM_IPC_FIFO_STAT, FILL_LEVEL, count);
}
static u32 ipc_rx_count_get_40xx(struct ivpu_device *vdev)
{
- u32 count = REGV_RD32_SILENT(VPU_40XX_HOST_SS_TIM_IPC_FIFO_STAT);
+ u32 count = readl(vdev->regv + VPU_40XX_HOST_SS_TIM_IPC_FIFO_STAT);
return REG_GET_FLD(VPU_40XX_HOST_SS_TIM_IPC_FIFO_STAT, FILL_LEVEL, count);
}
diff --git a/drivers/accel/ivpu/ivpu_hw_reg_io.h b/drivers/accel/ivpu/ivpu_hw_reg_io.h
index 79b3f441eac4..66259b0ead02 100644
--- a/drivers/accel/ivpu/ivpu_hw_reg_io.h
+++ b/drivers/accel/ivpu/ivpu_hw_reg_io.h
@@ -7,6 +7,7 @@
#define __IVPU_HW_REG_IO_H__
#include <linux/bitfield.h>
+#include <linux/fault-inject.h>
#include <linux/io.h>
#include <linux/iopoll.h>
@@ -16,13 +17,11 @@
#define REG_IO_ERROR 0xffffffff
#define REGB_RD32(reg) ivpu_hw_reg_rd32(vdev, vdev->regb, (reg), #reg, __func__)
-#define REGB_RD32_SILENT(reg) readl(vdev->regb + (reg))
#define REGB_RD64(reg) ivpu_hw_reg_rd64(vdev, vdev->regb, (reg), #reg, __func__)
#define REGB_WR32(reg, val) ivpu_hw_reg_wr32(vdev, vdev->regb, (reg), (val), #reg, __func__)
#define REGB_WR64(reg, val) ivpu_hw_reg_wr64(vdev, vdev->regb, (reg), (val), #reg, __func__)
#define REGV_RD32(reg) ivpu_hw_reg_rd32(vdev, vdev->regv, (reg), #reg, __func__)
-#define REGV_RD32_SILENT(reg) readl(vdev->regv + (reg))
#define REGV_RD64(reg) ivpu_hw_reg_rd64(vdev, vdev->regv, (reg), #reg, __func__)
#define REGV_WR32(reg, val) ivpu_hw_reg_wr32(vdev, vdev->regv, (reg), (val), #reg, __func__)
#define REGV_WR64(reg, val) ivpu_hw_reg_wr64(vdev, vdev->regv, (reg), (val), #reg, __func__)
@@ -47,31 +46,42 @@
#define REG_TEST_FLD_NUM(REG, FLD, num, val) \
((num) == FIELD_GET(REG##_##FLD##_MASK, val))
-#define REGB_POLL_FLD(reg, fld, val, timeout_us) \
-({ \
- u32 var; \
- int r; \
- ivpu_dbg(vdev, REG, "%s : %s (0x%08x) Polling field %s started (expected 0x%x)\n", \
- __func__, #reg, reg, #fld, val); \
- r = read_poll_timeout(REGB_RD32_SILENT, var, (FIELD_GET(reg##_##fld##_MASK, var) == (val)),\
- REG_POLL_SLEEP_US, timeout_us, false, (reg)); \
- ivpu_dbg(vdev, REG, "%s : %s (0x%08x) Polling field %s %s (reg val 0x%08x)\n", \
- __func__, #reg, reg, #fld, r ? "ETIMEDOUT" : "OK", var); \
- r; \
-})
-
-#define REGV_POLL_FLD(reg, fld, val, timeout_us) \
-({ \
- u32 var; \
- int r; \
- ivpu_dbg(vdev, REG, "%s : %s (0x%08x) Polling field %s started (expected 0x%x)\n", \
- __func__, #reg, reg, #fld, val); \
- r = read_poll_timeout(REGV_RD32_SILENT, var, (FIELD_GET(reg##_##fld##_MASK, var) == (val)),\
- REG_POLL_SLEEP_US, timeout_us, false, (reg)); \
- ivpu_dbg(vdev, REG, "%s : %s (0x%08x) Polling field %s %s (reg val 0x%08x)\n", \
- __func__, #reg, reg, #fld, r ? "ETIMEDOUT" : "OK", var); \
- r; \
-})
+#define REGB_POLL_FLD(reg, fld, exp_fld_val, timeout_us) \
+ ivpu_hw_reg_poll_fld(vdev, vdev->regb, reg, reg##_##fld##_MASK, \
+ FIELD_PREP(reg##_##fld##_MASK, exp_fld_val), timeout_us, \
+ __func__, #reg, #fld)
+
+#define REGV_POLL_FLD(reg, fld, exp_fld_val, timeout_us) \
+ ivpu_hw_reg_poll_fld(vdev, vdev->regv, reg, reg##_##fld##_MASK, \
+ FIELD_PREP(reg##_##fld##_MASK, exp_fld_val), timeout_us, \
+ __func__, #reg, #fld)
+
+extern struct fault_attr ivpu_hw_failure;
+
+static inline int __must_check
+ivpu_hw_reg_poll_fld(struct ivpu_device *vdev, void __iomem *base,
+ u32 reg_offset, u32 reg_mask, u32 exp_masked_val, u32 timeout_us,
+ const char *func_name, const char *reg_name, const char *fld_name)
+{
+ u32 reg_val;
+ int ret;
+
+ ivpu_dbg(vdev, REG, "%s : %s (0x%08x) POLL %s started (exp_val 0x%x)\n",
+ func_name, reg_name, reg_offset, fld_name, exp_masked_val);
+
+ ret = read_poll_timeout(readl, reg_val, (reg_val & reg_mask) == exp_masked_val,
+ REG_POLL_SLEEP_US, timeout_us, false, base + reg_offset);
+
+#ifdef CONFIG_FAULT_INJECTION
+ if (should_fail(&ivpu_hw_failure, 1))
+ ret = -ETIMEDOUT;
+#endif
+
+ ivpu_dbg(vdev, REG, "%s : %s (0x%08x) POLL %s %s (reg_val 0x%08x)\n",
+ func_name, reg_name, reg_offset, fld_name, ret ? "ETIMEDOUT" : "OK", reg_val);
+
+ return ret;
+}
static inline u32
ivpu_hw_reg_rd32(struct ivpu_device *vdev, void __iomem *base, u32 reg,
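The register-poll macros collapse into a single ivpu_hw_reg_poll_fld() inline built on read_poll_timeout(), which also becomes the hook for the fault injection above. The underlying iopoll idiom in a generic, hedged form; the register offset, mask and timeouts below are made up:

#include <linux/bits.h>
#include <linux/io.h>
#include <linux/iopoll.h>

#define MY_STATUS_REG		0x160
#define MY_STATUS_READY_MASK	BIT(0)

/* Sketch: poll an MMIO status register until READY is set or 1 ms elapses. */
static int my_wait_ready(void __iomem *base)
{
	u32 val;

	return read_poll_timeout(readl, val,
				 (val & MY_STATUS_READY_MASK) == MY_STATUS_READY_MASK,
				 50, 1000, false, base + MY_STATUS_REG);
}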
diff --git a/drivers/accel/ivpu/ivpu_ipc.c b/drivers/accel/ivpu/ivpu_ipc.c
index 01ebf88fe6ef..39f83225c181 100644
--- a/drivers/accel/ivpu/ivpu_ipc.c
+++ b/drivers/accel/ivpu/ivpu_ipc.c
@@ -302,7 +302,8 @@ ivpu_ipc_send_receive_internal(struct ivpu_device *vdev, struct vpu_jsm_msg *req
struct ivpu_ipc_consumer cons;
int ret;
- drm_WARN_ON(&vdev->drm, pm_runtime_status_suspended(vdev->drm.dev));
+ drm_WARN_ON(&vdev->drm, pm_runtime_status_suspended(vdev->drm.dev) &&
+ pm_runtime_enabled(vdev->drm.dev));
ivpu_ipc_consumer_add(vdev, &cons, channel, NULL);
@@ -459,13 +460,12 @@ void ivpu_ipc_irq_handler(struct ivpu_device *vdev)
}
}
- if (!list_empty(&ipc->cb_msg_list))
- if (!kfifo_put(&vdev->hw->irq.fifo, IVPU_HW_IRQ_SRC_IPC))
- ivpu_err_ratelimited(vdev, "IRQ FIFO full\n");
+ queue_work(system_wq, &vdev->irq_ipc_work);
}
-void ivpu_ipc_irq_thread_handler(struct ivpu_device *vdev)
+void ivpu_ipc_irq_work_fn(struct work_struct *work)
{
+ struct ivpu_device *vdev = container_of(work, struct ivpu_device, irq_ipc_work);
struct ivpu_ipc_info *ipc = vdev->ipc;
struct ivpu_ipc_rx_msg *rx_msg, *r;
struct list_head cb_msg_list;
diff --git a/drivers/accel/ivpu/ivpu_ipc.h b/drivers/accel/ivpu/ivpu_ipc.h
index b4dfb504679b..b524a1985b9d 100644
--- a/drivers/accel/ivpu/ivpu_ipc.h
+++ b/drivers/accel/ivpu/ivpu_ipc.h
@@ -90,7 +90,7 @@ void ivpu_ipc_disable(struct ivpu_device *vdev);
void ivpu_ipc_reset(struct ivpu_device *vdev);
void ivpu_ipc_irq_handler(struct ivpu_device *vdev);
-void ivpu_ipc_irq_thread_handler(struct ivpu_device *vdev);
+void ivpu_ipc_irq_work_fn(struct work_struct *work);
void ivpu_ipc_consumer_add(struct ivpu_device *vdev, struct ivpu_ipc_consumer *cons,
u32 channel, ivpu_ipc_rx_callback_t callback);
diff --git a/drivers/accel/ivpu/ivpu_job.c b/drivers/accel/ivpu/ivpu_job.c
index 7149312f16e1..fae8351aa330 100644
--- a/drivers/accel/ivpu/ivpu_job.c
+++ b/drivers/accel/ivpu/ivpu_job.c
@@ -8,6 +8,7 @@
#include <linux/bitfield.h>
#include <linux/highmem.h>
#include <linux/pci.h>
+#include <linux/pm_runtime.h>
#include <linux/module.h>
#include <uapi/drm/ivpu_accel.h>
@@ -17,6 +18,7 @@
#include "ivpu_ipc.h"
#include "ivpu_job.h"
#include "ivpu_jsm_msg.h"
+#include "ivpu_mmu.h"
#include "ivpu_pm.h"
#include "ivpu_trace.h"
#include "vpu_boot_api.h"
@@ -83,23 +85,9 @@ static struct ivpu_cmdq *ivpu_cmdq_alloc(struct ivpu_file_priv *file_priv)
if (!cmdq)
return NULL;
- ret = xa_alloc_cyclic(&vdev->db_xa, &cmdq->db_id, NULL, vdev->db_limit, &vdev->db_next,
- GFP_KERNEL);
- if (ret < 0) {
- ivpu_err(vdev, "Failed to allocate doorbell id: %d\n", ret);
- goto err_free_cmdq;
- }
-
- ret = xa_alloc_cyclic(&file_priv->cmdq_xa, &cmdq->id, cmdq, file_priv->cmdq_limit,
- &file_priv->cmdq_id_next, GFP_KERNEL);
- if (ret < 0) {
- ivpu_err(vdev, "Failed to allocate command queue id: %d\n", ret);
- goto err_erase_db_xa;
- }
-
cmdq->mem = ivpu_bo_create_global(vdev, SZ_4K, DRM_IVPU_BO_WC | DRM_IVPU_BO_MAPPABLE);
if (!cmdq->mem)
- goto err_erase_cmdq_xa;
+ goto err_free_cmdq;
ret = ivpu_preemption_buffers_create(vdev, file_priv, cmdq);
if (ret)
@@ -107,10 +95,6 @@ static struct ivpu_cmdq *ivpu_cmdq_alloc(struct ivpu_file_priv *file_priv)
return cmdq;
-err_erase_cmdq_xa:
- xa_erase(&file_priv->cmdq_xa, cmdq->id);
-err_erase_db_xa:
- xa_erase(&vdev->db_xa, cmdq->db_id);
err_free_cmdq:
kfree(cmdq);
return NULL;
@@ -118,15 +102,44 @@ err_free_cmdq:
static void ivpu_cmdq_free(struct ivpu_file_priv *file_priv, struct ivpu_cmdq *cmdq)
{
- if (!cmdq)
- return;
-
ivpu_preemption_buffers_free(file_priv->vdev, file_priv, cmdq);
ivpu_bo_free(cmdq->mem);
- xa_erase(&file_priv->vdev->db_xa, cmdq->db_id);
kfree(cmdq);
}
+static struct ivpu_cmdq *ivpu_cmdq_create(struct ivpu_file_priv *file_priv, u8 priority,
+ bool is_legacy)
+{
+ struct ivpu_device *vdev = file_priv->vdev;
+ struct ivpu_cmdq *cmdq = NULL;
+ int ret;
+
+ lockdep_assert_held(&file_priv->lock);
+
+ cmdq = ivpu_cmdq_alloc(file_priv);
+ if (!cmdq) {
+ ivpu_err(vdev, "Failed to allocate command queue\n");
+ return NULL;
+ }
+
+ cmdq->priority = priority;
+ cmdq->is_legacy = is_legacy;
+
+ ret = xa_alloc_cyclic(&file_priv->cmdq_xa, &cmdq->id, cmdq, file_priv->cmdq_limit,
+ &file_priv->cmdq_id_next, GFP_KERNEL);
+ if (ret < 0) {
+ ivpu_err(vdev, "Failed to allocate command queue ID: %d\n", ret);
+ goto err_free_cmdq;
+ }
+
+ ivpu_dbg(vdev, JOB, "Command queue %d created, ctx %d\n", cmdq->id, file_priv->ctx.id);
+ return cmdq;
+
+err_free_cmdq:
+ ivpu_cmdq_free(file_priv, cmdq);
+ return NULL;
+}
+
static int ivpu_hws_cmdq_init(struct ivpu_file_priv *file_priv, struct ivpu_cmdq *cmdq, u16 engine,
u8 priority)
{
@@ -152,6 +165,13 @@ static int ivpu_register_db(struct ivpu_file_priv *file_priv, struct ivpu_cmdq *
struct ivpu_device *vdev = file_priv->vdev;
int ret;
+ ret = xa_alloc_cyclic(&vdev->db_xa, &cmdq->db_id, NULL, vdev->db_limit, &vdev->db_next,
+ GFP_KERNEL);
+ if (ret < 0) {
+ ivpu_err(vdev, "Failed to allocate doorbell ID: %d\n", ret);
+ return ret;
+ }
+
if (vdev->fw->sched_mode == VPU_SCHEDULING_MODE_HW)
ret = ivpu_jsm_hws_register_db(vdev, file_priv->ctx.id, cmdq->id, cmdq->db_id,
cmdq->mem->vpu_addr, ivpu_bo_size(cmdq->mem));
@@ -160,41 +180,52 @@ static int ivpu_register_db(struct ivpu_file_priv *file_priv, struct ivpu_cmdq *
cmdq->mem->vpu_addr, ivpu_bo_size(cmdq->mem));
if (!ret)
- ivpu_dbg(vdev, JOB, "DB %d registered to cmdq %d ctx %d\n",
- cmdq->db_id, cmdq->id, file_priv->ctx.id);
+ ivpu_dbg(vdev, JOB, "DB %d registered to cmdq %d ctx %d priority %d\n",
+ cmdq->db_id, cmdq->id, file_priv->ctx.id, cmdq->priority);
+ else
+ xa_erase(&vdev->db_xa, cmdq->db_id);
return ret;
}
-static int
-ivpu_cmdq_init(struct ivpu_file_priv *file_priv, struct ivpu_cmdq *cmdq, u8 priority)
+static void ivpu_cmdq_jobq_init(struct ivpu_device *vdev, struct vpu_job_queue *jobq)
+{
+ jobq->header.engine_idx = VPU_ENGINE_COMPUTE;
+ jobq->header.head = 0;
+ jobq->header.tail = 0;
+
+ if (ivpu_test_mode & IVPU_TEST_MODE_TURBO) {
+ ivpu_dbg(vdev, JOB, "Turbo mode enabled");
+ jobq->header.flags = VPU_JOB_QUEUE_FLAGS_TURBO_MODE;
+ }
+
+ wmb(); /* Flush WC buffer for jobq->header */
+}
+
+static inline u32 ivpu_cmdq_get_entry_count(struct ivpu_cmdq *cmdq)
+{
+ size_t size = ivpu_bo_size(cmdq->mem) - sizeof(struct vpu_job_queue_header);
+
+ return size / sizeof(struct vpu_job_queue_entry);
+}
+
+static int ivpu_cmdq_register(struct ivpu_file_priv *file_priv, struct ivpu_cmdq *cmdq)
{
struct ivpu_device *vdev = file_priv->vdev;
- struct vpu_job_queue_header *jobq_header;
int ret;
lockdep_assert_held(&file_priv->lock);
- if (cmdq->db_registered)
+ if (cmdq->db_id)
return 0;
- cmdq->entry_count = (u32)((ivpu_bo_size(cmdq->mem) - sizeof(struct vpu_job_queue_header)) /
- sizeof(struct vpu_job_queue_entry));
-
+ cmdq->entry_count = ivpu_cmdq_get_entry_count(cmdq);
cmdq->jobq = (struct vpu_job_queue *)ivpu_bo_vaddr(cmdq->mem);
- jobq_header = &cmdq->jobq->header;
- jobq_header->engine_idx = VPU_ENGINE_COMPUTE;
- jobq_header->head = 0;
- jobq_header->tail = 0;
- if (ivpu_test_mode & IVPU_TEST_MODE_TURBO) {
- ivpu_dbg(vdev, JOB, "Turbo mode enabled");
- jobq_header->flags = VPU_JOB_QUEUE_FLAGS_TURBO_MODE;
- }
- wmb(); /* Flush WC buffer for jobq->header */
+ ivpu_cmdq_jobq_init(vdev, cmdq->jobq);
if (vdev->fw->sched_mode == VPU_SCHEDULING_MODE_HW) {
- ret = ivpu_hws_cmdq_init(file_priv, cmdq, VPU_ENGINE_COMPUTE, priority);
+ ret = ivpu_hws_cmdq_init(file_priv, cmdq, VPU_ENGINE_COMPUTE, cmdq->priority);
if (ret)
return ret;
}
@@ -203,58 +234,83 @@ ivpu_cmdq_init(struct ivpu_file_priv *file_priv, struct ivpu_cmdq *cmdq, u8 prio
if (ret)
return ret;
- cmdq->db_registered = true;
-
return 0;
}
-static int ivpu_cmdq_fini(struct ivpu_file_priv *file_priv, struct ivpu_cmdq *cmdq)
+static int ivpu_cmdq_unregister(struct ivpu_file_priv *file_priv, struct ivpu_cmdq *cmdq)
{
struct ivpu_device *vdev = file_priv->vdev;
int ret;
lockdep_assert_held(&file_priv->lock);
- if (!cmdq->db_registered)
+ if (!cmdq->db_id)
return 0;
- cmdq->db_registered = false;
+ ret = ivpu_jsm_unregister_db(vdev, cmdq->db_id);
+ if (!ret)
+ ivpu_dbg(vdev, JOB, "DB %d unregistered\n", cmdq->db_id);
if (vdev->fw->sched_mode == VPU_SCHEDULING_MODE_HW) {
ret = ivpu_jsm_hws_destroy_cmdq(vdev, file_priv->ctx.id, cmdq->id);
if (!ret)
- ivpu_dbg(vdev, JOB, "Command queue %d destroyed\n", cmdq->id);
+ ivpu_dbg(vdev, JOB, "Command queue %d destroyed, ctx %d\n",
+ cmdq->id, file_priv->ctx.id);
}
- ret = ivpu_jsm_unregister_db(vdev, cmdq->db_id);
- if (!ret)
- ivpu_dbg(vdev, JOB, "DB %d unregistered\n", cmdq->db_id);
+ xa_erase(&file_priv->vdev->db_xa, cmdq->db_id);
+ cmdq->db_id = 0;
return 0;
}
-static struct ivpu_cmdq *ivpu_cmdq_acquire(struct ivpu_file_priv *file_priv, u8 priority)
+static inline u8 ivpu_job_to_jsm_priority(u8 priority)
+{
+ if (priority == DRM_IVPU_JOB_PRIORITY_DEFAULT)
+ return VPU_JOB_SCHEDULING_PRIORITY_BAND_NORMAL;
+
+ return priority - 1;
+}
+
+static void ivpu_cmdq_destroy(struct ivpu_file_priv *file_priv, struct ivpu_cmdq *cmdq)
+{
+ ivpu_cmdq_unregister(file_priv, cmdq);
+ xa_erase(&file_priv->cmdq_xa, cmdq->id);
+ ivpu_cmdq_free(file_priv, cmdq);
+}
+
+static struct ivpu_cmdq *ivpu_cmdq_acquire_legacy(struct ivpu_file_priv *file_priv, u8 priority)
{
struct ivpu_cmdq *cmdq;
- unsigned long cmdq_id;
- int ret;
+ unsigned long id;
lockdep_assert_held(&file_priv->lock);
- xa_for_each(&file_priv->cmdq_xa, cmdq_id, cmdq)
- if (cmdq->priority == priority)
+ xa_for_each(&file_priv->cmdq_xa, id, cmdq)
+ if (cmdq->is_legacy && cmdq->priority == priority)
break;
if (!cmdq) {
- cmdq = ivpu_cmdq_alloc(file_priv);
+ cmdq = ivpu_cmdq_create(file_priv, priority, true);
if (!cmdq)
return NULL;
- cmdq->priority = priority;
}
- ret = ivpu_cmdq_init(file_priv, cmdq, priority);
- if (ret)
+ return cmdq;
+}
+
+static struct ivpu_cmdq *ivpu_cmdq_acquire(struct ivpu_file_priv *file_priv, u32 cmdq_id)
+{
+ struct ivpu_device *vdev = file_priv->vdev;
+ struct ivpu_cmdq *cmdq;
+
+ lockdep_assert_held(&file_priv->lock);
+
+ cmdq = xa_load(&file_priv->cmdq_xa, cmdq_id);
+ if (!cmdq) {
+ ivpu_warn_ratelimited(vdev, "Failed to find command queue with ID: %u\n", cmdq_id);
return NULL;
+ }
return cmdq;
}
@@ -266,11 +322,8 @@ void ivpu_cmdq_release_all_locked(struct ivpu_file_priv *file_priv)
lockdep_assert_held(&file_priv->lock);
- xa_for_each(&file_priv->cmdq_xa, cmdq_id, cmdq) {
- xa_erase(&file_priv->cmdq_xa, cmdq_id);
- ivpu_cmdq_fini(file_priv, cmdq);
- ivpu_cmdq_free(file_priv, cmdq);
- }
+ xa_for_each(&file_priv->cmdq_xa, cmdq_id, cmdq)
+ ivpu_cmdq_destroy(file_priv, cmdq);
}
/*
@@ -286,8 +339,10 @@ static void ivpu_cmdq_reset(struct ivpu_file_priv *file_priv)
mutex_lock(&file_priv->lock);
- xa_for_each(&file_priv->cmdq_xa, cmdq_id, cmdq)
- cmdq->db_registered = false;
+ xa_for_each(&file_priv->cmdq_xa, cmdq_id, cmdq) {
+ xa_erase(&file_priv->vdev->db_xa, cmdq->db_id);
+ cmdq->db_id = 0;
+ }
mutex_unlock(&file_priv->lock);
}
@@ -305,25 +360,24 @@ void ivpu_cmdq_reset_all_contexts(struct ivpu_device *vdev)
mutex_unlock(&vdev->context_list_lock);
}
-static void ivpu_cmdq_fini_all(struct ivpu_file_priv *file_priv)
-{
- struct ivpu_cmdq *cmdq;
- unsigned long cmdq_id;
-
- xa_for_each(&file_priv->cmdq_xa, cmdq_id, cmdq)
- ivpu_cmdq_fini(file_priv, cmdq);
-}
-
void ivpu_context_abort_locked(struct ivpu_file_priv *file_priv)
{
struct ivpu_device *vdev = file_priv->vdev;
+ struct ivpu_cmdq *cmdq;
+ unsigned long cmdq_id;
lockdep_assert_held(&file_priv->lock);
+ ivpu_dbg(vdev, JOB, "Context ID: %u abort\n", file_priv->ctx.id);
- ivpu_cmdq_fini_all(file_priv);
+ xa_for_each(&file_priv->cmdq_xa, cmdq_id, cmdq)
+ ivpu_cmdq_unregister(file_priv, cmdq);
if (vdev->fw->sched_mode == VPU_SCHEDULING_MODE_OS)
ivpu_jsm_context_release(vdev, file_priv->ctx.id);
+
+ ivpu_mmu_disable_ssid_events(vdev, file_priv->ctx.id);
+
+ file_priv->aborted = true;
}
static int ivpu_cmdq_push_job(struct ivpu_cmdq *cmdq, struct ivpu_job *job)
@@ -416,8 +470,8 @@ static void ivpu_job_destroy(struct ivpu_job *job)
struct ivpu_device *vdev = job->vdev;
u32 i;
- ivpu_dbg(vdev, JOB, "Job destroyed: id %3u ctx %2d engine %d",
- job->job_id, job->file_priv->ctx.id, job->engine_idx);
+ ivpu_dbg(vdev, JOB, "Job destroyed: id %3u ctx %2d cmdq_id %u engine %d",
+ job->job_id, job->file_priv->ctx.id, job->cmdq_id, job->engine_idx);
for (i = 0; i < job->bo_count; i++)
if (job->bos[i])
@@ -462,16 +516,14 @@ static struct ivpu_job *ivpu_job_remove_from_submitted_jobs(struct ivpu_device *
{
struct ivpu_job *job;
- xa_lock(&vdev->submitted_jobs_xa);
- job = __xa_erase(&vdev->submitted_jobs_xa, job_id);
+ lockdep_assert_held(&vdev->submitted_jobs_lock);
+ job = xa_erase(&vdev->submitted_jobs_xa, job_id);
if (xa_empty(&vdev->submitted_jobs_xa) && job) {
vdev->busy_time = ktime_add(ktime_sub(ktime_get(), vdev->busy_start_ts),
vdev->busy_time);
}
- xa_unlock(&vdev->submitted_jobs_xa);
-
return job;
}
@@ -479,6 +531,28 @@ static int ivpu_job_signal_and_destroy(struct ivpu_device *vdev, u32 job_id, u32
{
struct ivpu_job *job;
+ lockdep_assert_held(&vdev->submitted_jobs_lock);
+
+ job = xa_load(&vdev->submitted_jobs_xa, job_id);
+ if (!job)
+ return -ENOENT;
+
+ if (job_status == VPU_JSM_STATUS_MVNCI_CONTEXT_VIOLATION_HW) {
+ guard(mutex)(&job->file_priv->lock);
+
+ if (job->file_priv->has_mmu_faults)
+ return 0;
+
+ /*
+ * Mark the context as faulty and defer destruction of the job to the context
+ * abort worker, so that MMU faults and jobs returning a context violation
+ * status are synchronized and handled in the same way
+ */
+ job->file_priv->has_mmu_faults = true;
+ queue_work(system_wq, &vdev->context_abort_work);
+ return 0;
+ }
+
job = ivpu_job_remove_from_submitted_jobs(vdev, job_id);
if (!job)
return -ENOENT;
@@ -490,13 +564,17 @@ static int ivpu_job_signal_and_destroy(struct ivpu_device *vdev, u32 job_id, u32
dma_fence_signal(job->done_fence);
trace_job("done", job);
- ivpu_dbg(vdev, JOB, "Job complete: id %3u ctx %2d engine %d status 0x%x\n",
- job->job_id, job->file_priv->ctx.id, job->engine_idx, job_status);
+ ivpu_dbg(vdev, JOB, "Job complete: id %3u ctx %2d cmdq_id %u engine %d status 0x%x\n",
+ job->job_id, job->file_priv->ctx.id, job->cmdq_id, job->engine_idx, job_status);
ivpu_job_destroy(job);
ivpu_stop_job_timeout_detection(vdev);
ivpu_rpm_put(vdev);
+
+ if (!xa_empty(&vdev->submitted_jobs_xa))
+ ivpu_start_job_timeout_detection(vdev);
+
return 0;
}
@@ -505,11 +583,29 @@ void ivpu_jobs_abort_all(struct ivpu_device *vdev)
struct ivpu_job *job;
unsigned long id;
+ mutex_lock(&vdev->submitted_jobs_lock);
+
xa_for_each(&vdev->submitted_jobs_xa, id, job)
ivpu_job_signal_and_destroy(vdev, id, DRM_IVPU_JOB_STATUS_ABORTED);
+
+ mutex_unlock(&vdev->submitted_jobs_lock);
+}
+
+void ivpu_cmdq_abort_all_jobs(struct ivpu_device *vdev, u32 ctx_id, u32 cmdq_id)
+{
+ struct ivpu_job *job;
+ unsigned long id;
+
+ mutex_lock(&vdev->submitted_jobs_lock);
+
+ xa_for_each(&vdev->submitted_jobs_xa, id, job)
+ if (job->file_priv->ctx.id == ctx_id && job->cmdq_id == cmdq_id)
+ ivpu_job_signal_and_destroy(vdev, id, DRM_IVPU_JOB_STATUS_ABORTED);
+
+ mutex_unlock(&vdev->submitted_jobs_lock);
}
-static int ivpu_job_submit(struct ivpu_job *job, u8 priority)
+static int ivpu_job_submit(struct ivpu_job *job, u8 priority, u32 cmdq_id)
{
struct ivpu_file_priv *file_priv = job->file_priv;
struct ivpu_device *vdev = job->vdev;
@@ -521,25 +617,35 @@ static int ivpu_job_submit(struct ivpu_job *job, u8 priority)
if (ret < 0)
return ret;
+ mutex_lock(&vdev->submitted_jobs_lock);
mutex_lock(&file_priv->lock);
- cmdq = ivpu_cmdq_acquire(file_priv, priority);
+ if (cmdq_id == 0)
+ cmdq = ivpu_cmdq_acquire_legacy(file_priv, priority);
+ else
+ cmdq = ivpu_cmdq_acquire(file_priv, cmdq_id);
if (!cmdq) {
- ivpu_warn_ratelimited(vdev, "Failed to get job queue, ctx %d engine %d prio %d\n",
- file_priv->ctx.id, job->engine_idx, priority);
+ ivpu_warn_ratelimited(vdev, "Failed to get job queue, ctx %d\n", file_priv->ctx.id);
ret = -EINVAL;
- goto err_unlock_file_priv;
+ goto err_unlock;
+ }
+
+ ret = ivpu_cmdq_register(file_priv, cmdq);
+ if (ret) {
+ ivpu_err(vdev, "Failed to register command queue: %d\n", ret);
+ goto err_unlock;
}
- xa_lock(&vdev->submitted_jobs_xa);
+ job->cmdq_id = cmdq->id;
+
is_first_job = xa_empty(&vdev->submitted_jobs_xa);
- ret = __xa_alloc_cyclic(&vdev->submitted_jobs_xa, &job->job_id, job, file_priv->job_limit,
- &file_priv->job_id_next, GFP_KERNEL);
+ ret = xa_alloc_cyclic(&vdev->submitted_jobs_xa, &job->job_id, job, file_priv->job_limit,
+ &file_priv->job_id_next, GFP_KERNEL);
if (ret < 0) {
ivpu_dbg(vdev, JOB, "Too many active jobs in ctx %d\n",
file_priv->ctx.id);
ret = -EBUSY;
- goto err_unlock_submitted_jobs_xa;
+ goto err_unlock;
}
ret = ivpu_cmdq_push_job(cmdq, job);
@@ -558,25 +664,25 @@ static int ivpu_job_submit(struct ivpu_job *job, u8 priority)
}
trace_job("submit", job);
- ivpu_dbg(vdev, JOB, "Job submitted: id %3u ctx %2d engine %d prio %d addr 0x%llx next %d\n",
- job->job_id, file_priv->ctx.id, job->engine_idx, priority,
+ ivpu_dbg(vdev, JOB, "Job submitted: id %3u ctx %2d cmdq_id %u engine %d prio %d addr 0x%llx next %d\n",
+ job->job_id, file_priv->ctx.id, cmdq->id, job->engine_idx, cmdq->priority,
job->cmd_buf_vpu_addr, cmdq->jobq->header.tail);
- xa_unlock(&vdev->submitted_jobs_xa);
-
mutex_unlock(&file_priv->lock);
- if (unlikely(ivpu_test_mode & IVPU_TEST_MODE_NULL_HW))
+ if (unlikely(ivpu_test_mode & IVPU_TEST_MODE_NULL_HW)) {
ivpu_job_signal_and_destroy(vdev, job->job_id, VPU_JSM_STATUS_SUCCESS);
+ }
+
+ mutex_unlock(&vdev->submitted_jobs_lock);
return 0;
err_erase_xa:
- __xa_erase(&vdev->submitted_jobs_xa, job->job_id);
-err_unlock_submitted_jobs_xa:
- xa_unlock(&vdev->submitted_jobs_xa);
-err_unlock_file_priv:
+ xa_erase(&vdev->submitted_jobs_xa, job->job_id);
+err_unlock:
mutex_unlock(&file_priv->lock);
+ mutex_unlock(&vdev->submitted_jobs_lock);
ivpu_rpm_put(vdev);
return ret;
}
@@ -585,7 +691,7 @@ static int
ivpu_job_prepare_bos_for_submit(struct drm_file *file, struct ivpu_job *job, u32 *buf_handles,
u32 buf_count, u32 commands_offset)
{
- struct ivpu_file_priv *file_priv = file->driver_priv;
+ struct ivpu_file_priv *file_priv = job->file_priv;
struct ivpu_device *vdev = file_priv->vdev;
struct ww_acquire_ctx acquire_ctx;
enum dma_resv_usage usage;
@@ -647,49 +753,20 @@ unlock_reservations:
return ret;
}
-static inline u8 ivpu_job_to_hws_priority(struct ivpu_file_priv *file_priv, u8 priority)
+static int ivpu_submit(struct drm_file *file, struct ivpu_file_priv *file_priv, u32 cmdq_id,
+ u32 buffer_count, u32 engine, void __user *buffers_ptr, u32 cmds_offset,
+ u8 priority)
{
- if (priority == DRM_IVPU_JOB_PRIORITY_DEFAULT)
- return DRM_IVPU_JOB_PRIORITY_NORMAL;
-
- return priority - 1;
-}
-
-int ivpu_submit_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
-{
- struct ivpu_file_priv *file_priv = file->driver_priv;
struct ivpu_device *vdev = file_priv->vdev;
- struct drm_ivpu_submit *params = data;
struct ivpu_job *job;
u32 *buf_handles;
int idx, ret;
- u8 priority;
-
- if (params->engine != DRM_IVPU_ENGINE_COMPUTE)
- return -EINVAL;
- if (params->priority > DRM_IVPU_JOB_PRIORITY_REALTIME)
- return -EINVAL;
-
- if (params->buffer_count == 0 || params->buffer_count > JOB_MAX_BUFFER_COUNT)
- return -EINVAL;
-
- if (!IS_ALIGNED(params->commands_offset, 8))
- return -EINVAL;
-
- if (!file_priv->ctx.id)
- return -EINVAL;
-
- if (file_priv->has_mmu_faults)
- return -EBADFD;
-
- buf_handles = kcalloc(params->buffer_count, sizeof(u32), GFP_KERNEL);
+ buf_handles = kcalloc(buffer_count, sizeof(u32), GFP_KERNEL);
if (!buf_handles)
return -ENOMEM;
- ret = copy_from_user(buf_handles,
- (void __user *)params->buffers_ptr,
- params->buffer_count * sizeof(u32));
+ ret = copy_from_user(buf_handles, buffers_ptr, buffer_count * sizeof(u32));
if (ret) {
ret = -EFAULT;
goto err_free_handles;
@@ -700,27 +777,24 @@ int ivpu_submit_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
goto err_free_handles;
}
- ivpu_dbg(vdev, JOB, "Submit ioctl: ctx %u buf_count %u\n",
- file_priv->ctx.id, params->buffer_count);
+ ivpu_dbg(vdev, JOB, "Submit ioctl: ctx %u cmdq_id %u buf_count %u\n",
+ file_priv->ctx.id, cmdq_id, buffer_count);
- job = ivpu_job_create(file_priv, params->engine, params->buffer_count);
+ job = ivpu_job_create(file_priv, engine, buffer_count);
if (!job) {
ivpu_err(vdev, "Failed to create job\n");
ret = -ENOMEM;
goto err_exit_dev;
}
- ret = ivpu_job_prepare_bos_for_submit(file, job, buf_handles, params->buffer_count,
- params->commands_offset);
+ ret = ivpu_job_prepare_bos_for_submit(file, job, buf_handles, buffer_count, cmds_offset);
if (ret) {
ivpu_err(vdev, "Failed to prepare job: %d\n", ret);
goto err_destroy_job;
}
- priority = ivpu_job_to_hws_priority(file_priv, params->priority);
-
down_read(&vdev->pm->reset_lock);
- ret = ivpu_job_submit(job, priority);
+ ret = ivpu_job_submit(job, priority, cmdq_id);
up_read(&vdev->pm->reset_lock);
if (ret)
goto err_signal_fence;
@@ -740,12 +814,137 @@ err_free_handles:
return ret;
}
+int ivpu_submit_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
+{
+ struct ivpu_file_priv *file_priv = file->driver_priv;
+ struct drm_ivpu_submit *args = data;
+ u8 priority;
+
+ if (args->engine != DRM_IVPU_ENGINE_COMPUTE)
+ return -EINVAL;
+
+ if (args->priority > DRM_IVPU_JOB_PRIORITY_REALTIME)
+ return -EINVAL;
+
+ if (args->buffer_count == 0 || args->buffer_count > JOB_MAX_BUFFER_COUNT)
+ return -EINVAL;
+
+ if (!IS_ALIGNED(args->commands_offset, 8))
+ return -EINVAL;
+
+ if (!file_priv->ctx.id)
+ return -EINVAL;
+
+ if (file_priv->has_mmu_faults)
+ return -EBADFD;
+
+ priority = ivpu_job_to_jsm_priority(args->priority);
+
+ return ivpu_submit(file, file_priv, 0, args->buffer_count, args->engine,
+ (void __user *)args->buffers_ptr, args->commands_offset, priority);
+}
+
+int ivpu_cmdq_submit_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
+{
+ struct ivpu_file_priv *file_priv = file->driver_priv;
+ struct drm_ivpu_cmdq_submit *args = data;
+
+ if (!ivpu_is_capable(file_priv->vdev, DRM_IVPU_CAP_MANAGE_CMDQ))
+ return -ENODEV;
+
+ if (args->cmdq_id < IVPU_CMDQ_MIN_ID || args->cmdq_id > IVPU_CMDQ_MAX_ID)
+ return -EINVAL;
+
+ if (args->buffer_count == 0 || args->buffer_count > JOB_MAX_BUFFER_COUNT)
+ return -EINVAL;
+
+ if (!IS_ALIGNED(args->commands_offset, 8))
+ return -EINVAL;
+
+ if (!file_priv->ctx.id)
+ return -EINVAL;
+
+ if (file_priv->has_mmu_faults)
+ return -EBADFD;
+
+ return ivpu_submit(file, file_priv, args->cmdq_id, args->buffer_count, VPU_ENGINE_COMPUTE,
+ (void __user *)args->buffers_ptr, args->commands_offset, 0);
+}
+
+int ivpu_cmdq_create_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
+{
+ struct ivpu_file_priv *file_priv = file->driver_priv;
+ struct ivpu_device *vdev = file_priv->vdev;
+ struct drm_ivpu_cmdq_create *args = data;
+ struct ivpu_cmdq *cmdq;
+ int ret;
+
+ if (!ivpu_is_capable(vdev, DRM_IVPU_CAP_MANAGE_CMDQ))
+ return -ENODEV;
+
+ if (args->priority > DRM_IVPU_JOB_PRIORITY_REALTIME)
+ return -EINVAL;
+
+ ret = ivpu_rpm_get(vdev);
+ if (ret < 0)
+ return ret;
+
+ mutex_lock(&file_priv->lock);
+
+ cmdq = ivpu_cmdq_create(file_priv, ivpu_job_to_jsm_priority(args->priority), false);
+ if (cmdq)
+ args->cmdq_id = cmdq->id;
+
+ mutex_unlock(&file_priv->lock);
+
+ ivpu_rpm_put(vdev);
+
+ return cmdq ? 0 : -ENOMEM;
+}
+
+int ivpu_cmdq_destroy_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
+{
+ struct ivpu_file_priv *file_priv = file->driver_priv;
+ struct ivpu_device *vdev = file_priv->vdev;
+ struct drm_ivpu_cmdq_destroy *args = data;
+ struct ivpu_cmdq *cmdq;
+ u32 cmdq_id = 0;
+ int ret;
+
+ if (!ivpu_is_capable(vdev, DRM_IVPU_CAP_MANAGE_CMDQ))
+ return -ENODEV;
+
+ ret = ivpu_rpm_get(vdev);
+ if (ret < 0)
+ return ret;
+
+ mutex_lock(&file_priv->lock);
+
+ cmdq = xa_load(&file_priv->cmdq_xa, args->cmdq_id);
+ if (!cmdq || cmdq->is_legacy) {
+ ret = -ENOENT;
+ } else {
+ cmdq_id = cmdq->id;
+ ivpu_cmdq_destroy(file_priv, cmdq);
+ ret = 0;
+ }
+
+ mutex_unlock(&file_priv->lock);
+
+ /* Abort any pending jobs only if cmdq was destroyed */
+ if (!ret)
+ ivpu_cmdq_abort_all_jobs(vdev, file_priv->ctx.id, cmdq_id);
+
+ ivpu_rpm_put(vdev);
+
+ return ret;
+}
+
static void
ivpu_job_done_callback(struct ivpu_device *vdev, struct ivpu_ipc_hdr *ipc_hdr,
struct vpu_jsm_msg *jsm_msg)
{
struct vpu_ipc_msg_payload_job_done *payload;
- int ret;
if (!jsm_msg) {
ivpu_err(vdev, "IPC message has no JSM payload\n");
@@ -758,9 +957,10 @@ ivpu_job_done_callback(struct ivpu_device *vdev, struct ivpu_ipc_hdr *ipc_hdr,
}
payload = (struct vpu_ipc_msg_payload_job_done *)&jsm_msg->payload;
- ret = ivpu_job_signal_and_destroy(vdev, payload->job_id, payload->job_status);
- if (!ret && !xa_empty(&vdev->submitted_jobs_xa))
- ivpu_start_job_timeout_detection(vdev);
+
+ mutex_lock(&vdev->submitted_jobs_lock);
+ ivpu_job_signal_and_destroy(vdev, payload->job_id, payload->job_status);
+ mutex_unlock(&vdev->submitted_jobs_lock);
}
void ivpu_job_done_consumer_init(struct ivpu_device *vdev)
@@ -773,3 +973,57 @@ void ivpu_job_done_consumer_fini(struct ivpu_device *vdev)
{
ivpu_ipc_consumer_del(vdev, &vdev->job_done_consumer);
}
+
+void ivpu_context_abort_work_fn(struct work_struct *work)
+{
+ struct ivpu_device *vdev = container_of(work, struct ivpu_device, context_abort_work);
+ struct ivpu_file_priv *file_priv;
+ struct ivpu_job *job;
+ unsigned long ctx_id;
+ unsigned long id;
+
+ if (drm_WARN_ON(&vdev->drm, pm_runtime_get_if_active(vdev->drm.dev) <= 0))
+ return;
+
+ if (vdev->fw->sched_mode == VPU_SCHEDULING_MODE_HW)
+ if (ivpu_jsm_reset_engine(vdev, 0))
+ return;
+
+ mutex_lock(&vdev->context_list_lock);
+ xa_for_each(&vdev->context_xa, ctx_id, file_priv) {
+ if (!file_priv->has_mmu_faults || file_priv->aborted)
+ continue;
+
+ mutex_lock(&file_priv->lock);
+ ivpu_context_abort_locked(file_priv);
+ mutex_unlock(&file_priv->lock);
+ }
+ mutex_unlock(&vdev->context_list_lock);
+
+ /*
+ * We will not receive new MMU event interrupts until the existing events are
+ * discarded. However, we want to discard these events only after aborting the
+ * faulty context, to avoid generating new faults from that context
+ */
+ ivpu_mmu_discard_events(vdev);
+
+ if (vdev->fw->sched_mode != VPU_SCHEDULING_MODE_HW)
+ goto runtime_put;
+
+ if (ivpu_jsm_hws_resume_engine(vdev, 0))
+ return;
+ /*
+ * In hardware scheduling mode the NPU has already stopped processing jobs
+ * and won't send us any further notifications, so we have to free the
+ * job-related resources and notify userspace ourselves
+ */
+ mutex_lock(&vdev->submitted_jobs_lock);
+ xa_for_each(&vdev->submitted_jobs_xa, id, job)
+ if (job->file_priv->aborted)
+ ivpu_job_signal_and_destroy(vdev, job->job_id, DRM_IVPU_JOB_STATUS_ABORTED);
+ mutex_unlock(&vdev->submitted_jobs_lock);
+
+runtime_put:
+ pm_runtime_mark_last_busy(vdev->drm.dev);
+ pm_runtime_put_autosuspend(vdev->drm.dev);
+}
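ivpu_register_db() above now allocates the doorbell ID lazily with xa_alloc_cyclic() and releases it in ivpu_cmdq_unregister(), so a non-zero db_id doubles as the "registered" flag. A self-contained sketch of cyclic XArray ID allocation, with an assumed ID range of 1..63 and illustrative helper names:

#include <linux/gfp.h>
#include <linux/xarray.h>

static DEFINE_XARRAY_ALLOC1(db_xa);	/* allocating XArray, IDs start at 1 */
static u32 db_next;			/* hint for the next cyclic allocation */

static int db_id_alloc(void *owner, u32 *db_id)
{
	/* Pick the next free ID in [1, 63], wrapping around when needed */
	return xa_alloc_cyclic(&db_xa, db_id, owner, XA_LIMIT(1, 63),
			       &db_next, GFP_KERNEL);
}

static void db_id_free(u32 db_id)
{
	xa_erase(&db_xa, db_id);
}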
diff --git a/drivers/accel/ivpu/ivpu_job.h b/drivers/accel/ivpu/ivpu_job.h
index 8b19e3f8b4cf..2e301c2eea7b 100644
--- a/drivers/accel/ivpu/ivpu_job.h
+++ b/drivers/accel/ivpu/ivpu_job.h
@@ -30,8 +30,8 @@ struct ivpu_cmdq {
u32 entry_count;
u32 id;
u32 db_id;
- bool db_registered;
u8 priority;
+ bool is_legacy;
};
/**
@@ -51,6 +51,7 @@ struct ivpu_job {
struct ivpu_file_priv *file_priv;
struct dma_fence *done_fence;
u64 cmd_buf_vpu_addr;
+ u32 cmdq_id;
u32 job_id;
u32 engine_idx;
size_t bo_count;
@@ -58,14 +59,19 @@ struct ivpu_job {
};
int ivpu_submit_ioctl(struct drm_device *dev, void *data, struct drm_file *file);
+int ivpu_cmdq_create_ioctl(struct drm_device *dev, void *data, struct drm_file *file);
+int ivpu_cmdq_destroy_ioctl(struct drm_device *dev, void *data, struct drm_file *file);
+int ivpu_cmdq_submit_ioctl(struct drm_device *dev, void *data, struct drm_file *file);
void ivpu_context_abort_locked(struct ivpu_file_priv *file_priv);
void ivpu_cmdq_release_all_locked(struct ivpu_file_priv *file_priv);
void ivpu_cmdq_reset_all_contexts(struct ivpu_device *vdev);
+void ivpu_cmdq_abort_all_jobs(struct ivpu_device *vdev, u32 ctx_id, u32 cmdq_id);
void ivpu_job_done_consumer_init(struct ivpu_device *vdev);
void ivpu_job_done_consumer_fini(struct ivpu_device *vdev);
+void ivpu_context_abort_work_fn(struct work_struct *work);
void ivpu_jobs_abort_all(struct ivpu_device *vdev);
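ivpu_job_signal_and_destroy() in the ivpu_job.c hunks above relies on guard(mutex)() from <linux/cleanup.h>, so file_priv->lock is released automatically on every early return. A small illustrative sketch, with a hypothetical mark_faulty() helper standing in for the real logic:

#include <linux/cleanup.h>
#include <linux/mutex.h>

struct ctx {
	struct mutex lock;
	bool has_faults;
};

static int mark_faulty(struct ctx *c)
{
	guard(mutex)(&c->lock);	/* unlocked automatically at scope exit */

	if (c->has_faults)
		return 0;	/* no explicit mutex_unlock() needed */

	c->has_faults = true;
	return 1;
}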
diff --git a/drivers/accel/ivpu/ivpu_jsm_msg.c b/drivers/accel/ivpu/ivpu_jsm_msg.c
index 30a40be76930..0256b2dfefc1 100644
--- a/drivers/accel/ivpu/ivpu_jsm_msg.c
+++ b/drivers/accel/ivpu/ivpu_jsm_msg.c
@@ -7,6 +7,8 @@
#include "ivpu_hw.h"
#include "ivpu_ipc.h"
#include "ivpu_jsm_msg.h"
+#include "ivpu_pm.h"
+#include "vpu_jsm_api.h"
const char *ivpu_jsm_msg_type_to_str(enum vpu_ipc_msg_type type)
{
@@ -162,8 +164,10 @@ int ivpu_jsm_reset_engine(struct ivpu_device *vdev, u32 engine)
ret = ivpu_ipc_send_receive(vdev, &req, VPU_JSM_MSG_ENGINE_RESET_DONE, &resp,
VPU_IPC_CHAN_ASYNC_CMD, vdev->timeout.jsm);
- if (ret)
+ if (ret) {
ivpu_err_ratelimited(vdev, "Failed to reset engine %d: %d\n", engine, ret);
+ ivpu_pm_trigger_recovery(vdev, "Engine reset failed");
+ }
return ret;
}
@@ -353,8 +357,10 @@ int ivpu_jsm_hws_resume_engine(struct ivpu_device *vdev, u32 engine)
ret = ivpu_ipc_send_receive(vdev, &req, VPU_JSM_MSG_HWS_RESUME_ENGINE_DONE, &resp,
VPU_IPC_CHAN_ASYNC_CMD, vdev->timeout.jsm);
- if (ret)
+ if (ret) {
ivpu_err_ratelimited(vdev, "Failed to resume engine %d: %d\n", engine, ret);
+ ivpu_pm_trigger_recovery(vdev, "Engine resume failed");
+ }
return ret;
}
@@ -407,26 +413,18 @@ int ivpu_jsm_hws_setup_priority_bands(struct ivpu_device *vdev)
{
struct vpu_jsm_msg req = { .type = VPU_JSM_MSG_SET_PRIORITY_BAND_SETUP };
struct vpu_jsm_msg resp;
+ struct ivpu_hw_info *hw = vdev->hw;
+ struct vpu_ipc_msg_payload_hws_priority_band_setup *setup =
+ &req.payload.hws_priority_band_setup;
int ret;
- /* Idle */
- req.payload.hws_priority_band_setup.grace_period[0] = 0;
- req.payload.hws_priority_band_setup.process_grace_period[0] = 50000;
- req.payload.hws_priority_band_setup.process_quantum[0] = 160000;
- /* Normal */
- req.payload.hws_priority_band_setup.grace_period[1] = 50000;
- req.payload.hws_priority_band_setup.process_grace_period[1] = 50000;
- req.payload.hws_priority_band_setup.process_quantum[1] = 300000;
- /* Focus */
- req.payload.hws_priority_band_setup.grace_period[2] = 50000;
- req.payload.hws_priority_band_setup.process_grace_period[2] = 50000;
- req.payload.hws_priority_band_setup.process_quantum[2] = 200000;
- /* Realtime */
- req.payload.hws_priority_band_setup.grace_period[3] = 0;
- req.payload.hws_priority_band_setup.process_grace_period[3] = 50000;
- req.payload.hws_priority_band_setup.process_quantum[3] = 200000;
-
- req.payload.hws_priority_band_setup.normal_band_percentage = 10;
+ for (int band = VPU_JOB_SCHEDULING_PRIORITY_BAND_IDLE;
+ band < VPU_JOB_SCHEDULING_PRIORITY_BAND_COUNT; band++) {
+ setup->grace_period[band] = hw->hws.grace_period[band];
+ setup->process_grace_period[band] = hw->hws.process_grace_period[band];
+ setup->process_quantum[band] = hw->hws.process_quantum[band];
+ }
+ setup->normal_band_percentage = 10;
ret = ivpu_ipc_send_receive_internal(vdev, &req, VPU_JSM_MSG_SET_PRIORITY_BAND_SETUP_RSP,
&resp, VPU_IPC_CHAN_ASYNC_CMD, vdev->timeout.jsm);
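ivpu_jsm_hws_setup_priority_bands() above now copies grace periods and quanta from per-band tables in struct ivpu_hw_info (hw->hws.*) instead of hard-coding them. A self-contained sketch of filling such tables follows; my_hws_params is an illustrative stand-in for the driver's structure and the numbers are simply the previously hard-coded defaults, shown as an example:

#include <linux/types.h>

/* Values in 100 ns units, indexed by band: idle, normal, focus, realtime */
struct my_hws_params {
	u32 grace_period[4];
	u32 process_grace_period[4];
	u32 process_quantum[4];
};

static void my_hws_defaults_init(struct my_hws_params *hws)
{
	static const u32 grace[4]   = {      0,  50000,  50000,      0 };
	static const u32 p_grace[4] = {  50000,  50000,  50000,  50000 };
	static const u32 quantum[4] = { 160000, 300000, 200000, 200000 };
	int band;

	for (band = 0; band < 4; band++) {
		hws->grace_period[band] = grace[band];
		hws->process_grace_period[band] = p_grace[band];
		hws->process_quantum[band] = quantum[band];
	}
}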
diff --git a/drivers/accel/ivpu/ivpu_mmu.c b/drivers/accel/ivpu/ivpu_mmu.c
index 26ef52fbb93e..5ea010568faa 100644
--- a/drivers/accel/ivpu/ivpu_mmu.c
+++ b/drivers/accel/ivpu/ivpu_mmu.c
@@ -20,6 +20,12 @@
#define IVPU_MMU_REG_CR0 0x00200020u
#define IVPU_MMU_REG_CR0ACK 0x00200024u
#define IVPU_MMU_REG_CR0ACK_VAL_MASK GENMASK(31, 0)
+#define IVPU_MMU_REG_CR0_ATSCHK_MASK BIT(4)
+#define IVPU_MMU_REG_CR0_CMDQEN_MASK BIT(3)
+#define IVPU_MMU_REG_CR0_EVTQEN_MASK BIT(2)
+#define IVPU_MMU_REG_CR0_PRIQEN_MASK BIT(1)
+#define IVPU_MMU_REG_CR0_SMMUEN_MASK BIT(0)
+
#define IVPU_MMU_REG_CR1 0x00200028u
#define IVPU_MMU_REG_CR2 0x0020002cu
#define IVPU_MMU_REG_IRQ_CTRL 0x00200050u
@@ -141,12 +147,6 @@
#define IVPU_MMU_IRQ_EVTQ_EN BIT(2)
#define IVPU_MMU_IRQ_GERROR_EN BIT(0)
-#define IVPU_MMU_CR0_ATSCHK BIT(4)
-#define IVPU_MMU_CR0_CMDQEN BIT(3)
-#define IVPU_MMU_CR0_EVTQEN BIT(2)
-#define IVPU_MMU_CR0_PRIQEN BIT(1)
-#define IVPU_MMU_CR0_SMMUEN BIT(0)
-
#define IVPU_MMU_CR1_TABLE_SH GENMASK(11, 10)
#define IVPU_MMU_CR1_TABLE_OC GENMASK(9, 8)
#define IVPU_MMU_CR1_TABLE_IC GENMASK(7, 6)
@@ -596,7 +596,7 @@ static int ivpu_mmu_reset(struct ivpu_device *vdev)
REGV_WR32(IVPU_MMU_REG_CMDQ_PROD, 0);
REGV_WR32(IVPU_MMU_REG_CMDQ_CONS, 0);
- val = IVPU_MMU_CR0_CMDQEN;
+ val = REG_SET_FLD(IVPU_MMU_REG_CR0, CMDQEN, 0);
ret = ivpu_mmu_reg_write_cr0(vdev, val);
if (ret)
return ret;
@@ -617,12 +617,12 @@ static int ivpu_mmu_reset(struct ivpu_device *vdev)
REGV_WR32(IVPU_MMU_REG_EVTQ_PROD_SEC, 0);
REGV_WR32(IVPU_MMU_REG_EVTQ_CONS_SEC, 0);
- val |= IVPU_MMU_CR0_EVTQEN;
+ val = REG_SET_FLD(IVPU_MMU_REG_CR0, EVTQEN, val);
ret = ivpu_mmu_reg_write_cr0(vdev, val);
if (ret)
return ret;
- val |= IVPU_MMU_CR0_ATSCHK;
+ val = REG_SET_FLD(IVPU_MMU_REG_CR0, ATSCHK, val);
ret = ivpu_mmu_reg_write_cr0(vdev, val);
if (ret)
return ret;
@@ -631,7 +631,7 @@ static int ivpu_mmu_reset(struct ivpu_device *vdev)
if (ret)
return ret;
- val |= IVPU_MMU_CR0_SMMUEN;
+ val = REG_SET_FLD(IVPU_MMU_REG_CR0, SMMUEN, val);
return ivpu_mmu_reg_write_cr0(vdev, val);
}
@@ -725,8 +725,8 @@ static int ivpu_mmu_cdtab_entry_set(struct ivpu_device *vdev, u32 ssid, u64 cd_d
cd[2] = 0;
cd[3] = 0x0000000000007444;
- /* For global context generate memory fault on VPU */
- if (ssid == IVPU_GLOBAL_CONTEXT_MMU_SSID)
+ /* For global and reserved contexts generate memory fault on VPU */
+ if (ssid == IVPU_GLOBAL_CONTEXT_MMU_SSID || ssid == IVPU_RESERVED_CONTEXT_MMU_SSID)
cd[0] |= IVPU_MMU_CD_0_A;
if (valid)
@@ -870,28 +870,107 @@ static u32 *ivpu_mmu_get_event(struct ivpu_device *vdev)
return evt;
}
+static int ivpu_mmu_evtq_set(struct ivpu_device *vdev, bool enable)
+{
+ u32 val = REGV_RD32(IVPU_MMU_REG_CR0);
+
+ if (enable)
+ val = REG_SET_FLD(IVPU_MMU_REG_CR0, EVTQEN, val);
+ else
+ val = REG_CLR_FLD(IVPU_MMU_REG_CR0, EVTQEN, val);
+ REGV_WR32(IVPU_MMU_REG_CR0, val);
+
+ return REGV_POLL_FLD(IVPU_MMU_REG_CR0ACK, VAL, val, IVPU_MMU_REG_TIMEOUT_US);
+}
+
+static int ivpu_mmu_evtq_enable(struct ivpu_device *vdev)
+{
+ return ivpu_mmu_evtq_set(vdev, true);
+}
+
+static int ivpu_mmu_evtq_disable(struct ivpu_device *vdev)
+{
+ return ivpu_mmu_evtq_set(vdev, false);
+}
+
+void ivpu_mmu_discard_events(struct ivpu_device *vdev)
+{
+ struct ivpu_mmu_info *mmu = vdev->mmu;
+
+ mutex_lock(&mmu->lock);
+ /*
+ * Disable the event queue (stop the MMU from updating the producer index)
+ * so that the consumer and producer indexes can be synchronized
+ */
+ ivpu_mmu_evtq_disable(vdev);
+
+ vdev->mmu->evtq.cons = REGV_RD32(IVPU_MMU_REG_EVTQ_PROD_SEC);
+ REGV_WR32(IVPU_MMU_REG_EVTQ_CONS_SEC, vdev->mmu->evtq.cons);
+ vdev->mmu->evtq.prod = REGV_RD32(IVPU_MMU_REG_EVTQ_PROD_SEC);
+
+ ivpu_mmu_evtq_enable(vdev);
+
+ drm_WARN_ON_ONCE(&vdev->drm, vdev->mmu->evtq.cons != vdev->mmu->evtq.prod);
+
+ mutex_unlock(&mmu->lock);
+}
+
+int ivpu_mmu_disable_ssid_events(struct ivpu_device *vdev, u32 ssid)
+{
+ struct ivpu_mmu_info *mmu = vdev->mmu;
+ struct ivpu_mmu_cdtab *cdtab = &mmu->cdtab;
+ u64 *entry;
+ u64 val;
+
+ if (ssid > IVPU_MMU_CDTAB_ENT_COUNT)
+ return -EINVAL;
+
+ mutex_lock(&mmu->lock);
+
+ entry = cdtab->base + (ssid * IVPU_MMU_CDTAB_ENT_SIZE);
+
+ val = READ_ONCE(entry[0]);
+ val &= ~IVPU_MMU_CD_0_R;
+ WRITE_ONCE(entry[0], val);
+
+ if (!ivpu_is_force_snoop_enabled(vdev))
+ clflush_cache_range(entry, IVPU_MMU_CDTAB_ENT_SIZE);
+
+ ivpu_mmu_cmdq_write_cfgi_all(vdev);
+ ivpu_mmu_cmdq_sync(vdev);
+
+ mutex_unlock(&mmu->lock);
+
+ return 0;
+}
+
void ivpu_mmu_irq_evtq_handler(struct ivpu_device *vdev)
{
+ struct ivpu_file_priv *file_priv;
u32 *event;
u32 ssid;
ivpu_dbg(vdev, IRQ, "MMU event queue\n");
- while ((event = ivpu_mmu_get_event(vdev)) != NULL) {
- ivpu_mmu_dump_event(vdev, event);
-
- ssid = FIELD_GET(IVPU_MMU_EVT_SSID_MASK, event[0]);
- if (ssid == IVPU_GLOBAL_CONTEXT_MMU_SSID) {
+ while ((event = ivpu_mmu_get_event(vdev))) {
+ ssid = FIELD_GET(IVPU_MMU_EVT_SSID_MASK, *event);
+ if (ssid == IVPU_GLOBAL_CONTEXT_MMU_SSID ||
+ ssid == IVPU_RESERVED_CONTEXT_MMU_SSID) {
+ ivpu_mmu_dump_event(vdev, event);
ivpu_pm_trigger_recovery(vdev, "MMU event");
return;
}
- ivpu_mmu_user_context_mark_invalid(vdev, ssid);
- REGV_WR32(IVPU_MMU_REG_EVTQ_CONS_SEC, vdev->mmu->evtq.cons);
+ file_priv = xa_load(&vdev->context_xa, ssid);
+ if (file_priv) {
+ if (!READ_ONCE(file_priv->has_mmu_faults)) {
+ ivpu_mmu_dump_event(vdev, event);
+ WRITE_ONCE(file_priv->has_mmu_faults, true);
+ }
+ }
}
- if (!kfifo_put(&vdev->hw->irq.fifo, IVPU_HW_IRQ_SRC_MMU_EVTQ))
- ivpu_err_ratelimited(vdev, "IRQ FIFO full\n");
+ queue_work(system_wq, &vdev->context_abort_work);
}
void ivpu_mmu_evtq_dump(struct ivpu_device *vdev)
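The CR0 bit defines above are renamed to *_MASK so they work with the driver's REG_SET_FLD()/REG_CLR_FLD() helpers, and ivpu_mmu_evtq_set() toggles EVTQEN with a read-modify-write that is then confirmed through CR0ACK. A generic sketch of the same read-modify-write idea using only standard helpers; the my_* names are illustrative, not the driver's macros:

#include <linux/bits.h>
#include <linux/io.h>
#include <linux/types.h>

#define MY_CR0_EVTQEN_MASK	BIT(2)

/* Set or clear one single-bit field of a 32-bit MMIO register */
static u32 my_cr0_set_evtqen(void __iomem *cr0, bool enable)
{
	u32 val = readl(cr0);

	if (enable)
		val |= MY_CR0_EVTQEN_MASK;
	else
		val &= ~MY_CR0_EVTQEN_MASK;

	writel(val, cr0);
	return val;	/* caller can poll an ACK register against this value */
}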
diff --git a/drivers/accel/ivpu/ivpu_mmu.h b/drivers/accel/ivpu/ivpu_mmu.h
index 7afea9cd8731..1ce7529746ad 100644
--- a/drivers/accel/ivpu/ivpu_mmu.h
+++ b/drivers/accel/ivpu/ivpu_mmu.h
@@ -47,5 +47,7 @@ int ivpu_mmu_invalidate_tlb(struct ivpu_device *vdev, u16 ssid);
void ivpu_mmu_irq_evtq_handler(struct ivpu_device *vdev);
void ivpu_mmu_irq_gerr_handler(struct ivpu_device *vdev);
void ivpu_mmu_evtq_dump(struct ivpu_device *vdev);
+void ivpu_mmu_discard_events(struct ivpu_device *vdev);
+int ivpu_mmu_disable_ssid_events(struct ivpu_device *vdev, u32 ssid);
#endif /* __IVPU_MMU_H__ */
diff --git a/drivers/accel/ivpu/ivpu_mmu_context.c b/drivers/accel/ivpu/ivpu_mmu_context.c
index 0af614dfb6f9..f0267efa55aa 100644
--- a/drivers/accel/ivpu/ivpu_mmu_context.c
+++ b/drivers/accel/ivpu/ivpu_mmu_context.c
@@ -635,16 +635,3 @@ void ivpu_mmu_reserved_context_fini(struct ivpu_device *vdev)
ivpu_mmu_cd_clear(vdev, vdev->rctx.id);
ivpu_mmu_context_fini(vdev, &vdev->rctx);
}
-
-void ivpu_mmu_user_context_mark_invalid(struct ivpu_device *vdev, u32 ssid)
-{
- struct ivpu_file_priv *file_priv;
-
- xa_lock(&vdev->context_xa);
-
- file_priv = xa_load(&vdev->context_xa, ssid);
- if (file_priv)
- file_priv->has_mmu_faults = true;
-
- xa_unlock(&vdev->context_xa);
-}
diff --git a/drivers/accel/ivpu/ivpu_mmu_context.h b/drivers/accel/ivpu/ivpu_mmu_context.h
index 8042fc067062..f255310968cf 100644
--- a/drivers/accel/ivpu/ivpu_mmu_context.h
+++ b/drivers/accel/ivpu/ivpu_mmu_context.h
@@ -37,8 +37,6 @@ void ivpu_mmu_global_context_fini(struct ivpu_device *vdev);
int ivpu_mmu_reserved_context_init(struct ivpu_device *vdev);
void ivpu_mmu_reserved_context_fini(struct ivpu_device *vdev);
-void ivpu_mmu_user_context_mark_invalid(struct ivpu_device *vdev, u32 ssid);
-
int ivpu_mmu_context_insert_node(struct ivpu_mmu_context *ctx, const struct ivpu_addr_range *range,
u64 size, struct drm_mm_node *node);
void ivpu_mmu_context_remove_node(struct ivpu_mmu_context *ctx, struct drm_mm_node *node);
diff --git a/drivers/accel/ivpu/ivpu_ms.c b/drivers/accel/ivpu/ivpu_ms.c
index ffe7b10f8a76..2a043baf10ca 100644
--- a/drivers/accel/ivpu/ivpu_ms.c
+++ b/drivers/accel/ivpu/ivpu_ms.c
@@ -4,6 +4,7 @@
*/
#include <drm/drm_file.h>
+#include <linux/pm_runtime.h>
#include "ivpu_drv.h"
#include "ivpu_gem.h"
@@ -44,6 +45,10 @@ int ivpu_ms_start_ioctl(struct drm_device *dev, void *data, struct drm_file *fil
args->sampling_period_ns < MS_MIN_SAMPLE_PERIOD_NS)
return -EINVAL;
+ ret = ivpu_rpm_get(vdev);
+ if (ret < 0)
+ return ret;
+
mutex_lock(&file_priv->ms_lock);
if (get_instance_by_mask(file_priv, args->metric_group_mask)) {
@@ -96,6 +101,8 @@ err_free_ms:
kfree(ms);
unlock:
mutex_unlock(&file_priv->ms_lock);
+
+ ivpu_rpm_put(vdev);
return ret;
}
@@ -160,6 +167,10 @@ int ivpu_ms_get_data_ioctl(struct drm_device *dev, void *data, struct drm_file *
if (!args->metric_group_mask)
return -EINVAL;
+ ret = ivpu_rpm_get(vdev);
+ if (ret < 0)
+ return ret;
+
mutex_lock(&file_priv->ms_lock);
ms = get_instance_by_mask(file_priv, args->metric_group_mask);
@@ -187,6 +198,7 @@ int ivpu_ms_get_data_ioctl(struct drm_device *dev, void *data, struct drm_file *
unlock:
mutex_unlock(&file_priv->ms_lock);
+ ivpu_rpm_put(vdev);
return ret;
}
@@ -204,11 +216,17 @@ int ivpu_ms_stop_ioctl(struct drm_device *dev, void *data, struct drm_file *file
{
struct ivpu_file_priv *file_priv = file->driver_priv;
struct drm_ivpu_metric_streamer_stop *args = data;
+ struct ivpu_device *vdev = file_priv->vdev;
struct ivpu_ms_instance *ms;
+ int ret;
if (!args->metric_group_mask)
return -EINVAL;
+ ret = ivpu_rpm_get(vdev);
+ if (ret < 0)
+ return ret;
+
mutex_lock(&file_priv->ms_lock);
ms = get_instance_by_mask(file_priv, args->metric_group_mask);
@@ -217,6 +235,7 @@ int ivpu_ms_stop_ioctl(struct drm_device *dev, void *data, struct drm_file *file
mutex_unlock(&file_priv->ms_lock);
+ ivpu_rpm_put(vdev);
return ms ? 0 : -EINVAL;
}
@@ -281,6 +300,9 @@ unlock:
void ivpu_ms_cleanup(struct ivpu_file_priv *file_priv)
{
struct ivpu_ms_instance *ms, *tmp;
+ struct ivpu_device *vdev = file_priv->vdev;
+
+ pm_runtime_get_sync(vdev->drm.dev);
mutex_lock(&file_priv->ms_lock);
@@ -293,6 +315,8 @@ void ivpu_ms_cleanup(struct ivpu_file_priv *file_priv)
free_instance(file_priv, ms);
mutex_unlock(&file_priv->ms_lock);
+
+ pm_runtime_put_autosuspend(vdev->drm.dev);
}
void ivpu_ms_cleanup_all(struct ivpu_device *vdev)
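The metric-streamer ioctls above now hold a runtime-PM reference for the duration of the call; ivpu_rpm_get()/ivpu_rpm_put() are the driver's wrappers, and the sketch below shows the equivalent generic runtime-PM bracketing with a placeholder my_ioctl_body():

#include <linux/device.h>
#include <linux/pm_runtime.h>

static int my_ioctl_body(struct device *dev)
{
	return 0;	/* placeholder for the real work */
}

static int my_ioctl(struct device *dev)
{
	int ret;

	/* Resume the device, or just bump the usage count if already active */
	ret = pm_runtime_resume_and_get(dev);
	if (ret < 0)
		return ret;

	ret = my_ioctl_body(dev);

	/* Let autosuspend kick in once the ioctl has finished */
	pm_runtime_mark_last_busy(dev);
	pm_runtime_put_autosuspend(dev);
	return ret;
}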
diff --git a/drivers/accel/ivpu/ivpu_pm.c b/drivers/accel/ivpu/ivpu_pm.c
index 5060c5dd40d1..ac0e22454596 100644
--- a/drivers/accel/ivpu/ivpu_pm.c
+++ b/drivers/accel/ivpu/ivpu_pm.c
@@ -177,16 +177,11 @@ void ivpu_pm_trigger_recovery(struct ivpu_device *vdev, const char *reason)
return;
}
- if (ivpu_is_fpga(vdev)) {
- ivpu_err(vdev, "Recovery not available on FPGA\n");
- return;
- }
-
/* Trigger recovery if it's not in progress */
if (atomic_cmpxchg(&vdev->pm->reset_pending, 0, 1) == 0) {
ivpu_hw_diagnose_failure(vdev);
ivpu_hw_irq_disable(vdev); /* Disable IRQ early to protect from IRQ storm */
- queue_work(system_long_wq, &vdev->pm->recovery_work);
+ queue_work(system_unbound_wq, &vdev->pm->recovery_work);
}
}
@@ -433,16 +428,17 @@ int ivpu_pm_dct_enable(struct ivpu_device *vdev, u8 active_percent)
active_us = (DCT_PERIOD_US * active_percent) / 100;
inactive_us = DCT_PERIOD_US - active_us;
+ vdev->pm->dct_active_percent = active_percent;
+
+ ivpu_dbg(vdev, PM, "DCT requested %u%% (D0: %uus, D0i2: %uus)\n",
+ active_percent, active_us, inactive_us);
+
ret = ivpu_jsm_dct_enable(vdev, active_us, inactive_us);
if (ret) {
ivpu_err_ratelimited(vdev, "Failed to enable DCT: %d\n", ret);
return ret;
}
- vdev->pm->dct_active_percent = active_percent;
-
- ivpu_dbg(vdev, PM, "DCT set to %u%% (D0: %uus, D0i2: %uus)\n",
- active_percent, active_us, inactive_us);
return 0;
}
@@ -450,27 +446,29 @@ int ivpu_pm_dct_disable(struct ivpu_device *vdev)
{
int ret;
+ vdev->pm->dct_active_percent = 0;
+
+ ivpu_dbg(vdev, PM, "DCT requested to be disabled\n");
+
ret = ivpu_jsm_dct_disable(vdev);
if (ret) {
ivpu_err_ratelimited(vdev, "Failed to disable DCT: %d\n", ret);
return ret;
}
- vdev->pm->dct_active_percent = 0;
-
- ivpu_dbg(vdev, PM, "DCT disabled\n");
return 0;
}
-void ivpu_pm_dct_irq_thread_handler(struct ivpu_device *vdev)
+void ivpu_pm_irq_dct_work_fn(struct work_struct *work)
{
+ struct ivpu_device *vdev = container_of(work, struct ivpu_device, irq_dct_work);
bool enable;
int ret;
if (ivpu_hw_btrs_dct_get_request(vdev, &enable))
return;
- if (vdev->pm->dct_active_percent)
+ if (enable)
ret = ivpu_pm_dct_enable(vdev, DCT_DEFAULT_ACTIVE_PERCENT);
else
ret = ivpu_pm_dct_disable(vdev);
diff --git a/drivers/accel/ivpu/ivpu_pm.h b/drivers/accel/ivpu/ivpu_pm.h
index b70efe6c36e4..89b264cc0e3e 100644
--- a/drivers/accel/ivpu/ivpu_pm.h
+++ b/drivers/accel/ivpu/ivpu_pm.h
@@ -45,6 +45,6 @@ void ivpu_stop_job_timeout_detection(struct ivpu_device *vdev);
int ivpu_pm_dct_init(struct ivpu_device *vdev);
int ivpu_pm_dct_enable(struct ivpu_device *vdev, u8 active_percent);
int ivpu_pm_dct_disable(struct ivpu_device *vdev);
-void ivpu_pm_dct_irq_thread_handler(struct ivpu_device *vdev);
+void ivpu_pm_irq_dct_work_fn(struct work_struct *work);
#endif /* __IVPU_PM_H__ */
diff --git a/drivers/accel/ivpu/ivpu_sysfs.c b/drivers/accel/ivpu/ivpu_sysfs.c
index 616477fc17fa..268ab7744a8b 100644
--- a/drivers/accel/ivpu/ivpu_sysfs.c
+++ b/drivers/accel/ivpu/ivpu_sysfs.c
@@ -1,17 +1,22 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
- * Copyright (C) 2024 Intel Corporation
+ * Copyright (C) 2024-2025 Intel Corporation
*/
#include <linux/device.h>
#include <linux/err.h>
+#include <linux/pm_runtime.h>
+#include <linux/units.h>
#include "ivpu_drv.h"
+#include "ivpu_gem.h"
#include "ivpu_fw.h"
#include "ivpu_hw.h"
#include "ivpu_sysfs.h"
-/*
+/**
+ * DOC: npu_busy_time_us
+ *
* npu_busy_time_us is the time that the device spent executing jobs.
* The time is counted when and only when there are jobs submitted to firmware.
*
@@ -30,11 +35,12 @@ npu_busy_time_us_show(struct device *dev, struct device_attribute *attr, char *b
struct ivpu_device *vdev = to_ivpu_device(drm);
ktime_t total, now = 0;
- xa_lock(&vdev->submitted_jobs_xa);
+ mutex_lock(&vdev->submitted_jobs_lock);
+
total = vdev->busy_time;
if (!xa_empty(&vdev->submitted_jobs_xa))
now = ktime_sub(ktime_get(), vdev->busy_start_ts);
- xa_unlock(&vdev->submitted_jobs_xa);
+ mutex_unlock(&vdev->submitted_jobs_lock);
return sysfs_emit(buf, "%lld\n", ktime_to_us(ktime_add(total, now)));
}
@@ -42,6 +48,30 @@ npu_busy_time_us_show(struct device *dev, struct device_attribute *attr, char *b
static DEVICE_ATTR_RO(npu_busy_time_us);
/**
+ * DOC: npu_memory_utilization
+ *
+ * The npu_memory_utilization attribute reports the current NPU memory utilization in bytes.
+ *
+ */
+static ssize_t
+npu_memory_utilization_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+ struct drm_device *drm = dev_get_drvdata(dev);
+ struct ivpu_device *vdev = to_ivpu_device(drm);
+ struct ivpu_bo *bo;
+ u64 total_npu_memory = 0;
+
+ mutex_lock(&vdev->bo_list_lock);
+ list_for_each_entry(bo, &vdev->bo_list, bo_list_node)
+ total_npu_memory += bo->base.base.size;
+ mutex_unlock(&vdev->bo_list_lock);
+
+ return sysfs_emit(buf, "%lld\n", total_npu_memory);
+}
+
+static DEVICE_ATTR_RO(npu_memory_utilization);
+
+/**
* DOC: sched_mode
*
* The sched_mode is used to report current NPU scheduling mode.
@@ -62,9 +92,55 @@ sched_mode_show(struct device *dev, struct device_attribute *attr, char *buf)
static DEVICE_ATTR_RO(sched_mode);
+/**
+ * DOC: npu_max_frequency_mhz
+ *
+ * The npu_max_frequency_mhz attribute shows the maximum frequency, in MHz, of
+ * the NPU's data processing unit.
+ */
+static ssize_t
+npu_max_frequency_mhz_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+ struct drm_device *drm = dev_get_drvdata(dev);
+ struct ivpu_device *vdev = to_ivpu_device(drm);
+ u32 freq = ivpu_hw_dpu_max_freq_get(vdev);
+
+ return sysfs_emit(buf, "%lu\n", freq / HZ_PER_MHZ);
+}
+
+static DEVICE_ATTR_RO(npu_max_frequency_mhz);
+
+/**
+ * DOC: npu_current_frequency_mhz
+ *
+ * The npu_current_frequency_mhz attribute shows the current frequency, in MHz,
+ * of the NPU's data processing unit.
+ */
+static ssize_t
+npu_current_frequency_mhz_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+ struct drm_device *drm = dev_get_drvdata(dev);
+ struct ivpu_device *vdev = to_ivpu_device(drm);
+ u32 freq = 0;
+
+ /* Read frequency only if device is active, otherwise frequency is 0 */
+ if (pm_runtime_get_if_active(vdev->drm.dev) > 0) {
+ freq = ivpu_hw_dpu_freq_get(vdev);
+
+ pm_runtime_put_autosuspend(vdev->drm.dev);
+ }
+
+ return sysfs_emit(buf, "%lu\n", freq / HZ_PER_MHZ);
+}
+
+static DEVICE_ATTR_RO(npu_current_frequency_mhz);
+
static struct attribute *ivpu_dev_attrs[] = {
&dev_attr_npu_busy_time_us.attr,
+ &dev_attr_npu_memory_utilization.attr,
&dev_attr_sched_mode.attr,
+ &dev_attr_npu_max_frequency_mhz.attr,
+ &dev_attr_npu_current_frequency_mhz.attr,
NULL,
};
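The new npu_memory_utilization and frequency attributes above follow the usual read-only sysfs pattern: a <name>_show() callback, DEVICE_ATTR_RO(<name>), and an entry in the attribute array. A stripped-down sketch with a hypothetical example_value attribute:

#include <linux/device.h>
#include <linux/sysfs.h>

static ssize_t example_value_show(struct device *dev, struct device_attribute *attr,
				  char *buf)
{
	/* sysfs_emit() bounds the output to a single PAGE_SIZE buffer */
	return sysfs_emit(buf, "%d\n", 42);
}
static DEVICE_ATTR_RO(example_value);

static struct attribute *example_attrs[] = {
	&dev_attr_example_value.attr,
	NULL,
};
ATTRIBUTE_GROUPS(example);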
diff --git a/drivers/accel/ivpu/vpu_boot_api.h b/drivers/accel/ivpu/vpu_boot_api.h
index 908e68ea1c39..218468bbbcad 100644
--- a/drivers/accel/ivpu/vpu_boot_api.h
+++ b/drivers/accel/ivpu/vpu_boot_api.h
@@ -26,7 +26,7 @@
* Minor version changes when API backward compatibility is preserved.
* Resets to 0 if Major version is incremented.
*/
-#define VPU_BOOT_API_VER_MINOR 26
+#define VPU_BOOT_API_VER_MINOR 28
/*
* API header changed (field names, documentation, formatting) but API itself has not been changed
@@ -76,8 +76,15 @@ struct vpu_firmware_header {
* submission queue size and device capabilities.
*/
u32 preemption_buffer_2_size;
+ /*
+ * Maximum preemption buffer size that the FW can use: the host driver does
+ * not need to allocate more space than specified by these fields.
+ * A value of 0 means no declared limit.
+ */
+ u32 preemption_buffer_1_max_size;
+ u32 preemption_buffer_2_max_size;
/* Space reserved for future preemption-related fields. */
- u32 preemption_reserved[6];
+ u32 preemption_reserved[4];
/* FW image read only section start address, 4KB aligned */
u64 ro_section_start_address;
/* FW image read only section size, 4KB aligned */
@@ -134,7 +141,7 @@ enum vpu_trace_destination {
/*
* Processor bit shifts (for loggable HW components).
*/
-#define VPU_TRACE_PROC_BIT_ARM 0
+#define VPU_TRACE_PROC_BIT_RESERVED 0
#define VPU_TRACE_PROC_BIT_LRT 1
#define VPU_TRACE_PROC_BIT_LNN 2
#define VPU_TRACE_PROC_BIT_SHV_0 3
diff --git a/drivers/accel/ivpu/vpu_jsm_api.h b/drivers/accel/ivpu/vpu_jsm_api.h
index 7215c144158c..4b6b2b3d2583 100644
--- a/drivers/accel/ivpu/vpu_jsm_api.h
+++ b/drivers/accel/ivpu/vpu_jsm_api.h
@@ -22,7 +22,7 @@
/*
* Minor version changes when API backward compatibility is preserved.
*/
-#define VPU_JSM_API_VER_MINOR 25
+#define VPU_JSM_API_VER_MINOR 29
/*
* API header changed (field names, documentation, formatting) but API itself has not been changed
@@ -53,8 +53,7 @@
* Engine indexes.
*/
#define VPU_ENGINE_COMPUTE 0
-#define VPU_ENGINE_COPY 1
-#define VPU_ENGINE_NB 2
+#define VPU_ENGINE_NB 1
/*
* VPU status values.
@@ -126,11 +125,13 @@ enum {
* When set, indicates that job queue uses native fences (as inline commands
* in job queue). Such queues may also use legacy fences (as commands in batch buffers).
* When cleared, indicates the job queue only uses legacy fences.
- * NOTE: For queues using native fences, VPU expects that all jobs in the queue
- * are immediately followed by an inline command object. This object is expected
- * to be a fence signal command in most cases, but can also be a NOP in case the host
- * does not need per-job fence signalling. Other inline commands objects can be
- * inserted between "job and inline command" pairs.
+ * NOTES:
+ * 1. For queues using native fences, VPU expects that all jobs in the queue
+ * are immediately followed by an inline command object. This object is expected
+ * to be a fence signal command in most cases, but can also be a NOP in case the host
+ * does not need per-job fence signalling. Other inline commands objects can be
+ * inserted between "job and inline command" pairs.
+ * 2. Native fence queues are only supported on VPU 40xx onwards.
*/
VPU_JOB_QUEUE_FLAGS_USE_NATIVE_FENCE_MASK = (1 << 1U),
@@ -275,6 +276,8 @@ struct vpu_inline_cmd {
u64 value;
/* User VA of the log buffer in which to add log entry on completion. */
u64 log_buffer_va;
+ /* NPU private data. */
+ u64 npu_private_data;
} fence;
/* Other commands do not have a payload. */
/* Payload definition for future inline commands can be inserted here. */
@@ -791,12 +794,22 @@ struct vpu_jsm_metric_streamer_update {
/** Metric group mask that identifies metric streamer instance. */
u64 metric_group_mask;
/**
- * Address and size of the buffer where the VPU will write metric data. If
- * the buffer address is 0 or same as the currently used buffer the VPU will
- * continue writing metric data to the current buffer. In this case the
- * buffer size is ignored and the size of the current buffer is unchanged.
- * If the address is non-zero and differs from the current buffer address the
- * VPU will immediately switch data collection to the new buffer.
+ * Address and size of the buffer where the VPU will write metric data.
+ * This member determines how the update operation behaves, depending on whether:
+ * 1. client needs information about the number of collected samples and the
+ * amount of data written to the current buffer
+ * 2. client wants to switch to a new buffer
+ *
+ * Case 1. is identified by the buffer address being 0 or the same as the
+ * currently used buffer address. In this case the buffer size is ignored and
+ * the size of the current buffer is unchanged. The VPU will return an update
+ * in the vpu_jsm_metric_streamer_done structure. The internal writing position
+ * into the buffer is not changed.
+ *
+ * Case 2. is identified by the address being non-zero and differs from the
+ * current buffer address. The VPU will immediately switch data collection to
+ * the new buffer. Then the VPU will return an update in the
+ * vpu_jsm_metric_streamer_done structure.
*/
u64 buffer_addr;
u64 buffer_size;
@@ -934,6 +947,7 @@ struct vpu_ipc_msg_payload_hws_priority_band_setup {
/*
* Default quantum in 100ns units for scheduling across processes
* within a priority band
+ * Minimum value supported by NPU is 1ms (10000 in 100ns units).
*/
u32 process_quantum[VPU_HWS_NUM_PRIORITY_BANDS];
/*
@@ -946,8 +960,10 @@ struct vpu_ipc_msg_payload_hws_priority_band_setup {
* in situations when it's starved by the focus band.
*/
u32 normal_band_percentage;
- /* Reserved */
- u32 reserved_0;
+ /*
+ * TDR timeout value in milliseconds. Default value of 0 meaning no timeout.
+ */
+ u32 tdr_timeout;
};
/*
@@ -1024,7 +1040,10 @@ struct vpu_ipc_msg_payload_hws_set_context_sched_properties {
s32 in_process_priority;
/* Zero padding / Reserved */
u32 reserved_1;
- /* Context quantum relative to other contexts of same priority in the same process */
+ /*
+ * Context quantum relative to other contexts of same priority in the same process
+ * Minimum value supported by NPU is 1ms (10000 in 100ns units).
+ */
u64 context_quantum;
/* Grace period when preempting context of the same priority within the same process */
u64 grace_period_same_priority;
diff --git a/drivers/accel/qaic/mhi_controller.c b/drivers/accel/qaic/mhi_controller.c
index 8ab82e78dd94..13a14c6c6168 100644
--- a/drivers/accel/qaic/mhi_controller.c
+++ b/drivers/accel/qaic/mhi_controller.c
@@ -20,6 +20,11 @@ static unsigned int mhi_timeout_ms = 2000; /* 2 sec default */
module_param(mhi_timeout_ms, uint, 0600);
MODULE_PARM_DESC(mhi_timeout_ms, "MHI controller timeout value");
+static const char *fw_image_paths[FAMILY_MAX] = {
+ [FAMILY_AIC100] = "qcom/aic100/sbl.bin",
+ [FAMILY_AIC200] = "qcom/aic200/sbl.bin",
+};
+
static const struct mhi_channel_config aic100_channels[] = {
{
.name = "QAIC_LOOPBACK",
@@ -439,6 +444,297 @@ static const struct mhi_channel_config aic100_channels[] = {
},
};
+static const struct mhi_channel_config aic200_channels[] = {
+ {
+ .name = "QAIC_LOOPBACK",
+ .num = 0,
+ .num_elements = 32,
+ .local_elements = 0,
+ .event_ring = 0,
+ .dir = DMA_TO_DEVICE,
+ .ee_mask = MHI_CH_EE_AMSS,
+ .pollcfg = 0,
+ .doorbell = MHI_DB_BRST_DISABLE,
+ .lpm_notify = false,
+ .offload_channel = false,
+ .doorbell_mode_switch = false,
+ .auto_queue = false,
+ .wake_capable = false,
+ },
+ {
+ .name = "QAIC_LOOPBACK",
+ .num = 1,
+ .num_elements = 32,
+ .local_elements = 0,
+ .event_ring = 0,
+ .dir = DMA_FROM_DEVICE,
+ .ee_mask = MHI_CH_EE_AMSS,
+ .pollcfg = 0,
+ .doorbell = MHI_DB_BRST_DISABLE,
+ .lpm_notify = false,
+ .offload_channel = false,
+ .doorbell_mode_switch = false,
+ .auto_queue = false,
+ .wake_capable = false,
+ },
+ {
+ .name = "QAIC_SAHARA",
+ .num = 2,
+ .num_elements = 32,
+ .local_elements = 0,
+ .event_ring = 0,
+ .dir = DMA_TO_DEVICE,
+ .ee_mask = MHI_CH_EE_SBL,
+ .pollcfg = 0,
+ .doorbell = MHI_DB_BRST_DISABLE,
+ .lpm_notify = false,
+ .offload_channel = false,
+ .doorbell_mode_switch = false,
+ .auto_queue = false,
+ .wake_capable = false,
+ },
+ {
+ .name = "QAIC_SAHARA",
+ .num = 3,
+ .num_elements = 32,
+ .local_elements = 0,
+ .event_ring = 0,
+ .dir = DMA_FROM_DEVICE,
+ .ee_mask = MHI_CH_EE_SBL,
+ .pollcfg = 0,
+ .doorbell = MHI_DB_BRST_DISABLE,
+ .lpm_notify = false,
+ .offload_channel = false,
+ .doorbell_mode_switch = false,
+ .auto_queue = false,
+ .wake_capable = false,
+ },
+ {
+ .name = "QAIC_SSR",
+ .num = 6,
+ .num_elements = 32,
+ .local_elements = 0,
+ .event_ring = 0,
+ .dir = DMA_TO_DEVICE,
+ .ee_mask = MHI_CH_EE_AMSS,
+ .pollcfg = 0,
+ .doorbell = MHI_DB_BRST_DISABLE,
+ .lpm_notify = false,
+ .offload_channel = false,
+ .doorbell_mode_switch = false,
+ .auto_queue = false,
+ .wake_capable = false,
+ },
+ {
+ .name = "QAIC_SSR",
+ .num = 7,
+ .num_elements = 32,
+ .local_elements = 0,
+ .event_ring = 0,
+ .dir = DMA_FROM_DEVICE,
+ .ee_mask = MHI_CH_EE_AMSS,
+ .pollcfg = 0,
+ .doorbell = MHI_DB_BRST_DISABLE,
+ .lpm_notify = false,
+ .offload_channel = false,
+ .doorbell_mode_switch = false,
+ .auto_queue = false,
+ .wake_capable = false,
+ },
+ {
+ .name = "QAIC_CONTROL",
+ .num = 10,
+ .num_elements = 128,
+ .local_elements = 0,
+ .event_ring = 0,
+ .dir = DMA_TO_DEVICE,
+ .ee_mask = MHI_CH_EE_AMSS,
+ .pollcfg = 0,
+ .doorbell = MHI_DB_BRST_DISABLE,
+ .lpm_notify = false,
+ .offload_channel = false,
+ .doorbell_mode_switch = false,
+ .auto_queue = false,
+ .wake_capable = false,
+ },
+ {
+ .name = "QAIC_CONTROL",
+ .num = 11,
+ .num_elements = 128,
+ .local_elements = 0,
+ .event_ring = 0,
+ .dir = DMA_FROM_DEVICE,
+ .ee_mask = MHI_CH_EE_AMSS,
+ .pollcfg = 0,
+ .doorbell = MHI_DB_BRST_DISABLE,
+ .lpm_notify = false,
+ .offload_channel = false,
+ .doorbell_mode_switch = false,
+ .auto_queue = false,
+ .wake_capable = false,
+ },
+ {
+ .name = "QAIC_LOGGING",
+ .num = 12,
+ .num_elements = 32,
+ .local_elements = 0,
+ .event_ring = 0,
+ .dir = DMA_TO_DEVICE,
+ .ee_mask = MHI_CH_EE_SBL,
+ .pollcfg = 0,
+ .doorbell = MHI_DB_BRST_DISABLE,
+ .lpm_notify = false,
+ .offload_channel = false,
+ .doorbell_mode_switch = false,
+ .auto_queue = false,
+ .wake_capable = false,
+ },
+ {
+ .name = "QAIC_LOGGING",
+ .num = 13,
+ .num_elements = 32,
+ .local_elements = 0,
+ .event_ring = 0,
+ .dir = DMA_FROM_DEVICE,
+ .ee_mask = MHI_CH_EE_SBL,
+ .pollcfg = 0,
+ .doorbell = MHI_DB_BRST_DISABLE,
+ .lpm_notify = false,
+ .offload_channel = false,
+ .doorbell_mode_switch = false,
+ .auto_queue = false,
+ .wake_capable = false,
+ },
+ {
+ .name = "QAIC_STATUS",
+ .num = 14,
+ .num_elements = 32,
+ .local_elements = 0,
+ .event_ring = 0,
+ .dir = DMA_TO_DEVICE,
+ .ee_mask = MHI_CH_EE_AMSS,
+ .pollcfg = 0,
+ .doorbell = MHI_DB_BRST_DISABLE,
+ .lpm_notify = false,
+ .offload_channel = false,
+ .doorbell_mode_switch = false,
+ .auto_queue = false,
+ .wake_capable = false,
+ },
+ {
+ .name = "QAIC_STATUS",
+ .num = 15,
+ .num_elements = 32,
+ .local_elements = 0,
+ .event_ring = 0,
+ .dir = DMA_FROM_DEVICE,
+ .ee_mask = MHI_CH_EE_AMSS,
+ .pollcfg = 0,
+ .doorbell = MHI_DB_BRST_DISABLE,
+ .lpm_notify = false,
+ .offload_channel = false,
+ .doorbell_mode_switch = false,
+ .auto_queue = false,
+ .wake_capable = false,
+ },
+ {
+ .name = "QAIC_TELEMETRY",
+ .num = 16,
+ .num_elements = 32,
+ .local_elements = 0,
+ .event_ring = 0,
+ .dir = DMA_TO_DEVICE,
+ .ee_mask = MHI_CH_EE_AMSS,
+ .pollcfg = 0,
+ .doorbell = MHI_DB_BRST_DISABLE,
+ .lpm_notify = false,
+ .offload_channel = false,
+ .doorbell_mode_switch = false,
+ .auto_queue = false,
+ .wake_capable = false,
+ },
+ {
+ .name = "QAIC_TELEMETRY",
+ .num = 17,
+ .num_elements = 32,
+ .local_elements = 0,
+ .event_ring = 0,
+ .dir = DMA_FROM_DEVICE,
+ .ee_mask = MHI_CH_EE_AMSS,
+ .pollcfg = 0,
+ .doorbell = MHI_DB_BRST_DISABLE,
+ .lpm_notify = false,
+ .offload_channel = false,
+ .doorbell_mode_switch = false,
+ .auto_queue = false,
+ .wake_capable = false,
+ },
+ {
+ .name = "QAIC_TIMESYNC_PERIODIC",
+ .num = 22,
+ .num_elements = 32,
+ .local_elements = 0,
+ .event_ring = 0,
+ .dir = DMA_TO_DEVICE,
+ .ee_mask = MHI_CH_EE_AMSS,
+ .pollcfg = 0,
+ .doorbell = MHI_DB_BRST_DISABLE,
+ .lpm_notify = false,
+ .offload_channel = false,
+ .doorbell_mode_switch = false,
+ .auto_queue = false,
+ .wake_capable = false,
+ },
+ {
+ .name = "QAIC_TIMESYNC_PERIODIC",
+ .num = 23,
+ .num_elements = 32,
+ .local_elements = 0,
+ .event_ring = 0,
+ .dir = DMA_FROM_DEVICE,
+ .ee_mask = MHI_CH_EE_AMSS,
+ .pollcfg = 0,
+ .doorbell = MHI_DB_BRST_DISABLE,
+ .lpm_notify = false,
+ .offload_channel = false,
+ .doorbell_mode_switch = false,
+ .auto_queue = false,
+ .wake_capable = false,
+ },
+ {
+ .name = "IPCR",
+ .num = 24,
+ .num_elements = 32,
+ .local_elements = 0,
+ .event_ring = 0,
+ .dir = DMA_TO_DEVICE,
+ .ee_mask = MHI_CH_EE_AMSS,
+ .pollcfg = 0,
+ .doorbell = MHI_DB_BRST_DISABLE,
+ .lpm_notify = false,
+ .offload_channel = false,
+ .doorbell_mode_switch = false,
+ .auto_queue = false,
+ .wake_capable = false,
+ },
+ {
+ .name = "IPCR",
+ .num = 25,
+ .num_elements = 32,
+ .local_elements = 0,
+ .event_ring = 0,
+ .dir = DMA_FROM_DEVICE,
+ .ee_mask = MHI_CH_EE_AMSS,
+ .pollcfg = 0,
+ .doorbell = MHI_DB_BRST_DISABLE,
+ .lpm_notify = false,
+ .offload_channel = false,
+ .doorbell_mode_switch = false,
+ .auto_queue = true,
+ .wake_capable = false,
+ },
+};
+
static struct mhi_event_config aic100_events[] = {
{
.num_elements = 32,
@@ -454,16 +750,44 @@ static struct mhi_event_config aic100_events[] = {
},
};
-static struct mhi_controller_config aic100_config = {
- .max_channels = 128,
- .timeout_ms = 0, /* controlled by mhi_timeout */
- .buf_len = 0,
- .num_channels = ARRAY_SIZE(aic100_channels),
- .ch_cfg = aic100_channels,
- .num_events = ARRAY_SIZE(aic100_events),
- .event_cfg = aic100_events,
- .use_bounce_buf = false,
- .m2_no_db = false,
+static struct mhi_event_config aic200_events[] = {
+ {
+ .num_elements = 32,
+ .irq_moderation_ms = 0,
+ .irq = 0,
+ .channel = U32_MAX,
+ .priority = 1,
+ .mode = MHI_DB_BRST_DISABLE,
+ .data_type = MHI_ER_CTRL,
+ .hardware_event = false,
+ .client_managed = false,
+ .offload_channel = false,
+ },
+};
+
+static struct mhi_controller_config mhi_cntrl_configs[] = {
+ [FAMILY_AIC100] = {
+ .max_channels = 128,
+ .timeout_ms = 0, /* controlled by mhi_timeout */
+ .buf_len = 0,
+ .num_channels = ARRAY_SIZE(aic100_channels),
+ .ch_cfg = aic100_channels,
+ .num_events = ARRAY_SIZE(aic100_events),
+ .event_cfg = aic100_events,
+ .use_bounce_buf = false,
+ .m2_no_db = false,
+ },
+ [FAMILY_AIC200] = {
+ .max_channels = 128,
+ .timeout_ms = 0, /* controlled by mhi_timeout */
+ .buf_len = 0,
+ .num_channels = ARRAY_SIZE(aic200_channels),
+ .ch_cfg = aic200_channels,
+ .num_events = ARRAY_SIZE(aic200_events),
+ .event_cfg = aic200_events,
+ .use_bounce_buf = false,
+ .m2_no_db = false,
+ },
};
static int mhi_read_reg(struct mhi_controller *mhi_cntrl, void __iomem *addr, u32 *out)
@@ -545,8 +869,9 @@ static int mhi_reset_and_async_power_up(struct mhi_controller *mhi_cntrl)
}
struct mhi_controller *qaic_mhi_register_controller(struct pci_dev *pci_dev, void __iomem *mhi_bar,
- int mhi_irq, bool shared_msi)
+ int mhi_irq, bool shared_msi, int family)
{
+ struct mhi_controller_config mhi_config = mhi_cntrl_configs[family];
struct mhi_controller *mhi_cntrl;
int ret;
@@ -581,11 +906,18 @@ struct mhi_controller *qaic_mhi_register_controller(struct pci_dev *pci_dev, voi
if (shared_msi) /* MSI shared with data path, no IRQF_NO_SUSPEND */
mhi_cntrl->irq_flags = IRQF_SHARED;
- mhi_cntrl->fw_image = "qcom/aic100/sbl.bin";
+ mhi_cntrl->fw_image = fw_image_paths[family];
+
+ if (family == FAMILY_AIC200) {
+ mhi_cntrl->name = "AIC200";
+ mhi_cntrl->seg_len = SZ_512K;
+ } else {
+ mhi_cntrl->name = "AIC100";
+ }
/* use latest configured timeout */
- aic100_config.timeout_ms = mhi_timeout_ms;
- ret = mhi_register_controller(mhi_cntrl, &aic100_config);
+ mhi_config.timeout_ms = mhi_timeout_ms;
+ ret = mhi_register_controller(mhi_cntrl, &mhi_config);
if (ret) {
pci_err(pci_dev, "mhi_register_controller failed %d\n", ret);
return ERR_PTR(ret);
diff --git a/drivers/accel/qaic/mhi_controller.h b/drivers/accel/qaic/mhi_controller.h
index 500e7f4af2af..8939f6ae185e 100644
--- a/drivers/accel/qaic/mhi_controller.h
+++ b/drivers/accel/qaic/mhi_controller.h
@@ -8,7 +8,7 @@
#define MHICONTROLLERQAIC_H_
struct mhi_controller *qaic_mhi_register_controller(struct pci_dev *pci_dev, void __iomem *mhi_bar,
- int mhi_irq, bool shared_msi);
+ int mhi_irq, bool shared_msi, int family);
void qaic_mhi_free_controller(struct mhi_controller *mhi_cntrl, bool link_up);
void qaic_mhi_start_reset(struct mhi_controller *mhi_cntrl);
void qaic_mhi_reset_done(struct mhi_controller *mhi_cntrl);
diff --git a/drivers/accel/qaic/qaic.h b/drivers/accel/qaic/qaic.h
index 02561b6cecc6..0dbb8e32e4b9 100644
--- a/drivers/accel/qaic/qaic.h
+++ b/drivers/accel/qaic/qaic.h
@@ -32,6 +32,12 @@
#define to_accel_kdev(qddev) (to_drm(qddev)->accel->kdev) /* Return Linux device of accel node */
#define to_qaic_device(dev) (to_qaic_drm_device((dev))->qdev)
+enum aic_families {
+ FAMILY_AIC100,
+ FAMILY_AIC200,
+ FAMILY_MAX,
+};
+
enum __packed dev_states {
/* Device is offline or will be very soon */
QAIC_OFFLINE,
@@ -113,10 +119,10 @@ struct qaic_device {
struct pci_dev *pdev;
/* Req. ID of request that will be queued next in MHI control device */
u32 next_seq_num;
- /* Base address of bar 0 */
- void __iomem *bar_0;
- /* Base address of bar 2 */
- void __iomem *bar_2;
+ /* Base address of the MHI bar */
+ void __iomem *bar_mhi;
+ /* Base address of the DBCs bar */
+ void __iomem *bar_dbc;
/* Controller structure for MHI devices */
struct mhi_controller *mhi_cntrl;
/* MHI control channel device */
diff --git a/drivers/accel/qaic/qaic_data.c b/drivers/accel/qaic/qaic_data.c
index c20eb63750f5..43aba57b48f0 100644
--- a/drivers/accel/qaic/qaic_data.c
+++ b/drivers/accel/qaic/qaic_data.c
@@ -172,9 +172,10 @@ static void free_slice(struct kref *kref)
static int clone_range_of_sgt_for_slice(struct qaic_device *qdev, struct sg_table **sgt_out,
struct sg_table *sgt_in, u64 size, u64 offset)
{
- int total_len, len, nents, offf = 0, offl = 0;
struct scatterlist *sg, *sgn, *sgf, *sgl;
+ unsigned int len, nents, offf, offl;
struct sg_table *sgt;
+ size_t total_len;
int ret, j;
/* find out number of relevant nents needed for this mem */
@@ -182,6 +183,8 @@ static int clone_range_of_sgt_for_slice(struct qaic_device *qdev, struct sg_tabl
sgf = NULL;
sgl = NULL;
nents = 0;
+ offf = 0;
+ offl = 0;
size = size ? size : PAGE_SIZE;
for_each_sgtable_dma_sg(sgt_in, sg, j) {
@@ -554,6 +557,7 @@ static bool invalid_sem(struct qaic_sem *sem)
static int qaic_validate_req(struct qaic_device *qdev, struct qaic_attach_slice_entry *slice_ent,
u32 count, u64 total_size)
{
+ u64 total;
int i;
for (i = 0; i < count; i++) {
@@ -563,7 +567,8 @@ static int qaic_validate_req(struct qaic_device *qdev, struct qaic_attach_slice_
invalid_sem(&slice_ent[i].sem2) || invalid_sem(&slice_ent[i].sem3))
return -EINVAL;
- if (slice_ent[i].offset + slice_ent[i].size > total_size)
+ if (check_add_overflow(slice_ent[i].offset, slice_ent[i].size, &total) ||
+ total > total_size)
return -EINVAL;
}
diff --git a/drivers/accel/qaic/qaic_drv.c b/drivers/accel/qaic/qaic_drv.c
index 4ddf89308ff5..3b415e2c9431 100644
--- a/drivers/accel/qaic/qaic_drv.c
+++ b/drivers/accel/qaic/qaic_drv.c
@@ -34,13 +34,46 @@
MODULE_IMPORT_NS("DMA_BUF");
-#define PCI_DEV_AIC080 0xa080
-#define PCI_DEV_AIC100 0xa100
+#define PCI_DEVICE_ID_QCOM_AIC080 0xa080
+#define PCI_DEVICE_ID_QCOM_AIC100 0xa100
+#define PCI_DEVICE_ID_QCOM_AIC200 0xa110
#define QAIC_NAME "qaic"
#define QAIC_DESC "Qualcomm Cloud AI Accelerators"
#define CNTL_MAJOR 5
#define CNTL_MINOR 0
+struct qaic_device_config {
+ /* Indicates the AIC family the device belongs to */
+ int family;
+ /* A bitmask representing the available BARs */
+ int bar_mask;
+ /* An index value used to identify the MHI controller BAR */
+ unsigned int mhi_bar_idx;
+ /* An index value used to identify the DBCs BAR */
+ unsigned int dbc_bar_idx;
+};
+
+static const struct qaic_device_config aic080_config = {
+ .family = FAMILY_AIC100,
+ .bar_mask = BIT(0) | BIT(2) | BIT(4),
+ .mhi_bar_idx = 0,
+ .dbc_bar_idx = 2,
+};
+
+static const struct qaic_device_config aic100_config = {
+ .family = FAMILY_AIC100,
+ .bar_mask = BIT(0) | BIT(2) | BIT(4),
+ .mhi_bar_idx = 0,
+ .dbc_bar_idx = 2,
+};
+
+static const struct qaic_device_config aic200_config = {
+ .family = FAMILY_AIC200,
+ .bar_mask = BIT(0) | BIT(1) | BIT(2) | BIT(4),
+ .mhi_bar_idx = 1,
+ .dbc_bar_idx = 2,
+};
+
bool datapath_polling;
module_param(datapath_polling, bool, 0400);
MODULE_PARM_DESC(datapath_polling, "Operate the datapath in polling mode");
@@ -208,7 +241,6 @@ static const struct drm_driver qaic_accel_driver = {
.name = QAIC_NAME,
.desc = QAIC_DESC,
- .date = "20190618",
.fops = &qaic_accel_fops,
.open = qaic_open,
@@ -353,7 +385,8 @@ void qaic_dev_reset_clean_local_state(struct qaic_device *qdev)
release_dbc(qdev, i);
}
-static struct qaic_device *create_qdev(struct pci_dev *pdev, const struct pci_device_id *id)
+static struct qaic_device *create_qdev(struct pci_dev *pdev,
+ const struct qaic_device_config *config)
{
struct device *dev = &pdev->dev;
struct qaic_drm_device *qddev;
@@ -366,12 +399,10 @@ static struct qaic_device *create_qdev(struct pci_dev *pdev, const struct pci_de
return NULL;
qdev->dev_state = QAIC_OFFLINE;
- if (id->device == PCI_DEV_AIC080 || id->device == PCI_DEV_AIC100) {
- qdev->num_dbc = 16;
- qdev->dbc = devm_kcalloc(dev, qdev->num_dbc, sizeof(*qdev->dbc), GFP_KERNEL);
- if (!qdev->dbc)
- return NULL;
- }
+ qdev->num_dbc = 16;
+ qdev->dbc = devm_kcalloc(dev, qdev->num_dbc, sizeof(*qdev->dbc), GFP_KERNEL);
+ if (!qdev->dbc)
+ return NULL;
qddev = devm_drm_dev_alloc(&pdev->dev, &qaic_accel_driver, struct qaic_drm_device, drm);
if (IS_ERR(qddev))
@@ -427,17 +458,18 @@ static struct qaic_device *create_qdev(struct pci_dev *pdev, const struct pci_de
return qdev;
}
-static int init_pci(struct qaic_device *qdev, struct pci_dev *pdev)
+static int init_pci(struct qaic_device *qdev, struct pci_dev *pdev,
+ const struct qaic_device_config *config)
{
int bars;
int ret;
- bars = pci_select_bars(pdev, IORESOURCE_MEM);
+ bars = pci_select_bars(pdev, IORESOURCE_MEM) & 0x3f;
/* make sure the device has the expected BARs */
- if (bars != (BIT(0) | BIT(2) | BIT(4))) {
- pci_dbg(pdev, "%s: expected BARs 0, 2, and 4 not found in device. Found 0x%x\n",
- __func__, bars);
+ if (bars != config->bar_mask) {
+ pci_dbg(pdev, "%s: expected BARs %#x not found in device. Found %#x\n",
+ __func__, config->bar_mask, bars);
return -EINVAL;
}
@@ -450,13 +482,13 @@ static int init_pci(struct qaic_device *qdev, struct pci_dev *pdev)
return ret;
dma_set_max_seg_size(&pdev->dev, UINT_MAX);
- qdev->bar_0 = devm_ioremap_resource(&pdev->dev, &pdev->resource[0]);
- if (IS_ERR(qdev->bar_0))
- return PTR_ERR(qdev->bar_0);
+ qdev->bar_mhi = devm_ioremap_resource(&pdev->dev, &pdev->resource[config->mhi_bar_idx]);
+ if (IS_ERR(qdev->bar_mhi))
+ return PTR_ERR(qdev->bar_mhi);
- qdev->bar_2 = devm_ioremap_resource(&pdev->dev, &pdev->resource[2]);
- if (IS_ERR(qdev->bar_2))
- return PTR_ERR(qdev->bar_2);
+ qdev->bar_dbc = devm_ioremap_resource(&pdev->dev, &pdev->resource[config->dbc_bar_idx]);
+ if (IS_ERR(qdev->bar_dbc))
+ return PTR_ERR(qdev->bar_dbc);
/* Managed release since we use pcim_enable_device above */
pci_set_master(pdev);
@@ -466,14 +498,15 @@ static int init_pci(struct qaic_device *qdev, struct pci_dev *pdev)
static int init_msi(struct qaic_device *qdev, struct pci_dev *pdev)
{
+ int irq_count = qdev->num_dbc + 1;
int mhi_irq;
int ret;
int i;
/* Managed release since we use pcim_enable_device */
- ret = pci_alloc_irq_vectors(pdev, 32, 32, PCI_IRQ_MSI);
+ ret = pci_alloc_irq_vectors(pdev, irq_count, irq_count, PCI_IRQ_MSI | PCI_IRQ_MSIX);
if (ret == -ENOSPC) {
- ret = pci_alloc_irq_vectors(pdev, 1, 1, PCI_IRQ_MSI);
+ ret = pci_alloc_irq_vectors(pdev, 1, 1, PCI_IRQ_MSI | PCI_IRQ_MSIX);
if (ret < 0)
return ret;
@@ -486,7 +519,8 @@ static int init_msi(struct qaic_device *qdev, struct pci_dev *pdev)
* interrupted, it shouldn't race with itself.
*/
qdev->single_msi = true;
- pci_info(pdev, "Allocating 32 MSIs failed, operating in 1 MSI mode. Performance may be impacted.\n");
+ pci_info(pdev, "Allocating %d MSIs failed, operating in 1 MSI mode. Performance may be impacted.\n",
+ irq_count);
} else if (ret < 0) {
return ret;
}
@@ -516,21 +550,22 @@ static int init_msi(struct qaic_device *qdev, struct pci_dev *pdev)
static int qaic_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
{
+ struct qaic_device_config *config = (struct qaic_device_config *)id->driver_data;
struct qaic_device *qdev;
int mhi_irq;
int ret;
int i;
- qdev = create_qdev(pdev, id);
+ qdev = create_qdev(pdev, config);
if (!qdev)
return -ENOMEM;
- ret = init_pci(qdev, pdev);
+ ret = init_pci(qdev, pdev, config);
if (ret)
return ret;
for (i = 0; i < qdev->num_dbc; ++i)
- qdev->dbc[i].dbc_base = qdev->bar_2 + QAIC_DBC_OFF(i);
+ qdev->dbc[i].dbc_base = qdev->bar_dbc + QAIC_DBC_OFF(i);
mhi_irq = init_msi(qdev, pdev);
if (mhi_irq < 0)
@@ -540,8 +575,8 @@ static int qaic_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
if (ret)
return ret;
- qdev->mhi_cntrl = qaic_mhi_register_controller(pdev, qdev->bar_0, mhi_irq,
- qdev->single_msi);
+ qdev->mhi_cntrl = qaic_mhi_register_controller(pdev, qdev->bar_mhi, mhi_irq,
+ qdev->single_msi, config->family);
if (IS_ERR(qdev->mhi_cntrl)) {
ret = PTR_ERR(qdev->mhi_cntrl);
qaic_destroy_drm_device(qdev, QAIC_NO_PARTITION);
@@ -608,8 +643,9 @@ static struct mhi_driver qaic_mhi_driver = {
};
static const struct pci_device_id qaic_ids[] = {
- { PCI_DEVICE(PCI_VENDOR_ID_QCOM, PCI_DEV_AIC080), },
- { PCI_DEVICE(PCI_VENDOR_ID_QCOM, PCI_DEV_AIC100), },
+ { PCI_DEVICE_DATA(QCOM, AIC080, (kernel_ulong_t)&aic080_config), },
+ { PCI_DEVICE_DATA(QCOM, AIC100, (kernel_ulong_t)&aic100_config), },
+ { PCI_DEVICE_DATA(QCOM, AIC200, (kernel_ulong_t)&aic200_config), },
{ }
};
MODULE_DEVICE_TABLE(pci, qaic_ids);
diff --git a/drivers/accel/qaic/qaic_timesync.c b/drivers/accel/qaic/qaic_timesync.c
index 301f4462d51b..972833fabcfc 100644
--- a/drivers/accel/qaic/qaic_timesync.c
+++ b/drivers/accel/qaic/qaic_timesync.c
@@ -201,7 +201,7 @@ static int qaic_timesync_probe(struct mhi_device *mhi_dev, const struct mhi_devi
goto free_sync_msg;
/* Qtimer register pointer */
- mqtsdev->qtimer_addr = qdev->bar_0 + QTIMER_REG_OFFSET;
+ mqtsdev->qtimer_addr = qdev->bar_mhi + QTIMER_REG_OFFSET;
timer_setup(timer, qaic_timesync_timer, 0);
timer->expires = jiffies + msecs_to_jiffies(timesync_delay_ms);
add_timer(timer);
@@ -221,7 +221,7 @@ static void qaic_timesync_remove(struct mhi_device *mhi_dev)
{
struct mqts_dev *mqtsdev = dev_get_drvdata(&mhi_dev->dev);
- del_timer_sync(&mqtsdev->timer);
+ timer_delete_sync(&mqtsdev->timer);
mhi_unprepare_from_transfer(mqtsdev->mhi_dev);
kfree(mqtsdev->sync_msg);
kfree(mqtsdev);
diff --git a/drivers/accel/qaic/sahara.c b/drivers/accel/qaic/sahara.c
index 6d772143d612..3ebcc1f7ff58 100644
--- a/drivers/accel/qaic/sahara.c
+++ b/drivers/accel/qaic/sahara.c
@@ -160,7 +160,7 @@ struct sahara_context {
struct work_struct fw_work;
struct work_struct dump_work;
struct mhi_device *mhi_dev;
- const char **image_table;
+ const char * const *image_table;
u32 table_size;
u32 active_image_id;
const struct firmware *firmware;
@@ -177,7 +177,7 @@ struct sahara_context {
bool is_mem_dump_mode;
};
-static const char *aic100_image_table[] = {
+static const char * const aic100_image_table[] = {
[1] = "qcom/aic100/fw1.bin",
[2] = "qcom/aic100/fw2.bin",
[4] = "qcom/aic100/fw4.bin",
@@ -188,6 +188,34 @@ static const char *aic100_image_table[] = {
[10] = "qcom/aic100/fw10.bin",
};
+static const char * const aic200_image_table[] = {
+ [5] = "qcom/aic200/uefi.elf",
+ [12] = "qcom/aic200/aic200-nsp.bin",
+ [23] = "qcom/aic200/aop.mbn",
+ [32] = "qcom/aic200/tz.mbn",
+ [33] = "qcom/aic200/hypvm.mbn",
+ [39] = "qcom/aic200/aic200_abl.elf",
+ [40] = "qcom/aic200/apdp.mbn",
+ [41] = "qcom/aic200/devcfg.mbn",
+ [42] = "qcom/aic200/sec.elf",
+ [43] = "qcom/aic200/aic200-hlos.elf",
+ [49] = "qcom/aic200/shrm.elf",
+ [50] = "qcom/aic200/cpucp.elf",
+ [51] = "qcom/aic200/aop_devcfg.mbn",
+ [57] = "qcom/aic200/cpucp_dtbs.elf",
+ [62] = "qcom/aic200/uefi_dtbs.elf",
+ [63] = "qcom/aic200/xbl_ac_config.mbn",
+ [64] = "qcom/aic200/tz_ac_config.mbn",
+ [65] = "qcom/aic200/hyp_ac_config.mbn",
+ [66] = "qcom/aic200/pdp.elf",
+ [67] = "qcom/aic200/pdp_cdb.elf",
+ [68] = "qcom/aic200/sdi.mbn",
+ [69] = "qcom/aic200/dcd.mbn",
+ [73] = "qcom/aic200/gearvm.mbn",
+ [74] = "qcom/aic200/sti.bin",
+ [75] = "qcom/aic200/pvs.bin",
+};
+
static int sahara_find_image(struct sahara_context *context, u32 image_id)
{
int ret;
@@ -748,8 +776,15 @@ static int sahara_mhi_probe(struct mhi_device *mhi_dev, const struct mhi_device_
context->mhi_dev = mhi_dev;
INIT_WORK(&context->fw_work, sahara_processing);
INIT_WORK(&context->dump_work, sahara_dump_processing);
- context->image_table = aic100_image_table;
- context->table_size = ARRAY_SIZE(aic100_image_table);
+
+ if (!strcmp(mhi_dev->mhi_cntrl->name, "AIC200")) {
+ context->image_table = aic200_image_table;
+ context->table_size = ARRAY_SIZE(aic200_image_table);
+ } else {
+ context->image_table = aic100_image_table;
+ context->table_size = ARRAY_SIZE(aic100_image_table);
+ }
+
context->active_image_id = SAHARA_IMAGE_ID_NONE;
dev_set_drvdata(&mhi_dev->dev, context);
@@ -772,8 +807,7 @@ static void sahara_mhi_remove(struct mhi_device *mhi_dev)
cancel_work_sync(&context->fw_work);
cancel_work_sync(&context->dump_work);
- if (context->mem_dump)
- vfree(context->mem_dump);
+ vfree(context->mem_dump);
sahara_release_image(context);
mhi_unprepare_from_transfer(mhi_dev);
}