summaryrefslogtreecommitdiff
path: root/drivers/accel
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/accel')
-rw-r--r--drivers/accel/Kconfig1
-rw-r--r--drivers/accel/Makefile1
-rw-r--r--drivers/accel/amdxdna/Kconfig18
-rw-r--r--drivers/accel/amdxdna/Makefile23
-rw-r--r--drivers/accel/amdxdna/TODO3
-rw-r--r--drivers/accel/amdxdna/aie2_ctx.c910
-rw-r--r--drivers/accel/amdxdna/aie2_error.c360
-rw-r--r--drivers/accel/amdxdna/aie2_message.c776
-rw-r--r--drivers/accel/amdxdna/aie2_msg_priv.h370
-rw-r--r--drivers/accel/amdxdna/aie2_pci.c928
-rw-r--r--drivers/accel/amdxdna/aie2_pci.h297
-rw-r--r--drivers/accel/amdxdna/aie2_pm.c108
-rw-r--r--drivers/accel/amdxdna/aie2_psp.c146
-rw-r--r--drivers/accel/amdxdna/aie2_smu.c136
-rw-r--r--drivers/accel/amdxdna/aie2_solver.c380
-rw-r--r--drivers/accel/amdxdna/aie2_solver.h155
-rw-r--r--drivers/accel/amdxdna/amdxdna_ctx.c550
-rw-r--r--drivers/accel/amdxdna/amdxdna_ctx.h162
-rw-r--r--drivers/accel/amdxdna/amdxdna_gem.c622
-rw-r--r--drivers/accel/amdxdna/amdxdna_gem.h65
-rw-r--r--drivers/accel/amdxdna/amdxdna_mailbox.c562
-rw-r--r--drivers/accel/amdxdna/amdxdna_mailbox.h124
-rw-r--r--drivers/accel/amdxdna/amdxdna_mailbox_helper.c61
-rw-r--r--drivers/accel/amdxdna/amdxdna_mailbox_helper.h42
-rw-r--r--drivers/accel/amdxdna/amdxdna_pci_drv.c434
-rw-r--r--drivers/accel/amdxdna/amdxdna_pci_drv.h147
-rw-r--r--drivers/accel/amdxdna/amdxdna_sysfs.c67
-rw-r--r--drivers/accel/amdxdna/npu1_regs.c114
-rw-r--r--drivers/accel/amdxdna/npu2_regs.c113
-rw-r--r--drivers/accel/amdxdna/npu4_regs.c134
-rw-r--r--drivers/accel/amdxdna/npu5_regs.c113
-rw-r--r--drivers/accel/amdxdna/npu6_regs.c114
-rw-r--r--drivers/accel/habanalabs/common/context.c3
-rw-r--r--drivers/accel/habanalabs/common/device.c2
-rw-r--r--drivers/accel/habanalabs/common/habanalabs_drv.c4
-rw-r--r--drivers/accel/habanalabs/common/habanalabs_ioctl.c11
-rw-r--r--drivers/accel/ivpu/ivpu_debugfs.c4
-rw-r--r--drivers/accel/ivpu/ivpu_drv.c12
-rw-r--r--drivers/accel/ivpu/ivpu_fw.c3
-rw-r--r--drivers/accel/ivpu/ivpu_hw.h11
-rw-r--r--drivers/accel/ivpu/ivpu_hw_btrs.c126
-rw-r--r--drivers/accel/ivpu/ivpu_hw_btrs.h6
-rw-r--r--drivers/accel/ivpu/ivpu_ipc.c3
-rw-r--r--drivers/accel/ivpu/ivpu_ms.c24
-rw-r--r--drivers/accel/qaic/qaic_data.c9
-rw-r--r--drivers/accel/qaic/qaic_drv.c1
-rw-r--r--drivers/accel/qaic/sahara.c3
47 files changed, 8142 insertions, 116 deletions
diff --git a/drivers/accel/Kconfig b/drivers/accel/Kconfig
index 64065fb8922b..5b9490367a39 100644
--- a/drivers/accel/Kconfig
+++ b/drivers/accel/Kconfig
@@ -24,6 +24,7 @@ menuconfig DRM_ACCEL
different device files, called accel/accel* (in /dev, sysfs
and debugfs).
+source "drivers/accel/amdxdna/Kconfig"
source "drivers/accel/habanalabs/Kconfig"
source "drivers/accel/ivpu/Kconfig"
source "drivers/accel/qaic/Kconfig"
diff --git a/drivers/accel/Makefile b/drivers/accel/Makefile
index ab3df932937f..a301fb6089d4 100644
--- a/drivers/accel/Makefile
+++ b/drivers/accel/Makefile
@@ -1,5 +1,6 @@
# SPDX-License-Identifier: GPL-2.0-only
+obj-$(CONFIG_DRM_ACCEL_AMDXDNA) += amdxdna/
obj-$(CONFIG_DRM_ACCEL_HABANALABS) += habanalabs/
obj-$(CONFIG_DRM_ACCEL_IVPU) += ivpu/
obj-$(CONFIG_DRM_ACCEL_QAIC) += qaic/
diff --git a/drivers/accel/amdxdna/Kconfig b/drivers/accel/amdxdna/Kconfig
new file mode 100644
index 000000000000..f39d7a87296c
--- /dev/null
+++ b/drivers/accel/amdxdna/Kconfig
@@ -0,0 +1,18 @@
+# SPDX-License-Identifier: GPL-2.0-only
+
+config DRM_ACCEL_AMDXDNA
+ tristate "AMD AI Engine"
+ depends on AMD_IOMMU
+ depends on DRM_ACCEL
+ depends on PCI && HAS_IOMEM
+ depends on X86_64
+ select DRM_SCHED
+ select DRM_GEM_SHMEM_HELPER
+ select FW_LOADER
+ select HMM_MIRROR
+ help
+ Choose this option to enable support for NPU integrated into AMD
+ client CPUs like AMD Ryzen AI 300 Series. AMD NPU can be used to
+ accelerate machine learning applications.
+
+ If "M" is selected, the driver module will be amdxdna.
diff --git a/drivers/accel/amdxdna/Makefile b/drivers/accel/amdxdna/Makefile
new file mode 100644
index 000000000000..0e9adf6890a0
--- /dev/null
+++ b/drivers/accel/amdxdna/Makefile
@@ -0,0 +1,23 @@
+# SPDX-License-Identifier: GPL-2.0-only
+
+amdxdna-y := \
+ aie2_ctx.o \
+ aie2_error.o \
+ aie2_message.o \
+ aie2_pci.o \
+ aie2_pm.o \
+ aie2_psp.o \
+ aie2_smu.o \
+ aie2_solver.o \
+ amdxdna_ctx.o \
+ amdxdna_gem.o \
+ amdxdna_mailbox.o \
+ amdxdna_mailbox_helper.o \
+ amdxdna_pci_drv.o \
+ amdxdna_sysfs.o \
+ npu1_regs.o \
+ npu2_regs.o \
+ npu4_regs.o \
+ npu5_regs.o \
+ npu6_regs.o
+obj-$(CONFIG_DRM_ACCEL_AMDXDNA) = amdxdna.o
diff --git a/drivers/accel/amdxdna/TODO b/drivers/accel/amdxdna/TODO
new file mode 100644
index 000000000000..5119bccd1917
--- /dev/null
+++ b/drivers/accel/amdxdna/TODO
@@ -0,0 +1,3 @@
+- Add import and export BO support
+- Add debugfs support
+- Add debug BO support
diff --git a/drivers/accel/amdxdna/aie2_ctx.c b/drivers/accel/amdxdna/aie2_ctx.c
new file mode 100644
index 000000000000..5f43db02b240
--- /dev/null
+++ b/drivers/accel/amdxdna/aie2_ctx.c
@@ -0,0 +1,910 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2024, Advanced Micro Devices, Inc.
+ */
+
+#include <drm/amdxdna_accel.h>
+#include <drm/drm_device.h>
+#include <drm/drm_gem.h>
+#include <drm/drm_gem_shmem_helper.h>
+#include <drm/drm_print.h>
+#include <drm/drm_syncobj.h>
+#include <linux/hmm.h>
+#include <linux/types.h>
+#include <linux/xarray.h>
+#include <trace/events/amdxdna.h>
+
+#include "aie2_msg_priv.h"
+#include "aie2_pci.h"
+#include "aie2_solver.h"
+#include "amdxdna_ctx.h"
+#include "amdxdna_gem.h"
+#include "amdxdna_mailbox.h"
+#include "amdxdna_pci_drv.h"
+
+static bool force_cmdlist; /* when true, aie2_sched_job_run() sends non-chain commands via the single-command cmdlist path */
+module_param(force_cmdlist, bool, 0600);
+MODULE_PARM_DESC(force_cmdlist, "Force use command list (Default false)");
+
+#define HWCTX_MAX_TIMEOUT 60000 /* milliseconds */
+
+static void aie2_job_release(struct kref *ref)
+{ /* kref release callback handed to kref_put() by aie2_job_put(): clean up and free the job. */
+ struct amdxdna_sched_job *job;
+
+ job = container_of(ref, struct amdxdna_sched_job, refcnt); /* recover the job that embeds this kref */
+ amdxdna_sched_job_cleanup(job);
+ if (job->out_fence)
+ dma_fence_put(job->out_fence); /* drops the reference taken in aie2_cmd_submit() */
+ kfree(job);
+}
+
+static void aie2_job_put(struct amdxdna_sched_job *job)
+{ /* Drop one reference on job; aie2_job_release() is the release callback. */
+ kref_put(&job->refcnt, aie2_job_release);
+}
+
+/* bad_job is the timed-out job when called from aie2_sched_job_timedout(); aie2_hwctx_suspend() passes NULL */
+static void aie2_hwctx_stop(struct amdxdna_dev *xdna, struct amdxdna_hwctx *hwctx,
+ struct drm_sched_job *bad_job)
+{
+ drm_sched_stop(&hwctx->priv->sched, bad_job); /* scheduler is stopped before the device context is destroyed */
+ aie2_destroy_context(xdna->dev_handle, hwctx);
+}
+
+static int aie2_hwctx_restart(struct amdxdna_dev *xdna, struct amdxdna_hwctx *hwctx)
+{ /* Re-create the device context for hwctx and restart its scheduler; returns 0 or the first error seen. */
+ struct amdxdna_gem_obj *heap = hwctx->priv->heap;
+ int ret;
+
+ ret = aie2_create_context(xdna->dev_handle, hwctx);
+ if (ret) {
+ XDNA_ERR(xdna, "Create hwctx failed, ret %d", ret);
+ goto out;
+ }
+
+ ret = aie2_map_host_buf(xdna->dev_handle, hwctx->fw_ctx_id,
+ heap->mem.userptr, heap->mem.size);
+ if (ret) {
+ XDNA_ERR(xdna, "Map host buf failed, ret %d", ret);
+ goto out;
+ }
+
+ if (hwctx->status != HWCTX_STAT_READY) { /* ret is 0 here: CU config is skipped, not reported as an error */
+ XDNA_DBG(xdna, "hwctx is not ready, status %d", hwctx->status);
+ goto out;
+ }
+
+ ret = aie2_config_cu(hwctx);
+ if (ret) {
+ XDNA_ERR(xdna, "Config cu failed, ret %d", ret);
+ goto out;
+ }
+
+out: /* the scheduler is restarted on every path, including the error paths above */
+ drm_sched_start(&hwctx->priv->sched, 0);
+ XDNA_DBG(xdna, "%s restarted, ret %d", hwctx->name, ret);
+ return ret;
+}
+
+void aie2_restart_ctx(struct amdxdna_client *client)
+{ /* Restart every hwctx of client that is currently in HWCTX_STAT_STOP. */
+ struct amdxdna_dev *xdna = client->xdna;
+ struct amdxdna_hwctx *hwctx;
+ unsigned long hwctx_id;
+
+ drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock)); /* caller is expected to hold dev_lock */
+ mutex_lock(&client->hwctx_lock);
+ amdxdna_for_each_hwctx(client, hwctx_id, hwctx) {
+ if (hwctx->status != HWCTX_STAT_STOP)
+ continue;
+
+ hwctx->status = hwctx->old_status; /* restore the status saved in old_status */
+ XDNA_DBG(xdna, "Resetting %s", hwctx->name);
+ aie2_hwctx_restart(xdna, hwctx); /* return value is not checked here; the callee logs failures */
+ }
+ mutex_unlock(&client->hwctx_lock);
+}
+
+static struct dma_fence *aie2_cmd_get_out_fence(struct amdxdna_hwctx *hwctx, u64 seq)
+{ /* Look up the fence for point seq on the hwctx syncobj; returns a referenced fence (caller puts it) or NULL. */
+ struct dma_fence *fence, *out_fence = NULL;
+ int ret;
+
+ fence = drm_syncobj_fence_get(hwctx->priv->syncobj);
+ if (!fence) /* syncobj has no fence attached */
+ return NULL;
+
+ ret = dma_fence_chain_find_seqno(&fence, seq); /* takes &fence, so it may replace the pointer */
+ if (ret)
+ goto out;
+
+ out_fence = dma_fence_get(dma_fence_chain_contained(fence)); /* own reference on the fence returned to the caller */
+
+out:
+ dma_fence_put(fence); /* drop the chain reference on both the success and failure paths */
+ return out_fence;
+}
+
+static void aie2_hwctx_wait_for_idle(struct amdxdna_hwctx *hwctx)
+{ /* Block until the most recently submitted command of hwctx has signaled its fence. */
+ struct dma_fence *fence;
+
+ fence = aie2_cmd_get_out_fence(hwctx, hwctx->priv->seq - 1); /* seq is post-incremented on submit, so seq - 1 is the last one issued */
+ if (!fence) /* no fence found for that point: nothing to wait for */
+ return;
+
+ dma_fence_wait(fence, false); /* false: non-interruptible wait */
+ dma_fence_put(fence);
+}
+
+void aie2_hwctx_suspend(struct amdxdna_hwctx *hwctx)
+{ /* Wait for hwctx to go idle, stop it, and mark it HWCTX_STAT_STOP. */
+ struct amdxdna_dev *xdna = hwctx->client->xdna;
+
+ /*
+ * Command timeout is unlikely. But if it happens, it doesn't
+ * break the system. aie2_hwctx_stop() will destroy mailbox
+ * and abort all commands.
+ */
+ drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock)); /* caller is expected to hold dev_lock */
+ aie2_hwctx_wait_for_idle(hwctx);
+ aie2_hwctx_stop(xdna, hwctx, NULL);
+ hwctx->old_status = hwctx->status; /* saved so aie2_hwctx_resume()/aie2_restart_ctx() can restore it */
+ hwctx->status = HWCTX_STAT_STOP;
+}
+
+void aie2_hwctx_resume(struct amdxdna_hwctx *hwctx)
+{ /* Restore the status saved by aie2_hwctx_suspend() and restart the context. */
+ struct amdxdna_dev *xdna = hwctx->client->xdna;
+
+ /*
+ * The resume path cannot guarantee that the mailbox channel is
+ * recreated. If it is not, submitting a message to this mailbox
+ * channel returns an error.
+ */
+ drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock)); /* caller is expected to hold dev_lock */
+ hwctx->status = hwctx->old_status;
+ aie2_hwctx_restart(xdna, hwctx); /* return value is not checked here; the callee logs failures */
+}
+
+static void
+aie2_sched_notify(struct amdxdna_sched_job *job)
+{ /* Complete a job: signal its fence and drop what aie2_sched_job_run() took. Called by the response handlers. */
+ struct dma_fence *fence = job->fence;
+
+ trace_xdna_job(&job->base, job->hwctx->name, "signaled fence", job->seq);
+ job->hwctx->priv->completed++;
+ dma_fence_signal(fence);
+
+ up(&job->hwctx->priv->job_sem); /* give back the slot taken in aie2_cmd_submit() */
+ job->job_done = true; /* tells aie2_sched_job_free() not to up() job_sem again */
+ dma_fence_put(fence); /* pairs with dma_fence_get() in aie2_sched_job_run() */
+ mmput_async(job->mm); /* pairs with mmget_not_zero() in aie2_sched_job_run() */
+ aie2_job_put(job); /* pairs with kref_get() in aie2_sched_job_run() */
+}
+
+static int
+aie2_sched_resp_handler(void *handle, const u32 *data, size_t size)
+{ /* Response callback used with aie2_execbuf(): record the command state in the cmd BO, then complete the job. */
+ struct amdxdna_sched_job *job = handle;
+ struct amdxdna_gem_obj *cmd_abo;
+ int ret = 0; /* signed: holds a negative errno and is returned as int */
+ u32 status;
+
+ cmd_abo = job->cmd_bo;
+
+ if (unlikely(!data)) /* no payload: complete the job without changing the command state */
+ goto out;
+
+ if (unlikely(size != sizeof(u32))) { /* response must be exactly one u32 status word */
+ amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_ABORT);
+ ret = -EINVAL;
+ goto out;
+ }
+
+ status = *data;
+ XDNA_DBG(job->hwctx->client->xdna, "Resp status 0x%x", status);
+ if (status == AIE2_STATUS_SUCCESS)
+ amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_COMPLETED);
+ else
+ amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_ERROR);
+
+out: /* the job is completed on every path, including malformed responses */
+ aie2_sched_notify(job);
+ return ret;
+}
+
+static int
+aie2_sched_nocmd_resp_handler(void *handle, const u32 *data, size_t size)
+{ /* Response callback used with aie2_sync_bo() for jobs without a command BO: log the status, complete the job. */
+ struct amdxdna_sched_job *job = handle;
+ int ret = 0; /* signed: holds a negative errno and is returned as int */
+ u32 status;
+
+ if (unlikely(!data)) /* no payload: just complete the job */
+ goto out;
+
+ if (unlikely(size != sizeof(u32))) { /* response must be exactly one u32 status word */
+ ret = -EINVAL;
+ goto out;
+ }
+
+ status = *data; /* only used for the debug message below */
+ XDNA_DBG(job->hwctx->client->xdna, "Resp status 0x%x", status);
+
+out: /* the job is completed on every path, including malformed responses */
+ aie2_sched_notify(job);
+ return ret;
+}
+
+static int
+aie2_sched_cmdlist_resp_handler(void *handle, const u32 *data, size_t size)
+{ /* Response callback for the cmdlist execbuf paths: decode the chain response, then complete the job. */
+ struct amdxdna_sched_job *job = handle;
+ struct amdxdna_gem_obj *cmd_abo;
+ struct cmd_chain_resp *resp;
+ struct amdxdna_dev *xdna;
+ u32 fail_cmd_status;
+ u32 fail_cmd_idx;
+ int ret = 0; /* signed: holds a negative errno and is returned as int */
+
+ cmd_abo = job->cmd_bo;
+ if (unlikely(!data) || unlikely(size != sizeof(u32) * 3)) { /* response must be exactly three u32 words */
+ amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_ABORT);
+ ret = -EINVAL;
+ goto out;
+ }
+
+ resp = (struct cmd_chain_resp *)data;
+ xdna = job->hwctx->client->xdna;
+ XDNA_DBG(xdna, "Status 0x%x", resp->status);
+ if (resp->status == AIE2_STATUS_SUCCESS) {
+ amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_COMPLETED);
+ goto out;
+ }
+
+ /* Slow path to handle error, read from ringbuf on BAR */
+ fail_cmd_idx = resp->fail_cmd_idx;
+ fail_cmd_status = resp->fail_cmd_status;
+ XDNA_DBG(xdna, "Failed cmd idx %d, status 0x%x",
+ fail_cmd_idx, fail_cmd_status);
+
+ if (fail_cmd_status == AIE2_STATUS_SUCCESS) { /* overall failure but the failing command reports success: treat as abort */
+ amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_ABORT);
+ ret = -EINVAL;
+ goto out;
+ }
+ amdxdna_cmd_set_state(cmd_abo, fail_cmd_status); /* the failing command's status is stored as the command state */
+
+ if (amdxdna_cmd_get_op(cmd_abo) == ERT_CMD_CHAIN) {
+ struct amdxdna_cmd_chain *cc = amdxdna_cmd_get_payload(cmd_abo, NULL);
+
+ cc->error_index = fail_cmd_idx;
+ if (cc->error_index >= cc->command_count) /* out-of-range index from the response falls back to 0 */
+ cc->error_index = 0;
+ }
+out: /* the job is completed on every path, including malformed responses */
+ aie2_sched_notify(job);
+ return ret;
+}
+
+static struct dma_fence *
+aie2_sched_job_run(struct drm_sched_job *sched_job)
+{ /* run_job callback in sched_ops: send the job to the device; returns a referenced job->fence or an ERR_PTR. */
+ struct amdxdna_sched_job *job = drm_job_to_xdna_job(sched_job);
+ struct amdxdna_gem_obj *cmd_abo = job->cmd_bo;
+ struct amdxdna_hwctx *hwctx = job->hwctx;
+ struct dma_fence *fence;
+ int ret;
+
+ if (!mmget_not_zero(job->mm)) /* hold job->mm while the job is in flight; -ESRCH if that fails */
+ return ERR_PTR(-ESRCH);
+
+ kref_get(&job->refcnt); /* job, fence and mm references are dropped by aie2_sched_notify() */
+ fence = dma_fence_get(job->fence);
+
+ if (unlikely(!cmd_abo)) { /* a job without a command BO is sent through aie2_sync_bo() */
+ ret = aie2_sync_bo(hwctx, job, aie2_sched_nocmd_resp_handler);
+ goto out;
+ }
+
+ amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_NEW);
+
+ if (amdxdna_cmd_get_op(cmd_abo) == ERT_CMD_CHAIN)
+ ret = aie2_cmdlist_multi_execbuf(hwctx, job, aie2_sched_cmdlist_resp_handler);
+ else if (force_cmdlist) /* module parameter: route single commands through the cmdlist path */
+ ret = aie2_cmdlist_single_execbuf(hwctx, job, aie2_sched_cmdlist_resp_handler);
+ else
+ ret = aie2_execbuf(hwctx, job, aie2_sched_resp_handler);
+
+out:
+ if (ret) { /* send failed: undo the three references taken above and return the error */
+ dma_fence_put(job->fence);
+ aie2_job_put(job);
+ mmput(job->mm);
+ fence = ERR_PTR(ret);
+ }
+ trace_xdna_job(sched_job, hwctx->name, "sent to device", job->seq); /* emitted on the failure path as well */
+
+ return fence;
+}
+
+static void aie2_sched_job_free(struct drm_sched_job *sched_job)
+{ /* free_job callback in sched_ops: release the scheduler's hold on the job. */
+ struct amdxdna_sched_job *job = drm_job_to_xdna_job(sched_job);
+ struct amdxdna_hwctx *hwctx = job->hwctx;
+
+ trace_xdna_job(sched_job, hwctx->name, "job free", job->seq);
+ if (!job->job_done) /* job never went through aie2_sched_notify(): its job_sem slot is still held */
+ up(&hwctx->priv->job_sem);
+
+ drm_sched_job_cleanup(sched_job);
+ aie2_job_put(job);
+}
+
+static enum drm_gpu_sched_stat
+aie2_sched_job_timedout(struct drm_sched_job *sched_job)
+{ /* timedout_job callback in sched_ops: stop and restart the owning hwctx under dev_lock. */
+ struct amdxdna_sched_job *job = drm_job_to_xdna_job(sched_job);
+ struct amdxdna_hwctx *hwctx = job->hwctx;
+ struct amdxdna_dev *xdna;
+
+ xdna = hwctx->client->xdna;
+ trace_xdna_job(sched_job, hwctx->name, "job timedout", job->seq);
+ mutex_lock(&xdna->dev_lock);
+ aie2_hwctx_stop(xdna, hwctx, sched_job); /* the timed-out job is passed as bad_job */
+
+ aie2_hwctx_restart(xdna, hwctx); /* return value is not checked; NOMINAL is returned either way */
+ mutex_unlock(&xdna->dev_lock);
+
+ return DRM_GPU_SCHED_STAT_NOMINAL;
+}
+
+static const struct drm_sched_backend_ops sched_ops = { /* file-local: passed to drm_sched_init() in aie2_hwctx_init() */
+ .run_job = aie2_sched_job_run,
+ .free_job = aie2_sched_job_free,
+ .timedout_job = aie2_sched_job_timedout,
+};
+
+static int aie2_hwctx_col_list(struct amdxdna_hwctx *hwctx)
+{ /* Compute hwctx->num_col and build hwctx->col_list, the candidate start columns. Returns 0 or a negative errno. */
+ struct amdxdna_dev *xdna = hwctx->client->xdna;
+ struct amdxdna_dev_hdl *ndev;
+ int start, end, first, last;
+ u32 width = 1, entries = 0;
+ int i;
+
+ if (!hwctx->num_tiles) {
+ XDNA_ERR(xdna, "Number of tiles is zero");
+ return -EINVAL;
+ }
+
+ ndev = xdna->dev_handle;
+ if (unlikely(!ndev->metadata.core.row_count)) { /* also guards the division below */
+ XDNA_WARN(xdna, "Core tile row count is zero");
+ return -EINVAL;
+ }
+
+ hwctx->num_col = hwctx->num_tiles / ndev->metadata.core.row_count; /* integer division: any remainder is dropped */
+ if (!hwctx->num_col || hwctx->num_col > ndev->total_col) {
+ XDNA_ERR(xdna, "Invalid num_col %d", hwctx->num_col);
+ return -EINVAL;
+ }
+
+ if (ndev->priv->col_align == COL_ALIGN_NATURE) /* otherwise width stays 1 and every column is a candidate */
+ width = hwctx->num_col;
+
+ /*
+ * In range [start, end], find the columns that are multiples of width.
+ * 'first' is the first such column,
+ * 'last' is the last such column,
+ * 'entries' is the total number of such columns.
+ */
+ start = xdna->dev_info->first_col;
+ end = ndev->total_col - hwctx->num_col; /* highest start column that still fits num_col columns */
+ if (start > 0 && end == 0) {
+ XDNA_DBG(xdna, "Force start from col 0");
+ start = 0;
+ }
+ first = start + (width - start % width) % width; /* round start up to a multiple of width */
+ last = end - end % width; /* round end down to a multiple of width */
+ if (last >= first)
+ entries = (last - first) / width + 1;
+ XDNA_DBG(xdna, "start %d end %d first %d last %d",
+ start, end, first, last);
+
+ if (unlikely(!entries)) { /* no aligned start column exists in [start, end] */
+ XDNA_ERR(xdna, "Start %d end %d width %d",
+ start, end, width);
+ return -EINVAL;
+ }
+
+ hwctx->col_list = kmalloc_array(entries, sizeof(*hwctx->col_list), GFP_KERNEL);
+ if (!hwctx->col_list)
+ return -ENOMEM;
+
+ hwctx->col_list_len = entries;
+ hwctx->col_list[0] = first;
+ for (i = 1; i < entries; i++) /* remaining entries step by width from first */
+ hwctx->col_list[i] = hwctx->col_list[i - 1] + width;
+
+ print_hex_dump_debug("col_list: ", DUMP_PREFIX_OFFSET, 16, 4, hwctx->col_list,
+ entries * sizeof(*hwctx->col_list), false);
+ return 0;
+}
+
+static int aie2_alloc_resource(struct amdxdna_hwctx *hwctx)
+{ /* Build an allocation request from hwctx and pass it to xrs_allocate_resource(). Returns 0 or a negative errno. */
+ struct amdxdna_dev *xdna = hwctx->client->xdna;
+ struct alloc_requests *xrs_req;
+ int ret;
+
+ xrs_req = kzalloc(sizeof(*xrs_req), GFP_KERNEL); /* temporary: freed before returning on every path */
+ if (!xrs_req)
+ return -ENOMEM;
+
+ xrs_req->cdo.start_cols = hwctx->col_list; /* candidate start columns from aie2_hwctx_col_list() */
+ xrs_req->cdo.cols_len = hwctx->col_list_len;
+ xrs_req->cdo.ncols = hwctx->num_col;
+ xrs_req->cdo.qos_cap.opc = hwctx->max_opc;
+
+ xrs_req->rqos.gops = hwctx->qos.gops;
+ xrs_req->rqos.fps = hwctx->qos.fps;
+ xrs_req->rqos.dma_bw = hwctx->qos.dma_bandwidth;
+ xrs_req->rqos.latency = hwctx->qos.latency;
+ xrs_req->rqos.exec_time = hwctx->qos.frame_exec_time;
+ xrs_req->rqos.priority = hwctx->qos.priority;
+
+ xrs_req->rid = (uintptr_t)hwctx; /* hwctx pointer is the request id; aie2_release_resource() passes the same value */
+
+ ret = xrs_allocate_resource(xdna->xrs_hdl, xrs_req, hwctx);
+ if (ret)
+ XDNA_ERR(xdna, "Allocate AIE resource failed, ret %d", ret);
+
+ kfree(xrs_req);
+ return ret;
+}
+
+static void aie2_release_resource(struct amdxdna_hwctx *hwctx)
+{ /* Release the resource requested by aie2_alloc_resource(); a failure is logged only. */
+ struct amdxdna_dev *xdna = hwctx->client->xdna;
+ int ret;
+
+ ret = xrs_release_resource(xdna->xrs_hdl, (uintptr_t)hwctx); /* same id that aie2_alloc_resource() stored in rid */
+ if (ret)
+ XDNA_ERR(xdna, "Release AIE resource failed, ret %d", ret);
+}
+
+static int aie2_ctx_syncobj_create(struct amdxdna_hwctx *hwctx)
+{ /* Create the per-context syncobj and a handle for it in the client's drm_file. Returns 0 or a negative errno. */
+ struct amdxdna_dev *xdna = hwctx->client->xdna;
+ struct drm_file *filp = hwctx->client->filp;
+ struct drm_syncobj *syncobj;
+ u32 hdl;
+ int ret;
+
+ hwctx->syncobj_hdl = AMDXDNA_INVALID_FENCE_HANDLE; /* stays invalid if either step below fails */
+
+ ret = drm_syncobj_create(&syncobj, 0, NULL);
+ if (ret) {
+ XDNA_ERR(xdna, "Create ctx syncobj failed, ret %d", ret);
+ return ret;
+ }
+ ret = drm_syncobj_get_handle(filp, syncobj, &hdl);
+ if (ret) {
+ drm_syncobj_put(syncobj); /* undo drm_syncobj_create() */
+ XDNA_ERR(xdna, "Create ctx syncobj handle failed, ret %d", ret);
+ return ret;
+ }
+ hwctx->priv->syncobj = syncobj; /* reference put by aie2_ctx_syncobj_destroy() */
+ hwctx->syncobj_hdl = hdl;
+
+ return 0;
+}
+
+static void aie2_ctx_syncobj_destroy(struct amdxdna_hwctx *hwctx)
+{ /* Put the syncobj reference stored by aie2_ctx_syncobj_create(). */
+ /*
+ * The syncobj_hdl is owned by user space and will be cleaned up
+ * separately.
+ */
+ drm_syncobj_put(hwctx->priv->syncobj);
+}
+
+int aie2_hwctx_init(struct amdxdna_hwctx *hwctx)
+{ /* Allocate and set up hwctx->priv: heap, command buffers, scheduler, device resources, syncobj. 0 or negative errno. */
+ struct amdxdna_client *client = hwctx->client;
+ struct amdxdna_dev *xdna = client->xdna;
+ struct drm_gpu_scheduler *sched;
+ struct amdxdna_hwctx_priv *priv;
+ struct amdxdna_gem_obj *heap;
+ struct amdxdna_dev_hdl *ndev;
+ int i, ret;
+
+ priv = kzalloc(sizeof(*hwctx->priv), GFP_KERNEL); /* zeroed, so unused cmd_buf[] slots start as NULL */
+ if (!priv)
+ return -ENOMEM;
+ hwctx->priv = priv;
+
+ mutex_lock(&client->mm_lock);
+ heap = client->dev_heap;
+ if (!heap) {
+ XDNA_ERR(xdna, "The client dev heap object not exist");
+ mutex_unlock(&client->mm_lock);
+ ret = -ENOENT;
+ goto free_priv;
+ }
+ drm_gem_object_get(to_gobj(heap)); /* reference taken while mm_lock is held */
+ mutex_unlock(&client->mm_lock);
+ priv->heap = heap;
+ sema_init(&priv->job_sem, HWCTX_MAX_CMDS); /* one slot per job; taken in aie2_cmd_submit() */
+
+ ret = amdxdna_gem_pin(heap);
+ if (ret) {
+ XDNA_ERR(xdna, "Dev heap pin failed, ret %d", ret);
+ goto put_heap;
+ }
+
+ for (i = 0; i < ARRAY_SIZE(priv->cmd_buf); i++) { /* one device BO of MAX_CHAIN_CMDBUF_SIZE per cmd_buf slot */
+ struct amdxdna_gem_obj *abo;
+ struct amdxdna_drm_create_bo args = {
+ .flags = 0,
+ .type = AMDXDNA_BO_DEV,
+ .vaddr = 0,
+ .size = MAX_CHAIN_CMDBUF_SIZE,
+ };
+
+ abo = amdxdna_drm_alloc_dev_bo(&xdna->ddev, &args, client->filp, true);
+ if (IS_ERR(abo)) {
+ ret = PTR_ERR(abo);
+ goto free_cmd_bufs;
+ }
+
+ XDNA_DBG(xdna, "Command buf %d addr 0x%llx size 0x%lx",
+ i, abo->mem.dev_addr, abo->mem.size);
+ priv->cmd_buf[i] = abo;
+ }
+
+ sched = &priv->sched;
+ mutex_init(&priv->io_lock);
+
+ fs_reclaim_acquire(GFP_KERNEL); /* lockdep annotation only: record io_lock as acquirable under fs_reclaim */
+ might_lock(&priv->io_lock);
+ fs_reclaim_release(GFP_KERNEL);
+
+ ret = drm_sched_init(sched, &sched_ops, NULL, DRM_SCHED_PRIORITY_COUNT,
+ HWCTX_MAX_CMDS, 0, msecs_to_jiffies(HWCTX_MAX_TIMEOUT),
+ NULL, NULL, hwctx->name, xdna->ddev.dev);
+ if (ret) {
+ XDNA_ERR(xdna, "Failed to init DRM scheduler. ret %d", ret);
+ goto free_cmd_bufs;
+ }
+
+ ret = drm_sched_entity_init(&priv->entity, DRM_SCHED_PRIORITY_NORMAL,
+ &sched, 1, NULL);
+ if (ret) {
+ XDNA_ERR(xdna, "Failed to init sched entity. ret %d", ret);
+ goto free_sched;
+ }
+
+ ret = aie2_hwctx_col_list(hwctx);
+ if (ret) {
+ XDNA_ERR(xdna, "Create col list failed, ret %d", ret);
+ goto free_entity;
+ }
+
+ ret = aie2_alloc_resource(hwctx);
+ if (ret) {
+ XDNA_ERR(xdna, "Alloc hw resource failed, ret %d", ret);
+ goto free_col_list;
+ }
+
+ ret = aie2_map_host_buf(xdna->dev_handle, hwctx->fw_ctx_id,
+ heap->mem.userptr, heap->mem.size);
+ if (ret) {
+ XDNA_ERR(xdna, "Map host buffer failed, ret %d", ret);
+ goto release_resource;
+ }
+
+ ret = aie2_ctx_syncobj_create(hwctx);
+ if (ret) {
+ XDNA_ERR(xdna, "Create syncobj failed, ret %d", ret);
+ goto release_resource;
+ }
+
+ hwctx->status = HWCTX_STAT_INIT; /* CU configuration is only accepted in this state (see aie2_hwctx_cu_config()) */
+ ndev = xdna->dev_handle;
+ ndev->hwctx_num++; /* decremented in aie2_hwctx_fini() */
+
+ XDNA_DBG(xdna, "hwctx %s init completed", hwctx->name);
+
+ return 0;
+
+release_resource: /* error unwinding: each label undoes the step above it, then falls through to the next */
+ aie2_release_resource(hwctx);
+free_col_list:
+ kfree(hwctx->col_list);
+free_entity:
+ drm_sched_entity_destroy(&priv->entity);
+free_sched:
+ drm_sched_fini(&priv->sched);
+free_cmd_bufs:
+ for (i = 0; i < ARRAY_SIZE(priv->cmd_buf); i++) {
+ if (!priv->cmd_buf[i]) /* slot was never allocated */
+ continue;
+ drm_gem_object_put(to_gobj(priv->cmd_buf[i]));
+ }
+ amdxdna_gem_unpin(heap);
+put_heap:
+ drm_gem_object_put(to_gobj(heap));
+free_priv:
+ kfree(priv); /* NOTE(review): hwctx->priv still points at the freed memory after this — confirm callers do not use it */
+ return ret;
+}
+
+void aie2_hwctx_fini(struct amdxdna_hwctx *hwctx)
+{ /* Tear down what aie2_hwctx_init() set up and free the per-context state, including hwctx->cus. */
+ struct amdxdna_dev_hdl *ndev;
+ struct amdxdna_dev *xdna;
+ int idx;
+
+ xdna = hwctx->client->xdna;
+ ndev = xdna->dev_handle;
+ ndev->hwctx_num--; /* pairs with the increment in aie2_hwctx_init() */
+ drm_sched_wqueue_stop(&hwctx->priv->sched);
+
+ /* Now, scheduler will not send command to device. */
+ aie2_release_resource(hwctx);
+
+ /*
+ * All submitted commands are aborted.
+ * Restart scheduler queues to cleanup jobs. The run_job callback,
+ * aie2_sched_job_run(), is expected to fail with NODEV if it is called.
+ */
+ drm_sched_wqueue_start(&hwctx->priv->sched);
+
+ aie2_hwctx_wait_for_idle(hwctx);
+ drm_sched_entity_destroy(&hwctx->priv->entity);
+ drm_sched_fini(&hwctx->priv->sched);
+ aie2_ctx_syncobj_destroy(hwctx);
+
+ XDNA_DBG(xdna, "%s sequence number %lld", hwctx->name, hwctx->priv->seq);
+
+ for (idx = 0; idx < ARRAY_SIZE(hwctx->priv->cmd_buf); idx++) /* no NULL check: every slot was filled by a successful init */
+ drm_gem_object_put(to_gobj(hwctx->priv->cmd_buf[idx]));
+ amdxdna_gem_unpin(hwctx->priv->heap);
+ drm_gem_object_put(to_gobj(hwctx->priv->heap));
+
+ mutex_destroy(&hwctx->priv->io_lock);
+ kfree(hwctx->col_list);
+ kfree(hwctx->priv);
+ kfree(hwctx->cus); /* allocated by aie2_hwctx_cu_config() */
+}
+
+static int aie2_hwctx_cu_config(struct amdxdna_hwctx *hwctx, void *buf, u32 size)
+{ /* Validate the CU config in buf, keep a copy in hwctx->cus, send it to firmware and mark hwctx READY. */
+ struct amdxdna_hwctx_param_config_cu *config = buf;
+ struct amdxdna_dev *xdna = hwctx->client->xdna;
+ u32 total_size;
+ int ret;
+
+ XDNA_DBG(xdna, "Config %d CU to %s", config->num_cus, hwctx->name);
+ if (XDNA_MBZ_DBG(xdna, config->pad, sizeof(config->pad)))
+ return -EINVAL;
+
+ if (hwctx->status != HWCTX_STAT_INIT) { /* only a context still in INIT may be configured */
+ XDNA_ERR(xdna, "Not support re-config CU");
+ return -EINVAL;
+ }
+
+ if (!config->num_cus) {
+ XDNA_ERR(xdna, "Number of CU is zero");
+ return -EINVAL;
+ }
+
+ total_size = struct_size(config, cu_configs, config->num_cus); /* header plus num_cus cu_configs entries */
+ if (total_size > size) { /* buf is too small for the number of CUs it declares */
+ XDNA_ERR(xdna, "CU config larger than size");
+ return -EINVAL;
+ }
+
+ hwctx->cus = kmemdup(config, total_size, GFP_KERNEL); /* freed below on failure, otherwise in aie2_hwctx_fini() */
+ if (!hwctx->cus)
+ return -ENOMEM;
+
+ ret = aie2_config_cu(hwctx);
+ if (ret) {
+ XDNA_ERR(xdna, "Config CU to firmware failed, ret %d", ret);
+ goto free_cus;
+ }
+
+ wmb(); /* To avoid locking in command submit when check status */
+ hwctx->status = HWCTX_STAT_READY;
+
+ return 0;
+
+free_cus:
+ kfree(hwctx->cus);
+ hwctx->cus = NULL; /* no stale pointer left behind after the failed config */
+ return ret;
+}
+
+int aie2_hwctx_config(struct amdxdna_hwctx *hwctx, u32 type, u64 value, void *buf, u32 size)
+{ /* Dispatch a hwctx config request by type; only CU configuration is implemented. value is unused here. */
+ struct amdxdna_dev *xdna = hwctx->client->xdna;
+
+ drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock)); /* caller is expected to hold dev_lock */
+ switch (type) {
+ case DRM_AMDXDNA_HWCTX_CONFIG_CU:
+ return aie2_hwctx_cu_config(hwctx, buf, size);
+ case DRM_AMDXDNA_HWCTX_ASSIGN_DBG_BUF: /* known types that are not implemented: rejected without a log message */
+ case DRM_AMDXDNA_HWCTX_REMOVE_DBG_BUF:
+ return -EOPNOTSUPP;
+ default:
+ XDNA_DBG(xdna, "Not supported type %d", type);
+ return -EOPNOTSUPP;
+ }
+}
+
+static int aie2_populate_range(struct amdxdna_gem_obj *abo)
+{ /* Fault in abo's user range with hmm_range_fault() and clear map_invalid. Returns 0 or a negative errno. */
+ struct amdxdna_dev *xdna = to_xdna_dev(to_gobj(abo)->dev);
+ struct mm_struct *mm = abo->mem.notifier.mm;
+ struct hmm_range range = { 0 };
+ unsigned long timeout;
+ int ret;
+
+ XDNA_INFO_ONCE(xdna, "populate memory range %llx size %lx",
+ abo->mem.userptr, abo->mem.size);
+ range.notifier = &abo->mem.notifier;
+ range.start = abo->mem.userptr;
+ range.end = abo->mem.userptr + abo->mem.size;
+ range.hmm_pfns = abo->mem.pfns;
+ range.default_flags = HMM_PFN_REQ_FAULT;
+
+ if (!mmget_not_zero(mm)) /* reference on mm is held until put_mm below */
+ return -EFAULT;
+
+ timeout = jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT);
+again: /* retry point: the notifier sequence is re-read before every fault attempt */
+ range.notifier_seq = mmu_interval_read_begin(&abo->mem.notifier);
+ mmap_read_lock(mm);
+ ret = hmm_range_fault(&range);
+ mmap_read_unlock(mm);
+ if (ret) {
+ if (time_after(jiffies, timeout)) { /* the deadline is only checked when the fault failed */
+ ret = -ETIME;
+ goto put_mm;
+ }
+
+ if (ret == -EBUSY) /* -EBUSY is retried; any other error is returned as is */
+ goto again;
+
+ goto put_mm;
+ }
+
+ down_read(&xdna->notifier_lock);
+ if (mmu_interval_read_retry(&abo->mem.notifier, range.notifier_seq)) { /* invalidated while faulting: start over */
+ up_read(&xdna->notifier_lock);
+ goto again; /* NOTE(review): this retry does not check the timeout — confirm it cannot loop indefinitely */
+ }
+ abo->mem.map_invalid = false; /* cleared under notifier_lock (read); aie2_hmm_invalidate() sets it under the write side */
+ up_read(&xdna->notifier_lock);
+
+put_mm:
+ mmput(mm);
+ return ret;
+}
+
+int aie2_cmd_submit(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job, u64 *seq)
+{ /* Queue job on hwctx's scheduler entity and store its sequence number in *seq. Returns 0 or a negative errno. */
+ struct amdxdna_dev *xdna = hwctx->client->xdna;
+ struct ww_acquire_ctx acquire_ctx;
+ struct dma_fence_chain *chain;
+ struct amdxdna_gem_obj *abo;
+ unsigned long timeout = 0;
+ int ret, i;
+
+ ret = down_interruptible(&hwctx->priv->job_sem); /* take a job slot; job_sem was initialised to HWCTX_MAX_CMDS */
+ if (ret) {
+ XDNA_ERR(xdna, "Grab job sem failed, ret %d", ret);
+ return ret;
+ }
+
+ chain = dma_fence_chain_alloc(); /* passed to drm_syncobj_add_point() on success, freed on the error path */
+ if (!chain) {
+ XDNA_ERR(xdna, "Alloc fence chain failed");
+ ret = -ENOMEM;
+ goto up_sem;
+ }
+
+ ret = drm_sched_job_init(&job->base, &hwctx->priv->entity, 1, hwctx);
+ if (ret) {
+ XDNA_ERR(xdna, "DRM job init failed, ret %d", ret);
+ goto free_chain;
+ }
+
+retry: /* re-entered after an invalid mapping has been repopulated below */
+ ret = drm_gem_lock_reservations(job->bos, job->bo_cnt, &acquire_ctx);
+ if (ret) {
+ XDNA_WARN(xdna, "Failed to lock BOs, ret %d", ret);
+ goto cleanup_job;
+ }
+
+ for (i = 0; i < job->bo_cnt; i++) {
+ ret = dma_resv_reserve_fences(job->bos[i]->resv, 1); /* one fence slot per BO for the out_fence added below */
+ if (ret) {
+ XDNA_WARN(xdna, "Failed to reserve fences %d", ret);
+ drm_gem_unlock_reservations(job->bos, job->bo_cnt, &acquire_ctx);
+ goto cleanup_job;
+ }
+ }
+
+ down_read(&xdna->notifier_lock);
+ for (i = 0; i < job->bo_cnt; i++) {
+ abo = to_xdna_obj(job->bos[i]);
+ if (abo->mem.map_invalid) { /* flag set by aie2_hmm_invalidate(): repopulate, then restart from retry */
+ up_read(&xdna->notifier_lock);
+ drm_gem_unlock_reservations(job->bos, job->bo_cnt, &acquire_ctx);
+ if (!timeout) { /* first invalid mapping seen: arm the overall deadline */
+ timeout = jiffies +
+ msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT);
+ } else if (time_after(jiffies, timeout)) {
+ ret = -ETIME;
+ goto cleanup_job;
+ }
+
+ ret = aie2_populate_range(abo);
+ if (ret)
+ goto cleanup_job;
+ goto retry;
+ }
+ }
+
+ mutex_lock(&hwctx->priv->io_lock); /* sequence assignment, push and syncobj point are done under io_lock */
+ drm_sched_job_arm(&job->base);
+ job->out_fence = dma_fence_get(&job->base.s_fence->finished); /* put in aie2_job_release() */
+ for (i = 0; i < job->bo_cnt; i++)
+ dma_resv_add_fence(job->bos[i]->resv, job->out_fence, DMA_RESV_USAGE_WRITE);
+ job->seq = hwctx->priv->seq++;
+ kref_get(&job->refcnt); /* temporary reference held across the push; dropped by aie2_job_put() below */
+ drm_sched_entity_push_job(&job->base);
+
+ *seq = job->seq;
+ drm_syncobj_add_point(hwctx->priv->syncobj, chain, job->out_fence, *seq);
+ mutex_unlock(&hwctx->priv->io_lock);
+
+ up_read(&xdna->notifier_lock);
+ drm_gem_unlock_reservations(job->bos, job->bo_cnt, &acquire_ctx);
+
+ aie2_job_put(job);
+
+ return 0;
+
+cleanup_job: /* error unwinding: labels fall through in reverse order of setup */
+ drm_sched_job_cleanup(&job->base);
+free_chain:
+ dma_fence_chain_free(chain);
+up_sem:
+ up(&hwctx->priv->job_sem);
+ job->job_done = true; /* slot already returned; aie2_sched_job_free() checks job_done before calling up() */
+ return ret;
+}
+
+void aie2_hmm_invalidate(struct amdxdna_gem_obj *abo,
+ unsigned long cur_seq)
+{ /* Mark abo's mapping invalid for sequence cur_seq, then wait for the fences on its reservation object. */
+ struct amdxdna_dev *xdna = to_xdna_dev(to_gobj(abo)->dev);
+ struct drm_gem_object *gobj = to_gobj(abo);
+ long ret;
+
+ down_write(&xdna->notifier_lock);
+ abo->mem.map_invalid = true; /* checked in aie2_cmd_submit(), cleared by aie2_populate_range() */
+ mmu_interval_set_seq(&abo->mem.notifier, cur_seq);
+ up_write(&xdna->notifier_lock);
+ ret = dma_resv_wait_timeout(gobj->resv, DMA_RESV_USAGE_BOOKKEEP,
+ true, MAX_SCHEDULE_TIMEOUT); /* interruptible wait (true) with MAX_SCHEDULE_TIMEOUT */
+ if (!ret || ret == -ERESTARTSYS) /* 0 or -ERESTARTSYS is logged as a failed wait; nothing else is done */
+ XDNA_ERR(xdna, "Failed to wait for bo, ret %ld", ret);
+}
diff --git a/drivers/accel/amdxdna/aie2_error.c b/drivers/accel/amdxdna/aie2_error.c
new file mode 100644
index 000000000000..b1defaa8513b
--- /dev/null
+++ b/drivers/accel/amdxdna/aie2_error.c
@@ -0,0 +1,360 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2023-2024, Advanced Micro Devices, Inc.
+ */
+
+#include <drm/drm_cache.h>
+#include <drm/drm_device.h>
+#include <drm/drm_print.h>
+#include <drm/gpu_scheduler.h>
+#include <linux/dma-mapping.h>
+#include <linux/kthread.h>
+#include <linux/kernel.h>
+
+#include "aie2_msg_priv.h"
+#include "aie2_pci.h"
+#include "amdxdna_mailbox.h"
+#include "amdxdna_pci_drv.h"
+
+/*
+ * Bookkeeping for one asynchronous error event registered with the firmware.
+ * Each instance refers to one slice of the buffer owned by struct
+ * async_events (see aie2_error_async_events_alloc()).
+ */
+struct async_event {
+ struct amdxdna_dev_hdl *ndev; /* device handle used when (re)sending the event */
+ struct async_event_msg_resp resp; /* type/status copied from the firmware response */
+ struct workqueue_struct *wq; /* workqueue on which @work is queued */
+ struct work_struct work; /* initialised to run aie2_error_worker() */
+ u8 *buf; /* CPU address of this event's buffer slice */
+ dma_addr_t addr; /* DMA address of this event's buffer slice */
+ u32 size; /* size of the slice in bytes */
+};
+
+/*
+ * All async events of one device: a single workqueue and a single DMA buffer
+ * shared by @event_cnt per-event entries.
+ */
+struct async_events {
+ struct workqueue_struct *wq; /* ordered workqueue shared by every event */
+ u8 *buf; /* CPU address of the whole DMA buffer */
+ dma_addr_t addr; /* DMA address of the whole buffer */
+ u32 size; /* total buffer size in bytes */
+ u32 event_cnt; /* number of valid entries in @event */
+ struct async_event event[] __counted_by(event_cnt);
+};
+
+/*
+ * The enums, structs and lookup tables below are ported from the XAIE util
+ * header file.
+ *
+ * This data is defined by the AIE device and is used to decode error messages
+ * from the device.
+ */
+
+/*
+ * Module type carried in an error record; aie_get_error_category() uses it to
+ * pick the event lookup table.
+ */
+enum aie_module_type {
+ AIE_MEM_MOD = 0,
+ AIE_CORE_MOD,
+ AIE_PL_MOD,
+};
+
+/* Error categories that an event ID can be mapped to by the lookup tables. */
+enum aie_error_category {
+ AIE_ERROR_SATURATION = 0,
+ AIE_ERROR_FP,
+ AIE_ERROR_STREAM,
+ AIE_ERROR_ACCESS,
+ AIE_ERROR_BUS,
+ AIE_ERROR_INSTRUCTION,
+ AIE_ERROR_ECC,
+ AIE_ERROR_LOCK,
+ AIE_ERROR_DMA,
+ AIE_ERROR_MEM_PARITY,
+ /* Unknown is not from XAIE; returned when no table entry matches */
+ AIE_ERROR_UNKNOWN,
+};
+
+/*
+ * One error record as found in the payload of struct aie_err_info.
+ *
+ * Do not pack this struct unless the XAIE side changes its layout.
+ */
+struct aie_error {
+ __u8 row; /* tile row; row 1 selects the mem-tile table for AIE_MEM_MOD */
+ __u8 col; /* tile column; used as a bit index by aie2_error_backtrack() */
+ __u32 mod_type; /* passed as enum aie_module_type to the category lookup */
+ __u8 event_id; /* event ID looked up in the category tables */
+};
+
+/*
+ * Layout of the data the worker reads from an event buffer: a small header
+ * followed by @err_cnt error records.
+ */
+struct aie_err_info {
+ u32 err_cnt; /* number of entries in @payload */
+ u32 ret_code; /* only logged by aie2_error_worker() */
+ u32 rsvd;
+ struct aie_error payload[] __counted_by(err_cnt);
+};
+
+/* One lookup-table entry mapping an event ID to its error category. */
+struct aie_event_category {
+ u8 event_id;
+ enum aie_error_category category;
+};
+
+/* Positional initialiser for struct aie_event_category: { event_id, category } */
+#define EVENT_CATEGORY(id, cat) { id, cat }
+/*
+ * Event categories used by aie_get_error_category() for AIE_MEM_MOD errors
+ * whose row is not 1.
+ */
+static const struct aie_event_category aie_ml_mem_event_cat[] = {
+ EVENT_CATEGORY(88U, AIE_ERROR_ECC),
+ EVENT_CATEGORY(90U, AIE_ERROR_ECC),
+ EVENT_CATEGORY(91U, AIE_ERROR_MEM_PARITY),
+ EVENT_CATEGORY(92U, AIE_ERROR_MEM_PARITY),
+ EVENT_CATEGORY(93U, AIE_ERROR_MEM_PARITY),
+ EVENT_CATEGORY(94U, AIE_ERROR_MEM_PARITY),
+ EVENT_CATEGORY(95U, AIE_ERROR_MEM_PARITY),
+ EVENT_CATEGORY(96U, AIE_ERROR_MEM_PARITY),
+ EVENT_CATEGORY(97U, AIE_ERROR_DMA),
+ EVENT_CATEGORY(98U, AIE_ERROR_DMA),
+ EVENT_CATEGORY(99U, AIE_ERROR_DMA),
+ EVENT_CATEGORY(100U, AIE_ERROR_DMA),
+ EVENT_CATEGORY(101U, AIE_ERROR_LOCK),
+};
+
+/* Event categories used by aie_get_error_category() for AIE_CORE_MOD errors. */
+static const struct aie_event_category aie_ml_core_event_cat[] = {
+ EVENT_CATEGORY(55U, AIE_ERROR_ACCESS),
+ EVENT_CATEGORY(56U, AIE_ERROR_STREAM),
+ EVENT_CATEGORY(57U, AIE_ERROR_STREAM),
+ EVENT_CATEGORY(58U, AIE_ERROR_BUS),
+ EVENT_CATEGORY(59U, AIE_ERROR_INSTRUCTION),
+ EVENT_CATEGORY(60U, AIE_ERROR_ACCESS),
+ EVENT_CATEGORY(62U, AIE_ERROR_ECC),
+ EVENT_CATEGORY(64U, AIE_ERROR_ECC),
+ EVENT_CATEGORY(65U, AIE_ERROR_ACCESS),
+ EVENT_CATEGORY(66U, AIE_ERROR_ACCESS),
+ EVENT_CATEGORY(67U, AIE_ERROR_LOCK),
+ EVENT_CATEGORY(70U, AIE_ERROR_INSTRUCTION),
+ EVENT_CATEGORY(71U, AIE_ERROR_STREAM),
+ EVENT_CATEGORY(72U, AIE_ERROR_BUS),
+};
+
+/*
+ * Event categories used by aie_get_error_category() for AIE_MEM_MOD errors
+ * reported from row 1.
+ */
+static const struct aie_event_category aie_ml_mem_tile_event_cat[] = {
+ EVENT_CATEGORY(130U, AIE_ERROR_ECC),
+ EVENT_CATEGORY(132U, AIE_ERROR_ECC),
+ EVENT_CATEGORY(133U, AIE_ERROR_DMA),
+ EVENT_CATEGORY(134U, AIE_ERROR_DMA),
+ EVENT_CATEGORY(135U, AIE_ERROR_STREAM),
+ EVENT_CATEGORY(136U, AIE_ERROR_STREAM),
+ EVENT_CATEGORY(137U, AIE_ERROR_STREAM),
+ EVENT_CATEGORY(138U, AIE_ERROR_BUS),
+ EVENT_CATEGORY(139U, AIE_ERROR_LOCK),
+};
+
+/* Event categories used by aie_get_error_category() for AIE_PL_MOD errors. */
+static const struct aie_event_category aie_ml_shim_tile_event_cat[] = {
+ EVENT_CATEGORY(64U, AIE_ERROR_BUS),
+ EVENT_CATEGORY(65U, AIE_ERROR_STREAM),
+ EVENT_CATEGORY(66U, AIE_ERROR_STREAM),
+ EVENT_CATEGORY(67U, AIE_ERROR_BUS),
+ EVENT_CATEGORY(68U, AIE_ERROR_BUS),
+ EVENT_CATEGORY(69U, AIE_ERROR_BUS),
+ EVENT_CATEGORY(70U, AIE_ERROR_BUS),
+ EVENT_CATEGORY(71U, AIE_ERROR_BUS),
+ EVENT_CATEGORY(72U, AIE_ERROR_DMA),
+ EVENT_CATEGORY(73U, AIE_ERROR_DMA),
+ EVENT_CATEGORY(74U, AIE_ERROR_LOCK),
+};
+
+/*
+ * Translate an (event ID, module type, row) triple into an error category.
+ *
+ * The module type picks the lookup table; for AIE_MEM_MOD, row 1 uses the
+ * mem-tile table and every other row the regular memory table. Returns
+ * AIE_ERROR_UNKNOWN for an unrecognised module type or event ID.
+ */
+static enum aie_error_category
+aie_get_error_category(u8 row, u8 event_id, enum aie_module_type mod_type)
+{
+	const struct aie_event_category *table;
+	const struct aie_event_category *entry;
+	int count;
+
+	if (mod_type == AIE_PL_MOD) {
+		table = aie_ml_shim_tile_event_cat;
+		count = ARRAY_SIZE(aie_ml_shim_tile_event_cat);
+	} else if (mod_type == AIE_CORE_MOD) {
+		table = aie_ml_core_event_cat;
+		count = ARRAY_SIZE(aie_ml_core_event_cat);
+	} else if (mod_type == AIE_MEM_MOD && row == 1) {
+		table = aie_ml_mem_tile_event_cat;
+		count = ARRAY_SIZE(aie_ml_mem_tile_event_cat);
+	} else if (mod_type == AIE_MEM_MOD) {
+		table = aie_ml_mem_event_cat;
+		count = ARRAY_SIZE(aie_ml_mem_event_cat);
+	} else {
+		return AIE_ERROR_UNKNOWN;
+	}
+
+	/* First matching entry wins */
+	for (entry = table; entry < table + count; entry++) {
+		if (entry->event_id == event_id)
+			return entry->category;
+	}
+
+	return AIE_ERROR_UNKNOWN;
+}
+
+/*
+ * Log every error record and build a bitmap of the columns that reported one.
+ *
+ * @ndev:     device handle, used for logging
+ * @err_info: array of struct aie_error records
+ * @num_err:  number of records in @err_info
+ *
+ * Processing stops at the first record whose column does not fit into the
+ * 32-bit bitmap. Returns the bitmap collected so far (bit N set means column
+ * N reported an error); 0 means no valid column was found.
+ */
+static u32 aie2_error_backtrack(struct amdxdna_dev_hdl *ndev, void *err_info, u32 num_err)
+{
+	struct aie_error *errs = err_info;
+	u32 err_col = 0; /* assume that AIE has less than 32 columns */
+	u32 i;
+
+	/* Get err column bitmap */
+	for (i = 0; i < num_err; i++) {
+		struct aie_error *err = &errs[i];
+		enum aie_error_category cat;
+
+		cat = aie_get_error_category(err->row, err->event_id, err->mod_type);
+		XDNA_ERR(ndev->xdna, "Row: %d, Col: %d, module %d, event ID %d, category %d",
+			 err->row, err->col, err->mod_type,
+			 err->event_id, cat);
+
+		if (err->col >= 32) {
+			XDNA_WARN(ndev->xdna, "Invalid column number");
+			break;
+		}
+
+		/* Unsigned shift: column 31 would overflow a signed int */
+		err_col |= 1U << err->col;
+	}
+
+	return err_col;
+}
+
+/*
+ * Mailbox notify callback for an async event registration.
+ *
+ * @handle: the struct async_event passed at registration time
+ * @data:   response payload, or NULL when there is none
+ * @size:   unused here
+ *
+ * Copies type and status out of the response (if any) and always queues the
+ * event's work item. Always returns 0.
+ */
+static int aie2_error_async_cb(void *handle, const u32 *data, size_t size)
+{
+ struct async_event_msg_resp *resp;
+ struct async_event *e = handle;
+
+ if (data) {
+ resp = (struct async_event_msg_resp *)data;
+ e->resp.type = resp->type;
+ wmb(); /* Write status last; the worker checks it without taking a lock */
+ e->resp.status = resp->status;
+ }
+ queue_work(e->wq, &e->work);
+ return 0;
+}
+
+/*
+ * Flush the event's buffer from the CPU cache and register the buffer with
+ * the firmware, with aie2_error_async_cb() as the completion callback.
+ * Returns the result of aie2_register_asyn_event_msg().
+ */
+static int aie2_error_event_send(struct async_event *e)
+{
+ drm_clflush_virt_range(e->buf, e->size); /* device can access */
+ return aie2_register_asyn_event_msg(e->ndev, e->addr, e->size, e,
+ aie2_error_async_cb);
+}
+
+/*
+ * Work handler for one async event.
+ *
+ * Decodes the error records in the event buffer, logs them, and re-registers
+ * the event with the firmware. The event is NOT re-registered when the record
+ * count is out of range or when no error column could be derived.
+ */
+static void aie2_error_worker(struct work_struct *err_work)
+{
+ struct aie_err_info *info;
+ struct amdxdna_dev *xdna;
+ struct async_event *e;
+ u32 max_err;
+ u32 err_col;
+
+ e = container_of(err_work, struct async_event, work);
+
+ xdna = e->ndev->xdna;
+
+ /* MAX_AIE2_STATUS_CODE marks "no response recorded"; nothing to do */
+ if (e->resp.status == MAX_AIE2_STATUS_CODE)
+ return;
+
+ /* Reset the marker so the next response can be detected */
+ e->resp.status = MAX_AIE2_STATUS_CODE;
+
+ /* Dumps the first 0x100 bytes of the event buffer */
+ print_hex_dump_debug("AIE error: ", DUMP_PREFIX_OFFSET, 16, 4,
+ e->buf, 0x100, false);
+
+ info = (struct aie_err_info *)e->buf;
+ XDNA_DBG(xdna, "Error count %d return code %d", info->err_cnt, info->ret_code);
+
+ /* Largest record count that fits in the buffer after the header */
+ max_err = (e->size - sizeof(*info)) / sizeof(struct aie_error);
+ if (unlikely(info->err_cnt > max_err)) {
+ WARN_ONCE(1, "Error count too large %d\n", info->err_cnt);
+ return;
+ }
+ err_col = aie2_error_backtrack(e->ndev, info->payload, info->err_cnt);
+ if (!err_col) {
+ XDNA_WARN(xdna, "Did not get error column");
+ return;
+ }
+
+ mutex_lock(&xdna->dev_lock);
+ /* Re-send this event to firmware */
+ if (aie2_error_event_send(e))
+ XDNA_WARN(xdna, "Unable to register async event");
+ mutex_unlock(&xdna->dev_lock);
+}
+
+/*
+ * Register every allocated async event with the firmware.
+ *
+ * Warns if dev_lock is not held. Stops at the first failure and returns its
+ * error code; returns 0 once all events are registered.
+ */
+int aie2_error_async_events_send(struct amdxdna_dev_hdl *ndev)
+{
+	struct amdxdna_dev *xdna = ndev->xdna;
+	int idx;
+
+	drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock));
+
+	for (idx = 0; idx < ndev->async_events->event_cnt; idx++) {
+		int err = aie2_error_event_send(&ndev->async_events->event[idx]);
+
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+/*
+ * Tear down the async event state of @ndev: destroy the workqueue, free the
+ * DMA buffer and free the container.
+ *
+ * The function unlocks and re-locks dev_lock, so it expects to be entered
+ * with dev_lock held.
+ */
+void aie2_error_async_events_free(struct amdxdna_dev_hdl *ndev)
+{
+ struct amdxdna_dev *xdna = ndev->xdna;
+ struct async_events *events;
+
+ events = ndev->async_events;
+
+ /*
+ * dev_lock is dropped around destroy_workqueue(); aie2_error_worker()
+ * takes dev_lock, so presumably this avoids a deadlock with pending work.
+ */
+ mutex_unlock(&xdna->dev_lock);
+ destroy_workqueue(events->wq);
+ mutex_lock(&xdna->dev_lock);
+
+ dma_free_noncoherent(xdna->ddev.dev, events->size, events->buf,
+ events->addr, DMA_FROM_DEVICE);
+ kfree(events);
+}
+
+/*
+ * Allocate the async event state for @ndev: one event per column
+ * (ndev->total_col), each with an ASYNC_BUF_SIZE slice of a single
+ * non-coherent DMA buffer, plus one ordered workqueue shared by all events.
+ *
+ * On success the result is stored in ndev->async_events and 0 is returned.
+ * Returns -ENOMEM if any allocation fails; nothing is left allocated then.
+ */
+int aie2_error_async_events_alloc(struct amdxdna_dev_hdl *ndev)
+{
+ struct amdxdna_dev *xdna = ndev->xdna;
+ u32 total_col = ndev->total_col;
+ u32 total_size = ASYNC_BUF_SIZE * total_col;
+ struct async_events *events;
+ int i, ret;
+
+ events = kzalloc(struct_size(events, event, total_col), GFP_KERNEL);
+ if (!events)
+ return -ENOMEM;
+
+ events->buf = dma_alloc_noncoherent(xdna->ddev.dev, total_size, &events->addr,
+ DMA_FROM_DEVICE, GFP_KERNEL);
+ if (!events->buf) {
+ ret = -ENOMEM;
+ goto free_events;
+ }
+ /* size must be set before the free_buf error path can be taken */
+ events->size = total_size;
+ /* event_cnt is the __counted_by bound; set it before touching event[] */
+ events->event_cnt = total_col;
+
+ events->wq = alloc_ordered_workqueue("async_wq", 0);
+ if (!events->wq) {
+ ret = -ENOMEM;
+ goto free_buf;
+ }
+
+ /* Give each event its own ASYNC_BUF_SIZE slice of the shared buffer */
+ for (i = 0; i < events->event_cnt; i++) {
+ struct async_event *e = &events->event[i];
+ u32 offset = i * ASYNC_BUF_SIZE;
+
+ e->ndev = ndev;
+ e->wq = events->wq;
+ e->buf = &events->buf[offset];
+ e->addr = events->addr + offset;
+ e->size = ASYNC_BUF_SIZE;
+ /* The worker treats this value as "no response recorded" */
+ e->resp.status = MAX_AIE2_STATUS_CODE;
+ INIT_WORK(&e->work, aie2_error_worker);
+ }
+
+ ndev->async_events = events;
+
+ XDNA_DBG(xdna, "Async event count %d, buf total size 0x%x",
+ events->event_cnt, events->size);
+ return 0;
+
+free_buf:
+ dma_free_noncoherent(xdna->ddev.dev, events->size, events->buf,
+ events->addr, DMA_FROM_DEVICE);
+free_events:
+ kfree(events);
+ return ret;
+}
diff --git a/drivers/accel/amdxdna/aie2_message.c b/drivers/accel/amdxdna/aie2_message.c
new file mode 100644
index 000000000000..9e2c9a44f76a
--- /dev/null
+++ b/drivers/accel/amdxdna/aie2_message.c
@@ -0,0 +1,776 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2023-2024, Advanced Micro Devices, Inc.
+ */
+
+#include <drm/amdxdna_accel.h>
+#include <drm/drm_cache.h>
+#include <drm/drm_device.h>
+#include <drm/drm_gem.h>
+#include <drm/drm_gem_shmem_helper.h>
+#include <drm/drm_print.h>
+#include <drm/gpu_scheduler.h>
+#include <linux/bitfield.h>
+#include <linux/errno.h>
+#include <linux/pci.h>
+#include <linux/types.h>
+#include <linux/xarray.h>
+
+#include "aie2_msg_priv.h"
+#include "aie2_pci.h"
+#include "amdxdna_ctx.h"
+#include "amdxdna_gem.h"
+#include "amdxdna_mailbox.h"
+#include "amdxdna_mailbox_helper.h"
+#include "amdxdna_pci_drv.h"
+
+/*
+ * Wrapper around DECLARE_XDNA_MSG_COMMON that passes MAX_AIE2_STATUS_CODE as
+ * the third argument. Users in this file rely on the locals `req`, `resp`
+ * and `msg` being in scope afterwards.
+ * NOTE(review): presumably the third argument is the initial resp.status;
+ * confirm against DECLARE_XDNA_MSG_COMMON.
+ */
+#define DECLARE_AIE2_MSG(name, op) \
+ DECLARE_XDNA_MSG_COMMON(name, op, MAX_AIE2_STATUS_CODE)
+
+/*
+ * Send @msg on the management channel and wait for the response.
+ *
+ * Warns if dev_lock is not held.
+ *
+ * Returns -ENODEV if there is no management channel, the error from
+ * xdna_send_msg_wait() on transport failure, -EINVAL if the first word of the
+ * response is not AIE2_STATUS_SUCCESS, and 0 otherwise. On -ETIME the
+ * management channel is stopped, destroyed and cleared, so later calls
+ * return -ENODEV.
+ */
+static int aie2_send_mgmt_msg_wait(struct amdxdna_dev_hdl *ndev,
+ struct xdna_mailbox_msg *msg)
+{
+ struct amdxdna_dev *xdna = ndev->xdna;
+ struct xdna_notify *hdl = msg->handle;
+ int ret;
+
+ if (!ndev->mgmt_chann)
+ return -ENODEV;
+
+ drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock));
+ ret = xdna_send_msg_wait(xdna, ndev->mgmt_chann, msg);
+ if (ret == -ETIME) {
+ /* Timed out: tear the management channel down */
+ xdna_mailbox_stop_channel(ndev->mgmt_chann);
+ xdna_mailbox_destroy_channel(ndev->mgmt_chann);
+ ndev->mgmt_chann = NULL;
+ }
+
+ /* Transport succeeded; now check the status word of the response */
+ if (!ret && *hdl->data != AIE2_STATUS_SUCCESS) {
+ XDNA_ERR(xdna, "command opcode 0x%x failed, status 0x%x",
+ msg->opcode, *hdl->data);
+ ret = -EINVAL;
+ }
+
+ return ret;
+}
+
+/*
+ * Send MSG_OP_SUSPEND on the management channel and wait for the result.
+ * Returns the result of aie2_send_mgmt_msg_wait().
+ */
+int aie2_suspend_fw(struct amdxdna_dev_hdl *ndev)
+{
+ DECLARE_AIE2_MSG(suspend, MSG_OP_SUSPEND);
+
+ return aie2_send_mgmt_msg_wait(ndev, &msg);
+}
+
+/*
+ * Send MSG_OP_RESUME on the management channel and wait for the result.
+ * Returns the result of aie2_send_mgmt_msg_wait().
+ */
+int aie2_resume_fw(struct amdxdna_dev_hdl *ndev)
+{
+ /* Declared with the "suspend" message name, but sent with the RESUME opcode */
+ DECLARE_AIE2_MSG(suspend, MSG_OP_RESUME);
+
+ return aie2_send_mgmt_msg_wait(ndev, &msg);
+}
+
+/*
+ * Set runtime configuration item @type to @value via the management channel.
+ * Returns 0 on success; on failure logs and returns the error from
+ * aie2_send_mgmt_msg_wait().
+ */
+int aie2_set_runtime_cfg(struct amdxdna_dev_hdl *ndev, u32 type, u64 value)
+{
+	DECLARE_AIE2_MSG(set_runtime_cfg, MSG_OP_SET_RUNTIME_CONFIG);
+	int err;
+
+	req.type = type;
+	req.value = value;
+
+	err = aie2_send_mgmt_msg_wait(ndev, &msg);
+	if (err)
+		XDNA_ERR(ndev->xdna, "Failed to set runtime config, ret %d", err);
+
+	return err;
+}
+
+/*
+ * Read runtime configuration item @type via the management channel.
+ * On success stores the value in *@value and returns 0; on failure logs and
+ * returns the error, leaving *@value untouched.
+ */
+int aie2_get_runtime_cfg(struct amdxdna_dev_hdl *ndev, u32 type, u64 *value)
+{
+	DECLARE_AIE2_MSG(get_runtime_cfg, MSG_OP_GET_RUNTIME_CONFIG);
+	int err;
+
+	req.type = type;
+
+	err = aie2_send_mgmt_msg_wait(ndev, &msg);
+	if (!err) {
+		*value = resp.value;
+		return 0;
+	}
+
+	XDNA_ERR(ndev->xdna, "Failed to get runtime config, ret %d", err);
+	return err;
+}
+
+/*
+ * Send MSG_OP_ASSIGN_MGMT_PASID carrying @pasid on the management channel.
+ * Returns the result of aie2_send_mgmt_msg_wait().
+ */
+int aie2_assign_mgmt_pasid(struct amdxdna_dev_hdl *ndev, u16 pasid)
+{
+ DECLARE_AIE2_MSG(assign_mgmt_pasid, MSG_OP_ASSIGN_MGMT_PASID);
+
+ req.pasid = pasid;
+
+ return aie2_send_mgmt_msg_wait(ndev, &msg);
+}
+
+/*
+ * Query the AIE version from firmware.
+ * On success fills @version->major/minor from the response and returns 0;
+ * otherwise returns the error and leaves @version untouched.
+ */
+int aie2_query_aie_version(struct amdxdna_dev_hdl *ndev, struct aie_version *version)
+{
+ DECLARE_AIE2_MSG(aie_version_info, MSG_OP_QUERY_AIE_VERSION);
+ struct amdxdna_dev *xdna = ndev->xdna;
+ int ret;
+
+ ret = aie2_send_mgmt_msg_wait(ndev, &msg);
+ if (ret)
+ return ret;
+
+ XDNA_DBG(xdna, "Query AIE version - major: %u minor: %u completed",
+ resp.major, resp.minor);
+
+ version->major = resp.major;
+ version->minor = resp.minor;
+
+ return 0;
+}
+
+/*
+ * Query AIE tile information from firmware and copy it field by field into
+ * @metadata (overall size/cols/rows, version, and per-tile-type row, DMA
+ * channel, lock and event counts for core, mem and shim tiles).
+ * Returns 0 on success; otherwise the error, with @metadata untouched.
+ */
+int aie2_query_aie_metadata(struct amdxdna_dev_hdl *ndev, struct aie_metadata *metadata)
+{
+ DECLARE_AIE2_MSG(aie_tile_info, MSG_OP_QUERY_AIE_TILE_INFO);
+ int ret;
+
+ ret = aie2_send_mgmt_msg_wait(ndev, &msg);
+ if (ret)
+ return ret;
+
+ metadata->size = resp.info.size;
+ metadata->cols = resp.info.cols;
+ metadata->rows = resp.info.rows;
+
+ metadata->version.major = resp.info.major;
+ metadata->version.minor = resp.info.minor;
+
+ /* Core tiles */
+ metadata->core.row_count = resp.info.core_rows;
+ metadata->core.row_start = resp.info.core_row_start;
+ metadata->core.dma_channel_count = resp.info.core_dma_channels;
+ metadata->core.lock_count = resp.info.core_locks;
+ metadata->core.event_reg_count = resp.info.core_events;
+
+ /* Memory tiles */
+ metadata->mem.row_count = resp.info.mem_rows;
+ metadata->mem.row_start = resp.info.mem_row_start;
+ metadata->mem.dma_channel_count = resp.info.mem_dma_channels;
+ metadata->mem.lock_count = resp.info.mem_locks;
+ metadata->mem.event_reg_count = resp.info.mem_events;
+
+ /* Shim tiles */
+ metadata->shim.row_count = resp.info.shim_rows;
+ metadata->shim.row_start = resp.info.shim_row_start;
+ metadata->shim.dma_channel_count = resp.info.shim_dma_channels;
+ metadata->shim.lock_count = resp.info.shim_locks;
+ metadata->shim.event_reg_count = resp.info.shim_events;
+
+ return 0;
+}
+
+/*
+ * Query the firmware version.
+ * On success fills @fw_ver (major, minor, sub, build) from the response and
+ * returns 0; otherwise returns the error and leaves @fw_ver untouched.
+ */
+int aie2_query_firmware_version(struct amdxdna_dev_hdl *ndev,
+ struct amdxdna_fw_ver *fw_ver)
+{
+ DECLARE_AIE2_MSG(firmware_version, MSG_OP_GET_FIRMWARE_VERSION);
+ int ret;
+
+ ret = aie2_send_mgmt_msg_wait(ndev, &msg);
+ if (ret)
+ return ret;
+
+ fw_ver->major = resp.major;
+ fw_ver->minor = resp.minor;
+ fw_ver->sub = resp.sub;
+ fw_ver->build = resp.build;
+
+ return 0;
+}
+
+/*
+ * Ask firmware to create a context for @hwctx and set up the per-context
+ * mailbox channel described by the response.
+ *
+ * On success hwctx->fw_ctx_id and hwctx->priv->mbox_chann are set and 0 is
+ * returned. If the IRQ lookup or channel creation fails, the firmware context
+ * is destroyed again and a negative error is returned.
+ */
+int aie2_create_context(struct amdxdna_dev_hdl *ndev, struct amdxdna_hwctx *hwctx)
+{
+ DECLARE_AIE2_MSG(create_ctx, MSG_OP_CREATE_CONTEXT);
+ struct amdxdna_dev *xdna = ndev->xdna;
+ struct xdna_mailbox_chann_res x2i;
+ struct xdna_mailbox_chann_res i2x;
+ struct cq_pair *cq_pair;
+ u32 intr_reg;
+ int ret;
+
+ req.aie_type = 1;
+ req.start_col = hwctx->start_col;
+ req.num_col = hwctx->num_col;
+ req.num_cq_pairs_requested = 1;
+ req.pasid = hwctx->client->pasid;
+ req.context_priority = 2;
+
+ ret = aie2_send_mgmt_msg_wait(ndev, &msg);
+ if (ret)
+ return ret;
+
+ hwctx->fw_ctx_id = resp.context_id;
+ /* -1 is the "no firmware context" marker used by aie2_destroy_context() */
+ WARN_ONCE(hwctx->fw_ctx_id == -1, "Unexpected context id");
+
+ /* Translate the queue-pair addresses from the response into channel resources */
+ cq_pair = &resp.cq_pair[0];
+ x2i.mb_head_ptr_reg = AIE2_MBOX_OFF(ndev, cq_pair->x2i_q.head_addr);
+ x2i.mb_tail_ptr_reg = AIE2_MBOX_OFF(ndev, cq_pair->x2i_q.tail_addr);
+ x2i.rb_start_addr = AIE2_SRAM_OFF(ndev, cq_pair->x2i_q.buf_addr);
+ x2i.rb_size = cq_pair->x2i_q.buf_size;
+
+ i2x.mb_head_ptr_reg = AIE2_MBOX_OFF(ndev, cq_pair->i2x_q.head_addr);
+ i2x.mb_tail_ptr_reg = AIE2_MBOX_OFF(ndev, cq_pair->i2x_q.tail_addr);
+ i2x.rb_start_addr = AIE2_SRAM_OFF(ndev, cq_pair->i2x_q.buf_addr);
+ i2x.rb_size = cq_pair->i2x_q.buf_size;
+
+ /* From here on ret holds the Linux IRQ number (or -EINVAL) */
+ ret = pci_irq_vector(to_pci_dev(xdna->ddev.dev), resp.msix_id);
+ if (ret == -EINVAL) {
+ XDNA_ERR(xdna, "not able to create channel");
+ goto out_destroy_context;
+ }
+
+ intr_reg = i2x.mb_head_ptr_reg + 4;
+ hwctx->priv->mbox_chann = xdna_mailbox_create_channel(ndev->mbox, &x2i, &i2x,
+ intr_reg, ret);
+ if (!hwctx->priv->mbox_chann) {
+ XDNA_ERR(xdna, "not able to create channel");
+ ret = -EINVAL;
+ goto out_destroy_context;
+ }
+
+ XDNA_DBG(xdna, "%s mailbox channel irq: %d, msix_id: %d",
+ hwctx->name, ret, resp.msix_id);
+ XDNA_DBG(xdna, "%s created fw ctx %d pasid %d", hwctx->name,
+ hwctx->fw_ctx_id, hwctx->client->pasid);
+
+ return 0;
+
+out_destroy_context:
+ /*
+ * The return value of the destroy is ignored; ret keeps the original error.
+ * NOTE(review): mbox_chann may not have been created on this path; confirm
+ * the mailbox stop/destroy helpers accept that.
+ */
+ aie2_destroy_context(ndev, hwctx);
+ return ret;
+}
+
+/*
+ * Destroy the firmware context of @hwctx and its mailbox channel.
+ *
+ * Does nothing and returns 0 if fw_ctx_id is already -1. Otherwise the
+ * channel is stopped, the destroy message is sent, and the channel is
+ * destroyed regardless of whether the message succeeded. fw_ctx_id is reset
+ * to -1 and mbox_chann to NULL. Returns the result of the destroy message.
+ */
+int aie2_destroy_context(struct amdxdna_dev_hdl *ndev, struct amdxdna_hwctx *hwctx)
+{
+ DECLARE_AIE2_MSG(destroy_ctx, MSG_OP_DESTROY_CONTEXT);
+ struct amdxdna_dev *xdna = ndev->xdna;
+ int ret;
+
+ if (hwctx->fw_ctx_id == -1)
+ return 0;
+
+ xdna_mailbox_stop_channel(hwctx->priv->mbox_chann);
+
+ req.context_id = hwctx->fw_ctx_id;
+ ret = aie2_send_mgmt_msg_wait(ndev, &msg);
+ if (ret)
+ XDNA_WARN(xdna, "%s destroy context failed, ret %d", hwctx->name, ret);
+
+ /* Host-side cleanup happens even if the firmware message failed */
+ xdna_mailbox_destroy_channel(hwctx->priv->mbox_chann);
+ XDNA_DBG(xdna, "%s destroyed fw ctx %d", hwctx->name,
+ hwctx->fw_ctx_id);
+ hwctx->priv->mbox_chann = NULL;
+ hwctx->fw_ctx_id = -1;
+
+ return ret;
+}
+
+/*
+ * Tell firmware to map the host buffer [@addr, @addr + @size) for firmware
+ * context @context_id. Returns 0 on success or the error from
+ * aie2_send_mgmt_msg_wait().
+ */
+int aie2_map_host_buf(struct amdxdna_dev_hdl *ndev, u32 context_id, u64 addr, u64 size)
+{
+	DECLARE_AIE2_MSG(map_host_buffer, MSG_OP_MAP_HOST_BUFFER);
+	struct amdxdna_dev *xdna = ndev->xdna;
+	int err;
+
+	req.context_id = context_id;
+	req.buf_addr = addr;
+	req.buf_size = size;
+
+	err = aie2_send_mgmt_msg_wait(ndev, &msg);
+	if (!err)
+		XDNA_DBG(xdna, "fw ctx %d map host buf addr 0x%llx size 0x%llx",
+			 context_id, addr, size);
+
+	return err;
+}
+
+/*
+ * Query the status of all columns used by any hardware context and copy the
+ * firmware dump to user space.
+ *
+ * @buf:         user buffer receiving the dump
+ * @size:        size of @buf; also the size of the temporary DMA buffer
+ * @cols_filled: set to the bitmap of queried columns on success, 0 otherwise
+ *
+ * Returns 0 on success, -ENOMEM if the DMA buffer cannot be allocated,
+ * -EINVAL if firmware reports failure or needs more than @size bytes,
+ * -EFAULT if the copy to user space fails, or the error from the mailbox.
+ */
+int aie2_query_status(struct amdxdna_dev_hdl *ndev, char __user *buf,
+ u32 size, u32 *cols_filled)
+{
+ DECLARE_AIE2_MSG(aie_column_info, MSG_OP_QUERY_COL_STATUS);
+ struct amdxdna_dev *xdna = ndev->xdna;
+ struct amdxdna_client *client;
+ struct amdxdna_hwctx *hwctx;
+ unsigned long hwctx_id;
+ dma_addr_t dma_addr;
+ u32 aie_bitmap = 0;
+ u8 *buff_addr;
+ int ret, idx;
+
+ buff_addr = dma_alloc_noncoherent(xdna->ddev.dev, size, &dma_addr,
+ DMA_FROM_DEVICE, GFP_KERNEL);
+ if (!buff_addr)
+ return -ENOMEM;
+
+ /* Go through each hardware context and mark the AIE columns that are active */
+ list_for_each_entry(client, &xdna->client_list, node) {
+ idx = srcu_read_lock(&client->hwctx_srcu);
+ amdxdna_for_each_hwctx(client, hwctx_id, hwctx)
+ aie_bitmap |= amdxdna_hwctx_col_map(hwctx);
+ srcu_read_unlock(&client->hwctx_srcu, idx);
+ }
+
+ *cols_filled = 0;
+ req.dump_buff_addr = dma_addr;
+ req.dump_buff_size = size;
+ req.num_cols = hweight32(aie_bitmap);
+ req.aie_bitmap = aie_bitmap;
+
+ drm_clflush_virt_range(buff_addr, size); /* device can access */
+ ret = aie2_send_mgmt_msg_wait(ndev, &msg);
+ if (ret) {
+ XDNA_ERR(xdna, "Error during NPU query, status %d", ret);
+ goto fail;
+ }
+
+ if (resp.status != AIE2_STATUS_SUCCESS) {
+ XDNA_ERR(xdna, "Query NPU status failed, status 0x%x", resp.status);
+ ret = -EINVAL;
+ goto fail;
+ }
+ XDNA_DBG(xdna, "Query NPU status completed");
+
+ /* Firmware reports how many bytes it produced; never copy past @size */
+ if (size < resp.size) {
+ ret = -EINVAL;
+ XDNA_ERR(xdna, "Bad buffer size. Available: %u. Needs: %u", size, resp.size);
+ goto fail;
+ }
+
+ if (copy_to_user(buf, buff_addr, resp.size)) {
+ ret = -EFAULT;
+ XDNA_ERR(xdna, "Failed to copy NPU status to user space");
+ goto fail;
+ }
+
+ *cols_filled = aie_bitmap;
+
+ /* Reached on success as well; ret is 0 in that case */
+fail:
+ dma_free_noncoherent(xdna->ddev.dev, size, buff_addr, dma_addr, DMA_FROM_DEVICE);
+ return ret;
+}
+
+/*
+ * Register an async event buffer with firmware on the management channel.
+ *
+ * @ndev:   device handle
+ * @addr:   DMA address of the event buffer
+ * @size:   size of the event buffer in bytes
+ * @handle: opaque pointer handed back to @cb
+ * @cb:     callback invoked by the mailbox layer for this message
+ *
+ * The message is sent without waiting for a response. Returns -ENODEV if the
+ * management channel is gone (aie2_send_mgmt_msg_wait() clears it after a
+ * timeout), otherwise the result of xdna_mailbox_send_msg().
+ */
+int aie2_register_asyn_event_msg(struct amdxdna_dev_hdl *ndev, dma_addr_t addr, u32 size,
+				 void *handle, int (*cb)(void*, const u32 *, size_t))
+{
+	struct async_event_msg_req req = { 0 };
+	struct xdna_mailbox_msg msg = {
+		.send_data = (u8 *)&req,
+		.send_size = sizeof(req),
+		.handle = handle,
+		.opcode = MSG_OP_REGISTER_ASYNC_EVENT_MSG,
+		.notify_cb = cb,
+	};
+
+	/* Same guard as aie2_send_mgmt_msg_wait(): never send on a NULL channel */
+	if (!ndev->mgmt_chann)
+		return -ENODEV;
+
+	req.buf_addr = addr;
+	req.buf_size = size;
+
+	/* dma_addr_t is not always 64 bits wide; cast to match %llx */
+	XDNA_DBG(ndev->xdna, "Register addr 0x%llx size 0x%x", (u64)addr, size);
+	return xdna_mailbox_send_msg(ndev->mgmt_chann, &msg, TX_TIMEOUT);
+}
+
+/*
+ * Send the CU configuration of @hwctx to firmware over the context's own
+ * mailbox channel and wait for the result.
+ *
+ * Each CU's BO must be of type AMDXDNA_BO_DEV and its padding must be zero.
+ *
+ * Returns 0 on success, -ENODEV without a channel, -EINVAL for an invalid
+ * configuration or when firmware reports a failure status, or the transport
+ * error from xdna_send_msg_wait(). On -ETIME the context is destroyed.
+ */
+int aie2_config_cu(struct amdxdna_hwctx *hwctx)
+{
+	struct mailbox_channel *chann = hwctx->priv->mbox_chann;
+	struct amdxdna_dev *xdna = hwctx->client->xdna;
+	u32 shift = xdna->dev_info->dev_mem_buf_shift;
+	DECLARE_AIE2_MSG(config_cu, MSG_OP_CONFIG_CU);
+	struct drm_gem_object *gobj;
+	struct amdxdna_gem_obj *abo;
+	int ret, i;
+
+	if (!chann)
+		return -ENODEV;
+
+	if (hwctx->cus->num_cus > MAX_NUM_CUS) {
+		XDNA_DBG(xdna, "Exceed maximum CU %d", MAX_NUM_CUS);
+		return -EINVAL;
+	}
+
+	for (i = 0; i < hwctx->cus->num_cus; i++) {
+		struct amdxdna_cu_config *cu = &hwctx->cus->cu_configs[i];
+
+		if (XDNA_MBZ_DBG(xdna, cu->pad, sizeof(cu->pad)))
+			return -EINVAL;
+
+		gobj = drm_gem_object_lookup(hwctx->client->filp, cu->cu_bo);
+		if (!gobj) {
+			XDNA_ERR(xdna, "Lookup GEM object failed");
+			return -EINVAL;
+		}
+		abo = to_xdna_obj(gobj);
+
+		if (abo->type != AMDXDNA_BO_DEV) {
+			drm_gem_object_put(gobj);
+			XDNA_ERR(xdna, "Invalid BO type");
+			return -EINVAL;
+		}
+
+		/* Pack the shifted device address and the CU function into one word */
+		req.cfgs[i] = FIELD_PREP(AIE2_MSG_CFG_CU_PDI_ADDR,
+					 abo->mem.dev_addr >> shift);
+		req.cfgs[i] |= FIELD_PREP(AIE2_MSG_CFG_CU_FUNC, cu->cu_func);
+		XDNA_DBG(xdna, "CU %d full addr 0x%llx, cfg 0x%x", i,
+			 abo->mem.dev_addr, req.cfgs[i]);
+		drm_gem_object_put(gobj);
+	}
+	req.num_cus = hwctx->cus->num_cus;
+
+	ret = xdna_send_msg_wait(xdna, chann, &msg);
+	if (ret == -ETIME)
+		aie2_destroy_context(xdna->dev_handle, hwctx);
+
+	/* A delivered message whose status is not SUCCESS is still a failure */
+	if (!ret && resp.status != AIE2_STATUS_SUCCESS)
+		ret = -EINVAL;
+
+	if (ret) {
+		XDNA_ERR(xdna, "Command opcode 0x%x failed, status 0x%x ret %d",
+			 msg.opcode, resp.status, ret);
+		return ret;
+	}
+
+	XDNA_DBG(xdna, "Configure %d CUs, ret %d", req.num_cus, ret);
+	return 0;
+}
+
+/*
+ * Build an execute request from the job's command BO and post it on the
+ * context's mailbox channel without waiting for the response.
+ *
+ * @hwctx:     hardware context whose channel is used
+ * @job:       job that owns the command BO; passed back to @notify_cb
+ * @notify_cb: callback invoked by the mailbox layer for this message
+ *
+ * ERT_START_CU and ERT_START_NPU commands are supported. A payload longer
+ * than the request can carry is truncated (and logged); a shorter payload is
+ * zero padded instead of reading past the end of the command payload.
+ *
+ * Returns 0 on success, -ENODEV without a channel, -EINVAL for a malformed
+ * command, or the error from xdna_mailbox_send_msg().
+ */
+int aie2_execbuf(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job,
+		 int (*notify_cb)(void *, const u32 *, size_t))
+{
+	struct mailbox_channel *chann = hwctx->priv->mbox_chann;
+	struct amdxdna_dev *xdna = hwctx->client->xdna;
+	struct amdxdna_gem_obj *cmd_abo = job->cmd_bo;
+	union {
+		struct execute_buffer_req ebuf;
+		struct exec_dpu_req dpu;
+	} req;
+	struct xdna_mailbox_msg msg;
+	u32 payload_len;
+	size_t copy_len;
+	void *payload;
+	int cu_idx;
+	int ret;
+	u32 op;
+
+	if (!chann)
+		return -ENODEV;
+
+	payload = amdxdna_cmd_get_payload(cmd_abo, &payload_len);
+	if (!payload) {
+		XDNA_ERR(xdna, "Invalid command, cannot get payload");
+		return -EINVAL;
+	}
+
+	cu_idx = amdxdna_cmd_get_cu_idx(cmd_abo);
+	if (cu_idx < 0) {
+		XDNA_DBG(xdna, "Invalid cu idx");
+		return -EINVAL;
+	}
+
+	/* Never hand uninitialised stack bytes to the firmware */
+	memset(&req, 0, sizeof(req));
+
+	op = amdxdna_cmd_get_op(cmd_abo);
+	switch (op) {
+	case ERT_START_CU:
+		copy_len = payload_len;
+		if (unlikely(payload_len > sizeof(req.ebuf.payload))) {
+			XDNA_DBG(xdna, "Invalid ebuf payload len: %d", payload_len);
+			copy_len = sizeof(req.ebuf.payload);
+		}
+		req.ebuf.cu_idx = cu_idx;
+		/* Copy only what the command actually provides */
+		memcpy(req.ebuf.payload, payload, copy_len);
+		msg.send_size = sizeof(req.ebuf);
+		msg.opcode = MSG_OP_EXECUTE_BUFFER_CF;
+		break;
+	case ERT_START_NPU: {
+		struct amdxdna_cmd_start_npu *sn = payload;
+
+		/* The fixed header must be present before any field is read */
+		if (payload_len < sizeof(*sn)) {
+			XDNA_DBG(xdna, "Invalid dpu payload len: %d", payload_len);
+			return -EINVAL;
+		}
+
+		copy_len = payload_len - sizeof(*sn);
+		if (unlikely(copy_len > sizeof(req.dpu.payload))) {
+			XDNA_DBG(xdna, "Invalid dpu payload len: %d", payload_len);
+			copy_len = sizeof(req.dpu.payload);
+		}
+		req.dpu.inst_buf_addr = sn->buffer;
+		req.dpu.inst_size = sn->buffer_size;
+		req.dpu.inst_prop_cnt = sn->prop_count;
+		req.dpu.cu_idx = cu_idx;
+		memcpy(req.dpu.payload, sn->prop_args, copy_len);
+		msg.send_size = sizeof(req.dpu);
+		msg.opcode = MSG_OP_EXEC_DPU;
+		break;
+	}
+	default:
+		XDNA_DBG(xdna, "Invalid ERT cmd op code: %d", op);
+		return -EINVAL;
+	}
+	msg.handle = job;
+	msg.notify_cb = notify_cb;
+	msg.send_data = (u8 *)&req;
+	print_hex_dump_debug("cmd: ", DUMP_PREFIX_OFFSET, 16, 4, &req,
+			     0x40, false);
+
+	ret = xdna_mailbox_send_msg(chann, &msg, TX_TIMEOUT);
+	if (ret) {
+		XDNA_ERR(xdna, "Send message failed");
+		return ret;
+	}
+
+	return 0;
+}
+
+/*
+ * Fill one ERT_START_CU slot of a command chain buffer.
+ *
+ * @cmd_buf: start of the command chain buffer
+ * @offset:  byte offset of the slot inside @cmd_buf
+ * @abo:     command BO providing the CU index and payload
+ * @size:    set to the number of bytes used by this slot
+ *
+ * Returns 0 on success, -EINVAL for an invalid CU index or payload, or
+ * -ENOSPC if the slot does not fit.
+ */
+static int
+aie2_cmdlist_fill_one_slot_cf(void *cmd_buf, u32 offset,
+ struct amdxdna_gem_obj *abo, u32 *size)
+{
+ struct cmd_chain_slot_execbuf_cf *buf = cmd_buf + offset;
+ int cu_idx = amdxdna_cmd_get_cu_idx(abo);
+ u32 payload_len;
+ void *payload;
+
+ if (cu_idx < 0)
+ return -EINVAL;
+
+ payload = amdxdna_cmd_get_payload(abo, &payload_len);
+ if (!payload)
+ return -EINVAL;
+
+ if (!slot_cf_has_space(offset, payload_len))
+ return -ENOSPC;
+
+ buf->cu_idx = cu_idx;
+ /* The argument count is expressed in 32-bit words */
+ buf->arg_cnt = payload_len / sizeof(u32);
+ memcpy(buf->args, payload, payload_len);
+ /* Accurate buf size to hint firmware to do necessary copy */
+ *size = sizeof(*buf) + payload_len;
+ return 0;
+}
+
+/*
+ * Fill one ERT_START_NPU slot of a command chain buffer.
+ *
+ * @cmd_buf: start of the command chain buffer
+ * @offset:  byte offset of the slot inside @cmd_buf
+ * @abo:     command BO providing the CU index and payload
+ * @size:    set to the number of bytes used by this slot
+ *
+ * The payload starts with a struct amdxdna_cmd_start_npu header followed by
+ * the property arguments.
+ *
+ * Returns 0 on success, -EINVAL for an invalid CU index or payload, or
+ * -ENOSPC if the slot does not fit.
+ */
+static int
+aie2_cmdlist_fill_one_slot_dpu(void *cmd_buf, u32 offset,
+			       struct amdxdna_gem_obj *abo, u32 *size)
+{
+	struct cmd_chain_slot_dpu *buf = cmd_buf + offset;
+	int cu_idx = amdxdna_cmd_get_cu_idx(abo);
+	struct amdxdna_cmd_start_npu *sn;
+	u32 payload_len;
+	void *payload;
+	u32 arg_sz;
+
+	if (cu_idx < 0)
+		return -EINVAL;
+
+	payload = amdxdna_cmd_get_payload(abo, &payload_len);
+	if (!payload)
+		return -EINVAL;
+
+	/* Validate the header length before deriving the argument size */
+	if (payload_len < sizeof(*sn))
+		return -EINVAL;
+	sn = payload;
+	arg_sz = payload_len - sizeof(*sn);
+	if (arg_sz > MAX_DPU_ARGS_SIZE)
+		return -EINVAL;
+
+	if (!slot_dpu_has_space(offset, arg_sz))
+		return -ENOSPC;
+
+	buf->inst_buf_addr = sn->buffer;
+	buf->inst_size = sn->buffer_size;
+	buf->inst_prop_cnt = sn->prop_count;
+	buf->cu_idx = cu_idx;
+	/* The argument count is expressed in 32-bit words */
+	buf->arg_cnt = arg_sz / sizeof(u32);
+	memcpy(buf->args, sn->prop_args, arg_sz);
+
+	/*
+	 * Accurate buf size to hint firmware to do necessary copy.
+	 * Assign rather than accumulate: callers pass an uninitialised
+	 * variable and add the result to their running offset themselves.
+	 */
+	*size = sizeof(*buf) + arg_sz;
+	return 0;
+}
+
+/*
+ * Fill one slot of the command chain buffer from the command BO @abo.
+ *
+ * @op is the opcode every command of the chain must carry. A mismatch yields
+ * -EINVAL and an opcode without a slot format yields -EOPNOTSUPP; otherwise
+ * the result of the opcode-specific fill helper is returned. Any failure is
+ * logged.
+ */
+static int
+aie2_cmdlist_fill_one_slot(u32 op, struct amdxdna_gem_obj *cmdbuf_abo, u32 offset,
+			   struct amdxdna_gem_obj *abo, u32 *size)
+{
+	u32 cmd_op = amdxdna_cmd_get_op(abo);
+	void *chain_buf = cmdbuf_abo->mem.kva;
+	int err;
+
+	if (cmd_op != op)
+		err = -EINVAL;
+	else if (op == ERT_START_CU)
+		err = aie2_cmdlist_fill_one_slot_cf(chain_buf, offset, abo, size);
+	else if (op == ERT_START_NPU)
+		err = aie2_cmdlist_fill_one_slot_dpu(chain_buf, offset, abo, size);
+	else
+		err = -EOPNOTSUPP;
+
+	if (err)
+		XDNA_ERR(abo->client->xdna, "Can't fill slot for cmd op %d ret %d",
+			 op, err);
+
+	return err;
+}
+
+/*
+ * Return the command chain buffer of the job's hardware context that belongs
+ * to the job's sequence number.
+ */
+static inline struct amdxdna_gem_obj *
+aie2_cmdlist_get_cmd_buf(struct amdxdna_sched_job *job)
+{
+	int slot = get_job_idx(job->seq);
+
+	return job->hwctx->priv->cmd_buf[slot];
+}
+
+/*
+ * Fill the command chain request @req so that it describes the first @size
+ * bytes of @cmdbuf_abo holding @cnt commands, and flush those bytes from the
+ * CPU cache.
+ */
+static void
+aie2_cmdlist_prepare_request(struct cmd_chain_req *req,
+ struct amdxdna_gem_obj *cmdbuf_abo, u32 size, u32 cnt)
+{
+ req->buf_addr = cmdbuf_abo->mem.dev_addr;
+ req->buf_size = size;
+ req->count = cnt;
+ drm_clflush_virt_range(cmdbuf_abo->mem.kva, size);
+ XDNA_DBG(cmdbuf_abo->client->xdna, "Command buf addr 0x%llx size 0x%x count %d",
+ req->buf_addr, size, cnt);
+}
+
+/*
+ * Map an ERT command opcode to the mailbox opcode of its chained variant.
+ * Returns MSG_OP_MAX_OPCODE for opcodes that have no chained variant.
+ */
+static inline u32
+aie2_cmd_op_to_msg_op(u32 op)
+{
+	if (op == ERT_START_CU)
+		return MSG_OP_CHAIN_EXEC_BUFFER_CF;
+
+	if (op == ERT_START_NPU)
+		return MSG_OP_CHAIN_EXEC_DPU;
+
+	return MSG_OP_MAX_OPCODE;
+}
+
+/*
+ * Submit an ERT_CMD_CHAIN job: pack every sub-command into the context's
+ * command chain buffer and post one chained-execute message.
+ *
+ * @hwctx:     hardware context whose channel and command buffer are used
+ * @job:       job owning the chain command BO; passed back to @notify_cb
+ * @notify_cb: callback invoked by the mailbox layer for this message
+ *
+ * Returns 0 on success, -EINVAL for a malformed chain or sub-command,
+ * -ENOENT if a sub-command BO cannot be found, -EOPNOTSUPP if the
+ * sub-command opcode has no chained variant, or the mailbox send error.
+ */
+int aie2_cmdlist_multi_execbuf(struct amdxdna_hwctx *hwctx,
+ struct amdxdna_sched_job *job,
+ int (*notify_cb)(void *, const u32 *, size_t))
+{
+ struct amdxdna_gem_obj *cmdbuf_abo = aie2_cmdlist_get_cmd_buf(job);
+ struct mailbox_channel *chann = hwctx->priv->mbox_chann;
+ struct amdxdna_client *client = hwctx->client;
+ struct amdxdna_gem_obj *cmd_abo = job->cmd_bo;
+ struct amdxdna_cmd_chain *payload;
+ struct xdna_mailbox_msg msg;
+ struct cmd_chain_req req;
+ u32 payload_len;
+ u32 offset = 0;
+ u32 size;
+ int ret;
+ u32 op;
+ u32 i;
+
+ op = amdxdna_cmd_get_op(cmd_abo);
+ payload = amdxdna_cmd_get_payload(cmd_abo, &payload_len);
+ /* The payload must be large enough to hold command_count BO handles */
+ if (op != ERT_CMD_CHAIN || !payload ||
+ payload_len < struct_size(payload, data, payload->command_count))
+ return -EINVAL;
+
+ for (i = 0; i < payload->command_count; i++) {
+ u32 boh = (u32)(payload->data[i]);
+ struct amdxdna_gem_obj *abo;
+
+ abo = amdxdna_gem_get_obj(client, boh, AMDXDNA_BO_CMD);
+ if (!abo) {
+ XDNA_ERR(client->xdna, "Failed to find cmd BO %d", boh);
+ return -ENOENT;
+ }
+
+ /* All sub-cmd should have same op, use the first one. */
+ if (i == 0)
+ op = amdxdna_cmd_get_op(abo);
+
+ /* size receives the bytes consumed by this slot */
+ ret = aie2_cmdlist_fill_one_slot(op, cmdbuf_abo, offset, abo, &size);
+ amdxdna_gem_put_obj(abo);
+ /* Any fill error, including -ENOSPC, is reported as -EINVAL */
+ if (ret)
+ return -EINVAL;
+
+ offset += size;
+ }
+
+ /* The offset is the accumulated total size of the cmd buffer */
+ aie2_cmdlist_prepare_request(&req, cmdbuf_abo, offset, payload->command_count);
+
+ msg.opcode = aie2_cmd_op_to_msg_op(op);
+ if (msg.opcode == MSG_OP_MAX_OPCODE)
+ return -EOPNOTSUPP;
+ msg.handle = job;
+ msg.notify_cb = notify_cb;
+ msg.send_data = (u8 *)&req;
+ msg.send_size = sizeof(req);
+ /* NOTE(review): chann is not checked for NULL here, unlike aie2_execbuf() */
+ ret = xdna_mailbox_send_msg(chann, &msg, TX_TIMEOUT);
+ if (ret) {
+ XDNA_ERR(hwctx->client->xdna, "Send message failed");
+ return ret;
+ }
+
+ return 0;
+}
+
+/*
+ * Submit a single command through the command chain path: the job's command
+ * BO is packed into slot 0 of the context's command chain buffer and posted
+ * as a chain of one.
+ *
+ * Returns 0 on success, the error from aie2_cmdlist_fill_one_slot(),
+ * -EOPNOTSUPP if the opcode has no chained variant, or the mailbox send
+ * error.
+ */
+int aie2_cmdlist_single_execbuf(struct amdxdna_hwctx *hwctx,
+ struct amdxdna_sched_job *job,
+ int (*notify_cb)(void *, const u32 *, size_t))
+{
+ struct amdxdna_gem_obj *cmdbuf_abo = aie2_cmdlist_get_cmd_buf(job);
+ struct mailbox_channel *chann = hwctx->priv->mbox_chann;
+ struct amdxdna_gem_obj *cmd_abo = job->cmd_bo;
+ struct xdna_mailbox_msg msg;
+ struct cmd_chain_req req;
+ u32 size;
+ int ret;
+ u32 op;
+
+ op = amdxdna_cmd_get_op(cmd_abo);
+ /* size receives the bytes consumed by the slot */
+ ret = aie2_cmdlist_fill_one_slot(op, cmdbuf_abo, 0, cmd_abo, &size);
+ if (ret)
+ return ret;
+
+ aie2_cmdlist_prepare_request(&req, cmdbuf_abo, size, 1);
+
+ msg.opcode = aie2_cmd_op_to_msg_op(op);
+ if (msg.opcode == MSG_OP_MAX_OPCODE)
+ return -EOPNOTSUPP;
+ msg.handle = job;
+ msg.notify_cb = notify_cb;
+ msg.send_data = (u8 *)&req;
+ msg.send_size = sizeof(req);
+ /* NOTE(review): chann is not checked for NULL here, unlike aie2_execbuf() */
+ ret = xdna_mailbox_send_msg(chann, &msg, TX_TIMEOUT);
+ if (ret) {
+ XDNA_ERR(hwctx->client->xdna, "Send message failed");
+ return ret;
+ }
+
+ return 0;
+}
+
+/*
+ * Post a MSG_OP_SYNC_BO request for the first BO of @job (device memory to
+ * host memory) on the context's mailbox channel, without waiting for the
+ * response.
+ *
+ * Returns 0 on success or the error from xdna_mailbox_send_msg().
+ */
+int aie2_sync_bo(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job,
+ int (*notify_cb)(void *, const u32 *, size_t))
+{
+ struct mailbox_channel *chann = hwctx->priv->mbox_chann;
+ struct amdxdna_gem_obj *abo = to_xdna_obj(job->bos[0]);
+ struct amdxdna_dev *xdna = hwctx->client->xdna;
+ struct xdna_mailbox_msg msg;
+ struct sync_bo_req req;
+ int ret = 0;
+
+ req.src_addr = 0;
+ /* Destination is the BO's offset from the start of the client's device heap */
+ req.dst_addr = abo->mem.dev_addr - hwctx->client->dev_heap->mem.dev_addr;
+ req.size = abo->mem.size;
+
+ /* Device to Host */
+ req.type = FIELD_PREP(AIE2_MSG_SYNC_BO_SRC_TYPE, SYNC_BO_DEV_MEM) |
+ FIELD_PREP(AIE2_MSG_SYNC_BO_DST_TYPE, SYNC_BO_HOST_MEM);
+
+ /* Logged before the message is sent, despite the "completed" wording */
+ XDNA_DBG(xdna, "sync %d bytes src(0x%llx) to dst(0x%llx) completed",
+ req.size, req.src_addr, req.dst_addr);
+
+ msg.handle = job;
+ msg.notify_cb = notify_cb;
+ msg.send_data = (u8 *)&req;
+ msg.send_size = sizeof(req);
+ msg.opcode = MSG_OP_SYNC_BO;
+
+ /* NOTE(review): chann is not checked for NULL here, unlike aie2_execbuf() */
+ ret = xdna_mailbox_send_msg(chann, &msg, TX_TIMEOUT);
+ if (ret) {
+ XDNA_ERR(xdna, "Send message failed");
+ return ret;
+ }
+
+ return 0;
+}
diff --git a/drivers/accel/amdxdna/aie2_msg_priv.h b/drivers/accel/amdxdna/aie2_msg_priv.h
new file mode 100644
index 000000000000..4e02e744b470
--- /dev/null
+++ b/drivers/accel/amdxdna/aie2_msg_priv.h
@@ -0,0 +1,370 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2022-2024, Advanced Micro Devices, Inc.
+ */
+
+#ifndef _AIE2_MSG_PRIV_H_
+#define _AIE2_MSG_PRIV_H_
+
+/*
+ * Mailbox message opcodes. The values fall into three numeric ranges, each
+ * closed by a MSG_OP_MAX_* enumerator that takes the next value after the
+ * last explicit entry of its range.
+ */
+enum aie2_msg_opcode {
+ MSG_OP_CREATE_CONTEXT = 0x2,
+ MSG_OP_DESTROY_CONTEXT = 0x3,
+ MSG_OP_SYNC_BO = 0x7,
+ MSG_OP_EXECUTE_BUFFER_CF = 0xC,
+ MSG_OP_QUERY_COL_STATUS = 0xD,
+ MSG_OP_QUERY_AIE_TILE_INFO = 0xE,
+ MSG_OP_QUERY_AIE_VERSION = 0xF,
+ MSG_OP_EXEC_DPU = 0x10,
+ MSG_OP_CONFIG_CU = 0x11,
+ MSG_OP_CHAIN_EXEC_BUFFER_CF = 0x12,
+ MSG_OP_CHAIN_EXEC_DPU = 0x13,
+ /* Implicitly 0x14: one past the last opcode of the first range */
+ MSG_OP_MAX_XRT_OPCODE,
+ MSG_OP_SUSPEND = 0x101,
+ MSG_OP_RESUME = 0x102,
+ MSG_OP_ASSIGN_MGMT_PASID = 0x103,
+ MSG_OP_INVOKE_SELF_TEST = 0x104,
+ MSG_OP_MAP_HOST_BUFFER = 0x106,
+ MSG_OP_GET_FIRMWARE_VERSION = 0x108,
+ MSG_OP_SET_RUNTIME_CONFIG = 0x10A,
+ MSG_OP_GET_RUNTIME_CONFIG = 0x10B,
+ MSG_OP_REGISTER_ASYNC_EVENT_MSG = 0x10C,
+ /* Implicitly 0x10D: one past the last opcode of the second range */
+ MSG_OP_MAX_DRV_OPCODE,
+ MSG_OP_GET_PROTOCOL_VERSION = 0x301,
+ /* Implicitly 0x302; also used by callers as a "no such opcode" marker */
+ MSG_OP_MAX_OPCODE
+};
+
+/*
+ * Status codes carried in the "status" member of the response structures
+ * below. The top byte of the value selects the error group (0x1 AIE,
+ * 0x2 MGMT ERT, 0x3 APP ERT, 0x4 NPU RTOS); enumerators without an explicit
+ * value continue from the previous one.
+ */
+enum aie2_msg_status {
+ AIE2_STATUS_SUCCESS = 0x0,
+ /* AIE Error codes */
+ AIE2_STATUS_AIE_SATURATION_ERROR = 0x1000001,
+ AIE2_STATUS_AIE_FP_ERROR = 0x1000002,
+ AIE2_STATUS_AIE_STREAM_ERROR = 0x1000003,
+ AIE2_STATUS_AIE_ACCESS_ERROR = 0x1000004,
+ AIE2_STATUS_AIE_BUS_ERROR = 0x1000005,
+ AIE2_STATUS_AIE_INSTRUCTION_ERROR = 0x1000006,
+ AIE2_STATUS_AIE_ECC_ERROR = 0x1000007,
+ AIE2_STATUS_AIE_LOCK_ERROR = 0x1000008,
+ AIE2_STATUS_AIE_DMA_ERROR = 0x1000009,
+ AIE2_STATUS_AIE_MEM_PARITY_ERROR = 0x100000a,
+ AIE2_STATUS_AIE_PWR_CFG_ERROR = 0x100000b,
+ AIE2_STATUS_AIE_BACKTRACK_ERROR = 0x100000c,
+ AIE2_STATUS_MAX_AIE_STATUS_CODE,
+ /* MGMT ERT Error codes */
+ AIE2_STATUS_MGMT_ERT_SELF_TEST_FAILURE = 0x2000001,
+ AIE2_STATUS_MGMT_ERT_HASH_MISMATCH,
+ AIE2_STATUS_MGMT_ERT_NOAVAIL,
+ AIE2_STATUS_MGMT_ERT_INVALID_PARAM,
+ AIE2_STATUS_MGMT_ERT_ENTER_SUSPEND_FAILURE,
+ AIE2_STATUS_MGMT_ERT_BUSY,
+ AIE2_STATUS_MGMT_ERT_APPLICATION_ACTIVE,
+ MAX_MGMT_ERT_STATUS_CODE,
+ /* APP ERT Error codes */
+ AIE2_STATUS_APP_ERT_FIRST_ERROR = 0x3000001,
+ AIE2_STATUS_APP_INVALID_INSTR,
+ AIE2_STATUS_APP_LOAD_PDI_FAIL,
+ MAX_APP_ERT_STATUS_CODE,
+ /* NPU RTOS Error Codes */
+ AIE2_STATUS_INVALID_INPUT_BUFFER = 0x4000001,
+ AIE2_STATUS_INVALID_COMMAND,
+ AIE2_STATUS_INVALID_PARAM,
+ /* Explicit value: 0x4000004 and 0x4000005 are not defined here */
+ AIE2_STATUS_INVALID_OPERATION = 0x4000006,
+ AIE2_STATUS_ASYNC_EVENT_MSGS_FULL,
+ AIE2_STATUS_MAX_RTOS_STATUS_CODE,
+ MAX_AIE2_STATUS_CODE
+};
+
+/* Payload pair named after MSG_OP_ASSIGN_MGMT_PASID. */
+struct assign_mgmt_pasid_req {
+ __u16 pasid;
+ __u16 reserved;
+} __packed;
+
+struct assign_mgmt_pasid_resp {
+ enum aie2_msg_status status;
+} __packed;
+
+/* Payload pair named after MSG_OP_MAP_HOST_BUFFER. */
+struct map_host_buffer_req {
+ __u32 context_id;
+ __u64 buf_addr;
+ __u64 buf_size;
+} __packed;
+
+struct map_host_buffer_resp {
+ enum aie2_msg_status status;
+} __packed;
+
+/* Number of cq_pair entries carried in create_ctx_resp */
+#define MAX_CQ_PAIRS 2
+/*
+ * Description of one queue: head/tail locations plus buffer location and
+ * size. Not marked __packed; all members are __u32.
+ */
+struct cq_info {
+ __u32 head_addr;
+ __u32 tail_addr;
+ __u32 buf_addr;
+ __u32 buf_size;
+};
+
+/* One x2i queue and one i2x queue that belong together */
+struct cq_pair {
+ struct cq_info x2i_q;
+ struct cq_info i2x_q;
+};
+
+/* Payload pair named after MSG_OP_CREATE_CONTEXT. */
+struct create_ctx_req {
+ __u32 aie_type;
+ __u8 start_col;
+ __u8 num_col;
+ __u16 reserved;
+ __u8 num_cq_pairs_requested;
+ __u8 reserved1;
+ __u16 pasid;
+ __u32 pad[2];
+ __u32 sec_comm_target_type;
+ __u32 context_priority;
+} __packed;
+
+struct create_ctx_resp {
+ enum aie2_msg_status status;
+ __u32 context_id;
+ __u16 msix_id;
+ __u8 num_cq_pairs_allocated;
+ __u8 reserved;
+ struct cq_pair cq_pair[MAX_CQ_PAIRS];
+} __packed;
+
+/* Payload pair named after MSG_OP_DESTROY_CONTEXT. */
+struct destroy_ctx_req {
+ __u32 context_id;
+} __packed;
+
+struct destroy_ctx_resp {
+ enum aie2_msg_status status;
+} __packed;
+
+/* Single-command execute request: CU index followed by a fixed payload area */
+struct execute_buffer_req {
+ __u32 cu_idx;
+ __u32 payload[19];
+} __packed;
+
+/* Single-command DPU request: instruction buffer description, CU index, payload */
+struct exec_dpu_req {
+ __u64 inst_buf_addr;
+ __u32 inst_size;
+ __u32 inst_prop_cnt;
+ __u32 cu_idx;
+ __u32 payload[35];
+} __packed;
+
+struct execute_buffer_resp {
+ enum aie2_msg_status status;
+} __packed;
+
+/*
+ * Tile geometry and per-tile-type resource counts (core, mem and shim).
+ * Not marked __packed; it is embedded in the packed aie_tile_info_resp.
+ */
+struct aie_tile_info {
+ __u32 size;
+ __u16 major;
+ __u16 minor;
+ __u16 cols;
+ __u16 rows;
+ __u16 core_rows;
+ __u16 mem_rows;
+ __u16 shim_rows;
+ __u16 core_row_start;
+ __u16 mem_row_start;
+ __u16 shim_row_start;
+ __u16 core_dma_channels;
+ __u16 mem_dma_channels;
+ __u16 shim_dma_channels;
+ __u16 core_locks;
+ __u16 mem_locks;
+ __u16 shim_locks;
+ __u16 core_events;
+ __u16 mem_events;
+ __u16 shim_events;
+ __u16 reserved;
+};
+
+/* Payload pair named after MSG_OP_QUERY_AIE_TILE_INFO. */
+struct aie_tile_info_req {
+ __u32 reserved;
+} __packed;
+
+struct aie_tile_info_resp {
+ enum aie2_msg_status status;
+ struct aie_tile_info info;
+} __packed;
+
+/* Payload pair named after MSG_OP_QUERY_AIE_VERSION. */
+struct aie_version_info_req {
+ __u32 reserved;
+} __packed;
+
+struct aie_version_info_resp {
+ enum aie2_msg_status status;
+ __u16 major;
+ __u16 minor;
+} __packed;
+
+/* Column status query: dump buffer location/size and the columns requested */
+struct aie_column_info_req {
+ __u64 dump_buff_addr;
+ __u32 dump_buff_size;
+ __u32 num_cols;
+ __u32 aie_bitmap;
+} __packed;
+
+struct aie_column_info_resp {
+ enum aie2_msg_status status;
+ __u32 size;
+} __packed;
+
+/* Payload pair named after MSG_OP_SUSPEND; the request carries no data. */
+struct suspend_req {
+ __u32 place_holder;
+} __packed;
+
+struct suspend_resp {
+ enum aie2_msg_status status;
+} __packed;
+
+/* Payload pair named after MSG_OP_RESUME; the request carries no data. */
+struct resume_req {
+ __u32 place_holder;
+} __packed;
+
+struct resume_resp {
+ enum aie2_msg_status status;
+} __packed;
+
+/* 128-bit hash split into two 64-bit halves */
+struct check_header_hash_req {
+ __u64 hash_high;
+ __u64 hash_low;
+} __packed;
+
+struct check_header_hash_resp {
+ enum aie2_msg_status status;
+} __packed;
+
+/*
+ * Error query. The next_row/next_column/next_module members exist in both
+ * request and response, presumably so a query can be continued from where
+ * the previous response stopped -- confirm against the firmware interface.
+ */
+struct query_error_req {
+ __u64 buf_addr;
+ __u32 buf_size;
+ __u32 next_row;
+ __u32 next_column;
+ __u32 next_module;
+} __packed;
+
+struct query_error_resp {
+ enum aie2_msg_status status;
+ __u32 num_err;
+ __u32 has_next_err;
+ __u32 next_row;
+ __u32 next_column;
+ __u32 next_module;
+} __packed;
+
+/* Payload pair named after MSG_OP_GET_PROTOCOL_VERSION. */
+struct protocol_version_req {
+ __u32 reserved;
+} __packed;
+
+struct protocol_version_resp {
+ enum aie2_msg_status status;
+ __u32 major;
+ __u32 minor;
+} __packed;
+
+/* Payload pair named after MSG_OP_GET_FIRMWARE_VERSION. */
+struct firmware_version_req {
+ __u32 reserved;
+} __packed;
+
+struct firmware_version_resp {
+ enum aie2_msg_status status;
+ __u32 major;
+ __u32 minor;
+ __u32 sub;
+ __u32 build;
+} __packed;
+
+/* Capacity of config_cu_req.cfgs[] */
+#define MAX_NUM_CUS 32
+/* Bit fields packed into each config_cu_req.cfgs[] word */
+#define AIE2_MSG_CFG_CU_PDI_ADDR GENMASK(16, 0)
+#define AIE2_MSG_CFG_CU_FUNC GENMASK(24, 17)
+/* Payload pair named after MSG_OP_CONFIG_CU. */
+struct config_cu_req {
+ __u32 num_cus;
+ __u32 cfgs[MAX_NUM_CUS];
+} __packed;
+
+struct config_cu_resp {
+ enum aie2_msg_status status;
+} __packed;
+
+/* Payload pair named after MSG_OP_SET_RUNTIME_CONFIG: one type/value setting */
+struct set_runtime_cfg_req {
+ __u32 type;
+ __u64 value;
+} __packed;
+
+struct set_runtime_cfg_resp {
+ enum aie2_msg_status status;
+} __packed;
+
+/* Payload pair named after MSG_OP_GET_RUNTIME_CONFIG. */
+struct get_runtime_cfg_req {
+ __u32 type;
+} __packed;
+
+struct get_runtime_cfg_resp {
+ enum aie2_msg_status status;
+ __u64 value;
+} __packed;
+
+/* Event kinds reported in async_event_msg_resp.type */
+enum async_event_type {
+ ASYNC_EVENT_TYPE_AIE_ERROR,
+ ASYNC_EVENT_TYPE_EXCEPTION,
+ MAX_ASYNC_EVENT_TYPE
+};
+
+#define ASYNC_BUF_SIZE SZ_8K
+/* Async event registration: location and size of the event buffer */
+struct async_event_msg_req {
+ __u64 buf_addr;
+ __u32 buf_size;
+} __packed;
+
+struct async_event_msg_resp {
+ enum aie2_msg_status status;
+ enum async_event_type type;
+} __packed;
+
+/* Size of the buffer that holds a chain of command slots */
+#define MAX_CHAIN_CMDBUF_SIZE SZ_4K
+/*
+ * slot_cf_has_space() - true when a cmd_chain_slot_execbuf_cf header plus
+ * @payload_size bytes of arguments, placed at @offset, stays strictly below
+ * MAX_CHAIN_CMDBUF_SIZE.
+ *
+ * The check is written as a sum compared against the limit. Subtracting
+ * (@offset + @payload_size) from the limit, as an unsigned expression, wraps
+ * to a huge value once the sum exceeds the limit and would wrongly report
+ * that space is available.
+ */
+#define slot_cf_has_space(offset, payload_size) \
+	((offset) + (payload_size) + \
+	 offsetof(struct cmd_chain_slot_execbuf_cf, args[0]) < \
+	 MAX_CHAIN_CMDBUF_SIZE)
+/* One execbuf slot in a command chain: header followed by arg_cnt arguments */
+struct cmd_chain_slot_execbuf_cf {
+	__u32 cu_idx;
+	__u32 arg_cnt;
+	__u32 args[] __counted_by(arg_cnt);
+};
+
+/*
+ * slot_dpu_has_space() - same check as slot_cf_has_space(), for a
+ * cmd_chain_slot_dpu header. Written as a sum for the same wrap-around
+ * reason.
+ */
+#define slot_dpu_has_space(offset, payload_size) \
+	((offset) + (payload_size) + \
+	 offsetof(struct cmd_chain_slot_dpu, args[0]) < \
+	 MAX_CHAIN_CMDBUF_SIZE)
+/* One DPU slot in a command chain: header followed by arg_cnt arguments */
+struct cmd_chain_slot_dpu {
+	__u64 inst_buf_addr;
+	__u32 inst_size;
+	__u32 inst_prop_cnt;
+	__u32 cu_idx;
+	__u32 arg_cnt;
+#define MAX_DPU_ARGS_SIZE (34 * sizeof(__u32))
+	__u32 args[] __counted_by(arg_cnt);
+};
+
+/* Command chain request: location and size of the slot buffer, slot count */
+struct cmd_chain_req {
+ __u64 buf_addr;
+ __u32 buf_size;
+ __u32 count;
+} __packed;
+
+/* Command chain response: overall status plus index/status of a failed slot */
+struct cmd_chain_resp {
+ enum aie2_msg_status status;
+ __u32 fail_cmd_idx;
+ enum aie2_msg_status fail_cmd_status;
+} __packed;
+
+/* Bit fields of sync_bo_req.type: source memory kind and destination memory kind */
+#define AIE2_MSG_SYNC_BO_SRC_TYPE GENMASK(3, 0)
+#define AIE2_MSG_SYNC_BO_DST_TYPE GENMASK(7, 4)
+/* Payload pair for MSG_OP_SYNC_BO */
+struct sync_bo_req {
+ __u64 src_addr;
+ __u64 dst_addr;
+ __u32 size;
+/* Values for the SRC_TYPE/DST_TYPE fields of "type" */
+#define SYNC_BO_DEV_MEM 0
+#define SYNC_BO_HOST_MEM 2
+ __u32 type;
+} __packed;
+
+struct sync_bo_resp {
+ enum aie2_msg_status status;
+} __packed;
+#endif /* _AIE2_MSG_PRIV_H_ */
diff --git a/drivers/accel/amdxdna/aie2_pci.c b/drivers/accel/amdxdna/aie2_pci.c
new file mode 100644
index 000000000000..5a058e565b01
--- /dev/null
+++ b/drivers/accel/amdxdna/aie2_pci.c
@@ -0,0 +1,928 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2023-2024, Advanced Micro Devices, Inc.
+ */
+
+#include <drm/amdxdna_accel.h>
+#include <drm/drm_device.h>
+#include <drm/drm_drv.h>
+#include <drm/drm_gem_shmem_helper.h>
+#include <drm/drm_managed.h>
+#include <drm/drm_print.h>
+#include <drm/gpu_scheduler.h>
+#include <linux/errno.h>
+#include <linux/firmware.h>
+#include <linux/iommu.h>
+#include <linux/iopoll.h>
+#include <linux/pci.h>
+#include <linux/xarray.h>
+
+#include "aie2_msg_priv.h"
+#include "aie2_pci.h"
+#include "aie2_solver.h"
+#include "amdxdna_ctx.h"
+#include "amdxdna_gem.h"
+#include "amdxdna_mailbox.h"
+#include "amdxdna_pci_drv.h"
+
+/*
+ * Upper bound on the number of columns the driver uses; aie2_init() takes
+ * the smaller of this and the column count reported by firmware.
+ *
+ * The variable is unsigned to match the "uint" parameter type it is
+ * registered with, so a large value written through the parameter cannot be
+ * read back as a negative number.
+ */
+static unsigned int aie2_max_col = XRS_MAX_COL;
+module_param(aie2_max_col, uint, 0600);
+MODULE_PARM_DESC(aie2_max_col, "Maximum column could be used");
+
+/*
+ * The management mailbox channel is allocated by firmware.
+ * The related register and ring buffer information is on SRAM BAR.
+ * This struct is the register layout.
+ *
+ * aie2_get_mgmt_chann_info() reads it one 32-bit word at a time, validates
+ * "magic" and translates the addresses into BAR offsets.
+ */
+#define MGMT_MBOX_MAGIC 0x55504e5f /* _NPU */
+struct mgmt_mbox_chann_info {
+ /* x2i channel: tail/head pointer registers, ring buffer address and size */
+ __u32 x2i_tail;
+ __u32 x2i_head;
+ __u32 x2i_buf;
+ __u32 x2i_buf_sz;
+ /* i2x channel: tail/head pointer registers, ring buffer address and size */
+ __u32 i2x_tail;
+ __u32 i2x_head;
+ __u32 i2x_buf;
+ __u32 i2x_buf_sz;
+ /* Must equal MGMT_MBOX_MAGIC for the block to be accepted */
+ __u32 magic;
+ /* Stored as ndev->mgmt_chan_idx and used as the IRQ vector index */
+ __u32 msi_id;
+ /* Mailbox protocol version, checked by aie2_check_protocol() */
+ __u32 prot_major;
+ __u32 prot_minor;
+ __u32 rsvd[4];
+};
+
+/*
+ * Check the mailbox protocol version reported by firmware against the
+ * version this driver implements (ndev->priv->protocol_major and
+ * protocol_minor).
+ *
+ * Return: 0 when the versions are compatible, -EINVAL otherwise.
+ */
+static int aie2_check_protocol(struct amdxdna_dev_hdl *ndev, u32 fw_major, u32 fw_minor)
+{
+	struct amdxdna_dev *xdna = ndev->xdna;
+	bool major_ok = fw_major == ndev->priv->protocol_major;
+	bool minor_ok = fw_minor >= ndev->priv->protocol_minor;
+
+	/* A different major number means driver and firmware are incompatible. */
+	if (!major_ok) {
+		XDNA_ERR(xdna, "Incompatible firmware protocol major %d minor %d",
+			 fw_major, fw_minor);
+		return -EINVAL;
+	}
+
+	/*
+	 * A driver minor greater than the firmware minor means the driver
+	 * relies on operations the installed firmware does not support.
+	 */
+	if (!minor_ok) {
+		XDNA_ERR(xdna, "Firmware minor version smaller than supported");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+/* Emit the cached management channel layout and protocol version at debug level. */
+static void aie2_dump_chann_info_debug(struct amdxdna_dev_hdl *ndev)
+{
+	const struct xdna_mailbox_chann_res *i2x = &ndev->mgmt_i2x;
+	const struct xdna_mailbox_chann_res *x2i = &ndev->mgmt_x2i;
+	struct amdxdna_dev *xdna = ndev->xdna;
+
+	XDNA_DBG(xdna, "i2x tail 0x%x", i2x->mb_tail_ptr_reg);
+	XDNA_DBG(xdna, "i2x head 0x%x", i2x->mb_head_ptr_reg);
+	XDNA_DBG(xdna, "i2x ringbuf 0x%x", i2x->rb_start_addr);
+	XDNA_DBG(xdna, "i2x rsize 0x%x", i2x->rb_size);
+
+	XDNA_DBG(xdna, "x2i tail 0x%x", x2i->mb_tail_ptr_reg);
+	XDNA_DBG(xdna, "x2i head 0x%x", x2i->mb_head_ptr_reg);
+	XDNA_DBG(xdna, "x2i ringbuf 0x%x", x2i->rb_start_addr);
+	XDNA_DBG(xdna, "x2i rsize 0x%x", x2i->rb_size);
+
+	XDNA_DBG(xdna, "x2i chann index 0x%x", ndev->mgmt_chan_idx);
+	XDNA_DBG(xdna, "mailbox protocol major 0x%x", ndev->mgmt_prot_major);
+	XDNA_DBG(xdna, "mailbox protocol minor 0x%x", ndev->mgmt_prot_minor);
+}
+
+/*
+ * Wait for firmware to publish the management channel description, copy it
+ * out of SRAM and cache the translated values in @ndev.
+ *
+ * Return: 0 on success, -ETIME when no non-zero address shows up at
+ * FW_ALIVE_OFF in time, -EINVAL on a bad magic value, or the result of
+ * aie2_check_protocol().
+ */
+static int aie2_get_mgmt_chann_info(struct amdxdna_dev_hdl *ndev)
+{
+ struct mgmt_mbox_chann_info info_regs;
+ struct xdna_mailbox_chann_res *i2x;
+ struct xdna_mailbox_chann_res *x2i;
+ u32 addr, off;
+ u32 *reg;
+ int ret;
+ int i;
+
+ /*
+ * Once firmware is alive, it will write management channel
+ * information in SRAM BAR and write the address of that information
+ * at FW_ALIVE_OFF offset in SRAM BAR.
+ *
+ * Read a non-zero value from FW_ALIVE_OFF implies that firmware
+ * is alive.
+ */
+ ret = readx_poll_timeout(readl, SRAM_GET_ADDR(ndev, FW_ALIVE_OFF),
+ addr, addr, AIE2_INTERVAL, AIE2_TIMEOUT);
+ if (ret || !addr)
+ return -ETIME;
+
+ /* Turn the device address into an offset from sram_base, then copy word by word */
+ off = AIE2_SRAM_OFF(ndev, addr);
+ reg = (u32 *)&info_regs;
+ for (i = 0; i < sizeof(info_regs) / sizeof(u32); i++)
+ reg[i] = readl(ndev->sram_base + off + i * sizeof(u32));
+
+ if (info_regs.magic != MGMT_MBOX_MAGIC) {
+ XDNA_ERR(ndev->xdna, "Invalid mbox magic 0x%x", info_regs.magic);
+ ret = -EINVAL;
+ goto done;
+ }
+
+ i2x = &ndev->mgmt_i2x;
+ x2i = &ndev->mgmt_x2i;
+
+ /* Pointer registers are mailbox-relative, ring buffers are SRAM-relative */
+ i2x->mb_head_ptr_reg = AIE2_MBOX_OFF(ndev, info_regs.i2x_head);
+ i2x->mb_tail_ptr_reg = AIE2_MBOX_OFF(ndev, info_regs.i2x_tail);
+ i2x->rb_start_addr = AIE2_SRAM_OFF(ndev, info_regs.i2x_buf);
+ i2x->rb_size = info_regs.i2x_buf_sz;
+
+ x2i->mb_head_ptr_reg = AIE2_MBOX_OFF(ndev, info_regs.x2i_head);
+ x2i->mb_tail_ptr_reg = AIE2_MBOX_OFF(ndev, info_regs.x2i_tail);
+ x2i->rb_start_addr = AIE2_SRAM_OFF(ndev, info_regs.x2i_buf);
+ x2i->rb_size = info_regs.x2i_buf_sz;
+
+ ndev->mgmt_chan_idx = info_regs.msi_id;
+ ndev->mgmt_prot_major = info_regs.prot_major;
+ ndev->mgmt_prot_minor = info_regs.prot_minor;
+
+ ret = aie2_check_protocol(ndev, ndev->mgmt_prot_major, ndev->mgmt_prot_minor);
+
+done:
+ /* Also reached on a bad magic, in which case the cached fields were not updated */
+ aie2_dump_chann_info_debug(ndev);
+
+ /* Must clear address at FW_ALIVE_OFF */
+ writel(0, SRAM_GET_ADDR(ndev, FW_ALIVE_OFF));
+
+ return ret;
+}
+
+/*
+ * aie2_runtime_cfg() - Apply every runtime config entry of one category
+ * @ndev: device handle
+ * @category: only table entries of this category are applied
+ * @val: when non-NULL, value to use instead of each entry's own value
+ *
+ * Walks ndev->priv->rt_config until an entry with a zero type is found.
+ *
+ * Return: 0 on success, or the first error from aie2_set_runtime_cfg().
+ */
+int aie2_runtime_cfg(struct amdxdna_dev_hdl *ndev,
+		     enum rt_config_category category, u32 *val)
+{
+	const struct rt_config *entry = ndev->priv->rt_config;
+
+	for (; entry->type; entry++) {
+		u32 setting;
+		int err;
+
+		if (entry->category == category) {
+			if (val)
+				setting = *val;
+			else
+				setting = entry->value;
+
+			err = aie2_set_runtime_cfg(ndev, entry->type, setting);
+			if (err) {
+				XDNA_ERR(ndev->xdna, "Set type %d value %d failed",
+					 entry->type, setting);
+				return err;
+			}
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Suspend the firmware and resume it again.
+ *
+ * Return: 0 on success, or the error of whichever step failed first.
+ */
+static int aie2_xdna_reset(struct amdxdna_dev_hdl *ndev)
+{
+	int err = aie2_suspend_fw(ndev);
+
+	if (err) {
+		XDNA_ERR(ndev->xdna, "Suspend firmware failed");
+		return err;
+	}
+
+	err = aie2_resume_fw(ndev);
+	if (err)
+		XDNA_ERR(ndev->xdna, "Resume firmware failed");
+
+	return err;
+}
+
+/*
+ * Initial firmware setup over the management channel: apply the
+ * AIE2_RT_CFG_INIT runtime settings, assign PASID 0, suspend/resume the
+ * firmware and, when async event buffers exist, send them to firmware.
+ *
+ * Return: 0 on success, or the error of the first step that failed.
+ */
+static int aie2_mgmt_fw_init(struct amdxdna_dev_hdl *ndev)
+{
+	int err;
+
+	err = aie2_runtime_cfg(ndev, AIE2_RT_CFG_INIT, NULL);
+	if (err) {
+		XDNA_ERR(ndev->xdna, "Runtime config failed");
+		return err;
+	}
+
+	err = aie2_assign_mgmt_pasid(ndev, 0);
+	if (err) {
+		XDNA_ERR(ndev->xdna, "Can not assign PASID");
+		return err;
+	}
+
+	err = aie2_xdna_reset(ndev);
+	if (err) {
+		XDNA_ERR(ndev->xdna, "Reset firmware failed");
+		return err;
+	}
+
+	/* ndev->async_events is still unset the first time this runs from aie2_init() */
+	if (ndev->async_events) {
+		err = aie2_error_async_events_send(ndev);
+		if (err)
+			XDNA_ERR(ndev->xdna, "Send async events failed");
+	}
+
+	return err;
+}
+
+/*
+ * Query firmware version, AIE version and AIE metadata and cache them in
+ * ndev->xdna->fw_ver, ndev->version and ndev->metadata respectively.
+ *
+ * Return: 0 on success, or the error of the first query that failed.
+ */
+static int aie2_mgmt_fw_query(struct amdxdna_dev_hdl *ndev)
+{
+	int err;
+
+	err = aie2_query_firmware_version(ndev, &ndev->xdna->fw_ver);
+	if (err) {
+		XDNA_ERR(ndev->xdna, "query firmware version failed");
+		return err;
+	}
+
+	err = aie2_query_aie_version(ndev, &ndev->version);
+	if (err) {
+		XDNA_ERR(ndev->xdna, "Query AIE version failed");
+		return err;
+	}
+
+	err = aie2_query_aie_metadata(ndev, &ndev->metadata);
+	if (err)
+		XDNA_ERR(ndev->xdna, "Query AIE metadata failed");
+
+	return err;
+}
+
+/*
+ * Ask firmware to suspend as part of stopping the device.
+ *
+ * Failure is only logged; the caller continues tearing the device down.
+ * The "Firmware suspended" trace is emitted only when the suspend request
+ * actually succeeded, so the log never contradicts the error above it.
+ */
+static void aie2_mgmt_fw_fini(struct amdxdna_dev_hdl *ndev)
+{
+	if (aie2_suspend_fw(ndev)) {
+		XDNA_ERR(ndev->xdna, "Suspend_fw failed");
+		return;
+	}
+
+	XDNA_DBG(ndev->xdna, "Firmware suspended");
+}
+
+/*
+ * Resolver "load" action: record the column partition chosen for the
+ * hardware context passed as @cb_arg and create the firmware context.
+ *
+ * Return: result of aie2_create_context().
+ */
+static int aie2_xrs_load(void *cb_arg, struct xrs_action_load *action)
+{
+	struct amdxdna_hwctx *hwctx = cb_arg;
+	struct amdxdna_dev *xdna = hwctx->client->xdna;
+	int err;
+
+	hwctx->start_col = action->part.start_col;
+	hwctx->num_col = action->part.ncols;
+
+	err = aie2_create_context(xdna->dev_handle, hwctx);
+	if (!err)
+		return 0;
+
+	XDNA_ERR(xdna, "create context failed, ret %d", err);
+	return err;
+}
+
+/*
+ * Resolver "unload" action: destroy the firmware context that belongs to
+ * the hardware context passed as @cb_arg.
+ *
+ * Return: result of aie2_destroy_context().
+ */
+static int aie2_xrs_unload(void *cb_arg)
+{
+	struct amdxdna_hwctx *hwctx = cb_arg;
+	struct amdxdna_dev *xdna = hwctx->client->xdna;
+	int err;
+
+	err = aie2_destroy_context(xdna->dev_handle, hwctx);
+	if (!err)
+		return 0;
+
+	XDNA_ERR(xdna, "destroy context failed, ret %d", err);
+	return err;
+}
+
+/*
+ * Resolver action: remember @dpm_level as the default DPM level and apply
+ * it right away when the device is in POWER_MODE_DEFAULT and not already
+ * running at that level. Warns when xdna->dev_lock is not held.
+ *
+ * Return: 0 when nothing had to be applied, else the result of the
+ * set_dpm hardware op.
+ */
+static int aie2_xrs_set_dft_dpm_level(struct drm_device *ddev, u32 dpm_level)
+{
+	struct amdxdna_dev *xdna = to_xdna_dev(ddev);
+	struct amdxdna_dev_hdl *ndev = xdna->dev_handle;
+
+	drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock));
+
+	ndev->dft_dpm_level = dpm_level;
+
+	if (ndev->pw_mode == POWER_MODE_DEFAULT && ndev->dpm_level != dpm_level)
+		return ndev->priv->hw_ops.set_dpm(ndev, dpm_level);
+
+	return 0;
+}
+
+/* Callbacks handed to the resolver through init_config.actions in aie2_init() */
+static struct xrs_action_ops aie2_xrs_actions = {
+ .load = aie2_xrs_load,
+ .unload = aie2_xrs_unload,
+ .set_dft_dpm_level = aie2_xrs_set_dft_dpm_level,
+};
+
+/*
+ * Stop the device: suspend firmware, tear down the management channel and
+ * mailbox, stop PSP and SMU and disable the PCI device. Also installed as
+ * the .suspend op.
+ *
+ * Does nothing but log when the device status is not past AIE2_DEV_INIT.
+ */
+static void aie2_hw_stop(struct amdxdna_dev *xdna)
+{
+ struct pci_dev *pdev = to_pci_dev(xdna->ddev.dev);
+ struct amdxdna_dev_hdl *ndev = xdna->dev_handle;
+
+ if (ndev->dev_status <= AIE2_DEV_INIT) {
+ XDNA_ERR(xdna, "device is already stopped");
+ return;
+ }
+
+ /* Teardown runs in the reverse order of aie2_hw_start() */
+ aie2_mgmt_fw_fini(ndev);
+ xdna_mailbox_stop_channel(ndev->mgmt_chann);
+ xdna_mailbox_destroy_channel(ndev->mgmt_chann);
+ ndev->mgmt_chann = NULL;
+ /* The mailbox is re-created on every start, so release it now */
+ drmm_kfree(&xdna->ddev, ndev->mbox);
+ ndev->mbox = NULL;
+ aie2_psp_stop(ndev->psp_hdl);
+ aie2_smu_fini(ndev);
+ pci_disable_device(pdev);
+
+ ndev->dev_status = AIE2_DEV_INIT;
+}
+
+/*
+ * aie2_hw_start() - Bring the device up and open the management channel
+ * @xdna: device to start
+ *
+ * Enables the PCI device, initializes SMU, starts PSP, waits for firmware to
+ * publish the management channel, creates the mailbox and its management
+ * channel, initializes power management and performs the initial firmware
+ * setup. Also installed as the .resume op.
+ *
+ * On failure everything acquired so far is released in reverse order. That
+ * includes the mailbox object and the cached mailbox/channel pointers, so a
+ * failed start leaves @ndev in the same state aie2_hw_stop() would.
+ *
+ * Return: 0 on success (or when already started), negative errno otherwise.
+ */
+static int aie2_hw_start(struct amdxdna_dev *xdna)
+{
+	struct pci_dev *pdev = to_pci_dev(xdna->ddev.dev);
+	struct amdxdna_dev_hdl *ndev = xdna->dev_handle;
+	struct xdna_mailbox_res mbox_res;
+	u32 xdna_mailbox_intr_reg;
+	int mgmt_mb_irq, ret;
+
+	if (ndev->dev_status >= AIE2_DEV_START) {
+		XDNA_INFO(xdna, "device is already started");
+		return 0;
+	}
+
+	ret = pci_enable_device(pdev);
+	if (ret) {
+		XDNA_ERR(xdna, "failed to enable device, ret %d", ret);
+		return ret;
+	}
+	pci_set_master(pdev);
+
+	ret = aie2_smu_init(ndev);
+	if (ret) {
+		XDNA_ERR(xdna, "failed to init smu, ret %d", ret);
+		goto disable_dev;
+	}
+
+	ret = aie2_psp_start(ndev->psp_hdl);
+	if (ret) {
+		XDNA_ERR(xdna, "failed to start psp, ret %d", ret);
+		goto fini_smu;
+	}
+
+	ret = aie2_get_mgmt_chann_info(ndev);
+	if (ret) {
+		XDNA_ERR(xdna, "firmware is not alive");
+		goto stop_psp;
+	}
+
+	mbox_res.ringbuf_base = ndev->sram_base;
+	mbox_res.ringbuf_size = pci_resource_len(pdev, xdna->dev_info->sram_bar);
+	mbox_res.mbox_base = ndev->mbox_base;
+	mbox_res.mbox_size = MBOX_SIZE(ndev);
+	mbox_res.name = "xdna_mailbox";
+	ndev->mbox = xdnam_mailbox_create(&xdna->ddev, &mbox_res);
+	if (!ndev->mbox) {
+		XDNA_ERR(xdna, "failed to create mailbox device");
+		ret = -ENODEV;
+		goto stop_psp;
+	}
+
+	mgmt_mb_irq = pci_irq_vector(pdev, ndev->mgmt_chan_idx);
+	if (mgmt_mb_irq < 0) {
+		ret = mgmt_mb_irq;
+		XDNA_ERR(xdna, "failed to alloc irq vector, ret %d", ret);
+		goto free_mbox;
+	}
+
+	/* The interrupt register sits right after the i2x head pointer register */
+	xdna_mailbox_intr_reg = ndev->mgmt_i2x.mb_head_ptr_reg + 4;
+	ndev->mgmt_chann = xdna_mailbox_create_channel(ndev->mbox,
+						       &ndev->mgmt_x2i,
+						       &ndev->mgmt_i2x,
+						       xdna_mailbox_intr_reg,
+						       mgmt_mb_irq);
+	if (!ndev->mgmt_chann) {
+		XDNA_ERR(xdna, "failed to create management mailbox channel");
+		ret = -EINVAL;
+		goto free_mbox;
+	}
+
+	ret = aie2_pm_init(ndev);
+	if (ret) {
+		XDNA_ERR(xdna, "failed to init pm, ret %d", ret);
+		goto destroy_mgmt_chann;
+	}
+
+	ret = aie2_mgmt_fw_init(ndev);
+	if (ret) {
+		XDNA_ERR(xdna, "initial mgmt firmware failed, ret %d", ret);
+		goto destroy_mgmt_chann;
+	}
+
+	ndev->dev_status = AIE2_DEV_START;
+
+	return 0;
+
+destroy_mgmt_chann:
+	xdna_mailbox_stop_channel(ndev->mgmt_chann);
+	xdna_mailbox_destroy_channel(ndev->mgmt_chann);
+	/* Do not leave a pointer to the destroyed channel behind */
+	ndev->mgmt_chann = NULL;
+free_mbox:
+	/* Mirror aie2_hw_stop(): the mailbox is re-created on the next start */
+	drmm_kfree(&xdna->ddev, ndev->mbox);
+	ndev->mbox = NULL;
+stop_psp:
+	aie2_psp_stop(ndev->psp_hdl);
+fini_smu:
+	aie2_smu_fini(ndev);
+disable_dev:
+	pci_disable_device(pdev);
+
+	return ret;
+}
+
+/*
+ * One-time device initialization (the .init op): load the firmware image,
+ * map the BARs in use, set up DMA mask, MSI-X vectors and SVA, create the
+ * PSP handle, start the hardware, query firmware information, initialize
+ * the resolver and register async event buffers.
+ *
+ * Return: 0 on success, negative errno otherwise. The firmware image is
+ * released on every exit path taken after request_firmware() succeeded.
+ */
+static int aie2_init(struct amdxdna_dev *xdna)
+{
+ struct pci_dev *pdev = to_pci_dev(xdna->ddev.dev);
+ void __iomem *tbl[PCI_NUM_RESOURCES] = {0};
+ struct init_config xrs_cfg = { 0 };
+ struct amdxdna_dev_hdl *ndev;
+ struct psp_config psp_conf;
+ const struct firmware *fw;
+ unsigned long bars = 0;
+ int i, nvec, ret;
+
+ ndev = drmm_kzalloc(&xdna->ddev, sizeof(*ndev), GFP_KERNEL);
+ if (!ndev)
+ return -ENOMEM;
+
+ ndev->priv = xdna->dev_info->dev_priv;
+ ndev->xdna = xdna;
+
+ ret = request_firmware(&fw, ndev->priv->fw_path, &pdev->dev);
+ if (ret) {
+ XDNA_ERR(xdna, "failed to request_firmware %s, ret %d",
+ ndev->priv->fw_path, ret);
+ return ret;
+ }
+
+ ret = pcim_enable_device(pdev);
+ if (ret) {
+ XDNA_ERR(xdna, "pcim enable device failed, ret %d", ret);
+ goto release_fw;
+ }
+
+ /* Collect the set of BARs that hold PSP, SRAM, SMU and mailbox registers */
+ for (i = 0; i < PSP_MAX_REGS; i++)
+ set_bit(PSP_REG_BAR(ndev, i), &bars);
+
+ set_bit(xdna->dev_info->sram_bar, &bars);
+ set_bit(xdna->dev_info->smu_bar, &bars);
+ set_bit(xdna->dev_info->mbox_bar, &bars);
+
+ /* Map each needed BAR once; tbl[] is indexed by BAR number */
+ for (i = 0; i < PCI_NUM_RESOURCES; i++) {
+ if (!test_bit(i, &bars))
+ continue;
+ tbl[i] = pcim_iomap(pdev, i, 0);
+ if (!tbl[i]) {
+ XDNA_ERR(xdna, "map bar %d failed", i);
+ ret = -ENOMEM;
+ goto release_fw;
+ }
+ }
+
+ ndev->sram_base = tbl[xdna->dev_info->sram_bar];
+ ndev->smu_base = tbl[xdna->dev_info->smu_bar];
+ ndev->mbox_base = tbl[xdna->dev_info->mbox_bar];
+
+ ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
+ if (ret) {
+ XDNA_ERR(xdna, "Failed to set DMA mask: %d", ret);
+ goto release_fw;
+ }
+
+ nvec = pci_msix_vec_count(pdev);
+ if (nvec <= 0) {
+ XDNA_ERR(xdna, "does not get number of interrupt vector");
+ ret = -EINVAL;
+ goto release_fw;
+ }
+
+ /* min == max: either all MSI-X vectors are allocated or the call fails */
+ ret = pci_alloc_irq_vectors(pdev, nvec, nvec, PCI_IRQ_MSIX);
+ if (ret < 0) {
+ XDNA_ERR(xdna, "failed to alloc irq vectors, ret %d", ret);
+ goto release_fw;
+ }
+
+ ret = iommu_dev_enable_feature(&pdev->dev, IOMMU_DEV_FEAT_SVA);
+ if (ret) {
+ XDNA_ERR(xdna, "Enable PASID failed, ret %d", ret);
+ goto free_irq;
+ }
+
+ /* Hand the firmware image and the mapped PSP registers to the PSP layer */
+ psp_conf.fw_size = fw->size;
+ psp_conf.fw_buf = fw->data;
+ for (i = 0; i < PSP_MAX_REGS; i++)
+ psp_conf.psp_regs[i] = tbl[PSP_REG_BAR(ndev, i)] + PSP_REG_OFF(ndev, i);
+ ndev->psp_hdl = aie2m_psp_create(&xdna->ddev, &psp_conf);
+ if (!ndev->psp_hdl) {
+ XDNA_ERR(xdna, "failed to create psp");
+ ret = -ENOMEM;
+ goto disable_sva;
+ }
+ /* aie2_hw_start() reads the handle back through xdna->dev_handle */
+ xdna->dev_handle = ndev;
+
+ ret = aie2_hw_start(xdna);
+ if (ret) {
+ XDNA_ERR(xdna, "start npu failed, ret %d", ret);
+ goto disable_sva;
+ }
+
+ ret = aie2_mgmt_fw_query(ndev);
+ if (ret) {
+ XDNA_ERR(xdna, "Query firmware failed, ret %d", ret);
+ goto stop_hw;
+ }
+ /* Never use more columns than the aie2_max_col module parameter allows */
+ ndev->total_col = min(aie2_max_col, ndev->metadata.cols);
+
+ /* Resolver configuration: one clock entry per DPM level */
+ xrs_cfg.clk_list.num_levels = ndev->max_dpm_level + 1;
+ for (i = 0; i < xrs_cfg.clk_list.num_levels; i++)
+ xrs_cfg.clk_list.cu_clk_list[i] = ndev->priv->dpm_clk_tbl[i].hclk;
+ xrs_cfg.sys_eff_factor = 1;
+ xrs_cfg.ddev = &xdna->ddev;
+ xrs_cfg.actions = &aie2_xrs_actions;
+ xrs_cfg.total_col = ndev->total_col;
+
+ xdna->xrs_hdl = xrsm_init(&xrs_cfg);
+ if (!xdna->xrs_hdl) {
+ XDNA_ERR(xdna, "Initialize resolver failed");
+ ret = -EINVAL;
+ goto stop_hw;
+ }
+
+ ret = aie2_error_async_events_alloc(ndev);
+ if (ret) {
+ XDNA_ERR(xdna, "Allocate async events failed, ret %d", ret);
+ goto stop_hw;
+ }
+
+ ret = aie2_error_async_events_send(ndev);
+ if (ret) {
+ XDNA_ERR(xdna, "Send async events failed, ret %d", ret);
+ goto async_event_free;
+ }
+
+ /* Issue a command to make sure firmware handled async events */
+ ret = aie2_query_firmware_version(ndev, &ndev->xdna->fw_ver);
+ if (ret) {
+ XDNA_ERR(xdna, "Re-query firmware version failed");
+ goto async_event_free;
+ }
+
+ release_firmware(fw);
+ return 0;
+
+ /* Unwind in reverse order of acquisition */
+async_event_free:
+ aie2_error_async_events_free(ndev);
+stop_hw:
+ aie2_hw_stop(xdna);
+disable_sva:
+ iommu_dev_disable_feature(&pdev->dev, IOMMU_DEV_FEAT_SVA);
+free_irq:
+ pci_free_irq_vectors(pdev);
+release_fw:
+ release_firmware(fw);
+
+ return ret;
+}
+
+/*
+ * Device teardown (the .fini op): stop the hardware, free the async event
+ * buffers, disable SVA and release the IRQ vectors taken in aie2_init().
+ */
+static void aie2_fini(struct amdxdna_dev *xdna)
+{
+ struct pci_dev *pdev = to_pci_dev(xdna->ddev.dev);
+ struct amdxdna_dev_hdl *ndev = xdna->dev_handle;
+
+ aie2_hw_stop(xdna);
+ aie2_error_async_events_free(ndev);
+ iommu_dev_disable_feature(&pdev->dev, IOMMU_DEV_FEAT_SVA);
+ pci_free_irq_vectors(pdev);
+}
+
+/*
+ * DRM_AMDXDNA_QUERY_AIE_STATUS handler. Reads a
+ * struct amdxdna_drm_query_aie_status from args->buffer, asks firmware to
+ * fill the user buffer it describes, and writes the updated structure
+ * (including cols_filled) back to args->buffer.
+ *
+ * Return: 0 on success, -EFAULT on a failed user copy, -EINVAL when the
+ * size check rejects the request, or the error from aie2_query_status().
+ */
+static int aie2_get_aie_status(struct amdxdna_client *client,
+ struct amdxdna_drm_get_info *args)
+{
+ struct amdxdna_drm_query_aie_status status;
+ struct amdxdna_dev *xdna = client->xdna;
+ struct amdxdna_dev_hdl *ndev;
+ int ret;
+
+ ndev = xdna->dev_handle;
+ if (copy_from_user(&status, u64_to_user_ptr(args->buffer), sizeof(status))) {
+ XDNA_ERR(xdna, "Failed to copy AIE request into kernel");
+ return -EFAULT;
+ }
+
+ /*
+ * NOTE(review): this rejects a buffer_size LARGER than cols * size, while
+ * the message labels that product as the needed size. It acts as an upper
+ * bound on the user-supplied size -- confirm smaller buffers are meant to
+ * be accepted.
+ */
+ if (ndev->metadata.cols * ndev->metadata.size < status.buffer_size) {
+ XDNA_ERR(xdna, "Invalid buffer size. Given Size: %u. Need Size: %u.",
+ status.buffer_size, ndev->metadata.cols * ndev->metadata.size);
+ return -EINVAL;
+ }
+
+ ret = aie2_query_status(ndev, u64_to_user_ptr(status.buffer),
+ status.buffer_size, &status.cols_filled);
+ if (ret) {
+ XDNA_ERR(xdna, "Failed to get AIE status info. Ret: %d", ret);
+ return ret;
+ }
+
+ if (copy_to_user(u64_to_user_ptr(args->buffer), &status, sizeof(status))) {
+ XDNA_ERR(xdna, "Failed to copy AIE request info to user space");
+ return -EFAULT;
+ }
+
+ return 0;
+}
+
+/*
+ * DRM_AMDXDNA_QUERY_AIE_METADATA handler: copy the cached AIE metadata into
+ * a zeroed struct amdxdna_drm_query_aie_metadata and write it to
+ * args->buffer.
+ *
+ * Return: 0 on success, -ENOMEM or -EFAULT on failure.
+ */
+static int aie2_get_aie_metadata(struct amdxdna_client *client,
+				 struct amdxdna_drm_get_info *args)
+{
+	struct amdxdna_dev_hdl *ndev = client->xdna->dev_handle;
+	struct amdxdna_drm_query_aie_metadata *out;
+	int ret;
+
+	out = kzalloc(sizeof(*out), GFP_KERNEL);
+	if (!out)
+		return -ENOMEM;
+
+	/* Overall geometry and version */
+	out->col_size = ndev->metadata.size;
+	out->cols = ndev->metadata.cols;
+	out->rows = ndev->metadata.rows;
+	out->version.major = ndev->metadata.version.major;
+	out->version.minor = ndev->metadata.version.minor;
+
+	/* Core tiles */
+	out->core.row_count = ndev->metadata.core.row_count;
+	out->core.row_start = ndev->metadata.core.row_start;
+	out->core.dma_channel_count = ndev->metadata.core.dma_channel_count;
+	out->core.lock_count = ndev->metadata.core.lock_count;
+	out->core.event_reg_count = ndev->metadata.core.event_reg_count;
+
+	/* Memory tiles */
+	out->mem.row_count = ndev->metadata.mem.row_count;
+	out->mem.row_start = ndev->metadata.mem.row_start;
+	out->mem.dma_channel_count = ndev->metadata.mem.dma_channel_count;
+	out->mem.lock_count = ndev->metadata.mem.lock_count;
+	out->mem.event_reg_count = ndev->metadata.mem.event_reg_count;
+
+	/* Shim tiles */
+	out->shim.row_count = ndev->metadata.shim.row_count;
+	out->shim.row_start = ndev->metadata.shim.row_start;
+	out->shim.dma_channel_count = ndev->metadata.shim.dma_channel_count;
+	out->shim.lock_count = ndev->metadata.shim.lock_count;
+	out->shim.event_reg_count = ndev->metadata.shim.event_reg_count;
+
+	ret = copy_to_user(u64_to_user_ptr(args->buffer), out, sizeof(*out)) ?
+	      -EFAULT : 0;
+
+	kfree(out);
+	return ret;
+}
+
+/*
+ * DRM_AMDXDNA_QUERY_AIE_VERSION handler: write the cached AIE version to
+ * args->buffer.
+ *
+ * Return: 0 on success, -EFAULT when the copy to user space fails.
+ */
+static int aie2_get_aie_version(struct amdxdna_client *client,
+				struct amdxdna_drm_get_info *args)
+{
+	struct amdxdna_dev_hdl *ndev = client->xdna->dev_handle;
+	struct amdxdna_drm_query_aie_version version;
+
+	version.major = ndev->version.major;
+	version.minor = ndev->version.minor;
+
+	return copy_to_user(u64_to_user_ptr(args->buffer), &version,
+			    sizeof(version)) ? -EFAULT : 0;
+}
+
+/*
+ * DRM_AMDXDNA_QUERY_FIRMWARE_VERSION handler: write the cached firmware
+ * version (xdna->fw_ver) to args->buffer.
+ *
+ * Return: 0 on success, -EFAULT when the copy to user space fails.
+ */
+static int aie2_get_firmware_version(struct amdxdna_client *client,
+				     struct amdxdna_drm_get_info *args)
+{
+	struct amdxdna_drm_query_firmware_version version;
+	struct amdxdna_dev *xdna = client->xdna;
+
+	version.major = xdna->fw_ver.major;
+	version.minor = xdna->fw_ver.minor;
+	/* The firmware's "sub" number is exposed as "patch" */
+	version.patch = xdna->fw_ver.sub;
+	version.build = xdna->fw_ver.build;
+
+	return copy_to_user(u64_to_user_ptr(args->buffer), &version,
+			    sizeof(version)) ? -EFAULT : 0;
+}
+
+/*
+ * DRM_AMDXDNA_GET_POWER_MODE handler: write the current power mode to
+ * args->buffer. The reply structure is zero-initialized before use.
+ *
+ * Return: 0 on success, -EFAULT when the copy to user space fails.
+ */
+static int aie2_get_power_mode(struct amdxdna_client *client,
+			       struct amdxdna_drm_get_info *args)
+{
+	struct amdxdna_dev_hdl *ndev = client->xdna->dev_handle;
+	struct amdxdna_drm_get_power_mode mode = {};
+
+	mode.power_mode = ndev->pw_mode;
+
+	return copy_to_user(u64_to_user_ptr(args->buffer), &mode,
+			    sizeof(mode)) ? -EFAULT : 0;
+}
+
+/*
+ * DRM_AMDXDNA_QUERY_CLOCK_METADATA handler: report name and frequency of
+ * the MP-NPU clock and the H clock through args->buffer.
+ *
+ * Return: 0 on success, -ENOMEM or -EFAULT on failure.
+ */
+static int aie2_get_clock_metadata(struct amdxdna_client *client,
+				   struct amdxdna_drm_get_info *args)
+{
+	struct amdxdna_dev_hdl *ndev = client->xdna->dev_handle;
+	struct amdxdna_drm_query_clock_metadata *out;
+	int ret;
+
+	out = kzalloc(sizeof(*out), GFP_KERNEL);
+	if (!out)
+		return -ENOMEM;
+
+	snprintf(out->mp_npu_clock.name, sizeof(out->mp_npu_clock.name),
+		 "MP-NPU Clock");
+	out->mp_npu_clock.freq_mhz = ndev->npuclk_freq;
+
+	snprintf(out->h_clock.name, sizeof(out->h_clock.name), "H Clock");
+	out->h_clock.freq_mhz = ndev->hclk_freq;
+
+	ret = copy_to_user(u64_to_user_ptr(args->buffer), out, sizeof(*out)) ?
+	      -EFAULT : 0;
+
+	kfree(out);
+	return ret;
+}
+
+/*
+ * DRM_AMDXDNA_QUERY_HW_CONTEXTS handler: write one
+ * struct amdxdna_drm_query_hwctx per hardware context of every client into
+ * the user array at args->buffer. Warns when xdna->dev_lock is not held.
+ *
+ * After the scratch allocation succeeded, args->buffer_size is overwritten
+ * with the number of bytes counted so far on every exit path; when the user
+ * buffer was too small this is the size needed for all contexts.
+ *
+ * Return: 0 on success, -ENOMEM, -EFAULT on a failed user copy, or -EINVAL
+ * when the user buffer cannot hold all contexts.
+ */
+static int aie2_get_hwctx_status(struct amdxdna_client *client,
+ struct amdxdna_drm_get_info *args)
+{
+ struct amdxdna_drm_query_hwctx __user *buf;
+ struct amdxdna_dev *xdna = client->xdna;
+ struct amdxdna_drm_query_hwctx *tmp;
+ struct amdxdna_client *tmp_client;
+ struct amdxdna_hwctx *hwctx;
+ unsigned long hwctx_id;
+ bool overflow = false;
+ u32 req_bytes = 0;
+ u32 hw_i = 0;
+ int ret = 0;
+ int idx;
+
+ drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock));
+
+ /* Scratch entry, reused for every context */
+ tmp = kzalloc(sizeof(*tmp), GFP_KERNEL);
+ if (!tmp)
+ return -ENOMEM;
+
+ buf = u64_to_user_ptr(args->buffer);
+ list_for_each_entry(tmp_client, &xdna->client_list, node) {
+ idx = srcu_read_lock(&tmp_client->hwctx_srcu);
+ amdxdna_for_each_hwctx(tmp_client, hwctx_id, hwctx) {
+ req_bytes += sizeof(*tmp);
+ if (args->buffer_size < req_bytes) {
+ /* Continue iterating to get the required size */
+ overflow = true;
+ continue;
+ }
+
+ memset(tmp, 0, sizeof(*tmp));
+ tmp->pid = tmp_client->pid;
+ tmp->context_id = hwctx->id;
+ tmp->start_col = hwctx->start_col;
+ tmp->num_col = hwctx->num_col;
+ tmp->command_submissions = hwctx->priv->seq;
+ tmp->command_completions = hwctx->priv->completed;
+
+ if (copy_to_user(&buf[hw_i], tmp, sizeof(*tmp))) {
+ ret = -EFAULT;
+ /* Drop the SRCU read lock before leaving the loops */
+ srcu_read_unlock(&tmp_client->hwctx_srcu, idx);
+ goto out;
+ }
+ hw_i++;
+ }
+ srcu_read_unlock(&tmp_client->hwctx_srcu, idx);
+ }
+
+ if (overflow) {
+ XDNA_ERR(xdna, "Invalid buffer size. Given: %u Need: %u.",
+ args->buffer_size, req_bytes);
+ ret = -EINVAL;
+ }
+
+out:
+ kfree(tmp);
+ args->buffer_size = req_bytes;
+ return ret;
+}
+
+/*
+ * The .get_aie_info op: dispatch a get-info request to the handler that
+ * matches args->param, inside a drm_dev_enter()/drm_dev_exit() section.
+ *
+ * Return: -ENODEV when the device cannot be entered, -EOPNOTSUPP for an
+ * unknown parameter, otherwise the handler's result.
+ */
+static int aie2_get_info(struct amdxdna_client *client, struct amdxdna_drm_get_info *args)
+{
+ struct amdxdna_dev *xdna = client->xdna;
+ int ret, idx;
+
+ if (!drm_dev_enter(&xdna->ddev, &idx))
+ return -ENODEV;
+
+ switch (args->param) {
+ case DRM_AMDXDNA_QUERY_AIE_STATUS:
+ ret = aie2_get_aie_status(client, args);
+ break;
+ case DRM_AMDXDNA_QUERY_AIE_METADATA:
+ ret = aie2_get_aie_metadata(client, args);
+ break;
+ case DRM_AMDXDNA_QUERY_AIE_VERSION:
+ ret = aie2_get_aie_version(client, args);
+ break;
+ case DRM_AMDXDNA_QUERY_CLOCK_METADATA:
+ ret = aie2_get_clock_metadata(client, args);
+ break;
+ case DRM_AMDXDNA_QUERY_HW_CONTEXTS:
+ ret = aie2_get_hwctx_status(client, args);
+ break;
+ case DRM_AMDXDNA_QUERY_FIRMWARE_VERSION:
+ ret = aie2_get_firmware_version(client, args);
+ break;
+ case DRM_AMDXDNA_GET_POWER_MODE:
+ ret = aie2_get_power_mode(client, args);
+ break;
+ default:
+ XDNA_ERR(xdna, "Not supported request parameter %u", args->param);
+ ret = -EOPNOTSUPP;
+ }
+ /* Logged for every request, including failed and unsupported ones */
+ XDNA_DBG(xdna, "Got param %d", args->param);
+
+ drm_dev_exit(idx);
+ return ret;
+}
+
+/*
+ * DRM_AMDXDNA_SET_POWER_MODE handler: read the requested mode from
+ * args->buffer, validate it and pass it to aie2_pm_set_mode().
+ *
+ * Return: -EFAULT on a failed user copy, -EINVAL on a rejected request,
+ * otherwise the result of aie2_pm_set_mode().
+ */
+static int aie2_set_power_mode(struct amdxdna_client *client,
+ struct amdxdna_drm_set_state *args)
+{
+ struct amdxdna_drm_set_power_mode power_state;
+ enum amdxdna_power_mode_type power_mode;
+ struct amdxdna_dev *xdna = client->xdna;
+
+ if (copy_from_user(&power_state, u64_to_user_ptr(args->buffer),
+ sizeof(power_state))) {
+ XDNA_ERR(xdna, "Failed to copy power mode request into kernel");
+ return -EFAULT;
+ }
+
+ /* Presumably rejects requests whose pad bytes are not all zero -- confirm in XDNA_MBZ_DBG */
+ if (XDNA_MBZ_DBG(xdna, power_state.pad, sizeof(power_state.pad)))
+ return -EINVAL;
+
+ /* POWER_MODE_TURBO is the highest mode accepted here */
+ power_mode = power_state.power_mode;
+ if (power_mode > POWER_MODE_TURBO) {
+ XDNA_ERR(xdna, "Invalid power mode %d", power_mode);
+ return -EINVAL;
+ }
+
+ return aie2_pm_set_mode(xdna->dev_handle, power_mode);
+}
+
+/*
+ * The .set_aie_state op: dispatch a set-state request by args->param inside
+ * a drm_dev_enter()/drm_dev_exit() section. Only
+ * DRM_AMDXDNA_SET_POWER_MODE is handled.
+ *
+ * Return: -ENODEV when the device cannot be entered, -EOPNOTSUPP for an
+ * unknown parameter, otherwise the handler's result.
+ */
+static int aie2_set_state(struct amdxdna_client *client,
+			  struct amdxdna_drm_set_state *args)
+{
+	struct amdxdna_dev *xdna = client->xdna;
+	int idx;
+	int ret;
+
+	if (!drm_dev_enter(&xdna->ddev, &idx))
+		return -ENODEV;
+
+	if (args->param == DRM_AMDXDNA_SET_POWER_MODE) {
+		ret = aie2_set_power_mode(client, args);
+	} else {
+		XDNA_ERR(xdna, "Not supported request parameter %u", args->param);
+		ret = -EOPNOTSUPP;
+	}
+
+	drm_dev_exit(idx);
+	return ret;
+}
+
+/*
+ * Device operations exported by this file. resume/suspend reuse the same
+ * start/stop routines that init/fini call; the hwctx_*, cmd_submit and
+ * hmm_invalidate handlers are defined outside this file.
+ */
+const struct amdxdna_dev_ops aie2_ops = {
+ .init = aie2_init,
+ .fini = aie2_fini,
+ .resume = aie2_hw_start,
+ .suspend = aie2_hw_stop,
+ .get_aie_info = aie2_get_info,
+ .set_aie_state = aie2_set_state,
+ .hwctx_init = aie2_hwctx_init,
+ .hwctx_fini = aie2_hwctx_fini,
+ .hwctx_config = aie2_hwctx_config,
+ .cmd_submit = aie2_cmd_submit,
+ .hmm_invalidate = aie2_hmm_invalidate,
+ .hwctx_suspend = aie2_hwctx_suspend,
+ .hwctx_resume = aie2_hwctx_resume,
+};
diff --git a/drivers/accel/amdxdna/aie2_pci.h b/drivers/accel/amdxdna/aie2_pci.h
new file mode 100644
index 000000000000..f2d95531ddc2
--- /dev/null
+++ b/drivers/accel/amdxdna/aie2_pci.h
@@ -0,0 +1,297 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2023-2024, Advanced Micro Devices, Inc.
+ */
+
+#ifndef _AIE2_PCI_H_
+#define _AIE2_PCI_H_
+
+#include <drm/amdxdna_accel.h>
+#include <linux/semaphore.h>
+
+#include "amdxdna_mailbox.h"
+
+#define AIE2_INTERVAL 20000 /* us */
+#define AIE2_TIMEOUT 1000000 /* us */
+
+/* Firmware determines device memory base address and size */
+#define AIE2_DEVM_BASE 0x4000000
+#define AIE2_DEVM_SIZE SZ_64M
+
+/* Get the struct pci_dev behind a device handle. */
+#define NDEV2PDEV(ndev) (to_pci_dev((ndev)->xdna->ddev.dev))
+
+/* Convert a device-side address into an offset from the SRAM / mailbox base. */
+#define AIE2_SRAM_OFF(ndev, addr) ((addr) - (ndev)->priv->sram_dev_addr)
+#define AIE2_MBOX_OFF(ndev, addr) ((addr) - (ndev)->priv->mbox_dev_addr)
+
+/* Look up the BAR index / offset recorded in the per-device register tables. */
+#define PSP_REG_BAR(ndev, idx) ((ndev)->priv->psp_regs_off[(idx)].bar_idx)
+#define PSP_REG_OFF(ndev, idx) ((ndev)->priv->psp_regs_off[(idx)].offset)
+#define SRAM_REG_OFF(ndev, idx) ((ndev)->priv->sram_offs[(idx)].offset)
+
+/*
+ * Mapped address of SMU register @idx: smu_base plus the table offset.
+ * @ndev is evaluated once through the _ndev temporary.
+ */
+#define SMU_REG(ndev, idx) \
+({ \
+ typeof(ndev) _ndev = ndev; \
+ ((_ndev)->smu_base + (_ndev)->priv->smu_regs_off[(idx)].offset); \
+})
+/* Mapped address of SRAM register @idx: sram_base plus the table offset. */
+#define SRAM_GET_ADDR(ndev, idx) \
+({ \
+ typeof(ndev) _ndev = ndev; \
+ ((_ndev)->sram_base + SRAM_REG_OFF((_ndev), (idx))); \
+})
+
+#define CHAN_SLOT_SZ SZ_8K
+/*
+ * Mailbox size: priv->mbox_size when it is non-zero, otherwise the length
+ * of the PCI BAR selected by dev_info->mbox_bar.
+ */
+#define MBOX_SIZE(ndev) \
+({ \
+ typeof(ndev) _ndev = (ndev); \
+ ((_ndev)->priv->mbox_size) ? (_ndev)->priv->mbox_size : \
+ pci_resource_len(NDEV2PDEV(_ndev), (_ndev)->xdna->dev_info->mbox_bar); \
+})
+
+enum aie2_smu_reg_idx {
+ SMU_CMD_REG = 0,
+ SMU_ARG_REG,
+ SMU_INTR_REG,
+ SMU_RESP_REG,
+ SMU_OUT_REG,
+ SMU_MAX_REGS /* Keep this at the end */
+};
+
+enum aie2_sram_reg_idx {
+ MBOX_CHANN_OFF = 0,
+ FW_ALIVE_OFF,
+ SRAM_MAX_INDEX /* Keep this at the end */
+};
+
+/*
+ * Indices into the PSP register tables. The first PSP_NUM_IN_REGS entries
+ * (command plus three arguments) are the input registers; PSP_INTR_REG
+ * deliberately shares the value of PSP_NUM_IN_REGS so that the remaining
+ * registers follow the inputs without a gap.
+ */
+enum psp_reg_idx {
+ PSP_CMD_REG = 0,
+ PSP_ARG0_REG,
+ PSP_ARG1_REG,
+ PSP_ARG2_REG,
+ PSP_NUM_IN_REGS, /* number of input registers */
+ PSP_INTR_REG = PSP_NUM_IN_REGS,
+ PSP_STATUS_REG,
+ PSP_RESP_REG,
+ PSP_MAX_REGS /* Keep this at the end */
+};
+
+struct amdxdna_client;
+struct amdxdna_fw_ver;
+struct amdxdna_hwctx;
+struct amdxdna_sched_job;
+
+struct psp_config {
+ const void *fw_buf;
+ u32 fw_size;
+ void __iomem *psp_regs[PSP_MAX_REGS];
+};
+
+struct aie_version {
+ u16 major;
+ u16 minor;
+};
+
+struct aie_tile_metadata {
+ u16 row_count;
+ u16 row_start;
+ u16 dma_channel_count;
+ u16 lock_count;
+ u16 event_reg_count;
+};
+
+struct aie_metadata {
+ u32 size;
+ u16 cols;
+ u16 rows;
+ struct aie_version version;
+ struct aie_tile_metadata core;
+ struct aie_tile_metadata mem;
+ struct aie_tile_metadata shim;
+};
+
+enum rt_config_category {
+ AIE2_RT_CFG_INIT,
+ AIE2_RT_CFG_CLK_GATING,
+};
+
+struct rt_config {
+ u32 type;
+ u32 value;
+ u32 category;
+};
+
+struct dpm_clk_freq {
+ u32 npuclk;
+ u32 hclk;
+};
+
+/*
+ * Define the maximum number of pending commands in a hardware context.
+ * Must be power of 2!
+ *
+ * get_job_idx() maps a sequence number to a slot by masking with
+ * HWCTX_MAX_CMDS - 1, which is only a valid modulo for a power of two.
+ */
+#define HWCTX_MAX_CMDS 4
+#define get_job_idx(seq) ((seq) & (HWCTX_MAX_CMDS - 1))
+/* AIE2-private part of a hardware context. */
+struct amdxdna_hwctx_priv {
+ struct amdxdna_gem_obj *heap;
+ void *mbox_chann;
+
+ struct drm_gpu_scheduler sched;
+ struct drm_sched_entity entity;
+
+ struct mutex io_lock; /* protect seq and cmd order */
+ struct wait_queue_head job_free_wq;
+ u32 num_pending;
+ u64 seq;
+ struct semaphore job_sem;
+ bool job_done;
+
+ /* Completed job counter */
+ u64 completed;
+
+ /* One command buffer per slot; presumably indexed with get_job_idx() - TODO confirm */
+ struct amdxdna_gem_obj *cmd_buf[HWCTX_MAX_CMDS];
+ struct drm_syncobj *syncobj;
+};
+
+enum aie2_dev_status {
+ AIE2_DEV_UNINIT,
+ AIE2_DEV_INIT,
+ AIE2_DEV_START,
+};
+
+/* AIE2 device handle: mapped register bases, mailbox state and power state. */
+struct amdxdna_dev_hdl {
+ struct amdxdna_dev *xdna;
+ const struct amdxdna_dev_priv *priv;
+ void __iomem *sram_base;
+ void __iomem *smu_base;
+ void __iomem *mbox_base;
+ struct psp_device *psp_hdl;
+
+ struct xdna_mailbox_chann_res mgmt_x2i;
+ struct xdna_mailbox_chann_res mgmt_i2x;
+ u32 mgmt_chan_idx;
+ u32 mgmt_prot_major;
+ u32 mgmt_prot_minor;
+
+ u32 total_col;
+ struct aie_version version;
+ struct aie_metadata metadata;
+
+ /* power management and clock */
+ /* Current mode; written by aie2_pm_init() and aie2_pm_set_mode() */
+ enum amdxdna_power_mode_type pw_mode;
+ /* Level last applied by a set_dpm() hook; replayed by aie2_pm_init() */
+ u32 dpm_level;
+ /* Level used for POWER_MODE_DEFAULT; set by aie2_pm_init() */
+ u32 dft_dpm_level;
+ /* Index of the last dpm_clk_tbl entry with a non-zero hclk */
+ u32 max_dpm_level;
+ /* Last clock gating value accepted by aie2_runtime_cfg() */
+ u32 clk_gating;
+ /* Frequencies recorded by the set_dpm() hooks */
+ u32 npuclk_freq;
+ u32 hclk_freq;
+
+ /* Mailbox and the management channel */
+ struct mailbox *mbox;
+ struct mailbox_channel *mgmt_chann;
+ struct async_events *async_events;
+
+ enum aie2_dev_status dev_status;
+ /* Non-zero blocks switching to POWER_MODE_TURBO in aie2_pm_set_mode() */
+ u32 hwctx_num;
+};
+
+#define DEFINE_BAR_OFFSET(reg_name, bar, reg_addr) \
+ [reg_name] = {bar##_BAR_INDEX, (reg_addr) - bar##_BAR_BASE}
+
+struct aie2_bar_off_pair {
+ int bar_idx;
+ u32 offset;
+};
+
+struct aie2_hw_ops {
+ int (*set_dpm)(struct amdxdna_dev_hdl *ndev, u32 dpm_level);
+};
+
+/* Constant per-device description selected by the NPU variant. */
+struct amdxdna_dev_priv {
+ const char *fw_path;
+ u64 protocol_major;
+ u64 protocol_minor;
+ const struct rt_config *rt_config;
+ /* aie2_pm_init() scans this table until it finds an entry whose hclk is 0 */
+ const struct dpm_clk_freq *dpm_clk_tbl;
+
+#define COL_ALIGN_NONE 0
+#define COL_ALIGN_NATURE 1
+ u32 col_align;
+ u32 mbox_dev_addr;
+ /* If mbox_size is 0, use BAR size. See MBOX_SIZE macro */
+ u32 mbox_size;
+ u32 sram_dev_addr;
+ struct aie2_bar_off_pair sram_offs[SRAM_MAX_INDEX];
+ struct aie2_bar_off_pair psp_regs_off[PSP_MAX_REGS];
+ struct aie2_bar_off_pair smu_regs_off[SMU_MAX_REGS];
+ /* Variant-specific hooks, e.g. npu1_set_dpm() or npu4_set_dpm() */
+ struct aie2_hw_ops hw_ops;
+};
+
+extern const struct amdxdna_dev_ops aie2_ops;
+
+int aie2_runtime_cfg(struct amdxdna_dev_hdl *ndev,
+ enum rt_config_category category, u32 *val);
+
+/* aie2 npu hw config */
+extern const struct dpm_clk_freq npu1_dpm_clk_table[];
+extern const struct dpm_clk_freq npu4_dpm_clk_table[];
+extern const struct rt_config npu1_default_rt_cfg[];
+extern const struct rt_config npu4_default_rt_cfg[];
+
+/* aie2_smu.c */
+int aie2_smu_init(struct amdxdna_dev_hdl *ndev);
+void aie2_smu_fini(struct amdxdna_dev_hdl *ndev);
+int npu1_set_dpm(struct amdxdna_dev_hdl *ndev, u32 dpm_level);
+int npu4_set_dpm(struct amdxdna_dev_hdl *ndev, u32 dpm_level);
+
+/* aie2_pm.c */
+int aie2_pm_init(struct amdxdna_dev_hdl *ndev);
+int aie2_pm_set_mode(struct amdxdna_dev_hdl *ndev, enum amdxdna_power_mode_type target);
+
+/* aie2_psp.c */
+struct psp_device *aie2m_psp_create(struct drm_device *ddev, struct psp_config *conf);
+int aie2_psp_start(struct psp_device *psp);
+void aie2_psp_stop(struct psp_device *psp);
+
+/* aie2_error.c */
+int aie2_error_async_events_alloc(struct amdxdna_dev_hdl *ndev);
+void aie2_error_async_events_free(struct amdxdna_dev_hdl *ndev);
+int aie2_error_async_events_send(struct amdxdna_dev_hdl *ndev);
+int aie2_error_async_msg_thread(void *data);
+
+/* aie2_message.c */
+int aie2_suspend_fw(struct amdxdna_dev_hdl *ndev);
+int aie2_resume_fw(struct amdxdna_dev_hdl *ndev);
+int aie2_set_runtime_cfg(struct amdxdna_dev_hdl *ndev, u32 type, u64 value);
+int aie2_get_runtime_cfg(struct amdxdna_dev_hdl *ndev, u32 type, u64 *value);
+int aie2_assign_mgmt_pasid(struct amdxdna_dev_hdl *ndev, u16 pasid);
+int aie2_query_aie_version(struct amdxdna_dev_hdl *ndev, struct aie_version *version);
+int aie2_query_aie_metadata(struct amdxdna_dev_hdl *ndev, struct aie_metadata *metadata);
+int aie2_query_firmware_version(struct amdxdna_dev_hdl *ndev,
+ struct amdxdna_fw_ver *fw_ver);
+int aie2_create_context(struct amdxdna_dev_hdl *ndev, struct amdxdna_hwctx *hwctx);
+int aie2_destroy_context(struct amdxdna_dev_hdl *ndev, struct amdxdna_hwctx *hwctx);
+int aie2_map_host_buf(struct amdxdna_dev_hdl *ndev, u32 context_id, u64 addr, u64 size);
+int aie2_query_status(struct amdxdna_dev_hdl *ndev, char __user *buf, u32 size, u32 *cols_filled);
+int aie2_register_asyn_event_msg(struct amdxdna_dev_hdl *ndev, dma_addr_t addr, u32 size,
+ void *handle, int (*cb)(void*, const u32 *, size_t));
+int aie2_config_cu(struct amdxdna_hwctx *hwctx);
+int aie2_execbuf(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job,
+ int (*notify_cb)(void *, const u32 *, size_t));
+int aie2_cmdlist_single_execbuf(struct amdxdna_hwctx *hwctx,
+ struct amdxdna_sched_job *job,
+ int (*notify_cb)(void *, const u32 *, size_t));
+int aie2_cmdlist_multi_execbuf(struct amdxdna_hwctx *hwctx,
+ struct amdxdna_sched_job *job,
+ int (*notify_cb)(void *, const u32 *, size_t));
+int aie2_sync_bo(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job,
+ int (*notify_cb)(void *, const u32 *, size_t));
+
+/* aie2_ctx.c */
+int aie2_hwctx_init(struct amdxdna_hwctx *hwctx);
+void aie2_hwctx_fini(struct amdxdna_hwctx *hwctx);
+int aie2_hwctx_config(struct amdxdna_hwctx *hwctx, u32 type, u64 value, void *buf, u32 size);
+void aie2_hwctx_suspend(struct amdxdna_hwctx *hwctx);
+void aie2_hwctx_resume(struct amdxdna_hwctx *hwctx);
+int aie2_cmd_submit(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job, u64 *seq);
+void aie2_hmm_invalidate(struct amdxdna_gem_obj *abo, unsigned long cur_seq);
+void aie2_restart_ctx(struct amdxdna_client *client);
+
+#endif /* _AIE2_PCI_H_ */
diff --git a/drivers/accel/amdxdna/aie2_pm.c b/drivers/accel/amdxdna/aie2_pm.c
new file mode 100644
index 000000000000..426c38fce848
--- /dev/null
+++ b/drivers/accel/amdxdna/aie2_pm.c
@@ -0,0 +1,108 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2024, Advanced Micro Devices, Inc.
+ */
+
+#include <drm/amdxdna_accel.h>
+#include <drm/drm_device.h>
+#include <drm/drm_print.h>
+#include <drm/gpu_scheduler.h>
+
+#include "aie2_pci.h"
+#include "amdxdna_pci_drv.h"
+
+#define AIE2_CLK_GATING_ENABLE 1
+#define AIE2_CLK_GATING_DISABLE 0
+
+/*
+ * Send a clock gating setting to the device via aie2_runtime_cfg() and, when
+ * that succeeds, remember the resulting value in ndev->clk_gating.
+ */
+static int aie2_pm_set_clk_gating(struct amdxdna_dev_hdl *ndev, u32 val)
+{
+	int err = aie2_runtime_cfg(ndev, AIE2_RT_CFG_CLK_GATING, &val);
+
+	if (!err)
+		ndev->clk_gating = val;
+
+	return err;
+}
+
+/*
+ * aie2_pm_init() - Program the DPM level and clock gating.
+ *
+ * While the device is still AIE2_DEV_UNINIT this is the first bring-up:
+ * the highest populated DPM level is discovered, applied together with
+ * clock gating enabled, and recorded as the default. In any other state
+ * the previously recorded DPM level and clock gating value are replayed.
+ *
+ * Return: 0 on success or the first error reported by the hardware hooks.
+ */
+int aie2_pm_init(struct amdxdna_dev_hdl *ndev)
+{
+	const struct amdxdna_dev_priv *priv = ndev->priv;
+	int err;
+
+	if (ndev->dev_status == AIE2_DEV_UNINIT) {
+		/* The table ends at the first entry with hclk == 0 */
+		while (priv->dpm_clk_tbl[ndev->max_dpm_level].hclk)
+			ndev->max_dpm_level++;
+		ndev->max_dpm_level--;
+
+		err = priv->hw_ops.set_dpm(ndev, ndev->max_dpm_level);
+		if (err)
+			return err;
+
+		err = aie2_pm_set_clk_gating(ndev, AIE2_CLK_GATING_ENABLE);
+		if (err)
+			return err;
+
+		ndev->pw_mode = POWER_MODE_DEFAULT;
+		ndev->dft_dpm_level = ndev->max_dpm_level;
+
+		return 0;
+	}
+
+	/* Resume device */
+	err = priv->hw_ops.set_dpm(ndev, ndev->dpm_level);
+	if (err)
+		return err;
+
+	return aie2_pm_set_clk_gating(ndev, ndev->clk_gating);
+}
+
+/*
+ * aie2_pm_set_mode() - Switch the device to the requested power mode.
+ * @ndev: device handle
+ * @target: power mode to enter
+ *
+ * Warns if xdna->dev_lock is not held. Turbo disables clock gating and is
+ * refused while any hardware context exists; high and default keep clock
+ * gating enabled and differ only in the DPM level they select.
+ *
+ * Return: 0 on success (including when @target is already active),
+ * -EINVAL for turbo with active contexts, -EOPNOTSUPP for other modes,
+ * or the error from programming the hardware.
+ */
+int aie2_pm_set_mode(struct amdxdna_dev_hdl *ndev, enum amdxdna_power_mode_type target)
+{
+	struct amdxdna_dev *xdna = ndev->xdna;
+	u32 gating = AIE2_CLK_GATING_ENABLE;
+	u32 level;
+	int err;
+
+	drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock));
+
+	if (target == ndev->pw_mode)
+		return 0;
+
+	switch (target) {
+	case POWER_MODE_TURBO:
+		if (ndev->hwctx_num) {
+			XDNA_ERR(xdna, "Can not set turbo when there is active hwctx");
+			return -EINVAL;
+		}
+
+		gating = AIE2_CLK_GATING_DISABLE;
+		level = ndev->max_dpm_level;
+		break;
+	case POWER_MODE_HIGH:
+		level = ndev->max_dpm_level;
+		break;
+	case POWER_MODE_DEFAULT:
+		level = ndev->dft_dpm_level;
+		break;
+	default:
+		return -EOPNOTSUPP;
+	}
+
+	err = ndev->priv->hw_ops.set_dpm(ndev, level);
+	if (err)
+		return err;
+
+	err = aie2_pm_set_clk_gating(ndev, gating);
+	if (!err)
+		ndev->pw_mode = target;
+
+	return err;
+}
diff --git a/drivers/accel/amdxdna/aie2_psp.c b/drivers/accel/amdxdna/aie2_psp.c
new file mode 100644
index 000000000000..dc3a072ce3b6
--- /dev/null
+++ b/drivers/accel/amdxdna/aie2_psp.c
@@ -0,0 +1,146 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2022-2024, Advanced Micro Devices, Inc.
+ */
+
+#include <drm/drm_device.h>
+#include <drm/drm_gem_shmem_helper.h>
+#include <drm/drm_managed.h>
+#include <drm/drm_print.h>
+#include <drm/gpu_scheduler.h>
+#include <linux/bitfield.h>
+#include <linux/iopoll.h>
+
+#include "aie2_pci.h"
+#include "amdxdna_mailbox.h"
+#include "amdxdna_pci_drv.h"
+
+#define PSP_STATUS_READY BIT(31)
+
+/* PSP commands */
+#define PSP_VALIDATE 1
+#define PSP_START 2
+#define PSP_RELEASE_TMR 3
+
+/* PSP special arguments */
+#define PSP_START_COPY_FW 1
+
+/* PSP response error code */
+#define PSP_ERROR_CANCEL 0xFFFF0002
+#define PSP_ERROR_BAD_STATE 0xFFFF0007
+
+#define PSP_FW_ALIGN 0x10000
+#define PSP_POLL_INTERVAL 20000 /* us */
+#define PSP_POLL_TIMEOUT 1000000 /* us */
+
+#define PSP_REG(p, reg) ((p)->psp_regs[reg])
+
+/* State kept for talking to the Platform Security Processor (PSP). */
+struct psp_device {
+ struct drm_device *ddev;
+ /* NOTE(review): no code in this file assigns conf - confirm it is needed */
+ struct psp_config conf;
+ /* Size of the allocation behind fw_buffer; also passed to PSP_VALIDATE */
+ u32 fw_buf_sz;
+ /* PSP_FW_ALIGN-aligned physical address of the firmware copy */
+ u64 fw_paddr;
+ void *fw_buffer;
+ /* Mapped PSP registers, indexed by enum psp_reg_idx */
+ void __iomem *psp_regs[PSP_MAX_REGS];
+};
+
+/*
+ * psp_exec() - Issue one command to the PSP and wait for it to finish.
+ * @psp: PSP state
+ * @reg_vals: PSP_NUM_IN_REGS values written to the command/argument registers
+ *
+ * Return: 0 when the PSP reports ready with a zero response code, the
+ * readx_poll_timeout() error if the ready bit never appears, or -EIO when
+ * the response register is non-zero.
+ */
+static int psp_exec(struct psp_device *psp, u32 *reg_vals)
+{
+	u32 resp_code;
+	int ret, i;
+	u32 ready;
+
+	/* Write command and argument registers */
+	for (i = 0; i < PSP_NUM_IN_REGS; i++)
+		writel(reg_vals[i], PSP_REG(psp, i));
+
+	/* clear and set PSP INTR register to kick off */
+	writel(0, PSP_REG(psp, PSP_INTR_REG));
+	writel(1, PSP_REG(psp, PSP_INTR_REG));
+
+	/* PSP should be busy. Wait for ready, so we know task is done. */
+	ret = readx_poll_timeout(readl, PSP_REG(psp, PSP_STATUS_REG), ready,
+				 FIELD_GET(PSP_STATUS_READY, ready),
+				 PSP_POLL_INTERVAL, PSP_POLL_TIMEOUT);
+	if (ret) {
+		/* ret is a negative errno, so print it as a signed decimal */
+		drm_err(psp->ddev, "PSP is not ready, ret %d", ret);
+		return ret;
+	}
+
+	resp_code = readl(PSP_REG(psp, PSP_RESP_REG));
+	if (resp_code) {
+		drm_err(psp->ddev, "fw return error 0x%x", resp_code);
+		return -EIO;
+	}
+
+	return 0;
+}
+
+/* Send PSP_RELEASE_TMR with zeroed arguments; a failure is only logged. */
+void aie2_psp_stop(struct psp_device *psp)
+{
+	u32 cmd[PSP_NUM_IN_REGS] = { [PSP_CMD_REG] = PSP_RELEASE_TMR };
+	int err = psp_exec(psp, cmd);
+
+	if (err)
+		drm_err(psp->ddev, "release tmr failed, ret %d", err);
+}
+
+/*
+ * aie2_psp_start() - Validate the firmware image, then start it.
+ *
+ * PSP_VALIDATE is given the aligned physical address (split into low and
+ * high words) and the buffer size. PSP_START follows with
+ * PSP_START_COPY_FW as its only argument.
+ *
+ * Return: 0 on success or the psp_exec() error of the failing command.
+ */
+int aie2_psp_start(struct psp_device *psp)
+{
+	u32 validate[PSP_NUM_IN_REGS] = {
+		[PSP_CMD_REG] = PSP_VALIDATE,
+		[PSP_ARG0_REG] = lower_32_bits(psp->fw_paddr),
+		[PSP_ARG1_REG] = upper_32_bits(psp->fw_paddr),
+		[PSP_ARG2_REG] = psp->fw_buf_sz,
+	};
+	u32 start[PSP_NUM_IN_REGS] = {
+		[PSP_CMD_REG] = PSP_START,
+		[PSP_ARG0_REG] = PSP_START_COPY_FW,
+	};
+	int err;
+
+	err = psp_exec(psp, validate);
+	if (err) {
+		drm_err(psp->ddev, "failed to validate fw, ret %d", err);
+		return err;
+	}
+
+	err = psp_exec(psp, start);
+	if (err)
+		drm_err(psp->ddev, "failed to start fw, ret %d", err);
+
+	return err;
+}
+
+/*
+ * aie2m_psp_create() - Allocate PSP state and stage the firmware image.
+ * @ddev: DRM device that owns the managed allocations
+ * @conf: register mappings plus the firmware blob and its size
+ *
+ * Both allocations are DRM-managed (drmm_*), so nothing is freed here on
+ * failure. The firmware is copied to the first PSP_FW_ALIGN-aligned
+ * physical address inside the buffer.
+ *
+ * Return: the new psp_device, or NULL if an allocation fails.
+ */
+struct psp_device *aie2m_psp_create(struct drm_device *ddev, struct psp_config *conf)
+{
+ struct psp_device *psp;
+ u64 offset;
+
+ psp = drmm_kzalloc(ddev, sizeof(*psp), GFP_KERNEL);
+ if (!psp)
+ return NULL;
+
+ psp->ddev = ddev;
+ memcpy(psp->psp_regs, conf->psp_regs, sizeof(psp->psp_regs));
+
+ /* One extra PSP_FW_ALIGN of slack leaves room to align the start address */
+ psp->fw_buf_sz = ALIGN(conf->fw_size, PSP_FW_ALIGN) + PSP_FW_ALIGN;
+ psp->fw_buffer = drmm_kmalloc(ddev, psp->fw_buf_sz, GFP_KERNEL);
+ if (!psp->fw_buffer) {
+ drm_err(ddev, "no memory for fw buffer");
+ return NULL;
+ }
+
+ /*
+ * AMD Platform Security Processor(PSP) requires host physical
+ * address to load NPU firmware.
+ */
+ psp->fw_paddr = virt_to_phys(psp->fw_buffer);
+ offset = ALIGN(psp->fw_paddr, PSP_FW_ALIGN) - psp->fw_paddr;
+ psp->fw_paddr += offset;
+ /*
+ * NOTE(review): fw_buf_sz still counts the bytes skipped by offset, and
+ * aie2_psp_start() reports fw_buf_sz as the size from the aligned
+ * address - confirm that is what the PSP expects.
+ */
+ memcpy(psp->fw_buffer + offset, conf->fw_buf, conf->fw_size);
+
+ return psp;
+}
diff --git a/drivers/accel/amdxdna/aie2_smu.c b/drivers/accel/amdxdna/aie2_smu.c
new file mode 100644
index 000000000000..d303701b0ded
--- /dev/null
+++ b/drivers/accel/amdxdna/aie2_smu.c
@@ -0,0 +1,136 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2022-2024, Advanced Micro Devices, Inc.
+ */
+
+#include <drm/drm_device.h>
+#include <drm/drm_gem_shmem_helper.h>
+#include <drm/drm_print.h>
+#include <drm/gpu_scheduler.h>
+#include <linux/iopoll.h>
+
+#include "aie2_pci.h"
+#include "amdxdna_pci_drv.h"
+
+#define SMU_RESULT_OK 1
+
+/* SMU commands */
+#define AIE2_SMU_POWER_ON 0x3
+#define AIE2_SMU_POWER_OFF 0x4
+#define AIE2_SMU_SET_MPNPUCLK_FREQ 0x5
+#define AIE2_SMU_SET_HCLK_FREQ 0x6
+#define AIE2_SMU_SET_SOFT_DPMLEVEL 0x7
+#define AIE2_SMU_SET_HARD_DPMLEVEL 0x8
+
+/*
+ * aie2_smu_exec() - Run one SMU command and wait for its response.
+ * @ndev: device handle
+ * @reg_cmd: value for the command register
+ * @reg_arg: value for the argument register
+ * @out: optional; receives the output register once a response arrived
+ *
+ * Return: 0 if the response equals SMU_RESULT_OK, the readx_poll_timeout()
+ * error if no response shows up within AIE2_TIMEOUT, or -EINVAL for any
+ * other response value.
+ */
+static int aie2_smu_exec(struct amdxdna_dev_hdl *ndev, u32 reg_cmd,
+ u32 reg_arg, u32 *out)
+{
+ u32 resp;
+ int ret;
+
+ /* Zero the response first so the poll below only sees a fresh value */
+ writel(0, SMU_REG(ndev, SMU_RESP_REG));
+ writel(reg_arg, SMU_REG(ndev, SMU_ARG_REG));
+ writel(reg_cmd, SMU_REG(ndev, SMU_CMD_REG));
+
+ /* Clear and set SMU_INTR_REG to kick off */
+ writel(0, SMU_REG(ndev, SMU_INTR_REG));
+ writel(1, SMU_REG(ndev, SMU_INTR_REG));
+
+ /* Any non-zero response ends the wait; success is checked afterwards */
+ ret = readx_poll_timeout(readl, SMU_REG(ndev, SMU_RESP_REG), resp,
+ resp, AIE2_INTERVAL, AIE2_TIMEOUT);
+ if (ret) {
+ XDNA_ERR(ndev->xdna, "smu cmd %d timed out", reg_cmd);
+ return ret;
+ }
+
+ /* The output register is read even when the response signals failure */
+ if (out)
+ *out = readl(SMU_REG(ndev, SMU_OUT_REG));
+
+ if (resp != SMU_RESULT_OK) {
+ XDNA_ERR(ndev->xdna, "smu cmd %d failed, 0x%x", reg_cmd, resp);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+/*
+ * npu1_set_dpm() - Apply a DPM level by setting both clocks explicitly.
+ * @ndev: device handle
+ * @dpm_level: index into priv->dpm_clk_tbl
+ *
+ * The frequencies reported back by the SMU are stored in ndev, and
+ * ndev->dpm_level is updated once both requests have succeeded.
+ *
+ * Return: 0 on success or the aie2_smu_exec() error.
+ */
+int npu1_set_dpm(struct amdxdna_dev_hdl *ndev, u32 dpm_level)
+{
+	const struct dpm_clk_freq *target = &ndev->priv->dpm_clk_tbl[dpm_level];
+	u32 applied;
+	int err;
+
+	err = aie2_smu_exec(ndev, AIE2_SMU_SET_MPNPUCLK_FREQ, target->npuclk,
+			    &applied);
+	if (err) {
+		XDNA_ERR(ndev->xdna, "Set npu clock to %d failed, ret %d\n",
+			 target->npuclk, err);
+		return err;
+	}
+	ndev->npuclk_freq = applied;
+
+	err = aie2_smu_exec(ndev, AIE2_SMU_SET_HCLK_FREQ, target->hclk,
+			    &applied);
+	if (err) {
+		XDNA_ERR(ndev->xdna, "Set h clock to %d failed, ret %d\n",
+			 target->hclk, err);
+		return err;
+	}
+	ndev->hclk_freq = applied;
+	ndev->dpm_level = dpm_level;
+
+	XDNA_DBG(ndev->xdna, "MP-NPU clock %d, H clock %d\n",
+		 ndev->npuclk_freq, ndev->hclk_freq);
+
+	return 0;
+}
+
+/*
+ * npu4_set_dpm() - Apply a DPM level through the hard and soft level commands.
+ * @ndev: device handle
+ * @dpm_level: level to request; also indexes priv->dpm_clk_tbl
+ *
+ * After both commands succeed, the clock frequencies are taken from the
+ * table entry for @dpm_level rather than read back from the SMU.
+ *
+ * Return: 0 on success or the aie2_smu_exec() error.
+ */
+int npu4_set_dpm(struct amdxdna_dev_hdl *ndev, u32 dpm_level)
+{
+	const struct dpm_clk_freq *clk;
+	int err;
+
+	err = aie2_smu_exec(ndev, AIE2_SMU_SET_HARD_DPMLEVEL, dpm_level, NULL);
+	if (err) {
+		XDNA_ERR(ndev->xdna, "Set hard dpm level %d failed, ret %d ",
+			 dpm_level, err);
+		return err;
+	}
+
+	err = aie2_smu_exec(ndev, AIE2_SMU_SET_SOFT_DPMLEVEL, dpm_level, NULL);
+	if (err) {
+		XDNA_ERR(ndev->xdna, "Set soft dpm level %d failed, ret %d",
+			 dpm_level, err);
+		return err;
+	}
+
+	clk = &ndev->priv->dpm_clk_tbl[dpm_level];
+	ndev->npuclk_freq = clk->npuclk;
+	ndev->hclk_freq = clk->hclk;
+	ndev->dpm_level = dpm_level;
+
+	XDNA_DBG(ndev->xdna, "MP-NPU clock %d, H clock %d\n",
+		 ndev->npuclk_freq, ndev->hclk_freq);
+
+	return 0;
+}
+
+/* Ask the SMU to power the NPU on; returns the aie2_smu_exec() result. */
+int aie2_smu_init(struct amdxdna_dev_hdl *ndev)
+{
+	int err = aie2_smu_exec(ndev, AIE2_SMU_POWER_ON, 0, NULL);
+
+	if (err)
+		XDNA_ERR(ndev->xdna, "Power on failed, ret %d", err);
+
+	return err;
+}
+
+/*
+ * aie2_smu_fini() - Drop to DPM level 0 and power the NPU off.
+ *
+ * The set_dpm() hooks store the level they apply in ndev->dpm_level, and
+ * aie2_pm_init() replays ndev->dpm_level when an already initialized device
+ * is started again. The recorded level is therefore preserved across the
+ * power-down request, so that the replay restores the level that was in
+ * use instead of level 0.
+ */
+void aie2_smu_fini(struct amdxdna_dev_hdl *ndev)
+{
+	u32 saved_dpm_level = ndev->dpm_level;
+	int ret;
+
+	/* Best effort: set_dpm() logs its own failure, power-off goes ahead anyway */
+	ndev->priv->hw_ops.set_dpm(ndev, 0);
+	ndev->dpm_level = saved_dpm_level;
+
+	ret = aie2_smu_exec(ndev, AIE2_SMU_POWER_OFF, 0, NULL);
+	if (ret)
+		XDNA_ERR(ndev->xdna, "Power off failed, ret %d", ret);
+}
diff --git a/drivers/accel/amdxdna/aie2_solver.c b/drivers/accel/amdxdna/aie2_solver.c
new file mode 100644
index 000000000000..2013d1f13aae
--- /dev/null
+++ b/drivers/accel/amdxdna/aie2_solver.c
@@ -0,0 +1,380 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2022-2024, Advanced Micro Devices, Inc.
+ */
+
+#include <drm/drm_device.h>
+#include <drm/drm_managed.h>
+#include <drm/drm_print.h>
+#include <linux/bitops.h>
+#include <linux/bitmap.h>
+#include <linux/slab.h>
+
+#include "aie2_solver.h"
+
+struct partition_node {
+ struct list_head list;
+ u32 nshared; /* # shared requests */
+ u32 start_col; /* start column */
+ u32 ncols; /* # columns */
+ bool exclusive; /* can not be shared if set */
+};
+
+struct solver_node {
+ struct list_head list;
+ u64 rid; /* Request ID from consumer */
+
+ struct partition_node *pt_node;
+ void *cb_arg;
+ u32 dpm_level;
+ u32 cols_len;
+ u32 start_cols[] __counted_by(cols_len);
+};
+
+struct solver_rgroup {
+ u32 rgid;
+ u32 nnode;
+ u32 npartition_node;
+
+ DECLARE_BITMAP(resbit, XRS_MAX_COL);
+ struct list_head node_list;
+ struct list_head pt_node_list;
+};
+
+/* Top-level solver handle returned (as void *) by xrsm_init(). */
+struct solver_state {
+ struct solver_rgroup rgp;
+ /* Copy of the caller's configuration; callbacks are reached via cfg.actions */
+ struct init_config cfg;
+ /* NOTE(review): not referenced anywhere in this file - confirm it is needed */
+ struct xrs_action_ops *actions;
+};
+
+/*
+ * calculate_gops() - Work out the throughput a QoS request asks for.
+ * @rqos: requested QoS
+ *
+ * The rate is the larger of the requested fps and the rate implied by the
+ * latency (1000 / latency, 0 when no latency is given), multiplied by the
+ * per-frame gops.
+ *
+ * Return: the requested throughput, saturated at U32_MAX. The product is
+ * formed in 64 bits so that an oversized request cannot wrap around and
+ * look like a small one.
+ */
+static u32 calculate_gops(struct aie_qos *rqos)
+{
+	u32 service_rate = 0;
+	u64 total;
+
+	if (rqos->latency)
+		service_rate = (1000 / rqos->latency);
+
+	if (rqos->fps > service_rate)
+		total = (u64)rqos->fps * rqos->gops;
+	else
+		total = (u64)service_rate * rqos->gops;
+
+	if (total > U32_MAX)
+		return U32_MAX;
+
+	return total;
+}
+
+/*
+ * qos_meet() - Check the QOS request can be met.
+ * @xrs: solver state, supplies the system efficiency factor
+ * @rqos: requested QoS
+ * @cgops: throughput the candidate configuration can deliver
+ *
+ * The scaled request is computed in 64 bits; a 32-bit product could wrap
+ * and wrongly compare as satisfiable.
+ *
+ * Return: 0 if the request fits within @cgops, -EINVAL otherwise.
+ */
+static int qos_meet(struct solver_state *xrs, struct aie_qos *rqos, u32 cgops)
+{
+	u64 request_gops = (u64)calculate_gops(rqos) * xrs->cfg.sys_eff_factor;
+
+	if (request_gops <= cgops)
+		return 0;
+
+	return -EINVAL;
+}
+
+/*
+ * sanity_check() - Reject allocation requests that can never be served.
+ *
+ * A request fails if it needs more columns than the array has, or if its
+ * QoS cannot be met even at the last (highest index) clock level.
+ *
+ * Return: 0 if the request is plausible, -EINVAL otherwise.
+ */
+static int sanity_check(struct solver_state *xrs, struct alloc_requests *req)
+{
+	const struct clk_list_info *clks = &xrs->cfg.clk_list;
+	u32 top_freq, best_gops;
+
+	if (req->cdo.ncols > xrs->cfg.total_col)
+		return -EINVAL;
+
+	/* Capability of the CDO when running at the last listed frequency */
+	top_freq = clks->cu_clk_list[clks->num_levels - 1];
+	best_gops = req->cdo.qos_cap.opc * top_freq / 1000;
+
+	return qos_meet(xrs, &req->rqos, best_gops) ? -EINVAL : 0;
+}
+
+/*
+ * A request carries usable DPM QoS input when gops is non-zero and at
+ * least one of the application-tunable values, fps or latency, is set.
+ */
+static bool is_valid_qos_dpm_params(struct aie_qos *rqos)
+{
+	return rqos->gops > 0 && (rqos->fps > 0 || rqos->latency > 0);
+}
+
+/*
+ * set_dpm_level() - Choose and apply the DPM level for a new request.
+ * @xrs: solver state
+ * @req: allocation request
+ * @dpm_level: receives the chosen level
+ *
+ * Without valid QoS parameters the highest level is used. Otherwise the
+ * lowest level that meets the request is picked and then raised to the
+ * highest level already recorded on any node in the node list.
+ *
+ * Return: result of the set_dft_dpm_level() callback.
+ */
+static int set_dpm_level(struct solver_state *xrs, struct alloc_requests *req, u32 *dpm_level)
+{
+	const struct clk_list_info *clks = &xrs->cfg.clk_list;
+	u32 top = clks->num_levels - 1;
+	struct solver_node *node;
+	u32 level = top;
+	u32 cgops;
+
+	if (is_valid_qos_dpm_params(&req->rqos)) {
+		/* Find one CDO group that meet the GOPs requirement. */
+		for (level = 0; level < top; level++) {
+			cgops = req->cdo.qos_cap.opc * clks->cu_clk_list[level] / 1000;
+			if (!qos_meet(xrs, &req->rqos, cgops))
+				break;
+		}
+
+		/* set the dpm level which fits all the sessions */
+		list_for_each_entry(node, &xrs->rgp.node_list, list) {
+			if (node->dpm_level > level)
+				level = node->dpm_level;
+		}
+	}
+
+	*dpm_level = level;
+	return xrs->cfg.actions->set_dft_dpm_level(xrs->cfg.ddev, level);
+}
+
+/* Return the solver node registered under @rid, or NULL if there is none. */
+static struct solver_node *rg_search_node(struct solver_rgroup *rgp, u64 rid)
+{
+	struct solver_node *cur;
+
+	list_for_each_entry(cur, &rgp->node_list, list) {
+		if (cur->rid != rid)
+			continue;
+
+		return cur;
+	}
+
+	return NULL;
+}
+
+/*
+ * Drop one sharer from a partition. When the last sharer is gone the
+ * partition is unlinked, its columns are released in the resource bitmap
+ * and the node is freed.
+ */
+static void remove_partition_node(struct solver_rgroup *rgp,
+				  struct partition_node *pt_node)
+{
+	if (--pt_node->nshared > 0)
+		return;
+
+	rgp->npartition_node--;
+	list_del(&pt_node->list);
+
+	bitmap_clear(rgp->resbit, pt_node->start_col, pt_node->ncols);
+	kfree(pt_node);
+}
+
+/*
+ * Unlink and free a solver node, giving up its share of the partition it
+ * was attached to (if any).
+ */
+static void remove_solver_node(struct solver_rgroup *rgp,
+			       struct solver_node *node)
+{
+	struct partition_node *part = node->pt_node;
+
+	rgp->nnode--;
+	list_del(&node->list);
+
+	if (part)
+		remove_partition_node(rgp, part);
+
+	kfree(node);
+}
+
+/*
+ * get_free_partition() - Claim an unused column range for a request.
+ * @xrs: solver state
+ * @snode: solver node holding the candidate start columns
+ * @req: allocation request, supplies the number of columns
+ *
+ * The candidate start columns are tried in order. The first one whose
+ * next set bit in the resource bitmap lies at or beyond the end of the
+ * wanted range is taken and turned into a new partition node.
+ *
+ * Return: 0 on success, -ENODEV if no candidate is free, -ENOMEM if the
+ * partition node cannot be allocated.
+ */
+static int get_free_partition(struct solver_state *xrs,
+			      struct solver_node *snode,
+			      struct alloc_requests *req)
+{
+	struct solver_rgroup *rgp = &xrs->rgp;
+	struct partition_node *part;
+	u32 want = req->cdo.ncols;
+	unsigned long first_busy;
+	u32 start, idx;
+
+	for (idx = 0; idx < snode->cols_len; idx++) {
+		start = snode->start_cols[idx];
+		first_busy = find_next_bit(rgp->resbit, XRS_MAX_COL, start);
+		if (first_busy >= start + want)
+			break;
+	}
+
+	if (idx == snode->cols_len)
+		return -ENODEV;
+
+	part = kzalloc(sizeof(*part), GFP_KERNEL);
+	if (!part)
+		return -ENOMEM;
+
+	part->start_col = start;
+	part->ncols = want;
+	part->nshared = 1;
+
+	/*
+	 * Always set exclusive to false for now.
+	 */
+	part->exclusive = false;
+
+	list_add_tail(&part->list, &rgp->pt_node_list);
+	rgp->npartition_node++;
+	bitmap_set(rgp->resbit, part->start_col, part->ncols);
+
+	snode->pt_node = part;
+
+	return 0;
+}
+
+/*
+ * allocate_partition() - Attach a solver node to a partition.
+ *
+ * A free column range is preferred. If none can be claimed, the request
+ * shares an existing non-exclusive partition that has the same width and
+ * starts at one of the request's candidate columns; among those, the one
+ * with the fewest sharers wins (the first found on a tie).
+ *
+ * Return: 0 on success, -ENODEV if nothing suitable exists.
+ */
+static int allocate_partition(struct solver_state *xrs,
+			      struct solver_node *snode,
+			      struct alloc_requests *req)
+{
+	struct partition_node *cand, *best = NULL;
+	u32 i;
+
+	if (!get_free_partition(xrs, snode, req))
+		return 0;
+
+	/* try to get a share-able partition */
+	list_for_each_entry(cand, &xrs->rgp.pt_node_list, list) {
+		if (cand->exclusive || cand->ncols != req->cdo.ncols)
+			continue;
+
+		if (best && cand->nshared >= best->nshared)
+			continue;
+
+		for (i = 0; i < snode->cols_len; i++) {
+			if (snode->start_cols[i] == cand->start_col) {
+				best = cand;
+				break;
+			}
+		}
+	}
+
+	if (!best)
+		return -ENODEV;
+
+	best->nshared++;
+	snode->pt_node = best;
+
+	return 0;
+}
+
+/*
+ * create_solver_node() - Build a solver node for a request and place it.
+ *
+ * The node stores the request ID and its own copy of the candidate start
+ * columns. It is added to the node list only after a partition has been
+ * assigned.
+ *
+ * Return: the new node, or an ERR_PTR() carrying -ENOMEM or the
+ * allocate_partition() error.
+ */
+static struct solver_node *create_solver_node(struct solver_state *xrs,
+					      struct alloc_requests *req)
+{
+	u32 ncand = req->cdo.cols_len;
+	struct solver_node *snode;
+	int err;
+
+	snode = kzalloc(struct_size(snode, start_cols, ncand), GFP_KERNEL);
+	if (!snode)
+		return ERR_PTR(-ENOMEM);
+
+	snode->rid = req->rid;
+	snode->cols_len = ncand;
+	memcpy(snode->start_cols, req->cdo.start_cols, ncand * sizeof(u32));
+
+	err = allocate_partition(xrs, snode, req);
+	if (err) {
+		kfree(snode);
+		return ERR_PTR(err);
+	}
+
+	list_add_tail(&snode->list, &xrs->rgp.node_list);
+	xrs->rgp.nnode++;
+
+	return snode;
+}
+
+/*
+ * Describe the partition assigned to @snode in a load action.
+ * Note: struct xrs_action_load keeps rid as u32 while the node stores a
+ * u64, so the assignment below narrows the request ID.
+ */
+static void fill_load_action(struct solver_state *xrs,
+			     struct solver_node *snode,
+			     struct xrs_action_load *action)
+{
+	const struct partition_node *part = snode->pt_node;
+
+	action->rid = snode->rid;
+	action->part.start_col = part->start_col;
+	action->part.ncols = part->ncols;
+}
+
+/*
+ * xrs_allocate_resource() - Allocate a partition for a request and load it.
+ * @hdl: resource solver handle
+ * @req: allocation request
+ * @cb_arg: opaque argument handed to the load/unload callbacks
+ *
+ * On any failure the solver node is removed again. If the failure happens
+ * after the load callback succeeded, the unload callback is invoked first
+ * so that the load is not left behind without a node that tracks it.
+ *
+ * Return: 0 on success, -EEXIST if the request ID is already in use, or
+ * the error from validation, partition allocation or a callback.
+ */
+int xrs_allocate_resource(void *hdl, struct alloc_requests *req, void *cb_arg)
+{
+	struct xrs_action_load load_act;
+	struct solver_node *snode;
+	struct solver_state *xrs;
+	u32 dpm_level;
+	int ret;
+
+	xrs = (struct solver_state *)hdl;
+
+	ret = sanity_check(xrs, req);
+	if (ret) {
+		drm_err(xrs->cfg.ddev, "invalid request");
+		return ret;
+	}
+
+	if (rg_search_node(&xrs->rgp, req->rid)) {
+		/* rid is a u64, so print it unsigned */
+		drm_err(xrs->cfg.ddev, "rid %llu is in-use", req->rid);
+		return -EEXIST;
+	}
+
+	snode = create_solver_node(xrs, req);
+	if (IS_ERR(snode))
+		return PTR_ERR(snode);
+
+	fill_load_action(xrs, snode, &load_act);
+	ret = xrs->cfg.actions->load(cb_arg, &load_act);
+	if (ret)
+		goto free_node;
+
+	ret = set_dpm_level(xrs, req, &dpm_level);
+	if (ret)
+		goto unload;
+
+	snode->dpm_level = dpm_level;
+	snode->cb_arg = cb_arg;
+
+	drm_dbg(xrs->cfg.ddev, "start col %d ncols %d\n",
+		snode->pt_node->start_col, snode->pt_node->ncols);
+
+	return 0;
+
+unload:
+	/* Undo the successful load; the original error is what gets returned */
+	xrs->cfg.actions->unload(cb_arg);
+free_node:
+	remove_solver_node(&xrs->rgp, snode);
+
+	return ret;
+}
+
+/*
+ * xrs_release_resource() - Unload and forget the request with ID @rid.
+ *
+ * Return: 0 on success, -ENODEV if no node with that ID exists. The return
+ * value of the unload callback is not examined.
+ */
+int xrs_release_resource(void *hdl, u64 rid)
+{
+	struct solver_state *xrs = hdl;
+	struct solver_node *snode = rg_search_node(&xrs->rgp, rid);
+
+	if (!snode) {
+		drm_err(xrs->cfg.ddev, "node not exist");
+		return -ENODEV;
+	}
+
+	xrs->cfg.actions->unload(snode->cb_arg);
+	remove_solver_node(&xrs->rgp, snode);
+
+	return 0;
+}
+
+/*
+ * xrsm_init() - Create a resource solver instance.
+ * @cfg: configuration to copy into the solver
+ *
+ * The state is a DRM-managed allocation tied to cfg->ddev.
+ *
+ * Return: the solver handle, or NULL if the allocation fails.
+ */
+void *xrsm_init(struct init_config *cfg)
+{
+	struct solver_state *xrs = drmm_kzalloc(cfg->ddev, sizeof(*xrs), GFP_KERNEL);
+
+	if (!xrs)
+		return NULL;
+
+	xrs->cfg = *cfg;
+
+	INIT_LIST_HEAD(&xrs->rgp.node_list);
+	INIT_LIST_HEAD(&xrs->rgp.pt_node_list);
+
+	return xrs;
+}
diff --git a/drivers/accel/amdxdna/aie2_solver.h b/drivers/accel/amdxdna/aie2_solver.h
new file mode 100644
index 000000000000..a2e3c52229e9
--- /dev/null
+++ b/drivers/accel/amdxdna/aie2_solver.h
@@ -0,0 +1,155 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2023-2024, Advanced Micro Devices, Inc.
+ */
+
+#ifndef _AIE2_SOLVER_H
+#define _AIE2_SOLVER_H
+
+#define XRS_MAX_COL 128
+
+/*
+ * Structure used to describe a partition. A partition is column based
+ * allocation unit described by its start column and number of columns.
+ */
+struct aie_part {
+ u32 start_col;
+ u32 ncols;
+};
+
+/*
+ * The QoS capabilities of a given AIE partition.
+ */
+struct aie_qos_cap {
+ u32 opc; /* operations per cycle */
+ u32 dma_bw; /* DMA bandwidth */
+};
+
+/*
+ * QoS requirement of a resource allocation.
+ */
+struct aie_qos {
+ u32 gops; /* Giga operations */
+ u32 fps; /* Frames per second */
+ u32 dma_bw; /* DMA bandwidth */
+ u32 latency; /* Frame response latency */
+ u32 exec_time; /* Frame execution time */
+ u32 priority; /* Request priority */
+};
+
+/*
+ * Structure used to describe a relocatable CDO (Configuration Data Object).
+ */
+struct cdo_parts {
+ u32 *start_cols; /* Start column array */
+ u32 cols_len; /* Length of start column array */
+ u32 ncols; /* # of column */
+ struct aie_qos_cap qos_cap; /* CDO QoS capabilities */
+};
+
+/*
+ * Structure used to describe a request to allocate.
+ */
+struct alloc_requests {
+ u64 rid;
+ struct cdo_parts cdo;
+ struct aie_qos rqos; /* Requested QoS */
+};
+
+/*
+ * Load callback argument
+ *
+ * NOTE(review): rid is u32 here, while struct alloc_requests carries the
+ * request ID as u64 - confirm the narrowing is intended.
+ */
+struct xrs_action_load {
+ u32 rid;
+ struct aie_part part;
+};
+
+/*
+ * Define the power level available
+ *
+ * POWER_LEVEL_MIN:
+ * Lowest power level. Usually set when all actions are unloaded.
+ *
+ * POWER_LEVEL_n
+ * Power levels 0 - n, is a step increase in system frequencies
+ */
+enum power_level {
+ POWER_LEVEL_MIN = 0x0,
+ POWER_LEVEL_0 = 0x1,
+ POWER_LEVEL_1 = 0x2,
+ POWER_LEVEL_2 = 0x3,
+ POWER_LEVEL_3 = 0x4,
+ POWER_LEVEL_4 = 0x5,
+ POWER_LEVEL_5 = 0x6,
+ POWER_LEVEL_6 = 0x7,
+ POWER_LEVEL_7 = 0x8,
+ POWER_LEVEL_NUM,
+};
+
+/*
+ * Structure used to describe the frequency table.
+ * Resource solver chooses the frequency from the table
+ * to meet the QOS requirements.
+ */
+struct clk_list_info {
+ u32 num_levels; /* available power levels */
+ u32 cu_clk_list[POWER_LEVEL_NUM]; /* available aie clock frequencies in MHz */
+};
+
+struct xrs_action_ops {
+ int (*load)(void *cb_arg, struct xrs_action_load *action);
+ int (*unload)(void *cb_arg);
+ int (*set_dft_dpm_level)(struct drm_device *ddev, u32 level);
+};
+
+/*
+ * Structure used to describe information for solver during initialization.
+ */
+struct init_config {
+ u32 total_col;
+ u32 sys_eff_factor; /* system efficiency factor */
+ u32 latency_adj; /* latency adjustment in ms */
+ struct clk_list_info clk_list; /* List of frequencies available in system */
+ struct drm_device *ddev;
+ struct xrs_action_ops *actions;
+};
+
+/*
+ * xrsm_init() - Register resource solver. Resource solver client needs
+ * to call this function to register itself.
+ *
+ * @cfg: The system metrics for resource solver to use
+ *
+ * Return: A resource solver handle
+ *
+ * Note: We should only create one handle per AIE array to be managed.
+ */
+void *xrsm_init(struct init_config *cfg);
+
+/*
+ * xrs_allocate_resource() - Request to allocate resources for a given context
+ * and its partition requirements. (See struct alloc_requests)
+ *
+ * @hdl: Resource solver handle obtained from xrsm_init()
+ * @req: Input to the Resource solver including request id
+ * and partition metadata.
+ * @cb_arg: callback argument pointer
+ *
+ * Return: 0 when successful.
+ * Or standard error number when failing
+ *
+ * Note:
+ * There is no lock mechanism inside resource solver. So it is
+ * the caller's responsibility to lock down XCLBINs and grab
+ * necessary lock.
+ */
+int xrs_allocate_resource(void *hdl, struct alloc_requests *req, void *cb_arg);
+
+/*
+ * xrs_release_resource() - Request to free resources for a given context.
+ *
+ * @hdl: Resource solver handle obtained from xrsm_init()
+ * @rid: The Request ID to identify the requesting context
+ */
+int xrs_release_resource(void *hdl, u64 rid);
+#endif /* _AIE2_SOLVER_H */
diff --git a/drivers/accel/amdxdna/amdxdna_ctx.c b/drivers/accel/amdxdna/amdxdna_ctx.c
new file mode 100644
index 000000000000..d11b1c83d9c3
--- /dev/null
+++ b/drivers/accel/amdxdna/amdxdna_ctx.c
@@ -0,0 +1,550 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2022-2024, Advanced Micro Devices, Inc.
+ */
+
+#include <drm/amdxdna_accel.h>
+#include <drm/drm_device.h>
+#include <drm/drm_drv.h>
+#include <drm/drm_file.h>
+#include <drm/drm_gem.h>
+#include <drm/drm_gem_shmem_helper.h>
+#include <drm/drm_print.h>
+#include <drm/gpu_scheduler.h>
+#include <linux/xarray.h>
+#include <trace/events/amdxdna.h>
+
+#include "amdxdna_ctx.h"
+#include "amdxdna_gem.h"
+#include "amdxdna_pci_drv.h"
+
+/* Upper limit of the hwctx ID range passed to xa_alloc_cyclic() */
+#define MAX_HWCTX_ID 255
+/* Largest argument BO count accepted by amdxdna_drm_submit_execbuf() */
+#define MAX_ARG_COUNT 4095
+
+/*
+ * Driver fence: a dma_fence together with the spinlock it is initialized
+ * with and the hardware context it was created for.
+ */
+struct amdxdna_fence {
+ struct dma_fence base;
+ spinlock_t lock; /* for base */
+ struct amdxdna_hwctx *hwctx; /* its name is reported as the timeline name */
+};
+
+/* dma_fence_ops.get_driver_name callback: always reports KBUILD_MODNAME. */
+static const char *amdxdna_fence_get_driver_name(struct dma_fence *fence)
+{
+ return KBUILD_MODNAME;
+}
+
+/*
+ * dma_fence_ops.get_timeline_name callback: report the name of the hardware
+ * context recorded in the enclosing amdxdna_fence.
+ */
+static const char *amdxdna_fence_get_timeline_name(struct dma_fence *fence)
+{
+	struct amdxdna_fence *f = container_of(fence, struct amdxdna_fence, base);
+
+	return f->hwctx->name;
+}
+
+/* Fence callbacks handed to dma_fence_init() in amdxdna_fence_create(). */
+static const struct dma_fence_ops fence_ops = {
+ .get_driver_name = amdxdna_fence_get_driver_name,
+ .get_timeline_name = amdxdna_fence_get_timeline_name,
+};
+
+/*
+ * Allocate a zeroed amdxdna_fence bound to @hwctx and initialize the embedded
+ * dma_fence with hwctx->id as the context number and 0 as the seqno.
+ *
+ * Return: the embedded dma_fence, or NULL when the allocation fails.
+ */
+static struct dma_fence *amdxdna_fence_create(struct amdxdna_hwctx *hwctx)
+{
+	struct amdxdna_fence *xfence = kzalloc(sizeof(*xfence), GFP_KERNEL);
+
+	if (!xfence)
+		return NULL;
+
+	spin_lock_init(&xfence->lock);
+	xfence->hwctx = hwctx;
+	dma_fence_init(&xfence->base, &fence_ops, &xfence->lock, hwctx->id, 0);
+
+	return &xfence->base;
+}
+
+/*
+ * Run the device hwctx_suspend op on every hardware context of @client.
+ * Warns when xdna->dev_lock is not held; the walk itself runs under
+ * client->hwctx_lock.
+ */
+void amdxdna_hwctx_suspend(struct amdxdna_client *client)
+{
+	struct amdxdna_dev *xdna = client->xdna;
+	struct amdxdna_hwctx *ctx;
+	unsigned long id;
+
+	drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock));
+
+	mutex_lock(&client->hwctx_lock);
+	amdxdna_for_each_hwctx(client, id, ctx) {
+		xdna->dev_info->ops->hwctx_suspend(ctx);
+	}
+	mutex_unlock(&client->hwctx_lock);
+}
+
+/*
+ * Run the device hwctx_resume op on every hardware context of @client.
+ * Warns when xdna->dev_lock is not held; the walk itself runs under
+ * client->hwctx_lock.
+ */
+void amdxdna_hwctx_resume(struct amdxdna_client *client)
+{
+	struct amdxdna_dev *xdna = client->xdna;
+	struct amdxdna_hwctx *ctx;
+	unsigned long id;
+
+	drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock));
+
+	mutex_lock(&client->hwctx_lock);
+	amdxdna_for_each_hwctx(client, id, ctx) {
+		xdna->dev_info->ops->hwctx_resume(ctx);
+	}
+	mutex_unlock(&client->hwctx_lock);
+}
+
+/*
+ * Tear down @hwctx after waiting for all SRCU readers of @ss to finish.
+ *
+ * Callers in this file remove @hwctx from client->hwctx_xa before calling,
+ * so once synchronize_srcu() returns no reader can still be using it. The
+ * device hwctx_fini op then runs under xdna->dev_lock and the name string
+ * and the context itself are freed.
+ */
+static void amdxdna_hwctx_destroy_rcu(struct amdxdna_hwctx *hwctx,
+ struct srcu_struct *ss)
+{
+ struct amdxdna_dev *xdna = hwctx->client->xdna;
+
+ synchronize_srcu(ss);
+
+ /* At this point, user is not able to submit new commands */
+ mutex_lock(&xdna->dev_lock);
+ xdna->dev_info->ops->hwctx_fini(hwctx);
+ mutex_unlock(&xdna->dev_lock);
+
+ kfree(hwctx->name);
+ kfree(hwctx);
+}
+
+/*
+ * Locate the payload that follows the CU mask words in a command BO.
+ *
+ * @abo:  command BO; abo->mem.kva is read as a struct amdxdna_cmd
+ * @size: optional; when non-NULL it receives the payload size in bytes
+ *
+ * ERT_CMD_CHAIN commands carry no CU mask words; every other opcode carries
+ * 1 + AMDXDNA_CMD_EXTRA_CU_MASK of them. When @size is given and the header
+ * count does not exceed the number of mask words, *size is set to 0 and NULL
+ * is returned. Without @size the count is not checked.
+ *
+ * Return: pointer to the first payload word, or NULL as described above.
+ */
+void *amdxdna_cmd_get_payload(struct amdxdna_gem_obj *abo, u32 *size)
+{
+	struct amdxdna_cmd *cmd = abo->mem.kva;
+	u32 skip = 0;
+	u32 words;
+
+	if (amdxdna_cmd_get_op(abo) != ERT_CMD_CHAIN)
+		skip = 1 + FIELD_GET(AMDXDNA_CMD_EXTRA_CU_MASK, cmd->header);
+
+	if (!size)
+		return &cmd->data[skip];
+
+	words = FIELD_GET(AMDXDNA_CMD_COUNT, cmd->header);
+	if (unlikely(words <= skip)) {
+		*size = 0;
+		return NULL;
+	}
+
+	*size = (words - skip) * sizeof(u32);
+	return &cmd->data[skip];
+}
+
+/*
+ * Derive a CU index from the CU mask words of a command BO.
+ *
+ * Scans the 1 + AMDXDNA_CMD_EXTRA_CU_MASK mask words in order and, for the
+ * first non-zero word, returns the position of its lowest set bit within
+ * that word.
+ *
+ * Return: the bit index, or -1 for ERT_CMD_CHAIN commands or when all mask
+ * words are zero.
+ */
+int amdxdna_cmd_get_cu_idx(struct amdxdna_gem_obj *abo)
+{
+	struct amdxdna_cmd *cmd = abo->mem.kva;
+	u32 *mask = cmd->data;
+	u32 nr, word;
+
+	if (amdxdna_cmd_get_op(abo) == ERT_CMD_CHAIN)
+		return -1;
+
+	nr = 1 + FIELD_GET(AMDXDNA_CMD_EXTRA_CU_MASK, cmd->header);
+	for (word = 0; word < nr; word++) {
+		if (!mask[word])
+			continue;
+		return ffs(mask[word]) - 1;
+	}
+
+	return -1;
+}
+
+/*
+ * This should be called in close() and remove(). DO NOT call in other syscalls.
+ * This guarantees that every hwctx and its resources are released even if the
+ * user never calls amdxdna_drm_destroy_hwctx_ioctl.
+ *
+ * Each context is erased from client->hwctx_xa first; hwctx_lock is then
+ * dropped around amdxdna_hwctx_destroy_rcu() and re-taken before the walk
+ * continues.
+ */
+void amdxdna_hwctx_remove_all(struct amdxdna_client *client)
+{
+ struct amdxdna_hwctx *hwctx;
+ unsigned long hwctx_id;
+
+ mutex_lock(&client->hwctx_lock);
+ amdxdna_for_each_hwctx(client, hwctx_id, hwctx) {
+ XDNA_DBG(client->xdna, "PID %d close HW context %d",
+ client->pid, hwctx->id);
+ xa_erase(&client->hwctx_xa, hwctx->id);
+ mutex_unlock(&client->hwctx_lock);
+ amdxdna_hwctx_destroy_rcu(hwctx, &client->hwctx_srcu);
+ mutex_lock(&client->hwctx_lock);
+ }
+ mutex_unlock(&client->hwctx_lock);
+}
+
+/*
+ * Create-hwctx ioctl: allocate a hardware context for the calling client,
+ * assign it an ID in client->hwctx_xa, initialize it through the device
+ * hwctx_init op and report the handle and syncobj handle back through @data.
+ *
+ * Return: 0 on success; -EINVAL when ext/ext_flags are set, -ENODEV when
+ * drm_dev_enter() fails, -ENOMEM or -EFAULT on allocation or user-copy
+ * failure, otherwise the error from ID allocation or hwctx_init.
+ */
+int amdxdna_drm_create_hwctx_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
+{
+ struct amdxdna_client *client = filp->driver_priv;
+ struct amdxdna_drm_create_hwctx *args = data;
+ struct amdxdna_dev *xdna = to_xdna_dev(dev);
+ struct amdxdna_hwctx *hwctx;
+ int ret, idx;
+
+ if (args->ext || args->ext_flags)
+ return -EINVAL;
+
+ if (!drm_dev_enter(dev, &idx))
+ return -ENODEV;
+
+ hwctx = kzalloc(sizeof(*hwctx), GFP_KERNEL);
+ if (!hwctx) {
+ ret = -ENOMEM;
+ goto exit;
+ }
+
+ /* QoS info lives in user memory referenced by args->qos_p */
+ if (copy_from_user(&hwctx->qos, u64_to_user_ptr(args->qos_p), sizeof(hwctx->qos))) {
+ XDNA_ERR(xdna, "Access QoS info failed");
+ ret = -EFAULT;
+ goto free_hwctx;
+ }
+
+ hwctx->client = client;
+ hwctx->fw_ctx_id = -1;
+ hwctx->num_tiles = args->num_tiles;
+ hwctx->mem_size = args->mem_size;
+ hwctx->max_opc = args->max_opc;
+ /*
+ * From here on the context can be found through client->hwctx_xa.
+ * hwctx was zero-allocated, so amdxdna_cmd_submit() sees a status
+ * that is not HWCTX_STAT_READY until something else sets it.
+ */
+ ret = xa_alloc_cyclic(&client->hwctx_xa, &hwctx->id, hwctx,
+ XA_LIMIT(AMDXDNA_INVALID_CTX_HANDLE + 1, MAX_HWCTX_ID),
+ &client->next_hwctxid, GFP_KERNEL);
+ if (ret < 0) {
+ XDNA_ERR(xdna, "Allocate hwctx ID failed, ret %d", ret);
+ goto free_hwctx;
+ }
+
+ hwctx->name = kasprintf(GFP_KERNEL, "hwctx.%d.%d", client->pid, hwctx->id);
+ if (!hwctx->name) {
+ ret = -ENOMEM;
+ goto rm_id;
+ }
+
+ mutex_lock(&xdna->dev_lock);
+ ret = xdna->dev_info->ops->hwctx_init(hwctx);
+ if (ret) {
+ mutex_unlock(&xdna->dev_lock);
+ XDNA_ERR(xdna, "Init hwctx failed, ret %d", ret);
+ goto free_name;
+ }
+ args->handle = hwctx->id;
+ args->syncobj_handle = hwctx->syncobj_hdl;
+ mutex_unlock(&xdna->dev_lock);
+
+ XDNA_DBG(xdna, "PID %d create HW context %d, ret %d", client->pid, args->handle, ret);
+ drm_dev_exit(idx);
+ return 0;
+
+ /* Error unwind, in reverse order of setup */
+free_name:
+ kfree(hwctx->name);
+rm_id:
+ xa_erase(&client->hwctx_xa, hwctx->id);
+free_hwctx:
+ kfree(hwctx);
+exit:
+ drm_dev_exit(idx);
+ return ret;
+}
+
+/*
+ * Destroy-hwctx ioctl: remove the context named by args->handle from
+ * client->hwctx_xa and tear it down with amdxdna_hwctx_destroy_rcu().
+ *
+ * Return: 0 on success; -EINVAL when the pad field fails the XDNA_MBZ_DBG
+ * check or the handle is not in the xarray; -ENODEV when drm_dev_enter()
+ * fails.
+ */
+int amdxdna_drm_destroy_hwctx_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
+{
+ struct amdxdna_client *client = filp->driver_priv;
+ struct amdxdna_drm_destroy_hwctx *args = data;
+ struct amdxdna_dev *xdna = to_xdna_dev(dev);
+ struct amdxdna_hwctx *hwctx;
+ int ret = 0, idx;
+
+ if (XDNA_MBZ_DBG(xdna, &args->pad, sizeof(args->pad)))
+ return -EINVAL;
+
+ if (!drm_dev_enter(dev, &idx))
+ return -ENODEV;
+
+ /* xa_erase() hands back the removed entry, or NULL if there was none */
+ hwctx = xa_erase(&client->hwctx_xa, args->handle);
+ if (!hwctx) {
+ ret = -EINVAL;
+ XDNA_DBG(xdna, "PID %d HW context %d not exist",
+ client->pid, args->handle);
+ goto out;
+ }
+
+ /*
+ * The pushed jobs are handled by DRM scheduler during destroy.
+ * SRCU to synchronize with exec command ioctls.
+ */
+ amdxdna_hwctx_destroy_rcu(hwctx, &client->hwctx_srcu);
+
+ XDNA_DBG(xdna, "PID %d destroyed HW context %d", client->pid, args->handle);
+out:
+ drm_dev_exit(idx);
+ return ret;
+}
+
+/*
+ * Config-hwctx ioctl: forward a configuration request to the device
+ * hwctx_config op for the context named by args->handle.
+ *
+ * For DRM_AMDXDNA_HWCTX_CONFIG_CU, param_val is a user pointer and up to
+ * PAGE_SIZE bytes are copied into a kernel buffer; for the DBG_BUF types
+ * param_val is passed through as a plain value with no buffer.
+ *
+ * Return: result of hwctx_config; -EINVAL for a non-zero pad, unknown type
+ * or unknown handle; -EOPNOTSUPP when the device has no hwctx_config op;
+ * -E2BIG, -ENOMEM or -EFAULT while staging the CU buffer.
+ */
+int amdxdna_drm_config_hwctx_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
+{
+ struct amdxdna_client *client = filp->driver_priv;
+ struct amdxdna_drm_config_hwctx *args = data;
+ struct amdxdna_dev *xdna = to_xdna_dev(dev);
+ struct amdxdna_hwctx *hwctx;
+ int ret, idx;
+ u32 buf_size;
+ void *buf;
+ u64 val;
+
+ if (XDNA_MBZ_DBG(xdna, &args->pad, sizeof(args->pad)))
+ return -EINVAL;
+
+ if (!xdna->dev_info->ops->hwctx_config)
+ return -EOPNOTSUPP;
+
+ val = args->param_val;
+ buf_size = args->param_val_size;
+
+ switch (args->param_type) {
+ case DRM_AMDXDNA_HWCTX_CONFIG_CU:
+ /* For those types that param_val is pointer */
+ if (buf_size > PAGE_SIZE) {
+ XDNA_ERR(xdna, "Config CU param buffer too large");
+ return -E2BIG;
+ }
+
+ /*
+ * Hwctx needs to keep buf
+ * NOTE(review): buf is freed unconditionally at the end of this
+ * function, so hwctx_config presumably copies what it keeps -- confirm.
+ */
+ buf = kzalloc(PAGE_SIZE, GFP_KERNEL);
+ if (!buf)
+ return -ENOMEM;
+
+ if (copy_from_user(buf, u64_to_user_ptr(val), buf_size)) {
+ kfree(buf);
+ return -EFAULT;
+ }
+
+ break;
+ case DRM_AMDXDNA_HWCTX_ASSIGN_DBG_BUF:
+ case DRM_AMDXDNA_HWCTX_REMOVE_DBG_BUF:
+ /* For those types that param_val is a value */
+ buf = NULL;
+ buf_size = 0;
+ break;
+ default:
+ XDNA_DBG(xdna, "Unknown HW context config type %d", args->param_type);
+ return -EINVAL;
+ }
+
+ /* dev_lock is taken outside the SRCU read section and released after it */
+ mutex_lock(&xdna->dev_lock);
+ idx = srcu_read_lock(&client->hwctx_srcu);
+ hwctx = xa_load(&client->hwctx_xa, args->handle);
+ if (!hwctx) {
+ XDNA_DBG(xdna, "PID %d failed to get hwctx %d", client->pid, args->handle);
+ ret = -EINVAL;
+ goto unlock_srcu;
+ }
+
+ ret = xdna->dev_info->ops->hwctx_config(hwctx, args->param_type, val, buf, buf_size);
+
+unlock_srcu:
+ srcu_read_unlock(&client->hwctx_srcu, idx);
+ mutex_unlock(&xdna->dev_lock);
+ kfree(buf);
+ return ret;
+}
+
+/*
+ * Drop the references held in job->bos[]. The walk stops at the first NULL
+ * slot, so it also works on a partially filled array.
+ */
+static void
+amdxdna_arg_bos_put(struct amdxdna_sched_job *job)
+{
+	int idx = 0;
+
+	while (idx < job->bo_cnt && job->bos[idx]) {
+		drm_gem_object_put(job->bos[idx]);
+		idx++;
+	}
+}
+
+/*
+ * Resolve @bo_cnt GEM handles from @bo_hdls into job->bos[], taking a
+ * reference on each object and pinning those not already marked pinned.
+ *
+ * A slot in job->bos[] is only written once its BO is usable, so on failure
+ * amdxdna_arg_bos_put() releases exactly the references stored so far.
+ *
+ * Return: 0 on success, -ENOENT for an unknown handle, or the error from
+ * amdxdna_gem_pin_nolock().
+ */
+static int
+amdxdna_arg_bos_lookup(struct amdxdna_client *client,
+ struct amdxdna_sched_job *job,
+ u32 *bo_hdls, u32 bo_cnt)
+{
+ struct drm_gem_object *gobj;
+ int i, ret;
+
+ job->bo_cnt = bo_cnt;
+ for (i = 0; i < job->bo_cnt; i++) {
+ struct amdxdna_gem_obj *abo;
+
+ gobj = drm_gem_object_lookup(client->filp, bo_hdls[i]);
+ if (!gobj) {
+ ret = -ENOENT;
+ goto put_shmem_bo;
+ }
+ abo = to_xdna_obj(gobj);
+
+ /* abo->lock protects the pinned flag */
+ mutex_lock(&abo->lock);
+ if (abo->pinned) {
+ mutex_unlock(&abo->lock);
+ job->bos[i] = gobj;
+ continue;
+ }
+
+ ret = amdxdna_gem_pin_nolock(abo);
+ if (ret) {
+ mutex_unlock(&abo->lock);
+ /* This BO was never stored in job->bos[], so drop it here */
+ drm_gem_object_put(gobj);
+ goto put_shmem_bo;
+ }
+ abo->pinned = true;
+ mutex_unlock(&abo->lock);
+
+ job->bos[i] = gobj;
+ }
+
+ return 0;
+
+put_shmem_bo:
+ amdxdna_arg_bos_put(job);
+ return ret;
+}
+
+/*
+ * Release the BO references held by @job: all argument BOs and the command
+ * BO. The job structure itself is not freed here.
+ *
+ * NOTE(review): job->cmd_bo may be NULL (amdxdna_cmd_submit() allows an
+ * invalid command handle); see amdxdna_gem_put_obj() for how that is handled.
+ */
+void amdxdna_sched_job_cleanup(struct amdxdna_sched_job *job)
+{
+ trace_amdxdna_debug_point(job->hwctx->name, job->seq, "job release");
+ amdxdna_arg_bos_put(job);
+ amdxdna_gem_put_obj(job->cmd_bo);
+}
+
+/*
+ * Build a scheduler job from a command BO and its argument BOs and hand it
+ * to the device cmd_submit op of the selected hardware context.
+ *
+ * @client:      submitting client
+ * @cmd_bo_hdl:  handle of the command BO, or AMDXDNA_INVALID_BO_HANDLE for
+ *               a job without a command BO
+ * @arg_bo_hdls: array of @arg_bo_cnt argument BO handles
+ * @arg_bo_cnt:  number of entries in @arg_bo_hdls
+ * @hwctx_hdl:   handle of the target hardware context
+ * @seq:         passed to the cmd_submit op, which is expected to fill in
+ *               the job sequence number
+ *
+ * The hardware context is looked up and used inside an SRCU read section so
+ * amdxdna_hwctx_destroy_rcu() cannot free it underneath us.
+ *
+ * Return: 0 on success; -ENOMEM, -EINVAL, or the error from the BO lookup or
+ * the cmd_submit op. On failure every reference taken here is dropped and
+ * the job is freed.
+ */
+int amdxdna_cmd_submit(struct amdxdna_client *client,
+		       u32 cmd_bo_hdl, u32 *arg_bo_hdls, u32 arg_bo_cnt,
+		       u32 hwctx_hdl, u64 *seq)
+{
+	struct amdxdna_dev *xdna = client->xdna;
+	struct amdxdna_sched_job *job;
+	struct amdxdna_hwctx *hwctx;
+	int ret, idx;
+
+	XDNA_DBG(xdna, "Command BO hdl %d, Arg BO count %d", cmd_bo_hdl, arg_bo_cnt);
+	job = kzalloc(struct_size(job, bos, arg_bo_cnt), GFP_KERNEL);
+	if (!job)
+		return -ENOMEM;
+
+	if (cmd_bo_hdl != AMDXDNA_INVALID_BO_HANDLE) {
+		job->cmd_bo = amdxdna_gem_get_obj(client, cmd_bo_hdl, AMDXDNA_BO_CMD);
+		if (!job->cmd_bo) {
+			XDNA_ERR(xdna, "Failed to get cmd bo from %d", cmd_bo_hdl);
+			ret = -EINVAL;
+			goto free_job;
+		}
+	} else {
+		job->cmd_bo = NULL;
+	}
+
+	ret = amdxdna_arg_bos_lookup(client, job, arg_bo_hdls, arg_bo_cnt);
+	if (ret) {
+		XDNA_ERR(xdna, "Argument BOs lookup failed, ret %d", ret);
+		goto cmd_put;
+	}
+
+	idx = srcu_read_lock(&client->hwctx_srcu);
+	hwctx = xa_load(&client->hwctx_xa, hwctx_hdl);
+	if (!hwctx) {
+		XDNA_DBG(xdna, "PID %d failed to get hwctx %d",
+			 client->pid, hwctx_hdl);
+		ret = -EINVAL;
+		goto unlock_srcu;
+	}
+
+	if (hwctx->status != HWCTX_STAT_READY) {
+		XDNA_ERR(xdna, "HW Context is not ready");
+		ret = -EINVAL;
+		goto unlock_srcu;
+	}
+
+	job->hwctx = hwctx;
+	job->mm = current->mm;
+
+	job->fence = amdxdna_fence_create(hwctx);
+	if (!job->fence) {
+		XDNA_ERR(xdna, "Failed to create fence");
+		ret = -ENOMEM;
+		goto unlock_srcu;
+	}
+	kref_init(&job->refcnt);
+
+	ret = xdna->dev_info->ops->cmd_submit(hwctx, job, seq);
+	if (ret)
+		goto put_fence;
+
+	/*
+	 * Emit the trace while still inside the SRCU read section: it reads
+	 * hwctx->name, which amdxdna_hwctx_destroy_rcu() may free as soon as
+	 * the read lock is dropped.
+	 */
+	trace_amdxdna_debug_point(hwctx->name, *seq, "job pushed");
+
+	/*
+	 * The amdxdna_hwctx_destroy_rcu() will release hwctx and associated
+	 * resource after synchronize_srcu(). The submitted jobs should be
+	 * handled by the queue, for example DRM scheduler, in device layer.
+	 * For here we can unlock SRCU.
+	 */
+	srcu_read_unlock(&client->hwctx_srcu, idx);
+
+	return 0;
+
+put_fence:
+	dma_fence_put(job->fence);
+unlock_srcu:
+	srcu_read_unlock(&client->hwctx_srcu, idx);
+	amdxdna_arg_bos_put(job);
+cmd_put:
+	amdxdna_gem_put_obj(job->cmd_bo);
+free_job:
+	kfree(job);
+	return ret;
+}
+
+/*
+ * The submit command ioctl submits a command to firmware. One firmware command
+ * may contain multiple command BOs for processing as a whole.
+ * The command sequence number is returned which can be used for wait command ioctl.
+ *
+ * Return: 0 on success; -EINVAL when arg_count is 0 or above MAX_ARG_COUNT or
+ * cmd_count is not 1; -ENOMEM or -EFAULT while copying the handle array; or
+ * the error from amdxdna_cmd_submit().
+ */
+static int amdxdna_drm_submit_execbuf(struct amdxdna_client *client,
+ struct amdxdna_drm_exec_cmd *args)
+{
+ struct amdxdna_dev *xdna = client->xdna;
+ u32 *arg_bo_hdls;
+ u32 cmd_bo_hdl;
+ int ret;
+
+ if (!args->arg_count || args->arg_count > MAX_ARG_COUNT) {
+ XDNA_ERR(xdna, "Invalid arg bo count %d", args->arg_count);
+ return -EINVAL;
+ }
+
+ /* Only support single command for now. */
+ if (args->cmd_count != 1) {
+ XDNA_ERR(xdna, "Invalid cmd bo count %d", args->cmd_count);
+ return -EINVAL;
+ }
+
+ /* With a single command, cmd_handles carries the BO handle itself */
+ cmd_bo_hdl = (u32)args->cmd_handles;
+ arg_bo_hdls = kcalloc(args->arg_count, sizeof(u32), GFP_KERNEL);
+ if (!arg_bo_hdls)
+ return -ENOMEM;
+ ret = copy_from_user(arg_bo_hdls, u64_to_user_ptr(args->args),
+ args->arg_count * sizeof(u32));
+ if (ret) {
+ ret = -EFAULT;
+ goto free_cmd_bo_hdls;
+ }
+
+ ret = amdxdna_cmd_submit(client, cmd_bo_hdl, arg_bo_hdls,
+ args->arg_count, args->hwctx, &args->seq);
+ if (ret)
+ XDNA_DBG(xdna, "Submit cmds failed, ret %d", ret);
+
+ /* Despite the label name, this frees the argument BO handle array */
+free_cmd_bo_hdls:
+ kfree(arg_bo_hdls);
+ if (!ret)
+ XDNA_DBG(xdna, "Pushed cmd %lld to scheduler", args->seq);
+ return ret;
+}
+
+/*
+ * Submit-command ioctl entry point. Rejects requests with ext/ext_flags set
+ * and dispatches on args->type; AMDXDNA_CMD_SUBMIT_EXEC_BUF is the only type
+ * handled, anything else is logged and fails with -EINVAL.
+ */
+int amdxdna_drm_submit_cmd_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
+{
+	struct amdxdna_drm_exec_cmd *args = data;
+	struct amdxdna_client *client = filp->driver_priv;
+
+	if (args->ext || args->ext_flags)
+		return -EINVAL;
+
+	if (args->type == AMDXDNA_CMD_SUBMIT_EXEC_BUF)
+		return amdxdna_drm_submit_execbuf(client, args);
+
+	XDNA_ERR(client->xdna, "Invalid command type %d", args->type);
+	return -EINVAL;
+}
diff --git a/drivers/accel/amdxdna/amdxdna_ctx.h b/drivers/accel/amdxdna/amdxdna_ctx.h
new file mode 100644
index 000000000000..80b0304193ec
--- /dev/null
+++ b/drivers/accel/amdxdna/amdxdna_ctx.h
@@ -0,0 +1,162 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2022-2024, Advanced Micro Devices, Inc.
+ */
+
+#ifndef _AMDXDNA_CTX_H_
+#define _AMDXDNA_CTX_H_
+
+#include <linux/bitfield.h>
+
+#include "amdxdna_gem.h"
+
+struct amdxdna_hwctx_priv;
+
+/* Opcode values carried in the AMDXDNA_CMD_OPCODE field of a command header */
+enum ert_cmd_opcode {
+ ERT_START_CU = 0,
+ ERT_CMD_CHAIN = 19,
+ ERT_START_NPU = 20,
+};
+
+/*
+ * Command states stored in the AMDXDNA_CMD_STATE field of a command header;
+ * see amdxdna_cmd_set_state() and amdxdna_cmd_get_state().
+ */
+enum ert_cmd_state {
+ ERT_CMD_STATE_INVALID,
+ ERT_CMD_STATE_NEW,
+ ERT_CMD_STATE_QUEUED,
+ ERT_CMD_STATE_RUNNING,
+ ERT_CMD_STATE_COMPLETED,
+ ERT_CMD_STATE_ERROR,
+ ERT_CMD_STATE_ABORT,
+ ERT_CMD_STATE_SUBMITTED,
+ ERT_CMD_STATE_TIMEOUT,
+ ERT_CMD_STATE_NORESPONSE,
+};
+
+/*
+ * Interpretation of the beginning of data payload for ERT_START_NPU in
+ * amdxdna_cmd. The rest of the payload in amdxdna_cmd is regular kernel args.
+ * The trailing prop_args[] array has no length of its own in this struct.
+ */
+struct amdxdna_cmd_start_npu {
+ u64 buffer; /* instruction buffer address */
+ u32 buffer_size; /* size of buffer in bytes */
+ u32 prop_count; /* properties count */
+ u32 prop_args[]; /* properties and regular kernel arguments */
+};
+
+/*
+ * Interpretation of the beginning of data payload for ERT_CMD_CHAIN in
+ * amdxdna_cmd. The rest of the payload in amdxdna_cmd is cmd BO handles.
+ */
+struct amdxdna_cmd_chain {
+ u32 command_count; /* number of entries in data[] */
+ u32 submit_index;
+ u32 error_index;
+ u32 reserved[3];
+ u64 data[] __counted_by(command_count);
+};
+
+/*
+ * Exec buffer command header format
+ *
+ * header bits:
+ *   [3:0]   state, an enum ert_cmd_state value
+ *   [11:10] number of extra CU mask words
+ *   [22:12] count, in u32 words; covers the CU mask words plus the payload
+ *   [27:23] opcode, an enum ert_cmd_opcode value
+ *
+ * For opcodes other than ERT_CMD_CHAIN, data[] starts with
+ * 1 + extra-CU-mask words of CU mask, followed by the payload.
+ */
+#define AMDXDNA_CMD_STATE GENMASK(3, 0)
+#define AMDXDNA_CMD_EXTRA_CU_MASK GENMASK(11, 10)
+#define AMDXDNA_CMD_COUNT GENMASK(22, 12)
+#define AMDXDNA_CMD_OPCODE GENMASK(27, 23)
+struct amdxdna_cmd {
+ u32 header;
+ u32 data[];
+};
+
+/* Per-client hardware context, created by amdxdna_drm_create_hwctx_ioctl(). */
+struct amdxdna_hwctx {
+ struct amdxdna_client *client; /* owning client */
+ struct amdxdna_hwctx_priv *priv;
+ char *name; /* "hwctx.<pid>.<id>", kasprintf()-allocated */
+
+ u32 id; /* index in client->hwctx_xa, returned to user as handle */
+ u32 max_opc; /* copied from the create ioctl arguments */
+ u32 num_tiles; /* copied from the create ioctl arguments */
+ u32 mem_size; /* copied from the create ioctl arguments */
+ u32 fw_ctx_id; /* set to -1 at creation */
+ u32 col_list_len;
+ u32 *col_list;
+ u32 start_col; /* first column of the mask from amdxdna_hwctx_col_map() */
+ u32 num_col; /* width of the mask from amdxdna_hwctx_col_map() */
+#define HWCTX_STAT_INIT 0
+#define HWCTX_STAT_READY 1
+#define HWCTX_STAT_STOP 2
+ u32 status; /* must be HWCTX_STAT_READY for command submission */
+ u32 old_status;
+
+ struct amdxdna_qos_info qos; /* copied from user memory at creation */
+ struct amdxdna_hwctx_param_config_cu *cus;
+ u32 syncobj_hdl; /* returned to user by the create ioctl */
+};
+
+/* Convert a pointer to the embedded drm_sched_job back to its amdxdna job */
+#define drm_job_to_xdna_job(j) \
+ container_of(j, struct amdxdna_sched_job, base)
+
+/*
+ * One submitted command. Allocated in amdxdna_cmd_submit() with room for
+ * bo_cnt entries in the trailing bos[] array.
+ */
+struct amdxdna_sched_job {
+ struct drm_sched_job base;
+ struct kref refcnt;
+ struct amdxdna_hwctx *hwctx;
+ struct mm_struct *mm; /* current->mm at submission time */
+ /* The fence to notice DRM scheduler that job is done by hardware */
+ struct dma_fence *fence;
+ /* user can wait on this fence */
+ struct dma_fence *out_fence;
+ bool job_done;
+ u64 seq;
+ struct amdxdna_gem_obj *cmd_bo; /* NULL when no command BO was given */
+ size_t bo_cnt;
+ struct drm_gem_object *bos[] __counted_by(bo_cnt);
+};
+
+/* Extract the opcode field from the header of the command BO @abo. */
+static inline u32
+amdxdna_cmd_get_op(struct amdxdna_gem_obj *abo)
+{
+	struct amdxdna_cmd *hdr = abo->mem.kva;
+	u32 opcode = FIELD_GET(AMDXDNA_CMD_OPCODE, hdr->header);
+
+	return opcode;
+}
+
+/*
+ * Replace the state field in the header of the command BO @abo with @s,
+ * leaving all other header bits untouched.
+ */
+static inline void
+amdxdna_cmd_set_state(struct amdxdna_gem_obj *abo, enum ert_cmd_state s)
+{
+	struct amdxdna_cmd *hdr = abo->mem.kva;
+	u32 *word = &hdr->header;
+
+	*word &= ~AMDXDNA_CMD_STATE;
+	*word |= FIELD_PREP(AMDXDNA_CMD_STATE, s);
+}
+
+/* Read the state field from the header of the command BO @abo. */
+static inline enum ert_cmd_state
+amdxdna_cmd_get_state(struct amdxdna_gem_obj *abo)
+{
+	struct amdxdna_cmd *hdr = abo->mem.kva;
+	u32 raw = hdr->header;
+
+	return FIELD_GET(AMDXDNA_CMD_STATE, raw);
+}
+
+void *amdxdna_cmd_get_payload(struct amdxdna_gem_obj *abo, u32 *size);
+int amdxdna_cmd_get_cu_idx(struct amdxdna_gem_obj *abo);
+
+/*
+ * Build a bitmask with one bit set per column of @hwctx: bits start_col
+ * through start_col + num_col - 1, inclusive.
+ */
+static inline u32 amdxdna_hwctx_col_map(struct amdxdna_hwctx *hwctx)
+{
+	u32 first = hwctx->start_col;
+	u32 last = hwctx->start_col + hwctx->num_col - 1;
+
+	return GENMASK(last, first);
+}
+
+void amdxdna_sched_job_cleanup(struct amdxdna_sched_job *job);
+void amdxdna_hwctx_remove_all(struct amdxdna_client *client);
+void amdxdna_hwctx_suspend(struct amdxdna_client *client);
+void amdxdna_hwctx_resume(struct amdxdna_client *client);
+
+int amdxdna_cmd_submit(struct amdxdna_client *client,
+ u32 cmd_bo_hdls, u32 *arg_bo_hdls, u32 arg_bo_cnt,
+ u32 hwctx_hdl, u64 *seq);
+
+int amdxdna_cmd_wait(struct amdxdna_client *client, u32 hwctx_hdl,
+ u64 seq, u32 timeout);
+
+int amdxdna_drm_create_hwctx_ioctl(struct drm_device *dev, void *data, struct drm_file *filp);
+int amdxdna_drm_config_hwctx_ioctl(struct drm_device *dev, void *data, struct drm_file *filp);
+int amdxdna_drm_destroy_hwctx_ioctl(struct drm_device *dev, void *data, struct drm_file *filp);
+int amdxdna_drm_submit_cmd_ioctl(struct drm_device *dev, void *data, struct drm_file *filp);
+
+#endif /* _AMDXDNA_CTX_H_ */
diff --git a/drivers/accel/amdxdna/amdxdna_gem.c b/drivers/accel/amdxdna/amdxdna_gem.c
new file mode 100644
index 000000000000..606433d73236
--- /dev/null
+++ b/drivers/accel/amdxdna/amdxdna_gem.c
@@ -0,0 +1,622 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2024, Advanced Micro Devices, Inc.
+ */
+
+#include <drm/amdxdna_accel.h>
+#include <drm/drm_cache.h>
+#include <drm/drm_device.h>
+#include <drm/drm_gem.h>
+#include <drm/drm_gem_shmem_helper.h>
+#include <drm/gpu_scheduler.h>
+#include <linux/iosys-map.h>
+#include <linux/vmalloc.h>
+
+#include "amdxdna_ctx.h"
+#include "amdxdna_gem.h"
+#include "amdxdna_pci_drv.h"
+
+/* Largest size accepted for an AMDXDNA_BO_CMD buffer object */
+#define XDNA_MAX_CMD_BO_SIZE SZ_32K
+
+/*
+ * Carve a range for the device BO @abo out of its dev heap's drm_mm and fill
+ * in abo->mem (device address, user pointer, page array, page count) from
+ * the heap at the matching offset. With @use_vmap the pages are also mapped
+ * into kernel address space and stored in mem->kva.
+ *
+ * NOTE(review): the "_locked" suffix and the only caller in this file
+ * suggest client->mm_lock must be held -- confirm.
+ *
+ * Return: 0 on success, the drm_mm error, or -EFAULT when vmap() fails (the
+ * mm node is removed again in that case).
+ */
+static int
+amdxdna_gem_insert_node_locked(struct amdxdna_gem_obj *abo, bool use_vmap)
+{
+ struct amdxdna_client *client = abo->client;
+ struct amdxdna_dev *xdna = client->xdna;
+ struct amdxdna_mem *mem = &abo->mem;
+ u64 offset;
+ u32 align;
+ int ret;
+
+ /* Align to the larger of the page size and the device buffer granule */
+ align = 1 << max(PAGE_SHIFT, xdna->dev_info->dev_mem_buf_shift);
+ ret = drm_mm_insert_node_generic(&abo->dev_heap->mm, &abo->mm_node,
+ mem->size, align,
+ 0, DRM_MM_INSERT_BEST);
+ if (ret) {
+ XDNA_ERR(xdna, "Failed to alloc dev bo memory, ret %d", ret);
+ return ret;
+ }
+
+ /* The BO aliases the heap: same pages and user mapping, at an offset */
+ mem->dev_addr = abo->mm_node.start;
+ offset = mem->dev_addr - abo->dev_heap->mem.dev_addr;
+ mem->userptr = abo->dev_heap->mem.userptr + offset;
+ mem->pages = &abo->dev_heap->base.pages[offset >> PAGE_SHIFT];
+ mem->nr_pages = mem->size >> PAGE_SHIFT;
+
+ if (use_vmap) {
+ mem->kva = vmap(mem->pages, mem->nr_pages, VM_MAP, PAGE_KERNEL);
+ if (!mem->kva) {
+ XDNA_ERR(xdna, "Failed to vmap");
+ drm_mm_remove_node(&abo->mm_node);
+ return -EFAULT;
+ }
+ }
+
+ return 0;
+}
+
+/*
+ * drm_gem_object_funcs.free callback shared by all BO types.
+ *
+ * AMDXDNA_BO_DEV objects are not shmem-backed: their heap range is returned
+ * under client->mm_lock, the kernel mapping and the heap reference are
+ * dropped, and the object is freed with kfree(). All other types go through
+ * drm_gem_shmem_free(); a dev heap additionally tears down its drm_mm.
+ */
+static void amdxdna_gem_obj_free(struct drm_gem_object *gobj)
+{
+ struct amdxdna_dev *xdna = to_xdna_dev(gobj->dev);
+ struct amdxdna_gem_obj *abo = to_xdna_obj(gobj);
+ struct iosys_map map = IOSYS_MAP_INIT_VADDR(abo->mem.kva);
+
+ XDNA_DBG(xdna, "BO type %d xdna_addr 0x%llx", abo->type, abo->mem.dev_addr);
+ if (abo->pinned)
+ amdxdna_gem_unpin(abo);
+
+ if (abo->type == AMDXDNA_BO_DEV) {
+ mutex_lock(&abo->client->mm_lock);
+ drm_mm_remove_node(&abo->mm_node);
+ mutex_unlock(&abo->client->mm_lock);
+
+ vunmap(abo->mem.kva);
+ /* Drop the heap reference taken in amdxdna_drm_alloc_dev_bo() */
+ drm_gem_object_put(to_gobj(abo->dev_heap));
+ drm_gem_object_release(gobj);
+ mutex_destroy(&abo->lock);
+ kfree(abo);
+ return;
+ }
+
+ if (abo->type == AMDXDNA_BO_DEV_HEAP)
+ drm_mm_takedown(&abo->mm);
+
+ drm_gem_vunmap_unlocked(gobj, &map);
+ mutex_destroy(&abo->lock);
+ drm_gem_shmem_free(&abo->base);
+}
+
+/* GEM callbacks for AMDXDNA_BO_DEV objects; only .free is provided */
+static const struct drm_gem_object_funcs amdxdna_gem_dev_obj_funcs = {
+ .free = amdxdna_gem_obj_free,
+};
+
+/*
+ * mmu_interval_notifier_ops.invalidate callback for a BO's user mapping.
+ *
+ * Return: false when the range is not blockable (the device op is then not
+ * called); true after forwarding to the device hmm_invalidate op.
+ */
+static bool amdxdna_hmm_invalidate(struct mmu_interval_notifier *mni,
+ const struct mmu_notifier_range *range,
+ unsigned long cur_seq)
+{
+ struct amdxdna_gem_obj *abo = container_of(mni, struct amdxdna_gem_obj,
+ mem.notifier);
+ struct amdxdna_dev *xdna = to_xdna_dev(to_gobj(abo)->dev);
+
+ XDNA_DBG(xdna, "Invalid range 0x%llx, 0x%lx, type %d",
+ abo->mem.userptr, abo->mem.size, abo->type);
+
+ if (!mmu_notifier_range_blockable(range))
+ return false;
+
+ xdna->dev_info->ops->hmm_invalidate(abo, cur_seq);
+
+ return true;
+}
+
+/* Notifier ops registered by amdxdna_hmm_register() */
+static const struct mmu_interval_notifier_ops amdxdna_hmm_ops = {
+ .invalidate = amdxdna_hmm_invalidate,
+};
+
+/*
+ * Undo amdxdna_hmm_register(): remove the interval notifier and free the
+ * pfn array. Does nothing on devices without an hmm_invalidate op.
+ */
+static void amdxdna_hmm_unregister(struct amdxdna_gem_obj *abo)
+{
+	struct amdxdna_dev *xdna = to_xdna_dev(to_gobj(abo)->dev);
+	struct amdxdna_mem *mem = &abo->mem;
+
+	if (xdna->dev_info->ops->hmm_invalidate) {
+		mmu_interval_notifier_remove(&mem->notifier);
+		kvfree(mem->pfns);
+		mem->pfns = NULL;
+	}
+}
+
+/*
+ * Register an MMU interval notifier covering the user range
+ * [@addr, @addr + @len) of @abo and allocate the pfn array for it.
+ *
+ * Does nothing (returns 0) on devices without an hmm_invalidate op. On
+ * success abo->mem.userptr records @addr. On failure the BO is left exactly
+ * as it was: the pfn array is freed and its pointer cleared so a later
+ * registration attempt is not rejected with -EEXIST, and userptr is not
+ * updated.
+ *
+ * Return: 0 on success, -EEXIST when a registration already exists, -ENOMEM
+ * when the pfn array cannot be allocated, or the error from
+ * mmu_interval_notifier_insert_locked().
+ */
+static int amdxdna_hmm_register(struct amdxdna_gem_obj *abo, unsigned long addr,
+				size_t len)
+{
+	struct amdxdna_dev *xdna = to_xdna_dev(to_gobj(abo)->dev);
+	u32 nr_pages;
+	int ret;
+
+	if (!xdna->dev_info->ops->hmm_invalidate)
+		return 0;
+
+	if (abo->mem.pfns)
+		return -EEXIST;
+
+	/* Number of pages touched by the range, including partial end pages */
+	nr_pages = (PAGE_ALIGN(addr + len) - (addr & PAGE_MASK)) >> PAGE_SHIFT;
+	abo->mem.pfns = kvcalloc(nr_pages, sizeof(*abo->mem.pfns),
+				 GFP_KERNEL);
+	if (!abo->mem.pfns)
+		return -ENOMEM;
+
+	ret = mmu_interval_notifier_insert_locked(&abo->mem.notifier,
+						  current->mm,
+						  addr,
+						  len,
+						  &amdxdna_hmm_ops);
+	if (ret) {
+		XDNA_ERR(xdna, "Insert mmu notifier failed, ret %d", ret);
+		kvfree(abo->mem.pfns);
+		/* Do not leave a dangling pointer behind */
+		abo->mem.pfns = NULL;
+		return ret;
+	}
+	abo->mem.userptr = addr;
+
+	return 0;
+}
+
+/*
+ * drm_gem_object_funcs.mmap callback for shmem-backed BOs.
+ *
+ * Registers the HMM notifier for the new user range, lets the shmem helper
+ * set up the mapping, then tries to pre-populate it with the BO's pages.
+ *
+ * Return: 0 on success, or the error from amdxdna_hmm_register() or
+ * drm_gem_shmem_mmap().
+ */
+static int amdxdna_gem_obj_mmap(struct drm_gem_object *gobj,
+ struct vm_area_struct *vma)
+{
+ struct amdxdna_gem_obj *abo = to_xdna_obj(gobj);
+ unsigned long num_pages;
+ int ret;
+
+ ret = amdxdna_hmm_register(abo, vma->vm_start, gobj->size);
+ if (ret)
+ return ret;
+
+ ret = drm_gem_shmem_mmap(&abo->base, vma);
+ if (ret)
+ goto hmm_unreg;
+
+ num_pages = gobj->size >> PAGE_SHIFT;
+ /* Try to insert the pages */
+ vm_flags_mod(vma, VM_MIXEDMAP, VM_PFNMAP);
+ ret = vm_insert_pages(vma, vma->vm_start, abo->base.pages, &num_pages);
+ /* A failure here is only logged; the mmap itself still succeeds */
+ if (ret)
+ XDNA_ERR(abo->client->xdna, "Failed insert pages, ret %d", ret);
+
+ return 0;
+
+hmm_unreg:
+ amdxdna_hmm_unregister(abo);
+ return ret;
+}
+
+/* Page-fault handler: delegates to the shmem helper's fault op. */
+static vm_fault_t amdxdna_gem_vm_fault(struct vm_fault *vmf)
+{
+ return drm_gem_shmem_vm_ops.fault(vmf);
+}
+
+/* VMA open handler: delegates to the shmem helper's open op. */
+static void amdxdna_gem_vm_open(struct vm_area_struct *vma)
+{
+ drm_gem_shmem_vm_ops.open(vma);
+}
+
+/*
+ * VMA close handler: drop the HMM registration of the BO behind @vma, then
+ * let the shmem helper's close op run.
+ */
+static void amdxdna_gem_vm_close(struct vm_area_struct *vma)
+{
+	struct drm_gem_object *obj = vma->vm_private_data;
+	struct amdxdna_gem_obj *abo = to_xdna_obj(obj);
+
+	amdxdna_hmm_unregister(abo);
+	drm_gem_shmem_vm_ops.close(vma);
+}
+
+/* VMA ops for mmap'ed shmem BOs; thin wrappers around drm_gem_shmem_vm_ops */
+static const struct vm_operations_struct amdxdna_gem_vm_ops = {
+ .fault = amdxdna_gem_vm_fault,
+ .open = amdxdna_gem_vm_open,
+ .close = amdxdna_gem_vm_close,
+};
+
+/*
+ * GEM callbacks installed by amdxdna_gem_create_object_cb(): the stock shmem
+ * helpers, except for the driver's own free, mmap and vm_ops.
+ */
+static const struct drm_gem_object_funcs amdxdna_gem_shmem_funcs = {
+ .free = amdxdna_gem_obj_free,
+ .print_info = drm_gem_shmem_object_print_info,
+ .pin = drm_gem_shmem_object_pin,
+ .unpin = drm_gem_shmem_object_unpin,
+ .get_sg_table = drm_gem_shmem_object_get_sg_table,
+ .vmap = drm_gem_shmem_object_vmap,
+ .vunmap = drm_gem_shmem_object_vunmap,
+ .mmap = amdxdna_gem_obj_mmap,
+ .vm_ops = &amdxdna_gem_vm_ops,
+};
+
+/*
+ * Allocate a zeroed amdxdna_gem_obj and set its defaults: not pinned, no
+ * assigned hwctx, invalid user and device addresses, mem.size = @size.
+ * @dev is not used here.
+ *
+ * Return: the new object, or ERR_PTR(-ENOMEM).
+ */
+static struct amdxdna_gem_obj *
+amdxdna_gem_create_obj(struct drm_device *dev, size_t size)
+{
+	struct amdxdna_gem_obj *obj = kzalloc(sizeof(*obj), GFP_KERNEL);
+
+	if (!obj)
+		return ERR_PTR(-ENOMEM);
+
+	mutex_init(&obj->lock);
+	obj->pinned = false;
+	obj->assigned_hwctx = AMDXDNA_INVALID_CTX_HANDLE;
+
+	obj->mem.size = size;
+	obj->mem.dev_addr = AMDXDNA_INVALID_ADDR;
+	obj->mem.userptr = AMDXDNA_INVALID_ADDR;
+
+	return obj;
+}
+
+/*
+ * For drm_driver->gem_create_object callback: allocate the driver object and
+ * install the shmem GEM callbacks on it.
+ */
+struct drm_gem_object *
+amdxdna_gem_create_object_cb(struct drm_device *dev, size_t size)
+{
+	struct amdxdna_gem_obj *abo = amdxdna_gem_create_obj(dev, size);
+	struct drm_gem_object *gobj;
+
+	if (IS_ERR(abo))
+		return ERR_CAST(abo);
+
+	gobj = to_gobj(abo);
+	gobj->funcs = &amdxdna_gem_shmem_funcs;
+
+	return gobj;
+}
+
+/*
+ * Create an AMDXDNA_BO_SHMEM object of args->size bytes for the client
+ * behind @filp, with write-combined mapping disabled.
+ *
+ * Return: the new BO, or the ERR_PTR from drm_gem_shmem_create().
+ */
+static struct amdxdna_gem_obj *
+amdxdna_drm_alloc_shmem(struct drm_device *dev,
+			struct amdxdna_drm_create_bo *args,
+			struct drm_file *filp)
+{
+	struct drm_gem_shmem_object *shmem = drm_gem_shmem_create(dev, args->size);
+	struct amdxdna_gem_obj *abo;
+
+	if (IS_ERR(shmem))
+		return ERR_CAST(shmem);
+
+	shmem->map_wc = false;
+
+	abo = to_xdna_obj(&shmem->base);
+	abo->type = AMDXDNA_BO_SHMEM;
+	abo->client = filp->driver_priv;
+
+	return abo;
+}
+
+/*
+ * Create the client's device heap: a shmem BO whose device address range
+ * starts at dev_info->dev_mem_base and is managed by a drm_mm allocator.
+ * Only one heap per client is allowed.
+ *
+ * The client keeps its own reference through client->dev_heap, in addition
+ * to the one returned to the caller.
+ *
+ * Return: the heap BO, ERR_PTR(-EINVAL) when args->size exceeds
+ * dev_info->dev_mem_size, ERR_PTR(-EBUSY) when a heap already exists, or
+ * the ERR_PTR from drm_gem_shmem_create().
+ */
+static struct amdxdna_gem_obj *
+amdxdna_drm_create_dev_heap(struct drm_device *dev,
+ struct amdxdna_drm_create_bo *args,
+ struct drm_file *filp)
+{
+ struct amdxdna_client *client = filp->driver_priv;
+ struct amdxdna_dev *xdna = to_xdna_dev(dev);
+ struct drm_gem_shmem_object *shmem;
+ struct amdxdna_gem_obj *abo;
+ int ret;
+
+ if (args->size > xdna->dev_info->dev_mem_size) {
+ XDNA_DBG(xdna, "Invalid dev heap size 0x%llx, limit 0x%lx",
+ args->size, xdna->dev_info->dev_mem_size);
+ return ERR_PTR(-EINVAL);
+ }
+
+ mutex_lock(&client->mm_lock);
+ if (client->dev_heap) {
+ XDNA_DBG(client->xdna, "dev heap is already created");
+ ret = -EBUSY;
+ goto mm_unlock;
+ }
+
+ shmem = drm_gem_shmem_create(dev, args->size);
+ if (IS_ERR(shmem)) {
+ ret = PTR_ERR(shmem);
+ goto mm_unlock;
+ }
+
+ shmem->map_wc = false;
+ abo = to_xdna_obj(&shmem->base);
+
+ abo->type = AMDXDNA_BO_DEV_HEAP;
+ abo->client = client;
+ abo->mem.dev_addr = client->xdna->dev_info->dev_mem_base;
+ drm_mm_init(&abo->mm, abo->mem.dev_addr, abo->mem.size);
+
+ client->dev_heap = abo;
+ /* Extra reference held on behalf of client->dev_heap */
+ drm_gem_object_get(to_gobj(abo));
+ mutex_unlock(&client->mm_lock);
+
+ return abo;
+
+mm_unlock:
+ mutex_unlock(&client->mm_lock);
+ return ERR_PTR(ret);
+}
+
+/*
+ * Allocate an AMDXDNA_BO_DEV object: a sub-range of the client's device heap.
+ *
+ * @dev:      DRM device
+ * @args:     create-BO arguments; args->size is rounded up to a page multiple
+ * @filp:     DRM file of the requesting client
+ * @use_vmap: also map the range into kernel address space
+ *
+ * The heap must exist and already have a valid user mapping. On success the
+ * new BO holds a reference on the heap, which amdxdna_gem_obj_free() drops.
+ * If reserving the heap range fails, the partially set up object is
+ * destroyed here; it has not been initialized as a GEM object yet, so it
+ * must not go through the GEM free path.
+ *
+ * Return: the new BO, ERR_PTR(-EINVAL) for a missing or unmapped heap or an
+ * oversized request, or the ERR_PTR from object allocation or heap range
+ * insertion.
+ */
+struct amdxdna_gem_obj *
+amdxdna_drm_alloc_dev_bo(struct drm_device *dev,
+			 struct amdxdna_drm_create_bo *args,
+			 struct drm_file *filp, bool use_vmap)
+{
+	struct amdxdna_client *client = filp->driver_priv;
+	struct amdxdna_dev *xdna = to_xdna_dev(dev);
+	size_t aligned_sz = PAGE_ALIGN(args->size);
+	struct amdxdna_gem_obj *abo, *heap;
+	int ret;
+
+	mutex_lock(&client->mm_lock);
+	heap = client->dev_heap;
+	if (!heap) {
+		ret = -EINVAL;
+		goto mm_unlock;
+	}
+
+	if (heap->mem.userptr == AMDXDNA_INVALID_ADDR) {
+		XDNA_ERR(xdna, "Invalid dev heap userptr");
+		ret = -EINVAL;
+		goto mm_unlock;
+	}
+
+	if (args->size > heap->mem.size) {
+		XDNA_ERR(xdna, "Invalid dev bo size 0x%llx, limit 0x%lx",
+			 args->size, heap->mem.size);
+		ret = -EINVAL;
+		goto mm_unlock;
+	}
+
+	abo = amdxdna_gem_create_obj(&xdna->ddev, aligned_sz);
+	if (IS_ERR(abo)) {
+		ret = PTR_ERR(abo);
+		goto mm_unlock;
+	}
+	to_gobj(abo)->funcs = &amdxdna_gem_dev_obj_funcs;
+	abo->type = AMDXDNA_BO_DEV;
+	abo->client = client;
+	abo->dev_heap = heap;
+	ret = amdxdna_gem_insert_node_locked(abo, use_vmap);
+	if (ret) {
+		XDNA_ERR(xdna, "Failed to alloc dev bo memory, ret %d", ret);
+		goto free_abo;
+	}
+
+	drm_gem_object_get(to_gobj(heap));
+	drm_gem_private_object_init(&xdna->ddev, to_gobj(abo), aligned_sz);
+
+	mutex_unlock(&client->mm_lock);
+	return abo;
+
+free_abo:
+	/* Nothing else owns abo yet; undo amdxdna_gem_create_obj() */
+	mutex_destroy(&abo->lock);
+	kfree(abo);
+mm_unlock:
+	mutex_unlock(&client->mm_lock);
+	return ERR_PTR(ret);
+}
+
+/*
+ * Create an AMDXDNA_BO_CMD object: a shmem BO that is mapped into kernel
+ * address space right away, with the mapping stored in abo->mem.kva.
+ *
+ * Return: the new BO, ERR_PTR(-EINVAL) when args->size is larger than
+ * XDNA_MAX_CMD_BO_SIZE or smaller than struct amdxdna_cmd, or the ERR_PTR
+ * from shmem creation or vmap.
+ */
+static struct amdxdna_gem_obj *
+amdxdna_drm_create_cmd_bo(struct drm_device *dev,
+ struct amdxdna_drm_create_bo *args,
+ struct drm_file *filp)
+{
+ struct amdxdna_dev *xdna = to_xdna_dev(dev);
+ struct drm_gem_shmem_object *shmem;
+ struct amdxdna_gem_obj *abo;
+ struct iosys_map map;
+ int ret;
+
+ if (args->size > XDNA_MAX_CMD_BO_SIZE) {
+ XDNA_ERR(xdna, "Command bo size 0x%llx too large", args->size);
+ return ERR_PTR(-EINVAL);
+ }
+
+ /* Must at least hold the command header */
+ if (args->size < sizeof(struct amdxdna_cmd)) {
+ XDNA_DBG(xdna, "Command BO size 0x%llx too small", args->size);
+ return ERR_PTR(-EINVAL);
+ }
+
+ shmem = drm_gem_shmem_create(dev, args->size);
+ if (IS_ERR(shmem))
+ return ERR_CAST(shmem);
+
+ shmem->map_wc = false;
+ abo = to_xdna_obj(&shmem->base);
+
+ abo->type = AMDXDNA_BO_CMD;
+ abo->client = filp->driver_priv;
+
+ ret = drm_gem_vmap_unlocked(to_gobj(abo), &map);
+ if (ret) {
+ XDNA_ERR(xdna, "Vmap cmd bo failed, ret %d", ret);
+ goto release_obj;
+ }
+ abo->mem.kva = map.vaddr;
+
+ return abo;
+
+release_obj:
+ drm_gem_shmem_free(shmem);
+ return ERR_PTR(ret);
+}
+
+/*
+ * Create-BO ioctl: build a buffer object of the requested type and publish
+ * it to userspace as a GEM handle in args->handle.
+ *
+ * Return: 0 on success; -EINVAL for non-zero flags or vaddr, zero size or an
+ * unknown type; otherwise the error from the type-specific constructor or
+ * from drm_gem_handle_create().
+ */
+int amdxdna_drm_create_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
+{
+ struct amdxdna_dev *xdna = to_xdna_dev(dev);
+ struct amdxdna_drm_create_bo *args = data;
+ struct amdxdna_gem_obj *abo;
+ int ret;
+
+ if (args->flags || args->vaddr || !args->size)
+ return -EINVAL;
+
+ XDNA_DBG(xdna, "BO arg type %d vaddr 0x%llx size 0x%llx flags 0x%llx",
+ args->type, args->vaddr, args->size, args->flags);
+ switch (args->type) {
+ case AMDXDNA_BO_SHMEM:
+ abo = amdxdna_drm_alloc_shmem(dev, args, filp);
+ break;
+ case AMDXDNA_BO_DEV_HEAP:
+ abo = amdxdna_drm_create_dev_heap(dev, args, filp);
+ break;
+ case AMDXDNA_BO_DEV:
+ abo = amdxdna_drm_alloc_dev_bo(dev, args, filp, false);
+ break;
+ case AMDXDNA_BO_CMD:
+ abo = amdxdna_drm_create_cmd_bo(dev, args, filp);
+ break;
+ default:
+ return -EINVAL;
+ }
+ if (IS_ERR(abo))
+ return PTR_ERR(abo);
+
+ /* ready to publish object to userspace */
+ ret = drm_gem_handle_create(filp, to_gobj(abo), &args->handle);
+ if (ret) {
+ XDNA_ERR(xdna, "Create handle failed");
+ goto put_obj;
+ }
+
+ XDNA_DBG(xdna, "BO hdl %d type %d userptr 0x%llx xdna_addr 0x%llx size 0x%lx",
+ args->handle, args->type, abo->mem.userptr,
+ abo->mem.dev_addr, abo->mem.size);
+ /* Reached on both success and failure: the creation reference is always dropped */
+put_obj:
+ /* Dereference object reference. Handle holds it now. */
+ drm_gem_object_put(to_gobj(abo));
+ return ret;
+}
+
+/*
+ * Pin the backing pages of @abo without taking abo->lock.
+ *
+ * Shmem and dev-heap BOs pin their own shmem object; a dev BO pins the heap
+ * it was carved from. Any other type (e.g. AMDXDNA_BO_CMD) is rejected.
+ *
+ * Return: result of drm_gem_shmem_pin(), or -EOPNOTSUPP for other types.
+ */
+int amdxdna_gem_pin_nolock(struct amdxdna_gem_obj *abo)
+{
+ struct amdxdna_dev *xdna = to_xdna_dev(to_gobj(abo)->dev);
+ int ret;
+
+ switch (abo->type) {
+ case AMDXDNA_BO_SHMEM:
+ case AMDXDNA_BO_DEV_HEAP:
+ ret = drm_gem_shmem_pin(&abo->base);
+ break;
+ case AMDXDNA_BO_DEV:
+ ret = drm_gem_shmem_pin(&abo->dev_heap->base);
+ break;
+ default:
+ ret = -EOPNOTSUPP;
+ }
+
+ XDNA_DBG(xdna, "BO type %d ret %d", abo->type, ret);
+ return ret;
+}
+
+/*
+ * Pin @abo under its lock. For a dev BO the operation is redirected to the
+ * backing dev heap, whose lock is taken instead.
+ *
+ * Return: result of amdxdna_gem_pin_nolock().
+ */
+int amdxdna_gem_pin(struct amdxdna_gem_obj *abo)
+{
+	struct amdxdna_gem_obj *target = abo;
+	int ret;
+
+	if (target->type == AMDXDNA_BO_DEV)
+		target = target->dev_heap;
+
+	mutex_lock(&target->lock);
+	ret = amdxdna_gem_pin_nolock(target);
+	mutex_unlock(&target->lock);
+
+	return ret;
+}
+
+/*
+ * Unpin the shmem pages of @abo under its lock. For a dev BO the backing dev
+ * heap is unpinned instead.
+ */
+void amdxdna_gem_unpin(struct amdxdna_gem_obj *abo)
+{
+	struct amdxdna_gem_obj *target = abo;
+
+	if (target->type == AMDXDNA_BO_DEV)
+		target = target->dev_heap;
+
+	mutex_lock(&target->lock);
+	drm_gem_shmem_unpin(&target->base);
+	mutex_unlock(&target->lock);
+}
+
+/*
+ * Look up the BO behind handle @bo_hdl of @client and take a reference.
+ *
+ * @bo_type restricts the lookup to one BO type; AMDXDNA_BO_INVALID accepts
+ * any type.
+ *
+ * Return: the BO (release with amdxdna_gem_put_obj()), or NULL when the
+ * handle is unknown or the type does not match.
+ */
+struct amdxdna_gem_obj *amdxdna_gem_get_obj(struct amdxdna_client *client,
+					    u32 bo_hdl, u8 bo_type)
+{
+	struct amdxdna_dev *xdna = client->xdna;
+	struct drm_gem_object *gobj;
+	struct amdxdna_gem_obj *found;
+
+	gobj = drm_gem_object_lookup(client->filp, bo_hdl);
+	if (!gobj) {
+		XDNA_DBG(xdna, "Can not find bo %d", bo_hdl);
+		return NULL;
+	}
+
+	found = to_xdna_obj(gobj);
+	if (bo_type != AMDXDNA_BO_INVALID && found->type != bo_type) {
+		/* Wrong type: give the lookup reference back */
+		drm_gem_object_put(gobj);
+		return NULL;
+	}
+
+	return found;
+}
+
+/*
+ * Get-BO-info ioctl: report the user address, device address and mmap offset
+ * of the BO behind args->handle. Dev BOs have no mmap offset of their own
+ * and report AMDXDNA_INVALID_ADDR instead.
+ *
+ * Return: 0 on success, -EINVAL when ext, ext_flags or pad is non-zero,
+ * -ENOENT for an unknown handle.
+ */
+int amdxdna_drm_get_bo_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
+{
+	struct amdxdna_dev *xdna = to_xdna_dev(dev);
+	struct amdxdna_drm_get_bo_info *args = data;
+	struct drm_gem_object *gobj;
+	struct amdxdna_gem_obj *bo;
+
+	if (args->ext || args->ext_flags || args->pad)
+		return -EINVAL;
+
+	gobj = drm_gem_object_lookup(filp, args->handle);
+	if (!gobj) {
+		XDNA_DBG(xdna, "Lookup GEM object %d failed", args->handle);
+		return -ENOENT;
+	}
+	bo = to_xdna_obj(gobj);
+
+	args->vaddr = bo->mem.userptr;
+	args->xdna_addr = bo->mem.dev_addr;
+
+	if (bo->type == AMDXDNA_BO_DEV)
+		args->map_offset = AMDXDNA_INVALID_ADDR;
+	else
+		args->map_offset = drm_vma_node_offset_addr(&gobj->vma_node);
+
+	XDNA_DBG(xdna, "BO hdl %d map_offset 0x%llx vaddr 0x%llx xdna_addr 0x%llx",
+		 args->handle, args->map_offset, args->vaddr, args->xdna_addr);
+
+	drm_gem_object_put(gobj);
+	return 0;
+}
+
+/*
+ * The sync bo ioctl is to make sure the CPU cache is in sync with memory.
+ * This is required because NPU is not cache coherent device. CPU cache
+ * flushing/invalidation is expensive so it is best to handle this outside
+ * of the command submission path. This ioctl allows explicit cache
+ * flushing/invalidation outside of the critical path.
+ *
+ * The BO is pinned for the duration of the flush. The whole BO is flushed;
+ * args->offset and args->size are only logged here.
+ *
+ * Return: 0 on success, -ENOENT for an unknown handle, or the error from
+ * amdxdna_gem_pin().
+ */
+int amdxdna_drm_sync_bo_ioctl(struct drm_device *dev,
+ void *data, struct drm_file *filp)
+{
+ struct amdxdna_dev *xdna = to_xdna_dev(dev);
+ struct amdxdna_drm_sync_bo *args = data;
+ struct amdxdna_gem_obj *abo;
+ struct drm_gem_object *gobj;
+ int ret;
+
+ gobj = drm_gem_object_lookup(filp, args->handle);
+ if (!gobj) {
+ XDNA_ERR(xdna, "Lookup GEM object failed");
+ return -ENOENT;
+ }
+ abo = to_xdna_obj(gobj);
+
+ ret = amdxdna_gem_pin(abo);
+ if (ret) {
+ XDNA_ERR(xdna, "Pin BO %d failed, ret %d", args->handle, ret);
+ goto put_obj;
+ }
+
+ /* A dev BO uses its slice of the heap's page array, others their own pages */
+ if (abo->type == AMDXDNA_BO_DEV)
+ drm_clflush_pages(abo->mem.pages, abo->mem.nr_pages);
+ else
+ drm_clflush_pages(abo->base.pages, gobj->size >> PAGE_SHIFT);
+
+ amdxdna_gem_unpin(abo);
+
+ XDNA_DBG(xdna, "Sync bo %d offset 0x%llx, size 0x%llx\n",
+ args->handle, args->offset, args->size);
+
+put_obj:
+ drm_gem_object_put(gobj);
+ return ret;
+}
diff --git a/drivers/accel/amdxdna/amdxdna_gem.h b/drivers/accel/amdxdna/amdxdna_gem.h
new file mode 100644
index 000000000000..8ccc0375dd9d
--- /dev/null
+++ b/drivers/accel/amdxdna/amdxdna_gem.h
@@ -0,0 +1,65 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2024, Advanced Micro Devices, Inc.
+ */
+
+#ifndef _AMDXDNA_GEM_H_
+#define _AMDXDNA_GEM_H_
+
+/*
+ * Memory description embedded in every amdxdna_gem_obj.
+ * NOTE(review): field notes marked "presumably" are inferred from names only;
+ * confirm against amdxdna_gem.c.
+ */
+struct amdxdna_mem {
+ u64 userptr; /* reported to user space as vaddr by the get-BO-info ioctl */
+ void *kva; /* presumably the kernel virtual address of the BO */
+ u64 dev_addr; /* reported to user space as xdna_addr by the get-BO-info ioctl */
+ size_t size;
+ struct page **pages; /* flushed by the sync-BO ioctl for AMDXDNA_BO_DEV objects */
+ u32 nr_pages; /* number of entries in pages */
+ struct mmu_interval_notifier notifier;
+ unsigned long *pfns;
+ bool map_invalid;
+};
+
+/*
+ * Driver GEM object. It embeds a shmem GEM object, so the DRM GEM object is
+ * reachable as base.base (see to_gobj() / to_xdna_obj()).
+ */
+struct amdxdna_gem_obj {
+ struct drm_gem_shmem_object base;
+ struct amdxdna_client *client;
+ u8 type; /* compared against AMDXDNA_BO_* values */
+ bool pinned;
+ struct mutex lock; /* Protects: pinned */
+ struct amdxdna_mem mem;
+
+ /* Below members are initialized only when needed */
+ struct drm_mm mm; /* For AMDXDNA_BO_DEV_HEAP */
+ struct amdxdna_gem_obj *dev_heap; /* For AMDXDNA_BO_DEV */
+ struct drm_mm_node mm_node; /* For AMDXDNA_BO_DEV */
+ u32 assigned_hwctx;
+};
+
+/* Get the struct drm_gem_object embedded in an amdxdna_gem_obj pointer. */
+#define to_gobj(obj) (&(obj)->base.base)
+
+/* Map an embedded DRM GEM object back to its enclosing amdxdna_gem_obj. */
+static inline struct amdxdna_gem_obj *to_xdna_obj(struct drm_gem_object *gobj)
+{
+	struct amdxdna_gem_obj *abo;
+
+	abo = container_of(gobj, struct amdxdna_gem_obj, base.base);
+	return abo;
+}
+
+/*
+ * Look up a BO by handle and type for @client.
+ * NOTE(review): presumably returns a referenced object that must be released
+ * with amdxdna_gem_put_obj(); confirm against the definition.
+ */
+struct amdxdna_gem_obj *amdxdna_gem_get_obj(struct amdxdna_client *client,
+ u32 bo_hdl, u8 bo_type);
+/* Drop one reference on the DRM GEM object embedded in @abo. */
+static inline void amdxdna_gem_put_obj(struct amdxdna_gem_obj *abo)
+{
+	struct drm_gem_object *gobj = to_gobj(abo);
+
+	drm_gem_object_put(gobj);
+}
+
+/* Installed as the .gem_create_object callback of the DRM driver. */
+struct drm_gem_object *
+amdxdna_gem_create_object_cb(struct drm_device *dev, size_t size);
+/* NOTE(review): semantics of use_vmap are not visible here; see amdxdna_gem.c. */
+struct amdxdna_gem_obj *
+amdxdna_drm_alloc_dev_bo(struct drm_device *dev,
+ struct amdxdna_drm_create_bo *args,
+ struct drm_file *filp, bool use_vmap);
+
+/*
+ * Pin/unpin a BO. The sync-BO ioctl pins the BO around its cache flush.
+ * NOTE(review): the _nolock variant presumably expects abo->lock to be held
+ * by the caller; confirm against the definition.
+ */
+int amdxdna_gem_pin_nolock(struct amdxdna_gem_obj *abo);
+int amdxdna_gem_pin(struct amdxdna_gem_obj *abo);
+void amdxdna_gem_unpin(struct amdxdna_gem_obj *abo);
+
+/* BO ioctl handlers registered in the driver's ioctl table. */
+int amdxdna_drm_create_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *filp);
+int amdxdna_drm_get_bo_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp);
+int amdxdna_drm_sync_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *filp);
+
+#endif /* _AMDXDNA_GEM_H_ */
diff --git a/drivers/accel/amdxdna/amdxdna_mailbox.c b/drivers/accel/amdxdna/amdxdna_mailbox.c
new file mode 100644
index 000000000000..e5301fac1397
--- /dev/null
+++ b/drivers/accel/amdxdna/amdxdna_mailbox.c
@@ -0,0 +1,562 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2022-2024, Advanced Micro Devices, Inc.
+ */
+
+#include <drm/drm_device.h>
+#include <drm/drm_managed.h>
+#include <linux/bitfield.h>
+#include <linux/interrupt.h>
+#include <linux/iopoll.h>
+#include <linux/slab.h>
+#include <linux/xarray.h>
+
+#define CREATE_TRACE_POINTS
+#include <trace/events/amdxdna.h>
+
+#include "amdxdna_mailbox.h"
+
+/*
+ * Channel-scoped logging helpers. Each one evaluates @chann exactly once and
+ * prefixes the message with "xdna_mailbox.<msix_irq>: " on the mailbox device.
+ */
+#define MB_ERR(chann, fmt, args...) \
+({ \
+ typeof(chann) _chann = chann; \
+ dev_err((_chann)->mb->dev, "xdna_mailbox.%d: "fmt, \
+ (_chann)->msix_irq, ##args); \
+})
+#define MB_DBG(chann, fmt, args...) \
+({ \
+ typeof(chann) _chann = chann; \
+ dev_dbg((_chann)->mb->dev, "xdna_mailbox.%d: "fmt, \
+ (_chann)->msix_irq, ##args); \
+})
+#define MB_WARN_ONCE(chann, fmt, args...) \
+({ \
+ typeof(chann) _chann = chann; \
+ dev_warn_once((_chann)->mb->dev, "xdna_mailbox.%d: "fmt, \
+ (_chann)->msix_irq, ##args); \
+})
+
+/* Marker ORed into the top byte of every message ID handed to the firmware. */
+#define MAGIC_VAL 0x1D000000U
+/* Bits of a message ID that must equal MAGIC_VAL for the ID to be valid. */
+#define MAGIC_VAL_MASK 0xFF000000
+/* Upper bound (exclusive) of the xarray slots used for in-flight messages. */
+#define MAX_MSG_ID_ENTRIES 256
+#define MSG_RX_TIMER 200 /* milliseconds */
+/* Name used for the workqueue, the IRQ and the trace events. */
+#define MAILBOX_NAME "xdna_mailbox"
+
+/* Index into mailbox_channel.res[]. */
+enum channel_res_type {
+ CHAN_RES_X2I, /* ring used by mailbox_send_msg() */
+ CHAN_RES_I2X, /* ring drained by mailbox_get_msg() */
+ CHAN_RES_NUM
+};
+
+/* Mailbox instance shared by its channels; created by xdnam_mailbox_create(). */
+struct mailbox {
+ struct device *dev; /* device used for log messages */
+ struct xdna_mailbox_res res; /* copy of the register and ring buffer mappings */
+};
+
+/* State of one bidirectional mailbox channel. */
+struct mailbox_channel {
+ struct mailbox *mb;
+ struct xdna_mailbox_chann_res res[CHAN_RES_NUM]; /* indexed by enum channel_res_type */
+ int msix_irq; /* Linux IRQ number passed to request_irq() */
+ u32 iohub_int_addr; /* register written with 0 by the IRQ handler */
+ struct xarray chan_xa; /* in-flight messages, indexed by message ID without MAGIC_VAL */
+ u32 next_msgid; /* cursor for cyclic message ID allocation */
+ u32 x2i_tail; /* cached copy of the last X2I tail value written */
+
+ /* Received msg related fields */
+ struct workqueue_struct *work_q;
+ struct work_struct rx_work;
+ u32 i2x_head; /* cached copy of the last I2X head value written */
+ bool bad_state; /* set once the RX worker hits an unexpected error */
+};
+
+/* Bit fields packed into xdna_msg_header.sz_ver. */
+#define MSG_BODY_SZ GENMASK(10, 0)
+#define MSG_PROTO_VER GENMASK(23, 16)
+/* Fixed 16-byte header that precedes every payload in the ring buffers. */
+struct xdna_msg_header {
+ __u32 total_size; /* payload size in bytes; first word read by the receiver */
+ __u32 sz_ver; /* MSG_BODY_SZ and MSG_PROTO_VER fields */
+ __u32 id; /* message ID including MAGIC_VAL */
+ __u32 opcode;
+} __packed;
+
+static_assert(sizeof(struct xdna_msg_header) == 16);
+
+/* Exact layout copied into the ring buffer: header followed by the payload. */
+struct mailbox_pkg {
+ struct xdna_msg_header header;
+ __u32 payload[];
+};
+
+/* The protocol version placed in the MSG_PROTO_VER field of sz_ver. */
+#define MSG_PROTOCOL_VERSION 0x1
+/*
+ * The tombstone value. It is written in place of a message when the writer
+ * wraps to the start of the ring; a reader that sees it restarts from offset 0.
+ */
+#define TOMBSTONE 0xDEADFACE
+
+/*
+ * One in-flight message. It is allocated by xdna_mailbox_send_msg() and freed
+ * after notify_cb has run, either with a response or with NULL data when the
+ * channel is destroyed.
+ */
+struct mailbox_msg {
+ void *handle; /* opaque argument passed back to notify_cb */
+ int (*notify_cb)(void *handle, const u32 *data, size_t size);
+ size_t pkg_size; /* package size in bytes */
+ struct mailbox_pkg pkg;
+};
+
+/* Write @data to the mailbox register at offset @mbox_reg from mbox_base. */
+static void mailbox_reg_write(struct mailbox_channel *mb_chann, u32 mbox_reg, u32 data)
+{
+	void __iomem *reg = mb_chann->mb->res.mbox_base + mbox_reg;
+
+	writel(data, reg);
+}
+
+/* Read the mailbox register at offset @mbox_reg from mbox_base. */
+static u32 mailbox_reg_read(struct mailbox_channel *mb_chann, u32 mbox_reg)
+{
+	void __iomem *reg = mb_chann->mb->res.mbox_base + mbox_reg;
+
+	return readl(reg);
+}
+
+/*
+ * Poll a mailbox register until it reads non-zero.
+ *
+ * @mb_chann: channel whose mailbox mapping is used
+ * @mbox_reg: register offset from mbox_base
+ * @val: receives the non-zero value on success; untouched on failure
+ *
+ * The register is re-read every 1 us for at most 100 us.
+ *
+ * Return: 0 on success, or the negative error from readx_poll_timeout() if
+ * the register stayed zero.
+ */
+static int mailbox_reg_read_non_zero(struct mailbox_channel *mb_chann, u32 mbox_reg, u32 *val)
+{
+	struct xdna_mailbox_res *mb_res = &mb_chann->mb->res;
+	void __iomem *ringbuf_addr = mb_res->mbox_base + mbox_reg;
+	u32 value; /* readl() yields u32; keep the poll variable unsigned */
+	int ret;
+
+	/* Poll till value is not zero */
+	ret = readx_poll_timeout(readl, ringbuf_addr, value,
+				 value, 1 /* us */, 100);
+	if (ret < 0)
+		return ret;
+
+	*val = value;
+	return 0;
+}
+
+/* Publish a new I2X head value to the register and remember it in the channel. */
+static inline void
+mailbox_set_headptr(struct mailbox_channel *mb_chann, u32 headptr_val)
+{
+	u32 head_reg = mb_chann->res[CHAN_RES_I2X].mb_head_ptr_reg;
+
+	mailbox_reg_write(mb_chann, head_reg, headptr_val);
+	mb_chann->i2x_head = headptr_val;
+}
+
+/* Publish a new X2I tail value to the register and remember it in the channel. */
+static inline void
+mailbox_set_tailptr(struct mailbox_channel *mb_chann, u32 tailptr_val)
+{
+	u32 tail_reg = mb_chann->res[CHAN_RES_X2I].mb_tail_ptr_reg;
+
+	mailbox_reg_write(mb_chann, tail_reg, tailptr_val);
+	mb_chann->x2i_tail = tailptr_val;
+}
+
+/* Read the head pointer register of the ring selected by @type. */
+static inline u32
+mailbox_get_headptr(struct mailbox_channel *mb_chann, enum channel_res_type type)
+{
+	u32 head_reg = mb_chann->res[type].mb_head_ptr_reg;
+
+	return mailbox_reg_read(mb_chann, head_reg);
+}
+
+/* Read the tail pointer register of the ring selected by @type. */
+static inline u32
+mailbox_get_tailptr(struct mailbox_channel *mb_chann, enum channel_res_type type)
+{
+	u32 tail_reg = mb_chann->res[type].mb_tail_ptr_reg;
+
+	return mailbox_reg_read(mb_chann, tail_reg);
+}
+
+/* Size in bytes of the ring buffer selected by @type. */
+static inline u32
+mailbox_get_ringbuf_size(struct mailbox_channel *mb_chann, enum channel_res_type type)
+{
+	const struct xdna_mailbox_chann_res *ring = &mb_chann->res[type];
+
+	return ring->rb_size;
+}
+
+/* Return non-zero when the top byte of @msg_id carries MAGIC_VAL. */
+static inline int mailbox_validate_msgid(int msg_id)
+{
+	u32 magic = msg_id & MAGIC_VAL_MASK;
+
+	return magic == MAGIC_VAL;
+}
+
+/*
+ * Store @mb_msg in the channel xarray under a cyclically allocated slot.
+ *
+ * Return: the slot number with MAGIC_VAL in the higher bits, or a negative
+ * error code if no slot could be allocated.
+ */
+static int mailbox_acquire_msgid(struct mailbox_channel *mb_chann, struct mailbox_msg *mb_msg)
+{
+	u32 slot;
+	int err;
+
+	err = xa_alloc_cyclic_irq(&mb_chann->chan_xa, &slot, mb_msg,
+				  XA_LIMIT(0, MAX_MSG_ID_ENTRIES - 1),
+				  &mb_chann->next_msgid, GFP_NOWAIT);
+	if (err < 0)
+		return err;
+
+	/* Tag the slot so responses can be sanity-checked. */
+	return slot | MAGIC_VAL;
+}
+
+/* Remove the xarray entry for @msg_id; the MAGIC_VAL bits are stripped first. */
+static void mailbox_release_msgid(struct mailbox_channel *mb_chann, int msg_id)
+{
+	xa_erase_irq(&mb_chann->chan_xa, msg_id & ~MAGIC_VAL_MASK);
+}
+
+/*
+ * Complete a message that will never get a response: the callback is invoked
+ * with NULL data and zero size, then the message is freed.
+ */
+static void mailbox_release_msg(struct mailbox_channel *mb_chann,
+				struct mailbox_msg *mb_msg)
+{
+	struct xdna_msg_header *hdr = &mb_msg->pkg.header;
+
+	MB_DBG(mb_chann, "msg_id 0x%x msg opcode 0x%x", hdr->id, hdr->opcode);
+	mb_msg->notify_cb(mb_msg->handle, NULL, 0);
+	kfree(mb_msg);
+}
+
+/*
+ * Copy one package into the X2I ring buffer and publish it by advancing the
+ * tail pointer register.
+ *
+ * Return: 0 on success, -ENOSPC if the package does not fit in the free space.
+ */
+static int
+mailbox_send_msg(struct mailbox_channel *mb_chann, struct mailbox_msg *mb_msg)
+{
+ void __iomem *write_addr;
+ u32 ringbuf_size;
+ u32 head, tail;
+ u32 start_addr;
+ u32 tmp_tail;
+
+ /* Head comes from the register; tail is the value cached by the last send. */
+ head = mailbox_get_headptr(mb_chann, CHAN_RES_X2I);
+ tail = mb_chann->x2i_tail;
+ ringbuf_size = mailbox_get_ringbuf_size(mb_chann, CHAN_RES_X2I);
+ start_addr = mb_chann->res[CHAN_RES_X2I].rb_start_addr;
+ tmp_tail = tail + mb_msg->pkg_size;
+
+ /* Writer is behind the reader: the package must not reach the head. */
+ if (tail < head && tmp_tail >= head)
+ goto no_space;
+
+ /*
+ * Package does not fit before the end of the ring (one u32 is kept for
+ * the tombstone) and would reach the head if written from offset 0.
+ */
+ if (tail >= head && (tmp_tail > ringbuf_size - sizeof(u32) &&
+ mb_msg->pkg_size >= head))
+ goto no_space;
+
+ if (tail >= head && tmp_tail > ringbuf_size - sizeof(u32)) {
+ write_addr = mb_chann->mb->res.ringbuf_base + start_addr + tail;
+ writel(TOMBSTONE, write_addr);
+
+ /* tombstone is set. Write from the start of the ringbuf */
+ tail = 0;
+ }
+
+ write_addr = mb_chann->mb->res.ringbuf_base + start_addr + tail;
+ memcpy_toio(write_addr, &mb_msg->pkg, mb_msg->pkg_size);
+ /* The package is written before the tail register is updated. */
+ mailbox_set_tailptr(mb_chann, tail + mb_msg->pkg_size);
+
+ trace_mbox_set_tail(MAILBOX_NAME, mb_chann->msix_irq,
+ mb_msg->pkg.header.opcode,
+ mb_msg->pkg.header.id);
+
+ return 0;
+
+no_space:
+ return -ENOSPC;
+}
+
+/*
+ * Match a received header against its pending request, hand the payload to
+ * the request's callback and free the request.
+ *
+ * Return: the callback's return value, or -EINVAL if the message ID is
+ * malformed or has no pending request.
+ */
+static int
+mailbox_get_resp(struct mailbox_channel *mb_chann, struct xdna_msg_header *header,
+		 void *data)
+{
+	struct mailbox_msg *pending;
+	int id = header->id;
+	int rc;
+
+	if (!mailbox_validate_msgid(id)) {
+		MB_ERR(mb_chann, "Bad message ID 0x%x", id);
+		return -EINVAL;
+	}
+
+	/* Strip the magic bits to get back the xarray index. */
+	id &= ~MAGIC_VAL_MASK;
+	pending = xa_erase_irq(&mb_chann->chan_xa, id);
+	if (!pending) {
+		MB_ERR(mb_chann, "Cannot find msg 0x%x", id);
+		return -EINVAL;
+	}
+
+	MB_DBG(mb_chann, "opcode 0x%x size %d id 0x%x",
+	       header->opcode, header->total_size, header->id);
+	rc = pending->notify_cb(pending->handle, data, header->total_size);
+	if (unlikely(rc))
+		MB_ERR(mb_chann, "Message callback ret %d", rc);
+
+	kfree(pending);
+	return rc;
+}
+
+/*
+ * Consume at most one entry (a message or a tombstone) from the I2X ring.
+ *
+ * Return: 0 if a tombstone was consumed, -ENOENT if the ring is empty,
+ * -EINVAL if the tail register or the entry looks invalid, otherwise the
+ * return value of mailbox_get_resp() for the consumed message.
+ */
+static int mailbox_get_msg(struct mailbox_channel *mb_chann)
+{
+ struct xdna_msg_header header;
+ void __iomem *read_addr;
+ u32 msg_size, rest;
+ u32 ringbuf_size;
+ u32 head, tail;
+ u32 start_addr;
+ int ret;
+
+ /* Any failure to read a non-zero tail is reported as -EINVAL. */
+ if (mailbox_reg_read_non_zero(mb_chann, mb_chann->res[CHAN_RES_I2X].mb_tail_ptr_reg, &tail))
+ return -EINVAL;
+ head = mb_chann->i2x_head;
+ ringbuf_size = mailbox_get_ringbuf_size(mb_chann, CHAN_RES_I2X);
+ start_addr = mb_chann->res[CHAN_RES_I2X].rb_start_addr;
+
+ if (unlikely(tail > ringbuf_size || !IS_ALIGNED(tail, 4))) {
+ MB_WARN_ONCE(mb_chann, "Invalid tail 0x%x", tail);
+ return -EINVAL;
+ }
+
+ /* ringbuf empty */
+ if (head == tail)
+ return -ENOENT;
+
+ /* A cached head equal to the ring size means the next read is at offset 0. */
+ if (head == ringbuf_size)
+ head = 0;
+
+ /* Peek size of the message or TOMBSTONE */
+ read_addr = mb_chann->mb->res.ringbuf_base + start_addr + head;
+ header.total_size = readl(read_addr);
+ /* size is TOMBSTONE, set next read from 0 */
+ if (header.total_size == TOMBSTONE) {
+ /* A tombstone is only accepted when the tail is not ahead of the head. */
+ if (head < tail) {
+ MB_WARN_ONCE(mb_chann, "Tombstone, head 0x%x tail 0x%x",
+ head, tail);
+ return -EINVAL;
+ }
+ mailbox_set_headptr(mb_chann, 0);
+ return 0;
+ }
+
+ if (unlikely(!header.total_size || !IS_ALIGNED(header.total_size, 4))) {
+ MB_WARN_ONCE(mb_chann, "Invalid total size 0x%x", header.total_size);
+ return -EINVAL;
+ }
+ msg_size = sizeof(header) + header.total_size;
+
+ /*
+ * The message must fit before the end of the ring and before the tail.
+ * When tail < head the unsigned "tail - head" wraps to a large value, so
+ * only the first comparison is effective in that case.
+ */
+ if (msg_size > ringbuf_size - head || msg_size > tail - head) {
+ MB_WARN_ONCE(mb_chann, "Invalid message size %d, tail %d, head %d",
+ msg_size, tail, head);
+ return -EINVAL;
+ }
+
+ /* total_size is already read; copy the remaining header words. */
+ rest = sizeof(header) - sizeof(u32);
+ read_addr += sizeof(u32);
+ memcpy_fromio((u32 *)&header + 1, read_addr, rest);
+ read_addr += rest;
+
+ /* read_addr now points at the payload inside the ring buffer mapping. */
+ ret = mailbox_get_resp(mb_chann, &header, (u32 *)read_addr);
+
+ /* The head is advanced even when the callback reported an error. */
+ mailbox_set_headptr(mb_chann, head + msg_size);
+ /* After update head, it can equal to ringbuf_size. This is expected. */
+ trace_mbox_set_head(MAILBOX_NAME, mb_chann->msix_irq,
+ header.opcode, header.id);
+
+ return ret;
+}
+
+/*
+ * Interrupt handler of a mailbox channel; @p is the struct mailbox_channel
+ * registered with request_irq(). Message processing is deferred to rx_work.
+ */
+static irqreturn_t mailbox_irq_handler(int irq, void *p)
+{
+ struct mailbox_channel *mb_chann = p;
+
+ trace_mbox_irq_handle(MAILBOX_NAME, irq);
+ /* Schedule a rx_work to call the callback functions */
+ queue_work(mb_chann->work_q, &mb_chann->rx_work);
+ /* Clear IOHUB register */
+ mailbox_reg_write(mb_chann, mb_chann->iohub_int_addr, 0);
+
+ return IRQ_HANDLED;
+}
+
+/*
+ * Work item that drains the I2X ring. It stops when the ring is empty; any
+ * other error marks the channel as bad and disables its interrupt.
+ */
+static void mailbox_rx_worker(struct work_struct *rx_work)
+{
+	struct mailbox_channel *mb_chann =
+		container_of(rx_work, struct mailbox_channel, rx_work);
+	int ret;
+
+	if (READ_ONCE(mb_chann->bad_state)) {
+		MB_ERR(mb_chann, "Channel in bad state, work aborted");
+		return;
+	}
+
+	/* A zero return means one entry was consumed, so keep draining. */
+	do {
+		ret = mailbox_get_msg(mb_chann);
+	} while (!ret);
+
+	/* -ENOENT is the normal "no more messages" exit. */
+	if (ret == -ENOENT)
+		return;
+
+	/* Other error means device doesn't look good, disable irq. */
+	MB_ERR(mb_chann, "Unexpected ret %d, disable irq", ret);
+	WRITE_ONCE(mb_chann->bad_state, true);
+	disable_irq(mb_chann->msix_irq);
+}
+
+/*
+ * Queue one message on the X2I ring of @mb_chann.
+ *
+ * The message is copied, registered under a fresh message ID and written to
+ * the ring buffer. On success msg->notify_cb is invoked later, either with
+ * the response or with NULL data when the channel is destroyed.
+ * @tx_timeout is accepted but not used by this function.
+ *
+ * Return: 0 on success; -EINVAL for an oversized, misaligned or
+ * tombstone-prefixed payload; -EPIPE if the channel is in bad state; -ENOMEM
+ * on allocation failure; otherwise the error from message ID allocation or
+ * from mailbox_send_msg().
+ */
+int xdna_mailbox_send_msg(struct mailbox_channel *mb_chann,
+ const struct xdna_mailbox_msg *msg, u64 tx_timeout)
+{
+ struct xdna_msg_header *header;
+ struct mailbox_msg *mb_msg;
+ size_t pkg_size;
+ int ret;
+
+ pkg_size = sizeof(*header) + msg->send_size;
+ if (pkg_size > mailbox_get_ringbuf_size(mb_chann, CHAN_RES_X2I)) {
+ MB_ERR(mb_chann, "Message size larger than ringbuf size");
+ return -EINVAL;
+ }
+
+ if (unlikely(!IS_ALIGNED(msg->send_size, 4))) {
+ MB_ERR(mb_chann, "Message must be 4 bytes align");
+ return -EINVAL;
+ }
+
+ /* The first word in payload can NOT be TOMBSTONE */
+ if (unlikely(((u32 *)msg->send_data)[0] == TOMBSTONE)) {
+ MB_ERR(mb_chann, "Tomb stone in data");
+ return -EINVAL;
+ }
+
+ if (READ_ONCE(mb_chann->bad_state)) {
+ MB_ERR(mb_chann, "Channel in bad state");
+ return -EPIPE;
+ }
+
+ /* struct mailbox_msg ends in the package, so add room for header + payload. */
+ mb_msg = kzalloc(sizeof(*mb_msg) + pkg_size, GFP_KERNEL);
+ if (!mb_msg)
+ return -ENOMEM;
+
+ mb_msg->handle = msg->handle;
+ mb_msg->notify_cb = msg->notify_cb;
+ mb_msg->pkg_size = pkg_size;
+
+ header = &mb_msg->pkg.header;
+ /*
+ * Hardware use total_size and size to split huge message.
+ * We do not support it here. Thus the values are the same.
+ */
+ header->total_size = msg->send_size;
+ header->sz_ver = FIELD_PREP(MSG_BODY_SZ, msg->send_size) |
+ FIELD_PREP(MSG_PROTO_VER, MSG_PROTOCOL_VERSION);
+ header->opcode = msg->opcode;
+ memcpy(mb_msg->pkg.payload, msg->send_data, msg->send_size);
+
+ /* From here on mb_msg is reachable through the channel xarray. */
+ ret = mailbox_acquire_msgid(mb_chann, mb_msg);
+ if (unlikely(ret < 0)) {
+ MB_ERR(mb_chann, "mailbox_acquire_msgid failed");
+ goto msg_id_failed;
+ }
+ header->id = ret;
+
+ MB_DBG(mb_chann, "opcode 0x%x size %d id 0x%x",
+ header->opcode, header->total_size, header->id);
+
+ ret = mailbox_send_msg(mb_chann, mb_msg);
+ if (ret) {
+ MB_DBG(mb_chann, "Error in mailbox send msg, ret %d", ret);
+ goto release_id;
+ }
+
+ return 0;
+
+release_id:
+ mailbox_release_msgid(mb_chann, header->id);
+msg_id_failed:
+ kfree(mb_msg);
+ return ret;
+}
+
+/*
+ * Allocate a channel on @mb, snapshot the current X2I tail and I2X head
+ * registers, create the RX workqueue and install the interrupt handler.
+ *
+ * Return: the new channel, or NULL if a ring size is not a power of two or
+ * if allocation, workqueue creation or request_irq() fails.
+ */
+struct mailbox_channel *
+xdna_mailbox_create_channel(struct mailbox *mb,
+ const struct xdna_mailbox_chann_res *x2i,
+ const struct xdna_mailbox_chann_res *i2x,
+ u32 iohub_int_addr,
+ int mb_irq)
+{
+ struct mailbox_channel *mb_chann;
+ int ret;
+
+ if (!is_power_of_2(x2i->rb_size) || !is_power_of_2(i2x->rb_size)) {
+ pr_err("Ring buf size must be power of 2");
+ return NULL;
+ }
+
+ mb_chann = kzalloc(sizeof(*mb_chann), GFP_KERNEL);
+ if (!mb_chann)
+ return NULL;
+
+ mb_chann->mb = mb;
+ mb_chann->msix_irq = mb_irq;
+ mb_chann->iohub_int_addr = iohub_int_addr;
+ memcpy(&mb_chann->res[CHAN_RES_X2I], x2i, sizeof(*x2i));
+ memcpy(&mb_chann->res[CHAN_RES_I2X], i2x, sizeof(*i2x));
+
+ xa_init_flags(&mb_chann->chan_xa, XA_FLAGS_ALLOC | XA_FLAGS_LOCK_IRQ);
+ /* Start from whatever the hardware registers currently hold. */
+ mb_chann->x2i_tail = mailbox_get_tailptr(mb_chann, CHAN_RES_X2I);
+ mb_chann->i2x_head = mailbox_get_headptr(mb_chann, CHAN_RES_I2X);
+
+ INIT_WORK(&mb_chann->rx_work, mailbox_rx_worker);
+ mb_chann->work_q = create_singlethread_workqueue(MAILBOX_NAME);
+ if (!mb_chann->work_q) {
+ MB_ERR(mb_chann, "Create workqueue failed");
+ goto free_and_out;
+ }
+
+ /* Everything look good. Time to enable irq handler */
+ ret = request_irq(mb_irq, mailbox_irq_handler, 0, MAILBOX_NAME, mb_chann);
+ if (ret) {
+ MB_ERR(mb_chann, "Failed to request irq %d ret %d", mb_irq, ret);
+ goto destroy_wq;
+ }
+
+ mb_chann->bad_state = false;
+
+ MB_DBG(mb_chann, "Mailbox channel created (irq: %d)", mb_chann->msix_irq);
+ return mb_chann;
+
+destroy_wq:
+ destroy_workqueue(mb_chann->work_q);
+free_and_out:
+ kfree(mb_chann);
+ return NULL;
+}
+
+/*
+ * Tear down a channel: release the IRQ, destroy the workqueue, complete every
+ * still-pending message with NULL data and free the channel.
+ *
+ * Return: always 0.
+ */
+int xdna_mailbox_destroy_channel(struct mailbox_channel *mb_chann)
+{
+ struct mailbox_msg *mb_msg;
+ unsigned long msg_id;
+
+ MB_DBG(mb_chann, "IRQ disabled and RX work cancelled");
+ free_irq(mb_chann->msix_irq, mb_chann);
+ destroy_workqueue(mb_chann->work_q);
+ /* We can clean up and release resources */
+
+ /* Each pending sender gets its callback invoked with NULL data. */
+ xa_for_each(&mb_chann->chan_xa, msg_id, mb_msg)
+ mailbox_release_msg(mb_chann, mb_msg);
+
+ xa_destroy(&mb_chann->chan_xa);
+
+ MB_DBG(mb_chann, "Mailbox channel destroyed, irq: %d", mb_chann->msix_irq);
+ kfree(mb_chann);
+ return 0;
+}
+
+/*
+ * Quiesce a channel without freeing it. The IRQ is disabled first because the
+ * interrupt handler is what queues rx_work; only then is the work cancelled.
+ */
+void xdna_mailbox_stop_channel(struct mailbox_channel *mb_chann)
+{
+ /* Disable an irq and wait. This might sleep. */
+ disable_irq(mb_chann->msix_irq);
+
+ /* Cancel RX work and wait for it to finish */
+ cancel_work_sync(&mb_chann->rx_work);
+ MB_DBG(mb_chann, "IRQ disabled and RX work cancelled");
+}
+
+/*
+ * Allocate a DRM-managed mailbox for @ddev and store a copy of @res in it.
+ *
+ * Return: the mailbox, or NULL if the allocation fails.
+ */
+struct mailbox *xdnam_mailbox_create(struct drm_device *ddev,
+				     const struct xdna_mailbox_res *res)
+{
+	struct mailbox *mb = drmm_kzalloc(ddev, sizeof(*mb), GFP_KERNEL);
+
+	if (!mb)
+		return NULL;
+
+	/* mailbox and ring buf base and size information */
+	mb->res = *res;
+	mb->dev = ddev->dev;
+
+	return mb;
+}
diff --git a/drivers/accel/amdxdna/amdxdna_mailbox.h b/drivers/accel/amdxdna/amdxdna_mailbox.h
new file mode 100644
index 000000000000..57954c303bdd
--- /dev/null
+++ b/drivers/accel/amdxdna/amdxdna_mailbox.h
@@ -0,0 +1,124 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2022-2024, Advanced Micro Devices, Inc.
+ */
+
+#ifndef _AIE2_MAILBOX_H_
+#define _AIE2_MAILBOX_H_
+
+struct mailbox;
+struct mailbox_channel;
+
+/*
+ * xdna_mailbox_msg - message struct
+ *
+ * @opcode: opcode for firmware
+ * @handle: handle used for the notify callback
+ * @notify_cb: callback function to notify the sender when there is response
+ * @send_data: pointing to sending data
+ * @send_size: size of the sending data in bytes, must be a multiple of 4
+ *
+ * The mailbox does not split the sending data: a message that does not fit
+ * in the ring buffer is rejected by xdna_mailbox_send_msg(). The sender
+ * receives one notification per message; the callback is invoked with NULL
+ * data if the channel is destroyed before a response arrives.
+ */
+struct xdna_mailbox_msg {
+ u32 opcode;
+ void *handle;
+ int (*notify_cb)(void *handle, const u32 *data, size_t size);
+ u8 *send_data;
+ size_t send_size;
+};
+
+/*
+ * xdna_mailbox_res - mailbox hardware resource
+ *
+ * @ringbuf_base: ring buffer base address
+ * @ringbuf_size: ring buffer size
+ * @mbox_base: mailbox base address
+ * @mbox_size: mailbox size
+ * @name: name string; NOTE(review): its use is not evident from the mailbox
+ *        core, confirm with the code that fills this struct
+ */
+struct xdna_mailbox_res {
+ void __iomem *ringbuf_base;
+ size_t ringbuf_size;
+ void __iomem *mbox_base;
+ size_t mbox_size;
+ const char *name;
+};
+
+/*
+ * xdna_mailbox_chann_res - resources of one direction of a channel
+ *
+ * @rb_start_addr: ring buffer start address, as an offset from ringbuf_base
+ * @rb_size: ring buffer size in bytes, must be a power of 2
+ * @mb_head_ptr_reg: mailbox head pointer register, as an offset from mbox_base
+ * @mb_tail_ptr_reg: mailbox tail pointer register, as an offset from mbox_base
+ */
+struct xdna_mailbox_chann_res {
+ u32 rb_start_addr;
+ u32 rb_size;
+ u32 mb_head_ptr_reg;
+ u32 mb_tail_ptr_reg;
+};
+
+/*
+ * xdnam_mailbox_create() -- create mailbox subsystem and initialize
+ *
+ * @ddev: device pointer
+ * @res: SRAM and mailbox resources
+ *
+ * The returned object is DRM-managed memory tied to @ddev; there is no
+ * matching destroy function.
+ *
+ * Return: If success, return a handle of mailbox subsystem.
+ * Otherwise, return NULL pointer.
+ */
+struct mailbox *xdnam_mailbox_create(struct drm_device *ddev,
+ const struct xdna_mailbox_res *res);
+
+/*
+ * xdna_mailbox_create_channel() -- Create a mailbox channel instance
+ *
+ * @mailbox: the handle returned from xdnam_mailbox_create()
+ * @x2i: host to firmware mailbox resources
+ * @i2x: firmware to host mailbox resources
+ * @xdna_mailbox_intr_reg: register addr of MSI-X interrupt; the interrupt
+ *                         handler writes 0 to it
+ * @mb_irq: Linux IRQ number associated with mailbox MSI-X interrupt vector index
+ *
+ * Both ring buffer sizes must be a power of 2.
+ *
+ * Return: If success, return a handle of mailbox channel. Otherwise, return NULL.
+ */
+struct mailbox_channel *
+xdna_mailbox_create_channel(struct mailbox *mailbox,
+ const struct xdna_mailbox_chann_res *x2i,
+ const struct xdna_mailbox_chann_res *i2x,
+ u32 xdna_mailbox_intr_reg,
+ int mb_irq);
+
+/*
+ * xdna_mailbox_destroy_channel() -- destroy mailbox channel
+ *
+ * @mailbox_chann: the handle returned from xdna_mailbox_create_channel()
+ *
+ * Pending messages get their callback invoked with NULL data. The handle
+ * must not be used after this call.
+ *
+ * Return: if success, return 0. otherwise return error code
+ */
+int xdna_mailbox_destroy_channel(struct mailbox_channel *mailbox_chann);
+
+/*
+ * xdna_mailbox_stop_channel() -- stop mailbox channel
+ *
+ * @mailbox_chann: the handle returned from xdna_mailbox_create_channel()
+ *
+ * Disables the channel interrupt and waits for the RX work to finish.
+ * This function does not return a value.
+ */
+void xdna_mailbox_stop_channel(struct mailbox_channel *mailbox_chann);
+
+/*
+ * xdna_mailbox_send_msg() -- Send a message
+ *
+ * @mailbox_chann: Mailbox channel handle
+ * @msg: message struct for message information
+ * @tx_timeout: the timeout value for sending the message in ms.
+ *              NOTE(review): the current implementation does not use it.
+ *
+ * Return: If success return 0, otherwise, return error code
+ */
+int xdna_mailbox_send_msg(struct mailbox_channel *mailbox_chann,
+ const struct xdna_mailbox_msg *msg, u64 tx_timeout);
+
+#endif /* _AIE2_MAILBOX_H_ */
diff --git a/drivers/accel/amdxdna/amdxdna_mailbox_helper.c b/drivers/accel/amdxdna/amdxdna_mailbox_helper.c
new file mode 100644
index 000000000000..5139a9c96a91
--- /dev/null
+++ b/drivers/accel/amdxdna/amdxdna_mailbox_helper.c
@@ -0,0 +1,61 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2024, Advanced Micro Devices, Inc.
+ */
+
+#include <drm/amdxdna_accel.h>
+#include <drm/drm_device.h>
+#include <drm/drm_print.h>
+#include <drm/drm_gem.h>
+#include <drm/drm_gem_shmem_helper.h>
+#include <drm/gpu_scheduler.h>
+#include <linux/completion.h>
+
+#include "amdxdna_gem.h"
+#include "amdxdna_mailbox.h"
+#include "amdxdna_mailbox_helper.h"
+#include "amdxdna_pci_drv.h"
+
+/*
+ * Mailbox notify callback used with struct xdna_notify handles.
+ *
+ * @handle: the struct xdna_notify of the waiting sender
+ * @data: response payload, or NULL when no response is delivered
+ * @size: payload size in bytes
+ *
+ * With NULL @data nothing is copied and the handle's error field is left
+ * unchanged. A size mismatch sets the error to -EINVAL. Otherwise the payload
+ * is copied into the handle's buffer. The waiter is completed in all cases.
+ *
+ * Return: the handle's error value.
+ */
+int xdna_msg_cb(void *handle, const u32 *data, size_t size)
+{
+ struct xdna_notify *cb_arg = handle;
+ int ret;
+
+ if (unlikely(!data))
+ goto out;
+
+ if (unlikely(cb_arg->size != size)) {
+ cb_arg->error = -EINVAL;
+ goto out;
+ }
+
+ print_hex_dump_debug("resp data: ", DUMP_PREFIX_OFFSET,
+ 16, 4, data, cb_arg->size, true);
+ memcpy(cb_arg->data, data, cb_arg->size);
+out:
+ /* Read the error before complete(): cb_arg is not touched afterwards. */
+ ret = cb_arg->error;
+ complete(&cb_arg->comp);
+ return ret;
+}
+
+/*
+ * Send @msg on @chann and block until its callback completes or RX_TIMEOUT
+ * elapses. msg->handle must point to a struct xdna_notify.
+ *
+ * Return: the send error if sending failed, -ETIME on timeout, otherwise the
+ * error value stored in the notify handle.
+ */
+int xdna_send_msg_wait(struct amdxdna_dev *xdna, struct mailbox_channel *chann,
+		       struct xdna_mailbox_msg *msg)
+{
+	struct xdna_notify *notify = msg->handle;
+	int rc;
+
+	rc = xdna_mailbox_send_msg(chann, msg, TX_TIMEOUT);
+	if (rc) {
+		XDNA_ERR(xdna, "Send message failed, ret %d", rc);
+		return rc;
+	}
+
+	/* Non-zero means the completion fired before the timeout. */
+	rc = wait_for_completion_timeout(&notify->comp,
+					 msecs_to_jiffies(RX_TIMEOUT));
+	if (rc)
+		return notify->error;
+
+	XDNA_ERR(xdna, "Wait for completion timeout");
+	return -ETIME;
+}
diff --git a/drivers/accel/amdxdna/amdxdna_mailbox_helper.h b/drivers/accel/amdxdna/amdxdna_mailbox_helper.h
new file mode 100644
index 000000000000..23e1317b79fe
--- /dev/null
+++ b/drivers/accel/amdxdna/amdxdna_mailbox_helper.h
@@ -0,0 +1,42 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2023-2024, Advanced Micro Devices, Inc.
+ */
+
+#ifndef _AMDXDNA_MAILBOX_HELPER_H
+#define _AMDXDNA_MAILBOX_HELPER_H
+
+/* Timeouts used by xdna_send_msg_wait() for sending and for the response wait. */
+#define TX_TIMEOUT 2000 /* milliseconds */
+#define RX_TIMEOUT 5000 /* milliseconds */
+
+struct amdxdna_dev;
+
+/* Per-request context shared between a waiting sender and xdna_msg_cb(). */
+struct xdna_notify {
+ struct completion comp; /* completed by xdna_msg_cb() */
+ u32 *data; /* buffer that receives the response payload */
+ size_t size; /* expected response size in bytes */
+ int error; /* result reported to the waiter */
+};
+
+/*
+ * Declare the on-stack locals needed for one request/response exchange:
+ *   req  - zero-initialized struct <name>_req, used as the payload to send
+ *   resp - struct <name>_resp whose first member is initialized to @status
+ *   hdl  - struct xdna_notify pointing at resp
+ *   msg  - struct xdna_mailbox_msg with opcode @op and xdna_msg_cb as callback
+ */
+#define DECLARE_XDNA_MSG_COMMON(name, op, status) \
+ struct name##_req req = { 0 }; \
+ struct name##_resp resp = { status }; \
+ struct xdna_notify hdl = { \
+ .error = 0, \
+ .data = (u32 *)&resp, \
+ .size = sizeof(resp), \
+ .comp = COMPLETION_INITIALIZER_ONSTACK(hdl.comp), \
+ }; \
+ struct xdna_mailbox_msg msg = { \
+ .send_data = (u8 *)&req, \
+ .send_size = sizeof(req), \
+ .handle = &hdl, \
+ .opcode = op, \
+ .notify_cb = xdna_msg_cb, \
+ }
+
+/* Notify callback for struct xdna_notify handles; completes the waiter. */
+int xdna_msg_cb(void *handle, const u32 *data, size_t size);
+/* Send @msg and wait up to RX_TIMEOUT ms for its completion. */
+int xdna_send_msg_wait(struct amdxdna_dev *xdna, struct mailbox_channel *chann,
+ struct xdna_mailbox_msg *msg);
+
+#endif /* _AMDXDNA_MAILBOX_HELPER_H */
diff --git a/drivers/accel/amdxdna/amdxdna_pci_drv.c b/drivers/accel/amdxdna/amdxdna_pci_drv.c
new file mode 100644
index 000000000000..f5b8497cf5ad
--- /dev/null
+++ b/drivers/accel/amdxdna/amdxdna_pci_drv.c
@@ -0,0 +1,434 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2022-2024, Advanced Micro Devices, Inc.
+ */
+
+#include <drm/amdxdna_accel.h>
+#include <drm/drm_accel.h>
+#include <drm/drm_drv.h>
+#include <drm/drm_gem.h>
+#include <drm/drm_gem_shmem_helper.h>
+#include <drm/drm_ioctl.h>
+#include <drm/drm_managed.h>
+#include <drm/gpu_scheduler.h>
+#include <linux/iommu.h>
+#include <linux/pci.h>
+#include <linux/pm_runtime.h>
+
+#include "amdxdna_ctx.h"
+#include "amdxdna_gem.h"
+#include "amdxdna_pci_drv.h"
+
+/* Runtime PM autosuspend delay set in amdxdna_probe(). */
+#define AMDXDNA_AUTOSUSPEND_DELAY 5000 /* milliseconds */
+
+/*
+ * Firmware images this module may request.
+ * NOTE(review): the directory names look like <device_id>_<revision>;
+ * confirm against the per-device info tables.
+ */
+MODULE_FIRMWARE("amdnpu/1502_00/npu.sbin");
+MODULE_FIRMWARE("amdnpu/17f0_10/npu.sbin");
+MODULE_FIRMWARE("amdnpu/17f0_11/npu.sbin");
+MODULE_FIRMWARE("amdnpu/17f0_20/npu.sbin");
+
+/*
+ * Bind the driver based on the (vendor_id, device_id) pair and later use the
+ * (device_id, rev_id) pair as a key to select the device info. Devices with
+ * the same device_id have a very similar interface to the host driver.
+ */
+static const struct pci_device_id pci_ids[] = {
+ { PCI_DEVICE(PCI_VENDOR_ID_AMD, 0x1502) },
+ { PCI_DEVICE(PCI_VENDOR_ID_AMD, 0x17f0) },
+ {0}
+};
+
+MODULE_DEVICE_TABLE(pci, pci_ids);
+
+/* (device_id, revision) -> device info lookup table, see amdxdna_get_dev_info(). */
+static const struct amdxdna_device_id amdxdna_ids[] = {
+ { 0x1502, 0x0, &dev_npu1_info },
+ { 0x17f0, 0x0, &dev_npu2_info },
+ { 0x17f0, 0x10, &dev_npu4_info },
+ { 0x17f0, 0x11, &dev_npu5_info },
+ { 0x17f0, 0x20, &dev_npu6_info },
+ {0}
+};
+
+/*
+ * DRM .open callback: create the per-file client.
+ *
+ * Takes a runtime PM reference that is held until amdxdna_drm_close(), binds
+ * the calling process' mm to the device through SVA, initializes the client's
+ * locks and hardware context xarray, and adds the client to the device list.
+ *
+ * Return: 0 on success, or a negative error code with all acquired resources
+ * released.
+ */
+static int amdxdna_drm_open(struct drm_device *ddev, struct drm_file *filp)
+{
+ struct amdxdna_dev *xdna = to_xdna_dev(ddev);
+ struct amdxdna_client *client;
+ int ret;
+
+ ret = pm_runtime_resume_and_get(ddev->dev);
+ if (ret) {
+ XDNA_ERR(xdna, "Failed to get rpm, ret %d", ret);
+ return ret;
+ }
+
+ client = kzalloc(sizeof(*client), GFP_KERNEL);
+ if (!client) {
+ ret = -ENOMEM;
+ goto put_rpm;
+ }
+
+ client->pid = pid_nr(rcu_access_pointer(filp->pid));
+ client->xdna = xdna;
+
+ /* Bind the opener's address space so the device can use a PASID for it. */
+ client->sva = iommu_sva_bind_device(xdna->ddev.dev, current->mm);
+ if (IS_ERR(client->sva)) {
+ ret = PTR_ERR(client->sva);
+ XDNA_ERR(xdna, "SVA bind device failed, ret %d", ret);
+ goto failed;
+ }
+ client->pasid = iommu_sva_get_pasid(client->sva);
+ if (client->pasid == IOMMU_PASID_INVALID) {
+ XDNA_ERR(xdna, "SVA get pasid failed");
+ ret = -ENODEV;
+ goto unbind_sva;
+ }
+ mutex_init(&client->hwctx_lock);
+ init_srcu_struct(&client->hwctx_srcu);
+ xa_init_flags(&client->hwctx_xa, XA_FLAGS_ALLOC);
+ mutex_init(&client->mm_lock);
+
+ mutex_lock(&xdna->dev_lock);
+ list_add_tail(&client->node, &xdna->client_list);
+ mutex_unlock(&xdna->dev_lock);
+
+ filp->driver_priv = client;
+ client->filp = filp;
+
+ XDNA_DBG(xdna, "pid %d opened", client->pid);
+ return 0;
+
+unbind_sva:
+ iommu_sva_unbind_device(client->sva);
+failed:
+ kfree(client);
+put_rpm:
+ pm_runtime_mark_last_busy(ddev->dev);
+ pm_runtime_put_autosuspend(ddev->dev);
+
+ return ret;
+}
+
+/*
+ * DRM .postclose callback: undo amdxdna_drm_open(). Destroys the client's
+ * xarray, SRCU and mutexes, drops the device heap reference if one is set,
+ * unbinds SVA, frees the client and releases the runtime PM reference.
+ */
+static void amdxdna_drm_close(struct drm_device *ddev, struct drm_file *filp)
+{
+ struct amdxdna_client *client = filp->driver_priv;
+ struct amdxdna_dev *xdna = to_xdna_dev(ddev);
+
+ XDNA_DBG(xdna, "closing pid %d", client->pid);
+
+ xa_destroy(&client->hwctx_xa);
+ cleanup_srcu_struct(&client->hwctx_srcu);
+ mutex_destroy(&client->hwctx_lock);
+ mutex_destroy(&client->mm_lock);
+ if (client->dev_heap)
+ drm_gem_object_put(to_gobj(client->dev_heap));
+
+ iommu_sva_unbind_device(client->sva);
+
+ XDNA_DBG(xdna, "pid %d closed", client->pid);
+ kfree(client);
+ /* Balances the pm_runtime_resume_and_get() done at open time. */
+ pm_runtime_mark_last_busy(ddev->dev);
+ pm_runtime_put_autosuspend(ddev->dev);
+}
+
+/*
+ * File .flush callback: unlink the client from the device list and remove all
+ * of its hardware contexts. It does nothing once the DRM device is unplugged.
+ *
+ * Return: always 0.
+ */
+static int amdxdna_flush(struct file *f, fl_owner_t id)
+{
+	struct drm_file *filp = f->private_data;
+	struct amdxdna_client *client = filp->driver_priv;
+	struct amdxdna_dev *xdna = client->xdna;
+	int idx;
+
+	XDNA_DBG(xdna, "PID %d flushing...", client->pid);
+	if (drm_dev_enter(&xdna->ddev, &idx)) {
+		mutex_lock(&xdna->dev_lock);
+		list_del_init(&client->node);
+		mutex_unlock(&xdna->dev_lock);
+
+		/* Contexts are removed without holding dev_lock. */
+		amdxdna_hwctx_remove_all(client);
+
+		drm_dev_exit(idx);
+	}
+
+	return 0;
+}
+
+/*
+ * GET_INFO ioctl: forward the request to the device's get_aie_info op while
+ * holding dev_lock.
+ *
+ * Return: -EOPNOTSUPP if the device has no such op, otherwise the op's result.
+ */
+static int amdxdna_drm_get_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
+{
+	struct amdxdna_dev *xdna = to_xdna_dev(dev);
+	struct amdxdna_client *client = filp->driver_priv;
+	struct amdxdna_drm_get_info *args = data;
+	int err;
+
+	if (!xdna->dev_info->ops->get_aie_info)
+		return -EOPNOTSUPP;
+
+	XDNA_DBG(xdna, "Request parameter %u", args->param);
+
+	mutex_lock(&xdna->dev_lock);
+	err = xdna->dev_info->ops->get_aie_info(client, args);
+	mutex_unlock(&xdna->dev_lock);
+
+	return err;
+}
+
+/*
+ * SET_STATE ioctl: forward the request to the device's set_aie_state op while
+ * holding dev_lock.
+ *
+ * Return: -EOPNOTSUPP if the device has no such op, otherwise the op's result.
+ */
+static int amdxdna_drm_set_state_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
+{
+	struct amdxdna_dev *xdna = to_xdna_dev(dev);
+	struct amdxdna_client *client = filp->driver_priv;
+	struct amdxdna_drm_set_state *args = data;
+	int err;
+
+	if (!xdna->dev_info->ops->set_aie_state)
+		return -EOPNOTSUPP;
+
+	XDNA_DBG(xdna, "Request parameter %u", args->param);
+
+	mutex_lock(&xdna->dev_lock);
+	err = xdna->dev_info->ops->set_aie_state(client, args);
+	mutex_unlock(&xdna->dev_lock);
+
+	return err;
+}
+
+/* Driver ioctl table. Only SET_STATE is restricted (DRM_ROOT_ONLY). */
+static const struct drm_ioctl_desc amdxdna_drm_ioctls[] = {
+ /* Context */
+ DRM_IOCTL_DEF_DRV(AMDXDNA_CREATE_HWCTX, amdxdna_drm_create_hwctx_ioctl, 0),
+ DRM_IOCTL_DEF_DRV(AMDXDNA_DESTROY_HWCTX, amdxdna_drm_destroy_hwctx_ioctl, 0),
+ DRM_IOCTL_DEF_DRV(AMDXDNA_CONFIG_HWCTX, amdxdna_drm_config_hwctx_ioctl, 0),
+ /* BO */
+ DRM_IOCTL_DEF_DRV(AMDXDNA_CREATE_BO, amdxdna_drm_create_bo_ioctl, 0),
+ DRM_IOCTL_DEF_DRV(AMDXDNA_GET_BO_INFO, amdxdna_drm_get_bo_info_ioctl, 0),
+ DRM_IOCTL_DEF_DRV(AMDXDNA_SYNC_BO, amdxdna_drm_sync_bo_ioctl, 0),
+ /* Execution */
+ DRM_IOCTL_DEF_DRV(AMDXDNA_EXEC_CMD, amdxdna_drm_submit_cmd_ioctl, 0),
+ /* AIE hardware */
+ DRM_IOCTL_DEF_DRV(AMDXDNA_GET_INFO, amdxdna_drm_get_info_ioctl, 0),
+ DRM_IOCTL_DEF_DRV(AMDXDNA_SET_STATE, amdxdna_drm_set_state_ioctl, DRM_ROOT_ONLY),
+};
+
+/*
+ * File operations of the accel device node. All entries are DRM/accel
+ * helpers except .flush, which is the driver's own amdxdna_flush().
+ */
+static const struct file_operations amdxdna_fops = {
+ .owner = THIS_MODULE,
+ .open = accel_open,
+ .release = drm_release,
+ .flush = amdxdna_flush,
+ .unlocked_ioctl = drm_ioctl,
+ .compat_ioctl = drm_compat_ioctl,
+ .poll = drm_poll,
+ .read = drm_read,
+ .llseek = noop_llseek,
+ .mmap = drm_gem_mmap,
+ .fop_flags = FOP_UNSIGNED_OFFSET,
+};
+
+/* DRM driver description used by amdxdna_probe() to allocate the device. */
+const struct drm_driver amdxdna_drm_drv = {
+ .driver_features = DRIVER_GEM | DRIVER_COMPUTE_ACCEL |
+ DRIVER_SYNCOBJ | DRIVER_SYNCOBJ_TIMELINE,
+ .fops = &amdxdna_fops,
+ .name = "amdxdna_accel_driver",
+ .desc = "AMD XDNA DRM implementation",
+ .open = amdxdna_drm_open, /* per-file client setup */
+ .postclose = amdxdna_drm_close, /* per-file client teardown */
+ .ioctls = amdxdna_drm_ioctls,
+ .num_ioctls = ARRAY_SIZE(amdxdna_drm_ioctls),
+
+ .gem_create_object = amdxdna_gem_create_object_cb,
+};
+
+/*
+ * Find the device info entry matching the PCI device ID and revision of @pdev.
+ *
+ * Return: the matching entry's dev_info, or NULL if nothing matches.
+ */
+static const struct amdxdna_dev_info *
+amdxdna_get_dev_info(struct pci_dev *pdev)
+{
+	const struct amdxdna_device_id *entry = amdxdna_ids;
+	const struct amdxdna_device_id *end = amdxdna_ids + ARRAY_SIZE(amdxdna_ids);
+
+	for (; entry < end; entry++) {
+		if (pdev->device != entry->device)
+			continue;
+		if (pdev->revision != entry->revision)
+			continue;
+
+		return entry->dev_info;
+	}
+
+	return NULL;
+}
+
+/*
+ * PCI probe: allocate the DRM device, select the device info for this
+ * (device, revision), initialize the hardware, create sysfs attributes,
+ * enable runtime PM autosuspend and register the DRM device.
+ *
+ * Return: 0 on success, -ENODEV if the device/revision is unknown, or the
+ * error from the failing step with earlier steps undone.
+ */
+static int amdxdna_probe(struct pci_dev *pdev, const struct pci_device_id *id)
+{
+ struct device *dev = &pdev->dev;
+ struct amdxdna_dev *xdna;
+ int ret;
+
+ /* Device-managed allocation: no explicit free on the error paths below. */
+ xdna = devm_drm_dev_alloc(dev, &amdxdna_drm_drv, typeof(*xdna), ddev);
+ if (IS_ERR(xdna))
+ return PTR_ERR(xdna);
+
+ xdna->dev_info = amdxdna_get_dev_info(pdev);
+ if (!xdna->dev_info)
+ return -ENODEV;
+
+ drmm_mutex_init(&xdna->ddev, &xdna->dev_lock);
+ init_rwsem(&xdna->notifier_lock);
+ INIT_LIST_HEAD(&xdna->client_list);
+ pci_set_drvdata(pdev, xdna);
+
+ /*
+ * Lockdep annotation only: record notifier_lock as a lock that may be
+ * taken inside an fs-reclaim section.
+ */
+ if (IS_ENABLED(CONFIG_LOCKDEP)) {
+ fs_reclaim_acquire(GFP_KERNEL);
+ might_lock(&xdna->notifier_lock);
+ fs_reclaim_release(GFP_KERNEL);
+ }
+
+ mutex_lock(&xdna->dev_lock);
+ ret = xdna->dev_info->ops->init(xdna);
+ mutex_unlock(&xdna->dev_lock);
+ if (ret) {
+ XDNA_ERR(xdna, "Hardware init failed, ret %d", ret);
+ return ret;
+ }
+
+ ret = amdxdna_sysfs_init(xdna);
+ if (ret) {
+ XDNA_ERR(xdna, "Create amdxdna attrs failed: %d", ret);
+ goto failed_dev_fini;
+ }
+
+ pm_runtime_set_autosuspend_delay(dev, AMDXDNA_AUTOSUSPEND_DELAY);
+ pm_runtime_use_autosuspend(dev);
+ pm_runtime_allow(dev);
+
+ ret = drm_dev_register(&xdna->ddev, 0);
+ if (ret) {
+ XDNA_ERR(xdna, "DRM register failed, ret %d", ret);
+ pm_runtime_forbid(dev);
+ goto failed_sysfs_fini;
+ }
+
+ /* NOTE(review): the matching runtime PM "get" is not in this function. */
+ pm_runtime_mark_last_busy(dev);
+ pm_runtime_put_autosuspend(dev);
+ return 0;
+
+failed_sysfs_fini:
+ amdxdna_sysfs_fini(xdna);
+failed_dev_fini:
+ mutex_lock(&xdna->dev_lock);
+ xdna->dev_info->ops->fini(xdna);
+ mutex_unlock(&xdna->dev_lock);
+ return ret;
+}
+
+/*
+ * PCI remove callback.
+ *
+ * Blocks runtime PM, unplugs the DRM device, removes the sysfs attributes,
+ * then detaches every client from client_list and removes its hardware
+ * contexts before calling the hardware fini callback under dev_lock.
+ */
+static void amdxdna_remove(struct pci_dev *pdev)
+{
+	struct amdxdna_dev *xdna = pci_get_drvdata(pdev);
+	struct device *dev = &pdev->dev;
+	struct amdxdna_client *cur;
+
+	pm_runtime_get_noresume(dev);
+	pm_runtime_forbid(dev);
+
+	drm_dev_unplug(&xdna->ddev);
+	amdxdna_sysfs_fini(xdna);
+
+	mutex_lock(&xdna->dev_lock);
+	for (;;) {
+		cur = list_first_entry_or_null(&xdna->client_list,
+					       struct amdxdna_client, node);
+		if (!cur)
+			break;
+
+		/* Unlink first, then drop dev_lock around the context teardown. */
+		list_del_init(&cur->node);
+		mutex_unlock(&xdna->dev_lock);
+
+		amdxdna_hwctx_remove_all(cur);
+
+		mutex_lock(&xdna->dev_lock);
+	}
+
+	/* dev_lock is held here, matching how fini is called from probe's error path. */
+	xdna->dev_info->ops->fini(xdna);
+	mutex_unlock(&xdna->dev_lock);
+}
+
+/*
+ * Invoke the optional hardware suspend callback. Takes no lock itself;
+ * the callers in this file hold dev_lock. Always returns 0.
+ */
+static int amdxdna_dev_suspend_nolock(struct amdxdna_dev *xdna)
+{
+	const struct amdxdna_dev_ops *hw_ops = xdna->dev_info->ops;
+
+	if (hw_ops->suspend != NULL)
+		hw_ops->suspend(xdna);
+
+	return 0;
+}
+
+/*
+ * Invoke the optional hardware resume callback. Takes no lock itself;
+ * the callers in this file hold dev_lock.
+ *
+ * Return: the callback's result, or 0 when no callback is provided.
+ */
+static int amdxdna_dev_resume_nolock(struct amdxdna_dev *xdna)
+{
+	const struct amdxdna_dev_ops *hw_ops = xdna->dev_info->ops;
+
+	if (hw_ops->resume == NULL)
+		return 0;
+
+	return hw_ops->resume(xdna);
+}
+
+/*
+ * System-sleep suspend callback: under dev_lock, call
+ * amdxdna_hwctx_suspend() for every client on client_list and then suspend
+ * the device. Always returns 0.
+ */
+static int amdxdna_pmops_suspend(struct device *dev)
+{
+	struct pci_dev *pdev = to_pci_dev(dev);
+	struct amdxdna_dev *xdna = pci_get_drvdata(pdev);
+	struct amdxdna_client *cur;
+
+	mutex_lock(&xdna->dev_lock);
+
+	list_for_each_entry(cur, &xdna->client_list, node) {
+		amdxdna_hwctx_suspend(cur);
+	}
+	amdxdna_dev_suspend_nolock(xdna);
+
+	mutex_unlock(&xdna->dev_lock);
+
+	return 0;
+}
+
+/*
+ * System-sleep resume callback: under dev_lock, resume the device first and,
+ * only if that succeeds, call amdxdna_hwctx_resume() for every client.
+ *
+ * Return: 0 on success, or the error from the device resume step.
+ */
+static int amdxdna_pmops_resume(struct device *dev)
+{
+	struct pci_dev *pdev = to_pci_dev(dev);
+	struct amdxdna_dev *xdna = pci_get_drvdata(pdev);
+	struct amdxdna_client *cur;
+	int err;
+
+	XDNA_INFO(xdna, "firmware resuming...");
+
+	mutex_lock(&xdna->dev_lock);
+
+	err = amdxdna_dev_resume_nolock(xdna);
+	if (err) {
+		XDNA_ERR(xdna, "resume NPU firmware failed");
+		goto out_unlock;
+	}
+
+	XDNA_INFO(xdna, "hardware context resuming...");
+	list_for_each_entry(cur, &xdna->client_list, node) {
+		amdxdna_hwctx_resume(cur);
+	}
+
+out_unlock:
+	mutex_unlock(&xdna->dev_lock);
+
+	return err;
+}
+
+/*
+ * Runtime-PM suspend callback: suspend the device under dev_lock and log
+ * the outcome. Unlike the system-sleep path, clients are not touched.
+ */
+static int amdxdna_rpmops_suspend(struct device *dev)
+{
+	struct amdxdna_dev *xdna;
+	int status;
+
+	xdna = pci_get_drvdata(to_pci_dev(dev));
+
+	mutex_lock(&xdna->dev_lock);
+	status = amdxdna_dev_suspend_nolock(xdna);
+	mutex_unlock(&xdna->dev_lock);
+
+	XDNA_DBG(xdna, "Runtime suspend done ret: %d", status);
+
+	return status;
+}
+
+/*
+ * Runtime-PM resume callback: resume the device under dev_lock and log
+ * the outcome. Unlike the system-sleep path, clients are not touched.
+ */
+static int amdxdna_rpmops_resume(struct device *dev)
+{
+	struct amdxdna_dev *xdna;
+	int status;
+
+	xdna = pci_get_drvdata(to_pci_dev(dev));
+
+	mutex_lock(&xdna->dev_lock);
+	status = amdxdna_dev_resume_nolock(xdna);
+	mutex_unlock(&xdna->dev_lock);
+
+	XDNA_DBG(xdna, "Runtime resume done ret: %d", status);
+
+	return status;
+}
+
+static const struct dev_pm_ops amdxdna_pm_ops = { /* power-management callbacks wired into amdxdna_pci_driver */
+ SYSTEM_SLEEP_PM_OPS(amdxdna_pmops_suspend, amdxdna_pmops_resume) /* system sleep: also suspends/resumes every client's hardware contexts */
+ RUNTIME_PM_OPS(amdxdna_rpmops_suspend, amdxdna_rpmops_resume, NULL) /* runtime PM: device only; no idle callback */
+};
+
+static struct pci_driver amdxdna_pci_driver = { /* PCI driver registered via module_pci_driver() */
+ .name = KBUILD_MODNAME,
+ .id_table = pci_ids,
+ .probe = amdxdna_probe,
+ .remove = amdxdna_remove,
+ .driver.pm = &amdxdna_pm_ops, /* system-sleep and runtime-PM callbacks */
+};
+
+module_pci_driver(amdxdna_pci_driver);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("XRT Team <runtimeca39d@amd.com>");
+MODULE_DESCRIPTION("amdxdna driver");
diff --git a/drivers/accel/amdxdna/amdxdna_pci_drv.h b/drivers/accel/amdxdna/amdxdna_pci_drv.h
new file mode 100644
index 000000000000..37848a8d8031
--- /dev/null
+++ b/drivers/accel/amdxdna/amdxdna_pci_drv.h
@@ -0,0 +1,147 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2022-2024, Advanced Micro Devices, Inc.
+ */
+
+#ifndef _AMDXDNA_PCI_DRV_H_
+#define _AMDXDNA_PCI_DRV_H_
+
+#include <linux/xarray.h>
+
+#define XDNA_INFO(xdna, fmt, args...) drm_info(&(xdna)->ddev, fmt, ##args) /* info-level log against the embedded drm_device */
+#define XDNA_WARN(xdna, fmt, args...) drm_warn(&(xdna)->ddev, "%s: "fmt, __func__, ##args) /* warning; message is prefixed with the calling function name */
+#define XDNA_ERR(xdna, fmt, args...) drm_err(&(xdna)->ddev, "%s: "fmt, __func__, ##args) /* error; message is prefixed with the calling function name */
+#define XDNA_DBG(xdna, fmt, args...) drm_dbg(&(xdna)->ddev, fmt, ##args) /* debug-level log, no function-name prefix */
+#define XDNA_INFO_ONCE(xdna, fmt, args...) drm_info_once(&(xdna)->ddev, fmt, ##args) /* info-level log via drm_info_once() */
+
+#define XDNA_MBZ_DBG(xdna, ptr, sz) /* "must be zero" check: evaluates to 0 if all sz bytes at ptr are zero, else -EINVAL */ \
+ ({ \
+ int __i; \
+ int __ret = 0; \
+ u8 *__ptr = (u8 *)(ptr); /* ptr is evaluated once; sz is re-evaluated on every loop iteration */ \
+ for (__i = 0; __i < (sz); __i++) { \
+ if (__ptr[__i]) { \
+ XDNA_DBG(xdna, "MBZ check failed"); /* logged once: the loop stops at the first non-zero byte */ \
+ __ret = -EINVAL; \
+ break; \
+ } \
+ } \
+ __ret; /* value of the statement expression */ \
+ })
+
+#define to_xdna_dev(drm_dev) /* map a pointer to the embedded ddev member back to its struct amdxdna_dev */ \
+ ((struct amdxdna_dev *)container_of(drm_dev, struct amdxdna_dev, ddev))
+
+extern const struct drm_driver amdxdna_drm_drv;
+
+struct amdxdna_client;
+struct amdxdna_dev;
+struct amdxdna_drm_get_info;
+struct amdxdna_drm_set_state;
+struct amdxdna_gem_obj;
+struct amdxdna_hwctx;
+struct amdxdna_sched_job;
+
+/*
+ * struct amdxdna_dev_ops - Device hardware operation callbacks
+ *
+ * Reached through struct amdxdna_dev_info::ops. The PCI driver calls @init
+ * and @fini with xdna->dev_lock held, and treats @suspend and @resume as
+ * optional: both are NULL-checked before being called.
+ */
+struct amdxdna_dev_ops {
+ int (*init)(struct amdxdna_dev *xdna); /* called from probe; non-zero aborts the probe */
+ void (*fini)(struct amdxdna_dev *xdna); /* called from remove and from probe's error path */
+ int (*resume)(struct amdxdna_dev *xdna); /* optional; result is propagated to the PM core */
+ void (*suspend)(struct amdxdna_dev *xdna); /* optional; cannot report failure */
+ int (*hwctx_init)(struct amdxdna_hwctx *hwctx);
+ void (*hwctx_fini)(struct amdxdna_hwctx *hwctx);
+ int (*hwctx_config)(struct amdxdna_hwctx *hwctx, u32 type, u64 value, void *buf, u32 size);
+ void (*hmm_invalidate)(struct amdxdna_gem_obj *abo, unsigned long cur_seq);
+ void (*hwctx_suspend)(struct amdxdna_hwctx *hwctx);
+ void (*hwctx_resume)(struct amdxdna_hwctx *hwctx);
+ int (*cmd_submit)(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job, u64 *seq);
+ int (*get_aie_info)(struct amdxdna_client *client, struct amdxdna_drm_get_info *args);
+ int (*set_aie_state)(struct amdxdna_client *client, struct amdxdna_drm_set_state *args);
+};
+
+/*
+ * struct amdxdna_dev_info - Device hardware information
+ * Record device static information, like reg, mbox, PSP, SMU bar index
+ *
+ * One constant instance exists per supported NPU (dev_npu1_info ...
+ * dev_npu6_info); the matching one is selected at probe time from the PCI
+ * device ID and revision.
+ */
+struct amdxdna_dev_info {
+ int reg_bar; /* PCI BAR index of the register aperture */
+ int mbox_bar; /* PCI BAR index of the mailbox aperture */
+ int sram_bar; /* PCI BAR index of the SRAM aperture */
+ int psp_bar; /* PCI BAR index of the PSP registers */
+ int smu_bar; /* PCI BAR index of the SMU registers */
+ int device_type; /* reported by the "device_type" sysfs attribute */
+ int first_col;
+ u32 dev_mem_buf_shift; /* log2 of the buffer alignment; the NPU tables use 15 for 32 KiB */
+ u64 dev_mem_base;
+ size_t dev_mem_size;
+ char *vbnv; /* name string reported by the "vbnv" sysfs attribute */
+ const struct amdxdna_dev_priv *dev_priv; /* per-NPU private data (firmware path, register maps, ...) */
+ const struct amdxdna_dev_ops *ops; /* hardware callbacks */
+};
+
+struct amdxdna_fw_ver { /* firmware version; printed as major.minor.sub.build by the "fw_version" sysfs attribute */
+ u32 major;
+ u32 minor;
+ u32 sub;
+ u32 build;
+};
+
+struct amdxdna_dev { /* per-device driver state, allocated by devm_drm_dev_alloc() at probe */
+ struct drm_device ddev; /* embedded DRM device; to_xdna_dev() maps it back to this struct */
+ struct amdxdna_dev_hdl *dev_handle;
+ const struct amdxdna_dev_info *dev_info; /* static description selected at probe time */
+ void *xrs_hdl;
+
+ struct mutex dev_lock; /* per device lock */
+ struct list_head client_list; /* list of struct amdxdna_client, linked through their node member */
+ struct amdxdna_fw_ver fw_ver;
+ struct rw_semaphore notifier_lock; /* for mmu notifier*/
+};
+
+/*
+ * struct amdxdna_device_id - PCI device info
+ *
+ * Maps one PCI device ID / revision pair to its static device description.
+ */
+struct amdxdna_device_id {
+ unsigned short device; /* compared against pci_dev->device */
+ u8 revision; /* compared against pci_dev->revision */
+ const struct amdxdna_dev_info *dev_info; /* description returned on a match */
+};
+
+/*
+ * struct amdxdna_client - amdxdna client
+ * A per fd data structure for managing context and other user process stuff.
+ */
+struct amdxdna_client {
+ struct list_head node; /* entry in amdxdna_dev::client_list */
+ pid_t pid;
+ struct mutex hwctx_lock; /* protect hwctx */
+ /* do NOT wait on this srcu while hwctx_lock is held */
+ struct srcu_struct hwctx_srcu;
+ struct xarray hwctx_xa; /* hardware contexts; iterate with amdxdna_for_each_hwctx() */
+ u32 next_hwctxid;
+ struct amdxdna_dev *xdna; /* owning device */
+ struct drm_file *filp;
+
+ struct mutex mm_lock; /* protect memory related */
+ struct amdxdna_gem_obj *dev_heap;
+
+ struct iommu_sva *sva;
+ int pasid;
+};
+
+#define amdxdna_for_each_hwctx(client, hwctx_id, entry) /* iterate over every entry stored in client->hwctx_xa */ \
+ xa_for_each(&(client)->hwctx_xa, hwctx_id, entry)
+
+/* Add device info below */
+extern const struct amdxdna_dev_info dev_npu1_info;
+extern const struct amdxdna_dev_info dev_npu2_info;
+extern const struct amdxdna_dev_info dev_npu4_info;
+extern const struct amdxdna_dev_info dev_npu5_info;
+extern const struct amdxdna_dev_info dev_npu6_info;
+
+int amdxdna_sysfs_init(struct amdxdna_dev *xdna);
+void amdxdna_sysfs_fini(struct amdxdna_dev *xdna);
+
+#endif /* _AMDXDNA_PCI_DRV_H_ */
diff --git a/drivers/accel/amdxdna/amdxdna_sysfs.c b/drivers/accel/amdxdna/amdxdna_sysfs.c
new file mode 100644
index 000000000000..f27e4ee960a0
--- /dev/null
+++ b/drivers/accel/amdxdna/amdxdna_sysfs.c
@@ -0,0 +1,67 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2023-2024, Advanced Micro Devices, Inc.
+ */
+
+#include <drm/amdxdna_accel.h>
+#include <drm/drm_device.h>
+#include <drm/drm_gem_shmem_helper.h>
+#include <drm/drm_print.h>
+#include <drm/gpu_scheduler.h>
+#include <linux/types.h>
+
+#include "amdxdna_gem.h"
+#include "amdxdna_pci_drv.h"
+
+/*
+ * sysfs "vbnv" attribute (read-only): prints the name string from the
+ * device's static description, followed by a newline.
+ */
+static ssize_t vbnv_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+	struct amdxdna_dev *xdna = dev_get_drvdata(dev);
+
+	/* sysfs_emit() is the PAGE_SIZE-aware helper required for show() callbacks. */
+	return sysfs_emit(buf, "%s\n", xdna->dev_info->vbnv);
+}
+static DEVICE_ATTR_RO(vbnv);
+
+/*
+ * sysfs "device_type" attribute (read-only): prints the device type from the
+ * device's static description as a decimal integer, followed by a newline.
+ */
+static ssize_t device_type_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+	struct amdxdna_dev *xdna = dev_get_drvdata(dev);
+
+	/* sysfs_emit() is the PAGE_SIZE-aware helper required for show() callbacks. */
+	return sysfs_emit(buf, "%d\n", xdna->dev_info->device_type);
+}
+static DEVICE_ATTR_RO(device_type);
+
+/*
+ * sysfs "fw_version" attribute (read-only): prints the firmware version as
+ * "major.minor.sub.build", followed by a newline.
+ */
+static ssize_t fw_version_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+	struct amdxdna_dev *xdna = dev_get_drvdata(dev);
+	const struct amdxdna_fw_ver *ver = &xdna->fw_ver;
+
+	/*
+	 * All four fields are u32, so print them with %u; %d would show values
+	 * above INT_MAX as negative numbers. sysfs_emit() is the PAGE_SIZE-aware
+	 * helper required for show() callbacks.
+	 */
+	return sysfs_emit(buf, "%u.%u.%u.%u\n", ver->major, ver->minor,
+			  ver->sub, ver->build);
+}
+static DEVICE_ATTR_RO(fw_version);
+
+static struct attribute *amdxdna_attrs[] = { /* attributes published by amdxdna_attr_group */
+ &dev_attr_device_type.attr,
+ &dev_attr_vbnv.attr,
+ &dev_attr_fw_version.attr,
+ NULL, /* NULL entry terminates the list */
+};
+
+/*
+ * Unnamed attribute group: the attributes appear directly in the device's
+ * sysfs directory. The group is never modified after initialization, and
+ * sysfs_create_group()/sysfs_remove_group() take a const pointer, so keep it
+ * const.
+ */
+static const struct attribute_group amdxdna_attr_group = {
+	.attrs = amdxdna_attrs,
+};
+
+/*
+ * Create the driver's sysfs attribute group on the underlying device.
+ *
+ * Return: 0 on success, or the error from sysfs_create_group() (also logged).
+ */
+int amdxdna_sysfs_init(struct amdxdna_dev *xdna)
+{
+	struct kobject *kobj = &xdna->ddev.dev->kobj;
+	int err = sysfs_create_group(kobj, &amdxdna_attr_group);
+
+	if (!err)
+		return 0;
+
+	XDNA_ERR(xdna, "Create attr group failed");
+	return err;
+}
+
+/* Remove the sysfs attribute group created by amdxdna_sysfs_init(). */
+void amdxdna_sysfs_fini(struct amdxdna_dev *xdna)
+{
+	struct kobject *kobj = &xdna->ddev.dev->kobj;
+
+	sysfs_remove_group(kobj, &amdxdna_attr_group);
+}
diff --git a/drivers/accel/amdxdna/npu1_regs.c b/drivers/accel/amdxdna/npu1_regs.c
new file mode 100644
index 000000000000..e4f6dac7d00f
--- /dev/null
+++ b/drivers/accel/amdxdna/npu1_regs.c
@@ -0,0 +1,114 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2023-2024, Advanced Micro Devices, Inc.
+ */
+
+#include <drm/amdxdna_accel.h>
+#include <drm/drm_device.h>
+#include <drm/gpu_scheduler.h>
+#include <linux/sizes.h>
+
+#include "aie2_pci.h"
+#include "amdxdna_mailbox.h"
+#include "amdxdna_pci_drv.h"
+
+/* Address definition from NPU1 docs */
+#define MPNPU_PUB_SEC_INTR 0x3010090
+#define MPNPU_PUB_PWRMGMT_INTR 0x3010094
+#define MPNPU_PUB_SCRATCH2 0x30100A0
+#define MPNPU_PUB_SCRATCH3 0x30100A4
+#define MPNPU_PUB_SCRATCH4 0x30100A8
+#define MPNPU_PUB_SCRATCH5 0x30100AC
+#define MPNPU_PUB_SCRATCH6 0x30100B0
+#define MPNPU_PUB_SCRATCH7 0x30100B4
+#define MPNPU_PUB_SCRATCH9 0x30100BC
+
+#define MPNPU_SRAM_X2I_MAILBOX_0 0x30A0000
+#define MPNPU_SRAM_X2I_MAILBOX_1 0x30A2000
+#define MPNPU_SRAM_I2X_MAILBOX_15 0x30BF000
+
+#define MPNPU_APERTURE0_BASE 0x3000000
+#define MPNPU_APERTURE1_BASE 0x3080000
+#define MPNPU_APERTURE2_BASE 0x30C0000
+
+/* PCIe BAR Index for NPU1 */
+#define NPU1_REG_BAR_INDEX 0
+#define NPU1_MBOX_BAR_INDEX 4
+#define NPU1_PSP_BAR_INDEX 0
+#define NPU1_SMU_BAR_INDEX 0
+#define NPU1_SRAM_BAR_INDEX 2
+/* Associated BARs and Apertures */
+#define NPU1_REG_BAR_BASE MPNPU_APERTURE0_BASE
+#define NPU1_MBOX_BAR_BASE MPNPU_APERTURE2_BASE
+#define NPU1_PSP_BAR_BASE MPNPU_APERTURE0_BASE
+#define NPU1_SMU_BAR_BASE MPNPU_APERTURE0_BASE
+#define NPU1_SRAM_BAR_BASE MPNPU_APERTURE1_BASE
+
+const struct rt_config npu1_default_rt_cfg[] = { /* default runtime configuration for NPU1; field layout is defined by struct rt_config (not visible here) */
+ { 2, 1, AIE2_RT_CFG_INIT }, /* PDI APP LOAD MODE */
+ { 1, 1, AIE2_RT_CFG_CLK_GATING }, /* Clock gating on */
+ { 0 }, /* all-zero entry; presumably the list terminator - confirm against the consumer */
+};
+
+const struct dpm_clk_freq npu1_dpm_clk_table[] = { /* NPU1 clock-frequency pairs, presumably indexed by DPM level; column meaning is defined by struct dpm_clk_freq (not visible here) */
+ {400, 800},
+ {600, 1024},
+ {600, 1024},
+ {600, 1024},
+ {600, 1024},
+ {720, 1309},
+ {720, 1309},
+ {847, 1600},
+ { 0 } /* all-zero row; presumably the table terminator - confirm against the consumer */
+};
+
+static const struct amdxdna_dev_priv npu1_dev_priv = { /* NPU1 private data: firmware, protocol version, mailbox/SRAM bases, PSP/SMU register maps */
+ .fw_path = "amdnpu/1502_00/npu.sbin",
+ .protocol_major = 0x5,
+ .protocol_minor = 0x7,
+ .rt_config = npu1_default_rt_cfg,
+ .dpm_clk_tbl = npu1_dpm_clk_table,
+ .col_align = COL_ALIGN_NONE,
+ .mbox_dev_addr = NPU1_MBOX_BAR_BASE,
+ .mbox_size = 0, /* Use BAR size */
+ .sram_dev_addr = NPU1_SRAM_BAR_BASE,
+ .sram_offs = {
+ DEFINE_BAR_OFFSET(MBOX_CHANN_OFF, NPU1_SRAM, MPNPU_SRAM_X2I_MAILBOX_0),
+ DEFINE_BAR_OFFSET(FW_ALIVE_OFF, NPU1_SRAM, MPNPU_SRAM_I2X_MAILBOX_15),
+ },
+ .psp_regs_off = { /* on NPU1 every PSP register lives in the PSP BAR (BAR 0, aperture 0) */
+ DEFINE_BAR_OFFSET(PSP_CMD_REG, NPU1_PSP, MPNPU_PUB_SCRATCH2),
+ DEFINE_BAR_OFFSET(PSP_ARG0_REG, NPU1_PSP, MPNPU_PUB_SCRATCH3),
+ DEFINE_BAR_OFFSET(PSP_ARG1_REG, NPU1_PSP, MPNPU_PUB_SCRATCH4),
+ DEFINE_BAR_OFFSET(PSP_ARG2_REG, NPU1_PSP, MPNPU_PUB_SCRATCH9),
+ DEFINE_BAR_OFFSET(PSP_INTR_REG, NPU1_PSP, MPNPU_PUB_SEC_INTR),
+ DEFINE_BAR_OFFSET(PSP_STATUS_REG, NPU1_PSP, MPNPU_PUB_SCRATCH2), /* same register as PSP_CMD_REG */
+ DEFINE_BAR_OFFSET(PSP_RESP_REG, NPU1_PSP, MPNPU_PUB_SCRATCH3), /* same register as PSP_ARG0_REG */
+ },
+ .smu_regs_off = {
+ DEFINE_BAR_OFFSET(SMU_CMD_REG, NPU1_SMU, MPNPU_PUB_SCRATCH5),
+ DEFINE_BAR_OFFSET(SMU_ARG_REG, NPU1_SMU, MPNPU_PUB_SCRATCH7),
+ DEFINE_BAR_OFFSET(SMU_INTR_REG, NPU1_SMU, MPNPU_PUB_PWRMGMT_INTR),
+ DEFINE_BAR_OFFSET(SMU_RESP_REG, NPU1_SMU, MPNPU_PUB_SCRATCH6),
+ DEFINE_BAR_OFFSET(SMU_OUT_REG, NPU1_SMU, MPNPU_PUB_SCRATCH7), /* same register as SMU_ARG_REG */
+ },
+ .hw_ops = {
+ .set_dpm = npu1_set_dpm,
+ },
+};
+
+const struct amdxdna_dev_info dev_npu1_info = { /* static device description for NPU1 */
+ .reg_bar = NPU1_REG_BAR_INDEX,
+ .mbox_bar = NPU1_MBOX_BAR_INDEX,
+ .sram_bar = NPU1_SRAM_BAR_INDEX,
+ .psp_bar = NPU1_PSP_BAR_INDEX,
+ .smu_bar = NPU1_SMU_BAR_INDEX,
+ .first_col = 1, /* NPU1 is the only table in this series with a non-zero first column */
+ .dev_mem_buf_shift = 15, /* 32 KiB aligned */
+ .dev_mem_base = AIE2_DEVM_BASE,
+ .dev_mem_size = AIE2_DEVM_SIZE,
+ .vbnv = "RyzenAI-npu1", /* shown by the "vbnv" sysfs attribute */
+ .device_type = AMDXDNA_DEV_TYPE_KMQ,
+ .dev_priv = &npu1_dev_priv,
+ .ops = &aie2_ops,
+};
diff --git a/drivers/accel/amdxdna/npu2_regs.c b/drivers/accel/amdxdna/npu2_regs.c
new file mode 100644
index 000000000000..a081cac75ee0
--- /dev/null
+++ b/drivers/accel/amdxdna/npu2_regs.c
@@ -0,0 +1,113 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2023-2024, Advanced Micro Devices, Inc.
+ */
+
+#include <drm/amdxdna_accel.h>
+#include <drm/drm_device.h>
+#include <drm/gpu_scheduler.h>
+#include <linux/sizes.h>
+
+#include "aie2_pci.h"
+#include "amdxdna_mailbox.h"
+#include "amdxdna_pci_drv.h"
+
+/* NPU Public Registers on MpNPUAxiXbar (refer to Diag npu_registers.h) */
+#define MPNPU_PUB_SEC_INTR 0x3010060
+#define MPNPU_PUB_PWRMGMT_INTR 0x3010064
+#define MPNPU_PUB_SCRATCH0 0x301006C
+#define MPNPU_PUB_SCRATCH1 0x3010070
+#define MPNPU_PUB_SCRATCH2 0x3010074
+#define MPNPU_PUB_SCRATCH3 0x3010078
+#define MPNPU_PUB_SCRATCH4 0x301007C
+#define MPNPU_PUB_SCRATCH5 0x3010080
+#define MPNPU_PUB_SCRATCH6 0x3010084
+#define MPNPU_PUB_SCRATCH7 0x3010088
+#define MPNPU_PUB_SCRATCH8 0x301008C
+#define MPNPU_PUB_SCRATCH9 0x3010090
+#define MPNPU_PUB_SCRATCH10 0x3010094
+#define MPNPU_PUB_SCRATCH11 0x3010098
+#define MPNPU_PUB_SCRATCH12 0x301009C
+#define MPNPU_PUB_SCRATCH13 0x30100A0
+#define MPNPU_PUB_SCRATCH14 0x30100A4
+#define MPNPU_PUB_SCRATCH15 0x30100A8
+#define MP0_C2PMSG_73 0x3810A24
+#define MP0_C2PMSG_123 0x3810AEC
+
+#define MP1_C2PMSG_0 0x3B10900
+#define MP1_C2PMSG_60 0x3B109F0
+#define MP1_C2PMSG_61 0x3B109F4
+
+#define MPNPU_SRAM_X2I_MAILBOX_0 0x3600000
+#define MPNPU_SRAM_X2I_MAILBOX_15 0x361E000
+#define MPNPU_SRAM_X2I_MAILBOX_31 0x363E000
+#define MPNPU_SRAM_I2X_MAILBOX_31 0x363F000
+
+#define MMNPU_APERTURE0_BASE 0x3000000
+#define MMNPU_APERTURE1_BASE 0x3600000
+#define MMNPU_APERTURE3_BASE 0x3810000
+#define MMNPU_APERTURE4_BASE 0x3B10000
+
+/* PCIe BAR Index for NPU2 */
+#define NPU2_REG_BAR_INDEX 0
+#define NPU2_MBOX_BAR_INDEX 0
+#define NPU2_PSP_BAR_INDEX 4
+#define NPU2_SMU_BAR_INDEX 5
+#define NPU2_SRAM_BAR_INDEX 2
+/* Associated BARs and Apertures */
+#define NPU2_REG_BAR_BASE MMNPU_APERTURE0_BASE
+#define NPU2_MBOX_BAR_BASE MMNPU_APERTURE0_BASE
+#define NPU2_PSP_BAR_BASE MMNPU_APERTURE3_BASE
+#define NPU2_SMU_BAR_BASE MMNPU_APERTURE4_BASE
+#define NPU2_SRAM_BAR_BASE MMNPU_APERTURE1_BASE
+
+static const struct amdxdna_dev_priv npu2_dev_priv = { /* NPU2 private data: firmware, protocol version, mailbox/SRAM bases, PSP/SMU register maps */
+ .fw_path = "amdnpu/17f0_00/npu.sbin",
+ .protocol_major = 0x6,
+ .protocol_minor = 0x6,
+ .rt_config = npu4_default_rt_cfg, /* reuses the NPU4 runtime configuration */
+ .dpm_clk_tbl = npu4_dpm_clk_table, /* reuses the NPU4 clock table */
+ .col_align = COL_ALIGN_NATURE,
+ .mbox_dev_addr = NPU2_MBOX_BAR_BASE,
+ .mbox_size = 0, /* Use BAR size */
+ .sram_dev_addr = NPU2_SRAM_BAR_BASE,
+ .sram_offs = {
+ DEFINE_BAR_OFFSET(MBOX_CHANN_OFF, NPU2_SRAM, MPNPU_SRAM_X2I_MAILBOX_0),
+ DEFINE_BAR_OFFSET(FW_ALIVE_OFF, NPU2_SRAM, MPNPU_SRAM_X2I_MAILBOX_15),
+ },
+ .psp_regs_off = { /* command/interrupt/status sit in the PSP BAR; arguments and response sit in the register BAR */
+ DEFINE_BAR_OFFSET(PSP_CMD_REG, NPU2_PSP, MP0_C2PMSG_123),
+ DEFINE_BAR_OFFSET(PSP_ARG0_REG, NPU2_REG, MPNPU_PUB_SCRATCH3),
+ DEFINE_BAR_OFFSET(PSP_ARG1_REG, NPU2_REG, MPNPU_PUB_SCRATCH4),
+ DEFINE_BAR_OFFSET(PSP_ARG2_REG, NPU2_REG, MPNPU_PUB_SCRATCH9),
+ DEFINE_BAR_OFFSET(PSP_INTR_REG, NPU2_PSP, MP0_C2PMSG_73),
+ DEFINE_BAR_OFFSET(PSP_STATUS_REG, NPU2_PSP, MP0_C2PMSG_123), /* same register as PSP_CMD_REG */
+ DEFINE_BAR_OFFSET(PSP_RESP_REG, NPU2_REG, MPNPU_PUB_SCRATCH3), /* same register as PSP_ARG0_REG */
+ },
+ .smu_regs_off = {
+ DEFINE_BAR_OFFSET(SMU_CMD_REG, NPU2_SMU, MP1_C2PMSG_0),
+ DEFINE_BAR_OFFSET(SMU_ARG_REG, NPU2_SMU, MP1_C2PMSG_60),
+ DEFINE_BAR_OFFSET(SMU_INTR_REG, NPU2_SMU, MMNPU_APERTURE4_BASE), /* address equals the SMU BAR base itself */
+ DEFINE_BAR_OFFSET(SMU_RESP_REG, NPU2_SMU, MP1_C2PMSG_61),
+ DEFINE_BAR_OFFSET(SMU_OUT_REG, NPU2_SMU, MP1_C2PMSG_60), /* same register as SMU_ARG_REG */
+ },
+ .hw_ops = {
+ .set_dpm = npu4_set_dpm, /* reuses the NPU4 DPM handler */
+ },
+};
+
+const struct amdxdna_dev_info dev_npu2_info = { /* static device description for NPU2 */
+ .reg_bar = NPU2_REG_BAR_INDEX,
+ .mbox_bar = NPU2_MBOX_BAR_INDEX,
+ .sram_bar = NPU2_SRAM_BAR_INDEX,
+ .psp_bar = NPU2_PSP_BAR_INDEX,
+ .smu_bar = NPU2_SMU_BAR_INDEX,
+ .first_col = 0,
+ .dev_mem_buf_shift = 15, /* 32 KiB aligned */
+ .dev_mem_base = AIE2_DEVM_BASE,
+ .dev_mem_size = AIE2_DEVM_SIZE,
+ .vbnv = "RyzenAI-npu2", /* shown by the "vbnv" sysfs attribute */
+ .device_type = AMDXDNA_DEV_TYPE_KMQ,
+ .dev_priv = &npu2_dev_priv,
+ .ops = &aie2_ops, /* NPU2 can share NPU1's callback */
+};
diff --git a/drivers/accel/amdxdna/npu4_regs.c b/drivers/accel/amdxdna/npu4_regs.c
new file mode 100644
index 000000000000..9f2e33182ec6
--- /dev/null
+++ b/drivers/accel/amdxdna/npu4_regs.c
@@ -0,0 +1,134 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2023-2024, Advanced Micro Devices, Inc.
+ */
+
+#include <drm/amdxdna_accel.h>
+#include <drm/drm_device.h>
+#include <drm/gpu_scheduler.h>
+#include <linux/sizes.h>
+
+#include "aie2_pci.h"
+#include "amdxdna_mailbox.h"
+#include "amdxdna_pci_drv.h"
+
+/* NPU Public Registers on MpNPUAxiXbar (refer to Diag npu_registers.h) */
+#define MPNPU_PUB_SEC_INTR 0x3010060
+#define MPNPU_PUB_PWRMGMT_INTR 0x3010064
+#define MPNPU_PUB_SCRATCH0 0x301006C
+#define MPNPU_PUB_SCRATCH1 0x3010070
+#define MPNPU_PUB_SCRATCH2 0x3010074
+#define MPNPU_PUB_SCRATCH3 0x3010078
+#define MPNPU_PUB_SCRATCH4 0x301007C
+#define MPNPU_PUB_SCRATCH5 0x3010080
+#define MPNPU_PUB_SCRATCH6 0x3010084
+#define MPNPU_PUB_SCRATCH7 0x3010088
+#define MPNPU_PUB_SCRATCH8 0x301008C
+#define MPNPU_PUB_SCRATCH9 0x3010090
+#define MPNPU_PUB_SCRATCH10 0x3010094
+#define MPNPU_PUB_SCRATCH11 0x3010098
+#define MPNPU_PUB_SCRATCH12 0x301009C
+#define MPNPU_PUB_SCRATCH13 0x30100A0
+#define MPNPU_PUB_SCRATCH14 0x30100A4
+#define MPNPU_PUB_SCRATCH15 0x30100A8
+#define MP0_C2PMSG_73 0x3810A24
+#define MP0_C2PMSG_123 0x3810AEC
+
+#define MP1_C2PMSG_0 0x3B10900
+#define MP1_C2PMSG_60 0x3B109F0
+#define MP1_C2PMSG_61 0x3B109F4
+
+#define MPNPU_SRAM_X2I_MAILBOX_0 0x3600000
+#define MPNPU_SRAM_X2I_MAILBOX_15 0x361E000
+#define MPNPU_SRAM_X2I_MAILBOX_31 0x363E000
+#define MPNPU_SRAM_I2X_MAILBOX_31 0x363F000
+
+#define MMNPU_APERTURE0_BASE 0x3000000
+#define MMNPU_APERTURE1_BASE 0x3600000
+#define MMNPU_APERTURE3_BASE 0x3810000
+#define MMNPU_APERTURE4_BASE 0x3B10000
+
+/* PCIe BAR Index for NPU4 */
+#define NPU4_REG_BAR_INDEX 0
+#define NPU4_MBOX_BAR_INDEX 0
+#define NPU4_PSP_BAR_INDEX 4
+#define NPU4_SMU_BAR_INDEX 5
+#define NPU4_SRAM_BAR_INDEX 2
+/* Associated BARs and Apertures */
+#define NPU4_REG_BAR_BASE MMNPU_APERTURE0_BASE
+#define NPU4_MBOX_BAR_BASE MMNPU_APERTURE0_BASE
+#define NPU4_PSP_BAR_BASE MMNPU_APERTURE3_BASE
+#define NPU4_SMU_BAR_BASE MMNPU_APERTURE4_BASE
+#define NPU4_SRAM_BAR_BASE MMNPU_APERTURE1_BASE
+
+const struct rt_config npu4_default_rt_cfg[] = { /* default runtime configuration shared by NPU2/4/5/6; field layout is defined by struct rt_config (not visible here) */
+ { 5, 1, AIE2_RT_CFG_INIT }, /* PDI APP LOAD MODE */
+ { 1, 1, AIE2_RT_CFG_CLK_GATING }, /* Clock gating on */
+ { 2, 1, AIE2_RT_CFG_CLK_GATING }, /* Clock gating on */
+ { 3, 1, AIE2_RT_CFG_CLK_GATING }, /* Clock gating on */
+ { 4, 1, AIE2_RT_CFG_CLK_GATING }, /* Clock gating on */
+ { 0 }, /* all-zero entry; presumably the list terminator - confirm against the consumer */
+};
+
+const struct dpm_clk_freq npu4_dpm_clk_table[] = { /* clock-frequency pairs shared by NPU2/4/5/6, presumably indexed by DPM level; column meaning is defined by struct dpm_clk_freq (not visible here) */
+ {396, 792},
+ {600, 1056},
+ {792, 1152},
+ {975, 1267},
+ {975, 1267},
+ {1056, 1408},
+ {1152, 1584},
+ {1267, 1800},
+ { 0 } /* all-zero row; presumably the table terminator - confirm against the consumer */
+};
+
+static const struct amdxdna_dev_priv npu4_dev_priv = { /* NPU4 private data: firmware, protocol version, mailbox/SRAM bases, PSP/SMU register maps */
+ .fw_path = "amdnpu/17f0_10/npu.sbin",
+ .protocol_major = 0x6,
+ .protocol_minor = 12,
+ .rt_config = npu4_default_rt_cfg,
+ .dpm_clk_tbl = npu4_dpm_clk_table,
+ .col_align = COL_ALIGN_NATURE,
+ .mbox_dev_addr = NPU4_MBOX_BAR_BASE,
+ .mbox_size = 0, /* Use BAR size */
+ .sram_dev_addr = NPU4_SRAM_BAR_BASE,
+ .sram_offs = {
+ DEFINE_BAR_OFFSET(MBOX_CHANN_OFF, NPU4_SRAM, MPNPU_SRAM_X2I_MAILBOX_0),
+ DEFINE_BAR_OFFSET(FW_ALIVE_OFF, NPU4_SRAM, MPNPU_SRAM_X2I_MAILBOX_15),
+ },
+ .psp_regs_off = { /* command/interrupt/status sit in the PSP BAR; arguments and response sit in the register BAR */
+ DEFINE_BAR_OFFSET(PSP_CMD_REG, NPU4_PSP, MP0_C2PMSG_123),
+ DEFINE_BAR_OFFSET(PSP_ARG0_REG, NPU4_REG, MPNPU_PUB_SCRATCH3),
+ DEFINE_BAR_OFFSET(PSP_ARG1_REG, NPU4_REG, MPNPU_PUB_SCRATCH4),
+ DEFINE_BAR_OFFSET(PSP_ARG2_REG, NPU4_REG, MPNPU_PUB_SCRATCH9),
+ DEFINE_BAR_OFFSET(PSP_INTR_REG, NPU4_PSP, MP0_C2PMSG_73),
+ DEFINE_BAR_OFFSET(PSP_STATUS_REG, NPU4_PSP, MP0_C2PMSG_123), /* same register as PSP_CMD_REG */
+ DEFINE_BAR_OFFSET(PSP_RESP_REG, NPU4_REG, MPNPU_PUB_SCRATCH3), /* same register as PSP_ARG0_REG */
+ },
+ .smu_regs_off = {
+ DEFINE_BAR_OFFSET(SMU_CMD_REG, NPU4_SMU, MP1_C2PMSG_0),
+ DEFINE_BAR_OFFSET(SMU_ARG_REG, NPU4_SMU, MP1_C2PMSG_60),
+ DEFINE_BAR_OFFSET(SMU_INTR_REG, NPU4_SMU, MMNPU_APERTURE4_BASE), /* address equals the SMU BAR base itself */
+ DEFINE_BAR_OFFSET(SMU_RESP_REG, NPU4_SMU, MP1_C2PMSG_61),
+ DEFINE_BAR_OFFSET(SMU_OUT_REG, NPU4_SMU, MP1_C2PMSG_60), /* same register as SMU_ARG_REG */
+ },
+ .hw_ops = {
+ .set_dpm = npu4_set_dpm,
+ },
+};
+
+const struct amdxdna_dev_info dev_npu4_info = { /* static device description for NPU4 */
+ .reg_bar = NPU4_REG_BAR_INDEX,
+ .mbox_bar = NPU4_MBOX_BAR_INDEX,
+ .sram_bar = NPU4_SRAM_BAR_INDEX,
+ .psp_bar = NPU4_PSP_BAR_INDEX,
+ .smu_bar = NPU4_SMU_BAR_INDEX,
+ .first_col = 0,
+ .dev_mem_buf_shift = 15, /* 32 KiB aligned */
+ .dev_mem_base = AIE2_DEVM_BASE,
+ .dev_mem_size = AIE2_DEVM_SIZE,
+ .vbnv = "RyzenAI-npu4", /* shown by the "vbnv" sysfs attribute */
+ .device_type = AMDXDNA_DEV_TYPE_KMQ,
+ .dev_priv = &npu4_dev_priv,
+ .ops = &aie2_ops, /* NPU4 can share NPU1's callback */
+};
diff --git a/drivers/accel/amdxdna/npu5_regs.c b/drivers/accel/amdxdna/npu5_regs.c
new file mode 100644
index 000000000000..5f1cf83461c4
--- /dev/null
+++ b/drivers/accel/amdxdna/npu5_regs.c
@@ -0,0 +1,113 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2024, Advanced Micro Devices, Inc.
+ */
+
+#include <drm/amdxdna_accel.h>
+#include <drm/drm_device.h>
+#include <drm/gpu_scheduler.h>
+#include <linux/sizes.h>
+
+#include "aie2_pci.h"
+#include "amdxdna_mailbox.h"
+#include "amdxdna_pci_drv.h"
+
+/* NPU Public Registers on MpNPUAxiXbar (refer to Diag npu_registers.h) */
+#define MPNPU_PUB_SEC_INTR 0x3010060
+#define MPNPU_PUB_PWRMGMT_INTR 0x3010064
+#define MPNPU_PUB_SCRATCH0 0x301006C
+#define MPNPU_PUB_SCRATCH1 0x3010070
+#define MPNPU_PUB_SCRATCH2 0x3010074
+#define MPNPU_PUB_SCRATCH3 0x3010078
+#define MPNPU_PUB_SCRATCH4 0x301007C
+#define MPNPU_PUB_SCRATCH5 0x3010080
+#define MPNPU_PUB_SCRATCH6 0x3010084
+#define MPNPU_PUB_SCRATCH7 0x3010088
+#define MPNPU_PUB_SCRATCH8 0x301008C
+#define MPNPU_PUB_SCRATCH9 0x3010090
+#define MPNPU_PUB_SCRATCH10 0x3010094
+#define MPNPU_PUB_SCRATCH11 0x3010098
+#define MPNPU_PUB_SCRATCH12 0x301009C
+#define MPNPU_PUB_SCRATCH13 0x30100A0
+#define MPNPU_PUB_SCRATCH14 0x30100A4
+#define MPNPU_PUB_SCRATCH15 0x30100A8
+#define MP0_C2PMSG_73 0x3810A24
+#define MP0_C2PMSG_123 0x3810AEC
+
+#define MP1_C2PMSG_0 0x3B10900
+#define MP1_C2PMSG_60 0x3B109F0
+#define MP1_C2PMSG_61 0x3B109F4
+
+#define MPNPU_SRAM_X2I_MAILBOX_0 0x3600000
+#define MPNPU_SRAM_X2I_MAILBOX_15 0x361E000
+#define MPNPU_SRAM_X2I_MAILBOX_31 0x363E000
+#define MPNPU_SRAM_I2X_MAILBOX_31 0x363F000
+
+#define MMNPU_APERTURE0_BASE 0x3000000
+#define MMNPU_APERTURE1_BASE 0x3600000
+#define MMNPU_APERTURE3_BASE 0x3810000
+#define MMNPU_APERTURE4_BASE 0x3B10000
+
+/* PCIe BAR Index for NPU5 */
+#define NPU5_REG_BAR_INDEX 0
+#define NPU5_MBOX_BAR_INDEX 0
+#define NPU5_PSP_BAR_INDEX 4
+#define NPU5_SMU_BAR_INDEX 5
+#define NPU5_SRAM_BAR_INDEX 2
+/* Associated BARs and Apertures */
+#define NPU5_REG_BAR_BASE MMNPU_APERTURE0_BASE
+#define NPU5_MBOX_BAR_BASE MMNPU_APERTURE0_BASE
+#define NPU5_PSP_BAR_BASE MMNPU_APERTURE3_BASE
+#define NPU5_SMU_BAR_BASE MMNPU_APERTURE4_BASE
+#define NPU5_SRAM_BAR_BASE MMNPU_APERTURE1_BASE
+
+static const struct amdxdna_dev_priv npu5_dev_priv = { /* NPU5 private data: firmware, protocol version, mailbox/SRAM bases, PSP/SMU register maps */
+ .fw_path = "amdnpu/17f0_11/npu.sbin",
+ .protocol_major = 0x6,
+ .protocol_minor = 12,
+ .rt_config = npu4_default_rt_cfg, /* reuses the NPU4 runtime configuration */
+ .dpm_clk_tbl = npu4_dpm_clk_table, /* reuses the NPU4 clock table */
+ .col_align = COL_ALIGN_NATURE,
+ .mbox_dev_addr = NPU5_MBOX_BAR_BASE,
+ .mbox_size = 0, /* Use BAR size */
+ .sram_dev_addr = NPU5_SRAM_BAR_BASE,
+ .sram_offs = {
+ DEFINE_BAR_OFFSET(MBOX_CHANN_OFF, NPU5_SRAM, MPNPU_SRAM_X2I_MAILBOX_0),
+ DEFINE_BAR_OFFSET(FW_ALIVE_OFF, NPU5_SRAM, MPNPU_SRAM_X2I_MAILBOX_15),
+ },
+ .psp_regs_off = { /* command/interrupt/status sit in the PSP BAR; arguments and response sit in the register BAR */
+ DEFINE_BAR_OFFSET(PSP_CMD_REG, NPU5_PSP, MP0_C2PMSG_123),
+ DEFINE_BAR_OFFSET(PSP_ARG0_REG, NPU5_REG, MPNPU_PUB_SCRATCH3),
+ DEFINE_BAR_OFFSET(PSP_ARG1_REG, NPU5_REG, MPNPU_PUB_SCRATCH4),
+ DEFINE_BAR_OFFSET(PSP_ARG2_REG, NPU5_REG, MPNPU_PUB_SCRATCH9),
+ DEFINE_BAR_OFFSET(PSP_INTR_REG, NPU5_PSP, MP0_C2PMSG_73),
+ DEFINE_BAR_OFFSET(PSP_STATUS_REG, NPU5_PSP, MP0_C2PMSG_123), /* same register as PSP_CMD_REG */
+ DEFINE_BAR_OFFSET(PSP_RESP_REG, NPU5_REG, MPNPU_PUB_SCRATCH3), /* same register as PSP_ARG0_REG */
+ },
+ .smu_regs_off = {
+ DEFINE_BAR_OFFSET(SMU_CMD_REG, NPU5_SMU, MP1_C2PMSG_0),
+ DEFINE_BAR_OFFSET(SMU_ARG_REG, NPU5_SMU, MP1_C2PMSG_60),
+ DEFINE_BAR_OFFSET(SMU_INTR_REG, NPU5_SMU, MMNPU_APERTURE4_BASE), /* address equals the SMU BAR base itself */
+ DEFINE_BAR_OFFSET(SMU_RESP_REG, NPU5_SMU, MP1_C2PMSG_61),
+ DEFINE_BAR_OFFSET(SMU_OUT_REG, NPU5_SMU, MP1_C2PMSG_60), /* same register as SMU_ARG_REG */
+ },
+ .hw_ops = {
+ .set_dpm = npu4_set_dpm, /* reuses the NPU4 DPM handler */
+ },
+};
+
+const struct amdxdna_dev_info dev_npu5_info = { /* static device description for NPU5 */
+ .reg_bar = NPU5_REG_BAR_INDEX,
+ .mbox_bar = NPU5_MBOX_BAR_INDEX,
+ .sram_bar = NPU5_SRAM_BAR_INDEX,
+ .psp_bar = NPU5_PSP_BAR_INDEX,
+ .smu_bar = NPU5_SMU_BAR_INDEX,
+ .first_col = 0,
+ .dev_mem_buf_shift = 15, /* 32 KiB aligned */
+ .dev_mem_base = AIE2_DEVM_BASE,
+ .dev_mem_size = AIE2_DEVM_SIZE,
+ .vbnv = "RyzenAI-npu5", /* shown by the "vbnv" sysfs attribute */
+ .device_type = AMDXDNA_DEV_TYPE_KMQ,
+ .dev_priv = &npu5_dev_priv,
+ .ops = &aie2_ops,
+};
diff --git a/drivers/accel/amdxdna/npu6_regs.c b/drivers/accel/amdxdna/npu6_regs.c
new file mode 100644
index 000000000000..94a7005685a7
--- /dev/null
+++ b/drivers/accel/amdxdna/npu6_regs.c
@@ -0,0 +1,114 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2024, Advanced Micro Devices, Inc.
+ */
+
+#include <drm/amdxdna_accel.h>
+#include <drm/drm_device.h>
+#include <drm/gpu_scheduler.h>
+#include <linux/sizes.h>
+
+#include "aie2_pci.h"
+#include "amdxdna_mailbox.h"
+#include "amdxdna_pci_drv.h"
+
+/* NPU Public Registers on MpNPUAxiXbar (refer to Diag npu_registers.h) */
+#define MPNPU_PUB_SEC_INTR 0x3010060
+#define MPNPU_PUB_PWRMGMT_INTR 0x3010064
+#define MPNPU_PUB_SCRATCH0 0x301006C
+#define MPNPU_PUB_SCRATCH1 0x3010070
+#define MPNPU_PUB_SCRATCH2 0x3010074
+#define MPNPU_PUB_SCRATCH3 0x3010078
+#define MPNPU_PUB_SCRATCH4 0x301007C
+#define MPNPU_PUB_SCRATCH5 0x3010080
+#define MPNPU_PUB_SCRATCH6 0x3010084
+#define MPNPU_PUB_SCRATCH7 0x3010088
+#define MPNPU_PUB_SCRATCH8 0x301008C
+#define MPNPU_PUB_SCRATCH9 0x3010090
+#define MPNPU_PUB_SCRATCH10 0x3010094
+#define MPNPU_PUB_SCRATCH11 0x3010098
+#define MPNPU_PUB_SCRATCH12 0x301009C
+#define MPNPU_PUB_SCRATCH13 0x30100A0
+#define MPNPU_PUB_SCRATCH14 0x30100A4
+#define MPNPU_PUB_SCRATCH15 0x30100A8
+#define MP0_C2PMSG_73 0x3810A24
+#define MP0_C2PMSG_123 0x3810AEC
+
+#define MP1_C2PMSG_0 0x3B10900
+#define MP1_C2PMSG_60 0x3B109F0
+#define MP1_C2PMSG_61 0x3B109F4
+
+#define MPNPU_SRAM_X2I_MAILBOX_0 0x3600000
+#define MPNPU_SRAM_X2I_MAILBOX_15 0x361E000
+#define MPNPU_SRAM_X2I_MAILBOX_31 0x363E000
+#define MPNPU_SRAM_I2X_MAILBOX_31 0x363F000
+
+#define MMNPU_APERTURE0_BASE 0x3000000
+#define MMNPU_APERTURE1_BASE 0x3600000
+#define MMNPU_APERTURE3_BASE 0x3810000
+#define MMNPU_APERTURE4_BASE 0x3B10000
+
+/* PCIe BAR Index for NPU6 */
+#define NPU6_REG_BAR_INDEX 0
+#define NPU6_MBOX_BAR_INDEX 0
+#define NPU6_PSP_BAR_INDEX 4
+#define NPU6_SMU_BAR_INDEX 5
+#define NPU6_SRAM_BAR_INDEX 2
+/* Associated BARs and Apertures */
+#define NPU6_REG_BAR_BASE MMNPU_APERTURE0_BASE
+#define NPU6_MBOX_BAR_BASE MMNPU_APERTURE0_BASE
+#define NPU6_PSP_BAR_BASE MMNPU_APERTURE3_BASE
+#define NPU6_SMU_BAR_BASE MMNPU_APERTURE4_BASE
+#define NPU6_SRAM_BAR_BASE MMNPU_APERTURE1_BASE
+
+static const struct amdxdna_dev_priv npu6_dev_priv = { /* NPU6 private data: firmware, protocol version, mailbox/SRAM bases, PSP/SMU register maps */
+ .fw_path = "amdnpu/17f0_10/npu.sbin", /* NOTE(review): identical to the NPU4 firmware path - confirm this is intentional */
+ .protocol_major = 0x6,
+ .protocol_minor = 12,
+ .rt_config = npu4_default_rt_cfg, /* reuses the NPU4 runtime configuration */
+ .dpm_clk_tbl = npu4_dpm_clk_table, /* reuses the NPU4 clock table */
+ .col_align = COL_ALIGN_NATURE,
+ .mbox_dev_addr = NPU6_MBOX_BAR_BASE,
+ .mbox_size = 0, /* Use BAR size */
+ .sram_dev_addr = NPU6_SRAM_BAR_BASE,
+ .sram_offs = {
+ DEFINE_BAR_OFFSET(MBOX_CHANN_OFF, NPU6_SRAM, MPNPU_SRAM_X2I_MAILBOX_0),
+ DEFINE_BAR_OFFSET(FW_ALIVE_OFF, NPU6_SRAM, MPNPU_SRAM_X2I_MAILBOX_15),
+ },
+ .psp_regs_off = { /* command/interrupt/status sit in the PSP BAR; arguments and response sit in the register BAR */
+ DEFINE_BAR_OFFSET(PSP_CMD_REG, NPU6_PSP, MP0_C2PMSG_123),
+ DEFINE_BAR_OFFSET(PSP_ARG0_REG, NPU6_REG, MPNPU_PUB_SCRATCH3),
+ DEFINE_BAR_OFFSET(PSP_ARG1_REG, NPU6_REG, MPNPU_PUB_SCRATCH4),
+ DEFINE_BAR_OFFSET(PSP_ARG2_REG, NPU6_REG, MPNPU_PUB_SCRATCH9),
+ DEFINE_BAR_OFFSET(PSP_INTR_REG, NPU6_PSP, MP0_C2PMSG_73),
+ DEFINE_BAR_OFFSET(PSP_STATUS_REG, NPU6_PSP, MP0_C2PMSG_123), /* same register as PSP_CMD_REG */
+ DEFINE_BAR_OFFSET(PSP_RESP_REG, NPU6_REG, MPNPU_PUB_SCRATCH3), /* same register as PSP_ARG0_REG */
+ },
+ .smu_regs_off = {
+ DEFINE_BAR_OFFSET(SMU_CMD_REG, NPU6_SMU, MP1_C2PMSG_0),
+ DEFINE_BAR_OFFSET(SMU_ARG_REG, NPU6_SMU, MP1_C2PMSG_60),
+ DEFINE_BAR_OFFSET(SMU_INTR_REG, NPU6_SMU, MMNPU_APERTURE4_BASE), /* address equals the SMU BAR base itself */
+ DEFINE_BAR_OFFSET(SMU_RESP_REG, NPU6_SMU, MP1_C2PMSG_61),
+ DEFINE_BAR_OFFSET(SMU_OUT_REG, NPU6_SMU, MP1_C2PMSG_60), /* same register as SMU_ARG_REG */
+ },
+ .hw_ops = {
+ .set_dpm = npu4_set_dpm, /* reuses the NPU4 DPM handler */
+ },
+
+};
+
+const struct amdxdna_dev_info dev_npu6_info = { /* static device description for NPU6 */
+ .reg_bar = NPU6_REG_BAR_INDEX,
+ .mbox_bar = NPU6_MBOX_BAR_INDEX,
+ .sram_bar = NPU6_SRAM_BAR_INDEX,
+ .psp_bar = NPU6_PSP_BAR_INDEX,
+ .smu_bar = NPU6_SMU_BAR_INDEX,
+ .first_col = 0,
+ .dev_mem_buf_shift = 15, /* 32 KiB aligned */
+ .dev_mem_base = AIE2_DEVM_BASE,
+ .dev_mem_size = AIE2_DEVM_SIZE,
+ .vbnv = "RyzenAI-npu6", /* shown by the "vbnv" sysfs attribute */
+ .device_type = AMDXDNA_DEV_TYPE_KMQ,
+ .dev_priv = &npu6_dev_priv,
+ .ops = &aie2_ops,
+};
diff --git a/drivers/accel/habanalabs/common/context.c b/drivers/accel/habanalabs/common/context.c
index b83141f58319..9f212b17611a 100644
--- a/drivers/accel/habanalabs/common/context.c
+++ b/drivers/accel/habanalabs/common/context.c
@@ -199,7 +199,6 @@ out_err:
int hl_ctx_init(struct hl_device *hdev, struct hl_ctx *ctx, bool is_kernel_ctx)
{
- char task_comm[TASK_COMM_LEN];
int rc = 0, i;
ctx->hdev = hdev;
@@ -272,7 +271,7 @@ int hl_ctx_init(struct hl_device *hdev, struct hl_ctx *ctx, bool is_kernel_ctx)
mutex_init(&ctx->ts_reg_lock);
dev_dbg(hdev->dev, "create user context, comm=\"%s\", asid=%u\n",
- get_task_comm(task_comm, current), ctx->asid);
+ current->comm, ctx->asid);
}
return 0;
diff --git a/drivers/accel/habanalabs/common/device.c b/drivers/accel/habanalabs/common/device.c
index e0cf3b4343bb..30277ae410d4 100644
--- a/drivers/accel/habanalabs/common/device.c
+++ b/drivers/accel/habanalabs/common/device.c
@@ -817,7 +817,7 @@ static void device_hard_reset_pending(struct work_struct *work)
}
queue_delayed_work(hdev->reset_wq, &device_reset_work->reset_work,
- msecs_to_jiffies(HL_PENDING_RESET_PER_SEC * 1000));
+ secs_to_jiffies(HL_PENDING_RESET_PER_SEC));
}
}
diff --git a/drivers/accel/habanalabs/common/habanalabs_drv.c b/drivers/accel/habanalabs/common/habanalabs_drv.c
index 708dfd10f39c..596c52e8aa26 100644
--- a/drivers/accel/habanalabs/common/habanalabs_drv.c
+++ b/drivers/accel/habanalabs/common/habanalabs_drv.c
@@ -101,7 +101,6 @@ static const struct drm_driver hl_driver = {
.major = LINUX_VERSION_MAJOR,
.minor = LINUX_VERSION_PATCHLEVEL,
.patchlevel = LINUX_VERSION_SUBLEVEL,
- .date = "20190505",
.fops = &hl_fops,
.open = hl_device_open,
@@ -362,8 +361,7 @@ static void fixup_device_params_per_asic(struct hl_device *hdev, int timeout)
* a different default timeout for Gaudi
*/
if (timeout == HL_DEFAULT_TIMEOUT_LOCKED)
- hdev->timeout_jiffies = msecs_to_jiffies(GAUDI_DEFAULT_TIMEOUT_LOCKED *
- MSEC_PER_SEC);
+ hdev->timeout_jiffies = secs_to_jiffies(GAUDI_DEFAULT_TIMEOUT_LOCKED);
hdev->reset_upon_device_release = 0;
break;
diff --git a/drivers/accel/habanalabs/common/habanalabs_ioctl.c b/drivers/accel/habanalabs/common/habanalabs_ioctl.c
index 1dd6e23172ca..8729a0c57d78 100644
--- a/drivers/accel/habanalabs/common/habanalabs_ioctl.c
+++ b/drivers/accel/habanalabs/common/habanalabs_ioctl.c
@@ -1279,13 +1279,10 @@ static long _hl_ioctl(struct hl_fpriv *hpriv, unsigned int cmd, unsigned long ar
retcode = -EFAULT;
out_err:
- if (retcode) {
- char task_comm[TASK_COMM_LEN];
-
+ if (retcode)
dev_dbg_ratelimited(dev,
"error in ioctl: pid=%d, comm=\"%s\", cmd=%#010x, nr=%#04x\n",
- task_pid_nr(current), get_task_comm(task_comm, current), cmd, nr);
- }
+ task_pid_nr(current), current->comm, cmd, nr);
if (kdata != stack_kdata)
kfree(kdata);
@@ -1308,11 +1305,9 @@ long hl_ioctl_control(struct file *filep, unsigned int cmd, unsigned long arg)
if (nr == _IOC_NR(DRM_IOCTL_HL_INFO)) {
ioctl = &hl_ioctls_control[nr - HL_COMMAND_START];
} else {
- char task_comm[TASK_COMM_LEN];
-
dev_dbg_ratelimited(hdev->dev_ctrl,
"invalid ioctl: pid=%d, comm=\"%s\", cmd=%#010x, nr=%#04x\n",
- task_pid_nr(current), get_task_comm(task_comm, current), cmd, nr);
+ task_pid_nr(current), current->comm, cmd, nr);
return -ENOTTY;
}
diff --git a/drivers/accel/ivpu/ivpu_debugfs.c b/drivers/accel/ivpu/ivpu_debugfs.c
index 8180b95ed69d..093a2e93b0b3 100644
--- a/drivers/accel/ivpu/ivpu_debugfs.c
+++ b/drivers/accel/ivpu/ivpu_debugfs.c
@@ -331,7 +331,7 @@ ivpu_force_recovery_fn(struct file *file, const char __user *user_buf, size_t si
return -EINVAL;
ret = ivpu_rpm_get(vdev);
- if (ret)
+ if (ret < 0)
return ret;
ivpu_pm_trigger_recovery(vdev, "debugfs");
@@ -382,7 +382,7 @@ static int dct_active_set(void *data, u64 active_percent)
return -EINVAL;
ret = ivpu_rpm_get(vdev);
- if (ret)
+ if (ret < 0)
return ret;
if (active_percent)
diff --git a/drivers/accel/ivpu/ivpu_drv.c b/drivers/accel/ivpu/ivpu_drv.c
index 0c4a82271c26..3e56ce8bc2c1 100644
--- a/drivers/accel/ivpu/ivpu_drv.c
+++ b/drivers/accel/ivpu/ivpu_drv.c
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
- * Copyright (C) 2020-2024 Intel Corporation
+ * Copyright (C) 2020-2025 Intel Corporation
*/
#include <linux/firmware.h>
@@ -167,7 +167,7 @@ static int ivpu_get_param_ioctl(struct drm_device *dev, void *data, struct drm_f
args->value = vdev->platform;
break;
case DRM_IVPU_PARAM_CORE_CLOCK_RATE:
- args->value = ivpu_hw_ratio_to_freq(vdev, vdev->hw->pll.max_ratio);
+ args->value = ivpu_hw_dpu_max_freq_get(vdev);
break;
case DRM_IVPU_PARAM_NUM_CONTEXTS:
args->value = ivpu_get_context_count(vdev);
@@ -462,15 +462,7 @@ static const struct drm_driver driver = {
.name = DRIVER_NAME,
.desc = DRIVER_DESC,
-#ifdef DRIVER_DATE
- .date = DRIVER_DATE,
- .major = DRIVER_MAJOR,
- .minor = DRIVER_MINOR,
- .patchlevel = DRIVER_PATCHLEVEL,
-#else
- .date = UTS_RELEASE,
.major = 1,
-#endif
};
static void ivpu_context_abort_invalid(struct ivpu_device *vdev)
diff --git a/drivers/accel/ivpu/ivpu_fw.c b/drivers/accel/ivpu/ivpu_fw.c
index 6037ec0b3096..7db9a59640e7 100644
--- a/drivers/accel/ivpu/ivpu_fw.c
+++ b/drivers/accel/ivpu/ivpu_fw.c
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
- * Copyright (C) 2020-2024 Intel Corporation
+ * Copyright (C) 2020-2025 Intel Corporation
*/
#include <linux/firmware.h>
@@ -561,7 +561,6 @@ void ivpu_fw_boot_params_setup(struct ivpu_device *vdev, struct vpu_boot_params
boot_params->magic = VPU_BOOT_PARAMS_MAGIC;
boot_params->vpu_id = to_pci_dev(vdev->drm.dev)->bus->number;
- boot_params->frequency = ivpu_hw_pll_freq_get(vdev);
/*
* This param is a debug firmware feature. It switches default clock
diff --git a/drivers/accel/ivpu/ivpu_hw.h b/drivers/accel/ivpu/ivpu_hw.h
index fc4dbfc980c8..1e85306bcd06 100644
--- a/drivers/accel/ivpu/ivpu_hw.h
+++ b/drivers/accel/ivpu/ivpu_hw.h
@@ -1,6 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
- * Copyright (C) 2020-2024 Intel Corporation
+ * Copyright (C) 2020-2025 Intel Corporation
*/
#ifndef __IVPU_HW_H__
@@ -86,9 +86,9 @@ static inline u64 ivpu_hw_range_size(const struct ivpu_addr_range *range)
return range->end - range->start;
}
-static inline u32 ivpu_hw_ratio_to_freq(struct ivpu_device *vdev, u32 ratio)
+static inline u32 ivpu_hw_dpu_max_freq_get(struct ivpu_device *vdev)
{
- return ivpu_hw_btrs_ratio_to_freq(vdev, ratio);
+ return ivpu_hw_btrs_dpu_max_freq_get(vdev);
}
static inline void ivpu_hw_irq_clear(struct ivpu_device *vdev)
@@ -96,11 +96,6 @@ static inline void ivpu_hw_irq_clear(struct ivpu_device *vdev)
ivpu_hw_ip_irq_clear(vdev);
}
-static inline u32 ivpu_hw_pll_freq_get(struct ivpu_device *vdev)
-{
- return ivpu_hw_btrs_pll_freq_get(vdev);
-}
-
static inline u32 ivpu_hw_profiling_freq_get(struct ivpu_device *vdev)
{
return vdev->hw->pll.profiling_freq;
diff --git a/drivers/accel/ivpu/ivpu_hw_btrs.c b/drivers/accel/ivpu/ivpu_hw_btrs.c
index 3212c99f3682..51b9581bb60a 100644
--- a/drivers/accel/ivpu/ivpu_hw_btrs.c
+++ b/drivers/accel/ivpu/ivpu_hw_btrs.c
@@ -1,8 +1,10 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
- * Copyright (C) 2020-2024 Intel Corporation
+ * Copyright (C) 2020-2025 Intel Corporation
*/
+#include <linux/units.h>
+
#include "ivpu_drv.h"
#include "ivpu_hw.h"
#include "ivpu_hw_btrs.h"
@@ -28,17 +30,13 @@
#define BTRS_LNL_ALL_IRQ_MASK ((u32)-1)
-#define BTRS_MTL_WP_CONFIG_1_TILE_5_3_RATIO WP_CONFIG(MTL_CONFIG_1_TILE, MTL_PLL_RATIO_5_3)
-#define BTRS_MTL_WP_CONFIG_1_TILE_4_3_RATIO WP_CONFIG(MTL_CONFIG_1_TILE, MTL_PLL_RATIO_4_3)
-#define BTRS_MTL_WP_CONFIG_2_TILE_5_3_RATIO WP_CONFIG(MTL_CONFIG_2_TILE, MTL_PLL_RATIO_5_3)
-#define BTRS_MTL_WP_CONFIG_2_TILE_4_3_RATIO WP_CONFIG(MTL_CONFIG_2_TILE, MTL_PLL_RATIO_4_3)
-#define BTRS_MTL_WP_CONFIG_0_TILE_PLL_OFF WP_CONFIG(0, 0)
#define PLL_CDYN_DEFAULT 0x80
#define PLL_EPP_DEFAULT 0x80
#define PLL_CONFIG_DEFAULT 0x0
-#define PLL_SIMULATION_FREQ 10000000
-#define PLL_REF_CLK_FREQ 50000000
+#define PLL_REF_CLK_FREQ 50000000ull
+#define PLL_RATIO_TO_FREQ(x) ((x) * PLL_REF_CLK_FREQ)
+
#define PLL_TIMEOUT_US (1500 * USEC_PER_MSEC)
#define IDLE_TIMEOUT_US (5 * USEC_PER_MSEC)
#define TIMEOUT_US (150 * USEC_PER_MSEC)
@@ -62,6 +60,8 @@
#define DCT_ENABLE 0x1
#define DCT_DISABLE 0x0
+static u32 pll_ratio_to_dpu_freq(struct ivpu_device *vdev, u32 ratio);
+
int ivpu_hw_btrs_irqs_clear_with_0_mtl(struct ivpu_device *vdev)
{
REGB_WR32(VPU_HW_BTRS_MTL_INTERRUPT_STAT, BTRS_MTL_ALL_IRQ_MASK);
@@ -156,7 +156,7 @@ static int info_init_mtl(struct ivpu_device *vdev)
hw->tile_fuse = BTRS_MTL_TILE_FUSE_ENABLE_BOTH;
hw->sku = BTRS_MTL_TILE_SKU_BOTH;
- hw->config = BTRS_MTL_WP_CONFIG_2_TILE_4_3_RATIO;
+ hw->config = WP_CONFIG(MTL_CONFIG_2_TILE, MTL_PLL_RATIO_4_3);
return 0;
}
@@ -334,8 +334,8 @@ int ivpu_hw_btrs_wp_drive(struct ivpu_device *vdev, bool enable)
prepare_wp_request(vdev, &wp, enable);
- ivpu_dbg(vdev, PM, "PLL workpoint request: %u Hz, config: 0x%x, epp: 0x%x, cdyn: 0x%x\n",
- PLL_RATIO_TO_FREQ(wp.target), wp.cfg, wp.epp, wp.cdyn);
+ ivpu_dbg(vdev, PM, "PLL workpoint request: %lu MHz, config: 0x%x, epp: 0x%x, cdyn: 0x%x\n",
+ pll_ratio_to_dpu_freq(vdev, wp.target) / HZ_PER_MHZ, wp.cfg, wp.epp, wp.cdyn);
ret = wp_request_send(vdev, &wp);
if (ret) {
@@ -573,6 +573,39 @@ int ivpu_hw_btrs_wait_for_idle(struct ivpu_device *vdev)
return REGB_POLL_FLD(VPU_HW_BTRS_LNL_VPU_STATUS, IDLE, 0x1, IDLE_TIMEOUT_US);
}
+static u32 pll_config_get_mtl(struct ivpu_device *vdev)
+{
+ return REGB_RD32(VPU_HW_BTRS_MTL_CURRENT_PLL);
+}
+
+static u32 pll_config_get_lnl(struct ivpu_device *vdev)
+{
+ return REGB_RD32(VPU_HW_BTRS_LNL_PLL_FREQ);
+}
+
+static u32 pll_ratio_to_dpu_freq_mtl(u16 ratio)
+{
+ return (PLL_RATIO_TO_FREQ(ratio) * 2) / 3;
+}
+
+static u32 pll_ratio_to_dpu_freq_lnl(u16 ratio)
+{
+ return PLL_RATIO_TO_FREQ(ratio) / 2;
+}
+
+static u32 pll_ratio_to_dpu_freq(struct ivpu_device *vdev, u32 ratio)
+{
+ if (ivpu_hw_btrs_gen(vdev) == IVPU_HW_BTRS_MTL)
+ return pll_ratio_to_dpu_freq_mtl(ratio);
+ else
+ return pll_ratio_to_dpu_freq_lnl(ratio);
+}
+
+u32 ivpu_hw_btrs_dpu_max_freq_get(struct ivpu_device *vdev)
+{
+ return pll_ratio_to_dpu_freq(vdev, vdev->hw->pll.max_ratio);
+}
+
/* Handler for IRQs from Buttress core (irqB) */
bool ivpu_hw_btrs_irq_handler_mtl(struct ivpu_device *vdev, int irq)
{
@@ -582,9 +615,12 @@ bool ivpu_hw_btrs_irq_handler_mtl(struct ivpu_device *vdev, int irq)
if (!status)
return false;
- if (REG_TEST_FLD(VPU_HW_BTRS_MTL_INTERRUPT_STAT, FREQ_CHANGE, status))
- ivpu_dbg(vdev, IRQ, "FREQ_CHANGE irq: %08x",
- REGB_RD32(VPU_HW_BTRS_MTL_CURRENT_PLL));
+ if (REG_TEST_FLD(VPU_HW_BTRS_MTL_INTERRUPT_STAT, FREQ_CHANGE, status)) {
+ u32 pll = pll_config_get_mtl(vdev);
+
+ ivpu_dbg(vdev, IRQ, "FREQ_CHANGE irq, wp %08x, %lu MHz",
+ pll, pll_ratio_to_dpu_freq_mtl(pll) / HZ_PER_MHZ);
+ }
if (REG_TEST_FLD(VPU_HW_BTRS_MTL_INTERRUPT_STAT, ATS_ERR, status)) {
ivpu_err(vdev, "ATS_ERR irq 0x%016llx", REGB_RD64(VPU_HW_BTRS_MTL_ATS_ERR_LOG_0));
@@ -634,8 +670,12 @@ bool ivpu_hw_btrs_irq_handler_lnl(struct ivpu_device *vdev, int irq)
ivpu_err_ratelimited(vdev, "IRQ FIFO full\n");
}
- if (REG_TEST_FLD(VPU_HW_BTRS_LNL_INTERRUPT_STAT, FREQ_CHANGE, status))
- ivpu_dbg(vdev, IRQ, "FREQ_CHANGE irq: %08x", REGB_RD32(VPU_HW_BTRS_LNL_PLL_FREQ));
+ if (REG_TEST_FLD(VPU_HW_BTRS_LNL_INTERRUPT_STAT, FREQ_CHANGE, status)) {
+ u32 pll = pll_config_get_lnl(vdev);
+
+ ivpu_dbg(vdev, IRQ, "FREQ_CHANGE irq, wp %08x, %lu MHz",
+ pll, pll_ratio_to_dpu_freq_lnl(pll) / HZ_PER_MHZ);
+ }
if (REG_TEST_FLD(VPU_HW_BTRS_LNL_INTERRUPT_STAT, ATS_ERR, status)) {
ivpu_err(vdev, "ATS_ERR LOG1 0x%08x ATS_ERR_LOG2 0x%08x\n",
@@ -718,60 +758,6 @@ void ivpu_hw_btrs_dct_set_status(struct ivpu_device *vdev, bool enable, u32 acti
REGB_WR32(VPU_HW_BTRS_LNL_PCODE_MAILBOX_STATUS, val);
}
-static u32 pll_ratio_to_freq_mtl(u32 ratio, u32 config)
-{
- u32 pll_clock = PLL_REF_CLK_FREQ * ratio;
- u32 cpu_clock;
-
- if ((config & 0xff) == MTL_PLL_RATIO_4_3)
- cpu_clock = pll_clock * 2 / 4;
- else
- cpu_clock = pll_clock * 2 / 5;
-
- return cpu_clock;
-}
-
-u32 ivpu_hw_btrs_ratio_to_freq(struct ivpu_device *vdev, u32 ratio)
-{
- struct ivpu_hw_info *hw = vdev->hw;
-
- if (ivpu_hw_btrs_gen(vdev) == IVPU_HW_BTRS_MTL)
- return pll_ratio_to_freq_mtl(ratio, hw->config);
- else
- return PLL_RATIO_TO_FREQ(ratio);
-}
-
-static u32 pll_freq_get_mtl(struct ivpu_device *vdev)
-{
- u32 pll_curr_ratio;
-
- pll_curr_ratio = REGB_RD32(VPU_HW_BTRS_MTL_CURRENT_PLL);
- pll_curr_ratio &= VPU_HW_BTRS_MTL_CURRENT_PLL_RATIO_MASK;
-
- if (!ivpu_is_silicon(vdev))
- return PLL_SIMULATION_FREQ;
-
- return pll_ratio_to_freq_mtl(pll_curr_ratio, vdev->hw->config);
-}
-
-static u32 pll_freq_get_lnl(struct ivpu_device *vdev)
-{
- u32 pll_curr_ratio;
-
- pll_curr_ratio = REGB_RD32(VPU_HW_BTRS_LNL_PLL_FREQ);
- pll_curr_ratio &= VPU_HW_BTRS_LNL_PLL_FREQ_RATIO_MASK;
-
- return PLL_RATIO_TO_FREQ(pll_curr_ratio);
-}
-
-u32 ivpu_hw_btrs_pll_freq_get(struct ivpu_device *vdev)
-{
- if (ivpu_hw_btrs_gen(vdev) == IVPU_HW_BTRS_MTL)
- return pll_freq_get_mtl(vdev);
- else
- return pll_freq_get_lnl(vdev);
-}
-
u32 ivpu_hw_btrs_telemetry_offset_get(struct ivpu_device *vdev)
{
if (ivpu_hw_btrs_gen(vdev) == IVPU_HW_BTRS_MTL)
diff --git a/drivers/accel/ivpu/ivpu_hw_btrs.h b/drivers/accel/ivpu/ivpu_hw_btrs.h
index 04f14f50fed6..71792dab3c21 100644
--- a/drivers/accel/ivpu/ivpu_hw_btrs.h
+++ b/drivers/accel/ivpu/ivpu_hw_btrs.h
@@ -1,6 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
- * Copyright (C) 2020-2024 Intel Corporation
+ * Copyright (C) 2020-2025 Intel Corporation
*/
#ifndef __IVPU_HW_BTRS_H__
@@ -13,7 +13,6 @@
#define PLL_PROFILING_FREQ_DEFAULT 38400000
#define PLL_PROFILING_FREQ_HIGH 400000000
-#define PLL_RATIO_TO_FREQ(x) ((x) * PLL_REF_CLK_FREQ)
#define DCT_DEFAULT_ACTIVE_PERCENT 15u
#define DCT_PERIOD_US 35300u
@@ -32,12 +31,11 @@ int ivpu_hw_btrs_ip_reset(struct ivpu_device *vdev);
void ivpu_hw_btrs_profiling_freq_reg_set_lnl(struct ivpu_device *vdev);
void ivpu_hw_btrs_ats_print_lnl(struct ivpu_device *vdev);
void ivpu_hw_btrs_clock_relinquish_disable_lnl(struct ivpu_device *vdev);
+u32 ivpu_hw_btrs_dpu_max_freq_get(struct ivpu_device *vdev);
bool ivpu_hw_btrs_irq_handler_mtl(struct ivpu_device *vdev, int irq);
bool ivpu_hw_btrs_irq_handler_lnl(struct ivpu_device *vdev, int irq);
int ivpu_hw_btrs_dct_get_request(struct ivpu_device *vdev, bool *enable);
void ivpu_hw_btrs_dct_set_status(struct ivpu_device *vdev, bool enable, u32 dct_percent);
-u32 ivpu_hw_btrs_pll_freq_get(struct ivpu_device *vdev);
-u32 ivpu_hw_btrs_ratio_to_freq(struct ivpu_device *vdev, u32 ratio);
u32 ivpu_hw_btrs_telemetry_offset_get(struct ivpu_device *vdev);
u32 ivpu_hw_btrs_telemetry_size_get(struct ivpu_device *vdev);
u32 ivpu_hw_btrs_telemetry_enable_get(struct ivpu_device *vdev);
diff --git a/drivers/accel/ivpu/ivpu_ipc.c b/drivers/accel/ivpu/ivpu_ipc.c
index 01ebf88fe6ef..5daaf07fc1a7 100644
--- a/drivers/accel/ivpu/ivpu_ipc.c
+++ b/drivers/accel/ivpu/ivpu_ipc.c
@@ -302,7 +302,8 @@ ivpu_ipc_send_receive_internal(struct ivpu_device *vdev, struct vpu_jsm_msg *req
struct ivpu_ipc_consumer cons;
int ret;
- drm_WARN_ON(&vdev->drm, pm_runtime_status_suspended(vdev->drm.dev));
+ drm_WARN_ON(&vdev->drm, pm_runtime_status_suspended(vdev->drm.dev) &&
+ pm_runtime_enabled(vdev->drm.dev));
ivpu_ipc_consumer_add(vdev, &cons, channel, NULL);
diff --git a/drivers/accel/ivpu/ivpu_ms.c b/drivers/accel/ivpu/ivpu_ms.c
index ffe7b10f8a76..2a043baf10ca 100644
--- a/drivers/accel/ivpu/ivpu_ms.c
+++ b/drivers/accel/ivpu/ivpu_ms.c
@@ -4,6 +4,7 @@
*/
#include <drm/drm_file.h>
+#include <linux/pm_runtime.h>
#include "ivpu_drv.h"
#include "ivpu_gem.h"
@@ -44,6 +45,10 @@ int ivpu_ms_start_ioctl(struct drm_device *dev, void *data, struct drm_file *fil
args->sampling_period_ns < MS_MIN_SAMPLE_PERIOD_NS)
return -EINVAL;
+ ret = ivpu_rpm_get(vdev);
+ if (ret < 0)
+ return ret;
+
mutex_lock(&file_priv->ms_lock);
if (get_instance_by_mask(file_priv, args->metric_group_mask)) {
@@ -96,6 +101,8 @@ err_free_ms:
kfree(ms);
unlock:
mutex_unlock(&file_priv->ms_lock);
+
+ ivpu_rpm_put(vdev);
return ret;
}
@@ -160,6 +167,10 @@ int ivpu_ms_get_data_ioctl(struct drm_device *dev, void *data, struct drm_file *
if (!args->metric_group_mask)
return -EINVAL;
+ ret = ivpu_rpm_get(vdev);
+ if (ret < 0)
+ return ret;
+
mutex_lock(&file_priv->ms_lock);
ms = get_instance_by_mask(file_priv, args->metric_group_mask);
@@ -187,6 +198,7 @@ int ivpu_ms_get_data_ioctl(struct drm_device *dev, void *data, struct drm_file *
unlock:
mutex_unlock(&file_priv->ms_lock);
+ ivpu_rpm_put(vdev);
return ret;
}
@@ -204,11 +216,17 @@ int ivpu_ms_stop_ioctl(struct drm_device *dev, void *data, struct drm_file *file
{
struct ivpu_file_priv *file_priv = file->driver_priv;
struct drm_ivpu_metric_streamer_stop *args = data;
+ struct ivpu_device *vdev = file_priv->vdev;
struct ivpu_ms_instance *ms;
+ int ret;
if (!args->metric_group_mask)
return -EINVAL;
+ ret = ivpu_rpm_get(vdev);
+ if (ret < 0)
+ return ret;
+
mutex_lock(&file_priv->ms_lock);
ms = get_instance_by_mask(file_priv, args->metric_group_mask);
@@ -217,6 +235,7 @@ int ivpu_ms_stop_ioctl(struct drm_device *dev, void *data, struct drm_file *file
mutex_unlock(&file_priv->ms_lock);
+ ivpu_rpm_put(vdev);
return ms ? 0 : -EINVAL;
}
@@ -281,6 +300,9 @@ unlock:
void ivpu_ms_cleanup(struct ivpu_file_priv *file_priv)
{
struct ivpu_ms_instance *ms, *tmp;
+ struct ivpu_device *vdev = file_priv->vdev;
+
+ pm_runtime_get_sync(vdev->drm.dev);
mutex_lock(&file_priv->ms_lock);
@@ -293,6 +315,8 @@ void ivpu_ms_cleanup(struct ivpu_file_priv *file_priv)
free_instance(file_priv, ms);
mutex_unlock(&file_priv->ms_lock);
+
+ pm_runtime_put_autosuspend(vdev->drm.dev);
}
void ivpu_ms_cleanup_all(struct ivpu_device *vdev)
diff --git a/drivers/accel/qaic/qaic_data.c b/drivers/accel/qaic/qaic_data.c
index c20eb63750f5..43aba57b48f0 100644
--- a/drivers/accel/qaic/qaic_data.c
+++ b/drivers/accel/qaic/qaic_data.c
@@ -172,9 +172,10 @@ static void free_slice(struct kref *kref)
static int clone_range_of_sgt_for_slice(struct qaic_device *qdev, struct sg_table **sgt_out,
struct sg_table *sgt_in, u64 size, u64 offset)
{
- int total_len, len, nents, offf = 0, offl = 0;
struct scatterlist *sg, *sgn, *sgf, *sgl;
+ unsigned int len, nents, offf, offl;
struct sg_table *sgt;
+ size_t total_len;
int ret, j;
/* find out number of relevant nents needed for this mem */
@@ -182,6 +183,8 @@ static int clone_range_of_sgt_for_slice(struct qaic_device *qdev, struct sg_tabl
sgf = NULL;
sgl = NULL;
nents = 0;
+ offf = 0;
+ offl = 0;
size = size ? size : PAGE_SIZE;
for_each_sgtable_dma_sg(sgt_in, sg, j) {
@@ -554,6 +557,7 @@ static bool invalid_sem(struct qaic_sem *sem)
static int qaic_validate_req(struct qaic_device *qdev, struct qaic_attach_slice_entry *slice_ent,
u32 count, u64 total_size)
{
+ u64 total;
int i;
for (i = 0; i < count; i++) {
@@ -563,7 +567,8 @@ static int qaic_validate_req(struct qaic_device *qdev, struct qaic_attach_slice_
invalid_sem(&slice_ent[i].sem2) || invalid_sem(&slice_ent[i].sem3))
return -EINVAL;
- if (slice_ent[i].offset + slice_ent[i].size > total_size)
+ if (check_add_overflow(slice_ent[i].offset, slice_ent[i].size, &total) ||
+ total > total_size)
return -EINVAL;
}
diff --git a/drivers/accel/qaic/qaic_drv.c b/drivers/accel/qaic/qaic_drv.c
index 4ddf89308ff5..81819b9ef8d4 100644
--- a/drivers/accel/qaic/qaic_drv.c
+++ b/drivers/accel/qaic/qaic_drv.c
@@ -208,7 +208,6 @@ static const struct drm_driver qaic_accel_driver = {
.name = QAIC_NAME,
.desc = QAIC_DESC,
- .date = "20190618",
.fops = &qaic_accel_fops,
.open = qaic_open,
diff --git a/drivers/accel/qaic/sahara.c b/drivers/accel/qaic/sahara.c
index 6d772143d612..21d58aed0deb 100644
--- a/drivers/accel/qaic/sahara.c
+++ b/drivers/accel/qaic/sahara.c
@@ -772,8 +772,7 @@ static void sahara_mhi_remove(struct mhi_device *mhi_dev)
cancel_work_sync(&context->fw_work);
cancel_work_sync(&context->dump_work);
- if (context->mem_dump)
- vfree(context->mem_dump);
+ vfree(context->mem_dump);
sahara_release_image(context);
mhi_unprepare_from_transfer(mhi_dev);
}