summaryrefslogtreecommitdiff
path: root/drivers/accel
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/accel')
-rw-r--r--drivers/accel/ivpu/ivpu_debugfs.c86
-rw-r--r--drivers/accel/ivpu/ivpu_drv.c55
-rw-r--r--drivers/accel/ivpu/ivpu_drv.h31
-rw-r--r--drivers/accel/ivpu/ivpu_fw.c33
-rw-r--r--drivers/accel/ivpu/ivpu_fw.h3
-rw-r--r--drivers/accel/ivpu/ivpu_fw_log.c113
-rw-r--r--drivers/accel/ivpu/ivpu_fw_log.h9
-rw-r--r--drivers/accel/ivpu/ivpu_gem.c89
-rw-r--r--drivers/accel/ivpu/ivpu_gem.h2
-rw-r--r--drivers/accel/ivpu/ivpu_hw.c23
-rw-r--r--drivers/accel/ivpu/ivpu_hw.h17
-rw-r--r--drivers/accel/ivpu/ivpu_hw_40xx_reg.h2
-rw-r--r--drivers/accel/ivpu/ivpu_hw_btrs.c128
-rw-r--r--drivers/accel/ivpu/ivpu_hw_btrs.h8
-rw-r--r--drivers/accel/ivpu/ivpu_hw_ip.c49
-rw-r--r--drivers/accel/ivpu/ivpu_job.c328
-rw-r--r--drivers/accel/ivpu/ivpu_job.h3
-rw-r--r--drivers/accel/ivpu/ivpu_jsm_msg.c49
-rw-r--r--drivers/accel/ivpu/ivpu_mmu.c3
-rw-r--r--drivers/accel/ivpu/ivpu_pm.c19
-rw-r--r--drivers/accel/ivpu/ivpu_sysfs.c29
-rw-r--r--drivers/accel/ivpu/vpu_boot_api.h45
-rw-r--r--drivers/accel/ivpu/vpu_jsm_api.h303
-rw-r--r--drivers/accel/qaic/qaic_drv.c2
24 files changed, 983 insertions, 446 deletions
diff --git a/drivers/accel/ivpu/ivpu_debugfs.c b/drivers/accel/ivpu/ivpu_debugfs.c
index eccedb0c8886..1edf6e564402 100644
--- a/drivers/accel/ivpu/ivpu_debugfs.c
+++ b/drivers/accel/ivpu/ivpu_debugfs.c
@@ -201,7 +201,7 @@ fw_log_fops_write(struct file *file, const char __user *user_buf, size_t size, l
if (!size)
return -EINVAL;
- ivpu_fw_log_clear(vdev);
+ ivpu_fw_log_mark_read(vdev);
return size;
}
@@ -423,6 +423,88 @@ static int dct_active_set(void *data, u64 active_percent)
DEFINE_DEBUGFS_ATTRIBUTE(ivpu_dct_fops, dct_active_get, dct_active_set, "%llu\n");
+static int priority_bands_show(struct seq_file *s, void *v)
+{
+ struct ivpu_device *vdev = s->private;
+ struct ivpu_hw_info *hw = vdev->hw;
+
+ for (int band = VPU_JOB_SCHEDULING_PRIORITY_BAND_IDLE;
+ band < VPU_JOB_SCHEDULING_PRIORITY_BAND_COUNT; band++) {
+ switch (band) {
+ case VPU_JOB_SCHEDULING_PRIORITY_BAND_IDLE:
+ seq_puts(s, "Idle: ");
+ break;
+
+ case VPU_JOB_SCHEDULING_PRIORITY_BAND_NORMAL:
+ seq_puts(s, "Normal: ");
+ break;
+
+ case VPU_JOB_SCHEDULING_PRIORITY_BAND_FOCUS:
+ seq_puts(s, "Focus: ");
+ break;
+
+ case VPU_JOB_SCHEDULING_PRIORITY_BAND_REALTIME:
+ seq_puts(s, "Realtime: ");
+ break;
+ }
+
+ seq_printf(s, "grace_period %9u process_grace_period %9u process_quantum %9u\n",
+ hw->hws.grace_period[band], hw->hws.process_grace_period[band],
+ hw->hws.process_quantum[band]);
+ }
+
+ return 0;
+}
+
+static int priority_bands_fops_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, priority_bands_show, inode->i_private);
+}
+
+static ssize_t
+priority_bands_fops_write(struct file *file, const char __user *user_buf, size_t size, loff_t *pos)
+{
+ struct seq_file *s = file->private_data;
+ struct ivpu_device *vdev = s->private;
+ char buf[64];
+ u32 grace_period;
+ u32 process_grace_period;
+ u32 process_quantum;
+ u32 band;
+ int ret;
+
+ if (size >= sizeof(buf))
+ return -EINVAL;
+
+ ret = simple_write_to_buffer(buf, sizeof(buf) - 1, pos, user_buf, size);
+ if (ret < 0)
+ return ret;
+
+ buf[size] = '\0';
+ ret = sscanf(buf, "%u %u %u %u", &band, &grace_period, &process_grace_period,
+ &process_quantum);
+ if (ret != 4)
+ return -EINVAL;
+
+ if (band >= VPU_JOB_SCHEDULING_PRIORITY_BAND_COUNT)
+ return -EINVAL;
+
+ vdev->hw->hws.grace_period[band] = grace_period;
+ vdev->hw->hws.process_grace_period[band] = process_grace_period;
+ vdev->hw->hws.process_quantum[band] = process_quantum;
+
+ return size;
+}
+
+static const struct file_operations ivpu_hws_priority_bands_fops = {
+ .owner = THIS_MODULE,
+ .open = priority_bands_fops_open,
+ .write = priority_bands_fops_write,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
void ivpu_debugfs_init(struct ivpu_device *vdev)
{
struct dentry *debugfs_root = vdev->drm.debugfs_root;
@@ -445,6 +527,8 @@ void ivpu_debugfs_init(struct ivpu_device *vdev)
&fw_trace_hw_comp_mask_fops);
debugfs_create_file("fw_trace_level", 0200, debugfs_root, vdev,
&fw_trace_level_fops);
+ debugfs_create_file("hws_priority_bands", 0200, debugfs_root, vdev,
+ &ivpu_hws_priority_bands_fops);
debugfs_create_file("reset_engine", 0200, debugfs_root, vdev,
&ivpu_reset_engine_fops);
diff --git a/drivers/accel/ivpu/ivpu_drv.c b/drivers/accel/ivpu/ivpu_drv.c
index 38b4158f5278..00208c4a6580 100644
--- a/drivers/accel/ivpu/ivpu_drv.c
+++ b/drivers/accel/ivpu/ivpu_drv.c
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
- * Copyright (C) 2020-2024 Intel Corporation
+ * Copyright (C) 2020-2025 Intel Corporation
*/
#include <linux/firmware.h>
@@ -36,8 +36,6 @@
__stringify(DRM_IVPU_DRIVER_MINOR) "."
#endif
-static struct lock_class_key submitted_jobs_xa_lock_class_key;
-
int ivpu_dbg_mask;
module_param_named(dbg_mask, ivpu_dbg_mask, int, 0644);
MODULE_PARM_DESC(dbg_mask, "Driver debug mask. See IVPU_DBG_* macros.");
@@ -54,9 +52,9 @@ u8 ivpu_pll_max_ratio = U8_MAX;
module_param_named(pll_max_ratio, ivpu_pll_max_ratio, byte, 0644);
MODULE_PARM_DESC(pll_max_ratio, "Maximum PLL ratio used to set NPU frequency");
-int ivpu_sched_mode;
+int ivpu_sched_mode = IVPU_SCHED_MODE_AUTO;
module_param_named(sched_mode, ivpu_sched_mode, int, 0444);
-MODULE_PARM_DESC(sched_mode, "Scheduler mode: 0 - Default scheduler, 1 - Force HW scheduler");
+MODULE_PARM_DESC(sched_mode, "Scheduler mode: -1 - Use default scheduler, 0 - Use OS scheduler, 1 - Use HW scheduler");
bool ivpu_disable_mmu_cont_pages;
module_param_named(disable_mmu_cont_pages, ivpu_disable_mmu_cont_pages, bool, 0444);
@@ -104,6 +102,8 @@ static void file_priv_release(struct kref *ref)
pm_runtime_get_sync(vdev->drm.dev);
mutex_lock(&vdev->context_list_lock);
file_priv_unbind(vdev, file_priv);
+ drm_WARN_ON(&vdev->drm, !xa_empty(&file_priv->cmdq_xa));
+ xa_destroy(&file_priv->cmdq_xa);
mutex_unlock(&vdev->context_list_lock);
pm_runtime_put_autosuspend(vdev->drm.dev);
@@ -165,7 +165,7 @@ static int ivpu_get_param_ioctl(struct drm_device *dev, void *data, struct drm_f
args->value = vdev->platform;
break;
case DRM_IVPU_PARAM_CORE_CLOCK_RATE:
- args->value = ivpu_hw_ratio_to_freq(vdev, vdev->hw->pll.max_ratio);
+ args->value = ivpu_hw_dpu_max_freq_get(vdev);
break;
case DRM_IVPU_PARAM_NUM_CONTEXTS:
args->value = ivpu_get_context_count(vdev);
@@ -260,6 +260,13 @@ static int ivpu_open(struct drm_device *dev, struct drm_file *file)
if (ret)
goto err_xa_erase;
+ file_priv->job_limit.min = FIELD_PREP(IVPU_JOB_ID_CONTEXT_MASK, (file_priv->ctx.id - 1));
+ file_priv->job_limit.max = file_priv->job_limit.min | IVPU_JOB_ID_JOB_MASK;
+
+ xa_init_flags(&file_priv->cmdq_xa, XA_FLAGS_ALLOC1);
+ file_priv->cmdq_limit.min = IVPU_CMDQ_MIN_ID;
+ file_priv->cmdq_limit.max = IVPU_CMDQ_MAX_ID;
+
mutex_unlock(&vdev->context_list_lock);
drm_dev_exit(idx);
@@ -347,7 +354,7 @@ static int ivpu_hw_sched_init(struct ivpu_device *vdev)
{
int ret = 0;
- if (vdev->hw->sched_mode == VPU_SCHEDULING_MODE_HW) {
+ if (vdev->fw->sched_mode == VPU_SCHEDULING_MODE_HW) {
ret = ivpu_jsm_hws_setup_priority_bands(vdev);
if (ret) {
ivpu_err(vdev, "Failed to enable hw scheduler: %d", ret);
@@ -452,26 +459,6 @@ static const struct drm_driver driver = {
.minor = DRM_IVPU_DRIVER_MINOR,
};
-static void ivpu_context_abort_invalid(struct ivpu_device *vdev)
-{
- struct ivpu_file_priv *file_priv;
- unsigned long ctx_id;
-
- mutex_lock(&vdev->context_list_lock);
-
- xa_for_each(&vdev->context_xa, ctx_id, file_priv) {
- if (!file_priv->has_mmu_faults || file_priv->aborted)
- continue;
-
- mutex_lock(&file_priv->lock);
- ivpu_context_abort_locked(file_priv);
- file_priv->aborted = true;
- mutex_unlock(&file_priv->lock);
- }
-
- mutex_unlock(&vdev->context_list_lock);
-}
-
static irqreturn_t ivpu_irq_thread_handler(int irq, void *arg)
{
struct ivpu_device *vdev = arg;
@@ -485,9 +472,6 @@ static irqreturn_t ivpu_irq_thread_handler(int irq, void *arg)
case IVPU_HW_IRQ_SRC_IPC:
ivpu_ipc_irq_thread_handler(vdev);
break;
- case IVPU_HW_IRQ_SRC_MMU_EVTQ:
- ivpu_context_abort_invalid(vdev);
- break;
case IVPU_HW_IRQ_SRC_DCT:
ivpu_pm_dct_irq_thread_handler(vdev);
break;
@@ -604,13 +588,21 @@ static int ivpu_dev_init(struct ivpu_device *vdev)
xa_init_flags(&vdev->context_xa, XA_FLAGS_ALLOC | XA_FLAGS_LOCK_IRQ);
xa_init_flags(&vdev->submitted_jobs_xa, XA_FLAGS_ALLOC1);
xa_init_flags(&vdev->db_xa, XA_FLAGS_ALLOC1);
- lockdep_set_class(&vdev->submitted_jobs_xa.xa_lock, &submitted_jobs_xa_lock_class_key);
INIT_LIST_HEAD(&vdev->bo_list);
+ vdev->db_limit.min = IVPU_MIN_DB;
+ vdev->db_limit.max = IVPU_MAX_DB;
+
+ INIT_WORK(&vdev->context_abort_work, ivpu_context_abort_thread_handler);
+
ret = drmm_mutex_init(&vdev->drm, &vdev->context_list_lock);
if (ret)
goto err_xa_destroy;
+ ret = drmm_mutex_init(&vdev->drm, &vdev->submitted_jobs_lock);
+ if (ret)
+ goto err_xa_destroy;
+
ret = drmm_mutex_init(&vdev->drm, &vdev->bo_list_lock);
if (ret)
goto err_xa_destroy;
@@ -723,6 +715,7 @@ static struct pci_device_id ivpu_pci_ids[] = {
{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_MTL) },
{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_ARL) },
{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_LNL) },
+ { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_PTL_P) },
{ }
};
MODULE_DEVICE_TABLE(pci, ivpu_pci_ids);
diff --git a/drivers/accel/ivpu/ivpu_drv.h b/drivers/accel/ivpu/ivpu_drv.h
index 2b30cc2e9272..f2ba3ed8b3fc 100644
--- a/drivers/accel/ivpu/ivpu_drv.h
+++ b/drivers/accel/ivpu/ivpu_drv.h
@@ -23,9 +23,10 @@
#define DRIVER_DESC "Driver for Intel NPU (Neural Processing Unit)"
#define DRIVER_DATE "20230117"
-#define PCI_DEVICE_ID_MTL 0x7d1d
-#define PCI_DEVICE_ID_ARL 0xad1d
-#define PCI_DEVICE_ID_LNL 0x643e
+#define PCI_DEVICE_ID_MTL 0x7d1d
+#define PCI_DEVICE_ID_ARL 0xad1d
+#define PCI_DEVICE_ID_LNL 0x643e
+#define PCI_DEVICE_ID_PTL_P 0xb03e
#define IVPU_HW_IP_37XX 37
#define IVPU_HW_IP_40XX 40
@@ -46,17 +47,22 @@
#define IVPU_MIN_DB 1
#define IVPU_MAX_DB 255
-#define IVPU_NUM_ENGINES 2
+#define IVPU_JOB_ID_JOB_MASK GENMASK(7, 0)
+#define IVPU_JOB_ID_CONTEXT_MASK GENMASK(31, 8)
+
#define IVPU_NUM_PRIORITIES 4
-#define IVPU_NUM_CMDQS_PER_CTX (IVPU_NUM_ENGINES * IVPU_NUM_PRIORITIES)
+#define IVPU_NUM_CMDQS_PER_CTX (IVPU_NUM_PRIORITIES)
-#define IVPU_CMDQ_INDEX(engine, priority) ((engine) * IVPU_NUM_PRIORITIES + (priority))
+#define IVPU_CMDQ_MIN_ID 1
+#define IVPU_CMDQ_MAX_ID 255
#define IVPU_PLATFORM_SILICON 0
#define IVPU_PLATFORM_SIMICS 2
#define IVPU_PLATFORM_FPGA 3
#define IVPU_PLATFORM_INVALID 8
+#define IVPU_SCHED_MODE_AUTO -1
+
#define IVPU_DBG_REG BIT(0)
#define IVPU_DBG_IRQ BIT(1)
#define IVPU_DBG_MMU BIT(2)
@@ -132,12 +138,16 @@ struct ivpu_device {
struct mutex context_list_lock; /* Protects user context addition/removal */
struct xarray context_xa;
struct xa_limit context_xa_limit;
+ struct work_struct context_abort_work;
struct xarray db_xa;
+ struct xa_limit db_limit;
+ u32 db_next;
struct mutex bo_list_lock; /* Protects bo_list */
struct list_head bo_list;
+ struct mutex submitted_jobs_lock; /* Protects submitted_jobs */
struct xarray submitted_jobs_xa;
struct ivpu_ipc_consumer job_done_consumer;
@@ -164,11 +174,15 @@ struct ivpu_file_priv {
struct kref ref;
struct ivpu_device *vdev;
struct mutex lock; /* Protects cmdq */
- struct ivpu_cmdq *cmdq[IVPU_NUM_CMDQS_PER_CTX];
+ struct xarray cmdq_xa;
struct ivpu_mmu_context ctx;
struct mutex ms_lock; /* Protects ms_instance_list, ms_info_bo */
struct list_head ms_instance_list;
struct ivpu_bo *ms_info_bo;
+ struct xa_limit job_limit;
+ u32 job_id_next;
+ struct xa_limit cmdq_limit;
+ u32 cmdq_id_next;
bool has_mmu_faults;
bool bound;
bool aborted;
@@ -216,6 +230,8 @@ static inline int ivpu_hw_ip_gen(struct ivpu_device *vdev)
return IVPU_HW_IP_37XX;
case PCI_DEVICE_ID_LNL:
return IVPU_HW_IP_40XX;
+ case PCI_DEVICE_ID_PTL_P:
+ return IVPU_HW_IP_50XX;
default:
dump_stack();
ivpu_err(vdev, "Unknown NPU IP generation\n");
@@ -230,6 +246,7 @@ static inline int ivpu_hw_btrs_gen(struct ivpu_device *vdev)
case PCI_DEVICE_ID_ARL:
return IVPU_HW_BTRS_MTL;
case PCI_DEVICE_ID_LNL:
+ case PCI_DEVICE_ID_PTL_P:
return IVPU_HW_BTRS_LNL;
default:
dump_stack();
diff --git a/drivers/accel/ivpu/ivpu_fw.c b/drivers/accel/ivpu/ivpu_fw.c
index b2b6d89f0653..cd40446a22a5 100644
--- a/drivers/accel/ivpu/ivpu_fw.c
+++ b/drivers/accel/ivpu/ivpu_fw.c
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
- * Copyright (C) 2020-2024 Intel Corporation
+ * Copyright (C) 2020-2025 Intel Corporation
*/
#include <linux/firmware.h>
@@ -53,15 +53,18 @@ static struct {
int gen;
const char *name;
} fw_names[] = {
- { IVPU_HW_IP_37XX, "vpu_37xx.bin" },
+ { IVPU_HW_IP_37XX, "intel/vpu/vpu_37xx_v1.bin" },
{ IVPU_HW_IP_37XX, "intel/vpu/vpu_37xx_v0.0.bin" },
- { IVPU_HW_IP_40XX, "vpu_40xx.bin" },
+ { IVPU_HW_IP_40XX, "intel/vpu/vpu_40xx_v1.bin" },
{ IVPU_HW_IP_40XX, "intel/vpu/vpu_40xx_v0.0.bin" },
+ { IVPU_HW_IP_50XX, "intel/vpu/vpu_50xx_v1.bin" },
+ { IVPU_HW_IP_50XX, "intel/vpu/vpu_50xx_v0.0.bin" },
};
/* Production fw_names from the table above */
-MODULE_FIRMWARE("intel/vpu/vpu_37xx_v0.0.bin");
-MODULE_FIRMWARE("intel/vpu/vpu_40xx_v0.0.bin");
+MODULE_FIRMWARE("intel/vpu/vpu_37xx_v1.bin");
+MODULE_FIRMWARE("intel/vpu/vpu_40xx_v1.bin");
+MODULE_FIRMWARE("intel/vpu/vpu_50xx_v1.bin");
static int ivpu_fw_request(struct ivpu_device *vdev)
{
@@ -134,6 +137,15 @@ static bool is_within_range(u64 addr, size_t size, u64 range_start, size_t range
return true;
}
+static u32
+ivpu_fw_sched_mode_select(struct ivpu_device *vdev, const struct vpu_firmware_header *fw_hdr)
+{
+ if (ivpu_sched_mode != IVPU_SCHED_MODE_AUTO)
+ return ivpu_sched_mode;
+
+ return VPU_SCHEDULING_MODE_OS;
+}
+
static int ivpu_fw_parse(struct ivpu_device *vdev)
{
struct ivpu_fw_info *fw = vdev->fw;
@@ -209,14 +221,16 @@ static int ivpu_fw_parse(struct ivpu_device *vdev)
fw->cold_boot_entry_point = fw_hdr->entry_point;
fw->entry_point = fw->cold_boot_entry_point;
- fw->trace_level = min_t(u32, ivpu_log_level, IVPU_FW_LOG_FATAL);
+ fw->trace_level = min_t(u32, ivpu_fw_log_level, IVPU_FW_LOG_FATAL);
fw->trace_destination_mask = VPU_TRACE_DESTINATION_VERBOSE_TRACING;
fw->trace_hw_component_mask = -1;
fw->dvfs_mode = 0;
+ fw->sched_mode = ivpu_fw_sched_mode_select(vdev, fw_hdr);
fw->primary_preempt_buf_size = fw_hdr->preemption_buffer_1_size;
fw->secondary_preempt_buf_size = fw_hdr->preemption_buffer_2_size;
+ ivpu_info(vdev, "Scheduler mode: %s\n", fw->sched_mode ? "HW" : "OS");
if (fw_hdr->ro_section_start_address && !is_within_range(fw_hdr->ro_section_start_address,
fw_hdr->ro_section_size,
@@ -312,7 +326,7 @@ static int ivpu_fw_mem_init(struct ivpu_device *vdev)
goto err_free_fw_mem;
}
- if (ivpu_log_level <= IVPU_FW_LOG_INFO)
+ if (ivpu_fw_log_level <= IVPU_FW_LOG_INFO)
log_verb_size = IVPU_FW_VERBOSE_BUFFER_LARGE_SIZE;
else
log_verb_size = IVPU_FW_VERBOSE_BUFFER_SMALL_SIZE;
@@ -545,7 +559,6 @@ void ivpu_fw_boot_params_setup(struct ivpu_device *vdev, struct vpu_boot_params
boot_params->magic = VPU_BOOT_PARAMS_MAGIC;
boot_params->vpu_id = to_pci_dev(vdev->drm.dev)->bus->number;
- boot_params->frequency = ivpu_hw_pll_freq_get(vdev);
/*
* This param is a debug firmware feature. It switches default clock
@@ -605,8 +618,8 @@ void ivpu_fw_boot_params_setup(struct ivpu_device *vdev, struct vpu_boot_params
boot_params->punit_telemetry_sram_base = ivpu_hw_telemetry_offset_get(vdev);
boot_params->punit_telemetry_sram_size = ivpu_hw_telemetry_size_get(vdev);
boot_params->vpu_telemetry_enable = ivpu_hw_telemetry_enable_get(vdev);
- boot_params->vpu_scheduling_mode = vdev->hw->sched_mode;
- if (vdev->hw->sched_mode == VPU_SCHEDULING_MODE_HW)
+ boot_params->vpu_scheduling_mode = vdev->fw->sched_mode;
+ if (vdev->fw->sched_mode == VPU_SCHEDULING_MODE_HW)
boot_params->vpu_focus_present_timer_ms = IVPU_FOCUS_PRESENT_TIMER_MS;
boot_params->dvfs_mode = vdev->fw->dvfs_mode;
if (!IVPU_WA(disable_d0i3_msg))
diff --git a/drivers/accel/ivpu/ivpu_fw.h b/drivers/accel/ivpu/ivpu_fw.h
index 5e8eb608b70f..1d0b2bd9d65c 100644
--- a/drivers/accel/ivpu/ivpu_fw.h
+++ b/drivers/accel/ivpu/ivpu_fw.h
@@ -6,6 +6,8 @@
#ifndef __IVPU_FW_H__
#define __IVPU_FW_H__
+#include "vpu_jsm_api.h"
+
#define FW_VERSION_HEADER_SIZE SZ_4K
#define FW_VERSION_STR_SIZE SZ_256
@@ -36,6 +38,7 @@ struct ivpu_fw_info {
u32 secondary_preempt_buf_size;
u64 read_only_addr;
u32 read_only_size;
+ u32 sched_mode;
};
int ivpu_fw_init(struct ivpu_device *vdev);
diff --git a/drivers/accel/ivpu/ivpu_fw_log.c b/drivers/accel/ivpu/ivpu_fw_log.c
index ef0adb5e0fbe..337c906b0210 100644
--- a/drivers/accel/ivpu/ivpu_fw_log.c
+++ b/drivers/accel/ivpu/ivpu_fw_log.c
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
- * Copyright (C) 2020-2023 Intel Corporation
+ * Copyright (C) 2020-2024 Intel Corporation
*/
#include <linux/ctype.h>
@@ -15,19 +15,19 @@
#include "ivpu_fw_log.h"
#include "ivpu_gem.h"
-#define IVPU_FW_LOG_LINE_LENGTH 256
+#define IVPU_FW_LOG_LINE_LENGTH 256
-unsigned int ivpu_log_level = IVPU_FW_LOG_ERROR;
-module_param(ivpu_log_level, uint, 0444);
-MODULE_PARM_DESC(ivpu_log_level,
- "NPU firmware default trace level: debug=" __stringify(IVPU_FW_LOG_DEBUG)
+unsigned int ivpu_fw_log_level = IVPU_FW_LOG_ERROR;
+module_param_named(fw_log_level, ivpu_fw_log_level, uint, 0444);
+MODULE_PARM_DESC(fw_log_level,
+ "NPU firmware default log level: debug=" __stringify(IVPU_FW_LOG_DEBUG)
" info=" __stringify(IVPU_FW_LOG_INFO)
" warn=" __stringify(IVPU_FW_LOG_WARN)
" error=" __stringify(IVPU_FW_LOG_ERROR)
" fatal=" __stringify(IVPU_FW_LOG_FATAL));
-static int fw_log_ptr(struct ivpu_device *vdev, struct ivpu_bo *bo, u32 *offset,
- struct vpu_tracing_buffer_header **log_header)
+static int fw_log_from_bo(struct ivpu_device *vdev, struct ivpu_bo *bo, u32 *offset,
+ struct vpu_tracing_buffer_header **out_log)
{
struct vpu_tracing_buffer_header *log;
@@ -48,7 +48,7 @@ static int fw_log_ptr(struct ivpu_device *vdev, struct ivpu_bo *bo, u32 *offset,
return -EINVAL;
}
- *log_header = log;
+ *out_log = log;
*offset += log->size;
ivpu_dbg(vdev, FW_BOOT,
@@ -59,7 +59,7 @@ static int fw_log_ptr(struct ivpu_device *vdev, struct ivpu_bo *bo, u32 *offset,
return 0;
}
-static void buffer_print(char *buffer, u32 size, struct drm_printer *p)
+static void fw_log_print_lines(char *buffer, u32 size, struct drm_printer *p)
{
char line[IVPU_FW_LOG_LINE_LENGTH];
u32 index = 0;
@@ -87,56 +87,89 @@ static void buffer_print(char *buffer, u32 size, struct drm_printer *p)
}
line[index] = 0;
if (index != 0)
- drm_printf(p, "%s\n", line);
+ drm_printf(p, "%s", line);
}
-static void fw_log_print_buffer(struct ivpu_device *vdev, struct vpu_tracing_buffer_header *log,
- const char *prefix, bool only_new_msgs, struct drm_printer *p)
+static void fw_log_print_buffer(struct vpu_tracing_buffer_header *log, const char *prefix,
+ bool only_new_msgs, struct drm_printer *p)
{
- char *log_buffer = (void *)log + log->header_size;
- u32 log_size = log->size - log->header_size;
- u32 log_start = log->read_index;
- u32 log_end = log->write_index;
-
- if (!(log->write_index || log->wrap_count) ||
- (log->write_index == log->read_index && only_new_msgs)) {
- drm_printf(p, "==== %s \"%s\" log empty ====\n", prefix, log->name);
- return;
+ char *log_data = (void *)log + log->header_size;
+ u32 data_size = log->size - log->header_size;
+ u32 log_start = only_new_msgs ? READ_ONCE(log->read_index) : 0;
+ u32 log_end = READ_ONCE(log->write_index);
+
+ if (log->wrap_count == log->read_wrap_count) {
+ if (log_end <= log_start) {
+ drm_printf(p, "==== %s \"%s\" log empty ====\n", prefix, log->name);
+ return;
+ }
+ } else if (log->wrap_count == log->read_wrap_count + 1) {
+ if (log_end > log_start)
+ log_start = log_end;
+ } else {
+ log_start = log_end;
}
drm_printf(p, "==== %s \"%s\" log start ====\n", prefix, log->name);
- if (log->write_index > log->read_index) {
- buffer_print(log_buffer + log_start, log_end - log_start, p);
+ if (log_end > log_start) {
+ fw_log_print_lines(log_data + log_start, log_end - log_start, p);
} else {
- buffer_print(log_buffer + log_end, log_size - log_end, p);
- buffer_print(log_buffer, log_end, p);
+ fw_log_print_lines(log_data + log_start, data_size - log_start, p);
+ fw_log_print_lines(log_data, log_end, p);
}
- drm_printf(p, "\x1b[0m");
+ drm_printf(p, "\n\x1b[0m"); /* add new line and clear formatting */
drm_printf(p, "==== %s \"%s\" log end ====\n", prefix, log->name);
}
-void ivpu_fw_log_print(struct ivpu_device *vdev, bool only_new_msgs, struct drm_printer *p)
+static void
+fw_log_print_all_in_bo(struct ivpu_device *vdev, const char *name,
+ struct ivpu_bo *bo, bool only_new_msgs, struct drm_printer *p)
{
- struct vpu_tracing_buffer_header *log_header;
+ struct vpu_tracing_buffer_header *log;
u32 next = 0;
- while (fw_log_ptr(vdev, vdev->fw->mem_log_crit, &next, &log_header) == 0)
- fw_log_print_buffer(vdev, log_header, "NPU critical", only_new_msgs, p);
+ while (fw_log_from_bo(vdev, bo, &next, &log) == 0)
+ fw_log_print_buffer(log, name, only_new_msgs, p);
+}
+
+void ivpu_fw_log_print(struct ivpu_device *vdev, bool only_new_msgs, struct drm_printer *p)
+{
+ fw_log_print_all_in_bo(vdev, "NPU critical", vdev->fw->mem_log_crit, only_new_msgs, p);
+ fw_log_print_all_in_bo(vdev, "NPU verbose", vdev->fw->mem_log_verb, only_new_msgs, p);
+}
+
+void ivpu_fw_log_mark_read(struct ivpu_device *vdev)
+{
+ struct vpu_tracing_buffer_header *log;
+ u32 next;
+
+ next = 0;
+ while (fw_log_from_bo(vdev, vdev->fw->mem_log_crit, &next, &log) == 0) {
+ log->read_index = READ_ONCE(log->write_index);
+ log->read_wrap_count = READ_ONCE(log->wrap_count);
+ }
next = 0;
- while (fw_log_ptr(vdev, vdev->fw->mem_log_verb, &next, &log_header) == 0)
- fw_log_print_buffer(vdev, log_header, "NPU verbose", only_new_msgs, p);
+ while (fw_log_from_bo(vdev, vdev->fw->mem_log_verb, &next, &log) == 0) {
+ log->read_index = READ_ONCE(log->write_index);
+ log->read_wrap_count = READ_ONCE(log->wrap_count);
+ }
}
-void ivpu_fw_log_clear(struct ivpu_device *vdev)
+void ivpu_fw_log_reset(struct ivpu_device *vdev)
{
- struct vpu_tracing_buffer_header *log_header;
- u32 next = 0;
+ struct vpu_tracing_buffer_header *log;
+ u32 next;
- while (fw_log_ptr(vdev, vdev->fw->mem_log_crit, &next, &log_header) == 0)
- log_header->read_index = log_header->write_index;
+ next = 0;
+ while (fw_log_from_bo(vdev, vdev->fw->mem_log_crit, &next, &log) == 0) {
+ log->read_index = 0;
+ log->read_wrap_count = 0;
+ }
next = 0;
- while (fw_log_ptr(vdev, vdev->fw->mem_log_verb, &next, &log_header) == 0)
- log_header->read_index = log_header->write_index;
+ while (fw_log_from_bo(vdev, vdev->fw->mem_log_verb, &next, &log) == 0) {
+ log->read_index = 0;
+ log->read_wrap_count = 0;
+ }
}
diff --git a/drivers/accel/ivpu/ivpu_fw_log.h b/drivers/accel/ivpu/ivpu_fw_log.h
index 4b390a99699d..8bb528a73cb7 100644
--- a/drivers/accel/ivpu/ivpu_fw_log.h
+++ b/drivers/accel/ivpu/ivpu_fw_log.h
@@ -1,6 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
- * Copyright (C) 2020-2023 Intel Corporation
+ * Copyright (C) 2020-2024 Intel Corporation
*/
#ifndef __IVPU_FW_LOG_H__
@@ -17,14 +17,15 @@
#define IVPU_FW_LOG_ERROR 4
#define IVPU_FW_LOG_FATAL 5
-extern unsigned int ivpu_log_level;
-
#define IVPU_FW_VERBOSE_BUFFER_SMALL_SIZE SZ_1M
#define IVPU_FW_VERBOSE_BUFFER_LARGE_SIZE SZ_8M
#define IVPU_FW_CRITICAL_BUFFER_SIZE SZ_512K
+extern unsigned int ivpu_fw_log_level;
+
void ivpu_fw_log_print(struct ivpu_device *vdev, bool only_new_msgs, struct drm_printer *p);
-void ivpu_fw_log_clear(struct ivpu_device *vdev);
+void ivpu_fw_log_mark_read(struct ivpu_device *vdev);
+void ivpu_fw_log_reset(struct ivpu_device *vdev);
#endif /* __IVPU_FW_LOG_H__ */
diff --git a/drivers/accel/ivpu/ivpu_gem.c b/drivers/accel/ivpu/ivpu_gem.c
index c8daffd90f30..6b1bda7e130d 100644
--- a/drivers/accel/ivpu/ivpu_gem.c
+++ b/drivers/accel/ivpu/ivpu_gem.c
@@ -26,11 +26,21 @@ static inline void ivpu_dbg_bo(struct ivpu_device *vdev, struct ivpu_bo *bo, con
{
ivpu_dbg(vdev, BO,
"%6s: bo %8p vpu_addr %9llx size %8zu ctx %d has_pages %d dma_mapped %d mmu_mapped %d wc %d imported %d\n",
- action, bo, bo->vpu_addr, ivpu_bo_size(bo), bo->ctx ? bo->ctx->id : 0,
+ action, bo, bo->vpu_addr, ivpu_bo_size(bo), bo->ctx_id,
(bool)bo->base.pages, (bool)bo->base.sgt, bo->mmu_mapped, bo->base.map_wc,
(bool)bo->base.base.import_attach);
}
+static inline int ivpu_bo_lock(struct ivpu_bo *bo)
+{
+ return dma_resv_lock(bo->base.base.resv, NULL);
+}
+
+static inline void ivpu_bo_unlock(struct ivpu_bo *bo)
+{
+ dma_resv_unlock(bo->base.base.resv);
+}
+
/*
* ivpu_bo_pin() - pin the backing physical pages and map them to VPU.
*
@@ -41,22 +51,22 @@ static inline void ivpu_dbg_bo(struct ivpu_device *vdev, struct ivpu_bo *bo, con
int __must_check ivpu_bo_pin(struct ivpu_bo *bo)
{
struct ivpu_device *vdev = ivpu_bo_to_vdev(bo);
+ struct sg_table *sgt;
int ret = 0;
- mutex_lock(&bo->lock);
-
ivpu_dbg_bo(vdev, bo, "pin");
- drm_WARN_ON(&vdev->drm, !bo->ctx);
- if (!bo->mmu_mapped) {
- struct sg_table *sgt = drm_gem_shmem_get_pages_sgt(&bo->base);
+ sgt = drm_gem_shmem_get_pages_sgt(&bo->base);
+ if (IS_ERR(sgt)) {
+ ret = PTR_ERR(sgt);
+ ivpu_err(vdev, "Failed to map BO in IOMMU: %d\n", ret);
+ return ret;
+ }
- if (IS_ERR(sgt)) {
- ret = PTR_ERR(sgt);
- ivpu_err(vdev, "Failed to map BO in IOMMU: %d\n", ret);
- goto unlock;
- }
+ ivpu_bo_lock(bo);
+ if (!bo->mmu_mapped) {
+ drm_WARN_ON(&vdev->drm, !bo->ctx);
ret = ivpu_mmu_context_map_sgt(vdev, bo->ctx, bo->vpu_addr, sgt,
ivpu_bo_is_snooped(bo));
if (ret) {
@@ -67,7 +77,7 @@ int __must_check ivpu_bo_pin(struct ivpu_bo *bo)
}
unlock:
- mutex_unlock(&bo->lock);
+ ivpu_bo_unlock(bo);
return ret;
}
@@ -82,7 +92,7 @@ ivpu_bo_alloc_vpu_addr(struct ivpu_bo *bo, struct ivpu_mmu_context *ctx,
if (!drm_dev_enter(&vdev->drm, &idx))
return -ENODEV;
- mutex_lock(&bo->lock);
+ ivpu_bo_lock(bo);
ret = ivpu_mmu_context_insert_node(ctx, range, ivpu_bo_size(bo), &bo->mm_node);
if (!ret) {
@@ -92,9 +102,7 @@ ivpu_bo_alloc_vpu_addr(struct ivpu_bo *bo, struct ivpu_mmu_context *ctx,
ivpu_err(vdev, "Failed to add BO to context %u: %d\n", ctx->id, ret);
}
- ivpu_dbg_bo(vdev, bo, "alloc");
-
- mutex_unlock(&bo->lock);
+ ivpu_bo_unlock(bo);
drm_dev_exit(idx);
@@ -105,7 +113,7 @@ static void ivpu_bo_unbind_locked(struct ivpu_bo *bo)
{
struct ivpu_device *vdev = ivpu_bo_to_vdev(bo);
- lockdep_assert(lockdep_is_held(&bo->lock) || !kref_read(&bo->base.base.refcount));
+ lockdep_assert(dma_resv_held(bo->base.base.resv) || !kref_read(&bo->base.base.refcount));
if (bo->mmu_mapped) {
drm_WARN_ON(&vdev->drm, !bo->ctx);
@@ -123,14 +131,12 @@ static void ivpu_bo_unbind_locked(struct ivpu_bo *bo)
if (bo->base.base.import_attach)
return;
- dma_resv_lock(bo->base.base.resv, NULL);
if (bo->base.sgt) {
dma_unmap_sgtable(vdev->drm.dev, bo->base.sgt, DMA_BIDIRECTIONAL, 0);
sg_free_table(bo->base.sgt);
kfree(bo->base.sgt);
bo->base.sgt = NULL;
}
- dma_resv_unlock(bo->base.base.resv);
}
void ivpu_bo_unbind_all_bos_from_context(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx)
@@ -142,12 +148,12 @@ void ivpu_bo_unbind_all_bos_from_context(struct ivpu_device *vdev, struct ivpu_m
mutex_lock(&vdev->bo_list_lock);
list_for_each_entry(bo, &vdev->bo_list, bo_list_node) {
- mutex_lock(&bo->lock);
+ ivpu_bo_lock(bo);
if (bo->ctx == ctx) {
ivpu_dbg_bo(vdev, bo, "unbind");
ivpu_bo_unbind_locked(bo);
}
- mutex_unlock(&bo->lock);
+ ivpu_bo_unlock(bo);
}
mutex_unlock(&vdev->bo_list_lock);
}
@@ -167,12 +173,11 @@ struct drm_gem_object *ivpu_gem_create_object(struct drm_device *dev, size_t siz
bo->base.pages_mark_dirty_on_put = true; /* VPU can dirty a BO anytime */
INIT_LIST_HEAD(&bo->bo_list_node);
- mutex_init(&bo->lock);
return &bo->base.base;
}
-static struct ivpu_bo *ivpu_bo_alloc(struct ivpu_device *vdev, u64 size, u32 flags)
+static struct ivpu_bo *ivpu_bo_alloc(struct ivpu_device *vdev, u64 size, u32 flags, u32 ctx_id)
{
struct drm_gem_shmem_object *shmem;
struct ivpu_bo *bo;
@@ -190,6 +195,7 @@ static struct ivpu_bo *ivpu_bo_alloc(struct ivpu_device *vdev, u64 size, u32 fla
return ERR_CAST(shmem);
bo = to_ivpu_bo(&shmem->base);
+ bo->ctx_id = ctx_id;
bo->base.map_wc = flags & DRM_IVPU_BO_WC;
bo->flags = flags;
@@ -197,6 +203,8 @@ static struct ivpu_bo *ivpu_bo_alloc(struct ivpu_device *vdev, u64 size, u32 fla
list_add_tail(&bo->bo_list_node, &vdev->bo_list);
mutex_unlock(&vdev->bo_list_lock);
+ ivpu_dbg_bo(vdev, bo, "alloc");
+
return bo;
}
@@ -234,10 +242,14 @@ static void ivpu_gem_bo_free(struct drm_gem_object *obj)
list_del(&bo->bo_list_node);
mutex_unlock(&vdev->bo_list_lock);
- drm_WARN_ON(&vdev->drm, !dma_resv_test_signaled(obj->resv, DMA_RESV_USAGE_READ));
+ drm_WARN_ON(&vdev->drm, !drm_gem_is_imported(&bo->base.base) &&
+ !dma_resv_test_signaled(obj->resv, DMA_RESV_USAGE_READ));
+ drm_WARN_ON(&vdev->drm, ivpu_bo_size(bo) == 0);
+ drm_WARN_ON(&vdev->drm, bo->base.vaddr);
ivpu_bo_unbind_locked(bo);
- mutex_destroy(&bo->lock);
+ drm_WARN_ON(&vdev->drm, bo->mmu_mapped);
+ drm_WARN_ON(&vdev->drm, bo->ctx);
drm_WARN_ON(obj->dev, bo->base.pages_use_count > 1);
drm_gem_shmem_free(&bo->base);
@@ -271,7 +283,7 @@ int ivpu_bo_create_ioctl(struct drm_device *dev, void *data, struct drm_file *fi
if (size == 0)
return -EINVAL;
- bo = ivpu_bo_alloc(vdev, size, args->flags);
+ bo = ivpu_bo_alloc(vdev, size, args->flags, file_priv->ctx.id);
if (IS_ERR(bo)) {
ivpu_err(vdev, "Failed to allocate BO: %pe (ctx %u size %llu flags 0x%x)",
bo, file_priv->ctx.id, args->size, args->flags);
@@ -279,7 +291,10 @@ int ivpu_bo_create_ioctl(struct drm_device *dev, void *data, struct drm_file *fi
}
ret = drm_gem_handle_create(file, &bo->base.base, &args->handle);
- if (!ret)
+ if (ret)
+ ivpu_err(vdev, "Failed to create handle for BO: %pe (ctx %u size %llu flags 0x%x)",
+ bo, file_priv->ctx.id, args->size, args->flags);
+ else
args->vpu_addr = bo->vpu_addr;
drm_gem_object_put(&bo->base.base);
@@ -302,7 +317,7 @@ ivpu_bo_create(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx,
drm_WARN_ON(&vdev->drm, !PAGE_ALIGNED(range->end));
drm_WARN_ON(&vdev->drm, !PAGE_ALIGNED(size));
- bo = ivpu_bo_alloc(vdev, size, flags);
+ bo = ivpu_bo_alloc(vdev, size, flags, IVPU_GLOBAL_CONTEXT_MMU_SSID);
if (IS_ERR(bo)) {
ivpu_err(vdev, "Failed to allocate BO: %pe (vpu_addr 0x%llx size %llu flags 0x%x)",
bo, range->start, size, flags);
@@ -318,9 +333,9 @@ ivpu_bo_create(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx,
goto err_put;
if (flags & DRM_IVPU_BO_MAPPABLE) {
- dma_resv_lock(bo->base.base.resv, NULL);
+ ivpu_bo_lock(bo);
ret = drm_gem_shmem_vmap(&bo->base, &map);
- dma_resv_unlock(bo->base.base.resv);
+ ivpu_bo_unlock(bo);
if (ret)
goto err_put;
@@ -343,9 +358,9 @@ void ivpu_bo_free(struct ivpu_bo *bo)
struct iosys_map map = IOSYS_MAP_INIT_VADDR(bo->base.vaddr);
if (bo->flags & DRM_IVPU_BO_MAPPABLE) {
- dma_resv_lock(bo->base.base.resv, NULL);
+ ivpu_bo_lock(bo);
drm_gem_shmem_vunmap(&bo->base, &map);
- dma_resv_unlock(bo->base.base.resv);
+ ivpu_bo_unlock(bo);
}
drm_gem_object_put(&bo->base.base);
@@ -364,12 +379,12 @@ int ivpu_bo_info_ioctl(struct drm_device *dev, void *data, struct drm_file *file
bo = to_ivpu_bo(obj);
- mutex_lock(&bo->lock);
+ ivpu_bo_lock(bo);
args->flags = bo->flags;
args->mmap_offset = drm_vma_node_offset_addr(&obj->vma_node);
args->vpu_addr = bo->vpu_addr;
args->size = obj->size;
- mutex_unlock(&bo->lock);
+ ivpu_bo_unlock(bo);
drm_gem_object_put(obj);
return ret;
@@ -403,10 +418,10 @@ int ivpu_bo_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file
static void ivpu_bo_print_info(struct ivpu_bo *bo, struct drm_printer *p)
{
- mutex_lock(&bo->lock);
+ ivpu_bo_lock(bo);
drm_printf(p, "%-9p %-3u 0x%-12llx %-10lu 0x%-8x %-4u",
- bo, bo->ctx ? bo->ctx->id : 0, bo->vpu_addr, bo->base.base.size,
+ bo, bo->ctx_id, bo->vpu_addr, bo->base.base.size,
bo->flags, kref_read(&bo->base.base.refcount));
if (bo->base.pages)
@@ -420,7 +435,7 @@ static void ivpu_bo_print_info(struct ivpu_bo *bo, struct drm_printer *p)
drm_printf(p, "\n");
- mutex_unlock(&bo->lock);
+ ivpu_bo_unlock(bo);
}
void ivpu_bo_list(struct drm_device *dev, struct drm_printer *p)
diff --git a/drivers/accel/ivpu/ivpu_gem.h b/drivers/accel/ivpu/ivpu_gem.h
index d975000abd78..07bffe98c963 100644
--- a/drivers/accel/ivpu/ivpu_gem.h
+++ b/drivers/accel/ivpu/ivpu_gem.h
@@ -17,10 +17,10 @@ struct ivpu_bo {
struct list_head bo_list_node;
struct drm_mm_node mm_node;
- struct mutex lock; /* Protects: ctx, mmu_mapped, vpu_addr */
u64 vpu_addr;
u32 flags;
u32 job_status; /* Valid only for command buffer */
+ u32 ctx_id;
bool mmu_mapped;
};
diff --git a/drivers/accel/ivpu/ivpu_hw.c b/drivers/accel/ivpu/ivpu_hw.c
index 08b3cef58fd2..37ef8ce64210 100644
--- a/drivers/accel/ivpu/ivpu_hw.c
+++ b/drivers/accel/ivpu/ivpu_hw.c
@@ -106,10 +106,30 @@ static void timeouts_init(struct ivpu_device *vdev)
else
vdev->timeout.autosuspend = 100;
vdev->timeout.d0i3_entry_msg = 5;
- vdev->timeout.state_dump_msg = 10;
+ vdev->timeout.state_dump_msg = 100;
}
}
+static void priority_bands_init(struct ivpu_device *vdev)
+{
+ /* Idle */
+ vdev->hw->hws.grace_period[VPU_JOB_SCHEDULING_PRIORITY_BAND_IDLE] = 0;
+ vdev->hw->hws.process_grace_period[VPU_JOB_SCHEDULING_PRIORITY_BAND_IDLE] = 50000;
+ vdev->hw->hws.process_quantum[VPU_JOB_SCHEDULING_PRIORITY_BAND_IDLE] = 160000;
+ /* Normal */
+ vdev->hw->hws.grace_period[VPU_JOB_SCHEDULING_PRIORITY_BAND_NORMAL] = 50000;
+ vdev->hw->hws.process_grace_period[VPU_JOB_SCHEDULING_PRIORITY_BAND_NORMAL] = 50000;
+ vdev->hw->hws.process_quantum[VPU_JOB_SCHEDULING_PRIORITY_BAND_NORMAL] = 300000;
+ /* Focus */
+ vdev->hw->hws.grace_period[VPU_JOB_SCHEDULING_PRIORITY_BAND_FOCUS] = 50000;
+ vdev->hw->hws.process_grace_period[VPU_JOB_SCHEDULING_PRIORITY_BAND_FOCUS] = 50000;
+ vdev->hw->hws.process_quantum[VPU_JOB_SCHEDULING_PRIORITY_BAND_FOCUS] = 200000;
+ /* Realtime */
+ vdev->hw->hws.grace_period[VPU_JOB_SCHEDULING_PRIORITY_BAND_REALTIME] = 0;
+ vdev->hw->hws.process_grace_period[VPU_JOB_SCHEDULING_PRIORITY_BAND_REALTIME] = 50000;
+ vdev->hw->hws.process_quantum[VPU_JOB_SCHEDULING_PRIORITY_BAND_REALTIME] = 200000;
+}
+
static void memory_ranges_init(struct ivpu_device *vdev)
{
if (ivpu_hw_ip_gen(vdev) == IVPU_HW_IP_37XX) {
@@ -248,6 +268,7 @@ int ivpu_hw_init(struct ivpu_device *vdev)
{
ivpu_hw_btrs_info_init(vdev);
ivpu_hw_btrs_freq_ratios_init(vdev);
+ priority_bands_init(vdev);
memory_ranges_init(vdev);
platform_init(vdev);
wa_init(vdev);
diff --git a/drivers/accel/ivpu/ivpu_hw.h b/drivers/accel/ivpu/ivpu_hw.h
index a96a05b2acda..1c016b99a0fd 100644
--- a/drivers/accel/ivpu/ivpu_hw.h
+++ b/drivers/accel/ivpu/ivpu_hw.h
@@ -1,6 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
- * Copyright (C) 2020-2024 Intel Corporation
+ * Copyright (C) 2020-2025 Intel Corporation
*/
#ifndef __IVPU_HW_H__
@@ -45,8 +45,12 @@ struct ivpu_hw_info {
u8 pn_ratio;
u32 profiling_freq;
} pll;
+ struct {
+ u32 grace_period[VPU_HWS_NUM_PRIORITY_BANDS];
+ u32 process_quantum[VPU_HWS_NUM_PRIORITY_BANDS];
+ u32 process_grace_period[VPU_HWS_NUM_PRIORITY_BANDS];
+ } hws;
u32 tile_fuse;
- u32 sched_mode;
u32 sku;
u16 config;
int dma_bits;
@@ -87,9 +91,9 @@ static inline u64 ivpu_hw_range_size(const struct ivpu_addr_range *range)
return range->end - range->start;
}
-static inline u32 ivpu_hw_ratio_to_freq(struct ivpu_device *vdev, u32 ratio)
+static inline u32 ivpu_hw_dpu_max_freq_get(struct ivpu_device *vdev)
{
- return ivpu_hw_btrs_ratio_to_freq(vdev, ratio);
+ return ivpu_hw_btrs_dpu_max_freq_get(vdev);
}
static inline void ivpu_hw_irq_clear(struct ivpu_device *vdev)
@@ -97,11 +101,6 @@ static inline void ivpu_hw_irq_clear(struct ivpu_device *vdev)
ivpu_hw_ip_irq_clear(vdev);
}
-static inline u32 ivpu_hw_pll_freq_get(struct ivpu_device *vdev)
-{
- return ivpu_hw_btrs_pll_freq_get(vdev);
-}
-
static inline u32 ivpu_hw_profiling_freq_get(struct ivpu_device *vdev)
{
return vdev->hw->pll.profiling_freq;
diff --git a/drivers/accel/ivpu/ivpu_hw_40xx_reg.h b/drivers/accel/ivpu/ivpu_hw_40xx_reg.h
index d0b795b344c7..fc0ee8d637f9 100644
--- a/drivers/accel/ivpu/ivpu_hw_40xx_reg.h
+++ b/drivers/accel/ivpu/ivpu_hw_40xx_reg.h
@@ -115,6 +115,8 @@
#define VPU_50XX_HOST_SS_AON_PWR_ISLAND_EN_POST_DLY 0x00030068u
#define VPU_50XX_HOST_SS_AON_PWR_ISLAND_EN_POST_DLY_POST_DLY_MASK GENMASK(7, 0)
+#define VPU_50XX_HOST_SS_AON_PWR_ISLAND_EN_POST_DLY_POST1_DLY_MASK GENMASK(15, 8)
+#define VPU_50XX_HOST_SS_AON_PWR_ISLAND_EN_POST_DLY_POST2_DLY_MASK GENMASK(23, 16)
#define VPU_50XX_HOST_SS_AON_PWR_ISLAND_STATUS_DLY 0x0003006cu
#define VPU_50XX_HOST_SS_AON_PWR_ISLAND_STATUS_DLY_STATUS_DLY_MASK GENMASK(7, 0)
diff --git a/drivers/accel/ivpu/ivpu_hw_btrs.c b/drivers/accel/ivpu/ivpu_hw_btrs.c
index 745e5248803d..2d88357b9a3a 100644
--- a/drivers/accel/ivpu/ivpu_hw_btrs.c
+++ b/drivers/accel/ivpu/ivpu_hw_btrs.c
@@ -1,8 +1,10 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
- * Copyright (C) 2020-2024 Intel Corporation
+ * Copyright (C) 2020-2025 Intel Corporation
*/
+#include <linux/units.h>
+
#include "ivpu_drv.h"
#include "ivpu_hw.h"
#include "ivpu_hw_btrs.h"
@@ -28,17 +30,13 @@
#define BTRS_LNL_ALL_IRQ_MASK ((u32)-1)
-#define BTRS_MTL_WP_CONFIG_1_TILE_5_3_RATIO WP_CONFIG(MTL_CONFIG_1_TILE, MTL_PLL_RATIO_5_3)
-#define BTRS_MTL_WP_CONFIG_1_TILE_4_3_RATIO WP_CONFIG(MTL_CONFIG_1_TILE, MTL_PLL_RATIO_4_3)
-#define BTRS_MTL_WP_CONFIG_2_TILE_5_3_RATIO WP_CONFIG(MTL_CONFIG_2_TILE, MTL_PLL_RATIO_5_3)
-#define BTRS_MTL_WP_CONFIG_2_TILE_4_3_RATIO WP_CONFIG(MTL_CONFIG_2_TILE, MTL_PLL_RATIO_4_3)
-#define BTRS_MTL_WP_CONFIG_0_TILE_PLL_OFF WP_CONFIG(0, 0)
#define PLL_CDYN_DEFAULT 0x80
#define PLL_EPP_DEFAULT 0x80
#define PLL_CONFIG_DEFAULT 0x0
-#define PLL_SIMULATION_FREQ 10000000
-#define PLL_REF_CLK_FREQ 50000000
+#define PLL_REF_CLK_FREQ 50000000ull
+#define PLL_RATIO_TO_FREQ(x) ((x) * PLL_REF_CLK_FREQ)
+
#define PLL_TIMEOUT_US (1500 * USEC_PER_MSEC)
#define IDLE_TIMEOUT_US (5 * USEC_PER_MSEC)
#define TIMEOUT_US (150 * USEC_PER_MSEC)
@@ -62,6 +60,8 @@
#define DCT_ENABLE 0x1
#define DCT_DISABLE 0x0
+static u32 pll_ratio_to_dpu_freq(struct ivpu_device *vdev, u32 ratio);
+
int ivpu_hw_btrs_irqs_clear_with_0_mtl(struct ivpu_device *vdev)
{
REGB_WR32(VPU_HW_BTRS_MTL_INTERRUPT_STAT, BTRS_MTL_ALL_IRQ_MASK);
@@ -162,8 +162,7 @@ static int info_init_mtl(struct ivpu_device *vdev)
hw->tile_fuse = BTRS_MTL_TILE_FUSE_ENABLE_BOTH;
hw->sku = BTRS_MTL_TILE_SKU_BOTH;
- hw->config = BTRS_MTL_WP_CONFIG_2_TILE_4_3_RATIO;
- hw->sched_mode = ivpu_sched_mode;
+ hw->config = WP_CONFIG(MTL_CONFIG_2_TILE, MTL_PLL_RATIO_4_3);
return 0;
}
@@ -178,7 +177,6 @@ static int info_init_lnl(struct ivpu_device *vdev)
if (ret)
return ret;
- hw->sched_mode = ivpu_sched_mode;
hw->tile_fuse = tile_fuse_config;
hw->pll.profiling_freq = PLL_PROFILING_FREQ_DEFAULT;
@@ -346,8 +344,8 @@ int ivpu_hw_btrs_wp_drive(struct ivpu_device *vdev, bool enable)
prepare_wp_request(vdev, &wp, enable);
- ivpu_dbg(vdev, PM, "PLL workpoint request: %u Hz, config: 0x%x, epp: 0x%x, cdyn: 0x%x\n",
- PLL_RATIO_TO_FREQ(wp.target), wp.cfg, wp.epp, wp.cdyn);
+ ivpu_dbg(vdev, PM, "PLL workpoint request: %lu MHz, config: 0x%x, epp: 0x%x, cdyn: 0x%x\n",
+ pll_ratio_to_dpu_freq(vdev, wp.target) / HZ_PER_MHZ, wp.cfg, wp.epp, wp.cdyn);
ret = wp_request_send(vdev, &wp);
if (ret) {
@@ -588,6 +586,39 @@ int ivpu_hw_btrs_wait_for_idle(struct ivpu_device *vdev)
return REGB_POLL_FLD(VPU_HW_BTRS_LNL_VPU_STATUS, IDLE, 0x1, IDLE_TIMEOUT_US);
}
+static u32 pll_config_get_mtl(struct ivpu_device *vdev)
+{
+ return REGB_RD32(VPU_HW_BTRS_MTL_CURRENT_PLL);
+}
+
+static u32 pll_config_get_lnl(struct ivpu_device *vdev)
+{
+ return REGB_RD32(VPU_HW_BTRS_LNL_PLL_FREQ);
+}
+
+static u32 pll_ratio_to_dpu_freq_mtl(u16 ratio)
+{
+ return (PLL_RATIO_TO_FREQ(ratio) * 2) / 3;
+}
+
+static u32 pll_ratio_to_dpu_freq_lnl(u16 ratio)
+{
+ return PLL_RATIO_TO_FREQ(ratio) / 2;
+}
+
+static u32 pll_ratio_to_dpu_freq(struct ivpu_device *vdev, u32 ratio)
+{
+ if (ivpu_hw_btrs_gen(vdev) == IVPU_HW_BTRS_MTL)
+ return pll_ratio_to_dpu_freq_mtl(ratio);
+ else
+ return pll_ratio_to_dpu_freq_lnl(ratio);
+}
+
+u32 ivpu_hw_btrs_dpu_max_freq_get(struct ivpu_device *vdev)
+{
+ return pll_ratio_to_dpu_freq(vdev, vdev->hw->pll.max_ratio);
+}
+
/* Handler for IRQs from Buttress core (irqB) */
bool ivpu_hw_btrs_irq_handler_mtl(struct ivpu_device *vdev, int irq)
{
@@ -597,9 +628,12 @@ bool ivpu_hw_btrs_irq_handler_mtl(struct ivpu_device *vdev, int irq)
if (!status)
return false;
- if (REG_TEST_FLD(VPU_HW_BTRS_MTL_INTERRUPT_STAT, FREQ_CHANGE, status))
- ivpu_dbg(vdev, IRQ, "FREQ_CHANGE irq: %08x",
- REGB_RD32(VPU_HW_BTRS_MTL_CURRENT_PLL));
+ if (REG_TEST_FLD(VPU_HW_BTRS_MTL_INTERRUPT_STAT, FREQ_CHANGE, status)) {
+ u32 pll = pll_config_get_mtl(vdev);
+
+ ivpu_dbg(vdev, IRQ, "FREQ_CHANGE irq, wp %08x, %lu MHz",
+ pll, pll_ratio_to_dpu_freq_mtl(pll) / HZ_PER_MHZ);
+ }
if (REG_TEST_FLD(VPU_HW_BTRS_MTL_INTERRUPT_STAT, ATS_ERR, status)) {
ivpu_err(vdev, "ATS_ERR irq 0x%016llx", REGB_RD64(VPU_HW_BTRS_MTL_ATS_ERR_LOG_0));
@@ -649,8 +683,12 @@ bool ivpu_hw_btrs_irq_handler_lnl(struct ivpu_device *vdev, int irq)
ivpu_err_ratelimited(vdev, "IRQ FIFO full\n");
}
- if (REG_TEST_FLD(VPU_HW_BTRS_LNL_INTERRUPT_STAT, FREQ_CHANGE, status))
- ivpu_dbg(vdev, IRQ, "FREQ_CHANGE irq: %08x", REGB_RD32(VPU_HW_BTRS_LNL_PLL_FREQ));
+ if (REG_TEST_FLD(VPU_HW_BTRS_LNL_INTERRUPT_STAT, FREQ_CHANGE, status)) {
+ u32 pll = pll_config_get_lnl(vdev);
+
+ ivpu_dbg(vdev, IRQ, "FREQ_CHANGE irq, wp %08x, %lu MHz",
+ pll, pll_ratio_to_dpu_freq_lnl(pll) / HZ_PER_MHZ);
+ }
if (REG_TEST_FLD(VPU_HW_BTRS_LNL_INTERRUPT_STAT, ATS_ERR, status)) {
ivpu_err(vdev, "ATS_ERR LOG1 0x%08x ATS_ERR_LOG2 0x%08x\n",
@@ -733,60 +771,6 @@ void ivpu_hw_btrs_dct_set_status(struct ivpu_device *vdev, bool enable, u32 acti
REGB_WR32(VPU_HW_BTRS_LNL_PCODE_MAILBOX_STATUS, val);
}
-static u32 pll_ratio_to_freq_mtl(u32 ratio, u32 config)
-{
- u32 pll_clock = PLL_REF_CLK_FREQ * ratio;
- u32 cpu_clock;
-
- if ((config & 0xff) == MTL_PLL_RATIO_4_3)
- cpu_clock = pll_clock * 2 / 4;
- else
- cpu_clock = pll_clock * 2 / 5;
-
- return cpu_clock;
-}
-
-u32 ivpu_hw_btrs_ratio_to_freq(struct ivpu_device *vdev, u32 ratio)
-{
- struct ivpu_hw_info *hw = vdev->hw;
-
- if (ivpu_hw_btrs_gen(vdev) == IVPU_HW_BTRS_MTL)
- return pll_ratio_to_freq_mtl(ratio, hw->config);
- else
- return PLL_RATIO_TO_FREQ(ratio);
-}
-
-static u32 pll_freq_get_mtl(struct ivpu_device *vdev)
-{
- u32 pll_curr_ratio;
-
- pll_curr_ratio = REGB_RD32(VPU_HW_BTRS_MTL_CURRENT_PLL);
- pll_curr_ratio &= VPU_HW_BTRS_MTL_CURRENT_PLL_RATIO_MASK;
-
- if (!ivpu_is_silicon(vdev))
- return PLL_SIMULATION_FREQ;
-
- return pll_ratio_to_freq_mtl(pll_curr_ratio, vdev->hw->config);
-}
-
-static u32 pll_freq_get_lnl(struct ivpu_device *vdev)
-{
- u32 pll_curr_ratio;
-
- pll_curr_ratio = REGB_RD32(VPU_HW_BTRS_LNL_PLL_FREQ);
- pll_curr_ratio &= VPU_HW_BTRS_LNL_PLL_FREQ_RATIO_MASK;
-
- return PLL_RATIO_TO_FREQ(pll_curr_ratio);
-}
-
-u32 ivpu_hw_btrs_pll_freq_get(struct ivpu_device *vdev)
-{
- if (ivpu_hw_btrs_gen(vdev) == IVPU_HW_BTRS_MTL)
- return pll_freq_get_mtl(vdev);
- else
- return pll_freq_get_lnl(vdev);
-}
-
u32 ivpu_hw_btrs_telemetry_offset_get(struct ivpu_device *vdev)
{
if (ivpu_hw_btrs_gen(vdev) == IVPU_HW_BTRS_MTL)
diff --git a/drivers/accel/ivpu/ivpu_hw_btrs.h b/drivers/accel/ivpu/ivpu_hw_btrs.h
index 04f14f50fed6..3855e2df1e0c 100644
--- a/drivers/accel/ivpu/ivpu_hw_btrs.h
+++ b/drivers/accel/ivpu/ivpu_hw_btrs.h
@@ -1,6 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
- * Copyright (C) 2020-2024 Intel Corporation
+ * Copyright (C) 2020-2025 Intel Corporation
*/
#ifndef __IVPU_HW_BTRS_H__
@@ -13,9 +13,8 @@
#define PLL_PROFILING_FREQ_DEFAULT 38400000
#define PLL_PROFILING_FREQ_HIGH 400000000
-#define PLL_RATIO_TO_FREQ(x) ((x) * PLL_REF_CLK_FREQ)
-#define DCT_DEFAULT_ACTIVE_PERCENT 15u
+#define DCT_DEFAULT_ACTIVE_PERCENT 30u
#define DCT_PERIOD_US 35300u
int ivpu_hw_btrs_info_init(struct ivpu_device *vdev);
@@ -32,12 +31,11 @@ int ivpu_hw_btrs_ip_reset(struct ivpu_device *vdev);
void ivpu_hw_btrs_profiling_freq_reg_set_lnl(struct ivpu_device *vdev);
void ivpu_hw_btrs_ats_print_lnl(struct ivpu_device *vdev);
void ivpu_hw_btrs_clock_relinquish_disable_lnl(struct ivpu_device *vdev);
+u32 ivpu_hw_btrs_dpu_max_freq_get(struct ivpu_device *vdev);
bool ivpu_hw_btrs_irq_handler_mtl(struct ivpu_device *vdev, int irq);
bool ivpu_hw_btrs_irq_handler_lnl(struct ivpu_device *vdev, int irq);
int ivpu_hw_btrs_dct_get_request(struct ivpu_device *vdev, bool *enable);
void ivpu_hw_btrs_dct_set_status(struct ivpu_device *vdev, bool enable, u32 dct_percent);
-u32 ivpu_hw_btrs_pll_freq_get(struct ivpu_device *vdev);
-u32 ivpu_hw_btrs_ratio_to_freq(struct ivpu_device *vdev, u32 ratio);
u32 ivpu_hw_btrs_telemetry_offset_get(struct ivpu_device *vdev);
u32 ivpu_hw_btrs_telemetry_size_get(struct ivpu_device *vdev);
u32 ivpu_hw_btrs_telemetry_enable_get(struct ivpu_device *vdev);
diff --git a/drivers/accel/ivpu/ivpu_hw_ip.c b/drivers/accel/ivpu/ivpu_hw_ip.c
index 60b33fc59d96..bd2582a8c80f 100644
--- a/drivers/accel/ivpu/ivpu_hw_ip.c
+++ b/drivers/accel/ivpu/ivpu_hw_ip.c
@@ -8,15 +8,12 @@
#include "ivpu_hw.h"
#include "ivpu_hw_37xx_reg.h"
#include "ivpu_hw_40xx_reg.h"
+#include "ivpu_hw_btrs.h"
#include "ivpu_hw_ip.h"
#include "ivpu_hw_reg_io.h"
#include "ivpu_mmu.h"
#include "ivpu_pm.h"
-#define PWR_ISLAND_EN_POST_DLY_FREQ_DEFAULT 0
-#define PWR_ISLAND_EN_POST_DLY_FREQ_HIGH 18
-#define PWR_ISLAND_STATUS_DLY_FREQ_DEFAULT 3
-#define PWR_ISLAND_STATUS_DLY_FREQ_HIGH 46
#define PWR_ISLAND_STATUS_TIMEOUT_US (5 * USEC_PER_MSEC)
#define TIM_SAFE_ENABLE 0xf1d0dead
@@ -268,20 +265,15 @@ void ivpu_hw_ip_idle_gen_disable(struct ivpu_device *vdev)
idle_gen_drive_40xx(vdev, false);
}
-static void pwr_island_delay_set_50xx(struct ivpu_device *vdev)
+static void
+pwr_island_delay_set_50xx(struct ivpu_device *vdev, u32 post, u32 post1, u32 post2, u32 status)
{
- u32 val, post, status;
-
- if (vdev->hw->pll.profiling_freq == PLL_PROFILING_FREQ_DEFAULT) {
- post = PWR_ISLAND_EN_POST_DLY_FREQ_DEFAULT;
- status = PWR_ISLAND_STATUS_DLY_FREQ_DEFAULT;
- } else {
- post = PWR_ISLAND_EN_POST_DLY_FREQ_HIGH;
- status = PWR_ISLAND_STATUS_DLY_FREQ_HIGH;
- }
+ u32 val;
val = REGV_RD32(VPU_50XX_HOST_SS_AON_PWR_ISLAND_EN_POST_DLY);
val = REG_SET_FLD_NUM(VPU_50XX_HOST_SS_AON_PWR_ISLAND_EN_POST_DLY, POST_DLY, post, val);
+ val = REG_SET_FLD_NUM(VPU_50XX_HOST_SS_AON_PWR_ISLAND_EN_POST_DLY, POST1_DLY, post1, val);
+ val = REG_SET_FLD_NUM(VPU_50XX_HOST_SS_AON_PWR_ISLAND_EN_POST_DLY, POST2_DLY, post2, val);
REGV_WR32(VPU_50XX_HOST_SS_AON_PWR_ISLAND_EN_POST_DLY, val);
val = REGV_RD32(VPU_50XX_HOST_SS_AON_PWR_ISLAND_STATUS_DLY);
@@ -686,13 +678,36 @@ static void dpu_active_drive_37xx(struct ivpu_device *vdev, bool enable)
REGV_WR32(VPU_37XX_HOST_SS_AON_DPU_ACTIVE, val);
}
+static void pwr_island_delay_set(struct ivpu_device *vdev)
+{
+ bool high = vdev->hw->pll.profiling_freq == PLL_PROFILING_FREQ_HIGH;
+ u32 post, post1, post2, status;
+
+ if (ivpu_hw_ip_gen(vdev) < IVPU_HW_IP_50XX)
+ return;
+
+ switch (ivpu_device_id(vdev)) {
+ case PCI_DEVICE_ID_PTL_P:
+ post = high ? 18 : 0;
+ post1 = 0;
+ post2 = 0;
+ status = high ? 46 : 3;
+ break;
+
+ default:
+ dump_stack();
+ ivpu_err(vdev, "Unknown device ID\n");
+ return;
+ }
+
+ pwr_island_delay_set_50xx(vdev, post, post1, post2, status);
+}
+
int ivpu_hw_ip_pwr_domain_enable(struct ivpu_device *vdev)
{
int ret;
- if (ivpu_hw_ip_gen(vdev) == IVPU_HW_IP_50XX)
- pwr_island_delay_set_50xx(vdev);
-
+ pwr_island_delay_set(vdev);
pwr_island_enable(vdev);
ret = wait_for_pwr_island_status(vdev, 0x1);
diff --git a/drivers/accel/ivpu/ivpu_job.c b/drivers/accel/ivpu/ivpu_job.c
index be2e2bf0f43f..e631098718b1 100644
--- a/drivers/accel/ivpu/ivpu_job.c
+++ b/drivers/accel/ivpu/ivpu_job.c
@@ -21,8 +21,6 @@
#include "vpu_boot_api.h"
#define CMD_BUF_IDX 0
-#define JOB_ID_JOB_MASK GENMASK(7, 0)
-#define JOB_ID_CONTEXT_MASK GENMASK(31, 8)
#define JOB_MAX_BUFFER_COUNT 65535
static void ivpu_cmdq_ring_db(struct ivpu_device *vdev, struct ivpu_cmdq *cmdq)
@@ -37,7 +35,7 @@ static int ivpu_preemption_buffers_create(struct ivpu_device *vdev,
u64 secondary_size = ALIGN(vdev->fw->secondary_preempt_buf_size, PAGE_SIZE);
struct ivpu_addr_range range;
- if (vdev->hw->sched_mode != VPU_SCHEDULING_MODE_HW)
+ if (vdev->fw->sched_mode != VPU_SCHEDULING_MODE_HW)
return 0;
range.start = vdev->hw->ranges.user.end - (primary_size * IVPU_NUM_CMDQS_PER_CTX);
@@ -62,24 +60,24 @@ static int ivpu_preemption_buffers_create(struct ivpu_device *vdev,
err_free_primary:
ivpu_bo_free(cmdq->primary_preempt_buf);
+ cmdq->primary_preempt_buf = NULL;
return -ENOMEM;
}
static void ivpu_preemption_buffers_free(struct ivpu_device *vdev,
struct ivpu_file_priv *file_priv, struct ivpu_cmdq *cmdq)
{
- if (vdev->hw->sched_mode != VPU_SCHEDULING_MODE_HW)
+ if (vdev->fw->sched_mode != VPU_SCHEDULING_MODE_HW)
return;
- drm_WARN_ON(&vdev->drm, !cmdq->primary_preempt_buf);
- drm_WARN_ON(&vdev->drm, !cmdq->secondary_preempt_buf);
- ivpu_bo_free(cmdq->primary_preempt_buf);
- ivpu_bo_free(cmdq->secondary_preempt_buf);
+ if (cmdq->primary_preempt_buf)
+ ivpu_bo_free(cmdq->primary_preempt_buf);
+ if (cmdq->secondary_preempt_buf)
+ ivpu_bo_free(cmdq->secondary_preempt_buf);
}
static struct ivpu_cmdq *ivpu_cmdq_alloc(struct ivpu_file_priv *file_priv)
{
- struct xa_limit db_xa_limit = {.max = IVPU_MAX_DB, .min = IVPU_MIN_DB};
struct ivpu_device *vdev = file_priv->vdev;
struct ivpu_cmdq *cmdq;
int ret;
@@ -88,26 +86,16 @@ static struct ivpu_cmdq *ivpu_cmdq_alloc(struct ivpu_file_priv *file_priv)
if (!cmdq)
return NULL;
- ret = xa_alloc(&vdev->db_xa, &cmdq->db_id, NULL, db_xa_limit, GFP_KERNEL);
- if (ret) {
- ivpu_err(vdev, "Failed to allocate doorbell id: %d\n", ret);
- goto err_free_cmdq;
- }
-
cmdq->mem = ivpu_bo_create_global(vdev, SZ_4K, DRM_IVPU_BO_WC | DRM_IVPU_BO_MAPPABLE);
if (!cmdq->mem)
- goto err_erase_xa;
+ goto err_free_cmdq;
ret = ivpu_preemption_buffers_create(vdev, file_priv, cmdq);
if (ret)
- goto err_free_cmdq_mem;
+ ivpu_warn(vdev, "Failed to allocate preemption buffers, preemption limited\n");
return cmdq;
-err_free_cmdq_mem:
- ivpu_bo_free(cmdq->mem);
-err_erase_xa:
- xa_erase(&vdev->db_xa, cmdq->db_id);
err_free_cmdq:
kfree(cmdq);
return NULL;
@@ -130,13 +118,13 @@ static int ivpu_hws_cmdq_init(struct ivpu_file_priv *file_priv, struct ivpu_cmdq
struct ivpu_device *vdev = file_priv->vdev;
int ret;
- ret = ivpu_jsm_hws_create_cmdq(vdev, file_priv->ctx.id, file_priv->ctx.id, cmdq->db_id,
+ ret = ivpu_jsm_hws_create_cmdq(vdev, file_priv->ctx.id, file_priv->ctx.id, cmdq->id,
task_pid_nr(current), engine,
cmdq->mem->vpu_addr, ivpu_bo_size(cmdq->mem));
if (ret)
return ret;
- ret = ivpu_jsm_hws_set_context_sched_properties(vdev, file_priv->ctx.id, cmdq->db_id,
+ ret = ivpu_jsm_hws_set_context_sched_properties(vdev, file_priv->ctx.id, cmdq->id,
priority);
if (ret)
return ret;
@@ -149,21 +137,22 @@ static int ivpu_register_db(struct ivpu_file_priv *file_priv, struct ivpu_cmdq *
struct ivpu_device *vdev = file_priv->vdev;
int ret;
- if (vdev->hw->sched_mode == VPU_SCHEDULING_MODE_HW)
- ret = ivpu_jsm_hws_register_db(vdev, file_priv->ctx.id, cmdq->db_id, cmdq->db_id,
+ if (vdev->fw->sched_mode == VPU_SCHEDULING_MODE_HW)
+ ret = ivpu_jsm_hws_register_db(vdev, file_priv->ctx.id, cmdq->id, cmdq->db_id,
cmdq->mem->vpu_addr, ivpu_bo_size(cmdq->mem));
else
ret = ivpu_jsm_register_db(vdev, file_priv->ctx.id, cmdq->db_id,
cmdq->mem->vpu_addr, ivpu_bo_size(cmdq->mem));
if (!ret)
- ivpu_dbg(vdev, JOB, "DB %d registered to ctx %d\n", cmdq->db_id, file_priv->ctx.id);
+ ivpu_dbg(vdev, JOB, "DB %d registered to cmdq %d ctx %d\n",
+ cmdq->db_id, cmdq->id, file_priv->ctx.id);
return ret;
}
static int
-ivpu_cmdq_init(struct ivpu_file_priv *file_priv, struct ivpu_cmdq *cmdq, u16 engine, u8 priority)
+ivpu_cmdq_init(struct ivpu_file_priv *file_priv, struct ivpu_cmdq *cmdq, u8 priority)
{
struct ivpu_device *vdev = file_priv->vdev;
struct vpu_job_queue_header *jobq_header;
@@ -179,13 +168,13 @@ ivpu_cmdq_init(struct ivpu_file_priv *file_priv, struct ivpu_cmdq *cmdq, u16 eng
cmdq->jobq = (struct vpu_job_queue *)ivpu_bo_vaddr(cmdq->mem);
jobq_header = &cmdq->jobq->header;
- jobq_header->engine_idx = engine;
+ jobq_header->engine_idx = VPU_ENGINE_COMPUTE;
jobq_header->head = 0;
jobq_header->tail = 0;
wmb(); /* Flush WC buffer for jobq->header */
- if (vdev->hw->sched_mode == VPU_SCHEDULING_MODE_HW) {
- ret = ivpu_hws_cmdq_init(file_priv, cmdq, engine, priority);
+ if (vdev->fw->sched_mode == VPU_SCHEDULING_MODE_HW) {
+ ret = ivpu_hws_cmdq_init(file_priv, cmdq, VPU_ENGINE_COMPUTE, priority);
if (ret)
return ret;
}
@@ -211,10 +200,10 @@ static int ivpu_cmdq_fini(struct ivpu_file_priv *file_priv, struct ivpu_cmdq *cm
cmdq->db_registered = false;
- if (vdev->hw->sched_mode == VPU_SCHEDULING_MODE_HW) {
- ret = ivpu_jsm_hws_destroy_cmdq(vdev, file_priv->ctx.id, cmdq->db_id);
+ if (vdev->fw->sched_mode == VPU_SCHEDULING_MODE_HW) {
+ ret = ivpu_jsm_hws_destroy_cmdq(vdev, file_priv->ctx.id, cmdq->id);
if (!ret)
- ivpu_dbg(vdev, JOB, "Command queue %d destroyed\n", cmdq->db_id);
+ ivpu_dbg(vdev, JOB, "Command queue %d destroyed\n", cmdq->id);
}
ret = ivpu_jsm_unregister_db(vdev, cmdq->db_id);
@@ -224,55 +213,104 @@ static int ivpu_cmdq_fini(struct ivpu_file_priv *file_priv, struct ivpu_cmdq *cm
return 0;
}
-static struct ivpu_cmdq *ivpu_cmdq_acquire(struct ivpu_file_priv *file_priv, u16 engine,
- u8 priority)
+static int ivpu_db_id_alloc(struct ivpu_device *vdev, u32 *db_id)
+{
+ int ret;
+ u32 id;
+
+ ret = xa_alloc_cyclic(&vdev->db_xa, &id, NULL, vdev->db_limit, &vdev->db_next, GFP_KERNEL);
+ if (ret < 0)
+ return ret;
+
+ *db_id = id;
+ return 0;
+}
+
+static int ivpu_cmdq_id_alloc(struct ivpu_file_priv *file_priv, u32 *cmdq_id)
{
- int cmdq_idx = IVPU_CMDQ_INDEX(engine, priority);
- struct ivpu_cmdq *cmdq = file_priv->cmdq[cmdq_idx];
+ int ret;
+ u32 id;
+
+ ret = xa_alloc_cyclic(&file_priv->cmdq_xa, &id, NULL, file_priv->cmdq_limit,
+ &file_priv->cmdq_id_next, GFP_KERNEL);
+ if (ret < 0)
+ return ret;
+
+ *cmdq_id = id;
+ return 0;
+}
+
+static struct ivpu_cmdq *ivpu_cmdq_acquire(struct ivpu_file_priv *file_priv, u8 priority)
+{
+ struct ivpu_device *vdev = file_priv->vdev;
+ struct ivpu_cmdq *cmdq;
+ unsigned long id;
int ret;
lockdep_assert_held(&file_priv->lock);
+ xa_for_each(&file_priv->cmdq_xa, id, cmdq)
+ if (cmdq->priority == priority)
+ break;
+
if (!cmdq) {
cmdq = ivpu_cmdq_alloc(file_priv);
- if (!cmdq)
+ if (!cmdq) {
+ ivpu_err(vdev, "Failed to allocate command queue\n");
return NULL;
- file_priv->cmdq[cmdq_idx] = cmdq;
+ }
+
+ ret = ivpu_db_id_alloc(vdev, &cmdq->db_id);
+ if (ret) {
+ ivpu_err(file_priv->vdev, "Failed to allocate doorbell ID: %d\n", ret);
+ goto err_free_cmdq;
+ }
+
+ ret = ivpu_cmdq_id_alloc(file_priv, &cmdq->id);
+ if (ret) {
+ ivpu_err(vdev, "Failed to allocate command queue ID: %d\n", ret);
+ goto err_erase_db_id;
+ }
+
+ cmdq->priority = priority;
+ ret = xa_err(xa_store(&file_priv->cmdq_xa, cmdq->id, cmdq, GFP_KERNEL));
+ if (ret) {
+ ivpu_err(vdev, "Failed to store command queue in cmdq_xa: %d\n", ret);
+ goto err_erase_cmdq_id;
+ }
}
- ret = ivpu_cmdq_init(file_priv, cmdq, engine, priority);
- if (ret)
- return NULL;
+ ret = ivpu_cmdq_init(file_priv, cmdq, priority);
+ if (ret) {
+ ivpu_err(vdev, "Failed to initialize command queue: %d\n", ret);
+ goto err_free_cmdq;
+ }
return cmdq;
+
+err_erase_cmdq_id:
+ xa_erase(&file_priv->cmdq_xa, cmdq->id);
+err_erase_db_id:
+ xa_erase(&vdev->db_xa, cmdq->db_id);
+err_free_cmdq:
+ ivpu_cmdq_free(file_priv, cmdq);
+ return NULL;
}
-static void ivpu_cmdq_release_locked(struct ivpu_file_priv *file_priv, u16 engine, u8 priority)
+void ivpu_cmdq_release_all_locked(struct ivpu_file_priv *file_priv)
{
- int cmdq_idx = IVPU_CMDQ_INDEX(engine, priority);
- struct ivpu_cmdq *cmdq = file_priv->cmdq[cmdq_idx];
+ struct ivpu_cmdq *cmdq;
+ unsigned long cmdq_id;
lockdep_assert_held(&file_priv->lock);
- if (cmdq) {
- file_priv->cmdq[cmdq_idx] = NULL;
+ xa_for_each(&file_priv->cmdq_xa, cmdq_id, cmdq) {
+ xa_erase(&file_priv->cmdq_xa, cmdq_id);
ivpu_cmdq_fini(file_priv, cmdq);
ivpu_cmdq_free(file_priv, cmdq);
}
}
-void ivpu_cmdq_release_all_locked(struct ivpu_file_priv *file_priv)
-{
- u16 engine;
- u8 priority;
-
- lockdep_assert_held(&file_priv->lock);
-
- for (engine = 0; engine < IVPU_NUM_ENGINES; engine++)
- for (priority = 0; priority < IVPU_NUM_PRIORITIES; priority++)
- ivpu_cmdq_release_locked(file_priv, engine, priority);
-}
-
/*
* Mark the doorbell as unregistered
* This function needs to be called when the VPU hardware is restarted
@@ -281,20 +319,13 @@ void ivpu_cmdq_release_all_locked(struct ivpu_file_priv *file_priv)
*/
static void ivpu_cmdq_reset(struct ivpu_file_priv *file_priv)
{
- u16 engine;
- u8 priority;
+ struct ivpu_cmdq *cmdq;
+ unsigned long cmdq_id;
mutex_lock(&file_priv->lock);
- for (engine = 0; engine < IVPU_NUM_ENGINES; engine++) {
- for (priority = 0; priority < IVPU_NUM_PRIORITIES; priority++) {
- int cmdq_idx = IVPU_CMDQ_INDEX(engine, priority);
- struct ivpu_cmdq *cmdq = file_priv->cmdq[cmdq_idx];
-
- if (cmdq)
- cmdq->db_registered = false;
- }
- }
+ xa_for_each(&file_priv->cmdq_xa, cmdq_id, cmdq)
+ cmdq->db_registered = false;
mutex_unlock(&file_priv->lock);
}
@@ -314,17 +345,11 @@ void ivpu_cmdq_reset_all_contexts(struct ivpu_device *vdev)
static void ivpu_cmdq_fini_all(struct ivpu_file_priv *file_priv)
{
- u16 engine;
- u8 priority;
-
- for (engine = 0; engine < IVPU_NUM_ENGINES; engine++) {
- for (priority = 0; priority < IVPU_NUM_PRIORITIES; priority++) {
- int cmdq_idx = IVPU_CMDQ_INDEX(engine, priority);
+ struct ivpu_cmdq *cmdq;
+ unsigned long cmdq_id;
- if (file_priv->cmdq[cmdq_idx])
- ivpu_cmdq_fini(file_priv, file_priv->cmdq[cmdq_idx]);
- }
- }
+ xa_for_each(&file_priv->cmdq_xa, cmdq_id, cmdq)
+ ivpu_cmdq_fini(file_priv, cmdq);
}
void ivpu_context_abort_locked(struct ivpu_file_priv *file_priv)
@@ -335,8 +360,10 @@ void ivpu_context_abort_locked(struct ivpu_file_priv *file_priv)
ivpu_cmdq_fini_all(file_priv);
- if (vdev->hw->sched_mode == VPU_SCHEDULING_MODE_OS)
+ if (vdev->fw->sched_mode == VPU_SCHEDULING_MODE_OS)
ivpu_jsm_context_release(vdev, file_priv->ctx.id);
+
+ file_priv->aborted = true;
}
static int ivpu_cmdq_push_job(struct ivpu_cmdq *cmdq, struct ivpu_job *job)
@@ -349,24 +376,30 @@ static int ivpu_cmdq_push_job(struct ivpu_cmdq *cmdq, struct ivpu_job *job)
/* Check if there is space left in job queue */
if (next_entry == header->head) {
- ivpu_dbg(vdev, JOB, "Job queue full: ctx %d engine %d db %d head %d tail %d\n",
- job->file_priv->ctx.id, job->engine_idx, cmdq->db_id, header->head, tail);
+ ivpu_dbg(vdev, JOB, "Job queue full: ctx %d cmdq %d db %d head %d tail %d\n",
+ job->file_priv->ctx.id, cmdq->id, cmdq->db_id, header->head, tail);
return -EBUSY;
}
- entry = &cmdq->jobq->job[tail];
+ entry = &cmdq->jobq->slot[tail].job;
entry->batch_buf_addr = job->cmd_buf_vpu_addr;
entry->job_id = job->job_id;
entry->flags = 0;
if (unlikely(ivpu_test_mode & IVPU_TEST_MODE_NULL_SUBMISSION))
entry->flags = VPU_JOB_FLAGS_NULL_SUBMISSION_MASK;
- if (vdev->hw->sched_mode == VPU_SCHEDULING_MODE_HW &&
+ if (vdev->fw->sched_mode == VPU_SCHEDULING_MODE_HW &&
(unlikely(!(ivpu_test_mode & IVPU_TEST_MODE_PREEMPTION_DISABLE)))) {
- entry->primary_preempt_buf_addr = cmdq->primary_preempt_buf->vpu_addr;
- entry->primary_preempt_buf_size = ivpu_bo_size(cmdq->primary_preempt_buf);
- entry->secondary_preempt_buf_addr = cmdq->secondary_preempt_buf->vpu_addr;
- entry->secondary_preempt_buf_size = ivpu_bo_size(cmdq->secondary_preempt_buf);
+ if (cmdq->primary_preempt_buf) {
+ entry->primary_preempt_buf_addr = cmdq->primary_preempt_buf->vpu_addr;
+ entry->primary_preempt_buf_size = ivpu_bo_size(cmdq->primary_preempt_buf);
+ }
+
+ if (cmdq->secondary_preempt_buf) {
+ entry->secondary_preempt_buf_addr = cmdq->secondary_preempt_buf->vpu_addr;
+ entry->secondary_preempt_buf_size =
+ ivpu_bo_size(cmdq->secondary_preempt_buf);
+ }
}
wmb(); /* Ensure that tail is updated after filling entry */
@@ -469,16 +502,14 @@ static struct ivpu_job *ivpu_job_remove_from_submitted_jobs(struct ivpu_device *
{
struct ivpu_job *job;
- xa_lock(&vdev->submitted_jobs_xa);
- job = __xa_erase(&vdev->submitted_jobs_xa, job_id);
+ lockdep_assert_held(&vdev->submitted_jobs_lock);
+ job = xa_erase(&vdev->submitted_jobs_xa, job_id);
if (xa_empty(&vdev->submitted_jobs_xa) && job) {
vdev->busy_time = ktime_add(ktime_sub(ktime_get(), vdev->busy_start_ts),
vdev->busy_time);
}
- xa_unlock(&vdev->submitted_jobs_xa);
-
return job;
}
@@ -486,6 +517,28 @@ static int ivpu_job_signal_and_destroy(struct ivpu_device *vdev, u32 job_id, u32
{
struct ivpu_job *job;
+ lockdep_assert_held(&vdev->submitted_jobs_lock);
+
+ job = xa_load(&vdev->submitted_jobs_xa, job_id);
+ if (!job)
+ return -ENOENT;
+
+ if (job_status == VPU_JSM_STATUS_MVNCI_CONTEXT_VIOLATION_HW) {
+ guard(mutex)(&job->file_priv->lock);
+
+ if (job->file_priv->has_mmu_faults)
+ return 0;
+
+ /*
+ * Mark context as faulty and defer destruction of the job to jobs abort thread
+ * handler to synchronize between both faults and jobs returning context violation
+ * status and ensure both are handled in the same way
+ */
+ job->file_priv->has_mmu_faults = true;
+ queue_work(system_wq, &vdev->context_abort_work);
+ return 0;
+ }
+
job = ivpu_job_remove_from_submitted_jobs(vdev, job_id);
if (!job)
return -ENOENT;
@@ -503,6 +556,10 @@ static int ivpu_job_signal_and_destroy(struct ivpu_device *vdev, u32 job_id, u32
ivpu_stop_job_timeout_detection(vdev);
ivpu_rpm_put(vdev);
+
+ if (!xa_empty(&vdev->submitted_jobs_xa))
+ ivpu_start_job_timeout_detection(vdev);
+
return 0;
}
@@ -511,15 +568,18 @@ void ivpu_jobs_abort_all(struct ivpu_device *vdev)
struct ivpu_job *job;
unsigned long id;
+ mutex_lock(&vdev->submitted_jobs_lock);
+
xa_for_each(&vdev->submitted_jobs_xa, id, job)
ivpu_job_signal_and_destroy(vdev, id, DRM_IVPU_JOB_STATUS_ABORTED);
+
+ mutex_unlock(&vdev->submitted_jobs_lock);
}
static int ivpu_job_submit(struct ivpu_job *job, u8 priority)
{
struct ivpu_file_priv *file_priv = job->file_priv;
struct ivpu_device *vdev = job->vdev;
- struct xa_limit job_id_range;
struct ivpu_cmdq *cmdq;
bool is_first_job;
int ret;
@@ -528,27 +588,25 @@ static int ivpu_job_submit(struct ivpu_job *job, u8 priority)
if (ret < 0)
return ret;
+ mutex_lock(&vdev->submitted_jobs_lock);
mutex_lock(&file_priv->lock);
- cmdq = ivpu_cmdq_acquire(job->file_priv, job->engine_idx, priority);
+ cmdq = ivpu_cmdq_acquire(file_priv, priority);
if (!cmdq) {
ivpu_warn_ratelimited(vdev, "Failed to get job queue, ctx %d engine %d prio %d\n",
file_priv->ctx.id, job->engine_idx, priority);
ret = -EINVAL;
- goto err_unlock_file_priv;
+ goto err_unlock;
}
- job_id_range.min = FIELD_PREP(JOB_ID_CONTEXT_MASK, (file_priv->ctx.id - 1));
- job_id_range.max = job_id_range.min | JOB_ID_JOB_MASK;
-
- xa_lock(&vdev->submitted_jobs_xa);
is_first_job = xa_empty(&vdev->submitted_jobs_xa);
- ret = __xa_alloc(&vdev->submitted_jobs_xa, &job->job_id, job, job_id_range, GFP_KERNEL);
- if (ret) {
+ ret = xa_alloc_cyclic(&vdev->submitted_jobs_xa, &job->job_id, job, file_priv->job_limit,
+ &file_priv->job_id_next, GFP_KERNEL);
+ if (ret < 0) {
ivpu_dbg(vdev, JOB, "Too many active jobs in ctx %d\n",
file_priv->ctx.id);
ret = -EBUSY;
- goto err_unlock_submitted_jobs_xa;
+ goto err_unlock;
}
ret = ivpu_cmdq_push_job(cmdq, job);
@@ -570,20 +628,20 @@ static int ivpu_job_submit(struct ivpu_job *job, u8 priority)
job->job_id, file_priv->ctx.id, job->engine_idx, priority,
job->cmd_buf_vpu_addr, cmdq->jobq->header.tail);
- xa_unlock(&vdev->submitted_jobs_xa);
-
mutex_unlock(&file_priv->lock);
- if (unlikely(ivpu_test_mode & IVPU_TEST_MODE_NULL_HW))
+ if (unlikely(ivpu_test_mode & IVPU_TEST_MODE_NULL_HW)) {
ivpu_job_signal_and_destroy(vdev, job->job_id, VPU_JSM_STATUS_SUCCESS);
+ }
+
+ mutex_unlock(&vdev->submitted_jobs_lock);
return 0;
err_erase_xa:
- __xa_erase(&vdev->submitted_jobs_xa, job->job_id);
-err_unlock_submitted_jobs_xa:
- xa_unlock(&vdev->submitted_jobs_xa);
-err_unlock_file_priv:
+ xa_erase(&vdev->submitted_jobs_xa, job->job_id);
+err_unlock:
+ mutex_unlock(&vdev->submitted_jobs_lock);
mutex_unlock(&file_priv->lock);
ivpu_rpm_put(vdev);
return ret;
@@ -673,7 +731,7 @@ int ivpu_submit_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
int idx, ret;
u8 priority;
- if (params->engine > DRM_IVPU_ENGINE_COPY)
+ if (params->engine != DRM_IVPU_ENGINE_COMPUTE)
return -EINVAL;
if (params->priority > DRM_IVPU_JOB_PRIORITY_REALTIME)
@@ -753,7 +811,6 @@ ivpu_job_done_callback(struct ivpu_device *vdev, struct ivpu_ipc_hdr *ipc_hdr,
struct vpu_jsm_msg *jsm_msg)
{
struct vpu_ipc_msg_payload_job_done *payload;
- int ret;
if (!jsm_msg) {
ivpu_err(vdev, "IPC message has no JSM payload\n");
@@ -766,9 +823,10 @@ ivpu_job_done_callback(struct ivpu_device *vdev, struct ivpu_ipc_hdr *ipc_hdr,
}
payload = (struct vpu_ipc_msg_payload_job_done *)&jsm_msg->payload;
- ret = ivpu_job_signal_and_destroy(vdev, payload->job_id, payload->job_status);
- if (!ret && !xa_empty(&vdev->submitted_jobs_xa))
- ivpu_start_job_timeout_detection(vdev);
+
+ mutex_lock(&vdev->submitted_jobs_lock);
+ ivpu_job_signal_and_destroy(vdev, payload->job_id, payload->job_status);
+ mutex_unlock(&vdev->submitted_jobs_lock);
}
void ivpu_job_done_consumer_init(struct ivpu_device *vdev)
@@ -781,3 +839,43 @@ void ivpu_job_done_consumer_fini(struct ivpu_device *vdev)
{
ivpu_ipc_consumer_del(vdev, &vdev->job_done_consumer);
}
+
+void ivpu_context_abort_thread_handler(struct work_struct *work)
+{
+ struct ivpu_device *vdev = container_of(work, struct ivpu_device, context_abort_work);
+ struct ivpu_file_priv *file_priv;
+ unsigned long ctx_id;
+ struct ivpu_job *job;
+ unsigned long id;
+
+ if (vdev->fw->sched_mode == VPU_SCHEDULING_MODE_HW)
+ if (ivpu_jsm_reset_engine(vdev, 0))
+ return;
+
+ mutex_lock(&vdev->context_list_lock);
+ xa_for_each(&vdev->context_xa, ctx_id, file_priv) {
+ if (!file_priv->has_mmu_faults || file_priv->aborted)
+ continue;
+
+ mutex_lock(&file_priv->lock);
+ ivpu_context_abort_locked(file_priv);
+ mutex_unlock(&file_priv->lock);
+ }
+ mutex_unlock(&vdev->context_list_lock);
+
+ if (vdev->fw->sched_mode != VPU_SCHEDULING_MODE_HW)
+ return;
+
+ if (ivpu_jsm_hws_resume_engine(vdev, 0))
+ return;
+ /*
+ * In hardware scheduling mode NPU already has stopped processing jobs
+ * and won't send us any further notifications, thus we have to free job related resources
+ * and notify userspace
+ */
+ mutex_lock(&vdev->submitted_jobs_lock);
+ xa_for_each(&vdev->submitted_jobs_xa, id, job)
+ if (job->file_priv->aborted)
+ ivpu_job_signal_and_destroy(vdev, job->job_id, DRM_IVPU_JOB_STATUS_ABORTED);
+ mutex_unlock(&vdev->submitted_jobs_lock);
+}
diff --git a/drivers/accel/ivpu/ivpu_job.h b/drivers/accel/ivpu/ivpu_job.h
index 6accb94028c7..af1ed039569c 100644
--- a/drivers/accel/ivpu/ivpu_job.h
+++ b/drivers/accel/ivpu/ivpu_job.h
@@ -28,8 +28,10 @@ struct ivpu_cmdq {
struct ivpu_bo *secondary_preempt_buf;
struct ivpu_bo *mem;
u32 entry_count;
+ u32 id;
u32 db_id;
bool db_registered;
+ u8 priority;
};
/**
@@ -64,6 +66,7 @@ void ivpu_cmdq_reset_all_contexts(struct ivpu_device *vdev);
void ivpu_job_done_consumer_init(struct ivpu_device *vdev);
void ivpu_job_done_consumer_fini(struct ivpu_device *vdev);
+void ivpu_context_abort_thread_handler(struct work_struct *work);
void ivpu_jobs_abort_all(struct ivpu_device *vdev);
diff --git a/drivers/accel/ivpu/ivpu_jsm_msg.c b/drivers/accel/ivpu/ivpu_jsm_msg.c
index f7618b605f02..7c08308d5725 100644
--- a/drivers/accel/ivpu/ivpu_jsm_msg.c
+++ b/drivers/accel/ivpu/ivpu_jsm_msg.c
@@ -7,6 +7,8 @@
#include "ivpu_hw.h"
#include "ivpu_ipc.h"
#include "ivpu_jsm_msg.h"
+#include "ivpu_pm.h"
+#include "vpu_jsm_api.h"
const char *ivpu_jsm_msg_type_to_str(enum vpu_ipc_msg_type type)
{
@@ -48,9 +50,10 @@ const char *ivpu_jsm_msg_type_to_str(enum vpu_ipc_msg_type type)
IVPU_CASE_TO_STR(VPU_JSM_MSG_HWS_RESUME_ENGINE_DONE);
IVPU_CASE_TO_STR(VPU_JSM_MSG_STATE_DUMP);
IVPU_CASE_TO_STR(VPU_JSM_MSG_STATE_DUMP_RSP);
- IVPU_CASE_TO_STR(VPU_JSM_MSG_BLOB_DEINIT);
+ IVPU_CASE_TO_STR(VPU_JSM_MSG_BLOB_DEINIT_DEPRECATED);
IVPU_CASE_TO_STR(VPU_JSM_MSG_DYNDBG_CONTROL);
IVPU_CASE_TO_STR(VPU_JSM_MSG_JOB_DONE);
+ IVPU_CASE_TO_STR(VPU_JSM_MSG_NATIVE_FENCE_SIGNALLED);
IVPU_CASE_TO_STR(VPU_JSM_MSG_ENGINE_RESET_DONE);
IVPU_CASE_TO_STR(VPU_JSM_MSG_ENGINE_PREEMPT_DONE);
IVPU_CASE_TO_STR(VPU_JSM_MSG_REGISTER_DB_DONE);
@@ -131,7 +134,7 @@ int ivpu_jsm_get_heartbeat(struct ivpu_device *vdev, u32 engine, u64 *heartbeat)
struct vpu_jsm_msg resp;
int ret;
- if (engine > VPU_ENGINE_COPY)
+ if (engine != VPU_ENGINE_COMPUTE)
return -EINVAL;
req.payload.query_engine_hb.engine_idx = engine;
@@ -154,15 +157,17 @@ int ivpu_jsm_reset_engine(struct ivpu_device *vdev, u32 engine)
struct vpu_jsm_msg resp;
int ret;
- if (engine > VPU_ENGINE_COPY)
+ if (engine != VPU_ENGINE_COMPUTE)
return -EINVAL;
req.payload.engine_reset.engine_idx = engine;
ret = ivpu_ipc_send_receive(vdev, &req, VPU_JSM_MSG_ENGINE_RESET_DONE, &resp,
VPU_IPC_CHAN_ASYNC_CMD, vdev->timeout.jsm);
- if (ret)
+ if (ret) {
ivpu_err_ratelimited(vdev, "Failed to reset engine %d: %d\n", engine, ret);
+ ivpu_pm_trigger_recovery(vdev, "Engine reset failed");
+ }
return ret;
}
@@ -173,7 +178,7 @@ int ivpu_jsm_preempt_engine(struct ivpu_device *vdev, u32 engine, u32 preempt_id
struct vpu_jsm_msg resp;
int ret;
- if (engine > VPU_ENGINE_COPY)
+ if (engine != VPU_ENGINE_COMPUTE)
return -EINVAL;
req.payload.engine_preempt.engine_idx = engine;
@@ -345,15 +350,17 @@ int ivpu_jsm_hws_resume_engine(struct ivpu_device *vdev, u32 engine)
struct vpu_jsm_msg resp;
int ret;
- if (engine >= VPU_ENGINE_NB)
+ if (engine != VPU_ENGINE_COMPUTE)
return -EINVAL;
req.payload.hws_resume_engine.engine_idx = engine;
ret = ivpu_ipc_send_receive(vdev, &req, VPU_JSM_MSG_HWS_RESUME_ENGINE_DONE, &resp,
VPU_IPC_CHAN_ASYNC_CMD, vdev->timeout.jsm);
- if (ret)
+ if (ret) {
ivpu_err_ratelimited(vdev, "Failed to resume engine %d: %d\n", engine, ret);
+ ivpu_pm_trigger_recovery(vdev, "Engine resume failed");
+ }
return ret;
}
@@ -408,26 +415,18 @@ int ivpu_jsm_hws_setup_priority_bands(struct ivpu_device *vdev)
{
struct vpu_jsm_msg req = { .type = VPU_JSM_MSG_SET_PRIORITY_BAND_SETUP };
struct vpu_jsm_msg resp;
+ struct ivpu_hw_info *hw = vdev->hw;
+ struct vpu_ipc_msg_payload_hws_priority_band_setup *setup =
+ &req.payload.hws_priority_band_setup;
int ret;
- /* Idle */
- req.payload.hws_priority_band_setup.grace_period[0] = 0;
- req.payload.hws_priority_band_setup.process_grace_period[0] = 50000;
- req.payload.hws_priority_band_setup.process_quantum[0] = 160000;
- /* Normal */
- req.payload.hws_priority_band_setup.grace_period[1] = 50000;
- req.payload.hws_priority_band_setup.process_grace_period[1] = 50000;
- req.payload.hws_priority_band_setup.process_quantum[1] = 300000;
- /* Focus */
- req.payload.hws_priority_band_setup.grace_period[2] = 50000;
- req.payload.hws_priority_band_setup.process_grace_period[2] = 50000;
- req.payload.hws_priority_band_setup.process_quantum[2] = 200000;
- /* Realtime */
- req.payload.hws_priority_band_setup.grace_period[3] = 0;
- req.payload.hws_priority_band_setup.process_grace_period[3] = 50000;
- req.payload.hws_priority_band_setup.process_quantum[3] = 200000;
-
- req.payload.hws_priority_band_setup.normal_band_percentage = 10;
+ for (int band = VPU_JOB_SCHEDULING_PRIORITY_BAND_IDLE;
+ band < VPU_JOB_SCHEDULING_PRIORITY_BAND_COUNT; band++) {
+ setup->grace_period[band] = hw->hws.grace_period[band];
+ setup->process_grace_period[band] = hw->hws.process_grace_period[band];
+ setup->process_quantum[band] = hw->hws.process_quantum[band];
+ }
+ setup->normal_band_percentage = 10;
ret = ivpu_ipc_send_receive_internal(vdev, &req, VPU_JSM_MSG_SET_PRIORITY_BAND_SETUP_RSP,
&resp, VPU_IPC_CHAN_ASYNC_CMD, vdev->timeout.jsm);
diff --git a/drivers/accel/ivpu/ivpu_mmu.c b/drivers/accel/ivpu/ivpu_mmu.c
index c078e214b221..fb15eb75b5ba 100644
--- a/drivers/accel/ivpu/ivpu_mmu.c
+++ b/drivers/accel/ivpu/ivpu_mmu.c
@@ -917,8 +917,7 @@ void ivpu_mmu_irq_evtq_handler(struct ivpu_device *vdev)
REGV_WR32(IVPU_MMU_REG_EVTQ_CONS_SEC, vdev->mmu->evtq.cons);
}
- if (!kfifo_put(&vdev->hw->irq.fifo, IVPU_HW_IRQ_SRC_MMU_EVTQ))
- ivpu_err_ratelimited(vdev, "IRQ FIFO full\n");
+ queue_work(system_wq, &vdev->context_abort_work);
}
void ivpu_mmu_evtq_dump(struct ivpu_device *vdev)
diff --git a/drivers/accel/ivpu/ivpu_pm.c b/drivers/accel/ivpu/ivpu_pm.c
index fbb61a2c3b19..2269569bdee7 100644
--- a/drivers/accel/ivpu/ivpu_pm.c
+++ b/drivers/accel/ivpu/ivpu_pm.c
@@ -38,6 +38,7 @@ static void ivpu_pm_prepare_cold_boot(struct ivpu_device *vdev)
ivpu_cmdq_reset_all_contexts(vdev);
ivpu_ipc_reset(vdev);
+ ivpu_fw_log_reset(vdev);
ivpu_fw_load(vdev);
fw->entry_point = fw->cold_boot_entry_point;
}
@@ -421,16 +422,17 @@ int ivpu_pm_dct_enable(struct ivpu_device *vdev, u8 active_percent)
active_us = (DCT_PERIOD_US * active_percent) / 100;
inactive_us = DCT_PERIOD_US - active_us;
+ vdev->pm->dct_active_percent = active_percent;
+
+ ivpu_dbg(vdev, PM, "DCT requested %u%% (D0: %uus, D0i2: %uus)\n",
+ active_percent, active_us, inactive_us);
+
ret = ivpu_jsm_dct_enable(vdev, active_us, inactive_us);
if (ret) {
ivpu_err_ratelimited(vdev, "Filed to enable DCT: %d\n", ret);
return ret;
}
- vdev->pm->dct_active_percent = active_percent;
-
- ivpu_dbg(vdev, PM, "DCT set to %u%% (D0: %uus, D0i2: %uus)\n",
- active_percent, active_us, inactive_us);
return 0;
}
@@ -438,15 +440,16 @@ int ivpu_pm_dct_disable(struct ivpu_device *vdev)
{
int ret;
+ vdev->pm->dct_active_percent = 0;
+
+ ivpu_dbg(vdev, PM, "DCT requested to be disabled\n");
+
ret = ivpu_jsm_dct_disable(vdev);
if (ret) {
ivpu_err_ratelimited(vdev, "Filed to disable DCT: %d\n", ret);
return ret;
}
- vdev->pm->dct_active_percent = 0;
-
- ivpu_dbg(vdev, PM, "DCT disabled\n");
return 0;
}
@@ -458,7 +461,7 @@ void ivpu_pm_dct_irq_thread_handler(struct ivpu_device *vdev)
if (ivpu_hw_btrs_dct_get_request(vdev, &enable))
return;
- if (vdev->pm->dct_active_percent)
+ if (enable)
ret = ivpu_pm_dct_enable(vdev, DCT_DEFAULT_ACTIVE_PERCENT);
else
ret = ivpu_pm_dct_disable(vdev);
diff --git a/drivers/accel/ivpu/ivpu_sysfs.c b/drivers/accel/ivpu/ivpu_sysfs.c
index 913669f1786e..8a616791c32f 100644
--- a/drivers/accel/ivpu/ivpu_sysfs.c
+++ b/drivers/accel/ivpu/ivpu_sysfs.c
@@ -6,6 +6,8 @@
#include <linux/device.h>
#include <linux/err.h>
+#include "ivpu_drv.h"
+#include "ivpu_fw.h"
#include "ivpu_hw.h"
#include "ivpu_sysfs.h"
@@ -28,19 +30,42 @@ npu_busy_time_us_show(struct device *dev, struct device_attribute *attr, char *b
struct ivpu_device *vdev = to_ivpu_device(drm);
ktime_t total, now = 0;
- xa_lock(&vdev->submitted_jobs_xa);
+ mutex_lock(&vdev->submitted_jobs_lock);
+
total = vdev->busy_time;
if (!xa_empty(&vdev->submitted_jobs_xa))
now = ktime_sub(ktime_get(), vdev->busy_start_ts);
- xa_unlock(&vdev->submitted_jobs_xa);
+ mutex_unlock(&vdev->submitted_jobs_lock);
return sysfs_emit(buf, "%lld\n", ktime_to_us(ktime_add(total, now)));
}
static DEVICE_ATTR_RO(npu_busy_time_us);
+/**
+ * DOC: sched_mode
+ *
+ * The sched_mode is used to report current NPU scheduling mode.
+ *
+ * It returns following strings:
+ * - "HW" - Hardware Scheduler mode
+ * - "OS" - Operating System Scheduler mode
+ *
+ */
+static ssize_t
+sched_mode_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+ struct drm_device *drm = dev_get_drvdata(dev);
+ struct ivpu_device *vdev = to_ivpu_device(drm);
+
+ return sysfs_emit(buf, "%s\n", vdev->fw->sched_mode ? "HW" : "OS");
+}
+
+static DEVICE_ATTR_RO(sched_mode);
+
static struct attribute *ivpu_dev_attrs[] = {
&dev_attr_npu_busy_time_us.attr,
+ &dev_attr_sched_mode.attr,
NULL,
};
diff --git a/drivers/accel/ivpu/vpu_boot_api.h b/drivers/accel/ivpu/vpu_boot_api.h
index 82954b91b748..908e68ea1c39 100644
--- a/drivers/accel/ivpu/vpu_boot_api.h
+++ b/drivers/accel/ivpu/vpu_boot_api.h
@@ -1,14 +1,13 @@
/* SPDX-License-Identifier: MIT */
/*
- * Copyright (c) 2020-2023, Intel Corporation.
+ * Copyright (c) 2020-2024, Intel Corporation.
*/
#ifndef VPU_BOOT_API_H
#define VPU_BOOT_API_H
/*
- * =========== FW API version information beginning ================
- * The bellow values will be used to construct the version info this way:
+ * The below values will be used to construct the version info this way:
* fw_bin_header->api_version[VPU_BOOT_API_VER_ID] = (VPU_BOOT_API_VER_MAJOR << 16) |
* VPU_BOOT_API_VER_MINOR;
* VPU_BOOT_API_VER_PATCH will be ignored. KMD and compatibility is not affected if this changes
@@ -27,19 +26,18 @@
* Minor version changes when API backward compatibility is preserved.
* Resets to 0 if Major version is incremented.
*/
-#define VPU_BOOT_API_VER_MINOR 24
+#define VPU_BOOT_API_VER_MINOR 26
/*
* API header changed (field names, documentation, formatting) but API itself has not been changed
*/
-#define VPU_BOOT_API_VER_PATCH 0
+#define VPU_BOOT_API_VER_PATCH 3
/*
* Index in the API version table
* Must be unique for each API
*/
#define VPU_BOOT_API_VER_INDEX 0
-/* ------------ FW API version information end ---------------------*/
#pragma pack(push, 4)
@@ -164,8 +162,6 @@ enum vpu_trace_destination {
/* VPU 30xx HW component IDs are sequential, so define first and last IDs. */
#define VPU_TRACE_PROC_BIT_30XX_FIRST VPU_TRACE_PROC_BIT_LRT
#define VPU_TRACE_PROC_BIT_30XX_LAST VPU_TRACE_PROC_BIT_SHV_15
-#define VPU_TRACE_PROC_BIT_KMB_FIRST VPU_TRACE_PROC_BIT_30XX_FIRST
-#define VPU_TRACE_PROC_BIT_KMB_LAST VPU_TRACE_PROC_BIT_30XX_LAST
struct vpu_boot_l2_cache_config {
u8 use;
@@ -199,6 +195,17 @@ struct vpu_warm_boot_section {
*/
#define POWER_PROFILE_SURVIVABILITY 0x1
+/**
+ * Enum for dvfs_mode boot param.
+ */
+enum vpu_governor {
+ VPU_GOV_DEFAULT = 0, /* Default Governor for the system */
+ VPU_GOV_MAX_PERFORMANCE = 1, /* Maximum performance governor */
+ VPU_GOV_ON_DEMAND = 2, /* On Demand frequency control governor */
+ VPU_GOV_POWER_SAVE = 3, /* Power save governor */
+ VPU_GOV_ON_DEMAND_PRIORITY_AWARE = 4 /* On Demand priority based governor */
+};
+
struct vpu_boot_params {
u32 magic;
u32 vpu_id;
@@ -301,7 +308,14 @@ struct vpu_boot_params {
u32 temp_sensor_period_ms;
/** PLL ratio for efficient clock frequency */
u32 pn_freq_pll_ratio;
- /** DVFS Mode: Default: 0, Max Performance: 1, On Demand: 2, Power Save: 3 */
+ /**
+ * DVFS Mode:
+ * 0 - Default, DVFS mode selected by the firmware
+ * 1 - Max Performance
+ * 2 - On Demand
+ * 3 - Power Save
+ * 4 - On Demand Priority Aware
+ */
u32 dvfs_mode;
/**
* Depending on DVFS Mode:
@@ -332,8 +346,8 @@ struct vpu_boot_params {
u64 d0i3_entry_vpu_ts;
/*
* The system time of the host operating system in microseconds.
- * E.g the number of microseconds since 1st of January 1970, or whatever date the
- * host operating system uses to maintain system time.
+ * E.g the number of microseconds since 1st of January 1970, or whatever
+ * date the host operating system uses to maintain system time.
* This value will be used to track system time on the VPU.
* The KMD is required to update this value on every VPU reset.
*/
@@ -382,10 +396,7 @@ struct vpu_boot_params {
u32 pad6[734];
};
-/*
- * Magic numbers set between host and vpu to detect corruptio of tracing init
- */
-
+/* Magic numbers set between host and vpu to detect corruption of tracing init */
#define VPU_TRACING_BUFFER_CANARY (0xCAFECAFE)
/* Tracing buffer message format definitions */
@@ -405,7 +416,9 @@ struct vpu_tracing_buffer_header {
u32 host_canary_start;
/* offset from start of buffer for trace entries */
u32 read_index;
- u32 pad_to_cache_line_size_0[14];
+ /* keeps track of wrapping on the reader side */
+ u32 read_wrap_count;
+ u32 pad_to_cache_line_size_0[13];
/* End of first cache line */
/**
diff --git a/drivers/accel/ivpu/vpu_jsm_api.h b/drivers/accel/ivpu/vpu_jsm_api.h
index 33f462b1a25d..7215c144158c 100644
--- a/drivers/accel/ivpu/vpu_jsm_api.h
+++ b/drivers/accel/ivpu/vpu_jsm_api.h
@@ -22,7 +22,7 @@
/*
* Minor version changes when API backward compatibility is preserved.
*/
-#define VPU_JSM_API_VER_MINOR 16
+#define VPU_JSM_API_VER_MINOR 25
/*
* API header changed (field names, documentation, formatting) but API itself has not been changed
@@ -36,7 +36,7 @@
/*
* Number of Priority Bands for Hardware Scheduling
- * Bands: RealTime, Focus, Normal, Idle
+ * Bands: Idle(0), Normal(1), Focus(2), RealTime(3)
*/
#define VPU_HWS_NUM_PRIORITY_BANDS 4
@@ -74,6 +74,7 @@
#define VPU_JSM_STATUS_MVNCI_INTERNAL_ERROR 0xCU
/* Job status returned when the job was preempted mid-inference */
#define VPU_JSM_STATUS_PREEMPTED_MID_INFERENCE 0xDU
+#define VPU_JSM_STATUS_MVNCI_CONTEXT_VIOLATION_HW 0xEU
/*
* Host <-> VPU IPC channels.
@@ -86,18 +87,58 @@
/*
* Job flags bit masks.
*/
-#define VPU_JOB_FLAGS_NULL_SUBMISSION_MASK 0x00000001
-#define VPU_JOB_FLAGS_PRIVATE_DATA_MASK 0xFF000000
+enum {
+ /*
+ * Null submission mask.
+ * When set, batch buffer's commands are not processed but returned as
+ * successful immediately, except fences and timestamps.
+ * When cleared, batch buffer's commands are processed normally.
+ * Used for testing and profiling purposes.
+ */
+ VPU_JOB_FLAGS_NULL_SUBMISSION_MASK = (1 << 0U),
+ /*
+ * Inline command mask.
+ * When set, the object in job queue is an inline command (see struct vpu_inline_cmd below).
+ * When cleared, the object in job queue is a job (see struct vpu_job_queue_entry below).
+ */
+ VPU_JOB_FLAGS_INLINE_CMD_MASK = (1 << 1U),
+ /*
+ * VPU private data mask.
+ * Reserved for the VPU to store private data about the job (or inline command)
+ * while being processed.
+ */
+ VPU_JOB_FLAGS_PRIVATE_DATA_MASK = 0xFFFF0000U
+};
/*
- * Sizes of the reserved areas in jobs, in bytes.
+ * Job queue flags bit masks.
*/
-#define VPU_JOB_RESERVED_BYTES 8
+enum {
+ /*
+ * No job done notification mask.
+ * When set, indicates that no job done notification should be sent for any
+ * job from this queue. When cleared, indicates that job done notification
+ * should be sent for every job completed from this queue.
+ */
+ VPU_JOB_QUEUE_FLAGS_NO_JOB_DONE_MASK = (1 << 0U),
+ /*
+ * Native fence usage mask.
+ * When set, indicates that job queue uses native fences (as inline commands
+ * in job queue). Such queues may also use legacy fences (as commands in batch buffers).
+ * When cleared, indicates the job queue only uses legacy fences.
+ * NOTE: For queues using native fences, VPU expects that all jobs in the queue
+ * are immediately followed by an inline command object. This object is expected
+ * to be a fence signal command in most cases, but can also be a NOP in case the host
+ * does not need per-job fence signalling. Other inline commands objects can be
+ * inserted between "job and inline command" pairs.
+ */
+ VPU_JOB_QUEUE_FLAGS_USE_NATIVE_FENCE_MASK = (1 << 1U),
-/*
- * Sizes of the reserved areas in job queues, in bytes.
- */
-#define VPU_JOB_QUEUE_RESERVED_BYTES 52
+ /*
+ * Enable turbo mode for testing NPU performance; not recommended for regular usage.
+ */
+ VPU_JOB_QUEUE_FLAGS_TURBO_MODE = (1 << 2U)
+};
/*
* Max length (including trailing NULL char) of trace entity name (e.g., the
@@ -141,23 +182,112 @@
#define VPU_HWS_INVALID_CMDQ_HANDLE 0ULL
/*
+ * Inline commands types.
+ */
+/*
+ * NOP.
+ * VPU does nothing other than consuming the inline command object.
+ */
+#define VPU_INLINE_CMD_TYPE_NOP 0x0
+/*
+ * Fence wait.
+ * VPU waits for the fence current value to reach monitored value.
+ * Fence wait operations are executed upon job dispatching. While waiting for
+ * the fence to be satisfied, VPU blocks fetching of the next objects in the queue.
+ * Jobs present in the queue prior to the fence wait object may be processed
+ * concurrently.
+ */
+#define VPU_INLINE_CMD_TYPE_FENCE_WAIT 0x1
+/*
+ * Fence signal.
+ * VPU sets the fence current value to the provided value. If new current value
+ * is equal to or higher than monitored value, VPU sends fence signalled notification
+ * to the host. Fence signal operations are executed upon completion of all the jobs
+ * present in the queue prior to them, and in-order relative to each other in the queue.
+ * But jobs in-between them may be processed concurrently and may complete out-of-order.
+ */
+#define VPU_INLINE_CMD_TYPE_FENCE_SIGNAL 0x2
+
+/*
+ * Job scheduling priority bands for both hardware scheduling and OS scheduling.
+ */
+enum vpu_job_scheduling_priority_band {
+ VPU_JOB_SCHEDULING_PRIORITY_BAND_IDLE = 0,
+ VPU_JOB_SCHEDULING_PRIORITY_BAND_NORMAL = 1,
+ VPU_JOB_SCHEDULING_PRIORITY_BAND_FOCUS = 2,
+ VPU_JOB_SCHEDULING_PRIORITY_BAND_REALTIME = 3,
+ VPU_JOB_SCHEDULING_PRIORITY_BAND_COUNT = 4,
+};
+
+/*
* Job format.
+ * Jobs defines the actual workloads to be executed by a given engine.
*/
struct vpu_job_queue_entry {
- u64 batch_buf_addr; /**< Address of VPU commands batch buffer */
- u32 job_id; /**< Job ID */
- u32 flags; /**< Flags bit field, see VPU_JOB_FLAGS_* above */
- u64 root_page_table_addr; /**< Address of root page table to use for this job */
- u64 root_page_table_update_counter; /**< Page tables update events counter */
- u64 primary_preempt_buf_addr;
+ /**< Address of VPU commands batch buffer */
+ u64 batch_buf_addr;
+ /**< Job ID */
+ u32 job_id;
+ /**< Flags bit field, see VPU_JOB_FLAGS_* above */
+ u32 flags;
+ /**
+ * Doorbell ring timestamp taken by KMD from SoC's global system clock, in
+ * microseconds. NPU can convert this value to its own fixed clock's timebase,
+ * to match other profiling timestamps.
+ */
+ u64 doorbell_timestamp;
+ /**< Extra id for job tracking, used only in the firmware perf traces */
+ u64 host_tracking_id;
/**< Address of the primary preemption buffer to use for this job */
- u32 primary_preempt_buf_size;
+ u64 primary_preempt_buf_addr;
/**< Size of the primary preemption buffer to use for this job */
- u32 secondary_preempt_buf_size;
+ u32 primary_preempt_buf_size;
/**< Size of secondary preemption buffer to use for this job */
- u64 secondary_preempt_buf_addr;
+ u32 secondary_preempt_buf_size;
/**< Address of secondary preemption buffer to use for this job */
- u8 reserved_0[VPU_JOB_RESERVED_BYTES];
+ u64 secondary_preempt_buf_addr;
+ u64 reserved_0;
+};
+
+/*
+ * Inline command format.
+ * Inline commands are the commands executed at scheduler level (typically,
+ * synchronization directives). Inline command and job objects must be of
+ * the same size and have flags field at same offset.
+ */
+struct vpu_inline_cmd {
+ u64 reserved_0;
+ /* Inline command type, see VPU_INLINE_CMD_TYPE_* defines. */
+ u32 type;
+ /* Flags bit field, see VPU_JOB_FLAGS_* above. */
+ u32 flags;
+ /* Inline command payload. Depends on inline command type. */
+ union {
+ /* Fence (wait and signal) commands' payload. */
+ struct {
+ /* Fence object handle. */
+ u64 fence_handle;
+ /* User VA of the current fence value. */
+ u64 current_value_va;
+ /* User VA of the monitored fence value (read-only). */
+ u64 monitored_value_va;
+ /* Value to wait for or write in fence location. */
+ u64 value;
+ /* User VA of the log buffer in which to add log entry on completion. */
+ u64 log_buffer_va;
+ } fence;
+ /* Other commands do not have a payload. */
+ /* Payload definition for future inline commands can be inserted here. */
+ u64 reserved_1[6];
+ } payload;
+};
+
+/*
+ * Job queue slots can be populated either with job objects or inline command objects.
+ */
+union vpu_jobq_slot {
+ struct vpu_job_queue_entry job;
+ struct vpu_inline_cmd inline_cmd;
};
/*
@@ -167,7 +297,21 @@ struct vpu_job_queue_header {
u32 engine_idx;
u32 head;
u32 tail;
- u8 reserved_0[VPU_JOB_QUEUE_RESERVED_BYTES];
+ u32 flags;
+ /* Set to 1 to indicate priority_band field is valid */
+ u32 priority_band_valid;
+ /*
+ * Priority for the work of this job queue, valid only if the HWS is NOT used
+ * and the `priority_band_valid` is set to 1. It is applied only during
+ * the VPU_JSM_MSG_REGISTER_DB message processing.
+ * The device firmware might use the `priority_band` to optimize the power
+ * management logic, but it will not affect the order of jobs.
+ * Available priority bands: @see enum vpu_job_scheduling_priority_band
+ */
+ u32 priority_band;
+ /* Inside realtime band assigns a further priority, limited to 0..31 range */
+ u32 realtime_priority_level;
+ u32 reserved_0[9];
};
/*
@@ -175,7 +319,7 @@ struct vpu_job_queue_header {
*/
struct vpu_job_queue {
struct vpu_job_queue_header header;
- struct vpu_job_queue_entry job[];
+ union vpu_jobq_slot slot[];
};
/**
@@ -197,9 +341,7 @@ enum vpu_trace_entity_type {
struct vpu_hws_log_buffer_header {
/* Written by VPU after adding a log entry. Initialised by host to 0. */
u32 first_free_entry_index;
- /* Incremented by VPU every time the VPU overwrites the 0th entry;
- * initialised by host to 0.
- */
+ /* Incremented by VPU every time the VPU writes the 0th entry; initialised by host to 0. */
u32 wraparound_count;
/*
* This is the number of buffers that can be stored in the log buffer provided by the host.
@@ -230,14 +372,80 @@ struct vpu_hws_log_buffer_entry {
u64 operation_data[2];
};
+/* Native fence log buffer types. */
+enum vpu_hws_native_fence_log_type {
+ VPU_HWS_NATIVE_FENCE_LOG_TYPE_WAITS = 1,
+ VPU_HWS_NATIVE_FENCE_LOG_TYPE_SIGNALS = 2
+};
+
+/* HWS native fence log buffer header. */
+struct vpu_hws_native_fence_log_header {
+ union {
+ struct {
+ /* Index of the first free entry in buffer. */
+ u32 first_free_entry_idx;
+ /* Incremented each time NPU wraps around the buffer to write next entry. */
+ u32 wraparound_count;
+ };
+ /* Field allowing atomic update of both fields above. */
+ u64 atomic_wraparound_and_entry_idx;
+ };
+ /* Log buffer type, see enum vpu_hws_native_fence_log_type. */
+ u64 type;
+ /* Allocated number of entries in the log buffer. */
+ u64 entry_nb;
+ u64 reserved[2];
+};
+
+/* Native fence log operation types. */
+enum vpu_hws_native_fence_log_op {
+ VPU_HWS_NATIVE_FENCE_LOG_OP_SIGNAL_EXECUTED = 0,
+ VPU_HWS_NATIVE_FENCE_LOG_OP_WAIT_UNBLOCKED = 1
+};
+
+/* HWS native fence log entry. */
+struct vpu_hws_native_fence_log_entry {
+ /* Newly signaled/unblocked fence value. */
+ u64 fence_value;
+ /* Native fence object handle to which this operation belongs. */
+ u64 fence_handle;
+ /* Operation type, see enum vpu_hws_native_fence_log_op. */
+ u64 op_type;
+ u64 reserved_0;
+ /*
+ * VPU_HWS_NATIVE_FENCE_LOG_OP_WAIT_UNBLOCKED only: Timestamp at which fence
+ * wait was started (in NPU SysTime).
+ */
+ u64 fence_wait_start_ts;
+ u64 reserved_1;
+ /* Timestamp at which fence operation was completed (in NPU SysTime). */
+ u64 fence_end_ts;
+};
+
+/* Native fence log buffer. */
+struct vpu_hws_native_fence_log_buffer {
+ struct vpu_hws_native_fence_log_header header;
+ struct vpu_hws_native_fence_log_entry entry[];
+};
+
/*
* Host <-> VPU IPC messages types.
*/
enum vpu_ipc_msg_type {
VPU_JSM_MSG_UNKNOWN = 0xFFFFFFFF,
+
/* IPC Host -> Device, Async commands */
VPU_JSM_MSG_ASYNC_CMD = 0x1100,
VPU_JSM_MSG_ENGINE_RESET = VPU_JSM_MSG_ASYNC_CMD,
+ /**
+ * Preempt engine. The NPU stops (preempts) all the jobs currently
+ * executing on the target engine making the engine become idle and ready to
+ * execute new jobs.
+ * NOTE: The NPU does not remove unstarted jobs (if any) from job queues of
+ * the target engine, but it stops processing them (until the queue doorbell
+ * is rung again); the host is responsible to reset the job queue, either
+ * after preemption or when resubmitting jobs to the queue.
+ */
VPU_JSM_MSG_ENGINE_PREEMPT = 0x1101,
VPU_JSM_MSG_REGISTER_DB = 0x1102,
VPU_JSM_MSG_UNREGISTER_DB = 0x1103,
@@ -323,9 +531,10 @@ enum vpu_ipc_msg_type {
* NOTE: Please introduce new ASYNC commands before this one. *
*/
VPU_JSM_MSG_STATE_DUMP = 0x11FF,
+
/* IPC Host -> Device, General commands */
VPU_JSM_MSG_GENERAL_CMD = 0x1200,
- VPU_JSM_MSG_BLOB_DEINIT = VPU_JSM_MSG_GENERAL_CMD,
+ VPU_JSM_MSG_BLOB_DEINIT_DEPRECATED = VPU_JSM_MSG_GENERAL_CMD,
/**
* Control dyndbg behavior by executing a dyndbg command; equivalent to
* Linux command: `echo '<dyndbg_cmd>' > <debugfs>/dynamic_debug/control`.
@@ -335,8 +544,12 @@ enum vpu_ipc_msg_type {
* Perform the save procedure for the D0i3 entry
*/
VPU_JSM_MSG_PWR_D0I3_ENTER = 0x1202,
+
/* IPC Device -> Host, Job completion */
VPU_JSM_MSG_JOB_DONE = 0x2100,
+ /* IPC Device -> Host, Fence signalled */
+ VPU_JSM_MSG_NATIVE_FENCE_SIGNALLED = 0x2101,
+
/* IPC Device -> Host, Async command completion */
VPU_JSM_MSG_ASYNC_CMD_DONE = 0x2200,
VPU_JSM_MSG_ENGINE_RESET_DONE = VPU_JSM_MSG_ASYNC_CMD_DONE,
@@ -422,6 +635,7 @@ enum vpu_ipc_msg_type {
* NOTE: Please introduce new ASYNC responses before this one. *
*/
VPU_JSM_MSG_STATE_DUMP_RSP = 0x22FF,
+
/* IPC Device -> Host, General command completion */
VPU_JSM_MSG_GENERAL_CMD_DONE = 0x2300,
VPU_JSM_MSG_BLOB_DEINIT_DONE = VPU_JSM_MSG_GENERAL_CMD_DONE,
@@ -600,11 +814,6 @@ struct vpu_jsm_metric_streamer_update {
u64 next_buffer_size;
};
-struct vpu_ipc_msg_payload_blob_deinit {
- /* 64-bit unique ID for the blob to be de-initialized. */
- u64 blob_id;
-};
-
struct vpu_ipc_msg_payload_job_done {
/* Engine to which the job was submitted. */
u32 engine_idx;
@@ -622,6 +831,21 @@ struct vpu_ipc_msg_payload_job_done {
u64 cmdq_id;
};
+/*
+ * Notification message upon native fence signalling.
+ * @see VPU_JSM_MSG_NATIVE_FENCE_SIGNALLED
+ */
+struct vpu_ipc_msg_payload_native_fence_signalled {
+ /* Engine ID. */
+ u32 engine_idx;
+ /* Host SSID. */
+ u32 host_ssid;
+ /* CMDQ ID */
+ u64 cmdq_id;
+ /* Fence object handle. */
+ u64 fence_handle;
+};
+
struct vpu_jsm_engine_reset_context {
/* Host SSID */
u32 host_ssid;
@@ -700,11 +924,6 @@ struct vpu_ipc_msg_payload_get_power_level_count_done {
u8 power_limit[16];
};
-struct vpu_ipc_msg_payload_blob_deinit_done {
- /* 64-bit unique ID for the blob de-initialized. */
- u64 blob_id;
-};
-
/* HWS priority band setup request / response */
struct vpu_ipc_msg_payload_hws_priority_band_setup {
/*
@@ -794,7 +1013,10 @@ struct vpu_ipc_msg_payload_hws_set_context_sched_properties {
u32 reserved_0;
/* Command queue id */
u64 cmdq_id;
- /* Priority band to assign to work of this context */
+ /*
+ * Priority band to assign to work of this context.
+ * Available priority bands: @see enum vpu_job_scheduling_priority_band
+ */
u32 priority_band;
/* Inside realtime band assigns a further priority */
u32 realtime_priority_level;
@@ -869,9 +1091,7 @@ struct vpu_ipc_msg_payload_hws_set_scheduling_log {
*/
u64 notify_index;
/*
- * Enable extra events to be output to log for debug of scheduling algorithm.
- * Interpreted by VPU as a boolean to enable or disable, expected values are
- * 0 and 1.
+ * Field is now deprecated, will be removed when KMD is updated to support removal
*/
u32 enable_extra_events;
/* Zero Padding */
@@ -1243,10 +1463,10 @@ union vpu_ipc_msg_payload {
struct vpu_jsm_metric_streamer_start metric_streamer_start;
struct vpu_jsm_metric_streamer_stop metric_streamer_stop;
struct vpu_jsm_metric_streamer_update metric_streamer_update;
- struct vpu_ipc_msg_payload_blob_deinit blob_deinit;
struct vpu_ipc_msg_payload_ssid_release ssid_release;
struct vpu_jsm_hws_register_db hws_register_db;
struct vpu_ipc_msg_payload_job_done job_done;
+ struct vpu_ipc_msg_payload_native_fence_signalled native_fence_signalled;
struct vpu_ipc_msg_payload_engine_reset_done engine_reset_done;
struct vpu_ipc_msg_payload_engine_preempt_done engine_preempt_done;
struct vpu_ipc_msg_payload_register_db_done register_db_done;
@@ -1254,7 +1474,6 @@ union vpu_ipc_msg_payload {
struct vpu_ipc_msg_payload_query_engine_hb_done query_engine_hb_done;
struct vpu_ipc_msg_payload_get_power_level_count_done get_power_level_count_done;
struct vpu_jsm_metric_streamer_done metric_streamer_done;
- struct vpu_ipc_msg_payload_blob_deinit_done blob_deinit_done;
struct vpu_ipc_msg_payload_trace_config trace_config;
struct vpu_ipc_msg_payload_trace_capability_rsp trace_capability;
struct vpu_ipc_msg_payload_trace_get_name trace_get_name;
diff --git a/drivers/accel/qaic/qaic_drv.c b/drivers/accel/qaic/qaic_drv.c
index f139c564eadf..10e711c96a67 100644
--- a/drivers/accel/qaic/qaic_drv.c
+++ b/drivers/accel/qaic/qaic_drv.c
@@ -432,7 +432,7 @@ static int init_pci(struct qaic_device *qdev, struct pci_dev *pdev)
int bars;
int ret;
- bars = pci_select_bars(pdev, IORESOURCE_MEM);
+ bars = pci_select_bars(pdev, IORESOURCE_MEM) & 0x3f;
/* make sure the device has the expected BARs */
if (bars != (BIT(0) | BIT(2) | BIT(4))) {