summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/panthor
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/panthor')
-rw-r--r--drivers/gpu/drm/panthor/panthor_devfreq.c41
-rw-r--r--drivers/gpu/drm/panthor/panthor_devfreq.h4
-rw-r--r--drivers/gpu/drm/panthor/panthor_device.c123
-rw-r--r--drivers/gpu/drm/panthor/panthor_device.h86
-rw-r--r--drivers/gpu/drm/panthor/panthor_drv.c268
-rw-r--r--drivers/gpu/drm/panthor/panthor_fw.c213
-rw-r--r--drivers/gpu/drm/panthor/panthor_fw.h6
-rw-r--r--drivers/gpu/drm/panthor/panthor_gem.c248
-rw-r--r--drivers/gpu/drm/panthor/panthor_gem.h87
-rw-r--r--drivers/gpu/drm/panthor/panthor_gpu.c72
-rw-r--r--drivers/gpu/drm/panthor/panthor_gpu.h4
-rw-r--r--drivers/gpu/drm/panthor/panthor_heap.c60
-rw-r--r--drivers/gpu/drm/panthor/panthor_heap.h2
-rw-r--r--drivers/gpu/drm/panthor/panthor_mmu.c185
-rw-r--r--drivers/gpu/drm/panthor/panthor_mmu.h4
-rw-r--r--drivers/gpu/drm/panthor/panthor_regs.h4
-rw-r--r--drivers/gpu/drm/panthor/panthor_sched.c516
-rw-r--r--drivers/gpu/drm/panthor/panthor_sched.h5
18 files changed, 1566 insertions, 362 deletions
diff --git a/drivers/gpu/drm/panthor/panthor_devfreq.c b/drivers/gpu/drm/panthor/panthor_devfreq.c
index c6d3c327cc24..3686515d368d 100644
--- a/drivers/gpu/drm/panthor/panthor_devfreq.c
+++ b/drivers/gpu/drm/panthor/panthor_devfreq.c
@@ -62,14 +62,20 @@ static void panthor_devfreq_update_utilization(struct panthor_devfreq *pdevfreq)
+/*
+ * Look up the OPP recommended for *freq, drop the OPP reference, then set the
+ * device rate to *freq. When the rate change succeeds the new frequency is
+ * cached in ptdev->current_frequency. Returns 0 on success or the negative
+ * error code from the OPP lookup / rate change.
+ */
static int panthor_devfreq_target(struct device *dev, unsigned long *freq,
u32 flags)
{
+ struct panthor_device *ptdev = dev_get_drvdata(dev);
struct dev_pm_opp *opp;
+ int err;
opp = devfreq_recommended_opp(dev, freq, flags);
if (IS_ERR(opp))
return PTR_ERR(opp);
dev_pm_opp_put(opp);
- return dev_pm_opp_set_rate(dev, *freq);
+ err = dev_pm_opp_set_rate(dev, *freq);
+ /* Only record the frequency once the rate change actually succeeded. */
+ if (!err)
+ ptdev->current_frequency = *freq;
+
+ return err;
}
static void panthor_devfreq_reset(struct panthor_devfreq *pdevfreq)
@@ -130,6 +136,7 @@ int panthor_devfreq_init(struct panthor_device *ptdev)
struct panthor_devfreq *pdevfreq;
struct dev_pm_opp *opp;
unsigned long cur_freq;
+ unsigned long freq = ULONG_MAX;
int ret;
pdevfreq = drmm_kzalloc(&ptdev->base, sizeof(*ptdev->devfreq), GFP_KERNEL);
@@ -156,12 +163,6 @@ int panthor_devfreq_init(struct panthor_device *ptdev)
cur_freq = clk_get_rate(ptdev->clks.core);
- opp = devfreq_recommended_opp(dev, &cur_freq, 0);
- if (IS_ERR(opp))
- return PTR_ERR(opp);
-
- panthor_devfreq_profile.initial_freq = cur_freq;
-
/* Regulator coupling only takes care of synchronizing/balancing voltage
* updates, but the coupled regulator needs to be enabled manually.
*
@@ -192,16 +193,30 @@ int panthor_devfreq_init(struct panthor_device *ptdev)
return ret;
}
+ opp = devfreq_recommended_opp(dev, &cur_freq, 0);
+ if (IS_ERR(opp))
+ return PTR_ERR(opp);
+
+ panthor_devfreq_profile.initial_freq = cur_freq;
+ ptdev->current_frequency = cur_freq;
+
/*
* Set the recommend OPP this will enable and configure the regulator
* if any and will avoid a switch off by regulator_late_cleanup()
*/
ret = dev_pm_opp_set_opp(dev, opp);
+ dev_pm_opp_put(opp);
if (ret) {
DRM_DEV_ERROR(dev, "Couldn't set recommended OPP\n");
return ret;
}
+ /* Find the fastest defined rate */
+ opp = dev_pm_opp_find_freq_floor(dev, &freq);
+ if (IS_ERR(opp))
+ return PTR_ERR(opp);
+ ptdev->fast_rate = freq;
+
dev_pm_opp_put(opp);
/*
@@ -228,26 +243,26 @@ int panthor_devfreq_init(struct panthor_device *ptdev)
return 0;
}
+/*
+ * Resume devfreq for @ptdev. Does nothing when no devfreq instance is
+ * attached. Otherwise calls panthor_devfreq_reset() and resumes the devfreq
+ * device; a failure of devfreq_resume_device() triggers a warning instead of
+ * being returned to the caller.
+ */
-int panthor_devfreq_resume(struct panthor_device *ptdev)
+void panthor_devfreq_resume(struct panthor_device *ptdev)
{
struct panthor_devfreq *pdevfreq = ptdev->devfreq;
if (!pdevfreq->devfreq)
- return 0;
+ return;
panthor_devfreq_reset(pdevfreq);
- return devfreq_resume_device(pdevfreq->devfreq);
+ drm_WARN_ON(&ptdev->base, devfreq_resume_device(pdevfreq->devfreq));
}
+/*
+ * Suspend devfreq for @ptdev. Does nothing when no devfreq instance is
+ * attached. A failure of devfreq_suspend_device() triggers a warning instead
+ * of being returned to the caller.
+ */
-int panthor_devfreq_suspend(struct panthor_device *ptdev)
+void panthor_devfreq_suspend(struct panthor_device *ptdev)
{
struct panthor_devfreq *pdevfreq = ptdev->devfreq;
if (!pdevfreq->devfreq)
- return 0;
+ return;
- return devfreq_suspend_device(pdevfreq->devfreq);
+ drm_WARN_ON(&ptdev->base, devfreq_suspend_device(pdevfreq->devfreq));
}
void panthor_devfreq_record_busy(struct panthor_device *ptdev)
diff --git a/drivers/gpu/drm/panthor/panthor_devfreq.h b/drivers/gpu/drm/panthor/panthor_devfreq.h
index 83a5c9522493..b7631de695f7 100644
--- a/drivers/gpu/drm/panthor/panthor_devfreq.h
+++ b/drivers/gpu/drm/panthor/panthor_devfreq.h
@@ -12,8 +12,8 @@ struct panthor_devfreq;
int panthor_devfreq_init(struct panthor_device *ptdev);
-int panthor_devfreq_resume(struct panthor_device *ptdev);
-int panthor_devfreq_suspend(struct panthor_device *ptdev);
+void panthor_devfreq_resume(struct panthor_device *ptdev);
+void panthor_devfreq_suspend(struct panthor_device *ptdev);
void panthor_devfreq_record_busy(struct panthor_device *ptdev);
void panthor_devfreq_record_idle(struct panthor_device *ptdev);
diff --git a/drivers/gpu/drm/panthor/panthor_device.c b/drivers/gpu/drm/panthor/panthor_device.c
index 4082c8f2951d..f0b2da5b2b96 100644
--- a/drivers/gpu/drm/panthor/panthor_device.c
+++ b/drivers/gpu/drm/panthor/panthor_device.c
@@ -22,6 +22,24 @@
#include "panthor_regs.h"
#include "panthor_sched.h"
+/*
+ * Record whether the device is DMA-coherent and validate GPU support for it.
+ *
+ * ptdev->coherent is set from the device's DMA attribute. When the device is
+ * declared coherent, GPU_COHERENCY_FEATURES must advertise the ACE-Lite
+ * protocol bit, otherwise initialization is refused.
+ *
+ * Return: 0 when the device is non-coherent or ACE-Lite is advertised,
+ * -ENOTSUPP otherwise.
+ */
+static int panthor_gpu_coherency_init(struct panthor_device *ptdev)
+{
+	ptdev->coherent = device_get_dma_attr(ptdev->base.dev) == DEV_DMA_COHERENT;
+
+	if (!ptdev->coherent)
+		return 0;
+
+	/* Check if the ACE-Lite coherency protocol is actually supported by the GPU.
+	 * ACE protocol has never been supported for command stream frontend GPUs.
+	 */
+	if ((gpu_read(ptdev, GPU_COHERENCY_FEATURES) &
+	     GPU_COHERENCY_PROT_BIT(ACE_LITE)))
+		return 0;
+
+	/* Log messages are newline-terminated so they are not merged with
+	 * whatever is printed next.
+	 */
+	drm_err(&ptdev->base, "Coherency not supported by the device\n");
+	return -ENOTSUPP;
+}
+
static int panthor_clk_init(struct panthor_device *ptdev)
{
ptdev->clks.core = devm_clk_get(ptdev->base.dev, NULL);
@@ -110,14 +128,11 @@ static void panthor_device_reset_work(struct work_struct *work)
struct panthor_device *ptdev = container_of(work, struct panthor_device, reset.work);
int ret = 0, cookie;
- if (atomic_read(&ptdev->pm.state) != PANTHOR_DEVICE_PM_STATE_ACTIVE) {
- /*
- * No need for a reset as the device has been (or will be)
- * powered down
- */
- atomic_set(&ptdev->reset.pending, 0);
+ /* If the device is entering suspend, we don't reset. A slow reset will
+ * be forced at resume time instead.
+ */
+ if (atomic_read(&ptdev->pm.state) != PANTHOR_DEVICE_PM_STATE_ACTIVE)
return;
- }
if (!drm_dev_enter(&ptdev->base, &cookie))
return;
@@ -156,8 +171,6 @@ int panthor_device_init(struct panthor_device *ptdev)
struct page *p;
int ret;
- ptdev->coherent = device_get_dma_attr(ptdev->base.dev) == DEV_DMA_COHERENT;
-
init_completion(&ptdev->unplug.done);
ret = drmm_mutex_init(&ptdev->base, &ptdev->unplug.lock);
if (ret)
@@ -167,6 +180,11 @@ int panthor_device_init(struct panthor_device *ptdev)
if (ret)
return ret;
+#ifdef CONFIG_DEBUG_FS
+ drmm_mutex_init(&ptdev->base, &ptdev->gems.lock);
+ INIT_LIST_HEAD(&ptdev->gems.node);
+#endif
+
atomic_set(&ptdev->pm.state, PANTHOR_DEVICE_PM_STATE_SUSPENDED);
p = alloc_page(GFP_KERNEL | __GFP_ZERO);
if (!p)
@@ -230,6 +248,10 @@ int panthor_device_init(struct panthor_device *ptdev)
if (ret)
goto err_rpm_put;
+ ret = panthor_gpu_coherency_init(ptdev);
+ if (ret)
+ goto err_unplug_gpu;
+
ret = panthor_mmu_init(ptdev);
if (ret)
goto err_unplug_gpu;
@@ -390,11 +412,15 @@ int panthor_device_mmap_io(struct panthor_device *ptdev, struct vm_area_struct *
{
u64 offset = (u64)vma->vm_pgoff << PAGE_SHIFT;
+ if ((vma->vm_flags & VM_SHARED) == 0)
+ return -EINVAL;
+
switch (offset) {
case DRM_PANTHOR_USER_FLUSH_ID_MMIO_OFFSET:
if (vma->vm_end - vma->vm_start != PAGE_SIZE ||
(vma->vm_flags & (VM_WRITE | VM_EXEC)))
return -EINVAL;
+ vm_flags_clear(vma, VM_MAYWRITE);
break;
@@ -411,6 +437,22 @@ int panthor_device_mmap_io(struct panthor_device *ptdev, struct vm_area_struct *
return 0;
}
+/*
+ * Bring the GPU, MMU and FW blocks back up, in that order.
+ *
+ * If the FW resume fails, the MMU and GPU blocks are suspended again (in
+ * reverse order) and the FW error code is propagated.
+ */
+static int panthor_device_resume_hw_components(struct panthor_device *ptdev)
+{
+	int err;
+
+	panthor_gpu_resume(ptdev);
+	panthor_mmu_resume(ptdev);
+
+	err = panthor_fw_resume(ptdev);
+	if (err) {
+		/* Unwind what was resumed before the FW failed to come up. */
+		panthor_mmu_suspend(ptdev);
+		panthor_gpu_suspend(ptdev);
+	}
+
+	return err;
+}
+
int panthor_device_resume(struct device *dev)
{
struct panthor_device *ptdev = dev_get_drvdata(dev);
@@ -433,31 +475,34 @@ int panthor_device_resume(struct device *dev)
if (ret)
goto err_disable_stacks_clk;
- ret = panthor_devfreq_resume(ptdev);
- if (ret)
- goto err_disable_coregroup_clk;
+ panthor_devfreq_resume(ptdev);
if (panthor_device_is_initialized(ptdev) &&
drm_dev_enter(&ptdev->base, &cookie)) {
- panthor_gpu_resume(ptdev);
- panthor_mmu_resume(ptdev);
- ret = drm_WARN_ON(&ptdev->base, panthor_fw_resume(ptdev));
- if (!ret) {
- panthor_sched_resume(ptdev);
- } else {
- panthor_mmu_suspend(ptdev);
- panthor_gpu_suspend(ptdev);
+ /* If there was a reset pending at the time we suspended the
+ * device, we force a slow reset.
+ */
+ if (atomic_read(&ptdev->reset.pending)) {
+ ptdev->reset.fast = false;
+ atomic_set(&ptdev->reset.pending, 0);
}
+ ret = panthor_device_resume_hw_components(ptdev);
+ if (ret && ptdev->reset.fast) {
+ drm_err(&ptdev->base, "Fast reset failed, trying a slow reset");
+ ptdev->reset.fast = false;
+ ret = panthor_device_resume_hw_components(ptdev);
+ }
+
+ if (!ret)
+ panthor_sched_resume(ptdev);
+
drm_dev_exit(cookie);
if (ret)
goto err_suspend_devfreq;
}
- if (atomic_read(&ptdev->reset.pending))
- queue_work(ptdev->reset.wq, &ptdev->reset.work);
-
/* Clear all IOMEM mappings pointing to this device after we've
* resumed. This way the fake mappings pointing to the dummy pages
* are removed and the real iomem mapping will be restored on next
@@ -472,8 +517,6 @@ int panthor_device_resume(struct device *dev)
err_suspend_devfreq:
panthor_devfreq_suspend(ptdev);
-
-err_disable_coregroup_clk:
clk_disable_unprepare(ptdev->clks.coregroup);
err_disable_stacks_clk:
@@ -484,13 +527,14 @@ err_disable_core_clk:
err_set_suspended:
atomic_set(&ptdev->pm.state, PANTHOR_DEVICE_PM_STATE_SUSPENDED);
+ atomic_set(&ptdev->pm.recovery_needed, 1);
return ret;
}
int panthor_device_suspend(struct device *dev)
{
struct panthor_device *ptdev = dev_get_drvdata(dev);
- int ret, cookie;
+ int cookie;
if (atomic_read(&ptdev->pm.state) != PANTHOR_DEVICE_PM_STATE_ACTIVE)
return -EINVAL;
@@ -522,36 +566,11 @@ int panthor_device_suspend(struct device *dev)
drm_dev_exit(cookie);
}
- ret = panthor_devfreq_suspend(ptdev);
- if (ret) {
- if (panthor_device_is_initialized(ptdev) &&
- drm_dev_enter(&ptdev->base, &cookie)) {
- panthor_gpu_resume(ptdev);
- panthor_mmu_resume(ptdev);
- drm_WARN_ON(&ptdev->base, panthor_fw_resume(ptdev));
- panthor_sched_resume(ptdev);
- drm_dev_exit(cookie);
- }
-
- goto err_set_active;
- }
+ panthor_devfreq_suspend(ptdev);
clk_disable_unprepare(ptdev->clks.coregroup);
clk_disable_unprepare(ptdev->clks.stacks);
clk_disable_unprepare(ptdev->clks.core);
atomic_set(&ptdev->pm.state, PANTHOR_DEVICE_PM_STATE_SUSPENDED);
return 0;
-
-err_set_active:
- /* If something failed and we have to revert back to an
- * active state, we also need to clear the MMIO userspace
- * mappings, so any dumb pages that were mapped while we
- * were trying to suspend gets invalidated.
- */
- mutex_lock(&ptdev->pm.mmio_lock);
- atomic_set(&ptdev->pm.state, PANTHOR_DEVICE_PM_STATE_ACTIVE);
- unmap_mapping_range(ptdev->base.anon_inode->i_mapping,
- DRM_PANTHOR_USER_MMIO_OFFSET, 0, 1);
- mutex_unlock(&ptdev->pm.mmio_lock);
- return ret;
}
diff --git a/drivers/gpu/drm/panthor/panthor_device.h b/drivers/gpu/drm/panthor/panthor_device.h
index e388c0472ba7..465d3ab1b79e 100644
--- a/drivers/gpu/drm/panthor/panthor_device.h
+++ b/drivers/gpu/drm/panthor/panthor_device.h
@@ -9,6 +9,7 @@
#include <linux/atomic.h>
#include <linux/io-pgtable.h>
#include <linux/regulator/consumer.h>
+#include <linux/pm_runtime.h>
#include <linux/sched.h>
#include <linux/spinlock.h>
@@ -67,6 +68,25 @@ struct panthor_irq {
};
/**
+ * enum panthor_device_profiling_flags - Profiling state
+ *
+ * Bit flags; %PANTHOR_DEVICE_PROFILING_ALL is the OR of the individual
+ * sampling flags.
+ */
+enum panthor_device_profiling_flags {
+ /** @PANTHOR_DEVICE_PROFILING_DISABLED: Profiling is disabled. */
+ PANTHOR_DEVICE_PROFILING_DISABLED = 0,
+
+ /** @PANTHOR_DEVICE_PROFILING_CYCLES: Sampling job cycles. */
+ PANTHOR_DEVICE_PROFILING_CYCLES = BIT(0),
+
+ /** @PANTHOR_DEVICE_PROFILING_TIMESTAMP: Sampling job timestamp. */
+ PANTHOR_DEVICE_PROFILING_TIMESTAMP = BIT(1),
+
+ /** @PANTHOR_DEVICE_PROFILING_ALL: Sampling everything. */
+ PANTHOR_DEVICE_PROFILING_ALL =
+ PANTHOR_DEVICE_PROFILING_CYCLES |
+ PANTHOR_DEVICE_PROFILING_TIMESTAMP,
+};
+
+/**
* struct panthor_device - Panthor device
*/
struct panthor_device {
@@ -137,6 +157,17 @@ struct panthor_device {
/** @pending: Set to true if a reset is pending. */
atomic_t pending;
+
+ /**
+ * @fast: True if the post_reset logic can proceed with a fast reset.
+ *
+ * A fast reset is just a reset where the driver doesn't reload the FW sections.
+ *
+ * Any time the firmware is properly suspended, a fast reset can take place.
+ * On the other hand, if the halt operation failed, the driver will reload
+ * all FW sections to make sure we start from a fresh state.
+ */
+ bool fast;
} reset;
/** @pm: Power management related data. */
@@ -161,7 +192,35 @@ struct panthor_device {
* is suspended.
*/
struct page *dummy_latest_flush;
+
+ /** @recovery_needed: True when a resume attempt failed. */
+ atomic_t recovery_needed;
} pm;
+
+ /** @profile_mask: User-set profiling flags for job accounting. */
+ u32 profile_mask;
+
+ /** @current_frequency: Device clock frequency at present. Set by DVFS. */
+ unsigned long current_frequency;
+
+ /** @fast_rate: Maximum device clock frequency. Set by DVFS */
+ unsigned long fast_rate;
+
+#ifdef CONFIG_DEBUG_FS
+ /** @gems: Device-wide list of GEM objects owned by at least one file. */
+ struct {
+ /** @gems.lock: Protects the device-wide list of GEM objects. */
+ struct mutex lock;
+
+ /** @gems.node: Used to keep track of all the device's DRM objects */
+ struct list_head node;
+ } gems;
+#endif
+};
+
+/**
+ * struct panthor_gpu_usage - Accumulated GPU usage counters
+ */
+struct panthor_gpu_usage {
+ /**
+  * @time: Accumulated job execution time. Presumably expressed in timer
+  * ticks, since the fdinfo code scales it by NSEC_PER_SEC over the arch
+  * timer frequency before printing - TODO confirm.
+  */
+ u64 time;
+ /** @cycles: Accumulated cycle count, printed as-is in fdinfo. */
+ u64 cycles;
};
/**
@@ -176,6 +235,9 @@ struct panthor_file {
/** @groups: Scheduling group pool attached to this file. */
struct panthor_group_pool *groups;
+
+ /** @stats: cycle and timestamp measures for job execution. */
+ struct panthor_gpu_usage stats;
};
int panthor_device_init(struct panthor_device *ptdev);
@@ -207,6 +269,28 @@ int panthor_device_mmap_io(struct panthor_device *ptdev,
int panthor_device_resume(struct device *dev);
int panthor_device_suspend(struct device *dev);
+/*
+ * Runtime-resume the device and take a PM reference, clearing the runtime PM
+ * error state when the resume attempt failed.
+ *
+ * Returns the value of pm_runtime_resume_and_get().
+ */
+static inline int panthor_device_resume_and_get(struct panthor_device *ptdev)
+{
+	struct device *dev = ptdev->base.dev;
+	int err = pm_runtime_resume_and_get(dev);
+
+	if (!err)
+		return 0;
+
+	/* If the resume failed, we need to clear the runtime_error, which
+	 * can be done by forcing the RPM state to suspended. If multiple
+	 * threads called panthor_device_resume_and_get(), we only want
+	 * one of them to update the state, hence the cmpxchg. Note that a
+	 * thread might enter panthor_device_resume_and_get() and call
+	 * pm_runtime_resume_and_get() after another thread had attempted
+	 * to resume and failed. This means we will end up with an error
+	 * without even attempting a resume ourselves. The only risk here
+	 * is to report an error when the second resume attempt might have
+	 * succeeded. Given resume errors are not expected, this is probably
+	 * something we can live with.
+	 */
+	if (atomic_cmpxchg(&ptdev->pm.recovery_needed, 1, 0) == 1)
+		pm_runtime_set_suspended(dev);
+
+	return err;
+}
+
enum drm_panthor_exception_type {
DRM_PANTHOR_EXCEPTION_OK = 0x00,
DRM_PANTHOR_EXCEPTION_TERMINATED = 0x04,
@@ -310,8 +394,6 @@ static irqreturn_t panthor_ ## __name ## _irq_threaded_handler(int irq, void *da
if (!status) \
break; \
\
- gpu_write(ptdev, __reg_prefix ## _INT_CLEAR, status); \
- \
__handler(ptdev, status); \
ret = IRQ_HANDLED; \
} \
diff --git a/drivers/gpu/drm/panthor/panthor_drv.c b/drivers/gpu/drm/panthor/panthor_drv.c
index c520f156e2d7..6200cad22563 100644
--- a/drivers/gpu/drm/panthor/panthor_drv.c
+++ b/drivers/gpu/drm/panthor/panthor_drv.c
@@ -3,12 +3,17 @@
/* Copyright 2019 Linaro, Ltd., Rob Herring <robh@kernel.org> */
/* Copyright 2019 Collabora ltd. */
+#ifdef CONFIG_ARM_ARCH_TIMER
+#include <asm/arch_timer.h>
+#endif
+
#include <linux/list.h>
#include <linux/module.h>
#include <linux/of_platform.h>
#include <linux/pagemap.h>
#include <linux/platform_device.h>
#include <linux/pm_runtime.h>
+#include <linux/time64.h>
#include <drm/drm_auth.h>
#include <drm/drm_debugfs.h>
@@ -165,6 +170,8 @@ panthor_get_uobj_array(const struct drm_panthor_obj_array *in, u32 min_stride,
_Generic(_obj_name, \
PANTHOR_UOBJ_DECL(struct drm_panthor_gpu_info, tiler_present), \
PANTHOR_UOBJ_DECL(struct drm_panthor_csif_info, pad), \
+ PANTHOR_UOBJ_DECL(struct drm_panthor_timestamp_info, current_timestamp), \
+ PANTHOR_UOBJ_DECL(struct drm_panthor_group_priorities_info, pad), \
PANTHOR_UOBJ_DECL(struct drm_panthor_sync_op, timeline_value), \
PANTHOR_UOBJ_DECL(struct drm_panthor_queue_submit, syncs), \
PANTHOR_UOBJ_DECL(struct drm_panthor_queue_create, ringbuf_size), \
@@ -751,10 +758,64 @@ static void panthor_submit_ctx_cleanup(struct panthor_submit_ctx *ctx,
kvfree(ctx->jobs);
}
+/*
+ * Fill @arg with the timestamp frequency, the current timestamp and the
+ * timestamp offset read through the panthor_gpu helpers. The device is
+ * runtime-resumed around the reads and the PM reference is dropped afterwards.
+ *
+ * Returns 0 on success, or the error from panthor_device_resume_and_get().
+ */
+static int panthor_query_timestamp_info(struct panthor_device *ptdev,
+ struct drm_panthor_timestamp_info *arg)
+{
+ int ret;
+
+ ret = panthor_device_resume_and_get(ptdev);
+ if (ret)
+ return ret;
+
+#ifdef CONFIG_ARM_ARCH_TIMER
+ arg->timestamp_frequency = arch_timer_get_cntfrq();
+#else
+ /* Without the ARM arch timer, a frequency of 0 is reported. */
+ arg->timestamp_frequency = 0;
+#endif
+ arg->current_timestamp = panthor_gpu_read_timestamp(ptdev);
+ arg->timestamp_offset = panthor_gpu_read_timestamp_offset(ptdev);
+
+ /* Balance the reference taken by panthor_device_resume_and_get(). */
+ pm_runtime_put(ptdev->base.dev);
+ return 0;
+}
+
+/*
+ * Check whether @file may use group priority @priority.
+ *
+ * Returns -EINVAL for a priority above PANTHOR_GROUP_PRIORITY_REALTIME,
+ * -EACCES when a priority above PANTHOR_GROUP_PRIORITY_MEDIUM is requested
+ * without CAP_SYS_NICE or DRM master status, and 0 otherwise.
+ */
+static int group_priority_permit(struct drm_file *file,
+				 u8 priority)
+{
+	/* Reject values outside the known priority range. */
+	if (priority > PANTHOR_GROUP_PRIORITY_REALTIME)
+		return -EINVAL;
+
+	/* Anything above medium is privileged: it needs CAP_SYS_NICE or
+	 * DRM_MASTER. Medium and below are always granted.
+	 */
+	if (priority > PANTHOR_GROUP_PRIORITY_MEDIUM &&
+	    !capable(CAP_SYS_NICE) && !drm_is_current_master(file))
+		return -EACCES;
+
+	return 0;
+}
+
+/*
+ * Zero @arg, then set one bit in @arg->allowed_mask for every priority level
+ * that group_priority_permit() accepts for @file.
+ */
+static void panthor_query_group_priorities_info(struct drm_file *file,
+						struct drm_panthor_group_priorities_info *arg)
+{
+	int prio = PANTHOR_GROUP_PRIORITY_REALTIME;
+
+	memset(arg, 0, sizeof(*arg));
+
+	/* Walk the priorities from the highest down to 0. */
+	while (prio >= 0) {
+		if (group_priority_permit(file, prio) == 0)
+			arg->allowed_mask |= BIT(prio);
+		prio--;
+	}
+}
+
static int panthor_ioctl_dev_query(struct drm_device *ddev, void *data, struct drm_file *file)
{
struct panthor_device *ptdev = container_of(ddev, struct panthor_device, base);
struct drm_panthor_dev_query *args = data;
+ struct drm_panthor_timestamp_info timestamp_info;
+ struct drm_panthor_group_priorities_info priorities_info;
+ int ret;
if (!args->pointer) {
switch (args->type) {
@@ -766,6 +827,14 @@ static int panthor_ioctl_dev_query(struct drm_device *ddev, void *data, struct d
args->size = sizeof(ptdev->csif_info);
return 0;
+ case DRM_PANTHOR_DEV_QUERY_TIMESTAMP_INFO:
+ args->size = sizeof(timestamp_info);
+ return 0;
+
+ case DRM_PANTHOR_DEV_QUERY_GROUP_PRIORITIES_INFO:
+ args->size = sizeof(priorities_info);
+ return 0;
+
default:
return -EINVAL;
}
@@ -778,6 +847,18 @@ static int panthor_ioctl_dev_query(struct drm_device *ddev, void *data, struct d
case DRM_PANTHOR_DEV_QUERY_CSIF_INFO:
return PANTHOR_UOBJ_SET(args->pointer, args->size, ptdev->csif_info);
+ case DRM_PANTHOR_DEV_QUERY_TIMESTAMP_INFO:
+ ret = panthor_query_timestamp_info(ptdev, &timestamp_info);
+
+ if (ret)
+ return ret;
+
+ return PANTHOR_UOBJ_SET(args->pointer, args->size, timestamp_info);
+
+ case DRM_PANTHOR_DEV_QUERY_GROUP_PRIORITIES_INFO:
+ panthor_query_group_priorities_info(file, &priorities_info);
+ return PANTHOR_UOBJ_SET(args->pointer, args->size, priorities_info);
+
default:
return -EINVAL;
}
@@ -859,6 +940,7 @@ static int panthor_ioctl_bo_mmap_offset(struct drm_device *ddev, void *data,
struct drm_file *file)
{
struct drm_panthor_bo_mmap_offset *args = data;
+ struct panthor_gem_object *bo;
struct drm_gem_object *obj;
int ret;
@@ -869,6 +951,12 @@ static int panthor_ioctl_bo_mmap_offset(struct drm_device *ddev, void *data,
if (!obj)
return -ENOENT;
+ bo = to_panthor_bo(obj);
+ if (bo->flags & DRM_PANTHOR_BO_NO_MMAP) {
+ ret = -EPERM;
+ goto out;
+ }
+
ret = drm_gem_create_mmap_offset(obj);
if (ret)
goto out;
@@ -997,24 +1085,6 @@ static int panthor_ioctl_group_destroy(struct drm_device *ddev, void *data,
return panthor_group_destroy(pfile, args->group_handle);
}
-static int group_priority_permit(struct drm_file *file,
- u8 priority)
-{
- /* Ensure that priority is valid */
- if (priority > PANTHOR_GROUP_PRIORITY_HIGH)
- return -EINVAL;
-
- /* Medium priority and below are always allowed */
- if (priority <= PANTHOR_GROUP_PRIORITY_MEDIUM)
- return 0;
-
- /* Higher priorities require CAP_SYS_NICE or DRM_MASTER */
- if (capable(CAP_SYS_NICE) || drm_is_current_master(file))
- return 0;
-
- return -EACCES;
-}
-
static int panthor_ioctl_group_create(struct drm_device *ddev, void *data,
struct drm_file *file)
{
@@ -1268,6 +1338,46 @@ static int panthor_ioctl_vm_get_state(struct drm_device *ddev, void *data,
return 0;
}
+/*
+ * BO_SET_LABEL ioctl handler: attach a user-provided label to a GEM object.
+ *
+ * Returns -EINVAL when the padding field is non-zero, -ENOENT when the handle
+ * does not resolve to an object, the strndup_user() error when copying the
+ * label fails (with -EINVAL remapped to -E2BIG), and 0 otherwise.
+ */
+static int panthor_ioctl_bo_set_label(struct drm_device *ddev, void *data,
+ struct drm_file *file)
+{
+ struct drm_panthor_bo_set_label *args = data;
+ struct drm_gem_object *obj;
+ const char *label = NULL;
+ int ret = 0;
+
+ if (args->pad)
+ return -EINVAL;
+
+ obj = drm_gem_object_lookup(file, args->handle);
+ if (!obj)
+ return -ENOENT;
+
+ if (args->label) {
+ label = strndup_user((const char __user *)(uintptr_t)args->label,
+ PANTHOR_BO_LABEL_MAXLEN);
+ if (IS_ERR(label)) {
+ ret = PTR_ERR(label);
+ /* NOTE(review): -EINVAL presumably means the label
+  * exceeded PANTHOR_BO_LABEL_MAXLEN - confirm.
+  */
+ if (ret == -EINVAL)
+ ret = -E2BIG;
+ goto err_put_obj;
+ }
+ }
+
+ /*
+ * We treat passing a label of length 0 and passing a NULL label
+ * differently, because even though they might seem conceptually
+ * similar, future uses of the BO label might expect a different
+ * behaviour in each case.
+ */
+ /* NOTE(review): the duplicated label is handed to the helper and not
+  * freed here; presumably the helper takes ownership - confirm.
+  */
+ panthor_gem_bo_set_label(obj, label);
+
+err_put_obj:
+ /* Reached on success too: drops the reference taken by the lookup. */
+ drm_gem_object_put(obj);
+
+ return ret;
+}
+
static int
panthor_open(struct drm_device *ddev, struct drm_file *file)
{
@@ -1337,6 +1447,7 @@ static const struct drm_ioctl_desc panthor_drm_driver_ioctls[] = {
PANTHOR_IOCTL(TILER_HEAP_CREATE, tiler_heap_create, DRM_RENDER_ALLOW),
PANTHOR_IOCTL(TILER_HEAP_DESTROY, tiler_heap_destroy, DRM_RENDER_ALLOW),
PANTHOR_IOCTL(GROUP_SUBMIT, group_submit, DRM_RENDER_ALLOW),
+ PANTHOR_IOCTL(BO_SET_LABEL, bo_set_label, DRM_RENDER_ALLOW),
};
static int panthor_mmap(struct file *filp, struct vm_area_struct *vma)
@@ -1374,6 +1485,51 @@ static int panthor_mmap(struct file *filp, struct vm_area_struct *vma)
return ret;
}
+/*
+ * Print the GPU usage part of fdinfo for @pfile: engine time and cycle count
+ * (each only when the matching profiling flag is set in ptdev->profile_mask),
+ * followed by the maximum and current device frequencies.
+ */
+static void panthor_gpu_show_fdinfo(struct panthor_device *ptdev,
+ struct panthor_file *pfile,
+ struct drm_printer *p)
+{
+ /* Refresh the per-file samples only if some profiling is enabled. */
+ if (ptdev->profile_mask & PANTHOR_DEVICE_PROFILING_ALL)
+ panthor_fdinfo_gather_group_samples(pfile);
+
+ if (ptdev->profile_mask & PANTHOR_DEVICE_PROFILING_TIMESTAMP) {
+#ifdef CONFIG_ARM_ARCH_TIMER
+ /* NOTE(review): stats.time * NSEC_PER_SEC is a 64-bit multiply and
+  * could wrap for very large accumulated values - confirm this is
+  * acceptable.
+  */
+ drm_printf(p, "drm-engine-panthor:\t%llu ns\n",
+ DIV_ROUND_UP_ULL((pfile->stats.time * NSEC_PER_SEC),
+ arch_timer_get_cntfrq()));
+#endif
+ }
+ if (ptdev->profile_mask & PANTHOR_DEVICE_PROFILING_CYCLES)
+ drm_printf(p, "drm-cycles-panthor:\t%llu\n", pfile->stats.cycles);
+
+ drm_printf(p, "drm-maxfreq-panthor:\t%lu Hz\n", ptdev->fast_rate);
+ drm_printf(p, "drm-curfreq-panthor:\t%lu Hz\n", ptdev->current_frequency);
+}
+
+/*
+ * Print the "resident" and "active" memory sizes gathered from the file's
+ * groups and VM heaps, keyed by the driver name.
+ */
+static void panthor_show_internal_memory_stats(struct drm_printer *p, struct drm_file *file)
+{
+ char *drv_name = file->minor->dev->driver->name;
+ struct panthor_file *pfile = file->driver_priv;
+ struct drm_memory_stats stats = {0};
+
+ /* Both helpers receive the same zero-initialized stats object. */
+ panthor_fdinfo_gather_group_mem_info(pfile, &stats);
+ panthor_vm_heaps_sizes(pfile, &stats);
+
+ drm_fdinfo_print_size(p, drv_name, "resident", "memory", stats.resident);
+ drm_fdinfo_print_size(p, drv_name, "active", "memory", stats.active);
+}
+
+/*
+ * fdinfo hook: print GPU usage, then the driver-internal memory stats, then
+ * the generic DRM memory stats for @file.
+ */
+static void panthor_show_fdinfo(struct drm_printer *p, struct drm_file *file)
+{
+	struct panthor_device *ptdev =
+		container_of(file->minor->dev, struct panthor_device, base);
+	struct panthor_file *pfile = file->driver_priv;
+
+	panthor_gpu_show_fdinfo(ptdev, pfile, p);
+	panthor_show_internal_memory_stats(p, file);
+	drm_show_memory_stats(p, file);
+}
+
static const struct file_operations panthor_drm_driver_fops = {
.open = drm_open,
.release = drm_release,
@@ -1383,33 +1539,64 @@ static const struct file_operations panthor_drm_driver_fops = {
.read = drm_read,
.llseek = noop_llseek,
.mmap = panthor_mmap,
+ .show_fdinfo = drm_show_fdinfo,
.fop_flags = FOP_UNSIGNED_OFFSET,
};
#ifdef CONFIG_DEBUG_FS
+/*
+ * seq_file show callback for the "gems" debugfs entry: resolves the panthor
+ * device from the info node and lets panthor_gem_debugfs_print_bos() write
+ * into @m. Always returns 0.
+ */
+static int panthor_gems_show(struct seq_file *m, void *data)
+{
+ struct drm_info_node *node = m->private;
+ struct drm_device *dev = node->minor->dev;
+ struct panthor_device *ptdev = container_of(dev, struct panthor_device, base);
+
+ panthor_gem_debugfs_print_bos(ptdev, m);
+
+ return 0;
+}
+
+/* debugfs entries registered by panthor_gems_debugfs_init(). The table is
+ * never modified, so it is declared const.
+ */
+static const struct drm_info_list panthor_debugfs_list[] = {
+	{"gems", panthor_gems_show, 0, NULL},
+};
+
+/*
+ * Register the entries of panthor_debugfs_list under @minor's debugfs root.
+ * Always returns 0.
+ */
+static int panthor_gems_debugfs_init(struct drm_minor *minor)
+{
+ drm_debugfs_create_files(panthor_debugfs_list,
+ ARRAY_SIZE(panthor_debugfs_list),
+ minor->debugfs_root, minor);
+
+ return 0;
+}
+
+/* Set up the driver's debugfs entries for @minor: MMU first, then GEMs. */
static void panthor_debugfs_init(struct drm_minor *minor)
{
panthor_mmu_debugfs_init(minor);
+ panthor_gems_debugfs_init(minor);
}
#endif
/*
* PanCSF driver version:
* - 1.0 - initial interface
+ * - 1.1 - adds DEV_QUERY_TIMESTAMP_INFO query
+ * - 1.2 - adds DEV_QUERY_GROUP_PRIORITIES_INFO query
+ * - adds PANTHOR_GROUP_PRIORITY_REALTIME priority
+ * - 1.3 - adds DRM_PANTHOR_GROUP_STATE_INNOCENT flag
+ * - 1.4 - adds DRM_IOCTL_PANTHOR_BO_SET_LABEL ioctl
*/
static const struct drm_driver panthor_drm_driver = {
.driver_features = DRIVER_RENDER | DRIVER_GEM | DRIVER_SYNCOBJ |
DRIVER_SYNCOBJ_TIMELINE | DRIVER_GEM_GPUVA,
.open = panthor_open,
.postclose = panthor_postclose,
+ .show_fdinfo = panthor_show_fdinfo,
.ioctls = panthor_drm_driver_ioctls,
.num_ioctls = ARRAY_SIZE(panthor_drm_driver_ioctls),
.fops = &panthor_drm_driver_fops,
.name = "panthor",
.desc = "Panthor DRM driver",
- .date = "20230801",
.major = 1,
- .minor = 0,
+ .minor = 4,
.gem_create_object = panthor_gem_create_object,
.gem_prime_import_sg_table = drm_gem_shmem_prime_import_sg_table,
@@ -1439,6 +1626,44 @@ static void panthor_remove(struct platform_device *pdev)
panthor_device_unplug(ptdev);
}
+/*
+ * sysfs read handler for the "profiling" attribute: emits the current
+ * profiling mask as a decimal number followed by a newline.
+ */
+static ssize_t profiling_show(struct device *dev,
+			      struct device_attribute *attr,
+			      char *buf)
+{
+	struct panthor_device *ptdev = dev_get_drvdata(dev);
+
+	/* profile_mask is a u32, so it is printed with an unsigned format. */
+	return sysfs_emit(buf, "%u\n", ptdev->profile_mask);
+}
+
+/*
+ * sysfs write handler for the "profiling" attribute.
+ *
+ * Parses @buf as a u32 (base auto-detected) and stores it as the device
+ * profiling mask. Returns the parse error if the input is not a valid u32,
+ * -EINVAL if bits outside PANTHOR_DEVICE_PROFILING_ALL are set, and @len on
+ * success.
+ */
+static ssize_t profiling_store(struct device *dev,
+			       struct device_attribute *attr,
+			       const char *buf, size_t len)
+{
+	struct panthor_device *ptdev = dev_get_drvdata(dev);
+	u32 mask;
+	int ret = kstrtou32(buf, 0, &mask);
+
+	if (ret)
+		return ret;
+
+	/* Only known profiling flags may be set. */
+	if (mask & ~PANTHOR_DEVICE_PROFILING_ALL)
+		return -EINVAL;
+
+	ptdev->profile_mask = mask;
+	return len;
+}
+
+/* Declares dev_attr_profiling, used in the attribute table below. */
+static DEVICE_ATTR_RW(profiling);
+
+/* NULL-terminated list of the device attributes exposed by the driver. */
+static struct attribute *panthor_attrs[] = {
+ &dev_attr_profiling.attr,
+ NULL,
+};
+
+/* Provides panthor_groups, referenced by the platform driver's dev_groups. */
+ATTRIBUTE_GROUPS(panthor);
+
static const struct of_device_id dt_match[] = {
{ .compatible = "rockchip,rk3588-mali" },
{ .compatible = "arm,mali-valhall-csf" },
@@ -1453,11 +1678,12 @@ static DEFINE_RUNTIME_DEV_PM_OPS(panthor_pm_ops,
static struct platform_driver panthor_driver = {
.probe = panthor_probe,
- .remove_new = panthor_remove,
+ .remove = panthor_remove,
.driver = {
.name = "panthor",
.pm = pm_ptr(&panthor_pm_ops),
.of_match_table = dt_match,
+ .dev_groups = panthor_groups,
},
};
diff --git a/drivers/gpu/drm/panthor/panthor_fw.c b/drivers/gpu/drm/panthor/panthor_fw.c
index ef232c0c2049..7bc38e635329 100644
--- a/drivers/gpu/drm/panthor/panthor_fw.c
+++ b/drivers/gpu/drm/panthor/panthor_fw.c
@@ -12,6 +12,7 @@
#include <linux/iosys-map.h>
#include <linux/mutex.h>
#include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
#include <drm/drm_drv.h>
#include <drm/drm_managed.h>
@@ -78,6 +79,12 @@ enum panthor_fw_binary_entry_type {
/** @CSF_FW_BINARY_ENTRY_TYPE_TIMELINE_METADATA: Timeline metadata interface. */
CSF_FW_BINARY_ENTRY_TYPE_TIMELINE_METADATA = 4,
+
+ /**
+ * @CSF_FW_BINARY_ENTRY_TYPE_BUILD_INFO_METADATA: Metadata about how
+ * the FW binary was built.
+ */
+ CSF_FW_BINARY_ENTRY_TYPE_BUILD_INFO_METADATA = 6
};
#define CSF_FW_BINARY_ENTRY_TYPE(ehdr) ((ehdr) & 0xff)
@@ -85,26 +92,26 @@ enum panthor_fw_binary_entry_type {
#define CSF_FW_BINARY_ENTRY_UPDATE BIT(30)
#define CSF_FW_BINARY_ENTRY_OPTIONAL BIT(31)
-#define CSF_FW_BINARY_IFACE_ENTRY_RD_RD BIT(0)
-#define CSF_FW_BINARY_IFACE_ENTRY_RD_WR BIT(1)
-#define CSF_FW_BINARY_IFACE_ENTRY_RD_EX BIT(2)
-#define CSF_FW_BINARY_IFACE_ENTRY_RD_CACHE_MODE_NONE (0 << 3)
-#define CSF_FW_BINARY_IFACE_ENTRY_RD_CACHE_MODE_CACHED (1 << 3)
-#define CSF_FW_BINARY_IFACE_ENTRY_RD_CACHE_MODE_UNCACHED_COHERENT (2 << 3)
-#define CSF_FW_BINARY_IFACE_ENTRY_RD_CACHE_MODE_CACHED_COHERENT (3 << 3)
-#define CSF_FW_BINARY_IFACE_ENTRY_RD_CACHE_MODE_MASK GENMASK(4, 3)
-#define CSF_FW_BINARY_IFACE_ENTRY_RD_PROT BIT(5)
-#define CSF_FW_BINARY_IFACE_ENTRY_RD_SHARED BIT(30)
-#define CSF_FW_BINARY_IFACE_ENTRY_RD_ZERO BIT(31)
-
-#define CSF_FW_BINARY_IFACE_ENTRY_RD_SUPPORTED_FLAGS \
- (CSF_FW_BINARY_IFACE_ENTRY_RD_RD | \
- CSF_FW_BINARY_IFACE_ENTRY_RD_WR | \
- CSF_FW_BINARY_IFACE_ENTRY_RD_EX | \
- CSF_FW_BINARY_IFACE_ENTRY_RD_CACHE_MODE_MASK | \
- CSF_FW_BINARY_IFACE_ENTRY_RD_PROT | \
- CSF_FW_BINARY_IFACE_ENTRY_RD_SHARED | \
- CSF_FW_BINARY_IFACE_ENTRY_RD_ZERO)
+#define CSF_FW_BINARY_IFACE_ENTRY_RD BIT(0)
+#define CSF_FW_BINARY_IFACE_ENTRY_WR BIT(1)
+#define CSF_FW_BINARY_IFACE_ENTRY_EX BIT(2)
+#define CSF_FW_BINARY_IFACE_ENTRY_CACHE_MODE_NONE (0 << 3)
+#define CSF_FW_BINARY_IFACE_ENTRY_CACHE_MODE_CACHED (1 << 3)
+#define CSF_FW_BINARY_IFACE_ENTRY_CACHE_MODE_UNCACHED_COHERENT (2 << 3)
+#define CSF_FW_BINARY_IFACE_ENTRY_CACHE_MODE_CACHED_COHERENT (3 << 3)
+#define CSF_FW_BINARY_IFACE_ENTRY_CACHE_MODE_MASK GENMASK(4, 3)
+#define CSF_FW_BINARY_IFACE_ENTRY_PROT BIT(5)
+#define CSF_FW_BINARY_IFACE_ENTRY_SHARED BIT(30)
+#define CSF_FW_BINARY_IFACE_ENTRY_ZERO BIT(31)
+
+#define CSF_FW_BINARY_IFACE_ENTRY_SUPPORTED_FLAGS \
+ (CSF_FW_BINARY_IFACE_ENTRY_RD | \
+ CSF_FW_BINARY_IFACE_ENTRY_WR | \
+ CSF_FW_BINARY_IFACE_ENTRY_EX | \
+ CSF_FW_BINARY_IFACE_ENTRY_CACHE_MODE_MASK | \
+ CSF_FW_BINARY_IFACE_ENTRY_PROT | \
+ CSF_FW_BINARY_IFACE_ENTRY_SHARED | \
+ CSF_FW_BINARY_IFACE_ENTRY_ZERO)
/**
* struct panthor_fw_binary_section_entry_hdr - Describes a section of FW binary
@@ -132,6 +139,13 @@ struct panthor_fw_binary_section_entry_hdr {
} data;
};
+/**
+ * struct panthor_fw_build_info_hdr - Locates the build info data in the FW binary
+ */
+struct panthor_fw_build_info_hdr {
+ /** @meta_start: Offset of the build info data in the FW binary */
+ u32 meta_start;
+ /** @meta_size: Size of the build info data in the FW binary */
+ u32 meta_size;
+};
+
/**
* struct panthor_fw_binary_iter - Firmware binary iterator
*
@@ -187,7 +201,6 @@ struct panthor_fw_section {
#define MIN_CS_PER_CSG 8
#define MIN_CSGS 3
-#define MAX_CSG_PRIO 0xf
#define CSF_IFACE_VERSION(major, minor, patch) \
(((major) << 24) | ((minor) << 16) | (patch))
@@ -249,17 +262,6 @@ struct panthor_fw {
/** @booted: True is the FW is booted */
bool booted;
- /**
- * @fast_reset: True if the post_reset logic can proceed with a fast reset.
- *
- * A fast reset is just a reset where the driver doesn't reload the FW sections.
- *
- * Any time the firmware is properly suspended, a fast reset can take place.
- * On the other hand, if the halt operation failed, the driver will reload
- * all sections to make sure we start from a fresh state.
- */
- bool fast_reset;
-
/** @irq: Job irq data. */
struct panthor_irq irq;
};
@@ -400,7 +402,7 @@ static void panthor_fw_init_section_mem(struct panthor_device *ptdev,
int ret;
if (!section->data.size &&
- !(section->flags & CSF_FW_BINARY_IFACE_ENTRY_RD_ZERO))
+ !(section->flags & CSF_FW_BINARY_IFACE_ENTRY_ZERO))
return;
ret = panthor_kernel_bo_vmap(section->mem);
@@ -408,7 +410,7 @@ static void panthor_fw_init_section_mem(struct panthor_device *ptdev,
return;
memcpy(section->mem->kmap, section->data.buf, section->data.size);
- if (section->flags & CSF_FW_BINARY_IFACE_ENTRY_RD_ZERO) {
+ if (section->flags & CSF_FW_BINARY_IFACE_ENTRY_ZERO) {
memset(section->mem->kmap + section->data.size, 0,
panthor_kernel_bo_size(section->mem) - section->data.size);
}
@@ -447,7 +449,8 @@ panthor_fw_alloc_queue_iface_mem(struct panthor_device *ptdev,
DRM_PANTHOR_BO_NO_MMAP,
DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC |
DRM_PANTHOR_VM_BIND_OP_MAP_UNCACHED,
- PANTHOR_VM_KERNEL_AUTO_VA);
+ PANTHOR_VM_KERNEL_AUTO_VA,
+ "Queue FW interface");
if (IS_ERR(mem))
return mem;
@@ -479,7 +482,8 @@ panthor_fw_alloc_suspend_buf_mem(struct panthor_device *ptdev, size_t size)
return panthor_kernel_bo_create(ptdev, panthor_fw_vm(ptdev), size,
DRM_PANTHOR_BO_NO_MMAP,
DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC,
- PANTHOR_VM_KERNEL_AUTO_VA);
+ PANTHOR_VM_KERNEL_AUTO_VA,
+ "FW suspend buffer");
}
static int panthor_fw_load_section_entry(struct panthor_device *ptdev,
@@ -487,6 +491,7 @@ static int panthor_fw_load_section_entry(struct panthor_device *ptdev,
struct panthor_fw_binary_iter *iter,
u32 ehdr)
{
+ ssize_t vm_pgsz = panthor_vm_page_size(ptdev->fw->vm);
struct panthor_fw_binary_section_entry_hdr hdr;
struct panthor_fw_section *section;
u32 section_size;
@@ -515,27 +520,26 @@ static int panthor_fw_load_section_entry(struct panthor_device *ptdev,
return -EINVAL;
}
- if ((hdr.va.start & ~PAGE_MASK) != 0 ||
- (hdr.va.end & ~PAGE_MASK) != 0) {
+ if (!IS_ALIGNED(hdr.va.start, vm_pgsz) || !IS_ALIGNED(hdr.va.end, vm_pgsz)) {
drm_err(&ptdev->base, "Firmware corrupted, virtual addresses not page aligned: 0x%x-0x%x\n",
hdr.va.start, hdr.va.end);
return -EINVAL;
}
- if (hdr.flags & ~CSF_FW_BINARY_IFACE_ENTRY_RD_SUPPORTED_FLAGS) {
+ if (hdr.flags & ~CSF_FW_BINARY_IFACE_ENTRY_SUPPORTED_FLAGS) {
drm_err(&ptdev->base, "Firmware contains interface with unsupported flags (0x%x)\n",
hdr.flags);
return -EINVAL;
}
- if (hdr.flags & CSF_FW_BINARY_IFACE_ENTRY_RD_PROT) {
+ if (hdr.flags & CSF_FW_BINARY_IFACE_ENTRY_PROT) {
drm_warn(&ptdev->base,
"Firmware protected mode entry not be supported, ignoring");
return 0;
}
if (hdr.va.start == CSF_MCU_SHARED_REGION_START &&
- !(hdr.flags & CSF_FW_BINARY_IFACE_ENTRY_RD_SHARED)) {
+ !(hdr.flags & CSF_FW_BINARY_IFACE_ENTRY_SHARED)) {
drm_err(&ptdev->base,
"Interface at 0x%llx must be shared", CSF_MCU_SHARED_REGION_START);
return -EINVAL;
@@ -574,39 +578,39 @@ static int panthor_fw_load_section_entry(struct panthor_device *ptdev,
section_size = hdr.va.end - hdr.va.start;
if (section_size) {
- u32 cache_mode = hdr.flags & CSF_FW_BINARY_IFACE_ENTRY_RD_CACHE_MODE_MASK;
+ u32 cache_mode = hdr.flags & CSF_FW_BINARY_IFACE_ENTRY_CACHE_MODE_MASK;
struct panthor_gem_object *bo;
u32 vm_map_flags = 0;
struct sg_table *sgt;
u64 va = hdr.va.start;
- if (!(hdr.flags & CSF_FW_BINARY_IFACE_ENTRY_RD_WR))
+ if (!(hdr.flags & CSF_FW_BINARY_IFACE_ENTRY_WR))
vm_map_flags |= DRM_PANTHOR_VM_BIND_OP_MAP_READONLY;
- if (!(hdr.flags & CSF_FW_BINARY_IFACE_ENTRY_RD_EX))
+ if (!(hdr.flags & CSF_FW_BINARY_IFACE_ENTRY_EX))
vm_map_flags |= DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC;
- /* TODO: CSF_FW_BINARY_IFACE_ENTRY_RD_CACHE_MODE_*_COHERENT are mapped to
+ /* TODO: CSF_FW_BINARY_IFACE_ENTRY_CACHE_MODE_*_COHERENT are mapped to
* non-cacheable for now. We might want to introduce a new
* IOMMU_xxx flag (or abuse IOMMU_MMIO, which maps to device
* memory and is currently not used by our driver) for
* AS_MEMATTR_AARCH64_SHARED memory, so we can take benefit
* of IO-coherent systems.
*/
- if (cache_mode != CSF_FW_BINARY_IFACE_ENTRY_RD_CACHE_MODE_CACHED)
+ if (cache_mode != CSF_FW_BINARY_IFACE_ENTRY_CACHE_MODE_CACHED)
vm_map_flags |= DRM_PANTHOR_VM_BIND_OP_MAP_UNCACHED;
section->mem = panthor_kernel_bo_create(ptdev, panthor_fw_vm(ptdev),
section_size,
DRM_PANTHOR_BO_NO_MMAP,
- vm_map_flags, va);
+ vm_map_flags, va, "FW section");
if (IS_ERR(section->mem))
return PTR_ERR(section->mem);
if (drm_WARN_ON(&ptdev->base, section->mem->va_node.start != hdr.va.start))
return -EINVAL;
- if (section->flags & CSF_FW_BINARY_IFACE_ENTRY_RD_SHARED) {
+ if (section->flags & CSF_FW_BINARY_IFACE_ENTRY_SHARED) {
ret = panthor_kernel_bo_vmap(section->mem);
if (ret)
return ret;
@@ -628,6 +632,45 @@ static int panthor_fw_load_section_entry(struct panthor_device *ptdev,
return 0;
}
+/**
+ * panthor_fw_read_build_info() - Log the git SHA embedded in the FW binary
+ * @ptdev: Device.
+ * @fw: Firmware binary.
+ * @iter: Iterator the build info header is read from.
+ * @ehdr: Entry header (unused).
+ *
+ * Build information is purely informative: when the metadata is malformed
+ * or not understood, it is ignored and the FW load carries on.
+ *
+ * Return: 0 when the build info was logged or ignored, or the error returned
+ * by panthor_fw_binary_iter_read() if the header can't be read.
+ */
+static int panthor_fw_read_build_info(struct panthor_device *ptdev,
+				      const struct firmware *fw,
+				      struct panthor_fw_binary_iter *iter,
+				      u32 ehdr)
+{
+	struct panthor_fw_build_info_hdr hdr;
+	static const char git_sha_header[] = "git_sha: ";
+	const size_t header_len = sizeof(git_sha_header) - 1;
+	const u8 *meta;
+	int ret;
+
+	ret = panthor_fw_binary_iter_read(ptdev, iter, &hdr, sizeof(hdr));
+	if (ret)
+		return ret;
+
+	/* Compare the size against what's left after meta_start rather than
+	 * summing the two u32 fields, so the check can't be defeated by a
+	 * wrap-around.
+	 */
+	if (hdr.meta_start > fw->size ||
+	    hdr.meta_size > fw->size - hdr.meta_start) {
+		drm_err(&ptdev->base, "Firmware build info corrupt\n");
+		/* We don't need the build info, so continue */
+		return 0;
+	}
+
+	/* Too short to even hold the header we're looking for: don't let
+	 * memcmp() read past the metadata (and possibly past the binary).
+	 * This also covers meta_size == 0.
+	 */
+	if (hdr.meta_size < header_len)
+		return 0;
+
+	meta = fw->data + hdr.meta_start;
+
+	if (memcmp(git_sha_header, meta, header_len)) {
+		/* Not the expected header, this isn't metadata we understand */
+		return 0;
+	}
+
+	/* Check that the git SHA is NULL terminated as expected */
+	if (meta[hdr.meta_size - 1] != '\0') {
+		drm_warn(&ptdev->base, "Firmware's git sha is not NULL terminated\n");
+		/* Don't treat as fatal */
+		return 0;
+	}
+
+	drm_info(&ptdev->base, "Firmware git sha: %s\n", meta + header_len);
+
+	return 0;
+}
+
static void
panthor_reload_fw_sections(struct panthor_device *ptdev, bool full_reload)
{
@@ -636,7 +679,7 @@ panthor_reload_fw_sections(struct panthor_device *ptdev, bool full_reload)
list_for_each_entry(section, &ptdev->fw->sections, node) {
struct sg_table *sgt;
- if (!full_reload && !(section->flags & CSF_FW_BINARY_IFACE_ENTRY_RD_WR))
+ if (!full_reload && !(section->flags & CSF_FW_BINARY_IFACE_ENTRY_WR))
continue;
panthor_fw_init_section_mem(ptdev, section);
@@ -672,6 +715,8 @@ static int panthor_fw_load_entry(struct panthor_device *ptdev,
switch (CSF_FW_BINARY_ENTRY_TYPE(ehdr)) {
case CSF_FW_BINARY_ENTRY_TYPE_IFACE:
return panthor_fw_load_section_entry(ptdev, fw, &eiter, ehdr);
+ case CSF_FW_BINARY_ENTRY_TYPE_BUILD_INFO_METADATA:
+ return panthor_fw_read_build_info(ptdev, fw, &eiter, ehdr);
/* FIXME: handle those entry types? */
case CSF_FW_BINARY_ENTRY_TYPE_CONFIG:
@@ -921,7 +966,7 @@ static int panthor_fw_init_ifaces(struct panthor_device *ptdev)
return ret;
}
- drm_info(&ptdev->base, "CSF FW v%d.%d.%d, Features %#x Instrumentation features %#x",
+ drm_info(&ptdev->base, "CSF FW using interface v%d.%d.%d, Features %#x Instrumentation features %#x",
CSF_IFACE_VERSION_MAJOR(glb_iface->control->version),
CSF_IFACE_VERSION_MINOR(glb_iface->control->version),
CSF_IFACE_VERSION_PATCH(glb_iface->control->version),
@@ -965,6 +1010,8 @@ static void panthor_fw_init_global_iface(struct panthor_device *ptdev)
static void panthor_job_irq_handler(struct panthor_device *ptdev, u32 status)
{
+ gpu_write(ptdev, JOB_INT_CLEAR, status);
+
if (!ptdev->fw->booted && (status & JOB_INT_GLOBAL_IF))
ptdev->fw->booted = true;
@@ -1034,7 +1081,7 @@ void panthor_fw_pre_reset(struct panthor_device *ptdev, bool on_hang)
/* Make sure we won't be woken up by a ping. */
cancel_delayed_work_sync(&ptdev->fw->watchdog.ping_work);
- ptdev->fw->fast_reset = false;
+ ptdev->reset.fast = false;
if (!on_hang) {
struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev);
@@ -1043,17 +1090,11 @@ void panthor_fw_pre_reset(struct panthor_device *ptdev, bool on_hang)
panthor_fw_update_reqs(glb_iface, req, GLB_HALT, GLB_HALT);
gpu_write(ptdev, CSF_DOORBELL(CSF_GLB_DOORBELL_ID), 1);
if (!readl_poll_timeout(ptdev->iomem + MCU_STATUS, status,
- status == MCU_STATUS_HALT, 10, 100000) &&
- glb_iface->output->halt_status == PANTHOR_FW_HALT_OK) {
- ptdev->fw->fast_reset = true;
+ status == MCU_STATUS_HALT, 10, 100000)) {
+ ptdev->reset.fast = true;
} else {
drm_warn(&ptdev->base, "Failed to cleanly suspend MCU");
}
-
- /* The FW detects 0 -> 1 transitions. Make sure we reset
- * the HALT bit before the FW is rebooted.
- */
- panthor_fw_update_reqs(glb_iface, req, 0, GLB_HALT);
}
panthor_job_irq_suspend(&ptdev->fw->irq);
@@ -1075,41 +1116,30 @@ int panthor_fw_post_reset(struct panthor_device *ptdev)
if (ret)
return ret;
- /* If this is a fast reset, try to start the MCU without reloading
- * the FW sections. If it fails, go for a full reset.
- */
- if (ptdev->fw->fast_reset) {
- ret = panthor_fw_start(ptdev);
- if (!ret)
- goto out;
-
- /* Forcibly reset the MCU and force a slow reset, so we get a
- * fresh boot on the next panthor_fw_start() call.
+ if (!ptdev->reset.fast) {
+ /* On a slow reset, reload all sections, including RO ones.
+ * We're not supposed to end up here anyway, let's just assume
+ * the overhead of reloading everything is acceptable.
*/
- panthor_fw_stop(ptdev);
- ptdev->fw->fast_reset = false;
- drm_err(&ptdev->base, "FW fast reset failed, trying a slow reset");
+ panthor_reload_fw_sections(ptdev, true);
+ } else {
+ /* The FW detects 0 -> 1 transitions. Make sure we reset
+ * the HALT bit before the FW is rebooted.
+ * This is not needed on a slow reset because FW sections are
+ * re-initialized.
+ */
+ struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev);
- ret = panthor_vm_flush_all(ptdev->fw->vm);
- if (ret) {
- drm_err(&ptdev->base, "FW slow reset failed (couldn't flush FW's AS l2cache)");
- return ret;
- }
+ panthor_fw_update_reqs(glb_iface, req, 0, GLB_HALT);
}
- /* Reload all sections, including RO ones. We're not supposed
- * to end up here anyway, let's just assume the overhead of
- * reloading everything is acceptable.
- */
- panthor_reload_fw_sections(ptdev, true);
-
ret = panthor_fw_start(ptdev);
if (ret) {
- drm_err(&ptdev->base, "FW slow reset failed (couldn't start the FW )");
+ drm_err(&ptdev->base, "FW %s reset failed",
+ ptdev->reset.fast ? "fast" : "slow");
return ret;
}
-out:
/* We must re-initialize the global interface even on fast-reset. */
panthor_fw_init_global_iface(ptdev);
return 0;
@@ -1133,11 +1163,13 @@ void panthor_fw_unplug(struct panthor_device *ptdev)
cancel_delayed_work_sync(&ptdev->fw->watchdog.ping_work);
- /* Make sure the IRQ handler can be called after that point. */
- if (ptdev->fw->irq.irq)
- panthor_job_irq_suspend(&ptdev->fw->irq);
+ if (!IS_ENABLED(CONFIG_PM) || pm_runtime_active(ptdev->base.dev)) {
+ /* Make sure the IRQ handler cannot be called after that point. */
+ if (ptdev->fw->irq.irq)
+ panthor_job_irq_suspend(&ptdev->fw->irq);
- panthor_fw_stop(ptdev);
+ panthor_fw_stop(ptdev);
+ }
list_for_each_entry(section, &ptdev->fw->sections, node)
panthor_kernel_bo_destroy(section->mem);
@@ -1150,7 +1182,8 @@ void panthor_fw_unplug(struct panthor_device *ptdev)
panthor_vm_put(ptdev->fw->vm);
ptdev->fw->vm = NULL;
- panthor_gpu_power_off(ptdev, L2, ptdev->gpu_info.l2_present, 20000);
+ if (!IS_ENABLED(CONFIG_PM) || pm_runtime_active(ptdev->base.dev))
+ panthor_gpu_power_off(ptdev, L2, ptdev->gpu_info.l2_present, 20000);
}
/**
diff --git a/drivers/gpu/drm/panthor/panthor_fw.h b/drivers/gpu/drm/panthor/panthor_fw.h
index 22448abde992..6598d96c6d2a 100644
--- a/drivers/gpu/drm/panthor/panthor_fw.h
+++ b/drivers/gpu/drm/panthor/panthor_fw.h
@@ -102,9 +102,9 @@ struct panthor_fw_cs_output_iface {
#define CS_STATUS_BLOCKED_REASON_SB_WAIT 1
#define CS_STATUS_BLOCKED_REASON_PROGRESS_WAIT 2
#define CS_STATUS_BLOCKED_REASON_SYNC_WAIT 3
-#define CS_STATUS_BLOCKED_REASON_DEFERRED 5
-#define CS_STATUS_BLOCKED_REASON_RES 6
-#define CS_STATUS_BLOCKED_REASON_FLUSH 7
+#define CS_STATUS_BLOCKED_REASON_DEFERRED 4
+#define CS_STATUS_BLOCKED_REASON_RESOURCE 5
+#define CS_STATUS_BLOCKED_REASON_FLUSH 6
#define CS_STATUS_BLOCKED_REASON_MASK GENMASK(3, 0)
u32 status_blocked_reason;
u32 status_wait_sync_value_hi;
diff --git a/drivers/gpu/drm/panthor/panthor_gem.c b/drivers/gpu/drm/panthor/panthor_gem.c
index 38f560864879..7c00fd77758b 100644
--- a/drivers/gpu/drm/panthor/panthor_gem.c
+++ b/drivers/gpu/drm/panthor/panthor_gem.c
@@ -2,6 +2,7 @@
/* Copyright 2019 Linaro, Ltd, Rob Herring <robh@kernel.org> */
/* Copyright 2023 Collabora ltd. */
+#include <linux/cleanup.h>
#include <linux/dma-buf.h>
#include <linux/dma-mapping.h>
#include <linux/err.h>
@@ -10,14 +11,64 @@
#include <drm/panthor_drm.h>
#include "panthor_device.h"
+#include "panthor_fw.h"
#include "panthor_gem.h"
#include "panthor_mmu.h"
+#ifdef CONFIG_DEBUG_FS
+/*
+ * Record the creating process (group leader PID and name) in the BO and
+ * append the BO to the device-wide ptdev->gems list, under ptdev->gems.lock.
+ */
+static void panthor_gem_debugfs_bo_add(struct panthor_device *ptdev,
+ struct panthor_gem_object *bo)
+{
+ INIT_LIST_HEAD(&bo->debugfs.node);
+
+ bo->debugfs.creator.tgid = current->group_leader->pid;
+ get_task_comm(bo->debugfs.creator.process_name, current->group_leader);
+
+ mutex_lock(&ptdev->gems.lock);
+ list_add_tail(&bo->debugfs.node, &ptdev->gems.node);
+ mutex_unlock(&ptdev->gems.lock);
+}
+
+/*
+ * Remove the BO from the device-wide list. Does nothing when the BO's node
+ * is not linked into a list.
+ */
+static void panthor_gem_debugfs_bo_rm(struct panthor_gem_object *bo)
+{
+ struct panthor_device *ptdev = container_of(bo->base.base.dev,
+ struct panthor_device, base);
+
+ if (list_empty(&bo->debugfs.node))
+ return;
+
+ mutex_lock(&ptdev->gems.lock);
+ list_del_init(&bo->debugfs.node);
+ mutex_unlock(&ptdev->gems.lock);
+}
+
+/*
+ * Store the usage flags and always set the INITIALIZED bit on top of them.
+ */
+static void panthor_gem_debugfs_set_usage_flags(struct panthor_gem_object *bo, u32 usage_flags)
+{
+ bo->debugfs.flags = usage_flags | PANTHOR_DEBUGFS_GEM_USAGE_FLAG_INITIALIZED;
+}
+#else
+/* No-op stubs used when CONFIG_DEBUG_FS is not set. */
+static void panthor_gem_debugfs_bo_add(struct panthor_device *ptdev,
+ struct panthor_gem_object *bo)
+{}
+static void panthor_gem_debugfs_bo_rm(struct panthor_gem_object *bo) {}
+static void panthor_gem_debugfs_set_usage_flags(struct panthor_gem_object *bo, u32 usage_flags) {}
+#endif
+
static void panthor_gem_free_object(struct drm_gem_object *obj)
{
struct panthor_gem_object *bo = to_panthor_bo(obj);
struct drm_gem_object *vm_root_gem = bo->exclusive_vm_root_gem;
+ panthor_gem_debugfs_bo_rm(bo);
+
+ /*
+ * Label might have been allocated with kstrdup_const(),
+ * we need to take that into account when freeing the memory
+ */
+ kfree_const(bo->label.str);
+
+ mutex_destroy(&bo->label.lock);
+
drm_gem_free_mmap_offset(&bo->base.base);
mutex_destroy(&bo->gpuva_list_lock);
drm_gem_shmem_free(&bo->base);
@@ -44,8 +95,7 @@ void panthor_kernel_bo_destroy(struct panthor_kernel_bo *bo)
to_panthor_bo(bo->obj)->exclusive_vm_root_gem != panthor_vm_root_gem(vm)))
goto out_free_bo;
- ret = panthor_vm_unmap_range(vm, bo->va_node.start,
- panthor_kernel_bo_size(bo));
+ ret = panthor_vm_unmap_range(vm, bo->va_node.start, bo->va_node.size);
if (ret)
goto out_free_bo;
@@ -68,17 +118,19 @@ out_free_bo:
* @gpu_va: GPU address assigned when mapping to the VM.
* If gpu_va == PANTHOR_VM_KERNEL_AUTO_VA, the virtual address will be
* automatically allocated.
+ * @name: Descriptive label of the BO's contents
*
* Return: A valid pointer in case of success, an ERR_PTR() otherwise.
*/
struct panthor_kernel_bo *
panthor_kernel_bo_create(struct panthor_device *ptdev, struct panthor_vm *vm,
size_t size, u32 bo_flags, u32 vm_map_flags,
- u64 gpu_va)
+ u64 gpu_va, const char *name)
{
struct drm_gem_shmem_object *obj;
struct panthor_kernel_bo *kbo;
struct panthor_gem_object *bo;
+ u32 debug_flags = PANTHOR_DEBUGFS_GEM_USAGE_FLAG_KERNEL;
int ret;
if (drm_WARN_ON(&ptdev->base, !vm))
@@ -95,10 +147,22 @@ panthor_kernel_bo_create(struct panthor_device *ptdev, struct panthor_vm *vm,
}
bo = to_panthor_bo(&obj->base);
- size = obj->base.size;
kbo->obj = &obj->base;
bo->flags = bo_flags;
+ if (vm == panthor_fw_vm(ptdev))
+ debug_flags |= PANTHOR_DEBUGFS_GEM_USAGE_FLAG_FW_MAPPED;
+
+ panthor_gem_kernel_bo_set_label(kbo, name);
+ panthor_gem_debugfs_set_usage_flags(to_panthor_bo(kbo->obj), debug_flags);
+
+ /* The system and GPU MMU page size might differ, which becomes a
+ * problem for FW sections that need to be mapped at explicit address
+ * since our PAGE_SIZE alignment might cover a VA range that's
+ * expected to be used for another section.
+ * Make sure we never map more than we need.
+ */
+ size = ALIGN(size, panthor_vm_page_size(vm));
ret = panthor_vm_alloc_va(vm, gpu_va, size, &kbo->va_node);
if (ret)
goto err_put_obj;
@@ -124,17 +188,6 @@ err_free_bo:
return ERR_PTR(ret);
}
-static int panthor_gem_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma)
-{
- struct panthor_gem_object *bo = to_panthor_bo(obj);
-
- /* Don't allow mmap on objects that have the NO_MMAP flag set. */
- if (bo->flags & DRM_PANTHOR_BO_NO_MMAP)
- return -EINVAL;
-
- return drm_gem_shmem_object_mmap(obj, vma);
-}
-
static struct dma_buf *
panthor_gem_prime_export(struct drm_gem_object *obj, int flags)
{
@@ -145,6 +198,17 @@ panthor_gem_prime_export(struct drm_gem_object *obj, int flags)
return drm_gem_prime_export(obj, flags);
}
+/*
+ * Report the residency status of a GEM object: imported objects and
+ * objects with a pages array are flagged as resident.
+ */
+static enum drm_gem_object_status panthor_gem_status(struct drm_gem_object *obj)
+{
+	struct panthor_gem_object *bo = to_panthor_bo(obj);
+	const bool resident = drm_gem_is_imported(&bo->base.base) ||
+			      bo->base.pages;
+
+	return resident ? DRM_GEM_OBJECT_RESIDENT : 0;
+}
+
static const struct drm_gem_object_funcs panthor_gem_funcs = {
.free = panthor_gem_free_object,
.print_info = drm_gem_shmem_object_print_info,
@@ -153,7 +217,8 @@ static const struct drm_gem_object_funcs panthor_gem_funcs = {
.get_sg_table = drm_gem_shmem_object_get_sg_table,
.vmap = drm_gem_shmem_object_vmap,
.vunmap = drm_gem_shmem_object_vunmap,
- .mmap = panthor_gem_mmap,
+ .mmap = drm_gem_shmem_object_mmap,
+ .status = panthor_gem_status,
.export = panthor_gem_prime_export,
.vm_ops = &drm_gem_shmem_vm_ops,
};
@@ -179,6 +244,9 @@ struct drm_gem_object *panthor_gem_create_object(struct drm_device *ddev, size_t
obj->base.map_wc = !ptdev->coherent;
mutex_init(&obj->gpuva_list_lock);
drm_gem_gpuva_set_lock(&obj->base.base, &obj->gpuva_list_lock);
+ mutex_init(&obj->label.lock);
+
+ panthor_gem_debugfs_bo_add(ptdev, obj);
return &obj->base.base;
}
@@ -228,5 +296,153 @@ panthor_gem_create_with_handle(struct drm_file *file,
/* drop reference from allocate - handle holds it now. */
drm_gem_object_put(&shmem->base);
+ /*
+ * No explicit flags are needed in the call below, since the
+ * function internally sets the INITIALIZED bit for us.
+ */
+ panthor_gem_debugfs_set_usage_flags(bo, 0);
+
return ret;
}
+
+/**
+ * panthor_gem_bo_set_label() - Replace the label attached to a GEM object
+ * @obj: GEM object to label.
+ * @label: New label. The pointer is stored as-is, no copy is made.
+ *
+ * The previous label, if any, is released with kfree_const().
+ */
+void
+panthor_gem_bo_set_label(struct drm_gem_object *obj, const char *label)
+{
+ struct panthor_gem_object *bo = to_panthor_bo(obj);
+ const char *old_label;
+
+ /* Only the pointer swap happens under the label lock. */
+ scoped_guard(mutex, &bo->label.lock) {
+ old_label = bo->label.str;
+ bo->label.str = label;
+ }
+
+ /* The old label is freed after the lock has been dropped. */
+ kfree_const(old_label);
+}
+
+/**
+ * panthor_gem_kernel_bo_set_label() - Label a kernel BO
+ * @bo: Kernel BO to label.
+ * @label: Label to duplicate and attach. NULL leaves the BO untouched.
+ *
+ * The label is duplicated with kstrdup_const() before being attached to the
+ * underlying GEM object. An allocation failure only triggers a warning.
+ */
+void
+panthor_gem_kernel_bo_set_label(struct panthor_kernel_bo *bo, const char *label)
+{
+ const char *str;
+
+ /* We should never attempt labelling a UM-exposed GEM object */
+ if (drm_WARN_ON(bo->obj->dev, bo->obj->handle_count > 0))
+ return;
+
+ if (!label)
+ return;
+
+ str = kstrdup_const(label, GFP_KERNEL);
+ if (!str) {
+ /* Failing to allocate memory for a label isn't a fatal condition */
+ drm_warn(bo->obj->dev, "Not enough memory to allocate BO label");
+ return;
+ }
+
+ panthor_gem_bo_set_label(bo->obj, str);
+}
+
+#ifdef CONFIG_DEBUG_FS
+struct gem_size_totals {
+ size_t size;
+ size_t resident;
+ size_t reclaimable;
+};
+
+/*
+ * Print a legend mapping each GEM state and usage flag name to its bit
+ * value, one line per flag family.
+ */
+static void panthor_gem_debugfs_print_flag_names(struct seq_file *m)
+{
+ int len;
+ int i;
+
+ /* Both tables are indexed by bit position. */
+ static const char * const gem_state_flags_names[] = {
+ [PANTHOR_DEBUGFS_GEM_STATE_IMPORTED_BIT] = "imported",
+ [PANTHOR_DEBUGFS_GEM_STATE_EXPORTED_BIT] = "exported",
+ };
+
+ static const char * const gem_usage_flags_names[] = {
+ [PANTHOR_DEBUGFS_GEM_USAGE_KERNEL_BIT] = "kernel",
+ [PANTHOR_DEBUGFS_GEM_USAGE_FW_MAPPED_BIT] = "fw-mapped",
+ };
+
+ seq_puts(m, "GEM state flags: ");
+ for (i = 0, len = ARRAY_SIZE(gem_state_flags_names); i < len; i++) {
+ /* Skip bit positions that have no name. */
+ if (!gem_state_flags_names[i])
+ continue;
+ seq_printf(m, "%s (0x%x)%s", gem_state_flags_names[i],
+ (u32)BIT(i), (i < len - 1) ? ", " : "\n");
+ }
+
+ seq_puts(m, "GEM usage flags: ");
+ for (i = 0, len = ARRAY_SIZE(gem_usage_flags_names); i < len; i++) {
+ /* Skip bit positions that have no name. */
+ if (!gem_usage_flags_names[i])
+ continue;
+ seq_printf(m, "%s (0x%x)%s", gem_usage_flags_names[i],
+ (u32)BIT(i), (i < len - 1) ? ", " : "\n\n");
+ }
+}
+
+/*
+ * Print one line describing @bo to @m and add its sizes to @totals.
+ * BOs whose refcount already dropped to zero are skipped entirely.
+ */
+static void panthor_gem_debugfs_bo_print(struct panthor_gem_object *bo,
+					 struct seq_file *m,
+					 struct gem_size_totals *totals)
+{
+	unsigned int refcount = kref_read(&bo->base.base.refcount);
+	char creator_info[32] = {};
+	size_t resident_size;
+	u32 gem_usage_flags = bo->debugfs.flags & (u32)~PANTHOR_DEBUGFS_GEM_USAGE_FLAG_INITIALIZED;
+	u32 gem_state_flags = 0;
+
+	/* Skip BOs being destroyed. */
+	if (!refcount)
+		return;
+
+	resident_size = bo->base.pages ? bo->base.base.size : 0;
+
+	snprintf(creator_info, sizeof(creator_info),
+		 "%s/%d", bo->debugfs.creator.process_name, bo->debugfs.creator.tgid);
+	/* refcount is unsigned and the sizes are size_t: use %u and %zu. */
+	seq_printf(m, "%-32s%-16d%-16u%-16zu%-16zu0x%-16lx",
+		   creator_info,
+		   bo->base.base.name,
+		   refcount,
+		   bo->base.base.size,
+		   resident_size,
+		   drm_vma_node_start(&bo->base.base.vma_node));
+
+	if (bo->base.base.import_attach)
+		gem_state_flags |= PANTHOR_DEBUGFS_GEM_STATE_FLAG_IMPORTED;
+	if (bo->base.base.dma_buf)
+		gem_state_flags |= PANTHOR_DEBUGFS_GEM_STATE_FLAG_EXPORTED;
+
+	seq_printf(m, "0x%-8x 0x%-10x", gem_state_flags, gem_usage_flags);
+
+	/* The label can be replaced concurrently, read it under its lock. */
+	scoped_guard(mutex, &bo->label.lock) {
+		seq_printf(m, "%s\n", bo->label.str ? : "");
+	}
+
+	totals->size += bo->base.base.size;
+	totals->resident += resident_size;
+	if (bo->base.madv > 0)
+		totals->reclaimable += resident_size;
+}
+
+/**
+ * panthor_gem_debugfs_print_bos() - Dump the device's GEM objects
+ * @ptdev: Device.
+ * @m: Seq file to print to.
+ *
+ * Prints the flag legend, one line per BO flagged as initialized, and the
+ * accumulated size totals. The BO list is walked under ptdev->gems.lock.
+ */
+void panthor_gem_debugfs_print_bos(struct panthor_device *ptdev,
+				   struct seq_file *m)
+{
+	struct gem_size_totals totals = {0};
+	struct panthor_gem_object *bo;
+
+	panthor_gem_debugfs_print_flag_names(m);
+
+	seq_puts(m, "created-by global-name refcount size resident-size file-offset state usage label\n");
+	seq_puts(m, "----------------------------------------------------------------------------------------------------------------------------------------------\n");
+
+	scoped_guard(mutex, &ptdev->gems.lock) {
+		list_for_each_entry(bo, &ptdev->gems.node, debugfs.node) {
+			/* BOs without the INITIALIZED flag are not displayed. */
+			if (bo->debugfs.flags & PANTHOR_DEBUGFS_GEM_USAGE_FLAG_INITIALIZED)
+				panthor_gem_debugfs_bo_print(bo, m, &totals);
+		}
+	}
+
+	seq_puts(m, "==============================================================================================================================================\n");
+	/* The totals are size_t, hence %zu. */
+	seq_printf(m, "Total size: %zu, Total resident: %zu, Total reclaimable: %zu\n",
+		   totals.size, totals.resident, totals.reclaimable);
+}
+#endif
diff --git a/drivers/gpu/drm/panthor/panthor_gem.h b/drivers/gpu/drm/panthor/panthor_gem.h
index e43021cf6d45..4dd732dcd59f 100644
--- a/drivers/gpu/drm/panthor/panthor_gem.h
+++ b/drivers/gpu/drm/panthor/panthor_gem.h
@@ -13,6 +13,56 @@
struct panthor_vm;
+#define PANTHOR_BO_LABEL_MAXLEN 4096
+
+/**
+ * enum panthor_debugfs_gem_state_flags - GEM state flags shown in DebugFS
+ */
+enum panthor_debugfs_gem_state_flags {
+ /** @PANTHOR_DEBUGFS_GEM_STATE_IMPORTED_BIT: Bit position of the imported flag. */
+ PANTHOR_DEBUGFS_GEM_STATE_IMPORTED_BIT = 0,
+ /** @PANTHOR_DEBUGFS_GEM_STATE_EXPORTED_BIT: Bit position of the exported flag. */
+ PANTHOR_DEBUGFS_GEM_STATE_EXPORTED_BIT = 1,
+
+ /** @PANTHOR_DEBUGFS_GEM_STATE_FLAG_IMPORTED: GEM BO is PRIME imported. */
+ PANTHOR_DEBUGFS_GEM_STATE_FLAG_IMPORTED = BIT(PANTHOR_DEBUGFS_GEM_STATE_IMPORTED_BIT),
+
+ /** @PANTHOR_DEBUGFS_GEM_STATE_FLAG_EXPORTED: GEM BO is PRIME exported. */
+ PANTHOR_DEBUGFS_GEM_STATE_FLAG_EXPORTED = BIT(PANTHOR_DEBUGFS_GEM_STATE_EXPORTED_BIT),
+};
+
+/**
+ * enum panthor_debugfs_gem_usage_flags - GEM usage flags shown in DebugFS
+ */
+enum panthor_debugfs_gem_usage_flags {
+ /** @PANTHOR_DEBUGFS_GEM_USAGE_KERNEL_BIT: Bit position of the kernel flag. */
+ PANTHOR_DEBUGFS_GEM_USAGE_KERNEL_BIT = 0,
+ /** @PANTHOR_DEBUGFS_GEM_USAGE_FW_MAPPED_BIT: Bit position of the FW-mapped flag. */
+ PANTHOR_DEBUGFS_GEM_USAGE_FW_MAPPED_BIT = 1,
+
+ /** @PANTHOR_DEBUGFS_GEM_USAGE_FLAG_KERNEL: BO is for kernel use only. */
+ PANTHOR_DEBUGFS_GEM_USAGE_FLAG_KERNEL = BIT(PANTHOR_DEBUGFS_GEM_USAGE_KERNEL_BIT),
+
+ /** @PANTHOR_DEBUGFS_GEM_USAGE_FLAG_FW_MAPPED: BO is mapped on the FW VM. */
+ PANTHOR_DEBUGFS_GEM_USAGE_FLAG_FW_MAPPED = BIT(PANTHOR_DEBUGFS_GEM_USAGE_FW_MAPPED_BIT),
+
+ /** @PANTHOR_DEBUGFS_GEM_USAGE_FLAG_INITIALIZED: BO is ready for DebugFS display. */
+ PANTHOR_DEBUGFS_GEM_USAGE_FLAG_INITIALIZED = BIT(31),
+};
+
+/**
+ * struct panthor_gem_debugfs - GEM object's DebugFS list information
+ */
+struct panthor_gem_debugfs {
+ /**
+ * @node: Node used to insert the object in the device-wide list of
+ * GEM objects, to display information about it through a DebugFS file.
+ */
+ struct list_head node;
+
+ /** @creator: Information about the UM process which created the GEM. */
+ struct {
+ /** @creator.process_name: Group leader name in owning thread's process */
+ char process_name[TASK_COMM_LEN];
+
+ /** @creator.tgid: PID of the thread's group leader within its process */
+ pid_t tgid;
+ } creator;
+
+ /** @flags: Combination of panthor_debugfs_gem_usage_flags flags */
+ u32 flags;
+};
+
/**
* struct panthor_gem_object - Driver specific GEM object.
*/
@@ -46,6 +96,24 @@ struct panthor_gem_object {
/** @flags: Combination of drm_panthor_bo_flags flags. */
u32 flags;
+
+ /**
+ * @label: BO tagging fields. The label can be assigned within the
+ * driver itself or through a specific IOCTL.
+ */
+ struct {
+ /**
+ * @label.str: Pointer to NULL-terminated string,
+ */
+ const char *str;
+
+ /** @label.lock: Protects access to the @label.str field. */
+ struct mutex lock;
+ } label;
+
+#ifdef CONFIG_DEBUG_FS
+ struct panthor_gem_debugfs debugfs;
+#endif
};
/**
@@ -85,17 +153,15 @@ struct panthor_gem_object *to_panthor_bo(struct drm_gem_object *obj)
struct drm_gem_object *panthor_gem_create_object(struct drm_device *ddev, size_t size);
-struct drm_gem_object *
-panthor_gem_prime_import_sg_table(struct drm_device *ddev,
- struct dma_buf_attachment *attach,
- struct sg_table *sgt);
-
int
panthor_gem_create_with_handle(struct drm_file *file,
struct drm_device *ddev,
struct panthor_vm *exclusive_vm,
u64 *size, u32 flags, uint32_t *handle);
+void panthor_gem_bo_set_label(struct drm_gem_object *obj, const char *label);
+void panthor_gem_kernel_bo_set_label(struct panthor_kernel_bo *bo, const char *label);
+
static inline u64
panthor_kernel_bo_gpuva(struct panthor_kernel_bo *bo)
{
@@ -117,7 +183,7 @@ panthor_kernel_bo_vmap(struct panthor_kernel_bo *bo)
if (bo->kmap)
return 0;
- ret = drm_gem_vmap_unlocked(bo->obj, &map);
+ ret = drm_gem_vmap(bo->obj, &map);
if (ret)
return ret;
@@ -131,7 +197,7 @@ panthor_kernel_bo_vunmap(struct panthor_kernel_bo *bo)
if (bo->kmap) {
struct iosys_map map = IOSYS_MAP_INIT_VADDR(bo->kmap);
- drm_gem_vunmap_unlocked(bo->obj, &map);
+ drm_gem_vunmap(bo->obj, &map);
bo->kmap = NULL;
}
}
@@ -139,8 +205,13 @@ panthor_kernel_bo_vunmap(struct panthor_kernel_bo *bo)
struct panthor_kernel_bo *
panthor_kernel_bo_create(struct panthor_device *ptdev, struct panthor_vm *vm,
size_t size, u32 bo_flags, u32 vm_map_flags,
- u64 gpu_va);
+ u64 gpu_va, const char *name);
void panthor_kernel_bo_destroy(struct panthor_kernel_bo *bo);
+#ifdef CONFIG_DEBUG_FS
+void panthor_gem_debugfs_print_bos(struct panthor_device *pfdev,
+ struct seq_file *m);
+#endif
+
#endif /* __PANTHOR_GEM_H__ */
diff --git a/drivers/gpu/drm/panthor/panthor_gpu.c b/drivers/gpu/drm/panthor/panthor_gpu.c
index 5251d8764e7d..32d678a0114e 100644
--- a/drivers/gpu/drm/panthor/panthor_gpu.c
+++ b/drivers/gpu/drm/panthor/panthor_gpu.c
@@ -77,6 +77,12 @@ static const struct panthor_model gpu_models[] = {
GPU_IRQ_RESET_COMPLETED | \
GPU_IRQ_CLEAN_CACHES_COMPLETED)
+/*
+ * Program GPU_COHERENCY_PROTOCOL: ACE-Lite when the device is flagged as
+ * coherent, no coherency otherwise.
+ */
+static void panthor_gpu_coherency_set(struct panthor_device *ptdev)
+{
+	u32 protocol = GPU_COHERENCY_NONE;
+
+	if (ptdev->coherent)
+		protocol = GPU_COHERENCY_PROT_BIT(ACE_LITE);
+
+	gpu_write(ptdev, GPU_COHERENCY_PROTOCOL, protocol);
+}
+
static void panthor_gpu_init_info(struct panthor_device *ptdev)
{
const struct panthor_model *model;
@@ -144,6 +150,8 @@ static void panthor_gpu_init_info(struct panthor_device *ptdev)
static void panthor_gpu_irq_handler(struct panthor_device *ptdev, u32 status)
{
+ gpu_write(ptdev, GPU_INT_CLEAR, status);
+
if (status & GPU_IRQ_FAULT) {
u32 fault_status = gpu_read(ptdev, GPU_FAULT_STATUS);
u64 address = ((u64)gpu_read(ptdev, GPU_FAULT_ADDR_HI) << 32) |
@@ -174,7 +182,8 @@ void panthor_gpu_unplug(struct panthor_device *ptdev)
unsigned long flags;
/* Make sure the IRQ handler is not running after that point. */
- panthor_gpu_irq_suspend(&ptdev->gpu->irq);
+ if (!IS_ENABLED(CONFIG_PM) || pm_runtime_active(ptdev->base.dev))
+ panthor_gpu_irq_suspend(&ptdev->gpu->irq);
/* Wake-up all waiters. */
spin_lock_irqsave(&ptdev->gpu->reqs_lock, flags);
@@ -365,6 +374,9 @@ int panthor_gpu_l2_power_on(struct panthor_device *ptdev)
hweight64(ptdev->gpu_info.shader_present));
}
+ /* Set the desired coherency mode before the power up of L2 */
+ panthor_gpu_coherency_set(ptdev);
+
return panthor_gpu_power_on(ptdev, L2, 1, 20000);
}
@@ -460,11 +472,12 @@ int panthor_gpu_soft_reset(struct panthor_device *ptdev)
*/
void panthor_gpu_suspend(struct panthor_device *ptdev)
{
- /*
- * It may be preferable to simply power down the L2, but for now just
- * soft-reset which will leave the L2 powered down.
- */
- panthor_gpu_soft_reset(ptdev);
+ /* On a fast reset, simply power down the L2. */
+ if (!ptdev->reset.fast)
+ panthor_gpu_soft_reset(ptdev);
+ else
+ panthor_gpu_power_off(ptdev, L2, 1, 20000);
+
panthor_gpu_irq_suspend(&ptdev->gpu->irq);
}
@@ -480,3 +493,50 @@ void panthor_gpu_resume(struct panthor_device *ptdev)
panthor_gpu_irq_resume(&ptdev->gpu->irq, GPU_INTERRUPTS_MASK);
panthor_gpu_l2_power_on(ptdev);
}
+
+/**
+ * panthor_gpu_read_64bit_counter() - Read a 64-bit counter at a given offset.
+ * @ptdev: Device.
+ * @reg: The offset of the register to read.
+ *
+ * The low word is read at @reg and the high word at @reg + 4.
+ *
+ * Return: The counter value.
+ */
+static u64
+panthor_gpu_read_64bit_counter(struct panthor_device *ptdev, u32 reg)
+{
+ u32 hi, lo;
+
+ /* Re-read the high word after the low one and retry if it changed,
+ * so that hi and lo always come from a consistent sample.
+ */
+ do {
+ hi = gpu_read(ptdev, reg + 0x4);
+ lo = gpu_read(ptdev, reg);
+ } while (hi != gpu_read(ptdev, reg + 0x4));
+
+ return ((u64)hi << 32) | lo;
+}
+
+/**
+ * panthor_gpu_read_timestamp() - Read the timestamp register.
+ * @ptdev: Device.
+ *
+ * Return: The GPU timestamp value.
+ */
+u64 panthor_gpu_read_timestamp(struct panthor_device *ptdev)
+{
+ /* GPU_TIMESTAMP_LO is the low word, the helper reads the high word at +4. */
+ return panthor_gpu_read_64bit_counter(ptdev, GPU_TIMESTAMP_LO);
+}
+
+/**
+ * panthor_gpu_read_timestamp_offset() - Read the timestamp offset register.
+ * @ptdev: Device.
+ *
+ * The high word is read first, then the low word. The two reads are not
+ * retried.
+ *
+ * Return: The GPU timestamp offset value.
+ */
+u64 panthor_gpu_read_timestamp_offset(struct panthor_device *ptdev)
+{
+	u64 offset;
+
+	offset = (u64)gpu_read(ptdev, GPU_TIMESTAMP_OFFSET_HI) << 32;
+	offset |= gpu_read(ptdev, GPU_TIMESTAMP_OFFSET_LO);
+
+	return offset;
+}
diff --git a/drivers/gpu/drm/panthor/panthor_gpu.h b/drivers/gpu/drm/panthor/panthor_gpu.h
index bba7555dd3c6..7f6133a66127 100644
--- a/drivers/gpu/drm/panthor/panthor_gpu.h
+++ b/drivers/gpu/drm/panthor/panthor_gpu.h
@@ -5,6 +5,8 @@
#ifndef __PANTHOR_GPU_H__
#define __PANTHOR_GPU_H__
+#include <linux/types.h>
+
struct panthor_device;
int panthor_gpu_init(struct panthor_device *ptdev);
@@ -48,5 +50,7 @@ int panthor_gpu_l2_power_on(struct panthor_device *ptdev);
int panthor_gpu_flush_caches(struct panthor_device *ptdev,
u32 l2, u32 lsc, u32 other);
int panthor_gpu_soft_reset(struct panthor_device *ptdev);
+u64 panthor_gpu_read_timestamp(struct panthor_device *ptdev);
+u64 panthor_gpu_read_timestamp_offset(struct panthor_device *ptdev);
#endif
diff --git a/drivers/gpu/drm/panthor/panthor_heap.c b/drivers/gpu/drm/panthor/panthor_heap.c
index 3796a9eb22af..d236e9ceade4 100644
--- a/drivers/gpu/drm/panthor/panthor_heap.c
+++ b/drivers/gpu/drm/panthor/panthor_heap.c
@@ -97,6 +97,9 @@ struct panthor_heap_pool {
/** @gpu_contexts: Buffer object containing the GPU heap contexts. */
struct panthor_kernel_bo *gpu_contexts;
+
+ /** @size: Size of all chunks across all heaps in the pool. */
+ atomic_t size;
};
static int panthor_heap_ctx_stride(struct panthor_device *ptdev)
@@ -118,7 +121,7 @@ static void *panthor_get_heap_ctx(struct panthor_heap_pool *pool, int id)
panthor_get_heap_ctx_offset(pool, id);
}
-static void panthor_free_heap_chunk(struct panthor_vm *vm,
+static void panthor_free_heap_chunk(struct panthor_heap_pool *pool,
struct panthor_heap *heap,
struct panthor_heap_chunk *chunk)
{
@@ -127,12 +130,13 @@ static void panthor_free_heap_chunk(struct panthor_vm *vm,
heap->chunk_count--;
mutex_unlock(&heap->lock);
+ atomic_sub(heap->chunk_size, &pool->size);
+
panthor_kernel_bo_destroy(chunk->bo);
kfree(chunk);
}
-static int panthor_alloc_heap_chunk(struct panthor_device *ptdev,
- struct panthor_vm *vm,
+static int panthor_alloc_heap_chunk(struct panthor_heap_pool *pool,
struct panthor_heap *heap,
bool initial_chunk)
{
@@ -144,10 +148,11 @@ static int panthor_alloc_heap_chunk(struct panthor_device *ptdev,
if (!chunk)
return -ENOMEM;
- chunk->bo = panthor_kernel_bo_create(ptdev, vm, heap->chunk_size,
+ chunk->bo = panthor_kernel_bo_create(pool->ptdev, pool->vm, heap->chunk_size,
DRM_PANTHOR_BO_NO_MMAP,
DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC,
- PANTHOR_VM_KERNEL_AUTO_VA);
+ PANTHOR_VM_KERNEL_AUTO_VA,
+ "Tiler heap chunk");
if (IS_ERR(chunk->bo)) {
ret = PTR_ERR(chunk->bo);
goto err_free_chunk;
@@ -180,6 +185,8 @@ static int panthor_alloc_heap_chunk(struct panthor_device *ptdev,
heap->chunk_count++;
mutex_unlock(&heap->lock);
+ atomic_add(heap->chunk_size, &pool->size);
+
return 0;
err_destroy_bo:
@@ -191,17 +198,16 @@ err_free_chunk:
return ret;
}
-static void panthor_free_heap_chunks(struct panthor_vm *vm,
+static void panthor_free_heap_chunks(struct panthor_heap_pool *pool,
struct panthor_heap *heap)
{
struct panthor_heap_chunk *chunk, *tmp;
list_for_each_entry_safe(chunk, tmp, &heap->chunks, node)
- panthor_free_heap_chunk(vm, heap, chunk);
+ panthor_free_heap_chunk(pool, heap, chunk);
}
-static int panthor_alloc_heap_chunks(struct panthor_device *ptdev,
- struct panthor_vm *vm,
+static int panthor_alloc_heap_chunks(struct panthor_heap_pool *pool,
struct panthor_heap *heap,
u32 chunk_count)
{
@@ -209,7 +215,7 @@ static int panthor_alloc_heap_chunks(struct panthor_device *ptdev,
u32 i;
for (i = 0; i < chunk_count; i++) {
- ret = panthor_alloc_heap_chunk(ptdev, vm, heap, true);
+ ret = panthor_alloc_heap_chunk(pool, heap, true);
if (ret)
return ret;
}
@@ -226,7 +232,7 @@ panthor_heap_destroy_locked(struct panthor_heap_pool *pool, u32 handle)
if (!heap)
return -EINVAL;
- panthor_free_heap_chunks(pool->vm, heap);
+ panthor_free_heap_chunks(pool, heap);
mutex_destroy(&heap->lock);
kfree(heap);
return 0;
@@ -308,8 +314,7 @@ int panthor_heap_create(struct panthor_heap_pool *pool,
heap->max_chunks = max_chunks;
heap->target_in_flight = target_in_flight;
- ret = panthor_alloc_heap_chunks(pool->ptdev, vm, heap,
- initial_chunk_count);
+ ret = panthor_alloc_heap_chunks(pool, heap, initial_chunk_count);
if (ret)
goto err_free_heap;
@@ -342,7 +347,7 @@ int panthor_heap_create(struct panthor_heap_pool *pool,
return id;
err_free_heap:
- panthor_free_heap_chunks(pool->vm, heap);
+ panthor_free_heap_chunks(pool, heap);
mutex_destroy(&heap->lock);
kfree(heap);
@@ -389,6 +394,7 @@ int panthor_heap_return_chunk(struct panthor_heap_pool *pool,
removed = chunk;
list_del(&chunk->node);
heap->chunk_count--;
+ atomic_sub(heap->chunk_size, &pool->size);
break;
}
}
@@ -466,7 +472,7 @@ int panthor_heap_grow(struct panthor_heap_pool *pool,
* further jobs in this queue fail immediately instead of having to
* wait for the job timeout.
*/
- ret = panthor_alloc_heap_chunk(pool->ptdev, pool->vm, heap, false);
+ ret = panthor_alloc_heap_chunk(pool, heap, false);
if (ret)
goto out_unlock;
@@ -550,7 +556,8 @@ panthor_heap_pool_create(struct panthor_device *ptdev, struct panthor_vm *vm)
pool->gpu_contexts = panthor_kernel_bo_create(ptdev, vm, bosize,
DRM_PANTHOR_BO_NO_MMAP,
DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC,
- PANTHOR_VM_KERNEL_AUTO_VA);
+ PANTHOR_VM_KERNEL_AUTO_VA,
+ "Heap pool");
if (IS_ERR(pool->gpu_contexts)) {
ret = PTR_ERR(pool->gpu_contexts);
goto err_destroy_pool;
@@ -560,6 +567,8 @@ panthor_heap_pool_create(struct panthor_device *ptdev, struct panthor_vm *vm)
if (ret)
goto err_destroy_pool;
+ atomic_add(pool->gpu_contexts->obj->size, &pool->size);
+
return pool;
err_destroy_pool:
@@ -594,8 +603,10 @@ void panthor_heap_pool_destroy(struct panthor_heap_pool *pool)
xa_for_each(&pool->xa, i, heap)
drm_WARN_ON(&pool->ptdev->base, panthor_heap_destroy_locked(pool, i));
- if (!IS_ERR_OR_NULL(pool->gpu_contexts))
+ if (!IS_ERR_OR_NULL(pool->gpu_contexts)) {
+ atomic_sub(pool->gpu_contexts->obj->size, &pool->size);
panthor_kernel_bo_destroy(pool->gpu_contexts);
+ }
/* Reflects the fact the pool has been destroyed. */
pool->vm = NULL;
@@ -603,3 +614,18 @@ void panthor_heap_pool_destroy(struct panthor_heap_pool *pool)
panthor_heap_pool_put(pool);
}
+
+/**
+ * panthor_heap_pool_size() - Get a heap pool's total size
+ * @pool: Pool to query. Can be NULL.
+ *
+ * The pool keeps a running counter that is increased when a heap chunk or the
+ * heap contexts BO is allocated, and decreased when they are released.
+ *
+ * Return: The current value of the pool size counter, or 0 if @pool is NULL.
+ */
+size_t panthor_heap_pool_size(struct panthor_heap_pool *pool)
+{
+	return pool ? atomic_read(&pool->size) : 0;
+}
diff --git a/drivers/gpu/drm/panthor/panthor_heap.h b/drivers/gpu/drm/panthor/panthor_heap.h
index 25a5f2bba445..e3358d4e8edb 100644
--- a/drivers/gpu/drm/panthor/panthor_heap.h
+++ b/drivers/gpu/drm/panthor/panthor_heap.h
@@ -27,6 +27,8 @@ struct panthor_heap_pool *
panthor_heap_pool_get(struct panthor_heap_pool *pool);
void panthor_heap_pool_put(struct panthor_heap_pool *pool);
+size_t panthor_heap_pool_size(struct panthor_heap_pool *pool);
+
int panthor_heap_grow(struct panthor_heap_pool *pool,
u64 heap_gpu_va,
u32 renderpasses_in_flight,
diff --git a/drivers/gpu/drm/panthor/panthor_mmu.c b/drivers/gpu/drm/panthor/panthor_mmu.c
index 3cd2bce59edc..6ca9a2642a4e 100644
--- a/drivers/gpu/drm/panthor/panthor_mmu.c
+++ b/drivers/gpu/drm/panthor/panthor_mmu.c
@@ -53,26 +53,27 @@ struct panthor_mmu {
/** @irq: The MMU irq. */
struct panthor_irq irq;
- /** @as: Address space related fields.
+ /**
+ * @as: Address space related fields.
*
* The GPU has a limited number of address spaces (AS) slots, forcing
* us to re-assign slots on-demand.
*/
struct {
- /** @slots_lock: Lock protecting access to all other AS fields. */
+ /** @as.slots_lock: Lock protecting access to all other AS fields. */
struct mutex slots_lock;
- /** @alloc_mask: Bitmask encoding the allocated slots. */
+ /** @as.alloc_mask: Bitmask encoding the allocated slots. */
unsigned long alloc_mask;
- /** @faulty_mask: Bitmask encoding the faulty slots. */
+ /** @as.faulty_mask: Bitmask encoding the faulty slots. */
unsigned long faulty_mask;
- /** @slots: VMs currently bound to the AS slots. */
+ /** @as.slots: VMs currently bound to the AS slots. */
struct panthor_as_slot slots[MAX_AS_SLOTS];
/**
- * @lru_list: List of least recently used VMs.
+ * @as.lru_list: List of least recently used VMs.
*
* We use this list to pick a VM to evict when all slots are
* used.
@@ -87,16 +88,16 @@ struct panthor_mmu {
/** @vm: VMs management fields */
struct {
- /** @lock: Lock protecting access to list. */
+ /** @vm.lock: Lock protecting access to list. */
struct mutex lock;
- /** @list: List containing all VMs. */
+ /** @vm.list: List containing all VMs. */
struct list_head list;
- /** @reset_in_progress: True if a reset is in progress. */
+ /** @vm.reset_in_progress: True if a reset is in progress. */
bool reset_in_progress;
- /** @wq: Workqueue used for the VM_BIND queues. */
+ /** @vm.wq: Workqueue used for the VM_BIND queues. */
struct workqueue_struct *wq;
} vm;
};
@@ -143,14 +144,14 @@ struct panthor_vma {
struct panthor_vm_op_ctx {
/** @rsvd_page_tables: Pages reserved for the MMU page table update. */
struct {
- /** @count: Number of pages reserved. */
+ /** @rsvd_page_tables.count: Number of pages reserved. */
u32 count;
- /** @ptr: Point to the first unused page in the @pages table. */
+ /** @rsvd_page_tables.ptr: Point to the first unused page in the @pages table. */
u32 ptr;
/**
- * @page: Array of pages that can be used for an MMU page table update.
+ * @rsvd_page_tables.pages: Array of pages to be used for an MMU page table update.
*
* After an VM operation, there might be free pages left in this array.
* They should be returned to the pt_cache as part of the op_ctx cleanup.
@@ -172,10 +173,10 @@ struct panthor_vm_op_ctx {
/** @va: Virtual range targeted by the VM operation. */
struct {
- /** @addr: Start address. */
+ /** @va.addr: Start address. */
u64 addr;
- /** @range: Range size. */
+ /** @va.range: Range size. */
u64 range;
} va;
@@ -195,14 +196,14 @@ struct panthor_vm_op_ctx {
/** @map: Fields specific to a map operation. */
struct {
- /** @vm_bo: Buffer object to map. */
+ /** @map.vm_bo: Buffer object to map. */
struct drm_gpuvm_bo *vm_bo;
- /** @bo_offset: Offset in the buffer object. */
+ /** @map.bo_offset: Offset in the buffer object. */
u64 bo_offset;
/**
- * @sgt: sg-table pointing to pages backing the GEM object.
+ * @map.sgt: sg-table pointing to pages backing the GEM object.
*
* This is gathered at job creation time, such that we don't have
* to allocate in ::run_job().
@@ -210,7 +211,7 @@ struct panthor_vm_op_ctx {
struct sg_table *sgt;
/**
- * @new_vma: The new VMA object that will be inserted to the VA tree.
+ * @map.new_vma: The new VMA object that will be inserted to the VA tree.
*/
struct panthor_vma *new_vma;
} map;
@@ -304,27 +305,27 @@ struct panthor_vm {
/** @kernel_auto_va: Automatic VA-range for kernel BOs. */
struct {
- /** @start: Start of the automatic VA-range for kernel BOs. */
+ /** @kernel_auto_va.start: Start of the automatic VA-range for kernel BOs. */
u64 start;
- /** @size: Size of the automatic VA-range for kernel BOs. */
+ /** @kernel_auto_va.end: End of the automatic VA-range for kernel BOs. */
u64 end;
} kernel_auto_va;
/** @as: Address space related fields. */
struct {
/**
- * @id: ID of the address space this VM is bound to.
+ * @as.id: ID of the address space this VM is bound to.
*
* A value of -1 means the VM is inactive/not bound.
*/
int id;
- /** @active_cnt: Number of active users of this VM. */
+ /** @as.active_cnt: Number of active users of this VM. */
refcount_t active_cnt;
/**
- * @lru_node: Used to instead the VM in the panthor_mmu::as::lru_list.
+ * @as.lru_node: Used to insert the VM in the panthor_mmu::as::lru_list.
*
* Active VMs should not be inserted in the LRU list.
*/
@@ -336,13 +337,13 @@ struct panthor_vm {
*/
struct {
/**
- * @pool: The heap pool attached to this VM.
+ * @heaps.pool: The heap pool attached to this VM.
*
* Will stay NULL until someone creates a heap context on this VM.
*/
struct panthor_heap_pool *pool;
- /** @lock: Lock used to protect access to @pool. */
+ /** @heaps.lock: Lock used to protect access to @pool. */
struct mutex lock;
} heaps;
@@ -408,7 +409,7 @@ struct panthor_vm_bind_job {
struct panthor_vm_op_ctx ctx;
};
-/**
+/*
* @pt_cache: Cache used to allocate MMU page tables.
*
* The pre-allocation pattern forces us to over-allocate to plan for
@@ -478,7 +479,7 @@ static void *alloc_pt(void *cookie, size_t size, gfp_t gfp)
}
/**
- * @free_pt() - Custom page table free function
+ * free_pt() - Custom page table free function
* @cookie: Cookie passed at page table allocation time.
* @data: Page table to free.
* @size: Size of the page table. This size should be fixed,
@@ -697,7 +698,7 @@ static void panthor_vm_release_as_locked(struct panthor_vm *vm)
/**
* panthor_vm_active() - Flag a VM as active
- * @VM: VM to flag as active.
+ * @vm: VM to flag as active.
*
* Assigns an address space to a VM so it can be used by the GPU/MCU.
*
@@ -780,6 +781,7 @@ out_enable_as:
if (ptdev->mmu->as.faulty_mask & panthor_mmu_as_fault_mask(ptdev, as)) {
gpu_write(ptdev, MMU_INT_CLEAR, panthor_mmu_as_fault_mask(ptdev, as));
ptdev->mmu->as.faulty_mask &= ~panthor_mmu_as_fault_mask(ptdev, as);
+ ptdev->mmu->irq.mask |= panthor_mmu_as_fault_mask(ptdev, as);
gpu_write(ptdev, MMU_INT_MASK, ~ptdev->mmu->as.faulty_mask);
}
@@ -801,7 +803,7 @@ out_dev_exit:
/**
* panthor_vm_idle() - Flag a VM idle
- * @VM: VM to flag as idle.
+ * @vm: VM to flag as idle.
*
* When we know the GPU is done with the VM (no more jobs to process),
* we can relinquish the AS slot attached to this VM, if any.
@@ -826,6 +828,14 @@ void panthor_vm_idle(struct panthor_vm *vm)
mutex_unlock(&ptdev->mmu->as.slots_lock);
}
+/**
+ * panthor_vm_page_size() - Get the smallest page size of a VM
+ * @vm: VM to query.
+ *
+ * Return: The page size, in bytes, matching the lowest bit set in the
+ * pgsize_bitmap of the VM page table configuration.
+ */
+u32 panthor_vm_page_size(struct panthor_vm *vm)
+{
+	const struct io_pgtable_cfg *cfg = &io_pgtable_ops_to_pgtable(vm->pgtbl_ops)->cfg;
+	u32 shift = ffs(cfg->pgsize_bitmap) - 1;
+
+	return 1u << shift;
+}
+
static void panthor_vm_stop(struct panthor_vm *vm)
{
drm_sched_stop(&vm->sched, NULL);
@@ -833,7 +843,7 @@ static void panthor_vm_stop(struct panthor_vm *vm)
static void panthor_vm_start(struct panthor_vm *vm)
{
- drm_sched_start(&vm->sched);
+ drm_sched_start(&vm->sched, 0);
}
/**
@@ -982,6 +992,8 @@ panthor_vm_map_pages(struct panthor_vm *vm, u64 iova, int prot,
if (!size)
break;
+
+ offset = 0;
}
return panthor_vm_flush_range(vm, start_iova, iova - start_iova);
@@ -1007,7 +1019,7 @@ static int flags_to_prot(u32 flags)
/**
* panthor_vm_alloc_va() - Allocate a region in the auto-va space
- * @VM: VM to allocate a region on.
+ * @vm: VM to allocate a region on.
* @va: start of the VA range. Can be PANTHOR_VM_KERNEL_AUTO_VA if the user
* wants the VA to be automatically allocated from the auto-VA range.
* @size: size of the VA range.
@@ -1025,12 +1037,13 @@ int
panthor_vm_alloc_va(struct panthor_vm *vm, u64 va, u64 size,
struct drm_mm_node *va_node)
{
+ ssize_t vm_pgsz = panthor_vm_page_size(vm);
int ret;
- if (!size || (size & ~PAGE_MASK))
+ if (!size || !IS_ALIGNED(size, vm_pgsz))
return -EINVAL;
- if (va != PANTHOR_VM_KERNEL_AUTO_VA && (va & ~PAGE_MASK))
+ if (va != PANTHOR_VM_KERNEL_AUTO_VA && !IS_ALIGNED(va, vm_pgsz))
return -EINVAL;
mutex_lock(&vm->mm_lock);
@@ -1052,7 +1065,7 @@ panthor_vm_alloc_va(struct panthor_vm *vm, u64 va, u64 size,
/**
* panthor_vm_free_va() - Free a region allocated with panthor_vm_alloc_va()
- * @VM: VM to free the region on.
+ * @vm: VM to free the region on.
* @va_node: Memory node representing the region to free.
*/
void panthor_vm_free_va(struct panthor_vm *vm, struct drm_mm_node *va_node)
@@ -1091,7 +1104,7 @@ static void panthor_vm_bo_put(struct drm_gpuvm_bo *vm_bo)
/* If the vm_bo object was destroyed, release the pin reference that
* was held by this object.
*/
- if (unpin && !bo->base.base.import_attach)
+ if (unpin && !drm_gem_is_imported(&bo->base.base))
drm_gem_shmem_unpin(&bo->base);
drm_gpuvm_put(vm);
@@ -1222,7 +1235,7 @@ static int panthor_vm_prepare_map_op_ctx(struct panthor_vm_op_ctx *op_ctx,
if (ret)
goto err_cleanup;
- if (!bo->base.base.import_attach) {
+ if (!drm_gem_is_imported(&bo->base.base)) {
/* Pre-reserve the BO pages, so the map operation doesn't have to
* allocate.
*/
@@ -1233,7 +1246,7 @@ static int panthor_vm_prepare_map_op_ctx(struct panthor_vm_op_ctx *op_ctx,
sgt = drm_gem_shmem_get_pages_sgt(&bo->base);
if (IS_ERR(sgt)) {
- if (!bo->base.base.import_attach)
+ if (!drm_gem_is_imported(&bo->base.base))
drm_gem_shmem_unpin(&bo->base);
ret = PTR_ERR(sgt);
@@ -1244,7 +1257,7 @@ static int panthor_vm_prepare_map_op_ctx(struct panthor_vm_op_ctx *op_ctx,
preallocated_vm_bo = drm_gpuvm_bo_create(&vm->base, &bo->base.base);
if (!preallocated_vm_bo) {
- if (!bo->base.base.import_attach)
+ if (!drm_gem_is_imported(&bo->base.base))
drm_gem_shmem_unpin(&bo->base);
ret = -ENOMEM;
@@ -1270,7 +1283,7 @@ static int panthor_vm_prepare_map_op_ctx(struct panthor_vm_op_ctx *op_ctx,
* which will be released in panthor_vm_bo_put().
*/
if (preallocated_vm_bo != op_ctx->map.vm_bo &&
- !bo->base.base.import_attach)
+ !drm_gem_is_imported(&bo->base.base))
drm_gem_shmem_unpin(&bo->base);
op_ctx->map.bo_offset = offset;
@@ -1481,9 +1494,9 @@ panthor_vm_create_check_args(const struct panthor_device *ptdev,
/**
* panthor_vm_pool_create_vm() - Create a VM
+ * @ptdev: The panthor device
* @pool: The VM pool to create this VM on.
- * @kernel_va_start: Start of the region reserved for kernel objects.
- * @kernel_va_range: Size of the region reserved for kernel objects.
+ * @args: VM creation args.
*
* Return: a positive VM ID on success, a negative error code otherwise.
*/
@@ -1547,6 +1560,8 @@ static void panthor_vm_destroy(struct panthor_vm *vm)
*
* The VM resources are freed when the last reference on the VM object is
* dropped.
+ *
+ * Return: %0 for success, negative errno value for failure
*/
int panthor_vm_pool_destroy_vm(struct panthor_vm_pool *pool, u32 handle)
{
@@ -1571,7 +1586,9 @@ panthor_vm_pool_get_vm(struct panthor_vm_pool *pool, u32 handle)
{
struct panthor_vm *vm;
+ xa_lock(&pool->xa);
vm = panthor_vm_get(xa_load(&pool->xa, handle));
+ xa_unlock(&pool->xa);
return vm;
}
@@ -1693,11 +1710,17 @@ static void panthor_mmu_irq_handler(struct panthor_device *ptdev, u32 status)
access_type, access_type_name(ptdev, fault_status),
source_id);
+ /* We don't handle VM faults at the moment, so let's just clear the
+ * interrupt and let the writer/reader crash.
+ * Note that COMPLETED irqs are never cleared, but this is fine
+ * because they are always masked.
+ */
+ gpu_write(ptdev, MMU_INT_CLEAR, mask);
+
/* Ignore MMU interrupts on this AS until it's been
* re-enabled.
*/
ptdev->mmu->irq.mask = new_int_mask;
- gpu_write(ptdev, MMU_INT_MASK, new_int_mask);
if (ptdev->mmu->as.slots[as].vm)
ptdev->mmu->as.slots[as].vm->unhandled_fault = true;
@@ -1928,7 +1951,34 @@ struct panthor_heap_pool *panthor_vm_get_heap_pool(struct panthor_vm *vm, bool c
return pool;
}
-static u64 mair_to_memattr(u64 mair)
+/**
+ * panthor_vm_heaps_sizes() - Sum the heap pool sizes of all VMs owned by
+ * a file
+ * @pfile: File whose VM pool is walked. Nothing is done if it has no VM pool.
+ * @stats: Memory stats to be updated.
+ *
+ * For every VM in the file VM pool, the size reported by
+ * panthor_heap_pool_size() is added to @stats->resident. If the VM is
+ * active, meaning it currently has an AS id (vm->as.id >= 0), the size is
+ * recorded in @stats->active as well.
+ *
+ * The whole walk runs with the VM pool xarray lock held.
+ */
+void panthor_vm_heaps_sizes(struct panthor_file *pfile, struct drm_memory_stats *stats)
+{
+ struct panthor_vm *vm;
+ unsigned long i;
+
+ if (!pfile->vms)
+ return;
+
+ xa_lock(&pfile->vms->xa);
+ xa_for_each(&pfile->vms->xa, i, vm) {
+ size_t size = panthor_heap_pool_size(vm->heaps.pool); /* 0 when the VM has no heap pool */
+ stats->resident += size;
+ if (vm->as.id >= 0) /* an id of -1 means the VM is not bound to an AS */
+ stats->active += size;
+ }
+ xa_unlock(&pfile->vms->xa);
+}
+
+static u64 mair_to_memattr(u64 mair, bool coherent)
{
u64 memattr = 0;
u32 i;
@@ -1947,14 +1997,21 @@ static u64 mair_to_memattr(u64 mair)
AS_MEMATTR_AARCH64_SH_MIDGARD_INNER |
AS_MEMATTR_AARCH64_INNER_ALLOC_EXPL(false, false);
} else {
- /* Use SH_CPU_INNER mode so SH_IS, which is used when
- * IOMMU_CACHE is set, actually maps to the standard
- * definition of inner-shareable and not Mali's
- * internal-shareable mode.
- */
out_attr = AS_MEMATTR_AARCH64_INNER_OUTER_WB |
- AS_MEMATTR_AARCH64_SH_CPU_INNER |
AS_MEMATTR_AARCH64_INNER_ALLOC_EXPL(inner & 1, inner & 2);
+ /* Use SH_MIDGARD_INNER mode when device isn't coherent,
+ * so SH_IS, which is used when IOMMU_CACHE is set, maps
+ * to Mali's internal-shareable mode. As per the Mali
+ * Spec, inner and outer-shareable modes aren't allowed
+ * for WB memory when coherency is disabled.
+ * Use SH_CPU_INNER mode when coherency is enabled, so
+ * that SH_IS actually maps to the standard definition of
+ * inner-shareable.
+ */
+ if (!coherent)
+ out_attr |= AS_MEMATTR_AARCH64_SH_MIDGARD_INNER;
+ else
+ out_attr |= AS_MEMATTR_AARCH64_SH_CPU_INNER;
}
memattr |= (u64)out_attr << (8 * i);
@@ -2255,6 +2312,16 @@ panthor_vm_create(struct panthor_device *ptdev, bool for_mcu,
u64 full_va_range = 1ull << va_bits;
struct drm_gem_object *dummy_gem;
struct drm_gpu_scheduler *sched;
+ const struct drm_sched_init_args sched_args = {
+ .ops = &panthor_vm_bind_ops,
+ .submit_wq = ptdev->mmu->vm.wq,
+ .num_rqs = 1,
+ .credit_limit = 1,
+ /* Bind operations are synchronous for now, no timeout needed. */
+ .timeout = MAX_SCHEDULE_TIMEOUT,
+ .name = "panthor-vm-bind",
+ .dev = ptdev->base.dev,
+ };
struct io_pgtable_cfg pgtbl_cfg;
u64 mair, min_va, va_range;
struct panthor_vm *vm;
@@ -2312,11 +2379,7 @@ panthor_vm_create(struct panthor_device *ptdev, bool for_mcu,
goto err_mm_takedown;
}
- /* Bind operations are synchronous for now, no timeout needed. */
- ret = drm_sched_init(&vm->sched, &panthor_vm_bind_ops, ptdev->mmu->vm.wq,
- 1, 1, 0,
- MAX_SCHEDULE_TIMEOUT, NULL, NULL,
- "panthor-vm-bind", ptdev->base.dev);
+ ret = drm_sched_init(&vm->sched, &sched_args);
if (ret)
goto err_free_io_pgtable;
@@ -2326,7 +2389,7 @@ panthor_vm_create(struct panthor_device *ptdev, bool for_mcu,
goto err_sched_fini;
mair = io_pgtable_ops_to_pgtable(vm->pgtbl_ops)->cfg.arm_lpae_s1_cfg.mair;
- vm->memattr = mair_to_memattr(mair);
+ vm->memattr = mair_to_memattr(mair, ptdev->coherent);
mutex_lock(&ptdev->mmu->vm.lock);
list_add_tail(&vm->node, &ptdev->mmu->vm.list);
@@ -2366,11 +2429,12 @@ panthor_vm_bind_prepare_op_ctx(struct drm_file *file,
const struct drm_panthor_vm_bind_op *op,
struct panthor_vm_op_ctx *op_ctx)
{
+ ssize_t vm_pgsz = panthor_vm_page_size(vm);
struct drm_gem_object *gem;
int ret;
/* Aligned on page size. */
- if ((op->va | op->size) & ~PAGE_MASK)
+ if (!IS_ALIGNED(op->va | op->size, vm_pgsz))
return -EINVAL;
switch (op->flags & DRM_PANTHOR_VM_BIND_OP_TYPE_MASK) {
@@ -2651,7 +2715,8 @@ int panthor_vm_prepare_mapped_bos_resvs(struct drm_exec *exec, struct panthor_vm
*/
void panthor_mmu_unplug(struct panthor_device *ptdev)
{
- panthor_mmu_irq_suspend(&ptdev->mmu->irq);
+ if (!IS_ENABLED(CONFIG_PM) || pm_runtime_active(ptdev->base.dev))
+ panthor_mmu_irq_suspend(&ptdev->mmu->irq);
mutex_lock(&ptdev->mmu->as.slots_lock);
for (u32 i = 0; i < ARRAY_SIZE(ptdev->mmu->as.slots); i++) {
@@ -2716,9 +2781,9 @@ int panthor_mmu_init(struct panthor_device *ptdev)
* which passes iova as an unsigned long. Patch the mmu_features to reflect this
* limitation.
*/
- if (sizeof(unsigned long) * 8 < va_bits) {
+ if (va_bits > BITS_PER_LONG) {
ptdev->gpu_info.mmu_features &= ~GENMASK(7, 0);
- ptdev->gpu_info.mmu_features |= sizeof(unsigned long) * 8;
+ ptdev->gpu_info.mmu_features |= BITS_PER_LONG;
}
return drmm_add_action_or_reset(&ptdev->base, panthor_mmu_release_wq, mmu->vm.wq);
diff --git a/drivers/gpu/drm/panthor/panthor_mmu.h b/drivers/gpu/drm/panthor/panthor_mmu.h
index 6788771071e3..fc274637114e 100644
--- a/drivers/gpu/drm/panthor/panthor_mmu.h
+++ b/drivers/gpu/drm/panthor/panthor_mmu.h
@@ -9,6 +9,7 @@
struct drm_exec;
struct drm_sched_job;
+struct drm_memory_stats;
struct panthor_gem_object;
struct panthor_heap_pool;
struct panthor_vm;
@@ -30,12 +31,15 @@ panthor_vm_get_bo_for_va(struct panthor_vm *vm, u64 va, u64 *bo_offset);
int panthor_vm_active(struct panthor_vm *vm);
void panthor_vm_idle(struct panthor_vm *vm);
+u32 panthor_vm_page_size(struct panthor_vm *vm);
int panthor_vm_as(struct panthor_vm *vm);
int panthor_vm_flush_all(struct panthor_vm *vm);
struct panthor_heap_pool *
panthor_vm_get_heap_pool(struct panthor_vm *vm, bool create);
+void panthor_vm_heaps_sizes(struct panthor_file *pfile, struct drm_memory_stats *stats);
+
struct panthor_vm *panthor_vm_get(struct panthor_vm *vm);
void panthor_vm_put(struct panthor_vm *vm);
struct panthor_vm *panthor_vm_create(struct panthor_device *ptdev, bool for_mcu,
diff --git a/drivers/gpu/drm/panthor/panthor_regs.h b/drivers/gpu/drm/panthor/panthor_regs.h
index b7b3b3add166..a7a323dc5cf9 100644
--- a/drivers/gpu/drm/panthor/panthor_regs.h
+++ b/drivers/gpu/drm/panthor/panthor_regs.h
@@ -133,8 +133,8 @@
#define GPU_COHERENCY_PROT_BIT(name) BIT(GPU_COHERENCY_ ## name)
#define GPU_COHERENCY_PROTOCOL 0x304
-#define GPU_COHERENCY_ACE 0
-#define GPU_COHERENCY_ACE_LITE 1
+#define GPU_COHERENCY_ACE_LITE 0
+#define GPU_COHERENCY_ACE 1
#define GPU_COHERENCY_NONE 31
#define MCU_CONTROL 0x700
diff --git a/drivers/gpu/drm/panthor/panthor_sched.c b/drivers/gpu/drm/panthor/panthor_sched.c
index aee362abb710..43ee57728de5 100644
--- a/drivers/gpu/drm/panthor/panthor_sched.c
+++ b/drivers/gpu/drm/panthor/panthor_sched.c
@@ -9,6 +9,7 @@
#include <drm/panthor_drm.h>
#include <linux/build_bug.h>
+#include <linux/cleanup.h>
#include <linux/clk.h>
#include <linux/delay.h>
#include <linux/dma-mapping.h>
@@ -88,11 +89,11 @@
#define JOB_TIMEOUT_MS 5000
-#define MIN_CS_PER_CSG 8
-
-#define MIN_CSGS 3
#define MAX_CSG_PRIO 0xf
+#define NUM_INSTRS_PER_CACHE_LINE (64 / sizeof(u64))
+#define MAX_INSTRS_PER_JOB 24
+
struct panthor_group;
/**
@@ -137,8 +138,6 @@ enum panthor_csg_priority {
* non-real-time groups. When such a group becomes executable,
* it will evict the group with the lowest non-rt priority if
* there's no free group slot available.
- *
- * Currently not exposed to userspace.
*/
PANTHOR_CSG_PRIORITY_RT,
@@ -476,6 +475,18 @@ struct panthor_queue {
*/
struct list_head in_flight_jobs;
} fence_ctx;
+
+ /** @profiling: Job profiling data slots and access information. */
+ struct {
+ /** @profiling.slots: Kernel BO holding the slots. */
+ struct panthor_kernel_bo *slots;
+
+ /** @profiling.slot_count: Number of jobs the ringbuffer can hold at once. */
+ u32 slot_count;
+
+ /** @profiling.seqno: Index of the next available profiling information slot. */
+ u32 seqno;
+ } profiling;
};
/**
@@ -589,14 +600,25 @@ struct panthor_group {
* @timedout: True when a timeout occurred on any of the queues owned by
* this group.
*
- * Timeouts can be reported by drm_sched or by the FW. In any case, any
- * timeout situation is unrecoverable, and the group becomes useless.
- * We simply wait for all references to be dropped so we can release the
- * group object.
+ * Timeouts can be reported by drm_sched or by the FW. If a reset is required,
+ * and the group can't be suspended, this also leads to a timeout. In any case,
+ * any timeout situation is unrecoverable, and the group becomes useless. We
+ * simply wait for all references to be dropped so we can release the group
+ * object.
*/
bool timedout;
/**
+ * @innocent: True when the group becomes unusable because the group suspension
+ * failed during a reset.
+ *
+ * Sometimes the FW was put in a bad state by other groups, causing the group
+ * suspension happening in the reset path to fail. In that case, we consider the
+ * group innocent.
+ */
+ bool innocent;
+
+ /**
* @syncobjs: Pool of per-queue synchronization objects.
*
* One sync object per queue. The position of the sync object is
@@ -604,6 +626,21 @@ struct panthor_group {
*/
struct panthor_kernel_bo *syncobjs;
+ /** @fdinfo: Per-file info exposed through /proc/<process>/fdinfo */
+ struct {
+ /** @fdinfo.data: Total sampled values for jobs in queues from this group. */
+ struct panthor_gpu_usage data;
+
+ /**
+ * @fdinfo.lock: Spinlock to govern concurrent access from drm file's fdinfo
+ * callback and job post-completion processing function
+ */
+ spinlock_t lock;
+
+ /** @fdinfo.kbo_sizes: Aggregate size of private kernel BO's held by the group. */
+ size_t kbo_sizes;
+ } fdinfo;
+
/** @state: Group state. */
enum panthor_group_state state;
@@ -661,6 +698,18 @@ struct panthor_group {
struct list_head wait_node;
};
+struct panthor_job_profiling_data { /* One profiling sample; the queue profiling slots BO is indexed as an array of these. */
+ struct { /* Cycle counter pair; (after - before) is added to the group fdinfo cycles. */
+ u64 before; /* NOTE(review): presumably sampled right before the job runs - confirm against the writer */
+ u64 after; /* NOTE(review): presumably sampled right after the job completes - confirm against the writer */
+ } cycles;
+
+ struct { /* Timestamp pair; (after - before) is added to the group fdinfo time. */
+ u64 before; /* NOTE(review): presumably sampled right before the job runs - confirm against the writer */
+ u64 after; /* NOTE(review): presumably sampled right after the job completes - confirm against the writer */
+ } time;
+};
+
/**
* group_queue_work() - Queue a group work
* @group: Group to queue the work for.
@@ -774,6 +823,15 @@ struct panthor_job {
/** @done_fence: Fence signaled when the job is finished or cancelled. */
struct dma_fence *done_fence;
+
+ /** @profiling: Job profiling information. */
+ struct {
+ /** @profiling.mask: Current device job profiling enablement bitmask. */
+ u32 mask;
+
+ /** @profiling.slot: Job index in the profiling slots BO. */
+ u32 slot;
+ } profiling;
};
static void
@@ -782,7 +840,7 @@ panthor_queue_put_syncwait_obj(struct panthor_queue *queue)
if (queue->syncwait.kmap) {
struct iosys_map map = IOSYS_MAP_INIT_VADDR(queue->syncwait.kmap);
- drm_gem_vunmap_unlocked(queue->syncwait.obj, &map);
+ drm_gem_vunmap(queue->syncwait.obj, &map);
queue->syncwait.kmap = NULL;
}
@@ -808,7 +866,7 @@ panthor_queue_get_syncwait_obj(struct panthor_group *group, struct panthor_queue
goto err_put_syncwait_obj;
queue->syncwait.obj = &bo->base.base;
- ret = drm_gem_vmap_unlocked(queue->syncwait.obj, &map);
+ ret = drm_gem_vmap(queue->syncwait.obj, &map);
if (drm_WARN_ON(&ptdev->base, ret))
goto err_put_syncwait_obj;
@@ -838,6 +896,7 @@ static void group_free_queue(struct panthor_group *group, struct panthor_queue *
panthor_kernel_bo_destroy(queue->ringbuf);
panthor_kernel_bo_destroy(queue->iface.mem);
+ panthor_kernel_bo_destroy(queue->profiling.slots);
/* Release the last_fence we were holding, if any. */
dma_fence_put(queue->fence_ctx.last_fence);
@@ -1988,8 +2047,6 @@ tick_ctx_init(struct panthor_scheduler *sched,
}
}
-#define NUM_INSTRS_PER_SLOT 16
-
static void
group_term_post_processing(struct panthor_group *group)
{
@@ -2306,7 +2363,7 @@ static void tick_work(struct work_struct *work)
if (!drm_dev_enter(&ptdev->base, &cookie))
return;
- ret = pm_runtime_resume_and_get(ptdev->base.dev);
+ ret = panthor_device_resume_and_get(ptdev);
if (drm_WARN_ON(&ptdev->base, ret))
goto out_dev_exit;
@@ -2545,7 +2602,7 @@ static void queue_start(struct panthor_queue *queue)
list_for_each_entry(job, &queue->scheduler.pending_list, base.list)
job->base.s_fence->parent = dma_fence_get(job->done_fence);
- drm_sched_start(&queue->scheduler);
+ drm_sched_start(&queue->scheduler, 0);
}
static void panthor_group_stop(struct panthor_group *group)
@@ -2640,6 +2697,18 @@ void panthor_sched_suspend(struct panthor_device *ptdev)
csgs_upd_ctx_init(&upd_ctx);
while (slot_mask) {
u32 csg_id = ffs(slot_mask) - 1;
+ struct panthor_csg_slot *csg_slot = &sched->csg_slots[csg_id];
+
+ /* If the group was still usable before that point, we consider
+ * it innocent.
+ */
+ if (group_can_run(csg_slot->group))
+ csg_slot->group->innocent = true;
+
+ /* We consider group suspension failures as fatal and flag the
+ * group as unusable by setting timedout=true.
+ */
+ csg_slot->group->timedout = true;
csgs_upd_ctx_queue_reqs(ptdev, &upd_ctx, csg_id,
CSG_STATE_TERMINATE,
@@ -2783,6 +2852,42 @@ void panthor_sched_post_reset(struct panthor_device *ptdev, bool reset_failed)
}
}
+/*
+ * Fold the profiling sample of a job into the fdinfo counters of its group.
+ *
+ * The sample is read from the profiling slots BO of the job queue, at the slot
+ * index recorded in the job. Only the counters selected by
+ * job->profiling.mask are accumulated, and the accumulation is done with
+ * group->fdinfo.lock held.
+ */
+static void update_fdinfo_stats(struct panthor_job *job)
+{
+	struct panthor_group *group = job->group;
+	struct panthor_queue *queue = group->queues[job->queue_idx];
+	struct panthor_job_profiling_data *samples = queue->profiling.slots->kmap;
+	struct panthor_job_profiling_data *sample = samples + job->profiling.slot;
+	struct panthor_gpu_usage *usage = &group->fdinfo.data;
+
+	/* Held until the function returns, i.e. across both updates below. */
+	guard(spinlock)(&group->fdinfo.lock);
+
+	if (job->profiling.mask & PANTHOR_DEVICE_PROFILING_CYCLES)
+		usage->cycles += sample->cycles.after - sample->cycles.before;
+
+	if (job->profiling.mask & PANTHOR_DEVICE_PROFILING_TIMESTAMP)
+		usage->time += sample->time.after - sample->time.before;
+}
+
+/*
+ * Move the profiling totals accumulated by every group of a file into the
+ * stats of the file, and reset the per-group counters once collected.
+ *
+ * Does nothing when the file has no valid group pool. The walk is done with
+ * the group pool xarray lock held, and each group is drained with its fdinfo
+ * lock held.
+ */
+void panthor_fdinfo_gather_group_samples(struct panthor_file *pfile)
+{
+	struct panthor_group_pool *gpool = pfile->groups;
+	struct panthor_group *group;
+	unsigned long idx;
+
+	if (IS_ERR_OR_NULL(gpool))
+		return;
+
+	xa_lock(&gpool->xa);
+	xa_for_each(&gpool->xa, idx, group) {
+		struct panthor_gpu_usage *usage = &group->fdinfo.data;
+
+		scoped_guard(spinlock, &group->fdinfo.lock) {
+			pfile->stats.cycles += usage->cycles;
+			pfile->stats.time += usage->time;
+			usage->cycles = 0;
+			usage->time = 0;
+		}
+	}
+	xa_unlock(&gpool->xa);
+}
+
static void group_sync_upd_work(struct work_struct *work)
{
struct panthor_group *group =
@@ -2815,6 +2920,8 @@ static void group_sync_upd_work(struct work_struct *work)
dma_fence_end_signalling(cookie);
list_for_each_entry_safe(job, job_tmp, &done_jobs, node) {
+ if (job->profiling.mask)
+ update_fdinfo_stats(job);
list_del_init(&job->node);
panthor_job_put(&job->base);
}
@@ -2822,65 +2929,198 @@ static void group_sync_upd_work(struct work_struct *work)
group_put(group);
}
-static struct dma_fence *
-queue_run_job(struct drm_sched_job *sched_job)
+struct panthor_job_ringbuf_instrs { /* Staging area for the instructions one job writes into the ring buffer. */
+ u64 buffer[MAX_INSTRS_PER_JOB]; /* one 64-bit word per instruction */
+ u32 count; /* number of entries of buffer[] in use */
+};
+
+struct panthor_job_instr { /* One candidate instruction plus the profiling flag(s) gating it. */
+ u32 profile_mask; /* zero, or the flag(s) of which at least one must be enabled for instr to be emitted */
+ u64 instr; /* encoded instruction word */
+};
+
+#define JOB_INSTR(__prof, __instr) /* brace initializer setting the two fields of a struct panthor_job_instr */ \
+ { \
+ .profile_mask = __prof, \
+ .instr = __instr, \
+ }
+
+static void
+copy_instrs_to_ringbuf(struct panthor_queue *queue,
+ struct panthor_job *job,
+ struct panthor_job_ringbuf_instrs *instrs) /* Copy the job's instructions into the ring buffer at job->ringbuf.start; rounds instrs->count up as a side effect. */
+{
+ u64 ringbuf_size = panthor_kernel_bo_size(queue->ringbuf);
+ u64 start = job->ringbuf.start & (ringbuf_size - 1); /* byte offset inside the ring; the mask assumes a power-of-two size - TODO confirm */
+ u64 size, written;
+
+ /*
+ * Round the count up so that whole cache lines are copied, padding
+ * included. The padding entries are not cleared here: the caller is
+ * expected to have zeroed them already (prepare_job_instrs() does).
+ */
+ instrs->count = ALIGN(instrs->count, NUM_INSTRS_PER_CACHE_LINE);
+ size = instrs->count * sizeof(u64);
+ WARN_ON(size > ringbuf_size);
+ written = min(ringbuf_size - start, size); /* bytes that fit before the end of the ring */
+
+ memcpy(queue->ringbuf->kmap + start, instrs->buffer, written);
+
+ if (written < size) /* the remainder wraps around to the start of the ring */
+ memcpy(queue->ringbuf->kmap,
+ &instrs->buffer[written / sizeof(u64)],
+ size - written);
+}
+
+struct panthor_job_cs_params { /* Per-job values substituted into the instruction sequence built by prepare_job_instrs(). */
+ u32 profile_mask; /* profiling flags enabled for the job */
+ u64 addr_reg; u64 val_reg; /* register indices; get_job_cs_params() sets val_reg to addr_reg + 2 */
+ u64 cycle_reg; u64 time_reg; /* registers for the profiling stores; set to addr_reg and val_reg respectively */
+ u64 sync_addr; u64 times_addr; /* GPU addresses of the queue's sync object and of the job's profiling slot */
+ u64 cs_start; u64 cs_size; /* taken from job->call_info.start and job->call_info.size */
+ u32 last_flush; u32 waitall_mask; /* job->call_info.latest_flush; GENMASK(sb_slot_count - 1, 0) */
+};
+
+static void
+get_job_cs_params(struct panthor_job *job, struct panthor_job_cs_params *params) /* Fill *params from the job, its queue, its group and the device. */
{
- struct panthor_job *job = container_of(sched_job, struct panthor_job, base);
struct panthor_group *group = job->group;
struct panthor_queue *queue = group->queues[job->queue_idx];
struct panthor_device *ptdev = group->ptdev;
struct panthor_scheduler *sched = ptdev->scheduler;
- u32 ringbuf_size = panthor_kernel_bo_size(queue->ringbuf);
- u32 ringbuf_insert = queue->iface.input->insert & (ringbuf_size - 1);
- u64 addr_reg = ptdev->csif_info.cs_reg_count -
- ptdev->csif_info.unpreserved_cs_reg_count;
- u64 val_reg = addr_reg + 2;
- u64 sync_addr = panthor_kernel_bo_gpuva(group->syncobjs) +
- job->queue_idx * sizeof(struct panthor_syncobj_64b);
- u32 waitall_mask = GENMASK(sched->sb_slot_count - 1, 0);
- struct dma_fence *done_fence;
- int ret;
- u64 call_instrs[NUM_INSTRS_PER_SLOT] = {
- /* MOV32 rX+2, cs.latest_flush */
- (2ull << 56) | (val_reg << 48) | job->call_info.latest_flush,
+ params->addr_reg = ptdev->csif_info.cs_reg_count - /* "rX" in the instruction comments of prepare_job_instrs() */
+ ptdev->csif_info.unpreserved_cs_reg_count;
+ params->val_reg = params->addr_reg + 2; /* "rX+2" */
+ params->cycle_reg = params->addr_reg; /* cycle sampling reuses addr_reg */
+ params->time_reg = params->val_reg; /* timestamp sampling reuses val_reg */
- /* FLUSH_CACHE2.clean_inv_all.no_wait.signal(0) rX+2 */
- (36ull << 56) | (0ull << 48) | (val_reg << 40) | (0 << 16) | 0x233,
+ params->sync_addr = panthor_kernel_bo_gpuva(group->syncobjs) + /* this queue's entry in the group's sync objects */
+ job->queue_idx * sizeof(struct panthor_syncobj_64b);
+ params->times_addr = panthor_kernel_bo_gpuva(queue->profiling.slots) + /* this job's entry in the queue's profiling slots */
+ (job->profiling.slot * sizeof(struct panthor_job_profiling_data));
+ params->waitall_mask = GENMASK(sched->sb_slot_count - 1, 0); /* one bit per sb slot */
- /* MOV48 rX:rX+1, cs.start */
- (1ull << 56) | (addr_reg << 48) | job->call_info.start,
+ params->cs_start = job->call_info.start;
+ params->cs_size = job->call_info.size;
+ params->last_flush = job->call_info.latest_flush;
- /* MOV32 rX+2, cs.size */
- (2ull << 56) | (val_reg << 48) | job->call_info.size,
+ params->profile_mask = job->profiling.mask;
+}
- /* WAIT(0) => waits for FLUSH_CACHE2 instruction */
- (3ull << 56) | (1 << 16),
+#define JOB_INSTR_ALWAYS(instr) /* never skipped if PANTHOR_DEVICE_PROFILING_DISABLED is 0 - TODO confirm */ \
+ JOB_INSTR(PANTHOR_DEVICE_PROFILING_DISABLED, (instr))
+#define JOB_INSTR_TIMESTAMP(instr) /* gated by the TIMESTAMP profiling flag */ \
+ JOB_INSTR(PANTHOR_DEVICE_PROFILING_TIMESTAMP, (instr))
+#define JOB_INSTR_CYCLES(instr) /* gated by the CYCLES profiling flag */ \
+ JOB_INSTR(PANTHOR_DEVICE_PROFILING_CYCLES, (instr))
+static void
+prepare_job_instrs(const struct panthor_job_cs_params *params,
+ struct panthor_job_ringbuf_instrs *instrs) /* Build the job's instruction sequence in instrs->buffer, keeping only what params->profile_mask enables, then zero-pad it. */
+{
+ const struct panthor_job_instr instr_seq[] = {
+ /* MOV32 rX+2, cs.latest_flush */
+ JOB_INSTR_ALWAYS((2ull << 56) | (params->val_reg << 48) | params->last_flush),
+ /* FLUSH_CACHE2.clean_inv_all.no_wait.signal(0) rX+2 */
+ JOB_INSTR_ALWAYS((36ull << 56) | (0ull << 48) | (params->val_reg << 40) |
+ (0 << 16) | 0x233),
+ /* MOV48 cycle_reg pair, address of cycles.before in the job's profiling slot */
+ JOB_INSTR_CYCLES((1ull << 56) | (params->cycle_reg << 48) |
+ (params->times_addr +
+ offsetof(struct panthor_job_profiling_data, cycles.before))),
+ /* STORE_STATE cycles (sample taken before the CALL) */
+ JOB_INSTR_CYCLES((40ull << 56) | (params->cycle_reg << 40) | (1ll << 32)),
+ /* MOV48 time_reg pair (rX+2 per get_job_cs_params()), address of time.before */
+ JOB_INSTR_TIMESTAMP((1ull << 56) | (params->time_reg << 48) |
+ (params->times_addr +
+ offsetof(struct panthor_job_profiling_data, time.before))),
+ /* STORE_STATE timer (sample taken before the CALL) */
+ JOB_INSTR_TIMESTAMP((40ull << 56) | (params->time_reg << 40) | (0ll << 32)),
+ /* MOV48 rX:rX+1, cs.start */
+ JOB_INSTR_ALWAYS((1ull << 56) | (params->addr_reg << 48) | params->cs_start),
+ /* MOV32 rX+2, cs.size */
+ JOB_INSTR_ALWAYS((2ull << 56) | (params->val_reg << 48) | params->cs_size),
+ /* WAIT(0) => waits for FLUSH_CACHE2 instruction */
+ JOB_INSTR_ALWAYS((3ull << 56) | (1 << 16)),
/* CALL rX:rX+1, rX+2 */
- (32ull << 56) | (addr_reg << 40) | (val_reg << 32),
-
+ JOB_INSTR_ALWAYS((32ull << 56) | (params->addr_reg << 40) |
+ (params->val_reg << 32)),
+ /* MOV48 cycle_reg pair, address of cycles.after in the job's profiling slot */
+ JOB_INSTR_CYCLES((1ull << 56) | (params->cycle_reg << 48) |
+ (params->times_addr +
+ offsetof(struct panthor_job_profiling_data, cycles.after))),
+ /* STORE_STATE cycles (sample taken after the CALL) */
+ JOB_INSTR_CYCLES((40ull << 56) | (params->cycle_reg << 40) | (1ll << 32)),
+ /* MOV48 time_reg pair (rX+2 per get_job_cs_params()), address of time.after */
+ JOB_INSTR_TIMESTAMP((1ull << 56) | (params->time_reg << 48) |
+ (params->times_addr +
+ offsetof(struct panthor_job_profiling_data, time.after))),
+ /* STORE_STATE timer (sample taken after the CALL) */
+ JOB_INSTR_TIMESTAMP((40ull << 56) | (params->time_reg << 40) | (0ll << 32)),
/* MOV48 rX:rX+1, sync_addr */
- (1ull << 56) | (addr_reg << 48) | sync_addr,
-
+ JOB_INSTR_ALWAYS((1ull << 56) | (params->addr_reg << 48) | params->sync_addr),
/* MOV48 rX+2, #1 */
- (1ull << 56) | (val_reg << 48) | 1,
-
+ JOB_INSTR_ALWAYS((1ull << 56) | (params->val_reg << 48) | 1),
/* WAIT(all) */
- (3ull << 56) | (waitall_mask << 16),
-
+ JOB_INSTR_ALWAYS((3ull << 56) | (params->waitall_mask << 16)),
/* SYNC_ADD64.system_scope.propage_err.nowait rX:rX+1, rX+2*/
- (51ull << 56) | (0ull << 48) | (addr_reg << 40) | (val_reg << 32) | (0 << 16) | 1,
+ JOB_INSTR_ALWAYS((51ull << 56) | (0ull << 48) | (params->addr_reg << 40) |
+ (params->val_reg << 32) | (0 << 16) | 1),
+ /* ERROR_BARRIER, so we can recover from faults at job boundaries. */
+ JOB_INSTR_ALWAYS((47ull << 56)),
+ };
+ u32 pad;
- /* ERROR_BARRIER, so we can recover from faults at job
- * boundaries.
- */
- (47ull << 56),
+ instrs->count = 0;
+
+ /* The buffer size must be a multiple of 64 bytes (a cacheline) to please the prefetcher. */
+ static_assert(sizeof(instrs->buffer) % 64 == 0,
+ "panthor_job_ringbuf_instrs::buffer is not aligned on a cacheline");
+
+ /* Make sure we have enough storage to store the whole sequence. */
+ static_assert(ALIGN(ARRAY_SIZE(instr_seq), NUM_INSTRS_PER_CACHE_LINE) ==
+ ARRAY_SIZE(instrs->buffer),
+ "instr_seq vs panthor_job_ringbuf_instrs::buffer size mismatch");
+
+ for (u32 i = 0; i < ARRAY_SIZE(instr_seq); i++) {
+ /* Skip entries whose profiling flag is not enabled; entries with a zero mask are always kept. */
+ if (instr_seq[i].profile_mask &&
+ !(instr_seq[i].profile_mask & params->profile_mask))
+ continue;
+
+ instrs->buffer[instrs->count++] = instr_seq[i].instr;
+ }
+
+ pad = ALIGN(instrs->count, NUM_INSTRS_PER_CACHE_LINE); /* count rounded up to a multiple of NUM_INSTRS_PER_CACHE_LINE */
+ memset(&instrs->buffer[instrs->count], 0, /* zero only the padding entries past the last instruction */
+ (pad - instrs->count) * sizeof(instrs->buffer[0]));
+ instrs->count = pad; /* the reported count includes the padding */
+}
+
+static u32 calc_job_credits(u32 profile_mask) /* Number of instruction slots, padding included, that a job with this profiling mask occupies. */
+{
+ struct panthor_job_ringbuf_instrs instrs; /* scratch: only the resulting count is used */
+ struct panthor_job_cs_params params = { /* all other fields stay zero; the instruction values are discarded */
+ .profile_mask = profile_mask,
};
- /* Need to be cacheline aligned to please the prefetcher. */
- static_assert(sizeof(call_instrs) % 64 == 0,
- "call_instrs is not aligned on a cacheline");
+ prepare_job_instrs(&params, &instrs);
+ return instrs.count;
+}
+
+static struct dma_fence *
+queue_run_job(struct drm_sched_job *sched_job)
+{
+ struct panthor_job *job = container_of(sched_job, struct panthor_job, base);
+ struct panthor_group *group = job->group;
+ struct panthor_queue *queue = group->queues[job->queue_idx];
+ struct panthor_device *ptdev = group->ptdev;
+ struct panthor_scheduler *sched = ptdev->scheduler;
+ struct panthor_job_ringbuf_instrs instrs;
+ struct panthor_job_cs_params cs_params;
+ struct dma_fence *done_fence;
+ int ret;
/* Stream size is zero, nothing to do except making sure all previously
* submitted jobs are done before we signal the
@@ -2891,7 +3131,7 @@ queue_run_job(struct drm_sched_job *sched_job)
return dma_fence_get(job->done_fence);
}
- ret = pm_runtime_resume_and_get(ptdev->base.dev);
+ ret = panthor_device_resume_and_get(ptdev);
if (drm_WARN_ON(&ptdev->base, ret))
return ERR_PTR(ret);
@@ -2907,17 +3147,23 @@ queue_run_job(struct drm_sched_job *sched_job)
queue->fence_ctx.id,
atomic64_inc_return(&queue->fence_ctx.seqno));
- memcpy(queue->ringbuf->kmap + ringbuf_insert,
- call_instrs, sizeof(call_instrs));
+ job->profiling.slot = queue->profiling.seqno++;
+ if (queue->profiling.seqno == queue->profiling.slot_count)
+ queue->profiling.seqno = 0;
+
+ job->ringbuf.start = queue->iface.input->insert;
+
+ get_job_cs_params(job, &cs_params);
+ prepare_job_instrs(&cs_params, &instrs);
+ copy_instrs_to_ringbuf(queue, job, &instrs);
+
+ job->ringbuf.end = job->ringbuf.start + (instrs.count * sizeof(u64));
panthor_job_get(&job->base);
spin_lock(&queue->fence_ctx.lock);
list_add_tail(&job->node, &queue->fence_ctx.in_flight_jobs);
spin_unlock(&queue->fence_ctx.lock);
- job->ringbuf.start = queue->iface.input->insert;
- job->ringbuf.end = job->ringbuf.start + sizeof(call_instrs);
-
/* Make sure the ring buffer is updated before the INSERT
* register.
*/
@@ -3010,10 +3256,53 @@ static const struct drm_sched_backend_ops panthor_queue_sched_ops = {
.free_job = queue_free_job,
};
+static u32 calc_profiling_ringbuf_num_slots(struct panthor_device *ptdev, /* NOTE(review): ptdev is not used in this function */
+ u32 cs_ringbuf_size)
+{
+ u32 min_profiled_job_instrs = U32_MAX;
+ u32 last_flag = fls(PANTHOR_DEVICE_PROFILING_ALL); /* loop bound: covers every bit position up to the highest flag */
+
+ /*
+ * Profiled jobs carry extra instructions for sampling performance
+ * metrics, so each one takes more room in the queue's ring buffer
+ * than an unprofiled job, and fewer of them fit at once. We only
+ * need as many profiling slots as the maximum number of profiled
+ * jobs that can sit in the ring buffer simultaneously, which is
+ * bounded by the size of the smallest profiled job.
+ * To find that size, the instruction count is computed for each
+ * individual profiling flag and the minimum is kept. The case of
+ * profiling being disabled is deliberately left out, since
+ * unprofiled jobs don't need to keep track of this at all.
+ * The result is the ring buffer size divided by that size, rounded up.
+ */
+ for (u32 i = 0; i < last_flag; i++) {
+ min_profiled_job_instrs =
+ min(min_profiled_job_instrs, calc_job_credits(BIT(i)));
+ }
+
+ return DIV_ROUND_UP(cs_ringbuf_size, min_profiled_job_instrs * sizeof(u64));
+}
+
static struct panthor_queue *
group_create_queue(struct panthor_group *group,
const struct drm_panthor_queue_create *args)
{
+ const struct drm_sched_init_args sched_args = {
+ .ops = &panthor_queue_sched_ops,
+ .submit_wq = group->ptdev->scheduler->wq,
+ .num_rqs = 1,
+ /*
+ * The credit limit argument tells us the total number of
+ * instructions across all CS slots in the ringbuffer, with
+ * some jobs requiring twice as many as others, depending on
+ * their profiling status.
+ */
+ .credit_limit = args->ringbuf_size / sizeof(u64),
+ .timeout = msecs_to_jiffies(JOB_TIMEOUT_MS),
+ .timeout_wq = group->ptdev->reset.wq,
+ .name = "panthor-queue",
+ .dev = group->ptdev->base.dev,
+ };
struct drm_gpu_scheduler *drm_sched;
struct panthor_queue *queue;
int ret;
@@ -3043,7 +3332,8 @@ group_create_queue(struct panthor_group *group,
DRM_PANTHOR_BO_NO_MMAP,
DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC |
DRM_PANTHOR_VM_BIND_OP_MAP_UNCACHED,
- PANTHOR_VM_KERNEL_AUTO_VA);
+ PANTHOR_VM_KERNEL_AUTO_VA,
+ "CS ring buffer");
if (IS_ERR(queue->ringbuf)) {
ret = PTR_ERR(queue->ringbuf);
goto err_free_queue;
@@ -3063,12 +3353,29 @@ group_create_queue(struct panthor_group *group,
goto err_free_queue;
}
- ret = drm_sched_init(&queue->scheduler, &panthor_queue_sched_ops,
- group->ptdev->scheduler->wq, 1,
- args->ringbuf_size / (NUM_INSTRS_PER_SLOT * sizeof(u64)),
- 0, msecs_to_jiffies(JOB_TIMEOUT_MS),
- group->ptdev->reset.wq,
- NULL, "panthor-queue", group->ptdev->base.dev);
+ queue->profiling.slot_count =
+ calc_profiling_ringbuf_num_slots(group->ptdev, args->ringbuf_size);
+
+ queue->profiling.slots =
+ panthor_kernel_bo_create(group->ptdev, group->vm,
+ queue->profiling.slot_count *
+ sizeof(struct panthor_job_profiling_data),
+ DRM_PANTHOR_BO_NO_MMAP,
+ DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC |
+ DRM_PANTHOR_VM_BIND_OP_MAP_UNCACHED,
+ PANTHOR_VM_KERNEL_AUTO_VA,
+ "Group job stats");
+
+ if (IS_ERR(queue->profiling.slots)) {
+ ret = PTR_ERR(queue->profiling.slots);
+ goto err_free_queue;
+ }
+
+ ret = panthor_kernel_bo_vmap(queue->profiling.slots);
+ if (ret)
+ goto err_free_queue;
+
+ ret = drm_sched_init(&queue->scheduler, &sched_args);
if (ret)
goto err_free_queue;
@@ -3082,6 +3389,29 @@ err_free_queue:
return ERR_PTR(ret);
}
+static void add_group_kbo_sizes(struct panthor_device *ptdev,
+ struct panthor_group *group) /* Sum the sizes of the group's kernel BOs into group->fdinfo.kbo_sizes. */
+{
+ struct panthor_queue *queue;
+ int i;
+
+ if (drm_WARN_ON(&ptdev->base, IS_ERR_OR_NULL(group))) /* warn and bail out on an invalid group pointer */
+ return;
+ if (drm_WARN_ON(&ptdev->base, ptdev != group->ptdev)) /* warn and bail out if the group belongs to another device */
+ return;
+
+ group->fdinfo.kbo_sizes += group->suspend_buf->obj->size; /* accumulates with +=: assumes kbo_sizes starts at zero - TODO confirm */
+ group->fdinfo.kbo_sizes += group->protm_suspend_buf->obj->size; /* NOTE(review): dereferenced without a NULL check - confirm it is always allocated */
+ group->fdinfo.kbo_sizes += group->syncobjs->obj->size;
+
+ for (i = 0; i < group->queue_count; i++) { /* per-queue buffers: ring buffer, interface memory, profiling slots */
+ queue = group->queues[i];
+ group->fdinfo.kbo_sizes += queue->ringbuf->obj->size;
+ group->fdinfo.kbo_sizes += queue->iface.mem->obj->size;
+ group->fdinfo.kbo_sizes += queue->profiling.slots->obj->size;
+ }
+}
+
#define MAX_GROUPS_PER_POOL 128
int panthor_group_create(struct panthor_file *pfile,
@@ -3165,7 +3495,8 @@ int panthor_group_create(struct panthor_file *pfile,
DRM_PANTHOR_BO_NO_MMAP,
DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC |
DRM_PANTHOR_VM_BIND_OP_MAP_UNCACHED,
- PANTHOR_VM_KERNEL_AUTO_VA);
+ PANTHOR_VM_KERNEL_AUTO_VA,
+ "Group sync objects");
if (IS_ERR(group->syncobjs)) {
ret = PTR_ERR(group->syncobjs);
goto err_put_group;
@@ -3206,6 +3537,9 @@ int panthor_group_create(struct panthor_file *pfile,
}
mutex_unlock(&sched->reset.lock);
+ add_group_kbo_sizes(group->ptdev, group);
+ spin_lock_init(&group->fdinfo.lock);
+
return gid;
err_put_group:
@@ -3285,6 +3619,8 @@ int panthor_group_get_state(struct panthor_file *pfile,
get_state->state |= DRM_PANTHOR_GROUP_STATE_FATAL_FAULT;
get_state->fatal_queues = group->fatal_queues;
}
+ if (group->innocent)
+ get_state->state |= DRM_PANTHOR_GROUP_STATE_INNOCENT;
mutex_unlock(&sched->lock);
group_put(group);
@@ -3321,6 +3657,33 @@ void panthor_group_pool_destroy(struct panthor_file *pfile)
pfile->groups = NULL;
}
+/**
+ * panthor_fdinfo_gather_group_mem_info() - Retrieve aggregate size of all private kernel BOs
+ * belonging to all the groups owned by an open Panthor file
+ * @pfile: File whose group pool is walked; nothing is done if it has no valid pool.
+ * @stats: Memory statistics to be updated: every group adds its kbo_sizes to
+ * stats->resident, and groups with a non-negative csg_id also add it to stats->active.
+ */
+void
+panthor_fdinfo_gather_group_mem_info(struct panthor_file *pfile,
+ struct drm_memory_stats *stats)
+{
+ struct panthor_group_pool *gpool = pfile->groups;
+ struct panthor_group *group;
+ unsigned long i;
+
+ if (IS_ERR_OR_NULL(gpool)) /* no usable group pool: leave stats untouched */
+ return;
+
+ xa_lock(&gpool->xa);
+ xa_for_each(&gpool->xa, i, group) {
+ stats->resident += group->fdinfo.kbo_sizes;
+ if (group->csg_id >= 0) /* presumably means the group currently holds a CSG slot - confirm */
+ stats->active += group->fdinfo.kbo_sizes;
+ }
+ xa_unlock(&gpool->xa);
+}
+
static void job_release(struct kref *ref)
{
struct panthor_job *job = container_of(ref, struct panthor_job, refcount);
@@ -3373,6 +3736,7 @@ panthor_job_create(struct panthor_file *pfile,
{
struct panthor_group_pool *gpool = pfile->groups;
struct panthor_job *job;
+ u32 credits;
int ret;
if (qsubmit->pad)
@@ -3409,6 +3773,11 @@ panthor_job_create(struct panthor_file *pfile,
goto err_put_job;
}
+ if (!group_can_run(job->group)) {
+ ret = -EINVAL;
+ goto err_put_job;
+ }
+
if (job->queue_idx >= job->group->queue_count ||
!job->group->queues[job->queue_idx]) {
ret = -EINVAL;
@@ -3426,9 +3795,16 @@ panthor_job_create(struct panthor_file *pfile,
}
}
+ job->profiling.mask = pfile->ptdev->profile_mask;
+ credits = calc_job_credits(job->profiling.mask);
+ if (credits == 0) {
+ ret = -EINVAL;
+ goto err_put_job;
+ }
+
ret = drm_sched_job_init(&job->base,
&job->group->queues[job->queue_idx]->entity,
- 1, job->group);
+ credits, job->group);
if (ret)
goto err_put_job;
diff --git a/drivers/gpu/drm/panthor/panthor_sched.h b/drivers/gpu/drm/panthor/panthor_sched.h
index 3a30d2328b30..e650a445cf50 100644
--- a/drivers/gpu/drm/panthor/panthor_sched.h
+++ b/drivers/gpu/drm/panthor/panthor_sched.h
@@ -9,6 +9,7 @@ struct dma_fence;
struct drm_file;
struct drm_gem_object;
struct drm_sched_job;
+struct drm_memory_stats;
struct drm_panthor_group_create;
struct drm_panthor_queue_create;
struct drm_panthor_group_get_state;
@@ -36,6 +37,8 @@ void panthor_job_update_resvs(struct drm_exec *exec, struct drm_sched_job *job);
int panthor_group_pool_create(struct panthor_file *pfile);
void panthor_group_pool_destroy(struct panthor_file *pfile);
+void panthor_fdinfo_gather_group_mem_info(struct panthor_file *pfile,
+ struct drm_memory_stats *stats);
int panthor_sched_init(struct panthor_device *ptdev);
void panthor_sched_unplug(struct panthor_device *ptdev);
@@ -47,4 +50,6 @@ void panthor_sched_resume(struct panthor_device *ptdev);
void panthor_sched_report_mmu_fault(struct panthor_device *ptdev);
void panthor_sched_report_fw_events(struct panthor_device *ptdev, u32 events);
+void panthor_fdinfo_gather_group_samples(struct panthor_file *pfile);
+
#endif