Diffstat (limited to 'drivers/gpu/drm/panthor')
-rw-r--r-- | drivers/gpu/drm/panthor/panthor_devfreq.c | 41
-rw-r--r-- | drivers/gpu/drm/panthor/panthor_devfreq.h | 4
-rw-r--r-- | drivers/gpu/drm/panthor/panthor_device.c | 123
-rw-r--r-- | drivers/gpu/drm/panthor/panthor_device.h | 86
-rw-r--r-- | drivers/gpu/drm/panthor/panthor_drv.c | 268
-rw-r--r-- | drivers/gpu/drm/panthor/panthor_fw.c | 213
-rw-r--r-- | drivers/gpu/drm/panthor/panthor_fw.h | 6
-rw-r--r-- | drivers/gpu/drm/panthor/panthor_gem.c | 248
-rw-r--r-- | drivers/gpu/drm/panthor/panthor_gem.h | 87
-rw-r--r-- | drivers/gpu/drm/panthor/panthor_gpu.c | 72
-rw-r--r-- | drivers/gpu/drm/panthor/panthor_gpu.h | 4
-rw-r--r-- | drivers/gpu/drm/panthor/panthor_heap.c | 60
-rw-r--r-- | drivers/gpu/drm/panthor/panthor_heap.h | 2
-rw-r--r-- | drivers/gpu/drm/panthor/panthor_mmu.c | 185
-rw-r--r-- | drivers/gpu/drm/panthor/panthor_mmu.h | 4
-rw-r--r-- | drivers/gpu/drm/panthor/panthor_regs.h | 4
-rw-r--r-- | drivers/gpu/drm/panthor/panthor_sched.c | 516
-rw-r--r-- | drivers/gpu/drm/panthor/panthor_sched.h | 5
18 files changed, 1566 insertions(+), 362 deletions(-)
diff --git a/drivers/gpu/drm/panthor/panthor_devfreq.c b/drivers/gpu/drm/panthor/panthor_devfreq.c index c6d3c327cc24..3686515d368d 100644 --- a/drivers/gpu/drm/panthor/panthor_devfreq.c +++ b/drivers/gpu/drm/panthor/panthor_devfreq.c @@ -62,14 +62,20 @@ static void panthor_devfreq_update_utilization(struct panthor_devfreq *pdevfreq) static int panthor_devfreq_target(struct device *dev, unsigned long *freq, u32 flags) { + struct panthor_device *ptdev = dev_get_drvdata(dev); struct dev_pm_opp *opp; + int err; opp = devfreq_recommended_opp(dev, freq, flags); if (IS_ERR(opp)) return PTR_ERR(opp); dev_pm_opp_put(opp); - return dev_pm_opp_set_rate(dev, *freq); + err = dev_pm_opp_set_rate(dev, *freq); + if (!err) + ptdev->current_frequency = *freq; + + return err; } static void panthor_devfreq_reset(struct panthor_devfreq *pdevfreq) @@ -130,6 +136,7 @@ int panthor_devfreq_init(struct panthor_device *ptdev) struct panthor_devfreq *pdevfreq; struct dev_pm_opp *opp; unsigned long cur_freq; + unsigned long freq = ULONG_MAX; int ret; pdevfreq = drmm_kzalloc(&ptdev->base, sizeof(*ptdev->devfreq), GFP_KERNEL); @@ -156,12 +163,6 @@ int panthor_devfreq_init(struct panthor_device *ptdev) cur_freq = clk_get_rate(ptdev->clks.core); - opp = devfreq_recommended_opp(dev, &cur_freq, 0); - if (IS_ERR(opp)) - return PTR_ERR(opp); - - panthor_devfreq_profile.initial_freq = cur_freq; - /* Regulator coupling only takes care of synchronizing/balancing voltage * updates, but the coupled regulator needs to be enabled manually. * @@ -192,16 +193,30 @@ int panthor_devfreq_init(struct panthor_device *ptdev) return ret; } + opp = devfreq_recommended_opp(dev, &cur_freq, 0); + if (IS_ERR(opp)) + return PTR_ERR(opp); + + panthor_devfreq_profile.initial_freq = cur_freq; + ptdev->current_frequency = cur_freq; + /* * Set the recommend OPP this will enable and configure the regulator * if any and will avoid a switch off by regulator_late_cleanup() */ ret = dev_pm_opp_set_opp(dev, opp); + dev_pm_opp_put(opp); if (ret) { DRM_DEV_ERROR(dev, "Couldn't set recommended OPP\n"); return ret; } + /* Find the fastest defined rate */ + opp = dev_pm_opp_find_freq_floor(dev, &freq); + if (IS_ERR(opp)) + return PTR_ERR(opp); + ptdev->fast_rate = freq; + dev_pm_opp_put(opp); /* @@ -228,26 +243,26 @@ int panthor_devfreq_init(struct panthor_device *ptdev) return 0; } -int panthor_devfreq_resume(struct panthor_device *ptdev) +void panthor_devfreq_resume(struct panthor_device *ptdev) { struct panthor_devfreq *pdevfreq = ptdev->devfreq; if (!pdevfreq->devfreq) - return 0; + return; panthor_devfreq_reset(pdevfreq); - return devfreq_resume_device(pdevfreq->devfreq); + drm_WARN_ON(&ptdev->base, devfreq_resume_device(pdevfreq->devfreq)); } -int panthor_devfreq_suspend(struct panthor_device *ptdev) +void panthor_devfreq_suspend(struct panthor_device *ptdev) { struct panthor_devfreq *pdevfreq = ptdev->devfreq; if (!pdevfreq->devfreq) - return 0; + return; - return devfreq_suspend_device(pdevfreq->devfreq); + drm_WARN_ON(&ptdev->base, devfreq_suspend_device(pdevfreq->devfreq)); } void panthor_devfreq_record_busy(struct panthor_device *ptdev) diff --git a/drivers/gpu/drm/panthor/panthor_devfreq.h b/drivers/gpu/drm/panthor/panthor_devfreq.h index 83a5c9522493..b7631de695f7 100644 --- a/drivers/gpu/drm/panthor/panthor_devfreq.h +++ b/drivers/gpu/drm/panthor/panthor_devfreq.h @@ -12,8 +12,8 @@ struct panthor_devfreq; int panthor_devfreq_init(struct panthor_device *ptdev); -int panthor_devfreq_resume(struct panthor_device *ptdev); -int 
panthor_devfreq_suspend(struct panthor_device *ptdev); +void panthor_devfreq_resume(struct panthor_device *ptdev); +void panthor_devfreq_suspend(struct panthor_device *ptdev); void panthor_devfreq_record_busy(struct panthor_device *ptdev); void panthor_devfreq_record_idle(struct panthor_device *ptdev); diff --git a/drivers/gpu/drm/panthor/panthor_device.c b/drivers/gpu/drm/panthor/panthor_device.c index 4082c8f2951d..f0b2da5b2b96 100644 --- a/drivers/gpu/drm/panthor/panthor_device.c +++ b/drivers/gpu/drm/panthor/panthor_device.c @@ -22,6 +22,24 @@ #include "panthor_regs.h" #include "panthor_sched.h" +static int panthor_gpu_coherency_init(struct panthor_device *ptdev) +{ + ptdev->coherent = device_get_dma_attr(ptdev->base.dev) == DEV_DMA_COHERENT; + + if (!ptdev->coherent) + return 0; + + /* Check if the ACE-Lite coherency protocol is actually supported by the GPU. + * ACE protocol has never been supported for command stream frontend GPUs. + */ + if ((gpu_read(ptdev, GPU_COHERENCY_FEATURES) & + GPU_COHERENCY_PROT_BIT(ACE_LITE))) + return 0; + + drm_err(&ptdev->base, "Coherency not supported by the device"); + return -ENOTSUPP; +} + static int panthor_clk_init(struct panthor_device *ptdev) { ptdev->clks.core = devm_clk_get(ptdev->base.dev, NULL); @@ -110,14 +128,11 @@ static void panthor_device_reset_work(struct work_struct *work) struct panthor_device *ptdev = container_of(work, struct panthor_device, reset.work); int ret = 0, cookie; - if (atomic_read(&ptdev->pm.state) != PANTHOR_DEVICE_PM_STATE_ACTIVE) { - /* - * No need for a reset as the device has been (or will be) - * powered down - */ - atomic_set(&ptdev->reset.pending, 0); + /* If the device is entering suspend, we don't reset. A slow reset will + * be forced at resume time instead. + */ + if (atomic_read(&ptdev->pm.state) != PANTHOR_DEVICE_PM_STATE_ACTIVE) return; - } if (!drm_dev_enter(&ptdev->base, &cookie)) return; @@ -156,8 +171,6 @@ int panthor_device_init(struct panthor_device *ptdev) struct page *p; int ret; - ptdev->coherent = device_get_dma_attr(ptdev->base.dev) == DEV_DMA_COHERENT; - init_completion(&ptdev->unplug.done); ret = drmm_mutex_init(&ptdev->base, &ptdev->unplug.lock); if (ret) @@ -167,6 +180,11 @@ int panthor_device_init(struct panthor_device *ptdev) if (ret) return ret; +#ifdef CONFIG_DEBUG_FS + drmm_mutex_init(&ptdev->base, &ptdev->gems.lock); + INIT_LIST_HEAD(&ptdev->gems.node); +#endif + atomic_set(&ptdev->pm.state, PANTHOR_DEVICE_PM_STATE_SUSPENDED); p = alloc_page(GFP_KERNEL | __GFP_ZERO); if (!p) @@ -230,6 +248,10 @@ int panthor_device_init(struct panthor_device *ptdev) if (ret) goto err_rpm_put; + ret = panthor_gpu_coherency_init(ptdev); + if (ret) + goto err_unplug_gpu; + ret = panthor_mmu_init(ptdev); if (ret) goto err_unplug_gpu; @@ -390,11 +412,15 @@ int panthor_device_mmap_io(struct panthor_device *ptdev, struct vm_area_struct * { u64 offset = (u64)vma->vm_pgoff << PAGE_SHIFT; + if ((vma->vm_flags & VM_SHARED) == 0) + return -EINVAL; + switch (offset) { case DRM_PANTHOR_USER_FLUSH_ID_MMIO_OFFSET: if (vma->vm_end - vma->vm_start != PAGE_SIZE || (vma->vm_flags & (VM_WRITE | VM_EXEC))) return -EINVAL; + vm_flags_clear(vma, VM_MAYWRITE); break; @@ -411,6 +437,22 @@ int panthor_device_mmap_io(struct panthor_device *ptdev, struct vm_area_struct * return 0; } +static int panthor_device_resume_hw_components(struct panthor_device *ptdev) +{ + int ret; + + panthor_gpu_resume(ptdev); + panthor_mmu_resume(ptdev); + + ret = panthor_fw_resume(ptdev); + if (!ret) + return 0; + + panthor_mmu_suspend(ptdev); + 
panthor_gpu_suspend(ptdev); + return ret; +} + int panthor_device_resume(struct device *dev) { struct panthor_device *ptdev = dev_get_drvdata(dev); @@ -433,31 +475,34 @@ int panthor_device_resume(struct device *dev) if (ret) goto err_disable_stacks_clk; - ret = panthor_devfreq_resume(ptdev); - if (ret) - goto err_disable_coregroup_clk; + panthor_devfreq_resume(ptdev); if (panthor_device_is_initialized(ptdev) && drm_dev_enter(&ptdev->base, &cookie)) { - panthor_gpu_resume(ptdev); - panthor_mmu_resume(ptdev); - ret = drm_WARN_ON(&ptdev->base, panthor_fw_resume(ptdev)); - if (!ret) { - panthor_sched_resume(ptdev); - } else { - panthor_mmu_suspend(ptdev); - panthor_gpu_suspend(ptdev); + /* If there was a reset pending at the time we suspended the + * device, we force a slow reset. + */ + if (atomic_read(&ptdev->reset.pending)) { + ptdev->reset.fast = false; + atomic_set(&ptdev->reset.pending, 0); } + ret = panthor_device_resume_hw_components(ptdev); + if (ret && ptdev->reset.fast) { + drm_err(&ptdev->base, "Fast reset failed, trying a slow reset"); + ptdev->reset.fast = false; + ret = panthor_device_resume_hw_components(ptdev); + } + + if (!ret) + panthor_sched_resume(ptdev); + drm_dev_exit(cookie); if (ret) goto err_suspend_devfreq; } - if (atomic_read(&ptdev->reset.pending)) - queue_work(ptdev->reset.wq, &ptdev->reset.work); - /* Clear all IOMEM mappings pointing to this device after we've * resumed. This way the fake mappings pointing to the dummy pages * are removed and the real iomem mapping will be restored on next @@ -472,8 +517,6 @@ int panthor_device_resume(struct device *dev) err_suspend_devfreq: panthor_devfreq_suspend(ptdev); - -err_disable_coregroup_clk: clk_disable_unprepare(ptdev->clks.coregroup); err_disable_stacks_clk: @@ -484,13 +527,14 @@ err_disable_core_clk: err_set_suspended: atomic_set(&ptdev->pm.state, PANTHOR_DEVICE_PM_STATE_SUSPENDED); + atomic_set(&ptdev->pm.recovery_needed, 1); return ret; } int panthor_device_suspend(struct device *dev) { struct panthor_device *ptdev = dev_get_drvdata(dev); - int ret, cookie; + int cookie; if (atomic_read(&ptdev->pm.state) != PANTHOR_DEVICE_PM_STATE_ACTIVE) return -EINVAL; @@ -522,36 +566,11 @@ int panthor_device_suspend(struct device *dev) drm_dev_exit(cookie); } - ret = panthor_devfreq_suspend(ptdev); - if (ret) { - if (panthor_device_is_initialized(ptdev) && - drm_dev_enter(&ptdev->base, &cookie)) { - panthor_gpu_resume(ptdev); - panthor_mmu_resume(ptdev); - drm_WARN_ON(&ptdev->base, panthor_fw_resume(ptdev)); - panthor_sched_resume(ptdev); - drm_dev_exit(cookie); - } - - goto err_set_active; - } + panthor_devfreq_suspend(ptdev); clk_disable_unprepare(ptdev->clks.coregroup); clk_disable_unprepare(ptdev->clks.stacks); clk_disable_unprepare(ptdev->clks.core); atomic_set(&ptdev->pm.state, PANTHOR_DEVICE_PM_STATE_SUSPENDED); return 0; - -err_set_active: - /* If something failed and we have to revert back to an - * active state, we also need to clear the MMIO userspace - * mappings, so any dumb pages that were mapped while we - * were trying to suspend gets invalidated. 
- */ - mutex_lock(&ptdev->pm.mmio_lock); - atomic_set(&ptdev->pm.state, PANTHOR_DEVICE_PM_STATE_ACTIVE); - unmap_mapping_range(ptdev->base.anon_inode->i_mapping, - DRM_PANTHOR_USER_MMIO_OFFSET, 0, 1); - mutex_unlock(&ptdev->pm.mmio_lock); - return ret; } diff --git a/drivers/gpu/drm/panthor/panthor_device.h b/drivers/gpu/drm/panthor/panthor_device.h index e388c0472ba7..465d3ab1b79e 100644 --- a/drivers/gpu/drm/panthor/panthor_device.h +++ b/drivers/gpu/drm/panthor/panthor_device.h @@ -9,6 +9,7 @@ #include <linux/atomic.h> #include <linux/io-pgtable.h> #include <linux/regulator/consumer.h> +#include <linux/pm_runtime.h> #include <linux/sched.h> #include <linux/spinlock.h> @@ -67,6 +68,25 @@ struct panthor_irq { }; /** + * enum panthor_device_profiling_mode - Profiling state + */ +enum panthor_device_profiling_flags { + /** @PANTHOR_DEVICE_PROFILING_DISABLED: Profiling is disabled. */ + PANTHOR_DEVICE_PROFILING_DISABLED = 0, + + /** @PANTHOR_DEVICE_PROFILING_CYCLES: Sampling job cycles. */ + PANTHOR_DEVICE_PROFILING_CYCLES = BIT(0), + + /** @PANTHOR_DEVICE_PROFILING_TIMESTAMP: Sampling job timestamp. */ + PANTHOR_DEVICE_PROFILING_TIMESTAMP = BIT(1), + + /** @PANTHOR_DEVICE_PROFILING_ALL: Sampling everything. */ + PANTHOR_DEVICE_PROFILING_ALL = + PANTHOR_DEVICE_PROFILING_CYCLES | + PANTHOR_DEVICE_PROFILING_TIMESTAMP, +}; + +/** * struct panthor_device - Panthor device */ struct panthor_device { @@ -137,6 +157,17 @@ struct panthor_device { /** @pending: Set to true if a reset is pending. */ atomic_t pending; + + /** + * @fast: True if the post_reset logic can proceed with a fast reset. + * + * A fast reset is just a reset where the driver doesn't reload the FW sections. + * + * Any time the firmware is properly suspended, a fast reset can take place. + * On the other hand, if the halt operation failed, the driver will reload + * all FW sections to make sure we start from a fresh state. + */ + bool fast; } reset; /** @pm: Power management related data. */ @@ -161,7 +192,35 @@ struct panthor_device { * is suspended. */ struct page *dummy_latest_flush; + + /** @recovery_needed: True when a resume attempt failed. */ + atomic_t recovery_needed; } pm; + + /** @profile_mask: User-set profiling flags for job accounting. */ + u32 profile_mask; + + /** @current_frequency: Device clock frequency at present. Set by DVFS*/ + unsigned long current_frequency; + + /** @fast_rate: Maximum device clock frequency. Set by DVFS */ + unsigned long fast_rate; + +#ifdef CONFIG_DEBUG_FS + /** @gems: Device-wide list of GEM objects owned by at least one file. */ + struct { + /** @gems.lock: Protects the device-wide list of GEM objects. */ + struct mutex lock; + + /** @node: Used to keep track of all the device's DRM objects */ + struct list_head node; + } gems; +#endif +}; + +struct panthor_gpu_usage { + u64 time; + u64 cycles; }; /** @@ -176,6 +235,9 @@ struct panthor_file { /** @groups: Scheduling group pool attached to this file. */ struct panthor_group_pool *groups; + + /** @stats: cycle and timestamp measures for job execution. 
*/ + struct panthor_gpu_usage stats; }; int panthor_device_init(struct panthor_device *ptdev); @@ -207,6 +269,28 @@ int panthor_device_mmap_io(struct panthor_device *ptdev, int panthor_device_resume(struct device *dev); int panthor_device_suspend(struct device *dev); +static inline int panthor_device_resume_and_get(struct panthor_device *ptdev) +{ + int ret = pm_runtime_resume_and_get(ptdev->base.dev); + + /* If the resume failed, we need to clear the runtime_error, which + * can done by forcing the RPM state to suspended. If multiple + * threads called panthor_device_resume_and_get(), we only want + * one of them to update the state, hence the cmpxchg. Note that a + * thread might enter panthor_device_resume_and_get() and call + * pm_runtime_resume_and_get() after another thread had attempted + * to resume and failed. This means we will end up with an error + * without even attempting a resume ourselves. The only risk here + * is to report an error when the second resume attempt might have + * succeeded. Given resume errors are not expected, this is probably + * something we can live with. + */ + if (ret && atomic_cmpxchg(&ptdev->pm.recovery_needed, 1, 0) == 1) + pm_runtime_set_suspended(ptdev->base.dev); + + return ret; +} + enum drm_panthor_exception_type { DRM_PANTHOR_EXCEPTION_OK = 0x00, DRM_PANTHOR_EXCEPTION_TERMINATED = 0x04, @@ -310,8 +394,6 @@ static irqreturn_t panthor_ ## __name ## _irq_threaded_handler(int irq, void *da if (!status) \ break; \ \ - gpu_write(ptdev, __reg_prefix ## _INT_CLEAR, status); \ - \ __handler(ptdev, status); \ ret = IRQ_HANDLED; \ } \ diff --git a/drivers/gpu/drm/panthor/panthor_drv.c b/drivers/gpu/drm/panthor/panthor_drv.c index c520f156e2d7..6200cad22563 100644 --- a/drivers/gpu/drm/panthor/panthor_drv.c +++ b/drivers/gpu/drm/panthor/panthor_drv.c @@ -3,12 +3,17 @@ /* Copyright 2019 Linaro, Ltd., Rob Herring <robh@kernel.org> */ /* Copyright 2019 Collabora ltd. 
*/ +#ifdef CONFIG_ARM_ARCH_TIMER +#include <asm/arch_timer.h> +#endif + #include <linux/list.h> #include <linux/module.h> #include <linux/of_platform.h> #include <linux/pagemap.h> #include <linux/platform_device.h> #include <linux/pm_runtime.h> +#include <linux/time64.h> #include <drm/drm_auth.h> #include <drm/drm_debugfs.h> @@ -165,6 +170,8 @@ panthor_get_uobj_array(const struct drm_panthor_obj_array *in, u32 min_stride, _Generic(_obj_name, \ PANTHOR_UOBJ_DECL(struct drm_panthor_gpu_info, tiler_present), \ PANTHOR_UOBJ_DECL(struct drm_panthor_csif_info, pad), \ + PANTHOR_UOBJ_DECL(struct drm_panthor_timestamp_info, current_timestamp), \ + PANTHOR_UOBJ_DECL(struct drm_panthor_group_priorities_info, pad), \ PANTHOR_UOBJ_DECL(struct drm_panthor_sync_op, timeline_value), \ PANTHOR_UOBJ_DECL(struct drm_panthor_queue_submit, syncs), \ PANTHOR_UOBJ_DECL(struct drm_panthor_queue_create, ringbuf_size), \ @@ -751,10 +758,64 @@ static void panthor_submit_ctx_cleanup(struct panthor_submit_ctx *ctx, kvfree(ctx->jobs); } +static int panthor_query_timestamp_info(struct panthor_device *ptdev, + struct drm_panthor_timestamp_info *arg) +{ + int ret; + + ret = panthor_device_resume_and_get(ptdev); + if (ret) + return ret; + +#ifdef CONFIG_ARM_ARCH_TIMER + arg->timestamp_frequency = arch_timer_get_cntfrq(); +#else + arg->timestamp_frequency = 0; +#endif + arg->current_timestamp = panthor_gpu_read_timestamp(ptdev); + arg->timestamp_offset = panthor_gpu_read_timestamp_offset(ptdev); + + pm_runtime_put(ptdev->base.dev); + return 0; +} + +static int group_priority_permit(struct drm_file *file, + u8 priority) +{ + /* Ensure that priority is valid */ + if (priority > PANTHOR_GROUP_PRIORITY_REALTIME) + return -EINVAL; + + /* Medium priority and below are always allowed */ + if (priority <= PANTHOR_GROUP_PRIORITY_MEDIUM) + return 0; + + /* Higher priorities require CAP_SYS_NICE or DRM_MASTER */ + if (capable(CAP_SYS_NICE) || drm_is_current_master(file)) + return 0; + + return -EACCES; +} + +static void panthor_query_group_priorities_info(struct drm_file *file, + struct drm_panthor_group_priorities_info *arg) +{ + int prio; + + memset(arg, 0, sizeof(*arg)); + for (prio = PANTHOR_GROUP_PRIORITY_REALTIME; prio >= 0; prio--) { + if (!group_priority_permit(file, prio)) + arg->allowed_mask |= BIT(prio); + } +} + static int panthor_ioctl_dev_query(struct drm_device *ddev, void *data, struct drm_file *file) { struct panthor_device *ptdev = container_of(ddev, struct panthor_device, base); struct drm_panthor_dev_query *args = data; + struct drm_panthor_timestamp_info timestamp_info; + struct drm_panthor_group_priorities_info priorities_info; + int ret; if (!args->pointer) { switch (args->type) { @@ -766,6 +827,14 @@ static int panthor_ioctl_dev_query(struct drm_device *ddev, void *data, struct d args->size = sizeof(ptdev->csif_info); return 0; + case DRM_PANTHOR_DEV_QUERY_TIMESTAMP_INFO: + args->size = sizeof(timestamp_info); + return 0; + + case DRM_PANTHOR_DEV_QUERY_GROUP_PRIORITIES_INFO: + args->size = sizeof(priorities_info); + return 0; + default: return -EINVAL; } @@ -778,6 +847,18 @@ static int panthor_ioctl_dev_query(struct drm_device *ddev, void *data, struct d case DRM_PANTHOR_DEV_QUERY_CSIF_INFO: return PANTHOR_UOBJ_SET(args->pointer, args->size, ptdev->csif_info); + case DRM_PANTHOR_DEV_QUERY_TIMESTAMP_INFO: + ret = panthor_query_timestamp_info(ptdev, ×tamp_info); + + if (ret) + return ret; + + return PANTHOR_UOBJ_SET(args->pointer, args->size, timestamp_info); + + case DRM_PANTHOR_DEV_QUERY_GROUP_PRIORITIES_INFO: 
+ panthor_query_group_priorities_info(file, &priorities_info); + return PANTHOR_UOBJ_SET(args->pointer, args->size, priorities_info); + default: return -EINVAL; } @@ -859,6 +940,7 @@ static int panthor_ioctl_bo_mmap_offset(struct drm_device *ddev, void *data, struct drm_file *file) { struct drm_panthor_bo_mmap_offset *args = data; + struct panthor_gem_object *bo; struct drm_gem_object *obj; int ret; @@ -869,6 +951,12 @@ static int panthor_ioctl_bo_mmap_offset(struct drm_device *ddev, void *data, if (!obj) return -ENOENT; + bo = to_panthor_bo(obj); + if (bo->flags & DRM_PANTHOR_BO_NO_MMAP) { + ret = -EPERM; + goto out; + } + ret = drm_gem_create_mmap_offset(obj); if (ret) goto out; @@ -997,24 +1085,6 @@ static int panthor_ioctl_group_destroy(struct drm_device *ddev, void *data, return panthor_group_destroy(pfile, args->group_handle); } -static int group_priority_permit(struct drm_file *file, - u8 priority) -{ - /* Ensure that priority is valid */ - if (priority > PANTHOR_GROUP_PRIORITY_HIGH) - return -EINVAL; - - /* Medium priority and below are always allowed */ - if (priority <= PANTHOR_GROUP_PRIORITY_MEDIUM) - return 0; - - /* Higher priorities require CAP_SYS_NICE or DRM_MASTER */ - if (capable(CAP_SYS_NICE) || drm_is_current_master(file)) - return 0; - - return -EACCES; -} - static int panthor_ioctl_group_create(struct drm_device *ddev, void *data, struct drm_file *file) { @@ -1268,6 +1338,46 @@ static int panthor_ioctl_vm_get_state(struct drm_device *ddev, void *data, return 0; } +static int panthor_ioctl_bo_set_label(struct drm_device *ddev, void *data, + struct drm_file *file) +{ + struct drm_panthor_bo_set_label *args = data; + struct drm_gem_object *obj; + const char *label = NULL; + int ret = 0; + + if (args->pad) + return -EINVAL; + + obj = drm_gem_object_lookup(file, args->handle); + if (!obj) + return -ENOENT; + + if (args->label) { + label = strndup_user((const char __user *)(uintptr_t)args->label, + PANTHOR_BO_LABEL_MAXLEN); + if (IS_ERR(label)) { + ret = PTR_ERR(label); + if (ret == -EINVAL) + ret = -E2BIG; + goto err_put_obj; + } + } + + /* + * We treat passing a label of length 0 and passing a NULL label + * differently, because even though they might seem conceptually + * similar, future uses of the BO label might expect a different + * behaviour in each case. 
+ */ + panthor_gem_bo_set_label(obj, label); + +err_put_obj: + drm_gem_object_put(obj); + + return ret; +} + static int panthor_open(struct drm_device *ddev, struct drm_file *file) { @@ -1337,6 +1447,7 @@ static const struct drm_ioctl_desc panthor_drm_driver_ioctls[] = { PANTHOR_IOCTL(TILER_HEAP_CREATE, tiler_heap_create, DRM_RENDER_ALLOW), PANTHOR_IOCTL(TILER_HEAP_DESTROY, tiler_heap_destroy, DRM_RENDER_ALLOW), PANTHOR_IOCTL(GROUP_SUBMIT, group_submit, DRM_RENDER_ALLOW), + PANTHOR_IOCTL(BO_SET_LABEL, bo_set_label, DRM_RENDER_ALLOW), }; static int panthor_mmap(struct file *filp, struct vm_area_struct *vma) @@ -1374,6 +1485,51 @@ static int panthor_mmap(struct file *filp, struct vm_area_struct *vma) return ret; } +static void panthor_gpu_show_fdinfo(struct panthor_device *ptdev, + struct panthor_file *pfile, + struct drm_printer *p) +{ + if (ptdev->profile_mask & PANTHOR_DEVICE_PROFILING_ALL) + panthor_fdinfo_gather_group_samples(pfile); + + if (ptdev->profile_mask & PANTHOR_DEVICE_PROFILING_TIMESTAMP) { +#ifdef CONFIG_ARM_ARCH_TIMER + drm_printf(p, "drm-engine-panthor:\t%llu ns\n", + DIV_ROUND_UP_ULL((pfile->stats.time * NSEC_PER_SEC), + arch_timer_get_cntfrq())); +#endif + } + if (ptdev->profile_mask & PANTHOR_DEVICE_PROFILING_CYCLES) + drm_printf(p, "drm-cycles-panthor:\t%llu\n", pfile->stats.cycles); + + drm_printf(p, "drm-maxfreq-panthor:\t%lu Hz\n", ptdev->fast_rate); + drm_printf(p, "drm-curfreq-panthor:\t%lu Hz\n", ptdev->current_frequency); +} + +static void panthor_show_internal_memory_stats(struct drm_printer *p, struct drm_file *file) +{ + char *drv_name = file->minor->dev->driver->name; + struct panthor_file *pfile = file->driver_priv; + struct drm_memory_stats stats = {0}; + + panthor_fdinfo_gather_group_mem_info(pfile, &stats); + panthor_vm_heaps_sizes(pfile, &stats); + + drm_fdinfo_print_size(p, drv_name, "resident", "memory", stats.resident); + drm_fdinfo_print_size(p, drv_name, "active", "memory", stats.active); +} + +static void panthor_show_fdinfo(struct drm_printer *p, struct drm_file *file) +{ + struct drm_device *dev = file->minor->dev; + struct panthor_device *ptdev = container_of(dev, struct panthor_device, base); + + panthor_gpu_show_fdinfo(ptdev, file->driver_priv, p); + panthor_show_internal_memory_stats(p, file); + + drm_show_memory_stats(p, file); +} + static const struct file_operations panthor_drm_driver_fops = { .open = drm_open, .release = drm_release, @@ -1383,33 +1539,64 @@ static const struct file_operations panthor_drm_driver_fops = { .read = drm_read, .llseek = noop_llseek, .mmap = panthor_mmap, + .show_fdinfo = drm_show_fdinfo, .fop_flags = FOP_UNSIGNED_OFFSET, }; #ifdef CONFIG_DEBUG_FS +static int panthor_gems_show(struct seq_file *m, void *data) +{ + struct drm_info_node *node = m->private; + struct drm_device *dev = node->minor->dev; + struct panthor_device *ptdev = container_of(dev, struct panthor_device, base); + + panthor_gem_debugfs_print_bos(ptdev, m); + + return 0; +} + +static struct drm_info_list panthor_debugfs_list[] = { + {"gems", panthor_gems_show, 0, NULL}, +}; + +static int panthor_gems_debugfs_init(struct drm_minor *minor) +{ + drm_debugfs_create_files(panthor_debugfs_list, + ARRAY_SIZE(panthor_debugfs_list), + minor->debugfs_root, minor); + + return 0; +} + static void panthor_debugfs_init(struct drm_minor *minor) { panthor_mmu_debugfs_init(minor); + panthor_gems_debugfs_init(minor); } #endif /* * PanCSF driver version: * - 1.0 - initial interface + * - 1.1 - adds DEV_QUERY_TIMESTAMP_INFO query + * - 1.2 - adds 
DEV_QUERY_GROUP_PRIORITIES_INFO query + * - adds PANTHOR_GROUP_PRIORITY_REALTIME priority + * - 1.3 - adds DRM_PANTHOR_GROUP_STATE_INNOCENT flag + * - 1.4 - adds DRM_IOCTL_PANTHOR_BO_SET_LABEL ioctl */ static const struct drm_driver panthor_drm_driver = { .driver_features = DRIVER_RENDER | DRIVER_GEM | DRIVER_SYNCOBJ | DRIVER_SYNCOBJ_TIMELINE | DRIVER_GEM_GPUVA, .open = panthor_open, .postclose = panthor_postclose, + .show_fdinfo = panthor_show_fdinfo, .ioctls = panthor_drm_driver_ioctls, .num_ioctls = ARRAY_SIZE(panthor_drm_driver_ioctls), .fops = &panthor_drm_driver_fops, .name = "panthor", .desc = "Panthor DRM driver", - .date = "20230801", .major = 1, - .minor = 0, + .minor = 4, .gem_create_object = panthor_gem_create_object, .gem_prime_import_sg_table = drm_gem_shmem_prime_import_sg_table, @@ -1439,6 +1626,44 @@ static void panthor_remove(struct platform_device *pdev) panthor_device_unplug(ptdev); } +static ssize_t profiling_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct panthor_device *ptdev = dev_get_drvdata(dev); + + return sysfs_emit(buf, "%d\n", ptdev->profile_mask); +} + +static ssize_t profiling_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t len) +{ + struct panthor_device *ptdev = dev_get_drvdata(dev); + u32 value; + int err; + + err = kstrtou32(buf, 0, &value); + if (err) + return err; + + if ((value & ~PANTHOR_DEVICE_PROFILING_ALL) != 0) + return -EINVAL; + + ptdev->profile_mask = value; + + return len; +} + +static DEVICE_ATTR_RW(profiling); + +static struct attribute *panthor_attrs[] = { + &dev_attr_profiling.attr, + NULL, +}; + +ATTRIBUTE_GROUPS(panthor); + static const struct of_device_id dt_match[] = { { .compatible = "rockchip,rk3588-mali" }, { .compatible = "arm,mali-valhall-csf" }, @@ -1453,11 +1678,12 @@ static DEFINE_RUNTIME_DEV_PM_OPS(panthor_pm_ops, static struct platform_driver panthor_driver = { .probe = panthor_probe, - .remove_new = panthor_remove, + .remove = panthor_remove, .driver = { .name = "panthor", .pm = pm_ptr(&panthor_pm_ops), .of_match_table = dt_match, + .dev_groups = panthor_groups, }, }; diff --git a/drivers/gpu/drm/panthor/panthor_fw.c b/drivers/gpu/drm/panthor/panthor_fw.c index ef232c0c2049..7bc38e635329 100644 --- a/drivers/gpu/drm/panthor/panthor_fw.c +++ b/drivers/gpu/drm/panthor/panthor_fw.c @@ -12,6 +12,7 @@ #include <linux/iosys-map.h> #include <linux/mutex.h> #include <linux/platform_device.h> +#include <linux/pm_runtime.h> #include <drm/drm_drv.h> #include <drm/drm_managed.h> @@ -78,6 +79,12 @@ enum panthor_fw_binary_entry_type { /** @CSF_FW_BINARY_ENTRY_TYPE_TIMELINE_METADATA: Timeline metadata interface. */ CSF_FW_BINARY_ENTRY_TYPE_TIMELINE_METADATA = 4, + + /** + * @CSF_FW_BINARY_ENTRY_TYPE_BUILD_INFO_METADATA: Metadata about how + * the FW binary was built. 
+ */ + CSF_FW_BINARY_ENTRY_TYPE_BUILD_INFO_METADATA = 6 }; #define CSF_FW_BINARY_ENTRY_TYPE(ehdr) ((ehdr) & 0xff) @@ -85,26 +92,26 @@ enum panthor_fw_binary_entry_type { #define CSF_FW_BINARY_ENTRY_UPDATE BIT(30) #define CSF_FW_BINARY_ENTRY_OPTIONAL BIT(31) -#define CSF_FW_BINARY_IFACE_ENTRY_RD_RD BIT(0) -#define CSF_FW_BINARY_IFACE_ENTRY_RD_WR BIT(1) -#define CSF_FW_BINARY_IFACE_ENTRY_RD_EX BIT(2) -#define CSF_FW_BINARY_IFACE_ENTRY_RD_CACHE_MODE_NONE (0 << 3) -#define CSF_FW_BINARY_IFACE_ENTRY_RD_CACHE_MODE_CACHED (1 << 3) -#define CSF_FW_BINARY_IFACE_ENTRY_RD_CACHE_MODE_UNCACHED_COHERENT (2 << 3) -#define CSF_FW_BINARY_IFACE_ENTRY_RD_CACHE_MODE_CACHED_COHERENT (3 << 3) -#define CSF_FW_BINARY_IFACE_ENTRY_RD_CACHE_MODE_MASK GENMASK(4, 3) -#define CSF_FW_BINARY_IFACE_ENTRY_RD_PROT BIT(5) -#define CSF_FW_BINARY_IFACE_ENTRY_RD_SHARED BIT(30) -#define CSF_FW_BINARY_IFACE_ENTRY_RD_ZERO BIT(31) - -#define CSF_FW_BINARY_IFACE_ENTRY_RD_SUPPORTED_FLAGS \ - (CSF_FW_BINARY_IFACE_ENTRY_RD_RD | \ - CSF_FW_BINARY_IFACE_ENTRY_RD_WR | \ - CSF_FW_BINARY_IFACE_ENTRY_RD_EX | \ - CSF_FW_BINARY_IFACE_ENTRY_RD_CACHE_MODE_MASK | \ - CSF_FW_BINARY_IFACE_ENTRY_RD_PROT | \ - CSF_FW_BINARY_IFACE_ENTRY_RD_SHARED | \ - CSF_FW_BINARY_IFACE_ENTRY_RD_ZERO) +#define CSF_FW_BINARY_IFACE_ENTRY_RD BIT(0) +#define CSF_FW_BINARY_IFACE_ENTRY_WR BIT(1) +#define CSF_FW_BINARY_IFACE_ENTRY_EX BIT(2) +#define CSF_FW_BINARY_IFACE_ENTRY_CACHE_MODE_NONE (0 << 3) +#define CSF_FW_BINARY_IFACE_ENTRY_CACHE_MODE_CACHED (1 << 3) +#define CSF_FW_BINARY_IFACE_ENTRY_CACHE_MODE_UNCACHED_COHERENT (2 << 3) +#define CSF_FW_BINARY_IFACE_ENTRY_CACHE_MODE_CACHED_COHERENT (3 << 3) +#define CSF_FW_BINARY_IFACE_ENTRY_CACHE_MODE_MASK GENMASK(4, 3) +#define CSF_FW_BINARY_IFACE_ENTRY_PROT BIT(5) +#define CSF_FW_BINARY_IFACE_ENTRY_SHARED BIT(30) +#define CSF_FW_BINARY_IFACE_ENTRY_ZERO BIT(31) + +#define CSF_FW_BINARY_IFACE_ENTRY_SUPPORTED_FLAGS \ + (CSF_FW_BINARY_IFACE_ENTRY_RD | \ + CSF_FW_BINARY_IFACE_ENTRY_WR | \ + CSF_FW_BINARY_IFACE_ENTRY_EX | \ + CSF_FW_BINARY_IFACE_ENTRY_CACHE_MODE_MASK | \ + CSF_FW_BINARY_IFACE_ENTRY_PROT | \ + CSF_FW_BINARY_IFACE_ENTRY_SHARED | \ + CSF_FW_BINARY_IFACE_ENTRY_ZERO) /** * struct panthor_fw_binary_section_entry_hdr - Describes a section of FW binary @@ -132,6 +139,13 @@ struct panthor_fw_binary_section_entry_hdr { } data; }; +struct panthor_fw_build_info_hdr { + /** @meta_start: Offset of the build info data in the FW binary */ + u32 meta_start; + /** @meta_size: Size of the build info data in the FW binary */ + u32 meta_size; +}; + /** * struct panthor_fw_binary_iter - Firmware binary iterator * @@ -187,7 +201,6 @@ struct panthor_fw_section { #define MIN_CS_PER_CSG 8 #define MIN_CSGS 3 -#define MAX_CSG_PRIO 0xf #define CSF_IFACE_VERSION(major, minor, patch) \ (((major) << 24) | ((minor) << 16) | (patch)) @@ -249,17 +262,6 @@ struct panthor_fw { /** @booted: True is the FW is booted */ bool booted; - /** - * @fast_reset: True if the post_reset logic can proceed with a fast reset. - * - * A fast reset is just a reset where the driver doesn't reload the FW sections. - * - * Any time the firmware is properly suspended, a fast reset can take place. - * On the other hand, if the halt operation failed, the driver will reload - * all sections to make sure we start from a fresh state. - */ - bool fast_reset; - /** @irq: Job irq data. 
*/ struct panthor_irq irq; }; @@ -400,7 +402,7 @@ static void panthor_fw_init_section_mem(struct panthor_device *ptdev, int ret; if (!section->data.size && - !(section->flags & CSF_FW_BINARY_IFACE_ENTRY_RD_ZERO)) + !(section->flags & CSF_FW_BINARY_IFACE_ENTRY_ZERO)) return; ret = panthor_kernel_bo_vmap(section->mem); @@ -408,7 +410,7 @@ static void panthor_fw_init_section_mem(struct panthor_device *ptdev, return; memcpy(section->mem->kmap, section->data.buf, section->data.size); - if (section->flags & CSF_FW_BINARY_IFACE_ENTRY_RD_ZERO) { + if (section->flags & CSF_FW_BINARY_IFACE_ENTRY_ZERO) { memset(section->mem->kmap + section->data.size, 0, panthor_kernel_bo_size(section->mem) - section->data.size); } @@ -447,7 +449,8 @@ panthor_fw_alloc_queue_iface_mem(struct panthor_device *ptdev, DRM_PANTHOR_BO_NO_MMAP, DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC | DRM_PANTHOR_VM_BIND_OP_MAP_UNCACHED, - PANTHOR_VM_KERNEL_AUTO_VA); + PANTHOR_VM_KERNEL_AUTO_VA, + "Queue FW interface"); if (IS_ERR(mem)) return mem; @@ -479,7 +482,8 @@ panthor_fw_alloc_suspend_buf_mem(struct panthor_device *ptdev, size_t size) return panthor_kernel_bo_create(ptdev, panthor_fw_vm(ptdev), size, DRM_PANTHOR_BO_NO_MMAP, DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC, - PANTHOR_VM_KERNEL_AUTO_VA); + PANTHOR_VM_KERNEL_AUTO_VA, + "FW suspend buffer"); } static int panthor_fw_load_section_entry(struct panthor_device *ptdev, @@ -487,6 +491,7 @@ static int panthor_fw_load_section_entry(struct panthor_device *ptdev, struct panthor_fw_binary_iter *iter, u32 ehdr) { + ssize_t vm_pgsz = panthor_vm_page_size(ptdev->fw->vm); struct panthor_fw_binary_section_entry_hdr hdr; struct panthor_fw_section *section; u32 section_size; @@ -515,27 +520,26 @@ static int panthor_fw_load_section_entry(struct panthor_device *ptdev, return -EINVAL; } - if ((hdr.va.start & ~PAGE_MASK) != 0 || - (hdr.va.end & ~PAGE_MASK) != 0) { + if (!IS_ALIGNED(hdr.va.start, vm_pgsz) || !IS_ALIGNED(hdr.va.end, vm_pgsz)) { drm_err(&ptdev->base, "Firmware corrupted, virtual addresses not page aligned: 0x%x-0x%x\n", hdr.va.start, hdr.va.end); return -EINVAL; } - if (hdr.flags & ~CSF_FW_BINARY_IFACE_ENTRY_RD_SUPPORTED_FLAGS) { + if (hdr.flags & ~CSF_FW_BINARY_IFACE_ENTRY_SUPPORTED_FLAGS) { drm_err(&ptdev->base, "Firmware contains interface with unsupported flags (0x%x)\n", hdr.flags); return -EINVAL; } - if (hdr.flags & CSF_FW_BINARY_IFACE_ENTRY_RD_PROT) { + if (hdr.flags & CSF_FW_BINARY_IFACE_ENTRY_PROT) { drm_warn(&ptdev->base, "Firmware protected mode entry not be supported, ignoring"); return 0; } if (hdr.va.start == CSF_MCU_SHARED_REGION_START && - !(hdr.flags & CSF_FW_BINARY_IFACE_ENTRY_RD_SHARED)) { + !(hdr.flags & CSF_FW_BINARY_IFACE_ENTRY_SHARED)) { drm_err(&ptdev->base, "Interface at 0x%llx must be shared", CSF_MCU_SHARED_REGION_START); return -EINVAL; @@ -574,39 +578,39 @@ static int panthor_fw_load_section_entry(struct panthor_device *ptdev, section_size = hdr.va.end - hdr.va.start; if (section_size) { - u32 cache_mode = hdr.flags & CSF_FW_BINARY_IFACE_ENTRY_RD_CACHE_MODE_MASK; + u32 cache_mode = hdr.flags & CSF_FW_BINARY_IFACE_ENTRY_CACHE_MODE_MASK; struct panthor_gem_object *bo; u32 vm_map_flags = 0; struct sg_table *sgt; u64 va = hdr.va.start; - if (!(hdr.flags & CSF_FW_BINARY_IFACE_ENTRY_RD_WR)) + if (!(hdr.flags & CSF_FW_BINARY_IFACE_ENTRY_WR)) vm_map_flags |= DRM_PANTHOR_VM_BIND_OP_MAP_READONLY; - if (!(hdr.flags & CSF_FW_BINARY_IFACE_ENTRY_RD_EX)) + if (!(hdr.flags & CSF_FW_BINARY_IFACE_ENTRY_EX)) vm_map_flags |= DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC; - /* TODO: 
CSF_FW_BINARY_IFACE_ENTRY_RD_CACHE_MODE_*_COHERENT are mapped to + /* TODO: CSF_FW_BINARY_IFACE_ENTRY_CACHE_MODE_*_COHERENT are mapped to * non-cacheable for now. We might want to introduce a new * IOMMU_xxx flag (or abuse IOMMU_MMIO, which maps to device * memory and is currently not used by our driver) for * AS_MEMATTR_AARCH64_SHARED memory, so we can take benefit * of IO-coherent systems. */ - if (cache_mode != CSF_FW_BINARY_IFACE_ENTRY_RD_CACHE_MODE_CACHED) + if (cache_mode != CSF_FW_BINARY_IFACE_ENTRY_CACHE_MODE_CACHED) vm_map_flags |= DRM_PANTHOR_VM_BIND_OP_MAP_UNCACHED; section->mem = panthor_kernel_bo_create(ptdev, panthor_fw_vm(ptdev), section_size, DRM_PANTHOR_BO_NO_MMAP, - vm_map_flags, va); + vm_map_flags, va, "FW section"); if (IS_ERR(section->mem)) return PTR_ERR(section->mem); if (drm_WARN_ON(&ptdev->base, section->mem->va_node.start != hdr.va.start)) return -EINVAL; - if (section->flags & CSF_FW_BINARY_IFACE_ENTRY_RD_SHARED) { + if (section->flags & CSF_FW_BINARY_IFACE_ENTRY_SHARED) { ret = panthor_kernel_bo_vmap(section->mem); if (ret) return ret; @@ -628,6 +632,45 @@ static int panthor_fw_load_section_entry(struct panthor_device *ptdev, return 0; } +static int panthor_fw_read_build_info(struct panthor_device *ptdev, + const struct firmware *fw, + struct panthor_fw_binary_iter *iter, + u32 ehdr) +{ + struct panthor_fw_build_info_hdr hdr; + static const char git_sha_header[] = "git_sha: "; + const int header_len = sizeof(git_sha_header) - 1; + int ret; + + ret = panthor_fw_binary_iter_read(ptdev, iter, &hdr, sizeof(hdr)); + if (ret) + return ret; + + if (hdr.meta_start > fw->size || + hdr.meta_start + hdr.meta_size > fw->size) { + drm_err(&ptdev->base, "Firmware build info corrupt\n"); + /* We don't need the build info, so continue */ + return 0; + } + + if (memcmp(git_sha_header, fw->data + hdr.meta_start, header_len)) { + /* Not the expected header, this isn't metadata we understand */ + return 0; + } + + /* Check that the git SHA is NULL terminated as expected */ + if (fw->data[hdr.meta_start + hdr.meta_size - 1] != '\0') { + drm_warn(&ptdev->base, "Firmware's git sha is not NULL terminated\n"); + /* Don't treat as fatal */ + return 0; + } + + drm_info(&ptdev->base, "Firmware git sha: %s\n", + fw->data + hdr.meta_start + header_len); + + return 0; +} + static void panthor_reload_fw_sections(struct panthor_device *ptdev, bool full_reload) { @@ -636,7 +679,7 @@ panthor_reload_fw_sections(struct panthor_device *ptdev, bool full_reload) list_for_each_entry(section, &ptdev->fw->sections, node) { struct sg_table *sgt; - if (!full_reload && !(section->flags & CSF_FW_BINARY_IFACE_ENTRY_RD_WR)) + if (!full_reload && !(section->flags & CSF_FW_BINARY_IFACE_ENTRY_WR)) continue; panthor_fw_init_section_mem(ptdev, section); @@ -672,6 +715,8 @@ static int panthor_fw_load_entry(struct panthor_device *ptdev, switch (CSF_FW_BINARY_ENTRY_TYPE(ehdr)) { case CSF_FW_BINARY_ENTRY_TYPE_IFACE: return panthor_fw_load_section_entry(ptdev, fw, &eiter, ehdr); + case CSF_FW_BINARY_ENTRY_TYPE_BUILD_INFO_METADATA: + return panthor_fw_read_build_info(ptdev, fw, &eiter, ehdr); /* FIXME: handle those entry types? 
*/ case CSF_FW_BINARY_ENTRY_TYPE_CONFIG: @@ -921,7 +966,7 @@ static int panthor_fw_init_ifaces(struct panthor_device *ptdev) return ret; } - drm_info(&ptdev->base, "CSF FW v%d.%d.%d, Features %#x Instrumentation features %#x", + drm_info(&ptdev->base, "CSF FW using interface v%d.%d.%d, Features %#x Instrumentation features %#x", CSF_IFACE_VERSION_MAJOR(glb_iface->control->version), CSF_IFACE_VERSION_MINOR(glb_iface->control->version), CSF_IFACE_VERSION_PATCH(glb_iface->control->version), @@ -965,6 +1010,8 @@ static void panthor_fw_init_global_iface(struct panthor_device *ptdev) static void panthor_job_irq_handler(struct panthor_device *ptdev, u32 status) { + gpu_write(ptdev, JOB_INT_CLEAR, status); + if (!ptdev->fw->booted && (status & JOB_INT_GLOBAL_IF)) ptdev->fw->booted = true; @@ -1034,7 +1081,7 @@ void panthor_fw_pre_reset(struct panthor_device *ptdev, bool on_hang) /* Make sure we won't be woken up by a ping. */ cancel_delayed_work_sync(&ptdev->fw->watchdog.ping_work); - ptdev->fw->fast_reset = false; + ptdev->reset.fast = false; if (!on_hang) { struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev); @@ -1043,17 +1090,11 @@ void panthor_fw_pre_reset(struct panthor_device *ptdev, bool on_hang) panthor_fw_update_reqs(glb_iface, req, GLB_HALT, GLB_HALT); gpu_write(ptdev, CSF_DOORBELL(CSF_GLB_DOORBELL_ID), 1); if (!readl_poll_timeout(ptdev->iomem + MCU_STATUS, status, - status == MCU_STATUS_HALT, 10, 100000) && - glb_iface->output->halt_status == PANTHOR_FW_HALT_OK) { - ptdev->fw->fast_reset = true; + status == MCU_STATUS_HALT, 10, 100000)) { + ptdev->reset.fast = true; } else { drm_warn(&ptdev->base, "Failed to cleanly suspend MCU"); } - - /* The FW detects 0 -> 1 transitions. Make sure we reset - * the HALT bit before the FW is rebooted. - */ - panthor_fw_update_reqs(glb_iface, req, 0, GLB_HALT); } panthor_job_irq_suspend(&ptdev->fw->irq); @@ -1075,41 +1116,30 @@ int panthor_fw_post_reset(struct panthor_device *ptdev) if (ret) return ret; - /* If this is a fast reset, try to start the MCU without reloading - * the FW sections. If it fails, go for a full reset. - */ - if (ptdev->fw->fast_reset) { - ret = panthor_fw_start(ptdev); - if (!ret) - goto out; - - /* Forcibly reset the MCU and force a slow reset, so we get a - * fresh boot on the next panthor_fw_start() call. + if (!ptdev->reset.fast) { + /* On a slow reset, reload all sections, including RO ones. + * We're not supposed to end up here anyway, let's just assume + * the overhead of reloading everything is acceptable. */ - panthor_fw_stop(ptdev); - ptdev->fw->fast_reset = false; - drm_err(&ptdev->base, "FW fast reset failed, trying a slow reset"); + panthor_reload_fw_sections(ptdev, true); + } else { + /* The FW detects 0 -> 1 transitions. Make sure we reset + * the HALT bit before the FW is rebooted. + * This is not needed on a slow reset because FW sections are + * re-initialized. + */ + struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev); - ret = panthor_vm_flush_all(ptdev->fw->vm); - if (ret) { - drm_err(&ptdev->base, "FW slow reset failed (couldn't flush FW's AS l2cache)"); - return ret; - } + panthor_fw_update_reqs(glb_iface, req, 0, GLB_HALT); } - /* Reload all sections, including RO ones. We're not supposed - * to end up here anyway, let's just assume the overhead of - * reloading everything is acceptable. 
- */ - panthor_reload_fw_sections(ptdev, true); - ret = panthor_fw_start(ptdev); if (ret) { - drm_err(&ptdev->base, "FW slow reset failed (couldn't start the FW )"); + drm_err(&ptdev->base, "FW %s reset failed", + ptdev->reset.fast ? "fast" : "slow"); return ret; } -out: /* We must re-initialize the global interface even on fast-reset. */ panthor_fw_init_global_iface(ptdev); return 0; @@ -1133,11 +1163,13 @@ void panthor_fw_unplug(struct panthor_device *ptdev) cancel_delayed_work_sync(&ptdev->fw->watchdog.ping_work); - /* Make sure the IRQ handler can be called after that point. */ - if (ptdev->fw->irq.irq) - panthor_job_irq_suspend(&ptdev->fw->irq); + if (!IS_ENABLED(CONFIG_PM) || pm_runtime_active(ptdev->base.dev)) { + /* Make sure the IRQ handler cannot be called after that point. */ + if (ptdev->fw->irq.irq) + panthor_job_irq_suspend(&ptdev->fw->irq); - panthor_fw_stop(ptdev); + panthor_fw_stop(ptdev); + } list_for_each_entry(section, &ptdev->fw->sections, node) panthor_kernel_bo_destroy(section->mem); @@ -1150,7 +1182,8 @@ void panthor_fw_unplug(struct panthor_device *ptdev) panthor_vm_put(ptdev->fw->vm); ptdev->fw->vm = NULL; - panthor_gpu_power_off(ptdev, L2, ptdev->gpu_info.l2_present, 20000); + if (!IS_ENABLED(CONFIG_PM) || pm_runtime_active(ptdev->base.dev)) + panthor_gpu_power_off(ptdev, L2, ptdev->gpu_info.l2_present, 20000); } /** diff --git a/drivers/gpu/drm/panthor/panthor_fw.h b/drivers/gpu/drm/panthor/panthor_fw.h index 22448abde992..6598d96c6d2a 100644 --- a/drivers/gpu/drm/panthor/panthor_fw.h +++ b/drivers/gpu/drm/panthor/panthor_fw.h @@ -102,9 +102,9 @@ struct panthor_fw_cs_output_iface { #define CS_STATUS_BLOCKED_REASON_SB_WAIT 1 #define CS_STATUS_BLOCKED_REASON_PROGRESS_WAIT 2 #define CS_STATUS_BLOCKED_REASON_SYNC_WAIT 3 -#define CS_STATUS_BLOCKED_REASON_DEFERRED 5 -#define CS_STATUS_BLOCKED_REASON_RES 6 -#define CS_STATUS_BLOCKED_REASON_FLUSH 7 +#define CS_STATUS_BLOCKED_REASON_DEFERRED 4 +#define CS_STATUS_BLOCKED_REASON_RESOURCE 5 +#define CS_STATUS_BLOCKED_REASON_FLUSH 6 #define CS_STATUS_BLOCKED_REASON_MASK GENMASK(3, 0) u32 status_blocked_reason; u32 status_wait_sync_value_hi; diff --git a/drivers/gpu/drm/panthor/panthor_gem.c b/drivers/gpu/drm/panthor/panthor_gem.c index 38f560864879..7c00fd77758b 100644 --- a/drivers/gpu/drm/panthor/panthor_gem.c +++ b/drivers/gpu/drm/panthor/panthor_gem.c @@ -2,6 +2,7 @@ /* Copyright 2019 Linaro, Ltd, Rob Herring <robh@kernel.org> */ /* Copyright 2023 Collabora ltd. 
*/ +#include <linux/cleanup.h> #include <linux/dma-buf.h> #include <linux/dma-mapping.h> #include <linux/err.h> @@ -10,14 +11,64 @@ #include <drm/panthor_drm.h> #include "panthor_device.h" +#include "panthor_fw.h" #include "panthor_gem.h" #include "panthor_mmu.h" +#ifdef CONFIG_DEBUG_FS +static void panthor_gem_debugfs_bo_add(struct panthor_device *ptdev, + struct panthor_gem_object *bo) +{ + INIT_LIST_HEAD(&bo->debugfs.node); + + bo->debugfs.creator.tgid = current->group_leader->pid; + get_task_comm(bo->debugfs.creator.process_name, current->group_leader); + + mutex_lock(&ptdev->gems.lock); + list_add_tail(&bo->debugfs.node, &ptdev->gems.node); + mutex_unlock(&ptdev->gems.lock); +} + +static void panthor_gem_debugfs_bo_rm(struct panthor_gem_object *bo) +{ + struct panthor_device *ptdev = container_of(bo->base.base.dev, + struct panthor_device, base); + + if (list_empty(&bo->debugfs.node)) + return; + + mutex_lock(&ptdev->gems.lock); + list_del_init(&bo->debugfs.node); + mutex_unlock(&ptdev->gems.lock); +} + +static void panthor_gem_debugfs_set_usage_flags(struct panthor_gem_object *bo, u32 usage_flags) +{ + bo->debugfs.flags = usage_flags | PANTHOR_DEBUGFS_GEM_USAGE_FLAG_INITIALIZED; +} +#else +static void panthor_gem_debugfs_bo_add(struct panthor_device *ptdev, + struct panthor_gem_object *bo) +{} +static void panthor_gem_debugfs_bo_rm(struct panthor_gem_object *bo) {} +static void panthor_gem_debugfs_set_usage_flags(struct panthor_gem_object *bo, u32 usage_flags) {} +#endif + static void panthor_gem_free_object(struct drm_gem_object *obj) { struct panthor_gem_object *bo = to_panthor_bo(obj); struct drm_gem_object *vm_root_gem = bo->exclusive_vm_root_gem; + panthor_gem_debugfs_bo_rm(bo); + + /* + * Label might have been allocated with kstrdup_const(), + * we need to take that into account when freeing the memory + */ + kfree_const(bo->label.str); + + mutex_destroy(&bo->label.lock); + drm_gem_free_mmap_offset(&bo->base.base); mutex_destroy(&bo->gpuva_list_lock); drm_gem_shmem_free(&bo->base); @@ -44,8 +95,7 @@ void panthor_kernel_bo_destroy(struct panthor_kernel_bo *bo) to_panthor_bo(bo->obj)->exclusive_vm_root_gem != panthor_vm_root_gem(vm))) goto out_free_bo; - ret = panthor_vm_unmap_range(vm, bo->va_node.start, - panthor_kernel_bo_size(bo)); + ret = panthor_vm_unmap_range(vm, bo->va_node.start, bo->va_node.size); if (ret) goto out_free_bo; @@ -68,17 +118,19 @@ out_free_bo: * @gpu_va: GPU address assigned when mapping to the VM. * If gpu_va == PANTHOR_VM_KERNEL_AUTO_VA, the virtual address will be * automatically allocated. + * @name: Descriptive label of the BO's contents * * Return: A valid pointer in case of success, an ERR_PTR() otherwise. 
*/ struct panthor_kernel_bo * panthor_kernel_bo_create(struct panthor_device *ptdev, struct panthor_vm *vm, size_t size, u32 bo_flags, u32 vm_map_flags, - u64 gpu_va) + u64 gpu_va, const char *name) { struct drm_gem_shmem_object *obj; struct panthor_kernel_bo *kbo; struct panthor_gem_object *bo; + u32 debug_flags = PANTHOR_DEBUGFS_GEM_USAGE_FLAG_KERNEL; int ret; if (drm_WARN_ON(&ptdev->base, !vm)) @@ -95,10 +147,22 @@ panthor_kernel_bo_create(struct panthor_device *ptdev, struct panthor_vm *vm, } bo = to_panthor_bo(&obj->base); - size = obj->base.size; kbo->obj = &obj->base; bo->flags = bo_flags; + if (vm == panthor_fw_vm(ptdev)) + debug_flags |= PANTHOR_DEBUGFS_GEM_USAGE_FLAG_FW_MAPPED; + + panthor_gem_kernel_bo_set_label(kbo, name); + panthor_gem_debugfs_set_usage_flags(to_panthor_bo(kbo->obj), debug_flags); + + /* The system and GPU MMU page size might differ, which becomes a + * problem for FW sections that need to be mapped at explicit address + * since our PAGE_SIZE alignment might cover a VA range that's + * expected to be used for another section. + * Make sure we never map more than we need. + */ + size = ALIGN(size, panthor_vm_page_size(vm)); ret = panthor_vm_alloc_va(vm, gpu_va, size, &kbo->va_node); if (ret) goto err_put_obj; @@ -124,17 +188,6 @@ err_free_bo: return ERR_PTR(ret); } -static int panthor_gem_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma) -{ - struct panthor_gem_object *bo = to_panthor_bo(obj); - - /* Don't allow mmap on objects that have the NO_MMAP flag set. */ - if (bo->flags & DRM_PANTHOR_BO_NO_MMAP) - return -EINVAL; - - return drm_gem_shmem_object_mmap(obj, vma); -} - static struct dma_buf * panthor_gem_prime_export(struct drm_gem_object *obj, int flags) { @@ -145,6 +198,17 @@ panthor_gem_prime_export(struct drm_gem_object *obj, int flags) return drm_gem_prime_export(obj, flags); } +static enum drm_gem_object_status panthor_gem_status(struct drm_gem_object *obj) +{ + struct panthor_gem_object *bo = to_panthor_bo(obj); + enum drm_gem_object_status res = 0; + + if (drm_gem_is_imported(&bo->base.base) || bo->base.pages) + res |= DRM_GEM_OBJECT_RESIDENT; + + return res; +} + static const struct drm_gem_object_funcs panthor_gem_funcs = { .free = panthor_gem_free_object, .print_info = drm_gem_shmem_object_print_info, @@ -153,7 +217,8 @@ static const struct drm_gem_object_funcs panthor_gem_funcs = { .get_sg_table = drm_gem_shmem_object_get_sg_table, .vmap = drm_gem_shmem_object_vmap, .vunmap = drm_gem_shmem_object_vunmap, - .mmap = panthor_gem_mmap, + .mmap = drm_gem_shmem_object_mmap, + .status = panthor_gem_status, .export = panthor_gem_prime_export, .vm_ops = &drm_gem_shmem_vm_ops, }; @@ -179,6 +244,9 @@ struct drm_gem_object *panthor_gem_create_object(struct drm_device *ddev, size_t obj->base.map_wc = !ptdev->coherent; mutex_init(&obj->gpuva_list_lock); drm_gem_gpuva_set_lock(&obj->base.base, &obj->gpuva_list_lock); + mutex_init(&obj->label.lock); + + panthor_gem_debugfs_bo_add(ptdev, obj); return &obj->base.base; } @@ -228,5 +296,153 @@ panthor_gem_create_with_handle(struct drm_file *file, /* drop reference from allocate - handle holds it now. */ drm_gem_object_put(&shmem->base); + /* + * No explicit flags are needed in the call below, since the + * function internally sets the INITIALIZED bit for us. 
+ */ + panthor_gem_debugfs_set_usage_flags(bo, 0); + return ret; } + +void +panthor_gem_bo_set_label(struct drm_gem_object *obj, const char *label) +{ + struct panthor_gem_object *bo = to_panthor_bo(obj); + const char *old_label; + + scoped_guard(mutex, &bo->label.lock) { + old_label = bo->label.str; + bo->label.str = label; + } + + kfree_const(old_label); +} + +void +panthor_gem_kernel_bo_set_label(struct panthor_kernel_bo *bo, const char *label) +{ + const char *str; + + /* We should never attempt labelling a UM-exposed GEM object */ + if (drm_WARN_ON(bo->obj->dev, bo->obj->handle_count > 0)) + return; + + if (!label) + return; + + str = kstrdup_const(label, GFP_KERNEL); + if (!str) { + /* Failing to allocate memory for a label isn't a fatal condition */ + drm_warn(bo->obj->dev, "Not enough memory to allocate BO label"); + return; + } + + panthor_gem_bo_set_label(bo->obj, str); +} + +#ifdef CONFIG_DEBUG_FS +struct gem_size_totals { + size_t size; + size_t resident; + size_t reclaimable; +}; + +static void panthor_gem_debugfs_print_flag_names(struct seq_file *m) +{ + int len; + int i; + + static const char * const gem_state_flags_names[] = { + [PANTHOR_DEBUGFS_GEM_STATE_IMPORTED_BIT] = "imported", + [PANTHOR_DEBUGFS_GEM_STATE_EXPORTED_BIT] = "exported", + }; + + static const char * const gem_usage_flags_names[] = { + [PANTHOR_DEBUGFS_GEM_USAGE_KERNEL_BIT] = "kernel", + [PANTHOR_DEBUGFS_GEM_USAGE_FW_MAPPED_BIT] = "fw-mapped", + }; + + seq_puts(m, "GEM state flags: "); + for (i = 0, len = ARRAY_SIZE(gem_state_flags_names); i < len; i++) { + if (!gem_state_flags_names[i]) + continue; + seq_printf(m, "%s (0x%x)%s", gem_state_flags_names[i], + (u32)BIT(i), (i < len - 1) ? ", " : "\n"); + } + + seq_puts(m, "GEM usage flags: "); + for (i = 0, len = ARRAY_SIZE(gem_usage_flags_names); i < len; i++) { + if (!gem_usage_flags_names[i]) + continue; + seq_printf(m, "%s (0x%x)%s", gem_usage_flags_names[i], + (u32)BIT(i), (i < len - 1) ? ", " : "\n\n"); + } +} + +static void panthor_gem_debugfs_bo_print(struct panthor_gem_object *bo, + struct seq_file *m, + struct gem_size_totals *totals) +{ + unsigned int refcount = kref_read(&bo->base.base.refcount); + char creator_info[32] = {}; + size_t resident_size; + u32 gem_usage_flags = bo->debugfs.flags & (u32)~PANTHOR_DEBUGFS_GEM_USAGE_FLAG_INITIALIZED; + u32 gem_state_flags = 0; + + /* Skip BOs being destroyed. */ + if (!refcount) + return; + + resident_size = bo->base.pages ? bo->base.base.size : 0; + + snprintf(creator_info, sizeof(creator_info), + "%s/%d", bo->debugfs.creator.process_name, bo->debugfs.creator.tgid); + seq_printf(m, "%-32s%-16d%-16d%-16zd%-16zd0x%-16lx", + creator_info, + bo->base.base.name, + refcount, + bo->base.base.size, + resident_size, + drm_vma_node_start(&bo->base.base.vma_node)); + + if (bo->base.base.import_attach) + gem_state_flags |= PANTHOR_DEBUGFS_GEM_STATE_FLAG_IMPORTED; + if (bo->base.base.dma_buf) + gem_state_flags |= PANTHOR_DEBUGFS_GEM_STATE_FLAG_EXPORTED; + + seq_printf(m, "0x%-8x 0x%-10x", gem_state_flags, gem_usage_flags); + + scoped_guard(mutex, &bo->label.lock) { + seq_printf(m, "%s\n", bo->label.str ? 
: ""); + } + + totals->size += bo->base.base.size; + totals->resident += resident_size; + if (bo->base.madv > 0) + totals->reclaimable += resident_size; +} + +void panthor_gem_debugfs_print_bos(struct panthor_device *ptdev, + struct seq_file *m) +{ + struct gem_size_totals totals = {0}; + struct panthor_gem_object *bo; + + panthor_gem_debugfs_print_flag_names(m); + + seq_puts(m, "created-by global-name refcount size resident-size file-offset state usage label\n"); + seq_puts(m, "----------------------------------------------------------------------------------------------------------------------------------------------\n"); + + scoped_guard(mutex, &ptdev->gems.lock) { + list_for_each_entry(bo, &ptdev->gems.node, debugfs.node) { + if (bo->debugfs.flags & PANTHOR_DEBUGFS_GEM_USAGE_FLAG_INITIALIZED) + panthor_gem_debugfs_bo_print(bo, m, &totals); + } + } + + seq_puts(m, "==============================================================================================================================================\n"); + seq_printf(m, "Total size: %zd, Total resident: %zd, Total reclaimable: %zd\n", + totals.size, totals.resident, totals.reclaimable); +} +#endif diff --git a/drivers/gpu/drm/panthor/panthor_gem.h b/drivers/gpu/drm/panthor/panthor_gem.h index e43021cf6d45..4dd732dcd59f 100644 --- a/drivers/gpu/drm/panthor/panthor_gem.h +++ b/drivers/gpu/drm/panthor/panthor_gem.h @@ -13,6 +13,56 @@ struct panthor_vm; +#define PANTHOR_BO_LABEL_MAXLEN 4096 + +enum panthor_debugfs_gem_state_flags { + PANTHOR_DEBUGFS_GEM_STATE_IMPORTED_BIT = 0, + PANTHOR_DEBUGFS_GEM_STATE_EXPORTED_BIT = 1, + + /** @PANTHOR_DEBUGFS_GEM_STATE_FLAG_IMPORTED: GEM BO is PRIME imported. */ + PANTHOR_DEBUGFS_GEM_STATE_FLAG_IMPORTED = BIT(PANTHOR_DEBUGFS_GEM_STATE_IMPORTED_BIT), + + /** @PANTHOR_DEBUGFS_GEM_STATE_FLAG_EXPORTED: GEM BO is PRIME exported. */ + PANTHOR_DEBUGFS_GEM_STATE_FLAG_EXPORTED = BIT(PANTHOR_DEBUGFS_GEM_STATE_EXPORTED_BIT), +}; + +enum panthor_debugfs_gem_usage_flags { + PANTHOR_DEBUGFS_GEM_USAGE_KERNEL_BIT = 0, + PANTHOR_DEBUGFS_GEM_USAGE_FW_MAPPED_BIT = 1, + + /** @PANTHOR_DEBUGFS_GEM_USAGE_FLAG_KERNEL: BO is for kernel use only. */ + PANTHOR_DEBUGFS_GEM_USAGE_FLAG_KERNEL = BIT(PANTHOR_DEBUGFS_GEM_USAGE_KERNEL_BIT), + + /** @PANTHOR_DEBUGFS_GEM_USAGE_FLAG_FW_MAPPED: BO is mapped on the FW VM. */ + PANTHOR_DEBUGFS_GEM_USAGE_FLAG_FW_MAPPED = BIT(PANTHOR_DEBUGFS_GEM_USAGE_FW_MAPPED_BIT), + + /** @PANTHOR_DEBUGFS_GEM_USAGE_FLAG_INITIALIZED: BO is ready for DebugFS display. */ + PANTHOR_DEBUGFS_GEM_USAGE_FLAG_INITIALIZED = BIT(31), +}; + +/** + * struct panthor_gem_debugfs - GEM object's DebugFS list information + */ +struct panthor_gem_debugfs { + /** + * @node: Node used to insert the object in the device-wide list of + * GEM objects, to display information about it through a DebugFS file. + */ + struct list_head node; + + /** @creator: Information about the UM process which created the GEM. */ + struct { + /** @creator.process_name: Group leader name in owning thread's process */ + char process_name[TASK_COMM_LEN]; + + /** @creator.tgid: PID of the thread's group leader within its process */ + pid_t tgid; + } creator; + + /** @flags: Combination of panthor_debugfs_gem_usage_flags flags */ + u32 flags; +}; + /** * struct panthor_gem_object - Driver specific GEM object. */ @@ -46,6 +96,24 @@ struct panthor_gem_object { /** @flags: Combination of drm_panthor_bo_flags flags. */ u32 flags; + + /** + * @label: BO tagging fields. The label can be assigned within the + * driver itself or through a specific IOCTL. 
+ */ + struct { + /** + * @label.str: Pointer to NULL-terminated string, + */ + const char *str; + + /** @lock.str: Protects access to the @label.str field. */ + struct mutex lock; + } label; + +#ifdef CONFIG_DEBUG_FS + struct panthor_gem_debugfs debugfs; +#endif }; /** @@ -85,17 +153,15 @@ struct panthor_gem_object *to_panthor_bo(struct drm_gem_object *obj) struct drm_gem_object *panthor_gem_create_object(struct drm_device *ddev, size_t size); -struct drm_gem_object * -panthor_gem_prime_import_sg_table(struct drm_device *ddev, - struct dma_buf_attachment *attach, - struct sg_table *sgt); - int panthor_gem_create_with_handle(struct drm_file *file, struct drm_device *ddev, struct panthor_vm *exclusive_vm, u64 *size, u32 flags, uint32_t *handle); +void panthor_gem_bo_set_label(struct drm_gem_object *obj, const char *label); +void panthor_gem_kernel_bo_set_label(struct panthor_kernel_bo *bo, const char *label); + static inline u64 panthor_kernel_bo_gpuva(struct panthor_kernel_bo *bo) { @@ -117,7 +183,7 @@ panthor_kernel_bo_vmap(struct panthor_kernel_bo *bo) if (bo->kmap) return 0; - ret = drm_gem_vmap_unlocked(bo->obj, &map); + ret = drm_gem_vmap(bo->obj, &map); if (ret) return ret; @@ -131,7 +197,7 @@ panthor_kernel_bo_vunmap(struct panthor_kernel_bo *bo) if (bo->kmap) { struct iosys_map map = IOSYS_MAP_INIT_VADDR(bo->kmap); - drm_gem_vunmap_unlocked(bo->obj, &map); + drm_gem_vunmap(bo->obj, &map); bo->kmap = NULL; } } @@ -139,8 +205,13 @@ panthor_kernel_bo_vunmap(struct panthor_kernel_bo *bo) struct panthor_kernel_bo * panthor_kernel_bo_create(struct panthor_device *ptdev, struct panthor_vm *vm, size_t size, u32 bo_flags, u32 vm_map_flags, - u64 gpu_va); + u64 gpu_va, const char *name); void panthor_kernel_bo_destroy(struct panthor_kernel_bo *bo); +#ifdef CONFIG_DEBUG_FS +void panthor_gem_debugfs_print_bos(struct panthor_device *pfdev, + struct seq_file *m); +#endif + #endif /* __PANTHOR_GEM_H__ */ diff --git a/drivers/gpu/drm/panthor/panthor_gpu.c b/drivers/gpu/drm/panthor/panthor_gpu.c index 5251d8764e7d..32d678a0114e 100644 --- a/drivers/gpu/drm/panthor/panthor_gpu.c +++ b/drivers/gpu/drm/panthor/panthor_gpu.c @@ -77,6 +77,12 @@ static const struct panthor_model gpu_models[] = { GPU_IRQ_RESET_COMPLETED | \ GPU_IRQ_CLEAN_CACHES_COMPLETED) +static void panthor_gpu_coherency_set(struct panthor_device *ptdev) +{ + gpu_write(ptdev, GPU_COHERENCY_PROTOCOL, + ptdev->coherent ? GPU_COHERENCY_PROT_BIT(ACE_LITE) : GPU_COHERENCY_NONE); +} + static void panthor_gpu_init_info(struct panthor_device *ptdev) { const struct panthor_model *model; @@ -144,6 +150,8 @@ static void panthor_gpu_init_info(struct panthor_device *ptdev) static void panthor_gpu_irq_handler(struct panthor_device *ptdev, u32 status) { + gpu_write(ptdev, GPU_INT_CLEAR, status); + if (status & GPU_IRQ_FAULT) { u32 fault_status = gpu_read(ptdev, GPU_FAULT_STATUS); u64 address = ((u64)gpu_read(ptdev, GPU_FAULT_ADDR_HI) << 32) | @@ -174,7 +182,8 @@ void panthor_gpu_unplug(struct panthor_device *ptdev) unsigned long flags; /* Make sure the IRQ handler is not running after that point. */ - panthor_gpu_irq_suspend(&ptdev->gpu->irq); + if (!IS_ENABLED(CONFIG_PM) || pm_runtime_active(ptdev->base.dev)) + panthor_gpu_irq_suspend(&ptdev->gpu->irq); /* Wake-up all waiters. 
*/ spin_lock_irqsave(&ptdev->gpu->reqs_lock, flags); @@ -365,6 +374,9 @@ int panthor_gpu_l2_power_on(struct panthor_device *ptdev) hweight64(ptdev->gpu_info.shader_present)); } + /* Set the desired coherency mode before the power up of L2 */ + panthor_gpu_coherency_set(ptdev); + return panthor_gpu_power_on(ptdev, L2, 1, 20000); } @@ -460,11 +472,12 @@ int panthor_gpu_soft_reset(struct panthor_device *ptdev) */ void panthor_gpu_suspend(struct panthor_device *ptdev) { - /* - * It may be preferable to simply power down the L2, but for now just - * soft-reset which will leave the L2 powered down. - */ - panthor_gpu_soft_reset(ptdev); + /* On a fast reset, simply power down the L2. */ + if (!ptdev->reset.fast) + panthor_gpu_soft_reset(ptdev); + else + panthor_gpu_power_off(ptdev, L2, 1, 20000); + panthor_gpu_irq_suspend(&ptdev->gpu->irq); } @@ -480,3 +493,50 @@ void panthor_gpu_resume(struct panthor_device *ptdev) panthor_gpu_irq_resume(&ptdev->gpu->irq, GPU_INTERRUPTS_MASK); panthor_gpu_l2_power_on(ptdev); } + +/** + * panthor_gpu_read_64bit_counter() - Read a 64-bit counter at a given offset. + * @ptdev: Device. + * @reg: The offset of the register to read. + * + * Return: The counter value. + */ +static u64 +panthor_gpu_read_64bit_counter(struct panthor_device *ptdev, u32 reg) +{ + u32 hi, lo; + + do { + hi = gpu_read(ptdev, reg + 0x4); + lo = gpu_read(ptdev, reg); + } while (hi != gpu_read(ptdev, reg + 0x4)); + + return ((u64)hi << 32) | lo; +} + +/** + * panthor_gpu_read_timestamp() - Read the timestamp register. + * @ptdev: Device. + * + * Return: The GPU timestamp value. + */ +u64 panthor_gpu_read_timestamp(struct panthor_device *ptdev) +{ + return panthor_gpu_read_64bit_counter(ptdev, GPU_TIMESTAMP_LO); +} + +/** + * panthor_gpu_read_timestamp_offset() - Read the timestamp offset register. + * @ptdev: Device. + * + * Return: The GPU timestamp offset value. + */ +u64 panthor_gpu_read_timestamp_offset(struct panthor_device *ptdev) +{ + u32 hi, lo; + + hi = gpu_read(ptdev, GPU_TIMESTAMP_OFFSET_HI); + lo = gpu_read(ptdev, GPU_TIMESTAMP_OFFSET_LO); + + return ((u64)hi << 32) | lo; +} diff --git a/drivers/gpu/drm/panthor/panthor_gpu.h b/drivers/gpu/drm/panthor/panthor_gpu.h index bba7555dd3c6..7f6133a66127 100644 --- a/drivers/gpu/drm/panthor/panthor_gpu.h +++ b/drivers/gpu/drm/panthor/panthor_gpu.h @@ -5,6 +5,8 @@ #ifndef __PANTHOR_GPU_H__ #define __PANTHOR_GPU_H__ +#include <linux/types.h> + struct panthor_device; int panthor_gpu_init(struct panthor_device *ptdev); @@ -48,5 +50,7 @@ int panthor_gpu_l2_power_on(struct panthor_device *ptdev); int panthor_gpu_flush_caches(struct panthor_device *ptdev, u32 l2, u32 lsc, u32 other); int panthor_gpu_soft_reset(struct panthor_device *ptdev); +u64 panthor_gpu_read_timestamp(struct panthor_device *ptdev); +u64 panthor_gpu_read_timestamp_offset(struct panthor_device *ptdev); #endif diff --git a/drivers/gpu/drm/panthor/panthor_heap.c b/drivers/gpu/drm/panthor/panthor_heap.c index 3796a9eb22af..d236e9ceade4 100644 --- a/drivers/gpu/drm/panthor/panthor_heap.c +++ b/drivers/gpu/drm/panthor/panthor_heap.c @@ -97,6 +97,9 @@ struct panthor_heap_pool { /** @gpu_contexts: Buffer object containing the GPU heap contexts. */ struct panthor_kernel_bo *gpu_contexts; + + /** @size: Size of all chunks across all heaps in the pool. 
*/ + atomic_t size; }; static int panthor_heap_ctx_stride(struct panthor_device *ptdev) @@ -118,7 +121,7 @@ static void *panthor_get_heap_ctx(struct panthor_heap_pool *pool, int id) panthor_get_heap_ctx_offset(pool, id); } -static void panthor_free_heap_chunk(struct panthor_vm *vm, +static void panthor_free_heap_chunk(struct panthor_heap_pool *pool, struct panthor_heap *heap, struct panthor_heap_chunk *chunk) { @@ -127,12 +130,13 @@ static void panthor_free_heap_chunk(struct panthor_vm *vm, heap->chunk_count--; mutex_unlock(&heap->lock); + atomic_sub(heap->chunk_size, &pool->size); + panthor_kernel_bo_destroy(chunk->bo); kfree(chunk); } -static int panthor_alloc_heap_chunk(struct panthor_device *ptdev, - struct panthor_vm *vm, +static int panthor_alloc_heap_chunk(struct panthor_heap_pool *pool, struct panthor_heap *heap, bool initial_chunk) { @@ -144,10 +148,11 @@ static int panthor_alloc_heap_chunk(struct panthor_device *ptdev, if (!chunk) return -ENOMEM; - chunk->bo = panthor_kernel_bo_create(ptdev, vm, heap->chunk_size, + chunk->bo = panthor_kernel_bo_create(pool->ptdev, pool->vm, heap->chunk_size, DRM_PANTHOR_BO_NO_MMAP, DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC, - PANTHOR_VM_KERNEL_AUTO_VA); + PANTHOR_VM_KERNEL_AUTO_VA, + "Tiler heap chunk"); if (IS_ERR(chunk->bo)) { ret = PTR_ERR(chunk->bo); goto err_free_chunk; @@ -180,6 +185,8 @@ static int panthor_alloc_heap_chunk(struct panthor_device *ptdev, heap->chunk_count++; mutex_unlock(&heap->lock); + atomic_add(heap->chunk_size, &pool->size); + return 0; err_destroy_bo: @@ -191,17 +198,16 @@ err_free_chunk: return ret; } -static void panthor_free_heap_chunks(struct panthor_vm *vm, +static void panthor_free_heap_chunks(struct panthor_heap_pool *pool, struct panthor_heap *heap) { struct panthor_heap_chunk *chunk, *tmp; list_for_each_entry_safe(chunk, tmp, &heap->chunks, node) - panthor_free_heap_chunk(vm, heap, chunk); + panthor_free_heap_chunk(pool, heap, chunk); } -static int panthor_alloc_heap_chunks(struct panthor_device *ptdev, - struct panthor_vm *vm, +static int panthor_alloc_heap_chunks(struct panthor_heap_pool *pool, struct panthor_heap *heap, u32 chunk_count) { @@ -209,7 +215,7 @@ static int panthor_alloc_heap_chunks(struct panthor_device *ptdev, u32 i; for (i = 0; i < chunk_count; i++) { - ret = panthor_alloc_heap_chunk(ptdev, vm, heap, true); + ret = panthor_alloc_heap_chunk(pool, heap, true); if (ret) return ret; } @@ -226,7 +232,7 @@ panthor_heap_destroy_locked(struct panthor_heap_pool *pool, u32 handle) if (!heap) return -EINVAL; - panthor_free_heap_chunks(pool->vm, heap); + panthor_free_heap_chunks(pool, heap); mutex_destroy(&heap->lock); kfree(heap); return 0; @@ -308,8 +314,7 @@ int panthor_heap_create(struct panthor_heap_pool *pool, heap->max_chunks = max_chunks; heap->target_in_flight = target_in_flight; - ret = panthor_alloc_heap_chunks(pool->ptdev, vm, heap, - initial_chunk_count); + ret = panthor_alloc_heap_chunks(pool, heap, initial_chunk_count); if (ret) goto err_free_heap; @@ -342,7 +347,7 @@ int panthor_heap_create(struct panthor_heap_pool *pool, return id; err_free_heap: - panthor_free_heap_chunks(pool->vm, heap); + panthor_free_heap_chunks(pool, heap); mutex_destroy(&heap->lock); kfree(heap); @@ -389,6 +394,7 @@ int panthor_heap_return_chunk(struct panthor_heap_pool *pool, removed = chunk; list_del(&chunk->node); heap->chunk_count--; + atomic_sub(heap->chunk_size, &pool->size); break; } } @@ -466,7 +472,7 @@ int panthor_heap_grow(struct panthor_heap_pool *pool, * further jobs in this queue fail immediately instead of 
having to * wait for the job timeout. */ - ret = panthor_alloc_heap_chunk(pool->ptdev, pool->vm, heap, false); + ret = panthor_alloc_heap_chunk(pool, heap, false); if (ret) goto out_unlock; @@ -550,7 +556,8 @@ panthor_heap_pool_create(struct panthor_device *ptdev, struct panthor_vm *vm) pool->gpu_contexts = panthor_kernel_bo_create(ptdev, vm, bosize, DRM_PANTHOR_BO_NO_MMAP, DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC, - PANTHOR_VM_KERNEL_AUTO_VA); + PANTHOR_VM_KERNEL_AUTO_VA, + "Heap pool"); if (IS_ERR(pool->gpu_contexts)) { ret = PTR_ERR(pool->gpu_contexts); goto err_destroy_pool; @@ -560,6 +567,8 @@ panthor_heap_pool_create(struct panthor_device *ptdev, struct panthor_vm *vm) if (ret) goto err_destroy_pool; + atomic_add(pool->gpu_contexts->obj->size, &pool->size); + return pool; err_destroy_pool: @@ -594,8 +603,10 @@ void panthor_heap_pool_destroy(struct panthor_heap_pool *pool) xa_for_each(&pool->xa, i, heap) drm_WARN_ON(&pool->ptdev->base, panthor_heap_destroy_locked(pool, i)); - if (!IS_ERR_OR_NULL(pool->gpu_contexts)) + if (!IS_ERR_OR_NULL(pool->gpu_contexts)) { + atomic_sub(pool->gpu_contexts->obj->size, &pool->size); panthor_kernel_bo_destroy(pool->gpu_contexts); + } /* Reflects the fact the pool has been destroyed. */ pool->vm = NULL; @@ -603,3 +614,18 @@ void panthor_heap_pool_destroy(struct panthor_heap_pool *pool) panthor_heap_pool_put(pool); } + +/** + * panthor_heap_pool_size() - Get a heap pool's total size + * @pool: Pool whose total chunks size to return + * + * Returns the aggregated size of all chunks for all heaps in the pool + * + */ +size_t panthor_heap_pool_size(struct panthor_heap_pool *pool) +{ + if (!pool) + return 0; + + return atomic_read(&pool->size); +} diff --git a/drivers/gpu/drm/panthor/panthor_heap.h b/drivers/gpu/drm/panthor/panthor_heap.h index 25a5f2bba445..e3358d4e8edb 100644 --- a/drivers/gpu/drm/panthor/panthor_heap.h +++ b/drivers/gpu/drm/panthor/panthor_heap.h @@ -27,6 +27,8 @@ struct panthor_heap_pool * panthor_heap_pool_get(struct panthor_heap_pool *pool); void panthor_heap_pool_put(struct panthor_heap_pool *pool); +size_t panthor_heap_pool_size(struct panthor_heap_pool *pool); + int panthor_heap_grow(struct panthor_heap_pool *pool, u64 heap_gpu_va, u32 renderpasses_in_flight, diff --git a/drivers/gpu/drm/panthor/panthor_mmu.c b/drivers/gpu/drm/panthor/panthor_mmu.c index 3cd2bce59edc..6ca9a2642a4e 100644 --- a/drivers/gpu/drm/panthor/panthor_mmu.c +++ b/drivers/gpu/drm/panthor/panthor_mmu.c @@ -53,26 +53,27 @@ struct panthor_mmu { /** @irq: The MMU irq. */ struct panthor_irq irq; - /** @as: Address space related fields. + /** + * @as: Address space related fields. * * The GPU has a limited number of address spaces (AS) slots, forcing * us to re-assign them to re-assign slots on-demand. */ struct { - /** @slots_lock: Lock protecting access to all other AS fields. */ + /** @as.slots_lock: Lock protecting access to all other AS fields. */ struct mutex slots_lock; - /** @alloc_mask: Bitmask encoding the allocated slots. */ + /** @as.alloc_mask: Bitmask encoding the allocated slots. */ unsigned long alloc_mask; - /** @faulty_mask: Bitmask encoding the faulty slots. */ + /** @as.faulty_mask: Bitmask encoding the faulty slots. */ unsigned long faulty_mask; - /** @slots: VMs currently bound to the AS slots. */ + /** @as.slots: VMs currently bound to the AS slots. */ struct panthor_as_slot slots[MAX_AS_SLOTS]; /** - * @lru_list: List of least recently used VMs. + * @as.lru_list: List of least recently used VMs. 
* * We use this list to pick a VM to evict when all slots are * used. @@ -87,16 +88,16 @@ struct panthor_mmu { /** @vm: VMs management fields */ struct { - /** @lock: Lock protecting access to list. */ + /** @vm.lock: Lock protecting access to list. */ struct mutex lock; - /** @list: List containing all VMs. */ + /** @vm.list: List containing all VMs. */ struct list_head list; - /** @reset_in_progress: True if a reset is in progress. */ + /** @vm.reset_in_progress: True if a reset is in progress. */ bool reset_in_progress; - /** @wq: Workqueue used for the VM_BIND queues. */ + /** @vm.wq: Workqueue used for the VM_BIND queues. */ struct workqueue_struct *wq; } vm; }; @@ -143,14 +144,14 @@ struct panthor_vma { struct panthor_vm_op_ctx { /** @rsvd_page_tables: Pages reserved for the MMU page table update. */ struct { - /** @count: Number of pages reserved. */ + /** @rsvd_page_tables.count: Number of pages reserved. */ u32 count; - /** @ptr: Point to the first unused page in the @pages table. */ + /** @rsvd_page_tables.ptr: Point to the first unused page in the @pages table. */ u32 ptr; /** - * @page: Array of pages that can be used for an MMU page table update. + * @rsvd_page_tables.pages: Array of pages to be used for an MMU page table update. * * After an VM operation, there might be free pages left in this array. * They should be returned to the pt_cache as part of the op_ctx cleanup. @@ -172,10 +173,10 @@ struct panthor_vm_op_ctx { /** @va: Virtual range targeted by the VM operation. */ struct { - /** @addr: Start address. */ + /** @va.addr: Start address. */ u64 addr; - /** @range: Range size. */ + /** @va.range: Range size. */ u64 range; } va; @@ -195,14 +196,14 @@ struct panthor_vm_op_ctx { /** @map: Fields specific to a map operation. */ struct { - /** @vm_bo: Buffer object to map. */ + /** @map.vm_bo: Buffer object to map. */ struct drm_gpuvm_bo *vm_bo; - /** @bo_offset: Offset in the buffer object. */ + /** @map.bo_offset: Offset in the buffer object. */ u64 bo_offset; /** - * @sgt: sg-table pointing to pages backing the GEM object. + * @map.sgt: sg-table pointing to pages backing the GEM object. * * This is gathered at job creation time, such that we don't have * to allocate in ::run_job(). @@ -210,7 +211,7 @@ struct panthor_vm_op_ctx { struct sg_table *sgt; /** - * @new_vma: The new VMA object that will be inserted to the VA tree. + * @map.new_vma: The new VMA object that will be inserted to the VA tree. */ struct panthor_vma *new_vma; } map; @@ -304,27 +305,27 @@ struct panthor_vm { /** @kernel_auto_va: Automatic VA-range for kernel BOs. */ struct { - /** @start: Start of the automatic VA-range for kernel BOs. */ + /** @kernel_auto_va.start: Start of the automatic VA-range for kernel BOs. */ u64 start; - /** @size: Size of the automatic VA-range for kernel BOs. */ + /** @kernel_auto_va.size: Size of the automatic VA-range for kernel BOs. */ u64 end; } kernel_auto_va; /** @as: Address space related fields. */ struct { /** - * @id: ID of the address space this VM is bound to. + * @as.id: ID of the address space this VM is bound to. * * A value of -1 means the VM is inactive/not bound. */ int id; - /** @active_cnt: Number of active users of this VM. */ + /** @as.active_cnt: Number of active users of this VM. */ refcount_t active_cnt; /** - * @lru_node: Used to instead the VM in the panthor_mmu::as::lru_list. + * @as.lru_node: Used to instead the VM in the panthor_mmu::as::lru_list. * * Active VMs should not be inserted in the LRU list. 
*/ @@ -336,13 +337,13 @@ struct panthor_vm { */ struct { /** - * @pool: The heap pool attached to this VM. + * @heaps.pool: The heap pool attached to this VM. * * Will stay NULL until someone creates a heap context on this VM. */ struct panthor_heap_pool *pool; - /** @lock: Lock used to protect access to @pool. */ + /** @heaps.lock: Lock used to protect access to @pool. */ struct mutex lock; } heaps; @@ -408,7 +409,7 @@ struct panthor_vm_bind_job { struct panthor_vm_op_ctx ctx; }; -/** +/* * @pt_cache: Cache used to allocate MMU page tables. * * The pre-allocation pattern forces us to over-allocate to plan for @@ -478,7 +479,7 @@ static void *alloc_pt(void *cookie, size_t size, gfp_t gfp) } /** - * @free_pt() - Custom page table free function + * free_pt() - Custom page table free function * @cookie: Cookie passed at page table allocation time. * @data: Page table to free. * @size: Size of the page table. This size should be fixed, @@ -697,7 +698,7 @@ static void panthor_vm_release_as_locked(struct panthor_vm *vm) /** * panthor_vm_active() - Flag a VM as active - * @VM: VM to flag as active. + * @vm: VM to flag as active. * * Assigns an address space to a VM so it can be used by the GPU/MCU. * @@ -780,6 +781,7 @@ out_enable_as: if (ptdev->mmu->as.faulty_mask & panthor_mmu_as_fault_mask(ptdev, as)) { gpu_write(ptdev, MMU_INT_CLEAR, panthor_mmu_as_fault_mask(ptdev, as)); ptdev->mmu->as.faulty_mask &= ~panthor_mmu_as_fault_mask(ptdev, as); + ptdev->mmu->irq.mask |= panthor_mmu_as_fault_mask(ptdev, as); gpu_write(ptdev, MMU_INT_MASK, ~ptdev->mmu->as.faulty_mask); } @@ -801,7 +803,7 @@ out_dev_exit: /** * panthor_vm_idle() - Flag a VM idle - * @VM: VM to flag as idle. + * @vm: VM to flag as idle. * * When we know the GPU is done with the VM (no more jobs to process), * we can relinquish the AS slot attached to this VM, if any. @@ -826,6 +828,14 @@ void panthor_vm_idle(struct panthor_vm *vm) mutex_unlock(&ptdev->mmu->as.slots_lock); } +u32 panthor_vm_page_size(struct panthor_vm *vm) +{ + const struct io_pgtable *pgt = io_pgtable_ops_to_pgtable(vm->pgtbl_ops); + u32 pg_shift = ffs(pgt->cfg.pgsize_bitmap) - 1; + + return 1u << pg_shift; +} + static void panthor_vm_stop(struct panthor_vm *vm) { drm_sched_stop(&vm->sched, NULL); @@ -833,7 +843,7 @@ static void panthor_vm_stop(struct panthor_vm *vm) static void panthor_vm_start(struct panthor_vm *vm) { - drm_sched_start(&vm->sched); + drm_sched_start(&vm->sched, 0); } /** @@ -982,6 +992,8 @@ panthor_vm_map_pages(struct panthor_vm *vm, u64 iova, int prot, if (!size) break; + + offset = 0; } return panthor_vm_flush_range(vm, start_iova, iova - start_iova); @@ -1007,7 +1019,7 @@ static int flags_to_prot(u32 flags) /** * panthor_vm_alloc_va() - Allocate a region in the auto-va space - * @VM: VM to allocate a region on. + * @vm: VM to allocate a region on. * @va: start of the VA range. Can be PANTHOR_VM_KERNEL_AUTO_VA if the user * wants the VA to be automatically allocated from the auto-VA range. * @size: size of the VA range. 
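The alignment checks in the next hunk rely on the new panthor_vm_page_size() helper, which returns the smallest page size the VM's io-pgtable supports (the lowest bit set in cfg.pgsize_bitmap). A minimal standalone sketch of that derivation, using illustrative names rather than the driver's:

#include <stdint.h>

/* The lowest set bit of the supported-page-size bitmap is the smallest
 * supported page size: a bitmap advertising 4K/2M/1G pages (0x40201000)
 * yields 4096. Equivalent to 1u << (ffs(bitmap) - 1) as used in the driver.
 */
static uint32_t smallest_page_size(unsigned long pgsize_bitmap)
{
	return (uint32_t)(pgsize_bitmap & -pgsize_bitmap);
}

The VA and size checks below then test alignment against this value rather than the host's PAGE_MASK, so they follow the GPU VM's page size instead of the CPU's.
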
@@ -1025,12 +1037,13 @@ int panthor_vm_alloc_va(struct panthor_vm *vm, u64 va, u64 size, struct drm_mm_node *va_node) { + ssize_t vm_pgsz = panthor_vm_page_size(vm); int ret; - if (!size || (size & ~PAGE_MASK)) + if (!size || !IS_ALIGNED(size, vm_pgsz)) return -EINVAL; - if (va != PANTHOR_VM_KERNEL_AUTO_VA && (va & ~PAGE_MASK)) + if (va != PANTHOR_VM_KERNEL_AUTO_VA && !IS_ALIGNED(va, vm_pgsz)) return -EINVAL; mutex_lock(&vm->mm_lock); @@ -1052,7 +1065,7 @@ panthor_vm_alloc_va(struct panthor_vm *vm, u64 va, u64 size, /** * panthor_vm_free_va() - Free a region allocated with panthor_vm_alloc_va() - * @VM: VM to free the region on. + * @vm: VM to free the region on. * @va_node: Memory node representing the region to free. */ void panthor_vm_free_va(struct panthor_vm *vm, struct drm_mm_node *va_node) @@ -1091,7 +1104,7 @@ static void panthor_vm_bo_put(struct drm_gpuvm_bo *vm_bo) /* If the vm_bo object was destroyed, release the pin reference that * was hold by this object. */ - if (unpin && !bo->base.base.import_attach) + if (unpin && !drm_gem_is_imported(&bo->base.base)) drm_gem_shmem_unpin(&bo->base); drm_gpuvm_put(vm); @@ -1222,7 +1235,7 @@ static int panthor_vm_prepare_map_op_ctx(struct panthor_vm_op_ctx *op_ctx, if (ret) goto err_cleanup; - if (!bo->base.base.import_attach) { + if (!drm_gem_is_imported(&bo->base.base)) { /* Pre-reserve the BO pages, so the map operation doesn't have to * allocate. */ @@ -1233,7 +1246,7 @@ static int panthor_vm_prepare_map_op_ctx(struct panthor_vm_op_ctx *op_ctx, sgt = drm_gem_shmem_get_pages_sgt(&bo->base); if (IS_ERR(sgt)) { - if (!bo->base.base.import_attach) + if (!drm_gem_is_imported(&bo->base.base)) drm_gem_shmem_unpin(&bo->base); ret = PTR_ERR(sgt); @@ -1244,7 +1257,7 @@ static int panthor_vm_prepare_map_op_ctx(struct panthor_vm_op_ctx *op_ctx, preallocated_vm_bo = drm_gpuvm_bo_create(&vm->base, &bo->base.base); if (!preallocated_vm_bo) { - if (!bo->base.base.import_attach) + if (!drm_gem_is_imported(&bo->base.base)) drm_gem_shmem_unpin(&bo->base); ret = -ENOMEM; @@ -1270,7 +1283,7 @@ static int panthor_vm_prepare_map_op_ctx(struct panthor_vm_op_ctx *op_ctx, * which will be released in panthor_vm_bo_put(). */ if (preallocated_vm_bo != op_ctx->map.vm_bo && - !bo->base.base.import_attach) + !drm_gem_is_imported(&bo->base.base)) drm_gem_shmem_unpin(&bo->base); op_ctx->map.bo_offset = offset; @@ -1481,9 +1494,9 @@ panthor_vm_create_check_args(const struct panthor_device *ptdev, /** * panthor_vm_pool_create_vm() - Create a VM + * @ptdev: The panthor device * @pool: The VM to create this VM on. - * @kernel_va_start: Start of the region reserved for kernel objects. - * @kernel_va_range: Size of the region reserved for kernel objects. + * @args: VM creation args. * * Return: a positive VM ID on success, a negative error code otherwise. */ @@ -1547,6 +1560,8 @@ static void panthor_vm_destroy(struct panthor_vm *vm) * * The VM resources are freed when the last reference on the VM object is * dropped. 
+ * + * Return: %0 for success, negative errno value for failure */ int panthor_vm_pool_destroy_vm(struct panthor_vm_pool *pool, u32 handle) { @@ -1571,7 +1586,9 @@ panthor_vm_pool_get_vm(struct panthor_vm_pool *pool, u32 handle) { struct panthor_vm *vm; + xa_lock(&pool->xa); vm = panthor_vm_get(xa_load(&pool->xa, handle)); + xa_unlock(&pool->xa); return vm; } @@ -1693,11 +1710,17 @@ static void panthor_mmu_irq_handler(struct panthor_device *ptdev, u32 status) access_type, access_type_name(ptdev, fault_status), source_id); + /* We don't handle VM faults at the moment, so let's just clear the + * interrupt and let the writer/reader crash. + * Note that COMPLETED irqs are never cleared, but this is fine + * because they are always masked. + */ + gpu_write(ptdev, MMU_INT_CLEAR, mask); + /* Ignore MMU interrupts on this AS until it's been * re-enabled. */ ptdev->mmu->irq.mask = new_int_mask; - gpu_write(ptdev, MMU_INT_MASK, new_int_mask); if (ptdev->mmu->as.slots[as].vm) ptdev->mmu->as.slots[as].vm->unhandled_fault = true; @@ -1928,7 +1951,34 @@ struct panthor_heap_pool *panthor_vm_get_heap_pool(struct panthor_vm *vm, bool c return pool; } -static u64 mair_to_memattr(u64 mair) +/** + * panthor_vm_heaps_sizes() - Calculate size of all heap chunks across all + * heaps over all the heap pools in a VM + * @pfile: File. + * @stats: Memory stats to be updated. + * + * Calculate all heap chunk sizes in all heap pools bound to a VM. If the VM + * is active, record the size as active as well. + */ +void panthor_vm_heaps_sizes(struct panthor_file *pfile, struct drm_memory_stats *stats) +{ + struct panthor_vm *vm; + unsigned long i; + + if (!pfile->vms) + return; + + xa_lock(&pfile->vms->xa); + xa_for_each(&pfile->vms->xa, i, vm) { + size_t size = panthor_heap_pool_size(vm->heaps.pool); + stats->resident += size; + if (vm->as.id >= 0) + stats->active += size; + } + xa_unlock(&pfile->vms->xa); +} + +static u64 mair_to_memattr(u64 mair, bool coherent) { u64 memattr = 0; u32 i; @@ -1947,14 +1997,21 @@ static u64 mair_to_memattr(u64 mair) AS_MEMATTR_AARCH64_SH_MIDGARD_INNER | AS_MEMATTR_AARCH64_INNER_ALLOC_EXPL(false, false); } else { - /* Use SH_CPU_INNER mode so SH_IS, which is used when - * IOMMU_CACHE is set, actually maps to the standard - * definition of inner-shareable and not Mali's - * internal-shareable mode. - */ out_attr = AS_MEMATTR_AARCH64_INNER_OUTER_WB | - AS_MEMATTR_AARCH64_SH_CPU_INNER | AS_MEMATTR_AARCH64_INNER_ALLOC_EXPL(inner & 1, inner & 2); + /* Use SH_MIDGARD_INNER mode when device isn't coherent, + * so SH_IS, which is used when IOMMU_CACHE is set, maps + * to Mali's internal-shareable mode. As per the Mali + * Spec, inner and outer-shareable modes aren't allowed + * for WB memory when coherency is disabled. + * Use SH_CPU_INNER mode when coherency is enabled, so + * that SH_IS actually maps to the standard definition of + * inner-shareable. + */ + if (!coherent) + out_attr |= AS_MEMATTR_AARCH64_SH_MIDGARD_INNER; + else + out_attr |= AS_MEMATTR_AARCH64_SH_CPU_INNER; } memattr |= (u64)out_attr << (8 * i); @@ -2255,6 +2312,16 @@ panthor_vm_create(struct panthor_device *ptdev, bool for_mcu, u64 full_va_range = 1ull << va_bits; struct drm_gem_object *dummy_gem; struct drm_gpu_scheduler *sched; + const struct drm_sched_init_args sched_args = { + .ops = &panthor_vm_bind_ops, + .submit_wq = ptdev->mmu->vm.wq, + .num_rqs = 1, + .credit_limit = 1, + /* Bind operations are synchronous for now, no timeout needed. 
*/ + .timeout = MAX_SCHEDULE_TIMEOUT, + .name = "panthor-vm-bind", + .dev = ptdev->base.dev, + }; struct io_pgtable_cfg pgtbl_cfg; u64 mair, min_va, va_range; struct panthor_vm *vm; @@ -2312,11 +2379,7 @@ panthor_vm_create(struct panthor_device *ptdev, bool for_mcu, goto err_mm_takedown; } - /* Bind operations are synchronous for now, no timeout needed. */ - ret = drm_sched_init(&vm->sched, &panthor_vm_bind_ops, ptdev->mmu->vm.wq, - 1, 1, 0, - MAX_SCHEDULE_TIMEOUT, NULL, NULL, - "panthor-vm-bind", ptdev->base.dev); + ret = drm_sched_init(&vm->sched, &sched_args); if (ret) goto err_free_io_pgtable; @@ -2326,7 +2389,7 @@ panthor_vm_create(struct panthor_device *ptdev, bool for_mcu, goto err_sched_fini; mair = io_pgtable_ops_to_pgtable(vm->pgtbl_ops)->cfg.arm_lpae_s1_cfg.mair; - vm->memattr = mair_to_memattr(mair); + vm->memattr = mair_to_memattr(mair, ptdev->coherent); mutex_lock(&ptdev->mmu->vm.lock); list_add_tail(&vm->node, &ptdev->mmu->vm.list); @@ -2366,11 +2429,12 @@ panthor_vm_bind_prepare_op_ctx(struct drm_file *file, const struct drm_panthor_vm_bind_op *op, struct panthor_vm_op_ctx *op_ctx) { + ssize_t vm_pgsz = panthor_vm_page_size(vm); struct drm_gem_object *gem; int ret; /* Aligned on page size. */ - if ((op->va | op->size) & ~PAGE_MASK) + if (!IS_ALIGNED(op->va | op->size, vm_pgsz)) return -EINVAL; switch (op->flags & DRM_PANTHOR_VM_BIND_OP_TYPE_MASK) { @@ -2651,7 +2715,8 @@ int panthor_vm_prepare_mapped_bos_resvs(struct drm_exec *exec, struct panthor_vm */ void panthor_mmu_unplug(struct panthor_device *ptdev) { - panthor_mmu_irq_suspend(&ptdev->mmu->irq); + if (!IS_ENABLED(CONFIG_PM) || pm_runtime_active(ptdev->base.dev)) + panthor_mmu_irq_suspend(&ptdev->mmu->irq); mutex_lock(&ptdev->mmu->as.slots_lock); for (u32 i = 0; i < ARRAY_SIZE(ptdev->mmu->as.slots); i++) { @@ -2716,9 +2781,9 @@ int panthor_mmu_init(struct panthor_device *ptdev) * which passes iova as an unsigned long. Patch the mmu_features to reflect this * limitation. 
*/ - if (sizeof(unsigned long) * 8 < va_bits) { + if (va_bits > BITS_PER_LONG) { ptdev->gpu_info.mmu_features &= ~GENMASK(7, 0); - ptdev->gpu_info.mmu_features |= sizeof(unsigned long) * 8; + ptdev->gpu_info.mmu_features |= BITS_PER_LONG; } return drmm_add_action_or_reset(&ptdev->base, panthor_mmu_release_wq, mmu->vm.wq); diff --git a/drivers/gpu/drm/panthor/panthor_mmu.h b/drivers/gpu/drm/panthor/panthor_mmu.h index 6788771071e3..fc274637114e 100644 --- a/drivers/gpu/drm/panthor/panthor_mmu.h +++ b/drivers/gpu/drm/panthor/panthor_mmu.h @@ -9,6 +9,7 @@ struct drm_exec; struct drm_sched_job; +struct drm_memory_stats; struct panthor_gem_object; struct panthor_heap_pool; struct panthor_vm; @@ -30,12 +31,15 @@ panthor_vm_get_bo_for_va(struct panthor_vm *vm, u64 va, u64 *bo_offset); int panthor_vm_active(struct panthor_vm *vm); void panthor_vm_idle(struct panthor_vm *vm); +u32 panthor_vm_page_size(struct panthor_vm *vm); int panthor_vm_as(struct panthor_vm *vm); int panthor_vm_flush_all(struct panthor_vm *vm); struct panthor_heap_pool * panthor_vm_get_heap_pool(struct panthor_vm *vm, bool create); +void panthor_vm_heaps_sizes(struct panthor_file *pfile, struct drm_memory_stats *stats); + struct panthor_vm *panthor_vm_get(struct panthor_vm *vm); void panthor_vm_put(struct panthor_vm *vm); struct panthor_vm *panthor_vm_create(struct panthor_device *ptdev, bool for_mcu, diff --git a/drivers/gpu/drm/panthor/panthor_regs.h b/drivers/gpu/drm/panthor/panthor_regs.h index b7b3b3add166..a7a323dc5cf9 100644 --- a/drivers/gpu/drm/panthor/panthor_regs.h +++ b/drivers/gpu/drm/panthor/panthor_regs.h @@ -133,8 +133,8 @@ #define GPU_COHERENCY_PROT_BIT(name) BIT(GPU_COHERENCY_ ## name) #define GPU_COHERENCY_PROTOCOL 0x304 -#define GPU_COHERENCY_ACE 0 -#define GPU_COHERENCY_ACE_LITE 1 +#define GPU_COHERENCY_ACE_LITE 0 +#define GPU_COHERENCY_ACE 1 #define GPU_COHERENCY_NONE 31 #define MCU_CONTROL 0x700 diff --git a/drivers/gpu/drm/panthor/panthor_sched.c b/drivers/gpu/drm/panthor/panthor_sched.c index aee362abb710..43ee57728de5 100644 --- a/drivers/gpu/drm/panthor/panthor_sched.c +++ b/drivers/gpu/drm/panthor/panthor_sched.c @@ -9,6 +9,7 @@ #include <drm/panthor_drm.h> #include <linux/build_bug.h> +#include <linux/cleanup.h> #include <linux/clk.h> #include <linux/delay.h> #include <linux/dma-mapping.h> @@ -88,11 +89,11 @@ #define JOB_TIMEOUT_MS 5000 -#define MIN_CS_PER_CSG 8 - -#define MIN_CSGS 3 #define MAX_CSG_PRIO 0xf +#define NUM_INSTRS_PER_CACHE_LINE (64 / sizeof(u64)) +#define MAX_INSTRS_PER_JOB 24 + struct panthor_group; /** @@ -137,8 +138,6 @@ enum panthor_csg_priority { * non-real-time groups. When such a group becomes executable, * it will evict the group with the lowest non-rt priority if * there's no free group slot available. - * - * Currently not exposed to userspace. */ PANTHOR_CSG_PRIORITY_RT, @@ -476,6 +475,18 @@ struct panthor_queue { */ struct list_head in_flight_jobs; } fence_ctx; + + /** @profiling: Job profiling data slots and access information. */ + struct { + /** @slots: Kernel BO holding the slots. */ + struct panthor_kernel_bo *slots; + + /** @slot_count: Number of jobs ringbuffer can hold at once. */ + u32 slot_count; + + /** @seqno: Index of the next available profiling information slot. */ + u32 seqno; + } profiling; }; /** @@ -589,14 +600,25 @@ struct panthor_group { * @timedout: True when a timeout occurred on any of the queues owned by * this group. * - * Timeouts can be reported by drm_sched or by the FW. 
In any case, any - * timeout situation is unrecoverable, and the group becomes useless. - * We simply wait for all references to be dropped so we can release the - * group object. + * Timeouts can be reported by drm_sched or by the FW. If a reset is required, + * and the group can't be suspended, this also leads to a timeout. In any case, + * any timeout situation is unrecoverable, and the group becomes useless. We + * simply wait for all references to be dropped so we can release the group + * object. */ bool timedout; /** + * @innocent: True when the group becomes unusable because the group suspension + * failed during a reset. + * + * Sometimes the FW was put in a bad state by other groups, causing the group + * suspension happening in the reset path to fail. In that case, we consider the + * group innocent. + */ + bool innocent; + + /** * @syncobjs: Pool of per-queue synchronization objects. * * One sync object per queue. The position of the sync object is @@ -604,6 +626,21 @@ struct panthor_group { */ struct panthor_kernel_bo *syncobjs; + /** @fdinfo: Per-file info exposed through /proc/<process>/fdinfo */ + struct { + /** @data: Total sampled values for jobs in queues from this group. */ + struct panthor_gpu_usage data; + + /** + * @fdinfo.lock: Spinlock to govern concurrent access from drm file's fdinfo + * callback and job post-completion processing function + */ + spinlock_t lock; + + /** @fdinfo.kbo_sizes: Aggregate size of private kernel BO's held by the group. */ + size_t kbo_sizes; + } fdinfo; + /** @state: Group state. */ enum panthor_group_state state; @@ -661,6 +698,18 @@ struct panthor_group { struct list_head wait_node; }; +struct panthor_job_profiling_data { + struct { + u64 before; + u64 after; + } cycles; + + struct { + u64 before; + u64 after; + } time; +}; + /** * group_queue_work() - Queue a group work * @group: Group to queue the work for. @@ -774,6 +823,15 @@ struct panthor_job { /** @done_fence: Fence signaled when the job is finished or cancelled. */ struct dma_fence *done_fence; + + /** @profiling: Job profiling information. */ + struct { + /** @mask: Current device job profiling enablement bitmask. */ + u32 mask; + + /** @slot: Job index in the profiling slots BO. */ + u32 slot; + } profiling; }; static void @@ -782,7 +840,7 @@ panthor_queue_put_syncwait_obj(struct panthor_queue *queue) if (queue->syncwait.kmap) { struct iosys_map map = IOSYS_MAP_INIT_VADDR(queue->syncwait.kmap); - drm_gem_vunmap_unlocked(queue->syncwait.obj, &map); + drm_gem_vunmap(queue->syncwait.obj, &map); queue->syncwait.kmap = NULL; } @@ -808,7 +866,7 @@ panthor_queue_get_syncwait_obj(struct panthor_group *group, struct panthor_queue goto err_put_syncwait_obj; queue->syncwait.obj = &bo->base.base; - ret = drm_gem_vmap_unlocked(queue->syncwait.obj, &map); + ret = drm_gem_vmap(queue->syncwait.obj, &map); if (drm_WARN_ON(&ptdev->base, ret)) goto err_put_syncwait_obj; @@ -838,6 +896,7 @@ static void group_free_queue(struct panthor_group *group, struct panthor_queue * panthor_kernel_bo_destroy(queue->ringbuf); panthor_kernel_bo_destroy(queue->iface.mem); + panthor_kernel_bo_destroy(queue->profiling.slots); /* Release the last_fence we were holding, if any. 
*/ dma_fence_put(queue->fence_ctx.last_fence); @@ -1988,8 +2047,6 @@ tick_ctx_init(struct panthor_scheduler *sched, } } -#define NUM_INSTRS_PER_SLOT 16 - static void group_term_post_processing(struct panthor_group *group) { @@ -2306,7 +2363,7 @@ static void tick_work(struct work_struct *work) if (!drm_dev_enter(&ptdev->base, &cookie)) return; - ret = pm_runtime_resume_and_get(ptdev->base.dev); + ret = panthor_device_resume_and_get(ptdev); if (drm_WARN_ON(&ptdev->base, ret)) goto out_dev_exit; @@ -2545,7 +2602,7 @@ static void queue_start(struct panthor_queue *queue) list_for_each_entry(job, &queue->scheduler.pending_list, base.list) job->base.s_fence->parent = dma_fence_get(job->done_fence); - drm_sched_start(&queue->scheduler); + drm_sched_start(&queue->scheduler, 0); } static void panthor_group_stop(struct panthor_group *group) @@ -2640,6 +2697,18 @@ void panthor_sched_suspend(struct panthor_device *ptdev) csgs_upd_ctx_init(&upd_ctx); while (slot_mask) { u32 csg_id = ffs(slot_mask) - 1; + struct panthor_csg_slot *csg_slot = &sched->csg_slots[csg_id]; + + /* If the group was still usable before that point, we consider + * it innocent. + */ + if (group_can_run(csg_slot->group)) + csg_slot->group->innocent = true; + + /* We consider group suspension failures as fatal and flag the + * group as unusable by setting timedout=true. + */ + csg_slot->group->timedout = true; csgs_upd_ctx_queue_reqs(ptdev, &upd_ctx, csg_id, CSG_STATE_TERMINATE, @@ -2783,6 +2852,42 @@ void panthor_sched_post_reset(struct panthor_device *ptdev, bool reset_failed) } } +static void update_fdinfo_stats(struct panthor_job *job) +{ + struct panthor_group *group = job->group; + struct panthor_queue *queue = group->queues[job->queue_idx]; + struct panthor_gpu_usage *fdinfo = &group->fdinfo.data; + struct panthor_job_profiling_data *slots = queue->profiling.slots->kmap; + struct panthor_job_profiling_data *data = &slots[job->profiling.slot]; + + scoped_guard(spinlock, &group->fdinfo.lock) { + if (job->profiling.mask & PANTHOR_DEVICE_PROFILING_CYCLES) + fdinfo->cycles += data->cycles.after - data->cycles.before; + if (job->profiling.mask & PANTHOR_DEVICE_PROFILING_TIMESTAMP) + fdinfo->time += data->time.after - data->time.before; + } +} + +void panthor_fdinfo_gather_group_samples(struct panthor_file *pfile) +{ + struct panthor_group_pool *gpool = pfile->groups; + struct panthor_group *group; + unsigned long i; + + if (IS_ERR_OR_NULL(gpool)) + return; + + xa_lock(&gpool->xa); + xa_for_each(&gpool->xa, i, group) { + guard(spinlock)(&group->fdinfo.lock); + pfile->stats.cycles += group->fdinfo.data.cycles; + pfile->stats.time += group->fdinfo.data.time; + group->fdinfo.data.cycles = 0; + group->fdinfo.data.time = 0; + } + xa_unlock(&gpool->xa); +} + static void group_sync_upd_work(struct work_struct *work) { struct panthor_group *group = @@ -2815,6 +2920,8 @@ static void group_sync_upd_work(struct work_struct *work) dma_fence_end_signalling(cookie); list_for_each_entry_safe(job, job_tmp, &done_jobs, node) { + if (job->profiling.mask) + update_fdinfo_stats(job); list_del_init(&job->node); panthor_job_put(&job->base); } @@ -2822,65 +2929,198 @@ static void group_sync_upd_work(struct work_struct *work) group_put(group); } -static struct dma_fence * -queue_run_job(struct drm_sched_job *sched_job) +struct panthor_job_ringbuf_instrs { + u64 buffer[MAX_INSTRS_PER_JOB]; + u32 count; +}; + +struct panthor_job_instr { + u32 profile_mask; + u64 instr; +}; + +#define JOB_INSTR(__prof, __instr) \ + { \ + .profile_mask = __prof, \ + .instr = 
__instr, \ + } + +static void +copy_instrs_to_ringbuf(struct panthor_queue *queue, + struct panthor_job *job, + struct panthor_job_ringbuf_instrs *instrs) +{ + u64 ringbuf_size = panthor_kernel_bo_size(queue->ringbuf); + u64 start = job->ringbuf.start & (ringbuf_size - 1); + u64 size, written; + + /* + * We need to write a whole slot, including any trailing zeroes + * that may come at the end of it. Also, because instrs.buffer has + * been zero-initialised, there's no need to pad it with 0's + */ + instrs->count = ALIGN(instrs->count, NUM_INSTRS_PER_CACHE_LINE); + size = instrs->count * sizeof(u64); + WARN_ON(size > ringbuf_size); + written = min(ringbuf_size - start, size); + + memcpy(queue->ringbuf->kmap + start, instrs->buffer, written); + + if (written < size) + memcpy(queue->ringbuf->kmap, + &instrs->buffer[written / sizeof(u64)], + size - written); +} + +struct panthor_job_cs_params { + u32 profile_mask; + u64 addr_reg; u64 val_reg; + u64 cycle_reg; u64 time_reg; + u64 sync_addr; u64 times_addr; + u64 cs_start; u64 cs_size; + u32 last_flush; u32 waitall_mask; +}; + +static void +get_job_cs_params(struct panthor_job *job, struct panthor_job_cs_params *params) { - struct panthor_job *job = container_of(sched_job, struct panthor_job, base); struct panthor_group *group = job->group; struct panthor_queue *queue = group->queues[job->queue_idx]; struct panthor_device *ptdev = group->ptdev; struct panthor_scheduler *sched = ptdev->scheduler; - u32 ringbuf_size = panthor_kernel_bo_size(queue->ringbuf); - u32 ringbuf_insert = queue->iface.input->insert & (ringbuf_size - 1); - u64 addr_reg = ptdev->csif_info.cs_reg_count - - ptdev->csif_info.unpreserved_cs_reg_count; - u64 val_reg = addr_reg + 2; - u64 sync_addr = panthor_kernel_bo_gpuva(group->syncobjs) + - job->queue_idx * sizeof(struct panthor_syncobj_64b); - u32 waitall_mask = GENMASK(sched->sb_slot_count - 1, 0); - struct dma_fence *done_fence; - int ret; - u64 call_instrs[NUM_INSTRS_PER_SLOT] = { - /* MOV32 rX+2, cs.latest_flush */ - (2ull << 56) | (val_reg << 48) | job->call_info.latest_flush, + params->addr_reg = ptdev->csif_info.cs_reg_count - + ptdev->csif_info.unpreserved_cs_reg_count; + params->val_reg = params->addr_reg + 2; + params->cycle_reg = params->addr_reg; + params->time_reg = params->val_reg; - /* FLUSH_CACHE2.clean_inv_all.no_wait.signal(0) rX+2 */ - (36ull << 56) | (0ull << 48) | (val_reg << 40) | (0 << 16) | 0x233, + params->sync_addr = panthor_kernel_bo_gpuva(group->syncobjs) + + job->queue_idx * sizeof(struct panthor_syncobj_64b); + params->times_addr = panthor_kernel_bo_gpuva(queue->profiling.slots) + + (job->profiling.slot * sizeof(struct panthor_job_profiling_data)); + params->waitall_mask = GENMASK(sched->sb_slot_count - 1, 0); - /* MOV48 rX:rX+1, cs.start */ - (1ull << 56) | (addr_reg << 48) | job->call_info.start, + params->cs_start = job->call_info.start; + params->cs_size = job->call_info.size; + params->last_flush = job->call_info.latest_flush; - /* MOV32 rX+2, cs.size */ - (2ull << 56) | (val_reg << 48) | job->call_info.size, + params->profile_mask = job->profiling.mask; +} - /* WAIT(0) => waits for FLUSH_CACHE2 instruction */ - (3ull << 56) | (1 << 16), +#define JOB_INSTR_ALWAYS(instr) \ + JOB_INSTR(PANTHOR_DEVICE_PROFILING_DISABLED, (instr)) +#define JOB_INSTR_TIMESTAMP(instr) \ + JOB_INSTR(PANTHOR_DEVICE_PROFILING_TIMESTAMP, (instr)) +#define JOB_INSTR_CYCLES(instr) \ + JOB_INSTR(PANTHOR_DEVICE_PROFILING_CYCLES, (instr)) +static void +prepare_job_instrs(const struct panthor_job_cs_params *params, + struct 
panthor_job_ringbuf_instrs *instrs) +{ + const struct panthor_job_instr instr_seq[] = { + /* MOV32 rX+2, cs.latest_flush */ + JOB_INSTR_ALWAYS((2ull << 56) | (params->val_reg << 48) | params->last_flush), + /* FLUSH_CACHE2.clean_inv_all.no_wait.signal(0) rX+2 */ + JOB_INSTR_ALWAYS((36ull << 56) | (0ull << 48) | (params->val_reg << 40) | + (0 << 16) | 0x233), + /* MOV48 rX:rX+1, cycles_offset */ + JOB_INSTR_CYCLES((1ull << 56) | (params->cycle_reg << 48) | + (params->times_addr + + offsetof(struct panthor_job_profiling_data, cycles.before))), + /* STORE_STATE cycles */ + JOB_INSTR_CYCLES((40ull << 56) | (params->cycle_reg << 40) | (1ll << 32)), + /* MOV48 rX:rX+1, time_offset */ + JOB_INSTR_TIMESTAMP((1ull << 56) | (params->time_reg << 48) | + (params->times_addr + + offsetof(struct panthor_job_profiling_data, time.before))), + /* STORE_STATE timer */ + JOB_INSTR_TIMESTAMP((40ull << 56) | (params->time_reg << 40) | (0ll << 32)), + /* MOV48 rX:rX+1, cs.start */ + JOB_INSTR_ALWAYS((1ull << 56) | (params->addr_reg << 48) | params->cs_start), + /* MOV32 rX+2, cs.size */ + JOB_INSTR_ALWAYS((2ull << 56) | (params->val_reg << 48) | params->cs_size), + /* WAIT(0) => waits for FLUSH_CACHE2 instruction */ + JOB_INSTR_ALWAYS((3ull << 56) | (1 << 16)), /* CALL rX:rX+1, rX+2 */ - (32ull << 56) | (addr_reg << 40) | (val_reg << 32), - + JOB_INSTR_ALWAYS((32ull << 56) | (params->addr_reg << 40) | + (params->val_reg << 32)), + /* MOV48 rX:rX+1, cycles_offset */ + JOB_INSTR_CYCLES((1ull << 56) | (params->cycle_reg << 48) | + (params->times_addr + + offsetof(struct panthor_job_profiling_data, cycles.after))), + /* STORE_STATE cycles */ + JOB_INSTR_CYCLES((40ull << 56) | (params->cycle_reg << 40) | (1ll << 32)), + /* MOV48 rX:rX+1, time_offset */ + JOB_INSTR_TIMESTAMP((1ull << 56) | (params->time_reg << 48) | + (params->times_addr + + offsetof(struct panthor_job_profiling_data, time.after))), + /* STORE_STATE timer */ + JOB_INSTR_TIMESTAMP((40ull << 56) | (params->time_reg << 40) | (0ll << 32)), /* MOV48 rX:rX+1, sync_addr */ - (1ull << 56) | (addr_reg << 48) | sync_addr, - + JOB_INSTR_ALWAYS((1ull << 56) | (params->addr_reg << 48) | params->sync_addr), /* MOV48 rX+2, #1 */ - (1ull << 56) | (val_reg << 48) | 1, - + JOB_INSTR_ALWAYS((1ull << 56) | (params->val_reg << 48) | 1), /* WAIT(all) */ - (3ull << 56) | (waitall_mask << 16), - + JOB_INSTR_ALWAYS((3ull << 56) | (params->waitall_mask << 16)), /* SYNC_ADD64.system_scope.propage_err.nowait rX:rX+1, rX+2*/ - (51ull << 56) | (0ull << 48) | (addr_reg << 40) | (val_reg << 32) | (0 << 16) | 1, + JOB_INSTR_ALWAYS((51ull << 56) | (0ull << 48) | (params->addr_reg << 40) | + (params->val_reg << 32) | (0 << 16) | 1), + /* ERROR_BARRIER, so we can recover from faults at job boundaries. */ + JOB_INSTR_ALWAYS((47ull << 56)), + }; + u32 pad; - /* ERROR_BARRIER, so we can recover from faults at job - * boundaries. - */ - (47ull << 56), + instrs->count = 0; + + /* NEED to be cacheline aligned to please the prefetcher. */ + static_assert(sizeof(instrs->buffer) % 64 == 0, + "panthor_job_ringbuf_instrs::buffer is not aligned on a cacheline"); + + /* Make sure we have enough storage to store the whole sequence. */ + static_assert(ALIGN(ARRAY_SIZE(instr_seq), NUM_INSTRS_PER_CACHE_LINE) == + ARRAY_SIZE(instrs->buffer), + "instr_seq vs panthor_job_ringbuf_instrs::buffer size mismatch"); + + for (u32 i = 0; i < ARRAY_SIZE(instr_seq); i++) { + /* If the profile mask of this instruction is not enabled, skip it. 
*/ + if (instr_seq[i].profile_mask && + !(instr_seq[i].profile_mask & params->profile_mask)) + continue; + + instrs->buffer[instrs->count++] = instr_seq[i].instr; + } + + pad = ALIGN(instrs->count, NUM_INSTRS_PER_CACHE_LINE); + memset(&instrs->buffer[instrs->count], 0, + (pad - instrs->count) * sizeof(instrs->buffer[0])); + instrs->count = pad; +} + +static u32 calc_job_credits(u32 profile_mask) +{ + struct panthor_job_ringbuf_instrs instrs; + struct panthor_job_cs_params params = { + .profile_mask = profile_mask, }; - /* Need to be cacheline aligned to please the prefetcher. */ - static_assert(sizeof(call_instrs) % 64 == 0, - "call_instrs is not aligned on a cacheline"); + prepare_job_instrs(¶ms, &instrs); + return instrs.count; +} + +static struct dma_fence * +queue_run_job(struct drm_sched_job *sched_job) +{ + struct panthor_job *job = container_of(sched_job, struct panthor_job, base); + struct panthor_group *group = job->group; + struct panthor_queue *queue = group->queues[job->queue_idx]; + struct panthor_device *ptdev = group->ptdev; + struct panthor_scheduler *sched = ptdev->scheduler; + struct panthor_job_ringbuf_instrs instrs; + struct panthor_job_cs_params cs_params; + struct dma_fence *done_fence; + int ret; /* Stream size is zero, nothing to do except making sure all previously * submitted jobs are done before we signal the @@ -2891,7 +3131,7 @@ queue_run_job(struct drm_sched_job *sched_job) return dma_fence_get(job->done_fence); } - ret = pm_runtime_resume_and_get(ptdev->base.dev); + ret = panthor_device_resume_and_get(ptdev); if (drm_WARN_ON(&ptdev->base, ret)) return ERR_PTR(ret); @@ -2907,17 +3147,23 @@ queue_run_job(struct drm_sched_job *sched_job) queue->fence_ctx.id, atomic64_inc_return(&queue->fence_ctx.seqno)); - memcpy(queue->ringbuf->kmap + ringbuf_insert, - call_instrs, sizeof(call_instrs)); + job->profiling.slot = queue->profiling.seqno++; + if (queue->profiling.seqno == queue->profiling.slot_count) + queue->profiling.seqno = 0; + + job->ringbuf.start = queue->iface.input->insert; + + get_job_cs_params(job, &cs_params); + prepare_job_instrs(&cs_params, &instrs); + copy_instrs_to_ringbuf(queue, job, &instrs); + + job->ringbuf.end = job->ringbuf.start + (instrs.count * sizeof(u64)); panthor_job_get(&job->base); spin_lock(&queue->fence_ctx.lock); list_add_tail(&job->node, &queue->fence_ctx.in_flight_jobs); spin_unlock(&queue->fence_ctx.lock); - job->ringbuf.start = queue->iface.input->insert; - job->ringbuf.end = job->ringbuf.start + sizeof(call_instrs); - /* Make sure the ring buffer is updated before the INSERT * register. */ @@ -3010,10 +3256,53 @@ static const struct drm_sched_backend_ops panthor_queue_sched_ops = { .free_job = queue_free_job, }; +static u32 calc_profiling_ringbuf_num_slots(struct panthor_device *ptdev, + u32 cs_ringbuf_size) +{ + u32 min_profiled_job_instrs = U32_MAX; + u32 last_flag = fls(PANTHOR_DEVICE_PROFILING_ALL); + + /* + * We want to calculate the minimum size of a profiled job's CS, + * because since they need additional instructions for the sampling + * of performance metrics, they might take up further slots in + * the queue's ringbuffer. This means we might not need as many job + * slots for keeping track of their profiling information. What we + * need is the maximum number of slots we should allocate to this end, + * which matches the maximum number of profiled jobs we can place + * simultaneously in the queue's ring buffer. 
+ * That has to be calculated separately for every single job profiling + * flag, but not in the case job profiling is disabled, since unprofiled + * jobs don't need to keep track of this at all. + */ + for (u32 i = 0; i < last_flag; i++) { + min_profiled_job_instrs = + min(min_profiled_job_instrs, calc_job_credits(BIT(i))); + } + + return DIV_ROUND_UP(cs_ringbuf_size, min_profiled_job_instrs * sizeof(u64)); +} + static struct panthor_queue * group_create_queue(struct panthor_group *group, const struct drm_panthor_queue_create *args) { + const struct drm_sched_init_args sched_args = { + .ops = &panthor_queue_sched_ops, + .submit_wq = group->ptdev->scheduler->wq, + .num_rqs = 1, + /* + * The credit limit argument tells us the total number of + * instructions across all CS slots in the ringbuffer, with + * some jobs requiring twice as many as others, depending on + * their profiling status. + */ + .credit_limit = args->ringbuf_size / sizeof(u64), + .timeout = msecs_to_jiffies(JOB_TIMEOUT_MS), + .timeout_wq = group->ptdev->reset.wq, + .name = "panthor-queue", + .dev = group->ptdev->base.dev, + }; struct drm_gpu_scheduler *drm_sched; struct panthor_queue *queue; int ret; @@ -3043,7 +3332,8 @@ group_create_queue(struct panthor_group *group, DRM_PANTHOR_BO_NO_MMAP, DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC | DRM_PANTHOR_VM_BIND_OP_MAP_UNCACHED, - PANTHOR_VM_KERNEL_AUTO_VA); + PANTHOR_VM_KERNEL_AUTO_VA, + "CS ring buffer"); if (IS_ERR(queue->ringbuf)) { ret = PTR_ERR(queue->ringbuf); goto err_free_queue; @@ -3063,12 +3353,29 @@ group_create_queue(struct panthor_group *group, goto err_free_queue; } - ret = drm_sched_init(&queue->scheduler, &panthor_queue_sched_ops, - group->ptdev->scheduler->wq, 1, - args->ringbuf_size / (NUM_INSTRS_PER_SLOT * sizeof(u64)), - 0, msecs_to_jiffies(JOB_TIMEOUT_MS), - group->ptdev->reset.wq, - NULL, "panthor-queue", group->ptdev->base.dev); + queue->profiling.slot_count = + calc_profiling_ringbuf_num_slots(group->ptdev, args->ringbuf_size); + + queue->profiling.slots = + panthor_kernel_bo_create(group->ptdev, group->vm, + queue->profiling.slot_count * + sizeof(struct panthor_job_profiling_data), + DRM_PANTHOR_BO_NO_MMAP, + DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC | + DRM_PANTHOR_VM_BIND_OP_MAP_UNCACHED, + PANTHOR_VM_KERNEL_AUTO_VA, + "Group job stats"); + + if (IS_ERR(queue->profiling.slots)) { + ret = PTR_ERR(queue->profiling.slots); + goto err_free_queue; + } + + ret = panthor_kernel_bo_vmap(queue->profiling.slots); + if (ret) + goto err_free_queue; + + ret = drm_sched_init(&queue->scheduler, &sched_args); if (ret) goto err_free_queue; @@ -3082,6 +3389,29 @@ err_free_queue: return ERR_PTR(ret); } +static void add_group_kbo_sizes(struct panthor_device *ptdev, + struct panthor_group *group) +{ + struct panthor_queue *queue; + int i; + + if (drm_WARN_ON(&ptdev->base, IS_ERR_OR_NULL(group))) + return; + if (drm_WARN_ON(&ptdev->base, ptdev != group->ptdev)) + return; + + group->fdinfo.kbo_sizes += group->suspend_buf->obj->size; + group->fdinfo.kbo_sizes += group->protm_suspend_buf->obj->size; + group->fdinfo.kbo_sizes += group->syncobjs->obj->size; + + for (i = 0; i < group->queue_count; i++) { + queue = group->queues[i]; + group->fdinfo.kbo_sizes += queue->ringbuf->obj->size; + group->fdinfo.kbo_sizes += queue->iface.mem->obj->size; + group->fdinfo.kbo_sizes += queue->profiling.slots->obj->size; + } +} + #define MAX_GROUPS_PER_POOL 128 int panthor_group_create(struct panthor_file *pfile, @@ -3165,7 +3495,8 @@ int panthor_group_create(struct panthor_file *pfile, DRM_PANTHOR_BO_NO_MMAP, 
DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC | DRM_PANTHOR_VM_BIND_OP_MAP_UNCACHED, - PANTHOR_VM_KERNEL_AUTO_VA); + PANTHOR_VM_KERNEL_AUTO_VA, + "Group sync objects"); if (IS_ERR(group->syncobjs)) { ret = PTR_ERR(group->syncobjs); goto err_put_group; @@ -3206,6 +3537,9 @@ int panthor_group_create(struct panthor_file *pfile, } mutex_unlock(&sched->reset.lock); + add_group_kbo_sizes(group->ptdev, group); + spin_lock_init(&group->fdinfo.lock); + return gid; err_put_group: @@ -3285,6 +3619,8 @@ int panthor_group_get_state(struct panthor_file *pfile, get_state->state |= DRM_PANTHOR_GROUP_STATE_FATAL_FAULT; get_state->fatal_queues = group->fatal_queues; } + if (group->innocent) + get_state->state |= DRM_PANTHOR_GROUP_STATE_INNOCENT; mutex_unlock(&sched->lock); group_put(group); @@ -3321,6 +3657,33 @@ void panthor_group_pool_destroy(struct panthor_file *pfile) pfile->groups = NULL; } +/** + * panthor_fdinfo_gather_group_mem_info() - Retrieve aggregate size of all private kernel BO's + * belonging to all the groups owned by an open Panthor file + * @pfile: File. + * @stats: Memory statistics to be updated. + * + */ +void +panthor_fdinfo_gather_group_mem_info(struct panthor_file *pfile, + struct drm_memory_stats *stats) +{ + struct panthor_group_pool *gpool = pfile->groups; + struct panthor_group *group; + unsigned long i; + + if (IS_ERR_OR_NULL(gpool)) + return; + + xa_lock(&gpool->xa); + xa_for_each(&gpool->xa, i, group) { + stats->resident += group->fdinfo.kbo_sizes; + if (group->csg_id >= 0) + stats->active += group->fdinfo.kbo_sizes; + } + xa_unlock(&gpool->xa); +} + static void job_release(struct kref *ref) { struct panthor_job *job = container_of(ref, struct panthor_job, refcount); @@ -3373,6 +3736,7 @@ panthor_job_create(struct panthor_file *pfile, { struct panthor_group_pool *gpool = pfile->groups; struct panthor_job *job; + u32 credits; int ret; if (qsubmit->pad) @@ -3409,6 +3773,11 @@ panthor_job_create(struct panthor_file *pfile, goto err_put_job; } + if (!group_can_run(job->group)) { + ret = -EINVAL; + goto err_put_job; + } + if (job->queue_idx >= job->group->queue_count || !job->group->queues[job->queue_idx]) { ret = -EINVAL; @@ -3426,9 +3795,16 @@ panthor_job_create(struct panthor_file *pfile, } } + job->profiling.mask = pfile->ptdev->profile_mask; + credits = calc_job_credits(job->profiling.mask); + if (credits == 0) { + ret = -EINVAL; + goto err_put_job; + } + ret = drm_sched_job_init(&job->base, &job->group->queues[job->queue_idx]->entity, - 1, job->group); + credits, job->group); if (ret) goto err_put_job; diff --git a/drivers/gpu/drm/panthor/panthor_sched.h b/drivers/gpu/drm/panthor/panthor_sched.h index 3a30d2328b30..e650a445cf50 100644 --- a/drivers/gpu/drm/panthor/panthor_sched.h +++ b/drivers/gpu/drm/panthor/panthor_sched.h @@ -9,6 +9,7 @@ struct dma_fence; struct drm_file; struct drm_gem_object; struct drm_sched_job; +struct drm_memory_stats; struct drm_panthor_group_create; struct drm_panthor_queue_create; struct drm_panthor_group_get_state; @@ -36,6 +37,8 @@ void panthor_job_update_resvs(struct drm_exec *exec, struct drm_sched_job *job); int panthor_group_pool_create(struct panthor_file *pfile); void panthor_group_pool_destroy(struct panthor_file *pfile); +void panthor_fdinfo_gather_group_mem_info(struct panthor_file *pfile, + struct drm_memory_stats *stats); int panthor_sched_init(struct panthor_device *ptdev); void panthor_sched_unplug(struct panthor_device *ptdev); @@ -47,4 +50,6 @@ void panthor_sched_resume(struct panthor_device *ptdev); void 
panthor_sched_report_mmu_fault(struct panthor_device *ptdev);
 void panthor_sched_report_fw_events(struct panthor_device *ptdev, u32 events);
 
+void panthor_fdinfo_gather_group_samples(struct panthor_file *pfile);
+
 #endif
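
panthor_gpu_read_64bit_counter() above reads a 64-bit counter exposed as two 32-bit registers by re-reading the high word until it is stable, so a carry between the LO and HI reads cannot produce a torn value. A standalone sketch of the same pattern, where read_reg32() is a placeholder MMIO accessor assumed for this example:

#include <stdint.h>

/* Placeholder register accessor, assumed for illustration only. */
extern uint32_t read_reg32(uint32_t offset);

static uint64_t read_counter64(uint32_t lo_offset)
{
	uint32_t hi, lo;

	do {
		hi = read_reg32(lo_offset + 4);		/* HI word */
		lo = read_reg32(lo_offset);		/* LO word */
	} while (hi != read_reg32(lo_offset + 4));	/* retry if HI moved */

	return ((uint64_t)hi << 32) | lo;
}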
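
copy_instrs_to_ringbuf() above writes a cache-line-padded instruction slot into a power-of-two ring buffer, splitting the copy when it wraps past the end. A minimal sketch of that wrap-around copy, with illustrative names rather than the driver's API:

#include <stdint.h>
#include <string.h>

/* ring_size must be a power of two, so the insert pointer can be masked
 * instead of taken modulo.
 */
static void ringbuf_copy(uint8_t *ring, uint64_t ring_size, uint64_t insert,
			 const uint8_t *src, uint64_t len)
{
	uint64_t start = insert & (ring_size - 1);
	uint64_t first = len < ring_size - start ? len : ring_size - start;

	memcpy(ring + start, src, first);		/* up to end of ring */
	if (first < len)
		memcpy(ring, src + first, len - first);	/* wrapped remainder */
}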
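
calc_profiling_ringbuf_num_slots() above sizes the per-queue profiling-slot array from the smallest command stream a profiled job can occupy. A worked example under assumed numbers (a 16 KiB ring buffer and a 24-instruction minimum profiled job; the real minimum comes from calc_job_credits()):

#include <stdint.h>

#define DIV_ROUND_UP_U64(n, d)	(((n) + (d) - 1) / (d))

static uint32_t example_profiling_slot_count(void)
{
	uint64_t ringbuf_size = 16 * 1024;	/* assumed ring buffer size */
	uint64_t min_instrs = 24;		/* assumed smallest profiled job */

	/* DIV_ROUND_UP(16384, 24 * 8) == 86 slots */
	return (uint32_t)DIV_ROUND_UP_U64(ringbuf_size, min_instrs * sizeof(uint64_t));
}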