Diffstat (limited to 'drivers/gpu/drm/panthor')
-rw-r--r-- | drivers/gpu/drm/panthor/panthor_device.c | 13
-rw-r--r-- | drivers/gpu/drm/panthor/panthor_device.h | 102
-rw-r--r-- | drivers/gpu/drm/panthor/panthor_drv.c | 137
-rw-r--r-- | drivers/gpu/drm/panthor/panthor_fw.c | 19
-rw-r--r-- | drivers/gpu/drm/panthor/panthor_gem.c | 226
-rw-r--r-- | drivers/gpu/drm/panthor/panthor_gem.h | 79
-rw-r--r-- | drivers/gpu/drm/panthor/panthor_gpu.c | 162
-rw-r--r-- | drivers/gpu/drm/panthor/panthor_gpu.h | 12
-rw-r--r-- | drivers/gpu/drm/panthor/panthor_heap.c | 6
-rw-r--r-- | drivers/gpu/drm/panthor/panthor_mmu.c | 68
-rw-r--r-- | drivers/gpu/drm/panthor/panthor_mmu.h | 1
-rw-r--r-- | drivers/gpu/drm/panthor/panthor_regs.h | 104
-rw-r--r-- | drivers/gpu/drm/panthor/panthor_sched.c | 20
-rw-r--r-- | drivers/gpu/drm/panthor/panthor_sched.h | 3
14 files changed, 649 insertions, 303 deletions
diff --git a/drivers/gpu/drm/panthor/panthor_device.c b/drivers/gpu/drm/panthor/panthor_device.c
index a9da1d1eeb70..f0b2da5b2b96 100644
--- a/drivers/gpu/drm/panthor/panthor_device.c
+++ b/drivers/gpu/drm/panthor/panthor_device.c
@@ -171,10 +171,6 @@ int panthor_device_init(struct panthor_device *ptdev)
 	struct page *p;
 	int ret;
 
-	ret = panthor_gpu_coherency_init(ptdev);
-	if (ret)
-		return ret;
-
 	init_completion(&ptdev->unplug.done);
 	ret = drmm_mutex_init(&ptdev->base, &ptdev->unplug.lock);
 	if (ret)
@@ -184,6 +180,11 @@ int panthor_device_init(struct panthor_device *ptdev)
 	if (ret)
 		return ret;
 
+#ifdef CONFIG_DEBUG_FS
+	drmm_mutex_init(&ptdev->base, &ptdev->gems.lock);
+	INIT_LIST_HEAD(&ptdev->gems.node);
+#endif
+
 	atomic_set(&ptdev->pm.state, PANTHOR_DEVICE_PM_STATE_SUSPENDED);
 	p = alloc_page(GFP_KERNEL | __GFP_ZERO);
 	if (!p)
@@ -247,6 +248,10 @@ int panthor_device_init(struct panthor_device *ptdev)
 	if (ret)
 		goto err_rpm_put;
 
+	ret = panthor_gpu_coherency_init(ptdev);
+	if (ret)
+		goto err_unplug_gpu;
+
 	ret = panthor_mmu_init(ptdev);
 	if (ret)
 		goto err_unplug_gpu;
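The panthor_device.c hunks above move panthor_gpu_coherency_init() from before any other setup, where a failure could simply return, to after the GPU block has been plugged, where a failure has to unwind through the err_unplug_gpu label. A standalone sketch of that goto-unwind idiom, with made-up plug_gpu()/coherency_init()/mmu_init() stand-ins rather than the real driver functions:

#include <errno.h>
#include <stdio.h>

/* Stand-ins for the init/cleanup pairs; illustrative only. */
static int plug_gpu(void) { return 0; }
static void unplug_gpu(void) { puts("unplug gpu"); }
static int coherency_init(void) { return -EINVAL; /* simulate failure */ }
static int mmu_init(void) { return 0; }

/* The pattern the panthor_device_init() hunk follows: once an init step has
 * side effects (the GPU block is plugged), later failures must jump to the
 * label that unwinds everything done so far, in reverse order. */
static int device_init(void)
{
	int ret;

	ret = plug_gpu();
	if (ret)
		return ret; /* nothing to unwind yet */

	ret = coherency_init(); /* moved after plug_gpu(), so... */
	if (ret)
		goto err_unplug_gpu; /* ...it unwinds instead of returning */

	ret = mmu_init();
	if (ret)
		goto err_unplug_gpu;

	return 0;

err_unplug_gpu:
	unplug_gpu();
	return ret;
}

int main(void)
{
	printf("device_init() = %d\n", device_init());
	return 0;
}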
diff --git a/drivers/gpu/drm/panthor/panthor_device.h b/drivers/gpu/drm/panthor/panthor_device.h
index da6574021664..4fc7cf2aeed5 100644
--- a/drivers/gpu/drm/panthor/panthor_device.h
+++ b/drivers/gpu/drm/panthor/panthor_device.h
@@ -205,6 +205,17 @@ struct panthor_device {
 
 	/** @fast_rate: Maximum device clock frequency. Set by DVFS */
 	unsigned long fast_rate;
+
+#ifdef CONFIG_DEBUG_FS
+	/** @gems: Device-wide list of GEM objects owned by at least one file. */
+	struct {
+		/** @gems.lock: Protects the device-wide list of GEM objects. */
+		struct mutex lock;
+
+		/** @node: Used to keep track of all the device's DRM objects */
+		struct list_head node;
+	} gems;
+#endif
 };
 
 struct panthor_gpu_usage {
@@ -219,6 +230,24 @@ struct panthor_file {
 	/** @ptdev: Device attached to this file. */
 	struct panthor_device *ptdev;
 
+	/** @user_mmio: User MMIO related fields. */
+	struct {
+		/**
+		 * @offset: Offset used for user MMIO mappings.
+		 *
+		 * This offset should not be used to check the type of mapping
+		 * except in panthor_mmap(). After that point, MMIO mapping
+		 * offsets have been adjusted to match
+		 * DRM_PANTHOR_USER_MMIO_OFFSET and that macro should be used
+		 * instead.
+		 * Make sure this rule is followed at all times, because
+		 * userspace is in control of the offset, and can change the
+		 * value behind our back. Otherwise it can lead to erroneous
+		 * branching happening in kernel space.
+		 */
+		u64 offset;
+	} user_mmio;
+
 	/** @vms: VM pool attached to this file. */
 	struct panthor_vm_pool *vms;
 
@@ -383,8 +412,6 @@ static irqreturn_t panthor_ ## __name ## _irq_threaded_handler(int irq, void *da
 		if (!status) \
 			break; \
 \
-		gpu_write(ptdev, __reg_prefix ## _INT_CLEAR, status); \
-\
 		__handler(ptdev, status); \
 		ret = IRQ_HANDLED; \
 	} \
@@ -428,4 +455,75 @@ static int panthor_request_ ## __name ## _irq(struct panthor_device *ptdev, \
 
 extern struct workqueue_struct *panthor_cleanup_wq;
 
+static inline void gpu_write(struct panthor_device *ptdev, u32 reg, u32 data)
+{
+	writel(data, ptdev->iomem + reg);
+}
+
+static inline u32 gpu_read(struct panthor_device *ptdev, u32 reg)
+{
+	return readl(ptdev->iomem + reg);
+}
+
+static inline u32 gpu_read_relaxed(struct panthor_device *ptdev, u32 reg)
+{
+	return readl_relaxed(ptdev->iomem + reg);
+}
+
+static inline void gpu_write64(struct panthor_device *ptdev, u32 reg, u64 data)
+{
+	gpu_write(ptdev, reg, lower_32_bits(data));
+	gpu_write(ptdev, reg + 4, upper_32_bits(data));
+}
+
+static inline u64 gpu_read64(struct panthor_device *ptdev, u32 reg)
+{
+	return (gpu_read(ptdev, reg) | ((u64)gpu_read(ptdev, reg + 4) << 32));
+}
+
+static inline u64 gpu_read64_relaxed(struct panthor_device *ptdev, u32 reg)
+{
+	return (gpu_read_relaxed(ptdev, reg) |
+		((u64)gpu_read_relaxed(ptdev, reg + 4) << 32));
+}
+
+static inline u64 gpu_read64_counter(struct panthor_device *ptdev, u32 reg)
+{
+	u32 lo, hi1, hi2;
+	do {
+		hi1 = gpu_read(ptdev, reg + 4);
+		lo = gpu_read(ptdev, reg);
+		hi2 = gpu_read(ptdev, reg + 4);
+	} while (hi1 != hi2);
+	return lo | ((u64)hi2 << 32);
+}
+
+#define gpu_read_poll_timeout(dev, reg, val, cond, delay_us, timeout_us) \
+	read_poll_timeout(gpu_read, val, cond, delay_us, timeout_us, false, \
+			  dev, reg)
+
+#define gpu_read_poll_timeout_atomic(dev, reg, val, cond, delay_us, \
+				     timeout_us) \
+	read_poll_timeout_atomic(gpu_read, val, cond, delay_us, timeout_us, \
+				 false, dev, reg)
+
+#define gpu_read64_poll_timeout(dev, reg, val, cond, delay_us, timeout_us) \
+	read_poll_timeout(gpu_read64, val, cond, delay_us, timeout_us, false, \
+			  dev, reg)
+
+#define gpu_read64_poll_timeout_atomic(dev, reg, val, cond, delay_us, \
+				       timeout_us) \
+	read_poll_timeout_atomic(gpu_read64, val, cond, delay_us, timeout_us, \
+				 false, dev, reg)
+
+#define gpu_read_relaxed_poll_timeout_atomic(dev, reg, val, cond, delay_us, \
+					     timeout_us) \
+	read_poll_timeout_atomic(gpu_read_relaxed, val, cond, delay_us, \
+				 timeout_us, false, dev, reg)
+
+#define gpu_read64_relaxed_poll_timeout(dev, reg, val, cond, delay_us, \
+					timeout_us) \
+	read_poll_timeout(gpu_read64_relaxed, val, cond, delay_us, timeout_us, \
+			  false, dev, reg)
+
 #endif
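A note on the new gpu_read64_counter() helper above: reading a free-running 64-bit counter through two 32-bit registers can tear if the low word wraps between the two accesses, so the high word is sampled before and after the low word and the read retried until both samples agree. A minimal userspace sketch of the same loop, with a simulated counter (fake_counter, reg_read() and the +5 increment are illustrative, not driver code):

#include <stdint.h>
#include <stdio.h>

static uint64_t fake_counter = 0xfffffffdull;

/* One 32-bit "MMIO" access; the counter keeps running between accesses. */
static uint32_t reg_read(int hi)
{
	uint32_t val = hi ? (uint32_t)(fake_counter >> 32)
			  : (uint32_t)fake_counter;
	fake_counter += 5;
	return val;
}

/* Same retry loop as gpu_read64_counter(): sample HI, then LO, then HI
 * again; if the two HI samples differ, LO may belong to either epoch, so
 * retry until they match. */
static uint64_t read64_counter(void)
{
	uint32_t lo, hi1, hi2;

	do {
		hi1 = reg_read(1);
		lo = reg_read(0);
		hi2 = reg_read(1);
	} while (hi1 != hi2);

	return lo | ((uint64_t)hi2 << 32);
}

int main(void)
{
	/* Starts just below a low-word wrap, so the first attempt tears
	 * (hi1 == 0, hi2 == 1) and the loop retries once. */
	printf("counter = 0x%llx\n", (unsigned long long)read64_counter());
	return 0;
}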
diff --git a/drivers/gpu/drm/panthor/panthor_drv.c b/drivers/gpu/drm/panthor/panthor_drv.c
index 06fe46e32073..1116f2d2826e 100644
--- a/drivers/gpu/drm/panthor/panthor_drv.c
+++ b/drivers/gpu/drm/panthor/panthor_drv.c
@@ -772,8 +772,8 @@ static int panthor_query_timestamp_info(struct panthor_device *ptdev,
 #else
 	arg->timestamp_frequency = 0;
 #endif
-	arg->current_timestamp = panthor_gpu_read_timestamp(ptdev);
-	arg->timestamp_offset = panthor_gpu_read_timestamp_offset(ptdev);
+	arg->current_timestamp = gpu_read64_counter(ptdev, GPU_TIMESTAMP);
+	arg->timestamp_offset = gpu_read64(ptdev, GPU_TIMESTAMP_OFFSET);
 
 	pm_runtime_put(ptdev->base.dev);
 	return 0;
@@ -940,6 +940,7 @@ static int panthor_ioctl_bo_mmap_offset(struct drm_device *ddev, void *data,
 					struct drm_file *file)
 {
 	struct drm_panthor_bo_mmap_offset *args = data;
+	struct panthor_gem_object *bo;
 	struct drm_gem_object *obj;
 	int ret;
 
@@ -950,6 +951,12 @@ static int panthor_ioctl_bo_mmap_offset(struct drm_device *ddev, void *data,
 	if (!obj)
 		return -ENOENT;
 
+	bo = to_panthor_bo(obj);
+	if (bo->flags & DRM_PANTHOR_BO_NO_MMAP) {
+		ret = -EPERM;
+		goto out;
+	}
+
 	ret = drm_gem_create_mmap_offset(obj);
 	if (ret)
 		goto out;
@@ -989,7 +996,8 @@ static int panthor_ioctl_group_submit(struct drm_device *ddev, void *data,
 		const struct drm_panthor_queue_submit *qsubmit = &jobs_args[i];
 		struct drm_sched_job *job;
 
-		job = panthor_job_create(pfile, args->group_handle, qsubmit);
+		job = panthor_job_create(pfile, args->group_handle, qsubmit,
+					 file->client_id);
 		if (IS_ERR(job)) {
 			ret = PTR_ERR(job);
 			goto out_cleanup_submit_ctx;
@@ -1331,6 +1339,60 @@ static int panthor_ioctl_vm_get_state(struct drm_device *ddev, void *data,
 	return 0;
 }
 
+static int panthor_ioctl_bo_set_label(struct drm_device *ddev, void *data,
+				      struct drm_file *file)
+{
+	struct drm_panthor_bo_set_label *args = data;
+	struct drm_gem_object *obj;
+	const char *label = NULL;
+	int ret = 0;
+
+	if (args->pad)
+		return -EINVAL;
+
+	obj = drm_gem_object_lookup(file, args->handle);
+	if (!obj)
+		return -ENOENT;
+
+	if (args->label) {
+		label = strndup_user((const char __user *)(uintptr_t)args->label,
+				     PANTHOR_BO_LABEL_MAXLEN);
+		if (IS_ERR(label)) {
+			ret = PTR_ERR(label);
+			if (ret == -EINVAL)
+				ret = -E2BIG;
+			goto err_put_obj;
+		}
+	}
+
+	/*
+	 * We treat passing a label of length 0 and passing a NULL label
+	 * differently, because even though they might seem conceptually
+	 * similar, future uses of the BO label might expect a different
+	 * behaviour in each case.
+	 */
+	panthor_gem_bo_set_label(obj, label);
+
+err_put_obj:
+	drm_gem_object_put(obj);
+
+	return ret;
+}
+
+static int panthor_ioctl_set_user_mmio_offset(struct drm_device *ddev,
+					      void *data, struct drm_file *file)
+{
+	struct drm_panthor_set_user_mmio_offset *args = data;
+	struct panthor_file *pfile = file->driver_priv;
+
+	if (args->offset != DRM_PANTHOR_USER_MMIO_OFFSET_32BIT &&
+	    args->offset != DRM_PANTHOR_USER_MMIO_OFFSET_64BIT)
+		return -EINVAL;
+
+	WRITE_ONCE(pfile->user_mmio.offset, args->offset);
+	return 0;
+}
+
 static int
 panthor_open(struct drm_device *ddev, struct drm_file *file)
 {
@@ -1348,6 +1410,18 @@ panthor_open(struct drm_device *ddev, struct drm_file *file)
 	}
 
 	pfile->ptdev = ptdev;
+	pfile->user_mmio.offset = DRM_PANTHOR_USER_MMIO_OFFSET;
+
+#ifdef CONFIG_ARM64
+	/*
+	 * With 32-bit systems being limited by the 32-bit representation of
+	 * mmap2's pgoffset field, we need to make the MMIO offset arch
+	 * specific.
+	 */
+	if (test_tsk_thread_flag(current, TIF_32BIT))
+		pfile->user_mmio.offset = DRM_PANTHOR_USER_MMIO_OFFSET_32BIT;
+#endif
+
 	ret = panthor_vm_pool_create(pfile);
 	if (ret)
@@ -1400,6 +1474,8 @@ static const struct drm_ioctl_desc panthor_drm_driver_ioctls[] = {
 	PANTHOR_IOCTL(TILER_HEAP_CREATE, tiler_heap_create, DRM_RENDER_ALLOW),
 	PANTHOR_IOCTL(TILER_HEAP_DESTROY, tiler_heap_destroy, DRM_RENDER_ALLOW),
 	PANTHOR_IOCTL(GROUP_SUBMIT, group_submit, DRM_RENDER_ALLOW),
+	PANTHOR_IOCTL(BO_SET_LABEL, bo_set_label, DRM_RENDER_ALLOW),
+	PANTHOR_IOCTL(SET_USER_MMIO_OFFSET, set_user_mmio_offset, DRM_RENDER_ALLOW),
 };
 
 static int panthor_mmap(struct file *filp, struct vm_area_struct *vma)
@@ -1408,30 +1484,26 @@ static int panthor_mmap(struct file *filp, struct vm_area_struct *vma)
 	struct panthor_file *pfile = file->driver_priv;
 	struct panthor_device *ptdev = pfile->ptdev;
 	u64 offset = (u64)vma->vm_pgoff << PAGE_SHIFT;
+	u64 user_mmio_offset;
 	int ret, cookie;
 
 	if (!drm_dev_enter(file->minor->dev, &cookie))
 		return -ENODEV;
 
-#ifdef CONFIG_ARM64
-	/*
-	 * With 32-bit systems being limited by the 32-bit representation of
-	 * mmap2's pgoffset field, we need to make the MMIO offset arch
-	 * specific. This converts a user MMIO offset into something the kernel
-	 * driver understands.
+	/* Adjust the user MMIO offset to match the offset used kernel side.
+	 * We use a local variable with a READ_ONCE() here to make sure
+	 * the user_mmio_offset we use for the is_user_mmio_mapping() check
+	 * hasn't changed when we do the offset adjustment.
 	 */
-	if (test_tsk_thread_flag(current, TIF_32BIT) &&
-	    offset >= DRM_PANTHOR_USER_MMIO_OFFSET_32BIT) {
-		offset += DRM_PANTHOR_USER_MMIO_OFFSET_64BIT -
-			  DRM_PANTHOR_USER_MMIO_OFFSET_32BIT;
+	user_mmio_offset = READ_ONCE(pfile->user_mmio.offset);
+	if (offset >= user_mmio_offset) {
+		offset -= user_mmio_offset;
+		offset += DRM_PANTHOR_USER_MMIO_OFFSET;
 		vma->vm_pgoff = offset >> PAGE_SHIFT;
-	}
-#endif
-
-	if (offset >= DRM_PANTHOR_USER_MMIO_OFFSET)
 		ret = panthor_device_mmap_io(ptdev, vma);
-	else
+	} else {
 		ret = drm_gem_mmap(filp, vma);
+	}
 
 	drm_dev_exit(cookie);
 	return ret;
@@ -1496,9 +1568,34 @@ static const struct file_operations panthor_drm_driver_fops = {
 };
 
 #ifdef CONFIG_DEBUG_FS
+static int panthor_gems_show(struct seq_file *m, void *data)
+{
+	struct drm_info_node *node = m->private;
+	struct drm_device *dev = node->minor->dev;
+	struct panthor_device *ptdev = container_of(dev, struct panthor_device, base);
+
+	panthor_gem_debugfs_print_bos(ptdev, m);
+
+	return 0;
+}
+
+static struct drm_info_list panthor_debugfs_list[] = {
+	{"gems", panthor_gems_show, 0, NULL},
+};
+
+static int panthor_gems_debugfs_init(struct drm_minor *minor)
+{
+	drm_debugfs_create_files(panthor_debugfs_list,
+				 ARRAY_SIZE(panthor_debugfs_list),
+				 minor->debugfs_root, minor);
+
+	return 0;
+}
+
 static void panthor_debugfs_init(struct drm_minor *minor)
 {
 	panthor_mmu_debugfs_init(minor);
+	panthor_gems_debugfs_init(minor);
 }
 #endif
 
@@ -1509,6 +1606,8 @@ static void panthor_debugfs_init(struct drm_minor *minor)
  * - 1.2 - adds DEV_QUERY_GROUP_PRIORITIES_INFO query
  *       - adds PANTHOR_GROUP_PRIORITY_REALTIME priority
  * - 1.3 - adds DRM_PANTHOR_GROUP_STATE_INNOCENT flag
+ * - 1.4 - adds DRM_IOCTL_PANTHOR_BO_SET_LABEL ioctl
+ * - 1.5 - adds DRM_PANTHOR_SET_USER_MMIO_OFFSET ioctl
  */
 static const struct drm_driver panthor_drm_driver = {
 	.driver_features = DRIVER_RENDER | DRIVER_GEM | DRIVER_SYNCOBJ |
@@ -1522,7 +1621,7 @@ static const struct drm_driver panthor_drm_driver = {
 	.name = "panthor",
 	.desc = "Panthor DRM driver",
 	.major = 1,
-	.minor = 3,
+	.minor = 5,
 
 	.gem_create_object = panthor_gem_create_object,
 	.gem_prime_import_sg_table = drm_gem_shmem_prime_import_sg_table,
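The panthor_mmap() rework above snapshots the userspace-controlled pfile->user_mmio.offset once with READ_ONCE(), so the mapping-type check and the offset rebasing cannot observe two different values (a classic double-fetch hazard, since userspace can rewrite the offset through the new ioctl at any time). A hedged userspace analogue, using a C11 atomic in place of READ_ONCE() and an assumed KERNEL_MMIO_OFFSET constant:

#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

/* Stand-in for pfile->user_mmio.offset: a value another thread may rewrite
 * at any time. Names and values here are illustrative, not driver code. */
static _Atomic uint64_t user_mmio_offset = 1ull << 43;

#define KERNEL_MMIO_OFFSET (1ull << 56) /* assumed canonical offset */

/* The panthor_mmap() pattern: snapshot the shared value once, then perform
 * both the "is this an MMIO mapping?" check and the rebasing against the
 * same snapshot, so a concurrent update cannot make the two steps disagree. */
static int classify_mapping(uint64_t offset, uint64_t *rebased)
{
	uint64_t snap = atomic_load(&user_mmio_offset); /* READ_ONCE analogue */

	if (offset < snap)
		return 0; /* GEM mapping, offset used as-is */

	*rebased = offset - snap + KERNEL_MMIO_OFFSET;
	return 1; /* MMIO mapping, rebased to the kernel-side constant */
}

int main(void)
{
	uint64_t rebased = 0;
	int mmio = classify_mapping((1ull << 43) + 0x1000, &rebased);

	printf("mmio=%d rebased=0x%llx\n", mmio, (unsigned long long)rebased);
	return 0;
}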
diff --git a/drivers/gpu/drm/panthor/panthor_fw.c b/drivers/gpu/drm/panthor/panthor_fw.c
index 0f52766a3120..36f1034839c2 100644
--- a/drivers/gpu/drm/panthor/panthor_fw.c
+++ b/drivers/gpu/drm/panthor/panthor_fw.c
@@ -449,7 +449,8 @@ panthor_fw_alloc_queue_iface_mem(struct panthor_device *ptdev,
 				      DRM_PANTHOR_BO_NO_MMAP,
 				      DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC |
 				      DRM_PANTHOR_VM_BIND_OP_MAP_UNCACHED,
-				      PANTHOR_VM_KERNEL_AUTO_VA);
+				      PANTHOR_VM_KERNEL_AUTO_VA,
+				      "Queue FW interface");
 	if (IS_ERR(mem))
 		return mem;
 
@@ -481,7 +482,8 @@ panthor_fw_alloc_suspend_buf_mem(struct panthor_device *ptdev, size_t size)
 	return panthor_kernel_bo_create(ptdev, panthor_fw_vm(ptdev), size,
 					DRM_PANTHOR_BO_NO_MMAP,
 					DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC,
-					PANTHOR_VM_KERNEL_AUTO_VA);
+					PANTHOR_VM_KERNEL_AUTO_VA,
+					"FW suspend buffer");
 }
 
 static int panthor_fw_load_section_entry(struct panthor_device *ptdev,
@@ -601,7 +603,7 @@ static int panthor_fw_load_section_entry(struct panthor_device *ptdev,
 	section->mem = panthor_kernel_bo_create(ptdev, panthor_fw_vm(ptdev),
 						section_size,
 						DRM_PANTHOR_BO_NO_MMAP,
-						vm_map_flags, va);
+						vm_map_flags, va, "FW section");
 	if (IS_ERR(section->mem))
 		return PTR_ERR(section->mem);
 
@@ -1008,6 +1010,8 @@ static void panthor_fw_init_global_iface(struct panthor_device *ptdev)
 
 static void panthor_job_irq_handler(struct panthor_device *ptdev, u32 status)
 {
+	gpu_write(ptdev, JOB_INT_CLEAR, status);
+
 	if (!ptdev->fw->booted && (status & JOB_INT_GLOBAL_IF))
 		ptdev->fw->booted = true;
 
@@ -1059,8 +1063,8 @@ static void panthor_fw_stop(struct panthor_device *ptdev)
 	u32 status;
 
 	gpu_write(ptdev, MCU_CONTROL, MCU_CONTROL_DISABLE);
-	if (readl_poll_timeout(ptdev->iomem + MCU_STATUS, status,
-			       status == MCU_STATUS_DISABLED, 10, 100000))
+	if (gpu_read_poll_timeout(ptdev, MCU_STATUS, status,
+				  status == MCU_STATUS_DISABLED, 10, 100000))
 		drm_err(&ptdev->base, "Failed to stop MCU");
 }
 
@@ -1085,8 +1089,9 @@ void panthor_fw_pre_reset(struct panthor_device *ptdev, bool on_hang)
 		panthor_fw_update_reqs(glb_iface, req, GLB_HALT, GLB_HALT);
 		gpu_write(ptdev, CSF_DOORBELL(CSF_GLB_DOORBELL_ID), 1);
 
-		if (!readl_poll_timeout(ptdev->iomem + MCU_STATUS, status,
-					status == MCU_STATUS_HALT, 10, 100000)) {
+		if (!gpu_read_poll_timeout(ptdev, MCU_STATUS, status,
+					   status == MCU_STATUS_HALT, 10,
+					   100000)) {
 			ptdev->reset.fast = true;
 		} else {
 			drm_warn(&ptdev->base, "Failed to cleanly suspend MCU");
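With the INT_CLEAR write dropped from the generic threaded-handler macro in panthor_device.h, each block handler, such as panthor_job_irq_handler() above, now owns its acknowledgment and can decide what to clear and when (the MMU fault path further down deliberately masks instead of re-clearing). A toy sketch of that dispatch shape, with an invented latched status/clear register pair standing in for the hardware:

#include <stdint.h>
#include <stdio.h>

/* Toy interrupt block: a latched status register that must be acked by
 * writing the pending bits to a clear register. Illustrative only. */
static uint32_t int_rawstat = 0x5; /* two pending sources */
static uint32_t int_mask = ~0u;

static uint32_t read_status(void) { return int_rawstat & int_mask; }
static void int_clear(uint32_t bits) { int_rawstat &= ~bits; }

/* The handler owns the acknowledgment: it can ack everything up front (as
 * the job/gpu handlers now do) or leave some bits latched but masked (as
 * the MMU fault path does). */
static void handle(uint32_t status)
{
	int_clear(status); /* ack first so new events latch during handling */
	printf("handled sources 0x%x\n", status);
}

int main(void)
{
	uint32_t status;

	/* Same shape as the threaded-handler macro: loop until the masked
	 * status reads back zero. */
	while ((status = read_status()))
		handle(status);

	return 0;
}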
diff --git a/drivers/gpu/drm/panthor/panthor_gem.c b/drivers/gpu/drm/panthor/panthor_gem.c
index 8244a4e6c2a2..a123bc740ba1 100644
--- a/drivers/gpu/drm/panthor/panthor_gem.c
+++ b/drivers/gpu/drm/panthor/panthor_gem.c
@@ -2,6 +2,7 @@
 /* Copyright 2019 Linaro, Ltd, Rob Herring <robh@kernel.org> */
 /* Copyright 2023 Collabora ltd. */
 
+#include <linux/cleanup.h>
 #include <linux/dma-buf.h>
 #include <linux/dma-mapping.h>
 #include <linux/err.h>
@@ -10,14 +11,68 @@
 #include <drm/panthor_drm.h>
 
 #include "panthor_device.h"
+#include "panthor_fw.h"
 #include "panthor_gem.h"
 #include "panthor_mmu.h"
 
+#ifdef CONFIG_DEBUG_FS
+static void panthor_gem_debugfs_bo_init(struct panthor_gem_object *bo)
+{
+	INIT_LIST_HEAD(&bo->debugfs.node);
+}
+
+static void panthor_gem_debugfs_bo_add(struct panthor_gem_object *bo)
+{
+	struct panthor_device *ptdev = container_of(bo->base.base.dev,
+						    struct panthor_device, base);
+
+	bo->debugfs.creator.tgid = current->group_leader->pid;
+	get_task_comm(bo->debugfs.creator.process_name, current->group_leader);
+
+	mutex_lock(&ptdev->gems.lock);
+	list_add_tail(&bo->debugfs.node, &ptdev->gems.node);
+	mutex_unlock(&ptdev->gems.lock);
+}
+
+static void panthor_gem_debugfs_bo_rm(struct panthor_gem_object *bo)
+{
+	struct panthor_device *ptdev = container_of(bo->base.base.dev,
+						    struct panthor_device, base);
+
+	if (list_empty(&bo->debugfs.node))
+		return;
+
+	mutex_lock(&ptdev->gems.lock);
+	list_del_init(&bo->debugfs.node);
+	mutex_unlock(&ptdev->gems.lock);
+}
+
+static void panthor_gem_debugfs_set_usage_flags(struct panthor_gem_object *bo, u32 usage_flags)
+{
+	bo->debugfs.flags = usage_flags;
+	panthor_gem_debugfs_bo_add(bo);
+}
+#else
+static void panthor_gem_debugfs_bo_rm(struct panthor_gem_object *bo) {}
+static void panthor_gem_debugfs_set_usage_flags(struct panthor_gem_object *bo, u32 usage_flags) {}
+static void panthor_gem_debugfs_bo_init(struct panthor_gem_object *bo) {}
+#endif
+
 static void panthor_gem_free_object(struct drm_gem_object *obj)
 {
 	struct panthor_gem_object *bo = to_panthor_bo(obj);
 	struct drm_gem_object *vm_root_gem = bo->exclusive_vm_root_gem;
 
+	panthor_gem_debugfs_bo_rm(bo);
+
+	/*
+	 * Label might have been allocated with kstrdup_const(),
+	 * we need to take that into account when freeing the memory
+	 */
+	kfree_const(bo->label.str);
+
+	mutex_destroy(&bo->label.lock);
+
 	drm_gem_free_mmap_offset(&bo->base.base);
 	mutex_destroy(&bo->gpuva_list_lock);
 	drm_gem_shmem_free(&bo->base);
@@ -67,17 +122,19 @@ out_free_bo:
  * @gpu_va: GPU address assigned when mapping to the VM.
  * If gpu_va == PANTHOR_VM_KERNEL_AUTO_VA, the virtual address will be
  * automatically allocated.
+ * @name: Descriptive label of the BO's contents
  *
  * Return: A valid pointer in case of success, an ERR_PTR() otherwise.
  */
 struct panthor_kernel_bo *
 panthor_kernel_bo_create(struct panthor_device *ptdev, struct panthor_vm *vm,
 			 size_t size, u32 bo_flags, u32 vm_map_flags,
-			 u64 gpu_va)
+			 u64 gpu_va, const char *name)
 {
 	struct drm_gem_shmem_object *obj;
 	struct panthor_kernel_bo *kbo;
 	struct panthor_gem_object *bo;
+	u32 debug_flags = PANTHOR_DEBUGFS_GEM_USAGE_FLAG_KERNEL;
 	int ret;
 
 	if (drm_WARN_ON(&ptdev->base, !vm))
@@ -97,6 +154,12 @@ panthor_kernel_bo_create(struct panthor_device *ptdev, struct panthor_vm *vm,
 	kbo->obj = &obj->base;
 	bo->flags = bo_flags;
 
+	if (vm == panthor_fw_vm(ptdev))
+		debug_flags |= PANTHOR_DEBUGFS_GEM_USAGE_FLAG_FW_MAPPED;
+
+	panthor_gem_kernel_bo_set_label(kbo, name);
+	panthor_gem_debugfs_set_usage_flags(to_panthor_bo(kbo->obj), debug_flags);
+
 	/* The system and GPU MMU page size might differ, which becomes a
 	 * problem for FW sections that need to be mapped at explicit address
 	 * since our PAGE_SIZE alignment might cover a VA range that's
@@ -129,17 +192,6 @@ err_free_bo:
 	return ERR_PTR(ret);
 }
 
-static int panthor_gem_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma)
-{
-	struct panthor_gem_object *bo = to_panthor_bo(obj);
-
-	/* Don't allow mmap on objects that have the NO_MMAP flag set. */
-	if (bo->flags & DRM_PANTHOR_BO_NO_MMAP)
-		return -EINVAL;
-
-	return drm_gem_shmem_object_mmap(obj, vma);
-}
-
 static struct dma_buf *
 panthor_gem_prime_export(struct drm_gem_object *obj, int flags)
 {
@@ -155,7 +207,7 @@ static enum drm_gem_object_status panthor_gem_status(struct drm_gem_object *obj)
 	struct panthor_gem_object *bo = to_panthor_bo(obj);
 	enum drm_gem_object_status res = 0;
 
-	if (bo->base.base.import_attach || bo->base.pages)
+	if (drm_gem_is_imported(&bo->base.base) || bo->base.pages)
 		res |= DRM_GEM_OBJECT_RESIDENT;
 
 	return res;
@@ -169,7 +221,7 @@ static const struct drm_gem_object_funcs panthor_gem_funcs = {
 	.get_sg_table = drm_gem_shmem_object_get_sg_table,
 	.vmap = drm_gem_shmem_object_vmap,
 	.vunmap = drm_gem_shmem_object_vunmap,
-	.mmap = panthor_gem_mmap,
+	.mmap = drm_gem_shmem_object_mmap,
 	.status = panthor_gem_status,
 	.export = panthor_gem_prime_export,
 	.vm_ops = &drm_gem_shmem_vm_ops,
@@ -196,6 +248,9 @@ struct drm_gem_object *panthor_gem_create_object(struct drm_device *ddev, size_t
 	obj->base.map_wc = !ptdev->coherent;
 	mutex_init(&obj->gpuva_list_lock);
 	drm_gem_gpuva_set_lock(&obj->base.base, &obj->gpuva_list_lock);
+	mutex_init(&obj->label.lock);
+
+	panthor_gem_debugfs_bo_init(obj);
 
 	return &obj->base.base;
 }
@@ -234,6 +289,8 @@ panthor_gem_create_with_handle(struct drm_file *file,
 		bo->base.base.resv = bo->exclusive_vm_root_gem->resv;
 	}
 
+	panthor_gem_debugfs_set_usage_flags(bo, 0);
+
 	/*
 	 * Allocate an id of idr table where the obj is registered
 	 * and handle has the id what user can see.
@@ -247,3 +304,144 @@ panthor_gem_create_with_handle(struct drm_file *file,
 
 	return ret;
 }
+
+void
+panthor_gem_bo_set_label(struct drm_gem_object *obj, const char *label)
+{
+	struct panthor_gem_object *bo = to_panthor_bo(obj);
+	const char *old_label;
+
+	scoped_guard(mutex, &bo->label.lock) {
+		old_label = bo->label.str;
+		bo->label.str = label;
+	}
+
+	kfree_const(old_label);
+}
+
+void
+panthor_gem_kernel_bo_set_label(struct panthor_kernel_bo *bo, const char *label)
+{
+	const char *str;
+
+	/* We should never attempt labelling a UM-exposed GEM object */
+	if (drm_WARN_ON(bo->obj->dev, bo->obj->handle_count > 0))
+		return;
+
+	if (!label)
+		return;
+
+	str = kstrdup_const(label, GFP_KERNEL);
+	if (!str) {
+		/* Failing to allocate memory for a label isn't a fatal condition */
+		drm_warn(bo->obj->dev, "Not enough memory to allocate BO label");
+		return;
+	}
+
+	panthor_gem_bo_set_label(bo->obj, str);
+}
+
+#ifdef CONFIG_DEBUG_FS
+struct gem_size_totals {
+	size_t size;
+	size_t resident;
+	size_t reclaimable;
+};
+
+static void panthor_gem_debugfs_print_flag_names(struct seq_file *m)
+{
+	int len;
+	int i;
+
+	static const char * const gem_state_flags_names[] = {
+		[PANTHOR_DEBUGFS_GEM_STATE_IMPORTED_BIT] = "imported",
+		[PANTHOR_DEBUGFS_GEM_STATE_EXPORTED_BIT] = "exported",
+	};
+
+	static const char * const gem_usage_flags_names[] = {
+		[PANTHOR_DEBUGFS_GEM_USAGE_KERNEL_BIT] = "kernel",
+		[PANTHOR_DEBUGFS_GEM_USAGE_FW_MAPPED_BIT] = "fw-mapped",
+	};
+
+	seq_puts(m, "GEM state flags: ");
+	for (i = 0, len = ARRAY_SIZE(gem_state_flags_names); i < len; i++) {
+		if (!gem_state_flags_names[i])
+			continue;
+		seq_printf(m, "%s (0x%x)%s", gem_state_flags_names[i],
+			   (u32)BIT(i), (i < len - 1) ? ", " : "\n");
+	}
+
+	seq_puts(m, "GEM usage flags: ");
+	for (i = 0, len = ARRAY_SIZE(gem_usage_flags_names); i < len; i++) {
+		if (!gem_usage_flags_names[i])
+			continue;
+		seq_printf(m, "%s (0x%x)%s", gem_usage_flags_names[i],
+			   (u32)BIT(i), (i < len - 1) ? ", " : "\n\n");
+	}
+}
+
+static void panthor_gem_debugfs_bo_print(struct panthor_gem_object *bo,
+					 struct seq_file *m,
+					 struct gem_size_totals *totals)
+{
+	unsigned int refcount = kref_read(&bo->base.base.refcount);
+	char creator_info[32] = {};
+	size_t resident_size;
+	u32 gem_usage_flags = bo->debugfs.flags;
+	u32 gem_state_flags = 0;
+
+	/* Skip BOs being destroyed. */
+	if (!refcount)
+		return;
+
+	resident_size = bo->base.pages ? bo->base.base.size : 0;
+
+	snprintf(creator_info, sizeof(creator_info),
+		 "%s/%d", bo->debugfs.creator.process_name, bo->debugfs.creator.tgid);
+	seq_printf(m, "%-32s%-16d%-16d%-16zd%-16zd0x%-16lx",
+		   creator_info,
+		   bo->base.base.name,
+		   refcount,
+		   bo->base.base.size,
+		   resident_size,
+		   drm_vma_node_start(&bo->base.base.vma_node));
+
+	if (bo->base.base.import_attach)
+		gem_state_flags |= PANTHOR_DEBUGFS_GEM_STATE_FLAG_IMPORTED;
+	if (bo->base.base.dma_buf)
+		gem_state_flags |= PANTHOR_DEBUGFS_GEM_STATE_FLAG_EXPORTED;
+
+	seq_printf(m, "0x%-8x 0x%-10x", gem_state_flags, gem_usage_flags);
+
+	scoped_guard(mutex, &bo->label.lock) {
+		seq_printf(m, "%s\n", bo->label.str ? : "");
+	}
+
+	totals->size += bo->base.base.size;
+	totals->resident += resident_size;
+	if (bo->base.madv > 0)
+		totals->reclaimable += resident_size;
+}
+
+void panthor_gem_debugfs_print_bos(struct panthor_device *ptdev,
+				   struct seq_file *m)
+{
+	struct gem_size_totals totals = {0};
+	struct panthor_gem_object *bo;
+
+	panthor_gem_debugfs_print_flag_names(m);
+
+	seq_puts(m, "created-by global-name refcount size resident-size file-offset state usage label\n");
+	seq_puts(m, "----------------------------------------------------------------------------------------------------------------------------------------------\n");
+
+	scoped_guard(mutex, &ptdev->gems.lock) {
+		list_for_each_entry(bo, &ptdev->gems.node, debugfs.node) {
+			panthor_gem_debugfs_bo_print(bo, m, &totals);
+		}
+	}
+
+	seq_puts(m, "==============================================================================================================================================\n");
+	seq_printf(m, "Total size: %zd, Total resident: %zd, Total reclaimable: %zd\n",
+		   totals.size, totals.resident, totals.reclaimable);
+}
+#endif
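panthor_gem_bo_set_label() above swaps the label pointer under the lock but frees the old string only after unlocking, and kstrdup_const()/kfree_const() allow labels that point into .rodata to skip the copy. A userspace sketch of the same ownership dance, assuming plain strdup()/free() in place of the const-aware kernel helpers:

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Shape of panthor_gem_bo_set_label(); illustrative, not driver code. */
struct object {
	pthread_mutex_t label_lock;
	const char *label;
};

/* Takes ownership of @label (may be NULL). Only the pointer swap happens
 * under the lock; the old string is freed after unlock so the critical
 * section stays minimal. */
static void object_set_label(struct object *obj, const char *label)
{
	const char *old_label;

	pthread_mutex_lock(&obj->label_lock);
	old_label = obj->label;
	obj->label = label;
	pthread_mutex_unlock(&obj->label_lock);

	free((void *)old_label);
}

int main(void)
{
	struct object obj = { PTHREAD_MUTEX_INITIALIZER, NULL };

	object_set_label(&obj, strdup("CS ring buffer"));
	printf("label: %s\n", obj.label);
	object_set_label(&obj, NULL); /* clearing frees the old label */
	return 0;
}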
diff --git a/drivers/gpu/drm/panthor/panthor_gem.h b/drivers/gpu/drm/panthor/panthor_gem.h
index 5749ef2ebe03..8fc7215e9b90 100644
--- a/drivers/gpu/drm/panthor/panthor_gem.h
+++ b/drivers/gpu/drm/panthor/panthor_gem.h
@@ -13,6 +13,53 @@
 
 struct panthor_vm;
 
+#define PANTHOR_BO_LABEL_MAXLEN	4096
+
+enum panthor_debugfs_gem_state_flags {
+	PANTHOR_DEBUGFS_GEM_STATE_IMPORTED_BIT = 0,
+	PANTHOR_DEBUGFS_GEM_STATE_EXPORTED_BIT = 1,
+
+	/** @PANTHOR_DEBUGFS_GEM_STATE_FLAG_IMPORTED: GEM BO is PRIME imported. */
+	PANTHOR_DEBUGFS_GEM_STATE_FLAG_IMPORTED = BIT(PANTHOR_DEBUGFS_GEM_STATE_IMPORTED_BIT),
+
+	/** @PANTHOR_DEBUGFS_GEM_STATE_FLAG_EXPORTED: GEM BO is PRIME exported. */
+	PANTHOR_DEBUGFS_GEM_STATE_FLAG_EXPORTED = BIT(PANTHOR_DEBUGFS_GEM_STATE_EXPORTED_BIT),
+};
+
+enum panthor_debugfs_gem_usage_flags {
+	PANTHOR_DEBUGFS_GEM_USAGE_KERNEL_BIT = 0,
+	PANTHOR_DEBUGFS_GEM_USAGE_FW_MAPPED_BIT = 1,
+
+	/** @PANTHOR_DEBUGFS_GEM_USAGE_FLAG_KERNEL: BO is for kernel use only. */
+	PANTHOR_DEBUGFS_GEM_USAGE_FLAG_KERNEL = BIT(PANTHOR_DEBUGFS_GEM_USAGE_KERNEL_BIT),
+
+	/** @PANTHOR_DEBUGFS_GEM_USAGE_FLAG_FW_MAPPED: BO is mapped on the FW VM. */
+	PANTHOR_DEBUGFS_GEM_USAGE_FLAG_FW_MAPPED = BIT(PANTHOR_DEBUGFS_GEM_USAGE_FW_MAPPED_BIT),
+};
+
+/**
+ * struct panthor_gem_debugfs - GEM object's DebugFS list information
+ */
+struct panthor_gem_debugfs {
+	/**
+	 * @node: Node used to insert the object in the device-wide list of
+	 * GEM objects, to display information about it through a DebugFS file.
+	 */
+	struct list_head node;
+
+	/** @creator: Information about the UM process which created the GEM. */
+	struct {
+		/** @creator.process_name: Group leader name in owning thread's process */
+		char process_name[TASK_COMM_LEN];
+
+		/** @creator.tgid: PID of the thread's group leader within its process */
+		pid_t tgid;
+	} creator;
+
+	/** @flags: Combination of panthor_debugfs_gem_usage_flags flags */
+	u32 flags;
+};
+
 /**
  * struct panthor_gem_object - Driver specific GEM object.
  */
@@ -46,6 +93,24 @@ struct panthor_gem_object {
 
 	/** @flags: Combination of drm_panthor_bo_flags flags. */
 	u32 flags;
+
+	/**
+	 * @label: BO tagging fields. The label can be assigned within the
+	 * driver itself or through a specific IOCTL.
+	 */
+	struct {
+		/**
+		 * @label.str: Pointer to NULL-terminated string,
+		 */
+		const char *str;
+
+		/** @lock.str: Protects access to the @label.str field. */
+		struct mutex lock;
+	} label;
+
+#ifdef CONFIG_DEBUG_FS
+	struct panthor_gem_debugfs debugfs;
+#endif
 };
 
 /**
@@ -91,6 +156,9 @@ panthor_gem_create_with_handle(struct drm_file *file,
 			       struct panthor_vm *exclusive_vm,
 			       u64 *size, u32 flags, uint32_t *handle);
 
+void panthor_gem_bo_set_label(struct drm_gem_object *obj, const char *label);
+void panthor_gem_kernel_bo_set_label(struct panthor_kernel_bo *bo, const char *label);
+
 static inline u64
 panthor_kernel_bo_gpuva(struct panthor_kernel_bo *bo)
 {
@@ -112,7 +180,7 @@ panthor_kernel_bo_vmap(struct panthor_kernel_bo *bo)
 	if (bo->kmap)
 		return 0;
 
-	ret = drm_gem_vmap_unlocked(bo->obj, &map);
+	ret = drm_gem_vmap(bo->obj, &map);
 	if (ret)
 		return ret;
 
@@ -126,7 +194,7 @@ panthor_kernel_bo_vunmap(struct panthor_kernel_bo *bo)
 	if (bo->kmap) {
 		struct iosys_map map = IOSYS_MAP_INIT_VADDR(bo->kmap);
 
-		drm_gem_vunmap_unlocked(bo->obj, &map);
+		drm_gem_vunmap(bo->obj, &map);
 		bo->kmap = NULL;
 	}
 }
@@ -134,8 +202,13 @@ panthor_kernel_bo_vunmap(struct panthor_kernel_bo *bo)
 struct panthor_kernel_bo *
 panthor_kernel_bo_create(struct panthor_device *ptdev, struct panthor_vm *vm,
 			 size_t size, u32 bo_flags, u32 vm_map_flags,
-			 u64 gpu_va);
+			 u64 gpu_va, const char *name);
 
 void panthor_kernel_bo_destroy(struct panthor_kernel_bo *bo);
 
+#ifdef CONFIG_DEBUG_FS
+void panthor_gem_debugfs_print_bos(struct panthor_device *pfdev,
+				   struct seq_file *m);
+#endif
+
 #endif /* __PANTHOR_GEM_H__ */
diff --git a/drivers/gpu/drm/panthor/panthor_gpu.c b/drivers/gpu/drm/panthor/panthor_gpu.c
index 671049020afa..cb7a335e07d7 100644
--- a/drivers/gpu/drm/panthor/panthor_gpu.c
+++ b/drivers/gpu/drm/panthor/panthor_gpu.c
@@ -108,14 +108,9 @@ static void panthor_gpu_init_info(struct panthor_device *ptdev)
 
 	ptdev->gpu_info.as_present = gpu_read(ptdev, GPU_AS_PRESENT);
 
-	ptdev->gpu_info.shader_present = gpu_read(ptdev, GPU_SHADER_PRESENT_LO);
-	ptdev->gpu_info.shader_present |= (u64)gpu_read(ptdev, GPU_SHADER_PRESENT_HI) << 32;
-
-	ptdev->gpu_info.tiler_present = gpu_read(ptdev, GPU_TILER_PRESENT_LO);
-	ptdev->gpu_info.tiler_present |= (u64)gpu_read(ptdev, GPU_TILER_PRESENT_HI) << 32;
-
-	ptdev->gpu_info.l2_present = gpu_read(ptdev, GPU_L2_PRESENT_LO);
-	ptdev->gpu_info.l2_present |= (u64)gpu_read(ptdev, GPU_L2_PRESENT_HI) << 32;
+	ptdev->gpu_info.shader_present = gpu_read64(ptdev, GPU_SHADER_PRESENT);
+	ptdev->gpu_info.tiler_present = gpu_read64(ptdev, GPU_TILER_PRESENT);
+	ptdev->gpu_info.l2_present = gpu_read64(ptdev, GPU_L2_PRESENT);
 
 	arch_major = GPU_ARCH_MAJOR(ptdev->gpu_info.gpu_id);
 	product_major = GPU_PROD_MAJOR(ptdev->gpu_info.gpu_id);
@@ -150,10 +145,11 @@ static void panthor_gpu_init_info(struct panthor_device *ptdev)
 
 static void panthor_gpu_irq_handler(struct panthor_device *ptdev, u32 status)
 {
+	gpu_write(ptdev, GPU_INT_CLEAR, status);
+
 	if (status & GPU_IRQ_FAULT) {
 		u32 fault_status = gpu_read(ptdev, GPU_FAULT_STATUS);
-		u64 address = ((u64)gpu_read(ptdev, GPU_FAULT_ADDR_HI) << 32) |
-			      gpu_read(ptdev, GPU_FAULT_ADDR_LO);
+		u64 address = gpu_read64(ptdev, GPU_FAULT_ADDR);
 
 		drm_warn(&ptdev->base, "GPU Fault 0x%08x (%s) at 0x%016llx\n",
 			 fault_status, panthor_exception_name(ptdev, fault_status & 0xFF),
@@ -244,45 +240,27 @@ int panthor_gpu_block_power_off(struct panthor_device *ptdev,
 				u32 pwroff_reg, u32 pwrtrans_reg,
 				u64 mask, u32 timeout_us)
 {
-	u32 val, i;
+	u32 val;
 	int ret;
 
-	for (i = 0; i < 2; i++) {
-		u32 mask32 = mask >> (i * 32);
-
-		if (!mask32)
-			continue;
-
-		ret = readl_relaxed_poll_timeout(ptdev->iomem + pwrtrans_reg + (i * 4),
-						 val, !(mask32 & val),
-						 100, timeout_us);
-		if (ret) {
-			drm_err(&ptdev->base, "timeout waiting on %s:%llx power transition",
-				blk_name, mask);
-			return ret;
-		}
+	ret = gpu_read64_relaxed_poll_timeout(ptdev, pwrtrans_reg, val,
+					      !(mask & val), 100, timeout_us);
+	if (ret) {
+		drm_err(&ptdev->base,
+			"timeout waiting on %s:%llx power transition", blk_name,
+			mask);
+		return ret;
 	}
 
-	if (mask & GENMASK(31, 0))
-		gpu_write(ptdev, pwroff_reg, mask);
+	gpu_write64(ptdev, pwroff_reg, mask);
 
-	if (mask >> 32)
-		gpu_write(ptdev, pwroff_reg + 4, mask >> 32);
-
-	for (i = 0; i < 2; i++) {
-		u32 mask32 = mask >> (i * 32);
-
-		if (!mask32)
-			continue;
-
-		ret = readl_relaxed_poll_timeout(ptdev->iomem + pwrtrans_reg + (i * 4),
-						 val, !(mask32 & val),
-						 100, timeout_us);
-		if (ret) {
-			drm_err(&ptdev->base, "timeout waiting on %s:%llx power transition",
-				blk_name, mask);
-			return ret;
-		}
+	ret = gpu_read64_relaxed_poll_timeout(ptdev, pwrtrans_reg, val,
+					      !(mask & val), 100, timeout_us);
+	if (ret) {
+		drm_err(&ptdev->base,
+			"timeout waiting on %s:%llx power transition", blk_name,
+			mask);
+		return ret;
 	}
 
 	return 0;
@@ -305,45 +283,27 @@ int panthor_gpu_block_power_on(struct panthor_device *ptdev,
 			       u32 pwron_reg, u32 pwrtrans_reg,
 			       u32 rdy_reg, u64 mask, u32 timeout_us)
 {
-	u32 val, i;
+	u32 val;
 	int ret;
 
-	for (i = 0; i < 2; i++) {
-		u32 mask32 = mask >> (i * 32);
-
-		if (!mask32)
-			continue;
-
-		ret = readl_relaxed_poll_timeout(ptdev->iomem + pwrtrans_reg + (i * 4),
-						 val, !(mask32 & val),
-						 100, timeout_us);
-		if (ret) {
-			drm_err(&ptdev->base, "timeout waiting on %s:%llx power transition",
-				blk_name, mask);
-			return ret;
-		}
+	ret = gpu_read64_relaxed_poll_timeout(ptdev, pwrtrans_reg, val,
+					      !(mask & val), 100, timeout_us);
+	if (ret) {
+		drm_err(&ptdev->base,
+			"timeout waiting on %s:%llx power transition", blk_name,
+			mask);
+		return ret;
 	}
 
-	if (mask & GENMASK(31, 0))
-		gpu_write(ptdev, pwron_reg, mask);
-
-	if (mask >> 32)
-		gpu_write(ptdev, pwron_reg + 4, mask >> 32);
-
-	for (i = 0; i < 2; i++) {
-		u32 mask32 = mask >> (i * 32);
-
-		if (!mask32)
-			continue;
+	gpu_write64(ptdev, pwron_reg, mask);
 
-		ret = readl_relaxed_poll_timeout(ptdev->iomem + rdy_reg + (i * 4),
-						 val, (mask32 & val) == mask32,
-						 100, timeout_us);
-		if (ret) {
-			drm_err(&ptdev->base, "timeout waiting on %s:%llx readiness",
-				blk_name, mask);
-			return ret;
-		}
+	ret = gpu_read64_relaxed_poll_timeout(ptdev, rdy_reg, val,
+					      (mask & val) == val,
+					      100, timeout_us);
+	if (ret) {
+		drm_err(&ptdev->base, "timeout waiting on %s:%llx readiness",
+			blk_name, mask);
+		return ret;
 	}
 
 	return 0;
@@ -492,49 +452,3 @@ void panthor_gpu_resume(struct panthor_device *ptdev)
 
 	panthor_gpu_l2_power_on(ptdev);
 }
-
-/**
- * panthor_gpu_read_64bit_counter() - Read a 64-bit counter at a given offset.
- * @ptdev: Device.
- * @reg: The offset of the register to read.
- *
- * Return: The counter value.
- */
-static u64
-panthor_gpu_read_64bit_counter(struct panthor_device *ptdev, u32 reg)
-{
-	u32 hi, lo;
-
-	do {
-		hi = gpu_read(ptdev, reg + 0x4);
-		lo = gpu_read(ptdev, reg);
-	} while (hi != gpu_read(ptdev, reg + 0x4));
-
-	return ((u64)hi << 32) | lo;
-}
-
-/**
- * panthor_gpu_read_timestamp() - Read the timestamp register.
- * @ptdev: Device.
- *
- * Return: The GPU timestamp value.
- */
-u64 panthor_gpu_read_timestamp(struct panthor_device *ptdev)
-{
-	return panthor_gpu_read_64bit_counter(ptdev, GPU_TIMESTAMP_LO);
-}
-
-/**
- * panthor_gpu_read_timestamp_offset() - Read the timestamp offset register.
- * @ptdev: Device.
- *
- * Return: The GPU timestamp offset value.
- */
-u64 panthor_gpu_read_timestamp_offset(struct panthor_device *ptdev)
-{
-	u32 hi, lo;
-
-	hi = gpu_read(ptdev, GPU_TIMESTAMP_OFFSET_HI);
-	lo = gpu_read(ptdev, GPU_TIMESTAMP_OFFSET_LO);
-
-	return ((u64)hi << 32) | lo;
-}
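The power_on/power_off rework above replaces two per-half readl_relaxed_poll_timeout() loops with a single 64-bit poll. The gpu_read64_relaxed_poll_timeout() macro expands to the kernel's read_poll_timeout(); a minimal userspace analogue of that primitive, polling a simulated PWRTRANS register (pwrtrans and the shift-based "hardware" are illustrative):

#include <errno.h>
#include <stdint.h>
#include <stdio.h>
#include <time.h>
#include <unistd.h>

/* Userspace analogue of read_poll_timeout(): re-read a register until a
 * condition holds or a timeout expires, with one final read at timeout. */
static int poll_timeout(uint64_t (*readfn)(void), int (*done)(uint64_t),
			uint64_t *val, unsigned int delay_us,
			unsigned int timeout_us)
{
	struct timespec start, now;

	clock_gettime(CLOCK_MONOTONIC, &start);
	for (;;) {
		uint64_t elapsed_us;

		*val = readfn();
		if (done(*val))
			return 0;

		clock_gettime(CLOCK_MONOTONIC, &now);
		elapsed_us = (now.tv_sec - start.tv_sec) * 1000000ull +
			     (now.tv_nsec - start.tv_nsec) / 1000;
		if (elapsed_us >= timeout_us) {
			*val = readfn(); /* last chance, as in the kernel */
			return done(*val) ? 0 : -ETIMEDOUT;
		}
		usleep(delay_us);
	}
}

/* Simulated PWRTRANS register: transition bits drop after a few polls. */
static uint64_t pwrtrans = 0xf;
static uint64_t read_pwrtrans(void) { return pwrtrans >>= 1; }
static int transition_done(uint64_t v) { return v == 0; }

int main(void)
{
	uint64_t val;
	int ret = poll_timeout(read_pwrtrans, transition_done, &val, 100, 100000);

	printf("ret=%d val=0x%llx\n", ret, (unsigned long long)val);
	return 0;
}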
diff --git a/drivers/gpu/drm/panthor/panthor_gpu.h b/drivers/gpu/drm/panthor/panthor_gpu.h
index 7f6133a66127..7c17a8c06858 100644
--- a/drivers/gpu/drm/panthor/panthor_gpu.h
+++ b/drivers/gpu/drm/panthor/panthor_gpu.h
@@ -30,9 +30,9 @@ int panthor_gpu_block_power_off(struct panthor_device *ptdev,
  */
 #define panthor_gpu_power_on(ptdev, type, mask, timeout_us) \
 	panthor_gpu_block_power_on(ptdev, #type, \
-				   type ## _PWRON_LO, \
-				   type ## _PWRTRANS_LO, \
-				   type ## _READY_LO, \
+				   type ## _PWRON, \
+				   type ## _PWRTRANS, \
+				   type ## _READY, \
				   mask, timeout_us)
 
 /**
@@ -42,15 +42,13 @@ int panthor_gpu_block_power_off(struct panthor_device *ptdev,
  */
 #define panthor_gpu_power_off(ptdev, type, mask, timeout_us) \
 	panthor_gpu_block_power_off(ptdev, #type, \
-				    type ## _PWROFF_LO, \
-				    type ## _PWRTRANS_LO, \
+				    type ## _PWROFF, \
+				    type ## _PWRTRANS, \
				    mask, timeout_us)
 
 int panthor_gpu_l2_power_on(struct panthor_device *ptdev);
 int panthor_gpu_flush_caches(struct panthor_device *ptdev,
 			     u32 l2, u32 lsc, u32 other);
 int panthor_gpu_soft_reset(struct panthor_device *ptdev);
-u64 panthor_gpu_read_timestamp(struct panthor_device *ptdev);
-u64 panthor_gpu_read_timestamp_offset(struct panthor_device *ptdev);
 
 #endif
diff --git a/drivers/gpu/drm/panthor/panthor_heap.c b/drivers/gpu/drm/panthor/panthor_heap.c
index 3bdf61c14264..d236e9ceade4 100644
--- a/drivers/gpu/drm/panthor/panthor_heap.c
+++ b/drivers/gpu/drm/panthor/panthor_heap.c
@@ -151,7 +151,8 @@ static int panthor_alloc_heap_chunk(struct panthor_heap_pool *pool,
 	chunk->bo = panthor_kernel_bo_create(pool->ptdev, pool->vm, heap->chunk_size,
 					     DRM_PANTHOR_BO_NO_MMAP,
 					     DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC,
-					     PANTHOR_VM_KERNEL_AUTO_VA);
+					     PANTHOR_VM_KERNEL_AUTO_VA,
+					     "Tiler heap chunk");
 	if (IS_ERR(chunk->bo)) {
 		ret = PTR_ERR(chunk->bo);
 		goto err_free_chunk;
@@ -555,7 +556,8 @@ panthor_heap_pool_create(struct panthor_device *ptdev, struct panthor_vm *vm)
 	pool->gpu_contexts = panthor_kernel_bo_create(ptdev, vm, bosize,
 						      DRM_PANTHOR_BO_NO_MMAP,
 						      DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC,
-						      PANTHOR_VM_KERNEL_AUTO_VA);
+						      PANTHOR_VM_KERNEL_AUTO_VA,
+						      "Heap pool");
 	if (IS_ERR(pool->gpu_contexts)) {
 		ret = PTR_ERR(pool->gpu_contexts);
 		goto err_destroy_pool;
diff --git a/drivers/gpu/drm/panthor/panthor_mmu.c b/drivers/gpu/drm/panthor/panthor_mmu.c
index 12a02e28f50f..4140f697ba5a 100644
--- a/drivers/gpu/drm/panthor/panthor_mmu.c
+++ b/drivers/gpu/drm/panthor/panthor_mmu.c
@@ -510,9 +510,9 @@ static int wait_ready(struct panthor_device *ptdev, u32 as_nr)
 	/* Wait for the MMU status to indicate there is no active command, in
 	 * case one is pending.
 	 */
-	ret = readl_relaxed_poll_timeout_atomic(ptdev->iomem + AS_STATUS(as_nr),
-						val, !(val & AS_STATUS_AS_ACTIVE),
-						10, 100000);
+	ret = gpu_read_relaxed_poll_timeout_atomic(ptdev, AS_STATUS(as_nr), val,
+						   !(val & AS_STATUS_AS_ACTIVE),
+						   10, 100000);
 
 	if (ret) {
 		panthor_device_schedule_reset(ptdev);
@@ -564,8 +564,7 @@ static void lock_region(struct panthor_device *ptdev, u32 as_nr,
 	region = region_width | region_start;
 
 	/* Lock the region that needs to be updated */
-	gpu_write(ptdev, AS_LOCKADDR_LO(as_nr), lower_32_bits(region));
-	gpu_write(ptdev, AS_LOCKADDR_HI(as_nr), upper_32_bits(region));
+	gpu_write64(ptdev, AS_LOCKADDR(as_nr), region);
 	write_cmd(ptdev, as_nr, AS_COMMAND_LOCK);
 }
 
@@ -615,14 +614,9 @@ static int panthor_mmu_as_enable(struct panthor_device *ptdev, u32 as_nr,
 	if (ret)
 		return ret;
 
-	gpu_write(ptdev, AS_TRANSTAB_LO(as_nr), lower_32_bits(transtab));
-	gpu_write(ptdev, AS_TRANSTAB_HI(as_nr), upper_32_bits(transtab));
-
-	gpu_write(ptdev, AS_MEMATTR_LO(as_nr), lower_32_bits(memattr));
-	gpu_write(ptdev, AS_MEMATTR_HI(as_nr), upper_32_bits(memattr));
-
-	gpu_write(ptdev, AS_TRANSCFG_LO(as_nr), lower_32_bits(transcfg));
-	gpu_write(ptdev, AS_TRANSCFG_HI(as_nr), upper_32_bits(transcfg));
+	gpu_write64(ptdev, AS_TRANSTAB(as_nr), transtab);
+	gpu_write64(ptdev, AS_MEMATTR(as_nr), memattr);
+	gpu_write64(ptdev, AS_TRANSCFG(as_nr), transcfg);
 
 	return write_cmd(ptdev, as_nr, AS_COMMAND_UPDATE);
 }
@@ -635,14 +629,9 @@ static int panthor_mmu_as_disable(struct panthor_device *ptdev, u32 as_nr)
 	if (ret)
 		return ret;
 
-	gpu_write(ptdev, AS_TRANSTAB_LO(as_nr), 0);
-	gpu_write(ptdev, AS_TRANSTAB_HI(as_nr), 0);
-
-	gpu_write(ptdev, AS_MEMATTR_LO(as_nr), 0);
-	gpu_write(ptdev, AS_MEMATTR_HI(as_nr), 0);
-
-	gpu_write(ptdev, AS_TRANSCFG_LO(as_nr), AS_TRANSCFG_ADRMODE_UNMAPPED);
-	gpu_write(ptdev, AS_TRANSCFG_HI(as_nr), 0);
+	gpu_write64(ptdev, AS_TRANSTAB(as_nr), 0);
+	gpu_write64(ptdev, AS_MEMATTR(as_nr), 0);
+	gpu_write64(ptdev, AS_TRANSCFG(as_nr), AS_TRANSCFG_ADRMODE_UNMAPPED);
 
 	return write_cmd(ptdev, as_nr, AS_COMMAND_UPDATE);
 }
@@ -781,6 +770,7 @@ out_enable_as:
 	if (ptdev->mmu->as.faulty_mask & panthor_mmu_as_fault_mask(ptdev, as)) {
 		gpu_write(ptdev, MMU_INT_CLEAR, panthor_mmu_as_fault_mask(ptdev, as));
 		ptdev->mmu->as.faulty_mask &= ~panthor_mmu_as_fault_mask(ptdev, as);
+		ptdev->mmu->irq.mask |= panthor_mmu_as_fault_mask(ptdev, as);
 		gpu_write(ptdev, MMU_INT_MASK, ~ptdev->mmu->as.faulty_mask);
 	}
 
@@ -895,17 +885,6 @@ static int panthor_vm_flush_range(struct panthor_vm *vm, u64 iova, u64 size)
 	return ret;
 }
 
-/**
- * panthor_vm_flush_all() - Flush L2 caches for the entirety of a VM's AS
- * @vm: VM whose cache to flush
- *
- * Return: 0 on success, a negative error code if flush failed.
- */
-int panthor_vm_flush_all(struct panthor_vm *vm)
-{
-	return panthor_vm_flush_range(vm, vm->base.mm_start, vm->base.mm_range);
-}
-
 static int panthor_vm_unmap_pages(struct panthor_vm *vm, u64 iova, u64 size)
 {
 	struct panthor_device *ptdev = vm->ptdev;
@@ -1103,7 +1082,7 @@ static void panthor_vm_bo_put(struct drm_gpuvm_bo *vm_bo)
 
 	/* If the vm_bo object was destroyed, release the pin reference that
 	 * was hold by this object.
 	 */
-	if (unpin && !bo->base.base.import_attach)
+	if (unpin && !drm_gem_is_imported(&bo->base.base))
 		drm_gem_shmem_unpin(&bo->base);
 
 	drm_gpuvm_put(vm);
@@ -1234,7 +1213,7 @@ static int panthor_vm_prepare_map_op_ctx(struct panthor_vm_op_ctx *op_ctx,
 	if (ret)
 		goto err_cleanup;
 
-	if (!bo->base.base.import_attach) {
+	if (!drm_gem_is_imported(&bo->base.base)) {
 		/* Pre-reserve the BO pages, so the map operation doesn't have to
 		 * allocate.
 		 */
@@ -1245,7 +1224,7 @@ static int panthor_vm_prepare_map_op_ctx(struct panthor_vm_op_ctx *op_ctx,
 
 	sgt = drm_gem_shmem_get_pages_sgt(&bo->base);
 	if (IS_ERR(sgt)) {
-		if (!bo->base.base.import_attach)
+		if (!drm_gem_is_imported(&bo->base.base))
 			drm_gem_shmem_unpin(&bo->base);
 
 		ret = PTR_ERR(sgt);
@@ -1256,7 +1235,7 @@ static int panthor_vm_prepare_map_op_ctx(struct panthor_vm_op_ctx *op_ctx,
 
 	preallocated_vm_bo = drm_gpuvm_bo_create(&vm->base, &bo->base.base);
 	if (!preallocated_vm_bo) {
-		if (!bo->base.base.import_attach)
+		if (!drm_gem_is_imported(&bo->base.base))
 			drm_gem_shmem_unpin(&bo->base);
 
 		ret = -ENOMEM;
@@ -1282,7 +1261,7 @@ static int panthor_vm_prepare_map_op_ctx(struct panthor_vm_op_ctx *op_ctx,
 	 * which will be released in panthor_vm_bo_put().
 	 */
 	if (preallocated_vm_bo != op_ctx->map.vm_bo &&
-	    !bo->base.base.import_attach)
+	    !drm_gem_is_imported(&bo->base.base))
 		drm_gem_shmem_unpin(&bo->base);
 
 	op_ctx->map.bo_offset = offset;
@@ -1680,8 +1659,7 @@ static void panthor_mmu_irq_handler(struct panthor_device *ptdev, u32 status)
 		u32 source_id;
 
 		fault_status = gpu_read(ptdev, AS_FAULTSTATUS(as));
-		addr = gpu_read(ptdev, AS_FAULTADDRESS_LO(as));
-		addr |= (u64)gpu_read(ptdev, AS_FAULTADDRESS_HI(as)) << 32;
+		addr = gpu_read64(ptdev, AS_FAULTADDRESS(as));
 
 		/* decode the fault status */
 		exception_type = fault_status & 0xFF;
@@ -1709,11 +1687,17 @@ static void panthor_mmu_irq_handler(struct panthor_device *ptdev, u32 status)
 			access_type, access_type_name(ptdev, fault_status),
 			source_id);
 
+		/* We don't handle VM faults at the moment, so let's just clear the
+		 * interrupt and let the writer/reader crash.
+		 * Note that COMPLETED irqs are never cleared, but this is fine
+		 * because they are always masked.
+		 */
+		gpu_write(ptdev, MMU_INT_CLEAR, mask);
+
 		/* Ignore MMU interrupts on this AS until it's been
 		 * re-enabled.
 		 */
 		ptdev->mmu->irq.mask = new_int_mask;
-		gpu_write(ptdev, MMU_INT_MASK, new_int_mask);
 
 		if (ptdev->mmu->as.slots[as].vm)
 			ptdev->mmu->as.slots[as].vm->unhandled_fault = true;
@@ -2275,7 +2259,7 @@ static enum drm_gpu_sched_stat
 panthor_vm_bind_timedout_job(struct drm_sched_job *sched_job)
 {
 	WARN(1, "VM_BIND ops are synchronous for now, there should be no timeout!");
-	return DRM_GPU_SCHED_STAT_NOMINAL;
+	return DRM_GPU_SCHED_STAT_RESET;
 }
 
 static const struct drm_sched_backend_ops panthor_vm_bind_ops = {
@@ -2516,7 +2500,7 @@ panthor_vm_bind_job_create(struct drm_file *file,
 	kref_init(&job->refcount);
 	job->vm = panthor_vm_get(vm);
 
-	ret = drm_sched_job_init(&job->base, &vm->entity, 1, vm);
+	ret = drm_sched_job_init(&job->base, &vm->entity, 1, vm, file->client_id);
 	if (ret)
 		goto err_put_job;
 
diff --git a/drivers/gpu/drm/panthor/panthor_mmu.h b/drivers/gpu/drm/panthor/panthor_mmu.h
index fc274637114e..0e268fdfdb2f 100644
--- a/drivers/gpu/drm/panthor/panthor_mmu.h
+++ b/drivers/gpu/drm/panthor/panthor_mmu.h
@@ -33,7 +33,6 @@ int panthor_vm_active(struct panthor_vm *vm);
 void panthor_vm_idle(struct panthor_vm *vm);
 u32 panthor_vm_page_size(struct panthor_vm *vm);
 int panthor_vm_as(struct panthor_vm *vm);
-int panthor_vm_flush_all(struct panthor_vm *vm);
 
 struct panthor_heap_pool *
 panthor_vm_get_heap_pool(struct panthor_vm *vm, bool create);
diff --git a/drivers/gpu/drm/panthor/panthor_regs.h b/drivers/gpu/drm/panthor/panthor_regs.h
index b7b3b3add166..48bbfd40138c 100644
--- a/drivers/gpu/drm/panthor/panthor_regs.h
+++ b/drivers/gpu/drm/panthor/panthor_regs.h
@@ -63,20 +63,16 @@
 #define GPU_STATUS_DBG_ENABLED		BIT(8)
 
 #define GPU_FAULT_STATUS		0x3C
-#define GPU_FAULT_ADDR_LO		0x40
-#define GPU_FAULT_ADDR_HI		0x44
+#define GPU_FAULT_ADDR			0x40
 
 #define GPU_PWR_KEY			0x50
 #define GPU_PWR_KEY_UNLOCK		0x2968A819
 #define GPU_PWR_OVERRIDE0		0x54
 #define GPU_PWR_OVERRIDE1		0x58
 
-#define GPU_TIMESTAMP_OFFSET_LO		0x88
-#define GPU_TIMESTAMP_OFFSET_HI		0x8C
-#define GPU_CYCLE_COUNT_LO		0x90
-#define GPU_CYCLE_COUNT_HI		0x94
-#define GPU_TIMESTAMP_LO		0x98
-#define GPU_TIMESTAMP_HI		0x9C
+#define GPU_TIMESTAMP_OFFSET		0x88
+#define GPU_CYCLE_COUNT			0x90
+#define GPU_TIMESTAMP			0x98
 
 #define GPU_THREAD_MAX_THREADS		0xA0
 #define GPU_THREAD_MAX_WORKGROUP_SIZE	0xA4
@@ -85,47 +81,29 @@
 
 #define GPU_TEXTURE_FEATURES(n)		(0xB0 + ((n) * 4))
 
-#define GPU_SHADER_PRESENT_LO		0x100
-#define GPU_SHADER_PRESENT_HI		0x104
-#define GPU_TILER_PRESENT_LO		0x110
-#define GPU_TILER_PRESENT_HI		0x114
-#define GPU_L2_PRESENT_LO		0x120
-#define GPU_L2_PRESENT_HI		0x124
-
-#define SHADER_READY_LO			0x140
-#define SHADER_READY_HI			0x144
-#define TILER_READY_LO			0x150
-#define TILER_READY_HI			0x154
-#define L2_READY_LO			0x160
-#define L2_READY_HI			0x164
-
-#define SHADER_PWRON_LO			0x180
-#define SHADER_PWRON_HI			0x184
-#define TILER_PWRON_LO			0x190
-#define TILER_PWRON_HI			0x194
-#define L2_PWRON_LO			0x1A0
-#define L2_PWRON_HI			0x1A4
-
-#define SHADER_PWROFF_LO		0x1C0
-#define SHADER_PWROFF_HI		0x1C4
-#define TILER_PWROFF_LO			0x1D0
-#define TILER_PWROFF_HI			0x1D4
-#define L2_PWROFF_LO			0x1E0
-#define L2_PWROFF_HI			0x1E4
-
-#define SHADER_PWRTRANS_LO		0x200
-#define SHADER_PWRTRANS_HI		0x204
-#define TILER_PWRTRANS_LO		0x210
-#define TILER_PWRTRANS_HI		0x214
-#define L2_PWRTRANS_LO			0x220
-#define L2_PWRTRANS_HI			0x224
-
-#define SHADER_PWRACTIVE_LO		0x240
-#define SHADER_PWRACTIVE_HI		0x244
-#define TILER_PWRACTIVE_LO		0x250
-#define TILER_PWRACTIVE_HI		0x254
-#define L2_PWRACTIVE_LO			0x260
-#define L2_PWRACTIVE_HI			0x264
+#define GPU_SHADER_PRESENT		0x100
+#define GPU_TILER_PRESENT		0x110
+#define GPU_L2_PRESENT			0x120
+
+#define SHADER_READY			0x140
+#define TILER_READY			0x150
+#define L2_READY			0x160
+
+#define SHADER_PWRON			0x180
+#define TILER_PWRON			0x190
+#define L2_PWRON			0x1A0
+
+#define SHADER_PWROFF			0x1C0
+#define TILER_PWROFF			0x1D0
+#define L2_PWROFF			0x1E0
+
+#define SHADER_PWRTRANS			0x200
+#define TILER_PWRTRANS			0x210
+#define L2_PWRTRANS			0x220
+
+#define SHADER_PWRACTIVE		0x240
+#define TILER_PWRACTIVE			0x250
+#define L2_PWRACTIVE			0x260
 
 #define GPU_REVID			0x280
 
@@ -133,8 +111,8 @@
 #define GPU_COHERENCY_PROT_BIT(name)	BIT(GPU_COHERENCY_ ## name)
 
 #define GPU_COHERENCY_PROTOCOL		0x304
-#define GPU_COHERENCY_ACE		0
-#define GPU_COHERENCY_ACE_LITE		1
+#define GPU_COHERENCY_ACE_LITE		0
+#define GPU_COHERENCY_ACE		1
 #define GPU_COHERENCY_NONE		31
 
 #define MCU_CONTROL			0x700
@@ -168,10 +146,8 @@
 #define MMU_AS_SHIFT			6
 #define MMU_AS(as)			(MMU_BASE + ((as) << MMU_AS_SHIFT))
 
-#define AS_TRANSTAB_LO(as)		(MMU_AS(as) + 0x0)
-#define AS_TRANSTAB_HI(as)		(MMU_AS(as) + 0x4)
-#define AS_MEMATTR_LO(as)		(MMU_AS(as) + 0x8)
-#define AS_MEMATTR_HI(as)		(MMU_AS(as) + 0xC)
+#define AS_TRANSTAB(as)			(MMU_AS(as) + 0x0)
+#define AS_MEMATTR(as)			(MMU_AS(as) + 0x8)
 #define AS_MEMATTR_AARCH64_INNER_ALLOC_IMPL	(2 << 2)
 #define AS_MEMATTR_AARCH64_INNER_ALLOC_EXPL(w, r)	((3 << 2) | \
							 ((w) ? BIT(0) : 0) | \
@@ -183,8 +159,7 @@
 #define AS_MEMATTR_AARCH64_INNER_OUTER_NC	(1 << 6)
 #define AS_MEMATTR_AARCH64_INNER_OUTER_WB	(2 << 6)
 #define AS_MEMATTR_AARCH64_FAULT		(3 << 6)
-#define AS_LOCKADDR_LO(as)		(MMU_AS(as) + 0x10)
-#define AS_LOCKADDR_HI(as)		(MMU_AS(as) + 0x14)
+#define AS_LOCKADDR(as)			(MMU_AS(as) + 0x10)
 #define AS_COMMAND(as)			(MMU_AS(as) + 0x18)
 #define AS_COMMAND_NOP			0
 #define AS_COMMAND_UPDATE		1
@@ -199,12 +174,10 @@
 #define AS_FAULTSTATUS_ACCESS_TYPE_EX	(0x1 << 8)
 #define AS_FAULTSTATUS_ACCESS_TYPE_READ	(0x2 << 8)
 #define AS_FAULTSTATUS_ACCESS_TYPE_WRITE	(0x3 << 8)
-#define AS_FAULTADDRESS_LO(as)		(MMU_AS(as) + 0x20)
-#define AS_FAULTADDRESS_HI(as)		(MMU_AS(as) + 0x24)
+#define AS_FAULTADDRESS(as)		(MMU_AS(as) + 0x20)
 #define AS_STATUS(as)			(MMU_AS(as) + 0x28)
 #define AS_STATUS_AS_ACTIVE		BIT(0)
-#define AS_TRANSCFG_LO(as)		(MMU_AS(as) + 0x30)
-#define AS_TRANSCFG_HI(as)		(MMU_AS(as) + 0x34)
+#define AS_TRANSCFG(as)			(MMU_AS(as) + 0x30)
 #define AS_TRANSCFG_ADRMODE_UNMAPPED	(1 << 0)
 #define AS_TRANSCFG_ADRMODE_IDENTITY	(2 << 0)
 #define AS_TRANSCFG_ADRMODE_AARCH64_4K	(6 << 0)
@@ -222,18 +195,11 @@
 #define AS_TRANSCFG_DISABLE_AF_FAULT	BIT(34)
 #define AS_TRANSCFG_WXN			BIT(35)
 #define AS_TRANSCFG_XREADABLE		BIT(36)
-#define AS_FAULTEXTRA_LO(as)		(MMU_AS(as) + 0x38)
-#define AS_FAULTEXTRA_HI(as)		(MMU_AS(as) + 0x3C)
+#define AS_FAULTEXTRA(as)		(MMU_AS(as) + 0x38)
 
 #define CSF_GPU_LATEST_FLUSH_ID		0x10000
 
 #define CSF_DOORBELL(i)			(0x80000 + ((i) * 0x10000))
 #define CSF_GLB_DOORBELL_ID		0
 
-#define gpu_write(dev, reg, data) \
-	writel(data, (dev)->iomem + (reg))
-
-#define gpu_read(dev, reg) \
-	readl((dev)->iomem + (reg))
-
 #endif
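The panthor_regs.h changes collapse each _LO/_HI pair into one define at the old _LO offset; gpu_read64()/gpu_write64() supply the +4 for the high half, and per-AS registers are still addressed as MMU_BASE + (as << MMU_AS_SHIFT) + reg. A small self-check of that arithmetic (MMU_BASE's value, 0x2400, is assumed here since this hunk doesn't show it):

#include <assert.h>
#include <stdint.h>

/* Offsets mirroring panthor_regs.h: each address space gets a 64-byte
 * window, and each 64-bit register now has a single define at the old _LO
 * offset (the high half lives at +4). */
#define MMU_BASE	0x2400 /* assumed base, not shown in this hunk */
#define MMU_AS_SHIFT	6
#define MMU_AS(as)	(MMU_BASE + ((as) << MMU_AS_SHIFT))

#define AS_TRANSTAB(as)		(MMU_AS(as) + 0x0)
#define AS_MEMATTR(as)		(MMU_AS(as) + 0x8)
#define AS_LOCKADDR(as)		(MMU_AS(as) + 0x10)
#define AS_FAULTADDRESS(as)	(MMU_AS(as) + 0x20)

int main(void)
{
	/* AS windows are 64 bytes apart... */
	assert(MMU_AS(1) - MMU_AS(0) == 64);
	/* ...and a 64-bit register's high word sits at its offset + 4. */
	assert(AS_TRANSTAB(3) + 4 == MMU_AS(3) + 0x4);
	assert(AS_LOCKADDR(2) == MMU_BASE + 2 * 64 + 0x10);
	assert(AS_FAULTADDRESS(0) == MMU_BASE + 0x20);
	return 0;
}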
diff --git a/drivers/gpu/drm/panthor/panthor_sched.c b/drivers/gpu/drm/panthor/panthor_sched.c
index 4d31d1967716..8f17394cc82a 100644
--- a/drivers/gpu/drm/panthor/panthor_sched.c
+++ b/drivers/gpu/drm/panthor/panthor_sched.c
@@ -840,7 +840,7 @@ panthor_queue_put_syncwait_obj(struct panthor_queue *queue)
 	if (queue->syncwait.kmap) {
 		struct iosys_map map = IOSYS_MAP_INIT_VADDR(queue->syncwait.kmap);
 
-		drm_gem_vunmap_unlocked(queue->syncwait.obj, &map);
+		drm_gem_vunmap(queue->syncwait.obj, &map);
 		queue->syncwait.kmap = NULL;
 	}
 
@@ -866,7 +866,7 @@ panthor_queue_get_syncwait_obj(struct panthor_group *group, struct panthor_queue
 		goto err_put_syncwait_obj;
 
 	queue->syncwait.obj = &bo->base.base;
-	ret = drm_gem_vmap_unlocked(queue->syncwait.obj, &map);
+	ret = drm_gem_vmap(queue->syncwait.obj, &map);
 	if (drm_WARN_ON(&ptdev->base, ret))
 		goto err_put_syncwait_obj;
 
@@ -3241,7 +3241,7 @@ queue_timedout_job(struct drm_sched_job *sched_job)
 
 	queue_start(queue);
 
-	return DRM_GPU_SCHED_STAT_NOMINAL;
+	return DRM_GPU_SCHED_STAT_RESET;
 }
 
 static void queue_free_job(struct drm_sched_job *sched_job)
@@ -3332,7 +3332,8 @@ group_create_queue(struct panthor_group *group,
 					     DRM_PANTHOR_BO_NO_MMAP,
 					     DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC |
 					     DRM_PANTHOR_VM_BIND_OP_MAP_UNCACHED,
-					     PANTHOR_VM_KERNEL_AUTO_VA);
+					     PANTHOR_VM_KERNEL_AUTO_VA,
+					     "CS ring buffer");
 	if (IS_ERR(queue->ringbuf)) {
 		ret = PTR_ERR(queue->ringbuf);
 		goto err_free_queue;
@@ -3362,7 +3363,8 @@ group_create_queue(struct panthor_group *group,
 							    DRM_PANTHOR_BO_NO_MMAP,
 							    DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC |
 							    DRM_PANTHOR_VM_BIND_OP_MAP_UNCACHED,
-							    PANTHOR_VM_KERNEL_AUTO_VA);
+							    PANTHOR_VM_KERNEL_AUTO_VA,
+							    "Group job stats");
 
 	if (IS_ERR(queue->profiling.slots)) {
 		ret = PTR_ERR(queue->profiling.slots);
@@ -3493,7 +3495,8 @@ int panthor_group_create(struct panthor_file *pfile,
 						   DRM_PANTHOR_BO_NO_MMAP,
 						   DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC |
 						   DRM_PANTHOR_VM_BIND_OP_MAP_UNCACHED,
-						   PANTHOR_VM_KERNEL_AUTO_VA);
+						   PANTHOR_VM_KERNEL_AUTO_VA,
+						   "Group sync objects");
 	if (IS_ERR(group->syncobjs)) {
 		ret = PTR_ERR(group->syncobjs);
 		goto err_put_group;
@@ -3729,7 +3732,8 @@ struct panthor_vm *panthor_job_vm(struct drm_sched_job *sched_job)
 
 struct drm_sched_job *
 panthor_job_create(struct panthor_file *pfile,
 		   u16 group_handle,
-		   const struct drm_panthor_queue_submit *qsubmit)
+		   const struct drm_panthor_queue_submit *qsubmit,
+		   u64 drm_client_id)
 {
 	struct panthor_group_pool *gpool = pfile->groups;
 	struct panthor_job *job;
@@ -3801,7 +3805,7 @@ panthor_job_create(struct panthor_file *pfile,
 
 	ret = drm_sched_job_init(&job->base,
 				 &job->group->queues[job->queue_idx]->entity,
-				 credits, job->group);
+				 credits, job->group, drm_client_id);
 	if (ret)
 		goto err_put_job;
 
diff --git a/drivers/gpu/drm/panthor/panthor_sched.h b/drivers/gpu/drm/panthor/panthor_sched.h
index e650a445cf50..742b0b4ff3a3 100644
--- a/drivers/gpu/drm/panthor/panthor_sched.h
+++ b/drivers/gpu/drm/panthor/panthor_sched.h
@@ -29,7 +29,8 @@ int panthor_group_get_state(struct panthor_file *pfile,
 struct drm_sched_job *
 panthor_job_create(struct panthor_file *pfile,
 		   u16 group_handle,
-		   const struct drm_panthor_queue_submit *qsubmit);
+		   const struct drm_panthor_queue_submit *qsubmit,
+		   u64 drm_client_id);
 struct drm_sched_job *panthor_job_get(struct drm_sched_job *job);
 struct panthor_vm *panthor_job_vm(struct drm_sched_job *sched_job);
 void panthor_job_put(struct drm_sched_job *job);