From ea78ec98265339997959eba3c9d764317614675a Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Mon, 8 Dec 2025 11:08:30 +0100 Subject: drm/panthor: Expose the selected coherency protocol to the UMD If we want to be able to skip CPU cache maintenance operations on CPU-cached mappings, the UMD needs to know the kind of coherency in place. Add a field to drm_panthor_gpu_info to do that. We can re-use a padding field for that since this object is write-only from the KMD perspective, and the UMD should just ignore it. v2: - New commit v3: - Make coherency protocol a real enum, not a bitmask - Add BUILD_BUG_ON()s to make sure the values in panthor_regs.h and those exposed through the uAPI match v4: - Add Steve's R-b v5: - No changes v6: - No changes v7: - Fix kernel doc v8: - No changes Reviewed-by: Steven Price Reviewed-by: Karunika Choo Link: https://patch.msgid.link/20251208100841.730527-4-boris.brezillon@collabora.com Signed-off-by: Boris Brezillon --- include/uapi/drm/panthor_drm.h | 39 ++++++++++++++++++++++++++++++++++++--- 1 file changed, 36 insertions(+), 3 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/drm/panthor_drm.h b/include/uapi/drm/panthor_drm.h index 467d365ed7ba..28cf9e878db6 100644 --- a/include/uapi/drm/panthor_drm.h +++ b/include/uapi/drm/panthor_drm.h @@ -245,6 +245,26 @@ enum drm_panthor_dev_query_type { DRM_PANTHOR_DEV_QUERY_GROUP_PRIORITIES_INFO, }; +/** + * enum drm_panthor_gpu_coherency: Type of GPU coherency + */ +enum drm_panthor_gpu_coherency { + /** + * @DRM_PANTHOR_GPU_COHERENCY_ACE_LITE: ACE Lite coherency. + */ + DRM_PANTHOR_GPU_COHERENCY_ACE_LITE = 0, + + /** + * @DRM_PANTHOR_GPU_COHERENCY_ACE: ACE coherency. + */ + DRM_PANTHOR_GPU_COHERENCY_ACE = 1, + + /** + * @DRM_PANTHOR_GPU_COHERENCY_NONE: No coherency. + */ + DRM_PANTHOR_GPU_COHERENCY_NONE = 31, +}; + /** * struct drm_panthor_gpu_info - GPU information * @@ -301,7 +321,16 @@ struct drm_panthor_gpu_info { */ __u32 thread_max_barrier_size; - /** @coherency_features: Coherency features. */ + /** + * @coherency_features: Coherency features. + * + * Combination of drm_panthor_gpu_coherency flags. + * + * Note that this is just what the coherency protocols supported by the + * GPU, but the actual coherency in place depends on the SoC + * integration and is reflected by + * drm_panthor_gpu_info::selected_coherency. + */ __u32 coherency_features; /** @texture_features: Texture features. */ @@ -310,8 +339,12 @@ struct drm_panthor_gpu_info { /** @as_present: Bitmask encoding the number of address-space exposed by the MMU. */ __u32 as_present; - /** @pad0: MBZ. */ - __u32 pad0; + /** + * @select_coherency: Coherency selected for this device. + * + * One of drm_panthor_gpu_coherency. + */ + __u32 selected_coherency; /** @shader_present: Bitmask encoding the shader cores exposed by the GPU. */ __u64 shader_present; -- cgit v1.2.3 From e06177ec7a36391c66216b55b7c112d5ba8c4cc1 Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Mon, 8 Dec 2025 11:08:31 +0100 Subject: drm/panthor: Add a PANTHOR_BO_SYNC ioctl This will be used by the UMD to synchronize CPU-cached mappings when the UMD can't do it directly (no usermode cache maintenance instruction on Arm32). v2: - Change the flags so they better match the drm_gem_shmem_sync() semantics v3: - Add Steve's R-b v4: - No changes v5: - Drop Steve's R-b (the semantics changes call for a new review) v6: - Drop ret initialization in panthor_ioctl_bo_sync() - Bail out early in panthor_ioctl_bo_sync() if ops.count is zero - Drop unused PANTHOR_BO_SYNC_OP_FLAGS definition v7: - Hand-roll the sync logic (was previously provided by gem_shmem) v8: - Collect R-b Signed-off-by: Faith Ekstrand Reviewed-by: Steven Price Link: https://patch.msgid.link/20251208100841.730527-5-boris.brezillon@collabora.com Signed-off-by: Boris Brezillon --- drivers/gpu/drm/panthor/panthor_drv.c | 41 ++++++++++++++++- drivers/gpu/drm/panthor/panthor_gem.c | 85 +++++++++++++++++++++++++++++++++++ drivers/gpu/drm/panthor/panthor_gem.h | 2 + include/uapi/drm/panthor_drm.h | 52 +++++++++++++++++++++ 4 files changed, 179 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/drivers/gpu/drm/panthor/panthor_drv.c b/drivers/gpu/drm/panthor/panthor_drv.c index 73d26e17e2a2..2a9f1feac57a 100644 --- a/drivers/gpu/drm/panthor/panthor_drv.c +++ b/drivers/gpu/drm/panthor/panthor_drv.c @@ -177,7 +177,8 @@ panthor_get_uobj_array(const struct drm_panthor_obj_array *in, u32 min_stride, PANTHOR_UOBJ_DECL(struct drm_panthor_sync_op, timeline_value), \ PANTHOR_UOBJ_DECL(struct drm_panthor_queue_submit, syncs), \ PANTHOR_UOBJ_DECL(struct drm_panthor_queue_create, ringbuf_size), \ - PANTHOR_UOBJ_DECL(struct drm_panthor_vm_bind_op, syncs)) + PANTHOR_UOBJ_DECL(struct drm_panthor_vm_bind_op, syncs), \ + PANTHOR_UOBJ_DECL(struct drm_panthor_bo_sync_op, size)) /** * PANTHOR_UOBJ_SET() - Copy a kernel object to a user object. @@ -1396,6 +1397,43 @@ static int panthor_ioctl_set_user_mmio_offset(struct drm_device *ddev, return 0; } +static int panthor_ioctl_bo_sync(struct drm_device *ddev, void *data, + struct drm_file *file) +{ + struct drm_panthor_bo_sync *args = data; + struct drm_panthor_bo_sync_op *ops; + struct drm_gem_object *obj; + int ret; + + if (!args->ops.count) + return 0; + + ret = PANTHOR_UOBJ_GET_ARRAY(ops, &args->ops); + if (ret) + return ret; + + for (u32 i = 0; i < args->ops.count; i++) { + obj = drm_gem_object_lookup(file, ops[i].handle); + if (!obj) { + ret = -ENOENT; + goto err_ops; + } + + ret = panthor_gem_sync(obj, ops[i].type, ops[i].offset, + ops[i].size); + + drm_gem_object_put(obj); + + if (ret) + goto err_ops; + } + +err_ops: + kvfree(ops); + + return ret; +} + static int panthor_open(struct drm_device *ddev, struct drm_file *file) { @@ -1470,6 +1508,7 @@ static const struct drm_ioctl_desc panthor_drm_driver_ioctls[] = { PANTHOR_IOCTL(GROUP_SUBMIT, group_submit, DRM_RENDER_ALLOW), PANTHOR_IOCTL(BO_SET_LABEL, bo_set_label, DRM_RENDER_ALLOW), PANTHOR_IOCTL(SET_USER_MMIO_OFFSET, set_user_mmio_offset, DRM_RENDER_ALLOW), + PANTHOR_IOCTL(BO_SYNC, bo_sync, DRM_RENDER_ALLOW), }; static int panthor_mmap(struct file *filp, struct vm_area_struct *vma) diff --git a/drivers/gpu/drm/panthor/panthor_gem.c b/drivers/gpu/drm/panthor/panthor_gem.c index 0de37733a2ef..69ee30603e0a 100644 --- a/drivers/gpu/drm/panthor/panthor_gem.c +++ b/drivers/gpu/drm/panthor/panthor_gem.c @@ -465,6 +465,91 @@ panthor_gem_kernel_bo_set_label(struct panthor_kernel_bo *bo, const char *label) panthor_gem_bo_set_label(bo->obj, str); } +int +panthor_gem_sync(struct drm_gem_object *obj, u32 type, + u64 offset, u64 size) +{ + struct panthor_gem_object *bo = to_panthor_bo(obj); + struct drm_gem_shmem_object *shmem = &bo->base; + const struct drm_device *dev = shmem->base.dev; + struct sg_table *sgt; + struct scatterlist *sgl; + unsigned int count; + + /* Make sure the range is in bounds. */ + if (offset + size < offset || offset + size > shmem->base.size) + return -EINVAL; + + /* Disallow CPU-cache maintenance on imported buffers. */ + if (drm_gem_is_imported(&shmem->base)) + return -EINVAL; + + switch (type) { + case DRM_PANTHOR_BO_SYNC_CPU_CACHE_FLUSH: + case DRM_PANTHOR_BO_SYNC_CPU_CACHE_FLUSH_AND_INVALIDATE: + break; + + default: + return -EINVAL; + } + + /* Don't bother if it's WC-mapped */ + if (shmem->map_wc) + return 0; + + /* Nothing to do if the size is zero. */ + if (size == 0) + return 0; + + sgt = drm_gem_shmem_get_pages_sgt(shmem); + if (IS_ERR(sgt)) + return PTR_ERR(sgt); + + for_each_sgtable_dma_sg(sgt, sgl, count) { + if (size == 0) + break; + + dma_addr_t paddr = sg_dma_address(sgl); + size_t len = sg_dma_len(sgl); + + if (len <= offset) { + offset -= len; + continue; + } + + paddr += offset; + len -= offset; + len = min_t(size_t, len, size); + size -= len; + offset = 0; + + /* It's unclear whether dma_sync_xxx() is the right API to do CPU + * cache maintenance given an IOMMU can register their own + * implementation doing more than just CPU cache flushes/invalidation, + * and what we really care about here is CPU caches only, but that's + * the best we have that is both arch-agnostic and does at least the + * CPU cache maintenance on a tuple. + * + * Also, I wish we could do a single + * + * dma_sync_single_for_device(BIDIR) + * + * and get a flush+invalidate, but that's not how it's implemented + * in practice (at least on arm64), so we have to make it + * + * dma_sync_single_for_device(TO_DEVICE) + * dma_sync_single_for_cpu(FROM_DEVICE) + * + * for the flush+invalidate case. + */ + dma_sync_single_for_device(dev->dev, paddr, len, DMA_TO_DEVICE); + if (type == DRM_PANTHOR_BO_SYNC_CPU_CACHE_FLUSH_AND_INVALIDATE) + dma_sync_single_for_cpu(dev->dev, paddr, len, DMA_FROM_DEVICE); + } + + return 0; +} + #ifdef CONFIG_DEBUG_FS struct gem_size_totals { size_t size; diff --git a/drivers/gpu/drm/panthor/panthor_gem.h b/drivers/gpu/drm/panthor/panthor_gem.h index 262c77a4d3c1..22519c570b5a 100644 --- a/drivers/gpu/drm/panthor/panthor_gem.h +++ b/drivers/gpu/drm/panthor/panthor_gem.h @@ -148,6 +148,8 @@ panthor_gem_create_with_handle(struct drm_file *file, void panthor_gem_bo_set_label(struct drm_gem_object *obj, const char *label); void panthor_gem_kernel_bo_set_label(struct panthor_kernel_bo *bo, const char *label); +int panthor_gem_sync(struct drm_gem_object *obj, + u32 type, u64 offset, u64 size); struct drm_gem_object * panthor_gem_prime_import(struct drm_device *dev, diff --git a/include/uapi/drm/panthor_drm.h b/include/uapi/drm/panthor_drm.h index 28cf9e878db6..9f810305db6e 100644 --- a/include/uapi/drm/panthor_drm.h +++ b/include/uapi/drm/panthor_drm.h @@ -144,6 +144,9 @@ enum drm_panthor_ioctl_id { * pgoff_t size. */ DRM_PANTHOR_SET_USER_MMIO_OFFSET, + + /** @DRM_PANTHOR_BO_SYNC: Sync BO data to/from the device */ + DRM_PANTHOR_BO_SYNC, }; /** @@ -1073,6 +1076,53 @@ struct drm_panthor_set_user_mmio_offset { __u64 offset; }; +/** + * enum drm_panthor_bo_sync_op_type - BO sync type + */ +enum drm_panthor_bo_sync_op_type { + /** @DRM_PANTHOR_BO_SYNC_CPU_CACHE_FLUSH: Flush CPU caches. */ + DRM_PANTHOR_BO_SYNC_CPU_CACHE_FLUSH = 0, + + /** @DRM_PANTHOR_BO_SYNC_CPU_CACHE_FLUSH_AND_INVALIDATE: Flush and invalidate CPU caches. */ + DRM_PANTHOR_BO_SYNC_CPU_CACHE_FLUSH_AND_INVALIDATE = 1, +}; + +/** + * struct drm_panthor_bo_sync_op - BO map sync op + */ +struct drm_panthor_bo_sync_op { + /** @handle: Handle of the buffer object to sync. */ + __u32 handle; + + /** @type: Type of operation. */ + __u32 type; + + /** + * @offset: Offset into the BO at which the sync range starts. + * + * This will be rounded down to the nearest cache line as needed. + */ + __u64 offset; + + /** + * @size: Size of the range to sync + * + * @size + @offset will be rounded up to the nearest cache line as + * needed. + */ + __u64 size; +}; + +/** + * struct drm_panthor_bo_sync - BO map sync request + */ +struct drm_panthor_bo_sync { + /** + * @ops: Array of struct drm_panthor_bo_sync_op sync operations. + */ + struct drm_panthor_obj_array ops; +}; + /** * DRM_IOCTL_PANTHOR() - Build a Panthor IOCTL number * @__access: Access type. Must be R, W or RW. @@ -1119,6 +1169,8 @@ enum { DRM_IOCTL_PANTHOR(WR, BO_SET_LABEL, bo_set_label), DRM_IOCTL_PANTHOR_SET_USER_MMIO_OFFSET = DRM_IOCTL_PANTHOR(WR, SET_USER_MMIO_OFFSET, set_user_mmio_offset), + DRM_IOCTL_PANTHOR_BO_SYNC = + DRM_IOCTL_PANTHOR(WR, BO_SYNC, bo_sync), }; #if defined(__cplusplus) -- cgit v1.2.3 From c146c82f862e9c7e602a908891c3adf992ef2beb Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Mon, 8 Dec 2025 11:08:32 +0100 Subject: drm/panthor: Add an ioctl to query BO flags This is useful when importing BOs, so we can know about cacheability and flush the caches when needed. We can also know when the buffer comes from a different subsystem and take proper actions (avoid CPU mappings, or do kernel-based syncs instead of userland cache flushes). v2: - New commit v3: - Add Steve's R-b v4: - No changes v5: - No changes v6: - No changes v7: - No changes v8: - No changes Reviewed-by: Steven Price Link: https://patch.msgid.link/20251208100841.730527-6-boris.brezillon@collabora.com Signed-off-by: Boris Brezillon --- drivers/gpu/drm/panthor/panthor_drv.c | 24 +++++++++++++++ include/uapi/drm/panthor_drm.h | 57 +++++++++++++++++++++++++++++++++++ 2 files changed, 81 insertions(+) (limited to 'include/uapi') diff --git a/drivers/gpu/drm/panthor/panthor_drv.c b/drivers/gpu/drm/panthor/panthor_drv.c index 2a9f1feac57a..67d694d00ccb 100644 --- a/drivers/gpu/drm/panthor/panthor_drv.c +++ b/drivers/gpu/drm/panthor/panthor_drv.c @@ -1434,6 +1434,29 @@ err_ops: return ret; } +static int panthor_ioctl_bo_query_info(struct drm_device *ddev, void *data, + struct drm_file *file) +{ + struct drm_panthor_bo_query_info *args = data; + struct panthor_gem_object *bo; + struct drm_gem_object *obj; + + obj = drm_gem_object_lookup(file, args->handle); + if (!obj) + return -ENOENT; + + bo = to_panthor_bo(obj); + args->pad = 0; + args->create_flags = bo->flags; + + args->extra_flags = 0; + if (drm_gem_is_imported(&bo->base.base)) + args->extra_flags |= DRM_PANTHOR_BO_IS_IMPORTED; + + drm_gem_object_put(obj); + return 0; +} + static int panthor_open(struct drm_device *ddev, struct drm_file *file) { @@ -1509,6 +1532,7 @@ static const struct drm_ioctl_desc panthor_drm_driver_ioctls[] = { PANTHOR_IOCTL(BO_SET_LABEL, bo_set_label, DRM_RENDER_ALLOW), PANTHOR_IOCTL(SET_USER_MMIO_OFFSET, set_user_mmio_offset, DRM_RENDER_ALLOW), PANTHOR_IOCTL(BO_SYNC, bo_sync, DRM_RENDER_ALLOW), + PANTHOR_IOCTL(BO_QUERY_INFO, bo_query_info, DRM_RENDER_ALLOW), }; static int panthor_mmap(struct file *filp, struct vm_area_struct *vma) diff --git a/include/uapi/drm/panthor_drm.h b/include/uapi/drm/panthor_drm.h index 9f810305db6e..39d5ce815742 100644 --- a/include/uapi/drm/panthor_drm.h +++ b/include/uapi/drm/panthor_drm.h @@ -147,6 +147,13 @@ enum drm_panthor_ioctl_id { /** @DRM_PANTHOR_BO_SYNC: Sync BO data to/from the device */ DRM_PANTHOR_BO_SYNC, + + /** + * @DRM_PANTHOR_BO_QUERY_INFO: Query information about a BO. + * + * This is useful for imported BOs. + */ + DRM_PANTHOR_BO_QUERY_INFO, }; /** @@ -1123,6 +1130,54 @@ struct drm_panthor_bo_sync { struct drm_panthor_obj_array ops; }; +/** + * enum drm_panthor_bo_extra_flags - Set of flags returned on a BO_QUERY_INFO request + * + * Those are flags reflecting BO properties that are not directly coming from the flags + * passed are creation time, or information on BOs that were imported from other drivers. + */ +enum drm_panthor_bo_extra_flags { + /** + * @DRM_PANTHOR_BO_IS_IMPORTED: BO has been imported from an external driver. + * + * Note that imported dma-buf handles are not flagged as imported if they + * where exported by panthor. Only buffers that are coming from other drivers + * (dma heaps, other GPUs, display controllers, V4L, ...). + * + * It's also important to note that all imported BOs are mapped cached and can't + * be considered IO-coherent even if the GPU is. This means they require explicit + * syncs that must go through the DRM_PANTHOR_BO_SYNC ioctl (userland cache + * maintenance is not allowed in that case, because extra operations might be + * needed to make changes visible to the CPU/device, like buffer migration when the + * exporter is a GPU with its own VRAM). + */ + DRM_PANTHOR_BO_IS_IMPORTED = (1 << 0), +}; + +/** + * struct drm_panthor_bo_query_info - Query BO info + */ +struct drm_panthor_bo_query_info { + /** @handle: Handle of the buffer object to query flags on. */ + __u32 handle; + + /** + * @extra_flags: Combination of enum drm_panthor_bo_extra_flags flags. + */ + __u32 extra_flags; + + /** + * @create_flags: Flags passed at creation time. + * + * Combination of enum drm_panthor_bo_flags flags. + * Will be zero if the buffer comes from a different driver. + */ + __u32 create_flags; + + /** @pad: Will be zero on return. */ + __u32 pad; +}; + /** * DRM_IOCTL_PANTHOR() - Build a Panthor IOCTL number * @__access: Access type. Must be R, W or RW. @@ -1171,6 +1226,8 @@ enum { DRM_IOCTL_PANTHOR(WR, SET_USER_MMIO_OFFSET, set_user_mmio_offset), DRM_IOCTL_PANTHOR_BO_SYNC = DRM_IOCTL_PANTHOR(WR, BO_SYNC, bo_sync), + DRM_IOCTL_PANTHOR_BO_QUERY_INFO = + DRM_IOCTL_PANTHOR(WR, BO_QUERY_INFO, bo_query_info), }; #if defined(__cplusplus) -- cgit v1.2.3 From cd2c9c3015e642e28e1b528c52c06a79f350d600 Mon Sep 17 00:00:00 2001 From: Loïc Molinari Date: Mon, 8 Dec 2025 11:08:33 +0100 Subject: drm/panthor: Add flag to map GEM object Write-Back Cacheable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Will be used by the UMD to optimize CPU accesses to buffers that are frequently read by the CPU, or on which the access pattern makes non-cacheable mappings inefficient. Mapping buffers CPU-cached implies taking care of the CPU cache maintenance in the UMD, unless the GPU is IO coherent. v2: - Add more to the commit message - Tweak the doc - Make sure we sync the section of the BO pointing to the CS syncobj before we read its seqno v3: - Fix formatting/spelling issues v4: - Add Steve's R-b v5: - Drop Steve's R-b (changes in the ioctl semantics requiring new review) v6: - Fix the uAPI doc - Fix inverted logic in some comment v7: - No changes v8: - Collect R-b Signed-off-by: Loïc Molinari Reviewed-by: Steven Price Link: https://patch.msgid.link/20251208100841.730527-7-boris.brezillon@collabora.com Signed-off-by: Boris Brezillon --- drivers/gpu/drm/panthor/panthor_drv.c | 7 ++++++- drivers/gpu/drm/panthor/panthor_gem.c | 37 +++++++++++++++++++++++++++++++-- drivers/gpu/drm/panthor/panthor_sched.c | 18 ++++++++++++++-- include/uapi/drm/panthor_drm.h | 9 ++++++++ 4 files changed, 66 insertions(+), 5 deletions(-) (limited to 'include/uapi') diff --git a/drivers/gpu/drm/panthor/panthor_drv.c b/drivers/gpu/drm/panthor/panthor_drv.c index 67d694d00ccb..598c7ad6f2b6 100644 --- a/drivers/gpu/drm/panthor/panthor_drv.c +++ b/drivers/gpu/drm/panthor/panthor_drv.c @@ -902,7 +902,8 @@ static int panthor_ioctl_vm_destroy(struct drm_device *ddev, void *data, return panthor_vm_pool_destroy_vm(pfile->vms, args->id); } -#define PANTHOR_BO_FLAGS DRM_PANTHOR_BO_NO_MMAP +#define PANTHOR_BO_FLAGS (DRM_PANTHOR_BO_NO_MMAP | \ + DRM_PANTHOR_BO_WB_MMAP) static int panthor_ioctl_bo_create(struct drm_device *ddev, void *data, struct drm_file *file) @@ -921,6 +922,10 @@ static int panthor_ioctl_bo_create(struct drm_device *ddev, void *data, goto out_dev_exit; } + if ((args->flags & DRM_PANTHOR_BO_NO_MMAP) && + (args->flags & DRM_PANTHOR_BO_WB_MMAP)) + return -EINVAL; + if (args->exclusive_vm_id) { vm = panthor_vm_pool_get_vm(pfile->vms, args->exclusive_vm_id); if (!vm) { diff --git a/drivers/gpu/drm/panthor/panthor_gem.c b/drivers/gpu/drm/panthor/panthor_gem.c index 69ee30603e0a..360d05abe891 100644 --- a/drivers/gpu/drm/panthor/panthor_gem.c +++ b/drivers/gpu/drm/panthor/panthor_gem.c @@ -77,6 +77,39 @@ static void panthor_gem_debugfs_set_usage_flags(struct panthor_gem_object *bo, u static void panthor_gem_debugfs_bo_init(struct panthor_gem_object *bo) {} #endif +static bool +should_map_wc(struct panthor_gem_object *bo, struct panthor_vm *exclusive_vm) +{ + struct panthor_device *ptdev = container_of(bo->base.base.dev, struct panthor_device, base); + + /* We can't do uncached mappings if the device is coherent, + * because the zeroing done by the shmem layer at page allocation + * time happens on a cached mapping which isn't CPU-flushed (at least + * not on Arm64 where the flush is deferred to PTE setup time, and + * only done conditionally based on the mapping permissions). We can't + * rely on dma_map_sgtable()/dma_sync_sgtable_for_xxx() either to flush + * those, because they are NOPed if dma_dev_coherent() returns true. + * + * FIXME: Note that this problem is going to pop up again when we + * decide to support mapping buffers with the NO_MMAP flag as + * non-shareable (AKA buffers accessed only by the GPU), because we + * need the same CPU flush to happen after page allocation, otherwise + * there's a risk of data leak or late corruption caused by a dirty + * cacheline being evicted. At this point we'll need a way to force + * CPU cache maintenance regardless of whether the device is coherent + * or not. + */ + if (ptdev->coherent) + return false; + + /* Cached mappings are explicitly requested, so no write-combine. */ + if (bo->flags & DRM_PANTHOR_BO_WB_MMAP) + return false; + + /* The default is write-combine. */ + return true; +} + static void panthor_gem_free_object(struct drm_gem_object *obj) { struct panthor_gem_object *bo = to_panthor_bo(obj); @@ -163,6 +196,7 @@ panthor_kernel_bo_create(struct panthor_device *ptdev, struct panthor_vm *vm, bo = to_panthor_bo(&obj->base); kbo->obj = &obj->base; bo->flags = bo_flags; + bo->base.map_wc = should_map_wc(bo, vm); bo->exclusive_vm_root_gem = panthor_vm_root_gem(vm); drm_gem_object_get(bo->exclusive_vm_root_gem); bo->base.base.resv = bo->exclusive_vm_root_gem->resv; @@ -363,7 +397,6 @@ static const struct drm_gem_object_funcs panthor_gem_funcs = { */ struct drm_gem_object *panthor_gem_create_object(struct drm_device *ddev, size_t size) { - struct panthor_device *ptdev = container_of(ddev, struct panthor_device, base); struct panthor_gem_object *obj; obj = kzalloc(sizeof(*obj), GFP_KERNEL); @@ -371,7 +404,6 @@ struct drm_gem_object *panthor_gem_create_object(struct drm_device *ddev, size_t return ERR_PTR(-ENOMEM); obj->base.base.funcs = &panthor_gem_funcs; - obj->base.map_wc = !ptdev->coherent; mutex_init(&obj->label.lock); panthor_gem_debugfs_bo_init(obj); @@ -406,6 +438,7 @@ panthor_gem_create_with_handle(struct drm_file *file, bo = to_panthor_bo(&shmem->base); bo->flags = flags; + bo->base.map_wc = should_map_wc(bo, exclusive_vm); if (exclusive_vm) { bo->exclusive_vm_root_gem = panthor_vm_root_gem(exclusive_vm); diff --git a/drivers/gpu/drm/panthor/panthor_sched.c b/drivers/gpu/drm/panthor/panthor_sched.c index 33b9ef537e35..5abc5744e5ac 100644 --- a/drivers/gpu/drm/panthor/panthor_sched.c +++ b/drivers/gpu/drm/panthor/panthor_sched.c @@ -863,8 +863,11 @@ panthor_queue_get_syncwait_obj(struct panthor_group *group, struct panthor_queue struct iosys_map map; int ret; - if (queue->syncwait.kmap) - return queue->syncwait.kmap + queue->syncwait.offset; + if (queue->syncwait.kmap) { + bo = container_of(queue->syncwait.obj, + struct panthor_gem_object, base.base); + goto out_sync; + } bo = panthor_vm_get_bo_for_va(group->vm, queue->syncwait.gpu_va, @@ -881,6 +884,17 @@ panthor_queue_get_syncwait_obj(struct panthor_group *group, struct panthor_queue if (drm_WARN_ON(&ptdev->base, !queue->syncwait.kmap)) goto err_put_syncwait_obj; +out_sync: + /* Make sure the CPU caches are invalidated before the seqno is read. + * drm_gem_shmem_sync() is a NOP if map_wc=true, so no need to check + * it here. + */ + panthor_gem_sync(&bo->base.base, queue->syncwait.offset, + queue->syncwait.sync64 ? + sizeof(struct panthor_syncobj_64b) : + sizeof(struct panthor_syncobj_32b), + DRM_PANTHOR_BO_SYNC_CPU_CACHE_FLUSH_AND_INVALIDATE); + return queue->syncwait.kmap + queue->syncwait.offset; err_put_syncwait_obj: diff --git a/include/uapi/drm/panthor_drm.h b/include/uapi/drm/panthor_drm.h index 39d5ce815742..e238c6264fa1 100644 --- a/include/uapi/drm/panthor_drm.h +++ b/include/uapi/drm/panthor_drm.h @@ -681,6 +681,15 @@ struct drm_panthor_vm_get_state { enum drm_panthor_bo_flags { /** @DRM_PANTHOR_BO_NO_MMAP: The buffer object will never be CPU-mapped in userspace. */ DRM_PANTHOR_BO_NO_MMAP = (1 << 0), + + /** + * @DRM_PANTHOR_BO_WB_MMAP: Force "Write-Back Cacheable" CPU mapping. + * + * CPU map the buffer object in userspace by forcing the "Write-Back + * Cacheable" cacheability attribute. The mapping otherwise uses the + * "Non-Cacheable" attribute if the GPU is not IO coherent. + */ + DRM_PANTHOR_BO_WB_MMAP = (1 << 1), }; /** -- cgit v1.2.3 From 2396d65d94fc75d39f096b9777f9edc9c8e677c1 Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Mon, 8 Dec 2025 11:08:36 +0100 Subject: drm/panfrost: Expose the selected coherency protocol to the UMD Will be needed if we want to skip CPU cache maintenance operations when the GPU can snoop CPU caches. v2: - New commit v3: - Fix the coherency values (enum instead of bitmask) v4: - Fix init/test on coherency_features v5: - No changes v6: - Collect R-b v7: - No changes v8: - No changes Reviewed-by: Steven Price Link: https://patch.msgid.link/20251208100841.730527-10-boris.brezillon@collabora.com Signed-off-by: Boris Brezillon --- drivers/gpu/drm/panfrost/panfrost_device.h | 1 + drivers/gpu/drm/panfrost/panfrost_drv.c | 1 + drivers/gpu/drm/panfrost/panfrost_gpu.c | 26 +++++++++++++++++++++++--- drivers/gpu/drm/panfrost/panfrost_regs.h | 10 ++++++++-- include/uapi/drm/panfrost_drm.h | 7 +++++++ 5 files changed, 40 insertions(+), 5 deletions(-) (limited to 'include/uapi') diff --git a/drivers/gpu/drm/panfrost/panfrost_device.h b/drivers/gpu/drm/panfrost/panfrost_device.h index e61c4329fd07..0f3992412205 100644 --- a/drivers/gpu/drm/panfrost/panfrost_device.h +++ b/drivers/gpu/drm/panfrost/panfrost_device.h @@ -79,6 +79,7 @@ struct panfrost_features { u32 thread_max_workgroup_sz; u32 thread_max_barrier_sz; u32 coherency_features; + u32 selected_coherency; u32 afbc_features; u32 texture_features[4]; u32 js_features[16]; diff --git a/drivers/gpu/drm/panfrost/panfrost_drv.c b/drivers/gpu/drm/panfrost/panfrost_drv.c index 199073cc7d3f..b2c3f6c81be0 100644 --- a/drivers/gpu/drm/panfrost/panfrost_drv.c +++ b/drivers/gpu/drm/panfrost/panfrost_drv.c @@ -95,6 +95,7 @@ static int panfrost_ioctl_get_param(struct drm_device *ddev, void *data, struct PANFROST_FEATURE_ARRAY(JS_FEATURES, js_features, 15); PANFROST_FEATURE(NR_CORE_GROUPS, nr_core_groups); PANFROST_FEATURE(THREAD_TLS_ALLOC, thread_tls_alloc); + PANFROST_FEATURE(SELECTED_COHERENCY, selected_coherency); case DRM_PANFROST_PARAM_SYSTEM_TIMESTAMP: ret = panfrost_ioctl_query_timestamp(pfdev, ¶m->value); diff --git a/drivers/gpu/drm/panfrost/panfrost_gpu.c b/drivers/gpu/drm/panfrost/panfrost_gpu.c index 483d278eb154..7d555e63e21a 100644 --- a/drivers/gpu/drm/panfrost/panfrost_gpu.c +++ b/drivers/gpu/drm/panfrost/panfrost_gpu.c @@ -159,8 +159,8 @@ static void panfrost_gpu_init_quirks(struct panfrost_device *pfdev) pfdev->features.revision >= 0x2000) quirks |= JM_MAX_JOB_THROTTLE_LIMIT << JM_JOB_THROTTLE_LIMIT_SHIFT; else if (panfrost_model_eq(pfdev, 0x6000) && - pfdev->features.coherency_features == COHERENCY_ACE) - quirks |= (COHERENCY_ACE_LITE | COHERENCY_ACE) << + pfdev->features.coherency_features == BIT(COHERENCY_ACE)) + quirks |= (BIT(COHERENCY_ACE_LITE) | BIT(COHERENCY_ACE)) << JM_FORCE_COHERENCY_FEATURES_SHIFT; if (panfrost_has_hw_feature(pfdev, HW_FEATURE_IDVS_GROUP_SIZE)) @@ -263,7 +263,27 @@ static int panfrost_gpu_init_features(struct panfrost_device *pfdev) pfdev->features.max_threads = gpu_read(pfdev, GPU_THREAD_MAX_THREADS); pfdev->features.thread_max_workgroup_sz = gpu_read(pfdev, GPU_THREAD_MAX_WORKGROUP_SIZE); pfdev->features.thread_max_barrier_sz = gpu_read(pfdev, GPU_THREAD_MAX_BARRIER_SIZE); - pfdev->features.coherency_features = gpu_read(pfdev, GPU_COHERENCY_FEATURES); + + if (panfrost_has_hw_feature(pfdev, HW_FEATURE_COHERENCY_REG)) + pfdev->features.coherency_features = gpu_read(pfdev, GPU_COHERENCY_FEATURES); + else + pfdev->features.coherency_features = BIT(COHERENCY_ACE_LITE); + + BUILD_BUG_ON(COHERENCY_ACE_LITE != DRM_PANFROST_GPU_COHERENCY_ACE_LITE); + BUILD_BUG_ON(COHERENCY_ACE != DRM_PANFROST_GPU_COHERENCY_ACE); + BUILD_BUG_ON(COHERENCY_NONE != DRM_PANFROST_GPU_COHERENCY_NONE); + + if (!pfdev->coherent) { + pfdev->features.selected_coherency = COHERENCY_NONE; + } else if (pfdev->features.coherency_features & BIT(COHERENCY_ACE)) { + pfdev->features.selected_coherency = COHERENCY_ACE; + } else if (pfdev->features.coherency_features & BIT(COHERENCY_ACE_LITE)) { + pfdev->features.selected_coherency = COHERENCY_ACE_LITE; + } else { + drm_WARN(&pfdev->base, true, "No known coherency protocol supported"); + pfdev->features.selected_coherency = COHERENCY_NONE; + } + pfdev->features.afbc_features = gpu_read(pfdev, GPU_AFBC_FEATURES); for (i = 0; i < 4; i++) pfdev->features.texture_features[i] = gpu_read(pfdev, GPU_TEXTURE_FEATURES(i)); diff --git a/drivers/gpu/drm/panfrost/panfrost_regs.h b/drivers/gpu/drm/panfrost/panfrost_regs.h index 2b8f1617b836..ee15f6bf6e6f 100644 --- a/drivers/gpu/drm/panfrost/panfrost_regs.h +++ b/drivers/gpu/drm/panfrost/panfrost_regs.h @@ -102,9 +102,15 @@ #define GPU_L2_PRESENT_LO 0x120 /* (RO) Level 2 cache present bitmap, low word */ #define GPU_L2_PRESENT_HI 0x124 /* (RO) Level 2 cache present bitmap, high word */ +/* GPU_COHERENCY_FEATURES is a bitmask of BIT(COHERENCY_xxx) values encoding the + * set of supported coherency protocols. GPU_COHERENCY_ENABLE is passed a + * COHERENCY_xxx value. + */ #define GPU_COHERENCY_FEATURES 0x300 /* (RO) Coherency features present */ -#define COHERENCY_ACE_LITE BIT(0) -#define COHERENCY_ACE BIT(1) +#define GPU_COHERENCY_ENABLE 0x304 /* (RW) Coherency protocol selection */ +#define COHERENCY_ACE_LITE 0 +#define COHERENCY_ACE 1 +#define COHERENCY_NONE 31 #define GPU_STACK_PRESENT_LO 0xE00 /* (RO) Core stack present bitmap, low word */ #define GPU_STACK_PRESENT_HI 0xE04 /* (RO) Core stack present bitmap, high word */ diff --git a/include/uapi/drm/panfrost_drm.h b/include/uapi/drm/panfrost_drm.h index 1956431bb391..0c59714ae42b 100644 --- a/include/uapi/drm/panfrost_drm.h +++ b/include/uapi/drm/panfrost_drm.h @@ -228,6 +228,13 @@ enum drm_panfrost_param { DRM_PANFROST_PARAM_SYSTEM_TIMESTAMP, DRM_PANFROST_PARAM_SYSTEM_TIMESTAMP_FREQUENCY, DRM_PANFROST_PARAM_ALLOWED_JM_CTX_PRIORITIES, + DRM_PANFROST_PARAM_SELECTED_COHERENCY, +}; + +enum drm_panfrost_gpu_coherency { + DRM_PANFROST_GPU_COHERENCY_ACE_LITE = 0, + DRM_PANFROST_GPU_COHERENCY_ACE = 1, + DRM_PANFROST_GPU_COHERENCY_NONE = 31, }; struct drm_panfrost_get_param { -- cgit v1.2.3 From 7be45f5489769520aa9276137d0f1f543fb81286 Mon Sep 17 00:00:00 2001 From: Faith Ekstrand Date: Mon, 8 Dec 2025 11:08:37 +0100 Subject: drm/panfrost: Add a PANFROST_SYNC_BO ioctl This will be used by the UMD to synchronize CPU-cached mappings when the UMD can't do it directly (no usermode cache maintenance instruction on Arm32). v2: - Add more to the commit message - Change the flags to better match the drm_gem_shmem_sync semantics v3: - Add Steve's R-b v4: - No changes v5: - Drop Steve's R-b (semantics changes requiring a new review) v6: - Bail out early in panfrost_ioctl_sync_bo() if op_count is zero v7: - Hand-roll our own bo_sync() helper v8: - Collect R-b Signed-off-by: Faith Ekstrand Reviewed-by: Steven Price Link: https://patch.msgid.link/20251208100841.730527-11-boris.brezillon@collabora.com Signed-off-by: Boris Brezillon --- drivers/gpu/drm/panfrost/panfrost_drv.c | 51 ++++++++++++++++++++ drivers/gpu/drm/panfrost/panfrost_gem.c | 84 +++++++++++++++++++++++++++++++++ drivers/gpu/drm/panfrost/panfrost_gem.h | 2 + include/uapi/drm/panfrost_drm.h | 45 ++++++++++++++++++ 4 files changed, 182 insertions(+) (limited to 'include/uapi') diff --git a/drivers/gpu/drm/panfrost/panfrost_drv.c b/drivers/gpu/drm/panfrost/panfrost_drv.c index b2c3f6c81be0..450204fdbe45 100644 --- a/drivers/gpu/drm/panfrost/panfrost_drv.c +++ b/drivers/gpu/drm/panfrost/panfrost_drv.c @@ -580,6 +580,56 @@ static int panfrost_ioctl_jm_ctx_destroy(struct drm_device *dev, void *data, return panfrost_jm_ctx_destroy(file, args->handle); } +static int panfrost_ioctl_sync_bo(struct drm_device *ddev, void *data, + struct drm_file *file) +{ + struct drm_panfrost_sync_bo *args = data; + struct drm_panfrost_bo_sync_op *ops; + struct drm_gem_object *obj; + int ret; + u32 i; + + if (args->pad) + return -EINVAL; + + if (!args->op_count) + return 0; + + ops = kvmalloc_array(args->op_count, sizeof(*ops), GFP_KERNEL); + if (!ops) { + DRM_DEBUG("Failed to allocate incoming BO sync ops array\n"); + return -ENOMEM; + } + + if (copy_from_user(ops, (void __user *)(uintptr_t)args->ops, + args->op_count * sizeof(*ops))) { + DRM_DEBUG("Failed to copy in BO sync ops\n"); + ret = -EFAULT; + goto err_ops; + } + + for (i = 0; i < args->op_count; i++) { + obj = drm_gem_object_lookup(file, ops[i].handle); + if (!obj) { + ret = -ENOENT; + goto err_ops; + } + + ret = panfrost_gem_sync(obj, ops[i].type, + ops[i].offset, ops[i].size); + + drm_gem_object_put(obj); + + if (ret) + goto err_ops; + } + +err_ops: + kvfree(ops); + + return ret; +} + int panfrost_unstable_ioctl_check(void) { if (!unstable_ioctls) @@ -649,6 +699,7 @@ static const struct drm_ioctl_desc panfrost_drm_driver_ioctls[] = { PANFROST_IOCTL(SET_LABEL_BO, set_label_bo, DRM_RENDER_ALLOW), PANFROST_IOCTL(JM_CTX_CREATE, jm_ctx_create, DRM_RENDER_ALLOW), PANFROST_IOCTL(JM_CTX_DESTROY, jm_ctx_destroy, DRM_RENDER_ALLOW), + PANFROST_IOCTL(SYNC_BO, sync_bo, DRM_RENDER_ALLOW), }; static void panfrost_gpu_show_fdinfo(struct panfrost_device *pfdev, diff --git a/drivers/gpu/drm/panfrost/panfrost_gem.c b/drivers/gpu/drm/panfrost/panfrost_gem.c index 02721863b6ae..62c9e3a6b0e9 100644 --- a/drivers/gpu/drm/panfrost/panfrost_gem.c +++ b/drivers/gpu/drm/panfrost/panfrost_gem.c @@ -507,6 +507,90 @@ panfrost_gem_set_label(struct drm_gem_object *obj, const char *label) kfree_const(old_label); } +int +panfrost_gem_sync(struct drm_gem_object *obj, u32 type, u32 offset, u32 size) +{ + struct panfrost_gem_object *bo = to_panfrost_bo(obj); + struct drm_gem_shmem_object *shmem = &bo->base; + const struct drm_device *dev = shmem->base.dev; + struct sg_table *sgt; + struct scatterlist *sgl; + unsigned int count; + + /* Make sure the range is in bounds. */ + if (offset + size < offset || offset + size > shmem->base.size) + return -EINVAL; + + /* Disallow CPU-cache maintenance on imported buffers. */ + if (drm_gem_is_imported(&shmem->base)) + return -EINVAL; + + switch (type) { + case PANFROST_BO_SYNC_CPU_CACHE_FLUSH: + case PANFROST_BO_SYNC_CPU_CACHE_FLUSH_AND_INVALIDATE: + break; + + default: + return -EINVAL; + } + + /* Don't bother if it's WC-mapped */ + if (shmem->map_wc) + return 0; + + /* Nothing to do if the size is zero. */ + if (size == 0) + return 0; + + sgt = drm_gem_shmem_get_pages_sgt(shmem); + if (IS_ERR(sgt)) + return PTR_ERR(sgt); + + for_each_sgtable_dma_sg(sgt, sgl, count) { + if (size == 0) + break; + + dma_addr_t paddr = sg_dma_address(sgl); + size_t len = sg_dma_len(sgl); + + if (len <= offset) { + offset -= len; + continue; + } + + paddr += offset; + len -= offset; + len = min_t(size_t, len, size); + size -= len; + offset = 0; + + /* It's unclear whether dma_sync_xxx() is the right API to do CPU + * cache maintenance given an IOMMU can register their own + * implementation doing more than just CPU cache flushes/invalidation, + * and what we really care about here is CPU caches only, but that's + * the best we have that is both arch-agnostic and does at least the + * CPU cache maintenance on a tuple. + * + * Also, I wish we could do a single + * + * dma_sync_single_for_device(BIDIR) + * + * and get a flush+invalidate, but that's not how it's implemented + * in practice (at least on arm64), so we have to make it + * + * dma_sync_single_for_device(TO_DEVICE) + * dma_sync_single_for_cpu(FROM_DEVICE) + * + * for the flush+invalidate case. + */ + dma_sync_single_for_device(dev->dev, paddr, len, DMA_TO_DEVICE); + if (type == PANFROST_BO_SYNC_CPU_CACHE_FLUSH_AND_INVALIDATE) + dma_sync_single_for_cpu(dev->dev, paddr, len, DMA_FROM_DEVICE); + } + + return 0; +} + void panfrost_gem_internal_set_label(struct drm_gem_object *obj, const char *label) { diff --git a/drivers/gpu/drm/panfrost/panfrost_gem.h b/drivers/gpu/drm/panfrost/panfrost_gem.h index c2470e8255ab..45e2aa846cc7 100644 --- a/drivers/gpu/drm/panfrost/panfrost_gem.h +++ b/drivers/gpu/drm/panfrost/panfrost_gem.h @@ -153,6 +153,8 @@ int panfrost_gem_shrinker_init(struct drm_device *dev); void panfrost_gem_shrinker_cleanup(struct drm_device *dev); void panfrost_gem_set_label(struct drm_gem_object *obj, const char *label); +int panfrost_gem_sync(struct drm_gem_object *obj, u32 type, + u32 offset, u32 size); void panfrost_gem_internal_set_label(struct drm_gem_object *obj, const char *label); #ifdef CONFIG_DEBUG_FS diff --git a/include/uapi/drm/panfrost_drm.h b/include/uapi/drm/panfrost_drm.h index 0c59714ae42b..e194e087a0c8 100644 --- a/include/uapi/drm/panfrost_drm.h +++ b/include/uapi/drm/panfrost_drm.h @@ -24,6 +24,7 @@ extern "C" { #define DRM_PANFROST_SET_LABEL_BO 0x09 #define DRM_PANFROST_JM_CTX_CREATE 0x0a #define DRM_PANFROST_JM_CTX_DESTROY 0x0b +#define DRM_PANFROST_SYNC_BO 0x0c #define DRM_IOCTL_PANFROST_SUBMIT DRM_IOW(DRM_COMMAND_BASE + DRM_PANFROST_SUBMIT, struct drm_panfrost_submit) #define DRM_IOCTL_PANFROST_WAIT_BO DRM_IOW(DRM_COMMAND_BASE + DRM_PANFROST_WAIT_BO, struct drm_panfrost_wait_bo) @@ -35,6 +36,7 @@ extern "C" { #define DRM_IOCTL_PANFROST_SET_LABEL_BO DRM_IOWR(DRM_COMMAND_BASE + DRM_PANFROST_SET_LABEL_BO, struct drm_panfrost_set_label_bo) #define DRM_IOCTL_PANFROST_JM_CTX_CREATE DRM_IOWR(DRM_COMMAND_BASE + DRM_PANFROST_JM_CTX_CREATE, struct drm_panfrost_jm_ctx_create) #define DRM_IOCTL_PANFROST_JM_CTX_DESTROY DRM_IOWR(DRM_COMMAND_BASE + DRM_PANFROST_JM_CTX_DESTROY, struct drm_panfrost_jm_ctx_destroy) +#define DRM_IOCTL_PANFROST_SYNC_BO DRM_IOWR(DRM_COMMAND_BASE + DRM_PANFROST_SYNC_BO, struct drm_panfrost_sync_bo) /* * Unstable ioctl(s): only exposed when the unsafe unstable_ioctls module @@ -308,6 +310,49 @@ struct drm_panfrost_set_label_bo { __u64 label; }; +/* Valid flags to pass to drm_panfrost_bo_sync_op */ +#define PANFROST_BO_SYNC_CPU_CACHE_FLUSH 0 +#define PANFROST_BO_SYNC_CPU_CACHE_FLUSH_AND_INVALIDATE 1 + +/** + * struct drm_panthor_bo_flush_map_op - BO map sync op + */ +struct drm_panfrost_bo_sync_op { + /** @handle: Handle of the buffer object to sync. */ + __u32 handle; + + /** @type: Type of sync operation. */ + __u32 type; + + /** + * @offset: Offset into the BO at which the sync range starts. + * + * This will be rounded down to the nearest cache line as needed. + */ + __u32 offset; + + /** + * @size: Size of the range to sync + * + * @size + @offset will be rounded up to the nearest cache line as + * needed. + */ + __u32 size; +}; + +/** + * struct drm_panfrost_sync_bo - ioctl argument for syncing BO maps + */ +struct drm_panfrost_sync_bo { + /** Array of struct drm_panfrost_bo_sync_op */ + __u64 ops; + + /** Number of BO sync ops */ + __u32 op_count; + + __u32 pad; +}; + /* Definitions for coredump decoding in user space */ #define PANFROSTDUMP_MAJOR 1 #define PANFROSTDUMP_MINOR 0 -- cgit v1.2.3 From d17592e61fa8e3b2d58df7c4a24abc8ac58b8d3f Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Mon, 8 Dec 2025 11:08:38 +0100 Subject: drm/panfrost: Add an ioctl to query BO flags This is useful when importing BOs, so we can know about cacheability and flush the caches when needed. v2: - New commit v3: - Add Steve's R-b v4: - No changes v5: - No changes v6: - No changes v7: - No changes v8: - No changes Reviewed-by: Steven Price Link: https://patch.msgid.link/20251208100841.730527-12-boris.brezillon@collabora.com Signed-off-by: Boris Brezillon --- drivers/gpu/drm/panfrost/panfrost_drv.c | 33 +++++++++++++++++++++++++++++++++ include/uapi/drm/panfrost_drm.h | 19 +++++++++++++++++++ 2 files changed, 52 insertions(+) (limited to 'include/uapi') diff --git a/drivers/gpu/drm/panfrost/panfrost_drv.c b/drivers/gpu/drm/panfrost/panfrost_drv.c index 450204fdbe45..d461ecf8829d 100644 --- a/drivers/gpu/drm/panfrost/panfrost_drv.c +++ b/drivers/gpu/drm/panfrost/panfrost_drv.c @@ -630,6 +630,38 @@ err_ops: return ret; } +static int panfrost_ioctl_query_bo_info(struct drm_device *dev, void *data, + struct drm_file *file_priv) +{ + struct drm_panfrost_query_bo_info *args = data; + struct drm_gem_object *gem_obj; + struct panfrost_gem_object *bo; + + gem_obj = drm_gem_object_lookup(file_priv, args->handle); + if (!gem_obj) { + DRM_DEBUG("Failed to look up GEM BO %d\n", args->handle); + return -ENOENT; + } + + bo = to_panfrost_bo(gem_obj); + args->pad = 0; + args->create_flags = 0; + args->extra_flags = 0; + + if (drm_gem_is_imported(gem_obj)) { + args->extra_flags |= DRM_PANFROST_BO_IS_IMPORTED; + } else { + if (bo->noexec) + args->create_flags |= PANFROST_BO_NOEXEC; + + if (bo->is_heap) + args->create_flags |= PANFROST_BO_HEAP; + } + + drm_gem_object_put(gem_obj); + return 0; +} + int panfrost_unstable_ioctl_check(void) { if (!unstable_ioctls) @@ -700,6 +732,7 @@ static const struct drm_ioctl_desc panfrost_drm_driver_ioctls[] = { PANFROST_IOCTL(JM_CTX_CREATE, jm_ctx_create, DRM_RENDER_ALLOW), PANFROST_IOCTL(JM_CTX_DESTROY, jm_ctx_destroy, DRM_RENDER_ALLOW), PANFROST_IOCTL(SYNC_BO, sync_bo, DRM_RENDER_ALLOW), + PANFROST_IOCTL(QUERY_BO_INFO, query_bo_info, DRM_RENDER_ALLOW), }; static void panfrost_gpu_show_fdinfo(struct panfrost_device *pfdev, diff --git a/include/uapi/drm/panfrost_drm.h b/include/uapi/drm/panfrost_drm.h index e194e087a0c8..36ae48ea50d3 100644 --- a/include/uapi/drm/panfrost_drm.h +++ b/include/uapi/drm/panfrost_drm.h @@ -25,6 +25,7 @@ extern "C" { #define DRM_PANFROST_JM_CTX_CREATE 0x0a #define DRM_PANFROST_JM_CTX_DESTROY 0x0b #define DRM_PANFROST_SYNC_BO 0x0c +#define DRM_PANFROST_QUERY_BO_INFO 0x0d #define DRM_IOCTL_PANFROST_SUBMIT DRM_IOW(DRM_COMMAND_BASE + DRM_PANFROST_SUBMIT, struct drm_panfrost_submit) #define DRM_IOCTL_PANFROST_WAIT_BO DRM_IOW(DRM_COMMAND_BASE + DRM_PANFROST_WAIT_BO, struct drm_panfrost_wait_bo) @@ -37,6 +38,7 @@ extern "C" { #define DRM_IOCTL_PANFROST_JM_CTX_CREATE DRM_IOWR(DRM_COMMAND_BASE + DRM_PANFROST_JM_CTX_CREATE, struct drm_panfrost_jm_ctx_create) #define DRM_IOCTL_PANFROST_JM_CTX_DESTROY DRM_IOWR(DRM_COMMAND_BASE + DRM_PANFROST_JM_CTX_DESTROY, struct drm_panfrost_jm_ctx_destroy) #define DRM_IOCTL_PANFROST_SYNC_BO DRM_IOWR(DRM_COMMAND_BASE + DRM_PANFROST_SYNC_BO, struct drm_panfrost_sync_bo) +#define DRM_IOCTL_PANFROST_QUERY_BO_INFO DRM_IOWR(DRM_COMMAND_BASE + DRM_PANFROST_QUERY_BO_INFO, struct drm_panfrost_query_bo_info) /* * Unstable ioctl(s): only exposed when the unsafe unstable_ioctls module @@ -353,6 +355,23 @@ struct drm_panfrost_sync_bo { __u32 pad; }; +/** BO comes from a different subsystem. */ +#define DRM_PANFROST_BO_IS_IMPORTED (1 << 0) + +struct drm_panfrost_query_bo_info { + /** Handle of the object being queried. */ + __u32 handle; + + /** Extra flags that are not coming from the BO_CREATE ioctl(). */ + __u32 extra_flags; + + /** Flags passed at creation time. */ + __u32 create_flags; + + /** Will be zero on return. */ + __u32 pad; +}; + /* Definitions for coredump decoding in user space */ #define PANFROSTDUMP_MAJOR 1 #define PANFROSTDUMP_MINOR 0 -- cgit v1.2.3 From 62eedf1ccba534b318ca85d3890bf0951b9e0f87 Mon Sep 17 00:00:00 2001 From: Faith Ekstrand Date: Mon, 8 Dec 2025 11:08:39 +0100 Subject: drm/panfrost: Add flag to map GEM object Write-Back Cacheable Will be used by the UMD to optimize CPU accesses to buffers that are frequently read by the CPU, or on which the access pattern makes non-cacheable mappings inefficient. Mapping buffers CPU-cached implies taking care of the CPU cache maintenance in the UMD, unless the GPU is IO coherent. v2: - Add more to the commit message v3: - No changes v4: - Fix the map_wc test in panfrost_ioctl_query_bo_info() v5: - Drop Steve's R-b (enough has changed to justify a new review) v6: - Collect R-b v7: - No changes v8: - Fix double drm_gem_object_funcs::export assignment Signed-off-by: Faith Ekstrand Reviewed-by: Steven Price Link: https://patch.msgid.link/20251208100841.730527-13-boris.brezillon@collabora.com Signed-off-by: Boris Brezillon --- drivers/gpu/drm/panfrost/panfrost_drv.c | 10 ++++++++-- drivers/gpu/drm/panfrost/panfrost_gem.c | 32 ++++++++++++++++++++++++++++++++ drivers/gpu/drm/panfrost/panfrost_gem.h | 5 +++++ include/uapi/drm/panfrost_drm.h | 5 ++++- 4 files changed, 49 insertions(+), 3 deletions(-) (limited to 'include/uapi') diff --git a/drivers/gpu/drm/panfrost/panfrost_drv.c b/drivers/gpu/drm/panfrost/panfrost_drv.c index d461ecf8829d..34969179544c 100644 --- a/drivers/gpu/drm/panfrost/panfrost_drv.c +++ b/drivers/gpu/drm/panfrost/panfrost_drv.c @@ -126,6 +126,10 @@ static int panfrost_ioctl_get_param(struct drm_device *ddev, void *data, struct return 0; } +#define PANFROST_BO_FLAGS (PANFROST_BO_NOEXEC | \ + PANFROST_BO_HEAP | \ + PANFROST_BO_WB_MMAP) + static int panfrost_ioctl_create_bo(struct drm_device *dev, void *data, struct drm_file *file) { @@ -135,8 +139,7 @@ static int panfrost_ioctl_create_bo(struct drm_device *dev, void *data, struct panfrost_gem_mapping *mapping; int ret; - if (!args->size || args->pad || - (args->flags & ~(PANFROST_BO_NOEXEC | PANFROST_BO_HEAP))) + if (!args->size || args->pad || (args->flags & ~PANFROST_BO_FLAGS)) return -EINVAL; /* Heaps should never be executable */ @@ -656,6 +659,9 @@ static int panfrost_ioctl_query_bo_info(struct drm_device *dev, void *data, if (bo->is_heap) args->create_flags |= PANFROST_BO_HEAP; + + if (!bo->base.map_wc) + args->create_flags |= PANFROST_BO_WB_MMAP; } drm_gem_object_put(gem_obj); diff --git a/drivers/gpu/drm/panfrost/panfrost_gem.c b/drivers/gpu/drm/panfrost/panfrost_gem.c index 62c9e3a6b0e9..44985b515212 100644 --- a/drivers/gpu/drm/panfrost/panfrost_gem.c +++ b/drivers/gpu/drm/panfrost/panfrost_gem.c @@ -444,12 +444,42 @@ struct drm_gem_object *panfrost_gem_create_object(struct drm_device *dev, size_t return &obj->base.base; } +static bool +should_map_wc(struct panfrost_gem_object *bo) +{ + struct panfrost_device *pfdev = to_panfrost_device(bo->base.base.dev); + + /* We can't do uncached mappings if the device is coherent, + * because the zeroing done by the shmem layer at page allocation + * time happens on a cached mapping which isn't CPU-flushed (at least + * not on Arm64 where the flush is deferred to PTE setup time, and + * only done conditionally based on the mapping permissions). We can't + * rely on dma_map_sgtable()/dma_sync_sgtable_for_xxx() either to flush + * those, because they are NOPed if dma_dev_coherent() returns true. + */ + if (pfdev->coherent) + return false; + + /* Cached mappings are explicitly requested, so no write-combine. */ + if (bo->wb_mmap) + return false; + + /* The default is write-combine. */ + return true; +} + struct panfrost_gem_object * panfrost_gem_create(struct drm_device *dev, size_t size, u32 flags) { struct drm_gem_shmem_object *shmem; struct panfrost_gem_object *bo; + /* The heap buffer is not supposed to be CPU-visible, so don't allow + * WB_MMAP on those. + */ + if ((flags & PANFROST_BO_HEAP) && (flags & PANFROST_BO_WB_MMAP)) + return ERR_PTR(-EINVAL); + /* Round up heap allocations to 2MB to keep fault handling simple */ if (flags & PANFROST_BO_HEAP) size = roundup(size, SZ_2M); @@ -461,6 +491,8 @@ panfrost_gem_create(struct drm_device *dev, size_t size, u32 flags) bo = to_panfrost_bo(&shmem->base); bo->noexec = !!(flags & PANFROST_BO_NOEXEC); bo->is_heap = !!(flags & PANFROST_BO_HEAP); + bo->wb_mmap = !!(flags & PANFROST_BO_WB_MMAP); + bo->base.map_wc = should_map_wc(bo); return bo; } diff --git a/drivers/gpu/drm/panfrost/panfrost_gem.h b/drivers/gpu/drm/panfrost/panfrost_gem.h index 45e2aa846cc7..79d4377019e9 100644 --- a/drivers/gpu/drm/panfrost/panfrost_gem.h +++ b/drivers/gpu/drm/panfrost/panfrost_gem.h @@ -98,6 +98,11 @@ struct panfrost_gem_object { bool noexec :1; bool is_heap :1; + /* On coherent devices, this reflects the creation flags, not the true + * cacheability attribute of the mapping. + */ + bool wb_mmap :1; + #ifdef CONFIG_DEBUG_FS struct panfrost_gem_debugfs debugfs; #endif diff --git a/include/uapi/drm/panfrost_drm.h b/include/uapi/drm/panfrost_drm.h index 36ae48ea50d3..50d5337f35ef 100644 --- a/include/uapi/drm/panfrost_drm.h +++ b/include/uapi/drm/panfrost_drm.h @@ -124,9 +124,12 @@ struct drm_panfrost_wait_bo { __s64 timeout_ns; }; -/* Valid flags to pass to drm_panfrost_create_bo */ +/* Valid flags to pass to drm_panfrost_create_bo. + * PANFROST_BO_WB_MMAP can't be set if PANFROST_BO_HEAP is. + */ #define PANFROST_BO_NOEXEC 1 #define PANFROST_BO_HEAP 2 +#define PANFROST_BO_WB_MMAP 4 /** * struct drm_panfrost_create_bo - ioctl argument for creating Panfrost BOs. -- cgit v1.2.3