From b80961a86b40372b7cfb3065439377f7e7550e59 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Wed, 26 Nov 2025 10:59:50 -0800 Subject: drm/xe/uapi: Add DRM_XE_EXEC_QUEUE_SET_HANG_REPLAY_STATE MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add DRM_XE_EXEC_QUEUE_SET_HANG_REPLAY_STATE which accepts a user pointer to populate the exec queue state so that a GPU hang can be replayed via a Mesa tool. v2: Update the value for HANG_REPLAY_STATE flag Cc: José Roberto de Souza Signed-off-by: Matthew Brost Signed-off-by: Carlos Santa Reviewed-by: Jonathan Cavitt Acked-by: José Roberto de Souza Acked-by: Rodrigo Vivi Link: https://patch.msgid.link/20251126185952.546277-8-matthew.brost@intel.com --- include/uapi/drm/xe_drm.h | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'include/uapi/drm') diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index 47853659a705..37881b1eb6ba 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -210,8 +210,12 @@ struct drm_xe_ext_set_property { /** @pad: MBZ */ __u32 pad; - /** @value: property value */ - __u64 value; + union { + /** @value: property value */ + __u64 value; + /** @ptr: pointer to user value */ + __u64 ptr; + }; /** @reserved: Reserved */ __u64 reserved[2]; @@ -1292,6 +1296,7 @@ struct drm_xe_exec_queue_create { #define DRM_XE_EXEC_QUEUE_SET_PROPERTY_PRIORITY 0 #define DRM_XE_EXEC_QUEUE_SET_PROPERTY_TIMESLICE 1 #define DRM_XE_EXEC_QUEUE_SET_PROPERTY_PXP_TYPE 2 +#define DRM_XE_EXEC_QUEUE_SET_HANG_REPLAY_STATE 3 /** @extensions: Pointer to the first extension struct, if any */ __u64 extensions; -- cgit v1.2.3 From 78d91ba6bd7968d4750dad57c62bf5225ddcb388 Mon Sep 17 00:00:00 2001 From: Sanjay Yadav Date: Thu, 4 Dec 2025 09:34:03 +0530 Subject: drm/xe/uapi: Add NO_COMPRESSION BO flag and query capability MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduce 
DRM_XE_GEM_CREATE_FLAG_NO_COMPRESSION to let userspace opt out of CCS compression on a per-BO basis. When set, the driver maps this to XE_BO_FLAG_NO_COMPRESSION, skips CCS metadata allocation/clearing, and rejects compressed PAT indices at vm_bind. This avoids extra memory ops and manual CCS state handling for buffers. To allow userspace to detect at runtime whether the kernel supports this feature, add DRM_XE_QUERY_CONFIG_FLAG_HAS_NO_COMPRESSION_HINT and expose it via query_config() on Xe2+ platforms. Mesa PR: https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/38425 IGT PR: https://patchwork.freedesktop.org/patch/685180/ v2 - Changed error code from -EINVAL to -EOPNOTSUPP for unsupported flag usage on pre-Xe2 platforms - Fixed checkpatch warning in xe_vm.c - Fixed kernel-doc formatting in xe_drm.h v3 - Rebase - Updated commit title and description - Added UAPI for DRM_XE_QUERY_CONFIG_FLAG_HAS_NO_COMPRESSION_HINT and exposed it via query_config() v4 - Rebase v5 - Included Mesa PR and IGT PR in the commit description - Used xe_pat_index_get_comp_en() to extract the compression v6 - Added XE_IOCTL_DBG() checks for argument validation Suggested-by: Matthew Auld Suggested-by: José Roberto de Souza Acked-by: José Roberto de Souza Reviewed-by: Matthew Auld Signed-off-by: Sanjay Yadav Signed-off-by: Matthew Auld Link: https://patch.msgid.link/20251204040402.2692921-2-sanjay.kumar.yadav@intel.com --- drivers/gpu/drm/xe/xe_bo.c | 15 +++++++++++++-- drivers/gpu/drm/xe/xe_bo.h | 1 + drivers/gpu/drm/xe/xe_query.c | 3 +++ drivers/gpu/drm/xe/xe_vm.c | 4 ++++ include/uapi/drm/xe_drm.h | 16 ++++++++++++++++ 5 files changed, 37 insertions(+), 2 deletions(-) (limited to 'include/uapi/drm') diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c index b67fd337ff19..6280e6a013ff 100644 --- a/drivers/gpu/drm/xe/xe_bo.c +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -3178,7 +3178,8 @@ int xe_gem_create_ioctl(struct drm_device *dev, void *data, if (XE_IOCTL_DBG(xe, args->flags & 
~(DRM_XE_GEM_CREATE_FLAG_DEFER_BACKING | DRM_XE_GEM_CREATE_FLAG_SCANOUT | - DRM_XE_GEM_CREATE_FLAG_NEEDS_VISIBLE_VRAM))) + DRM_XE_GEM_CREATE_FLAG_NEEDS_VISIBLE_VRAM | + DRM_XE_GEM_CREATE_FLAG_NO_COMPRESSION))) return -EINVAL; if (XE_IOCTL_DBG(xe, args->handle)) @@ -3200,6 +3201,12 @@ int xe_gem_create_ioctl(struct drm_device *dev, void *data, if (args->flags & DRM_XE_GEM_CREATE_FLAG_SCANOUT) bo_flags |= XE_BO_FLAG_SCANOUT; + if (args->flags & DRM_XE_GEM_CREATE_FLAG_NO_COMPRESSION) { + if (XE_IOCTL_DBG(xe, GRAPHICS_VER(xe) < 20)) + return -EOPNOTSUPP; + bo_flags |= XE_BO_FLAG_NO_COMPRESSION; + } + bo_flags |= args->placement << (ffs(XE_BO_FLAG_SYSTEM) - 1); /* CCS formats need physical placement at a 64K alignment in VRAM. */ @@ -3521,8 +3528,12 @@ bool xe_bo_needs_ccs_pages(struct xe_bo *bo) * Compression implies coh_none, therefore we know for sure that WB * memory can't currently use compression, which is likely one of the * common cases. + * Additionally, userspace may explicitly request no compression via the + * DRM_XE_GEM_CREATE_FLAG_NO_COMPRESSION flag, which should also disable + * CCS usage. 
*/ - if (bo->cpu_caching == DRM_XE_GEM_CPU_CACHING_WB) + if (bo->cpu_caching == DRM_XE_GEM_CPU_CACHING_WB || + bo->flags & XE_BO_FLAG_NO_COMPRESSION) return false; return true; diff --git a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h index 911d5b90461a..8ab4474129c3 100644 --- a/drivers/gpu/drm/xe/xe_bo.h +++ b/drivers/gpu/drm/xe/xe_bo.h @@ -50,6 +50,7 @@ #define XE_BO_FLAG_GGTT3 BIT(23) #define XE_BO_FLAG_CPU_ADDR_MIRROR BIT(24) #define XE_BO_FLAG_FORCE_USER_VRAM BIT(25) +#define XE_BO_FLAG_NO_COMPRESSION BIT(26) /* this one is trigger internally only */ #define XE_BO_FLAG_INTERNAL_TEST BIT(30) diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c index a7bf1fd6dd6a..6667403a8814 100644 --- a/drivers/gpu/drm/xe/xe_query.c +++ b/drivers/gpu/drm/xe/xe_query.c @@ -338,6 +338,9 @@ static int query_config(struct xe_device *xe, struct drm_xe_device_query *query) if (xe->info.has_usm && IS_ENABLED(CONFIG_DRM_XE_GPUSVM)) config->info[DRM_XE_QUERY_CONFIG_FLAGS] |= DRM_XE_QUERY_CONFIG_FLAG_HAS_CPU_ADDR_MIRROR; + if (GRAPHICS_VER(xe) >= 20) + config->info[DRM_XE_QUERY_CONFIG_FLAGS] |= + DRM_XE_QUERY_CONFIG_FLAG_HAS_NO_COMPRESSION_HINT; config->info[DRM_XE_QUERY_CONFIG_FLAGS] |= DRM_XE_QUERY_CONFIG_FLAG_HAS_LOW_LATENCY; config->info[DRM_XE_QUERY_CONFIG_MIN_ALIGNMENT] = diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 00ffd3f03983..c2012d20faa6 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -3501,6 +3501,10 @@ static int xe_vm_bind_ioctl_validate_bo(struct xe_device *xe, struct xe_bo *bo, { u16 coh_mode; + if (XE_IOCTL_DBG(xe, (bo->flags & XE_BO_FLAG_NO_COMPRESSION) && + xe_pat_index_get_comp_en(xe, pat_index))) + return -EINVAL; + if (XE_IOCTL_DBG(xe, range > xe_bo_size(bo)) || XE_IOCTL_DBG(xe, obj_offset > xe_bo_size(bo) - range)) { diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index 37881b1eb6ba..0d99bb0cd20a 100644 --- a/include/uapi/drm/xe_drm.h +++ 
b/include/uapi/drm/xe_drm.h @@ -407,6 +407,9 @@ struct drm_xe_query_mem_regions { * has low latency hint support * - %DRM_XE_QUERY_CONFIG_FLAG_HAS_CPU_ADDR_MIRROR - Flag is set if the * device has CPU address mirroring support + * - %DRM_XE_QUERY_CONFIG_FLAG_HAS_NO_COMPRESSION_HINT - Flag is set if the + * device supports the userspace hint %DRM_XE_GEM_CREATE_FLAG_NO_COMPRESSION. + * This is exposed only on Xe2+. * - %DRM_XE_QUERY_CONFIG_MIN_ALIGNMENT - Minimal memory alignment * required by this device, typically SZ_4K or SZ_64K * - %DRM_XE_QUERY_CONFIG_VA_BITS - Maximum bits of a virtual address @@ -425,6 +428,7 @@ struct drm_xe_query_config { #define DRM_XE_QUERY_CONFIG_FLAG_HAS_VRAM (1 << 0) #define DRM_XE_QUERY_CONFIG_FLAG_HAS_LOW_LATENCY (1 << 1) #define DRM_XE_QUERY_CONFIG_FLAG_HAS_CPU_ADDR_MIRROR (1 << 2) + #define DRM_XE_QUERY_CONFIG_FLAG_HAS_NO_COMPRESSION_HINT (1 << 3) #define DRM_XE_QUERY_CONFIG_MIN_ALIGNMENT 2 #define DRM_XE_QUERY_CONFIG_VA_BITS 3 #define DRM_XE_QUERY_CONFIG_MAX_EXEC_QUEUE_PRIORITY 4 @@ -795,6 +799,17 @@ struct drm_xe_device_query { * need to use VRAM for display surfaces, therefore the kernel requires * setting this flag for such objects, otherwise an error is thrown on * small-bar systems. + * - %DRM_XE_GEM_CREATE_FLAG_NO_COMPRESSION - Allows userspace to + * hint that compression (CCS) should be disabled for the buffer being + * created. This can avoid unnecessary memory operations and CCS state + * management. + * On pre-Xe2 platforms, this flag is currently rejected as compression + * control is not supported via PAT index. On Xe2+ platforms, compression + * is controlled via PAT entries. If this flag is set, the driver will reject + * any VM bind that requests a PAT index enabling compression for this BO. + * Note: On dGPU platforms, there is currently no change in behavior with + * this flag, but future improvements may leverage it. The current benefit is + * primarily applicable to iGPU platforms. 
* * @cpu_caching supports the following values: * - %DRM_XE_GEM_CPU_CACHING_WB - Allocate the pages with write-back @@ -841,6 +856,7 @@ struct drm_xe_gem_create { #define DRM_XE_GEM_CREATE_FLAG_DEFER_BACKING (1 << 0) #define DRM_XE_GEM_CREATE_FLAG_SCANOUT (1 << 1) #define DRM_XE_GEM_CREATE_FLAG_NEEDS_VISIBLE_VRAM (1 << 2) +#define DRM_XE_GEM_CREATE_FLAG_NO_COMPRESSION (1 << 3) /** * @flags: Flags, currently a mask of memory instances of where BO can * be placed -- cgit v1.2.3 From 16e076b036583702bb47554d3931b5e674dd9a8e Mon Sep 17 00:00:00 2001 From: Ashutosh Dixit Date: Mon, 1 Dec 2025 18:51:12 -0800 Subject: drm/xe/oa/uapi: Add gt_id to struct drm_xe_oa_unit gt_id was previously omitted from 'struct drm_xe_oa_unit' because it could be determined from hwe's attached to the OA unit. However, we now have OA units which don't have any hwe's attached to them. Hence add gt_id to 'struct drm_xe_oa_unit' in order to provide this needed information to userspace. Signed-off-by: Ashutosh Dixit Reviewed-by: Umesh Nerlige Ramappa Link: https://patch.msgid.link/20251202025115.373546-3-ashutosh.dixit@intel.com --- drivers/gpu/drm/xe/xe_query.c | 4 +++- include/uapi/drm/xe_drm.h | 9 ++++++++- 2 files changed, 11 insertions(+), 2 deletions(-) (limited to 'include/uapi/drm') diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c index 6667403a8814..75490683bad2 100644 --- a/drivers/gpu/drm/xe/xe_query.c +++ b/drivers/gpu/drm/xe/xe_query.c @@ -685,7 +685,9 @@ static int query_oa_units(struct xe_device *xe, du->capabilities = DRM_XE_OA_CAPS_BASE | DRM_XE_OA_CAPS_SYNCS | DRM_XE_OA_CAPS_OA_BUFFER_SIZE | DRM_XE_OA_CAPS_WAIT_NUM_REPORTS | - DRM_XE_OA_CAPS_OAM; + DRM_XE_OA_CAPS_OAM | + DRM_XE_OA_CAPS_OA_UNIT_GT_ID; + du->gt_id = u->gt->info.id; j = 0; for_each_hw_engine(hwe, gt, hwe_id) { if (!xe_hw_engine_is_reserved(hwe) && diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index 0d99bb0cd20a..876a076fa6c0 100644 --- a/include/uapi/drm/xe_drm.h +++ 
b/include/uapi/drm/xe_drm.h @@ -1697,12 +1697,19 @@ struct drm_xe_oa_unit { #define DRM_XE_OA_CAPS_OA_BUFFER_SIZE (1 << 2) #define DRM_XE_OA_CAPS_WAIT_NUM_REPORTS (1 << 3) #define DRM_XE_OA_CAPS_OAM (1 << 4) +#define DRM_XE_OA_CAPS_OA_UNIT_GT_ID (1 << 5) /** @oa_timestamp_freq: OA timestamp freq */ __u64 oa_timestamp_freq; + /** @gt_id: gt id for this OA unit */ + __u16 gt_id; + + /** @reserved1: MBZ */ + __u16 reserved1[3]; + /** @reserved: MBZ */ - __u64 reserved[4]; + __u64 reserved[3]; /** @num_engines: number of engines in @eci array */ __u64 num_engines; -- cgit v1.2.3 From d9ec63474648a258094704ce223c9249fa7bb279 Mon Sep 17 00:00:00 2001 From: Niranjana Vishwanathapura Date: Wed, 10 Dec 2025 17:02:50 -0800 Subject: drm/xe/multi_queue: Add user interface for multi queue support Multi Queue is a new mode of execution supported by the compute and blitter copy command streamers (CCS and BCS, respectively). It is an enhancement of the existing hardware architecture and leverages the same submission model. It enables support for efficient, parallel execution of multiple queues within a single context. All the queues of a group must use the same address space (VM). The new DRM_XE_EXEC_QUEUE_SET_PROPERTY_MULTI_GROUP execution queue property supports creating a multi queue group and adding queues to a queue group. All queues of a multi queue group share the same context. An exec queue create ioctl call with the above property specified with value DRM_XE_MULTI_GROUP_CREATE will create a new multi queue group with the queue being created as the primary queue (aka q0) of the group. To add secondary queues to the group, they need to be created with the above property with id of the primary queue as the value. The properties of the primary queue (like priority, timeslice) apply to the whole group. So, these properties can't be set for secondary queues of a group. Once destroyed, the secondary queues of a multi queue group can't be replaced. 
However, they can be dynamically added to the group up to a total of 64 queues per group. Once the primary queue is destroyed, secondary queues can't be added to the queue group. v2: Remove group->lock, fix xe_exec_queue_group_add()/delete() function semantics, add additional comments, remove unused group->list_lock, add XE_BO_FLAG_GGTT_INVALIDATE for cgp bo, Assert LRC is valid, update uapi kernel doc. (Matt Brost) v3: Use XE_BO_FLAG_PINNED_LATE_RESTORE/USER_VRAM/GGTT_INVALIDATE flags for cgp bo (Matt) v4: Ensure queue is not a vm_bind queue uapi change due to rebase Signed-off-by: Stuart Summers Signed-off-by: Niranjana Vishwanathapura Reviewed-by: Matthew Brost Link: https://patch.msgid.link/20251211010249.1647839-21-niranjana.vishwanathapura@intel.com --- drivers/gpu/drm/xe/xe_exec_queue.c | 197 ++++++++++++++++++++++++++++++- drivers/gpu/drm/xe/xe_exec_queue.h | 47 ++++++++ drivers/gpu/drm/xe/xe_exec_queue_types.h | 26 ++++ include/uapi/drm/xe_drm.h | 10 ++ 4 files changed, 278 insertions(+), 2 deletions(-) (limited to 'include/uapi/drm') diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c index 02b75652d497..f76ec277c5af 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue.c +++ b/drivers/gpu/drm/xe/xe_exec_queue.c @@ -13,6 +13,7 @@ #include #include +#include "xe_bo.h" #include "xe_dep_scheduler.h" #include "xe_device.h" #include "xe_gt.h" @@ -63,6 +64,33 @@ enum xe_exec_queue_sched_prop { static int exec_queue_user_extensions(struct xe_device *xe, struct xe_exec_queue *q, u64 extensions, int ext_number); +static void xe_exec_queue_group_cleanup(struct xe_exec_queue *q) +{ + struct xe_exec_queue_group *group = q->multi_queue.group; + struct xe_lrc *lrc; + unsigned long idx; + + if (xe_exec_queue_is_multi_queue_secondary(q)) { + /* + * Put pairs with get from xe_exec_queue_lookup() call + * in xe_exec_queue_group_validate(). 
+ */ + xe_exec_queue_put(xe_exec_queue_multi_queue_primary(q)); + return; + } + + if (!group) + return; + + /* Primary queue cleanup */ + xa_for_each(&group->xa, idx, lrc) + xe_lrc_put(lrc); + + xa_destroy(&group->xa); + xe_bo_unpin_map_no_vm(group->cgp_bo); + kfree(group); +} + static void __xe_exec_queue_free(struct xe_exec_queue *q) { int i; @@ -73,6 +101,10 @@ static void __xe_exec_queue_free(struct xe_exec_queue *q) if (xe_exec_queue_uses_pxp(q)) xe_pxp_exec_queue_remove(gt_to_xe(q->gt)->pxp, q); + + if (xe_exec_queue_is_multi_queue(q)) + xe_exec_queue_group_cleanup(q); + if (q->vm) xe_vm_put(q->vm); @@ -588,6 +620,150 @@ static int exec_queue_set_hang_replay_state(struct xe_device *xe, return 0; } +static int xe_exec_queue_group_init(struct xe_device *xe, struct xe_exec_queue *q) +{ + struct xe_tile *tile = gt_to_tile(q->gt); + struct xe_exec_queue_group *group; + struct xe_bo *bo; + + group = kzalloc(sizeof(*group), GFP_KERNEL); + if (!group) + return -ENOMEM; + + bo = xe_bo_create_pin_map_novm(xe, tile, SZ_4K, ttm_bo_type_kernel, + XE_BO_FLAG_VRAM_IF_DGFX(tile) | + XE_BO_FLAG_PINNED_LATE_RESTORE | + XE_BO_FLAG_FORCE_USER_VRAM | + XE_BO_FLAG_GGTT_INVALIDATE | + XE_BO_FLAG_GGTT, false); + if (IS_ERR(bo)) { + drm_err(&xe->drm, "CGP bo allocation for queue group failed: %ld\n", + PTR_ERR(bo)); + kfree(group); + return PTR_ERR(bo); + } + + xe_map_memset(xe, &bo->vmap, 0, 0, SZ_4K); + + group->primary = q; + group->cgp_bo = bo; + xa_init_flags(&group->xa, XA_FLAGS_ALLOC1); + q->multi_queue.group = group; + + return 0; +} + +static inline bool xe_exec_queue_supports_multi_queue(struct xe_exec_queue *q) +{ + return q->gt->info.multi_queue_engine_class_mask & BIT(q->class); +} + +static int xe_exec_queue_group_validate(struct xe_device *xe, struct xe_exec_queue *q, + u32 primary_id) +{ + struct xe_exec_queue_group *group; + struct xe_exec_queue *primary; + int ret; + + /* + * Get from below xe_exec_queue_lookup() pairs with put + * in xe_exec_queue_group_cleanup(). 
+ */ + primary = xe_exec_queue_lookup(q->vm->xef, primary_id); + if (XE_IOCTL_DBG(xe, !primary)) + return -ENOENT; + + if (XE_IOCTL_DBG(xe, !xe_exec_queue_is_multi_queue_primary(primary)) || + XE_IOCTL_DBG(xe, q->vm != primary->vm) || + XE_IOCTL_DBG(xe, q->logical_mask != primary->logical_mask)) { + ret = -EINVAL; + goto put_primary; + } + + group = primary->multi_queue.group; + q->multi_queue.valid = true; + q->multi_queue.group = group; + + return 0; +put_primary: + xe_exec_queue_put(primary); + return ret; +} + +#define XE_MAX_GROUP_SIZE 64 +static int xe_exec_queue_group_add(struct xe_device *xe, struct xe_exec_queue *q) +{ + struct xe_exec_queue_group *group = q->multi_queue.group; + u32 pos; + int err; + + xe_assert(xe, xe_exec_queue_is_multi_queue_secondary(q)); + + /* Primary queue holds a reference to LRCs of all secondary queues */ + err = xa_alloc(&group->xa, &pos, xe_lrc_get(q->lrc[0]), + XA_LIMIT(1, XE_MAX_GROUP_SIZE - 1), GFP_KERNEL); + if (XE_IOCTL_DBG(xe, err)) { + xe_lrc_put(q->lrc[0]); + + /* It is invalid if queue group limit is exceeded */ + if (err == -EBUSY) + err = -EINVAL; + + return err; + } + + q->multi_queue.pos = pos; + + return 0; +} + +static void xe_exec_queue_group_delete(struct xe_device *xe, struct xe_exec_queue *q) +{ + struct xe_exec_queue_group *group = q->multi_queue.group; + struct xe_lrc *lrc; + + xe_assert(xe, xe_exec_queue_is_multi_queue_secondary(q)); + + lrc = xa_erase(&group->xa, q->multi_queue.pos); + xe_assert(xe, lrc); + xe_lrc_put(lrc); +} + +static int exec_queue_set_multi_group(struct xe_device *xe, struct xe_exec_queue *q, + u64 value) +{ + if (XE_IOCTL_DBG(xe, !xe_exec_queue_supports_multi_queue(q))) + return -ENODEV; + + if (XE_IOCTL_DBG(xe, !xe_device_uc_enabled(xe))) + return -EOPNOTSUPP; + + if (XE_IOCTL_DBG(xe, !q->vm->xef)) + return -EINVAL; + + if (XE_IOCTL_DBG(xe, xe_exec_queue_is_parallel(q))) + return -EINVAL; + + if (XE_IOCTL_DBG(xe, xe_exec_queue_is_multi_queue(q))) + return -EINVAL; + + if (value & 
DRM_XE_MULTI_GROUP_CREATE) { + if (XE_IOCTL_DBG(xe, value & ~DRM_XE_MULTI_GROUP_CREATE)) + return -EINVAL; + + q->multi_queue.valid = true; + q->multi_queue.is_primary = true; + q->multi_queue.pos = 0; + return 0; + } + + /* While adding secondary queues, the upper 32 bits must be 0 */ + if (XE_IOCTL_DBG(xe, value & (~0ull << 32))) + return -EINVAL; + + return xe_exec_queue_group_validate(xe, q, value); +} + typedef int (*xe_exec_queue_set_property_fn)(struct xe_device *xe, struct xe_exec_queue *q, u64 value); @@ -597,6 +773,7 @@ static const xe_exec_queue_set_property_fn exec_queue_set_property_funcs[] = { [DRM_XE_EXEC_QUEUE_SET_PROPERTY_TIMESLICE] = exec_queue_set_timeslice, [DRM_XE_EXEC_QUEUE_SET_PROPERTY_PXP_TYPE] = exec_queue_set_pxp_type, [DRM_XE_EXEC_QUEUE_SET_HANG_REPLAY_STATE] = exec_queue_set_hang_replay_state, + [DRM_XE_EXEC_QUEUE_SET_PROPERTY_MULTI_GROUP] = exec_queue_set_multi_group, }; static int exec_queue_user_ext_set_property(struct xe_device *xe, @@ -618,7 +795,8 @@ static int exec_queue_user_ext_set_property(struct xe_device *xe, XE_IOCTL_DBG(xe, ext.property != DRM_XE_EXEC_QUEUE_SET_PROPERTY_PRIORITY && ext.property != DRM_XE_EXEC_QUEUE_SET_PROPERTY_TIMESLICE && ext.property != DRM_XE_EXEC_QUEUE_SET_PROPERTY_PXP_TYPE && - ext.property != DRM_XE_EXEC_QUEUE_SET_HANG_REPLAY_STATE)) + ext.property != DRM_XE_EXEC_QUEUE_SET_HANG_REPLAY_STATE && + ext.property != DRM_XE_EXEC_QUEUE_SET_PROPERTY_MULTI_GROUP)) return -EINVAL; idx = array_index_nospec(ext.property, ARRAY_SIZE(exec_queue_set_property_funcs)); @@ -667,6 +845,12 @@ static int exec_queue_user_extensions(struct xe_device *xe, struct xe_exec_queue return exec_queue_user_extensions(xe, q, ext.next_extension, ++ext_number); + if (xe_exec_queue_is_multi_queue_primary(q)) { + err = xe_exec_queue_group_init(xe, q); + if (XE_IOCTL_DBG(xe, err)) + return err; + } + return 0; } @@ -821,12 +1005,18 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data, if (IS_ERR(q)) return PTR_ERR(q); + 
if (xe_exec_queue_is_multi_queue_secondary(q)) { + err = xe_exec_queue_group_add(xe, q); + if (XE_IOCTL_DBG(xe, err)) + goto put_exec_queue; + } + if (xe_vm_in_preempt_fence_mode(vm)) { q->lr.context = dma_fence_context_alloc(1); err = xe_vm_add_compute_exec_queue(vm, q); if (XE_IOCTL_DBG(xe, err)) - goto put_exec_queue; + goto delete_queue_group; } if (q->vm && q->hwe->hw_engine_group) { @@ -849,6 +1039,9 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data, kill_exec_queue: xe_exec_queue_kill(q); +delete_queue_group: + if (xe_exec_queue_is_multi_queue_secondary(q)) + xe_exec_queue_group_delete(xe, q); put_exec_queue: xe_exec_queue_put(q); return err; diff --git a/drivers/gpu/drm/xe/xe_exec_queue.h b/drivers/gpu/drm/xe/xe_exec_queue.h index fda4d4f9bda8..e6daa40003f2 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue.h +++ b/drivers/gpu/drm/xe/xe_exec_queue.h @@ -66,6 +66,53 @@ static inline bool xe_exec_queue_uses_pxp(struct xe_exec_queue *q) return q->pxp.type; } +/** + * xe_exec_queue_is_multi_queue() - Whether an exec_queue is part of a queue group. + * @q: The exec_queue + * + * Return: True if the exec_queue is part of a queue group, false otherwise. + */ +static inline bool xe_exec_queue_is_multi_queue(struct xe_exec_queue *q) +{ + return q->multi_queue.valid; +} + +/** + * xe_exec_queue_is_multi_queue_primary() - Whether an exec_queue is primary queue + * of a multi queue group. + * @q: The exec_queue + * + * Return: True if @q is primary queue of a queue group, false otherwise. + */ +static inline bool xe_exec_queue_is_multi_queue_primary(struct xe_exec_queue *q) +{ + return q->multi_queue.is_primary; +} + +/** + * xe_exec_queue_is_multi_queue_secondary() - Whether an exec_queue is secondary queue + * of a multi queue group. + * @q: The exec_queue + * + * Return: True if @q is secondary queue of a queue group, false otherwise. 
+ */ +static inline bool xe_exec_queue_is_multi_queue_secondary(struct xe_exec_queue *q) +{ + return xe_exec_queue_is_multi_queue(q) && !xe_exec_queue_is_multi_queue_primary(q); +} + +/** + * xe_exec_queue_multi_queue_primary() - Get multi queue group's primary queue + * @q: The exec_queue + * + * If @q belongs to a multi queue group, then the primary queue of the group will + * be returned. Otherwise, @q will be returned. + */ +static inline struct xe_exec_queue *xe_exec_queue_multi_queue_primary(struct xe_exec_queue *q) +{ + return xe_exec_queue_is_multi_queue(q) ? q->multi_queue.group->primary : q; +} + bool xe_exec_queue_is_lr(struct xe_exec_queue *q); bool xe_exec_queue_is_idle(struct xe_exec_queue *q); diff --git a/drivers/gpu/drm/xe/xe_exec_queue_types.h b/drivers/gpu/drm/xe/xe_exec_queue_types.h index 3ba10632dcd6..29feafb42e0a 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue_types.h +++ b/drivers/gpu/drm/xe/xe_exec_queue_types.h @@ -32,6 +32,20 @@ enum xe_exec_queue_priority { XE_EXEC_QUEUE_PRIORITY_COUNT }; +/** + * struct xe_exec_queue_group - Execution multi queue group + * + * Contains multi queue group information. 
+ */ +struct xe_exec_queue_group { + /** @primary: Primary queue of this group */ + struct xe_exec_queue *primary; + /** @cgp_bo: BO for the Context Group Page */ + struct xe_bo *cgp_bo; + /** @xa: xarray to store LRCs */ + struct xarray xa; +}; + /** * struct xe_exec_queue - Execution queue * @@ -111,6 +125,18 @@ struct xe_exec_queue { struct xe_guc_exec_queue *guc; }; + /** @multi_queue: Multi queue information */ + struct { + /** @multi_queue.group: Queue group information */ + struct xe_exec_queue_group *group; + /** @multi_queue.pos: Position of queue within the multi-queue group */ + u8 pos; + /** @multi_queue.valid: Queue belongs to a multi queue group */ + u8 valid:1; + /** @multi_queue.is_primary: Is primary queue (Q0) of the group */ + u8 is_primary:1; + } multi_queue; + /** @sched_props: scheduling properties */ struct { /** @sched_props.timeslice_us: timeslice period in micro-seconds */ diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index 876a076fa6c0..19a8ae856a17 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -1272,6 +1272,14 @@ struct drm_xe_vm_bind { * Given that going into a power-saving state kills PXP HWDRM sessions, * runtime PM will be blocked while queues of this type are alive. * All PXP queues will be killed if a PXP invalidation event occurs. + * - %DRM_XE_EXEC_QUEUE_SET_PROPERTY_MULTI_GROUP - Create a multi-queue group + * or add secondary queues to a multi-queue group. + * If the extension's 'value' field has %DRM_XE_MULTI_GROUP_CREATE flag set, + * then a new multi-queue group is created with this queue as the primary queue + * (Q0). Otherwise, the queue gets added to the multi-queue group whose primary + * queue's exec_queue_id is specified in the lower 32 bits of the 'value' field. + * All the other non-relevant bits of extension's 'value' field while adding the + * primary or the secondary queues of the group must be set to 0. 
* * The example below shows how to use @drm_xe_exec_queue_create to create * a simple exec_queue (no parallel submission) of class @@ -1313,6 +1321,8 @@ struct drm_xe_exec_queue_create { #define DRM_XE_EXEC_QUEUE_SET_PROPERTY_TIMESLICE 1 #define DRM_XE_EXEC_QUEUE_SET_PROPERTY_PXP_TYPE 2 #define DRM_XE_EXEC_QUEUE_SET_HANG_REPLAY_STATE 3 +#define DRM_XE_EXEC_QUEUE_SET_PROPERTY_MULTI_GROUP 4 +#define DRM_XE_MULTI_GROUP_CREATE (1ull << 63) /** @extensions: Pointer to the first extension struct, if any */ __u64 extensions; -- cgit v1.2.3 From 898a00f4b43311adfd4da1711ed2b72adc8c98a5 Mon Sep 17 00:00:00 2001 From: Niranjana Vishwanathapura Date: Wed, 10 Dec 2025 17:02:52 -0800 Subject: drm/xe/multi_queue: Add multi queue priority property Add support for queues of a multi queue group to set their priority within the queue group by adding property DRM_XE_EXEC_QUEUE_SET_PROPERTY_MULTI_QUEUE_PRIORITY. This is the only other property supported by secondary queues of a multi queue group, other than DRM_XE_EXEC_QUEUE_SET_PROPERTY_MULTI_GROUP. 
v2: Add kernel doc for enum xe_multi_queue_priority, Add assert for priority values, fix includes and declarations (Matt Brost) v3: update uapi kernel-doc (Matt Brost) v4: uapi change due to rebase Signed-off-by: Niranjana Vishwanathapura Reviewed-by: Matthew Brost Link: https://patch.msgid.link/20251211010249.1647839-23-niranjana.vishwanathapura@intel.com --- drivers/gpu/drm/xe/xe_exec_queue.c | 17 ++++++++++++++++- drivers/gpu/drm/xe/xe_exec_queue_types.h | 16 ++++++++++++++++ drivers/gpu/drm/xe/xe_guc_submit.c | 1 + drivers/gpu/drm/xe/xe_lrc.c | 29 +++++++++++++++++++++++++++++ drivers/gpu/drm/xe/xe_lrc.h | 3 +++ include/uapi/drm/xe_drm.h | 4 ++++ 6 files changed, 69 insertions(+), 1 deletion(-) (limited to 'include/uapi/drm') diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c index f76ec277c5af..aa46d154d04a 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue.c +++ b/drivers/gpu/drm/xe/xe_exec_queue.c @@ -180,6 +180,7 @@ static struct xe_exec_queue *__xe_exec_queue_alloc(struct xe_device *xe, INIT_LIST_HEAD(&q->multi_gt_link); INIT_LIST_HEAD(&q->hw_engine_group_link); INIT_LIST_HEAD(&q->pxp.link); + q->multi_queue.priority = XE_MULTI_QUEUE_PRIORITY_NORMAL; q->sched_props.timeslice_us = hwe->eclass->sched_props.timeslice_us; q->sched_props.preempt_timeout_us = @@ -764,6 +765,17 @@ static int exec_queue_set_multi_group(struct xe_device *xe, struct xe_exec_queue return xe_exec_queue_group_validate(xe, q, value); } +static int exec_queue_set_multi_queue_priority(struct xe_device *xe, struct xe_exec_queue *q, + u64 value) +{ + if (XE_IOCTL_DBG(xe, value > XE_MULTI_QUEUE_PRIORITY_HIGH)) + return -EINVAL; + + q->multi_queue.priority = value; + + return 0; +} + typedef int (*xe_exec_queue_set_property_fn)(struct xe_device *xe, struct xe_exec_queue *q, u64 value); @@ -774,6 +786,8 @@ static const xe_exec_queue_set_property_fn exec_queue_set_property_funcs[] = { [DRM_XE_EXEC_QUEUE_SET_PROPERTY_PXP_TYPE] = exec_queue_set_pxp_type, 
[DRM_XE_EXEC_QUEUE_SET_HANG_REPLAY_STATE] = exec_queue_set_hang_replay_state, [DRM_XE_EXEC_QUEUE_SET_PROPERTY_MULTI_GROUP] = exec_queue_set_multi_group, + [DRM_XE_EXEC_QUEUE_SET_PROPERTY_MULTI_QUEUE_PRIORITY] = + exec_queue_set_multi_queue_priority, }; static int exec_queue_user_ext_set_property(struct xe_device *xe, @@ -796,7 +810,8 @@ static int exec_queue_user_ext_set_property(struct xe_device *xe, ext.property != DRM_XE_EXEC_QUEUE_SET_PROPERTY_TIMESLICE && ext.property != DRM_XE_EXEC_QUEUE_SET_PROPERTY_PXP_TYPE && ext.property != DRM_XE_EXEC_QUEUE_SET_HANG_REPLAY_STATE && - ext.property != DRM_XE_EXEC_QUEUE_SET_PROPERTY_MULTI_GROUP)) + ext.property != DRM_XE_EXEC_QUEUE_SET_PROPERTY_MULTI_GROUP && + ext.property != DRM_XE_EXEC_QUEUE_SET_PROPERTY_MULTI_QUEUE_PRIORITY)) return -EINVAL; idx = array_index_nospec(ext.property, ARRAY_SIZE(exec_queue_set_property_funcs)); diff --git a/drivers/gpu/drm/xe/xe_exec_queue_types.h b/drivers/gpu/drm/xe/xe_exec_queue_types.h index 06fb518b8533..46e5f4715a0d 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue_types.h +++ b/drivers/gpu/drm/xe/xe_exec_queue_types.h @@ -32,6 +32,20 @@ enum xe_exec_queue_priority { XE_EXEC_QUEUE_PRIORITY_COUNT }; +/** + * enum xe_multi_queue_priority - Multi Queue priority values + * + * The priority values of the queues within the multi queue group. 
+ */ +enum xe_multi_queue_priority { + /** @XE_MULTI_QUEUE_PRIORITY_LOW: Priority low */ + XE_MULTI_QUEUE_PRIORITY_LOW = 0, + /** @XE_MULTI_QUEUE_PRIORITY_NORMAL: Priority normal */ + XE_MULTI_QUEUE_PRIORITY_NORMAL, + /** @XE_MULTI_QUEUE_PRIORITY_HIGH: Priority high */ + XE_MULTI_QUEUE_PRIORITY_HIGH, +}; + /** * struct xe_exec_queue_group - Execution multi queue group * @@ -131,6 +145,8 @@ struct xe_exec_queue { struct { /** @multi_queue.group: Queue group information */ struct xe_exec_queue_group *group; + /** @multi_queue.priority: Queue priority within the multi-queue group */ + enum xe_multi_queue_priority priority; /** @multi_queue.pos: Position of queue within the multi-queue group */ u8 pos; /** @multi_queue.valid: Queue belongs to a multi queue group */ diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index bafe42393d22..7cca03d4296c 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -640,6 +640,7 @@ static void xe_guc_exec_queue_group_cgp_sync(struct xe_guc *guc, return; } + xe_lrc_set_multi_queue_priority(q->lrc[0], q->multi_queue.priority); xe_guc_exec_queue_group_cgp_update(xe, q); WRITE_ONCE(group->sync_pending, true); diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c index a05060f75e7e..70eae7d03a27 100644 --- a/drivers/gpu/drm/xe/xe_lrc.c +++ b/drivers/gpu/drm/xe/xe_lrc.c @@ -44,6 +44,11 @@ #define LRC_INDIRECT_CTX_BO_SIZE SZ_4K #define LRC_INDIRECT_RING_STATE_SIZE SZ_4K +#define LRC_PRIORITY GENMASK_ULL(10, 9) +#define LRC_PRIORITY_LOW 0 +#define LRC_PRIORITY_NORMAL 1 +#define LRC_PRIORITY_HIGH 2 + /* * Layout of the LRC and associated data allocated as * lrc->bo: @@ -1399,6 +1404,30 @@ setup_indirect_ctx(struct xe_lrc *lrc, struct xe_hw_engine *hwe) return 0; } +static u8 xe_multi_queue_prio_to_lrc(struct xe_lrc *lrc, enum xe_multi_queue_priority priority) +{ + struct xe_device *xe = gt_to_xe(lrc->gt); + + xe_assert(xe, (priority >= 
XE_MULTI_QUEUE_PRIORITY_LOW && + priority <= XE_MULTI_QUEUE_PRIORITY_HIGH)); + + /* xe_multi_queue_priority is directly mapped to LRC priority values */ + return priority; +} + +/** + * xe_lrc_set_multi_queue_priority() - Set multi queue priority in LRC + * @lrc: Logical Ring Context + * @priority: Multi queue priority of the exec queue + * + * Convert @priority to LRC multi queue priority and update the @lrc descriptor + */ +void xe_lrc_set_multi_queue_priority(struct xe_lrc *lrc, enum xe_multi_queue_priority priority) +{ + lrc->desc &= ~LRC_PRIORITY; + lrc->desc |= FIELD_PREP(LRC_PRIORITY, xe_multi_queue_prio_to_lrc(lrc, priority)); +} + static int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe, struct xe_vm *vm, void *replay_state, u32 ring_size, u16 msix_vec, diff --git a/drivers/gpu/drm/xe/xe_lrc.h b/drivers/gpu/drm/xe/xe_lrc.h index a32472b92242..8acf85273c1a 100644 --- a/drivers/gpu/drm/xe/xe_lrc.h +++ b/drivers/gpu/drm/xe/xe_lrc.h @@ -13,6 +13,7 @@ struct drm_printer; struct xe_bb; struct xe_device; struct xe_exec_queue; +enum xe_multi_queue_priority; enum xe_engine_class; struct xe_gt; struct xe_hw_engine; @@ -135,6 +136,8 @@ void xe_lrc_dump_default(struct drm_printer *p, u32 *xe_lrc_emit_hwe_state_instructions(struct xe_exec_queue *q, u32 *cs); +void xe_lrc_set_multi_queue_priority(struct xe_lrc *lrc, enum xe_multi_queue_priority priority); + struct xe_lrc_snapshot *xe_lrc_snapshot_capture(struct xe_lrc *lrc); void xe_lrc_snapshot_capture_delayed(struct xe_lrc_snapshot *snapshot); void xe_lrc_snapshot_print(struct xe_lrc_snapshot *snapshot, struct drm_printer *p); diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index 19a8ae856a17..fd79d78de2e9 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -1280,6 +1280,9 @@ struct drm_xe_vm_bind { * queue's exec_queue_id is specified in the lower 32 bits of the 'value' field. 
* All the other non-relevant bits of extension's 'value' field while adding the * primary or the secondary queues of the group must be set to 0. + * - %DRM_XE_EXEC_QUEUE_SET_PROPERTY_MULTI_QUEUE_PRIORITY - Set the queue + * priority within the multi-queue group. Current valid priority values are 0–2 + * (default is 1), with higher values indicating higher priority. * * The example below shows how to use @drm_xe_exec_queue_create to create * a simple exec_queue (no parallel submission) of class @@ -1323,6 +1326,7 @@ struct drm_xe_exec_queue_create { #define DRM_XE_EXEC_QUEUE_SET_HANG_REPLAY_STATE 3 #define DRM_XE_EXEC_QUEUE_SET_PROPERTY_MULTI_GROUP 4 #define DRM_XE_MULTI_GROUP_CREATE (1ull << 63) +#define DRM_XE_EXEC_QUEUE_SET_PROPERTY_MULTI_QUEUE_PRIORITY 5 /** @extensions: Pointer to the first extension struct, if any */ __u64 extensions; -- cgit v1.2.3 From 2a31ea17d5c69e51ea454485edd40e4aeff467c1 Mon Sep 17 00:00:00 2001 From: Niranjana Vishwanathapura Date: Wed, 10 Dec 2025 17:02:54 -0800 Subject: drm/xe/multi_queue: Add exec_queue set_property ioctl support This patch adds support for exec_queue set_property ioctl. It is derived from the original work which is part of https://patchwork.freedesktop.org/series/112188/ Currently only DRM_XE_EXEC_QUEUE_SET_PROPERTY_MULTI_QUEUE_PRIORITY property can be dynamically set. 
v2: Check for and update kernel-doc which property this ioctl supports (Matt Brost) Signed-off-by: Matthew Brost Signed-off-by: Pallavi Mishra Signed-off-by: Niranjana Vishwanathapura Reviewed-by: Matthew Brost Link: https://patch.msgid.link/20251211010249.1647839-25-niranjana.vishwanathapura@intel.com --- drivers/gpu/drm/xe/xe_device.c | 2 ++ drivers/gpu/drm/xe/xe_exec_queue.c | 35 +++++++++++++++++++++++++++++++++++ drivers/gpu/drm/xe/xe_exec_queue.h | 2 ++ include/uapi/drm/xe_drm.h | 26 ++++++++++++++++++++++++++ 4 files changed, 65 insertions(+) (limited to 'include/uapi/drm') diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index 1197f914ef77..7a498c8db7b1 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -207,6 +207,8 @@ static const struct drm_ioctl_desc xe_ioctls[] = { DRM_IOCTL_DEF_DRV(XE_MADVISE, xe_vm_madvise_ioctl, DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(XE_VM_QUERY_MEM_RANGE_ATTRS, xe_vm_query_vmas_attrs_ioctl, DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(XE_EXEC_QUEUE_SET_PROPERTY, xe_exec_queue_set_property_ioctl, + DRM_RENDER_ALLOW), }; static long xe_drm_ioctl(struct file *file, unsigned int cmd, unsigned long arg) diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c index d0082eb45a4a..d738a9fea1e1 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue.c +++ b/drivers/gpu/drm/xe/xe_exec_queue.c @@ -790,6 +790,41 @@ static const xe_exec_queue_set_property_fn exec_queue_set_property_funcs[] = { exec_queue_set_multi_queue_priority, }; +int xe_exec_queue_set_property_ioctl(struct drm_device *dev, void *data, + struct drm_file *file) +{ + struct xe_device *xe = to_xe_device(dev); + struct xe_file *xef = to_xe_file(file); + struct drm_xe_exec_queue_set_property *args = data; + struct xe_exec_queue *q; + int ret; + u32 idx; + + if (XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1])) + return -EINVAL; + + if (XE_IOCTL_DBG(xe, args->property != + 
DRM_XE_EXEC_QUEUE_SET_PROPERTY_MULTI_QUEUE_PRIORITY)) + return -EINVAL; + + q = xe_exec_queue_lookup(xef, args->exec_queue_id); + if (XE_IOCTL_DBG(xe, !q)) + return -ENOENT; + + idx = array_index_nospec(args->property, + ARRAY_SIZE(exec_queue_set_property_funcs)); + ret = exec_queue_set_property_funcs[idx](xe, q, args->value); + if (XE_IOCTL_DBG(xe, ret)) + goto err_post_lookup; + + xe_exec_queue_put(q); + return 0; + + err_post_lookup: + xe_exec_queue_put(q); + return ret; +} + static int exec_queue_user_ext_check(struct xe_exec_queue *q, u64 properties) { u64 secondary_queue_valid_props = BIT_ULL(DRM_XE_EXEC_QUEUE_SET_PROPERTY_MULTI_GROUP) | diff --git a/drivers/gpu/drm/xe/xe_exec_queue.h b/drivers/gpu/drm/xe/xe_exec_queue.h index e6daa40003f2..ffcc1feb879e 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue.h +++ b/drivers/gpu/drm/xe/xe_exec_queue.h @@ -125,6 +125,8 @@ int xe_exec_queue_destroy_ioctl(struct drm_device *dev, void *data, struct drm_file *file); int xe_exec_queue_get_property_ioctl(struct drm_device *dev, void *data, struct drm_file *file); +int xe_exec_queue_set_property_ioctl(struct drm_device *dev, void *data, + struct drm_file *file); enum xe_exec_queue_priority xe_exec_queue_device_get_max_priority(struct xe_device *xe); void xe_exec_queue_last_fence_put(struct xe_exec_queue *e, struct xe_vm *vm); diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index fd79d78de2e9..705081bf0d81 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -106,6 +106,7 @@ extern "C" { #define DRM_XE_OBSERVATION 0x0b #define DRM_XE_MADVISE 0x0c #define DRM_XE_VM_QUERY_MEM_RANGE_ATTRS 0x0d +#define DRM_XE_EXEC_QUEUE_SET_PROPERTY 0x0e /* Must be kept compact -- no holes */ @@ -123,6 +124,7 @@ extern "C" { #define DRM_IOCTL_XE_OBSERVATION DRM_IOW(DRM_COMMAND_BASE + DRM_XE_OBSERVATION, struct drm_xe_observation_param) #define DRM_IOCTL_XE_MADVISE DRM_IOW(DRM_COMMAND_BASE + DRM_XE_MADVISE, struct drm_xe_madvise) #define 
DRM_IOCTL_XE_VM_QUERY_MEM_RANGE_ATTRS DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_VM_QUERY_MEM_RANGE_ATTRS, struct drm_xe_vm_query_mem_range_attr) +#define DRM_IOCTL_XE_EXEC_QUEUE_SET_PROPERTY DRM_IOW(DRM_COMMAND_BASE + DRM_XE_EXEC_QUEUE_SET_PROPERTY, struct drm_xe_exec_queue_set_property) /** * DOC: Xe IOCTL Extensions @@ -2315,6 +2317,30 @@ struct drm_xe_vm_query_mem_range_attr { }; +/** + * struct drm_xe_exec_queue_set_property - exec queue set property + * + * Sets execution queue properties dynamically. + * Currently only %DRM_XE_EXEC_QUEUE_SET_PROPERTY_MULTI_QUEUE_PRIORITY + * property can be dynamically set. + */ +struct drm_xe_exec_queue_set_property { + /** @extensions: Pointer to the first extension struct, if any */ + __u64 extensions; + + /** @exec_queue_id: Exec queue ID */ + __u32 exec_queue_id; + + /** @property: property to set */ + __u32 property; + + /** @value: property value */ + __u64 value; + + /** @reserved: Reserved */ + __u64 reserved[2]; +}; + #if defined(__cplusplus) } #endif -- cgit v1.2.3 From 3131a43ecb346ae3b5287ee195779fc38c6fcd11 Mon Sep 17 00:00:00 2001 From: Niranjana Vishwanathapura Date: Wed, 10 Dec 2025 17:03:03 -0800 Subject: drm/xe/multi_queue: Support active group after primary is destroyed Add support to keep the group active after the primary queue is destroyed. Instead of killing the primary queue during exec_queue destroy ioctl, kill it when all the secondary queues of the group are killed. 
Signed-off-by: Niranjana Vishwanathapura Reviewed-by: Matthew Brost Link: https://patch.msgid.link/20251211010249.1647839-34-niranjana.vishwanathapura@intel.com --- drivers/gpu/drm/xe/xe_device.c | 7 +++- drivers/gpu/drm/xe/xe_exec_queue.c | 55 ++++++++++++++++++++++++++++++-- drivers/gpu/drm/xe/xe_exec_queue.h | 2 ++ drivers/gpu/drm/xe/xe_exec_queue_types.h | 4 +++ include/uapi/drm/xe_drm.h | 4 +++ 5 files changed, 69 insertions(+), 3 deletions(-) (limited to 'include/uapi/drm') diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index 7a498c8db7b1..24efb6a3e0ea 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -177,7 +177,12 @@ static void xe_file_close(struct drm_device *dev, struct drm_file *file) xa_for_each(&xef->exec_queue.xa, idx, q) { if (q->vm && q->hwe->hw_engine_group) xe_hw_engine_group_del_exec_queue(q->hwe->hw_engine_group, q); - xe_exec_queue_kill(q); + + if (xe_exec_queue_is_multi_queue_primary(q)) + xe_exec_queue_group_kill_put(q->multi_queue.group); + else + xe_exec_queue_kill(q); + xe_exec_queue_put(q); } xa_for_each(&xef->vm.xa, idx, vm) diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c index d337b7bc2b80..3f4840d135a0 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue.c +++ b/drivers/gpu/drm/xe/xe_exec_queue.c @@ -418,6 +418,26 @@ struct xe_exec_queue *xe_exec_queue_create_bind(struct xe_device *xe, } ALLOW_ERROR_INJECTION(xe_exec_queue_create_bind, ERRNO); +static void xe_exec_queue_group_kill(struct kref *ref) +{ + struct xe_exec_queue_group *group = container_of(ref, struct xe_exec_queue_group, + kill_refcount); + xe_exec_queue_kill(group->primary); +} + +static inline void xe_exec_queue_group_kill_get(struct xe_exec_queue_group *group) +{ + kref_get(&group->kill_refcount); +} + +void xe_exec_queue_group_kill_put(struct xe_exec_queue_group *group) +{ + if (!group) + return; + + kref_put(&group->kill_refcount, xe_exec_queue_group_kill); +} + void 
xe_exec_queue_destroy(struct kref *ref) { struct xe_exec_queue *q = container_of(ref, struct xe_exec_queue, refcount); @@ -650,6 +670,7 @@ static int xe_exec_queue_group_init(struct xe_device *xe, struct xe_exec_queue * group->primary = q; group->cgp_bo = bo; INIT_LIST_HEAD(&group->list); + kref_init(&group->kill_refcount); xa_init_flags(&group->xa, XA_FLAGS_ALLOC1); mutex_init(&group->list_lock); q->multi_queue.group = group; @@ -725,6 +746,11 @@ static int xe_exec_queue_group_add(struct xe_device *xe, struct xe_exec_queue *q q->multi_queue.pos = pos; + if (group->primary->multi_queue.keep_active) { + xe_exec_queue_group_kill_get(group); + q->multi_queue.keep_active = true; + } + return 0; } @@ -738,6 +764,11 @@ static void xe_exec_queue_group_delete(struct xe_device *xe, struct xe_exec_queu lrc = xa_erase(&group->xa, q->multi_queue.pos); xe_assert(xe, lrc); xe_lrc_put(lrc); + + if (q->multi_queue.keep_active) { + xe_exec_queue_group_kill_put(group); + q->multi_queue.keep_active = false; + } } static int exec_queue_set_multi_group(struct xe_device *xe, struct xe_exec_queue *q, @@ -759,12 +790,24 @@ static int exec_queue_set_multi_group(struct xe_device *xe, struct xe_exec_queue return -EINVAL; if (value & DRM_XE_MULTI_GROUP_CREATE) { - if (XE_IOCTL_DBG(xe, value & ~DRM_XE_MULTI_GROUP_CREATE)) + if (XE_IOCTL_DBG(xe, value & ~(DRM_XE_MULTI_GROUP_CREATE | + DRM_XE_MULTI_GROUP_KEEP_ACTIVE))) + return -EINVAL; + + /* + * KEEP_ACTIVE is not supported in preempt fence mode as in that mode, + * VM_DESTROY ioctl expects all exec queues of that VM are already killed. 
+ */ + if (XE_IOCTL_DBG(xe, (value & DRM_XE_MULTI_GROUP_KEEP_ACTIVE) && + xe_vm_in_preempt_fence_mode(q->vm))) return -EINVAL; q->multi_queue.valid = true; q->multi_queue.is_primary = true; q->multi_queue.pos = 0; + if (value & DRM_XE_MULTI_GROUP_KEEP_ACTIVE) + q->multi_queue.keep_active = true; + return 0; } @@ -1312,6 +1355,11 @@ void xe_exec_queue_kill(struct xe_exec_queue *q) q->ops->kill(q); xe_vm_remove_compute_exec_queue(q->vm, q); + + if (!xe_exec_queue_is_multi_queue_primary(q) && q->multi_queue.keep_active) { + xe_exec_queue_group_kill_put(q->multi_queue.group); + q->multi_queue.keep_active = false; + } } int xe_exec_queue_destroy_ioctl(struct drm_device *dev, void *data, @@ -1338,7 +1386,10 @@ int xe_exec_queue_destroy_ioctl(struct drm_device *dev, void *data, if (q->vm && q->hwe->hw_engine_group) xe_hw_engine_group_del_exec_queue(q->hwe->hw_engine_group, q); - xe_exec_queue_kill(q); + if (xe_exec_queue_is_multi_queue_primary(q)) + xe_exec_queue_group_kill_put(q->multi_queue.group); + else + xe_exec_queue_kill(q); trace_xe_exec_queue_close(q); xe_exec_queue_put(q); diff --git a/drivers/gpu/drm/xe/xe_exec_queue.h b/drivers/gpu/drm/xe/xe_exec_queue.h index ffcc1feb879e..10abed98fb6b 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue.h +++ b/drivers/gpu/drm/xe/xe_exec_queue.h @@ -113,6 +113,8 @@ static inline struct xe_exec_queue *xe_exec_queue_multi_queue_primary(struct xe_ return xe_exec_queue_is_multi_queue(q) ? 
q->multi_queue.group->primary : q; } +void xe_exec_queue_group_kill_put(struct xe_exec_queue_group *group); + bool xe_exec_queue_is_lr(struct xe_exec_queue *q); bool xe_exec_queue_is_idle(struct xe_exec_queue *q); diff --git a/drivers/gpu/drm/xe/xe_exec_queue_types.h b/drivers/gpu/drm/xe/xe_exec_queue_types.h index 5fc516b0bb77..67ea5eebf70b 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue_types.h +++ b/drivers/gpu/drm/xe/xe_exec_queue_types.h @@ -62,6 +62,8 @@ struct xe_exec_queue_group { struct list_head list; /** @list_lock: Secondary queue list lock */ struct mutex list_lock; + /** @kill_refcount: ref count to kill primary queue */ + struct kref kill_refcount; /** @sync_pending: CGP_SYNC_DONE g2h response pending */ bool sync_pending; /** @banned: Group banned */ @@ -161,6 +163,8 @@ struct xe_exec_queue { u8 valid:1; /** @multi_queue.is_primary: Is primary queue (Q0) of the group */ u8 is_primary:1; + /** @multi_queue.keep_active: Keep the group active after primary is destroyed */ + u8 keep_active:1; } multi_queue; /** @sched_props: scheduling properties */ diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index 705081bf0d81..bd6154e3b728 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -1280,6 +1280,9 @@ struct drm_xe_vm_bind { * then a new multi-queue group is created with this queue as the primary queue * (Q0). Otherwise, the queue gets added to the multi-queue group whose primary * queue's exec_queue_id is specified in the lower 32 bits of the 'value' field. + * If the extension's 'value' field has %DRM_XE_MULTI_GROUP_KEEP_ACTIVE flag + * set, then the multi-queue group is kept active after the primary queue is + * destroyed. * All the other non-relevant bits of extension's 'value' field while adding the * primary or the secondary queues of the group must be set to 0. 
* - %DRM_XE_EXEC_QUEUE_SET_PROPERTY_MULTI_QUEUE_PRIORITY - Set the queue @@ -1328,6 +1331,7 @@ struct drm_xe_exec_queue_create { #define DRM_XE_EXEC_QUEUE_SET_HANG_REPLAY_STATE 3 #define DRM_XE_EXEC_QUEUE_SET_PROPERTY_MULTI_GROUP 4 #define DRM_XE_MULTI_GROUP_CREATE (1ull << 63) +#define DRM_XE_MULTI_GROUP_KEEP_ACTIVE (1ull << 62) #define DRM_XE_EXEC_QUEUE_SET_PROPERTY_MULTI_QUEUE_PRIORITY 5 /** @extensions: Pointer to the first extension struct, if any */ __u64 extensions; -- cgit v1.2.3 From b07bac9bd708ec468cd1b8a5fe70ae2ac9b0a11c Mon Sep 17 00:00:00 2001 From: Shuicheng Lin Date: Fri, 5 Dec 2025 23:47:17 +0000 Subject: drm/xe: Limit num_syncs to prevent oversized allocations MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The exec and vm_bind ioctl allow userspace to specify an arbitrary num_syncs value. Without bounds checking, a very large num_syncs can force an excessively large allocation, leading to kernel warnings from the page allocator as below. Introduce DRM_XE_MAX_SYNCS (set to 1024) and reject any request exceeding this limit. " ------------[ cut here ]------------ WARNING: CPU: 0 PID: 1217 at mm/page_alloc.c:5124 __alloc_frozen_pages_noprof+0x2f8/0x2180 mm/page_alloc.c:5124 ... 
Call Trace: alloc_pages_mpol+0xe4/0x330 mm/mempolicy.c:2416 ___kmalloc_large_node+0xd8/0x110 mm/slub.c:4317 __kmalloc_large_node_noprof+0x18/0xe0 mm/slub.c:4348 __do_kmalloc_node mm/slub.c:4364 [inline] __kmalloc_noprof+0x3d4/0x4b0 mm/slub.c:4388 kmalloc_noprof include/linux/slab.h:909 [inline] kmalloc_array_noprof include/linux/slab.h:948 [inline] xe_exec_ioctl+0xa47/0x1e70 drivers/gpu/drm/xe/xe_exec.c:158 drm_ioctl_kernel+0x1f1/0x3e0 drivers/gpu/drm/drm_ioctl.c:797 drm_ioctl+0x5e7/0xc50 drivers/gpu/drm/drm_ioctl.c:894 xe_drm_ioctl+0x10b/0x170 drivers/gpu/drm/xe/xe_device.c:224 vfs_ioctl fs/ioctl.c:51 [inline] __do_sys_ioctl fs/ioctl.c:598 [inline] __se_sys_ioctl fs/ioctl.c:584 [inline] __x64_sys_ioctl+0x18b/0x210 fs/ioctl.c:584 do_syscall_x64 arch/x86/entry/syscall_64.c:63 [inline] do_syscall_64+0xbb/0x380 arch/x86/entry/syscall_64.c:94 entry_SYSCALL_64_after_hwframe+0x77/0x7f ... " v2: Add "Reported-by" and Cc stable kernels. v3: Change XE_MAX_SYNCS from 64 to 1024. (Matt & Ashutosh) v4: s/XE_MAX_SYNCS/DRM_XE_MAX_SYNCS/ (Matt) v5: Do the check at the top of the exec func. 
(Matt) Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs") Reported-by: Koen Koning Reported-by: Peter Senna Tschudin Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/6450 Cc: # v6.12+ Cc: Matthew Brost Cc: Michal Mrozek Cc: Carl Zhang Cc: José Roberto de Souza Cc: Lionel Landwerlin Cc: Ivan Briano Cc: Thomas Hellström Cc: Ashutosh Dixit Signed-off-by: Shuicheng Lin Reviewed-by: Matthew Brost Signed-off-by: Matthew Brost Link: https://patch.msgid.link/20251205234715.2476561-5-shuicheng.lin@intel.com --- drivers/gpu/drm/xe/xe_exec.c | 3 ++- drivers/gpu/drm/xe/xe_vm.c | 3 +++ include/uapi/drm/xe_drm.h | 1 + 3 files changed, 6 insertions(+), 1 deletion(-) (limited to 'include/uapi/drm') diff --git a/drivers/gpu/drm/xe/xe_exec.c b/drivers/gpu/drm/xe/xe_exec.c index 4d81210e41f5..fd9480031750 100644 --- a/drivers/gpu/drm/xe/xe_exec.c +++ b/drivers/gpu/drm/xe/xe_exec.c @@ -132,7 +132,8 @@ int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file) if (XE_IOCTL_DBG(xe, args->extensions) || XE_IOCTL_DBG(xe, args->pad[0] || args->pad[1] || args->pad[2]) || - XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1])) + XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]) || + XE_IOCTL_DBG(xe, args->num_syncs > DRM_XE_MAX_SYNCS)) return -EINVAL; q = xe_exec_queue_lookup(xef, args->exec_queue_id); diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index bd787aae4248..ca546666a5c9 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -3341,6 +3341,9 @@ static int vm_bind_ioctl_check_args(struct xe_device *xe, struct xe_vm *vm, if (XE_IOCTL_DBG(xe, args->extensions)) return -EINVAL; + if (XE_IOCTL_DBG(xe, args->num_syncs > DRM_XE_MAX_SYNCS)) + return -EINVAL; + if (args->num_binds > 1) { u64 __user *bind_user = u64_to_user_ptr(args->vector_of_binds); diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index bd6154e3b728..c59587529986 100644 --- 
a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -1504,6 +1504,7 @@ struct drm_xe_exec { /** @exec_queue_id: Exec queue ID for the batch buffer */ __u32 exec_queue_id; +#define DRM_XE_MAX_SYNCS 1024 /** @num_syncs: Amount of struct drm_xe_sync in array. */ __u32 num_syncs; -- cgit v1.2.3 From ab39e2a8f7aed72929bfc1d58eb5e8766f1d85db Mon Sep 17 00:00:00 2001 From: Ashutosh Dixit Date: Fri, 5 Dec 2025 13:26:11 -0800 Subject: drm/xe/oa/uapi: Expose MERT OA unit A MERT OA unit is available in the SoC on some platforms. Add support for this OA unit and expose it to userspace. The MERT OA unit does not have any HW engines attached, but is otherwise similar to an OAM unit. Signed-off-by: Lucas De Marchi Reviewed-by: Umesh Nerlige Ramappa Signed-off-by: Ashutosh Dixit Link: https://patch.msgid.link/20251205212613.826224-2-ashutosh.dixit@intel.com --- drivers/gpu/drm/xe/regs/xe_oa_regs.h | 9 +++++++++ drivers/gpu/drm/xe/xe_oa.c | 37 +++++++++++++++++++++++++++++++++--- include/uapi/drm/xe_drm.h | 3 +++ 3 files changed, 46 insertions(+), 3 deletions(-) (limited to 'include/uapi/drm') diff --git a/drivers/gpu/drm/xe/regs/xe_oa_regs.h b/drivers/gpu/drm/xe/regs/xe_oa_regs.h index 638ab3b99eb0..04a729e610aa 100644 --- a/drivers/gpu/drm/xe/regs/xe_oa_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_oa_regs.h @@ -108,4 +108,13 @@ #define XE_OAM_SCMI_0_BASE_ADJ (MEDIA_GT_GSI_OFFSET + XE_OAM_SCMI_0_BASE) #define XE_OAM_SCMI_1_BASE_ADJ (MEDIA_GT_GSI_OFFSET + XE_OAM_SCMI_1_BASE) +#define OAMERT_CONTROL XE_REG(0x1453a0) +#define OAMERT_DEBUG XE_REG(0x1453a4) +#define OAMERT_STATUS XE_REG(0x1453a8) +#define OAMERT_HEAD_POINTER XE_REG(0x1453ac) +#define OAMERT_TAIL_POINTER XE_REG(0x1453b0) +#define OAMERT_BUFFER XE_REG(0x1453b4) +#define OAMERT_CONTEXT_CONTROL XE_REG(0x1453c8) +#define OAMERT_MMIO_TRG XE_REG(0x1453cc) + #endif diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c index 92aa25fc0422..d4e1585004e2 100644 --- a/drivers/gpu/drm/xe/xe_oa.c +++ 
b/drivers/gpu/drm/xe/xe_oa.c @@ -1940,6 +1940,7 @@ static bool oa_unit_supports_oa_format(struct xe_oa_open_param *param, int type) type == DRM_XE_OA_FMT_TYPE_OAC || type == DRM_XE_OA_FMT_TYPE_PEC; case DRM_XE_OA_UNIT_TYPE_OAM: case DRM_XE_OA_UNIT_TYPE_OAM_SAG: + case DRM_XE_OA_UNIT_TYPE_MERT: return type == DRM_XE_OA_FMT_TYPE_OAM || type == DRM_XE_OA_FMT_TYPE_OAM_MPEC; default: return false; @@ -2227,6 +2228,8 @@ static const struct xe_mmio_range xe2_oa_mux_regs[] = { { .start = 0xE18C, .end = 0xE18C }, /* SAMPLER_MODE */ { .start = 0xE590, .end = 0xE590 }, /* TDL_LSC_LAT_MEASURE_TDL_GFX */ { .start = 0x13000, .end = 0x137FC }, /* PES_0_PESL0 - PES_63_UPPER_PESL3 */ + { .start = 0x145194, .end = 0x145194 }, /* SYS_MEM_LAT_MEASURE */ + { .start = 0x145340, .end = 0x14537C }, /* MERTSS_PES_0 - MERTSS_PES_7 */ {}, }; @@ -2518,7 +2521,12 @@ int xe_oa_register(struct xe_device *xe) static u32 num_oa_units_per_gt(struct xe_gt *gt) { if (xe_gt_is_main_type(gt) || GRAPHICS_VER(gt_to_xe(gt)) < 20) - return 1; + /* + * Mert OA unit belongs to the SoC, not a gt, so should be accessed using + * xe_root_tile_mmio(). However, for all known platforms this is the same as + * accessing via xe_root_mmio_gt()->mmio. + */ + return xe_device_has_mert(gt_to_xe(gt)) ? 
2 : 1; else if (!IS_DGFX(gt_to_xe(gt))) return XE_OAM_UNIT_SCMI_0 + 1; /* SAG + SCMI_0 */ else @@ -2602,6 +2610,22 @@ static struct xe_oa_regs __oag_regs(void) }; } +static struct xe_oa_regs __oamert_regs(void) +{ + return (struct xe_oa_regs) { + .base = 0, + .oa_head_ptr = OAMERT_HEAD_POINTER, + .oa_tail_ptr = OAMERT_TAIL_POINTER, + .oa_buffer = OAMERT_BUFFER, + .oa_ctx_ctrl = OAMERT_CONTEXT_CONTROL, + .oa_ctrl = OAMERT_CONTROL, + .oa_debug = OAMERT_DEBUG, + .oa_status = OAMERT_STATUS, + .oa_mmio_trg = OAMERT_MMIO_TRG, + .oa_ctrl_counter_select_mask = OAM_CONTROL_COUNTER_SEL_MASK, + }; +} + static void __xe_oa_init_oa_units(struct xe_gt *gt) { const u32 oam_base_addr[] = { @@ -2615,8 +2639,15 @@ static void __xe_oa_init_oa_units(struct xe_gt *gt) struct xe_oa_unit *u = &gt->oa.oa_unit[i]; if (xe_gt_is_main_type(gt)) { - u->regs = __oag_regs(); - u->type = DRM_XE_OA_UNIT_TYPE_OAG; + if (!i) { + u->regs = __oag_regs(); + u->type = DRM_XE_OA_UNIT_TYPE_OAG; + } else { + xe_gt_assert(gt, xe_device_has_mert(gt_to_xe(gt))); + xe_gt_assert(gt, gt == xe_root_mmio_gt(gt_to_xe(gt))); + u->regs = __oamert_regs(); + u->type = DRM_XE_OA_UNIT_TYPE_MERT; + } } else { xe_gt_assert(gt, GRAPHICS_VERx100(gt_to_xe(gt)) >= 1270); u->regs = __oam_regs(oam_base_addr[i]); diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index c59587529986..726e481574fe 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -1696,6 +1696,9 @@ enum drm_xe_oa_unit_type { /** @DRM_XE_OA_UNIT_TYPE_OAM_SAG: OAM_SAG OA unit */ DRM_XE_OA_UNIT_TYPE_OAM_SAG, + + /** @DRM_XE_OA_UNIT_TYPE_MERT: MERT OA unit */ + DRM_XE_OA_UNIT_TYPE_MERT, }; /** -- cgit v1.2.3