From 8169b2097d88d99d7e4a72e20e4b549efe9eb8d7 Mon Sep 17 00:00:00 2001 From: Ashutosh Dixit Date: Wed, 3 Jul 2024 09:48:01 -0700 Subject: drm/xe/uapi: Rename xe perf layer as xe observation layer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In Xe, the perf layer allows capture of HW counter streams. These HW counters are generally performance related but don't have to be necessarily so. Also, the name "perf" is a carryover from i915 and is not preferred. Here we propose the name "observation" for this common layer which allows capture of different types of these counter streams. v2: Rename observability layer to observation layer (Lucas/Rodrigo) v3: Rename sysctl file to "observation_paranoid" (Jose) Fixes: 52c2e956dceb ("drm/xe/perf/uapi: "Perf" layer to support multiple perf counter stream types") Fixes: fe8929bdf835 ("drm/xe/perf/uapi: Add perf_stream_paranoid sysctl") Acked-by: Lucas De Marchi Acked-by: Rodrigo Vivi Signed-off-by: Ashutosh Dixit Reviewed-by: Umesh Nerlige Ramappa Acked-by: José Roberto de Souza Link: https://patchwork.freedesktop.org/patch/msgid/20240703164801.2561423-1-ashutosh.dixit@intel.com --- include/uapi/drm/xe_drm.h | 102 +++++++++++++++++++++++----------------------- 1 file changed, 52 insertions(+), 50 deletions(-) (limited to 'include') diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index 12eaa8532b5c..33544ef78d3e 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -80,7 +80,7 @@ extern "C" { * - &DRM_IOCTL_XE_EXEC_QUEUE_GET_PROPERTY * - &DRM_IOCTL_XE_EXEC * - &DRM_IOCTL_XE_WAIT_USER_FENCE - * - &DRM_IOCTL_XE_PERF + * - &DRM_IOCTL_XE_OBSERVATION */ /* @@ -101,7 +101,7 @@ extern "C" { #define DRM_XE_EXEC_QUEUE_GET_PROPERTY 0x08 #define DRM_XE_EXEC 0x09 #define DRM_XE_WAIT_USER_FENCE 0x0a -#define DRM_XE_PERF 0x0b +#define DRM_XE_OBSERVATION 0x0b /* Must be kept compact -- no holes */ @@ -116,7 +116,7 @@ extern "C" { #define DRM_IOCTL_XE_EXEC_QUEUE_GET_PROPERTY DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_EXEC_QUEUE_GET_PROPERTY, struct drm_xe_exec_queue_get_property) #define DRM_IOCTL_XE_EXEC DRM_IOW(DRM_COMMAND_BASE + DRM_XE_EXEC, struct drm_xe_exec) #define DRM_IOCTL_XE_WAIT_USER_FENCE DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_WAIT_USER_FENCE, struct drm_xe_wait_user_fence) -#define DRM_IOCTL_XE_PERF DRM_IOW(DRM_COMMAND_BASE + DRM_XE_PERF, struct drm_xe_perf_param) +#define DRM_IOCTL_XE_OBSERVATION DRM_IOW(DRM_COMMAND_BASE + DRM_XE_OBSERVATION, struct drm_xe_observation_param) /** * DOC: Xe IOCTL Extensions @@ -1376,66 +1376,67 @@ struct drm_xe_wait_user_fence { }; /** - * enum drm_xe_perf_type - Perf stream types + * enum drm_xe_observation_type - Observation stream types */ -enum drm_xe_perf_type { - /** @DRM_XE_PERF_TYPE_OA: OA perf stream type */ - DRM_XE_PERF_TYPE_OA, +enum drm_xe_observation_type { + /** @DRM_XE_OBSERVATION_TYPE_OA: OA observation stream type */ + DRM_XE_OBSERVATION_TYPE_OA, }; /** - * enum drm_xe_perf_op - Perf stream ops + * enum drm_xe_observation_op - Observation stream ops */ -enum drm_xe_perf_op { - /** @DRM_XE_PERF_OP_STREAM_OPEN: Open a perf counter stream */ - DRM_XE_PERF_OP_STREAM_OPEN, +enum drm_xe_observation_op { + /** @DRM_XE_OBSERVATION_OP_STREAM_OPEN: Open an observation stream */ + DRM_XE_OBSERVATION_OP_STREAM_OPEN, - /** @DRM_XE_PERF_OP_ADD_CONFIG: Add perf stream config */ - DRM_XE_PERF_OP_ADD_CONFIG, + /** @DRM_XE_OBSERVATION_OP_ADD_CONFIG: Add observation stream config */ + DRM_XE_OBSERVATION_OP_ADD_CONFIG, - /** @DRM_XE_PERF_OP_REMOVE_CONFIG: Remove perf stream config */ - DRM_XE_PERF_OP_REMOVE_CONFIG, + /** @DRM_XE_OBSERVATION_OP_REMOVE_CONFIG: Remove observation stream config */ + DRM_XE_OBSERVATION_OP_REMOVE_CONFIG, }; /** - * struct drm_xe_perf_param - Input of &DRM_XE_PERF + * struct drm_xe_observation_param - Input of &DRM_XE_OBSERVATION * - * The perf layer enables multiplexing perf counter streams of multiple - * types. The actual params for a particular stream operation are supplied - * via the @param pointer (use __copy_from_user to get these params). + * The observation layer enables multiplexing observation streams of + * multiple types. The actual params for a particular stream operation are + * supplied via the @param pointer (use __copy_from_user to get these + * params). */ -struct drm_xe_perf_param { +struct drm_xe_observation_param { /** @extensions: Pointer to the first extension struct, if any */ __u64 extensions; - /** @perf_type: Perf stream type, of enum @drm_xe_perf_type */ - __u64 perf_type; - /** @perf_op: Perf op, of enum @drm_xe_perf_op */ - __u64 perf_op; + /** @observation_type: observation stream type, of enum @drm_xe_observation_type */ + __u64 observation_type; + /** @observation_op: observation stream op, of enum @drm_xe_observation_op */ + __u64 observation_op; /** @param: Pointer to actual stream params */ __u64 param; }; /** - * enum drm_xe_perf_ioctls - Perf fd ioctl's + * enum drm_xe_observation_ioctls - Observation stream fd ioctl's * - * Information exchanged between userspace and kernel for perf fd ioctl's - * is stream type specific + * Information exchanged between userspace and kernel for observation fd + * ioctl's is stream type specific */ -enum drm_xe_perf_ioctls { - /** @DRM_XE_PERF_IOCTL_ENABLE: Enable data capture for a stream */ - DRM_XE_PERF_IOCTL_ENABLE = _IO('i', 0x0), +enum drm_xe_observation_ioctls { + /** @DRM_XE_OBSERVATION_IOCTL_ENABLE: Enable data capture for an observation stream */ + DRM_XE_OBSERVATION_IOCTL_ENABLE = _IO('i', 0x0), - /** @DRM_XE_PERF_IOCTL_DISABLE: Disable data capture for a stream */ - DRM_XE_PERF_IOCTL_DISABLE = _IO('i', 0x1), + /** @DRM_XE_OBSERVATION_IOCTL_DISABLE: Disable data capture for a observation stream */ + DRM_XE_OBSERVATION_IOCTL_DISABLE = _IO('i', 0x1), - /** @DRM_XE_PERF_IOCTL_CONFIG: Change stream configuration */ - DRM_XE_PERF_IOCTL_CONFIG = _IO('i', 0x2), + /** @DRM_XE_OBSERVATION_IOCTL_CONFIG: Change observation stream configuration */ + DRM_XE_OBSERVATION_IOCTL_CONFIG = _IO('i', 0x2), - /** @DRM_XE_PERF_IOCTL_STATUS: Return stream status */ - DRM_XE_PERF_IOCTL_STATUS = _IO('i', 0x3), + /** @DRM_XE_OBSERVATION_IOCTL_STATUS: Return observation stream status */ + DRM_XE_OBSERVATION_IOCTL_STATUS = _IO('i', 0x3), - /** @DRM_XE_PERF_IOCTL_INFO: Return stream info */ - DRM_XE_PERF_IOCTL_INFO = _IO('i', 0x4), + /** @DRM_XE_OBSERVATION_IOCTL_INFO: Return observation stream info */ + DRM_XE_OBSERVATION_IOCTL_INFO = _IO('i', 0x4), }; /** @@ -1546,12 +1547,12 @@ enum drm_xe_oa_format_type { * Stream params are specified as a chain of @drm_xe_ext_set_property * struct's, with @property values from enum @drm_xe_oa_property_id and * @drm_xe_user_extension base.name set to @DRM_XE_OA_EXTENSION_SET_PROPERTY. - * @param field in struct @drm_xe_perf_param points to the first + * @param field in struct @drm_xe_observation_param points to the first * @drm_xe_ext_set_property struct. * - * Exactly the same mechanism is also used for stream reconfiguration using - * the @DRM_XE_PERF_IOCTL_CONFIG perf fd ioctl, though only a subset of - * properties below can be specified for stream reconfiguration. + * Exactly the same mechanism is also used for stream reconfiguration using the + * @DRM_XE_OBSERVATION_IOCTL_CONFIG observation stream fd ioctl, though only a + * subset of properties below can be specified for stream reconfiguration. */ enum drm_xe_oa_property_id { #define DRM_XE_OA_EXTENSION_SET_PROPERTY 0 @@ -1571,11 +1572,11 @@ enum drm_xe_oa_property_id { /** * @DRM_XE_OA_PROPERTY_OA_METRIC_SET: OA metrics defining contents of OA - * reports, previously added via @DRM_XE_PERF_OP_ADD_CONFIG. + * reports, previously added via @DRM_XE_OBSERVATION_OP_ADD_CONFIG. */ DRM_XE_OA_PROPERTY_OA_METRIC_SET, - /** @DRM_XE_OA_PROPERTY_OA_FORMAT: Perf counter report format */ + /** @DRM_XE_OA_PROPERTY_OA_FORMAT: OA counter report format */ DRM_XE_OA_PROPERTY_OA_FORMAT, /* * OA_FORMAT's are specified the same way as in PRM/Bspec 52198/60942, @@ -1596,13 +1597,13 @@ enum drm_xe_oa_property_id { /** * @DRM_XE_OA_PROPERTY_OA_DISABLED: A value of 1 will open the OA - * stream in a DISABLED state (see @DRM_XE_PERF_IOCTL_ENABLE). + * stream in a DISABLED state (see @DRM_XE_OBSERVATION_IOCTL_ENABLE). */ DRM_XE_OA_PROPERTY_OA_DISABLED, /** * @DRM_XE_OA_PROPERTY_EXEC_QUEUE_ID: Open the stream for a specific - * @exec_queue_id. Perf queries can be executed on this exec queue. + * @exec_queue_id. OA queries can be executed on this exec queue. */ DRM_XE_OA_PROPERTY_EXEC_QUEUE_ID, @@ -1622,7 +1623,7 @@ enum drm_xe_oa_property_id { /** * struct drm_xe_oa_config - OA metric configuration * - * Multiple OA configs can be added using @DRM_XE_PERF_OP_ADD_CONFIG. A + * Multiple OA configs can be added using @DRM_XE_OBSERVATION_OP_ADD_CONFIG. A * particular config can be specified when opening an OA stream using * @DRM_XE_OA_PROPERTY_OA_METRIC_SET property. */ @@ -1645,8 +1646,9 @@ struct drm_xe_oa_config { /** * struct drm_xe_oa_stream_status - OA stream status returned from - * @DRM_XE_PERF_IOCTL_STATUS perf fd ioctl. Userspace can call the ioctl to - * query stream status in response to EIO errno from perf fd read(). + * @DRM_XE_OBSERVATION_IOCTL_STATUS observation stream fd ioctl. Userspace can + * call the ioctl to query stream status in response to EIO errno from + * observation fd read(). */ struct drm_xe_oa_stream_status { /** @extensions: Pointer to the first extension struct, if any */ @@ -1665,7 +1667,7 @@ struct drm_xe_oa_stream_status { /** * struct drm_xe_oa_stream_info - OA stream info returned from - * @DRM_XE_PERF_IOCTL_INFO perf fd ioctl + * @DRM_XE_OBSERVATION_IOCTL_INFO observation stream fd ioctl */ struct drm_xe_oa_stream_info { /** @extensions: Pointer to the first extension struct, if any */ -- cgit v1.2.3 From 01e0cfc994be484ddcb9e121e353e51d8bb837c0 Mon Sep 17 00:00:00 2001 From: Thomas Hellström Date: Fri, 5 Jul 2024 15:28:28 +0200 Subject: drm/xe: Use write-back caching mode for system memory on DGFX MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The caching mode for buffer objects with VRAM as a possible placement was forced to write-combined, regardless of placement. However, write-combined system memory is expensive to allocate and even though it is pooled, the pool is expensive to shrink, since it involves global CPU TLB flushes. Moreover write-combined system memory from TTM is only reliably available on x86 and DGFX doesn't have an x86 restriction. So regardless of the cpu caching mode selected for a bo, internally use write-back caching mode for system memory on DGFX. Coherency is maintained, but user-space clients may perceive a difference in cpu access speeds. v2: - Update RB- and Ack tags. - Rephrase wording in xe_drm.h (Matt Roper) v3: - Really rephrase wording. Signed-off-by: Thomas Hellström Fixes: 622f709ca629 ("drm/xe/uapi: Add support for CPU caching mode") Cc: Pallavi Mishra Cc: Matthew Auld Cc: dri-devel@lists.freedesktop.org Cc: Joonas Lahtinen Cc: Effie Yu Cc: Matthew Brost Cc: Maarten Lankhorst Cc: Jose Souza Cc: Michal Mrozek Cc: # v6.8+ Acked-by: Matthew Auld Acked-by: José Roberto de Souza Reviewed-by: Rodrigo Vivi Fixes: 622f709ca629 ("drm/xe/uapi: Add support for CPU caching mode") Acked-by: Michal Mrozek Acked-by: Effie Yu #On chat Link: https://patchwork.freedesktop.org/patch/msgid/20240705132828.27714-1-thomas.hellstrom@linux.intel.com --- drivers/gpu/drm/xe/xe_bo.c | 47 ++++++++++++++++++++++++---------------- drivers/gpu/drm/xe/xe_bo_types.h | 3 ++- include/uapi/drm/xe_drm.h | 8 ++++++- 3 files changed, 37 insertions(+), 21 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c index 65c696966e96..31192d983d9e 100644 --- a/drivers/gpu/drm/xe/xe_bo.c +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -343,7 +343,7 @@ static struct ttm_tt *xe_ttm_tt_create(struct ttm_buffer_object *ttm_bo, struct xe_device *xe = xe_bo_device(bo); struct xe_ttm_tt *tt; unsigned long extra_pages; - enum ttm_caching caching; + enum ttm_caching caching = ttm_cached; int err; tt = kzalloc(sizeof(*tt), GFP_KERNEL); @@ -357,26 +357,35 @@ static struct ttm_tt *xe_ttm_tt_create(struct ttm_buffer_object *ttm_bo, extra_pages = DIV_ROUND_UP(xe_device_ccs_bytes(xe, bo->size), PAGE_SIZE); - switch (bo->cpu_caching) { - case DRM_XE_GEM_CPU_CACHING_WC: - caching = ttm_write_combined; - break; - default: - caching = ttm_cached; - break; - } - - WARN_ON((bo->flags & XE_BO_FLAG_USER) && !bo->cpu_caching); - /* - * Display scanout is always non-coherent with the CPU cache. - * - * For Xe_LPG and beyond, PPGTT PTE lookups are also non-coherent and - * require a CPU:WC mapping. + * DGFX system memory is always WB / ttm_cached, since + * other caching modes are only supported on x86. DGFX + * GPU system memory accesses are always coherent with the + * CPU. */ - if ((!bo->cpu_caching && bo->flags & XE_BO_FLAG_SCANOUT) || - (xe->info.graphics_verx100 >= 1270 && bo->flags & XE_BO_FLAG_PAGETABLE)) - caching = ttm_write_combined; + if (!IS_DGFX(xe)) { + switch (bo->cpu_caching) { + case DRM_XE_GEM_CPU_CACHING_WC: + caching = ttm_write_combined; + break; + default: + caching = ttm_cached; + break; + } + + WARN_ON((bo->flags & XE_BO_FLAG_USER) && !bo->cpu_caching); + + /* + * Display scanout is always non-coherent with the CPU cache. + * + * For Xe_LPG and beyond, PPGTT PTE lookups are also + * non-coherent and require a CPU:WC mapping. + */ + if ((!bo->cpu_caching && bo->flags & XE_BO_FLAG_SCANOUT) || + (xe->info.graphics_verx100 >= 1270 && + bo->flags & XE_BO_FLAG_PAGETABLE)) + caching = ttm_write_combined; + } if (bo->flags & XE_BO_FLAG_NEEDS_UC) { /* diff --git a/drivers/gpu/drm/xe/xe_bo_types.h b/drivers/gpu/drm/xe/xe_bo_types.h index 02d68873558a..ebc8abf7930a 100644 --- a/drivers/gpu/drm/xe/xe_bo_types.h +++ b/drivers/gpu/drm/xe/xe_bo_types.h @@ -68,7 +68,8 @@ struct xe_bo { /** * @cpu_caching: CPU caching mode. Currently only used for userspace - * objects. + * objects. Exceptions are system memory on DGFX, which is always + * WB. */ u16 cpu_caching; diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index 33544ef78d3e..19619d4952a8 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -783,7 +783,13 @@ struct drm_xe_gem_create { #define DRM_XE_GEM_CPU_CACHING_WC 2 /** * @cpu_caching: The CPU caching mode to select for this object. If - * mmaping the object the mode selected here will also be used. + * mmaping the object the mode selected here will also be used. The + * exception is when mapping system memory (including data evicted + * to system) on discrete GPUs. The caching mode selected will + * then be overridden to DRM_XE_GEM_CPU_CACHING_WB, and coherency + * between GPU- and CPU is guaranteed. The caching mode of + * existing CPU-mappings will be updated transparently to + * user-space clients. */ __u16 cpu_caching; /** @pad: MBZ */ -- cgit v1.2.3 From 7108b4a589cd6d3a2c1276fd610b3500f46de66a Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Wed, 10 Jul 2024 15:02:27 -0700 Subject: drm/xe/uapi: Expose SIMD16 EU mask in topology query MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit PVC, Xe2 and later platforms have 16-wide EUs. We were implicitly reporting for PVC the number of 16-wide EUs without giving userspace any hint that they were different than for other platforms. Xe2 and later also have 16-wide, but in those cases the reported number would correspond to the 8-wide count. To avoid confusion and make sure the right number is used by userspace depending on the platform, add a new item to the topology query and drop the one that is not available. The new mask reported for both PVC and Xe2 should now match the numbers reported via hwconfig. v2: Use a different topo item with EU type in its name to report the new mask instead of adding the type itself as the item (Matt Roper) Reviewed-by: Matt Roper Acked-by: José Roberto de Souza Acked-by: Mateusz Jablonski Acked-by: Wenbin Lu Acked-by: Effie Yu Acked-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20240710220446.2169797-1-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_gt_topology.c | 27 ++++++++++++++++++++++----- drivers/gpu/drm/xe/xe_gt_types.h | 11 +++++++++++ drivers/gpu/drm/xe/xe_query.c | 4 +++- include/uapi/drm/xe_drm.h | 10 +++++++++- 4 files changed, 45 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/xe/xe_gt_topology.c b/drivers/gpu/drm/xe/xe_gt_topology.c index 25ff03ab8448..5a1559edf3e9 100644 --- a/drivers/gpu/drm/xe/xe_gt_topology.c +++ b/drivers/gpu/drm/xe/xe_gt_topology.c @@ -6,6 +6,7 @@ #include "xe_gt_topology.h" #include +#include #include "regs/xe_gt_regs.h" #include "xe_assert.h" @@ -31,7 +32,7 @@ load_dss_mask(struct xe_gt *gt, xe_dss_mask_t mask, int numregs, ...) } static void -load_eu_mask(struct xe_gt *gt, xe_eu_mask_t mask) +load_eu_mask(struct xe_gt *gt, xe_eu_mask_t mask, enum xe_gt_eu_type *eu_type) { struct xe_device *xe = gt_to_xe(gt); u32 reg_val = xe_mmio_read32(gt, XELP_EU_ENABLE); @@ -47,11 +48,13 @@ load_eu_mask(struct xe_gt *gt, xe_eu_mask_t mask) if (GRAPHICS_VERx100(xe) < 1250) reg_val = ~reg_val & XELP_EU_MASK; - /* On PVC, one bit = one EU */ - if (GRAPHICS_VERx100(xe) == 1260) { + if (GRAPHICS_VERx100(xe) == 1260 || GRAPHICS_VER(xe) >= 20) { + /* SIMD16 EUs, one bit == one EU */ + *eu_type = XE_GT_EU_TYPE_SIMD16; val = reg_val; } else { - /* All other platforms, one bit = 2 EU */ + /* SIMD8 EUs, one bit == 2 EU */ + *eu_type = XE_GT_EU_TYPE_SIMD8; for (i = 0; i < fls(reg_val); i++) if (reg_val & BIT(i)) val |= 0x3 << 2 * i; @@ -213,7 +216,7 @@ xe_gt_topology_init(struct xe_gt *gt) XEHP_GT_COMPUTE_DSS_ENABLE, XEHPC_GT_COMPUTE_DSS_ENABLE_EXT, XE2_GT_COMPUTE_DSS_2); - load_eu_mask(gt, gt->fuse_topo.eu_mask_per_dss); + load_eu_mask(gt, gt->fuse_topo.eu_mask_per_dss, >->fuse_topo.eu_type); load_l3_bank_mask(gt, gt->fuse_topo.l3_bank_mask); p = drm_dbg_printer(>_to_xe(gt)->drm, DRM_UT_DRIVER, "GT topology"); @@ -221,6 +224,18 @@ xe_gt_topology_init(struct xe_gt *gt) xe_gt_topology_dump(gt, &p); } +static const char *eu_type_to_str(enum xe_gt_eu_type eu_type) +{ + switch (eu_type) { + case XE_GT_EU_TYPE_SIMD16: + return "simd16"; + case XE_GT_EU_TYPE_SIMD8: + return "simd8"; + } + + unreachable(); +} + void xe_gt_topology_dump(struct xe_gt *gt, struct drm_printer *p) { @@ -231,6 +246,8 @@ xe_gt_topology_dump(struct xe_gt *gt, struct drm_printer *p) drm_printf(p, "EU mask per DSS: %*pb\n", XE_MAX_EU_FUSE_BITS, gt->fuse_topo.eu_mask_per_dss); + drm_printf(p, "EU type: %s\n", + eu_type_to_str(gt->fuse_topo.eu_type)); drm_printf(p, "L3 bank mask: %*pb\n", XE_MAX_L3_BANK_MASK_BITS, gt->fuse_topo.l3_bank_mask); diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h index 38a0d0e178c8..ef68c4a92972 100644 --- a/drivers/gpu/drm/xe/xe_gt_types.h +++ b/drivers/gpu/drm/xe/xe_gt_types.h @@ -27,6 +27,11 @@ enum xe_gt_type { XE_GT_TYPE_MEDIA, }; +enum xe_gt_eu_type { + XE_GT_EU_TYPE_SIMD8, + XE_GT_EU_TYPE_SIMD16, +}; + #define XE_MAX_DSS_FUSE_REGS 3 #define XE_MAX_DSS_FUSE_BITS (32 * XE_MAX_DSS_FUSE_REGS) #define XE_MAX_EU_FUSE_REGS 1 @@ -343,6 +348,12 @@ struct xe_gt { /** @fuse_topo.l3_bank_mask: L3 bank mask */ xe_l3_bank_mask_t l3_bank_mask; + + /** + * @fuse_topo.eu_type: type/width of EU stored in + * fuse_topo.eu_mask_per_dss + */ + enum xe_gt_eu_type eu_type; } fuse_topo; /** @steering: register steering for individual HW units */ diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c index 4e01df6b1b7a..73ef6e4c2dc9 100644 --- a/drivers/gpu/drm/xe/xe_query.c +++ b/drivers/gpu/drm/xe/xe_query.c @@ -518,7 +518,9 @@ static int query_gt_topology(struct xe_device *xe, if (err) return err; - topo.type = DRM_XE_TOPO_EU_PER_DSS; + topo.type = gt->fuse_topo.eu_type == XE_GT_EU_TYPE_SIMD16 ? + DRM_XE_TOPO_SIMD16_EU_PER_DSS : + DRM_XE_TOPO_EU_PER_DSS; err = copy_mask(&query_ptr, &topo, gt->fuse_topo.eu_mask_per_dss, sizeof(gt->fuse_topo.eu_mask_per_dss)); diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index 19619d4952a8..29425d7fdc77 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -517,7 +517,14 @@ struct drm_xe_query_gt_list { * available per Dual Sub Slices (DSS). For example a query response * containing the following in mask: * ``EU_PER_DSS ff ff 00 00 00 00 00 00`` - * means each DSS has 16 EU. + * means each DSS has 16 SIMD8 EUs. This type may be omitted if device + * doesn't have SIMD8 EUs. + * - %DRM_XE_TOPO_SIMD16_EU_PER_DSS - To query the mask of SIMD16 Execution + * Units (EU) available per Dual Sub Slices (DSS). For example a query + * response containing the following in mask: + * ``SIMD16_EU_PER_DSS ff ff 00 00 00 00 00 00`` + * means each DSS has 16 SIMD16 EUs. This type may be omitted if device + * doesn't have SIMD16 EUs. */ struct drm_xe_query_topology_mask { /** @gt_id: GT ID the mask is associated with */ @@ -527,6 +534,7 @@ struct drm_xe_query_topology_mask { #define DRM_XE_TOPO_DSS_COMPUTE 2 #define DRM_XE_TOPO_L3_BANK 3 #define DRM_XE_TOPO_EU_PER_DSS 4 +#define DRM_XE_TOPO_SIMD16_EU_PER_DSS 5 /** @type: type of mask */ __u16 type; -- cgit v1.2.3 From f2881dfdaaa9ec873dbd383ef5512fc31e576cbb Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Mon, 29 Jul 2024 11:26:34 +0200 Subject: drm/xe/oa/uapi: Make bit masks unsigned MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When building with gcc-5: In function ‘decode_oa_format.isra.26’, inlined from ‘xe_oa_set_prop_oa_format’ at drivers/gpu/drm/xe/xe_oa.c:1664:6: ././include/linux/compiler_types.h:510:38: error: call to ‘__compiletime_assert_1336’ declared with attribute error: FIELD_GET: mask is not constant [...] ./include/linux/bitfield.h:155:3: note: in expansion of macro ‘__BF_FIELD_CHECK’ __BF_FIELD_CHECK(_mask, _reg, 0U, "FIELD_GET: "); \ ^ drivers/gpu/drm/xe/xe_oa.c:1573:18: note: in expansion of macro ‘FIELD_GET’ u32 bc_report = FIELD_GET(DRM_XE_OA_FORMAT_MASK_BC_REPORT, fmt); ^ Fixes: b6fd51c62119 ("drm/xe/oa/uapi: Define and parse OA stream properties") Signed-off-by: Geert Uytterhoeven Reviewed-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20240729092634.2227611-1-geert+renesas@glider.be Signed-off-by: Lucas De Marchi --- include/uapi/drm/xe_drm.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index 29425d7fdc77..b6fbe4988f2e 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -1598,10 +1598,10 @@ enum drm_xe_oa_property_id { * b. Counter select c. Counter size and d. BC report. Also refer to the * oa_formats array in drivers/gpu/drm/xe/xe_oa.c. */ -#define DRM_XE_OA_FORMAT_MASK_FMT_TYPE (0xff << 0) -#define DRM_XE_OA_FORMAT_MASK_COUNTER_SEL (0xff << 8) -#define DRM_XE_OA_FORMAT_MASK_COUNTER_SIZE (0xff << 16) -#define DRM_XE_OA_FORMAT_MASK_BC_REPORT (0xff << 24) +#define DRM_XE_OA_FORMAT_MASK_FMT_TYPE (0xffu << 0) +#define DRM_XE_OA_FORMAT_MASK_COUNTER_SEL (0xffu << 8) +#define DRM_XE_OA_FORMAT_MASK_COUNTER_SIZE (0xffu << 16) +#define DRM_XE_OA_FORMAT_MASK_BC_REPORT (0xffu << 24) /** * @DRM_XE_OA_PROPERTY_OA_PERIOD_EXPONENT: Requests periodic OA unit -- cgit v1.2.3