diff options
Diffstat (limited to 'include')
| -rw-r--r-- | include/drm/drm_ras.h | 75 | ||||
| -rw-r--r-- | include/drm/drm_ras_genl_family.h | 17 | ||||
| -rw-r--r-- | include/linux/mmu_notifier.h | 42 | ||||
| -rw-r--r-- | include/uapi/drm/drm_ras.h | 49 | ||||
| -rw-r--r-- | include/uapi/drm/xe_drm.h | 95 |
5 files changed, 278 insertions, 0 deletions
diff --git a/include/drm/drm_ras.h b/include/drm/drm_ras.h new file mode 100644 index 000000000000..5d50209e51db --- /dev/null +++ b/include/drm/drm_ras.h @@ -0,0 +1,75 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2026 Intel Corporation + */ + +#ifndef __DRM_RAS_H__ +#define __DRM_RAS_H__ + +#include <uapi/drm/drm_ras.h> + +/** + * struct drm_ras_node - A DRM RAS Node + */ +struct drm_ras_node { + /** @id: Unique identifier for the node. Dynamically assigned. */ + u32 id; + /** + * @device_name: Human-readable name of the device. Given by the driver. + */ + const char *device_name; + /** @node_name: Human-readable name of the node. Given by the driver. */ + const char *node_name; + /** @type: Type of the node (enum drm_ras_node_type). */ + enum drm_ras_node_type type; + + /* Error-Counter Related Callback and Variables */ + + /** @error_counter_range: Range of valid Error IDs for this node. */ + struct { + /** @first: First valid Error ID. */ + u32 first; + /** @last: Last valid Error ID. Mandatory entry. */ + u32 last; + } error_counter_range; + + /** + * @query_error_counter: + * + * This callback is used by drm-ras to query a specific error counter. + * Used for input check and to iterate all error counters in a node. + * + * Driver should expect query_error_counter() to be called with + * error_id from `error_counter_range.first` to + * `error_counter_range.last`. + * + * The @query_error_counter is a mandatory callback for + * error_counter_node. + * + * Returns: 0 on success, + * -ENOENT when error_id is not supported as an indication that + * drm_ras should silently skip this entry. Used for + * supporting non-contiguous error ranges. + * Driver is responsible for maintaining the list of + * supported error IDs in the range of first to last. + * Other negative values on errors that should terminate the + * netlink query. + */ + int (*query_error_counter)(struct drm_ras_node *node, u32 error_id, + const char **name, u32 *val); + + /** @priv: Driver private data */ + void *priv; +}; + +struct drm_device; + +#if IS_ENABLED(CONFIG_DRM_RAS) +int drm_ras_node_register(struct drm_ras_node *node); +void drm_ras_node_unregister(struct drm_ras_node *node); +#else +static inline int drm_ras_node_register(struct drm_ras_node *node) { return 0; } +static inline void drm_ras_node_unregister(struct drm_ras_node *node) { } +#endif + +#endif diff --git a/include/drm/drm_ras_genl_family.h b/include/drm/drm_ras_genl_family.h new file mode 100644 index 000000000000..910fb3943a75 --- /dev/null +++ b/include/drm/drm_ras_genl_family.h @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2026 Intel Corporation + */ + +#ifndef __DRM_RAS_GENL_FAMILY_H__ +#define __DRM_RAS_GENL_FAMILY_H__ + +#if IS_ENABLED(CONFIG_DRM_RAS) +int drm_ras_genl_family_register(void); +void drm_ras_genl_family_unregister(void); +#else +static inline int drm_ras_genl_family_register(void) { return 0; } +static inline void drm_ras_genl_family_unregister(void) { } +#endif + +#endif diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h index 07a2bbaf86e9..dcdfdf1e0b39 100644 --- a/include/linux/mmu_notifier.h +++ b/include/linux/mmu_notifier.h @@ -234,15 +234,57 @@ struct mmu_notifier { }; /** + * struct mmu_interval_notifier_finish - mmu_interval_notifier two-pass abstraction + * @link: Lockless list link for the notifiers pending pass list + * @notifier: The mmu_interval_notifier for which the finish pass is called. + * + * Allocate, typically using GFP_NOWAIT in the interval notifier's start pass. + * Note that with a large number of notifiers implementing two passes, + * allocation with GFP_NOWAIT will become increasingly likely to fail, so consider + * implementing a small pool instead of using kmalloc() allocations. + * + * If the implementation needs to pass data between the start and the finish passes, + * the recommended way is to embed struct mmu_interval_notifier_finish into a larger + * structure that also contains the data needed to be shared. Keep in mind that + * a notifier callback can be invoked in parallel, and each invocation needs its + * own struct mmu_interval_notifier_finish. + * + * If allocation fails, then the &mmu_interval_notifier_ops->invalidate_start op + * needs to implements the full notifier functionality. Please refer to its + * documentation. + */ +struct mmu_interval_notifier_finish { + struct llist_node link; + struct mmu_interval_notifier *notifier; +}; + +/** * struct mmu_interval_notifier_ops * @invalidate: Upon return the caller must stop using any SPTEs within this * range. This function can sleep. Return false only if sleeping * was required but mmu_notifier_range_blockable(range) is false. + * @invalidate_start: Similar to @invalidate, but intended for two-pass notifier + * callbacks where the call to @invalidate_start is the first + * pass and any struct mmu_interval_notifier_finish pointer + * returned in the @finish parameter describes the finish pass. + * If *@finish is %NULL on return, then no final pass will be + * called, and @invalidate_start needs to implement the full + * notifier, behaving like @invalidate. The value of *@finish + * is guaranteed to be %NULL at function entry. + * @invalidate_finish: Called as the second pass for any notifier that returned + * a non-NULL *@finish from @invalidate_start. The @finish + * pointer passed here is the same one returned by + * @invalidate_start. */ struct mmu_interval_notifier_ops { bool (*invalidate)(struct mmu_interval_notifier *interval_sub, const struct mmu_notifier_range *range, unsigned long cur_seq); + bool (*invalidate_start)(struct mmu_interval_notifier *interval_sub, + const struct mmu_notifier_range *range, + unsigned long cur_seq, + struct mmu_interval_notifier_finish **finish); + void (*invalidate_finish)(struct mmu_interval_notifier_finish *finish); }; struct mmu_interval_notifier { diff --git a/include/uapi/drm/drm_ras.h b/include/uapi/drm/drm_ras.h new file mode 100644 index 000000000000..5f40fa5b869d --- /dev/null +++ b/include/uapi/drm/drm_ras.h @@ -0,0 +1,49 @@ +/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) */ +/* Do not edit directly, auto-generated from: */ +/* Documentation/netlink/specs/drm_ras.yaml */ +/* YNL-GEN uapi header */ +/* To regenerate run: tools/net/ynl/ynl-regen.sh */ + +#ifndef _UAPI_LINUX_DRM_RAS_H +#define _UAPI_LINUX_DRM_RAS_H + +#define DRM_RAS_FAMILY_NAME "drm-ras" +#define DRM_RAS_FAMILY_VERSION 1 + +/* + * Type of the node. Currently, only error-counter nodes are supported, which + * expose reliability counters for a hardware/software component. + */ +enum drm_ras_node_type { + DRM_RAS_NODE_TYPE_ERROR_COUNTER = 1, +}; + +enum { + DRM_RAS_A_NODE_ATTRS_NODE_ID = 1, + DRM_RAS_A_NODE_ATTRS_DEVICE_NAME, + DRM_RAS_A_NODE_ATTRS_NODE_NAME, + DRM_RAS_A_NODE_ATTRS_NODE_TYPE, + + __DRM_RAS_A_NODE_ATTRS_MAX, + DRM_RAS_A_NODE_ATTRS_MAX = (__DRM_RAS_A_NODE_ATTRS_MAX - 1) +}; + +enum { + DRM_RAS_A_ERROR_COUNTER_ATTRS_NODE_ID = 1, + DRM_RAS_A_ERROR_COUNTER_ATTRS_ERROR_ID, + DRM_RAS_A_ERROR_COUNTER_ATTRS_ERROR_NAME, + DRM_RAS_A_ERROR_COUNTER_ATTRS_ERROR_VALUE, + + __DRM_RAS_A_ERROR_COUNTER_ATTRS_MAX, + DRM_RAS_A_ERROR_COUNTER_ATTRS_MAX = (__DRM_RAS_A_ERROR_COUNTER_ATTRS_MAX - 1) +}; + +enum { + DRM_RAS_CMD_LIST_NODES = 1, + DRM_RAS_CMD_GET_ERROR_COUNTER, + + __DRM_RAS_CMD_MAX, + DRM_RAS_CMD_MAX = (__DRM_RAS_CMD_MAX - 1) +}; + +#endif /* _UAPI_LINUX_DRM_RAS_H */ diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index ef2565048bdf..0497b85fa12a 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -406,6 +406,9 @@ struct drm_xe_query_mem_regions { * - %DRM_XE_QUERY_CONFIG_FLAG_HAS_NO_COMPRESSION_HINT - Flag is set if the * device supports the userspace hint %DRM_XE_GEM_CREATE_FLAG_NO_COMPRESSION. * This is exposed only on Xe2+. + * - %DRM_XE_QUERY_CONFIG_FLAG_HAS_DISABLE_STATE_CACHE_PERF_FIX - Flag is set + * if a queue can be creaed with + * %DRM_XE_EXEC_QUEUE_SET_DISABLE_STATE_CACHE_PERF_FIX * - %DRM_XE_QUERY_CONFIG_MIN_ALIGNMENT - Minimal memory alignment * required by this device, typically SZ_4K or SZ_64K * - %DRM_XE_QUERY_CONFIG_VA_BITS - Maximum bits of a virtual address @@ -425,6 +428,7 @@ struct drm_xe_query_config { #define DRM_XE_QUERY_CONFIG_FLAG_HAS_LOW_LATENCY (1 << 1) #define DRM_XE_QUERY_CONFIG_FLAG_HAS_CPU_ADDR_MIRROR (1 << 2) #define DRM_XE_QUERY_CONFIG_FLAG_HAS_NO_COMPRESSION_HINT (1 << 3) + #define DRM_XE_QUERY_CONFIG_FLAG_HAS_DISABLE_STATE_CACHE_PERF_FIX (1 << 4) #define DRM_XE_QUERY_CONFIG_MIN_ALIGNMENT 2 #define DRM_XE_QUERY_CONFIG_VA_BITS 3 #define DRM_XE_QUERY_CONFIG_MAX_EXEC_QUEUE_PRIORITY 4 @@ -1053,6 +1057,13 @@ struct drm_xe_vm_destroy { * not invoke autoreset. Neither will stack variables going out of scope. * Therefore it's recommended to always explicitly reset the madvises when * freeing the memory backing a region used in a &DRM_IOCTL_XE_MADVISE call. + * - DRM_XE_VM_BIND_FLAG_DECOMPRESS - Request on-device decompression for a MAP. + * When set on a MAP bind operation, request the driver schedule an on-device + * in-place decompression (via the migrate/resolve path) for the GPU mapping + * created by this bind. Only valid for DRM_XE_VM_BIND_OP_MAP; usage on + * other ops is rejected. The bind's pat_index must select the device's + * "no-compression" PAT. Only meaningful for VRAM-backed BOs on devices that + * support Flat CCS and the required HW generation XE2+. * * The @prefetch_mem_region_instance for %DRM_XE_VM_BIND_OP_PREFETCH can also be: * - %DRM_XE_CONSULT_MEM_ADVISE_PREF_LOC, which ensures prefetching occurs in @@ -1160,6 +1171,7 @@ struct drm_xe_vm_bind_op { #define DRM_XE_VM_BIND_FLAG_CHECK_PXP (1 << 4) #define DRM_XE_VM_BIND_FLAG_CPU_ADDR_MIRROR (1 << 5) #define DRM_XE_VM_BIND_FLAG_MADVISE_AUTORESET (1 << 6) +#define DRM_XE_VM_BIND_FLAG_DECOMPRESS (1 << 7) /** @flags: Bind flags */ __u32 flags; @@ -1285,6 +1297,9 @@ struct drm_xe_vm_bind { * - %DRM_XE_EXEC_QUEUE_SET_PROPERTY_MULTI_QUEUE_PRIORITY - Set the queue * priority within the multi-queue group. Current valid priority values are 0–2 * (default is 1), with higher values indicating higher priority. + * - %DRM_XE_EXEC_QUEUE_SET_DISABLE_STATE_CACHE_PERF_FIX - Set the queue to + * enable render color cache keying on BTP+BTI instead of just BTI + * (only valid for render queues). * * The example below shows how to use @drm_xe_exec_queue_create to create * a simple exec_queue (no parallel submission) of class @@ -1329,6 +1344,7 @@ struct drm_xe_exec_queue_create { #define DRM_XE_EXEC_QUEUE_SET_PROPERTY_MULTI_GROUP 4 #define DRM_XE_MULTI_GROUP_CREATE (1ull << 63) #define DRM_XE_EXEC_QUEUE_SET_PROPERTY_MULTI_QUEUE_PRIORITY 5 +#define DRM_XE_EXEC_QUEUE_SET_DISABLE_STATE_CACHE_PERF_FIX 6 /** @extensions: Pointer to the first extension struct, if any */ __u64 extensions; @@ -2357,6 +2373,85 @@ struct drm_xe_exec_queue_set_property { __u64 reserved[2]; }; +/** + * DOC: Xe DRM RAS + * + * The enums and strings defined below map to the attributes of the DRM RAS Netlink Interface. + * Refer to Documentation/netlink/specs/drm_ras.yaml for complete interface specification. + * + * Node Registration + * ================= + * + * The driver registers DRM RAS nodes for each error severity level. + * enum drm_xe_ras_error_severity defines the node-id, while DRM_XE_RAS_ERROR_SEVERITY_NAMES maps + * node-id to node-name. + * + * Error Classification + * ==================== + * + * Each node contains a list of error counters. Each error is identified by a error-id and + * an error-name. enum drm_xe_ras_error_component defines the error-id, while + * DRM_XE_RAS_ERROR_COMPONENT_NAMES maps error-id to error-name. + * + * User Interface + * ============== + * + * To retrieve error values of a error counter, userspace applications should + * follow the below steps: + * + * 1. Use command LIST_NODES to enumerate all available nodes + * 2. Select node by node-id or node-name + * 3. Use command GET_ERROR_COUNTERS to list errors of specific node + * 4. Query specific error values using either error-id or error-name + * + * .. code-block:: C + * + * // Lookup tables for ID-to-name resolution + * static const char *nodes[] = DRM_XE_RAS_ERROR_SEVERITY_NAMES; + * static const char *errors[] = DRM_XE_RAS_ERROR_COMPONENT_NAMES; + * + */ + +/** + * enum drm_xe_ras_error_severity - DRM RAS error severity. + */ +enum drm_xe_ras_error_severity { + /** @DRM_XE_RAS_ERR_SEV_CORRECTABLE: Correctable Error */ + DRM_XE_RAS_ERR_SEV_CORRECTABLE = 0, + /** @DRM_XE_RAS_ERR_SEV_UNCORRECTABLE: Uncorrectable Error */ + DRM_XE_RAS_ERR_SEV_UNCORRECTABLE, + /** @DRM_XE_RAS_ERR_SEV_MAX: Max severity */ + DRM_XE_RAS_ERR_SEV_MAX /* non-ABI */ +}; + +/** + * enum drm_xe_ras_error_component - DRM RAS error component. + */ +enum drm_xe_ras_error_component { + /** @DRM_XE_RAS_ERR_COMP_CORE_COMPUTE: Core Compute Error */ + DRM_XE_RAS_ERR_COMP_CORE_COMPUTE = 1, + /** @DRM_XE_RAS_ERR_COMP_SOC_INTERNAL: SoC Internal Error */ + DRM_XE_RAS_ERR_COMP_SOC_INTERNAL, + /** @DRM_XE_RAS_ERR_COMP_MAX: Max Error */ + DRM_XE_RAS_ERR_COMP_MAX /* non-ABI */ +}; + +/* + * Error severity to name mapping. + */ +#define DRM_XE_RAS_ERROR_SEVERITY_NAMES { \ + [DRM_XE_RAS_ERR_SEV_CORRECTABLE] = "correctable-errors", \ + [DRM_XE_RAS_ERR_SEV_UNCORRECTABLE] = "uncorrectable-errors", \ +} + +/* + * Error component to name mapping. + */ +#define DRM_XE_RAS_ERROR_COMPONENT_NAMES { \ + [DRM_XE_RAS_ERR_COMP_CORE_COMPUTE] = "core-compute", \ + [DRM_XE_RAS_ERR_COMP_SOC_INTERNAL] = "soc-internal" \ +} + #if defined(__cplusplus) } #endif |
