author	Dave Airlie <airlied@redhat.com>	2026-03-16 05:21:06 +0300
committer	Dave Airlie <airlied@redhat.com>	2026-03-16 05:21:08 +0300
commit	3f071d00fc9478344f3231f585a5e39ceb6c63e6 (patch)
tree	27aea15fc69efdee1b76c01aacaf7814edfbfb02
parent	38cb89a6c924c35d7d17ed13ccd3952c82b4e0d1 (diff)
parent	42d3b66d4cdbacfc9d120d2301b8de89cc29a914 (diff)
download	linux-3f071d00fc9478344f3231f585a5e39ceb6c63e6.tar.xz
Merge tag 'drm-xe-next-2026-03-12' of https://gitlab.freedesktop.org/drm/xe/kernel into drm-next
UAPI Changes:
- add VM_BIND DECOMPRESS support and on-demand decompression (Nitin)
- Allow per queue programming of COMMON_SLICE_CHICKEN3 bit13 (Lionel)

Cross-subsystem Changes:
- Introduce the DRM RAS infrastructure over generic netlink (Riana, Rodrigo)

Core Changes:
- Two-pass MMU interval notifiers (Thomas)

Driver Changes:
- Merge drm/drm-next into drm-xe-next (Brost)
- Fix overflow in guc_ct_snapshot_capture (Mika, Fixes)
- Extract gt_pta_entry (Gustavo)
- Extra enabling patches for NVL-P (Gustavo)
- Add Wa_14026578760 (Varun)
- Add type-specific GT loop iterator (Roper)
- Refactor xe_migrate_prepare_vm (Raag)
- Don't disable GuCRC in suspend path (Vinay, Fixes)
- Add missing kernel docs in xe_exec_queue.c (Niranjana)
- Change TEST_VRAM to work with 32-bit resource_size_t (Wajdeczko)
- Fix memory leak in xe_vm_madvise_ioctl (Varun, Fixes)
- Skip access counter queue init for unsupported platforms (Himal)

Signed-off-by: Dave Airlie <airlied@redhat.com>
From: Matthew Brost <matthew.brost@intel.com>
Link: https://patch.msgid.link/abLUVfSHu8EHRF9q@lstrano-desk.jf.intel.com
-rw-r--r--  Documentation/gpu/drm-ras.rst  103
-rw-r--r--  Documentation/gpu/index.rst  1
-rw-r--r--  Documentation/netlink/specs/drm_ras.yaml  115
-rw-r--r--  drivers/gpu/drm/Kconfig  10
-rw-r--r--  drivers/gpu/drm/Makefile  1
-rw-r--r--  drivers/gpu/drm/drm_drv.c  6
-rw-r--r--  drivers/gpu/drm/drm_ras.c  354
-rw-r--r--  drivers/gpu/drm/drm_ras_genl_family.c  42
-rw-r--r--  drivers/gpu/drm/drm_ras_nl.c  56
-rw-r--r--  drivers/gpu/drm/drm_ras_nl.h  24
-rw-r--r--  drivers/gpu/drm/xe/Makefile  1
-rw-r--r--  drivers/gpu/drm/xe/regs/xe_gt_regs.h  12
-rw-r--r--  drivers/gpu/drm/xe/regs/xe_hw_error_regs.h  86
-rw-r--r--  drivers/gpu/drm/xe/tests/xe_gt_sriov_pf_config_kunit.c  2
-rw-r--r--  drivers/gpu/drm/xe/xe_bo.c  55
-rw-r--r--  drivers/gpu/drm/xe/xe_bo.h  2
-rw-r--r--  drivers/gpu/drm/xe/xe_device.c  5
-rw-r--r--  drivers/gpu/drm/xe/xe_device.h  4
-rw-r--r--  drivers/gpu/drm/xe/xe_device_types.h  16
-rw-r--r--  drivers/gpu/drm/xe/xe_device_wa_oob.rules  1
-rw-r--r--  drivers/gpu/drm/xe/xe_drm_ras.c  186
-rw-r--r--  drivers/gpu/drm/xe/xe_drm_ras.h  15
-rw-r--r--  drivers/gpu/drm/xe/xe_drm_ras_types.h  48
-rw-r--r--  drivers/gpu/drm/xe/xe_exec_queue.c  131
-rw-r--r--  drivers/gpu/drm/xe/xe_exec_queue_types.h  2
-rw-r--r--  drivers/gpu/drm/xe/xe_gt.c  41
-rw-r--r--  drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c  5
-rw-r--r--  drivers/gpu/drm/xe/xe_guc.c  1
-rw-r--r--  drivers/gpu/drm/xe/xe_guc_ads.c  11
-rw-r--r--  drivers/gpu/drm/xe/xe_guc_ct_types.h  2
-rw-r--r--  drivers/gpu/drm/xe/xe_guc_pc.c  8
-rw-r--r--  drivers/gpu/drm/xe/xe_hw_error.c  451
-rw-r--r--  drivers/gpu/drm/xe/xe_lrc.c  9
-rw-r--r--  drivers/gpu/drm/xe/xe_lrc.h  1
-rw-r--r--  drivers/gpu/drm/xe/xe_migrate.c  143
-rw-r--r--  drivers/gpu/drm/xe/xe_migrate.h  4
-rw-r--r--  drivers/gpu/drm/xe/xe_pat.c  33
-rw-r--r--  drivers/gpu/drm/xe/xe_pci.c  12
-rw-r--r--  drivers/gpu/drm/xe/xe_pci_types.h  1
-rw-r--r--  drivers/gpu/drm/xe/xe_query.c  2
-rw-r--r--  drivers/gpu/drm/xe/xe_rtp.c  7
-rw-r--r--  drivers/gpu/drm/xe/xe_rtp.h  20
-rw-r--r--  drivers/gpu/drm/xe/xe_rtp_types.h  1
-rw-r--r--  drivers/gpu/drm/xe/xe_step.c  74
-rw-r--r--  drivers/gpu/drm/xe/xe_step.h  10
-rw-r--r--  drivers/gpu/drm/xe/xe_step_types.h  1
-rw-r--r--  drivers/gpu/drm/xe/xe_svm.c  8
-rw-r--r--  drivers/gpu/drm/xe/xe_tlb_inval.c  84
-rw-r--r--  drivers/gpu/drm/xe/xe_tlb_inval.h  6
-rw-r--r--  drivers/gpu/drm/xe/xe_tlb_inval_types.h  14
-rw-r--r--  drivers/gpu/drm/xe/xe_userptr.c  155
-rw-r--r--  drivers/gpu/drm/xe/xe_userptr.h  31
-rw-r--r--  drivers/gpu/drm/xe/xe_vm.c  136
-rw-r--r--  drivers/gpu/drm/xe/xe_vm.h  5
-rw-r--r--  drivers/gpu/drm/xe/xe_vm_madvise.c  10
-rw-r--r--  drivers/gpu/drm/xe/xe_vm_types.h  3
-rw-r--r--  drivers/gpu/drm/xe/xe_wa.c  9
-rw-r--r--  include/drm/drm_ras.h  75
-rw-r--r--  include/drm/drm_ras_genl_family.h  17
-rw-r--r--  include/linux/mmu_notifier.h  42
-rw-r--r--  include/uapi/drm/drm_ras.h  49
-rw-r--r--  include/uapi/drm/xe_drm.h  95
-rw-r--r--  mm/mmu_notifier.c  65
63 files changed, 2653 insertions, 266 deletions
diff --git a/Documentation/gpu/drm-ras.rst b/Documentation/gpu/drm-ras.rst
new file mode 100644
index 000000000000..70b246a78fc8
--- /dev/null
+++ b/Documentation/gpu/drm-ras.rst
@@ -0,0 +1,103 @@
+.. SPDX-License-Identifier: GPL-2.0+
+
+============================
+DRM RAS over Generic Netlink
+============================
+
+The DRM RAS (Reliability, Availability, Serviceability) interface provides a
+standardized way for GPU/accelerator drivers to expose error counters and
+other reliability nodes to user space via Generic Netlink. This allows
+diagnostic tools, monitoring daemons, or test infrastructure to query hardware
+health in a uniform way across different DRM drivers.
+
+Key Goals:
+
+* Provide a standardized RAS solution for GPU and accelerator drivers, enabling
+ data center monitoring and reliability operations.
+* Implement a single drm-ras Generic Netlink family to meet modern Netlink YAML
+ specifications and centralize all RAS-related communication in one namespace.
+* Support a basic error counter interface, addressing the immediate, essential
+ monitoring needs.
+* Offer a flexible, future-proof interface that can be extended to support
+ additional types of RAS data in the future.
+* Allow multiple nodes per driver, enabling drivers to register separate
+ nodes for different IP blocks, sub-blocks, or other logical subdivisions
+ as applicable.
+
+Nodes
+=====
+
+Nodes are logical abstractions representing an error type or error source within
+the device. Currently, only error counter nodes are supported.
+
+Drivers are responsible for registering and unregistering nodes via the
+`drm_ras_node_register()` and `drm_ras_node_unregister()` APIs.
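+
+For illustration only, registering a single error-counter node from a driver
+could look roughly like the sketch below. The ``my_*`` identifiers, error IDs
+and counter names are hypothetical; only the ``struct drm_ras_node`` fields and
+the ``drm_ras_node_register()`` call are the API introduced here (see
+``drivers/gpu/drm/xe/xe_drm_ras.c`` for a complete in-tree user).
+
+.. code-block:: c
+
+   /* Illustrative callback: report the name/value pair for one counter. */
+   static int my_query_error_counter(struct drm_ras_node *node, u32 error_id,
+                                     const char **name, u32 *value)
+   {
+           struct my_device *mdev = node->priv;
+
+           /* Gaps in the ID range are skipped by returning -ENOENT. */
+           if (!mdev->counters[error_id].name)
+                   return -ENOENT;
+
+           *name = mdev->counters[error_id].name;
+           *value = atomic_read(&mdev->counters[error_id].count);
+           return 0;
+   }
+
+   /* Illustrative registration: fill in the node and hand it to the core. */
+   static int my_register_ras_node(struct my_device *mdev)
+   {
+           struct drm_ras_node *node = &mdev->ras_node;
+
+           node->device_name = mdev->pci_name;    /* e.g. "0000:03:00.0" */
+           node->node_name = "correctable-errors";
+           node->type = DRM_RAS_NODE_TYPE_ERROR_COUNTER;
+           node->error_counter_range.first = 0;
+           node->error_counter_range.last = MY_LAST_ERROR_ID;
+           node->query_error_counter = my_query_error_counter;
+           node->priv = mdev;
+
+           return drm_ras_node_register(node);
+   }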
+
+Node Management
+-------------------
+
+.. kernel-doc:: drivers/gpu/drm/drm_ras.c
+ :doc: DRM RAS Node Management
+.. kernel-doc:: drivers/gpu/drm/drm_ras.c
+ :internal:
+
+Generic Netlink Usage
+=====================
+
+The interface is implemented as a Generic Netlink family named ``drm-ras``.
+User space tools can:
+
+* List registered nodes with the ``list-nodes`` command.
+* List all error counters in a node with the ``get-error-counter`` command, using ``node-id``
+ as a parameter.
+* Query specific error counter values with the ``get-error-counter`` command, using both
+ ``node-id`` and ``error-id`` as parameters.
+
+YAML-based Interface
+--------------------
+
+The interface is described in the YAML specification ``Documentation/netlink/specs/drm_ras.yaml``.
+
+This YAML is used to auto-generate user space bindings via
+``tools/net/ynl/pyynl/ynl_gen_c.py``, and drives the structure of netlink
+attributes and operations.
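+
+For example, the generated kernel-side stubs ``drivers/gpu/drm/drm_ras_nl.c``
+and ``drm_ras_nl.h`` note that they can be refreshed from this spec with:
+
+.. code-block:: bash
+
+   tools/net/ynl/ynl-regen.sh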
+
+Usage Notes
+-----------
+
+* User space must first enumerate nodes to obtain their IDs.
+* Node IDs or Node names can be used for all further queries, such as error counters.
+* Error counters can be queried by either the Error ID or Error name.
+* Query Parameters should be defined as part of the uAPI to ensure user interface stability.
+* The interface supports future extension by adding new node types and
+ additional attributes.
+
+Example: List nodes using ynl
+
+.. code-block:: bash
+
+ sudo ynl --family drm_ras --dump list-nodes
+ [{'device-name': '0000:03:00.0',
+ 'node-id': 0,
+ 'node-name': 'correctable-errors',
+ 'node-type': 'error-counter'},
+ {'device-name': '0000:03:00.0',
+ 'node-id': 1,
+ 'node-name': 'uncorrectable-errors',
+ 'node-type': 'error-counter'}]
+
+Example: List all error counters using ynl
+
+.. code-block:: bash
+
+ sudo ynl --family drm_ras --dump get-error-counter --json '{"node-id":0}'
+ [{'error-id': 1, 'error-name': 'error_name1', 'error-value': 0},
+ {'error-id': 2, 'error-name': 'error_name2', 'error-value': 0}]
+
+Example: Query an error counter for a given node
+
+.. code-block:: bash
+
+ sudo ynl --family drm_ras --do get-error-counter --json '{"node-id":0, "error-id":1}'
+ {'error-id': 1, 'error-name': 'error_name1', 'error-value': 0}
+
diff --git a/Documentation/gpu/index.rst b/Documentation/gpu/index.rst
index 2fafa1f35ef3..5d708a106b3f 100644
--- a/Documentation/gpu/index.rst
+++ b/Documentation/gpu/index.rst
@@ -9,6 +9,7 @@ GPU Driver Developer's Guide
drm-mm
drm-kms
drm-kms-helpers
+ drm-ras
drm-uapi
drm-usage-stats
driver-uapi
diff --git a/Documentation/netlink/specs/drm_ras.yaml b/Documentation/netlink/specs/drm_ras.yaml
new file mode 100644
index 000000000000..79af25dac3c5
--- /dev/null
+++ b/Documentation/netlink/specs/drm_ras.yaml
@@ -0,0 +1,115 @@
+# SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause)
+---
+name: drm-ras
+protocol: genetlink
+uapi-header: drm/drm_ras.h
+
+doc: >-
+ DRM RAS (Reliability, Availability, Serviceability) over Generic Netlink.
+ Provides a standardized mechanism for DRM drivers to register "nodes"
+ representing hardware/software components capable of reporting error counters.
+ Userspace tools can query the list of nodes or individual error counters
+ via the Generic Netlink interface.
+
+definitions:
+ -
+ type: enum
+ name: node-type
+ value-start: 1
+ entries: [error-counter]
+ doc: >-
+ Type of the node. Currently, only error-counter nodes are
+ supported, which expose reliability counters for a hardware/software
+ component.
+
+attribute-sets:
+ -
+ name: node-attrs
+ attributes:
+ -
+ name: node-id
+ type: u32
+ doc: >-
+ Unique identifier for the node.
+ Assigned dynamically by the DRM RAS core upon registration.
+ -
+ name: device-name
+ type: string
+ doc: >-
+ Device name chosen by the driver at registration.
+ Can be a PCI BDF, UUID, or module name if unique.
+ -
+ name: node-name
+ type: string
+ doc: >-
+ Node name chosen by the driver at registration.
+ Can be an IP block name, or any name that identifies the
+ RAS node inside the device.
+ -
+ name: node-type
+ type: u32
+ doc: Type of this node, identifying its function.
+ enum: node-type
+ -
+ name: error-counter-attrs
+ attributes:
+ -
+ name: node-id
+ type: u32
+ doc: Node ID targeted by this error counter operation.
+ -
+ name: error-id
+ type: u32
+ doc: Unique identifier for a specific error counter within a node.
+ -
+ name: error-name
+ type: string
+ doc: Name of the error.
+ -
+ name: error-value
+ type: u32
+ doc: Current value of the requested error counter.
+
+operations:
+ list:
+ -
+ name: list-nodes
+ doc: >-
+ Retrieve the full list of currently registered DRM RAS nodes.
+ Each node includes its dynamically assigned ID, name, and type.
+ **Important:** User space must call this operation first to obtain
+ the node IDs. These IDs are required for all subsequent
+ operations on nodes, such as querying error counters.
+ attribute-set: node-attrs
+ flags: [admin-perm]
+ dump:
+ reply:
+ attributes:
+ - node-id
+ - device-name
+ - node-name
+ - node-type
+ -
+ name: get-error-counter
+ doc: >-
+ Retrieve error counters for a given node.
+ The response includes the ID, the name, and the current value of
+ each counter.
+ attribute-set: error-counter-attrs
+ flags: [admin-perm]
+ do:
+ request:
+ attributes:
+ - node-id
+ - error-id
+ reply:
+ attributes: &errorinfo
+ - error-id
+ - error-name
+ - error-value
+ dump:
+ request:
+ attributes:
+ - node-id
+ reply:
+ attributes: *errorinfo
diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig
index 0d0657dd1b41..5386248e75b6 100644
--- a/drivers/gpu/drm/Kconfig
+++ b/drivers/gpu/drm/Kconfig
@@ -130,6 +130,16 @@ config DRM_PANIC_SCREEN_QR_VERSION
Smaller QR code are easier to read, but will contain less debugging
data. Default is 40.
+config DRM_RAS
+ bool "DRM RAS support"
+ depends on DRM
+ depends on NET
+ help
+ Enables the DRM RAS (Reliability, Availability and Serviceability)
+ support for DRM drivers. This provides a Generic Netlink interface
+ for error reporting and queries.
+ If in doubt, say "N".
+
config DRM_DEBUG_DP_MST_TOPOLOGY_REFS
bool "Enable refcount backtrace history in the DP MST helpers"
depends on STACKTRACE_SUPPORT
diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile
index aba4bf542a35..e97faabcd783 100644
--- a/drivers/gpu/drm/Makefile
+++ b/drivers/gpu/drm/Makefile
@@ -93,6 +93,7 @@ drm-$(CONFIG_DRM_ACCEL) += ../../accel/drm_accel.o
drm-$(CONFIG_DRM_PANIC) += drm_panic.o
drm-$(CONFIG_DRM_DRAW) += drm_draw.o
drm-$(CONFIG_DRM_PANIC_SCREEN_QR_CODE) += drm_panic_qr.o
+drm-$(CONFIG_DRM_RAS) += drm_ras.o drm_ras_nl.o drm_ras_genl_family.o
obj-$(CONFIG_DRM) += drm.o
obj-$(CONFIG_DRM_PANEL_ORIENTATION_QUIRKS) += drm_panel_orientation_quirks.o
diff --git a/drivers/gpu/drm/drm_drv.c b/drivers/gpu/drm/drm_drv.c
index 2915118436ce..6b965c3d3307 100644
--- a/drivers/gpu/drm/drm_drv.c
+++ b/drivers/gpu/drm/drm_drv.c
@@ -53,6 +53,7 @@
#include <drm/drm_panic.h>
#include <drm/drm_print.h>
#include <drm/drm_privacy_screen_machine.h>
+#include <drm/drm_ras_genl_family.h>
#include "drm_crtc_internal.h"
#include "drm_internal.h"
@@ -1223,6 +1224,7 @@ static const struct file_operations drm_stub_fops = {
static void drm_core_exit(void)
{
+ drm_ras_genl_family_unregister();
drm_privacy_screen_lookup_exit();
drm_panic_exit();
accel_core_exit();
@@ -1261,6 +1263,10 @@ static int __init drm_core_init(void)
drm_privacy_screen_lookup_init();
+ ret = drm_ras_genl_family_register();
+ if (ret < 0)
+ goto error;
+
drm_core_init_complete = true;
DRM_DEBUG("Initialized\n");
diff --git a/drivers/gpu/drm/drm_ras.c b/drivers/gpu/drm/drm_ras.c
new file mode 100644
index 000000000000..b2fa5ab86d87
--- /dev/null
+++ b/drivers/gpu/drm/drm_ras.c
@@ -0,0 +1,354 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2026 Intel Corporation
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/netdevice.h>
+#include <linux/xarray.h>
+#include <net/genetlink.h>
+
+#include <drm/drm_ras.h>
+
+#include "drm_ras_nl.h"
+
+/**
+ * DOC: DRM RAS Node Management
+ *
+ * This module provides the infrastructure to manage RAS (Reliability,
+ * Availability, and Serviceability) nodes for DRM drivers. Each
+ * DRM driver may register one or more RAS nodes, which represent
+ * logical components capable of reporting error counters and other
+ * reliability metrics.
+ *
+ * The nodes are stored in a global xarray `drm_ras_xa` to allow
+ * efficient lookup by ID. Nodes can be registered or unregistered
+ * dynamically at runtime.
+ *
+ * A Generic Netlink family `drm_ras` exposes two main operations to
+ * userspace:
+ *
+ * 1. LIST_NODES: Dump all currently registered RAS nodes.
+ * The user receives an array of node IDs, names, and types.
+ *
+ * 2. GET_ERROR_COUNTER: Get error counters of a given node.
+ * Userspace must provide a node ID and may optionally provide an error ID.
+ * Returns all counters of the node if only the node ID is given, or the
+ * single counter identified by the error ID otherwise.
+ *
+ * Node registration:
+ *
+ * - drm_ras_node_register(): Registers a new node and assigns
+ * it a unique ID in the xarray.
+ * - drm_ras_node_unregister(): Removes a previously registered
+ * node from the xarray.
+ *
+ * Node type:
+ *
+ * - ERROR_COUNTER:
+ * + Currently, only error counters are supported.
+ * + The driver must implement the query_error_counter() callback to provide
+ * the name and the value of the error counter.
+ * + The driver must provide an error_counter_range.last value indicating the
+ * last valid error ID.
+ * + The driver can provide an error_counter_range.first value indicating the
+ * first valid error ID.
+ * + The error counters in the driver don't need to be contiguous, but the
+ * driver must return -ENOENT from query_error_counter() as an indication
+ * that the ID should be skipped and not listed in the netlink API.
+ *
+ * Netlink handlers:
+ *
+ * - drm_ras_nl_list_nodes_dumpit(): Implements the LIST_NODES
+ * operation, iterating over the xarray.
+ * - drm_ras_nl_get_error_counter_dumpit(): Implements the GET_ERROR_COUNTER dumpit
+ * operation, fetching all counters from a specific node.
+ * - drm_ras_nl_get_error_counter_doit(): Implements the GET_ERROR_COUNTER doit
+ * operation, fetching a counter value from a specific node.
+ */
+
+static DEFINE_XARRAY_ALLOC(drm_ras_xa);
+
+/*
+ * The netlink callback context carries dump state across multiple dumpit calls
+ */
+struct drm_ras_ctx {
+ /* Which xarray id to restart the dump from */
+ unsigned long restart;
+};
+
+/**
+ * drm_ras_nl_list_nodes_dumpit() - Dump all registered RAS nodes
+ * @skb: Netlink message buffer
+ * @cb: Callback context for multi-part dumps
+ *
+ * Iterates over all registered RAS nodes in the global xarray and appends
+ * their attributes (ID, name, type) to the given netlink message buffer.
+ * Uses @cb->ctx to track progress in case the message buffer fills up, allowing
+ * multi-part dump support. On buffer overflow, updates the context to resume
+ * from the last node on the next invocation.
+ *
+ * Return: 0 if all nodes fit in @skb, number of bytes added to @skb if
+ * the buffer filled up (requires multi-part continuation), or
+ * a negative error code on failure.
+ */
+int drm_ras_nl_list_nodes_dumpit(struct sk_buff *skb,
+ struct netlink_callback *cb)
+{
+ const struct genl_info *info = genl_info_dump(cb);
+ struct drm_ras_ctx *ctx = (void *)cb->ctx;
+ struct drm_ras_node *node;
+ struct nlattr *hdr;
+ unsigned long id;
+ int ret;
+
+ xa_for_each_start(&drm_ras_xa, id, node, ctx->restart) {
+ hdr = genlmsg_iput(skb, info);
+ if (!hdr) {
+ ret = -EMSGSIZE;
+ break;
+ }
+
+ ret = nla_put_u32(skb, DRM_RAS_A_NODE_ATTRS_NODE_ID, node->id);
+ if (ret) {
+ genlmsg_cancel(skb, hdr);
+ break;
+ }
+
+ ret = nla_put_string(skb, DRM_RAS_A_NODE_ATTRS_DEVICE_NAME,
+ node->device_name);
+ if (ret) {
+ genlmsg_cancel(skb, hdr);
+ break;
+ }
+
+ ret = nla_put_string(skb, DRM_RAS_A_NODE_ATTRS_NODE_NAME,
+ node->node_name);
+ if (ret) {
+ genlmsg_cancel(skb, hdr);
+ break;
+ }
+
+ ret = nla_put_u32(skb, DRM_RAS_A_NODE_ATTRS_NODE_TYPE,
+ node->type);
+ if (ret) {
+ genlmsg_cancel(skb, hdr);
+ break;
+ }
+
+ genlmsg_end(skb, hdr);
+ }
+
+ if (ret == -EMSGSIZE)
+ ctx->restart = id;
+
+ return ret;
+}
+
+static int get_node_error_counter(u32 node_id, u32 error_id,
+ const char **name, u32 *value)
+{
+ struct drm_ras_node *node;
+
+ node = xa_load(&drm_ras_xa, node_id);
+ if (!node || !node->query_error_counter)
+ return -ENOENT;
+
+ if (error_id < node->error_counter_range.first ||
+ error_id > node->error_counter_range.last)
+ return -EINVAL;
+
+ return node->query_error_counter(node, error_id, name, value);
+}
+
+static int msg_reply_value(struct sk_buff *msg, u32 error_id,
+ const char *error_name, u32 value)
+{
+ int ret;
+
+ ret = nla_put_u32(msg, DRM_RAS_A_ERROR_COUNTER_ATTRS_ERROR_ID, error_id);
+ if (ret)
+ return ret;
+
+ ret = nla_put_string(msg, DRM_RAS_A_ERROR_COUNTER_ATTRS_ERROR_NAME,
+ error_name);
+ if (ret)
+ return ret;
+
+ return nla_put_u32(msg, DRM_RAS_A_ERROR_COUNTER_ATTRS_ERROR_VALUE,
+ value);
+}
+
+static int doit_reply_value(struct genl_info *info, u32 node_id,
+ u32 error_id)
+{
+ struct sk_buff *msg;
+ struct nlattr *hdr;
+ const char *error_name;
+ u32 value;
+ int ret;
+
+ msg = genlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
+ if (!msg)
+ return -ENOMEM;
+
+ hdr = genlmsg_iput(msg, info);
+ if (!hdr) {
+ nlmsg_free(msg);
+ return -EMSGSIZE;
+ }
+
+ ret = get_node_error_counter(node_id, error_id,
+ &error_name, &value);
+ if (ret)
+ return ret;
+
+ ret = msg_reply_value(msg, error_id, error_name, value);
+ if (ret) {
+ genlmsg_cancel(msg, hdr);
+ nlmsg_free(msg);
+ return ret;
+ }
+
+ genlmsg_end(msg, hdr);
+
+ return genlmsg_reply(msg, info);
+}
+
+/**
+ * drm_ras_nl_get_error_counter_dumpit() - Dump all Error Counters
+ * @skb: Netlink message buffer
+ * @cb: Callback context for multi-part dumps
+ *
+ * Iterates over all error counters in a given Node and appends
+ * their attributes (ID, name, value) to the given netlink message buffer.
+ * Uses @cb->ctx to track progress in case the message buffer fills up, allowing
+ * multi-part dump support. On buffer overflow, updates the context to resume
+ * from the last node on the next invocation.
+ *
+ * Return: 0 if all errors fit in @skb, number of bytes added to @skb if
+ * the buffer filled up (requires multi-part continuation), or
+ * a negative error code on failure.
+ */
+int drm_ras_nl_get_error_counter_dumpit(struct sk_buff *skb,
+ struct netlink_callback *cb)
+{
+ const struct genl_info *info = genl_info_dump(cb);
+ struct drm_ras_ctx *ctx = (void *)cb->ctx;
+ struct drm_ras_node *node;
+ struct nlattr *hdr;
+ const char *error_name;
+ u32 node_id, error_id, value;
+ int ret;
+
+ if (!info->attrs || GENL_REQ_ATTR_CHECK(info, DRM_RAS_A_ERROR_COUNTER_ATTRS_NODE_ID))
+ return -EINVAL;
+
+ node_id = nla_get_u32(info->attrs[DRM_RAS_A_ERROR_COUNTER_ATTRS_NODE_ID]);
+
+ node = xa_load(&drm_ras_xa, node_id);
+ if (!node)
+ return -ENOENT;
+
+ for (error_id = max(node->error_counter_range.first, ctx->restart);
+ error_id <= node->error_counter_range.last;
+ error_id++) {
+ ret = get_node_error_counter(node_id, error_id,
+ &error_name, &value);
+ /*
+ * For non-contiguous range, driver return -ENOENT as indication
+ * to skip this ID when listing all errors.
+ */
+ if (ret == -ENOENT)
+ continue;
+ if (ret)
+ return ret;
+
+ hdr = genlmsg_iput(skb, info);
+
+ if (!hdr) {
+ ret = -EMSGSIZE;
+ break;
+ }
+
+ ret = msg_reply_value(skb, error_id, error_name, value);
+ if (ret) {
+ genlmsg_cancel(skb, hdr);
+ break;
+ }
+
+ genlmsg_end(skb, hdr);
+ }
+
+ if (ret == -EMSGSIZE)
+ ctx->restart = error_id;
+
+ return ret;
+}
+
+/**
+ * drm_ras_nl_get_error_counter_doit() - Query an error counter of a node
+ * @skb: Netlink message buffer
+ * @info: Generic Netlink info containing attributes of the request
+ *
+ * Extracts the node ID and error ID from the netlink attributes and
+ * retrieves the current value of the corresponding error counter. Sends the
+ * result back to the requesting user via the standard Genl reply.
+ *
+ * Return: 0 on success, or negative errno on failure.
+ */
+int drm_ras_nl_get_error_counter_doit(struct sk_buff *skb,
+ struct genl_info *info)
+{
+ u32 node_id, error_id;
+
+ if (!info->attrs ||
+ GENL_REQ_ATTR_CHECK(info, DRM_RAS_A_ERROR_COUNTER_ATTRS_NODE_ID) ||
+ GENL_REQ_ATTR_CHECK(info, DRM_RAS_A_ERROR_COUNTER_ATTRS_ERROR_ID))
+ return -EINVAL;
+
+ node_id = nla_get_u32(info->attrs[DRM_RAS_A_ERROR_COUNTER_ATTRS_NODE_ID]);
+ error_id = nla_get_u32(info->attrs[DRM_RAS_A_ERROR_COUNTER_ATTRS_ERROR_ID]);
+
+ return doit_reply_value(info, node_id, error_id);
+}
+
+/**
+ * drm_ras_node_register() - Register a new RAS node
+ * @node: Node structure to register
+ *
+ * Adds the given RAS node to the global node xarray and assigns it
+ * a unique ID. @node->device_name, @node->node_name and @node->type must
+ * be valid.
+ *
+ * Return: 0 on success, or negative errno on failure.
+ */
+int drm_ras_node_register(struct drm_ras_node *node)
+{
+ if (!node->device_name || !node->node_name)
+ return -EINVAL;
+
+ /* Currently, only Error Counter nodes are supported */
+ if (node->type != DRM_RAS_NODE_TYPE_ERROR_COUNTER)
+ return -EINVAL;
+
+ /* Mandatory entries for Error Counter Node */
+ if (node->type == DRM_RAS_NODE_TYPE_ERROR_COUNTER &&
+ (!node->error_counter_range.last || !node->query_error_counter))
+ return -EINVAL;
+
+ return xa_alloc(&drm_ras_xa, &node->id, node, xa_limit_32b, GFP_KERNEL);
+}
+EXPORT_SYMBOL(drm_ras_node_register);
+
+/**
+ * drm_ras_node_unregister() - Unregister a previously registered node
+ * @node: Node structure to unregister
+ *
+ * Removes the given node from the global node xarray using its ID.
+ */
+void drm_ras_node_unregister(struct drm_ras_node *node)
+{
+ xa_erase(&drm_ras_xa, node->id);
+}
+EXPORT_SYMBOL(drm_ras_node_unregister);
diff --git a/drivers/gpu/drm/drm_ras_genl_family.c b/drivers/gpu/drm/drm_ras_genl_family.c
new file mode 100644
index 000000000000..6f406d3d48c5
--- /dev/null
+++ b/drivers/gpu/drm/drm_ras_genl_family.c
@@ -0,0 +1,42 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2026 Intel Corporation
+ */
+
+#include <drm/drm_ras_genl_family.h>
+#include "drm_ras_nl.h"
+
+/* Track family registration so drm_core_exit() can be called at any time */
+static bool registered;
+
+/**
+ * drm_ras_genl_family_register() - Register drm-ras genl family
+ *
+ * Only to be called once, from drm_core_init().
+ */
+int drm_ras_genl_family_register(void)
+{
+ int ret;
+
+ registered = false;
+
+ ret = genl_register_family(&drm_ras_nl_family);
+ if (ret)
+ return ret;
+
+ registered = true;
+ return 0;
+}
+
+/**
+ * drm_ras_genl_family_unregister() - Unregister drm-ras genl family
+ *
+ * To be called from drm_core_exit(); may be called at any moment, but only once.
+ */
+void drm_ras_genl_family_unregister(void)
+{
+ if (registered) {
+ genl_unregister_family(&drm_ras_nl_family);
+ registered = false;
+ }
+}
diff --git a/drivers/gpu/drm/drm_ras_nl.c b/drivers/gpu/drm/drm_ras_nl.c
new file mode 100644
index 000000000000..16803d0c4a44
--- /dev/null
+++ b/drivers/gpu/drm/drm_ras_nl.c
@@ -0,0 +1,56 @@
+// SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause)
+/* Do not edit directly, auto-generated from: */
+/* Documentation/netlink/specs/drm_ras.yaml */
+/* YNL-GEN kernel source */
+/* To regenerate run: tools/net/ynl/ynl-regen.sh */
+
+#include <net/netlink.h>
+#include <net/genetlink.h>
+
+#include "drm_ras_nl.h"
+
+#include <uapi/drm/drm_ras.h>
+
+/* DRM_RAS_CMD_GET_ERROR_COUNTER - do */
+static const struct nla_policy drm_ras_get_error_counter_do_nl_policy[DRM_RAS_A_ERROR_COUNTER_ATTRS_ERROR_ID + 1] = {
+ [DRM_RAS_A_ERROR_COUNTER_ATTRS_NODE_ID] = { .type = NLA_U32, },
+ [DRM_RAS_A_ERROR_COUNTER_ATTRS_ERROR_ID] = { .type = NLA_U32, },
+};
+
+/* DRM_RAS_CMD_GET_ERROR_COUNTER - dump */
+static const struct nla_policy drm_ras_get_error_counter_dump_nl_policy[DRM_RAS_A_ERROR_COUNTER_ATTRS_NODE_ID + 1] = {
+ [DRM_RAS_A_ERROR_COUNTER_ATTRS_NODE_ID] = { .type = NLA_U32, },
+};
+
+/* Ops table for drm_ras */
+static const struct genl_split_ops drm_ras_nl_ops[] = {
+ {
+ .cmd = DRM_RAS_CMD_LIST_NODES,
+ .dumpit = drm_ras_nl_list_nodes_dumpit,
+ .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DUMP,
+ },
+ {
+ .cmd = DRM_RAS_CMD_GET_ERROR_COUNTER,
+ .doit = drm_ras_nl_get_error_counter_doit,
+ .policy = drm_ras_get_error_counter_do_nl_policy,
+ .maxattr = DRM_RAS_A_ERROR_COUNTER_ATTRS_ERROR_ID,
+ .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO,
+ },
+ {
+ .cmd = DRM_RAS_CMD_GET_ERROR_COUNTER,
+ .dumpit = drm_ras_nl_get_error_counter_dumpit,
+ .policy = drm_ras_get_error_counter_dump_nl_policy,
+ .maxattr = DRM_RAS_A_ERROR_COUNTER_ATTRS_NODE_ID,
+ .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DUMP,
+ },
+};
+
+struct genl_family drm_ras_nl_family __ro_after_init = {
+ .name = DRM_RAS_FAMILY_NAME,
+ .version = DRM_RAS_FAMILY_VERSION,
+ .netnsok = true,
+ .parallel_ops = true,
+ .module = THIS_MODULE,
+ .split_ops = drm_ras_nl_ops,
+ .n_split_ops = ARRAY_SIZE(drm_ras_nl_ops),
+};
diff --git a/drivers/gpu/drm/drm_ras_nl.h b/drivers/gpu/drm/drm_ras_nl.h
new file mode 100644
index 000000000000..06ccd9342773
--- /dev/null
+++ b/drivers/gpu/drm/drm_ras_nl.h
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) */
+/* Do not edit directly, auto-generated from: */
+/* Documentation/netlink/specs/drm_ras.yaml */
+/* YNL-GEN kernel header */
+/* To regenerate run: tools/net/ynl/ynl-regen.sh */
+
+#ifndef _LINUX_DRM_RAS_GEN_H
+#define _LINUX_DRM_RAS_GEN_H
+
+#include <net/netlink.h>
+#include <net/genetlink.h>
+
+#include <uapi/drm/drm_ras.h>
+
+int drm_ras_nl_list_nodes_dumpit(struct sk_buff *skb,
+ struct netlink_callback *cb);
+int drm_ras_nl_get_error_counter_doit(struct sk_buff *skb,
+ struct genl_info *info);
+int drm_ras_nl_get_error_counter_dumpit(struct sk_buff *skb,
+ struct netlink_callback *cb);
+
+extern struct genl_family drm_ras_nl_family;
+
+#endif /* _LINUX_DRM_RAS_GEN_H */
diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile
index 2062602c1bf3..3a3f9f22d42a 100644
--- a/drivers/gpu/drm/xe/Makefile
+++ b/drivers/gpu/drm/xe/Makefile
@@ -41,6 +41,7 @@ xe-y += xe_bb.o \
xe_device_sysfs.o \
xe_dma_buf.o \
xe_drm_client.o \
+ xe_drm_ras.o \
xe_eu_stall.o \
xe_exec.o \
xe_exec_queue.o \
diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
index 66ddad767ad4..84b80e83ac46 100644
--- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
@@ -20,6 +20,7 @@
#define MTL_MIRROR_TARGET_WP1 XE_REG(0xc60)
#define MTL_CAGF_MASK REG_GENMASK(8, 0)
#define MTL_CC_MASK REG_GENMASK(12, 9)
+#define MTL_CRST 0xf
/* RPM unit config (Gen8+) */
#define RPM_CONFIG0 XE_REG(0xd00)
@@ -100,6 +101,9 @@
#define VE1_AUX_INV XE_REG(0x42b8)
#define AUX_INV REG_BIT(0)
+#define GAMSTLB_CTRL XE_REG_MCR(0x477c)
+#define DIS_PEND_GPA_LINK REG_BIT(13)
+
#define GAMSTLB_CTRL2 XE_REG_MCR(0x4788)
#define STLB_SINGLE_BANK_MODE REG_BIT(11)
@@ -180,6 +184,7 @@
#define COMMON_SLICE_CHICKEN3 XE_REG(0x7304, XE_REG_OPTION_MASKED)
#define XEHP_COMMON_SLICE_CHICKEN3 XE_REG_MCR(0x7304, XE_REG_OPTION_MASKED)
+#define DISABLE_STATE_CACHE_PERF_FIX REG_BIT(13)
#define DG1_FLOAT_POINT_BLEND_OPT_STRICT_MODE_EN REG_BIT(12)
#define XEHP_DUAL_SIMD8_SEQ_MERGE_DISABLE REG_BIT(12)
#define BLEND_EMB_FIX_DISABLE_IN_RCC REG_BIT(11)
@@ -218,9 +223,6 @@
#define GSCPSMI_BASE XE_REG(0x880c)
-#define CCCHKNREG2 XE_REG_MCR(0x881c)
-#define LOCALITYDIS REG_BIT(7)
-
#define CCCHKNREG1 XE_REG_MCR(0x8828)
#define L3CMPCTRL REG_BIT(23)
#define ENCOMPPERFFIX REG_BIT(18)
@@ -452,6 +454,10 @@
#define XEHPC_L3CLOS_MASK(i) XE_REG_MCR(0xb194 + (i) * 8)
+#define L2COMPUTESIDECTRL XE_REG_MCR(0xb1c0)
+#define CECTRL REG_GENMASK(2, 1)
+#define CECTRL_CENODATA_ALWAYS REG_FIELD_PREP(CECTRL, 0x0)
+
#define XE2_GLOBAL_INVAL XE_REG(0xb404)
#define XE2LPM_L3SQCREG2 XE_REG_MCR(0xb604)
diff --git a/drivers/gpu/drm/xe/regs/xe_hw_error_regs.h b/drivers/gpu/drm/xe/regs/xe_hw_error_regs.h
index c146b9ef44eb..046e1756c698 100644
--- a/drivers/gpu/drm/xe/regs/xe_hw_error_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_hw_error_regs.h
@@ -6,15 +6,83 @@
#ifndef _XE_HW_ERROR_REGS_H_
#define _XE_HW_ERROR_REGS_H_
-#define HEC_UNCORR_ERR_STATUS(base) XE_REG((base) + 0x118)
-#define UNCORR_FW_REPORTED_ERR BIT(6)
+#define HEC_UNCORR_ERR_STATUS(base) XE_REG((base) + 0x118)
+#define UNCORR_FW_REPORTED_ERR REG_BIT(6)
-#define HEC_UNCORR_FW_ERR_DW0(base) XE_REG((base) + 0x124)
+#define HEC_UNCORR_FW_ERR_DW0(base) XE_REG((base) + 0x124)
+
+#define ERR_STAT_GT_COR 0x100160
+#define EU_GRF_COR_ERR REG_BIT(15)
+#define EU_IC_COR_ERR REG_BIT(14)
+#define SLM_COR_ERR REG_BIT(13)
+#define GUC_COR_ERR REG_BIT(1)
+
+#define ERR_STAT_GT_NONFATAL 0x100164
+#define ERR_STAT_GT_FATAL 0x100168
+#define EU_GRF_FAT_ERR REG_BIT(15)
+#define SLM_FAT_ERR REG_BIT(13)
+#define GUC_FAT_ERR REG_BIT(6)
+#define FPU_FAT_ERR REG_BIT(3)
+
+#define ERR_STAT_GT_REG(x) XE_REG(_PICK_EVEN((x), \
+ ERR_STAT_GT_COR, \
+ ERR_STAT_GT_NONFATAL))
+
+#define PVC_COR_ERR_MASK (GUC_COR_ERR | SLM_COR_ERR | \
+ EU_IC_COR_ERR | EU_GRF_COR_ERR)
+
+#define PVC_FAT_ERR_MASK (FPU_FAT_ERR | GUC_FAT_ERR | \
+ EU_GRF_FAT_ERR | SLM_FAT_ERR)
+
+#define DEV_ERR_STAT_NONFATAL 0x100178
+#define DEV_ERR_STAT_CORRECTABLE 0x10017c
+#define DEV_ERR_STAT_REG(x) XE_REG(_PICK_EVEN((x), \
+ DEV_ERR_STAT_CORRECTABLE, \
+ DEV_ERR_STAT_NONFATAL))
+
+#define XE_CSC_ERROR 17
+#define XE_SOC_ERROR 16
+#define XE_GT_ERROR 0
+
+#define ERR_STAT_GT_FATAL_VECTOR_0 0x100260
+#define ERR_STAT_GT_FATAL_VECTOR_1 0x100264
+
+#define ERR_STAT_GT_FATAL_VECTOR_REG(x) XE_REG(_PICK_EVEN((x), \
+ ERR_STAT_GT_FATAL_VECTOR_0, \
+ ERR_STAT_GT_FATAL_VECTOR_1))
+
+#define ERR_STAT_GT_COR_VECTOR_0 0x1002a0
+#define ERR_STAT_GT_COR_VECTOR_1 0x1002a4
+
+#define ERR_STAT_GT_COR_VECTOR_REG(x) XE_REG(_PICK_EVEN((x), \
+ ERR_STAT_GT_COR_VECTOR_0, \
+ ERR_STAT_GT_COR_VECTOR_1))
+
+#define ERR_STAT_GT_VECTOR_REG(hw_err, x) (hw_err == HARDWARE_ERROR_CORRECTABLE ? \
+ ERR_STAT_GT_COR_VECTOR_REG(x) : \
+ ERR_STAT_GT_FATAL_VECTOR_REG(x))
+
+#define SOC_PVC_MASTER_BASE 0x282000
+#define SOC_PVC_SLAVE_BASE 0x283000
+
+#define SOC_GCOERRSTS 0x200
+#define SOC_GNFERRSTS 0x210
+#define SOC_GLOBAL_ERR_STAT_REG(base, x) XE_REG(_PICK_EVEN((x), \
+ (base) + SOC_GCOERRSTS, \
+ (base) + SOC_GNFERRSTS))
+#define SOC_SLAVE_IEH REG_BIT(1)
+#define SOC_IEH0_LOCAL_ERR_STATUS REG_BIT(0)
+#define SOC_IEH1_LOCAL_ERR_STATUS REG_BIT(0)
+
+#define SOC_GSYSEVTCTL 0x264
+#define SOC_GSYSEVTCTL_REG(master, slave, x) XE_REG(_PICK_EVEN((x), \
+ (master) + SOC_GSYSEVTCTL, \
+ (slave) + SOC_GSYSEVTCTL))
+
+#define SOC_LERRUNCSTS 0x280
+#define SOC_LERRCORSTS 0x294
+#define SOC_LOCAL_ERR_STAT_REG(base, hw_err) XE_REG(hw_err == HARDWARE_ERROR_CORRECTABLE ? \
+ (base) + SOC_LERRCORSTS : \
+ (base) + SOC_LERRUNCSTS)
-#define DEV_ERR_STAT_NONFATAL 0x100178
-#define DEV_ERR_STAT_CORRECTABLE 0x10017c
-#define DEV_ERR_STAT_REG(x) XE_REG(_PICK_EVEN((x), \
- DEV_ERR_STAT_CORRECTABLE, \
- DEV_ERR_STAT_NONFATAL))
-#define XE_CSC_ERROR BIT(17)
#endif
diff --git a/drivers/gpu/drm/xe/tests/xe_gt_sriov_pf_config_kunit.c b/drivers/gpu/drm/xe/tests/xe_gt_sriov_pf_config_kunit.c
index 305dbd4e5d1a..efa8963ec248 100644
--- a/drivers/gpu/drm/xe/tests/xe_gt_sriov_pf_config_kunit.c
+++ b/drivers/gpu/drm/xe/tests/xe_gt_sriov_pf_config_kunit.c
@@ -11,7 +11,7 @@
#include "xe_pci_test.h"
#define TEST_MAX_VFS 63
-#define TEST_VRAM 0x37a800000ull
+#define TEST_VRAM 0x7a800000ull /* random size that works on 32-bit */
static void pf_set_admin_mode(struct xe_device *xe, bool enable)
{
diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
index 8ff193600443..22179b2df85c 100644
--- a/drivers/gpu/drm/xe/xe_bo.c
+++ b/drivers/gpu/drm/xe/xe_bo.c
@@ -3331,6 +3331,61 @@ int xe_gem_mmap_offset_ioctl(struct drm_device *dev, void *data,
}
/**
+ * xe_bo_decompress - schedule in-place decompress and install fence
+ * @bo: buffer object (caller should hold drm_exec reservations for VM+BO)
+ *
+ * Schedules an in-place resolve via the migrate layer and installs the
+ * returned dma_fence into the BO kernel reservation slot (DMA_RESV_USAGE_KERNEL).
+ * In preempt fence mode, this operation interrupts hardware execution
+ * which is expensive. Page fault mode is recommended for better performance.
+ *
+ * The resolve path only runs for VRAM-backed buffers (currently dGPU-only);
+ * iGPU/system-memory objects fail the resource check and bypass the resolve.
+ *
+ * Returns 0 on success, negative errno on error.
+ */
+int xe_bo_decompress(struct xe_bo *bo)
+{
+ struct xe_device *xe = xe_bo_device(bo);
+ struct xe_tile *tile = xe_device_get_root_tile(xe);
+ struct dma_fence *decomp_fence = NULL;
+ struct ttm_operation_ctx op_ctx = {
+ .interruptible = true,
+ .no_wait_gpu = false,
+ .gfp_retry_mayfail = false,
+ };
+ int err = 0;
+
+ /* Silently skip decompression for non-VRAM buffers */
+ if (!bo->ttm.resource || !mem_type_is_vram(bo->ttm.resource->mem_type))
+ return 0;
+
+ /* Notify before scheduling resolve */
+ err = xe_bo_move_notify(bo, &op_ctx);
+ if (err)
+ return err;
+
+ /* Reserve fence slot before scheduling */
+ err = dma_resv_reserve_fences(bo->ttm.base.resv, 1);
+ if (err)
+ return err;
+
+ /* Schedule the in-place decompression */
+ decomp_fence = xe_migrate_resolve(tile->migrate,
+ bo,
+ bo->ttm.resource);
+
+ if (IS_ERR(decomp_fence))
+ return PTR_ERR(decomp_fence);
+
+ /* Install kernel-usage fence */
+ dma_resv_add_fence(bo->ttm.base.resv, decomp_fence, DMA_RESV_USAGE_KERNEL);
+ dma_fence_put(decomp_fence);
+
+ return 0;
+}
+
+/**
* xe_bo_lock() - Lock the buffer object's dma_resv object
* @bo: The struct xe_bo whose lock is to be taken
* @intr: Whether to perform any wait interruptible
diff --git a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h
index c914ab719f20..2cbac16f7db7 100644
--- a/drivers/gpu/drm/xe/xe_bo.h
+++ b/drivers/gpu/drm/xe/xe_bo.h
@@ -312,6 +312,8 @@ int xe_bo_dumb_create(struct drm_file *file_priv,
bool xe_bo_needs_ccs_pages(struct xe_bo *bo);
+int xe_bo_decompress(struct xe_bo *bo);
+
static inline size_t xe_bo_ccs_pages_start(struct xe_bo *bo)
{
return PAGE_ALIGN(xe_bo_size(bo));
diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c
index 3462645ca13c..e77a3a3db73d 100644
--- a/drivers/gpu/drm/xe/xe_device.c
+++ b/drivers/gpu/drm/xe/xe_device.c
@@ -1074,10 +1074,7 @@ static void tdf_request_sync(struct xe_device *xe)
struct xe_gt *gt;
u8 id;
- for_each_gt(gt, xe, id) {
- if (xe_gt_is_media_type(gt))
- continue;
-
+ for_each_gt_with_type(gt, xe, id, BIT(XE_GT_TYPE_MAIN)) {
CLASS(xe_force_wake, fw_ref)(gt_to_fw(gt), XE_FW_GT);
if (!fw_ref.domains)
return;
diff --git a/drivers/gpu/drm/xe/xe_device.h b/drivers/gpu/drm/xe/xe_device.h
index 39464650533b..c4d267002661 100644
--- a/drivers/gpu/drm/xe/xe_device.h
+++ b/drivers/gpu/drm/xe/xe_device.h
@@ -131,6 +131,10 @@ static inline bool xe_device_uc_enabled(struct xe_device *xe)
for ((id__) = 0; (id__) < (xe__)->info.tile_count * (xe__)->info.max_gt_per_tile; (id__)++) \
for_each_if((gt__) = xe_device_get_gt((xe__), (id__)))
+#define for_each_gt_with_type(gt__, xe__, id__, typemask__) \
+ for_each_gt((gt__), (xe__), (id__)) \
+ for_each_if((typemask__) & BIT((gt__)->info.type))
+
#define for_each_gt_on_tile(gt__, tile__, id__) \
for_each_gt((gt__), (tile__)->xe, (id__)) \
for_each_if((gt__)->tile == (tile__))
diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h
index caa8f34a6744..615218d775b1 100644
--- a/drivers/gpu/drm/xe/xe_device_types.h
+++ b/drivers/gpu/drm/xe/xe_device_types.h
@@ -13,6 +13,7 @@
#include <drm/ttm/ttm_device.h>
#include "xe_devcoredump_types.h"
+#include "xe_drm_ras_types.h"
#include "xe_heci_gsc.h"
#include "xe_late_bind_fw_types.h"
#include "xe_oa_types.h"
@@ -81,16 +82,6 @@ enum xe_wedged_mode {
#define XE_MAX_ASID (BIT(20))
-#define IS_PLATFORM_STEP(_xe, _platform, min_step, max_step) \
- ((_xe)->info.platform == (_platform) && \
- (_xe)->info.step.graphics >= (min_step) && \
- (_xe)->info.step.graphics < (max_step))
-#define IS_SUBPLATFORM_STEP(_xe, _platform, sub, min_step, max_step) \
- ((_xe)->info.platform == (_platform) && \
- (_xe)->info.subplatform == (sub) && \
- (_xe)->info.step.graphics >= (min_step) && \
- (_xe)->info.step.graphics < (max_step))
-
/**
* struct xe_device - Top level struct of Xe device
*/
@@ -153,6 +144,8 @@ struct xe_device {
/** @info.force_execlist: Forced execlist submission */
u8 force_execlist:1;
+ /** @info.has_access_counter: Device supports access counter */
+ u8 has_access_counter:1;
/** @info.has_asid: Has address space ID */
u8 has_asid:1;
/** @info.has_atomic_enable_pte_bit: Device has atomic enable PTE bit */
@@ -509,6 +502,9 @@ struct xe_device {
/** @pmu: performance monitoring unit */
struct xe_pmu pmu;
+ /** @ras: RAS structure for device */
+ struct xe_drm_ras ras;
+
/** @i2c: I2C host controller */
struct xe_i2c *i2c;
diff --git a/drivers/gpu/drm/xe/xe_device_wa_oob.rules b/drivers/gpu/drm/xe/xe_device_wa_oob.rules
index 55ba01bc8f38..d129cddb6ead 100644
--- a/drivers/gpu/drm/xe/xe_device_wa_oob.rules
+++ b/drivers/gpu/drm/xe/xe_device_wa_oob.rules
@@ -3,3 +3,4 @@
PLATFORM(PANTHERLAKE)
22019338487_display PLATFORM(LUNARLAKE)
14022085890 SUBPLATFORM(BATTLEMAGE, G21)
+14026539277 PLATFORM(NOVALAKE_P), PLATFORM_STEP(A0, B0)
diff --git a/drivers/gpu/drm/xe/xe_drm_ras.c b/drivers/gpu/drm/xe/xe_drm_ras.c
new file mode 100644
index 000000000000..e07dc23a155e
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_drm_ras.c
@@ -0,0 +1,186 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2026 Intel Corporation
+ */
+
+#include <linux/bitmap.h>
+
+#include <drm/drm_managed.h>
+#include <drm/drm_print.h>
+#include <drm/drm_ras.h>
+
+#include "xe_device_types.h"
+#include "xe_drm_ras.h"
+
+static const char * const error_components[] = DRM_XE_RAS_ERROR_COMPONENT_NAMES;
+static const char * const error_severity[] = DRM_XE_RAS_ERROR_SEVERITY_NAMES;
+
+static int hw_query_error_counter(struct xe_drm_ras_counter *info,
+ u32 error_id, const char **name, u32 *val)
+{
+ if (!info || !info[error_id].name)
+ return -ENOENT;
+
+ *name = info[error_id].name;
+ *val = atomic_read(&info[error_id].counter);
+
+ return 0;
+}
+
+static int query_uncorrectable_error_counter(struct drm_ras_node *ep, u32 error_id,
+ const char **name, u32 *val)
+{
+ struct xe_device *xe = ep->priv;
+ struct xe_drm_ras *ras = &xe->ras;
+ struct xe_drm_ras_counter *info = ras->info[DRM_XE_RAS_ERR_SEV_UNCORRECTABLE];
+
+ return hw_query_error_counter(info, error_id, name, val);
+}
+
+static int query_correctable_error_counter(struct drm_ras_node *ep, u32 error_id,
+ const char **name, u32 *val)
+{
+ struct xe_device *xe = ep->priv;
+ struct xe_drm_ras *ras = &xe->ras;
+ struct xe_drm_ras_counter *info = ras->info[DRM_XE_RAS_ERR_SEV_CORRECTABLE];
+
+ return hw_query_error_counter(info, error_id, name, val);
+}
+
+static struct xe_drm_ras_counter *allocate_and_copy_counters(struct xe_device *xe)
+{
+ struct xe_drm_ras_counter *counter;
+ int i;
+
+ counter = kcalloc(DRM_XE_RAS_ERR_COMP_MAX, sizeof(*counter), GFP_KERNEL);
+ if (!counter)
+ return ERR_PTR(-ENOMEM);
+
+ for (i = DRM_XE_RAS_ERR_COMP_CORE_COMPUTE; i < DRM_XE_RAS_ERR_COMP_MAX; i++) {
+ if (!error_components[i])
+ continue;
+
+ counter[i].name = error_components[i];
+ atomic_set(&counter[i].counter, 0);
+ }
+
+ return counter;
+}
+
+static int assign_node_params(struct xe_device *xe, struct drm_ras_node *node,
+ const enum drm_xe_ras_error_severity severity)
+{
+ struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
+ struct xe_drm_ras *ras = &xe->ras;
+ const char *device_name;
+
+ device_name = kasprintf(GFP_KERNEL, "%04x:%02x:%02x.%d",
+ pci_domain_nr(pdev->bus), pdev->bus->number,
+ PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
+
+ if (!device_name)
+ return -ENOMEM;
+
+ node->device_name = device_name;
+ node->node_name = error_severity[severity];
+ node->type = DRM_RAS_NODE_TYPE_ERROR_COUNTER;
+ node->error_counter_range.first = DRM_XE_RAS_ERR_COMP_CORE_COMPUTE;
+ node->error_counter_range.last = DRM_XE_RAS_ERR_COMP_MAX - 1;
+ node->priv = xe;
+
+ ras->info[severity] = allocate_and_copy_counters(xe);
+ if (IS_ERR(ras->info[severity]))
+ return PTR_ERR(ras->info[severity]);
+
+ if (severity == DRM_XE_RAS_ERR_SEV_CORRECTABLE)
+ node->query_error_counter = query_correctable_error_counter;
+ else
+ node->query_error_counter = query_uncorrectable_error_counter;
+
+ return 0;
+}
+
+static void cleanup_node_param(struct xe_drm_ras *ras, const enum drm_xe_ras_error_severity severity)
+{
+ struct drm_ras_node *node = &ras->node[severity];
+
+ kfree(ras->info[severity]);
+ ras->info[severity] = NULL;
+
+ kfree(node->device_name);
+ node->device_name = NULL;
+}
+
+static int register_nodes(struct xe_device *xe)
+{
+ struct xe_drm_ras *ras = &xe->ras;
+ int i;
+
+ for_each_error_severity(i) {
+ struct drm_ras_node *node = &ras->node[i];
+ int ret;
+
+ ret = assign_node_params(xe, node, i);
+ if (ret) {
+ cleanup_node_param(ras, i);
+ return ret;
+ }
+
+ ret = drm_ras_node_register(node);
+ if (ret) {
+ cleanup_node_param(ras, i);
+ return ret;
+ }
+ }
+
+ return 0;
+}
+
+static void xe_drm_ras_unregister_nodes(struct drm_device *device, void *arg)
+{
+ struct xe_device *xe = arg;
+ struct xe_drm_ras *ras = &xe->ras;
+ int i;
+
+ for_each_error_severity(i) {
+ struct drm_ras_node *node = &ras->node[i];
+
+ drm_ras_node_unregister(node);
+ cleanup_node_param(ras, i);
+ }
+}
+
+/**
+ * xe_drm_ras_init() - Initialize DRM RAS
+ * @xe: xe device instance
+ *
+ * Allocate and register DRM RAS nodes per device
+ *
+ * Return: 0 on success, negative error code otherwise.
+ */
+int xe_drm_ras_init(struct xe_device *xe)
+{
+ struct xe_drm_ras *ras = &xe->ras;
+ struct drm_ras_node *node;
+ int err;
+
+ node = drmm_kcalloc(&xe->drm, DRM_XE_RAS_ERR_SEV_MAX, sizeof(*node), GFP_KERNEL);
+ if (!node)
+ return -ENOMEM;
+
+ ras->node = node;
+
+ err = register_nodes(xe);
+ if (err) {
+ drm_err(&xe->drm, "Failed to register DRM RAS nodes (%pe)\n", ERR_PTR(err));
+ return err;
+ }
+
+ err = drmm_add_action_or_reset(&xe->drm, xe_drm_ras_unregister_nodes, xe);
+ if (err) {
+ drm_err(&xe->drm, "Failed to add action for Xe DRM RAS (%pe)\n", ERR_PTR(err));
+ return err;
+ }
+
+ return 0;
+}
diff --git a/drivers/gpu/drm/xe/xe_drm_ras.h b/drivers/gpu/drm/xe/xe_drm_ras.h
new file mode 100644
index 000000000000..5cc8f0124411
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_drm_ras.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2026 Intel Corporation
+ */
+#ifndef XE_DRM_RAS_H_
+#define XE_DRM_RAS_H_
+
+struct xe_device;
+
+#define for_each_error_severity(i) \
+ for (i = 0; i < DRM_XE_RAS_ERR_SEV_MAX; i++)
+
+int xe_drm_ras_init(struct xe_device *xe);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_drm_ras_types.h b/drivers/gpu/drm/xe/xe_drm_ras_types.h
new file mode 100644
index 000000000000..8d729ad6a264
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_drm_ras_types.h
@@ -0,0 +1,48 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2026 Intel Corporation
+ */
+
+#ifndef _XE_DRM_RAS_TYPES_H_
+#define _XE_DRM_RAS_TYPES_H_
+
+#include <linux/atomic.h>
+#include <drm/xe_drm.h>
+
+struct drm_ras_node;
+
+/* Error categories reported by hardware */
+enum hardware_error {
+ HARDWARE_ERROR_CORRECTABLE = 0,
+ HARDWARE_ERROR_NONFATAL,
+ HARDWARE_ERROR_FATAL,
+ HARDWARE_ERROR_MAX
+};
+
+/**
+ * struct xe_drm_ras_counter - XE RAS counter
+ *
+ * This structure contains error component and counter information
+ */
+struct xe_drm_ras_counter {
+ /** @name: error component name */
+ const char *name;
+
+ /** @counter: count of error */
+ atomic_t counter;
+};
+
+/**
+ * struct xe_drm_ras - XE DRM RAS structure
+ *
+ * This structure has details of error counters
+ */
+struct xe_drm_ras {
+ /** @node: DRM RAS node */
+ struct drm_ras_node *node;
+
+ /** @info: info array for all types of errors */
+ struct xe_drm_ras_counter *info[DRM_XE_RAS_ERR_SEV_MAX];
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c
index 2d0e73a6a6ee..b287d0e0e60a 100644
--- a/drivers/gpu/drm/xe/xe_exec_queue.c
+++ b/drivers/gpu/drm/xe/xe_exec_queue.c
@@ -353,6 +353,9 @@ static int __xe_exec_queue_init(struct xe_exec_queue *q, u32 exec_queue_flags)
if (!(exec_queue_flags & EXEC_QUEUE_FLAG_KERNEL))
flags |= XE_LRC_CREATE_USER_CTX;
+ if (q->flags & EXEC_QUEUE_FLAG_DISABLE_STATE_CACHE_PERF_FIX)
+ flags |= XE_LRC_DISABLE_STATE_CACHE_PERF_FIX;
+
err = q->ops->init(q);
if (err)
return err;
@@ -399,6 +402,20 @@ err_lrc:
return err;
}
+/**
+ * xe_exec_queue_create() - Create an exec queue
+ * @xe: Xe device
+ * @vm: VM for the exec queue
+ * @logical_mask: Logical mask of HW engines
+ * @width: Width of the exec queue (number of LRCs)
+ * @hwe: Hardware engine
+ * @flags: Exec queue creation flags
+ * @extensions: Extensions for exec queue creation
+ *
+ * Create an exec queue (allocate and initialize) with the specified parameters
+ *
+ * Return: Pointer to the created exec queue on success, ERR_PTR on failure
+ */
struct xe_exec_queue *xe_exec_queue_create(struct xe_device *xe, struct xe_vm *vm,
u32 logical_mask, u16 width,
struct xe_hw_engine *hwe, u32 flags,
@@ -442,6 +459,19 @@ err_post_alloc:
}
ALLOW_ERROR_INJECTION(xe_exec_queue_create, ERRNO);
+/**
+ * xe_exec_queue_create_class() - Create an exec queue for a specific engine class
+ * @xe: Xe device
+ * @gt: GT for the exec queue
+ * @vm: VM for the exec queue
+ * @class: Engine class
+ * @flags: Exec queue creation flags
+ * @extensions: Extensions for exec queue creation
+ *
+ * Create an exec queue for the specified engine class.
+ *
+ * Return: Pointer to the created exec queue on success, ERR_PTR on failure
+ */
struct xe_exec_queue *xe_exec_queue_create_class(struct xe_device *xe, struct xe_gt *gt,
struct xe_vm *vm,
enum xe_engine_class class,
@@ -533,6 +563,14 @@ struct xe_exec_queue *xe_exec_queue_create_bind(struct xe_device *xe,
}
ALLOW_ERROR_INJECTION(xe_exec_queue_create_bind, ERRNO);
+/**
+ * xe_exec_queue_destroy() - Destroy an exec queue
+ * @ref: Reference count of the exec queue
+ *
+ * Called when the last reference to the exec queue is dropped.
+ * Cleans up all resources associated with the exec queue.
+ * This function should not be called directly; use xe_exec_queue_put() instead.
+ */
void xe_exec_queue_destroy(struct kref *ref)
{
struct xe_exec_queue *q = container_of(ref, struct xe_exec_queue, refcount);
@@ -565,6 +603,14 @@ void xe_exec_queue_destroy(struct kref *ref)
q->ops->destroy(q);
}
+/**
+ * xe_exec_queue_fini() - Finalize an exec queue
+ * @q: The exec queue
+ *
+ * Finalizes the exec queue by updating run ticks, releasing LRC references,
+ * and freeing the queue structure. This is called after the queue has been
+ * destroyed and all references have been dropped.
+ */
void xe_exec_queue_fini(struct xe_exec_queue *q)
{
/*
@@ -579,6 +625,14 @@ void xe_exec_queue_fini(struct xe_exec_queue *q)
__xe_exec_queue_free(q);
}
+/**
+ * xe_exec_queue_assign_name() - Assign a name to an exec queue
+ * @q: The exec queue
+ * @instance: Instance number for the engine
+ *
+ * Assigns a human-readable name to the exec queue based on its engine class
+ * and instance number (e.g., "rcs0", "vcs1", "bcs2").
+ */
void xe_exec_queue_assign_name(struct xe_exec_queue *q, u32 instance)
{
switch (q->class) {
@@ -605,6 +659,15 @@ void xe_exec_queue_assign_name(struct xe_exec_queue *q, u32 instance)
}
}
+/**
+ * xe_exec_queue_lookup() - Look up an exec queue by ID
+ * @xef: Xe file private data
+ * @id: Exec queue ID
+ *
+ * Looks up an exec queue by its ID and increments its reference count.
+ *
+ * Return: Pointer to the exec queue if found, NULL otherwise
+ */
struct xe_exec_queue *xe_exec_queue_lookup(struct xe_file *xef, u32 id)
{
struct xe_exec_queue *q;
@@ -618,6 +681,14 @@ struct xe_exec_queue *xe_exec_queue_lookup(struct xe_file *xef, u32 id)
return q;
}
+/**
+ * xe_exec_queue_device_get_max_priority() - Get maximum priority for exec queues
+ * @xe: Xe device
+ *
+ * Returns the maximum priority level that can be assigned to an exec queue.
+ *
+ * Return: Maximum priority level (HIGH if CAP_SYS_NICE, NORMAL otherwise)
+ */
enum xe_exec_queue_priority
xe_exec_queue_device_get_max_priority(struct xe_device *xe)
{
@@ -910,6 +981,17 @@ static int exec_queue_set_multi_queue_priority(struct xe_device *xe, struct xe_e
return q->ops->set_multi_queue_priority(q, value);
}
+static int exec_queue_set_state_cache_perf_fix(struct xe_device *xe, struct xe_exec_queue *q,
+ u64 value)
+{
+ if (XE_IOCTL_DBG(xe, q->class != XE_ENGINE_CLASS_RENDER))
+ return -EOPNOTSUPP;
+
+ q->flags |= value != 0 ? EXEC_QUEUE_FLAG_DISABLE_STATE_CACHE_PERF_FIX : 0;
+
+ return 0;
+}
+
typedef int (*xe_exec_queue_set_property_fn)(struct xe_device *xe,
struct xe_exec_queue *q,
u64 value);
@@ -922,8 +1004,21 @@ static const xe_exec_queue_set_property_fn exec_queue_set_property_funcs[] = {
[DRM_XE_EXEC_QUEUE_SET_PROPERTY_MULTI_GROUP] = exec_queue_set_multi_group,
[DRM_XE_EXEC_QUEUE_SET_PROPERTY_MULTI_QUEUE_PRIORITY] =
exec_queue_set_multi_queue_priority,
+ [DRM_XE_EXEC_QUEUE_SET_DISABLE_STATE_CACHE_PERF_FIX] =
+ exec_queue_set_state_cache_perf_fix,
};
+/**
+ * xe_exec_queue_set_property_ioctl() - Set a property on an exec queue
+ * @dev: DRM device
+ * @data: IOCTL data
+ * @file: DRM file
+ *
+ * Allows setting properties on an existing exec queue. Currently only
+ * supports setting multi-queue priority.
+ *
+ * Return: 0 on success, negative error code on failure
+ */
int xe_exec_queue_set_property_ioctl(struct drm_device *dev, void *data,
struct drm_file *file)
{
@@ -1006,7 +1101,8 @@ static int exec_queue_user_ext_set_property(struct xe_device *xe,
ext.property != DRM_XE_EXEC_QUEUE_SET_PROPERTY_PXP_TYPE &&
ext.property != DRM_XE_EXEC_QUEUE_SET_HANG_REPLAY_STATE &&
ext.property != DRM_XE_EXEC_QUEUE_SET_PROPERTY_MULTI_GROUP &&
- ext.property != DRM_XE_EXEC_QUEUE_SET_PROPERTY_MULTI_QUEUE_PRIORITY))
+ ext.property != DRM_XE_EXEC_QUEUE_SET_PROPERTY_MULTI_QUEUE_PRIORITY &&
+ ext.property != DRM_XE_EXEC_QUEUE_SET_DISABLE_STATE_CACHE_PERF_FIX))
return -EINVAL;
idx = array_index_nospec(ext.property, ARRAY_SIZE(exec_queue_set_property_funcs));
@@ -1148,6 +1244,18 @@ static bool has_sched_groups(struct xe_gt *gt)
return false;
}
+/**
+ * xe_exec_queue_create_ioctl() - Create an exec queue via IOCTL
+ * @dev: DRM device
+ * @data: IOCTL data
+ * @file: DRM file
+ *
+ * Creates a new exec queue based on user-provided parameters. Supports
+ * creating VM bind queues, regular exec queues, multi-lrc exec queues
+ * and multi-queue groups.
+ *
+ * Return: 0 on success with exec_queue_id filled in, negative error code on failure
+ */
int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data,
struct drm_file *file)
{
@@ -1324,6 +1432,17 @@ put_exec_queue:
return err;
}
+/**
+ * xe_exec_queue_get_property_ioctl() - Get a property from an exec queue
+ * @dev: DRM device
+ * @data: IOCTL data
+ * @file: DRM file
+ *
+ * Retrieves property values from an existing exec queue. Currently supports
+ * getting the ban/reset status.
+ *
+ * Return: 0 on success with value filled in, negative error code on failure
+ */
int xe_exec_queue_get_property_ioctl(struct drm_device *dev, void *data,
struct drm_file *file)
{
@@ -1461,6 +1580,16 @@ void xe_exec_queue_kill(struct xe_exec_queue *q)
xe_vm_remove_compute_exec_queue(q->vm, q);
}
+/**
+ * xe_exec_queue_destroy_ioctl() - Destroy an exec queue via IOCTL
+ * @dev: DRM device
+ * @data: IOCTL data
+ * @file: DRM file
+ *
+ * Destroys an existing exec queue and releases its reference.
+ *
+ * Return: 0 on success, negative error code on failure
+ */
int xe_exec_queue_destroy_ioctl(struct drm_device *dev, void *data,
struct drm_file *file)
{
diff --git a/drivers/gpu/drm/xe/xe_exec_queue_types.h b/drivers/gpu/drm/xe/xe_exec_queue_types.h
index a1f3938f4173..8ce78e0b1d50 100644
--- a/drivers/gpu/drm/xe/xe_exec_queue_types.h
+++ b/drivers/gpu/drm/xe/xe_exec_queue_types.h
@@ -134,6 +134,8 @@ struct xe_exec_queue {
#define EXEC_QUEUE_FLAG_LOW_LATENCY BIT(5)
/* for migration (kernel copy, clear, bind) jobs */
#define EXEC_QUEUE_FLAG_MIGRATE BIT(6)
+/* for programming COMMON_SLICE_CHICKEN3 on first submission */
+#define EXEC_QUEUE_FLAG_DISABLE_STATE_CACHE_PERF_FIX BIT(7)
/**
* @flags: flags for this exec queue, should statically setup aside from ban
diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c
index b455af1e6072..bae895fa066a 100644
--- a/drivers/gpu/drm/xe/xe_gt.c
+++ b/drivers/gpu/drm/xe/xe_gt.c
@@ -10,6 +10,7 @@
#include <drm/drm_managed.h>
#include <uapi/drm/xe_drm.h>
+#include <generated/xe_device_wa_oob.h>
#include <generated/xe_wa_oob.h>
#include "instructions/xe_alu_commands.h"
@@ -38,6 +39,7 @@
#include "xe_gt_topology.h"
#include "xe_guc_exec_queue_types.h"
#include "xe_guc_pc.h"
+#include "xe_guc_rc.h"
#include "xe_guc_submit.h"
#include "xe_hw_fence.h"
#include "xe_hw_engine_class_sysfs.h"
@@ -450,6 +452,35 @@ put_exec_queue:
return err;
}
+static void wa_14026539277(struct xe_gt *gt)
+{
+ struct xe_device *xe = gt_to_xe(gt);
+ u32 val;
+
+ /*
+ * FIXME: We currently can't use FUNC(xe_rtp_match_not_sriov_vf) in the
+ * rules for Wa_14026539277 due to xe_wa_process_device_oob() being
+ * called before xe_sriov_probe_early(); and we can't move the call to
+ * the former to happen after the latter because MMIO read functions
+ * already depend on a device OOB workaround. This needs to be fixed by
+ * allowing workaround checks to happen at different stages of driver
+ * initialization.
+ */
+ if (IS_SRIOV_VF(xe))
+ return;
+
+ if (!XE_DEVICE_WA(xe, 14026539277))
+ return;
+
+ if (!xe_gt_is_main_type(gt))
+ return;
+
+ val = xe_gt_mcr_unicast_read_any(gt, L2COMPUTESIDECTRL);
+ val &= ~CECTRL;
+ val |= CECTRL_CENODATA_ALWAYS;
+ xe_gt_mcr_multicast_write(gt, L2COMPUTESIDECTRL, val);
+}
+
int xe_gt_init_early(struct xe_gt *gt)
{
int err;
@@ -575,6 +606,15 @@ static int gt_init_with_gt_forcewake(struct xe_gt *gt)
*/
gt->info.gmdid = xe_mmio_read32(&gt->mmio, GMD_ID);
+ /*
+ * Wa_14026539277 can't be implemented as a regular GT workaround (i.e.
+ * as an entry in gt_was[]) for two reasons: it is actually a device
+ * workaround that happens to involve programming a GT register; and it
+ * needs to be applied early to avoid getting the hardware in a bad
+ * state before we have a chance to do the necessary programming.
+ */
+ wa_14026539277(gt);
+
return 0;
}
@@ -896,6 +936,7 @@ static void gt_reset_worker(struct work_struct *w)
if (IS_SRIOV_PF(gt_to_xe(gt)))
xe_gt_sriov_pf_stop_prepare(gt);
+ xe_guc_rc_disable(&gt->uc.guc);
xe_uc_stop_prepare(&gt->uc);
xe_pagefault_reset(gt_to_xe(gt), gt);
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c
index b867203b4997..2f376b5fb088 100644
--- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c
@@ -1579,10 +1579,7 @@ static int pf_update_vf_lmtt(struct xe_device *xe, unsigned int vfid)
goto fail;
offset = 0;
- for_each_gt(gt, xe, gtid) {
- if (xe_gt_is_media_type(gt))
- continue;
-
+ for_each_gt_with_type(gt, xe, gtid, BIT(XE_GT_TYPE_MAIN)) {
config = pf_pick_vf_config(gt, vfid);
bo = config->lmem_obj;
if (!bo)
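
The loop above now uses the new typed GT iterator instead of the open-coded media-type skip. The iterator's definition is outside this diff; a hypothetical sketch of what such a macro could expand to, assuming the existing for_each_gt() and for_each_if() helpers and the gt->info.type field:

#define for_each_gt_with_type(gt__, xe__, id__, type_mask__) \
	for_each_gt((gt__), (xe__), (id__)) \
		for_each_if(BIT((gt__)->info.type) & (type_mask__))

With BIT(XE_GT_TYPE_MAIN) as the mask, only primary GTs are visited, matching the removed xe_gt_is_media_type() check.
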
diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c
index 54d2fc780127..e75653a5e797 100644
--- a/drivers/gpu/drm/xe/xe_guc.c
+++ b/drivers/gpu/drm/xe/xe_guc.c
@@ -1671,7 +1671,6 @@ void xe_guc_stop_prepare(struct xe_guc *guc)
if (!IS_SRIOV_VF(guc_to_xe(guc))) {
int err;
- xe_guc_rc_disable(guc);
err = xe_guc_pc_stop(&guc->pc);
xe_gt_WARN(guc_to_gt(guc), err, "Failed to stop GuC PC: %pe\n",
ERR_PTR(err));
diff --git a/drivers/gpu/drm/xe/xe_guc_ads.c b/drivers/gpu/drm/xe/xe_guc_ads.c
index f4cbc030f4c8..81b5f01b1f65 100644
--- a/drivers/gpu/drm/xe/xe_guc_ads.c
+++ b/drivers/gpu/drm/xe/xe_guc_ads.c
@@ -819,6 +819,7 @@ static void guc_um_init_params(struct xe_guc_ads *ads)
{
u32 um_queue_offset = guc_ads_um_queues_offset(ads);
struct xe_guc *guc = ads_to_guc(ads);
+ struct xe_device *xe = ads_to_xe(ads);
u64 base_dpa;
u32 base_ggtt;
bool with_dpa;
@@ -830,6 +831,16 @@ static void guc_um_init_params(struct xe_guc_ads *ads)
base_dpa = xe_bo_main_addr(ads->bo, PAGE_SIZE) + um_queue_offset;
for (i = 0; i < GUC_UM_HW_QUEUE_MAX; ++i) {
+ /*
+ * Some platforms support USM but not access counters.
+ * Skip ACCESS_COUNTER queue initialization for such
+ * platforms, leaving queue_params[2] zero-initialized
+ * to signal unavailability to the GuC.
+ */
+ if (i == GUC_UM_HW_QUEUE_ACCESS_COUNTER &&
+ !xe->info.has_access_counter)
+ continue;
+
ads_blob_write(ads, um_init_params.queue_params[i].base_dpa,
with_dpa ? (base_dpa + (i * GUC_UM_QUEUE_SIZE)) : 0);
ads_blob_write(ads, um_init_params.queue_params[i].base_ggtt_address,
diff --git a/drivers/gpu/drm/xe/xe_guc_ct_types.h b/drivers/gpu/drm/xe/xe_guc_ct_types.h
index 46ad1402347d..5da1ce5dc372 100644
--- a/drivers/gpu/drm/xe/xe_guc_ct_types.h
+++ b/drivers/gpu/drm/xe/xe_guc_ct_types.h
@@ -74,7 +74,7 @@ struct xe_guc_ct_snapshot {
/** @ctb_size: size of the snapshot of the CTB */
size_t ctb_size;
/** @ctb: snapshot of the entire CTB */
- u32 *ctb;
+ void *ctb;
};
/**
diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c b/drivers/gpu/drm/xe/xe_guc_pc.c
index 21fe73ab4583..bb8c4e793492 100644
--- a/drivers/gpu/drm/xe/xe_guc_pc.c
+++ b/drivers/gpu/drm/xe/xe_guc_pc.c
@@ -756,6 +756,14 @@ enum xe_gt_idle_state xe_guc_pc_c_status(struct xe_guc_pc *pc)
if (GRAPHICS_VERx100(gt_to_xe(gt)) >= 1270) {
reg = xe_mmio_read32(&gt->mmio, MTL_MIRROR_TARGET_WP1);
gt_c_state = REG_FIELD_GET(MTL_CC_MASK, reg);
+
+ /*
+ * There are higher level sleep states that will cause this
+ * field to read out as its reset state, and those are only
+ * possible after the GT is already in C6.
+ */
+ if (gt_c_state == MTL_CRST)
+ gt_c_state = GT_C6;
} else {
reg = xe_mmio_read32(&gt->mmio, GT_CORE_STATUS);
gt_c_state = REG_FIELD_GET(RCN_MASK, reg);
diff --git a/drivers/gpu/drm/xe/xe_hw_error.c b/drivers/gpu/drm/xe/xe_hw_error.c
index 8c65291f36fc..2a31b430570e 100644
--- a/drivers/gpu/drm/xe/xe_hw_error.c
+++ b/drivers/gpu/drm/xe/xe_hw_error.c
@@ -3,6 +3,7 @@
* Copyright © 2025 Intel Corporation
*/
+#include <linux/bitmap.h>
#include <linux/fault-inject.h>
#include "regs/xe_gsc_regs.h"
@@ -10,20 +11,23 @@
#include "regs/xe_irq_regs.h"
#include "xe_device.h"
+#include "xe_drm_ras.h"
#include "xe_hw_error.h"
#include "xe_mmio.h"
#include "xe_survivability_mode.h"
-#define HEC_UNCORR_FW_ERR_BITS 4
+#define GT_HW_ERROR_MAX_ERR_BITS 16
+#define HEC_UNCORR_FW_ERR_BITS 4
+#define XE_RAS_REG_SIZE 32
+#define XE_SOC_NUM_IEH 2
+
+#define PVC_ERROR_MASK_SET(hw_err, err_bit) ((hw_err == HARDWARE_ERROR_CORRECTABLE) ? \
+ (PVC_COR_ERR_MASK & REG_BIT(err_bit)) : \
+ (PVC_FAT_ERR_MASK & REG_BIT(err_bit)))
+
extern struct fault_attr inject_csc_hw_error;
-/* Error categories reported by hardware */
-enum hardware_error {
- HARDWARE_ERROR_CORRECTABLE = 0,
- HARDWARE_ERROR_NONFATAL = 1,
- HARDWARE_ERROR_FATAL = 2,
- HARDWARE_ERROR_MAX,
-};
+static const char * const error_severity[] = DRM_XE_RAS_ERROR_SEVERITY_NAMES;
static const char * const hec_uncorrected_fw_errors[] = {
"Fatal",
@@ -32,20 +36,130 @@ static const char * const hec_uncorrected_fw_errors[] = {
"Data Corruption"
};
-static const char *hw_error_to_str(const enum hardware_error hw_err)
+static const unsigned long xe_hw_error_map[] = {
+ [XE_GT_ERROR] = DRM_XE_RAS_ERR_COMP_CORE_COMPUTE,
+ [XE_SOC_ERROR] = DRM_XE_RAS_ERR_COMP_SOC_INTERNAL,
+};
+
+enum gt_vector_regs {
+ ERR_STAT_GT_VECTOR0 = 0,
+ ERR_STAT_GT_VECTOR1,
+ ERR_STAT_GT_VECTOR2,
+ ERR_STAT_GT_VECTOR3,
+ ERR_STAT_GT_VECTOR4,
+ ERR_STAT_GT_VECTOR5,
+ ERR_STAT_GT_VECTOR6,
+ ERR_STAT_GT_VECTOR7,
+ ERR_STAT_GT_VECTOR_MAX
+};
+
+#define PVC_GT_VECTOR_LEN(hw_err) ((hw_err == HARDWARE_ERROR_CORRECTABLE) ? \
+ ERR_STAT_GT_VECTOR4 : ERR_STAT_GT_VECTOR_MAX)
+
+static enum drm_xe_ras_error_severity hw_err_to_severity(const enum hardware_error hw_err)
{
- switch (hw_err) {
- case HARDWARE_ERROR_CORRECTABLE:
- return "CORRECTABLE";
- case HARDWARE_ERROR_NONFATAL:
- return "NONFATAL";
- case HARDWARE_ERROR_FATAL:
- return "FATAL";
- default:
- return "UNKNOWN";
- }
+ if (hw_err == HARDWARE_ERROR_CORRECTABLE)
+ return DRM_XE_RAS_ERR_SEV_CORRECTABLE;
+
+ /* Uncorrectable errors comprise both fatal and non-fatal errors */
+ return DRM_XE_RAS_ERR_SEV_UNCORRECTABLE;
}
+static const char * const pvc_master_global_err_reg[] = {
+ [0 ... 1] = "Undefined",
+ [2] = "HBM SS0: Channel0",
+ [3] = "HBM SS0: Channel1",
+ [4] = "HBM SS0: Channel2",
+ [5] = "HBM SS0: Channel3",
+ [6] = "HBM SS0: Channel4",
+ [7] = "HBM SS0: Channel5",
+ [8] = "HBM SS0: Channel6",
+ [9] = "HBM SS0: Channel7",
+ [10] = "HBM SS1: Channel0",
+ [11] = "HBM SS1: Channel1",
+ [12] = "HBM SS1: Channel2",
+ [13] = "HBM SS1: Channel3",
+ [14] = "HBM SS1: Channel4",
+ [15] = "HBM SS1: Channel5",
+ [16] = "HBM SS1: Channel6",
+ [17] = "HBM SS1: Channel7",
+ [18 ... 31] = "Undefined",
+};
+static_assert(ARRAY_SIZE(pvc_master_global_err_reg) == XE_RAS_REG_SIZE);
+
+static const char * const pvc_slave_global_err_reg[] = {
+ [0] = "Undefined",
+ [1] = "HBM SS2: Channel0",
+ [2] = "HBM SS2: Channel1",
+ [3] = "HBM SS2: Channel2",
+ [4] = "HBM SS2: Channel3",
+ [5] = "HBM SS2: Channel4",
+ [6] = "HBM SS2: Channel5",
+ [7] = "HBM SS2: Channel6",
+ [8] = "HBM SS2: Channel7",
+ [9] = "HBM SS3: Channel0",
+ [10] = "HBM SS3: Channel1",
+ [11] = "HBM SS3: Channel2",
+ [12] = "HBM SS3: Channel3",
+ [13] = "HBM SS3: Channel4",
+ [14] = "HBM SS3: Channel5",
+ [15] = "HBM SS3: Channel6",
+ [16] = "HBM SS3: Channel7",
+ [17] = "Undefined",
+ [18] = "ANR MDFI",
+ [19 ... 31] = "Undefined",
+};
+static_assert(ARRAY_SIZE(pvc_slave_global_err_reg) == XE_RAS_REG_SIZE);
+
+static const char * const pvc_slave_local_fatal_err_reg[] = {
+ [0] = "Local IEH: Malformed PCIe AER",
+ [1] = "Local IEH: Malformed PCIe ERR",
+ [2] = "Local IEH: UR conditions in IEH",
+ [3] = "Local IEH: From SERR Sources",
+ [4 ... 19] = "Undefined",
+ [20] = "Malformed MCA error packet (HBM/Punit)",
+ [21 ... 31] = "Undefined",
+};
+static_assert(ARRAY_SIZE(pvc_slave_local_fatal_err_reg) == XE_RAS_REG_SIZE);
+
+static const char * const pvc_master_local_fatal_err_reg[] = {
+ [0] = "Local IEH: Malformed IOSF PCIe AER",
+ [1] = "Local IEH: Malformed IOSF PCIe ERR",
+ [2] = "Local IEH: UR RESPONSE",
+ [3] = "Local IEH: From SERR SPI controller",
+ [4] = "Base Die MDFI T2T",
+ [5] = "Undefined",
+ [6] = "Base Die MDFI T2C",
+ [7] = "Undefined",
+ [8] = "Invalid CSC PSF Command Parity",
+ [9] = "Invalid CSC PSF Unexpected Completion",
+ [10] = "Invalid CSC PSF Unsupported Request",
+ [11] = "Invalid PCIe PSF Command Parity",
+ [12] = "PCIe PSF Unexpected Completion",
+ [13] = "PCIe PSF Unsupported Request",
+ [14 ... 19] = "Undefined",
+ [20] = "Malformed MCA error packet (HBM/Punit)",
+ [21 ... 31] = "Undefined",
+};
+static_assert(ARRAY_SIZE(pvc_master_local_fatal_err_reg) == XE_RAS_REG_SIZE);
+
+static const char * const pvc_master_local_nonfatal_err_reg[] = {
+ [0 ... 3] = "Undefined",
+ [4] = "Base Die MDFI T2T",
+ [5] = "Undefined",
+ [6] = "Base Die MDFI T2C",
+ [7] = "Undefined",
+ [8] = "Invalid CSC PSF Command Parity",
+ [9] = "Invalid CSC PSF Unexpected Completion",
+ [10] = "Invalid PCIe PSF Command Parity",
+ [11 ... 31] = "Undefined",
+};
+static_assert(ARRAY_SIZE(pvc_master_local_nonfatal_err_reg) == XE_RAS_REG_SIZE);
+
+#define PVC_MASTER_LOCAL_REG_INFO(hw_err) ((hw_err == HARDWARE_ERROR_FATAL) ? \
+ pvc_master_local_fatal_err_reg : \
+ pvc_master_local_nonfatal_err_reg)
+
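
The decode tables above rely on two C idioms: GNU designated range initializers fill the unnamed bit positions with "Undefined", and static_assert() pins each table to the 32-bit register width so a new entry cannot silently misalign the decode. A small self-contained illustration with placeholder names:

#define EXAMPLE_REG_BITS 8

static const char * const example_err_bits[] = {
	[0]       = "parity",
	[1]       = "timeout",
	[2 ... 7] = "Undefined",	/* range initializer fills the gaps */
};
static_assert(ARRAY_SIZE(example_err_bits) == EXAMPLE_REG_BITS);
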
static bool fault_inject_csc_hw_error(void)
{
return IS_ENABLED(CONFIG_DEBUG_FS) && should_fail(&inject_csc_hw_error, 1);
@@ -64,7 +178,8 @@ static void csc_hw_error_work(struct work_struct *work)
static void csc_hw_error_handler(struct xe_tile *tile, const enum hardware_error hw_err)
{
- const char *hw_err_str = hw_error_to_str(hw_err);
+ const enum drm_xe_ras_error_severity severity = hw_err_to_severity(hw_err);
+ const char *severity_str = error_severity[severity];
struct xe_device *xe = tile_to_xe(tile);
struct xe_mmio *mmio = &tile->mmio;
u32 base, err_bit, err_src;
@@ -77,8 +192,8 @@ static void csc_hw_error_handler(struct xe_tile *tile, const enum hardware_error
lockdep_assert_held(&xe->irq.lock);
err_src = xe_mmio_read32(mmio, HEC_UNCORR_ERR_STATUS(base));
if (!err_src) {
- drm_err_ratelimited(&xe->drm, HW_ERR "Tile%d reported HEC_ERR_STATUS_%s blank\n",
- tile->id, hw_err_str);
+ drm_err_ratelimited(&xe->drm, HW_ERR "Tile%d reported %s HEC_ERR_STATUS register blank\n",
+ tile->id, severity_str);
return;
}
@@ -86,8 +201,8 @@ static void csc_hw_error_handler(struct xe_tile *tile, const enum hardware_error
fw_err = xe_mmio_read32(mmio, HEC_UNCORR_FW_ERR_DW0(base));
for_each_set_bit(err_bit, &fw_err, HEC_UNCORR_FW_ERR_BITS) {
drm_err_ratelimited(&xe->drm, HW_ERR
- "%s: HEC Uncorrected FW %s error reported, bit[%d] is set\n",
- hw_err_str, hec_uncorrected_fw_errors[err_bit],
+ "HEC FW %s %s reported, bit[%d] is set\n",
+ hec_uncorrected_fw_errors[err_bit], severity_str,
err_bit);
schedule_work(&tile->csc_hw_error_work);
@@ -97,29 +212,285 @@ static void csc_hw_error_handler(struct xe_tile *tile, const enum hardware_error
xe_mmio_write32(mmio, HEC_UNCORR_ERR_STATUS(base), err_src);
}
+static void log_hw_error(struct xe_tile *tile, const char *name,
+ const enum drm_xe_ras_error_severity severity)
+{
+ const char *severity_str = error_severity[severity];
+ struct xe_device *xe = tile_to_xe(tile);
+
+ if (severity == DRM_XE_RAS_ERR_SEV_CORRECTABLE)
+ drm_warn(&xe->drm, "%s %s detected\n", name, severity_str);
+ else
+ drm_err_ratelimited(&xe->drm, "%s %s detected\n", name, severity_str);
+}
+
+static void log_gt_err(struct xe_tile *tile, const char *name, int i, u32 err,
+ const enum drm_xe_ras_error_severity severity)
+{
+ const char *severity_str = error_severity[severity];
+ struct xe_device *xe = tile_to_xe(tile);
+
+ if (severity == DRM_XE_RAS_ERR_SEV_CORRECTABLE)
+ drm_warn(&xe->drm, "%s %s detected, ERROR_STAT_GT_VECTOR%d:0x%08x\n",
+ name, severity_str, i, err);
+ else
+ drm_err_ratelimited(&xe->drm, "%s %s detected, ERROR_STAT_GT_VECTOR%d:0x%08x\n",
+ name, severity_str, i, err);
+}
+
+static void log_soc_error(struct xe_tile *tile, const char * const *reg_info,
+ const enum drm_xe_ras_error_severity severity, u32 err_bit, u32 index)
+{
+ const char *severity_str = error_severity[severity];
+ struct xe_device *xe = tile_to_xe(tile);
+ struct xe_drm_ras *ras = &xe->ras;
+ struct xe_drm_ras_counter *info = ras->info[severity];
+ const char *name;
+
+ name = reg_info[err_bit];
+
+ if (strcmp(name, "Undefined")) {
+ if (severity == DRM_XE_RAS_ERR_SEV_CORRECTABLE)
+ drm_warn(&xe->drm, "%s SOC %s detected", name, severity_str);
+ else
+ drm_err_ratelimited(&xe->drm, "%s SOC %s detected", name, severity_str);
+ atomic_inc(&info[index].counter);
+ }
+}
+
+static void gt_hw_error_handler(struct xe_tile *tile, const enum hardware_error hw_err,
+ u32 error_id)
+{
+ const enum drm_xe_ras_error_severity severity = hw_err_to_severity(hw_err);
+ struct xe_device *xe = tile_to_xe(tile);
+ struct xe_drm_ras *ras = &xe->ras;
+ struct xe_drm_ras_counter *info = ras->info[severity];
+ struct xe_mmio *mmio = &tile->mmio;
+ unsigned long err_stat = 0;
+ int i;
+
+ if (xe->info.platform != XE_PVC)
+ return;
+
+ if (hw_err == HARDWARE_ERROR_NONFATAL) {
+ atomic_inc(&info[error_id].counter);
+ log_hw_error(tile, info[error_id].name, severity);
+ return;
+ }
+
+ for (i = 0; i < PVC_GT_VECTOR_LEN(hw_err); i++) {
+ u32 vector, val;
+
+ vector = xe_mmio_read32(mmio, ERR_STAT_GT_VECTOR_REG(hw_err, i));
+ if (!vector)
+ continue;
+
+ switch (i) {
+ case ERR_STAT_GT_VECTOR0:
+ case ERR_STAT_GT_VECTOR1: {
+ u32 errbit;
+
+ val = hweight32(vector);
+ atomic_add(val, &info[error_id].counter);
+ log_gt_err(tile, "Subslice", i, vector, severity);
+
+ /*
+ * Error status register is only populated once per error.
+ * Read the register and clear once.
+ */
+ if (err_stat)
+ break;
+
+ err_stat = xe_mmio_read32(mmio, ERR_STAT_GT_REG(hw_err));
+ for_each_set_bit(errbit, &err_stat, GT_HW_ERROR_MAX_ERR_BITS) {
+ if (PVC_ERROR_MASK_SET(hw_err, errbit))
+ atomic_inc(&info[error_id].counter);
+ }
+ if (err_stat)
+ xe_mmio_write32(mmio, ERR_STAT_GT_REG(hw_err), err_stat);
+ break;
+ }
+ case ERR_STAT_GT_VECTOR2:
+ case ERR_STAT_GT_VECTOR3:
+ val = hweight32(vector);
+ atomic_add(val, &info[error_id].counter);
+ log_gt_err(tile, "L3 BANK", i, vector, severity);
+ break;
+ case ERR_STAT_GT_VECTOR6:
+ val = hweight32(vector);
+ atomic_add(val, &info[error_id].counter);
+ log_gt_err(tile, "TLB", i, vector, severity);
+ break;
+ case ERR_STAT_GT_VECTOR7:
+ val = hweight32(vector);
+ atomic_add(val, &info[error_id].counter);
+ log_gt_err(tile, "L3 Fabric", i, vector, severity);
+ break;
+ default:
+ log_gt_err(tile, "Undefined", i, vector, severity);
+ }
+
+ xe_mmio_write32(mmio, ERR_STAT_GT_VECTOR_REG(hw_err, i), vector);
+ }
+}
+
+static void soc_slave_ieh_handler(struct xe_tile *tile, const enum hardware_error hw_err, u32 error_id)
+{
+ const enum drm_xe_ras_error_severity severity = hw_err_to_severity(hw_err);
+ unsigned long slave_global_errstat, slave_local_errstat;
+ struct xe_mmio *mmio = &tile->mmio;
+ u32 regbit, slave;
+
+ slave = SOC_PVC_SLAVE_BASE;
+ slave_global_errstat = xe_mmio_read32(mmio, SOC_GLOBAL_ERR_STAT_REG(slave, hw_err));
+
+ if (slave_global_errstat & SOC_IEH1_LOCAL_ERR_STATUS) {
+ slave_local_errstat = xe_mmio_read32(mmio, SOC_LOCAL_ERR_STAT_REG(slave, hw_err));
+
+ if (hw_err == HARDWARE_ERROR_FATAL) {
+ for_each_set_bit(regbit, &slave_local_errstat, XE_RAS_REG_SIZE)
+ log_soc_error(tile, pvc_slave_local_fatal_err_reg, severity,
+ regbit, error_id);
+ }
+
+ xe_mmio_write32(mmio, SOC_LOCAL_ERR_STAT_REG(slave, hw_err),
+ slave_local_errstat);
+ }
+
+ for_each_set_bit(regbit, &slave_global_errstat, XE_RAS_REG_SIZE)
+ log_soc_error(tile, pvc_slave_global_err_reg, severity, regbit, error_id);
+
+ xe_mmio_write32(mmio, SOC_GLOBAL_ERR_STAT_REG(slave, hw_err), slave_global_errstat);
+}
+
+static void soc_hw_error_handler(struct xe_tile *tile, const enum hardware_error hw_err,
+ u32 error_id)
+{
+ const enum drm_xe_ras_error_severity severity = hw_err_to_severity(hw_err);
+ struct xe_device *xe = tile_to_xe(tile);
+ struct xe_mmio *mmio = &tile->mmio;
+ unsigned long master_global_errstat, master_local_errstat;
+ u32 master, slave, regbit;
+ int i;
+
+ if (xe->info.platform != XE_PVC)
+ return;
+
+ master = SOC_PVC_MASTER_BASE;
+ slave = SOC_PVC_SLAVE_BASE;
+
+ /* Mask error type in GSYSEVTCTL so that no new errors of the type will be reported */
+ for (i = 0; i < XE_SOC_NUM_IEH; i++)
+ xe_mmio_write32(mmio, SOC_GSYSEVTCTL_REG(master, slave, i), ~REG_BIT(hw_err));
+
+ if (hw_err == HARDWARE_ERROR_CORRECTABLE) {
+ xe_mmio_write32(mmio, SOC_GLOBAL_ERR_STAT_REG(master, hw_err), REG_GENMASK(31, 0));
+ xe_mmio_write32(mmio, SOC_LOCAL_ERR_STAT_REG(master, hw_err), REG_GENMASK(31, 0));
+ xe_mmio_write32(mmio, SOC_GLOBAL_ERR_STAT_REG(slave, hw_err), REG_GENMASK(31, 0));
+ xe_mmio_write32(mmio, SOC_LOCAL_ERR_STAT_REG(slave, hw_err), REG_GENMASK(31, 0));
+ goto unmask_gsysevtctl;
+ }
+
+ /*
+ * Read the master global IEH error register. If BIT(1) is set, process the
+ * slave IEH first; if BIT(0) in the global error register is set, process
+ * the corresponding local error registers.
+ */
+ master_global_errstat = xe_mmio_read32(mmio, SOC_GLOBAL_ERR_STAT_REG(master, hw_err));
+ if (master_global_errstat & SOC_SLAVE_IEH)
+ soc_slave_ieh_handler(tile, hw_err, error_id);
+
+ if (master_global_errstat & SOC_IEH0_LOCAL_ERR_STATUS) {
+ master_local_errstat = xe_mmio_read32(mmio, SOC_LOCAL_ERR_STAT_REG(master, hw_err));
+
+ for_each_set_bit(regbit, &master_local_errstat, XE_RAS_REG_SIZE)
+ log_soc_error(tile, PVC_MASTER_LOCAL_REG_INFO(hw_err), severity, regbit, error_id);
+
+ xe_mmio_write32(mmio, SOC_LOCAL_ERR_STAT_REG(master, hw_err), master_local_errstat);
+ }
+
+ for_each_set_bit(regbit, &master_global_errstat, XE_RAS_REG_SIZE)
+ log_soc_error(tile, pvc_master_global_err_reg, severity, regbit, error_id);
+
+ xe_mmio_write32(mmio, SOC_GLOBAL_ERR_STAT_REG(master, hw_err), master_global_errstat);
+
+unmask_gsysevtctl:
+ for (i = 0; i < XE_SOC_NUM_IEH; i++)
+ xe_mmio_write32(mmio, SOC_GSYSEVTCTL_REG(master, slave, i),
+ (HARDWARE_ERROR_MAX << 1) + 1);
+}
+
static void hw_error_source_handler(struct xe_tile *tile, const enum hardware_error hw_err)
{
- const char *hw_err_str = hw_error_to_str(hw_err);
+ const enum drm_xe_ras_error_severity severity = hw_err_to_severity(hw_err);
+ const char *severity_str = error_severity[severity];
struct xe_device *xe = tile_to_xe(tile);
- unsigned long flags;
- u32 err_src;
+ struct xe_drm_ras *ras = &xe->ras;
+ struct xe_drm_ras_counter *info = ras->info[severity];
+ unsigned long flags, err_src;
+ u32 err_bit;
- if (xe->info.platform != XE_BATTLEMAGE)
+ if (!IS_DGFX(xe))
return;
spin_lock_irqsave(&xe->irq.lock, flags);
err_src = xe_mmio_read32(&tile->mmio, DEV_ERR_STAT_REG(hw_err));
if (!err_src) {
- drm_err_ratelimited(&xe->drm, HW_ERR "Tile%d reported DEV_ERR_STAT_%s blank!\n",
- tile->id, hw_err_str);
+ drm_err_ratelimited(&xe->drm, HW_ERR "Tile%d reported %s DEV_ERR_STAT register blank!\n",
+ tile->id, severity_str);
goto unlock;
}
- if (err_src & XE_CSC_ERROR)
+ /*
+ * On encountering CSC firmware errors, the graphics device becomes unrecoverable,
+ * so return immediately. The only way to recover from these errors is a
+ * firmware flash. The device will enter Runtime Survivability mode when such
+ * errors are detected.
+ */
+ if (err_src & REG_BIT(XE_CSC_ERROR)) {
csc_hw_error_handler(tile, hw_err);
+ goto clear_reg;
+ }
- xe_mmio_write32(&tile->mmio, DEV_ERR_STAT_REG(hw_err), err_src);
+ if (!info)
+ goto clear_reg;
+
+ for_each_set_bit(err_bit, &err_src, XE_RAS_REG_SIZE) {
+ const char *name;
+ u32 error_id;
+
+ /* Check error bit is within bounds */
+ if (err_bit >= ARRAY_SIZE(xe_hw_error_map))
+ break;
+
+ error_id = xe_hw_error_map[err_bit];
+ /* Check error component is within max */
+ if (!error_id || error_id >= DRM_XE_RAS_ERR_COMP_MAX)
+ continue;
+
+ name = info[error_id].name;
+ if (!name)
+ continue;
+
+ if (severity == DRM_XE_RAS_ERR_SEV_CORRECTABLE) {
+ drm_warn(&xe->drm, HW_ERR
+ "TILE%d reported %s %s, bit[%d] is set\n",
+ tile->id, name, severity_str, err_bit);
+ } else {
+ drm_err_ratelimited(&xe->drm, HW_ERR
+ "TILE%d reported %s %s, bit[%d] is set\n",
+ tile->id, name, severity_str, err_bit);
+ }
+
+ if (err_bit == XE_GT_ERROR)
+ gt_hw_error_handler(tile, hw_err, error_id);
+ if (err_bit == XE_SOC_ERROR)
+ soc_hw_error_handler(tile, hw_err, error_id);
+ }
+
+clear_reg:
+ xe_mmio_write32(&tile->mmio, DEV_ERR_STAT_REG(hw_err), err_src);
unlock:
spin_unlock_irqrestore(&xe->irq.lock, flags);
}
@@ -141,9 +512,18 @@ void xe_hw_error_irq_handler(struct xe_tile *tile, const u32 master_ctl)
if (fault_inject_csc_hw_error())
schedule_work(&tile->csc_hw_error_work);
- for (hw_err = 0; hw_err < HARDWARE_ERROR_MAX; hw_err++)
+ for (hw_err = 0; hw_err < HARDWARE_ERROR_MAX; hw_err++) {
if (master_ctl & ERROR_IRQ(hw_err))
hw_error_source_handler(tile, hw_err);
+ }
+}
+
+static int hw_error_info_init(struct xe_device *xe)
+{
+ if (xe->info.platform != XE_PVC)
+ return 0;
+
+ return xe_drm_ras_init(xe);
}
/*
@@ -172,11 +552,16 @@ static void process_hw_errors(struct xe_device *xe)
void xe_hw_error_init(struct xe_device *xe)
{
struct xe_tile *tile = xe_device_get_root_tile(xe);
+ int ret;
if (!IS_DGFX(xe) || IS_SRIOV_VF(xe))
return;
INIT_WORK(&tile->csc_hw_error_work, csc_hw_error_work);
+ ret = hw_error_info_init(xe);
+ if (ret)
+ drm_err(&xe->drm, "Failed to initialize XE DRM RAS (%pe)\n", ERR_PTR(ret));
+
process_hw_errors(xe);
}
diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c
index fcdbd403fa3c..73a503d88217 100644
--- a/drivers/gpu/drm/xe/xe_lrc.c
+++ b/drivers/gpu/drm/xe/xe_lrc.c
@@ -14,6 +14,7 @@
#include "instructions/xe_gfxpipe_commands.h"
#include "instructions/xe_gfx_state_commands.h"
#include "regs/xe_engine_regs.h"
+#include "regs/xe_gt_regs.h"
#include "regs/xe_lrc_layout.h"
#include "xe_bb.h"
#include "xe_bo.h"
@@ -1446,6 +1447,7 @@ static int xe_lrc_ctx_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe, struct
struct xe_device *xe = gt_to_xe(gt);
struct iosys_map map;
u32 arb_enable;
+ u32 state_cache_perf_fix[3];
int err;
/*
@@ -1546,6 +1548,13 @@ static int xe_lrc_ctx_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe, struct
arb_enable = MI_ARB_ON_OFF | MI_ARB_ENABLE;
xe_lrc_write_ring(lrc, &arb_enable, sizeof(arb_enable));
+ if (init_flags & XE_LRC_DISABLE_STATE_CACHE_PERF_FIX) {
+ state_cache_perf_fix[0] = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(1);
+ state_cache_perf_fix[1] = COMMON_SLICE_CHICKEN3.addr;
+ state_cache_perf_fix[2] = _MASKED_BIT_ENABLE(DISABLE_STATE_CACHE_PERF_FIX);
+ xe_lrc_write_ring(lrc, state_cache_perf_fix, sizeof(state_cache_perf_fix));
+ }
+
map = __xe_lrc_seqno_map(lrc);
xe_map_write32(lrc_to_xe(lrc), &map, lrc->fence_ctx.next_seqno - 1);
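
The LRI above relies on COMMON_SLICE_CHICKEN3 being a masked register: the upper 16 bits of the written value select which bits the write touches, the lower 16 carry the values, so a single chicken bit can be set without a read-modify-write. A minimal sketch of what the _MASKED_BIT_ENABLE() helper used above is assumed to compute:

/* Illustrative only; the driver uses the _MASKED_BIT_ENABLE() macro. */
static inline u32 masked_bit_enable(u32 bit)
{
	return (bit << 16) | bit;	/* e.g. BIT(5) -> 0x00200020 */
}
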
diff --git a/drivers/gpu/drm/xe/xe_lrc.h b/drivers/gpu/drm/xe/xe_lrc.h
index 48f7c26cf129..e7c975f9e2d9 100644
--- a/drivers/gpu/drm/xe/xe_lrc.h
+++ b/drivers/gpu/drm/xe/xe_lrc.h
@@ -49,6 +49,7 @@ struct xe_lrc_snapshot {
#define XE_LRC_CREATE_RUNALONE BIT(0)
#define XE_LRC_CREATE_PXP BIT(1)
#define XE_LRC_CREATE_USER_CTX BIT(2)
+#define XE_LRC_DISABLE_STATE_CACHE_PERF_FIX BIT(3)
struct xe_lrc *xe_lrc_create(struct xe_hw_engine *hwe, struct xe_vm *vm,
void *replay_state, u32 ring_size, u16 msix_vec, u32 flags);
diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c
index 8af6c347bea8..fc918b4fba54 100644
--- a/drivers/gpu/drm/xe/xe_migrate.c
+++ b/drivers/gpu/drm/xe/xe_migrate.c
@@ -184,19 +184,11 @@ static void xe_migrate_program_identity(struct xe_device *xe, struct xe_vm *vm,
xe_assert(xe, pos == vram_limit);
}
-static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m,
- struct xe_vm *vm, struct drm_exec *exec)
+static int xe_migrate_pt_bo_alloc(struct xe_tile *tile, struct xe_migrate *m,
+ struct xe_vm *vm, struct drm_exec *exec)
{
- struct xe_device *xe = tile_to_xe(tile);
- u16 pat_index = xe->pat.idx[XE_CACHE_WB];
- u8 id = tile->id;
- u32 num_entries = NUM_PT_SLOTS, num_level = vm->pt_root[id]->level;
-#define VRAM_IDENTITY_MAP_COUNT 2
- u32 num_setup = num_level + VRAM_IDENTITY_MAP_COUNT;
-#undef VRAM_IDENTITY_MAP_COUNT
- u32 map_ofs, level, i;
struct xe_bo *bo, *batch = tile->mem.kernel_bb_pool->bo;
- u64 entry, pt29_ofs;
+ u32 num_entries = NUM_PT_SLOTS;
/* Can't bump NUM_PT_SLOTS too high */
BUILD_BUG_ON(NUM_PT_SLOTS > SZ_2M/XE_PAGE_SIZE);
@@ -216,6 +208,24 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m,
if (IS_ERR(bo))
return PTR_ERR(bo);
+ m->pt_bo = bo;
+ return 0;
+}
+
+static void xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m,
+ struct xe_vm *vm, u32 *ofs)
+{
+ struct xe_device *xe = tile_to_xe(tile);
+ u16 pat_index = xe->pat.idx[XE_CACHE_WB];
+ u8 id = tile->id;
+ u32 num_entries = NUM_PT_SLOTS, num_level = vm->pt_root[id]->level;
+#define VRAM_IDENTITY_MAP_COUNT 2
+ u32 num_setup = num_level + VRAM_IDENTITY_MAP_COUNT;
+#undef VRAM_IDENTITY_MAP_COUNT
+ u32 map_ofs, level, i;
+ struct xe_bo *bo = m->pt_bo, *batch = tile->mem.kernel_bb_pool->bo;
+ u64 entry, pt29_ofs;
+
/* PT30 & PT31 reserved for 2M identity map */
pt29_ofs = xe_bo_size(bo) - 3 * XE_PAGE_SIZE;
entry = vm->pt_ops->pde_encode_bo(bo, pt29_ofs);
@@ -338,6 +348,12 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m,
}
}
+ if (ofs)
+ *ofs = map_ofs;
+}
+
+static void xe_migrate_suballoc_manager_init(struct xe_migrate *m, u32 map_ofs)
+{
/*
* Example layout created above, with root level = 3:
* [PT0...PT7]: kernel PT's for copy/clear; 64 or 4KiB PTE's
@@ -363,9 +379,6 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m,
drm_suballoc_manager_init(&m->vm_update_sa,
(size_t)(map_ofs / XE_PAGE_SIZE - NUM_KERNEL_PDE) *
NUM_VMUSA_UNIT_PER_PAGE, 0);
-
- m->pt_bo = bo;
- return 0;
}
/*
@@ -416,12 +429,22 @@ static int xe_migrate_lock_prepare_vm(struct xe_tile *tile, struct xe_migrate *m
struct xe_device *xe = tile_to_xe(tile);
struct xe_validation_ctx ctx;
struct drm_exec exec;
+ u32 map_ofs;
int err = 0;
xe_validation_guard(&ctx, &xe->val, &exec, (struct xe_val_flags) {}, err) {
err = xe_vm_drm_exec_lock(vm, &exec);
+ if (err)
+ return err;
+
drm_exec_retry_on_contention(&exec);
- err = xe_migrate_prepare_vm(tile, m, vm, &exec);
+
+ err = xe_migrate_pt_bo_alloc(tile, m, vm, &exec);
+ if (err)
+ return err;
+
+ xe_migrate_prepare_vm(tile, m, vm, &map_ofs);
+ xe_migrate_suballoc_manager_init(m, map_ofs);
drm_exec_retry_on_contention(&exec);
xe_validation_retry_on_oom(&ctx, &err);
}
@@ -837,31 +860,13 @@ static u32 xe_migrate_ccs_copy(struct xe_migrate *m,
return flush_flags;
}
-/**
- * xe_migrate_copy() - Copy content of TTM resources.
- * @m: The migration context.
- * @src_bo: The buffer object @src is currently bound to.
- * @dst_bo: If copying between resources created for the same bo, set this to
- * the same value as @src_bo. If copying between buffer objects, set it to
- * the buffer object @dst is currently bound to.
- * @src: The source TTM resource.
- * @dst: The dst TTM resource.
- * @copy_only_ccs: If true copy only CCS metadata
- *
- * Copies the contents of @src to @dst: On flat CCS devices,
- * the CCS metadata is copied as well if needed, or if not present,
- * the CCS metadata of @dst is cleared for security reasons.
- *
- * Return: Pointer to a dma_fence representing the last copy batch, or
- * an error pointer on failure. If there is a failure, any copy operation
- * started by the function call has been synced.
- */
-struct dma_fence *xe_migrate_copy(struct xe_migrate *m,
- struct xe_bo *src_bo,
- struct xe_bo *dst_bo,
- struct ttm_resource *src,
- struct ttm_resource *dst,
- bool copy_only_ccs)
+static struct dma_fence *__xe_migrate_copy(struct xe_migrate *m,
+ struct xe_bo *src_bo,
+ struct xe_bo *dst_bo,
+ struct ttm_resource *src,
+ struct ttm_resource *dst,
+ bool copy_only_ccs,
+ bool is_vram_resolve)
{
struct xe_gt *gt = m->tile->primary_gt;
struct xe_device *xe = gt_to_xe(gt);
@@ -882,8 +887,15 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m,
bool copy_ccs = xe_device_has_flat_ccs(xe) &&
xe_bo_needs_ccs_pages(src_bo) && xe_bo_needs_ccs_pages(dst_bo);
bool copy_system_ccs = copy_ccs && (!src_is_vram || !dst_is_vram);
- bool use_comp_pat = type_device && xe_device_has_flat_ccs(xe) &&
- GRAPHICS_VER(xe) >= 20 && src_is_vram && !dst_is_vram;
+
+ /*
+ * For a decompression operation, always use the compression PAT index.
+ * Otherwise, only use the compression PAT index for device memory
+ * when copying from VRAM to system memory.
+ */
+ bool use_comp_pat = is_vram_resolve || (type_device &&
+ xe_device_has_flat_ccs(xe) &&
+ GRAPHICS_VER(xe) >= 20 && src_is_vram && !dst_is_vram);
/* Copying CCS between two different BOs is not supported yet. */
if (XE_WARN_ON(copy_ccs && src_bo != dst_bo))
@@ -1043,6 +1055,53 @@ err_sync:
}
/**
+ * xe_migrate_copy() - Copy content of TTM resources.
+ * @m: The migration context.
+ * @src_bo: The buffer object @src is currently bound to.
+ * @dst_bo: If copying between resources created for the same bo, set this to
+ * the same value as @src_bo. If copying between buffer objects, set it to
+ * the buffer object @dst is currently bound to.
+ * @src: The source TTM resource.
+ * @dst: The dst TTM resource.
+ * @copy_only_ccs: If true copy only CCS metadata
+ *
+ * Copies the contents of @src to @dst: On flat CCS devices,
+ * the CCS metadata is copied as well if needed, or if not present,
+ * the CCS metadata of @dst is cleared for security reasons.
+ *
+ * Return: Pointer to a dma_fence representing the last copy batch, or
+ * an error pointer on failure. If there is a failure, any copy operation
+ * started by the function call has been synced.
+ */
+struct dma_fence *xe_migrate_copy(struct xe_migrate *m,
+ struct xe_bo *src_bo,
+ struct xe_bo *dst_bo,
+ struct ttm_resource *src,
+ struct ttm_resource *dst,
+ bool copy_only_ccs)
+{
+ return __xe_migrate_copy(m, src_bo, dst_bo, src, dst, copy_only_ccs, false);
+}
+
+/**
+ * xe_migrate_resolve() - Resolve and decompress a buffer object if required.
+ * @m: The migrate context
+ * @bo: The buffer object to resolve
+ * @res: The TTM resource @bo is currently bound to
+ *
+ * Wrapper around __xe_migrate_copy() with is_vram_resolve set to true
+ * to trigger decompression if needed.
+ *
+ * Return: A dma_fence that signals on completion, or an ERR_PTR on failure.
+ */
+struct dma_fence *xe_migrate_resolve(struct xe_migrate *m,
+ struct xe_bo *bo,
+ struct ttm_resource *res)
+{
+ return __xe_migrate_copy(m, bo, bo, res, res, false, true);
+}
+
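
For context, a minimal sketch of driving the new resolve path synchronously. The helper name, call site and the assumption that the BO's current TTM resource is passed are illustrative; in this series the decompression request is actually plumbed through VM_BIND and xe_bo_decompress():

static int example_resolve_sync(struct xe_migrate *m, struct xe_bo *bo)
{
	struct dma_fence *fence;
	long timeout;

	/* Decompress @bo in place, then wait for the blit to complete. */
	fence = xe_migrate_resolve(m, bo, bo->ttm.resource);
	if (IS_ERR(fence))
		return PTR_ERR(fence);

	timeout = dma_fence_wait_timeout(fence, false, MAX_SCHEDULE_TIMEOUT);
	dma_fence_put(fence);

	return timeout <= 0 ? (timeout ?: -ETIME) : 0;
}
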
+/**
* xe_migrate_lrc() - Get the LRC from migrate context.
* @migrate: Migrate context.
*
diff --git a/drivers/gpu/drm/xe/xe_migrate.h b/drivers/gpu/drm/xe/xe_migrate.h
index 1522afb37dcf..169279d9d8c2 100644
--- a/drivers/gpu/drm/xe/xe_migrate.h
+++ b/drivers/gpu/drm/xe/xe_migrate.h
@@ -132,6 +132,10 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m,
struct ttm_resource *dst,
bool copy_only_ccs);
+struct dma_fence *xe_migrate_resolve(struct xe_migrate *m,
+ struct xe_bo *bo,
+ struct ttm_resource *res);
+
int xe_migrate_ccs_rw_copy(struct xe_tile *tile, struct xe_exec_queue *q,
struct xe_bo *src_bo,
enum xe_sriov_vf_ccs_rw_ctxs read_write);
diff --git a/drivers/gpu/drm/xe/xe_pat.c b/drivers/gpu/drm/xe/xe_pat.c
index f840d9a58740..34c9031e1e74 100644
--- a/drivers/gpu/drm/xe/xe_pat.c
+++ b/drivers/gpu/drm/xe/xe_pat.c
@@ -311,10 +311,25 @@ u16 xe_pat_index_get_l3_policy(struct xe_device *xe, u16 pat_index)
return REG_FIELD_GET(XE2_L3_POLICY, xe->pat.table[pat_index].value);
}
+static const struct xe_pat_table_entry *gt_pta_entry(struct xe_gt *gt)
+{
+ struct xe_device *xe = gt_to_xe(gt);
+
+ if (xe_gt_is_main_type(gt))
+ return xe->pat.pat_primary_pta;
+
+ if (xe_gt_is_media_type(gt))
+ return xe->pat.pat_media_pta;
+
+ xe_assert(xe, false);
+ return NULL;
+}
+
static void program_pat(struct xe_gt *gt, const struct xe_pat_table_entry table[],
int n_entries)
{
struct xe_device *xe = gt_to_xe(gt);
+ const struct xe_pat_table_entry *pta_entry = gt_pta_entry(gt);
for (int i = 0; i < n_entries; i++) {
struct xe_reg reg = XE_REG(_PAT_INDEX(i));
@@ -324,16 +339,16 @@ static void program_pat(struct xe_gt *gt, const struct xe_pat_table_entry table[
if (xe->pat.pat_ats)
xe_mmio_write32(&gt->mmio, XE_REG(_PAT_ATS), xe->pat.pat_ats->value);
- if (xe->pat.pat_primary_pta && xe_gt_is_main_type(gt))
- xe_mmio_write32(&gt->mmio, XE_REG(_PAT_PTA), xe->pat.pat_primary_pta->value);
- if (xe->pat.pat_media_pta && xe_gt_is_media_type(gt))
- xe_mmio_write32(&gt->mmio, XE_REG(_PAT_PTA), xe->pat.pat_media_pta->value);
+
+ if (pta_entry)
+ xe_mmio_write32(&gt->mmio, XE_REG(_PAT_PTA), pta_entry->value);
}
static void program_pat_mcr(struct xe_gt *gt, const struct xe_pat_table_entry table[],
int n_entries)
{
struct xe_device *xe = gt_to_xe(gt);
+ const struct xe_pat_table_entry *pta_entry = gt_pta_entry(gt);
for (int i = 0; i < n_entries; i++) {
struct xe_reg_mcr reg_mcr = XE_REG_MCR(_PAT_INDEX(i));
@@ -343,10 +358,9 @@ static void program_pat_mcr(struct xe_gt *gt, const struct xe_pat_table_entry ta
if (xe->pat.pat_ats)
xe_gt_mcr_multicast_write(gt, XE_REG_MCR(_PAT_ATS), xe->pat.pat_ats->value);
- if (xe->pat.pat_primary_pta && xe_gt_is_main_type(gt))
- xe_gt_mcr_multicast_write(gt, XE_REG_MCR(_PAT_PTA), xe->pat.pat_primary_pta->value);
- if (xe->pat.pat_media_pta && xe_gt_is_media_type(gt))
- xe_gt_mcr_multicast_write(gt, XE_REG_MCR(_PAT_PTA), xe->pat.pat_media_pta->value);
+
+ if (pta_entry)
+ xe_gt_mcr_multicast_write(gt, XE_REG_MCR(_PAT_PTA), pta_entry->value);
}
static int xelp_dump(struct xe_gt *gt, struct drm_printer *p)
@@ -677,8 +691,7 @@ int xe_pat_dump(struct xe_gt *gt, struct drm_printer *p)
int xe_pat_dump_sw_config(struct xe_gt *gt, struct drm_printer *p)
{
struct xe_device *xe = gt_to_xe(gt);
- const struct xe_pat_table_entry *pta_entry = xe_gt_is_main_type(gt) ?
- xe->pat.pat_primary_pta : xe->pat.pat_media_pta;
+ const struct xe_pat_table_entry *pta_entry = gt_pta_entry(gt);
char label[PAT_LABEL_LEN];
if (!xe->pat.table || !xe->pat.n_entries)
diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c
index b48e84549888..01673d2b2464 100644
--- a/drivers/gpu/drm/xe/xe_pci.c
+++ b/drivers/gpu/drm/xe/xe_pci.c
@@ -81,6 +81,7 @@ static const struct xe_graphics_desc graphics_xehpc = {
XE_HP_FEATURES,
+ .has_access_counter = 1,
.has_asid = 1,
.has_atomic_enable_pte_bit = 1,
.has_usm = 1,
@@ -98,6 +99,7 @@ static const struct xe_graphics_desc graphics_xelpg = {
};
#define XE2_GFX_FEATURES \
+ .has_access_counter = 1, \
.has_asid = 1, \
.has_atomic_enable_pte_bit = 1, \
.has_range_tlb_inval = 1, \
@@ -123,6 +125,7 @@ static const struct xe_graphics_desc graphics_xe3p_lpg = {
static const struct xe_graphics_desc graphics_xe3p_xpc = {
XE2_GFX_FEATURES,
+ .has_access_counter = 0,
.has_indirect_ring_state = 1,
.hw_engine_mask =
GENMASK(XE_HW_ENGINE_BCS8, XE_HW_ENGINE_BCS1) |
@@ -776,6 +779,8 @@ static int xe_info_init_early(struct xe_device *xe,
xe->info.max_gt_per_tile = desc->max_gt_per_tile;
xe->info.tile_count = 1 + desc->max_remote_tiles;
+ xe_step_platform_get(xe);
+
err = xe_tile_init_early(xe_device_get_root_tile(xe), xe, 0);
if (err)
return err;
@@ -910,7 +915,7 @@ static int xe_info_init(struct xe_device *xe,
if (desc->pre_gmdid_graphics_ip) {
graphics_ip = desc->pre_gmdid_graphics_ip;
media_ip = desc->pre_gmdid_media_ip;
- xe->info.step = xe_step_pre_gmdid_get(xe);
+ xe_step_pre_gmdid_get(xe);
} else {
xe_assert(xe, !desc->pre_gmdid_media_ip);
ret = handle_gmdid(xe, &graphics_ip, &media_ip,
@@ -918,9 +923,7 @@ static int xe_info_init(struct xe_device *xe,
if (ret)
return ret;
- xe->info.step = xe_step_gmdid_get(xe,
- graphics_gmdid_revid,
- media_gmdid_revid);
+ xe_step_gmdid_get(xe, graphics_gmdid_revid, media_gmdid_revid);
}
/*
@@ -944,6 +947,7 @@ static int xe_info_init(struct xe_device *xe,
media_desc = NULL;
}
+ xe->info.has_access_counter = graphics_desc->has_access_counter;
xe->info.has_asid = graphics_desc->has_asid;
xe->info.has_atomic_enable_pte_bit = graphics_desc->has_atomic_enable_pte_bit;
if (xe->info.platform != XE_PVC)
diff --git a/drivers/gpu/drm/xe/xe_pci_types.h b/drivers/gpu/drm/xe/xe_pci_types.h
index 47e8a1552c2b..8eee4fb1c57c 100644
--- a/drivers/gpu/drm/xe/xe_pci_types.h
+++ b/drivers/gpu/drm/xe/xe_pci_types.h
@@ -70,6 +70,7 @@ struct xe_graphics_desc {
u8 num_geometry_xecore_fuse_regs;
u8 num_compute_xecore_fuse_regs;
+ u8 has_access_counter:1;
u8 has_asid:1;
u8 has_atomic_enable_pte_bit:1;
u8 has_indirect_ring_state:1;
diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c
index 34db266b723f..4852fdcb4b95 100644
--- a/drivers/gpu/drm/xe/xe_query.c
+++ b/drivers/gpu/drm/xe/xe_query.c
@@ -340,6 +340,8 @@ static int query_config(struct xe_device *xe, struct drm_xe_device_query *query)
DRM_XE_QUERY_CONFIG_FLAG_HAS_NO_COMPRESSION_HINT;
config->info[DRM_XE_QUERY_CONFIG_FLAGS] |=
DRM_XE_QUERY_CONFIG_FLAG_HAS_LOW_LATENCY;
+ config->info[DRM_XE_QUERY_CONFIG_FLAGS] |=
+ DRM_XE_QUERY_CONFIG_FLAG_HAS_DISABLE_STATE_CACHE_PERF_FIX;
config->info[DRM_XE_QUERY_CONFIG_MIN_ALIGNMENT] =
xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K ? SZ_64K : SZ_4K;
config->info[DRM_XE_QUERY_CONFIG_VA_BITS] = xe->info.va_bits;
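
From userspace, the new flag is visible through the usual two-call config query. A rough sketch (uapi structure and ioctl names taken from uapi/drm/xe_drm.h; error handling omitted, so treat it as illustrative rather than reference code):

#include <stdint.h>
#include <stdlib.h>
#include <sys/ioctl.h>
#include <drm/xe_drm.h>

static int has_state_cache_perf_fix(int fd)
{
	struct drm_xe_device_query query = { .query = DRM_XE_DEVICE_QUERY_CONFIG };
	struct drm_xe_query_config *config;
	int has_fix;

	ioctl(fd, DRM_IOCTL_XE_DEVICE_QUERY, &query);	/* first call fills .size */
	config = calloc(1, query.size);
	query.data = (uintptr_t)config;
	ioctl(fd, DRM_IOCTL_XE_DEVICE_QUERY, &query);	/* second call fills the data */

	has_fix = !!(config->info[DRM_XE_QUERY_CONFIG_FLAGS] &
		     DRM_XE_QUERY_CONFIG_FLAG_HAS_DISABLE_STATE_CACHE_PERF_FIX);
	free(config);
	return has_fix;
}
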
diff --git a/drivers/gpu/drm/xe/xe_rtp.c b/drivers/gpu/drm/xe/xe_rtp.c
index 7bfdc6795ce6..991f218f1cc3 100644
--- a/drivers/gpu/drm/xe/xe_rtp.c
+++ b/drivers/gpu/drm/xe/xe_rtp.c
@@ -55,6 +55,13 @@ static bool rule_matches(const struct xe_device *xe,
match = xe->info.platform == r->platform &&
xe->info.subplatform == r->subplatform;
break;
+ case XE_RTP_MATCH_PLATFORM_STEP:
+ if (drm_WARN_ON(&xe->drm, xe->info.step.platform == STEP_NONE))
+ return false;
+
+ match = xe->info.step.platform >= r->step_start &&
+ xe->info.step.platform < r->step_end;
+ break;
case XE_RTP_MATCH_GRAPHICS_VERSION:
if (drm_WARN_ON(&xe->drm, !gt))
return false;
diff --git a/drivers/gpu/drm/xe/xe_rtp.h b/drivers/gpu/drm/xe/xe_rtp.h
index be4195264286..7d6daa7eb1e4 100644
--- a/drivers/gpu/drm/xe/xe_rtp.h
+++ b/drivers/gpu/drm/xe/xe_rtp.h
@@ -35,6 +35,10 @@ struct xe_reg_sr;
{ .match_type = XE_RTP_MATCH_SUBPLATFORM, \
.platform = plat__, .subplatform = sub__ }
+#define _XE_RTP_RULE_PLATFORM_STEP(start__, end__) \
+ { .match_type = XE_RTP_MATCH_PLATFORM_STEP, \
+ .step_start = start__, .step_end = end__ }
+
#define _XE_RTP_RULE_GRAPHICS_STEP(start__, end__) \
{ .match_type = XE_RTP_MATCH_GRAPHICS_STEP, \
.step_start = start__, .step_end = end__ }
@@ -67,6 +71,22 @@ struct xe_reg_sr;
_XE_RTP_RULE_SUBPLATFORM(XE_##plat_, XE_SUBPLATFORM_##plat_##_##sub_)
/**
+ * XE_RTP_RULE_PLATFORM_STEP - Create rule matching platform-level stepping
+ * @start_: First stepping matching the rule
+ * @end_: First stepping that does not match the rule
+ *
+ * Note that the range matching this rule is [ @start_, @end_ ), i.e. inclusive
+ * on the left, exclusive on the right.
+ *
+ * Make sure platform-level stepping detection is implemented for the target
+ * platform before using this rule.
+ *
+ * Refer to XE_RTP_RULES() for expected usage.
+ */
+#define XE_RTP_RULE_PLATFORM_STEP(start_, end_) \
+ _XE_RTP_RULE_PLATFORM_STEP(STEP_##start_, STEP_##end_)
+
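
For illustration, a hypothetical workaround table entry using the new rule. The entry name, platform choice and register action are made up; only the XE_RTP_RULES() composition and the half-open [A0, B0) stepping range follow the kernel-doc above:

static const struct xe_rtp_entry_sr example_was[] = {
	{ XE_RTP_NAME("ExamplePlatformStepWa"),
	  XE_RTP_RULES(PLATFORM(NOVALAKE_P), PLATFORM_STEP(A0, B0)),
	  XE_RTP_ACTIONS(SET(EXAMPLE_REG, EXAMPLE_CHICKEN_BIT))	/* placeholder action */
	},
};
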
+/**
* XE_RTP_RULE_GRAPHICS_STEP - Create rule matching graphics stepping
* @start_: First stepping matching the rule
* @end_: First stepping that does not match the rule
diff --git a/drivers/gpu/drm/xe/xe_rtp_types.h b/drivers/gpu/drm/xe/xe_rtp_types.h
index 6ba7f226c227..166251615be1 100644
--- a/drivers/gpu/drm/xe/xe_rtp_types.h
+++ b/drivers/gpu/drm/xe/xe_rtp_types.h
@@ -41,6 +41,7 @@ struct xe_rtp_action {
enum {
XE_RTP_MATCH_PLATFORM,
XE_RTP_MATCH_SUBPLATFORM,
+ XE_RTP_MATCH_PLATFORM_STEP,
XE_RTP_MATCH_GRAPHICS_VERSION,
XE_RTP_MATCH_GRAPHICS_VERSION_RANGE,
XE_RTP_MATCH_GRAPHICS_VERSION_ANY_GT,
diff --git a/drivers/gpu/drm/xe/xe_step.c b/drivers/gpu/drm/xe/xe_step.c
index 2860986f82f7..d0f888c31831 100644
--- a/drivers/gpu/drm/xe/xe_step.c
+++ b/drivers/gpu/drm/xe/xe_step.c
@@ -109,21 +109,45 @@ static const int pvc_basedie_subids[] = {
__diag_pop();
/**
+ * xe_step_platform_get - Determine platform-level stepping from PCI revid
+ * @xe: Xe device
+ *
+ * Convert the PCI revid into a platform-level stepping value and store that
+ * in the device info.
+ */
+void xe_step_platform_get(struct xe_device *xe)
+{
+ /*
+ * Not all platforms map PCI revid directly into our symbolic stepping
+ * enumeration. Some platforms use a single PCI revid for a range of
+ * platform-level steppings, and some might even require specific
+ * mappings. So err on the side of caution and include only the
+ * platforms for which we need the stepping info for workaround checks.
+ */
+
+ if (xe->info.platform == XE_NOVALAKE_P)
+ xe->info.step.platform = STEP_A0 + xe->info.revid;
+}
+
+/**
* xe_step_pre_gmdid_get - Determine IP steppings from PCI revid
* @xe: Xe device
*
* Convert the PCI revid into proper IP steppings. This should only be
* used on platforms that do not have GMD_ID support.
*/
-struct xe_step_info xe_step_pre_gmdid_get(struct xe_device *xe)
+void xe_step_pre_gmdid_get(struct xe_device *xe)
{
const struct xe_step_info *revids = NULL;
- struct xe_step_info step = {};
u16 revid = xe->info.revid;
int size = 0;
const int *basedie_info = NULL;
int basedie_size = 0;
int baseid = 0;
+ u8 graphics = STEP_NONE;
+ u8 media = STEP_NONE;
+ u8 basedie = STEP_NONE;
if (xe->info.platform == XE_PVC) {
baseid = FIELD_GET(GENMASK(5, 3), xe->info.revid);
@@ -166,10 +190,12 @@ struct xe_step_info xe_step_pre_gmdid_get(struct xe_device *xe)
/* Not using the stepping scheme for the platform yet. */
if (!revids)
- return step;
+ goto done;
if (revid < size && revids[revid].graphics != STEP_NONE) {
- step = revids[revid];
+ graphics = revids[revid].graphics;
+ media = revids[revid].media;
+ basedie = revids[revid].basedie;
} else {
drm_warn(&xe->drm, "Unknown revid 0x%02x\n", revid);
@@ -187,25 +213,30 @@ struct xe_step_info xe_step_pre_gmdid_get(struct xe_device *xe)
if (revid < size) {
drm_dbg(&xe->drm, "Using steppings for revid 0x%02x\n",
revid);
- step = revids[revid];
+ graphics = revids[revid].graphics;
+ media = revids[revid].media;
+ basedie = revids[revid].basedie;
} else {
drm_dbg(&xe->drm, "Using future steppings\n");
- step.graphics = STEP_FUTURE;
+ graphics = STEP_FUTURE;
}
}
- drm_WARN_ON(&xe->drm, step.graphics == STEP_NONE);
+ drm_WARN_ON(&xe->drm, graphics == STEP_NONE);
if (basedie_info && basedie_size) {
if (baseid < basedie_size && basedie_info[baseid] != STEP_NONE) {
- step.basedie = basedie_info[baseid];
+ basedie = basedie_info[baseid];
} else {
drm_warn(&xe->drm, "Unknown baseid 0x%02x\n", baseid);
- step.basedie = STEP_FUTURE;
+ basedie = STEP_FUTURE;
}
}
- return step;
+done:
+ xe->info.step.graphics = graphics;
+ xe->info.step.media = media;
+ xe->info.step.basedie = basedie;
}
/**
@@ -220,28 +251,27 @@ struct xe_step_info xe_step_pre_gmdid_get(struct xe_device *xe)
* all platforms: major steppings (A0, B0, etc.) are 4 apart, with minor
* steppings (A1, A2, etc.) taking the values in between.
*/
-struct xe_step_info xe_step_gmdid_get(struct xe_device *xe,
- u32 graphics_gmdid_revid,
- u32 media_gmdid_revid)
+void xe_step_gmdid_get(struct xe_device *xe,
+ u32 graphics_gmdid_revid,
+ u32 media_gmdid_revid)
{
- struct xe_step_info step = {
- .graphics = STEP_A0 + graphics_gmdid_revid,
- .media = STEP_A0 + media_gmdid_revid,
- };
+ u8 graphics = STEP_A0 + graphics_gmdid_revid;
+ u8 media = STEP_A0 + media_gmdid_revid;
- if (step.graphics >= STEP_FUTURE) {
- step.graphics = STEP_FUTURE;
+ if (graphics >= STEP_FUTURE) {
+ graphics = STEP_FUTURE;
drm_dbg(&xe->drm, "Graphics GMD_ID revid value %d treated as future stepping\n",
graphics_gmdid_revid);
}
- if (step.media >= STEP_FUTURE) {
- step.media = STEP_FUTURE;
+ if (media >= STEP_FUTURE) {
+ media = STEP_FUTURE;
drm_dbg(&xe->drm, "Media GMD_ID revid value %d treated as future stepping\n",
media_gmdid_revid);
}
- return step;
+ xe->info.step.graphics = graphics;
+ xe->info.step.media = media;
}
#define STEP_NAME_CASE(name) \
diff --git a/drivers/gpu/drm/xe/xe_step.h b/drivers/gpu/drm/xe/xe_step.h
index 686cb59200c2..41f1c95c46e5 100644
--- a/drivers/gpu/drm/xe/xe_step.h
+++ b/drivers/gpu/drm/xe/xe_step.h
@@ -12,10 +12,12 @@
struct xe_device;
-struct xe_step_info xe_step_pre_gmdid_get(struct xe_device *xe);
-struct xe_step_info xe_step_gmdid_get(struct xe_device *xe,
- u32 graphics_gmdid_revid,
- u32 media_gmdid_revid);
+void xe_step_platform_get(struct xe_device *xe);
+
+void xe_step_pre_gmdid_get(struct xe_device *xe);
+void xe_step_gmdid_get(struct xe_device *xe,
+ u32 graphics_gmdid_revid,
+ u32 media_gmdid_revid);
static inline u32 xe_step_to_gmdid(enum xe_step step) { return step - STEP_A0; }
const char *xe_step_name(enum xe_step step);
diff --git a/drivers/gpu/drm/xe/xe_step_types.h b/drivers/gpu/drm/xe/xe_step_types.h
index d978cc2512f2..43ca73850739 100644
--- a/drivers/gpu/drm/xe/xe_step_types.h
+++ b/drivers/gpu/drm/xe/xe_step_types.h
@@ -9,6 +9,7 @@
#include <linux/types.h>
struct xe_step_info {
+ u8 platform;
u8 graphics;
u8 media;
u8 basedie;
diff --git a/drivers/gpu/drm/xe/xe_svm.c b/drivers/gpu/drm/xe/xe_svm.c
index 002b6c22ad3f..a91c84487a67 100644
--- a/drivers/gpu/drm/xe/xe_svm.c
+++ b/drivers/gpu/drm/xe/xe_svm.c
@@ -19,6 +19,7 @@
#include "xe_pt.h"
#include "xe_svm.h"
#include "xe_tile.h"
+#include "xe_tlb_inval.h"
#include "xe_ttm_vram_mgr.h"
#include "xe_vm.h"
#include "xe_vm_types.h"
@@ -225,6 +226,7 @@ static void xe_svm_invalidate(struct drm_gpusvm *gpusvm,
const struct mmu_notifier_range *mmu_range)
{
struct xe_vm *vm = gpusvm_to_vm(gpusvm);
+ struct xe_tlb_inval_batch batch;
struct xe_device *xe = vm->xe;
struct drm_gpusvm_range *r, *first;
struct xe_tile *tile;
@@ -276,8 +278,10 @@ static void xe_svm_invalidate(struct drm_gpusvm *gpusvm,
xe_device_wmb(xe);
- err = xe_vm_range_tilemask_tlb_inval(vm, adj_start, adj_end, tile_mask);
- WARN_ON_ONCE(err);
+ err = xe_tlb_inval_range_tilemask_submit(xe, vm->usm.asid, adj_start, adj_end,
+ tile_mask, &batch);
+ if (!WARN_ON_ONCE(err))
+ xe_tlb_inval_batch_wait(&batch);
range_notifier_event_end:
r = first;
diff --git a/drivers/gpu/drm/xe/xe_tlb_inval.c b/drivers/gpu/drm/xe/xe_tlb_inval.c
index 933f30fb617d..10dcd4abb00f 100644
--- a/drivers/gpu/drm/xe/xe_tlb_inval.c
+++ b/drivers/gpu/drm/xe/xe_tlb_inval.c
@@ -486,3 +486,87 @@ bool xe_tlb_inval_idle(struct xe_tlb_inval *tlb_inval)
guard(spinlock_irq)(&tlb_inval->pending_lock);
return list_is_singular(&tlb_inval->pending_fences);
}
+
+/**
+ * xe_tlb_inval_batch_wait() - Wait for all fences in a TLB invalidation batch
+ * @batch: Batch of TLB invalidation fences to wait on
+ *
+ * Waits for every fence in @batch to signal, then resets @batch so it can be
+ * reused for a subsequent invalidation.
+ */
+void xe_tlb_inval_batch_wait(struct xe_tlb_inval_batch *batch)
+{
+ struct xe_tlb_inval_fence *fence = &batch->fence[0];
+ unsigned int i;
+
+ for (i = 0; i < batch->num_fences; ++i)
+ xe_tlb_inval_fence_wait(fence++);
+
+ batch->num_fences = 0;
+}
+
+/**
+ * xe_tlb_inval_range_tilemask_submit() - Submit TLB invalidations for an
+ * address range on a tile mask
+ * @xe: The xe device
+ * @asid: Address space ID
+ * @start: start address
+ * @end: end address
+ * @tile_mask: mask of tiles whose GTs receive the TLB invalidation
+ * @batch: Batch of TLB invalidation fences
+ *
+ * Issue a range-based TLB invalidation for the GTs of every tile in @tile_mask.
+ * If the function returns an error, there is no need to call
+ * xe_tlb_inval_batch_wait() on @batch.
+ *
+ * Return: 0 on success, negative error code otherwise.
+ */
+int xe_tlb_inval_range_tilemask_submit(struct xe_device *xe, u32 asid,
+ u64 start, u64 end, u8 tile_mask,
+ struct xe_tlb_inval_batch *batch)
+{
+ struct xe_tlb_inval_fence *fence = &batch->fence[0];
+ struct xe_tile *tile;
+ u32 fence_id = 0;
+ u8 id;
+ int err;
+
+ batch->num_fences = 0;
+ if (!tile_mask)
+ return 0;
+
+ for_each_tile(tile, xe, id) {
+ if (!(tile_mask & BIT(id)))
+ continue;
+
+ xe_tlb_inval_fence_init(&tile->primary_gt->tlb_inval,
+ &fence[fence_id], true);
+
+ err = xe_tlb_inval_range(&tile->primary_gt->tlb_inval,
+ &fence[fence_id], start, end,
+ asid, NULL);
+ if (err)
+ goto wait;
+ ++fence_id;
+
+ if (!tile->media_gt)
+ continue;
+
+ xe_tlb_inval_fence_init(&tile->media_gt->tlb_inval,
+ &fence[fence_id], true);
+
+ err = xe_tlb_inval_range(&tile->media_gt->tlb_inval,
+ &fence[fence_id], start, end,
+ asid, NULL);
+ if (err)
+ goto wait;
+ ++fence_id;
+ }
+
+wait:
+ batch->num_fences = fence_id;
+ if (err)
+ xe_tlb_inval_batch_wait(batch);
+
+ return err;
+}
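
Splitting submission from the wait is what lets callers fan invalidations out to several GTs before blocking, as the xe_svm.c change above does. A condensed sketch of the intended calling pattern, with an on-stack batch (variables other than the batch are assumed from the caller's context):

	struct xe_tlb_inval_batch batch;
	int err;

	/* Queue invalidations on every GT covered by tile_mask... */
	err = xe_tlb_inval_range_tilemask_submit(xe, vm->usm.asid, start, end,
						 tile_mask, &batch);
	/* ...and only then wait for all of them to complete. */
	if (!err)
		xe_tlb_inval_batch_wait(&batch);
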
diff --git a/drivers/gpu/drm/xe/xe_tlb_inval.h b/drivers/gpu/drm/xe/xe_tlb_inval.h
index 62089254fa23..a76b7823a5f2 100644
--- a/drivers/gpu/drm/xe/xe_tlb_inval.h
+++ b/drivers/gpu/drm/xe/xe_tlb_inval.h
@@ -45,4 +45,10 @@ void xe_tlb_inval_done_handler(struct xe_tlb_inval *tlb_inval, int seqno);
bool xe_tlb_inval_idle(struct xe_tlb_inval *tlb_inval);
+int xe_tlb_inval_range_tilemask_submit(struct xe_device *xe, u32 asid,
+ u64 start, u64 end, u8 tile_mask,
+ struct xe_tlb_inval_batch *batch);
+
+void xe_tlb_inval_batch_wait(struct xe_tlb_inval_batch *batch);
+
#endif /* _XE_TLB_INVAL_ */
diff --git a/drivers/gpu/drm/xe/xe_tlb_inval_types.h b/drivers/gpu/drm/xe/xe_tlb_inval_types.h
index 3b089f90f002..3d1797d186fd 100644
--- a/drivers/gpu/drm/xe/xe_tlb_inval_types.h
+++ b/drivers/gpu/drm/xe/xe_tlb_inval_types.h
@@ -9,6 +9,8 @@
#include <linux/workqueue.h>
#include <linux/dma-fence.h>
+#include "xe_device_types.h"
+
struct drm_suballoc;
struct xe_tlb_inval;
@@ -132,4 +134,16 @@ struct xe_tlb_inval_fence {
ktime_t inval_time;
};
+/**
+ * struct xe_tlb_inval_batch - Batch of TLB invalidation fences
+ *
+ * Holds one fence per GT covered by a TLB invalidation request.
+ */
+struct xe_tlb_inval_batch {
+ /** @fence: per-GT TLB invalidation fences */
+ struct xe_tlb_inval_fence fence[XE_MAX_TILES_PER_DEVICE * XE_MAX_GT_PER_TILE];
+ /** @num_fences: number of valid entries in @fence */
+ unsigned int num_fences;
+};
+
#endif
diff --git a/drivers/gpu/drm/xe/xe_userptr.c b/drivers/gpu/drm/xe/xe_userptr.c
index e120323c43bc..6761005c0b90 100644
--- a/drivers/gpu/drm/xe/xe_userptr.c
+++ b/drivers/gpu/drm/xe/xe_userptr.c
@@ -8,8 +8,17 @@
#include <linux/mm.h>
+#include "xe_tlb_inval.h"
#include "xe_trace_bo.h"
+static void xe_userptr_assert_in_notifier(struct xe_vm *vm)
+{
+ lockdep_assert(lockdep_is_held_type(&vm->svm.gpusvm.notifier_lock, 0) ||
+ (lockdep_is_held(&vm->lock) &&
+ lockdep_is_held_type(&vm->svm.gpusvm.notifier_lock, 1) &&
+ dma_resv_held(xe_vm_resv(vm))));
+}
+
/**
* xe_vma_userptr_check_repin() - Advisory check for repin needed
* @uvma: The userptr vma
@@ -73,18 +82,83 @@ int xe_vma_userptr_pin_pages(struct xe_userptr_vma *uvma)
&ctx);
}
-static void __vma_userptr_invalidate(struct xe_vm *vm, struct xe_userptr_vma *uvma)
+static struct mmu_interval_notifier_finish *
+xe_vma_userptr_do_inval(struct xe_vm *vm, struct xe_userptr_vma *uvma, bool is_deferred)
{
struct xe_userptr *userptr = &uvma->userptr;
struct xe_vma *vma = &uvma->vma;
- struct dma_resv_iter cursor;
- struct dma_fence *fence;
struct drm_gpusvm_ctx ctx = {
.in_notifier = true,
.read_only = xe_vma_read_only(vma),
};
long err;
+ xe_userptr_assert_in_notifier(vm);
+ if (is_deferred)
+ xe_assert(vm->xe, userptr->finish_inuse && !userptr->tlb_inval_submitted);
+
+ err = dma_resv_wait_timeout(xe_vm_resv(vm),
+ DMA_RESV_USAGE_BOOKKEEP,
+ false, MAX_SCHEDULE_TIMEOUT);
+ XE_WARN_ON(err <= 0);
+
+ if (xe_vm_in_fault_mode(vm) && userptr->initial_bind) {
+ if (!userptr->finish_inuse) {
+ /*
+ * Defer the TLB wait to an extra pass so the caller
+ * can pipeline TLB flushes across GPUs before waiting
+ * on any of them.
+ */
+ xe_assert(vm->xe, !userptr->tlb_inval_submitted);
+ userptr->finish_inuse = true;
+ userptr->tlb_inval_submitted = true;
+ err = xe_vm_invalidate_vma_submit(vma, &userptr->inval_batch);
+ XE_WARN_ON(err);
+ return &userptr->finish;
+ }
+ err = xe_vm_invalidate_vma(vma);
+ XE_WARN_ON(err);
+ }
+
+ if (is_deferred)
+ userptr->finish_inuse = false;
+ drm_gpusvm_unmap_pages(&vm->svm.gpusvm, &uvma->userptr.pages,
+ xe_vma_size(vma) >> PAGE_SHIFT, &ctx);
+ return NULL;
+}
+
+static void
+xe_vma_userptr_complete_tlb_inval(struct xe_vm *vm, struct xe_userptr_vma *uvma)
+{
+ struct xe_userptr *userptr = &uvma->userptr;
+ struct xe_vma *vma = &uvma->vma;
+ struct drm_gpusvm_ctx ctx = {
+ .in_notifier = true,
+ .read_only = xe_vma_read_only(vma),
+ };
+
+ xe_userptr_assert_in_notifier(vm);
+ xe_assert(vm->xe, userptr->finish_inuse);
+ xe_assert(vm->xe, userptr->tlb_inval_submitted);
+
+ xe_tlb_inval_batch_wait(&userptr->inval_batch);
+ userptr->tlb_inval_submitted = false;
+ userptr->finish_inuse = false;
+ drm_gpusvm_unmap_pages(&vm->svm.gpusvm, &uvma->userptr.pages,
+ xe_vma_size(vma) >> PAGE_SHIFT, &ctx);
+}
+
+static struct mmu_interval_notifier_finish *
+xe_vma_userptr_invalidate_pass1(struct xe_vm *vm, struct xe_userptr_vma *uvma)
+{
+ struct xe_userptr *userptr = &uvma->userptr;
+ struct xe_vma *vma = &uvma->vma;
+ struct dma_resv_iter cursor;
+ struct dma_fence *fence;
+ bool signaled = true;
+
+ xe_userptr_assert_in_notifier(vm);
+
/*
* Tell exec and rebind worker they need to repin and rebind this
* userptr.
@@ -105,27 +179,31 @@ static void __vma_userptr_invalidate(struct xe_vm *vm, struct xe_userptr_vma *uv
*/
dma_resv_iter_begin(&cursor, xe_vm_resv(vm),
DMA_RESV_USAGE_BOOKKEEP);
- dma_resv_for_each_fence_unlocked(&cursor, fence)
+ dma_resv_for_each_fence_unlocked(&cursor, fence) {
dma_fence_enable_sw_signaling(fence);
+ if (signaled && !dma_fence_is_signaled(fence))
+ signaled = false;
+ }
dma_resv_iter_end(&cursor);
- err = dma_resv_wait_timeout(xe_vm_resv(vm),
- DMA_RESV_USAGE_BOOKKEEP,
- false, MAX_SCHEDULE_TIMEOUT);
- XE_WARN_ON(err <= 0);
+ /*
+ * Only one caller at a time can use the multi-pass state.
+ * If it's already in use, or all fences are already signaled,
+ * proceed directly to invalidation without deferring.
+ */
+ if (signaled || userptr->finish_inuse)
+ return xe_vma_userptr_do_inval(vm, uvma, false);
- if (xe_vm_in_fault_mode(vm) && userptr->initial_bind) {
- err = xe_vm_invalidate_vma(vma);
- XE_WARN_ON(err);
- }
+ /* Defer: the notifier core will call invalidate_finish once done. */
+ userptr->finish_inuse = true;
- drm_gpusvm_unmap_pages(&vm->svm.gpusvm, &uvma->userptr.pages,
- xe_vma_size(vma) >> PAGE_SHIFT, &ctx);
+ return &userptr->finish;
}
-static bool vma_userptr_invalidate(struct mmu_interval_notifier *mni,
- const struct mmu_notifier_range *range,
- unsigned long cur_seq)
+static bool xe_vma_userptr_invalidate_start(struct mmu_interval_notifier *mni,
+ const struct mmu_notifier_range *range,
+ unsigned long cur_seq,
+ struct mmu_interval_notifier_finish **p_finish)
{
struct xe_userptr_vma *uvma = container_of(mni, typeof(*uvma), userptr.notifier);
struct xe_vma *vma = &uvma->vma;
@@ -138,21 +216,48 @@ static bool vma_userptr_invalidate(struct mmu_interval_notifier *mni,
return false;
vm_dbg(&xe_vma_vm(vma)->xe->drm,
- "NOTIFIER: addr=0x%016llx, range=0x%016llx",
+ "NOTIFIER PASS1: addr=0x%016llx, range=0x%016llx",
xe_vma_start(vma), xe_vma_size(vma));
down_write(&vm->svm.gpusvm.notifier_lock);
mmu_interval_set_seq(mni, cur_seq);
- __vma_userptr_invalidate(vm, uvma);
+ *p_finish = xe_vma_userptr_invalidate_pass1(vm, uvma);
+
up_write(&vm->svm.gpusvm.notifier_lock);
- trace_xe_vma_userptr_invalidate_complete(vma);
+ if (!*p_finish)
+ trace_xe_vma_userptr_invalidate_complete(vma);
return true;
}
+static void xe_vma_userptr_invalidate_finish(struct mmu_interval_notifier_finish *finish)
+{
+ struct xe_userptr_vma *uvma = container_of(finish, typeof(*uvma), userptr.finish);
+ struct xe_vma *vma = &uvma->vma;
+ struct xe_vm *vm = xe_vma_vm(vma);
+
+ vm_dbg(&xe_vma_vm(vma)->xe->drm,
+ "NOTIFIER PASS2: addr=0x%016llx, range=0x%016llx",
+ xe_vma_start(vma), xe_vma_size(vma));
+
+ down_write(&vm->svm.gpusvm.notifier_lock);
+ /*
+ * If a TLB invalidation was previously submitted (deferred from the
+ * synchronous pass1 fallback), wait for it and unmap pages.
+ * Otherwise, fences have now completed: invalidate the TLB and unmap.
+ */
+ if (uvma->userptr.tlb_inval_submitted)
+ xe_vma_userptr_complete_tlb_inval(vm, uvma);
+ else
+ xe_vma_userptr_do_inval(vm, uvma, true);
+ up_write(&vm->svm.gpusvm.notifier_lock);
+ trace_xe_vma_userptr_invalidate_complete(vma);
+}
+
static const struct mmu_interval_notifier_ops vma_userptr_notifier_ops = {
- .invalidate = vma_userptr_invalidate,
+ .invalidate_start = xe_vma_userptr_invalidate_start,
+ .invalidate_finish = xe_vma_userptr_invalidate_finish,
};
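
Stripped to its essentials, the two-pass notifier contract used above looks roughly like the skeleton below: pass one may hand back a struct mmu_interval_notifier_finish pointer instead of blocking, and the notifier core later invokes the finish hook with that same pointer. The object layout and callback names here are placeholders; only the callback signatures mirror the ones in this patch:

struct example_obj {
	struct mmu_interval_notifier notifier;
	struct mmu_interval_notifier_finish finish;
};

static bool example_invalidate_start(struct mmu_interval_notifier *mni,
				     const struct mmu_notifier_range *range,
				     unsigned long cur_seq,
				     struct mmu_interval_notifier_finish **p_finish)
{
	struct example_obj *obj = container_of(mni, struct example_obj, notifier);

	mmu_interval_set_seq(mni, cur_seq);
	/* Pass one: kick off work (e.g. submit TLB invalidations), defer the wait. */
	*p_finish = &obj->finish;
	return true;
}

static void example_invalidate_finish(struct mmu_interval_notifier_finish *f)
{
	struct example_obj *obj = container_of(f, struct example_obj, finish);

	/* Pass two: wait for whatever pass one submitted, then unmap. */
	(void)obj;
}

static const struct mmu_interval_notifier_ops example_ops = {
	.invalidate_start  = example_invalidate_start,
	.invalidate_finish = example_invalidate_finish,
};
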
#if IS_ENABLED(CONFIG_DRM_XE_USERPTR_INVAL_INJECT)
@@ -164,6 +269,7 @@ static const struct mmu_interval_notifier_ops vma_userptr_notifier_ops = {
*/
void xe_vma_userptr_force_invalidate(struct xe_userptr_vma *uvma)
{
+ struct mmu_interval_notifier_finish *finish;
struct xe_vm *vm = xe_vma_vm(&uvma->vma);
/* Protect against concurrent userptr pinning */
@@ -179,7 +285,12 @@ void xe_vma_userptr_force_invalidate(struct xe_userptr_vma *uvma)
if (!mmu_interval_read_retry(&uvma->userptr.notifier,
uvma->userptr.pages.notifier_seq))
uvma->userptr.pages.notifier_seq -= 2;
- __vma_userptr_invalidate(vm, uvma);
+
+ finish = xe_vma_userptr_invalidate_pass1(vm, uvma);
+ if (finish)
+ finish = xe_vma_userptr_do_inval(vm, uvma, true);
+ if (finish)
+ xe_vma_userptr_complete_tlb_inval(vm, uvma);
}
#endif
diff --git a/drivers/gpu/drm/xe/xe_userptr.h b/drivers/gpu/drm/xe/xe_userptr.h
index ef801234991e..2a3cd1b5efbb 100644
--- a/drivers/gpu/drm/xe/xe_userptr.h
+++ b/drivers/gpu/drm/xe/xe_userptr.h
@@ -14,6 +14,8 @@
#include <drm/drm_gpusvm.h>
+#include "xe_tlb_inval_types.h"
+
struct xe_vm;
struct xe_vma;
struct xe_userptr_vma;
@@ -56,7 +58,34 @@ struct xe_userptr {
* @notifier: MMU notifier for user pointer (invalidation call back)
*/
struct mmu_interval_notifier notifier;
-
+ /**
+ * @finish: MMU notifier finish structure for two-pass invalidation.
+ * Embedded here to avoid allocation in the notifier callback.
+ * Protected by struct xe_vm::svm.gpusvm.notifier_lock in write mode, or
+ * alternatively by the same lock in read mode *and* the vm resv held.
+ */
+ struct mmu_interval_notifier_finish finish;
+ /**
+ * @inval_batch: TLB invalidation batch for deferred completion.
+ * Stores an in-flight TLB invalidation submitted during a two-pass
+ * notifier so the wait can be deferred to a subsequent pass, allowing
+ * multiple GPUs to be signalled before any of them are waited on.
+ * Protected using the same locking as @finish.
+ */
+ struct xe_tlb_inval_batch inval_batch;
+ /**
+ * @finish_inuse: Whether @finish is currently in use by an in-progress
+ * two-pass invalidation.
+ * Protected using the same locking as @finish.
+ */
+ bool finish_inuse;
+ /**
+ * @tlb_inval_submitted: Whether a TLB invalidation has been submitted
+ * via @inval_batch and is pending completion. When set, the next pass
+ * must call xe_tlb_inval_batch_wait() before reusing @inval_batch.
+ * Protected using the same locking as @finish.
+ */
+ bool tlb_inval_submitted;
/**
* @initial_bind: user pointer has been bound at least once.
* write: vm->svm.gpusvm.notifier_lock in read mode and vm->resv held.
diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index 548b0769b3ef..5572e12c2a7e 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -2362,6 +2362,7 @@ vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_vma_ops *vops,
op->map.vma_flags |= XE_VMA_DUMPABLE;
if (flags & DRM_XE_VM_BIND_FLAG_MADVISE_AUTORESET)
op->map.vma_flags |= XE_VMA_MADV_AUTORESET;
+ op->map.request_decompress = flags & DRM_XE_VM_BIND_FLAG_DECOMPRESS;
op->map.pat_index = pat_index;
op->map.invalidate_on_bind =
__xe_vm_needs_clear_scratch_pages(vm, flags);
@@ -2902,7 +2903,7 @@ static void vm_bind_ioctl_ops_unwind(struct xe_vm *vm,
}
static int vma_lock_and_validate(struct drm_exec *exec, struct xe_vma *vma,
- bool res_evict, bool validate)
+ bool res_evict, bool validate, bool request_decompress)
{
struct xe_bo *bo = xe_vma_bo(vma);
struct xe_vm *vm = xe_vma_vm(vma);
@@ -2915,6 +2916,12 @@ static int vma_lock_and_validate(struct drm_exec *exec, struct xe_vma *vma,
err = xe_bo_validate(bo, vm,
xe_vm_allow_vm_eviction(vm) &&
res_evict, exec);
+
+ if (err)
+ return err;
+
+ if (request_decompress)
+ err = xe_bo_decompress(bo);
}
return err;
@@ -3009,7 +3016,8 @@ static int op_lock_and_prep(struct drm_exec *exec, struct xe_vm *vm,
err = vma_lock_and_validate(exec, op->map.vma,
res_evict,
!xe_vm_in_fault_mode(vm) ||
- op->map.immediate);
+ op->map.immediate,
+ op->map.request_decompress);
break;
case DRM_GPUVA_OP_REMAP:
err = check_ufence(gpuva_to_vma(op->base.remap.unmap->va));
@@ -3018,13 +3026,13 @@ static int op_lock_and_prep(struct drm_exec *exec, struct xe_vm *vm,
err = vma_lock_and_validate(exec,
gpuva_to_vma(op->base.remap.unmap->va),
- res_evict, false);
+ res_evict, false, false);
if (!err && op->remap.prev)
err = vma_lock_and_validate(exec, op->remap.prev,
- res_evict, true);
+ res_evict, true, false);
if (!err && op->remap.next)
err = vma_lock_and_validate(exec, op->remap.next,
- res_evict, true);
+ res_evict, true, false);
break;
case DRM_GPUVA_OP_UNMAP:
err = check_ufence(gpuva_to_vma(op->base.unmap.va));
@@ -3033,7 +3041,7 @@ static int op_lock_and_prep(struct drm_exec *exec, struct xe_vm *vm,
err = vma_lock_and_validate(exec,
gpuva_to_vma(op->base.unmap.va),
- res_evict, false);
+ res_evict, false, false);
break;
case DRM_GPUVA_OP_PREFETCH:
{
@@ -3048,7 +3056,7 @@ static int op_lock_and_prep(struct drm_exec *exec, struct xe_vm *vm,
err = vma_lock_and_validate(exec,
gpuva_to_vma(op->base.prefetch.va),
- res_evict, false);
+ res_evict, false, false);
if (!err && !xe_vma_has_no_bo(vma))
err = xe_bo_migrate(xe_vma_bo(vma),
region_to_mem_type[region],
@@ -3370,7 +3378,8 @@ ALLOW_ERROR_INJECTION(vm_bind_ioctl_ops_execute, ERRNO);
DRM_XE_VM_BIND_FLAG_DUMPABLE | \
DRM_XE_VM_BIND_FLAG_CHECK_PXP | \
DRM_XE_VM_BIND_FLAG_CPU_ADDR_MIRROR | \
- DRM_XE_VM_BIND_FLAG_MADVISE_AUTORESET)
+ DRM_XE_VM_BIND_FLAG_MADVISE_AUTORESET | \
+ DRM_XE_VM_BIND_FLAG_DECOMPRESS)
#ifdef TEST_VM_OPS_ERROR
#define SUPPORTED_FLAGS (SUPPORTED_FLAGS_STUB | FORCE_OP_ERROR)
@@ -3430,6 +3439,7 @@ static int vm_bind_ioctl_check_args(struct xe_device *xe, struct xe_vm *vm,
bool is_null = flags & DRM_XE_VM_BIND_FLAG_NULL;
bool is_cpu_addr_mirror = flags &
DRM_XE_VM_BIND_FLAG_CPU_ADDR_MIRROR;
+ bool is_decompress = flags & DRM_XE_VM_BIND_FLAG_DECOMPRESS;
u16 pat_index = (*bind_ops)[i].pat_index;
u16 coh_mode;
bool comp_en;
@@ -3466,7 +3476,9 @@ static int vm_bind_ioctl_check_args(struct xe_device *xe, struct xe_vm *vm,
XE_IOCTL_DBG(xe, obj_offset && (is_null ||
is_cpu_addr_mirror)) ||
XE_IOCTL_DBG(xe, op != DRM_XE_VM_BIND_OP_MAP &&
- (is_null || is_cpu_addr_mirror)) ||
+ (is_decompress || is_null || is_cpu_addr_mirror)) ||
+ XE_IOCTL_DBG(xe, is_decompress &&
+ xe_pat_index_get_comp_en(xe, pat_index)) ||
XE_IOCTL_DBG(xe, !obj &&
op == DRM_XE_VM_BIND_OP_MAP &&
!is_null && !is_cpu_addr_mirror) ||
@@ -3508,6 +3520,13 @@ static int vm_bind_ioctl_check_args(struct xe_device *xe, struct xe_vm *vm,
err = -EINVAL;
goto free_bind_ops;
}
+
+ if (is_decompress && (XE_IOCTL_DBG(xe, !xe_device_has_flat_ccs(xe)) ||
+ XE_IOCTL_DBG(xe, GRAPHICS_VER(xe) < 20) ||
+ XE_IOCTL_DBG(xe, !IS_DGFX(xe)))) {
+ err = -EOPNOTSUPP;
+ goto free_bind_ops;
+ }
}
return 0;
@@ -3967,76 +3986,20 @@ void xe_vm_unlock(struct xe_vm *vm)
}
/**
- * xe_vm_range_tilemask_tlb_inval - Issue a TLB invalidation on this tilemask for an
- * address range
- * @vm: The VM
- * @start: start address
- * @end: end address
- * @tile_mask: mask for which gt's issue tlb invalidation
- *
- * Issue a range based TLB invalidation for gt's in tilemask
- *
- * Returns 0 for success, negative error code otherwise.
- */
-int xe_vm_range_tilemask_tlb_inval(struct xe_vm *vm, u64 start,
- u64 end, u8 tile_mask)
-{
- struct xe_tlb_inval_fence
- fence[XE_MAX_TILES_PER_DEVICE * XE_MAX_GT_PER_TILE];
- struct xe_tile *tile;
- u32 fence_id = 0;
- u8 id;
- int err;
-
- if (!tile_mask)
- return 0;
-
- for_each_tile(tile, vm->xe, id) {
- if (!(tile_mask & BIT(id)))
- continue;
-
- xe_tlb_inval_fence_init(&tile->primary_gt->tlb_inval,
- &fence[fence_id], true);
-
- err = xe_tlb_inval_range(&tile->primary_gt->tlb_inval,
- &fence[fence_id], start, end,
- vm->usm.asid, NULL);
- if (err)
- goto wait;
- ++fence_id;
-
- if (!tile->media_gt)
- continue;
-
- xe_tlb_inval_fence_init(&tile->media_gt->tlb_inval,
- &fence[fence_id], true);
-
- err = xe_tlb_inval_range(&tile->media_gt->tlb_inval,
- &fence[fence_id], start, end,
- vm->usm.asid, NULL);
- if (err)
- goto wait;
- ++fence_id;
- }
-
-wait:
- for (id = 0; id < fence_id; ++id)
- xe_tlb_inval_fence_wait(&fence[id]);
-
- return err;
-}
-
-/**
- * xe_vm_invalidate_vma - invalidate GPU mappings for VMA without a lock
+ * xe_vm_invalidate_vma_submit - Submit a job to invalidate GPU mappings for
+ * VMA.
* @vma: VMA to invalidate
+ * @batch: TLB invalidation batch to populate; caller must later call
+ * xe_tlb_inval_batch_wait() on it to wait for completion
*
* Walks a list of page tables leaves which it memset the entries owned by this
- * VMA to zero, invalidates the TLBs, and block until TLBs invalidation is
- * complete.
+ * VMA to zero and invalidates the TLBs, but doesn't block until the TLB
+ * flush completes; instead it populates @batch, which can be waited on using
+ * xe_tlb_inval_batch_wait().
*
* Returns 0 for success, negative error code otherwise.
*/
-int xe_vm_invalidate_vma(struct xe_vma *vma)
+int xe_vm_invalidate_vma_submit(struct xe_vma *vma, struct xe_tlb_inval_batch *batch)
{
struct xe_device *xe = xe_vma_vm(vma)->xe;
struct xe_vm *vm = xe_vma_vm(vma);
@@ -4080,12 +4043,35 @@ int xe_vm_invalidate_vma(struct xe_vma *vma)
xe_device_wmb(xe);
- ret = xe_vm_range_tilemask_tlb_inval(xe_vma_vm(vma), xe_vma_start(vma),
- xe_vma_end(vma), tile_mask);
+ ret = xe_tlb_inval_range_tilemask_submit(xe, xe_vma_vm(vma)->usm.asid,
+ xe_vma_start(vma), xe_vma_end(vma),
+ tile_mask, batch);
/* WRITE_ONCE pairs with READ_ONCE in xe_vm_has_valid_gpu_mapping() */
WRITE_ONCE(vma->tile_invalidated, vma->tile_mask);
+ return ret;
+}
+
+/**
+ * xe_vm_invalidate_vma - invalidate GPU mappings for VMA without a lock
+ * @vma: VMA to invalidate
+ *
+ * Walks the list of page table leaves, memsets the entries owned by this
+ * VMA to zero, invalidates the TLBs, and blocks until the TLB invalidation
+ * is complete.
+ *
+ * Returns 0 for success, negative error code otherwise.
+ */
+int xe_vm_invalidate_vma(struct xe_vma *vma)
+{
+ struct xe_tlb_inval_batch batch;
+ int ret;
+
+ ret = xe_vm_invalidate_vma_submit(vma, &batch);
+ if (ret)
+ return ret;
+ xe_tlb_inval_batch_wait(&batch);
return ret;
}
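
The submit/wait split above exists so callers can overlap several TLB invalidations before blocking on any of them. Below is a minimal sketch of that usage pattern, assuming a hypothetical caller-owned VMA array; only xe_vm_invalidate_vma_submit() and xe_tlb_inval_batch_wait() are taken from this series, and error handling is simplified:

/*
 * Hedged sketch: submit invalidations for a set of VMAs first, then wait on
 * all of them. 'vmas' and 'count' are hypothetical caller state.
 */
static int invalidate_vmas_batched(struct xe_vma **vmas, int count)
{
	struct xe_tlb_inval_batch *batches;
	int i, submitted = 0, err = 0;

	batches = kcalloc(count, sizeof(*batches), GFP_KERNEL);
	if (!batches)
		return -ENOMEM;

	/* Pass 1: issue all TLB invalidations without blocking. */
	for (i = 0; i < count; i++) {
		err = xe_vm_invalidate_vma_submit(vmas[i], &batches[i]);
		if (err)
			break;
		submitted++;
	}

	/* Pass 2: wait only once per batch that was actually submitted. */
	for (i = 0; i < submitted; i++)
		xe_tlb_inval_batch_wait(&batches[i]);

	kfree(batches);
	return err;
}
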
diff --git a/drivers/gpu/drm/xe/xe_vm.h b/drivers/gpu/drm/xe/xe_vm.h
index f849e369432b..0bc7ed23eeae 100644
--- a/drivers/gpu/drm/xe/xe_vm.h
+++ b/drivers/gpu/drm/xe/xe_vm.h
@@ -240,11 +240,10 @@ struct dma_fence *xe_vm_range_rebind(struct xe_vm *vm,
struct dma_fence *xe_vm_range_unbind(struct xe_vm *vm,
struct xe_svm_range *range);
-int xe_vm_range_tilemask_tlb_inval(struct xe_vm *vm, u64 start,
- u64 end, u8 tile_mask);
-
int xe_vm_invalidate_vma(struct xe_vma *vma);
+int xe_vm_invalidate_vma_submit(struct xe_vma *vma, struct xe_tlb_inval_batch *batch);
+
int xe_vm_validate_protected(struct xe_vm *vm);
static inline void xe_vm_queue_rebind_worker(struct xe_vm *vm)
diff --git a/drivers/gpu/drm/xe/xe_vm_madvise.c b/drivers/gpu/drm/xe/xe_vm_madvise.c
index bc39a9a9790c..869db304d96d 100644
--- a/drivers/gpu/drm/xe/xe_vm_madvise.c
+++ b/drivers/gpu/drm/xe/xe_vm_madvise.c
@@ -12,6 +12,7 @@
#include "xe_pat.h"
#include "xe_pt.h"
#include "xe_svm.h"
+#include "xe_tlb_inval.h"
struct xe_vmas_in_madvise_range {
u64 addr;
@@ -235,13 +236,20 @@ static u8 xe_zap_ptes_in_madvise_range(struct xe_vm *vm, u64 start, u64 end)
static int xe_vm_invalidate_madvise_range(struct xe_vm *vm, u64 start, u64 end)
{
u8 tile_mask = xe_zap_ptes_in_madvise_range(vm, start, end);
+ struct xe_tlb_inval_batch batch;
+ int err;
if (!tile_mask)
return 0;
xe_device_wmb(vm->xe);
- return xe_vm_range_tilemask_tlb_inval(vm, start, end, tile_mask);
+ err = xe_tlb_inval_range_tilemask_submit(vm->xe, vm->usm.asid, start, end,
+ tile_mask, &batch);
+ if (!err)
+ xe_tlb_inval_batch_wait(&batch);
+
+ return err;
}
static bool madvise_args_are_sane(struct xe_device *xe, const struct drm_xe_madvise *args)
diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h
index 1f6f7e30e751..69e80c94138a 100644
--- a/drivers/gpu/drm/xe/xe_vm_types.h
+++ b/drivers/gpu/drm/xe/xe_vm_types.h
@@ -18,6 +18,7 @@
#include "xe_device_types.h"
#include "xe_pt_types.h"
#include "xe_range_fence.h"
+#include "xe_tlb_inval_types.h"
#include "xe_userptr.h"
struct drm_pagemap;
@@ -376,6 +377,8 @@ struct xe_vma_op_map {
bool immediate;
/** @read_only: Read only */
bool invalidate_on_bind;
+ /** @request_decompress: schedule decompression for GPU map */
+ bool request_decompress;
/** @pat_index: The pat index to use for this operation. */
u16 pat_index;
};
diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c
index 4699b098fe13..0eb96abc27df 100644
--- a/drivers/gpu/drm/xe/xe_wa.c
+++ b/drivers/gpu/drm/xe/xe_wa.c
@@ -164,6 +164,11 @@ static const struct xe_rtp_entry_sr gt_was[] = {
MEDIA_VERSION_RANGE(1301, 3500)),
XE_RTP_ACTIONS(SET(GUC_INTR_CHICKEN, DISABLE_SIGNALING_ENGINES))
},
+ { XE_RTP_NAME("14026578760"),
+ XE_RTP_RULES(GRAPHICS_VERSION_RANGE(3510, 3511), OR,
+ MEDIA_VERSION(3503)),
+ XE_RTP_ACTIONS(SET(GAMSTLB_CTRL, DIS_PEND_GPA_LINK))
+ },
/* DG1 */
@@ -301,10 +306,6 @@ static const struct xe_rtp_entry_sr gt_was[] = {
XE_RTP_ACTIONS(SET(MMIOATSREQLIMIT_GAM_WALK_3D,
DIS_ATS_WRONLY_PG))
},
- { XE_RTP_NAME("16028780921"),
- XE_RTP_RULES(GRAPHICS_VERSION(3510), GRAPHICS_STEP(A0, B0)),
- XE_RTP_ACTIONS(SET(CCCHKNREG2, LOCALITYDIS))
- },
{ XE_RTP_NAME("14026144927"),
XE_RTP_RULES(GRAPHICS_VERSION(3510), GRAPHICS_STEP(A0, B0)),
XE_RTP_ACTIONS(SET(L3SQCREG2, L3_SQ_DISABLE_COAMA_2WAY_COH |
diff --git a/include/drm/drm_ras.h b/include/drm/drm_ras.h
new file mode 100644
index 000000000000..5d50209e51db
--- /dev/null
+++ b/include/drm/drm_ras.h
@@ -0,0 +1,75 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2026 Intel Corporation
+ */
+
+#ifndef __DRM_RAS_H__
+#define __DRM_RAS_H__
+
+#include <uapi/drm/drm_ras.h>
+
+/**
+ * struct drm_ras_node - A DRM RAS Node
+ */
+struct drm_ras_node {
+ /** @id: Unique identifier for the node. Dynamically assigned. */
+ u32 id;
+ /**
+ * @device_name: Human-readable name of the device. Given by the driver.
+ */
+ const char *device_name;
+ /** @node_name: Human-readable name of the node. Given by the driver. */
+ const char *node_name;
+ /** @type: Type of the node (enum drm_ras_node_type). */
+ enum drm_ras_node_type type;
+
+ /* Error-Counter Related Callback and Variables */
+
+ /** @error_counter_range: Range of valid Error IDs for this node. */
+ struct {
+ /** @first: First valid Error ID. */
+ u32 first;
+ /** @last: Last valid Error ID. Mandatory entry. */
+ u32 last;
+ } error_counter_range;
+
+ /**
+ * @query_error_counter:
+ *
+ * This callback is used by drm-ras to query a specific error counter.
+ * Used for input checking and to iterate over all error counters in a node.
+ *
+ * Driver should expect query_error_counter() to be called with
+ * error_id from `error_counter_range.first` to
+ * `error_counter_range.last`.
+ *
+ * @query_error_counter is a mandatory callback for
+ * error-counter nodes.
+ *
+ * Returns: 0 on success,
+ * -ENOENT when error_id is not supported as an indication that
+ * drm_ras should silently skip this entry. Used for
+ * supporting non-contiguous error ranges.
+ * Driver is responsible for maintaining the list of
+ * supported error IDs in the range of first to last.
+ * Other negative values on errors that should terminate the
+ * netlink query.
+ */
+ int (*query_error_counter)(struct drm_ras_node *node, u32 error_id,
+ const char **name, u32 *val);
+
+ /** @priv: Driver private data */
+ void *priv;
+};
+
+struct drm_device;
+
+#if IS_ENABLED(CONFIG_DRM_RAS)
+int drm_ras_node_register(struct drm_ras_node *node);
+void drm_ras_node_unregister(struct drm_ras_node *node);
+#else
+static inline int drm_ras_node_register(struct drm_ras_node *node) { return 0; }
+static inline void drm_ras_node_unregister(struct drm_ras_node *node) { }
+#endif
+
+#endif
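
For illustration, a hedged sketch of how a driver might hook into the interface declared above. The foo_* names and the backing arrays are hypothetical; the struct drm_ras_node fields, the query_error_counter() contract (including the -ENOENT skip for sparse error IDs) and drm_ras_node_register() come from this header:

/* Hypothetical per-device RAS bookkeeping. */
struct foo_ras {
	struct drm_ras_node node;
	const char *names[8];	/* error_id -> name, NULL if unsupported */
	u32 counters[8];	/* error_id -> current count */
};

static int foo_query_error_counter(struct drm_ras_node *node, u32 error_id,
				   const char **name, u32 *val)
{
	struct foo_ras *ras = node->priv;

	/* Non-contiguous ranges: silently skip IDs this device lacks. */
	if (!ras->names[error_id])
		return -ENOENT;

	*name = ras->names[error_id];
	*val = ras->counters[error_id];
	return 0;
}

static int foo_ras_register(struct foo_ras *ras)
{
	ras->node.device_name = "foo0";			/* hypothetical */
	ras->node.node_name = "correctable-errors";	/* hypothetical */
	ras->node.type = DRM_RAS_NODE_TYPE_ERROR_COUNTER;
	ras->node.error_counter_range.first = 0;
	ras->node.error_counter_range.last = 7;
	ras->node.query_error_counter = foo_query_error_counter;
	ras->node.priv = ras;

	return drm_ras_node_register(&ras->node);	/* assigns node.id */
}
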
diff --git a/include/drm/drm_ras_genl_family.h b/include/drm/drm_ras_genl_family.h
new file mode 100644
index 000000000000..910fb3943a75
--- /dev/null
+++ b/include/drm/drm_ras_genl_family.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2026 Intel Corporation
+ */
+
+#ifndef __DRM_RAS_GENL_FAMILY_H__
+#define __DRM_RAS_GENL_FAMILY_H__
+
+#if IS_ENABLED(CONFIG_DRM_RAS)
+int drm_ras_genl_family_register(void);
+void drm_ras_genl_family_unregister(void);
+#else
+static inline int drm_ras_genl_family_register(void) { return 0; }
+static inline void drm_ras_genl_family_unregister(void) { }
+#endif
+
+#endif
diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h
index 07a2bbaf86e9..dcdfdf1e0b39 100644
--- a/include/linux/mmu_notifier.h
+++ b/include/linux/mmu_notifier.h
@@ -234,15 +234,57 @@ struct mmu_notifier {
};
/**
+ * struct mmu_interval_notifier_finish - mmu_interval_notifier two-pass abstraction
+ * @link: Lockless list link for the notifiers pending pass list
+ * @notifier: The mmu_interval_notifier for which the finish pass is called.
+ *
+ * Allocate this structure, typically using GFP_NOWAIT, in the interval
+ * notifier's start pass.
+ * Note that with a large number of notifiers implementing two passes,
+ * allocation with GFP_NOWAIT will become increasingly likely to fail, so consider
+ * implementing a small pool instead of using kmalloc() allocations.
+ *
+ * If the implementation needs to pass data between the start and the finish passes,
+ * the recommended way is to embed struct mmu_interval_notifier_finish into a larger
+ * structure that also contains the data that needs to be shared. Keep in mind
+ * that notifier callbacks can be invoked in parallel, and each invocation needs its
+ * own struct mmu_interval_notifier_finish.
+ *
+ * If allocation fails, then the &mmu_interval_notifier_ops->invalidate_start op
+ * needs to implement the full notifier functionality. Please refer to its
+ * documentation.
+ */
+struct mmu_interval_notifier_finish {
+ struct llist_node link;
+ struct mmu_interval_notifier *notifier;
+};
+
+/**
* struct mmu_interval_notifier_ops
* @invalidate: Upon return the caller must stop using any SPTEs within this
* range. This function can sleep. Return false only if sleeping
* was required but mmu_notifier_range_blockable(range) is false.
+ * @invalidate_start: Similar to @invalidate, but intended for two-pass notifier
+ * callbacks where the call to @invalidate_start is the first
+ * pass and any struct mmu_interval_notifier_finish pointer
+ * returned in the @finish parameter describes the finish pass.
+ * If *@finish is %NULL on return, then no final pass will be
+ * called, and @invalidate_start needs to implement the full
+ * notifier, behaving like @invalidate. The value of *@finish
+ * is guaranteed to be %NULL at function entry.
+ * @invalidate_finish: Called as the second pass for any notifier that returned
+ * a non-NULL *@finish from @invalidate_start. The @finish
+ * pointer passed here is the same one returned by
+ * @invalidate_start.
*/
struct mmu_interval_notifier_ops {
bool (*invalidate)(struct mmu_interval_notifier *interval_sub,
const struct mmu_notifier_range *range,
unsigned long cur_seq);
+ bool (*invalidate_start)(struct mmu_interval_notifier *interval_sub,
+ const struct mmu_notifier_range *range,
+ unsigned long cur_seq,
+ struct mmu_interval_notifier_finish **finish);
+ void (*invalidate_finish)(struct mmu_interval_notifier_finish *finish);
};
struct mmu_interval_notifier {
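
To make the two-pass contract above concrete, here is a hedged driver-side sketch: the start pass kicks off an asynchronous invalidation and hands the core an embedded finish struct, and the finish pass blocks for completion after every subscriber's start pass has run. The my_range structure and the my_hw_inval_submit()/my_hw_inval_wait() helpers are hypothetical; handling of non-blockable ranges and of concurrent invalidations of the same range is omitted for brevity:

struct my_range {
	struct mmu_interval_notifier notifier;
	struct mmu_interval_notifier_finish finish;
	/* ... driver data describing the mirrored range ... */
};

/* Hypothetical driver helpers, assumed to exist elsewhere. */
void my_hw_inval_submit(struct my_range *r);
void my_hw_inval_wait(struct my_range *r);

static bool my_invalidate_start(struct mmu_interval_notifier *mni,
				const struct mmu_notifier_range *range,
				unsigned long cur_seq,
				struct mmu_interval_notifier_finish **finish)
{
	struct my_range *r = container_of(mni, struct my_range, notifier);

	mmu_interval_set_seq(mni, cur_seq);

	/* Queue the HW invalidation without waiting for it to complete. */
	my_hw_inval_submit(r);

	/* Ask the core to call ->invalidate_finish() after all start passes. */
	*finish = &r->finish;
	return true;
}

static void my_invalidate_finish(struct mmu_interval_notifier_finish *finish)
{
	struct my_range *r = container_of(finish, struct my_range, finish);

	/* Block until the invalidation queued in the start pass has landed. */
	my_hw_inval_wait(r);
}

static const struct mmu_interval_notifier_ops my_notifier_ops = {
	.invalidate_start = my_invalidate_start,
	.invalidate_finish = my_invalidate_finish,
};
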
diff --git a/include/uapi/drm/drm_ras.h b/include/uapi/drm/drm_ras.h
new file mode 100644
index 000000000000..5f40fa5b869d
--- /dev/null
+++ b/include/uapi/drm/drm_ras.h
@@ -0,0 +1,49 @@
+/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) */
+/* Do not edit directly, auto-generated from: */
+/* Documentation/netlink/specs/drm_ras.yaml */
+/* YNL-GEN uapi header */
+/* To regenerate run: tools/net/ynl/ynl-regen.sh */
+
+#ifndef _UAPI_LINUX_DRM_RAS_H
+#define _UAPI_LINUX_DRM_RAS_H
+
+#define DRM_RAS_FAMILY_NAME "drm-ras"
+#define DRM_RAS_FAMILY_VERSION 1
+
+/*
+ * Type of the node. Currently, only error-counter nodes are supported, which
+ * expose reliability counters for a hardware/software component.
+ */
+enum drm_ras_node_type {
+ DRM_RAS_NODE_TYPE_ERROR_COUNTER = 1,
+};
+
+enum {
+ DRM_RAS_A_NODE_ATTRS_NODE_ID = 1,
+ DRM_RAS_A_NODE_ATTRS_DEVICE_NAME,
+ DRM_RAS_A_NODE_ATTRS_NODE_NAME,
+ DRM_RAS_A_NODE_ATTRS_NODE_TYPE,
+
+ __DRM_RAS_A_NODE_ATTRS_MAX,
+ DRM_RAS_A_NODE_ATTRS_MAX = (__DRM_RAS_A_NODE_ATTRS_MAX - 1)
+};
+
+enum {
+ DRM_RAS_A_ERROR_COUNTER_ATTRS_NODE_ID = 1,
+ DRM_RAS_A_ERROR_COUNTER_ATTRS_ERROR_ID,
+ DRM_RAS_A_ERROR_COUNTER_ATTRS_ERROR_NAME,
+ DRM_RAS_A_ERROR_COUNTER_ATTRS_ERROR_VALUE,
+
+ __DRM_RAS_A_ERROR_COUNTER_ATTRS_MAX,
+ DRM_RAS_A_ERROR_COUNTER_ATTRS_MAX = (__DRM_RAS_A_ERROR_COUNTER_ATTRS_MAX - 1)
+};
+
+enum {
+ DRM_RAS_CMD_LIST_NODES = 1,
+ DRM_RAS_CMD_GET_ERROR_COUNTER,
+
+ __DRM_RAS_CMD_MAX,
+ DRM_RAS_CMD_MAX = (__DRM_RAS_CMD_MAX - 1)
+};
+
+#endif /* _UAPI_LINUX_DRM_RAS_H */
diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h
index ef2565048bdf..0497b85fa12a 100644
--- a/include/uapi/drm/xe_drm.h
+++ b/include/uapi/drm/xe_drm.h
@@ -406,6 +406,9 @@ struct drm_xe_query_mem_regions {
* - %DRM_XE_QUERY_CONFIG_FLAG_HAS_NO_COMPRESSION_HINT - Flag is set if the
* device supports the userspace hint %DRM_XE_GEM_CREATE_FLAG_NO_COMPRESSION.
* This is exposed only on Xe2+.
+ * - %DRM_XE_QUERY_CONFIG_FLAG_HAS_DISABLE_STATE_CACHE_PERF_FIX - Flag is set
+ * if a queue can be created with
+ * %DRM_XE_EXEC_QUEUE_SET_DISABLE_STATE_CACHE_PERF_FIX.
* - %DRM_XE_QUERY_CONFIG_MIN_ALIGNMENT - Minimal memory alignment
* required by this device, typically SZ_4K or SZ_64K
* - %DRM_XE_QUERY_CONFIG_VA_BITS - Maximum bits of a virtual address
@@ -425,6 +428,7 @@ struct drm_xe_query_config {
#define DRM_XE_QUERY_CONFIG_FLAG_HAS_LOW_LATENCY (1 << 1)
#define DRM_XE_QUERY_CONFIG_FLAG_HAS_CPU_ADDR_MIRROR (1 << 2)
#define DRM_XE_QUERY_CONFIG_FLAG_HAS_NO_COMPRESSION_HINT (1 << 3)
+ #define DRM_XE_QUERY_CONFIG_FLAG_HAS_DISABLE_STATE_CACHE_PERF_FIX (1 << 4)
#define DRM_XE_QUERY_CONFIG_MIN_ALIGNMENT 2
#define DRM_XE_QUERY_CONFIG_VA_BITS 3
#define DRM_XE_QUERY_CONFIG_MAX_EXEC_QUEUE_PRIORITY 4
@@ -1053,6 +1057,13 @@ struct drm_xe_vm_destroy {
* not invoke autoreset. Neither will stack variables going out of scope.
* Therefore it's recommended to always explicitly reset the madvises when
* freeing the memory backing a region used in a &DRM_IOCTL_XE_MADVISE call.
+ * - DRM_XE_VM_BIND_FLAG_DECOMPRESS - Request on-device decompression for a MAP.
+ * When set on a MAP bind operation, requests that the driver schedule an
+ * on-device in-place decompression (via the migrate/resolve path) for the GPU
+ * mapping created by this bind. Only valid for DRM_XE_VM_BIND_OP_MAP; usage on
+ * other ops is rejected. The bind's pat_index must select the device's
+ * "no-compression" PAT. Only meaningful for VRAM-backed BOs on devices that
+ * support flat CCS and Xe2 or later graphics. A hedged usage sketch follows the
+ * flag definitions below.
*
* The @prefetch_mem_region_instance for %DRM_XE_VM_BIND_OP_PREFETCH can also be:
* - %DRM_XE_CONSULT_MEM_ADVISE_PREF_LOC, which ensures prefetching occurs in
@@ -1160,6 +1171,7 @@ struct drm_xe_vm_bind_op {
#define DRM_XE_VM_BIND_FLAG_CHECK_PXP (1 << 4)
#define DRM_XE_VM_BIND_FLAG_CPU_ADDR_MIRROR (1 << 5)
#define DRM_XE_VM_BIND_FLAG_MADVISE_AUTORESET (1 << 6)
+#define DRM_XE_VM_BIND_FLAG_DECOMPRESS (1 << 7)
/** @flags: Bind flags */
__u32 flags;
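
As referenced in the flag description above, a hedged userspace sketch of a MAP bind that requests decompression. The fd, handles, addresses and uncompressed_pat_index are hypothetical inputs (the PAT index must be a valid no-compression index for the device); the structures, flags and DRM_IOCTL_XE_VM_BIND are existing uAPI:

#include <sys/ioctl.h>
#include <drm/xe_drm.h>

static int map_and_decompress(int fd, __u32 vm_id, __u32 bo_handle,
			      __u64 gpu_addr, __u64 size,
			      __u16 uncompressed_pat_index)
{
	struct drm_xe_vm_bind bind = {
		.vm_id = vm_id,
		.num_binds = 1,
		.bind = {
			.obj = bo_handle,
			.range = size,
			.addr = gpu_addr,
			.op = DRM_XE_VM_BIND_OP_MAP,
			.flags = DRM_XE_VM_BIND_FLAG_DECOMPRESS,
			.pat_index = uncompressed_pat_index,
		},
	};

	return ioctl(fd, DRM_IOCTL_XE_VM_BIND, &bind);
}
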
@@ -1285,6 +1297,9 @@ struct drm_xe_vm_bind {
* - %DRM_XE_EXEC_QUEUE_SET_PROPERTY_MULTI_QUEUE_PRIORITY - Set the queue
* priority within the multi-queue group. Current valid priority values are 0–2
* (default is 1), with higher values indicating higher priority.
+ * - %DRM_XE_EXEC_QUEUE_SET_DISABLE_STATE_CACHE_PERF_FIX - Set the queue to
+ * enable render color cache keying on BTP+BTI instead of just BTI
+ * (only valid for render queues); see the sketch below.
*
* The example below shows how to use @drm_xe_exec_queue_create to create
* a simple exec_queue (no parallel submission) of class
@@ -1329,6 +1344,7 @@ struct drm_xe_exec_queue_create {
#define DRM_XE_EXEC_QUEUE_SET_PROPERTY_MULTI_GROUP 4
#define DRM_XE_MULTI_GROUP_CREATE (1ull << 63)
#define DRM_XE_EXEC_QUEUE_SET_PROPERTY_MULTI_QUEUE_PRIORITY 5
+#define DRM_XE_EXEC_QUEUE_SET_DISABLE_STATE_CACHE_PERF_FIX 6
/** @extensions: Pointer to the first extension struct, if any */
__u64 extensions;
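
A hedged sketch of requesting the property above at exec queue creation time via the generic set-property extension. Engine placement and the rest of the create arguments are assumed to be filled in elsewhere; only the extension chaining is shown:

#include <stdint.h>
#include <string.h>
#include <drm/xe_drm.h>

static void request_state_cache_perf_fix(struct drm_xe_exec_queue_create *create,
					 struct drm_xe_ext_set_property *ext)
{
	memset(ext, 0, sizeof(*ext));
	ext->base.name = DRM_XE_EXEC_QUEUE_EXTENSION_SET_PROPERTY;
	ext->property = DRM_XE_EXEC_QUEUE_SET_DISABLE_STATE_CACHE_PERF_FIX;
	ext->value = 1;

	/* Chain as the first (and only) extension on the create ioctl. */
	create->extensions = (uintptr_t)ext;
}
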
@@ -2357,6 +2373,85 @@ struct drm_xe_exec_queue_set_property {
__u64 reserved[2];
};
+/**
+ * DOC: Xe DRM RAS
+ *
+ * The enums and strings defined below map to the attributes of the DRM RAS Netlink Interface.
+ * Refer to Documentation/netlink/specs/drm_ras.yaml for complete interface specification.
+ *
+ * Node Registration
+ * =================
+ *
+ * The driver registers DRM RAS nodes for each error severity level.
+ * enum drm_xe_ras_error_severity defines the node-id, while DRM_XE_RAS_ERROR_SEVERITY_NAMES maps
+ * node-id to node-name.
+ *
+ * Error Classification
+ * ====================
+ *
+ * Each node contains a list of error counters. Each error is identified by an error-id and
+ * an error-name. enum drm_xe_ras_error_component defines the error-id, while
+ * DRM_XE_RAS_ERROR_COMPONENT_NAMES maps error-id to error-name.
+ *
+ * User Interface
+ * ==============
+ *
+ * To retrieve the values of an error counter, userspace applications should
+ * follow the steps below:
+ *
+ * 1. Use command LIST_NODES to enumerate all available nodes
+ * 2. Select a node by node-id or node-name
+ * 3. Use command GET_ERROR_COUNTER to list the errors of a specific node
+ * 4. Query specific error values using either error-id or error-name
+ *
+ * .. code-block:: C
+ *
+ * // Lookup tables for ID-to-name resolution
+ * static const char *nodes[] = DRM_XE_RAS_ERROR_SEVERITY_NAMES;
+ * static const char *errors[] = DRM_XE_RAS_ERROR_COMPONENT_NAMES;
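+ *
+ *     // hypothetical: node_id, error_id and value as read back over netlink
+ *     printf("%s/%s = %u\n", nodes[node_id], errors[error_id], value);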
+ *
+ */
+
+/**
+ * enum drm_xe_ras_error_severity - DRM RAS error severity.
+ */
+enum drm_xe_ras_error_severity {
+ /** @DRM_XE_RAS_ERR_SEV_CORRECTABLE: Correctable Error */
+ DRM_XE_RAS_ERR_SEV_CORRECTABLE = 0,
+ /** @DRM_XE_RAS_ERR_SEV_UNCORRECTABLE: Uncorrectable Error */
+ DRM_XE_RAS_ERR_SEV_UNCORRECTABLE,
+ /** @DRM_XE_RAS_ERR_SEV_MAX: Max severity */
+ DRM_XE_RAS_ERR_SEV_MAX /* non-ABI */
+};
+
+/**
+ * enum drm_xe_ras_error_component - DRM RAS error component.
+ */
+enum drm_xe_ras_error_component {
+ /** @DRM_XE_RAS_ERR_COMP_CORE_COMPUTE: Core Compute Error */
+ DRM_XE_RAS_ERR_COMP_CORE_COMPUTE = 1,
+ /** @DRM_XE_RAS_ERR_COMP_SOC_INTERNAL: SoC Internal Error */
+ DRM_XE_RAS_ERR_COMP_SOC_INTERNAL,
+ /** @DRM_XE_RAS_ERR_COMP_MAX: Max Error */
+ DRM_XE_RAS_ERR_COMP_MAX /* non-ABI */
+};
+
+/*
+ * Error severity to name mapping.
+ */
+#define DRM_XE_RAS_ERROR_SEVERITY_NAMES { \
+ [DRM_XE_RAS_ERR_SEV_CORRECTABLE] = "correctable-errors", \
+ [DRM_XE_RAS_ERR_SEV_UNCORRECTABLE] = "uncorrectable-errors", \
+}
+
+/*
+ * Error component to name mapping.
+ */
+#define DRM_XE_RAS_ERROR_COMPONENT_NAMES { \
+ [DRM_XE_RAS_ERR_COMP_CORE_COMPUTE] = "core-compute", \
+ [DRM_XE_RAS_ERR_COMP_SOC_INTERNAL] = "soc-internal" \
+}
+
#if defined(__cplusplus)
}
#endif
diff --git a/mm/mmu_notifier.c b/mm/mmu_notifier.c
index a6cdf3674bdc..4d8a64ce8eda 100644
--- a/mm/mmu_notifier.c
+++ b/mm/mmu_notifier.c
@@ -260,6 +260,15 @@ mmu_interval_read_begin(struct mmu_interval_notifier *interval_sub)
}
EXPORT_SYMBOL_GPL(mmu_interval_read_begin);
+static void mn_itree_finish_pass(struct llist_head *finish_passes)
+{
+ struct llist_node *first = llist_reverse_order(__llist_del_all(finish_passes));
+ struct mmu_interval_notifier_finish *f, *next;
+
+ llist_for_each_entry_safe(f, next, first, link)
+ f->notifier->ops->invalidate_finish(f);
+}
+
static void mn_itree_release(struct mmu_notifier_subscriptions *subscriptions,
struct mm_struct *mm)
{
@@ -271,6 +280,7 @@ static void mn_itree_release(struct mmu_notifier_subscriptions *subscriptions,
.end = ULONG_MAX,
};
struct mmu_interval_notifier *interval_sub;
+ LLIST_HEAD(finish_passes);
unsigned long cur_seq;
bool ret;
@@ -278,11 +288,27 @@ static void mn_itree_release(struct mmu_notifier_subscriptions *subscriptions,
mn_itree_inv_start_range(subscriptions, &range, &cur_seq);
interval_sub;
interval_sub = mn_itree_inv_next(interval_sub, &range)) {
- ret = interval_sub->ops->invalidate(interval_sub, &range,
- cur_seq);
+ if (interval_sub->ops->invalidate_start) {
+ struct mmu_interval_notifier_finish *finish = NULL;
+
+ ret = interval_sub->ops->invalidate_start(interval_sub,
+ &range,
+ cur_seq,
+ &finish);
+ if (ret && finish) {
+ finish->notifier = interval_sub;
+ __llist_add(&finish->link, &finish_passes);
+ }
+
+ } else {
+ ret = interval_sub->ops->invalidate(interval_sub,
+ &range,
+ cur_seq);
+ }
WARN_ON(!ret);
}
+ mn_itree_finish_pass(&finish_passes);
mn_itree_inv_end(subscriptions);
}
@@ -430,7 +456,9 @@ static int mn_itree_invalidate(struct mmu_notifier_subscriptions *subscriptions,
const struct mmu_notifier_range *range)
{
struct mmu_interval_notifier *interval_sub;
+ LLIST_HEAD(finish_passes);
unsigned long cur_seq;
+ int err = 0;
for (interval_sub =
mn_itree_inv_start_range(subscriptions, range, &cur_seq);
@@ -438,23 +466,41 @@ static int mn_itree_invalidate(struct mmu_notifier_subscriptions *subscriptions,
interval_sub = mn_itree_inv_next(interval_sub, range)) {
bool ret;
- ret = interval_sub->ops->invalidate(interval_sub, range,
- cur_seq);
+ if (interval_sub->ops->invalidate_start) {
+ struct mmu_interval_notifier_finish *finish = NULL;
+
+ ret = interval_sub->ops->invalidate_start(interval_sub,
+ range,
+ cur_seq,
+ &finish);
+ if (ret && finish) {
+ finish->notifier = interval_sub;
+ __llist_add(&finish->link, &finish_passes);
+ }
+
+ } else {
+ ret = interval_sub->ops->invalidate(interval_sub,
+ range,
+ cur_seq);
+ }
if (!ret) {
if (WARN_ON(mmu_notifier_range_blockable(range)))
continue;
- goto out_would_block;
+ err = -EAGAIN;
+ break;
}
}
- return 0;
-out_would_block:
+ mn_itree_finish_pass(&finish_passes);
+
/*
* On -EAGAIN the non-blocking caller is not allowed to call
* invalidate_range_end()
*/
- mn_itree_inv_end(subscriptions);
- return -EAGAIN;
+ if (err)
+ mn_itree_inv_end(subscriptions);
+
+ return err;
}
static int mn_hlist_invalidate_range_start(
@@ -976,6 +1022,7 @@ int mmu_interval_notifier_insert(struct mmu_interval_notifier *interval_sub,
struct mmu_notifier_subscriptions *subscriptions;
int ret;
+ WARN_ON_ONCE(ops->invalidate_start && !ops->invalidate_finish);
might_lock(&mm->mmap_lock);
subscriptions = smp_load_acquire(&mm->notifier_subscriptions);