104 files changed, 5140 insertions, 488 deletions
diff --git a/drivers/gpu/drm/xe/.kunitconfig b/drivers/gpu/drm/xe/.kunitconfig index 9590eac91af3..ad4b9b4a9f55 100644 --- a/drivers/gpu/drm/xe/.kunitconfig +++ b/drivers/gpu/drm/xe/.kunitconfig @@ -11,3 +11,8 @@ CONFIG_DRM_XE_DISPLAY=n CONFIG_EXPERT=y CONFIG_FB=y CONFIG_DRM_XE_KUNIT_TEST=y +CONFIG_LOCK_DEBUGGING_SUPPORT=y +CONFIG_PROVE_LOCKING=y +CONFIG_DEBUG_MUTEXES=y +CONFIG_LOCKDEP=y +CONFIG_DEBUG_LOCKDEP=y diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile index efcf0ab7a1a6..fe8b266a9819 100644 --- a/drivers/gpu/drm/xe/Makefile +++ b/drivers/gpu/drm/xe/Makefile @@ -76,6 +76,7 @@ xe-y += xe_bb.o \ xe_ggtt.o \ xe_gpu_scheduler.o \ xe_gsc.o \ + xe_gsc_proxy.o \ xe_gsc_submit.o \ xe_gt.o \ xe_gt_ccs_mode.o \ @@ -92,6 +93,7 @@ xe-y += xe_bb.o \ xe_guc.o \ xe_guc_ads.o \ xe_guc_ct.o \ + xe_guc_db_mgr.o \ xe_guc_debugfs.o \ xe_guc_hwconfig.o \ xe_guc_log.o \ @@ -137,6 +139,7 @@ xe-y += xe_bb.o \ xe_uc_debugfs.o \ xe_uc_fw.o \ xe_vm.o \ + xe_vram_freq.o \ xe_wait_user_fence.o \ xe_wa.o \ xe_wopcm.o @@ -145,13 +148,19 @@ xe-y += xe_bb.o \ xe-$(CONFIG_HWMON) += xe_hwmon.o # graphics virtualization (SR-IOV) support -xe-y += xe_sriov.o +xe-y += \ + xe_guc_relay.o \ + xe_memirq.o \ + xe_sriov.o xe-$(CONFIG_PCI_IOV) += \ xe_lmtt.o \ xe_lmtt_2l.o \ xe_lmtt_ml.o +xe-$(CONFIG_DRM_XE_KUNIT_TEST) += \ + tests/xe_kunit_helpers.o + # i915 Display compat #defines and #includes subdir-ccflags-$(CONFIG_DRM_XE_DISPLAY) += \ -I$(srctree)/$(src)/display/ext \ diff --git a/drivers/gpu/drm/xe/abi/gsc_proxy_commands_abi.h b/drivers/gpu/drm/xe/abi/gsc_proxy_commands_abi.h new file mode 100644 index 000000000000..80bbf06a3eb8 --- /dev/null +++ b/drivers/gpu/drm/xe/abi/gsc_proxy_commands_abi.h @@ -0,0 +1,44 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023 Intel Corporation + */ + +#ifndef _ABI_GSC_PROXY_COMMANDS_ABI_H +#define _ABI_GSC_PROXY_COMMANDS_ABI_H + +#include <linux/types.h> + +/* Heci client ID for proxy commands */ +#define HECI_MEADDRESS_PROXY 10 + +/* FW-defined proxy header */ +struct xe_gsc_proxy_header { + /* + * hdr: + * Bits 0-7: type of the proxy message (see enum xe_gsc_proxy_type) + * Bits 8-15: rsvd + * Bits 16-31: length in bytes of the payload following the proxy header + */ + u32 hdr; +#define GSC_PROXY_TYPE GENMASK(7, 0) +#define GSC_PROXY_PAYLOAD_LENGTH GENMASK(31, 16) + + u32 source; /* Source of the Proxy message */ + u32 destination; /* Destination of the Proxy message */ +#define GSC_PROXY_ADDRESSING_KMD 0x10000 +#define GSC_PROXY_ADDRESSING_GSC 0x20000 +#define GSC_PROXY_ADDRESSING_CSME 0x30000 + + u32 status; /* Command status */ +} __packed; + +/* FW-defined proxy types */ +enum xe_gsc_proxy_type { + GSC_PROXY_MSG_TYPE_PROXY_INVALID = 0, + GSC_PROXY_MSG_TYPE_PROXY_QUERY = 1, + GSC_PROXY_MSG_TYPE_PROXY_PAYLOAD = 2, + GSC_PROXY_MSG_TYPE_PROXY_END = 3, + GSC_PROXY_MSG_TYPE_PROXY_NOTIFICATION = 4, +}; + +#endif diff --git a/drivers/gpu/drm/xe/abi/guc_actions_sriov_abi.h b/drivers/gpu/drm/xe/abi/guc_actions_sriov_abi.h new file mode 100644 index 000000000000..5496a5890847 --- /dev/null +++ b/drivers/gpu/drm/xe/abi/guc_actions_sriov_abi.h @@ -0,0 +1,174 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023 Intel Corporation + */ + +#ifndef _GUC_ACTIONS_PF_ABI_H +#define _GUC_ACTIONS_PF_ABI_H + +#include "guc_communication_ctb_abi.h" + +/** + * DOC: GUC2PF_RELAY_FROM_VF + * + * This message is used by the GuC firmware to forward a VF2PF `Relay Message`_ + * received from the Virtual Function (VF) driver to this Physical Function (PF) + * driver. 
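A note on usage: the GSC proxy header defined in gsc_proxy_commands_abi.h above is plain bitfield packing, so it composes naturally with FIELD_PREP()/FIELD_GET() from <linux/bitfield.h>. A minimal sketch, not part of this patch (the function names are hypothetical, and the KMD-to-GSC addressing choice is an assumption based on where the addressing defines sit):

#include <linux/bitfield.h>

/* Hypothetical helper: fill a proxy header for a payload of @len bytes. */
static void gsc_proxy_hdr_prepare(struct xe_gsc_proxy_header *h, u32 len)
{
	h->hdr = FIELD_PREP(GSC_PROXY_TYPE, GSC_PROXY_MSG_TYPE_PROXY_PAYLOAD) |
		 FIELD_PREP(GSC_PROXY_PAYLOAD_LENGTH, len);
	h->source = GSC_PROXY_ADDRESSING_KMD;		/* assumption: KMD is the sender */
	h->destination = GSC_PROXY_ADDRESSING_GSC;
	h->status = 0;
}

/* Hypothetical helper: recover the payload length on the receive side. */
static u32 gsc_proxy_hdr_payload_len(const struct xe_gsc_proxy_header *h)
{
	return FIELD_GET(GSC_PROXY_PAYLOAD_LENGTH, h->hdr);
}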
+ * + * This message is always sent as `CTB HXG Message`_. + * + * +---+-------+--------------------------------------------------------------+ + * | | Bits | Description | + * +===+=======+==============================================================+ + * | 0 | 31 | ORIGIN = GUC_HXG_ORIGIN_GUC_ | + * | +-------+--------------------------------------------------------------+ + * | | 30:28 | TYPE = GUC_HXG_TYPE_EVENT_ | + * | +-------+--------------------------------------------------------------+ + * | | 27:16 | MBZ | + * | +-------+--------------------------------------------------------------+ + * | | 15:0 | ACTION = _`XE_GUC_ACTION_GUC2PF_RELAY_FROM_VF` = 0x5100 | + * +---+-------+--------------------------------------------------------------+ + * | 1 | 31:0 | **VFID** - source VF identifier | + * +---+-------+--------------------------------------------------------------+ + * | 2 | 31:0 | **RELAY_ID** - VF/PF message ID | + * +---+-------+-----------------+--------------------------------------------+ + * | 3 | 31:0 | **RELAY_DATA1** | | + * +---+-------+-----------------+ | + * |...| | | [Embedded `Relay Message`_] | + * +---+-------+-----------------+ | + * | n | 31:0 | **RELAY_DATAx** | | + * +---+-------+-----------------+--------------------------------------------+ + */ +#define XE_GUC_ACTION_GUC2PF_RELAY_FROM_VF 0x5100 + +#define GUC2PF_RELAY_FROM_VF_EVENT_MSG_MIN_LEN (GUC_HXG_EVENT_MSG_MIN_LEN + 2u) +#define GUC2PF_RELAY_FROM_VF_EVENT_MSG_MAX_LEN \ + (GUC2PF_RELAY_FROM_VF_EVENT_MSG_MIN_LEN + GUC_RELAY_MSG_MAX_LEN) +#define GUC2PF_RELAY_FROM_VF_EVENT_MSG_0_MBZ GUC_HXG_EVENT_MSG_0_DATA0 +#define GUC2PF_RELAY_FROM_VF_EVENT_MSG_1_VFID GUC_HXG_EVENT_MSG_n_DATAn +#define GUC2PF_RELAY_FROM_VF_EVENT_MSG_2_RELAY_ID GUC_HXG_EVENT_MSG_n_DATAn +#define GUC2PF_RELAY_FROM_VF_EVENT_MSG_3_RELAY_DATA1 GUC_HXG_EVENT_MSG_n_DATAn +#define GUC2PF_RELAY_FROM_VF_EVENT_MSG_n_RELAY_DATAx GUC_HXG_EVENT_MSG_n_DATAn +#define GUC2PF_RELAY_FROM_VF_EVENT_MSG_NUM_RELAY_DATA GUC_RELAY_MSG_MAX_LEN + +/** + * DOC: PF2GUC_RELAY_TO_VF + * + * This H2G message is used by the Physical Function (PF) driver to send embedded + * VF2PF `Relay Message`_ to the VF. + * + * This action message must be sent over CTB as `CTB HXG Message`_. 
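On the PF side, decoding the fixed part of the GUC2PF_RELAY_FROM_VF event laid out above reduces to FIELD_GET() over the listed dwords. A minimal sketch, not part of this patch (hypothetical function name; the caller is assumed to have already validated len against the MIN/MAX bounds):

/* Hypothetical: split a validated GUC2PF_RELAY_FROM_VF event into its parts. */
static void guc2pf_unpack(const u32 *msg, u32 len, u32 *vfid, u32 *rid,
			  const u32 **relay, u32 *relay_len)
{
	*vfid = FIELD_GET(GUC2PF_RELAY_FROM_VF_EVENT_MSG_1_VFID, msg[1]);
	*rid = FIELD_GET(GUC2PF_RELAY_FROM_VF_EVENT_MSG_2_RELAY_ID, msg[2]);
	*relay = msg + GUC2PF_RELAY_FROM_VF_EVENT_MSG_MIN_LEN;	/* embedded Relay Message */
	*relay_len = len - GUC2PF_RELAY_FROM_VF_EVENT_MSG_MIN_LEN;
}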
+ * + * +---+-------+--------------------------------------------------------------+ + * | | Bits | Description | + * +===+=======+==============================================================+ + * | 0 | 31 | ORIGIN = GUC_HXG_ORIGIN_HOST_ | + * | +-------+--------------------------------------------------------------+ + * | | 30:28 | TYPE = `GUC_HXG_TYPE_FAST_REQUEST`_ | + * | +-------+--------------------------------------------------------------+ + * | | 27:16 | MBZ | + * | +-------+--------------------------------------------------------------+ + * | | 15:0 | ACTION = _`XE_GUC_ACTION_PF2GUC_RELAY_TO_VF` = 0x5101 | + * +---+-------+--------------------------------------------------------------+ + * | 1 | 31:0 | **VFID** - target VF identifier | + * +---+-------+--------------------------------------------------------------+ + * | 2 | 31:0 | **RELAY_ID** - VF/PF message ID | + * +---+-------+-----------------+--------------------------------------------+ + * | 3 | 31:0 | **RELAY_DATA1** | | + * +---+-------+-----------------+ | + * |...| | | [Embedded `Relay Message`_] | + * +---+-------+-----------------+ | + * | n | 31:0 | **RELAY_DATAx** | | + * +---+-------+-----------------+--------------------------------------------+ + */ +#define XE_GUC_ACTION_PF2GUC_RELAY_TO_VF 0x5101 + +#define PF2GUC_RELAY_TO_VF_REQUEST_MSG_MIN_LEN (GUC_HXG_REQUEST_MSG_MIN_LEN + 2u) +#define PF2GUC_RELAY_TO_VF_REQUEST_MSG_MAX_LEN \ + (PF2GUC_RELAY_TO_VF_REQUEST_MSG_MIN_LEN + GUC_RELAY_MSG_MAX_LEN) +#define PF2GUC_RELAY_TO_VF_REQUEST_MSG_0_MBZ GUC_HXG_REQUEST_MSG_0_DATA0 +#define PF2GUC_RELAY_TO_VF_REQUEST_MSG_1_VFID GUC_HXG_REQUEST_MSG_n_DATAn +#define PF2GUC_RELAY_TO_VF_REQUEST_MSG_2_RELAY_ID GUC_HXG_REQUEST_MSG_n_DATAn +#define PF2GUC_RELAY_TO_VF_REQUEST_MSG_3_RELAY_DATA1 GUC_HXG_REQUEST_MSG_n_DATAn +#define PF2GUC_RELAY_TO_VF_REQUEST_MSG_n_RELAY_DATAx GUC_HXG_REQUEST_MSG_n_DATAn +#define PF2GUC_RELAY_TO_VF_REQUEST_MSG_NUM_RELAY_DATA GUC_RELAY_MSG_MAX_LEN + +/** + * DOC: GUC2VF_RELAY_FROM_PF + * + * This message is used by the GuC firmware to deliver `Relay Message`_ from the + * Physical Function (PF) driver to this Virtual Function (VF) driver. + * See `GuC Relay Communication`_ for details. + * + * This message is always sent over CTB. 
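Assembling the PF2GUC_RELAY_TO_VF request defined above is the mirror operation. A minimal sketch, not part of this patch (hypothetical function name; @buf is assumed to hold at least PF2GUC_RELAY_TO_VF_REQUEST_MSG_MAX_LEN dwords):

/* Hypothetical: wrap a relay payload into a PF2GUC_RELAY_TO_VF request. */
static u32 pf2guc_relay_pack(u32 *buf, u32 vfid, u32 rid,
			     const u32 *relay, u32 relay_len)
{
	buf[0] = FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_HOST) |
		 FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_FAST_REQUEST) |
		 FIELD_PREP(GUC_HXG_REQUEST_MSG_0_ACTION,
			    XE_GUC_ACTION_PF2GUC_RELAY_TO_VF);
	buf[1] = FIELD_PREP(PF2GUC_RELAY_TO_VF_REQUEST_MSG_1_VFID, vfid);
	buf[2] = FIELD_PREP(PF2GUC_RELAY_TO_VF_REQUEST_MSG_2_RELAY_ID, rid);
	memcpy(buf + PF2GUC_RELAY_TO_VF_REQUEST_MSG_MIN_LEN, relay,
	       sizeof(u32) * relay_len);		/* embedded Relay Message */
	return PF2GUC_RELAY_TO_VF_REQUEST_MSG_MIN_LEN + relay_len;	/* total dwords */
}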
+ * + * +---+-------+--------------------------------------------------------------+ + * | | Bits | Description | + * +===+=======+==============================================================+ + * | 0 | 31 | ORIGIN = GUC_HXG_ORIGIN_GUC_ | + * | +-------+--------------------------------------------------------------+ + * | | 30:28 | TYPE = GUC_HXG_TYPE_EVENT_ | + * | +-------+--------------------------------------------------------------+ + * | | 27:16 | MBZ | + * | +-------+--------------------------------------------------------------+ + * | | 15:0 | ACTION = _`XE_GUC_ACTION_GUC2VF_RELAY_FROM_PF` = 0x5102 | + * +---+-------+--------------------------------------------------------------+ + * | 1 | 31:0 | **RELAY_ID** - VF/PF message ID | + * +---+-------+-----------------+--------------------------------------------+ + * | 2 | 31:0 | **RELAY_DATA1** | | + * +---+-------+-----------------+ | + * |...| | | [Embedded `Relay Message`_] | + * +---+-------+-----------------+ | + * | n | 31:0 | **RELAY_DATAx** | | + * +---+-------+-----------------+--------------------------------------------+ + */ +#define XE_GUC_ACTION_GUC2VF_RELAY_FROM_PF 0x5102 + +#define GUC2VF_RELAY_FROM_PF_EVENT_MSG_MIN_LEN (GUC_HXG_EVENT_MSG_MIN_LEN + 1u) +#define GUC2VF_RELAY_FROM_PF_EVENT_MSG_MAX_LEN \ + (GUC2VF_RELAY_FROM_PF_EVENT_MSG_MIN_LEN + GUC_RELAY_MSG_MAX_LEN) +#define GUC2VF_RELAY_FROM_PF_EVENT_MSG_0_MBZ GUC_HXG_EVENT_MSG_0_DATA0 +#define GUC2VF_RELAY_FROM_PF_EVENT_MSG_1_RELAY_ID GUC_HXG_EVENT_MSG_n_DATAn +#define GUC2VF_RELAY_FROM_PF_EVENT_MSG_n_RELAY_DATAx GUC_HXG_EVENT_MSG_n_DATAn +#define GUC2VF_RELAY_FROM_PF_EVENT_MSG_NUM_RELAY_DATA GUC_RELAY_MSG_MAX_LEN + +/** + * DOC: VF2GUC_RELAY_TO_PF + * + * This message is used by the Virtual Function (VF) drivers to communicate with + * the Physical Function (PF) driver and send `Relay Message`_ to the PF driver. + * See `GuC Relay Communication`_ for details. + * + * This message must be sent over CTB. 
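Before any of these events are acted upon, both ends first sanity check the received length against the MIN/MAX bounds above. A minimal sketch of the VF-side check, not part of this patch, with error codes mirroring the expectations encoded in the KUnit tests later in this series (GUC_RELAY_MSG_MIN_LEN comes from guc_relay_communication_abi.h below):

#include <linux/errno.h>

/* Hypothetical: reject malformed GUC2VF_RELAY_FROM_PF events early. */
static int guc2vf_check_len(u32 len)
{
	if (len < GUC2VF_RELAY_FROM_PF_EVENT_MSG_MIN_LEN + GUC_RELAY_MSG_MIN_LEN)
		return -EPROTO;		/* truncated, or no relay payload at all */
	if (len > GUC2VF_RELAY_FROM_PF_EVENT_MSG_MAX_LEN)
		return -EMSGSIZE;	/* larger than a CTB HXG message allows */
	return 0;
}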
+ * + * +---+-------+--------------------------------------------------------------+ + * | | Bits | Description | + * +===+=======+==============================================================+ + * | 0 | 31 | ORIGIN = GUC_HXG_ORIGIN_HOST_ | + * | +-------+--------------------------------------------------------------+ + * | | 30:28 | TYPE = GUC_HXG_TYPE_REQUEST_ or GUC_HXG_TYPE_FAST_REQUEST_ | + * | +-------+--------------------------------------------------------------+ + * | | 27:16 | MBZ | + * | +-------+--------------------------------------------------------------+ + * | | 15:0 | ACTION = _`XE_GUC_ACTION_VF2GUC_RELAY_TO_PF` = 0x5103 | + * +---+-------+--------------------------------------------------------------+ + * | 1 | 31:0 | **RELAY_ID** - VF/PF message ID | + * +---+-------+-----------------+--------------------------------------------+ + * | 2 | 31:0 | **RELAY_DATA1** | | + * +---+-------+-----------------+ | + * |...| | | [Embedded `Relay Message`_] | + * +---+-------+-----------------+ | + * | n | 31:0 | **RELAY_DATAx** | | + * +---+-------+-----------------+--------------------------------------------+ + */ +#define XE_GUC_ACTION_VF2GUC_RELAY_TO_PF 0x5103 + +#define VF2GUC_RELAY_TO_PF_REQUEST_MSG_MIN_LEN (GUC_HXG_REQUEST_MSG_MIN_LEN + 1u) +#define VF2GUC_RELAY_TO_PF_REQUEST_MSG_MAX_LEN \ + (VF2GUC_RELAY_TO_PF_REQUEST_MSG_MIN_LEN + GUC_RELAY_MSG_MAX_LEN) +#define VF2GUC_RELAY_TO_PF_REQUEST_MSG_0_MBZ GUC_HXG_REQUEST_MSG_0_DATA0 +#define VF2GUC_RELAY_TO_PF_REQUEST_MSG_1_RELAY_ID GUC_HXG_REQUEST_MSG_n_DATAn +#define VF2GUC_RELAY_TO_PF_REQUEST_MSG_n_RELAY_DATAx GUC_HXG_REQUEST_MSG_n_DATAn +#define VF2GUC_RELAY_TO_PF_REQUEST_MSG_NUM_RELAY_DATA GUC_RELAY_MSG_MAX_LEN + +#endif diff --git a/drivers/gpu/drm/xe/abi/guc_communication_ctb_abi.h b/drivers/gpu/drm/xe/abi/guc_communication_ctb_abi.h index 3b83f907ece4..4aaed1cb4e12 100644 --- a/drivers/gpu/drm/xe/abi/guc_communication_ctb_abi.h +++ b/drivers/gpu/drm/xe/abi/guc_communication_ctb_abi.h @@ -81,12 +81,13 @@ static_assert(sizeof(struct guc_ct_buffer_desc) == 64); #define GUC_CTB_HDR_LEN 1u #define GUC_CTB_MSG_MIN_LEN GUC_CTB_HDR_LEN -#define GUC_CTB_MSG_MAX_LEN 256u +#define GUC_CTB_MSG_MAX_LEN (GUC_CTB_MSG_MIN_LEN + GUC_CTB_MAX_DWORDS) #define GUC_CTB_MSG_0_FENCE (0xffff << 16) #define GUC_CTB_MSG_0_FORMAT (0xf << 12) #define GUC_CTB_FORMAT_HXG 0u #define GUC_CTB_MSG_0_RESERVED (0xf << 8) #define GUC_CTB_MSG_0_NUM_DWORDS (0xff << 0) +#define GUC_CTB_MAX_DWORDS 255 /** * DOC: CTB HXG Message diff --git a/drivers/gpu/drm/xe/abi/guc_messages_abi.h b/drivers/gpu/drm/xe/abi/guc_messages_abi.h index 3d199016cf88..ff888d16bd4f 100644 --- a/drivers/gpu/drm/xe/abi/guc_messages_abi.h +++ b/drivers/gpu/drm/xe/abi/guc_messages_abi.h @@ -24,6 +24,7 @@ * | | 30:28 | **TYPE** - message type | * | | | - _`GUC_HXG_TYPE_REQUEST` = 0 | * | | | - _`GUC_HXG_TYPE_EVENT` = 1 | + * | | | - _`GUC_HXG_TYPE_FAST_REQUEST` = 2 | * | | | - _`GUC_HXG_TYPE_NO_RESPONSE_BUSY` = 3 | * | | | - _`GUC_HXG_TYPE_NO_RESPONSE_RETRY` = 5 | * | | | - _`GUC_HXG_TYPE_RESPONSE_FAILURE` = 6 | @@ -46,6 +47,7 @@ #define GUC_HXG_MSG_0_TYPE (0x7 << 28) #define GUC_HXG_TYPE_REQUEST 0u #define GUC_HXG_TYPE_EVENT 1u +#define GUC_HXG_TYPE_FAST_REQUEST 2u #define GUC_HXG_TYPE_NO_RESPONSE_BUSY 3u #define GUC_HXG_TYPE_NO_RESPONSE_RETRY 5u #define GUC_HXG_TYPE_RESPONSE_FAILURE 6u diff --git a/drivers/gpu/drm/xe/abi/guc_relay_actions_abi.h b/drivers/gpu/drm/xe/abi/guc_relay_actions_abi.h new file mode 100644 index 000000000000..747e428de421 --- /dev/null +++ 
b/drivers/gpu/drm/xe/abi/guc_relay_actions_abi.h @@ -0,0 +1,79 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023 Intel Corporation + */ + +#ifndef _ABI_GUC_RELAY_ACTIONS_ABI_H_ +#define _ABI_GUC_RELAY_ACTIONS_ABI_H_ + +/** + * DOC: GuC Relay Debug Actions + * + * This range of action codes is reserved for debugging purposes only and should + * be used only on debug builds. These actions may not be supported by the + * production drivers. Their definitions could be changed in the future. + * + * _`GUC_RELAY_ACTION_DEBUG_ONLY_START` = 0xDEB0 + * _`GUC_RELAY_ACTION_DEBUG_ONLY_END` = 0xDEFF + */ + +#define GUC_RELAY_ACTION_DEBUG_ONLY_START 0xDEB0 +#define GUC_RELAY_ACTION_DEBUG_ONLY_END 0xDEFF + +/** + * DOC: VFXPF_TESTLOOP + * + * This `Relay Message`_ is used to selftest the `GuC Relay Communication`_. + * + * The following opcodes are defined: + * VFXPF_TESTLOOP_OPCODE_NOP_ will return no data. + * VFXPF_TESTLOOP_OPCODE_BUSY_ will reply with BUSY response first. + * VFXPF_TESTLOOP_OPCODE_RETRY_ will reply with RETRY response instead. + * VFXPF_TESTLOOP_OPCODE_ECHO_ will return same data as received. + * VFXPF_TESTLOOP_OPCODE_FAIL_ will always fail with error. + * + * +---+-------+--------------------------------------------------------------+ + * | | Bits | Description | + * +===+=======+==============================================================+ + * | 0 | 31 | ORIGIN = GUC_HXG_ORIGIN_HOST_ | + * | +-------+--------------------------------------------------------------+ + * | | 30:28 | TYPE = GUC_HXG_TYPE_REQUEST_ or GUC_HXG_TYPE_FAST_REQUEST_ | + * | | | or GUC_HXG_TYPE_EVENT_ | + * | +-------+--------------------------------------------------------------+ + * | | 27:16 | **OPCODE** | + * | | | - _`VFXPF_TESTLOOP_OPCODE_NOP` = 0x0 | + * | | | - _`VFXPF_TESTLOOP_OPCODE_BUSY` = 0xB | + * | | | - _`VFXPF_TESTLOOP_OPCODE_RETRY` = 0xD | + * | | | - _`VFXPF_TESTLOOP_OPCODE_ECHO` = 0xE | + * | | | - _`VFXPF_TESTLOOP_OPCODE_FAIL` = 0xF | + * | +-------+--------------------------------------------------------------+ + * | | 15:0 | ACTION = _`IOV_ACTION_SELFTEST_RELAY` | + * +---+-------+--------------------------------------------------------------+ + * | 1 | 31:0 | **DATA1** = optional, depends on **OPCODE**: | + * | | | for VFXPF_TESTLOOP_OPCODE_BUSY_: time in ms for reply | + * | | | for VFXPF_TESTLOOP_OPCODE_FAIL_: expected error | + * | | | for VFXPF_TESTLOOP_OPCODE_ECHO_: payload | + * +---+-------+--------------------------------------------------------------+ + * |...| 31:0 | **DATAn** = only for **OPCODE** VFXPF_TESTLOOP_OPCODE_ECHO_ | + * +---+-------+--------------------------------------------------------------+ + * + * +---+-------+--------------------------------------------------------------+ + * | | Bits | Description | + * +===+=======+==============================================================+ + * | 0 | 31 | ORIGIN = GUC_HXG_ORIGIN_HOST_ | + * | +-------+--------------------------------------------------------------+ + * | | 30:28 | TYPE = GUC_HXG_TYPE_RESPONSE_SUCCESS_ | + * | +-------+--------------------------------------------------------------+ + * | | 27:0 | DATA0 = MBZ | + * +---+-------+--------------------------------------------------------------+ + * |...| 31:0 | DATAn = only for **OPCODE** VFXPF_TESTLOOP_OPCODE_ECHO_ | + * +---+-------+--------------------------------------------------------------+ + */ +#define GUC_RELAY_ACTION_VFXPF_TESTLOOP (GUC_RELAY_ACTION_DEBUG_ONLY_START + 1) +#define VFXPF_TESTLOOP_OPCODE_NOP 0x0 +#define 
VFXPF_TESTLOOP_OPCODE_BUSY 0xB +#define VFXPF_TESTLOOP_OPCODE_RETRY 0xD +#define VFXPF_TESTLOOP_OPCODE_ECHO 0xE +#define VFXPF_TESTLOOP_OPCODE_FAIL 0xF + +#endif diff --git a/drivers/gpu/drm/xe/abi/guc_relay_communication_abi.h b/drivers/gpu/drm/xe/abi/guc_relay_communication_abi.h new file mode 100644 index 000000000000..f92625f04796 --- /dev/null +++ b/drivers/gpu/drm/xe/abi/guc_relay_communication_abi.h @@ -0,0 +1,118 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023 Intel Corporation + */ + +#ifndef _ABI_GUC_RELAY_COMMUNICATION_ABI_H +#define _ABI_GUC_RELAY_COMMUNICATION_ABI_H + +#include <linux/build_bug.h> + +#include "guc_actions_sriov_abi.h" +#include "guc_communication_ctb_abi.h" +#include "guc_messages_abi.h" + +/** + * DOC: GuC Relay Communication + * + * The communication between Virtual Function (VF) drivers and Physical Function + * (PF) drivers is based on the GuC firmware acting as a proxy (relay) agent. + * + * To communicate with the PF driver, VF drivers use `VF2GUC_RELAY_TO_PF`_ + * action that takes the `Relay Message`_ as opaque payload and requires the + * relay message identifier (RID) as an additional parameter. + * + * This identifier is used by the drivers to match related messages. + * + * The GuC forwards this `Relay Message`_ and its identifier to the PF driver + * in `GUC2PF_RELAY_FROM_VF`_ action. This event message additionally contains + * the identifier of the origin VF (VFID). + * + * Likewise, to communicate with the VF drivers, the PF driver uses + * `PF2GUC_RELAY_TO_VF`_ action that in addition to the `Relay Message`_ + * and the relay message identifier (RID) also takes the target VF identifier. + * + * The GuC uses this target VFID from the message to select where to send the + * `GUC2VF_RELAY_FROM_PF`_ with the embedded `Relay Message`_ carrying the response:: + * + * VF GuC PF + * | | | + * [ ] VF2GUC_RELAY_TO_PF | | + * [ ]---------------------------> [ ] | + * [ ] { rid, msg } [ ] | + * [ ] [ ] GUC2PF_RELAY_FROM_VF | + * [ ] [ ]---------------------------> [ ] + * [ ] | { VFID, rid, msg } [ ] + * [ ] | [ ] + * [ ] | PF2GUC_RELAY_TO_VF [ ] + * [ ] [ ] <---------------------------[ ] + * [ ] [ ] { VFID, rid, reply } | + * [ ] GUC2VF_RELAY_FROM_PF [ ] | + * [ ] <---------------------------[ ] | + * | { rid, reply } | | + * | | | + * + * It is also possible that the PF driver will initiate communication with the + * selected VF driver. The same GuC action messages will be used:: + * + * VF GuC PF + * | | | + * | | PF2GUC_RELAY_TO_VF [ ] + * | [ ] <---------------------------[ ] + * | [ ] { VFID, rid, msg } [ ] + * | GUC2VF_RELAY_FROM_PF [ ] [ ] + * [ ] <---------------------------[ ] [ ] + * [ ] { rid, msg } | [ ] + * [ ] | [ ] + * [ ] VF2GUC_RELAY_TO_PF | [ ] + * [ ]---------------------------> [ ] [ ] + * | { rid, reply } [ ] [ ] + * | [ ] GUC2PF_RELAY_FROM_VF [ ] + * | [ ]---------------------------> [ ] + * | | { VFID, rid, reply } | + * | | | + */ + +/** + * DOC: Relay Message + * + * The `Relay Message`_ is used by the Physical Function (PF) driver and Virtual + * Function (VF) drivers to communicate using `GuC Relay Communication`_. + * + * The format of the `Relay Message`_ follows the format of the generic `HXG Message`_.
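The debug-only TESTLOOP action defined above is exercised end to end by the KUnit tests later in this series; an ECHO request, for instance, is just a Relay Message whose header selects the action and opcode. A minimal sketch, not part of this patch (hypothetical function name; it mirrors what pf_loopback_echo builds below):

/* Hypothetical: build a VFXPF_TESTLOOP ECHO relay request of 1 + @ndw dwords. */
static u32 testloop_echo_pack(u32 *req, const u32 *data, u32 ndw)
{
	req[0] = FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_HOST) |
		 FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_REQUEST) |
		 FIELD_PREP(GUC_HXG_REQUEST_MSG_0_ACTION,
			    GUC_RELAY_ACTION_VFXPF_TESTLOOP) |
		 FIELD_PREP(GUC_HXG_REQUEST_MSG_0_DATA0, VFXPF_TESTLOOP_OPCODE_ECHO);
	memcpy(req + 1, data, sizeof(u32) * ndw);	/* payload echoed back verbatim */
	return 1 + ndw;
}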
+ * + * +--------------------------------------------------------------------------+ + * | `Relay Message`_ | + * +==========================================================================+ + * | `HXG Message`_ | + * +--------------------------------------------------------------------------+ + * + * Maximum length of the `Relay Message`_ is limited by the maximum length of + * the `CTB HXG Message`_ and format of the `GUC2PF_RELAY_FROM_VF`_ message. + */ + +#define GUC_RELAY_MSG_MIN_LEN GUC_HXG_MSG_MIN_LEN +#define GUC_RELAY_MSG_MAX_LEN \ + (GUC_CTB_MAX_DWORDS - GUC2PF_RELAY_FROM_VF_EVENT_MSG_MIN_LEN) + +static_assert(PF2GUC_RELAY_TO_VF_REQUEST_MSG_MIN_LEN > + VF2GUC_RELAY_TO_PF_REQUEST_MSG_MIN_LEN); + +/** + * DOC: Relay Error Codes + * + * The `GuC Relay Communication`_ can be used to pass `Relay Message`_ between + * drivers that run on different Operating Systems. To help in troubleshooting, + * `GuC Relay Communication`_ uses error codes that mostly match errno values. + */ + +#define GUC_RELAY_ERROR_UNDISCLOSED 0 +#define GUC_RELAY_ERROR_OPERATION_NOT_PERMITTED 1 /* EPERM */ +#define GUC_RELAY_ERROR_PERMISSION_DENIED 13 /* EACCES */ +#define GUC_RELAY_ERROR_INVALID_ARGUMENT 22 /* EINVAL */ +#define GUC_RELAY_ERROR_INVALID_REQUEST_CODE 56 /* EBADRQC */ +#define GUC_RELAY_ERROR_NO_DATA_AVAILABLE 61 /* ENODATA */ +#define GUC_RELAY_ERROR_PROTOCOL_ERROR 71 /* EPROTO */ +#define GUC_RELAY_ERROR_MESSAGE_SIZE 90 /* EMSGSIZE */ + +#endif diff --git a/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_object.h b/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_object.h index 5f19550cc845..68d9f6116bdf 100644 --- a/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_object.h +++ b/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_object.h @@ -35,12 +35,10 @@ static inline int i915_gem_object_read_from_page(struct xe_bo *bo, u32 ofs, u64 *ptr, u32 size) { struct ttm_bo_kmap_obj map; - void *virtual; + void *src; bool is_iomem; int ret; - XE_WARN_ON(size != 8); - ret = xe_bo_lock(bo, true); if (ret) return ret; @@ -50,11 +48,12 @@ static inline int i915_gem_object_read_from_page(struct xe_bo *bo, goto out_unlock; ofs &= ~PAGE_MASK; - virtual = ttm_kmap_obj_virtual(&map, &is_iomem); + src = ttm_kmap_obj_virtual(&map, &is_iomem); + src += ofs; if (is_iomem) - *ptr = readq((void __iomem *)(virtual + ofs)); + memcpy_fromio(ptr, (void __iomem *)src, size); else - *ptr = *(u64 *)(virtual + ofs); + memcpy(ptr, src, size); ttm_bo_kunmap(&map); out_unlock: diff --git a/drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h b/drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h index 5d2a77b52db4..420eba0e4be0 100644 --- a/drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h +++ b/drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h @@ -162,18 +162,18 @@ static inline struct drm_i915_private *kdev_to_i915(struct device *kdev) #include "intel_wakeref.h" -static inline bool intel_runtime_pm_get(struct xe_runtime_pm *pm) +static inline intel_wakeref_t intel_runtime_pm_get(struct xe_runtime_pm *pm) { struct xe_device *xe = container_of(pm, struct xe_device, runtime_pm); if (xe_pm_runtime_get(xe) < 0) { xe_pm_runtime_put(xe); - return false; + return 0; } - return true; + return 1; } -static inline bool intel_runtime_pm_get_if_in_use(struct xe_runtime_pm *pm) +static inline intel_wakeref_t intel_runtime_pm_get_if_in_use(struct xe_runtime_pm *pm) { struct xe_device *xe = container_of(pm, struct xe_device, runtime_pm); @@ -187,7 +187,7 @@ static inline void intel_runtime_pm_put_unchecked(struct xe_runtime_pm *pm) 
xe_pm_runtime_put(xe); } -static inline void intel_runtime_pm_put(struct xe_runtime_pm *pm, bool wakeref) +static inline void intel_runtime_pm_put(struct xe_runtime_pm *pm, intel_wakeref_t wakeref) { if (wakeref) intel_runtime_pm_put_unchecked(pm); diff --git a/drivers/gpu/drm/xe/compat-i915-headers/i915_gem_stolen.h b/drivers/gpu/drm/xe/compat-i915-headers/i915_gem_stolen.h index 888e7a87a925..bd233007c1b7 100644 --- a/drivers/gpu/drm/xe/compat-i915-headers/i915_gem_stolen.h +++ b/drivers/gpu/drm/xe/compat-i915-headers/i915_gem_stolen.h @@ -19,6 +19,9 @@ static inline int i915_gem_stolen_insert_node_in_range(struct xe_device *xe, int err; u32 flags = XE_BO_CREATE_PINNED_BIT | XE_BO_CREATE_STOLEN_BIT; + if (align) + size = ALIGN(size, align); + bo = xe_bo_create_locked_range(xe, xe_device_get_root_tile(xe), NULL, size, start, end, ttm_bo_type_kernel, flags); diff --git a/drivers/gpu/drm/xe/instructions/xe_mi_commands.h b/drivers/gpu/drm/xe/instructions/xe_mi_commands.h index 1cfa96167fde..c74ceb550dce 100644 --- a/drivers/gpu/drm/xe/instructions/xe_mi_commands.h +++ b/drivers/gpu/drm/xe/instructions/xe_mi_commands.h @@ -56,6 +56,9 @@ #define MI_FLUSH_IMM_QW REG_FIELD_PREP(MI_FLUSH_DW_LEN_DW, 5 - 2) #define MI_FLUSH_DW_USE_GTT REG_BIT(2) +#define MI_LOAD_REGISTER_MEM (__MI_INSTR(0x29) | XE_INSTR_NUM_DW(4)) +#define MI_LRM_USE_GGTT REG_BIT(22) + #define MI_BATCH_BUFFER_START __MI_INSTR(0x31) #endif diff --git a/drivers/gpu/drm/xe/regs/xe_engine_regs.h b/drivers/gpu/drm/xe/regs/xe_engine_regs.h index 5592774fc690..0b1266c88a6a 100644 --- a/drivers/gpu/drm/xe/regs/xe_engine_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_engine_regs.h @@ -75,12 +75,17 @@ #define FF_THREAD_MODE(base) XE_REG((base) + 0xa0) #define FF_TESSELATION_DOP_GATE_DISABLE BIT(19) +#define RING_INT_SRC_RPT_PTR(base) XE_REG((base) + 0xa4) #define RING_IMR(base) XE_REG((base) + 0xa8) +#define RING_INT_STATUS_RPT_PTR(base) XE_REG((base) + 0xac) #define RING_EIR(base) XE_REG((base) + 0xb0) #define RING_EMR(base) XE_REG((base) + 0xb4) #define RING_ESR(base) XE_REG((base) + 0xb8) +#define INSTPM(base) XE_REG((base) + 0xc0, XE_REG_OPTION_MASKED) +#define ENABLE_SEMAPHORE_POLL_BIT REG_BIT(13) + #define RING_CMD_CCTL(base) XE_REG((base) + 0xc4, XE_REG_OPTION_MASKED) /* * CMD_CCTL read/write fields take a MOCS value and _not_ a table index. 
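Worth spelling out for the i915 compat change above: intel_runtime_pm_get() and friends now return an intel_wakeref_t cookie rather than a bool, so shared display code can keep i915's acquire/release idiom. A minimal sketch of the expected caller pattern, not part of this patch (hypothetical function; only the zero/non-zero state of the cookie matters in this shim):

/* Hypothetical caller: take a wakeref, touch hardware, release it. */
static void do_hw_access(struct xe_runtime_pm *pm)
{
	intel_wakeref_t wakeref;

	wakeref = intel_runtime_pm_get(pm);	/* 0 means the wakeup failed */
	if (!wakeref)
		return;

	/* ... the device is guaranteed awake here ... */

	intel_runtime_pm_put(pm, wakeref);	/* no-op for a zero cookie */
}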
@@ -136,6 +141,7 @@ #define TAIL_ADDR 0x001FFFF8 #define RING_CTX_TIMESTAMP(base) XE_REG((base) + 0x3a8) +#define CSBE_DEBUG_STATUS(base) XE_REG((base) + 0x3fc) #define RING_FORCE_TO_NONPRIV(base, i) XE_REG(((base) + 0x4d0) + (i) * 4) #define RING_FORCE_TO_NONPRIV_DENY REG_BIT(30) diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h index 1dd361046b5d..0d4bfc35ff37 100644 --- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h @@ -146,6 +146,7 @@ /* Fuse readout registers for GT */ #define XEHP_FUSE4 XE_REG(0x9114) +#define CFEG_WMTP_DISABLE REG_BIT(20) #define CCS_EN_MASK REG_GENMASK(19, 16) #define GT_L3_EXC_MASK REG_GENMASK(6, 4) @@ -344,6 +345,9 @@ #define ROW_CHICKEN3 XE_REG_MCR(0xe49c, XE_REG_OPTION_MASKED) #define DIS_FIX_EOT1_FLUSH REG_BIT(9) +#define TDL_TSL_CHICKEN XE_REG_MCR(0xe4c4, XE_REG_OPTION_MASKED) +#define SLM_WMTP_RESTORE REG_BIT(11) + #define ROW_CHICKEN XE_REG_MCR(0xe4f0, XE_REG_OPTION_MASKED) #define UGM_BACKUP_MODE REG_BIT(13) #define MDQ_ARBITRATION_MODE REG_BIT(12) @@ -430,6 +434,15 @@ #define VOLTAGE_MASK REG_GENMASK(10, 0) #define GT_INTR_DW(x) XE_REG(0x190018 + ((x) * 4)) +#define INTR_GSC REG_BIT(31) +#define INTR_GUC REG_BIT(25) +#define INTR_MGUC REG_BIT(24) +#define INTR_BCS8 REG_BIT(23) +#define INTR_BCS(x) REG_BIT(15 - (x)) +#define INTR_CCS(x) REG_BIT(4 + (x)) +#define INTR_RCS0 REG_BIT(0) +#define INTR_VECS(x) REG_BIT(31 - (x)) +#define INTR_VCS(x) REG_BIT(x) #define RENDER_COPY_INTR_ENABLE XE_REG(0x190030) #define VCS_VECS_INTR_ENABLE XE_REG(0x190034) @@ -446,6 +459,7 @@ #define INTR_ENGINE_CLASS(x) REG_FIELD_GET(GENMASK(18, 16), x) #define INTR_ENGINE_INTR(x) REG_FIELD_GET(GENMASK(15, 0), x) #define OTHER_GUC_INSTANCE 0 +#define OTHER_GSC_HECI2_INSTANCE 3 #define OTHER_GSC_INSTANCE 6 #define IIR_REG_SELECTOR(x) XE_REG(0x190070 + ((x) * 4)) @@ -454,6 +468,7 @@ #define VCS0_VCS1_INTR_MASK XE_REG(0x1900a8) #define VCS2_VCS3_INTR_MASK XE_REG(0x1900ac) #define VECS0_VECS1_INTR_MASK XE_REG(0x1900d0) +#define HECI2_RSVD_INTR_MASK XE_REG(0x1900e4) #define GUC_SG_INTR_MASK XE_REG(0x1900e8) #define GPM_WGBOXPERF_INTR_MASK XE_REG(0x1900ec) #define GUNIT_GSC_INTR_MASK XE_REG(0x1900f4) diff --git a/drivers/gpu/drm/xe/regs/xe_lrc_layout.h b/drivers/gpu/drm/xe/regs/xe_lrc_layout.h index 4be81abc86ad..1825d8f79db6 100644 --- a/drivers/gpu/drm/xe/regs/xe_lrc_layout.h +++ b/drivers/gpu/drm/xe/regs/xe_lrc_layout.h @@ -14,4 +14,13 @@ #define CTX_PDP0_UDW (0x30 + 1) #define CTX_PDP0_LDW (0x32 + 1) +#define CTX_LRM_INT_MASK_ENABLE 0x50 +#define CTX_INT_MASK_ENABLE_REG (CTX_LRM_INT_MASK_ENABLE + 1) +#define CTX_INT_MASK_ENABLE_PTR (CTX_LRM_INT_MASK_ENABLE + 2) +#define CTX_LRI_INT_REPORT_PTR 0x55 +#define CTX_INT_STATUS_REPORT_REG (CTX_LRI_INT_REPORT_PTR + 1) +#define CTX_INT_STATUS_REPORT_PTR (CTX_LRI_INT_REPORT_PTR + 2) +#define CTX_INT_SRC_REPORT_REG (CTX_LRI_INT_REPORT_PTR + 3) +#define CTX_INT_SRC_REPORT_PTR (CTX_LRI_INT_REPORT_PTR + 4) + #endif diff --git a/drivers/gpu/drm/xe/tests/Makefile b/drivers/gpu/drm/xe/tests/Makefile index 39d8a0892274..9d1d88af8b2f 100644 --- a/drivers/gpu/drm/xe/tests/Makefile +++ b/drivers/gpu/drm/xe/tests/Makefile @@ -1,10 +1,15 @@ # SPDX-License-Identifier: GPL-2.0 +# "live" kunit tests obj-$(CONFIG_DRM_XE_KUNIT_TEST) += \ xe_bo_test.o \ xe_dma_buf_test.o \ xe_migrate_test.o \ - xe_mocs_test.o \ + xe_mocs_test.o + +# Normal kunit tests +obj-$(CONFIG_DRM_XE_KUNIT_TEST) += xe_test.o +xe_test-y = xe_test_mod.o \ xe_pci_test.o \ xe_rtp_test.o \ xe_wa_test.o diff --git 
a/drivers/gpu/drm/xe/tests/xe_guc_db_mgr_test.c b/drivers/gpu/drm/xe/tests/xe_guc_db_mgr_test.c new file mode 100644 index 000000000000..a87a7b4b040a --- /dev/null +++ b/drivers/gpu/drm/xe/tests/xe_guc_db_mgr_test.c @@ -0,0 +1,201 @@ +// SPDX-License-Identifier: GPL-2.0 AND MIT +/* + * Copyright © 2023 Intel Corporation + */ + +#include <kunit/test.h> + +#include "xe_device.h" +#include "xe_kunit_helpers.h" + +static int guc_dbm_test_init(struct kunit *test) +{ + struct xe_guc_db_mgr *dbm; + + xe_kunit_helper_xe_device_test_init(test); + dbm = &xe_device_get_gt(test->priv, 0)->uc.guc.dbm; + + mutex_init(dbm_mutex(dbm)); + test->priv = dbm; + return 0; +} + +static void test_empty(struct kunit *test) +{ + struct xe_guc_db_mgr *dbm = test->priv; + + KUNIT_ASSERT_EQ(test, xe_guc_db_mgr_init(dbm, 0), 0); + KUNIT_ASSERT_EQ(test, dbm->count, 0); + + mutex_lock(dbm_mutex(dbm)); + KUNIT_EXPECT_LT(test, xe_guc_db_mgr_reserve_id_locked(dbm), 0); + mutex_unlock(dbm_mutex(dbm)); + + KUNIT_EXPECT_LT(test, xe_guc_db_mgr_reserve_range(dbm, 1, 0), 0); +} + +static void test_default(struct kunit *test) +{ + struct xe_guc_db_mgr *dbm = test->priv; + + KUNIT_ASSERT_EQ(test, xe_guc_db_mgr_init(dbm, ~0), 0); + KUNIT_ASSERT_EQ(test, dbm->count, GUC_NUM_DOORBELLS); +} + +static const unsigned int guc_dbm_params[] = { + GUC_NUM_DOORBELLS / 64, + GUC_NUM_DOORBELLS / 32, + GUC_NUM_DOORBELLS / 8, + GUC_NUM_DOORBELLS, +}; + +static void uint_param_get_desc(const unsigned int *p, char *desc) +{ + snprintf(desc, KUNIT_PARAM_DESC_SIZE, "%u", *p); +} + +KUNIT_ARRAY_PARAM(guc_dbm, guc_dbm_params, uint_param_get_desc); + +static void test_size(struct kunit *test) +{ + const unsigned int *p = test->param_value; + struct xe_guc_db_mgr *dbm = test->priv; + unsigned int n; + int id; + + KUNIT_ASSERT_EQ(test, xe_guc_db_mgr_init(dbm, *p), 0); + KUNIT_ASSERT_EQ(test, dbm->count, *p); + + mutex_lock(dbm_mutex(dbm)); + for (n = 0; n < *p; n++) { + KUNIT_EXPECT_GE(test, id = xe_guc_db_mgr_reserve_id_locked(dbm), 0); + KUNIT_EXPECT_LT(test, id, dbm->count); + } + KUNIT_EXPECT_LT(test, xe_guc_db_mgr_reserve_id_locked(dbm), 0); + mutex_unlock(dbm_mutex(dbm)); + + mutex_lock(dbm_mutex(dbm)); + for (n = 0; n < *p; n++) + xe_guc_db_mgr_release_id_locked(dbm, n); + mutex_unlock(dbm_mutex(dbm)); +} + +static void test_reuse(struct kunit *test) +{ + const unsigned int *p = test->param_value; + struct xe_guc_db_mgr *dbm = test->priv; + unsigned int n; + + KUNIT_ASSERT_EQ(test, xe_guc_db_mgr_init(dbm, *p), 0); + + mutex_lock(dbm_mutex(dbm)); + for (n = 0; n < *p; n++) + KUNIT_EXPECT_GE(test, xe_guc_db_mgr_reserve_id_locked(dbm), 0); + KUNIT_EXPECT_LT(test, xe_guc_db_mgr_reserve_id_locked(dbm), 0); + mutex_unlock(dbm_mutex(dbm)); + + mutex_lock(dbm_mutex(dbm)); + for (n = 0; n < *p; n++) { + xe_guc_db_mgr_release_id_locked(dbm, n); + KUNIT_EXPECT_EQ(test, xe_guc_db_mgr_reserve_id_locked(dbm), n); + } + KUNIT_EXPECT_LT(test, xe_guc_db_mgr_reserve_id_locked(dbm), 0); + mutex_unlock(dbm_mutex(dbm)); + + mutex_lock(dbm_mutex(dbm)); + for (n = 0; n < *p; n++) + xe_guc_db_mgr_release_id_locked(dbm, n); + mutex_unlock(dbm_mutex(dbm)); +} + +static void test_range_overlap(struct kunit *test) +{ + const unsigned int *p = test->param_value; + struct xe_guc_db_mgr *dbm = test->priv; + int id1, id2, id3; + unsigned int n; + + KUNIT_ASSERT_EQ(test, xe_guc_db_mgr_init(dbm, ~0), 0); + KUNIT_ASSERT_LE(test, *p, dbm->count); + + KUNIT_ASSERT_GE(test, id1 = xe_guc_db_mgr_reserve_range(dbm, *p, 0), 0); + for (n = 0; n < dbm->count - *p; n++) { + 
KUNIT_ASSERT_GE(test, id2 = xe_guc_db_mgr_reserve_range(dbm, 1, 0), 0); + KUNIT_ASSERT_NE(test, id2, id1); + KUNIT_ASSERT_NE_MSG(test, id2 < id1, id2 > id1 + *p - 1, + "id1=%d id2=%d", id1, id2); + } + KUNIT_ASSERT_LT(test, xe_guc_db_mgr_reserve_range(dbm, 1, 0), 0); + xe_guc_db_mgr_release_range(dbm, 0, dbm->count); + + if (*p >= 1) { + KUNIT_ASSERT_GE(test, id1 = xe_guc_db_mgr_reserve_range(dbm, 1, 0), 0); + KUNIT_ASSERT_GE(test, id2 = xe_guc_db_mgr_reserve_range(dbm, *p - 1, 0), 0); + KUNIT_ASSERT_NE(test, id2, id1); + KUNIT_ASSERT_NE_MSG(test, id1 < id2, id1 > id2 + *p - 2, + "id1=%d id2=%d", id1, id2); + for (n = 0; n < dbm->count - *p; n++) { + KUNIT_ASSERT_GE(test, id3 = xe_guc_db_mgr_reserve_range(dbm, 1, 0), 0); + KUNIT_ASSERT_NE(test, id3, id1); + KUNIT_ASSERT_NE(test, id3, id2); + KUNIT_ASSERT_NE_MSG(test, id3 < id2, id3 > id2 + *p - 2, + "id3=%d id2=%d", id3, id2); + } + KUNIT_ASSERT_LT(test, xe_guc_db_mgr_reserve_range(dbm, 1, 0), 0); + xe_guc_db_mgr_release_range(dbm, 0, dbm->count); + } +} + +static void test_range_compact(struct kunit *test) +{ + const unsigned int *p = test->param_value; + struct xe_guc_db_mgr *dbm = test->priv; + unsigned int n; + + KUNIT_ASSERT_EQ(test, xe_guc_db_mgr_init(dbm, ~0), 0); + KUNIT_ASSERT_NE(test, *p, 0); + KUNIT_ASSERT_LE(test, *p, dbm->count); + if (dbm->count % *p) + kunit_skip(test, "must be divisible"); + + KUNIT_ASSERT_GE(test, xe_guc_db_mgr_reserve_range(dbm, *p, 0), 0); + for (n = 1; n < dbm->count / *p; n++) + KUNIT_ASSERT_GE(test, xe_guc_db_mgr_reserve_range(dbm, *p, 0), 0); + KUNIT_ASSERT_LT(test, xe_guc_db_mgr_reserve_range(dbm, 1, 0), 0); + xe_guc_db_mgr_release_range(dbm, 0, dbm->count); +} + +static void test_range_spare(struct kunit *test) +{ + const unsigned int *p = test->param_value; + struct xe_guc_db_mgr *dbm = test->priv; + int id; + + KUNIT_ASSERT_EQ(test, xe_guc_db_mgr_init(dbm, ~0), 0); + KUNIT_ASSERT_LE(test, *p, dbm->count); + + KUNIT_ASSERT_LT(test, xe_guc_db_mgr_reserve_range(dbm, *p, dbm->count), 0); + KUNIT_ASSERT_LT(test, xe_guc_db_mgr_reserve_range(dbm, *p, dbm->count - *p + 1), 0); + KUNIT_ASSERT_EQ(test, id = xe_guc_db_mgr_reserve_range(dbm, *p, dbm->count - *p), 0); + KUNIT_ASSERT_LT(test, xe_guc_db_mgr_reserve_range(dbm, 1, dbm->count - *p), 0); + xe_guc_db_mgr_release_range(dbm, id, *p); +} + +static struct kunit_case guc_dbm_test_cases[] = { + KUNIT_CASE(test_empty), + KUNIT_CASE(test_default), + KUNIT_CASE_PARAM(test_size, guc_dbm_gen_params), + KUNIT_CASE_PARAM(test_reuse, guc_dbm_gen_params), + KUNIT_CASE_PARAM(test_range_overlap, guc_dbm_gen_params), + KUNIT_CASE_PARAM(test_range_compact, guc_dbm_gen_params), + KUNIT_CASE_PARAM(test_range_spare, guc_dbm_gen_params), + {} +}; + +static struct kunit_suite guc_dbm_suite = { + .name = "guc_dbm", + .test_cases = guc_dbm_test_cases, + .init = guc_dbm_test_init, +}; + +kunit_test_suites(&guc_dbm_suite); diff --git a/drivers/gpu/drm/xe/tests/xe_guc_relay_test.c b/drivers/gpu/drm/xe/tests/xe_guc_relay_test.c new file mode 100644 index 000000000000..13701451b923 --- /dev/null +++ b/drivers/gpu/drm/xe/tests/xe_guc_relay_test.c @@ -0,0 +1,522 @@ +// SPDX-License-Identifier: GPL-2.0 AND MIT +/* + * Copyright © 2023 Intel Corporation + */ + +#include <kunit/static_stub.h> +#include <kunit/test.h> +#include <kunit/test-bug.h> + +#include "xe_device.h" +#include "xe_kunit_helpers.h" +#include "xe_pci_test.h" + +#define TEST_RID 1234 +#define TEST_VFID 5 +#define TEST_LEN 6 +#define TEST_ACTION 0xa +#define TEST_DATA(n) (0xd0 + (n)) + +static int 
replacement_relay_get_totalvfs(struct xe_guc_relay *relay) +{ + return TEST_VFID; +} + +static int relay_test_init(struct kunit *test) +{ + struct xe_pci_fake_data fake = { + .sriov_mode = XE_SRIOV_MODE_PF, + .platform = XE_TIGERLAKE, /* some random platform */ + .subplatform = XE_SUBPLATFORM_NONE, + }; + struct xe_guc_relay *relay; + struct xe_device *xe; + + test->priv = &fake; + xe_kunit_helper_xe_device_test_init(test); + + xe = test->priv; + KUNIT_ASSERT_EQ(test, xe_sriov_init(xe), 0); + + relay = &xe_device_get_gt(xe, 0)->uc.guc.relay; + kunit_activate_static_stub(test, relay_get_totalvfs, + replacement_relay_get_totalvfs); + + KUNIT_ASSERT_EQ(test, xe_guc_relay_init(relay), 0); + KUNIT_EXPECT_TRUE(test, relay_is_ready(relay)); + relay->last_rid = TEST_RID - 1; + + test->priv = relay; + return 0; +} + +static const u32 TEST_MSG[TEST_LEN] = { + FIELD_PREP_CONST(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_HOST) | + FIELD_PREP_CONST(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_EVENT) | + FIELD_PREP_CONST(GUC_HXG_EVENT_MSG_0_ACTION, TEST_ACTION) | + FIELD_PREP_CONST(GUC_HXG_EVENT_MSG_0_DATA0, TEST_DATA(0)), + TEST_DATA(1), TEST_DATA(2), TEST_DATA(3), TEST_DATA(4), +}; + +static int replacement_xe_guc_ct_send_recv_always_fails(struct xe_guc_ct *ct, + const u32 *msg, u32 len, + u32 *response_buffer) +{ + struct kunit *test = kunit_get_current_test(); + + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ct); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, msg); + KUNIT_ASSERT_GE(test, len, GUC_HXG_MSG_MIN_LEN); + + return -ECOMM; +} + +static int replacement_xe_guc_ct_send_recv_expects_pf2guc_relay(struct xe_guc_ct *ct, + const u32 *msg, u32 len, + u32 *response_buffer) +{ + struct kunit *test = kunit_get_current_test(); + + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ct); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, msg); + KUNIT_ASSERT_GE(test, len, PF2GUC_RELAY_TO_VF_REQUEST_MSG_MIN_LEN); + KUNIT_ASSERT_EQ(test, len, PF2GUC_RELAY_TO_VF_REQUEST_MSG_MIN_LEN + TEST_LEN); + KUNIT_EXPECT_EQ(test, GUC_HXG_ORIGIN_HOST, FIELD_GET(GUC_HXG_MSG_0_ORIGIN, msg[0])); + KUNIT_EXPECT_EQ(test, GUC_HXG_TYPE_REQUEST, FIELD_GET(GUC_HXG_MSG_0_TYPE, msg[0])); + KUNIT_EXPECT_EQ(test, XE_GUC_ACTION_PF2GUC_RELAY_TO_VF, + FIELD_GET(GUC_HXG_REQUEST_MSG_0_ACTION, msg[0])); + KUNIT_EXPECT_EQ(test, TEST_VFID, + FIELD_GET(PF2GUC_RELAY_TO_VF_REQUEST_MSG_1_VFID, msg[1])); + KUNIT_EXPECT_EQ(test, TEST_RID, + FIELD_GET(PF2GUC_RELAY_TO_VF_REQUEST_MSG_2_RELAY_ID, msg[2])); + KUNIT_EXPECT_MEMEQ(test, TEST_MSG, msg + PF2GUC_RELAY_TO_VF_REQUEST_MSG_MIN_LEN, + sizeof(u32) * TEST_LEN); + return 0; +} + +static const u32 test_guc2pf[GUC2PF_RELAY_FROM_VF_EVENT_MSG_MAX_LEN] = { + /* transport */ + FIELD_PREP_CONST(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_GUC) | + FIELD_PREP_CONST(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_EVENT) | + FIELD_PREP_CONST(GUC_HXG_EVENT_MSG_0_ACTION, XE_GUC_ACTION_GUC2PF_RELAY_FROM_VF), + FIELD_PREP_CONST(GUC2PF_RELAY_FROM_VF_EVENT_MSG_1_VFID, TEST_VFID), + FIELD_PREP_CONST(GUC2PF_RELAY_FROM_VF_EVENT_MSG_2_RELAY_ID, TEST_RID), + /* payload */ + FIELD_PREP_CONST(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_HOST) | + FIELD_PREP_CONST(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_RESPONSE_SUCCESS), +}; + +static const u32 test_guc2vf[GUC2VF_RELAY_FROM_PF_EVENT_MSG_MAX_LEN] = { + /* transport */ + FIELD_PREP_CONST(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_GUC) | + FIELD_PREP_CONST(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_EVENT) | + FIELD_PREP_CONST(GUC_HXG_EVENT_MSG_0_ACTION, XE_GUC_ACTION_GUC2VF_RELAY_FROM_PF), + FIELD_PREP_CONST(GUC2VF_RELAY_FROM_PF_EVENT_MSG_1_RELAY_ID, TEST_RID), + /* payload */ + 
FIELD_PREP_CONST(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_HOST) | + FIELD_PREP_CONST(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_RESPONSE_SUCCESS), +}; + +static void pf_rejects_guc2pf_too_short(struct kunit *test) +{ + const u32 len = GUC2PF_RELAY_FROM_VF_EVENT_MSG_MIN_LEN - 1; + struct xe_guc_relay *relay = test->priv; + const u32 *msg = test_guc2pf; + + KUNIT_ASSERT_EQ(test, -EPROTO, xe_guc_relay_process_guc2pf(relay, msg, len)); +} + +static void pf_rejects_guc2pf_too_long(struct kunit *test) +{ + const u32 len = GUC2PF_RELAY_FROM_VF_EVENT_MSG_MAX_LEN + 1; + struct xe_guc_relay *relay = test->priv; + const u32 *msg = test_guc2pf; + + KUNIT_ASSERT_EQ(test, -EMSGSIZE, xe_guc_relay_process_guc2pf(relay, msg, len)); +} + +static void pf_rejects_guc2pf_no_payload(struct kunit *test) +{ + const u32 len = GUC2PF_RELAY_FROM_VF_EVENT_MSG_MIN_LEN; + struct xe_guc_relay *relay = test->priv; + const u32 *msg = test_guc2pf; + + KUNIT_ASSERT_EQ(test, -EPROTO, xe_guc_relay_process_guc2pf(relay, msg, len)); +} + +static void pf_fails_no_payload(struct kunit *test) +{ + struct xe_guc_relay *relay = test->priv; + const u32 msg = 0; + + KUNIT_ASSERT_EQ(test, -EPROTO, relay_process_msg(relay, TEST_VFID, TEST_RID, &msg, 0)); +} + +static void pf_fails_bad_origin(struct kunit *test) +{ + struct xe_guc_relay *relay = test->priv; + static const u32 msg[] = { + FIELD_PREP_CONST(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_GUC) | + FIELD_PREP_CONST(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_RESPONSE_SUCCESS), + }; + u32 len = ARRAY_SIZE(msg); + + KUNIT_ASSERT_EQ(test, -EPROTO, relay_process_msg(relay, TEST_VFID, TEST_RID, msg, len)); +} + +static void pf_fails_bad_type(struct kunit *test) +{ + struct xe_guc_relay *relay = test->priv; + const u32 msg[] = { + FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_HOST) | + FIELD_PREP(GUC_HXG_MSG_0_TYPE, 4), /* only 4 is undefined */ + }; + u32 len = ARRAY_SIZE(msg); + + KUNIT_ASSERT_EQ(test, -EBADRQC, relay_process_msg(relay, TEST_VFID, TEST_RID, msg, len)); +} + +static void pf_txn_reports_error(struct kunit *test) +{ + struct xe_guc_relay *relay = test->priv; + struct relay_transaction *txn; + + txn = __relay_get_transaction(relay, false, TEST_VFID, TEST_RID, + TEST_MSG, TEST_LEN, NULL, 0); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, txn); + + kunit_activate_static_stub(test, xe_guc_ct_send_recv, + replacement_xe_guc_ct_send_recv_always_fails); + KUNIT_EXPECT_EQ(test, -ECOMM, relay_send_transaction(relay, txn)); + + relay_release_transaction(relay, txn); +} + +static void pf_txn_sends_pf2guc(struct kunit *test) +{ + struct xe_guc_relay *relay = test->priv; + struct relay_transaction *txn; + + txn = __relay_get_transaction(relay, false, TEST_VFID, TEST_RID, + TEST_MSG, TEST_LEN, NULL, 0); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, txn); + + kunit_activate_static_stub(test, xe_guc_ct_send_recv, + replacement_xe_guc_ct_send_recv_expects_pf2guc_relay); + KUNIT_ASSERT_EQ(test, 0, relay_send_transaction(relay, txn)); + + relay_release_transaction(relay, txn); +} + +static void pf_sends_pf2guc(struct kunit *test) +{ + struct xe_guc_relay *relay = test->priv; + + kunit_activate_static_stub(test, xe_guc_ct_send_recv, + replacement_xe_guc_ct_send_recv_expects_pf2guc_relay); + KUNIT_ASSERT_EQ(test, 0, + xe_guc_relay_send_to_vf(relay, TEST_VFID, + TEST_MSG, TEST_LEN, NULL, 0)); +} + +static int replacement_xe_guc_ct_send_recv_loopback_relay(struct xe_guc_ct *ct, + const u32 *msg, u32 len, + u32 *response_buffer) +{ + struct kunit *test = kunit_get_current_test(); + struct xe_guc_relay *relay = test->priv; + u32 *reply = 
kunit_kzalloc(test, len * sizeof(u32), GFP_KERNEL); + int (*guc2relay)(struct xe_guc_relay *, const u32 *, u32); + u32 action; + int err; + + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ct); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, msg); + KUNIT_ASSERT_GE(test, len, GUC_HXG_MSG_MIN_LEN); + KUNIT_ASSERT_EQ(test, GUC_HXG_TYPE_REQUEST, + FIELD_GET(GUC_HXG_MSG_0_TYPE, msg[0])); + KUNIT_ASSERT_GE(test, len, GUC_HXG_REQUEST_MSG_MIN_LEN); + KUNIT_ASSERT_NOT_NULL(test, reply); + + switch (FIELD_GET(GUC_HXG_REQUEST_MSG_0_ACTION, msg[0])) { + case XE_GUC_ACTION_PF2GUC_RELAY_TO_VF: + KUNIT_ASSERT_GE(test, len, PF2GUC_RELAY_TO_VF_REQUEST_MSG_MIN_LEN); + action = XE_GUC_ACTION_GUC2PF_RELAY_FROM_VF; + guc2relay = xe_guc_relay_process_guc2pf; + break; + case XE_GUC_ACTION_VF2GUC_RELAY_TO_PF: + KUNIT_ASSERT_GE(test, len, VF2GUC_RELAY_TO_PF_REQUEST_MSG_MIN_LEN); + action = XE_GUC_ACTION_GUC2VF_RELAY_FROM_PF; + guc2relay = xe_guc_relay_process_guc2vf; + break; + default: + KUNIT_FAIL(test, "bad RELAY action %#x", msg[0]); + return -EINVAL; + } + + reply[0] = FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_GUC) | + FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_EVENT) | + FIELD_PREP(GUC_HXG_EVENT_MSG_0_ACTION, action); + memcpy(reply + 1, msg + 1, sizeof(u32) * (len - 1)); + + err = guc2relay(relay, reply, len); + KUNIT_EXPECT_EQ(test, err, 0); + + return err; +} + +static void test_requires_relay_testloop(struct kunit *test) +{ + /* + * The debug relay action GUC_RELAY_ACTION_VFXPF_TESTLOOP is available + * only on builds with CONFIG_DRM_XE_DEBUG_SRIOV enabled. + * See "kunit.py --kconfig_add" option if it's missing. + */ + if (!IS_ENABLED(CONFIG_DRM_XE_DEBUG_SRIOV)) + kunit_skip(test, "requires %s\n", __stringify(CONFIG_DRM_XE_DEBUG_SRIOV)); +} + +static void pf_loopback_nop(struct kunit *test) +{ + struct xe_guc_relay *relay = test->priv; + u32 request[] = { + FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_HOST) | + FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_REQUEST) | + FIELD_PREP(GUC_HXG_REQUEST_MSG_0_ACTION, GUC_RELAY_ACTION_VFXPF_TESTLOOP) | + FIELD_PREP(GUC_HXG_REQUEST_MSG_0_DATA0, VFXPF_TESTLOOP_OPCODE_NOP), + }; + u32 response[GUC_HXG_RESPONSE_MSG_MIN_LEN]; + int ret; + + test_requires_relay_testloop(test); + + kunit_activate_static_stub(test, relay_kick_worker, relay_process_incoming_action); + kunit_activate_static_stub(test, xe_guc_ct_send_recv, + replacement_xe_guc_ct_send_recv_loopback_relay); + ret = xe_guc_relay_send_to_vf(relay, TEST_VFID, + request, ARRAY_SIZE(request), + response, ARRAY_SIZE(response)); + KUNIT_ASSERT_EQ(test, ret, GUC_HXG_RESPONSE_MSG_MIN_LEN); + KUNIT_EXPECT_EQ(test, FIELD_GET(GUC_HXG_MSG_0_ORIGIN, response[0]), + GUC_HXG_ORIGIN_HOST); + KUNIT_EXPECT_EQ(test, FIELD_GET(GUC_HXG_MSG_0_TYPE, response[0]), + GUC_HXG_TYPE_RESPONSE_SUCCESS); + KUNIT_EXPECT_EQ(test, FIELD_GET(GUC_HXG_RESPONSE_MSG_0_DATA0, response[0]), 0); +} + +static void pf_loopback_echo(struct kunit *test) +{ + struct xe_guc_relay *relay = test->priv; + u32 request[] = { + FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_HOST) | + FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_REQUEST) | + FIELD_PREP(GUC_HXG_REQUEST_MSG_0_ACTION, GUC_RELAY_ACTION_VFXPF_TESTLOOP) | + FIELD_PREP(GUC_HXG_REQUEST_MSG_0_DATA0, VFXPF_TESTLOOP_OPCODE_ECHO), + TEST_DATA(1), TEST_DATA(2), TEST_DATA(3), TEST_DATA(4), + }; + u32 response[ARRAY_SIZE(request)]; + unsigned int n; + int ret; + + test_requires_relay_testloop(test); + + kunit_activate_static_stub(test, relay_kick_worker, relay_process_incoming_action); + kunit_activate_static_stub(test, 
xe_guc_ct_send_recv, + replacement_xe_guc_ct_send_recv_loopback_relay); + ret = xe_guc_relay_send_to_vf(relay, TEST_VFID, + request, ARRAY_SIZE(request), + response, ARRAY_SIZE(response)); + KUNIT_ASSERT_EQ(test, ret, ARRAY_SIZE(response)); + KUNIT_EXPECT_EQ(test, FIELD_GET(GUC_HXG_MSG_0_ORIGIN, response[0]), + GUC_HXG_ORIGIN_HOST); + KUNIT_EXPECT_EQ(test, FIELD_GET(GUC_HXG_MSG_0_TYPE, response[0]), + GUC_HXG_TYPE_RESPONSE_SUCCESS); + KUNIT_EXPECT_EQ(test, FIELD_GET(GUC_HXG_RESPONSE_MSG_0_DATA0, response[0]), + ARRAY_SIZE(response)); + for (n = GUC_HXG_RESPONSE_MSG_MIN_LEN; n < ret; n++) + KUNIT_EXPECT_EQ(test, request[n], response[n]); +} + +static void pf_loopback_fail(struct kunit *test) +{ + struct xe_guc_relay *relay = test->priv; + u32 request[] = { + FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_HOST) | + FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_REQUEST) | + FIELD_PREP(GUC_HXG_REQUEST_MSG_0_ACTION, GUC_RELAY_ACTION_VFXPF_TESTLOOP) | + FIELD_PREP(GUC_HXG_REQUEST_MSG_0_DATA0, VFXPF_TESTLOOP_OPCODE_FAIL), + }; + u32 response[GUC_HXG_RESPONSE_MSG_MIN_LEN]; + int ret; + + test_requires_relay_testloop(test); + + kunit_activate_static_stub(test, relay_kick_worker, relay_process_incoming_action); + kunit_activate_static_stub(test, xe_guc_ct_send_recv, + replacement_xe_guc_ct_send_recv_loopback_relay); + ret = xe_guc_relay_send_to_vf(relay, TEST_VFID, + request, ARRAY_SIZE(request), + response, ARRAY_SIZE(response)); + KUNIT_ASSERT_EQ(test, ret, -EREMOTEIO); +} + +static void pf_loopback_busy(struct kunit *test) +{ + struct xe_guc_relay *relay = test->priv; + u32 request[] = { + FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_HOST) | + FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_REQUEST) | + FIELD_PREP(GUC_HXG_REQUEST_MSG_0_ACTION, GUC_RELAY_ACTION_VFXPF_TESTLOOP) | + FIELD_PREP(GUC_HXG_REQUEST_MSG_0_DATA0, VFXPF_TESTLOOP_OPCODE_BUSY), + TEST_DATA(0xb), + }; + u32 response[GUC_HXG_RESPONSE_MSG_MIN_LEN]; + int ret; + + test_requires_relay_testloop(test); + + kunit_activate_static_stub(test, relay_testonly_nop, relay_process_incoming_action); + kunit_activate_static_stub(test, relay_kick_worker, relay_process_incoming_action); + kunit_activate_static_stub(test, xe_guc_ct_send_recv, + replacement_xe_guc_ct_send_recv_loopback_relay); + ret = xe_guc_relay_send_to_vf(relay, TEST_VFID, + request, ARRAY_SIZE(request), + response, ARRAY_SIZE(response)); + KUNIT_ASSERT_EQ(test, ret, GUC_HXG_RESPONSE_MSG_MIN_LEN); +} + +static void pf_loopback_retry(struct kunit *test) +{ + struct xe_guc_relay *relay = test->priv; + u32 request[] = { + FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_HOST) | + FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_REQUEST) | + FIELD_PREP(GUC_HXG_REQUEST_MSG_0_ACTION, GUC_RELAY_ACTION_VFXPF_TESTLOOP) | + FIELD_PREP(GUC_HXG_REQUEST_MSG_0_DATA0, VFXPF_TESTLOOP_OPCODE_RETRY), + TEST_DATA(0xd), TEST_DATA(0xd), + }; + u32 response[GUC_HXG_RESPONSE_MSG_MIN_LEN]; + int ret; + + test_requires_relay_testloop(test); + + kunit_activate_static_stub(test, relay_kick_worker, relay_process_incoming_action); + kunit_activate_static_stub(test, xe_guc_ct_send_recv, + replacement_xe_guc_ct_send_recv_loopback_relay); + ret = xe_guc_relay_send_to_vf(relay, TEST_VFID, + request, ARRAY_SIZE(request), + response, ARRAY_SIZE(response)); + KUNIT_ASSERT_EQ(test, ret, GUC_HXG_RESPONSE_MSG_MIN_LEN); +} + +static struct kunit_case pf_relay_test_cases[] = { + KUNIT_CASE(pf_rejects_guc2pf_too_short), + KUNIT_CASE(pf_rejects_guc2pf_too_long), + KUNIT_CASE(pf_rejects_guc2pf_no_payload), + 
KUNIT_CASE(pf_fails_no_payload), + KUNIT_CASE(pf_fails_bad_origin), + KUNIT_CASE(pf_fails_bad_type), + KUNIT_CASE(pf_txn_reports_error), + KUNIT_CASE(pf_txn_sends_pf2guc), + KUNIT_CASE(pf_sends_pf2guc), + KUNIT_CASE(pf_loopback_nop), + KUNIT_CASE(pf_loopback_echo), + KUNIT_CASE(pf_loopback_fail), + KUNIT_CASE_SLOW(pf_loopback_busy), + KUNIT_CASE_SLOW(pf_loopback_retry), + {} +}; + +static struct kunit_suite pf_relay_suite = { + .name = "pf_relay", + .test_cases = pf_relay_test_cases, + .init = relay_test_init, +}; + +static void vf_rejects_guc2vf_too_short(struct kunit *test) +{ + const u32 len = GUC2VF_RELAY_FROM_PF_EVENT_MSG_MIN_LEN - 1; + struct xe_guc_relay *relay = test->priv; + const u32 *msg = test_guc2vf; + + KUNIT_ASSERT_EQ(test, -EPROTO, xe_guc_relay_process_guc2vf(relay, msg, len)); +} + +static void vf_rejects_guc2vf_too_long(struct kunit *test) +{ + const u32 len = GUC2VF_RELAY_FROM_PF_EVENT_MSG_MAX_LEN + 1; + struct xe_guc_relay *relay = test->priv; + const u32 *msg = test_guc2vf; + + KUNIT_ASSERT_EQ(test, -EMSGSIZE, xe_guc_relay_process_guc2vf(relay, msg, len)); +} + +static void vf_rejects_guc2vf_no_payload(struct kunit *test) +{ + const u32 len = GUC2VF_RELAY_FROM_PF_EVENT_MSG_MIN_LEN; + struct xe_guc_relay *relay = test->priv; + const u32 *msg = test_guc2vf; + + KUNIT_ASSERT_EQ(test, -EPROTO, xe_guc_relay_process_guc2vf(relay, msg, len)); +} + +static struct kunit_case vf_relay_test_cases[] = { + KUNIT_CASE(vf_rejects_guc2vf_too_short), + KUNIT_CASE(vf_rejects_guc2vf_too_long), + KUNIT_CASE(vf_rejects_guc2vf_no_payload), + {} +}; + +static struct kunit_suite vf_relay_suite = { + .name = "vf_relay", + .test_cases = vf_relay_test_cases, + .init = relay_test_init, +}; + +static void xe_drops_guc2pf_if_not_ready(struct kunit *test) +{ + struct xe_device *xe = test->priv; + struct xe_guc_relay *relay = &xe_device_get_gt(xe, 0)->uc.guc.relay; + const u32 *msg = test_guc2pf; + u32 len = GUC2PF_RELAY_FROM_VF_EVENT_MSG_MIN_LEN + GUC_RELAY_MSG_MIN_LEN; + + KUNIT_ASSERT_EQ(test, -ENODEV, xe_guc_relay_process_guc2pf(relay, msg, len)); +} + +static void xe_drops_guc2vf_if_not_ready(struct kunit *test) +{ + struct xe_device *xe = test->priv; + struct xe_guc_relay *relay = &xe_device_get_gt(xe, 0)->uc.guc.relay; + const u32 *msg = test_guc2vf; + u32 len = GUC2VF_RELAY_FROM_PF_EVENT_MSG_MIN_LEN + GUC_RELAY_MSG_MIN_LEN; + + KUNIT_ASSERT_EQ(test, -ENODEV, xe_guc_relay_process_guc2vf(relay, msg, len)); +} + +static void xe_rejects_send_if_not_ready(struct kunit *test) +{ + struct xe_device *xe = test->priv; + struct xe_guc_relay *relay = &xe_device_get_gt(xe, 0)->uc.guc.relay; + u32 msg[GUC_RELAY_MSG_MIN_LEN]; + u32 len = ARRAY_SIZE(msg); + + KUNIT_ASSERT_EQ(test, -ENODEV, xe_guc_relay_send_to_pf(relay, msg, len, NULL, 0)); + KUNIT_ASSERT_EQ(test, -ENODEV, relay_send_to(relay, TEST_VFID, msg, len, NULL, 0)); +} + +static struct kunit_case no_relay_test_cases[] = { + KUNIT_CASE(xe_drops_guc2pf_if_not_ready), + KUNIT_CASE(xe_drops_guc2vf_if_not_ready), + KUNIT_CASE(xe_rejects_send_if_not_ready), + {} +}; + +static struct kunit_suite no_relay_suite = { + .name = "no_relay", + .test_cases = no_relay_test_cases, + .init = xe_kunit_helper_xe_device_test_init, +}; + +kunit_test_suites(&no_relay_suite, + &pf_relay_suite, + &vf_relay_suite); diff --git a/drivers/gpu/drm/xe/tests/xe_kunit_helpers.c b/drivers/gpu/drm/xe/tests/xe_kunit_helpers.c new file mode 100644 index 000000000000..fefe79b3b75a --- /dev/null +++ b/drivers/gpu/drm/xe/tests/xe_kunit_helpers.c @@ -0,0 +1,90 @@ +// 
SPDX-License-Identifier: GPL-2.0 AND MIT +/* + * Copyright © 2023 Intel Corporation + */ + +#include <kunit/test.h> +#include <kunit/static_stub.h> +#include <kunit/visibility.h> + +#include <drm/drm_drv.h> +#include <drm/drm_kunit_helpers.h> + +#include "tests/xe_kunit_helpers.h" +#include "tests/xe_pci_test.h" +#include "xe_device_types.h" + +/** + * xe_kunit_helper_alloc_xe_device - Allocate a &xe_device for a KUnit test. + * @test: the &kunit where this &xe_device will be used + * @dev: The parent device object + * + * This function allocates xe_device using drm_kunit_helper_alloc_drm_device(). + * The xe_device allocation is managed by the test. + * + * @dev should be allocated using drm_kunit_helper_alloc_device(). + * + * This function uses KUNIT_ASSERT to detect any allocation failures. + * + * Return: A pointer to the new &xe_device. + */ +struct xe_device *xe_kunit_helper_alloc_xe_device(struct kunit *test, + struct device *dev) +{ + struct xe_device *xe; + + xe = drm_kunit_helper_alloc_drm_device(test, dev, + struct xe_device, + drm, DRIVER_GEM); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, xe); + return xe; +} +EXPORT_SYMBOL_IF_KUNIT(xe_kunit_helper_alloc_xe_device); + +static void kunit_action_restore_priv(void *priv) +{ + struct kunit *test = kunit_get_current_test(); + + test->priv = priv; +} + +/** + * xe_kunit_helper_xe_device_test_init - Prepare a &xe_device for a KUnit test. + * @test: the &kunit where this fake &xe_device will be used + * + * This function allocates and initializes a fake &xe_device and stores its + * pointer as &kunit.priv to allow the test code to access it. + * + * This function can be directly used as a custom implementation of + * &kunit_suite.init. + * + * It is possible to prepare a specific variant of the fake &xe_device by passing + * in &kunit.priv a pointer to a struct xe_pci_fake_data supplemented with the + * desired parameters prior to calling this function. + * + * This function uses KUNIT_ASSERT to detect any failures. + * + * Return: Always 0.
+ */ +int xe_kunit_helper_xe_device_test_init(struct kunit *test) +{ + struct xe_device *xe; + struct device *dev; + int err; + + dev = drm_kunit_helper_alloc_device(test); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, dev); + + xe = xe_kunit_helper_alloc_xe_device(test, dev); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, xe); + + err = xe_pci_fake_device_init(xe); + KUNIT_ASSERT_EQ(test, err, 0); + + err = kunit_add_action_or_reset(test, kunit_action_restore_priv, test->priv); + KUNIT_ASSERT_EQ(test, err, 0); + + test->priv = xe; + return 0; +} +EXPORT_SYMBOL_IF_KUNIT(xe_kunit_helper_xe_device_test_init); diff --git a/drivers/gpu/drm/xe/tests/xe_kunit_helpers.h b/drivers/gpu/drm/xe/tests/xe_kunit_helpers.h new file mode 100644 index 000000000000..067a1babf049 --- /dev/null +++ b/drivers/gpu/drm/xe/tests/xe_kunit_helpers.h @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: GPL-2.0 AND MIT */ +/* + * Copyright © 2023 Intel Corporation + */ + +#ifndef _XE_KUNIT_HELPERS_H_ +#define _XE_KUNIT_HELPERS_H_ + +struct device; +struct kunit; +struct xe_device; + +struct xe_device *xe_kunit_helper_alloc_xe_device(struct kunit *test, + struct device *dev); +int xe_kunit_helper_xe_device_test_init(struct kunit *test); + +#endif diff --git a/drivers/gpu/drm/xe/tests/xe_mocs.c b/drivers/gpu/drm/xe/tests/xe_mocs.c index 7dd34f94e809..df5c36b70ab4 100644 --- a/drivers/gpu/drm/xe/tests/xe_mocs.c +++ b/drivers/gpu/drm/xe/tests/xe_mocs.c @@ -128,3 +128,39 @@ void xe_live_mocs_kernel_kunit(struct kunit *test) xe_call_for_each_device(mocs_kernel_test_run_device); } EXPORT_SYMBOL_IF_KUNIT(xe_live_mocs_kernel_kunit); + +static int mocs_reset_test_run_device(struct xe_device *xe) +{ + /* Check the mocs setup is retained over GT reset */ + + struct live_mocs mocs; + struct xe_gt *gt; + unsigned int flags; + int id; + struct kunit *test = xe_cur_kunit(); + + for_each_gt(gt, xe, id) { + flags = live_mocs_init(&mocs, gt); + kunit_info(test, "mocs_reset_test before reset\n"); + if (flags & HAS_GLOBAL_MOCS) + read_mocs_table(gt, &mocs.table); + if (flags & HAS_LNCF_MOCS) + read_l3cc_table(gt, &mocs.table); + + xe_gt_reset_async(gt); + flush_work(&gt->reset.worker); + + kunit_info(test, "mocs_reset_test after reset\n"); + if (flags & HAS_GLOBAL_MOCS) + read_mocs_table(gt, &mocs.table); + if (flags & HAS_LNCF_MOCS) + read_l3cc_table(gt, &mocs.table); + } + return 0; +} + +void xe_live_mocs_reset_kunit(struct kunit *test) +{ + xe_call_for_each_device(mocs_reset_test_run_device); +} +EXPORT_SYMBOL_IF_KUNIT(xe_live_mocs_reset_kunit); diff --git a/drivers/gpu/drm/xe/tests/xe_mocs_test.c b/drivers/gpu/drm/xe/tests/xe_mocs_test.c index ef56bd517b28..4f62e7a4270b 100644 --- a/drivers/gpu/drm/xe/tests/xe_mocs_test.c +++ b/drivers/gpu/drm/xe/tests/xe_mocs_test.c @@ -9,6 +9,7 @@ static struct kunit_case xe_mocs_tests[] = { KUNIT_CASE(xe_live_mocs_kernel_kunit), + KUNIT_CASE(xe_live_mocs_reset_kunit), {} }; diff --git a/drivers/gpu/drm/xe/tests/xe_mocs_test.h b/drivers/gpu/drm/xe/tests/xe_mocs_test.h index 7faa3575e6c3..e7699d495411 100644 --- a/drivers/gpu/drm/xe/tests/xe_mocs_test.h +++ b/drivers/gpu/drm/xe/tests/xe_mocs_test.h @@ -9,5 +9,6 @@ struct kunit; void xe_live_mocs_kernel_kunit(struct kunit *test); +void xe_live_mocs_reset_kunit(struct kunit *test); #endif diff --git a/drivers/gpu/drm/xe/tests/xe_pci.c b/drivers/gpu/drm/xe/tests/xe_pci.c index 602793644f61..f62809ca8b51 100644 --- a/drivers/gpu/drm/xe/tests/xe_pci.c +++ b/drivers/gpu/drm/xe/tests/xe_pci.c @@ -156,6 +156,9 @@ int xe_pci_fake_device_init(struct xe_device *xe) return -ENODEV; done:
+ xe->sriov.__mode = data && data->sriov_mode ? + data->sriov_mode : XE_SRIOV_MODE_NONE; + kunit_activate_static_stub(test, read_gmdid, fake_read_gmdid); xe_info_init_early(xe, desc, subplatform_desc); diff --git a/drivers/gpu/drm/xe/tests/xe_pci_test.c b/drivers/gpu/drm/xe/tests/xe_pci_test.c index 171e4180f1aa..a6705a536391 100644 --- a/drivers/gpu/drm/xe/tests/xe_pci_test.c +++ b/drivers/gpu/drm/xe/tests/xe_pci_test.c @@ -64,8 +64,3 @@ static struct kunit_suite xe_pci_test_suite = { }; kunit_test_suite(xe_pci_test_suite); - -MODULE_AUTHOR("Intel Corporation"); -MODULE_LICENSE("GPL"); -MODULE_DESCRIPTION("xe_pci kunit test"); -MODULE_IMPORT_NS(EXPORTED_FOR_KUNIT_TESTING); diff --git a/drivers/gpu/drm/xe/tests/xe_pci_test.h b/drivers/gpu/drm/xe/tests/xe_pci_test.h index 811ffe5bd9fd..f40dcec83992 100644 --- a/drivers/gpu/drm/xe/tests/xe_pci_test.h +++ b/drivers/gpu/drm/xe/tests/xe_pci_test.h @@ -9,6 +9,7 @@ #include <linux/types.h> #include "xe_platform_types.h" +#include "xe_sriov_types.h" struct xe_device; struct xe_graphics_desc; @@ -23,6 +24,7 @@ void xe_call_for_each_graphics_ip(xe_graphics_fn xe_fn); void xe_call_for_each_media_ip(xe_media_fn xe_fn); struct xe_pci_fake_data { + enum xe_sriov_mode sriov_mode; enum xe_platform platform; enum xe_subplatform subplatform; u32 graphics_verx100; diff --git a/drivers/gpu/drm/xe/tests/xe_rtp_test.c b/drivers/gpu/drm/xe/tests/xe_rtp_test.c index 4a6972897675..06759d754783 100644 --- a/drivers/gpu/drm/xe/tests/xe_rtp_test.c +++ b/drivers/gpu/drm/xe/tests/xe_rtp_test.c @@ -15,6 +15,7 @@ #include "regs/xe_reg_defs.h" #include "xe_device.h" #include "xe_device_types.h" +#include "xe_kunit_helpers.h" #include "xe_pci_test.h" #include "xe_reg_sr.h" #include "xe_rtp.h" @@ -276,9 +277,7 @@ static int xe_rtp_test_init(struct kunit *test) dev = drm_kunit_helper_alloc_device(test); KUNIT_ASSERT_NOT_ERR_OR_NULL(test, dev); - xe = drm_kunit_helper_alloc_drm_device(test, dev, - struct xe_device, - drm, DRIVER_GEM); + xe = xe_kunit_helper_alloc_xe_device(test, dev); KUNIT_ASSERT_NOT_ERR_OR_NULL(test, xe); /* Initialize an empty device */ @@ -312,8 +311,3 @@ static struct kunit_suite xe_rtp_test_suite = { }; kunit_test_suite(xe_rtp_test_suite); - -MODULE_AUTHOR("Intel Corporation"); -MODULE_LICENSE("GPL"); -MODULE_DESCRIPTION("xe_rtp kunit test"); -MODULE_IMPORT_NS(EXPORTED_FOR_KUNIT_TESTING); diff --git a/drivers/gpu/drm/xe/tests/xe_test_mod.c b/drivers/gpu/drm/xe/tests/xe_test_mod.c new file mode 100644 index 000000000000..875f3e6f965e --- /dev/null +++ b/drivers/gpu/drm/xe/tests/xe_test_mod.c @@ -0,0 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright © 2023 Intel Corporation + */ +#include <linux/module.h> + +MODULE_AUTHOR("Intel Corporation"); +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("xe kunit tests"); +MODULE_IMPORT_NS(EXPORTED_FOR_KUNIT_TESTING); diff --git a/drivers/gpu/drm/xe/tests/xe_wa_test.c b/drivers/gpu/drm/xe/tests/xe_wa_test.c index a53c22a19582..439477593faf 100644 --- a/drivers/gpu/drm/xe/tests/xe_wa_test.c +++ b/drivers/gpu/drm/xe/tests/xe_wa_test.c @@ -9,6 +9,7 @@ #include <kunit/test.h> #include "xe_device.h" +#include "xe_kunit_helpers.h" #include "xe_pci_test.h" #include "xe_reg_sr.h" #include "xe_tuning.h" @@ -65,14 +66,8 @@ static const struct platform_test_case cases[] = { PLATFORM_CASE(ALDERLAKE_P, C0), SUBPLATFORM_CASE(ALDERLAKE_S, RPLS, D0), SUBPLATFORM_CASE(ALDERLAKE_P, RPLU, E0), - SUBPLATFORM_CASE(DG2, G10, A0), - SUBPLATFORM_CASE(DG2, G10, A1), - SUBPLATFORM_CASE(DG2, G10, B0), SUBPLATFORM_CASE(DG2, G10, C0), - 
SUBPLATFORM_CASE(DG2, G11, A0), - SUBPLATFORM_CASE(DG2, G11, B0), SUBPLATFORM_CASE(DG2, G11, B1), - SUBPLATFORM_CASE(DG2, G12, A0), SUBPLATFORM_CASE(DG2, G12, A1), PLATFORM_CASE(PVC, B0), PLATFORM_CASE(PVC, B1), @@ -108,9 +103,7 @@ static int xe_wa_test_init(struct kunit *test) dev = drm_kunit_helper_alloc_device(test); KUNIT_ASSERT_NOT_ERR_OR_NULL(test, dev); - xe = drm_kunit_helper_alloc_drm_device(test, dev, - struct xe_device, - drm, DRIVER_GEM); + xe = xe_kunit_helper_alloc_xe_device(test, dev); KUNIT_ASSERT_NOT_ERR_OR_NULL(test, xe); test->priv = &data; @@ -163,8 +156,3 @@ static struct kunit_suite xe_rtp_test_suite = { }; kunit_test_suite(xe_rtp_test_suite); - -MODULE_AUTHOR("Intel Corporation"); -MODULE_LICENSE("GPL"); -MODULE_DESCRIPTION("xe_wa kunit test"); -MODULE_IMPORT_NS(EXPORTED_FOR_KUNIT_TESTING); diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c index 0b0e262e2166..686d716c5581 100644 --- a/drivers/gpu/drm/xe/xe_bo.c +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -586,6 +586,8 @@ static int xe_bo_move_notify(struct xe_bo *bo, { struct ttm_buffer_object *ttm_bo = &bo->ttm; struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev); + struct ttm_resource *old_mem = ttm_bo->resource; + u32 old_mem_type = old_mem ? old_mem->mem_type : XE_PL_SYSTEM; int ret; /* @@ -605,6 +607,18 @@ static int xe_bo_move_notify(struct xe_bo *bo, if (ttm_bo->base.dma_buf && !ttm_bo->base.import_attach) dma_buf_move_notify(ttm_bo->base.dma_buf); + /* + * TTM has already nuked the mmap for us (see ttm_bo_unmap_virtual), + * so if we moved from VRAM make sure to unlink this from the userfault + * tracking. + */ + if (mem_type_is_vram(old_mem_type)) { + mutex_lock(&xe->mem_access.vram_userfault.lock); + if (!list_empty(&bo->vram_userfault_link)) + list_del_init(&bo->vram_userfault_link); + mutex_unlock(&xe->mem_access.vram_userfault.lock); + } + return 0; } @@ -1027,7 +1041,7 @@ static void xe_ttm_bo_delete_mem_notify(struct ttm_buffer_object *ttm_bo) } } -struct ttm_device_funcs xe_ttm_funcs = { +const struct ttm_device_funcs xe_ttm_funcs = { .ttm_tt_create = xe_ttm_tt_create, .ttm_tt_populate = xe_ttm_tt_populate, .ttm_tt_unpopulate = xe_ttm_tt_unpopulate, @@ -1063,6 +1077,11 @@ static void xe_ttm_bo_destroy(struct ttm_buffer_object *ttm_bo) if (bo->vm && xe_bo_is_user(bo)) xe_vm_put(bo->vm); + mutex_lock(&xe->mem_access.vram_userfault.lock); + if (!list_empty(&bo->vram_userfault_link)) + list_del(&bo->vram_userfault_link); + mutex_unlock(&xe->mem_access.vram_userfault.lock); + kfree(bo); } @@ -1110,16 +1129,20 @@ static vm_fault_t xe_gem_fault(struct vm_fault *vmf) { struct ttm_buffer_object *tbo = vmf->vma->vm_private_data; struct drm_device *ddev = tbo->base.dev; + struct xe_device *xe = to_xe_device(ddev); + struct xe_bo *bo = ttm_to_xe_bo(tbo); + bool needs_rpm = bo->flags & XE_BO_CREATE_VRAM_MASK; vm_fault_t ret; int idx, r = 0; + if (needs_rpm) + xe_device_mem_access_get(xe); + ret = ttm_bo_vm_reserve(tbo, vmf); if (ret) - return ret; + goto out; if (drm_dev_enter(ddev, &idx)) { - struct xe_bo *bo = ttm_to_xe_bo(tbo); - trace_xe_bo_cpu_fault(bo); if (should_migrate_to_system(bo)) { @@ -1137,10 +1160,24 @@ static vm_fault_t xe_gem_fault(struct vm_fault *vmf) } else { ret = ttm_bo_vm_dummy_page(vmf, vmf->vma->vm_page_prot); } + if (ret == VM_FAULT_RETRY && !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT)) - return ret; + goto out; + /* + * ttm_bo_vm_reserve() already has dma_resv_lock. 
+ */ + if (ret == VM_FAULT_NOPAGE && mem_type_is_vram(tbo->resource->mem_type)) { + mutex_lock(&xe->mem_access.vram_userfault.lock); + if (list_empty(&bo->vram_userfault_link)) + list_add(&bo->vram_userfault_link, &xe->mem_access.vram_userfault.list); + mutex_unlock(&xe->mem_access.vram_userfault.lock); + } dma_resv_unlock(tbo->base.resv); +out: + if (needs_rpm) + xe_device_mem_access_put(xe); + return ret; } @@ -1254,6 +1291,7 @@ struct xe_bo *___xe_bo_create_locked(struct xe_device *xe, struct xe_bo *bo, #ifdef CONFIG_PROC_FS INIT_LIST_HEAD(&bo->client_link); #endif + INIT_LIST_HEAD(&bo->vram_userfault_link); drm_gem_private_object_init(&xe->drm, &bo->ttm.base, size); @@ -2264,6 +2302,16 @@ int xe_bo_dumb_create(struct drm_file *file_priv, return err; } +void xe_bo_runtime_pm_release_mmap_offset(struct xe_bo *bo) +{ + struct ttm_buffer_object *tbo = &bo->ttm; + struct ttm_device *bdev = tbo->bdev; + + drm_vma_node_unmap(&tbo->base.vma_node, bdev->dev_mapping); + + list_del_init(&bo->vram_userfault_link); +} + #if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST) #include "tests/xe_bo.c" #endif diff --git a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h index 9b1279aca127..db4b2db6b073 100644 --- a/drivers/gpu/drm/xe/xe_bo.h +++ b/drivers/gpu/drm/xe/xe_bo.h @@ -44,6 +44,7 @@ #define XE_BO_FIXED_PLACEMENT_BIT BIT(11) #define XE_BO_PAGETABLE BIT(12) #define XE_BO_NEEDS_CPU_ACCESS BIT(13) +#define XE_BO_NEEDS_UC BIT(14) /* this one is triggered internally only */ #define XE_BO_INTERNAL_TEST BIT(30) #define XE_BO_INTERNAL_64K BIT(31) @@ -242,12 +243,14 @@ int xe_bo_evict(struct xe_bo *bo, bool force_alloc); int xe_bo_evict_pinned(struct xe_bo *bo); int xe_bo_restore_pinned(struct xe_bo *bo); -extern struct ttm_device_funcs xe_ttm_funcs; +extern const struct ttm_device_funcs xe_ttm_funcs; int xe_gem_create_ioctl(struct drm_device *dev, void *data, struct drm_file *file); int xe_gem_mmap_offset_ioctl(struct drm_device *dev, void *data, struct drm_file *file); +void xe_bo_runtime_pm_release_mmap_offset(struct xe_bo *bo); + int xe_bo_dumb_create(struct drm_file *file_priv, struct drm_device *dev, struct drm_mode_create_dumb *args); diff --git a/drivers/gpu/drm/xe/xe_bo_types.h b/drivers/gpu/drm/xe/xe_bo_types.h index 64c2249a4e40..14ef13b7b421 100644 --- a/drivers/gpu/drm/xe/xe_bo_types.h +++ b/drivers/gpu/drm/xe/xe_bo_types.h @@ -88,6 +88,9 @@ struct xe_bo { * objects.
*/ u16 cpu_caching; + + /** @vram_userfault_link: Link into @mem_access.vram_userfault.list */ + struct list_head vram_userfault_link; }; #define intel_bo_to_drm_bo(bo) (&(bo)->ttm.base) diff --git a/drivers/gpu/drm/xe/xe_debugfs.c b/drivers/gpu/drm/xe/xe_debugfs.c index c56fd7d59f05..01db5b27bec5 100644 --- a/drivers/gpu/drm/xe/xe_debugfs.c +++ b/drivers/gpu/drm/xe/xe_debugfs.c @@ -55,6 +55,7 @@ static int info(struct seq_file *m, void *data) drm_printf(&p, "force_execlist %s\n", str_yes_no(xe->info.force_execlist)); drm_printf(&p, "has_flat_ccs %s\n", str_yes_no(xe->info.has_flat_ccs)); drm_printf(&p, "has_usm %s\n", str_yes_no(xe->info.has_usm)); + drm_printf(&p, "skip_guc_pc %s\n", str_yes_no(xe->info.skip_guc_pc)); for_each_gt(gt, xe, id) { drm_printf(&p, "gt%d force wake %d\n", id, xe_force_wake_ref(gt_to_fw(gt), XE_FW_GT)); diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index b8d8da546670..ab417f4f7d2a 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -26,15 +26,18 @@ #include "xe_exec_queue.h" #include "xe_exec.h" #include "xe_ggtt.h" +#include "xe_gsc_proxy.h" #include "xe_gt.h" #include "xe_gt_mcr.h" #include "xe_irq.h" +#include "xe_memirq.h" #include "xe_mmio.h" #include "xe_module.h" #include "xe_pat.h" #include "xe_pcode.h" #include "xe_pm.h" #include "xe_query.h" +#include "xe_sriov.h" #include "xe_tile.h" #include "xe_ttm_stolen_mgr.h" #include "xe_ttm_sys_mgr.h" @@ -432,10 +435,15 @@ int xe_device_probe(struct xe_device *xe) struct xe_tile *tile; struct xe_gt *gt; int err; + u8 last_gt; u8 id; xe_pat_init_early(xe); + err = xe_sriov_init(xe); + if (err) + return err; + xe->info.mem_region_mask = 1; err = xe_display_init_nommio(xe); if (err) @@ -456,6 +464,11 @@ int xe_device_probe(struct xe_device *xe) err = xe_ggtt_init_early(tile->mem.ggtt); if (err) return err; + if (IS_SRIOV_VF(xe)) { + err = xe_memirq_init(&tile->sriov.vf.memirq); + if (err) + return err; + } } err = drmm_add_action_or_reset(&xe->drm, xe_driver_flr_fini, xe); @@ -510,16 +523,18 @@ int xe_device_probe(struct xe_device *xe) goto err_irq_shutdown; for_each_gt(gt, xe, id) { + last_gt = id; + err = xe_gt_init(gt); if (err) - goto err_irq_shutdown; + goto err_fini_gt; } xe_heci_gsc_init(xe); err = xe_display_init(xe); if (err) - goto err_irq_shutdown; + goto err_fini_gt; err = drm_dev_register(&xe->drm, 0); if (err) @@ -540,6 +555,14 @@ int xe_device_probe(struct xe_device *xe) err_fini_display: xe_display_driver_remove(xe); +err_fini_gt: + for_each_gt(gt, xe, id) { + if (id < last_gt) + xe_gt_remove(gt); + else + break; + } + err_irq_shutdown: xe_irq_shutdown(xe); err: @@ -557,12 +580,18 @@ static void xe_device_remove_display(struct xe_device *xe) void xe_device_remove(struct xe_device *xe) { + struct xe_gt *gt; + u8 id; + xe_device_remove_display(xe); xe_display_fini(xe); xe_heci_gsc_fini(xe); + for_each_gt(gt, xe, id) + xe_gt_remove(gt); + xe_irq_shutdown(xe); } @@ -613,7 +642,7 @@ void xe_device_wmb(struct xe_device *xe) u32 xe_device_ccs_bytes(struct xe_device *xe, u64 size) { return xe_device_has_flat_ccs(xe) ? 
- DIV_ROUND_UP(size, NUM_BYTES_PER_CCS_BYTE(xe)) : 0; + DIV_ROUND_UP_ULL(size, NUM_BYTES_PER_CCS_BYTE(xe)) : 0; } bool xe_device_mem_access_ongoing(struct xe_device *xe) diff --git a/drivers/gpu/drm/xe/xe_device.h b/drivers/gpu/drm/xe/xe_device.h index 3da83b233206..af8ac2e9e270 100644 --- a/drivers/gpu/drm/xe/xe_device.h +++ b/drivers/gpu/drm/xe/xe_device.h @@ -168,6 +168,11 @@ static inline bool xe_device_has_sriov(struct xe_device *xe) return xe->info.has_sriov; } +static inline bool xe_device_has_memirq(struct xe_device *xe) +{ + return GRAPHICS_VERx100(xe) >= 1250; +} + u32 xe_device_ccs_bytes(struct xe_device *xe, u64 size); #endif diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h index 5dc9127a2029..7eda86bd4c2a 100644 --- a/drivers/gpu/drm/xe/xe_device_types.h +++ b/drivers/gpu/drm/xe/xe_device_types.h @@ -16,6 +16,7 @@ #include "xe_heci_gsc.h" #include "xe_gt_types.h" #include "xe_lmtt_types.h" +#include "xe_memirq_types.h" #include "xe_platform_types.h" #include "xe_pt_types.h" #include "xe_sriov_types.h" @@ -192,6 +193,10 @@ struct xe_tile { /** @sriov.pf.lmtt: Local Memory Translation Table. */ struct xe_lmtt lmtt; } pf; + struct { + /** @sriov.vf.memirq: Memory Based Interrupts. */ + struct xe_memirq memirq; + } vf; } sriov; /** @migrate: Migration helper for vram blits and clearing */ @@ -316,6 +321,8 @@ struct xe_device { struct { /** @sriov.__mode: SR-IOV mode (Don't access directly!) */ enum xe_sriov_mode __mode; + /** @sriov.wq: workqueue used by the virtualization workers */ + struct workqueue_struct *wq; } sriov; /** @clients: drm clients info */ @@ -380,6 +387,22 @@ struct xe_device { struct { /** @ref: ref count of memory accesses */ atomic_t ref; + + /** @vram_userfault: Encapsulate vram_userfault related stuff */ + struct { + /** + * @lock: Protects access to @vram_userfault.list + * A mutex is used instead of a spinlock since the lock is held + * across entire list operations, which may sleep + */ + struct mutex lock; + + /** + * @list: List of userfaulted VRAM BOs, whose mmap mappings must be + * released on the runtime suspend path + */ + struct list_head list; + } vram_userfault; } mem_access; /** diff --git a/drivers/gpu/drm/xe/xe_dma_buf.c b/drivers/gpu/drm/xe/xe_dma_buf.c index 64ed303728fd..da2627ed6ae7 100644 --- a/drivers/gpu/drm/xe/xe_dma_buf.c +++ b/drivers/gpu/drm/xe/xe_dma_buf.c @@ -175,7 +175,7 @@ static int xe_dma_buf_begin_cpu_access(struct dma_buf *dma_buf, return 0; } -const struct dma_buf_ops xe_dmabuf_ops = { +static const struct dma_buf_ops xe_dmabuf_ops = { .attach = xe_dma_buf_attach, .detach = xe_dma_buf_detach, .pin = xe_dma_buf_pin, diff --git a/drivers/gpu/drm/xe/xe_exec.c b/drivers/gpu/drm/xe/xe_exec.c index b853feed9ccc..59fd9bb40c18 100644 --- a/drivers/gpu/drm/xe/xe_exec.c +++ b/drivers/gpu/drm/xe/xe_exec.c @@ -96,7 +96,46 @@ static int xe_exec_fn(struct drm_gpuvm_exec *vm_exec) { - return drm_gpuvm_validate(vm_exec->vm, &vm_exec->exec); + struct xe_vm *vm = container_of(vm_exec->vm, struct xe_vm, gpuvm); + struct drm_gem_object *obj; + unsigned long index; + int num_fences; + int ret; + + ret = drm_gpuvm_validate(vm_exec->vm, &vm_exec->exec); + if (ret) + return ret; + + /* + * 1 fence slot for the final submit, 1 more per tile for the + * GPU binds and 1 extra for the CPU bind. Note that there are potentially + * many vma per object/dma-resv, however the fence slot will just be + * re-used, since they are largely the same timeline and the seqno + * should be in order.
In the case of CPU bind there is a dummy fence used + * for all CPU binds, so no need to have a per-tile slot for that. + */ + num_fences = 1 + 1 + vm->xe->info.tile_count; + + /* + * We don't know upfront exactly how many fence slots we will need at + * the start of the exec, since the TTM bo_validate above can consume + * numerous fence slots. Also, due to how dma_resv_reserve_fences() + * works, it only ensures that at least that many fence slots are + * available, i.e. if there are already 10 slots available and we reserve + * two more, it can just noop without reserving anything. With this it + * is quite possible that TTM steals some of the fence slots and then + * when it comes time to do the vma binding and final exec stage we are + * lacking enough fence slots, leading to some nasty BUG_ON() when + * adding the fences. Hence just add our own fences here, after the + * validate stage. + */ + drm_exec_for_each_locked_object(&vm_exec->exec, index, obj) { + ret = dma_resv_reserve_fences(obj->resv, num_fences); + if (ret) + return ret; + } + + return 0; +} int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file) @@ -189,7 +228,6 @@ retry: } vm_exec.vm = &vm->gpuvm; - vm_exec.num_fences = 1 + vm->xe->info.tile_count; vm_exec.flags = DRM_EXEC_INTERRUPTIBLE_WAIT; if (xe_vm_in_lr_mode(vm)) { drm_exec_init(exec, vm_exec.flags, 0); diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c index bcfc4127c7c5..c0b7434e78f1 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue.c +++ b/drivers/gpu/drm/xe/xe_exec_queue.c @@ -30,16 +30,18 @@ enum xe_exec_queue_sched_prop { XE_EXEC_QUEUE_SCHED_PROP_MAX = 3, }; -static struct xe_exec_queue *__xe_exec_queue_create(struct xe_device *xe, - struct xe_vm *vm, - u32 logical_mask, - u16 width, struct xe_hw_engine *hwe, - u32 flags) +static int exec_queue_user_extensions(struct xe_device *xe, struct xe_exec_queue *q, + u64 extensions, int ext_number, bool create); + +static struct xe_exec_queue *__xe_exec_queue_alloc(struct xe_device *xe, + struct xe_vm *vm, + u32 logical_mask, + u16 width, struct xe_hw_engine *hwe, + u32 flags, u64 extensions) { struct xe_exec_queue *q; struct xe_gt *gt = hwe->gt; int err; - int i; /* only kernel queues can be permanent */ XE_WARN_ON((flags & EXEC_QUEUE_FLAG_PERMANENT) && !(flags & EXEC_QUEUE_FLAG_KERNEL)); @@ -52,8 +54,6 @@ static struct xe_exec_queue *__xe_exec_queue_create(struct xe_device *xe, q->flags = flags; q->hwe = hwe; q->gt = gt; - if (vm) - q->vm = xe_vm_get(vm); q->class = hwe->class; q->width = width; q->logical_mask = logical_mask; @@ -67,12 +67,29 @@ static struct xe_exec_queue *__xe_exec_queue_create(struct xe_device *xe, q->sched_props.timeslice_us = hwe->eclass->sched_props.timeslice_us; q->sched_props.preempt_timeout_us = hwe->eclass->sched_props.preempt_timeout_us; + q->sched_props.job_timeout_ms = + hwe->eclass->sched_props.job_timeout_ms; if (q->flags & EXEC_QUEUE_FLAG_KERNEL && q->flags & EXEC_QUEUE_FLAG_HIGH_PRIORITY) q->sched_props.priority = XE_EXEC_QUEUE_PRIORITY_KERNEL; else q->sched_props.priority = XE_EXEC_QUEUE_PRIORITY_NORMAL; + if (extensions) { + /* + * may set q->usm, must come before xe_lrc_init(), + * may overwrite q->sched_props, must come before q->ops->init() + */ + err = exec_queue_user_extensions(xe, q, extensions, 0, true); + if (err) { + kfree(q); + return ERR_PTR(err); + } + } + + if (vm) + q->vm = xe_vm_get(vm); + if (xe_exec_queue_is_parallel(q)) { q->parallel.composite_fence_ctx = dma_fence_context_alloc(1);
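To make the fence-slot contract from the xe_exec.c comment above concrete: dma_resv_reserve_fences() only guarantees a minimum number of free slots, so the reservation has to happen after any step that may consume slots and right before the fences are added. A minimal standalone sketch of that reserve-then-add pattern; the function and the usage flag chosen here are illustrative only, not part of this series:

#include <linux/dma-resv.h>

#include <drm/drm_gem.h>

static int example_add_fence(struct drm_gem_object *obj, struct dma_fence *fence)
{
	int ret;

	ret = dma_resv_lock(obj->resv, NULL);
	if (ret)
		return ret;

	/* "at least one free slot"; this is a no-op if enough slots already exist */
	ret = dma_resv_reserve_fences(obj->resv, 1);
	if (!ret)
		dma_resv_add_fence(obj->resv, fence, DMA_RESV_USAGE_WRITE);

	dma_resv_unlock(obj->resv);
	return ret;
}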
q->parallel.composite_fence_seqno = XE_FENCE_INITIAL_SEQNO; @@ -82,8 +99,23 @@ static struct xe_exec_queue *__xe_exec_queue_create(struct xe_device *xe, q->bind.fence_seqno = XE_FENCE_INITIAL_SEQNO; } - for (i = 0; i < width; ++i) { - err = xe_lrc_init(q->lrc + i, hwe, q, vm, SZ_16K); + return q; +} + +static void __xe_exec_queue_free(struct xe_exec_queue *q) +{ + if (q->vm) + xe_vm_put(q->vm); + kfree(q); +} + +static int __xe_exec_queue_init(struct xe_exec_queue *q) +{ + struct xe_device *xe = gt_to_xe(q->gt); + int i, err; + + for (i = 0; i < q->width; ++i) { + err = xe_lrc_init(q->lrc + i, q->hwe, q, q->vm, SZ_16K); if (err) goto err_lrc; } @@ -100,35 +132,47 @@ static struct xe_exec_queue *__xe_exec_queue_create(struct xe_device *xe, * can perform GuC CT actions when needed. Caller is expected to have * already grabbed the rpm ref outside any sensitive locks. */ - if (!(q->flags & EXEC_QUEUE_FLAG_PERMANENT) && (q->flags & EXEC_QUEUE_FLAG_VM || !vm)) + if (!(q->flags & EXEC_QUEUE_FLAG_PERMANENT) && (q->flags & EXEC_QUEUE_FLAG_VM || !q->vm)) drm_WARN_ON(&xe->drm, !xe_device_mem_access_get_if_ongoing(xe)); - return q; + return 0; err_lrc: for (i = i - 1; i >= 0; --i) xe_lrc_finish(q->lrc + i); - kfree(q); - return ERR_PTR(err); + return err; } struct xe_exec_queue *xe_exec_queue_create(struct xe_device *xe, struct xe_vm *vm, u32 logical_mask, u16 width, - struct xe_hw_engine *hwe, u32 flags) + struct xe_hw_engine *hwe, u32 flags, + u64 extensions) { struct xe_exec_queue *q; int err; + q = __xe_exec_queue_alloc(xe, vm, logical_mask, width, hwe, flags, + extensions); + if (IS_ERR(q)) + return q; + if (vm) { err = xe_vm_lock(vm, true); if (err) - return ERR_PTR(err); + goto err_post_alloc; } - q = __xe_exec_queue_create(xe, vm, logical_mask, width, hwe, flags); + + err = __xe_exec_queue_init(q); if (vm) xe_vm_unlock(vm); + if (err) + goto err_post_alloc; return q; + +err_post_alloc: + __xe_exec_queue_free(q); + return ERR_PTR(err); } struct xe_exec_queue *xe_exec_queue_create_class(struct xe_device *xe, struct xe_gt *gt, @@ -153,7 +197,7 @@ struct xe_exec_queue *xe_exec_queue_create_class(struct xe_device *xe, struct xe if (!logical_mask) return ERR_PTR(-ENODEV); - return xe_exec_queue_create(xe, vm, logical_mask, 1, hwe0, flags); + return xe_exec_queue_create(xe, vm, logical_mask, 1, hwe0, flags, 0); } void xe_exec_queue_destroy(struct kref *ref) @@ -179,10 +223,7 @@ void xe_exec_queue_fini(struct xe_exec_queue *q) xe_lrc_finish(q->lrc + i); if (!(q->flags & EXEC_QUEUE_FLAG_PERMANENT) && (q->flags & EXEC_QUEUE_FLAG_VM || !q->vm)) xe_device_mem_access_put(gt_to_xe(q->gt)); - if (q->vm) - xe_vm_put(q->vm); - - kfree(q); + __xe_exec_queue_free(q); } void xe_exec_queue_assign_name(struct xe_exec_queue *q, u32 instance) @@ -240,7 +281,11 @@ static int exec_queue_set_priority(struct xe_device *xe, struct xe_exec_queue *q if (XE_IOCTL_DBG(xe, value > xe_exec_queue_device_get_max_priority(xe))) return -EPERM; - return q->ops->set_priority(q, value); + if (!create) + return q->ops->set_priority(q, value); + + q->sched_props.priority = value; + return 0; } static bool xe_exec_queue_enforce_schedule_limit(void) @@ -307,7 +352,11 @@ static int exec_queue_set_timeslice(struct xe_device *xe, struct xe_exec_queue * !xe_hw_engine_timeout_in_range(value, min, max)) return -EINVAL; - return q->ops->set_timeslice(q, value); + if (!create) + return q->ops->set_timeslice(q, value); + + q->sched_props.timeslice_us = value; + return 0; } static int exec_queue_set_preemption_timeout(struct xe_device *xe, @@ 
-323,7 +372,11 @@ static int exec_queue_set_preemption_timeout(struct xe_device *xe, !xe_hw_engine_timeout_in_range(value, min, max)) return -EINVAL; - return q->ops->set_preempt_timeout(q, value); + if (!create) + return q->ops->set_preempt_timeout(q, value); + + q->sched_props.preempt_timeout_us = value; + return 0; } static int exec_queue_set_persistence(struct xe_device *xe, struct xe_exec_queue *q, @@ -358,7 +411,9 @@ static int exec_queue_set_job_timeout(struct xe_device *xe, struct xe_exec_queue !xe_hw_engine_timeout_in_range(value, min, max)) return -EINVAL; - return q->ops->set_job_timeout(q, value); + q->sched_props.job_timeout_ms = value; + + return 0; } static int exec_queue_set_acc_trigger(struct xe_device *xe, struct xe_exec_queue *q, @@ -633,6 +688,7 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data, if (eci[0].engine_class == DRM_XE_ENGINE_CLASS_VM_BIND) { for_each_gt(gt, xe, id) { struct xe_exec_queue *new; + u32 flags; if (xe_gt_is_media_type(gt)) continue; @@ -651,14 +707,13 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data, /* The migration vm doesn't hold rpm ref */ xe_device_mem_access_get(xe); + flags = EXEC_QUEUE_FLAG_PERSISTENT | EXEC_QUEUE_FLAG_VM | + (id ? EXEC_QUEUE_FLAG_BIND_ENGINE_CHILD : 0); + migrate_vm = xe_migrate_get_vm(gt_to_tile(gt)->migrate); new = xe_exec_queue_create(xe, migrate_vm, logical_mask, - args->width, hwe, - EXEC_QUEUE_FLAG_PERSISTENT | - EXEC_QUEUE_FLAG_VM | - (id ? - EXEC_QUEUE_FLAG_BIND_ENGINE_CHILD : - 0)); + args->width, hwe, flags, + args->extensions); xe_device_mem_access_put(xe); /* now held by engine */ @@ -706,7 +761,8 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data, q = xe_exec_queue_create(xe, vm, logical_mask, args->width, hwe, xe_vm_in_lr_mode(vm) ? 
0 : - EXEC_QUEUE_FLAG_PERSISTENT); + EXEC_QUEUE_FLAG_PERSISTENT, + args->extensions); up_read(&vm->lock); xe_vm_put(vm); if (IS_ERR(q)) @@ -722,12 +778,6 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data, } } - if (args->extensions) { - err = exec_queue_user_extensions(xe, q, args->extensions, 0, true); - if (XE_IOCTL_DBG(xe, err)) - goto kill_exec_queue; - } - q->persistent.xef = xef; mutex_lock(&xef->exec_queue.lock); diff --git a/drivers/gpu/drm/xe/xe_exec_queue.h b/drivers/gpu/drm/xe/xe_exec_queue.h index d959cc4a1a82..02ce8d204622 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue.h +++ b/drivers/gpu/drm/xe/xe_exec_queue.h @@ -16,7 +16,8 @@ struct xe_file; struct xe_exec_queue *xe_exec_queue_create(struct xe_device *xe, struct xe_vm *vm, u32 logical_mask, u16 width, - struct xe_hw_engine *hw_engine, u32 flags); + struct xe_hw_engine *hw_engine, u32 flags, + u64 extensions); struct xe_exec_queue *xe_exec_queue_create_class(struct xe_device *xe, struct xe_gt *gt, struct xe_vm *vm, enum xe_engine_class class, u32 flags); diff --git a/drivers/gpu/drm/xe/xe_exec_queue_types.h b/drivers/gpu/drm/xe/xe_exec_queue_types.h index 8d4b7feb8c30..e7f84dee5275 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue_types.h +++ b/drivers/gpu/drm/xe/xe_exec_queue_types.h @@ -142,6 +142,8 @@ struct xe_exec_queue { u32 timeslice_us; /** @preempt_timeout_us: preemption timeout in micro-seconds */ u32 preempt_timeout_us; + /** @job_timeout_ms: job timeout in milliseconds */ + u32 job_timeout_ms; /** @priority: priority of this exec queue */ enum xe_exec_queue_priority priority; } sched_props; @@ -198,8 +200,6 @@ struct xe_exec_queue_ops { int (*set_timeslice)(struct xe_exec_queue *q, u32 timeslice_us); /** @set_preempt_timeout: Set preemption timeout for exec queue */ int (*set_preempt_timeout)(struct xe_exec_queue *q, u32 preempt_timeout_us); - /** @set_job_timeout: Set job timeout for exec queue */ - int (*set_job_timeout)(struct xe_exec_queue *q, u32 job_timeout_ms); /** * @suspend: Suspend exec queue from executing, allowed to be called * multiple times in a row before resume with the caveat that diff --git a/drivers/gpu/drm/xe/xe_execlist.c b/drivers/gpu/drm/xe/xe_execlist.c index 96b5224eb478..58dfe6a78ffe 100644 --- a/drivers/gpu/drm/xe/xe_execlist.c +++ b/drivers/gpu/drm/xe/xe_execlist.c @@ -418,13 +418,6 @@ static int execlist_exec_queue_set_preempt_timeout(struct xe_exec_queue *q, return 0; } -static int execlist_exec_queue_set_job_timeout(struct xe_exec_queue *q, - u32 job_timeout_ms) -{ - /* NIY */ - return 0; -} - static int execlist_exec_queue_suspend(struct xe_exec_queue *q) { /* NIY */ @@ -455,7 +448,6 @@ static const struct xe_exec_queue_ops execlist_exec_queue_ops = { .set_priority = execlist_exec_queue_set_priority, .set_timeslice = execlist_exec_queue_set_timeslice, .set_preempt_timeout = execlist_exec_queue_set_preempt_timeout, - .set_job_timeout = execlist_exec_queue_set_job_timeout, .suspend = execlist_exec_queue_suspend, .suspend_wait = execlist_exec_queue_suspend_wait, .resume = execlist_exec_queue_resume, diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c index 3efd2d066bf7..6fdf830678b3 100644 --- a/drivers/gpu/drm/xe/xe_ggtt.c +++ b/drivers/gpu/drm/xe/xe_ggtt.c @@ -11,9 +11,12 @@ #include <drm/i915_drm.h> #include "regs/xe_gt_regs.h" +#include "regs/xe_regs.h" +#include "xe_assert.h" #include "xe_bo.h" #include "xe_device.h" #include "xe_gt.h" +#include "xe_gt_printk.h" #include "xe_gt_tlb_invalidation.h" #include "xe_map.h" #include "xe_mmio.h" @@ 
-312,6 +315,74 @@ void xe_ggtt_printk(struct xe_ggtt *ggtt, const char *prefix) } } +static void xe_ggtt_dump_node(struct xe_ggtt *ggtt, + const struct drm_mm_node *node, const char *description) +{ + char buf[10]; + + if (IS_ENABLED(CONFIG_DRM_XE_DEBUG)) { + string_get_size(node->size, 1, STRING_UNITS_2, buf, sizeof(buf)); + xe_gt_dbg(ggtt->tile->primary_gt, "GGTT %#llx-%#llx (%s) %s\n", + node->start, node->start + node->size, buf, description); + } +} + +/** + * xe_ggtt_balloon - prevent allocation of specified GGTT addresses + * @ggtt: the &xe_ggtt where we want to make the reservation + * @start: the starting GGTT address of the reserved region + * @end: the end GGTT address of the reserved region + * @node: the &drm_mm_node to hold the reserved GGTT node + * + * Use xe_ggtt_deballoon() to release a reserved GGTT node. + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_ggtt_balloon(struct xe_ggtt *ggtt, u64 start, u64 end, struct drm_mm_node *node) +{ + int err; + + xe_tile_assert(ggtt->tile, start < end); + xe_tile_assert(ggtt->tile, IS_ALIGNED(start, XE_PAGE_SIZE)); + xe_tile_assert(ggtt->tile, IS_ALIGNED(end, XE_PAGE_SIZE)); + xe_tile_assert(ggtt->tile, !drm_mm_node_allocated(node)); + + node->color = 0; + node->start = start; + node->size = end - start; + + mutex_lock(&ggtt->lock); + err = drm_mm_reserve_node(&ggtt->mm, node); + mutex_unlock(&ggtt->lock); + + if (xe_gt_WARN(ggtt->tile->primary_gt, err, + "Failed to balloon GGTT %#llx-%#llx (%pe)\n", + node->start, node->start + node->size, ERR_PTR(err))) + return err; + + xe_ggtt_dump_node(ggtt, node, "balloon"); + return 0; +} + +/** + * xe_ggtt_deballoon - release a reserved GGTT region + * @ggtt: the &xe_ggtt where the reserved node belongs + * @node: the &drm_mm_node with the reserved GGTT region + * + * See xe_ggtt_balloon() for details. + */ +void xe_ggtt_deballoon(struct xe_ggtt *ggtt, struct drm_mm_node *node) +{ + if (!drm_mm_node_allocated(node)) + return; + + xe_ggtt_dump_node(ggtt, node, "deballoon"); + + mutex_lock(&ggtt->lock); + drm_mm_remove_node(node); + mutex_unlock(&ggtt->lock); +} + int xe_ggtt_insert_special_node_locked(struct xe_ggtt *ggtt, struct drm_mm_node *node, u32 size, u32 align, u32 mm_flags) { @@ -334,7 +405,8 @@ int xe_ggtt_insert_special_node(struct xe_ggtt *ggtt, struct drm_mm_node *node, void xe_ggtt_map_bo(struct xe_ggtt *ggtt, struct xe_bo *bo) { - u16 pat_index = tile_to_xe(ggtt->tile)->pat.idx[XE_CACHE_WB]; + u16 cache_mode = bo->flags & XE_BO_NEEDS_UC ?
XE_CACHE_NONE : XE_CACHE_WB; + u16 pat_index = tile_to_xe(ggtt->tile)->pat.idx[cache_mode]; u64 start = bo->ggtt_node.start; u64 offset, pte; diff --git a/drivers/gpu/drm/xe/xe_ggtt.h b/drivers/gpu/drm/xe/xe_ggtt.h index a09c166dff70..42705e1338e1 100644 --- a/drivers/gpu/drm/xe/xe_ggtt.h +++ b/drivers/gpu/drm/xe/xe_ggtt.h @@ -16,6 +16,9 @@ int xe_ggtt_init_early(struct xe_ggtt *ggtt); int xe_ggtt_init(struct xe_ggtt *ggtt); void xe_ggtt_printk(struct xe_ggtt *ggtt, const char *prefix); +int xe_ggtt_balloon(struct xe_ggtt *ggtt, u64 start, u64 end, struct drm_mm_node *node); +void xe_ggtt_deballoon(struct xe_ggtt *ggtt, struct drm_mm_node *node); + int xe_ggtt_insert_special_node(struct xe_ggtt *ggtt, struct drm_mm_node *node, u32 size, u32 align); int xe_ggtt_insert_special_node_locked(struct xe_ggtt *ggtt, diff --git a/drivers/gpu/drm/xe/xe_gsc.c b/drivers/gpu/drm/xe/xe_gsc.c index a8a895cf4b44..0b90fd9ef63a 100644 --- a/drivers/gpu/drm/xe/xe_gsc.c +++ b/drivers/gpu/drm/xe/xe_gsc.c @@ -13,6 +13,7 @@ #include "xe_bo.h" #include "xe_device.h" #include "xe_exec_queue.h" +#include "xe_gsc_proxy.h" #include "xe_gsc_submit.h" #include "xe_gt.h" #include "xe_gt_printk.h" @@ -242,8 +243,31 @@ static int gsc_upload(struct xe_gsc *gsc) if (err) return err; + return 0; +} + +static int gsc_upload_and_init(struct xe_gsc *gsc) +{ + struct xe_gt *gt = gsc_to_gt(gsc); + int ret; + + ret = gsc_upload(gsc); + if (ret) + return ret; + + xe_uc_fw_change_status(&gsc->fw, XE_UC_FIRMWARE_TRANSFERRED); xe_gt_dbg(gt, "GSC FW async load completed\n"); + /* HuC auth failure is not fatal */ + if (xe_huc_is_authenticated(&gt->uc.huc, XE_HUC_AUTH_VIA_GUC)) + xe_huc_auth(&gt->uc.huc, XE_HUC_AUTH_VIA_GSC); + + ret = xe_gsc_proxy_start(gsc); + if (ret) + return ret; + + xe_gt_dbg(gt, "GSC proxy init completed\n"); + return 0; } @@ -252,24 +276,28 @@ static void gsc_work(struct work_struct *work) struct xe_gsc *gsc = container_of(work, typeof(*gsc), work); struct xe_gt *gt = gsc_to_gt(gsc); struct xe_device *xe = gt_to_xe(gt); + u32 actions; int ret; + spin_lock_irq(&gsc->lock); + actions = gsc->work_actions; + gsc->work_actions = 0; + spin_unlock_irq(&gsc->lock); + xe_device_mem_access_get(xe); xe_force_wake_get(gt_to_fw(gt), XE_FW_GSC); - ret = gsc_upload(gsc); - if (ret && ret != -EEXIST) { - xe_uc_fw_change_status(&gsc->fw, XE_UC_FIRMWARE_LOAD_FAIL); - goto out; + if (actions & GSC_ACTION_FW_LOAD) { + ret = gsc_upload_and_init(gsc); + if (ret && ret != -EEXIST) + xe_uc_fw_change_status(&gsc->fw, XE_UC_FIRMWARE_LOAD_FAIL); + else + xe_uc_fw_change_status(&gsc->fw, XE_UC_FIRMWARE_RUNNING); } - xe_uc_fw_change_status(&gsc->fw, XE_UC_FIRMWARE_TRANSFERRED); - - /* HuC auth failure is not fatal */ - if (xe_huc_is_authenticated(&gt->uc.huc, XE_HUC_AUTH_VIA_GUC)) - xe_huc_auth(&gt->uc.huc, XE_HUC_AUTH_VIA_GSC); + if (actions & GSC_ACTION_SW_PROXY) + xe_gsc_proxy_request_handler(gsc); -out: xe_force_wake_put(gt_to_fw(gt), XE_FW_GSC); xe_device_mem_access_put(xe); } @@ -282,6 +310,7 @@ int xe_gsc_init(struct xe_gsc *gsc) gsc->fw.type = XE_UC_FW_TYPE_GSC; INIT_WORK(&gsc->work, gsc_work); + spin_lock_init(&gsc->lock); /* The GSC uC is only available on the media GT */ if (tile->media_gt && (gt != tile->media_gt)) { @@ -302,6 +331,10 @@ int xe_gsc_init(struct xe_gsc *gsc) else if (ret) goto out; + ret = xe_gsc_proxy_init(gsc); + if (ret && ret != -ENODEV) + goto out; + return 0; out: @@ -356,7 +389,7 @@ int xe_gsc_init_post_hwconfig(struct xe_gsc *gsc) q = xe_exec_queue_create(xe, NULL, BIT(hwe->logical_instance), 1, hwe,
EXEC_QUEUE_FLAG_KERNEL | - EXEC_QUEUE_FLAG_PERMANENT); + EXEC_QUEUE_FLAG_PERMANENT, 0); if (IS_ERR(q)) { xe_gt_err(gt, "Failed to create queue for GSC submission\n"); err = PTR_ERR(q); @@ -401,6 +434,10 @@ void xe_gsc_load_start(struct xe_gsc *gsc) return; } + spin_lock_irq(&gsc->lock); + gsc->work_actions |= GSC_ACTION_FW_LOAD; + spin_unlock_irq(&gsc->lock); + queue_work(gsc->wq, &gsc->work); } @@ -410,6 +447,15 @@ void xe_gsc_wait_for_worker_completion(struct xe_gsc *gsc) flush_work(&gsc->work); } +/** + * xe_gsc_remove() - Clean up the GSC structures before driver removal + * @gsc: the GSC uC + */ +void xe_gsc_remove(struct xe_gsc *gsc) +{ + xe_gsc_proxy_remove(gsc); +} + /* * wa_14015076503: if the GSC FW is loaded, we need to alert it before doing a * GSC engine reset by writing a notification bit in the GS1 register and then diff --git a/drivers/gpu/drm/xe/xe_gsc.h b/drivers/gpu/drm/xe/xe_gsc.h index bc1ef7f31ea2..c6fb32e3fd79 100644 --- a/drivers/gpu/drm/xe/xe_gsc.h +++ b/drivers/gpu/drm/xe/xe_gsc.h @@ -14,6 +14,7 @@ int xe_gsc_init(struct xe_gsc *gsc); int xe_gsc_init_post_hwconfig(struct xe_gsc *gsc); void xe_gsc_wait_for_worker_completion(struct xe_gsc *gsc); void xe_gsc_load_start(struct xe_gsc *gsc); +void xe_gsc_remove(struct xe_gsc *gsc); void xe_gsc_wa_14015076503(struct xe_gt *gt, bool prep); diff --git a/drivers/gpu/drm/xe/xe_gsc_proxy.c b/drivers/gpu/drm/xe/xe_gsc_proxy.c new file mode 100644 index 000000000000..309ef80e3b95 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_gsc_proxy.c @@ -0,0 +1,537 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2023 Intel Corporation + */ + +#include "xe_gsc_proxy.h" + +#include <linux/component.h> +#include <linux/delay.h> + +#include <drm/drm_managed.h> +#include <drm/i915_component.h> +#include <drm/i915_gsc_proxy_mei_interface.h> + +#include "abi/gsc_proxy_commands_abi.h" +#include "regs/xe_gsc_regs.h" +#include "xe_bo.h" +#include "xe_gsc.h" +#include "xe_gsc_submit.h" +#include "xe_gt.h" +#include "xe_gt_printk.h" +#include "xe_map.h" +#include "xe_mmio.h" +#include "xe_pm.h" + +/* + * GSC proxy: + * The GSC uC needs to communicate with the CSME to perform certain operations. + * Since the GSC can't perform this communication directly on platforms where it + * is integrated in GT, the graphics driver needs to transfer the messages from + * GSC to CSME and back. The proxy flow must be manually started after the GSC + * is loaded to signal to GSC that we're ready to handle its messages and allow + * it to query its init data from CSME; GSC will then trigger an HECI2 interrupt + * if it needs to send messages to CSME again. + * The proxy flow is as follows: + * 1 - Xe submits a request to GSC asking for the message to CSME + * 2 - GSC replies with the proxy header + payload for CSME + * 3 - Xe sends the reply from GSC as-is to CSME via the mei proxy component + * 4 - CSME replies with the proxy header + payload for GSC + * 5 - Xe submits a request to GSC with the reply from CSME + * 6 - GSC replies either with a new header + payload (same as step 2, so we + * restart from there) or with an end message. + */ + +/* + * The component should load quite quickly in most cases, but it could take + * a bit.
Using a very big timeout just to cover the worst case scenario + */ +#define GSC_PROXY_INIT_TIMEOUT_MS 20000 + +/* shorthand define for code compactness */ +#define PROXY_HDR_SIZE (sizeof(struct xe_gsc_proxy_header)) + +/* the protocol supports up to 32K in each direction */ +#define GSC_PROXY_BUFFER_SIZE SZ_32K +#define GSC_PROXY_CHANNEL_SIZE (GSC_PROXY_BUFFER_SIZE * 2) + +static struct xe_gt * +gsc_to_gt(struct xe_gsc *gsc) +{ + return container_of(gsc, struct xe_gt, uc.gsc); +} + +static inline struct xe_device *kdev_to_xe(struct device *kdev) +{ + return dev_get_drvdata(kdev); +} + +static bool gsc_proxy_init_done(struct xe_gsc *gsc) +{ + struct xe_gt *gt = gsc_to_gt(gsc); + u32 fwsts1 = xe_mmio_read32(gt, HECI_FWSTS1(MTL_GSC_HECI1_BASE)); + + return REG_FIELD_GET(HECI1_FWSTS1_CURRENT_STATE, fwsts1) == + HECI1_FWSTS1_PROXY_STATE_NORMAL; +} + +static void __gsc_proxy_irq_rmw(struct xe_gsc *gsc, u32 clr, u32 set) +{ + struct xe_gt *gt = gsc_to_gt(gsc); + + /* make sure we never accidentally write the RST bit */ + clr |= HECI_H_CSR_RST; + + xe_mmio_rmw32(gt, HECI_H_CSR(MTL_GSC_HECI2_BASE), clr, set); +} + +static void gsc_proxy_irq_clear(struct xe_gsc *gsc) +{ + /* The status bit is cleared by writing to it */ + __gsc_proxy_irq_rmw(gsc, 0, HECI_H_CSR_IS); +} + +static void gsc_proxy_irq_toggle(struct xe_gsc *gsc, bool enabled) +{ + u32 set = enabled ? HECI_H_CSR_IE : 0; + u32 clr = enabled ? 0 : HECI_H_CSR_IE; + + __gsc_proxy_irq_rmw(gsc, clr, set); +} + +static int proxy_send_to_csme(struct xe_gsc *gsc, u32 size) +{ + struct xe_gt *gt = gsc_to_gt(gsc); + struct i915_gsc_proxy_component *comp = gsc->proxy.component; + int ret; + + ret = comp->ops->send(comp->mei_dev, gsc->proxy.to_csme, size); + if (ret < 0) { + xe_gt_err(gt, "Failed to send CSME proxy message\n"); + return ret; + } + + ret = comp->ops->recv(comp->mei_dev, gsc->proxy.from_csme, GSC_PROXY_BUFFER_SIZE); + if (ret < 0) { + xe_gt_err(gt, "Failed to receive CSME proxy message\n"); + return ret; + } + + return ret; +} + +static int proxy_send_to_gsc(struct xe_gsc *gsc, u32 size) +{ + struct xe_gt *gt = gsc_to_gt(gsc); + u64 addr_in = xe_bo_ggtt_addr(gsc->proxy.bo); + u64 addr_out = addr_in + GSC_PROXY_BUFFER_SIZE; + int err; + + /* the message must contain at least the gsc and proxy headers */ + if (size > GSC_PROXY_BUFFER_SIZE) { + xe_gt_err(gt, "Invalid GSC proxy message size: %u\n", size); + return -EINVAL; + } + + err = xe_gsc_pkt_submit_kernel(gsc, addr_in, size, + addr_out, GSC_PROXY_BUFFER_SIZE); + if (err) { + xe_gt_err(gt, "Failed to submit gsc proxy rq (%pe)\n", ERR_PTR(err)); + return err; + } + + return 0; +} + +static int validate_proxy_header(struct xe_gsc_proxy_header *header, + u32 source, u32 dest, u32 max_size) +{ + u32 type = FIELD_GET(GSC_PROXY_TYPE, header->hdr); + u32 length = FIELD_GET(GSC_PROXY_PAYLOAD_LENGTH, header->hdr); + + if (header->destination != dest || header->source != source) + return -ENOEXEC; + + if (length + PROXY_HDR_SIZE > max_size) + return -E2BIG; + + switch (type) { + case GSC_PROXY_MSG_TYPE_PROXY_PAYLOAD: + if (length > 0) + break; + fallthrough; + case GSC_PROXY_MSG_TYPE_PROXY_INVALID: + return -EIO; + default: + break; + } + + return 0; +} + +#define proxy_header_wr(xe_, map_, offset_, field_, val_) \ + xe_map_wr_field(xe_, map_, offset_, struct xe_gsc_proxy_header, field_, val_) + +#define proxy_header_rd(xe_, map_, offset_, field_) \ + xe_map_rd_field(xe_, map_, offset_, struct xe_gsc_proxy_header, field_) + +static u32 emit_proxy_header(struct xe_device *xe, struct iosys_map 
*map, u32 offset) +{ + xe_map_memset(xe, map, offset, 0, PROXY_HDR_SIZE); + + proxy_header_wr(xe, map, offset, hdr, + FIELD_PREP(GSC_PROXY_TYPE, GSC_PROXY_MSG_TYPE_PROXY_QUERY) | + FIELD_PREP(GSC_PROXY_PAYLOAD_LENGTH, 0)); + + proxy_header_wr(xe, map, offset, source, GSC_PROXY_ADDRESSING_KMD); + proxy_header_wr(xe, map, offset, destination, GSC_PROXY_ADDRESSING_GSC); + proxy_header_wr(xe, map, offset, status, 0); + + return offset + PROXY_HDR_SIZE; +} + +static int proxy_query(struct xe_gsc *gsc) +{ + struct xe_gt *gt = gsc_to_gt(gsc); + struct xe_device *xe = gt_to_xe(gt); + struct xe_gsc_proxy_header *to_csme_hdr = gsc->proxy.to_csme; + void *to_csme_payload = gsc->proxy.to_csme + PROXY_HDR_SIZE; + u32 wr_offset; + u32 reply_offset; + u32 size; + int ret; + + wr_offset = xe_gsc_emit_header(xe, &gsc->proxy.to_gsc, 0, + HECI_MEADDRESS_PROXY, 0, PROXY_HDR_SIZE); + wr_offset = emit_proxy_header(xe, &gsc->proxy.to_gsc, wr_offset); + + size = wr_offset; + + while (1) { + /* + * Poison the GSC response header space to make sure we don't + * read a stale reply. + */ + xe_gsc_poison_header(xe, &gsc->proxy.from_gsc, 0); + + /* send proxy message to GSC */ + ret = proxy_send_to_gsc(gsc, size); + if (ret) + goto proxy_error; + + /* check the reply from GSC */ + ret = xe_gsc_read_out_header(xe, &gsc->proxy.from_gsc, 0, + PROXY_HDR_SIZE, &reply_offset); + if (ret) { + xe_gt_err(gt, "Invalid gsc header in proxy reply (%pe)\n", + ERR_PTR(ret)); + goto proxy_error; + } + + /* copy the proxy header reply from GSC */ + xe_map_memcpy_from(xe, to_csme_hdr, &gsc->proxy.from_gsc, + reply_offset, PROXY_HDR_SIZE); + + /* stop if this was the last message */ + if (FIELD_GET(GSC_PROXY_TYPE, to_csme_hdr->hdr) == GSC_PROXY_MSG_TYPE_PROXY_END) + break; + + /* make sure the GSC-to-CSME proxy header is sane */ + ret = validate_proxy_header(to_csme_hdr, + GSC_PROXY_ADDRESSING_GSC, + GSC_PROXY_ADDRESSING_CSME, + GSC_PROXY_BUFFER_SIZE - reply_offset); + if (ret) { + xe_gt_err(gt, "invalid GSC to CSME proxy header! (%pe)\n", + ERR_PTR(ret)); + goto proxy_error; + } + + /* copy the rest of the message */ + size = FIELD_GET(GSC_PROXY_PAYLOAD_LENGTH, to_csme_hdr->hdr); + xe_map_memcpy_from(xe, to_csme_payload, &gsc->proxy.from_gsc, + reply_offset + PROXY_HDR_SIZE, size); + + /* send the GSC message to the CSME */ + ret = proxy_send_to_csme(gsc, size + PROXY_HDR_SIZE); + if (ret < 0) + goto proxy_error; + + /* reply size from CSME, including the proxy header */ + size = ret; + if (size < PROXY_HDR_SIZE) { + xe_gt_err(gt, "CSME to GSC proxy msg too small: 0x%x\n", size); + ret = -EPROTO; + goto proxy_error; + } + + /* make sure the CSME-to-GSC proxy header is sane */ + ret = validate_proxy_header(gsc->proxy.from_csme, + GSC_PROXY_ADDRESSING_CSME, + GSC_PROXY_ADDRESSING_GSC, + GSC_PROXY_BUFFER_SIZE - reply_offset); + if (ret) { + xe_gt_err(gt, "invalid CSME to GSC proxy header! %d\n", ret); + goto proxy_error; + } + + /* Emit a new header for sending the reply to the GSC */ + wr_offset = xe_gsc_emit_header(xe, &gsc->proxy.to_gsc, 0, + HECI_MEADDRESS_PROXY, 0, size); + + /* copy the CSME reply and update the total msg size to include the GSC header */ + xe_map_memcpy_to(xe, &gsc->proxy.to_gsc, wr_offset, gsc->proxy.from_csme, size); + + size += wr_offset; + } + +proxy_error: + return ret < 0 ? 
ret : 0; +} + +int xe_gsc_proxy_request_handler(struct xe_gsc *gsc) +{ + struct xe_gt *gt = gsc_to_gt(gsc); + int slept; + int err; + + if (!gsc->proxy.component_added) + return -ENODEV; + + /* when GSC is loaded, we can queue this before the component is bound */ + for (slept = 0; slept < GSC_PROXY_INIT_TIMEOUT_MS; slept += 100) { + if (gsc->proxy.component) + break; + + msleep(100); + } + + mutex_lock(&gsc->proxy.mutex); + if (!gsc->proxy.component) { + xe_gt_err(gt, "GSC proxy component not bound!\n"); + err = -EIO; + } else { + /* + * clear the pending interrupt and allow new proxy requests to + * be generated while we handle the current one + */ + gsc_proxy_irq_clear(gsc); + err = proxy_query(gsc); + } + mutex_unlock(&gsc->proxy.mutex); + return err; +} + +void xe_gsc_proxy_irq_handler(struct xe_gsc *gsc, u32 iir) +{ + struct xe_gt *gt = gsc_to_gt(gsc); + + if (unlikely(!iir)) + return; + + if (!gsc->proxy.component) { + xe_gt_err(gt, "GSC proxy irq received without the component being bound!\n"); + return; + } + + spin_lock(&gsc->lock); + gsc->work_actions |= GSC_ACTION_SW_PROXY; + spin_unlock(&gsc->lock); + + queue_work(gsc->wq, &gsc->work); +} + +static int xe_gsc_proxy_component_bind(struct device *xe_kdev, + struct device *mei_kdev, void *data) +{ + struct xe_device *xe = kdev_to_xe(xe_kdev); + struct xe_gt *gt = xe->tiles[0].media_gt; + struct xe_gsc *gsc = &gt->uc.gsc; + + mutex_lock(&gsc->proxy.mutex); + gsc->proxy.component = data; + gsc->proxy.component->mei_dev = mei_kdev; + mutex_unlock(&gsc->proxy.mutex); + + return 0; +} + +static void xe_gsc_proxy_component_unbind(struct device *xe_kdev, + struct device *mei_kdev, void *data) +{ + struct xe_device *xe = kdev_to_xe(xe_kdev); + struct xe_gt *gt = xe->tiles[0].media_gt; + struct xe_gsc *gsc = &gt->uc.gsc; + + xe_gsc_wait_for_worker_completion(gsc); + + mutex_lock(&gsc->proxy.mutex); + gsc->proxy.component = NULL; + mutex_unlock(&gsc->proxy.mutex); +} + +static const struct component_ops xe_gsc_proxy_component_ops = { + .bind = xe_gsc_proxy_component_bind, + .unbind = xe_gsc_proxy_component_unbind, +}; + +static void proxy_channel_free(struct drm_device *drm, void *arg) +{ + struct xe_gsc *gsc = arg; + + if (!gsc->proxy.bo) + return; + + if (gsc->proxy.to_csme) { + kfree(gsc->proxy.to_csme); + gsc->proxy.to_csme = NULL; + gsc->proxy.from_csme = NULL; + } + + if (gsc->proxy.bo) { + iosys_map_clear(&gsc->proxy.to_gsc); + iosys_map_clear(&gsc->proxy.from_gsc); + xe_bo_unpin_map_no_vm(gsc->proxy.bo); + gsc->proxy.bo = NULL; + } +} + +static int proxy_channel_alloc(struct xe_gsc *gsc) +{ + struct xe_gt *gt = gsc_to_gt(gsc); + struct xe_tile *tile = gt_to_tile(gt); + struct xe_device *xe = gt_to_xe(gt); + struct xe_bo *bo; + void *csme; + int err; + + csme = kzalloc(GSC_PROXY_CHANNEL_SIZE, GFP_KERNEL); + if (!csme) + return -ENOMEM; + + bo = xe_bo_create_pin_map(xe, tile, NULL, GSC_PROXY_CHANNEL_SIZE, + ttm_bo_type_kernel, + XE_BO_CREATE_SYSTEM_BIT | + XE_BO_CREATE_GGTT_BIT); + if (IS_ERR(bo)) { + kfree(csme); + return PTR_ERR(bo); + } + + gsc->proxy.bo = bo; + gsc->proxy.to_gsc = IOSYS_MAP_INIT_OFFSET(&bo->vmap, 0); + gsc->proxy.from_gsc = IOSYS_MAP_INIT_OFFSET(&bo->vmap, GSC_PROXY_BUFFER_SIZE); + gsc->proxy.to_csme = csme; + gsc->proxy.from_csme = csme + GSC_PROXY_BUFFER_SIZE; + + err = drmm_add_action_or_reset(&xe->drm, proxy_channel_free, gsc); + if (err) + return err; + + return 0; +} + +/** + * xe_gsc_proxy_init() - init objects and MEI component required by GSC proxy + * @gsc: the GSC uC + * + * Return: 0 if the initialization
was successful, a negative errno otherwise. + */ +int xe_gsc_proxy_init(struct xe_gsc *gsc) +{ + int err; + struct xe_gt *gt = gsc_to_gt(gsc); + struct xe_tile *tile = gt_to_tile(gt); + struct xe_device *xe = tile_to_xe(tile); + + mutex_init(&gsc->proxy.mutex); + + if (!IS_ENABLED(CONFIG_INTEL_MEI_GSC_PROXY)) { + xe_gt_info(gt, "can't init GSC proxy due to missing mei component\n"); + return -ENODEV; + } + + /* no multi-tile devices with this feature yet */ + if (tile->id > 0) { + xe_gt_err(gt, "unexpected GSC proxy init on tile %u\n", tile->id); + return -EINVAL; + } + + err = proxy_channel_alloc(gsc); + if (err) + return err; + + err = component_add_typed(xe->drm.dev, &xe_gsc_proxy_component_ops, + I915_COMPONENT_GSC_PROXY); + if (err < 0) { + xe_gt_err(gt, "Failed to add GSC_PROXY component (%pe)\n", ERR_PTR(err)); + return err; + } + + gsc->proxy.component_added = true; + + /* the component must be removed before unload, so can't use drmm for cleanup */ + + return 0; +} + +/** + * xe_gsc_proxy_remove() - remove the GSC proxy MEI component + * @gsc: the GSC uC + */ +void xe_gsc_proxy_remove(struct xe_gsc *gsc) +{ + struct xe_gt *gt = gsc_to_gt(gsc); + struct xe_device *xe = gt_to_xe(gt); + int err = 0; + + if (!gsc->proxy.component_added) + return; + + /* disable HECI2 IRQs */ + xe_pm_runtime_get(xe); + err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GSC); + if (err) + xe_gt_err(gt, "failed to get forcewake to disable GSC interrupts\n"); + + /* try to disable the irq even if forcewake failed */ + gsc_proxy_irq_toggle(gsc, false); + + if (!err) + xe_force_wake_put(gt_to_fw(gt), XE_FW_GSC); + xe_pm_runtime_put(xe); + + xe_gsc_wait_for_worker_completion(gsc); + + component_del(xe->drm.dev, &xe_gsc_proxy_component_ops); + gsc->proxy.component_added = false; +} + +/** + * xe_gsc_proxy_start() - start the proxy by submitting the first request + * @gsc: the GSC uC + * + * Return: 0 if the proxy is now enabled, a negative errno otherwise. + */ +int xe_gsc_proxy_start(struct xe_gsc *gsc) +{ + int err; + + /* enable the proxy interrupt in the GSC shim layer */ + gsc_proxy_irq_toggle(gsc, true); + + /* + * The handling of the first proxy request must be manually triggered to + * notify the GSC that we're ready to support the proxy flow.
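For reference, the hdr dword produced by emit_proxy_header() and checked by validate_proxy_header() above packs the message type and payload length with the usual bitfield helpers. A small sketch of the round trip; the 64-byte payload length is an arbitrary example value and the function names are hypothetical:

#include <linux/bitfield.h>

#include "abi/gsc_proxy_commands_abi.h"

static u32 example_pack_hdr(void)
{
	/* type in bits 0-7, payload length in bytes in bits 16-31 */
	return FIELD_PREP(GSC_PROXY_TYPE, GSC_PROXY_MSG_TYPE_PROXY_QUERY) |
	       FIELD_PREP(GSC_PROXY_PAYLOAD_LENGTH, 64);
}

static u32 example_payload_length(u32 hdr)
{
	return FIELD_GET(GSC_PROXY_PAYLOAD_LENGTH, hdr); /* 64 for the hdr above */
}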
+ */ + err = xe_gsc_proxy_request_handler(gsc); + if (err) + return err; + + if (!gsc_proxy_init_done(gsc)) { + xe_gt_err(gsc_to_gt(gsc), "GSC FW reports proxy init not completed\n"); + return -EIO; + } + + return 0; +} diff --git a/drivers/gpu/drm/xe/xe_gsc_proxy.h b/drivers/gpu/drm/xe/xe_gsc_proxy.h new file mode 100644 index 000000000000..908f9441f093 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_gsc_proxy.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023 Intel Corporation + */ + +#ifndef _XE_GSC_PROXY_H_ +#define _XE_GSC_PROXY_H_ + +#include <linux/types.h> + +struct xe_gsc; + +int xe_gsc_proxy_init(struct xe_gsc *gsc); +void xe_gsc_proxy_remove(struct xe_gsc *gsc); +int xe_gsc_proxy_start(struct xe_gsc *gsc); + +int xe_gsc_proxy_request_handler(struct xe_gsc *gsc); +void xe_gsc_proxy_irq_handler(struct xe_gsc *gsc, u32 iir); + +#endif diff --git a/drivers/gpu/drm/xe/xe_gsc_submit.c b/drivers/gpu/drm/xe/xe_gsc_submit.c index 8c5381e5913f..9ecc1ead6844 100644 --- a/drivers/gpu/drm/xe/xe_gsc_submit.c +++ b/drivers/gpu/drm/xe/xe_gsc_submit.c @@ -5,6 +5,8 @@ #include "xe_gsc_submit.h" +#include <linux/poison.h> + #include "abi/gsc_command_header_abi.h" #include "xe_bb.h" #include "xe_exec_queue.h" @@ -69,6 +71,17 @@ u32 xe_gsc_emit_header(struct xe_device *xe, struct iosys_map *map, u32 offset, }; /** + * xe_gsc_poison_header - poison the MTL GSC header in memory + * @xe: the Xe device + * @map: the iosys map to write to + * @offset: offset from the start of the map at which the header resides + */ +void xe_gsc_poison_header(struct xe_device *xe, struct iosys_map *map, u32 offset) +{ + xe_map_memset(xe, map, offset, POISON_FREE, GSC_HDR_SIZE); +}; + +/** * xe_gsc_check_and_update_pending - check the pending bit and update the input * header with the retry handle from the output header * @xe: the Xe device diff --git a/drivers/gpu/drm/xe/xe_gsc_submit.h b/drivers/gpu/drm/xe/xe_gsc_submit.h index 0801da5d446a..1939855031a6 100644 --- a/drivers/gpu/drm/xe/xe_gsc_submit.h +++ b/drivers/gpu/drm/xe/xe_gsc_submit.h @@ -14,6 +14,7 @@ struct xe_gsc; u32 xe_gsc_emit_header(struct xe_device *xe, struct iosys_map *map, u32 offset, u8 heci_client_id, u64 host_session_id, u32 payload_size); +void xe_gsc_poison_header(struct xe_device *xe, struct iosys_map *map, u32 offset); bool xe_gsc_check_and_update_pending(struct xe_device *xe, struct iosys_map *in, u32 offset_in, diff --git a/drivers/gpu/drm/xe/xe_gsc_types.h b/drivers/gpu/drm/xe/xe_gsc_types.h index 57fefd66a7ea..060d0fe848ad 100644 --- a/drivers/gpu/drm/xe/xe_gsc_types.h +++ b/drivers/gpu/drm/xe/xe_gsc_types.h @@ -6,12 +6,17 @@ #ifndef _XE_GSC_TYPES_H_ #define _XE_GSC_TYPES_H_ +#include <linux/iosys-map.h> +#include <linux/mutex.h> +#include <linux/spinlock.h> +#include <linux/types.h> #include <linux/workqueue.h> #include "xe_uc_fw_types.h" struct xe_bo; struct xe_exec_queue; +struct i915_gsc_proxy_component; /** * struct xe_gsc - GSC @@ -34,6 +39,34 @@ struct xe_gsc { /** @work: delayed load and proxy handling work */ struct work_struct work; + + /** @lock: protects access to the work_actions mask */ + spinlock_t lock; + + /** @work_actions: mask of actions to be performed in the work */ + u32 work_actions; +#define GSC_ACTION_FW_LOAD BIT(0) +#define GSC_ACTION_SW_PROXY BIT(1) + + /** @proxy: sub-structure containing the SW proxy-related variables */ + struct { + /** @component: struct for communication with mei component */ + struct i915_gsc_proxy_component *component; + /** @mutex: protects the component binding and 
usage */ + struct mutex mutex; + /** @component_added: whether the component has been added */ + bool component_added; + /** @bo: object to store messages to and from the GSC */ + struct xe_bo *bo; + /** @to_gsc: map of the memory used to send messages to the GSC */ + struct iosys_map to_gsc; + /** @from_gsc: map of the memory used to recv messages from the GSC */ + struct iosys_map from_gsc; + /** @to_csme: pointer to the memory used to send messages to CSME */ + void *to_csme; + /** @from_csme: pointer to the memory used to recv messages from CSME */ + void *from_csme; + } proxy; }; #endif diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index 3af2adec1295..1fe4d54409d3 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -78,6 +78,19 @@ void xe_gt_sanitize(struct xe_gt *gt) gt->uc.guc.submission_state.enabled = false; } +/** + * xe_gt_remove() - Clean up the GT structures before driver removal + * @gt: the GT object + * + * This function should only act on objects/structures that must be cleaned + * before the driver removal callback is complete and therefore can't be + * deferred to a drmm action. + */ +void xe_gt_remove(struct xe_gt *gt) +{ + xe_uc_remove(&gt->uc); +} + static void gt_fini(struct drm_device *drm, void *arg) { struct xe_gt *gt = arg; @@ -235,7 +248,7 @@ int xe_gt_record_default_lrcs(struct xe_gt *gt) return -ENOMEM; q = xe_exec_queue_create(xe, NULL, BIT(hwe->logical_instance), 1, - hwe, EXEC_QUEUE_FLAG_KERNEL); + hwe, EXEC_QUEUE_FLAG_KERNEL, 0); if (IS_ERR(q)) { err = PTR_ERR(q); xe_gt_err(gt, "hwe %s: xe_exec_queue_create failed (%pe)\n", @@ -252,7 +265,7 @@ int xe_gt_record_default_lrcs(struct xe_gt *gt) } nop_q = xe_exec_queue_create(xe, NULL, BIT(hwe->logical_instance), - 1, hwe, EXEC_QUEUE_FLAG_KERNEL); + 1, hwe, EXEC_QUEUE_FLAG_KERNEL, 0); if (IS_ERR(nop_q)) { err = PTR_ERR(nop_q); xe_gt_err(gt, "hwe %s: nop xe_exec_queue_create failed (%pe)\n", diff --git a/drivers/gpu/drm/xe/xe_gt.h b/drivers/gpu/drm/xe/xe_gt.h index 4486e083f5ef..c1675bd44cf6 100644 --- a/drivers/gpu/drm/xe/xe_gt.h +++ b/drivers/gpu/drm/xe/xe_gt.h @@ -41,6 +41,7 @@ int xe_gt_suspend(struct xe_gt *gt); int xe_gt_resume(struct xe_gt *gt); void xe_gt_reset_async(struct xe_gt *gt); void xe_gt_sanitize(struct xe_gt *gt); +void xe_gt_remove(struct xe_gt *gt); /** * xe_gt_any_hw_engine_by_reset_domain - scan the list of engines and return the diff --git a/drivers/gpu/drm/xe/xe_gt_pagefault.c b/drivers/gpu/drm/xe/xe_gt_pagefault.c index 59a70d2e0a7a..7ce67c9d30a7 100644 --- a/drivers/gpu/drm/xe/xe_gt_pagefault.c +++ b/drivers/gpu/drm/xe/xe_gt_pagefault.c @@ -282,9 +282,9 @@ static bool get_pagefault(struct pf_queue *pf_queue, struct pagefault *pf) bool ret = false; spin_lock_irq(&pf_queue->lock); - if (pf_queue->head != pf_queue->tail) { + if (pf_queue->tail != pf_queue->head) { desc = (const struct xe_guc_pagefault_desc *) - (pf_queue->data + pf_queue->head); + (pf_queue->data + pf_queue->tail); pf->fault_level = FIELD_GET(PFD_FAULT_LEVEL, desc->dw0); pf->trva_fault = FIELD_GET(XE2_PFD_TRVA_FAULT, desc->dw0); @@ -302,7 +302,7 @@ static bool get_pagefault(struct pf_queue *pf_queue, struct pagefault *pf) pf->page_addr |= FIELD_GET(PFD_VIRTUAL_ADDR_LO, desc->dw2) << PFD_VIRTUAL_ADDR_LO_SHIFT; - pf_queue->head = (pf_queue->head + PF_MSG_LEN_DW) % + pf_queue->tail = (pf_queue->tail + PF_MSG_LEN_DW) % PF_QUEUE_NUM_DW; ret = true; } @@ -315,7 +315,7 @@ static bool pf_queue_full(struct pf_queue *pf_queue) { lockdep_assert_held(&pf_queue->lock); - return
CIRC_SPACE(pf_queue->tail, pf_queue->head, PF_QUEUE_NUM_DW) <=
+	return CIRC_SPACE(pf_queue->head, pf_queue->tail, PF_QUEUE_NUM_DW) <=
 		PF_MSG_LEN_DW;
 }
 
@@ -328,6 +328,11 @@ int xe_guc_pagefault_handler(struct xe_guc *guc, u32 *msg, u32 len)
 	u32 asid;
 	bool full;
 
+	/*
+	 * The below logic doesn't work unless PF_QUEUE_NUM_DW % PF_MSG_LEN_DW == 0
+	 */
+	BUILD_BUG_ON(PF_QUEUE_NUM_DW % PF_MSG_LEN_DW);
+
 	if (unlikely(len != PF_MSG_LEN_DW))
 		return -EPROTO;
 
@@ -337,8 +342,8 @@ int xe_guc_pagefault_handler(struct xe_guc *guc, u32 *msg, u32 len)
 	spin_lock_irqsave(&pf_queue->lock, flags);
 	full = pf_queue_full(pf_queue);
 	if (!full) {
-		memcpy(pf_queue->data + pf_queue->tail, msg, len * sizeof(u32));
-		pf_queue->tail = (pf_queue->tail + len) % PF_QUEUE_NUM_DW;
+		memcpy(pf_queue->data + pf_queue->head, msg, len * sizeof(u32));
+		pf_queue->head = (pf_queue->head + len) % PF_QUEUE_NUM_DW;
 		queue_work(gt->usm.pf_wq, &pf_queue->worker);
 	} else {
 		drm_warn(&xe->drm, "PF Queue full, shouldn't be possible");
@@ -384,7 +389,7 @@ static void pf_queue_work_func(struct work_struct *w)
 		send_pagefault_reply(&gt->uc.guc, &reply);
 
 		if (time_after(jiffies, threshold) &&
-		    pf_queue->head != pf_queue->tail) {
+		    pf_queue->tail != pf_queue->head) {
 			queue_work(gt->usm.pf_wq, w);
 			break;
 		}
@@ -559,9 +564,9 @@ static bool get_acc(struct acc_queue *acc_queue, struct acc *acc)
 	bool ret = false;
 
 	spin_lock(&acc_queue->lock);
-	if (acc_queue->head != acc_queue->tail) {
+	if (acc_queue->tail != acc_queue->head) {
 		desc = (const struct xe_guc_acc_desc *)
-			(acc_queue->data + acc_queue->head);
+			(acc_queue->data + acc_queue->tail);
 
 		acc->granularity = FIELD_GET(ACC_GRANULARITY, desc->dw2);
 		acc->sub_granularity = FIELD_GET(ACC_SUBG_HI, desc->dw1) << 31 |
@@ -574,7 +579,7 @@ static bool get_acc(struct acc_queue *acc_queue, struct acc *acc)
 		acc->va_range_base = make_u64(desc->dw3 & ACC_VIRTUAL_ADDR_RANGE_HI,
 					      desc->dw2 & ACC_VIRTUAL_ADDR_RANGE_LO);
 
-		acc_queue->head = (acc_queue->head + ACC_MSG_LEN_DW) %
+		acc_queue->tail = (acc_queue->tail + ACC_MSG_LEN_DW) %
 				  ACC_QUEUE_NUM_DW;
 		ret = true;
 	}
@@ -602,7 +607,7 @@ static void acc_queue_work_func(struct work_struct *w)
 		}
 
 		if (time_after(jiffies, threshold) &&
-		    acc_queue->head != acc_queue->tail) {
+		    acc_queue->tail != acc_queue->head) {
 			queue_work(gt->usm.acc_wq, w);
 			break;
 		}
@@ -613,7 +618,7 @@ static bool acc_queue_full(struct acc_queue *acc_queue)
 {
 	lockdep_assert_held(&acc_queue->lock);
 
-	return CIRC_SPACE(acc_queue->tail, acc_queue->head, ACC_QUEUE_NUM_DW) <=
+	return CIRC_SPACE(acc_queue->head, acc_queue->tail, ACC_QUEUE_NUM_DW) <=
 		ACC_MSG_LEN_DW;
 }
 
@@ -624,6 +629,11 @@ int xe_guc_access_counter_notify_handler(struct xe_guc *guc, u32 *msg, u32 len)
 	u32 asid;
 	bool full;
 
+	/*
+	 * The below logic doesn't work unless ACC_QUEUE_NUM_DW % ACC_MSG_LEN_DW == 0
+	 */
+	BUILD_BUG_ON(ACC_QUEUE_NUM_DW % ACC_MSG_LEN_DW);
+
 	if (unlikely(len != ACC_MSG_LEN_DW))
 		return -EPROTO;
 
@@ -633,9 +643,9 @@ int xe_guc_access_counter_notify_handler(struct xe_guc *guc, u32 *msg, u32 len)
 	spin_lock(&acc_queue->lock);
 	full = acc_queue_full(acc_queue);
 	if (!full) {
-		memcpy(acc_queue->data + acc_queue->tail, msg,
+		memcpy(acc_queue->data + acc_queue->head, msg,
 		       len * sizeof(u32));
-		acc_queue->tail = (acc_queue->tail + len) % ACC_QUEUE_NUM_DW;
+		acc_queue->head = (acc_queue->head + len) % ACC_QUEUE_NUM_DW;
 		queue_work(gt->usm.acc_wq, &acc_queue->worker);
 	} else {
 		drm_warn(&gt_to_xe(gt)->drm, "ACC Queue full, dropping ACC");
diff --git a/drivers/gpu/drm/xe/xe_gt_printk.h b/drivers/gpu/drm/xe/xe_gt_printk.h
index 
5991bcadd47e..c2b004d3f48e 100644 --- a/drivers/gpu/drm/xe/xe_gt_printk.h +++ b/drivers/gpu/drm/xe/xe_gt_printk.h @@ -43,4 +43,48 @@ #define xe_gt_WARN_ON_ONCE(_gt, _condition) \ xe_gt_WARN_ONCE((_gt), _condition, "%s(%s)", "gt_WARN_ON_ONCE", __stringify(_condition)) +static inline void __xe_gt_printfn_err(struct drm_printer *p, struct va_format *vaf) +{ + struct xe_gt *gt = p->arg; + + xe_gt_err(gt, "%pV", vaf); +} + +static inline void __xe_gt_printfn_info(struct drm_printer *p, struct va_format *vaf) +{ + struct xe_gt *gt = p->arg; + + xe_gt_info(gt, "%pV", vaf); +} + +/** + * xe_gt_err_printer - Construct a &drm_printer that outputs to xe_gt_err() + * @gt: the &xe_gt pointer to use in xe_gt_err() + * + * Return: The &drm_printer object. + */ +static inline struct drm_printer xe_gt_err_printer(struct xe_gt *gt) +{ + struct drm_printer p = { + .printfn = __xe_gt_printfn_err, + .arg = gt, + }; + return p; +} + +/** + * xe_gt_info_printer - Construct a &drm_printer that outputs to xe_gt_info() + * @gt: the &xe_gt pointer to use in xe_gt_info() + * + * Return: The &drm_printer object. + */ +static inline struct drm_printer xe_gt_info_printer(struct xe_gt *gt) +{ + struct drm_printer p = { + .printfn = __xe_gt_printfn_info, + .arg = gt, + }; + return p; +} + #endif diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_printk.h b/drivers/gpu/drm/xe/xe_gt_sriov_printk.h new file mode 100644 index 000000000000..17624b16300a --- /dev/null +++ b/drivers/gpu/drm/xe/xe_gt_sriov_printk.h @@ -0,0 +1,34 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023 Intel Corporation + */ + +#ifndef _XE_GT_SRIOV_PRINTK_H_ +#define _XE_GT_SRIOV_PRINTK_H_ + +#include "xe_gt_printk.h" +#include "xe_sriov_printk.h" + +#define __xe_gt_sriov_printk(gt, _level, fmt, ...) \ + xe_gt_printk((gt), _level, "%s" fmt, xe_sriov_printk_prefix(gt_to_xe(gt)), ##__VA_ARGS__) + +#define xe_gt_sriov_err(_gt, _fmt, ...) \ + __xe_gt_sriov_printk(_gt, err, _fmt, ##__VA_ARGS__) + +#define xe_gt_sriov_notice(_gt, _fmt, ...) \ + __xe_gt_sriov_printk(_gt, notice, _fmt, ##__VA_ARGS__) + +#define xe_gt_sriov_info(_gt, _fmt, ...) \ + __xe_gt_sriov_printk(_gt, info, _fmt, ##__VA_ARGS__) + +#define xe_gt_sriov_dbg(_gt, _fmt, ...) \ + __xe_gt_sriov_printk(_gt, dbg, _fmt, ##__VA_ARGS__) + +/* for low level noisy debug messages */ +#ifdef CONFIG_DRM_XE_DEBUG_SRIOV +#define xe_gt_sriov_dbg_verbose(_gt, _fmt, ...) xe_gt_sriov_dbg(_gt, _fmt, ##__VA_ARGS__) +#else +#define xe_gt_sriov_dbg_verbose(_gt, _fmt, ...) 
typecheck(struct xe_gt *, (_gt))
+#endif
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
index 7eef23a00d77..e3a4131ebb58 100644
--- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
+++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
@@ -8,6 +8,7 @@
 #include "abi/guc_actions_abi.h"
 #include "xe_device.h"
 #include "xe_gt.h"
+#include "xe_gt_printk.h"
 #include "xe_guc.h"
 #include "xe_guc_ct.h"
 #include "xe_trace.h"
@@ -30,8 +31,8 @@ static void xe_gt_tlb_fence_timeout(struct work_struct *work)
 			break;
 
 		trace_xe_gt_tlb_invalidation_fence_timeout(fence);
-		drm_err(&gt_to_xe(gt)->drm, "gt%d: TLB invalidation fence timeout, seqno=%d recv=%d",
-			gt->info.id, fence->seqno, gt->tlb_invalidation.seqno_recv);
+		xe_gt_err(gt, "TLB invalidation fence timeout, seqno=%d recv=%d",
+			  fence->seqno, gt->tlb_invalidation.seqno_recv);
 
 		list_del(&fence->link);
 		fence->base.error = -ETIME;
@@ -312,9 +313,7 @@ int xe_gt_tlb_invalidation_vma(struct xe_gt *gt,
  */
 int xe_gt_tlb_invalidation_wait(struct xe_gt *gt, int seqno)
 {
-	struct xe_device *xe = gt_to_xe(gt);
 	struct xe_guc *guc = &gt->uc.guc;
-	struct drm_printer p = drm_err_printer(__func__);
 	int ret;
 
 	/*
@@ -325,8 +324,10 @@ int xe_gt_tlb_invalidation_wait(struct xe_gt *gt, int seqno)
 				 tlb_invalidation_seqno_past(gt, seqno),
 				 TLB_TIMEOUT);
 	if (!ret) {
-		drm_err(&xe->drm, "gt%d: TLB invalidation time'd out, seqno=%d, recv=%d\n",
-			gt->info.id, seqno, gt->tlb_invalidation.seqno_recv);
+		struct drm_printer p = xe_gt_err_printer(gt);
+
+		xe_gt_err(gt, "TLB invalidation timed out, seqno=%d, recv=%d\n",
+			  seqno, gt->tlb_invalidation.seqno_recv);
 		xe_guc_ct_print(&guc->ct, &p, true);
 		return -ETIME;
 	}
diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h
index f74684660475..047cde6cda10 100644
--- a/drivers/gpu/drm/xe/xe_gt_types.h
+++ b/drivers/gpu/drm/xe/xe_gt_types.h
@@ -226,15 +226,15 @@ struct xe_gt {
 		/** @data: data in the page fault queue */
 		u32 data[PF_QUEUE_NUM_DW];
 		/**
-		 * @head: head pointer in DWs for page fault queue,
-		 * moved by worker which processes faults.
-		 */
-		u16 head;
-		/**
 		 * @tail: tail pointer in DWs for page fault queue,
-		 * moved by G2H handler.
+		 * moved by worker which processes faults (consumer).
 		 */
 		u16 tail;
+		/**
+		 * @head: head pointer in DWs for page fault queue,
+		 * moved by G2H handler (producer).
+		 */
+		u16 head;
 		/** @lock: protects page fault queue */
 		spinlock_t lock;
 		/** @worker: to process page faults */
@@ -252,15 +252,16 @@ struct xe_gt {
 		/** @data: data in the page fault queue */
 		u32 data[ACC_QUEUE_NUM_DW];
 		/**
-		 * @head: head pointer in DWs for page fault queue,
-		 * moved by worker which processes faults.
+		 * @tail: tail pointer in DWs for access counter queue,
+		 * moved by worker which processes counters
+		 * (consumer).
 		 */
-		u16 head;
+		u16 tail;
 		/**
-		 * @tail: tail pointer in DWs for page fault queue,
-		 * moved by G2H handler.
+		 * @head: head pointer in DWs for access counter queue,
+		 * moved by G2H handler (producer).
*/ - u16 tail; + u16 head; /** @lock: protects page fault queue */ spinlock_t lock; /** @worker: to process access counters */ diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c index 0a61390c64a7..576ff2c1fbb9 100644 --- a/drivers/gpu/drm/xe/xe_guc.c +++ b/drivers/gpu/drm/xe/xe_guc.c @@ -21,9 +21,12 @@ #include "xe_guc_hwconfig.h" #include "xe_guc_log.h" #include "xe_guc_pc.h" +#include "xe_guc_relay.h" #include "xe_guc_submit.h" +#include "xe_memirq.h" #include "xe_mmio.h" #include "xe_platform_types.h" +#include "xe_sriov.h" #include "xe_uc.h" #include "xe_uc_fw.h" #include "xe_wa.h" @@ -129,22 +132,24 @@ static u32 guc_ctl_ads_flags(struct xe_guc *guc) return flags; } +#define GUC_VER(maj, min, pat) (((maj) << 16) | ((min) << 8) | (pat)) + static u32 guc_ctl_wa_flags(struct xe_guc *guc) { struct xe_device *xe = guc_to_xe(guc); struct xe_gt *gt = guc_to_gt(guc); + struct xe_uc_fw *uc_fw = &guc->fw; + struct xe_uc_fw_version *version = &uc_fw->versions.found[XE_UC_FW_VER_RELEASE]; + u32 flags = 0; if (XE_WA(gt, 22012773006)) flags |= GUC_WA_POLLCS; - if (XE_WA(gt, 16011759253)) - flags |= GUC_WA_GAM_CREDITS; - if (XE_WA(gt, 14014475959)) flags |= GUC_WA_HOLD_CCS_SWITCHOUT; - if (XE_WA(gt, 22011391025) || XE_WA(gt, 14012197797)) + if (XE_WA(gt, 22011391025)) flags |= GUC_WA_DUAL_QUEUE; /* @@ -155,9 +160,6 @@ static u32 guc_ctl_wa_flags(struct xe_guc *guc) if (GRAPHICS_VERx100(xe) < 1270) flags |= GUC_WA_PRE_PARSER; - if (XE_WA(gt, 16011777198)) - flags |= GUC_WA_RCS_RESET_BEFORE_RC6; - if (XE_WA(gt, 22012727170) || XE_WA(gt, 22012727685)) flags |= GUC_WA_CONTEXT_ISOLATION; @@ -168,6 +170,14 @@ static u32 guc_ctl_wa_flags(struct xe_guc *guc) if (XE_WA(gt, 1509372804)) flags |= GUC_WA_RENDER_RST_RC6_EXIT; + if (XE_WA(gt, 14018913170)) { + if (GUC_VER(version->major, version->minor, version->patch) >= GUC_VER(70, 7, 0)) + flags |= GUC_WA_ENABLE_TSC_CHECK_ON_RC6; + else + drm_warn(&xe->drm, "can't apply WA 14018913170, GUC version expected >= 70.7.0, found %u %u %u\n", + version->major, version->minor, version->patch); + } + return flags; } @@ -246,6 +256,22 @@ static void guc_fini(struct drm_device *drm, void *arg) xe_force_wake_put(gt_to_fw(guc_to_gt(guc)), XE_FORCEWAKE_ALL); } +/** + * xe_guc_comm_init_early - early initialization of GuC communication + * @guc: the &xe_guc to initialize + * + * Must be called prior to first MMIO communication with GuC firmware. 
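 * 
 * This is where the notify register (GUC_HOST_INTERRUPT, or
 * MED_GUC_HOST_INTERRUPT for the media GT) is selected; the host uses this
 * register to notify the GuC.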
+ */ +void xe_guc_comm_init_early(struct xe_guc *guc) +{ + struct xe_gt *gt = guc_to_gt(guc); + + if (xe_gt_is_media_type(gt)) + guc->notify_reg = MED_GUC_HOST_INTERRUPT; + else + guc->notify_reg = GUC_HOST_INTERRUPT; +} + int xe_guc_init(struct xe_guc *guc) { struct xe_device *xe = guc_to_xe(guc); @@ -272,6 +298,10 @@ int xe_guc_init(struct xe_guc *guc) if (ret) goto out; + ret = xe_guc_relay_init(&guc->relay); + if (ret) + goto out; + ret = xe_guc_pc_init(&guc->pc); if (ret) goto out; @@ -282,10 +312,7 @@ int xe_guc_init(struct xe_guc *guc) guc_init_params(guc); - if (xe_gt_is_media_type(gt)) - guc->notify_reg = MED_GUC_HOST_INTERRUPT; - else - guc->notify_reg = GUC_HOST_INTERRUPT; + xe_guc_comm_init_early(guc); xe_uc_fw_change_status(&guc->fw, XE_UC_FIRMWARE_LOADABLE); @@ -429,7 +456,6 @@ static int guc_wait_ucode(struct xe_guc *guc) if (ret) { struct drm_device *drm = &xe->drm; - struct drm_printer p = drm_info_printer(drm->dev); drm_info(drm, "GuC load failed: status = 0x%08X\n", status); drm_info(drm, "GuC load failed: status: Reset = %d, BootROM = 0x%02X, UKernel = 0x%02X, MIA = 0x%02X, Auth = 0x%02X\n", @@ -451,8 +477,6 @@ static int guc_wait_ucode(struct xe_guc *guc) SOFT_SCRATCH(13))); ret = -ENXIO; } - - xe_guc_log_print(&guc->log, &p); } else { drm_dbg(&xe->drm, "GuC successfully loaded"); } @@ -579,10 +603,20 @@ static void guc_enable_irq(struct xe_guc *guc) int xe_guc_enable_communication(struct xe_guc *guc) { + struct xe_device *xe = guc_to_xe(guc); int err; guc_enable_irq(guc); + if (IS_SRIOV_VF(xe) && xe_device_has_memirq(xe)) { + struct xe_gt *gt = guc_to_gt(guc); + struct xe_tile *tile = gt_to_tile(gt); + + err = xe_memirq_init_guc(&tile->sriov.vf.memirq, guc); + if (err) + return err; + } + xe_mmio_rmw32(guc_to_gt(guc), PMINTRMSK, ARAT_EXPIRED_INTRMSK, 0); @@ -707,8 +741,12 @@ timeout: if (unlikely(FIELD_GET(GUC_HXG_MSG_0_ORIGIN, header) != GUC_HXG_ORIGIN_GUC)) goto proto; - if (unlikely(ret)) + if (unlikely(ret)) { + if (FIELD_GET(GUC_HXG_MSG_0_TYPE, header) != + GUC_HXG_TYPE_NO_RESPONSE_BUSY) + goto proto; goto timeout; + } } if (FIELD_GET(GUC_HXG_MSG_0_TYPE, header) == diff --git a/drivers/gpu/drm/xe/xe_guc.h b/drivers/gpu/drm/xe/xe_guc.h index d3e49e7fd7c3..94f2dc5f6f90 100644 --- a/drivers/gpu/drm/xe/xe_guc.h +++ b/drivers/gpu/drm/xe/xe_guc.h @@ -13,6 +13,7 @@ struct drm_printer; +void xe_guc_comm_init_early(struct xe_guc *guc); int xe_guc_init(struct xe_guc *guc); int xe_guc_init_post_hwconfig(struct xe_guc *guc); int xe_guc_post_load_init(struct xe_guc *guc); diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c index 24a33fa36496..ee5d99456aeb 100644 --- a/drivers/gpu/drm/xe/xe_guc_ct.c +++ b/drivers/gpu/drm/xe/xe_guc_ct.c @@ -9,16 +9,21 @@ #include <linux/circ_buf.h> #include <linux/delay.h> +#include <kunit/static_stub.h> + #include <drm/drm_managed.h> #include "abi/guc_actions_abi.h" +#include "abi/guc_actions_sriov_abi.h" #include "abi/guc_klvs_abi.h" #include "xe_bo.h" #include "xe_device.h" #include "xe_gt.h" #include "xe_gt_pagefault.h" +#include "xe_gt_printk.h" #include "xe_gt_tlb_invalidation.h" #include "xe_guc.h" +#include "xe_guc_relay.h" #include "xe_guc_submit.h" #include "xe_map.h" #include "xe_pm.h" @@ -28,6 +33,7 @@ struct g2h_fence { u32 *response_buffer; u32 seqno; + u32 response_data; u16 response_len; u16 error; u16 hint; @@ -40,6 +46,7 @@ struct g2h_fence { static void g2h_fence_init(struct g2h_fence *g2h_fence, u32 *response_buffer) { g2h_fence->response_buffer = response_buffer; + g2h_fence->response_data = 0; 
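+	/* DATA0 from the HXG reply is cached here when no response_buffer is provided */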
g2h_fence->response_len = 0;
 	g2h_fence->fail = false;
 	g2h_fence->retry = false;
@@ -448,7 +455,7 @@ static int h2g_write(struct xe_guc_ct *ct, const u32 *action, u32 len,
 				GUC_HXG_EVENT_MSG_0_DATA0, action[0]);
 	} else {
 		cmd[1] =
-			FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_EVENT) |
+			FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_FAST_REQUEST) |
 			FIELD_PREP(GUC_HXG_EVENT_MSG_0_ACTION |
 				   GUC_HXG_EVENT_MSG_0_DATA0, action[0]);
 	}
@@ -475,11 +482,31 @@ static int h2g_write(struct xe_guc_ct *ct, const u32 *action, u32 len,
 	return 0;
 }
 
+/*
+ * The CT protocol accepts a 16-bit fence. This field is fully owned by the
+ * driver; the GuC will just copy it to the reply message. Since we need to
+ * be able to distinguish between replies to REQUEST and FAST_REQUEST messages,
+ * we use one bit of the seqno as an indicator for that and a rolling counter
+ * for the remaining 15 bits.
+ */
+#define CT_SEQNO_MASK GENMASK(14, 0)
+#define CT_SEQNO_UNTRACKED BIT(15)
+static u16 next_ct_seqno(struct xe_guc_ct *ct, bool is_g2h_fence)
+{
+	u32 seqno = ct->fence_seqno++ & CT_SEQNO_MASK;
+
+	if (!is_g2h_fence)
+		seqno |= CT_SEQNO_UNTRACKED;
+
+	return seqno;
+}
+
 static int __guc_ct_send_locked(struct xe_guc_ct *ct, const u32 *action,
 				u32 len, u32 g2h_len, u32 num_g2h,
 				struct g2h_fence *g2h_fence)
 {
 	struct xe_device *xe = ct_to_xe(ct);
+	u16 seqno;
 	int ret;
 
 	xe_assert(xe, !g2h_len || !g2h_fence);
@@ -505,7 +532,7 @@ static int __guc_ct_send_locked(struct xe_guc_ct *ct, const u32 *action,
 		if (g2h_fence_needs_alloc(g2h_fence)) {
 			void *ptr;
 
-			g2h_fence->seqno = (ct->fence_seqno++ & 0xffff);
+			g2h_fence->seqno = next_ct_seqno(ct, true);
 			ptr = xa_store(&ct->fence_lookup,
 				       g2h_fence->seqno,
 				       g2h_fence, GFP_ATOMIC);
@@ -514,6 +541,10 @@ static int __guc_ct_send_locked(struct xe_guc_ct *ct, const u32 *action,
 				goto out;
 			}
 		}
+
+		seqno = g2h_fence->seqno;
+	} else {
+		seqno = next_ct_seqno(ct, false);
 	}
 
 	if (g2h_len)
@@ -523,8 +554,7 @@ retry:
 	if (unlikely(ret))
 		goto out_unlock;
 
-	ret = h2g_write(ct, action, len, g2h_fence ? g2h_fence->seqno : 0,
-			!!g2h_fence);
+	ret = h2g_write(ct, action, len, seqno, !!g2h_fence);
 	if (unlikely(ret)) {
 		if (ret == -EAGAIN)
 			goto retry;
@@ -752,12 +782,31 @@ retry_same_fence:
 		ret = -EIO;
 	}
 
-	return ret > 0 ? 0 : ret;
+	return ret > 0 ? response_buffer ? g2h_fence.response_len : g2h_fence.response_data : ret;
 }
 
+/**
+ * xe_guc_ct_send_recv - Send and receive HXG to the GuC
+ * @ct: the &xe_guc_ct
+ * @action: the dword array with `HXG Request`_ message (can't be NULL)
+ * @len: length of the `HXG Request`_ message (in dwords, can't be 0)
+ * @response_buffer: placeholder for the `HXG Response`_ message (can be NULL)
+ *
+ * Send a `HXG Request`_ message to the GuC over the CT communication channel
+ * and block until the GuC replies with a `HXG Response`_ message.
+ *
+ * For non-blocking communication with the GuC use xe_guc_ct_send().
+ *
+ * Note: The size of &response_buffer must be at least GUC_CTB_MAX_DWORDS_.
+ *
+ * Return: response length (in dwords) if &response_buffer was not NULL, or
+ *         DATA0 from `HXG Response`_ if &response_buffer was NULL, or
+ *         a negative error code on failure.
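 *
 * A minimal illustrative call (``ct``, ``action`` and ``len`` below are
 * placeholders for whatever message the caller has already prepared):
 *
 * .. code-block:: c
 *
 *	u32 response[GUC_CTB_MAX_DWORDS];
 *	int ret;
 *
 *	ret = xe_guc_ct_send_recv(ct, action, len, response);
 *	if (ret < 0)
 *		return ret;	// send failed or the GuC replied with failure
 *	// ret is the response length in dwords, response[] holds the HXG message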
+ */ int xe_guc_ct_send_recv(struct xe_guc_ct *ct, const u32 *action, u32 len, u32 *response_buffer) { + KUNIT_STATIC_STUB_REDIRECT(xe_guc_ct_send_recv, ct, action, len, response_buffer); return guc_ct_send_recv(ct, action, len, response_buffer, false); } @@ -767,9 +816,20 @@ int xe_guc_ct_send_recv_no_fail(struct xe_guc_ct *ct, const u32 *action, return guc_ct_send_recv(ct, action, len, response_buffer, true); } +static u32 *msg_to_hxg(u32 *msg) +{ + return msg + GUC_CTB_MSG_MIN_LEN; +} + +static u32 msg_len_to_hxg_len(u32 len) +{ + return len - GUC_CTB_MSG_MIN_LEN; +} + static int parse_g2h_event(struct xe_guc_ct *ct, u32 *msg, u32 len) { - u32 action = FIELD_GET(GUC_HXG_EVENT_MSG_0_ACTION, msg[1]); + u32 *hxg = msg_to_hxg(msg); + u32 action = FIELD_GET(GUC_HXG_EVENT_MSG_0_ACTION, hxg[0]); lockdep_assert_held(&ct->lock); @@ -786,18 +846,41 @@ static int parse_g2h_event(struct xe_guc_ct *ct, u32 *msg, u32 len) static int parse_g2h_response(struct xe_guc_ct *ct, u32 *msg, u32 len) { - struct xe_device *xe = ct_to_xe(ct); - u32 response_len = len - GUC_CTB_MSG_MIN_LEN; + struct xe_gt *gt = ct_to_gt(ct); + struct xe_device *xe = gt_to_xe(gt); + u32 *hxg = msg_to_hxg(msg); + u32 hxg_len = msg_len_to_hxg_len(len); u32 fence = FIELD_GET(GUC_CTB_MSG_0_FENCE, msg[0]); - u32 type = FIELD_GET(GUC_HXG_MSG_0_TYPE, msg[1]); + u32 type = FIELD_GET(GUC_HXG_MSG_0_TYPE, hxg[0]); struct g2h_fence *g2h_fence; lockdep_assert_held(&ct->lock); + /* + * Fences for FAST_REQUEST messages are not tracked in ct->fence_lookup. + * Those messages should never fail, so if we do get an error back it + * means we're likely doing an illegal operation and the GuC is + * rejecting it. We have no way to inform the code that submitted the + * H2G that the message was rejected, so we need to escalate the + * failure to trigger a reset. + */ + if (fence & CT_SEQNO_UNTRACKED) { + if (type == GUC_HXG_TYPE_RESPONSE_FAILURE) + xe_gt_err(gt, "FAST_REQ H2G fence 0x%x failed! 
e=0x%x, h=%u\n", + fence, + FIELD_GET(GUC_HXG_FAILURE_MSG_0_ERROR, hxg[0]), + FIELD_GET(GUC_HXG_FAILURE_MSG_0_HINT, hxg[0])); + else + xe_gt_err(gt, "unexpected response %u for FAST_REQ H2G fence 0x%x!\n", + type, fence); + + return -EPROTO; + } + g2h_fence = xa_erase(&ct->fence_lookup, fence); if (unlikely(!g2h_fence)) { /* Don't tear down channel, as send could've timed out */ - drm_warn(&xe->drm, "G2H fence (%u) not found!\n", fence); + xe_gt_warn(gt, "G2H fence (%u) not found!\n", fence); g2h_release_space(ct, GUC_CTB_HXG_MSG_MAX_LEN); return 0; } @@ -806,18 +889,16 @@ static int parse_g2h_response(struct xe_guc_ct *ct, u32 *msg, u32 len) if (type == GUC_HXG_TYPE_RESPONSE_FAILURE) { g2h_fence->fail = true; - g2h_fence->error = - FIELD_GET(GUC_HXG_FAILURE_MSG_0_ERROR, msg[1]); - g2h_fence->hint = - FIELD_GET(GUC_HXG_FAILURE_MSG_0_HINT, msg[1]); + g2h_fence->error = FIELD_GET(GUC_HXG_FAILURE_MSG_0_ERROR, hxg[0]); + g2h_fence->hint = FIELD_GET(GUC_HXG_FAILURE_MSG_0_HINT, hxg[0]); } else if (type == GUC_HXG_TYPE_NO_RESPONSE_RETRY) { g2h_fence->retry = true; - g2h_fence->reason = - FIELD_GET(GUC_HXG_RETRY_MSG_0_REASON, msg[1]); + g2h_fence->reason = FIELD_GET(GUC_HXG_RETRY_MSG_0_REASON, hxg[0]); } else if (g2h_fence->response_buffer) { - g2h_fence->response_len = response_len; - memcpy(g2h_fence->response_buffer, msg + GUC_CTB_MSG_MIN_LEN, - response_len * sizeof(u32)); + g2h_fence->response_len = hxg_len; + memcpy(g2h_fence->response_buffer, hxg, hxg_len * sizeof(u32)); + } else { + g2h_fence->response_data = FIELD_GET(GUC_HXG_RESPONSE_MSG_0_DATA0, hxg[0]); } g2h_release_space(ct, GUC_CTB_HXG_MSG_MAX_LEN); @@ -833,14 +914,13 @@ static int parse_g2h_response(struct xe_guc_ct *ct, u32 *msg, u32 len) static int parse_g2h_msg(struct xe_guc_ct *ct, u32 *msg, u32 len) { struct xe_device *xe = ct_to_xe(ct); - u32 hxg, origin, type; + u32 *hxg = msg_to_hxg(msg); + u32 origin, type; int ret; lockdep_assert_held(&ct->lock); - hxg = msg[1]; - - origin = FIELD_GET(GUC_HXG_MSG_0_ORIGIN, hxg); + origin = FIELD_GET(GUC_HXG_MSG_0_ORIGIN, hxg[0]); if (unlikely(origin != GUC_HXG_ORIGIN_GUC)) { drm_err(&xe->drm, "G2H channel broken on read, origin=%d, reset required\n", @@ -850,7 +930,7 @@ static int parse_g2h_msg(struct xe_guc_ct *ct, u32 *msg, u32 len) return -EPROTO; } - type = FIELD_GET(GUC_HXG_MSG_0_TYPE, hxg); + type = FIELD_GET(GUC_HXG_MSG_0_TYPE, hxg[0]); switch (type) { case GUC_HXG_TYPE_EVENT: ret = parse_g2h_event(ct, msg, len); @@ -876,14 +956,19 @@ static int process_g2h_msg(struct xe_guc_ct *ct, u32 *msg, u32 len) { struct xe_device *xe = ct_to_xe(ct); struct xe_guc *guc = ct_to_guc(ct); - u32 action = FIELD_GET(GUC_HXG_EVENT_MSG_0_ACTION, msg[1]); - u32 *payload = msg + GUC_CTB_HXG_MSG_MIN_LEN; - u32 adj_len = len - GUC_CTB_HXG_MSG_MIN_LEN; + u32 hxg_len = msg_len_to_hxg_len(len); + u32 *hxg = msg_to_hxg(msg); + u32 action, adj_len; + u32 *payload; int ret = 0; - if (FIELD_GET(GUC_HXG_MSG_0_TYPE, msg[1]) != GUC_HXG_TYPE_EVENT) + if (FIELD_GET(GUC_HXG_MSG_0_TYPE, hxg[0]) != GUC_HXG_TYPE_EVENT) return 0; + action = FIELD_GET(GUC_HXG_EVENT_MSG_0_ACTION, hxg[0]); + payload = hxg + GUC_HXG_EVENT_MSG_MIN_LEN; + adj_len = hxg_len - GUC_HXG_EVENT_MSG_MIN_LEN; + switch (action) { case XE_GUC_ACTION_SCHED_CONTEXT_MODE_DONE: ret = xe_guc_sched_done_handler(guc, payload, adj_len); @@ -920,6 +1005,12 @@ static int process_g2h_msg(struct xe_guc_ct *ct, u32 *msg, u32 len) ret = xe_guc_access_counter_notify_handler(guc, payload, adj_len); break; + case XE_GUC_ACTION_GUC2PF_RELAY_FROM_VF: + ret = 
xe_guc_relay_process_guc2pf(&guc->relay, payload, adj_len);
+		break;
+	case XE_GUC_ACTION_GUC2VF_RELAY_FROM_PF:
+		ret = xe_guc_relay_process_guc2vf(&guc->relay, payload, adj_len);
+		break;
 	default:
 		drm_err(&xe->drm, "unexpected action 0x%04x\n", action);
 	}
@@ -938,6 +1029,7 @@ static int g2h_read(struct xe_guc_ct *ct, u32 *msg, bool fast_path)
 	u32 tail, head, len;
 	s32 avail;
 	u32 action;
+	u32 *hxg;
 
 	lockdep_assert_held(&ct->fast_lock);
 
@@ -988,10 +1080,11 @@ static int g2h_read(struct xe_guc_ct *ct, u32 *msg, bool fast_path)
 			   avail * sizeof(u32));
 	}
 
-	action = FIELD_GET(GUC_HXG_EVENT_MSG_0_ACTION, msg[1]);
+	hxg = msg_to_hxg(msg);
+	action = FIELD_GET(GUC_HXG_EVENT_MSG_0_ACTION, hxg[0]);
 
 	if (fast_path) {
-		if (FIELD_GET(GUC_HXG_MSG_0_TYPE, msg[1]) != GUC_HXG_TYPE_EVENT)
+		if (FIELD_GET(GUC_HXG_MSG_0_TYPE, hxg[0]) != GUC_HXG_TYPE_EVENT)
 			return 0;
 
 		switch (action) {
@@ -1017,9 +1110,11 @@ static void g2h_fast_path(struct xe_guc_ct *ct, u32 *msg, u32 len)
 {
 	struct xe_device *xe = ct_to_xe(ct);
 	struct xe_guc *guc = ct_to_guc(ct);
-	u32 action = FIELD_GET(GUC_HXG_EVENT_MSG_0_ACTION, msg[1]);
-	u32 *payload = msg + GUC_CTB_HXG_MSG_MIN_LEN;
-	u32 adj_len = len - GUC_CTB_HXG_MSG_MIN_LEN;
+	u32 hxg_len = msg_len_to_hxg_len(len);
+	u32 *hxg = msg_to_hxg(msg);
+	u32 action = FIELD_GET(GUC_HXG_EVENT_MSG_0_ACTION, hxg[0]);
+	u32 *payload = hxg + GUC_HXG_MSG_MIN_LEN;
+	u32 adj_len = hxg_len - GUC_HXG_MSG_MIN_LEN;
 	int ret = 0;
 
 	switch (action) {
diff --git a/drivers/gpu/drm/xe/xe_guc_ct.h b/drivers/gpu/drm/xe/xe_guc_ct.h
index f15f8a4857e0..9ecb67db8ec4 100644
--- a/drivers/gpu/drm/xe/xe_guc_ct.h
+++ b/drivers/gpu/drm/xe/xe_guc_ct.h
@@ -24,9 +24,11 @@ void xe_guc_ct_print(struct xe_guc_ct *ct, struct drm_printer *p, bool atomic);
 
 static inline void xe_guc_ct_irq_handler(struct xe_guc_ct *ct)
 {
+	if (!ct->enabled)
+		return;
+
 	wake_up_all(&ct->wq);
-	if (ct->enabled)
-		queue_work(system_unbound_wq, &ct->g2h_worker);
+	queue_work(system_unbound_wq, &ct->g2h_worker);
 	xe_guc_ct_fast_path(ct);
 }
 
diff --git a/drivers/gpu/drm/xe/xe_guc_db_mgr.c b/drivers/gpu/drm/xe/xe_guc_db_mgr.c
new file mode 100644
index 000000000000..8d9a0287df6b
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_guc_db_mgr.c
@@ -0,0 +1,266 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include <linux/bitmap.h>
+#include <linux/mutex.h>
+
+#include <drm/drm_managed.h>
+
+#include "regs/xe_guc_regs.h"
+
+#include "xe_assert.h"
+#include "xe_gt_printk.h"
+#include "xe_guc.h"
+#include "xe_guc_db_mgr.h"
+#include "xe_guc_types.h"
+
+/**
+ * DOC: GuC Doorbells
+ *
+ * The GFX doorbell solution provides a mechanism for submission of workloads
+ * to the graphics hardware by a ring3 application without the penalty of a
+ * ring transition for each workload submission.
+ *
+ * In SR-IOV mode, the doorbells are treated as a shared resource and the PF
+ * must be able to provision an exclusive range of IDs across VFs, which may
+ * want to use this feature.
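+ *
+ * A minimal usage sketch (``dbm``, ``vf_count`` and ``pf_spare`` below are
+ * illustrative placeholders, not part of the API):
+ *
+ * .. code-block:: c
+ *
+ *	// manage all doorbells supported by the hardware (bare-metal or PF)
+ *	err = xe_guc_db_mgr_init(dbm, ~0);
+ *
+ *	// PF: carve out a contiguous range for a VF, keep spare IDs for itself
+ *	start = xe_guc_db_mgr_reserve_range(dbm, vf_count, pf_spare);
+ *
+ *	// return the IDs once the VF no longer needs them
+ *	xe_guc_db_mgr_release_range(dbm, start, vf_count);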
+ */
+
+static struct xe_guc *dbm_to_guc(struct xe_guc_db_mgr *dbm)
+{
+	return container_of(dbm, struct xe_guc, dbm);
+}
+
+static struct xe_gt *dbm_to_gt(struct xe_guc_db_mgr *dbm)
+{
+	return guc_to_gt(dbm_to_guc(dbm));
+}
+
+static struct xe_device *dbm_to_xe(struct xe_guc_db_mgr *dbm)
+{
+	return gt_to_xe(dbm_to_gt(dbm));
+}
+
+#define dbm_assert(_dbm, _cond)		xe_gt_assert(dbm_to_gt(_dbm), _cond)
+#define dbm_mutex(_dbm)			(&dbm_to_guc(_dbm)->submission_state.lock)
+
+static void dbm_print_locked(struct xe_guc_db_mgr *dbm, struct drm_printer *p, int indent);
+
+static void __fini_dbm(struct drm_device *drm, void *arg)
+{
+	struct xe_guc_db_mgr *dbm = arg;
+	unsigned int weight;
+
+	mutex_lock(dbm_mutex(dbm));
+
+	weight = bitmap_weight(dbm->bitmap, dbm->count);
+	if (weight) {
+		struct drm_printer p = xe_gt_info_printer(dbm_to_gt(dbm));
+
+		xe_gt_err(dbm_to_gt(dbm), "GuC doorbells manager unclean (%u/%u)\n",
+			  weight, dbm->count);
+		dbm_print_locked(dbm, &p, 1);
+	}
+
+	bitmap_free(dbm->bitmap);
+	dbm->bitmap = NULL;
+	dbm->count = 0;
+
+	mutex_unlock(dbm_mutex(dbm));
+}
+
+/**
+ * xe_guc_db_mgr_init() - Initialize GuC Doorbells Manager.
+ * @dbm: the &xe_guc_db_mgr to initialize
+ * @count: number of doorbells to manage
+ *
+ * The bare-metal or PF driver can pass ~0 as &count to indicate that all
+ * doorbells supported by the hardware are available for use.
+ *
+ * Only VF drivers have to provide an explicit number of doorbell IDs
+ * that they can use.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_guc_db_mgr_init(struct xe_guc_db_mgr *dbm, unsigned int count)
+{
+	int ret;
+
+	if (count == ~0)
+		count = GUC_NUM_DOORBELLS;
+
+	dbm_assert(dbm, !dbm->bitmap);
+	dbm_assert(dbm, count <= GUC_NUM_DOORBELLS);
+
+	if (!count)
+		goto done;
+
+	dbm->bitmap = bitmap_zalloc(count, GFP_KERNEL);
+	if (!dbm->bitmap)
+		return -ENOMEM;
+	dbm->count = count;
+
+	ret = drmm_add_action_or_reset(&dbm_to_xe(dbm)->drm, __fini_dbm, dbm);
+	if (ret)
+		return ret;
+done:
+	xe_gt_dbg(dbm_to_gt(dbm), "using %u doorbell(s)\n", dbm->count);
+	return 0;
+}
+
+static int dbm_reserve_chunk_locked(struct xe_guc_db_mgr *dbm,
+				    unsigned int count, unsigned int spare)
+{
+	unsigned int used;
+	int index;
+
+	dbm_assert(dbm, count);
+	dbm_assert(dbm, count <= GUC_NUM_DOORBELLS);
+	dbm_assert(dbm, dbm->count <= GUC_NUM_DOORBELLS);
+	lockdep_assert_held(dbm_mutex(dbm));
+
+	if (!dbm->count)
+		return -ENODATA;
+
+	if (spare) {
+		used = bitmap_weight(dbm->bitmap, dbm->count);
+		if (used + count + spare > dbm->count)
+			return -EDQUOT;
+	}
+
+	index = bitmap_find_next_zero_area(dbm->bitmap, dbm->count, 0, count, 0);
+	if (index >= dbm->count)
+		return -ENOSPC;
+
+	bitmap_set(dbm->bitmap, index, count);
+
+	return index;
+}
+
+static void dbm_release_chunk_locked(struct xe_guc_db_mgr *dbm,
+				     unsigned int start, unsigned int count)
+{
+	dbm_assert(dbm, count);
+	dbm_assert(dbm, count <= GUC_NUM_DOORBELLS);
+	dbm_assert(dbm, dbm->count);
+	dbm_assert(dbm, dbm->count <= GUC_NUM_DOORBELLS);
+	lockdep_assert_held(dbm_mutex(dbm));
+
+	if (IS_ENABLED(CONFIG_DRM_XE_DEBUG)) {
+		unsigned int n;
+
+		for (n = 0; n < count; n++)
+			dbm_assert(dbm, test_bit(start + n, dbm->bitmap));
+	}
+	bitmap_clear(dbm->bitmap, start, count);
+}
+
+/**
+ * xe_guc_db_mgr_reserve_id_locked() - Reserve a single GuC Doorbell ID.
+ * @dbm: the &xe_guc_db_mgr
+ *
+ * This function expects that the submission lock is already taken.
+ *
+ * Return: ID of the allocated GuC doorbell or a negative error code on failure.
+ */
+int xe_guc_db_mgr_reserve_id_locked(struct xe_guc_db_mgr *dbm)
+{
+	return dbm_reserve_chunk_locked(dbm, 1, 0);
+}
+
+/**
+ * xe_guc_db_mgr_release_id_locked() - Release a single GuC Doorbell ID.
+ * @dbm: the &xe_guc_db_mgr
+ * @id: the GuC Doorbell ID to release
+ *
+ * This function expects that the submission lock is already taken.
+ */
+void xe_guc_db_mgr_release_id_locked(struct xe_guc_db_mgr *dbm, unsigned int id)
+{
+	return dbm_release_chunk_locked(dbm, id, 1);
+}
+
+/**
+ * xe_guc_db_mgr_reserve_range() - Reserve a range of GuC Doorbell IDs.
+ * @dbm: the &xe_guc_db_mgr
+ * @count: number of GuC doorbell IDs to reserve
+ * @spare: number of GuC doorbell IDs to keep available
+ *
+ * This function is dedicated for use by the PF, which expects that the
+ * allocated range for the VF will be contiguous and that there will be at
+ * least &spare IDs still available for PF use after this reservation.
+ *
+ * Return: starting ID of the allocated GuC doorbell ID range or
+ *         a negative error code on failure.
+ */
+int xe_guc_db_mgr_reserve_range(struct xe_guc_db_mgr *dbm,
+				unsigned int count, unsigned int spare)
+{
+	int ret;
+
+	mutex_lock(dbm_mutex(dbm));
+	ret = dbm_reserve_chunk_locked(dbm, count, spare);
+	mutex_unlock(dbm_mutex(dbm));
+
+	return ret;
+}
+
+/**
+ * xe_guc_db_mgr_release_range() - Release a range of GuC Doorbell IDs.
+ * @dbm: the &xe_guc_db_mgr
+ * @start: the starting ID of GuC doorbell ID range to release
+ * @count: number of GuC doorbell IDs to release
+ */
+void xe_guc_db_mgr_release_range(struct xe_guc_db_mgr *dbm,
+				 unsigned int start, unsigned int count)
+{
+	mutex_lock(dbm_mutex(dbm));
+	dbm_release_chunk_locked(dbm, start, count);
+	mutex_unlock(dbm_mutex(dbm));
+}
+
+static void dbm_print_locked(struct xe_guc_db_mgr *dbm, struct drm_printer *p, int indent)
+{
+	unsigned int rs, re;
+	unsigned int total;
+
+	drm_printf_indent(p, indent, "count: %u\n", dbm->count);
+	if (!dbm->bitmap)
+		return;
+
+	total = 0;
+	for_each_clear_bitrange(rs, re, dbm->bitmap, dbm->count) {
+		drm_printf_indent(p, indent, "available range: %u..%u (%u)\n",
+				  rs, re - 1, re - rs);
+		total += re - rs;
+	}
+	drm_printf_indent(p, indent, "available total: %u\n", total);
+
+	total = 0;
+	for_each_set_bitrange(rs, re, dbm->bitmap, dbm->count) {
+		drm_printf_indent(p, indent, "reserved range: %u..%u (%u)\n",
+				  rs, re - 1, re - rs);
+		total += re - rs;
+	}
+	drm_printf_indent(p, indent, "reserved total: %u\n", total);
+}
+
+/**
+ * xe_guc_db_mgr_print() - Print status of GuC Doorbells Manager.
+ * @dbm: the &xe_guc_db_mgr to print + * @p: the &drm_printer to print to + * @indent: tab indentation level + */ +void xe_guc_db_mgr_print(struct xe_guc_db_mgr *dbm, + struct drm_printer *p, int indent) +{ + mutex_lock(dbm_mutex(dbm)); + dbm_print_locked(dbm, p, indent); + mutex_unlock(dbm_mutex(dbm)); +} + +#if IS_BUILTIN(CONFIG_DRM_XE_KUNIT_TEST) +#include "tests/xe_guc_db_mgr_test.c" +#endif diff --git a/drivers/gpu/drm/xe/xe_guc_db_mgr.h b/drivers/gpu/drm/xe/xe_guc_db_mgr.h new file mode 100644 index 000000000000..c250fa0ca9d6 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_guc_db_mgr.h @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023 Intel Corporation + */ + +#ifndef _XE_GUC_DB_MGR_H_ +#define _XE_GUC_DB_MGR_H_ + +struct drm_printer; +struct xe_guc_db_mgr; + +int xe_guc_db_mgr_init(struct xe_guc_db_mgr *dbm, unsigned int count); + +int xe_guc_db_mgr_reserve_id_locked(struct xe_guc_db_mgr *dbm); +void xe_guc_db_mgr_release_id_locked(struct xe_guc_db_mgr *dbm, unsigned int id); + +int xe_guc_db_mgr_reserve_range(struct xe_guc_db_mgr *dbm, unsigned int count, unsigned int spare); +void xe_guc_db_mgr_release_range(struct xe_guc_db_mgr *dbm, unsigned int start, unsigned int count); + +void xe_guc_db_mgr_print(struct xe_guc_db_mgr *dbm, struct drm_printer *p, int indent); + +#endif diff --git a/drivers/gpu/drm/xe/xe_guc_fwif.h b/drivers/gpu/drm/xe/xe_guc_fwif.h index 4dd5a88a7826..c281fdbfd2d6 100644 --- a/drivers/gpu/drm/xe/xe_guc_fwif.h +++ b/drivers/gpu/drm/xe/xe_guc_fwif.h @@ -97,6 +97,7 @@ struct guc_update_exec_queue_policy { #define GUC_WA_POLLCS BIT(18) #define GUC_WA_RENDER_RST_RC6_EXIT BIT(19) #define GUC_WA_RCS_REGS_IN_CCS_REGS_LIST BIT(21) +#define GUC_WA_ENABLE_TSC_CHECK_ON_RC6 BIT(22) #define GUC_CTL_FEATURE 2 #define GUC_CTL_ENABLE_SLPC BIT(2) diff --git a/drivers/gpu/drm/xe/xe_guc_hxg_helpers.h b/drivers/gpu/drm/xe/xe_guc_hxg_helpers.h new file mode 100644 index 000000000000..aeeb573c6842 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_guc_hxg_helpers.h @@ -0,0 +1,108 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023 Intel Corporation + */ + +#ifndef _XE_GUC_HXG_HELPERS_H_ +#define _XE_GUC_HXG_HELPERS_H_ + +#include <linux/bitfield.h> +#include <linux/types.h> + +#include "abi/guc_messages_abi.h" + +/** + * hxg_sizeof - Queries size of the object or type (in HXG units). + * @T: the object or type + * + * Force a compilation error if actual size is not aligned to HXG unit (u32). + * + * Return: size in dwords (u32). 
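+ *
+ * Example: for a 12-byte struct, hxg_sizeof() evaluates to 3; for a type
+ * whose size is not a multiple of sizeof(u32) the build fails.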
+ */
+#define hxg_sizeof(T)	(sizeof(T) / sizeof(u32) + BUILD_BUG_ON_ZERO(sizeof(T) % sizeof(u32)))
+
+static inline const char *guc_hxg_type_to_string(unsigned int type)
+{
+	switch (type) {
+	case GUC_HXG_TYPE_REQUEST:
+		return "request";
+	case GUC_HXG_TYPE_FAST_REQUEST:
+		return "fast-request";
+	case GUC_HXG_TYPE_EVENT:
+		return "event";
+	case GUC_HXG_TYPE_NO_RESPONSE_BUSY:
+		return "busy";
+	case GUC_HXG_TYPE_NO_RESPONSE_RETRY:
+		return "retry";
+	case GUC_HXG_TYPE_RESPONSE_FAILURE:
+		return "failure";
+	case GUC_HXG_TYPE_RESPONSE_SUCCESS:
+		return "response";
+	default:
+		return "<invalid>";
+	}
+}
+
+static inline bool guc_hxg_type_is_action(unsigned int type)
+{
+	switch (type) {
+	case GUC_HXG_TYPE_REQUEST:
+	case GUC_HXG_TYPE_FAST_REQUEST:
+	case GUC_HXG_TYPE_EVENT:
+		return true;
+	default:
+		return false;
+	}
+}
+
+static inline bool guc_hxg_type_is_reply(unsigned int type)
+{
+	switch (type) {
+	case GUC_HXG_TYPE_NO_RESPONSE_BUSY:
+	case GUC_HXG_TYPE_NO_RESPONSE_RETRY:
+	case GUC_HXG_TYPE_RESPONSE_FAILURE:
+	case GUC_HXG_TYPE_RESPONSE_SUCCESS:
+		return true;
+	default:
+		return false;
+	}
+}
+
+static inline u32 guc_hxg_msg_encode_success(u32 *msg, u32 data0)
+{
+	msg[0] = FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_HOST) |
+		 FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_RESPONSE_SUCCESS) |
+		 FIELD_PREP(GUC_HXG_RESPONSE_MSG_0_DATA0, data0);
+
+	return GUC_HXG_RESPONSE_MSG_MIN_LEN;
+}
+
+static inline u32 guc_hxg_msg_encode_failure(u32 *msg, u32 error, u32 hint)
+{
+	msg[0] = FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_HOST) |
+		 FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_RESPONSE_FAILURE) |
+		 FIELD_PREP(GUC_HXG_FAILURE_MSG_0_HINT, hint) |
+		 FIELD_PREP(GUC_HXG_FAILURE_MSG_0_ERROR, error);
+
+	return GUC_HXG_FAILURE_MSG_LEN;
+}
+
+static inline u32 guc_hxg_msg_encode_busy(u32 *msg, u32 counter)
+{
+	msg[0] = FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_HOST) |
+		 FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_NO_RESPONSE_BUSY) |
+		 FIELD_PREP(GUC_HXG_BUSY_MSG_0_COUNTER, counter);
+
+	return GUC_HXG_BUSY_MSG_LEN;
+}
+
+static inline u32 guc_hxg_msg_encode_retry(u32 *msg, u32 reason)
+{
+	msg[0] = FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_HOST) |
+		 FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_NO_RESPONSE_RETRY) |
+		 FIELD_PREP(GUC_HXG_RETRY_MSG_0_REASON, reason);
+
+	return GUC_HXG_RETRY_MSG_LEN;
+}
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_guc_relay.c b/drivers/gpu/drm/xe/xe_guc_relay.c
new file mode 100644
index 000000000000..c0a2d8d5d3b3
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_guc_relay.c
@@ -0,0 +1,941 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include <linux/bitfield.h>
+#include <linux/delay.h>
+
+#include <drm/drm_managed.h>
+
+#include <kunit/static_stub.h>
+#include <kunit/test-bug.h>
+
+#include "abi/guc_actions_sriov_abi.h"
+#include "abi/guc_relay_actions_abi.h"
+#include "abi/guc_relay_communication_abi.h"
+
+#include "xe_assert.h"
+#include "xe_device.h"
+#include "xe_gt.h"
+#include "xe_gt_sriov_printk.h"
+#include "xe_guc.h"
+#include "xe_guc_ct.h"
+#include "xe_guc_hxg_helpers.h"
+#include "xe_guc_relay.h"
+#include "xe_guc_relay_types.h"
+#include "xe_sriov.h"
+
+/*
+ * How long should we wait for the response?
+ * XXX: this value is subject to profiling.
+ */
+#define RELAY_TIMEOUT_MSEC	(2500)
+
+static void relays_worker_fn(struct work_struct *w);
+
+static struct xe_guc *relay_to_guc(struct xe_guc_relay *relay)
+{
+	return container_of(relay, struct xe_guc, relay);
+}
+
+static struct xe_guc_ct *relay_to_ct(struct xe_guc_relay *relay)
+{
+	return &relay_to_guc(relay)->ct;
+}
+
+static struct xe_gt *relay_to_gt(struct xe_guc_relay *relay)
+{
+	return guc_to_gt(relay_to_guc(relay));
+}
+
+static struct xe_device *relay_to_xe(struct xe_guc_relay *relay)
+{
+	return gt_to_xe(relay_to_gt(relay));
+}
+
+#define relay_assert(relay, condition)	xe_gt_assert(relay_to_gt(relay), condition)
+#define relay_notice(relay, msg...)	xe_gt_sriov_notice(relay_to_gt(relay), "relay: " msg)
+#define relay_debug(relay, msg...)	xe_gt_sriov_dbg_verbose(relay_to_gt(relay), "relay: " msg)
+
+static int relay_get_totalvfs(struct xe_guc_relay *relay)
+{
+	struct xe_device *xe = relay_to_xe(relay);
+	struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
+
+	KUNIT_STATIC_STUB_REDIRECT(relay_get_totalvfs, relay);
+	return IS_SRIOV_VF(xe) ? 0 : pci_sriov_get_totalvfs(pdev);
+}
+
+static bool relay_is_ready(struct xe_guc_relay *relay)
+{
+	return mempool_initialized(&relay->pool);
+}
+
+static u32 relay_get_next_rid(struct xe_guc_relay *relay)
+{
+	u32 rid;
+
+	spin_lock(&relay->lock);
+	rid = ++relay->last_rid;
+	spin_unlock(&relay->lock);
+
+	return rid;
+}
+
+/**
+ * struct relay_transaction - internal data used to handle transactions
+ *
+ * Relation between struct relay_transaction members::
+ *
+ *      <-------------------- GUC_CTB_MAX_DWORDS -------------->
+ *                            <-------- GUC_RELAY_MSG_MAX_LEN --->
+ *      <--- offset --->      <--- request_len ------->
+ *     +----------------+-------------------------+----------+--+
+ *     |                |                         |          |  |
+ *     +----------------+-------------------------+----------+--+
+ *      ^                ^
+ *      /                /
+ *      request_buf      request
+ *
+ *      <-------------------- GUC_CTB_MAX_DWORDS -------------->
+ *                            <-------- GUC_RELAY_MSG_MAX_LEN --->
+ *      <--- offset --->      <--- response_len --->
+ *     +----------------+----------------------+-------------+--+
+ *     |                |                      |             |  |
+ *     +----------------+----------------------+-------------+--+
+ *      ^                ^
+ *      /                /
+ *      response_buf     response
+ */
+struct relay_transaction {
+	/**
+	 * @incoming: indicates whether this transaction represents an incoming
+	 *            request from the remote VF/PF or an outgoing request to
+	 *            the remote VF/PF.
+	 */
+	bool incoming;
+
+	/**
+	 * @remote: PF/VF identifier of the origin (or target) of the relay
+	 *          request message.
+	 */
+	u32 remote;
+
+	/** @rid: identifier of the VF/PF relay message. */
+	u32 rid;
+
+	/**
+	 * @request: points to the inner VF/PF request message, copied to the
+	 *           #response_buf starting at #offset.
+	 */
+	u32 *request;
+
+	/** @request_len: length of the inner VF/PF request message. */
+	u32 request_len;
+
+	/**
+	 * @response: points to the placeholder buffer where the inner VF/PF
+	 *            response will be located; for an outgoing transaction
+	 *            this could be the caller's buffer (if provided), otherwise
+	 *            it points to the #response_buf starting at #offset.
+	 */
+	u32 *response;
+
+	/**
+	 * @response_len: length of the inner VF/PF response message (only
+	 *                if #status is 0), initially set to the size of the
+	 *                placeholder buffer where the response message will be
+	 *                copied.
+	 */
+	u32 response_len;
+
+	/**
+	 * @offset: offset to the start of the inner VF/PF relay message inside
+	 *          buffers; this offset equals the length of the outer GuC
+	 *          relay header message.
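+	 *          (as returned by prepare_pf2guc() or prepare_vf2guc()).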
+	 */
+	u32 offset;
+
+	/**
+	 * @request_buf: buffer with VF/PF request message including outer
+	 *               transport message.
+	 */
+	u32 request_buf[GUC_CTB_MAX_DWORDS];
+
+	/**
+	 * @response_buf: buffer with VF/PF response message including outer
+	 *                transport message.
+	 */
+	u32 response_buf[GUC_CTB_MAX_DWORDS];
+
+	/**
+	 * @reply: status of the reply, 0 means that the data pointed to by
+	 *         #response is valid.
+	 */
+	int reply;
+
+	/** @done: completion of the outgoing transaction. */
+	struct completion done;
+
+	/** @link: transaction list link */
+	struct list_head link;
+};
+
+static u32 prepare_pf2guc(u32 *msg, u32 target, u32 rid)
+{
+	msg[0] = FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_HOST) |
+		 FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_REQUEST) |
+		 FIELD_PREP(GUC_HXG_REQUEST_MSG_0_ACTION, XE_GUC_ACTION_PF2GUC_RELAY_TO_VF);
+	msg[1] = FIELD_PREP(PF2GUC_RELAY_TO_VF_REQUEST_MSG_1_VFID, target);
+	msg[2] = FIELD_PREP(PF2GUC_RELAY_TO_VF_REQUEST_MSG_2_RELAY_ID, rid);
+
+	return PF2GUC_RELAY_TO_VF_REQUEST_MSG_MIN_LEN;
+}
+
+static u32 prepare_vf2guc(u32 *msg, u32 rid)
+{
+	msg[0] = FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_HOST) |
+		 FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_REQUEST) |
+		 FIELD_PREP(GUC_HXG_REQUEST_MSG_0_ACTION, XE_GUC_ACTION_VF2GUC_RELAY_TO_PF);
+	msg[1] = FIELD_PREP(VF2GUC_RELAY_TO_PF_REQUEST_MSG_1_RELAY_ID, rid);
+
+	return VF2GUC_RELAY_TO_PF_REQUEST_MSG_MIN_LEN;
+}
+
+static struct relay_transaction *
+__relay_get_transaction(struct xe_guc_relay *relay, bool incoming, u32 remote, u32 rid,
+			const u32 *action, u32 action_len, u32 *resp, u32 resp_size)
+{
+	struct relay_transaction *txn;
+
+	relay_assert(relay, action_len >= GUC_RELAY_MSG_MIN_LEN);
+	relay_assert(relay, action_len <= GUC_RELAY_MSG_MAX_LEN);
+	relay_assert(relay, !(!!resp ^ !!resp_size));
+	relay_assert(relay, resp_size <= GUC_RELAY_MSG_MAX_LEN);
+	relay_assert(relay, resp_size == 0 || resp_size >= GUC_RELAY_MSG_MIN_LEN);
+
+	if (unlikely(!relay_is_ready(relay)))
+		return ERR_PTR(-ENODEV);
+
+	/*
+	 * For incoming requests we can't use GFP_KERNEL as those are delivered
+	 * with the CTB lock held, which is marked as used in the reclaim path.
+	 * BTW, that's one of the reasons why we use a mempool here!
+	 */
+	txn = mempool_alloc(&relay->pool, incoming ? GFP_ATOMIC : GFP_KERNEL);
+	if (!txn)
+		return ERR_PTR(-ENOMEM);
+
+	txn->incoming = incoming;
+	txn->remote = remote;
+	txn->rid = rid;
+	txn->offset = remote ?
+		prepare_pf2guc(incoming ? txn->response_buf : txn->request_buf, remote, rid) :
+		prepare_vf2guc(incoming ? 
txn->response_buf : txn->request_buf, rid); + + relay_assert(relay, txn->offset); + relay_assert(relay, txn->offset + GUC_RELAY_MSG_MAX_LEN <= ARRAY_SIZE(txn->request_buf)); + relay_assert(relay, txn->offset + GUC_RELAY_MSG_MAX_LEN <= ARRAY_SIZE(txn->response_buf)); + + txn->request = txn->request_buf + txn->offset; + memcpy(&txn->request_buf[txn->offset], action, sizeof(u32) * action_len); + txn->request_len = action_len; + + txn->response = resp ?: txn->response_buf + txn->offset; + txn->response_len = resp_size ?: GUC_RELAY_MSG_MAX_LEN; + txn->reply = -ENOMSG; + INIT_LIST_HEAD(&txn->link); + init_completion(&txn->done); + + return txn; +} + +static struct relay_transaction * +relay_new_transaction(struct xe_guc_relay *relay, u32 target, const u32 *action, u32 len, + u32 *resp, u32 resp_size) +{ + u32 rid = relay_get_next_rid(relay); + + return __relay_get_transaction(relay, false, target, rid, action, len, resp, resp_size); +} + +static struct relay_transaction * +relay_new_incoming_transaction(struct xe_guc_relay *relay, u32 origin, u32 rid, + const u32 *action, u32 len) +{ + return __relay_get_transaction(relay, true, origin, rid, action, len, NULL, 0); +} + +static void relay_release_transaction(struct xe_guc_relay *relay, struct relay_transaction *txn) +{ + relay_assert(relay, list_empty(&txn->link)); + + txn->offset = 0; + txn->response = NULL; + txn->reply = -ESTALE; + mempool_free(txn, &relay->pool); +} + +static int relay_send_transaction(struct xe_guc_relay *relay, struct relay_transaction *txn) +{ + u32 len = txn->incoming ? txn->response_len : txn->request_len; + u32 *buf = txn->incoming ? txn->response_buf : txn->request_buf; + u32 *msg = buf + txn->offset; + int ret; + + relay_assert(relay, txn->offset); + relay_assert(relay, txn->offset + len <= GUC_CTB_MAX_DWORDS); + relay_assert(relay, len >= GUC_RELAY_MSG_MIN_LEN); + relay_assert(relay, len <= GUC_RELAY_MSG_MAX_LEN); + + relay_debug(relay, "sending %s.%u to %u = %*ph\n", + guc_hxg_type_to_string(FIELD_GET(GUC_HXG_MSG_0_TYPE, msg[0])), + txn->rid, txn->remote, (int)sizeof(u32) * len, msg); + + ret = xe_guc_ct_send_block(relay_to_ct(relay), buf, len + txn->offset); + + if (unlikely(ret > 0)) { + relay_notice(relay, "Unexpected data=%d from GuC, wrong ABI?\n", ret); + ret = -EPROTO; + } + if (unlikely(ret < 0)) { + relay_notice(relay, "Failed to send %s.%x to GuC (%pe) %*ph ...\n", + guc_hxg_type_to_string(FIELD_GET(GUC_HXG_MSG_0_TYPE, buf[0])), + FIELD_GET(GUC_HXG_REQUEST_MSG_0_ACTION, buf[0]), + ERR_PTR(ret), (int)sizeof(u32) * txn->offset, buf); + relay_notice(relay, "Failed to send %s.%u to %u (%pe) %*ph\n", + guc_hxg_type_to_string(FIELD_GET(GUC_HXG_MSG_0_TYPE, msg[0])), + txn->rid, txn->remote, ERR_PTR(ret), (int)sizeof(u32) * len, msg); + } + + return ret; +} + +static void __fini_relay(struct drm_device *drm, void *arg) +{ + struct xe_guc_relay *relay = arg; + + mempool_exit(&relay->pool); +} + +/** + * xe_guc_relay_init - Initialize a &xe_guc_relay + * @relay: the &xe_guc_relay to initialize + * + * Initialize remaining members of &xe_guc_relay that may depend + * on the SR-IOV mode. + * + * Return: 0 on success or a negative error code on failure. 
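+ *
+ * Note: when the device does not run in SR-IOV mode this function
+ * intentionally skips the setup and returns 0.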
+ */ +int xe_guc_relay_init(struct xe_guc_relay *relay) +{ + const int XE_RELAY_MEMPOOL_MIN_NUM = 1; + struct xe_device *xe = relay_to_xe(relay); + int err; + + relay_assert(relay, !relay_is_ready(relay)); + + if (!IS_SRIOV(xe)) + return 0; + + spin_lock_init(&relay->lock); + INIT_WORK(&relay->worker, relays_worker_fn); + INIT_LIST_HEAD(&relay->pending_relays); + INIT_LIST_HEAD(&relay->incoming_actions); + + err = mempool_init_kmalloc_pool(&relay->pool, XE_RELAY_MEMPOOL_MIN_NUM + + relay_get_totalvfs(relay), + sizeof(struct relay_transaction)); + if (err) + return err; + + relay_debug(relay, "using mempool with %d elements\n", relay->pool.min_nr); + + return drmm_add_action_or_reset(&xe->drm, __fini_relay, relay); +} + +static u32 to_relay_error(int err) +{ + /* XXX: assume that relay errors match errno codes */ + return err < 0 ? -err : GUC_RELAY_ERROR_UNDISCLOSED; +} + +static int from_relay_error(u32 error) +{ + /* XXX: assume that relay errors match errno codes */ + return error ? -error : -ENODATA; +} + +static u32 sanitize_relay_error(u32 error) +{ + /* XXX TBD if generic error codes will be allowed */ + if (!IS_ENABLED(CONFIG_DRM_XE_DEBUG)) + error = GUC_RELAY_ERROR_UNDISCLOSED; + return error; +} + +static u32 sanitize_relay_error_hint(u32 hint) +{ + /* XXX TBD if generic error codes will be allowed */ + if (!IS_ENABLED(CONFIG_DRM_XE_DEBUG)) + hint = 0; + return hint; +} + +static u32 prepare_error_reply(u32 *msg, u32 error, u32 hint) +{ + msg[0] = FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_HOST) | + FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_RESPONSE_FAILURE) | + FIELD_PREP(GUC_HXG_FAILURE_MSG_0_HINT, hint) | + FIELD_PREP(GUC_HXG_FAILURE_MSG_0_ERROR, error); + + XE_WARN_ON(!FIELD_FIT(GUC_HXG_FAILURE_MSG_0_ERROR, error)); + XE_WARN_ON(!FIELD_FIT(GUC_HXG_FAILURE_MSG_0_HINT, hint)); + + return GUC_HXG_FAILURE_MSG_LEN; +} + +static void relay_testonly_nop(struct xe_guc_relay *relay) +{ + KUNIT_STATIC_STUB_REDIRECT(relay_testonly_nop, relay); +} + +static int relay_send_message_and_wait(struct xe_guc_relay *relay, + struct relay_transaction *txn, + u32 *buf, u32 buf_size) +{ + unsigned long timeout = msecs_to_jiffies(RELAY_TIMEOUT_MSEC); + u32 *msg = &txn->request_buf[txn->offset]; + u32 len = txn->request_len; + u32 type, action, data0; + int ret; + long n; + + type = FIELD_GET(GUC_HXG_MSG_0_TYPE, msg[0]); + action = FIELD_GET(GUC_HXG_REQUEST_MSG_0_ACTION, msg[0]); + data0 = FIELD_GET(GUC_HXG_REQUEST_MSG_0_DATA0, msg[0]); + + relay_debug(relay, "%s.%u to %u action %#x:%u\n", + guc_hxg_type_to_string(type), + txn->rid, txn->remote, action, data0); + + /* list ordering does not need to match RID ordering */ + spin_lock(&relay->lock); + list_add_tail(&txn->link, &relay->pending_relays); + spin_unlock(&relay->lock); + +resend: + ret = relay_send_transaction(relay, txn); + if (unlikely(ret < 0)) + goto unlink; + +wait: + n = wait_for_completion_timeout(&txn->done, timeout); + if (unlikely(n == 0 && txn->reply)) { + ret = -ETIME; + goto unlink; + } + + relay_debug(relay, "%u.%u reply %d after %u msec\n", + txn->remote, txn->rid, txn->reply, jiffies_to_msecs(timeout - n)); + if (unlikely(txn->reply)) { + reinit_completion(&txn->done); + if (txn->reply == -EAGAIN) + goto resend; + if (txn->reply == -EBUSY) { + relay_testonly_nop(relay); + goto wait; + } + if (txn->reply > 0) + ret = from_relay_error(txn->reply); + else + ret = txn->reply; + goto unlink; + } + + relay_debug(relay, "%u.%u response %*ph\n", txn->remote, txn->rid, + (int)sizeof(u32) * txn->response_len, txn->response); + 
relay_assert(relay, txn->response_len >= GUC_RELAY_MSG_MIN_LEN);
+	ret = txn->response_len;
+
+unlink:
+	spin_lock(&relay->lock);
+	list_del_init(&txn->link);
+	spin_unlock(&relay->lock);
+
+	if (unlikely(ret < 0)) {
+		relay_notice(relay, "Unsuccessful %s.%u %#x:%u to %u (%pe) %*ph\n",
+			     guc_hxg_type_to_string(type), txn->rid,
+			     action, data0, txn->remote, ERR_PTR(ret),
+			     (int)sizeof(u32) * len, msg);
+	}
+
+	return ret;
+}
+
+static int relay_send_to(struct xe_guc_relay *relay, u32 target,
+			 const u32 *msg, u32 len, u32 *buf, u32 buf_size)
+{
+	struct relay_transaction *txn;
+	int ret;
+
+	relay_assert(relay, len >= GUC_RELAY_MSG_MIN_LEN);
+	relay_assert(relay, len <= GUC_RELAY_MSG_MAX_LEN);
+	relay_assert(relay, FIELD_GET(GUC_HXG_MSG_0_ORIGIN, msg[0]) == GUC_HXG_ORIGIN_HOST);
+	relay_assert(relay, guc_hxg_type_is_action(FIELD_GET(GUC_HXG_MSG_0_TYPE, msg[0])));
+
+	if (unlikely(!relay_is_ready(relay)))
+		return -ENODEV;
+
+	txn = relay_new_transaction(relay, target, msg, len, buf, buf_size);
+	if (IS_ERR(txn))
+		return PTR_ERR(txn);
+
+	switch (FIELD_GET(GUC_HXG_MSG_0_TYPE, msg[0])) {
+	case GUC_HXG_TYPE_REQUEST:
+		ret = relay_send_message_and_wait(relay, txn, buf, buf_size);
+		break;
+	case GUC_HXG_TYPE_FAST_REQUEST:
+		relay_assert(relay, !GUC_HXG_TYPE_FAST_REQUEST);
+		fallthrough;
+	case GUC_HXG_TYPE_EVENT:
+		ret = relay_send_transaction(relay, txn);
+		break;
+	default:
+		ret = -EINVAL;
+		break;
+	}
+
+	relay_release_transaction(relay, txn);
+	return ret;
+}
+
+#ifdef CONFIG_PCI_IOV
+/**
+ * xe_guc_relay_send_to_vf - Send a message to the VF.
+ * @relay: the &xe_guc_relay which will send the message
+ * @target: target VF number
+ * @msg: request message to be sent
+ * @len: length of the request message (in dwords, can't be 0)
+ * @buf: placeholder for the response message
+ * @buf_size: size of the response message placeholder (in dwords)
+ *
+ * This function can only be used by a driver running in SR-IOV PF mode.
+ *
+ * Return: Non-negative response length (in dwords) or
+ *         a negative error code on failure.
+ */
+int xe_guc_relay_send_to_vf(struct xe_guc_relay *relay, u32 target,
+			    const u32 *msg, u32 len, u32 *buf, u32 buf_size)
+{
+	relay_assert(relay, IS_SRIOV_PF(relay_to_xe(relay)));
+
+	return relay_send_to(relay, target, msg, len, buf, buf_size);
+}
+#endif
+
+/**
+ * xe_guc_relay_send_to_pf - Send a message to the PF.
+ * @relay: the &xe_guc_relay which will send the message
+ * @msg: request message to be sent
+ * @len: length of the message (in dwords, can't be 0)
+ * @buf: placeholder for the response message
+ * @buf_size: size of the response message placeholder (in dwords)
+ *
+ * This function can only be used by a driver running in SR-IOV VF mode.
+ *
+ * Return: Non-negative response length (in dwords) or
+ *         a negative error code on failure.
+ */
+int xe_guc_relay_send_to_pf(struct xe_guc_relay *relay,
+			    const u32 *msg, u32 len, u32 *buf, u32 buf_size)
+{
+	relay_assert(relay, IS_SRIOV_VF(relay_to_xe(relay)));
+
+	return relay_send_to(relay, PFID, msg, len, buf, buf_size);
+}
+
+static int relay_handle_reply(struct xe_guc_relay *relay, u32 origin,
+			      u32 rid, int reply, const u32 *msg, u32 len)
+{
+	struct relay_transaction *pending;
+	int err = -ESRCH;
+
+	spin_lock(&relay->lock);
+	list_for_each_entry(pending, &relay->pending_relays, link) {
+		if (pending->remote != origin || pending->rid != rid) {
+			relay_debug(relay, "%u.%u still awaits response\n",
+				    pending->remote, pending->rid);
+			continue;
+		}
+		err = 0; /* found! 
*/ + if (reply == 0) { + if (len > pending->response_len) { + reply = -ENOBUFS; + err = -ENOBUFS; + } else { + memcpy(pending->response, msg, 4 * len); + pending->response_len = len; + } + } + pending->reply = reply; + complete_all(&pending->done); + break; + } + spin_unlock(&relay->lock); + + return err; +} + +static int relay_handle_failure(struct xe_guc_relay *relay, u32 origin, + u32 rid, const u32 *msg, u32 len) +{ + int error = FIELD_GET(GUC_HXG_FAILURE_MSG_0_ERROR, msg[0]); + u32 hint __maybe_unused = FIELD_GET(GUC_HXG_FAILURE_MSG_0_HINT, msg[0]); + + relay_assert(relay, len); + relay_debug(relay, "%u.%u error %#x (%pe) hint %u debug %*ph\n", + origin, rid, error, ERR_PTR(-error), hint, 4 * (len - 1), msg + 1); + + return relay_handle_reply(relay, origin, rid, error ?: -EREMOTEIO, NULL, 0); +} + +static int relay_testloop_action_handler(struct xe_guc_relay *relay, u32 origin, + const u32 *msg, u32 len, u32 *response, u32 size) +{ + static ktime_t last_reply = 0; + u32 type = FIELD_GET(GUC_HXG_MSG_0_TYPE, msg[0]); + u32 action = FIELD_GET(GUC_HXG_REQUEST_MSG_0_ACTION, msg[0]); + u32 opcode = FIELD_GET(GUC_HXG_REQUEST_MSG_0_DATA0, msg[0]); + ktime_t now = ktime_get(); + bool busy; + int ret; + + relay_assert(relay, guc_hxg_type_is_action(type)); + relay_assert(relay, action == GUC_RELAY_ACTION_VFXPF_TESTLOOP); + + if (!IS_ENABLED(CONFIG_DRM_XE_DEBUG_SRIOV)) + return -ECONNREFUSED; + + if (!last_reply) + last_reply = now; + busy = ktime_before(now, ktime_add_ms(last_reply, 2 * RELAY_TIMEOUT_MSEC)); + if (!busy) + last_reply = now; + + switch (opcode) { + case VFXPF_TESTLOOP_OPCODE_NOP: + if (type == GUC_HXG_TYPE_EVENT) + return 0; + return guc_hxg_msg_encode_success(response, 0); + case VFXPF_TESTLOOP_OPCODE_BUSY: + if (type == GUC_HXG_TYPE_EVENT) + return -EPROTO; + msleep(RELAY_TIMEOUT_MSEC / 8); + if (busy) + return -EINPROGRESS; + return guc_hxg_msg_encode_success(response, 0); + case VFXPF_TESTLOOP_OPCODE_RETRY: + if (type == GUC_HXG_TYPE_EVENT) + return -EPROTO; + msleep(RELAY_TIMEOUT_MSEC / 8); + if (busy) + return guc_hxg_msg_encode_retry(response, 0); + return guc_hxg_msg_encode_success(response, 0); + case VFXPF_TESTLOOP_OPCODE_ECHO: + if (type == GUC_HXG_TYPE_EVENT) + return -EPROTO; + if (size < len) + return -ENOBUFS; + ret = guc_hxg_msg_encode_success(response, len); + memcpy(response + ret, msg + ret, (len - ret) * sizeof(u32)); + return len; + case VFXPF_TESTLOOP_OPCODE_FAIL: + return -EHWPOISON; + default: + break; + } + + relay_notice(relay, "Unexpected action %#x opcode %#x\n", action, opcode); + return -EBADRQC; +} + +static int relay_action_handler(struct xe_guc_relay *relay, u32 origin, + const u32 *msg, u32 len, u32 *response, u32 size) +{ + u32 type; + int ret; + + relay_assert(relay, len >= GUC_HXG_MSG_MIN_LEN); + + if (FIELD_GET(GUC_HXG_REQUEST_MSG_0_ACTION, msg[0]) == GUC_RELAY_ACTION_VFXPF_TESTLOOP) + return relay_testloop_action_handler(relay, origin, msg, len, response, size); + + type = FIELD_GET(GUC_HXG_MSG_0_TYPE, msg[0]); + + /* XXX: PF services will be added later */ + ret = -EOPNOTSUPP; + + if (type == GUC_HXG_TYPE_EVENT) + relay_assert(relay, ret <= 0); + + return ret; +} + +static struct relay_transaction *relay_dequeue_transaction(struct xe_guc_relay *relay) +{ + struct relay_transaction *txn; + + spin_lock(&relay->lock); + txn = list_first_entry_or_null(&relay->incoming_actions, struct relay_transaction, link); + if (txn) + list_del_init(&txn->link); + spin_unlock(&relay->lock); + + return txn; +} + +static void 
relay_process_incoming_action(struct xe_guc_relay *relay) +{ + struct relay_transaction *txn; + bool again = false; + u32 type; + int ret; + + txn = relay_dequeue_transaction(relay); + if (!txn) + return; + + type = FIELD_GET(GUC_HXG_MSG_0_TYPE, txn->request_buf[txn->offset]); + + ret = relay_action_handler(relay, txn->remote, + txn->request_buf + txn->offset, txn->request_len, + txn->response_buf + txn->offset, + ARRAY_SIZE(txn->response_buf) - txn->offset); + + if (ret == -EINPROGRESS) { + again = true; + ret = guc_hxg_msg_encode_busy(txn->response_buf + txn->offset, 0); + } + + if (ret > 0) { + txn->response_len = ret; + ret = relay_send_transaction(relay, txn); + } + + if (ret < 0) { + u32 error = to_relay_error(ret); + + relay_notice(relay, "Failed to handle %s.%u from %u (%pe) %*ph\n", + guc_hxg_type_to_string(type), txn->rid, txn->remote, + ERR_PTR(ret), 4 * txn->request_len, txn->request_buf + txn->offset); + + txn->response_len = prepare_error_reply(txn->response_buf + txn->offset, + txn->remote ? + sanitize_relay_error(error) : error, + txn->remote ? + sanitize_relay_error_hint(-ret) : -ret); + ret = relay_send_transaction(relay, txn); + again = false; + } + + if (again) { + spin_lock(&relay->lock); + list_add(&txn->link, &relay->incoming_actions); + spin_unlock(&relay->lock); + return; + } + + if (unlikely(ret < 0)) + relay_notice(relay, "Failed to process action.%u (%pe) %*ph\n", + txn->rid, ERR_PTR(ret), 4 * txn->request_len, + txn->request_buf + txn->offset); + + relay_release_transaction(relay, txn); +} + +static bool relay_needs_worker(struct xe_guc_relay *relay) +{ + return !list_empty(&relay->incoming_actions); +} + +static void relay_kick_worker(struct xe_guc_relay *relay) +{ + KUNIT_STATIC_STUB_REDIRECT(relay_kick_worker, relay); + queue_work(relay_to_xe(relay)->sriov.wq, &relay->worker); +} + +static void relays_worker_fn(struct work_struct *w) +{ + struct xe_guc_relay *relay = container_of(w, struct xe_guc_relay, worker); + + relay_process_incoming_action(relay); + + if (relay_needs_worker(relay)) + relay_kick_worker(relay); +} + +static int relay_queue_action_msg(struct xe_guc_relay *relay, u32 origin, u32 rid, + const u32 *msg, u32 len) +{ + struct relay_transaction *txn; + + txn = relay_new_incoming_transaction(relay, origin, rid, msg, len); + if (IS_ERR(txn)) + return PTR_ERR(txn); + + spin_lock(&relay->lock); + list_add_tail(&txn->link, &relay->incoming_actions); + spin_unlock(&relay->lock); + + relay_kick_worker(relay); + return 0; +} + +static int relay_process_msg(struct xe_guc_relay *relay, u32 origin, u32 rid, + const u32 *msg, u32 len) +{ + u32 type; + int err; + + if (unlikely(len < GUC_HXG_MSG_MIN_LEN)) + return -EPROTO; + + if (FIELD_GET(GUC_HXG_MSG_0_ORIGIN, msg[0]) != GUC_HXG_ORIGIN_HOST) + return -EPROTO; + + type = FIELD_GET(GUC_HXG_MSG_0_TYPE, msg[0]); + relay_debug(relay, "received %s.%u from %u = %*ph\n", + guc_hxg_type_to_string(type), rid, origin, 4 * len, msg); + + switch (type) { + case GUC_HXG_TYPE_REQUEST: + case GUC_HXG_TYPE_FAST_REQUEST: + case GUC_HXG_TYPE_EVENT: + err = relay_queue_action_msg(relay, origin, rid, msg, len); + break; + case GUC_HXG_TYPE_RESPONSE_SUCCESS: + err = relay_handle_reply(relay, origin, rid, 0, msg, len); + break; + case GUC_HXG_TYPE_NO_RESPONSE_BUSY: + err = relay_handle_reply(relay, origin, rid, -EBUSY, NULL, 0); + break; + case GUC_HXG_TYPE_NO_RESPONSE_RETRY: + err = relay_handle_reply(relay, origin, rid, -EAGAIN, NULL, 0); + break; + case GUC_HXG_TYPE_RESPONSE_FAILURE: + err = relay_handle_failure(relay, 
origin, rid, msg, len); + break; + default: + err = -EBADRQC; + } + + if (unlikely(err)) + relay_notice(relay, "Failed to process %s.%u from %u (%pe) %*ph\n", + guc_hxg_type_to_string(type), rid, origin, + ERR_PTR(err), 4 * len, msg); + + return err; +} + +/** + * xe_guc_relay_process_guc2vf - Handle relay notification message from the GuC. + * @relay: the &xe_guc_relay which will handle the message + * @msg: message to be handled + * @len: length of the message (in dwords) + * + * This function will handle relay messages received from the GuC. + * + * This function can only be used if the driver is running in SR-IOV mode. + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_guc_relay_process_guc2vf(struct xe_guc_relay *relay, const u32 *msg, u32 len) +{ + u32 rid; + + relay_assert(relay, len >= GUC_HXG_MSG_MIN_LEN); + relay_assert(relay, FIELD_GET(GUC_HXG_MSG_0_ORIGIN, msg[0]) == GUC_HXG_ORIGIN_GUC); + relay_assert(relay, FIELD_GET(GUC_HXG_MSG_0_TYPE, msg[0]) == GUC_HXG_TYPE_EVENT); + relay_assert(relay, FIELD_GET(GUC_HXG_EVENT_MSG_0_ACTION, msg[0]) == + XE_GUC_ACTION_GUC2VF_RELAY_FROM_PF); + + if (unlikely(!IS_SRIOV_VF(relay_to_xe(relay)) && !kunit_get_current_test())) + return -EPERM; + + if (unlikely(!relay_is_ready(relay))) + return -ENODEV; + + if (unlikely(len < GUC2VF_RELAY_FROM_PF_EVENT_MSG_MIN_LEN)) + return -EPROTO; + + if (unlikely(len > GUC2VF_RELAY_FROM_PF_EVENT_MSG_MAX_LEN)) + return -EMSGSIZE; + + if (unlikely(FIELD_GET(GUC_HXG_EVENT_MSG_0_DATA0, msg[0]))) + return -EPFNOSUPPORT; + + rid = FIELD_GET(GUC2VF_RELAY_FROM_PF_EVENT_MSG_1_RELAY_ID, msg[1]); + + return relay_process_msg(relay, PFID, rid, + msg + GUC2VF_RELAY_FROM_PF_EVENT_MSG_MIN_LEN, + len - GUC2VF_RELAY_FROM_PF_EVENT_MSG_MIN_LEN); +} + +#ifdef CONFIG_PCI_IOV +/** + * xe_guc_relay_process_guc2pf - Handle relay notification message from the GuC. + * @relay: the &xe_guc_relay which will handle the message + * @msg: message to be handled + * @len: length of the message (in dwords) + * + * This function will handle relay messages received from the GuC. + * + * This function can only be used if the driver is running in SR-IOV PF mode. + * + * Return: 0 on success or a negative error code on failure.
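A note on the dispatch in relay_process_msg() above: requests and events are queued for the worker, while the response types are translated into errno-style codes for the sleeping sender. The standalone sketch below models only that type-to-disposition mapping; the TYPE field position and the type values are simplified assumptions for illustration, not the real GuC HXG bit layout (the errno constants are the Linux ones).

/*
 * Standalone sketch of the relay_process_msg() type dispatch. Field
 * layout and type values are assumptions, not the GuC HXG ABI.
 */
#include <errno.h>
#include <stdint.h>
#include <stdio.h>

#define HXG_TYPE_SHIFT	28
#define HXG_TYPE(hdr)	(((hdr) >> HXG_TYPE_SHIFT) & 0x7u)

enum { T_REQUEST, T_EVENT, T_BUSY, T_RETRY, T_SUCCESS, T_FAILURE };

/* <0: reply code for the waiter, 0: queue for the worker, 1: wake with data */
static int dispatch(uint32_t hdr)
{
	switch (HXG_TYPE(hdr)) {
	case T_REQUEST:
	case T_EVENT:
		return 0;		/* queue as an incoming action */
	case T_SUCCESS:
		return 1;		/* copy the payload, wake the waiter */
	case T_BUSY:
		return -EBUSY;		/* responder needs more time */
	case T_RETRY:
		return -EAGAIN;		/* sender should resend */
	case T_FAILURE:
		return -EREMOTEIO;	/* or the error embedded in the reply */
	default:
		return -EBADRQC;	/* reject unknown types */
	}
}

int main(void)
{
	printf("retry -> %d\n", dispatch((uint32_t)T_RETRY << HXG_TYPE_SHIFT));
	return 0;
}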
+ */ +int xe_guc_relay_process_guc2pf(struct xe_guc_relay *relay, const u32 *msg, u32 len) +{ + u32 origin, rid; + int err; + + relay_assert(relay, len >= GUC_HXG_EVENT_MSG_MIN_LEN); + relay_assert(relay, FIELD_GET(GUC_HXG_MSG_0_ORIGIN, msg[0]) == GUC_HXG_ORIGIN_GUC); + relay_assert(relay, FIELD_GET(GUC_HXG_MSG_0_TYPE, msg[0]) == GUC_HXG_TYPE_EVENT); + relay_assert(relay, FIELD_GET(GUC_HXG_EVENT_MSG_0_ACTION, msg[0]) == + XE_GUC_ACTION_GUC2PF_RELAY_FROM_VF); + + if (unlikely(!IS_SRIOV_PF(relay_to_xe(relay)) && !kunit_get_current_test())) + return -EPERM; + + if (unlikely(!relay_is_ready(relay))) + return -ENODEV; + + if (unlikely(len < GUC2PF_RELAY_FROM_VF_EVENT_MSG_MIN_LEN)) + return -EPROTO; + + if (unlikely(len > GUC2PF_RELAY_FROM_VF_EVENT_MSG_MAX_LEN)) + return -EMSGSIZE; + + if (unlikely(FIELD_GET(GUC_HXG_EVENT_MSG_0_DATA0, msg[0]))) + return -EPFNOSUPPORT; + + origin = FIELD_GET(GUC2PF_RELAY_FROM_VF_EVENT_MSG_1_VFID, msg[1]); + rid = FIELD_GET(GUC2PF_RELAY_FROM_VF_EVENT_MSG_2_RELAY_ID, msg[2]); + + if (unlikely(origin > relay_get_totalvfs(relay))) + return -ENOENT; + + err = relay_process_msg(relay, origin, rid, + msg + GUC2PF_RELAY_FROM_VF_EVENT_MSG_MIN_LEN, + len - GUC2PF_RELAY_FROM_VF_EVENT_MSG_MIN_LEN); + + return err; +} +#endif + +#if IS_BUILTIN(CONFIG_DRM_XE_KUNIT_TEST) +#include "tests/xe_guc_relay_test.c" +#endif diff --git a/drivers/gpu/drm/xe/xe_guc_relay.h b/drivers/gpu/drm/xe/xe_guc_relay.h new file mode 100644 index 000000000000..385429aa188a --- /dev/null +++ b/drivers/gpu/drm/xe/xe_guc_relay.h @@ -0,0 +1,37 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023 Intel Corporation + */ + +#ifndef _XE_GUC_RELAY_H_ +#define _XE_GUC_RELAY_H_ + +#include <linux/types.h> +#include <linux/errno.h> + +struct xe_guc_relay; + +int xe_guc_relay_init(struct xe_guc_relay *relay); + +int xe_guc_relay_send_to_pf(struct xe_guc_relay *relay, + const u32 *msg, u32 len, u32 *buf, u32 buf_size); + +int xe_guc_relay_process_guc2vf(struct xe_guc_relay *relay, const u32 *msg, u32 len); + +#ifdef CONFIG_PCI_IOV +int xe_guc_relay_send_to_vf(struct xe_guc_relay *relay, u32 target, + const u32 *msg, u32 len, u32 *buf, u32 buf_size); +int xe_guc_relay_process_guc2pf(struct xe_guc_relay *relay, const u32 *msg, u32 len); +#else +static inline int xe_guc_relay_send_to_vf(struct xe_guc_relay *relay, u32 target, + const u32 *msg, u32 len, u32 *buf, u32 buf_size) +{ + return -ENODEV; +} +static inline int xe_guc_relay_process_guc2pf(struct xe_guc_relay *relay, const u32 *msg, u32 len) +{ + return -ENODEV; +} +#endif + +#endif diff --git a/drivers/gpu/drm/xe/xe_guc_relay_types.h b/drivers/gpu/drm/xe/xe_guc_relay_types.h new file mode 100644 index 000000000000..5999fcb77e96 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_guc_relay_types.h @@ -0,0 +1,36 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023 Intel Corporation + */ + +#ifndef _XE_GUC_RELAY_TYPES_H_ +#define _XE_GUC_RELAY_TYPES_H_ + +#include <linux/mempool.h> +#include <linux/spinlock.h> +#include <linux/workqueue.h> + +/** + * struct xe_guc_relay - Data used by the VF-PF Relay Communication over GuC. + */ +struct xe_guc_relay { + /** @lock: protects all internal data. */ + spinlock_t lock; + + /** @worker: dispatches incoming action messages. */ + struct work_struct worker; + + /** @pending_relays: list of sent requests that await a response. */ + struct list_head pending_relays; + + /** @incoming_actions: list of incoming relay action messages to process.
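The @incoming_actions list and @worker above implement a classic deferred-dispatch pattern: producers append under @lock and kick the worker, which dequeues a single entry per pass and re-kicks itself while entries remain (relays_worker_fn() and relay_needs_worker() earlier in this patch). A minimal single-threaded model of that loop, with hypothetical names and no locking:

/*
 * Single-threaded model of the incoming_actions producer/worker loop;
 * the real driver does this under a spinlock from a workqueue.
 */
#include <stdio.h>
#include <stdlib.h>

struct action {
	int id;
	struct action *next;
};

static struct action *head;	/* stands in for the locked list */

static struct action *dequeue(void)
{
	struct action *a = head;	/* under relay->lock in the driver */

	if (a)
		head = a->next;
	return a;
}

/* One worker pass: handle a single action, report whether more remain. */
static int worker_pass(void)
{
	struct action *a = dequeue();

	if (!a)
		return 0;
	printf("processed action %d\n", a->id);
	free(a);
	return head != NULL;	/* the relay_needs_worker() equivalent */
}

int main(void)
{
	for (int i = 3; i > 0; i--) {
		struct action *a = malloc(sizeof(*a));

		a->id = i;
		a->next = head;
		head = a;
	}
	while (worker_pass())	/* re-kick while work remains */
		;
	return 0;
}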
*/ + struct list_head incoming_actions; + + /** @pool: pool of the relay message buffers. */ + mempool_t pool; + + /** @last_rid: last Relay-ID used while sending a message. */ + u32 last_rid; +}; + +#endif diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index 54ffcfcdd41f..7c29b8333c71 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -1218,7 +1218,7 @@ static int guc_exec_queue_init(struct xe_exec_queue *q) init_waitqueue_head(&ge->suspend_wait); timeout = (q->vm && xe_vm_in_lr_mode(q->vm)) ? MAX_SCHEDULE_TIMEOUT : - q->hwe->eclass->sched_props.job_timeout_ms; + q->sched_props.job_timeout_ms; err = xe_sched_init(&ge->sched, &drm_sched_ops, &xe_sched_ops, get_submit_wq(guc), q->lrc[0].ring.size / MAX_JOB_SIZE_BYTES, 64, @@ -1350,21 +1350,6 @@ static int guc_exec_queue_set_preempt_timeout(struct xe_exec_queue *q, return 0; } -static int guc_exec_queue_set_job_timeout(struct xe_exec_queue *q, u32 job_timeout_ms) -{ - struct xe_gpu_scheduler *sched = &q->guc->sched; - struct xe_guc *guc = exec_queue_to_guc(q); - struct xe_device *xe = guc_to_xe(guc); - - xe_assert(xe, !exec_queue_registered(q)); - xe_assert(xe, !exec_queue_banned(q)); - xe_assert(xe, !exec_queue_killed(q)); - - sched->base.timeout = job_timeout_ms; - - return 0; -} - static int guc_exec_queue_suspend(struct xe_exec_queue *q) { struct xe_sched_msg *msg = q->guc->static_msgs + STATIC_MSG_SUSPEND; @@ -1415,7 +1400,6 @@ static const struct xe_exec_queue_ops guc_exec_queue_ops = { .set_priority = guc_exec_queue_set_priority, .set_timeslice = guc_exec_queue_set_timeslice, .set_preempt_timeout = guc_exec_queue_set_preempt_timeout, - .set_job_timeout = guc_exec_queue_set_job_timeout, .suspend = guc_exec_queue_suspend, .suspend_wait = guc_exec_queue_suspend_wait, .resume = guc_exec_queue_resume, diff --git a/drivers/gpu/drm/xe/xe_guc_types.h b/drivers/gpu/drm/xe/xe_guc_types.h index cd80802e8918..dc6059de669c 100644 --- a/drivers/gpu/drm/xe/xe_guc_types.h +++ b/drivers/gpu/drm/xe/xe_guc_types.h @@ -15,9 +15,23 @@ #include "xe_guc_fwif.h" #include "xe_guc_log_types.h" #include "xe_guc_pc_types.h" +#include "xe_guc_relay_types.h" #include "xe_uc_fw_types.h" /** + * struct xe_guc_db_mgr - GuC Doorbells Manager. + * + * Note: GuC Doorbells Manager is relying on &xe_guc::submission_state.lock + * to protect its members. 
+ */ +struct xe_guc_db_mgr { + /** @count: number of doorbells to manage */ + unsigned int count; + /** @bitmap: bitmap to track allocated doorbells */ + unsigned long *bitmap; +}; + +/** * struct xe_guc - Graphic micro controller */ struct xe_guc { @@ -31,6 +45,8 @@ struct xe_guc { struct xe_guc_ct ct; /** @pc: GuC Power Conservation */ struct xe_guc_pc pc; + /** @dbm: GuC Doorbell Manager */ + struct xe_guc_db_mgr dbm; /** @submission_state: GuC submission state */ struct { /** @exec_queue_lookup: Lookup an xe_engine from guc_id */ @@ -70,6 +86,9 @@ struct xe_guc { u32 size; } hwconfig; + /** @relay: GuC Relay Communication used in SR-IOV */ + struct xe_guc_relay relay; + /** * @notify_reg: Register which is written to notify GuC of H2G messages */ diff --git a/drivers/gpu/drm/xe/xe_heci_gsc.c b/drivers/gpu/drm/xe/xe_heci_gsc.c index bfdd33b9b23b..1c9d38b6f5f1 100644 --- a/drivers/gpu/drm/xe/xe_heci_gsc.c +++ b/drivers/gpu/drm/xe/xe_heci_gsc.c @@ -29,7 +29,7 @@ static void heci_gsc_irq_unmask(struct irq_data *d) /* generic irq handling */ } -static struct irq_chip heci_gsc_irq_chip = { +static const struct irq_chip heci_gsc_irq_chip = { .name = "gsc_irq_chip", .irq_mask = heci_gsc_irq_mask, .irq_unmask = heci_gsc_irq_unmask, diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c index 1fa5cf5eea97..3aaab507f37f 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine.c +++ b/drivers/gpu/drm/xe/xe_hw_engine.c @@ -34,6 +34,7 @@ struct engine_info { const char *name; unsigned int class : 8; unsigned int instance : 8; + unsigned int irq_offset : 8; enum xe_force_wake_domains domain; u32 mmio_base; }; @@ -43,6 +44,7 @@ static const struct engine_info engine_infos[] = { .name = "rcs0", .class = XE_ENGINE_CLASS_RENDER, .instance = 0, + .irq_offset = ilog2(INTR_RCS0), .domain = XE_FW_RENDER, .mmio_base = RENDER_RING_BASE, }, @@ -50,6 +52,7 @@ static const struct engine_info engine_infos[] = { .name = "bcs0", .class = XE_ENGINE_CLASS_COPY, .instance = 0, + .irq_offset = ilog2(INTR_BCS(0)), .domain = XE_FW_RENDER, .mmio_base = BLT_RING_BASE, }, @@ -57,6 +60,7 @@ static const struct engine_info engine_infos[] = { .name = "bcs1", .class = XE_ENGINE_CLASS_COPY, .instance = 1, + .irq_offset = ilog2(INTR_BCS(1)), .domain = XE_FW_RENDER, .mmio_base = XEHPC_BCS1_RING_BASE, }, @@ -64,6 +68,7 @@ static const struct engine_info engine_infos[] = { .name = "bcs2", .class = XE_ENGINE_CLASS_COPY, .instance = 2, + .irq_offset = ilog2(INTR_BCS(2)), .domain = XE_FW_RENDER, .mmio_base = XEHPC_BCS2_RING_BASE, }, @@ -71,6 +76,7 @@ static const struct engine_info engine_infos[] = { .name = "bcs3", .class = XE_ENGINE_CLASS_COPY, .instance = 3, + .irq_offset = ilog2(INTR_BCS(3)), .domain = XE_FW_RENDER, .mmio_base = XEHPC_BCS3_RING_BASE, }, @@ -78,6 +84,7 @@ static const struct engine_info engine_infos[] = { .name = "bcs4", .class = XE_ENGINE_CLASS_COPY, .instance = 4, + .irq_offset = ilog2(INTR_BCS(4)), .domain = XE_FW_RENDER, .mmio_base = XEHPC_BCS4_RING_BASE, }, @@ -85,6 +92,7 @@ static const struct engine_info engine_infos[] = { .name = "bcs5", .class = XE_ENGINE_CLASS_COPY, .instance = 5, + .irq_offset = ilog2(INTR_BCS(5)), .domain = XE_FW_RENDER, .mmio_base = XEHPC_BCS5_RING_BASE, }, @@ -92,12 +100,14 @@ static const struct engine_info engine_infos[] = { .name = "bcs6", .class = XE_ENGINE_CLASS_COPY, .instance = 6, + .irq_offset = ilog2(INTR_BCS(6)), .domain = XE_FW_RENDER, .mmio_base = XEHPC_BCS6_RING_BASE, }, [XE_HW_ENGINE_BCS7] = { .name = "bcs7", .class = XE_ENGINE_CLASS_COPY, + .irq_offset = 
ilog2(INTR_BCS(7)), .instance = 7, .domain = XE_FW_RENDER, .mmio_base = XEHPC_BCS7_RING_BASE, @@ -106,6 +116,7 @@ static const struct engine_info engine_infos[] = { .name = "bcs8", .class = XE_ENGINE_CLASS_COPY, .instance = 8, + .irq_offset = ilog2(INTR_BCS8), .domain = XE_FW_RENDER, .mmio_base = XEHPC_BCS8_RING_BASE, }, @@ -114,6 +125,7 @@ static const struct engine_info engine_infos[] = { .name = "vcs0", .class = XE_ENGINE_CLASS_VIDEO_DECODE, .instance = 0, + .irq_offset = 32 + ilog2(INTR_VCS(0)), .domain = XE_FW_MEDIA_VDBOX0, .mmio_base = BSD_RING_BASE, }, @@ -121,6 +133,7 @@ static const struct engine_info engine_infos[] = { .name = "vcs1", .class = XE_ENGINE_CLASS_VIDEO_DECODE, .instance = 1, + .irq_offset = 32 + ilog2(INTR_VCS(1)), .domain = XE_FW_MEDIA_VDBOX1, .mmio_base = BSD2_RING_BASE, }, @@ -128,6 +141,7 @@ static const struct engine_info engine_infos[] = { .name = "vcs2", .class = XE_ENGINE_CLASS_VIDEO_DECODE, .instance = 2, + .irq_offset = 32 + ilog2(INTR_VCS(2)), .domain = XE_FW_MEDIA_VDBOX2, .mmio_base = BSD3_RING_BASE, }, @@ -135,6 +149,7 @@ static const struct engine_info engine_infos[] = { .name = "vcs3", .class = XE_ENGINE_CLASS_VIDEO_DECODE, .instance = 3, + .irq_offset = 32 + ilog2(INTR_VCS(3)), .domain = XE_FW_MEDIA_VDBOX3, .mmio_base = BSD4_RING_BASE, }, @@ -142,6 +157,7 @@ static const struct engine_info engine_infos[] = { .name = "vcs4", .class = XE_ENGINE_CLASS_VIDEO_DECODE, .instance = 4, + .irq_offset = 32 + ilog2(INTR_VCS(4)), .domain = XE_FW_MEDIA_VDBOX4, .mmio_base = XEHP_BSD5_RING_BASE, }, @@ -149,6 +165,7 @@ static const struct engine_info engine_infos[] = { .name = "vcs5", .class = XE_ENGINE_CLASS_VIDEO_DECODE, .instance = 5, + .irq_offset = 32 + ilog2(INTR_VCS(5)), .domain = XE_FW_MEDIA_VDBOX5, .mmio_base = XEHP_BSD6_RING_BASE, }, @@ -156,6 +173,7 @@ static const struct engine_info engine_infos[] = { .name = "vcs6", .class = XE_ENGINE_CLASS_VIDEO_DECODE, .instance = 6, + .irq_offset = 32 + ilog2(INTR_VCS(6)), .domain = XE_FW_MEDIA_VDBOX6, .mmio_base = XEHP_BSD7_RING_BASE, }, @@ -163,6 +181,7 @@ static const struct engine_info engine_infos[] = { .name = "vcs7", .class = XE_ENGINE_CLASS_VIDEO_DECODE, .instance = 7, + .irq_offset = 32 + ilog2(INTR_VCS(7)), .domain = XE_FW_MEDIA_VDBOX7, .mmio_base = XEHP_BSD8_RING_BASE, }, @@ -170,6 +189,7 @@ static const struct engine_info engine_infos[] = { .name = "vecs0", .class = XE_ENGINE_CLASS_VIDEO_ENHANCE, .instance = 0, + .irq_offset = 32 + ilog2(INTR_VECS(0)), .domain = XE_FW_MEDIA_VEBOX0, .mmio_base = VEBOX_RING_BASE, }, @@ -177,6 +197,7 @@ static const struct engine_info engine_infos[] = { .name = "vecs1", .class = XE_ENGINE_CLASS_VIDEO_ENHANCE, .instance = 1, + .irq_offset = 32 + ilog2(INTR_VECS(1)), .domain = XE_FW_MEDIA_VEBOX1, .mmio_base = VEBOX2_RING_BASE, }, @@ -184,6 +205,7 @@ static const struct engine_info engine_infos[] = { .name = "vecs2", .class = XE_ENGINE_CLASS_VIDEO_ENHANCE, .instance = 2, + .irq_offset = 32 + ilog2(INTR_VECS(2)), .domain = XE_FW_MEDIA_VEBOX2, .mmio_base = XEHP_VEBOX3_RING_BASE, }, @@ -191,6 +213,7 @@ static const struct engine_info engine_infos[] = { .name = "vecs3", .class = XE_ENGINE_CLASS_VIDEO_ENHANCE, .instance = 3, + .irq_offset = 32 + ilog2(INTR_VECS(3)), .domain = XE_FW_MEDIA_VEBOX3, .mmio_base = XEHP_VEBOX4_RING_BASE, }, @@ -198,6 +221,7 @@ static const struct engine_info engine_infos[] = { .name = "ccs0", .class = XE_ENGINE_CLASS_COMPUTE, .instance = 0, + .irq_offset = ilog2(INTR_CCS(0)), .domain = XE_FW_RENDER, .mmio_base = COMPUTE0_RING_BASE, }, @@ -205,6 +229,7 @@ 
static const struct engine_info engine_infos[] = { .name = "ccs1", .class = XE_ENGINE_CLASS_COMPUTE, .instance = 1, + .irq_offset = ilog2(INTR_CCS(1)), .domain = XE_FW_RENDER, .mmio_base = COMPUTE1_RING_BASE, }, @@ -212,6 +237,7 @@ static const struct engine_info engine_infos[] = { .name = "ccs2", .class = XE_ENGINE_CLASS_COMPUTE, .instance = 2, + .irq_offset = ilog2(INTR_CCS(2)), .domain = XE_FW_RENDER, .mmio_base = COMPUTE2_RING_BASE, }, @@ -219,6 +245,7 @@ static const struct engine_info engine_infos[] = { .name = "ccs3", .class = XE_ENGINE_CLASS_COMPUTE, .instance = 3, + .irq_offset = ilog2(INTR_CCS(3)), .domain = XE_FW_RENDER, .mmio_base = COMPUTE3_RING_BASE, }, @@ -289,6 +316,19 @@ static bool xe_hw_engine_match_fixed_cslice_mode(const struct xe_gt *gt, xe_rtp_match_first_render_or_compute(gt, hwe); } +static bool xe_rtp_cfeg_wmtp_disabled(const struct xe_gt *gt, + const struct xe_hw_engine *hwe) +{ + if (GRAPHICS_VER(gt_to_xe(gt)) < 20) + return false; + + if (hwe->class != XE_ENGINE_CLASS_COMPUTE && + hwe->class != XE_ENGINE_CLASS_RENDER) + return false; + + return xe_mmio_read32(hwe->gt, XEHP_FUSE4) & CFEG_WMTP_DISABLE; +} + void xe_hw_engine_setup_default_lrc_state(struct xe_hw_engine *hwe) { @@ -319,6 +359,14 @@ xe_hw_engine_setup_default_lrc_state(struct xe_hw_engine *hwe) XE_RTP_ACTIONS(FIELD_SET(RCU_MODE, RCU_MODE_FIXED_SLICE_CCS_MODE, RCU_MODE_FIXED_SLICE_CCS_MODE)) }, + /* Disable WMTP if HW doesn't support it */ + { XE_RTP_NAME("DISABLE_WMTP_ON_UNSUPPORTED_HW"), + XE_RTP_RULES(FUNC(xe_rtp_cfeg_wmtp_disabled)), + XE_RTP_ACTIONS(FIELD_SET(CS_CHICKEN1(0), + PREEMPT_GPGPU_LEVEL_MASK, + PREEMPT_GPGPU_THREAD_GROUP_LEVEL)), + XE_RTP_ENTRY_FLAG(FOREACH_ENGINE) + }, {} }; @@ -397,6 +445,7 @@ static void hw_engine_init_early(struct xe_gt *gt, struct xe_hw_engine *hwe, hwe->class = info->class; hwe->instance = info->instance; hwe->mmio_base = info->mmio_base; + hwe->irq_offset = info->irq_offset; hwe->domain = info->domain; hwe->name = info->name; hwe->fence_irq = >->fence_irq[info->class]; @@ -700,7 +749,6 @@ struct xe_hw_engine_snapshot * xe_hw_engine_snapshot_capture(struct xe_hw_engine *hwe) { struct xe_hw_engine_snapshot *snapshot; - int len; if (!xe_hw_engine_is_valid(hwe)) return NULL; @@ -710,11 +758,7 @@ xe_hw_engine_snapshot_capture(struct xe_hw_engine *hwe) if (!snapshot) return NULL; - len = strlen(hwe->name) + 1; - snapshot->name = kzalloc(len, GFP_ATOMIC); - if (snapshot->name) - strscpy(snapshot->name, hwe->name, len); - + snapshot->name = kstrdup(hwe->name, GFP_ATOMIC); snapshot->class = hwe->class; snapshot->logical_instance = hwe->logical_instance; snapshot->forcewake.domain = hwe->domain; diff --git a/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c b/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c index e49bc14f0ecf..2345fb42fa39 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c +++ b/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c @@ -73,7 +73,7 @@ static ssize_t job_timeout_max_show(struct kobject *kobj, return sprintf(buf, "%u\n", eclass->sched_props.job_timeout_max); } -static struct kobj_attribute job_timeout_max_attr = +static const struct kobj_attribute job_timeout_max_attr = __ATTR(job_timeout_max, 0644, job_timeout_max_show, job_timeout_max_store); static ssize_t job_timeout_min_store(struct kobject *kobj, @@ -109,7 +109,7 @@ static ssize_t job_timeout_min_show(struct kobject *kobj, return sprintf(buf, "%u\n", eclass->sched_props.job_timeout_min); } -static struct kobj_attribute job_timeout_min_attr = +static const struct kobj_attribute 
job_timeout_min_attr = __ATTR(job_timeout_min, 0644, job_timeout_min_show, job_timeout_min_store); static ssize_t job_timeout_store(struct kobject *kobj, @@ -142,7 +142,7 @@ static ssize_t job_timeout_show(struct kobject *kobj, return sprintf(buf, "%u\n", eclass->sched_props.job_timeout_ms); } -static struct kobj_attribute job_timeout_attr = +static const struct kobj_attribute job_timeout_attr = __ATTR(job_timeout_ms, 0644, job_timeout_show, job_timeout_store); static ssize_t job_timeout_default(struct kobject *kobj, @@ -153,7 +153,7 @@ static ssize_t job_timeout_default(struct kobject *kobj, return sprintf(buf, "%u\n", eclass->defaults.job_timeout_ms); } -static struct kobj_attribute job_timeout_def = +static const struct kobj_attribute job_timeout_def = __ATTR(job_timeout_ms, 0444, job_timeout_default, NULL); static ssize_t job_timeout_min_default(struct kobject *kobj, @@ -164,7 +164,7 @@ static ssize_t job_timeout_min_default(struct kobject *kobj, return sprintf(buf, "%u\n", eclass->defaults.job_timeout_min); } -static struct kobj_attribute job_timeout_min_def = +static const struct kobj_attribute job_timeout_min_def = __ATTR(job_timeout_min, 0444, job_timeout_min_default, NULL); static ssize_t job_timeout_max_default(struct kobject *kobj, @@ -175,7 +175,7 @@ static ssize_t job_timeout_max_default(struct kobject *kobj, return sprintf(buf, "%u\n", eclass->defaults.job_timeout_max); } -static struct kobj_attribute job_timeout_max_def = +static const struct kobj_attribute job_timeout_max_def = __ATTR(job_timeout_max, 0444, job_timeout_max_default, NULL); static ssize_t timeslice_duration_store(struct kobject *kobj, @@ -234,7 +234,7 @@ static ssize_t timeslice_duration_max_show(struct kobject *kobj, return sprintf(buf, "%u\n", eclass->sched_props.timeslice_max); } -static struct kobj_attribute timeslice_duration_max_attr = +static const struct kobj_attribute timeslice_duration_max_attr = __ATTR(timeslice_duration_max, 0644, timeslice_duration_max_show, timeslice_duration_max_store); @@ -272,7 +272,7 @@ static ssize_t timeslice_duration_min_show(struct kobject *kobj, return sprintf(buf, "%u\n", eclass->sched_props.timeslice_min); } -static struct kobj_attribute timeslice_duration_min_attr = +static const struct kobj_attribute timeslice_duration_min_attr = __ATTR(timeslice_duration_min, 0644, timeslice_duration_min_show, timeslice_duration_min_store); @@ -284,7 +284,7 @@ static ssize_t timeslice_duration_show(struct kobject *kobj, return sprintf(buf, "%u\n", eclass->sched_props.timeslice_us); } -static struct kobj_attribute timeslice_duration_attr = +static const struct kobj_attribute timeslice_duration_attr = __ATTR(timeslice_duration_us, 0644, timeslice_duration_show, timeslice_duration_store); @@ -296,7 +296,7 @@ static ssize_t timeslice_default(struct kobject *kobj, return sprintf(buf, "%u\n", eclass->defaults.timeslice_us); } -static struct kobj_attribute timeslice_duration_def = +static const struct kobj_attribute timeslice_duration_def = __ATTR(timeslice_duration_us, 0444, timeslice_default, NULL); static ssize_t timeslice_min_default(struct kobject *kobj, @@ -307,7 +307,7 @@ static ssize_t timeslice_min_default(struct kobject *kobj, return sprintf(buf, "%u\n", eclass->defaults.timeslice_min); } -static struct kobj_attribute timeslice_duration_min_def = +static const struct kobj_attribute timeslice_duration_min_def = __ATTR(timeslice_duration_min, 0444, timeslice_min_default, NULL); static ssize_t timeslice_max_default(struct kobject *kobj, @@ -318,7 +318,7 @@ static ssize_t 
timeslice_max_default(struct kobject *kobj, return sprintf(buf, "%u\n", eclass->defaults.timeslice_max); } -static struct kobj_attribute timeslice_duration_max_def = +static const struct kobj_attribute timeslice_duration_max_def = __ATTR(timeslice_duration_max, 0444, timeslice_max_default, NULL); static ssize_t preempt_timeout_store(struct kobject *kobj, @@ -351,7 +351,7 @@ static ssize_t preempt_timeout_show(struct kobject *kobj, return sprintf(buf, "%u\n", eclass->sched_props.preempt_timeout_us); } -static struct kobj_attribute preempt_timeout_attr = +static const struct kobj_attribute preempt_timeout_attr = __ATTR(preempt_timeout_us, 0644, preempt_timeout_show, preempt_timeout_store); static ssize_t preempt_timeout_default(struct kobject *kobj, @@ -363,7 +363,7 @@ static ssize_t preempt_timeout_default(struct kobject *kobj, return sprintf(buf, "%u\n", eclass->defaults.preempt_timeout_us); } -static struct kobj_attribute preempt_timeout_def = +static const struct kobj_attribute preempt_timeout_def = __ATTR(preempt_timeout_us, 0444, preempt_timeout_default, NULL); static ssize_t preempt_timeout_min_default(struct kobject *kobj, @@ -375,7 +375,7 @@ static ssize_t preempt_timeout_min_default(struct kobject *kobj, return sprintf(buf, "%u\n", eclass->defaults.preempt_timeout_min); } -static struct kobj_attribute preempt_timeout_min_def = +static const struct kobj_attribute preempt_timeout_min_def = __ATTR(preempt_timeout_min, 0444, preempt_timeout_min_default, NULL); static ssize_t preempt_timeout_max_default(struct kobject *kobj, @@ -387,7 +387,7 @@ static ssize_t preempt_timeout_max_default(struct kobject *kobj, return sprintf(buf, "%u\n", eclass->defaults.preempt_timeout_max); } -static struct kobj_attribute preempt_timeout_max_def = +static const struct kobj_attribute preempt_timeout_max_def = __ATTR(preempt_timeout_max, 0444, preempt_timeout_max_default, NULL); static ssize_t preempt_timeout_max_store(struct kobject *kobj, @@ -423,7 +423,7 @@ static ssize_t preempt_timeout_max_show(struct kobject *kobj, return sprintf(buf, "%u\n", eclass->sched_props.preempt_timeout_max); } -static struct kobj_attribute preempt_timeout_max_attr = +static const struct kobj_attribute preempt_timeout_max_attr = __ATTR(preempt_timeout_max, 0644, preempt_timeout_max_show, preempt_timeout_max_store); @@ -460,7 +460,7 @@ static ssize_t preempt_timeout_min_show(struct kobject *kobj, return sprintf(buf, "%u\n", eclass->sched_props.preempt_timeout_min); } -static struct kobj_attribute preempt_timeout_min_attr = +static const struct kobj_attribute preempt_timeout_min_attr = __ATTR(preempt_timeout_min, 0644, preempt_timeout_min_show, preempt_timeout_min_store); @@ -477,7 +477,7 @@ static const struct attribute *defaults[] = { NULL }; -static const struct attribute *files[] = { +static const struct attribute * const files[] = { &job_timeout_attr.attr, &job_timeout_min_attr.attr, &job_timeout_max_attr.attr, diff --git a/drivers/gpu/drm/xe/xe_hw_engine_types.h b/drivers/gpu/drm/xe/xe_hw_engine_types.h index 39908dec042a..dfeaaac08b7f 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine_types.h +++ b/drivers/gpu/drm/xe/xe_hw_engine_types.h @@ -116,6 +116,8 @@ struct xe_hw_engine { u16 instance; /** @logical_instance: logical instance of this hw engine */ u16 logical_instance; + /** @irq_offset: IRQ offset of this hw engine */ + u16 irq_offset; /** @mmio_base: MMIO base address of this hw engine*/ u32 mmio_base; /** diff --git a/drivers/gpu/drm/xe/xe_hwmon.c b/drivers/gpu/drm/xe/xe_hwmon.c index 6ef2aa1eae8b..89c6f7f84b5a 100644 
--- a/drivers/gpu/drm/xe/xe_hwmon.c +++ b/drivers/gpu/drm/xe/xe_hwmon.c @@ -402,7 +402,7 @@ static const struct attribute_group *hwmon_groups[] = { NULL }; -static const struct hwmon_channel_info *hwmon_info[] = { +static const struct hwmon_channel_info * const hwmon_info[] = { HWMON_CHANNEL_INFO(power, HWMON_P_MAX | HWMON_P_RATED_MAX | HWMON_P_CRIT), HWMON_CHANNEL_INFO(curr, HWMON_C_CRIT), HWMON_CHANNEL_INFO(in, HWMON_I_INPUT), @@ -419,7 +419,7 @@ static int xe_hwmon_pcode_read_i1(struct xe_gt *gt, u32 *uval) return xe_pcode_read(gt, PCODE_MBOX(PCODE_POWER_SETUP, POWER_SETUP_SUBCOMMAND_READ_I1, 0), - uval, 0); + uval, NULL); } static int xe_hwmon_pcode_write_i1(struct xe_gt *gt, u32 uval) diff --git a/drivers/gpu/drm/xe/xe_irq.c b/drivers/gpu/drm/xe/xe_irq.c index d1f5ba4bb745..2fd8cc26fc9f 100644 --- a/drivers/gpu/drm/xe/xe_irq.c +++ b/drivers/gpu/drm/xe/xe_irq.c @@ -14,10 +14,13 @@ #include "xe_device.h" #include "xe_display.h" #include "xe_drv.h" +#include "xe_gsc_proxy.h" #include "xe_gt.h" #include "xe_guc.h" #include "xe_hw_engine.h" +#include "xe_memirq.h" #include "xe_mmio.h" +#include "xe_sriov.h" /* * Interrupt registers for a unit are always consecutive and ordered @@ -129,6 +132,7 @@ void xe_irq_enable_hwe(struct xe_gt *gt) u32 ccs_mask, bcs_mask; u32 irqs, dmask, smask; u32 gsc_mask = 0; + u32 heci_mask = 0; if (xe_device_uc_enabled(xe)) { irqs = GT_RENDER_USER_INTERRUPT | @@ -178,14 +182,23 @@ void xe_irq_enable_hwe(struct xe_gt *gt) xe_mmio_write32(gt, VCS2_VCS3_INTR_MASK, ~dmask); xe_mmio_write32(gt, VECS0_VECS1_INTR_MASK, ~dmask); - if (xe_hw_engine_mask_per_class(gt, XE_ENGINE_CLASS_OTHER)) + /* + * the heci2 interrupt is enabled via the same register as the + * GSCCS interrupts, but it has its own mask register. + */ + if (xe_hw_engine_mask_per_class(gt, XE_ENGINE_CLASS_OTHER)) { gsc_mask = irqs; - else if (HAS_HECI_GSCFI(xe)) + heci_mask = GSC_IRQ_INTF(1); + } else if (HAS_HECI_GSCFI(xe)) { gsc_mask = GSC_IRQ_INTF(1); + } + if (gsc_mask) { - xe_mmio_write32(gt, GUNIT_GSC_INTR_ENABLE, gsc_mask); + xe_mmio_write32(gt, GUNIT_GSC_INTR_ENABLE, gsc_mask | heci_mask); xe_mmio_write32(gt, GUNIT_GSC_INTR_MASK, ~gsc_mask); } + if (heci_mask) + xe_mmio_write32(gt, HECI2_RSVD_INTR_MASK, ~(heci_mask << 16)); } } @@ -232,6 +245,8 @@ gt_other_irq_handler(struct xe_gt *gt, const u8 instance, const u16 iir) return xe_guc_irq_handler(>->uc.guc, iir); if (instance == OTHER_MEDIA_GUC_INSTANCE && xe_gt_is_media_type(gt)) return xe_guc_irq_handler(>->uc.guc, iir); + if (instance == OTHER_GSC_HECI2_INSTANCE && xe_gt_is_media_type(gt)) + return xe_gsc_proxy_irq_handler(>->uc.gsc, iir); if (instance != OTHER_GUC_INSTANCE && instance != OTHER_MEDIA_GUC_INSTANCE) { @@ -249,15 +264,23 @@ static struct xe_gt *pick_engine_gt(struct xe_tile *tile, if (MEDIA_VER(xe) < 13) return tile->primary_gt; - if (class == XE_ENGINE_CLASS_VIDEO_DECODE || - class == XE_ENGINE_CLASS_VIDEO_ENHANCE) + switch (class) { + case XE_ENGINE_CLASS_VIDEO_DECODE: + case XE_ENGINE_CLASS_VIDEO_ENHANCE: return tile->media_gt; - - if (class == XE_ENGINE_CLASS_OTHER && - (instance == OTHER_MEDIA_GUC_INSTANCE || instance == OTHER_GSC_INSTANCE)) - return tile->media_gt; - - return tile->primary_gt; + case XE_ENGINE_CLASS_OTHER: + switch (instance) { + case OTHER_MEDIA_GUC_INSTANCE: + case OTHER_GSC_INSTANCE: + case OTHER_GSC_HECI2_INSTANCE: + return tile->media_gt; + default: + break; + }; + fallthrough; + default: + return tile->primary_gt; + } } static void gt_irq_handler(struct xe_tile *tile, @@ -484,6 +507,7 @@ static void 
gt_irq_reset(struct xe_tile *tile) HAS_HECI_GSCFI(tile_to_xe(tile))) { xe_mmio_write32(mmio, GUNIT_GSC_INTR_ENABLE, 0); xe_mmio_write32(mmio, GUNIT_GSC_INTR_MASK, ~0); + xe_mmio_write32(mmio, HECI2_RSVD_INTR_MASK, ~0); } xe_mmio_write32(mmio, GPM_WGBOXPERF_INTR_ENABLE, 0); @@ -498,6 +522,9 @@ static void xelp_irq_reset(struct xe_tile *tile) gt_irq_reset(tile); + if (IS_SRIOV_VF(tile_to_xe(tile))) + return; + mask_and_disable(tile, PCU_IRQ_OFFSET); } @@ -508,6 +535,9 @@ static void dg1_irq_reset(struct xe_tile *tile) gt_irq_reset(tile); + if (IS_SRIOV_VF(tile_to_xe(tile))) + return; + mask_and_disable(tile, PCU_IRQ_OFFSET); } @@ -518,11 +548,34 @@ static void dg1_irq_reset_mstr(struct xe_tile *tile) xe_mmio_write32(mmio, GFX_MSTR_IRQ, ~0); } +static void vf_irq_reset(struct xe_device *xe) +{ + struct xe_tile *tile; + unsigned int id; + + xe_assert(xe, IS_SRIOV_VF(xe)); + + if (GRAPHICS_VERx100(xe) < 1210) + xelp_intr_disable(xe); + else + xe_assert(xe, xe_device_has_memirq(xe)); + + for_each_tile(tile, xe, id) { + if (xe_device_has_memirq(xe)) + xe_memirq_reset(&tile->sriov.vf.memirq); + else + gt_irq_reset(tile); + } +} + static void xe_irq_reset(struct xe_device *xe) { struct xe_tile *tile; u8 id; + if (IS_SRIOV_VF(xe)) + return vf_irq_reset(xe); + for_each_tile(tile, xe, id) { if (GRAPHICS_VERx100(xe) >= 1210) dg1_irq_reset(tile); @@ -545,8 +598,26 @@ static void xe_irq_reset(struct xe_device *xe) } } +static void vf_irq_postinstall(struct xe_device *xe) +{ + struct xe_tile *tile; + unsigned int id; + + for_each_tile(tile, xe, id) + if (xe_device_has_memirq(xe)) + xe_memirq_postinstall(&tile->sriov.vf.memirq); + + if (GRAPHICS_VERx100(xe) < 1210) + xelp_intr_enable(xe, true); + else + xe_assert(xe, xe_device_has_memirq(xe)); +} + static void xe_irq_postinstall(struct xe_device *xe) { + if (IS_SRIOV_VF(xe)) + return vf_irq_postinstall(xe); + xe_display_irq_postinstall(xe, xe_root_mmio_gt(xe)); /* @@ -563,8 +634,30 @@ static void xe_irq_postinstall(struct xe_device *xe) xelp_intr_enable(xe, true); } +static irqreturn_t vf_mem_irq_handler(int irq, void *arg) +{ + struct xe_device *xe = arg; + struct xe_tile *tile; + unsigned int id; + + spin_lock(&xe->irq.lock); + if (!xe->irq.enabled) { + spin_unlock(&xe->irq.lock); + return IRQ_NONE; + } + spin_unlock(&xe->irq.lock); + + for_each_tile(tile, xe, id) + xe_memirq_handler(&tile->sriov.vf.memirq); + + return IRQ_HANDLED; +} + static irq_handler_t xe_irq_handler(struct xe_device *xe) { + if (IS_SRIOV_VF(xe) && xe_device_has_memirq(xe)) + return vf_mem_irq_handler; + if (GRAPHICS_VERx100(xe) >= 1210) return dg1_irq_handler; else diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c index b7fa3831b684..f17e9785355e 100644 --- a/drivers/gpu/drm/xe/xe_lrc.c +++ b/drivers/gpu/drm/xe/xe_lrc.c @@ -19,6 +19,8 @@ #include "xe_gt_printk.h" #include "xe_hw_fence.h" #include "xe_map.h" +#include "xe_memirq.h" +#include "xe_sriov.h" #include "xe_vm.h" #define CTX_VALID (1 << 0) @@ -532,6 +534,27 @@ static void set_context_control(u32 *regs, struct xe_hw_engine *hwe) /* TODO: Timestamp */ } +static void set_memory_based_intr(u32 *regs, struct xe_hw_engine *hwe) +{ + struct xe_memirq *memirq = >_to_tile(hwe->gt)->sriov.vf.memirq; + struct xe_device *xe = gt_to_xe(hwe->gt); + + if (!IS_SRIOV_VF(xe) || !xe_device_has_memirq(xe)) + return; + + regs[CTX_LRM_INT_MASK_ENABLE] = MI_LOAD_REGISTER_MEM | + MI_LRI_LRM_CS_MMIO | MI_LRM_USE_GGTT; + regs[CTX_INT_MASK_ENABLE_REG] = RING_IMR(0).addr; + regs[CTX_INT_MASK_ENABLE_PTR] = 
xe_memirq_enable_ptr(memirq); + + regs[CTX_LRI_INT_REPORT_PTR] = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(2) | + MI_LRI_LRM_CS_MMIO | MI_LRI_FORCE_POSTED; + regs[CTX_INT_STATUS_REPORT_REG] = RING_INT_STATUS_RPT_PTR(0).addr; + regs[CTX_INT_STATUS_REPORT_PTR] = xe_memirq_status_ptr(memirq); + regs[CTX_INT_SRC_REPORT_REG] = RING_INT_SRC_RPT_PTR(0).addr; + regs[CTX_INT_SRC_REPORT_PTR] = xe_memirq_source_ptr(memirq); +} + static int lrc_ring_mi_mode(struct xe_hw_engine *hwe) { struct xe_device *xe = gt_to_xe(hwe->gt); @@ -667,6 +690,7 @@ static void *empty_lrc_data(struct xe_hw_engine *hwe) regs = data + LRC_PPHWSP_SIZE; set_offsets(regs, reg_offsets(xe, hwe->class), hwe); set_context_control(regs, hwe); + set_memory_based_intr(regs, hwe); reset_stop_ring(regs, hwe); return data; @@ -964,6 +988,20 @@ static int dump_mi_command(struct drm_printer *p, drm_printf(p, " - %#6x = %#010x\n", dw[i], dw[i + 1]); return numdw; + case MI_LOAD_REGISTER_MEM & MI_OPCODE: + drm_printf(p, "[%#010x] MI_LOAD_REGISTER_MEM: %s%s\n", + inst_header, + dw[0] & MI_LRI_LRM_CS_MMIO ? "CS_MMIO " : "", + dw[0] & MI_LRM_USE_GGTT ? "USE_GGTT " : ""); + if (numdw == 4) + drm_printf(p, " - %#6x = %#010llx\n", + dw[1], ((u64)(dw[3]) << 32 | (u64)(dw[2]))); + else + drm_printf(p, " - %*ph (%s)\n", + (int)sizeof(u32) * (numdw - 1), dw + 1, + numdw < 4 ? "truncated" : "malformed"); + return numdw; + case MI_FORCE_WAKEUP: drm_printf(p, "[%#010x] MI_FORCE_WAKEUP\n", inst_header); return numdw; diff --git a/drivers/gpu/drm/xe/xe_memirq.c b/drivers/gpu/drm/xe/xe_memirq.c new file mode 100644 index 000000000000..76e95535d7f6 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_memirq.c @@ -0,0 +1,430 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2023 Intel Corporation + */ + +#include <drm/drm_managed.h> + +#include "regs/xe_gt_regs.h" +#include "regs/xe_guc_regs.h" +#include "regs/xe_regs.h" + +#include "xe_assert.h" +#include "xe_bo.h" +#include "xe_device.h" +#include "xe_device_types.h" +#include "xe_gt.h" +#include "xe_gt_printk.h" +#include "xe_guc.h" +#include "xe_hw_engine.h" +#include "xe_map.h" +#include "xe_memirq.h" +#include "xe_sriov.h" +#include "xe_sriov_printk.h" + +#define memirq_assert(m, condition) xe_tile_assert(memirq_to_tile(m), condition) +#define memirq_debug(m, msg...) xe_sriov_dbg_verbose(memirq_to_xe(m), "MEMIRQ: " msg) + +static struct xe_tile *memirq_to_tile(struct xe_memirq *memirq) +{ + return container_of(memirq, struct xe_tile, sriov.vf.memirq); +} + +static struct xe_device *memirq_to_xe(struct xe_memirq *memirq) +{ + return tile_to_xe(memirq_to_tile(memirq)); +} + +static const char *guc_name(struct xe_guc *guc) +{ + return xe_gt_is_media_type(guc_to_gt(guc)) ? "media GuC" : "GuC"; +} + +/** + * DOC: Memory Based Interrupts + * + * The MMIO register based interrupts infrastructure used for non-virtualized mode + * or SRIOV-8 (which supports 8 Virtual Functions) does not scale efficiently + * to allow delivering interrupts to a large number of Virtual machines or + * containers. Memory based interrupt status reporting provides an efficient + * and scalable infrastructure. + * + * For memory based interrupt status reporting, the hardware sequence is: + * * Engine writes the interrupt event to memory + * (Pointer to memory location is provided by SW. This memory surface must + * be mapped to system memory and must be marked as un-cacheable (UC) on + * Graphics IP Caches) + * * Engine triggers an interrupt to host.
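The key property of this scheme is that every interrupt bit becomes a whole byte in system memory, set to 0xff by hardware and cleared by software once serviced; memirq_received() later in this file implements exactly that read-check-clear step. A standalone model of it, where the UC page is just an array and the names are illustrative:

/*
 * Standalone model of the byte-per-bit reporting: hardware sets a byte
 * to 0xff, software warns on anything else and clears it once serviced.
 */
#include <stdint.h>
#include <stdio.h>

static uint8_t source_page[1024];	/* models the UC system-memory page */

static int received(uint8_t *vec, unsigned int offset, const char *name)
{
	uint8_t value = vec[offset];

	if (value) {
		if (value != 0xff)
			fprintf(stderr, "unexpected %#x from %s at %u\n",
				value, name, offset);
		vec[offset] = 0;	/* ack: hardware will set it again */
	}
	return value != 0;
}

int main(void)
{
	source_page[5] = 0xff;		/* pretend unit 5 raised an interrupt */

	if (received(source_page, 5, "SRC"))
		printf("dispatch the unit at offset 5\n");
	return 0;
}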
+ */ + +/** + * DOC: Memory Based Interrupts Page Layout + * + * `Memory Based Interrupts`_ requires three different objects, which are + * called "page" in the specs, even if they aren't page-sized or aligned. + * + * To simplify the code we allocate a single page size object and then use + * offsets to embedded "pages". The address of those "pages" are then + * programmed in the HW via LRI and LRM in the context image. + * + * - _`Interrupt Status Report Page`: this page contains the interrupt + * status vectors for each unit. Each bit in the interrupt vectors is + * converted to a byte, with the byte being set to 0xFF when an + * interrupt is triggered; interrupt vectors are 16b big so each unit + * gets 16B. One space is reserved for each bit in one of the + * GT_INTR_DWx registers, so this object needs a total of 1024B. + * This object needs to be 4KiB aligned. + * + * - _`Interrupt Source Report Page`: this is the equivalent of the + * GEN11_GT_INTR_DWx registers, with each bit in those registers being + * mapped to a byte here. The offsets are the same, just bytes instead + * of bits. This object needs to be cacheline aligned. + * + * - Interrupt Mask: the HW needs a location to fetch the interrupt + * mask vector to be used by the LRM in the context, so we just use + * the next available space in the interrupt page. + * + * :: + * + * 0x0000 +===========+ <== Interrupt Status Report Page + * | | + * | | ____ +----+----------------+ + * | | / | 0 | USER INTERRUPT | + * +-----------+ __/ | 1 | | + * | HWE(n) | __ | | CTX SWITCH | + * +-----------+ \ | | WAIT SEMAPHORE | + * | | \____ | 15 | | + * | | +----+----------------+ + * | | + * 0x0400 +===========+ <== Interrupt Source Report Page + * | HWE(0) | + * | HWE(1) | + * | | + * | HWE(x) | + * 0x0440 +===========+ <== Interrupt Enable Mask + * | | + * | | + * +-----------+ + */ + +static void __release_xe_bo(struct drm_device *drm, void *arg) +{ + struct xe_bo *bo = arg; + + xe_bo_unpin_map_no_vm(bo); +} + +static int memirq_alloc_pages(struct xe_memirq *memirq) +{ + struct xe_device *xe = memirq_to_xe(memirq); + struct xe_tile *tile = memirq_to_tile(memirq); + struct xe_bo *bo; + int err; + + BUILD_BUG_ON(!IS_ALIGNED(XE_MEMIRQ_SOURCE_OFFSET, SZ_64)); + BUILD_BUG_ON(!IS_ALIGNED(XE_MEMIRQ_STATUS_OFFSET, SZ_4K)); + + /* XXX: convert to managed bo */ + bo = xe_bo_create_pin_map(xe, tile, NULL, SZ_4K, + ttm_bo_type_kernel, + XE_BO_CREATE_SYSTEM_BIT | + XE_BO_CREATE_GGTT_BIT | + XE_BO_NEEDS_UC | + XE_BO_NEEDS_CPU_ACCESS); + if (IS_ERR(bo)) { + err = PTR_ERR(bo); + goto out; + } + + memirq_assert(memirq, !xe_bo_is_vram(bo)); + memirq_assert(memirq, !memirq->bo); + + iosys_map_memset(&bo->vmap, 0, 0, SZ_4K); + + memirq->bo = bo; + memirq->source = IOSYS_MAP_INIT_OFFSET(&bo->vmap, XE_MEMIRQ_SOURCE_OFFSET); + memirq->status = IOSYS_MAP_INIT_OFFSET(&bo->vmap, XE_MEMIRQ_STATUS_OFFSET); + memirq->mask = IOSYS_MAP_INIT_OFFSET(&bo->vmap, XE_MEMIRQ_ENABLE_OFFSET); + + memirq_assert(memirq, !memirq->source.is_iomem); + memirq_assert(memirq, !memirq->status.is_iomem); + memirq_assert(memirq, !memirq->mask.is_iomem); + + memirq_debug(memirq, "page offsets: source %#x status %#x\n", + xe_memirq_source_ptr(memirq), xe_memirq_status_ptr(memirq)); + + return drmm_add_action_or_reset(&xe->drm, __release_xe_bo, memirq->bo); + +out: + xe_sriov_err(memirq_to_xe(memirq), + "Failed to allocate memirq page (%pe)\n", ERR_PTR(err)); + return err; +} + +static void memirq_set_enable(struct xe_memirq *memirq, bool enable) +{ + iosys_map_wr(&memirq->mask, 0, u32, 
enable ? GENMASK(15, 0) : 0); + + memirq->enabled = enable; +} + +/** + * xe_memirq_init - Initialize data used by `Memory Based Interrupts`_. + * @memirq: the &xe_memirq to initialize + * + * Allocate `Interrupt Source Report Page`_ and `Interrupt Status Report Page`_ + * used by `Memory Based Interrupts`_. + * + * These allocations are managed and will be implicitly released on unload. + * + * Note: This function shall be called only by the VF driver. + * + * If this function fails then VF driver won't be able to operate correctly. + * If `Memory Based Interrupts`_ are not used this function will return 0. + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_memirq_init(struct xe_memirq *memirq) +{ + struct xe_device *xe = memirq_to_xe(memirq); + int err; + + memirq_assert(memirq, IS_SRIOV_VF(xe)); + + if (!xe_device_has_memirq(xe)) + return 0; + + err = memirq_alloc_pages(memirq); + if (unlikely(err)) + return err; + + /* we need to start with all irqs enabled */ + memirq_set_enable(memirq, true); + + return 0; +} + +/** + * xe_memirq_source_ptr - Get GGTT's offset of the `Interrupt Source Report Page`_. + * @memirq: the &xe_memirq to query + * + * Shall be called only on VF driver when `Memory Based Interrupts`_ are used + * and xe_memirq_init() didn't fail. + * + * Return: GGTT's offset of the `Interrupt Source Report Page`_. + */ +u32 xe_memirq_source_ptr(struct xe_memirq *memirq) +{ + memirq_assert(memirq, IS_SRIOV_VF(memirq_to_xe(memirq))); + memirq_assert(memirq, xe_device_has_memirq(memirq_to_xe(memirq))); + memirq_assert(memirq, memirq->bo); + + return xe_bo_ggtt_addr(memirq->bo) + XE_MEMIRQ_SOURCE_OFFSET; +} + +/** + * xe_memirq_status_ptr - Get GGTT's offset of the `Interrupt Status Report Page`_. + * @memirq: the &xe_memirq to query + * + * Shall be called only on VF driver when `Memory Based Interrupts`_ are used + * and xe_memirq_init() didn't fail. + * + * Return: GGTT's offset of the `Interrupt Status Report Page`_. + */ +u32 xe_memirq_status_ptr(struct xe_memirq *memirq) +{ + memirq_assert(memirq, IS_SRIOV_VF(memirq_to_xe(memirq))); + memirq_assert(memirq, xe_device_has_memirq(memirq_to_xe(memirq))); + memirq_assert(memirq, memirq->bo); + + return xe_bo_ggtt_addr(memirq->bo) + XE_MEMIRQ_STATUS_OFFSET; +} + +/** + * xe_memirq_enable_ptr - Get GGTT's offset of the Interrupt Enable Mask. + * @memirq: the &xe_memirq to query + * + * Shall be called only on VF driver when `Memory Based Interrupts`_ are used + * and xe_memirq_init() didn't fail. + * + * Return: GGTT's offset of the Interrupt Enable Mask. + */ +u32 xe_memirq_enable_ptr(struct xe_memirq *memirq) +{ + memirq_assert(memirq, IS_SRIOV_VF(memirq_to_xe(memirq))); + memirq_assert(memirq, xe_device_has_memirq(memirq_to_xe(memirq))); + memirq_assert(memirq, memirq->bo); + + return xe_bo_ggtt_addr(memirq->bo) + XE_MEMIRQ_ENABLE_OFFSET; +} + +/** + * xe_memirq_init_guc - Prepare GuC for `Memory Based Interrupts`_. + * @memirq: the &xe_memirq + * @guc: the &xe_guc to setup + * + * Register `Interrupt Source Report Page`_ and `Interrupt Status Report Page`_ + * to be used by the GuC when `Memory Based Interrupts`_ are required. + * + * Shall be called only on VF driver when `Memory Based Interrupts`_ are used + * and xe_memirq_init() didn't fail. + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_memirq_init_guc(struct xe_memirq *memirq, struct xe_guc *guc) +{ + bool is_media = xe_gt_is_media_type(guc_to_gt(guc)); + u32 offset = is_media ? 
ilog2(INTR_MGUC) : ilog2(INTR_GUC); + u32 source, status; + int err; + + memirq_assert(memirq, IS_SRIOV_VF(memirq_to_xe(memirq))); + memirq_assert(memirq, xe_device_has_memirq(memirq_to_xe(memirq))); + memirq_assert(memirq, memirq->bo); + + source = xe_memirq_source_ptr(memirq) + offset; + status = xe_memirq_status_ptr(memirq) + offset * SZ_16; + + err = xe_guc_self_cfg64(guc, GUC_KLV_SELF_CFG_MEMIRQ_SOURCE_ADDR_KEY, + source); + if (unlikely(err)) + goto failed; + + err = xe_guc_self_cfg64(guc, GUC_KLV_SELF_CFG_MEMIRQ_STATUS_ADDR_KEY, + status); + if (unlikely(err)) + goto failed; + + return 0; + +failed: + xe_sriov_err(memirq_to_xe(memirq), + "Failed to setup report pages in %s (%pe)\n", + guc_name(guc), ERR_PTR(err)); + return err; +} + +/** + * xe_memirq_reset - Disable processing of `Memory Based Interrupts`_. + * @memirq: struct xe_memirq + * + * This is part of the driver IRQ setup flow. + * + * This function shall only be used by the VF driver on platforms that use + * `Memory Based Interrupts`_. + */ +void xe_memirq_reset(struct xe_memirq *memirq) +{ + memirq_assert(memirq, IS_SRIOV_VF(memirq_to_xe(memirq))); + memirq_assert(memirq, xe_device_has_memirq(memirq_to_xe(memirq))); + + if (memirq->bo) + memirq_set_enable(memirq, false); +} + +/** + * xe_memirq_postinstall - Enable processing of `Memory Based Interrupts`_. + * @memirq: the &xe_memirq + * + * This is part of the driver IRQ setup flow. + * + * This function shall only be used by the VF driver on platforms that use + * `Memory Based Interrupts`_. + */ +void xe_memirq_postinstall(struct xe_memirq *memirq) +{ + memirq_assert(memirq, IS_SRIOV_VF(memirq_to_xe(memirq))); + memirq_assert(memirq, xe_device_has_memirq(memirq_to_xe(memirq))); + + if (memirq->bo) + memirq_set_enable(memirq, true); +} + +static bool memirq_received(struct xe_memirq *memirq, struct iosys_map *vector, + u16 offset, const char *name) +{ + u8 value; + + value = iosys_map_rd(vector, offset, u8); + if (value) { + if (value != 0xff) + xe_sriov_err_ratelimited(memirq_to_xe(memirq), + "Unexpected memirq value %#x from %s at %u\n", + value, name, offset); + iosys_map_wr(vector, offset, u8, 0x00); + } + + return value; +} + +static void memirq_dispatch_engine(struct xe_memirq *memirq, struct iosys_map *status, + struct xe_hw_engine *hwe) +{ + memirq_debug(memirq, "STATUS %s %*ph\n", hwe->name, 16, status->vaddr); + + if (memirq_received(memirq, status, ilog2(GT_RENDER_USER_INTERRUPT), hwe->name)) + xe_hw_engine_handle_irq(hwe, GT_RENDER_USER_INTERRUPT); +} + +static void memirq_dispatch_guc(struct xe_memirq *memirq, struct iosys_map *status, + struct xe_guc *guc) +{ + const char *name = guc_name(guc); + + memirq_debug(memirq, "STATUS %s %*ph\n", name, 16, status->vaddr); + + if (memirq_received(memirq, status, ilog2(GUC_INTR_GUC2HOST), name)) + xe_guc_irq_handler(guc, GUC_INTR_GUC2HOST); +} + +/** + * xe_memirq_handler - The `Memory Based Interrupts`_ Handler. + * @memirq: the &xe_memirq + * + * This function reads and dispatches `Memory Based Interrupts`. 
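xe_memirq_handler() turns a set byte in the source page into a lookup in the status page: each source index owns a 16-byte status vector, so the vector lives at irq_offset * 16 from the status base. A small sketch of just that address arithmetic, using the layout constants from xe_memirq_types.h below (the example index is hypothetical):

/*
 * Sketch of the source-to-status address arithmetic only; the offsets
 * mirror the XE_MEMIRQ_* constants, the example index is made up.
 */
#include <stdint.h>
#include <stdio.h>

#define STATUS_OFFSET	0x0	/* XE_MEMIRQ_STATUS_OFFSET */
#define SOURCE_OFFSET	0x400	/* XE_MEMIRQ_SOURCE_OFFSET */
#define VECTOR_SIZE	16	/* one 16B status vector per source byte */

static uint32_t status_vector(uint32_t irq_offset)
{
	return STATUS_OFFSET + irq_offset * VECTOR_SIZE;
}

int main(void)
{
	uint32_t idx = 25;	/* hypothetical engine irq_offset */

	printf("source byte at %#x, status vector at %#x\n",
	       SOURCE_OFFSET + idx, status_vector(idx));
	return 0;
}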
+ */ +void xe_memirq_handler(struct xe_memirq *memirq) +{ + struct xe_device *xe = memirq_to_xe(memirq); + struct xe_tile *tile = memirq_to_tile(memirq); + struct xe_hw_engine *hwe; + enum xe_hw_engine_id id; + struct iosys_map map; + unsigned int gtid; + struct xe_gt *gt; + + if (!memirq->bo) + return; + + memirq_assert(memirq, !memirq->source.is_iomem); + memirq_debug(memirq, "SOURCE %*ph\n", 32, memirq->source.vaddr); + memirq_debug(memirq, "SOURCE %*ph\n", 32, memirq->source.vaddr + 32); + + for_each_gt(gt, xe, gtid) { + if (gt->tile != tile) + continue; + + for_each_hw_engine(hwe, gt, id) { + if (memirq_received(memirq, &memirq->source, hwe->irq_offset, "SRC")) { + map = IOSYS_MAP_INIT_OFFSET(&memirq->status, + hwe->irq_offset * SZ_16); + memirq_dispatch_engine(memirq, &map, hwe); + } + } + } + + /* GuC and media GuC (if present) must be checked separately */ + + if (memirq_received(memirq, &memirq->source, ilog2(INTR_GUC), "SRC")) { + map = IOSYS_MAP_INIT_OFFSET(&memirq->status, ilog2(INTR_GUC) * SZ_16); + memirq_dispatch_guc(memirq, &map, &tile->primary_gt->uc.guc); + } + + if (!tile->media_gt) + return; + + if (memirq_received(memirq, &memirq->source, ilog2(INTR_MGUC), "SRC")) { + map = IOSYS_MAP_INIT_OFFSET(&memirq->status, ilog2(INTR_MGUC) * SZ_16); + memirq_dispatch_guc(memirq, &map, &tile->media_gt->uc.guc); + } +} diff --git a/drivers/gpu/drm/xe/xe_memirq.h b/drivers/gpu/drm/xe/xe_memirq.h new file mode 100644 index 000000000000..2d40d03c3095 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_memirq.h @@ -0,0 +1,26 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023 Intel Corporation + */ + +#ifndef _XE_MEMIRQ_H_ +#define _XE_MEMIRQ_H_ + +#include <linux/types.h> + +struct xe_guc; +struct xe_memirq; + +int xe_memirq_init(struct xe_memirq *memirq); + +u32 xe_memirq_source_ptr(struct xe_memirq *memirq); +u32 xe_memirq_status_ptr(struct xe_memirq *memirq); +u32 xe_memirq_enable_ptr(struct xe_memirq *memirq); + +void xe_memirq_reset(struct xe_memirq *memirq); +void xe_memirq_postinstall(struct xe_memirq *memirq); +void xe_memirq_handler(struct xe_memirq *memirq); + +int xe_memirq_init_guc(struct xe_memirq *memirq, struct xe_guc *guc); + +#endif diff --git a/drivers/gpu/drm/xe/xe_memirq_types.h b/drivers/gpu/drm/xe/xe_memirq_types.h new file mode 100644 index 000000000000..625b6b8736cc --- /dev/null +++ b/drivers/gpu/drm/xe/xe_memirq_types.h @@ -0,0 +1,37 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023 Intel Corporation + */ + +#ifndef _XE_MEMIRQ_TYPES_H_ +#define _XE_MEMIRQ_TYPES_H_ + +#include <linux/iosys-map.h> + +struct xe_bo; + +/* ISR */ +#define XE_MEMIRQ_STATUS_OFFSET 0x0 +/* IIR */ +#define XE_MEMIRQ_SOURCE_OFFSET 0x400 +/* IMR */ +#define XE_MEMIRQ_ENABLE_OFFSET 0x440 + +/** + * struct xe_memirq - Data used by the `Memory Based Interrupts`_. + * + * @bo: buffer object with `Memory Based Interrupts Page Layout`_. + * @source: iosys pointer to `Interrupt Source Report Page`_. + * @status: iosys pointer to `Interrupt Status Report Page`_. + * @mask: iosys pointer to Interrupt Enable Mask. + * @enabled: internal flag used to control processing of the interrupts. 
+ */ +struct xe_memirq { + struct xe_bo *bo; + struct iosys_map source; + struct iosys_map status; + struct iosys_map mask; + bool enabled; +}; + +#endif diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c index e05e9e7282b6..7abf15546ced 100644 --- a/drivers/gpu/drm/xe/xe_migrate.c +++ b/drivers/gpu/drm/xe/xe_migrate.c @@ -72,6 +72,15 @@ struct xe_migrate { #define NUM_PT_SLOTS 32 #define LEVEL0_PAGE_TABLE_ENCODE_SIZE SZ_2M +/* + * Although MI_STORE_DATA_IMM's "length" field is 10-bits, 0x3FE is the largest + * legal value accepted. Since that instruction field is always stored in + * (val-2) format, this translates to 0x400 dwords for the true maximum length + * of the instruction. Subtracting the instruction header (1 dword) and + * address (2 dwords), that leaves 0x3FD dwords (0x1FE qwords) for PTE values. + */ +#define MAX_PTE_PER_SDI 0x1FE + /** * xe_tile_migrate_engine() - Get this tile's migrate engine. * @tile: The tile. @@ -347,7 +356,7 @@ struct xe_migrate *xe_migrate_init(struct xe_tile *tile) m->q = xe_exec_queue_create(xe, vm, logical_mask, 1, hwe, EXEC_QUEUE_FLAG_KERNEL | EXEC_QUEUE_FLAG_PERMANENT | - EXEC_QUEUE_FLAG_HIGH_PRIORITY); + EXEC_QUEUE_FLAG_HIGH_PRIORITY, 0); } else { m->q = xe_exec_queue_create_class(xe, primary_gt, vm, XE_ENGINE_CLASS_COPY, @@ -444,7 +453,7 @@ static u32 pte_update_size(struct xe_migrate *m, *L0_ofs = xe_migrate_vm_addr(pt_ofs, 0); /* MI_STORE_DATA_IMM */ - cmds += 3 * DIV_ROUND_UP(num_4k_pages, 0x1ff); + cmds += 3 * DIV_ROUND_UP(num_4k_pages, MAX_PTE_PER_SDI); /* PDE qwords */ cmds += num_4k_pages * 2; @@ -472,14 +481,14 @@ static void emit_pte(struct xe_migrate *m, /* Indirect access needs compression enabled uncached PAT index */ if (GRAPHICS_VERx100(xe) >= 2000) pat_index = is_comp_pte ? xe->pat.idx[XE_CACHE_NONE_COMPRESSION] : - xe->pat.idx[XE_CACHE_NONE]; + xe->pat.idx[XE_CACHE_WB]; else pat_index = xe->pat.idx[XE_CACHE_WB]; ptes = DIV_ROUND_UP(size, XE_PAGE_SIZE); while (ptes) { - u32 chunk = min(0x1ffU, ptes); + u32 chunk = min(MAX_PTE_PER_SDI, ptes); bb->cs[bb->len++] = MI_STORE_DATA_IMM | MI_SDI_NUM_QW(chunk); bb->cs[bb->len++] = ofs; @@ -760,14 +769,14 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m, if (src_is_vram && xe_migrate_allow_identity(src_L0, &src_it)) xe_res_next(&src_it, src_L0); else - emit_pte(m, bb, src_L0_pt, src_is_vram, true, &src_it, src_L0, - src); + emit_pte(m, bb, src_L0_pt, src_is_vram, copy_system_ccs, + &src_it, src_L0, src); if (dst_is_vram && xe_migrate_allow_identity(src_L0, &dst_it)) xe_res_next(&dst_it, src_L0); else - emit_pte(m, bb, dst_L0_pt, dst_is_vram, true, &dst_it, src_L0, - dst); + emit_pte(m, bb, dst_L0_pt, dst_is_vram, copy_system_ccs, + &dst_it, src_L0, dst); if (copy_system_ccs) emit_pte(m, bb, ccs_pt, false, false, &ccs_it, ccs_size, src); @@ -1009,8 +1018,8 @@ struct dma_fence *xe_migrate_clear(struct xe_migrate *m, if (clear_vram && xe_migrate_allow_identity(clear_L0, &src_it)) xe_res_next(&src_it, clear_L0); else - emit_pte(m, bb, clear_L0_pt, clear_vram, true, &src_it, clear_L0, - dst); + emit_pte(m, bb, clear_L0_pt, clear_vram, clear_system_ccs, + &src_it, clear_L0, dst); bb->cs[bb->len++] = MI_BATCH_BUFFER_END; update_idx = bb->len; @@ -1098,7 +1107,7 @@ static void write_pgtable(struct xe_tile *tile, struct xe_bb *bb, u64 ppgtt_ofs, * This shouldn't be possible in practice, but might change when 16K * pages are used. Hence the assert.
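The MAX_PTE_PER_SDI bound above drives both the command-count estimate (the DIV_ROUND_UP in pte_update_size()) and the emission loop in emit_pte(). A standalone sketch of the chunking arithmetic, assuming the 0x1FE qwords-per-command limit derived in the comment (the PTE count is a made-up input):

/*
 * Standalone sketch of the MI_STORE_DATA_IMM chunking arithmetic, using
 * the 0x1FE qword limit derived above.
 */
#include <stdio.h>

#define MAX_PTE_PER_SDI 0x1FE	/* qword PTEs per MI_STORE_DATA_IMM */

static unsigned int min_u(unsigned int a, unsigned int b)
{
	return a < b ? a : b;
}

int main(void)
{
	unsigned int ptes = 1000;
	unsigned int cmds = (ptes + MAX_PTE_PER_SDI - 1) / MAX_PTE_PER_SDI;

	printf("%u PTEs need %u MI_STORE_DATA_IMM commands\n", ptes, cmds);

	while (ptes) {		/* the emit_pte() style loop */
		unsigned int chunk = min_u(MAX_PTE_PER_SDI, ptes);

		printf("emit a chunk of %u PTEs\n", chunk);
		ptes -= chunk;
	}
	return 0;
}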
*/ - xe_tile_assert(tile, update->qwords <= 0x1ff); + xe_tile_assert(tile, update->qwords <= MAX_PTE_PER_SDI); if (!ppgtt_ofs) ppgtt_ofs = xe_migrate_vram_ofs(tile_to_xe(tile), xe_bo_addr(update->pt_bo, 0, @@ -1107,7 +1116,7 @@ static void write_pgtable(struct xe_tile *tile, struct xe_bb *bb, u64 ppgtt_ofs, do { u64 addr = ppgtt_ofs + ofs * 8; - chunk = min(update->qwords, 0x1ffU); + chunk = min(size, MAX_PTE_PER_SDI); /* Ensure populatefn can do memset64 by aligning bb->cs */ if (!(bb->len & 1)) @@ -1283,7 +1292,7 @@ xe_migrate_update_pgtables(struct xe_migrate *m, batch_size = 6 + num_updates * 2; for (i = 0; i < num_updates; i++) { - u32 num_cmds = DIV_ROUND_UP(updates[i].qwords, 0x1ff); + u32 num_cmds = DIV_ROUND_UP(updates[i].qwords, MAX_PTE_PER_SDI); /* align noop + MI_STORE_DATA_IMM cmd prefix */ batch_size += 4 * num_cmds + updates[i].qwords * 2; diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c index c8c5d74b6e90..5f6b53ea5528 100644 --- a/drivers/gpu/drm/xe/xe_mmio.c +++ b/drivers/gpu/drm/xe/xe_mmio.c @@ -272,8 +272,8 @@ int xe_mmio_probe_vram(struct xe_device *xe) drm_info(&xe->drm, "VRAM[%u, %u]: Actual physical size %pa, usable size exclude stolen %pa, CPU accessible size %pa\n", id, tile->id, &tile->mem.vram.actual_physical_size, &tile->mem.vram.usable_size, &tile->mem.vram.io_size); drm_info(&xe->drm, "VRAM[%u, %u]: DPA range: [%pa-%llx], io range: [%pa-%llx]\n", id, tile->id, - &tile->mem.vram.dpa_base, tile->mem.vram.dpa_base + tile->mem.vram.actual_physical_size, - &tile->mem.vram.io_start, tile->mem.vram.io_start + tile->mem.vram.io_size); + &tile->mem.vram.dpa_base, tile->mem.vram.dpa_base + (u64)tile->mem.vram.actual_physical_size, + &tile->mem.vram.io_start, tile->mem.vram.io_start + (u64)tile->mem.vram.io_size); /* calculate total size using tile size to get the correct HW sizing */ total_size += tile_size; diff --git a/drivers/gpu/drm/xe/xe_mocs.c b/drivers/gpu/drm/xe/xe_mocs.c index ef79552e4f2f..d205d684d809 100644 --- a/drivers/gpu/drm/xe/xe_mocs.c +++ b/drivers/gpu/drm/xe/xe_mocs.c @@ -290,18 +290,6 @@ static const struct xe_mocs_entry dg2_mocs_desc[] = { MOCS_ENTRY(3, 0, L3_3_WB | L3_LKUP(1)), }; -static const struct xe_mocs_entry dg2_mocs_desc_g10_ax[] = { - /* Wa_14011441408: Set Go to Memory for MOCS#0 */ - MOCS_ENTRY(0, 0, L3_1_UC | L3_GLBGO(1) | L3_LKUP(1)), - /* UC - Coherent; GO:Memory */ - MOCS_ENTRY(1, 0, L3_1_UC | L3_GLBGO(1) | L3_LKUP(1)), - /* UC - Non-Coherent; GO:Memory */ - MOCS_ENTRY(2, 0, L3_1_UC | L3_GLBGO(1)), - - /* WB - LC */ - MOCS_ENTRY(3, 0, L3_3_WB | L3_LKUP(1)), -}; - static const struct xe_mocs_entry pvc_mocs_desc[] = { /* Error */ MOCS_ENTRY(0, 0, L3_3_WB), @@ -409,15 +397,8 @@ static unsigned int get_mocs_settings(struct xe_device *xe, info->unused_entries_index = 1; break; case XE_DG2: - if (xe->info.subplatform == XE_SUBPLATFORM_DG2_G10 && - xe->info.step.graphics >= STEP_A0 && - xe->info.step.graphics <= STEP_B0) { - info->size = ARRAY_SIZE(dg2_mocs_desc_g10_ax); - info->table = dg2_mocs_desc_g10_ax; - } else { - info->size = ARRAY_SIZE(dg2_mocs_desc); - info->table = dg2_mocs_desc; - } + info->size = ARRAY_SIZE(dg2_mocs_desc); + info->table = dg2_mocs_desc; info->uc_index = 1; info->n_entries = XELP_NUM_MOCS_ENTRIES; info->unused_entries_index = 3; diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index dcc5ded1558e..6664d1b2efdb 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -340,14 +340,14 @@ static const struct xe_device_desc lnl_desc = { __diag_pop(); 
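The casts added in the xe_mmio.c hunk above are worth a second look: resource_size_t can be 32-bit on some configurations, so summing two such values could wrap before reaching the %llx specifier; widening one operand forces 64-bit arithmetic. A tiny standalone demonstration of the difference (the addresses are made up):

/*
 * Demonstrates why one operand is widened before the addition when the
 * operands may be 32-bit.
 */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t io_start = 0xE0000000u;	/* pretend resource_size_t is 32-bit */
	uint32_t io_size = 0x40000000u;		/* 1 GiB */

	/* 32-bit addition wraps to 0x20000000 before the print */
	printf("wrapped: %#llx\n",
	       (unsigned long long)(io_start + io_size));

	/* widening one operand keeps the full end address */
	printf("correct: %#llx\n",
	       (unsigned long long)(io_start + (uint64_t)io_size));
	return 0;
}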
/* Map of GMD_ID values to graphics IP */ -static struct gmdid_map graphics_ip_map[] = { +static const struct gmdid_map graphics_ip_map[] = { { 1270, &graphics_xelpg }, { 1271, &graphics_xelpg }, { 2004, &graphics_xe2 }, }; /* Map of GMD_ID values to media IP */ -static struct gmdid_map media_ip_map[] = { +static const struct gmdid_map media_ip_map[] = { { 1300, &media_xelpmp }, { 2000, &media_xe2 }, }; @@ -774,6 +774,8 @@ static int xe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) str_yes_no(xe_device_has_sriov(xe)), xe_sriov_mode_to_string(xe_device_sriov_mode(xe))); + xe_pm_init_early(xe); + err = xe_device_probe(xe); if (err) return err; diff --git a/drivers/gpu/drm/xe/xe_pcode_api.h b/drivers/gpu/drm/xe/xe_pcode_api.h index 5935cfe30204..f153ce96f69a 100644 --- a/drivers/gpu/drm/xe/xe_pcode_api.h +++ b/drivers/gpu/drm/xe/xe_pcode_api.h @@ -42,6 +42,13 @@ #define POWER_SETUP_I1_SHIFT 6 /* 10.6 fixed point format */ #define POWER_SETUP_I1_DATA_MASK REG_GENMASK(15, 0) +#define PCODE_FREQUENCY_CONFIG 0x6e +/* Frequency Config Sub Commands (param1) */ +#define PCODE_MBOX_FC_SC_READ_FUSED_P0 0x0 +#define PCODE_MBOX_FC_SC_READ_FUSED_PN 0x1 +/* Domain IDs (param2) */ +#define PCODE_MBOX_DOMAIN_HBM 0x2 + struct pcode_err_decode { int errno; const char *str; diff --git a/drivers/gpu/drm/xe/xe_pm.c b/drivers/gpu/drm/xe/xe_pm.c index b429c2876a76..d5f219796d7e 100644 --- a/drivers/gpu/drm/xe/xe_pm.c +++ b/drivers/gpu/drm/xe/xe_pm.c @@ -163,6 +163,12 @@ static void xe_pm_runtime_init(struct xe_device *xe) pm_runtime_put(dev); } +void xe_pm_init_early(struct xe_device *xe) +{ + INIT_LIST_HEAD(&xe->mem_access.vram_userfault.list); + drmm_mutex_init(&xe->drm, &xe->mem_access.vram_userfault.lock); +} + void xe_pm_init(struct xe_device *xe) { struct pci_dev *pdev = to_pci_dev(xe->drm.dev); @@ -214,6 +220,7 @@ struct task_struct *xe_pm_read_callback_task(struct xe_device *xe) int xe_pm_runtime_suspend(struct xe_device *xe) { + struct xe_bo *bo, *on; struct xe_gt *gt; u8 id; int err = 0; @@ -247,6 +254,16 @@ int xe_pm_runtime_suspend(struct xe_device *xe) */ lock_map_acquire(&xe_device_mem_access_lockdep_map); + /* + * Take the lock around the entire list operation, as xe_ttm_bo_destroy + * and xe_bo_move_notify also check for and delete bo entries from the + * user fault list. + */ + mutex_lock(&xe->mem_access.vram_userfault.lock); + list_for_each_entry_safe(bo, on, + &xe->mem_access.vram_userfault.list, vram_userfault_link) + xe_bo_runtime_pm_release_mmap_offset(bo); + mutex_unlock(&xe->mem_access.vram_userfault.lock); + if (xe->d3cold.allowed) { err = xe_bo_evict_all(xe); if (err) diff --git a/drivers/gpu/drm/xe/xe_pm.h b/drivers/gpu/drm/xe/xe_pm.h index 6b9031f7af24..64a97c6726a7 100644 --- a/drivers/gpu/drm/xe/xe_pm.h +++ b/drivers/gpu/drm/xe/xe_pm.h @@ -20,6 +20,7 @@ struct xe_device; int xe_pm_suspend(struct xe_device *xe); int xe_pm_resume(struct xe_device *xe); +void xe_pm_init_early(struct xe_device *xe); void xe_pm_init(struct xe_device *xe); void xe_pm_runtime_fini(struct xe_device *xe); int xe_pm_runtime_suspend(struct xe_device *xe); diff --git a/drivers/gpu/drm/xe/xe_reg_whitelist.c b/drivers/gpu/drm/xe/xe_reg_whitelist.c index e66ae1bdaf9c..3fa2ece7d228 100644 --- a/drivers/gpu/drm/xe/xe_reg_whitelist.c +++ b/drivers/gpu/drm/xe/xe_reg_whitelist.c @@ -7,9 +7,11 @@ #include "regs/xe_engine_regs.h" #include "regs/xe_gt_regs.h" +#include "regs/xe_regs.h" #include "xe_gt_types.h" #include "xe_platform_types.h" #include "xe_rtp.h" +#include "xe_step.h" #undef XE_REG_MCR #define XE_REG_MCR(...)
XE_REG(__VA_ARGS__, .mcr = 1) @@ -56,6 +58,12 @@ static const struct xe_rtp_entry_sr register_whitelist[] = { RING_FORCE_TO_NONPRIV_DENY, XE_RTP_ACTION_FLAG(ENGINE_BASE))) }, + { XE_RTP_NAME("16020183090"), + XE_RTP_RULES(GRAPHICS_VERSION(2004), GRAPHICS_STEP(A0, B0), + ENGINE_CLASS(RENDER)), + XE_RTP_ACTIONS(WHITELIST(CSBE_DEBUG_STATUS(RENDER_RING_BASE), 0)) + }, + {} }; diff --git a/drivers/gpu/drm/xe/xe_sriov.c b/drivers/gpu/drm/xe/xe_sriov.c index 42a0e0c917a0..f295d91886b1 100644 --- a/drivers/gpu/drm/xe/xe_sriov.c +++ b/drivers/gpu/drm/xe/xe_sriov.c @@ -3,6 +3,8 @@ * Copyright © 2023 Intel Corporation */ +#include <drm/drm_managed.h> + #include "xe_assert.h" #include "xe_sriov.h" @@ -53,3 +55,33 @@ void xe_sriov_probe_early(struct xe_device *xe, bool has_sriov) drm_info(&xe->drm, "Running in %s mode\n", xe_sriov_mode_to_string(xe_device_sriov_mode(xe))); } + +static void fini_sriov(struct drm_device *drm, void *arg) +{ + struct xe_device *xe = arg; + + destroy_workqueue(xe->sriov.wq); + xe->sriov.wq = NULL; +} + +/** + * xe_sriov_init - Initialize SR-IOV specific data. + * @xe: the &xe_device to initialize + * + * In this function we create dedicated workqueue that will be used + * by the SR-IOV specific workers. + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_sriov_init(struct xe_device *xe) +{ + if (!IS_SRIOV(xe)) + return 0; + + xe_assert(xe, !xe->sriov.wq); + xe->sriov.wq = alloc_workqueue("xe-sriov-wq", 0, 0); + if (!xe->sriov.wq) + return -ENOMEM; + + return drmm_add_action_or_reset(&xe->drm, fini_sriov, xe); +} diff --git a/drivers/gpu/drm/xe/xe_sriov.h b/drivers/gpu/drm/xe/xe_sriov.h index 5af73a3172b0..1545552162c9 100644 --- a/drivers/gpu/drm/xe/xe_sriov.h +++ b/drivers/gpu/drm/xe/xe_sriov.h @@ -13,6 +13,7 @@ const char *xe_sriov_mode_to_string(enum xe_sriov_mode mode); void xe_sriov_probe_early(struct xe_device *xe, bool has_sriov); +int xe_sriov_init(struct xe_device *xe); static inline enum xe_sriov_mode xe_device_sriov_mode(struct xe_device *xe) { diff --git a/drivers/gpu/drm/xe/xe_sriov_types.h b/drivers/gpu/drm/xe/xe_sriov_types.h index 999a4311b98b..1a138108d139 100644 --- a/drivers/gpu/drm/xe/xe_sriov_types.h +++ b/drivers/gpu/drm/xe/xe_sriov_types.h @@ -9,6 +9,18 @@ #include <linux/build_bug.h> /** + * VFID - Virtual Function Identifier + * @n: VF number + * + * Helper macro to represent Virtual Function (VF) Identifier. + * VFID(0) is used as alias to the PFID that represents Physical Function. + * + * Note: According to PCI spec, SR-IOV VF's numbers are 1-based (VF1, VF2, ...). 
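The xe_sriov_init() addition above leans on DRM-managed cleanup. A condensed sketch of that pattern with illustrative names follows: allocate the resource first, then register a release action; the _or_reset variant runs the action immediately if registration itself fails, so no error path can leak the workqueue.

#include <drm/drm_managed.h>
#include <linux/errno.h>
#include <linux/workqueue.h>

/* Release action: called automatically when the drm_device goes away,
 * or right away by the _or_reset variant if registration fails. */
static void example_fini_wq(struct drm_device *drm, void *arg)
{
	destroy_workqueue(arg);
}

static int example_init_wq(struct drm_device *drm, struct workqueue_struct **wq)
{
	*wq = alloc_workqueue("example-wq", 0, 0);
	if (!*wq)
		return -ENOMEM;

	/* Tie teardown to the drm_device lifetime; no explicit fini call needed. */
	return drmm_add_action_or_reset(drm, example_fini_wq, *wq);
}

The driver itself passes the xe device as the action argument so fini_sriov() can also clear xe->sriov.wq; the sketch passes the workqueue directly for brevity.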
+ */ +#define VFID(n) (n) +#define PFID VFID(0) + +/** * enum xe_sriov_mode - SR-IOV mode * @XE_SRIOV_MODE_NONE: bare-metal mode (non-virtualized) * @XE_SRIOV_MODE_PF: SR-IOV Physical Function (PF) mode diff --git a/drivers/gpu/drm/xe/xe_tile_sysfs.c b/drivers/gpu/drm/xe/xe_tile_sysfs.c index 0f8d3e7fce46..0662968d7bcb 100644 --- a/drivers/gpu/drm/xe/xe_tile_sysfs.c +++ b/drivers/gpu/drm/xe/xe_tile_sysfs.c @@ -9,6 +9,7 @@ #include "xe_tile.h" #include "xe_tile_sysfs.h" +#include "xe_vram_freq.h" static void xe_tile_sysfs_kobj_release(struct kobject *kobj) { @@ -50,6 +51,8 @@ void xe_tile_sysfs_init(struct xe_tile *tile) tile->sysfs = &kt->base; + xe_vram_freq_sysfs_init(tile); + err = drmm_add_action_or_reset(&xe->drm, tile_sysfs_fini, tile); if (err) drm_warn(&xe->drm, "%s: drmm_add_action_or_reset failed, err: %d\n", diff --git a/drivers/gpu/drm/xe/xe_trace.h b/drivers/gpu/drm/xe/xe_trace.h index 95163c303f3e..e4e7262191ad 100644 --- a/drivers/gpu/drm/xe/xe_trace.h +++ b/drivers/gpu/drm/xe/xe_trace.h @@ -31,7 +31,7 @@ DECLARE_EVENT_CLASS(xe_gt_tlb_invalidation_fence, ), TP_fast_assign( - __entry->fence = (u64)fence; + __entry->fence = (unsigned long)fence; __entry->seqno = fence->seqno; ), diff --git a/drivers/gpu/drm/xe/xe_uc.c b/drivers/gpu/drm/xe/xe_uc.c index 25e1ddfd2f86..8f37a809525f 100644 --- a/drivers/gpu/drm/xe/xe_uc.c +++ b/drivers/gpu/drm/xe/xe_uc.c @@ -7,8 +7,10 @@ #include "xe_device.h" #include "xe_gsc.h" +#include "xe_gsc_proxy.h" #include "xe_gt.h" #include "xe_guc.h" +#include "xe_guc_db_mgr.h" #include "xe_guc_pc.h" #include "xe_guc_submit.h" #include "xe_huc.h" @@ -60,6 +62,10 @@ int xe_uc_init(struct xe_uc *uc) if (ret) goto err; + ret = xe_guc_db_mgr_init(&uc->guc.dbm, ~0); + if (ret) + goto err; + return 0; err: @@ -256,3 +262,16 @@ int xe_uc_suspend(struct xe_uc *uc) return xe_guc_suspend(&uc->guc); } + +/** + * xe_uc_remove() - Clean up the UC structures before driver removal + * @uc: the UC object + * + * This function should only act on objects/structures that must be cleaned + * before the driver removal callback is complete and therefore can't be + * deferred to a drmm action. 
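The xe_trace.h one-liner above swaps a (u64) pointer cast for (unsigned long). The motivation is 32-bit builds: casting a 32-bit pointer straight to u64 trips GCC's cast-from-pointer-to-integer-of-different-size warning, while unsigned long is pointer-sized on every kernel target and then widens to u64 implicitly. A small sketch (the helper name is illustrative):

#include <linux/types.h>

static inline u64 fence_trace_id(const void *fence)
{
	/* unsigned long always matches the pointer width in the kernel, so
	 * this cast is clean on 32- and 64-bit alike; the promotion to the
	 * u64 return type is implicit and warning-free. */
	return (unsigned long)fence;
}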
+ */ +void xe_uc_remove(struct xe_uc *uc) +{ + xe_gsc_remove(&uc->gsc); +} diff --git a/drivers/gpu/drm/xe/xe_uc.h b/drivers/gpu/drm/xe/xe_uc.h index 5d5110c0c834..e4d4e3c99f0e 100644 --- a/drivers/gpu/drm/xe/xe_uc.h +++ b/drivers/gpu/drm/xe/xe_uc.h @@ -20,5 +20,6 @@ int xe_uc_stop(struct xe_uc *uc); int xe_uc_start(struct xe_uc *uc); int xe_uc_suspend(struct xe_uc *uc); int xe_uc_sanitize_reset(struct xe_uc *uc); +void xe_uc_remove(struct xe_uc *uc); #endif diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 10b6995fbf29..d096a8c00bd4 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -1054,7 +1054,7 @@ static struct drm_gpuva_op *xe_vm_op_alloc(void) static void xe_vm_free(struct drm_gpuvm *gpuvm); -static struct drm_gpuvm_ops gpuvm_ops = { +static const struct drm_gpuvm_ops gpuvm_ops = { .op_alloc = xe_vm_op_alloc, .vm_bo_validate = xe_gpuvm_validate, .vm_free = xe_vm_free, @@ -1855,10 +1855,8 @@ int xe_vm_create_ioctl(struct drm_device *dev, void *data, mutex_lock(&xef->vm.lock); err = xa_alloc(&xef->vm.xa, &id, vm, xa_limit_32b, GFP_KERNEL); mutex_unlock(&xef->vm.lock); - if (err) { - xe_vm_close_and_put(vm); - return err; - } + if (err) + goto err_close_and_put; if (xe->info.has_asid) { mutex_lock(&xe->usm.lock); @@ -1866,11 +1864,9 @@ int xe_vm_create_ioctl(struct drm_device *dev, void *data, XA_LIMIT(1, XE_MAX_ASID - 1), &xe->usm.next_asid, GFP_KERNEL); mutex_unlock(&xe->usm.lock); - if (err < 0) { - xe_vm_close_and_put(vm); - return err; - } - err = 0; + if (err < 0) + goto err_free_id; + vm->usm.asid = asid; } @@ -1888,6 +1884,15 @@ int xe_vm_create_ioctl(struct drm_device *dev, void *data, #endif return 0; + +err_free_id: + mutex_lock(&xef->vm.lock); + xa_erase(&xef->vm.xa, id); + mutex_unlock(&xef->vm.lock); +err_close_and_put: + xe_vm_close_and_put(vm); + + return err; } int xe_vm_destroy_ioctl(struct drm_device *dev, void *data, diff --git a/drivers/gpu/drm/xe/xe_vm.h b/drivers/gpu/drm/xe/xe_vm.h index cf2f96e8c1ab..e9c907cbcd89 100644 --- a/drivers/gpu/drm/xe/xe_vm.h +++ b/drivers/gpu/drm/xe/xe_vm.h @@ -199,8 +199,6 @@ struct dma_fence *xe_vm_rebind(struct xe_vm *vm, bool rebind_worker); int xe_vm_invalidate_vma(struct xe_vma *vma); -extern struct ttm_device_funcs xe_ttm_funcs; - static inline void xe_vm_queue_rebind_worker(struct xe_vm *vm) { xe_assert(vm->xe, xe_vm_in_preempt_fence_mode(vm)); diff --git a/drivers/gpu/drm/xe/xe_vram_freq.c b/drivers/gpu/drm/xe/xe_vram_freq.c new file mode 100644 index 000000000000..733146293307 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_vram_freq.c @@ -0,0 +1,129 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2024 Intel Corporation + */ +#include <linux/sysfs.h> +#include <drm/drm_managed.h> + +#include "xe_gt_types.h" +#include "xe_pcode.h" +#include "xe_pcode_api.h" +#include "xe_tile.h" +#include "xe_tile_sysfs.h" +#include "xe_vram_freq.h" + +/** + * DOC: Xe VRAM freq + * + * Provides sysfs entries for vram frequency in tile + * + * device/tile#/memory/freq0/max_freq - This is maximum frequency. This value is read-only as it + * is the fixed fuse point P0. It is not the system + * configuration. + * device/tile#/memory/freq0/min_freq - This is minimum frequency. This value is read-only as it + * is the fixed fuse point PN. It is not the system + * configuration. 
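The xe_vm_create_ioctl() rework above replaces two inline close-and-put branches with a single unwind ladder. Reduced to its shape with hypothetical stand-in helpers: each later failure jumps to a label that releases everything acquired so far, in reverse order, so nothing is freed twice or leaked.

/* Hypothetical stand-ins for the acquisition steps and their undo ops. */
static int alloc_id(void) { return 0; }
static int alloc_asid(void) { return 0; }
static void free_id(void) { }
static void close_and_put(void) { }

static int create_with_unwind(void)
{
	int err;

	err = alloc_id();		/* mirrors the xa_alloc() of the VM id */
	if (err)
		goto err_close_and_put;

	err = alloc_asid();		/* mirrors the ASID allocation */
	if (err < 0)
		goto err_free_id;

	return 0;

err_free_id:
	free_id();			/* undo step two's prerequisite */
err_close_and_put:
	close_and_put();		/* undo step one */
	return err;
}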
+ */ + +static struct xe_tile *dev_to_tile(struct device *dev) +{ + return kobj_to_tile(dev->kobj.parent); +} + +static ssize_t max_freq_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct xe_tile *tile = dev_to_tile(dev); + struct xe_gt *gt = tile->primary_gt; + u32 val, mbox; + int err; + + mbox = REG_FIELD_PREP(PCODE_MB_COMMAND, PCODE_FREQUENCY_CONFIG) + | REG_FIELD_PREP(PCODE_MB_PARAM1, PCODE_MBOX_FC_SC_READ_FUSED_P0) + | REG_FIELD_PREP(PCODE_MB_PARAM2, PCODE_MBOX_DOMAIN_HBM); + + err = xe_pcode_read(gt, mbox, &val, NULL); + if (err) + return err; + + /* data_out - Fused P0 for domain ID in units of 50 MHz */ + val *= 50; + + return sysfs_emit(buf, "%u\n", val); +} +static DEVICE_ATTR_RO(max_freq); + +static ssize_t min_freq_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct xe_tile *tile = dev_to_tile(dev); + struct xe_gt *gt = tile->primary_gt; + u32 val, mbox; + int err; + + mbox = REG_FIELD_PREP(PCODE_MB_COMMAND, PCODE_FREQUENCY_CONFIG) + | REG_FIELD_PREP(PCODE_MB_PARAM1, PCODE_MBOX_FC_SC_READ_FUSED_PN) + | REG_FIELD_PREP(PCODE_MB_PARAM2, PCODE_MBOX_DOMAIN_HBM); + + err = xe_pcode_read(gt, mbox, &val, NULL); + if (err) + return err; + + /* data_out - Fused Pn for domain ID in units of 50 MHz */ + val *= 50; + + return sysfs_emit(buf, "%u\n", val); +} +static DEVICE_ATTR_RO(min_freq); + +static struct attribute *freq_attrs[] = { + &dev_attr_max_freq.attr, + &dev_attr_min_freq.attr, + NULL +}; + +static const struct attribute_group freq_group_attrs = { + .name = "freq0", + .attrs = freq_attrs, +}; + +static void vram_freq_sysfs_fini(struct drm_device *drm, void *arg) +{ + struct kobject *kobj = arg; + + sysfs_remove_group(kobj, &freq_group_attrs); + kobject_put(kobj); +} + +/** + * xe_vram_freq_sysfs_init - Initialize vram frequency component + * @tile: Xe Tile object + * + * This needs to be called after the main tile sysfs directory is ready. + */ +void xe_vram_freq_sysfs_init(struct xe_tile *tile) +{ + struct xe_device *xe = tile_to_xe(tile); + struct kobject *kobj; + int err; + + if (xe->info.platform != XE_PVC) + return; + + kobj = kobject_create_and_add("memory", tile->sysfs); + if (!kobj) { + drm_warn(&xe->drm, "failed to add memory directory, err: %d\n", -ENOMEM); + return; + } + + err = sysfs_create_group(kobj, &freq_group_attrs); + if (err) { + kobject_put(kobj); + drm_warn(&xe->drm, "failed to register vram freq sysfs, err: %d\n", err); + return; + } + + err = drmm_add_action_or_reset(&xe->drm, vram_freq_sysfs_fini, kobj); + if (err) + drm_warn(&xe->drm, "%s: drmm_add_action_or_reset failed, err: %d\n", + __func__, err); +} diff --git a/drivers/gpu/drm/xe/xe_vram_freq.h b/drivers/gpu/drm/xe/xe_vram_freq.h new file mode 100644 index 000000000000..cbe8c12fbd64 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_vram_freq.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023 Intel Corporation + */ + +#ifndef _XE_VRAM_FREQ_H_ +#define _XE_VRAM_FREQ_H_ + +struct xe_tile; + +void xe_vram_freq_sysfs_init(struct xe_tile *tile); + +#endif /* _XE_VRAM_FREQ_H_ */ diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c index 5f61dd87c586..3299130ba10a 100644 --- a/drivers/gpu/drm/xe/xe_wa.c +++ b/drivers/gpu/drm/xe/xe_wa.c @@ -125,13 +125,6 @@ static const struct xe_rtp_entry_sr gt_was[] = { /* DG2 */ - { XE_RTP_NAME("16010515920"), - XE_RTP_RULES(SUBPLATFORM(DG2, G10), - GRAPHICS_STEP(A0, B0), - ENGINE_CLASS(VIDEO_DECODE)), - XE_RTP_ACTIONS(SET(VDBOX_CGCTL3F18(0), ALNUNIT_CLKGATE_DIS)), -
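Both show() callbacks in xe_vram_freq.c above build the same pcode mailbox word, differing only in the sub-command, and scale the reply by 50 MHz. A sketch of the packing using the generic FIELD_PREP(); the EX_PCODE_MB_* field layouts here are assumptions for illustration (the driver's real masks are REG_GENMASK definitions in xe_pcode_api.h).

#include <linux/bitfield.h>
#include <linux/bits.h>
#include <linux/types.h>

#define EX_PCODE_MB_COMMAND	GENMASK(7, 0)	/* assumed layout */
#define EX_PCODE_MB_PARAM1	GENMASK(15, 8)	/* assumed layout */
#define EX_PCODE_MB_PARAM2	GENMASK(23, 16)	/* assumed layout */

static u32 freq_config_mbox(u32 subcommand, u32 domain)
{
	/* FIELD_PREP() shifts each value into its mask's position with
	 * compile-time range checking, like the driver's REG_FIELD_PREP(). */
	return FIELD_PREP(EX_PCODE_MB_COMMAND, 0x6e) |	/* PCODE_FREQUENCY_CONFIG */
	       FIELD_PREP(EX_PCODE_MB_PARAM1, subcommand) |
	       FIELD_PREP(EX_PCODE_MB_PARAM2, domain);
}

/* Firmware reports fused frequency points in 50 MHz units. */
static u32 fused_point_to_mhz(u32 raw)
{
	return raw * 50;
}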
XE_RTP_ENTRY_FLAG(FOREACH_ENGINE), - }, { XE_RTP_NAME("22010523718"), XE_RTP_RULES(SUBPLATFORM(DG2, G10)), XE_RTP_ACTIONS(SET(UNSLICE_UNIT_LEVEL_CLKGATE, CG3DDISCFEG_CLKGATE_DIS)) @@ -140,61 +133,6 @@ static const struct xe_rtp_entry_sr gt_was[] = { XE_RTP_RULES(SUBPLATFORM(DG2, G10)), XE_RTP_ACTIONS(SET(SUBSLICE_UNIT_LEVEL_CLKGATE, DSS_ROUTER_CLKGATE_DIS)) }, - { XE_RTP_NAME("14012362059"), - XE_RTP_RULES(SUBPLATFORM(DG2, G10), GRAPHICS_STEP(A0, B0)), - XE_RTP_ACTIONS(SET(XEHP_MERT_MOD_CTRL, FORCE_MISS_FTLB)) - }, - { XE_RTP_NAME("14012362059"), - XE_RTP_RULES(SUBPLATFORM(DG2, G11), GRAPHICS_STEP(A0, B0)), - XE_RTP_ACTIONS(SET(XEHP_MERT_MOD_CTRL, FORCE_MISS_FTLB)) - }, - { XE_RTP_NAME("14010948348"), - XE_RTP_RULES(SUBPLATFORM(DG2, G10), GRAPHICS_STEP(A0, B0)), - XE_RTP_ACTIONS(SET(UNSLCGCTL9430, MSQDUNIT_CLKGATE_DIS)) - }, - { XE_RTP_NAME("14011037102"), - XE_RTP_RULES(SUBPLATFORM(DG2, G10), GRAPHICS_STEP(A0, B0)), - XE_RTP_ACTIONS(SET(UNSLCGCTL9444, LTCDD_CLKGATE_DIS)) - }, - { XE_RTP_NAME("14011371254"), - XE_RTP_RULES(SUBPLATFORM(DG2, G10), GRAPHICS_STEP(A0, B0)), - XE_RTP_ACTIONS(SET(XEHP_SLICE_UNIT_LEVEL_CLKGATE, NODEDSS_CLKGATE_DIS)) - }, - { XE_RTP_NAME("14011431319"), - XE_RTP_RULES(SUBPLATFORM(DG2, G10), GRAPHICS_STEP(A0, B0)), - XE_RTP_ACTIONS(SET(UNSLCGCTL9440, - GAMTLBOACS_CLKGATE_DIS | - GAMTLBVDBOX7_CLKGATE_DIS | GAMTLBVDBOX6_CLKGATE_DIS | - GAMTLBVDBOX5_CLKGATE_DIS | GAMTLBVDBOX4_CLKGATE_DIS | - GAMTLBVDBOX3_CLKGATE_DIS | GAMTLBVDBOX2_CLKGATE_DIS | - GAMTLBVDBOX1_CLKGATE_DIS | GAMTLBVDBOX0_CLKGATE_DIS | - GAMTLBKCR_CLKGATE_DIS | GAMTLBGUC_CLKGATE_DIS | - GAMTLBBLT_CLKGATE_DIS), - SET(UNSLCGCTL9444, - GAMTLBGFXA0_CLKGATE_DIS | GAMTLBGFXA1_CLKGATE_DIS | - GAMTLBCOMPA0_CLKGATE_DIS | GAMTLBCOMPA1_CLKGATE_DIS | - GAMTLBCOMPB0_CLKGATE_DIS | GAMTLBCOMPB1_CLKGATE_DIS | - GAMTLBCOMPC0_CLKGATE_DIS | GAMTLBCOMPC1_CLKGATE_DIS | - GAMTLBCOMPD0_CLKGATE_DIS | GAMTLBCOMPD1_CLKGATE_DIS | - GAMTLBMERT_CLKGATE_DIS | - GAMTLBVEBOX3_CLKGATE_DIS | GAMTLBVEBOX2_CLKGATE_DIS | - GAMTLBVEBOX1_CLKGATE_DIS | GAMTLBVEBOX0_CLKGATE_DIS)) - }, - { XE_RTP_NAME("14010569222"), - XE_RTP_RULES(SUBPLATFORM(DG2, G10), GRAPHICS_STEP(A0, B0)), - XE_RTP_ACTIONS(SET(UNSLICE_UNIT_LEVEL_CLKGATE, GAMEDIA_CLKGATE_DIS)) - }, - { XE_RTP_NAME("14011028019"), - XE_RTP_RULES(SUBPLATFORM(DG2, G10), GRAPHICS_STEP(A0, B0)), - XE_RTP_ACTIONS(SET(SSMCGCTL9530, RTFUNIT_CLKGATE_DIS)) - }, - { XE_RTP_NAME("14010680813"), - XE_RTP_RULES(SUBPLATFORM(DG2, G10), GRAPHICS_STEP(A0, B0)), - XE_RTP_ACTIONS(SET(XEHP_GAMSTLB_CTRL, - CONTROL_BLOCK_CLKGATE_DIS | - EGRESS_BLOCK_CLKGATE_DIS | - TAG_BLOCK_CLKGATE_DIS)) - }, { XE_RTP_NAME("14014830051"), XE_RTP_RULES(PLATFORM(DG2)), XE_RTP_ACTIONS(CLR(SARB_CHICKEN1, COMP_CKN_IN)) @@ -212,10 +150,6 @@ static const struct xe_rtp_entry_sr gt_was[] = { INVALIDATION_BROADCAST_MODE_DIS | GLOBAL_INVALIDATION_MODE)) }, - { XE_RTP_NAME("14010648519"), - XE_RTP_RULES(PLATFORM(DG2)), - XE_RTP_ACTIONS(SET(XEHP_L3NODEARBCFG, XEHP_LNESPARE)) - }, /* PVC */ @@ -377,13 +311,6 @@ static const struct xe_rtp_entry_sr engine_was[] = { POLYGON_TRIFAN_LINELOOP_DISABLE)) }, { XE_RTP_NAME("22012826095, 22013059131"), - XE_RTP_RULES(SUBPLATFORM(DG2, G10), GRAPHICS_STEP(B0, C0), - FUNC(xe_rtp_match_first_render_or_compute)), - XE_RTP_ACTIONS(FIELD_SET(LSC_CHICKEN_BIT_0_UDW, - MAXREQS_PER_BANK, - REG_FIELD_PREP(MAXREQS_PER_BANK, 2))) - }, - { XE_RTP_NAME("22012826095, 22013059131"), XE_RTP_RULES(SUBPLATFORM(DG2, G11), FUNC(xe_rtp_match_first_render_or_compute)), XE_RTP_ACTIONS(FIELD_SET(LSC_CHICKEN_BIT_0_UDW, @@ -391,27 
+318,10 @@ static const struct xe_rtp_entry_sr engine_was[] = { REG_FIELD_PREP(MAXREQS_PER_BANK, 2))) }, { XE_RTP_NAME("22013059131"), - XE_RTP_RULES(SUBPLATFORM(DG2, G10), GRAPHICS_STEP(B0, C0), - FUNC(xe_rtp_match_first_render_or_compute)), - XE_RTP_ACTIONS(SET(LSC_CHICKEN_BIT_0, FORCE_1_SUB_MESSAGE_PER_FRAGMENT)) - }, - { XE_RTP_NAME("22013059131"), XE_RTP_RULES(SUBPLATFORM(DG2, G11), FUNC(xe_rtp_match_first_render_or_compute)), XE_RTP_ACTIONS(SET(LSC_CHICKEN_BIT_0, FORCE_1_SUB_MESSAGE_PER_FRAGMENT)) }, - { XE_RTP_NAME("14010918519"), - XE_RTP_RULES(SUBPLATFORM(DG2, G10), GRAPHICS_STEP(A0, B0), - FUNC(xe_rtp_match_first_render_or_compute)), - XE_RTP_ACTIONS(SET(LSC_CHICKEN_BIT_0_UDW, - FORCE_SLM_FENCE_SCOPE_TO_TILE | - FORCE_UGM_FENCE_SCOPE_TO_TILE, - /* - * Ignore read back as it always returns 0 in these - * steps - */ - .read_mask = 0)) - }, { XE_RTP_NAME("14015227452"), XE_RTP_RULES(PLATFORM(DG2), FUNC(xe_rtp_match_first_render_or_compute)), @@ -428,22 +338,12 @@ static const struct xe_rtp_entry_sr engine_was[] = { FUNC(xe_rtp_match_first_render_or_compute)), XE_RTP_ACTIONS(SET(LSC_CHICKEN_BIT_0_UDW, UGM_FRAGMENT_THRESHOLD_TO_3)) }, - { XE_RTP_NAME("16011620976, 22015475538"), + { XE_RTP_NAME("22015475538"), XE_RTP_RULES(PLATFORM(DG2), FUNC(xe_rtp_match_first_render_or_compute)), XE_RTP_ACTIONS(SET(LSC_CHICKEN_BIT_0_UDW, DIS_CHAIN_2XSIMD8)) }, { XE_RTP_NAME("22012654132"), - XE_RTP_RULES(SUBPLATFORM(DG2, G10), GRAPHICS_STEP(A0, C0), - FUNC(xe_rtp_match_first_render_or_compute)), - XE_RTP_ACTIONS(SET(CACHE_MODE_SS, ENABLE_PREFETCH_INTO_IC, - /* - * Register can't be read back for verification on - * DG2 due to Wa_14012342262 - */ - .read_mask = 0)) - }, - { XE_RTP_NAME("22012654132"), XE_RTP_RULES(SUBPLATFORM(DG2, G11), FUNC(xe_rtp_match_first_render_or_compute)), XE_RTP_ACTIONS(SET(CACHE_MODE_SS, ENABLE_PREFETCH_INTO_IC, @@ -461,68 +361,11 @@ static const struct xe_rtp_entry_sr engine_was[] = { XE_RTP_RULES(PLATFORM(DG2), ENGINE_CLASS(RENDER)), XE_RTP_ACTIONS(SET(ROW_CHICKEN2, DISABLE_READ_SUPPRESSION)) }, - { XE_RTP_NAME("14013392000"), - XE_RTP_RULES(SUBPLATFORM(DG2, G11), GRAPHICS_STEP(A0, B0), - ENGINE_CLASS(RENDER)), - XE_RTP_ACTIONS(SET(ROW_CHICKEN2, ENABLE_LARGE_GRF_MODE)) - }, - { XE_RTP_NAME("14012419201"), - XE_RTP_RULES(SUBPLATFORM(DG2, G10), GRAPHICS_STEP(A0, B0), - ENGINE_CLASS(RENDER)), - XE_RTP_ACTIONS(SET(ROW_CHICKEN4, - DISABLE_HDR_PAST_PAYLOAD_HOLD_FIX)) - }, - { XE_RTP_NAME("14012419201"), - XE_RTP_RULES(SUBPLATFORM(DG2, G11), GRAPHICS_STEP(A0, B0), - ENGINE_CLASS(RENDER)), - XE_RTP_ACTIONS(SET(ROW_CHICKEN4, - DISABLE_HDR_PAST_PAYLOAD_HOLD_FIX)) - }, - { XE_RTP_NAME("1308578152"), - XE_RTP_RULES(SUBPLATFORM(DG2, G10), GRAPHICS_STEP(B0, C0), - ENGINE_CLASS(RENDER), - FUNC(xe_rtp_match_first_gslice_fused_off)), - XE_RTP_ACTIONS(CLR(CS_DEBUG_MODE1(RENDER_RING_BASE), - REPLAY_MODE_GRANULARITY)) - }, { XE_RTP_NAME("22010960976, 14013347512"), XE_RTP_RULES(PLATFORM(DG2), ENGINE_CLASS(RENDER)), XE_RTP_ACTIONS(CLR(XEHP_HDC_CHICKEN0, LSC_L1_FLUSH_CTL_3D_DATAPORT_FLUSH_EVENTS_MASK)) }, - { XE_RTP_NAME("1608949956, 14010198302"), - XE_RTP_RULES(PLATFORM(DG2), ENGINE_CLASS(RENDER)), - XE_RTP_ACTIONS(SET(ROW_CHICKEN, - MDQ_ARBITRATION_MODE | UGM_BACKUP_MODE)) - }, - { XE_RTP_NAME("22010430635"), - XE_RTP_RULES(SUBPLATFORM(DG2, G10), GRAPHICS_STEP(A0, B0), - ENGINE_CLASS(RENDER)), - XE_RTP_ACTIONS(SET(ROW_CHICKEN4, - DISABLE_GRF_CLEAR)) - }, - { XE_RTP_NAME("14013202645"), - XE_RTP_RULES(SUBPLATFORM(DG2, G10), GRAPHICS_STEP(B0, C0), - ENGINE_CLASS(RENDER)), - 
XE_RTP_ACTIONS(SET(RT_CTRL, DIS_NULL_QUERY)) - }, - { XE_RTP_NAME("14013202645"), - XE_RTP_RULES(SUBPLATFORM(DG2, G11), GRAPHICS_STEP(A0, B0), - ENGINE_CLASS(RENDER)), - XE_RTP_ACTIONS(SET(RT_CTRL, DIS_NULL_QUERY)) - }, - { XE_RTP_NAME("22012532006"), - XE_RTP_RULES(SUBPLATFORM(DG2, G10), GRAPHICS_STEP(A0, C0), - ENGINE_CLASS(RENDER)), - XE_RTP_ACTIONS(SET(HALF_SLICE_CHICKEN7, - DG2_DISABLE_ROUND_ENABLE_ALLOW_FOR_SSLA)) - }, - { XE_RTP_NAME("22012532006"), - XE_RTP_RULES(SUBPLATFORM(DG2, G11), GRAPHICS_STEP(A0, B0), - ENGINE_CLASS(RENDER)), - XE_RTP_ACTIONS(SET(HALF_SLICE_CHICKEN7, - DG2_DISABLE_ROUND_ENABLE_ALLOW_FOR_SSLA)) - }, { XE_RTP_NAME("14015150844"), XE_RTP_RULES(PLATFORM(DG2), FUNC(xe_rtp_match_first_render_or_compute)), XE_RTP_ACTIONS(SET(XEHP_HDC_CHICKEN0, DIS_ATOMIC_CHAINING_TYPED_WRITES, @@ -612,7 +455,10 @@ static const struct xe_rtp_entry_sr engine_was[] = { PPHWSP_CSB_AND_TIMESTAMP_REPORT_DIS, XE_RTP_ACTION_FLAG(ENGINE_BASE))) }, - + { XE_RTP_NAME("16018610683"), + XE_RTP_RULES(GRAPHICS_VERSION(2004), FUNC(xe_rtp_match_first_render_or_compute)), + XE_RTP_ACTIONS(SET(TDL_TSL_CHICKEN, SLM_WMTP_RESTORE)) + }, {} }; @@ -652,21 +498,6 @@ static const struct xe_rtp_entry_sr lrc_was[] = { /* DG2 */ - { XE_RTP_NAME("16011186671"), - XE_RTP_RULES(SUBPLATFORM(DG2, G11), GRAPHICS_STEP(A0, B0)), - XE_RTP_ACTIONS(CLR(VFLSKPD, DIS_MULT_MISS_RD_SQUASH), - SET(VFLSKPD, DIS_OVER_FETCH_CACHE)) - }, - { XE_RTP_NAME("14010469329"), - XE_RTP_RULES(SUBPLATFORM(DG2, G10), GRAPHICS_STEP(A0, B0)), - XE_RTP_ACTIONS(SET(XEHP_COMMON_SLICE_CHICKEN3, - XEHP_DUAL_SIMD8_SEQ_MERGE_DISABLE)) - }, - { XE_RTP_NAME("14010698770, 22010613112, 22010465075"), - XE_RTP_RULES(SUBPLATFORM(DG2, G10), GRAPHICS_STEP(A0, B0)), - XE_RTP_ACTIONS(SET(XEHP_COMMON_SLICE_CHICKEN3, - DISABLE_CPS_AWARE_COLOR_PIPE)) - }, { XE_RTP_NAME("16013271637"), XE_RTP_RULES(PLATFORM(DG2)), XE_RTP_ACTIONS(SET(XEHP_SLICE_COMMON_ECO_CHICKEN1, @@ -708,6 +539,10 @@ static const struct xe_rtp_entry_sr lrc_was[] = { XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1270, 1271)), XE_RTP_ACTIONS(SET(CACHE_MODE_1, MSAA_OPTIMIZATION_REDUC_DISABLE)) }, + { XE_RTP_NAME("14019877138"), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1270, 1271), ENGINE_CLASS(RENDER)), + XE_RTP_ACTIONS(SET(XEHP_PSS_CHICKEN, FD_END_COLLECT)) + }, /* Xe2_LPG */ @@ -739,6 +574,11 @@ static const struct xe_rtp_entry_sr lrc_was[] = { XE_RTP_RULES(GRAPHICS_VERSION(2004), ENGINE_CLASS(RENDER)), XE_RTP_ACTIONS(SET(XEHP_PSS_CHICKEN, FLSH_IGNORES_PSD)) }, + { XE_RTP_NAME("16020183090"), + XE_RTP_RULES(GRAPHICS_VERSION(2004), GRAPHICS_STEP(A0, B0), + ENGINE_CLASS(RENDER)), + XE_RTP_ACTIONS(SET(INSTPM(RENDER_RING_BASE), ENABLE_SEMAPHORE_POLL_BIT)) + }, {} }; diff --git a/drivers/gpu/drm/xe/xe_wa_oob.rules b/drivers/gpu/drm/xe/xe_wa_oob.rules index 727bdc429212..b138cbd51bdb 100644 --- a/drivers/gpu/drm/xe/xe_wa_oob.rules +++ b/drivers/gpu/drm/xe/xe_wa_oob.rules @@ -1,13 +1,8 @@ 22012773006 GRAPHICS_VERSION_RANGE(1200, 1250) -16011759253 SUBPLATFORM(DG2, G10), GRAPHICS_STEP(A0, B0) 14014475959 GRAPHICS_VERSION_RANGE(1270, 1271), GRAPHICS_STEP(A0, B0) PLATFORM(DG2) 22011391025 PLATFORM(DG2) -14012197797 PLATFORM(DG2), GRAPHICS_STEP(A0, B0) -16011777198 SUBPLATFORM(DG2, G10), GRAPHICS_STEP(A0, C0) - SUBPLATFORM(DG2, G11), GRAPHICS_STEP(A0, B0) -22012727170 SUBPLATFORM(DG2, G10), GRAPHICS_STEP(A0, C0) - SUBPLATFORM(DG2, G11) +22012727170 SUBPLATFORM(DG2, G11) 22012727685 SUBPLATFORM(DG2, G11) 16015675438 PLATFORM(PVC) SUBPLATFORM(DG2, G10) @@ -22,3 +17,8 @@ 14019821291 MEDIA_VERSION_RANGE(1300, 2000) 
14015076503 MEDIA_VERSION(1300) 16020292621 GRAPHICS_VERSION(2004), GRAPHICS_STEP(A0, B0) +14018913170 GRAPHICS_VERSION(2004), GRAPHICS_STEP(A0, B0) + MEDIA_VERSION(2000), GRAPHICS_STEP(A0, A1) + GRAPHICS_VERSION_RANGE(1270, 1274) + MEDIA_VERSION(1300) + PLATFORM(DG2) diff --git a/drivers/gpu/drm/xe/xe_wait_user_fence.c b/drivers/gpu/drm/xe/xe_wait_user_fence.c index a75eeba7bfe5..f69721339201 100644 --- a/drivers/gpu/drm/xe/xe_wait_user_fence.c +++ b/drivers/gpu/drm/xe/xe_wait_user_fence.c @@ -148,7 +148,7 @@ int xe_wait_user_fence_ioctl(struct drm_device *dev, void *data, if (q) { if (q->ops->reset_status(q)) { - drm_info(&xe->drm, "exec gueue reset detected\n"); + drm_info(&xe->drm, "exec queue reset detected\n"); err = -EIO; break; } diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index 9fa3ae324731..50bbea0992d9 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -832,7 +832,6 @@ struct drm_xe_vm_destroy { * * and the @flags can be: * - %DRM_XE_VM_BIND_FLAG_READONLY - * - %DRM_XE_VM_BIND_FLAG_ASYNC * - %DRM_XE_VM_BIND_FLAG_IMMEDIATE - Valid on a faulting VM only, do the * MAP operation immediately rather than deferring the MAP to the page * fault handler. |