summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/amd/amdkfd
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/amd/amdkfd')
-rw-r--r--drivers/gpu/drm/amd/amdkfd/Kconfig2
-rw-r--r--drivers/gpu/drm/amd/amdkfd/Makefile1
-rw-r--r--drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c19
-rw-r--r--drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h3232
-rw-r--r--drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm202
-rw-r--r--drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx12.asm1136
-rw-r--r--drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm62
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_chardev.c44
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_crat.c62
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_debug.c31
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_debug.h1
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_debugfs.c76
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_device.c189
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c387
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h18
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c75
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v10.c43
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v11.c43
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v12.c43
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c42
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c77
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_events.c74
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_int_process_v10.c145
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_int_process_v11.c97
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c146
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c64
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c7
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_migrate.c42
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c6
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c5
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v12.c5
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c49
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c115
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c103
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_vi.c7
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_pasid.c70
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_ai.h3
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_priv.h67
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_process.c218
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c115
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_queue.c30
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c35
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h4
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_svm.c99
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_svm.h13
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_topology.c124
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_topology.h18
47 files changed, 4917 insertions, 2529 deletions
diff --git a/drivers/gpu/drm/amd/amdkfd/Kconfig b/drivers/gpu/drm/amd/amdkfd/Kconfig
index d3c3d3ab7225..62e88e5362e9 100644
--- a/drivers/gpu/drm/amd/amdkfd/Kconfig
+++ b/drivers/gpu/drm/amd/amdkfd/Kconfig
@@ -5,7 +5,7 @@
config HSA_AMD
bool "HSA kernel driver for AMD GPU devices"
- depends on DRM_AMDGPU && (X86_64 || ARM64 || PPC64)
+ depends on DRM_AMDGPU && (X86_64 || ARM64 || PPC64 || (RISCV && 64BIT))
select HMM_MIRROR
select MMU_NOTIFIER
select DRM_AMDGPU_USERPTR
diff --git a/drivers/gpu/drm/amd/amdkfd/Makefile b/drivers/gpu/drm/amd/amdkfd/Makefile
index 0d3d8972240d..0ce08113c9f0 100644
--- a/drivers/gpu/drm/amd/amdkfd/Makefile
+++ b/drivers/gpu/drm/amd/amdkfd/Makefile
@@ -27,7 +27,6 @@ AMDKFD_FILES := $(AMDKFD_PATH)/kfd_module.o \
$(AMDKFD_PATH)/kfd_device.o \
$(AMDKFD_PATH)/kfd_chardev.o \
$(AMDKFD_PATH)/kfd_topology.o \
- $(AMDKFD_PATH)/kfd_pasid.o \
$(AMDKFD_PATH)/kfd_doorbell.o \
$(AMDKFD_PATH)/kfd_flat_memory.o \
$(AMDKFD_PATH)/kfd_process.o \
diff --git a/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c b/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c
index 795382b55e0a..73acbe0b7c21 100644
--- a/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c
+++ b/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c
@@ -91,7 +91,6 @@ static void cik_event_interrupt_wq(struct kfd_node *dev,
const struct cik_ih_ring_entry *ihre =
(const struct cik_ih_ring_entry *)ih_ring_entry;
uint32_t context_id = ihre->data & 0xfffffff;
- unsigned int vmid = (ihre->ring_id & 0x0000ff00) >> 8;
u32 pasid = (ihre->ring_id & 0xffff0000) >> 16;
if (pasid == 0)
@@ -107,20 +106,26 @@ static void cik_event_interrupt_wq(struct kfd_node *dev,
kfd_signal_hw_exception_event(pasid);
else if (ihre->source_id == CIK_INTSRC_GFX_PAGE_INV_FAULT ||
ihre->source_id == CIK_INTSRC_GFX_MEM_PROT_FAULT) {
+ struct kfd_process_device *pdd = NULL;
struct kfd_vm_fault_info info;
+ struct kfd_process *p;
kfd_smi_event_update_vmfault(dev, pasid);
- kfd_dqm_evict_pasid(dev->dqm, pasid);
+ p = kfd_lookup_process_by_pasid(pasid, &pdd);
+ if (!pdd)
+ return;
+
+ kfd_evict_process_device(pdd);
memset(&info, 0, sizeof(info));
amdgpu_amdkfd_gpuvm_get_vm_fault_info(dev->adev, &info);
- if (!info.page_addr && !info.status)
+ if (!info.page_addr && !info.status) {
+ kfd_unref_process(p);
return;
+ }
- if (info.vmid == vmid)
- kfd_signal_vm_fault_event(dev, pasid, &info, NULL);
- else
- kfd_signal_vm_fault_event(dev, pasid, NULL, NULL);
+ kfd_signal_vm_fault_event(pdd, &info, NULL);
+ kfd_unref_process(p);
}
}
diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h
index 02f7ba8c93cd..0320163b6e74 100644
--- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h
+++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h
@@ -274,7 +274,7 @@ static const uint32_t cwsr_trap_gfx8_hex[] = {
static const uint32_t cwsr_trap_gfx9_hex[] = {
- 0xbf820001, 0xbf820258,
+ 0xbf820001, 0xbf820259,
0xb8f8f802, 0x8978ff78,
0x00020006, 0xb8fbf803,
0x866eff78, 0x00002000,
@@ -390,141 +390,98 @@ static const uint32_t cwsr_trap_gfx9_hex[] = {
0xbefe007c, 0xbefc0070,
0xc0611c7a, 0x0000007c,
0xbf8cc07f, 0x80708470,
- 0xbefc007e, 0x867aff7f,
- 0x04000000, 0xbeef0080,
- 0x876f6f7a, 0xb8f02a05,
- 0x80708170, 0x8e708a70,
- 0xb8fb1605, 0x807b817b,
- 0x8e7b847b, 0x8e76827b,
- 0xbef600ff, 0x01000000,
- 0xbef20174, 0x80747074,
- 0x82758075, 0xbefc0080,
- 0xbf800000, 0xbe802b00,
- 0xbe822b02, 0xbe842b04,
- 0xbe862b06, 0xbe882b08,
- 0xbe8a2b0a, 0xbe8c2b0c,
- 0xbe8e2b0e, 0xc06b003a,
- 0x00000000, 0xbf8cc07f,
- 0xc06b013a, 0x00000010,
- 0xbf8cc07f, 0xc06b023a,
- 0x00000020, 0xbf8cc07f,
- 0xc06b033a, 0x00000030,
- 0xbf8cc07f, 0x8074c074,
- 0x82758075, 0x807c907c,
- 0xbf0a7b7c, 0xbf85ffe7,
- 0xbef40172, 0xbef00080,
- 0xbefe00c1, 0xbeff00c1,
- 0xbee80080, 0xbee90080,
- 0xbef600ff, 0x01000000,
- 0x867aff78, 0x00400000,
- 0xbf850003, 0xb8faf803,
- 0x897a7aff, 0x10000000,
- 0xbf85004d, 0xbe840080,
- 0xd2890000, 0x00000900,
- 0x80048104, 0xd2890001,
- 0x00000900, 0x80048104,
- 0xd2890002, 0x00000900,
- 0x80048104, 0xd2890003,
- 0x00000900, 0x80048104,
- 0xc069003a, 0x00000070,
- 0xbf8cc07f, 0x80709070,
- 0xbf06c004, 0xbf84ffee,
+ 0xbefc007e, 0xbf108080,
+ 0x867aff7f, 0x04000000,
+ 0xbeef0080, 0x876f6f7a,
+ 0xb8f02a05, 0x80708170,
+ 0x8e708a70, 0xb8fb1605,
+ 0x807b817b, 0x8e7b847b,
+ 0x8e76827b, 0xbef600ff,
+ 0x01000000, 0xbef20174,
+ 0x80747074, 0x82758075,
+ 0xbefc0080, 0xbf800000,
+ 0xbe802b00, 0xbe822b02,
+ 0xbe842b04, 0xbe862b06,
+ 0xbe882b08, 0xbe8a2b0a,
+ 0xbe8c2b0c, 0xbe8e2b0e,
+ 0xc06b003a, 0x00000000,
+ 0xbf8cc07f, 0xc06b013a,
+ 0x00000010, 0xbf8cc07f,
+ 0xc06b023a, 0x00000020,
+ 0xbf8cc07f, 0xc06b033a,
+ 0x00000030, 0xbf8cc07f,
+ 0x8074c074, 0x82758075,
+ 0x807c907c, 0xbf0a7b7c,
+ 0xbf85ffe7, 0xbef40172,
+ 0xbef00080, 0xbefe00c1,
+ 0xbeff00c1, 0xbee80080,
+ 0xbee90080, 0xbef600ff,
+ 0x01000000, 0x867aff78,
+ 0x00400000, 0xbf850003,
+ 0xb8faf803, 0x897a7aff,
+ 0x10000000, 0xbf85004d,
0xbe840080, 0xd2890000,
- 0x00000901, 0x80048104,
- 0xd2890001, 0x00000901,
+ 0x00000900, 0x80048104,
+ 0xd2890001, 0x00000900,
0x80048104, 0xd2890002,
- 0x00000901, 0x80048104,
- 0xd2890003, 0x00000901,
+ 0x00000900, 0x80048104,
+ 0xd2890003, 0x00000900,
0x80048104, 0xc069003a,
0x00000070, 0xbf8cc07f,
0x80709070, 0xbf06c004,
0xbf84ffee, 0xbe840080,
- 0xd2890000, 0x00000902,
+ 0xd2890000, 0x00000901,
0x80048104, 0xd2890001,
- 0x00000902, 0x80048104,
- 0xd2890002, 0x00000902,
+ 0x00000901, 0x80048104,
+ 0xd2890002, 0x00000901,
0x80048104, 0xd2890003,
- 0x00000902, 0x80048104,
+ 0x00000901, 0x80048104,
0xc069003a, 0x00000070,
0xbf8cc07f, 0x80709070,
0xbf06c004, 0xbf84ffee,
0xbe840080, 0xd2890000,
- 0x00000903, 0x80048104,
- 0xd2890001, 0x00000903,
+ 0x00000902, 0x80048104,
+ 0xd2890001, 0x00000902,
0x80048104, 0xd2890002,
- 0x00000903, 0x80048104,
- 0xd2890003, 0x00000903,
+ 0x00000902, 0x80048104,
+ 0xd2890003, 0x00000902,
0x80048104, 0xc069003a,
0x00000070, 0xbf8cc07f,
0x80709070, 0xbf06c004,
- 0xbf84ffee, 0xbf820008,
- 0xe0724000, 0x701d0000,
- 0xe0724100, 0x701d0100,
- 0xe0724200, 0x701d0200,
- 0xe0724300, 0x701d0300,
- 0xbefe00c1, 0xbeff00c1,
- 0xb8fb4306, 0x867bc17b,
- 0xbf840063, 0xbf8a0000,
- 0x867aff6f, 0x04000000,
- 0xbf84005f, 0x8e7b867b,
- 0x8e7b827b, 0xbef6007b,
- 0xb8f02a05, 0x80708170,
- 0x8e708a70, 0xb8fa1605,
- 0x807a817a, 0x8e7a867a,
- 0x80707a70, 0x8070ff70,
- 0x00000080, 0xbef600ff,
- 0x01000000, 0xbefc0080,
- 0xd28c0002, 0x000100c1,
- 0xd28d0003, 0x000204c1,
- 0x867aff78, 0x00400000,
- 0xbf850003, 0xb8faf803,
- 0x897a7aff, 0x10000000,
- 0xbf850030, 0x24040682,
- 0xd86e4000, 0x00000002,
- 0xbf8cc07f, 0xbe840080,
- 0xd2890000, 0x00000900,
+ 0xbf84ffee, 0xbe840080,
+ 0xd2890000, 0x00000903,
0x80048104, 0xd2890001,
- 0x00000900, 0x80048104,
- 0xd2890002, 0x00000900,
+ 0x00000903, 0x80048104,
+ 0xd2890002, 0x00000903,
0x80048104, 0xd2890003,
- 0x00000900, 0x80048104,
+ 0x00000903, 0x80048104,
0xc069003a, 0x00000070,
0xbf8cc07f, 0x80709070,
0xbf06c004, 0xbf84ffee,
- 0xbe840080, 0xd2890000,
- 0x00000901, 0x80048104,
- 0xd2890001, 0x00000901,
- 0x80048104, 0xd2890002,
- 0x00000901, 0x80048104,
- 0xd2890003, 0x00000901,
- 0x80048104, 0xc069003a,
- 0x00000070, 0xbf8cc07f,
- 0x80709070, 0xbf06c004,
- 0xbf84ffee, 0x680404ff,
- 0x00000200, 0xd0c9006a,
- 0x0000f702, 0xbf87ffd2,
- 0xbf820015, 0xd1060002,
- 0x00011103, 0x7e0602ff,
- 0x00000200, 0xbefc00ff,
- 0x00010000, 0xbe800077,
- 0x8677ff77, 0xff7fffff,
- 0x8777ff77, 0x00058000,
- 0xd8ec0000, 0x00000002,
- 0xbf8cc07f, 0xe0765000,
- 0x701d0002, 0x68040702,
- 0xd0c9006a, 0x0000f702,
- 0xbf87fff7, 0xbef70000,
- 0xbef000ff, 0x00000400,
- 0xbefe00c1, 0xbeff00c1,
- 0xb8fb2a05, 0x807b817b,
- 0x8e7b827b, 0xbef600ff,
- 0x01000000, 0xbefc0084,
- 0xbf0a7b7c, 0xbf84006d,
- 0xbf11017c, 0x807bff7b,
- 0x00001000, 0x867aff78,
+ 0xbf820008, 0xe0724000,
+ 0x701d0000, 0xe0724100,
+ 0x701d0100, 0xe0724200,
+ 0x701d0200, 0xe0724300,
+ 0x701d0300, 0xbefe00c1,
+ 0xbeff00c1, 0xb8fb4306,
+ 0x867bc17b, 0xbf840063,
+ 0xbf8a0000, 0x867aff6f,
+ 0x04000000, 0xbf84005f,
+ 0x8e7b867b, 0x8e7b827b,
+ 0xbef6007b, 0xb8f02a05,
+ 0x80708170, 0x8e708a70,
+ 0xb8fa1605, 0x807a817a,
+ 0x8e7a867a, 0x80707a70,
+ 0x8070ff70, 0x00000080,
+ 0xbef600ff, 0x01000000,
+ 0xbefc0080, 0xd28c0002,
+ 0x000100c1, 0xd28d0003,
+ 0x000204c1, 0x867aff78,
0x00400000, 0xbf850003,
0xb8faf803, 0x897a7aff,
- 0x10000000, 0xbf850051,
+ 0x10000000, 0xbf850030,
+ 0x24040682, 0xd86e4000,
+ 0x00000002, 0xbf8cc07f,
0xbe840080, 0xd2890000,
0x00000900, 0x80048104,
0xd2890001, 0x00000900,
@@ -544,137 +501,181 @@ static const uint32_t cwsr_trap_gfx9_hex[] = {
0xc069003a, 0x00000070,
0xbf8cc07f, 0x80709070,
0xbf06c004, 0xbf84ffee,
+ 0x680404ff, 0x00000200,
+ 0xd0c9006a, 0x0000f702,
+ 0xbf87ffd2, 0xbf820015,
+ 0xd1060002, 0x00011103,
+ 0x7e0602ff, 0x00000200,
+ 0xbefc00ff, 0x00010000,
+ 0xbe800077, 0x8677ff77,
+ 0xff7fffff, 0x8777ff77,
+ 0x00058000, 0xd8ec0000,
+ 0x00000002, 0xbf8cc07f,
+ 0xe0765000, 0x701d0002,
+ 0x68040702, 0xd0c9006a,
+ 0x0000f702, 0xbf87fff7,
+ 0xbef70000, 0xbef000ff,
+ 0x00000400, 0xbefe00c1,
+ 0xbeff00c1, 0xb8fb2a05,
+ 0x807b817b, 0x8e7b827b,
+ 0xbef600ff, 0x01000000,
+ 0xbefc0084, 0xbf0a7b7c,
+ 0xbf84006d, 0xbf11017c,
+ 0x807bff7b, 0x00001000,
+ 0x867aff78, 0x00400000,
+ 0xbf850003, 0xb8faf803,
+ 0x897a7aff, 0x10000000,
+ 0xbf850051, 0xbe840080,
+ 0xd2890000, 0x00000900,
+ 0x80048104, 0xd2890001,
+ 0x00000900, 0x80048104,
+ 0xd2890002, 0x00000900,
+ 0x80048104, 0xd2890003,
+ 0x00000900, 0x80048104,
+ 0xc069003a, 0x00000070,
+ 0xbf8cc07f, 0x80709070,
+ 0xbf06c004, 0xbf84ffee,
0xbe840080, 0xd2890000,
- 0x00000902, 0x80048104,
- 0xd2890001, 0x00000902,
+ 0x00000901, 0x80048104,
+ 0xd2890001, 0x00000901,
0x80048104, 0xd2890002,
- 0x00000902, 0x80048104,
- 0xd2890003, 0x00000902,
+ 0x00000901, 0x80048104,
+ 0xd2890003, 0x00000901,
0x80048104, 0xc069003a,
0x00000070, 0xbf8cc07f,
0x80709070, 0xbf06c004,
0xbf84ffee, 0xbe840080,
- 0xd2890000, 0x00000903,
+ 0xd2890000, 0x00000902,
0x80048104, 0xd2890001,
- 0x00000903, 0x80048104,
- 0xd2890002, 0x00000903,
+ 0x00000902, 0x80048104,
+ 0xd2890002, 0x00000902,
0x80048104, 0xd2890003,
- 0x00000903, 0x80048104,
+ 0x00000902, 0x80048104,
0xc069003a, 0x00000070,
0xbf8cc07f, 0x80709070,
0xbf06c004, 0xbf84ffee,
- 0x807c847c, 0xbf0a7b7c,
- 0xbf85ffb1, 0xbf9c0000,
- 0xbf820012, 0x7e000300,
- 0x7e020301, 0x7e040302,
- 0x7e060303, 0xe0724000,
- 0x701d0000, 0xe0724100,
- 0x701d0100, 0xe0724200,
- 0x701d0200, 0xe0724300,
- 0x701d0300, 0x807c847c,
- 0x8070ff70, 0x00000400,
- 0xbf0a7b7c, 0xbf85ffef,
- 0xbf9c0000, 0xbf8200c7,
- 0xbef4007e, 0x8675ff7f,
- 0x0000ffff, 0x8775ff75,
- 0x00040000, 0xbef60080,
- 0xbef700ff, 0x00807fac,
- 0x866eff7f, 0x04000000,
- 0xbf84001e, 0xbefe00c1,
- 0xbeff00c1, 0xb8ef4306,
- 0x866fc16f, 0xbf840019,
- 0x8e6f866f, 0x8e6f826f,
- 0xbef6006f, 0xb8f82a05,
- 0x80788178, 0x8e788a78,
- 0xb8ee1605, 0x806e816e,
- 0x8e6e866e, 0x80786e78,
- 0x8078ff78, 0x00000080,
- 0xbef600ff, 0x01000000,
- 0xbefc0080, 0xe0510000,
- 0x781d0000, 0xe0510100,
- 0x781d0000, 0x807cff7c,
- 0x00000200, 0x8078ff78,
- 0x00000200, 0xbf0a6f7c,
- 0xbf85fff6, 0xbefe00c1,
- 0xbeff00c1, 0xbef600ff,
- 0x01000000, 0xb8ef2a05,
- 0x806f816f, 0x8e6f826f,
- 0x806fff6f, 0x00008000,
- 0xbef80080, 0xbeee0078,
- 0x8078ff78, 0x00000400,
- 0xbefc0084, 0xbf11087c,
- 0xe0524000, 0x781d0000,
- 0xe0524100, 0x781d0100,
- 0xe0524200, 0x781d0200,
- 0xe0524300, 0x781d0300,
- 0xbf8c0f70, 0x7e000300,
- 0x7e020301, 0x7e040302,
- 0x7e060303, 0x807c847c,
- 0x8078ff78, 0x00000400,
- 0xbf0a6f7c, 0xbf85ffee,
- 0xbf9c0000, 0xe0524000,
- 0x6e1d0000, 0xe0524100,
- 0x6e1d0100, 0xe0524200,
- 0x6e1d0200, 0xe0524300,
- 0x6e1d0300, 0xbf8c0f70,
+ 0xbe840080, 0xd2890000,
+ 0x00000903, 0x80048104,
+ 0xd2890001, 0x00000903,
+ 0x80048104, 0xd2890002,
+ 0x00000903, 0x80048104,
+ 0xd2890003, 0x00000903,
+ 0x80048104, 0xc069003a,
+ 0x00000070, 0xbf8cc07f,
+ 0x80709070, 0xbf06c004,
+ 0xbf84ffee, 0x807c847c,
+ 0xbf0a7b7c, 0xbf85ffb1,
+ 0xbf9c0000, 0xbf820012,
+ 0x7e000300, 0x7e020301,
+ 0x7e040302, 0x7e060303,
+ 0xe0724000, 0x701d0000,
+ 0xe0724100, 0x701d0100,
+ 0xe0724200, 0x701d0200,
+ 0xe0724300, 0x701d0300,
+ 0x807c847c, 0x8070ff70,
+ 0x00000400, 0xbf0a7b7c,
+ 0xbf85ffef, 0xbf9c0000,
+ 0xbf8200c7, 0xbef4007e,
+ 0x8675ff7f, 0x0000ffff,
+ 0x8775ff75, 0x00040000,
+ 0xbef60080, 0xbef700ff,
+ 0x00807fac, 0x866eff7f,
+ 0x04000000, 0xbf84001e,
+ 0xbefe00c1, 0xbeff00c1,
+ 0xb8ef4306, 0x866fc16f,
+ 0xbf840019, 0x8e6f866f,
+ 0x8e6f826f, 0xbef6006f,
0xb8f82a05, 0x80788178,
0x8e788a78, 0xb8ee1605,
0x806e816e, 0x8e6e866e,
- 0x80786e78, 0x80f8c078,
- 0xb8ef1605, 0x806f816f,
- 0x8e6f846f, 0x8e76826f,
+ 0x80786e78, 0x8078ff78,
+ 0x00000080, 0xbef600ff,
+ 0x01000000, 0xbefc0080,
+ 0xe0510000, 0x781d0000,
+ 0xe0510100, 0x781d0000,
+ 0x807cff7c, 0x00000200,
+ 0x8078ff78, 0x00000200,
+ 0xbf0a6f7c, 0xbf85fff6,
+ 0xbefe00c1, 0xbeff00c1,
0xbef600ff, 0x01000000,
- 0xbefc006f, 0xc031003a,
- 0x00000078, 0x80f8c078,
- 0xbf8cc07f, 0x80fc907c,
- 0xbf800000, 0xbe802d00,
- 0xbe822d02, 0xbe842d04,
- 0xbe862d06, 0xbe882d08,
- 0xbe8a2d0a, 0xbe8c2d0c,
- 0xbe8e2d0e, 0xbf06807c,
- 0xbf84fff0, 0xb8f82a05,
+ 0xb8ef2a05, 0x806f816f,
+ 0x8e6f826f, 0x806fff6f,
+ 0x00008000, 0xbef80080,
+ 0xbeee0078, 0x8078ff78,
+ 0x00000400, 0xbefc0084,
+ 0xbf11087c, 0xe0524000,
+ 0x781d0000, 0xe0524100,
+ 0x781d0100, 0xe0524200,
+ 0x781d0200, 0xe0524300,
+ 0x781d0300, 0xbf8c0f70,
+ 0x7e000300, 0x7e020301,
+ 0x7e040302, 0x7e060303,
+ 0x807c847c, 0x8078ff78,
+ 0x00000400, 0xbf0a6f7c,
+ 0xbf85ffee, 0xbf9c0000,
+ 0xe0524000, 0x6e1d0000,
+ 0xe0524100, 0x6e1d0100,
+ 0xe0524200, 0x6e1d0200,
+ 0xe0524300, 0x6e1d0300,
+ 0xbf8c0f70, 0xb8f82a05,
0x80788178, 0x8e788a78,
0xb8ee1605, 0x806e816e,
0x8e6e866e, 0x80786e78,
- 0xbef60084, 0xbef600ff,
- 0x01000000, 0xc0211bfa,
+ 0x80f8c078, 0xb8ef1605,
+ 0x806f816f, 0x8e6f846f,
+ 0x8e76826f, 0xbef600ff,
+ 0x01000000, 0xbefc006f,
+ 0xc031003a, 0x00000078,
+ 0x80f8c078, 0xbf8cc07f,
+ 0x80fc907c, 0xbf800000,
+ 0xbe802d00, 0xbe822d02,
+ 0xbe842d04, 0xbe862d06,
+ 0xbe882d08, 0xbe8a2d0a,
+ 0xbe8c2d0c, 0xbe8e2d0e,
+ 0xbf06807c, 0xbf84fff0,
+ 0xb8f82a05, 0x80788178,
+ 0x8e788a78, 0xb8ee1605,
+ 0x806e816e, 0x8e6e866e,
+ 0x80786e78, 0xbef60084,
+ 0xbef600ff, 0x01000000,
+ 0xc0211bfa, 0x00000078,
+ 0x80788478, 0xc0211b3a,
0x00000078, 0x80788478,
- 0xc0211b3a, 0x00000078,
- 0x80788478, 0xc0211b7a,
+ 0xc0211b7a, 0x00000078,
+ 0x80788478, 0xc0211c3a,
0x00000078, 0x80788478,
- 0xc0211c3a, 0x00000078,
- 0x80788478, 0xc0211c7a,
+ 0xc0211c7a, 0x00000078,
+ 0x80788478, 0xc0211eba,
0x00000078, 0x80788478,
- 0xc0211eba, 0x00000078,
- 0x80788478, 0xc0211efa,
+ 0xc0211efa, 0x00000078,
+ 0x80788478, 0xc0211a3a,
0x00000078, 0x80788478,
- 0xc0211a3a, 0x00000078,
- 0x80788478, 0xc0211a7a,
+ 0xc0211a7a, 0x00000078,
+ 0x80788478, 0xc0211cfa,
0x00000078, 0x80788478,
- 0xc0211cfa, 0x00000078,
- 0x80788478, 0xbf8cc07f,
- 0xbefc006f, 0xbefe0070,
- 0xbeff0071, 0x866f7bff,
- 0x000003ff, 0xb96f4803,
- 0x866f7bff, 0xfffff800,
- 0x8f6f8b6f, 0xb96fa2c3,
- 0xb973f801, 0xb8ee2a05,
- 0x806e816e, 0x8e6e8a6e,
- 0xb8ef1605, 0x806f816f,
- 0x8e6f866f, 0x806e6f6e,
- 0x806e746e, 0x826f8075,
- 0x866fff6f, 0x0000ffff,
- 0xc00b1c37, 0x00000050,
- 0xc00b1d37, 0x00000060,
- 0xc0031e77, 0x00000074,
- 0xbf8cc07f, 0x8f6e8b77,
- 0x866eff6e, 0x001f8000,
- 0xb96ef807, 0x866dff6d,
- 0x0000ffff, 0x86fe7e7e,
- 0x86ea6a6a, 0x8f6e837a,
- 0xb96ee0c2, 0xbf800002,
- 0xb97a0002, 0xbf8a0000,
- 0xbe801f6c, 0xbf9b0000,
+ 0xbf8cc07f, 0xbefc006f,
+ 0xbefe0070, 0xbeff0071,
+ 0x866f7bff, 0x000003ff,
+ 0xb96f4803, 0x866f7bff,
+ 0xfffff800, 0x8f6f8b6f,
+ 0xb96fa2c3, 0xb973f801,
+ 0xb8ee2a05, 0x806e816e,
+ 0x8e6e8a6e, 0xb8ef1605,
+ 0x806f816f, 0x8e6f866f,
+ 0x806e6f6e, 0x806e746e,
+ 0x826f8075, 0x866fff6f,
+ 0x0000ffff, 0xc00b1c37,
+ 0x00000050, 0xc00b1d37,
+ 0x00000060, 0xc0031e77,
+ 0x00000074, 0xbf8cc07f,
+ 0x8f6e8b77, 0x866eff6e,
+ 0x001f8000, 0xb96ef807,
+ 0x866dff6d, 0x0000ffff,
+ 0x86fe7e7e, 0x86ea6a6a,
+ 0x8f6e837a, 0xb96ee0c2,
+ 0xbf800002, 0xb97a0002,
+ 0xbf8a0000, 0xbe801f6c,
+ 0xbf9b0000, 0x00000000,
};
static const uint32_t cwsr_trap_nv1x_hex[] = {
@@ -1302,7 +1303,7 @@ static const uint32_t cwsr_trap_nv1x_hex[] = {
};
static const uint32_t cwsr_trap_arcturus_hex[] = {
- 0xbf820001, 0xbf8202d4,
+ 0xbf820001, 0xbf8202d5,
0xb8f8f802, 0x8978ff78,
0x00020006, 0xb8fbf803,
0x866eff78, 0x00002000,
@@ -1419,99 +1420,37 @@ static const uint32_t cwsr_trap_arcturus_hex[] = {
0xbefe007c, 0xbefc0070,
0xc0611c7a, 0x0000007c,
0xbf8cc07f, 0x80708470,
- 0xbefc007e, 0x867aff7f,
- 0x04000000, 0xbeef0080,
- 0x876f6f7a, 0xb8f02a05,
- 0x80708170, 0x8e708a70,
- 0x8e708170, 0xb8fb1605,
- 0x807b817b, 0x8e7b847b,
- 0x8e76827b, 0xbef600ff,
- 0x01000000, 0xbef20174,
- 0x80747074, 0x82758075,
- 0xbefc0080, 0xbf800000,
- 0xbe802b00, 0xbe822b02,
- 0xbe842b04, 0xbe862b06,
- 0xbe882b08, 0xbe8a2b0a,
- 0xbe8c2b0c, 0xbe8e2b0e,
- 0xc06b003a, 0x00000000,
- 0xbf8cc07f, 0xc06b013a,
- 0x00000010, 0xbf8cc07f,
- 0xc06b023a, 0x00000020,
- 0xbf8cc07f, 0xc06b033a,
- 0x00000030, 0xbf8cc07f,
- 0x8074c074, 0x82758075,
- 0x807c907c, 0xbf0a7b7c,
- 0xbf85ffe7, 0xbef40172,
- 0xbef00080, 0xbefe00c1,
- 0xbeff00c1, 0xbee80080,
- 0xbee90080, 0xbef600ff,
- 0x01000000, 0x867aff78,
- 0x00400000, 0xbf850003,
- 0xb8faf803, 0x897a7aff,
- 0x10000000, 0xbf85004d,
- 0xbe840080, 0xd2890000,
- 0x00000900, 0x80048104,
- 0xd2890001, 0x00000900,
- 0x80048104, 0xd2890002,
- 0x00000900, 0x80048104,
- 0xd2890003, 0x00000900,
- 0x80048104, 0xc069003a,
- 0x00000070, 0xbf8cc07f,
- 0x80709070, 0xbf06c004,
- 0xbf84ffee, 0xbe840080,
- 0xd2890000, 0x00000901,
- 0x80048104, 0xd2890001,
- 0x00000901, 0x80048104,
- 0xd2890002, 0x00000901,
- 0x80048104, 0xd2890003,
- 0x00000901, 0x80048104,
- 0xc069003a, 0x00000070,
- 0xbf8cc07f, 0x80709070,
- 0xbf06c004, 0xbf84ffee,
- 0xbe840080, 0xd2890000,
- 0x00000902, 0x80048104,
- 0xd2890001, 0x00000902,
- 0x80048104, 0xd2890002,
- 0x00000902, 0x80048104,
- 0xd2890003, 0x00000902,
- 0x80048104, 0xc069003a,
- 0x00000070, 0xbf8cc07f,
- 0x80709070, 0xbf06c004,
- 0xbf84ffee, 0xbe840080,
- 0xd2890000, 0x00000903,
- 0x80048104, 0xd2890001,
- 0x00000903, 0x80048104,
- 0xd2890002, 0x00000903,
- 0x80048104, 0xd2890003,
- 0x00000903, 0x80048104,
- 0xc069003a, 0x00000070,
- 0xbf8cc07f, 0x80709070,
- 0xbf06c004, 0xbf84ffee,
- 0xbf820008, 0xe0724000,
- 0x701d0000, 0xe0724100,
- 0x701d0100, 0xe0724200,
- 0x701d0200, 0xe0724300,
- 0x701d0300, 0xbefe00c1,
- 0xbeff00c1, 0xb8fb4306,
- 0x867bc17b, 0xbf840064,
- 0xbf8a0000, 0x867aff6f,
- 0x04000000, 0xbf840060,
- 0x8e7b867b, 0x8e7b827b,
- 0xbef6007b, 0xb8f02a05,
- 0x80708170, 0x8e708a70,
- 0x8e708170, 0xb8fa1605,
- 0x807a817a, 0x8e7a867a,
- 0x80707a70, 0x8070ff70,
- 0x00000080, 0xbef600ff,
- 0x01000000, 0xbefc0080,
- 0xd28c0002, 0x000100c1,
- 0xd28d0003, 0x000204c1,
+ 0xbefc007e, 0xbf108080,
+ 0x867aff7f, 0x04000000,
+ 0xbeef0080, 0x876f6f7a,
+ 0xb8f02a05, 0x80708170,
+ 0x8e708a70, 0x8e708170,
+ 0xb8fb1605, 0x807b817b,
+ 0x8e7b847b, 0x8e76827b,
+ 0xbef600ff, 0x01000000,
+ 0xbef20174, 0x80747074,
+ 0x82758075, 0xbefc0080,
+ 0xbf800000, 0xbe802b00,
+ 0xbe822b02, 0xbe842b04,
+ 0xbe862b06, 0xbe882b08,
+ 0xbe8a2b0a, 0xbe8c2b0c,
+ 0xbe8e2b0e, 0xc06b003a,
+ 0x00000000, 0xbf8cc07f,
+ 0xc06b013a, 0x00000010,
+ 0xbf8cc07f, 0xc06b023a,
+ 0x00000020, 0xbf8cc07f,
+ 0xc06b033a, 0x00000030,
+ 0xbf8cc07f, 0x8074c074,
+ 0x82758075, 0x807c907c,
+ 0xbf0a7b7c, 0xbf85ffe7,
+ 0xbef40172, 0xbef00080,
+ 0xbefe00c1, 0xbeff00c1,
+ 0xbee80080, 0xbee90080,
+ 0xbef600ff, 0x01000000,
0x867aff78, 0x00400000,
0xbf850003, 0xb8faf803,
0x897a7aff, 0x10000000,
- 0xbf850030, 0x24040682,
- 0xd86e4000, 0x00000002,
- 0xbf8cc07f, 0xbe840080,
+ 0xbf85004d, 0xbe840080,
0xd2890000, 0x00000900,
0x80048104, 0xd2890001,
0x00000900, 0x80048104,
@@ -1530,31 +1469,50 @@ static const uint32_t cwsr_trap_arcturus_hex[] = {
0x80048104, 0xc069003a,
0x00000070, 0xbf8cc07f,
0x80709070, 0xbf06c004,
- 0xbf84ffee, 0x680404ff,
- 0x00000200, 0xd0c9006a,
- 0x0000f702, 0xbf87ffd2,
- 0xbf820015, 0xd1060002,
- 0x00011103, 0x7e0602ff,
- 0x00000200, 0xbefc00ff,
- 0x00010000, 0xbe800077,
- 0x8677ff77, 0xff7fffff,
- 0x8777ff77, 0x00058000,
- 0xd8ec0000, 0x00000002,
- 0xbf8cc07f, 0xe0765000,
- 0x701d0002, 0x68040702,
- 0xd0c9006a, 0x0000f702,
- 0xbf87fff7, 0xbef70000,
- 0xbef000ff, 0x00000400,
+ 0xbf84ffee, 0xbe840080,
+ 0xd2890000, 0x00000902,
+ 0x80048104, 0xd2890001,
+ 0x00000902, 0x80048104,
+ 0xd2890002, 0x00000902,
+ 0x80048104, 0xd2890003,
+ 0x00000902, 0x80048104,
+ 0xc069003a, 0x00000070,
+ 0xbf8cc07f, 0x80709070,
+ 0xbf06c004, 0xbf84ffee,
+ 0xbe840080, 0xd2890000,
+ 0x00000903, 0x80048104,
+ 0xd2890001, 0x00000903,
+ 0x80048104, 0xd2890002,
+ 0x00000903, 0x80048104,
+ 0xd2890003, 0x00000903,
+ 0x80048104, 0xc069003a,
+ 0x00000070, 0xbf8cc07f,
+ 0x80709070, 0xbf06c004,
+ 0xbf84ffee, 0xbf820008,
+ 0xe0724000, 0x701d0000,
+ 0xe0724100, 0x701d0100,
+ 0xe0724200, 0x701d0200,
+ 0xe0724300, 0x701d0300,
0xbefe00c1, 0xbeff00c1,
- 0xb8fb2a05, 0x807b817b,
- 0x8e7b827b, 0xbef600ff,
- 0x01000000, 0xbefc0084,
- 0xbf0a7b7c, 0xbf84006d,
- 0xbf11017c, 0x807bff7b,
- 0x00001000, 0x867aff78,
+ 0xb8fb4306, 0x867bc17b,
+ 0xbf840064, 0xbf8a0000,
+ 0x867aff6f, 0x04000000,
+ 0xbf840060, 0x8e7b867b,
+ 0x8e7b827b, 0xbef6007b,
+ 0xb8f02a05, 0x80708170,
+ 0x8e708a70, 0x8e708170,
+ 0xb8fa1605, 0x807a817a,
+ 0x8e7a867a, 0x80707a70,
+ 0x8070ff70, 0x00000080,
+ 0xbef600ff, 0x01000000,
+ 0xbefc0080, 0xd28c0002,
+ 0x000100c1, 0xd28d0003,
+ 0x000204c1, 0x867aff78,
0x00400000, 0xbf850003,
0xb8faf803, 0x897a7aff,
- 0x10000000, 0xbf850051,
+ 0x10000000, 0xbf850030,
+ 0x24040682, 0xd86e4000,
+ 0x00000002, 0xbf8cc07f,
0xbe840080, 0xd2890000,
0x00000900, 0x80048104,
0xd2890001, 0x00000900,
@@ -1574,215 +1532,259 @@ static const uint32_t cwsr_trap_arcturus_hex[] = {
0xc069003a, 0x00000070,
0xbf8cc07f, 0x80709070,
0xbf06c004, 0xbf84ffee,
+ 0x680404ff, 0x00000200,
+ 0xd0c9006a, 0x0000f702,
+ 0xbf87ffd2, 0xbf820015,
+ 0xd1060002, 0x00011103,
+ 0x7e0602ff, 0x00000200,
+ 0xbefc00ff, 0x00010000,
+ 0xbe800077, 0x8677ff77,
+ 0xff7fffff, 0x8777ff77,
+ 0x00058000, 0xd8ec0000,
+ 0x00000002, 0xbf8cc07f,
+ 0xe0765000, 0x701d0002,
+ 0x68040702, 0xd0c9006a,
+ 0x0000f702, 0xbf87fff7,
+ 0xbef70000, 0xbef000ff,
+ 0x00000400, 0xbefe00c1,
+ 0xbeff00c1, 0xb8fb2a05,
+ 0x807b817b, 0x8e7b827b,
+ 0xbef600ff, 0x01000000,
+ 0xbefc0084, 0xbf0a7b7c,
+ 0xbf84006d, 0xbf11017c,
+ 0x807bff7b, 0x00001000,
+ 0x867aff78, 0x00400000,
+ 0xbf850003, 0xb8faf803,
+ 0x897a7aff, 0x10000000,
+ 0xbf850051, 0xbe840080,
+ 0xd2890000, 0x00000900,
+ 0x80048104, 0xd2890001,
+ 0x00000900, 0x80048104,
+ 0xd2890002, 0x00000900,
+ 0x80048104, 0xd2890003,
+ 0x00000900, 0x80048104,
+ 0xc069003a, 0x00000070,
+ 0xbf8cc07f, 0x80709070,
+ 0xbf06c004, 0xbf84ffee,
0xbe840080, 0xd2890000,
- 0x00000902, 0x80048104,
- 0xd2890001, 0x00000902,
+ 0x00000901, 0x80048104,
+ 0xd2890001, 0x00000901,
0x80048104, 0xd2890002,
- 0x00000902, 0x80048104,
- 0xd2890003, 0x00000902,
+ 0x00000901, 0x80048104,
+ 0xd2890003, 0x00000901,
0x80048104, 0xc069003a,
0x00000070, 0xbf8cc07f,
0x80709070, 0xbf06c004,
0xbf84ffee, 0xbe840080,
- 0xd2890000, 0x00000903,
+ 0xd2890000, 0x00000902,
0x80048104, 0xd2890001,
- 0x00000903, 0x80048104,
- 0xd2890002, 0x00000903,
+ 0x00000902, 0x80048104,
+ 0xd2890002, 0x00000902,
0x80048104, 0xd2890003,
- 0x00000903, 0x80048104,
+ 0x00000902, 0x80048104,
0xc069003a, 0x00000070,
0xbf8cc07f, 0x80709070,
0xbf06c004, 0xbf84ffee,
- 0x807c847c, 0xbf0a7b7c,
- 0xbf85ffb1, 0xbf9c0000,
- 0xbf820012, 0x7e000300,
- 0x7e020301, 0x7e040302,
- 0x7e060303, 0xe0724000,
- 0x701d0000, 0xe0724100,
- 0x701d0100, 0xe0724200,
- 0x701d0200, 0xe0724300,
- 0x701d0300, 0x807c847c,
- 0x8070ff70, 0x00000400,
- 0xbf0a7b7c, 0xbf85ffef,
- 0xbf9c0000, 0xbefc0080,
- 0xbf11017c, 0x867aff78,
- 0x00400000, 0xbf850003,
- 0xb8faf803, 0x897a7aff,
- 0x10000000, 0xbf850059,
- 0xd3d84000, 0x18000100,
- 0xd3d84001, 0x18000101,
- 0xd3d84002, 0x18000102,
- 0xd3d84003, 0x18000103,
0xbe840080, 0xd2890000,
- 0x00000900, 0x80048104,
- 0xd2890001, 0x00000900,
+ 0x00000903, 0x80048104,
+ 0xd2890001, 0x00000903,
0x80048104, 0xd2890002,
- 0x00000900, 0x80048104,
- 0xd2890003, 0x00000900,
+ 0x00000903, 0x80048104,
+ 0xd2890003, 0x00000903,
0x80048104, 0xc069003a,
0x00000070, 0xbf8cc07f,
0x80709070, 0xbf06c004,
- 0xbf84ffee, 0xbe840080,
- 0xd2890000, 0x00000901,
+ 0xbf84ffee, 0x807c847c,
+ 0xbf0a7b7c, 0xbf85ffb1,
+ 0xbf9c0000, 0xbf820012,
+ 0x7e000300, 0x7e020301,
+ 0x7e040302, 0x7e060303,
+ 0xe0724000, 0x701d0000,
+ 0xe0724100, 0x701d0100,
+ 0xe0724200, 0x701d0200,
+ 0xe0724300, 0x701d0300,
+ 0x807c847c, 0x8070ff70,
+ 0x00000400, 0xbf0a7b7c,
+ 0xbf85ffef, 0xbf9c0000,
+ 0xbefc0080, 0xbf11017c,
+ 0x867aff78, 0x00400000,
+ 0xbf850003, 0xb8faf803,
+ 0x897a7aff, 0x10000000,
+ 0xbf850059, 0xd3d84000,
+ 0x18000100, 0xd3d84001,
+ 0x18000101, 0xd3d84002,
+ 0x18000102, 0xd3d84003,
+ 0x18000103, 0xbe840080,
+ 0xd2890000, 0x00000900,
0x80048104, 0xd2890001,
- 0x00000901, 0x80048104,
- 0xd2890002, 0x00000901,
+ 0x00000900, 0x80048104,
+ 0xd2890002, 0x00000900,
0x80048104, 0xd2890003,
- 0x00000901, 0x80048104,
+ 0x00000900, 0x80048104,
0xc069003a, 0x00000070,
0xbf8cc07f, 0x80709070,
0xbf06c004, 0xbf84ffee,
0xbe840080, 0xd2890000,
- 0x00000902, 0x80048104,
- 0xd2890001, 0x00000902,
+ 0x00000901, 0x80048104,
+ 0xd2890001, 0x00000901,
0x80048104, 0xd2890002,
- 0x00000902, 0x80048104,
- 0xd2890003, 0x00000902,
+ 0x00000901, 0x80048104,
+ 0xd2890003, 0x00000901,
0x80048104, 0xc069003a,
0x00000070, 0xbf8cc07f,
0x80709070, 0xbf06c004,
0xbf84ffee, 0xbe840080,
- 0xd2890000, 0x00000903,
+ 0xd2890000, 0x00000902,
0x80048104, 0xd2890001,
- 0x00000903, 0x80048104,
- 0xd2890002, 0x00000903,
+ 0x00000902, 0x80048104,
+ 0xd2890002, 0x00000902,
0x80048104, 0xd2890003,
- 0x00000903, 0x80048104,
+ 0x00000902, 0x80048104,
0xc069003a, 0x00000070,
0xbf8cc07f, 0x80709070,
0xbf06c004, 0xbf84ffee,
- 0x807c847c, 0xbf0a7b7c,
- 0xbf85ffa9, 0xbf9c0000,
- 0xbf820016, 0xd3d84000,
- 0x18000100, 0xd3d84001,
- 0x18000101, 0xd3d84002,
- 0x18000102, 0xd3d84003,
- 0x18000103, 0xe0724000,
- 0x701d0000, 0xe0724100,
- 0x701d0100, 0xe0724200,
- 0x701d0200, 0xe0724300,
- 0x701d0300, 0x807c847c,
- 0x8070ff70, 0x00000400,
- 0xbf0a7b7c, 0xbf85ffeb,
- 0xbf9c0000, 0xbf8200e3,
- 0xbef4007e, 0x8675ff7f,
- 0x0000ffff, 0x8775ff75,
- 0x00040000, 0xbef60080,
- 0xbef700ff, 0x00807fac,
- 0x866eff7f, 0x04000000,
- 0xbf84001f, 0xbefe00c1,
- 0xbeff00c1, 0xb8ef4306,
- 0x866fc16f, 0xbf84001a,
- 0x8e6f866f, 0x8e6f826f,
- 0xbef6006f, 0xb8f82a05,
- 0x80788178, 0x8e788a78,
- 0x8e788178, 0xb8ee1605,
- 0x806e816e, 0x8e6e866e,
- 0x80786e78, 0x8078ff78,
- 0x00000080, 0xbef600ff,
- 0x01000000, 0xbefc0080,
- 0xe0510000, 0x781d0000,
- 0xe0510100, 0x781d0000,
- 0x807cff7c, 0x00000200,
- 0x8078ff78, 0x00000200,
- 0xbf0a6f7c, 0xbf85fff6,
+ 0xbe840080, 0xd2890000,
+ 0x00000903, 0x80048104,
+ 0xd2890001, 0x00000903,
+ 0x80048104, 0xd2890002,
+ 0x00000903, 0x80048104,
+ 0xd2890003, 0x00000903,
+ 0x80048104, 0xc069003a,
+ 0x00000070, 0xbf8cc07f,
+ 0x80709070, 0xbf06c004,
+ 0xbf84ffee, 0x807c847c,
+ 0xbf0a7b7c, 0xbf85ffa9,
+ 0xbf9c0000, 0xbf820016,
+ 0xd3d84000, 0x18000100,
+ 0xd3d84001, 0x18000101,
+ 0xd3d84002, 0x18000102,
+ 0xd3d84003, 0x18000103,
+ 0xe0724000, 0x701d0000,
+ 0xe0724100, 0x701d0100,
+ 0xe0724200, 0x701d0200,
+ 0xe0724300, 0x701d0300,
+ 0x807c847c, 0x8070ff70,
+ 0x00000400, 0xbf0a7b7c,
+ 0xbf85ffeb, 0xbf9c0000,
+ 0xbf8200e3, 0xbef4007e,
+ 0x8675ff7f, 0x0000ffff,
+ 0x8775ff75, 0x00040000,
+ 0xbef60080, 0xbef700ff,
+ 0x00807fac, 0x866eff7f,
+ 0x04000000, 0xbf84001f,
0xbefe00c1, 0xbeff00c1,
+ 0xb8ef4306, 0x866fc16f,
+ 0xbf84001a, 0x8e6f866f,
+ 0x8e6f826f, 0xbef6006f,
+ 0xb8f82a05, 0x80788178,
+ 0x8e788a78, 0x8e788178,
+ 0xb8ee1605, 0x806e816e,
+ 0x8e6e866e, 0x80786e78,
+ 0x8078ff78, 0x00000080,
0xbef600ff, 0x01000000,
- 0xb8ef2a05, 0x806f816f,
- 0x8e6f826f, 0x806fff6f,
- 0x00008000, 0xbef80080,
- 0xbeee0078, 0x8078ff78,
- 0x00000400, 0xbefc0084,
- 0xbf11087c, 0xe0524000,
- 0x781d0000, 0xe0524100,
- 0x781d0100, 0xe0524200,
- 0x781d0200, 0xe0524300,
- 0x781d0300, 0xbf8c0f70,
- 0x7e000300, 0x7e020301,
- 0x7e040302, 0x7e060303,
- 0x807c847c, 0x8078ff78,
- 0x00000400, 0xbf0a6f7c,
- 0xbf85ffee, 0xbefc0080,
- 0xbf11087c, 0xe0524000,
- 0x781d0000, 0xe0524100,
- 0x781d0100, 0xe0524200,
- 0x781d0200, 0xe0524300,
- 0x781d0300, 0xbf8c0f70,
- 0xd3d94000, 0x18000100,
- 0xd3d94001, 0x18000101,
- 0xd3d94002, 0x18000102,
- 0xd3d94003, 0x18000103,
- 0x807c847c, 0x8078ff78,
- 0x00000400, 0xbf0a6f7c,
- 0xbf85ffea, 0xbf9c0000,
- 0xe0524000, 0x6e1d0000,
- 0xe0524100, 0x6e1d0100,
- 0xe0524200, 0x6e1d0200,
- 0xe0524300, 0x6e1d0300,
- 0xbf8c0f70, 0xb8f82a05,
- 0x80788178, 0x8e788a78,
- 0x8e788178, 0xb8ee1605,
- 0x806e816e, 0x8e6e866e,
- 0x80786e78, 0x80f8c078,
- 0xb8ef1605, 0x806f816f,
- 0x8e6f846f, 0x8e76826f,
- 0xbef600ff, 0x01000000,
- 0xbefc006f, 0xc031003a,
- 0x00000078, 0x80f8c078,
- 0xbf8cc07f, 0x80fc907c,
- 0xbf800000, 0xbe802d00,
- 0xbe822d02, 0xbe842d04,
- 0xbe862d06, 0xbe882d08,
- 0xbe8a2d0a, 0xbe8c2d0c,
- 0xbe8e2d0e, 0xbf06807c,
- 0xbf84fff0, 0xb8f82a05,
- 0x80788178, 0x8e788a78,
- 0x8e788178, 0xb8ee1605,
- 0x806e816e, 0x8e6e866e,
- 0x80786e78, 0xbef60084,
- 0xbef600ff, 0x01000000,
- 0xc0211bfa, 0x00000078,
- 0x80788478, 0xc0211b3a,
+ 0xbefc0080, 0xe0510000,
+ 0x781d0000, 0xe0510100,
+ 0x781d0000, 0x807cff7c,
+ 0x00000200, 0x8078ff78,
+ 0x00000200, 0xbf0a6f7c,
+ 0xbf85fff6, 0xbefe00c1,
+ 0xbeff00c1, 0xbef600ff,
+ 0x01000000, 0xb8ef2a05,
+ 0x806f816f, 0x8e6f826f,
+ 0x806fff6f, 0x00008000,
+ 0xbef80080, 0xbeee0078,
+ 0x8078ff78, 0x00000400,
+ 0xbefc0084, 0xbf11087c,
+ 0xe0524000, 0x781d0000,
+ 0xe0524100, 0x781d0100,
+ 0xe0524200, 0x781d0200,
+ 0xe0524300, 0x781d0300,
+ 0xbf8c0f70, 0x7e000300,
+ 0x7e020301, 0x7e040302,
+ 0x7e060303, 0x807c847c,
+ 0x8078ff78, 0x00000400,
+ 0xbf0a6f7c, 0xbf85ffee,
+ 0xbefc0080, 0xbf11087c,
+ 0xe0524000, 0x781d0000,
+ 0xe0524100, 0x781d0100,
+ 0xe0524200, 0x781d0200,
+ 0xe0524300, 0x781d0300,
+ 0xbf8c0f70, 0xd3d94000,
+ 0x18000100, 0xd3d94001,
+ 0x18000101, 0xd3d94002,
+ 0x18000102, 0xd3d94003,
+ 0x18000103, 0x807c847c,
+ 0x8078ff78, 0x00000400,
+ 0xbf0a6f7c, 0xbf85ffea,
+ 0xbf9c0000, 0xe0524000,
+ 0x6e1d0000, 0xe0524100,
+ 0x6e1d0100, 0xe0524200,
+ 0x6e1d0200, 0xe0524300,
+ 0x6e1d0300, 0xbf8c0f70,
+ 0xb8f82a05, 0x80788178,
+ 0x8e788a78, 0x8e788178,
+ 0xb8ee1605, 0x806e816e,
+ 0x8e6e866e, 0x80786e78,
+ 0x80f8c078, 0xb8ef1605,
+ 0x806f816f, 0x8e6f846f,
+ 0x8e76826f, 0xbef600ff,
+ 0x01000000, 0xbefc006f,
+ 0xc031003a, 0x00000078,
+ 0x80f8c078, 0xbf8cc07f,
+ 0x80fc907c, 0xbf800000,
+ 0xbe802d00, 0xbe822d02,
+ 0xbe842d04, 0xbe862d06,
+ 0xbe882d08, 0xbe8a2d0a,
+ 0xbe8c2d0c, 0xbe8e2d0e,
+ 0xbf06807c, 0xbf84fff0,
+ 0xb8f82a05, 0x80788178,
+ 0x8e788a78, 0x8e788178,
+ 0xb8ee1605, 0x806e816e,
+ 0x8e6e866e, 0x80786e78,
+ 0xbef60084, 0xbef600ff,
+ 0x01000000, 0xc0211bfa,
0x00000078, 0x80788478,
- 0xc0211b7a, 0x00000078,
- 0x80788478, 0xc0211c3a,
+ 0xc0211b3a, 0x00000078,
+ 0x80788478, 0xc0211b7a,
0x00000078, 0x80788478,
- 0xc0211c7a, 0x00000078,
- 0x80788478, 0xc0211eba,
+ 0xc0211c3a, 0x00000078,
+ 0x80788478, 0xc0211c7a,
0x00000078, 0x80788478,
- 0xc0211efa, 0x00000078,
- 0x80788478, 0xc0211a3a,
+ 0xc0211eba, 0x00000078,
+ 0x80788478, 0xc0211efa,
0x00000078, 0x80788478,
- 0xc0211a7a, 0x00000078,
- 0x80788478, 0xc0211cfa,
+ 0xc0211a3a, 0x00000078,
+ 0x80788478, 0xc0211a7a,
0x00000078, 0x80788478,
- 0xbf8cc07f, 0xbefc006f,
- 0xbefe0070, 0xbeff0071,
- 0x866f7bff, 0x000003ff,
- 0xb96f4803, 0x866f7bff,
- 0xfffff800, 0x8f6f8b6f,
- 0xb96fa2c3, 0xb973f801,
- 0xb8ee2a05, 0x806e816e,
- 0x8e6e8a6e, 0x8e6e816e,
- 0xb8ef1605, 0x806f816f,
- 0x8e6f866f, 0x806e6f6e,
- 0x806e746e, 0x826f8075,
- 0x866fff6f, 0x0000ffff,
- 0xc00b1c37, 0x00000050,
- 0xc00b1d37, 0x00000060,
- 0xc0031e77, 0x00000074,
- 0xbf8cc07f, 0x8f6e8b77,
- 0x866eff6e, 0x001f8000,
- 0xb96ef807, 0x866dff6d,
- 0x0000ffff, 0x86fe7e7e,
- 0x86ea6a6a, 0x8f6e837a,
- 0xb96ee0c2, 0xbf800002,
- 0xb97a0002, 0xbf8a0000,
- 0xbe801f6c, 0xbf9b0000,
+ 0xc0211cfa, 0x00000078,
+ 0x80788478, 0xbf8cc07f,
+ 0xbefc006f, 0xbefe0070,
+ 0xbeff0071, 0x866f7bff,
+ 0x000003ff, 0xb96f4803,
+ 0x866f7bff, 0xfffff800,
+ 0x8f6f8b6f, 0xb96fa2c3,
+ 0xb973f801, 0xb8ee2a05,
+ 0x806e816e, 0x8e6e8a6e,
+ 0x8e6e816e, 0xb8ef1605,
+ 0x806f816f, 0x8e6f866f,
+ 0x806e6f6e, 0x806e746e,
+ 0x826f8075, 0x866fff6f,
+ 0x0000ffff, 0xc00b1c37,
+ 0x00000050, 0xc00b1d37,
+ 0x00000060, 0xc0031e77,
+ 0x00000074, 0xbf8cc07f,
+ 0x8f6e8b77, 0x866eff6e,
+ 0x001f8000, 0xb96ef807,
+ 0x866dff6d, 0x0000ffff,
+ 0x86fe7e7e, 0x86ea6a6a,
+ 0x8f6e837a, 0xb96ee0c2,
+ 0xbf800002, 0xb97a0002,
+ 0xbf8a0000, 0xbe801f6c,
+ 0xbf9b0000, 0x00000000,
};
static const uint32_t cwsr_trap_aldebaran_hex[] = {
- 0xbf820001, 0xbf8202df,
+ 0xbf820001, 0xbf8202e0,
0xb8f8f802, 0x8978ff78,
0x00020006, 0xb8fbf803,
0x866eff78, 0x00002000,
@@ -1899,99 +1901,37 @@ static const uint32_t cwsr_trap_aldebaran_hex[] = {
0xbefe007c, 0xbefc0070,
0xc0611c7a, 0x0000007c,
0xbf8cc07f, 0x80708470,
- 0xbefc007e, 0x867aff7f,
- 0x04000000, 0xbeef0080,
- 0x876f6f7a, 0xb8f02985,
- 0x80708170, 0x8e708a70,
- 0x8e708170, 0xb8fb1605,
- 0x807b817b, 0x8e7b847b,
- 0x8e76827b, 0xbef600ff,
- 0x01000000, 0xbef20174,
- 0x80747074, 0x82758075,
- 0xbefc0080, 0xbf800000,
- 0xbe802b00, 0xbe822b02,
- 0xbe842b04, 0xbe862b06,
- 0xbe882b08, 0xbe8a2b0a,
- 0xbe8c2b0c, 0xbe8e2b0e,
- 0xc06b003a, 0x00000000,
- 0xbf8cc07f, 0xc06b013a,
- 0x00000010, 0xbf8cc07f,
- 0xc06b023a, 0x00000020,
- 0xbf8cc07f, 0xc06b033a,
- 0x00000030, 0xbf8cc07f,
- 0x8074c074, 0x82758075,
- 0x807c907c, 0xbf0a7b7c,
- 0xbf85ffe7, 0xbef40172,
- 0xbef00080, 0xbefe00c1,
- 0xbeff00c1, 0xbee80080,
- 0xbee90080, 0xbef600ff,
- 0x01000000, 0x867aff78,
- 0x00400000, 0xbf850003,
- 0xb8faf803, 0x897a7aff,
- 0x10000000, 0xbf85004d,
- 0xbe840080, 0xd2890000,
- 0x00000900, 0x80048104,
- 0xd2890001, 0x00000900,
- 0x80048104, 0xd2890002,
- 0x00000900, 0x80048104,
- 0xd2890003, 0x00000900,
- 0x80048104, 0xc069003a,
- 0x00000070, 0xbf8cc07f,
- 0x80709070, 0xbf06c004,
- 0xbf84ffee, 0xbe840080,
- 0xd2890000, 0x00000901,
- 0x80048104, 0xd2890001,
- 0x00000901, 0x80048104,
- 0xd2890002, 0x00000901,
- 0x80048104, 0xd2890003,
- 0x00000901, 0x80048104,
- 0xc069003a, 0x00000070,
- 0xbf8cc07f, 0x80709070,
- 0xbf06c004, 0xbf84ffee,
- 0xbe840080, 0xd2890000,
- 0x00000902, 0x80048104,
- 0xd2890001, 0x00000902,
- 0x80048104, 0xd2890002,
- 0x00000902, 0x80048104,
- 0xd2890003, 0x00000902,
- 0x80048104, 0xc069003a,
- 0x00000070, 0xbf8cc07f,
- 0x80709070, 0xbf06c004,
- 0xbf84ffee, 0xbe840080,
- 0xd2890000, 0x00000903,
- 0x80048104, 0xd2890001,
- 0x00000903, 0x80048104,
- 0xd2890002, 0x00000903,
- 0x80048104, 0xd2890003,
- 0x00000903, 0x80048104,
- 0xc069003a, 0x00000070,
- 0xbf8cc07f, 0x80709070,
- 0xbf06c004, 0xbf84ffee,
- 0xbf820008, 0xe0724000,
- 0x701d0000, 0xe0724100,
- 0x701d0100, 0xe0724200,
- 0x701d0200, 0xe0724300,
- 0x701d0300, 0xbefe00c1,
- 0xbeff00c1, 0xb8fb4306,
- 0x867bc17b, 0xbf840064,
- 0xbf8a0000, 0x867aff6f,
- 0x04000000, 0xbf840060,
- 0x8e7b867b, 0x8e7b827b,
- 0xbef6007b, 0xb8f02985,
- 0x80708170, 0x8e708a70,
- 0x8e708170, 0xb8fa1605,
- 0x807a817a, 0x8e7a867a,
- 0x80707a70, 0x8070ff70,
- 0x00000080, 0xbef600ff,
- 0x01000000, 0xbefc0080,
- 0xd28c0002, 0x000100c1,
- 0xd28d0003, 0x000204c1,
+ 0xbefc007e, 0xbf108080,
+ 0x867aff7f, 0x04000000,
+ 0xbeef0080, 0x876f6f7a,
+ 0xb8f02985, 0x80708170,
+ 0x8e708a70, 0x8e708170,
+ 0xb8fb1605, 0x807b817b,
+ 0x8e7b847b, 0x8e76827b,
+ 0xbef600ff, 0x01000000,
+ 0xbef20174, 0x80747074,
+ 0x82758075, 0xbefc0080,
+ 0xbf800000, 0xbe802b00,
+ 0xbe822b02, 0xbe842b04,
+ 0xbe862b06, 0xbe882b08,
+ 0xbe8a2b0a, 0xbe8c2b0c,
+ 0xbe8e2b0e, 0xc06b003a,
+ 0x00000000, 0xbf8cc07f,
+ 0xc06b013a, 0x00000010,
+ 0xbf8cc07f, 0xc06b023a,
+ 0x00000020, 0xbf8cc07f,
+ 0xc06b033a, 0x00000030,
+ 0xbf8cc07f, 0x8074c074,
+ 0x82758075, 0x807c907c,
+ 0xbf0a7b7c, 0xbf85ffe7,
+ 0xbef40172, 0xbef00080,
+ 0xbefe00c1, 0xbeff00c1,
+ 0xbee80080, 0xbee90080,
+ 0xbef600ff, 0x01000000,
0x867aff78, 0x00400000,
0xbf850003, 0xb8faf803,
0x897a7aff, 0x10000000,
- 0xbf850030, 0x24040682,
- 0xd86e4000, 0x00000002,
- 0xbf8cc07f, 0xbe840080,
+ 0xbf85004d, 0xbe840080,
0xd2890000, 0x00000900,
0x80048104, 0xd2890001,
0x00000900, 0x80048104,
@@ -2010,31 +1950,50 @@ static const uint32_t cwsr_trap_aldebaran_hex[] = {
0x80048104, 0xc069003a,
0x00000070, 0xbf8cc07f,
0x80709070, 0xbf06c004,
- 0xbf84ffee, 0x680404ff,
- 0x00000200, 0xd0c9006a,
- 0x0000f702, 0xbf87ffd2,
- 0xbf820015, 0xd1060002,
- 0x00011103, 0x7e0602ff,
- 0x00000200, 0xbefc00ff,
- 0x00010000, 0xbe800077,
- 0x8677ff77, 0xff7fffff,
- 0x8777ff77, 0x00058000,
- 0xd8ec0000, 0x00000002,
- 0xbf8cc07f, 0xe0765000,
- 0x701d0002, 0x68040702,
- 0xd0c9006a, 0x0000f702,
- 0xbf87fff7, 0xbef70000,
- 0xbef000ff, 0x00000400,
+ 0xbf84ffee, 0xbe840080,
+ 0xd2890000, 0x00000902,
+ 0x80048104, 0xd2890001,
+ 0x00000902, 0x80048104,
+ 0xd2890002, 0x00000902,
+ 0x80048104, 0xd2890003,
+ 0x00000902, 0x80048104,
+ 0xc069003a, 0x00000070,
+ 0xbf8cc07f, 0x80709070,
+ 0xbf06c004, 0xbf84ffee,
+ 0xbe840080, 0xd2890000,
+ 0x00000903, 0x80048104,
+ 0xd2890001, 0x00000903,
+ 0x80048104, 0xd2890002,
+ 0x00000903, 0x80048104,
+ 0xd2890003, 0x00000903,
+ 0x80048104, 0xc069003a,
+ 0x00000070, 0xbf8cc07f,
+ 0x80709070, 0xbf06c004,
+ 0xbf84ffee, 0xbf820008,
+ 0xe0724000, 0x701d0000,
+ 0xe0724100, 0x701d0100,
+ 0xe0724200, 0x701d0200,
+ 0xe0724300, 0x701d0300,
0xbefe00c1, 0xbeff00c1,
- 0xb8fb2b05, 0x807b817b,
- 0x8e7b827b, 0xbef600ff,
- 0x01000000, 0xbefc0084,
- 0xbf0a7b7c, 0xbf84006d,
- 0xbf11017c, 0x807bff7b,
- 0x00001000, 0x867aff78,
+ 0xb8fb4306, 0x867bc17b,
+ 0xbf840064, 0xbf8a0000,
+ 0x867aff6f, 0x04000000,
+ 0xbf840060, 0x8e7b867b,
+ 0x8e7b827b, 0xbef6007b,
+ 0xb8f02985, 0x80708170,
+ 0x8e708a70, 0x8e708170,
+ 0xb8fa1605, 0x807a817a,
+ 0x8e7a867a, 0x80707a70,
+ 0x8070ff70, 0x00000080,
+ 0xbef600ff, 0x01000000,
+ 0xbefc0080, 0xd28c0002,
+ 0x000100c1, 0xd28d0003,
+ 0x000204c1, 0x867aff78,
0x00400000, 0xbf850003,
0xb8faf803, 0x897a7aff,
- 0x10000000, 0xbf850051,
+ 0x10000000, 0xbf850030,
+ 0x24040682, 0xd86e4000,
+ 0x00000002, 0xbf8cc07f,
0xbe840080, 0xd2890000,
0x00000900, 0x80048104,
0xd2890001, 0x00000900,
@@ -2054,51 +2013,31 @@ static const uint32_t cwsr_trap_aldebaran_hex[] = {
0xc069003a, 0x00000070,
0xbf8cc07f, 0x80709070,
0xbf06c004, 0xbf84ffee,
- 0xbe840080, 0xd2890000,
- 0x00000902, 0x80048104,
- 0xd2890001, 0x00000902,
- 0x80048104, 0xd2890002,
- 0x00000902, 0x80048104,
- 0xd2890003, 0x00000902,
- 0x80048104, 0xc069003a,
- 0x00000070, 0xbf8cc07f,
- 0x80709070, 0xbf06c004,
- 0xbf84ffee, 0xbe840080,
- 0xd2890000, 0x00000903,
- 0x80048104, 0xd2890001,
- 0x00000903, 0x80048104,
- 0xd2890002, 0x00000903,
- 0x80048104, 0xd2890003,
- 0x00000903, 0x80048104,
- 0xc069003a, 0x00000070,
- 0xbf8cc07f, 0x80709070,
- 0xbf06c004, 0xbf84ffee,
- 0x807c847c, 0xbf0a7b7c,
- 0xbf85ffb1, 0xbf9c0000,
- 0xbf820012, 0x7e000300,
- 0x7e020301, 0x7e040302,
- 0x7e060303, 0xe0724000,
- 0x701d0000, 0xe0724100,
- 0x701d0100, 0xe0724200,
- 0x701d0200, 0xe0724300,
- 0x701d0300, 0x807c847c,
- 0x8070ff70, 0x00000400,
- 0xbf0a7b7c, 0xbf85ffef,
- 0xbf9c0000, 0xb8fb2985,
- 0x807b817b, 0x8e7b837b,
- 0xb8fa2b05, 0x807a817a,
- 0x8e7a827a, 0x80fb7a7b,
- 0x867b7b7b, 0xbf84007a,
+ 0x680404ff, 0x00000200,
+ 0xd0c9006a, 0x0000f702,
+ 0xbf87ffd2, 0xbf820015,
+ 0xd1060002, 0x00011103,
+ 0x7e0602ff, 0x00000200,
+ 0xbefc00ff, 0x00010000,
+ 0xbe800077, 0x8677ff77,
+ 0xff7fffff, 0x8777ff77,
+ 0x00058000, 0xd8ec0000,
+ 0x00000002, 0xbf8cc07f,
+ 0xe0765000, 0x701d0002,
+ 0x68040702, 0xd0c9006a,
+ 0x0000f702, 0xbf87fff7,
+ 0xbef70000, 0xbef000ff,
+ 0x00000400, 0xbefe00c1,
+ 0xbeff00c1, 0xb8fb2b05,
+ 0x807b817b, 0x8e7b827b,
+ 0xbef600ff, 0x01000000,
+ 0xbefc0084, 0xbf0a7b7c,
+ 0xbf84006d, 0xbf11017c,
0x807bff7b, 0x00001000,
- 0xbefc0080, 0xbf11017c,
0x867aff78, 0x00400000,
0xbf850003, 0xb8faf803,
0x897a7aff, 0x10000000,
- 0xbf850059, 0xd3d84000,
- 0x18000100, 0xd3d84001,
- 0x18000101, 0xd3d84002,
- 0x18000102, 0xd3d84003,
- 0x18000103, 0xbe840080,
+ 0xbf850051, 0xbe840080,
0xd2890000, 0x00000900,
0x80048104, 0xd2890001,
0x00000900, 0x80048104,
@@ -2137,139 +2076,203 @@ static const uint32_t cwsr_trap_aldebaran_hex[] = {
0x00000070, 0xbf8cc07f,
0x80709070, 0xbf06c004,
0xbf84ffee, 0x807c847c,
- 0xbf0a7b7c, 0xbf85ffa9,
- 0xbf9c0000, 0xbf820016,
- 0xd3d84000, 0x18000100,
- 0xd3d84001, 0x18000101,
- 0xd3d84002, 0x18000102,
- 0xd3d84003, 0x18000103,
+ 0xbf0a7b7c, 0xbf85ffb1,
+ 0xbf9c0000, 0xbf820012,
+ 0x7e000300, 0x7e020301,
+ 0x7e040302, 0x7e060303,
0xe0724000, 0x701d0000,
0xe0724100, 0x701d0100,
0xe0724200, 0x701d0200,
0xe0724300, 0x701d0300,
0x807c847c, 0x8070ff70,
0x00000400, 0xbf0a7b7c,
- 0xbf85ffeb, 0xbf9c0000,
- 0xbf8200ee, 0xbef4007e,
- 0x8675ff7f, 0x0000ffff,
- 0x8775ff75, 0x00040000,
- 0xbef60080, 0xbef700ff,
- 0x00807fac, 0x866eff7f,
- 0x04000000, 0xbf84001f,
+ 0xbf85ffef, 0xbf9c0000,
+ 0xb8fb2985, 0x807b817b,
+ 0x8e7b837b, 0xb8fa2b05,
+ 0x807a817a, 0x8e7a827a,
+ 0x80fb7a7b, 0x867b7b7b,
+ 0xbf84007a, 0x807bff7b,
+ 0x00001000, 0xbefc0080,
+ 0xbf11017c, 0x867aff78,
+ 0x00400000, 0xbf850003,
+ 0xb8faf803, 0x897a7aff,
+ 0x10000000, 0xbf850059,
+ 0xd3d84000, 0x18000100,
+ 0xd3d84001, 0x18000101,
+ 0xd3d84002, 0x18000102,
+ 0xd3d84003, 0x18000103,
+ 0xbe840080, 0xd2890000,
+ 0x00000900, 0x80048104,
+ 0xd2890001, 0x00000900,
+ 0x80048104, 0xd2890002,
+ 0x00000900, 0x80048104,
+ 0xd2890003, 0x00000900,
+ 0x80048104, 0xc069003a,
+ 0x00000070, 0xbf8cc07f,
+ 0x80709070, 0xbf06c004,
+ 0xbf84ffee, 0xbe840080,
+ 0xd2890000, 0x00000901,
+ 0x80048104, 0xd2890001,
+ 0x00000901, 0x80048104,
+ 0xd2890002, 0x00000901,
+ 0x80048104, 0xd2890003,
+ 0x00000901, 0x80048104,
+ 0xc069003a, 0x00000070,
+ 0xbf8cc07f, 0x80709070,
+ 0xbf06c004, 0xbf84ffee,
+ 0xbe840080, 0xd2890000,
+ 0x00000902, 0x80048104,
+ 0xd2890001, 0x00000902,
+ 0x80048104, 0xd2890002,
+ 0x00000902, 0x80048104,
+ 0xd2890003, 0x00000902,
+ 0x80048104, 0xc069003a,
+ 0x00000070, 0xbf8cc07f,
+ 0x80709070, 0xbf06c004,
+ 0xbf84ffee, 0xbe840080,
+ 0xd2890000, 0x00000903,
+ 0x80048104, 0xd2890001,
+ 0x00000903, 0x80048104,
+ 0xd2890002, 0x00000903,
+ 0x80048104, 0xd2890003,
+ 0x00000903, 0x80048104,
+ 0xc069003a, 0x00000070,
+ 0xbf8cc07f, 0x80709070,
+ 0xbf06c004, 0xbf84ffee,
+ 0x807c847c, 0xbf0a7b7c,
+ 0xbf85ffa9, 0xbf9c0000,
+ 0xbf820016, 0xd3d84000,
+ 0x18000100, 0xd3d84001,
+ 0x18000101, 0xd3d84002,
+ 0x18000102, 0xd3d84003,
+ 0x18000103, 0xe0724000,
+ 0x701d0000, 0xe0724100,
+ 0x701d0100, 0xe0724200,
+ 0x701d0200, 0xe0724300,
+ 0x701d0300, 0x807c847c,
+ 0x8070ff70, 0x00000400,
+ 0xbf0a7b7c, 0xbf85ffeb,
+ 0xbf9c0000, 0xbf8200ee,
+ 0xbef4007e, 0x8675ff7f,
+ 0x0000ffff, 0x8775ff75,
+ 0x00040000, 0xbef60080,
+ 0xbef700ff, 0x00807fac,
+ 0x866eff7f, 0x04000000,
+ 0xbf84001f, 0xbefe00c1,
+ 0xbeff00c1, 0xb8ef4306,
+ 0x866fc16f, 0xbf84001a,
+ 0x8e6f866f, 0x8e6f826f,
+ 0xbef6006f, 0xb8f82985,
+ 0x80788178, 0x8e788a78,
+ 0x8e788178, 0xb8ee1605,
+ 0x806e816e, 0x8e6e866e,
+ 0x80786e78, 0x8078ff78,
+ 0x00000080, 0xbef600ff,
+ 0x01000000, 0xbefc0080,
+ 0xe0510000, 0x781d0000,
+ 0xe0510100, 0x781d0000,
+ 0x807cff7c, 0x00000200,
+ 0x8078ff78, 0x00000200,
+ 0xbf0a6f7c, 0xbf85fff6,
0xbefe00c1, 0xbeff00c1,
- 0xb8ef4306, 0x866fc16f,
- 0xbf84001a, 0x8e6f866f,
- 0x8e6f826f, 0xbef6006f,
- 0xb8f82985, 0x80788178,
- 0x8e788a78, 0x8e788178,
- 0xb8ee1605, 0x806e816e,
- 0x8e6e866e, 0x80786e78,
- 0x8078ff78, 0x00000080,
0xbef600ff, 0x01000000,
- 0xbefc0080, 0xe0510000,
- 0x781d0000, 0xe0510100,
- 0x781d0000, 0x807cff7c,
- 0x00000200, 0x8078ff78,
- 0x00000200, 0xbf0a6f7c,
- 0xbf85fff6, 0xbefe00c1,
- 0xbeff00c1, 0xbef600ff,
- 0x01000000, 0xb8ef2b05,
- 0x806f816f, 0x8e6f826f,
- 0x806fff6f, 0x00008000,
- 0xbef80080, 0xbeee0078,
- 0x8078ff78, 0x00000400,
- 0xbefc0084, 0xbf11087c,
- 0xe0524000, 0x781d0000,
- 0xe0524100, 0x781d0100,
- 0xe0524200, 0x781d0200,
- 0xe0524300, 0x781d0300,
- 0xbf8c0f70, 0x7e000300,
- 0x7e020301, 0x7e040302,
- 0x7e060303, 0x807c847c,
- 0x8078ff78, 0x00000400,
- 0xbf0a6f7c, 0xbf85ffee,
- 0xb8ef2985, 0x806f816f,
- 0x8e6f836f, 0xb8f92b05,
- 0x80798179, 0x8e798279,
- 0x80ef796f, 0x866f6f6f,
- 0xbf84001a, 0x806fff6f,
- 0x00008000, 0xbefc0080,
+ 0xb8ef2b05, 0x806f816f,
+ 0x8e6f826f, 0x806fff6f,
+ 0x00008000, 0xbef80080,
+ 0xbeee0078, 0x8078ff78,
+ 0x00000400, 0xbefc0084,
0xbf11087c, 0xe0524000,
0x781d0000, 0xe0524100,
0x781d0100, 0xe0524200,
0x781d0200, 0xe0524300,
0x781d0300, 0xbf8c0f70,
- 0xd3d94000, 0x18000100,
- 0xd3d94001, 0x18000101,
- 0xd3d94002, 0x18000102,
- 0xd3d94003, 0x18000103,
+ 0x7e000300, 0x7e020301,
+ 0x7e040302, 0x7e060303,
0x807c847c, 0x8078ff78,
0x00000400, 0xbf0a6f7c,
- 0xbf85ffea, 0xbf9c0000,
- 0xe0524000, 0x6e1d0000,
- 0xe0524100, 0x6e1d0100,
- 0xe0524200, 0x6e1d0200,
- 0xe0524300, 0x6e1d0300,
- 0xbf8c0f70, 0xb8f82985,
- 0x80788178, 0x8e788a78,
- 0x8e788178, 0xb8ee1605,
- 0x806e816e, 0x8e6e866e,
- 0x80786e78, 0x80f8c078,
- 0xb8ef1605, 0x806f816f,
- 0x8e6f846f, 0x8e76826f,
- 0xbef600ff, 0x01000000,
- 0xbefc006f, 0xc031003a,
- 0x00000078, 0x80f8c078,
- 0xbf8cc07f, 0x80fc907c,
- 0xbf800000, 0xbe802d00,
- 0xbe822d02, 0xbe842d04,
- 0xbe862d06, 0xbe882d08,
- 0xbe8a2d0a, 0xbe8c2d0c,
- 0xbe8e2d0e, 0xbf06807c,
- 0xbf84fff0, 0xb8f82985,
- 0x80788178, 0x8e788a78,
- 0x8e788178, 0xb8ee1605,
- 0x806e816e, 0x8e6e866e,
- 0x80786e78, 0xbef60084,
- 0xbef600ff, 0x01000000,
- 0xc0211bfa, 0x00000078,
- 0x80788478, 0xc0211b3a,
+ 0xbf85ffee, 0xb8ef2985,
+ 0x806f816f, 0x8e6f836f,
+ 0xb8f92b05, 0x80798179,
+ 0x8e798279, 0x80ef796f,
+ 0x866f6f6f, 0xbf84001a,
+ 0x806fff6f, 0x00008000,
+ 0xbefc0080, 0xbf11087c,
+ 0xe0524000, 0x781d0000,
+ 0xe0524100, 0x781d0100,
+ 0xe0524200, 0x781d0200,
+ 0xe0524300, 0x781d0300,
+ 0xbf8c0f70, 0xd3d94000,
+ 0x18000100, 0xd3d94001,
+ 0x18000101, 0xd3d94002,
+ 0x18000102, 0xd3d94003,
+ 0x18000103, 0x807c847c,
+ 0x8078ff78, 0x00000400,
+ 0xbf0a6f7c, 0xbf85ffea,
+ 0xbf9c0000, 0xe0524000,
+ 0x6e1d0000, 0xe0524100,
+ 0x6e1d0100, 0xe0524200,
+ 0x6e1d0200, 0xe0524300,
+ 0x6e1d0300, 0xbf8c0f70,
+ 0xb8f82985, 0x80788178,
+ 0x8e788a78, 0x8e788178,
+ 0xb8ee1605, 0x806e816e,
+ 0x8e6e866e, 0x80786e78,
+ 0x80f8c078, 0xb8ef1605,
+ 0x806f816f, 0x8e6f846f,
+ 0x8e76826f, 0xbef600ff,
+ 0x01000000, 0xbefc006f,
+ 0xc031003a, 0x00000078,
+ 0x80f8c078, 0xbf8cc07f,
+ 0x80fc907c, 0xbf800000,
+ 0xbe802d00, 0xbe822d02,
+ 0xbe842d04, 0xbe862d06,
+ 0xbe882d08, 0xbe8a2d0a,
+ 0xbe8c2d0c, 0xbe8e2d0e,
+ 0xbf06807c, 0xbf84fff0,
+ 0xb8f82985, 0x80788178,
+ 0x8e788a78, 0x8e788178,
+ 0xb8ee1605, 0x806e816e,
+ 0x8e6e866e, 0x80786e78,
+ 0xbef60084, 0xbef600ff,
+ 0x01000000, 0xc0211bfa,
0x00000078, 0x80788478,
- 0xc0211b7a, 0x00000078,
- 0x80788478, 0xc0211c3a,
+ 0xc0211b3a, 0x00000078,
+ 0x80788478, 0xc0211b7a,
0x00000078, 0x80788478,
- 0xc0211c7a, 0x00000078,
- 0x80788478, 0xc0211eba,
+ 0xc0211c3a, 0x00000078,
+ 0x80788478, 0xc0211c7a,
0x00000078, 0x80788478,
- 0xc0211efa, 0x00000078,
- 0x80788478, 0xc0211a3a,
+ 0xc0211eba, 0x00000078,
+ 0x80788478, 0xc0211efa,
0x00000078, 0x80788478,
- 0xc0211a7a, 0x00000078,
- 0x80788478, 0xc0211cfa,
+ 0xc0211a3a, 0x00000078,
+ 0x80788478, 0xc0211a7a,
0x00000078, 0x80788478,
- 0xbf8cc07f, 0xbefc006f,
- 0xbefe0070, 0xbeff0071,
- 0x866f7bff, 0x000003ff,
- 0xb96f4803, 0x866f7bff,
- 0xfffff800, 0x8f6f8b6f,
- 0xb96fa2c3, 0xb973f801,
- 0xb8ee2985, 0x806e816e,
- 0x8e6e8a6e, 0x8e6e816e,
- 0xb8ef1605, 0x806f816f,
- 0x8e6f866f, 0x806e6f6e,
- 0x806e746e, 0x826f8075,
- 0x866fff6f, 0x0000ffff,
- 0xc00b1c37, 0x00000050,
- 0xc00b1d37, 0x00000060,
- 0xc0031e77, 0x00000074,
- 0xbf8cc07f, 0x8f6e8b77,
- 0x866eff6e, 0x001f8000,
- 0xb96ef807, 0x866dff6d,
- 0x0000ffff, 0x86fe7e7e,
- 0x86ea6a6a, 0x8f6e837a,
- 0xb96ee0c2, 0xbf800002,
- 0xb97a0002, 0xbf8a0000,
- 0xbe801f6c, 0xbf9b0000,
+ 0xc0211cfa, 0x00000078,
+ 0x80788478, 0xbf8cc07f,
+ 0xbefc006f, 0xbefe0070,
+ 0xbeff0071, 0x866f7bff,
+ 0x000003ff, 0xb96f4803,
+ 0x866f7bff, 0xfffff800,
+ 0x8f6f8b6f, 0xb96fa2c3,
+ 0xb973f801, 0xb8ee2985,
+ 0x806e816e, 0x8e6e8a6e,
+ 0x8e6e816e, 0xb8ef1605,
+ 0x806f816f, 0x8e6f866f,
+ 0x806e6f6e, 0x806e746e,
+ 0x826f8075, 0x866fff6f,
+ 0x0000ffff, 0xc00b1c37,
+ 0x00000050, 0xc00b1d37,
+ 0x00000060, 0xc0031e77,
+ 0x00000074, 0xbf8cc07f,
+ 0x8f6e8b77, 0x866eff6e,
+ 0x001f8000, 0xb96ef807,
+ 0x866dff6d, 0x0000ffff,
+ 0x86fe7e7e, 0x86ea6a6a,
+ 0x8f6e837a, 0xb96ee0c2,
+ 0xbf800002, 0xb97a0002,
+ 0xbf8a0000, 0xbe801f6c,
+ 0xbf9b0000, 0x00000000,
};
static const uint32_t cwsr_trap_gfx10_hex[] = {
@@ -3151,7 +3154,7 @@ static const uint32_t cwsr_trap_gfx11_hex[] = {
};
static const uint32_t cwsr_trap_gfx9_4_3_hex[] = {
- 0xbf820001, 0xbf8202db,
+ 0xbf820001, 0xbf8202dc,
0xb8f8f802, 0x8978ff78,
0x00020006, 0xb8fbf803,
0x866eff78, 0x00002000,
@@ -3266,99 +3269,37 @@ static const uint32_t cwsr_trap_gfx9_4_3_hex[] = {
0xbefe007c, 0xbefc0070,
0xc0611c7a, 0x0000007c,
0xbf8cc07f, 0x80708470,
- 0xbefc007e, 0x867aff7f,
- 0x04000000, 0xbeef0080,
- 0x876f6f7a, 0xb8f02985,
- 0x80708170, 0x8e708a70,
- 0x8e708170, 0xb8fb1605,
- 0x807b817b, 0x8e7b847b,
- 0x8e76827b, 0xbef600ff,
- 0x01000000, 0xbef20174,
- 0x80747074, 0x82758075,
- 0xbefc0080, 0xbf800000,
- 0xbe802b00, 0xbe822b02,
- 0xbe842b04, 0xbe862b06,
- 0xbe882b08, 0xbe8a2b0a,
- 0xbe8c2b0c, 0xbe8e2b0e,
- 0xc06b003a, 0x00000000,
- 0xbf8cc07f, 0xc06b013a,
- 0x00000010, 0xbf8cc07f,
- 0xc06b023a, 0x00000020,
- 0xbf8cc07f, 0xc06b033a,
- 0x00000030, 0xbf8cc07f,
- 0x8074c074, 0x82758075,
- 0x807c907c, 0xbf0a7b7c,
- 0xbf85ffe7, 0xbef40172,
- 0xbef00080, 0xbefe00c1,
- 0xbeff00c1, 0xbee80080,
- 0xbee90080, 0xbef600ff,
- 0x01000000, 0x867aff78,
- 0x00400000, 0xbf850003,
- 0xb8faf803, 0x897a7aff,
- 0x10000000, 0xbf85004d,
- 0xbe840080, 0xd2890000,
- 0x00000900, 0x80048104,
- 0xd2890001, 0x00000900,
- 0x80048104, 0xd2890002,
- 0x00000900, 0x80048104,
- 0xd2890003, 0x00000900,
- 0x80048104, 0xc069003a,
- 0x00000070, 0xbf8cc07f,
- 0x80709070, 0xbf06c004,
- 0xbf84ffee, 0xbe840080,
- 0xd2890000, 0x00000901,
- 0x80048104, 0xd2890001,
- 0x00000901, 0x80048104,
- 0xd2890002, 0x00000901,
- 0x80048104, 0xd2890003,
- 0x00000901, 0x80048104,
- 0xc069003a, 0x00000070,
- 0xbf8cc07f, 0x80709070,
- 0xbf06c004, 0xbf84ffee,
- 0xbe840080, 0xd2890000,
- 0x00000902, 0x80048104,
- 0xd2890001, 0x00000902,
- 0x80048104, 0xd2890002,
- 0x00000902, 0x80048104,
- 0xd2890003, 0x00000902,
- 0x80048104, 0xc069003a,
- 0x00000070, 0xbf8cc07f,
- 0x80709070, 0xbf06c004,
- 0xbf84ffee, 0xbe840080,
- 0xd2890000, 0x00000903,
- 0x80048104, 0xd2890001,
- 0x00000903, 0x80048104,
- 0xd2890002, 0x00000903,
- 0x80048104, 0xd2890003,
- 0x00000903, 0x80048104,
- 0xc069003a, 0x00000070,
- 0xbf8cc07f, 0x80709070,
- 0xbf06c004, 0xbf84ffee,
- 0xbf820008, 0xe0724000,
- 0x701d0000, 0xe0724100,
- 0x701d0100, 0xe0724200,
- 0x701d0200, 0xe0724300,
- 0x701d0300, 0xbefe00c1,
- 0xbeff00c1, 0xb8fb4306,
- 0x867bc17b, 0xbf840064,
- 0xbf8a0000, 0x867aff6f,
- 0x04000000, 0xbf840060,
- 0x8e7b867b, 0x8e7b827b,
- 0xbef6007b, 0xb8f02985,
- 0x80708170, 0x8e708a70,
- 0x8e708170, 0xb8fa1605,
- 0x807a817a, 0x8e7a867a,
- 0x80707a70, 0x8070ff70,
- 0x00000080, 0xbef600ff,
- 0x01000000, 0xbefc0080,
- 0xd28c0002, 0x000100c1,
- 0xd28d0003, 0x000204c1,
+ 0xbefc007e, 0xbf108080,
+ 0x867aff7f, 0x04000000,
+ 0xbeef0080, 0x876f6f7a,
+ 0xb8f02985, 0x80708170,
+ 0x8e708a70, 0x8e708170,
+ 0xb8fb1605, 0x807b817b,
+ 0x8e7b847b, 0x8e76827b,
+ 0xbef600ff, 0x01000000,
+ 0xbef20174, 0x80747074,
+ 0x82758075, 0xbefc0080,
+ 0xbf800000, 0xbe802b00,
+ 0xbe822b02, 0xbe842b04,
+ 0xbe862b06, 0xbe882b08,
+ 0xbe8a2b0a, 0xbe8c2b0c,
+ 0xbe8e2b0e, 0xc06b003a,
+ 0x00000000, 0xbf8cc07f,
+ 0xc06b013a, 0x00000010,
+ 0xbf8cc07f, 0xc06b023a,
+ 0x00000020, 0xbf8cc07f,
+ 0xc06b033a, 0x00000030,
+ 0xbf8cc07f, 0x8074c074,
+ 0x82758075, 0x807c907c,
+ 0xbf0a7b7c, 0xbf85ffe7,
+ 0xbef40172, 0xbef00080,
+ 0xbefe00c1, 0xbeff00c1,
+ 0xbee80080, 0xbee90080,
+ 0xbef600ff, 0x01000000,
0x867aff78, 0x00400000,
0xbf850003, 0xb8faf803,
0x897a7aff, 0x10000000,
- 0xbf850030, 0x24040682,
- 0xd86e4000, 0x00000002,
- 0xbf8cc07f, 0xbe840080,
+ 0xbf85004d, 0xbe840080,
0xd2890000, 0x00000900,
0x80048104, 0xd2890001,
0x00000900, 0x80048104,
@@ -3377,31 +3318,50 @@ static const uint32_t cwsr_trap_gfx9_4_3_hex[] = {
0x80048104, 0xc069003a,
0x00000070, 0xbf8cc07f,
0x80709070, 0xbf06c004,
- 0xbf84ffee, 0x680404ff,
- 0x00000200, 0xd0c9006a,
- 0x0000f702, 0xbf87ffd2,
- 0xbf820015, 0xd1060002,
- 0x00011103, 0x7e0602ff,
- 0x00000200, 0xbefc00ff,
- 0x00010000, 0xbe800077,
- 0x8677ff77, 0xff7fffff,
- 0x8777ff77, 0x00058000,
- 0xd8ec0000, 0x00000002,
- 0xbf8cc07f, 0xe0765000,
- 0x701d0002, 0x68040702,
- 0xd0c9006a, 0x0000f702,
- 0xbf87fff7, 0xbef70000,
- 0xbef000ff, 0x00000400,
+ 0xbf84ffee, 0xbe840080,
+ 0xd2890000, 0x00000902,
+ 0x80048104, 0xd2890001,
+ 0x00000902, 0x80048104,
+ 0xd2890002, 0x00000902,
+ 0x80048104, 0xd2890003,
+ 0x00000902, 0x80048104,
+ 0xc069003a, 0x00000070,
+ 0xbf8cc07f, 0x80709070,
+ 0xbf06c004, 0xbf84ffee,
+ 0xbe840080, 0xd2890000,
+ 0x00000903, 0x80048104,
+ 0xd2890001, 0x00000903,
+ 0x80048104, 0xd2890002,
+ 0x00000903, 0x80048104,
+ 0xd2890003, 0x00000903,
+ 0x80048104, 0xc069003a,
+ 0x00000070, 0xbf8cc07f,
+ 0x80709070, 0xbf06c004,
+ 0xbf84ffee, 0xbf820008,
+ 0xe0724000, 0x701d0000,
+ 0xe0724100, 0x701d0100,
+ 0xe0724200, 0x701d0200,
+ 0xe0724300, 0x701d0300,
0xbefe00c1, 0xbeff00c1,
- 0xb8fb2b05, 0x807b817b,
- 0x8e7b827b, 0xbef600ff,
- 0x01000000, 0xbefc0084,
- 0xbf0a7b7c, 0xbf84006d,
- 0xbf11017c, 0x807bff7b,
- 0x00001000, 0x867aff78,
+ 0xb8fb4306, 0x867bc17b,
+ 0xbf840064, 0xbf8a0000,
+ 0x867aff6f, 0x04000000,
+ 0xbf840060, 0x8e7b867b,
+ 0x8e7b827b, 0xbef6007b,
+ 0xb8f02985, 0x80708170,
+ 0x8e708a70, 0x8e708170,
+ 0xb8fa1605, 0x807a817a,
+ 0x8e7a867a, 0x80707a70,
+ 0x8070ff70, 0x00000080,
+ 0xbef600ff, 0x01000000,
+ 0xbefc0080, 0xd28c0002,
+ 0x000100c1, 0xd28d0003,
+ 0x000204c1, 0x867aff78,
0x00400000, 0xbf850003,
0xb8faf803, 0x897a7aff,
- 0x10000000, 0xbf850051,
+ 0x10000000, 0xbf850030,
+ 0x24040682, 0xd86e4000,
+ 0x00000002, 0xbf8cc07f,
0xbe840080, 0xd2890000,
0x00000900, 0x80048104,
0xd2890001, 0x00000900,
@@ -3421,51 +3381,31 @@ static const uint32_t cwsr_trap_gfx9_4_3_hex[] = {
0xc069003a, 0x00000070,
0xbf8cc07f, 0x80709070,
0xbf06c004, 0xbf84ffee,
- 0xbe840080, 0xd2890000,
- 0x00000902, 0x80048104,
- 0xd2890001, 0x00000902,
- 0x80048104, 0xd2890002,
- 0x00000902, 0x80048104,
- 0xd2890003, 0x00000902,
- 0x80048104, 0xc069003a,
- 0x00000070, 0xbf8cc07f,
- 0x80709070, 0xbf06c004,
- 0xbf84ffee, 0xbe840080,
- 0xd2890000, 0x00000903,
- 0x80048104, 0xd2890001,
- 0x00000903, 0x80048104,
- 0xd2890002, 0x00000903,
- 0x80048104, 0xd2890003,
- 0x00000903, 0x80048104,
- 0xc069003a, 0x00000070,
- 0xbf8cc07f, 0x80709070,
- 0xbf06c004, 0xbf84ffee,
- 0x807c847c, 0xbf0a7b7c,
- 0xbf85ffb1, 0xbf9c0000,
- 0xbf820012, 0x7e000300,
- 0x7e020301, 0x7e040302,
- 0x7e060303, 0xe0724000,
- 0x701d0000, 0xe0724100,
- 0x701d0100, 0xe0724200,
- 0x701d0200, 0xe0724300,
- 0x701d0300, 0x807c847c,
- 0x8070ff70, 0x00000400,
- 0xbf0a7b7c, 0xbf85ffef,
- 0xbf9c0000, 0xb8fb2985,
- 0x807b817b, 0x8e7b837b,
- 0xb8fa2b05, 0x807a817a,
- 0x8e7a827a, 0x80fb7a7b,
- 0x867b7b7b, 0xbf84007a,
+ 0x680404ff, 0x00000200,
+ 0xd0c9006a, 0x0000f702,
+ 0xbf87ffd2, 0xbf820015,
+ 0xd1060002, 0x00011103,
+ 0x7e0602ff, 0x00000200,
+ 0xbefc00ff, 0x00010000,
+ 0xbe800077, 0x8677ff77,
+ 0xff7fffff, 0x8777ff77,
+ 0x00058000, 0xd8ec0000,
+ 0x00000002, 0xbf8cc07f,
+ 0xe0765000, 0x701d0002,
+ 0x68040702, 0xd0c9006a,
+ 0x0000f702, 0xbf87fff7,
+ 0xbef70000, 0xbef000ff,
+ 0x00000400, 0xbefe00c1,
+ 0xbeff00c1, 0xb8fb2b05,
+ 0x807b817b, 0x8e7b827b,
+ 0xbef600ff, 0x01000000,
+ 0xbefc0084, 0xbf0a7b7c,
+ 0xbf84006d, 0xbf11017c,
0x807bff7b, 0x00001000,
- 0xbefc0080, 0xbf11017c,
0x867aff78, 0x00400000,
0xbf850003, 0xb8faf803,
0x897a7aff, 0x10000000,
- 0xbf850059, 0xd3d84000,
- 0x18000100, 0xd3d84001,
- 0x18000101, 0xd3d84002,
- 0x18000102, 0xd3d84003,
- 0x18000103, 0xbe840080,
+ 0xbf850051, 0xbe840080,
0xd2890000, 0x00000900,
0x80048104, 0xd2890001,
0x00000900, 0x80048104,
@@ -3504,143 +3444,207 @@ static const uint32_t cwsr_trap_gfx9_4_3_hex[] = {
0x00000070, 0xbf8cc07f,
0x80709070, 0xbf06c004,
0xbf84ffee, 0x807c847c,
- 0xbf0a7b7c, 0xbf85ffa9,
- 0xbf9c0000, 0xbf820016,
- 0xd3d84000, 0x18000100,
- 0xd3d84001, 0x18000101,
- 0xd3d84002, 0x18000102,
- 0xd3d84003, 0x18000103,
+ 0xbf0a7b7c, 0xbf85ffb1,
+ 0xbf9c0000, 0xbf820012,
+ 0x7e000300, 0x7e020301,
+ 0x7e040302, 0x7e060303,
0xe0724000, 0x701d0000,
0xe0724100, 0x701d0100,
0xe0724200, 0x701d0200,
0xe0724300, 0x701d0300,
0x807c847c, 0x8070ff70,
0x00000400, 0xbf0a7b7c,
- 0xbf85ffeb, 0xbf9c0000,
- 0xbf8200ee, 0xbef4007e,
- 0x8675ff7f, 0x0000ffff,
- 0x8775ff75, 0x00040000,
- 0xbef60080, 0xbef700ff,
- 0x00807fac, 0x866eff7f,
- 0x04000000, 0xbf84001f,
+ 0xbf85ffef, 0xbf9c0000,
+ 0xb8fb2985, 0x807b817b,
+ 0x8e7b837b, 0xb8fa2b05,
+ 0x807a817a, 0x8e7a827a,
+ 0x80fb7a7b, 0x867b7b7b,
+ 0xbf84007a, 0x807bff7b,
+ 0x00001000, 0xbefc0080,
+ 0xbf11017c, 0x867aff78,
+ 0x00400000, 0xbf850003,
+ 0xb8faf803, 0x897a7aff,
+ 0x10000000, 0xbf850059,
+ 0xd3d84000, 0x18000100,
+ 0xd3d84001, 0x18000101,
+ 0xd3d84002, 0x18000102,
+ 0xd3d84003, 0x18000103,
+ 0xbe840080, 0xd2890000,
+ 0x00000900, 0x80048104,
+ 0xd2890001, 0x00000900,
+ 0x80048104, 0xd2890002,
+ 0x00000900, 0x80048104,
+ 0xd2890003, 0x00000900,
+ 0x80048104, 0xc069003a,
+ 0x00000070, 0xbf8cc07f,
+ 0x80709070, 0xbf06c004,
+ 0xbf84ffee, 0xbe840080,
+ 0xd2890000, 0x00000901,
+ 0x80048104, 0xd2890001,
+ 0x00000901, 0x80048104,
+ 0xd2890002, 0x00000901,
+ 0x80048104, 0xd2890003,
+ 0x00000901, 0x80048104,
+ 0xc069003a, 0x00000070,
+ 0xbf8cc07f, 0x80709070,
+ 0xbf06c004, 0xbf84ffee,
+ 0xbe840080, 0xd2890000,
+ 0x00000902, 0x80048104,
+ 0xd2890001, 0x00000902,
+ 0x80048104, 0xd2890002,
+ 0x00000902, 0x80048104,
+ 0xd2890003, 0x00000902,
+ 0x80048104, 0xc069003a,
+ 0x00000070, 0xbf8cc07f,
+ 0x80709070, 0xbf06c004,
+ 0xbf84ffee, 0xbe840080,
+ 0xd2890000, 0x00000903,
+ 0x80048104, 0xd2890001,
+ 0x00000903, 0x80048104,
+ 0xd2890002, 0x00000903,
+ 0x80048104, 0xd2890003,
+ 0x00000903, 0x80048104,
+ 0xc069003a, 0x00000070,
+ 0xbf8cc07f, 0x80709070,
+ 0xbf06c004, 0xbf84ffee,
+ 0x807c847c, 0xbf0a7b7c,
+ 0xbf85ffa9, 0xbf9c0000,
+ 0xbf820016, 0xd3d84000,
+ 0x18000100, 0xd3d84001,
+ 0x18000101, 0xd3d84002,
+ 0x18000102, 0xd3d84003,
+ 0x18000103, 0xe0724000,
+ 0x701d0000, 0xe0724100,
+ 0x701d0100, 0xe0724200,
+ 0x701d0200, 0xe0724300,
+ 0x701d0300, 0x807c847c,
+ 0x8070ff70, 0x00000400,
+ 0xbf0a7b7c, 0xbf85ffeb,
+ 0xbf9c0000, 0xbf8200ee,
+ 0xbef4007e, 0x8675ff7f,
+ 0x0000ffff, 0x8775ff75,
+ 0x00040000, 0xbef60080,
+ 0xbef700ff, 0x00807fac,
+ 0x866eff7f, 0x04000000,
+ 0xbf84001f, 0xbefe00c1,
+ 0xbeff00c1, 0xb8ef4306,
+ 0x866fc16f, 0xbf84001a,
+ 0x8e6f866f, 0x8e6f826f,
+ 0xbef6006f, 0xb8f82985,
+ 0x80788178, 0x8e788a78,
+ 0x8e788178, 0xb8ee1605,
+ 0x806e816e, 0x8e6e866e,
+ 0x80786e78, 0x8078ff78,
+ 0x00000080, 0xbef600ff,
+ 0x01000000, 0xbefc0080,
+ 0xe0510000, 0x781d0000,
+ 0xe0510100, 0x781d0000,
+ 0x807cff7c, 0x00000200,
+ 0x8078ff78, 0x00000200,
+ 0xbf0a6f7c, 0xbf85fff6,
0xbefe00c1, 0xbeff00c1,
- 0xb8ef4306, 0x866fc16f,
- 0xbf84001a, 0x8e6f866f,
- 0x8e6f826f, 0xbef6006f,
- 0xb8f82985, 0x80788178,
- 0x8e788a78, 0x8e788178,
- 0xb8ee1605, 0x806e816e,
- 0x8e6e866e, 0x80786e78,
- 0x8078ff78, 0x00000080,
0xbef600ff, 0x01000000,
- 0xbefc0080, 0xe0510000,
- 0x781d0000, 0xe0510100,
- 0x781d0000, 0x807cff7c,
- 0x00000200, 0x8078ff78,
- 0x00000200, 0xbf0a6f7c,
- 0xbf85fff6, 0xbefe00c1,
- 0xbeff00c1, 0xbef600ff,
- 0x01000000, 0xb8ef2b05,
- 0x806f816f, 0x8e6f826f,
- 0x806fff6f, 0x00008000,
- 0xbef80080, 0xbeee0078,
- 0x8078ff78, 0x00000400,
- 0xbefc0084, 0xbf11087c,
- 0xe0524000, 0x781d0000,
- 0xe0524100, 0x781d0100,
- 0xe0524200, 0x781d0200,
- 0xe0524300, 0x781d0300,
- 0xbf8c0f70, 0x7e000300,
- 0x7e020301, 0x7e040302,
- 0x7e060303, 0x807c847c,
- 0x8078ff78, 0x00000400,
- 0xbf0a6f7c, 0xbf85ffee,
- 0xb8ef2985, 0x806f816f,
- 0x8e6f836f, 0xb8f92b05,
- 0x80798179, 0x8e798279,
- 0x80ef796f, 0x866f6f6f,
- 0xbf84001a, 0x806fff6f,
- 0x00008000, 0xbefc0080,
+ 0xb8ef2b05, 0x806f816f,
+ 0x8e6f826f, 0x806fff6f,
+ 0x00008000, 0xbef80080,
+ 0xbeee0078, 0x8078ff78,
+ 0x00000400, 0xbefc0084,
0xbf11087c, 0xe0524000,
0x781d0000, 0xe0524100,
0x781d0100, 0xe0524200,
0x781d0200, 0xe0524300,
0x781d0300, 0xbf8c0f70,
- 0xd3d94000, 0x18000100,
- 0xd3d94001, 0x18000101,
- 0xd3d94002, 0x18000102,
- 0xd3d94003, 0x18000103,
+ 0x7e000300, 0x7e020301,
+ 0x7e040302, 0x7e060303,
0x807c847c, 0x8078ff78,
0x00000400, 0xbf0a6f7c,
- 0xbf85ffea, 0xbf9c0000,
- 0xe0524000, 0x6e1d0000,
- 0xe0524100, 0x6e1d0100,
- 0xe0524200, 0x6e1d0200,
- 0xe0524300, 0x6e1d0300,
- 0xbf8c0f70, 0xb8f82985,
- 0x80788178, 0x8e788a78,
- 0x8e788178, 0xb8ee1605,
- 0x806e816e, 0x8e6e866e,
- 0x80786e78, 0x80f8c078,
- 0xb8ef1605, 0x806f816f,
- 0x8e6f846f, 0x8e76826f,
- 0xbef600ff, 0x01000000,
- 0xbefc006f, 0xc031003a,
- 0x00000078, 0x80f8c078,
- 0xbf8cc07f, 0x80fc907c,
- 0xbf800000, 0xbe802d00,
- 0xbe822d02, 0xbe842d04,
- 0xbe862d06, 0xbe882d08,
- 0xbe8a2d0a, 0xbe8c2d0c,
- 0xbe8e2d0e, 0xbf06807c,
- 0xbf84fff0, 0xb8f82985,
- 0x80788178, 0x8e788a78,
- 0x8e788178, 0xb8ee1605,
- 0x806e816e, 0x8e6e866e,
- 0x80786e78, 0xbef60084,
- 0xbef600ff, 0x01000000,
- 0xc0211bfa, 0x00000078,
- 0x80788478, 0xc0211b3a,
+ 0xbf85ffee, 0xb8ef2985,
+ 0x806f816f, 0x8e6f836f,
+ 0xb8f92b05, 0x80798179,
+ 0x8e798279, 0x80ef796f,
+ 0x866f6f6f, 0xbf84001a,
+ 0x806fff6f, 0x00008000,
+ 0xbefc0080, 0xbf11087c,
+ 0xe0524000, 0x781d0000,
+ 0xe0524100, 0x781d0100,
+ 0xe0524200, 0x781d0200,
+ 0xe0524300, 0x781d0300,
+ 0xbf8c0f70, 0xd3d94000,
+ 0x18000100, 0xd3d94001,
+ 0x18000101, 0xd3d94002,
+ 0x18000102, 0xd3d94003,
+ 0x18000103, 0x807c847c,
+ 0x8078ff78, 0x00000400,
+ 0xbf0a6f7c, 0xbf85ffea,
+ 0xbf9c0000, 0xe0524000,
+ 0x6e1d0000, 0xe0524100,
+ 0x6e1d0100, 0xe0524200,
+ 0x6e1d0200, 0xe0524300,
+ 0x6e1d0300, 0xbf8c0f70,
+ 0xb8f82985, 0x80788178,
+ 0x8e788a78, 0x8e788178,
+ 0xb8ee1605, 0x806e816e,
+ 0x8e6e866e, 0x80786e78,
+ 0x80f8c078, 0xb8ef1605,
+ 0x806f816f, 0x8e6f846f,
+ 0x8e76826f, 0xbef600ff,
+ 0x01000000, 0xbefc006f,
+ 0xc031003a, 0x00000078,
+ 0x80f8c078, 0xbf8cc07f,
+ 0x80fc907c, 0xbf800000,
+ 0xbe802d00, 0xbe822d02,
+ 0xbe842d04, 0xbe862d06,
+ 0xbe882d08, 0xbe8a2d0a,
+ 0xbe8c2d0c, 0xbe8e2d0e,
+ 0xbf06807c, 0xbf84fff0,
+ 0xb8f82985, 0x80788178,
+ 0x8e788a78, 0x8e788178,
+ 0xb8ee1605, 0x806e816e,
+ 0x8e6e866e, 0x80786e78,
+ 0xbef60084, 0xbef600ff,
+ 0x01000000, 0xc0211bfa,
0x00000078, 0x80788478,
- 0xc0211b7a, 0x00000078,
- 0x80788478, 0xc0211c3a,
+ 0xc0211b3a, 0x00000078,
+ 0x80788478, 0xc0211b7a,
0x00000078, 0x80788478,
- 0xc0211c7a, 0x00000078,
- 0x80788478, 0xc0211eba,
+ 0xc0211c3a, 0x00000078,
+ 0x80788478, 0xc0211c7a,
0x00000078, 0x80788478,
- 0xc0211efa, 0x00000078,
- 0x80788478, 0xc0211a3a,
+ 0xc0211eba, 0x00000078,
+ 0x80788478, 0xc0211efa,
0x00000078, 0x80788478,
- 0xc0211a7a, 0x00000078,
- 0x80788478, 0xc0211cfa,
+ 0xc0211a3a, 0x00000078,
+ 0x80788478, 0xc0211a7a,
0x00000078, 0x80788478,
- 0xbf8cc07f, 0xbefc006f,
- 0xbefe0070, 0xbeff0071,
- 0x866f7bff, 0x000003ff,
- 0xb96f4803, 0x866f7bff,
- 0xfffff800, 0x8f6f8b6f,
- 0xb96fa2c3, 0xb973f801,
- 0xb8ee2985, 0x806e816e,
- 0x8e6e8a6e, 0x8e6e816e,
- 0xb8ef1605, 0x806f816f,
- 0x8e6f866f, 0x806e6f6e,
- 0x806e746e, 0x826f8075,
- 0x866fff6f, 0x0000ffff,
- 0xc00b1c37, 0x00000050,
- 0xc00b1d37, 0x00000060,
- 0xc0031e77, 0x00000074,
- 0xbf8cc07f, 0x8f6e8b79,
- 0x866eff6e, 0x001f8000,
- 0xb96ef807, 0x866dff6d,
- 0x0000ffff, 0x86fe7e7e,
- 0x86ea6a6a, 0x8f6e837a,
- 0xb96ee0c2, 0xbf800002,
- 0xb97a0002, 0xbf8a0000,
- 0xbe801f6c, 0xbf9b0000,
+ 0xc0211cfa, 0x00000078,
+ 0x80788478, 0xbf8cc07f,
+ 0xbefc006f, 0xbefe0070,
+ 0xbeff0071, 0x866f7bff,
+ 0x000003ff, 0xb96f4803,
+ 0x866f7bff, 0xfffff800,
+ 0x8f6f8b6f, 0xb96fa2c3,
+ 0xb973f801, 0xb8ee2985,
+ 0x806e816e, 0x8e6e8a6e,
+ 0x8e6e816e, 0xb8ef1605,
+ 0x806f816f, 0x8e6f866f,
+ 0x806e6f6e, 0x806e746e,
+ 0x826f8075, 0x866fff6f,
+ 0x0000ffff, 0xc00b1c37,
+ 0x00000050, 0xc00b1d37,
+ 0x00000060, 0xc0031e77,
+ 0x00000074, 0xbf8cc07f,
+ 0x8f6e8b79, 0x866eff6e,
+ 0x001f8000, 0xb96ef807,
+ 0x866dff6d, 0x0000ffff,
+ 0x86fe7e7e, 0x86ea6a6a,
+ 0x8f6e837a, 0xb96ee0c2,
+ 0xbf800002, 0xb97a0002,
+ 0xbf8a0000, 0xbe801f6c,
+ 0xbf9b0000, 0x00000000,
};
static const uint32_t cwsr_trap_gfx12_hex[] = {
- 0xbfa00001, 0xbfa0024b,
+ 0xbfa00001, 0xbfa002a2,
0xb0804009, 0xb8f8f804,
0x9178ff78, 0x00008c00,
0xb8fbf811, 0x8b6eff78,
@@ -3714,7 +3718,15 @@ static const uint32_t cwsr_trap_gfx12_hex[] = {
0x00011677, 0xd7610000,
0x00011a79, 0xd7610000,
0x00011c7e, 0xd7610000,
- 0x00011e7f, 0xbefe00ff,
+ 0x00011e7f, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xbefe00ff,
0x00003fff, 0xbeff0080,
0xee0a407a, 0x000c0000,
0x00004000, 0xd760007a,
@@ -3751,38 +3763,46 @@ static const uint32_t cwsr_trap_gfx12_hex[] = {
0x00000200, 0xbef600ff,
0x01000000, 0x7e000280,
0x7e020280, 0x7e040280,
- 0xbefd0080, 0xbe804ec2,
- 0xbf94fffe, 0xb8faf804,
- 0x8b7a847a, 0x91788478,
- 0x8c787a78, 0xd7610002,
- 0x0000fa71, 0x807d817d,
- 0xd7610002, 0x0000fa6c,
- 0x807d817d, 0x917aff6d,
- 0x80000000, 0xd7610002,
- 0x0000fa7a, 0x807d817d,
- 0xd7610002, 0x0000fa6e,
- 0x807d817d, 0xd7610002,
- 0x0000fa6f, 0x807d817d,
- 0xd7610002, 0x0000fa78,
- 0x807d817d, 0xb8faf811,
- 0xd7610002, 0x0000fa7a,
- 0x807d817d, 0xd7610002,
- 0x0000fa7b, 0x807d817d,
- 0xb8f1f801, 0xd7610002,
- 0x0000fa71, 0x807d817d,
- 0xb8f1f814, 0xd7610002,
- 0x0000fa71, 0x807d817d,
- 0xb8f1f815, 0xd7610002,
- 0x0000fa71, 0x807d817d,
- 0xb8f1f812, 0xd7610002,
- 0x0000fa71, 0x807d817d,
- 0xb8f1f813, 0xd7610002,
- 0x0000fa71, 0x807d817d,
+ 0xbe804ec2, 0xbf94fffe,
+ 0xb8faf804, 0x8b7a847a,
+ 0x91788478, 0x8c787a78,
+ 0x917aff6d, 0x80000000,
+ 0xd7610002, 0x00010071,
+ 0xd7610002, 0x0001026c,
+ 0xd7610002, 0x0001047a,
+ 0xd7610002, 0x0001066e,
+ 0xd7610002, 0x0001086f,
+ 0xd7610002, 0x00010a78,
+ 0xd7610002, 0x00010e7b,
+ 0xd8500000, 0x00000000,
+ 0xd8500000, 0x00000000,
+ 0xd8500000, 0x00000000,
+ 0xd8500000, 0x00000000,
+ 0xd8500000, 0x00000000,
+ 0xd8500000, 0x00000000,
+ 0xd8500000, 0x00000000,
+ 0xd8500000, 0x00000000,
+ 0xb8faf811, 0xd7610002,
+ 0x00010c7a, 0xb8faf801,
+ 0xd7610002, 0x0001107a,
+ 0xb8faf814, 0xd7610002,
+ 0x0001127a, 0xb8faf815,
+ 0xd7610002, 0x0001147a,
+ 0xb8faf812, 0xd7610002,
+ 0x0001167a, 0xb8faf813,
+ 0xd7610002, 0x0001187a,
0xb8faf802, 0xd7610002,
- 0x0000fa7a, 0x807d817d,
- 0xbefa50c1, 0xbfc70000,
- 0xd7610002, 0x0000fa7a,
- 0x807d817d, 0xbefe00ff,
+ 0x00011a7a, 0xbefa50c1,
+ 0xbfc70000, 0xd7610002,
+ 0x00011c7a, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xbefe00ff,
0x0000ffff, 0xbeff0080,
0xc4068070, 0x008ce802,
0x00000000, 0xbefe00c1,
@@ -3797,328 +3817,840 @@ static const uint32_t cwsr_trap_gfx12_hex[] = {
0xbe824102, 0xbe844104,
0xbe864106, 0xbe884108,
0xbe8a410a, 0xbe8c410c,
- 0xbe8e410e, 0xd7610002,
- 0x0000f200, 0x80798179,
- 0xd7610002, 0x0000f201,
- 0x80798179, 0xd7610002,
- 0x0000f202, 0x80798179,
- 0xd7610002, 0x0000f203,
- 0x80798179, 0xd7610002,
- 0x0000f204, 0x80798179,
- 0xd7610002, 0x0000f205,
- 0x80798179, 0xd7610002,
- 0x0000f206, 0x80798179,
- 0xd7610002, 0x0000f207,
- 0x80798179, 0xd7610002,
- 0x0000f208, 0x80798179,
- 0xd7610002, 0x0000f209,
- 0x80798179, 0xd7610002,
- 0x0000f20a, 0x80798179,
- 0xd7610002, 0x0000f20b,
- 0x80798179, 0xd7610002,
- 0x0000f20c, 0x80798179,
- 0xd7610002, 0x0000f20d,
- 0x80798179, 0xd7610002,
- 0x0000f20e, 0x80798179,
- 0xd7610002, 0x0000f20f,
- 0x80798179, 0xbf06a079,
- 0xbfa10007, 0xc4068070,
+ 0xbe8e410e, 0xbf068079,
+ 0xbfa10032, 0xd7610002,
+ 0x00010000, 0xd7610002,
+ 0x00010201, 0xd7610002,
+ 0x00010402, 0xd7610002,
+ 0x00010603, 0xd7610002,
+ 0x00010804, 0xd7610002,
+ 0x00010a05, 0xd7610002,
+ 0x00010c06, 0xd7610002,
+ 0x00010e07, 0xd7610002,
+ 0x00011008, 0xd7610002,
+ 0x00011209, 0xd7610002,
+ 0x0001140a, 0xd7610002,
+ 0x0001160b, 0xd7610002,
+ 0x0001180c, 0xd7610002,
+ 0x00011a0d, 0xd7610002,
+ 0x00011c0e, 0xd7610002,
+ 0x00011e0f, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0x80799079,
+ 0xbfa00038, 0xd7610002,
+ 0x00012000, 0xd7610002,
+ 0x00012201, 0xd7610002,
+ 0x00012402, 0xd7610002,
+ 0x00012603, 0xd7610002,
+ 0x00012804, 0xd7610002,
+ 0x00012a05, 0xd7610002,
+ 0x00012c06, 0xd7610002,
+ 0x00012e07, 0xd7610002,
+ 0x00013008, 0xd7610002,
+ 0x00013209, 0xd7610002,
+ 0x0001340a, 0xd7610002,
+ 0x0001360b, 0xd7610002,
+ 0x0001380c, 0xd7610002,
+ 0x00013a0d, 0xd7610002,
+ 0x00013c0e, 0xd7610002,
+ 0x00013e0f, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0x80799079,
+ 0xc4068070, 0x008ce802,
+ 0x00000000, 0x8070ff70,
+ 0x00000080, 0xbef90080,
+ 0x7e040280, 0x807d907d,
+ 0xbf0aff7d, 0x00000060,
+ 0xbfa2ff88, 0xbe804100,
+ 0xbe824102, 0xbe844104,
+ 0xbe864106, 0xbe884108,
+ 0xbe8a410a, 0xd7610002,
+ 0x00010000, 0xd7610002,
+ 0x00010201, 0xd7610002,
+ 0x00010402, 0xd7610002,
+ 0x00010603, 0xd7610002,
+ 0x00010804, 0xd7610002,
+ 0x00010a05, 0xd7610002,
+ 0x00010c06, 0xd7610002,
+ 0x00010e07, 0xd7610002,
+ 0x00011008, 0xd7610002,
+ 0x00011209, 0xd7610002,
+ 0x0001140a, 0xd7610002,
+ 0x0001160b, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xc4068070,
0x008ce802, 0x00000000,
+ 0xbefe00c1, 0x857d9973,
+ 0x8b7d817d, 0xbf06817d,
+ 0xbfa20002, 0xbeff0080,
+ 0xbfa00001, 0xbeff00c1,
+ 0xb8fb4306, 0x8b7bc17b,
+ 0xbfa10044, 0x8b7aff6d,
+ 0x80000000, 0xbfa10041,
+ 0x847b897b, 0xbef6007b,
+ 0xb8f03b05, 0x80708170,
+ 0xbf0d9973, 0xbfa20002,
+ 0x84708970, 0xbfa00001,
+ 0x84708a70, 0xb8fa1e06,
+ 0x847a8a7a, 0x80707a70,
+ 0x8070ff70, 0x00000200,
0x8070ff70, 0x00000080,
- 0xbef90080, 0x7e040280,
- 0x807d907d, 0xbf0aff7d,
- 0x00000060, 0xbfa2ffbb,
- 0xbe804100, 0xbe824102,
- 0xbe844104, 0xbe864106,
- 0xbe884108, 0xbe8a410a,
- 0xd7610002, 0x0000f200,
- 0x80798179, 0xd7610002,
- 0x0000f201, 0x80798179,
- 0xd7610002, 0x0000f202,
- 0x80798179, 0xd7610002,
- 0x0000f203, 0x80798179,
- 0xd7610002, 0x0000f204,
- 0x80798179, 0xd7610002,
- 0x0000f205, 0x80798179,
- 0xd7610002, 0x0000f206,
- 0x80798179, 0xd7610002,
- 0x0000f207, 0x80798179,
- 0xd7610002, 0x0000f208,
- 0x80798179, 0xd7610002,
- 0x0000f209, 0x80798179,
- 0xd7610002, 0x0000f20a,
- 0x80798179, 0xd7610002,
- 0x0000f20b, 0x80798179,
- 0xc4068070, 0x008ce802,
- 0x00000000, 0xbefe00c1,
- 0x857d9973, 0x8b7d817d,
- 0xbf06817d, 0xbfa20002,
- 0xbeff0080, 0xbfa00001,
- 0xbeff00c1, 0xb8fb4306,
- 0x8b7bc17b, 0xbfa10044,
- 0x8b7aff6d, 0x80000000,
- 0xbfa10041, 0x847b897b,
- 0xbef6007b, 0xb8f03b05,
- 0x80708170, 0xbf0d9973,
- 0xbfa20002, 0x84708970,
- 0xbfa00001, 0x84708a70,
- 0xb8fa1e06, 0x847a8a7a,
- 0x80707a70, 0x8070ff70,
- 0x00000200, 0x8070ff70,
- 0x00000080, 0xbef600ff,
- 0x01000000, 0xd71f0000,
- 0x000100c1, 0xd7200000,
- 0x000200c1, 0x16000084,
- 0x857d9973, 0x8b7d817d,
- 0xbf06817d, 0xbefd0080,
- 0xbfa20013, 0xbe8300ff,
- 0x00000080, 0xbf800000,
- 0xbf800000, 0xbf800000,
- 0xd8d80000, 0x01000000,
- 0xbf8a0000, 0xc4068070,
- 0x008ce801, 0x00000000,
- 0x807d037d, 0x80700370,
- 0xd5250000, 0x0001ff00,
- 0x00000080, 0xbf0a7b7d,
- 0xbfa2fff3, 0xbfa00012,
- 0xbe8300ff, 0x00000100,
+ 0xbef600ff, 0x01000000,
+ 0xd71f0000, 0x000100c1,
+ 0xd7200000, 0x000200c1,
+ 0x16000084, 0x857d9973,
+ 0x8b7d817d, 0xbf06817d,
+ 0xbefd0080, 0xbfa20013,
+ 0xbe8300ff, 0x00000080,
0xbf800000, 0xbf800000,
0xbf800000, 0xd8d80000,
0x01000000, 0xbf8a0000,
0xc4068070, 0x008ce801,
0x00000000, 0x807d037d,
0x80700370, 0xd5250000,
- 0x0001ff00, 0x00000100,
+ 0x0001ff00, 0x00000080,
0xbf0a7b7d, 0xbfa2fff3,
- 0xbefe00c1, 0x857d9973,
- 0x8b7d817d, 0xbf06817d,
- 0xbfa20004, 0xbef000ff,
- 0x00000200, 0xbeff0080,
- 0xbfa00003, 0xbef000ff,
- 0x00000400, 0xbeff00c1,
- 0xb8fb3b05, 0x807b817b,
- 0x847b827b, 0x857d9973,
- 0x8b7d817d, 0xbf06817d,
- 0xbfa2001b, 0xbef600ff,
- 0x01000000, 0xbefd0084,
- 0xbf0a7b7d, 0xbfa10040,
- 0x7e008700, 0x7e028701,
- 0x7e048702, 0x7e068703,
- 0xc4068070, 0x008ce800,
- 0x00000000, 0xc4068070,
- 0x008ce801, 0x00008000,
- 0xc4068070, 0x008ce802,
- 0x00010000, 0xc4068070,
- 0x008ce803, 0x00018000,
- 0x807d847d, 0x8070ff70,
- 0x00000200, 0xbf0a7b7d,
- 0xbfa2ffeb, 0xbfa0002a,
+ 0xbfa00012, 0xbe8300ff,
+ 0x00000100, 0xbf800000,
+ 0xbf800000, 0xbf800000,
+ 0xd8d80000, 0x01000000,
+ 0xbf8a0000, 0xc4068070,
+ 0x008ce801, 0x00000000,
+ 0x807d037d, 0x80700370,
+ 0xd5250000, 0x0001ff00,
+ 0x00000100, 0xbf0a7b7d,
+ 0xbfa2fff3, 0xbefe00c1,
+ 0x857d9973, 0x8b7d817d,
+ 0xbf06817d, 0xbfa20004,
+ 0xbef000ff, 0x00000200,
+ 0xbeff0080, 0xbfa00003,
+ 0xbef000ff, 0x00000400,
+ 0xbeff00c1, 0xb8fb3b05,
+ 0x807b817b, 0x847b827b,
+ 0x857d9973, 0x8b7d817d,
+ 0xbf06817d, 0xbfa2001b,
0xbef600ff, 0x01000000,
0xbefd0084, 0xbf0a7b7d,
- 0xbfa10015, 0x7e008700,
+ 0xbfa10040, 0x7e008700,
0x7e028701, 0x7e048702,
0x7e068703, 0xc4068070,
0x008ce800, 0x00000000,
0xc4068070, 0x008ce801,
- 0x00010000, 0xc4068070,
- 0x008ce802, 0x00020000,
+ 0x00008000, 0xc4068070,
+ 0x008ce802, 0x00010000,
0xc4068070, 0x008ce803,
- 0x00030000, 0x807d847d,
- 0x8070ff70, 0x00000400,
+ 0x00018000, 0x807d847d,
+ 0x8070ff70, 0x00000200,
0xbf0a7b7d, 0xbfa2ffeb,
- 0xb8fb1e06, 0x8b7bc17b,
- 0xbfa1000d, 0x847b837b,
- 0x807b7d7b, 0xbefe00c1,
- 0xbeff0080, 0x7e008700,
+ 0xbfa0002a, 0xbef600ff,
+ 0x01000000, 0xbefd0084,
+ 0xbf0a7b7d, 0xbfa10015,
+ 0x7e008700, 0x7e028701,
+ 0x7e048702, 0x7e068703,
0xc4068070, 0x008ce800,
- 0x00000000, 0x807d817d,
- 0x8070ff70, 0x00000080,
- 0xbf0a7b7d, 0xbfa2fff7,
- 0xbfa0016e, 0xbef4007e,
- 0x8b75ff7f, 0x0000ffff,
- 0x8c75ff75, 0x00040000,
- 0xbef60080, 0xbef700ff,
- 0x10807fac, 0xbef1007f,
- 0xb8f20742, 0x84729972,
- 0x8b6eff7f, 0x04000000,
- 0xbfa1003b, 0xbefe00c1,
- 0x857d9972, 0x8b7d817d,
- 0xbf06817d, 0xbfa20002,
- 0xbeff0080, 0xbfa00001,
- 0xbeff00c1, 0xb8ef4306,
- 0x8b6fc16f, 0xbfa10030,
- 0x846f896f, 0xbef6006f,
+ 0x00000000, 0xc4068070,
+ 0x008ce801, 0x00010000,
+ 0xc4068070, 0x008ce802,
+ 0x00020000, 0xc4068070,
+ 0x008ce803, 0x00030000,
+ 0x807d847d, 0x8070ff70,
+ 0x00000400, 0xbf0a7b7d,
+ 0xbfa2ffeb, 0xb8fb1e06,
+ 0x8b7bc17b, 0xbfa1000d,
+ 0x847b837b, 0x807b7d7b,
+ 0xbefe00c1, 0xbeff0080,
+ 0x7e008700, 0xc4068070,
+ 0x008ce800, 0x00000000,
+ 0x807d817d, 0x8070ff70,
+ 0x00000080, 0xbf0a7b7d,
+ 0xbfa2fff7, 0xbfa0016e,
+ 0xbef4007e, 0x8b75ff7f,
+ 0x0000ffff, 0x8c75ff75,
+ 0x00040000, 0xbef60080,
+ 0xbef700ff, 0x10807fac,
+ 0xbef1007f, 0xb8f20742,
+ 0x84729972, 0x8b6eff7f,
+ 0x04000000, 0xbfa1003b,
+ 0xbefe00c1, 0x857d9972,
+ 0x8b7d817d, 0xbf06817d,
+ 0xbfa20002, 0xbeff0080,
+ 0xbfa00001, 0xbeff00c1,
+ 0xb8ef4306, 0x8b6fc16f,
+ 0xbfa10030, 0x846f896f,
+ 0xbef6006f, 0xb8f83b05,
+ 0x80788178, 0xbf0d9972,
+ 0xbfa20002, 0x84788978,
+ 0xbfa00001, 0x84788a78,
+ 0xb8ee1e06, 0x846e8a6e,
+ 0x80786e78, 0x8078ff78,
+ 0x00000200, 0x8078ff78,
+ 0x00000080, 0xbef600ff,
+ 0x01000000, 0x857d9972,
+ 0x8b7d817d, 0xbf06817d,
+ 0xbefd0080, 0xbfa2000d,
+ 0xc4050078, 0x0080e800,
+ 0x00000000, 0xbf8a0000,
+ 0xdac00000, 0x00000000,
+ 0x807dff7d, 0x00000080,
+ 0x8078ff78, 0x00000080,
+ 0xbf0a6f7d, 0xbfa2fff4,
+ 0xbfa0000c, 0xc4050078,
+ 0x0080e800, 0x00000000,
+ 0xbf8a0000, 0xdac00000,
+ 0x00000000, 0x807dff7d,
+ 0x00000100, 0x8078ff78,
+ 0x00000100, 0xbf0a6f7d,
+ 0xbfa2fff4, 0xbef80080,
+ 0xbefe00c1, 0x857d9972,
+ 0x8b7d817d, 0xbf06817d,
+ 0xbfa20002, 0xbeff0080,
+ 0xbfa00001, 0xbeff00c1,
+ 0xb8ef3b05, 0x806f816f,
+ 0x846f826f, 0x857d9972,
+ 0x8b7d817d, 0xbf06817d,
+ 0xbfa2002c, 0xbef600ff,
+ 0x01000000, 0xbeee0078,
+ 0x8078ff78, 0x00000200,
+ 0xbefd0084, 0xbf0a6f7d,
+ 0xbfa10061, 0xc4050078,
+ 0x008ce800, 0x00000000,
+ 0xc4050078, 0x008ce801,
+ 0x00008000, 0xc4050078,
+ 0x008ce802, 0x00010000,
+ 0xc4050078, 0x008ce803,
+ 0x00018000, 0xbf8a0000,
+ 0x7e008500, 0x7e028501,
+ 0x7e048502, 0x7e068503,
+ 0x807d847d, 0x8078ff78,
+ 0x00000200, 0xbf0a6f7d,
+ 0xbfa2ffea, 0xc405006e,
+ 0x008ce800, 0x00000000,
+ 0xc405006e, 0x008ce801,
+ 0x00008000, 0xc405006e,
+ 0x008ce802, 0x00010000,
+ 0xc405006e, 0x008ce803,
+ 0x00018000, 0xbf8a0000,
+ 0xbfa0003d, 0xbef600ff,
+ 0x01000000, 0xbeee0078,
+ 0x8078ff78, 0x00000400,
+ 0xbefd0084, 0xbf0a6f7d,
+ 0xbfa10016, 0xc4050078,
+ 0x008ce800, 0x00000000,
+ 0xc4050078, 0x008ce801,
+ 0x00010000, 0xc4050078,
+ 0x008ce802, 0x00020000,
+ 0xc4050078, 0x008ce803,
+ 0x00030000, 0xbf8a0000,
+ 0x7e008500, 0x7e028501,
+ 0x7e048502, 0x7e068503,
+ 0x807d847d, 0x8078ff78,
+ 0x00000400, 0xbf0a6f7d,
+ 0xbfa2ffea, 0xb8ef1e06,
+ 0x8b6fc16f, 0xbfa1000f,
+ 0x846f836f, 0x806f7d6f,
+ 0xbefe00c1, 0xbeff0080,
+ 0xc4050078, 0x008ce800,
+ 0x00000000, 0xbf8a0000,
+ 0x7e008500, 0x807d817d,
+ 0x8078ff78, 0x00000080,
+ 0xbf0a6f7d, 0xbfa2fff6,
+ 0xbeff00c1, 0xc405006e,
+ 0x008ce800, 0x00000000,
+ 0xc405006e, 0x008ce801,
+ 0x00010000, 0xc405006e,
+ 0x008ce802, 0x00020000,
+ 0xc405006e, 0x008ce803,
+ 0x00030000, 0xbf8a0000,
0xb8f83b05, 0x80788178,
0xbf0d9972, 0xbfa20002,
0x84788978, 0xbfa00001,
0x84788a78, 0xb8ee1e06,
0x846e8a6e, 0x80786e78,
0x8078ff78, 0x00000200,
- 0x8078ff78, 0x00000080,
- 0xbef600ff, 0x01000000,
- 0x857d9972, 0x8b7d817d,
- 0xbf06817d, 0xbefd0080,
- 0xbfa2000d, 0xc4050078,
- 0x0080e800, 0x00000000,
- 0xbf8a0000, 0xdac00000,
- 0x00000000, 0x807dff7d,
- 0x00000080, 0x8078ff78,
- 0x00000080, 0xbf0a6f7d,
- 0xbfa2fff4, 0xbfa0000c,
- 0xc4050078, 0x0080e800,
- 0x00000000, 0xbf8a0000,
- 0xdac00000, 0x00000000,
- 0x807dff7d, 0x00000100,
- 0x8078ff78, 0x00000100,
- 0xbf0a6f7d, 0xbfa2fff4,
- 0xbef80080, 0xbefe00c1,
- 0x857d9972, 0x8b7d817d,
- 0xbf06817d, 0xbfa20002,
- 0xbeff0080, 0xbfa00001,
- 0xbeff00c1, 0xb8ef3b05,
- 0x806f816f, 0x846f826f,
- 0x857d9972, 0x8b7d817d,
- 0xbf06817d, 0xbfa2002c,
+ 0x80f8ff78, 0x00000050,
0xbef600ff, 0x01000000,
- 0xbeee0078, 0x8078ff78,
- 0x00000200, 0xbefd0084,
- 0xbf0a6f7d, 0xbfa10061,
- 0xc4050078, 0x008ce800,
- 0x00000000, 0xc4050078,
- 0x008ce801, 0x00008000,
- 0xc4050078, 0x008ce802,
- 0x00010000, 0xc4050078,
- 0x008ce803, 0x00018000,
- 0xbf8a0000, 0x7e008500,
- 0x7e028501, 0x7e048502,
- 0x7e068503, 0x807d847d,
+ 0xbefd00ff, 0x0000006c,
+ 0x80f89078, 0xf462403a,
+ 0xf0000000, 0xbf8a0000,
+ 0x80fd847d, 0xbf800000,
+ 0xbe804300, 0xbe824302,
+ 0x80f8a078, 0xf462603a,
+ 0xf0000000, 0xbf8a0000,
+ 0x80fd887d, 0xbf800000,
+ 0xbe804300, 0xbe824302,
+ 0xbe844304, 0xbe864306,
+ 0x80f8c078, 0xf462803a,
+ 0xf0000000, 0xbf8a0000,
+ 0x80fd907d, 0xbf800000,
+ 0xbe804300, 0xbe824302,
+ 0xbe844304, 0xbe864306,
+ 0xbe884308, 0xbe8a430a,
+ 0xbe8c430c, 0xbe8e430e,
+ 0xbf06807d, 0xbfa1fff0,
+ 0xb980f801, 0x00000000,
+ 0xb8f83b05, 0x80788178,
+ 0xbf0d9972, 0xbfa20002,
+ 0x84788978, 0xbfa00001,
+ 0x84788a78, 0xb8ee1e06,
+ 0x846e8a6e, 0x80786e78,
0x8078ff78, 0x00000200,
- 0xbf0a6f7d, 0xbfa2ffea,
- 0xc405006e, 0x008ce800,
- 0x00000000, 0xc405006e,
- 0x008ce801, 0x00008000,
- 0xc405006e, 0x008ce802,
- 0x00010000, 0xc405006e,
- 0x008ce803, 0x00018000,
- 0xbf8a0000, 0xbfa0003d,
0xbef600ff, 0x01000000,
- 0xbeee0078, 0x8078ff78,
- 0x00000400, 0xbefd0084,
- 0xbf0a6f7d, 0xbfa10016,
- 0xc4050078, 0x008ce800,
- 0x00000000, 0xc4050078,
- 0x008ce801, 0x00010000,
- 0xc4050078, 0x008ce802,
- 0x00020000, 0xc4050078,
- 0x008ce803, 0x00030000,
- 0xbf8a0000, 0x7e008500,
- 0x7e028501, 0x7e048502,
- 0x7e068503, 0x807d847d,
- 0x8078ff78, 0x00000400,
- 0xbf0a6f7d, 0xbfa2ffea,
- 0xb8ef1e06, 0x8b6fc16f,
- 0xbfa1000f, 0x846f836f,
- 0x806f7d6f, 0xbefe00c1,
- 0xbeff0080, 0xc4050078,
- 0x008ce800, 0x00000000,
- 0xbf8a0000, 0x7e008500,
- 0x807d817d, 0x8078ff78,
- 0x00000080, 0xbf0a6f7d,
- 0xbfa2fff6, 0xbeff00c1,
- 0xc405006e, 0x008ce800,
- 0x00000000, 0xc405006e,
- 0x008ce801, 0x00010000,
- 0xc405006e, 0x008ce802,
- 0x00020000, 0xc405006e,
- 0x008ce803, 0x00030000,
- 0xbf8a0000, 0xb8f83b05,
- 0x80788178, 0xbf0d9972,
- 0xbfa20002, 0x84788978,
- 0xbfa00001, 0x84788a78,
- 0xb8ee1e06, 0x846e8a6e,
- 0x80786e78, 0x8078ff78,
- 0x00000200, 0x80f8ff78,
- 0x00000050, 0xbef600ff,
- 0x01000000, 0xbefd00ff,
- 0x0000006c, 0x80f89078,
- 0xf462403a, 0xf0000000,
- 0xbf8a0000, 0x80fd847d,
- 0xbf800000, 0xbe804300,
- 0xbe824302, 0x80f8a078,
- 0xf462603a, 0xf0000000,
- 0xbf8a0000, 0x80fd887d,
- 0xbf800000, 0xbe804300,
- 0xbe824302, 0xbe844304,
- 0xbe864306, 0x80f8c078,
- 0xf462803a, 0xf0000000,
- 0xbf8a0000, 0x80fd907d,
- 0xbf800000, 0xbe804300,
- 0xbe824302, 0xbe844304,
- 0xbe864306, 0xbe884308,
- 0xbe8a430a, 0xbe8c430c,
- 0xbe8e430e, 0xbf06807d,
- 0xbfa1fff0, 0xb980f801,
- 0x00000000, 0xb8f83b05,
- 0x80788178, 0xbf0d9972,
- 0xbfa20002, 0x84788978,
- 0xbfa00001, 0x84788a78,
- 0xb8ee1e06, 0x846e8a6e,
- 0x80786e78, 0x8078ff78,
- 0x00000200, 0xbef600ff,
- 0x01000000, 0xbeff0071,
- 0xf4621bfa, 0xf0000000,
- 0x80788478, 0xf4621b3a,
+ 0xbeff0071, 0xf4621bfa,
0xf0000000, 0x80788478,
- 0xf4621b7a, 0xf0000000,
- 0x80788478, 0xf4621c3a,
+ 0xf4621b3a, 0xf0000000,
+ 0x80788478, 0xf4621b7a,
0xf0000000, 0x80788478,
- 0xf4621c7a, 0xf0000000,
- 0x80788478, 0xf4621eba,
+ 0xf4621c3a, 0xf0000000,
+ 0x80788478, 0xf4621c7a,
0xf0000000, 0x80788478,
- 0xf4621efa, 0xf0000000,
- 0x80788478, 0xf4621e7a,
+ 0xf4621eba, 0xf0000000,
+ 0x80788478, 0xf4621efa,
0xf0000000, 0x80788478,
- 0xf4621cfa, 0xf0000000,
- 0x80788478, 0xf4621bba,
+ 0xf4621e7a, 0xf0000000,
+ 0x80788478, 0xf4621cfa,
0xf0000000, 0x80788478,
- 0xbf8a0000, 0xb96ef814,
0xf4621bba, 0xf0000000,
0x80788478, 0xbf8a0000,
- 0xb96ef815, 0xf4621bba,
+ 0xb96ef814, 0xf4621bba,
0xf0000000, 0x80788478,
- 0xbf8a0000, 0xb96ef812,
+ 0xbf8a0000, 0xb96ef815,
0xf4621bba, 0xf0000000,
0x80788478, 0xbf8a0000,
- 0xb96ef813, 0x8b6eff7f,
- 0x04000000, 0xbfa1000d,
- 0x80788478, 0xf4621bba,
+ 0xb96ef812, 0xf4621bba,
0xf0000000, 0x80788478,
- 0xbf8a0000, 0xbf0d806e,
- 0xbfa10006, 0x856e906e,
- 0x8b6e6e6e, 0xbfa10003,
- 0xbe804ec1, 0x816ec16e,
- 0xbfa0fffb, 0xbefd006f,
- 0xbefe0070, 0xbeff0071,
- 0xb97b2011, 0x857b867b,
- 0xb97b0191, 0x857b827b,
- 0xb97bba11, 0xb973f801,
- 0xb8ee3b05, 0x806e816e,
- 0xbf0d9972, 0xbfa20002,
- 0x846e896e, 0xbfa00001,
- 0x846e8a6e, 0xb8ef1e06,
- 0x846f8a6f, 0x806e6f6e,
- 0x806eff6e, 0x00000200,
- 0x806e746e, 0x826f8075,
- 0x8b6fff6f, 0x0000ffff,
- 0xf4605c37, 0xf8000050,
- 0xf4605d37, 0xf8000060,
- 0xf4601e77, 0xf8000074,
- 0xbf8a0000, 0x8b6dff6d,
- 0x0000ffff, 0x8bfe7e7e,
- 0x8bea6a6a, 0xb97af804,
+ 0xbf8a0000, 0xb96ef813,
+ 0x8b6eff7f, 0x04000000,
+ 0xbfa1000d, 0x80788478,
+ 0xf4621bba, 0xf0000000,
+ 0x80788478, 0xbf8a0000,
+ 0xbf0d806e, 0xbfa10006,
+ 0x856e906e, 0x8b6e6e6e,
+ 0xbfa10003, 0xbe804ec1,
+ 0x816ec16e, 0xbfa0fffb,
+ 0xbefd006f, 0xbefe0070,
+ 0xbeff0071, 0xb97b2011,
+ 0x857b867b, 0xb97b0191,
+ 0x857b827b, 0xb97bba11,
+ 0xb973f801, 0xb8ee3b05,
+ 0x806e816e, 0xbf0d9972,
+ 0xbfa20002, 0x846e896e,
+ 0xbfa00001, 0x846e8a6e,
+ 0xb8ef1e06, 0x846f8a6f,
+ 0x806e6f6e, 0x806eff6e,
+ 0x00000200, 0x806e746e,
+ 0x826f8075, 0x8b6fff6f,
+ 0x0000ffff, 0xf4605c37,
+ 0xf8000050, 0xf4605d37,
+ 0xf8000060, 0xf4601e77,
+ 0xf8000074, 0xbf8a0000,
+ 0x8b6dff6d, 0x0000ffff,
+ 0x8bfe7e7e, 0x8bea6a6a,
+ 0xb97af804, 0xbe804ec2,
+ 0xbf94fffe, 0xbe804a6c,
0xbe804ec2, 0xbf94fffe,
- 0xbe804a6c, 0xbfb10000,
+ 0xbfb10000, 0xbf9f0000,
0xbf9f0000, 0xbf9f0000,
0xbf9f0000, 0xbf9f0000,
- 0xbf9f0000, 0x00000000,
+};
+
+static const uint32_t cwsr_trap_gfx9_5_0_hex[] = {
+ 0xbf820001, 0xbf8202ca,
+ 0xb8f8f802, 0x8978ff78,
+ 0x00020006, 0xb8fbf803,
+ 0x866eff78, 0x00002000,
+ 0xbf840009, 0x866eff6d,
+ 0x00ff0000, 0xbf85001a,
+ 0x866eff7b, 0x00000400,
+ 0xbf850051, 0xbf8e0010,
+ 0xb8fbf803, 0xbf82fffa,
+ 0x866eff7b, 0x03c00900,
+ 0xbf850011, 0x866eff7b,
+ 0x000071ff, 0xbf840008,
+ 0x866fff7b, 0x00007080,
+ 0xbf840001, 0xbeee1a87,
+ 0xb8eff801, 0x8e6e8c6e,
+ 0x866e6f6e, 0xbf850006,
+ 0x866eff6d, 0x00ff0000,
+ 0xbf850003, 0x866eff7b,
+ 0x00000400, 0xbf85003a,
+ 0xb8faf807, 0x867aff7a,
+ 0x001f8000, 0x8e7a8b7a,
+ 0x8979ff79, 0xfc000000,
+ 0x87797a79, 0xba7ff807,
+ 0x00000000, 0xb8faf812,
+ 0xb8fbf813, 0x8efa887a,
+ 0xbf0d8f7b, 0xbf840002,
+ 0x877bff7b, 0xffff0000,
+ 0xc0031bbd, 0x00000010,
+ 0xbf8cc07f, 0x8e6e976e,
+ 0x8979ff79, 0x00800000,
+ 0x87796e79, 0xc0071bbd,
+ 0x00000000, 0xbf8cc07f,
+ 0xc0071ebd, 0x00000008,
+ 0xbf8cc07f, 0x86ee6e6e,
+ 0xbf840001, 0xbe801d6e,
+ 0x866eff6d, 0x01ff0000,
+ 0xbf850005, 0x8778ff78,
+ 0x00002000, 0x80ec886c,
+ 0x82ed806d, 0xbf820005,
+ 0x866eff6d, 0x01000000,
+ 0xbf850002, 0x806c846c,
+ 0x826d806d, 0x866dff6d,
+ 0x0000ffff, 0x8f7a8b79,
+ 0x867aff7a, 0x001f8000,
+ 0xb97af807, 0x86fe7e7e,
+ 0x86ea6a6a, 0x8f6e8378,
+ 0xb96ee0c2, 0xbf800002,
+ 0xb9780002, 0xbe801f6c,
+ 0x866dff6d, 0x0000ffff,
+ 0xbefa0080, 0xb97a0283,
+ 0xb8faf807, 0x867aff7a,
+ 0x001f8000, 0x8e7a8b7a,
+ 0x8979ff79, 0xfc000000,
+ 0x87797a79, 0xba7ff807,
+ 0x00000000, 0xbeee007e,
+ 0xbeef007f, 0xbefe0180,
+ 0xbf900004, 0x877a8478,
+ 0xb97af802, 0xbf8e0002,
+ 0xbf88fffe, 0xb8fa2985,
+ 0x807a817a, 0x8e7a8a7a,
+ 0x8e7a817a, 0xb8fb1605,
+ 0x807b817b, 0x8e7b867b,
+ 0x807a7b7a, 0x807a7e7a,
+ 0x827b807f, 0x867bff7b,
+ 0x0000ffff, 0xc04b1c3d,
+ 0x00000050, 0xbf8cc07f,
+ 0xc04b1d3d, 0x00000060,
+ 0xbf8cc07f, 0xc0431e7d,
+ 0x00000074, 0xbf8cc07f,
+ 0xbef4007e, 0x8675ff7f,
+ 0x0000ffff, 0x8775ff75,
+ 0x00040000, 0xbef60080,
+ 0xbef700ff, 0x00807fac,
+ 0xbef1007c, 0xbef00080,
+ 0xb8f02985, 0x80708170,
+ 0x8e708a70, 0x8e708170,
+ 0xb8fa1605, 0x807a817a,
+ 0x8e7a867a, 0x80707a70,
+ 0xbef60084, 0xbef600ff,
+ 0x01000000, 0xbefe007c,
+ 0xbefc0070, 0xc0611c7a,
+ 0x0000007c, 0xbf8cc07f,
+ 0x80708470, 0xbefc007e,
+ 0xbefe007c, 0xbefc0070,
+ 0xc0611b3a, 0x0000007c,
+ 0xbf8cc07f, 0x80708470,
+ 0xbefc007e, 0xbefe007c,
+ 0xbefc0070, 0xc0611b7a,
+ 0x0000007c, 0xbf8cc07f,
+ 0x80708470, 0xbefc007e,
+ 0xbefe007c, 0xbefc0070,
+ 0xc0611bba, 0x0000007c,
+ 0xbf8cc07f, 0x80708470,
+ 0xbefc007e, 0xbefe007c,
+ 0xbefc0070, 0xc0611bfa,
+ 0x0000007c, 0xbf8cc07f,
+ 0x80708470, 0xbefc007e,
+ 0xbefe007c, 0xbefc0070,
+ 0xc0611e3a, 0x0000007c,
+ 0xbf8cc07f, 0x80708470,
+ 0xbefc007e, 0xb8fbf803,
+ 0xbefe007c, 0xbefc0070,
+ 0xc0611efa, 0x0000007c,
+ 0xbf8cc07f, 0x80708470,
+ 0xbefc007e, 0xbefe007c,
+ 0xbefc0070, 0xc0611a3a,
+ 0x0000007c, 0xbf8cc07f,
+ 0x80708470, 0xbefc007e,
+ 0xbefe007c, 0xbefc0070,
+ 0xc0611a7a, 0x0000007c,
+ 0xbf8cc07f, 0x80708470,
+ 0xbefc007e, 0xb8f1f801,
+ 0xbefe007c, 0xbefc0070,
+ 0xc0611c7a, 0x0000007c,
+ 0xbf8cc07f, 0x80708470,
+ 0xbefc007e, 0xbf108080,
+ 0x867aff7f, 0x04000000,
+ 0xbeef0080, 0x876f6f7a,
+ 0xb8f02985, 0x80708170,
+ 0x8e708a70, 0x8e708170,
+ 0xb8fb1605, 0x807b817b,
+ 0x8e7b847b, 0x8e76827b,
+ 0xbef600ff, 0x01000000,
+ 0xbef20174, 0x80747074,
+ 0x82758075, 0xbefc0080,
+ 0xbf800000, 0xbe802b00,
+ 0xbe822b02, 0xbe842b04,
+ 0xbe862b06, 0xbe882b08,
+ 0xbe8a2b0a, 0xbe8c2b0c,
+ 0xbe8e2b0e, 0xc06b003a,
+ 0x00000000, 0xbf8cc07f,
+ 0xc06b013a, 0x00000010,
+ 0xbf8cc07f, 0xc06b023a,
+ 0x00000020, 0xbf8cc07f,
+ 0xc06b033a, 0x00000030,
+ 0xbf8cc07f, 0x8074c074,
+ 0x82758075, 0x807c907c,
+ 0xbf0a7b7c, 0xbf85ffe7,
+ 0xbef40172, 0xbef00080,
+ 0xbefe00c1, 0xbeff00c1,
+ 0xbee80080, 0xbee90080,
+ 0xbef600ff, 0x01000000,
+ 0x867aff78, 0x00400000,
+ 0xbf850003, 0xb8faf803,
+ 0x897a7aff, 0x10000000,
+ 0xbf85004d, 0xbe840080,
+ 0xd2890000, 0x00000900,
+ 0x80048104, 0xd2890001,
+ 0x00000900, 0x80048104,
+ 0xd2890002, 0x00000900,
+ 0x80048104, 0xd2890003,
+ 0x00000900, 0x80048104,
+ 0xc069003a, 0x00000070,
+ 0xbf8cc07f, 0x80709070,
+ 0xbf06c004, 0xbf84ffee,
+ 0xbe840080, 0xd2890000,
+ 0x00000901, 0x80048104,
+ 0xd2890001, 0x00000901,
+ 0x80048104, 0xd2890002,
+ 0x00000901, 0x80048104,
+ 0xd2890003, 0x00000901,
+ 0x80048104, 0xc069003a,
+ 0x00000070, 0xbf8cc07f,
+ 0x80709070, 0xbf06c004,
+ 0xbf84ffee, 0xbe840080,
+ 0xd2890000, 0x00000902,
+ 0x80048104, 0xd2890001,
+ 0x00000902, 0x80048104,
+ 0xd2890002, 0x00000902,
+ 0x80048104, 0xd2890003,
+ 0x00000902, 0x80048104,
+ 0xc069003a, 0x00000070,
+ 0xbf8cc07f, 0x80709070,
+ 0xbf06c004, 0xbf84ffee,
+ 0xbe840080, 0xd2890000,
+ 0x00000903, 0x80048104,
+ 0xd2890001, 0x00000903,
+ 0x80048104, 0xd2890002,
+ 0x00000903, 0x80048104,
+ 0xd2890003, 0x00000903,
+ 0x80048104, 0xc069003a,
+ 0x00000070, 0xbf8cc07f,
+ 0x80709070, 0xbf06c004,
+ 0xbf84ffee, 0xbf820008,
+ 0xe0724000, 0x701d0000,
+ 0xe0724100, 0x701d0100,
+ 0xe0724200, 0x701d0200,
+ 0xe0724300, 0x701d0300,
+ 0xbefe00c1, 0xbeff00c1,
+ 0xb8fb5306, 0x867bc17b,
+ 0xbf840052, 0xbf8a0000,
+ 0x867aff6f, 0x04000000,
+ 0xbf84004e, 0x8e7b867b,
+ 0x8e7b827b, 0xbef6007b,
+ 0xb8f02985, 0x80708170,
+ 0x8e708a70, 0x8e708170,
+ 0xb8fa1605, 0x807a817a,
+ 0x8e7a867a, 0x80707a70,
+ 0x8070ff70, 0x00000080,
+ 0xbef600ff, 0x01000000,
+ 0xbefc0080, 0xd28c0002,
+ 0x000100c1, 0xd28d0003,
+ 0x000204c1, 0x867aff78,
+ 0x00400000, 0xbf850003,
+ 0xb8faf803, 0x897a7aff,
+ 0x10000000, 0xbf85001d,
+ 0x24040682, 0xd86c0000,
+ 0x00000002, 0xbf8cc07f,
+ 0xbe840080, 0xd2890000,
+ 0x00000900, 0x80048104,
+ 0xd2890001, 0x00000900,
+ 0x80048104, 0xd2890002,
+ 0x00000900, 0x80048104,
+ 0xd2890003, 0x00000900,
+ 0x80048104, 0xc069003a,
+ 0x00000070, 0xbf8cc07f,
+ 0x80709070, 0xbf06c004,
+ 0xbf84ffee, 0x680404ff,
+ 0x00000100, 0xd0c9006a,
+ 0x0000f702, 0xbf87ffe5,
+ 0xbf820016, 0xd1060002,
+ 0x00011103, 0x7e0602ff,
+ 0x00000200, 0xbefc00ff,
+ 0x00010000, 0xbe800077,
+ 0x8677ff77, 0xff7fffff,
+ 0x8777ff77, 0x00058000,
+ 0xd8ec0000, 0x00000002,
+ 0xbf8cc07f, 0xe0765000,
+ 0x701d0002, 0x68040702,
+ 0xd0c9006a, 0x0000f702,
+ 0xbefe016a, 0xbf87fff6,
+ 0xbef70000, 0xbef000ff,
+ 0x00000400, 0xbefe00c1,
+ 0xbeff00c1, 0xb8fb2b05,
+ 0x807b817b, 0x8e7b827b,
+ 0xbef600ff, 0x01000000,
+ 0xbefc0084, 0xbf0a7b7c,
+ 0xbf84006d, 0xbf11017c,
+ 0x807bff7b, 0x00001000,
+ 0x867aff78, 0x00400000,
+ 0xbf850003, 0xb8faf803,
+ 0x897a7aff, 0x10000000,
+ 0xbf850051, 0xbe840080,
+ 0xd2890000, 0x00000900,
+ 0x80048104, 0xd2890001,
+ 0x00000900, 0x80048104,
+ 0xd2890002, 0x00000900,
+ 0x80048104, 0xd2890003,
+ 0x00000900, 0x80048104,
+ 0xc069003a, 0x00000070,
+ 0xbf8cc07f, 0x80709070,
+ 0xbf06c004, 0xbf84ffee,
+ 0xbe840080, 0xd2890000,
+ 0x00000901, 0x80048104,
+ 0xd2890001, 0x00000901,
+ 0x80048104, 0xd2890002,
+ 0x00000901, 0x80048104,
+ 0xd2890003, 0x00000901,
+ 0x80048104, 0xc069003a,
+ 0x00000070, 0xbf8cc07f,
+ 0x80709070, 0xbf06c004,
+ 0xbf84ffee, 0xbe840080,
+ 0xd2890000, 0x00000902,
+ 0x80048104, 0xd2890001,
+ 0x00000902, 0x80048104,
+ 0xd2890002, 0x00000902,
+ 0x80048104, 0xd2890003,
+ 0x00000902, 0x80048104,
+ 0xc069003a, 0x00000070,
+ 0xbf8cc07f, 0x80709070,
+ 0xbf06c004, 0xbf84ffee,
+ 0xbe840080, 0xd2890000,
+ 0x00000903, 0x80048104,
+ 0xd2890001, 0x00000903,
+ 0x80048104, 0xd2890002,
+ 0x00000903, 0x80048104,
+ 0xd2890003, 0x00000903,
+ 0x80048104, 0xc069003a,
+ 0x00000070, 0xbf8cc07f,
+ 0x80709070, 0xbf06c004,
+ 0xbf84ffee, 0x807c847c,
+ 0xbf0a7b7c, 0xbf85ffb1,
+ 0xbf9c0000, 0xbf820012,
+ 0x7e000300, 0x7e020301,
+ 0x7e040302, 0x7e060303,
+ 0xe0724000, 0x701d0000,
+ 0xe0724100, 0x701d0100,
+ 0xe0724200, 0x701d0200,
+ 0xe0724300, 0x701d0300,
+ 0x807c847c, 0x8070ff70,
+ 0x00000400, 0xbf0a7b7c,
+ 0xbf85ffef, 0xbf9c0000,
+ 0xb8fb2985, 0x807b817b,
+ 0x8e7b837b, 0xb8fa2b05,
+ 0x807a817a, 0x8e7a827a,
+ 0x80fb7a7b, 0x867b7b7b,
+ 0xbf84007a, 0x807bff7b,
+ 0x00001000, 0xbefc0080,
+ 0xbf11017c, 0x867aff78,
+ 0x00400000, 0xbf850003,
+ 0xb8faf803, 0x897a7aff,
+ 0x10000000, 0xbf850059,
+ 0xd3d84000, 0x18000100,
+ 0xd3d84001, 0x18000101,
+ 0xd3d84002, 0x18000102,
+ 0xd3d84003, 0x18000103,
+ 0xbe840080, 0xd2890000,
+ 0x00000900, 0x80048104,
+ 0xd2890001, 0x00000900,
+ 0x80048104, 0xd2890002,
+ 0x00000900, 0x80048104,
+ 0xd2890003, 0x00000900,
+ 0x80048104, 0xc069003a,
+ 0x00000070, 0xbf8cc07f,
+ 0x80709070, 0xbf06c004,
+ 0xbf84ffee, 0xbe840080,
+ 0xd2890000, 0x00000901,
+ 0x80048104, 0xd2890001,
+ 0x00000901, 0x80048104,
+ 0xd2890002, 0x00000901,
+ 0x80048104, 0xd2890003,
+ 0x00000901, 0x80048104,
+ 0xc069003a, 0x00000070,
+ 0xbf8cc07f, 0x80709070,
+ 0xbf06c004, 0xbf84ffee,
+ 0xbe840080, 0xd2890000,
+ 0x00000902, 0x80048104,
+ 0xd2890001, 0x00000902,
+ 0x80048104, 0xd2890002,
+ 0x00000902, 0x80048104,
+ 0xd2890003, 0x00000902,
+ 0x80048104, 0xc069003a,
+ 0x00000070, 0xbf8cc07f,
+ 0x80709070, 0xbf06c004,
+ 0xbf84ffee, 0xbe840080,
+ 0xd2890000, 0x00000903,
+ 0x80048104, 0xd2890001,
+ 0x00000903, 0x80048104,
+ 0xd2890002, 0x00000903,
+ 0x80048104, 0xd2890003,
+ 0x00000903, 0x80048104,
+ 0xc069003a, 0x00000070,
+ 0xbf8cc07f, 0x80709070,
+ 0xbf06c004, 0xbf84ffee,
+ 0x807c847c, 0xbf0a7b7c,
+ 0xbf85ffa9, 0xbf9c0000,
+ 0xbf820016, 0xd3d84000,
+ 0x18000100, 0xd3d84001,
+ 0x18000101, 0xd3d84002,
+ 0x18000102, 0xd3d84003,
+ 0x18000103, 0xe0724000,
+ 0x701d0000, 0xe0724100,
+ 0x701d0100, 0xe0724200,
+ 0x701d0200, 0xe0724300,
+ 0x701d0300, 0x807c847c,
+ 0x8070ff70, 0x00000400,
+ 0xbf0a7b7c, 0xbf85ffeb,
+ 0xbf9c0000, 0xbf8200f4,
+ 0xbef4007e, 0x8675ff7f,
+ 0x0000ffff, 0x8775ff75,
+ 0x00040000, 0xbef60080,
+ 0xbef700ff, 0x00807fac,
+ 0x866eff7f, 0x04000000,
+ 0xbf840025, 0xbefe00c1,
+ 0xbeff00c1, 0xb8ef5306,
+ 0x866fc16f, 0xbf840020,
+ 0x8e6f866f, 0x8e6f826f,
+ 0xbef6006f, 0xb8f82985,
+ 0x80788178, 0x8e788a78,
+ 0x8e788178, 0xb8ee1605,
+ 0x806e816e, 0x8e6e866e,
+ 0x80786e78, 0x8078ff78,
+ 0x00000080, 0xbef600ff,
+ 0x01000000, 0xbefc0080,
+ 0xe0510000, 0x781d0000,
+ 0xe0510100, 0x781d0000,
+ 0xe0510200, 0x781d0000,
+ 0xe0510300, 0x781d0000,
+ 0xe0510400, 0x781d0000,
+ 0x807cff7c, 0x00000500,
+ 0x8078ff78, 0x00000500,
+ 0xbf0a6f7c, 0xbf85fff0,
+ 0xbefe00c1, 0xbeff00c1,
+ 0xbef600ff, 0x01000000,
+ 0xb8ef2b05, 0x806f816f,
+ 0x8e6f826f, 0x806fff6f,
+ 0x00008000, 0xbef80080,
+ 0xbeee0078, 0x8078ff78,
+ 0x00000400, 0xbefc0084,
+ 0xbf11087c, 0xe0524000,
+ 0x781d0000, 0xe0524100,
+ 0x781d0100, 0xe0524200,
+ 0x781d0200, 0xe0524300,
+ 0x781d0300, 0xbf8c0f70,
+ 0x7e000300, 0x7e020301,
+ 0x7e040302, 0x7e060303,
+ 0x807c847c, 0x8078ff78,
+ 0x00000400, 0xbf0a6f7c,
+ 0xbf85ffee, 0xb8ef2985,
+ 0x806f816f, 0x8e6f836f,
+ 0xb8f92b05, 0x80798179,
+ 0x8e798279, 0x80ef796f,
+ 0x866f6f6f, 0xbf84001a,
+ 0x806fff6f, 0x00008000,
+ 0xbefc0080, 0xbf11087c,
+ 0xe0524000, 0x781d0000,
+ 0xe0524100, 0x781d0100,
+ 0xe0524200, 0x781d0200,
+ 0xe0524300, 0x781d0300,
+ 0xbf8c0f70, 0xd3d94000,
+ 0x18000100, 0xd3d94001,
+ 0x18000101, 0xd3d94002,
+ 0x18000102, 0xd3d94003,
+ 0x18000103, 0x807c847c,
+ 0x8078ff78, 0x00000400,
+ 0xbf0a6f7c, 0xbf85ffea,
+ 0xbf9c0000, 0xe0524000,
+ 0x6e1d0000, 0xe0524100,
+ 0x6e1d0100, 0xe0524200,
+ 0x6e1d0200, 0xe0524300,
+ 0x6e1d0300, 0xbf8c0f70,
+ 0xb8f82985, 0x80788178,
+ 0x8e788a78, 0x8e788178,
+ 0xb8ee1605, 0x806e816e,
+ 0x8e6e866e, 0x80786e78,
+ 0x80f8c078, 0xb8ef1605,
+ 0x806f816f, 0x8e6f846f,
+ 0x8e76826f, 0xbef600ff,
+ 0x01000000, 0xbefc006f,
+ 0xc031003a, 0x00000078,
+ 0x80f8c078, 0xbf8cc07f,
+ 0x80fc907c, 0xbf800000,
+ 0xbe802d00, 0xbe822d02,
+ 0xbe842d04, 0xbe862d06,
+ 0xbe882d08, 0xbe8a2d0a,
+ 0xbe8c2d0c, 0xbe8e2d0e,
+ 0xbf06807c, 0xbf84fff0,
+ 0xb8f82985, 0x80788178,
+ 0x8e788a78, 0x8e788178,
+ 0xb8ee1605, 0x806e816e,
+ 0x8e6e866e, 0x80786e78,
+ 0xbef60084, 0xbef600ff,
+ 0x01000000, 0xc0211bfa,
+ 0x00000078, 0x80788478,
+ 0xc0211b3a, 0x00000078,
+ 0x80788478, 0xc0211b7a,
+ 0x00000078, 0x80788478,
+ 0xc0211c3a, 0x00000078,
+ 0x80788478, 0xc0211c7a,
+ 0x00000078, 0x80788478,
+ 0xc0211eba, 0x00000078,
+ 0x80788478, 0xc0211efa,
+ 0x00000078, 0x80788478,
+ 0xc0211a3a, 0x00000078,
+ 0x80788478, 0xc0211a7a,
+ 0x00000078, 0x80788478,
+ 0xc0211cfa, 0x00000078,
+ 0x80788478, 0xbf8cc07f,
+ 0xbefc006f, 0xbefe0070,
+ 0xbeff0071, 0x866f7bff,
+ 0x000003ff, 0xb96f4803,
+ 0x866f7bff, 0xfffff800,
+ 0x8f6f8b6f, 0xb96fa2c3,
+ 0xb973f801, 0xb8ee2985,
+ 0x806e816e, 0x8e6e8a6e,
+ 0x8e6e816e, 0xb8ef1605,
+ 0x806f816f, 0x8e6f866f,
+ 0x806e6f6e, 0x806e746e,
+ 0x826f8075, 0x866fff6f,
+ 0x0000ffff, 0xc00b1c37,
+ 0x00000050, 0xc00b1d37,
+ 0x00000060, 0xc0031e77,
+ 0x00000074, 0xbf8cc07f,
+ 0x8f6e8b79, 0x866eff6e,
+ 0x001f8000, 0xb96ef807,
+ 0x866dff6d, 0x0000ffff,
+ 0x86fe7e7e, 0x86ea6a6a,
+ 0x8f6e837a, 0xb96ee0c2,
+ 0xbf800002, 0xb97a0002,
+ 0xbf8a0000, 0xbe801f6c,
+ 0xbf9b0000, 0x00000000,
};
diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm
index 44772eec9ef4..96fbb16ceb21 100644
--- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm
+++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm
@@ -34,41 +34,24 @@
* cpp -DASIC_FAMILY=CHIP_PLUM_BONITO cwsr_trap_handler_gfx10.asm -P -o gfx11.sp3
* sp3 gfx11.sp3 -hex gfx11.hex
*
- * gfx12:
- * cpp -DASIC_FAMILY=CHIP_GFX12 cwsr_trap_handler_gfx10.asm -P -o gfx12.sp3
- * sp3 gfx12.sp3 -hex gfx12.hex
*/
#define CHIP_NAVI10 26
#define CHIP_SIENNA_CICHLID 30
#define CHIP_PLUM_BONITO 36
-#define CHIP_GFX12 37
#define NO_SQC_STORE (ASIC_FAMILY >= CHIP_SIENNA_CICHLID)
#define HAVE_XNACK (ASIC_FAMILY < CHIP_SIENNA_CICHLID)
#define HAVE_SENDMSG_RTN (ASIC_FAMILY >= CHIP_PLUM_BONITO)
#define HAVE_BUFFER_LDS_LOAD (ASIC_FAMILY < CHIP_PLUM_BONITO)
-#define SW_SA_TRAP (ASIC_FAMILY >= CHIP_PLUM_BONITO && ASIC_FAMILY < CHIP_GFX12)
+#define SW_SA_TRAP (ASIC_FAMILY == CHIP_PLUM_BONITO)
#define SAVE_AFTER_XNACK_ERROR (HAVE_XNACK && !NO_SQC_STORE) // workaround for TCP store failure after XNACK error when ALLOW_REPLAY=0, for debugger
#define SINGLE_STEP_MISSED_WORKAROUND 1 //workaround for lost MODE.DEBUG_EN exception when SAVECTX raised
-#if ASIC_FAMILY < CHIP_GFX12
#define S_COHERENCE glc:1
#define V_COHERENCE slc:1 glc:1
#define S_WAITCNT_0 s_waitcnt 0
-#else
-#define S_COHERENCE scope:SCOPE_SYS
-#define V_COHERENCE scope:SCOPE_SYS
-#define S_WAITCNT_0 s_wait_idle
-
-#define HW_REG_SHADER_FLAT_SCRATCH_LO HW_REG_WAVE_SCRATCH_BASE_LO
-#define HW_REG_SHADER_FLAT_SCRATCH_HI HW_REG_WAVE_SCRATCH_BASE_HI
-#define HW_REG_GPR_ALLOC HW_REG_WAVE_GPR_ALLOC
-#define HW_REG_LDS_ALLOC HW_REG_WAVE_LDS_ALLOC
-#define HW_REG_MODE HW_REG_WAVE_MODE
-#endif
-#if ASIC_FAMILY < CHIP_GFX12
var SQ_WAVE_STATUS_SPI_PRIO_MASK = 0x00000006
var SQ_WAVE_STATUS_HALT_MASK = 0x2000
var SQ_WAVE_STATUS_ECC_ERR_MASK = 0x20000
@@ -81,21 +64,6 @@ var S_STATUS_ALWAYS_CLEAR_MASK = SQ_WAVE_STATUS_SPI_PRIO_MASK|SQ_WAVE_STATUS_E
var S_STATUS_HALT_MASK = SQ_WAVE_STATUS_HALT_MASK
var S_SAVE_PC_HI_TRAP_ID_MASK = 0x00FF0000
var S_SAVE_PC_HI_HT_MASK = 0x01000000
-#else
-var SQ_WAVE_STATE_PRIV_BARRIER_COMPLETE_MASK = 0x4
-var SQ_WAVE_STATE_PRIV_SCC_SHIFT = 9
-var SQ_WAVE_STATE_PRIV_SYS_PRIO_MASK = 0xC00
-var SQ_WAVE_STATE_PRIV_HALT_MASK = 0x4000
-var SQ_WAVE_STATE_PRIV_POISON_ERR_MASK = 0x8000
-var SQ_WAVE_STATE_PRIV_POISON_ERR_SHIFT = 15
-var SQ_WAVE_STATUS_WAVE64_SHIFT = 29
-var SQ_WAVE_STATUS_WAVE64_SIZE = 1
-var SQ_WAVE_LDS_ALLOC_GRANULARITY = 9
-var S_STATUS_HWREG = HW_REG_WAVE_STATE_PRIV
-var S_STATUS_ALWAYS_CLEAR_MASK = SQ_WAVE_STATE_PRIV_SYS_PRIO_MASK|SQ_WAVE_STATE_PRIV_POISON_ERR_MASK
-var S_STATUS_HALT_MASK = SQ_WAVE_STATE_PRIV_HALT_MASK
-var S_SAVE_PC_HI_TRAP_ID_MASK = 0xF0000000
-#endif
var SQ_WAVE_STATUS_NO_VGPRS_SHIFT = 24
var SQ_WAVE_LDS_ALLOC_LDS_SIZE_SHIFT = 12
@@ -110,7 +78,6 @@ var SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SHIFT = 8
var SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SHIFT = 12
#endif
-#if ASIC_FAMILY < CHIP_GFX12
var SQ_WAVE_TRAPSTS_SAVECTX_MASK = 0x400
var SQ_WAVE_TRAPSTS_EXCP_MASK = 0x1FF
var SQ_WAVE_TRAPSTS_SAVECTX_SHIFT = 10
@@ -161,39 +128,6 @@ var S_TRAPSTS_RESTORE_PART_3_SIZE = 32 - S_TRAPSTS_RESTORE_PART_3_SHIFT
var S_TRAPSTS_HWREG = HW_REG_TRAPSTS
var S_TRAPSTS_SAVE_CONTEXT_MASK = SQ_WAVE_TRAPSTS_SAVECTX_MASK
var S_TRAPSTS_SAVE_CONTEXT_SHIFT = SQ_WAVE_TRAPSTS_SAVECTX_SHIFT
-#else
-var SQ_WAVE_EXCP_FLAG_PRIV_ADDR_WATCH_MASK = 0xF
-var SQ_WAVE_EXCP_FLAG_PRIV_MEM_VIOL_MASK = 0x10
-var SQ_WAVE_EXCP_FLAG_PRIV_SAVE_CONTEXT_SHIFT = 5
-var SQ_WAVE_EXCP_FLAG_PRIV_SAVE_CONTEXT_MASK = 0x20
-var SQ_WAVE_EXCP_FLAG_PRIV_ILLEGAL_INST_MASK = 0x40
-var SQ_WAVE_EXCP_FLAG_PRIV_ILLEGAL_INST_SHIFT = 6
-var SQ_WAVE_EXCP_FLAG_PRIV_HOST_TRAP_MASK = 0x80
-var SQ_WAVE_EXCP_FLAG_PRIV_HOST_TRAP_SHIFT = 7
-var SQ_WAVE_EXCP_FLAG_PRIV_WAVE_START_MASK = 0x100
-var SQ_WAVE_EXCP_FLAG_PRIV_WAVE_START_SHIFT = 8
-var SQ_WAVE_EXCP_FLAG_PRIV_WAVE_END_MASK = 0x200
-var SQ_WAVE_EXCP_FLAG_PRIV_TRAP_AFTER_INST_MASK = 0x800
-var SQ_WAVE_TRAP_CTRL_ADDR_WATCH_MASK = 0x80
-var SQ_WAVE_TRAP_CTRL_TRAP_AFTER_INST_MASK = 0x200
-
-var S_TRAPSTS_HWREG = HW_REG_WAVE_EXCP_FLAG_PRIV
-var S_TRAPSTS_SAVE_CONTEXT_MASK = SQ_WAVE_EXCP_FLAG_PRIV_SAVE_CONTEXT_MASK
-var S_TRAPSTS_SAVE_CONTEXT_SHIFT = SQ_WAVE_EXCP_FLAG_PRIV_SAVE_CONTEXT_SHIFT
-var S_TRAPSTS_NON_MASKABLE_EXCP_MASK = SQ_WAVE_EXCP_FLAG_PRIV_MEM_VIOL_MASK |\
- SQ_WAVE_EXCP_FLAG_PRIV_ILLEGAL_INST_MASK |\
- SQ_WAVE_EXCP_FLAG_PRIV_HOST_TRAP_MASK |\
- SQ_WAVE_EXCP_FLAG_PRIV_WAVE_START_MASK |\
- SQ_WAVE_EXCP_FLAG_PRIV_WAVE_END_MASK |\
- SQ_WAVE_EXCP_FLAG_PRIV_TRAP_AFTER_INST_MASK
-var S_TRAPSTS_RESTORE_PART_1_SIZE = SQ_WAVE_EXCP_FLAG_PRIV_SAVE_CONTEXT_SHIFT
-var S_TRAPSTS_RESTORE_PART_2_SHIFT = SQ_WAVE_EXCP_FLAG_PRIV_ILLEGAL_INST_SHIFT
-var S_TRAPSTS_RESTORE_PART_2_SIZE = SQ_WAVE_EXCP_FLAG_PRIV_HOST_TRAP_SHIFT - SQ_WAVE_EXCP_FLAG_PRIV_ILLEGAL_INST_SHIFT
-var S_TRAPSTS_RESTORE_PART_3_SHIFT = SQ_WAVE_EXCP_FLAG_PRIV_WAVE_START_SHIFT
-var S_TRAPSTS_RESTORE_PART_3_SIZE = 32 - S_TRAPSTS_RESTORE_PART_3_SHIFT
-var BARRIER_STATE_SIGNAL_OFFSET = 16
-var BARRIER_STATE_VALID_OFFSET = 0
-#endif
// bits [31:24] unused by SPI debug data
var TTMP11_SAVE_REPLAY_W64H_SHIFT = 31
@@ -305,11 +239,7 @@ L_TRAP_NO_BARRIER:
L_HALTED:
// Host trap may occur while wave is halted.
-#if ASIC_FAMILY < CHIP_GFX12
s_and_b32 ttmp2, s_save_pc_hi, S_SAVE_PC_HI_TRAP_ID_MASK
-#else
- s_and_b32 ttmp2, s_save_trapsts, SQ_WAVE_EXCP_FLAG_PRIV_HOST_TRAP_MASK
-#endif
s_cbranch_scc1 L_FETCH_2ND_TRAP
L_CHECK_SAVE:
@@ -336,7 +266,6 @@ L_NOT_HALTED:
// Check for maskable exceptions in trapsts.excp and trapsts.excp_hi.
// Maskable exceptions only cause the wave to enter the trap handler if
// their respective bit in mode.excp_en is set.
-#if ASIC_FAMILY < CHIP_GFX12
s_and_b32 ttmp2, s_save_trapsts, SQ_WAVE_TRAPSTS_EXCP_MASK|SQ_WAVE_TRAPSTS_EXCP_HI_MASK
s_cbranch_scc0 L_CHECK_TRAP_ID
@@ -349,17 +278,6 @@ L_NOT_ADDR_WATCH:
s_lshl_b32 ttmp2, ttmp2, SQ_WAVE_MODE_EXCP_EN_SHIFT
s_and_b32 ttmp2, ttmp2, ttmp3
s_cbranch_scc1 L_FETCH_2ND_TRAP
-#else
- s_getreg_b32 ttmp2, hwreg(HW_REG_WAVE_EXCP_FLAG_USER)
- s_and_b32 ttmp3, s_save_trapsts, SQ_WAVE_EXCP_FLAG_PRIV_ADDR_WATCH_MASK
- s_cbranch_scc0 L_NOT_ADDR_WATCH
- s_or_b32 ttmp2, ttmp2, SQ_WAVE_TRAP_CTRL_ADDR_WATCH_MASK
-
-L_NOT_ADDR_WATCH:
- s_getreg_b32 ttmp3, hwreg(HW_REG_WAVE_TRAP_CTRL)
- s_and_b32 ttmp2, ttmp3, ttmp2
- s_cbranch_scc1 L_FETCH_2ND_TRAP
-#endif
L_CHECK_TRAP_ID:
// Check trap_id != 0
@@ -369,13 +287,8 @@ L_CHECK_TRAP_ID:
#if SINGLE_STEP_MISSED_WORKAROUND
// Prioritize single step exception over context save.
// Second-level trap will halt wave and RFE, re-entering for SAVECTX.
-#if ASIC_FAMILY < CHIP_GFX12
s_getreg_b32 ttmp2, hwreg(HW_REG_MODE)
s_and_b32 ttmp2, ttmp2, SQ_WAVE_MODE_DEBUG_EN_MASK
-#else
- // WAVE_TRAP_CTRL is already in ttmp3.
- s_and_b32 ttmp3, ttmp3, SQ_WAVE_TRAP_CTRL_TRAP_AFTER_INST_MASK
-#endif
s_cbranch_scc1 L_FETCH_2ND_TRAP
#endif
@@ -425,12 +338,7 @@ L_NO_NEXT_TRAP:
s_cbranch_scc1 L_TRAP_CASE
// Host trap will not cause trap re-entry.
-#if ASIC_FAMILY < CHIP_GFX12
s_and_b32 ttmp2, s_save_pc_hi, S_SAVE_PC_HI_HT_MASK
-#else
- s_getreg_b32 ttmp2, hwreg(HW_REG_WAVE_EXCP_FLAG_PRIV)
- s_and_b32 ttmp2, ttmp2, SQ_WAVE_EXCP_FLAG_PRIV_HOST_TRAP_MASK
-#endif
s_cbranch_scc1 L_EXIT_TRAP
s_or_b32 s_save_status, s_save_status, S_STATUS_HALT_MASK
@@ -457,16 +365,7 @@ L_EXIT_TRAP:
s_and_b64 exec, exec, exec // Restore STATUS.EXECZ, not writable by s_setreg_b32
s_and_b64 vcc, vcc, vcc // Restore STATUS.VCCZ, not writable by s_setreg_b32
-#if ASIC_FAMILY < CHIP_GFX12
s_setreg_b32 hwreg(S_STATUS_HWREG), s_save_status
-#else
- // STATE_PRIV.BARRIER_COMPLETE may have changed since we read it.
- // Only restore fields which the trap handler changes.
- s_lshr_b32 s_save_status, s_save_status, SQ_WAVE_STATE_PRIV_SCC_SHIFT
- s_setreg_b32 hwreg(S_STATUS_HWREG, SQ_WAVE_STATE_PRIV_SCC_SHIFT, \
- SQ_WAVE_STATE_PRIV_POISON_ERR_SHIFT - SQ_WAVE_STATE_PRIV_SCC_SHIFT + 1), s_save_status
-#endif
-
s_rfe_b64 [ttmp0, ttmp1]
L_SAVE:
@@ -478,14 +377,6 @@ L_SAVE:
s_endpgm
L_HAVE_VGPRS:
#endif
-#if ASIC_FAMILY >= CHIP_GFX12
- s_getreg_b32 s_save_tmp, hwreg(HW_REG_WAVE_STATUS)
- s_bitcmp1_b32 s_save_tmp, SQ_WAVE_STATUS_NO_VGPRS_SHIFT
- s_cbranch_scc0 L_HAVE_VGPRS
- s_endpgm
-L_HAVE_VGPRS:
-#endif
-
s_and_b32 s_save_pc_hi, s_save_pc_hi, 0x0000ffff //pc[47:32]
s_mov_b32 s_save_tmp, 0
s_setreg_b32 hwreg(S_TRAPSTS_HWREG, S_TRAPSTS_SAVE_CONTEXT_SHIFT, 1), s_save_tmp //clear saveCtx bit
@@ -671,19 +562,6 @@ L_SAVE_HWREG:
s_mov_b32 m0, 0x0 //Next lane of v2 to write to
#endif
-#if ASIC_FAMILY >= CHIP_GFX12
- // Ensure no further changes to barrier or LDS state.
- // STATE_PRIV.BARRIER_COMPLETE may change up to this point.
- s_barrier_signal -2
- s_barrier_wait -2
-
- // Re-read final state of BARRIER_COMPLETE field for save.
- s_getreg_b32 s_save_tmp, hwreg(S_STATUS_HWREG)
- s_and_b32 s_save_tmp, s_save_tmp, SQ_WAVE_STATE_PRIV_BARRIER_COMPLETE_MASK
- s_andn2_b32 s_save_status, s_save_status, SQ_WAVE_STATE_PRIV_BARRIER_COMPLETE_MASK
- s_or_b32 s_save_status, s_save_status, s_save_tmp
-#endif
-
write_hwreg_to_mem(s_save_m0, s_save_buf_rsrc0, s_save_mem_offset)
write_hwreg_to_mem(s_save_pc_lo, s_save_buf_rsrc0, s_save_mem_offset)
s_andn2_b32 s_save_tmp, s_save_pc_hi, S_SAVE_PC_HI_FIRST_WAVE_MASK
@@ -707,21 +585,6 @@ L_SAVE_HWREG:
s_getreg_b32 s_save_m0, hwreg(HW_REG_SHADER_FLAT_SCRATCH_HI)
write_hwreg_to_mem(s_save_m0, s_save_buf_rsrc0, s_save_mem_offset)
-#if ASIC_FAMILY >= CHIP_GFX12
- s_getreg_b32 s_save_m0, hwreg(HW_REG_WAVE_EXCP_FLAG_USER)
- write_hwreg_to_mem(s_save_m0, s_save_buf_rsrc0, s_save_mem_offset)
-
- s_getreg_b32 s_save_m0, hwreg(HW_REG_WAVE_TRAP_CTRL)
- write_hwreg_to_mem(s_save_m0, s_save_buf_rsrc0, s_save_mem_offset)
-
- s_getreg_b32 s_save_tmp, hwreg(HW_REG_WAVE_STATUS)
- write_hwreg_to_mem(s_save_tmp, s_save_buf_rsrc0, s_save_mem_offset)
-
- s_get_barrier_state s_save_tmp, -1
- s_wait_kmcnt (0)
- write_hwreg_to_mem(s_save_tmp, s_save_buf_rsrc0, s_save_mem_offset)
-#endif
-
#if NO_SQC_STORE
// Write HWREGs with 16 VGPR lanes. TTMPs occupy space after this.
s_mov_b32 exec_lo, 0xFFFF
@@ -814,9 +677,7 @@ L_SAVE_LDS_NORMAL:
s_and_b32 s_save_alloc_size, s_save_alloc_size, 0xFFFFFFFF //lds_size is zero?
s_cbranch_scc0 L_SAVE_LDS_DONE //no lds used? jump to L_SAVE_DONE
-#if ASIC_FAMILY < CHIP_GFX12
s_barrier //LDS is used? wait for other waves in the same TG
-#endif
s_and_b32 s_save_tmp, s_save_pc_hi, S_SAVE_PC_HI_FIRST_WAVE_MASK
s_cbranch_scc0 L_SAVE_LDS_DONE
@@ -1081,11 +942,6 @@ L_RESTORE:
s_mov_b32 s_restore_buf_rsrc2, 0 //NUM_RECORDS initial value = 0 (in bytes)
s_mov_b32 s_restore_buf_rsrc3, S_RESTORE_BUF_RSRC_WORD3_MISC
-#if ASIC_FAMILY >= CHIP_GFX12
- // Save s_restore_spi_init_hi for later use.
- s_mov_b32 s_restore_spi_init_hi_save, s_restore_spi_init_hi
-#endif
-
//determine it is wave32 or wave64
get_wave_size2(s_restore_size)
@@ -1320,9 +1176,7 @@ L_RESTORE_SGPR:
// s_barrier with MODE.DEBUG_EN=1, STATUS.PRIV=1 incorrectly asserts debug exception.
// Clear DEBUG_EN before and restore MODE after the barrier.
s_setreg_imm32_b32 hwreg(HW_REG_MODE), 0
-#if ASIC_FAMILY < CHIP_GFX12
s_barrier //barrier to ensure the readiness of LDS before access attemps from any other wave in the same TG
-#endif
/* restore HW registers */
L_RESTORE_HWREG:
@@ -1334,11 +1188,6 @@ L_RESTORE_HWREG:
s_mov_b32 s_restore_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes
-#if ASIC_FAMILY >= CHIP_GFX12
- // Restore s_restore_spi_init_hi before the saved value gets clobbered.
- s_mov_b32 s_restore_spi_init_hi, s_restore_spi_init_hi_save
-#endif
-
read_hwreg_from_mem(s_restore_m0, s_restore_buf_rsrc0, s_restore_mem_offset)
read_hwreg_from_mem(s_restore_pc_lo, s_restore_buf_rsrc0, s_restore_mem_offset)
read_hwreg_from_mem(s_restore_pc_hi, s_restore_buf_rsrc0, s_restore_mem_offset)
@@ -1358,44 +1207,6 @@ L_RESTORE_HWREG:
s_setreg_b32 hwreg(HW_REG_SHADER_FLAT_SCRATCH_HI), s_restore_flat_scratch
-#if ASIC_FAMILY >= CHIP_GFX12
- read_hwreg_from_mem(s_restore_tmp, s_restore_buf_rsrc0, s_restore_mem_offset)
- S_WAITCNT_0
- s_setreg_b32 hwreg(HW_REG_WAVE_EXCP_FLAG_USER), s_restore_tmp
-
- read_hwreg_from_mem(s_restore_tmp, s_restore_buf_rsrc0, s_restore_mem_offset)
- S_WAITCNT_0
- s_setreg_b32 hwreg(HW_REG_WAVE_TRAP_CTRL), s_restore_tmp
-
- // Only the first wave needs to restore the workgroup barrier.
- s_and_b32 s_restore_tmp, s_restore_spi_init_hi, S_RESTORE_SPI_INIT_FIRST_WAVE_MASK
- s_cbranch_scc0 L_SKIP_BARRIER_RESTORE
-
- // Skip over WAVE_STATUS, since there is no state to restore from it
- s_add_u32 s_restore_mem_offset, s_restore_mem_offset, 4
-
- read_hwreg_from_mem(s_restore_tmp, s_restore_buf_rsrc0, s_restore_mem_offset)
- S_WAITCNT_0
-
- s_bitcmp1_b32 s_restore_tmp, BARRIER_STATE_VALID_OFFSET
- s_cbranch_scc0 L_SKIP_BARRIER_RESTORE
-
- // extract the saved signal count from s_restore_tmp
- s_lshr_b32 s_restore_tmp, s_restore_tmp, BARRIER_STATE_SIGNAL_OFFSET
-
- // We need to call s_barrier_signal repeatedly to restore the signal
- // count of the work group barrier. The member count is already
- // initialized with the number of waves in the work group.
-L_BARRIER_RESTORE_LOOP:
- s_and_b32 s_restore_tmp, s_restore_tmp, s_restore_tmp
- s_cbranch_scc0 L_SKIP_BARRIER_RESTORE
- s_barrier_signal -1
- s_add_i32 s_restore_tmp, s_restore_tmp, -1
- s_branch L_BARRIER_RESTORE_LOOP
-
-L_SKIP_BARRIER_RESTORE:
-#endif
-
s_mov_b32 m0, s_restore_m0
s_mov_b32 exec_lo, s_restore_exec_lo
s_mov_b32 exec_hi, s_restore_exec_hi
@@ -1453,13 +1264,6 @@ L_RETURN_WITHOUT_PRIV:
s_setreg_b32 hwreg(S_STATUS_HWREG), s_restore_status // SCC is included, which is changed by previous salu
-#if ASIC_FAMILY >= CHIP_GFX12
- // Make barrier and LDS state visible to all waves in the group.
- // STATE_PRIV.BARRIER_COMPLETE may change after this point.
- s_barrier_signal -2
- s_barrier_wait -2
-#endif
-
s_rfe_b64 s_restore_pc_lo //Return to the main shader program and resume execution
L_END_PGM:
@@ -1598,11 +1402,7 @@ function get_hwreg_size_bytes
end
function get_wave_size2(s_reg)
-#if ASIC_FAMILY < CHIP_GFX12
s_getreg_b32 s_reg, hwreg(HW_REG_IB_STS2,SQ_WAVE_IB_STS2_WAVE64_SHIFT,SQ_WAVE_IB_STS2_WAVE64_SIZE)
-#else
- s_getreg_b32 s_reg, hwreg(HW_REG_WAVE_STATUS,SQ_WAVE_STATUS_WAVE64_SHIFT,SQ_WAVE_STATUS_WAVE64_SIZE)
-#endif
s_lshl_b32 s_reg, s_reg, S_WAVE_SIZE
end
diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx12.asm b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx12.asm
new file mode 100644
index 000000000000..5a1a1b1f897f
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx12.asm
@@ -0,0 +1,1136 @@
+/*
+ * Copyright 2018 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/* To compile this assembly code:
+ *
+ * gfx12:
+ * cpp -DASIC_FAMILY=CHIP_GFX12 cwsr_trap_handler_gfx12.asm -P -o gfx12.sp3
+ * sp3 gfx12.sp3 -hex gfx12.hex
+ */
+
+#define CHIP_GFX12 37
+
+#define SINGLE_STEP_MISSED_WORKAROUND 1 //workaround for lost TRAP_AFTER_INST exception when SAVECTX raised
+#define HAVE_VALU_SGPR_HAZARD (ASIC_FAMILY == CHIP_GFX12)
+
+var SQ_WAVE_STATE_PRIV_BARRIER_COMPLETE_MASK = 0x4
+var SQ_WAVE_STATE_PRIV_SCC_SHIFT = 9
+var SQ_WAVE_STATE_PRIV_SYS_PRIO_MASK = 0xC00
+var SQ_WAVE_STATE_PRIV_HALT_MASK = 0x4000
+var SQ_WAVE_STATE_PRIV_POISON_ERR_MASK = 0x8000
+var SQ_WAVE_STATE_PRIV_POISON_ERR_SHIFT = 15
+var SQ_WAVE_STATUS_WAVE64_SHIFT = 29
+var SQ_WAVE_STATUS_WAVE64_SIZE = 1
+var SQ_WAVE_STATUS_NO_VGPRS_SHIFT = 24
+var SQ_WAVE_STATE_PRIV_ALWAYS_CLEAR_MASK = SQ_WAVE_STATE_PRIV_SYS_PRIO_MASK|SQ_WAVE_STATE_PRIV_POISON_ERR_MASK
+var S_SAVE_PC_HI_TRAP_ID_MASK = 0xF0000000
+
+var SQ_WAVE_LDS_ALLOC_LDS_SIZE_SHIFT = 12
+var SQ_WAVE_LDS_ALLOC_LDS_SIZE_SIZE = 9
+var SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SIZE = 8
+var SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SHIFT = 12
+var SQ_WAVE_LDS_ALLOC_VGPR_SHARED_SIZE_SHIFT = 24
+var SQ_WAVE_LDS_ALLOC_VGPR_SHARED_SIZE_SIZE = 4
+var SQ_WAVE_LDS_ALLOC_GRANULARITY = 9
+
+var SQ_WAVE_EXCP_FLAG_PRIV_ADDR_WATCH_MASK = 0xF
+var SQ_WAVE_EXCP_FLAG_PRIV_MEM_VIOL_MASK = 0x10
+var SQ_WAVE_EXCP_FLAG_PRIV_SAVE_CONTEXT_SHIFT = 5
+var SQ_WAVE_EXCP_FLAG_PRIV_SAVE_CONTEXT_MASK = 0x20
+var SQ_WAVE_EXCP_FLAG_PRIV_ILLEGAL_INST_MASK = 0x40
+var SQ_WAVE_EXCP_FLAG_PRIV_ILLEGAL_INST_SHIFT = 6
+var SQ_WAVE_EXCP_FLAG_PRIV_HOST_TRAP_MASK = 0x80
+var SQ_WAVE_EXCP_FLAG_PRIV_HOST_TRAP_SHIFT = 7
+var SQ_WAVE_EXCP_FLAG_PRIV_WAVE_START_MASK = 0x100
+var SQ_WAVE_EXCP_FLAG_PRIV_WAVE_START_SHIFT = 8
+var SQ_WAVE_EXCP_FLAG_PRIV_WAVE_END_MASK = 0x200
+var SQ_WAVE_EXCP_FLAG_PRIV_TRAP_AFTER_INST_MASK = 0x800
+var SQ_WAVE_TRAP_CTRL_ADDR_WATCH_MASK = 0x80
+var SQ_WAVE_TRAP_CTRL_TRAP_AFTER_INST_MASK = 0x200
+
+var SQ_WAVE_EXCP_FLAG_PRIV_NON_MASKABLE_EXCP_MASK= SQ_WAVE_EXCP_FLAG_PRIV_MEM_VIOL_MASK |\
+ SQ_WAVE_EXCP_FLAG_PRIV_ILLEGAL_INST_MASK |\
+ SQ_WAVE_EXCP_FLAG_PRIV_HOST_TRAP_MASK |\
+ SQ_WAVE_EXCP_FLAG_PRIV_WAVE_START_MASK |\
+ SQ_WAVE_EXCP_FLAG_PRIV_WAVE_END_MASK |\
+ SQ_WAVE_EXCP_FLAG_PRIV_TRAP_AFTER_INST_MASK
+var SQ_WAVE_EXCP_FLAG_PRIV_RESTORE_PART_1_SIZE = SQ_WAVE_EXCP_FLAG_PRIV_SAVE_CONTEXT_SHIFT
+var SQ_WAVE_EXCP_FLAG_PRIV_RESTORE_PART_2_SHIFT = SQ_WAVE_EXCP_FLAG_PRIV_ILLEGAL_INST_SHIFT
+var SQ_WAVE_EXCP_FLAG_PRIV_RESTORE_PART_2_SIZE = SQ_WAVE_EXCP_FLAG_PRIV_HOST_TRAP_SHIFT - SQ_WAVE_EXCP_FLAG_PRIV_ILLEGAL_INST_SHIFT
+var SQ_WAVE_EXCP_FLAG_PRIV_RESTORE_PART_3_SHIFT = SQ_WAVE_EXCP_FLAG_PRIV_WAVE_START_SHIFT
+var SQ_WAVE_EXCP_FLAG_PRIV_RESTORE_PART_3_SIZE = 32 - SQ_WAVE_EXCP_FLAG_PRIV_RESTORE_PART_3_SHIFT
+var BARRIER_STATE_SIGNAL_OFFSET = 16
+var BARRIER_STATE_VALID_OFFSET = 0
+
+var TTMP11_DEBUG_TRAP_ENABLED_SHIFT = 23
+var TTMP11_DEBUG_TRAP_ENABLED_MASK = 0x800000
+
+// SQ_SEL_X/Y/Z/W, BUF_NUM_FORMAT_FLOAT, (0 for MUBUF stride[17:14]
+// when ADD_TID_ENABLE and BUF_DATA_FORMAT_32 for MTBUF), ADD_TID_ENABLE
+var S_SAVE_BUF_RSRC_WORD1_STRIDE = 0x00040000
+var S_SAVE_BUF_RSRC_WORD3_MISC = 0x10807FAC
+var S_SAVE_SPI_INIT_FIRST_WAVE_MASK = 0x04000000
+var S_SAVE_SPI_INIT_FIRST_WAVE_SHIFT = 26
+
+var S_SAVE_PC_HI_FIRST_WAVE_MASK = 0x80000000
+var S_SAVE_PC_HI_FIRST_WAVE_SHIFT = 31
+
+var s_sgpr_save_num = 108
+
+var s_save_spi_init_lo = exec_lo
+var s_save_spi_init_hi = exec_hi
+var s_save_pc_lo = ttmp0
+var s_save_pc_hi = ttmp1
+var s_save_exec_lo = ttmp2
+var s_save_exec_hi = ttmp3
+var s_save_state_priv = ttmp12
+var s_save_excp_flag_priv = ttmp15
+var s_save_xnack_mask = s_save_excp_flag_priv
+var s_wave_size = ttmp7
+var s_save_buf_rsrc0 = ttmp8
+var s_save_buf_rsrc1 = ttmp9
+var s_save_buf_rsrc2 = ttmp10
+var s_save_buf_rsrc3 = ttmp11
+var s_save_mem_offset = ttmp4
+var s_save_alloc_size = s_save_excp_flag_priv
+var s_save_tmp = ttmp14
+var s_save_m0 = ttmp5
+var s_save_ttmps_lo = s_save_tmp
+var s_save_ttmps_hi = s_save_excp_flag_priv
+
+var S_RESTORE_BUF_RSRC_WORD1_STRIDE = S_SAVE_BUF_RSRC_WORD1_STRIDE
+var S_RESTORE_BUF_RSRC_WORD3_MISC = S_SAVE_BUF_RSRC_WORD3_MISC
+
+var S_RESTORE_SPI_INIT_FIRST_WAVE_MASK = 0x04000000
+var S_RESTORE_SPI_INIT_FIRST_WAVE_SHIFT = 26
+var S_WAVE_SIZE = 25
+
+var s_restore_spi_init_lo = exec_lo
+var s_restore_spi_init_hi = exec_hi
+var s_restore_mem_offset = ttmp12
+var s_restore_alloc_size = ttmp3
+var s_restore_tmp = ttmp2
+var s_restore_mem_offset_save = s_restore_tmp
+var s_restore_m0 = s_restore_alloc_size
+var s_restore_mode = ttmp7
+var s_restore_flat_scratch = s_restore_tmp
+var s_restore_pc_lo = ttmp0
+var s_restore_pc_hi = ttmp1
+var s_restore_exec_lo = ttmp4
+var s_restore_exec_hi = ttmp5
+var s_restore_state_priv = ttmp14
+var s_restore_excp_flag_priv = ttmp15
+var s_restore_xnack_mask = ttmp13
+var s_restore_buf_rsrc0 = ttmp8
+var s_restore_buf_rsrc1 = ttmp9
+var s_restore_buf_rsrc2 = ttmp10
+var s_restore_buf_rsrc3 = ttmp11
+var s_restore_size = ttmp6
+var s_restore_ttmps_lo = s_restore_tmp
+var s_restore_ttmps_hi = s_restore_alloc_size
+var s_restore_spi_init_hi_save = s_restore_exec_hi
+
+shader main
+ asic(DEFAULT)
+ type(CS)
+ wave_size(32)
+
+ s_branch L_SKIP_RESTORE //NOT restore. might be a regular trap or save
+
+L_JUMP_TO_RESTORE:
+ s_branch L_RESTORE
+
+L_SKIP_RESTORE:
+ s_getreg_b32 s_save_state_priv, hwreg(HW_REG_WAVE_STATE_PRIV) //save STATUS since we will change SCC
+
+ // Clear SPI_PRIO: do not save with elevated priority.
+ // Clear ECC_ERR: prevents SQC store and triggers FATAL_HALT if setreg'd.
+ s_andn2_b32 s_save_state_priv, s_save_state_priv, SQ_WAVE_STATE_PRIV_ALWAYS_CLEAR_MASK
+
+ s_getreg_b32 s_save_excp_flag_priv, hwreg(HW_REG_WAVE_EXCP_FLAG_PRIV)
+
+ s_and_b32 ttmp2, s_save_state_priv, SQ_WAVE_STATE_PRIV_HALT_MASK
+ s_cbranch_scc0 L_NOT_HALTED
+
+L_HALTED:
+ // Host trap may occur while wave is halted.
+ s_and_b32 ttmp2, s_save_excp_flag_priv, SQ_WAVE_EXCP_FLAG_PRIV_HOST_TRAP_MASK
+ s_cbranch_scc1 L_FETCH_2ND_TRAP
+
+L_CHECK_SAVE:
+ s_and_b32 ttmp2, s_save_excp_flag_priv, SQ_WAVE_EXCP_FLAG_PRIV_SAVE_CONTEXT_MASK
+ s_cbranch_scc1 L_SAVE
+
+ // Wave is halted but neither host trap nor SAVECTX is raised.
+ // Caused by instruction fetch memory violation.
+ // Spin wait until context saved to prevent interrupt storm.
+ s_sleep 0x10
+ s_getreg_b32 s_save_excp_flag_priv, hwreg(HW_REG_WAVE_EXCP_FLAG_PRIV)
+ s_branch L_CHECK_SAVE
+
+L_NOT_HALTED:
+ // Let second-level handle non-SAVECTX exception or trap.
+ // Any concurrent SAVECTX will be handled upon re-entry once halted.
+
+ // Check non-maskable exceptions. memory_violation, illegal_instruction
+ // and xnack_error exceptions always cause the wave to enter the trap
+ // handler.
+ s_and_b32 ttmp2, s_save_excp_flag_priv, SQ_WAVE_EXCP_FLAG_PRIV_NON_MASKABLE_EXCP_MASK
+ s_cbranch_scc1 L_FETCH_2ND_TRAP
+
+ // Check for maskable exceptions in trapsts.excp and trapsts.excp_hi.
+ // Maskable exceptions only cause the wave to enter the trap handler if
+ // their respective bit in mode.excp_en is set.
+ s_getreg_b32 ttmp2, hwreg(HW_REG_WAVE_EXCP_FLAG_USER)
+ s_and_b32 ttmp3, s_save_excp_flag_priv, SQ_WAVE_EXCP_FLAG_PRIV_ADDR_WATCH_MASK
+ s_cbranch_scc0 L_NOT_ADDR_WATCH
+ s_or_b32 ttmp2, ttmp2, SQ_WAVE_TRAP_CTRL_ADDR_WATCH_MASK
+
+L_NOT_ADDR_WATCH:
+ s_getreg_b32 ttmp3, hwreg(HW_REG_WAVE_TRAP_CTRL)
+ s_and_b32 ttmp2, ttmp3, ttmp2
+ s_cbranch_scc1 L_FETCH_2ND_TRAP
+
+L_CHECK_TRAP_ID:
+ // Check trap_id != 0
+ s_and_b32 ttmp2, s_save_pc_hi, S_SAVE_PC_HI_TRAP_ID_MASK
+ s_cbranch_scc1 L_FETCH_2ND_TRAP
+
+#if SINGLE_STEP_MISSED_WORKAROUND
+ // Prioritize single step exception over context save.
+ // Second-level trap will halt wave and RFE, re-entering for SAVECTX.
+ // WAVE_TRAP_CTRL is already in ttmp3.
+ s_and_b32 ttmp3, ttmp3, SQ_WAVE_TRAP_CTRL_TRAP_AFTER_INST_MASK
+ s_cbranch_scc1 L_FETCH_2ND_TRAP
+#endif
+
+ s_and_b32 ttmp2, s_save_excp_flag_priv, SQ_WAVE_EXCP_FLAG_PRIV_SAVE_CONTEXT_MASK
+ s_cbranch_scc1 L_SAVE
+
+L_FETCH_2ND_TRAP:
+ // Read second-level TBA/TMA from first-level TMA and jump if available.
+ // ttmp[2:5] and ttmp12 can be used (others hold SPI-initialized debug data)
+ // ttmp12 holds SQ_WAVE_STATUS
+ s_sendmsg_rtn_b64 [ttmp14, ttmp15], sendmsg(MSG_RTN_GET_TMA)
+ s_wait_idle
+ s_lshl_b64 [ttmp14, ttmp15], [ttmp14, ttmp15], 0x8
+
+ s_bitcmp1_b32 ttmp15, 0xF
+ s_cbranch_scc0 L_NO_SIGN_EXTEND_TMA
+ s_or_b32 ttmp15, ttmp15, 0xFFFF0000
+L_NO_SIGN_EXTEND_TMA:
+
+ s_load_dword ttmp2, [ttmp14, ttmp15], 0x10 scope:SCOPE_SYS // debug trap enabled flag
+ s_wait_idle
+ s_lshl_b32 ttmp2, ttmp2, TTMP11_DEBUG_TRAP_ENABLED_SHIFT
+ s_andn2_b32 ttmp11, ttmp11, TTMP11_DEBUG_TRAP_ENABLED_MASK
+ s_or_b32 ttmp11, ttmp11, ttmp2
+
+ s_load_dwordx2 [ttmp2, ttmp3], [ttmp14, ttmp15], 0x0 scope:SCOPE_SYS // second-level TBA
+ s_wait_idle
+ s_load_dwordx2 [ttmp14, ttmp15], [ttmp14, ttmp15], 0x8 scope:SCOPE_SYS // second-level TMA
+ s_wait_idle
+
+ s_and_b64 [ttmp2, ttmp3], [ttmp2, ttmp3], [ttmp2, ttmp3]
+ s_cbranch_scc0 L_NO_NEXT_TRAP // second-level trap handler not been set
+ s_setpc_b64 [ttmp2, ttmp3] // jump to second-level trap handler
+
+L_NO_NEXT_TRAP:
+ // If not caused by trap then halt wave to prevent re-entry.
+ s_and_b32 ttmp2, s_save_pc_hi, S_SAVE_PC_HI_TRAP_ID_MASK
+ s_cbranch_scc1 L_TRAP_CASE
+
+ // Host trap will not cause trap re-entry.
+ s_getreg_b32 ttmp2, hwreg(HW_REG_WAVE_EXCP_FLAG_PRIV)
+ s_and_b32 ttmp2, ttmp2, SQ_WAVE_EXCP_FLAG_PRIV_HOST_TRAP_MASK
+ s_cbranch_scc1 L_EXIT_TRAP
+ s_or_b32 s_save_state_priv, s_save_state_priv, SQ_WAVE_STATE_PRIV_HALT_MASK
+
+ // If the PC points to S_ENDPGM then context save will fail if STATE_PRIV.HALT is set.
+ // Rewind the PC to prevent this from occurring.
+ s_sub_u32 ttmp0, ttmp0, 0x8
+ s_subb_u32 ttmp1, ttmp1, 0x0
+
+ s_branch L_EXIT_TRAP
+
+L_TRAP_CASE:
+ // Advance past trap instruction to prevent re-entry.
+ s_add_u32 ttmp0, ttmp0, 0x4
+ s_addc_u32 ttmp1, ttmp1, 0x0
+
+L_EXIT_TRAP:
+ s_and_b32 ttmp1, ttmp1, 0xFFFF
+
+ // Restore SQ_WAVE_STATUS.
+ s_and_b64 exec, exec, exec // Restore STATUS.EXECZ, not writable by s_setreg_b32
+ s_and_b64 vcc, vcc, vcc // Restore STATUS.VCCZ, not writable by s_setreg_b32
+
+ // STATE_PRIV.BARRIER_COMPLETE may have changed since we read it.
+ // Only restore fields which the trap handler changes.
+ s_lshr_b32 s_save_state_priv, s_save_state_priv, SQ_WAVE_STATE_PRIV_SCC_SHIFT
+ s_setreg_b32 hwreg(HW_REG_WAVE_STATE_PRIV, SQ_WAVE_STATE_PRIV_SCC_SHIFT, \
+ SQ_WAVE_STATE_PRIV_POISON_ERR_SHIFT - SQ_WAVE_STATE_PRIV_SCC_SHIFT + 1), s_save_state_priv
+
+ s_rfe_b64 [ttmp0, ttmp1]
+
+L_SAVE:
+ // If VGPRs have been deallocated then terminate the wavefront.
+ // It has no remaining program to run and cannot save without VGPRs.
+ s_getreg_b32 s_save_tmp, hwreg(HW_REG_WAVE_STATUS)
+ s_bitcmp1_b32 s_save_tmp, SQ_WAVE_STATUS_NO_VGPRS_SHIFT
+ s_cbranch_scc0 L_HAVE_VGPRS
+ s_endpgm
+L_HAVE_VGPRS:
+
+ s_and_b32 s_save_pc_hi, s_save_pc_hi, 0x0000ffff //pc[47:32]
+ s_mov_b32 s_save_tmp, 0
+ s_setreg_b32 hwreg(HW_REG_WAVE_EXCP_FLAG_PRIV, SQ_WAVE_EXCP_FLAG_PRIV_SAVE_CONTEXT_SHIFT, 1), s_save_tmp //clear saveCtx bit
+
+ /* inform SPI the readiness and wait for SPI's go signal */
+ s_mov_b32 s_save_exec_lo, exec_lo //save EXEC and use EXEC for the go signal from SPI
+ s_mov_b32 s_save_exec_hi, exec_hi
+ s_mov_b64 exec, 0x0 //clear EXEC to get ready to receive
+
+ s_sendmsg_rtn_b64 [exec_lo, exec_hi], sendmsg(MSG_RTN_SAVE_WAVE)
+ s_wait_idle
+
+ // Save first_wave flag so we can clear high bits of save address.
+ s_and_b32 s_save_tmp, s_save_spi_init_hi, S_SAVE_SPI_INIT_FIRST_WAVE_MASK
+ s_lshl_b32 s_save_tmp, s_save_tmp, (S_SAVE_PC_HI_FIRST_WAVE_SHIFT - S_SAVE_SPI_INIT_FIRST_WAVE_SHIFT)
+ s_or_b32 s_save_pc_hi, s_save_pc_hi, s_save_tmp
+
+ // Trap temporaries must be saved via VGPR but all VGPRs are in use.
+ // There is no ttmp space to hold the resource constant for VGPR save.
+ // Save v0 by itself since it requires only two SGPRs.
+ s_mov_b32 s_save_ttmps_lo, exec_lo
+ s_and_b32 s_save_ttmps_hi, exec_hi, 0xFFFF
+ s_mov_b32 exec_lo, 0xFFFFFFFF
+ s_mov_b32 exec_hi, 0xFFFFFFFF
+ global_store_dword_addtid v0, [s_save_ttmps_lo, s_save_ttmps_hi] scope:SCOPE_SYS
+ v_mov_b32 v0, 0x0
+ s_mov_b32 exec_lo, s_save_ttmps_lo
+ s_mov_b32 exec_hi, s_save_ttmps_hi
+
+ // Save trap temporaries 4-11, 13 initialized by SPI debug dispatch logic
+ // ttmp SR memory offset : size(VGPR)+size(SVGPR)+size(SGPR)+0x40
+ get_wave_size2(s_save_ttmps_hi)
+ get_vgpr_size_bytes(s_save_ttmps_lo, s_save_ttmps_hi)
+ get_svgpr_size_bytes(s_save_ttmps_hi)
+ s_add_u32 s_save_ttmps_lo, s_save_ttmps_lo, s_save_ttmps_hi
+ s_and_b32 s_save_ttmps_hi, s_save_spi_init_hi, 0xFFFF
+ s_add_u32 s_save_ttmps_lo, s_save_ttmps_lo, get_sgpr_size_bytes()
+ s_add_u32 s_save_ttmps_lo, s_save_ttmps_lo, s_save_spi_init_lo
+ s_addc_u32 s_save_ttmps_hi, s_save_ttmps_hi, 0x0
+
+ v_writelane_b32 v0, ttmp4, 0x4
+ v_writelane_b32 v0, ttmp5, 0x5
+ v_writelane_b32 v0, ttmp6, 0x6
+ v_writelane_b32 v0, ttmp7, 0x7
+ v_writelane_b32 v0, ttmp8, 0x8
+ v_writelane_b32 v0, ttmp9, 0x9
+ v_writelane_b32 v0, ttmp10, 0xA
+ v_writelane_b32 v0, ttmp11, 0xB
+ v_writelane_b32 v0, ttmp13, 0xD
+ v_writelane_b32 v0, exec_lo, 0xE
+ v_writelane_b32 v0, exec_hi, 0xF
+ valu_sgpr_hazard()
+
+ s_mov_b32 exec_lo, 0x3FFF
+ s_mov_b32 exec_hi, 0x0
+ global_store_dword_addtid v0, [s_save_ttmps_lo, s_save_ttmps_hi] offset:0x40 scope:SCOPE_SYS
+ v_readlane_b32 ttmp14, v0, 0xE
+ v_readlane_b32 ttmp15, v0, 0xF
+ s_mov_b32 exec_lo, ttmp14
+ s_mov_b32 exec_hi, ttmp15
+
+ /* setup Resource Contants */
+ s_mov_b32 s_save_buf_rsrc0, s_save_spi_init_lo //base_addr_lo
+ s_and_b32 s_save_buf_rsrc1, s_save_spi_init_hi, 0x0000FFFF //base_addr_hi
+ s_or_b32 s_save_buf_rsrc1, s_save_buf_rsrc1, S_SAVE_BUF_RSRC_WORD1_STRIDE
+ s_mov_b32 s_save_buf_rsrc2, 0 //NUM_RECORDS initial value = 0 (in bytes) although not neccessarily inited
+ s_mov_b32 s_save_buf_rsrc3, S_SAVE_BUF_RSRC_WORD3_MISC
+
+ s_mov_b32 s_save_m0, m0
+
+ /* global mem offset */
+ s_mov_b32 s_save_mem_offset, 0x0
+ get_wave_size2(s_wave_size)
+
+ /* save first 4 VGPRs, needed for SGPR save */
+ s_mov_b32 exec_lo, 0xFFFFFFFF //need every thread from now on
+ s_lshr_b32 m0, s_wave_size, S_WAVE_SIZE
+ s_and_b32 m0, m0, 1
+ s_cmp_eq_u32 m0, 1
+ s_cbranch_scc1 L_ENABLE_SAVE_4VGPR_EXEC_HI
+ s_mov_b32 exec_hi, 0x00000000
+ s_branch L_SAVE_4VGPR_WAVE32
+L_ENABLE_SAVE_4VGPR_EXEC_HI:
+ s_mov_b32 exec_hi, 0xFFFFFFFF
+ s_branch L_SAVE_4VGPR_WAVE64
+L_SAVE_4VGPR_WAVE32:
+ s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes
+
+ // VGPR Allocated in 4-GPR granularity
+
+ buffer_store_dword v1, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS offset:128
+ buffer_store_dword v2, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS offset:128*2
+ buffer_store_dword v3, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS offset:128*3
+ s_branch L_SAVE_HWREG
+
+L_SAVE_4VGPR_WAVE64:
+ s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes
+
+ // VGPR Allocated in 4-GPR granularity
+
+ buffer_store_dword v1, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS offset:256
+ buffer_store_dword v2, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS offset:256*2
+ buffer_store_dword v3, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS offset:256*3
+
+ /* save HW registers */
+
+L_SAVE_HWREG:
+ // HWREG SR memory offset : size(VGPR)+size(SVGPR)+size(SGPR)
+ get_vgpr_size_bytes(s_save_mem_offset, s_wave_size)
+ get_svgpr_size_bytes(s_save_tmp)
+ s_add_u32 s_save_mem_offset, s_save_mem_offset, s_save_tmp
+ s_add_u32 s_save_mem_offset, s_save_mem_offset, get_sgpr_size_bytes()
+
+ s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes
+
+ v_mov_b32 v0, 0x0 //Offset[31:0] from buffer resource
+ v_mov_b32 v1, 0x0 //Offset[63:32] from buffer resource
+ v_mov_b32 v2, 0x0 //Set of SGPRs for TCP store
+
+ // Ensure no further changes to barrier or LDS state.
+ // STATE_PRIV.BARRIER_COMPLETE may change up to this point.
+ s_barrier_signal -2
+ s_barrier_wait -2
+
+ // Re-read final state of BARRIER_COMPLETE field for save.
+ s_getreg_b32 s_save_tmp, hwreg(HW_REG_WAVE_STATE_PRIV)
+ s_and_b32 s_save_tmp, s_save_tmp, SQ_WAVE_STATE_PRIV_BARRIER_COMPLETE_MASK
+ s_andn2_b32 s_save_state_priv, s_save_state_priv, SQ_WAVE_STATE_PRIV_BARRIER_COMPLETE_MASK
+ s_or_b32 s_save_state_priv, s_save_state_priv, s_save_tmp
+
+ s_andn2_b32 s_save_tmp, s_save_pc_hi, S_SAVE_PC_HI_FIRST_WAVE_MASK
+ v_writelane_b32 v2, s_save_m0, 0x0
+ v_writelane_b32 v2, s_save_pc_lo, 0x1
+ v_writelane_b32 v2, s_save_tmp, 0x2
+ v_writelane_b32 v2, s_save_exec_lo, 0x3
+ v_writelane_b32 v2, s_save_exec_hi, 0x4
+ v_writelane_b32 v2, s_save_state_priv, 0x5
+ v_writelane_b32 v2, s_save_xnack_mask, 0x7
+ valu_sgpr_hazard()
+
+ s_getreg_b32 s_save_tmp, hwreg(HW_REG_WAVE_EXCP_FLAG_PRIV)
+ v_writelane_b32 v2, s_save_tmp, 0x6
+
+ s_getreg_b32 s_save_tmp, hwreg(HW_REG_WAVE_MODE)
+ v_writelane_b32 v2, s_save_tmp, 0x8
+
+ s_getreg_b32 s_save_tmp, hwreg(HW_REG_WAVE_SCRATCH_BASE_LO)
+ v_writelane_b32 v2, s_save_tmp, 0x9
+
+ s_getreg_b32 s_save_tmp, hwreg(HW_REG_WAVE_SCRATCH_BASE_HI)
+ v_writelane_b32 v2, s_save_tmp, 0xA
+
+ s_getreg_b32 s_save_tmp, hwreg(HW_REG_WAVE_EXCP_FLAG_USER)
+ v_writelane_b32 v2, s_save_tmp, 0xB
+
+ s_getreg_b32 s_save_tmp, hwreg(HW_REG_WAVE_TRAP_CTRL)
+ v_writelane_b32 v2, s_save_tmp, 0xC
+
+ s_getreg_b32 s_save_tmp, hwreg(HW_REG_WAVE_STATUS)
+ v_writelane_b32 v2, s_save_tmp, 0xD
+
+ s_get_barrier_state s_save_tmp, -1
+ s_wait_kmcnt (0)
+ v_writelane_b32 v2, s_save_tmp, 0xE
+ valu_sgpr_hazard()
+
+ // Write HWREGs with 16 VGPR lanes. TTMPs occupy space after this.
+ s_mov_b32 exec_lo, 0xFFFF
+ s_mov_b32 exec_hi, 0x0
+ buffer_store_dword v2, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS
+
+ // Write SGPRs with 32 VGPR lanes. This works in wave32 and wave64 mode.
+ s_mov_b32 exec_lo, 0xFFFFFFFF
+
+ /* save SGPRs */
+ // Save SGPR before LDS save, then the s0 to s4 can be used during LDS save...
+
+ // SGPR SR memory offset : size(VGPR)+size(SVGPR)
+ get_vgpr_size_bytes(s_save_mem_offset, s_wave_size)
+ get_svgpr_size_bytes(s_save_tmp)
+ s_add_u32 s_save_mem_offset, s_save_mem_offset, s_save_tmp
+ s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes
+
+ s_mov_b32 ttmp13, 0x0 //next VGPR lane to copy SGPR into
+
+ s_mov_b32 m0, 0x0 //SGPR initial index value =0
+ s_nop 0x0 //Manually inserted wait states
+L_SAVE_SGPR_LOOP:
+ // SGPR is allocated in 16 SGPR granularity
+ s_movrels_b64 s0, s0 //s0 = s[0+m0], s1 = s[1+m0]
+ s_movrels_b64 s2, s2 //s2 = s[2+m0], s3 = s[3+m0]
+ s_movrels_b64 s4, s4 //s4 = s[4+m0], s5 = s[5+m0]
+ s_movrels_b64 s6, s6 //s6 = s[6+m0], s7 = s[7+m0]
+ s_movrels_b64 s8, s8 //s8 = s[8+m0], s9 = s[9+m0]
+ s_movrels_b64 s10, s10 //s10 = s[10+m0], s11 = s[11+m0]
+ s_movrels_b64 s12, s12 //s12 = s[12+m0], s13 = s[13+m0]
+ s_movrels_b64 s14, s14 //s14 = s[14+m0], s15 = s[15+m0]
+
+ s_cmp_eq_u32 ttmp13, 0x0
+ s_cbranch_scc0 L_WRITE_V2_SECOND_HALF
+ write_16sgpr_to_v2(s0, 0x0)
+ s_branch L_SAVE_SGPR_SKIP_TCP_STORE
+L_WRITE_V2_SECOND_HALF:
+ write_16sgpr_to_v2(s0, 0x10)
+
+ buffer_store_dword v2, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS
+ s_add_u32 s_save_mem_offset, s_save_mem_offset, 0x80
+ s_mov_b32 ttmp13, 0x0
+ v_mov_b32 v2, 0x0
+L_SAVE_SGPR_SKIP_TCP_STORE:
+
+ s_add_u32 m0, m0, 16 //next sgpr index
+ s_cmp_lt_u32 m0, 96 //scc = (m0 < first 96 SGPR) ? 1 : 0
+ s_cbranch_scc1 L_SAVE_SGPR_LOOP //first 96 SGPR save is complete?
+
+ //save the rest 12 SGPR
+ s_movrels_b64 s0, s0 //s0 = s[0+m0], s1 = s[1+m0]
+ s_movrels_b64 s2, s2 //s2 = s[2+m0], s3 = s[3+m0]
+ s_movrels_b64 s4, s4 //s4 = s[4+m0], s5 = s[5+m0]
+ s_movrels_b64 s6, s6 //s6 = s[6+m0], s7 = s[7+m0]
+ s_movrels_b64 s8, s8 //s8 = s[8+m0], s9 = s[9+m0]
+ s_movrels_b64 s10, s10 //s10 = s[10+m0], s11 = s[11+m0]
+ write_12sgpr_to_v2(s0)
+
+ buffer_store_dword v2, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS
+
+ /* save LDS */
+
+L_SAVE_LDS:
+ // Change EXEC to all threads...
+ s_mov_b32 exec_lo, 0xFFFFFFFF //need every thread from now on
+ s_lshr_b32 m0, s_wave_size, S_WAVE_SIZE
+ s_and_b32 m0, m0, 1
+ s_cmp_eq_u32 m0, 1
+ s_cbranch_scc1 L_ENABLE_SAVE_LDS_EXEC_HI
+ s_mov_b32 exec_hi, 0x00000000
+ s_branch L_SAVE_LDS_NORMAL
+L_ENABLE_SAVE_LDS_EXEC_HI:
+ s_mov_b32 exec_hi, 0xFFFFFFFF
+L_SAVE_LDS_NORMAL:
+ s_getreg_b32 s_save_alloc_size, hwreg(HW_REG_WAVE_LDS_ALLOC,SQ_WAVE_LDS_ALLOC_LDS_SIZE_SHIFT,SQ_WAVE_LDS_ALLOC_LDS_SIZE_SIZE)
+ s_and_b32 s_save_alloc_size, s_save_alloc_size, 0xFFFFFFFF //lds_size is zero?
+ s_cbranch_scc0 L_SAVE_LDS_DONE //no lds used? jump to L_SAVE_DONE
+
+ s_and_b32 s_save_tmp, s_save_pc_hi, S_SAVE_PC_HI_FIRST_WAVE_MASK
+ s_cbranch_scc0 L_SAVE_LDS_DONE
+
+ // first wave do LDS save;
+
+ s_lshl_b32 s_save_alloc_size, s_save_alloc_size, SQ_WAVE_LDS_ALLOC_GRANULARITY
+ s_mov_b32 s_save_buf_rsrc2, s_save_alloc_size //NUM_RECORDS in bytes
+
+ // LDS at offset: size(VGPR)+size(SVGPR)+SIZE(SGPR)+SIZE(HWREG)
+ //
+ get_vgpr_size_bytes(s_save_mem_offset, s_wave_size)
+ get_svgpr_size_bytes(s_save_tmp)
+ s_add_u32 s_save_mem_offset, s_save_mem_offset, s_save_tmp
+ s_add_u32 s_save_mem_offset, s_save_mem_offset, get_sgpr_size_bytes()
+ s_add_u32 s_save_mem_offset, s_save_mem_offset, get_hwreg_size_bytes()
+
+ s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes
+
+ //load 0~63*4(byte address) to vgpr v0
+ v_mbcnt_lo_u32_b32 v0, -1, 0
+ v_mbcnt_hi_u32_b32 v0, -1, v0
+ v_mul_u32_u24 v0, 4, v0
+
+ s_lshr_b32 m0, s_wave_size, S_WAVE_SIZE
+ s_and_b32 m0, m0, 1
+ s_cmp_eq_u32 m0, 1
+ s_mov_b32 m0, 0x0
+ s_cbranch_scc1 L_SAVE_LDS_W64
+
+L_SAVE_LDS_W32:
+ s_mov_b32 s3, 128
+ s_nop 0
+ s_nop 0
+ s_nop 0
+L_SAVE_LDS_LOOP_W32:
+ ds_read_b32 v1, v0
+ s_wait_idle
+ buffer_store_dword v1, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS
+
+ s_add_u32 m0, m0, s3 //every buffer_store_lds does 128 bytes
+ s_add_u32 s_save_mem_offset, s_save_mem_offset, s3
+ v_add_nc_u32 v0, v0, 128 //mem offset increased by 128 bytes
+ s_cmp_lt_u32 m0, s_save_alloc_size //scc=(m0 < s_save_alloc_size) ? 1 : 0
+ s_cbranch_scc1 L_SAVE_LDS_LOOP_W32 //LDS save is complete?
+
+ s_branch L_SAVE_LDS_DONE
+
+L_SAVE_LDS_W64:
+ s_mov_b32 s3, 256
+ s_nop 0
+ s_nop 0
+ s_nop 0
+L_SAVE_LDS_LOOP_W64:
+ ds_read_b32 v1, v0
+ s_wait_idle
+ buffer_store_dword v1, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS
+
+ s_add_u32 m0, m0, s3 //every buffer_store_lds does 256 bytes
+ s_add_u32 s_save_mem_offset, s_save_mem_offset, s3
+ v_add_nc_u32 v0, v0, 256 //mem offset increased by 256 bytes
+ s_cmp_lt_u32 m0, s_save_alloc_size //scc=(m0 < s_save_alloc_size) ? 1 : 0
+ s_cbranch_scc1 L_SAVE_LDS_LOOP_W64 //LDS save is complete?
+
+L_SAVE_LDS_DONE:
+ /* save VGPRs - set the Rest VGPRs */
+L_SAVE_VGPR:
+ // VGPR SR memory offset: 0
+ s_mov_b32 exec_lo, 0xFFFFFFFF //need every thread from now on
+ s_lshr_b32 m0, s_wave_size, S_WAVE_SIZE
+ s_and_b32 m0, m0, 1
+ s_cmp_eq_u32 m0, 1
+ s_cbranch_scc1 L_ENABLE_SAVE_VGPR_EXEC_HI
+ s_mov_b32 s_save_mem_offset, (0+128*4) // for the rest VGPRs
+ s_mov_b32 exec_hi, 0x00000000
+ s_branch L_SAVE_VGPR_NORMAL
+L_ENABLE_SAVE_VGPR_EXEC_HI:
+ s_mov_b32 s_save_mem_offset, (0+256*4) // for the rest VGPRs
+ s_mov_b32 exec_hi, 0xFFFFFFFF
+L_SAVE_VGPR_NORMAL:
+ s_getreg_b32 s_save_alloc_size, hwreg(HW_REG_WAVE_GPR_ALLOC,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SHIFT,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SIZE)
+ s_add_u32 s_save_alloc_size, s_save_alloc_size, 1
+ s_lshl_b32 s_save_alloc_size, s_save_alloc_size, 2 //Number of VGPRs = (vgpr_size + 1) * 4 (non-zero value)
+ //determine it is wave32 or wave64
+ s_lshr_b32 m0, s_wave_size, S_WAVE_SIZE
+ s_and_b32 m0, m0, 1
+ s_cmp_eq_u32 m0, 1
+ s_cbranch_scc1 L_SAVE_VGPR_WAVE64
+
+ s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes
+
+ // VGPR Allocated in 4-GPR granularity
+
+ // VGPR store using dw burst
+ s_mov_b32 m0, 0x4 //VGPR initial index value =4
+ s_cmp_lt_u32 m0, s_save_alloc_size
+ s_cbranch_scc0 L_SAVE_VGPR_END
+
+L_SAVE_VGPR_W32_LOOP:
+ v_movrels_b32 v0, v0 //v0 = v[0+m0]
+ v_movrels_b32 v1, v1 //v1 = v[1+m0]
+ v_movrels_b32 v2, v2 //v2 = v[2+m0]
+ v_movrels_b32 v3, v3 //v3 = v[3+m0]
+
+ buffer_store_dword v0, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS
+ buffer_store_dword v1, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS offset:128
+ buffer_store_dword v2, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS offset:128*2
+ buffer_store_dword v3, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS offset:128*3
+
+ s_add_u32 m0, m0, 4 //next vgpr index
+ s_add_u32 s_save_mem_offset, s_save_mem_offset, 128*4 //every buffer_store_dword does 128 bytes
+ s_cmp_lt_u32 m0, s_save_alloc_size //scc = (m0 < s_save_alloc_size) ? 1 : 0
+ s_cbranch_scc1 L_SAVE_VGPR_W32_LOOP //VGPR save is complete?
+
+ s_branch L_SAVE_VGPR_END
+
+L_SAVE_VGPR_WAVE64:
+ s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes
+
+ // VGPR store using dw burst
+ s_mov_b32 m0, 0x4 //VGPR initial index value =4
+ s_cmp_lt_u32 m0, s_save_alloc_size
+ s_cbranch_scc0 L_SAVE_SHARED_VGPR
+
+L_SAVE_VGPR_W64_LOOP:
+ v_movrels_b32 v0, v0 //v0 = v[0+m0]
+ v_movrels_b32 v1, v1 //v1 = v[1+m0]
+ v_movrels_b32 v2, v2 //v2 = v[2+m0]
+ v_movrels_b32 v3, v3 //v3 = v[3+m0]
+
+ buffer_store_dword v0, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS
+ buffer_store_dword v1, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS offset:256
+ buffer_store_dword v2, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS offset:256*2
+ buffer_store_dword v3, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS offset:256*3
+
+ s_add_u32 m0, m0, 4 //next vgpr index
+ s_add_u32 s_save_mem_offset, s_save_mem_offset, 256*4 //every buffer_store_dword does 256 bytes
+ s_cmp_lt_u32 m0, s_save_alloc_size //scc = (m0 < s_save_alloc_size) ? 1 : 0
+ s_cbranch_scc1 L_SAVE_VGPR_W64_LOOP //VGPR save is complete?
+
+L_SAVE_SHARED_VGPR:
+ s_getreg_b32 s_save_alloc_size, hwreg(HW_REG_WAVE_LDS_ALLOC,SQ_WAVE_LDS_ALLOC_VGPR_SHARED_SIZE_SHIFT,SQ_WAVE_LDS_ALLOC_VGPR_SHARED_SIZE_SIZE)
+ s_and_b32 s_save_alloc_size, s_save_alloc_size, 0xFFFFFFFF //shared_vgpr_size is zero?
+ s_cbranch_scc0 L_SAVE_VGPR_END //no shared_vgpr used? jump to L_SAVE_LDS
+ s_lshl_b32 s_save_alloc_size, s_save_alloc_size, 3 //Number of SHARED_VGPRs = shared_vgpr_size * 8 (non-zero value)
+ //m0 now has the value of normal vgpr count, just add the m0 with shared_vgpr count to get the total count.
+ //save shared_vgpr will start from the index of m0
+ s_add_u32 s_save_alloc_size, s_save_alloc_size, m0
+ s_mov_b32 exec_lo, 0xFFFFFFFF
+ s_mov_b32 exec_hi, 0x00000000
+
+L_SAVE_SHARED_VGPR_WAVE64_LOOP:
+ v_movrels_b32 v0, v0 //v0 = v[0+m0]
+ buffer_store_dword v0, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS
+ s_add_u32 m0, m0, 1 //next vgpr index
+ s_add_u32 s_save_mem_offset, s_save_mem_offset, 128
+ s_cmp_lt_u32 m0, s_save_alloc_size //scc = (m0 < s_save_alloc_size) ? 1 : 0
+ s_cbranch_scc1 L_SAVE_SHARED_VGPR_WAVE64_LOOP //SHARED_VGPR save is complete?
+
+L_SAVE_VGPR_END:
+ s_branch L_END_PGM
+
+L_RESTORE:
+ /* Setup Resource Contants */
+ s_mov_b32 s_restore_buf_rsrc0, s_restore_spi_init_lo //base_addr_lo
+ s_and_b32 s_restore_buf_rsrc1, s_restore_spi_init_hi, 0x0000FFFF //base_addr_hi
+ s_or_b32 s_restore_buf_rsrc1, s_restore_buf_rsrc1, S_RESTORE_BUF_RSRC_WORD1_STRIDE
+ s_mov_b32 s_restore_buf_rsrc2, 0 //NUM_RECORDS initial value = 0 (in bytes)
+ s_mov_b32 s_restore_buf_rsrc3, S_RESTORE_BUF_RSRC_WORD3_MISC
+
+ // Save s_restore_spi_init_hi for later use.
+ s_mov_b32 s_restore_spi_init_hi_save, s_restore_spi_init_hi
+
+ //determine it is wave32 or wave64
+ get_wave_size2(s_restore_size)
+
+ s_and_b32 s_restore_tmp, s_restore_spi_init_hi, S_RESTORE_SPI_INIT_FIRST_WAVE_MASK
+ s_cbranch_scc0 L_RESTORE_VGPR
+
+ /* restore LDS */
+L_RESTORE_LDS:
+ s_mov_b32 exec_lo, 0xFFFFFFFF //need every thread from now on
+ s_lshr_b32 m0, s_restore_size, S_WAVE_SIZE
+ s_and_b32 m0, m0, 1
+ s_cmp_eq_u32 m0, 1
+ s_cbranch_scc1 L_ENABLE_RESTORE_LDS_EXEC_HI
+ s_mov_b32 exec_hi, 0x00000000
+ s_branch L_RESTORE_LDS_NORMAL
+L_ENABLE_RESTORE_LDS_EXEC_HI:
+ s_mov_b32 exec_hi, 0xFFFFFFFF
+L_RESTORE_LDS_NORMAL:
+ s_getreg_b32 s_restore_alloc_size, hwreg(HW_REG_WAVE_LDS_ALLOC,SQ_WAVE_LDS_ALLOC_LDS_SIZE_SHIFT,SQ_WAVE_LDS_ALLOC_LDS_SIZE_SIZE)
+ s_and_b32 s_restore_alloc_size, s_restore_alloc_size, 0xFFFFFFFF //lds_size is zero?
+ s_cbranch_scc0 L_RESTORE_VGPR //no lds used? jump to L_RESTORE_VGPR
+ s_lshl_b32 s_restore_alloc_size, s_restore_alloc_size, SQ_WAVE_LDS_ALLOC_GRANULARITY
+ s_mov_b32 s_restore_buf_rsrc2, s_restore_alloc_size //NUM_RECORDS in bytes
+
+ // LDS at offset: size(VGPR)+size(SVGPR)+SIZE(SGPR)+SIZE(HWREG)
+ //
+ get_vgpr_size_bytes(s_restore_mem_offset, s_restore_size)
+ get_svgpr_size_bytes(s_restore_tmp)
+ s_add_u32 s_restore_mem_offset, s_restore_mem_offset, s_restore_tmp
+ s_add_u32 s_restore_mem_offset, s_restore_mem_offset, get_sgpr_size_bytes()
+ s_add_u32 s_restore_mem_offset, s_restore_mem_offset, get_hwreg_size_bytes()
+
+ s_mov_b32 s_restore_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes
+
+ s_lshr_b32 m0, s_restore_size, S_WAVE_SIZE
+ s_and_b32 m0, m0, 1
+ s_cmp_eq_u32 m0, 1
+ s_mov_b32 m0, 0x0
+ s_cbranch_scc1 L_RESTORE_LDS_LOOP_W64
+
+L_RESTORE_LDS_LOOP_W32:
+ buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset
+ s_wait_idle
+ ds_store_addtid_b32 v0
+ s_add_u32 m0, m0, 128 // 128 DW
+ s_add_u32 s_restore_mem_offset, s_restore_mem_offset, 128 //mem offset increased by 128DW
+ s_cmp_lt_u32 m0, s_restore_alloc_size //scc=(m0 < s_restore_alloc_size) ? 1 : 0
+ s_cbranch_scc1 L_RESTORE_LDS_LOOP_W32 //LDS restore is complete?
+ s_branch L_RESTORE_VGPR
+
+L_RESTORE_LDS_LOOP_W64:
+ buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset
+ s_wait_idle
+ ds_store_addtid_b32 v0
+ s_add_u32 m0, m0, 256 // 256 DW
+ s_add_u32 s_restore_mem_offset, s_restore_mem_offset, 256 //mem offset increased by 256DW
+ s_cmp_lt_u32 m0, s_restore_alloc_size //scc=(m0 < s_restore_alloc_size) ? 1 : 0
+ s_cbranch_scc1 L_RESTORE_LDS_LOOP_W64 //LDS restore is complete?
+
+ /* restore VGPRs */
+L_RESTORE_VGPR:
+ // VGPR SR memory offset : 0
+ s_mov_b32 s_restore_mem_offset, 0x0
+ s_mov_b32 exec_lo, 0xFFFFFFFF //need every thread from now on
+ s_lshr_b32 m0, s_restore_size, S_WAVE_SIZE
+ s_and_b32 m0, m0, 1
+ s_cmp_eq_u32 m0, 1
+ s_cbranch_scc1 L_ENABLE_RESTORE_VGPR_EXEC_HI
+ s_mov_b32 exec_hi, 0x00000000
+ s_branch L_RESTORE_VGPR_NORMAL
+L_ENABLE_RESTORE_VGPR_EXEC_HI:
+ s_mov_b32 exec_hi, 0xFFFFFFFF
+L_RESTORE_VGPR_NORMAL:
+ s_getreg_b32 s_restore_alloc_size, hwreg(HW_REG_WAVE_GPR_ALLOC,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SHIFT,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SIZE)
+ s_add_u32 s_restore_alloc_size, s_restore_alloc_size, 1
+ s_lshl_b32 s_restore_alloc_size, s_restore_alloc_size, 2 //Number of VGPRs = (vgpr_size + 1) * 4 (non-zero value)
+ //determine it is wave32 or wave64
+ s_lshr_b32 m0, s_restore_size, S_WAVE_SIZE
+ s_and_b32 m0, m0, 1
+ s_cmp_eq_u32 m0, 1
+ s_cbranch_scc1 L_RESTORE_VGPR_WAVE64
+
+ s_mov_b32 s_restore_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes
+
+ // VGPR load using dw burst
+ s_mov_b32 s_restore_mem_offset_save, s_restore_mem_offset // restore start with v1, v0 will be the last
+ s_add_u32 s_restore_mem_offset, s_restore_mem_offset, 128*4
+ s_mov_b32 m0, 4 //VGPR initial index value = 4
+ s_cmp_lt_u32 m0, s_restore_alloc_size
+ s_cbranch_scc0 L_RESTORE_SGPR
+
+L_RESTORE_VGPR_WAVE32_LOOP:
+ buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset scope:SCOPE_SYS
+ buffer_load_dword v1, v0, s_restore_buf_rsrc0, s_restore_mem_offset scope:SCOPE_SYS offset:128
+ buffer_load_dword v2, v0, s_restore_buf_rsrc0, s_restore_mem_offset scope:SCOPE_SYS offset:128*2
+ buffer_load_dword v3, v0, s_restore_buf_rsrc0, s_restore_mem_offset scope:SCOPE_SYS offset:128*3
+ s_wait_idle
+ v_movreld_b32 v0, v0 //v[0+m0] = v0
+ v_movreld_b32 v1, v1
+ v_movreld_b32 v2, v2
+ v_movreld_b32 v3, v3
+ s_add_u32 m0, m0, 4 //next vgpr index
+ s_add_u32 s_restore_mem_offset, s_restore_mem_offset, 128*4 //every buffer_load_dword does 128 bytes
+ s_cmp_lt_u32 m0, s_restore_alloc_size //scc = (m0 < s_restore_alloc_size) ? 1 : 0
+ s_cbranch_scc1 L_RESTORE_VGPR_WAVE32_LOOP //VGPR restore (except v0) is complete?
+
+ /* VGPR restore on v0 */
+ buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save scope:SCOPE_SYS
+ buffer_load_dword v1, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save scope:SCOPE_SYS offset:128
+ buffer_load_dword v2, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save scope:SCOPE_SYS offset:128*2
+ buffer_load_dword v3, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save scope:SCOPE_SYS offset:128*3
+ s_wait_idle
+
+ s_branch L_RESTORE_SGPR
+
+L_RESTORE_VGPR_WAVE64:
+ s_mov_b32 s_restore_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes
+
+ // VGPR load using dw burst
+ s_mov_b32 s_restore_mem_offset_save, s_restore_mem_offset // restore start with v4, v0 will be the last
+ s_add_u32 s_restore_mem_offset, s_restore_mem_offset, 256*4
+ s_mov_b32 m0, 4 //VGPR initial index value = 4
+ s_cmp_lt_u32 m0, s_restore_alloc_size
+ s_cbranch_scc0 L_RESTORE_SHARED_VGPR
+
+L_RESTORE_VGPR_WAVE64_LOOP:
+ buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset scope:SCOPE_SYS
+ buffer_load_dword v1, v0, s_restore_buf_rsrc0, s_restore_mem_offset scope:SCOPE_SYS offset:256
+ buffer_load_dword v2, v0, s_restore_buf_rsrc0, s_restore_mem_offset scope:SCOPE_SYS offset:256*2
+ buffer_load_dword v3, v0, s_restore_buf_rsrc0, s_restore_mem_offset scope:SCOPE_SYS offset:256*3
+ s_wait_idle
+ v_movreld_b32 v0, v0 //v[0+m0] = v0
+ v_movreld_b32 v1, v1
+ v_movreld_b32 v2, v2
+ v_movreld_b32 v3, v3
+ s_add_u32 m0, m0, 4 //next vgpr index
+ s_add_u32 s_restore_mem_offset, s_restore_mem_offset, 256*4 //every buffer_load_dword does 256 bytes
+ s_cmp_lt_u32 m0, s_restore_alloc_size //scc = (m0 < s_restore_alloc_size) ? 1 : 0
+ s_cbranch_scc1 L_RESTORE_VGPR_WAVE64_LOOP //VGPR restore (except v0) is complete?
+
+L_RESTORE_SHARED_VGPR:
+ s_getreg_b32 s_restore_alloc_size, hwreg(HW_REG_WAVE_LDS_ALLOC,SQ_WAVE_LDS_ALLOC_VGPR_SHARED_SIZE_SHIFT,SQ_WAVE_LDS_ALLOC_VGPR_SHARED_SIZE_SIZE) //shared_vgpr_size
+ s_and_b32 s_restore_alloc_size, s_restore_alloc_size, 0xFFFFFFFF //shared_vgpr_size is zero?
+ s_cbranch_scc0 L_RESTORE_V0 //no shared_vgpr used?
+ s_lshl_b32 s_restore_alloc_size, s_restore_alloc_size, 3 //Number of SHARED_VGPRs = shared_vgpr_size * 8 (non-zero value)
+ //m0 now has the value of normal vgpr count, just add the m0 with shared_vgpr count to get the total count.
+ //restore shared_vgpr will start from the index of m0
+ s_add_u32 s_restore_alloc_size, s_restore_alloc_size, m0
+ s_mov_b32 exec_lo, 0xFFFFFFFF
+ s_mov_b32 exec_hi, 0x00000000
+L_RESTORE_SHARED_VGPR_WAVE64_LOOP:
+ buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset scope:SCOPE_SYS
+ s_wait_idle
+ v_movreld_b32 v0, v0 //v[0+m0] = v0
+ s_add_u32 m0, m0, 1 //next vgpr index
+ s_add_u32 s_restore_mem_offset, s_restore_mem_offset, 128
+ s_cmp_lt_u32 m0, s_restore_alloc_size //scc = (m0 < s_restore_alloc_size) ? 1 : 0
+ s_cbranch_scc1 L_RESTORE_SHARED_VGPR_WAVE64_LOOP //VGPR restore (except v0) is complete?
+
+ s_mov_b32 exec_hi, 0xFFFFFFFF //restore back exec_hi before restoring V0!!
+
+ /* VGPR restore on v0 */
+L_RESTORE_V0:
+ buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save scope:SCOPE_SYS
+ buffer_load_dword v1, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save scope:SCOPE_SYS offset:256
+ buffer_load_dword v2, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save scope:SCOPE_SYS offset:256*2
+ buffer_load_dword v3, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save scope:SCOPE_SYS offset:256*3
+ s_wait_idle
+
+ /* restore SGPRs */
+ //will be 2+8+16*6
+ // SGPR SR memory offset : size(VGPR)+size(SVGPR)
+L_RESTORE_SGPR:
+ get_vgpr_size_bytes(s_restore_mem_offset, s_restore_size)
+ get_svgpr_size_bytes(s_restore_tmp)
+ s_add_u32 s_restore_mem_offset, s_restore_mem_offset, s_restore_tmp
+ s_add_u32 s_restore_mem_offset, s_restore_mem_offset, get_sgpr_size_bytes()
+ s_sub_u32 s_restore_mem_offset, s_restore_mem_offset, 20*4 //s108~s127 is not saved
+
+ s_mov_b32 s_restore_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes
+
+ s_mov_b32 m0, s_sgpr_save_num
+
+ read_4sgpr_from_mem(s0, s_restore_buf_rsrc0, s_restore_mem_offset)
+ s_wait_idle
+
+ s_sub_u32 m0, m0, 4 // Restore from S[0] to S[104]
+ s_nop 0 // hazard SALU M0=> S_MOVREL
+
+ s_movreld_b64 s0, s0 //s[0+m0] = s0
+ s_movreld_b64 s2, s2
+
+ read_8sgpr_from_mem(s0, s_restore_buf_rsrc0, s_restore_mem_offset)
+ s_wait_idle
+
+ s_sub_u32 m0, m0, 8 // Restore from S[0] to S[96]
+ s_nop 0 // hazard SALU M0=> S_MOVREL
+
+ s_movreld_b64 s0, s0 //s[0+m0] = s0
+ s_movreld_b64 s2, s2
+ s_movreld_b64 s4, s4
+ s_movreld_b64 s6, s6
+
+ L_RESTORE_SGPR_LOOP:
+ read_16sgpr_from_mem(s0, s_restore_buf_rsrc0, s_restore_mem_offset)
+ s_wait_idle
+
+ s_sub_u32 m0, m0, 16 // Restore from S[n] to S[0]
+ s_nop 0 // hazard SALU M0=> S_MOVREL
+
+ s_movreld_b64 s0, s0 //s[0+m0] = s0
+ s_movreld_b64 s2, s2
+ s_movreld_b64 s4, s4
+ s_movreld_b64 s6, s6
+ s_movreld_b64 s8, s8
+ s_movreld_b64 s10, s10
+ s_movreld_b64 s12, s12
+ s_movreld_b64 s14, s14
+
+ s_cmp_eq_u32 m0, 0 //scc = (m0 < s_sgpr_save_num) ? 1 : 0
+ s_cbranch_scc0 L_RESTORE_SGPR_LOOP
+
+ // s_barrier with STATE_PRIV.TRAP_AFTER_INST=1, STATUS.PRIV=1 incorrectly asserts debug exception.
+ // Clear DEBUG_EN before and restore MODE after the barrier.
+ s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE), 0
+
+ /* restore HW registers */
+L_RESTORE_HWREG:
+ // HWREG SR memory offset : size(VGPR)+size(SVGPR)+size(SGPR)
+ get_vgpr_size_bytes(s_restore_mem_offset, s_restore_size)
+ get_svgpr_size_bytes(s_restore_tmp)
+ s_add_u32 s_restore_mem_offset, s_restore_mem_offset, s_restore_tmp
+ s_add_u32 s_restore_mem_offset, s_restore_mem_offset, get_sgpr_size_bytes()
+
+ s_mov_b32 s_restore_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes
+
+ // Restore s_restore_spi_init_hi before the saved value gets clobbered.
+ s_mov_b32 s_restore_spi_init_hi, s_restore_spi_init_hi_save
+
+ read_hwreg_from_mem(s_restore_m0, s_restore_buf_rsrc0, s_restore_mem_offset)
+ read_hwreg_from_mem(s_restore_pc_lo, s_restore_buf_rsrc0, s_restore_mem_offset)
+ read_hwreg_from_mem(s_restore_pc_hi, s_restore_buf_rsrc0, s_restore_mem_offset)
+ read_hwreg_from_mem(s_restore_exec_lo, s_restore_buf_rsrc0, s_restore_mem_offset)
+ read_hwreg_from_mem(s_restore_exec_hi, s_restore_buf_rsrc0, s_restore_mem_offset)
+ read_hwreg_from_mem(s_restore_state_priv, s_restore_buf_rsrc0, s_restore_mem_offset)
+ read_hwreg_from_mem(s_restore_excp_flag_priv, s_restore_buf_rsrc0, s_restore_mem_offset)
+ read_hwreg_from_mem(s_restore_xnack_mask, s_restore_buf_rsrc0, s_restore_mem_offset)
+ read_hwreg_from_mem(s_restore_mode, s_restore_buf_rsrc0, s_restore_mem_offset)
+ read_hwreg_from_mem(s_restore_flat_scratch, s_restore_buf_rsrc0, s_restore_mem_offset)
+ s_wait_idle
+
+ s_setreg_b32 hwreg(HW_REG_WAVE_SCRATCH_BASE_LO), s_restore_flat_scratch
+
+ read_hwreg_from_mem(s_restore_flat_scratch, s_restore_buf_rsrc0, s_restore_mem_offset)
+ s_wait_idle
+
+ s_setreg_b32 hwreg(HW_REG_WAVE_SCRATCH_BASE_HI), s_restore_flat_scratch
+
+ read_hwreg_from_mem(s_restore_tmp, s_restore_buf_rsrc0, s_restore_mem_offset)
+ s_wait_idle
+ s_setreg_b32 hwreg(HW_REG_WAVE_EXCP_FLAG_USER), s_restore_tmp
+
+ read_hwreg_from_mem(s_restore_tmp, s_restore_buf_rsrc0, s_restore_mem_offset)
+ s_wait_idle
+ s_setreg_b32 hwreg(HW_REG_WAVE_TRAP_CTRL), s_restore_tmp
+
+ // Only the first wave needs to restore the workgroup barrier.
+ s_and_b32 s_restore_tmp, s_restore_spi_init_hi, S_RESTORE_SPI_INIT_FIRST_WAVE_MASK
+ s_cbranch_scc0 L_SKIP_BARRIER_RESTORE
+
+ // Skip over WAVE_STATUS, since there is no state to restore from it
+ s_add_u32 s_restore_mem_offset, s_restore_mem_offset, 4
+
+ read_hwreg_from_mem(s_restore_tmp, s_restore_buf_rsrc0, s_restore_mem_offset)
+ s_wait_idle
+
+ s_bitcmp1_b32 s_restore_tmp, BARRIER_STATE_VALID_OFFSET
+ s_cbranch_scc0 L_SKIP_BARRIER_RESTORE
+
+ // extract the saved signal count from s_restore_tmp
+ s_lshr_b32 s_restore_tmp, s_restore_tmp, BARRIER_STATE_SIGNAL_OFFSET
+
+ // We need to call s_barrier_signal repeatedly to restore the signal
+ // count of the work group barrier. The member count is already
+ // initialized with the number of waves in the work group.
+L_BARRIER_RESTORE_LOOP:
+ s_and_b32 s_restore_tmp, s_restore_tmp, s_restore_tmp
+ s_cbranch_scc0 L_SKIP_BARRIER_RESTORE
+ s_barrier_signal -1
+ s_add_i32 s_restore_tmp, s_restore_tmp, -1
+ s_branch L_BARRIER_RESTORE_LOOP
+
+L_SKIP_BARRIER_RESTORE:
+
+ s_mov_b32 m0, s_restore_m0
+ s_mov_b32 exec_lo, s_restore_exec_lo
+ s_mov_b32 exec_hi, s_restore_exec_hi
+
+ // EXCP_FLAG_PRIV.SAVE_CONTEXT and HOST_TRAP may have changed.
+ // Only restore the other fields to avoid clobbering them.
+ s_setreg_b32 hwreg(HW_REG_WAVE_EXCP_FLAG_PRIV, 0, SQ_WAVE_EXCP_FLAG_PRIV_RESTORE_PART_1_SIZE), s_restore_excp_flag_priv
+ s_lshr_b32 s_restore_excp_flag_priv, s_restore_excp_flag_priv, SQ_WAVE_EXCP_FLAG_PRIV_RESTORE_PART_2_SHIFT
+ s_setreg_b32 hwreg(HW_REG_WAVE_EXCP_FLAG_PRIV, SQ_WAVE_EXCP_FLAG_PRIV_RESTORE_PART_2_SHIFT, SQ_WAVE_EXCP_FLAG_PRIV_RESTORE_PART_2_SIZE), s_restore_excp_flag_priv
+ s_lshr_b32 s_restore_excp_flag_priv, s_restore_excp_flag_priv, SQ_WAVE_EXCP_FLAG_PRIV_RESTORE_PART_3_SHIFT - SQ_WAVE_EXCP_FLAG_PRIV_RESTORE_PART_2_SHIFT
+ s_setreg_b32 hwreg(HW_REG_WAVE_EXCP_FLAG_PRIV, SQ_WAVE_EXCP_FLAG_PRIV_RESTORE_PART_3_SHIFT, SQ_WAVE_EXCP_FLAG_PRIV_RESTORE_PART_3_SIZE), s_restore_excp_flag_priv
+
+ s_setreg_b32 hwreg(HW_REG_WAVE_MODE), s_restore_mode
+
+ // Restore trap temporaries 4-11, 13 initialized by SPI debug dispatch logic
+ // ttmp SR memory offset : size(VGPR)+size(SVGPR)+size(SGPR)+0x40
+ get_vgpr_size_bytes(s_restore_ttmps_lo, s_restore_size)
+ get_svgpr_size_bytes(s_restore_ttmps_hi)
+ s_add_u32 s_restore_ttmps_lo, s_restore_ttmps_lo, s_restore_ttmps_hi
+ s_add_u32 s_restore_ttmps_lo, s_restore_ttmps_lo, get_sgpr_size_bytes()
+ s_add_u32 s_restore_ttmps_lo, s_restore_ttmps_lo, s_restore_buf_rsrc0
+ s_addc_u32 s_restore_ttmps_hi, s_restore_buf_rsrc1, 0x0
+ s_and_b32 s_restore_ttmps_hi, s_restore_ttmps_hi, 0xFFFF
+ s_load_dwordx4 [ttmp4, ttmp5, ttmp6, ttmp7], [s_restore_ttmps_lo, s_restore_ttmps_hi], 0x50 scope:SCOPE_SYS
+ s_load_dwordx4 [ttmp8, ttmp9, ttmp10, ttmp11], [s_restore_ttmps_lo, s_restore_ttmps_hi], 0x60 scope:SCOPE_SYS
+ s_load_dword ttmp13, [s_restore_ttmps_lo, s_restore_ttmps_hi], 0x74 scope:SCOPE_SYS
+ s_wait_idle
+
+ s_and_b32 s_restore_pc_hi, s_restore_pc_hi, 0x0000ffff //pc[47:32] //Do it here in order not to affect STATUS
+ s_and_b64 exec, exec, exec // Restore STATUS.EXECZ, not writable by s_setreg_b32
+ s_and_b64 vcc, vcc, vcc // Restore STATUS.VCCZ, not writable by s_setreg_b32
+
+ s_setreg_b32 hwreg(HW_REG_WAVE_STATE_PRIV), s_restore_state_priv // SCC is included, which is changed by previous salu
+
+ // Make barrier and LDS state visible to all waves in the group.
+ // STATE_PRIV.BARRIER_COMPLETE may change after this point.
+ s_barrier_signal -2
+ s_barrier_wait -2
+
+ s_rfe_b64 s_restore_pc_lo //Return to the main shader program and resume execution
+
+L_END_PGM:
+ // Make sure that no wave of the workgroup can exit the trap handler
+ // before the workgroup barrier state is saved.
+ s_barrier_signal -2
+ s_barrier_wait -2
+ s_endpgm_saved
+end
+
+function write_16sgpr_to_v2(s, lane_offset)
+ // Copy into VGPR for later TCP store.
+ for var sgpr_idx = 0; sgpr_idx < 16; sgpr_idx ++
+ v_writelane_b32 v2, s[sgpr_idx], sgpr_idx + lane_offset
+ end
+ valu_sgpr_hazard()
+ s_add_u32 ttmp13, ttmp13, 0x10
+end
+
+function write_12sgpr_to_v2(s)
+ // Copy into VGPR for later TCP store.
+ for var sgpr_idx = 0; sgpr_idx < 12; sgpr_idx ++
+ v_writelane_b32 v2, s[sgpr_idx], sgpr_idx
+ end
+ valu_sgpr_hazard()
+end
+
+function read_hwreg_from_mem(s, s_rsrc, s_mem_offset)
+ s_buffer_load_dword s, s_rsrc, s_mem_offset scope:SCOPE_SYS
+ s_add_u32 s_mem_offset, s_mem_offset, 4
+end
+
+function read_16sgpr_from_mem(s, s_rsrc, s_mem_offset)
+ s_sub_u32 s_mem_offset, s_mem_offset, 4*16
+ s_buffer_load_dwordx16 s, s_rsrc, s_mem_offset scope:SCOPE_SYS
+end
+
+function read_8sgpr_from_mem(s, s_rsrc, s_mem_offset)
+ s_sub_u32 s_mem_offset, s_mem_offset, 4*8
+ s_buffer_load_dwordx8 s, s_rsrc, s_mem_offset scope:SCOPE_SYS
+end
+
+function read_4sgpr_from_mem(s, s_rsrc, s_mem_offset)
+ s_sub_u32 s_mem_offset, s_mem_offset, 4*4
+ s_buffer_load_dwordx4 s, s_rsrc, s_mem_offset scope:SCOPE_SYS
+end
+
+function get_vgpr_size_bytes(s_vgpr_size_byte, s_size)
+ s_getreg_b32 s_vgpr_size_byte, hwreg(HW_REG_WAVE_GPR_ALLOC,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SHIFT,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SIZE)
+ s_add_u32 s_vgpr_size_byte, s_vgpr_size_byte, 1
+ s_bitcmp1_b32 s_size, S_WAVE_SIZE
+ s_cbranch_scc1 L_ENABLE_SHIFT_W64
+ s_lshl_b32 s_vgpr_size_byte, s_vgpr_size_byte, (2+7) //Number of VGPRs = (vgpr_size + 1) * 4 * 32 * 4 (non-zero value)
+ s_branch L_SHIFT_DONE
+L_ENABLE_SHIFT_W64:
+ s_lshl_b32 s_vgpr_size_byte, s_vgpr_size_byte, (2+8) //Number of VGPRs = (vgpr_size + 1) * 4 * 64 * 4 (non-zero value)
+L_SHIFT_DONE:
+end
+
+function get_svgpr_size_bytes(s_svgpr_size_byte)
+ s_getreg_b32 s_svgpr_size_byte, hwreg(HW_REG_WAVE_LDS_ALLOC,SQ_WAVE_LDS_ALLOC_VGPR_SHARED_SIZE_SHIFT,SQ_WAVE_LDS_ALLOC_VGPR_SHARED_SIZE_SIZE)
+ s_lshl_b32 s_svgpr_size_byte, s_svgpr_size_byte, (3+7)
+end
+
+function get_sgpr_size_bytes
+ return 512
+end
+
+function get_hwreg_size_bytes
+ return 128
+end
+
+function get_wave_size2(s_reg)
+ s_getreg_b32 s_reg, hwreg(HW_REG_WAVE_STATUS,SQ_WAVE_STATUS_WAVE64_SHIFT,SQ_WAVE_STATUS_WAVE64_SIZE)
+ s_lshl_b32 s_reg, s_reg, S_WAVE_SIZE
+end
+
+function valu_sgpr_hazard
+#if HAVE_VALU_SGPR_HAZARD
+ for var rep = 0; rep < 8; rep ++
+ ds_nop
+ end
+#endif
+end
diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm
index bb26338204f4..6869e07a2fff 100644
--- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm
+++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm
@@ -37,17 +37,28 @@
* gc_9_4_3:
* cpp -DASIC_FAMILY=GC_9_4_3 cwsr_trap_handler_gfx9.asm -P -o gc_9_4_3.sp3
* sp3 gc_9_4_3.sp3 -hex gc_9_4_3.hex
+ *
+ * gc_9_5_0:
+ * cpp -DASIC_FAMILY=GC_9_5_0 cwsr_trap_handler_gfx9.asm -P -o gc_9_5_0.sp3
+ * sp3 gc_9_5_0.sp3 -hex gc_9_5_0.hex
*/
#define CHIP_VEGAM 18
#define CHIP_ARCTURUS 23
#define CHIP_ALDEBARAN 25
#define CHIP_GC_9_4_3 26
+#define CHIP_GC_9_5_0 27
var ACK_SQC_STORE = 1 //workaround for suspected SQC store bug causing incorrect stores under concurrency
var SAVE_AFTER_XNACK_ERROR = 1 //workaround for TCP store failure after XNACK error when ALLOW_REPLAY=0, for debugger
var SINGLE_STEP_MISSED_WORKAROUND = (ASIC_FAMILY <= CHIP_ALDEBARAN) //workaround for lost MODE.DEBUG_EN exception when SAVECTX raised
+#if ASIC_FAMILY < CHIP_GC_9_4_3
+#define VMEM_MODIFIERS slc:1 glc:1
+#else
+#define VMEM_MODIFIERS sc0:1 nt:1
+#endif
+
/**************************************************************************/
/* variables */
/**************************************************************************/
@@ -62,7 +73,13 @@ var SQ_WAVE_STATUS_ALLOW_REPLAY_MASK = 0x400000
var SQ_WAVE_STATUS_ECC_ERR_MASK = 0x20000
var SQ_WAVE_LDS_ALLOC_LDS_SIZE_SHIFT = 12
+#if ASIC_FAMILY >= CHIP_GC_9_5_0
+var SQ_WAVE_LDS_ALLOC_LDS_SIZE_SIZE = 11
+var LDS_RESTORE_GRANULARITY_BYTES = 1280
+#else
var SQ_WAVE_LDS_ALLOC_LDS_SIZE_SIZE = 9
+var LDS_RESTORE_GRANULARITY_BYTES = 512
+#endif
var SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SIZE = 6
var SQ_WAVE_GPR_ALLOC_SGPR_SIZE_SIZE = 3 //FIXME sq.blk still has 4 bits at this time while SQ programming guide has 3 bits
var SQ_WAVE_GPR_ALLOC_SGPR_SIZE_SHIFT = 24
@@ -430,7 +447,9 @@ L_SAVE:
s_getreg_b32 s_save_m0, hwreg(HW_REG_MODE) //MODE
write_hwreg_to_mem(s_save_m0, s_save_buf_rsrc0, s_save_mem_offset)
-
+ // Clear VSKIP state now that MODE.VSKIP has been saved.
+ // If user shader set it then vector instructions would be skipped.
+ s_setvskip 0,0
/* the first wave in the threadgroup */
s_and_b32 s_save_tmp, s_save_spi_init_hi, S_SAVE_SPI_INIT_FIRST_WAVE_MASK // extract fisrt wave bit
@@ -557,12 +576,21 @@ if SAVE_AFTER_XNACK_ERROR
v_lshlrev_b32 v2, 2, v3
L_SAVE_LDS_LOOP_SQC:
+#if ASIC_FAMILY < CHIP_GC_9_5_0
ds_read2_b32 v[0:1], v2 offset0:0 offset1:0x40
s_waitcnt lgkmcnt(0)
-
write_vgprs_to_mem_with_sqc(v0, 2, s_save_buf_rsrc0, s_save_mem_offset)
v_add_u32 v2, 0x200, v2
+#else
+ // gfx950 needs to save in multiple of 256 bytes.
+ ds_read_b32 v0, v2
+ s_waitcnt lgkmcnt(0)
+ write_vgprs_to_mem_with_sqc(v0, 1, s_save_buf_rsrc0, s_save_mem_offset)
+
+ v_add_u32 v2, 0x100, v2
+#endif
+
v_cmp_lt_u32 vcc[0:1], v2, s_save_alloc_size
s_cbranch_vccnz L_SAVE_LDS_LOOP_SQC
@@ -581,11 +609,14 @@ end
L_SAVE_LDS_LOOP_VECTOR:
ds_read_b64 v[0:1], v2 //x =LDS[a], byte address
s_waitcnt lgkmcnt(0)
- buffer_store_dwordx2 v[0:1], v2, s_save_buf_rsrc0, s_save_mem_offset offen:1 glc:1 slc:1
+ buffer_store_dwordx2 v[0:1], v2, s_save_buf_rsrc0, s_save_mem_offset VMEM_MODIFIERS offen:1
// s_waitcnt vmcnt(0)
// v_add_u32 v2, vcc[0:1], v2, v3
v_add_u32 v2, v2, v3
v_cmp_lt_u32 vcc[0:1], v2, s_save_alloc_size
+#if ASIC_FAMILY >= CHIP_GC_9_5_0
+ s_mov_b64 exec, vcc
+#endif
s_cbranch_vccnz L_SAVE_LDS_LOOP_VECTOR
// restore rsrc3
@@ -748,8 +779,13 @@ L_RESTORE:
L_RESTORE_LDS_LOOP:
buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset lds:1 // first 64DW
buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset lds:1 offset:256 // second 64DW
- s_add_u32 m0, m0, 256*2 // 128 DW
- s_add_u32 s_restore_mem_offset, s_restore_mem_offset, 256*2 //mem offset increased by 128DW
+#if ASIC_FAMILY >= CHIP_GC_9_5_0
+ buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset lds:1 offset:512 // third 64DW
+ buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset lds:1 offset:768 // forth 64DW
+ buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset lds:1 offset:1024 // fifth 64DW
+#endif
+ s_add_u32 m0, m0, LDS_RESTORE_GRANULARITY_BYTES // 128/320 DW
+ s_add_u32 s_restore_mem_offset, s_restore_mem_offset, LDS_RESTORE_GRANULARITY_BYTES //mem offset increased by 128/320 DW
s_cmp_lt_u32 m0, s_restore_alloc_size //scc=(m0 < s_restore_alloc_size) ? 1 : 0
s_cbranch_scc1 L_RESTORE_LDS_LOOP //LDS restore is complete?
@@ -979,17 +1015,17 @@ L_TCP_STORE_CHECK_DONE:
end
function write_4vgprs_to_mem(s_rsrc, s_mem_offset)
- buffer_store_dword v0, v0, s_rsrc, s_mem_offset slc:1 glc:1
- buffer_store_dword v1, v0, s_rsrc, s_mem_offset slc:1 glc:1 offset:256
- buffer_store_dword v2, v0, s_rsrc, s_mem_offset slc:1 glc:1 offset:256*2
- buffer_store_dword v3, v0, s_rsrc, s_mem_offset slc:1 glc:1 offset:256*3
+ buffer_store_dword v0, v0, s_rsrc, s_mem_offset VMEM_MODIFIERS
+ buffer_store_dword v1, v0, s_rsrc, s_mem_offset VMEM_MODIFIERS offset:256
+ buffer_store_dword v2, v0, s_rsrc, s_mem_offset VMEM_MODIFIERS offset:256*2
+ buffer_store_dword v3, v0, s_rsrc, s_mem_offset VMEM_MODIFIERS offset:256*3
end
function read_4vgprs_from_mem(s_rsrc, s_mem_offset)
- buffer_load_dword v0, v0, s_rsrc, s_mem_offset slc:1 glc:1
- buffer_load_dword v1, v0, s_rsrc, s_mem_offset slc:1 glc:1 offset:256
- buffer_load_dword v2, v0, s_rsrc, s_mem_offset slc:1 glc:1 offset:256*2
- buffer_load_dword v3, v0, s_rsrc, s_mem_offset slc:1 glc:1 offset:256*3
+ buffer_load_dword v0, v0, s_rsrc, s_mem_offset VMEM_MODIFIERS
+ buffer_load_dword v1, v0, s_rsrc, s_mem_offset VMEM_MODIFIERS offset:256
+ buffer_load_dword v2, v0, s_rsrc, s_mem_offset VMEM_MODIFIERS offset:256*2
+ buffer_load_dword v3, v0, s_rsrc, s_mem_offset VMEM_MODIFIERS offset:256*3
s_waitcnt vmcnt(0)
end
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index 3e6b4736a7fe..a2149afa5803 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -155,8 +155,8 @@ static int kfd_open(struct inode *inode, struct file *filep)
/* filep now owns the reference returned by kfd_create_process */
filep->private_data = process;
- dev_dbg(kfd_device, "process %d opened, compat mode (32 bit) - %d\n",
- process->pasid, process->is_32bit_user_mode);
+ dev_dbg(kfd_device, "process pid %d opened kfd node, compat mode (32 bit) - %d\n",
+ process->lead_thread->pid, process->is_32bit_user_mode);
return 0;
}
@@ -212,6 +212,11 @@ static int set_queue_properties_from_user(struct queue_properties *q_properties,
return -EINVAL;
}
+ if (args->ring_size < KFD_MIN_QUEUE_RING_SIZE) {
+ args->ring_size = KFD_MIN_QUEUE_RING_SIZE;
+ pr_debug("Size lower. clamped to KFD_MIN_QUEUE_RING_SIZE");
+ }
+
if (!access_ok((const void __user *) args->read_pointer_address,
sizeof(uint32_t))) {
pr_err("Can't access read pointer\n");
@@ -361,11 +366,11 @@ static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p,
goto err_acquire_queue_buf;
}
- pr_debug("Creating queue for PASID 0x%x on gpu 0x%x\n",
- p->pasid,
+ pr_debug("Creating queue for process pid %d on gpu 0x%x\n",
+ p->lead_thread->pid,
dev->id);
- err = pqm_create_queue(&p->pqm, dev, filep, &q_properties, &queue_id,
+ err = pqm_create_queue(&p->pqm, dev, &q_properties, &queue_id,
NULL, NULL, NULL, &doorbell_offset_in_process);
if (err != 0)
goto err_create_queue;
@@ -415,9 +420,9 @@ static int kfd_ioctl_destroy_queue(struct file *filp, struct kfd_process *p,
int retval;
struct kfd_ioctl_destroy_queue_args *args = data;
- pr_debug("Destroying queue id %d for pasid 0x%x\n",
+ pr_debug("Destroying queue id %d for process pid %d\n",
args->queue_id,
- p->pasid);
+ p->lead_thread->pid);
mutex_lock(&p->mutex);
@@ -461,6 +466,11 @@ static int kfd_ioctl_update_queue(struct file *filp, struct kfd_process *p,
return -EINVAL;
}
+ if (args->ring_size < KFD_MIN_QUEUE_RING_SIZE) {
+ args->ring_size = KFD_MIN_QUEUE_RING_SIZE;
+ pr_debug("Size lower. clamped to KFD_MIN_QUEUE_RING_SIZE");
+ }
+
properties.queue_address = args->ring_base_address;
properties.queue_size = args->ring_size;
properties.queue_percent = args->queue_percentage & 0xFF;
@@ -468,8 +478,8 @@ static int kfd_ioctl_update_queue(struct file *filp, struct kfd_process *p,
properties.pm4_target_xcc = (args->queue_percentage >> 8) & 0xFF;
properties.priority = args->queue_priority;
- pr_debug("Updating queue id %d for pasid 0x%x\n",
- args->queue_id, p->pasid);
+ pr_debug("Updating queue id %d for process pid %d\n",
+ args->queue_id, p->lead_thread->pid);
mutex_lock(&p->mutex);
@@ -596,7 +606,8 @@ static int kfd_ioctl_set_memory_policy(struct file *filep,
default_policy,
alternate_policy,
(void __user *)args->alternate_aperture_base,
- args->alternate_aperture_size))
+ args->alternate_aperture_size,
+ args->misc_process_flag))
err = -EINVAL;
out:
@@ -695,7 +706,7 @@ static int kfd_ioctl_get_process_apertures(struct file *filp,
struct kfd_process_device_apertures *pAperture;
int i;
- dev_dbg(kfd_device, "get apertures for PASID 0x%x", p->pasid);
+ dev_dbg(kfd_device, "get apertures for process pid %d", p->lead_thread->pid);
args->num_of_nodes = 0;
@@ -747,7 +758,8 @@ static int kfd_ioctl_get_process_apertures_new(struct file *filp,
int ret;
int i;
- dev_dbg(kfd_device, "get apertures for PASID 0x%x", p->pasid);
+ dev_dbg(kfd_device, "get apertures for process pid %d",
+ p->lead_thread->pid);
if (args->num_of_nodes == 0) {
/* Return number of nodes, so that user space can alloacate
@@ -2027,9 +2039,7 @@ static int criu_get_process_object_info(struct kfd_process *p,
num_events = kfd_get_num_events(p);
- ret = svm_range_get_info(p, &num_svm_ranges, &svm_priv_data_size);
- if (ret)
- return ret;
+ svm_range_get_info(p, &num_svm_ranges, &svm_priv_data_size);
*num_objects = num_queues + num_events + num_svm_ranges;
@@ -3365,12 +3375,12 @@ static int kfd_mmio_mmap(struct kfd_node *dev, struct kfd_process *process,
vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
- pr_debug("pasid 0x%x mapping mmio page\n"
+ pr_debug("process pid %d mapping mmio page\n"
" target user address == 0x%08llX\n"
" physical address == 0x%08llX\n"
" vm_flags == 0x%04lX\n"
" size == 0x%04lX\n",
- process->pasid, (unsigned long long) vma->vm_start,
+ process->lead_thread->pid, (unsigned long long) vma->vm_start,
address, vma->vm_flags, PAGE_SIZE);
return io_remap_pfn_range(vma,
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
index 48caecf7e72e..4a7180b46b71 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
@@ -28,6 +28,7 @@
#include "kfd_topology.h"
#include "amdgpu.h"
#include "amdgpu_amdkfd.h"
+#include "amdgpu_xgmi.h"
/* GPU Processor ID base for dGPUs for which VCRAT needs to be created.
* GPU processor ID are expressed with Bit[31]=1.
@@ -1422,6 +1423,7 @@ err:
static int kfd_fill_gpu_cache_info_from_gfx_config(struct kfd_dev *kdev,
+ bool cache_line_size_missing,
struct kfd_gpu_cache_info *pcache_info)
{
struct amdgpu_device *adev = kdev->adev;
@@ -1436,6 +1438,8 @@ static int kfd_fill_gpu_cache_info_from_gfx_config(struct kfd_dev *kdev,
CRAT_CACHE_FLAGS_SIMD_CACHE);
pcache_info[i].num_cu_shared = adev->gfx.config.gc_num_tcp_per_wpg / 2;
pcache_info[i].cache_line_size = adev->gfx.config.gc_tcp_cache_line_size;
+ if (cache_line_size_missing && !pcache_info[i].cache_line_size)
+ pcache_info[i].cache_line_size = 128;
i++;
}
/* Scalar L1 Instruction Cache per SQC */
@@ -1448,6 +1452,8 @@ static int kfd_fill_gpu_cache_info_from_gfx_config(struct kfd_dev *kdev,
CRAT_CACHE_FLAGS_SIMD_CACHE);
pcache_info[i].num_cu_shared = adev->gfx.config.gc_num_sqc_per_wgp * 2;
pcache_info[i].cache_line_size = adev->gfx.config.gc_instruction_cache_line_size;
+ if (cache_line_size_missing && !pcache_info[i].cache_line_size)
+ pcache_info[i].cache_line_size = 128;
i++;
}
/* Scalar L1 Data Cache per SQC */
@@ -1459,6 +1465,8 @@ static int kfd_fill_gpu_cache_info_from_gfx_config(struct kfd_dev *kdev,
CRAT_CACHE_FLAGS_SIMD_CACHE);
pcache_info[i].num_cu_shared = adev->gfx.config.gc_num_sqc_per_wgp * 2;
pcache_info[i].cache_line_size = adev->gfx.config.gc_scalar_data_cache_line_size;
+ if (cache_line_size_missing && !pcache_info[i].cache_line_size)
+ pcache_info[i].cache_line_size = 64;
i++;
}
/* GL1 Data Cache per SA */
@@ -1471,7 +1479,8 @@ static int kfd_fill_gpu_cache_info_from_gfx_config(struct kfd_dev *kdev,
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE);
pcache_info[i].num_cu_shared = adev->gfx.config.max_cu_per_sh;
- pcache_info[i].cache_line_size = 0;
+ if (cache_line_size_missing)
+ pcache_info[i].cache_line_size = 128;
i++;
}
/* L2 Data Cache per GPU (Total Tex Cache) */
@@ -1483,6 +1492,8 @@ static int kfd_fill_gpu_cache_info_from_gfx_config(struct kfd_dev *kdev,
CRAT_CACHE_FLAGS_SIMD_CACHE);
pcache_info[i].num_cu_shared = adev->gfx.config.max_cu_per_sh;
pcache_info[i].cache_line_size = adev->gfx.config.gc_tcc_cache_line_size;
+ if (cache_line_size_missing && !pcache_info[i].cache_line_size)
+ pcache_info[i].cache_line_size = 128;
i++;
}
/* L3 Data Cache per GPU */
@@ -1493,7 +1504,7 @@ static int kfd_fill_gpu_cache_info_from_gfx_config(struct kfd_dev *kdev,
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE);
pcache_info[i].num_cu_shared = adev->gfx.config.max_cu_per_sh;
- pcache_info[i].cache_line_size = 0;
+ pcache_info[i].cache_line_size = 64;
i++;
}
return i;
@@ -1509,6 +1520,8 @@ static int kfd_fill_gpu_cache_info_from_gfx_config_v2(struct kfd_dev *kdev,
if (adev->gfx.config.gc_tcp_size_per_cu) {
pcache_info[i].cache_size = adev->gfx.config.gc_tcp_size_per_cu;
pcache_info[i].cache_level = 1;
+ /* Cacheline size not available in IP discovery for gc943,gc944 */
+ pcache_info[i].cache_line_size = 128;
pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE);
@@ -1520,6 +1533,7 @@ static int kfd_fill_gpu_cache_info_from_gfx_config_v2(struct kfd_dev *kdev,
pcache_info[i].cache_size =
adev->gfx.config.gc_l1_instruction_cache_size_per_sqc;
pcache_info[i].cache_level = 1;
+ pcache_info[i].cache_line_size = 64;
pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_INST_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE);
@@ -1530,6 +1544,7 @@ static int kfd_fill_gpu_cache_info_from_gfx_config_v2(struct kfd_dev *kdev,
if (adev->gfx.config.gc_l1_data_cache_size_per_sqc) {
pcache_info[i].cache_size = adev->gfx.config.gc_l1_data_cache_size_per_sqc;
pcache_info[i].cache_level = 1;
+ pcache_info[i].cache_line_size = 64;
pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE);
@@ -1540,6 +1555,7 @@ static int kfd_fill_gpu_cache_info_from_gfx_config_v2(struct kfd_dev *kdev,
if (adev->gfx.config.gc_tcc_size) {
pcache_info[i].cache_size = adev->gfx.config.gc_tcc_size;
pcache_info[i].cache_level = 2;
+ pcache_info[i].cache_line_size = 128;
pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE);
@@ -1550,6 +1566,7 @@ static int kfd_fill_gpu_cache_info_from_gfx_config_v2(struct kfd_dev *kdev,
if (adev->gmc.mall_size) {
pcache_info[i].cache_size = adev->gmc.mall_size / 1024;
pcache_info[i].cache_level = 3;
+ pcache_info[i].cache_line_size = 64;
pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE);
@@ -1562,6 +1579,7 @@ static int kfd_fill_gpu_cache_info_from_gfx_config_v2(struct kfd_dev *kdev,
int kfd_get_gpu_cache_info(struct kfd_node *kdev, struct kfd_gpu_cache_info **pcache_info)
{
int num_of_cache_types = 0;
+ bool cache_line_size_missing = false;
switch (kdev->adev->asic_type) {
case CHIP_KAVERI:
@@ -1621,6 +1639,7 @@ int kfd_get_gpu_cache_info(struct kfd_node *kdev, struct kfd_gpu_cache_info **pc
break;
case IP_VERSION(9, 4, 3):
case IP_VERSION(9, 4, 4):
+ case IP_VERSION(9, 5, 0):
num_of_cache_types =
kfd_fill_gpu_cache_info_from_gfx_config_v2(kdev->kfd,
*pcache_info);
@@ -1685,10 +1704,18 @@ int kfd_get_gpu_cache_info(struct kfd_node *kdev, struct kfd_gpu_cache_info **pc
case IP_VERSION(11, 5, 0):
case IP_VERSION(11, 5, 1):
case IP_VERSION(11, 5, 2):
+ case IP_VERSION(11, 5, 3):
+ /* Cacheline size not available in IP discovery for gc11.
+ * kfd_fill_gpu_cache_info_from_gfx_config to hard code it
+ */
+ cache_line_size_missing = true;
+ fallthrough;
case IP_VERSION(12, 0, 0):
case IP_VERSION(12, 0, 1):
num_of_cache_types =
- kfd_fill_gpu_cache_info_from_gfx_config(kdev->kfd, *pcache_info);
+ kfd_fill_gpu_cache_info_from_gfx_config(kdev->kfd,
+ cache_line_size_missing,
+ *pcache_info);
break;
default:
*pcache_info = dummy_cache_info;
@@ -2106,9 +2133,6 @@ static int kfd_fill_gpu_direct_io_link_to_cpu(int *avail_size,
bool ext_cpu = KFD_GC_VERSION(kdev) != IP_VERSION(9, 4, 3);
int mem_bw = 819200, weight = ext_cpu ? KFD_CRAT_XGMI_WEIGHT :
KFD_CRAT_INTRA_SOCKET_WEIGHT;
- uint32_t bandwidth = ext_cpu ? amdgpu_amdkfd_get_xgmi_bandwidth_mbytes(
- kdev->adev, NULL, true) : mem_bw;
-
/*
* with host gpu xgmi link, host can access gpu memory whether
* or not pcie bar type is large, so always create bidirectional
@@ -2117,8 +2141,16 @@ static int kfd_fill_gpu_direct_io_link_to_cpu(int *avail_size,
sub_type_hdr->flags |= CRAT_IOLINK_FLAGS_BI_DIRECTIONAL;
sub_type_hdr->io_interface_type = CRAT_IOLINK_TYPE_XGMI;
sub_type_hdr->weight_xgmi = weight;
- sub_type_hdr->minimum_bandwidth_mbs = bandwidth;
- sub_type_hdr->maximum_bandwidth_mbs = bandwidth;
+ if (ext_cpu) {
+ amdgpu_xgmi_get_bandwidth(kdev->adev, NULL,
+ AMDGPU_XGMI_BW_MODE_PER_LINK,
+ AMDGPU_XGMI_BW_UNIT_MBYTES,
+ &sub_type_hdr->minimum_bandwidth_mbs,
+ &sub_type_hdr->maximum_bandwidth_mbs);
+ } else {
+ sub_type_hdr->minimum_bandwidth_mbs = mem_bw;
+ sub_type_hdr->maximum_bandwidth_mbs = mem_bw;
+ }
} else {
sub_type_hdr->io_interface_type = CRAT_IOLINK_TYPE_PCIEXPRESS;
sub_type_hdr->minimum_bandwidth_mbs =
@@ -2171,12 +2203,12 @@ static int kfd_fill_gpu_xgmi_link_to_gpu(int *avail_size,
if (use_ta_info) {
sub_type_hdr->weight_xgmi = KFD_CRAT_XGMI_WEIGHT *
- amdgpu_amdkfd_get_xgmi_hops_count(kdev->adev, peer_kdev->adev);
- sub_type_hdr->maximum_bandwidth_mbs =
- amdgpu_amdkfd_get_xgmi_bandwidth_mbytes(kdev->adev,
- peer_kdev->adev, false);
- sub_type_hdr->minimum_bandwidth_mbs = sub_type_hdr->maximum_bandwidth_mbs ?
- amdgpu_amdkfd_get_xgmi_bandwidth_mbytes(kdev->adev, NULL, true) : 0;
+ amdgpu_xgmi_get_hops_count(kdev->adev, peer_kdev->adev);
+ amdgpu_xgmi_get_bandwidth(kdev->adev, peer_kdev->adev,
+ AMDGPU_XGMI_BW_MODE_PER_PEER,
+ AMDGPU_XGMI_BW_UNIT_MBYTES,
+ &sub_type_hdr->minimum_bandwidth_mbs,
+ &sub_type_hdr->maximum_bandwidth_mbs);
} else {
bool is_single_hop = kdev->kfd == peer_kdev->kfd;
int weight = is_single_hop ? KFD_CRAT_INTRA_SOCKET_WEIGHT :
@@ -2329,6 +2361,8 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image,
continue;
if (peer_dev->gpu->kfd->hive_id != kdev->kfd->hive_id)
continue;
+ if (!amdgpu_xgmi_get_is_sharing_enabled(kdev->adev, peer_dev->gpu->adev))
+ continue;
sub_type_hdr = (typeof(sub_type_hdr))(
(char *)sub_type_hdr +
sizeof(struct crat_subtype_iolink));
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_debug.c b/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
index 312dfa84f29f..ba99e0f258ae 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
@@ -204,11 +204,12 @@ bool kfd_set_dbg_ev_from_interrupt(struct kfd_node *dev,
size_t exception_data_size)
{
struct kfd_process *p;
+ struct kfd_process_device *pdd = NULL;
bool signaled_to_debugger_or_runtime = false;
- p = kfd_lookup_process_by_pasid(pasid);
+ p = kfd_lookup_process_by_pasid(pasid, &pdd);
- if (!p)
+ if (!pdd)
return false;
if (!kfd_dbg_ev_raise(trap_mask, p, dev, doorbell_id, true,
@@ -238,9 +239,8 @@ bool kfd_set_dbg_ev_from_interrupt(struct kfd_node *dev,
mutex_unlock(&p->mutex);
} else if (trap_mask & KFD_EC_MASK(EC_DEVICE_MEMORY_VIOLATION)) {
- kfd_dqm_evict_pasid(dev->dqm, p->pasid);
- kfd_signal_vm_fault_event(dev, p->pasid, NULL,
- exception_data);
+ kfd_evict_process_device(pdd);
+ kfd_signal_vm_fault_event(pdd, NULL, exception_data);
signaled_to_debugger_or_runtime = true;
}
@@ -276,8 +276,8 @@ int kfd_dbg_send_exception_to_runtime(struct kfd_process *p,
data = (struct kfd_hsa_memory_exception_data *)
pdd->vm_fault_exc_data;
- kfd_dqm_evict_pasid(pdd->dev->dqm, p->pasid);
- kfd_signal_vm_fault_event(pdd->dev, p->pasid, NULL, data);
+ kfd_evict_process_device(pdd);
+ kfd_signal_vm_fault_event(pdd, NULL, data);
error_reason &= ~KFD_EC_MASK(EC_DEVICE_MEMORY_VIOLATION);
}
@@ -350,10 +350,27 @@ int kfd_dbg_set_mes_debug_mode(struct kfd_process_device *pdd, bool sq_trap_en)
{
uint32_t spi_dbg_cntl = pdd->spi_dbg_override | pdd->spi_dbg_launch_mode;
uint32_t flags = pdd->process->dbg_flags;
+ struct amdgpu_device *adev = pdd->dev->adev;
+ int r;
if (!kfd_dbg_is_per_vmid_supported(pdd->dev))
return 0;
+ if (!pdd->proc_ctx_cpu_ptr) {
+ r = amdgpu_amdkfd_alloc_gtt_mem(adev,
+ AMDGPU_MES_PROC_CTX_SIZE,
+ &pdd->proc_ctx_bo,
+ &pdd->proc_ctx_gpu_addr,
+ &pdd->proc_ctx_cpu_ptr,
+ false);
+ if (r) {
+ dev_err(adev->dev,
+ "failed to allocate process context bo\n");
+ return r;
+ }
+ memset(pdd->proc_ctx_cpu_ptr, 0, AMDGPU_MES_PROC_CTX_SIZE);
+ }
+
return amdgpu_mes_set_shader_debugger(pdd->dev->adev, pdd->proc_ctx_gpu_addr, spi_dbg_cntl,
pdd->watch_points, flags, sq_trap_en);
}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_debug.h b/drivers/gpu/drm/amd/amdkfd/kfd_debug.h
index 924d0fd85dfb..27aa1a5b120f 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_debug.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_debug.h
@@ -79,6 +79,7 @@ static inline bool kfd_dbg_is_per_vmid_supported(struct kfd_node *dev)
return (KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 2) ||
KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 3) ||
KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 4) ||
+ KFD_GC_VERSION(dev) == IP_VERSION(9, 5, 0) ||
KFD_GC_VERSION(dev) >= IP_VERSION(11, 0, 0));
}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_debugfs.c b/drivers/gpu/drm/amd/amdkfd/kfd_debugfs.c
index 4a5a0a4e00f2..9bde2c64540f 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_debugfs.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_debugfs.c
@@ -27,6 +27,16 @@
#include "kfd_priv.h"
static struct dentry *debugfs_root;
+static struct dentry *debugfs_proc;
+static struct list_head procs;
+
+struct debugfs_proc_entry {
+ struct list_head list;
+ struct dentry *proc_dentry;
+ pid_t pid;
+};
+
+#define MAX_DEBUGFS_FILENAME_LEN 32
static int kfd_debugfs_open(struct inode *inode, struct file *file)
{
@@ -92,6 +102,8 @@ static const struct file_operations kfd_debugfs_hang_hws_fops = {
void kfd_debugfs_init(void)
{
debugfs_root = debugfs_create_dir("kfd", NULL);
+ debugfs_proc = debugfs_create_dir("proc", debugfs_root);
+ INIT_LIST_HEAD(&procs);
debugfs_create_file("mqds", S_IFREG | 0444, debugfs_root,
kfd_debugfs_mqds_by_process, &kfd_debugfs_fops);
@@ -107,5 +119,69 @@ void kfd_debugfs_init(void)
void kfd_debugfs_fini(void)
{
+ debugfs_remove_recursive(debugfs_proc);
debugfs_remove_recursive(debugfs_root);
}
+
+static ssize_t kfd_debugfs_pasid_read(struct file *file, char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ struct kfd_process_device *pdd = file_inode(file)->i_private;
+ char tmp[32];
+ int len;
+
+ len = snprintf(tmp, sizeof(tmp), "%u\n", pdd->pasid);
+
+ return simple_read_from_buffer(buf, count, ppos, tmp, len);
+}
+
+static const struct file_operations kfd_debugfs_pasid_fops = {
+ .owner = THIS_MODULE,
+ .read = kfd_debugfs_pasid_read,
+};
+
+void kfd_debugfs_add_process(struct kfd_process *p)
+{
+ int i;
+ char name[MAX_DEBUGFS_FILENAME_LEN];
+ struct debugfs_proc_entry *entry;
+
+ entry = kzalloc(sizeof(*entry), GFP_KERNEL);
+ if (!entry)
+ return;
+
+ list_add(&entry->list, &procs);
+ entry->pid = p->lead_thread->pid;
+ snprintf(name, MAX_DEBUGFS_FILENAME_LEN, "%d",
+ (int)entry->pid);
+ entry->proc_dentry = debugfs_create_dir(name, debugfs_proc);
+
+ /* Create debugfs files for each GPU:
+ * - proc/<pid>/pasid_<gpuid>
+ */
+ for (i = 0; i < p->n_pdds; i++) {
+ struct kfd_process_device *pdd = p->pdds[i];
+
+ snprintf(name, MAX_DEBUGFS_FILENAME_LEN, "pasid_%u",
+ pdd->dev->id);
+ debugfs_create_file((const char *)name, S_IFREG | 0444,
+ entry->proc_dentry, pdd,
+ &kfd_debugfs_pasid_fops);
+ }
+}
+
+void kfd_debugfs_remove_process(struct kfd_process *p)
+{
+ struct debugfs_proc_entry *entry, *next;
+
+ mutex_lock(&kfd_processes_mutex);
+ list_for_each_entry_safe(entry, next, &procs, list) {
+ if (entry->pid != p->lead_thread->pid)
+ continue;
+
+ debugfs_remove_recursive(entry->proc_dentry);
+ list_del(&entry->list);
+ kfree(entry);
+ }
+ mutex_unlock(&kfd_processes_mutex);
+}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
index fad1c8f2bc83..bf0854bd5555 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
@@ -85,6 +85,7 @@ static void kfd_device_info_set_sdma_info(struct kfd_dev *kfd)
case IP_VERSION(4, 4, 0):/* ALDEBARAN */
case IP_VERSION(4, 4, 2):
case IP_VERSION(4, 4, 5):
+ case IP_VERSION(4, 4, 4):
case IP_VERSION(5, 0, 0):/* NAVI10 */
case IP_VERSION(5, 0, 1):/* CYAN_SKILLFISH */
case IP_VERSION(5, 0, 2):/* NAVI14 */
@@ -100,6 +101,7 @@ static void kfd_device_info_set_sdma_info(struct kfd_dev *kfd)
case IP_VERSION(6, 1, 0):
case IP_VERSION(6, 1, 1):
case IP_VERSION(6, 1, 2):
+ case IP_VERSION(6, 1, 3):
case IP_VERSION(7, 0, 0):
case IP_VERSION(7, 0, 1):
kfd->device_info.num_sdma_queues_per_engine = 8;
@@ -121,6 +123,7 @@ static void kfd_device_info_set_sdma_info(struct kfd_dev *kfd)
case IP_VERSION(6, 1, 0):
case IP_VERSION(6, 1, 1):
case IP_VERSION(6, 1, 2):
+ case IP_VERSION(6, 1, 3):
case IP_VERSION(7, 0, 0):
case IP_VERSION(7, 0, 1):
/* Reserve 1 for paging and 1 for gfx */
@@ -152,6 +155,7 @@ static void kfd_device_info_set_event_interrupt_class(struct kfd_dev *kfd)
break;
case IP_VERSION(9, 4, 3): /* GC 9.4.3 */
case IP_VERSION(9, 4, 4): /* GC 9.4.4 */
+ case IP_VERSION(9, 5, 0): /* GC 9.5.0 */
kfd->device_info.event_interrupt_class =
&event_interrupt_class_v9_4_3;
break;
@@ -178,6 +182,7 @@ static void kfd_device_info_set_event_interrupt_class(struct kfd_dev *kfd)
case IP_VERSION(11, 5, 0):
case IP_VERSION(11, 5, 1):
case IP_VERSION(11, 5, 2):
+ case IP_VERSION(11, 5, 3):
kfd->device_info.event_interrupt_class = &event_interrupt_class_v11;
break;
case IP_VERSION(12, 0, 0):
@@ -235,6 +240,9 @@ static void kfd_device_info_init(struct kfd_dev *kfd,
*/
kfd->device_info.needs_pci_atomics = true;
kfd->device_info.no_atomic_fw_version = kfd->adev->gfx.rs64_enable ? 509 : 0;
+ } else if (gc_version < IP_VERSION(13, 0, 0)) {
+ kfd->device_info.needs_pci_atomics = true;
+ kfd->device_info.no_atomic_fw_version = 2090;
} else {
kfd->device_info.needs_pci_atomics = true;
}
@@ -344,15 +352,14 @@ struct kfd_dev *kgd2kfd_probe(struct amdgpu_device *adev, bool vf)
f2g = &aldebaran_kfd2kgd;
break;
case IP_VERSION(9, 4, 3):
- gfx_target_version = adev->rev_id >= 1 ? 90402
- : adev->flags & AMD_IS_APU ? 90400
- : 90401;
- f2g = &gc_9_4_3_kfd2kgd;
- break;
case IP_VERSION(9, 4, 4):
gfx_target_version = 90402;
f2g = &gc_9_4_3_kfd2kgd;
break;
+ case IP_VERSION(9, 5, 0):
+ gfx_target_version = 90500;
+ f2g = &gc_9_4_3_kfd2kgd;
+ break;
/* Navi10 */
case IP_VERSION(10, 1, 10):
gfx_target_version = 100100;
@@ -445,6 +452,10 @@ struct kfd_dev *kgd2kfd_probe(struct amdgpu_device *adev, bool vf)
gfx_target_version = 110502;
f2g = &gfx_v11_kfd2kgd;
break;
+ case IP_VERSION(11, 5, 3):
+ gfx_target_version = 110503;
+ f2g = &gfx_v11_kfd2kgd;
+ break;
case IP_VERSION(12, 0, 0):
gfx_target_version = 120000;
f2g = &gfx_v12_kfd2kgd;
@@ -512,6 +523,10 @@ static void kfd_cwsr_init(struct kfd_dev *kfd)
> KFD_CWSR_TMA_OFFSET);
kfd->cwsr_isa = cwsr_trap_gfx9_4_3_hex;
kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx9_4_3_hex);
+ } else if (KFD_GC_VERSION(kfd) == IP_VERSION(9, 5, 0)) {
+ BUILD_BUG_ON(sizeof(cwsr_trap_gfx9_5_0_hex) > PAGE_SIZE);
+ kfd->cwsr_isa = cwsr_trap_gfx9_5_0_hex;
+ kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx9_5_0_hex);
} else if (KFD_GC_VERSION(kfd) < IP_VERSION(10, 1, 1)) {
BUILD_BUG_ON(sizeof(cwsr_trap_gfx9_hex)
> KFD_CWSR_TMA_OFFSET);
@@ -534,7 +549,8 @@ static void kfd_cwsr_init(struct kfd_dev *kfd)
kfd->cwsr_isa = cwsr_trap_gfx11_hex;
kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx11_hex);
} else {
- BUILD_BUG_ON(sizeof(cwsr_trap_gfx12_hex) > PAGE_SIZE);
+ BUILD_BUG_ON(sizeof(cwsr_trap_gfx12_hex)
+ > KFD_CWSR_TMA_OFFSET);
kfd->cwsr_isa = cwsr_trap_gfx12_hex;
kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx12_hex);
}
@@ -563,14 +579,19 @@ static int kfd_gws_init(struct kfd_node *node)
&& kfd->mec2_fw_version >= 0x28) ||
(KFD_GC_VERSION(node) == IP_VERSION(9, 4, 3) ||
KFD_GC_VERSION(node) == IP_VERSION(9, 4, 4)) ||
+ (KFD_GC_VERSION(node) == IP_VERSION(9, 5, 0)) ||
(KFD_GC_VERSION(node) >= IP_VERSION(10, 3, 0)
&& KFD_GC_VERSION(node) < IP_VERSION(11, 0, 0)
&& kfd->mec2_fw_version >= 0x6b) ||
(KFD_GC_VERSION(node) >= IP_VERSION(11, 0, 0)
&& KFD_GC_VERSION(node) < IP_VERSION(12, 0, 0)
- && mes_rev >= 68))))
+ && mes_rev >= 68) ||
+ (KFD_GC_VERSION(node) >= IP_VERSION(12, 0, 0))))) {
+ if (KFD_GC_VERSION(node) >= IP_VERSION(12, 0, 0))
+ node->adev->gds.gws_size = 64;
ret = amdgpu_amdkfd_alloc_gws(node->adev,
node->adev->gds.gws_size, &node->gws);
+ }
return ret;
}
@@ -634,6 +655,14 @@ static void kfd_cleanup_nodes(struct kfd_dev *kfd, unsigned int num_nodes)
struct kfd_node *knode;
unsigned int i;
+ /*
+ * flush_work ensures that there are no outstanding
+ * work-queue items that will access interrupt_ring. New work items
+ * can't be created because we stopped interrupt handling above.
+ */
+ flush_workqueue(kfd->ih_wq);
+ destroy_workqueue(kfd->ih_wq);
+
for (i = 0; i < num_nodes; i++) {
knode = kfd->nodes[i];
device_queue_manager_uninit(knode->dqm);
@@ -729,14 +758,14 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
last_vmid_kfd = fls(gpu_resources->compute_vmid_bitmap)-1;
vmid_num_kfd = last_vmid_kfd - first_vmid_kfd + 1;
- /* For GFX9.4.3, we need special handling for VMIDs depending on
- * partition mode.
+ /* For multi-partition capable GPUs, we need special handling for VMIDs
+ * depending on partition mode.
* In CPX mode, the VMID range needs to be shared between XCDs.
* Additionally, there are 13 VMIDs (3-15) available for KFD. To
* divide them equally, we change starting VMID to 4 and not use
* VMID 3.
- * If the VMID range changes for GFX9.4.3, then this code MUST be
- * revisited.
+ * If the VMID range changes for multi-partition capable GPUs, then
+ * this code MUST be revisited.
*/
if (kfd->adev->xcp_mgr) {
partition_mode = amdgpu_xcp_query_partition_mode(kfd->adev->xcp_mgr,
@@ -801,14 +830,12 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
kfd->hive_id = kfd->adev->gmc.xgmi.hive_id;
/*
- * For GFX9.4.3, the KFD abstracts all partitions within a socket as
- * xGMI connected in the topology so assign a unique hive id per
- * device based on the pci device location if device is in PCIe mode.
+ * For multi-partition capable GPUs, the KFD abstracts all partitions
+ * within a socket as xGMI connected in the topology so assign a unique
+ * hive id per device based on the pci device location if device is in
+ * PCIe mode.
*/
- if (!kfd->hive_id &&
- (KFD_GC_VERSION(kfd) == IP_VERSION(9, 4, 3) ||
- KFD_GC_VERSION(kfd) == IP_VERSION(9, 4, 4)) &&
- kfd->num_nodes > 1)
+ if (!kfd->hive_id && kfd->num_nodes > 1)
kfd->hive_id = pci_dev_id(kfd->adev->pdev);
kfd->noretry = kfd->adev->gmc.noretry;
@@ -846,12 +873,11 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
KFD_XCP_MEMORY_SIZE(node->adev, node->node_id) >> 20);
}
- if ((KFD_GC_VERSION(kfd) == IP_VERSION(9, 4, 3) ||
- KFD_GC_VERSION(kfd) == IP_VERSION(9, 4, 4)) &&
- partition_mode == AMDGPU_CPX_PARTITION_MODE &&
+ if (partition_mode == AMDGPU_CPX_PARTITION_MODE &&
kfd->num_nodes != 1) {
- /* For GFX9.4.3 and CPX mode, first XCD gets VMID range
- * 4-9 and second XCD gets VMID range 10-15.
+ /* For multi-partition capable GPUs and CPX mode, first
+ * XCD gets VMID range 4-9 and second XCD gets VMID
+ * range 10-15.
*/
node->vm_info.first_vmid_kfd = (i%2 == 0) ?
@@ -875,8 +901,7 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
amdgpu_amdkfd_get_local_mem_info(kfd->adev,
&node->local_mem_info, node->xcp);
- if (KFD_GC_VERSION(kfd) == IP_VERSION(9, 4, 3) ||
- KFD_GC_VERSION(kfd) == IP_VERSION(9, 4, 4))
+ if (kfd->adev->xcp_mgr)
kfd_setup_interrupt_bitmap(node, i);
/* Initialize the KFD node */
@@ -1055,21 +1080,6 @@ static int kfd_resume(struct kfd_node *node)
return err;
}
-static inline void kfd_queue_work(struct workqueue_struct *wq,
- struct work_struct *work)
-{
- int cpu, new_cpu;
-
- cpu = new_cpu = smp_processor_id();
- do {
- new_cpu = cpumask_next(new_cpu, cpu_online_mask) % nr_cpu_ids;
- if (cpu_to_node(new_cpu) == numa_node_id())
- break;
- } while (cpu != new_cpu);
-
- queue_work_on(new_cpu, wq, work);
-}
-
/* This is called directly from KGD at ISR. */
void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry)
{
@@ -1095,7 +1105,7 @@ void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry)
patched_ihre, &is_patched)
&& enqueue_ih_ring_entry(node,
is_patched ? patched_ihre : ih_ring_entry)) {
- kfd_queue_work(node->ih_wq, &node->interrupt_work);
+ queue_work(node->kfd->ih_wq, &node->interrupt_work);
spin_unlock_irqrestore(&node->interrupt_lock, flags);
return;
}
@@ -1392,6 +1402,13 @@ void kfd_dec_compute_active(struct kfd_node *node)
WARN_ONCE(count < 0, "Compute profile ref. count error");
}
+static bool kfd_compute_active(struct kfd_node *node)
+{
+ if (atomic_read(&node->kfd->compute_profile))
+ return true;
+ return false;
+}
+
void kgd2kfd_smi_event_throttle(struct kfd_dev *kfd, uint64_t throttle_bitmask)
{
/*
@@ -1485,6 +1502,91 @@ int kgd2kfd_stop_sched(struct kfd_dev *kfd, uint32_t node_id)
return node->dqm->ops.halt(node->dqm);
}
+bool kgd2kfd_compute_active(struct kfd_dev *kfd, uint32_t node_id)
+{
+ struct kfd_node *node;
+
+ if (!kfd->init_complete)
+ return false;
+
+ if (node_id >= kfd->num_nodes) {
+ dev_warn(kfd->adev->dev, "Invalid node ID: %u exceeds %u\n",
+ node_id, kfd->num_nodes - 1);
+ return false;
+ }
+
+ node = kfd->nodes[node_id];
+
+ return kfd_compute_active(node);
+}
+
+/**
+ * kgd2kfd_vmfault_fast_path() - KFD vm page fault interrupt handling fast path for gmc v9
+ * @adev: amdgpu device
+ * @entry: vm fault interrupt vector
+ * @retry_fault: if this is retry fault
+ *
+ * retry fault -
+ * with CAM enabled, adev primary ring
+ * | gmc_v9_0_process_interrupt()
+ * adev soft_ring
+ * | gmc_v9_0_process_interrupt() worker failed to recover page fault
+ * KFD node ih_fifo
+ * | KFD interrupt_wq worker
+ * kfd_signal_vm_fault_event
+ *
+ * without CAM, adev primary ring1
+ * | gmc_v9_0_process_interrupt worker failed to recvoer page fault
+ * KFD node ih_fifo
+ * | KFD interrupt_wq worker
+ * kfd_signal_vm_fault_event
+ *
+ * no-retry fault -
+ * adev primary ring
+ * | gmc_v9_0_process_interrupt()
+ * KFD node ih_fifo
+ * | KFD interrupt_wq worker
+ * kfd_signal_vm_fault_event
+ *
+ * fast path - After kfd_signal_vm_fault_event, gmc_v9_0_process_interrupt drop the page fault
+ * of same process, don't copy interrupt to KFD node ih_fifo.
+ * With gdb debugger enabled, need convert the retry fault to no-retry fault for
+ * debugger, cannot use the fast path.
+ *
+ * Return:
+ * true - use the fast path to handle this fault
+ * false - use normal path to handle it
+ */
+bool kgd2kfd_vmfault_fast_path(struct amdgpu_device *adev, struct amdgpu_iv_entry *entry,
+ bool retry_fault)
+{
+ struct kfd_process *p;
+ u32 cam_index;
+
+ if (entry->ih == &adev->irq.ih_soft || entry->ih == &adev->irq.ih1) {
+ p = kfd_lookup_process_by_pasid(entry->pasid, NULL);
+ if (!p)
+ return true;
+
+ if (p->gpu_page_fault && !p->debug_trap_enabled) {
+ if (retry_fault && adev->irq.retry_cam_enabled) {
+ cam_index = entry->src_data[2] & 0x3ff;
+ WDOORBELL32(adev->irq.retry_cam_doorbell_index, cam_index);
+ }
+
+ kfd_unref_process(p);
+ return true;
+ }
+
+ /*
+ * This is the first page fault, set flag and then signal user space
+ */
+ p->gpu_page_fault = true;
+ kfd_unref_process(p);
+ }
+ return false;
+}
+
#if defined(CONFIG_DEBUG_FS)
/* This function will send a package to HIQ to hang the HWS
@@ -1497,6 +1599,11 @@ int kfd_debugfs_hang_hws(struct kfd_node *dev)
return -EINVAL;
}
+ if (dev->kfd->shared_resources.enable_mes) {
+ dev_err(dev->adev->dev, "Inducing MES hang is not supported\n");
+ return -EINVAL;
+ }
+
return dqm_debugfs_hang_hws(dev->dqm);
}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index 648f40091aa3..76359c6a3f3a 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -36,12 +36,15 @@
#include "kfd_kernel_queue.h"
#include "amdgpu_amdkfd.h"
#include "amdgpu_reset.h"
+#include "amdgpu_sdma.h"
#include "mes_v11_api_def.h"
#include "kfd_debug.h"
/* Size of the per-pipe EOP queue */
#define CIK_HPD_EOP_BYTES_LOG2 11
#define CIK_HPD_EOP_BYTES (1U << CIK_HPD_EOP_BYTES_LOG2)
+/* See unmap_queues_cpsch() */
+#define USE_DEFAULT_GRACE_PERIOD 0xffffffff
static int set_pasid_vmid_mapping(struct device_queue_manager *dqm,
u32 pasid, unsigned int vmid);
@@ -66,7 +69,8 @@ static inline void deallocate_hqd(struct device_queue_manager *dqm,
static int allocate_hqd(struct device_queue_manager *dqm, struct queue *q);
static int allocate_sdma_queue(struct device_queue_manager *dqm,
struct queue *q, const uint32_t *restore_sdma_id);
-static void kfd_process_hw_exception(struct work_struct *work);
+
+static int reset_queues_on_hws_hang(struct device_queue_manager *dqm, bool is_sdma);
static inline
enum KFD_MQD_TYPE get_mqd_type_from_queue_type(enum kfd_queue_type type)
@@ -170,7 +174,7 @@ static void kfd_hws_hang(struct device_queue_manager *dqm)
/*
* Issue a GPU reset if HWS is unresponsive
*/
- schedule_work(&dqm->hw_exception_work);
+ amdgpu_amdkfd_gpu_reset(dqm->dev->adev);
}
static int convert_to_mes_queue_type(int queue_type)
@@ -202,11 +206,13 @@ static int add_queue_mes(struct device_queue_manager *dqm, struct queue *q,
int r, queue_type;
uint64_t wptr_addr_off;
+ if (!dqm->sched_running || dqm->sched_halt)
+ return 0;
if (!down_read_trylock(&adev->reset_domain->sem))
return -EIO;
memset(&queue_input, 0x0, sizeof(struct mes_add_queue_input));
- queue_input.process_id = qpd->pqm->process->pasid;
+ queue_input.process_id = pdd->pasid;
queue_input.page_table_base_addr = qpd->page_table_base;
queue_input.process_va_start = 0;
queue_input.process_va_end = adev->vm_manager.max_pfn - 1;
@@ -270,6 +276,8 @@ static int remove_queue_mes(struct device_queue_manager *dqm, struct queue *q,
int r;
struct mes_remove_queue_input queue_input;
+ if (!dqm->sched_running || dqm->sched_halt)
+ return 0;
if (!down_read_trylock(&adev->reset_domain->sem))
return -EIO;
@@ -292,7 +300,7 @@ static int remove_queue_mes(struct device_queue_manager *dqm, struct queue *q,
return r;
}
-static int remove_all_queues_mes(struct device_queue_manager *dqm)
+static int remove_all_kfd_queues_mes(struct device_queue_manager *dqm)
{
struct device_process_node *cur;
struct device *dev = dqm->dev->adev->dev;
@@ -319,6 +327,33 @@ static int remove_all_queues_mes(struct device_queue_manager *dqm)
return retval;
}
+static int add_all_kfd_queues_mes(struct device_queue_manager *dqm)
+{
+ struct device_process_node *cur;
+ struct device *dev = dqm->dev->adev->dev;
+ struct qcm_process_device *qpd;
+ struct queue *q;
+ int retval = 0;
+
+ list_for_each_entry(cur, &dqm->queues, list) {
+ qpd = cur->qpd;
+ list_for_each_entry(q, &qpd->queues_list, list) {
+ if (!q->properties.is_active)
+ continue;
+ retval = add_queue_mes(dqm, q, qpd);
+ if (retval) {
+ dev_err(dev, "%s: Failed to add queue %d for dev %d",
+ __func__,
+ q->properties.queue_id,
+ dqm->dev->id);
+ return retval;
+ }
+ }
+ }
+
+ return retval;
+}
+
static int suspend_all_queues_mes(struct device_queue_manager *dqm)
{
struct amdgpu_device *adev = (struct amdgpu_device *)dqm->dev->adev;
@@ -496,6 +531,7 @@ static int allocate_vmid(struct device_queue_manager *dqm,
struct qcm_process_device *qpd,
struct queue *q)
{
+ struct kfd_process_device *pdd = qpd_to_pdd(qpd);
struct device *dev = dqm->dev->adev->dev;
int allocated_vmid = -1, i;
@@ -514,9 +550,9 @@ static int allocate_vmid(struct device_queue_manager *dqm,
pr_debug("vmid allocated: %d\n", allocated_vmid);
- dqm->vmid_pasid[allocated_vmid] = q->process->pasid;
+ dqm->vmid_pasid[allocated_vmid] = pdd->pasid;
- set_pasid_vmid_mapping(dqm, q->process->pasid, allocated_vmid);
+ set_pasid_vmid_mapping(dqm, pdd->pasid, allocated_vmid);
qpd->vmid = allocated_vmid;
q->properties.vmid = allocated_vmid;
@@ -768,6 +804,11 @@ static int dbgdev_wave_reset_wavefronts(struct kfd_node *dev, struct kfd_process
return -EOPNOTSUPP;
}
+ /* taking the VMID for that process on the safe way using PDD */
+ pdd = kfd_get_process_device_data(dev, p);
+ if (!pdd)
+ return -EFAULT;
+
/* Scan all registers in the range ATC_VMID8_PASID_MAPPING ..
* ATC_VMID15_PASID_MAPPING
* to check which VMID the current process is mapped to.
@@ -777,23 +818,19 @@ static int dbgdev_wave_reset_wavefronts(struct kfd_node *dev, struct kfd_process
status = dev->kfd2kgd->get_atc_vmid_pasid_mapping_info
(dev->adev, vmid, &queried_pasid);
- if (status && queried_pasid == p->pasid) {
- pr_debug("Killing wave fronts of vmid %d and pasid 0x%x\n",
- vmid, p->pasid);
+ if (status && queried_pasid == pdd->pasid) {
+ pr_debug("Killing wave fronts of vmid %d and process pid %d\n",
+ vmid, p->lead_thread->pid);
break;
}
}
if (vmid > last_vmid_to_scan) {
- dev_err(dev->adev->dev, "Didn't find vmid for pasid 0x%x\n", p->pasid);
+ dev_err(dev->adev->dev, "Didn't find vmid for process pid %d\n",
+ p->lead_thread->pid);
return -EFAULT;
}
- /* taking the VMID for that process on the safe way using PDD */
- pdd = kfd_get_process_device_data(dev, p);
- if (!pdd)
- return -EFAULT;
-
reg_gfx_index.bits.sh_broadcast_writes = 1;
reg_gfx_index.bits.se_broadcast_writes = 1;
reg_gfx_index.bits.instance_broadcast_writes = 1;
@@ -1029,8 +1066,8 @@ static int suspend_single_queue(struct device_queue_manager *dqm,
if (q->properties.is_suspended)
return 0;
- pr_debug("Suspending PASID %u queue [%i]\n",
- pdd->process->pasid,
+ pr_debug("Suspending process pid %d queue [%i]\n",
+ pdd->process->lead_thread->pid,
q->properties.queue_id);
is_new = q->properties.exception_status & KFD_EC_MASK(EC_QUEUE_NEW);
@@ -1077,8 +1114,8 @@ static int resume_single_queue(struct device_queue_manager *dqm,
pdd = qpd_to_pdd(qpd);
- pr_debug("Restoring from suspend PASID %u queue [%i]\n",
- pdd->process->pasid,
+ pr_debug("Restoring from suspend process pid %d queue [%i]\n",
+ pdd->process->lead_thread->pid,
q->properties.queue_id);
q->properties.is_suspended = false;
@@ -1111,8 +1148,8 @@ static int evict_process_queues_nocpsch(struct device_queue_manager *dqm,
goto out;
pdd = qpd_to_pdd(qpd);
- pr_debug_ratelimited("Evicting PASID 0x%x queues\n",
- pdd->process->pasid);
+ pr_debug_ratelimited("Evicting process pid %d queues\n",
+ pdd->process->lead_thread->pid);
pdd->last_evict_timestamp = get_jiffies_64();
/* Mark all queues as evicted. Deactivate all active queues on
@@ -1169,8 +1206,8 @@ static int evict_process_queues_cpsch(struct device_queue_manager *dqm,
if (!pdd->drm_priv)
goto out;
- pr_debug_ratelimited("Evicting PASID 0x%x queues\n",
- pdd->process->pasid);
+ pr_debug_ratelimited("Evicting process pid %d queues\n",
+ pdd->process->lead_thread->pid);
/* Mark all queues as evicted. Deactivate all active queues on
* the qpd.
@@ -1184,11 +1221,13 @@ static int evict_process_queues_cpsch(struct device_queue_manager *dqm,
decrement_queue_count(dqm, qpd, q);
if (dqm->dev->kfd->shared_resources.enable_mes) {
- retval = remove_queue_mes(dqm, q, qpd);
- if (retval) {
+ int err;
+
+ err = remove_queue_mes(dqm, q, qpd);
+ if (err) {
dev_err(dev, "Failed to evict queue %d\n",
q->properties.queue_id);
- goto out;
+ retval = err;
}
}
}
@@ -1228,8 +1267,8 @@ static int restore_process_queues_nocpsch(struct device_queue_manager *dqm,
goto out;
}
- pr_debug_ratelimited("Restoring PASID 0x%x queues\n",
- pdd->process->pasid);
+ pr_debug_ratelimited("Restoring process pid %d queues\n",
+ pdd->process->lead_thread->pid);
/* Update PD Base in QPD */
qpd->page_table_base = pd_base;
@@ -1312,8 +1351,8 @@ static int restore_process_queues_cpsch(struct device_queue_manager *dqm,
if (!pdd->drm_priv)
goto vm_not_acquired;
- pr_debug_ratelimited("Restoring PASID 0x%x queues\n",
- pdd->process->pasid);
+ pr_debug_ratelimited("Restoring process pid %d queues\n",
+ pdd->process->lead_thread->pid);
/* Update PD Base in QPD */
qpd->page_table_base = amdgpu_amdkfd_gpuvm_get_process_page_dir(pdd->drm_priv);
@@ -1537,8 +1576,9 @@ static int allocate_sdma_queue(struct device_queue_manager *dqm,
int bit;
if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
- if (bitmap_empty(dqm->sdma_bitmap, KFD_MAX_SDMA_QUEUES)) {
- dev_err(dev, "No more SDMA queue to allocate\n");
+ if (bitmap_empty(dqm->sdma_bitmap, get_num_sdma_queues(dqm))) {
+ dev_warn(dev, "No more SDMA queue to allocate (%d total queues)\n",
+ get_num_sdma_queues(dqm));
return -ENOMEM;
}
@@ -1563,8 +1603,9 @@ static int allocate_sdma_queue(struct device_queue_manager *dqm,
q->properties.sdma_queue_id = q->sdma_id /
kfd_get_num_sdma_engines(dqm->dev);
} else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
- if (bitmap_empty(dqm->xgmi_sdma_bitmap, KFD_MAX_SDMA_QUEUES)) {
- dev_err(dev, "No more XGMI SDMA queue to allocate\n");
+ if (bitmap_empty(dqm->xgmi_sdma_bitmap, get_num_xgmi_sdma_queues(dqm))) {
+ dev_warn(dev, "No more XGMI SDMA queue to allocate (%d total queues)\n",
+ get_num_xgmi_sdma_queues(dqm));
return -ENOMEM;
}
if (restore_sdma_id) {
@@ -1623,8 +1664,8 @@ static int allocate_sdma_queue(struct device_queue_manager *dqm,
}
if (!free_bit_found) {
- dev_err(dev, "No more SDMA queue to allocate for target ID %i\n",
- q->properties.sdma_engine_id);
+ dev_warn(dev, "No more SDMA queue to allocate for target ID %i (%d total queues)\n",
+ q->properties.sdma_engine_id, num_queues);
return -ENOMEM;
}
}
@@ -1707,15 +1748,11 @@ static int initialize_cpsch(struct device_queue_manager *dqm)
dqm->active_cp_queue_count = 0;
dqm->gws_queue_count = 0;
dqm->active_runlist = false;
- INIT_WORK(&dqm->hw_exception_work, kfd_process_hw_exception);
dqm->trap_debug_vmid = 0;
init_sdma_bitmaps(dqm);
- if (dqm->dev->kfd2kgd->get_iq_wait_times)
- dqm->dev->kfd2kgd->get_iq_wait_times(dqm->dev->adev,
- &dqm->wait_times,
- ffs(dqm->dev->xcc_mask) - 1);
+ update_dqm_wait_times(dqm);
return 0;
}
@@ -1742,7 +1779,7 @@ static int halt_cpsch(struct device_queue_manager *dqm)
KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0,
USE_DEFAULT_GRACE_PERIOD, false);
else
- ret = remove_all_queues_mes(dqm);
+ ret = remove_all_kfd_queues_mes(dqm);
}
dqm->sched_halt = true;
dqm_unlock(dqm);
@@ -1768,6 +1805,9 @@ static int unhalt_cpsch(struct device_queue_manager *dqm)
ret = execute_queues_cpsch(dqm,
KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES,
0, USE_DEFAULT_GRACE_PERIOD);
+ else
+ ret = add_all_kfd_queues_mes(dqm);
+
dqm_unlock(dqm);
return ret;
@@ -1808,25 +1848,11 @@ static int start_cpsch(struct device_queue_manager *dqm)
/* clear hang status when driver try to start the hw scheduler */
dqm->sched_running = true;
- if (!dqm->dev->kfd->shared_resources.enable_mes)
+ if (!dqm->dev->kfd->shared_resources.enable_mes) {
+ if (pm_config_dequeue_wait_counts(&dqm->packet_mgr,
+ KFD_DEQUEUE_WAIT_INIT, 0 /* unused */))
+ dev_err(dev, "Setting optimized dequeue wait failed. Using default values\n");
execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, USE_DEFAULT_GRACE_PERIOD);
-
- /* Set CWSR grace period to 1x1000 cycle for GFX9.4.3 APU */
- if (amdgpu_emu_mode == 0 && dqm->dev->adev->gmc.is_app_apu &&
- (KFD_GC_VERSION(dqm->dev) == IP_VERSION(9, 4, 3))) {
- uint32_t reg_offset = 0;
- uint32_t grace_period = 1;
-
- retval = pm_update_grace_period(&dqm->packet_mgr,
- grace_period);
- if (retval)
- dev_err(dev, "Setting grace timeout failed\n");
- else if (dqm->dev->kfd2kgd->build_grace_period_packet_info)
- /* Update dqm->wait_times maintained in software */
- dqm->dev->kfd2kgd->build_grace_period_packet_info(
- dqm->dev->adev, dqm->wait_times,
- grace_period, &reg_offset,
- &dqm->wait_times);
}
/* setup per-queue reset detection buffer */
@@ -1867,7 +1893,7 @@ static int stop_cpsch(struct device_queue_manager *dqm)
if (!dqm->dev->kfd->shared_resources.enable_mes)
unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0, USE_DEFAULT_GRACE_PERIOD, false);
else
- remove_all_queues_mes(dqm);
+ remove_all_kfd_queues_mes(dqm);
dqm->sched_running = false;
@@ -2048,7 +2074,7 @@ int amdkfd_fence_wait_timeout(struct device_queue_manager *dqm,
{
unsigned long end_jiffies = msecs_to_jiffies(timeout_ms) + jiffies;
struct device *dev = dqm->dev->adev->dev;
- uint64_t *fence_addr = dqm->fence_addr;
+ uint64_t *fence_addr = dqm->fence_addr;
while (*fence_addr != fence_value) {
/* Fatal err detected, this response won't come */
@@ -2101,8 +2127,8 @@ static void set_queue_as_reset(struct device_queue_manager *dqm, struct queue *q
{
struct kfd_process_device *pdd = qpd_to_pdd(qpd);
- dev_err(dqm->dev->adev->dev, "queue id 0x%0x at pasid 0x%0x is reset\n",
- q->properties.queue_id, q->process->pasid);
+ dev_err(dqm->dev->adev->dev, "queue id 0x%0x at pasid %d is reset\n",
+ q->properties.queue_id, pdd->process->lead_thread->pid);
pdd->has_reset_queue = true;
if (q->properties.is_active) {
@@ -2171,8 +2197,7 @@ static struct queue *find_queue_by_address(struct device_queue_manager *dqm, uin
return NULL;
}
-/* only for compute queue */
-static int reset_queues_on_hws_hang(struct device_queue_manager *dqm)
+static int reset_hung_queues(struct device_queue_manager *dqm)
{
int r = 0, reset_count = 0, i;
@@ -2225,7 +2250,112 @@ reset_fail:
return r;
}
-/* dqm->lock mutex has to be locked before calling this function */
+static bool sdma_has_hang(struct device_queue_manager *dqm)
+{
+ int engine_start = dqm->dev->node_id * get_num_all_sdma_engines(dqm);
+ int engine_end = engine_start + get_num_all_sdma_engines(dqm);
+ int num_queues_per_eng = dqm->dev->kfd->device_info.num_sdma_queues_per_engine;
+ int i, j;
+
+ for (i = engine_start; i < engine_end; i++) {
+ for (j = 0; j < num_queues_per_eng; j++) {
+ if (!dqm->dev->kfd2kgd->hqd_sdma_get_doorbell(dqm->dev->adev, i, j))
+ continue;
+
+ return true;
+ }
+ }
+
+ return false;
+}
+
+static bool set_sdma_queue_as_reset(struct device_queue_manager *dqm,
+ uint32_t doorbell_off)
+{
+ struct device_process_node *cur;
+ struct qcm_process_device *qpd;
+ struct queue *q;
+
+ list_for_each_entry(cur, &dqm->queues, list) {
+ qpd = cur->qpd;
+ list_for_each_entry(q, &qpd->queues_list, list) {
+ if ((q->properties.type == KFD_QUEUE_TYPE_SDMA ||
+ q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) &&
+ q->properties.doorbell_off == doorbell_off) {
+ set_queue_as_reset(dqm, q, qpd);
+ return true;
+ }
+ }
+ }
+
+ return false;
+}
+
+static int reset_hung_queues_sdma(struct device_queue_manager *dqm)
+{
+ int engine_start = dqm->dev->node_id * get_num_all_sdma_engines(dqm);
+ int engine_end = engine_start + get_num_all_sdma_engines(dqm);
+ int num_queues_per_eng = dqm->dev->kfd->device_info.num_sdma_queues_per_engine;
+ int r = 0, i, j;
+
+ if (dqm->is_hws_hang)
+ return -EIO;
+
+ /* Scan for hung HW queues and reset engine. */
+ dqm->detect_hang_count = 0;
+ for (i = engine_start; i < engine_end; i++) {
+ for (j = 0; j < num_queues_per_eng; j++) {
+ uint32_t doorbell_off =
+ dqm->dev->kfd2kgd->hqd_sdma_get_doorbell(dqm->dev->adev, i, j);
+
+ if (!doorbell_off)
+ continue;
+
+ /* Reset engine and check. */
+ if (amdgpu_sdma_reset_engine(dqm->dev->adev, i) ||
+ dqm->dev->kfd2kgd->hqd_sdma_get_doorbell(dqm->dev->adev, i, j) ||
+ !set_sdma_queue_as_reset(dqm, doorbell_off)) {
+ r = -ENOTRECOVERABLE;
+ goto reset_fail;
+ }
+
+ /* Should only expect one queue active per engine */
+ dqm->detect_hang_count++;
+ break;
+ }
+ }
+
+ /* Signal process reset */
+ if (dqm->detect_hang_count)
+ kfd_signal_reset_event(dqm->dev);
+ else
+ r = -ENOTRECOVERABLE;
+
+reset_fail:
+ dqm->detect_hang_count = 0;
+
+ return r;
+}
+
+static int reset_queues_on_hws_hang(struct device_queue_manager *dqm, bool is_sdma)
+{
+ while (halt_if_hws_hang)
+ schedule();
+
+ if (!amdgpu_gpu_recovery)
+ return -ENOTRECOVERABLE;
+
+ return is_sdma ? reset_hung_queues_sdma(dqm) : reset_hung_queues(dqm);
+}
+
+/* dqm->lock mutex has to be locked before calling this function
+ *
+ * @grace_period: If USE_DEFAULT_GRACE_PERIOD then default wait time
+ * for context switch latency. Lower values are used by debugger
+ * since context switching are triggered at high frequency.
+ * This is configured by setting CP_IQ_WAIT_TIME2.SCH_WAVE
+ *
+ */
static int unmap_queues_cpsch(struct device_queue_manager *dqm,
enum kfd_unmap_queues_filter filter,
uint32_t filter_param,
@@ -2244,7 +2374,8 @@ static int unmap_queues_cpsch(struct device_queue_manager *dqm,
return -EIO;
if (grace_period != USE_DEFAULT_GRACE_PERIOD) {
- retval = pm_update_grace_period(&dqm->packet_mgr, grace_period);
+ retval = pm_config_dequeue_wait_counts(&dqm->packet_mgr,
+ KFD_DEQUEUE_WAIT_SET_SCH_WAVE, grace_period);
if (retval)
goto out;
}
@@ -2254,6 +2385,7 @@ static int unmap_queues_cpsch(struct device_queue_manager *dqm,
goto out;
*dqm->fence_addr = KFD_FENCE_INIT;
+ mb();
pm_send_query_status(&dqm->packet_mgr, dqm->fence_gpu_addr,
KFD_FENCE_COMPLETED);
/* should be timed out */
@@ -2274,30 +2406,32 @@ static int unmap_queues_cpsch(struct device_queue_manager *dqm,
* check those fields
*/
mqd_mgr = dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ];
- if (mqd_mgr->check_preemption_failed(mqd_mgr, dqm->packet_mgr.priv_queue->queue->mqd)) {
- if (reset_queues_on_hws_hang(dqm)) {
- while (halt_if_hws_hang)
- schedule();
- dqm->is_hws_hang = true;
- kfd_hws_hang(dqm);
- retval = -ETIME;
- goto out;
- }
- }
+ if (mqd_mgr->check_preemption_failed(mqd_mgr, dqm->packet_mgr.priv_queue->queue->mqd) &&
+ reset_queues_on_hws_hang(dqm, false))
+ goto reset_fail;
+
+ /* Check for SDMA hang and attempt SDMA reset */
+ if (sdma_has_hang(dqm) && reset_queues_on_hws_hang(dqm, true))
+ goto reset_fail;
/* We need to reset the grace period value for this device */
if (grace_period != USE_DEFAULT_GRACE_PERIOD) {
- if (pm_update_grace_period(&dqm->packet_mgr,
- USE_DEFAULT_GRACE_PERIOD))
+ if (pm_config_dequeue_wait_counts(&dqm->packet_mgr,
+ KFD_DEQUEUE_WAIT_RESET, 0 /* unused */))
dev_err(dev, "Failed to reset grace period\n");
}
pm_release_ib(&dqm->packet_mgr);
dqm->active_runlist = false;
-
out:
up_read(&dqm->dev->adev->reset_domain->sem);
return retval;
+
+reset_fail:
+ dqm->is_hws_hang = true;
+ kfd_hws_hang(dqm);
+ up_read(&dqm->dev->adev->reset_domain->sem);
+ return -ETIME;
}
/* only for compute queue */
@@ -2338,6 +2472,9 @@ static int wait_on_destroy_queue(struct device_queue_manager *dqm,
q->process);
int ret = 0;
+ if (WARN_ON(!pdd))
+ return ret;
+
if (pdd->qpd.is_debug)
return ret;
@@ -2451,20 +2588,13 @@ failed_try_destroy_debugged_queue:
return retval;
}
-/*
- * Low bits must be 0000/FFFF as required by HW, high bits must be 0 to
- * stay in user mode.
- */
-#define APE1_FIXED_BITS_MASK 0xFFFF80000000FFFFULL
-/* APE1 limit is inclusive and 64K aligned. */
-#define APE1_LIMIT_ALIGNMENT 0xFFFF
-
static bool set_cache_memory_policy(struct device_queue_manager *dqm,
struct qcm_process_device *qpd,
enum cache_policy default_policy,
enum cache_policy alternate_policy,
void __user *alternate_aperture_base,
- uint64_t alternate_aperture_size)
+ uint64_t alternate_aperture_size,
+ u32 misc_process_properties)
{
bool retval = true;
@@ -2473,41 +2603,17 @@ static bool set_cache_memory_policy(struct device_queue_manager *dqm,
dqm_lock(dqm);
- if (alternate_aperture_size == 0) {
- /* base > limit disables APE1 */
- qpd->sh_mem_ape1_base = 1;
- qpd->sh_mem_ape1_limit = 0;
- } else {
- /*
- * In FSA64, APE1_Base[63:0] = { 16{SH_MEM_APE1_BASE[31]},
- * SH_MEM_APE1_BASE[31:0], 0x0000 }
- * APE1_Limit[63:0] = { 16{SH_MEM_APE1_LIMIT[31]},
- * SH_MEM_APE1_LIMIT[31:0], 0xFFFF }
- * Verify that the base and size parameters can be
- * represented in this format and convert them.
- * Additionally restrict APE1 to user-mode addresses.
- */
-
- uint64_t base = (uintptr_t)alternate_aperture_base;
- uint64_t limit = base + alternate_aperture_size - 1;
-
- if (limit <= base || (base & APE1_FIXED_BITS_MASK) != 0 ||
- (limit & APE1_FIXED_BITS_MASK) != APE1_LIMIT_ALIGNMENT) {
- retval = false;
- goto out;
- }
-
- qpd->sh_mem_ape1_base = base >> 16;
- qpd->sh_mem_ape1_limit = limit >> 16;
- }
-
retval = dqm->asic_ops.set_cache_memory_policy(
dqm,
qpd,
default_policy,
alternate_policy,
alternate_aperture_base,
- alternate_aperture_size);
+ alternate_aperture_size,
+ misc_process_properties);
+
+ if (retval)
+ goto out;
if ((dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) && (qpd->vmid != 0))
program_sh_mem_settings(dqm, qpd);
@@ -2934,20 +3040,19 @@ void device_queue_manager_uninit(struct device_queue_manager *dqm)
int kfd_dqm_suspend_bad_queue_mes(struct kfd_node *knode, u32 pasid, u32 doorbell_id)
{
- struct kfd_process_device *pdd;
- struct kfd_process *p = kfd_lookup_process_by_pasid(pasid);
+ struct kfd_process_device *pdd = NULL;
+ struct kfd_process *p = kfd_lookup_process_by_pasid(pasid, &pdd);
struct device_queue_manager *dqm = knode->dqm;
struct device *dev = dqm->dev->adev->dev;
struct qcm_process_device *qpd;
struct queue *q = NULL;
int ret = 0;
- if (!p)
+ if (!pdd)
return -EINVAL;
dqm_lock(dqm);
- pdd = kfd_get_process_device_data(dqm->dev, p);
if (pdd) {
qpd = &pdd->qpd;
@@ -2980,6 +3085,7 @@ int kfd_dqm_suspend_bad_queue_mes(struct kfd_node *knode, u32 pasid, u32 doorbel
out:
dqm_unlock(dqm);
+ kfd_unref_process(p);
return ret;
}
@@ -3021,35 +3127,25 @@ out:
return ret;
}
-int kfd_dqm_evict_pasid(struct device_queue_manager *dqm, u32 pasid)
+int kfd_evict_process_device(struct kfd_process_device *pdd)
{
- struct kfd_process_device *pdd;
- struct kfd_process *p = kfd_lookup_process_by_pasid(pasid);
+ struct device_queue_manager *dqm;
+ struct kfd_process *p;
int ret = 0;
- if (!p)
- return -EINVAL;
+ p = pdd->process;
+ dqm = pdd->dev->dqm;
+
WARN(debug_evictions, "Evicting pid %d", p->lead_thread->pid);
- pdd = kfd_get_process_device_data(dqm->dev, p);
- if (pdd) {
- if (dqm->dev->kfd->shared_resources.enable_mes)
- ret = kfd_dqm_evict_pasid_mes(dqm, &pdd->qpd);
- else
- ret = dqm->ops.evict_process_queues(dqm, &pdd->qpd);
- }
- kfd_unref_process(p);
+ if (dqm->dev->kfd->shared_resources.enable_mes)
+ ret = kfd_dqm_evict_pasid_mes(dqm, &pdd->qpd);
+ else
+ ret = dqm->ops.evict_process_queues(dqm, &pdd->qpd);
return ret;
}
-static void kfd_process_hw_exception(struct work_struct *work)
-{
- struct device_queue_manager *dqm = container_of(work,
- struct device_queue_manager, hw_exception_work);
- amdgpu_amdkfd_gpu_reset(dqm->dev->adev);
-}
-
int reserve_debug_trap_vmid(struct device_queue_manager *dqm,
struct qcm_process_device *qpd)
{
@@ -3173,7 +3269,7 @@ struct copy_context_work_handler_workarea {
struct kfd_process *p;
};
-static void copy_context_work_handler (struct work_struct *work)
+static void copy_context_work_handler(struct work_struct *work)
{
struct copy_context_work_handler_workarea *workarea;
struct mqd_manager *mqd_mgr;
@@ -3200,6 +3296,9 @@ static void copy_context_work_handler (struct work_struct *work)
struct qcm_process_device *qpd = &pdd->qpd;
list_for_each_entry(q, &qpd->queues_list, list) {
+ if (q->properties.type != KFD_QUEUE_TYPE_COMPUTE)
+ continue;
+
mqd_mgr = dqm->mqd_mgrs[KFD_MQD_TYPE_CP];
/* We ignore the return value from get_wave_state
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
index 09ab36f8e8c6..74a61b5b2f0b 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
@@ -37,7 +37,6 @@
#define KFD_MES_PROCESS_QUANTUM 100000
#define KFD_MES_GANG_QUANTUM 10000
-#define USE_DEFAULT_GRACE_PERIOD 0xffffffff
struct device_process_node {
struct qcm_process_device *qpd;
@@ -174,7 +173,8 @@ struct device_queue_manager_ops {
enum cache_policy default_policy,
enum cache_policy alternate_policy,
void __user *alternate_aperture_base,
- uint64_t alternate_aperture_size);
+ uint64_t alternate_aperture_size,
+ u32 misc_process_properties);
int (*process_termination)(struct device_queue_manager *dqm,
struct qcm_process_device *qpd);
@@ -210,7 +210,8 @@ struct device_queue_manager_asic_ops {
enum cache_policy default_policy,
enum cache_policy alternate_policy,
void __user *alternate_aperture_base,
- uint64_t alternate_aperture_size);
+ uint64_t alternate_aperture_size,
+ u32 misc_process_properties);
void (*init_sdma_vm)(struct device_queue_manager *dqm,
struct queue *q,
struct qcm_process_device *qpd);
@@ -269,7 +270,6 @@ struct device_queue_manager {
/* hw exception */
bool is_hws_hang;
bool is_resetting;
- struct work_struct hw_exception_work;
struct kfd_mem_obj hiq_sdma_mqd;
bool sched_running;
bool sched_halt;
@@ -359,4 +359,14 @@ static inline int read_sdma_queue_counter(uint64_t __user *q_rptr, uint64_t *val
/* SDMA activity counter is stored at queue's RPTR + 0x8 location. */
return get_user(*val, q_rptr + 1);
}
+
+static inline void update_dqm_wait_times(struct device_queue_manager *dqm)
+{
+ if (dqm->dev->kfd2kgd->get_iq_wait_times)
+ dqm->dev->kfd2kgd->get_iq_wait_times(dqm->dev->adev,
+ &dqm->wait_times,
+ ffs(dqm->dev->xcc_mask) - 1);
+}
+
+
#endif /* KFD_DEVICE_QUEUE_MANAGER_H_ */
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c
index d4d95c7f2e5d..0508ef5a41d7 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c
@@ -27,12 +27,21 @@
#include "oss/oss_2_4_sh_mask.h"
#include "gca/gfx_7_2_sh_mask.h"
+/*
+ * Low bits must be 0000/FFFF as required by HW, high bits must be 0 to
+ * stay in user mode.
+ */
+#define APE1_FIXED_BITS_MASK 0xFFFF80000000FFFFULL
+/* APE1 limit is inclusive and 64K aligned. */
+#define APE1_LIMIT_ALIGNMENT 0xFFFF
+
static bool set_cache_memory_policy_cik(struct device_queue_manager *dqm,
struct qcm_process_device *qpd,
enum cache_policy default_policy,
enum cache_policy alternate_policy,
void __user *alternate_aperture_base,
- uint64_t alternate_aperture_size);
+ uint64_t alternate_aperture_size,
+ u32 misc_process_properties);
static int update_qpd_cik(struct device_queue_manager *dqm,
struct qcm_process_device *qpd);
static void init_sdma_vm(struct device_queue_manager *dqm,
@@ -80,10 +89,41 @@ static bool set_cache_memory_policy_cik(struct device_queue_manager *dqm,
enum cache_policy default_policy,
enum cache_policy alternate_policy,
void __user *alternate_aperture_base,
- uint64_t alternate_aperture_size)
+ uint64_t alternate_aperture_size,
+ u32 misc_process_properties)
{
uint32_t default_mtype;
uint32_t ape1_mtype;
+ unsigned int temp;
+ bool retval = true;
+
+ if (alternate_aperture_size == 0) {
+ /* base > limit disables APE1 */
+ qpd->sh_mem_ape1_base = 1;
+ qpd->sh_mem_ape1_limit = 0;
+ } else {
+ /*
+ * In FSA64, APE1_Base[63:0] = { 16{SH_MEM_APE1_BASE[31]},
+ * SH_MEM_APE1_BASE[31:0], 0x0000 }
+ * APE1_Limit[63:0] = { 16{SH_MEM_APE1_LIMIT[31]},
+ * SH_MEM_APE1_LIMIT[31:0], 0xFFFF }
+ * Verify that the base and size parameters can be
+ * represented in this format and convert them.
+ * Additionally restrict APE1 to user-mode addresses.
+ */
+
+ uint64_t base = (uintptr_t)alternate_aperture_base;
+ uint64_t limit = base + alternate_aperture_size - 1;
+
+ if (limit <= base || (base & APE1_FIXED_BITS_MASK) != 0 ||
+ (limit & APE1_FIXED_BITS_MASK) != APE1_LIMIT_ALIGNMENT) {
+ retval = false;
+ goto out;
+ }
+
+ qpd->sh_mem_ape1_base = base >> 16;
+ qpd->sh_mem_ape1_limit = limit >> 16;
+ }
default_mtype = (default_policy == cache_policy_coherent) ?
MTYPE_NONCACHED :
@@ -97,37 +137,22 @@ static bool set_cache_memory_policy_cik(struct device_queue_manager *dqm,
| ALIGNMENT_MODE(SH_MEM_ALIGNMENT_MODE_UNALIGNED)
| DEFAULT_MTYPE(default_mtype)
| APE1_MTYPE(ape1_mtype);
-
- return true;
-}
-
-static int update_qpd_cik(struct device_queue_manager *dqm,
- struct qcm_process_device *qpd)
-{
- struct kfd_process_device *pdd;
- unsigned int temp;
-
- pdd = qpd_to_pdd(qpd);
-
- /* check if sh_mem_config register already configured */
- if (qpd->sh_mem_config == 0) {
- qpd->sh_mem_config =
- ALIGNMENT_MODE(SH_MEM_ALIGNMENT_MODE_UNALIGNED) |
- DEFAULT_MTYPE(MTYPE_NONCACHED) |
- APE1_MTYPE(MTYPE_NONCACHED);
- qpd->sh_mem_ape1_limit = 0;
- qpd->sh_mem_ape1_base = 0;
- }
-
/* On dGPU we're always in GPUVM64 addressing mode with 64-bit
* aperture addresses.
*/
- temp = get_sh_mem_bases_nybble_64(pdd);
+ temp = get_sh_mem_bases_nybble_64(qpd_to_pdd(qpd));
qpd->sh_mem_bases = compute_sh_mem_bases_64bit(temp);
pr_debug("is32bit process: %d sh_mem_bases nybble: 0x%X and register 0x%X\n",
qpd->pqm->process->is_32bit_user_mode, temp, qpd->sh_mem_bases);
+out:
+ return retval;
+}
+
+static int update_qpd_cik(struct device_queue_manager *dqm,
+ struct qcm_process_device *qpd)
+{
return 0;
}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v10.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v10.c
index 245a90dfc2f6..ba6e3d747ccd 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v10.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v10.c
@@ -31,10 +31,18 @@ static int update_qpd_v10(struct device_queue_manager *dqm,
struct qcm_process_device *qpd);
static void init_sdma_vm_v10(struct device_queue_manager *dqm, struct queue *q,
struct qcm_process_device *qpd);
+static bool set_cache_memory_policy_v10(struct device_queue_manager *dqm,
+ struct qcm_process_device *qpd,
+ enum cache_policy default_policy,
+ enum cache_policy alternate_policy,
+ void __user *alternate_aperture_base,
+ uint64_t alternate_aperture_size,
+ u32 misc_process_properties);
void device_queue_manager_init_v10(
struct device_queue_manager_asic_ops *asic_ops)
{
+ asic_ops->set_cache_memory_policy = set_cache_memory_policy_v10;
asic_ops->update_qpd = update_qpd_v10;
asic_ops->init_sdma_vm = init_sdma_vm_v10;
asic_ops->mqd_manager_init = mqd_manager_init_v10;
@@ -49,27 +57,28 @@ static uint32_t compute_sh_mem_bases_64bit(struct kfd_process_device *pdd)
private_base;
}
-static int update_qpd_v10(struct device_queue_manager *dqm,
- struct qcm_process_device *qpd)
+static bool set_cache_memory_policy_v10(struct device_queue_manager *dqm,
+ struct qcm_process_device *qpd,
+ enum cache_policy default_policy,
+ enum cache_policy alternate_policy,
+ void __user *alternate_aperture_base,
+ uint64_t alternate_aperture_size,
+ u32 misc_process_properties)
{
- struct kfd_process_device *pdd;
-
- pdd = qpd_to_pdd(qpd);
-
- /* check if sh_mem_config register already configured */
- if (qpd->sh_mem_config == 0) {
- qpd->sh_mem_config =
- (SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
- SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT) |
- (3 << SH_MEM_CONFIG__INITIAL_INST_PREFETCH__SHIFT);
- qpd->sh_mem_ape1_limit = 0;
- qpd->sh_mem_ape1_base = 0;
- }
-
- qpd->sh_mem_bases = compute_sh_mem_bases_64bit(pdd);
+ qpd->sh_mem_config = (SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
+ SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT) |
+ (3 << SH_MEM_CONFIG__INITIAL_INST_PREFETCH__SHIFT);
+ qpd->sh_mem_ape1_limit = 0;
+ qpd->sh_mem_ape1_base = 0;
+ qpd->sh_mem_bases = compute_sh_mem_bases_64bit(qpd_to_pdd(qpd));
pr_debug("sh_mem_bases 0x%X\n", qpd->sh_mem_bases);
+ return true;
+}
+static int update_qpd_v10(struct device_queue_manager *dqm,
+ struct qcm_process_device *qpd)
+{
return 0;
}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v11.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v11.c
index 2e129da7acb4..8b447d04558f 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v11.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v11.c
@@ -30,10 +30,18 @@ static int update_qpd_v11(struct device_queue_manager *dqm,
struct qcm_process_device *qpd);
static void init_sdma_vm_v11(struct device_queue_manager *dqm, struct queue *q,
struct qcm_process_device *qpd);
+static bool set_cache_memory_policy_v11(struct device_queue_manager *dqm,
+ struct qcm_process_device *qpd,
+ enum cache_policy default_policy,
+ enum cache_policy alternate_policy,
+ void __user *alternate_aperture_base,
+ uint64_t alternate_aperture_size,
+ u32 misc_process_properties);
void device_queue_manager_init_v11(
struct device_queue_manager_asic_ops *asic_ops)
{
+ asic_ops->set_cache_memory_policy = set_cache_memory_policy_v11;
asic_ops->update_qpd = update_qpd_v11;
asic_ops->init_sdma_vm = init_sdma_vm_v11;
asic_ops->mqd_manager_init = mqd_manager_init_v11;
@@ -48,28 +56,29 @@ static uint32_t compute_sh_mem_bases_64bit(struct kfd_process_device *pdd)
private_base;
}
-static int update_qpd_v11(struct device_queue_manager *dqm,
- struct qcm_process_device *qpd)
+static bool set_cache_memory_policy_v11(struct device_queue_manager *dqm,
+ struct qcm_process_device *qpd,
+ enum cache_policy default_policy,
+ enum cache_policy alternate_policy,
+ void __user *alternate_aperture_base,
+ uint64_t alternate_aperture_size,
+ u32 misc_process_properties)
{
- struct kfd_process_device *pdd;
-
- pdd = qpd_to_pdd(qpd);
-
- /* check if sh_mem_config register already configured */
- if (qpd->sh_mem_config == 0) {
- qpd->sh_mem_config =
- (SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
- SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT) |
- (3 << SH_MEM_CONFIG__INITIAL_INST_PREFETCH__SHIFT);
-
- qpd->sh_mem_ape1_limit = 0;
- qpd->sh_mem_ape1_base = 0;
- }
+ qpd->sh_mem_config = (SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
+ SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT) |
+ (3 << SH_MEM_CONFIG__INITIAL_INST_PREFETCH__SHIFT);
- qpd->sh_mem_bases = compute_sh_mem_bases_64bit(pdd);
+ qpd->sh_mem_ape1_limit = 0;
+ qpd->sh_mem_ape1_base = 0;
+ qpd->sh_mem_bases = compute_sh_mem_bases_64bit(qpd_to_pdd(qpd));
pr_debug("sh_mem_bases 0x%X\n", qpd->sh_mem_bases);
+ return true;
+}
+static int update_qpd_v11(struct device_queue_manager *dqm,
+ struct qcm_process_device *qpd)
+{
return 0;
}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v12.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v12.c
index 4f3295b29dfb..3550da3a46f9 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v12.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v12.c
@@ -30,10 +30,18 @@ static int update_qpd_v12(struct device_queue_manager *dqm,
struct qcm_process_device *qpd);
static void init_sdma_vm_v12(struct device_queue_manager *dqm, struct queue *q,
struct qcm_process_device *qpd);
+static bool set_cache_memory_policy_v12(struct device_queue_manager *dqm,
+ struct qcm_process_device *qpd,
+ enum cache_policy default_policy,
+ enum cache_policy alternate_policy,
+ void __user *alternate_aperture_base,
+ uint64_t alternate_aperture_size,
+ u32 misc_process_properties);
void device_queue_manager_init_v12(
struct device_queue_manager_asic_ops *asic_ops)
{
+ asic_ops->set_cache_memory_policy = set_cache_memory_policy_v12;
asic_ops->update_qpd = update_qpd_v12;
asic_ops->init_sdma_vm = init_sdma_vm_v12;
asic_ops->mqd_manager_init = mqd_manager_init_v12;
@@ -48,28 +56,29 @@ static uint32_t compute_sh_mem_bases_64bit(struct kfd_process_device *pdd)
private_base;
}
-static int update_qpd_v12(struct device_queue_manager *dqm,
- struct qcm_process_device *qpd)
+static bool set_cache_memory_policy_v12(struct device_queue_manager *dqm,
+ struct qcm_process_device *qpd,
+ enum cache_policy default_policy,
+ enum cache_policy alternate_policy,
+ void __user *alternate_aperture_base,
+ uint64_t alternate_aperture_size,
+ u32 misc_process_properties)
{
- struct kfd_process_device *pdd;
-
- pdd = qpd_to_pdd(qpd);
-
- /* check if sh_mem_config register already configured */
- if (qpd->sh_mem_config == 0) {
- qpd->sh_mem_config =
- (SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
- SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT) |
- (3 << SH_MEM_CONFIG__INITIAL_INST_PREFETCH__SHIFT);
-
- qpd->sh_mem_ape1_limit = 0;
- qpd->sh_mem_ape1_base = 0;
- }
+ qpd->sh_mem_config = (SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
+ SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT) |
+ (3 << SH_MEM_CONFIG__INITIAL_INST_PREFETCH__SHIFT);
- qpd->sh_mem_bases = compute_sh_mem_bases_64bit(pdd);
+ qpd->sh_mem_ape1_limit = 0;
+ qpd->sh_mem_ape1_base = 0;
+ qpd->sh_mem_bases = compute_sh_mem_bases_64bit(qpd_to_pdd(qpd));
pr_debug("sh_mem_bases 0x%X\n", qpd->sh_mem_bases);
+ return true;
+}
+static int update_qpd_v12(struct device_queue_manager *dqm,
+ struct qcm_process_device *qpd)
+{
return 0;
}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c
index 210bcc048f4c..9fcc8c6e57b7 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c
@@ -30,10 +30,18 @@ static int update_qpd_v9(struct device_queue_manager *dqm,
struct qcm_process_device *qpd);
static void init_sdma_vm_v9(struct device_queue_manager *dqm, struct queue *q,
struct qcm_process_device *qpd);
+static bool set_cache_memory_policy_v9(struct device_queue_manager *dqm,
+ struct qcm_process_device *qpd,
+ enum cache_policy default_policy,
+ enum cache_policy alternate_policy,
+ void __user *alternate_aperture_base,
+ uint64_t alternate_aperture_size,
+ u32 misc_process_properties);
void device_queue_manager_init_v9(
struct device_queue_manager_asic_ops *asic_ops)
{
+ asic_ops->set_cache_memory_policy = set_cache_memory_policy_v9;
asic_ops->update_qpd = update_qpd_v9;
asic_ops->init_sdma_vm = init_sdma_vm_v9;
asic_ops->mqd_manager_init = mqd_manager_init_v9;
@@ -48,10 +56,42 @@ static uint32_t compute_sh_mem_bases_64bit(struct kfd_process_device *pdd)
private_base;
}
+static bool set_cache_memory_policy_v9(struct device_queue_manager *dqm,
+ struct qcm_process_device *qpd,
+ enum cache_policy default_policy,
+ enum cache_policy alternate_policy,
+ void __user *alternate_aperture_base,
+ uint64_t alternate_aperture_size,
+ u32 misc_process_properties)
+{
+ qpd->sh_mem_config = SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
+ SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT;
+
+ if (dqm->dev->kfd->noretry)
+ qpd->sh_mem_config |= 1 << SH_MEM_CONFIG__RETRY_DISABLE__SHIFT;
+
+ if (KFD_GC_VERSION(dqm->dev->kfd) == IP_VERSION(9, 4, 3) ||
+ KFD_GC_VERSION(dqm->dev->kfd) == IP_VERSION(9, 4, 4))
+ qpd->sh_mem_config |= (1 << SH_MEM_CONFIG__F8_MODE__SHIFT);
+
+ if (KFD_GC_VERSION(dqm->dev->kfd) == IP_VERSION(9, 5, 0)) {
+ if (misc_process_properties & KFD_PROC_FLAG_MFMA_HIGH_PRECISION)
+ qpd->sh_mem_config |= 1 << SH_MEM_CONFIG__PRECISION_MODE__SHIFT;
+ }
+
+ qpd->sh_mem_ape1_limit = 0;
+ qpd->sh_mem_ape1_base = 0;
+ qpd->sh_mem_bases = compute_sh_mem_bases_64bit(qpd_to_pdd(qpd));
+
+ pr_debug("sh_mem_bases 0x%X sh_mem_config 0x%X\n", qpd->sh_mem_bases,
+ qpd->sh_mem_config);
+ return true;
+}
+
static int update_qpd_v9(struct device_queue_manager *dqm,
struct qcm_process_device *qpd)
{
- struct kfd_process_device *pdd;
+ struct kfd_process_device *pdd = qpd_to_pdd(qpd);
pdd = qpd_to_pdd(qpd);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c
index b291ee0fab94..dad83356e976 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c
@@ -27,12 +27,21 @@
#include "gca/gfx_8_0_sh_mask.h"
#include "oss/oss_3_0_sh_mask.h"
+/*
+ * Low bits must be 0000/FFFF as required by HW, high bits must be 0 to
+ * stay in user mode.
+ */
+#define APE1_FIXED_BITS_MASK 0xFFFF80000000FFFFULL
+/* APE1 limit is inclusive and 64K aligned. */
+#define APE1_LIMIT_ALIGNMENT 0xFFFF
+
static bool set_cache_memory_policy_vi(struct device_queue_manager *dqm,
struct qcm_process_device *qpd,
enum cache_policy default_policy,
enum cache_policy alternate_policy,
void __user *alternate_aperture_base,
- uint64_t alternate_aperture_size);
+ uint64_t alternate_aperture_size,
+ u32 misc_process_properties);
static int update_qpd_vi(struct device_queue_manager *dqm,
struct qcm_process_device *qpd);
static void init_sdma_vm(struct device_queue_manager *dqm,
@@ -81,10 +90,41 @@ static bool set_cache_memory_policy_vi(struct device_queue_manager *dqm,
enum cache_policy default_policy,
enum cache_policy alternate_policy,
void __user *alternate_aperture_base,
- uint64_t alternate_aperture_size)
+ uint64_t alternate_aperture_size,
+ u32 misc_process_properties)
{
uint32_t default_mtype;
uint32_t ape1_mtype;
+ unsigned int temp;
+ bool retval = true;
+
+ if (alternate_aperture_size == 0) {
+ /* base > limit disables APE1 */
+ qpd->sh_mem_ape1_base = 1;
+ qpd->sh_mem_ape1_limit = 0;
+ } else {
+ /*
+ * In FSA64, APE1_Base[63:0] = { 16{SH_MEM_APE1_BASE[31]},
+ * SH_MEM_APE1_BASE[31:0], 0x0000 }
+ * APE1_Limit[63:0] = { 16{SH_MEM_APE1_LIMIT[31]},
+ * SH_MEM_APE1_LIMIT[31:0], 0xFFFF }
+ * Verify that the base and size parameters can be
+ * represented in this format and convert them.
+ * Additionally restrict APE1 to user-mode addresses.
+ */
+
+ uint64_t base = (uintptr_t)alternate_aperture_base;
+ uint64_t limit = base + alternate_aperture_size - 1;
+
+ if (limit <= base || (base & APE1_FIXED_BITS_MASK) != 0 ||
+ (limit & APE1_FIXED_BITS_MASK) != APE1_LIMIT_ALIGNMENT) {
+ retval = false;
+ goto out;
+ }
+
+ qpd->sh_mem_ape1_base = base >> 16;
+ qpd->sh_mem_ape1_limit = limit >> 16;
+ }
default_mtype = (default_policy == cache_policy_coherent) ?
MTYPE_UC :
@@ -100,40 +140,21 @@ static bool set_cache_memory_policy_vi(struct device_queue_manager *dqm,
default_mtype << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
ape1_mtype << SH_MEM_CONFIG__APE1_MTYPE__SHIFT;
- return true;
-}
-
-static int update_qpd_vi(struct device_queue_manager *dqm,
- struct qcm_process_device *qpd)
-{
- struct kfd_process_device *pdd;
- unsigned int temp;
-
- pdd = qpd_to_pdd(qpd);
-
- /* check if sh_mem_config register already configured */
- if (qpd->sh_mem_config == 0) {
- qpd->sh_mem_config =
- SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
- SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
- MTYPE_UC <<
- SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
- MTYPE_UC <<
- SH_MEM_CONFIG__APE1_MTYPE__SHIFT;
-
- qpd->sh_mem_ape1_limit = 0;
- qpd->sh_mem_ape1_base = 0;
- }
-
/* On dGPU we're always in GPUVM64 addressing mode with 64-bit
* aperture addresses.
*/
- temp = get_sh_mem_bases_nybble_64(pdd);
+ temp = get_sh_mem_bases_nybble_64(qpd_to_pdd(qpd));
qpd->sh_mem_bases = compute_sh_mem_bases_64bit(temp);
pr_debug("sh_mem_bases nybble: 0x%X and register 0x%X\n",
temp, qpd->sh_mem_bases);
+out:
+ return retval;
+}
+static int update_qpd_vi(struct device_queue_manager *dqm,
+ struct qcm_process_device *qpd)
+{
return 0;
}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
index ea3792249209..2b294ada3ec0 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
@@ -727,7 +727,7 @@ void kfd_signal_event_interrupt(u32 pasid, uint32_t partial_id,
* to process context, kfd_process could attempt to exit while we are
* running so the lookup function increments the process ref count.
*/
- struct kfd_process *p = kfd_lookup_process_by_pasid(pasid);
+ struct kfd_process *p = kfd_lookup_process_by_pasid(pasid, NULL);
if (!p)
return; /* Presumably process exited. */
@@ -748,6 +748,16 @@ void kfd_signal_event_interrupt(u32 pasid, uint32_t partial_id,
uint64_t *slots = page_slots(p->signal_page);
uint32_t id;
+ /*
+ * If id is valid but slot is not signaled, GPU may signal the same event twice
+ * before driver have chance to process the first interrupt, then signal slot is
+ * auto-reset after set_event wakeup the user space, just drop the second event as
+ * the application only need wakeup once.
+ */
+ if ((valid_id_bits > 31 || (1U << valid_id_bits) >= KFD_SIGNAL_EVENT_LIMIT) &&
+ partial_id < KFD_SIGNAL_EVENT_LIMIT && slots[partial_id] == UNSIGNALED_EVENT_SLOT)
+ goto out_unlock;
+
if (valid_id_bits)
pr_debug_ratelimited("Partial ID invalid: %u (%u valid bits)\n",
partial_id, valid_id_bits);
@@ -776,6 +786,7 @@ void kfd_signal_event_interrupt(u32 pasid, uint32_t partial_id,
}
}
+out_unlock:
rcu_read_unlock();
kfd_unref_process(p);
}
@@ -1128,8 +1139,8 @@ static void lookup_events_by_type_and_signal(struct kfd_process *p,
if (type == KFD_EVENT_TYPE_MEMORY) {
dev_warn(kfd_device,
- "Sending SIGSEGV to process %d (pasid 0x%x)",
- p->lead_thread->pid, p->pasid);
+ "Sending SIGSEGV to process pid %d",
+ p->lead_thread->pid);
send_sig(SIGSEGV, p->lead_thread, 0);
}
@@ -1137,13 +1148,13 @@ static void lookup_events_by_type_and_signal(struct kfd_process *p,
if (send_signal) {
if (send_sigterm) {
dev_warn(kfd_device,
- "Sending SIGTERM to process %d (pasid 0x%x)",
- p->lead_thread->pid, p->pasid);
+ "Sending SIGTERM to process pid %d",
+ p->lead_thread->pid);
send_sig(SIGTERM, p->lead_thread, 0);
} else {
dev_err(kfd_device,
- "Process %d (pasid 0x%x) got unhandled exception",
- p->lead_thread->pid, p->pasid);
+ "Process pid %d got unhandled exception",
+ p->lead_thread->pid);
}
}
@@ -1157,7 +1168,7 @@ void kfd_signal_hw_exception_event(u32 pasid)
* to process context, kfd_process could attempt to exit while we are
* running so the lookup function increments the process ref count.
*/
- struct kfd_process *p = kfd_lookup_process_by_pasid(pasid);
+ struct kfd_process *p = kfd_lookup_process_by_pasid(pasid, NULL);
if (!p)
return; /* Presumably process exited. */
@@ -1166,22 +1177,39 @@ void kfd_signal_hw_exception_event(u32 pasid)
kfd_unref_process(p);
}
-void kfd_signal_vm_fault_event(struct kfd_node *dev, u32 pasid,
+void kfd_signal_vm_fault_event_with_userptr(struct kfd_process *p, uint64_t gpu_va)
+{
+ struct kfd_process_device *pdd;
+ struct kfd_hsa_memory_exception_data exception_data;
+ int i;
+
+ memset(&exception_data, 0, sizeof(exception_data));
+ exception_data.va = gpu_va;
+ exception_data.failure.NotPresent = 1;
+
+ // Send VM seg fault to all kfd process device
+ for (i = 0; i < p->n_pdds; i++) {
+ pdd = p->pdds[i];
+ exception_data.gpu_id = pdd->user_gpu_id;
+ kfd_evict_process_device(pdd);
+ kfd_signal_vm_fault_event(pdd, NULL, &exception_data);
+ }
+}
+
+void kfd_signal_vm_fault_event(struct kfd_process_device *pdd,
struct kfd_vm_fault_info *info,
struct kfd_hsa_memory_exception_data *data)
{
struct kfd_event *ev;
uint32_t id;
- struct kfd_process *p = kfd_lookup_process_by_pasid(pasid);
+ struct kfd_process *p = pdd->process;
struct kfd_hsa_memory_exception_data memory_exception_data;
int user_gpu_id;
- if (!p)
- return; /* Presumably process exited. */
-
- user_gpu_id = kfd_process_get_user_gpu_id(p, dev->id);
+ user_gpu_id = kfd_process_get_user_gpu_id(p, pdd->dev->id);
if (unlikely(user_gpu_id == -EINVAL)) {
- WARN_ONCE(1, "Could not get user_gpu_id from dev->id:%x\n", dev->id);
+ WARN_ONCE(1, "Could not get user_gpu_id from dev->id:%x\n",
+ pdd->dev->id);
return;
}
@@ -1218,7 +1246,6 @@ void kfd_signal_vm_fault_event(struct kfd_node *dev, u32 pasid,
}
rcu_read_unlock();
- kfd_unref_process(p);
}
void kfd_signal_reset_event(struct kfd_node *dev)
@@ -1253,7 +1280,8 @@ void kfd_signal_reset_event(struct kfd_node *dev)
}
if (unlikely(!pdd)) {
- WARN_ONCE(1, "Could not get device data from pasid:0x%x\n", p->pasid);
+ WARN_ONCE(1, "Could not get device data from process pid:%d\n",
+ p->lead_thread->pid);
continue;
}
@@ -1262,8 +1290,15 @@ void kfd_signal_reset_event(struct kfd_node *dev)
if (dev->dqm->detect_hang_count) {
struct amdgpu_task_info *ti;
+ struct amdgpu_fpriv *drv_priv;
+
+ if (unlikely(amdgpu_file_to_fpriv(pdd->drm_file, &drv_priv))) {
+ WARN_ONCE(1, "Could not get vm for device %x from pid:%d\n",
+ dev->id, p->lead_thread->pid);
+ continue;
+ }
- ti = amdgpu_vm_get_task_info_pasid(dev->adev, p->pasid);
+ ti = amdgpu_vm_get_task_info_vm(&drv_priv->vm);
if (ti) {
dev_err(dev->adev->dev,
"Queues reset on process %s tid %d thread %s pid %d\n",
@@ -1300,7 +1335,7 @@ void kfd_signal_reset_event(struct kfd_node *dev)
void kfd_signal_poison_consumed_event(struct kfd_node *dev, u32 pasid)
{
- struct kfd_process *p = kfd_lookup_process_by_pasid(pasid);
+ struct kfd_process *p = kfd_lookup_process_by_pasid(pasid, NULL);
struct kfd_hsa_memory_exception_data memory_exception_data;
struct kfd_hsa_hw_exception_data hw_exception_data;
struct kfd_event *ev;
@@ -1315,6 +1350,7 @@ void kfd_signal_poison_consumed_event(struct kfd_node *dev, u32 pasid)
user_gpu_id = kfd_process_get_user_gpu_id(p, dev->id);
if (unlikely(user_gpu_id == -EINVAL)) {
WARN_ONCE(1, "Could not get user_gpu_id from dev->id:%x\n", dev->id);
+ kfd_unref_process(p);
return;
}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v10.c b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v10.c
index 37b69fe0ede3..3e1ad8974797 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v10.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v10.c
@@ -168,14 +168,14 @@ static bool event_interrupt_isr_v10(struct kfd_node *dev,
client_id != SOC15_IH_CLIENTID_SE3SH)
return false;
- pr_debug("client id 0x%x, source id %d, vmid %d, pasid 0x%x. raw data:\n",
- client_id, source_id, vmid, pasid);
- pr_debug("%8X, %8X, %8X, %8X, %8X, %8X, %8X, %8X.\n",
- data[0], data[1], data[2], data[3],
- data[4], data[5], data[6], data[7]);
+ dev_dbg(dev->adev->dev,
+ "client id 0x%x, source id %d, vmid %d, pasid 0x%x. raw data:\n",
+ client_id, source_id, vmid, pasid);
+ dev_dbg(dev->adev->dev, "%8X, %8X, %8X, %8X, %8X, %8X, %8X, %8X.\n",
+ data[0], data[1], data[2], data[3], data[4], data[5], data[6],
+ data[7]);
- /* If there is no valid PASID, it's likely a bug */
- if (WARN_ONCE(pasid == 0, "Bug: No PASID in KFD interrupt"))
+ if (pasid == 0)
return 0;
/* Interrupt types we care about: various signals and faults.
@@ -217,37 +217,66 @@ static void event_interrupt_wq_v10(struct kfd_node *dev,
SQ_INTERRUPT_WORD_WAVE_CTXID1, ENCODING);
switch (encoding) {
case SQ_INTERRUPT_WORD_ENCODING_AUTO:
- pr_debug_ratelimited(
+ dev_dbg_ratelimited(
+ dev->adev->dev,
"sq_intr: auto, se %d, ttrace %d, wlt %d, ttrac_buf0_full %d, ttrac_buf1_full %d, ttrace_utc_err %d\n",
- REG_GET_FIELD(context_id1, SQ_INTERRUPT_WORD_AUTO_CTXID1,
- SE_ID),
- REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID0,
- THREAD_TRACE),
- REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID0,
- WLT),
- REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID0,
- THREAD_TRACE_BUF0_FULL),
- REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID0,
- THREAD_TRACE_BUF1_FULL),
- REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID0,
- THREAD_TRACE_UTC_ERROR));
+ REG_GET_FIELD(
+ context_id1,
+ SQ_INTERRUPT_WORD_AUTO_CTXID1,
+ SE_ID),
+ REG_GET_FIELD(
+ context_id0,
+ SQ_INTERRUPT_WORD_AUTO_CTXID0,
+ THREAD_TRACE),
+ REG_GET_FIELD(
+ context_id0,
+ SQ_INTERRUPT_WORD_AUTO_CTXID0,
+ WLT),
+ REG_GET_FIELD(
+ context_id0,
+ SQ_INTERRUPT_WORD_AUTO_CTXID0,
+ THREAD_TRACE_BUF0_FULL),
+ REG_GET_FIELD(
+ context_id0,
+ SQ_INTERRUPT_WORD_AUTO_CTXID0,
+ THREAD_TRACE_BUF1_FULL),
+ REG_GET_FIELD(
+ context_id0,
+ SQ_INTERRUPT_WORD_AUTO_CTXID0,
+ THREAD_TRACE_UTC_ERROR));
break;
case SQ_INTERRUPT_WORD_ENCODING_INST:
- pr_debug_ratelimited("sq_intr: inst, se %d, data 0x%x, sa %d, priv %d, wave_id %d, simd_id %d, wgp_id %d\n",
- REG_GET_FIELD(context_id1, SQ_INTERRUPT_WORD_WAVE_CTXID1,
- SE_ID),
- REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID0,
- DATA),
- REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID0,
- SA_ID),
- REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID0,
- PRIV),
- REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID0,
- WAVE_ID),
- REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID0,
- SIMD_ID),
- REG_GET_FIELD(context_id1, SQ_INTERRUPT_WORD_WAVE_CTXID1,
- WGP_ID));
+ dev_dbg_ratelimited(
+ dev->adev->dev,
+ "sq_intr: inst, se %d, data 0x%x, sa %d, priv %d, wave_id %d, simd_id %d, wgp_id %d\n",
+ REG_GET_FIELD(
+ context_id1,
+ SQ_INTERRUPT_WORD_WAVE_CTXID1,
+ SE_ID),
+ REG_GET_FIELD(
+ context_id0,
+ SQ_INTERRUPT_WORD_WAVE_CTXID0,
+ DATA),
+ REG_GET_FIELD(
+ context_id0,
+ SQ_INTERRUPT_WORD_WAVE_CTXID0,
+ SA_ID),
+ REG_GET_FIELD(
+ context_id0,
+ SQ_INTERRUPT_WORD_WAVE_CTXID0,
+ PRIV),
+ REG_GET_FIELD(
+ context_id0,
+ SQ_INTERRUPT_WORD_WAVE_CTXID0,
+ WAVE_ID),
+ REG_GET_FIELD(
+ context_id0,
+ SQ_INTERRUPT_WORD_WAVE_CTXID0,
+ SIMD_ID),
+ REG_GET_FIELD(
+ context_id1,
+ SQ_INTERRUPT_WORD_WAVE_CTXID1,
+ WGP_ID));
if (context_id0 & SQ_INTERRUPT_WORD_WAVE_CTXID0__PRIV_MASK) {
if (kfd_set_dbg_ev_from_interrupt(dev, pasid,
KFD_DEBUG_DOORBELL_ID(context_id0),
@@ -259,21 +288,37 @@ static void event_interrupt_wq_v10(struct kfd_node *dev,
case SQ_INTERRUPT_WORD_ENCODING_ERROR:
sq_intr_err_type = REG_GET_FIELD(context_id0, KFD_CTXID0,
ERR_TYPE);
- pr_warn_ratelimited("sq_intr: error, se %d, data 0x%x, sa %d, priv %d, wave_id %d, simd_id %d, wgp_id %d, err_type %d\n",
- REG_GET_FIELD(context_id1, SQ_INTERRUPT_WORD_WAVE_CTXID1,
- SE_ID),
- REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID0,
- DATA),
- REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID0,
- SA_ID),
- REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID0,
- PRIV),
- REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID0,
- WAVE_ID),
- REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID0,
- SIMD_ID),
- REG_GET_FIELD(context_id1, SQ_INTERRUPT_WORD_WAVE_CTXID1,
- WGP_ID),
+ dev_warn_ratelimited(
+ dev->adev->dev,
+ "sq_intr: error, se %d, data 0x%x, sa %d, priv %d, wave_id %d, simd_id %d, wgp_id %d, err_type %d\n",
+ REG_GET_FIELD(
+ context_id1,
+ SQ_INTERRUPT_WORD_WAVE_CTXID1,
+ SE_ID),
+ REG_GET_FIELD(
+ context_id0,
+ SQ_INTERRUPT_WORD_WAVE_CTXID0,
+ DATA),
+ REG_GET_FIELD(
+ context_id0,
+ SQ_INTERRUPT_WORD_WAVE_CTXID0,
+ SA_ID),
+ REG_GET_FIELD(
+ context_id0,
+ SQ_INTERRUPT_WORD_WAVE_CTXID0,
+ PRIV),
+ REG_GET_FIELD(
+ context_id0,
+ SQ_INTERRUPT_WORD_WAVE_CTXID0,
+ WAVE_ID),
+ REG_GET_FIELD(
+ context_id0,
+ SQ_INTERRUPT_WORD_WAVE_CTXID0,
+ SIMD_ID),
+ REG_GET_FIELD(
+ context_id1,
+ SQ_INTERRUPT_WORD_WAVE_CTXID1,
+ WGP_ID),
sq_intr_err_type);
break;
default:
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v11.c b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v11.c
index b3f988b275a8..2788a52714d1 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v11.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v11.c
@@ -148,44 +148,69 @@ enum SQ_INTERRUPT_ERROR_TYPE {
#define KFD_CTXID0_DOORBELL_ID(ctxid0) ((ctxid0) & \
KFD_CTXID0_DOORBELL_ID_MASK)
-static void print_sq_intr_info_auto(uint32_t context_id0, uint32_t context_id1)
+static void print_sq_intr_info_auto(struct kfd_node *dev, uint32_t context_id0,
+ uint32_t context_id1)
{
- pr_debug_ratelimited(
+ dev_dbg_ratelimited(
+ dev->adev->dev,
"sq_intr: auto, ttrace %d, wlt %d, ttrace_buf_full %d, reg_tms %d, cmd_tms %d, host_cmd_ovf %d, host_reg_ovf %d, immed_ovf %d, ttrace_utc_err %d\n",
- REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID0, THREAD_TRACE),
+ REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID0,
+ THREAD_TRACE),
REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID0, WLT),
- REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID0, THREAD_TRACE_BUF_FULL),
- REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID0, REG_TIMESTAMP),
- REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID0, CMD_TIMESTAMP),
- REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID0, HOST_CMD_OVERFLOW),
- REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID0, HOST_REG_OVERFLOW),
- REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID0, IMMED_OVERFLOW),
- REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID0, THREAD_TRACE_UTC_ERROR));
+ REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID0,
+ THREAD_TRACE_BUF_FULL),
+ REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID0,
+ REG_TIMESTAMP),
+ REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID0,
+ CMD_TIMESTAMP),
+ REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID0,
+ HOST_CMD_OVERFLOW),
+ REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID0,
+ HOST_REG_OVERFLOW),
+ REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID0,
+ IMMED_OVERFLOW),
+ REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID0,
+ THREAD_TRACE_UTC_ERROR));
}
-static void print_sq_intr_info_inst(uint32_t context_id0, uint32_t context_id1)
+static void print_sq_intr_info_inst(struct kfd_node *dev, uint32_t context_id0,
+ uint32_t context_id1)
{
- pr_debug_ratelimited(
+ dev_dbg_ratelimited(
+ dev->adev->dev,
"sq_intr: inst, data 0x%08x, sh %d, priv %d, wave_id %d, simd_id %d, wgp_id %d\n",
REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID0, DATA),
- REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID0, SH_ID),
+ REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID0,
+ SH_ID),
REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID0, PRIV),
- REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID0, WAVE_ID),
- REG_GET_FIELD(context_id1, SQ_INTERRUPT_WORD_WAVE_CTXID1, SIMD_ID),
- REG_GET_FIELD(context_id1, SQ_INTERRUPT_WORD_WAVE_CTXID1, WGP_ID));
+ REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID0,
+ WAVE_ID),
+ REG_GET_FIELD(context_id1, SQ_INTERRUPT_WORD_WAVE_CTXID1,
+ SIMD_ID),
+ REG_GET_FIELD(context_id1, SQ_INTERRUPT_WORD_WAVE_CTXID1,
+ WGP_ID));
}
-static void print_sq_intr_info_error(uint32_t context_id0, uint32_t context_id1)
+static void print_sq_intr_info_error(struct kfd_node *dev, uint32_t context_id0,
+ uint32_t context_id1)
{
- pr_warn_ratelimited(
+ dev_warn_ratelimited(
+ dev->adev->dev,
"sq_intr: error, detail 0x%08x, type %d, sh %d, priv %d, wave_id %d, simd_id %d, wgp_id %d\n",
- REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_ERROR_CTXID0, DETAIL),
- REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_ERROR_CTXID0, TYPE),
- REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_ERROR_CTXID0, SH_ID),
- REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_ERROR_CTXID0, PRIV),
- REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_ERROR_CTXID0, WAVE_ID),
- REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_ERROR_CTXID1, SIMD_ID),
- REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_ERROR_CTXID1, WGP_ID));
+ REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_ERROR_CTXID0,
+ DETAIL),
+ REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_ERROR_CTXID0,
+ TYPE),
+ REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_ERROR_CTXID0,
+ SH_ID),
+ REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_ERROR_CTXID0,
+ PRIV),
+ REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_ERROR_CTXID0,
+ WAVE_ID),
+ REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_ERROR_CTXID1,
+ SIMD_ID),
+ REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_ERROR_CTXID1,
+ WGP_ID));
}
static void event_interrupt_poison_consumption_v11(struct kfd_node *dev,
@@ -194,7 +219,7 @@ static void event_interrupt_poison_consumption_v11(struct kfd_node *dev,
enum amdgpu_ras_block block = 0;
int ret = -EINVAL;
uint32_t reset = 0;
- struct kfd_process *p = kfd_lookup_process_by_pasid(pasid);
+ struct kfd_process *p = kfd_lookup_process_by_pasid(pasid, NULL);
if (!p)
return;
@@ -255,14 +280,14 @@ static bool event_interrupt_isr_v11(struct kfd_node *dev,
(context_id0 & AMDGPU_FENCE_MES_QUEUE_FLAG))
return false;
- pr_debug("client id 0x%x, source id %d, vmid %d, pasid 0x%x. raw data:\n",
- client_id, source_id, vmid, pasid);
- pr_debug("%8X, %8X, %8X, %8X, %8X, %8X, %8X, %8X.\n",
- data[0], data[1], data[2], data[3],
- data[4], data[5], data[6], data[7]);
+ dev_dbg(dev->adev->dev,
+ "client id 0x%x, source id %d, vmid %d, pasid 0x%x. raw data:\n",
+ client_id, source_id, vmid, pasid);
+ dev_dbg(dev->adev->dev, "%8X, %8X, %8X, %8X, %8X, %8X, %8X, %8X.\n",
+ data[0], data[1], data[2], data[3], data[4], data[5], data[6],
+ data[7]);
- /* If there is no valid PASID, it's likely a bug */
- if (WARN_ONCE(pasid == 0, "Bug: No PASID in KFD interrupt"))
+ if (pasid == 0)
return false;
/* Interrupt types we care about: various signals and faults.
@@ -353,10 +378,10 @@ static void event_interrupt_wq_v11(struct kfd_node *dev,
SQ_INTERRUPT_WORD_WAVE_CTXID1, ENCODING);
switch (sq_int_enc) {
case SQ_INTERRUPT_WORD_ENCODING_AUTO:
- print_sq_intr_info_auto(context_id0, context_id1);
+ print_sq_intr_info_auto(dev, context_id0, context_id1);
break;
case SQ_INTERRUPT_WORD_ENCODING_INST:
- print_sq_intr_info_inst(context_id0, context_id1);
+ print_sq_intr_info_inst(dev, context_id0, context_id1);
sq_int_priv = REG_GET_FIELD(context_id0,
SQ_INTERRUPT_WORD_WAVE_CTXID0, PRIV);
if (sq_int_priv && (kfd_set_dbg_ev_from_interrupt(dev, pasid,
@@ -366,7 +391,7 @@ static void event_interrupt_wq_v11(struct kfd_node *dev,
return;
break;
case SQ_INTERRUPT_WORD_ENCODING_ERROR:
- print_sq_intr_info_error(context_id0, context_id1);
+ print_sq_intr_info_error(dev, context_id0, context_id1);
sq_int_errtype = REG_GET_FIELD(context_id0,
SQ_INTERRUPT_WORD_ERROR_CTXID0, TYPE);
if (sq_int_errtype != SQ_INTERRUPT_ERROR_TYPE_ILLEGAL_INST &&
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c
index d46a13156ee9..4ceb251312a6 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c
@@ -146,7 +146,7 @@ static void event_interrupt_poison_consumption_v9(struct kfd_node *dev,
{
enum amdgpu_ras_block block = 0;
uint32_t reset = 0;
- struct kfd_process *p = kfd_lookup_process_by_pasid(pasid);
+ struct kfd_process *p = kfd_lookup_process_by_pasid(pasid, NULL);
enum ras_event_type type = RAS_EVENT_TYPE_POISON_CONSUMPTION;
u64 event_id;
int old_poison, ret;
@@ -184,6 +184,7 @@ static void event_interrupt_poison_consumption_v9(struct kfd_node *dev,
} else {
reset = AMDGPU_RAS_GPU_RESET_MODE2_RESET;
}
+ amdgpu_ras_set_err_poison(dev->adev, AMDGPU_RAS_BLOCK__GFX);
break;
case SOC15_IH_CLIENTID_VMC:
case SOC15_IH_CLIENTID_VMC1:
@@ -213,6 +214,7 @@ static void event_interrupt_poison_consumption_v9(struct kfd_node *dev,
} else {
reset = AMDGPU_RAS_GPU_RESET_MODE2_RESET;
}
+ amdgpu_ras_set_err_poison(dev->adev, AMDGPU_RAS_BLOCK__SDMA);
break;
default:
dev_warn(dev->adev->dev,
@@ -312,11 +314,12 @@ static bool event_interrupt_isr_v9(struct kfd_node *dev,
& ~pasid_mask) | pasid);
}
- pr_debug("client id 0x%x, source id %d, vmid %d, pasid 0x%x. raw data:\n",
- client_id, source_id, vmid, pasid);
- pr_debug("%8X, %8X, %8X, %8X, %8X, %8X, %8X, %8X.\n",
- data[0], data[1], data[2], data[3],
- data[4], data[5], data[6], data[7]);
+ dev_dbg(dev->adev->dev,
+ "client id 0x%x, source id %d, vmid %d, pasid 0x%x. raw data:\n",
+ client_id, source_id, vmid, pasid);
+ dev_dbg(dev->adev->dev, "%8X, %8X, %8X, %8X, %8X, %8X, %8X, %8X.\n",
+ data[0], data[1], data[2], data[3], data[4], data[5], data[6],
+ data[7]);
/* If there is no valid PASID, it's likely a bug */
if (WARN_ONCE(pasid == 0, "Bug: No PASID in KFD interrupt"))
@@ -377,28 +380,82 @@ static void event_interrupt_wq_v9(struct kfd_node *dev,
encoding = REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID, ENCODING);
switch (encoding) {
case SQ_INTERRUPT_WORD_ENCODING_AUTO:
- pr_debug_ratelimited(
+ dev_dbg_ratelimited(
+ dev->adev->dev,
"sq_intr: auto, se %d, ttrace %d, wlt %d, ttrac_buf_full %d, reg_tms %d, cmd_tms %d, host_cmd_ovf %d, host_reg_ovf %d, immed_ovf %d, ttrace_utc_err %d\n",
- REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID, SE_ID),
- REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID, THREAD_TRACE),
- REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID, WLT),
- REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID, THREAD_TRACE_BUF_FULL),
- REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID, REG_TIMESTAMP),
- REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID, CMD_TIMESTAMP),
- REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID, HOST_CMD_OVERFLOW),
- REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID, HOST_REG_OVERFLOW),
- REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID, IMMED_OVERFLOW),
- REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID, THREAD_TRACE_UTC_ERROR));
+ REG_GET_FIELD(
+ context_id0,
+ SQ_INTERRUPT_WORD_AUTO_CTXID,
+ SE_ID),
+ REG_GET_FIELD(
+ context_id0,
+ SQ_INTERRUPT_WORD_AUTO_CTXID,
+ THREAD_TRACE),
+ REG_GET_FIELD(
+ context_id0,
+ SQ_INTERRUPT_WORD_AUTO_CTXID,
+ WLT),
+ REG_GET_FIELD(
+ context_id0,
+ SQ_INTERRUPT_WORD_AUTO_CTXID,
+ THREAD_TRACE_BUF_FULL),
+ REG_GET_FIELD(
+ context_id0,
+ SQ_INTERRUPT_WORD_AUTO_CTXID,
+ REG_TIMESTAMP),
+ REG_GET_FIELD(
+ context_id0,
+ SQ_INTERRUPT_WORD_AUTO_CTXID,
+ CMD_TIMESTAMP),
+ REG_GET_FIELD(
+ context_id0,
+ SQ_INTERRUPT_WORD_AUTO_CTXID,
+ HOST_CMD_OVERFLOW),
+ REG_GET_FIELD(
+ context_id0,
+ SQ_INTERRUPT_WORD_AUTO_CTXID,
+ HOST_REG_OVERFLOW),
+ REG_GET_FIELD(
+ context_id0,
+ SQ_INTERRUPT_WORD_AUTO_CTXID,
+ IMMED_OVERFLOW),
+ REG_GET_FIELD(
+ context_id0,
+ SQ_INTERRUPT_WORD_AUTO_CTXID,
+ THREAD_TRACE_UTC_ERROR));
break;
case SQ_INTERRUPT_WORD_ENCODING_INST:
- pr_debug_ratelimited("sq_intr: inst, se %d, data 0x%x, sh %d, priv %d, wave_id %d, simd_id %d, cu_id %d, intr_data 0x%x\n",
- REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID, SE_ID),
- REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID, DATA),
- REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID, SH_ID),
- REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID, PRIV),
- REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID, WAVE_ID),
- REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID, SIMD_ID),
- REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID, CU_ID),
+ dev_dbg_ratelimited(
+ dev->adev->dev,
+ "sq_intr: inst, se %d, data 0x%x, sh %d, priv %d, wave_id %d, simd_id %d, cu_id %d, intr_data 0x%x\n",
+ REG_GET_FIELD(
+ context_id0,
+ SQ_INTERRUPT_WORD_WAVE_CTXID,
+ SE_ID),
+ REG_GET_FIELD(
+ context_id0,
+ SQ_INTERRUPT_WORD_WAVE_CTXID,
+ DATA),
+ REG_GET_FIELD(
+ context_id0,
+ SQ_INTERRUPT_WORD_WAVE_CTXID,
+ SH_ID),
+ REG_GET_FIELD(
+ context_id0,
+ SQ_INTERRUPT_WORD_WAVE_CTXID,
+ PRIV),
+ REG_GET_FIELD(
+ context_id0,
+ SQ_INTERRUPT_WORD_WAVE_CTXID,
+ WAVE_ID),
+ REG_GET_FIELD(
+ context_id0,
+ SQ_INTERRUPT_WORD_WAVE_CTXID,
+ SIMD_ID),
+ REG_GET_FIELD(
+ context_id0,
+ SQ_INTERRUPT_WORD_WAVE_CTXID,
+ CU_ID),
sq_int_data);
if (context_id0 & SQ_INTERRUPT_WORD_WAVE_CTXID__PRIV_MASK) {
if (kfd_set_dbg_ev_from_interrupt(dev, pasid,
@@ -410,14 +467,37 @@ static void event_interrupt_wq_v9(struct kfd_node *dev,
break;
case SQ_INTERRUPT_WORD_ENCODING_ERROR:
sq_intr_err = REG_GET_FIELD(sq_int_data, KFD_SQ_INT_DATA, ERR_TYPE);
- pr_warn_ratelimited("sq_intr: error, se %d, data 0x%x, sh %d, priv %d, wave_id %d, simd_id %d, cu_id %d, err_type %d\n",
- REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID, SE_ID),
- REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID, DATA),
- REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID, SH_ID),
- REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID, PRIV),
- REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID, WAVE_ID),
- REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID, SIMD_ID),
- REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID, CU_ID),
+ dev_warn_ratelimited(
+ dev->adev->dev,
+ "sq_intr: error, se %d, data 0x%x, sh %d, priv %d, wave_id %d, simd_id %d, cu_id %d, err_type %d\n",
+ REG_GET_FIELD(
+ context_id0,
+ SQ_INTERRUPT_WORD_WAVE_CTXID,
+ SE_ID),
+ REG_GET_FIELD(
+ context_id0,
+ SQ_INTERRUPT_WORD_WAVE_CTXID,
+ DATA),
+ REG_GET_FIELD(
+ context_id0,
+ SQ_INTERRUPT_WORD_WAVE_CTXID,
+ SH_ID),
+ REG_GET_FIELD(
+ context_id0,
+ SQ_INTERRUPT_WORD_WAVE_CTXID,
+ PRIV),
+ REG_GET_FIELD(
+ context_id0,
+ SQ_INTERRUPT_WORD_WAVE_CTXID,
+ WAVE_ID),
+ REG_GET_FIELD(
+ context_id0,
+ SQ_INTERRUPT_WORD_WAVE_CTXID,
+ SIMD_ID),
+ REG_GET_FIELD(
+ context_id0,
+ SQ_INTERRUPT_WORD_WAVE_CTXID,
+ CU_ID),
sq_intr_err);
if (sq_intr_err != SQ_INTERRUPT_ERROR_TYPE_ILLEGAL_INST &&
sq_intr_err != SQ_INTERRUPT_ERROR_TYPE_MEMVIOL) {
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c b/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c
index 9b6b6e882593..783c2f5a04e4 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c
@@ -46,7 +46,7 @@
#include <linux/kfifo.h>
#include "kfd_priv.h"
-#define KFD_IH_NUM_ENTRIES 8192
+#define KFD_IH_NUM_ENTRIES 16384
static void interrupt_wq(struct work_struct *);
@@ -62,11 +62,14 @@ int kfd_interrupt_init(struct kfd_node *node)
return r;
}
- node->ih_wq = alloc_workqueue("KFD IH", WQ_HIGHPRI, 1);
- if (unlikely(!node->ih_wq)) {
- kfifo_free(&node->ih_fifo);
- dev_err(node->adev->dev, "Failed to allocate KFD IH workqueue\n");
- return -ENOMEM;
+ if (!node->kfd->ih_wq) {
+ node->kfd->ih_wq = alloc_workqueue("KFD IH", WQ_HIGHPRI | WQ_UNBOUND,
+ node->kfd->num_nodes);
+ if (unlikely(!node->kfd->ih_wq)) {
+ kfifo_free(&node->ih_fifo);
+ dev_err(node->adev->dev, "Failed to allocate KFD IH workqueue\n");
+ return -ENOMEM;
+ }
}
spin_lock_init(&node->interrupt_lock);
@@ -96,16 +99,6 @@ void kfd_interrupt_exit(struct kfd_node *node)
spin_lock_irqsave(&node->interrupt_lock, flags);
node->interrupts_active = false;
spin_unlock_irqrestore(&node->interrupt_lock, flags);
-
- /*
- * flush_work ensures that there are no outstanding
- * work-queue items that will access interrupt_ring. New work items
- * can't be created because we stopped interrupt handling above.
- */
- flush_workqueue(node->ih_wq);
-
- destroy_workqueue(node->ih_wq);
-
kfifo_free(&node->ih_fifo);
}
@@ -114,55 +107,48 @@ void kfd_interrupt_exit(struct kfd_node *node)
*/
bool enqueue_ih_ring_entry(struct kfd_node *node, const void *ih_ring_entry)
{
- int count;
-
- count = kfifo_in(&node->ih_fifo, ih_ring_entry,
- node->kfd->device_info.ih_ring_entry_size);
- if (count != node->kfd->device_info.ih_ring_entry_size) {
- dev_dbg_ratelimited(node->adev->dev,
- "Interrupt ring overflow, dropping interrupt %d\n",
- count);
+ if (kfifo_is_full(&node->ih_fifo)) {
+ dev_warn_ratelimited(node->adev->dev, "KFD node %d ih_fifo overflow\n",
+ node->node_id);
return false;
}
+ kfifo_in(&node->ih_fifo, ih_ring_entry, node->kfd->device_info.ih_ring_entry_size);
return true;
}
/*
* Assumption: single reader/writer. This function is not re-entrant
*/
-static bool dequeue_ih_ring_entry(struct kfd_node *node, void *ih_ring_entry)
+static bool dequeue_ih_ring_entry(struct kfd_node *node, u32 **ih_ring_entry)
{
int count;
- count = kfifo_out(&node->ih_fifo, ih_ring_entry,
- node->kfd->device_info.ih_ring_entry_size);
-
- WARN_ON(count && count != node->kfd->device_info.ih_ring_entry_size);
+ if (kfifo_is_empty(&node->ih_fifo))
+ return false;
+ count = kfifo_out_linear_ptr(&node->ih_fifo, ih_ring_entry,
+ node->kfd->device_info.ih_ring_entry_size);
+ WARN_ON(count != node->kfd->device_info.ih_ring_entry_size);
return count == node->kfd->device_info.ih_ring_entry_size;
}
static void interrupt_wq(struct work_struct *work)
{
- struct kfd_node *dev = container_of(work, struct kfd_node,
- interrupt_work);
- uint32_t ih_ring_entry[KFD_MAX_RING_ENTRY_SIZE];
+ struct kfd_node *dev = container_of(work, struct kfd_node, interrupt_work);
+ uint32_t *ih_ring_entry;
unsigned long start_jiffies = jiffies;
- if (dev->kfd->device_info.ih_ring_entry_size > sizeof(ih_ring_entry)) {
- dev_err_once(dev->adev->dev, "Ring entry too small\n");
- return;
- }
-
- while (dequeue_ih_ring_entry(dev, ih_ring_entry)) {
+ while (dequeue_ih_ring_entry(dev, &ih_ring_entry)) {
dev->kfd->device_info.event_interrupt_class->interrupt_wq(dev,
ih_ring_entry);
+ kfifo_skip_count(&dev->ih_fifo, dev->kfd->device_info.ih_ring_entry_size);
+
if (time_is_before_jiffies(start_jiffies + HZ)) {
/* If we spent more than a second processing signals,
* reschedule the worker to avoid soft-lockup warnings
*/
- queue_work(dev->ih_wq, &dev->interrupt_work);
+ queue_work(dev->kfd->ih_wq, &dev->interrupt_work);
break;
}
}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
index 4843dcb9a5f7..2b0a830f5b29 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
@@ -125,7 +125,7 @@ static bool kq_initialize(struct kernel_queue *kq, struct kfd_node *dev,
memset(kq->pq_kernel_addr, 0, queue_size);
memset(kq->rptr_kernel, 0, sizeof(*kq->rptr_kernel));
- memset(kq->wptr_kernel, 0, sizeof(*kq->wptr_kernel));
+ memset(kq->wptr_kernel, 0, dev->kfd->device_info.doorbell_size);
prop.queue_size = queue_size;
prop.is_interop = false;
@@ -306,12 +306,17 @@ int kq_submit_packet(struct kernel_queue *kq)
if (amdgpu_amdkfd_is_fed(kq->dev->adev))
return -EIO;
+ /* Make sure ring buffer is updated before wptr updated */
+ mb();
+
if (kq->dev->kfd->device_info.doorbell_size == 8) {
*kq->wptr64_kernel = kq->pending_wptr64;
+ mb(); /* Make sure wptr updated before ring doorbell */
write_kernel_doorbell64(kq->queue->properties.doorbell_ptr,
kq->pending_wptr64);
} else {
*kq->wptr_kernel = kq->pending_wptr;
+ mb(); /* Make sure wptr updated before ring doorbell */
write_kernel_doorbell(kq->queue->properties.doorbell_ptr,
kq->pending_wptr);
}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
index 8ee3d07ffbdf..79251f22b702 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
@@ -278,10 +278,11 @@ svm_migrate_copy_to_vram(struct kfd_node *node, struct svm_range *prange,
struct migrate_vma *migrate, struct dma_fence **mfence,
dma_addr_t *scratch, uint64_t ttm_res_offset)
{
- uint64_t npages = migrate->cpages;
+ uint64_t npages = migrate->npages;
struct amdgpu_device *adev = node->adev;
struct device *dev = adev->dev;
struct amdgpu_res_cursor cursor;
+ uint64_t mpages = 0;
dma_addr_t *src;
uint64_t *dst;
uint64_t i, j;
@@ -295,18 +296,20 @@ svm_migrate_copy_to_vram(struct kfd_node *node, struct svm_range *prange,
amdgpu_res_first(prange->ttm_res, ttm_res_offset,
npages << PAGE_SHIFT, &cursor);
- for (i = j = 0; i < npages; i++) {
+ for (i = j = 0; (i < npages) && (mpages < migrate->cpages); i++) {
struct page *spage;
- dst[i] = cursor.start + (j << PAGE_SHIFT);
- migrate->dst[i] = svm_migrate_addr_to_pfn(adev, dst[i]);
- svm_migrate_get_vram_page(prange, migrate->dst[i]);
- migrate->dst[i] = migrate_pfn(migrate->dst[i]);
-
+ if (migrate->src[i] & MIGRATE_PFN_MIGRATE) {
+ dst[i] = cursor.start + (j << PAGE_SHIFT);
+ migrate->dst[i] = svm_migrate_addr_to_pfn(adev, dst[i]);
+ svm_migrate_get_vram_page(prange, migrate->dst[i]);
+ migrate->dst[i] = migrate_pfn(migrate->dst[i]);
+ mpages++;
+ }
spage = migrate_pfn_to_page(migrate->src[i]);
if (spage && !is_zone_device_page(spage)) {
src[i] = dma_map_page(dev, spage, 0, PAGE_SIZE,
- DMA_TO_DEVICE);
+ DMA_BIDIRECTIONAL);
r = dma_mapping_error(dev, src[i]);
if (r) {
dev_err(dev, "%s: fail %d dma_map_page\n",
@@ -353,9 +356,12 @@ svm_migrate_copy_to_vram(struct kfd_node *node, struct svm_range *prange,
out_free_vram_pages:
if (r) {
pr_debug("failed %d to copy memory to vram\n", r);
- while (i--) {
+ for (i = 0; i < npages && mpages; i++) {
+ if (!dst[i])
+ continue;
svm_migrate_put_vram_page(adev, dst[i]);
migrate->dst[i] = 0;
+ mpages--;
}
}
@@ -445,14 +451,13 @@ svm_migrate_vma_to_vram(struct kfd_node *node, struct svm_range *prange,
pr_debug("successful/cpages/npages 0x%lx/0x%lx/0x%lx\n",
mpages, cpages, migrate.npages);
- kfd_smi_event_migration_end(node, p->lead_thread->pid,
- start >> PAGE_SHIFT, end >> PAGE_SHIFT,
- 0, node->id, trigger);
-
svm_range_dma_unmap_dev(adev->dev, scratch, 0, npages);
out_free:
kvfree(buf);
+ kfd_smi_event_migration_end(node, p->lead_thread->pid,
+ start >> PAGE_SHIFT, end >> PAGE_SHIFT,
+ 0, node->id, trigger, r);
out:
if (!r && mpages) {
pdd = svm_range_get_pdd_by_node(prange, node);
@@ -630,7 +635,7 @@ svm_migrate_copy_to_ram(struct amdgpu_device *adev, struct svm_range *prange,
goto out_oom;
}
- dst[i] = dma_map_page(dev, dpage, 0, PAGE_SIZE, DMA_FROM_DEVICE);
+ dst[i] = dma_map_page(dev, dpage, 0, PAGE_SIZE, DMA_BIDIRECTIONAL);
r = dma_mapping_error(dev, dst[i]);
if (r) {
dev_err(adev->dev, "%s: fail %d dma_map_page\n", __func__, r);
@@ -751,14 +756,13 @@ svm_migrate_vma_to_ram(struct kfd_node *node, struct svm_range *prange,
svm_migrate_copy_done(adev, mfence);
migrate_vma_finalize(&migrate);
- kfd_smi_event_migration_end(node, p->lead_thread->pid,
- start >> PAGE_SHIFT, end >> PAGE_SHIFT,
- node->id, 0, trigger);
-
svm_range_dma_unmap_dev(adev->dev, scratch, 0, npages);
out_free:
kvfree(buf);
+ kfd_smi_event_migration_end(node, p->lead_thread->pid,
+ start >> PAGE_SHIFT, end >> PAGE_SHIFT,
+ node->id, 0, trigger, r);
out:
if (!r && cpages) {
mpages = cpages - upages;
@@ -1023,7 +1027,7 @@ int kgd2kfd_init_zone_device(struct amdgpu_device *adev)
if (amdgpu_ip_version(adev, GC_HWIP, 0) < IP_VERSION(9, 0, 1))
return -EINVAL;
- if (adev->flags & AMD_IS_APU)
+ if (adev->apu_prefer_gtt)
return 0;
pgmap = &kfddev->pgmap;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c
index 2eff37aaf827..1695dd78ede8 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c
@@ -107,6 +107,8 @@ static void init_mqd(struct mqd_manager *mm, void **mqd,
m->cp_hqd_persistent_state = CP_HQD_PERSISTENT_STATE__PRELOAD_REQ_MASK |
0x53 << CP_HQD_PERSISTENT_STATE__PRELOAD_SIZE__SHIFT;
+ m->cp_hqd_pq_control = 5 << CP_HQD_PQ_CONTROL__RPTR_BLOCK_SIZE__SHIFT;
+ m->cp_hqd_pq_control |= CP_HQD_PQ_CONTROL__UNORD_DISPATCH_MASK;
m->cp_mqd_control = 1 << CP_MQD_CONTROL__PRIV_STATE__SHIFT;
m->cp_mqd_base_addr_lo = lower_32_bits(addr);
@@ -167,10 +169,10 @@ static void update_mqd(struct mqd_manager *mm, void *mqd,
m = get_mqd(mqd);
- m->cp_hqd_pq_control = 5 << CP_HQD_PQ_CONTROL__RPTR_BLOCK_SIZE__SHIFT;
+ m->cp_hqd_pq_control &= ~CP_HQD_PQ_CONTROL__QUEUE_SIZE_MASK;
m->cp_hqd_pq_control |=
ffs(q->queue_size / sizeof(unsigned int)) - 1 - 1;
- m->cp_hqd_pq_control |= CP_HQD_PQ_CONTROL__UNORD_DISPATCH_MASK;
+
pr_debug("cp_hqd_pq_control 0x%x\n", m->cp_hqd_pq_control);
m->cp_hqd_pq_base_lo = lower_32_bits((uint64_t)q->queue_address >> 8);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c
index 68dbc0399c87..3c0ae28c5923 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c
@@ -154,6 +154,8 @@ static void init_mqd(struct mqd_manager *mm, void **mqd,
m->cp_hqd_persistent_state = CP_HQD_PERSISTENT_STATE__PRELOAD_REQ_MASK |
0x55 << CP_HQD_PERSISTENT_STATE__PRELOAD_SIZE__SHIFT;
+ m->cp_hqd_pq_control = 5 << CP_HQD_PQ_CONTROL__RPTR_BLOCK_SIZE__SHIFT;
+ m->cp_hqd_pq_control |= CP_HQD_PQ_CONTROL__UNORD_DISPATCH_MASK;
m->cp_mqd_control = 1 << CP_MQD_CONTROL__PRIV_STATE__SHIFT;
m->cp_mqd_base_addr_lo = lower_32_bits(addr);
@@ -221,10 +223,9 @@ static void update_mqd(struct mqd_manager *mm, void *mqd,
m = get_mqd(mqd);
- m->cp_hqd_pq_control = 5 << CP_HQD_PQ_CONTROL__RPTR_BLOCK_SIZE__SHIFT;
+ m->cp_hqd_pq_control &= ~CP_HQD_PQ_CONTROL__QUEUE_SIZE_MASK;
m->cp_hqd_pq_control |=
ffs(q->queue_size / sizeof(unsigned int)) - 1 - 1;
- m->cp_hqd_pq_control |= CP_HQD_PQ_CONTROL__UNORD_DISPATCH_MASK;
pr_debug("cp_hqd_pq_control 0x%x\n", m->cp_hqd_pq_control);
m->cp_hqd_pq_base_lo = lower_32_bits((uint64_t)q->queue_address >> 8);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v12.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v12.c
index 2b72d5b4949b..565858b9044d 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v12.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v12.c
@@ -121,6 +121,8 @@ static void init_mqd(struct mqd_manager *mm, void **mqd,
m->cp_hqd_persistent_state = CP_HQD_PERSISTENT_STATE__PRELOAD_REQ_MASK |
0x55 << CP_HQD_PERSISTENT_STATE__PRELOAD_SIZE__SHIFT;
+ m->cp_hqd_pq_control = 5 << CP_HQD_PQ_CONTROL__RPTR_BLOCK_SIZE__SHIFT;
+ m->cp_hqd_pq_control |= CP_HQD_PQ_CONTROL__UNORD_DISPATCH_MASK;
m->cp_mqd_control = 1 << CP_MQD_CONTROL__PRIV_STATE__SHIFT;
m->cp_mqd_base_addr_lo = lower_32_bits(addr);
@@ -184,10 +186,9 @@ static void update_mqd(struct mqd_manager *mm, void *mqd,
m = get_mqd(mqd);
- m->cp_hqd_pq_control = 5 << CP_HQD_PQ_CONTROL__RPTR_BLOCK_SIZE__SHIFT;
+ m->cp_hqd_pq_control &= ~CP_HQD_PQ_CONTROL__QUEUE_SIZE_MASK;
m->cp_hqd_pq_control |=
ffs(q->queue_size / sizeof(unsigned int)) - 1 - 1;
- m->cp_hqd_pq_control |= CP_HQD_PQ_CONTROL__UNORD_DISPATCH_MASK;
pr_debug("cp_hqd_pq_control 0x%x\n", m->cp_hqd_pq_control);
m->cp_hqd_pq_base_lo = lower_32_bits((uint64_t)q->queue_address >> 8);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
index 84e8ea3a8a0c..97933d2a3803 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
@@ -78,7 +78,8 @@ static void update_cu_mask(struct mqd_manager *mm, void *mqd,
m->compute_static_thread_mgmt_se2 = se_mask[2];
m->compute_static_thread_mgmt_se3 = se_mask[3];
if (KFD_GC_VERSION(mm->dev) != IP_VERSION(9, 4, 3) &&
- KFD_GC_VERSION(mm->dev) != IP_VERSION(9, 4, 4)) {
+ KFD_GC_VERSION(mm->dev) != IP_VERSION(9, 4, 4) &&
+ KFD_GC_VERSION(mm->dev) != IP_VERSION(9, 5, 0)) {
m->compute_static_thread_mgmt_se4 = se_mask[4];
m->compute_static_thread_mgmt_se5 = se_mask[5];
m->compute_static_thread_mgmt_se6 = se_mask[6];
@@ -182,6 +183,9 @@ static void init_mqd(struct mqd_manager *mm, void **mqd,
m->cp_hqd_persistent_state = CP_HQD_PERSISTENT_STATE__PRELOAD_REQ_MASK |
0x53 << CP_HQD_PERSISTENT_STATE__PRELOAD_SIZE__SHIFT;
+ m->cp_hqd_pq_control = 5 << CP_HQD_PQ_CONTROL__RPTR_BLOCK_SIZE__SHIFT;
+ m->cp_hqd_pq_control |= CP_HQD_PQ_CONTROL__UNORD_DISPATCH_MASK;
+
m->cp_mqd_control = 1 << CP_MQD_CONTROL__PRIV_STATE__SHIFT;
m->cp_mqd_base_addr_lo = lower_32_bits(addr);
@@ -244,7 +248,7 @@ static void update_mqd(struct mqd_manager *mm, void *mqd,
m = get_mqd(mqd);
- m->cp_hqd_pq_control = 5 << CP_HQD_PQ_CONTROL__RPTR_BLOCK_SIZE__SHIFT;
+ m->cp_hqd_pq_control &= ~CP_HQD_PQ_CONTROL__QUEUE_SIZE_MASK;
m->cp_hqd_pq_control |= order_base_2(q->queue_size / 4) - 1;
pr_debug("cp_hqd_pq_control 0x%x\n", m->cp_hqd_pq_control);
@@ -301,7 +305,8 @@ static void update_mqd(struct mqd_manager *mm, void *mqd,
m->cp_hqd_ctx_save_control = 0;
if (KFD_GC_VERSION(mm->dev) != IP_VERSION(9, 4, 3) &&
- KFD_GC_VERSION(mm->dev) != IP_VERSION(9, 4, 4))
+ KFD_GC_VERSION(mm->dev) != IP_VERSION(9, 4, 4) &&
+ KFD_GC_VERSION(mm->dev) != IP_VERSION(9, 5, 0))
update_cu_mask(mm, mqd, minfo, 0);
set_priority(m, q);
@@ -490,6 +495,10 @@ static void update_mqd_sdma(struct mqd_manager *mm, void *mqd,
m->sdma_engine_id = q->sdma_engine_id;
m->sdma_queue_id = q->sdma_queue_id;
m->sdmax_rlcx_dummy_reg = SDMA_RLC_DUMMY_DEFAULT;
+ /* Allow context switch so we don't cross-process starve with a massive
+ * command buffer of long-running SDMA commands
+ */
+ m->sdmax_rlcx_ib_cntl |= SDMA0_GFX_IB_CNTL__SWITCH_INSIDE_IB_MASK;
q->is_active = QUEUE_IS_ACTIVE(*q);
}
@@ -549,7 +558,7 @@ static void init_mqd_hiq_v9_4_3(struct mqd_manager *mm, void **mqd,
m->cp_hqd_pq_control |= CP_HQD_PQ_CONTROL__NO_UPDATE_RPTR_MASK |
1 << CP_HQD_PQ_CONTROL__PRIV_STATE__SHIFT |
1 << CP_HQD_PQ_CONTROL__KMD_QUEUE__SHIFT;
- if (amdgpu_sriov_vf(mm->dev->adev))
+ if (amdgpu_sriov_multi_vf_mode(mm->dev->adev))
m->cp_hqd_pq_doorbell_control |= 1 <<
CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_MODE__SHIFT;
m->cp_mqd_stride_size = kfd_hiq_mqd_stride(mm->dev);
@@ -662,7 +671,9 @@ static void init_mqd_v9_4_3(struct mqd_manager *mm, void **mqd,
get_xcc_mqd(mqd_mem_obj, &xcc_mqd_mem_obj, offset*xcc);
init_mqd(mm, (void **)&m, &xcc_mqd_mem_obj, &xcc_gart_addr, q);
-
+ if (amdgpu_sriov_multi_vf_mode(mm->dev->adev))
+ m->cp_hqd_pq_doorbell_control |= 1 <<
+ CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_MODE__SHIFT;
m->cp_mqd_stride_size = offset;
/*
@@ -722,6 +733,9 @@ static void update_mqd_v9_4_3(struct mqd_manager *mm, void *mqd,
m = get_mqd(mqd + size * xcc);
update_mqd(mm, m, q, minfo);
+ if (amdgpu_sriov_multi_vf_mode(mm->dev->adev))
+ m->cp_hqd_pq_doorbell_control |= 1 <<
+ CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_MODE__SHIFT;
update_cu_mask(mm, m, minfo, xcc);
if (q->format == KFD_QUEUE_FORMAT_AQL) {
@@ -744,6 +758,21 @@ static void update_mqd_v9_4_3(struct mqd_manager *mm, void *mqd,
}
}
+static void restore_mqd_v9_4_3(struct mqd_manager *mm, void **mqd,
+ struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr,
+ struct queue_properties *qp,
+ const void *mqd_src,
+ const void *ctl_stack_src, u32 ctl_stack_size)
+{
+ restore_mqd(mm, mqd, mqd_mem_obj, gart_addr, qp, mqd_src, ctl_stack_src, ctl_stack_size);
+ if (amdgpu_sriov_multi_vf_mode(mm->dev->adev)) {
+ struct v9_mqd *m;
+
+ m = (struct v9_mqd *) mqd_mem_obj->cpu_ptr;
+ m->cp_hqd_pq_doorbell_control |= 1 <<
+ CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_MODE__SHIFT;
+ }
+}
static int destroy_mqd_v9_4_3(struct mqd_manager *mm, void *mqd,
enum kfd_preempt_type type, unsigned int timeout,
uint32_t pipe_id, uint32_t queue_id)
@@ -878,23 +907,25 @@ struct mqd_manager *mqd_manager_init_v9(enum KFD_MQD_TYPE type,
mqd->is_occupied = kfd_is_occupied_cp;
mqd->get_checkpoint_info = get_checkpoint_info;
mqd->checkpoint_mqd = checkpoint_mqd;
- mqd->restore_mqd = restore_mqd;
mqd->mqd_size = sizeof(struct v9_mqd);
mqd->mqd_stride = mqd_stride_v9;
#if defined(CONFIG_DEBUG_FS)
mqd->debugfs_show_mqd = debugfs_show_mqd;
#endif
if (KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 3) ||
- KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 4)) {
+ KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 4) ||
+ KFD_GC_VERSION(dev) == IP_VERSION(9, 5, 0)) {
mqd->init_mqd = init_mqd_v9_4_3;
mqd->load_mqd = load_mqd_v9_4_3;
mqd->update_mqd = update_mqd_v9_4_3;
+ mqd->restore_mqd = restore_mqd_v9_4_3;
mqd->destroy_mqd = destroy_mqd_v9_4_3;
mqd->get_wave_state = get_wave_state_v9_4_3;
} else {
mqd->init_mqd = init_mqd;
mqd->load_mqd = load_mqd;
mqd->update_mqd = update_mqd;
+ mqd->restore_mqd = restore_mqd;
mqd->destroy_mqd = kfd_destroy_mqd_cp;
mqd->get_wave_state = get_wave_state;
}
@@ -909,8 +940,10 @@ struct mqd_manager *mqd_manager_init_v9(enum KFD_MQD_TYPE type,
#if defined(CONFIG_DEBUG_FS)
mqd->debugfs_show_mqd = debugfs_show_mqd;
#endif
+ mqd->check_preemption_failed = check_preemption_failed;
if (KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 3) ||
- KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 4)) {
+ KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 4) ||
+ KFD_GC_VERSION(dev) == IP_VERSION(9, 5, 0)) {
mqd->init_mqd = init_mqd_hiq_v9_4_3;
mqd->load_mqd = hiq_load_mqd_kiq_v9_4_3;
mqd->destroy_mqd = destroy_hiq_mqd_v9_4_3;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
index 37930629edc5..b1a6eb349bb3 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
@@ -28,6 +28,11 @@
#include "kfd_kernel_queue.h"
#include "kfd_priv.h"
+#define OVER_SUBSCRIPTION_PROCESS_COUNT (1 << 0)
+#define OVER_SUBSCRIPTION_COMPUTE_QUEUE_COUNT (1 << 1)
+#define OVER_SUBSCRIPTION_GWS_QUEUE_COUNT (1 << 2)
+#define OVER_SUBSCRIPTION_XNACK_CONFLICT (1 << 3)
+
static inline void inc_wptr(unsigned int *wptr, unsigned int increment_bytes,
unsigned int buffer_size_bytes)
{
@@ -40,7 +45,8 @@ static inline void inc_wptr(unsigned int *wptr, unsigned int increment_bytes,
static void pm_calc_rlib_size(struct packet_manager *pm,
unsigned int *rlib_size,
- bool *over_subscription)
+ int *over_subscription,
+ int xnack_conflict)
{
unsigned int process_count, queue_count, compute_queue_count, gws_queue_count;
unsigned int map_queue_size;
@@ -58,17 +64,22 @@ static void pm_calc_rlib_size(struct packet_manager *pm,
* hws_max_conc_proc has been done in
* kgd2kfd_device_init().
*/
- *over_subscription = false;
+ *over_subscription = 0;
if (node->max_proc_per_quantum > 1)
max_proc_per_quantum = node->max_proc_per_quantum;
- if ((process_count > max_proc_per_quantum) ||
- compute_queue_count > get_cp_queues_num(pm->dqm) ||
- gws_queue_count > 1) {
- *over_subscription = true;
+ if (process_count > max_proc_per_quantum)
+ *over_subscription |= OVER_SUBSCRIPTION_PROCESS_COUNT;
+ if (compute_queue_count > get_cp_queues_num(pm->dqm))
+ *over_subscription |= OVER_SUBSCRIPTION_COMPUTE_QUEUE_COUNT;
+ if (gws_queue_count > 1)
+ *over_subscription |= OVER_SUBSCRIPTION_GWS_QUEUE_COUNT;
+ if (xnack_conflict && (node->adev->gmc.xnack_flags & AMDGPU_GMC_XNACK_FLAG_CHAIN))
+ *over_subscription |= OVER_SUBSCRIPTION_XNACK_CONFLICT;
+
+ if (*over_subscription)
dev_dbg(dev, "Over subscribed runlist\n");
- }
map_queue_size = pm->pmf->map_queues_size;
/* calculate run list ib allocation size */
@@ -89,7 +100,8 @@ static int pm_allocate_runlist_ib(struct packet_manager *pm,
unsigned int **rl_buffer,
uint64_t *rl_gpu_buffer,
unsigned int *rl_buffer_size,
- bool *is_over_subscription)
+ int *is_over_subscription,
+ int xnack_conflict)
{
struct kfd_node *node = pm->dqm->dev;
struct device *dev = node->adev->dev;
@@ -98,7 +110,8 @@ static int pm_allocate_runlist_ib(struct packet_manager *pm,
if (WARN_ON(pm->allocated))
return -EINVAL;
- pm_calc_rlib_size(pm, rl_buffer_size, is_over_subscription);
+ pm_calc_rlib_size(pm, rl_buffer_size, is_over_subscription,
+ xnack_conflict);
mutex_lock(&pm->lock);
@@ -134,12 +147,28 @@ static int pm_create_runlist_ib(struct packet_manager *pm,
struct qcm_process_device *qpd;
struct queue *q;
struct kernel_queue *kq;
- bool is_over_subscription;
+ int is_over_subscription;
+ int xnack_enabled = -1;
+ bool xnack_conflict = 0;
rl_wptr = retval = processes_mapped = 0;
+ /* Check if processes set different xnack modes */
+ list_for_each_entry(cur, queues, list) {
+ qpd = cur->qpd;
+ if (xnack_enabled < 0)
+ /* First process */
+ xnack_enabled = qpd->pqm->process->xnack_enabled;
+ else if (qpd->pqm->process->xnack_enabled != xnack_enabled) {
+ /* Found a process with a different xnack mode */
+ xnack_conflict = 1;
+ break;
+ }
+ }
+
retval = pm_allocate_runlist_ib(pm, &rl_buffer, rl_gpu_addr,
- &alloc_size_bytes, &is_over_subscription);
+ &alloc_size_bytes, &is_over_subscription,
+ xnack_conflict);
if (retval)
return retval;
@@ -149,9 +178,13 @@ static int pm_create_runlist_ib(struct packet_manager *pm,
dev_dbg(dev, "Building runlist ib process count: %d queues count %d\n",
pm->dqm->processes_count, pm->dqm->active_queue_count);
+build_runlist_ib:
/* build the run list ib packet */
list_for_each_entry(cur, queues, list) {
qpd = cur->qpd;
+ /* group processes with the same xnack mode together */
+ if (qpd->pqm->process->xnack_enabled != xnack_enabled)
+ continue;
/* build map process packet */
if (processes_mapped >= pm->dqm->processes_count) {
dev_dbg(dev, "Not enough space left in runlist IB\n");
@@ -208,20 +241,33 @@ static int pm_create_runlist_ib(struct packet_manager *pm,
alloc_size_bytes);
}
}
+ if (xnack_conflict) {
+ /* pick up processes with the other xnack mode */
+ xnack_enabled = !xnack_enabled;
+ xnack_conflict = 0;
+ goto build_runlist_ib;
+ }
dev_dbg(dev, "Finished map process and queues to runlist\n");
if (is_over_subscription) {
if (!pm->is_over_subscription)
- dev_warn(
- dev,
- "Runlist is getting oversubscribed. Expect reduced ROCm performance.\n");
+ dev_warn(dev, "Runlist is getting oversubscribed due to%s%s%s%s. Expect reduced ROCm performance.\n",
+ is_over_subscription & OVER_SUBSCRIPTION_PROCESS_COUNT ?
+ " too many processes" : "",
+ is_over_subscription & OVER_SUBSCRIPTION_COMPUTE_QUEUE_COUNT ?
+ " too many queues" : "",
+ is_over_subscription & OVER_SUBSCRIPTION_GWS_QUEUE_COUNT ?
+ " multiple processes using cooperative launch" : "",
+ is_over_subscription & OVER_SUBSCRIPTION_XNACK_CONFLICT ?
+ " xnack on/off processes mixed on gfx9" : "");
+
retval = pm->pmf->runlist(pm, &rl_buffer[rl_wptr],
*rl_gpu_addr,
alloc_size_bytes / sizeof(uint32_t),
true);
}
- pm->is_over_subscription = is_over_subscription;
+ pm->is_over_subscription = !!is_over_subscription;
for (i = 0; i < alloc_size_bytes / sizeof(uint32_t); i++)
pr_debug("0x%2X ", rl_buffer[i]);
@@ -248,7 +294,8 @@ int pm_init(struct packet_manager *pm, struct device_queue_manager *dqm)
default:
if (KFD_GC_VERSION(dqm->dev) == IP_VERSION(9, 4, 2) ||
KFD_GC_VERSION(dqm->dev) == IP_VERSION(9, 4, 3) ||
- KFD_GC_VERSION(dqm->dev) == IP_VERSION(9, 4, 4))
+ KFD_GC_VERSION(dqm->dev) == IP_VERSION(9, 4, 4) ||
+ KFD_GC_VERSION(dqm->dev) == IP_VERSION(9, 5, 0))
pm->pmf = &kfd_aldebaran_pm_funcs;
else if (KFD_GC_VERSION(dqm->dev) >= IP_VERSION(9, 0, 1))
pm->pmf = &kfd_v9_pm_funcs;
@@ -383,14 +430,33 @@ out:
return retval;
}
-int pm_update_grace_period(struct packet_manager *pm, uint32_t grace_period)
+/* pm_config_dequeue_wait_counts: Configure dequeue timer Wait Counts
+ * by writing to CP_IQ_WAIT_TIME2 registers.
+ *
+ * @cmd: See emum kfd_config_dequeue_wait_counts_cmd definition
+ * @value: Depends on the cmd. This parameter is unused for
+ * KFD_DEQUEUE_WAIT_INIT and KFD_DEQUEUE_WAIT_RESET. For
+ * KFD_DEQUEUE_WAIT_SET_SCH_WAVE it holds value to be set
+ *
+ */
+int pm_config_dequeue_wait_counts(struct packet_manager *pm,
+ enum kfd_config_dequeue_wait_counts_cmd cmd,
+ uint32_t value)
{
struct kfd_node *node = pm->dqm->dev;
struct device *dev = node->adev->dev;
int retval = 0;
uint32_t *buffer, size;
- size = pm->pmf->set_grace_period_size;
+ if (!pm->pmf->config_dequeue_wait_counts ||
+ !pm->pmf->config_dequeue_wait_counts_size)
+ return 0;
+
+ if (cmd == KFD_DEQUEUE_WAIT_INIT && (KFD_GC_VERSION(pm->dqm->dev) < IP_VERSION(9, 4, 1) ||
+ KFD_GC_VERSION(pm->dqm->dev) >= IP_VERSION(10, 0, 0)))
+ return 0;
+
+ size = pm->pmf->config_dequeue_wait_counts_size;
mutex_lock(&pm->lock);
@@ -406,13 +472,18 @@ int pm_update_grace_period(struct packet_manager *pm, uint32_t grace_period)
goto out;
}
- retval = pm->pmf->set_grace_period(pm, buffer, grace_period);
- if (!retval)
+ retval = pm->pmf->config_dequeue_wait_counts(pm, buffer,
+ cmd, value);
+ if (!retval) {
retval = kq_submit_packet(pm->priv_queue);
- else
+
+ /* If default value is modified, cache that in dqm->wait_times */
+ if (!retval && cmd == KFD_DEQUEUE_WAIT_INIT)
+ update_dqm_wait_times(pm->dqm);
+ } else {
kq_rollback_packet(pm->priv_queue);
+ }
}
-
out:
mutex_unlock(&pm->lock);
return retval;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c
index 1f9f5bfeaf86..505036968a77 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c
@@ -43,11 +43,11 @@ static int pm_map_process_v9(struct packet_manager *pm,
memset(buffer, 0, sizeof(struct pm4_mes_map_process));
packet->header.u32All = pm_build_pm4_header(IT_MAP_PROCESS,
sizeof(struct pm4_mes_map_process));
- if (adev->enforce_isolation[kfd->node_id])
+ if (adev->enforce_isolation[kfd->node_id] == AMDGPU_ENFORCE_ISOLATION_ENABLE)
packet->bitfields2.exec_cleaner_shader = 1;
packet->bitfields2.diq_enable = (qpd->is_debug) ? 1 : 0;
packet->bitfields2.process_quantum = 10;
- packet->bitfields2.pasid = qpd->pqm->process->pasid;
+ packet->bitfields2.pasid = pdd->pasid;
packet->bitfields14.gds_size = qpd->gds_size & 0x3F;
packet->bitfields14.gds_size_hi = (qpd->gds_size >> 6) & 0xF;
packet->bitfields14.num_gws = (qpd->mapped_gws_queue) ? qpd->num_gws : 0;
@@ -102,11 +102,12 @@ static int pm_map_process_aldebaran(struct packet_manager *pm,
memset(buffer, 0, sizeof(struct pm4_mes_map_process_aldebaran));
packet->header.u32All = pm_build_pm4_header(IT_MAP_PROCESS,
sizeof(struct pm4_mes_map_process_aldebaran));
- if (adev->enforce_isolation[knode->node_id])
+ if (adev->enforce_isolation[knode->node_id] ==
+ AMDGPU_ENFORCE_ISOLATION_ENABLE)
packet->bitfields2.exec_cleaner_shader = 1;
packet->bitfields2.diq_enable = (qpd->is_debug) ? 1 : 0;
packet->bitfields2.process_quantum = 10;
- packet->bitfields2.pasid = qpd->pqm->process->pasid;
+ packet->bitfields2.pasid = pdd->pasid;
packet->bitfields14.gds_size = qpd->gds_size & 0x3F;
packet->bitfields14.gds_size_hi = (qpd->gds_size >> 6) & 0xF;
packet->bitfields14.num_gws = (qpd->mapped_gws_queue) ? qpd->num_gws : 0;
@@ -165,9 +166,9 @@ static int pm_runlist_v9(struct packet_manager *pm, uint32_t *buffer,
* hws_max_conc_proc has been done in
* kgd2kfd_device_init().
*/
- concurrent_proc_cnt = adev->enforce_isolation[kfd->node_id] ?
- 1 : min(pm->dqm->processes_count,
- kfd->max_proc_per_quantum);
+ concurrent_proc_cnt = (adev->enforce_isolation[kfd->node_id] ==
+ AMDGPU_ENFORCE_ISOLATION_ENABLE) ?
+ 1 : min(pm->dqm->processes_count, kfd->max_proc_per_quantum);
packet = (struct pm4_mes_runlist *)buffer;
@@ -202,6 +203,8 @@ static int pm_set_resources_v9(struct packet_manager *pm, uint32_t *buffer,
queue_type__mes_set_resources__hsa_interface_queue_hiq;
packet->bitfields2.vmid_mask = res->vmid_mask;
packet->bitfields2.unmap_latency = KFD_UNMAP_LATENCY_MS / 100;
+ if (pm->dqm->dev->adev->gmc.xnack_flags & AMDGPU_GMC_XNACK_FLAG_CHAIN)
+ packet->bitfields2.enb_xnack_retry_disable_check = 1;
packet->bitfields7.oac_mask = res->oac_mask;
packet->bitfields8.gds_heap_base = res->gds_heap_base;
packet->bitfields8.gds_heap_size = res->gds_heap_size;
@@ -237,7 +240,7 @@ static int pm_map_queues_v9(struct packet_manager *pm, uint32_t *buffer,
packet->bitfields2.engine_sel =
engine_sel__mes_map_queues__compute_vi;
- packet->bitfields2.gws_control_queue = q->gws ? 1 : 0;
+ packet->bitfields2.gws_control_queue = q->properties.is_gws ? 1 : 0;
packet->bitfields2.extended_engine_sel =
extended_engine_sel__mes_map_queues__legacy_engine_sel;
packet->bitfields2.queue_type =
@@ -297,23 +300,79 @@ static int pm_map_queues_v9(struct packet_manager *pm, uint32_t *buffer,
return 0;
}
-static int pm_set_grace_period_v9(struct packet_manager *pm,
+static inline void pm_build_dequeue_wait_counts_packet_info(struct packet_manager *pm,
+ uint32_t sch_value, uint32_t que_sleep, uint32_t *reg_offset,
+ uint32_t *reg_data)
+{
+ pm->dqm->dev->kfd2kgd->build_dequeue_wait_counts_packet_info(
+ pm->dqm->dev->adev,
+ pm->dqm->wait_times,
+ sch_value,
+ que_sleep,
+ reg_offset,
+ reg_data);
+}
+
+/* pm_config_dequeue_wait_counts_v9: Builds WRITE_DATA packet with
+ * register/value for configuring dequeue wait counts
+ *
+ * @return: -ve for failure and 0 for success and buffer is
+ * filled in with packet
+ *
+ **/
+static int pm_config_dequeue_wait_counts_v9(struct packet_manager *pm,
uint32_t *buffer,
- uint32_t grace_period)
+ enum kfd_config_dequeue_wait_counts_cmd cmd,
+ uint32_t value)
{
struct pm4_mec_write_data_mmio *packet;
uint32_t reg_offset = 0;
uint32_t reg_data = 0;
- pm->dqm->dev->kfd2kgd->build_grace_period_packet_info(
- pm->dqm->dev->adev,
- pm->dqm->wait_times,
- grace_period,
- &reg_offset,
- &reg_data);
+ switch (cmd) {
+ case KFD_DEQUEUE_WAIT_INIT: {
+ uint32_t sch_wave = 0, que_sleep = 1;
+
+ /* For all gfx9 ASICs > gfx941,
+ * Reduce CP_IQ_WAIT_TIME2.QUE_SLEEP to 0x1 from default 0x40.
+ * On a 1GHz machine this is roughly 1 microsecond, which is
+ * about how long it takes to load data out of memory during
+ * queue connect
+ * QUE_SLEEP: Wait Count for Dequeue Retry.
+ *
+ * Set CWSR grace period to 1x1000 cycle for GFX9.4.3 APU
+ */
+ if (KFD_GC_VERSION(pm->dqm->dev) < IP_VERSION(9, 4, 1) ||
+ KFD_GC_VERSION(pm->dqm->dev) >= IP_VERSION(10, 0, 0))
+ return -EPERM;
+
+ if (amdgpu_emu_mode == 0 && pm->dqm->dev->adev->gmc.is_app_apu &&
+ (KFD_GC_VERSION(pm->dqm->dev) == IP_VERSION(9, 4, 3)))
+ sch_wave = 1;
- if (grace_period == USE_DEFAULT_GRACE_PERIOD)
- reg_data = pm->dqm->wait_times;
+ pm_build_dequeue_wait_counts_packet_info(pm, sch_wave, que_sleep,
+ &reg_offset, &reg_data);
+
+ break;
+ }
+ case KFD_DEQUEUE_WAIT_RESET:
+ /* reg_data would be set to dqm->wait_times */
+ pm_build_dequeue_wait_counts_packet_info(pm, 0, 0, &reg_offset, &reg_data);
+ break;
+
+ case KFD_DEQUEUE_WAIT_SET_SCH_WAVE:
+ /* The CP cannot handle value 0 and it will result in
+ * an infinite grace period being set so set to 1 to prevent this. Also
+ * avoid debugger API breakage as it sets 0 and expects a low value.
+ */
+ if (!value)
+ value = 1;
+ pm_build_dequeue_wait_counts_packet_info(pm, value, 0, &reg_offset, &reg_data);
+ break;
+ default:
+ pr_err("Invalid dequeue wait cmd\n");
+ return -EINVAL;
+ }
packet = (struct pm4_mec_write_data_mmio *)buffer;
memset(buffer, 0, sizeof(struct pm4_mec_write_data_mmio));
@@ -415,7 +474,7 @@ const struct packet_manager_funcs kfd_v9_pm_funcs = {
.set_resources = pm_set_resources_v9,
.map_queues = pm_map_queues_v9,
.unmap_queues = pm_unmap_queues_v9,
- .set_grace_period = pm_set_grace_period_v9,
+ .config_dequeue_wait_counts = pm_config_dequeue_wait_counts_v9,
.query_status = pm_query_status_v9,
.release_mem = NULL,
.map_process_size = sizeof(struct pm4_mes_map_process),
@@ -423,7 +482,7 @@ const struct packet_manager_funcs kfd_v9_pm_funcs = {
.set_resources_size = sizeof(struct pm4_mes_set_resources),
.map_queues_size = sizeof(struct pm4_mes_map_queues),
.unmap_queues_size = sizeof(struct pm4_mes_unmap_queues),
- .set_grace_period_size = sizeof(struct pm4_mec_write_data_mmio),
+ .config_dequeue_wait_counts_size = sizeof(struct pm4_mec_write_data_mmio),
.query_status_size = sizeof(struct pm4_mes_query_status),
.release_mem_size = 0,
};
@@ -434,7 +493,7 @@ const struct packet_manager_funcs kfd_aldebaran_pm_funcs = {
.set_resources = pm_set_resources_v9,
.map_queues = pm_map_queues_v9,
.unmap_queues = pm_unmap_queues_v9,
- .set_grace_period = pm_set_grace_period_v9,
+ .config_dequeue_wait_counts = pm_config_dequeue_wait_counts_v9,
.query_status = pm_query_status_v9,
.release_mem = NULL,
.map_process_size = sizeof(struct pm4_mes_map_process_aldebaran),
@@ -442,7 +501,7 @@ const struct packet_manager_funcs kfd_aldebaran_pm_funcs = {
.set_resources_size = sizeof(struct pm4_mes_set_resources),
.map_queues_size = sizeof(struct pm4_mes_map_queues),
.unmap_queues_size = sizeof(struct pm4_mes_unmap_queues),
- .set_grace_period_size = sizeof(struct pm4_mec_write_data_mmio),
+ .config_dequeue_wait_counts_size = sizeof(struct pm4_mec_write_data_mmio),
.query_status_size = sizeof(struct pm4_mes_query_status),
.release_mem_size = 0,
};
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_vi.c
index c1199d06d131..a1de5d7e173a 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_vi.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_vi.c
@@ -42,6 +42,7 @@ unsigned int pm_build_pm4_header(unsigned int opcode, size_t packet_size)
static int pm_map_process_vi(struct packet_manager *pm, uint32_t *buffer,
struct qcm_process_device *qpd)
{
+ struct kfd_process_device *pdd = qpd_to_pdd(qpd);
struct pm4_mes_map_process *packet;
packet = (struct pm4_mes_map_process *)buffer;
@@ -52,7 +53,7 @@ static int pm_map_process_vi(struct packet_manager *pm, uint32_t *buffer,
sizeof(struct pm4_mes_map_process));
packet->bitfields2.diq_enable = (qpd->is_debug) ? 1 : 0;
packet->bitfields2.process_quantum = 10;
- packet->bitfields2.pasid = qpd->pqm->process->pasid;
+ packet->bitfields2.pasid = pdd->pasid;
packet->bitfields3.page_table_base = qpd->page_table_base;
packet->bitfields10.gds_size = qpd->gds_size;
packet->bitfields10.num_gws = qpd->num_gws;
@@ -303,7 +304,7 @@ const struct packet_manager_funcs kfd_vi_pm_funcs = {
.set_resources = pm_set_resources_vi,
.map_queues = pm_map_queues_vi,
.unmap_queues = pm_unmap_queues_vi,
- .set_grace_period = NULL,
+ .config_dequeue_wait_counts = NULL,
.query_status = pm_query_status_vi,
.release_mem = pm_release_mem_vi,
.map_process_size = sizeof(struct pm4_mes_map_process),
@@ -311,7 +312,7 @@ const struct packet_manager_funcs kfd_vi_pm_funcs = {
.set_resources_size = sizeof(struct pm4_mes_set_resources),
.map_queues_size = sizeof(struct pm4_mes_map_queues),
.unmap_queues_size = sizeof(struct pm4_mes_unmap_queues),
- .set_grace_period_size = 0,
+ .config_dequeue_wait_counts_size = 0,
.query_status_size = sizeof(struct pm4_mes_query_status),
.release_mem_size = sizeof(struct pm4_mec_release_mem)
};
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_pasid.c b/drivers/gpu/drm/amd/amdkfd/kfd_pasid.c
deleted file mode 100644
index e3b250918f39..000000000000
--- a/drivers/gpu/drm/amd/amdkfd/kfd_pasid.c
+++ /dev/null
@@ -1,70 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0 OR MIT
-/*
- * Copyright 2014-2022 Advanced Micro Devices, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- */
-
-#include <linux/types.h>
-#include "kfd_priv.h"
-#include "amdgpu_ids.h"
-
-static unsigned int pasid_bits = 16;
-static bool pasids_allocated; /* = false */
-
-bool kfd_set_pasid_limit(unsigned int new_limit)
-{
- if (new_limit < 2)
- return false;
-
- if (new_limit < (1U << pasid_bits)) {
- if (pasids_allocated)
- /* We've already allocated user PASIDs, too late to
- * change the limit
- */
- return false;
-
- while (new_limit < (1U << pasid_bits))
- pasid_bits--;
- }
-
- return true;
-}
-
-unsigned int kfd_get_pasid_limit(void)
-{
- return 1U << pasid_bits;
-}
-
-u32 kfd_pasid_alloc(void)
-{
- int r = amdgpu_pasid_alloc(pasid_bits);
-
- if (r > 0) {
- pasids_allocated = true;
- return r;
- }
-
- return 0;
-}
-
-void kfd_pasid_free(u32 pasid)
-{
- amdgpu_pasid_free(pasid);
-}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_ai.h b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_ai.h
index cd8611401a66..e356a207d03c 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_ai.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_ai.h
@@ -63,7 +63,8 @@ struct pm4_mes_set_resources {
struct {
uint32_t vmid_mask:16;
uint32_t unmap_latency:8;
- uint32_t reserved1:5;
+ uint32_t reserved1:4;
+ uint32_t enb_xnack_retry_disable_check:1;
enum mes_set_resources_queue_type_enum queue_type:3;
} bitfields2;
uint32_t ordinal2;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 26e48fdc8728..d221c58dccc3 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -32,7 +32,7 @@
#include <linux/atomic.h>
#include <linux/workqueue.h>
#include <linux/spinlock.h>
-#include <linux/kfd_ioctl.h>
+#include <uapi/linux/kfd_ioctl.h>
#include <linux/idr.h>
#include <linux/kfifo.h>
#include <linux/seq_file.h>
@@ -207,7 +207,8 @@ enum cache_policy {
#define KFD_SUPPORT_XNACK_PER_PROCESS(dev)\
((KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 2)) || \
(KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 3)) || \
- (KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 4)))
+ (KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 4)) || \
+ (KFD_GC_VERSION(dev) == IP_VERSION(9, 5, 0)))
struct kfd_node;
@@ -273,7 +274,6 @@ struct kfd_node {
/* Interrupts */
struct kfifo ih_fifo;
- struct workqueue_struct *ih_wq;
struct work_struct interrupt_work;
spinlock_t interrupt_lock;
@@ -289,7 +289,6 @@ struct kfd_node {
/* Global GWS resource shared between processes */
void *gws;
- bool gws_debug_workaround;
/* Clients watching SMI events */
struct list_head smi_clients;
@@ -366,6 +365,8 @@ struct kfd_dev {
struct kfd_node *nodes[MAX_KFD_NODES];
unsigned int num_nodes;
+ struct workqueue_struct *ih_wq;
+
/* Kernel doorbells for KFD device */
struct amdgpu_bo *doorbells;
@@ -849,6 +850,8 @@ struct kfd_process_device {
/* Tracks queue reset status */
bool has_reset_queue;
+
+ u32 pasid;
};
#define qpd_to_pdd(x) container_of(x, struct kfd_process_device, qpd)
@@ -908,8 +911,6 @@ struct kfd_process {
/* We want to receive a notification when the mm_struct is destroyed */
struct mmu_notifier mmu_notifier;
- u32 pasid;
-
/*
* Array of kfd_process_device pointers,
* one for each device the process is using.
@@ -1002,6 +1003,9 @@ struct kfd_process {
struct semaphore runtime_enable_sema;
bool is_runtime_retry;
struct kfd_runtime_info runtime_info;
+
+ /* if gpu page fault sent to KFD */
+ bool gpu_page_fault;
};
#define KFD_PROCESS_TABLE_SIZE 5 /* bits: 32 entries */
@@ -1034,7 +1038,8 @@ void kfd_process_destroy_wq(void);
void kfd_cleanup_processes(void);
struct kfd_process *kfd_create_process(struct task_struct *thread);
struct kfd_process *kfd_get_process(const struct task_struct *task);
-struct kfd_process *kfd_lookup_process_by_pasid(u32 pasid);
+struct kfd_process *kfd_lookup_process_by_pasid(u32 pasid,
+ struct kfd_process_device **pdd);
struct kfd_process *kfd_lookup_process_by_mm(const struct mm_struct *mm);
int kfd_process_gpuidx_from_gpuid(struct kfd_process *p, uint32_t gpu_id);
@@ -1086,8 +1091,6 @@ struct kfd_process *kfd_lookup_process_by_pid(struct pid *pid);
/* PASIDs */
int kfd_pasid_init(void);
void kfd_pasid_exit(void);
-bool kfd_set_pasid_limit(unsigned int new_limit);
-unsigned int kfd_get_pasid_limit(void);
u32 kfd_pasid_alloc(void);
void kfd_pasid_free(u32 pasid);
@@ -1137,7 +1140,6 @@ struct kfd_topology_device *kfd_topology_device_by_proximity_domain_no_lock(
uint32_t proximity_domain);
struct kfd_topology_device *kfd_topology_device_by_id(uint32_t gpu_id);
struct kfd_node *kfd_device_by_id(uint32_t gpu_id);
-struct kfd_node *kfd_device_by_pci_dev(const struct pci_dev *pdev);
static inline bool kfd_irq_is_from_node(struct kfd_node *node, uint32_t node_id,
uint32_t vmid)
{
@@ -1150,7 +1152,8 @@ static inline struct kfd_node *kfd_node_by_irq_ids(struct amdgpu_device *adev,
uint32_t i;
if (KFD_GC_VERSION(dev) != IP_VERSION(9, 4, 3) &&
- KFD_GC_VERSION(dev) != IP_VERSION(9, 4, 4))
+ KFD_GC_VERSION(dev) != IP_VERSION(9, 4, 4) &&
+ KFD_GC_VERSION(dev) != IP_VERSION(9, 5, 0))
return dev->nodes[0];
for (i = 0; i < dev->num_nodes; i++)
@@ -1331,7 +1334,7 @@ void device_queue_manager_uninit(struct device_queue_manager *dqm);
struct kernel_queue *kernel_queue_init(struct kfd_node *dev,
enum kfd_queue_type type);
void kernel_queue_uninit(struct kernel_queue *kq);
-int kfd_dqm_evict_pasid(struct device_queue_manager *dqm, u32 pasid);
+int kfd_evict_process_device(struct kfd_process_device *pdd);
int kfd_dqm_suspend_bad_queue_mes(struct kfd_node *knode, u32 pasid, u32 doorbell_id);
/* Process Queue Manager */
@@ -1347,7 +1350,6 @@ int pqm_init(struct process_queue_manager *pqm, struct kfd_process *p);
void pqm_uninit(struct process_queue_manager *pqm);
int pqm_create_queue(struct process_queue_manager *pqm,
struct kfd_node *dev,
- struct file *f,
struct queue_properties *properties,
unsigned int *qid,
const struct kfd_criu_queue_priv_data *q_data,
@@ -1361,8 +1363,6 @@ int pqm_update_mqd(struct process_queue_manager *pqm, unsigned int qid,
struct mqd_update_info *minfo);
int pqm_set_gws(struct process_queue_manager *pqm, unsigned int qid,
void *gws);
-struct kernel_queue *pqm_get_kernel_queue(struct process_queue_manager *pqm,
- unsigned int qid);
struct queue *pqm_get_user_queue(struct process_queue_manager *pqm,
unsigned int qid);
int pqm_get_wave_state(struct process_queue_manager *pqm,
@@ -1389,6 +1389,24 @@ int pqm_get_queue_checkpoint_info(struct process_queue_manager *pqm,
#define KFD_FENCE_COMPLETED (100)
#define KFD_FENCE_INIT (10)
+/**
+ * enum kfd_config_dequeue_wait_counts_cmd - Command for configuring
+ * dequeue wait counts.
+ *
+ * @KFD_DEQUEUE_WAIT_INIT: Set optimized dequeue wait counts for a
+ * certain ASICs. For these ASICs, this is default value used by RESET
+ * @KFD_DEQUEUE_WAIT_RESET: Reset dequeue wait counts to the optimized value
+ * for certain ASICs. For others set it to default hardware reset value
+ * @KFD_DEQUEUE_WAIT_SET_SCH_WAVE: Set context switch latency wait
+ *
+ */
+enum kfd_config_dequeue_wait_counts_cmd {
+ KFD_DEQUEUE_WAIT_INIT = 1,
+ KFD_DEQUEUE_WAIT_RESET = 2,
+ KFD_DEQUEUE_WAIT_SET_SCH_WAVE = 3
+};
+
+
struct packet_manager {
struct device_queue_manager *dqm;
struct kernel_queue *priv_queue;
@@ -1414,8 +1432,8 @@ struct packet_manager_funcs {
int (*unmap_queues)(struct packet_manager *pm, uint32_t *buffer,
enum kfd_unmap_queues_filter mode,
uint32_t filter_param, bool reset);
- int (*set_grace_period)(struct packet_manager *pm, uint32_t *buffer,
- uint32_t grace_period);
+ int (*config_dequeue_wait_counts)(struct packet_manager *pm, uint32_t *buffer,
+ enum kfd_config_dequeue_wait_counts_cmd cmd, uint32_t value);
int (*query_status)(struct packet_manager *pm, uint32_t *buffer,
uint64_t fence_address, uint64_t fence_value);
int (*release_mem)(uint64_t gpu_addr, uint32_t *buffer);
@@ -1426,7 +1444,7 @@ struct packet_manager_funcs {
int set_resources_size;
int map_queues_size;
int unmap_queues_size;
- int set_grace_period_size;
+ int config_dequeue_wait_counts_size;
int query_status_size;
int release_mem_size;
};
@@ -1449,7 +1467,9 @@ int pm_send_unmap_queue(struct packet_manager *pm,
void pm_release_ib(struct packet_manager *pm);
-int pm_update_grace_period(struct packet_manager *pm, uint32_t grace_period);
+int pm_config_dequeue_wait_counts(struct packet_manager *pm,
+ enum kfd_config_dequeue_wait_counts_cmd cmd,
+ uint32_t wait_counts_config);
/* Following PM funcs can be shared among VI and AI */
unsigned int pm_build_pm4_header(unsigned int opcode, size_t packet_size);
@@ -1487,7 +1507,9 @@ int kfd_event_create(struct file *devkfd, struct kfd_process *p,
int kfd_get_num_events(struct kfd_process *p);
int kfd_event_destroy(struct kfd_process *p, uint32_t event_id);
-void kfd_signal_vm_fault_event(struct kfd_node *dev, u32 pasid,
+void kfd_signal_vm_fault_event_with_userptr(struct kfd_process *p, uint64_t gpu_va);
+
+void kfd_signal_vm_fault_event(struct kfd_process_device *pdd,
struct kfd_vm_fault_info *info,
struct kfd_hsa_memory_exception_data *data);
@@ -1561,10 +1583,15 @@ int kfd_debugfs_hang_hws(struct kfd_node *dev);
int pm_debugfs_hang_hws(struct packet_manager *pm);
int dqm_debugfs_hang_hws(struct device_queue_manager *dqm);
+void kfd_debugfs_add_process(struct kfd_process *p);
+void kfd_debugfs_remove_process(struct kfd_process *p);
+
#else
static inline void kfd_debugfs_init(void) {}
static inline void kfd_debugfs_fini(void) {}
+static inline void kfd_debugfs_add_process(struct kfd_process *p) {}
+static inline void kfd_debugfs_remove_process(struct kfd_process *p) {}
#endif
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index d4aa843aacfd..722ac1662bdc 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -35,6 +35,7 @@
#include <linux/pm_runtime.h>
#include "amdgpu_amdkfd.h"
#include "amdgpu.h"
+#include "amdgpu_reset.h"
struct mm_struct;
@@ -271,11 +272,9 @@ static int kfd_get_cu_occupancy(struct attribute *attr, char *buffer)
struct kfd_process *proc = NULL;
struct kfd_process_device *pdd = NULL;
int i;
- struct kfd_cu_occupancy cu_occupancy[AMDGPU_MAX_QUEUES];
+ struct kfd_cu_occupancy *cu_occupancy;
u32 queue_format;
- memset(cu_occupancy, 0x0, sizeof(cu_occupancy));
-
pdd = container_of(attr, struct kfd_process_device, attr_cu_occupancy);
dev = pdd->dev;
if (dev->kfd2kgd->get_cu_occupancy == NULL)
@@ -284,8 +283,8 @@ static int kfd_get_cu_occupancy(struct attribute *attr, char *buffer)
cu_cnt = 0;
proc = pdd->process;
if (pdd->qpd.queue_count == 0) {
- pr_debug("Gpu-Id: %d has no active queues for process %d\n",
- dev->id, proc->pasid);
+ pr_debug("Gpu-Id: %d has no active queues for process pid %d\n",
+ dev->id, (int)proc->lead_thread->pid);
return snprintf(buffer, PAGE_SIZE, "%d\n", cu_cnt);
}
@@ -293,6 +292,10 @@ static int kfd_get_cu_occupancy(struct attribute *attr, char *buffer)
wave_cnt = 0;
max_waves_per_cu = 0;
+ cu_occupancy = kcalloc(AMDGPU_MAX_QUEUES, sizeof(*cu_occupancy), GFP_KERNEL);
+ if (!cu_occupancy)
+ return -ENOMEM;
+
/*
* For GFX 9.4.3, fetch the CU occupancy from the first XCC in the partition.
* For AQL queues, because of cooperative dispatch we multiply the wave count
@@ -318,18 +321,16 @@ static int kfd_get_cu_occupancy(struct attribute *attr, char *buffer)
/* Translate wave count to number of compute units */
cu_cnt = (wave_cnt + (max_waves_per_cu - 1)) / max_waves_per_cu;
+ kfree(cu_occupancy);
return snprintf(buffer, PAGE_SIZE, "%d\n", cu_cnt);
}
static ssize_t kfd_procfs_show(struct kobject *kobj, struct attribute *attr,
char *buffer)
{
- if (strcmp(attr->name, "pasid") == 0) {
- struct kfd_process *p = container_of(attr, struct kfd_process,
- attr_pasid);
-
- return snprintf(buffer, PAGE_SIZE, "%d\n", p->pasid);
- } else if (strncmp(attr->name, "vram_", 5) == 0) {
+ if (strcmp(attr->name, "pasid") == 0)
+ return snprintf(buffer, PAGE_SIZE, "%d\n", 0);
+ else if (strncmp(attr->name, "vram_", 5) == 0) {
struct kfd_process_device *pdd = container_of(attr, struct kfd_process_device,
attr_vram);
return snprintf(buffer, PAGE_SIZE, "%llu\n", atomic64_read(&pdd->vram_usage));
@@ -338,8 +339,8 @@ static ssize_t kfd_procfs_show(struct kobject *kobj, struct attribute *attr,
attr_sdma);
struct kfd_sdma_activity_handler_workarea sdma_activity_work_handler;
- INIT_WORK(&sdma_activity_work_handler.sdma_activity_work,
- kfd_sdma_activity_worker);
+ INIT_WORK_ONSTACK(&sdma_activity_work_handler.sdma_activity_work,
+ kfd_sdma_activity_worker);
sdma_activity_work_handler.pdd = pdd;
sdma_activity_work_handler.sdma_activity_counter = 0;
@@ -347,6 +348,7 @@ static ssize_t kfd_procfs_show(struct kobject *kobj, struct attribute *attr,
schedule_work(&sdma_activity_work_handler.sdma_activity_work);
flush_work(&sdma_activity_work_handler.sdma_activity_work);
+ destroy_work_on_stack(&sdma_activity_work_handler.sdma_activity_work);
return snprintf(buffer, PAGE_SIZE, "%llu\n",
(sdma_activity_work_handler.sdma_activity_counter)/
@@ -837,6 +839,14 @@ struct kfd_process *kfd_create_process(struct task_struct *thread)
return ERR_PTR(-EINVAL);
}
+ /* If the process just called exec(3), it is possible that the
+ * cleanup of the kfd_process (following the release of the mm
+ * of the old process image) is still in the cleanup work queue.
+ * Make sure to drain any job before trying to recreate any
+ * resource for this process.
+ */
+ flush_workqueue(kfd_process_wq);
+
/*
* take kfd processes mutex before starting of process creation
* so there won't be a case where two threads of the same process
@@ -850,19 +860,13 @@ struct kfd_process *kfd_create_process(struct task_struct *thread)
goto out;
}
- /* A prior open of /dev/kfd could have already created the process. */
- process = find_process(thread, false);
+ /* A prior open of /dev/kfd could have already created the process.
+ * find_process will increase process kref in this case
+ */
+ process = find_process(thread, true);
if (process) {
pr_debug("Process already found\n");
} else {
- /* If the process just called exec(3), it is possible that the
- * cleanup of the kfd_process (following the release of the mm
- * of the old process image) is still in the cleanup work queue.
- * Make sure to drain any job before trying to recreate any
- * resource for this process.
- */
- flush_workqueue(kfd_process_wq);
-
process = create_process(thread);
if (IS_ERR(process))
goto out;
@@ -896,11 +900,11 @@ struct kfd_process *kfd_create_process(struct task_struct *thread)
kfd_procfs_add_sysfs_files(process);
kfd_procfs_add_sysfs_counters(process);
+ kfd_debugfs_add_process(process);
+
init_waitqueue_head(&process->wait_irq_drain);
}
out:
- if (!IS_ERR(process))
- kref_get(&process->ref);
mutex_unlock(&kfd_processes_mutex);
mmput(thread->mm);
@@ -1052,17 +1056,15 @@ static void kfd_process_destroy_pdds(struct kfd_process *p)
for (i = 0; i < p->n_pdds; i++) {
struct kfd_process_device *pdd = p->pdds[i];
- pr_debug("Releasing pdd (topology id %d) for process (pasid 0x%x)\n",
- pdd->dev->id, p->pasid);
+ kfd_smi_event_process(pdd, false);
+ pr_debug("Releasing pdd (topology id %d, for pid %d)\n",
+ pdd->dev->id, p->lead_thread->pid);
kfd_process_device_destroy_cwsr_dgpu(pdd);
kfd_process_device_destroy_ib_mem(pdd);
- if (pdd->drm_file) {
- amdgpu_amdkfd_gpuvm_release_process_vm(
- pdd->dev->adev, pdd->drm_priv);
+ if (pdd->drm_file)
fput(pdd->drm_file);
- }
if (pdd->qpd.cwsr_kaddr && !pdd->qpd.cwsr_base)
free_pages((unsigned long)pdd->qpd.cwsr_kaddr,
@@ -1072,7 +1074,8 @@ static void kfd_process_destroy_pdds(struct kfd_process *p)
kfd_free_process_doorbells(pdd->dev->kfd, pdd);
- if (pdd->dev->kfd->shared_resources.enable_mes)
+ if (pdd->dev->kfd->shared_resources.enable_mes &&
+ pdd->proc_ctx_cpu_ptr)
amdgpu_amdkfd_free_gtt_mem(pdd->dev->adev,
&pdd->proc_ctx_bo);
/*
@@ -1135,6 +1138,17 @@ static void kfd_process_remove_sysfs(struct kfd_process *p)
p->kobj = NULL;
}
+/*
+ * If any GPU is ongoing reset, wait for reset complete.
+ */
+static void kfd_process_wait_gpu_reset_complete(struct kfd_process *p)
+{
+ int i;
+
+ for (i = 0; i < p->n_pdds; i++)
+ flush_workqueue(p->pdds[i]->dev->adev->reset_domain->wq);
+}
+
/* No process locking is needed in this function, because the process
* is not findable any more. We must assume that no other thread is
* using it any more, otherwise we couldn't safely free the process
@@ -1149,15 +1163,22 @@ static void kfd_process_wq_release(struct work_struct *work)
kfd_process_dequeue_from_all_devices(p);
pqm_uninit(&p->pqm);
+ /*
+ * If GPU in reset, user queues may still running, wait for reset complete.
+ */
+ kfd_process_wait_gpu_reset_complete(p);
+
/* Signal the eviction fence after user mode queues are
* destroyed. This allows any BOs to be freed without
* triggering pointless evictions or waiting for fences.
*/
synchronize_rcu();
ef = rcu_access_pointer(p->ef);
- dma_fence_signal(ef);
+ if (ef)
+ dma_fence_signal(ef);
kfd_process_remove_sysfs(p);
+ kfd_debugfs_remove_process(p);
kfd_process_kunmap_signal_bo(p);
kfd_process_free_outstanding_kfd_bos(p);
@@ -1168,7 +1189,6 @@ static void kfd_process_wq_release(struct work_struct *work)
kfd_event_free_process(p);
- kfd_pasid_free(p->pasid);
mutex_destroy(&p->mutex);
put_task_struct(p->lead_thread);
@@ -1186,10 +1206,8 @@ static void kfd_process_ref_release(struct kref *ref)
static struct mmu_notifier *kfd_process_alloc_notifier(struct mm_struct *mm)
{
- int idx = srcu_read_lock(&kfd_processes_srcu);
- struct kfd_process *p = find_process_by_mm(mm);
-
- srcu_read_unlock(&kfd_processes_srcu, idx);
+ /* This increments p->ref counter if kfd process p exists */
+ struct kfd_process *p = kfd_lookup_process_by_mm(mm);
return p ? &p->mmu_notifier : ERR_PTR(-ESRCH);
}
@@ -1521,12 +1539,6 @@ static struct kfd_process *create_process(const struct task_struct *thread)
atomic_set(&process->debugged_process_count, 0);
sema_init(&process->runtime_enable_sema, 0);
- process->pasid = kfd_pasid_alloc();
- if (process->pasid == 0) {
- err = -ENOSPC;
- goto err_alloc_pasid;
- }
-
err = pqm_init(&process->pqm, process);
if (err != 0)
goto err_process_pqm_init;
@@ -1580,8 +1592,6 @@ err_init_svm_range_list:
err_init_apertures:
pqm_uninit(&process->pqm);
err_process_pqm_init:
- kfd_pasid_free(process->pasid);
-err_alloc_pasid:
kfd_event_free_process(process);
err_event_init:
mutex_destroy(&process->mutex);
@@ -1606,7 +1616,6 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_node *dev,
struct kfd_process *p)
{
struct kfd_process_device *pdd = NULL;
- int retval = 0;
if (WARN_ON_ONCE(p->n_pdds >= MAX_GPU_INSTANCE))
return NULL;
@@ -1630,21 +1639,6 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_node *dev,
pdd->user_gpu_id = dev->id;
atomic64_set(&pdd->evict_duration_counter, 0);
- if (dev->kfd->shared_resources.enable_mes) {
- retval = amdgpu_amdkfd_alloc_gtt_mem(dev->adev,
- AMDGPU_MES_PROC_CTX_SIZE,
- &pdd->proc_ctx_bo,
- &pdd->proc_ctx_gpu_addr,
- &pdd->proc_ctx_cpu_ptr,
- false);
- if (retval) {
- dev_err(dev->adev->dev,
- "failed to allocate process context bo\n");
- goto err_free_pdd;
- }
- memset(pdd->proc_ctx_cpu_ptr, 0, AMDGPU_MES_PROC_CTX_SIZE);
- }
-
p->pdds[p->n_pdds++] = pdd;
if (kfd_dbg_is_per_vmid_supported(pdd->dev))
pdd->spi_dbg_override = pdd->dev->kfd2kgd->disable_debug_trap(
@@ -1656,10 +1650,6 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_node *dev,
idr_init(&pdd->alloc_idr);
return pdd;
-
-err_free_pdd:
- kfree(pdd);
- return NULL;
}
/**
@@ -1720,15 +1710,21 @@ int kfd_process_device_init_vm(struct kfd_process_device *pdd,
if (ret)
goto err_init_cwsr;
- ret = amdgpu_amdkfd_gpuvm_set_vm_pasid(dev->adev, avm, p->pasid);
- if (ret)
- goto err_set_pasid;
+ if (unlikely(!avm->pasid)) {
+ dev_warn(pdd->dev->adev->dev, "WARN: vm %p has no pasid associated",
+ avm);
+ ret = -EINVAL;
+ goto err_get_pasid;
+ }
+ pdd->pasid = avm->pasid;
pdd->drm_file = drm_file;
+ kfd_smi_event_process(pdd, true);
+
return 0;
-err_set_pasid:
+err_get_pasid:
kfd_process_device_destroy_cwsr_dgpu(pdd);
err_init_cwsr:
kfd_process_device_destroy_ib_mem(pdd);
@@ -1814,25 +1810,50 @@ void kfd_process_device_remove_obj_handle(struct kfd_process_device *pdd,
idr_remove(&pdd->alloc_idr, handle);
}
-/* This increments the process->ref counter. */
-struct kfd_process *kfd_lookup_process_by_pasid(u32 pasid)
+static struct kfd_process_device *kfd_lookup_process_device_by_pasid(u32 pasid)
{
- struct kfd_process *p, *ret_p = NULL;
+ struct kfd_process_device *ret_p = NULL;
+ struct kfd_process *p;
unsigned int temp;
-
- int idx = srcu_read_lock(&kfd_processes_srcu);
+ int i;
hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
- if (p->pasid == pasid) {
- kref_get(&p->ref);
- ret_p = p;
- break;
+ for (i = 0; i < p->n_pdds; i++) {
+ if (p->pdds[i]->pasid == pasid) {
+ ret_p = p->pdds[i];
+ break;
+ }
}
+ if (ret_p)
+ break;
+ }
+ return ret_p;
+}
+
+/* This increments the process->ref counter. */
+struct kfd_process *kfd_lookup_process_by_pasid(u32 pasid,
+ struct kfd_process_device **pdd)
+{
+ struct kfd_process_device *ret_p;
+
+ int idx = srcu_read_lock(&kfd_processes_srcu);
+
+ ret_p = kfd_lookup_process_device_by_pasid(pasid);
+ if (ret_p) {
+ if (pdd)
+ *pdd = ret_p;
+ kref_get(&ret_p->process->ref);
+
+ srcu_read_unlock(&kfd_processes_srcu, idx);
+ return ret_p->process;
}
srcu_read_unlock(&kfd_processes_srcu, idx);
- return ret_p;
+ if (pdd)
+ *pdd = NULL;
+
+ return NULL;
}
/* This increments the process->ref counter. */
@@ -1988,7 +2009,7 @@ static void evict_process_worker(struct work_struct *work)
*/
p = container_of(dwork, struct kfd_process, eviction_work);
- pr_debug("Started evicting pasid 0x%x\n", p->pasid);
+ pr_debug("Started evicting process pid %d\n", p->lead_thread->pid);
ret = kfd_process_evict_queues(p, KFD_QUEUE_EVICTION_TRIGGER_TTM);
if (!ret) {
/* If another thread already signaled the eviction fence,
@@ -2000,9 +2021,9 @@ static void evict_process_worker(struct work_struct *work)
msecs_to_jiffies(PROCESS_RESTORE_TIME_MS)))
kfd_process_restore_queues(p);
- pr_debug("Finished evicting pasid 0x%x\n", p->pasid);
+ pr_debug("Finished evicting process pid %d\n", p->lead_thread->pid);
} else
- pr_err("Failed to evict queues of pasid 0x%x\n", p->pasid);
+ pr_err("Failed to evict queues of process pid %d\n", p->lead_thread->pid);
}
static int restore_process_helper(struct kfd_process *p)
@@ -2019,9 +2040,11 @@ static int restore_process_helper(struct kfd_process *p)
ret = kfd_process_restore_queues(p);
if (!ret)
- pr_debug("Finished restoring pasid 0x%x\n", p->pasid);
+ pr_debug("Finished restoring process pid %d\n",
+ p->lead_thread->pid);
else
- pr_err("Failed to restore queues of pasid 0x%x\n", p->pasid);
+ pr_err("Failed to restore queues of process pid %d\n",
+ p->lead_thread->pid);
return ret;
}
@@ -2038,7 +2061,7 @@ static void restore_process_worker(struct work_struct *work)
* lifetime of this thread, kfd_process p will be valid
*/
p = container_of(dwork, struct kfd_process, restore_work);
- pr_debug("Started restoring pasid 0x%x\n", p->pasid);
+ pr_debug("Started restoring process pasid %d\n", (int)p->lead_thread->pid);
/* Setting last_restore_timestamp before successful restoration.
* Otherwise this would have to be set by KGD (restore_process_bos)
@@ -2054,8 +2077,8 @@ static void restore_process_worker(struct work_struct *work)
ret = restore_process_helper(p);
if (ret) {
- pr_debug("Failed to restore BOs of pasid 0x%x, retry after %d ms\n",
- p->pasid, PROCESS_BACK_OFF_TIME_MS);
+ pr_debug("Failed to restore BOs of process pid %d, retry after %d ms\n",
+ p->lead_thread->pid, PROCESS_BACK_OFF_TIME_MS);
if (mod_delayed_work(kfd_restore_wq, &p->restore_work,
msecs_to_jiffies(PROCESS_RESTORE_TIME_MS)))
kfd_process_restore_queues(p);
@@ -2071,7 +2094,7 @@ void kfd_suspend_all_processes(void)
WARN(debug_evictions, "Evicting all processes");
hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
if (kfd_process_evict_queues(p, KFD_QUEUE_EVICTION_TRIGGER_SUSPEND))
- pr_err("Failed to suspend process 0x%x\n", p->pasid);
+ pr_err("Failed to suspend process pid %d\n", p->lead_thread->pid);
signal_eviction_fence(p);
}
srcu_read_unlock(&kfd_processes_srcu, idx);
@@ -2085,8 +2108,8 @@ int kfd_resume_all_processes(void)
hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
if (restore_process_helper(p)) {
- pr_err("Restore process %d failed during resume\n",
- p->pasid);
+ pr_err("Restore process pid %d failed during resume\n",
+ p->lead_thread->pid);
ret = -EFAULT;
}
}
@@ -2141,13 +2164,14 @@ int kfd_process_drain_interrupts(struct kfd_process_device *pdd)
memset(irq_drain_fence, 0, sizeof(irq_drain_fence));
irq_drain_fence[0] = (KFD_IRQ_FENCE_SOURCEID << 8) |
KFD_IRQ_FENCE_CLIENTID;
- irq_drain_fence[3] = pdd->process->pasid;
+ irq_drain_fence[3] = pdd->pasid;
/*
- * For GFX 9.4.3, send the NodeId also in IH cookie DW[3]
+ * For GFX 9.4.3/9.5.0, send the NodeId also in IH cookie DW[3]
*/
if (KFD_GC_VERSION(pdd->dev->kfd) == IP_VERSION(9, 4, 3) ||
- KFD_GC_VERSION(pdd->dev->kfd) == IP_VERSION(9, 4, 4)) {
+ KFD_GC_VERSION(pdd->dev->kfd) == IP_VERSION(9, 4, 4) ||
+ KFD_GC_VERSION(pdd->dev->kfd) == IP_VERSION(9, 5, 0)) {
node_id = ffs(pdd->dev->interrupt_bitmap) - 1;
irq_drain_fence[3] |= node_id << 16;
}
@@ -2171,7 +2195,7 @@ void kfd_process_close_interrupt_drain(unsigned int pasid)
{
struct kfd_process *p;
- p = kfd_lookup_process_by_pasid(pasid);
+ p = kfd_lookup_process_by_pasid(pasid, NULL);
if (!p)
return;
@@ -2292,8 +2316,8 @@ int kfd_debugfs_mqds_by_process(struct seq_file *m, void *data)
int idx = srcu_read_lock(&kfd_processes_srcu);
hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
- seq_printf(m, "Process %d PASID 0x%x:\n",
- p->lead_thread->tgid, p->pasid);
+ seq_printf(m, "Process %d PASID %d:\n",
+ p->lead_thread->tgid, p->lead_thread->pid);
mutex_lock(&p->mutex);
r = pqm_debugfs_mqds(m, &p->pqm);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
index 01b960b15274..c643e0ccec52 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
@@ -69,8 +69,8 @@ static int find_available_queue_slot(struct process_queue_manager *pqm,
pr_debug("The new slot id %lu\n", found);
if (found >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS) {
- pr_info("Cannot open more queues for process with pasid 0x%x\n",
- pqm->process->pasid);
+ pr_info("Cannot open more queues for process with pid %d\n",
+ pqm->process->lead_thread->pid);
return -ENOMEM;
}
@@ -86,9 +86,12 @@ void kfd_process_dequeue_from_device(struct kfd_process_device *pdd)
if (pdd->already_dequeued)
return;
-
+ /* The MES context flush needs to filter out the case which the
+ * KFD process is created without setting up the MES context and
+ * queue for creating a compute queue.
+ */
dev->dqm->ops.process_termination(dev->dqm, &pdd->qpd);
- if (dev->kfd->shared_resources.enable_mes &&
+ if (dev->kfd->shared_resources.enable_mes && !!pdd->proc_ctx_gpu_addr &&
down_read_trylock(&dev->adev->reset_domain->sem)) {
amdgpu_mes_flush_shader_debugger(dev->adev,
pdd->proc_ctx_gpu_addr);
@@ -131,8 +134,9 @@ int pqm_set_gws(struct process_queue_manager *pqm, unsigned int qid,
if (!gws && pdd->qpd.num_gws == 0)
return -EINVAL;
- if (KFD_GC_VERSION(dev) != IP_VERSION(9, 4, 3) &&
- KFD_GC_VERSION(dev) != IP_VERSION(9, 4, 4) &&
+ if ((KFD_GC_VERSION(dev) != IP_VERSION(9, 4, 3) &&
+ KFD_GC_VERSION(dev) != IP_VERSION(9, 4, 4) &&
+ KFD_GC_VERSION(dev) != IP_VERSION(9, 5, 0)) &&
!dev->kfd->shared_resources.enable_mes) {
if (gws)
ret = amdgpu_amdkfd_add_gws_to_process(pdd->process->kgd_process_info,
@@ -197,6 +201,7 @@ static void pqm_clean_queue_resource(struct process_queue_manager *pqm,
if (pqn->q->gws) {
if (KFD_GC_VERSION(pqn->q->device) != IP_VERSION(9, 4, 3) &&
KFD_GC_VERSION(pqn->q->device) != IP_VERSION(9, 4, 4) &&
+ KFD_GC_VERSION(pqn->q->device) != IP_VERSION(9, 5, 0) &&
!dev->kfd->shared_resources.enable_mes)
amdgpu_amdkfd_remove_gws_from_process(
pqm->process->kgd_process_info, pqn->q->gws);
@@ -212,13 +217,17 @@ static void pqm_clean_queue_resource(struct process_queue_manager *pqm,
void pqm_uninit(struct process_queue_manager *pqm)
{
struct process_queue_node *pqn, *next;
- struct kfd_process_device *pdd;
list_for_each_entry_safe(pqn, next, &pqm->queues, process_queue_list) {
if (pqn->q) {
- pdd = kfd_get_process_device_data(pqn->q->device, pqm->process);
- kfd_queue_unref_bo_vas(pdd, &pqn->q->properties);
- kfd_queue_release_buffers(pdd, &pqn->q->properties);
+ struct kfd_process_device *pdd = kfd_get_process_device_data(pqn->q->device,
+ pqm->process);
+ if (pdd) {
+ kfd_queue_unref_bo_vas(pdd, &pqn->q->properties);
+ kfd_queue_release_buffers(pdd, &pqn->q->properties);
+ } else {
+ WARN_ON(!pdd);
+ }
pqm_clean_queue_resource(pqm, pqn);
}
@@ -235,7 +244,7 @@ void pqm_uninit(struct process_queue_manager *pqm)
static int init_user_queue(struct process_queue_manager *pqm,
struct kfd_node *dev, struct queue **q,
struct queue_properties *q_properties,
- struct file *f, unsigned int qid)
+ unsigned int qid)
{
int retval;
@@ -270,20 +279,17 @@ static int init_user_queue(struct process_queue_manager *pqm,
/* Starting with GFX11, wptr BOs must be mapped to GART for MES to determine work
* on unmapped queues for usermode queue oversubscription (no aggregated doorbell)
*/
- if (((dev->adev->mes.sched_version & AMDGPU_MES_API_VERSION_MASK)
- >> AMDGPU_MES_API_VERSION_SHIFT) >= 2) {
- if (dev->adev != amdgpu_ttm_adev(q_properties->wptr_bo->tbo.bdev)) {
- pr_err("Queue memory allocated to wrong device\n");
- retval = -EINVAL;
- goto free_gang_ctx_bo;
- }
+ if (dev->adev != amdgpu_ttm_adev(q_properties->wptr_bo->tbo.bdev)) {
+ pr_err("Queue memory allocated to wrong device\n");
+ retval = -EINVAL;
+ goto free_gang_ctx_bo;
+ }
- retval = amdgpu_amdkfd_map_gtt_bo_to_gart(q_properties->wptr_bo,
- &(*q)->wptr_bo_gart);
- if (retval) {
- pr_err("Failed to map wptr bo to GART\n");
- goto free_gang_ctx_bo;
- }
+ retval = amdgpu_amdkfd_map_gtt_bo_to_gart(q_properties->wptr_bo,
+ &(*q)->wptr_bo_gart);
+ if (retval) {
+ pr_err("Failed to map wptr bo to GART\n");
+ goto free_gang_ctx_bo;
}
}
@@ -291,7 +297,7 @@ static int init_user_queue(struct process_queue_manager *pqm,
return 0;
free_gang_ctx_bo:
- amdgpu_amdkfd_free_gtt_mem(dev->adev, (*q)->gang_ctx_bo);
+ amdgpu_amdkfd_free_gtt_mem(dev->adev, &(*q)->gang_ctx_bo);
cleanup:
uninit_queue(*q);
*q = NULL;
@@ -300,7 +306,6 @@ cleanup:
int pqm_create_queue(struct process_queue_manager *pqm,
struct kfd_node *dev,
- struct file *f,
struct queue_properties *properties,
unsigned int *qid,
const struct kfd_criu_queue_priv_data *q_data,
@@ -317,11 +322,12 @@ int pqm_create_queue(struct process_queue_manager *pqm,
unsigned int max_queues = 127; /* HWS limit */
/*
- * On GFX 9.4.3, increase the number of queues that
- * can be created to 255. No HWS limit on GFX 9.4.3.
+ * On GFX 9.4.3/9.5.0, increase the number of queues that
+ * can be created to 255. No HWS limit on GFX 9.4.3/9.5.0.
*/
if (KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 3) ||
- KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 4))
+ KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 4) ||
+ KFD_GC_VERSION(dev) == IP_VERSION(9, 5, 0))
max_queues = 255;
q = NULL;
@@ -354,10 +360,26 @@ int pqm_create_queue(struct process_queue_manager *pqm,
if (retval != 0)
return retval;
+ /* Register process if this is the first queue */
if (list_empty(&pdd->qpd.queues_list) &&
list_empty(&pdd->qpd.priv_queue_list))
dev->dqm->ops.register_process(dev->dqm, &pdd->qpd);
+ /* Allocate proc_ctx_bo only if MES is enabled and this is the first queue */
+ if (!pdd->proc_ctx_cpu_ptr && dev->kfd->shared_resources.enable_mes) {
+ retval = amdgpu_amdkfd_alloc_gtt_mem(dev->adev,
+ AMDGPU_MES_PROC_CTX_SIZE,
+ &pdd->proc_ctx_bo,
+ &pdd->proc_ctx_gpu_addr,
+ &pdd->proc_ctx_cpu_ptr,
+ false);
+ if (retval) {
+ dev_err(dev->adev->dev, "failed to allocate process context bo\n");
+ return retval;
+ }
+ memset(pdd->proc_ctx_cpu_ptr, 0, AMDGPU_MES_PROC_CTX_SIZE);
+ }
+
pqn = kzalloc(sizeof(*pqn), GFP_KERNEL);
if (!pqn) {
retval = -ENOMEM;
@@ -374,7 +396,7 @@ int pqm_create_queue(struct process_queue_manager *pqm,
* allocate_sdma_queue() in create_queue() has the
* corresponding check logic.
*/
- retval = init_user_queue(pqm, dev, &q, properties, f, *qid);
+ retval = init_user_queue(pqm, dev, &q, properties, *qid);
if (retval != 0)
goto err_create_queue;
pqn->q = q;
@@ -395,7 +417,7 @@ int pqm_create_queue(struct process_queue_manager *pqm,
goto err_create_queue;
}
- retval = init_user_queue(pqm, dev, &q, properties, f, *qid);
+ retval = init_user_queue(pqm, dev, &q, properties, *qid);
if (retval != 0)
goto err_create_queue;
pqn->q = q;
@@ -426,8 +448,15 @@ int pqm_create_queue(struct process_queue_manager *pqm,
}
if (retval != 0) {
- pr_err("Pasid 0x%x DQM create queue type %d failed. ret %d\n",
- pqm->process->pasid, type, retval);
+ if ((type == KFD_QUEUE_TYPE_SDMA ||
+ type == KFD_QUEUE_TYPE_SDMA_XGMI ||
+ type == KFD_QUEUE_TYPE_SDMA_BY_ENG_ID) &&
+ retval == -ENOMEM)
+ pr_warn("process pid %d DQM create queue type %d failed. ret %d\n",
+ pqm->process->lead_thread->pid, type, retval);
+ else
+ pr_err("process pid %d DQM create queue type %d failed. ret %d\n",
+ pqm->process->lead_thread->pid, type, retval);
goto err_create_queue;
}
@@ -521,9 +550,9 @@ int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid)
retval = dqm->ops.destroy_queue(dqm, &pdd->qpd, pqn->q);
if (retval) {
pr_err("Pasid 0x%x destroy queue %d failed, ret %d\n",
- pqm->process->pasid,
+ pdd->pasid,
pqn->q->properties.queue_id, retval);
- if (retval != -ETIME)
+ if (retval != -ETIME && retval != -EIO)
goto err_destroy_queue;
}
kfd_procfs_del_queue(pqn->q);
@@ -643,19 +672,6 @@ int pqm_update_mqd(struct process_queue_manager *pqm,
return 0;
}
-struct kernel_queue *pqm_get_kernel_queue(
- struct process_queue_manager *pqm,
- unsigned int qid)
-{
- struct process_queue_node *pqn;
-
- pqn = get_queue_by_qid(pqm, qid);
- if (pqn && pqn->kq)
- return pqn->kq;
-
- return NULL;
-}
-
struct queue *pqm_get_user_queue(struct process_queue_manager *pqm,
unsigned int qid)
{
@@ -1029,8 +1045,7 @@ int kfd_criu_restore_queue(struct kfd_process *p,
print_queue_properties(&qp);
- ret = pqm_create_queue(&p->pqm, pdd->dev, NULL, &qp, &queue_id, q_data, mqd, ctl_stack,
- NULL);
+ ret = pqm_create_queue(&p->pqm, pdd->dev, &qp, &queue_id, q_data, mqd, ctl_stack, NULL);
if (ret) {
pr_err("Failed to create new queue err:%d\n", ret);
goto exit;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_queue.c b/drivers/gpu/drm/amd/amdkfd/kfd_queue.c
index ad29634f8b44..a65c67cf56ff 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_queue.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_queue.c
@@ -233,6 +233,7 @@ void kfd_queue_buffer_put(struct amdgpu_bo **bo)
int kfd_queue_acquire_buffers(struct kfd_process_device *pdd, struct queue_properties *properties)
{
struct kfd_topology_device *topo_dev;
+ u64 expected_queue_size;
struct amdgpu_vm *vm;
u32 total_cwsr_size;
int err;
@@ -241,6 +242,15 @@ int kfd_queue_acquire_buffers(struct kfd_process_device *pdd, struct queue_prope
if (!topo_dev)
return -EINVAL;
+ /* AQL queues on GFX7 and GFX8 appear twice their actual size */
+ if (properties->type == KFD_QUEUE_TYPE_COMPUTE &&
+ properties->format == KFD_QUEUE_FORMAT_AQL &&
+ topo_dev->node_props.gfx_target_version >= 70000 &&
+ topo_dev->node_props.gfx_target_version < 90000)
+ expected_queue_size = properties->queue_size / 2;
+ else
+ expected_queue_size = properties->queue_size;
+
vm = drm_priv_to_vm(pdd->drm_priv);
err = amdgpu_bo_reserve(vm->root.bo, false);
if (err)
@@ -255,7 +265,7 @@ int kfd_queue_acquire_buffers(struct kfd_process_device *pdd, struct queue_prope
goto out_err_unreserve;
err = kfd_queue_buffer_get(vm, (void *)properties->queue_address,
- &properties->ring_bo, properties->queue_size);
+ &properties->ring_bo, expected_queue_size);
if (err)
goto out_err_unreserve;
@@ -266,8 +276,8 @@ int kfd_queue_acquire_buffers(struct kfd_process_device *pdd, struct queue_prope
/* EOP buffer is not required for all ASICs */
if (properties->eop_ring_buffer_address) {
if (properties->eop_ring_buffer_size != topo_dev->node_props.eop_buffer_size) {
- pr_debug("queue eop bo size 0x%lx not equal to node eop buf size 0x%x\n",
- properties->eop_buf_bo->tbo.base.size,
+ pr_debug("queue eop bo size 0x%x not equal to node eop buf size 0x%x\n",
+ properties->eop_ring_buffer_size,
topo_dev->node_props.eop_buffer_size);
err = -EINVAL;
goto out_err_unreserve;
@@ -392,9 +402,10 @@ static u32 kfd_get_vgpr_size_per_cu(u32 gfxv)
{
u32 vgpr_size = 0x40000;
- if ((gfxv / 100 * 100) == 90400 || /* GFX_VERSION_AQUA_VANJARAM */
+ if (gfxv == 90402 || /* GFX_VERSION_AQUA_VANJARAM */
gfxv == 90010 || /* GFX_VERSION_ALDEBARAN */
- gfxv == 90008) /* GFX_VERSION_ARCTURUS */
+ gfxv == 90008 || /* GFX_VERSION_ARCTURUS */
+ gfxv == 90500)
vgpr_size = 0x80000;
else if (gfxv == 110000 || /* GFX_VERSION_PLUM_BONITO */
gfxv == 110001 || /* GFX_VERSION_WHEAT_NAS */
@@ -405,9 +416,10 @@ static u32 kfd_get_vgpr_size_per_cu(u32 gfxv)
return vgpr_size;
}
-#define WG_CONTEXT_DATA_SIZE_PER_CU(gfxv) \
+#define WG_CONTEXT_DATA_SIZE_PER_CU(gfxv, props) \
(kfd_get_vgpr_size_per_cu(gfxv) + SGPR_SIZE_PER_CU +\
- LDS_SIZE_PER_CU + HWREG_SIZE_PER_CU)
+ (((gfxv) == 90500) ? (props->lds_size_in_kb << 10) : LDS_SIZE_PER_CU) +\
+ HWREG_SIZE_PER_CU)
#define CNTL_STACK_BYTES_PER_WAVE(gfxv) \
((gfxv) >= 100100 ? 12 : 8) /* GFX_VERSION_NAVI10*/
@@ -431,7 +443,7 @@ void kfd_queue_ctx_save_restore_size(struct kfd_topology_device *dev)
min(cu_num * 40, props->array_count / props->simd_arrays_per_engine * 512)
: cu_num * 32;
- wg_data_size = ALIGN(cu_num * WG_CONTEXT_DATA_SIZE_PER_CU(gfxv), PAGE_SIZE);
+ wg_data_size = ALIGN(cu_num * WG_CONTEXT_DATA_SIZE_PER_CU(gfxv, props), PAGE_SIZE);
ctl_stack_size = wave_num * CNTL_STACK_BYTES_PER_WAVE(gfxv) + 8;
ctl_stack_size = ALIGN(SIZEOF_HSA_USER_CONTEXT_SAVE_AREA_HEADER + ctl_stack_size,
PAGE_SIZE);
@@ -450,7 +462,7 @@ void kfd_queue_ctx_save_restore_size(struct kfd_topology_device *dev)
if (gfxv == 80002) /* GFX_VERSION_TONGA */
props->eop_buffer_size = 0x8000;
- else if ((gfxv / 100 * 100) == 90400) /* GFX_VERSION_AQUA_VANJARAM */
+ else if (gfxv == 90402) /* GFX_VERSION_AQUA_VANJARAM */
props->eop_buffer_size = 4096;
else if (gfxv >= 80000)
props->eop_buffer_size = 4096;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c
index de8b9abf7afc..83d9384ac815 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c
@@ -44,7 +44,7 @@ struct kfd_smi_client {
bool suser;
};
-#define MAX_KFIFO_SIZE 1024
+#define KFD_MAX_KFIFO_SIZE 8192
static __poll_t kfd_smi_ev_poll(struct file *, struct poll_table_struct *);
static ssize_t kfd_smi_ev_read(struct file *, char __user *, size_t, loff_t *);
@@ -86,7 +86,7 @@ static ssize_t kfd_smi_ev_read(struct file *filep, char __user *user,
struct kfd_smi_client *client = filep->private_data;
unsigned char *buf;
- size = min_t(size_t, size, MAX_KFIFO_SIZE);
+ size = min_t(size_t, size, KFD_MAX_KFIFO_SIZE);
buf = kmalloc(size, GFP_KERNEL);
if (!buf)
return -ENOMEM;
@@ -163,10 +163,9 @@ static int kfd_smi_ev_release(struct inode *inode, struct file *filep)
static bool kfd_smi_ev_enabled(pid_t pid, struct kfd_smi_client *client,
unsigned int event)
{
- uint64_t all = KFD_SMI_EVENT_MASK_FROM_INDEX(KFD_SMI_EVENT_ALL_PROCESS);
uint64_t events = READ_ONCE(client->events);
- if (pid && client->pid != pid && !(client->suser && (events & all)))
+ if (pid && client->pid != pid && !client->suser)
return false;
return events & KFD_SMI_EVENT_MASK_FROM_INDEX(event);
@@ -292,12 +291,13 @@ void kfd_smi_event_migration_start(struct kfd_node *node, pid_t pid,
void kfd_smi_event_migration_end(struct kfd_node *node, pid_t pid,
unsigned long start, unsigned long end,
- uint32_t from, uint32_t to, uint32_t trigger)
+ uint32_t from, uint32_t to, uint32_t trigger,
+ int error_code)
{
kfd_smi_event_add(pid, node, KFD_SMI_EVENT_MIGRATE_END,
KFD_EVENT_FMT_MIGRATE_END(
ktime_get_boottime_ns(), pid, start, end - start,
- from, to, trigger));
+ from, to, trigger, error_code));
}
void kfd_smi_event_queue_eviction(struct kfd_node *node, pid_t pid,
@@ -344,6 +344,27 @@ void kfd_smi_event_unmap_from_gpu(struct kfd_node *node, pid_t pid,
pid, address, last - address + 1, node->id, trigger));
}
+void kfd_smi_event_process(struct kfd_process_device *pdd, bool start)
+{
+ struct amdgpu_task_info *task_info;
+ struct amdgpu_vm *avm;
+
+ if (!pdd->drm_priv)
+ return;
+
+ avm = drm_priv_to_vm(pdd->drm_priv);
+ task_info = amdgpu_vm_get_task_info_vm(avm);
+
+ if (task_info) {
+ kfd_smi_event_add(0, pdd->dev,
+ start ? KFD_SMI_EVENT_PROCESS_START :
+ KFD_SMI_EVENT_PROCESS_END,
+ KFD_EVENT_FMT_PROCESS(task_info->pid,
+ task_info->task_name));
+ amdgpu_vm_put_task_info(task_info);
+ }
+}
+
int kfd_smi_event_open(struct kfd_node *dev, uint32_t *fd)
{
struct kfd_smi_client *client;
@@ -354,7 +375,7 @@ int kfd_smi_event_open(struct kfd_node *dev, uint32_t *fd)
return -ENOMEM;
INIT_LIST_HEAD(&client->list);
- ret = kfifo_alloc(&client->fifo, MAX_KFIFO_SIZE, GFP_KERNEL);
+ ret = kfifo_alloc(&client->fifo, KFD_MAX_KFIFO_SIZE, GFP_KERNEL);
if (ret) {
kfree(client);
return ret;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h
index 85010b8307f8..bb4d72b57387 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h
@@ -44,7 +44,8 @@ void kfd_smi_event_migration_start(struct kfd_node *node, pid_t pid,
uint32_t trigger);
void kfd_smi_event_migration_end(struct kfd_node *node, pid_t pid,
unsigned long start, unsigned long end,
- uint32_t from, uint32_t to, uint32_t trigger);
+ uint32_t from, uint32_t to, uint32_t trigger,
+ int error_code);
void kfd_smi_event_queue_eviction(struct kfd_node *node, pid_t pid,
uint32_t trigger);
void kfd_smi_event_queue_restore(struct kfd_node *node, pid_t pid);
@@ -52,4 +53,5 @@ void kfd_smi_event_queue_restore_rescheduled(struct mm_struct *mm);
void kfd_smi_event_unmap_from_gpu(struct kfd_node *node, pid_t pid,
unsigned long address, unsigned long last,
uint32_t trigger);
+void kfd_smi_event_process(struct kfd_process_device *pdd, bool start);
#endif
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index 1893c27746a5..865dca2547de 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -563,7 +563,8 @@ svm_range_vram_node_new(struct kfd_node *node, struct svm_range *prange,
int r;
p = container_of(prange->svms, struct kfd_process, svms);
- pr_debug("pasid: %x svms 0x%p [0x%lx 0x%lx]\n", p->pasid, prange->svms,
+ pr_debug("process pid: %d svms 0x%p [0x%lx 0x%lx]\n",
+ p->lead_thread->pid, prange->svms,
prange->start, prange->last);
if (svm_range_validate_svm_bo(node, prange))
@@ -1195,6 +1196,7 @@ svm_range_get_pte_flags(struct kfd_node *node,
struct kfd_node *bo_node;
uint32_t flags = prange->flags;
uint32_t mapping_flags = 0;
+ uint32_t gc_ip_version = KFD_GC_VERSION(node);
uint64_t pte_flags;
bool snoop = (domain != SVM_RANGE_VRAM_DOMAIN);
bool coherent = flags & (KFD_IOCTL_SVM_FLAG_COHERENT | KFD_IOCTL_SVM_FLAG_EXT_COHERENT);
@@ -1204,7 +1206,7 @@ svm_range_get_pte_flags(struct kfd_node *node,
if (domain == SVM_RANGE_VRAM_DOMAIN)
bo_node = prange->svm_bo->node;
- switch (amdgpu_ip_version(node->adev, GC_HWIP, 0)) {
+ switch (gc_ip_version) {
case IP_VERSION(9, 4, 1):
if (domain == SVM_RANGE_VRAM_DOMAIN) {
if (bo_node == node) {
@@ -1241,8 +1243,9 @@ svm_range_get_pte_flags(struct kfd_node *node,
break;
case IP_VERSION(9, 4, 3):
case IP_VERSION(9, 4, 4):
+ case IP_VERSION(9, 5, 0):
if (ext_coherent)
- mtype_local = node->adev->rev_id ? AMDGPU_VM_MTYPE_CC : AMDGPU_VM_MTYPE_UC;
+ mtype_local = AMDGPU_VM_MTYPE_CC;
else
mtype_local = amdgpu_mtype_local == 1 ? AMDGPU_VM_MTYPE_NC :
amdgpu_mtype_local == 2 ? AMDGPU_VM_MTYPE_CC : AMDGPU_VM_MTYPE_RW;
@@ -1257,9 +1260,13 @@ svm_range_get_pte_flags(struct kfd_node *node,
*/
else if (svm_nodes_in_same_hive(bo_node, node) && !ext_coherent)
mapping_flags |= AMDGPU_VM_MTYPE_NC;
- /* PCIe P2P or extended system scope coherence */
- else
+ /* PCIe P2P on GPUs pre-9.5.0 */
+ else if (gc_ip_version < IP_VERSION(9, 5, 0) &&
+ !svm_nodes_in_same_hive(bo_node, node))
mapping_flags |= AMDGPU_VM_MTYPE_UC;
+ /* Other remote memory */
+ else
+ mapping_flags |= ext_coherent ? AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC;
/* system memory accessed by the APU */
} else if (node->adev->flags & AMD_IS_APU) {
/* On NUMA systems, locality is determined per-page
@@ -1271,18 +1278,15 @@ svm_range_get_pte_flags(struct kfd_node *node,
mapping_flags |= ext_coherent ? AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC;
/* system memory accessed by the dGPU */
} else {
- mapping_flags |= AMDGPU_VM_MTYPE_UC;
+ if (gc_ip_version < IP_VERSION(9, 5, 0))
+ mapping_flags |= AMDGPU_VM_MTYPE_UC;
+ else
+ mapping_flags |= AMDGPU_VM_MTYPE_NC;
}
break;
case IP_VERSION(12, 0, 0):
case IP_VERSION(12, 0, 1):
- if (domain == SVM_RANGE_VRAM_DOMAIN) {
- if (bo_node != node)
- mapping_flags |= AMDGPU_VM_MTYPE_NC;
- } else {
- mapping_flags |= coherent ?
- AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC;
- }
+ mapping_flags |= AMDGPU_VM_MTYPE_NC;
break;
default:
mapping_flags |= coherent ?
@@ -1299,7 +1303,7 @@ svm_range_get_pte_flags(struct kfd_node *node,
pte_flags = AMDGPU_PTE_VALID;
pte_flags |= (domain == SVM_RANGE_VRAM_DOMAIN) ? 0 : AMDGPU_PTE_SYSTEM;
pte_flags |= snoop ? AMDGPU_PTE_SNOOPED : 0;
- if (KFD_GC_VERSION(node) >= IP_VERSION(12, 0, 0))
+ if (gc_ip_version >= IP_VERSION(12, 0, 0))
pte_flags |= AMDGPU_PTE_IS_PTE;
pte_flags |= amdgpu_gem_va_map_flags(node->adev, mapping_flags);
@@ -2681,7 +2685,7 @@ svm_range_best_restore_location(struct svm_range *prange,
return -1;
}
- if (node->adev->flags & AMD_IS_APU)
+ if (node->adev->apu_prefer_gtt)
return 0;
if (prange->preferred_loc == gpuid ||
@@ -2969,7 +2973,7 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
return -EFAULT;
}
- p = kfd_lookup_process_by_pasid(pasid);
+ p = kfd_lookup_process_by_pasid(pasid, NULL);
if (!p) {
pr_debug("kfd process not founded pasid 0x%x\n", pasid);
return 0;
@@ -2998,19 +3002,6 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
goto out;
}
- /* check if this page fault time stamp is before svms->checkpoint_ts */
- if (svms->checkpoint_ts[gpuidx] != 0) {
- if (amdgpu_ih_ts_after(ts, svms->checkpoint_ts[gpuidx])) {
- pr_debug("draining retry fault, drop fault 0x%llx\n", addr);
- r = 0;
- goto out;
- } else
- /* ts is after svms->checkpoint_ts now, reset svms->checkpoint_ts
- * to zero to avoid following ts wrap around give wrong comparing
- */
- svms->checkpoint_ts[gpuidx] = 0;
- }
-
if (!p->xnack_enabled) {
pr_debug("XNACK not enabled for pasid 0x%x\n", pasid);
r = -EFAULT;
@@ -3030,6 +3021,21 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
mmap_read_lock(mm);
retry_write_locked:
mutex_lock(&svms->lock);
+
+ /* check if this page fault time stamp is before svms->checkpoint_ts */
+ if (svms->checkpoint_ts[gpuidx] != 0) {
+ if (amdgpu_ih_ts_after_or_equal(ts, svms->checkpoint_ts[gpuidx])) {
+ pr_debug("draining retry fault, drop fault 0x%llx\n", addr);
+ r = -EAGAIN;
+ goto out_unlock_svms;
+ } else {
+ /* ts is after svms->checkpoint_ts now, reset svms->checkpoint_ts
+ * to zero to avoid following ts wrap around give wrong comparing
+ */
+ svms->checkpoint_ts[gpuidx] = 0;
+ }
+ }
+
prange = svm_range_from_addr(svms, addr, NULL);
if (!prange) {
pr_debug("failed to find prange svms 0x%p address [0x%llx]\n",
@@ -3111,8 +3117,6 @@ retry_write_locked:
start = max_t(unsigned long, ALIGN_DOWN(addr, size), prange->start);
last = min_t(unsigned long, ALIGN(addr + 1, size) - 1, prange->last);
if (prange->actual_loc != 0 || best_loc != 0) {
- migration = true;
-
if (best_loc) {
r = svm_migrate_to_vram(prange, best_loc, start, last,
mm, KFD_MIGRATE_TRIGGER_PAGEFAULT_GPU);
@@ -3135,7 +3139,9 @@ retry_write_locked:
if (r) {
pr_debug("failed %d to migrate svms %p [0x%lx 0x%lx]\n",
r, svms, start, last);
- goto out_unlock_range;
+ goto out_migrate_fail;
+ } else {
+ migration = true;
}
}
@@ -3145,6 +3151,7 @@ retry_write_locked:
pr_debug("failed %d to map svms 0x%p [0x%lx 0x%lx] to gpus\n",
r, svms, start, last);
+out_migrate_fail:
kfd_smi_event_page_fault_end(node, p->lead_thread->pid, addr,
migration);
@@ -3154,7 +3161,8 @@ out_unlock_svms:
mutex_unlock(&svms->lock);
mmap_read_unlock(mm);
- svm_range_count_fault(node, p, gpuidx);
+ if (r != -EAGAIN)
+ svm_range_count_fault(node, p, gpuidx);
mmput(mm);
out:
@@ -3231,7 +3239,8 @@ void svm_range_list_fini(struct kfd_process *p)
struct svm_range *prange;
struct svm_range *next;
- pr_debug("pasid 0x%x svms 0x%p\n", p->pasid, &p->svms);
+ pr_debug("process pid %d svms 0x%p\n", p->lead_thread->pid,
+ &p->svms);
cancel_delayed_work_sync(&p->svms.restore_work);
@@ -3254,7 +3263,8 @@ void svm_range_list_fini(struct kfd_process *p)
mutex_destroy(&p->svms.lock);
- pr_debug("pasid 0x%x svms 0x%p done\n", p->pasid, &p->svms);
+ pr_debug("process pid %d svms 0x%p done\n",
+ p->lead_thread->pid, &p->svms);
}
int svm_range_list_init(struct kfd_process *p)
@@ -3427,7 +3437,7 @@ svm_range_best_prefetch_location(struct svm_range *prange)
goto out;
}
- if (bo_node->adev->flags & AMD_IS_APU) {
+ if (bo_node->adev->apu_prefer_gtt) {
best_loc = 0;
goto out;
}
@@ -3617,8 +3627,8 @@ svm_range_set_attr(struct kfd_process *p, struct mm_struct *mm,
bool flush_tlb;
int r, ret = 0;
- pr_debug("pasid 0x%x svms 0x%p [0x%llx 0x%llx] pages 0x%llx\n",
- p->pasid, &p->svms, start, start + size - 1, size);
+ pr_debug("process pid %d svms 0x%p [0x%llx 0x%llx] pages 0x%llx\n",
+ p->lead_thread->pid, &p->svms, start, start + size - 1, size);
r = svm_range_check_attr(p, nattr, attrs);
if (r)
@@ -3726,8 +3736,8 @@ out_unlock_range:
out:
mutex_unlock(&process_info->lock);
- pr_debug("pasid 0x%x svms 0x%p [0x%llx 0x%llx] done, r=%d\n", p->pasid,
- &p->svms, start, start + size - 1, r);
+ pr_debug("process pid %d svms 0x%p [0x%llx 0x%llx] done, r=%d\n",
+ p->lead_thread->pid, &p->svms, start, start + size - 1, r);
return ret ? ret : r;
}
@@ -4065,8 +4075,8 @@ exit:
return ret;
}
-int svm_range_get_info(struct kfd_process *p, uint32_t *num_svm_ranges,
- uint64_t *svm_priv_data_size)
+void svm_range_get_info(struct kfd_process *p, uint32_t *num_svm_ranges,
+ uint64_t *svm_priv_data_size)
{
uint64_t total_size, accessibility_size, common_attr_size;
int nattr_common = 4, nattr_accessibility = 1;
@@ -4078,8 +4088,6 @@ int svm_range_get_info(struct kfd_process *p, uint32_t *num_svm_ranges,
*svm_priv_data_size = 0;
svms = &p->svms;
- if (!svms)
- return -EINVAL;
mutex_lock(&svms->lock);
list_for_each_entry(prange, &svms->list, list) {
@@ -4121,7 +4129,6 @@ int svm_range_get_info(struct kfd_process *p, uint32_t *num_svm_ranges,
pr_debug("num_svm_ranges %u total_priv_size %llu\n", *num_svm_ranges,
*svm_priv_data_size);
- return 0;
}
int kfd_criu_checkpoint_svm(struct kfd_process *p,
@@ -4138,8 +4145,6 @@ int kfd_criu_checkpoint_svm(struct kfd_process *p,
struct mm_struct *mm;
svms = &p->svms;
- if (!svms)
- return -EINVAL;
mm = get_task_mm(p->lead_thread);
if (!mm) {
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
index bddd24f04669..01c7a4877904 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
@@ -184,8 +184,8 @@ void schedule_deferred_list_work(struct svm_range_list *svms);
void svm_range_dma_unmap_dev(struct device *dev, dma_addr_t *dma_addr,
unsigned long offset, unsigned long npages);
void svm_range_dma_unmap(struct svm_range *prange);
-int svm_range_get_info(struct kfd_process *p, uint32_t *num_svm_ranges,
- uint64_t *svm_priv_data_size);
+void svm_range_get_info(struct kfd_process *p, uint32_t *num_svm_ranges,
+ uint64_t *svm_priv_data_size);
int kfd_criu_checkpoint_svm(struct kfd_process *p,
uint8_t __user *user_priv_data,
uint64_t *priv_offset);
@@ -202,7 +202,7 @@ void svm_range_list_lock_and_flush_work(struct svm_range_list *svms, struct mm_s
* is initialized to not 0 when page migration register device memory.
*/
#define KFD_IS_SVM_API_SUPPORTED(adev) ((adev)->kfd.pgmap.type != 0 ||\
- ((adev)->flags & AMD_IS_APU))
+ ((adev)->apu_prefer_gtt))
void svm_range_bo_unref_async(struct svm_range_bo *svm_bo);
@@ -237,13 +237,12 @@ static inline int svm_range_schedule_evict_svm_bo(
return -EINVAL;
}
-static inline int svm_range_get_info(struct kfd_process *p,
- uint32_t *num_svm_ranges,
- uint64_t *svm_priv_data_size)
+static inline void svm_range_get_info(struct kfd_process *p,
+ uint32_t *num_svm_ranges,
+ uint64_t *svm_priv_data_size)
{
*num_svm_ranges = 0;
*svm_priv_data_size = 0;
- return 0;
}
static inline int kfd_criu_checkpoint_svm(struct kfd_process *p,
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
index 3871591c9aec..4ec73f33535e 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
@@ -108,24 +108,6 @@ struct kfd_node *kfd_device_by_id(uint32_t gpu_id)
return top_dev->gpu;
}
-struct kfd_node *kfd_device_by_pci_dev(const struct pci_dev *pdev)
-{
- struct kfd_topology_device *top_dev;
- struct kfd_node *device = NULL;
-
- down_read(&topology_lock);
-
- list_for_each_entry(top_dev, &topology_device_list, list)
- if (top_dev->gpu && top_dev->gpu->adev->pdev == pdev) {
- device = top_dev->gpu;
- break;
- }
-
- up_read(&topology_lock);
-
- return device;
-}
-
/* Called with write topology_lock acquired */
static void kfd_release_topology_device(struct kfd_topology_device *dev)
{
@@ -528,6 +510,10 @@ static ssize_t node_show(struct kobject *kobj, struct attribute *attr,
dev->node_props.capability |=
HSA_CAP_AQL_QUEUE_DOUBLE_MAP;
+ if (KFD_GC_VERSION(dev->gpu) < IP_VERSION(10, 0, 0) &&
+ (dev->gpu->adev->sdma.supported_reset & AMDGPU_RESET_TYPE_PER_QUEUE))
+ dev->node_props.capability2 |= HSA_CAP2_PER_SDMA_QUEUE_RESET_SUPPORTED;
+
sysfs_show_32bit_prop(buffer, offs, "max_engine_clk_fcompute",
dev->node_props.max_engine_clk_fcompute);
@@ -537,6 +523,8 @@ static ssize_t node_show(struct kobject *kobj, struct attribute *attr,
dev->gpu->kfd->mec_fw_version);
sysfs_show_32bit_prop(buffer, offs, "capability",
dev->node_props.capability);
+ sysfs_show_32bit_prop(buffer, offs, "capability2",
+ dev->node_props.capability2);
sysfs_show_64bit_prop(buffer, offs, "debug_prop",
dev->node_props.debug_prop);
sysfs_show_32bit_prop(buffer, offs, "sdma_fw_version",
@@ -968,24 +956,23 @@ static void kfd_update_system_properties(void)
up_read(&topology_lock);
}
-static void find_system_memory(const struct dmi_header *dm,
- void *private)
+static void find_system_memory(const struct dmi_header *dm, void *private)
{
+ struct dmi_mem_device *memdev = container_of(dm, struct dmi_mem_device, header);
struct kfd_mem_properties *mem;
- u16 mem_width, mem_clock;
struct kfd_topology_device *kdev =
(struct kfd_topology_device *)private;
- const u8 *dmi_data = (const u8 *)(dm + 1);
-
- if (dm->type == DMI_ENTRY_MEM_DEVICE && dm->length >= 0x15) {
- mem_width = (u16)(*(const u16 *)(dmi_data + 0x6));
- mem_clock = (u16)(*(const u16 *)(dmi_data + 0x11));
- list_for_each_entry(mem, &kdev->mem_props, list) {
- if (mem_width != 0xFFFF && mem_width != 0)
- mem->width = mem_width;
- if (mem_clock != 0)
- mem->mem_clk_max = mem_clock;
- }
+
+ if (memdev->header.type != DMI_ENTRY_MEM_DEVICE)
+ return;
+ if (memdev->header.length < sizeof(struct dmi_mem_device))
+ return;
+
+ list_for_each_entry(mem, &kdev->mem_props, list) {
+ if (memdev->total_width != 0xFFFF && memdev->total_width != 0)
+ mem->width = memdev->total_width;
+ if (memdev->speed != 0)
+ mem->mem_clk_max = memdev->speed;
}
}
@@ -1284,34 +1271,41 @@ static void kfd_set_recommended_sdma_engines(struct kfd_topology_device *to_dev,
{
struct kfd_node *gpu = outbound_link->gpu;
struct amdgpu_device *adev = gpu->adev;
- int num_xgmi_nodes = adev->gmc.xgmi.num_physical_nodes;
+ unsigned int num_xgmi_nodes = adev->gmc.xgmi.num_physical_nodes;
+ unsigned int num_xgmi_sdma_engines = kfd_get_num_xgmi_sdma_engines(gpu);
+ unsigned int num_sdma_engines = kfd_get_num_sdma_engines(gpu);
+ uint32_t sdma_eng_id_mask = (1 << num_sdma_engines) - 1;
+ uint32_t xgmi_sdma_eng_id_mask =
+ ((1 << num_xgmi_sdma_engines) - 1) << num_sdma_engines;
+
bool support_rec_eng = !amdgpu_sriov_vf(adev) && to_dev->gpu &&
adev->aid_mask && num_xgmi_nodes && gpu->kfd->num_nodes == 1 &&
- kfd_get_num_xgmi_sdma_engines(gpu) >= 14 &&
- (!(adev->flags & AMD_IS_APU) && num_xgmi_nodes == 8);
+ num_xgmi_sdma_engines >= 6 && (!(adev->flags & AMD_IS_APU) &&
+ num_xgmi_nodes == 8);
if (support_rec_eng) {
int src_socket_id = adev->gmc.xgmi.physical_node_id;
int dst_socket_id = to_dev->gpu->adev->gmc.xgmi.physical_node_id;
+ unsigned int reshift = num_xgmi_sdma_engines == 6 ? 1 : 0;
outbound_link->rec_sdma_eng_id_mask =
- 1 << rec_sdma_eng_map[src_socket_id][dst_socket_id];
+ 1 << (rec_sdma_eng_map[src_socket_id][dst_socket_id] >> reshift);
inbound_link->rec_sdma_eng_id_mask =
- 1 << rec_sdma_eng_map[dst_socket_id][src_socket_id];
- } else {
- int num_sdma_eng = kfd_get_num_sdma_engines(gpu);
- int i, eng_offset = 0;
+ 1 << (rec_sdma_eng_map[dst_socket_id][src_socket_id] >> reshift);
- if (outbound_link->iolink_type == CRAT_IOLINK_TYPE_XGMI &&
- kfd_get_num_xgmi_sdma_engines(gpu) && to_dev->gpu) {
- eng_offset = num_sdma_eng;
- num_sdma_eng = kfd_get_num_xgmi_sdma_engines(gpu);
- }
+ /* If recommended engine is out of range, need to reset the mask */
+ if (outbound_link->rec_sdma_eng_id_mask & sdma_eng_id_mask)
+ outbound_link->rec_sdma_eng_id_mask = xgmi_sdma_eng_id_mask;
+ if (inbound_link->rec_sdma_eng_id_mask & sdma_eng_id_mask)
+ inbound_link->rec_sdma_eng_id_mask = xgmi_sdma_eng_id_mask;
- for (i = 0; i < num_sdma_eng; i++) {
- outbound_link->rec_sdma_eng_id_mask |= (1 << (i + eng_offset));
- inbound_link->rec_sdma_eng_id_mask |= (1 << (i + eng_offset));
- }
+ } else {
+ uint32_t engine_mask = (outbound_link->iolink_type == CRAT_IOLINK_TYPE_XGMI &&
+ num_xgmi_sdma_engines && to_dev->gpu) ? xgmi_sdma_eng_id_mask :
+ sdma_eng_id_mask;
+
+ outbound_link->rec_sdma_eng_id_mask = engine_mask;
+ inbound_link->rec_sdma_eng_id_mask = engine_mask;
}
}
@@ -1683,17 +1677,32 @@ static int fill_in_l2_l3_pcache(struct kfd_cache_properties **props_ext,
int cache_type, unsigned int cu_processor_id,
struct kfd_node *knode)
{
- unsigned int cu_sibling_map_mask;
+ unsigned int cu_sibling_map_mask = 0;
int first_active_cu;
int i, j, k, xcc, start, end;
int num_xcc = NUM_XCC(knode->xcc_mask);
struct kfd_cache_properties *pcache = NULL;
enum amdgpu_memory_partition mode;
struct amdgpu_device *adev = knode->adev;
+ bool found = false;
start = ffs(knode->xcc_mask) - 1;
end = start + num_xcc;
- cu_sibling_map_mask = cu_info->bitmap[start][0][0];
+
+ /* To find the bitmap in the first active cu in the first
+ * xcc, it is based on the assumption that evrey xcc must
+ * have at least one active cu.
+ */
+ for (i = 0; i < gfx_info->max_shader_engines && !found; i++) {
+ for (j = 0; j < gfx_info->max_sh_per_se && !found; j++) {
+ if (cu_info->bitmap[start][i % 4][j % 4]) {
+ cu_sibling_map_mask =
+ cu_info->bitmap[start][i % 4][j % 4];
+ found = true;
+ }
+ }
+ }
+
cu_sibling_map_mask &=
((1 << pcache_info[cache_type].num_cu_shared) - 1);
first_active_cu = ffs(cu_sibling_map_mask);
@@ -1714,7 +1723,8 @@ static int fill_in_l2_l3_pcache(struct kfd_cache_properties **props_ext,
pcache->cacheline_size = pcache_info[cache_type].cache_line_size;
if (KFD_GC_VERSION(knode) == IP_VERSION(9, 4, 3) ||
- KFD_GC_VERSION(knode) == IP_VERSION(9, 4, 4))
+ KFD_GC_VERSION(knode) == IP_VERSION(9, 4, 4) ||
+ KFD_GC_VERSION(knode) == IP_VERSION(9, 5, 0))
mode = adev->gmc.gmc_funcs->query_mem_partition_mode(adev);
else
mode = UNKNOWN_MEMORY_PARTITION_MODE;
@@ -1776,7 +1786,7 @@ static void kfd_fill_cache_non_crat_info(struct kfd_topology_device *dev, struct
struct amdgpu_cu_info *cu_info = &kdev->adev->gfx.cu_info;
struct amdgpu_gfx_config *gfx_info = &kdev->adev->gfx.config;
int gpu_processor_id;
- struct kfd_cache_properties *props_ext;
+ struct kfd_cache_properties *props_ext = NULL;
int num_of_entries = 0;
int num_of_cache_types = 0;
struct kfd_gpu_cache_info cache_info[KFD_MAX_CACHE_TYPES];
@@ -1998,14 +2008,14 @@ static void kfd_topology_set_capabilities(struct kfd_topology_device *dev)
if (KFD_GC_VERSION(dev->gpu) >= IP_VERSION(9, 4, 2))
dev->node_props.capability |=
HSA_CAP_TRAP_DEBUG_PRECISE_MEMORY_OPERATIONS_SUPPORTED;
+
+ if (!amdgpu_sriov_vf(dev->gpu->adev))
+ dev->node_props.capability |= HSA_CAP_PER_QUEUE_RESET_SUPPORTED;
+
} else {
dev->node_props.debug_prop |= HSA_DBG_WATCH_ADDR_MASK_LO_BIT_GFX10 |
HSA_DBG_WATCH_ADDR_MASK_HI_BIT;
- if (KFD_GC_VERSION(dev->gpu) >= IP_VERSION(11, 0, 0))
- dev->node_props.capability |=
- HSA_CAP_TRAP_DEBUG_PRECISE_MEMORY_OPERATIONS_SUPPORTED;
-
if (KFD_GC_VERSION(dev->gpu) >= IP_VERSION(12, 0, 0))
dev->node_props.capability |=
HSA_CAP_TRAP_DEBUG_PRECISE_ALU_OPERATIONS_SUPPORTED;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
index 155b5c410af1..3de8ec0043bb 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
@@ -24,6 +24,7 @@
#ifndef __KFD_TOPOLOGY_H__
#define __KFD_TOPOLOGY_H__
+#include <linux/dmi.h>
#include <linux/types.h>
#include <linux/list.h>
#include <linux/kfd_sysfs.h>
@@ -50,6 +51,7 @@ struct kfd_node_properties {
uint32_t cpu_core_id_base;
uint32_t simd_id_base;
uint32_t capability;
+ uint32_t capability2;
uint64_t debug_prop;
uint32_t max_waves_per_simd;
uint32_t lds_size_in_kb;
@@ -179,6 +181,22 @@ struct kfd_system_properties {
struct attribute attr_props;
};
+struct dmi_mem_device {
+ struct dmi_header header;
+ u16 physical_handle;
+ u16 error_handle;
+ u16 total_width;
+ u16 data_width;
+ u16 size;
+ u8 form_factor;
+ u8 device_set;
+ u8 device_locator;
+ u8 bank_locator;
+ u8 memory_type;
+ u16 type_detail;
+ u16 speed;
+} __packed;
+
struct kfd_topology_device *kfd_create_topology_device(
struct list_head *device_list);
void kfd_release_topology_device_list(struct list_head *device_list);