summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/amd/amdkfd
diff options
context:
space:
mode:
authorDave Airlie <airlied@redhat.com>2020-09-08 09:40:13 +0300
committerDave Airlie <airlied@redhat.com>2020-09-08 09:40:13 +0300
commit0c8d22fcae2f9590a07b000e1724f665820b77f7 (patch)
tree5a2405fe298358d861a58dc933184ee6d3415eb4 /drivers/gpu/drm/amd/amdkfd
parentce5c207c6b8dd9cdeaeeb2345b8a69335c0d98bf (diff)
parent11bc98bd71fe2e0cb572988519e51bca9d58a18a (diff)
downloadlinux-0c8d22fcae2f9590a07b000e1724f665820b77f7.tar.xz
Merge tag 'amd-drm-next-5.10-2020-09-03' of git://people.freedesktop.org/~agd5f/linux into drm-next
amd-drm-next-5.10-2020-09-03: amdgpu: - RAS fixes - Sienna Cichlid updates - Navy Flounder updates - DCE6 (SI) support in DC - Enable plane rotation - Rework pre-OS vram reservation handling during driver init - Add standard interface to dump GPU metrics table from SMU - Rework tiling and tmz state handling in atomic commits - Pstate fixes - Add voltage and power hwmon interfaces for renoir - SW CTF fixes - S/G display fix for Raven - Print client strings for vmfaults for vega and newer - Manual fan control fixes - Display updates - Reorg power management directory structure - Misc bug fixes - Misc code cleanups amdkfd: - Topology fixes - Add SMI events for thermal throttling and GPU resets radeon: - switch from pci_* to dma_* for dma allocations - PLL fix Scheduler: - Clean up priority levels UAPI: - amdgpu INFO IOCTL query update for TMZ state https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6049 - amdkfd SMI event interface updates https://github.com/RadeonOpenCompute/rocm_smi_lib/tree/therm_thrott From: Alex Deucher <alexdeucher@gmail.com> Link: https://patchwork.freedesktop.org/patch/msgid/20200903222921.4152-1-alexander.deucher@amd.com
Diffstat (limited to 'drivers/gpu/drm/amd/amdkfd')
-rw-r--r--drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h174
-rw-r--r--drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm7
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_chardev.c2
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_crat.c18
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_device.c22
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c28
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h8
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c2
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c4
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_iommu.c10
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_priv.h6
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_process.c7
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c105
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h3
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_topology.c53
15 files changed, 280 insertions, 169 deletions
diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h
index 577d901fdb63..affbca7c0050 100644
--- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h
+++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h
@@ -911,7 +911,7 @@ static const uint32_t cwsr_trap_nv1x_hex[] = {
0x705d0000, 0x807c817c,
0x8070ff70, 0x00000080,
0xbf0a7b7c, 0xbf85fff8,
- 0xbf82014f, 0xbef4037e,
+ 0xbf820151, 0xbef4037e,
0x8775ff7f, 0x0000ffff,
0x8875ff75, 0x00040000,
0xbef60380, 0xbef703ff,
@@ -1024,61 +1024,62 @@ static const uint32_t cwsr_trap_nv1x_hex[] = {
0xbe883108, 0xbe8a310a,
0xbe8c310c, 0xbe8e310e,
0xbf06807c, 0xbf84fff0,
- 0xb9782a05, 0x80788178,
- 0xbf0d9972, 0xbf850002,
- 0x8f788978, 0xbf820001,
- 0x8f788a78, 0xb96e1e06,
- 0x8f6e8a6e, 0x80786e78,
- 0x8078ff78, 0x00000200,
- 0xbef603ff, 0x01000000,
- 0xf4211bfa, 0xf0000000,
- 0x80788478, 0xf4211b3a,
+ 0xba80f801, 0x00000000,
+ 0xbf8a0000, 0xb9782a05,
+ 0x80788178, 0xbf0d9972,
+ 0xbf850002, 0x8f788978,
+ 0xbf820001, 0x8f788a78,
+ 0xb96e1e06, 0x8f6e8a6e,
+ 0x80786e78, 0x8078ff78,
+ 0x00000200, 0xbef603ff,
+ 0x01000000, 0xf4211bfa,
0xf0000000, 0x80788478,
- 0xf4211b7a, 0xf0000000,
- 0x80788478, 0xf4211c3a,
+ 0xf4211b3a, 0xf0000000,
+ 0x80788478, 0xf4211b7a,
0xf0000000, 0x80788478,
- 0xf4211c7a, 0xf0000000,
- 0x80788478, 0xf4211eba,
+ 0xf4211c3a, 0xf0000000,
+ 0x80788478, 0xf4211c7a,
0xf0000000, 0x80788478,
- 0xf4211efa, 0xf0000000,
- 0x80788478, 0xf4211e7a,
+ 0xf4211eba, 0xf0000000,
+ 0x80788478, 0xf4211efa,
0xf0000000, 0x80788478,
- 0xf4211cfa, 0xf0000000,
- 0x80788478, 0xf4211bba,
+ 0xf4211e7a, 0xf0000000,
+ 0x80788478, 0xf4211cfa,
0xf0000000, 0x80788478,
- 0xbf8cc07f, 0xb9eef814,
0xf4211bba, 0xf0000000,
0x80788478, 0xbf8cc07f,
- 0xb9eef815, 0xbefc036f,
- 0xbefe0370, 0xbeff0371,
- 0x876f7bff, 0x000003ff,
- 0xb9ef4803, 0xb9f9f816,
- 0x876f7bff, 0xfffff800,
- 0x906f8b6f, 0xb9efa2c3,
- 0xb9f3f801, 0xb96e2a05,
- 0x806e816e, 0xbf0d9972,
- 0xbf850002, 0x8f6e896e,
- 0xbf820001, 0x8f6e8a6e,
- 0x806eff6e, 0x00000200,
- 0x806e746e, 0x826f8075,
- 0x876fff6f, 0x0000ffff,
- 0xf4091c37, 0xfa000050,
- 0xf4091d37, 0xfa000060,
- 0xf4011e77, 0xfa000074,
- 0xbf8cc07f, 0x876fff6d,
- 0xfc000000, 0x906f9a6f,
- 0x8f6f906f, 0xbeee0380,
+ 0xb9eef814, 0xf4211bba,
+ 0xf0000000, 0x80788478,
+ 0xbf8cc07f, 0xb9eef815,
+ 0xbefc036f, 0xbefe0370,
+ 0xbeff0371, 0x876f7bff,
+ 0x000003ff, 0xb9ef4803,
+ 0xb9f9f816, 0x876f7bff,
+ 0xfffff800, 0x906f8b6f,
+ 0xb9efa2c3, 0xb9f3f801,
+ 0xb96e2a05, 0x806e816e,
+ 0xbf0d9972, 0xbf850002,
+ 0x8f6e896e, 0xbf820001,
+ 0x8f6e8a6e, 0x806eff6e,
+ 0x00000200, 0x806e746e,
+ 0x826f8075, 0x876fff6f,
+ 0x0000ffff, 0xf4091c37,
+ 0xfa000050, 0xf4091d37,
+ 0xfa000060, 0xf4011e77,
+ 0xfa000074, 0xbf8cc07f,
+ 0x876fff6d, 0xfc000000,
+ 0x906f9a6f, 0x8f6f906f,
+ 0xbeee0380, 0x886e6f6e,
+ 0x876fff6d, 0x02000000,
+ 0x906f996f, 0x8f6f8f6f,
0x886e6f6e, 0x876fff6d,
- 0x02000000, 0x906f996f,
- 0x8f6f8f6f, 0x886e6f6e,
- 0x876fff6d, 0x01000000,
- 0x906f986f, 0x8f6f996f,
- 0x886e6f6e, 0x876fff7a,
- 0x00800000, 0x906f976f,
- 0xb9eef807, 0x876dff6d,
- 0x0000ffff, 0x87fe7e7e,
- 0x87ea6a6a, 0xb9faf802,
- 0xbf8a0000, 0xbe80226c,
+ 0x01000000, 0x906f986f,
+ 0x8f6f996f, 0x886e6f6e,
+ 0x876fff7a, 0x00800000,
+ 0x906f976f, 0xb9eef807,
+ 0x876dff6d, 0x0000ffff,
+ 0x87fe7e7e, 0x87ea6a6a,
+ 0xb9faf802, 0xbe80226c,
0xbf810000, 0xbf9f0000,
0xbf9f0000, 0xbf9f0000,
0xbf9f0000, 0xbf9f0000,
@@ -1807,7 +1808,7 @@ static const uint32_t cwsr_trap_gfx10_hex[] = {
0x705d0000, 0x807c817c,
0x8070ff70, 0x00000080,
0xbf0a7b7c, 0xbf85fff8,
- 0xbf82013a, 0xbef4037e,
+ 0xbf82013c, 0xbef4037e,
0x8775ff7f, 0x0000ffff,
0x8875ff75, 0x00040000,
0xbef60380, 0xbef703ff,
@@ -1920,50 +1921,51 @@ static const uint32_t cwsr_trap_gfx10_hex[] = {
0xbe883108, 0xbe8a310a,
0xbe8c310c, 0xbe8e310e,
0xbf06807c, 0xbf84fff0,
- 0xb9782a05, 0x80788178,
- 0xbf0d9972, 0xbf850002,
- 0x8f788978, 0xbf820001,
- 0x8f788a78, 0xb96e1e06,
- 0x8f6e8a6e, 0x80786e78,
- 0x8078ff78, 0x00000200,
- 0xbef603ff, 0x01000000,
- 0xf4211bfa, 0xf0000000,
- 0x80788478, 0xf4211b3a,
+ 0xba80f801, 0x00000000,
+ 0xbf8a0000, 0xb9782a05,
+ 0x80788178, 0xbf0d9972,
+ 0xbf850002, 0x8f788978,
+ 0xbf820001, 0x8f788a78,
+ 0xb96e1e06, 0x8f6e8a6e,
+ 0x80786e78, 0x8078ff78,
+ 0x00000200, 0xbef603ff,
+ 0x01000000, 0xf4211bfa,
0xf0000000, 0x80788478,
- 0xf4211b7a, 0xf0000000,
- 0x80788478, 0xf4211c3a,
+ 0xf4211b3a, 0xf0000000,
+ 0x80788478, 0xf4211b7a,
0xf0000000, 0x80788478,
- 0xf4211c7a, 0xf0000000,
- 0x80788478, 0xf4211eba,
+ 0xf4211c3a, 0xf0000000,
+ 0x80788478, 0xf4211c7a,
0xf0000000, 0x80788478,
- 0xf4211efa, 0xf0000000,
- 0x80788478, 0xf4211e7a,
+ 0xf4211eba, 0xf0000000,
+ 0x80788478, 0xf4211efa,
0xf0000000, 0x80788478,
- 0xf4211cfa, 0xf0000000,
- 0x80788478, 0xf4211bba,
+ 0xf4211e7a, 0xf0000000,
+ 0x80788478, 0xf4211cfa,
0xf0000000, 0x80788478,
- 0xbf8cc07f, 0xb9eef814,
0xf4211bba, 0xf0000000,
0x80788478, 0xbf8cc07f,
- 0xb9eef815, 0xbefc036f,
- 0xbefe0370, 0xbeff0371,
- 0x876f7bff, 0x000003ff,
- 0xb9ef4803, 0x876f7bff,
- 0xfffff800, 0x906f8b6f,
- 0xb9efa2c3, 0xb9f3f801,
- 0xb96e2a05, 0x806e816e,
- 0xbf0d9972, 0xbf850002,
- 0x8f6e896e, 0xbf820001,
- 0x8f6e8a6e, 0x806eff6e,
- 0x00000200, 0x806e746e,
- 0x826f8075, 0x876fff6f,
- 0x0000ffff, 0xf4091c37,
- 0xfa000050, 0xf4091d37,
- 0xfa000060, 0xf4011e77,
- 0xfa000074, 0xbf8cc07f,
- 0x876dff6d, 0x0000ffff,
- 0x87fe7e7e, 0x87ea6a6a,
- 0xb9faf802, 0xbf8a0000,
+ 0xb9eef814, 0xf4211bba,
+ 0xf0000000, 0x80788478,
+ 0xbf8cc07f, 0xb9eef815,
+ 0xbefc036f, 0xbefe0370,
+ 0xbeff0371, 0x876f7bff,
+ 0x000003ff, 0xb9ef4803,
+ 0x876f7bff, 0xfffff800,
+ 0x906f8b6f, 0xb9efa2c3,
+ 0xb9f3f801, 0xb96e2a05,
+ 0x806e816e, 0xbf0d9972,
+ 0xbf850002, 0x8f6e896e,
+ 0xbf820001, 0x8f6e8a6e,
+ 0x806eff6e, 0x00000200,
+ 0x806e746e, 0x826f8075,
+ 0x876fff6f, 0x0000ffff,
+ 0xf4091c37, 0xfa000050,
+ 0xf4091d37, 0xfa000060,
+ 0xf4011e77, 0xfa000074,
+ 0xbf8cc07f, 0x876dff6d,
+ 0x0000ffff, 0x87fe7e7e,
+ 0x87ea6a6a, 0xb9faf802,
0xbe80226c, 0xbf810000,
0xbf9f0000, 0xbf9f0000,
0xbf9f0000, 0xbf9f0000,
diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm
index 5b220f2a7501..5081f91190b8 100644
--- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm
+++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm
@@ -894,6 +894,11 @@ L_RESTORE_SGPR:
s_cmp_eq_u32 m0, 0 //scc = (m0 < s_sgpr_save_num) ? 1 : 0
s_cbranch_scc0 L_RESTORE_SGPR_LOOP
+ // s_barrier with MODE.DEBUG_EN=1, STATUS.PRIV=1 incorrectly asserts debug exception.
+ // Clear DEBUG_EN before and restore MODE after the barrier.
+ s_setreg_imm32_b32 hwreg(HW_REG_MODE), 0
+ s_barrier //barrier to ensure the readiness of LDS before access attemps from any other wave in the same TG
+
/* restore HW registers */
L_RESTORE_HWREG:
// HWREG SR memory offset : size(VGPR)+size(SVGPR)+size(SGPR)
@@ -976,8 +981,6 @@ L_RESTORE_HWREG:
s_and_b64 vcc, vcc, vcc // Restore STATUS.VCCZ, not writable by s_setreg_b32
s_setreg_b32 hwreg(HW_REG_STATUS), s_restore_status // SCC is included, which is changed by previous salu
- s_barrier //barrier to ensure the readiness of LDS before access attemps from any other wave in the same TG
-
s_rfe_b64 s_restore_pc_lo //Return to the main shader program and resume execution
L_END_PGM:
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index e9b96ad3d9a5..b7b16adb0615 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -1254,7 +1254,7 @@ bool kfd_dev_is_large_bar(struct kfd_dev *dev)
return true;
}
- if (dev->device_info->needs_iommu_device)
+ if (dev->use_iommu_v2)
return false;
amdgpu_amdkfd_get_local_mem_info(dev->kgd, &mem_info);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
index 6a250f8fcfb8..3fac06b281ce 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
@@ -742,6 +742,22 @@ static int kfd_fill_gpu_cache_info(struct kfd_dev *kdev,
return 0;
}
+static bool kfd_ignore_crat(void)
+{
+ bool ret;
+
+ if (ignore_crat)
+ return true;
+
+#ifndef KFD_SUPPORT_IOMMU_V2
+ ret = true;
+#else
+ ret = false;
+#endif
+
+ return ret;
+}
+
/*
* kfd_create_crat_image_acpi - Allocates memory for CRAT image and
* copies CRAT from ACPI (if available).
@@ -776,7 +792,7 @@ int kfd_create_crat_image_acpi(void **crat_image, size_t *size)
return -EINVAL;
}
- if (ignore_crat) {
+ if (kfd_ignore_crat()) {
pr_info("CRAT table disabled by module option\n");
return -ENODATA;
}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
index 4bfedaab183f..0e71a0543f98 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
@@ -29,6 +29,7 @@
#include "cwsr_trap_handler.h"
#include "kfd_iommu.h"
#include "amdgpu_amdkfd.h"
+#include "kfd_smi_events.h"
#define MQD_SIZE_ALIGNED 768
@@ -115,6 +116,7 @@ static const struct kfd_device_info carrizo_device_info = {
.num_xgmi_sdma_engines = 0,
.num_sdma_queues_per_engine = 2,
};
+#endif
static const struct kfd_device_info raven_device_info = {
.asic_family = CHIP_RAVEN,
@@ -133,7 +135,6 @@ static const struct kfd_device_info raven_device_info = {
.num_xgmi_sdma_engines = 0,
.num_sdma_queues_per_engine = 2,
};
-#endif
static const struct kfd_device_info hawaii_device_info = {
.asic_family = CHIP_HAWAII,
@@ -711,11 +712,9 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
goto kfd_doorbell_error;
}
- if (kfd->kfd2kgd->get_hive_id)
- kfd->hive_id = kfd->kfd2kgd->get_hive_id(kfd->kgd);
+ kfd->hive_id = amdgpu_amdkfd_get_hive_id(kfd->kgd);
- if (kfd->kfd2kgd->get_unique_id)
- kfd->unique_id = kfd->kfd2kgd->get_unique_id(kfd->kgd);
+ kfd->unique_id = amdgpu_amdkfd_get_unique_id(kfd->kgd);
if (kfd_interrupt_init(kfd)) {
dev_err(kfd_device, "Error initializing interrupts\n");
@@ -737,6 +736,9 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
goto gws_error;
}
+ /* If CRAT is broken, won't set iommu enabled */
+ kfd_double_confirm_iommu_support(kfd);
+
if (kfd_iommu_device_init(kfd)) {
dev_err(kfd_device, "Error initializing iommuv2\n");
goto device_iommu_error;
@@ -810,6 +812,8 @@ int kgd2kfd_pre_reset(struct kfd_dev *kfd)
if (!kfd->init_complete)
return 0;
+ kfd_smi_event_update_gpu_reset(kfd, false);
+
kfd->dqm->ops.pre_reset(kfd->dqm);
kgd2kfd_suspend(kfd, false);
@@ -838,6 +842,8 @@ int kgd2kfd_post_reset(struct kfd_dev *kfd)
atomic_set(&kfd->sram_ecc_flag, 0);
+ kfd_smi_event_update_gpu_reset(kfd, true);
+
return 0;
}
@@ -1245,6 +1251,12 @@ void kfd_dec_compute_active(struct kfd_dev *kfd)
WARN_ONCE(count < 0, "Compute profile ref. count error");
}
+void kgd2kfd_smi_event_throttle(struct kfd_dev *kfd, uint32_t throttle_bitmask)
+{
+ if (kfd)
+ kfd_smi_event_update_thermal_throttling(kfd, throttle_bitmask);
+}
+
#if defined(CONFIG_DEBUG_FS)
/* This function will send a package to HIQ to hang the HWS
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index e0e60b0d0669..560adc57a050 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -153,30 +153,6 @@ static void decrement_queue_count(struct device_queue_manager *dqm,
dqm->active_cp_queue_count--;
}
-int read_sdma_queue_counter(uint64_t q_rptr, uint64_t *val)
-{
- int ret;
- uint64_t tmp = 0;
-
- if (!val)
- return -EINVAL;
- /*
- * SDMA activity counter is stored at queue's RPTR + 0x8 location.
- */
- if (!access_ok((const void __user *)(q_rptr +
- sizeof(uint64_t)), sizeof(uint64_t))) {
- pr_err("Can't access sdma queue activity counter\n");
- return -EFAULT;
- }
-
- ret = get_user(tmp, (uint64_t *)(q_rptr + sizeof(uint64_t)));
- if (!ret) {
- *val = tmp;
- }
-
- return ret;
-}
-
static int allocate_doorbell(struct qcm_process_device *qpd, struct queue *q)
{
struct kfd_dev *dev = qpd->dqm->dev;
@@ -552,7 +528,7 @@ static int destroy_queue_nocpsch(struct device_queue_manager *dqm,
/* Get the SDMA queue stats */
if ((q->properties.type == KFD_QUEUE_TYPE_SDMA) ||
(q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) {
- retval = read_sdma_queue_counter((uint64_t)q->properties.read_ptr,
+ retval = read_sdma_queue_counter((uint64_t __user *)q->properties.read_ptr,
&sdma_val);
if (retval)
pr_err("Failed to read SDMA queue counter for queue: %d\n",
@@ -1473,7 +1449,7 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm,
/* Get the SDMA queue stats */
if ((q->properties.type == KFD_QUEUE_TYPE_SDMA) ||
(q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) {
- retval = read_sdma_queue_counter((uint64_t)q->properties.read_ptr,
+ retval = read_sdma_queue_counter((uint64_t __user *)q->properties.read_ptr,
&sdma_val);
if (retval)
pr_err("Failed to read SDMA queue counter for queue: %d\n",
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
index 49d8e324c636..16262e5d93f5 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
@@ -251,5 +251,11 @@ static inline void dqm_unlock(struct device_queue_manager *dqm)
mutex_unlock(&dqm->lock_hidden);
}
-int read_sdma_queue_counter(uint64_t q_rptr, uint64_t *val);
+static inline int read_sdma_queue_counter(uint64_t __user *q_rptr, uint64_t *val)
+{
+ /*
+ * SDMA activity counter is stored at queue's RPTR + 0x8 location.
+ */
+ return get_user(*val, q_rptr + 1);
+}
#endif /* KFD_DEVICE_QUEUE_MANAGER_H_ */
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c
index 95a82ac455f2..309f63a0b34a 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c
@@ -62,7 +62,7 @@ static int update_qpd_v9(struct device_queue_manager *dqm,
SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT;
if (amdgpu_noretry &&
- !dqm->dev->device_info->needs_iommu_device)
+ !dqm->dev->use_iommu_v2)
qpd->sh_mem_config |=
1 << SH_MEM_CONFIG__RETRY_DISABLE__SHIFT;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c
index c1166c40ac15..3c22909470f2 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c
@@ -321,7 +321,7 @@ static void kfd_init_apertures_vi(struct kfd_process_device *pdd, uint8_t id)
pdd->lds_base = MAKE_LDS_APP_BASE_VI();
pdd->lds_limit = MAKE_LDS_APP_LIMIT(pdd->lds_base);
- if (!pdd->dev->device_info->needs_iommu_device) {
+ if (!pdd->dev->use_iommu_v2) {
/* dGPUs: SVM aperture starting at 0
* with small reserved space for kernel.
* Set them to CANONICAL addresses.
@@ -425,7 +425,7 @@ int kfd_init_apertures(struct kfd_process *process)
return -EINVAL;
}
- if (!dev->device_info->needs_iommu_device) {
+ if (!dev->use_iommu_v2) {
/* dGPUs: the reserved space for kernel
* before SVM
*/
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_iommu.c b/drivers/gpu/drm/amd/amdkfd/kfd_iommu.c
index 7c8786b9eb0a..5a64915abaf7 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_iommu.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_iommu.c
@@ -41,7 +41,7 @@ int kfd_iommu_check_device(struct kfd_dev *kfd)
struct amd_iommu_device_info iommu_info;
int err;
- if (!kfd->device_info->needs_iommu_device)
+ if (!kfd->use_iommu_v2)
return -ENODEV;
iommu_info.flags = 0;
@@ -63,7 +63,7 @@ int kfd_iommu_device_init(struct kfd_dev *kfd)
unsigned int pasid_limit;
int err;
- if (!kfd->device_info->needs_iommu_device)
+ if (!kfd->use_iommu_v2)
return 0;
iommu_info.flags = 0;
@@ -109,7 +109,7 @@ int kfd_iommu_bind_process_to_device(struct kfd_process_device *pdd)
struct kfd_process *p = pdd->process;
int err;
- if (!dev->device_info->needs_iommu_device || pdd->bound == PDD_BOUND)
+ if (!dev->use_iommu_v2 || pdd->bound == PDD_BOUND)
return 0;
if (unlikely(pdd->bound == PDD_BOUND_SUSPENDED)) {
@@ -284,7 +284,7 @@ static void kfd_unbind_processes_from_device(struct kfd_dev *kfd)
*/
void kfd_iommu_suspend(struct kfd_dev *kfd)
{
- if (!kfd->device_info->needs_iommu_device)
+ if (!kfd->use_iommu_v2)
return;
kfd_unbind_processes_from_device(kfd);
@@ -304,7 +304,7 @@ int kfd_iommu_resume(struct kfd_dev *kfd)
unsigned int pasid_limit;
int err;
- if (!kfd->device_info->needs_iommu_device)
+ if (!kfd->use_iommu_v2)
return 0;
pasid_limit = kfd_get_pasid_limit();
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 6727e9de5b8b..023629f28495 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -297,6 +297,9 @@ struct kfd_dev {
bool pci_atomic_requested;
+ /* Use IOMMU v2 flag */
+ bool use_iommu_v2;
+
/* SRAM ECC flag */
atomic_t sram_ecc_flag;
@@ -309,6 +312,8 @@ struct kfd_dev {
/* Clients watching SMI events */
struct list_head smi_clients;
spinlock_t smi_lock;
+
+ uint32_t reset_seq_num;
};
enum kfd_mempool {
@@ -892,6 +897,7 @@ struct kfd_dev *kfd_device_by_pci_dev(const struct pci_dev *pdev);
struct kfd_dev *kfd_device_by_kgd(const struct kgd_dev *kgd);
int kfd_topology_enum_kfd_devices(uint8_t idx, struct kfd_dev **kdev);
int kfd_numa_node_to_apic_id(int numa_node_id);
+void kfd_double_confirm_iommu_support(struct kfd_dev *gpu);
/* Interrupts */
int kfd_interrupt_init(struct kfd_dev *dev);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index 40695d52e9a8..a0e12a79ab7d 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -87,7 +87,7 @@ struct kfd_sdma_activity_handler_workarea {
};
struct temp_sdma_queue_list {
- uint64_t rptr;
+ uint64_t __user *rptr;
uint64_t sdma_val;
unsigned int queue_id;
struct list_head list;
@@ -159,7 +159,7 @@ static void kfd_sdma_activity_worker(struct work_struct *work)
}
INIT_LIST_HEAD(&sdma_q->list);
- sdma_q->rptr = (uint64_t)q->properties.read_ptr;
+ sdma_q->rptr = (uint64_t __user *)q->properties.read_ptr;
sdma_q->queue_id = q->properties.queue_id;
list_add_tail(&sdma_q->list, &sdma_q_list.list);
}
@@ -218,7 +218,7 @@ static void kfd_sdma_activity_worker(struct work_struct *work)
continue;
list_for_each_entry_safe(sdma_q, next, &sdma_q_list.list, list) {
- if (((uint64_t)q->properties.read_ptr == sdma_q->rptr) &&
+ if (((uint64_t __user *)q->properties.read_ptr == sdma_q->rptr) &&
(sdma_q->queue_id == q->properties.queue_id)) {
list_del(&sdma_q->list);
kfree(sdma_q);
@@ -270,6 +270,7 @@ static ssize_t kfd_procfs_show(struct kobject *kobj, struct attribute *attr,
kfd_sdma_activity_worker);
sdma_activity_work_handler.pdd = pdd;
+ sdma_activity_work_handler.sdma_activity_counter = 0;
schedule_work(&sdma_activity_work_handler.sdma_activity_work);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c
index 7b348bf9df21..17d1736367ea 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c
@@ -24,6 +24,7 @@
#include <linux/wait.h>
#include <linux/anon_inodes.h>
#include <uapi/linux/kfd_ioctl.h>
+#include "amdgpu.h"
#include "amdgpu_vm.h"
#include "kfd_priv.h"
#include "kfd_smi_events.h"
@@ -148,15 +149,94 @@ static int kfd_smi_ev_release(struct inode *inode, struct file *filep)
return 0;
}
+static void add_event_to_kfifo(struct kfd_dev *dev, unsigned int smi_event,
+ char *event_msg, int len)
+{
+ struct kfd_smi_client *client;
+
+ rcu_read_lock();
+
+ list_for_each_entry_rcu(client, &dev->smi_clients, list) {
+ if (!(READ_ONCE(client->events) &
+ KFD_SMI_EVENT_MASK_FROM_INDEX(smi_event)))
+ continue;
+ spin_lock(&client->lock);
+ if (kfifo_avail(&client->fifo) >= len) {
+ kfifo_in(&client->fifo, event_msg, len);
+ wake_up_all(&client->wait_queue);
+ } else {
+ pr_debug("smi_event(EventID: %u): no space left\n",
+ smi_event);
+ }
+ spin_unlock(&client->lock);
+ }
+
+ rcu_read_unlock();
+}
+
+void kfd_smi_event_update_gpu_reset(struct kfd_dev *dev, bool post_reset)
+{
+ /*
+ * GpuReset msg = Reset seq number (incremented for
+ * every reset message sent before GPU reset).
+ * 1 byte event + 1 byte space + 8 bytes seq num +
+ * 1 byte \n + 1 byte \0 = 12
+ */
+ char fifo_in[12];
+ int len;
+ unsigned int event;
+
+ if (list_empty(&dev->smi_clients))
+ return;
+
+ memset(fifo_in, 0x0, sizeof(fifo_in));
+
+ if (post_reset) {
+ event = KFD_SMI_EVENT_GPU_POST_RESET;
+ } else {
+ event = KFD_SMI_EVENT_GPU_PRE_RESET;
+ ++(dev->reset_seq_num);
+ }
+
+ len = snprintf(fifo_in, sizeof(fifo_in), "%x %x\n", event,
+ dev->reset_seq_num);
+
+ add_event_to_kfifo(dev, event, fifo_in, len);
+}
+
+void kfd_smi_event_update_thermal_throttling(struct kfd_dev *dev,
+ uint32_t throttle_bitmask)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)dev->kgd;
+ /*
+ * ThermalThrottle msg = throttle_bitmask(8):
+ * thermal_interrupt_count(16):
+ * 1 byte event + 1 byte space + 8 byte throttle_bitmask +
+ * 1 byte : + 16 byte thermal_interupt_counter + 1 byte \n +
+ * 1 byte \0 = 29
+ */
+ char fifo_in[29];
+ int len;
+
+ if (list_empty(&dev->smi_clients))
+ return;
+
+ len = snprintf(fifo_in, sizeof(fifo_in), "%x %x:%llx\n",
+ KFD_SMI_EVENT_THERMAL_THROTTLE, throttle_bitmask,
+ atomic64_read(&adev->smu.throttle_int_counter));
+
+ add_event_to_kfifo(dev, KFD_SMI_EVENT_THERMAL_THROTTLE, fifo_in, len);
+}
+
void kfd_smi_event_update_vmfault(struct kfd_dev *dev, uint16_t pasid)
{
struct amdgpu_device *adev = (struct amdgpu_device *)dev->kgd;
struct amdgpu_task_info task_info;
/* VmFault msg = (hex)uint32_pid(8) + :(1) + task name(16) = 25 */
- /* 16 bytes event + 1 byte space + 25 bytes msg + 1 byte \n = 43
+ /* 1 byte event + 1 byte space + 25 bytes msg + 1 byte \n +
+ * 1 byte \0 = 29
*/
- char fifo_in[43];
- struct kfd_smi_client *client;
+ char fifo_in[29];
int len;
if (list_empty(&dev->smi_clients))
@@ -168,25 +248,10 @@ void kfd_smi_event_update_vmfault(struct kfd_dev *dev, uint16_t pasid)
if (!task_info.pid)
return;
- len = snprintf(fifo_in, 43, "%x %x:%s\n", KFD_SMI_EVENT_VMFAULT,
+ len = snprintf(fifo_in, sizeof(fifo_in), "%x %x:%s\n", KFD_SMI_EVENT_VMFAULT,
task_info.pid, task_info.task_name);
- rcu_read_lock();
-
- list_for_each_entry_rcu(client, &dev->smi_clients, list) {
- if (!(READ_ONCE(client->events) & KFD_SMI_EVENT_VMFAULT))
- continue;
- spin_lock(&client->lock);
- if (kfifo_avail(&client->fifo) >= len) {
- kfifo_in(&client->fifo, fifo_in, len);
- wake_up_all(&client->wait_queue);
- }
- else
- pr_debug("smi_event(vmfault): no space left\n");
- spin_unlock(&client->lock);
- }
-
- rcu_read_unlock();
+ add_event_to_kfifo(dev, KFD_SMI_EVENT_VMFAULT, fifo_in, len);
}
int kfd_smi_event_open(struct kfd_dev *dev, uint32_t *fd)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h
index a9cb218fef96..b9b0438202e2 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h
@@ -25,5 +25,8 @@
int kfd_smi_event_open(struct kfd_dev *dev, uint32_t *fd);
void kfd_smi_event_update_vmfault(struct kfd_dev *dev, uint16_t pasid);
+void kfd_smi_event_update_thermal_throttling(struct kfd_dev *dev,
+ uint32_t throttle_bitmask);
+void kfd_smi_event_update_gpu_reset(struct kfd_dev *dev, bool post_reset);
#endif
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
index f185f6cbc05c..2b31c3066aaa 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
@@ -446,7 +446,7 @@ static ssize_t node_show(struct kobject *kobj, struct attribute *attr,
sysfs_show_32bit_prop(buffer, offs, "cpu_cores_count",
dev->node_props.cpu_cores_count);
sysfs_show_32bit_prop(buffer, offs, "simd_count",
- dev->node_props.simd_count);
+ dev->gpu ? dev->node_props.simd_count : 0);
sysfs_show_32bit_prop(buffer, offs, "mem_banks_count",
dev->node_props.mem_banks_count);
sysfs_show_32bit_prop(buffer, offs, "caches_count",
@@ -1139,7 +1139,7 @@ static struct kfd_topology_device *kfd_assign_gpu(struct kfd_dev *gpu)
/* Discrete GPUs need their own topology device list
* entries. Don't assign them to CPU/APU nodes.
*/
- if (!gpu->device_info->needs_iommu_device &&
+ if (!gpu->use_iommu_v2 &&
dev->node_props.cpu_cores_count)
continue;
@@ -1239,7 +1239,7 @@ int kfd_topology_add_device(struct kfd_dev *gpu)
void *crat_image = NULL;
size_t image_size = 0;
int proximity_domain;
- struct amdgpu_ras *ctx;
+ struct amdgpu_device *adev;
INIT_LIST_HEAD(&temp_topology_device_list);
@@ -1388,7 +1388,7 @@ int kfd_topology_add_device(struct kfd_dev *gpu)
* Overwrite ATS capability according to needs_iommu_device to fix
* potential missing corresponding bit in CRAT of BIOS.
*/
- if (dev->gpu->device_info->needs_iommu_device)
+ if (dev->gpu->use_iommu_v2)
dev->node_props.capability |= HSA_CAP_ATS_PRESENT;
else
dev->node_props.capability &= ~HSA_CAP_ATS_PRESENT;
@@ -1404,19 +1404,17 @@ int kfd_topology_add_device(struct kfd_dev *gpu)
dev->node_props.max_waves_per_simd = 10;
}
- ctx = amdgpu_ras_get_context((struct amdgpu_device *)(dev->gpu->kgd));
- if (ctx) {
- /* kfd only concerns sram ecc on GFX/SDMA and HBM ecc on UMC */
- dev->node_props.capability |=
- (((ctx->features & BIT(AMDGPU_RAS_BLOCK__SDMA)) != 0) ||
- ((ctx->features & BIT(AMDGPU_RAS_BLOCK__GFX)) != 0)) ?
- HSA_CAP_SRAM_EDCSUPPORTED : 0;
- dev->node_props.capability |= ((ctx->features & BIT(AMDGPU_RAS_BLOCK__UMC)) != 0) ?
- HSA_CAP_MEM_EDCSUPPORTED : 0;
-
- dev->node_props.capability |= (ctx->features != 0) ?
+ adev = (struct amdgpu_device *)(dev->gpu->kgd);
+ /* kfd only concerns sram ecc on GFX and HBM ecc on UMC */
+ dev->node_props.capability |=
+ ((adev->ras_features & BIT(AMDGPU_RAS_BLOCK__GFX)) != 0) ?
+ HSA_CAP_SRAM_EDCSUPPORTED : 0;
+ dev->node_props.capability |= ((adev->ras_features & BIT(AMDGPU_RAS_BLOCK__UMC)) != 0) ?
+ HSA_CAP_MEM_EDCSUPPORTED : 0;
+
+ if (adev->asic_type != CHIP_VEGA10)
+ dev->node_props.capability |= (adev->ras_features != 0) ?
HSA_CAP_RASEVENTNOTIFY : 0;
- }
kfd_debug_print_topology();
@@ -1515,6 +1513,29 @@ int kfd_numa_node_to_apic_id(int numa_node_id)
return kfd_cpumask_to_apic_id(cpumask_of_node(numa_node_id));
}
+void kfd_double_confirm_iommu_support(struct kfd_dev *gpu)
+{
+ struct kfd_topology_device *dev;
+
+ gpu->use_iommu_v2 = false;
+
+ if (!gpu->device_info->needs_iommu_device)
+ return;
+
+ down_read(&topology_lock);
+
+ /* Only use IOMMUv2 if there is an APU topology node with no GPU
+ * assigned yet. This GPU will be assigned to it.
+ */
+ list_for_each_entry(dev, &topology_device_list, list)
+ if (dev->node_props.cpu_cores_count &&
+ dev->node_props.simd_count &&
+ !dev->gpu)
+ gpu->use_iommu_v2 = true;
+
+ up_read(&topology_lock);
+}
+
#if defined(CONFIG_DEBUG_FS)
int kfd_debugfs_hqds_by_device(struct seq_file *m, void *data)