summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
diff options
context:
space:
mode:
authorDave Airlie <airlied@redhat.com>2025-09-22 01:44:52 +0300
committerDave Airlie <airlied@redhat.com>2025-09-22 01:45:51 +0300
commit342f141ba9f4c9e39de342d047a5245e8f4cda19 (patch)
tree4cf6d75acda32087f69739988f0a4eece260f243 /drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
parent0faeb8cf99c040886ac4917b0d7f4684dc9ae846 (diff)
parenta490c8d77d500b5981e739be3d59c60cfe382536 (diff)
downloadlinux-342f141ba9f4c9e39de342d047a5245e8f4cda19.tar.xz
Merge tag 'amd-drm-next-6.18-2025-09-19' of https://gitlab.freedesktop.org/agd5f/linux into drm-next
amd-drm-next-6.18-2025-09-19: amdgpu: - Fence drv clean up fix - DPC fixes - Misc display fixes - Support the MMIO remap page as a ttm pool - JPEG parser updates - UserQ updates - VCN ctx handling fixes - Documentation updates - Misc cleanups - SMU 13.0.x updates - SI DPM updates - GC 11.x cleaner shader updates - DMCUB updates - DML fixes - Improve fallback handling for pixel encoding - VCN reset improvements - DCE6 DC updates - DSC fixes - Use devm for i2c buses - GPUVM locking updates - GPUVM documentation improvements - Drop non-DC DCE11 code - S0ix fixes - Backlight fix - SR-IOV fixes amdkfd: - SVM updates Signed-off-by: Dave Airlie <airlied@redhat.com> From: Alex Deucher <alexander.deucher@amd.com> Link: https://lore.kernel.org/r/20250919193354.2989255-1-alexander.deucher@amd.com
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c')
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c15
1 files changed, 15 insertions, 0 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 7fe5b1940df8..e0ee21150860 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -219,10 +219,17 @@ static int amdgpu_check_address_validity(struct amdgpu_device *adev,
struct amdgpu_vram_block_info blk_info;
uint64_t page_pfns[32] = {0};
int i, ret, count;
+ bool hit = false;
if (amdgpu_ip_version(adev, UMC_HWIP, 0) < IP_VERSION(12, 0, 0))
return 0;
+ if (amdgpu_sriov_vf(adev)) {
+ if (amdgpu_virt_check_vf_critical_region(adev, address, &hit))
+ return -EPERM;
+ return hit ? -EACCES : 0;
+ }
+
if ((address >= adev->gmc.mc_vram_size) ||
(address >= RAS_UMC_INJECT_ADDR_LIMIT))
return -EFAULT;
@@ -2702,6 +2709,7 @@ static void amdgpu_ras_do_recovery(struct work_struct *work)
struct amdgpu_device *adev = ras->adev;
struct list_head device_list, *device_list_handle = NULL;
struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);
+ unsigned int error_query_mode;
enum ras_event_type type;
if (hive) {
@@ -2730,6 +2738,13 @@ static void amdgpu_ras_do_recovery(struct work_struct *work)
device_list_handle = &device_list;
}
+ if (amdgpu_ras_get_error_query_mode(adev, &error_query_mode)) {
+ if (error_query_mode == AMDGPU_RAS_FIRMWARE_ERROR_QUERY) {
+ /* wait 500ms to ensure pmfw polling mca bank info done */
+ msleep(500);
+ }
+ }
+
type = amdgpu_ras_get_fatal_error_event(adev);
list_for_each_entry(remote_adev,
device_list_handle, gmc.xgmi.head) {