summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2025-10-02 22:47:25 +0300
committerLinus Torvalds <torvalds@linux-foundation.org>2025-10-02 22:47:25 +0300
commit58809f614e0e3f4e12b489bddf680bfeb31c0a20 (patch)
tree6b1468e6c1fbed9e04b0701ae49b634add62f794 /drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
parent05a54fa773284d1a7923cdfdd8f0c8dabb98bd26 (diff)
parentb2ec5ca9d5c2c019e2316f7ba447596d1dcd8fde (diff)
downloadlinux-58809f614e0e3f4e12b489bddf680bfeb31c0a20.tar.xz
Merge tag 'drm-next-2025-10-01' of https://gitlab.freedesktop.org/drm/kernel
Pull drm updates from Dave Airlie: "cross-subsystem: - i2c-hid: Make elan touch controllers power on after panel is enabled - dt bindings for STM32MP25 SoC - pci vgaarb: use screen_info helpers - rust pin-init updates - add MEI driver for late binding firmware update/load uapi: - add ioctl for reassigning GEM handles - provide boot_display attribute on boot-up devices core: - document DRM_MODE_PAGE_FLIP_EVENT - add vendor specific recovery method to drm device wedged uevent gem: - Simplify gpuvm locking ttm: - add interface to populate buffers sched: - Fix race condition in trace code atomic: - Reallow no-op async page flips display: - dp: Fix command length video: - Improve pixel-format handling for struct screen_info rust: - drop Opaque<> from ioctl args - Alloc: - BorrowedPage type and AsPageIter traits - Implement Vmalloc::to_page() and VmallocPageIter - DMA/Scatterlist: - Add dma::DataDirection and type alias for dma_addr_t - Abstraction for struct scatterlist and sg_table - DRM: - simplify use of generics - add DriverFile type alias - drop Object::SIZE - Rust: - pin-init tree merge - Various methods for AsBytes and FromBytes traits gpuvm: - Support madvice in Xe driver gpusvm: - fix hmm_pfn_to_map_order usage in gpusvm bridge: - Improve and fix ref counting on bridge management - cdns-dsi: Various improvements to mode setting - Support Solomon SSD2825 plus DT bindings - Support Waveshare DSI2DPI plus DT bindings - Support Content Protection property - display-connector: Improve DP display detection - Add support for Radxa Ra620 plus DT bindings - adv7511: Provide SPD and HDMI infoframes - it6505: Replace crypto_shash with sha() - synopsys: Add support for DW DPTX Controller plus DT bindings - adv7511: Write full Audio infoframe - ite6263: Support vendor-specific infoframes - simple: Add support for Realtek RTD2171 DP-to-HDMI plus DT bindings panel: - panel-edp: Support mt8189 Chromebooks; Support BOE NV140WUM-N64; Support SHP LQ134Z1; Fixes - panel-simple: Support Olimex LCD-OLinuXino-5CTS plus DT bindings - Support Samsung AMS561RA01 - Support Hydis HV101HD1 plus DT bindings - ilitek-ili9881c: Refactor mode setting; Add support for Bestar BSD1218-A101KL68 LCD plus DT bindings - lvds: Add support for Ampire AMP19201200B5TZQW-T03 to DT bindings - edp: Add support for additonal mt8189 Chromebook panels - lvds: Add DT bindings for EDT ETML0700Z8DHA amdgpu: - add CRIU support for gem objects - RAS updates - VCN SRAM load fixes - EDID read fixes - eDP ALPM support - Documentation updates - Rework PTE flag generation - DCE6 fixes - VCN devcoredump cleanup - MMHUB client id fixes - VCN 5.0.1 RAS support - SMU 13.0.x updates - Expanded PCIe DPC support - Expanded VCN reset support - VPE per queue reset support - give kernel jobs unique id for tracing - pre-populate exported buffers - cyan skillfish updates - make vbios build number available in sysfs - userq updates - HDCP updates - support MMIO remap page as ttm pool - JPEG parser updates - DCE6 DC updates - use devm for i2c buses - GPUVM locking updates - Drop non-DC DCE11 code - improve fallback handling for pixel encoding amdkfd: - SVM/page migration fixes - debugfs fixes - add CRIO support for gem objects - SVM updates radeon: - use dev_warn_once in CS parsers xe: - add madvise interface - add DRM_IOCTL_XE_VM_QUERY_MEMORY_RANGE_ATTRS to query VMA count and memory attributes - drop L# bank mask reporting from media GT3 on Xe3+. - add SLPC power_profile sysfs interface - add configs attribs to add post/mid context-switch commands - handle firmware reported hardware errors notifying userspace with device wedged uevent - use same dir structure across sysfs/debugfs - cleanup and future proof vram region init - add G-states and PCI link states to debugfs - Add SRIOV support for CCS surfaces on Xe2+ - Enable SRIOV PF mode by default on supported platforms - move flush to common code - extended core workarounds for Xe2/3 - use DRM scheduler for delayed GT TLB invalidations - configs improvements and allow VF device enablement - prep work to expose mmio regions to userspace - VF migration support added - prepare GPU SVM for THP migration - start fixing XE_PAGE_SIZE vs PAGE_SIZE - add PSMI support for hw validation - resize VF bars to max possible size according to number of VFs - Ensure GT is in C0 during resume - pre-populate exported buffers - replace xe_hmm with gpusvm - add more SVM GT stats to debugfs - improve fake pci and WA kunnit handle for new platform testing - Test GuC to GuC comms to add debugging - use attribute groups to simplify sysfs registration - add Late Binding firmware code to interact with MEI i915: - apply multiple JSL/EHL/Gen7/Gen6 workarounds properly - protect against overflow in active_engine() - Use try_cmpxchg64() in __active_lookup() - include GuC registers in error state - get rid of dev->struct_mutex - iopoll: generalize read_poll_timout - lots more display refactoring - Reject HBR3 in any eDP Panel - Prune modes for YUV420 - Display Wa fix, additions, and updates - DP: Fix 2.7 Gbps link training on g4x - DP: Adjust the idle pattern handling - DP: Shuffle the link training code a bit - Don't set/read the DSI C clock divider on GLK - Enable_psr kernel parameter changes - Type-C enabled/disconnected dp-alt sink - Wildcat Lake enabling - DP HDR updates - DRAM detection - wait PSR idle on dsb commit - Remove FBC modulo 4 restriction for ADL-P+ - panic: refactor framebuffer allocation habanalabs: - debug/visibility improvements - vmalloc-backed coherent mmap support - HLDIO infrastructure nova-core: - various register!() macro improvements - minor vbios/firmware fixes/refactoring - advance firmware boot stages; process Booter and patch signatures - process GSP and GSP bootloader - Add r570.144 firmware bindings and update to it - Move GSP boot code to own module - Use new pin-init features to store driver's private data in a single allocation - Update ARef import from sync::aref nova-drm: - Update ARef import from sync::aref tyr: - initial driver skeleton for a rust driver for ARM Mali GPUs - capable of powering up, query metadata and provide it to userspace. msm: - GPU and Core: - in DT bindings describe clocks per GPU type - GMU bandwidth voting for x1-85 - a623/a663 speedbins - cleanup some remaining no-iommu leftovers after VM_BIND conversion - fix GEM obj 32b size truncation - add missing VM_BIND param validation - IFPC for x1-85 and a750 - register xml and gen_header.py sync from mesa - Display: - add missing bindings for display on SC8180X - added DisplayPort MST bindings - conversion from round_rate() to determine_rate() amdxdna: - add IOCTL_AMDXDNA_GET_ARRAY - support user space allocated buffers - streamline PM interfaces - Refactoring wrt. hardware contexts - improve error reporting nouveau: - use GSP firmware by default - improve error reporting - Pre-populate exported buffers ast: - Clean up detection of DRAM config exynos: - add DSIM bridge driver support for Exynos7870 - Document Exynos7870 DSIM compatible in dt-binding panthor: - Print task/pid on errors - Add support for Mali G710, G510, G310, Gx15, Gx20, Gx25 - Improve cache flushing - Fail VM bind if BO has offset renesas: - convert to RUNTIME_PM_OPS rcar-du: - Make number of lanes configurable - Use RUNTIME_PM_OPS - Add support for DSI commands rocket: - Add driver for Rockchip NPU plus DT bindings - Use kfree() and sizeof() correctly - Test DMA status rockchip: - dsi2: Add support for RK3576 plus DT bindings - Add support for RK3588 DPTX output tidss: - Use crtc_ fields for programming display mode - Remove other drivers from aperture pixpaper: - Add support for Mayqueen Pixpaper plus DT bindings v3d: - Support querying nubmer of GPU resets for KHR_robustness stm: - Clean up logging - ltdc: Add support support for STM32MP257F-EV1 plus DT bindings sitronix: - st7571-i2c: Add support for inverted displays and 2-bit grayscale tidss: - Convert to kernel's FIELD_ macros vesadrm: - Support 8-bit palette mode imagination: - Improve power management - Add support for TH1520 GPU - Support Risc-V architectures v3d: - Improve job management and locking vkms: - Support variants of ARGB8888, ARGB16161616, RGB565, RGB888 and P01x - Spport YUV with 16-bit components" * tag 'drm-next-2025-10-01' of https://gitlab.freedesktop.org/drm/kernel: (1455 commits) drm/amd: Add name to modes from amdgpu_connector_add_common_modes() drm/amd: Drop some common modes from amdgpu_connector_add_common_modes() drm/amdgpu: update MODULE_PARM_DESC for freesync_video drm/amd: Use dynamic array size declaration for amdgpu_connector_add_common_modes() drm/amd/display: Share dce100_validate_global with DCE6-8 drm/amd/display: Share dce100_validate_bandwidth with DCE6-8 drm/amdgpu: Fix fence signaling race condition in userqueue amd/amdkfd: enhance kfd process check in switch partition amd/amdkfd: resolve a race in amdgpu_amdkfd_device_fini_sw drm/amd/display: Reject modes with too high pixel clock on DCE6-10 drm/amd: Drop unnecessary check in amdgpu_connector_add_common_modes() drm/amd/display: Only enable common modes for eDP and LVDS drm/amdgpu: remove the redeclaration of variable i drm/amdgpu/userq: assign an error code for invalid userq va drm/amdgpu: revert "rework reserved VMID handling" v2 drm/amdgpu: remove leftover from enforcing isolation by VMID drm/amdgpu: Add fallback to pipe reset if KCQ ring reset fails accel/habanalabs: add Infineon version check accel/habanalabs/gaudi2: read preboot status after recovering from dirty state accel/habanalabs: add HL_GET_P_STATE passthrough type ...
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c')
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c348
1 files changed, 264 insertions, 84 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 540817e296da..e0ee21150860 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -122,12 +122,15 @@ const char *get_ras_block_str(struct ras_common_if *ras_block)
/* typical ECC bad page rate is 1 bad page per 100MB VRAM */
#define RAS_BAD_PAGE_COVER (100 * 1024 * 1024ULL)
-#define MAX_UMC_POISON_POLLING_TIME_ASYNC 300 //ms
+#define MAX_UMC_POISON_POLLING_TIME_ASYNC 10
#define AMDGPU_RAS_RETIRE_PAGE_INTERVAL 100 //ms
#define MAX_FLUSH_RETIRE_DWORK_TIMES 100
+#define BYPASS_ALLOCATED_ADDRESS 0x0
+#define BYPASS_INITIALIZATION_ADDRESS 0x1
+
enum amdgpu_ras_retire_page_reservation {
AMDGPU_RAS_RETIRE_PAGE_RESERVED,
AMDGPU_RAS_RETIRE_PAGE_PENDING,
@@ -136,10 +139,14 @@ enum amdgpu_ras_retire_page_reservation {
atomic_t amdgpu_ras_in_intr = ATOMIC_INIT(0);
-static bool amdgpu_ras_check_bad_page_unlock(struct amdgpu_ras *con,
+static int amdgpu_ras_check_bad_page_unlock(struct amdgpu_ras *con,
uint64_t addr);
-static bool amdgpu_ras_check_bad_page(struct amdgpu_device *adev,
+static int amdgpu_ras_check_bad_page(struct amdgpu_device *adev,
uint64_t addr);
+
+static void amdgpu_ras_critical_region_init(struct amdgpu_device *adev);
+static void amdgpu_ras_critical_region_fini(struct amdgpu_device *adev);
+
#ifdef CONFIG_X86_MCE_AMD
static void amdgpu_register_bad_pages_mca_notifier(struct amdgpu_device *adev);
struct mce_notifier_adev_list {
@@ -169,18 +176,16 @@ static int amdgpu_reserve_page_direct(struct amdgpu_device *adev, uint64_t addre
struct eeprom_table_record err_rec;
int ret;
- if ((address >= adev->gmc.mc_vram_size) ||
- (address >= RAS_UMC_INJECT_ADDR_LIMIT)) {
+ ret = amdgpu_ras_check_bad_page(adev, address);
+ if (ret == -EINVAL) {
dev_warn(adev->dev,
- "RAS WARN: input address 0x%llx is invalid.\n",
- address);
+ "RAS WARN: input address 0x%llx is invalid.\n",
+ address);
return -EINVAL;
- }
-
- if (amdgpu_ras_check_bad_page(adev, address)) {
+ } else if (ret == 1) {
dev_warn(adev->dev,
- "RAS WARN: 0x%llx has already been marked as bad page!\n",
- address);
+ "RAS WARN: 0x%llx has already been marked as bad page!\n",
+ address);
return 0;
}
@@ -207,6 +212,56 @@ static int amdgpu_reserve_page_direct(struct amdgpu_device *adev, uint64_t addre
return 0;
}
+static int amdgpu_check_address_validity(struct amdgpu_device *adev,
+ uint64_t address, uint64_t flags)
+{
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+ struct amdgpu_vram_block_info blk_info;
+ uint64_t page_pfns[32] = {0};
+ int i, ret, count;
+ bool hit = false;
+
+ if (amdgpu_ip_version(adev, UMC_HWIP, 0) < IP_VERSION(12, 0, 0))
+ return 0;
+
+ if (amdgpu_sriov_vf(adev)) {
+ if (amdgpu_virt_check_vf_critical_region(adev, address, &hit))
+ return -EPERM;
+ return hit ? -EACCES : 0;
+ }
+
+ if ((address >= adev->gmc.mc_vram_size) ||
+ (address >= RAS_UMC_INJECT_ADDR_LIMIT))
+ return -EFAULT;
+
+ count = amdgpu_umc_lookup_bad_pages_in_a_row(adev,
+ address, page_pfns, ARRAY_SIZE(page_pfns));
+ if (count <= 0)
+ return -EPERM;
+
+ for (i = 0; i < count; i++) {
+ memset(&blk_info, 0, sizeof(blk_info));
+ ret = amdgpu_vram_mgr_query_address_block_info(&adev->mman.vram_mgr,
+ page_pfns[i] << AMDGPU_GPU_PAGE_SHIFT, &blk_info);
+ if (!ret) {
+ /* The input address that needs to be checked is allocated by
+ * current calling process, so it is necessary to exclude
+ * the calling process.
+ */
+ if ((flags == BYPASS_ALLOCATED_ADDRESS) &&
+ ((blk_info.task.pid != task_pid_nr(current)) ||
+ strncmp(blk_info.task.comm, current->comm, TASK_COMM_LEN)))
+ return -EACCES;
+ else if ((flags == BYPASS_INITIALIZATION_ADDRESS) &&
+ (blk_info.task.pid == con->init_task_pid) &&
+ !strncmp(blk_info.task.comm, con->init_task_comm, TASK_COMM_LEN))
+ return -EACCES;
+ }
+ }
+
+ return 0;
+}
+
static ssize_t amdgpu_ras_debugfs_read(struct file *f, char __user *buf,
size_t size, loff_t *pos)
{
@@ -297,6 +352,8 @@ static int amdgpu_ras_debugfs_ctrl_parse_data(struct file *f,
op = 2;
else if (strstr(str, "retire_page") != NULL)
op = 3;
+ else if (strstr(str, "check_address") != NULL)
+ op = 4;
else if (str[0] && str[1] && str[2] && str[3])
/* ascii string, but commands are not matched. */
return -EINVAL;
@@ -311,6 +368,15 @@ static int amdgpu_ras_debugfs_ctrl_parse_data(struct file *f,
data->inject.address = address;
return 0;
+ } else if (op == 4) {
+ if (sscanf(str, "%*s 0x%llx 0x%llx", &address, &value) != 2 &&
+ sscanf(str, "%*s %llu %llu", &address, &value) != 2)
+ return -EINVAL;
+
+ data->op = op;
+ data->inject.address = address;
+ data->inject.value = value;
+ return 0;
}
if (amdgpu_ras_find_block_id_by_name(block_name, &block_id))
@@ -500,6 +566,9 @@ static ssize_t amdgpu_ras_debugfs_ctrl_write(struct file *f,
return size;
else
return ret;
+ } else if (data.op == 4) {
+ ret = amdgpu_check_address_validity(adev, data.inject.address, data.inject.value);
+ return ret ? ret : size;
}
if (!amdgpu_ras_is_supported(adev, data.head.block))
@@ -513,22 +582,16 @@ static ssize_t amdgpu_ras_debugfs_ctrl_write(struct file *f,
ret = amdgpu_ras_feature_enable(adev, &data.head, 1);
break;
case 2:
- if ((data.inject.address >= adev->gmc.mc_vram_size &&
- adev->gmc.mc_vram_size) ||
- (data.inject.address >= RAS_UMC_INJECT_ADDR_LIMIT)) {
- dev_warn(adev->dev, "RAS WARN: input address "
- "0x%llx is invalid.",
+ /* umc ce/ue error injection for a bad page is not allowed */
+ if (data.head.block == AMDGPU_RAS_BLOCK__UMC)
+ ret = amdgpu_ras_check_bad_page(adev, data.inject.address);
+ if (ret == -EINVAL) {
+ dev_warn(adev->dev, "RAS WARN: input address 0x%llx is invalid.",
data.inject.address);
- ret = -EINVAL;
break;
- }
-
- /* umc ce/ue error injection for a bad page is not allowed */
- if ((data.head.block == AMDGPU_RAS_BLOCK__UMC) &&
- amdgpu_ras_check_bad_page(adev, data.inject.address)) {
- dev_warn(adev->dev, "RAS WARN: inject: 0x%llx has "
- "already been marked as bad!\n",
- data.inject.address);
+ } else if (ret == 1) {
+ dev_warn(adev->dev, "RAS WARN: inject: 0x%llx has already been marked as bad!\n",
+ data.inject.address);
break;
}
@@ -2566,18 +2629,26 @@ static int amdgpu_ras_badpages_read(struct amdgpu_device *adev,
goto out;
}
- *bps = kmalloc(sizeof(struct ras_badpage) * data->count, GFP_KERNEL);
+ *bps = kmalloc_array(data->count, sizeof(struct ras_badpage), GFP_KERNEL);
if (!*bps) {
ret = -ENOMEM;
goto out;
}
for (; i < data->count; i++) {
+ if (!data->bps[i].ts)
+ continue;
+
(*bps)[i] = (struct ras_badpage){
.bp = data->bps[i].retired_page,
.size = AMDGPU_GPU_PAGE_SIZE,
.flags = AMDGPU_RAS_RETIRE_PAGE_RESERVED,
};
+
+ if (amdgpu_ras_check_critical_address(adev,
+ data->bps[i].retired_page << AMDGPU_GPU_PAGE_SHIFT))
+ continue;
+
status = amdgpu_vram_mgr_query_page_status(&adev->mman.vram_mgr,
data->bps[i].retired_page << AMDGPU_GPU_PAGE_SHIFT);
if (status == -EBUSY)
@@ -2586,7 +2657,7 @@ static int amdgpu_ras_badpages_read(struct amdgpu_device *adev,
(*bps)[i].flags = AMDGPU_RAS_RETIRE_PAGE_FAULT;
}
- *count = data->count;
+ *count = con->bad_page_num;
out:
mutex_unlock(&con->recovery_lock);
return ret;
@@ -2638,6 +2709,7 @@ static void amdgpu_ras_do_recovery(struct work_struct *work)
struct amdgpu_device *adev = ras->adev;
struct list_head device_list, *device_list_handle = NULL;
struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);
+ unsigned int error_query_mode;
enum ras_event_type type;
if (hive) {
@@ -2666,6 +2738,13 @@ static void amdgpu_ras_do_recovery(struct work_struct *work)
device_list_handle = &device_list;
}
+ if (amdgpu_ras_get_error_query_mode(adev, &error_query_mode)) {
+ if (error_query_mode == AMDGPU_RAS_FIRMWARE_ERROR_QUERY) {
+ /* wait 500ms to ensure pmfw polling mca bank info done */
+ msleep(500);
+ }
+ }
+
type = amdgpu_ras_get_fatal_error_event(adev);
list_for_each_entry(remote_adev,
device_list_handle, gmc.xgmi.head) {
@@ -2722,7 +2801,7 @@ static int amdgpu_ras_realloc_eh_data_space(struct amdgpu_device *adev,
unsigned int old_space = data->count + data->space_left;
unsigned int new_space = old_space + pages;
unsigned int align_space = ALIGN(new_space, 512);
- void *bps = kmalloc(align_space * sizeof(*data->bps), GFP_KERNEL);
+ void *bps = kmalloc_array(align_space, sizeof(*data->bps), GFP_KERNEL);
if (!bps) {
return -ENOMEM;
@@ -2814,8 +2893,11 @@ static int __amdgpu_ras_restore_bad_pages(struct amdgpu_device *adev,
for (j = 0; j < count; j++) {
if (amdgpu_ras_check_bad_page_unlock(con,
- bps[j].retired_page << AMDGPU_GPU_PAGE_SHIFT))
+ bps[j].retired_page << AMDGPU_GPU_PAGE_SHIFT)) {
+ data->count++;
+ data->space_left--;
continue;
+ }
if (!data->space_left &&
amdgpu_ras_realloc_eh_data_space(adev, data, 256)) {
@@ -2828,6 +2910,7 @@ static int __amdgpu_ras_restore_bad_pages(struct amdgpu_device *adev,
sizeof(struct eeprom_table_record));
data->count++;
data->space_left--;
+ con->bad_page_num++;
}
return 0;
@@ -2974,7 +3057,7 @@ int amdgpu_ras_add_bad_pages(struct amdgpu_device *adev,
ret = __amdgpu_ras_convert_rec_array_from_rom(adev,
&bps[i], &err_data, nps);
if (ret)
- control->ras_num_bad_pages -= adev->umc.retire_unit;
+ con->bad_page_num -= adev->umc.retire_unit;
i += (adev->umc.retire_unit - 1);
} else {
break;
@@ -2988,8 +3071,10 @@ int amdgpu_ras_add_bad_pages(struct amdgpu_device *adev,
ret = __amdgpu_ras_convert_rec_from_rom(adev,
&bps[i], &err_data, nps);
if (ret)
- control->ras_num_bad_pages -= adev->umc.retire_unit;
+ con->bad_page_num -= adev->umc.retire_unit;
}
+
+ con->eh_data->count_saved = con->eh_data->count;
} else {
ret = __amdgpu_ras_restore_bad_pages(adev, bps, pages);
}
@@ -3012,7 +3097,7 @@ int amdgpu_ras_save_bad_pages(struct amdgpu_device *adev,
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
struct ras_err_handler_data *data;
struct amdgpu_ras_eeprom_control *control;
- int save_count, unit_num, bad_page_num, i;
+ int save_count, unit_num, i;
if (!con || !con->eh_data) {
if (new_cnt)
@@ -3033,27 +3118,26 @@ int amdgpu_ras_save_bad_pages(struct amdgpu_device *adev,
mutex_lock(&con->recovery_lock);
control = &con->eeprom_control;
data = con->eh_data;
- bad_page_num = control->ras_num_bad_pages;
- save_count = data->count - bad_page_num;
+ unit_num = data->count / adev->umc.retire_unit - control->ras_num_recs;
+ save_count = con->bad_page_num - control->ras_num_bad_pages;
mutex_unlock(&con->recovery_lock);
- unit_num = save_count / adev->umc.retire_unit;
if (new_cnt)
*new_cnt = unit_num;
/* only new entries are saved */
- if (save_count > 0) {
+ if (unit_num > 0) {
/*old asics only save pa to eeprom like before*/
if (IP_VERSION_MAJ(amdgpu_ip_version(adev, UMC_HWIP, 0)) < 12) {
if (amdgpu_ras_eeprom_append(control,
- &data->bps[bad_page_num], save_count)) {
+ &data->bps[data->count_saved], unit_num)) {
dev_err(adev->dev, "Failed to save EEPROM table data!");
return -EIO;
}
} else {
for (i = 0; i < unit_num; i++) {
if (amdgpu_ras_eeprom_append(control,
- &data->bps[bad_page_num +
+ &data->bps[data->count_saved +
i * adev->umc.retire_unit], 1)) {
dev_err(adev->dev, "Failed to save EEPROM table data!");
return -EIO;
@@ -3062,6 +3146,7 @@ int amdgpu_ras_save_bad_pages(struct amdgpu_device *adev,
}
dev_info(adev->dev, "Saved %d pages to EEPROM table.\n", save_count);
+ data->count_saved = data->count;
}
return 0;
@@ -3116,17 +3201,17 @@ static int amdgpu_ras_load_bad_pages(struct amdgpu_device *adev)
}
}
+ ret = amdgpu_ras_add_bad_pages(adev, bps, control->ras_num_recs, true);
+ if (ret)
+ goto out;
+
ret = amdgpu_ras_eeprom_check(control);
if (ret)
goto out;
/* HW not usable */
- if (amdgpu_ras_is_rma(adev)) {
+ if (amdgpu_ras_is_rma(adev))
ret = -EHWPOISON;
- goto out;
- }
-
- ret = amdgpu_ras_add_bad_pages(adev, bps, control->ras_num_recs, true);
}
out:
@@ -3134,18 +3219,24 @@ out:
return ret;
}
-static bool amdgpu_ras_check_bad_page_unlock(struct amdgpu_ras *con,
+static int amdgpu_ras_check_bad_page_unlock(struct amdgpu_ras *con,
uint64_t addr)
{
struct ras_err_handler_data *data = con->eh_data;
+ struct amdgpu_device *adev = con->adev;
int i;
+ if ((addr >= adev->gmc.mc_vram_size &&
+ adev->gmc.mc_vram_size) ||
+ (addr >= RAS_UMC_INJECT_ADDR_LIMIT))
+ return -EINVAL;
+
addr >>= AMDGPU_GPU_PAGE_SHIFT;
for (i = 0; i < data->count; i++)
if (addr == data->bps[i].retired_page)
- return true;
+ return 1;
- return false;
+ return 0;
}
/*
@@ -3153,11 +3244,11 @@ static bool amdgpu_ras_check_bad_page_unlock(struct amdgpu_ras *con,
*
* Note: this check is only for umc block
*/
-static bool amdgpu_ras_check_bad_page(struct amdgpu_device *adev,
+static int amdgpu_ras_check_bad_page(struct amdgpu_device *adev,
uint64_t addr)
{
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
- bool ret = false;
+ int ret = 0;
if (!con || !con->eh_data)
return ret;
@@ -3241,7 +3332,7 @@ static void amdgpu_ras_ecc_log_init(struct ras_ecc_log_info *ecc_log)
INIT_RADIX_TREE(&ecc_log->de_page_tree, GFP_KERNEL);
ecc_log->de_queried_count = 0;
- ecc_log->prev_de_queried_count = 0;
+ ecc_log->consumption_q_count = 0;
}
static void amdgpu_ras_ecc_log_fini(struct ras_ecc_log_info *ecc_log)
@@ -3261,7 +3352,7 @@ static void amdgpu_ras_ecc_log_fini(struct ras_ecc_log_info *ecc_log)
mutex_destroy(&ecc_log->lock);
ecc_log->de_queried_count = 0;
- ecc_log->prev_de_queried_count = 0;
+ ecc_log->consumption_q_count = 0;
}
static bool amdgpu_ras_schedule_retirement_dwork(struct amdgpu_ras *con,
@@ -3287,7 +3378,6 @@ static void amdgpu_ras_do_page_retirement(struct work_struct *work)
page_retirement_dwork.work);
struct amdgpu_device *adev = con->adev;
struct ras_err_data err_data;
- unsigned long err_cnt;
/* If gpu reset is ongoing, delay retiring the bad pages */
if (amdgpu_in_reset(adev) || amdgpu_ras_in_recovery(adev)) {
@@ -3299,13 +3389,9 @@ static void amdgpu_ras_do_page_retirement(struct work_struct *work)
amdgpu_ras_error_data_init(&err_data);
amdgpu_umc_handle_bad_pages(adev, &err_data);
- err_cnt = err_data.err_addr_cnt;
amdgpu_ras_error_data_fini(&err_data);
- if (err_cnt && amdgpu_ras_is_rma(adev))
- amdgpu_ras_reset_gpu(adev);
-
amdgpu_ras_schedule_retirement_dwork(con,
AMDGPU_RAS_RETIRE_PAGE_INTERVAL);
}
@@ -3316,49 +3402,39 @@ static int amdgpu_ras_poison_creation_handler(struct amdgpu_device *adev,
int ret = 0;
struct ras_ecc_log_info *ecc_log;
struct ras_query_if info;
- uint32_t timeout = 0;
+ u32 timeout = MAX_UMC_POISON_POLLING_TIME_ASYNC;
struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
- uint64_t de_queried_count;
- uint32_t new_detect_count, total_detect_count;
- uint32_t need_query_count = poison_creation_count;
+ u64 de_queried_count;
+ u64 consumption_q_count;
enum ras_event_type type = RAS_EVENT_TYPE_POISON_CREATION;
memset(&info, 0, sizeof(info));
info.head.block = AMDGPU_RAS_BLOCK__UMC;
ecc_log = &ras->umc_ecc_log;
- total_detect_count = 0;
+ ecc_log->de_queried_count = 0;
+ ecc_log->consumption_q_count = 0;
+
do {
ret = amdgpu_ras_query_error_status_with_event(adev, &info, type);
if (ret)
return ret;
de_queried_count = ecc_log->de_queried_count;
- if (de_queried_count > ecc_log->prev_de_queried_count) {
- new_detect_count = de_queried_count - ecc_log->prev_de_queried_count;
- ecc_log->prev_de_queried_count = de_queried_count;
- timeout = 0;
- } else {
- new_detect_count = 0;
- }
+ consumption_q_count = ecc_log->consumption_q_count;
- if (new_detect_count) {
- total_detect_count += new_detect_count;
- } else {
- if (!timeout && need_query_count)
- timeout = MAX_UMC_POISON_POLLING_TIME_ASYNC;
+ if (de_queried_count && consumption_q_count)
+ break;
- if (timeout) {
- if (!--timeout)
- break;
- msleep(1);
- }
- }
- } while (total_detect_count < need_query_count);
+ msleep(100);
+ } while (--timeout);
- if (total_detect_count)
+ if (de_queried_count)
schedule_delayed_work(&ras->page_retirement_dwork, 0);
+ if (amdgpu_ras_is_rma(adev) && atomic_cmpxchg(&ras->rma_in_recovery, 0, 1) == 0)
+ amdgpu_ras_reset_gpu(adev);
+
return 0;
}
@@ -3394,6 +3470,12 @@ static int amdgpu_ras_poison_consumption_handler(struct amdgpu_device *adev,
reset_flags |= msg.reset;
}
+ /*
+ * Try to ensure poison creation handler is completed first
+ * to set rma if bad page exceed threshold.
+ */
+ flush_delayed_work(&con->page_retirement_dwork);
+
/* for RMA, amdgpu_ras_poison_creation_handler will trigger gpu reset */
if (reset_flags && !amdgpu_ras_is_rma(adev)) {
if (reset_flags & AMDGPU_RAS_GPU_RESET_MODE1_RESET)
@@ -3403,8 +3485,6 @@ static int amdgpu_ras_poison_consumption_handler(struct amdgpu_device *adev,
else
reset = reset_flags;
- flush_delayed_work(&con->page_retirement_dwork);
-
con->gpu_reset_flags |= reset;
amdgpu_ras_reset_gpu(adev);
@@ -3434,6 +3514,7 @@ static int amdgpu_ras_page_retirement_thread(void *param)
if (kthread_should_stop())
break;
+ mutex_lock(&con->poison_lock);
gpu_reset = 0;
do {
@@ -3446,7 +3527,8 @@ static int amdgpu_ras_page_retirement_thread(void *param)
atomic_sub(poison_creation_count, &con->poison_creation_count);
atomic_sub(poison_creation_count, &con->page_retirement_req_cnt);
}
- } while (atomic_read(&con->poison_creation_count));
+ } while (atomic_read(&con->poison_creation_count) &&
+ !atomic_read(&con->poison_consumption_count));
if (ret != -EIO) {
msg_count = kfifo_len(&con->poison_fifo);
@@ -3463,6 +3545,7 @@ static int amdgpu_ras_page_retirement_thread(void *param)
/* gpu mode-1 reset is ongoing or just completed ras mode-1 reset */
/* Clear poison creation request */
atomic_set(&con->poison_creation_count, 0);
+ atomic_set(&con->poison_consumption_count, 0);
/* Clear poison fifo */
amdgpu_ras_clear_poison_fifo(adev);
@@ -3487,9 +3570,12 @@ static int amdgpu_ras_page_retirement_thread(void *param)
atomic_sub(msg_count, &con->page_retirement_req_cnt);
}
+ atomic_set(&con->poison_consumption_count, 0);
+
/* Wake up work to save bad pages to eeprom */
schedule_delayed_work(&con->page_retirement_dwork, 0);
}
+ mutex_unlock(&con->poison_lock);
}
return 0;
@@ -3570,8 +3656,10 @@ int amdgpu_ras_recovery_init(struct amdgpu_device *adev, bool init_bp_info)
}
mutex_init(&con->recovery_lock);
+ mutex_init(&con->poison_lock);
INIT_WORK(&con->recovery_work, amdgpu_ras_do_recovery);
atomic_set(&con->in_recovery, 0);
+ atomic_set(&con->rma_in_recovery, 0);
con->eeprom_control.bad_channel_bitmap = 0;
max_eeprom_records_count = amdgpu_ras_eeprom_max_record_count(&con->eeprom_control);
@@ -3589,6 +3677,7 @@ int amdgpu_ras_recovery_init(struct amdgpu_device *adev, bool init_bp_info)
init_waitqueue_head(&con->page_retirement_wq);
atomic_set(&con->page_retirement_req_cnt, 0);
atomic_set(&con->poison_creation_count, 0);
+ atomic_set(&con->poison_consumption_count, 0);
con->page_retirement_thread =
kthread_run(amdgpu_ras_page_retirement_thread, adev, "umc_page_retirement");
if (IS_ERR(con->page_retirement_thread)) {
@@ -3661,6 +3750,8 @@ static int amdgpu_ras_recovery_fini(struct amdgpu_device *adev)
kfree(data);
mutex_unlock(&con->recovery_lock);
+ amdgpu_ras_critical_region_init(adev);
+
return 0;
}
/* recovery end */
@@ -4087,6 +4178,12 @@ int amdgpu_ras_init(struct amdgpu_device *adev)
goto release_con;
}
+ con->init_task_pid = task_pid_nr(current);
+ get_task_comm(con->init_task_comm, current);
+
+ mutex_init(&con->critical_region_lock);
+ INIT_LIST_HEAD(&con->critical_region_head);
+
dev_info(adev->dev, "RAS INFO: ras initialized successfully, "
"hardware ability[%x] ras_mask[%x]\n",
adev->ras_hw_enabled, adev->ras_enabled);
@@ -4366,6 +4463,9 @@ int amdgpu_ras_fini(struct amdgpu_device *adev)
if (!adev->ras_enabled || !con)
return 0;
+ amdgpu_ras_critical_region_fini(adev);
+ mutex_destroy(&con->critical_region_lock);
+
list_for_each_entry_safe(ras_node, tmp, &adev->ras_list, node) {
if (ras_node->ras_obj) {
obj = ras_node->ras_obj;
@@ -5274,6 +5374,9 @@ int amdgpu_ras_reserve_page(struct amdgpu_device *adev, uint64_t pfn)
uint64_t start = pfn << AMDGPU_GPU_PAGE_SHIFT;
int ret = 0;
+ if (amdgpu_ras_check_critical_address(adev, start))
+ return 0;
+
mutex_lock(&con->page_rsv_lock);
ret = amdgpu_vram_mgr_query_page_status(mgr, start);
if (ret == -ENOENT)
@@ -5310,3 +5413,80 @@ bool amdgpu_ras_is_rma(struct amdgpu_device *adev)
return con->is_rma;
}
+
+int amdgpu_ras_add_critical_region(struct amdgpu_device *adev,
+ struct amdgpu_bo *bo)
+{
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+ struct amdgpu_vram_mgr_resource *vres;
+ struct ras_critical_region *region;
+ struct drm_buddy_block *block;
+ int ret = 0;
+
+ if (!bo || !bo->tbo.resource)
+ return -EINVAL;
+
+ vres = to_amdgpu_vram_mgr_resource(bo->tbo.resource);
+
+ mutex_lock(&con->critical_region_lock);
+
+ /* Check if the bo had been recorded */
+ list_for_each_entry(region, &con->critical_region_head, node)
+ if (region->bo == bo)
+ goto out;
+
+ /* Record new critical amdgpu bo */
+ list_for_each_entry(block, &vres->blocks, link) {
+ region = kzalloc(sizeof(*region), GFP_KERNEL);
+ if (!region) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ region->bo = bo;
+ region->start = amdgpu_vram_mgr_block_start(block);
+ region->size = amdgpu_vram_mgr_block_size(block);
+ list_add_tail(&region->node, &con->critical_region_head);
+ }
+
+out:
+ mutex_unlock(&con->critical_region_lock);
+
+ return ret;
+}
+
+static void amdgpu_ras_critical_region_init(struct amdgpu_device *adev)
+{
+ amdgpu_ras_add_critical_region(adev, adev->mman.fw_reserved_memory);
+}
+
+static void amdgpu_ras_critical_region_fini(struct amdgpu_device *adev)
+{
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+ struct ras_critical_region *region, *tmp;
+
+ mutex_lock(&con->critical_region_lock);
+ list_for_each_entry_safe(region, tmp, &con->critical_region_head, node) {
+ list_del(&region->node);
+ kfree(region);
+ }
+ mutex_unlock(&con->critical_region_lock);
+}
+
+bool amdgpu_ras_check_critical_address(struct amdgpu_device *adev, uint64_t addr)
+{
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+ struct ras_critical_region *region;
+ bool ret = false;
+
+ mutex_lock(&con->critical_region_lock);
+ list_for_each_entry(region, &con->critical_region_head, node) {
+ if ((region->start <= addr) &&
+ (addr < (region->start + region->size))) {
+ ret = true;
+ break;
+ }
+ }
+ mutex_unlock(&con->critical_region_lock);
+
+ return ret;
+}