summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c
diff options
context:
space:
mode:
authorGuchun Chen <guchun.chen@amd.com>2020-07-23 11:20:02 +0300
committerAlex Deucher <alexander.deucher@amd.com>2020-08-05 00:26:54 +0300
commite8fbaf03429d085b05e66b153a8363b746c94b43 (patch)
treeaf9a73464c815142d4fae4d336d1c277296c87ef /drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c
parent9c06f91ff2349da67651cddf88507aa967b58b08 (diff)
downloadlinux-e8fbaf03429d085b05e66b153a8363b746c94b43.tar.xz
drm/amdgpu: break GPU recovery once it's in bad state(v4)
When GPU executes recovery and retriving bad GPU tag from external eerpom device, the recovery will be broken and error message is printed as well for user's awareness. v2: Refine warning message in threshold reaching case, and fix spelling typo. v3: Fix explicit calling of bad gpu. v4: Rename function names. Signed-off-by: Guchun Chen <guchun.chen@amd.com> Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c')
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c40
1 files changed, 40 insertions, 0 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c
index 461dfd22bc1c..7848a426c95b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c
@@ -386,6 +386,46 @@ static uint32_t __correct_eeprom_dest_address(uint32_t curr_address)
return curr_address;
}
+int amdgpu_ras_eeprom_check_err_threshold(
+ struct amdgpu_ras_eeprom_control *control,
+ bool *exceed_err_limit)
+{
+ struct amdgpu_device *adev = to_amdgpu_device(control);
+ unsigned char buff[EEPROM_ADDRESS_SIZE +
+ EEPROM_TABLE_HEADER_SIZE] = { 0 };
+ struct amdgpu_ras_eeprom_table_header *hdr = &control->tbl_hdr;
+ struct i2c_msg msg = {
+ .addr = control->i2c_address,
+ .flags = I2C_M_RD,
+ .len = EEPROM_ADDRESS_SIZE + EEPROM_TABLE_HEADER_SIZE,
+ .buf = buff,
+ };
+ int ret;
+
+ *exceed_err_limit = false;
+
+ /* read EEPROM table header */
+ mutex_lock(&control->tbl_mutex);
+ ret = i2c_transfer(&adev->pm.smu_i2c, &msg, 1);
+ if (ret < 1) {
+ dev_err(adev->dev, "Failed to read EEPROM table header.\n");
+ goto err;
+ }
+
+ __decode_table_header_from_buff(hdr, &buff[2]);
+
+ if (hdr->header == EEPROM_TABLE_HDR_BAD) {
+ dev_warn(adev->dev, "This GPU is in BAD status.");
+ dev_warn(adev->dev, "Please retire it or setting one bigger "
+ "threshold value when reloading driver.\n");
+ *exceed_err_limit = true;
+ }
+
+err:
+ mutex_unlock(&control->tbl_mutex);
+ return 0;
+}
+
int amdgpu_ras_eeprom_process_recods(struct amdgpu_ras_eeprom_control *control,
struct eeprom_table_record *records,
bool write,