diff options
author | Dennis Li <Dennis.Li@amd.com> | 2020-07-08 10:07:13 +0300 |
---|---|---|
committer | Alex Deucher <alexander.deucher@amd.com> | 2020-07-27 23:21:37 +0300 |
commit | df9c8d1aa278c435c30a69b8f2418b4a52fcb929 (patch) | |
tree | bb486ddef62e0179b0b97b0ba93a5fa9fcb0a207 /drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c | |
parent | c5079f35c0ec315c944398e687821c1ceb6c6d4f (diff) | |
download | linux-df9c8d1aa278c435c30a69b8f2418b4a52fcb929.tar.xz |
drm/amdgpu: fix system hang issue during GPU reset
when GPU hang, driver has multi-paths to enter amdgpu_device_gpu_recover,
the atomic adev->in_gpu_reset and hive->in_reset are used to avoid
re-entering GPU recovery.
During GPU reset and resume, it is unsafe that other threads access GPU,
which maybe cause GPU reset failed. Therefore the new rw_semaphore
adev->reset_sem is introduced, which protect GPU from being accessed by
external threads during recovery.
v2:
1. add rwlock for some ioctls, debugfs and file-close function.
2. change to use dqm->is_resetting and dqm_lock for protection in kfd
driver.
3. remove try_lock and change adev->in_gpu_reset as atomic, to avoid
re-enter GPU recovery for the same GPU hang.
v3:
1. change back to use adev->reset_sem to protect kfd callback
functions, because dqm_lock couldn't protect all codes, for example:
free_mqd must be called outside of dqm_lock;
[ 1230.176199] Hardware name: Supermicro SYS-7049GP-TRT/X11DPG-QT, BIOS 3.1 05/23/2019
[ 1230.177221] Call Trace:
[ 1230.178249] dump_stack+0x98/0xd5
[ 1230.179443] amdgpu_virt_kiq_reg_write_reg_wait+0x181/0x190 [amdgpu]
[ 1230.180673] gmc_v9_0_flush_gpu_tlb+0xcc/0x310 [amdgpu]
[ 1230.181882] amdgpu_gart_unbind+0xa9/0xe0 [amdgpu]
[ 1230.183098] amdgpu_ttm_backend_unbind+0x46/0x180 [amdgpu]
[ 1230.184239] ? ttm_bo_put+0x171/0x5f0 [ttm]
[ 1230.185394] ttm_tt_unbind+0x21/0x40 [ttm]
[ 1230.186558] ttm_tt_destroy.part.12+0x12/0x60 [ttm]
[ 1230.187707] ttm_tt_destroy+0x13/0x20 [ttm]
[ 1230.188832] ttm_bo_cleanup_memtype_use+0x36/0x80 [ttm]
[ 1230.189979] ttm_bo_put+0x1be/0x5f0 [ttm]
[ 1230.191230] amdgpu_bo_unref+0x1e/0x30 [amdgpu]
[ 1230.192522] amdgpu_amdkfd_free_gtt_mem+0xaf/0x140 [amdgpu]
[ 1230.193833] free_mqd+0x25/0x40 [amdgpu]
[ 1230.195143] destroy_queue_cpsch+0x1a7/0x270 [amdgpu]
[ 1230.196475] pqm_destroy_queue+0x105/0x260 [amdgpu]
[ 1230.197819] kfd_ioctl_destroy_queue+0x37/0x70 [amdgpu]
[ 1230.199154] kfd_ioctl+0x277/0x500 [amdgpu]
[ 1230.200458] ? kfd_ioctl_get_clock_counters+0x60/0x60 [amdgpu]
[ 1230.201656] ? tomoyo_file_ioctl+0x19/0x20
[ 1230.202831] ksys_ioctl+0x98/0xb0
[ 1230.204004] __x64_sys_ioctl+0x1a/0x20
[ 1230.205174] do_syscall_64+0x5f/0x250
[ 1230.206339] entry_SYSCALL_64_after_hwframe+0x49/0xbe
2. remove try_lock and introduce atomic hive->in_reset, to avoid
re-enter GPU recovery.
v4:
1. remove an unnecessary whitespace change in kfd_chardev.c
2. remove comment codes in amdgpu_device.c
3. add more detailed comment in commit message
4. define a wrap function amdgpu_in_reset
v5:
1. Fix some style issues.
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
Suggested-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com>
Suggested-by: Christian König <christian.koenig@amd.com>
Suggested-by: Felix Kuehling <Felix.Kuehling@amd.com>
Suggested-by: Lijo Lazar <Lijo.Lazar@amd.com>
Suggested-by: Luben Tukov <luben.tuikov@amd.com>
Signed-off-by: Dennis Li <Dennis.Li@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c')
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c | 350 |
1 files changed, 282 insertions, 68 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c index 0a05db9b7132..2f7b0550ff16 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c @@ -163,7 +163,7 @@ static ssize_t amdgpu_get_power_dpm_state(struct device *dev, enum amd_pm_state_type pm; int ret; - if (adev->in_gpu_reset) + if (amdgpu_in_reset(adev)) return -EPERM; ret = pm_runtime_get_sync(ddev->dev); @@ -172,6 +172,8 @@ static ssize_t amdgpu_get_power_dpm_state(struct device *dev, return ret; } + down_read(&adev->reset_sem); + if (is_support_sw_smu(adev)) { if (adev->smu.ppt_funcs->get_current_power_state) pm = smu_get_current_power_state(&adev->smu); @@ -183,6 +185,8 @@ static ssize_t amdgpu_get_power_dpm_state(struct device *dev, pm = adev->pm.dpm.user_state; } + up_read(&adev->reset_sem); + pm_runtime_mark_last_busy(ddev->dev); pm_runtime_put_autosuspend(ddev->dev); @@ -201,7 +205,7 @@ static ssize_t amdgpu_set_power_dpm_state(struct device *dev, enum amd_pm_state_type state; int ret; - if (adev->in_gpu_reset) + if (amdgpu_in_reset(adev)) return -EPERM; if (strncmp("battery", buf, strlen("battery")) == 0) @@ -219,6 +223,8 @@ static ssize_t amdgpu_set_power_dpm_state(struct device *dev, return ret; } + down_read(&adev->reset_sem); + if (is_support_sw_smu(adev)) { mutex_lock(&adev->pm.mutex); adev->pm.dpm.user_state = state; @@ -232,6 +238,9 @@ static ssize_t amdgpu_set_power_dpm_state(struct device *dev, amdgpu_pm_compute_clocks(adev); } + + up_read(&adev->reset_sem); + pm_runtime_mark_last_busy(ddev->dev); pm_runtime_put_autosuspend(ddev->dev); @@ -307,7 +316,7 @@ static ssize_t amdgpu_get_power_dpm_force_performance_level(struct device *dev, enum amd_dpm_forced_level level = 0xff; int ret; - if (adev->in_gpu_reset) + if (amdgpu_in_reset(adev)) return -EPERM; ret = pm_runtime_get_sync(ddev->dev); @@ -316,6 +325,8 @@ static ssize_t amdgpu_get_power_dpm_force_performance_level(struct device *dev, return ret; } + down_read(&adev->reset_sem); + if (is_support_sw_smu(adev)) level = smu_get_performance_level(&adev->smu); else if (adev->powerplay.pp_funcs->get_performance_level) @@ -323,6 +334,8 @@ static ssize_t amdgpu_get_power_dpm_force_performance_level(struct device *dev, else level = adev->pm.dpm.forced_level; + up_read(&adev->reset_sem); + pm_runtime_mark_last_busy(ddev->dev); pm_runtime_put_autosuspend(ddev->dev); @@ -349,7 +362,7 @@ static ssize_t amdgpu_set_power_dpm_force_performance_level(struct device *dev, enum amd_dpm_forced_level current_level = 0xff; int ret = 0; - if (adev->in_gpu_reset) + if (amdgpu_in_reset(adev)) return -EPERM; if (strncmp("low", buf, strlen("low")) == 0) { @@ -380,6 +393,8 @@ static ssize_t amdgpu_set_power_dpm_force_performance_level(struct device *dev, return ret; } + down_read(&adev->reset_sem); + if (is_support_sw_smu(adev)) current_level = smu_get_performance_level(&adev->smu); else if (adev->powerplay.pp_funcs->get_performance_level) @@ -388,7 +403,8 @@ static ssize_t amdgpu_set_power_dpm_force_performance_level(struct device *dev, if (current_level == level) { pm_runtime_mark_last_busy(ddev->dev); pm_runtime_put_autosuspend(ddev->dev); - return count; + ret = count; + goto pro_end; } if (adev->asic_type == CHIP_RAVEN) { @@ -409,7 +425,8 @@ static ssize_t amdgpu_set_power_dpm_force_performance_level(struct device *dev, pr_err("Currently not in any profile mode!\n"); pm_runtime_mark_last_busy(ddev->dev); pm_runtime_put_autosuspend(ddev->dev); - return -EINVAL; + ret = -EINVAL; + goto pro_end; } if (is_support_sw_smu(adev)) { @@ -417,7 +434,8 @@ static ssize_t amdgpu_set_power_dpm_force_performance_level(struct device *dev, if (ret) { pm_runtime_mark_last_busy(ddev->dev); pm_runtime_put_autosuspend(ddev->dev); - return -EINVAL; + ret = -EINVAL; + goto pro_end; } } else if (adev->powerplay.pp_funcs->force_performance_level) { mutex_lock(&adev->pm.mutex); @@ -425,14 +443,16 @@ static ssize_t amdgpu_set_power_dpm_force_performance_level(struct device *dev, mutex_unlock(&adev->pm.mutex); pm_runtime_mark_last_busy(ddev->dev); pm_runtime_put_autosuspend(ddev->dev); - return -EINVAL; + ret = -EINVAL; + goto pro_end; } ret = amdgpu_dpm_force_performance_level(adev, level); if (ret) { mutex_unlock(&adev->pm.mutex); pm_runtime_mark_last_busy(ddev->dev); pm_runtime_put_autosuspend(ddev->dev); - return -EINVAL; + ret = -EINVAL; + goto pro_end; } else { adev->pm.dpm.forced_level = level; } @@ -441,7 +461,9 @@ static ssize_t amdgpu_set_power_dpm_force_performance_level(struct device *dev, pm_runtime_mark_last_busy(ddev->dev); pm_runtime_put_autosuspend(ddev->dev); - return count; +pro_end: + up_read(&adev->reset_sem); + return ret; } static ssize_t amdgpu_get_pp_num_states(struct device *dev, @@ -453,7 +475,7 @@ static ssize_t amdgpu_get_pp_num_states(struct device *dev, struct pp_states_info data; int i, buf_len, ret; - if (adev->in_gpu_reset) + if (amdgpu_in_reset(adev)) return -EPERM; ret = pm_runtime_get_sync(ddev->dev); @@ -497,7 +519,7 @@ static ssize_t amdgpu_get_pp_cur_state(struct device *dev, enum amd_pm_state_type pm = 0; int i = 0, ret = 0; - if (adev->in_gpu_reset) + if (amdgpu_in_reset(adev)) return -EPERM; ret = pm_runtime_get_sync(ddev->dev); @@ -538,7 +560,7 @@ static ssize_t amdgpu_get_pp_force_state(struct device *dev, struct drm_device *ddev = dev_get_drvdata(dev); struct amdgpu_device *adev = ddev->dev_private; - if (adev->in_gpu_reset) + if (amdgpu_in_reset(adev)) return -EPERM; if (adev->pp_force_state_enabled) @@ -558,7 +580,7 @@ static ssize_t amdgpu_set_pp_force_state(struct device *dev, unsigned long idx; int ret; - if (adev->in_gpu_reset) + if (amdgpu_in_reset(adev)) return -EPERM; if (strlen(buf) == 1) @@ -584,6 +606,7 @@ static ssize_t amdgpu_set_pp_force_state(struct device *dev, return ret; } + down_read(&adev->reset_sem); /* only set user selected power states */ if (state != POWER_STATE_TYPE_INTERNAL_BOOT && state != POWER_STATE_TYPE_DEFAULT) { @@ -591,6 +614,8 @@ static ssize_t amdgpu_set_pp_force_state(struct device *dev, AMD_PP_TASK_ENABLE_USER_STATE, &state); adev->pp_force_state_enabled = true; } + up_read(&adev->reset_sem); + pm_runtime_mark_last_busy(ddev->dev); pm_runtime_put_autosuspend(ddev->dev); } @@ -618,7 +643,7 @@ static ssize_t amdgpu_get_pp_table(struct device *dev, char *table = NULL; int size, ret; - if (adev->in_gpu_reset) + if (amdgpu_in_reset(adev)) return -EPERM; ret = pm_runtime_get_sync(ddev->dev); @@ -662,7 +687,7 @@ static ssize_t amdgpu_set_pp_table(struct device *dev, struct amdgpu_device *adev = ddev->dev_private; int ret = 0; - if (adev->in_gpu_reset) + if (amdgpu_in_reset(adev)) return -EPERM; ret = pm_runtime_get_sync(ddev->dev); @@ -671,16 +696,21 @@ static ssize_t amdgpu_set_pp_table(struct device *dev, return ret; } + down_read(&adev->reset_sem); + if (is_support_sw_smu(adev)) { ret = smu_sys_set_pp_table(&adev->smu, (void *)buf, count); if (ret) { pm_runtime_mark_last_busy(ddev->dev); pm_runtime_put_autosuspend(ddev->dev); + up_read(&adev->reset_sem); return ret; } } else if (adev->powerplay.pp_funcs->set_pp_table) amdgpu_dpm_set_pp_table(adev, buf, count); + up_read(&adev->reset_sem); + pm_runtime_mark_last_busy(ddev->dev); pm_runtime_put_autosuspend(ddev->dev); @@ -815,7 +845,7 @@ static ssize_t amdgpu_set_pp_od_clk_voltage(struct device *dev, const char delimiter[3] = {' ', '\n', '\0'}; uint32_t type; - if (adev->in_gpu_reset) + if (amdgpu_in_reset(adev)) return -EPERM; if (count > 127) @@ -858,6 +888,10 @@ static ssize_t amdgpu_set_pp_od_clk_voltage(struct device *dev, return ret; } + ret = count; + + down_read(&adev->reset_sem); + if (is_support_sw_smu(adev)) { ret = smu_od_edit_dpm_table(&adev->smu, type, parameter, parameter_size); @@ -865,7 +899,8 @@ static ssize_t amdgpu_set_pp_od_clk_voltage(struct device *dev, if (ret) { pm_runtime_mark_last_busy(ddev->dev); pm_runtime_put_autosuspend(ddev->dev); - return -EINVAL; + ret = -EINVAL; + goto pro_end; } } else { if (adev->powerplay.pp_funcs->odn_edit_dpm_table) { @@ -874,7 +909,8 @@ static ssize_t amdgpu_set_pp_od_clk_voltage(struct device *dev, if (ret) { pm_runtime_mark_last_busy(ddev->dev); pm_runtime_put_autosuspend(ddev->dev); - return -EINVAL; + ret = -EINVAL; + goto pro_end; } } @@ -885,18 +921,22 @@ static ssize_t amdgpu_set_pp_od_clk_voltage(struct device *dev, NULL); pm_runtime_mark_last_busy(ddev->dev); pm_runtime_put_autosuspend(ddev->dev); - return count; + ret = count; + goto pro_end; } else { pm_runtime_mark_last_busy(ddev->dev); pm_runtime_put_autosuspend(ddev->dev); - return -EINVAL; + ret = -EINVAL; + goto pro_end; } } } pm_runtime_mark_last_busy(ddev->dev); pm_runtime_put_autosuspend(ddev->dev); - return count; +pro_end: + up_read(&adev->reset_sem); + return ret; } static ssize_t amdgpu_get_pp_od_clk_voltage(struct device *dev, @@ -908,7 +948,7 @@ static ssize_t amdgpu_get_pp_od_clk_voltage(struct device *dev, ssize_t size; int ret; - if (adev->in_gpu_reset) + if (amdgpu_in_reset(adev)) return -EPERM; ret = pm_runtime_get_sync(ddev->dev); @@ -962,7 +1002,7 @@ static ssize_t amdgpu_set_pp_features(struct device *dev, uint64_t featuremask; int ret; - if (adev->in_gpu_reset) + if (amdgpu_in_reset(adev)) return -EPERM; ret = kstrtou64(buf, 0, &featuremask); @@ -977,11 +1017,13 @@ static ssize_t amdgpu_set_pp_features(struct device *dev, return ret; } + down_read(&adev->reset_sem); if (is_support_sw_smu(adev)) { ret = smu_sys_set_pp_feature_mask(&adev->smu, featuremask); if (ret) { pm_runtime_mark_last_busy(ddev->dev); pm_runtime_put_autosuspend(ddev->dev); + up_read(&adev->reset_sem); return -EINVAL; } } else if (adev->powerplay.pp_funcs->set_ppfeature_status) { @@ -989,9 +1031,12 @@ static ssize_t amdgpu_set_pp_features(struct device *dev, if (ret) { pm_runtime_mark_last_busy(ddev->dev); pm_runtime_put_autosuspend(ddev->dev); + up_read(&adev->reset_sem); return -EINVAL; } } + up_read(&adev->reset_sem); + pm_runtime_mark_last_busy(ddev->dev); pm_runtime_put_autosuspend(ddev->dev); @@ -1007,7 +1052,7 @@ static ssize_t amdgpu_get_pp_features(struct device *dev, ssize_t size; int ret; - if (adev->in_gpu_reset) + if (amdgpu_in_reset(adev)) return -EPERM; ret = pm_runtime_get_sync(ddev->dev); @@ -1016,6 +1061,8 @@ static ssize_t amdgpu_get_pp_features(struct device *dev, return ret; } + down_read(&adev->reset_sem); + if (is_support_sw_smu(adev)) size = smu_sys_get_pp_feature_mask(&adev->smu, buf); else if (adev->powerplay.pp_funcs->get_ppfeature_status) @@ -1023,6 +1070,8 @@ static ssize_t amdgpu_get_pp_features(struct device *dev, else size = snprintf(buf, PAGE_SIZE, "\n"); + up_read(&adev->reset_sem); + pm_runtime_mark_last_busy(ddev->dev); pm_runtime_put_autosuspend(ddev->dev); @@ -1068,7 +1117,7 @@ static ssize_t amdgpu_get_pp_dpm_sclk(struct device *dev, ssize_t size; int ret; - if (adev->in_gpu_reset) + if (amdgpu_in_reset(adev)) return -EPERM; ret = pm_runtime_get_sync(ddev->dev); @@ -1077,6 +1126,8 @@ static ssize_t amdgpu_get_pp_dpm_sclk(struct device *dev, return ret; } + down_read(&adev->reset_sem); + if (is_support_sw_smu(adev)) size = smu_print_clk_levels(&adev->smu, SMU_SCLK, buf); else if (adev->powerplay.pp_funcs->print_clock_levels) @@ -1084,6 +1135,8 @@ static ssize_t amdgpu_get_pp_dpm_sclk(struct device *dev, else size = snprintf(buf, PAGE_SIZE, "\n"); + up_read(&adev->reset_sem); + pm_runtime_mark_last_busy(ddev->dev); pm_runtime_put_autosuspend(ddev->dev); @@ -1135,7 +1188,7 @@ static ssize_t amdgpu_set_pp_dpm_sclk(struct device *dev, int ret; uint32_t mask = 0; - if (adev->in_gpu_reset) + if (amdgpu_in_reset(adev)) return -EPERM; ret = amdgpu_read_mask(buf, count, &mask); @@ -1148,11 +1201,15 @@ static ssize_t amdgpu_set_pp_dpm_sclk(struct device *dev, return ret; } + down_read(&adev->reset_sem); + if (is_support_sw_smu(adev)) ret = smu_force_clk_levels(&adev->smu, SMU_SCLK, mask); else if (adev->powerplay.pp_funcs->force_clock_level) ret = amdgpu_dpm_force_clock_level(adev, PP_SCLK, mask); + up_read(&adev->reset_sem); + pm_runtime_mark_last_busy(ddev->dev); pm_runtime_put_autosuspend(ddev->dev); @@ -1171,7 +1228,7 @@ static ssize_t amdgpu_get_pp_dpm_mclk(struct device *dev, ssize_t size; int ret; - if (adev->in_gpu_reset) + if (amdgpu_in_reset(adev)) return -EPERM; ret = pm_runtime_get_sync(ddev->dev); @@ -1180,6 +1237,8 @@ static ssize_t amdgpu_get_pp_dpm_mclk(struct device *dev, return ret; } + down_read(&adev->reset_sem); + if (is_support_sw_smu(adev)) size = smu_print_clk_levels(&adev->smu, SMU_MCLK, buf); else if (adev->powerplay.pp_funcs->print_clock_levels) @@ -1187,6 +1246,8 @@ static ssize_t amdgpu_get_pp_dpm_mclk(struct device *dev, else size = snprintf(buf, PAGE_SIZE, "\n"); + up_read(&adev->reset_sem); + pm_runtime_mark_last_busy(ddev->dev); pm_runtime_put_autosuspend(ddev->dev); @@ -1203,7 +1264,7 @@ static ssize_t amdgpu_set_pp_dpm_mclk(struct device *dev, uint32_t mask = 0; int ret; - if (adev->in_gpu_reset) + if (amdgpu_in_reset(adev)) return -EPERM; ret = amdgpu_read_mask(buf, count, &mask); @@ -1216,11 +1277,15 @@ static ssize_t amdgpu_set_pp_dpm_mclk(struct device *dev, return ret; } + down_read(&adev->reset_sem); + if (is_support_sw_smu(adev)) ret = smu_force_clk_levels(&adev->smu, SMU_MCLK, mask); else if (adev->powerplay.pp_funcs->force_clock_level) ret = amdgpu_dpm_force_clock_level(adev, PP_MCLK, mask); + up_read(&adev->reset_sem); + pm_runtime_mark_last_busy(ddev->dev); pm_runtime_put_autosuspend(ddev->dev); @@ -1239,7 +1304,7 @@ static ssize_t amdgpu_get_pp_dpm_socclk(struct device *dev, ssize_t size; int ret; - if (adev->in_gpu_reset) + if (amdgpu_in_reset(adev)) return -EPERM; ret = pm_runtime_get_sync(ddev->dev); @@ -1248,6 +1313,8 @@ static ssize_t amdgpu_get_pp_dpm_socclk(struct device *dev, return ret; } + down_read(&adev->reset_sem); + if (is_support_sw_smu(adev)) size = smu_print_clk_levels(&adev->smu, SMU_SOCCLK, buf); else if (adev->powerplay.pp_funcs->print_clock_levels) @@ -1255,6 +1322,8 @@ static ssize_t amdgpu_get_pp_dpm_socclk(struct device *dev, else size = snprintf(buf, PAGE_SIZE, "\n"); + up_read(&adev->reset_sem); + pm_runtime_mark_last_busy(ddev->dev); pm_runtime_put_autosuspend(ddev->dev); @@ -1271,7 +1340,7 @@ static ssize_t amdgpu_set_pp_dpm_socclk(struct device *dev, int ret; uint32_t mask = 0; - if (adev->in_gpu_reset) + if (amdgpu_in_reset(adev)) return -EPERM; ret = amdgpu_read_mask(buf, count, &mask); @@ -1284,6 +1353,8 @@ static ssize_t amdgpu_set_pp_dpm_socclk(struct device *dev, return ret; } + down_read(&adev->reset_sem); + if (is_support_sw_smu(adev)) ret = smu_force_clk_levels(&adev->smu, SMU_SOCCLK, mask); else if (adev->powerplay.pp_funcs->force_clock_level) @@ -1291,6 +1362,8 @@ static ssize_t amdgpu_set_pp_dpm_socclk(struct device *dev, else ret = 0; + up_read(&adev->reset_sem); + pm_runtime_mark_last_busy(ddev->dev); pm_runtime_put_autosuspend(ddev->dev); @@ -1309,7 +1382,7 @@ static ssize_t amdgpu_get_pp_dpm_fclk(struct device *dev, ssize_t size; int ret; - if (adev->in_gpu_reset) + if (amdgpu_in_reset(adev)) return -EPERM; ret = pm_runtime_get_sync(ddev->dev); @@ -1318,6 +1391,8 @@ static ssize_t amdgpu_get_pp_dpm_fclk(struct device *dev, return ret; } + down_read(&adev->reset_sem); + if (is_support_sw_smu(adev)) size = smu_print_clk_levels(&adev->smu, SMU_FCLK, buf); else if (adev->powerplay.pp_funcs->print_clock_levels) @@ -1325,6 +1400,8 @@ static ssize_t amdgpu_get_pp_dpm_fclk(struct device *dev, else size = snprintf(buf, PAGE_SIZE, "\n"); + up_read(&adev->reset_sem); + pm_runtime_mark_last_busy(ddev->dev); pm_runtime_put_autosuspend(ddev->dev); @@ -1341,7 +1418,7 @@ static ssize_t amdgpu_set_pp_dpm_fclk(struct device *dev, int ret; uint32_t mask = 0; - if (adev->in_gpu_reset) + if (amdgpu_in_reset(adev)) return -EPERM; ret = amdgpu_read_mask(buf, count, &mask); @@ -1354,6 +1431,8 @@ static ssize_t amdgpu_set_pp_dpm_fclk(struct device *dev, return ret; } + down_read(&adev->reset_sem); + if (is_support_sw_smu(adev)) ret = smu_force_clk_levels(&adev->smu, SMU_FCLK, mask); else if (adev->powerplay.pp_funcs->force_clock_level) @@ -1361,6 +1440,8 @@ static ssize_t amdgpu_set_pp_dpm_fclk(struct device *dev, else ret = 0; + up_read(&adev->reset_sem); + pm_runtime_mark_last_busy(ddev->dev); pm_runtime_put_autosuspend(ddev->dev); @@ -1379,7 +1460,7 @@ static ssize_t amdgpu_get_pp_dpm_dcefclk(struct device *dev, ssize_t size; int ret; - if (adev->in_gpu_reset) + if (amdgpu_in_reset(adev)) return -EPERM; ret = pm_runtime_get_sync(ddev->dev); @@ -1388,6 +1469,8 @@ static ssize_t amdgpu_get_pp_dpm_dcefclk(struct device *dev, return ret; } + down_read(&adev->reset_sem); + if (is_support_sw_smu(adev)) size = smu_print_clk_levels(&adev->smu, SMU_DCEFCLK, buf); else if (adev->powerplay.pp_funcs->print_clock_levels) @@ -1395,6 +1478,8 @@ static ssize_t amdgpu_get_pp_dpm_dcefclk(struct device *dev, else size = snprintf(buf, PAGE_SIZE, "\n"); + up_read(&adev->reset_sem); + pm_runtime_mark_last_busy(ddev->dev); pm_runtime_put_autosuspend(ddev->dev); @@ -1411,7 +1496,7 @@ static ssize_t amdgpu_set_pp_dpm_dcefclk(struct device *dev, int ret; uint32_t mask = 0; - if (adev->in_gpu_reset) + if (amdgpu_in_reset(adev)) return -EPERM; ret = amdgpu_read_mask(buf, count, &mask); @@ -1424,6 +1509,8 @@ static ssize_t amdgpu_set_pp_dpm_dcefclk(struct device *dev, return ret; } + down_read(&adev->reset_sem); + if (is_support_sw_smu(adev)) ret = smu_force_clk_levels(&adev->smu, SMU_DCEFCLK, mask); else if (adev->powerplay.pp_funcs->force_clock_level) @@ -1431,6 +1518,8 @@ static ssize_t amdgpu_set_pp_dpm_dcefclk(struct device *dev, else ret = 0; + up_read(&adev->reset_sem); + pm_runtime_mark_last_busy(ddev->dev); pm_runtime_put_autosuspend(ddev->dev); @@ -1449,7 +1538,7 @@ static ssize_t amdgpu_get_pp_dpm_pcie(struct device *dev, ssize_t size; int ret; - if (adev->in_gpu_reset) + if (amdgpu_in_reset(adev)) return -EPERM; ret = pm_runtime_get_sync(ddev->dev); @@ -1458,6 +1547,8 @@ static ssize_t amdgpu_get_pp_dpm_pcie(struct device *dev, return ret; } + down_read(&adev->reset_sem); + if (is_support_sw_smu(adev)) size = smu_print_clk_levels(&adev->smu, SMU_PCIE, buf); else if (adev->powerplay.pp_funcs->print_clock_levels) @@ -1465,6 +1556,8 @@ static ssize_t amdgpu_get_pp_dpm_pcie(struct device *dev, else size = snprintf(buf, PAGE_SIZE, "\n"); + up_read(&adev->reset_sem); + pm_runtime_mark_last_busy(ddev->dev); pm_runtime_put_autosuspend(ddev->dev); @@ -1481,7 +1574,7 @@ static ssize_t amdgpu_set_pp_dpm_pcie(struct device *dev, int ret; uint32_t mask = 0; - if (adev->in_gpu_reset) + if (amdgpu_in_reset(adev)) return -EPERM; ret = amdgpu_read_mask(buf, count, &mask); @@ -1494,6 +1587,8 @@ static ssize_t amdgpu_set_pp_dpm_pcie(struct device *dev, return ret; } + down_read(&adev->reset_sem); + if (is_support_sw_smu(adev)) ret = smu_force_clk_levels(&adev->smu, SMU_PCIE, mask); else if (adev->powerplay.pp_funcs->force_clock_level) @@ -1501,6 +1596,8 @@ static ssize_t amdgpu_set_pp_dpm_pcie(struct device *dev, else ret = 0; + up_read(&adev->reset_sem); + pm_runtime_mark_last_busy(ddev->dev); pm_runtime_put_autosuspend(ddev->dev); @@ -1519,7 +1616,7 @@ static ssize_t amdgpu_get_pp_sclk_od(struct device *dev, uint32_t value = 0; int ret; - if (adev->in_gpu_reset) + if (amdgpu_in_reset(adev)) return -EPERM; ret = pm_runtime_get_sync(ddev->dev); @@ -1528,11 +1625,15 @@ static ssize_t amdgpu_get_pp_sclk_od(struct device *dev, return ret; } + down_read(&adev->reset_sem); + if (is_support_sw_smu(adev)) value = smu_get_od_percentage(&(adev->smu), SMU_OD_SCLK); else if (adev->powerplay.pp_funcs->get_sclk_od) value = amdgpu_dpm_get_sclk_od(adev); + up_read(&adev->reset_sem); + pm_runtime_mark_last_busy(ddev->dev); pm_runtime_put_autosuspend(ddev->dev); @@ -1549,7 +1650,7 @@ static ssize_t amdgpu_set_pp_sclk_od(struct device *dev, int ret; long int value; - if (adev->in_gpu_reset) + if (amdgpu_in_reset(adev)) return -EPERM; ret = kstrtol(buf, 0, &value); @@ -1563,6 +1664,8 @@ static ssize_t amdgpu_set_pp_sclk_od(struct device *dev, return ret; } + down_read(&adev->reset_sem); + if (is_support_sw_smu(adev)) { value = smu_set_od_percentage(&(adev->smu), SMU_OD_SCLK, (uint32_t)value); } else { @@ -1577,6 +1680,8 @@ static ssize_t amdgpu_set_pp_sclk_od(struct device *dev, } } + up_read(&adev->reset_sem); + pm_runtime_mark_last_busy(ddev->dev); pm_runtime_put_autosuspend(ddev->dev); @@ -1592,7 +1697,7 @@ static ssize_t amdgpu_get_pp_mclk_od(struct device *dev, uint32_t value = 0; int ret; - if (adev->in_gpu_reset) + if (amdgpu_in_reset(adev)) return -EPERM; ret = pm_runtime_get_sync(ddev->dev); @@ -1601,11 +1706,15 @@ static ssize_t amdgpu_get_pp_mclk_od(struct device *dev, return ret; } + down_read(&adev->reset_sem); + if (is_support_sw_smu(adev)) value = smu_get_od_percentage(&(adev->smu), SMU_OD_MCLK); else if (adev->powerplay.pp_funcs->get_mclk_od) value = amdgpu_dpm_get_mclk_od(adev); + up_read(&adev->reset_sem); + pm_runtime_mark_last_busy(ddev->dev); pm_runtime_put_autosuspend(ddev->dev); @@ -1622,7 +1731,7 @@ static ssize_t amdgpu_set_pp_mclk_od(struct device *dev, int ret; long int value; - if (adev->in_gpu_reset) + if (amdgpu_in_reset(adev)) return -EPERM; ret = kstrtol(buf, 0, &value); @@ -1636,6 +1745,8 @@ static ssize_t amdgpu_set_pp_mclk_od(struct device *dev, return ret; } + down_read(&adev->reset_sem); + if (is_support_sw_smu(adev)) { value = smu_set_od_percentage(&(adev->smu), SMU_OD_MCLK, (uint32_t)value); } else { @@ -1650,6 +1761,8 @@ static ssize_t amdgpu_set_pp_mclk_od(struct device *dev, } } + up_read(&adev->reset_sem); + pm_runtime_mark_last_busy(ddev->dev); pm_runtime_put_autosuspend(ddev->dev); @@ -1685,7 +1798,7 @@ static ssize_t amdgpu_get_pp_power_profile_mode(struct device *dev, ssize_t size; int ret; - if (adev->in_gpu_reset) + if (amdgpu_in_reset(adev)) return -EPERM; ret = pm_runtime_get_sync(ddev->dev); @@ -1694,6 +1807,8 @@ static ssize_t amdgpu_get_pp_power_profile_mode(struct device *dev, return ret; } + down_read(&adev->reset_sem); + if (is_support_sw_smu(adev)) size = smu_get_power_profile_mode(&adev->smu, buf); else if (adev->powerplay.pp_funcs->get_power_profile_mode) @@ -1701,6 +1816,8 @@ static ssize_t amdgpu_get_pp_power_profile_mode(struct device *dev, else size = snprintf(buf, PAGE_SIZE, "\n"); + up_read(&adev->reset_sem); + pm_runtime_mark_last_busy(ddev->dev); pm_runtime_put_autosuspend(ddev->dev); @@ -1725,7 +1842,7 @@ static ssize_t amdgpu_set_pp_power_profile_mode(struct device *dev, long int profile_mode = 0; const char delimiter[3] = {' ', '\n', '\0'}; - if (adev->in_gpu_reset) + if (amdgpu_in_reset(adev)) return -EPERM; tmp[0] = *(buf); @@ -1758,11 +1875,15 @@ static ssize_t amdgpu_set_pp_power_profile_mode(struct device *dev, return ret; } + down_read(&adev->reset_sem); + if (is_support_sw_smu(adev)) ret = smu_set_power_profile_mode(&adev->smu, parameter, parameter_size, true); else if (adev->powerplay.pp_funcs->set_power_profile_mode) ret = amdgpu_dpm_set_power_profile_mode(adev, parameter, parameter_size); + up_read(&adev->reset_sem); + pm_runtime_mark_last_busy(ddev->dev); pm_runtime_put_autosuspend(ddev->dev); @@ -1788,7 +1909,7 @@ static ssize_t amdgpu_get_gpu_busy_percent(struct device *dev, struct amdgpu_device *adev = ddev->dev_private; int r, value, size = sizeof(value); - if (adev->in_gpu_reset) + if (amdgpu_in_reset(adev)) return -EPERM; r = pm_runtime_get_sync(ddev->dev); @@ -1797,9 +1918,11 @@ static ssize_t amdgpu_get_gpu_busy_percent(struct device *dev, return r; } + down_read(&adev->reset_sem); /* read the IP busy sensor */ r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GPU_LOAD, (void *)&value, &size); + up_read(&adev->reset_sem); pm_runtime_mark_last_busy(ddev->dev); pm_runtime_put_autosuspend(ddev->dev); @@ -1826,7 +1949,7 @@ static ssize_t amdgpu_get_mem_busy_percent(struct device *dev, struct amdgpu_device *adev = ddev->dev_private; int r, value, size = sizeof(value); - if (adev->in_gpu_reset) + if (amdgpu_in_reset(adev)) return -EPERM; r = pm_runtime_get_sync(ddev->dev); @@ -1835,10 +1958,14 @@ static ssize_t amdgpu_get_mem_busy_percent(struct device *dev, return r; } + down_read(&adev->reset_sem); + /* read the IP busy sensor */ r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_MEM_LOAD, (void *)&value, &size); + up_read(&adev->reset_sem); + pm_runtime_mark_last_busy(ddev->dev); pm_runtime_put_autosuspend(ddev->dev); @@ -1869,7 +1996,7 @@ static ssize_t amdgpu_get_pcie_bw(struct device *dev, uint64_t count0 = 0, count1 = 0; int ret; - if (adev->in_gpu_reset) + if (amdgpu_in_reset(adev)) return -EPERM; if (adev->flags & AMD_IS_APU) @@ -1884,8 +2011,12 @@ static ssize_t amdgpu_get_pcie_bw(struct device *dev, return ret; } + down_read(&adev->reset_sem); + amdgpu_asic_get_pcie_usage(adev, &count0, &count1); + up_read(&adev->reset_sem); + pm_runtime_mark_last_busy(ddev->dev); pm_runtime_put_autosuspend(ddev->dev); @@ -1910,7 +2041,7 @@ static ssize_t amdgpu_get_unique_id(struct device *dev, struct drm_device *ddev = dev_get_drvdata(dev); struct amdgpu_device *adev = ddev->dev_private; - if (adev->in_gpu_reset) + if (amdgpu_in_reset(adev)) return -EPERM; if (adev->unique_id) @@ -2177,7 +2308,7 @@ static ssize_t amdgpu_hwmon_show_temp(struct device *dev, int channel = to_sensor_dev_attr(attr)->index; int r, temp = 0, size = sizeof(temp); - if (adev->in_gpu_reset) + if (amdgpu_in_reset(adev)) return -EPERM; if (channel >= PP_TEMP_MAX) @@ -2189,6 +2320,8 @@ static ssize_t amdgpu_hwmon_show_temp(struct device *dev, return r; } + down_read(&adev->reset_sem); + switch (channel) { case PP_TEMP_JUNCTION: /* get current junction temperature */ @@ -2210,6 +2343,8 @@ static ssize_t amdgpu_hwmon_show_temp(struct device *dev, break; } + up_read(&adev->reset_sem); + pm_runtime_mark_last_busy(adev->ddev->dev); pm_runtime_put_autosuspend(adev->ddev->dev); @@ -2313,7 +2448,7 @@ static ssize_t amdgpu_hwmon_get_pwm1_enable(struct device *dev, u32 pwm_mode = 0; int ret; - if (adev->in_gpu_reset) + if (amdgpu_in_reset(adev)) return -EPERM; ret = pm_runtime_get_sync(adev->ddev->dev); @@ -2322,18 +2457,23 @@ static ssize_t amdgpu_hwmon_get_pwm1_enable(struct device *dev, return ret; } + down_read(&adev->reset_sem); + if (is_support_sw_smu(adev)) { pwm_mode = smu_get_fan_control_mode(&adev->smu); } else { if (!adev->powerplay.pp_funcs->get_fan_control_mode) { pm_runtime_mark_last_busy(adev->ddev->dev); pm_runtime_put_autosuspend(adev->ddev->dev); + up_read(&adev->reset_sem); return -EINVAL; } pwm_mode = amdgpu_dpm_get_fan_control_mode(adev); } + up_read(&adev->reset_sem); + pm_runtime_mark_last_busy(adev->ddev->dev); pm_runtime_put_autosuspend(adev->ddev->dev); @@ -2349,7 +2489,7 @@ static ssize_t amdgpu_hwmon_set_pwm1_enable(struct device *dev, int err, ret; int value; - if (adev->in_gpu_reset) + if (amdgpu_in_reset(adev)) return -EPERM; err = kstrtoint(buf, 10, &value); @@ -2362,18 +2502,23 @@ static ssize_t amdgpu_hwmon_set_pwm1_enable(struct device *dev, return ret; } + down_read(&adev->reset_sem); + if (is_support_sw_smu(adev)) { smu_set_fan_control_mode(&adev->smu, value); } else { if (!adev->powerplay.pp_funcs->set_fan_control_mode) { pm_runtime_mark_last_busy(adev->ddev->dev); pm_runtime_put_autosuspend(adev->ddev->dev); + up_read(&adev->reset_sem); return -EINVAL; } amdgpu_dpm_set_fan_control_mode(adev, value); } + up_read(&adev->reset_sem); + pm_runtime_mark_last_busy(adev->ddev->dev); pm_runtime_put_autosuspend(adev->ddev->dev); @@ -2403,7 +2548,7 @@ static ssize_t amdgpu_hwmon_set_pwm1(struct device *dev, u32 value; u32 pwm_mode; - if (adev->in_gpu_reset) + if (amdgpu_in_reset(adev)) return -EPERM; err = pm_runtime_get_sync(adev->ddev->dev); @@ -2412,11 +2557,15 @@ static ssize_t amdgpu_hwmon_set_pwm1(struct device *dev, return err; } + down_read(&adev->reset_sem); + if (is_support_sw_smu(adev)) pwm_mode = smu_get_fan_control_mode(&adev->smu); else pwm_mode = amdgpu_dpm_get_fan_control_mode(adev); + up_read(&adev->reset_sem); + if (pwm_mode != AMD_FAN_CTRL_MANUAL) { pr_info("manual fan speed control should be enabled first\n"); pm_runtime_mark_last_busy(adev->ddev->dev); @@ -2457,7 +2606,7 @@ static ssize_t amdgpu_hwmon_get_pwm1(struct device *dev, int err; u32 speed = 0; - if (adev->in_gpu_reset) + if (amdgpu_in_reset(adev)) return -EPERM; err = pm_runtime_get_sync(adev->ddev->dev); @@ -2466,6 +2615,8 @@ static ssize_t amdgpu_hwmon_get_pwm1(struct device *dev, return err; } + down_read(&adev->reset_sem); + if (is_support_sw_smu(adev)) err = smu_get_fan_speed_percent(&adev->smu, &speed); else if (adev->powerplay.pp_funcs->get_fan_speed_percent) @@ -2473,6 +2624,8 @@ static ssize_t amdgpu_hwmon_get_pwm1(struct device *dev, else err = -EINVAL; + up_read(&adev->reset_sem); + pm_runtime_mark_last_busy(adev->ddev->dev); pm_runtime_put_autosuspend(adev->ddev->dev); @@ -2492,7 +2645,7 @@ static ssize_t amdgpu_hwmon_get_fan1_input(struct device *dev, int err; u32 speed = 0; - if (adev->in_gpu_reset) + if (amdgpu_in_reset(adev)) return -EPERM; err = pm_runtime_get_sync(adev->ddev->dev); @@ -2501,6 +2654,8 @@ static ssize_t amdgpu_hwmon_get_fan1_input(struct device *dev, return err; } + down_read(&adev->reset_sem); + if (is_support_sw_smu(adev)) err = smu_get_fan_speed_rpm(&adev->smu, &speed); else if (adev->powerplay.pp_funcs->get_fan_speed_rpm) @@ -2508,6 +2663,8 @@ static ssize_t amdgpu_hwmon_get_fan1_input(struct device *dev, else err = -EINVAL; + up_read(&adev->reset_sem); + pm_runtime_mark_last_busy(adev->ddev->dev); pm_runtime_put_autosuspend(adev->ddev->dev); @@ -2526,7 +2683,7 @@ static ssize_t amdgpu_hwmon_get_fan1_min(struct device *dev, u32 size = sizeof(min_rpm); int r; - if (adev->in_gpu_reset) + if (amdgpu_in_reset(adev)) return -EPERM; r = pm_runtime_get_sync(adev->ddev->dev); @@ -2535,9 +2692,13 @@ static ssize_t amdgpu_hwmon_get_fan1_min(struct device *dev, return r; } + down_read(&adev->reset_sem); + r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_MIN_FAN_RPM, (void *)&min_rpm, &size); + up_read(&adev->reset_sem); + pm_runtime_mark_last_busy(adev->ddev->dev); pm_runtime_put_autosuspend(adev->ddev->dev); @@ -2556,7 +2717,7 @@ static ssize_t amdgpu_hwmon_get_fan1_max(struct device *dev, u32 size = sizeof(max_rpm); int r; - if (adev->in_gpu_reset) + if (amdgpu_in_reset(adev)) return -EPERM; r = pm_runtime_get_sync(adev->ddev->dev); @@ -2565,9 +2726,13 @@ static ssize_t amdgpu_hwmon_get_fan1_max(struct device *dev, return r; } + down_read(&adev->reset_sem); + r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_MAX_FAN_RPM, (void *)&max_rpm, &size); + up_read(&adev->reset_sem); + pm_runtime_mark_last_busy(adev->ddev->dev); pm_runtime_put_autosuspend(adev->ddev->dev); @@ -2585,7 +2750,7 @@ static ssize_t amdgpu_hwmon_get_fan1_target(struct device *dev, int err; u32 rpm = 0; - if (adev->in_gpu_reset) + if (amdgpu_in_reset(adev)) return -EPERM; err = pm_runtime_get_sync(adev->ddev->dev); @@ -2594,6 +2759,8 @@ static ssize_t amdgpu_hwmon_get_fan1_target(struct device *dev, return err; } + down_read(&adev->reset_sem); + if (is_support_sw_smu(adev)) err = smu_get_fan_speed_rpm(&adev->smu, &rpm); else if (adev->powerplay.pp_funcs->get_fan_speed_rpm) @@ -2601,6 +2768,8 @@ static ssize_t amdgpu_hwmon_get_fan1_target(struct device *dev, else err = -EINVAL; + up_read(&adev->reset_sem); + pm_runtime_mark_last_busy(adev->ddev->dev); pm_runtime_put_autosuspend(adev->ddev->dev); @@ -2619,7 +2788,7 @@ static ssize_t amdgpu_hwmon_set_fan1_target(struct device *dev, u32 value; u32 pwm_mode; - if (adev->in_gpu_reset) + if (amdgpu_in_reset(adev)) return -EPERM; err = pm_runtime_get_sync(adev->ddev->dev); @@ -2628,11 +2797,15 @@ static ssize_t amdgpu_hwmon_set_fan1_target(struct device *dev, return err; } + down_read(&adev->reset_sem); + if (is_support_sw_smu(adev)) pwm_mode = smu_get_fan_control_mode(&adev->smu); else pwm_mode = amdgpu_dpm_get_fan_control_mode(adev); + up_read(&adev->reset_sem); + if (pwm_mode != AMD_FAN_CTRL_MANUAL) { pm_runtime_mark_last_busy(adev->ddev->dev); pm_runtime_put_autosuspend(adev->ddev->dev); @@ -2646,6 +2819,8 @@ static ssize_t amdgpu_hwmon_set_fan1_target(struct device *dev, return err; } + down_read(&adev->reset_sem); + if (is_support_sw_smu(adev)) err = smu_set_fan_speed_rpm(&adev->smu, value); else if (adev->powerplay.pp_funcs->set_fan_speed_rpm) @@ -2653,6 +2828,8 @@ static ssize_t amdgpu_hwmon_set_fan1_target(struct device *dev, else err = -EINVAL; + up_read(&adev->reset_sem); + pm_runtime_mark_last_busy(adev->ddev->dev); pm_runtime_put_autosuspend(adev->ddev->dev); @@ -2670,7 +2847,7 @@ static ssize_t amdgpu_hwmon_get_fan1_enable(struct device *dev, u32 pwm_mode = 0; int ret; - if (adev->in_gpu_reset) + if (amdgpu_in_reset(adev)) return -EPERM; ret = pm_runtime_get_sync(adev->ddev->dev); @@ -2679,18 +2856,23 @@ static ssize_t amdgpu_hwmon_get_fan1_enable(struct device *dev, return ret; } + down_read(&adev->reset_sem); + if (is_support_sw_smu(adev)) { pwm_mode = smu_get_fan_control_mode(&adev->smu); } else { if (!adev->powerplay.pp_funcs->get_fan_control_mode) { pm_runtime_mark_last_busy(adev->ddev->dev); pm_runtime_put_autosuspend(adev->ddev->dev); + up_read(&adev->reset_sem); return -EINVAL; } pwm_mode = amdgpu_dpm_get_fan_control_mode(adev); } + up_read(&adev->reset_sem); + pm_runtime_mark_last_busy(adev->ddev->dev); pm_runtime_put_autosuspend(adev->ddev->dev); @@ -2707,7 +2889,7 @@ static ssize_t amdgpu_hwmon_set_fan1_enable(struct device *dev, int value; u32 pwm_mode; - if (adev->in_gpu_reset) + if (amdgpu_in_reset(adev)) return -EPERM; err = kstrtoint(buf, 10, &value); @@ -2727,17 +2909,22 @@ static ssize_t amdgpu_hwmon_set_fan1_enable(struct device *dev, return err; } + down_read(&adev->reset_sem); + if (is_support_sw_smu(adev)) { smu_set_fan_control_mode(&adev->smu, pwm_mode); } else { if (!adev->powerplay.pp_funcs->set_fan_control_mode) { pm_runtime_mark_last_busy(adev->ddev->dev); pm_runtime_put_autosuspend(adev->ddev->dev); + up_read(&adev->reset_sem); return -EINVAL; } amdgpu_dpm_set_fan_control_mode(adev, pwm_mode); } + up_read(&adev->reset_sem); + pm_runtime_mark_last_busy(adev->ddev->dev); pm_runtime_put_autosuspend(adev->ddev->dev); @@ -2752,7 +2939,7 @@ static ssize_t amdgpu_hwmon_show_vddgfx(struct device *dev, u32 vddgfx; int r, size = sizeof(vddgfx); - if (adev->in_gpu_reset) + if (amdgpu_in_reset(adev)) return -EPERM; r = pm_runtime_get_sync(adev->ddev->dev); @@ -2761,9 +2948,11 @@ static ssize_t amdgpu_hwmon_show_vddgfx(struct device *dev, return r; } + down_read(&adev->reset_sem); /* get the voltage */ r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_VDDGFX, (void *)&vddgfx, &size); + up_read(&adev->reset_sem); pm_runtime_mark_last_busy(adev->ddev->dev); pm_runtime_put_autosuspend(adev->ddev->dev); @@ -2789,7 +2978,7 @@ static ssize_t amdgpu_hwmon_show_vddnb(struct device *dev, u32 vddnb; int r, size = sizeof(vddnb); - if (adev->in_gpu_reset) + if (amdgpu_in_reset(adev)) return -EPERM; /* only APUs have vddnb */ @@ -2802,9 +2991,11 @@ static ssize_t amdgpu_hwmon_show_vddnb(struct device *dev, return r; } + down_read(&adev->reset_sem); /* get the voltage */ r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_VDDNB, (void *)&vddnb, &size); + up_read(&adev->reset_sem); pm_runtime_mark_last_busy(adev->ddev->dev); pm_runtime_put_autosuspend(adev->ddev->dev); @@ -2831,7 +3022,7 @@ static ssize_t amdgpu_hwmon_show_power_avg(struct device *dev, int r, size = sizeof(u32); unsigned uw; - if (adev->in_gpu_reset) + if (amdgpu_in_reset(adev)) return -EPERM; r = pm_runtime_get_sync(adev->ddev->dev); @@ -2840,9 +3031,11 @@ static ssize_t amdgpu_hwmon_show_power_avg(struct device *dev, return r; } + down_read(&adev->reset_sem); /* get the voltage */ r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GPU_POWER, (void *)&query, &size); + up_read(&adev->reset_sem); pm_runtime_mark_last_busy(adev->ddev->dev); pm_runtime_put_autosuspend(adev->ddev->dev); @@ -2872,7 +3065,7 @@ static ssize_t amdgpu_hwmon_show_power_cap_max(struct device *dev, ssize_t size; int r; - if (adev->in_gpu_reset) + if (amdgpu_in_reset(adev)) return -EPERM; r = pm_runtime_get_sync(adev->ddev->dev); @@ -2881,6 +3074,8 @@ static ssize_t amdgpu_hwmon_show_power_cap_max(struct device *dev, return r; } + down_read(&adev->reset_sem); + if (is_support_sw_smu(adev)) { smu_get_power_limit(&adev->smu, &limit, true); size = snprintf(buf, PAGE_SIZE, "%u\n", limit * 1000000); @@ -2891,6 +3086,8 @@ static ssize_t amdgpu_hwmon_show_power_cap_max(struct device *dev, size = snprintf(buf, PAGE_SIZE, "\n"); } + up_read(&adev->reset_sem); + pm_runtime_mark_last_busy(adev->ddev->dev); pm_runtime_put_autosuspend(adev->ddev->dev); @@ -2906,7 +3103,7 @@ static ssize_t amdgpu_hwmon_show_power_cap(struct device *dev, ssize_t size; int r; - if (adev->in_gpu_reset) + if (amdgpu_in_reset(adev)) return -EPERM; r = pm_runtime_get_sync(adev->ddev->dev); @@ -2915,6 +3112,8 @@ static ssize_t amdgpu_hwmon_show_power_cap(struct device *dev, return r; } + down_read(&adev->reset_sem); + if (is_support_sw_smu(adev)) { smu_get_power_limit(&adev->smu, &limit, false); size = snprintf(buf, PAGE_SIZE, "%u\n", limit * 1000000); @@ -2925,6 +3124,8 @@ static ssize_t amdgpu_hwmon_show_power_cap(struct device *dev, size = snprintf(buf, PAGE_SIZE, "\n"); } + up_read(&adev->reset_sem); + pm_runtime_mark_last_busy(adev->ddev->dev); pm_runtime_put_autosuspend(adev->ddev->dev); @@ -2941,7 +3142,7 @@ static ssize_t amdgpu_hwmon_set_power_cap(struct device *dev, int err; u32 value; - if (adev->in_gpu_reset) + if (amdgpu_in_reset(adev)) return -EPERM; if (amdgpu_sriov_vf(adev)) @@ -2960,6 +3161,8 @@ static ssize_t amdgpu_hwmon_set_power_cap(struct device *dev, return err; } + down_read(&adev->reset_sem); + if (is_support_sw_smu(adev)) err = smu_set_power_limit(&adev->smu, value); else if (adev->powerplay.pp_funcs && adev->powerplay.pp_funcs->set_power_limit) @@ -2967,6 +3170,8 @@ static ssize_t amdgpu_hwmon_set_power_cap(struct device *dev, else err = -EINVAL; + up_read(&adev->reset_sem); + pm_runtime_mark_last_busy(adev->ddev->dev); pm_runtime_put_autosuspend(adev->ddev->dev); @@ -2984,7 +3189,7 @@ static ssize_t amdgpu_hwmon_show_sclk(struct device *dev, uint32_t sclk; int r, size = sizeof(sclk); - if (adev->in_gpu_reset) + if (amdgpu_in_reset(adev)) return -EPERM; r = pm_runtime_get_sync(adev->ddev->dev); @@ -2993,9 +3198,11 @@ static ssize_t amdgpu_hwmon_show_sclk(struct device *dev, return r; } + down_read(&adev->reset_sem); /* get the sclk */ r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GFX_SCLK, (void *)&sclk, &size); + up_read(&adev->reset_sem); pm_runtime_mark_last_busy(adev->ddev->dev); pm_runtime_put_autosuspend(adev->ddev->dev); @@ -3021,7 +3228,7 @@ static ssize_t amdgpu_hwmon_show_mclk(struct device *dev, uint32_t mclk; int r, size = sizeof(mclk); - if (adev->in_gpu_reset) + if (amdgpu_in_reset(adev)) return -EPERM; r = pm_runtime_get_sync(adev->ddev->dev); @@ -3030,9 +3237,11 @@ static ssize_t amdgpu_hwmon_show_mclk(struct device *dev, return r; } + down_read(&adev->reset_sem); /* get the sclk */ r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GFX_MCLK, (void *)&mclk, &size); + up_read(&adev->reset_sem); pm_runtime_mark_last_busy(adev->ddev->dev); pm_runtime_put_autosuspend(adev->ddev->dev); @@ -3913,7 +4122,7 @@ static int amdgpu_debugfs_pm_info(struct seq_file *m, void *data) u32 flags = 0; int r; - if (adev->in_gpu_reset) + if (amdgpu_in_reset(adev)) return -EPERM; r = pm_runtime_get_sync(dev->dev); @@ -3922,7 +4131,10 @@ static int amdgpu_debugfs_pm_info(struct seq_file *m, void *data) return r; } + down_read(&adev->reset_sem); amdgpu_device_ip_get_clockgating_state(adev, &flags); + up_read(&adev->reset_sem); + seq_printf(m, "Clock Gating Flags Mask: 0x%x\n", flags); amdgpu_parse_cg_state(m, flags); seq_printf(m, "\n"); @@ -3934,6 +4146,7 @@ static int amdgpu_debugfs_pm_info(struct seq_file *m, void *data) return 0; } + down_read(&adev->reset_sem); if (!is_support_sw_smu(adev) && adev->powerplay.pp_funcs->debugfs_print_current_performance_level) { mutex_lock(&adev->pm.mutex); @@ -3946,6 +4159,7 @@ static int amdgpu_debugfs_pm_info(struct seq_file *m, void *data) } else { r = amdgpu_debugfs_pm_info_pp(m, adev); } + up_read(&adev->reset_sem); pm_runtime_mark_last_busy(dev->dev); pm_runtime_put_autosuspend(dev->dev); |