From d9a69fe512c5f032556764041760e8d5098fac26 Mon Sep 17 00:00:00 2001 From: Candice Li Date: Tue, 16 Nov 2021 14:13:11 +0800 Subject: drm/amdgpu: Add recovery_lock to save bad pages function Fix race condition failure during UMC UE injection. Signed-off-by: Candice Li Reviewed-by: Tao Zhou Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index 08133de21fdd..53b957a5b9a6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -1935,9 +1935,11 @@ int amdgpu_ras_save_bad_pages(struct amdgpu_device *adev) if (!con || !con->eh_data) return 0; + mutex_lock(&con->recovery_lock); control = &con->eeprom_control; data = con->eh_data; save_count = data->count - control->ras_num_recs; + mutex_unlock(&con->recovery_lock); /* only new entries are saved */ if (save_count > 0) { if (amdgpu_ras_eeprom_append(control, -- cgit v1.2.3 From fdcb279d5b798d13b4365bdcf5548855f6c562a1 Mon Sep 17 00:00:00 2001 From: "Stanley.Yang" Date: Thu, 18 Nov 2021 16:30:43 +0800 Subject: drm/amdgpu: query umc error info from ecc_table v2 if smu support ECCTABLE, driver can message smu to get ecc_table then query umc error info from ECCTABLE v2: optimize source code makes logical more reasonable Signed-off-by: Stanley.Yang Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 42 ++++++++++++++----- drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c | 72 +++++++++++++++++++++++---------- 2 files changed, 83 insertions(+), 31 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index 53b957a5b9a6..46910e7b2927 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -892,6 +892,38 @@ void amdgpu_ras_mca_query_error_status(struct amdgpu_device *adev, } } +static void amdgpu_ras_get_ecc_info(struct amdgpu_device *adev, struct ras_err_data *err_data) +{ + struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); + int ret = 0; + + /* + * choosing right query method according to + * whether smu support query error information + */ + ret = smu_get_ecc_info(&adev->smu, (void *)&(ras->umc_ecc)); + if (ret == -EOPNOTSUPP) { + if (adev->umc.ras_funcs && + adev->umc.ras_funcs->query_ras_error_count) + adev->umc.ras_funcs->query_ras_error_count(adev, err_data); + + /* umc query_ras_error_address is also responsible for clearing + * error status + */ + if (adev->umc.ras_funcs && + adev->umc.ras_funcs->query_ras_error_address) + adev->umc.ras_funcs->query_ras_error_address(adev, err_data); + } else if (!ret) { + if (adev->umc.ras_funcs && + adev->umc.ras_funcs->ecc_info_query_ras_error_count) + adev->umc.ras_funcs->ecc_info_query_ras_error_count(adev, err_data); + + if (adev->umc.ras_funcs && + adev->umc.ras_funcs->ecc_info_query_ras_error_address) + adev->umc.ras_funcs->ecc_info_query_ras_error_address(adev, err_data); + } +} + /* query/inject/cure begin */ int amdgpu_ras_query_error_status(struct amdgpu_device *adev, struct ras_query_if *info) @@ -905,15 +937,7 @@ int amdgpu_ras_query_error_status(struct amdgpu_device *adev, switch (info->head.block) { case AMDGPU_RAS_BLOCK__UMC: - if (adev->umc.ras_funcs && - adev->umc.ras_funcs->query_ras_error_count) - adev->umc.ras_funcs->query_ras_error_count(adev, &err_data); - /* umc query_ras_error_address is also responsible for clearing - * error status - */ - if (adev->umc.ras_funcs && - adev->umc.ras_funcs->query_ras_error_address) - adev->umc.ras_funcs->query_ras_error_address(adev, &err_data); + amdgpu_ras_get_ecc_info(adev, &err_data); break; case AMDGPU_RAS_BLOCK__SDMA: if (adev->sdma.funcs->query_ras_error_count) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c index a90029ee9733..6e4bea012ea4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c @@ -94,30 +94,58 @@ int amdgpu_umc_process_ras_data_cb(struct amdgpu_device *adev, { struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status; struct amdgpu_ras *con = amdgpu_ras_get_context(adev); + int ret = 0; kgd2kfd_set_sram_ecc_flag(adev->kfd.dev); - if (adev->umc.ras_funcs && - adev->umc.ras_funcs->query_ras_error_count) - adev->umc.ras_funcs->query_ras_error_count(adev, ras_error_status); - - if (adev->umc.ras_funcs && - adev->umc.ras_funcs->query_ras_error_address && - adev->umc.max_ras_err_cnt_per_query) { - err_data->err_addr = - kcalloc(adev->umc.max_ras_err_cnt_per_query, - sizeof(struct eeprom_table_record), GFP_KERNEL); - - /* still call query_ras_error_address to clear error status - * even NOMEM error is encountered - */ - if(!err_data->err_addr) - dev_warn(adev->dev, "Failed to alloc memory for " - "umc error address record!\n"); - - /* umc query_ras_error_address is also responsible for clearing - * error status - */ - adev->umc.ras_funcs->query_ras_error_address(adev, ras_error_status); + ret = smu_get_ecc_info(&adev->smu, (void *)&(con->umc_ecc)); + if (ret == -EOPNOTSUPP) { + if (adev->umc.ras_funcs && + adev->umc.ras_funcs->query_ras_error_count) + adev->umc.ras_funcs->query_ras_error_count(adev, ras_error_status); + + if (adev->umc.ras_funcs && + adev->umc.ras_funcs->query_ras_error_address && + adev->umc.max_ras_err_cnt_per_query) { + err_data->err_addr = + kcalloc(adev->umc.max_ras_err_cnt_per_query, + sizeof(struct eeprom_table_record), GFP_KERNEL); + + /* still call query_ras_error_address to clear error status + * even NOMEM error is encountered + */ + if(!err_data->err_addr) + dev_warn(adev->dev, "Failed to alloc memory for " + "umc error address record!\n"); + + /* umc query_ras_error_address is also responsible for clearing + * error status + */ + adev->umc.ras_funcs->query_ras_error_address(adev, ras_error_status); + } + } else if (!ret) { + if (adev->umc.ras_funcs && + adev->umc.ras_funcs->ecc_info_query_ras_error_count) + adev->umc.ras_funcs->ecc_info_query_ras_error_count(adev, ras_error_status); + + if (adev->umc.ras_funcs && + adev->umc.ras_funcs->ecc_info_query_ras_error_address && + adev->umc.max_ras_err_cnt_per_query) { + err_data->err_addr = + kcalloc(adev->umc.max_ras_err_cnt_per_query, + sizeof(struct eeprom_table_record), GFP_KERNEL); + + /* still call query_ras_error_address to clear error status + * even NOMEM error is encountered + */ + if(!err_data->err_addr) + dev_warn(adev->dev, "Failed to alloc memory for " + "umc error address record!\n"); + + /* umc query_ras_error_address is also responsible for clearing + * error status + */ + adev->umc.ras_funcs->ecc_info_query_ras_error_address(adev, ras_error_status); + } } /* only uncorrectable error needs gpu reset */ -- cgit v1.2.3 From 232d1d43b522b64266a16606e918ce92a8a0b244 Mon Sep 17 00:00:00 2001 From: "Stanley.Yang" Date: Fri, 26 Nov 2021 17:24:39 +0800 Subject: drm/amdgpu: fix disable ras feature failed when unload drvier v2 v2: still need call ras_disable_all_featrures to handle ras initilization failure case. Function amdgpu_device_fini_hw is called before amdgpu_device_fini_sw, so ras ta will unload before send ras disable command, ras dsiable operation must before hw fini. Signed-off-by: Stanley.Yang Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 5 +++-- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 1 - 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 788b254caaa5..efa9ff5dcd26 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -2733,8 +2733,6 @@ static int amdgpu_device_ip_fini(struct amdgpu_device *adev) if (amdgpu_sriov_vf(adev) && adev->virt.ras_init_done) amdgpu_virt_release_ras_err_handler_data(adev); - amdgpu_ras_pre_fini(adev); - if (adev->gmc.xgmi.num_physical_nodes > 1) amdgpu_xgmi_remove_device(adev); @@ -3844,6 +3842,9 @@ void amdgpu_device_fini_hw(struct amdgpu_device *adev) amdgpu_ucode_sysfs_fini(adev); sysfs_remove_files(&adev->dev->kobj, amdgpu_dev_attributes); + /* disable ras feature must before hw fini */ + amdgpu_ras_pre_fini(adev); + amdgpu_device_ip_fini_early(adev); amdgpu_irq_fini_hw(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index 46910e7b2927..3c623e589b79 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -2503,7 +2503,6 @@ void amdgpu_ras_late_fini(struct amdgpu_device *adev, amdgpu_ras_sysfs_remove(adev, ras_block); if (ih_info->cb) amdgpu_ras_interrupt_remove_handler(adev, ih_info); - amdgpu_ras_feature_enable(adev, ras_block, 0); } /* do some init work after IP late init as dependence. -- cgit v1.2.3 From bab73f092da654d149bb4771c418bf585c06044a Mon Sep 17 00:00:00 2001 From: "Stanley.Yang" Date: Thu, 2 Dec 2021 13:06:05 +0800 Subject: drm/amdgpu: skip query ecc info in gpu recovery this is a workaround due to get ecc info failed during gpu recovery [ 700.236122] amdgpu 0000:09:00.0: amdgpu: Failed to export SMU ecc table! [ 700.236128] amdgpu 0000:09:00.0: amdgpu: GPU reset begin! [ 704.331171] amdgpu: qcm fence wait loop timeout expired [ 704.331194] amdgpu: The cp might be in an unrecoverable state due to an unsuccessful queues preemption [ 704.332445] amdgpu 0000:09:00.0: amdgpu: GPU reset begin! [ 704.332448] amdgpu 0000:09:00.0: amdgpu: Bailing on TDR for s_job:ffffffffffffffff, as another already in progress [ 704.332456] amdgpu: Pasid 0x8000 destroy queue 0 failed, ret -62 [ 710.360924] amdgpu 0000:09:00.0: amdgpu: SMU: I'm not done with your previous command: SMN_C2PMSG_66:0x00000013 SMN_C2PMSG_82:0x00000007 [ 710.360964] amdgpu 0000:09:00.0: amdgpu: Failed to disable smu features. [ 710.361002] amdgpu 0000:09:00.0: amdgpu: Fail to disable dpm features! [ 710.361014] [drm:amdgpu_device_ip_suspend_phase2 [amdgpu]] *ERROR* suspend of IP block failed -62 Signed-off-by: Stanley.Yang Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index 3c623e589b79..28678c8f4eb2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -897,6 +897,10 @@ static void amdgpu_ras_get_ecc_info(struct amdgpu_device *adev, struct ras_err_d struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); int ret = 0; + /* skip get ecc info during gpu recovery */ + if (atomic_read(&ras->in_recovery) == 1) + return; + /* * choosing right query method according to * whether smu support query error information -- cgit v1.2.3 From aed1faab9d9563ca5ac5139b0170486027ec74a7 Mon Sep 17 00:00:00 2001 From: "Stanley.Yang" Date: Fri, 3 Dec 2021 13:08:41 +0800 Subject: drm/amdgpu: only skip get ecc info for aldebaran skip get ecc info for aldebarn through check ip version do not affect other asic type Signed-off-by: Stanley.Yang Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index 28678c8f4eb2..1043d41b6807 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -898,7 +898,8 @@ static void amdgpu_ras_get_ecc_info(struct amdgpu_device *adev, struct ras_err_d int ret = 0; /* skip get ecc info during gpu recovery */ - if (atomic_read(&ras->in_recovery) == 1) + if (atomic_read(&ras->in_recovery) == 1 && + adev->ip_versions[MP1_HWIP][0] == IP_VERSION(13, 0, 2)) return; /* -- cgit v1.2.3 From cf63b702720d734cb4144440d72d4b2ac6c494f8 Mon Sep 17 00:00:00 2001 From: "Stanley.Yang" Date: Tue, 7 Dec 2021 14:28:58 +0800 Subject: drm/amdgpu: skip umc ras error count harvest remove in recovery stat check, skip umc ras err cnt harvest in amdgpu_ras_log_on_err_counter Signed-off-by: Stanley.Yang Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index 1043d41b6807..a95d200adff9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -897,11 +897,6 @@ static void amdgpu_ras_get_ecc_info(struct amdgpu_device *adev, struct ras_err_d struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); int ret = 0; - /* skip get ecc info during gpu recovery */ - if (atomic_read(&ras->in_recovery) == 1 && - adev->ip_versions[MP1_HWIP][0] == IP_VERSION(13, 0, 2)) - return; - /* * choosing right query method according to * whether smu support query error information @@ -1752,6 +1747,16 @@ static void amdgpu_ras_log_on_err_counter(struct amdgpu_device *adev) if (info.head.block == AMDGPU_RAS_BLOCK__PCIE_BIF) continue; + /* + * this is a workaround for aldebaran, skip send msg to + * smu to get ecc_info table due to smu handle get ecc + * info table failed temporarily. + * should be removed until smu fix handle ecc_info table. + */ + if ((info.head.block == AMDGPU_RAS_BLOCK__UMC) && + (adev->ip_versions[MP1_HWIP][0] == IP_VERSION(13, 0, 2))) + continue; + amdgpu_ras_query_error_status(adev, &info); } } -- cgit v1.2.3 From 655ff3538eee3a3dca7103f97de883e033bd1011 Mon Sep 17 00:00:00 2001 From: Tao Zhou Date: Wed, 8 Dec 2021 14:01:09 +0800 Subject: drm/amdgpu: enable RAS poison flag when GPU is connected to CPU The RAS poison mode is enabled by default on the platform. Signed-off-by: Tao Zhou Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index a95d200adff9..0003f2c64da8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -2372,7 +2372,11 @@ int amdgpu_ras_init(struct amdgpu_device *adev) } /* Init poison supported flag, the default value is false */ - if (adev->df.funcs && + if (adev->gmc.xgmi.connected_to_cpu) { + /* enabled by default when GPU is connected to CPU */ + con->poison_supported = true; + } + else if (adev->df.funcs && adev->df.funcs->query_ras_poison_mode && adev->umc.ras_funcs && adev->umc.ras_funcs->query_ras_poison_mode) { -- cgit v1.2.3 From bbe04dec5c52a075175a627d510140d386a71f98 Mon Sep 17 00:00:00 2001 From: Isabella Basso Date: Tue, 7 Dec 2021 22:25:21 -0300 Subject: drm/amd: fix improper docstring syntax This fixes various warnings relating to erroneous docstring syntax, of which some are listed below: warning: Function parameter or member 'adev' not described in 'amdgpu_atomfirmware_ras_rom_addr' ... warning: expecting prototype for amdgpu_atpx_validate_functions(). Prototype was for amdgpu_atpx_validate() instead ... warning: Excess function parameter 'mem' description in 'amdgpu_preempt_mgr_new' ... warning: Cannot understand * @kfd_get_cu_occupancy - Collect number of waves in-flight on this device ... warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst Signed-off-by: Isabella Basso Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c | 4 ++-- drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 4 ++-- drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_pll.c | 2 ++ drivers/gpu/drm/amd/amdgpu/amdgpu_preempt_mgr.c | 4 ++-- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 6 +++--- drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c | 1 - drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c | 2 -- drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c | 2 -- drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 4 ++-- drivers/gpu/drm/amd/amdkfd/kfd_process.c | 5 ++--- drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 7 +++++++ drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c | 4 ++++ drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c | 2 +- 15 files changed, 29 insertions(+), 22 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c index 97178b307ed6..4d4ddf026faf 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c @@ -470,8 +470,8 @@ bool amdgpu_atomfirmware_dynamic_boot_config_supported(struct amdgpu_device *ade /** * amdgpu_atomfirmware_ras_rom_addr -- Get the RAS EEPROM addr from VBIOS - * adev: amdgpu_device pointer - * i2c_address: pointer to u8; if not NULL, will contain + * @adev: amdgpu_device pointer + * @i2c_address: pointer to u8; if not NULL, will contain * the RAS EEPROM address if the function returns true * * Return true if VBIOS supports RAS EEPROM address reporting, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c index 7abe9500c0c6..c2be6ad62308 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c @@ -165,7 +165,7 @@ static void amdgpu_atpx_parse_functions(struct amdgpu_atpx_functions *f, u32 mas } /** - * amdgpu_atpx_validate_functions - validate ATPX functions + * amdgpu_atpx_validate - validate ATPX functions * * @atpx: amdgpu atpx struct * diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 262a6b6d4a07..d9de92886dca 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -331,7 +331,7 @@ void amdgpu_device_mm_access(struct amdgpu_device *adev, loff_t pos, } /** - * amdgpu_device_vram_access - access vram by vram aperature + * amdgpu_device_aper_access - access vram by vram aperature * * @adev: amdgpu_device pointer * @pos: offset of the buffer in vram @@ -3801,7 +3801,7 @@ static void amdgpu_device_unmap_mmio(struct amdgpu_device *adev) } /** - * amdgpu_device_fini - tear down the driver + * amdgpu_device_fini_hw - tear down the driver * * @adev: amdgpu_device pointer * diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c index 3907fc726ab2..e4618c7777b7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c @@ -390,7 +390,7 @@ void amdgpu_irq_fini_hw(struct amdgpu_device *adev) } /** - * amdgpu_irq_fini - shut down interrupt handling + * amdgpu_irq_fini_sw - shut down interrupt handling * * @adev: amdgpu device pointer * diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pll.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pll.c index 4eaec446b49d..0bb2466d539a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pll.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pll.c @@ -69,6 +69,7 @@ static void amdgpu_pll_reduce_ratio(unsigned *nom, unsigned *den, /** * amdgpu_pll_get_fb_ref_div - feedback and ref divider calculation * + * @adev: amdgpu_device pointer * @nom: nominator * @den: denominator * @post_div: post divider @@ -106,6 +107,7 @@ static void amdgpu_pll_get_fb_ref_div(struct amdgpu_device *adev, unsigned int n /** * amdgpu_pll_compute - compute PLL paramaters * + * @adev: amdgpu_device pointer * @pll: information about the PLL * @freq: requested frequency * @dot_clock_p: resulting pixel clock diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_preempt_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_preempt_mgr.c index d02c8637f909..786afe4f58f9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_preempt_mgr.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_preempt_mgr.c @@ -59,7 +59,7 @@ static DEVICE_ATTR_RO(mem_info_preempt_used); * @man: TTM memory type manager * @tbo: TTM BO we need this range for * @place: placement flags and restrictions - * @mem: the resulting mem object + * @res: TTM memory object * * Dummy, just count the space used without allocating resources or any limit. */ @@ -85,7 +85,7 @@ static int amdgpu_preempt_mgr_new(struct ttm_resource_manager *man, * amdgpu_preempt_mgr_del - free ranges * * @man: TTM memory type manager - * @mem: TTM memory object + * @res: TTM memory object * * Free the allocated GTT again. */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index 0003f2c64da8..e079bfd10bd3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -1161,9 +1161,9 @@ int amdgpu_ras_error_inject(struct amdgpu_device *adev, /** * amdgpu_ras_query_error_count -- Get error counts of all IPs - * adev: pointer to AMD GPU device - * ce_count: pointer to an integer to be set to the count of correctible errors. - * ue_count: pointer to an integer to be set to the count of uncorrectible + * @adev: pointer to AMD GPU device + * @ce_count: pointer to an integer to be set to the count of correctible errors. + * @ue_count: pointer to an integer to be set to the count of uncorrectible * errors. * * If set, @ce_count or @ue_count, count and return the corresponding diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c index 688bef1649b5..344f711ad144 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c @@ -434,7 +434,6 @@ void amdgpu_vce_free_handles(struct amdgpu_device *adev, struct drm_file *filp) * * @ring: ring we should submit the msg to * @handle: VCE session handle to use - * @bo: amdgpu object for which we query the offset * @fence: optional fence to return * * Open up a stream for HW test diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c index 853d1511b889..81e033549dda 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c @@ -481,8 +481,6 @@ static void sdma_v5_0_ring_emit_ib(struct amdgpu_ring *ring, * sdma_v5_0_ring_emit_mem_sync - flush the IB by graphics cache rinse * * @ring: amdgpu ring pointer - * @job: job to retrieve vmid from - * @ib: IB object to schedule * * flush the IB by graphics cache rinse. */ diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c index 4d4d1aa51b8a..4f546f632223 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c @@ -368,8 +368,6 @@ static void sdma_v5_2_ring_emit_ib(struct amdgpu_ring *ring, * sdma_v5_2_ring_emit_mem_sync - flush the IB by graphics cache rinse * * @ring: amdgpu ring pointer - * @job: job to retrieve vmid from - * @ib: IB object to schedule * * flush the IB by graphics cache rinse. */ diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c index d84cec0022b1..48c2f2b6e217 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c @@ -108,8 +108,8 @@ error_free: * svm_migrate_copy_memory_gart - sdma copy data between ram and vram * * @adev: amdgpu device the sdma ring running - * @src: source page address array - * @dst: destination page address array + * @sys: system DMA pointer to be copied + * @vram: vram destination DMA pointer * @npages: number of pages to copy * @direction: enum MIGRATION_COPY_DIR * @mfence: output, sdma fence to signal after sdma is done diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index 9158f9754a24..f1930ff2c74a 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -251,14 +251,13 @@ cleanup: } /** - * @kfd_get_cu_occupancy - Collect number of waves in-flight on this device + * kfd_get_cu_occupancy - Collect number of waves in-flight on this device * by current process. Translates acquired wave count into number of compute units * that are occupied. * - * @atr: Handle of attribute that allows reporting of wave count. The attribute + * @attr: Handle of attribute that allows reporting of wave count. The attribute * handle encapsulates GPU device it is associated with, thereby allowing collection * of waves in flight, etc - * * @buffer: Handle of user provided buffer updated with wave count * * Return: Number of bytes written to user buffer or an error value diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index f2db49c7a8fd..82cb45e30197 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -1650,6 +1650,10 @@ out_reschedule: /** * svm_range_evict - evict svm range + * @prange: svm range structure + * @mm: current process mm_struct + * @start: starting process queue number + * @last: last process queue number * * Stop all queues of the process to ensure GPU doesn't access the memory, then * return to let CPU evict the buffer and proceed CPU pagetable update. @@ -2161,6 +2165,9 @@ svm_range_unmap_from_cpu(struct mm_struct *mm, struct svm_range *prange, /** * svm_range_cpu_invalidate_pagetables - interval notifier callback + * @mni: mmu_interval_notifier struct + * @range: mmu_notifier_range struct + * @cur_seq: value to pass to mmu_interval_set_seq() * * If event is MMU_NOTIFY_UNMAP, this is from CPU unmap range, otherwise, it * is from migration, or CPU page invalidation callback. diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c index a022e5bb30a5..a71177305bcd 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c @@ -285,8 +285,12 @@ static int __set_input_tf(struct dc_transfer_func *func, } /** + * amdgpu_dm_verify_lut_sizes + * @crtc_state: the DRM CRTC state + * * Verifies that the Degamma and Gamma LUTs attached to the |crtc_state| are of * the expected size. + * * Returns 0 on success. */ int amdgpu_dm_verify_lut_sizes(const struct drm_crtc_state *crtc_state) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c index 048ca1673863..76f3bcfee82d 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c @@ -94,7 +94,7 @@ static void smu_cmn_read_arg(struct smu_context *smu, /** * __smu_cmn_poll_stat -- poll for a status from the SMU - * smu: a pointer to SMU context + * @smu: a pointer to SMU context * * Returns the status of the SMU, which could be, * 0, the SMU is busy with your command; -- cgit v1.2.3 From 929bb8e200412da36aca4b61209ec26283f9c184 Mon Sep 17 00:00:00 2001 From: Isabella Basso Date: Thu, 9 Dec 2021 12:47:19 -0300 Subject: drm/amdgpu: fix amdgpu_ras_mca_query_error_status scope MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This commit fixes the compile-time warning below: warning: no previous prototype for ‘amdgpu_ras_mca_query_error_status’ [-Wmissing-prototypes] Changes since v1: - As suggested by Alexander Deucher: 1. Make function static instead of adding prototype. Signed-off-by: Isabella Basso Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index e079bfd10bd3..cd9e5914944b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -867,9 +867,9 @@ static int amdgpu_ras_enable_all_features(struct amdgpu_device *adev, /* feature ctl end */ -void amdgpu_ras_mca_query_error_status(struct amdgpu_device *adev, - struct ras_common_if *ras_block, - struct ras_err_data *err_data) +static void amdgpu_ras_mca_query_error_status(struct amdgpu_device *adev, + struct ras_common_if *ras_block, + struct ras_err_data *err_data) { switch (ras_block->sub_block_index) { case AMDGPU_RAS_MCA_BLOCK__MP0: -- cgit v1.2.3