diff options
author | Dave Airlie <airlied@redhat.com> | 2021-06-03 23:13:56 +0300 |
---|---|---|
committer | Dave Airlie <airlied@redhat.com> | 2021-06-03 23:13:57 +0300 |
commit | 5745d647d5563d3e9d32013ad4e5c629acff04d7 (patch) | |
tree | 8ce79933def43fcb34b675242304015457f25ef5 /drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | |
parent | ccd1950c2f7e38ae45aeefb99a08b39407cd6c63 (diff) | |
parent | 7d9c70d23550eb86a1bec1954ccaa8d6ec3a3328 (diff) | |
download | linux-5745d647d5563d3e9d32013ad4e5c629acff04d7.tar.xz |
Merge tag 'amd-drm-next-5.14-2021-06-02' of https://gitlab.freedesktop.org/agd5f/linux into drm-next
amd-drm-next-5.14-2021-06-02:
amdgpu:
- GC/MM register access macro clean up for SR-IOV
- Beige Goby updates
- W=1 Fixes
- Aldebaran fixes
- Misc display fixes
- ACPI ATCS/ATIF handling rework
- SR-IOV fixes
- RAS fixes
- 16bpc fixed point format support
- Initial smartshift support
- RV/PCO power tuning fixes for suspend/resume
- More buffer object subclassing work
- Add new INFO query for additional vbios information
- Add new placement for preemptable SG buffers
amdkfd:
- Misc fixes
radeon:
- W=1 Fixes
- Misc cleanups
UAPI:
- Add new INFO query for additional vbios information
Useful for debugging vbios related issues. Proposed umr patch:
https://patchwork.freedesktop.org/patch/433297/
- 16bpc fixed point format support
IGT test:
https://lists.freedesktop.org/archives/igt-dev/2021-May/031507.html
Proposed Vulkan patch:
https://github.com/kleinerm/pal/commit/a25d4802074b13a8d5f7edc96ae45469ecbac3c4
- Add a new GEM flag which is only used internally in the kernel driver. Userspace
is not allowed to set it.
drm:
- 16bpc fixed point format fourcc
Signed-off-by: Dave Airlie <airlied@redhat.com>
From: Alex Deucher <alexander.deucher@amd.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20210602214009.4553-1-alexander.deucher@amd.com
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c')
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 63 |
1 files changed, 55 insertions, 8 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index c2c791ca00f4..9dfc1ebb41a9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -27,6 +27,7 @@ #include <linux/uaccess.h> #include <linux/reboot.h> #include <linux/syscalls.h> +#include <linux/pm_runtime.h> #include "amdgpu.h" #include "amdgpu_ras.h" @@ -1043,29 +1044,36 @@ int amdgpu_ras_error_inject(struct amdgpu_device *adev, } /* get the total error counts on all IPs */ -unsigned long amdgpu_ras_query_error_count(struct amdgpu_device *adev, - bool is_ce) +void amdgpu_ras_query_error_count(struct amdgpu_device *adev, + unsigned long *ce_count, + unsigned long *ue_count) { struct amdgpu_ras *con = amdgpu_ras_get_context(adev); struct ras_manager *obj; - struct ras_err_data data = {0, 0}; + unsigned long ce, ue; if (!adev->ras_enabled || !con) - return 0; + return; + ce = 0; + ue = 0; list_for_each_entry(obj, &con->head, node) { struct ras_query_if info = { .head = obj->head, }; if (amdgpu_ras_query_error_status(adev, &info)) - return 0; + return; - data.ce_count += info.ce_count; - data.ue_count += info.ue_count; + ce += info.ce_count; + ue += info.ue_count; } - return is_ce ? data.ce_count : data.ue_count; + if (ce_count) + *ce_count = ce; + + if (ue_count) + *ue_count = ue; } /* query/inject/cure end */ @@ -2109,6 +2117,30 @@ static void amdgpu_ras_check_supported(struct amdgpu_device *adev) adev->ras_hw_enabled & amdgpu_ras_mask; } +static void amdgpu_ras_counte_dw(struct work_struct *work) +{ + struct amdgpu_ras *con = container_of(work, struct amdgpu_ras, + ras_counte_delay_work.work); + struct amdgpu_device *adev = con->adev; + struct drm_device *dev = &adev->ddev; + unsigned long ce_count, ue_count; + int res; + + res = pm_runtime_get_sync(dev->dev); + if (res < 0) + goto Out; + + /* Cache new values. + */ + amdgpu_ras_query_error_count(adev, &ce_count, &ue_count); + atomic_set(&con->ras_ce_count, ce_count); + atomic_set(&con->ras_ue_count, ue_count); + + pm_runtime_mark_last_busy(dev->dev); +Out: + pm_runtime_put_autosuspend(dev->dev); +} + int amdgpu_ras_init(struct amdgpu_device *adev) { struct amdgpu_ras *con = amdgpu_ras_get_context(adev); @@ -2123,6 +2155,11 @@ int amdgpu_ras_init(struct amdgpu_device *adev) if (!con) return -ENOMEM; + con->adev = adev; + INIT_DELAYED_WORK(&con->ras_counte_delay_work, amdgpu_ras_counte_dw); + atomic_set(&con->ras_ce_count, 0); + atomic_set(&con->ras_ue_count, 0); + con->objs = (struct ras_manager *)(con + 1); amdgpu_ras_set_context(adev, con); @@ -2226,6 +2263,8 @@ int amdgpu_ras_late_init(struct amdgpu_device *adev, struct ras_fs_if *fs_info, struct ras_ih_if *ih_info) { + struct amdgpu_ras *con = amdgpu_ras_get_context(adev); + unsigned long ue_count, ce_count; int r; /* disable RAS feature per IP block if it is not supported */ @@ -2266,6 +2305,12 @@ int amdgpu_ras_late_init(struct amdgpu_device *adev, if (r) goto sysfs; + /* Those are the cached values at init. + */ + amdgpu_ras_query_error_count(adev, &ce_count, &ue_count); + atomic_set(&con->ras_ce_count, ce_count); + atomic_set(&con->ras_ue_count, ue_count); + return 0; cleanup: amdgpu_ras_sysfs_remove(adev, ras_block); @@ -2384,6 +2429,8 @@ int amdgpu_ras_fini(struct amdgpu_device *adev) if (con->features) amdgpu_ras_disable_all_features(adev, 1); + cancel_delayed_work_sync(&con->ras_counte_delay_work); + amdgpu_ras_set_context(adev, NULL); kfree(con); |