diff options
author | Dave Airlie <airlied@redhat.com> | 2025-02-26 09:41:44 +0300 |
---|---|---|
committer | Dave Airlie <airlied@redhat.com> | 2025-02-26 09:41:44 +0300 |
commit | 425b8481750abce45fa4aeecf6c32152cadbfa15 (patch) | |
tree | d97104ee978be5c85923cdf3071f54f63b04b805 /drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | |
parent | fb51bf02551958265b7116f6ba92752295c83c26 (diff) | |
parent | 3521276ad14fe47ce1c4382749f3c95762629375 (diff) | |
download | linux-425b8481750abce45fa4aeecf6c32152cadbfa15.tar.xz |
Merge tag 'amd-drm-next-6.15-2025-02-21' of https://gitlab.freedesktop.org/agd5f/linux into drm-next
amd-drm-next-6.15-2025-02-21:
amdgpu:
- Add OEM i2c support for RGB lights, etc.
- Add support for GC 11.5.3
- Add support for GC 11.5.2
- Add support for SDMA 6.1.3
- Add support for NBIO 7.11.2
- Add support for NBIO 7.9.1
- Add support for MMHUB 3.3.2
- Add support for MMHUB 1.8.1
- Add support for SMU 14.0.5
- Add support for SMUIO 13.0.11
- Add support for PSP 14.0.5
- Add support for UMC 12.5.0
- Add support for DCN 3.6.0
- JPEG 4.0.3 updates
- Add dynamic workload profile switching for GC 10-12
- support larger vbios sizes
- GC 9.5.0 updates
- SMU 13.0.12 updates
- SMU 13.0.6 updates
- IP discovery updates
- GC 10 queue reset updates
- DCN 4.0.1 updates
- UHBR link rate fixes
- Aborted suspend fix
- Mark gttsize parameter as deprecated
- GC 10 cleaner shader updates
- PSR-SU fixes
- Clean up PM4 headers
- Cursor fixes
- Enable devcoredump for JPEG
- Misc cleanups
- Runpm cleanups
- MES updates
- GC 9 gfxoff fixes
- Vbios fetching cleanups
- Documentation updates
- Update secondary plane handling
- DML2 updates
- SDMA fixes for MI
- Cleaner shader fixes for GC 11/12
- ACA updates
- Initial JPEG queue reset support
- RAS updates
- Initial RAS CPER support
- DCN/DCE panic screen handling cleanup
- BT2020 fixes
- SR-IOV fixes
amdkfd:
- synchronize pasid values between KGD and KFD
- Misc cleanups
- Improve GTT/VRAM handling for APUs
- Topology updates
- Fix user queue validation on GC 7/8
UAPI:
- Enable "Broadcast RGB" drm property
- Add INFO IOCTL query for virtualization mode
Proposed userspace:
https://github.com/ROCm/amdsmi/commit/e663bed7d6b3df79f5959e73981749b1f22ec698
From: Alex Deucher <alexander.deucher@amd.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20250221213651.4176031-1-alexander.deucher@amd.com
Signed-off-by: Dave Airlie <airlied@redhat.com>
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c')
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 48 |
1 files changed, 26 insertions, 22 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index f0924aa3f4e4..3c3312bbfee8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -1864,6 +1864,9 @@ int amdgpu_ras_sysfs_create(struct amdgpu_device *adev, if (!obj || obj->attr_inuse) return -EINVAL; + if (amdgpu_sriov_vf(adev) && !amdgpu_virt_ras_telemetry_block_en(adev, head->block)) + return 0; + get_obj(obj); snprintf(obj->fs_data.sysfs_name, sizeof(obj->fs_data.sysfs_name), @@ -3080,31 +3083,29 @@ static void amdgpu_ras_validate_threshold(struct amdgpu_device *adev, struct amdgpu_ras *con = amdgpu_ras_get_context(adev); /* - * Justification of value bad_page_cnt_threshold in ras structure - * - * Generally, 0 <= amdgpu_bad_page_threshold <= max record length - * in eeprom or amdgpu_bad_page_threshold == -2, introduce two - * scenarios accordingly. - * - * Bad page retirement enablement: - * - If amdgpu_bad_page_threshold = -2, - * bad_page_cnt_threshold = typical value by formula. - * - * - When the value from user is 0 < amdgpu_bad_page_threshold < - * max record length in eeprom, use it directly. - * - * Bad page retirement disablement: - * - If amdgpu_bad_page_threshold = 0, bad page retirement - * functionality is disabled, and bad_page_cnt_threshold will - * take no effect. + * amdgpu_bad_page_threshold is used to config + * the threshold for the number of bad pages. + * -1: Threshold is set to default value + * Driver will issue a warning message when threshold is reached + * and continue runtime services. + * 0: Disable bad page retirement + * Driver will not retire bad pages + * which is intended for debugging purpose. + * -2: Threshold is determined by a formula + * that assumes 1 bad page per 100M of local memory. + * Driver will continue runtime services when threhold is reached. + * 0 < threshold < max number of bad page records in EEPROM, + * A user-defined threshold is set + * Driver will halt runtime services when this custom threshold is reached. */ - - if (amdgpu_bad_page_threshold < 0) { + if (amdgpu_bad_page_threshold == -2) { u64 val = adev->gmc.mc_vram_size; do_div(val, RAS_BAD_PAGE_COVER); con->bad_page_cnt_threshold = min(lower_32_bits(val), max_count); + } else if (amdgpu_bad_page_threshold == -1) { + con->bad_page_cnt_threshold = ((con->reserved_pages_in_bytes) >> 21) << 4; } else { con->bad_page_cnt_threshold = min_t(int, max_count, amdgpu_bad_page_threshold); @@ -3759,8 +3760,9 @@ init_ras_enabled_flag: adev->ras_enabled = amdgpu_ras_enable == 0 ? 0 : adev->ras_hw_enabled & amdgpu_ras_mask; - /* aca is disabled by default */ - adev->aca.is_enabled = false; + /* aca is disabled by default except for psp v13_0_12 */ + adev->aca.is_enabled = + (amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 12)); /* bad page feature is not applicable to specific app platform */ if (adev->gmc.is_app_apu && @@ -3848,8 +3850,10 @@ static void amdgpu_ras_init_reserved_vram_size(struct amdgpu_device *adev) case IP_VERSION(13, 0, 2): case IP_VERSION(13, 0, 6): case IP_VERSION(13, 0, 12): + con->reserved_pages_in_bytes = AMDGPU_RAS_RESERVED_VRAM_SIZE_DEFAULT; + break; case IP_VERSION(13, 0, 14): - con->reserved_pages_in_bytes = AMDGPU_RAS_RESERVED_VRAM_SIZE; + con->reserved_pages_in_bytes = (AMDGPU_RAS_RESERVED_VRAM_SIZE_DEFAULT << 1); break; default: break; |