summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
diff options
context:
space:
mode:
authorDave Airlie <airlied@redhat.com>2024-06-11 07:01:55 +0300
committerDave Airlie <airlied@redhat.com>2024-06-11 07:01:55 +0300
commit1ddaaa244021aba8496536a6627b4ad2bc0f936a (patch)
tree2b37ec6170094757daaa0c7445670eebf3b996d9 /drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
parent7957066ca614b63aa6687e825ccbc215fa4584ea (diff)
parentb95fa494d6b74c30eeb4a50481aa1041c631754e (diff)
downloadlinux-1ddaaa244021aba8496536a6627b4ad2bc0f936a.tar.xz
Merge tag 'amd-drm-next-6.11-2024-06-07' of https://gitlab.freedesktop.org/agd5f/linux into drm-next
amd-drm-next-6.11-2024-06-07: amdgpu: - DCN 4.0.x support - DCN 3.5 updates - GC 12.0 support - DP MST fixes - Cursor fixes - MES11 updates - MMHUB 4.1 support - DML2 Updates - DCN 3.1.5 fixes - IPS fixes - Various code cleanups - GMC 12.0 support - SDMA 7.0 support - SMU 13 updates - SR-IOV fixes - VCN 5.x fixes - MES12 support - SMU 14.x updates - Devcoredump improvements - Fixes for HDP flush on platforms with >4k pages - GC 9.4.3 fixes - RAS ACA updates - Silence UBSAN flex array warnings - MMHUB 3.3 updates amdkfd: - Contiguous VRAM allocations - GC 12.0 support - SDMA 7.0 support - SR-IOV fixes radeon: - Backlight workaround for iMac - Silence UBSAN flex array warnings UAPI: - GFX12 modifier and DCC support Proposed Mesa changes: https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/29510 - KFD GFX ALU exceptions Proposed ROCdebugger changes: https://github.com/ROCm/ROCdbgapi/commit/08c760622b6601abf906f75abbc5e21d9fd425df https://github.com/ROCm/ROCgdb/commit/944fe1c1414a68700414e86e32273b6bfa62ba6f - KFD Contiguous VRAM allocation flag Proposed ROCr/HIP changes: https://github.com/ROCm/ROCT-Thunk-Interface/commit/f7b4a269914a3ab4f1e2453c2879adb97b5cc9e5 https://github.com/ROCm/ROCR-Runtime/pull/214/commits/26e8530d05a775872cb06dde6693db72be0c454a https://github.com/ROCm/clr/commit/1d48f2a1ab38b632919c4b7274899b3faf4279ff Signed-off-by: Dave Airlie <airlied@redhat.com> From: Alex Deucher <alexander.deucher@amd.com> Link: https://patchwork.freedesktop.org/patch/msgid/20240607195900.902537-1-alexander.deucher@amd.com
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h')
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h22
1 files changed, 12 insertions, 10 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
index 7021c4a66fb5..e70c45712ddb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
@@ -47,12 +47,10 @@ struct amdgpu_iv_entry;
#define AMDGPU_RAS_GPU_ERR_SOCKET_ID(x) AMDGPU_GET_REG_FIELD(x, 10, 8)
#define AMDGPU_RAS_GPU_ERR_AID_ID(x) AMDGPU_GET_REG_FIELD(x, 12, 11)
#define AMDGPU_RAS_GPU_ERR_HBM_ID(x) AMDGPU_GET_REG_FIELD(x, 14, 13)
-#define AMDGPU_RAS_GPU_ERR_BOOT_STATUS(x) AMDGPU_GET_REG_FIELD(x, 31, 31)
-#define AMDGPU_RAS_BOOT_STATUS_POLLING_LIMIT 1000
+#define AMDGPU_RAS_BOOT_STATUS_POLLING_LIMIT 100
#define AMDGPU_RAS_BOOT_STEADY_STATUS 0xBA
#define AMDGPU_RAS_BOOT_STATUS_MASK 0xFF
-#define AMDGPU_RAS_BOOT_SUCEESS 0x80000000
#define AMDGPU_RAS_FLAG_INIT_BY_VBIOS (0x1 << 0)
/* position of instance value in sub_block_index of
@@ -64,16 +62,14 @@ struct amdgpu_iv_entry;
#define AMDGPU_RAS_FEATURES_SOCKETID_SHIFT 29
#define AMDGPU_RAS_FEATURES_SOCKETID_MASK 0xe0000000
+/* Reserve 8 physical dram row for possible retirement.
+ * In worst cases, it will lose 8 * 2MB memory in vram domain */
+#define AMDGPU_RAS_RESERVED_VRAM_SIZE (16ULL << 20)
/* The high three bits indicates socketid */
#define AMDGPU_RAS_GET_FEATURES(val) ((val) & ~AMDGPU_RAS_FEATURES_SOCKETID_MASK)
-#define RAS_EVENT_LOG(_adev, _id, _fmt, ...) \
-do { \
- if (amdgpu_ras_event_id_is_valid((_adev), (_id))) \
- dev_info((_adev)->dev, "{%llu}" _fmt, (_id), ##__VA_ARGS__); \
- else \
- dev_info((_adev)->dev, _fmt, ##__VA_ARGS__); \
-} while (0)
+#define RAS_EVENT_LOG(adev, id, fmt, ...) \
+ amdgpu_ras_event_log_print((adev), (id), (fmt), ##__VA_ARGS__);
enum amdgpu_ras_block {
AMDGPU_RAS_BLOCK__UMC = 0,
@@ -526,6 +522,7 @@ struct amdgpu_ras {
bool update_channel_flag;
/* Record status of smu mca debug mode */
bool is_aca_debug_mode;
+ bool is_rma;
/* Record special requirements of gpu reset caller */
uint32_t gpu_reset_flags;
@@ -546,6 +543,7 @@ struct amdgpu_ras {
struct ras_event_manager __event_mgr;
struct ras_event_manager *event_mgr;
+ uint64_t reserved_pages_in_bytes;
};
struct ras_fs_data {
@@ -956,4 +954,8 @@ int amdgpu_ras_put_poison_req(struct amdgpu_device *adev,
enum amdgpu_ras_block block, uint16_t pasid,
pasid_notify pasid_fn, void *data, uint32_t reset);
+__printf(3, 4)
+void amdgpu_ras_event_log_print(struct amdgpu_device *adev, u64 event_id,
+ const char *fmt, ...);
+
#endif