| | |
|---|---|
| author | Dave Airlie <airlied@redhat.com> 2022-03-10 02:28:38 +0300 |
| committer | Dave Airlie <airlied@redhat.com> 2022-03-10 02:28:39 +0300 |
| commit | 955ad0c8ba93256c9eeeefde5644b3480c1ddedd (patch) |
| tree | 4d5d0efc823bd171d64fbd8889df2395e220adcb /drivers/gpu/drm |
| parent | 482d7b582d7f9688a5f64ed2424157a76a17f2a7 (diff) |
| parent | 96a2f0f2c8006d338a9647e068a15c6eb299f864 (diff) |
| download | linux-955ad0c8ba93256c9eeeefde5644b3480c1ddedd.tar.xz |
Merge tag 'amd-drm-next-5.18-2022-03-09' of https://gitlab.freedesktop.org/agd5f/linux into drm-next
amd-drm-next-5.18-2022-03-09:
amdgpu:
- Misc code cleanups
- Misc display fixes
- PSR display fixes
- More RAS cleanup
- Hotplug fix
- Bump minor version for hotplug tests
- SR-IOV fixes
- GC 10.3.7 updates
- Remove some firmwares which are no longer used
- Mode2 reset refactor
- Aldebaran fixes
- Add VCN fwlog feature for VCN debugging
- CS code cleanup
- Fix clang warning
- Fix CS clean up rebase breakage
amdkfd:
- SVM fixes
- SMI event fixes and cleanups
- vmid_pasid mapping fix for gfx10.3
Signed-off-by: Dave Airlie <airlied@redhat.com>
From: Alex Deucher <alexander.deucher@amd.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20220309224439.2178877-1-alexander.deucher@amd.com
Diffstat (limited to 'drivers/gpu/drm')
83 files changed, 982 insertions, 671 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/aldebaran.c b/drivers/gpu/drm/amd/amdgpu/aldebaran.c
index a545df4efce1..c6cc493a5486 100644
--- a/drivers/gpu/drm/amd/amdgpu/aldebaran.c
+++ b/drivers/gpu/drm/amd/amdgpu/aldebaran.c
@@ -31,6 +31,17 @@
 #include "amdgpu_psp.h"
 #include "amdgpu_xgmi.h"
 
+static bool aldebaran_is_mode2_default(struct amdgpu_reset_control *reset_ctl)
+{
+    struct amdgpu_device *adev = (struct amdgpu_device *)reset_ctl->handle;
+
+    if ((adev->ip_versions[MP1_HWIP][0] == IP_VERSION(13, 0, 2) &&
+         adev->gmc.xgmi.connected_to_cpu))
+        return true;
+
+    return false;
+}
+
 static struct amdgpu_reset_handler *
 aldebaran_get_reset_handler(struct amdgpu_reset_control *reset_ctl,
                             struct amdgpu_reset_context *reset_context)
@@ -48,7 +59,7 @@ aldebaran_get_reset_handler(struct amdgpu_reset_control *reset_ctl,
         }
     }
 
-    if (adev->gmc.xgmi.connected_to_cpu) {
+    if (aldebaran_is_mode2_default(reset_ctl)) {
         list_for_each_entry(handler, &reset_ctl->reset_handlers,
                             handler_list) {
             if (handler->reset_method == AMD_RESET_METHOD_MODE2) {
@@ -136,18 +147,31 @@ static int
 aldebaran_mode2_perform_reset(struct amdgpu_reset_control *reset_ctl,
                               struct amdgpu_reset_context *reset_context)
 {
-    struct amdgpu_device *tmp_adev = NULL;
     struct amdgpu_device *adev = (struct amdgpu_device *)reset_ctl->handle;
+    struct amdgpu_device *tmp_adev = NULL;
+    struct list_head reset_device_list;
     int r = 0;
 
     dev_dbg(adev->dev, "aldebaran perform hw reset\n");
-    if (reset_context->hive == NULL) {
+
+    if (adev->ip_versions[MP1_HWIP][0] == IP_VERSION(13, 0, 2) &&
+        reset_context->hive == NULL) {
         /* Wrong context, return error */
         return -EINVAL;
     }
 
-    list_for_each_entry(tmp_adev, &reset_context->hive->device_list,
-                        gmc.xgmi.head) {
+    INIT_LIST_HEAD(&reset_device_list);
+    if (reset_context->hive) {
+        list_for_each_entry (tmp_adev,
+                             &reset_context->hive->device_list,
+                             gmc.xgmi.head)
+            list_add_tail(&tmp_adev->reset_list,
+                          &reset_device_list);
+    } else {
+        list_add_tail(&reset_context->reset_req_dev->reset_list,
+                      &reset_device_list);
+    }
+
+    list_for_each_entry (tmp_adev, &reset_device_list, reset_list) {
         mutex_lock(&tmp_adev->reset_cntl->reset_lock);
         tmp_adev->reset_cntl->active_reset = AMD_RESET_METHOD_MODE2;
     }
@@ -155,8 +179,7 @@ aldebaran_mode2_perform_reset(struct amdgpu_reset_control *reset_ctl,
      * Mode2 reset doesn't need any sync between nodes in XGMI hive, instead launch
      * them together so that they can be completed asynchronously on multiple nodes
      */
-    list_for_each_entry(tmp_adev, &reset_context->hive->device_list,
-                        gmc.xgmi.head) {
+    list_for_each_entry (tmp_adev, &reset_device_list, reset_list) {
         /* For XGMI run all resets in parallel to speed up the process */
         if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
             if (!queue_work(system_unbound_wq,
@@ -174,9 +197,7 @@ aldebaran_mode2_perform_reset(struct amdgpu_reset_control *reset_ctl,
 
     /* For XGMI wait for all resets to complete before proceed */
     if (!r) {
-        list_for_each_entry(tmp_adev,
-                            &reset_context->hive->device_list,
-                            gmc.xgmi.head) {
+        list_for_each_entry (tmp_adev, &reset_device_list, reset_list) {
             if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
                 flush_work(&tmp_adev->reset_cntl->reset_work);
                 r = tmp_adev->asic_reset_res;
@@ -186,8 +207,7 @@ aldebaran_mode2_perform_reset(struct amdgpu_reset_control *reset_ctl,
         }
     }
 
-    list_for_each_entry(tmp_adev, &reset_context->hive->device_list,
-                        gmc.xgmi.head) {
+    list_for_each_entry (tmp_adev, &reset_device_list, reset_list) {
         mutex_unlock(&tmp_adev->reset_cntl->reset_lock);
         tmp_adev->reset_cntl->active_reset = AMD_RESET_METHOD_NONE;
     }
@@ -319,16 +339,30 @@ static int
 aldebaran_mode2_restore_hwcontext(struct amdgpu_reset_control *reset_ctl,
                                   struct amdgpu_reset_context *reset_context)
 {
-    int r;
     struct amdgpu_device *tmp_adev = NULL;
+    struct list_head reset_device_list;
+    int r;
 
-    if (reset_context->hive == NULL) {
+    if (reset_context->reset_req_dev->ip_versions[MP1_HWIP][0] ==
+        IP_VERSION(13, 0, 2) &&
+        reset_context->hive == NULL) {
         /* Wrong context, return error */
         return -EINVAL;
     }
 
-    list_for_each_entry(tmp_adev, &reset_context->hive->device_list,
-                        gmc.xgmi.head) {
+    INIT_LIST_HEAD(&reset_device_list);
+    if (reset_context->hive) {
+        list_for_each_entry (tmp_adev,
+                             &reset_context->hive->device_list,
+                             gmc.xgmi.head)
+            list_add_tail(&tmp_adev->reset_list,
+                          &reset_device_list);
+    } else {
+        list_add_tail(&reset_context->reset_req_dev->reset_list,
+                      &reset_device_list);
+    }
+
+    list_for_each_entry (tmp_adev, &reset_device_list, reset_list) {
         dev_info(tmp_adev->dev,
                  "GPU reset succeeded, trying to resume\n");
         r = aldebaran_mode2_restore_ip(tmp_adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 37ff8cf5bbed..cdf0818088b3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -60,7 +60,6 @@
 #include <drm/amdgpu_drm.h>
 #include <drm/drm_gem.h>
 #include <drm/drm_ioctl.h>
-#include <drm/gpu_scheduler.h>
 
 #include <kgd_kfd_interface.h>
 #include "dm_pp_interface.h"
@@ -233,6 +232,9 @@ extern int amdgpu_cik_support;
 #endif
 extern int amdgpu_num_kcq;
 
+#define AMDGPU_VCNFW_LOG_SIZE (32 * 1024)
+extern int amdgpu_vcnfw_log;
+
 #define AMDGPU_VM_MAX_NUM_CTX           4096
 #define AMDGPU_SG_THRESHOLD             (256*1024*1024)
 #define AMDGPU_DEFAULT_GTT_SIZE_MB      3072ULL /* 3GB by default */
@@ -274,9 +276,6 @@ extern int amdgpu_num_kcq;
 #define AMDGPU_SMARTSHIFT_MIN_BIAS (-100)
 
 struct amdgpu_device;
-struct amdgpu_ib;
-struct amdgpu_cs_parser;
-struct amdgpu_job;
 struct amdgpu_irq_src;
 struct amdgpu_fpriv;
 struct amdgpu_bo_va_mapping;
@@ -465,20 +464,6 @@ struct amdgpu_flip_work {
 
 /*
- * CP & rings.
- */
-
-struct amdgpu_ib {
-    struct amdgpu_sa_bo     *sa_bo;
-    uint32_t                length_dw;
-    uint64_t                gpu_addr;
-    uint32_t                *ptr;
-    uint32_t                flags;
-};
-
-extern const struct drm_sched_backend_ops amdgpu_sched_ops;
-
-/*
  * file private structure
  */
@@ -493,79 +478,6 @@ struct amdgpu_fpriv {
 
 int amdgpu_file_to_fpriv(struct file *filp, struct amdgpu_fpriv **fpriv);
 
-int amdgpu_ib_get(struct amdgpu_device *adev, struct amdgpu_vm *vm,
-                  unsigned size,
-                  enum amdgpu_ib_pool_type pool,
-                  struct amdgpu_ib *ib);
-void amdgpu_ib_free(struct amdgpu_device *adev, struct amdgpu_ib *ib,
-                    struct dma_fence *f);
-int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
-                       struct amdgpu_ib *ibs, struct amdgpu_job *job,
-                       struct dma_fence **f);
-int amdgpu_ib_pool_init(struct amdgpu_device *adev);
-void amdgpu_ib_pool_fini(struct amdgpu_device *adev);
-int amdgpu_ib_ring_tests(struct amdgpu_device *adev);
-
-/*
- * CS.
- */
-struct amdgpu_cs_chunk {
-    uint32_t            chunk_id;
-    uint32_t            length_dw;
-    void                *kdata;
-};
-
-struct amdgpu_cs_post_dep {
-    struct drm_syncobj *syncobj;
-    struct dma_fence_chain *chain;
-    u64 point;
-};
-
-struct amdgpu_cs_parser {
-    struct amdgpu_device    *adev;
-    struct drm_file         *filp;
-    struct amdgpu_ctx       *ctx;
-
-    /* chunks */
-    unsigned                nchunks;
-    struct amdgpu_cs_chunk  *chunks;
-
-    /* scheduler job object */
-    struct amdgpu_job       *job;
-    struct drm_sched_entity *entity;
-
-    /* buffer objects */
-    struct ww_acquire_ctx   ticket;
-    struct amdgpu_bo_list   *bo_list;
-    struct amdgpu_mn        *mn;
-    struct amdgpu_bo_list_entry vm_pd;
-    struct list_head        validated;
-    struct dma_fence        *fence;
-    uint64_t                bytes_moved_threshold;
-    uint64_t                bytes_moved_vis_threshold;
-    uint64_t                bytes_moved;
-    uint64_t                bytes_moved_vis;
-
-    /* user fence */
-    struct amdgpu_bo_list_entry uf_entry;
-
-    unsigned                num_post_deps;
-    struct amdgpu_cs_post_dep *post_deps;
-};
-
-static inline u32 amdgpu_get_ib_value(struct amdgpu_cs_parser *p,
-                                      uint32_t ib_idx, int idx)
-{
-    return p->job->ibs[ib_idx].ptr[idx];
-}
-
-static inline void amdgpu_set_ib_value(struct amdgpu_cs_parser *p,
-                                       uint32_t ib_idx, int idx,
-                                       uint32_t value)
-{
-    p->job->ibs[ib_idx].ptr[idx] = value;
-}
-
 /*
  * Writeback
  */
@@ -1436,10 +1348,6 @@ static inline bool amdgpu_acpi_is_s0ix_active(struct amdgpu_device *adev) { retu
 static inline bool amdgpu_acpi_is_s3_active(struct amdgpu_device *adev) { return false; }
 #endif
 
-int amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser,
-                           uint64_t addr, struct amdgpu_bo **bo,
-                           struct amdgpu_bo_va_mapping **mapping);
-
 #if defined(CONFIG_DRM_AMD_DC)
 int amdgpu_dm_display_resume(struct amdgpu_device *adev );
 #else
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c
index e9c80ce13f3e..ba21ec6b35e0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c
@@ -26,6 +26,8 @@
 #include "gc/gc_10_3_0_sh_mask.h"
 #include "oss/osssys_5_0_0_offset.h"
 #include "oss/osssys_5_0_0_sh_mask.h"
+#include "athub/athub_2_1_0_offset.h"
+#include "athub/athub_2_1_0_sh_mask.h"
 #include "soc15_common.h"
 #include "v10_structs.h"
 #include "nv.h"
@@ -606,6 +608,18 @@ static int wave_control_execute_v10_3(struct amdgpu_device *adev,
     return 0;
 }
 
+static bool get_atc_vmid_pasid_mapping_info_v10_3(struct amdgpu_device *adev,
+                uint8_t vmid, uint16_t *p_pasid)
+{
+    uint32_t value;
+
+    value = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING)
+                   + vmid);
+    *p_pasid = value & ATC_VMID0_PASID_MAPPING__PASID_MASK;
+
+    return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK);
+}
+
 static void set_vm_context_page_table_base_v10_3(struct amdgpu_device *adev,
         uint32_t vmid, uint64_t page_table_base)
 {
@@ -788,7 +802,7 @@ const struct kfd2kgd_calls gfx_v10_3_kfd2kgd = {
     .hqd_destroy = hqd_destroy_v10_3,
     .hqd_sdma_destroy = hqd_sdma_destroy_v10_3,
     .wave_control_execute = wave_control_execute_v10_3,
-    .get_atc_vmid_pasid_mapping_info = NULL,
+    .get_atc_vmid_pasid_mapping_info = get_atc_vmid_pasid_mapping_info_v10_3,
     .set_vm_context_page_table_base = set_vm_context_page_table_base_v10_3,
     .program_trap_handler_settings = program_trap_handler_settings_v10_3,
 #if 0
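The new `get_atc_vmid_pasid_mapping_info_v10_3` callback above decodes a single register word that packs a PASID and a valid bit. A minimal userspace sketch of the same decode, assuming an illustrative field layout (PASID in bits 15:0, valid flag in bit 31; the real mask values live in athub_2_1_0_sh_mask.h):

```c
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Assumed field layout for illustration only. */
#define PASID_MAPPING_PASID_MASK 0x0000FFFFu
#define PASID_MAPPING_VALID_MASK 0x80000000u

/* Extract the PASID; return whether the mapping is valid. */
static bool decode_pasid_mapping(uint32_t value, uint16_t *p_pasid)
{
    *p_pasid = value & PASID_MAPPING_PASID_MASK;
    return (value & PASID_MAPPING_VALID_MASK) != 0;
}

int main(void)
{
    uint16_t pasid;
    if (decode_pasid_mapping(0x80000123u, &pasid))
        printf("vmid maps to pasid 0x%x\n", pasid); /* prints 0x123 */
    return 0;
}
```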
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index e762e45b7b85..fe660a8e150f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -32,6 +32,7 @@
 #include <drm/amdgpu_drm.h>
 #include <drm/drm_syncobj.h>
 
+#include "amdgpu_cs.h"
 #include "amdgpu.h"
 #include "amdgpu_trace.h"
 #include "amdgpu_gmc.h"
@@ -782,12 +783,12 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
             memcpy(ib->ptr, kptr, chunk_ib->ib_bytes);
             amdgpu_bo_kunmap(aobj);
 
-            r = amdgpu_ring_parse_cs(ring, p, j);
+            r = amdgpu_ring_parse_cs(ring, p, p->job, ib);
             if (r)
                 return r;
         } else {
             ib->ptr = (uint32_t *)kptr;
-            r = amdgpu_ring_patch_cs_in_place(ring, p, j);
+            r = amdgpu_ring_patch_cs_in_place(ring, p, p->job, ib);
             amdgpu_bo_kunmap(aobj);
             if (r)
                 return r;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.h
new file mode 100644
index 000000000000..30ecc4917f81
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.h
@@ -0,0 +1,80 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#ifndef __AMDGPU_CS_H__
+#define __AMDGPU_CS_H__
+
+#include "amdgpu_job.h"
+#include "amdgpu_bo_list.h"
+#include "amdgpu_ring.h"
+
+struct amdgpu_bo_va_mapping;
+
+struct amdgpu_cs_chunk {
+    uint32_t            chunk_id;
+    uint32_t            length_dw;
+    void                *kdata;
+};
+
+struct amdgpu_cs_post_dep {
+    struct drm_syncobj *syncobj;
+    struct dma_fence_chain *chain;
+    u64 point;
+};
+
+struct amdgpu_cs_parser {
+    struct amdgpu_device    *adev;
+    struct drm_file         *filp;
+    struct amdgpu_ctx       *ctx;
+
+    /* chunks */
+    unsigned                nchunks;
+    struct amdgpu_cs_chunk  *chunks;
+
+    /* scheduler job object */
+    struct amdgpu_job       *job;
+    struct drm_sched_entity *entity;
+
+    /* buffer objects */
+    struct ww_acquire_ctx   ticket;
+    struct amdgpu_bo_list   *bo_list;
+    struct amdgpu_mn        *mn;
+    struct amdgpu_bo_list_entry vm_pd;
+    struct list_head        validated;
+    struct dma_fence        *fence;
+    uint64_t                bytes_moved_threshold;
+    uint64_t                bytes_moved_vis_threshold;
+    uint64_t                bytes_moved;
+    uint64_t                bytes_moved_vis;
+
+    /* user fence */
+    struct amdgpu_bo_list_entry uf_entry;
+
+    unsigned                num_post_deps;
+    struct amdgpu_cs_post_dep *post_deps;
+};
+
+int amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser,
+                           uint64_t addr, struct amdgpu_bo **bo,
+                           struct amdgpu_bo_va_mapping **mapping);
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
index f522b52725e4..5981c7d9bd48 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
@@ -23,6 +23,7 @@
 */
 
 #include <drm/drm_auth.h>
+#include <drm/drm_drv.h>
 #include "amdgpu.h"
 #include "amdgpu_sched.h"
 #include "amdgpu_ras.h"
@@ -204,9 +205,15 @@ static int amdgpu_ctx_init_entity(struct amdgpu_ctx *ctx, u32 hw_ip,
     if (r)
         goto error_free_entity;
 
-    ctx->entities[hw_ip][ring] = entity;
+    /* It's not an error if we fail to install the new entity */
+    if (cmpxchg(&ctx->entities[hw_ip][ring], NULL, entity))
+        goto cleanup_entity;
+
     return 0;
 
+cleanup_entity:
+    drm_sched_entity_fini(&entity->entity);
+
 error_free_entity:
     kfree(entity);
 
@@ -261,9 +268,6 @@ static int amdgpu_ctx_get_stable_pstate(struct amdgpu_ctx *ctx,
     struct amdgpu_device *adev = ctx->adev;
     enum amd_dpm_forced_level current_level;
 
-    if (!ctx)
-        return -EINVAL;
-
     current_level = amdgpu_dpm_get_performance_level(adev);
 
     switch (current_level) {
@@ -293,9 +297,6 @@ static int amdgpu_ctx_set_stable_pstate(struct amdgpu_ctx *ctx,
     enum amd_dpm_forced_level level;
     int r;
 
-    if (!ctx)
-        return -EINVAL;
-
     mutex_lock(&adev->pm.stable_pstate_ctx_lock);
     if (adev->pm.stable_pstate_ctx && adev->pm.stable_pstate_ctx != ctx) {
         r = -EBUSY;
@@ -339,7 +340,7 @@ static void amdgpu_ctx_fini(struct kref *ref)
 {
     struct amdgpu_ctx *ctx = container_of(ref, struct amdgpu_ctx, refcount);
     struct amdgpu_device *adev = ctx->adev;
-    unsigned i, j;
+    unsigned i, j, idx;
 
     if (!adev)
         return;
@@ -350,7 +351,12 @@ static void amdgpu_ctx_fini(struct kref *ref)
             ctx->entities[i][j] = NULL;
         }
     }
-    amdgpu_ctx_set_stable_pstate(ctx, AMDGPU_CTX_STABLE_PSTATE_NONE);
+
+    if (drm_dev_enter(&adev->ddev, &idx)) {
+        amdgpu_ctx_set_stable_pstate(ctx, AMDGPU_CTX_STABLE_PSTATE_NONE);
+        drm_dev_exit(idx);
+    }
+
     kfree(ctx);
 }
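The `cmpxchg()` in `amdgpu_ctx_init_entity` above closes a race where two threads initialize the same entity slot concurrently: only the winner's pointer gets installed, and the loser tears its copy back down. A userspace analog of that pattern with C11 atomics (the names here are illustrative, not the driver's):

```c
#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

struct entity { int id; };

static _Atomic(struct entity *) slot; /* starts out NULL */

/* Returns the entity actually installed in the slot. */
static struct entity *install_entity(struct entity *ours)
{
    struct entity *expected = NULL;
    if (atomic_compare_exchange_strong(&slot, &expected, ours))
        return ours;      /* we won the race */
    free(ours);           /* somebody beat us; drop our copy */
    return expected;      /* use the one already installed */
}

int main(void)
{
    struct entity *e = malloc(sizeof(*e));
    e->id = 1;
    printf("installed entity %d\n", install_entity(e)->id);
    return 0;
}
```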
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
index 426b63e4f1f6..5d04d24a0d5f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
@@ -1678,7 +1678,7 @@ static ssize_t amdgpu_reset_dump_register_list_write(struct file *f,
 {
     struct amdgpu_device *adev = (struct amdgpu_device *)file_inode(f)->i_private;
     char reg_offset[11];
-    uint32_t *tmp;
+    uint32_t *new, *tmp = NULL;
     int ret, i = 0, len = 0;
 
     do {
@@ -1689,7 +1689,12 @@ static ssize_t amdgpu_reset_dump_register_list_write(struct file *f,
             goto error_free;
         }
 
-        tmp = krealloc_array(tmp, i + 1, sizeof(uint32_t), GFP_KERNEL);
+        new = krealloc_array(tmp, i + 1, sizeof(uint32_t), GFP_KERNEL);
+        if (!new) {
+            ret = -ENOMEM;
+            goto error_free;
+        }
+        tmp = new;
         if (sscanf(reg_offset, "%X %n", &tmp[i], &ret) != 1) {
             ret = -EINVAL;
             goto error_free;
@@ -1773,6 +1778,16 @@ int amdgpu_debugfs_init(struct amdgpu_device *adev)
         amdgpu_debugfs_ring_init(adev, ring);
     }
 
+    for ( i = 0; i < adev->vcn.num_vcn_inst; i++) {
+        if (!amdgpu_vcnfw_log)
+            break;
+
+        if (adev->vcn.harvest_config & (1 << i))
+            continue;
+
+        amdgpu_debugfs_vcn_fwlog_init(adev, i, &adev->vcn.inst[i]);
+    }
+
     amdgpu_ras_debugfs_create_all(adev);
     amdgpu_rap_debugfs_init(adev);
     amdgpu_securedisplay_debugfs_init(adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index ca854626a108..ddc5cd61af58 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -80,12 +80,7 @@ MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
 MODULE_FIRMWARE("amdgpu/picasso_gpu_info.bin");
 MODULE_FIRMWARE("amdgpu/raven2_gpu_info.bin");
 MODULE_FIRMWARE("amdgpu/arcturus_gpu_info.bin");
-MODULE_FIRMWARE("amdgpu/renoir_gpu_info.bin");
-MODULE_FIRMWARE("amdgpu/navi10_gpu_info.bin");
-MODULE_FIRMWARE("amdgpu/navi14_gpu_info.bin");
 MODULE_FIRMWARE("amdgpu/navi12_gpu_info.bin");
-MODULE_FIRMWARE("amdgpu/vangogh_gpu_info.bin");
-MODULE_FIRMWARE("amdgpu/yellow_carp_gpu_info.bin");
 
 #define AMDGPU_RESUME_MS        2000
 #define AMDGPU_MAX_RETRY_LIMIT  2
@@ -1554,7 +1549,6 @@ static int amdgpu_device_check_arguments(struct amdgpu_device *adev)
 
     amdgpu_gmc_tmz_set(adev);
 
-    amdgpu_gmc_noretry_set(adev);
 
     return 0;
 }
@@ -1992,27 +1986,9 @@ static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
     case CHIP_ARCTURUS:
         chip_name = "arcturus";
         break;
-    case CHIP_RENOIR:
-        if (adev->apu_flags & AMD_APU_IS_RENOIR)
-            chip_name = "renoir";
-        else
-            chip_name = "green_sardine";
-        break;
-    case CHIP_NAVI10:
-        chip_name = "navi10";
-        break;
-    case CHIP_NAVI14:
-        chip_name = "navi14";
-        break;
     case CHIP_NAVI12:
         chip_name = "navi12";
         break;
-    case CHIP_VANGOGH:
-        chip_name = "vangogh";
-        break;
-    case CHIP_YELLOW_CARP:
-        chip_name = "yellow_carp";
-        break;
     }
 
     snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_gpu_info.bin", chip_name);
@@ -3711,6 +3687,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
     if (r)
         return r;
 
+    amdgpu_gmc_noretry_set(adev);
     /* Need to get xgmi info early to decide the reset behavior*/
     if (adev->gmc.xgmi.supported) {
         r = adev->gfxhub.funcs->get_xgmi_info(adev);
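The `krealloc_array()` change above fixes a classic leak: assigning the result of a reallocation directly to the only pointer holding the buffer loses that buffer when the reallocation fails. A userspace sketch of the same pattern with `realloc()`:

```c
#include <stdint.h>
#include <stdlib.h>

/* Grow an array without leaking on failure: keep the old pointer
 * until the new allocation is known to have succeeded. */
static int append_u32(uint32_t **arr, size_t *n, uint32_t v)
{
    uint32_t *tmp = realloc(*arr, (*n + 1) * sizeof(**arr));
    if (!tmp)
        return -1; /* *arr is still valid; the caller can free it */
    tmp[*n] = v;
    *arr = tmp;
    (*n)++;
    return 0;
}
```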
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
index cae57d3b317a..fae5c1debfad 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
@@ -691,9 +691,9 @@ static int convert_tiling_flags_to_modifier(struct amdgpu_framebuffer *afb)
             return -EINVAL;
         }
 
-        if (adev->asic_type >= CHIP_SIENNA_CICHLID)
+        if (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(10, 3, 0))
             version = AMD_FMT_MOD_TILE_VER_GFX10_RBPLUS;
-        else if (adev->family == AMDGPU_FAMILY_NV)
+        else if (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(10, 0, 0))
             version = AMD_FMT_MOD_TILE_VER_GFX10;
         else
             version = AMD_FMT_MOD_TILE_VER_GFX9;
@@ -787,7 +787,7 @@ static int convert_tiling_flags_to_modifier(struct amdgpu_framebuffer *afb)
         if (adev->family >= AMDGPU_FAMILY_NV) {
             int extra_pipe = 0;
 
-            if (adev->asic_type >= CHIP_SIENNA_CICHLID &&
+            if ((adev->ip_versions[GC_HWIP][0] >= IP_VERSION(10, 3, 0)) &&
                 pipes == packers && pipes > 1)
                 extra_pipe = 1;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 2ab675123ae3..bb1c025d9001 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -100,9 +100,10 @@
 * - 3.43.0 - Add device hot plug/unplug support
 * - 3.44.0 - DCN3 supports DCC independent block settings: !64B && 128B, 64B && 128B
 * - 3.45.0 - Add context ioctl stable pstate interface
+ * * 3.46.0 - To enable hot plug amdgpu tests in libdrm
 */
 #define KMS_DRIVER_MAJOR    3
-#define KMS_DRIVER_MINOR    45
+#define KMS_DRIVER_MINOR    46
 #define KMS_DRIVER_PATCHLEVEL    0
 
 int amdgpu_vram_limit;
@@ -177,6 +178,7 @@ int amdgpu_reset_method = -1; /* auto */
 int amdgpu_num_kcq = -1;
 int amdgpu_smartshift_bias;
 int amdgpu_use_xgmi_p2p = 1;
+int amdgpu_vcnfw_log;
 
 static void amdgpu_drv_delayed_reset_work_handler(struct work_struct *work);
 
@@ -855,6 +857,13 @@ MODULE_PARM_DESC(num_kcq, "number of kernel compute queue user want to setup (8
 module_param_named(num_kcq, amdgpu_num_kcq, int, 0444);
 
 /**
+ * DOC: vcnfw_log (int)
+ * Enable vcnfw log output for debugging, the default is disabled.
+ */
+MODULE_PARM_DESC(vcnfw_log, "Enable vcnfw log(0 = disable (default value), 1 = enable)");
+module_param_named(vcnfw_log, amdgpu_vcnfw_log, int, 0444);
+
+/**
 * DOC: smu_pptable_id (int)
 * Used to override pptable id. id = 0 use VBIOS pptable.
 * id > 0 use the soft pptable with specicfied id.
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
index 52912b6bcb20..8fe939976224 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
@@ -644,13 +644,6 @@ late_fini:
     return r;
 }
 
-void amdgpu_gfx_ras_fini(struct amdgpu_device *adev)
-{
-    if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX) &&
-            adev->gfx.ras_if)
-        amdgpu_ras_block_late_fini(adev, adev->gfx.ras_if);
-}
-
 int amdgpu_gfx_process_ras_data_cb(struct amdgpu_device *adev,
         void *err_data,
         struct amdgpu_iv_entry *entry)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
index ccca0a85b982..dcb3c7871c73 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
@@ -387,7 +387,6 @@ bool amdgpu_gfx_is_me_queue_enabled(struct amdgpu_device *adev, int me,
 void amdgpu_gfx_off_ctrl(struct amdgpu_device *adev, bool enable);
 int amdgpu_get_gfx_off_status(struct amdgpu_device *adev, uint32_t *value);
 int amdgpu_gfx_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block);
-void amdgpu_gfx_ras_fini(struct amdgpu_device *adev);
 int amdgpu_gfx_process_ras_data_cb(struct amdgpu_device *adev,
         void *err_data,
         struct amdgpu_iv_entry *entry);
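A usage note on the new parameter added in amdgpu_drv.c above: vcnfw_log is registered with mode 0444, so it is read-only at runtime and has to be set when the module loads, e.g.:

    modprobe amdgpu vcnfw_log=1

or `amdgpu.vcnfw_log=1` on the kernel command line. The per-instance logs then appear in debugfs as `amdgpu_vcn_<n>_fwlog` (see the amdgpu_vcn.c hunks further down).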
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
index 78498d1d1769..58fd2729f577 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
@@ -454,17 +454,7 @@ int amdgpu_gmc_ras_late_init(struct amdgpu_device *adev)
 
 void amdgpu_gmc_ras_fini(struct amdgpu_device *adev)
 {
-    if (adev->umc.ras && adev->umc.ras->ras_block.ras_fini)
-        adev->umc.ras->ras_block.ras_fini(adev);
-
-    if (adev->mmhub.ras && adev->mmhub.ras->ras_block.ras_fini)
-        adev->mmhub.ras->ras_block.ras_fini(adev);
-
-    if (adev->gmc.xgmi.ras && adev->gmc.xgmi.ras->ras_block.ras_fini)
-        adev->gmc.xgmi.ras->ras_block.ras_fini(adev);
-
-    if (adev->hdp.ras && adev->hdp.ras->ras_block.ras_fini)
-        adev->hdp.ras->ras_block.ras_fini(adev);
 }
 
 /*
@@ -569,11 +559,11 @@ void amdgpu_gmc_noretry_set(struct amdgpu_device *adev)
 {
     struct amdgpu_gmc *gmc = &adev->gmc;
 
-    switch (adev->asic_type) {
-    case CHIP_VEGA10:
-    case CHIP_VEGA20:
-    case CHIP_ARCTURUS:
-    case CHIP_ALDEBARAN:
+    switch (adev->ip_versions[GC_HWIP][0]) {
+    case IP_VERSION(9, 0, 1):
+    case IP_VERSION(9, 4, 0):
+    case IP_VERSION(9, 4, 1):
+    case IP_VERSION(9, 4, 2):
         /*
          * noretry = 0 will cause kfd page fault tests fail
          * for some ASICs, so set default to 1 for these ASICs.
@@ -583,7 +573,6 @@ void amdgpu_gmc_noretry_set(struct amdgpu_device *adev)
         else
             gmc->noretry = amdgpu_noretry;
         break;
-    case CHIP_RAVEN:
     default:
         /* Raven currently has issues with noretry
          * regardless of what we decide for other
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.c
index b7fbc114a175..3f3d92e16c2e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.c
@@ -24,9 +24,7 @@
 #include "amdgpu.h"
 #include "amdgpu_ras.h"
 
-void amdgpu_hdp_ras_fini(struct amdgpu_device *adev)
+void amdgpu_hdp_ras_fini(struct amdgpu_device *adev, struct ras_common_if *ras_block)
 {
-    if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__HDP) &&
-        adev->hdp.ras_if)
-        amdgpu_ras_block_late_fini(adev, adev->hdp.ras_if);
+
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.h
index aabd59aa5213..9181c7bef7c6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.h
@@ -44,5 +44,4 @@ struct amdgpu_hdp {
 };
 
 int amdgpu_hdp_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block);
-void amdgpu_hdp_ras_fini(struct amdgpu_device *adev);
 #endif /* __AMDGPU_HDP_H__ */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
index be48487e2ca7..92a70fb57fa3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
@@ -204,7 +204,7 @@ static int amdgpu_vmid_grab_idle(struct amdgpu_vm *vm,
     unsigned i;
     int r;
 
-    if (ring->vmid_wait && !dma_fence_is_signaled(ring->vmid_wait))
+    if (!dma_fence_is_signaled(ring->vmid_wait))
         return amdgpu_sync_fence(sync, ring->vmid_wait);
 
     fences = kmalloc_array(id_mgr->num_ids, sizeof(void *), GFP_KERNEL);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
index d970336d2261..67f66f2f1809 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
@@ -81,14 +81,10 @@ exit:
 int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs,
              struct amdgpu_job **job, struct amdgpu_vm *vm)
 {
-    size_t size = sizeof(struct amdgpu_job);
-
     if (num_ibs == 0)
         return -EINVAL;
 
-    size += sizeof(struct amdgpu_ib) * num_ibs;
-
-    *job = kzalloc(size, GFP_KERNEL);
+    *job = kzalloc(struct_size(*job, ibs, num_ibs), GFP_KERNEL);
     if (!*job)
         return -ENOMEM;
 
@@ -98,7 +94,6 @@ int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs,
      */
     (*job)->base.sched = &adev->rings[0]->sched;
     (*job)->vm = vm;
-    (*job)->ibs = (void *)&(*job)[1];
     (*job)->num_ibs = num_ibs;
 
     amdgpu_sync_create(&(*job)->sync);
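The amdgpu_job.c change above replaces a hand-computed allocation size with `struct_size()` and a flexible array member (declared in the amdgpu_job.h hunk that follows), so the job header and its IB array come from a single allocation. A userspace sketch of the same idiom, with a simplified stand-in for the kernel's overflow-checked `struct_size()` helper:

```c
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

struct ib { uint32_t length_dw; };

/* Trailing flexible array, as struct amdgpu_job now declares its IBs. */
struct job {
    uint32_t num_ibs;
    struct ib ibs[];
};

/* Simplified stand-in for the kernel's struct_size() (no overflow check). */
#define STRUCT_SIZE(type, member, n) \
    (sizeof(type) + sizeof(((type *)0)->member[0]) * (size_t)(n))

int main(void)
{
    uint32_t num_ibs = 4;
    struct job *job = calloc(1, STRUCT_SIZE(struct job, ibs, num_ibs));
    if (!job)
        return 1;
    job->num_ibs = num_ibs; /* header and IB array share one allocation */
    printf("allocated %u IBs inline\n", job->num_ibs);
    free(job);
    return 0;
}
```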
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
index 9e65730193b8..d599c0540b46 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
@@ -23,6 +23,10 @@
 #ifndef __AMDGPU_JOB_H__
 #define __AMDGPU_JOB_H__
 
+#include <drm/gpu_scheduler.h>
+#include "amdgpu_sync.h"
+#include "amdgpu_ring.h"
+
 /* bit set means command submit involves a preamble IB */
 #define AMDGPU_PREAMBLE_IB_PRESENT          (1 << 0)
 /* bit set means preamble IB is first presented in belonging context */
@@ -45,12 +49,10 @@ struct amdgpu_job {
     struct amdgpu_vm    *vm;
     struct amdgpu_sync  sync;
     struct amdgpu_sync  sched_sync;
-    struct amdgpu_ib    *ibs;
     struct dma_fence    hw_fence;
     struct dma_fence    *external_hw_fence;
     uint32_t            preamble_status;
     uint32_t            preemption_status;
-    uint32_t            num_ibs;
     bool                vm_needs_flush;
     uint64_t            vm_pd_addr;
     unsigned            vmid;
@@ -66,6 +68,9 @@ struct amdgpu_job {
 
     /* job_run_counter >= 1 means a resubmit job */
     uint32_t            job_run_counter;
+
+    uint32_t            num_ibs;
+    struct amdgpu_ib    ibs[];
 };
 
 int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c
index e2607d9f5cf4..51c2a82e2fa4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c
@@ -70,9 +70,3 @@ void amdgpu_mca_query_ras_error_count(struct amdgpu_device *adev,
 
     amdgpu_mca_reset_error_count(adev, mc_status_addr);
 }
-
-void amdgpu_mca_ras_fini(struct amdgpu_device *adev,
-             struct amdgpu_mca_ras *mca_dev)
-{
-    amdgpu_ras_block_late_fini(adev, mca_dev->ras_if);
-}
\ No newline at end of file
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h
index 15e1a1efeb4f..7ce16d16e34b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h
@@ -56,7 +56,4 @@ void amdgpu_mca_query_ras_error_count(struct amdgpu_device *adev,
                       uint64_t mc_status_addr,
                       void *ras_error_status);
 
-void amdgpu_mca_ras_fini(struct amdgpu_device *adev,
-             struct amdgpu_mca_ras *mca_dev);
-
 #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.c
index 42413813765a..8f2fa247d605 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.c
@@ -24,9 +24,7 @@
 #include "amdgpu.h"
 #include "amdgpu_ras.h"
 
-void amdgpu_mmhub_ras_fini(struct amdgpu_device *adev)
+void amdgpu_mmhub_ras_fini(struct amdgpu_device *adev, struct ras_common_if *ras_block)
 {
-    if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__MMHUB) &&
-        adev->mmhub.ras_if)
-        amdgpu_ras_block_late_fini(adev, adev->mmhub.ras_if);
+
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h
index 240b26d9a388..9f1540f0ebf9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h
@@ -47,6 +47,5 @@ struct amdgpu_mmhub {
     struct amdgpu_mmhub_ras  *ras;
 };
 
-void amdgpu_mmhub_ras_fini(struct amdgpu_device *adev);
 #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.c
index f09ad80f0772..37d779b8e4a6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.c
@@ -43,10 +43,3 @@ late_fini:
     amdgpu_ras_block_late_fini(adev, ras_block);
     return r;
 }
-
-void amdgpu_nbio_ras_fini(struct amdgpu_device *adev)
-{
-    if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__PCIE_BIF) &&
-        adev->nbio.ras_if)
-        amdgpu_ras_block_late_fini(adev, adev->nbio.ras_if);
-}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h
index f9546c7341b8..3d13e601fc35 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h
@@ -105,5 +105,4 @@ struct amdgpu_nbio {
 };
 
 int amdgpu_nbio_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block);
-void amdgpu_nbio_ras_fini(struct amdgpu_device *adev);
 #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
index 94bfe502b55e..3ce1d38a7822 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
@@ -277,7 +277,7 @@ static int psp_init_sriov_microcode(struct psp_context *psp)
         ret = psp_init_cap_microcode(psp, "sienna_cichlid");
         break;
     case IP_VERSION(13, 0, 2):
-        ret = psp_init_ta_microcode(psp, "aldebaran");
+        ret = psp_init_cap_microcode(psp, "aldebaran");
         break;
     default:
         BUG();
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index b53abc15a830..d78c2970e558 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -2477,6 +2477,12 @@ void amdgpu_ras_block_late_fini(struct amdgpu_device *adev,
     amdgpu_ras_interrupt_remove_handler(adev, ras_block);
 }
 
+static void amdgpu_ras_block_late_fini_default(struct amdgpu_device *adev,
+                       struct ras_common_if *ras_block)
+{
+    return amdgpu_ras_block_late_fini(adev, ras_block);
+}
+
 /* do some init work after IP late init as dependence.
 * and it runs in resume/gpu reset/booting up cases.
 */
@@ -2572,11 +2578,27 @@ int amdgpu_ras_pre_fini(struct amdgpu_device *adev)
 int amdgpu_ras_fini(struct amdgpu_device *adev)
 {
     struct amdgpu_ras_block_list *ras_node, *tmp;
+    struct amdgpu_ras_block_object *obj = NULL;
     struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
 
     if (!adev->ras_enabled || !con)
         return 0;
 
+    list_for_each_entry_safe(ras_node, tmp, &adev->ras_list, node) {
+        if (ras_node->ras_obj) {
+            obj = ras_node->ras_obj;
+            if (amdgpu_ras_is_supported(adev, obj->ras_comm.block) &&
+                obj->ras_fini)
+                obj->ras_fini(adev, &obj->ras_comm);
+            else
+                amdgpu_ras_block_late_fini_default(adev, &obj->ras_comm);
+        }
+
+        /* Clear ras blocks from ras_list and free ras block list node */
+        list_del(&ras_node->node);
+        kfree(ras_node);
+    }
+
     amdgpu_ras_fs_fini(adev);
     amdgpu_ras_interrupt_remove_all(adev);
 
@@ -2590,12 +2612,6 @@ int amdgpu_ras_fini(struct amdgpu_device *adev)
     amdgpu_ras_set_context(adev, NULL);
     kfree(con);
 
-    /* Clear ras blocks from ras_list and free ras block list node */
-    list_for_each_entry_safe(ras_node, tmp, &adev->ras_list, node) {
-        list_del(&ras_node->node);
-        kfree(ras_node);
-    }
-
     return 0;
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
index 143a83043d7c..7cddaad90d6d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
@@ -491,7 +491,7 @@ struct amdgpu_ras_block_object {
     int (*ras_block_match)(struct amdgpu_ras_block_object *block_obj,
                 enum amdgpu_ras_block block, uint32_t sub_block_index);
     int (*ras_late_init)(struct amdgpu_device *adev, struct ras_common_if *ras_block);
-    void (*ras_fini)(struct amdgpu_device *adev);
+    void (*ras_fini)(struct amdgpu_device *adev, struct ras_common_if *ras_block);
     ras_ih_cb ras_cb;
     const struct amdgpu_ras_block_hw_ops *hw_ops;
 };
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c
index 248d64158721..c80af0889773 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c
@@ -36,8 +36,8 @@ int amdgpu_reset_init(struct amdgpu_device *adev)
 {
     int ret = 0;
 
-    switch (adev->asic_type) {
-    case CHIP_ALDEBARAN:
+    switch (adev->ip_versions[MP1_HWIP][0]) {
+    case IP_VERSION(13, 0, 2):
         ret = aldebaran_reset_init(adev);
         break;
     default:
@@ -51,8 +51,8 @@ int amdgpu_reset_fini(struct amdgpu_device *adev)
 {
     int ret = 0;
 
-    switch (adev->asic_type) {
-    case CHIP_ALDEBARAN:
+    switch (adev->ip_versions[MP1_HWIP][0]) {
+    case IP_VERSION(13, 0, 2):
         ret = aldebaran_reset_fini(adev);
         break;
     default:
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
index 35bcb6dc1816..7f33ae87cb41 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
@@ -193,6 +193,7 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
     adev->rings[ring->idx] = ring;
     ring->num_hw_submission = sched_hw_submission;
     ring->sched_score = sched_score;
+    ring->vmid_wait = dma_fence_get_stub();
     r = amdgpu_fence_driver_init_ring(ring);
     if (r)
         return r;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
index 48365da213dc..a8bed1b47899 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
@@ -28,6 +28,13 @@
 #include <drm/gpu_scheduler.h>
 #include <drm/drm_print.h>
 
+struct amdgpu_device;
+struct amdgpu_ring;
+struct amdgpu_ib;
+struct amdgpu_cs_parser;
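Several hunks above (amdgpu_reset.c, and amdgpu_gmc.c earlier) switch dispatch from `adev->asic_type` to packed IP-block versions compared with `IP_VERSION()`. A small standalone sketch of how such a packed-version switch works, assuming the common packing of `(maj << 16) | (min << 8) | rev` (an assumption here; the authoritative macro lives in the amdgpu headers):

```c
#include <stdint.h>
#include <stdio.h>

/* Assumed packing for illustration. */
#define IP_VERSION(maj, min, rev) (((maj) << 16) | ((min) << 8) | (rev))

static const char *reset_backend(uint32_t mp1_version)
{
    switch (mp1_version) {
    case IP_VERSION(13, 0, 2): /* the MP1 version Aldebaran reports */
        return "aldebaran mode2 reset";
    default:
        return "generic reset";
    }
}

int main(void)
{
    printf("%s\n", reset_backend(IP_VERSION(13, 0, 2)));
    return 0;
}
```

Keying on the discovered IP version rather than the marketing ASIC name means new SKUs that reuse an existing IP block pick up the right path without further driver changes.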
+struct amdgpu_job;
+struct amdgpu_vm;
+
 /* max number of rings */
 #define AMDGPU_MAX_RINGS        28
 #define AMDGPU_MAX_HWIP_RINGS   8
@@ -82,11 +89,13 @@ enum amdgpu_ib_pool_type {
     AMDGPU_IB_POOL_MAX
 };
 
-struct amdgpu_device;
-struct amdgpu_ring;
-struct amdgpu_ib;
-struct amdgpu_cs_parser;
-struct amdgpu_job;
+struct amdgpu_ib {
+    struct amdgpu_sa_bo     *sa_bo;
+    uint32_t                length_dw;
+    uint64_t                gpu_addr;
+    uint32_t                *ptr;
+    uint32_t                flags;
+};
 
 struct amdgpu_sched {
     u32 num_scheds;
@@ -111,6 +120,8 @@ struct amdgpu_fence_driver {
     struct dma_fence        **fences;
 };
 
+extern const struct drm_sched_backend_ops amdgpu_sched_ops;
+
 void amdgpu_fence_driver_clear_job_fences(struct amdgpu_ring *ring);
 void amdgpu_fence_driver_force_completion(struct amdgpu_ring *ring);
 
@@ -152,8 +163,12 @@ struct amdgpu_ring_funcs {
     u64 (*get_wptr)(struct amdgpu_ring *ring);
     void (*set_wptr)(struct amdgpu_ring *ring);
     /* validating and patching of IBs */
-    int (*parse_cs)(struct amdgpu_cs_parser *p, uint32_t ib_idx);
-    int (*patch_cs_in_place)(struct amdgpu_cs_parser *p, uint32_t ib_idx);
+    int (*parse_cs)(struct amdgpu_cs_parser *p,
+                    struct amdgpu_job *job,
+                    struct amdgpu_ib *ib);
+    int (*patch_cs_in_place)(struct amdgpu_cs_parser *p,
+                             struct amdgpu_job *job,
+                             struct amdgpu_ib *ib);
     /* constants to calculate how many DW are needed for an emit */
     unsigned emit_frame_size;
     unsigned emit_ib_size;
@@ -253,8 +268,8 @@ struct amdgpu_ring {
     atomic_t                *sched_score;
 };
 
-#define amdgpu_ring_parse_cs(r, p, ib) ((r)->funcs->parse_cs((p), (ib)))
-#define amdgpu_ring_patch_cs_in_place(r, p, ib) ((r)->funcs->patch_cs_in_place((p), (ib)))
+#define amdgpu_ring_parse_cs(r, p, job, ib) ((r)->funcs->parse_cs((p), (job), (ib)))
+#define amdgpu_ring_patch_cs_in_place(r, p, job, ib) ((r)->funcs->patch_cs_in_place((p), (job), (ib)))
 #define amdgpu_ring_test_ring(r) (r)->funcs->test_ring((r))
 #define amdgpu_ring_test_ib(r, t) (r)->funcs->test_ib((r), (t))
 #define amdgpu_ring_get_rptr(r) (r)->funcs->get_rptr((r))
@@ -352,4 +367,29 @@ int amdgpu_ring_test_helper(struct amdgpu_ring *ring);
 
 void amdgpu_debugfs_ring_init(struct amdgpu_device *adev,
                               struct amdgpu_ring *ring);
+
+static inline u32 amdgpu_ib_get_value(struct amdgpu_ib *ib, int idx)
+{
+    return ib->ptr[idx];
+}
+
+static inline void amdgpu_ib_set_value(struct amdgpu_ib *ib, int idx,
+                                       uint32_t value)
+{
+    ib->ptr[idx] = value;
+}
+
+int amdgpu_ib_get(struct amdgpu_device *adev, struct amdgpu_vm *vm,
+                  unsigned size,
+                  enum amdgpu_ib_pool_type pool,
+                  struct amdgpu_ib *ib);
+void amdgpu_ib_free(struct amdgpu_device *adev, struct amdgpu_ib *ib,
+                    struct dma_fence *f);
+int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
+                       struct amdgpu_ib *ibs, struct amdgpu_job *job,
+                       struct dma_fence **f);
+int amdgpu_ib_pool_init(struct amdgpu_device *adev);
+void amdgpu_ib_pool_fini(struct amdgpu_device *adev);
+int amdgpu_ib_ring_tests(struct amdgpu_device *adev);
+
 #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
index 3b5c43575aa3..e1835fd4b237 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
@@ -111,13 +111,6 @@ late_fini:
     return r;
 }
 
-void amdgpu_sdma_ras_fini(struct amdgpu_device *adev)
-{
-    if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__SDMA) &&
-        adev->sdma.ras_if)
-        amdgpu_ras_block_late_fini(adev, adev->sdma.ras_if);
-}
-
 int amdgpu_sdma_process_ras_data_cb(struct amdgpu_device *adev,
         void *err_data,
         struct amdgpu_iv_entry *entry)
diff --git
a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
index 8b226ffee32c..53ac3ebae8d6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
@@ -118,7 +118,6 @@ int amdgpu_sdma_get_index_from_ring(struct amdgpu_ring *ring, uint32_t *index);
 uint64_t amdgpu_sdma_get_csa_mc_addr(struct amdgpu_ring *ring, unsigned vmid);
 int amdgpu_sdma_ras_late_init(struct amdgpu_device *adev,
         struct ras_common_if *ras_block);
-void amdgpu_sdma_ras_fini(struct amdgpu_device *adev);
 int amdgpu_sdma_process_ras_data_cb(struct amdgpu_device *adev,
         void *err_data,
         struct amdgpu_iv_entry *entry);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace_points.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace_points.c
index 57c6c39ba064..b96d885f6e33 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace_points.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace_points.c
@@ -23,6 +23,7 @@
 */
 
 #include <drm/amdgpu_drm.h>
+#include "amdgpu_cs.h"
 #include "amdgpu.h"
 
 #define CREATE_TRACE_POINTS
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h
index 428f4df184d0..40dffbac85a0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h
@@ -343,7 +343,8 @@ union amdgpu_firmware_header {
 * fw loading support
 */
 enum AMDGPU_UCODE_ID {
-    AMDGPU_UCODE_ID_SDMA0 = 0,
+    AMDGPU_UCODE_ID_CAP = 0,
+    AMDGPU_UCODE_ID_SDMA0,
     AMDGPU_UCODE_ID_SDMA1,
     AMDGPU_UCODE_ID_SDMA2,
     AMDGPU_UCODE_ID_SDMA3,
@@ -378,7 +379,6 @@ enum AMDGPU_UCODE_ID {
     AMDGPU_UCODE_ID_VCN0_RAM,
     AMDGPU_UCODE_ID_VCN1_RAM,
     AMDGPU_UCODE_ID_DMCUB,
-    AMDGPU_UCODE_ID_CAP,
     AMDGPU_UCODE_ID_MAXIMUM,
 };
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
index 9400260e3263..85da6cbaf3b7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
@@ -162,13 +162,6 @@ late_fini:
     return r;
 }
 
-void amdgpu_umc_ras_fini(struct amdgpu_device *adev)
-{
-    if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__UMC) &&
-        adev->umc.ras_if)
-        amdgpu_ras_block_late_fini(adev, adev->umc.ras_if);
-}
-
 int amdgpu_umc_process_ecc_irq(struct amdgpu_device *adev,
         struct amdgpu_irq_src *source,
         struct amdgpu_iv_entry *entry)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h
index e4b3678a6685..2ec6698aa1fe 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h
@@ -73,7 +73,6 @@ struct amdgpu_umc {
 };
 
 int amdgpu_umc_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block);
-void amdgpu_umc_ras_fini(struct amdgpu_device *adev);
 int amdgpu_umc_poison_handler(struct amdgpu_device *adev,
         void *ras_error_status,
         bool reset);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
index 9cc23b220537..39c74d9fa7cc 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
@@ -37,6 +37,7 @@
 #include "amdgpu.h"
 #include "amdgpu_pm.h"
 #include "amdgpu_uvd.h"
+#include "amdgpu_cs.h"
 #include "cikd.h"
 
 #include "uvd/uvd_4_2_d.h"
@@ -98,7 +99,7 @@ struct amdgpu_uvd_cs_ctx {
     unsigned reg, count;
     unsigned data0, data1;
     unsigned idx;
-    unsigned ib_idx;
+    struct amdgpu_ib *ib;
 
     /* does the IB has a msg command */
     bool has_msg_cmd;
@@ -557,8 +558,8 @@ static u64 amdgpu_uvd_get_addr_from_ctx(struct amdgpu_uvd_cs_ctx *ctx)
     uint32_t lo, hi;
     uint64_t addr;
 
-    lo = amdgpu_get_ib_value(ctx->parser, ctx->ib_idx, ctx->data0);
-    hi = amdgpu_get_ib_value(ctx->parser, ctx->ib_idx, ctx->data1);
+    lo = amdgpu_ib_get_value(ctx->ib, ctx->data0);
+    hi = amdgpu_ib_get_value(ctx->ib, ctx->data1);
 
     addr = ((uint64_t)lo) | (((uint64_t)hi) << 32);
 
     return addr;
@@ -589,7 +590,7 @@ static int amdgpu_uvd_cs_pass1(struct amdgpu_uvd_cs_ctx *ctx)
 
     if (!ctx->parser->adev->uvd.address_64_bit) {
         /* check if it's a message or feedback command */
-        cmd = amdgpu_get_ib_value(ctx->parser, ctx->ib_idx, ctx->idx) >> 1;
+        cmd = amdgpu_ib_get_value(ctx->ib, ctx->idx) >> 1;
         if (cmd == 0x0 || cmd == 0x3) {
             /* yes, force it into VRAM */
             uint32_t domain = AMDGPU_GEM_DOMAIN_VRAM;
@@ -927,12 +928,10 @@ static int amdgpu_uvd_cs_pass2(struct amdgpu_uvd_cs_ctx *ctx)
     addr -= mapping->start * AMDGPU_GPU_PAGE_SIZE;
     start += addr;
 
-    amdgpu_set_ib_value(ctx->parser, ctx->ib_idx, ctx->data0,
-                        lower_32_bits(start));
-    amdgpu_set_ib_value(ctx->parser, ctx->ib_idx, ctx->data1,
-                        upper_32_bits(start));
+    amdgpu_ib_set_value(ctx->ib, ctx->data0, lower_32_bits(start));
+    amdgpu_ib_set_value(ctx->ib, ctx->data1, upper_32_bits(start));
 
-    cmd = amdgpu_get_ib_value(ctx->parser, ctx->ib_idx, ctx->idx) >> 1;
+    cmd = amdgpu_ib_get_value(ctx->ib, ctx->idx) >> 1;
     if (cmd < 0x4) {
         if ((end - start) < ctx->buf_sizes[cmd]) {
             DRM_ERROR("buffer (%d) to small (%d / %d)!\n", cmd,
@@ -992,14 +991,13 @@ static int amdgpu_uvd_cs_pass2(struct amdgpu_uvd_cs_ctx *ctx)
 static int amdgpu_uvd_cs_reg(struct amdgpu_uvd_cs_ctx *ctx,
                  int (*cb)(struct amdgpu_uvd_cs_ctx *ctx))
 {
-    struct amdgpu_ib *ib = &ctx->parser->job->ibs[ctx->ib_idx];
     int i, r;
 
     ctx->idx++;
     for (i = 0; i <= ctx->count; ++i) {
         unsigned reg = ctx->reg + i;
 
-        if (ctx->idx >= ib->length_dw) {
+        if (ctx->idx >= ctx->ib->length_dw) {
             DRM_ERROR("Register command after end of CS!\n");
             return -EINVAL;
         }
@@ -1039,11 +1037,10 @@ static int amdgpu_uvd_cs_reg(struct amdgpu_uvd_cs_ctx *ctx,
 static int amdgpu_uvd_cs_packets(struct amdgpu_uvd_cs_ctx *ctx,
                  int (*cb)(struct amdgpu_uvd_cs_ctx *ctx))
 {
-    struct amdgpu_ib *ib = &ctx->parser->job->ibs[ctx->ib_idx];
     int r;
 
-    for (ctx->idx = 0 ; ctx->idx < ib->length_dw; ) {
-        uint32_t cmd = amdgpu_get_ib_value(ctx->parser, ctx->ib_idx, ctx->idx);
+    for (ctx->idx = 0 ; ctx->idx < ctx->ib->length_dw; ) {
+        uint32_t cmd = amdgpu_ib_get_value(ctx->ib, ctx->idx);
         unsigned type = CP_PACKET_GET_TYPE(cmd);
         switch (type) {
         case PACKET_TYPE0:
@@ -1068,11 +1065,14 @@ static int amdgpu_uvd_cs_packets(struct amdgpu_uvd_cs_ctx *ctx,
 * amdgpu_uvd_ring_parse_cs - UVD command submission parser
 *
 * @parser: Command submission parser context
- * @ib_idx: Which indirect buffer to use
+ * @job: the job to parse
+ * @ib: the IB to patch
 *
 * Parse the command stream, patch in addresses as necessary.
 */
-int amdgpu_uvd_ring_parse_cs(struct amdgpu_cs_parser *parser, uint32_t ib_idx)
+int amdgpu_uvd_ring_parse_cs(struct amdgpu_cs_parser *parser,
+                             struct amdgpu_job *job,
+                             struct amdgpu_ib *ib)
 {
     struct amdgpu_uvd_cs_ctx ctx = {};
     unsigned buf_sizes[] = {
@@ -1082,10 +1082,9 @@ int amdgpu_uvd_ring_parse_cs(struct amdgpu_cs_parser *parser, uint32_t ib_idx)
         [0x00000003] = 2048,
         [0x00000004] = 0xFFFFFFFF,
     };
-    struct amdgpu_ib *ib = &parser->job->ibs[ib_idx];
     int r;
 
-    parser->job->vm = NULL;
+    job->vm = NULL;
     ib->gpu_addr = amdgpu_sa_bo_gpu_addr(ib->sa_bo);
 
     if (ib->length_dw % 16) {
@@ -1096,7 +1095,7 @@ int amdgpu_uvd_ring_parse_cs(struct amdgpu_cs_parser *parser, uint32_t ib_idx)
 
     ctx.parser = parser;
     ctx.buf_sizes = buf_sizes;
-    ctx.ib_idx = ib_idx;
+    ctx.ib = ib;
 
     /* first round only required on chips without UVD 64 bit address support */
     if (!parser->adev->uvd.address_64_bit) {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h
index 76ac9699885d..9f89bb7cd60b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h
@@ -82,7 +82,9 @@ int amdgpu_uvd_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
                    bool direct, struct dma_fence **fence);
 void amdgpu_uvd_free_handles(struct amdgpu_device *adev,
                  struct drm_file *filp);
-int amdgpu_uvd_ring_parse_cs(struct amdgpu_cs_parser *parser, uint32_t ib_idx);
+int amdgpu_uvd_ring_parse_cs(struct amdgpu_cs_parser *parser,
+                             struct amdgpu_job *job,
+                             struct amdgpu_ib *ib);
 void amdgpu_uvd_ring_begin_use(struct amdgpu_ring *ring);
 void amdgpu_uvd_ring_end_use(struct amdgpu_ring *ring);
 int amdgpu_uvd_ring_test_ib(struct amdgpu_ring *ring, long timeout);
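`amdgpu_uvd_get_addr_from_ctx` above reads two 32-bit words out of the IB and combines them into one 64-bit GPU address; the casts matter, since shifting a 32-bit value by 32 is undefined. A standalone illustration of the same combination:

```c
#include <stdint.h>
#include <stdio.h>

/* Combine low/high IB dwords into a 64-bit address, as the UVD
 * parser does; casting to uint64_t before shifting is required. */
static uint64_t addr_from_dwords(uint32_t lo, uint32_t hi)
{
    return ((uint64_t)lo) | (((uint64_t)hi) << 32);
}

int main(void)
{
    printf("0x%llx\n",
           (unsigned long long)addr_from_dwords(0xdeadbeef, 0x1));
    return 0;
}
```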
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
index 344f711ad144..02cb3a12dd76 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
@@ -34,6 +34,7 @@
 #include "amdgpu.h"
 #include "amdgpu_pm.h"
 #include "amdgpu_vce.h"
+#include "amdgpu_cs.h"
 #include "cikd.h"
 
 /* 1 second timeout */
@@ -587,8 +588,7 @@ err:
 /**
 * amdgpu_vce_validate_bo - make sure not to cross 4GB boundary
 *
- * @p: parser context
- * @ib_idx: indirect buffer to use
+ * @ib: indirect buffer to use
 * @lo: address of lower dword
 * @hi: address of higher dword
 * @size: minimum size
@@ -596,8 +596,9 @@ err:
 *
 * Make sure that no BO cross a 4GB boundary.
 */
-static int amdgpu_vce_validate_bo(struct amdgpu_cs_parser *p, uint32_t ib_idx,
-                  int lo, int hi, unsigned size, int32_t index)
+static int amdgpu_vce_validate_bo(struct amdgpu_cs_parser *p,
+                  struct amdgpu_ib *ib, int lo, int hi,
+                  unsigned size, int32_t index)
 {
     int64_t offset = ((uint64_t)size) * ((int64_t)index);
     struct ttm_operation_ctx ctx = { false, false };
@@ -607,8 +608,8 @@ static int amdgpu_vce_validate_bo(struct amdgpu_cs_parser *p, uint32_t ib_idx,
     uint64_t addr;
     int r;
 
-    addr = ((uint64_t)amdgpu_get_ib_value(p, ib_idx, lo)) |
-           ((uint64_t)amdgpu_get_ib_value(p, ib_idx, hi)) << 32;
+    addr = ((uint64_t)amdgpu_ib_get_value(ib, lo)) |
+           ((uint64_t)amdgpu_ib_get_value(ib, hi)) << 32;
     if (index >= 0) {
         addr += offset;
         fpfn = PAGE_ALIGN(offset) >> PAGE_SHIFT;
@@ -638,7 +639,7 @@ static int amdgpu_vce_validate_bo(struct amdgpu_cs_parser *p, uint32_t ib_idx,
 * amdgpu_vce_cs_reloc - command submission relocation
 *
 * @p: parser context
- * @ib_idx: indirect buffer to use
+ * @ib: indirect buffer to use
 * @lo: address of lower dword
 * @hi: address of higher dword
 * @size: minimum size
@@ -646,7 +647,7 @@ static int amdgpu_vce_validate_bo(struct amdgpu_cs_parser *p, uint32_t ib_idx,
 *
 * Patch relocation inside command stream with real buffer address
 */
-static int amdgpu_vce_cs_reloc(struct amdgpu_cs_parser *p, uint32_t ib_idx,
+static int amdgpu_vce_cs_reloc(struct amdgpu_cs_parser *p, struct amdgpu_ib *ib,
                    int lo, int hi, unsigned size, uint32_t index)
 {
     struct amdgpu_bo_va_mapping *mapping;
@@ -657,8 +658,8 @@ static int amdgpu_vce_cs_reloc(struct amdgpu_cs_parser *p, uint32_t ib_idx,
     if (index == 0xffffffff)
         index = 0;
 
-    addr = ((uint64_t)amdgpu_get_ib_value(p, ib_idx, lo)) |
-           ((uint64_t)amdgpu_get_ib_value(p, ib_idx, hi)) << 32;
+    addr = ((uint64_t)amdgpu_ib_get_value(ib, lo)) |
+           ((uint64_t)amdgpu_ib_get_value(ib, hi)) << 32;
     addr += ((uint64_t)size) * ((uint64_t)index);
 
     r = amdgpu_cs_find_mapping(p, addr, &bo, &mapping);
@@ -679,8 +680,8 @@ static int amdgpu_vce_cs_reloc(struct amdgpu_cs_parser *p, uint32_t ib_idx,
     addr += amdgpu_bo_gpu_offset(bo);
     addr -= ((uint64_t)size) * ((uint64_t)index);
 
-    amdgpu_set_ib_value(p, ib_idx, lo, lower_32_bits(addr));
-    amdgpu_set_ib_value(p, ib_idx, hi, upper_32_bits(addr));
+    amdgpu_ib_set_value(ib, lo, lower_32_bits(addr));
+    amdgpu_ib_set_value(ib, hi, upper_32_bits(addr));
 
     return 0;
 }
@@ -729,11 +730,13 @@ static int amdgpu_vce_validate_handle(struct amdgpu_cs_parser *p,
 * amdgpu_vce_ring_parse_cs - parse and validate the command stream
 *
 * @p: parser context
- * @ib_idx: indirect buffer to use
+ * @job: the job to parse
+ * @ib: the IB to patch
 */
-int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx)
+int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p,
+                             struct amdgpu_job *job,
+                             struct amdgpu_ib *ib)
 {
-    struct amdgpu_ib *ib = &p->job->ibs[ib_idx];
     unsigned fb_idx = 0, bs_idx = 0;
     int session_idx = -1;
     uint32_t destroyed = 0;
@@ -744,12 +747,12 @@ int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx)
     unsigned idx;
     int i, r = 0;
 
-    p->job->vm = NULL;
+    job->vm = NULL;
     ib->gpu_addr = amdgpu_sa_bo_gpu_addr(ib->sa_bo);
 
     for (idx = 0; idx < ib->length_dw;) {
-        uint32_t len = amdgpu_get_ib_value(p, ib_idx, idx);
-        uint32_t cmd = amdgpu_get_ib_value(p, ib_idx, idx + 1);
+        uint32_t len = amdgpu_ib_get_value(ib, idx);
+        uint32_t cmd = amdgpu_ib_get_value(ib, idx + 1);
 
         if ((len < 8) || (len & 3)) {
             DRM_ERROR("invalid VCE command length (%d)!\n", len);
@@ -759,52 +762,52 @@ int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx)
         switch (cmd) {
         case 0x00000002: /* task info */
-            fb_idx = amdgpu_get_ib_value(p, ib_idx, idx + 6);
-            bs_idx = amdgpu_get_ib_value(p, ib_idx, idx + 7);
+            fb_idx = amdgpu_ib_get_value(ib, idx + 6);
+            bs_idx = amdgpu_ib_get_value(ib, idx + 7);
             break;
 
         case 0x03000001: /* encode */
-            r = amdgpu_vce_validate_bo(p, ib_idx, idx + 10,
-                           idx + 9, 0, 0);
+            r = amdgpu_vce_validate_bo(p, ib, idx + 10, idx + 9,
+                           0, 0);
             if (r)
                 goto out;
 
-            r = amdgpu_vce_validate_bo(p, ib_idx, idx + 12,
-                           idx + 11, 0, 0);
+            r = amdgpu_vce_validate_bo(p, ib, idx + 12, idx + 11,
+                           0, 0);
             if (r)
                 goto out;
             break;
 
         case 0x05000001: /* context buffer */
-            r = amdgpu_vce_validate_bo(p, ib_idx, idx + 3,
-                           idx + 2, 0, 0);
+            r = amdgpu_vce_validate_bo(p, ib, idx + 3, idx + 2,
+                           0, 0);
             if (r)
                 goto out;
             break;
 
         case 0x05000004: /* video bitstream buffer */
-            tmp = amdgpu_get_ib_value(p, ib_idx, idx + 4);
-            r = amdgpu_vce_validate_bo(p, ib_idx, idx + 3, idx + 2,
+            tmp = amdgpu_ib_get_value(ib, idx + 4);
+            r = amdgpu_vce_validate_bo(p, ib, idx + 3, idx + 2,
                            tmp, bs_idx);
             if (r)
                 goto out;
             break;
 
         case 0x05000005: /* feedback buffer */
-            r = amdgpu_vce_validate_bo(p, ib_idx, idx + 3, idx + 2,
+            r = amdgpu_vce_validate_bo(p, ib, idx + 3, idx + 2,
                            4096, fb_idx);
             if (r)
                 goto out;
             break;
 
         case 0x0500000d: /* MV buffer */
-            r = amdgpu_vce_validate_bo(p, ib_idx, idx + 3,
-                           idx + 2, 0, 0);
+            r = amdgpu_vce_validate_bo(p, ib, idx + 3, idx + 2,
+                           0, 0);
             if (r)
                 goto out;
 
-            r = amdgpu_vce_validate_bo(p, ib_idx, idx + 8,
-                           idx + 7, 0, 0);
+            r = amdgpu_vce_validate_bo(p, ib, idx + 8, idx + 7,
+                           0, 0);
             if (r)
                 goto out;
             break;
@@ -814,12 +817,12 @@ int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx)
     }
 
     for (idx = 0; idx < ib->length_dw;) {
-        uint32_t len = amdgpu_get_ib_value(p, ib_idx, idx);
-        uint32_t cmd = amdgpu_get_ib_value(p, ib_idx, idx + 1);
+        uint32_t len = amdgpu_ib_get_value(ib, idx);
+        uint32_t cmd = amdgpu_ib_get_value(ib, idx + 1);
 
         switch (cmd) {
         case 0x00000001: /* session */
-            handle = amdgpu_get_ib_value(p, ib_idx, idx + 2);
+            handle = amdgpu_ib_get_value(ib, idx + 2);
             session_idx = amdgpu_vce_validate_handle(p, handle,
                                  &allocated);
             if (session_idx < 0) {
@@ -830,8 +833,8 @@ int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx)
             break;
 
         case 0x00000002: /* task info */
-            fb_idx = amdgpu_get_ib_value(p, ib_idx, idx + 6);
-            bs_idx = amdgpu_get_ib_value(p, ib_idx, idx + 7);
+            fb_idx = amdgpu_ib_get_value(ib, idx + 6);
+            bs_idx = amdgpu_ib_get_value(ib, idx + 7);
             break;
 
         case 0x01000001: /* create */
@@ -846,8 +849,8 @@ int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx)
                 goto out;
             }
 
-            *size = amdgpu_get_ib_value(p, ib_idx, idx + 8) *
-                amdgpu_get_ib_value(p, ib_idx, idx + 10) *
+            *size = amdgpu_ib_get_value(ib, idx + 8) *
+                amdgpu_ib_get_value(ib, idx + 10) *
                 8 * 3 / 2;
             break;
 
@@ -876,12 +879,12 @@ int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx)
             break;
 
         case 0x03000001: /* encode */
-            r = amdgpu_vce_cs_reloc(p, ib_idx, idx + 10, idx + 9,
+            r = amdgpu_vce_cs_reloc(p, ib, idx + 10, idx + 9,
                         *size, 0);
             if (r)
                 goto out;
 
-            r = amdgpu_vce_cs_reloc(p, ib_idx, idx + 12, idx + 11,
+            r = amdgpu_vce_cs_reloc(p, ib, idx + 12, idx + 11,
                         *size / 3, 0);
             if (r)
                 goto out;
@@ -892,35 +895,35 @@ int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx)
             break;
 
         case 0x05000001: /* context buffer */
-            r = amdgpu_vce_cs_reloc(p, ib_idx, idx + 3, idx + 2,
+            r = amdgpu_vce_cs_reloc(p, ib, idx + 3, idx + 2,
                         *size * 2, 0);
             if (r)
                 goto out;
             break;
 
         case 0x05000004: /* video bitstream buffer */
-            tmp = amdgpu_get_ib_value(p, ib_idx, idx + 4);
-            r = amdgpu_vce_cs_reloc(p, ib_idx, idx + 3, idx + 2,
+            tmp = amdgpu_ib_get_value(ib, idx + 4);
+            r = amdgpu_vce_cs_reloc(p, ib, idx + 3, idx + 2,
                         tmp, bs_idx);
             if (r)
                 goto out;
             break;
 
         case 0x05000005: /* feedback buffer */
-            r = amdgpu_vce_cs_reloc(p, ib_idx, idx + 3, idx + 2,
+            r = amdgpu_vce_cs_reloc(p, ib, idx + 3, idx + 2,
                         4096, fb_idx);
             if (r)
                 goto out;
             break;
 
         case 0x0500000d: /* MV buffer */
-            r = amdgpu_vce_cs_reloc(p, ib_idx, idx + 3,
-                        idx + 2, *size, 0);
+            r = amdgpu_vce_cs_reloc(p, ib, idx + 3,
+                        idx + 2, *size, 0);
             if (r)
                 goto out;
 
-            r = amdgpu_vce_cs_reloc(p, ib_idx, idx + 8,
-                        idx + 7, *size / 12, 0);
+            r = amdgpu_vce_cs_reloc(p, ib, idx + 8,
+                        idx + 7, *size / 12, 0);
             if (r)
                 goto out;
             break;
@@ -965,11 +968,13 @@ out:
 * amdgpu_vce_ring_parse_cs_vm - parse the command stream in VM mode
 *
 * @p: parser context
- * @ib_idx: indirect buffer to use
+ * @job: the job to parse
+ * @ib: the IB to patch
 */
-int amdgpu_vce_ring_parse_cs_vm(struct amdgpu_cs_parser *p, uint32_t ib_idx)
+int amdgpu_vce_ring_parse_cs_vm(struct amdgpu_cs_parser *p,
+                                struct amdgpu_job *job,
+                                struct amdgpu_ib *ib)
 {
-    struct amdgpu_ib *ib = &p->job->ibs[ib_idx];
     int session_idx = -1;
     uint32_t destroyed = 0;
     uint32_t created = 0;
@@ -978,8 +983,8 @@ int amdgpu_vce_ring_parse_cs_vm(struct amdgpu_cs_parser *p, uint32_t ib_idx)
     int i, r = 0, idx = 0;
 
     while (idx < ib->length_dw) {
-        uint32_t len = amdgpu_get_ib_value(p, ib_idx, idx);
-        uint32_t cmd = amdgpu_get_ib_value(p, ib_idx, idx + 1);
+        uint32_t len = amdgpu_ib_get_value(ib, idx);
+        uint32_t cmd = amdgpu_ib_get_value(ib, idx + 1);
 
         if ((len < 8) || (len & 3)) {
             DRM_ERROR("invalid VCE command length (%d)!\n", len);
@@ -989,7 +994,7 @@ int amdgpu_vce_ring_parse_cs_vm(struct amdgpu_cs_parser *p, uint32_t ib_idx)
         switch (cmd) {
         case 0x00000001: /* session */
-            handle = amdgpu_get_ib_value(p, ib_idx, idx + 2);
+            handle = amdgpu_ib_get_value(ib, idx + 2);
             session_idx = amdgpu_vce_validate_handle(p, handle,
                                  &allocated);
             if (session_idx < 0) {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h
index be4a6e773c5b..ea680fc9a6c3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h
@@ -59,8 +59,11 @@ int amdgpu_vce_entity_init(struct amdgpu_device *adev);
 int amdgpu_vce_suspend(struct amdgpu_device *adev);
 int amdgpu_vce_resume(struct amdgpu_device *adev);
 void amdgpu_vce_free_handles(struct amdgpu_device *adev, struct drm_file *filp);
-int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx);
-int amdgpu_vce_ring_parse_cs_vm(struct amdgpu_cs_parser *p, uint32_t ib_idx);
+int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, struct amdgpu_job *job,
+                             struct amdgpu_ib *ib);
+int amdgpu_vce_ring_parse_cs_vm(struct amdgpu_cs_parser *p,
+                                struct amdgpu_job *job,
+                                struct amdgpu_ib *ib);
 void amdgpu_vce_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_job *job,
                 struct amdgpu_ib *ib, uint32_t flags);
 void amdgpu_vce_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
index 5e0dbf54d561..f99093f2ebc7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
@@ -27,6 +27,7 @@
 #include <linux/firmware.h>
 #include <linux/module.h>
 #include <linux/pci.h>
+#include <linux/debugfs.h>
#include <drm/drm_drv.h> #include "amdgpu.h" @@ -79,6 +80,7 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev) const char *fw_name; const struct common_firmware_header *hdr; unsigned char fw_check; + unsigned int fw_shared_size, log_offset; int i, r; INIT_DELAYED_WORK(&adev->vcn.idle_work, amdgpu_vcn_idle_work_handler); @@ -226,7 +228,12 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev) bo_size = AMDGPU_VCN_STACK_SIZE + AMDGPU_VCN_CONTEXT_SIZE; if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) bo_size += AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8); - bo_size += AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_fw_shared)); + fw_shared_size = AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_fw_shared)); + log_offset = offsetof(struct amdgpu_fw_shared, fw_log); + bo_size += fw_shared_size; + + if (amdgpu_vcnfw_log) + bo_size += AMDGPU_VCNFW_LOG_SIZE; for (i = 0; i < adev->vcn.num_vcn_inst; i++) { if (adev->vcn.harvest_config & (1 << i)) @@ -240,10 +247,18 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev) return r; } - adev->vcn.inst[i].fw_shared_cpu_addr = adev->vcn.inst[i].cpu_addr + - bo_size - AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_fw_shared)); - adev->vcn.inst[i].fw_shared_gpu_addr = adev->vcn.inst[i].gpu_addr + - bo_size - AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_fw_shared)); + adev->vcn.inst[i].fw_shared.cpu_addr = adev->vcn.inst[i].cpu_addr + + bo_size - fw_shared_size; + adev->vcn.inst[i].fw_shared.gpu_addr = adev->vcn.inst[i].gpu_addr + + bo_size - fw_shared_size; + + adev->vcn.inst[i].fw_shared.mem_size = fw_shared_size; + + if (amdgpu_vcnfw_log) { + adev->vcn.inst[i].fw_shared.cpu_addr -= AMDGPU_VCNFW_LOG_SIZE; + adev->vcn.inst[i].fw_shared.gpu_addr -= AMDGPU_VCNFW_LOG_SIZE; + adev->vcn.inst[i].fw_shared.log_offset = log_offset; + } if (adev->vcn.indirect_sram) { r = amdgpu_bo_create_kernel(adev, 64 * 2 * 4, PAGE_SIZE, @@ -979,3 +994,112 @@ void amdgpu_vcn_setup_ucode(struct amdgpu_device *adev) dev_info(adev->dev, "Will use PSP to load VCN firmware\n"); } } + +/* + * debugfs for mapping vcn firmware log buffer. 
+ */ +#if defined(CONFIG_DEBUG_FS) +static ssize_t amdgpu_debugfs_vcn_fwlog_read(struct file *f, char __user *buf, + size_t size, loff_t *pos) +{ + struct amdgpu_vcn_inst *vcn; + void *log_buf; + volatile struct amdgpu_vcn_fwlog *plog; + unsigned int read_pos, write_pos, available, i, read_bytes = 0; + unsigned int read_num[2] = {0}; + + vcn = file_inode(f)->i_private; + if (!vcn) + return -ENODEV; + + if (!vcn->fw_shared.cpu_addr || !amdgpu_vcnfw_log) + return -EFAULT; + + log_buf = vcn->fw_shared.cpu_addr + vcn->fw_shared.mem_size; + + plog = (volatile struct amdgpu_vcn_fwlog *)log_buf; + read_pos = plog->rptr; + write_pos = plog->wptr; + + if (read_pos > AMDGPU_VCNFW_LOG_SIZE || write_pos > AMDGPU_VCNFW_LOG_SIZE) + return -EFAULT; + + if (!size || (read_pos == write_pos)) + return 0; + + if (write_pos > read_pos) { + available = write_pos - read_pos; + read_num[0] = min(size, (size_t)available); + } else { + read_num[0] = AMDGPU_VCNFW_LOG_SIZE - read_pos; + available = read_num[0] + write_pos - plog->header_size; + if (size > available) + read_num[1] = write_pos - plog->header_size; + else if (size > read_num[0]) + read_num[1] = size - read_num[0]; + else + read_num[0] = size; + } + + for (i = 0; i < 2; i++) { + if (read_num[i]) { + if (read_pos == AMDGPU_VCNFW_LOG_SIZE) + read_pos = plog->header_size; + if (read_num[i] == copy_to_user((buf + read_bytes), + (log_buf + read_pos), read_num[i])) + return -EFAULT; + + read_bytes += read_num[i]; + read_pos += read_num[i]; + } + } + + plog->rptr = read_pos; + *pos += read_bytes; + return read_bytes; +} + +static const struct file_operations amdgpu_debugfs_vcnfwlog_fops = { + .owner = THIS_MODULE, + .read = amdgpu_debugfs_vcn_fwlog_read, + .llseek = default_llseek +}; +#endif + +void amdgpu_debugfs_vcn_fwlog_init(struct amdgpu_device *adev, uint8_t i, + struct amdgpu_vcn_inst *vcn) +{ +#if defined(CONFIG_DEBUG_FS) + struct drm_minor *minor = adev_to_drm(adev)->primary; + struct dentry *root = minor->debugfs_root; + char name[32]; + + sprintf(name, "amdgpu_vcn_%d_fwlog", i); + debugfs_create_file_size(name, S_IFREG | S_IRUGO, root, vcn, + &amdgpu_debugfs_vcnfwlog_fops, + AMDGPU_VCNFW_LOG_SIZE); +#endif +} + +void amdgpu_vcn_fwlog_init(struct amdgpu_vcn_inst *vcn) +{ +#if defined(CONFIG_DEBUG_FS) + volatile uint32_t *flag = vcn->fw_shared.cpu_addr; + void *fw_log_cpu_addr = vcn->fw_shared.cpu_addr + vcn->fw_shared.mem_size; + uint64_t fw_log_gpu_addr = vcn->fw_shared.gpu_addr + vcn->fw_shared.mem_size; + volatile struct amdgpu_vcn_fwlog *log_buf = fw_log_cpu_addr; + volatile struct amdgpu_fw_shared_fw_logging *fw_log = vcn->fw_shared.cpu_addr + + vcn->fw_shared.log_offset; + *flag |= cpu_to_le32(AMDGPU_VCN_FW_LOGGING_FLAG); + fw_log->is_enabled = 1; + fw_log->addr_lo = cpu_to_le32(fw_log_gpu_addr & 0xFFFFFFFF); + fw_log->addr_hi = cpu_to_le32(fw_log_gpu_addr >> 32); + fw_log->size = cpu_to_le32(AMDGPU_VCNFW_LOG_SIZE); + + log_buf->header_size = sizeof(struct amdgpu_vcn_fwlog); + log_buf->buffer_size = AMDGPU_VCNFW_LOG_SIZE; + log_buf->rptr = log_buf->header_size; + log_buf->wptr = log_buf->header_size; + log_buf->wrapped = 0; +#endif +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h index 5d3728b027d3..e2fde88aaf5e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h @@ -158,6 +158,7 @@ #define AMDGPU_VCN_FW_SHARED_FLAG_0_RB (1 << 6) #define AMDGPU_VCN_MULTI_QUEUE_FLAG (1 << 8) #define AMDGPU_VCN_SW_RING_FLAG (1 << 9) +#define AMDGPU_VCN_FW_LOGGING_FLAG 
(1 << 10) #define AMDGPU_VCN_IB_FLAG_DECODE_BUFFER 0x00000001 #define AMDGPU_VCN_CMD_FLAG_MSG_BUFFER 0x00000001 @@ -205,6 +206,13 @@ struct amdgpu_vcn_reg{ unsigned scratch9; }; +struct amdgpu_vcn_fw_shared { + void *cpu_addr; + uint64_t gpu_addr; + uint32_t mem_size; + uint32_t log_offset; +}; + struct amdgpu_vcn_inst { struct amdgpu_bo *vcpu_bo; void *cpu_addr; @@ -221,8 +229,7 @@ struct amdgpu_vcn_inst { uint64_t dpg_sram_gpu_addr; uint32_t *dpg_sram_curr_addr; atomic_t dpg_enc_submission_cnt; - void *fw_shared_cpu_addr; - uint64_t fw_shared_gpu_addr; + struct amdgpu_vcn_fw_shared fw_shared; }; struct amdgpu_vcn { @@ -265,6 +272,13 @@ struct amdgpu_fw_shared_sw_ring { uint8_t padding[3]; }; +struct amdgpu_fw_shared_fw_logging { + uint8_t is_enabled; + uint32_t addr_lo; + uint32_t addr_hi; + uint32_t size; +}; + struct amdgpu_fw_shared { uint32_t present_flag_0; uint8_t pad[44]; @@ -272,6 +286,15 @@ struct amdgpu_fw_shared { uint8_t pad1[1]; struct amdgpu_fw_shared_multi_queue multi_queue; struct amdgpu_fw_shared_sw_ring sw_ring; + struct amdgpu_fw_shared_fw_logging fw_log; +}; + +struct amdgpu_vcn_fwlog { + uint32_t rptr; + uint32_t wptr; + uint32_t buffer_size; + uint32_t header_size; + uint8_t wrapped; }; struct amdgpu_vcn_decode_buffer { @@ -313,4 +336,7 @@ enum amdgpu_ring_priority_level amdgpu_vcn_get_enc_ring_prio(int ring); void amdgpu_vcn_setup_ucode(struct amdgpu_device *adev); +void amdgpu_vcn_fwlog_init(struct amdgpu_vcn_inst *vcn); +void amdgpu_debugfs_vcn_fwlog_init(struct amdgpu_device *adev, + uint8_t i, struct amdgpu_vcn_inst *vcn); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 2cd9f1a2e5fa..fc4563cf2828 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -779,7 +779,8 @@ bool amdgpu_vm_ready(struct amdgpu_vm *vm) amdgpu_vm_eviction_lock(vm); ret = !vm->evicting; amdgpu_vm_eviction_unlock(vm); - return ret; + + return ret && list_empty(&vm->evicted); } /** diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c index 91817a31f3e1..05c0d6e2c75c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c @@ -768,13 +768,6 @@ static int amdgpu_xgmi_ras_late_init(struct amdgpu_device *adev, struct ras_comm return amdgpu_ras_block_late_init(adev, ras_block); } -static void amdgpu_xgmi_ras_fini(struct amdgpu_device *adev) -{ - if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__XGMI_WAFL) && - adev->gmc.xgmi.ras_if) - amdgpu_ras_block_late_fini(adev, adev->gmc.xgmi.ras_if); -} - uint64_t amdgpu_xgmi_get_relative_phy_addr(struct amdgpu_device *adev, uint64_t addr) { @@ -982,6 +975,5 @@ struct amdgpu_xgmi_ras xgmi_ras = { }, .hw_ops = &xgmi_ras_hw_ops, .ras_late_init = amdgpu_xgmi_ras_late_init, - .ras_fini = amdgpu_xgmi_ras_fini, }, }; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index 90158289cd30..713d39d89e30 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -250,13 +250,6 @@ MODULE_FIRMWARE("amdgpu/yellow_carp_mec.bin"); MODULE_FIRMWARE("amdgpu/yellow_carp_mec2.bin"); MODULE_FIRMWARE("amdgpu/yellow_carp_rlc.bin"); -MODULE_FIRMWARE("amdgpu/cyan_skillfish_ce.bin"); -MODULE_FIRMWARE("amdgpu/cyan_skillfish_pfp.bin"); -MODULE_FIRMWARE("amdgpu/cyan_skillfish_me.bin"); -MODULE_FIRMWARE("amdgpu/cyan_skillfish_mec.bin"); -MODULE_FIRMWARE("amdgpu/cyan_skillfish_mec2.bin"); 
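
Editor's note on the VCN fwlog hunks earlier in this diff: the feature carves AMDGPU_VCNFW_LOG_SIZE bytes out of the VCPU BO directly behind the fw_shared region, and the debugfs reader treats that region as a ring buffer whose first header_size bytes hold the amdgpu_vcn_fwlog header itself. That is why amdgpu_debugfs_vcn_fwlog_read() uses a slightly unusual wrap rule: rptr/wptr wrap back to header_size, never to 0. A sketch of that rule under the same assumptions (illustrative only, not in-tree code):

    /*
     * Log payload lives in [header_size, AMDGPU_VCNFW_LOG_SIZE); offsets
     * below header_size belong to the amdgpu_vcn_fwlog header, so a
     * position that runs off the end wraps to header_size, not to 0.
     */
    static unsigned int vcn_fwlog_advance(volatile struct amdgpu_vcn_fwlog *plog,
                                          unsigned int pos, unsigned int n)
    {
            pos += n;
            if (pos >= AMDGPU_VCNFW_LOG_SIZE)
                    pos = plog->header_size + (pos - AMDGPU_VCNFW_LOG_SIZE);
            return pos;
    }

This is also why the reader splits its copy into read_num[0]/read_num[1] around the buffer end before it updates plog->rptr.
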
-MODULE_FIRMWARE("amdgpu/cyan_skillfish_rlc.bin"); - MODULE_FIRMWARE("amdgpu/cyan_skillfish2_ce.bin"); MODULE_FIRMWARE("amdgpu/cyan_skillfish2_pfp.bin"); MODULE_FIRMWARE("amdgpu/cyan_skillfish2_me.bin"); @@ -4043,10 +4036,7 @@ static int gfx_v10_0_init_microcode(struct amdgpu_device *adev) break; case IP_VERSION(10, 1, 3): case IP_VERSION(10, 1, 4): - if (adev->apu_flags & AMD_APU_IS_CYAN_SKILLFISH2) - chip_name = "cyan_skillfish2"; - else - chip_name = "cyan_skillfish"; + chip_name = "cyan_skillfish2"; break; case IP_VERSION(10, 3, 7): chip_name = "gc_10_3_7"; @@ -6557,6 +6547,7 @@ static void gfx_v10_0_kiq_setting(struct amdgpu_ring *ring) case IP_VERSION(10, 3, 5): case IP_VERSION(10, 3, 6): case IP_VERSION(10, 3, 3): + case IP_VERSION(10, 3, 7): tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS_Sienna_Cichlid); tmp &= 0xffffff00; tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue); @@ -7857,6 +7848,7 @@ static void gfx_v10_0_set_safe_mode(struct amdgpu_device *adev) case IP_VERSION(10, 3, 5): case IP_VERSION(10, 3, 6): case IP_VERSION(10, 3, 3): + case IP_VERSION(10, 3, 7): WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE_Sienna_Cichlid, data); /* wait for RLC_SAFE_MODE */ @@ -7894,6 +7886,7 @@ static void gfx_v10_0_unset_safe_mode(struct amdgpu_device *adev) case IP_VERSION(10, 3, 5): case IP_VERSION(10, 3, 6): case IP_VERSION(10, 3, 3): + case IP_VERSION(10, 3, 7): WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE_Sienna_Cichlid, data); break; default: @@ -8348,6 +8341,7 @@ static void gfx_v10_cntl_power_gating(struct amdgpu_device *adev, bool enable) case IP_VERSION(10, 3, 1): case IP_VERSION(10, 3, 3): case IP_VERSION(10, 3, 6): + case IP_VERSION(10, 3, 7): data = 0x4E20 & RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG_MASK_Vangogh; WREG32_SOC15(GC, 0, mmRLC_PG_DELAY_3, data); break; @@ -8417,6 +8411,7 @@ static int gfx_v10_0_set_powergating_state(void *handle, case IP_VERSION(10, 3, 1): case IP_VERSION(10, 3, 3): case IP_VERSION(10, 3, 6): + case IP_VERSION(10, 3, 7): gfx_v10_cntl_pg(adev, enable); amdgpu_gfx_off_ctrl(adev, enable); break; @@ -8445,6 +8440,7 @@ static int gfx_v10_0_set_clockgating_state(void *handle, case IP_VERSION(10, 3, 5): case IP_VERSION(10, 3, 6): case IP_VERSION(10, 3, 3): + case IP_VERSION(10, 3, 7): gfx_v10_0_update_gfx_clock_gating(adev, state == AMD_CG_STATE_GATE); break; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 1997f129db9c..8def7f630d4c 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -2204,10 +2204,6 @@ static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev) if (!adev->gfx.ras->ras_block.ras_late_init) adev->gfx.ras->ras_block.ras_late_init = amdgpu_gfx_ras_late_init; - /* If not define special ras_fini function, use gfx default ras_fini */ - if (!adev->gfx.ras->ras_block.ras_fini) - adev->gfx.ras->ras_block.ras_fini = amdgpu_gfx_ras_fini; - /* If not defined special ras_cb function, use default ras_cb */ if (!adev->gfx.ras->ras_block.ras_cb) adev->gfx.ras->ras_block.ras_cb = amdgpu_gfx_process_ras_data_cb; @@ -2432,9 +2428,6 @@ static int gfx_v9_0_sw_fini(void *handle) int i; struct amdgpu_device *adev = (struct amdgpu_device *)handle; - if (adev->gfx.ras && adev->gfx.ras->ras_block.ras_fini) - adev->gfx.ras->ras_block.ras_fini(adev); - for (i = 0; i < adev->gfx.num_gfx_rings; i++) amdgpu_ring_fini(&adev->gfx.gfx_ring[i]); for (i = 0; i < adev->gfx.num_compute_rings; i++) diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c index 
3dcd82b49481..f60b7bd4dbf5 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c @@ -683,10 +683,6 @@ static void gmc_v10_0_set_umc_funcs(struct amdgpu_device *adev) if (!adev->umc.ras->ras_block.ras_late_init) adev->umc.ras->ras_block.ras_late_init = amdgpu_umc_ras_late_init; - /* If don't define special ras_fini function, use default ras_fini */ - if (!adev->umc.ras->ras_block.ras_fini) - adev->umc.ras->ras_block.ras_fini = amdgpu_umc_ras_fini; - /* If not defined special ras_cb function, use default ras_cb */ if (!adev->umc.ras->ras_block.ras_cb) adev->umc.ras->ras_block.ras_cb = amdgpu_umc_process_ras_data_cb; diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index df35f0252eea..431742eb7811 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -1243,10 +1243,6 @@ static void gmc_v9_0_set_umc_funcs(struct amdgpu_device *adev) if (!adev->umc.ras->ras_block.ras_late_init) adev->umc.ras->ras_block.ras_late_init = amdgpu_umc_ras_late_init; - /* If don't define special ras_fini function, use default ras_fini */ - if (!adev->umc.ras->ras_block.ras_fini) - adev->umc.ras->ras_block.ras_fini = amdgpu_umc_ras_fini; - /* If not defined special ras_cb function, use default ras_cb */ if (!adev->umc.ras->ras_block.ras_cb) adev->umc.ras->ras_block.ras_cb = amdgpu_umc_process_ras_data_cb; @@ -1292,10 +1288,6 @@ static void gmc_v9_0_set_mmhub_ras_funcs(struct amdgpu_device *adev) adev->mmhub.ras->ras_block.ras_comm.block = AMDGPU_RAS_BLOCK__MMHUB; adev->mmhub.ras->ras_block.ras_comm.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE; adev->mmhub.ras_if = &adev->mmhub.ras->ras_block.ras_comm; - - /* If don't define special ras_fini function, use default ras_fini */ - if (!adev->mmhub.ras->ras_block.ras_fini) - adev->mmhub.ras->ras_block.ras_fini = amdgpu_mmhub_ras_fini; } } @@ -1561,7 +1553,7 @@ static void gmc_v9_0_save_registers(struct amdgpu_device *adev) static int gmc_v9_0_sw_init(void *handle) { - int r, vram_width = 0, vram_type = 0, vram_vendor = 0; + int r, vram_width = 0, vram_type = 0, vram_vendor = 0, dma_addr_bits; struct amdgpu_device *adev = (struct amdgpu_device *)handle; adev->gfxhub.funcs->init(adev); @@ -1677,12 +1669,13 @@ static int gmc_v9_0_sw_init(void *handle) */ adev->gmc.mc_mask = 0xffffffffffffULL; /* 48 bit MC */ - r = dma_set_mask_and_coherent(adev->dev, DMA_BIT_MASK(44)); + dma_addr_bits = adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2) ? 
48:44; + r = dma_set_mask_and_coherent(adev->dev, DMA_BIT_MASK(dma_addr_bits)); if (r) { printk(KERN_WARNING "amdgpu: No suitable DMA available.\n"); return r; } - adev->need_swiotlb = drm_need_swiotlb(44); + adev->need_swiotlb = drm_need_swiotlb(dma_addr_bits); r = gmc_v9_0_mc_init(adev); if (r) diff --git a/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c b/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c index 02400d97a95c..046216635262 100644 --- a/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c @@ -166,7 +166,6 @@ struct amdgpu_hdp_ras hdp_v4_0_ras = { .type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE, }, .hw_ops = &hdp_v4_0_ras_hw_ops, - .ras_fini = amdgpu_hdp_ras_fini, }, }; diff --git a/drivers/gpu/drm/amd/amdgpu/mca_v3_0.c b/drivers/gpu/drm/amd/amdgpu/mca_v3_0.c index b4b36899f5c6..d4bd7d1d2649 100644 --- a/drivers/gpu/drm/amd/amdgpu/mca_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mca_v3_0.c @@ -37,11 +37,6 @@ static void mca_v3_0_mp0_query_ras_error_count(struct amdgpu_device *adev, ras_error_status); } -static void mca_v3_0_mp0_ras_fini(struct amdgpu_device *adev) -{ - amdgpu_mca_ras_fini(adev, &adev->mca.mp0); -} - static int mca_v3_0_ras_block_match(struct amdgpu_ras_block_object *block_obj, enum amdgpu_ras_block block, uint32_t sub_block_index) { @@ -71,7 +66,6 @@ struct amdgpu_mca_ras_block mca_v3_0_mp0_ras = { }, .hw_ops = &mca_v3_0_mp0_hw_ops, .ras_block_match = mca_v3_0_ras_block_match, - .ras_fini = mca_v3_0_mp0_ras_fini, }, }; @@ -83,11 +77,6 @@ static void mca_v3_0_mp1_query_ras_error_count(struct amdgpu_device *adev, ras_error_status); } -static void mca_v3_0_mp1_ras_fini(struct amdgpu_device *adev) -{ - amdgpu_mca_ras_fini(adev, &adev->mca.mp1); -} - const struct amdgpu_ras_block_hw_ops mca_v3_0_mp1_hw_ops = { .query_ras_error_count = mca_v3_0_mp1_query_ras_error_count, .query_ras_error_address = NULL, @@ -103,7 +92,6 @@ struct amdgpu_mca_ras_block mca_v3_0_mp1_ras = { }, .hw_ops = &mca_v3_0_mp1_hw_ops, .ras_block_match = mca_v3_0_ras_block_match, - .ras_fini = mca_v3_0_mp1_ras_fini, }, }; @@ -115,11 +103,6 @@ static void mca_v3_0_mpio_query_ras_error_count(struct amdgpu_device *adev, ras_error_status); } -static void mca_v3_0_mpio_ras_fini(struct amdgpu_device *adev) -{ - amdgpu_mca_ras_fini(adev, &adev->mca.mpio); -} - const struct amdgpu_ras_block_hw_ops mca_v3_0_mpio_hw_ops = { .query_ras_error_count = mca_v3_0_mpio_query_ras_error_count, .query_ras_error_address = NULL, @@ -135,7 +118,6 @@ struct amdgpu_mca_ras_block mca_v3_0_mpio_ras = { }, .hw_ops = &mca_v3_0_mpio_hw_ops, .ras_block_match = mca_v3_0_ras_block_match, - .ras_fini = mca_v3_0_mpio_ras_fini, }, }; diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c index 14768570c298..b31df4db01fb 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c @@ -671,7 +671,6 @@ struct amdgpu_nbio_ras nbio_v7_4_ras = { }, .hw_ops = &nbio_v7_4_ras_hw_ops, .ras_late_init = amdgpu_nbio_ras_late_init, - .ras_fini = amdgpu_nbio_ras_fini, }, .handle_ras_controller_intr_no_bifring = nbio_v7_4_handle_ras_controller_intr_no_bifring, .handle_ras_err_event_athub_intr_no_bifring = nbio_v7_4_handle_ras_err_event_athub_intr_no_bifring, diff --git a/drivers/gpu/drm/amd/amdgpu/nv.c b/drivers/gpu/drm/amd/amdgpu/nv.c index f6aeef759ee1..e19f14c3ef59 100644 --- a/drivers/gpu/drm/amd/amdgpu/nv.c +++ b/drivers/gpu/drm/amd/amdgpu/nv.c @@ -946,10 +946,21 @@ static int nv_common_early_init(void *handle) AMD_CG_SUPPORT_GFX_3D_CGLS | AMD_CG_SUPPORT_GFX_RLC_LS | 
AMD_CG_SUPPORT_GFX_CP_LS | - AMD_CG_SUPPORT_GFX_FGCG; + AMD_CG_SUPPORT_GFX_FGCG | + AMD_CG_SUPPORT_MC_MGCG | + AMD_CG_SUPPORT_MC_LS | + AMD_CG_SUPPORT_SDMA_LS | + AMD_CG_SUPPORT_HDP_MGCG | + AMD_CG_SUPPORT_HDP_LS | + AMD_CG_SUPPORT_ATHUB_MGCG | + AMD_CG_SUPPORT_ATHUB_LS | + AMD_CG_SUPPORT_IH_CG | + AMD_CG_SUPPORT_VCN_MGCG | + AMD_CG_SUPPORT_JPEG_MGCG; adev->pg_flags = AMD_PG_SUPPORT_VCN | AMD_PG_SUPPORT_VCN_DPG | - AMD_PG_SUPPORT_JPEG; + AMD_PG_SUPPORT_JPEG | + AMD_PG_SUPPORT_GFX_PG; adev->external_rev_id = adev->rev_id + 0x01; break; default: diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c index 2c6070b90dcf..024f60631faf 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c @@ -31,6 +31,7 @@ MODULE_FIRMWARE("amdgpu/aldebaran_sos.bin"); MODULE_FIRMWARE("amdgpu/aldebaran_ta.bin"); +MODULE_FIRMWARE("amdgpu/aldebaran_cap.bin"); MODULE_FIRMWARE("amdgpu/yellow_carp_asd.bin"); MODULE_FIRMWARE("amdgpu/yellow_carp_toc.bin"); MODULE_FIRMWARE("amdgpu/yellow_carp_ta.bin"); diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c index e26c39fcd336..01b385568c14 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c @@ -1995,10 +1995,6 @@ static int sdma_v4_0_sw_fini(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; int i; - if (adev->sdma.ras && adev->sdma.ras->ras_block.hw_ops && - adev->sdma.ras->ras_block.ras_fini) - adev->sdma.ras->ras_block.ras_fini(adev); - for (i = 0; i < adev->sdma.num_instances; i++) { amdgpu_ring_fini(&adev->sdma.instance[i].ring); if (adev->sdma.has_page_queue) @@ -2826,10 +2822,6 @@ static void sdma_v4_0_set_ras_funcs(struct amdgpu_device *adev) if (!adev->sdma.ras->ras_block.ras_late_init) adev->sdma.ras->ras_block.ras_late_init = amdgpu_sdma_ras_late_init; - /* If don't define special ras_fini function, use default ras_fini */ - if (!adev->sdma.ras->ras_block.ras_fini) - adev->sdma.ras->ras_block.ras_fini = amdgpu_sdma_ras_fini; - /* If not defined special ras_cb function, use default ras_cb */ if (!adev->sdma.ras->ras_block.ras_cb) adev->sdma.ras->ras_block.ras_cb = amdgpu_sdma_process_ras_data_cb; diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c index 81e033549dda..53a8df4b030e 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c @@ -51,9 +51,6 @@ MODULE_FIRMWARE("amdgpu/navi14_sdma1.bin"); MODULE_FIRMWARE("amdgpu/navi12_sdma.bin"); MODULE_FIRMWARE("amdgpu/navi12_sdma1.bin"); -MODULE_FIRMWARE("amdgpu/cyan_skillfish_sdma.bin"); -MODULE_FIRMWARE("amdgpu/cyan_skillfish_sdma1.bin"); - MODULE_FIRMWARE("amdgpu/cyan_skillfish2_sdma.bin"); MODULE_FIRMWARE("amdgpu/cyan_skillfish2_sdma1.bin"); @@ -264,10 +261,7 @@ static int sdma_v5_0_init_microcode(struct amdgpu_device *adev) chip_name = "navi12"; break; case IP_VERSION(5, 0, 1): - if (adev->apu_flags & AMD_APU_IS_CYAN_SKILLFISH2) - chip_name = "cyan_skillfish2"; - else - chip_name = "cyan_skillfish"; + chip_name = "cyan_skillfish2"; break; default: BUG(); diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index b7c24149a6bd..496c4a6e23ac 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -1214,9 +1214,6 @@ static int soc15_common_sw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - if (adev->nbio.ras && adev->nbio.ras->ras_block.ras_fini) - 
adev->nbio.ras->ras_block.ras_fini(adev); - if (adev->df.funcs && adev->df.funcs->sw_fini) adev->df.funcs->sw_fini(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c index b483f03b4591..2f15b8e0f7d7 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c @@ -25,6 +25,7 @@ #include "amdgpu.h" #include "amdgpu_uvd.h" +#include "amdgpu_cs.h" #include "soc15.h" #include "soc15d.h" #include "soc15_common.h" @@ -1275,14 +1276,15 @@ static int uvd_v7_0_ring_test_ring(struct amdgpu_ring *ring) * uvd_v7_0_ring_patch_cs_in_place - Patch the IB for command submission. * * @p: the CS parser with the IBs - * @ib_idx: which IB to patch + * @job: which job this ib is in + * @ib: which IB to patch * */ static int uvd_v7_0_ring_patch_cs_in_place(struct amdgpu_cs_parser *p, - uint32_t ib_idx) + struct amdgpu_job *job, + struct amdgpu_ib *ib) { - struct amdgpu_ring *ring = to_amdgpu_ring(p->entity->rq->sched); - struct amdgpu_ib *ib = &p->job->ibs[ib_idx]; + struct amdgpu_ring *ring = to_amdgpu_ring(job->base.sched); unsigned i; /* No patching necessary for the first instance */ @@ -1290,12 +1292,12 @@ static int uvd_v7_0_ring_patch_cs_in_place(struct amdgpu_cs_parser *p, return 0; for (i = 0; i < ib->length_dw; i += 2) { - uint32_t reg = amdgpu_get_ib_value(p, ib_idx, i); + uint32_t reg = amdgpu_ib_get_value(ib, i); reg -= p->adev->reg_offset[UVD_HWIP][0][1]; reg += p->adev->reg_offset[UVD_HWIP][1][1]; - amdgpu_set_ib_value(p, ib_idx, i, reg); + amdgpu_ib_set_value(ib, i, reg); } return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c index 3799226defc0..7bbb9ba6b80b 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c @@ -148,6 +148,13 @@ static int vcn_v1_0_sw_init(void *handle) adev->vcn.pause_dpg_mode = vcn_v1_0_pause_dpg_mode; + if (amdgpu_vcnfw_log) { + volatile struct amdgpu_fw_shared *fw_shared = adev->vcn.inst->fw_shared.cpu_addr; + + fw_shared->present_flag_0 = 0; + amdgpu_vcn_fwlog_init(adev->vcn.inst); + } + r = jpeg_v1_0_sw_init(handle); return r; diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c index 313fc1b53999..319ac8ea434b 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c @@ -172,8 +172,12 @@ static int vcn_v2_0_sw_init(void *handle) if (r) return r; - fw_shared = adev->vcn.inst->fw_shared_cpu_addr; + fw_shared = adev->vcn.inst->fw_shared.cpu_addr; fw_shared->present_flag_0 = cpu_to_le32(AMDGPU_VCN_MULTI_QUEUE_FLAG); + + if (amdgpu_vcnfw_log) + amdgpu_vcn_fwlog_init(adev->vcn.inst); + return 0; } @@ -188,7 +192,7 @@ static int vcn_v2_0_sw_fini(void *handle) { int r, idx; struct amdgpu_device *adev = (struct amdgpu_device *)handle; - volatile struct amdgpu_fw_shared *fw_shared = adev->vcn.inst->fw_shared_cpu_addr; + volatile struct amdgpu_fw_shared *fw_shared = adev->vcn.inst->fw_shared.cpu_addr; if (drm_dev_enter(adev_to_drm(adev), &idx)) { fw_shared->present_flag_0 = 0; @@ -364,9 +368,9 @@ static void vcn_v2_0_mc_resume(struct amdgpu_device *adev) /* non-cache window */ WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_NC0_64BIT_BAR_LOW, - lower_32_bits(adev->vcn.inst->fw_shared_gpu_addr)); + lower_32_bits(adev->vcn.inst->fw_shared.gpu_addr)); WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH, - upper_32_bits(adev->vcn.inst->fw_shared_gpu_addr)); + upper_32_bits(adev->vcn.inst->fw_shared.gpu_addr)); WREG32_SOC15(UVD, 0, 
mmUVD_VCPU_NONCACHE_OFFSET0, 0); WREG32_SOC15(UVD, 0, mmUVD_VCPU_NONCACHE_SIZE0, AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_fw_shared))); @@ -455,10 +459,10 @@ static void vcn_v2_0_mc_resume_dpg_mode(struct amdgpu_device *adev, bool indirec /* non-cache window */ WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET( UVD, 0, mmUVD_LMI_VCPU_NC0_64BIT_BAR_LOW), - lower_32_bits(adev->vcn.inst->fw_shared_gpu_addr), 0, indirect); + lower_32_bits(adev->vcn.inst->fw_shared.gpu_addr), 0, indirect); WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET( UVD, 0, mmUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH), - upper_32_bits(adev->vcn.inst->fw_shared_gpu_addr), 0, indirect); + upper_32_bits(adev->vcn.inst->fw_shared.gpu_addr), 0, indirect); WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET( UVD, 0, mmUVD_VCPU_NONCACHE_OFFSET0), 0, 0, indirect); WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET( @@ -784,7 +788,7 @@ static void vcn_v2_0_enable_static_power_gating(struct amdgpu_device *adev) static int vcn_v2_0_start_dpg_mode(struct amdgpu_device *adev, bool indirect) { - volatile struct amdgpu_fw_shared *fw_shared = adev->vcn.inst->fw_shared_cpu_addr; + volatile struct amdgpu_fw_shared *fw_shared = adev->vcn.inst->fw_shared.cpu_addr; struct amdgpu_ring *ring = &adev->vcn.inst->ring_dec; uint32_t rb_bufsz, tmp; @@ -921,7 +925,7 @@ static int vcn_v2_0_start_dpg_mode(struct amdgpu_device *adev, bool indirect) static int vcn_v2_0_start(struct amdgpu_device *adev) { - volatile struct amdgpu_fw_shared *fw_shared = adev->vcn.inst->fw_shared_cpu_addr; + volatile struct amdgpu_fw_shared *fw_shared = adev->vcn.inst->fw_shared.cpu_addr; struct amdgpu_ring *ring = &adev->vcn.inst->ring_dec; uint32_t rb_bufsz, tmp; uint32_t lmi_swap_cntl; @@ -1207,7 +1211,7 @@ static int vcn_v2_0_pause_dpg_mode(struct amdgpu_device *adev, UVD_POWER_STATUS__UVD_POWER_STATUS_MASK); if (!ret_code) { - volatile struct amdgpu_fw_shared *fw_shared = adev->vcn.inst->fw_shared_cpu_addr; + volatile struct amdgpu_fw_shared *fw_shared = adev->vcn.inst->fw_shared.cpu_addr; /* pause DPG */ reg_data |= UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK; WREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE, reg_data); diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c index 44fc4c218433..1869bae4104b 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c @@ -196,8 +196,11 @@ static int vcn_v2_5_sw_init(void *handle) return r; } - fw_shared = adev->vcn.inst[j].fw_shared_cpu_addr; + fw_shared = adev->vcn.inst[j].fw_shared.cpu_addr; fw_shared->present_flag_0 = cpu_to_le32(AMDGPU_VCN_MULTI_QUEUE_FLAG); + + if (amdgpu_vcnfw_log) + amdgpu_vcn_fwlog_init(&adev->vcn.inst[i]); } if (amdgpu_sriov_vf(adev)) { @@ -229,7 +232,7 @@ static int vcn_v2_5_sw_fini(void *handle) for (i = 0; i < adev->vcn.num_vcn_inst; i++) { if (adev->vcn.harvest_config & (1 << i)) continue; - fw_shared = adev->vcn.inst[i].fw_shared_cpu_addr; + fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr; fw_shared->present_flag_0 = 0; } drm_dev_exit(idx); @@ -423,9 +426,9 @@ static void vcn_v2_5_mc_resume(struct amdgpu_device *adev) /* non-cache window */ WREG32_SOC15(VCN, i, mmUVD_LMI_VCPU_NC0_64BIT_BAR_LOW, - lower_32_bits(adev->vcn.inst[i].fw_shared_gpu_addr)); + lower_32_bits(adev->vcn.inst[i].fw_shared.gpu_addr)); WREG32_SOC15(VCN, i, mmUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH, - upper_32_bits(adev->vcn.inst[i].fw_shared_gpu_addr)); + upper_32_bits(adev->vcn.inst[i].fw_shared.gpu_addr)); WREG32_SOC15(VCN, i, mmUVD_VCPU_NONCACHE_OFFSET0, 0); WREG32_SOC15(VCN, i, 
mmUVD_VCPU_NONCACHE_SIZE0, AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_fw_shared))); @@ -513,10 +516,10 @@ static void vcn_v2_5_mc_resume_dpg_mode(struct amdgpu_device *adev, int inst_idx /* non-cache window */ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( VCN, 0, mmUVD_LMI_VCPU_NC0_64BIT_BAR_LOW), - lower_32_bits(adev->vcn.inst[inst_idx].fw_shared_gpu_addr), 0, indirect); + lower_32_bits(adev->vcn.inst[inst_idx].fw_shared.gpu_addr), 0, indirect); WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( VCN, 0, mmUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH), - upper_32_bits(adev->vcn.inst[inst_idx].fw_shared_gpu_addr), 0, indirect); + upper_32_bits(adev->vcn.inst[inst_idx].fw_shared.gpu_addr), 0, indirect); WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( VCN, 0, mmUVD_VCPU_NONCACHE_OFFSET0), 0, 0, indirect); WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( @@ -757,7 +760,7 @@ static void vcn_v2_5_enable_clock_gating(struct amdgpu_device *adev) static int vcn_v2_5_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, bool indirect) { - volatile struct amdgpu_fw_shared *fw_shared = adev->vcn.inst[inst_idx].fw_shared_cpu_addr; + volatile struct amdgpu_fw_shared *fw_shared = adev->vcn.inst[inst_idx].fw_shared.cpu_addr; struct amdgpu_ring *ring; uint32_t rb_bufsz, tmp; @@ -981,7 +984,7 @@ static int vcn_v2_5_start(struct amdgpu_device *adev) vcn_v2_5_mc_resume(adev); for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { - volatile struct amdgpu_fw_shared *fw_shared = adev->vcn.inst[i].fw_shared_cpu_addr; + volatile struct amdgpu_fw_shared *fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr; if (adev->vcn.harvest_config & (1 << i)) continue; /* VCN global tiling registers */ @@ -1403,7 +1406,7 @@ static int vcn_v2_5_pause_dpg_mode(struct amdgpu_device *adev, UVD_POWER_STATUS__UVD_POWER_STATUS_MASK); if (!ret_code) { - volatile struct amdgpu_fw_shared *fw_shared = adev->vcn.inst[inst_idx].fw_shared_cpu_addr; + volatile struct amdgpu_fw_shared *fw_shared = adev->vcn.inst[inst_idx].fw_shared.cpu_addr; /* pause DPG */ reg_data |= UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK; diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c index da11ceba0698..5dbf5ba7d62d 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c @@ -25,6 +25,7 @@ #include "amdgpu.h" #include "amdgpu_vcn.h" #include "amdgpu_pm.h" +#include "amdgpu_cs.h" #include "soc15.h" #include "soc15d.h" #include "vcn_v2_0.h" @@ -213,11 +214,14 @@ static int vcn_v3_0_sw_init(void *handle) return r; } - fw_shared = adev->vcn.inst[i].fw_shared_cpu_addr; + fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr; fw_shared->present_flag_0 |= cpu_to_le32(AMDGPU_VCN_SW_RING_FLAG) | cpu_to_le32(AMDGPU_VCN_MULTI_QUEUE_FLAG) | cpu_to_le32(AMDGPU_VCN_FW_SHARED_FLAG_0_RB); fw_shared->sw_ring.is_enabled = cpu_to_le32(DEC_SW_RING_ENABLED); + + if (amdgpu_vcnfw_log) + amdgpu_vcn_fwlog_init(&adev->vcn.inst[i]); } if (amdgpu_sriov_vf(adev)) { @@ -249,7 +253,7 @@ static int vcn_v3_0_sw_fini(void *handle) if (adev->vcn.harvest_config & (1 << i)) continue; - fw_shared = adev->vcn.inst[i].fw_shared_cpu_addr; + fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr; fw_shared->present_flag_0 = 0; fw_shared->sw_ring.is_enabled = false; } @@ -295,6 +299,7 @@ static int vcn_v3_0_hw_init(void *handle) ring = &adev->vcn.inst[i].ring_dec; if (amdgpu_vcn_is_disabled_vcn(adev, VCN_DECODE_RING, i)) { ring->sched.ready = false; + ring->no_scheduler = true; dev_info(adev->dev, "ring %s is disabled by hypervisor\n", 
ring->name); } else { ring->wptr = 0; @@ -307,6 +312,7 @@ static int vcn_v3_0_hw_init(void *handle) ring = &adev->vcn.inst[i].ring_enc[j]; if (amdgpu_vcn_is_disabled_vcn(adev, VCN_ENCODE_RING, i)) { ring->sched.ready = false; + ring->no_scheduler = true; dev_info(adev->dev, "ring %s is disabled by hypervisor\n", ring->name); } else { ring->wptr = 0; @@ -469,9 +475,9 @@ static void vcn_v3_0_mc_resume(struct amdgpu_device *adev, int inst) /* non-cache window */ WREG32_SOC15(VCN, inst, mmUVD_LMI_VCPU_NC0_64BIT_BAR_LOW, - lower_32_bits(adev->vcn.inst[inst].fw_shared_gpu_addr)); + lower_32_bits(adev->vcn.inst[inst].fw_shared.gpu_addr)); WREG32_SOC15(VCN, inst, mmUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH, - upper_32_bits(adev->vcn.inst[inst].fw_shared_gpu_addr)); + upper_32_bits(adev->vcn.inst[inst].fw_shared.gpu_addr)); WREG32_SOC15(VCN, inst, mmUVD_VCPU_NONCACHE_OFFSET0, 0); WREG32_SOC15(VCN, inst, mmUVD_VCPU_NONCACHE_SIZE0, AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_fw_shared))); @@ -558,10 +564,10 @@ static void vcn_v3_0_mc_resume_dpg_mode(struct amdgpu_device *adev, int inst_idx /* non-cache window */ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( VCN, inst_idx, mmUVD_LMI_VCPU_NC0_64BIT_BAR_LOW), - lower_32_bits(adev->vcn.inst[inst_idx].fw_shared_gpu_addr), 0, indirect); + lower_32_bits(adev->vcn.inst[inst_idx].fw_shared.gpu_addr), 0, indirect); WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( VCN, inst_idx, mmUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH), - upper_32_bits(adev->vcn.inst[inst_idx].fw_shared_gpu_addr), 0, indirect); + upper_32_bits(adev->vcn.inst[inst_idx].fw_shared.gpu_addr), 0, indirect); WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( VCN, inst_idx, mmUVD_VCPU_NONCACHE_OFFSET0), 0, 0, indirect); WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( @@ -923,7 +929,7 @@ static void vcn_v3_0_enable_clock_gating(struct amdgpu_device *adev, int inst) static int vcn_v3_0_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, bool indirect) { - volatile struct amdgpu_fw_shared *fw_shared = adev->vcn.inst[inst_idx].fw_shared_cpu_addr; + volatile struct amdgpu_fw_shared *fw_shared = adev->vcn.inst[inst_idx].fw_shared.cpu_addr; struct amdgpu_ring *ring; uint32_t rb_bufsz, tmp; @@ -1220,7 +1226,7 @@ static int vcn_v3_0_start(struct amdgpu_device *adev) tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_RPTR_WR_EN, 1); WREG32_SOC15(VCN, i, mmUVD_RBC_RB_CNTL, tmp); - fw_shared = adev->vcn.inst[i].fw_shared_cpu_addr; + fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr; fw_shared->multi_queue.decode_queue_mode |= cpu_to_le32(FW_QUEUE_RING_RESET); /* programm the RB_BASE for ring buffer */ @@ -1611,7 +1617,7 @@ static int vcn_v3_0_pause_dpg_mode(struct amdgpu_device *adev, if (adev->ip_versions[UVD_HWIP][0] != IP_VERSION(3, 0, 33)) { /* Restore */ - fw_shared = adev->vcn.inst[inst_idx].fw_shared_cpu_addr; + fw_shared = adev->vcn.inst[inst_idx].fw_shared.cpu_addr; fw_shared->multi_queue.encode_generalpurpose_queue_mode |= cpu_to_le32(FW_QUEUE_RING_RESET); ring = &adev->vcn.inst[inst_idx].ring_enc[0]; ring->wptr = 0; @@ -1700,7 +1706,7 @@ static void vcn_v3_0_dec_ring_set_wptr(struct amdgpu_ring *ring) if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) { /*whenever update RBC_RB_WPTR, we save the wptr in shared rb.wptr and scratch2 */ - fw_shared = adev->vcn.inst[ring->me].fw_shared_cpu_addr; + fw_shared = adev->vcn.inst[ring->me].fw_shared.cpu_addr; fw_shared->rb.wptr = lower_32_bits(ring->wptr); WREG32_SOC15(VCN, ring->me, mmUVD_SCRATCH2, lower_32_bits(ring->wptr)); @@ -1806,21 +1812,23 @@ static 
const struct amdgpu_ring_funcs vcn_v3_0_dec_sw_ring_vm_funcs = { .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, }; -static int vcn_v3_0_limit_sched(struct amdgpu_cs_parser *p) +static int vcn_v3_0_limit_sched(struct amdgpu_cs_parser *p, + struct amdgpu_job *job) { struct drm_gpu_scheduler **scheds; /* The create msg must be in the first IB submitted */ - if (atomic_read(&p->entity->fence_seq)) + if (atomic_read(&job->base.entity->fence_seq)) return -EINVAL; scheds = p->adev->gpu_sched[AMDGPU_HW_IP_VCN_DEC] [AMDGPU_RING_PRIO_DEFAULT].sched; - drm_sched_entity_modify_sched(p->entity, scheds, 1); + drm_sched_entity_modify_sched(job->base.entity, scheds, 1); return 0; } -static int vcn_v3_0_dec_msg(struct amdgpu_cs_parser *p, uint64_t addr) +static int vcn_v3_0_dec_msg(struct amdgpu_cs_parser *p, struct amdgpu_job *job, + uint64_t addr) { struct ttm_operation_ctx ctx = { false, false }; struct amdgpu_bo_va_mapping *map; @@ -1891,7 +1899,7 @@ static int vcn_v3_0_dec_msg(struct amdgpu_cs_parser *p, uint64_t addr) if (create[0] == 0x7 || create[0] == 0x10 || create[0] == 0x11) continue; - r = vcn_v3_0_limit_sched(p); + r = vcn_v3_0_limit_sched(p, job); if (r) goto out; } @@ -1902,10 +1910,10 @@ out: } static int vcn_v3_0_ring_patch_cs_in_place(struct amdgpu_cs_parser *p, - uint32_t ib_idx) + struct amdgpu_job *job, + struct amdgpu_ib *ib) { - struct amdgpu_ring *ring = to_amdgpu_ring(p->entity->rq->sched); - struct amdgpu_ib *ib = &p->job->ibs[ib_idx]; + struct amdgpu_ring *ring = to_amdgpu_ring(job->base.sched); uint32_t msg_lo = 0, msg_hi = 0; unsigned i; int r; @@ -1915,8 +1923,8 @@ static int vcn_v3_0_ring_patch_cs_in_place(struct amdgpu_cs_parser *p, return 0; for (i = 0; i < ib->length_dw; i += 2) { - uint32_t reg = amdgpu_get_ib_value(p, ib_idx, i); - uint32_t val = amdgpu_get_ib_value(p, ib_idx, i + 1); + uint32_t reg = amdgpu_ib_get_value(ib, i); + uint32_t val = amdgpu_ib_get_value(ib, i + 1); if (reg == PACKET0(p->adev->vcn.internal.data0, 0)) { msg_lo = val; @@ -1924,7 +1932,8 @@ static int vcn_v3_0_ring_patch_cs_in_place(struct amdgpu_cs_parser *p, msg_hi = val; } else if (reg == PACKET0(p->adev->vcn.internal.cmd, 0) && val == 0) { - r = vcn_v3_0_dec_msg(p, ((u64)msg_hi) << 32 | msg_lo); + r = vcn_v3_0_dec_msg(p, job, + ((u64)msg_hi) << 32 | msg_lo); if (r) return r; } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index 1cd2ea536bd0..acf4f7975850 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -500,6 +500,11 @@ static int dbgdev_wave_reset_wavefronts(struct kfd_dev *dev, struct kfd_process pr_debug("Killing all process wavefronts\n"); + if (!dev->kfd2kgd->get_atc_vmid_pasid_mapping_info) { + pr_err("no vmid pasid mapping supported \n"); + return -EOPNOTSUPP; + } + /* Scan all registers in the range ATC_VMID8_PASID_MAPPING .. * ATC_VMID15_PASID_MAPPING * to check which VMID the current process is mapped to. 
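
Editor's note on the amdkfd hunk above: the new guard returns -EOPNOTSUPP when kfd2kgd->get_atc_vmid_pasid_mapping_info is not wired up (the gfx10.3 vmid_pasid mapping fix called out in the pull summary), instead of faulting on a NULL callback in the VMID scan that follows. Roughly, the scan the guard protects looks like this (a sketch; the in-tree loop differs in detail):

    /* Find which VMID in ATC_VMID8..15_PASID_MAPPING the process owns. */
    for (vmid = first_vmid_to_scan; vmid <= last_vmid_to_scan; vmid++) {
            uint16_t queried_pasid;
            bool valid;

            valid = dev->kfd2kgd->get_atc_vmid_pasid_mapping_info(
                            dev->adev, vmid, &queried_pasid);
            if (valid && queried_pasid == p->pasid)
                    break;  /* this VMID is mapped to our PASID */
    }
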
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c index f9eafc796e70..e4beebb1c80a 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c @@ -82,7 +82,8 @@ static ssize_t kfd_smi_ev_read(struct file *filep, char __user *user, struct kfd_smi_client *client = filep->private_data; unsigned char *buf; - buf = kmalloc_array(MAX_KFIFO_SIZE, sizeof(*buf), GFP_KERNEL); + size = min_t(size_t, size, MAX_KFIFO_SIZE); + buf = kmalloc(size, GFP_KERNEL); if (!buf) return -ENOMEM; @@ -96,7 +97,7 @@ static ssize_t kfd_smi_ev_read(struct file *filep, char __user *user, ret = -EAGAIN; goto ret_err; } - to_copy = min3(size, sizeof(buf), to_copy); + to_copy = min(size, to_copy); ret = kfifo_out(&client->fifo, buf, to_copy); spin_unlock(&client->lock); if (ret <= 0) { @@ -175,22 +176,29 @@ static void add_event_to_kfifo(struct kfd_dev *dev, unsigned int smi_event, rcu_read_unlock(); } -void kfd_smi_event_update_gpu_reset(struct kfd_dev *dev, bool post_reset) +__printf(3, 4) +static void kfd_smi_event_add(struct kfd_dev *dev, unsigned int event, + char *fmt, ...) { - /* - * GpuReset msg = Reset seq number (incremented for - * every reset message sent before GPU reset). - * 1 byte event + 1 byte space + 8 bytes seq num + - * 1 byte \n + 1 byte \0 = 12 - */ - char fifo_in[12]; + char fifo_in[KFD_SMI_EVENT_MSG_SIZE]; int len; - unsigned int event; + va_list args; if (list_empty(&dev->smi_clients)) return; - memset(fifo_in, 0x0, sizeof(fifo_in)); + len = snprintf(fifo_in, sizeof(fifo_in), "%x ", event); + + va_start(args, fmt); + len += vsnprintf(fifo_in + len, sizeof(fifo_in) - len, fmt, args); + va_end(args); + + add_event_to_kfifo(dev, event, fifo_in, len); +} + +void kfd_smi_event_update_gpu_reset(struct kfd_dev *dev, bool post_reset) +{ + unsigned int event; if (post_reset) { event = KFD_SMI_EVENT_GPU_POST_RESET; @@ -198,48 +206,20 @@ void kfd_smi_event_update_gpu_reset(struct kfd_dev *dev, bool post_reset) event = KFD_SMI_EVENT_GPU_PRE_RESET; ++(dev->reset_seq_num); } - - len = snprintf(fifo_in, sizeof(fifo_in), "%x %x\n", event, - dev->reset_seq_num); - - add_event_to_kfifo(dev, event, fifo_in, len); + kfd_smi_event_add(dev, event, "%x\n", dev->reset_seq_num); } void kfd_smi_event_update_thermal_throttling(struct kfd_dev *dev, uint64_t throttle_bitmask) { - /* - * ThermalThrottle msg = throttle_bitmask(8): - * thermal_interrupt_count(16): - * 1 byte event + 1 byte space + 16 byte throttle_bitmask + - * 1 byte : + 16 byte thermal_interupt_counter + 1 byte \n + - * 1 byte \0 = 37 - */ - char fifo_in[37]; - int len; - - if (list_empty(&dev->smi_clients)) - return; - - len = snprintf(fifo_in, sizeof(fifo_in), "%x %llx:%llx\n", - KFD_SMI_EVENT_THERMAL_THROTTLE, throttle_bitmask, - amdgpu_dpm_get_thermal_throttling_counter(dev->adev)); - - add_event_to_kfifo(dev, KFD_SMI_EVENT_THERMAL_THROTTLE, fifo_in, len); + kfd_smi_event_add(dev, KFD_SMI_EVENT_THERMAL_THROTTLE, "%llx:%llx\n", + throttle_bitmask, + amdgpu_dpm_get_thermal_throttling_counter(dev->adev)); } void kfd_smi_event_update_vmfault(struct kfd_dev *dev, uint16_t pasid) { struct amdgpu_task_info task_info; - /* VmFault msg = (hex)uint32_pid(8) + :(1) + task name(16) = 25 */ - /* 1 byte event + 1 byte space + 25 bytes msg + 1 byte \n + - * 1 byte \0 = 29 - */ - char fifo_in[29]; - int len; - - if (list_empty(&dev->smi_clients)) - return; memset(&task_info, 0, sizeof(struct amdgpu_task_info)); amdgpu_vm_get_task_info(dev->adev, pasid, &task_info); @@ 
-247,10 +227,8 @@ void kfd_smi_event_update_vmfault(struct kfd_dev *dev, uint16_t pasid) if (!task_info.pid) return; - len = snprintf(fifo_in, sizeof(fifo_in), "%x %x:%s\n", KFD_SMI_EVENT_VMFAULT, - task_info.pid, task_info.task_name); - - add_event_to_kfifo(dev, KFD_SMI_EVENT_VMFAULT, fifo_in, len); + kfd_smi_event_add(dev, KFD_SMI_EVENT_VMFAULT, "%x:%s\n", + task_info.pid, task_info.task_name); } int kfd_smi_event_open(struct kfd_dev *dev, uint32_t *fd) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index b71d47afd243..509d915cbe69 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -1629,6 +1629,7 @@ retry_flush_work: static void svm_range_restore_work(struct work_struct *work) { struct delayed_work *dwork = to_delayed_work(work); + struct amdkfd_process_info *process_info; struct svm_range_list *svms; struct svm_range *prange; struct kfd_process *p; @@ -1645,6 +1646,7 @@ static void svm_range_restore_work(struct work_struct *work) pr_debug("restore svm ranges\n"); p = container_of(svms, struct kfd_process, svms); + process_info = p->kgd_process_info; /* Keep mm reference when svm_range_validate_and_map ranges */ mm = get_task_mm(p->lead_thread); @@ -1653,6 +1655,7 @@ static void svm_range_restore_work(struct work_struct *work) return; } + mutex_lock(&process_info->lock); svm_range_list_lock_and_flush_work(svms, mm); mutex_lock(&svms->lock); @@ -1705,6 +1708,7 @@ static void svm_range_restore_work(struct work_struct *work) out_reschedule: mutex_unlock(&svms->lock); mmap_write_unlock(mm); + mutex_unlock(&process_info->lock); mmput(mm); /* If validation failed, reschedule another attempt */ @@ -3209,6 +3213,7 @@ svm_range_set_attr(struct kfd_process *p, struct mm_struct *mm, uint64_t start, uint64_t size, uint32_t nattr, struct kfd_ioctl_svm_attribute *attrs) { + struct amdkfd_process_info *process_info = p->kgd_process_info; struct list_head update_list; struct list_head insert_list; struct list_head remove_list; @@ -3226,6 +3231,8 @@ svm_range_set_attr(struct kfd_process *p, struct mm_struct *mm, svms = &p->svms; + mutex_lock(&process_info->lock); + svm_range_list_lock_and_flush_work(svms, mm); r = svm_range_is_valid(p, start, size); @@ -3300,6 +3307,8 @@ out_unlock_range: mutex_unlock(&svms->lock); mmap_read_unlock(mm); out: + mutex_unlock(&process_info->lock); + pr_debug("pasid 0x%x svms 0x%p [0x%llx 0x%llx] done, r=%d\n", p->pasid, &p->svms, start, start + size - 1, r); diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c index 740435ae3997..31ac1fce36f8 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c @@ -1198,11 +1198,11 @@ void pre_validate_dsc(struct drm_atomic_state *state, struct dc_state *local_dc_state = NULL; if (!is_dsc_precompute_needed(state)) { - DRM_DEBUG_DRIVER("DSC precompute is not needed.\n"); + DRM_INFO_ONCE("DSC precompute is not needed.\n"); return; } if (dm_atomic_get_state(state, dm_state_ptr)) { - DRM_DEBUG_DRIVER("dm_atomic_get_state() failed\n"); + DRM_INFO_ONCE("dm_atomic_get_state() failed\n"); return; } dm_state = *dm_state_ptr; @@ -1245,7 +1245,7 @@ void pre_validate_dsc(struct drm_atomic_state *state, } if (!pre_compute_mst_dsc_configs_for_state(state, local_dc_state, vars)) { - DRM_DEBUG_DRIVER("pre_compute_mst_dsc_configs_for_state() failed\n"); + 
DRM_INFO_ONCE("pre_compute_mst_dsc_configs_for_state() failed\n"); goto clean_exit; } @@ -1258,7 +1258,7 @@ void pre_validate_dsc(struct drm_atomic_state *state, if (local_dc_state->streams[i] && is_timing_changed(stream, local_dc_state->streams[i])) { - DRM_DEBUG_DRIVER("crtc[%d] needs mode_changed\n", i); + DRM_INFO_ONCE("crtc[%d] needs mode_changed\n", i); } else { int ind = find_crtc_index_in_state_by_stream(state, stream); diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link.c b/drivers/gpu/drm/amd/display/dc/core/dc_link.c index 27f3fc416448..b2ed2b683ba5 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link.c @@ -981,7 +981,8 @@ static bool should_verify_link_capability_destructively(struct dc_link *link, destrictive = false; } } - } + } else if (dc_is_hdmi_signal(link->local_sink->sink_signal)) + destrictive = true; return destrictive; } @@ -2801,6 +2802,17 @@ static bool dp_active_dongle_validate_timing( return false; } + /* Check 3D format */ + switch (timing->timing_3d_format) { + case TIMING_3D_FORMAT_NONE: + case TIMING_3D_FORMAT_FRAME_ALTERNATE: + /*Only frame alternate 3D is supported on active dongle*/ + break; + default: + /*other 3D formats are not supported due to bad infoframe translation */ + return false; + } + #if defined(CONFIG_DRM_AMD_DC_DCN) if (dongle_caps->dp_hdmi_frl_max_link_bw_in_kbps > 0) { // DP to HDMI FRL converter struct dc_crtc_timing outputTiming = *timing; diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c index 5688b15ca9e6..1895252765dc 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c @@ -505,17 +505,24 @@ static void vendor_specific_lttpr_wa_four( } } -static void vendor_specific_lttpr_wa_five( +static void dp_fixed_vs_pe_set_retimer_lane_settings( struct dc_link *link, const union dpcd_training_lane dpcd_lane_adjust[LANE_COUNT_DP_MAX], uint8_t lane_count) { - const uint32_t vendor_lttpr_write_address = 0xF004F; + const uint8_t offset = dp_convert_to_count( + link->dpcd_caps.lttpr_caps.phy_repeater_cnt); const uint8_t vendor_lttpr_write_data_reset[4] = {0x1, 0x50, 0x63, 0xFF}; + uint32_t vendor_lttpr_write_address = 0xF004F; uint8_t vendor_lttpr_write_data_vs[4] = {0x1, 0x51, 0x63, 0x0}; uint8_t vendor_lttpr_write_data_pe[4] = {0x1, 0x52, 0x63, 0x0}; uint8_t lane = 0; + if (offset != 0xFF) { + vendor_lttpr_write_address += + ((DP_REPEATER_CONFIGURATION_AND_STATUS_SIZE) * (offset - 1)); + } + for (lane = 0; lane < lane_count; lane++) { vendor_lttpr_write_data_vs[3] |= dpcd_lane_adjust[lane].bits.VOLTAGE_SWING_SET << (2 * lane); @@ -5989,15 +5996,14 @@ bool dc_link_dp_set_test_pattern( if (link->dc->debug.apply_vendor_specific_lttpr_wa && (link->chip_caps & EXT_DISPLAY_PATH_CAPS__DP_FIXED_VS_EN) && link->lttpr_mode == LTTPR_MODE_TRANSPARENT) { - dpcd_set_lane_settings(link, p_link_settings, DPRX); - vendor_specific_lttpr_wa_five( + dp_fixed_vs_pe_set_retimer_lane_settings( link, p_link_settings->dpcd_lane_settings, p_link_settings->link_settings.lane_count); } else { dp_set_hw_lane_settings(link, &pipe_ctx->link_res, p_link_settings, DPRX); - dpcd_set_lane_settings(link, p_link_settings, DPRX); } + dpcd_set_lane_settings(link, p_link_settings, DPRX); } /* Blank stream if running test pattern */ diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c index cc8e60ec35c6..bc2150f3d79b 100644 --- 
a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c @@ -2615,6 +2615,8 @@ static void set_avi_info_frame( hdmi_info.bits.YQ0_YQ1 = YYC_QUANTIZATION_LIMITED_RANGE; ///VIC + if (pipe_ctx->stream->timing.hdmi_vic != 0) + vic = 0; format = stream->timing.timing_3d_format; /*todo, add 3DStereo support*/ if (format != TIMING_3D_FORMAT_NONE) { diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 55d43d642b38..333f4a49cacb 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -47,7 +47,7 @@ struct aux_payload; struct set_config_cmd_payload; struct dmub_notification; -#define DC_VER "3.2.174" +#define DC_VER "3.2.175" #define MAX_SURFACES 3 #define MAX_PLANES 6 @@ -354,6 +354,7 @@ enum dc_psr_power_opts { psr_power_opt_invalid = 0x0, psr_power_opt_smu_opt_static_screen = 0x1, psr_power_opt_z10_static_screen = 0x10, + psr_power_opt_ds_disable_allow = 0x100, }; enum dcc_option { @@ -710,6 +711,7 @@ struct dc_debug_options { #endif bool apply_vendor_specific_lttpr_wa; bool ignore_dpref_ss; + uint8_t psr_power_use_phy_fsm; }; struct gpu_info_soc_bounding_box_v1_0; diff --git a/drivers/gpu/drm/amd/display/dc/dce/dmub_psr.c b/drivers/gpu/drm/amd/display/dc/dce/dmub_psr.c index 8bd265b40847..312c68172689 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dmub_psr.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dmub_psr.c @@ -320,6 +320,7 @@ static bool dmub_psr_copy_settings(struct dmub_psr *dmub, copy_settings_data->otg_inst = 0; // Misc + copy_settings_data->use_phy_fsm = link->ctx->dc->debug.psr_power_use_phy_fsm; copy_settings_data->psr_level = psr_context->psr_level.u32all; copy_settings_data->smu_optimizations_en = psr_context->allow_smu_optimizations; copy_settings_data->multi_disp_optimizations_en = psr_context->allow_multi_disp_optimizations; diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c index 8378b80e8517..248602c15f3a 100644 --- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c @@ -1509,6 +1509,31 @@ static enum dc_status apply_single_controller_ctx_to_hw( if (!pipe_ctx->stream->apply_seamless_boot_optimization && dc->config.use_pipe_ctx_sync_logic) check_syncd_pipes_for_disabled_master_pipe(dc, context, pipe_ctx->pipe_idx); + pipe_ctx->stream_res.opp->funcs->opp_program_fmt( + pipe_ctx->stream_res.opp, + &stream->bit_depth_params, + &stream->clamping); + + pipe_ctx->stream_res.opp->funcs->opp_set_dyn_expansion( + pipe_ctx->stream_res.opp, + COLOR_SPACE_YCBCR601, + stream->timing.display_color_depth, + stream->signal); + + while (odm_pipe) { + odm_pipe->stream_res.opp->funcs->opp_set_dyn_expansion( + odm_pipe->stream_res.opp, + COLOR_SPACE_YCBCR601, + stream->timing.display_color_depth, + stream->signal); + + odm_pipe->stream_res.opp->funcs->opp_program_fmt( + odm_pipe->stream_res.opp, + &stream->bit_depth_params, + &stream->clamping); + odm_pipe = odm_pipe->next_odm_pipe; + } + /* DCN3.1 FPGA Workaround * Need to enable HPO DP Stream Encoder before setting OTG master enable. 
* To do so, move calling function enable_stream_timing to only be done AFTER calling @@ -1548,30 +1573,6 @@ static enum dc_status apply_single_controller_ctx_to_hw( if (dc_is_dp_signal(pipe_ctx->stream->signal)) dp_source_sequence_trace(link, DPCD_SOURCE_SEQ_AFTER_CONNECT_DIG_FE_OTG); - pipe_ctx->stream_res.opp->funcs->opp_set_dyn_expansion( - pipe_ctx->stream_res.opp, - COLOR_SPACE_YCBCR601, - stream->timing.display_color_depth, - stream->signal); - - pipe_ctx->stream_res.opp->funcs->opp_program_fmt( - pipe_ctx->stream_res.opp, - &stream->bit_depth_params, - &stream->clamping); - while (odm_pipe) { - odm_pipe->stream_res.opp->funcs->opp_set_dyn_expansion( - odm_pipe->stream_res.opp, - COLOR_SPACE_YCBCR601, - stream->timing.display_color_depth, - stream->signal); - - odm_pipe->stream_res.opp->funcs->opp_program_fmt( - odm_pipe->stream_res.opp, - &stream->bit_depth_params, - &stream->clamping); - odm_pipe = odm_pipe->next_odm_pipe; - } - if (!stream->dpms_off) core_link_enable_stream(context, pipe_ctx); diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c index 559aa45f27e7..bc9dd48258e3 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c @@ -107,7 +107,7 @@ void dcn10_lock_all_pipes(struct dc *dc, * (un)locking. Also skip if pipe is disabled. */ if (pipe_ctx->top_pipe || - !pipe_ctx->stream || !pipe_ctx->plane_state || + !pipe_ctx->stream || !tg->funcs->is_tg_enabled(tg)) continue; @@ -3093,7 +3093,8 @@ static void dcn10_config_stereo_parameters( flags->PROGRAM_STEREO = 1; flags->PROGRAM_POLARITY = 1; - if (timing_3d_format == TIMING_3D_FORMAT_INBAND_FA || + if (timing_3d_format == TIMING_3D_FORMAT_FRAME_ALTERNATE || + timing_3d_format == TIMING_3D_FORMAT_INBAND_FA || timing_3d_format == TIMING_3D_FORMAT_DP_HDMI_INBAND_FA || timing_3d_format == TIMING_3D_FORMAT_SIDEBAND_FA) { enum display_dongle_type dongle = \ diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubp.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubp.c index dc1752e9f461..a665af19f201 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubp.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubp.c @@ -707,17 +707,6 @@ bool hubp2_program_surface_flip_and_addr( REG_UPDATE(VMID_SETTINGS_0, VMID, address->vmid); - if (address->type == PLN_ADDR_TYPE_GRPH_STEREO) { - REG_UPDATE(DCSURF_FLIP_CONTROL, SURFACE_FLIP_MODE_FOR_STEREOSYNC, 0x1); - REG_UPDATE(DCSURF_FLIP_CONTROL, SURFACE_FLIP_IN_STEREOSYNC, 0x1); - - } else { - // turn off stereo if not in stereo - REG_UPDATE(DCSURF_FLIP_CONTROL, SURFACE_FLIP_MODE_FOR_STEREOSYNC, 0x0); - REG_UPDATE(DCSURF_FLIP_CONTROL, SURFACE_FLIP_IN_STEREOSYNC, 0x0); - } - - /* HW automatically latch rest of address register on write to * DCSURF_PRIMARY_SURFACE_ADDRESS if SURFACE_UPDATE_LOCK is not used @@ -942,10 +931,6 @@ void hubp2_set_blank_regs(struct hubp *hubp, bool blank) struct dcn20_hubp *hubp2 = TO_DCN20_HUBP(hubp); uint32_t blank_en = blank ? 
diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubp.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubp.c
index dc1752e9f461..a665af19f201 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubp.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubp.c
@@ -707,17 +707,6 @@ bool hubp2_program_surface_flip_and_addr(
	REG_UPDATE(VMID_SETTINGS_0, VMID, address->vmid);
 
-	if (address->type == PLN_ADDR_TYPE_GRPH_STEREO) {
-		REG_UPDATE(DCSURF_FLIP_CONTROL, SURFACE_FLIP_MODE_FOR_STEREOSYNC, 0x1);
-		REG_UPDATE(DCSURF_FLIP_CONTROL, SURFACE_FLIP_IN_STEREOSYNC, 0x1);
-
-	} else {
-		// turn off stereo if not in stereo
-		REG_UPDATE(DCSURF_FLIP_CONTROL, SURFACE_FLIP_MODE_FOR_STEREOSYNC, 0x0);
-		REG_UPDATE(DCSURF_FLIP_CONTROL, SURFACE_FLIP_IN_STEREOSYNC, 0x0);
-	}
-
-
	/* HW automatically latch rest of address register on write to
	 * DCSURF_PRIMARY_SURFACE_ADDRESS if SURFACE_UPDATE_LOCK is not used
@@ -942,10 +931,6 @@ void hubp2_set_blank_regs(struct hubp *hubp, bool blank)
	struct dcn20_hubp *hubp2 = TO_DCN20_HUBP(hubp);
	uint32_t blank_en = blank ? 1 : 0;
 
-	REG_UPDATE_2(DCHUBP_CNTL,
-			HUBP_BLANK_EN, blank_en,
-			HUBP_TTU_DISABLE, blank_en);
-
	if (blank) {
		uint32_t reg_val = REG_READ(DCHUBP_CNTL);
 
@@ -958,9 +943,13 @@ void hubp2_set_blank_regs(struct hubp *hubp, bool blank)
			 */
			REG_WAIT(DCHUBP_CNTL,
					HUBP_NO_OUTSTANDING_REQ, 1,
-					1, 200);
+					1, 100000);
		}
	}
+
+	REG_UPDATE_2(DCHUBP_CNTL,
+			HUBP_BLANK_EN, blank_en,
+			HUBP_TTU_DISABLE, 0);
 }
 
 void hubp2_cursor_set_position(
diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_dio_link_encoder.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_dio_link_encoder.c
index 8b9b1a5309ba..d94fd1010deb 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_dio_link_encoder.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_dio_link_encoder.c
@@ -240,18 +240,9 @@ static void enc31_hw_init(struct link_encoder *enc)
	// 100MHz -> 0x32
	// 48MHz -> 0x18
 
-#ifdef CLEANUP_FIXME
-	/*from display_init*/
-	REG_WRITE(RDPCSTX_DEBUG_CONFIG, 0);
-#endif
-
	// Set TMDS_CTL0 to 1. This is a legacy setting.
	REG_UPDATE(TMDS_CTL_BITS, TMDS_CTL0, 1);
 
-	/*HW default is 5*/
-	REG_UPDATE(RDPCSTX_CNTL,
-			RDPCS_TX_FIFO_RD_START_DELAY, 4);
-
	dcn10_aux_initialize(enc10);
 }
diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c
index 660e96a7fe7f..2ecd7bbfa0d4 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c
@@ -1777,7 +1777,7 @@ static bool is_dual_plane(enum surface_pixel_format format)
	return format >= SURFACE_PIXEL_FORMAT_VIDEO_BEGIN || format == SURFACE_PIXEL_FORMAT_GRPH_RGBE_ALPHA;
 }
 
-static int dcn31_populate_dml_pipes_from_context(
+int dcn31_populate_dml_pipes_from_context(
	struct dc *dc, struct dc_state *context,
	display_e2e_pipe_params_st *pipes,
	bool fast_validate)
@@ -1810,6 +1810,7 @@ static int dcn31_populate_dml_pipes_from_context(
		pipes[pipe_cnt].pipe.src.immediate_flip = true;
		pipes[pipe_cnt].pipe.src.unbounded_req_mode = false;
+		pipes[pipe_cnt].pipe.src.hostvm = dc->res_pool->hubbub->riommu_active;
		pipes[pipe_cnt].pipe.src.gpuvm = true;
		pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_luma = 0;
		pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_chroma = 0;
@@ -2068,7 +2069,7 @@ static struct dc_cap_funcs cap_funcs = {
	.get_dcc_compression_cap = dcn20_get_dcc_compression_cap
 };
 
-static void dcn31_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params)
+void dcn31_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params)
 {
	struct clk_limit_table *clk_table = &bw_params->clk_table;
	struct _vcs_dpi_voltage_scaling_st clock_limits[DC__VOLTAGE_STATES];
diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.h b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.h
index a513363b3326..4b7ab21ea15b 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.h
+++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.h
@@ -43,6 +43,11 @@ void dcn31_calculate_wm_and_dlg(
		display_e2e_pipe_params_st *pipes,
		int pipe_cnt,
		int vlevel);
+int dcn31_populate_dml_pipes_from_context(
+	struct dc *dc, struct dc_state *context,
+	display_e2e_pipe_params_st *pipes,
+	bool fast_validate);
+void dcn31_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params);
 void dcn31_update_soc_for_wm_a(struct dc *dc, struct dc_state *context);
 
 struct resource_pool *dcn31_create_resource_pool(
diff --git a/drivers/gpu/drm/amd/display/dc/dcn315/dcn315_resource.c b/drivers/gpu/drm/amd/display/dc/dcn315/dcn315_resource.c
index a71073482881..9a035e517ca9 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn315/dcn315_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn315/dcn315_resource.c
@@ -1035,6 +1035,7 @@ static const struct dc_debug_options debug_defaults_drv = {
	},
	.optimize_edp_link_rate = true,
	.enable_sw_cntl_psr = true,
+	.psr_power_use_phy_fsm = 0,
 };
 
 static const struct dc_debug_options debug_defaults_diags = {
diff --git a/drivers/gpu/drm/amd/display/dc/dm_helpers.h b/drivers/gpu/drm/amd/display/dc/dm_helpers.h
index b71b5fb894e2..31109db02e93 100644
--- a/drivers/gpu/drm/amd/display/dc/dm_helpers.h
+++ b/drivers/gpu/drm/amd/display/dc/dm_helpers.h
@@ -184,4 +184,7 @@ int dm_helpers_dmub_set_config_sync(struct dc_context *ctx,
		const struct dc_link *link,
		struct set_config_cmd_payload *payload,
		enum set_config_status *operation_result);
+
+enum dc_edid_status dm_helpers_get_sbios_edid(struct dc_link *link, struct dc_edid *edid);
+
 #endif /* __DM_HELPERS__ */
diff --git a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h
index 34fb148474cc..01b597d021fd 100644
--- a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h
+++ b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h
@@ -46,10 +46,10 @@
 
 /* Firmware versioning. */
 #ifdef DMUB_EXPOSE_VERSION
-#define DMUB_FW_VERSION_GIT_HASH 0x1422ef84
+#define DMUB_FW_VERSION_GIT_HASH 0x082bd4c8
 #define DMUB_FW_VERSION_MAJOR 0
 #define DMUB_FW_VERSION_MINOR 0
-#define DMUB_FW_VERSION_REVISION 104
+#define DMUB_FW_VERSION_REVISION 106
 #define DMUB_FW_VERSION_TEST 0
 #define DMUB_FW_VERSION_VBIOS 0
 #define DMUB_FW_VERSION_HOTFIX 0
@@ -1560,10 +1560,14 @@ struct dmub_cmd_psr_copy_settings_data {
	 * DSC enable status in driver
	 */
	uint8_t dsc_enable_status;
+	/*
+	 * Use FSM state for PSR power up/down
+	 */
+	uint8_t use_phy_fsm;
	/**
-	 * Explicit padding to 3 byte boundary.
+	 * Explicit padding to 2 byte boundary.
	 */
-	uint8_t pad3[3];
+	uint8_t pad3[2];
 };
 
 /**
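In the dmub_cmd.h hunk just above, the new use_phy_fsm byte is carved out of the explicit trailing padding (pad3 shrinks from 3 to 2 bytes), which keeps the size of the firmware command payload unchanged. A sketch of how that invariant can be pinned down at compile time; the struct and the 4-byte size below are an illustrative stand-in, not the real DMUB layout:

#include <stdint.h>
#include <assert.h>

struct fw_cmd_payload {             /* illustrative stand-in */
	uint8_t dsc_enable_status;
	uint8_t use_phy_fsm;        /* new field takes one padding byte */
	uint8_t pad[2];             /* was pad[3] before the new field */
};

/* If the firmware ABI expects 4 bytes, this fails to build whenever a
 * field is added without shrinking the padding to match. */
static_assert(sizeof(struct fw_cmd_payload) == 4,
	      "fw_cmd_payload size must not change");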
diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_hwmgr.c b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_hwmgr.c
index 3f040be0d158..37324f2009ca 100644
--- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_hwmgr.c
+++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_hwmgr.c
@@ -3095,7 +3095,7 @@ static int vega10_get_pp_table_entry_callback_func(struct pp_hwmgr *hwmgr,
		void *pp_table, uint32_t classification_flag)
 {
	ATOM_Vega10_GFXCLK_Dependency_Record_V2 *patom_record_V2;
-	struct vega10_power_state *vega10_power_state =
+	struct vega10_power_state *vega10_ps =
			cast_phw_vega10_power_state(&(power_state->hardware));
	struct vega10_performance_level *performance_level;
	ATOM_Vega10_State *state_entry = (ATOM_Vega10_State *)state;
@@ -3145,17 +3145,17 @@ static int vega10_get_pp_table_entry_callback_func(struct pp_hwmgr *hwmgr,
	power_state->temperatures.min = 0;
	power_state->temperatures.max = 0;
 
-	performance_level = &(vega10_power_state->performance_levels
-			[vega10_power_state->performance_level_count++]);
+	performance_level = &(vega10_ps->performance_levels
+			[vega10_ps->performance_level_count++]);
 
	PP_ASSERT_WITH_CODE(
-			(vega10_power_state->performance_level_count <
+			(vega10_ps->performance_level_count <
					NUM_GFXCLK_DPM_LEVELS),
			"Performance levels exceeds SMC limit!",
			return -1);
 
	PP_ASSERT_WITH_CODE(
-			(vega10_power_state->performance_level_count <=
+			(vega10_ps->performance_level_count <=
					hwmgr->platform_descriptor.
					hardwareActivityPerformanceLevels),
			"Performance levels exceeds Driver limit!",
@@ -3169,8 +3169,8 @@ static int vega10_get_pp_table_entry_callback_func(struct pp_hwmgr *hwmgr,
	performance_level->mem_clock = mclk_dep_table->entries
			[state_entry->ucMemClockIndexLow].ulMemClk;
 
-	performance_level = &(vega10_power_state->performance_levels
-			[vega10_power_state->performance_level_count++]);
+	performance_level = &(vega10_ps->performance_levels
+			[vega10_ps->performance_level_count++]);
	performance_level->soc_clock = socclk_dep_table->entries
			[state_entry->ucSocClockIndexHigh].ulClk;
	if (gfxclk_dep_table->ucRevId == 0) {
@@ -3201,11 +3201,11 @@ static int vega10_get_pp_table_entry(struct pp_hwmgr *hwmgr,
		unsigned long entry_index, struct pp_power_state *state)
 {
	int result;
-	struct vega10_power_state *ps;
+	struct vega10_power_state *vega10_ps;
 
	state->hardware.magic = PhwVega10_Magic;
 
-	ps = cast_phw_vega10_power_state(&state->hardware);
+	vega10_ps = cast_phw_vega10_power_state(&state->hardware);
 
	result = vega10_get_powerplay_table_entry(hwmgr, entry_index, state,
			vega10_get_pp_table_entry_callback_func);
@@ -3218,10 +3218,10 @@ static int vega10_get_pp_table_entry(struct pp_hwmgr *hwmgr,
	 */
	/* set DC compatible flag if this state supports DC */
	if (!state->validation.disallowOnDC)
-		ps->dc_compatible = true;
+		vega10_ps->dc_compatible = true;
 
-	ps->uvd_clks.vclk = state->uvd_clocks.VCLK;
-	ps->uvd_clks.dclk = state->uvd_clocks.DCLK;
+	vega10_ps->uvd_clks.vclk = state->uvd_clocks.VCLK;
+	vega10_ps->uvd_clks.dclk = state->uvd_clocks.DCLK;
 
	return 0;
 }
@@ -4823,33 +4823,41 @@ static int vega10_check_states_equal(struct pp_hwmgr *hwmgr,
				const struct pp_hw_power_state *pstate1,
				const struct pp_hw_power_state *pstate2, bool *equal)
 {
-	const struct vega10_power_state *psa;
-	const struct vega10_power_state *psb;
+	const struct vega10_power_state *vega10_psa;
+	const struct vega10_power_state *vega10_psb;
	int i;
 
	if (pstate1 == NULL || pstate2 == NULL || equal == NULL)
		return -EINVAL;
 
-	psa = cast_const_phw_vega10_power_state(pstate1);
-	psb = cast_const_phw_vega10_power_state(pstate2);
-	/* If the two states don't even have the same number of performance levels they cannot be the same state. */
-	if (psa->performance_level_count != psb->performance_level_count) {
+	vega10_psa = cast_const_phw_vega10_power_state(pstate1);
+	vega10_psb = cast_const_phw_vega10_power_state(pstate2);
+
+	/* If the two states don't even have the same number of performance levels
+	 * they cannot be the same state.
+	 */
+	if (vega10_psa->performance_level_count != vega10_psb->performance_level_count) {
		*equal = false;
		return 0;
	}
 
-	for (i = 0; i < psa->performance_level_count; i++) {
-		if (!vega10_are_power_levels_equal(&(psa->performance_levels[i]), &(psb->performance_levels[i]))) {
-			/* If we have found even one performance level pair that is different the states are different. */
+	for (i = 0; i < vega10_psa->performance_level_count; i++) {
+		if (!vega10_are_power_levels_equal(&(vega10_psa->performance_levels[i]),
+						   &(vega10_psb->performance_levels[i]))) {
+			/* If we have found even one performance level pair
+			 * that is different the states are different.
+			 */
			*equal = false;
			return 0;
		}
	}
 
	/* If all performance levels are the same try to use the UVD clocks to break the tie.*/
-	*equal = ((psa->uvd_clks.vclk == psb->uvd_clks.vclk) && (psa->uvd_clks.dclk == psb->uvd_clks.dclk));
-	*equal &= ((psa->vce_clks.evclk == psb->vce_clks.evclk) && (psa->vce_clks.ecclk == psb->vce_clks.ecclk));
-	*equal &= (psa->sclk_threshold == psb->sclk_threshold);
+	*equal = ((vega10_psa->uvd_clks.vclk == vega10_psb->uvd_clks.vclk) &&
+		  (vega10_psa->uvd_clks.dclk == vega10_psb->uvd_clks.dclk));
+	*equal &= ((vega10_psa->vce_clks.evclk == vega10_psb->vce_clks.evclk) &&
+		   (vega10_psa->vce_clks.ecclk == vega10_psb->vce_clks.ecclk));
+	*equal &= (vega10_psa->sclk_threshold == vega10_psb->sclk_threshold);
 
	return 0;
 }
@@ -5444,19 +5452,19 @@ static int vega10_get_performance_level(struct pp_hwmgr *hwmgr, const struct pp_hw_power_state *state,
				PHM_PerformanceLevelDesignation designation, uint32_t index,
				PHM_PerformanceLevel *level)
 {
-	const struct vega10_power_state *ps;
+	const struct vega10_power_state *vega10_ps;
	uint32_t i;
 
	if (level == NULL || hwmgr == NULL || state == NULL)
		return -EINVAL;
 
-	ps = cast_const_phw_vega10_power_state(state);
+	vega10_ps = cast_const_phw_vega10_power_state(state);
 
-	i = index > ps->performance_level_count - 1 ?
-			ps->performance_level_count - 1 : index;
+	i = index > vega10_ps->performance_level_count - 1 ?
+			vega10_ps->performance_level_count - 1 : index;
 
-	level->coreClock = ps->performance_levels[i].gfx_clock;
-	level->memory_clock = ps->performance_levels[i].mem_clock;
+	level->coreClock = vega10_ps->performance_levels[i].gfx_clock;
+	level->memory_clock = vega10_ps->performance_levels[i].mem_clock;
 
	return 0;
 }
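The vega10_hwmgr.c hunks above are a mechanical rename (ps, psa, psb become vega10_ps, vega10_psa, vega10_psb) plus comment and line-wrapping cleanups; the comparison logic itself is unchanged. Reduced to its shape, the equality check compares level counts, then each performance level, then falls back to auxiliary clocks as a tie-breaker. A simplified sketch with stand-in types, not the driver's structures:

#include <stdbool.h>
#include <stddef.h>

struct perf_level { unsigned gfx_clock, mem_clock, soc_clock; };

struct power_state {                /* simplified stand-in */
	size_t level_count;
	struct perf_level levels[8];
	unsigned vclk, dclk;        /* tie-breaker clocks */
};

static bool levels_equal(const struct perf_level *a, const struct perf_level *b)
{
	return a->gfx_clock == b->gfx_clock &&
	       a->mem_clock == b->mem_clock &&
	       a->soc_clock == b->soc_clock;
}

static int states_equal(const struct power_state *a,
			const struct power_state *b, bool *equal)
{
	size_t i;

	if (!a || !b || !equal)
		return -1;
	/* different level counts: trivially different states */
	if (a->level_count != b->level_count) {
		*equal = false;
		return 0;
	}
	/* one differing level pair is enough to distinguish them */
	for (i = 0; i < a->level_count; i++) {
		if (!levels_equal(&a->levels[i], &b->levels[i])) {
			*equal = false;
			return 0;
		}
	}
	/* all levels match: use the auxiliary clocks to break the tie */
	*equal = (a->vclk == b->vclk) && (a->dclk == b->dclk);
	return 0;
}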
diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu11_driver_if_sienna_cichlid.h b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu11_driver_if_sienna_cichlid.h
index b253be602cc2..3e4a314ef925 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu11_driver_if_sienna_cichlid.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu11_driver_if_sienna_cichlid.h
@@ -1481,9 +1481,67 @@ typedef struct {
 } SmuMetrics_V2_t;
 
 typedef struct {
+  uint32_t CurrClock[PPCLK_COUNT];
+
+  uint16_t AverageGfxclkFrequencyPreDs;
+  uint16_t AverageGfxclkFrequencyPostDs;
+  uint16_t AverageFclkFrequencyPreDs;
+  uint16_t AverageFclkFrequencyPostDs;
+  uint16_t AverageUclkFrequencyPreDs;
+  uint16_t AverageUclkFrequencyPostDs;
+
+
+  uint16_t AverageGfxActivity;
+  uint16_t AverageUclkActivity;
+  uint8_t  CurrSocVoltageOffset;
+  uint8_t  CurrGfxVoltageOffset;
+  uint8_t  CurrMemVidOffset;
+  uint8_t  Padding8;
+  uint16_t AverageSocketPower;
+  uint16_t TemperatureEdge;
+  uint16_t TemperatureHotspot;
+  uint16_t TemperatureMem;
+  uint16_t TemperatureVrGfx;
+  uint16_t TemperatureVrMem0;
+  uint16_t TemperatureVrMem1;
+  uint16_t TemperatureVrSoc;
+  uint16_t TemperatureLiquid0;
+  uint16_t TemperatureLiquid1;
+  uint16_t TemperaturePlx;
+  uint16_t Padding16;
+  uint32_t AccCnt;
+  uint8_t  ThrottlingPercentage[THROTTLER_COUNT];
+
+
+  uint8_t  LinkDpmLevel;
+  uint8_t  CurrFanPwm;
+  uint16_t CurrFanSpeed;
+
+  //BACO metrics, PMFW-1721
+  //metrics for D3hot entry/exit and driver ARM msgs
+  uint8_t D3HotEntryCountPerMode[D3HOT_SEQUENCE_COUNT];
+  uint8_t D3HotExitCountPerMode[D3HOT_SEQUENCE_COUNT];
+  uint8_t ArmMsgReceivedCountPerMode[D3HOT_SEQUENCE_COUNT];
+
+  //PMFW-4362
+  uint32_t EnergyAccumulator;
+  uint16_t AverageVclk0Frequency;
+  uint16_t AverageDclk0Frequency;
+  uint16_t AverageVclk1Frequency;
+  uint16_t AverageDclk1Frequency;
+  uint16_t VcnUsagePercentage0;
+  uint16_t VcnUsagePercentage1;
+  uint8_t  PcieRate;
+  uint8_t  PcieWidth;
+  uint16_t AverageGfxclkFrequencyTarget;
+
+} SmuMetrics_V3_t;
+
+typedef struct {
   union {
     SmuMetrics_t SmuMetrics;
     SmuMetrics_V2_t SmuMetrics_V2;
+    SmuMetrics_V3_t SmuMetrics_V3;
   };
 
   uint32_t Spare[1];
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
index d9d634ce9575..38f04836c82f 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
@@ -554,6 +554,11 @@ static uint32_t sienna_cichlid_get_throttler_status_locked(struct smu_context *smu)
	int i;
 
	if ((smu->adev->ip_versions[MP1_HWIP][0] == IP_VERSION(11, 0, 7)) &&
+	     (smu->smc_fw_version >= 0x3A4900)) {
+		for (i = 0; i < THROTTLER_COUNT; i++)
+			throttler_status |=
+				(metrics_ext->SmuMetrics_V3.ThrottlingPercentage[i] ? 1U << i : 0);
+	} else if ((smu->adev->ip_versions[MP1_HWIP][0] == IP_VERSION(11, 0, 7)) &&
	     (smu->smc_fw_version >= 0x3A4300)) {
		for (i = 0; i < THROTTLER_COUNT; i++)
			throttler_status |=
@@ -574,11 +579,20 @@ static int sienna_cichlid_get_smu_metrics_data(struct smu_context *smu,
		&(((SmuMetricsExternal_t *)(smu_table->metrics_table))->SmuMetrics);
	SmuMetrics_V2_t *metrics_v2 =
		&(((SmuMetricsExternal_t *)(smu_table->metrics_table))->SmuMetrics_V2);
-	bool use_metrics_v2 = ((smu->adev->ip_versions[MP1_HWIP][0] == IP_VERSION(11, 0, 7)) &&
-		(smu->smc_fw_version >= 0x3A4300)) ? true : false;
+	SmuMetrics_V3_t *metrics_v3 =
+		&(((SmuMetricsExternal_t *)(smu_table->metrics_table))->SmuMetrics_V3);
+	bool use_metrics_v2 = false;
+	bool use_metrics_v3 = false;
	uint16_t average_gfx_activity;
	int ret = 0;
 
+	if ((smu->adev->ip_versions[MP1_HWIP][0] == IP_VERSION(11, 0, 7)) &&
+		(smu->smc_fw_version >= 0x3A4900))
+		use_metrics_v3 = true;
+	else if ((smu->adev->ip_versions[MP1_HWIP][0] == IP_VERSION(11, 0, 7)) &&
+		(smu->smc_fw_version >= 0x3A4300))
+		use_metrics_v2 = true;
+
	ret = smu_cmn_get_metrics_table(smu, NULL, false);
@@ -587,96 +601,119 @@ static int sienna_cichlid_get_smu_metrics_data(struct smu_context *smu,
 
	switch (member) {
	case METRICS_CURR_GFXCLK:
-		*value = use_metrics_v2 ? metrics_v2->CurrClock[PPCLK_GFXCLK] :
+		*value = use_metrics_v3 ? metrics_v3->CurrClock[PPCLK_GFXCLK] :
+			use_metrics_v2 ? metrics_v2->CurrClock[PPCLK_GFXCLK] :
			metrics->CurrClock[PPCLK_GFXCLK];
		break;
	case METRICS_CURR_SOCCLK:
-		*value = use_metrics_v2 ? metrics_v2->CurrClock[PPCLK_SOCCLK] :
+		*value = use_metrics_v3 ? metrics_v3->CurrClock[PPCLK_SOCCLK] :
+			use_metrics_v2 ? metrics_v2->CurrClock[PPCLK_SOCCLK] :
			metrics->CurrClock[PPCLK_SOCCLK];
		break;
	case METRICS_CURR_UCLK:
-		*value = use_metrics_v2 ? metrics_v2->CurrClock[PPCLK_UCLK] :
+		*value = use_metrics_v3 ? metrics_v3->CurrClock[PPCLK_UCLK] :
+			use_metrics_v2 ? metrics_v2->CurrClock[PPCLK_UCLK] :
			metrics->CurrClock[PPCLK_UCLK];
		break;
	case METRICS_CURR_VCLK:
-		*value = use_metrics_v2 ? metrics_v2->CurrClock[PPCLK_VCLK_0] :
+		*value = use_metrics_v3 ? metrics_v3->CurrClock[PPCLK_VCLK_0] :
+			use_metrics_v2 ? metrics_v2->CurrClock[PPCLK_VCLK_0] :
			metrics->CurrClock[PPCLK_VCLK_0];
		break;
	case METRICS_CURR_VCLK1:
-		*value = use_metrics_v2 ? metrics_v2->CurrClock[PPCLK_VCLK_1] :
+		*value = use_metrics_v3 ? metrics_v3->CurrClock[PPCLK_VCLK_1] :
+			use_metrics_v2 ? metrics_v2->CurrClock[PPCLK_VCLK_1] :
			metrics->CurrClock[PPCLK_VCLK_1];
		break;
	case METRICS_CURR_DCLK:
-		*value = use_metrics_v2 ? metrics_v2->CurrClock[PPCLK_DCLK_0] :
+		*value = use_metrics_v3 ? metrics_v3->CurrClock[PPCLK_DCLK_0] :
+			use_metrics_v2 ? metrics_v2->CurrClock[PPCLK_DCLK_0] :
			metrics->CurrClock[PPCLK_DCLK_0];
		break;
	case METRICS_CURR_DCLK1:
-		*value = use_metrics_v2 ? metrics_v2->CurrClock[PPCLK_DCLK_1] :
+		*value = use_metrics_v3 ? metrics_v3->CurrClock[PPCLK_DCLK_1] :
+			use_metrics_v2 ? metrics_v2->CurrClock[PPCLK_DCLK_1] :
			metrics->CurrClock[PPCLK_DCLK_1];
		break;
	case METRICS_CURR_DCEFCLK:
-		*value = use_metrics_v2 ? metrics_v2->CurrClock[PPCLK_DCEFCLK] :
+		*value = use_metrics_v3 ? metrics_v3->CurrClock[PPCLK_DCEFCLK] :
+			use_metrics_v2 ? metrics_v2->CurrClock[PPCLK_DCEFCLK] :
			metrics->CurrClock[PPCLK_DCEFCLK];
		break;
	case METRICS_CURR_FCLK:
-		*value = use_metrics_v2 ? metrics_v2->CurrClock[PPCLK_FCLK] :
+		*value = use_metrics_v3 ? metrics_v3->CurrClock[PPCLK_FCLK] :
+			use_metrics_v2 ? metrics_v2->CurrClock[PPCLK_FCLK] :
			metrics->CurrClock[PPCLK_FCLK];
		break;
	case METRICS_AVERAGE_GFXCLK:
-		average_gfx_activity = use_metrics_v2 ? metrics_v2->AverageGfxActivity :
+		average_gfx_activity = use_metrics_v3 ? metrics_v3->AverageGfxActivity :
+			use_metrics_v2 ? metrics_v2->AverageGfxActivity :
			metrics->AverageGfxActivity;
		if (average_gfx_activity <= SMU_11_0_7_GFX_BUSY_THRESHOLD)
-			*value = use_metrics_v2 ? metrics_v2->AverageGfxclkFrequencyPostDs :
+			*value = use_metrics_v3 ? metrics_v3->AverageGfxclkFrequencyPostDs :
+				use_metrics_v2 ? metrics_v2->AverageGfxclkFrequencyPostDs :
				metrics->AverageGfxclkFrequencyPostDs;
		else
-			*value = use_metrics_v2 ? metrics_v2->AverageGfxclkFrequencyPreDs :
+			*value = use_metrics_v3 ? metrics_v3->AverageGfxclkFrequencyPreDs :
+				use_metrics_v2 ? metrics_v2->AverageGfxclkFrequencyPreDs :
				metrics->AverageGfxclkFrequencyPreDs;
		break;
	case METRICS_AVERAGE_FCLK:
-		*value = use_metrics_v2 ? metrics_v2->AverageFclkFrequencyPostDs :
+		*value = use_metrics_v3 ? metrics_v3->AverageFclkFrequencyPostDs :
+			use_metrics_v2 ? metrics_v2->AverageFclkFrequencyPostDs :
			metrics->AverageFclkFrequencyPostDs;
		break;
	case METRICS_AVERAGE_UCLK:
-		*value = use_metrics_v2 ? metrics_v2->AverageUclkFrequencyPostDs :
+		*value = use_metrics_v3 ? metrics_v3->AverageUclkFrequencyPostDs :
+			use_metrics_v2 ? metrics_v2->AverageUclkFrequencyPostDs :
			metrics->AverageUclkFrequencyPostDs;
		break;
	case METRICS_AVERAGE_GFXACTIVITY:
-		*value = use_metrics_v2 ? metrics_v2->AverageGfxActivity :
+		*value = use_metrics_v3 ? metrics_v3->AverageGfxActivity :
+			use_metrics_v2 ? metrics_v2->AverageGfxActivity :
			metrics->AverageGfxActivity;
		break;
	case METRICS_AVERAGE_MEMACTIVITY:
-		*value = use_metrics_v2 ? metrics_v2->AverageUclkActivity :
+		*value = use_metrics_v3 ? metrics_v3->AverageUclkActivity :
+			use_metrics_v2 ? metrics_v2->AverageUclkActivity :
			metrics->AverageUclkActivity;
		break;
	case METRICS_AVERAGE_SOCKETPOWER:
-		*value = use_metrics_v2 ? metrics_v2->AverageSocketPower << 8 :
+		*value = use_metrics_v3 ? metrics_v3->AverageSocketPower << 8 :
+			use_metrics_v2 ? metrics_v2->AverageSocketPower << 8 :
			metrics->AverageSocketPower << 8;
		break;
	case METRICS_TEMPERATURE_EDGE:
-		*value = (use_metrics_v2 ? metrics_v2->TemperatureEdge : metrics->TemperatureEdge) *
-			SMU_TEMPERATURE_UNITS_PER_CENTIGRADES;
+		*value = (use_metrics_v3 ? metrics_v3->TemperatureEdge :
+			use_metrics_v2 ? metrics_v2->TemperatureEdge :
+			metrics->TemperatureEdge) * SMU_TEMPERATURE_UNITS_PER_CENTIGRADES;
		break;
	case METRICS_TEMPERATURE_HOTSPOT:
-		*value = (use_metrics_v2 ? metrics_v2->TemperatureHotspot : metrics->TemperatureHotspot) *
-			SMU_TEMPERATURE_UNITS_PER_CENTIGRADES;
+		*value = (use_metrics_v3 ? metrics_v3->TemperatureHotspot :
+			use_metrics_v2 ? metrics_v2->TemperatureHotspot :
+			metrics->TemperatureHotspot) * SMU_TEMPERATURE_UNITS_PER_CENTIGRADES;
		break;
	case METRICS_TEMPERATURE_MEM:
-		*value = (use_metrics_v2 ? metrics_v2->TemperatureMem : metrics->TemperatureMem) *
-			SMU_TEMPERATURE_UNITS_PER_CENTIGRADES;
+		*value = (use_metrics_v3 ? metrics_v3->TemperatureMem :
+			use_metrics_v2 ? metrics_v2->TemperatureMem :
+			metrics->TemperatureMem) * SMU_TEMPERATURE_UNITS_PER_CENTIGRADES;
		break;
	case METRICS_TEMPERATURE_VRGFX:
-		*value = (use_metrics_v2 ? metrics_v2->TemperatureVrGfx : metrics->TemperatureVrGfx) *
-			SMU_TEMPERATURE_UNITS_PER_CENTIGRADES;
+		*value = (use_metrics_v3 ? metrics_v3->TemperatureVrGfx :
+			use_metrics_v2 ? metrics_v2->TemperatureVrGfx :
+			metrics->TemperatureVrGfx) * SMU_TEMPERATURE_UNITS_PER_CENTIGRADES;
		break;
	case METRICS_TEMPERATURE_VRSOC:
-		*value = (use_metrics_v2 ? metrics_v2->TemperatureVrSoc : metrics->TemperatureVrSoc) *
-			SMU_TEMPERATURE_UNITS_PER_CENTIGRADES;
+		*value = (use_metrics_v3 ? metrics_v3->TemperatureVrSoc :
+			use_metrics_v2 ? metrics_v2->TemperatureVrSoc :
+			metrics->TemperatureVrSoc) * SMU_TEMPERATURE_UNITS_PER_CENTIGRADES;
		break;
	case METRICS_THROTTLER_STATUS:
		*value = sienna_cichlid_get_throttler_status_locked(smu);
		break;
	case METRICS_CURR_FANSPEED:
-		*value = use_metrics_v2 ? metrics_v2->CurrFanSpeed : metrics->CurrFanSpeed;
+		*value = use_metrics_v3 ? metrics_v3->CurrFanSpeed :
+			use_metrics_v2 ? metrics_v2->CurrFanSpeed : metrics->CurrFanSpeed;
		break;
	default:
		*value = UINT_MAX;
@@ -3656,12 +3693,22 @@ static ssize_t sienna_cichlid_get_gpu_metrics(struct smu_context *smu,
		&(metrics_external.SmuMetrics);
	SmuMetrics_V2_t *metrics_v2 =
		&(metrics_external.SmuMetrics_V2);
+	SmuMetrics_V3_t *metrics_v3 =
+		&(metrics_external.SmuMetrics_V3);
	struct amdgpu_device *adev = smu->adev;
-	bool use_metrics_v2 = ((adev->ip_versions[MP1_HWIP][0] == IP_VERSION(11, 0, 7)) &&
-		(smu->smc_fw_version >= 0x3A4300)) ? true : false;
+	bool use_metrics_v2 = false;
+	bool use_metrics_v3 = false;
	uint16_t average_gfx_activity;
	int ret = 0;
 
+	if ((adev->ip_versions[MP1_HWIP][0] == IP_VERSION(11, 0, 7)) &&
+		(smu->smc_fw_version >= 0x3A4900))
+		use_metrics_v3 = true;
+	else if ((adev->ip_versions[MP1_HWIP][0] == IP_VERSION(11, 0, 7)) &&
+		(smu->smc_fw_version >= 0x3A4300))
+		use_metrics_v2 = true;
+
	ret = smu_cmn_get_metrics_table(smu, &metrics_external, true);
@@ -3670,29 +3717,30 @@ static ssize_t sienna_cichlid_get_gpu_metrics(struct smu_context *smu,
 
	smu_cmn_init_soft_gpu_metrics(gpu_metrics, 1, 3);
 
-	gpu_metrics->temperature_edge =
+	gpu_metrics->temperature_edge = use_metrics_v3 ? metrics_v3->TemperatureEdge :
		use_metrics_v2 ? metrics_v2->TemperatureEdge : metrics->TemperatureEdge;
-	gpu_metrics->temperature_hotspot =
+	gpu_metrics->temperature_hotspot = use_metrics_v3 ? metrics_v3->TemperatureHotspot :
		use_metrics_v2 ? metrics_v2->TemperatureHotspot : metrics->TemperatureHotspot;
-	gpu_metrics->temperature_mem =
+	gpu_metrics->temperature_mem = use_metrics_v3 ? metrics_v3->TemperatureMem :
		use_metrics_v2 ? metrics_v2->TemperatureMem : metrics->TemperatureMem;
-	gpu_metrics->temperature_vrgfx =
+	gpu_metrics->temperature_vrgfx = use_metrics_v3 ? metrics_v3->TemperatureVrGfx :
		use_metrics_v2 ? metrics_v2->TemperatureVrGfx : metrics->TemperatureVrGfx;
-	gpu_metrics->temperature_vrsoc =
+	gpu_metrics->temperature_vrsoc = use_metrics_v3 ? metrics_v3->TemperatureVrSoc :
		use_metrics_v2 ? metrics_v2->TemperatureVrSoc : metrics->TemperatureVrSoc;
-	gpu_metrics->temperature_vrmem =
+	gpu_metrics->temperature_vrmem = use_metrics_v3 ? metrics_v3->TemperatureVrMem0 :
		use_metrics_v2 ? metrics_v2->TemperatureVrMem0 : metrics->TemperatureVrMem0;
-	gpu_metrics->average_gfx_activity =
+	gpu_metrics->average_gfx_activity = use_metrics_v3 ? metrics_v3->AverageGfxActivity :
		use_metrics_v2 ? metrics_v2->AverageGfxActivity : metrics->AverageGfxActivity;
-	gpu_metrics->average_umc_activity =
+	gpu_metrics->average_umc_activity = use_metrics_v3 ? metrics_v3->AverageUclkActivity :
		use_metrics_v2 ? metrics_v2->AverageUclkActivity : metrics->AverageUclkActivity;
-	gpu_metrics->average_mm_activity =
+	gpu_metrics->average_mm_activity = use_metrics_v3 ?
+		(metrics_v3->VcnUsagePercentage0 + metrics_v3->VcnUsagePercentage1) / 2 :
		use_metrics_v2 ? metrics_v2->VcnActivityPercentage : metrics->VcnActivityPercentage;
-	gpu_metrics->average_socket_power =
+	gpu_metrics->average_socket_power = use_metrics_v3 ? metrics_v3->AverageSocketPower :
		use_metrics_v2 ? metrics_v2->AverageSocketPower : metrics->AverageSocketPower;
-	gpu_metrics->energy_accumulator =
+	gpu_metrics->energy_accumulator = use_metrics_v3 ? metrics_v3->EnergyAccumulator :
		use_metrics_v2 ? metrics_v2->EnergyAccumulator : metrics->EnergyAccumulator;
 
	if (metrics->CurrGfxVoltageOffset)
@@ -3705,37 +3753,45 @@ static ssize_t sienna_cichlid_get_gpu_metrics(struct smu_context *smu,
		gpu_metrics->voltage_soc =
			(155000 - 625 * metrics->CurrSocVoltageOffset) / 100;
 
-	average_gfx_activity = use_metrics_v2 ? metrics_v2->AverageGfxActivity : metrics->AverageGfxActivity;
+	average_gfx_activity = use_metrics_v3 ? metrics_v3->AverageGfxActivity :
+		use_metrics_v2 ? metrics_v2->AverageGfxActivity : metrics->AverageGfxActivity;
	if (average_gfx_activity <= SMU_11_0_7_GFX_BUSY_THRESHOLD)
		gpu_metrics->average_gfxclk_frequency =
-			use_metrics_v2 ? metrics_v2->AverageGfxclkFrequencyPostDs : metrics->AverageGfxclkFrequencyPostDs;
+			use_metrics_v3 ? metrics_v3->AverageGfxclkFrequencyPostDs :
+			use_metrics_v2 ? metrics_v2->AverageGfxclkFrequencyPostDs :
+			metrics->AverageGfxclkFrequencyPostDs;
	else
		gpu_metrics->average_gfxclk_frequency =
-			use_metrics_v2 ? metrics_v2->AverageGfxclkFrequencyPreDs : metrics->AverageGfxclkFrequencyPreDs;
+			use_metrics_v3 ? metrics_v3->AverageGfxclkFrequencyPreDs :
+			use_metrics_v2 ? metrics_v2->AverageGfxclkFrequencyPreDs :
+			metrics->AverageGfxclkFrequencyPreDs;
+
	gpu_metrics->average_uclk_frequency =
-		use_metrics_v2 ? metrics_v2->AverageUclkFrequencyPostDs : metrics->AverageUclkFrequencyPostDs;
-	gpu_metrics->average_vclk0_frequency =
+		use_metrics_v3 ? metrics_v3->AverageUclkFrequencyPostDs :
+		use_metrics_v2 ? metrics_v2->AverageUclkFrequencyPostDs :
+		metrics->AverageUclkFrequencyPostDs;
+	gpu_metrics->average_vclk0_frequency = use_metrics_v3 ? metrics_v3->AverageVclk0Frequency :
		use_metrics_v2 ? metrics_v2->AverageVclk0Frequency : metrics->AverageVclk0Frequency;
-	gpu_metrics->average_dclk0_frequency =
+	gpu_metrics->average_dclk0_frequency = use_metrics_v3 ? metrics_v3->AverageDclk0Frequency :
		use_metrics_v2 ? metrics_v2->AverageDclk0Frequency : metrics->AverageDclk0Frequency;
-	gpu_metrics->average_vclk1_frequency =
+	gpu_metrics->average_vclk1_frequency = use_metrics_v3 ? metrics_v3->AverageVclk1Frequency :
		use_metrics_v2 ? metrics_v2->AverageVclk1Frequency : metrics->AverageVclk1Frequency;
-	gpu_metrics->average_dclk1_frequency =
+	gpu_metrics->average_dclk1_frequency = use_metrics_v3 ? metrics_v3->AverageDclk1Frequency :
		use_metrics_v2 ? metrics_v2->AverageDclk1Frequency : metrics->AverageDclk1Frequency;
-	gpu_metrics->current_gfxclk =
+	gpu_metrics->current_gfxclk = use_metrics_v3 ? metrics_v3->CurrClock[PPCLK_GFXCLK] :
		use_metrics_v2 ? metrics_v2->CurrClock[PPCLK_GFXCLK] : metrics->CurrClock[PPCLK_GFXCLK];
-	gpu_metrics->current_socclk =
+	gpu_metrics->current_socclk = use_metrics_v3 ? metrics_v3->CurrClock[PPCLK_SOCCLK] :
		use_metrics_v2 ? metrics_v2->CurrClock[PPCLK_SOCCLK] : metrics->CurrClock[PPCLK_SOCCLK];
-	gpu_metrics->current_uclk =
+	gpu_metrics->current_uclk = use_metrics_v3 ? metrics_v3->CurrClock[PPCLK_UCLK] :
		use_metrics_v2 ? metrics_v2->CurrClock[PPCLK_UCLK] : metrics->CurrClock[PPCLK_UCLK];
-	gpu_metrics->current_vclk0 =
+	gpu_metrics->current_vclk0 = use_metrics_v3 ? metrics_v3->CurrClock[PPCLK_VCLK_0] :
		use_metrics_v2 ? metrics_v2->CurrClock[PPCLK_VCLK_0] : metrics->CurrClock[PPCLK_VCLK_0];
-	gpu_metrics->current_dclk0 =
+	gpu_metrics->current_dclk0 = use_metrics_v3 ? metrics_v3->CurrClock[PPCLK_DCLK_0] :
		use_metrics_v2 ? metrics_v2->CurrClock[PPCLK_DCLK_0] : metrics->CurrClock[PPCLK_DCLK_0];
-	gpu_metrics->current_vclk1 =
+	gpu_metrics->current_vclk1 = use_metrics_v3 ? metrics_v3->CurrClock[PPCLK_VCLK_1] :
		use_metrics_v2 ? metrics_v2->CurrClock[PPCLK_VCLK_1] : metrics->CurrClock[PPCLK_VCLK_1];
-	gpu_metrics->current_dclk1 =
+	gpu_metrics->current_dclk1 = use_metrics_v3 ? metrics_v3->CurrClock[PPCLK_DCLK_1] :
		use_metrics_v2 ? metrics_v2->CurrClock[PPCLK_DCLK_1] : metrics->CurrClock[PPCLK_DCLK_1];
 
	gpu_metrics->throttle_status = sienna_cichlid_get_throttler_status_locked(smu);
@@ -3743,12 +3799,15 @@ static ssize_t sienna_cichlid_get_gpu_metrics(struct smu_context *smu,
		smu_cmn_get_indep_throttler_status(gpu_metrics->throttle_status,
						   sienna_cichlid_throttler_map);
 
-	gpu_metrics->current_fan_speed = use_metrics_v2 ? metrics_v2->CurrFanSpeed : metrics->CurrFanSpeed;
+	gpu_metrics->current_fan_speed = use_metrics_v3 ? metrics_v3->CurrFanSpeed :
+		use_metrics_v2 ? metrics_v2->CurrFanSpeed : metrics->CurrFanSpeed;
 
	if (((adev->ip_versions[MP1_HWIP][0] == IP_VERSION(11, 0, 7)) && smu->smc_fw_version > 0x003A1E00) ||
	      ((adev->ip_versions[MP1_HWIP][0] == IP_VERSION(11, 0, 11)) && smu->smc_fw_version > 0x00410400)) {
-		gpu_metrics->pcie_link_width = use_metrics_v2 ? metrics_v2->PcieWidth : metrics->PcieWidth;
-		gpu_metrics->pcie_link_speed = link_speed[use_metrics_v2 ? metrics_v2->PcieRate : metrics->PcieRate];
+		gpu_metrics->pcie_link_width = use_metrics_v3 ? metrics_v3->PcieWidth :
+			use_metrics_v2 ? metrics_v2->PcieWidth : metrics->PcieWidth;
+		gpu_metrics->pcie_link_speed = link_speed[use_metrics_v3 ? metrics_v3->PcieRate :
+			use_metrics_v2 ? metrics_v2->PcieRate : metrics->PcieRate];
	} else {
		gpu_metrics->pcie_link_width =
			smu_v11_0_get_current_pcie_link_width(smu);
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c
index 4e557f4f7c4d..cf09e30bdfe0 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c
@@ -752,6 +752,7 @@ int smu_v13_0_gfx_off_control(struct smu_context *smu, bool enable)
	case IP_VERSION(13, 0, 1):
	case IP_VERSION(13, 0, 3):
	case IP_VERSION(13, 0, 5):
+	case IP_VERSION(13, 0, 8):
		if (!(adev->pm.pp_feature & PP_GFXOFF_MASK))
			return 0;
		if (enable)
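The sienna_cichlid changes above all follow one pattern: on MP1 11.0.7 parts, SMU firmware 0x3A4900 and newer reports SmuMetrics_V3_t, 0x3A4300 and newer reports SmuMetrics_V2_t, and older firmware reports the legacy SmuMetrics_t, so every read site becomes a v3/v2/legacy ternary chain. Factored out, the selection logic looks roughly like this; the enum and function names are illustrative, while the thresholds are the ones in the diff:

#include <stdbool.h>
#include <stdint.h>

enum metrics_version { METRICS_LEGACY, METRICS_V2, METRICS_V3 };

/* is_mp1_11_0_7 stands in for the driver's
 * ip_versions[MP1_HWIP][0] == IP_VERSION(11, 0, 7) check. */
static enum metrics_version pick_metrics_version(bool is_mp1_11_0_7,
						 uint32_t smc_fw_version)
{
	if (is_mp1_11_0_7 && smc_fw_version >= 0x3A4900)
		return METRICS_V3;   /* newest firmware: V3 table */
	if (is_mp1_11_0_7 && smc_fw_version >= 0x3A4300)
		return METRICS_V2;   /* older firmware: V2 table */
	return METRICS_LEGACY;       /* everything else: legacy table */
}

All three layouts are carried in the single SmuMetricsExternal_t union shown in the header hunk, so only the version test changes per firmware; the storage the driver reads from stays the same.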